{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 239662, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0862714990277976e-05, "grad_norm": 111.34592789710862, "learning_rate": 3.3375052148518985e-08, "loss": 10.1386, "step": 5 }, { "epoch": 4.172542998055595e-05, "grad_norm": 103.85443008468907, "learning_rate": 7.509386733416771e-08, "loss": 10.1728, "step": 10 }, { "epoch": 6.258814497083393e-05, "grad_norm": 111.79010941280083, "learning_rate": 1.1681268251981644e-07, "loss": 10.2807, "step": 15 }, { "epoch": 8.34508599611119e-05, "grad_norm": 105.0938998123197, "learning_rate": 1.5853149770546516e-07, "loss": 10.4379, "step": 20 }, { "epoch": 0.00010431357495138987, "grad_norm": 82.88356138039661, "learning_rate": 2.0025031289111392e-07, "loss": 9.9797, "step": 25 }, { "epoch": 0.00012517628994166785, "grad_norm": 83.7810727652631, "learning_rate": 2.419691280767627e-07, "loss": 9.8429, "step": 30 }, { "epoch": 0.00014603900493194582, "grad_norm": 64.81197222388609, "learning_rate": 2.8368794326241136e-07, "loss": 9.6187, "step": 35 }, { "epoch": 0.0001669017199222238, "grad_norm": 54.00256205417996, "learning_rate": 3.254067584480601e-07, "loss": 9.3197, "step": 40 }, { "epoch": 0.00018776443491250178, "grad_norm": 40.293719914476775, "learning_rate": 3.671255736337088e-07, "loss": 9.1157, "step": 45 }, { "epoch": 0.00020862714990277974, "grad_norm": 35.80423791781652, "learning_rate": 4.088443888193576e-07, "loss": 8.4758, "step": 50 }, { "epoch": 0.0002294898648930577, "grad_norm": 29.786301394206085, "learning_rate": 4.5056320400500624e-07, "loss": 8.1413, "step": 55 }, { "epoch": 0.0002503525798833357, "grad_norm": 26.979941975745163, "learning_rate": 4.92282019190655e-07, "loss": 8.2968, "step": 60 }, { "epoch": 0.0002712152948736137, "grad_norm": 22.265387548354802, "learning_rate": 5.340008343763038e-07, "loss": 7.8942, "step": 65 }, { "epoch": 0.00029207800986389164, "grad_norm": 24.93255937047162, "learning_rate": 5.757196495619525e-07, "loss": 7.9836, "step": 70 }, { "epoch": 0.00031294072485416963, "grad_norm": 18.90630598756626, "learning_rate": 6.174384647476012e-07, "loss": 7.748, "step": 75 }, { "epoch": 0.0003338034398444476, "grad_norm": 19.84621867240916, "learning_rate": 6.591572799332499e-07, "loss": 7.176, "step": 80 }, { "epoch": 0.00035466615483472556, "grad_norm": 19.687153861093353, "learning_rate": 7.008760951188987e-07, "loss": 7.068, "step": 85 }, { "epoch": 0.00037552886982500356, "grad_norm": 17.846936012700453, "learning_rate": 7.425949103045474e-07, "loss": 7.4262, "step": 90 }, { "epoch": 0.0003963915848152815, "grad_norm": 20.244123312521687, "learning_rate": 7.843137254901962e-07, "loss": 7.0131, "step": 95 }, { "epoch": 0.0004172542998055595, "grad_norm": 18.98737451202722, "learning_rate": 8.260325406758448e-07, "loss": 6.9833, "step": 100 }, { "epoch": 0.0004381170147958375, "grad_norm": 23.120876168583045, "learning_rate": 8.677513558614936e-07, "loss": 6.7912, "step": 105 }, { "epoch": 0.0004589797297861154, "grad_norm": 19.858641810539197, "learning_rate": 9.094701710471424e-07, "loss": 6.538, "step": 110 }, { "epoch": 0.0004798424447763934, "grad_norm": 20.961987509288225, "learning_rate": 9.511889862327911e-07, "loss": 6.4538, "step": 115 }, { "epoch": 0.0005007051597666714, "grad_norm": 19.828553749155176, "learning_rate": 9.929078014184399e-07, "loss": 6.3462, "step": 120 }, { "epoch": 0.0005215678747569494, "grad_norm": 23.842751541051673, "learning_rate": 1.0346266166040887e-06, "loss": 6.3101, "step": 125 }, { "epoch": 0.0005424305897472274, "grad_norm": 21.491959950603636, "learning_rate": 1.0763454317897372e-06, "loss": 6.3549, "step": 130 }, { "epoch": 0.0005632933047375053, "grad_norm": 17.25989830615238, "learning_rate": 1.118064246975386e-06, "loss": 6.2114, "step": 135 }, { "epoch": 0.0005841560197277833, "grad_norm": 26.829057919276654, "learning_rate": 1.1597830621610346e-06, "loss": 5.8806, "step": 140 }, { "epoch": 0.0006050187347180613, "grad_norm": 24.740178372376874, "learning_rate": 1.2015018773466834e-06, "loss": 6.2573, "step": 145 }, { "epoch": 0.0006258814497083393, "grad_norm": 21.814968322665752, "learning_rate": 1.2432206925323322e-06, "loss": 5.9087, "step": 150 }, { "epoch": 0.0006467441646986173, "grad_norm": 19.610415847329723, "learning_rate": 1.284939507717981e-06, "loss": 5.9569, "step": 155 }, { "epoch": 0.0006676068796888952, "grad_norm": 20.240349084506896, "learning_rate": 1.3266583229036297e-06, "loss": 5.8139, "step": 160 }, { "epoch": 0.0006884695946791731, "grad_norm": 22.554399214572456, "learning_rate": 1.3683771380892783e-06, "loss": 6.0869, "step": 165 }, { "epoch": 0.0007093323096694511, "grad_norm": 21.292841006264723, "learning_rate": 1.410095953274927e-06, "loss": 5.7723, "step": 170 }, { "epoch": 0.0007301950246597291, "grad_norm": 22.304459654570554, "learning_rate": 1.4518147684605757e-06, "loss": 5.6311, "step": 175 }, { "epoch": 0.0007510577396500071, "grad_norm": 20.72394567794571, "learning_rate": 1.4935335836462244e-06, "loss": 5.8008, "step": 180 }, { "epoch": 0.0007719204546402851, "grad_norm": 20.06401473054045, "learning_rate": 1.5352523988318732e-06, "loss": 5.6167, "step": 185 }, { "epoch": 0.000792783169630563, "grad_norm": 21.148669171890397, "learning_rate": 1.576971214017522e-06, "loss": 5.76, "step": 190 }, { "epoch": 0.000813645884620841, "grad_norm": 26.99405668893651, "learning_rate": 1.6186900292031708e-06, "loss": 5.6745, "step": 195 }, { "epoch": 0.000834508599611119, "grad_norm": 40.8143798108301, "learning_rate": 1.6604088443888194e-06, "loss": 5.9065, "step": 200 }, { "epoch": 0.000855371314601397, "grad_norm": 22.200529295648643, "learning_rate": 1.7021276595744682e-06, "loss": 5.7397, "step": 205 }, { "epoch": 0.000876234029591675, "grad_norm": 22.201413560549828, "learning_rate": 1.743846474760117e-06, "loss": 5.4422, "step": 210 }, { "epoch": 0.000897096744581953, "grad_norm": 33.07325361352935, "learning_rate": 1.7855652899457657e-06, "loss": 5.7, "step": 215 }, { "epoch": 0.0009179594595722308, "grad_norm": 23.68713766568243, "learning_rate": 1.8272841051314143e-06, "loss": 5.2241, "step": 220 }, { "epoch": 0.0009388221745625088, "grad_norm": 20.090900524666772, "learning_rate": 1.869002920317063e-06, "loss": 5.1394, "step": 225 }, { "epoch": 0.0009596848895527868, "grad_norm": 35.387812152974774, "learning_rate": 1.910721735502712e-06, "loss": 5.1847, "step": 230 }, { "epoch": 0.0009805476045430647, "grad_norm": 49.16478874904408, "learning_rate": 1.9524405506883605e-06, "loss": 5.2956, "step": 235 }, { "epoch": 0.0010014103195333428, "grad_norm": 45.14845198982, "learning_rate": 1.9941593658740094e-06, "loss": 5.4352, "step": 240 }, { "epoch": 0.0010222730345236207, "grad_norm": 26.553826717078344, "learning_rate": 2.035878181059658e-06, "loss": 5.5127, "step": 245 }, { "epoch": 0.0010431357495138988, "grad_norm": 39.917392111661684, "learning_rate": 2.077596996245307e-06, "loss": 5.3076, "step": 250 }, { "epoch": 0.0010639984645041767, "grad_norm": 45.94062575765353, "learning_rate": 2.1193158114309556e-06, "loss": 5.2167, "step": 255 }, { "epoch": 0.0010848611794944548, "grad_norm": 49.60637021722534, "learning_rate": 2.161034626616604e-06, "loss": 5.1098, "step": 260 }, { "epoch": 0.0011057238944847327, "grad_norm": 54.51897091423706, "learning_rate": 2.202753441802253e-06, "loss": 5.3477, "step": 265 }, { "epoch": 0.0011265866094750106, "grad_norm": 47.7862913374327, "learning_rate": 2.2444722569879017e-06, "loss": 4.9047, "step": 270 }, { "epoch": 0.0011474493244652887, "grad_norm": 48.39252624691024, "learning_rate": 2.2861910721735507e-06, "loss": 5.0953, "step": 275 }, { "epoch": 0.0011683120394555665, "grad_norm": 67.89320678026422, "learning_rate": 2.3279098873591993e-06, "loss": 5.3341, "step": 280 }, { "epoch": 0.0011891747544458446, "grad_norm": 24.464957590001003, "learning_rate": 2.369628702544848e-06, "loss": 5.2547, "step": 285 }, { "epoch": 0.0012100374694361225, "grad_norm": 72.51021926554967, "learning_rate": 2.4113475177304965e-06, "loss": 5.3022, "step": 290 }, { "epoch": 0.0012309001844264004, "grad_norm": 27.323082045928306, "learning_rate": 2.4530663329161455e-06, "loss": 5.098, "step": 295 }, { "epoch": 0.0012517628994166785, "grad_norm": 48.33397017879407, "learning_rate": 2.494785148101794e-06, "loss": 4.9347, "step": 300 }, { "epoch": 0.0012726256144069564, "grad_norm": 32.56541299949736, "learning_rate": 2.5365039632874426e-06, "loss": 5.1376, "step": 305 }, { "epoch": 0.0012934883293972345, "grad_norm": 45.44582862268174, "learning_rate": 2.5782227784730916e-06, "loss": 4.9328, "step": 310 }, { "epoch": 0.0013143510443875124, "grad_norm": 53.0710168310652, "learning_rate": 2.61994159365874e-06, "loss": 4.837, "step": 315 }, { "epoch": 0.0013352137593777905, "grad_norm": 38.799718986146814, "learning_rate": 2.661660408844389e-06, "loss": 4.6502, "step": 320 }, { "epoch": 0.0013560764743680684, "grad_norm": 32.47708396268518, "learning_rate": 2.7033792240300377e-06, "loss": 5.218, "step": 325 }, { "epoch": 0.0013769391893583463, "grad_norm": 36.26638333133021, "learning_rate": 2.7450980392156867e-06, "loss": 5.0688, "step": 330 }, { "epoch": 0.0013978019043486244, "grad_norm": 37.58562836083793, "learning_rate": 2.7868168544013353e-06, "loss": 5.1845, "step": 335 }, { "epoch": 0.0014186646193389022, "grad_norm": 36.033366842190496, "learning_rate": 2.8285356695869843e-06, "loss": 5.0439, "step": 340 }, { "epoch": 0.0014395273343291804, "grad_norm": 40.34054770777155, "learning_rate": 2.870254484772633e-06, "loss": 5.102, "step": 345 }, { "epoch": 0.0014603900493194582, "grad_norm": 35.81507045580286, "learning_rate": 2.911973299958281e-06, "loss": 4.6945, "step": 350 }, { "epoch": 0.0014812527643097361, "grad_norm": 46.71809972335905, "learning_rate": 2.95369211514393e-06, "loss": 4.8013, "step": 355 }, { "epoch": 0.0015021154793000142, "grad_norm": 31.986883423324024, "learning_rate": 2.9954109303295786e-06, "loss": 4.9616, "step": 360 }, { "epoch": 0.001522978194290292, "grad_norm": 32.1699006226776, "learning_rate": 3.0371297455152276e-06, "loss": 4.9755, "step": 365 }, { "epoch": 0.0015438409092805702, "grad_norm": 33.49573884753421, "learning_rate": 3.078848560700876e-06, "loss": 4.5846, "step": 370 }, { "epoch": 0.001564703624270848, "grad_norm": 41.13418496539261, "learning_rate": 3.120567375886525e-06, "loss": 4.5585, "step": 375 }, { "epoch": 0.001585566339261126, "grad_norm": 54.412253687431274, "learning_rate": 3.1622861910721737e-06, "loss": 4.8288, "step": 380 }, { "epoch": 0.001606429054251404, "grad_norm": 38.85032982692284, "learning_rate": 3.2040050062578227e-06, "loss": 4.9084, "step": 385 }, { "epoch": 0.001627291769241682, "grad_norm": 33.057278873904785, "learning_rate": 3.245723821443471e-06, "loss": 4.7741, "step": 390 }, { "epoch": 0.00164815448423196, "grad_norm": 52.83635730381295, "learning_rate": 3.2874426366291203e-06, "loss": 4.6411, "step": 395 }, { "epoch": 0.001669017199222238, "grad_norm": 52.18263321391136, "learning_rate": 3.3291614518147685e-06, "loss": 4.7038, "step": 400 }, { "epoch": 0.001689879914212516, "grad_norm": 47.61382172890018, "learning_rate": 3.3708802670004175e-06, "loss": 4.5735, "step": 405 }, { "epoch": 0.001710742629202794, "grad_norm": 53.87837214876616, "learning_rate": 3.412599082186066e-06, "loss": 4.6947, "step": 410 }, { "epoch": 0.0017316053441930718, "grad_norm": 32.12601783907377, "learning_rate": 3.454317897371715e-06, "loss": 4.6804, "step": 415 }, { "epoch": 0.00175246805918335, "grad_norm": 32.24168630237805, "learning_rate": 3.4960367125573636e-06, "loss": 4.5648, "step": 420 }, { "epoch": 0.0017733307741736278, "grad_norm": 89.302184367537, "learning_rate": 3.5377555277430126e-06, "loss": 4.5178, "step": 425 }, { "epoch": 0.001794193489163906, "grad_norm": 82.15758451674932, "learning_rate": 3.579474342928661e-06, "loss": 4.5494, "step": 430 }, { "epoch": 0.0018150562041541838, "grad_norm": 36.8293578203873, "learning_rate": 3.62119315811431e-06, "loss": 4.672, "step": 435 }, { "epoch": 0.0018359189191444617, "grad_norm": 47.945159090827396, "learning_rate": 3.6629119732999583e-06, "loss": 4.3606, "step": 440 }, { "epoch": 0.0018567816341347398, "grad_norm": 48.3943689166475, "learning_rate": 3.7046307884856073e-06, "loss": 4.354, "step": 445 }, { "epoch": 0.0018776443491250177, "grad_norm": 52.69124508470319, "learning_rate": 3.746349603671256e-06, "loss": 4.3961, "step": 450 }, { "epoch": 0.0018985070641152958, "grad_norm": 36.46804961655075, "learning_rate": 3.788068418856905e-06, "loss": 4.2873, "step": 455 }, { "epoch": 0.0019193697791055737, "grad_norm": 29.53706857279455, "learning_rate": 3.8297872340425535e-06, "loss": 4.3058, "step": 460 }, { "epoch": 0.0019402324940958518, "grad_norm": 34.57445106358178, "learning_rate": 3.871506049228203e-06, "loss": 4.4299, "step": 465 }, { "epoch": 0.0019610952090861294, "grad_norm": 60.97291030257093, "learning_rate": 3.913224864413851e-06, "loss": 4.427, "step": 470 }, { "epoch": 0.0019819579240764077, "grad_norm": 76.27443380936562, "learning_rate": 3.954943679599499e-06, "loss": 4.4946, "step": 475 }, { "epoch": 0.0020028206390666856, "grad_norm": 30.092541083424955, "learning_rate": 3.996662494785149e-06, "loss": 4.391, "step": 480 }, { "epoch": 0.0020236833540569635, "grad_norm": 24.524989877711548, "learning_rate": 4.038381309970797e-06, "loss": 4.3413, "step": 485 }, { "epoch": 0.0020445460690472414, "grad_norm": 35.781867502049195, "learning_rate": 4.080100125156446e-06, "loss": 4.4839, "step": 490 }, { "epoch": 0.0020654087840375197, "grad_norm": 27.46360915105955, "learning_rate": 4.121818940342094e-06, "loss": 4.3844, "step": 495 }, { "epoch": 0.0020862714990277976, "grad_norm": 24.693924400614033, "learning_rate": 4.163537755527744e-06, "loss": 3.9378, "step": 500 }, { "epoch": 0.0021071342140180755, "grad_norm": 64.29243727063665, "learning_rate": 4.2052565707133915e-06, "loss": 3.9994, "step": 505 }, { "epoch": 0.0021279969290083534, "grad_norm": 35.882518016425806, "learning_rate": 4.246975385899041e-06, "loss": 3.892, "step": 510 }, { "epoch": 0.0021488596439986313, "grad_norm": 30.63223634736716, "learning_rate": 4.2886942010846895e-06, "loss": 3.8322, "step": 515 }, { "epoch": 0.0021697223589889096, "grad_norm": 44.91261818621819, "learning_rate": 4.330413016270338e-06, "loss": 3.7995, "step": 520 }, { "epoch": 0.0021905850739791875, "grad_norm": 54.58822779585146, "learning_rate": 4.372131831455987e-06, "loss": 3.8826, "step": 525 }, { "epoch": 0.0022114477889694653, "grad_norm": 43.76133601568226, "learning_rate": 4.413850646641636e-06, "loss": 3.8142, "step": 530 }, { "epoch": 0.0022323105039597432, "grad_norm": 26.700082878210456, "learning_rate": 4.455569461827285e-06, "loss": 3.9003, "step": 535 }, { "epoch": 0.002253173218950021, "grad_norm": 26.522988856001298, "learning_rate": 4.497288277012933e-06, "loss": 3.6454, "step": 540 }, { "epoch": 0.0022740359339402994, "grad_norm": 29.95416762120622, "learning_rate": 4.539007092198582e-06, "loss": 3.6349, "step": 545 }, { "epoch": 0.0022948986489305773, "grad_norm": 24.85655711507942, "learning_rate": 4.580725907384231e-06, "loss": 3.4419, "step": 550 }, { "epoch": 0.002315761363920855, "grad_norm": 31.094727351008395, "learning_rate": 4.622444722569879e-06, "loss": 3.4507, "step": 555 }, { "epoch": 0.002336624078911133, "grad_norm": 44.168396433874655, "learning_rate": 4.664163537755528e-06, "loss": 3.4103, "step": 560 }, { "epoch": 0.002357486793901411, "grad_norm": 30.356501269738803, "learning_rate": 4.705882352941177e-06, "loss": 3.1735, "step": 565 }, { "epoch": 0.0023783495088916893, "grad_norm": 34.20392855491884, "learning_rate": 4.7476011681268255e-06, "loss": 3.0438, "step": 570 }, { "epoch": 0.002399212223881967, "grad_norm": 35.77481235235855, "learning_rate": 4.789319983312474e-06, "loss": 2.9761, "step": 575 }, { "epoch": 0.002420074938872245, "grad_norm": 54.611433562368525, "learning_rate": 4.8310387984981235e-06, "loss": 3.0361, "step": 580 }, { "epoch": 0.002440937653862523, "grad_norm": 23.553558089972036, "learning_rate": 4.872757613683772e-06, "loss": 2.8305, "step": 585 }, { "epoch": 0.002461800368852801, "grad_norm": 34.14754965953843, "learning_rate": 4.914476428869421e-06, "loss": 2.8931, "step": 590 }, { "epoch": 0.002482663083843079, "grad_norm": 30.95420119291304, "learning_rate": 4.956195244055069e-06, "loss": 2.8606, "step": 595 }, { "epoch": 0.002503525798833357, "grad_norm": 35.84692739391711, "learning_rate": 4.997914059240718e-06, "loss": 2.7717, "step": 600 }, { "epoch": 0.002524388513823635, "grad_norm": 27.201251171700576, "learning_rate": 5.039632874426366e-06, "loss": 2.706, "step": 605 }, { "epoch": 0.002545251228813913, "grad_norm": 38.98773966533365, "learning_rate": 5.081351689612015e-06, "loss": 2.6743, "step": 610 }, { "epoch": 0.0025661139438041907, "grad_norm": 32.929771668705406, "learning_rate": 5.1230705047976635e-06, "loss": 2.7135, "step": 615 }, { "epoch": 0.002586976658794469, "grad_norm": 31.107591033036936, "learning_rate": 5.164789319983313e-06, "loss": 2.5296, "step": 620 }, { "epoch": 0.002607839373784747, "grad_norm": 33.29393600053677, "learning_rate": 5.2065081351689615e-06, "loss": 2.6676, "step": 625 }, { "epoch": 0.0026287020887750248, "grad_norm": 50.32074933505615, "learning_rate": 5.24822695035461e-06, "loss": 2.5268, "step": 630 }, { "epoch": 0.0026495648037653027, "grad_norm": 36.56180435386037, "learning_rate": 5.289945765540259e-06, "loss": 2.6817, "step": 635 }, { "epoch": 0.002670427518755581, "grad_norm": 24.687419326011263, "learning_rate": 5.331664580725908e-06, "loss": 2.598, "step": 640 }, { "epoch": 0.002691290233745859, "grad_norm": 18.779763079746267, "learning_rate": 5.373383395911557e-06, "loss": 2.57, "step": 645 }, { "epoch": 0.0027121529487361368, "grad_norm": 19.550811454988793, "learning_rate": 5.415102211097205e-06, "loss": 2.5085, "step": 650 }, { "epoch": 0.0027330156637264146, "grad_norm": 20.895661351527355, "learning_rate": 5.456821026282854e-06, "loss": 2.3683, "step": 655 }, { "epoch": 0.0027538783787166925, "grad_norm": 19.283450419686055, "learning_rate": 5.498539841468503e-06, "loss": 2.42, "step": 660 }, { "epoch": 0.002774741093706971, "grad_norm": 21.98130139258574, "learning_rate": 5.540258656654152e-06, "loss": 2.4401, "step": 665 }, { "epoch": 0.0027956038086972487, "grad_norm": 47.643651177318525, "learning_rate": 5.5819774718398e-06, "loss": 2.4836, "step": 670 }, { "epoch": 0.0028164665236875266, "grad_norm": 24.732880098182108, "learning_rate": 5.623696287025449e-06, "loss": 2.3048, "step": 675 }, { "epoch": 0.0028373292386778045, "grad_norm": 15.210678790388355, "learning_rate": 5.665415102211098e-06, "loss": 2.1959, "step": 680 }, { "epoch": 0.0028581919536680824, "grad_norm": 22.255633433099742, "learning_rate": 5.707133917396747e-06, "loss": 2.3598, "step": 685 }, { "epoch": 0.0028790546686583607, "grad_norm": 27.66501764533808, "learning_rate": 5.7488527325823955e-06, "loss": 2.3745, "step": 690 }, { "epoch": 0.0028999173836486386, "grad_norm": 24.11568400909354, "learning_rate": 5.790571547768043e-06, "loss": 2.2289, "step": 695 }, { "epoch": 0.0029207800986389165, "grad_norm": 25.436351286234753, "learning_rate": 5.8322903629536935e-06, "loss": 2.2412, "step": 700 }, { "epoch": 0.0029416428136291944, "grad_norm": 26.98199040669251, "learning_rate": 5.874009178139341e-06, "loss": 2.3147, "step": 705 }, { "epoch": 0.0029625055286194722, "grad_norm": 44.23321434418372, "learning_rate": 5.91572799332499e-06, "loss": 2.3898, "step": 710 }, { "epoch": 0.0029833682436097506, "grad_norm": 13.903028502205984, "learning_rate": 5.957446808510638e-06, "loss": 2.2536, "step": 715 }, { "epoch": 0.0030042309586000284, "grad_norm": 32.1225176261157, "learning_rate": 5.999165623696287e-06, "loss": 2.2128, "step": 720 }, { "epoch": 0.0030250936735903063, "grad_norm": 22.290273153660443, "learning_rate": 6.040884438881936e-06, "loss": 2.1021, "step": 725 }, { "epoch": 0.003045956388580584, "grad_norm": 20.459703105562166, "learning_rate": 6.082603254067585e-06, "loss": 2.1634, "step": 730 }, { "epoch": 0.003066819103570862, "grad_norm": 18.333060702138116, "learning_rate": 6.1243220692532335e-06, "loss": 2.1983, "step": 735 }, { "epoch": 0.0030876818185611404, "grad_norm": 16.364922106979286, "learning_rate": 6.166040884438882e-06, "loss": 2.1111, "step": 740 }, { "epoch": 0.0031085445335514183, "grad_norm": 23.955666242459312, "learning_rate": 6.2077596996245315e-06, "loss": 2.2627, "step": 745 }, { "epoch": 0.003129407248541696, "grad_norm": 23.294307809814356, "learning_rate": 6.24947851481018e-06, "loss": 2.3584, "step": 750 }, { "epoch": 0.003150269963531974, "grad_norm": 18.37678190561605, "learning_rate": 6.291197329995829e-06, "loss": 2.1061, "step": 755 }, { "epoch": 0.003171132678522252, "grad_norm": 46.68186724126571, "learning_rate": 6.332916145181477e-06, "loss": 2.1751, "step": 760 }, { "epoch": 0.0031919953935125303, "grad_norm": 12.191161457884466, "learning_rate": 6.374634960367127e-06, "loss": 2.0924, "step": 765 }, { "epoch": 0.003212858108502808, "grad_norm": 11.984269214158447, "learning_rate": 6.416353775552775e-06, "loss": 2.1119, "step": 770 }, { "epoch": 0.003233720823493086, "grad_norm": 16.68665286640823, "learning_rate": 6.458072590738424e-06, "loss": 2.1124, "step": 775 }, { "epoch": 0.003254583538483364, "grad_norm": 14.456066324028924, "learning_rate": 6.4997914059240715e-06, "loss": 2.1509, "step": 780 }, { "epoch": 0.0032754462534736422, "grad_norm": 13.786409310336726, "learning_rate": 6.541510221109722e-06, "loss": 2.0447, "step": 785 }, { "epoch": 0.00329630896846392, "grad_norm": 13.025607498480793, "learning_rate": 6.5832290362953695e-06, "loss": 2.0021, "step": 790 }, { "epoch": 0.003317171683454198, "grad_norm": 17.92086084190608, "learning_rate": 6.624947851481018e-06, "loss": 2.0355, "step": 795 }, { "epoch": 0.003338034398444476, "grad_norm": 49.056181366475236, "learning_rate": 6.666666666666667e-06, "loss": 1.9754, "step": 800 }, { "epoch": 0.003358897113434754, "grad_norm": 20.818185159546925, "learning_rate": 6.708385481852316e-06, "loss": 2.107, "step": 805 }, { "epoch": 0.003379759828425032, "grad_norm": 19.91624954678699, "learning_rate": 6.750104297037965e-06, "loss": 2.0427, "step": 810 }, { "epoch": 0.00340062254341531, "grad_norm": 13.329082765238935, "learning_rate": 6.791823112223613e-06, "loss": 2.0117, "step": 815 }, { "epoch": 0.003421485258405588, "grad_norm": 20.415707339047767, "learning_rate": 6.833541927409262e-06, "loss": 1.8483, "step": 820 }, { "epoch": 0.0034423479733958658, "grad_norm": 16.603976106377967, "learning_rate": 6.875260742594911e-06, "loss": 2.017, "step": 825 }, { "epoch": 0.0034632106883861436, "grad_norm": 18.97210494852075, "learning_rate": 6.91697955778056e-06, "loss": 2.0138, "step": 830 }, { "epoch": 0.003484073403376422, "grad_norm": 15.58637211545925, "learning_rate": 6.958698372966208e-06, "loss": 1.9988, "step": 835 }, { "epoch": 0.0035049361183667, "grad_norm": 17.997964778511435, "learning_rate": 7.000417188151857e-06, "loss": 1.927, "step": 840 }, { "epoch": 0.0035257988333569777, "grad_norm": 19.19784312905818, "learning_rate": 7.0421360033375055e-06, "loss": 1.9517, "step": 845 }, { "epoch": 0.0035466615483472556, "grad_norm": 19.38910513154194, "learning_rate": 7.083854818523155e-06, "loss": 1.9772, "step": 850 }, { "epoch": 0.0035675242633375335, "grad_norm": 11.955995040147812, "learning_rate": 7.1255736337088035e-06, "loss": 1.9152, "step": 855 }, { "epoch": 0.003588386978327812, "grad_norm": 15.26992432952776, "learning_rate": 7.167292448894452e-06, "loss": 1.923, "step": 860 }, { "epoch": 0.0036092496933180897, "grad_norm": 15.85507636807784, "learning_rate": 7.2090112640801e-06, "loss": 1.8988, "step": 865 }, { "epoch": 0.0036301124083083676, "grad_norm": 13.57378681423072, "learning_rate": 7.25073007926575e-06, "loss": 1.8485, "step": 870 }, { "epoch": 0.0036509751232986455, "grad_norm": 14.7585874887787, "learning_rate": 7.292448894451399e-06, "loss": 1.8281, "step": 875 }, { "epoch": 0.0036718378382889234, "grad_norm": 13.32581585676624, "learning_rate": 7.334167709637046e-06, "loss": 1.7712, "step": 880 }, { "epoch": 0.0036927005532792017, "grad_norm": 20.01687138103178, "learning_rate": 7.375886524822695e-06, "loss": 1.9175, "step": 885 }, { "epoch": 0.0037135632682694796, "grad_norm": 20.222135803889532, "learning_rate": 7.417605340008344e-06, "loss": 1.9013, "step": 890 }, { "epoch": 0.0037344259832597575, "grad_norm": 20.185519342955452, "learning_rate": 7.459324155193993e-06, "loss": 1.8867, "step": 895 }, { "epoch": 0.0037552886982500353, "grad_norm": 14.328236089190195, "learning_rate": 7.5010429703796415e-06, "loss": 1.8729, "step": 900 }, { "epoch": 0.0037761514132403137, "grad_norm": 18.437993110723305, "learning_rate": 7.54276178556529e-06, "loss": 1.7506, "step": 905 }, { "epoch": 0.0037970141282305915, "grad_norm": 15.143774720836666, "learning_rate": 7.5844806007509395e-06, "loss": 1.7822, "step": 910 }, { "epoch": 0.0038178768432208694, "grad_norm": 16.120194160093245, "learning_rate": 7.626199415936588e-06, "loss": 1.7415, "step": 915 }, { "epoch": 0.0038387395582111473, "grad_norm": 13.132615807542273, "learning_rate": 7.667918231122237e-06, "loss": 1.6913, "step": 920 }, { "epoch": 0.003859602273201425, "grad_norm": 29.206390081386832, "learning_rate": 7.709637046307885e-06, "loss": 1.7119, "step": 925 }, { "epoch": 0.0038804649881917035, "grad_norm": 15.215748815954655, "learning_rate": 7.751355861493535e-06, "loss": 1.7962, "step": 930 }, { "epoch": 0.0039013277031819814, "grad_norm": 12.12765253987352, "learning_rate": 7.793074676679182e-06, "loss": 1.6565, "step": 935 }, { "epoch": 0.003922190418172259, "grad_norm": 14.785331236687025, "learning_rate": 7.834793491864832e-06, "loss": 1.701, "step": 940 }, { "epoch": 0.003943053133162538, "grad_norm": 15.837750652506509, "learning_rate": 7.87651230705048e-06, "loss": 1.6831, "step": 945 }, { "epoch": 0.0039639158481528155, "grad_norm": 13.312926454247794, "learning_rate": 7.918231122236129e-06, "loss": 1.6926, "step": 950 }, { "epoch": 0.003984778563143093, "grad_norm": 14.326257239749859, "learning_rate": 7.959949937421778e-06, "loss": 1.6576, "step": 955 }, { "epoch": 0.004005641278133371, "grad_norm": 13.834349240479114, "learning_rate": 8.001668752607426e-06, "loss": 1.6544, "step": 960 }, { "epoch": 0.004026503993123649, "grad_norm": 19.964972943192848, "learning_rate": 8.043387567793076e-06, "loss": 1.5829, "step": 965 }, { "epoch": 0.004047366708113927, "grad_norm": 10.986440363032909, "learning_rate": 8.085106382978723e-06, "loss": 1.4913, "step": 970 }, { "epoch": 0.004068229423104205, "grad_norm": 10.987828863800752, "learning_rate": 8.126825198164373e-06, "loss": 1.541, "step": 975 }, { "epoch": 0.004089092138094483, "grad_norm": 9.972174388775189, "learning_rate": 8.168544013350022e-06, "loss": 1.558, "step": 980 }, { "epoch": 0.004109954853084761, "grad_norm": 14.528773937976354, "learning_rate": 8.21026282853567e-06, "loss": 1.433, "step": 985 }, { "epoch": 0.0041308175680750394, "grad_norm": 10.562553147434324, "learning_rate": 8.25198164372132e-06, "loss": 1.5309, "step": 990 }, { "epoch": 0.004151680283065317, "grad_norm": 18.374449363894016, "learning_rate": 8.293700458906969e-06, "loss": 1.5187, "step": 995 }, { "epoch": 0.004172542998055595, "grad_norm": 10.572611151587978, "learning_rate": 8.335419274092616e-06, "loss": 1.5273, "step": 1000 }, { "epoch": 0.004193405713045873, "grad_norm": 16.953112571521828, "learning_rate": 8.377138089278266e-06, "loss": 1.492, "step": 1005 }, { "epoch": 0.004214268428036151, "grad_norm": 9.211456984665828, "learning_rate": 8.418856904463914e-06, "loss": 1.5081, "step": 1010 }, { "epoch": 0.004235131143026429, "grad_norm": 12.663547311559325, "learning_rate": 8.460575719649563e-06, "loss": 1.5004, "step": 1015 }, { "epoch": 0.004255993858016707, "grad_norm": 10.757188994044144, "learning_rate": 8.50229453483521e-06, "loss": 1.3591, "step": 1020 }, { "epoch": 0.004276856573006985, "grad_norm": 9.871762993280013, "learning_rate": 8.54401335002086e-06, "loss": 1.3743, "step": 1025 }, { "epoch": 0.0042977192879972625, "grad_norm": 12.45630918689565, "learning_rate": 8.585732165206508e-06, "loss": 1.3811, "step": 1030 }, { "epoch": 0.00431858200298754, "grad_norm": 16.914510762083047, "learning_rate": 8.627450980392157e-06, "loss": 1.3939, "step": 1035 }, { "epoch": 0.004339444717977819, "grad_norm": 9.060927327962638, "learning_rate": 8.669169795577807e-06, "loss": 1.399, "step": 1040 }, { "epoch": 0.004360307432968097, "grad_norm": 11.122345534718935, "learning_rate": 8.710888610763454e-06, "loss": 1.3743, "step": 1045 }, { "epoch": 0.004381170147958375, "grad_norm": 11.584205027225119, "learning_rate": 8.752607425949104e-06, "loss": 1.3115, "step": 1050 }, { "epoch": 0.004402032862948653, "grad_norm": 8.824153890102401, "learning_rate": 8.794326241134753e-06, "loss": 1.2985, "step": 1055 }, { "epoch": 0.004422895577938931, "grad_norm": 11.982999947216383, "learning_rate": 8.836045056320401e-06, "loss": 1.3009, "step": 1060 }, { "epoch": 0.004443758292929209, "grad_norm": 9.675260012783154, "learning_rate": 8.87776387150605e-06, "loss": 1.2365, "step": 1065 }, { "epoch": 0.0044646210079194865, "grad_norm": 17.445968526875816, "learning_rate": 8.919482686691698e-06, "loss": 1.2737, "step": 1070 }, { "epoch": 0.004485483722909764, "grad_norm": 11.254960010904087, "learning_rate": 8.961201501877348e-06, "loss": 1.2713, "step": 1075 }, { "epoch": 0.004506346437900042, "grad_norm": 12.266141964164449, "learning_rate": 9.002920317062997e-06, "loss": 1.1415, "step": 1080 }, { "epoch": 0.00452720915289032, "grad_norm": 11.061023485273678, "learning_rate": 9.044639132248645e-06, "loss": 1.1922, "step": 1085 }, { "epoch": 0.004548071867880599, "grad_norm": 10.342318014897302, "learning_rate": 9.086357947434294e-06, "loss": 1.2201, "step": 1090 }, { "epoch": 0.004568934582870877, "grad_norm": 10.540723482583061, "learning_rate": 9.128076762619942e-06, "loss": 1.184, "step": 1095 }, { "epoch": 0.004589797297861155, "grad_norm": 12.194405837100584, "learning_rate": 9.169795577805591e-06, "loss": 1.2257, "step": 1100 }, { "epoch": 0.0046106600128514325, "grad_norm": 24.142470382797423, "learning_rate": 9.21151439299124e-06, "loss": 1.1757, "step": 1105 }, { "epoch": 0.00463152272784171, "grad_norm": 8.84477454443934, "learning_rate": 9.253233208176888e-06, "loss": 1.1766, "step": 1110 }, { "epoch": 0.004652385442831988, "grad_norm": 9.19748153197469, "learning_rate": 9.294952023362536e-06, "loss": 1.0985, "step": 1115 }, { "epoch": 0.004673248157822266, "grad_norm": 13.931503802744396, "learning_rate": 9.336670838548186e-06, "loss": 1.0579, "step": 1120 }, { "epoch": 0.004694110872812544, "grad_norm": 7.815063509323381, "learning_rate": 9.378389653733835e-06, "loss": 1.0977, "step": 1125 }, { "epoch": 0.004714973587802822, "grad_norm": 10.786265562166204, "learning_rate": 9.420108468919483e-06, "loss": 1.0263, "step": 1130 }, { "epoch": 0.004735836302793101, "grad_norm": 15.302954229224527, "learning_rate": 9.461827284105132e-06, "loss": 1.0615, "step": 1135 }, { "epoch": 0.004756699017783379, "grad_norm": 10.074336387084234, "learning_rate": 9.503546099290782e-06, "loss": 1.0107, "step": 1140 }, { "epoch": 0.0047775617327736565, "grad_norm": 11.442772440418311, "learning_rate": 9.54526491447643e-06, "loss": 1.0906, "step": 1145 }, { "epoch": 0.004798424447763934, "grad_norm": 10.211054937115962, "learning_rate": 9.586983729662079e-06, "loss": 0.9577, "step": 1150 }, { "epoch": 0.004819287162754212, "grad_norm": 7.847372053364711, "learning_rate": 9.628702544847726e-06, "loss": 1.0247, "step": 1155 }, { "epoch": 0.00484014987774449, "grad_norm": 11.178156823090037, "learning_rate": 9.670421360033376e-06, "loss": 1.0159, "step": 1160 }, { "epoch": 0.004861012592734768, "grad_norm": 10.613618744260705, "learning_rate": 9.712140175219025e-06, "loss": 0.983, "step": 1165 }, { "epoch": 0.004881875307725046, "grad_norm": 11.260009511335639, "learning_rate": 9.753858990404673e-06, "loss": 0.9485, "step": 1170 }, { "epoch": 0.004902738022715324, "grad_norm": 12.222752183527712, "learning_rate": 9.795577805590322e-06, "loss": 0.9896, "step": 1175 }, { "epoch": 0.004923600737705602, "grad_norm": 10.005975823404823, "learning_rate": 9.837296620775972e-06, "loss": 0.9976, "step": 1180 }, { "epoch": 0.00494446345269588, "grad_norm": 9.918521681353328, "learning_rate": 9.87901543596162e-06, "loss": 0.959, "step": 1185 }, { "epoch": 0.004965326167686158, "grad_norm": 9.065480348197553, "learning_rate": 9.920734251147269e-06, "loss": 0.9469, "step": 1190 }, { "epoch": 0.004986188882676436, "grad_norm": 10.875055127719191, "learning_rate": 9.962453066332917e-06, "loss": 0.8772, "step": 1195 }, { "epoch": 0.005007051597666714, "grad_norm": 8.633617344889045, "learning_rate": 1.0004171881518564e-05, "loss": 0.886, "step": 1200 }, { "epoch": 0.005027914312656992, "grad_norm": 9.481421475431214, "learning_rate": 1.0045890696704216e-05, "loss": 0.945, "step": 1205 }, { "epoch": 0.00504877702764727, "grad_norm": 9.4863934595766, "learning_rate": 1.0087609511889865e-05, "loss": 0.8532, "step": 1210 }, { "epoch": 0.005069639742637548, "grad_norm": 8.92808291182316, "learning_rate": 1.0129328327075511e-05, "loss": 0.7688, "step": 1215 }, { "epoch": 0.005090502457627826, "grad_norm": 8.462999266754757, "learning_rate": 1.017104714226116e-05, "loss": 0.868, "step": 1220 }, { "epoch": 0.0051113651726181035, "grad_norm": 8.21082377291239, "learning_rate": 1.0212765957446808e-05, "loss": 0.8125, "step": 1225 }, { "epoch": 0.005132227887608381, "grad_norm": 8.295607620157154, "learning_rate": 1.0254484772632458e-05, "loss": 0.7756, "step": 1230 }, { "epoch": 0.00515309060259866, "grad_norm": 7.333652334581929, "learning_rate": 1.0296203587818107e-05, "loss": 0.77, "step": 1235 }, { "epoch": 0.005173953317588938, "grad_norm": 10.889953441376555, "learning_rate": 1.0337922403003755e-05, "loss": 0.8466, "step": 1240 }, { "epoch": 0.005194816032579216, "grad_norm": 8.74363155570099, "learning_rate": 1.0379641218189404e-05, "loss": 0.7446, "step": 1245 }, { "epoch": 0.005215678747569494, "grad_norm": 12.167369298693531, "learning_rate": 1.0421360033375052e-05, "loss": 0.8007, "step": 1250 }, { "epoch": 0.005236541462559772, "grad_norm": 9.921495115359804, "learning_rate": 1.0463078848560701e-05, "loss": 0.706, "step": 1255 }, { "epoch": 0.0052574041775500496, "grad_norm": 8.756396197257875, "learning_rate": 1.050479766374635e-05, "loss": 0.7037, "step": 1260 }, { "epoch": 0.0052782668925403274, "grad_norm": 7.713676186018467, "learning_rate": 1.0546516478931998e-05, "loss": 0.7306, "step": 1265 }, { "epoch": 0.005299129607530605, "grad_norm": 12.50055137615994, "learning_rate": 1.0588235294117648e-05, "loss": 0.7135, "step": 1270 }, { "epoch": 0.005319992322520883, "grad_norm": 7.838268361734335, "learning_rate": 1.0629954109303297e-05, "loss": 0.7558, "step": 1275 }, { "epoch": 0.005340855037511162, "grad_norm": 7.453458593753157, "learning_rate": 1.0671672924488945e-05, "loss": 0.6856, "step": 1280 }, { "epoch": 0.00536171775250144, "grad_norm": 11.657717767365652, "learning_rate": 1.0713391739674594e-05, "loss": 0.7142, "step": 1285 }, { "epoch": 0.005382580467491718, "grad_norm": 7.958211530732564, "learning_rate": 1.0755110554860242e-05, "loss": 0.7097, "step": 1290 }, { "epoch": 0.005403443182481996, "grad_norm": 15.13442340048053, "learning_rate": 1.0796829370045892e-05, "loss": 0.7258, "step": 1295 }, { "epoch": 0.0054243058974722735, "grad_norm": 23.223824115408824, "learning_rate": 1.0838548185231541e-05, "loss": 0.6578, "step": 1300 }, { "epoch": 0.005445168612462551, "grad_norm": 11.572137545980572, "learning_rate": 1.0880267000417189e-05, "loss": 0.6449, "step": 1305 }, { "epoch": 0.005466031327452829, "grad_norm": 9.706264955489209, "learning_rate": 1.0921985815602838e-05, "loss": 0.6531, "step": 1310 }, { "epoch": 0.005486894042443107, "grad_norm": 12.232140415691866, "learning_rate": 1.0963704630788488e-05, "loss": 0.6058, "step": 1315 }, { "epoch": 0.005507756757433385, "grad_norm": 13.050994159487535, "learning_rate": 1.1005423445974135e-05, "loss": 0.6064, "step": 1320 }, { "epoch": 0.005528619472423663, "grad_norm": 8.20557252403525, "learning_rate": 1.1047142261159785e-05, "loss": 0.6402, "step": 1325 }, { "epoch": 0.005549482187413942, "grad_norm": 8.70290206854742, "learning_rate": 1.1088861076345432e-05, "loss": 0.6695, "step": 1330 }, { "epoch": 0.00557034490240422, "grad_norm": 9.599567572694545, "learning_rate": 1.1130579891531082e-05, "loss": 0.5977, "step": 1335 }, { "epoch": 0.0055912076173944975, "grad_norm": 8.875948055233247, "learning_rate": 1.1172298706716731e-05, "loss": 0.6043, "step": 1340 }, { "epoch": 0.005612070332384775, "grad_norm": 8.201076792836066, "learning_rate": 1.1214017521902379e-05, "loss": 0.6218, "step": 1345 }, { "epoch": 0.005632933047375053, "grad_norm": 10.748213382649155, "learning_rate": 1.1255736337088028e-05, "loss": 0.622, "step": 1350 }, { "epoch": 0.005653795762365331, "grad_norm": 6.977258843290734, "learning_rate": 1.1297455152273676e-05, "loss": 0.6196, "step": 1355 }, { "epoch": 0.005674658477355609, "grad_norm": 8.415922237556773, "learning_rate": 1.1339173967459326e-05, "loss": 0.6495, "step": 1360 }, { "epoch": 0.005695521192345887, "grad_norm": 8.96068332383778, "learning_rate": 1.1380892782644975e-05, "loss": 0.5471, "step": 1365 }, { "epoch": 0.005716383907336165, "grad_norm": 7.357148194488467, "learning_rate": 1.1422611597830621e-05, "loss": 0.5263, "step": 1370 }, { "epoch": 0.005737246622326443, "grad_norm": 7.9329314651414515, "learning_rate": 1.1464330413016272e-05, "loss": 0.5566, "step": 1375 }, { "epoch": 0.005758109337316721, "grad_norm": 7.699511485958797, "learning_rate": 1.1506049228201922e-05, "loss": 0.5866, "step": 1380 }, { "epoch": 0.005778972052306999, "grad_norm": 7.114106688574835, "learning_rate": 1.1547768043387568e-05, "loss": 0.5671, "step": 1385 }, { "epoch": 0.005799834767297277, "grad_norm": 8.887531631786599, "learning_rate": 1.1589486858573219e-05, "loss": 0.5353, "step": 1390 }, { "epoch": 0.005820697482287555, "grad_norm": 13.521585960738184, "learning_rate": 1.1631205673758865e-05, "loss": 0.5269, "step": 1395 }, { "epoch": 0.005841560197277833, "grad_norm": 7.437090809778842, "learning_rate": 1.1672924488944514e-05, "loss": 0.5195, "step": 1400 }, { "epoch": 0.005862422912268111, "grad_norm": 8.043392235821923, "learning_rate": 1.1714643304130164e-05, "loss": 0.5199, "step": 1405 }, { "epoch": 0.005883285627258389, "grad_norm": 7.226759433817312, "learning_rate": 1.1756362119315811e-05, "loss": 0.4894, "step": 1410 }, { "epoch": 0.005904148342248667, "grad_norm": 9.237835513043981, "learning_rate": 1.179808093450146e-05, "loss": 0.5379, "step": 1415 }, { "epoch": 0.0059250110572389445, "grad_norm": 8.491151921729404, "learning_rate": 1.183979974968711e-05, "loss": 0.4733, "step": 1420 }, { "epoch": 0.005945873772229223, "grad_norm": 6.474088980619093, "learning_rate": 1.1881518564872758e-05, "loss": 0.4451, "step": 1425 }, { "epoch": 0.005966736487219501, "grad_norm": 7.4198872662491375, "learning_rate": 1.1923237380058407e-05, "loss": 0.4365, "step": 1430 }, { "epoch": 0.005987599202209779, "grad_norm": 7.809080905977138, "learning_rate": 1.1964956195244055e-05, "loss": 0.5299, "step": 1435 }, { "epoch": 0.006008461917200057, "grad_norm": 7.246543170625769, "learning_rate": 1.2006675010429704e-05, "loss": 0.4899, "step": 1440 }, { "epoch": 0.006029324632190335, "grad_norm": 10.112563977228916, "learning_rate": 1.2048393825615354e-05, "loss": 0.4485, "step": 1445 }, { "epoch": 0.006050187347180613, "grad_norm": 10.767392283275608, "learning_rate": 1.2090112640801002e-05, "loss": 0.4783, "step": 1450 }, { "epoch": 0.0060710500621708905, "grad_norm": 8.051377136126348, "learning_rate": 1.2131831455986651e-05, "loss": 0.4569, "step": 1455 }, { "epoch": 0.006091912777161168, "grad_norm": 6.335745123293946, "learning_rate": 1.2173550271172299e-05, "loss": 0.4394, "step": 1460 }, { "epoch": 0.006112775492151446, "grad_norm": 9.37897836249294, "learning_rate": 1.2215269086357948e-05, "loss": 0.4314, "step": 1465 }, { "epoch": 0.006133638207141724, "grad_norm": 7.463010101022985, "learning_rate": 1.2256987901543598e-05, "loss": 0.4446, "step": 1470 }, { "epoch": 0.006154500922132003, "grad_norm": 9.64999349363609, "learning_rate": 1.2298706716729245e-05, "loss": 0.5054, "step": 1475 }, { "epoch": 0.006175363637122281, "grad_norm": 6.409168335434757, "learning_rate": 1.2340425531914895e-05, "loss": 0.4494, "step": 1480 }, { "epoch": 0.006196226352112559, "grad_norm": 8.001673037955868, "learning_rate": 1.2382144347100544e-05, "loss": 0.4482, "step": 1485 }, { "epoch": 0.006217089067102837, "grad_norm": 10.247620258829237, "learning_rate": 1.2423863162286192e-05, "loss": 0.5156, "step": 1490 }, { "epoch": 0.0062379517820931145, "grad_norm": 9.628791994036359, "learning_rate": 1.2465581977471841e-05, "loss": 0.4842, "step": 1495 }, { "epoch": 0.006258814497083392, "grad_norm": 7.490007554552558, "learning_rate": 1.2507300792657489e-05, "loss": 0.4355, "step": 1500 }, { "epoch": 0.00627967721207367, "grad_norm": 6.41707262665491, "learning_rate": 1.2549019607843138e-05, "loss": 0.4337, "step": 1505 }, { "epoch": 0.006300539927063948, "grad_norm": 7.336815290248884, "learning_rate": 1.2590738423028788e-05, "loss": 0.4623, "step": 1510 }, { "epoch": 0.006321402642054226, "grad_norm": 6.12281869382282, "learning_rate": 1.2632457238214436e-05, "loss": 0.3941, "step": 1515 }, { "epoch": 0.006342265357044504, "grad_norm": 7.574167753008698, "learning_rate": 1.2674176053400085e-05, "loss": 0.407, "step": 1520 }, { "epoch": 0.006363128072034783, "grad_norm": 8.094437720420002, "learning_rate": 1.2715894868585734e-05, "loss": 0.3836, "step": 1525 }, { "epoch": 0.0063839907870250606, "grad_norm": 6.915454251483842, "learning_rate": 1.2757613683771382e-05, "loss": 0.4161, "step": 1530 }, { "epoch": 0.0064048535020153384, "grad_norm": 14.916392499129106, "learning_rate": 1.2799332498957032e-05, "loss": 0.3721, "step": 1535 }, { "epoch": 0.006425716217005616, "grad_norm": 21.74765531103719, "learning_rate": 1.284105131414268e-05, "loss": 0.382, "step": 1540 }, { "epoch": 0.006446578931995894, "grad_norm": 8.073389330089551, "learning_rate": 1.2882770129328329e-05, "loss": 0.4221, "step": 1545 }, { "epoch": 0.006467441646986172, "grad_norm": 6.877397813419333, "learning_rate": 1.2924488944513978e-05, "loss": 0.3567, "step": 1550 }, { "epoch": 0.00648830436197645, "grad_norm": 7.11492872132604, "learning_rate": 1.2966207759699624e-05, "loss": 0.3777, "step": 1555 }, { "epoch": 0.006509167076966728, "grad_norm": 6.774763203399536, "learning_rate": 1.3007926574885275e-05, "loss": 0.4434, "step": 1560 }, { "epoch": 0.006530029791957006, "grad_norm": 7.110459095360717, "learning_rate": 1.3049645390070925e-05, "loss": 0.3926, "step": 1565 }, { "epoch": 0.0065508925069472845, "grad_norm": 6.835066664840512, "learning_rate": 1.309136420525657e-05, "loss": 0.4427, "step": 1570 }, { "epoch": 0.006571755221937562, "grad_norm": 6.992611427372712, "learning_rate": 1.3133083020442222e-05, "loss": 0.4427, "step": 1575 }, { "epoch": 0.00659261793692784, "grad_norm": 5.916921062916263, "learning_rate": 1.3174801835627868e-05, "loss": 0.37, "step": 1580 }, { "epoch": 0.006613480651918118, "grad_norm": 8.118731589371363, "learning_rate": 1.3216520650813517e-05, "loss": 0.3901, "step": 1585 }, { "epoch": 0.006634343366908396, "grad_norm": 8.034705373699206, "learning_rate": 1.3258239465999168e-05, "loss": 0.4062, "step": 1590 }, { "epoch": 0.006655206081898674, "grad_norm": 7.300006013424799, "learning_rate": 1.3299958281184814e-05, "loss": 0.3376, "step": 1595 }, { "epoch": 0.006676068796888952, "grad_norm": 8.067454652982054, "learning_rate": 1.3341677096370464e-05, "loss": 0.3729, "step": 1600 }, { "epoch": 0.00669693151187923, "grad_norm": 6.33792431213318, "learning_rate": 1.3383395911556112e-05, "loss": 0.3982, "step": 1605 }, { "epoch": 0.006717794226869508, "grad_norm": 6.234942768759629, "learning_rate": 1.3425114726741761e-05, "loss": 0.351, "step": 1610 }, { "epoch": 0.0067386569418597855, "grad_norm": 7.744604819710227, "learning_rate": 1.346683354192741e-05, "loss": 0.4587, "step": 1615 }, { "epoch": 0.006759519656850064, "grad_norm": 8.58699640350829, "learning_rate": 1.3508552357113058e-05, "loss": 0.3054, "step": 1620 }, { "epoch": 0.006780382371840342, "grad_norm": 6.118501044694093, "learning_rate": 1.3550271172298708e-05, "loss": 0.3615, "step": 1625 }, { "epoch": 0.00680124508683062, "grad_norm": 6.858576643159175, "learning_rate": 1.3591989987484357e-05, "loss": 0.3605, "step": 1630 }, { "epoch": 0.006822107801820898, "grad_norm": 9.331356613242894, "learning_rate": 1.3633708802670005e-05, "loss": 0.3496, "step": 1635 }, { "epoch": 0.006842970516811176, "grad_norm": 5.866656011095419, "learning_rate": 1.3675427617855654e-05, "loss": 0.3789, "step": 1640 }, { "epoch": 0.006863833231801454, "grad_norm": 5.389017909694335, "learning_rate": 1.3717146433041302e-05, "loss": 0.325, "step": 1645 }, { "epoch": 0.0068846959467917315, "grad_norm": 6.407740866225074, "learning_rate": 1.3758865248226951e-05, "loss": 0.3587, "step": 1650 }, { "epoch": 0.006905558661782009, "grad_norm": 5.151579967875349, "learning_rate": 1.38005840634126e-05, "loss": 0.3321, "step": 1655 }, { "epoch": 0.006926421376772287, "grad_norm": 6.79460244787683, "learning_rate": 1.3842302878598248e-05, "loss": 0.3414, "step": 1660 }, { "epoch": 0.006947284091762566, "grad_norm": 7.9575972022210095, "learning_rate": 1.3884021693783898e-05, "loss": 0.379, "step": 1665 }, { "epoch": 0.006968146806752844, "grad_norm": 5.672207015656775, "learning_rate": 1.3925740508969547e-05, "loss": 0.3175, "step": 1670 }, { "epoch": 0.006989009521743122, "grad_norm": 6.3859484435804506, "learning_rate": 1.3967459324155195e-05, "loss": 0.3774, "step": 1675 }, { "epoch": 0.0070098722367334, "grad_norm": 6.342472346210146, "learning_rate": 1.4009178139340844e-05, "loss": 0.3012, "step": 1680 }, { "epoch": 0.007030734951723678, "grad_norm": 5.704171324262254, "learning_rate": 1.4050896954526492e-05, "loss": 0.3367, "step": 1685 }, { "epoch": 0.0070515976667139555, "grad_norm": 6.111438660241081, "learning_rate": 1.4092615769712142e-05, "loss": 0.3347, "step": 1690 }, { "epoch": 0.007072460381704233, "grad_norm": 5.021856075989104, "learning_rate": 1.4134334584897791e-05, "loss": 0.3186, "step": 1695 }, { "epoch": 0.007093323096694511, "grad_norm": 7.01806406742777, "learning_rate": 1.4176053400083439e-05, "loss": 0.359, "step": 1700 }, { "epoch": 0.007114185811684789, "grad_norm": 6.4158625083820935, "learning_rate": 1.4217772215269088e-05, "loss": 0.3445, "step": 1705 }, { "epoch": 0.007135048526675067, "grad_norm": 11.711135373627677, "learning_rate": 1.4259491030454736e-05, "loss": 0.37, "step": 1710 }, { "epoch": 0.007155911241665346, "grad_norm": 6.278251061629517, "learning_rate": 1.4301209845640385e-05, "loss": 0.333, "step": 1715 }, { "epoch": 0.007176773956655624, "grad_norm": 6.254418015250174, "learning_rate": 1.4342928660826035e-05, "loss": 0.3139, "step": 1720 }, { "epoch": 0.0071976366716459015, "grad_norm": 6.2791612716159495, "learning_rate": 1.4384647476011682e-05, "loss": 0.3578, "step": 1725 }, { "epoch": 0.007218499386636179, "grad_norm": 4.947962138739219, "learning_rate": 1.4426366291197332e-05, "loss": 0.3277, "step": 1730 }, { "epoch": 0.007239362101626457, "grad_norm": 5.451366007674778, "learning_rate": 1.4468085106382981e-05, "loss": 0.363, "step": 1735 }, { "epoch": 0.007260224816616735, "grad_norm": 5.658918390912525, "learning_rate": 1.4509803921568629e-05, "loss": 0.3247, "step": 1740 }, { "epoch": 0.007281087531607013, "grad_norm": 7.428375866632875, "learning_rate": 1.4551522736754278e-05, "loss": 0.3458, "step": 1745 }, { "epoch": 0.007301950246597291, "grad_norm": 5.798910073379224, "learning_rate": 1.4593241551939924e-05, "loss": 0.3029, "step": 1750 }, { "epoch": 0.007322812961587569, "grad_norm": 7.075868855848005, "learning_rate": 1.4634960367125574e-05, "loss": 0.3058, "step": 1755 }, { "epoch": 0.007343675676577847, "grad_norm": 5.481300739258738, "learning_rate": 1.4676679182311225e-05, "loss": 0.3034, "step": 1760 }, { "epoch": 0.0073645383915681255, "grad_norm": 8.672658191537145, "learning_rate": 1.4718397997496871e-05, "loss": 0.3096, "step": 1765 }, { "epoch": 0.007385401106558403, "grad_norm": 6.884971023546957, "learning_rate": 1.476011681268252e-05, "loss": 0.3018, "step": 1770 }, { "epoch": 0.007406263821548681, "grad_norm": 6.171694641121566, "learning_rate": 1.4801835627868172e-05, "loss": 0.2964, "step": 1775 }, { "epoch": 0.007427126536538959, "grad_norm": 5.531696934609981, "learning_rate": 1.4843554443053818e-05, "loss": 0.2757, "step": 1780 }, { "epoch": 0.007447989251529237, "grad_norm": 4.955307222467459, "learning_rate": 1.4885273258239467e-05, "loss": 0.2998, "step": 1785 }, { "epoch": 0.007468851966519515, "grad_norm": 7.002411405773632, "learning_rate": 1.4926992073425115e-05, "loss": 0.3541, "step": 1790 }, { "epoch": 0.007489714681509793, "grad_norm": 5.046259094245935, "learning_rate": 1.4968710888610764e-05, "loss": 0.2881, "step": 1795 }, { "epoch": 0.007510577396500071, "grad_norm": 11.939782957516373, "learning_rate": 1.5010429703796414e-05, "loss": 0.3422, "step": 1800 }, { "epoch": 0.0075314401114903486, "grad_norm": 7.052808017240188, "learning_rate": 1.5052148518982061e-05, "loss": 0.3222, "step": 1805 }, { "epoch": 0.007552302826480627, "grad_norm": 5.628058486523806, "learning_rate": 1.509386733416771e-05, "loss": 0.3154, "step": 1810 }, { "epoch": 0.007573165541470905, "grad_norm": 5.684254521611413, "learning_rate": 1.513558614935336e-05, "loss": 0.3072, "step": 1815 }, { "epoch": 0.007594028256461183, "grad_norm": 5.025408544056291, "learning_rate": 1.5177304964539008e-05, "loss": 0.3075, "step": 1820 }, { "epoch": 0.007614890971451461, "grad_norm": 5.3318343688941265, "learning_rate": 1.5219023779724657e-05, "loss": 0.3068, "step": 1825 }, { "epoch": 0.007635753686441739, "grad_norm": 5.092255035560314, "learning_rate": 1.5260742594910303e-05, "loss": 0.3228, "step": 1830 }, { "epoch": 0.007656616401432017, "grad_norm": 6.233646854985075, "learning_rate": 1.5302461410095956e-05, "loss": 0.2896, "step": 1835 }, { "epoch": 0.007677479116422295, "grad_norm": 5.709797721736024, "learning_rate": 1.5344180225281606e-05, "loss": 0.2858, "step": 1840 }, { "epoch": 0.0076983418314125725, "grad_norm": 5.725237124145164, "learning_rate": 1.538589904046725e-05, "loss": 0.3289, "step": 1845 }, { "epoch": 0.00771920454640285, "grad_norm": 5.104719493671645, "learning_rate": 1.54276178556529e-05, "loss": 0.2797, "step": 1850 }, { "epoch": 0.007740067261393128, "grad_norm": 5.597633971330549, "learning_rate": 1.5469336670838547e-05, "loss": 0.3115, "step": 1855 }, { "epoch": 0.007760929976383407, "grad_norm": 7.704105491111727, "learning_rate": 1.5511055486024196e-05, "loss": 0.3098, "step": 1860 }, { "epoch": 0.007781792691373685, "grad_norm": 5.154982980609121, "learning_rate": 1.5552774301209846e-05, "loss": 0.2718, "step": 1865 }, { "epoch": 0.007802655406363963, "grad_norm": 6.23774604205085, "learning_rate": 1.5594493116395495e-05, "loss": 0.2554, "step": 1870 }, { "epoch": 0.00782351812135424, "grad_norm": 7.147199247669823, "learning_rate": 1.5636211931581145e-05, "loss": 0.3016, "step": 1875 }, { "epoch": 0.007844380836344518, "grad_norm": 9.929712258035567, "learning_rate": 1.5677930746766794e-05, "loss": 0.3176, "step": 1880 }, { "epoch": 0.007865243551334796, "grad_norm": 5.198918208025414, "learning_rate": 1.571964956195244e-05, "loss": 0.3113, "step": 1885 }, { "epoch": 0.007886106266325075, "grad_norm": 4.790785844785767, "learning_rate": 1.576136837713809e-05, "loss": 0.2719, "step": 1890 }, { "epoch": 0.007906968981315352, "grad_norm": 8.54271944878354, "learning_rate": 1.580308719232374e-05, "loss": 0.2809, "step": 1895 }, { "epoch": 0.007927831696305631, "grad_norm": 6.923292229908575, "learning_rate": 1.584480600750939e-05, "loss": 0.2913, "step": 1900 }, { "epoch": 0.007948694411295908, "grad_norm": 5.353132691576838, "learning_rate": 1.5886524822695038e-05, "loss": 0.2774, "step": 1905 }, { "epoch": 0.007969557126286187, "grad_norm": 5.5358559042843325, "learning_rate": 1.5928243637880684e-05, "loss": 0.2733, "step": 1910 }, { "epoch": 0.007990419841276464, "grad_norm": 4.647128667336297, "learning_rate": 1.5969962453066333e-05, "loss": 0.3192, "step": 1915 }, { "epoch": 0.008011282556266743, "grad_norm": 5.623547573911648, "learning_rate": 1.6011681268251983e-05, "loss": 0.3084, "step": 1920 }, { "epoch": 0.00803214527125702, "grad_norm": 4.881524862793194, "learning_rate": 1.6053400083437632e-05, "loss": 0.2508, "step": 1925 }, { "epoch": 0.008053007986247298, "grad_norm": 5.620713130960062, "learning_rate": 1.609511889862328e-05, "loss": 0.2834, "step": 1930 }, { "epoch": 0.008073870701237577, "grad_norm": 13.703010101523416, "learning_rate": 1.6136837713808928e-05, "loss": 0.3216, "step": 1935 }, { "epoch": 0.008094733416227854, "grad_norm": 6.0503820883573765, "learning_rate": 1.6178556528994577e-05, "loss": 0.2866, "step": 1940 }, { "epoch": 0.008115596131218133, "grad_norm": 5.768962735098446, "learning_rate": 1.6220275344180226e-05, "loss": 0.2339, "step": 1945 }, { "epoch": 0.00813645884620841, "grad_norm": 5.598295265305774, "learning_rate": 1.6261994159365876e-05, "loss": 0.2527, "step": 1950 }, { "epoch": 0.008157321561198689, "grad_norm": 6.767484582797481, "learning_rate": 1.6303712974551525e-05, "loss": 0.2681, "step": 1955 }, { "epoch": 0.008178184276188966, "grad_norm": 4.305233138898723, "learning_rate": 1.634543178973717e-05, "loss": 0.248, "step": 1960 }, { "epoch": 0.008199046991179244, "grad_norm": 8.520731415416993, "learning_rate": 1.638715060492282e-05, "loss": 0.3099, "step": 1965 }, { "epoch": 0.008219909706169521, "grad_norm": 4.778421328222519, "learning_rate": 1.642886942010847e-05, "loss": 0.3099, "step": 1970 }, { "epoch": 0.0082407724211598, "grad_norm": 4.866514464500968, "learning_rate": 1.647058823529412e-05, "loss": 0.2896, "step": 1975 }, { "epoch": 0.008261635136150079, "grad_norm": 4.668112033342819, "learning_rate": 1.651230705047977e-05, "loss": 0.2541, "step": 1980 }, { "epoch": 0.008282497851140356, "grad_norm": 5.67799501054019, "learning_rate": 1.655402586566542e-05, "loss": 0.268, "step": 1985 }, { "epoch": 0.008303360566130635, "grad_norm": 4.727330037265877, "learning_rate": 1.6595744680851064e-05, "loss": 0.257, "step": 1990 }, { "epoch": 0.008324223281120912, "grad_norm": 5.969971514078009, "learning_rate": 1.6637463496036714e-05, "loss": 0.2521, "step": 1995 }, { "epoch": 0.00834508599611119, "grad_norm": 7.525422933501349, "learning_rate": 1.6679182311222363e-05, "loss": 0.289, "step": 2000 }, { "epoch": 0.008365948711101467, "grad_norm": 4.792370600402943, "learning_rate": 1.6720901126408013e-05, "loss": 0.3298, "step": 2005 }, { "epoch": 0.008386811426091746, "grad_norm": 5.6190147699818525, "learning_rate": 1.6762619941593662e-05, "loss": 0.2621, "step": 2010 }, { "epoch": 0.008407674141082023, "grad_norm": 4.331373056964811, "learning_rate": 1.6804338756779308e-05, "loss": 0.2685, "step": 2015 }, { "epoch": 0.008428536856072302, "grad_norm": 4.085927261340408, "learning_rate": 1.6846057571964958e-05, "loss": 0.267, "step": 2020 }, { "epoch": 0.008449399571062579, "grad_norm": 4.48356773839831, "learning_rate": 1.6887776387150607e-05, "loss": 0.2635, "step": 2025 }, { "epoch": 0.008470262286052858, "grad_norm": 5.078785949538297, "learning_rate": 1.6929495202336253e-05, "loss": 0.2648, "step": 2030 }, { "epoch": 0.008491125001043136, "grad_norm": 5.847653595385071, "learning_rate": 1.6971214017521906e-05, "loss": 0.2685, "step": 2035 }, { "epoch": 0.008511987716033413, "grad_norm": 4.039276119770126, "learning_rate": 1.7012932832707552e-05, "loss": 0.2844, "step": 2040 }, { "epoch": 0.008532850431023692, "grad_norm": 6.247806611170299, "learning_rate": 1.70546516478932e-05, "loss": 0.2873, "step": 2045 }, { "epoch": 0.00855371314601397, "grad_norm": 5.329058482964378, "learning_rate": 1.709637046307885e-05, "loss": 0.2527, "step": 2050 }, { "epoch": 0.008574575861004248, "grad_norm": 4.562233293547116, "learning_rate": 1.7138089278264497e-05, "loss": 0.2854, "step": 2055 }, { "epoch": 0.008595438575994525, "grad_norm": 4.566864467100608, "learning_rate": 1.7179808093450146e-05, "loss": 0.3123, "step": 2060 }, { "epoch": 0.008616301290984804, "grad_norm": 5.280577632785557, "learning_rate": 1.7221526908635796e-05, "loss": 0.3059, "step": 2065 }, { "epoch": 0.00863716400597508, "grad_norm": 6.462182879993629, "learning_rate": 1.7263245723821445e-05, "loss": 0.3152, "step": 2070 }, { "epoch": 0.00865802672096536, "grad_norm": 4.670879018728968, "learning_rate": 1.7304964539007094e-05, "loss": 0.2628, "step": 2075 }, { "epoch": 0.008678889435955638, "grad_norm": 5.998825635218914, "learning_rate": 1.734668335419274e-05, "loss": 0.2466, "step": 2080 }, { "epoch": 0.008699752150945915, "grad_norm": 5.25801377345867, "learning_rate": 1.738840216937839e-05, "loss": 0.2598, "step": 2085 }, { "epoch": 0.008720614865936194, "grad_norm": 5.552089986017591, "learning_rate": 1.743012098456404e-05, "loss": 0.2786, "step": 2090 }, { "epoch": 0.008741477580926471, "grad_norm": 3.90707486268274, "learning_rate": 1.747183979974969e-05, "loss": 0.2612, "step": 2095 }, { "epoch": 0.00876234029591675, "grad_norm": 6.678495264649931, "learning_rate": 1.7513558614935338e-05, "loss": 0.2789, "step": 2100 }, { "epoch": 0.008783203010907027, "grad_norm": 5.275353088704, "learning_rate": 1.7555277430120984e-05, "loss": 0.285, "step": 2105 }, { "epoch": 0.008804065725897306, "grad_norm": 9.63766219074865, "learning_rate": 1.7596996245306634e-05, "loss": 0.2427, "step": 2110 }, { "epoch": 0.008824928440887583, "grad_norm": 4.094934552766602, "learning_rate": 1.7638715060492283e-05, "loss": 0.2384, "step": 2115 }, { "epoch": 0.008845791155877861, "grad_norm": 4.912162035403142, "learning_rate": 1.7680433875677932e-05, "loss": 0.2645, "step": 2120 }, { "epoch": 0.00886665387086814, "grad_norm": 8.452811804652779, "learning_rate": 1.7722152690863582e-05, "loss": 0.2543, "step": 2125 }, { "epoch": 0.008887516585858417, "grad_norm": 4.435350574536671, "learning_rate": 1.776387150604923e-05, "loss": 0.2809, "step": 2130 }, { "epoch": 0.008908379300848696, "grad_norm": 6.637336250737818, "learning_rate": 1.7805590321234877e-05, "loss": 0.2632, "step": 2135 }, { "epoch": 0.008929242015838973, "grad_norm": 5.209180893956191, "learning_rate": 1.7847309136420527e-05, "loss": 0.2696, "step": 2140 }, { "epoch": 0.008950104730829252, "grad_norm": 6.784013857233379, "learning_rate": 1.7889027951606176e-05, "loss": 0.2931, "step": 2145 }, { "epoch": 0.008970967445819529, "grad_norm": 4.904656573568872, "learning_rate": 1.7930746766791826e-05, "loss": 0.2767, "step": 2150 }, { "epoch": 0.008991830160809807, "grad_norm": 4.725576898568431, "learning_rate": 1.7972465581977475e-05, "loss": 0.2413, "step": 2155 }, { "epoch": 0.009012692875800084, "grad_norm": 5.121474476973913, "learning_rate": 1.801418439716312e-05, "loss": 0.2695, "step": 2160 }, { "epoch": 0.009033555590790363, "grad_norm": 5.32592010276937, "learning_rate": 1.805590321234877e-05, "loss": 0.3189, "step": 2165 }, { "epoch": 0.00905441830578064, "grad_norm": 5.612735232007547, "learning_rate": 1.809762202753442e-05, "loss": 0.201, "step": 2170 }, { "epoch": 0.009075281020770919, "grad_norm": 5.041036097551216, "learning_rate": 1.813934084272007e-05, "loss": 0.2407, "step": 2175 }, { "epoch": 0.009096143735761198, "grad_norm": 7.24700059633963, "learning_rate": 1.818105965790572e-05, "loss": 0.2527, "step": 2180 }, { "epoch": 0.009117006450751475, "grad_norm": 4.519171767418444, "learning_rate": 1.8222778473091365e-05, "loss": 0.2708, "step": 2185 }, { "epoch": 0.009137869165741754, "grad_norm": 6.872840806539324, "learning_rate": 1.8264497288277014e-05, "loss": 0.2594, "step": 2190 }, { "epoch": 0.00915873188073203, "grad_norm": 6.784708622042111, "learning_rate": 1.8306216103462664e-05, "loss": 0.2535, "step": 2195 }, { "epoch": 0.00917959459572231, "grad_norm": 4.896011609914181, "learning_rate": 1.834793491864831e-05, "loss": 0.241, "step": 2200 }, { "epoch": 0.009200457310712586, "grad_norm": 6.595358738175565, "learning_rate": 1.8389653733833962e-05, "loss": 0.222, "step": 2205 }, { "epoch": 0.009221320025702865, "grad_norm": 4.187341521534567, "learning_rate": 1.843137254901961e-05, "loss": 0.2345, "step": 2210 }, { "epoch": 0.009242182740693142, "grad_norm": 5.4966896999644765, "learning_rate": 1.8473091364205258e-05, "loss": 0.2599, "step": 2215 }, { "epoch": 0.00926304545568342, "grad_norm": 5.965075475232657, "learning_rate": 1.8514810179390907e-05, "loss": 0.2309, "step": 2220 }, { "epoch": 0.0092839081706737, "grad_norm": 4.046014374322984, "learning_rate": 1.8556528994576553e-05, "loss": 0.2394, "step": 2225 }, { "epoch": 0.009304770885663977, "grad_norm": 4.187548863587732, "learning_rate": 1.8598247809762203e-05, "loss": 0.2623, "step": 2230 }, { "epoch": 0.009325633600654255, "grad_norm": 5.888268246160415, "learning_rate": 1.8639966624947856e-05, "loss": 0.2008, "step": 2235 }, { "epoch": 0.009346496315644532, "grad_norm": 7.394345586944625, "learning_rate": 1.86816854401335e-05, "loss": 0.274, "step": 2240 }, { "epoch": 0.009367359030634811, "grad_norm": 4.673492701869096, "learning_rate": 1.872340425531915e-05, "loss": 0.2723, "step": 2245 }, { "epoch": 0.009388221745625088, "grad_norm": 5.120913752721386, "learning_rate": 1.8765123070504797e-05, "loss": 0.2825, "step": 2250 }, { "epoch": 0.009409084460615367, "grad_norm": 6.762795443255908, "learning_rate": 1.8806841885690446e-05, "loss": 0.2399, "step": 2255 }, { "epoch": 0.009429947175605644, "grad_norm": 4.228557866912384, "learning_rate": 1.8848560700876096e-05, "loss": 0.2229, "step": 2260 }, { "epoch": 0.009450809890595923, "grad_norm": 3.6120408913926245, "learning_rate": 1.8890279516061745e-05, "loss": 0.2323, "step": 2265 }, { "epoch": 0.009471672605586201, "grad_norm": 8.832817130342088, "learning_rate": 1.8931998331247395e-05, "loss": 0.2449, "step": 2270 }, { "epoch": 0.009492535320576478, "grad_norm": 4.562965233899825, "learning_rate": 1.8973717146433044e-05, "loss": 0.2622, "step": 2275 }, { "epoch": 0.009513398035566757, "grad_norm": 4.195203128205197, "learning_rate": 1.901543596161869e-05, "loss": 0.237, "step": 2280 }, { "epoch": 0.009534260750557034, "grad_norm": 12.541022878346869, "learning_rate": 1.905715477680434e-05, "loss": 0.2429, "step": 2285 }, { "epoch": 0.009555123465547313, "grad_norm": 4.73869668155897, "learning_rate": 1.909887359198999e-05, "loss": 0.2563, "step": 2290 }, { "epoch": 0.00957598618053759, "grad_norm": 5.0821567277065025, "learning_rate": 1.914059240717564e-05, "loss": 0.2544, "step": 2295 }, { "epoch": 0.009596848895527869, "grad_norm": 7.035297396383643, "learning_rate": 1.9182311222361288e-05, "loss": 0.2335, "step": 2300 }, { "epoch": 0.009617711610518146, "grad_norm": 7.5490279884153475, "learning_rate": 1.9224030037546934e-05, "loss": 0.2544, "step": 2305 }, { "epoch": 0.009638574325508424, "grad_norm": 4.9103463166539685, "learning_rate": 1.9265748852732583e-05, "loss": 0.2351, "step": 2310 }, { "epoch": 0.009659437040498701, "grad_norm": 4.959987543980194, "learning_rate": 1.9307467667918233e-05, "loss": 0.2468, "step": 2315 }, { "epoch": 0.00968029975548898, "grad_norm": 6.726929370410437, "learning_rate": 1.9349186483103882e-05, "loss": 0.2422, "step": 2320 }, { "epoch": 0.009701162470479259, "grad_norm": 4.071665194735348, "learning_rate": 1.939090529828953e-05, "loss": 0.2366, "step": 2325 }, { "epoch": 0.009722025185469536, "grad_norm": 10.32733768131613, "learning_rate": 1.9432624113475178e-05, "loss": 0.244, "step": 2330 }, { "epoch": 0.009742887900459815, "grad_norm": 4.574392438006924, "learning_rate": 1.9474342928660827e-05, "loss": 0.255, "step": 2335 }, { "epoch": 0.009763750615450092, "grad_norm": 4.6348502005896846, "learning_rate": 1.9516061743846476e-05, "loss": 0.2718, "step": 2340 }, { "epoch": 0.00978461333044037, "grad_norm": 5.6083096685956155, "learning_rate": 1.9557780559032126e-05, "loss": 0.2454, "step": 2345 }, { "epoch": 0.009805476045430648, "grad_norm": 6.290115832139054, "learning_rate": 1.9599499374217775e-05, "loss": 0.2501, "step": 2350 }, { "epoch": 0.009826338760420926, "grad_norm": 5.265305466212621, "learning_rate": 1.964121818940342e-05, "loss": 0.2317, "step": 2355 }, { "epoch": 0.009847201475411203, "grad_norm": 4.07164840239995, "learning_rate": 1.968293700458907e-05, "loss": 0.2401, "step": 2360 }, { "epoch": 0.009868064190401482, "grad_norm": 3.8191256456424107, "learning_rate": 1.972465581977472e-05, "loss": 0.2478, "step": 2365 }, { "epoch": 0.00988892690539176, "grad_norm": 5.459413805808702, "learning_rate": 1.976637463496037e-05, "loss": 0.2428, "step": 2370 }, { "epoch": 0.009909789620382038, "grad_norm": 4.684259710535575, "learning_rate": 1.980809345014602e-05, "loss": 0.2296, "step": 2375 }, { "epoch": 0.009930652335372317, "grad_norm": 5.190127120062716, "learning_rate": 1.984981226533167e-05, "loss": 0.2566, "step": 2380 }, { "epoch": 0.009951515050362594, "grad_norm": 4.623246795595396, "learning_rate": 1.9891531080517314e-05, "loss": 0.2702, "step": 2385 }, { "epoch": 0.009972377765352872, "grad_norm": 4.469097213718733, "learning_rate": 1.9933249895702964e-05, "loss": 0.228, "step": 2390 }, { "epoch": 0.00999324048034315, "grad_norm": 5.0982891198416365, "learning_rate": 1.997496871088861e-05, "loss": 0.2415, "step": 2395 }, { "epoch": 0.010014103195333428, "grad_norm": 4.253152348925473, "learning_rate": 1.9991661454713646e-05, "loss": 0.2431, "step": 2400 }, { "epoch": 0.010034965910323705, "grad_norm": 3.55579290355745, "learning_rate": 1.997086063599737e-05, "loss": 0.2461, "step": 2405 }, { "epoch": 0.010055828625313984, "grad_norm": 4.318499021612224, "learning_rate": 1.9950124610640275e-05, "loss": 0.2641, "step": 2410 }, { "epoch": 0.010076691340304263, "grad_norm": 5.487960562565871, "learning_rate": 1.992945304296004e-05, "loss": 0.2625, "step": 2415 }, { "epoch": 0.01009755405529454, "grad_norm": 4.2344657809457, "learning_rate": 1.990884559970408e-05, "loss": 0.216, "step": 2420 }, { "epoch": 0.010118416770284818, "grad_norm": 4.642112086655064, "learning_rate": 1.9888301950026946e-05, "loss": 0.2425, "step": 2425 }, { "epoch": 0.010139279485275095, "grad_norm": 4.054573270763669, "learning_rate": 1.986782176546803e-05, "loss": 0.2209, "step": 2430 }, { "epoch": 0.010160142200265374, "grad_norm": 3.9728569188307588, "learning_rate": 1.9847404719929514e-05, "loss": 0.2093, "step": 2435 }, { "epoch": 0.010181004915255651, "grad_norm": 4.864873341630401, "learning_rate": 1.9827050489654563e-05, "loss": 0.2957, "step": 2440 }, { "epoch": 0.01020186763024593, "grad_norm": 4.048458941652993, "learning_rate": 1.9806758753205742e-05, "loss": 0.2201, "step": 2445 }, { "epoch": 0.010222730345236207, "grad_norm": 4.186358153969143, "learning_rate": 1.9786529191443724e-05, "loss": 0.1943, "step": 2450 }, { "epoch": 0.010243593060226486, "grad_norm": 5.231339609329628, "learning_rate": 1.976636148750619e-05, "loss": 0.2249, "step": 2455 }, { "epoch": 0.010264455775216763, "grad_norm": 4.954507580320985, "learning_rate": 1.9746255326786972e-05, "loss": 0.2266, "step": 2460 }, { "epoch": 0.010285318490207042, "grad_norm": 4.305289093490932, "learning_rate": 1.9726210396915487e-05, "loss": 0.2245, "step": 2465 }, { "epoch": 0.01030618120519732, "grad_norm": 4.670495732961256, "learning_rate": 1.97062263877363e-05, "loss": 0.2385, "step": 2470 }, { "epoch": 0.010327043920187597, "grad_norm": 13.608851267155199, "learning_rate": 1.9686302991289005e-05, "loss": 0.2474, "step": 2475 }, { "epoch": 0.010347906635177876, "grad_norm": 9.093905469818482, "learning_rate": 1.9666439901788262e-05, "loss": 0.2349, "step": 2480 }, { "epoch": 0.010368769350168153, "grad_norm": 6.574104444542784, "learning_rate": 1.9646636815604123e-05, "loss": 0.2068, "step": 2485 }, { "epoch": 0.010389632065158432, "grad_norm": 25.24553362675413, "learning_rate": 1.9626893431242503e-05, "loss": 0.2422, "step": 2490 }, { "epoch": 0.010410494780148709, "grad_norm": 4.550506074995248, "learning_rate": 1.9607209449325915e-05, "loss": 0.2448, "step": 2495 }, { "epoch": 0.010431357495138988, "grad_norm": 4.9333924474677415, "learning_rate": 1.9587584572574413e-05, "loss": 0.2332, "step": 2500 }, { "epoch": 0.010452220210129265, "grad_norm": 3.797445579613741, "learning_rate": 1.9568018505786723e-05, "loss": 0.1951, "step": 2505 }, { "epoch": 0.010473082925119543, "grad_norm": 4.984192069810902, "learning_rate": 1.9548510955821585e-05, "loss": 0.2505, "step": 2510 }, { "epoch": 0.010493945640109822, "grad_norm": 5.533706566166493, "learning_rate": 1.952906163157933e-05, "loss": 0.2197, "step": 2515 }, { "epoch": 0.010514808355100099, "grad_norm": 5.191724862928775, "learning_rate": 1.9509670243983617e-05, "loss": 0.2182, "step": 2520 }, { "epoch": 0.010535671070090378, "grad_norm": 4.0992785936787985, "learning_rate": 1.9490336505963395e-05, "loss": 0.239, "step": 2525 }, { "epoch": 0.010556533785080655, "grad_norm": 4.6403292040962425, "learning_rate": 1.947106013243505e-05, "loss": 0.21, "step": 2530 }, { "epoch": 0.010577396500070934, "grad_norm": 3.8228113029741775, "learning_rate": 1.9451840840284746e-05, "loss": 0.2237, "step": 2535 }, { "epoch": 0.01059825921506121, "grad_norm": 3.965429963538037, "learning_rate": 1.9432678348350974e-05, "loss": 0.2408, "step": 2540 }, { "epoch": 0.01061912193005149, "grad_norm": 3.348699708255833, "learning_rate": 1.941357237740726e-05, "loss": 0.2064, "step": 2545 }, { "epoch": 0.010639984645041766, "grad_norm": 4.924008723869925, "learning_rate": 1.9394522650145094e-05, "loss": 0.1998, "step": 2550 }, { "epoch": 0.010660847360032045, "grad_norm": 4.487360893713038, "learning_rate": 1.9375528891157002e-05, "loss": 0.2297, "step": 2555 }, { "epoch": 0.010681710075022324, "grad_norm": 4.628338686069118, "learning_rate": 1.9356590826919838e-05, "loss": 0.2219, "step": 2560 }, { "epoch": 0.010702572790012601, "grad_norm": 7.7008493336418375, "learning_rate": 1.933770818577825e-05, "loss": 0.2177, "step": 2565 }, { "epoch": 0.01072343550500288, "grad_norm": 3.7263704710493415, "learning_rate": 1.9318880697928292e-05, "loss": 0.2826, "step": 2570 }, { "epoch": 0.010744298219993157, "grad_norm": 4.484656383597405, "learning_rate": 1.9300108095401243e-05, "loss": 0.229, "step": 2575 }, { "epoch": 0.010765160934983435, "grad_norm": 6.380506452478398, "learning_rate": 1.9281390112047582e-05, "loss": 0.1949, "step": 2580 }, { "epoch": 0.010786023649973712, "grad_norm": 3.733290253588814, "learning_rate": 1.9262726483521157e-05, "loss": 0.2007, "step": 2585 }, { "epoch": 0.010806886364963991, "grad_norm": 4.029116572588037, "learning_rate": 1.9244116947263486e-05, "loss": 0.1994, "step": 2590 }, { "epoch": 0.010827749079954268, "grad_norm": 3.789366675457456, "learning_rate": 1.9225561242488244e-05, "loss": 0.2341, "step": 2595 }, { "epoch": 0.010848611794944547, "grad_norm": 5.447679085212109, "learning_rate": 1.9207059110165934e-05, "loss": 0.2251, "step": 2600 }, { "epoch": 0.010869474509934824, "grad_norm": 6.077827273175305, "learning_rate": 1.918861029300868e-05, "loss": 0.2077, "step": 2605 }, { "epoch": 0.010890337224925103, "grad_norm": 4.414599175664792, "learning_rate": 1.917021453545519e-05, "loss": 0.2217, "step": 2610 }, { "epoch": 0.010911199939915382, "grad_norm": 4.8108362589969245, "learning_rate": 1.9151871583655936e-05, "loss": 0.2205, "step": 2615 }, { "epoch": 0.010932062654905659, "grad_norm": 7.472203526709092, "learning_rate": 1.913358118545838e-05, "loss": 0.2434, "step": 2620 }, { "epoch": 0.010952925369895937, "grad_norm": 4.350680715843388, "learning_rate": 1.911534309039246e-05, "loss": 0.1968, "step": 2625 }, { "epoch": 0.010973788084886214, "grad_norm": 5.906064551742272, "learning_rate": 1.9097157049656154e-05, "loss": 0.2248, "step": 2630 }, { "epoch": 0.010994650799876493, "grad_norm": 4.029693790976357, "learning_rate": 1.907902281610125e-05, "loss": 0.2035, "step": 2635 }, { "epoch": 0.01101551351486677, "grad_norm": 3.9344166464570365, "learning_rate": 1.906094014421923e-05, "loss": 0.1849, "step": 2640 }, { "epoch": 0.011036376229857049, "grad_norm": 4.056848853378116, "learning_rate": 1.9042908790127297e-05, "loss": 0.2518, "step": 2645 }, { "epoch": 0.011057238944847326, "grad_norm": 4.180986555309109, "learning_rate": 1.9024928511554576e-05, "loss": 0.2433, "step": 2650 }, { "epoch": 0.011078101659837605, "grad_norm": 5.007443209157643, "learning_rate": 1.900699906782844e-05, "loss": 0.2404, "step": 2655 }, { "epoch": 0.011098964374827883, "grad_norm": 4.611923730817439, "learning_rate": 1.8989120219860966e-05, "loss": 0.2051, "step": 2660 }, { "epoch": 0.01111982708981816, "grad_norm": 4.7708781804524, "learning_rate": 1.8971291730135564e-05, "loss": 0.2065, "step": 2665 }, { "epoch": 0.01114068980480844, "grad_norm": 4.397803132589757, "learning_rate": 1.895351336269371e-05, "loss": 0.2295, "step": 2670 }, { "epoch": 0.011161552519798716, "grad_norm": 3.472271267063352, "learning_rate": 1.8935784883121835e-05, "loss": 0.244, "step": 2675 }, { "epoch": 0.011182415234788995, "grad_norm": 5.592727073570129, "learning_rate": 1.8918106058538346e-05, "loss": 0.2199, "step": 2680 }, { "epoch": 0.011203277949779272, "grad_norm": 4.134731688381304, "learning_rate": 1.8900476657580775e-05, "loss": 0.1925, "step": 2685 }, { "epoch": 0.01122414066476955, "grad_norm": 3.0520907611563475, "learning_rate": 1.8882896450393047e-05, "loss": 0.1739, "step": 2690 }, { "epoch": 0.011245003379759828, "grad_norm": 4.297422455939682, "learning_rate": 1.8865365208612927e-05, "loss": 0.1994, "step": 2695 }, { "epoch": 0.011265866094750106, "grad_norm": 4.127106356744211, "learning_rate": 1.8847882705359528e-05, "loss": 0.2222, "step": 2700 }, { "epoch": 0.011286728809740385, "grad_norm": 4.5132502601773545, "learning_rate": 1.8830448715221e-05, "loss": 0.2423, "step": 2705 }, { "epoch": 0.011307591524730662, "grad_norm": 3.673310019246116, "learning_rate": 1.8813063014242303e-05, "loss": 0.1962, "step": 2710 }, { "epoch": 0.011328454239720941, "grad_norm": 9.453817833020544, "learning_rate": 1.879572537991315e-05, "loss": 0.1913, "step": 2715 }, { "epoch": 0.011349316954711218, "grad_norm": 4.004948727398111, "learning_rate": 1.877843559115603e-05, "loss": 0.2031, "step": 2720 }, { "epoch": 0.011370179669701497, "grad_norm": 3.871779506838119, "learning_rate": 1.876119342831435e-05, "loss": 0.2571, "step": 2725 }, { "epoch": 0.011391042384691774, "grad_norm": 4.181340017998712, "learning_rate": 1.8743998673140766e-05, "loss": 0.2165, "step": 2730 }, { "epoch": 0.011411905099682053, "grad_norm": 3.5551082012383084, "learning_rate": 1.8726851108785534e-05, "loss": 0.2041, "step": 2735 }, { "epoch": 0.01143276781467233, "grad_norm": 6.973013690258297, "learning_rate": 1.8709750519785022e-05, "loss": 0.2106, "step": 2740 }, { "epoch": 0.011453630529662608, "grad_norm": 4.62458758066895, "learning_rate": 1.8692696692050384e-05, "loss": 0.2315, "step": 2745 }, { "epoch": 0.011474493244652885, "grad_norm": 3.8758232410676974, "learning_rate": 1.8675689412856254e-05, "loss": 0.2476, "step": 2750 }, { "epoch": 0.011495355959643164, "grad_norm": 3.943190516235696, "learning_rate": 1.8658728470829628e-05, "loss": 0.2382, "step": 2755 }, { "epoch": 0.011516218674633443, "grad_norm": 2.9083100534646023, "learning_rate": 1.864181365593883e-05, "loss": 0.173, "step": 2760 }, { "epoch": 0.01153708138962372, "grad_norm": 5.487226091408963, "learning_rate": 1.8624944759482567e-05, "loss": 0.2001, "step": 2765 }, { "epoch": 0.011557944104613999, "grad_norm": 3.460337096151453, "learning_rate": 1.8608121574079145e-05, "loss": 0.1965, "step": 2770 }, { "epoch": 0.011578806819604276, "grad_norm": 3.322191736766515, "learning_rate": 1.8591343893655728e-05, "loss": 0.1854, "step": 2775 }, { "epoch": 0.011599669534594554, "grad_norm": 3.7885370403222085, "learning_rate": 1.8574611513437773e-05, "loss": 0.2079, "step": 2780 }, { "epoch": 0.011620532249584831, "grad_norm": 7.436075433375114, "learning_rate": 1.8557924229938487e-05, "loss": 0.1941, "step": 2785 }, { "epoch": 0.01164139496457511, "grad_norm": 4.1043291380771, "learning_rate": 1.854128184094848e-05, "loss": 0.192, "step": 2790 }, { "epoch": 0.011662257679565387, "grad_norm": 4.422416599952308, "learning_rate": 1.8524684145525446e-05, "loss": 0.1765, "step": 2795 }, { "epoch": 0.011683120394555666, "grad_norm": 4.419206654095447, "learning_rate": 1.8508130943983974e-05, "loss": 0.206, "step": 2800 }, { "epoch": 0.011703983109545945, "grad_norm": 4.488044123396681, "learning_rate": 1.8491622037885467e-05, "loss": 0.2617, "step": 2805 }, { "epoch": 0.011724845824536222, "grad_norm": 3.23763656124399, "learning_rate": 1.847515723002815e-05, "loss": 0.2333, "step": 2810 }, { "epoch": 0.0117457085395265, "grad_norm": 5.000349168982164, "learning_rate": 1.8458736324437165e-05, "loss": 0.1979, "step": 2815 }, { "epoch": 0.011766571254516777, "grad_norm": 2.979031623173272, "learning_rate": 1.8442359126354787e-05, "loss": 0.1706, "step": 2820 }, { "epoch": 0.011787433969507056, "grad_norm": 5.354820556299419, "learning_rate": 1.842602544223072e-05, "loss": 0.2055, "step": 2825 }, { "epoch": 0.011808296684497333, "grad_norm": 3.2884969129515333, "learning_rate": 1.8409735079712482e-05, "loss": 0.1851, "step": 2830 }, { "epoch": 0.011829159399487612, "grad_norm": 3.3214275570398164, "learning_rate": 1.8393487847635893e-05, "loss": 0.2048, "step": 2835 }, { "epoch": 0.011850022114477889, "grad_norm": 2.4838297422949345, "learning_rate": 1.837728355601566e-05, "loss": 0.173, "step": 2840 }, { "epoch": 0.011870884829468168, "grad_norm": 3.2893446450081485, "learning_rate": 1.836112201603605e-05, "loss": 0.1923, "step": 2845 }, { "epoch": 0.011891747544458446, "grad_norm": 4.141465654183864, "learning_rate": 1.834500304004163e-05, "loss": 0.1729, "step": 2850 }, { "epoch": 0.011912610259448723, "grad_norm": 4.013154705713155, "learning_rate": 1.8328926441528126e-05, "loss": 0.186, "step": 2855 }, { "epoch": 0.011933472974439002, "grad_norm": 4.080019015213254, "learning_rate": 1.8312892035133372e-05, "loss": 0.2014, "step": 2860 }, { "epoch": 0.01195433568942928, "grad_norm": 3.835068581859609, "learning_rate": 1.829689963662831e-05, "loss": 0.1828, "step": 2865 }, { "epoch": 0.011975198404419558, "grad_norm": 3.282545057300874, "learning_rate": 1.828094906290812e-05, "loss": 0.1819, "step": 2870 }, { "epoch": 0.011996061119409835, "grad_norm": 5.257051891842878, "learning_rate": 1.826504013198341e-05, "loss": 0.2087, "step": 2875 }, { "epoch": 0.012016923834400114, "grad_norm": 3.968464881208713, "learning_rate": 1.8249172662971476e-05, "loss": 0.2106, "step": 2880 }, { "epoch": 0.01203778654939039, "grad_norm": 3.762102267629204, "learning_rate": 1.82333464760877e-05, "loss": 0.2101, "step": 2885 }, { "epoch": 0.01205864926438067, "grad_norm": 2.7556874935081708, "learning_rate": 1.8217561392636952e-05, "loss": 0.1903, "step": 2890 }, { "epoch": 0.012079511979370947, "grad_norm": 3.807875478612738, "learning_rate": 1.820181723500516e-05, "loss": 0.1882, "step": 2895 }, { "epoch": 0.012100374694361225, "grad_norm": 2.998143085221011, "learning_rate": 1.8186113826650872e-05, "loss": 0.2156, "step": 2900 }, { "epoch": 0.012121237409351504, "grad_norm": 4.892071756202262, "learning_rate": 1.8170450992096987e-05, "loss": 0.1946, "step": 2905 }, { "epoch": 0.012142100124341781, "grad_norm": 3.461606271498584, "learning_rate": 1.8154828556922477e-05, "loss": 0.1881, "step": 2910 }, { "epoch": 0.01216296283933206, "grad_norm": 4.532043650985166, "learning_rate": 1.813924634775426e-05, "loss": 0.186, "step": 2915 }, { "epoch": 0.012183825554322337, "grad_norm": 5.314158077207962, "learning_rate": 1.8123704192259093e-05, "loss": 0.1571, "step": 2920 }, { "epoch": 0.012204688269312616, "grad_norm": 4.684729668561944, "learning_rate": 1.8108201919135597e-05, "loss": 0.1964, "step": 2925 }, { "epoch": 0.012225550984302893, "grad_norm": 5.120570374579885, "learning_rate": 1.8092739358106303e-05, "loss": 0.1942, "step": 2930 }, { "epoch": 0.012246413699293171, "grad_norm": 3.5609407678647207, "learning_rate": 1.80773163399098e-05, "loss": 0.181, "step": 2935 }, { "epoch": 0.012267276414283448, "grad_norm": 3.471961573639142, "learning_rate": 1.8061932696292963e-05, "loss": 0.1944, "step": 2940 }, { "epoch": 0.012288139129273727, "grad_norm": 4.16446496462489, "learning_rate": 1.804658826000325e-05, "loss": 0.2024, "step": 2945 }, { "epoch": 0.012309001844264006, "grad_norm": 3.446319251630635, "learning_rate": 1.803128286478102e-05, "loss": 0.2625, "step": 2950 }, { "epoch": 0.012329864559254283, "grad_norm": 3.414714202483766, "learning_rate": 1.801601634535204e-05, "loss": 0.2118, "step": 2955 }, { "epoch": 0.012350727274244562, "grad_norm": 7.295818106301903, "learning_rate": 1.800078853741992e-05, "loss": 0.1866, "step": 2960 }, { "epoch": 0.012371589989234839, "grad_norm": 4.140101533031159, "learning_rate": 1.7985599277658728e-05, "loss": 0.1615, "step": 2965 }, { "epoch": 0.012392452704225117, "grad_norm": 3.8579792976072187, "learning_rate": 1.797044840370562e-05, "loss": 0.1878, "step": 2970 }, { "epoch": 0.012413315419215394, "grad_norm": 3.3942148739347915, "learning_rate": 1.7955335754153548e-05, "loss": 0.1736, "step": 2975 }, { "epoch": 0.012434178134205673, "grad_norm": 5.682336012400295, "learning_rate": 1.7940261168544037e-05, "loss": 0.2207, "step": 2980 }, { "epoch": 0.01245504084919595, "grad_norm": 2.5337644118712754, "learning_rate": 1.7925224487360042e-05, "loss": 0.2058, "step": 2985 }, { "epoch": 0.012475903564186229, "grad_norm": 3.730621426548873, "learning_rate": 1.7910225552018844e-05, "loss": 0.1876, "step": 2990 }, { "epoch": 0.012496766279176508, "grad_norm": 3.55658016462136, "learning_rate": 1.7895264204865038e-05, "loss": 0.1964, "step": 2995 }, { "epoch": 0.012517628994166785, "grad_norm": 5.268531344228882, "learning_rate": 1.7880340289163555e-05, "loss": 0.1867, "step": 3000 }, { "epoch": 0.012538491709157064, "grad_norm": 2.9026364241343545, "learning_rate": 1.7865453649092788e-05, "loss": 0.1986, "step": 3005 }, { "epoch": 0.01255935442414734, "grad_norm": 3.2508488329555374, "learning_rate": 1.7850604129737745e-05, "loss": 0.1779, "step": 3010 }, { "epoch": 0.01258021713913762, "grad_norm": 4.8451096162895455, "learning_rate": 1.783579157708327e-05, "loss": 0.1945, "step": 3015 }, { "epoch": 0.012601079854127896, "grad_norm": 3.7005619377114525, "learning_rate": 1.782101583800737e-05, "loss": 0.1841, "step": 3020 }, { "epoch": 0.012621942569118175, "grad_norm": 2.633382104150294, "learning_rate": 1.7806276760274514e-05, "loss": 0.1691, "step": 3025 }, { "epoch": 0.012642805284108452, "grad_norm": 3.498966932315229, "learning_rate": 1.779157419252908e-05, "loss": 0.1705, "step": 3030 }, { "epoch": 0.01266366799909873, "grad_norm": 3.2999459165650453, "learning_rate": 1.7776907984288838e-05, "loss": 0.1645, "step": 3035 }, { "epoch": 0.012684530714089008, "grad_norm": 6.412480145094707, "learning_rate": 1.7762277985938437e-05, "loss": 0.1861, "step": 3040 }, { "epoch": 0.012705393429079287, "grad_norm": 4.567049179188777, "learning_rate": 1.7747684048723045e-05, "loss": 0.159, "step": 3045 }, { "epoch": 0.012726256144069565, "grad_norm": 4.204377342096934, "learning_rate": 1.7733126024741947e-05, "loss": 0.208, "step": 3050 }, { "epoch": 0.012747118859059842, "grad_norm": 3.405476703242242, "learning_rate": 1.77186037669423e-05, "loss": 0.1797, "step": 3055 }, { "epoch": 0.012767981574050121, "grad_norm": 3.9806910387486276, "learning_rate": 1.7704117129112845e-05, "loss": 0.1391, "step": 3060 }, { "epoch": 0.012788844289040398, "grad_norm": 10.079912810546594, "learning_rate": 1.768966596587777e-05, "loss": 0.185, "step": 3065 }, { "epoch": 0.012809707004030677, "grad_norm": 3.2844497096136855, "learning_rate": 1.7675250132690544e-05, "loss": 0.2026, "step": 3070 }, { "epoch": 0.012830569719020954, "grad_norm": 2.7957136884089695, "learning_rate": 1.7660869485827864e-05, "loss": 0.1593, "step": 3075 }, { "epoch": 0.012851432434011233, "grad_norm": 3.912043118794249, "learning_rate": 1.7646523882383644e-05, "loss": 0.1671, "step": 3080 }, { "epoch": 0.01287229514900151, "grad_norm": 3.2601719826021074, "learning_rate": 1.763221318026303e-05, "loss": 0.1989, "step": 3085 }, { "epoch": 0.012893157863991788, "grad_norm": 3.082198816608572, "learning_rate": 1.7617937238176507e-05, "loss": 0.1663, "step": 3090 }, { "epoch": 0.012914020578982067, "grad_norm": 2.7867475507256736, "learning_rate": 1.760369591563402e-05, "loss": 0.1762, "step": 3095 }, { "epoch": 0.012934883293972344, "grad_norm": 5.745637836513582, "learning_rate": 1.7589489072939198e-05, "loss": 0.177, "step": 3100 }, { "epoch": 0.012955746008962623, "grad_norm": 3.3450658418862833, "learning_rate": 1.7575316571183567e-05, "loss": 0.1897, "step": 3105 }, { "epoch": 0.0129766087239529, "grad_norm": 4.012829117299606, "learning_rate": 1.7561178272240855e-05, "loss": 0.16, "step": 3110 }, { "epoch": 0.012997471438943179, "grad_norm": 3.449963080330154, "learning_rate": 1.7547074038761355e-05, "loss": 0.1864, "step": 3115 }, { "epoch": 0.013018334153933456, "grad_norm": 3.053863947348516, "learning_rate": 1.7533003734166293e-05, "loss": 0.1926, "step": 3120 }, { "epoch": 0.013039196868923734, "grad_norm": 3.91940451960808, "learning_rate": 1.7518967222642304e-05, "loss": 0.1403, "step": 3125 }, { "epoch": 0.013060059583914011, "grad_norm": 2.8362283244818474, "learning_rate": 1.7504964369135892e-05, "loss": 0.1466, "step": 3130 }, { "epoch": 0.01308092229890429, "grad_norm": 5.298103931788811, "learning_rate": 1.7490995039348006e-05, "loss": 0.1647, "step": 3135 }, { "epoch": 0.013101785013894569, "grad_norm": 3.37167946967171, "learning_rate": 1.74770590997286e-05, "loss": 0.1582, "step": 3140 }, { "epoch": 0.013122647728884846, "grad_norm": 2.9312622265456247, "learning_rate": 1.746315641747129e-05, "loss": 0.1763, "step": 3145 }, { "epoch": 0.013143510443875125, "grad_norm": 4.361252673806539, "learning_rate": 1.7449286860508043e-05, "loss": 0.1738, "step": 3150 }, { "epoch": 0.013164373158865402, "grad_norm": 7.781487871622051, "learning_rate": 1.7435450297503876e-05, "loss": 0.1635, "step": 3155 }, { "epoch": 0.01318523587385568, "grad_norm": 5.3762512012253865, "learning_rate": 1.742164659785168e-05, "loss": 0.1532, "step": 3160 }, { "epoch": 0.013206098588845958, "grad_norm": 3.14062856834372, "learning_rate": 1.740787563166699e-05, "loss": 0.1858, "step": 3165 }, { "epoch": 0.013226961303836236, "grad_norm": 3.3125641683564853, "learning_rate": 1.7394137269782902e-05, "loss": 0.1768, "step": 3170 }, { "epoch": 0.013247824018826513, "grad_norm": 3.2849676567741626, "learning_rate": 1.7380431383744945e-05, "loss": 0.1602, "step": 3175 }, { "epoch": 0.013268686733816792, "grad_norm": 3.8815237868163885, "learning_rate": 1.736675784580606e-05, "loss": 0.1735, "step": 3180 }, { "epoch": 0.013289549448807069, "grad_norm": 3.308463331690801, "learning_rate": 1.735311652892159e-05, "loss": 0.1514, "step": 3185 }, { "epoch": 0.013310412163797348, "grad_norm": 3.8533183689442208, "learning_rate": 1.7339507306744327e-05, "loss": 0.1737, "step": 3190 }, { "epoch": 0.013331274878787627, "grad_norm": 3.4696037724664035, "learning_rate": 1.7325930053619576e-05, "loss": 0.1817, "step": 3195 }, { "epoch": 0.013352137593777904, "grad_norm": 3.420440847921789, "learning_rate": 1.7312384644580326e-05, "loss": 0.1639, "step": 3200 }, { "epoch": 0.013373000308768182, "grad_norm": 2.767049276526537, "learning_rate": 1.729887095534238e-05, "loss": 0.1604, "step": 3205 }, { "epoch": 0.01339386302375846, "grad_norm": 2.5955658985460786, "learning_rate": 1.7285388862299567e-05, "loss": 0.1702, "step": 3210 }, { "epoch": 0.013414725738748738, "grad_norm": 3.0715120425399935, "learning_rate": 1.727193824251903e-05, "loss": 0.1635, "step": 3215 }, { "epoch": 0.013435588453739015, "grad_norm": 4.642266255554973, "learning_rate": 1.725851897373648e-05, "loss": 0.1544, "step": 3220 }, { "epoch": 0.013456451168729294, "grad_norm": 2.751605867383773, "learning_rate": 1.7245130934351528e-05, "loss": 0.1498, "step": 3225 }, { "epoch": 0.013477313883719571, "grad_norm": 3.5518720975104516, "learning_rate": 1.7231774003423096e-05, "loss": 0.1779, "step": 3230 }, { "epoch": 0.01349817659870985, "grad_norm": 3.1592049175170573, "learning_rate": 1.721844806066478e-05, "loss": 0.1716, "step": 3235 }, { "epoch": 0.013519039313700128, "grad_norm": 2.8683099867572874, "learning_rate": 1.7205152986440338e-05, "loss": 0.1562, "step": 3240 }, { "epoch": 0.013539902028690405, "grad_norm": 5.032380339478845, "learning_rate": 1.7191888661759158e-05, "loss": 0.199, "step": 3245 }, { "epoch": 0.013560764743680684, "grad_norm": 3.7011860102304706, "learning_rate": 1.7178654968271788e-05, "loss": 0.2064, "step": 3250 }, { "epoch": 0.013581627458670961, "grad_norm": 3.759957521233128, "learning_rate": 1.7165451788265503e-05, "loss": 0.1611, "step": 3255 }, { "epoch": 0.01360249017366124, "grad_norm": 2.374660889621415, "learning_rate": 1.7152279004659922e-05, "loss": 0.1596, "step": 3260 }, { "epoch": 0.013623352888651517, "grad_norm": 3.0030215925474537, "learning_rate": 1.7139136501002612e-05, "loss": 0.1869, "step": 3265 }, { "epoch": 0.013644215603641796, "grad_norm": 2.654554940037075, "learning_rate": 1.7126024161464806e-05, "loss": 0.1454, "step": 3270 }, { "epoch": 0.013665078318632073, "grad_norm": 3.7028568359446474, "learning_rate": 1.7112941870837076e-05, "loss": 0.1967, "step": 3275 }, { "epoch": 0.013685941033622352, "grad_norm": 2.9412980229867487, "learning_rate": 1.7099889514525124e-05, "loss": 0.1594, "step": 3280 }, { "epoch": 0.01370680374861263, "grad_norm": 3.3618622662310407, "learning_rate": 1.7086866978545516e-05, "loss": 0.1623, "step": 3285 }, { "epoch": 0.013727666463602907, "grad_norm": 2.751872423521605, "learning_rate": 1.707387414952155e-05, "loss": 0.1444, "step": 3290 }, { "epoch": 0.013748529178593186, "grad_norm": 11.546104073028845, "learning_rate": 1.706091091467908e-05, "loss": 0.1567, "step": 3295 }, { "epoch": 0.013769391893583463, "grad_norm": 3.518081303657424, "learning_rate": 1.7047977161842406e-05, "loss": 0.1541, "step": 3300 }, { "epoch": 0.013790254608573742, "grad_norm": 3.5744906997915993, "learning_rate": 1.7035072779430217e-05, "loss": 0.16, "step": 3305 }, { "epoch": 0.013811117323564019, "grad_norm": 3.0278668124554633, "learning_rate": 1.7022197656451526e-05, "loss": 0.1545, "step": 3310 }, { "epoch": 0.013831980038554298, "grad_norm": 2.7347370198584446, "learning_rate": 1.7009351682501672e-05, "loss": 0.1616, "step": 3315 }, { "epoch": 0.013852842753544575, "grad_norm": 2.85239423351377, "learning_rate": 1.699653474775834e-05, "loss": 0.1679, "step": 3320 }, { "epoch": 0.013873705468534853, "grad_norm": 4.668999646609489, "learning_rate": 1.6983746742977623e-05, "loss": 0.1697, "step": 3325 }, { "epoch": 0.013894568183525132, "grad_norm": 3.799172837197831, "learning_rate": 1.69709875594901e-05, "loss": 0.1572, "step": 3330 }, { "epoch": 0.013915430898515409, "grad_norm": 4.1953656610215, "learning_rate": 1.6958257089196973e-05, "loss": 0.1669, "step": 3335 }, { "epoch": 0.013936293613505688, "grad_norm": 4.0229099894557665, "learning_rate": 1.6945555224566204e-05, "loss": 0.2111, "step": 3340 }, { "epoch": 0.013957156328495965, "grad_norm": 1.958112598968676, "learning_rate": 1.693288185862872e-05, "loss": 0.1498, "step": 3345 }, { "epoch": 0.013978019043486244, "grad_norm": 3.415655648196216, "learning_rate": 1.69202368849746e-05, "loss": 0.1596, "step": 3350 }, { "epoch": 0.01399888175847652, "grad_norm": 5.223786522899317, "learning_rate": 1.6907620197749347e-05, "loss": 0.1496, "step": 3355 }, { "epoch": 0.0140197444734668, "grad_norm": 4.543985272546182, "learning_rate": 1.6895031691650172e-05, "loss": 0.1661, "step": 3360 }, { "epoch": 0.014040607188457076, "grad_norm": 4.0951291298674795, "learning_rate": 1.6882471261922273e-05, "loss": 0.1342, "step": 3365 }, { "epoch": 0.014061469903447355, "grad_norm": 3.154485376380193, "learning_rate": 1.6869938804355195e-05, "loss": 0.2003, "step": 3370 }, { "epoch": 0.014082332618437632, "grad_norm": 3.0077877659168446, "learning_rate": 1.6857434215279202e-05, "loss": 0.1756, "step": 3375 }, { "epoch": 0.014103195333427911, "grad_norm": 3.43778621572246, "learning_rate": 1.684495739156166e-05, "loss": 0.1951, "step": 3380 }, { "epoch": 0.01412405804841819, "grad_norm": 3.1974323400411637, "learning_rate": 1.6832508230603467e-05, "loss": 0.1397, "step": 3385 }, { "epoch": 0.014144920763408467, "grad_norm": 2.4986208047896485, "learning_rate": 1.6820086630335523e-05, "loss": 0.149, "step": 3390 }, { "epoch": 0.014165783478398745, "grad_norm": 3.4070924938832614, "learning_rate": 1.6807692489215205e-05, "loss": 0.171, "step": 3395 }, { "epoch": 0.014186646193389022, "grad_norm": 2.338397589980464, "learning_rate": 1.6795325706222888e-05, "loss": 0.1415, "step": 3400 }, { "epoch": 0.014207508908379301, "grad_norm": 2.6234608562123523, "learning_rate": 1.678298618085846e-05, "loss": 0.183, "step": 3405 }, { "epoch": 0.014228371623369578, "grad_norm": 2.9249143841614296, "learning_rate": 1.677067381313794e-05, "loss": 0.1351, "step": 3410 }, { "epoch": 0.014249234338359857, "grad_norm": 3.8836987572452557, "learning_rate": 1.6758388503590028e-05, "loss": 0.1819, "step": 3415 }, { "epoch": 0.014270097053350134, "grad_norm": 3.12158827976205, "learning_rate": 1.6746130153252757e-05, "loss": 0.1757, "step": 3420 }, { "epoch": 0.014290959768340413, "grad_norm": 3.971181801030303, "learning_rate": 1.6733898663670142e-05, "loss": 0.1656, "step": 3425 }, { "epoch": 0.014311822483330692, "grad_norm": 3.3803096036983487, "learning_rate": 1.672169393688885e-05, "loss": 0.1591, "step": 3430 }, { "epoch": 0.014332685198320969, "grad_norm": 3.305612521975449, "learning_rate": 1.6709515875454898e-05, "loss": 0.1524, "step": 3435 }, { "epoch": 0.014353547913311247, "grad_norm": 2.6247271103906633, "learning_rate": 1.669736438241042e-05, "loss": 0.1564, "step": 3440 }, { "epoch": 0.014374410628301524, "grad_norm": 6.8642497852804025, "learning_rate": 1.6685239361290388e-05, "loss": 0.2051, "step": 3445 }, { "epoch": 0.014395273343291803, "grad_norm": 2.7128084333016624, "learning_rate": 1.6673140716119405e-05, "loss": 0.1809, "step": 3450 }, { "epoch": 0.01441613605828208, "grad_norm": 2.7043094945790442, "learning_rate": 1.666106835140853e-05, "loss": 0.1876, "step": 3455 }, { "epoch": 0.014436998773272359, "grad_norm": 3.100442720711301, "learning_rate": 1.6649022172152087e-05, "loss": 0.1542, "step": 3460 }, { "epoch": 0.014457861488262636, "grad_norm": 3.3972445435273166, "learning_rate": 1.663700208382455e-05, "loss": 0.2055, "step": 3465 }, { "epoch": 0.014478724203252915, "grad_norm": 3.466447622658841, "learning_rate": 1.6625007992377414e-05, "loss": 0.1794, "step": 3470 }, { "epoch": 0.014499586918243193, "grad_norm": 2.5741199355796103, "learning_rate": 1.66130398042361e-05, "loss": 0.1485, "step": 3475 }, { "epoch": 0.01452044963323347, "grad_norm": 3.1779031594507274, "learning_rate": 1.66010974262969e-05, "loss": 0.168, "step": 3480 }, { "epoch": 0.014541312348223749, "grad_norm": 5.849385320016674, "learning_rate": 1.6589180765923934e-05, "loss": 0.1657, "step": 3485 }, { "epoch": 0.014562175063214026, "grad_norm": 2.270164374745932, "learning_rate": 1.657728973094612e-05, "loss": 0.1647, "step": 3490 }, { "epoch": 0.014583037778204305, "grad_norm": 3.327998491366539, "learning_rate": 1.6565424229654205e-05, "loss": 0.1708, "step": 3495 }, { "epoch": 0.014603900493194582, "grad_norm": 4.505538610609902, "learning_rate": 1.6553584170797746e-05, "loss": 0.1708, "step": 3500 }, { "epoch": 0.01462476320818486, "grad_norm": 3.0017059660487, "learning_rate": 1.6541769463582226e-05, "loss": 0.1398, "step": 3505 }, { "epoch": 0.014645625923175138, "grad_norm": 2.6532387065515985, "learning_rate": 1.6529980017666086e-05, "loss": 0.1512, "step": 3510 }, { "epoch": 0.014666488638165416, "grad_norm": 2.8418420117297516, "learning_rate": 1.651821574315783e-05, "loss": 0.173, "step": 3515 }, { "epoch": 0.014687351353155693, "grad_norm": 2.6359674021348174, "learning_rate": 1.650647655061316e-05, "loss": 0.1512, "step": 3520 }, { "epoch": 0.014708214068145972, "grad_norm": 3.5799179955391045, "learning_rate": 1.6494762351032103e-05, "loss": 0.1586, "step": 3525 }, { "epoch": 0.014729076783136251, "grad_norm": 3.015178498887363, "learning_rate": 1.64830730558562e-05, "loss": 0.1511, "step": 3530 }, { "epoch": 0.014749939498126528, "grad_norm": 2.2537877695254287, "learning_rate": 1.647140857696566e-05, "loss": 0.145, "step": 3535 }, { "epoch": 0.014770802213116807, "grad_norm": 3.450796283039053, "learning_rate": 1.6459768826676605e-05, "loss": 0.1772, "step": 3540 }, { "epoch": 0.014791664928107084, "grad_norm": 3.0557953890777467, "learning_rate": 1.644815371773828e-05, "loss": 0.1438, "step": 3545 }, { "epoch": 0.014812527643097363, "grad_norm": 5.882335919041203, "learning_rate": 1.6436563163330317e-05, "loss": 0.1644, "step": 3550 }, { "epoch": 0.01483339035808764, "grad_norm": 3.1524862938005804, "learning_rate": 1.6424997077059996e-05, "loss": 0.1627, "step": 3555 }, { "epoch": 0.014854253073077918, "grad_norm": 4.1103649059119824, "learning_rate": 1.6413455372959555e-05, "loss": 0.157, "step": 3560 }, { "epoch": 0.014875115788068195, "grad_norm": 2.7037704006339873, "learning_rate": 1.6401937965483503e-05, "loss": 0.1541, "step": 3565 }, { "epoch": 0.014895978503058474, "grad_norm": 4.293634687743757, "learning_rate": 1.6390444769505945e-05, "loss": 0.1519, "step": 3570 }, { "epoch": 0.014916841218048753, "grad_norm": 3.1007958515873937, "learning_rate": 1.637897570031796e-05, "loss": 0.1633, "step": 3575 }, { "epoch": 0.01493770393303903, "grad_norm": 2.612307230538456, "learning_rate": 1.636753067362495e-05, "loss": 0.1584, "step": 3580 }, { "epoch": 0.014958566648029309, "grad_norm": 3.341230385873685, "learning_rate": 1.6356109605544083e-05, "loss": 0.1657, "step": 3585 }, { "epoch": 0.014979429363019586, "grad_norm": 3.6703547820693103, "learning_rate": 1.6344712412601664e-05, "loss": 0.179, "step": 3590 }, { "epoch": 0.015000292078009864, "grad_norm": 2.641496871832317, "learning_rate": 1.633333901173058e-05, "loss": 0.2073, "step": 3595 }, { "epoch": 0.015021154793000141, "grad_norm": 2.570897615773774, "learning_rate": 1.632198932026781e-05, "loss": 0.1463, "step": 3600 }, { "epoch": 0.01504201750799042, "grad_norm": 2.3339622967202898, "learning_rate": 1.6310663255951817e-05, "loss": 0.1544, "step": 3605 }, { "epoch": 0.015062880222980697, "grad_norm": 3.933907211621365, "learning_rate": 1.6299360736920103e-05, "loss": 0.1526, "step": 3610 }, { "epoch": 0.015083742937970976, "grad_norm": 2.8172452233397896, "learning_rate": 1.6288081681706728e-05, "loss": 0.1458, "step": 3615 }, { "epoch": 0.015104605652961255, "grad_norm": 3.6682898143428875, "learning_rate": 1.6276826009239792e-05, "loss": 0.1597, "step": 3620 }, { "epoch": 0.015125468367951532, "grad_norm": 2.4186805118351278, "learning_rate": 1.6265593638839042e-05, "loss": 0.1614, "step": 3625 }, { "epoch": 0.01514633108294181, "grad_norm": 3.2752985673243136, "learning_rate": 1.625438449021341e-05, "loss": 0.1482, "step": 3630 }, { "epoch": 0.015167193797932087, "grad_norm": 3.491609361990696, "learning_rate": 1.6243198483458625e-05, "loss": 0.1668, "step": 3635 }, { "epoch": 0.015188056512922366, "grad_norm": 2.5966247730030423, "learning_rate": 1.623203553905479e-05, "loss": 0.1649, "step": 3640 }, { "epoch": 0.015208919227912643, "grad_norm": 3.077230548407308, "learning_rate": 1.6220895577864043e-05, "loss": 0.1475, "step": 3645 }, { "epoch": 0.015229781942902922, "grad_norm": 1.98923557211459, "learning_rate": 1.620977852112818e-05, "loss": 0.1707, "step": 3650 }, { "epoch": 0.015250644657893199, "grad_norm": 2.181669299629006, "learning_rate": 1.619868429046632e-05, "loss": 0.1441, "step": 3655 }, { "epoch": 0.015271507372883478, "grad_norm": 2.8171880835557843, "learning_rate": 1.618761280787258e-05, "loss": 0.1627, "step": 3660 }, { "epoch": 0.015292370087873755, "grad_norm": 3.1442114126260687, "learning_rate": 1.617656399571379e-05, "loss": 0.1574, "step": 3665 }, { "epoch": 0.015313232802864033, "grad_norm": 2.478152904915682, "learning_rate": 1.616553777672717e-05, "loss": 0.155, "step": 3670 }, { "epoch": 0.015334095517854312, "grad_norm": 3.261516230161958, "learning_rate": 1.6154534074018102e-05, "loss": 0.1525, "step": 3675 }, { "epoch": 0.01535495823284459, "grad_norm": 3.061994244339732, "learning_rate": 1.6143552811057857e-05, "loss": 0.1569, "step": 3680 }, { "epoch": 0.015375820947834868, "grad_norm": 2.637582250962271, "learning_rate": 1.6132593911681354e-05, "loss": 0.1491, "step": 3685 }, { "epoch": 0.015396683662825145, "grad_norm": 2.789967220839171, "learning_rate": 1.6121657300084956e-05, "loss": 0.1374, "step": 3690 }, { "epoch": 0.015417546377815424, "grad_norm": 3.421881508593666, "learning_rate": 1.6110742900824255e-05, "loss": 0.1596, "step": 3695 }, { "epoch": 0.0154384090928057, "grad_norm": 3.609722430273145, "learning_rate": 1.60998506388119e-05, "loss": 0.1851, "step": 3700 }, { "epoch": 0.01545927180779598, "grad_norm": 2.649055264051402, "learning_rate": 1.608898043931541e-05, "loss": 0.1425, "step": 3705 }, { "epoch": 0.015480134522786257, "grad_norm": 3.8846865932604184, "learning_rate": 1.6078132227955037e-05, "loss": 0.1428, "step": 3710 }, { "epoch": 0.015500997237776535, "grad_norm": 2.3306499007920976, "learning_rate": 1.6067305930701616e-05, "loss": 0.1425, "step": 3715 }, { "epoch": 0.015521859952766814, "grad_norm": 2.434556945381028, "learning_rate": 1.6056501473874453e-05, "loss": 0.1514, "step": 3720 }, { "epoch": 0.015542722667757091, "grad_norm": 2.149523709313979, "learning_rate": 1.604571878413921e-05, "loss": 0.1271, "step": 3725 }, { "epoch": 0.01556358538274737, "grad_norm": 3.630478984906083, "learning_rate": 1.6034957788505817e-05, "loss": 0.12, "step": 3730 }, { "epoch": 0.015584448097737647, "grad_norm": 2.889243328862027, "learning_rate": 1.6024218414326407e-05, "loss": 0.1415, "step": 3735 }, { "epoch": 0.015605310812727926, "grad_norm": 3.7753549542595515, "learning_rate": 1.6013500589293244e-05, "loss": 0.1578, "step": 3740 }, { "epoch": 0.015626173527718203, "grad_norm": 3.9451932005853156, "learning_rate": 1.6002804241436675e-05, "loss": 0.1334, "step": 3745 }, { "epoch": 0.01564703624270848, "grad_norm": 2.3535078616395526, "learning_rate": 1.5992129299123117e-05, "loss": 0.1318, "step": 3750 }, { "epoch": 0.01566789895769876, "grad_norm": 4.058167209744241, "learning_rate": 1.5981475691053024e-05, "loss": 0.1442, "step": 3755 }, { "epoch": 0.015688761672689035, "grad_norm": 2.159778833640312, "learning_rate": 1.59708433462589e-05, "loss": 0.1357, "step": 3760 }, { "epoch": 0.015709624387679314, "grad_norm": 2.8109511682912327, "learning_rate": 1.5960232194103313e-05, "loss": 0.1435, "step": 3765 }, { "epoch": 0.015730487102669593, "grad_norm": 2.5661028559540924, "learning_rate": 1.5949642164276906e-05, "loss": 0.1611, "step": 3770 }, { "epoch": 0.01575134981765987, "grad_norm": 2.7252765315399756, "learning_rate": 1.5939073186796468e-05, "loss": 0.159, "step": 3775 }, { "epoch": 0.01577221253265015, "grad_norm": 2.1476155564646255, "learning_rate": 1.5928525192002965e-05, "loss": 0.1404, "step": 3780 }, { "epoch": 0.015793075247640426, "grad_norm": 2.6219171560436285, "learning_rate": 1.5917998110559636e-05, "loss": 0.1486, "step": 3785 }, { "epoch": 0.015813937962630704, "grad_norm": 2.230095256482454, "learning_rate": 1.5907491873450057e-05, "loss": 0.1441, "step": 3790 }, { "epoch": 0.015834800677620983, "grad_norm": 2.9247799102528425, "learning_rate": 1.589700641197626e-05, "loss": 0.1194, "step": 3795 }, { "epoch": 0.015855663392611262, "grad_norm": 2.826637148023554, "learning_rate": 1.588654165775683e-05, "loss": 0.1483, "step": 3800 }, { "epoch": 0.015876526107601537, "grad_norm": 2.335287506927521, "learning_rate": 1.5876097542725035e-05, "loss": 0.1663, "step": 3805 }, { "epoch": 0.015897388822591816, "grad_norm": 1.8989804195858784, "learning_rate": 1.5865673999126982e-05, "loss": 0.1426, "step": 3810 }, { "epoch": 0.015918251537582095, "grad_norm": 2.6186209735530874, "learning_rate": 1.585527095951975e-05, "loss": 0.1776, "step": 3815 }, { "epoch": 0.015939114252572373, "grad_norm": 2.3404626604175696, "learning_rate": 1.5844888356769564e-05, "loss": 0.1418, "step": 3820 }, { "epoch": 0.015959976967562652, "grad_norm": 2.9355074722988084, "learning_rate": 1.5834526124049995e-05, "loss": 0.1592, "step": 3825 }, { "epoch": 0.015980839682552928, "grad_norm": 3.7178337077873533, "learning_rate": 1.582418419484012e-05, "loss": 0.1549, "step": 3830 }, { "epoch": 0.016001702397543206, "grad_norm": 2.666896824298114, "learning_rate": 1.5813862502922743e-05, "loss": 0.1412, "step": 3835 }, { "epoch": 0.016022565112533485, "grad_norm": 2.3357739023819826, "learning_rate": 1.5803560982382625e-05, "loss": 0.1303, "step": 3840 }, { "epoch": 0.016043427827523764, "grad_norm": 2.942130952972851, "learning_rate": 1.5793279567604705e-05, "loss": 0.1549, "step": 3845 }, { "epoch": 0.01606429054251404, "grad_norm": 2.670532312730681, "learning_rate": 1.5783018193272353e-05, "loss": 0.1476, "step": 3850 }, { "epoch": 0.016085153257504318, "grad_norm": 2.697267557718884, "learning_rate": 1.5772776794365604e-05, "loss": 0.1491, "step": 3855 }, { "epoch": 0.016106015972494597, "grad_norm": 3.1578400798755335, "learning_rate": 1.5762555306159456e-05, "loss": 0.1448, "step": 3860 }, { "epoch": 0.016126878687484875, "grad_norm": 2.4360172569498277, "learning_rate": 1.575235366422215e-05, "loss": 0.1431, "step": 3865 }, { "epoch": 0.016147741402475154, "grad_norm": 2.9390242035531773, "learning_rate": 1.5742171804413428e-05, "loss": 0.1272, "step": 3870 }, { "epoch": 0.01616860411746543, "grad_norm": 1.9194891599038337, "learning_rate": 1.5732009662882898e-05, "loss": 0.1436, "step": 3875 }, { "epoch": 0.016189466832455708, "grad_norm": 2.4696851650362004, "learning_rate": 1.572186717606829e-05, "loss": 0.1345, "step": 3880 }, { "epoch": 0.016210329547445987, "grad_norm": 3.291322493958134, "learning_rate": 1.5711744280693838e-05, "loss": 0.1142, "step": 3885 }, { "epoch": 0.016231192262436266, "grad_norm": 3.2323710968971997, "learning_rate": 1.5701640913768598e-05, "loss": 0.1272, "step": 3890 }, { "epoch": 0.01625205497742654, "grad_norm": 2.6783355567764, "learning_rate": 1.569155701258479e-05, "loss": 0.1232, "step": 3895 }, { "epoch": 0.01627291769241682, "grad_norm": 2.5196958574003747, "learning_rate": 1.5681492514716186e-05, "loss": 0.1388, "step": 3900 }, { "epoch": 0.0162937804074071, "grad_norm": 2.1456529238054984, "learning_rate": 1.567144735801649e-05, "loss": 0.13, "step": 3905 }, { "epoch": 0.016314643122397377, "grad_norm": 2.5453411850239807, "learning_rate": 1.56614214806177e-05, "loss": 0.1561, "step": 3910 }, { "epoch": 0.016335505837387656, "grad_norm": 2.6095835666930842, "learning_rate": 1.5651414820928516e-05, "loss": 0.1512, "step": 3915 }, { "epoch": 0.01635636855237793, "grad_norm": 5.0508977804498905, "learning_rate": 1.564142731763278e-05, "loss": 0.1209, "step": 3920 }, { "epoch": 0.01637723126736821, "grad_norm": 4.502338381557943, "learning_rate": 1.563145890968786e-05, "loss": 0.1636, "step": 3925 }, { "epoch": 0.01639809398235849, "grad_norm": 4.223026179577568, "learning_rate": 1.56215095363231e-05, "loss": 0.1079, "step": 3930 }, { "epoch": 0.016418956697348767, "grad_norm": 2.049471093190108, "learning_rate": 1.5611579137038256e-05, "loss": 0.1548, "step": 3935 }, { "epoch": 0.016439819412339043, "grad_norm": 3.1685410248111245, "learning_rate": 1.5601667651601975e-05, "loss": 0.1537, "step": 3940 }, { "epoch": 0.01646068212732932, "grad_norm": 3.8857516299283987, "learning_rate": 1.5591775020050222e-05, "loss": 0.1679, "step": 3945 }, { "epoch": 0.0164815448423196, "grad_norm": 2.8708592050631943, "learning_rate": 1.558190118268479e-05, "loss": 0.1432, "step": 3950 }, { "epoch": 0.01650240755730988, "grad_norm": 3.1841652301172303, "learning_rate": 1.557204608007177e-05, "loss": 0.1268, "step": 3955 }, { "epoch": 0.016523270272300158, "grad_norm": 2.7062969020265726, "learning_rate": 1.5562209653040054e-05, "loss": 0.1664, "step": 3960 }, { "epoch": 0.016544132987290433, "grad_norm": 3.124310047359023, "learning_rate": 1.5552391842679834e-05, "loss": 0.1503, "step": 3965 }, { "epoch": 0.016564995702280712, "grad_norm": 2.5553262682176836, "learning_rate": 1.5542592590341138e-05, "loss": 0.1383, "step": 3970 }, { "epoch": 0.01658585841727099, "grad_norm": 2.547309523443808, "learning_rate": 1.5532811837632343e-05, "loss": 0.1347, "step": 3975 }, { "epoch": 0.01660672113226127, "grad_norm": 2.599155506362734, "learning_rate": 1.5523049526418713e-05, "loss": 0.1378, "step": 3980 }, { "epoch": 0.016627583847251545, "grad_norm": 2.513753766043267, "learning_rate": 1.5513305598820957e-05, "loss": 0.1419, "step": 3985 }, { "epoch": 0.016648446562241823, "grad_norm": 4.092991274962581, "learning_rate": 1.550357999721379e-05, "loss": 0.1246, "step": 3990 }, { "epoch": 0.016669309277232102, "grad_norm": 2.299667116017063, "learning_rate": 1.5493872664224488e-05, "loss": 0.1336, "step": 3995 }, { "epoch": 0.01669017199222238, "grad_norm": 1.8953825436314002, "learning_rate": 1.5484183542731475e-05, "loss": 0.1161, "step": 4000 }, { "epoch": 0.016711034707212656, "grad_norm": 2.2050967756708677, "learning_rate": 1.5474512575862913e-05, "loss": 0.1235, "step": 4005 }, { "epoch": 0.016731897422202935, "grad_norm": 3.304122302455404, "learning_rate": 1.5464859706995297e-05, "loss": 0.1361, "step": 4010 }, { "epoch": 0.016752760137193214, "grad_norm": 5.467572565996071, "learning_rate": 1.5455224879752058e-05, "loss": 0.1201, "step": 4015 }, { "epoch": 0.016773622852183492, "grad_norm": 1.9472333893337384, "learning_rate": 1.5445608038002193e-05, "loss": 0.1482, "step": 4020 }, { "epoch": 0.01679448556717377, "grad_norm": 3.291474869258443, "learning_rate": 1.5436009125858875e-05, "loss": 0.1195, "step": 4025 }, { "epoch": 0.016815348282164046, "grad_norm": 3.670294726836516, "learning_rate": 1.5426428087678103e-05, "loss": 0.1434, "step": 4030 }, { "epoch": 0.016836210997154325, "grad_norm": 2.305242202873126, "learning_rate": 1.5416864868057335e-05, "loss": 0.1341, "step": 4035 }, { "epoch": 0.016857073712144604, "grad_norm": 3.7706810868125755, "learning_rate": 1.5407319411834164e-05, "loss": 0.1408, "step": 4040 }, { "epoch": 0.016877936427134883, "grad_norm": 2.476664804113217, "learning_rate": 1.5397791664084946e-05, "loss": 0.1348, "step": 4045 }, { "epoch": 0.016898799142125158, "grad_norm": 2.2731583279503904, "learning_rate": 1.538828157012351e-05, "loss": 0.1519, "step": 4050 }, { "epoch": 0.016919661857115437, "grad_norm": 2.7772109051849645, "learning_rate": 1.5378789075499813e-05, "loss": 0.1344, "step": 4055 }, { "epoch": 0.016940524572105715, "grad_norm": 2.627735844161003, "learning_rate": 1.5369314125998648e-05, "loss": 0.1411, "step": 4060 }, { "epoch": 0.016961387287095994, "grad_norm": 3.4013162333842417, "learning_rate": 1.5359856667638333e-05, "loss": 0.1536, "step": 4065 }, { "epoch": 0.016982250002086273, "grad_norm": 2.358295855554931, "learning_rate": 1.5350416646669416e-05, "loss": 0.1351, "step": 4070 }, { "epoch": 0.017003112717076548, "grad_norm": 3.3359159952395174, "learning_rate": 1.5340994009573415e-05, "loss": 0.1315, "step": 4075 }, { "epoch": 0.017023975432066827, "grad_norm": 4.456487948992392, "learning_rate": 1.5331588703061505e-05, "loss": 0.1352, "step": 4080 }, { "epoch": 0.017044838147057106, "grad_norm": 2.321353594168093, "learning_rate": 1.5322200674073292e-05, "loss": 0.1336, "step": 4085 }, { "epoch": 0.017065700862047384, "grad_norm": 2.151772694575858, "learning_rate": 1.531282986977553e-05, "loss": 0.1405, "step": 4090 }, { "epoch": 0.01708656357703766, "grad_norm": 3.384945474405517, "learning_rate": 1.5303476237560884e-05, "loss": 0.151, "step": 4095 }, { "epoch": 0.01710742629202794, "grad_norm": 2.792939910905864, "learning_rate": 1.5294139725046687e-05, "loss": 0.1739, "step": 4100 }, { "epoch": 0.017128289007018217, "grad_norm": 2.774941755158871, "learning_rate": 1.5284820280073695e-05, "loss": 0.1307, "step": 4105 }, { "epoch": 0.017149151722008496, "grad_norm": 2.316212718341001, "learning_rate": 1.527551785070489e-05, "loss": 0.1379, "step": 4110 }, { "epoch": 0.017170014436998775, "grad_norm": 2.6106551121467096, "learning_rate": 1.5266232385224246e-05, "loss": 0.141, "step": 4115 }, { "epoch": 0.01719087715198905, "grad_norm": 3.5775376084675843, "learning_rate": 1.5256963832135512e-05, "loss": 0.1353, "step": 4120 }, { "epoch": 0.01721173986697933, "grad_norm": 2.795283544925706, "learning_rate": 1.5247712140161041e-05, "loss": 0.1303, "step": 4125 }, { "epoch": 0.017232602581969608, "grad_norm": 2.3734387792590605, "learning_rate": 1.5238477258240578e-05, "loss": 0.1462, "step": 4130 }, { "epoch": 0.017253465296959886, "grad_norm": 2.385502585863085, "learning_rate": 1.5229259135530079e-05, "loss": 0.1408, "step": 4135 }, { "epoch": 0.01727432801195016, "grad_norm": 1.882201997528217, "learning_rate": 1.5220057721400534e-05, "loss": 0.1343, "step": 4140 }, { "epoch": 0.01729519072694044, "grad_norm": 2.1947903182732187, "learning_rate": 1.5210872965436809e-05, "loss": 0.1225, "step": 4145 }, { "epoch": 0.01731605344193072, "grad_norm": 2.9197675749196907, "learning_rate": 1.520170481743648e-05, "loss": 0.1506, "step": 4150 }, { "epoch": 0.017336916156920998, "grad_norm": 2.2456712599717634, "learning_rate": 1.5192553227408672e-05, "loss": 0.1417, "step": 4155 }, { "epoch": 0.017357778871911277, "grad_norm": 3.0036154705898856, "learning_rate": 1.5183418145572932e-05, "loss": 0.1263, "step": 4160 }, { "epoch": 0.017378641586901552, "grad_norm": 6.980073190952408, "learning_rate": 1.517429952235807e-05, "loss": 0.1416, "step": 4165 }, { "epoch": 0.01739950430189183, "grad_norm": 3.193814265125092, "learning_rate": 1.5165197308401054e-05, "loss": 0.1322, "step": 4170 }, { "epoch": 0.01742036701688211, "grad_norm": 2.0870719233794612, "learning_rate": 1.515611145454586e-05, "loss": 0.1344, "step": 4175 }, { "epoch": 0.017441229731872388, "grad_norm": 2.2921354205317357, "learning_rate": 1.5147041911842385e-05, "loss": 0.1198, "step": 4180 }, { "epoch": 0.017462092446862663, "grad_norm": 2.08481988210899, "learning_rate": 1.5137988631545325e-05, "loss": 0.1696, "step": 4185 }, { "epoch": 0.017482955161852942, "grad_norm": 2.273347441827716, "learning_rate": 1.5128951565113062e-05, "loss": 0.1371, "step": 4190 }, { "epoch": 0.01750381787684322, "grad_norm": 2.348378143631636, "learning_rate": 1.5119930664206597e-05, "loss": 0.127, "step": 4195 }, { "epoch": 0.0175246805918335, "grad_norm": 2.5693113014385855, "learning_rate": 1.511092588068845e-05, "loss": 0.1281, "step": 4200 }, { "epoch": 0.01754554330682378, "grad_norm": 2.7917231288136, "learning_rate": 1.5101937166621579e-05, "loss": 0.1704, "step": 4205 }, { "epoch": 0.017566406021814054, "grad_norm": 2.46303585746178, "learning_rate": 1.5092964474268312e-05, "loss": 0.1536, "step": 4210 }, { "epoch": 0.017587268736804332, "grad_norm": 2.526790672477958, "learning_rate": 1.5084007756089287e-05, "loss": 0.1483, "step": 4215 }, { "epoch": 0.01760813145179461, "grad_norm": 2.212652021205878, "learning_rate": 1.5075066964742386e-05, "loss": 0.1167, "step": 4220 }, { "epoch": 0.01762899416678489, "grad_norm": 3.629062477795263, "learning_rate": 1.5066142053081684e-05, "loss": 0.1365, "step": 4225 }, { "epoch": 0.017649856881775165, "grad_norm": 3.9014083024113413, "learning_rate": 1.5057232974156412e-05, "loss": 0.1607, "step": 4230 }, { "epoch": 0.017670719596765444, "grad_norm": 2.6981887883188, "learning_rate": 1.5048339681209918e-05, "loss": 0.1195, "step": 4235 }, { "epoch": 0.017691582311755723, "grad_norm": 2.3944591356200307, "learning_rate": 1.5039462127678626e-05, "loss": 0.1263, "step": 4240 }, { "epoch": 0.017712445026746, "grad_norm": 1.9266644958005077, "learning_rate": 1.5030600267191027e-05, "loss": 0.162, "step": 4245 }, { "epoch": 0.01773330774173628, "grad_norm": 2.377582060418544, "learning_rate": 1.5021754053566653e-05, "loss": 0.1528, "step": 4250 }, { "epoch": 0.017754170456726556, "grad_norm": 2.4755635839235905, "learning_rate": 1.5012923440815061e-05, "loss": 0.141, "step": 4255 }, { "epoch": 0.017775033171716834, "grad_norm": 2.0885248411784576, "learning_rate": 1.5004108383134852e-05, "loss": 0.1381, "step": 4260 }, { "epoch": 0.017795895886707113, "grad_norm": 3.080654958910524, "learning_rate": 1.4995308834912638e-05, "loss": 0.1181, "step": 4265 }, { "epoch": 0.017816758601697392, "grad_norm": 2.663455961052527, "learning_rate": 1.4986524750722086e-05, "loss": 0.1235, "step": 4270 }, { "epoch": 0.017837621316687667, "grad_norm": 2.71008120621649, "learning_rate": 1.4977756085322903e-05, "loss": 0.1448, "step": 4275 }, { "epoch": 0.017858484031677946, "grad_norm": 2.5610271077460642, "learning_rate": 1.496900279365988e-05, "loss": 0.1184, "step": 4280 }, { "epoch": 0.017879346746668225, "grad_norm": 2.472794523987858, "learning_rate": 1.4960264830861914e-05, "loss": 0.1291, "step": 4285 }, { "epoch": 0.017900209461658503, "grad_norm": 2.243800417678195, "learning_rate": 1.4951542152241024e-05, "loss": 0.125, "step": 4290 }, { "epoch": 0.01792107217664878, "grad_norm": 3.3947618175833667, "learning_rate": 1.494283471329142e-05, "loss": 0.1388, "step": 4295 }, { "epoch": 0.017941934891639057, "grad_norm": 2.9270694875222696, "learning_rate": 1.4934142469688532e-05, "loss": 0.1585, "step": 4300 }, { "epoch": 0.017962797606629336, "grad_norm": 2.3572408963617995, "learning_rate": 1.4925465377288057e-05, "loss": 0.1636, "step": 4305 }, { "epoch": 0.017983660321619615, "grad_norm": 2.016351682364858, "learning_rate": 1.4916803392125039e-05, "loss": 0.1216, "step": 4310 }, { "epoch": 0.018004523036609894, "grad_norm": 2.0311583120874546, "learning_rate": 1.490815647041291e-05, "loss": 0.1231, "step": 4315 }, { "epoch": 0.01802538575160017, "grad_norm": 3.0273552863642954, "learning_rate": 1.4899524568542576e-05, "loss": 0.1444, "step": 4320 }, { "epoch": 0.018046248466590448, "grad_norm": 2.6176435212253226, "learning_rate": 1.4890907643081487e-05, "loss": 0.1452, "step": 4325 }, { "epoch": 0.018067111181580726, "grad_norm": 2.707587386901936, "learning_rate": 1.4882305650772717e-05, "loss": 0.141, "step": 4330 }, { "epoch": 0.018087973896571005, "grad_norm": 4.758549810271028, "learning_rate": 1.4873718548534055e-05, "loss": 0.1129, "step": 4335 }, { "epoch": 0.01810883661156128, "grad_norm": 2.9679915470850795, "learning_rate": 1.4865146293457106e-05, "loss": 0.1672, "step": 4340 }, { "epoch": 0.01812969932655156, "grad_norm": 2.2136344504324637, "learning_rate": 1.4856588842806376e-05, "loss": 0.1043, "step": 4345 }, { "epoch": 0.018150562041541838, "grad_norm": 2.2174401481557324, "learning_rate": 1.484804615401839e-05, "loss": 0.1211, "step": 4350 }, { "epoch": 0.018171424756532117, "grad_norm": 1.9021806338300862, "learning_rate": 1.4839518184700786e-05, "loss": 0.1451, "step": 4355 }, { "epoch": 0.018192287471522395, "grad_norm": 4.32289730487288, "learning_rate": 1.4831004892631453e-05, "loss": 0.1337, "step": 4360 }, { "epoch": 0.01821315018651267, "grad_norm": 1.9893884241595545, "learning_rate": 1.4822506235757645e-05, "loss": 0.1289, "step": 4365 }, { "epoch": 0.01823401290150295, "grad_norm": 2.385065858093634, "learning_rate": 1.4814022172195099e-05, "loss": 0.1195, "step": 4370 }, { "epoch": 0.018254875616493228, "grad_norm": 2.982594032484622, "learning_rate": 1.4805552660227174e-05, "loss": 0.1259, "step": 4375 }, { "epoch": 0.018275738331483507, "grad_norm": 4.1112516146477756, "learning_rate": 1.4797097658304003e-05, "loss": 0.1216, "step": 4380 }, { "epoch": 0.018296601046473782, "grad_norm": 4.549252704828442, "learning_rate": 1.4788657125041606e-05, "loss": 0.1112, "step": 4385 }, { "epoch": 0.01831746376146406, "grad_norm": 2.9779069061321346, "learning_rate": 1.4780231019221073e-05, "loss": 0.1279, "step": 4390 }, { "epoch": 0.01833832647645434, "grad_norm": 2.410855570310327, "learning_rate": 1.4771819299787701e-05, "loss": 0.1386, "step": 4395 }, { "epoch": 0.01835918919144462, "grad_norm": 2.7688245663116864, "learning_rate": 1.476342192585015e-05, "loss": 0.1199, "step": 4400 }, { "epoch": 0.018380051906434897, "grad_norm": 3.1985511209484674, "learning_rate": 1.4755038856679618e-05, "loss": 0.1195, "step": 4405 }, { "epoch": 0.018400914621425173, "grad_norm": 2.729251098749986, "learning_rate": 1.474667005170901e-05, "loss": 0.1319, "step": 4410 }, { "epoch": 0.01842177733641545, "grad_norm": 4.478137250073668, "learning_rate": 1.4738315470532112e-05, "loss": 0.1256, "step": 4415 }, { "epoch": 0.01844264005140573, "grad_norm": 2.13841623833388, "learning_rate": 1.4729975072902762e-05, "loss": 0.1349, "step": 4420 }, { "epoch": 0.01846350276639601, "grad_norm": 2.0527746669358216, "learning_rate": 1.4721648818734057e-05, "loss": 0.135, "step": 4425 }, { "epoch": 0.018484365481386284, "grad_norm": 2.024520125407746, "learning_rate": 1.4713336668097522e-05, "loss": 0.1221, "step": 4430 }, { "epoch": 0.018505228196376563, "grad_norm": 1.9049067627212437, "learning_rate": 1.4705038581222316e-05, "loss": 0.1415, "step": 4435 }, { "epoch": 0.01852609091136684, "grad_norm": 2.5188010874515343, "learning_rate": 1.4696754518494441e-05, "loss": 0.134, "step": 4440 }, { "epoch": 0.01854695362635712, "grad_norm": 3.1656798441271716, "learning_rate": 1.4688484440455926e-05, "loss": 0.1527, "step": 4445 }, { "epoch": 0.0185678163413474, "grad_norm": 2.811463872043304, "learning_rate": 1.4680228307804057e-05, "loss": 0.1599, "step": 4450 }, { "epoch": 0.018588679056337674, "grad_norm": 2.7619841037910073, "learning_rate": 1.4671986081390586e-05, "loss": 0.1074, "step": 4455 }, { "epoch": 0.018609541771327953, "grad_norm": 2.2393055583266883, "learning_rate": 1.4663757722220954e-05, "loss": 0.1274, "step": 4460 }, { "epoch": 0.018630404486318232, "grad_norm": 3.1405115973653523, "learning_rate": 1.4655543191453505e-05, "loss": 0.1282, "step": 4465 }, { "epoch": 0.01865126720130851, "grad_norm": 2.502943171188662, "learning_rate": 1.4647342450398729e-05, "loss": 0.1375, "step": 4470 }, { "epoch": 0.018672129916298786, "grad_norm": 1.75709027007313, "learning_rate": 1.4639155460518495e-05, "loss": 0.14, "step": 4475 }, { "epoch": 0.018692992631289065, "grad_norm": 2.3900014550829307, "learning_rate": 1.4630982183425281e-05, "loss": 0.1309, "step": 4480 }, { "epoch": 0.018713855346279343, "grad_norm": 2.4891872555655032, "learning_rate": 1.4622822580881428e-05, "loss": 0.1318, "step": 4485 }, { "epoch": 0.018734718061269622, "grad_norm": 2.523076640325985, "learning_rate": 1.4614676614798384e-05, "loss": 0.1337, "step": 4490 }, { "epoch": 0.0187555807762599, "grad_norm": 2.4555429976161927, "learning_rate": 1.4606544247235963e-05, "loss": 0.1351, "step": 4495 }, { "epoch": 0.018776443491250176, "grad_norm": 2.168508555176554, "learning_rate": 1.4598425440401587e-05, "loss": 0.1035, "step": 4500 }, { "epoch": 0.018797306206240455, "grad_norm": 2.870885001090204, "learning_rate": 1.4590320156649566e-05, "loss": 0.138, "step": 4505 }, { "epoch": 0.018818168921230734, "grad_norm": 2.713996827234399, "learning_rate": 1.4582228358480366e-05, "loss": 0.1379, "step": 4510 }, { "epoch": 0.018839031636221013, "grad_norm": 2.9673498740372724, "learning_rate": 1.4574150008539854e-05, "loss": 0.1452, "step": 4515 }, { "epoch": 0.018859894351211288, "grad_norm": 2.6652247482827423, "learning_rate": 1.4566085069618614e-05, "loss": 0.1114, "step": 4520 }, { "epoch": 0.018880757066201567, "grad_norm": 1.9305490027142715, "learning_rate": 1.4558033504651185e-05, "loss": 0.1311, "step": 4525 }, { "epoch": 0.018901619781191845, "grad_norm": 2.6761179755941464, "learning_rate": 1.4549995276715378e-05, "loss": 0.1358, "step": 4530 }, { "epoch": 0.018922482496182124, "grad_norm": 1.893255918737791, "learning_rate": 1.4541970349031556e-05, "loss": 0.1322, "step": 4535 }, { "epoch": 0.018943345211172403, "grad_norm": 2.5258106223139305, "learning_rate": 1.4533958684961912e-05, "loss": 0.1458, "step": 4540 }, { "epoch": 0.018964207926162678, "grad_norm": 5.709908740287619, "learning_rate": 1.4525960248009787e-05, "loss": 0.1373, "step": 4545 }, { "epoch": 0.018985070641152957, "grad_norm": 2.4039963170464227, "learning_rate": 1.4517975001818965e-05, "loss": 0.1315, "step": 4550 }, { "epoch": 0.019005933356143236, "grad_norm": 3.057901721131383, "learning_rate": 1.4510002910172967e-05, "loss": 0.1358, "step": 4555 }, { "epoch": 0.019026796071133514, "grad_norm": 2.751222580910356, "learning_rate": 1.450204393699438e-05, "loss": 0.1205, "step": 4560 }, { "epoch": 0.01904765878612379, "grad_norm": 2.3959197156023313, "learning_rate": 1.4494098046344164e-05, "loss": 0.1298, "step": 4565 }, { "epoch": 0.01906852150111407, "grad_norm": 4.586464992081483, "learning_rate": 1.448616520242096e-05, "loss": 0.1191, "step": 4570 }, { "epoch": 0.019089384216104347, "grad_norm": 2.985182416845483, "learning_rate": 1.4478245369560434e-05, "loss": 0.1296, "step": 4575 }, { "epoch": 0.019110246931094626, "grad_norm": 3.1208202952595476, "learning_rate": 1.4470338512234574e-05, "loss": 0.1146, "step": 4580 }, { "epoch": 0.019131109646084905, "grad_norm": 2.6897465820459248, "learning_rate": 1.4462444595051055e-05, "loss": 0.1443, "step": 4585 }, { "epoch": 0.01915197236107518, "grad_norm": 1.834107459427791, "learning_rate": 1.4454563582752548e-05, "loss": 0.1175, "step": 4590 }, { "epoch": 0.01917283507606546, "grad_norm": 2.403139062743005, "learning_rate": 1.4446695440216063e-05, "loss": 0.1166, "step": 4595 }, { "epoch": 0.019193697791055737, "grad_norm": 1.9948590007673088, "learning_rate": 1.4438840132452308e-05, "loss": 0.128, "step": 4600 }, { "epoch": 0.019214560506046016, "grad_norm": 5.647510763524407, "learning_rate": 1.4430997624605007e-05, "loss": 0.1211, "step": 4605 }, { "epoch": 0.01923542322103629, "grad_norm": 1.813361766775962, "learning_rate": 1.442316788195028e-05, "loss": 0.1204, "step": 4610 }, { "epoch": 0.01925628593602657, "grad_norm": 3.6213759582806153, "learning_rate": 1.4415350869895977e-05, "loss": 0.1461, "step": 4615 }, { "epoch": 0.01927714865101685, "grad_norm": 3.4922980381872795, "learning_rate": 1.4407546553981042e-05, "loss": 0.1453, "step": 4620 }, { "epoch": 0.019298011366007128, "grad_norm": 2.211490959509534, "learning_rate": 1.439975489987488e-05, "loss": 0.1566, "step": 4625 }, { "epoch": 0.019318874080997403, "grad_norm": 2.3406314038482914, "learning_rate": 1.4391975873376714e-05, "loss": 0.1308, "step": 4630 }, { "epoch": 0.019339736795987682, "grad_norm": 2.1091319984766534, "learning_rate": 1.4384209440414962e-05, "loss": 0.1157, "step": 4635 }, { "epoch": 0.01936059951097796, "grad_norm": 2.2834843016407125, "learning_rate": 1.4376455567046607e-05, "loss": 0.1263, "step": 4640 }, { "epoch": 0.01938146222596824, "grad_norm": 2.1086795751952394, "learning_rate": 1.4368714219456572e-05, "loss": 0.1062, "step": 4645 }, { "epoch": 0.019402324940958518, "grad_norm": 2.506187677948822, "learning_rate": 1.4360985363957102e-05, "loss": 0.1303, "step": 4650 }, { "epoch": 0.019423187655948793, "grad_norm": 2.323605475938618, "learning_rate": 1.4353268966987152e-05, "loss": 0.1472, "step": 4655 }, { "epoch": 0.019444050370939072, "grad_norm": 2.959850974414044, "learning_rate": 1.434556499511177e-05, "loss": 0.128, "step": 4660 }, { "epoch": 0.01946491308592935, "grad_norm": 2.797956491006526, "learning_rate": 1.4337873415021495e-05, "loss": 0.1176, "step": 4665 }, { "epoch": 0.01948577580091963, "grad_norm": 2.4875110217172627, "learning_rate": 1.4330194193531737e-05, "loss": 0.1262, "step": 4670 }, { "epoch": 0.019506638515909905, "grad_norm": 2.8789856852037787, "learning_rate": 1.4322527297582198e-05, "loss": 0.1276, "step": 4675 }, { "epoch": 0.019527501230900184, "grad_norm": 1.8561148910195742, "learning_rate": 1.4314872694236253e-05, "loss": 0.132, "step": 4680 }, { "epoch": 0.019548363945890462, "grad_norm": 2.5729646638438908, "learning_rate": 1.4307230350680373e-05, "loss": 0.1196, "step": 4685 }, { "epoch": 0.01956922666088074, "grad_norm": 2.8764127419954426, "learning_rate": 1.4299600234223519e-05, "loss": 0.1329, "step": 4690 }, { "epoch": 0.01959008937587102, "grad_norm": 3.3405664493288243, "learning_rate": 1.4291982312296566e-05, "loss": 0.1373, "step": 4695 }, { "epoch": 0.019610952090861295, "grad_norm": 2.7041599315353513, "learning_rate": 1.4284376552451718e-05, "loss": 0.1686, "step": 4700 }, { "epoch": 0.019631814805851574, "grad_norm": 1.9205745299751873, "learning_rate": 1.4276782922361921e-05, "loss": 0.1406, "step": 4705 }, { "epoch": 0.019652677520841853, "grad_norm": 1.7141097658910576, "learning_rate": 1.4269201389820285e-05, "loss": 0.1375, "step": 4710 }, { "epoch": 0.01967354023583213, "grad_norm": 2.7803152567670915, "learning_rate": 1.4261631922739524e-05, "loss": 0.0971, "step": 4715 }, { "epoch": 0.019694402950822407, "grad_norm": 1.4444042150647027, "learning_rate": 1.4254074489151373e-05, "loss": 0.0899, "step": 4720 }, { "epoch": 0.019715265665812685, "grad_norm": 1.9458030731328162, "learning_rate": 1.4246529057206018e-05, "loss": 0.0971, "step": 4725 }, { "epoch": 0.019736128380802964, "grad_norm": 2.258329897524763, "learning_rate": 1.4238995595171554e-05, "loss": 0.1215, "step": 4730 }, { "epoch": 0.019756991095793243, "grad_norm": 3.131530198208182, "learning_rate": 1.4231474071433392e-05, "loss": 0.1253, "step": 4735 }, { "epoch": 0.01977785381078352, "grad_norm": 3.1819849649107277, "learning_rate": 1.4223964454493727e-05, "loss": 0.1367, "step": 4740 }, { "epoch": 0.019798716525773797, "grad_norm": 2.480308585636667, "learning_rate": 1.4216466712970978e-05, "loss": 0.1453, "step": 4745 }, { "epoch": 0.019819579240764076, "grad_norm": 1.9056403630292706, "learning_rate": 1.4208980815599232e-05, "loss": 0.1166, "step": 4750 }, { "epoch": 0.019840441955754354, "grad_norm": 1.7888703113891995, "learning_rate": 1.42015067312277e-05, "loss": 0.1173, "step": 4755 }, { "epoch": 0.019861304670744633, "grad_norm": 2.060285556913248, "learning_rate": 1.419404442882017e-05, "loss": 0.1367, "step": 4760 }, { "epoch": 0.01988216738573491, "grad_norm": 2.356677747391441, "learning_rate": 1.4186593877454473e-05, "loss": 0.1289, "step": 4765 }, { "epoch": 0.019903030100725187, "grad_norm": 2.3769129768942068, "learning_rate": 1.4179155046321938e-05, "loss": 0.1163, "step": 4770 }, { "epoch": 0.019923892815715466, "grad_norm": 3.1215308689472914, "learning_rate": 1.4171727904726852e-05, "loss": 0.1399, "step": 4775 }, { "epoch": 0.019944755530705745, "grad_norm": 2.543736567875367, "learning_rate": 1.4164312422085949e-05, "loss": 0.142, "step": 4780 }, { "epoch": 0.019965618245696024, "grad_norm": 2.6195507967297584, "learning_rate": 1.415690856792785e-05, "loss": 0.1296, "step": 4785 }, { "epoch": 0.0199864809606863, "grad_norm": 1.9167150161466115, "learning_rate": 1.4149516311892564e-05, "loss": 0.1011, "step": 4790 }, { "epoch": 0.020007343675676578, "grad_norm": 2.109926677539052, "learning_rate": 1.4142135623730951e-05, "loss": 0.1073, "step": 4795 }, { "epoch": 0.020028206390666856, "grad_norm": 2.6342101071726884, "learning_rate": 1.413476647330421e-05, "loss": 0.1341, "step": 4800 }, { "epoch": 0.020049069105657135, "grad_norm": 2.5869126439847805, "learning_rate": 1.4127408830583357e-05, "loss": 0.1467, "step": 4805 }, { "epoch": 0.02006993182064741, "grad_norm": 2.2498209540850036, "learning_rate": 1.412006266564871e-05, "loss": 0.1155, "step": 4810 }, { "epoch": 0.02009079453563769, "grad_norm": 2.5127949951806205, "learning_rate": 1.4112727948689378e-05, "loss": 0.1151, "step": 4815 }, { "epoch": 0.020111657250627968, "grad_norm": 3.3298813457933796, "learning_rate": 1.4105404650002763e-05, "loss": 0.1438, "step": 4820 }, { "epoch": 0.020132519965618247, "grad_norm": 1.8006883983261106, "learning_rate": 1.409809273999404e-05, "loss": 0.0966, "step": 4825 }, { "epoch": 0.020153382680608525, "grad_norm": 2.0484194657817736, "learning_rate": 1.4090792189175665e-05, "loss": 0.0934, "step": 4830 }, { "epoch": 0.0201742453955988, "grad_norm": 3.6713778836461755, "learning_rate": 1.4083502968166867e-05, "loss": 0.1528, "step": 4835 }, { "epoch": 0.02019510811058908, "grad_norm": 2.196335397758428, "learning_rate": 1.4076225047693163e-05, "loss": 0.1165, "step": 4840 }, { "epoch": 0.020215970825579358, "grad_norm": 3.6392579642370664, "learning_rate": 1.4068958398585857e-05, "loss": 0.1138, "step": 4845 }, { "epoch": 0.020236833540569637, "grad_norm": 2.2706167638401906, "learning_rate": 1.4061702991781549e-05, "loss": 0.1222, "step": 4850 }, { "epoch": 0.020257696255559912, "grad_norm": 2.0273129207244933, "learning_rate": 1.4054458798321644e-05, "loss": 0.1295, "step": 4855 }, { "epoch": 0.02027855897055019, "grad_norm": 2.8726439195003164, "learning_rate": 1.4047225789351872e-05, "loss": 0.1416, "step": 4860 }, { "epoch": 0.02029942168554047, "grad_norm": 2.1933540303906303, "learning_rate": 1.4040003936121807e-05, "loss": 0.113, "step": 4865 }, { "epoch": 0.02032028440053075, "grad_norm": 2.0172490504364045, "learning_rate": 1.4032793209984377e-05, "loss": 0.141, "step": 4870 }, { "epoch": 0.020341147115521027, "grad_norm": 3.7111334998954626, "learning_rate": 1.40255935823954e-05, "loss": 0.117, "step": 4875 }, { "epoch": 0.020362009830511302, "grad_norm": 2.937124261676298, "learning_rate": 1.4018405024913097e-05, "loss": 0.1555, "step": 4880 }, { "epoch": 0.02038287254550158, "grad_norm": 2.6470251958533346, "learning_rate": 1.4011227509197627e-05, "loss": 0.1416, "step": 4885 }, { "epoch": 0.02040373526049186, "grad_norm": 2.1483808759900453, "learning_rate": 1.4004061007010621e-05, "loss": 0.1195, "step": 4890 }, { "epoch": 0.02042459797548214, "grad_norm": 10.16213251949029, "learning_rate": 1.3996905490214703e-05, "loss": 0.1368, "step": 4895 }, { "epoch": 0.020445460690472414, "grad_norm": 1.7978297476662923, "learning_rate": 1.398976093077304e-05, "loss": 0.1091, "step": 4900 }, { "epoch": 0.020466323405462693, "grad_norm": 2.533086285510483, "learning_rate": 1.3982627300748873e-05, "loss": 0.1305, "step": 4905 }, { "epoch": 0.02048718612045297, "grad_norm": 1.790533942080799, "learning_rate": 1.3975504572305054e-05, "loss": 0.1195, "step": 4910 }, { "epoch": 0.02050804883544325, "grad_norm": 1.9027658867773782, "learning_rate": 1.3968392717703605e-05, "loss": 0.1213, "step": 4915 }, { "epoch": 0.020528911550433526, "grad_norm": 1.9155982970961627, "learning_rate": 1.3961291709305251e-05, "loss": 0.1123, "step": 4920 }, { "epoch": 0.020549774265423804, "grad_norm": 2.388073296092133, "learning_rate": 1.3954201519568967e-05, "loss": 0.1135, "step": 4925 }, { "epoch": 0.020570636980414083, "grad_norm": 2.262740451475683, "learning_rate": 1.3947122121051544e-05, "loss": 0.1212, "step": 4930 }, { "epoch": 0.020591499695404362, "grad_norm": 2.2120963685102737, "learning_rate": 1.3940053486407124e-05, "loss": 0.1321, "step": 4935 }, { "epoch": 0.02061236241039464, "grad_norm": 1.5149108991696463, "learning_rate": 1.3932995588386773e-05, "loss": 0.1055, "step": 4940 }, { "epoch": 0.020633225125384916, "grad_norm": 2.3349219053905066, "learning_rate": 1.3925948399838031e-05, "loss": 0.0963, "step": 4945 }, { "epoch": 0.020654087840375195, "grad_norm": 2.1488545739616205, "learning_rate": 1.3918911893704481e-05, "loss": 0.1267, "step": 4950 }, { "epoch": 0.020674950555365473, "grad_norm": 2.5364270384304666, "learning_rate": 1.3911886043025294e-05, "loss": 0.1068, "step": 4955 }, { "epoch": 0.020695813270355752, "grad_norm": 2.4473830972863, "learning_rate": 1.3904870820934818e-05, "loss": 0.123, "step": 4960 }, { "epoch": 0.020716675985346027, "grad_norm": 3.1546744852507915, "learning_rate": 1.389786620066213e-05, "loss": 0.131, "step": 4965 }, { "epoch": 0.020737538700336306, "grad_norm": 2.114132316705039, "learning_rate": 1.3890872155530626e-05, "loss": 0.1144, "step": 4970 }, { "epoch": 0.020758401415326585, "grad_norm": 2.552342168334644, "learning_rate": 1.3883888658957567e-05, "loss": 0.1174, "step": 4975 }, { "epoch": 0.020779264130316864, "grad_norm": 2.471770699091332, "learning_rate": 1.3876915684453676e-05, "loss": 0.1109, "step": 4980 }, { "epoch": 0.020800126845307142, "grad_norm": 2.5679627896362587, "learning_rate": 1.3869953205622712e-05, "loss": 0.1193, "step": 4985 }, { "epoch": 0.020820989560297418, "grad_norm": 2.8878197828564787, "learning_rate": 1.3863001196161044e-05, "loss": 0.1083, "step": 4990 }, { "epoch": 0.020841852275287696, "grad_norm": 2.225719812186924, "learning_rate": 1.3856059629857234e-05, "loss": 0.1319, "step": 4995 }, { "epoch": 0.020862714990277975, "grad_norm": 1.8248734509635998, "learning_rate": 1.3849128480591636e-05, "loss": 0.1137, "step": 5000 }, { "epoch": 0.020883577705268254, "grad_norm": 3.190571002905294, "learning_rate": 1.3842207722335964e-05, "loss": 0.1248, "step": 5005 }, { "epoch": 0.02090444042025853, "grad_norm": 1.8844851615240974, "learning_rate": 1.383529732915289e-05, "loss": 0.0857, "step": 5010 }, { "epoch": 0.020925303135248808, "grad_norm": 2.8185408187233536, "learning_rate": 1.382839727519564e-05, "loss": 0.119, "step": 5015 }, { "epoch": 0.020946165850239087, "grad_norm": 2.252159692659196, "learning_rate": 1.3821507534707585e-05, "loss": 0.1398, "step": 5020 }, { "epoch": 0.020967028565229365, "grad_norm": 2.421039563694879, "learning_rate": 1.3814628082021825e-05, "loss": 0.1069, "step": 5025 }, { "epoch": 0.020987891280219644, "grad_norm": 1.7278591692539502, "learning_rate": 1.3807758891560812e-05, "loss": 0.1076, "step": 5030 }, { "epoch": 0.02100875399520992, "grad_norm": 2.2318132775839614, "learning_rate": 1.380089993783593e-05, "loss": 0.1069, "step": 5035 }, { "epoch": 0.021029616710200198, "grad_norm": 1.674046684551486, "learning_rate": 1.3794051195447103e-05, "loss": 0.0971, "step": 5040 }, { "epoch": 0.021050479425190477, "grad_norm": 2.002817584903178, "learning_rate": 1.3787212639082405e-05, "loss": 0.1117, "step": 5045 }, { "epoch": 0.021071342140180756, "grad_norm": 1.6881983694884053, "learning_rate": 1.3780384243517667e-05, "loss": 0.1178, "step": 5050 }, { "epoch": 0.02109220485517103, "grad_norm": 3.811234569635717, "learning_rate": 1.377356598361607e-05, "loss": 0.1216, "step": 5055 }, { "epoch": 0.02111306757016131, "grad_norm": 2.5170620657674223, "learning_rate": 1.376675783432779e-05, "loss": 0.1124, "step": 5060 }, { "epoch": 0.02113393028515159, "grad_norm": 1.9555907680132445, "learning_rate": 1.3759959770689568e-05, "loss": 0.1153, "step": 5065 }, { "epoch": 0.021154793000141867, "grad_norm": 2.1513595781600428, "learning_rate": 1.3753171767824365e-05, "loss": 0.1078, "step": 5070 }, { "epoch": 0.021175655715132146, "grad_norm": 1.3920611172717245, "learning_rate": 1.3746393800940955e-05, "loss": 0.1473, "step": 5075 }, { "epoch": 0.02119651843012242, "grad_norm": 1.8481544907403744, "learning_rate": 1.3739625845333557e-05, "loss": 0.1119, "step": 5080 }, { "epoch": 0.0212173811451127, "grad_norm": 1.933826374264633, "learning_rate": 1.3732867876381448e-05, "loss": 0.1329, "step": 5085 }, { "epoch": 0.02123824386010298, "grad_norm": 2.6958574227676055, "learning_rate": 1.3726119869548595e-05, "loss": 0.1253, "step": 5090 }, { "epoch": 0.021259106575093258, "grad_norm": 2.4345924413017452, "learning_rate": 1.3719381800383278e-05, "loss": 0.1134, "step": 5095 }, { "epoch": 0.021279969290083533, "grad_norm": 2.5240843646825253, "learning_rate": 1.371265364451771e-05, "loss": 0.1116, "step": 5100 }, { "epoch": 0.02130083200507381, "grad_norm": 1.975037774327523, "learning_rate": 1.370593537766768e-05, "loss": 0.1363, "step": 5105 }, { "epoch": 0.02132169472006409, "grad_norm": 1.966750079144231, "learning_rate": 1.3699226975632183e-05, "loss": 0.127, "step": 5110 }, { "epoch": 0.02134255743505437, "grad_norm": 2.2172915328670855, "learning_rate": 1.3692528414293037e-05, "loss": 0.1181, "step": 5115 }, { "epoch": 0.021363420150044648, "grad_norm": 3.3016734325653423, "learning_rate": 1.368583966961454e-05, "loss": 0.1013, "step": 5120 }, { "epoch": 0.021384282865034923, "grad_norm": 2.1853540407554957, "learning_rate": 1.3679160717643101e-05, "loss": 0.1197, "step": 5125 }, { "epoch": 0.021405145580025202, "grad_norm": 4.120832683696928, "learning_rate": 1.367249153450687e-05, "loss": 0.1297, "step": 5130 }, { "epoch": 0.02142600829501548, "grad_norm": 2.86830862776717, "learning_rate": 1.3665832096415397e-05, "loss": 0.1347, "step": 5135 }, { "epoch": 0.02144687101000576, "grad_norm": 2.2505181662729266, "learning_rate": 1.3659182379659258e-05, "loss": 0.118, "step": 5140 }, { "epoch": 0.021467733724996035, "grad_norm": 1.8207293207706619, "learning_rate": 1.3652542360609713e-05, "loss": 0.1238, "step": 5145 }, { "epoch": 0.021488596439986313, "grad_norm": 2.689544060470715, "learning_rate": 1.3645912015718348e-05, "loss": 0.1245, "step": 5150 }, { "epoch": 0.021509459154976592, "grad_norm": 2.365196426095714, "learning_rate": 1.3639291321516726e-05, "loss": 0.1396, "step": 5155 }, { "epoch": 0.02153032186996687, "grad_norm": 1.8313717535536729, "learning_rate": 1.3632680254616031e-05, "loss": 0.119, "step": 5160 }, { "epoch": 0.02155118458495715, "grad_norm": 1.9831528370507732, "learning_rate": 1.362607879170674e-05, "loss": 0.1278, "step": 5165 }, { "epoch": 0.021572047299947425, "grad_norm": 1.9272770008700533, "learning_rate": 1.3619486909558252e-05, "loss": 0.1114, "step": 5170 }, { "epoch": 0.021592910014937704, "grad_norm": 14.708562061931874, "learning_rate": 1.3612904585018568e-05, "loss": 0.1151, "step": 5175 }, { "epoch": 0.021613772729927982, "grad_norm": 2.6088277033836826, "learning_rate": 1.3606331795013932e-05, "loss": 0.1064, "step": 5180 }, { "epoch": 0.02163463544491826, "grad_norm": 2.610867989954861, "learning_rate": 1.3599768516548497e-05, "loss": 0.1185, "step": 5185 }, { "epoch": 0.021655498159908537, "grad_norm": 3.500163617279604, "learning_rate": 1.3593214726704001e-05, "loss": 0.0803, "step": 5190 }, { "epoch": 0.021676360874898815, "grad_norm": 2.693169307976253, "learning_rate": 1.3586670402639408e-05, "loss": 0.101, "step": 5195 }, { "epoch": 0.021697223589889094, "grad_norm": 2.1067619694343684, "learning_rate": 1.3580135521590583e-05, "loss": 0.0994, "step": 5200 }, { "epoch": 0.021718086304879373, "grad_norm": 1.882930536998214, "learning_rate": 1.3573610060869967e-05, "loss": 0.1204, "step": 5205 }, { "epoch": 0.021738949019869648, "grad_norm": 1.8557385090254512, "learning_rate": 1.3567093997866237e-05, "loss": 0.1556, "step": 5210 }, { "epoch": 0.021759811734859927, "grad_norm": 1.6351255505273052, "learning_rate": 1.3560587310043976e-05, "loss": 0.1518, "step": 5215 }, { "epoch": 0.021780674449850206, "grad_norm": 1.905618791400881, "learning_rate": 1.3554089974943356e-05, "loss": 0.0967, "step": 5220 }, { "epoch": 0.021801537164840484, "grad_norm": 2.189323228981547, "learning_rate": 1.35476019701798e-05, "loss": 0.1122, "step": 5225 }, { "epoch": 0.021822399879830763, "grad_norm": 2.1075658715195833, "learning_rate": 1.3541123273443673e-05, "loss": 0.1124, "step": 5230 }, { "epoch": 0.02184326259482104, "grad_norm": 3.854365588737092, "learning_rate": 1.3534653862499934e-05, "loss": 0.1082, "step": 5235 }, { "epoch": 0.021864125309811317, "grad_norm": 2.4614029794913215, "learning_rate": 1.3528193715187852e-05, "loss": 0.0983, "step": 5240 }, { "epoch": 0.021884988024801596, "grad_norm": 1.974110896556456, "learning_rate": 1.3521742809420649e-05, "loss": 0.1154, "step": 5245 }, { "epoch": 0.021905850739791875, "grad_norm": 1.7441083231743473, "learning_rate": 1.3515301123185213e-05, "loss": 0.1189, "step": 5250 }, { "epoch": 0.02192671345478215, "grad_norm": 2.1031070153685483, "learning_rate": 1.3508868634541772e-05, "loss": 0.1179, "step": 5255 }, { "epoch": 0.02194757616977243, "grad_norm": 2.2181422672572286, "learning_rate": 1.3502445321623565e-05, "loss": 0.114, "step": 5260 }, { "epoch": 0.021968438884762707, "grad_norm": 2.3233474748147693, "learning_rate": 1.349603116263656e-05, "loss": 0.1164, "step": 5265 }, { "epoch": 0.021989301599752986, "grad_norm": 2.6362562395171865, "learning_rate": 1.348962613585912e-05, "loss": 0.1016, "step": 5270 }, { "epoch": 0.022010164314743265, "grad_norm": 2.596963962586209, "learning_rate": 1.3483230219641691e-05, "loss": 0.1606, "step": 5275 }, { "epoch": 0.02203102702973354, "grad_norm": 2.226117206358534, "learning_rate": 1.3476843392406526e-05, "loss": 0.09, "step": 5280 }, { "epoch": 0.02205188974472382, "grad_norm": 2.3227980298563233, "learning_rate": 1.347046563264734e-05, "loss": 0.1207, "step": 5285 }, { "epoch": 0.022072752459714098, "grad_norm": 2.7250948243615043, "learning_rate": 1.3464096918929032e-05, "loss": 0.1159, "step": 5290 }, { "epoch": 0.022093615174704376, "grad_norm": 1.6759367075895824, "learning_rate": 1.3457737229887374e-05, "loss": 0.1133, "step": 5295 }, { "epoch": 0.02211447788969465, "grad_norm": 2.618693728911071, "learning_rate": 1.3451386544228706e-05, "loss": 0.119, "step": 5300 }, { "epoch": 0.02213534060468493, "grad_norm": 2.8944238078957305, "learning_rate": 1.3445044840729644e-05, "loss": 0.1201, "step": 5305 }, { "epoch": 0.02215620331967521, "grad_norm": 2.4067135474864427, "learning_rate": 1.3438712098236779e-05, "loss": 0.1389, "step": 5310 }, { "epoch": 0.022177066034665488, "grad_norm": 2.4206010963228564, "learning_rate": 1.343238829566638e-05, "loss": 0.1066, "step": 5315 }, { "epoch": 0.022197928749655767, "grad_norm": 1.9340985699945425, "learning_rate": 1.3426073412004101e-05, "loss": 0.1248, "step": 5320 }, { "epoch": 0.022218791464646042, "grad_norm": 1.5982396235745144, "learning_rate": 1.3419767426304684e-05, "loss": 0.0951, "step": 5325 }, { "epoch": 0.02223965417963632, "grad_norm": 2.208865022237136, "learning_rate": 1.341347031769167e-05, "loss": 0.1071, "step": 5330 }, { "epoch": 0.0222605168946266, "grad_norm": 2.3358509189236956, "learning_rate": 1.340718206535711e-05, "loss": 0.1303, "step": 5335 }, { "epoch": 0.02228137960961688, "grad_norm": 1.6669831671867843, "learning_rate": 1.3400902648561274e-05, "loss": 0.1049, "step": 5340 }, { "epoch": 0.022302242324607154, "grad_norm": 1.8341064240010503, "learning_rate": 1.3394632046632363e-05, "loss": 0.1209, "step": 5345 }, { "epoch": 0.022323105039597432, "grad_norm": 2.3509950716336, "learning_rate": 1.3388370238966228e-05, "loss": 0.1175, "step": 5350 }, { "epoch": 0.02234396775458771, "grad_norm": 2.051303302422621, "learning_rate": 1.3382117205026077e-05, "loss": 0.1151, "step": 5355 }, { "epoch": 0.02236483046957799, "grad_norm": 1.759416815383369, "learning_rate": 1.3375872924342202e-05, "loss": 0.1044, "step": 5360 }, { "epoch": 0.02238569318456827, "grad_norm": 1.9111177689084564, "learning_rate": 1.336963737651169e-05, "loss": 0.1044, "step": 5365 }, { "epoch": 0.022406555899558544, "grad_norm": 1.9926480454818647, "learning_rate": 1.3363410541198148e-05, "loss": 0.1099, "step": 5370 }, { "epoch": 0.022427418614548823, "grad_norm": 1.9552901976203434, "learning_rate": 1.3357192398131416e-05, "loss": 0.113, "step": 5375 }, { "epoch": 0.0224482813295391, "grad_norm": 2.3248780758146697, "learning_rate": 1.3350982927107306e-05, "loss": 0.1276, "step": 5380 }, { "epoch": 0.02246914404452938, "grad_norm": 2.5367379047184393, "learning_rate": 1.3344782107987311e-05, "loss": 0.0885, "step": 5385 }, { "epoch": 0.022490006759519655, "grad_norm": 1.5843447815564051, "learning_rate": 1.3338589920698335e-05, "loss": 0.1151, "step": 5390 }, { "epoch": 0.022510869474509934, "grad_norm": 2.521092248240235, "learning_rate": 1.3332406345232424e-05, "loss": 0.1067, "step": 5395 }, { "epoch": 0.022531732189500213, "grad_norm": 2.58133047699455, "learning_rate": 1.332623136164649e-05, "loss": 0.1415, "step": 5400 }, { "epoch": 0.02255259490449049, "grad_norm": 1.7808829529336008, "learning_rate": 1.3320064950062037e-05, "loss": 0.1077, "step": 5405 }, { "epoch": 0.02257345761948077, "grad_norm": 1.485969956583892, "learning_rate": 1.3313907090664915e-05, "loss": 0.1024, "step": 5410 }, { "epoch": 0.022594320334471046, "grad_norm": 3.5489317754972034, "learning_rate": 1.3307757763705014e-05, "loss": 0.1216, "step": 5415 }, { "epoch": 0.022615183049461324, "grad_norm": 2.3546248850375475, "learning_rate": 1.3301616949496033e-05, "loss": 0.1169, "step": 5420 }, { "epoch": 0.022636045764451603, "grad_norm": 2.718096265558289, "learning_rate": 1.3295484628415197e-05, "loss": 0.1105, "step": 5425 }, { "epoch": 0.022656908479441882, "grad_norm": 1.857446579298366, "learning_rate": 1.3289360780903e-05, "loss": 0.1002, "step": 5430 }, { "epoch": 0.022677771194432157, "grad_norm": 2.7493871187764696, "learning_rate": 1.328324538746294e-05, "loss": 0.1103, "step": 5435 }, { "epoch": 0.022698633909422436, "grad_norm": 2.499787395635012, "learning_rate": 1.3277138428661254e-05, "loss": 0.1156, "step": 5440 }, { "epoch": 0.022719496624412715, "grad_norm": 1.6823675961241682, "learning_rate": 1.3271039885126677e-05, "loss": 0.0909, "step": 5445 }, { "epoch": 0.022740359339402993, "grad_norm": 2.550847351061393, "learning_rate": 1.326494973755016e-05, "loss": 0.1148, "step": 5450 }, { "epoch": 0.022761222054393272, "grad_norm": 2.2964276711157217, "learning_rate": 1.3258867966684632e-05, "loss": 0.1098, "step": 5455 }, { "epoch": 0.022782084769383548, "grad_norm": 2.4555901217776572, "learning_rate": 1.3252794553344725e-05, "loss": 0.1068, "step": 5460 }, { "epoch": 0.022802947484373826, "grad_norm": 1.9953734976927102, "learning_rate": 1.3246729478406544e-05, "loss": 0.1128, "step": 5465 }, { "epoch": 0.022823810199364105, "grad_norm": 1.8886367833071027, "learning_rate": 1.3240672722807399e-05, "loss": 0.1059, "step": 5470 }, { "epoch": 0.022844672914354384, "grad_norm": 1.75646543727413, "learning_rate": 1.323462426754555e-05, "loss": 0.1028, "step": 5475 }, { "epoch": 0.02286553562934466, "grad_norm": 2.5482449463720065, "learning_rate": 1.322858409367997e-05, "loss": 0.1035, "step": 5480 }, { "epoch": 0.022886398344334938, "grad_norm": 2.324812430528143, "learning_rate": 1.3222552182330084e-05, "loss": 0.1184, "step": 5485 }, { "epoch": 0.022907261059325217, "grad_norm": 7.524221063384772, "learning_rate": 1.3216528514675524e-05, "loss": 0.1104, "step": 5490 }, { "epoch": 0.022928123774315495, "grad_norm": 5.093252941497807, "learning_rate": 1.321051307195589e-05, "loss": 0.1139, "step": 5495 }, { "epoch": 0.02294898648930577, "grad_norm": 1.6551971164665096, "learning_rate": 1.3204505835470498e-05, "loss": 0.1022, "step": 5500 }, { "epoch": 0.02296984920429605, "grad_norm": 3.0402665478112216, "learning_rate": 1.3198506786578128e-05, "loss": 0.113, "step": 5505 }, { "epoch": 0.022990711919286328, "grad_norm": 1.6716619748007926, "learning_rate": 1.3192515906696797e-05, "loss": 0.1254, "step": 5510 }, { "epoch": 0.023011574634276607, "grad_norm": 1.9245487354291517, "learning_rate": 1.3186533177303513e-05, "loss": 0.1269, "step": 5515 }, { "epoch": 0.023032437349266886, "grad_norm": 2.6120397220672538, "learning_rate": 1.318055857993402e-05, "loss": 0.1255, "step": 5520 }, { "epoch": 0.02305330006425716, "grad_norm": 3.380646600836914, "learning_rate": 1.317459209618258e-05, "loss": 0.1344, "step": 5525 }, { "epoch": 0.02307416277924744, "grad_norm": 2.924060369927257, "learning_rate": 1.3168633707701726e-05, "loss": 0.1065, "step": 5530 }, { "epoch": 0.02309502549423772, "grad_norm": 2.4084546764320387, "learning_rate": 1.3162683396202004e-05, "loss": 0.1096, "step": 5535 }, { "epoch": 0.023115888209227997, "grad_norm": 2.182533912657349, "learning_rate": 1.315674114345179e-05, "loss": 0.0981, "step": 5540 }, { "epoch": 0.023136750924218272, "grad_norm": 2.397206130368788, "learning_rate": 1.3150806931276992e-05, "loss": 0.1257, "step": 5545 }, { "epoch": 0.02315761363920855, "grad_norm": 2.09771257994739, "learning_rate": 1.3144880741560864e-05, "loss": 0.1303, "step": 5550 }, { "epoch": 0.02317847635419883, "grad_norm": 3.410052649674074, "learning_rate": 1.3138962556243748e-05, "loss": 0.1201, "step": 5555 }, { "epoch": 0.02319933906918911, "grad_norm": 2.1806618172473606, "learning_rate": 1.313305235732286e-05, "loss": 0.1158, "step": 5560 }, { "epoch": 0.023220201784179387, "grad_norm": 2.0317937990307207, "learning_rate": 1.3127150126852044e-05, "loss": 0.1183, "step": 5565 }, { "epoch": 0.023241064499169663, "grad_norm": 2.8178238952525394, "learning_rate": 1.3121255846941552e-05, "loss": 0.1075, "step": 5570 }, { "epoch": 0.02326192721415994, "grad_norm": 1.874661289213823, "learning_rate": 1.311536949975782e-05, "loss": 0.0958, "step": 5575 }, { "epoch": 0.02328278992915022, "grad_norm": 2.5016819820918883, "learning_rate": 1.3109491067523226e-05, "loss": 0.1157, "step": 5580 }, { "epoch": 0.0233036526441405, "grad_norm": 2.2951391645819013, "learning_rate": 1.3103620532515888e-05, "loss": 0.1168, "step": 5585 }, { "epoch": 0.023324515359130774, "grad_norm": 1.5432612296731592, "learning_rate": 1.3097757877069414e-05, "loss": 0.0982, "step": 5590 }, { "epoch": 0.023345378074121053, "grad_norm": 1.6685077068575627, "learning_rate": 1.30919030835727e-05, "loss": 0.1009, "step": 5595 }, { "epoch": 0.023366240789111332, "grad_norm": 1.8715370075044493, "learning_rate": 1.3086056134469694e-05, "loss": 0.1243, "step": 5600 }, { "epoch": 0.02338710350410161, "grad_norm": 2.1456390919459847, "learning_rate": 1.308021701225918e-05, "loss": 0.1022, "step": 5605 }, { "epoch": 0.02340796621909189, "grad_norm": 2.472401735937011, "learning_rate": 1.3074385699494558e-05, "loss": 0.1296, "step": 5610 }, { "epoch": 0.023428828934082165, "grad_norm": 1.947215757664515, "learning_rate": 1.306856217878363e-05, "loss": 0.122, "step": 5615 }, { "epoch": 0.023449691649072443, "grad_norm": 1.9309895696256782, "learning_rate": 1.3062746432788368e-05, "loss": 0.1216, "step": 5620 }, { "epoch": 0.023470554364062722, "grad_norm": 2.771529383916491, "learning_rate": 1.3056938444224709e-05, "loss": 0.0876, "step": 5625 }, { "epoch": 0.023491417079053, "grad_norm": 1.854178561045456, "learning_rate": 1.3051138195862342e-05, "loss": 0.1121, "step": 5630 }, { "epoch": 0.023512279794043276, "grad_norm": 2.5920589291916816, "learning_rate": 1.3045345670524481e-05, "loss": 0.0901, "step": 5635 }, { "epoch": 0.023533142509033555, "grad_norm": 2.0365533657556667, "learning_rate": 1.303956085108766e-05, "loss": 0.1161, "step": 5640 }, { "epoch": 0.023554005224023834, "grad_norm": 2.286035123583612, "learning_rate": 1.3033783720481523e-05, "loss": 0.1197, "step": 5645 }, { "epoch": 0.023574867939014112, "grad_norm": 2.8382485001369333, "learning_rate": 1.30280142616886e-05, "loss": 0.1091, "step": 5650 }, { "epoch": 0.02359573065400439, "grad_norm": 1.388573896213297, "learning_rate": 1.3022252457744109e-05, "loss": 0.1228, "step": 5655 }, { "epoch": 0.023616593368994666, "grad_norm": 2.085689485828943, "learning_rate": 1.3016498291735744e-05, "loss": 0.1018, "step": 5660 }, { "epoch": 0.023637456083984945, "grad_norm": 2.1593347543999584, "learning_rate": 1.3010751746803456e-05, "loss": 0.0983, "step": 5665 }, { "epoch": 0.023658318798975224, "grad_norm": 2.0217091670101186, "learning_rate": 1.3005012806139262e-05, "loss": 0.0981, "step": 5670 }, { "epoch": 0.023679181513965503, "grad_norm": 3.035510894241554, "learning_rate": 1.2999281452987027e-05, "loss": 0.1157, "step": 5675 }, { "epoch": 0.023700044228955778, "grad_norm": 2.2431048154980324, "learning_rate": 1.2993557670642266e-05, "loss": 0.1111, "step": 5680 }, { "epoch": 0.023720906943946057, "grad_norm": 1.792223387649859, "learning_rate": 1.2987841442451926e-05, "loss": 0.1272, "step": 5685 }, { "epoch": 0.023741769658936335, "grad_norm": 3.158577260548826, "learning_rate": 1.2982132751814197e-05, "loss": 0.1101, "step": 5690 }, { "epoch": 0.023762632373926614, "grad_norm": 2.454443845927068, "learning_rate": 1.2976431582178313e-05, "loss": 0.0928, "step": 5695 }, { "epoch": 0.023783495088916893, "grad_norm": 1.7768340150483029, "learning_rate": 1.2970737917044326e-05, "loss": 0.1275, "step": 5700 }, { "epoch": 0.023804357803907168, "grad_norm": 2.2921312040412296, "learning_rate": 1.2965051739962933e-05, "loss": 0.0994, "step": 5705 }, { "epoch": 0.023825220518897447, "grad_norm": 1.6796767810539597, "learning_rate": 1.295937303453526e-05, "loss": 0.0988, "step": 5710 }, { "epoch": 0.023846083233887726, "grad_norm": 3.008850633063917, "learning_rate": 1.295370178441267e-05, "loss": 0.1017, "step": 5715 }, { "epoch": 0.023866945948878004, "grad_norm": 2.7200480033956844, "learning_rate": 1.294803797329656e-05, "loss": 0.0983, "step": 5720 }, { "epoch": 0.02388780866386828, "grad_norm": 1.8318987462517935, "learning_rate": 1.2942381584938175e-05, "loss": 0.0949, "step": 5725 }, { "epoch": 0.02390867137885856, "grad_norm": 1.630337082337026, "learning_rate": 1.2936732603138396e-05, "loss": 0.101, "step": 5730 }, { "epoch": 0.023929534093848837, "grad_norm": 4.222104569280208, "learning_rate": 1.2931091011747553e-05, "loss": 0.0883, "step": 5735 }, { "epoch": 0.023950396808839116, "grad_norm": 1.8228648679207118, "learning_rate": 1.2925456794665245e-05, "loss": 0.0933, "step": 5740 }, { "epoch": 0.023971259523829395, "grad_norm": 2.2068353287139972, "learning_rate": 1.2919829935840127e-05, "loss": 0.1102, "step": 5745 }, { "epoch": 0.02399212223881967, "grad_norm": 2.100768485363344, "learning_rate": 1.2914210419269718e-05, "loss": 0.1031, "step": 5750 }, { "epoch": 0.02401298495380995, "grad_norm": 2.0498915543238287, "learning_rate": 1.2908598229000223e-05, "loss": 0.094, "step": 5755 }, { "epoch": 0.024033847668800228, "grad_norm": 1.684622126132924, "learning_rate": 1.2902993349126341e-05, "loss": 0.1123, "step": 5760 }, { "epoch": 0.024054710383790506, "grad_norm": 2.121400217486185, "learning_rate": 1.2897395763791063e-05, "loss": 0.1044, "step": 5765 }, { "epoch": 0.02407557309878078, "grad_norm": 2.1673462611178818, "learning_rate": 1.28918054571855e-05, "loss": 0.1105, "step": 5770 }, { "epoch": 0.02409643581377106, "grad_norm": 2.303398796109367, "learning_rate": 1.2886222413548684e-05, "loss": 0.113, "step": 5775 }, { "epoch": 0.02411729852876134, "grad_norm": 1.6064811876113234, "learning_rate": 1.2880646617167378e-05, "loss": 0.105, "step": 5780 }, { "epoch": 0.024138161243751618, "grad_norm": 2.111622568404812, "learning_rate": 1.2875078052375907e-05, "loss": 0.0957, "step": 5785 }, { "epoch": 0.024159023958741893, "grad_norm": 2.059521340242364, "learning_rate": 1.2869516703555962e-05, "loss": 0.0898, "step": 5790 }, { "epoch": 0.024179886673732172, "grad_norm": 1.653258165903517, "learning_rate": 1.2863962555136412e-05, "loss": 0.091, "step": 5795 }, { "epoch": 0.02420074938872245, "grad_norm": 1.9058028070974613, "learning_rate": 1.2858415591593128e-05, "loss": 0.1128, "step": 5800 }, { "epoch": 0.02422161210371273, "grad_norm": 2.6116035149282375, "learning_rate": 1.2852875797448799e-05, "loss": 0.1074, "step": 5805 }, { "epoch": 0.024242474818703008, "grad_norm": 2.0878649099335744, "learning_rate": 1.2847343157272753e-05, "loss": 0.1529, "step": 5810 }, { "epoch": 0.024263337533693283, "grad_norm": 2.3090615176480918, "learning_rate": 1.2841817655680772e-05, "loss": 0.0991, "step": 5815 }, { "epoch": 0.024284200248683562, "grad_norm": 1.4297929757414, "learning_rate": 1.2836299277334916e-05, "loss": 0.1129, "step": 5820 }, { "epoch": 0.02430506296367384, "grad_norm": 2.655821451227961, "learning_rate": 1.2830788006943334e-05, "loss": 0.1165, "step": 5825 }, { "epoch": 0.02432592567866412, "grad_norm": 2.3763162824819672, "learning_rate": 1.2825283829260106e-05, "loss": 0.098, "step": 5830 }, { "epoch": 0.024346788393654395, "grad_norm": 2.4429350236117404, "learning_rate": 1.2819786729085055e-05, "loss": 0.1067, "step": 5835 }, { "epoch": 0.024367651108644674, "grad_norm": 2.402006481244084, "learning_rate": 1.2814296691263552e-05, "loss": 0.1096, "step": 5840 }, { "epoch": 0.024388513823634952, "grad_norm": 1.9696921060103592, "learning_rate": 1.2808813700686382e-05, "loss": 0.101, "step": 5845 }, { "epoch": 0.02440937653862523, "grad_norm": 2.4112777175828772, "learning_rate": 1.2803337742289532e-05, "loss": 0.0962, "step": 5850 }, { "epoch": 0.02443023925361551, "grad_norm": 2.71518393071984, "learning_rate": 1.2797868801054033e-05, "loss": 0.1073, "step": 5855 }, { "epoch": 0.024451101968605785, "grad_norm": 2.02169966755519, "learning_rate": 1.2792406862005784e-05, "loss": 0.093, "step": 5860 }, { "epoch": 0.024471964683596064, "grad_norm": 2.703908704182103, "learning_rate": 1.278695191021539e-05, "loss": 0.1149, "step": 5865 }, { "epoch": 0.024492827398586343, "grad_norm": 2.231980139513336, "learning_rate": 1.278150393079797e-05, "loss": 0.1083, "step": 5870 }, { "epoch": 0.02451369011357662, "grad_norm": 2.240424792328713, "learning_rate": 1.2776062908913002e-05, "loss": 0.1055, "step": 5875 }, { "epoch": 0.024534552828566897, "grad_norm": 2.6556866842180744, "learning_rate": 1.2770628829764154e-05, "loss": 0.1183, "step": 5880 }, { "epoch": 0.024555415543557176, "grad_norm": 2.105903612749471, "learning_rate": 1.2765201678599101e-05, "loss": 0.1306, "step": 5885 }, { "epoch": 0.024576278258547454, "grad_norm": 2.011318488370724, "learning_rate": 1.2759781440709378e-05, "loss": 0.1013, "step": 5890 }, { "epoch": 0.024597140973537733, "grad_norm": 2.5114791654780144, "learning_rate": 1.2754368101430186e-05, "loss": 0.1075, "step": 5895 }, { "epoch": 0.024618003688528012, "grad_norm": 2.4880677436487613, "learning_rate": 1.2748961646140247e-05, "loss": 0.1137, "step": 5900 }, { "epoch": 0.024638866403518287, "grad_norm": 2.6426277118067745, "learning_rate": 1.2743562060261641e-05, "loss": 0.0971, "step": 5905 }, { "epoch": 0.024659729118508566, "grad_norm": 1.919599409040053, "learning_rate": 1.2738169329259609e-05, "loss": 0.1104, "step": 5910 }, { "epoch": 0.024680591833498845, "grad_norm": 1.912397055478628, "learning_rate": 1.273278343864243e-05, "loss": 0.1044, "step": 5915 }, { "epoch": 0.024701454548489123, "grad_norm": 2.1598371377132337, "learning_rate": 1.2727404373961232e-05, "loss": 0.1141, "step": 5920 }, { "epoch": 0.0247223172634794, "grad_norm": 1.5169780780290898, "learning_rate": 1.2722032120809826e-05, "loss": 0.0744, "step": 5925 }, { "epoch": 0.024743179978469677, "grad_norm": 1.9241266273203035, "learning_rate": 1.2716666664824568e-05, "loss": 0.0835, "step": 5930 }, { "epoch": 0.024764042693459956, "grad_norm": 1.5672139066349835, "learning_rate": 1.2711307991684176e-05, "loss": 0.0948, "step": 5935 }, { "epoch": 0.024784905408450235, "grad_norm": 2.2968846773409135, "learning_rate": 1.2705956087109574e-05, "loss": 0.1229, "step": 5940 }, { "epoch": 0.024805768123440514, "grad_norm": 1.8377885794002973, "learning_rate": 1.2700610936863738e-05, "loss": 0.093, "step": 5945 }, { "epoch": 0.02482663083843079, "grad_norm": 1.5894502911316815, "learning_rate": 1.2695272526751532e-05, "loss": 0.1124, "step": 5950 }, { "epoch": 0.024847493553421068, "grad_norm": 1.6714436628799458, "learning_rate": 1.268994084261955e-05, "loss": 0.0892, "step": 5955 }, { "epoch": 0.024868356268411346, "grad_norm": 1.8842728082168543, "learning_rate": 1.2684615870355964e-05, "loss": 0.1002, "step": 5960 }, { "epoch": 0.024889218983401625, "grad_norm": 2.41247307623765, "learning_rate": 1.2679297595890354e-05, "loss": 0.1161, "step": 5965 }, { "epoch": 0.0249100816983919, "grad_norm": 1.655168612869422, "learning_rate": 1.2673986005193573e-05, "loss": 0.0946, "step": 5970 }, { "epoch": 0.02493094441338218, "grad_norm": 1.3671285835892601, "learning_rate": 1.2668681084277568e-05, "loss": 0.1015, "step": 5975 }, { "epoch": 0.024951807128372458, "grad_norm": 2.838669933304841, "learning_rate": 1.2663382819195242e-05, "loss": 0.102, "step": 5980 }, { "epoch": 0.024972669843362737, "grad_norm": 2.2031400451648433, "learning_rate": 1.2658091196040292e-05, "loss": 0.0948, "step": 5985 }, { "epoch": 0.024993532558353015, "grad_norm": 1.7707716273847027, "learning_rate": 1.2652806200947059e-05, "loss": 0.0949, "step": 5990 }, { "epoch": 0.02501439527334329, "grad_norm": 2.017576599174439, "learning_rate": 1.2647527820090375e-05, "loss": 0.111, "step": 5995 }, { "epoch": 0.02503525798833357, "grad_norm": 1.8279246364187876, "learning_rate": 1.2642256039685408e-05, "loss": 0.108, "step": 6000 }, { "epoch": 0.025056120703323848, "grad_norm": 2.2458365305337447, "learning_rate": 1.2636990845987518e-05, "loss": 0.1176, "step": 6005 }, { "epoch": 0.025076983418314127, "grad_norm": 2.4087577246322804, "learning_rate": 1.2631732225292094e-05, "loss": 0.1091, "step": 6010 }, { "epoch": 0.025097846133304402, "grad_norm": 2.108074091620855, "learning_rate": 1.2626480163934425e-05, "loss": 0.091, "step": 6015 }, { "epoch": 0.02511870884829468, "grad_norm": 3.050527213437986, "learning_rate": 1.2621234648289523e-05, "loss": 0.1126, "step": 6020 }, { "epoch": 0.02513957156328496, "grad_norm": 1.9034359241898284, "learning_rate": 1.2615995664771998e-05, "loss": 0.1094, "step": 6025 }, { "epoch": 0.02516043427827524, "grad_norm": 3.382110919681313, "learning_rate": 1.2610763199835901e-05, "loss": 0.1042, "step": 6030 }, { "epoch": 0.025181296993265517, "grad_norm": 1.6701403858957855, "learning_rate": 1.2605537239974577e-05, "loss": 0.1127, "step": 6035 }, { "epoch": 0.025202159708255793, "grad_norm": 2.194757505953987, "learning_rate": 1.2600317771720514e-05, "loss": 0.1216, "step": 6040 }, { "epoch": 0.02522302242324607, "grad_norm": 2.03755520019822, "learning_rate": 1.2595104781645214e-05, "loss": 0.1049, "step": 6045 }, { "epoch": 0.02524388513823635, "grad_norm": 2.421258417255674, "learning_rate": 1.2589898256359025e-05, "loss": 0.1041, "step": 6050 }, { "epoch": 0.02526474785322663, "grad_norm": 2.3607574252674755, "learning_rate": 1.258469818251101e-05, "loss": 0.0939, "step": 6055 }, { "epoch": 0.025285610568216904, "grad_norm": 1.9225245592447187, "learning_rate": 1.257950454678881e-05, "loss": 0.1032, "step": 6060 }, { "epoch": 0.025306473283207183, "grad_norm": 1.7147271424398898, "learning_rate": 1.257431733591848e-05, "loss": 0.1003, "step": 6065 }, { "epoch": 0.02532733599819746, "grad_norm": 1.8571452342868031, "learning_rate": 1.2569136536664361e-05, "loss": 0.0907, "step": 6070 }, { "epoch": 0.02534819871318774, "grad_norm": 1.6250967277456365, "learning_rate": 1.2563962135828943e-05, "loss": 0.1107, "step": 6075 }, { "epoch": 0.025369061428178016, "grad_norm": 2.3169497526007463, "learning_rate": 1.2558794120252706e-05, "loss": 0.0882, "step": 6080 }, { "epoch": 0.025389924143168294, "grad_norm": 1.6944948926192995, "learning_rate": 1.2553632476814e-05, "loss": 0.0938, "step": 6085 }, { "epoch": 0.025410786858158573, "grad_norm": 1.7619964623897064, "learning_rate": 1.2548477192428885e-05, "loss": 0.0965, "step": 6090 }, { "epoch": 0.025431649573148852, "grad_norm": 2.214019902672365, "learning_rate": 1.2543328254051008e-05, "loss": 0.1132, "step": 6095 }, { "epoch": 0.02545251228813913, "grad_norm": 1.8430724028847683, "learning_rate": 1.253818564867146e-05, "loss": 0.1155, "step": 6100 }, { "epoch": 0.025473375003129406, "grad_norm": 1.5992565233352587, "learning_rate": 1.2533049363318626e-05, "loss": 0.0996, "step": 6105 }, { "epoch": 0.025494237718119685, "grad_norm": 2.470838867383936, "learning_rate": 1.2527919385058067e-05, "loss": 0.1022, "step": 6110 }, { "epoch": 0.025515100433109963, "grad_norm": 1.2198101868548201, "learning_rate": 1.252279570099237e-05, "loss": 0.0793, "step": 6115 }, { "epoch": 0.025535963148100242, "grad_norm": 1.4953547104416496, "learning_rate": 1.2517678298261013e-05, "loss": 0.1044, "step": 6120 }, { "epoch": 0.025556825863090517, "grad_norm": 1.6492649678837268, "learning_rate": 1.2512567164040237e-05, "loss": 0.0917, "step": 6125 }, { "epoch": 0.025577688578080796, "grad_norm": 2.2575739902841425, "learning_rate": 1.2507462285542902e-05, "loss": 0.1109, "step": 6130 }, { "epoch": 0.025598551293071075, "grad_norm": 1.906121948910402, "learning_rate": 1.2502363650018357e-05, "loss": 0.1143, "step": 6135 }, { "epoch": 0.025619414008061354, "grad_norm": 1.5014233798883128, "learning_rate": 1.2497271244752307e-05, "loss": 0.0808, "step": 6140 }, { "epoch": 0.025640276723051632, "grad_norm": 3.6989315904712585, "learning_rate": 1.249218505706668e-05, "loss": 0.1079, "step": 6145 }, { "epoch": 0.025661139438041908, "grad_norm": 1.4432909816694968, "learning_rate": 1.2487105074319487e-05, "loss": 0.0911, "step": 6150 }, { "epoch": 0.025682002153032187, "grad_norm": 1.687694314463687, "learning_rate": 1.2482031283904705e-05, "loss": 0.1154, "step": 6155 }, { "epoch": 0.025702864868022465, "grad_norm": 2.6230791928717823, "learning_rate": 1.2476963673252127e-05, "loss": 0.1032, "step": 6160 }, { "epoch": 0.025723727583012744, "grad_norm": 2.053380539007455, "learning_rate": 1.2471902229827249e-05, "loss": 0.1091, "step": 6165 }, { "epoch": 0.02574459029800302, "grad_norm": 1.7741295242178485, "learning_rate": 1.2466846941131128e-05, "loss": 0.0963, "step": 6170 }, { "epoch": 0.025765453012993298, "grad_norm": 1.5435478161437821, "learning_rate": 1.2461797794700255e-05, "loss": 0.0925, "step": 6175 }, { "epoch": 0.025786315727983577, "grad_norm": 2.2064038213255137, "learning_rate": 1.245675477810643e-05, "loss": 0.1045, "step": 6180 }, { "epoch": 0.025807178442973856, "grad_norm": 2.0691738593850717, "learning_rate": 1.2451717878956626e-05, "loss": 0.1039, "step": 6185 }, { "epoch": 0.025828041157964134, "grad_norm": 1.173562144537702, "learning_rate": 1.244668708489287e-05, "loss": 0.0973, "step": 6190 }, { "epoch": 0.02584890387295441, "grad_norm": 1.4129045680423618, "learning_rate": 1.2441662383592112e-05, "loss": 0.0879, "step": 6195 }, { "epoch": 0.02586976658794469, "grad_norm": 2.039912965117092, "learning_rate": 1.243664376276609e-05, "loss": 0.1089, "step": 6200 }, { "epoch": 0.025890629302934967, "grad_norm": 1.7544387176248897, "learning_rate": 1.2431631210161222e-05, "loss": 0.1181, "step": 6205 }, { "epoch": 0.025911492017925246, "grad_norm": 1.8795237898192236, "learning_rate": 1.2426624713558459e-05, "loss": 0.1128, "step": 6210 }, { "epoch": 0.02593235473291552, "grad_norm": 2.0783636210080503, "learning_rate": 1.2421624260773178e-05, "loss": 0.1056, "step": 6215 }, { "epoch": 0.0259532174479058, "grad_norm": 1.93229903046523, "learning_rate": 1.2416629839655046e-05, "loss": 0.0925, "step": 6220 }, { "epoch": 0.02597408016289608, "grad_norm": 1.8244396609454931, "learning_rate": 1.24116414380879e-05, "loss": 0.1074, "step": 6225 }, { "epoch": 0.025994942877886357, "grad_norm": 1.9167752677394534, "learning_rate": 1.2406659043989627e-05, "loss": 0.0929, "step": 6230 }, { "epoch": 0.026015805592876636, "grad_norm": 1.8658345582445814, "learning_rate": 1.2401682645312035e-05, "loss": 0.0969, "step": 6235 }, { "epoch": 0.02603666830786691, "grad_norm": 1.9055187241706282, "learning_rate": 1.239671223004073e-05, "loss": 0.0992, "step": 6240 }, { "epoch": 0.02605753102285719, "grad_norm": 2.038499291443347, "learning_rate": 1.2391747786195005e-05, "loss": 0.1026, "step": 6245 }, { "epoch": 0.02607839373784747, "grad_norm": 2.749535215864943, "learning_rate": 1.2386789301827707e-05, "loss": 0.1265, "step": 6250 }, { "epoch": 0.026099256452837748, "grad_norm": 2.0977150165385403, "learning_rate": 1.2381836765025112e-05, "loss": 0.1031, "step": 6255 }, { "epoch": 0.026120119167828023, "grad_norm": 1.934447757864465, "learning_rate": 1.2376890163906831e-05, "loss": 0.1042, "step": 6260 }, { "epoch": 0.026140981882818302, "grad_norm": 1.7273272124387413, "learning_rate": 1.2371949486625658e-05, "loss": 0.1003, "step": 6265 }, { "epoch": 0.02616184459780858, "grad_norm": 1.9047446614547883, "learning_rate": 1.236701472136747e-05, "loss": 0.0984, "step": 6270 }, { "epoch": 0.02618270731279886, "grad_norm": 2.059119667989798, "learning_rate": 1.2362085856351103e-05, "loss": 0.099, "step": 6275 }, { "epoch": 0.026203570027789138, "grad_norm": 1.7642044880947159, "learning_rate": 1.2357162879828237e-05, "loss": 0.0894, "step": 6280 }, { "epoch": 0.026224432742779413, "grad_norm": 1.8599632846736285, "learning_rate": 1.2352245780083272e-05, "loss": 0.1039, "step": 6285 }, { "epoch": 0.026245295457769692, "grad_norm": 4.024013448386592, "learning_rate": 1.2347334545433216e-05, "loss": 0.1047, "step": 6290 }, { "epoch": 0.02626615817275997, "grad_norm": 2.1050534200493134, "learning_rate": 1.2342429164227569e-05, "loss": 0.0989, "step": 6295 }, { "epoch": 0.02628702088775025, "grad_norm": 1.999830418463403, "learning_rate": 1.2337529624848202e-05, "loss": 0.1053, "step": 6300 }, { "epoch": 0.026307883602740525, "grad_norm": 2.3105582642170788, "learning_rate": 1.2332635915709245e-05, "loss": 0.0868, "step": 6305 }, { "epoch": 0.026328746317730804, "grad_norm": 3.0161968864464126, "learning_rate": 1.2327748025256978e-05, "loss": 0.1166, "step": 6310 }, { "epoch": 0.026349609032721082, "grad_norm": 1.7879870086551874, "learning_rate": 1.2322865941969699e-05, "loss": 0.0871, "step": 6315 }, { "epoch": 0.02637047174771136, "grad_norm": 2.457418533827392, "learning_rate": 1.2317989654357627e-05, "loss": 0.111, "step": 6320 }, { "epoch": 0.02639133446270164, "grad_norm": 2.156944211538404, "learning_rate": 1.2313119150962785e-05, "loss": 0.0892, "step": 6325 }, { "epoch": 0.026412197177691915, "grad_norm": 3.7504938657794518, "learning_rate": 1.2308254420358877e-05, "loss": 0.1041, "step": 6330 }, { "epoch": 0.026433059892682194, "grad_norm": 2.206990573080726, "learning_rate": 1.2303395451151184e-05, "loss": 0.0973, "step": 6335 }, { "epoch": 0.026453922607672473, "grad_norm": 1.9684396617208124, "learning_rate": 1.229854223197646e-05, "loss": 0.0831, "step": 6340 }, { "epoch": 0.02647478532266275, "grad_norm": 1.9464090536015572, "learning_rate": 1.2293694751502795e-05, "loss": 0.0903, "step": 6345 }, { "epoch": 0.026495648037653027, "grad_norm": 1.5656893126450446, "learning_rate": 1.2288852998429535e-05, "loss": 0.0936, "step": 6350 }, { "epoch": 0.026516510752643305, "grad_norm": 2.1271921994428005, "learning_rate": 1.2284016961487146e-05, "loss": 0.1049, "step": 6355 }, { "epoch": 0.026537373467633584, "grad_norm": 2.1459240499995698, "learning_rate": 1.2279186629437118e-05, "loss": 0.0954, "step": 6360 }, { "epoch": 0.026558236182623863, "grad_norm": 1.8397296300417005, "learning_rate": 1.2274361991071847e-05, "loss": 0.1094, "step": 6365 }, { "epoch": 0.026579098897614138, "grad_norm": 1.6672798803945255, "learning_rate": 1.2269543035214542e-05, "loss": 0.0911, "step": 6370 }, { "epoch": 0.026599961612604417, "grad_norm": 2.0789889659540557, "learning_rate": 1.2264729750719086e-05, "loss": 0.1173, "step": 6375 }, { "epoch": 0.026620824327594696, "grad_norm": 1.7441836453515955, "learning_rate": 1.225992212646996e-05, "loss": 0.0835, "step": 6380 }, { "epoch": 0.026641687042584974, "grad_norm": 1.7498249956485854, "learning_rate": 1.2255120151382117e-05, "loss": 0.0871, "step": 6385 }, { "epoch": 0.026662549757575253, "grad_norm": 2.468284056892097, "learning_rate": 1.2250323814400877e-05, "loss": 0.0919, "step": 6390 }, { "epoch": 0.02668341247256553, "grad_norm": 1.551705407688676, "learning_rate": 1.2245533104501826e-05, "loss": 0.0973, "step": 6395 }, { "epoch": 0.026704275187555807, "grad_norm": 1.5428715049712831, "learning_rate": 1.22407480106907e-05, "loss": 0.1163, "step": 6400 }, { "epoch": 0.026725137902546086, "grad_norm": 1.4972985266052794, "learning_rate": 1.223596852200329e-05, "loss": 0.0976, "step": 6405 }, { "epoch": 0.026746000617536365, "grad_norm": 3.1963672768513796, "learning_rate": 1.2231194627505325e-05, "loss": 0.1068, "step": 6410 }, { "epoch": 0.02676686333252664, "grad_norm": 1.3442078253792962, "learning_rate": 1.2226426316292382e-05, "loss": 0.0952, "step": 6415 }, { "epoch": 0.02678772604751692, "grad_norm": 1.6504720739207694, "learning_rate": 1.222166357748976e-05, "loss": 0.0899, "step": 6420 }, { "epoch": 0.026808588762507198, "grad_norm": 2.1812709675486808, "learning_rate": 1.22169064002524e-05, "loss": 0.0906, "step": 6425 }, { "epoch": 0.026829451477497476, "grad_norm": 2.891068377238715, "learning_rate": 1.2212154773764762e-05, "loss": 0.0982, "step": 6430 }, { "epoch": 0.026850314192487755, "grad_norm": 1.6180905281148406, "learning_rate": 1.2207408687240732e-05, "loss": 0.0861, "step": 6435 }, { "epoch": 0.02687117690747803, "grad_norm": 1.8769913173476531, "learning_rate": 1.2202668129923519e-05, "loss": 0.0838, "step": 6440 }, { "epoch": 0.02689203962246831, "grad_norm": 1.9578271334340873, "learning_rate": 1.2197933091085541e-05, "loss": 0.1036, "step": 6445 }, { "epoch": 0.026912902337458588, "grad_norm": 1.7363673800492572, "learning_rate": 1.2193203560028342e-05, "loss": 0.0969, "step": 6450 }, { "epoch": 0.026933765052448867, "grad_norm": 2.3155240271198316, "learning_rate": 1.2188479526082475e-05, "loss": 0.0976, "step": 6455 }, { "epoch": 0.026954627767439142, "grad_norm": 2.188225277796133, "learning_rate": 1.2183760978607409e-05, "loss": 0.1077, "step": 6460 }, { "epoch": 0.02697549048242942, "grad_norm": 2.0744490846900914, "learning_rate": 1.2179047906991421e-05, "loss": 0.1129, "step": 6465 }, { "epoch": 0.0269963531974197, "grad_norm": 1.8946911273112397, "learning_rate": 1.2174340300651506e-05, "loss": 0.0922, "step": 6470 }, { "epoch": 0.027017215912409978, "grad_norm": 1.801518523371211, "learning_rate": 1.2169638149033272e-05, "loss": 0.1081, "step": 6475 }, { "epoch": 0.027038078627400257, "grad_norm": 2.290532194012461, "learning_rate": 1.2164941441610834e-05, "loss": 0.0997, "step": 6480 }, { "epoch": 0.027058941342390532, "grad_norm": 2.041705915828957, "learning_rate": 1.2160250167886727e-05, "loss": 0.0962, "step": 6485 }, { "epoch": 0.02707980405738081, "grad_norm": 2.3350612846291754, "learning_rate": 1.21555643173918e-05, "loss": 0.1153, "step": 6490 }, { "epoch": 0.02710066677237109, "grad_norm": 1.7204402179268021, "learning_rate": 1.2150883879685118e-05, "loss": 0.094, "step": 6495 }, { "epoch": 0.02712152948736137, "grad_norm": 2.228896664686716, "learning_rate": 1.2146208844353868e-05, "loss": 0.1172, "step": 6500 }, { "epoch": 0.027142392202351644, "grad_norm": 3.6591926125832095, "learning_rate": 1.2141539201013262e-05, "loss": 0.0886, "step": 6505 }, { "epoch": 0.027163254917341922, "grad_norm": 2.6775862800042933, "learning_rate": 1.2136874939306429e-05, "loss": 0.0959, "step": 6510 }, { "epoch": 0.0271841176323322, "grad_norm": 1.6234399213618473, "learning_rate": 1.213221604890434e-05, "loss": 0.0935, "step": 6515 }, { "epoch": 0.02720498034732248, "grad_norm": 1.957013096896598, "learning_rate": 1.2127562519505686e-05, "loss": 0.1033, "step": 6520 }, { "epoch": 0.02722584306231276, "grad_norm": 2.873669887568415, "learning_rate": 1.2122914340836807e-05, "loss": 0.0705, "step": 6525 }, { "epoch": 0.027246705777303034, "grad_norm": 2.6641292413112114, "learning_rate": 1.211827150265158e-05, "loss": 0.1012, "step": 6530 }, { "epoch": 0.027267568492293313, "grad_norm": 1.9294974944566805, "learning_rate": 1.2113633994731325e-05, "loss": 0.1134, "step": 6535 }, { "epoch": 0.02728843120728359, "grad_norm": 1.8490360226891491, "learning_rate": 1.2109001806884724e-05, "loss": 0.1022, "step": 6540 }, { "epoch": 0.02730929392227387, "grad_norm": 2.453083765670024, "learning_rate": 1.2104374928947712e-05, "loss": 0.0965, "step": 6545 }, { "epoch": 0.027330156637264146, "grad_norm": 1.4992538777031346, "learning_rate": 1.209975335078339e-05, "loss": 0.0879, "step": 6550 }, { "epoch": 0.027351019352254424, "grad_norm": 1.9459621817205082, "learning_rate": 1.2095137062281931e-05, "loss": 0.0916, "step": 6555 }, { "epoch": 0.027371882067244703, "grad_norm": 3.0764295222763147, "learning_rate": 1.2090526053360494e-05, "loss": 0.1009, "step": 6560 }, { "epoch": 0.027392744782234982, "grad_norm": 1.9010004630774981, "learning_rate": 1.2085920313963111e-05, "loss": 0.0971, "step": 6565 }, { "epoch": 0.02741360749722526, "grad_norm": 1.7956686794244276, "learning_rate": 1.2081319834060618e-05, "loss": 0.1005, "step": 6570 }, { "epoch": 0.027434470212215536, "grad_norm": 1.5376639683569677, "learning_rate": 1.2076724603650554e-05, "loss": 0.0837, "step": 6575 }, { "epoch": 0.027455332927205815, "grad_norm": 2.4297583621144163, "learning_rate": 1.2072134612757065e-05, "loss": 0.1022, "step": 6580 }, { "epoch": 0.027476195642196093, "grad_norm": 2.2829923396450136, "learning_rate": 1.2067549851430817e-05, "loss": 0.0988, "step": 6585 }, { "epoch": 0.027497058357186372, "grad_norm": 1.8899093155835187, "learning_rate": 1.2062970309748914e-05, "loss": 0.076, "step": 6590 }, { "epoch": 0.027517921072176647, "grad_norm": 1.8575592507539807, "learning_rate": 1.2058395977814784e-05, "loss": 0.0986, "step": 6595 }, { "epoch": 0.027538783787166926, "grad_norm": 2.6240524903614117, "learning_rate": 1.205382684575813e-05, "loss": 0.1047, "step": 6600 }, { "epoch": 0.027559646502157205, "grad_norm": 2.031031930069234, "learning_rate": 1.2049262903734784e-05, "loss": 0.1104, "step": 6605 }, { "epoch": 0.027580509217147484, "grad_norm": 1.8702932275476667, "learning_rate": 1.2044704141926675e-05, "loss": 0.078, "step": 6610 }, { "epoch": 0.027601371932137762, "grad_norm": 1.8979906337595367, "learning_rate": 1.2040150550541707e-05, "loss": 0.0995, "step": 6615 }, { "epoch": 0.027622234647128038, "grad_norm": 1.8183343333640671, "learning_rate": 1.2035602119813667e-05, "loss": 0.0945, "step": 6620 }, { "epoch": 0.027643097362118316, "grad_norm": 1.7904812297096155, "learning_rate": 1.2031058840002163e-05, "loss": 0.0966, "step": 6625 }, { "epoch": 0.027663960077108595, "grad_norm": 1.489873054755679, "learning_rate": 1.2026520701392519e-05, "loss": 0.0978, "step": 6630 }, { "epoch": 0.027684822792098874, "grad_norm": 2.208237063177224, "learning_rate": 1.2021987694295682e-05, "loss": 0.0986, "step": 6635 }, { "epoch": 0.02770568550708915, "grad_norm": 2.043731709383543, "learning_rate": 1.2017459809048154e-05, "loss": 0.1103, "step": 6640 }, { "epoch": 0.027726548222079428, "grad_norm": 2.7191298341946646, "learning_rate": 1.2012937036011886e-05, "loss": 0.1324, "step": 6645 }, { "epoch": 0.027747410937069707, "grad_norm": 2.092305463329181, "learning_rate": 1.200841936557421e-05, "loss": 0.0892, "step": 6650 }, { "epoch": 0.027768273652059985, "grad_norm": 1.7547439015667852, "learning_rate": 1.2003906788147735e-05, "loss": 0.0789, "step": 6655 }, { "epoch": 0.027789136367050264, "grad_norm": 3.0595585470692166, "learning_rate": 1.1999399294170275e-05, "loss": 0.0957, "step": 6660 }, { "epoch": 0.02780999908204054, "grad_norm": 2.5443658690624944, "learning_rate": 1.1994896874104762e-05, "loss": 0.1228, "step": 6665 }, { "epoch": 0.027830861797030818, "grad_norm": 2.0412977796506784, "learning_rate": 1.1990399518439153e-05, "loss": 0.1233, "step": 6670 }, { "epoch": 0.027851724512021097, "grad_norm": 2.6628550073140502, "learning_rate": 1.1985907217686359e-05, "loss": 0.1105, "step": 6675 }, { "epoch": 0.027872587227011376, "grad_norm": 2.614847759089199, "learning_rate": 1.1981419962384144e-05, "loss": 0.1313, "step": 6680 }, { "epoch": 0.02789344994200165, "grad_norm": 1.792405418767412, "learning_rate": 1.1976937743095054e-05, "loss": 0.0904, "step": 6685 }, { "epoch": 0.02791431265699193, "grad_norm": 1.5451669726291328, "learning_rate": 1.1972460550406339e-05, "loss": 0.0889, "step": 6690 }, { "epoch": 0.02793517537198221, "grad_norm": 2.4214298791682376, "learning_rate": 1.1967988374929846e-05, "loss": 0.1057, "step": 6695 }, { "epoch": 0.027956038086972487, "grad_norm": 2.0089514703849427, "learning_rate": 1.1963521207301965e-05, "loss": 0.096, "step": 6700 }, { "epoch": 0.027976900801962763, "grad_norm": 2.309317755429882, "learning_rate": 1.1959059038183522e-05, "loss": 0.087, "step": 6705 }, { "epoch": 0.02799776351695304, "grad_norm": 1.8585590911631473, "learning_rate": 1.1954601858259715e-05, "loss": 0.1038, "step": 6710 }, { "epoch": 0.02801862623194332, "grad_norm": 1.8316674020756816, "learning_rate": 1.1950149658240027e-05, "loss": 0.1031, "step": 6715 }, { "epoch": 0.0280394889469336, "grad_norm": 2.6072423255160966, "learning_rate": 1.194570242885814e-05, "loss": 0.1087, "step": 6720 }, { "epoch": 0.028060351661923878, "grad_norm": 3.116040985147593, "learning_rate": 1.1941260160871853e-05, "loss": 0.0855, "step": 6725 }, { "epoch": 0.028081214376914153, "grad_norm": 1.8984237191414228, "learning_rate": 1.1936822845063012e-05, "loss": 0.0958, "step": 6730 }, { "epoch": 0.02810207709190443, "grad_norm": 1.9202431006698555, "learning_rate": 1.1932390472237421e-05, "loss": 0.1353, "step": 6735 }, { "epoch": 0.02812293980689471, "grad_norm": 2.1078849212404402, "learning_rate": 1.1927963033224768e-05, "loss": 0.1049, "step": 6740 }, { "epoch": 0.02814380252188499, "grad_norm": 2.5464987569446853, "learning_rate": 1.192354051887853e-05, "loss": 0.0797, "step": 6745 }, { "epoch": 0.028164665236875264, "grad_norm": 2.2650903243763856, "learning_rate": 1.1919122920075918e-05, "loss": 0.0982, "step": 6750 }, { "epoch": 0.028185527951865543, "grad_norm": 2.1202988575409982, "learning_rate": 1.1914710227717781e-05, "loss": 0.1158, "step": 6755 }, { "epoch": 0.028206390666855822, "grad_norm": 2.1936944443568476, "learning_rate": 1.1910302432728527e-05, "loss": 0.0986, "step": 6760 }, { "epoch": 0.0282272533818461, "grad_norm": 1.6380991732325572, "learning_rate": 1.190589952605605e-05, "loss": 0.095, "step": 6765 }, { "epoch": 0.02824811609683638, "grad_norm": 2.0394344494322887, "learning_rate": 1.190150149867166e-05, "loss": 0.0883, "step": 6770 }, { "epoch": 0.028268978811826655, "grad_norm": 1.1358056554448523, "learning_rate": 1.1897108341569983e-05, "loss": 0.0828, "step": 6775 }, { "epoch": 0.028289841526816933, "grad_norm": 1.5824235672945348, "learning_rate": 1.1892720045768902e-05, "loss": 0.0816, "step": 6780 }, { "epoch": 0.028310704241807212, "grad_norm": 2.1238417292441025, "learning_rate": 1.188833660230948e-05, "loss": 0.0895, "step": 6785 }, { "epoch": 0.02833156695679749, "grad_norm": 2.1088955364139466, "learning_rate": 1.1883958002255873e-05, "loss": 0.0835, "step": 6790 }, { "epoch": 0.028352429671787766, "grad_norm": 2.2542959910493154, "learning_rate": 1.1879584236695251e-05, "loss": 0.1249, "step": 6795 }, { "epoch": 0.028373292386778045, "grad_norm": 1.7203388367033563, "learning_rate": 1.1875215296737746e-05, "loss": 0.0924, "step": 6800 }, { "epoch": 0.028394155101768324, "grad_norm": 1.6092228418855985, "learning_rate": 1.187085117351634e-05, "loss": 0.0776, "step": 6805 }, { "epoch": 0.028415017816758602, "grad_norm": 2.188542461539559, "learning_rate": 1.1866491858186824e-05, "loss": 0.1108, "step": 6810 }, { "epoch": 0.02843588053174888, "grad_norm": 2.0892970439781355, "learning_rate": 1.18621373419277e-05, "loss": 0.0877, "step": 6815 }, { "epoch": 0.028456743246739157, "grad_norm": 2.133458058600677, "learning_rate": 1.185778761594011e-05, "loss": 0.0865, "step": 6820 }, { "epoch": 0.028477605961729435, "grad_norm": 1.658638916239133, "learning_rate": 1.1853442671447775e-05, "loss": 0.0975, "step": 6825 }, { "epoch": 0.028498468676719714, "grad_norm": 2.247417869300717, "learning_rate": 1.1849102499696901e-05, "loss": 0.0883, "step": 6830 }, { "epoch": 0.028519331391709993, "grad_norm": 2.910476515427894, "learning_rate": 1.184476709195612e-05, "loss": 0.0818, "step": 6835 }, { "epoch": 0.028540194106700268, "grad_norm": 2.9339690884560206, "learning_rate": 1.18404364395164e-05, "loss": 0.1207, "step": 6840 }, { "epoch": 0.028561056821690547, "grad_norm": 2.0316414224212425, "learning_rate": 1.1836110533690999e-05, "loss": 0.1077, "step": 6845 }, { "epoch": 0.028581919536680826, "grad_norm": 1.7822740158205799, "learning_rate": 1.1831789365815363e-05, "loss": 0.1022, "step": 6850 }, { "epoch": 0.028602782251671104, "grad_norm": 1.6331289913229814, "learning_rate": 1.1827472927247064e-05, "loss": 0.0867, "step": 6855 }, { "epoch": 0.028623644966661383, "grad_norm": 1.9586653013718522, "learning_rate": 1.1823161209365732e-05, "loss": 0.1, "step": 6860 }, { "epoch": 0.02864450768165166, "grad_norm": 1.8218375068329304, "learning_rate": 1.181885420357298e-05, "loss": 0.1104, "step": 6865 }, { "epoch": 0.028665370396641937, "grad_norm": 2.1527529950859567, "learning_rate": 1.181455190129233e-05, "loss": 0.0987, "step": 6870 }, { "epoch": 0.028686233111632216, "grad_norm": 2.395667203278192, "learning_rate": 1.1810254293969139e-05, "loss": 0.0855, "step": 6875 }, { "epoch": 0.028707095826622495, "grad_norm": 1.5089519939238998, "learning_rate": 1.1805961373070535e-05, "loss": 0.1002, "step": 6880 }, { "epoch": 0.02872795854161277, "grad_norm": 2.2449139888861653, "learning_rate": 1.180167313008534e-05, "loss": 0.1075, "step": 6885 }, { "epoch": 0.02874882125660305, "grad_norm": 1.8621017460093434, "learning_rate": 1.1797389556523997e-05, "loss": 0.083, "step": 6890 }, { "epoch": 0.028769683971593327, "grad_norm": 1.9021122502516163, "learning_rate": 1.1793110643918514e-05, "loss": 0.1047, "step": 6895 }, { "epoch": 0.028790546686583606, "grad_norm": 1.7141881182841225, "learning_rate": 1.1788836383822367e-05, "loss": 0.0844, "step": 6900 }, { "epoch": 0.028811409401573885, "grad_norm": 1.4852513245314551, "learning_rate": 1.178456676781046e-05, "loss": 0.096, "step": 6905 }, { "epoch": 0.02883227211656416, "grad_norm": 2.571358950741396, "learning_rate": 1.1780301787479032e-05, "loss": 0.1143, "step": 6910 }, { "epoch": 0.02885313483155444, "grad_norm": 1.7069932761924302, "learning_rate": 1.17760414344456e-05, "loss": 0.1042, "step": 6915 }, { "epoch": 0.028873997546544718, "grad_norm": 1.895554452993096, "learning_rate": 1.177178570034889e-05, "loss": 0.0932, "step": 6920 }, { "epoch": 0.028894860261534996, "grad_norm": 2.8122184492347944, "learning_rate": 1.1767534576848758e-05, "loss": 0.1368, "step": 6925 }, { "epoch": 0.02891572297652527, "grad_norm": 2.4041313064636247, "learning_rate": 1.1763288055626128e-05, "loss": 0.1037, "step": 6930 }, { "epoch": 0.02893658569151555, "grad_norm": 1.6934947134068514, "learning_rate": 1.175904612838293e-05, "loss": 0.0851, "step": 6935 }, { "epoch": 0.02895744840650583, "grad_norm": 2.7736609733929454, "learning_rate": 1.175480878684202e-05, "loss": 0.0968, "step": 6940 }, { "epoch": 0.028978311121496108, "grad_norm": 2.867670544572405, "learning_rate": 1.1750576022747117e-05, "loss": 0.0849, "step": 6945 }, { "epoch": 0.028999173836486387, "grad_norm": 1.895399693282586, "learning_rate": 1.174634782786274e-05, "loss": 0.0983, "step": 6950 }, { "epoch": 0.029020036551476662, "grad_norm": 2.813422437060054, "learning_rate": 1.1742124193974134e-05, "loss": 0.0859, "step": 6955 }, { "epoch": 0.02904089926646694, "grad_norm": 1.4870658038230997, "learning_rate": 1.1737905112887205e-05, "loss": 0.0998, "step": 6960 }, { "epoch": 0.02906176198145722, "grad_norm": 2.1099975735915293, "learning_rate": 1.1733690576428454e-05, "loss": 0.1032, "step": 6965 }, { "epoch": 0.029082624696447498, "grad_norm": 2.240825347748576, "learning_rate": 1.1729480576444912e-05, "loss": 0.0926, "step": 6970 }, { "epoch": 0.029103487411437774, "grad_norm": 2.1298597124538414, "learning_rate": 1.172527510480407e-05, "loss": 0.1157, "step": 6975 }, { "epoch": 0.029124350126428052, "grad_norm": 1.0038579207213798, "learning_rate": 1.172107415339382e-05, "loss": 0.0696, "step": 6980 }, { "epoch": 0.02914521284141833, "grad_norm": 1.7442721107735433, "learning_rate": 1.1716877714122377e-05, "loss": 0.0907, "step": 6985 }, { "epoch": 0.02916607555640861, "grad_norm": 2.0716560021416495, "learning_rate": 1.1712685778918225e-05, "loss": 0.0982, "step": 6990 }, { "epoch": 0.029186938271398885, "grad_norm": 2.8293713322881415, "learning_rate": 1.1708498339730047e-05, "loss": 0.1008, "step": 6995 }, { "epoch": 0.029207800986389164, "grad_norm": 1.5161586834761267, "learning_rate": 1.170431538852666e-05, "loss": 0.0926, "step": 7000 }, { "epoch": 0.029228663701379443, "grad_norm": 1.771001610034316, "learning_rate": 1.1700136917296958e-05, "loss": 0.0828, "step": 7005 }, { "epoch": 0.02924952641636972, "grad_norm": 1.7266468181758972, "learning_rate": 1.1695962918049828e-05, "loss": 0.0899, "step": 7010 }, { "epoch": 0.02927038913136, "grad_norm": 1.5454669801786067, "learning_rate": 1.1691793382814107e-05, "loss": 0.1143, "step": 7015 }, { "epoch": 0.029291251846350275, "grad_norm": 1.8403580090660177, "learning_rate": 1.1687628303638509e-05, "loss": 0.0954, "step": 7020 }, { "epoch": 0.029312114561340554, "grad_norm": 3.4277610497907123, "learning_rate": 1.1683467672591559e-05, "loss": 0.0822, "step": 7025 }, { "epoch": 0.029332977276330833, "grad_norm": 1.9501527388276245, "learning_rate": 1.1679311481761536e-05, "loss": 0.0949, "step": 7030 }, { "epoch": 0.02935383999132111, "grad_norm": 1.4056081561080251, "learning_rate": 1.1675159723256405e-05, "loss": 0.0841, "step": 7035 }, { "epoch": 0.029374702706311387, "grad_norm": 1.3911025604885576, "learning_rate": 1.1671012389203745e-05, "loss": 0.1056, "step": 7040 }, { "epoch": 0.029395565421301666, "grad_norm": 2.2418466004367685, "learning_rate": 1.1666869471750718e-05, "loss": 0.0848, "step": 7045 }, { "epoch": 0.029416428136291944, "grad_norm": 1.8463471305603618, "learning_rate": 1.1662730963063967e-05, "loss": 0.0978, "step": 7050 }, { "epoch": 0.029437290851282223, "grad_norm": 1.176088580923618, "learning_rate": 1.1658596855329575e-05, "loss": 0.0877, "step": 7055 }, { "epoch": 0.029458153566272502, "grad_norm": 1.260704415444494, "learning_rate": 1.165446714075301e-05, "loss": 0.0774, "step": 7060 }, { "epoch": 0.029479016281262777, "grad_norm": 1.4615463696716076, "learning_rate": 1.1650341811559043e-05, "loss": 0.0941, "step": 7065 }, { "epoch": 0.029499878996253056, "grad_norm": 1.6469011367798068, "learning_rate": 1.16462208599917e-05, "loss": 0.0924, "step": 7070 }, { "epoch": 0.029520741711243335, "grad_norm": 2.103022105957234, "learning_rate": 1.1642104278314192e-05, "loss": 0.0844, "step": 7075 }, { "epoch": 0.029541604426233613, "grad_norm": 1.863148749458387, "learning_rate": 1.1637992058808875e-05, "loss": 0.0739, "step": 7080 }, { "epoch": 0.02956246714122389, "grad_norm": 1.7394127593159594, "learning_rate": 1.1633884193777159e-05, "loss": 0.1074, "step": 7085 }, { "epoch": 0.029583329856214167, "grad_norm": 1.930941486375074, "learning_rate": 1.1629780675539462e-05, "loss": 0.078, "step": 7090 }, { "epoch": 0.029604192571204446, "grad_norm": 2.2035174880232336, "learning_rate": 1.1625681496435159e-05, "loss": 0.1048, "step": 7095 }, { "epoch": 0.029625055286194725, "grad_norm": 2.1223765594016832, "learning_rate": 1.1621586648822508e-05, "loss": 0.0964, "step": 7100 }, { "epoch": 0.029645918001185004, "grad_norm": 1.466179104450683, "learning_rate": 1.1617496125078598e-05, "loss": 0.0771, "step": 7105 }, { "epoch": 0.02966678071617528, "grad_norm": 1.7256585975701682, "learning_rate": 1.1613409917599278e-05, "loss": 0.0862, "step": 7110 }, { "epoch": 0.029687643431165558, "grad_norm": 3.0158992018802895, "learning_rate": 1.1609328018799118e-05, "loss": 0.101, "step": 7115 }, { "epoch": 0.029708506146155837, "grad_norm": 1.8799643886510429, "learning_rate": 1.1605250421111326e-05, "loss": 0.0909, "step": 7120 }, { "epoch": 0.029729368861146115, "grad_norm": 0.9655162262931961, "learning_rate": 1.160117711698771e-05, "loss": 0.0926, "step": 7125 }, { "epoch": 0.02975023157613639, "grad_norm": 1.54972922571204, "learning_rate": 1.1597108098898606e-05, "loss": 0.0935, "step": 7130 }, { "epoch": 0.02977109429112667, "grad_norm": 5.228122072017904, "learning_rate": 1.1593043359332825e-05, "loss": 0.1061, "step": 7135 }, { "epoch": 0.029791957006116948, "grad_norm": 1.952776498079622, "learning_rate": 1.1588982890797592e-05, "loss": 0.0824, "step": 7140 }, { "epoch": 0.029812819721107227, "grad_norm": 1.8124164385921326, "learning_rate": 1.1584926685818494e-05, "loss": 0.0939, "step": 7145 }, { "epoch": 0.029833682436097506, "grad_norm": 1.3579180784874083, "learning_rate": 1.1580874736939413e-05, "loss": 0.0844, "step": 7150 }, { "epoch": 0.02985454515108778, "grad_norm": 1.4384905027090003, "learning_rate": 1.1576827036722473e-05, "loss": 0.0982, "step": 7155 }, { "epoch": 0.02987540786607806, "grad_norm": 2.1830977537386613, "learning_rate": 1.1572783577747987e-05, "loss": 0.0976, "step": 7160 }, { "epoch": 0.02989627058106834, "grad_norm": 1.1989545551143463, "learning_rate": 1.1568744352614393e-05, "loss": 0.083, "step": 7165 }, { "epoch": 0.029917133296058617, "grad_norm": 1.6092575721687463, "learning_rate": 1.1564709353938202e-05, "loss": 0.079, "step": 7170 }, { "epoch": 0.029937996011048892, "grad_norm": 1.6032509689189773, "learning_rate": 1.1560678574353932e-05, "loss": 0.0881, "step": 7175 }, { "epoch": 0.02995885872603917, "grad_norm": 0.9423695980438267, "learning_rate": 1.155665200651407e-05, "loss": 0.0884, "step": 7180 }, { "epoch": 0.02997972144102945, "grad_norm": 2.0843308009292083, "learning_rate": 1.1552629643088999e-05, "loss": 0.1236, "step": 7185 }, { "epoch": 0.03000058415601973, "grad_norm": 2.056445255408267, "learning_rate": 1.1548611476766943e-05, "loss": 0.1056, "step": 7190 }, { "epoch": 0.030021446871010007, "grad_norm": 2.152523097070865, "learning_rate": 1.154459750025392e-05, "loss": 0.0798, "step": 7195 }, { "epoch": 0.030042309586000283, "grad_norm": 3.692223561285766, "learning_rate": 1.1540587706273687e-05, "loss": 0.0764, "step": 7200 }, { "epoch": 0.03006317230099056, "grad_norm": 2.0885679946339755, "learning_rate": 1.1536582087567667e-05, "loss": 0.1125, "step": 7205 }, { "epoch": 0.03008403501598084, "grad_norm": 1.748536682935594, "learning_rate": 1.1532580636894919e-05, "loss": 0.0915, "step": 7210 }, { "epoch": 0.03010489773097112, "grad_norm": 1.9471734001446188, "learning_rate": 1.152858334703206e-05, "loss": 0.089, "step": 7215 }, { "epoch": 0.030125760445961394, "grad_norm": 1.4010183631654467, "learning_rate": 1.1524590210773231e-05, "loss": 0.084, "step": 7220 }, { "epoch": 0.030146623160951673, "grad_norm": 1.744841215038568, "learning_rate": 1.1520601220930025e-05, "loss": 0.1124, "step": 7225 }, { "epoch": 0.030167485875941952, "grad_norm": 2.3618909608191614, "learning_rate": 1.1516616370331439e-05, "loss": 0.108, "step": 7230 }, { "epoch": 0.03018834859093223, "grad_norm": 3.4555089957090614, "learning_rate": 1.1512635651823826e-05, "loss": 0.0795, "step": 7235 }, { "epoch": 0.03020921130592251, "grad_norm": 1.4577184558879024, "learning_rate": 1.1508659058270832e-05, "loss": 0.0777, "step": 7240 }, { "epoch": 0.030230074020912785, "grad_norm": 2.4131885260317842, "learning_rate": 1.1504686582553345e-05, "loss": 0.0899, "step": 7245 }, { "epoch": 0.030250936735903063, "grad_norm": 1.281205108489553, "learning_rate": 1.150071821756945e-05, "loss": 0.0778, "step": 7250 }, { "epoch": 0.030271799450893342, "grad_norm": 2.116266093830076, "learning_rate": 1.1496753956234354e-05, "loss": 0.078, "step": 7255 }, { "epoch": 0.03029266216588362, "grad_norm": 1.9917639565907992, "learning_rate": 1.149279379148036e-05, "loss": 0.0864, "step": 7260 }, { "epoch": 0.030313524880873896, "grad_norm": 2.074595771763681, "learning_rate": 1.1488837716256793e-05, "loss": 0.1029, "step": 7265 }, { "epoch": 0.030334387595864175, "grad_norm": 1.9707022585500984, "learning_rate": 1.1484885723529961e-05, "loss": 0.0922, "step": 7270 }, { "epoch": 0.030355250310854454, "grad_norm": 1.8186908382679312, "learning_rate": 1.1480937806283095e-05, "loss": 0.1042, "step": 7275 }, { "epoch": 0.030376113025844732, "grad_norm": 1.6502594735977854, "learning_rate": 1.1476993957516294e-05, "loss": 0.0878, "step": 7280 }, { "epoch": 0.030396975740835008, "grad_norm": 2.2436452712919186, "learning_rate": 1.1473054170246485e-05, "loss": 0.0751, "step": 7285 }, { "epoch": 0.030417838455825286, "grad_norm": 2.0028393801127025, "learning_rate": 1.1469118437507357e-05, "loss": 0.0828, "step": 7290 }, { "epoch": 0.030438701170815565, "grad_norm": 2.8071855502582874, "learning_rate": 1.1465186752349316e-05, "loss": 0.0873, "step": 7295 }, { "epoch": 0.030459563885805844, "grad_norm": 1.7826011299664757, "learning_rate": 1.1461259107839438e-05, "loss": 0.1085, "step": 7300 }, { "epoch": 0.030480426600796123, "grad_norm": 2.324332944103442, "learning_rate": 1.145733549706141e-05, "loss": 0.1002, "step": 7305 }, { "epoch": 0.030501289315786398, "grad_norm": 2.4396990953982076, "learning_rate": 1.1453415913115482e-05, "loss": 0.0909, "step": 7310 }, { "epoch": 0.030522152030776677, "grad_norm": 1.339874181526996, "learning_rate": 1.1449500349118415e-05, "loss": 0.0761, "step": 7315 }, { "epoch": 0.030543014745766955, "grad_norm": 2.4227410601536206, "learning_rate": 1.1445588798203429e-05, "loss": 0.0977, "step": 7320 }, { "epoch": 0.030563877460757234, "grad_norm": 3.61930619195136, "learning_rate": 1.1441681253520158e-05, "loss": 0.0904, "step": 7325 }, { "epoch": 0.03058474017574751, "grad_norm": 1.8483013113348021, "learning_rate": 1.1437777708234593e-05, "loss": 0.0966, "step": 7330 }, { "epoch": 0.030605602890737788, "grad_norm": 1.6573426485867326, "learning_rate": 1.1433878155529036e-05, "loss": 0.0853, "step": 7335 }, { "epoch": 0.030626465605728067, "grad_norm": 1.6031657572443014, "learning_rate": 1.1429982588602051e-05, "loss": 0.0828, "step": 7340 }, { "epoch": 0.030647328320718346, "grad_norm": 2.2121523975827833, "learning_rate": 1.1426091000668407e-05, "loss": 0.1132, "step": 7345 }, { "epoch": 0.030668191035708624, "grad_norm": 1.5163025833113346, "learning_rate": 1.1422203384959038e-05, "loss": 0.087, "step": 7350 }, { "epoch": 0.0306890537506989, "grad_norm": 1.9348000108818737, "learning_rate": 1.1418319734720989e-05, "loss": 0.1031, "step": 7355 }, { "epoch": 0.03070991646568918, "grad_norm": 1.343605719053046, "learning_rate": 1.1414440043217357e-05, "loss": 0.0717, "step": 7360 }, { "epoch": 0.030730779180679457, "grad_norm": 1.5728926734516153, "learning_rate": 1.1410564303727265e-05, "loss": 0.0842, "step": 7365 }, { "epoch": 0.030751641895669736, "grad_norm": 2.01352773761485, "learning_rate": 1.1406692509545788e-05, "loss": 0.0906, "step": 7370 }, { "epoch": 0.03077250461066001, "grad_norm": 2.1775080411398, "learning_rate": 1.1402824653983928e-05, "loss": 0.0856, "step": 7375 }, { "epoch": 0.03079336732565029, "grad_norm": 1.5883637357481009, "learning_rate": 1.139896073036854e-05, "loss": 0.1086, "step": 7380 }, { "epoch": 0.03081423004064057, "grad_norm": 3.5109923010121578, "learning_rate": 1.1395100732042308e-05, "loss": 0.1014, "step": 7385 }, { "epoch": 0.030835092755630848, "grad_norm": 1.8509408491615809, "learning_rate": 1.1391244652363677e-05, "loss": 0.0831, "step": 7390 }, { "epoch": 0.030855955470621126, "grad_norm": 2.486411731284587, "learning_rate": 1.138739248470682e-05, "loss": 0.1031, "step": 7395 }, { "epoch": 0.0308768181856114, "grad_norm": 1.4990469469953212, "learning_rate": 1.1383544222461585e-05, "loss": 0.1001, "step": 7400 }, { "epoch": 0.03089768090060168, "grad_norm": 2.710649632972031, "learning_rate": 1.1379699859033435e-05, "loss": 0.0922, "step": 7405 }, { "epoch": 0.03091854361559196, "grad_norm": 1.9855977906376168, "learning_rate": 1.1375859387843427e-05, "loss": 0.1106, "step": 7410 }, { "epoch": 0.030939406330582238, "grad_norm": 2.0839688611773233, "learning_rate": 1.1372022802328141e-05, "loss": 0.1087, "step": 7415 }, { "epoch": 0.030960269045572513, "grad_norm": 1.613686377122046, "learning_rate": 1.1368190095939642e-05, "loss": 0.1005, "step": 7420 }, { "epoch": 0.030981131760562792, "grad_norm": 4.653255239963496, "learning_rate": 1.1364361262145434e-05, "loss": 0.0859, "step": 7425 }, { "epoch": 0.03100199447555307, "grad_norm": 1.8172821189725643, "learning_rate": 1.1360536294428411e-05, "loss": 0.0996, "step": 7430 }, { "epoch": 0.03102285719054335, "grad_norm": 1.7199808896961255, "learning_rate": 1.1356715186286811e-05, "loss": 0.065, "step": 7435 }, { "epoch": 0.031043719905533628, "grad_norm": 1.4458590057461884, "learning_rate": 1.1352897931234173e-05, "loss": 0.095, "step": 7440 }, { "epoch": 0.031064582620523903, "grad_norm": 1.7411238766059378, "learning_rate": 1.1349084522799278e-05, "loss": 0.1017, "step": 7445 }, { "epoch": 0.031085445335514182, "grad_norm": 2.1598357694616466, "learning_rate": 1.134527495452613e-05, "loss": 0.0928, "step": 7450 }, { "epoch": 0.03110630805050446, "grad_norm": 1.8563931484155312, "learning_rate": 1.134146921997387e-05, "loss": 0.1126, "step": 7455 }, { "epoch": 0.03112717076549474, "grad_norm": 1.6171678714506383, "learning_rate": 1.1337667312716773e-05, "loss": 0.084, "step": 7460 }, { "epoch": 0.031148033480485015, "grad_norm": 1.4308065467124558, "learning_rate": 1.1333869226344171e-05, "loss": 0.1054, "step": 7465 }, { "epoch": 0.031168896195475294, "grad_norm": 1.4375012188525609, "learning_rate": 1.1330074954460419e-05, "loss": 0.0864, "step": 7470 }, { "epoch": 0.031189758910465572, "grad_norm": 1.1663925658732466, "learning_rate": 1.1326284490684856e-05, "loss": 0.0867, "step": 7475 }, { "epoch": 0.03121062162545585, "grad_norm": 2.7405189096550027, "learning_rate": 1.1322497828651747e-05, "loss": 0.0771, "step": 7480 }, { "epoch": 0.03123148434044613, "grad_norm": 2.4413489130455757, "learning_rate": 1.1318714962010249e-05, "loss": 0.0904, "step": 7485 }, { "epoch": 0.031252347055436405, "grad_norm": 2.081325060563902, "learning_rate": 1.1314935884424358e-05, "loss": 0.0903, "step": 7490 }, { "epoch": 0.031273209770426684, "grad_norm": 1.6180573969047956, "learning_rate": 1.1311160589572872e-05, "loss": 0.0825, "step": 7495 }, { "epoch": 0.03129407248541696, "grad_norm": 2.350588513580184, "learning_rate": 1.1307389071149342e-05, "loss": 0.1027, "step": 7500 }, { "epoch": 0.03131493520040724, "grad_norm": 1.0938676815546127, "learning_rate": 1.1303621322862024e-05, "loss": 0.073, "step": 7505 }, { "epoch": 0.03133579791539752, "grad_norm": 2.191860434573946, "learning_rate": 1.1299857338433846e-05, "loss": 0.0902, "step": 7510 }, { "epoch": 0.0313566606303878, "grad_norm": 1.6622258463024966, "learning_rate": 1.129609711160236e-05, "loss": 0.0776, "step": 7515 }, { "epoch": 0.03137752334537807, "grad_norm": 1.7956015894291057, "learning_rate": 1.1292340636119683e-05, "loss": 0.0789, "step": 7520 }, { "epoch": 0.03139838606036835, "grad_norm": 1.7877849426103674, "learning_rate": 1.128858790575248e-05, "loss": 0.0862, "step": 7525 }, { "epoch": 0.03141924877535863, "grad_norm": 2.2307569292042837, "learning_rate": 1.1284838914281896e-05, "loss": 0.0889, "step": 7530 }, { "epoch": 0.03144011149034891, "grad_norm": 5.14068492493188, "learning_rate": 1.1281093655503535e-05, "loss": 0.0906, "step": 7535 }, { "epoch": 0.031460974205339186, "grad_norm": 1.4550933586227441, "learning_rate": 1.1277352123227397e-05, "loss": 0.0888, "step": 7540 }, { "epoch": 0.031481836920329465, "grad_norm": 1.377932803048618, "learning_rate": 1.1273614311277844e-05, "loss": 0.0947, "step": 7545 }, { "epoch": 0.03150269963531974, "grad_norm": 2.0310593812496944, "learning_rate": 1.1269880213493559e-05, "loss": 0.1058, "step": 7550 }, { "epoch": 0.03152356235031002, "grad_norm": 1.60003425412877, "learning_rate": 1.12661498237275e-05, "loss": 0.0884, "step": 7555 }, { "epoch": 0.0315444250653003, "grad_norm": 1.6212502138994198, "learning_rate": 1.126242313584686e-05, "loss": 0.0872, "step": 7560 }, { "epoch": 0.03156528778029057, "grad_norm": 1.2663956031761203, "learning_rate": 1.1258700143733022e-05, "loss": 0.079, "step": 7565 }, { "epoch": 0.03158615049528085, "grad_norm": 1.852599496581432, "learning_rate": 1.1254980841281516e-05, "loss": 0.1099, "step": 7570 }, { "epoch": 0.03160701321027113, "grad_norm": 2.6937888891510053, "learning_rate": 1.1251265222401983e-05, "loss": 0.0955, "step": 7575 }, { "epoch": 0.03162787592526141, "grad_norm": 1.8631208527044887, "learning_rate": 1.1247553281018127e-05, "loss": 0.0924, "step": 7580 }, { "epoch": 0.03164873864025169, "grad_norm": 2.377231141380073, "learning_rate": 1.1243845011067673e-05, "loss": 0.1101, "step": 7585 }, { "epoch": 0.031669601355241966, "grad_norm": 1.498048694324408, "learning_rate": 1.1240140406502333e-05, "loss": 0.0783, "step": 7590 }, { "epoch": 0.031690464070232245, "grad_norm": 2.072115225218177, "learning_rate": 1.1236439461287754e-05, "loss": 0.0919, "step": 7595 }, { "epoch": 0.031711326785222524, "grad_norm": 2.631497008360703, "learning_rate": 1.1232742169403486e-05, "loss": 0.0821, "step": 7600 }, { "epoch": 0.0317321895002128, "grad_norm": 2.1726441646585184, "learning_rate": 1.1229048524842932e-05, "loss": 0.0735, "step": 7605 }, { "epoch": 0.031753052215203074, "grad_norm": 1.3968451970940823, "learning_rate": 1.1225358521613319e-05, "loss": 0.0875, "step": 7610 }, { "epoch": 0.03177391493019335, "grad_norm": 1.5474468259590444, "learning_rate": 1.1221672153735643e-05, "loss": 0.0853, "step": 7615 }, { "epoch": 0.03179477764518363, "grad_norm": 1.9711946208117443, "learning_rate": 1.121798941524464e-05, "loss": 0.0821, "step": 7620 }, { "epoch": 0.03181564036017391, "grad_norm": 1.9791248119425244, "learning_rate": 1.1214310300188738e-05, "loss": 0.0953, "step": 7625 }, { "epoch": 0.03183650307516419, "grad_norm": 1.132287241913744, "learning_rate": 1.1210634802630022e-05, "loss": 0.0905, "step": 7630 }, { "epoch": 0.03185736579015447, "grad_norm": 1.298384892581187, "learning_rate": 1.1206962916644189e-05, "loss": 0.1084, "step": 7635 }, { "epoch": 0.03187822850514475, "grad_norm": 1.5605964839793893, "learning_rate": 1.120329463632051e-05, "loss": 0.0864, "step": 7640 }, { "epoch": 0.031899091220135026, "grad_norm": 2.1162521305286095, "learning_rate": 1.1199629955761792e-05, "loss": 0.0882, "step": 7645 }, { "epoch": 0.031919953935125304, "grad_norm": 2.9545799053698345, "learning_rate": 1.1195968869084335e-05, "loss": 0.0703, "step": 7650 }, { "epoch": 0.031940816650115576, "grad_norm": 2.003391355662056, "learning_rate": 1.1192311370417894e-05, "loss": 0.0824, "step": 7655 }, { "epoch": 0.031961679365105855, "grad_norm": 1.4537785466482338, "learning_rate": 1.1188657453905643e-05, "loss": 0.0764, "step": 7660 }, { "epoch": 0.031982542080096134, "grad_norm": 1.8482485552002967, "learning_rate": 1.1185007113704117e-05, "loss": 0.0747, "step": 7665 }, { "epoch": 0.03200340479508641, "grad_norm": 1.6675293376030533, "learning_rate": 1.1181360343983207e-05, "loss": 0.0904, "step": 7670 }, { "epoch": 0.03202426751007669, "grad_norm": 2.2881934970748095, "learning_rate": 1.1177717138926091e-05, "loss": 0.1218, "step": 7675 }, { "epoch": 0.03204513022506697, "grad_norm": 2.151744021435389, "learning_rate": 1.1174077492729205e-05, "loss": 0.0914, "step": 7680 }, { "epoch": 0.03206599294005725, "grad_norm": 1.8800537759434304, "learning_rate": 1.1170441399602207e-05, "loss": 0.0872, "step": 7685 }, { "epoch": 0.03208685565504753, "grad_norm": 1.7426240593396938, "learning_rate": 1.1166808853767935e-05, "loss": 0.0742, "step": 7690 }, { "epoch": 0.032107718370037806, "grad_norm": 1.9079751701383865, "learning_rate": 1.1163179849462371e-05, "loss": 0.0961, "step": 7695 }, { "epoch": 0.03212858108502808, "grad_norm": 1.739965295560567, "learning_rate": 1.1159554380934594e-05, "loss": 0.0947, "step": 7700 }, { "epoch": 0.03214944380001836, "grad_norm": 1.8207690694846588, "learning_rate": 1.1155932442446762e-05, "loss": 0.0918, "step": 7705 }, { "epoch": 0.032170306515008636, "grad_norm": 3.5062093468615827, "learning_rate": 1.1152314028274044e-05, "loss": 0.1075, "step": 7710 }, { "epoch": 0.032191169229998914, "grad_norm": 2.3979552193873084, "learning_rate": 1.1148699132704612e-05, "loss": 0.0947, "step": 7715 }, { "epoch": 0.03221203194498919, "grad_norm": 1.7439720051466063, "learning_rate": 1.1145087750039584e-05, "loss": 0.0907, "step": 7720 }, { "epoch": 0.03223289465997947, "grad_norm": 2.6996126576614845, "learning_rate": 1.1141479874592991e-05, "loss": 0.0818, "step": 7725 }, { "epoch": 0.03225375737496975, "grad_norm": 1.9601247529971186, "learning_rate": 1.1137875500691742e-05, "loss": 0.0835, "step": 7730 }, { "epoch": 0.03227462008996003, "grad_norm": 1.8567594892297736, "learning_rate": 1.1134274622675583e-05, "loss": 0.093, "step": 7735 }, { "epoch": 0.03229548280495031, "grad_norm": 1.520311643598755, "learning_rate": 1.1130677234897065e-05, "loss": 0.0818, "step": 7740 }, { "epoch": 0.03231634551994058, "grad_norm": 3.488705788601303, "learning_rate": 1.11270833317215e-05, "loss": 0.0853, "step": 7745 }, { "epoch": 0.03233720823493086, "grad_norm": 1.6860563758676252, "learning_rate": 1.1123492907526925e-05, "loss": 0.0805, "step": 7750 }, { "epoch": 0.03235807094992114, "grad_norm": 1.6315132084645758, "learning_rate": 1.1119905956704073e-05, "loss": 0.0841, "step": 7755 }, { "epoch": 0.032378933664911416, "grad_norm": 1.6597972860669263, "learning_rate": 1.111632247365633e-05, "loss": 0.1117, "step": 7760 }, { "epoch": 0.032399796379901695, "grad_norm": 1.968851390696052, "learning_rate": 1.1112742452799696e-05, "loss": 0.0883, "step": 7765 }, { "epoch": 0.032420659094891974, "grad_norm": 1.734679577511544, "learning_rate": 1.1109165888562747e-05, "loss": 0.0763, "step": 7770 }, { "epoch": 0.03244152180988225, "grad_norm": 1.2918226541586673, "learning_rate": 1.1105592775386614e-05, "loss": 0.0793, "step": 7775 }, { "epoch": 0.03246238452487253, "grad_norm": 2.555450782470362, "learning_rate": 1.1102023107724925e-05, "loss": 0.0985, "step": 7780 }, { "epoch": 0.03248324723986281, "grad_norm": 1.9585720793878196, "learning_rate": 1.1098456880043781e-05, "loss": 0.0907, "step": 7785 }, { "epoch": 0.03250410995485308, "grad_norm": 1.4547742656975513, "learning_rate": 1.109489408682173e-05, "loss": 0.0774, "step": 7790 }, { "epoch": 0.03252497266984336, "grad_norm": 2.157770889289027, "learning_rate": 1.10913347225497e-05, "loss": 0.0835, "step": 7795 }, { "epoch": 0.03254583538483364, "grad_norm": 2.741700689718173, "learning_rate": 1.1087778781731e-05, "loss": 0.1171, "step": 7800 }, { "epoch": 0.03256669809982392, "grad_norm": 1.8238358201530926, "learning_rate": 1.1084226258881259e-05, "loss": 0.0993, "step": 7805 }, { "epoch": 0.0325875608148142, "grad_norm": 1.0584544264947107, "learning_rate": 1.1080677148528394e-05, "loss": 0.0857, "step": 7810 }, { "epoch": 0.032608423529804476, "grad_norm": 1.7460457139557006, "learning_rate": 1.1077131445212588e-05, "loss": 0.0853, "step": 7815 }, { "epoch": 0.032629286244794754, "grad_norm": 1.7117781833175474, "learning_rate": 1.1073589143486245e-05, "loss": 0.0929, "step": 7820 }, { "epoch": 0.03265014895978503, "grad_norm": 1.7691747284584727, "learning_rate": 1.1070050237913948e-05, "loss": 0.0993, "step": 7825 }, { "epoch": 0.03267101167477531, "grad_norm": 1.7542970206251338, "learning_rate": 1.1066514723072438e-05, "loss": 0.0863, "step": 7830 }, { "epoch": 0.032691874389765584, "grad_norm": 2.152960630207276, "learning_rate": 1.1062982593550574e-05, "loss": 0.1, "step": 7835 }, { "epoch": 0.03271273710475586, "grad_norm": 1.3838084808801712, "learning_rate": 1.1059453843949291e-05, "loss": 0.095, "step": 7840 }, { "epoch": 0.03273359981974614, "grad_norm": 1.5630620978153589, "learning_rate": 1.1055928468881571e-05, "loss": 0.077, "step": 7845 }, { "epoch": 0.03275446253473642, "grad_norm": 1.5241064636404975, "learning_rate": 1.1052406462972418e-05, "loss": 0.0848, "step": 7850 }, { "epoch": 0.0327753252497267, "grad_norm": 1.5554630205442823, "learning_rate": 1.1048887820858809e-05, "loss": 0.092, "step": 7855 }, { "epoch": 0.03279618796471698, "grad_norm": 2.626407939159493, "learning_rate": 1.1045372537189657e-05, "loss": 0.0888, "step": 7860 }, { "epoch": 0.032817050679707256, "grad_norm": 2.1522774295983105, "learning_rate": 1.1041860606625799e-05, "loss": 0.0821, "step": 7865 }, { "epoch": 0.032837913394697535, "grad_norm": 3.0208674125811767, "learning_rate": 1.1038352023839935e-05, "loss": 0.0815, "step": 7870 }, { "epoch": 0.032858776109687814, "grad_norm": 1.8004290807952184, "learning_rate": 1.1034846783516619e-05, "loss": 0.0987, "step": 7875 }, { "epoch": 0.032879638824678085, "grad_norm": 0.8694517551176746, "learning_rate": 1.1031344880352204e-05, "loss": 0.0615, "step": 7880 }, { "epoch": 0.032900501539668364, "grad_norm": 2.011329230677747, "learning_rate": 1.1027846309054816e-05, "loss": 0.0828, "step": 7885 }, { "epoch": 0.03292136425465864, "grad_norm": 1.7837049193993266, "learning_rate": 1.1024351064344333e-05, "loss": 0.0696, "step": 7890 }, { "epoch": 0.03294222696964892, "grad_norm": 1.6819314342371061, "learning_rate": 1.1020859140952326e-05, "loss": 0.1074, "step": 7895 }, { "epoch": 0.0329630896846392, "grad_norm": 1.6547754911371173, "learning_rate": 1.1017370533622051e-05, "loss": 0.09, "step": 7900 }, { "epoch": 0.03298395239962948, "grad_norm": 2.0727457071009123, "learning_rate": 1.10138852371084e-05, "loss": 0.0876, "step": 7905 }, { "epoch": 0.03300481511461976, "grad_norm": 2.590213685027443, "learning_rate": 1.101040324617787e-05, "loss": 0.0889, "step": 7910 }, { "epoch": 0.03302567782961004, "grad_norm": 1.9933696124955367, "learning_rate": 1.100692455560854e-05, "loss": 0.0904, "step": 7915 }, { "epoch": 0.033046540544600315, "grad_norm": 1.5839447865908765, "learning_rate": 1.1003449160190023e-05, "loss": 0.0811, "step": 7920 }, { "epoch": 0.03306740325959059, "grad_norm": 1.526281584979676, "learning_rate": 1.0999977054723443e-05, "loss": 0.0822, "step": 7925 }, { "epoch": 0.033088265974580866, "grad_norm": 1.485638069771087, "learning_rate": 1.09965082340214e-05, "loss": 0.0915, "step": 7930 }, { "epoch": 0.033109128689571145, "grad_norm": 1.7277620597283854, "learning_rate": 1.0993042692907946e-05, "loss": 0.066, "step": 7935 }, { "epoch": 0.033129991404561424, "grad_norm": 1.4350795934299954, "learning_rate": 1.0989580426218533e-05, "loss": 0.088, "step": 7940 }, { "epoch": 0.0331508541195517, "grad_norm": 1.8440356822228154, "learning_rate": 1.0986121428799991e-05, "loss": 0.0758, "step": 7945 }, { "epoch": 0.03317171683454198, "grad_norm": 2.182199606821304, "learning_rate": 1.0982665695510508e-05, "loss": 0.102, "step": 7950 }, { "epoch": 0.03319257954953226, "grad_norm": 1.8435333131147769, "learning_rate": 1.0979213221219578e-05, "loss": 0.0901, "step": 7955 }, { "epoch": 0.03321344226452254, "grad_norm": 2.79208756118211, "learning_rate": 1.0975764000807979e-05, "loss": 0.0766, "step": 7960 }, { "epoch": 0.03323430497951282, "grad_norm": 2.0557458779871376, "learning_rate": 1.097231802916774e-05, "loss": 0.0906, "step": 7965 }, { "epoch": 0.03325516769450309, "grad_norm": 2.2190370780616764, "learning_rate": 1.0968875301202108e-05, "loss": 0.1077, "step": 7970 }, { "epoch": 0.03327603040949337, "grad_norm": 5.790421699372989, "learning_rate": 1.0965435811825522e-05, "loss": 0.1082, "step": 7975 }, { "epoch": 0.03329689312448365, "grad_norm": 1.4226225586148955, "learning_rate": 1.096199955596357e-05, "loss": 0.0911, "step": 7980 }, { "epoch": 0.033317755839473925, "grad_norm": 1.6756829398073365, "learning_rate": 1.0958566528552968e-05, "loss": 0.0713, "step": 7985 }, { "epoch": 0.033338618554464204, "grad_norm": 2.1947189076540474, "learning_rate": 1.0955136724541527e-05, "loss": 0.0951, "step": 7990 }, { "epoch": 0.03335948126945448, "grad_norm": 3.575583428872213, "learning_rate": 1.0951710138888114e-05, "loss": 0.078, "step": 7995 }, { "epoch": 0.03338034398444476, "grad_norm": 2.5141225970168493, "learning_rate": 1.0948286766562628e-05, "loss": 0.1188, "step": 8000 }, { "epoch": 0.03340120669943504, "grad_norm": 1.9031553914811896, "learning_rate": 1.0944866602545974e-05, "loss": 0.093, "step": 8005 }, { "epoch": 0.03342206941442531, "grad_norm": 1.3180306346180386, "learning_rate": 1.0941449641830018e-05, "loss": 0.082, "step": 8010 }, { "epoch": 0.03344293212941559, "grad_norm": 1.7346047004404135, "learning_rate": 1.0938035879417566e-05, "loss": 0.0851, "step": 8015 }, { "epoch": 0.03346379484440587, "grad_norm": 1.984016185740032, "learning_rate": 1.0934625310322335e-05, "loss": 0.111, "step": 8020 }, { "epoch": 0.03348465755939615, "grad_norm": 1.4075139677350097, "learning_rate": 1.0931217929568911e-05, "loss": 0.0828, "step": 8025 }, { "epoch": 0.03350552027438643, "grad_norm": 2.728443433642732, "learning_rate": 1.0927813732192733e-05, "loss": 0.1056, "step": 8030 }, { "epoch": 0.033526382989376706, "grad_norm": 1.5714142086828382, "learning_rate": 1.0924412713240054e-05, "loss": 0.0777, "step": 8035 }, { "epoch": 0.033547245704366985, "grad_norm": 1.720798730216522, "learning_rate": 1.092101486776791e-05, "loss": 0.0794, "step": 8040 }, { "epoch": 0.033568108419357263, "grad_norm": 1.39957379359017, "learning_rate": 1.0917620190844094e-05, "loss": 0.0945, "step": 8045 }, { "epoch": 0.03358897113434754, "grad_norm": 1.251201095348661, "learning_rate": 1.0914228677547126e-05, "loss": 0.0841, "step": 8050 }, { "epoch": 0.033609833849337814, "grad_norm": 1.710212894213692, "learning_rate": 1.0910840322966214e-05, "loss": 0.0997, "step": 8055 }, { "epoch": 0.03363069656432809, "grad_norm": 1.45108468702787, "learning_rate": 1.0907455122201242e-05, "loss": 0.0814, "step": 8060 }, { "epoch": 0.03365155927931837, "grad_norm": 1.7228601402122006, "learning_rate": 1.0904073070362724e-05, "loss": 0.073, "step": 8065 }, { "epoch": 0.03367242199430865, "grad_norm": 1.8597523808090657, "learning_rate": 1.0900694162571776e-05, "loss": 0.0843, "step": 8070 }, { "epoch": 0.03369328470929893, "grad_norm": 3.1482523264572246, "learning_rate": 1.0897318393960098e-05, "loss": 0.094, "step": 8075 }, { "epoch": 0.03371414742428921, "grad_norm": 1.8780304109913954, "learning_rate": 1.0893945759669934e-05, "loss": 0.1073, "step": 8080 }, { "epoch": 0.03373501013927949, "grad_norm": 1.8662152019192657, "learning_rate": 1.0890576254854045e-05, "loss": 0.0829, "step": 8085 }, { "epoch": 0.033755872854269765, "grad_norm": 1.40477814973548, "learning_rate": 1.0887209874675675e-05, "loss": 0.0862, "step": 8090 }, { "epoch": 0.033776735569260044, "grad_norm": 1.7061303736328606, "learning_rate": 1.0883846614308537e-05, "loss": 0.0843, "step": 8095 }, { "epoch": 0.033797598284250316, "grad_norm": 1.6909871749085517, "learning_rate": 1.0880486468936771e-05, "loss": 0.0959, "step": 8100 }, { "epoch": 0.033818460999240595, "grad_norm": 2.298734032860153, "learning_rate": 1.0877129433754914e-05, "loss": 0.1023, "step": 8105 }, { "epoch": 0.03383932371423087, "grad_norm": 1.4215651004141792, "learning_rate": 1.0873775503967875e-05, "loss": 0.0871, "step": 8110 }, { "epoch": 0.03386018642922115, "grad_norm": 1.2869107853717474, "learning_rate": 1.0870424674790916e-05, "loss": 0.0649, "step": 8115 }, { "epoch": 0.03388104914421143, "grad_norm": 1.8603653594094804, "learning_rate": 1.0867076941449603e-05, "loss": 0.0872, "step": 8120 }, { "epoch": 0.03390191185920171, "grad_norm": 2.7831813425987284, "learning_rate": 1.0863732299179792e-05, "loss": 0.0918, "step": 8125 }, { "epoch": 0.03392277457419199, "grad_norm": 1.733560956634098, "learning_rate": 1.08603907432276e-05, "loss": 0.081, "step": 8130 }, { "epoch": 0.03394363728918227, "grad_norm": 1.9537393579474966, "learning_rate": 1.0857052268849366e-05, "loss": 0.1084, "step": 8135 }, { "epoch": 0.033964500004172546, "grad_norm": 1.6226661602115402, "learning_rate": 1.0853716871311639e-05, "loss": 0.0795, "step": 8140 }, { "epoch": 0.03398536271916282, "grad_norm": 9.474301039251595, "learning_rate": 1.0850384545891135e-05, "loss": 0.0778, "step": 8145 }, { "epoch": 0.034006225434153096, "grad_norm": 1.9523322643508043, "learning_rate": 1.0847055287874717e-05, "loss": 0.0732, "step": 8150 }, { "epoch": 0.034027088149143375, "grad_norm": 1.7435837333949937, "learning_rate": 1.0843729092559361e-05, "loss": 0.0786, "step": 8155 }, { "epoch": 0.034047950864133654, "grad_norm": 1.6262275215127378, "learning_rate": 1.0840405955252137e-05, "loss": 0.0697, "step": 8160 }, { "epoch": 0.03406881357912393, "grad_norm": 1.4024441949220554, "learning_rate": 1.0837085871270177e-05, "loss": 0.0908, "step": 8165 }, { "epoch": 0.03408967629411421, "grad_norm": 1.3427834421131755, "learning_rate": 1.0833768835940641e-05, "loss": 0.0925, "step": 8170 }, { "epoch": 0.03411053900910449, "grad_norm": 1.6488258564849083, "learning_rate": 1.0830454844600695e-05, "loss": 0.0891, "step": 8175 }, { "epoch": 0.03413140172409477, "grad_norm": 1.414721862976347, "learning_rate": 1.0827143892597492e-05, "loss": 0.082, "step": 8180 }, { "epoch": 0.03415226443908505, "grad_norm": 1.704344941669454, "learning_rate": 1.0823835975288122e-05, "loss": 0.089, "step": 8185 }, { "epoch": 0.03417312715407532, "grad_norm": 2.221812227874423, "learning_rate": 1.0820531088039609e-05, "loss": 0.0816, "step": 8190 }, { "epoch": 0.0341939898690656, "grad_norm": 2.125138012937202, "learning_rate": 1.0817229226228873e-05, "loss": 0.0864, "step": 8195 }, { "epoch": 0.03421485258405588, "grad_norm": 1.6066602083131765, "learning_rate": 1.0813930385242694e-05, "loss": 0.0907, "step": 8200 }, { "epoch": 0.034235715299046156, "grad_norm": 1.4943559677368334, "learning_rate": 1.0810634560477709e-05, "loss": 0.0761, "step": 8205 }, { "epoch": 0.034256578014036435, "grad_norm": 2.3184322590529183, "learning_rate": 1.0807341747340352e-05, "loss": 0.0852, "step": 8210 }, { "epoch": 0.03427744072902671, "grad_norm": 2.1494409779489647, "learning_rate": 1.0804051941246856e-05, "loss": 0.0847, "step": 8215 }, { "epoch": 0.03429830344401699, "grad_norm": 2.1726410280237953, "learning_rate": 1.0800765137623219e-05, "loss": 0.0943, "step": 8220 }, { "epoch": 0.03431916615900727, "grad_norm": 1.5949052326277324, "learning_rate": 1.0797481331905162e-05, "loss": 0.0853, "step": 8225 }, { "epoch": 0.03434002887399755, "grad_norm": 2.0078514969751358, "learning_rate": 1.0794200519538123e-05, "loss": 0.0927, "step": 8230 }, { "epoch": 0.03436089158898782, "grad_norm": 3.1400549224039622, "learning_rate": 1.079092269597722e-05, "loss": 0.0972, "step": 8235 }, { "epoch": 0.0343817543039781, "grad_norm": 1.131024330663504, "learning_rate": 1.078764785668722e-05, "loss": 0.0861, "step": 8240 }, { "epoch": 0.03440261701896838, "grad_norm": 1.4443417752821464, "learning_rate": 1.0784375997142525e-05, "loss": 0.0827, "step": 8245 }, { "epoch": 0.03442347973395866, "grad_norm": 2.097366316160226, "learning_rate": 1.0781107112827138e-05, "loss": 0.0918, "step": 8250 }, { "epoch": 0.034444342448948936, "grad_norm": 3.1415008456764117, "learning_rate": 1.077784119923464e-05, "loss": 0.0791, "step": 8255 }, { "epoch": 0.034465205163939215, "grad_norm": 1.696368558702773, "learning_rate": 1.0774578251868156e-05, "loss": 0.1027, "step": 8260 }, { "epoch": 0.034486067878929494, "grad_norm": 1.9250566029077074, "learning_rate": 1.077131826624034e-05, "loss": 0.0737, "step": 8265 }, { "epoch": 0.03450693059391977, "grad_norm": 2.529428280059271, "learning_rate": 1.0768061237873345e-05, "loss": 0.0839, "step": 8270 }, { "epoch": 0.03452779330891005, "grad_norm": 1.82281591103939, "learning_rate": 1.0764807162298792e-05, "loss": 0.0984, "step": 8275 }, { "epoch": 0.03454865602390032, "grad_norm": 2.1375777262819744, "learning_rate": 1.076155603505775e-05, "loss": 0.0909, "step": 8280 }, { "epoch": 0.0345695187388906, "grad_norm": 1.6324971065071119, "learning_rate": 1.0758307851700716e-05, "loss": 0.0815, "step": 8285 }, { "epoch": 0.03459038145388088, "grad_norm": 2.033131519697678, "learning_rate": 1.075506260778757e-05, "loss": 0.0944, "step": 8290 }, { "epoch": 0.03461124416887116, "grad_norm": 1.4517107438521115, "learning_rate": 1.075182029888757e-05, "loss": 0.0907, "step": 8295 }, { "epoch": 0.03463210688386144, "grad_norm": 1.86747077393538, "learning_rate": 1.074858092057932e-05, "loss": 0.096, "step": 8300 }, { "epoch": 0.03465296959885172, "grad_norm": 1.5723416404624493, "learning_rate": 1.0745344468450734e-05, "loss": 0.0911, "step": 8305 }, { "epoch": 0.034673832313841996, "grad_norm": 1.3608288185472759, "learning_rate": 1.0742110938099029e-05, "loss": 0.0827, "step": 8310 }, { "epoch": 0.034694695028832274, "grad_norm": 1.4344699442027309, "learning_rate": 1.0738880325130686e-05, "loss": 0.0783, "step": 8315 }, { "epoch": 0.03471555774382255, "grad_norm": 2.6395359701348555, "learning_rate": 1.0735652625161435e-05, "loss": 0.0969, "step": 8320 }, { "epoch": 0.034736420458812825, "grad_norm": 1.4808256917430493, "learning_rate": 1.0732427833816217e-05, "loss": 0.094, "step": 8325 }, { "epoch": 0.034757283173803104, "grad_norm": 1.8244076322062441, "learning_rate": 1.0729205946729173e-05, "loss": 0.0788, "step": 8330 }, { "epoch": 0.03477814588879338, "grad_norm": 1.6500887509789914, "learning_rate": 1.0725986959543606e-05, "loss": 0.0949, "step": 8335 }, { "epoch": 0.03479900860378366, "grad_norm": 2.6075621016802004, "learning_rate": 1.0722770867911974e-05, "loss": 0.0888, "step": 8340 }, { "epoch": 0.03481987131877394, "grad_norm": 1.7516315393894768, "learning_rate": 1.0719557667495846e-05, "loss": 0.0938, "step": 8345 }, { "epoch": 0.03484073403376422, "grad_norm": 1.6504042243156747, "learning_rate": 1.0716347353965883e-05, "loss": 0.0668, "step": 8350 }, { "epoch": 0.0348615967487545, "grad_norm": 1.8107198523862875, "learning_rate": 1.0713139923001829e-05, "loss": 0.0749, "step": 8355 }, { "epoch": 0.034882459463744776, "grad_norm": 1.101094040929036, "learning_rate": 1.0709935370292465e-05, "loss": 0.0863, "step": 8360 }, { "epoch": 0.034903322178735055, "grad_norm": 1.2158891132188956, "learning_rate": 1.0706733691535597e-05, "loss": 0.078, "step": 8365 }, { "epoch": 0.03492418489372533, "grad_norm": 1.6250603236706578, "learning_rate": 1.070353488243802e-05, "loss": 0.0887, "step": 8370 }, { "epoch": 0.034945047608715606, "grad_norm": 2.9351944168723763, "learning_rate": 1.0700338938715516e-05, "loss": 0.0912, "step": 8375 }, { "epoch": 0.034965910323705884, "grad_norm": 1.5129412024302797, "learning_rate": 1.0697145856092803e-05, "loss": 0.0719, "step": 8380 }, { "epoch": 0.03498677303869616, "grad_norm": 1.5325728151386864, "learning_rate": 1.069395563030354e-05, "loss": 0.1114, "step": 8385 }, { "epoch": 0.03500763575368644, "grad_norm": 1.454739401819061, "learning_rate": 1.069076825709027e-05, "loss": 0.0874, "step": 8390 }, { "epoch": 0.03502849846867672, "grad_norm": 1.248217000448663, "learning_rate": 1.0687583732204425e-05, "loss": 0.0674, "step": 8395 }, { "epoch": 0.035049361183667, "grad_norm": 1.3488270039424577, "learning_rate": 1.0684402051406283e-05, "loss": 0.0678, "step": 8400 }, { "epoch": 0.03507022389865728, "grad_norm": 1.435172101688267, "learning_rate": 1.068122321046496e-05, "loss": 0.0672, "step": 8405 }, { "epoch": 0.03509108661364756, "grad_norm": 2.2884359961841603, "learning_rate": 1.0678047205158368e-05, "loss": 0.0868, "step": 8410 }, { "epoch": 0.03511194932863783, "grad_norm": 1.5525356234909018, "learning_rate": 1.0674874031273214e-05, "loss": 0.0817, "step": 8415 }, { "epoch": 0.03513281204362811, "grad_norm": 2.0070902196268805, "learning_rate": 1.0671703684604955e-05, "loss": 0.0827, "step": 8420 }, { "epoch": 0.035153674758618386, "grad_norm": 1.7836880886354283, "learning_rate": 1.0668536160957783e-05, "loss": 0.0631, "step": 8425 }, { "epoch": 0.035174537473608665, "grad_norm": 1.8689026708323693, "learning_rate": 1.0665371456144612e-05, "loss": 0.0919, "step": 8430 }, { "epoch": 0.035195400188598944, "grad_norm": 2.4075432340170915, "learning_rate": 1.0662209565987033e-05, "loss": 0.0815, "step": 8435 }, { "epoch": 0.03521626290358922, "grad_norm": 2.0714002190904206, "learning_rate": 1.0659050486315314e-05, "loss": 0.0819, "step": 8440 }, { "epoch": 0.0352371256185795, "grad_norm": 1.771058732998048, "learning_rate": 1.0655894212968358e-05, "loss": 0.0811, "step": 8445 }, { "epoch": 0.03525798833356978, "grad_norm": 1.8896527102271696, "learning_rate": 1.0652740741793693e-05, "loss": 0.0831, "step": 8450 }, { "epoch": 0.03527885104856006, "grad_norm": 2.3653729050883396, "learning_rate": 1.0649590068647441e-05, "loss": 0.0909, "step": 8455 }, { "epoch": 0.03529971376355033, "grad_norm": 1.4342880367863382, "learning_rate": 1.0646442189394302e-05, "loss": 0.0902, "step": 8460 }, { "epoch": 0.03532057647854061, "grad_norm": 1.636047878406127, "learning_rate": 1.064329709990752e-05, "loss": 0.0743, "step": 8465 }, { "epoch": 0.03534143919353089, "grad_norm": 1.1767919926158776, "learning_rate": 1.0640154796068875e-05, "loss": 0.0841, "step": 8470 }, { "epoch": 0.03536230190852117, "grad_norm": 1.9902837980278683, "learning_rate": 1.0637015273768649e-05, "loss": 0.0847, "step": 8475 }, { "epoch": 0.035383164623511446, "grad_norm": 1.5039622413105826, "learning_rate": 1.063387852890561e-05, "loss": 0.0802, "step": 8480 }, { "epoch": 0.035404027338501724, "grad_norm": 1.6262801016458182, "learning_rate": 1.0630744557386987e-05, "loss": 0.0843, "step": 8485 }, { "epoch": 0.035424890053492, "grad_norm": 1.4023062721055572, "learning_rate": 1.0627613355128441e-05, "loss": 0.0905, "step": 8490 }, { "epoch": 0.03544575276848228, "grad_norm": 2.5664008928469064, "learning_rate": 1.0624484918054057e-05, "loss": 0.0853, "step": 8495 }, { "epoch": 0.03546661548347256, "grad_norm": 1.5751954697634465, "learning_rate": 1.062135924209631e-05, "loss": 0.0767, "step": 8500 }, { "epoch": 0.03548747819846283, "grad_norm": 6.689048915513135, "learning_rate": 1.0618236323196047e-05, "loss": 0.094, "step": 8505 }, { "epoch": 0.03550834091345311, "grad_norm": 1.438446441482953, "learning_rate": 1.0615116157302468e-05, "loss": 0.0759, "step": 8510 }, { "epoch": 0.03552920362844339, "grad_norm": 1.578846725771026, "learning_rate": 1.0611998740373092e-05, "loss": 0.0652, "step": 8515 }, { "epoch": 0.03555006634343367, "grad_norm": 1.9674336103874892, "learning_rate": 1.0608884068373752e-05, "loss": 0.0743, "step": 8520 }, { "epoch": 0.03557092905842395, "grad_norm": 1.6573803237611369, "learning_rate": 1.060577213727856e-05, "loss": 0.0758, "step": 8525 }, { "epoch": 0.035591791773414226, "grad_norm": 1.4647454871566412, "learning_rate": 1.0602662943069894e-05, "loss": 0.0823, "step": 8530 }, { "epoch": 0.035612654488404505, "grad_norm": 1.330759337546663, "learning_rate": 1.0599556481738365e-05, "loss": 0.0853, "step": 8535 }, { "epoch": 0.035633517203394784, "grad_norm": 1.722312561131081, "learning_rate": 1.0596452749282805e-05, "loss": 0.0947, "step": 8540 }, { "epoch": 0.03565437991838506, "grad_norm": 1.6467173163004256, "learning_rate": 1.0593351741710247e-05, "loss": 0.0768, "step": 8545 }, { "epoch": 0.035675242633375334, "grad_norm": 2.016493711339246, "learning_rate": 1.0590253455035893e-05, "loss": 0.0995, "step": 8550 }, { "epoch": 0.03569610534836561, "grad_norm": 1.7749342954585956, "learning_rate": 1.0587157885283098e-05, "loss": 0.0795, "step": 8555 }, { "epoch": 0.03571696806335589, "grad_norm": 1.6011959544538659, "learning_rate": 1.0584065028483357e-05, "loss": 0.0894, "step": 8560 }, { "epoch": 0.03573783077834617, "grad_norm": 1.3006145603949435, "learning_rate": 1.0580974880676264e-05, "loss": 0.0728, "step": 8565 }, { "epoch": 0.03575869349333645, "grad_norm": 1.1108492021646503, "learning_rate": 1.0577887437909515e-05, "loss": 0.0637, "step": 8570 }, { "epoch": 0.03577955620832673, "grad_norm": 1.500554682225872, "learning_rate": 1.0574802696238861e-05, "loss": 0.0815, "step": 8575 }, { "epoch": 0.03580041892331701, "grad_norm": 1.7178772184721607, "learning_rate": 1.057172065172811e-05, "loss": 0.0719, "step": 8580 }, { "epoch": 0.035821281638307285, "grad_norm": 1.0606755514518447, "learning_rate": 1.0568641300449091e-05, "loss": 0.072, "step": 8585 }, { "epoch": 0.03584214435329756, "grad_norm": 1.6629487441927508, "learning_rate": 1.056556463848164e-05, "loss": 0.0822, "step": 8590 }, { "epoch": 0.035863007068287836, "grad_norm": 2.150209422747487, "learning_rate": 1.0562490661913573e-05, "loss": 0.0778, "step": 8595 }, { "epoch": 0.035883869783278115, "grad_norm": 4.201228454788521, "learning_rate": 1.0559419366840674e-05, "loss": 0.0826, "step": 8600 }, { "epoch": 0.035904732498268394, "grad_norm": 1.3226734142760643, "learning_rate": 1.0556350749366665e-05, "loss": 0.0809, "step": 8605 }, { "epoch": 0.03592559521325867, "grad_norm": 1.7641671240677033, "learning_rate": 1.0553284805603193e-05, "loss": 0.0745, "step": 8610 }, { "epoch": 0.03594645792824895, "grad_norm": 1.7234705353615563, "learning_rate": 1.05502215316698e-05, "loss": 0.089, "step": 8615 }, { "epoch": 0.03596732064323923, "grad_norm": 1.7296264572881457, "learning_rate": 1.0547160923693915e-05, "loss": 0.112, "step": 8620 }, { "epoch": 0.03598818335822951, "grad_norm": 1.0198311263237911, "learning_rate": 1.0544102977810821e-05, "loss": 0.0896, "step": 8625 }, { "epoch": 0.03600904607321979, "grad_norm": 1.6735226522627797, "learning_rate": 1.0541047690163646e-05, "loss": 0.0852, "step": 8630 }, { "epoch": 0.03602990878821006, "grad_norm": 2.1155011860013535, "learning_rate": 1.0537995056903329e-05, "loss": 0.0759, "step": 8635 }, { "epoch": 0.03605077150320034, "grad_norm": 1.9034541476214208, "learning_rate": 1.053494507418861e-05, "loss": 0.0723, "step": 8640 }, { "epoch": 0.03607163421819062, "grad_norm": 1.3052331318966586, "learning_rate": 1.053189773818601e-05, "loss": 0.0878, "step": 8645 }, { "epoch": 0.036092496933180895, "grad_norm": 1.8897986017384472, "learning_rate": 1.0528853045069805e-05, "loss": 0.0656, "step": 8650 }, { "epoch": 0.036113359648171174, "grad_norm": 1.454477188677423, "learning_rate": 1.0525810991022007e-05, "loss": 0.0931, "step": 8655 }, { "epoch": 0.03613422236316145, "grad_norm": 1.3908224135040543, "learning_rate": 1.0522771572232348e-05, "loss": 0.0733, "step": 8660 }, { "epoch": 0.03615508507815173, "grad_norm": 1.7094498565780911, "learning_rate": 1.0519734784898254e-05, "loss": 0.0952, "step": 8665 }, { "epoch": 0.03617594779314201, "grad_norm": 1.491623505416914, "learning_rate": 1.0516700625224836e-05, "loss": 0.0839, "step": 8670 }, { "epoch": 0.03619681050813229, "grad_norm": 1.4388608480326601, "learning_rate": 1.0513669089424854e-05, "loss": 0.078, "step": 8675 }, { "epoch": 0.03621767322312256, "grad_norm": 1.4840244391348911, "learning_rate": 1.0510640173718705e-05, "loss": 0.0868, "step": 8680 }, { "epoch": 0.03623853593811284, "grad_norm": 2.4427837531217493, "learning_rate": 1.0507613874334413e-05, "loss": 0.0792, "step": 8685 }, { "epoch": 0.03625939865310312, "grad_norm": 1.251716701616154, "learning_rate": 1.050459018750759e-05, "loss": 0.0675, "step": 8690 }, { "epoch": 0.0362802613680934, "grad_norm": 1.2414776348928565, "learning_rate": 1.0501569109481432e-05, "loss": 0.0807, "step": 8695 }, { "epoch": 0.036301124083083676, "grad_norm": 1.593668890643568, "learning_rate": 1.0498550636506692e-05, "loss": 0.0697, "step": 8700 }, { "epoch": 0.036321986798073955, "grad_norm": 1.2963966225307233, "learning_rate": 1.0495534764841665e-05, "loss": 0.0777, "step": 8705 }, { "epoch": 0.03634284951306423, "grad_norm": 1.4470645991194502, "learning_rate": 1.0492521490752164e-05, "loss": 0.0879, "step": 8710 }, { "epoch": 0.03636371222805451, "grad_norm": 1.6510149625829162, "learning_rate": 1.0489510810511495e-05, "loss": 0.0624, "step": 8715 }, { "epoch": 0.03638457494304479, "grad_norm": 1.6843208548187503, "learning_rate": 1.0486502720400457e-05, "loss": 0.0669, "step": 8720 }, { "epoch": 0.03640543765803506, "grad_norm": 1.5800753309998927, "learning_rate": 1.0483497216707304e-05, "loss": 0.0915, "step": 8725 }, { "epoch": 0.03642630037302534, "grad_norm": 1.8262971240842991, "learning_rate": 1.0480494295727734e-05, "loss": 0.1114, "step": 8730 }, { "epoch": 0.03644716308801562, "grad_norm": 1.5171987198973391, "learning_rate": 1.0477493953764865e-05, "loss": 0.0793, "step": 8735 }, { "epoch": 0.0364680258030059, "grad_norm": 1.446716530330546, "learning_rate": 1.0474496187129226e-05, "loss": 0.0771, "step": 8740 }, { "epoch": 0.03648888851799618, "grad_norm": 2.161789561694487, "learning_rate": 1.0471500992138725e-05, "loss": 0.0864, "step": 8745 }, { "epoch": 0.036509751232986457, "grad_norm": 1.7900629369331518, "learning_rate": 1.0468508365118641e-05, "loss": 0.0922, "step": 8750 }, { "epoch": 0.036530613947976735, "grad_norm": 2.430775477717356, "learning_rate": 1.0465518302401592e-05, "loss": 0.0774, "step": 8755 }, { "epoch": 0.036551476662967014, "grad_norm": 1.8846724030935709, "learning_rate": 1.0462530800327533e-05, "loss": 0.081, "step": 8760 }, { "epoch": 0.03657233937795729, "grad_norm": 1.8723283447526637, "learning_rate": 1.0459545855243722e-05, "loss": 0.0812, "step": 8765 }, { "epoch": 0.036593202092947565, "grad_norm": 1.6840277659502356, "learning_rate": 1.0456563463504716e-05, "loss": 0.0887, "step": 8770 }, { "epoch": 0.03661406480793784, "grad_norm": 1.871469581387468, "learning_rate": 1.0453583621472337e-05, "loss": 0.1185, "step": 8775 }, { "epoch": 0.03663492752292812, "grad_norm": 1.7895267942125586, "learning_rate": 1.0450606325515657e-05, "loss": 0.0738, "step": 8780 }, { "epoch": 0.0366557902379184, "grad_norm": 2.019796740624266, "learning_rate": 1.0447631572010991e-05, "loss": 0.0844, "step": 8785 }, { "epoch": 0.03667665295290868, "grad_norm": 1.4979284577342662, "learning_rate": 1.044465935734187e-05, "loss": 0.0919, "step": 8790 }, { "epoch": 0.03669751566789896, "grad_norm": 0.8790848321733642, "learning_rate": 1.0441689677899018e-05, "loss": 0.0647, "step": 8795 }, { "epoch": 0.03671837838288924, "grad_norm": 1.3765875833920274, "learning_rate": 1.0438722530080336e-05, "loss": 0.1025, "step": 8800 }, { "epoch": 0.036739241097879516, "grad_norm": 1.6636724445989026, "learning_rate": 1.0435757910290896e-05, "loss": 0.0826, "step": 8805 }, { "epoch": 0.036760103812869795, "grad_norm": 1.9560933178826219, "learning_rate": 1.0432795814942905e-05, "loss": 0.1082, "step": 8810 }, { "epoch": 0.036780966527860066, "grad_norm": 1.6761924577302358, "learning_rate": 1.0429836240455693e-05, "loss": 0.0709, "step": 8815 }, { "epoch": 0.036801829242850345, "grad_norm": 1.8898348489630663, "learning_rate": 1.0426879183255704e-05, "loss": 0.0861, "step": 8820 }, { "epoch": 0.036822691957840624, "grad_norm": 2.094087168999069, "learning_rate": 1.0423924639776466e-05, "loss": 0.071, "step": 8825 }, { "epoch": 0.0368435546728309, "grad_norm": 1.6428469392976157, "learning_rate": 1.0420972606458572e-05, "loss": 0.1042, "step": 8830 }, { "epoch": 0.03686441738782118, "grad_norm": 1.9539030033400149, "learning_rate": 1.0418023079749679e-05, "loss": 0.0662, "step": 8835 }, { "epoch": 0.03688528010281146, "grad_norm": 1.1007473876481717, "learning_rate": 1.0415076056104465e-05, "loss": 0.063, "step": 8840 }, { "epoch": 0.03690614281780174, "grad_norm": 1.6140570186670204, "learning_rate": 1.0412131531984638e-05, "loss": 0.0961, "step": 8845 }, { "epoch": 0.03692700553279202, "grad_norm": 1.4539719139343843, "learning_rate": 1.0409189503858887e-05, "loss": 0.0731, "step": 8850 }, { "epoch": 0.036947868247782296, "grad_norm": 2.3991477089906077, "learning_rate": 1.0406249968202899e-05, "loss": 0.0784, "step": 8855 }, { "epoch": 0.03696873096277257, "grad_norm": 1.4925651192247484, "learning_rate": 1.0403312921499316e-05, "loss": 0.0768, "step": 8860 }, { "epoch": 0.03698959367776285, "grad_norm": 1.2967640633019275, "learning_rate": 1.0400378360237725e-05, "loss": 0.0831, "step": 8865 }, { "epoch": 0.037010456392753126, "grad_norm": 1.905400411722398, "learning_rate": 1.0397446280914635e-05, "loss": 0.1087, "step": 8870 }, { "epoch": 0.037031319107743405, "grad_norm": 1.2730641877365128, "learning_rate": 1.0394516680033482e-05, "loss": 0.0841, "step": 8875 }, { "epoch": 0.03705218182273368, "grad_norm": 1.670325912925455, "learning_rate": 1.0391589554104577e-05, "loss": 0.0828, "step": 8880 }, { "epoch": 0.03707304453772396, "grad_norm": 1.5561194538433498, "learning_rate": 1.0388664899645114e-05, "loss": 0.0821, "step": 8885 }, { "epoch": 0.03709390725271424, "grad_norm": 2.425594731324393, "learning_rate": 1.0385742713179146e-05, "loss": 0.0612, "step": 8890 }, { "epoch": 0.03711476996770452, "grad_norm": 2.233758641760923, "learning_rate": 1.038282299123756e-05, "loss": 0.0804, "step": 8895 }, { "epoch": 0.0371356326826948, "grad_norm": 1.8520478832253084, "learning_rate": 1.0379905730358073e-05, "loss": 0.0829, "step": 8900 }, { "epoch": 0.03715649539768507, "grad_norm": 1.760053062005525, "learning_rate": 1.0376990927085204e-05, "loss": 0.0907, "step": 8905 }, { "epoch": 0.03717735811267535, "grad_norm": 2.133378783903728, "learning_rate": 1.0374078577970261e-05, "loss": 0.0711, "step": 8910 }, { "epoch": 0.03719822082766563, "grad_norm": 2.229376669312837, "learning_rate": 1.0371168679571322e-05, "loss": 0.0869, "step": 8915 }, { "epoch": 0.037219083542655906, "grad_norm": 1.2284256008071552, "learning_rate": 1.0368261228453224e-05, "loss": 0.0791, "step": 8920 }, { "epoch": 0.037239946257646185, "grad_norm": 1.552128072004393, "learning_rate": 1.0365356221187537e-05, "loss": 0.0921, "step": 8925 }, { "epoch": 0.037260808972636464, "grad_norm": 1.396403868368627, "learning_rate": 1.0362453654352553e-05, "loss": 0.0759, "step": 8930 }, { "epoch": 0.03728167168762674, "grad_norm": 1.8416850703576118, "learning_rate": 1.0359553524533265e-05, "loss": 0.0803, "step": 8935 }, { "epoch": 0.03730253440261702, "grad_norm": 1.8228624490245935, "learning_rate": 1.0356655828321358e-05, "loss": 0.0756, "step": 8940 }, { "epoch": 0.0373233971176073, "grad_norm": 1.4018031450957398, "learning_rate": 1.035376056231518e-05, "loss": 0.089, "step": 8945 }, { "epoch": 0.03734425983259757, "grad_norm": 1.4304957973683754, "learning_rate": 1.0350867723119738e-05, "loss": 0.0867, "step": 8950 }, { "epoch": 0.03736512254758785, "grad_norm": 1.7168133340832912, "learning_rate": 1.0347977307346671e-05, "loss": 0.0805, "step": 8955 }, { "epoch": 0.03738598526257813, "grad_norm": 1.5393118940774877, "learning_rate": 1.0345089311614238e-05, "loss": 0.0701, "step": 8960 }, { "epoch": 0.03740684797756841, "grad_norm": 1.4074675257555618, "learning_rate": 1.0342203732547307e-05, "loss": 0.0874, "step": 8965 }, { "epoch": 0.03742771069255869, "grad_norm": 1.321581170317237, "learning_rate": 1.033932056677732e-05, "loss": 0.0704, "step": 8970 }, { "epoch": 0.037448573407548966, "grad_norm": 1.828530261978311, "learning_rate": 1.03364398109423e-05, "loss": 0.1056, "step": 8975 }, { "epoch": 0.037469436122539244, "grad_norm": 1.8355810994451371, "learning_rate": 1.0333561461686823e-05, "loss": 0.101, "step": 8980 }, { "epoch": 0.03749029883752952, "grad_norm": 2.239197226834379, "learning_rate": 1.0330685515661991e-05, "loss": 0.0836, "step": 8985 }, { "epoch": 0.0375111615525198, "grad_norm": 1.3384475151115156, "learning_rate": 1.0327811969525441e-05, "loss": 0.08, "step": 8990 }, { "epoch": 0.037532024267510074, "grad_norm": 1.4535076302339331, "learning_rate": 1.0324940819941304e-05, "loss": 0.0761, "step": 8995 }, { "epoch": 0.03755288698250035, "grad_norm": 4.357775409028355, "learning_rate": 1.0322072063580205e-05, "loss": 0.0739, "step": 9000 }, { "epoch": 0.03757374969749063, "grad_norm": 1.3329983270340822, "learning_rate": 1.0319205697119235e-05, "loss": 0.0773, "step": 9005 }, { "epoch": 0.03759461241248091, "grad_norm": 1.418097851944143, "learning_rate": 1.0316341717241945e-05, "loss": 0.08, "step": 9010 }, { "epoch": 0.03761547512747119, "grad_norm": 1.9134855365564136, "learning_rate": 1.0313480120638321e-05, "loss": 0.06, "step": 9015 }, { "epoch": 0.03763633784246147, "grad_norm": 1.7828971423392985, "learning_rate": 1.0310620904004787e-05, "loss": 0.0724, "step": 9020 }, { "epoch": 0.037657200557451746, "grad_norm": 2.0464135234740257, "learning_rate": 1.0307764064044152e-05, "loss": 0.08, "step": 9025 }, { "epoch": 0.037678063272442025, "grad_norm": 1.473783682788095, "learning_rate": 1.0304909597465633e-05, "loss": 0.0883, "step": 9030 }, { "epoch": 0.037698925987432304, "grad_norm": 1.9664270159571722, "learning_rate": 1.0302057500984816e-05, "loss": 0.091, "step": 9035 }, { "epoch": 0.037719788702422576, "grad_norm": 1.9891991652780028, "learning_rate": 1.0299207771323652e-05, "loss": 0.0966, "step": 9040 }, { "epoch": 0.037740651417412854, "grad_norm": 1.6099224630374929, "learning_rate": 1.0296360405210424e-05, "loss": 0.0788, "step": 9045 }, { "epoch": 0.03776151413240313, "grad_norm": 2.1876169507649217, "learning_rate": 1.0293515399379758e-05, "loss": 0.0916, "step": 9050 }, { "epoch": 0.03778237684739341, "grad_norm": 2.0803078285915695, "learning_rate": 1.0290672750572584e-05, "loss": 0.0713, "step": 9055 }, { "epoch": 0.03780323956238369, "grad_norm": 2.474547934465082, "learning_rate": 1.0287832455536126e-05, "loss": 0.0866, "step": 9060 }, { "epoch": 0.03782410227737397, "grad_norm": 1.7670671623413345, "learning_rate": 1.02849945110239e-05, "loss": 0.0831, "step": 9065 }, { "epoch": 0.03784496499236425, "grad_norm": 1.727019065370087, "learning_rate": 1.0282158913795672e-05, "loss": 0.0966, "step": 9070 }, { "epoch": 0.03786582770735453, "grad_norm": 1.3705360530735018, "learning_rate": 1.0279325660617474e-05, "loss": 0.0731, "step": 9075 }, { "epoch": 0.037886690422344806, "grad_norm": 1.6201966383586668, "learning_rate": 1.0276494748261563e-05, "loss": 0.0921, "step": 9080 }, { "epoch": 0.03790755313733508, "grad_norm": 1.5653631000265928, "learning_rate": 1.0273666173506415e-05, "loss": 0.0833, "step": 9085 }, { "epoch": 0.037928415852325356, "grad_norm": 1.3544037786948577, "learning_rate": 1.0270839933136714e-05, "loss": 0.0602, "step": 9090 }, { "epoch": 0.037949278567315635, "grad_norm": 1.7915617058602296, "learning_rate": 1.0268016023943325e-05, "loss": 0.0901, "step": 9095 }, { "epoch": 0.037970141282305914, "grad_norm": 1.9033931291402333, "learning_rate": 1.0265194442723296e-05, "loss": 0.064, "step": 9100 }, { "epoch": 0.03799100399729619, "grad_norm": 2.1338581761643214, "learning_rate": 1.0262375186279826e-05, "loss": 0.075, "step": 9105 }, { "epoch": 0.03801186671228647, "grad_norm": 1.5542661085685203, "learning_rate": 1.0259558251422256e-05, "loss": 0.0733, "step": 9110 }, { "epoch": 0.03803272942727675, "grad_norm": 2.021966948029421, "learning_rate": 1.0256743634966057e-05, "loss": 0.0985, "step": 9115 }, { "epoch": 0.03805359214226703, "grad_norm": 1.5718837831082946, "learning_rate": 1.0253931333732813e-05, "loss": 0.0918, "step": 9120 }, { "epoch": 0.03807445485725731, "grad_norm": 1.0199940218566996, "learning_rate": 1.02511213445502e-05, "loss": 0.0663, "step": 9125 }, { "epoch": 0.03809531757224758, "grad_norm": 1.3404766669158958, "learning_rate": 1.024831366425198e-05, "loss": 0.0856, "step": 9130 }, { "epoch": 0.03811618028723786, "grad_norm": 2.3368015630027847, "learning_rate": 1.0245508289677986e-05, "loss": 0.1111, "step": 9135 }, { "epoch": 0.03813704300222814, "grad_norm": 1.3943630735190329, "learning_rate": 1.0242705217674094e-05, "loss": 0.0886, "step": 9140 }, { "epoch": 0.038157905717218416, "grad_norm": 1.2649171817483464, "learning_rate": 1.0239904445092221e-05, "loss": 0.0768, "step": 9145 }, { "epoch": 0.038178768432208694, "grad_norm": 1.540152200605205, "learning_rate": 1.0237105968790312e-05, "loss": 0.0926, "step": 9150 }, { "epoch": 0.03819963114719897, "grad_norm": 1.4661353543368296, "learning_rate": 1.0234309785632312e-05, "loss": 0.0783, "step": 9155 }, { "epoch": 0.03822049386218925, "grad_norm": 2.2471292037043313, "learning_rate": 1.0231515892488162e-05, "loss": 0.0952, "step": 9160 }, { "epoch": 0.03824135657717953, "grad_norm": 1.8955744588866306, "learning_rate": 1.022872428623378e-05, "loss": 0.0784, "step": 9165 }, { "epoch": 0.03826221929216981, "grad_norm": 1.3052565467560426, "learning_rate": 1.022593496375105e-05, "loss": 0.0652, "step": 9170 }, { "epoch": 0.03828308200716008, "grad_norm": 1.9416401676798878, "learning_rate": 1.0223147921927803e-05, "loss": 0.0826, "step": 9175 }, { "epoch": 0.03830394472215036, "grad_norm": 2.556871876084816, "learning_rate": 1.0220363157657804e-05, "loss": 0.0573, "step": 9180 }, { "epoch": 0.03832480743714064, "grad_norm": 1.2725523021284955, "learning_rate": 1.0217580667840739e-05, "loss": 0.0821, "step": 9185 }, { "epoch": 0.03834567015213092, "grad_norm": 1.0366668111749746, "learning_rate": 1.0214800449382194e-05, "loss": 0.0726, "step": 9190 }, { "epoch": 0.038366532867121196, "grad_norm": 1.531335154810892, "learning_rate": 1.0212022499193655e-05, "loss": 0.0661, "step": 9195 }, { "epoch": 0.038387395582111475, "grad_norm": 1.641020485436611, "learning_rate": 1.0209246814192477e-05, "loss": 0.0736, "step": 9200 }, { "epoch": 0.038408258297101754, "grad_norm": 1.3713793774283773, "learning_rate": 1.020647339130188e-05, "loss": 0.0739, "step": 9205 }, { "epoch": 0.03842912101209203, "grad_norm": 2.2192247497966258, "learning_rate": 1.0203702227450934e-05, "loss": 0.0683, "step": 9210 }, { "epoch": 0.038449983727082304, "grad_norm": 1.8191356246300525, "learning_rate": 1.0200933319574533e-05, "loss": 0.0698, "step": 9215 }, { "epoch": 0.03847084644207258, "grad_norm": 1.7147817850668141, "learning_rate": 1.0198166664613403e-05, "loss": 0.0854, "step": 9220 }, { "epoch": 0.03849170915706286, "grad_norm": 2.329340152436137, "learning_rate": 1.0195402259514062e-05, "loss": 0.0701, "step": 9225 }, { "epoch": 0.03851257187205314, "grad_norm": 1.3850663142058932, "learning_rate": 1.019264010122883e-05, "loss": 0.0646, "step": 9230 }, { "epoch": 0.03853343458704342, "grad_norm": 1.5772372034625917, "learning_rate": 1.0189880186715797e-05, "loss": 0.0729, "step": 9235 }, { "epoch": 0.0385542973020337, "grad_norm": 1.9576986487096844, "learning_rate": 1.0187122512938816e-05, "loss": 0.0794, "step": 9240 }, { "epoch": 0.03857516001702398, "grad_norm": 1.3405671718413619, "learning_rate": 1.0184367076867495e-05, "loss": 0.1014, "step": 9245 }, { "epoch": 0.038596022732014255, "grad_norm": 1.672550445338608, "learning_rate": 1.0181613875477166e-05, "loss": 0.0736, "step": 9250 }, { "epoch": 0.038616885447004534, "grad_norm": 2.151464791350611, "learning_rate": 1.017886290574889e-05, "loss": 0.0748, "step": 9255 }, { "epoch": 0.038637748161994806, "grad_norm": 2.3566560371685443, "learning_rate": 1.0176114164669431e-05, "loss": 0.0891, "step": 9260 }, { "epoch": 0.038658610876985085, "grad_norm": 2.330103333879482, "learning_rate": 1.0173367649231245e-05, "loss": 0.0828, "step": 9265 }, { "epoch": 0.038679473591975364, "grad_norm": 1.932022507313608, "learning_rate": 1.0170623356432474e-05, "loss": 0.0806, "step": 9270 }, { "epoch": 0.03870033630696564, "grad_norm": 1.7201913065220866, "learning_rate": 1.0167881283276918e-05, "loss": 0.0654, "step": 9275 }, { "epoch": 0.03872119902195592, "grad_norm": 2.3121747153738395, "learning_rate": 1.0165141426774028e-05, "loss": 0.0809, "step": 9280 }, { "epoch": 0.0387420617369462, "grad_norm": 2.063475352564742, "learning_rate": 1.0162403783938897e-05, "loss": 0.0799, "step": 9285 }, { "epoch": 0.03876292445193648, "grad_norm": 2.3062971747578693, "learning_rate": 1.0159668351792242e-05, "loss": 0.1078, "step": 9290 }, { "epoch": 0.03878378716692676, "grad_norm": 2.2586112779690892, "learning_rate": 1.0156935127360387e-05, "loss": 0.0868, "step": 9295 }, { "epoch": 0.038804649881917036, "grad_norm": 1.9981570591792104, "learning_rate": 1.0154204107675258e-05, "loss": 0.079, "step": 9300 }, { "epoch": 0.03882551259690731, "grad_norm": 1.7401185959801357, "learning_rate": 1.015147528977436e-05, "loss": 0.085, "step": 9305 }, { "epoch": 0.03884637531189759, "grad_norm": 1.2054589072611037, "learning_rate": 1.0148748670700767e-05, "loss": 0.0726, "step": 9310 }, { "epoch": 0.038867238026887865, "grad_norm": 1.5039023457138962, "learning_rate": 1.0146024247503118e-05, "loss": 0.0607, "step": 9315 }, { "epoch": 0.038888100741878144, "grad_norm": 1.332340087329436, "learning_rate": 1.0143302017235583e-05, "loss": 0.0646, "step": 9320 }, { "epoch": 0.03890896345686842, "grad_norm": 2.403674161615756, "learning_rate": 1.0140581976957873e-05, "loss": 0.0878, "step": 9325 }, { "epoch": 0.0389298261718587, "grad_norm": 1.3861305660097178, "learning_rate": 1.0137864123735206e-05, "loss": 0.0695, "step": 9330 }, { "epoch": 0.03895068888684898, "grad_norm": 1.323900910440498, "learning_rate": 1.0135148454638311e-05, "loss": 0.0728, "step": 9335 }, { "epoch": 0.03897155160183926, "grad_norm": 1.2541391320036392, "learning_rate": 1.0132434966743398e-05, "loss": 0.0842, "step": 9340 }, { "epoch": 0.03899241431682954, "grad_norm": 2.4092632538937924, "learning_rate": 1.0129723657132164e-05, "loss": 0.0718, "step": 9345 }, { "epoch": 0.03901327703181981, "grad_norm": 1.9269946036605878, "learning_rate": 1.0127014522891761e-05, "loss": 0.0756, "step": 9350 }, { "epoch": 0.03903413974681009, "grad_norm": 1.5808633258825031, "learning_rate": 1.0124307561114794e-05, "loss": 0.0861, "step": 9355 }, { "epoch": 0.03905500246180037, "grad_norm": 1.3186464535613702, "learning_rate": 1.0121602768899308e-05, "loss": 0.0711, "step": 9360 }, { "epoch": 0.039075865176790646, "grad_norm": 1.6326152638876184, "learning_rate": 1.0118900143348767e-05, "loss": 0.0778, "step": 9365 }, { "epoch": 0.039096727891780925, "grad_norm": 1.6784854865122527, "learning_rate": 1.0116199681572048e-05, "loss": 0.06, "step": 9370 }, { "epoch": 0.0391175906067712, "grad_norm": 1.5415051218646088, "learning_rate": 1.0113501380683425e-05, "loss": 0.0701, "step": 9375 }, { "epoch": 0.03913845332176148, "grad_norm": 1.667998295979107, "learning_rate": 1.0110805237802565e-05, "loss": 0.0706, "step": 9380 }, { "epoch": 0.03915931603675176, "grad_norm": 1.7203617147614376, "learning_rate": 1.0108111250054498e-05, "loss": 0.0708, "step": 9385 }, { "epoch": 0.03918017875174204, "grad_norm": 1.6843708127239876, "learning_rate": 1.010541941456961e-05, "loss": 0.0765, "step": 9390 }, { "epoch": 0.03920104146673231, "grad_norm": 1.8783775523342536, "learning_rate": 1.010272972848365e-05, "loss": 0.0877, "step": 9395 }, { "epoch": 0.03922190418172259, "grad_norm": 2.0092427149726166, "learning_rate": 1.0100042188937685e-05, "loss": 0.0711, "step": 9400 }, { "epoch": 0.03924276689671287, "grad_norm": 1.7961008847314244, "learning_rate": 1.0097356793078106e-05, "loss": 0.0847, "step": 9405 }, { "epoch": 0.03926362961170315, "grad_norm": 1.264777085692364, "learning_rate": 1.0094673538056617e-05, "loss": 0.0827, "step": 9410 }, { "epoch": 0.039284492326693427, "grad_norm": 1.3327484380869425, "learning_rate": 1.0091992421030214e-05, "loss": 0.0788, "step": 9415 }, { "epoch": 0.039305355041683705, "grad_norm": 1.1974763159817599, "learning_rate": 1.008931343916118e-05, "loss": 0.0689, "step": 9420 }, { "epoch": 0.039326217756673984, "grad_norm": 3.9673075703006173, "learning_rate": 1.0086636589617062e-05, "loss": 0.0996, "step": 9425 }, { "epoch": 0.03934708047166426, "grad_norm": 1.7381204316100243, "learning_rate": 1.0083961869570668e-05, "loss": 0.0742, "step": 9430 }, { "epoch": 0.03936794318665454, "grad_norm": 1.4805770556344975, "learning_rate": 1.008128927620005e-05, "loss": 0.0749, "step": 9435 }, { "epoch": 0.03938880590164481, "grad_norm": 1.948933225760474, "learning_rate": 1.0078618806688497e-05, "loss": 0.0839, "step": 9440 }, { "epoch": 0.03940966861663509, "grad_norm": 1.1534288962917243, "learning_rate": 1.0075950458224509e-05, "loss": 0.0704, "step": 9445 }, { "epoch": 0.03943053133162537, "grad_norm": 1.3340870285638575, "learning_rate": 1.0073284228001803e-05, "loss": 0.0861, "step": 9450 }, { "epoch": 0.03945139404661565, "grad_norm": 1.3505465008859574, "learning_rate": 1.0070620113219287e-05, "loss": 0.0742, "step": 9455 }, { "epoch": 0.03947225676160593, "grad_norm": 1.3631604040688825, "learning_rate": 1.006795811108105e-05, "loss": 0.071, "step": 9460 }, { "epoch": 0.03949311947659621, "grad_norm": 1.2080661384193736, "learning_rate": 1.006529821879636e-05, "loss": 0.0668, "step": 9465 }, { "epoch": 0.039513982191586486, "grad_norm": 1.622895284555981, "learning_rate": 1.0062640433579634e-05, "loss": 0.0796, "step": 9470 }, { "epoch": 0.039534844906576765, "grad_norm": 1.4393021833558897, "learning_rate": 1.0059984752650434e-05, "loss": 0.0763, "step": 9475 }, { "epoch": 0.03955570762156704, "grad_norm": 1.8161947344355407, "learning_rate": 1.0057331173233468e-05, "loss": 0.0692, "step": 9480 }, { "epoch": 0.039576570336557315, "grad_norm": 1.5427706470689715, "learning_rate": 1.0054679692558556e-05, "loss": 0.0619, "step": 9485 }, { "epoch": 0.039597433051547594, "grad_norm": 2.047694278116586, "learning_rate": 1.0052030307860627e-05, "loss": 0.0955, "step": 9490 }, { "epoch": 0.03961829576653787, "grad_norm": 1.1974835877360743, "learning_rate": 1.0049383016379712e-05, "loss": 0.062, "step": 9495 }, { "epoch": 0.03963915848152815, "grad_norm": 2.010752431605821, "learning_rate": 1.0046737815360928e-05, "loss": 0.0861, "step": 9500 }, { "epoch": 0.03966002119651843, "grad_norm": 1.6220870846816793, "learning_rate": 1.0044094702054457e-05, "loss": 0.0776, "step": 9505 }, { "epoch": 0.03968088391150871, "grad_norm": 1.3471505624393603, "learning_rate": 1.0041453673715553e-05, "loss": 0.0667, "step": 9510 }, { "epoch": 0.03970174662649899, "grad_norm": 1.3171570673051738, "learning_rate": 1.0038814727604511e-05, "loss": 0.0717, "step": 9515 }, { "epoch": 0.039722609341489266, "grad_norm": 1.803750765787678, "learning_rate": 1.0036177860986671e-05, "loss": 0.079, "step": 9520 }, { "epoch": 0.039743472056479545, "grad_norm": 1.4439192698394985, "learning_rate": 1.003354307113239e-05, "loss": 0.0833, "step": 9525 }, { "epoch": 0.03976433477146982, "grad_norm": 1.2651263096515761, "learning_rate": 1.0030910355317046e-05, "loss": 0.0883, "step": 9530 }, { "epoch": 0.039785197486460096, "grad_norm": 3.539124687076443, "learning_rate": 1.0028279710821016e-05, "loss": 0.0793, "step": 9535 }, { "epoch": 0.039806060201450374, "grad_norm": 1.0221559908227351, "learning_rate": 1.0025651134929666e-05, "loss": 0.0777, "step": 9540 }, { "epoch": 0.03982692291644065, "grad_norm": 1.8778790836217765, "learning_rate": 1.0023024624933339e-05, "loss": 0.0824, "step": 9545 }, { "epoch": 0.03984778563143093, "grad_norm": 3.4366292482627636, "learning_rate": 1.0020400178127352e-05, "loss": 0.0779, "step": 9550 }, { "epoch": 0.03986864834642121, "grad_norm": 1.1965745101728444, "learning_rate": 1.0017777791811964e-05, "loss": 0.0745, "step": 9555 }, { "epoch": 0.03988951106141149, "grad_norm": 1.8555386381761931, "learning_rate": 1.0015157463292391e-05, "loss": 0.0732, "step": 9560 }, { "epoch": 0.03991037377640177, "grad_norm": 1.4503024757770047, "learning_rate": 1.0012539189878772e-05, "loss": 0.0677, "step": 9565 }, { "epoch": 0.03993123649139205, "grad_norm": 1.5854662073050796, "learning_rate": 1.0009922968886166e-05, "loss": 0.0806, "step": 9570 }, { "epoch": 0.03995209920638232, "grad_norm": 1.762893041605009, "learning_rate": 1.0007308797634543e-05, "loss": 0.0949, "step": 9575 }, { "epoch": 0.0399729619213726, "grad_norm": 1.4283109786729185, "learning_rate": 1.0004696673448771e-05, "loss": 0.0828, "step": 9580 }, { "epoch": 0.039993824636362876, "grad_norm": 1.7653996577538085, "learning_rate": 1.00020865936586e-05, "loss": 0.0789, "step": 9585 }, { "epoch": 0.040014687351353155, "grad_norm": 4.256698916712202, "learning_rate": 9.999478555598656e-06, "loss": 0.0704, "step": 9590 }, { "epoch": 0.040035550066343434, "grad_norm": 1.2314105413246037, "learning_rate": 9.996872556608427e-06, "loss": 0.0604, "step": 9595 }, { "epoch": 0.04005641278133371, "grad_norm": 1.898873838568262, "learning_rate": 9.994268594032249e-06, "loss": 0.0692, "step": 9600 }, { "epoch": 0.04007727549632399, "grad_norm": 3.7053689079295435, "learning_rate": 9.991666665219305e-06, "loss": 0.0815, "step": 9605 }, { "epoch": 0.04009813821131427, "grad_norm": 1.9223633637019935, "learning_rate": 9.989066767523599e-06, "loss": 0.0715, "step": 9610 }, { "epoch": 0.04011900092630455, "grad_norm": 0.9811845961920418, "learning_rate": 9.986468898303954e-06, "loss": 0.0766, "step": 9615 }, { "epoch": 0.04013986364129482, "grad_norm": 1.400299967533034, "learning_rate": 9.983873054924002e-06, "loss": 0.1005, "step": 9620 }, { "epoch": 0.0401607263562851, "grad_norm": 1.1984226482830376, "learning_rate": 9.981279234752167e-06, "loss": 0.0809, "step": 9625 }, { "epoch": 0.04018158907127538, "grad_norm": 2.189132168632873, "learning_rate": 9.978687435161653e-06, "loss": 0.0785, "step": 9630 }, { "epoch": 0.04020245178626566, "grad_norm": 1.0871648301570471, "learning_rate": 9.976097653530442e-06, "loss": 0.0664, "step": 9635 }, { "epoch": 0.040223314501255936, "grad_norm": 2.6428355803763024, "learning_rate": 9.973509887241278e-06, "loss": 0.0672, "step": 9640 }, { "epoch": 0.040244177216246214, "grad_norm": 2.4816745092426467, "learning_rate": 9.970924133681644e-06, "loss": 0.0999, "step": 9645 }, { "epoch": 0.04026503993123649, "grad_norm": 1.0438732820042127, "learning_rate": 9.968340390243773e-06, "loss": 0.0908, "step": 9650 }, { "epoch": 0.04028590264622677, "grad_norm": 1.4325010262598934, "learning_rate": 9.965758654324624e-06, "loss": 0.0624, "step": 9655 }, { "epoch": 0.04030676536121705, "grad_norm": 1.9494219907613206, "learning_rate": 9.963178923325867e-06, "loss": 0.0861, "step": 9660 }, { "epoch": 0.04032762807620732, "grad_norm": 1.9669039683622977, "learning_rate": 9.960601194653886e-06, "loss": 0.0696, "step": 9665 }, { "epoch": 0.0403484907911976, "grad_norm": 1.7710568330341554, "learning_rate": 9.958025465719753e-06, "loss": 0.0815, "step": 9670 }, { "epoch": 0.04036935350618788, "grad_norm": 3.007497606562583, "learning_rate": 9.955451733939221e-06, "loss": 0.1048, "step": 9675 }, { "epoch": 0.04039021622117816, "grad_norm": 1.6865426636033314, "learning_rate": 9.952879996732733e-06, "loss": 0.0712, "step": 9680 }, { "epoch": 0.04041107893616844, "grad_norm": 1.4715788699045074, "learning_rate": 9.950310251525372e-06, "loss": 0.06, "step": 9685 }, { "epoch": 0.040431941651158716, "grad_norm": 2.006207039796867, "learning_rate": 9.947742495746888e-06, "loss": 0.0727, "step": 9690 }, { "epoch": 0.040452804366148995, "grad_norm": 1.412477070126111, "learning_rate": 9.945176726831666e-06, "loss": 0.0582, "step": 9695 }, { "epoch": 0.040473667081139274, "grad_norm": 3.8864620037275626, "learning_rate": 9.942612942218718e-06, "loss": 0.0898, "step": 9700 }, { "epoch": 0.04049452979612955, "grad_norm": 1.643260292355927, "learning_rate": 9.940051139351681e-06, "loss": 0.0814, "step": 9705 }, { "epoch": 0.040515392511119824, "grad_norm": 1.5076122945285706, "learning_rate": 9.937491315678796e-06, "loss": 0.0642, "step": 9710 }, { "epoch": 0.0405362552261101, "grad_norm": 1.8476692894049054, "learning_rate": 9.934933468652903e-06, "loss": 0.0723, "step": 9715 }, { "epoch": 0.04055711794110038, "grad_norm": 1.590373663502368, "learning_rate": 9.93237759573143e-06, "loss": 0.0734, "step": 9720 }, { "epoch": 0.04057798065609066, "grad_norm": 2.292502691540987, "learning_rate": 9.92982369437638e-06, "loss": 0.0805, "step": 9725 }, { "epoch": 0.04059884337108094, "grad_norm": 2.0680303163447746, "learning_rate": 9.927271762054324e-06, "loss": 0.0877, "step": 9730 }, { "epoch": 0.04061970608607122, "grad_norm": 1.22046897952995, "learning_rate": 9.924721796236386e-06, "loss": 0.0708, "step": 9735 }, { "epoch": 0.0406405688010615, "grad_norm": 1.364082551570833, "learning_rate": 9.92217379439823e-06, "loss": 0.058, "step": 9740 }, { "epoch": 0.040661431516051776, "grad_norm": 1.6662707939614843, "learning_rate": 9.919627754020064e-06, "loss": 0.0726, "step": 9745 }, { "epoch": 0.040682294231042054, "grad_norm": 1.810783694132772, "learning_rate": 9.917083672586614e-06, "loss": 0.0741, "step": 9750 }, { "epoch": 0.040703156946032326, "grad_norm": 2.133336251133753, "learning_rate": 9.914541547587124e-06, "loss": 0.0826, "step": 9755 }, { "epoch": 0.040724019661022605, "grad_norm": 3.607073318379737, "learning_rate": 9.91200137651533e-06, "loss": 0.0921, "step": 9760 }, { "epoch": 0.040744882376012884, "grad_norm": 4.956345084741653, "learning_rate": 9.909463156869476e-06, "loss": 0.1213, "step": 9765 }, { "epoch": 0.04076574509100316, "grad_norm": 1.662155622675058, "learning_rate": 9.906926886152276e-06, "loss": 0.0648, "step": 9770 }, { "epoch": 0.04078660780599344, "grad_norm": 1.1720676179158944, "learning_rate": 9.90439256187092e-06, "loss": 0.0784, "step": 9775 }, { "epoch": 0.04080747052098372, "grad_norm": 1.3879052894351838, "learning_rate": 9.901860181537056e-06, "loss": 0.0748, "step": 9780 }, { "epoch": 0.040828333235974, "grad_norm": 1.6715584724091113, "learning_rate": 9.899329742666795e-06, "loss": 0.0903, "step": 9785 }, { "epoch": 0.04084919595096428, "grad_norm": 0.925637834259461, "learning_rate": 9.896801242780675e-06, "loss": 0.0733, "step": 9790 }, { "epoch": 0.04087005866595455, "grad_norm": 1.5359707512386014, "learning_rate": 9.894274679403675e-06, "loss": 0.0677, "step": 9795 }, { "epoch": 0.04089092138094483, "grad_norm": 1.2121871730709193, "learning_rate": 9.891750050065186e-06, "loss": 0.0686, "step": 9800 }, { "epoch": 0.04091178409593511, "grad_norm": 1.6509195516780293, "learning_rate": 9.889227352299016e-06, "loss": 0.0841, "step": 9805 }, { "epoch": 0.040932646810925385, "grad_norm": 2.3588713194329367, "learning_rate": 9.886706583643374e-06, "loss": 0.1111, "step": 9810 }, { "epoch": 0.040953509525915664, "grad_norm": 1.5235168979054565, "learning_rate": 9.884187741640851e-06, "loss": 0.086, "step": 9815 }, { "epoch": 0.04097437224090594, "grad_norm": 1.6167740793495193, "learning_rate": 9.88167082383843e-06, "loss": 0.084, "step": 9820 }, { "epoch": 0.04099523495589622, "grad_norm": 1.4025692999812123, "learning_rate": 9.879155827787456e-06, "loss": 0.0677, "step": 9825 }, { "epoch": 0.0410160976708865, "grad_norm": 1.7378131107889991, "learning_rate": 9.876642751043638e-06, "loss": 0.098, "step": 9830 }, { "epoch": 0.04103696038587678, "grad_norm": 0.982859895528612, "learning_rate": 9.87413159116703e-06, "loss": 0.0799, "step": 9835 }, { "epoch": 0.04105782310086705, "grad_norm": 2.2235432403582323, "learning_rate": 9.871622345722036e-06, "loss": 0.0841, "step": 9840 }, { "epoch": 0.04107868581585733, "grad_norm": 1.5409104234297173, "learning_rate": 9.86911501227738e-06, "loss": 0.0827, "step": 9845 }, { "epoch": 0.04109954853084761, "grad_norm": 2.4959125637011734, "learning_rate": 9.866609588406112e-06, "loss": 0.0706, "step": 9850 }, { "epoch": 0.04112041124583789, "grad_norm": 3.5950374398237543, "learning_rate": 9.864106071685593e-06, "loss": 0.0938, "step": 9855 }, { "epoch": 0.041141273960828166, "grad_norm": 1.5380735910868701, "learning_rate": 9.861604459697484e-06, "loss": 0.0782, "step": 9860 }, { "epoch": 0.041162136675818445, "grad_norm": 1.6452289072426693, "learning_rate": 9.859104750027738e-06, "loss": 0.0817, "step": 9865 }, { "epoch": 0.041182999390808724, "grad_norm": 1.0034378747474646, "learning_rate": 9.856606940266582e-06, "loss": 0.0775, "step": 9870 }, { "epoch": 0.041203862105799, "grad_norm": 2.0473604626240576, "learning_rate": 9.854111028008525e-06, "loss": 0.1117, "step": 9875 }, { "epoch": 0.04122472482078928, "grad_norm": 1.6208561913953974, "learning_rate": 9.851617010852334e-06, "loss": 0.0744, "step": 9880 }, { "epoch": 0.04124558753577955, "grad_norm": 2.144598960631824, "learning_rate": 9.849124886401022e-06, "loss": 0.0809, "step": 9885 }, { "epoch": 0.04126645025076983, "grad_norm": 2.6306857554102936, "learning_rate": 9.846634652261852e-06, "loss": 0.0788, "step": 9890 }, { "epoch": 0.04128731296576011, "grad_norm": 1.264465422058831, "learning_rate": 9.84414630604632e-06, "loss": 0.0686, "step": 9895 }, { "epoch": 0.04130817568075039, "grad_norm": 1.4324552648340538, "learning_rate": 9.841659845370136e-06, "loss": 0.0818, "step": 9900 }, { "epoch": 0.04132903839574067, "grad_norm": 2.064487470677988, "learning_rate": 9.839175267853233e-06, "loss": 0.0751, "step": 9905 }, { "epoch": 0.04134990111073095, "grad_norm": 1.367036714017634, "learning_rate": 9.836692571119746e-06, "loss": 0.0696, "step": 9910 }, { "epoch": 0.041370763825721225, "grad_norm": 1.539915734067065, "learning_rate": 9.834211752797999e-06, "loss": 0.0646, "step": 9915 }, { "epoch": 0.041391626540711504, "grad_norm": 1.3666862853450785, "learning_rate": 9.831732810520508e-06, "loss": 0.0719, "step": 9920 }, { "epoch": 0.04141248925570178, "grad_norm": 1.926268478163226, "learning_rate": 9.829255741923959e-06, "loss": 0.079, "step": 9925 }, { "epoch": 0.041433351970692055, "grad_norm": 1.7225136607684541, "learning_rate": 9.826780544649208e-06, "loss": 0.0842, "step": 9930 }, { "epoch": 0.041454214685682333, "grad_norm": 1.3988705532383086, "learning_rate": 9.82430721634126e-06, "loss": 0.0701, "step": 9935 }, { "epoch": 0.04147507740067261, "grad_norm": 2.0837211011374395, "learning_rate": 9.82183575464928e-06, "loss": 0.0876, "step": 9940 }, { "epoch": 0.04149594011566289, "grad_norm": 1.4706842378321268, "learning_rate": 9.819366157226557e-06, "loss": 0.0588, "step": 9945 }, { "epoch": 0.04151680283065317, "grad_norm": 1.9312558314695119, "learning_rate": 9.816898421730517e-06, "loss": 0.0841, "step": 9950 }, { "epoch": 0.04153766554564345, "grad_norm": 1.6035921717593606, "learning_rate": 9.814432545822705e-06, "loss": 0.0656, "step": 9955 }, { "epoch": 0.04155852826063373, "grad_norm": 1.8744941175142011, "learning_rate": 9.811968527168769e-06, "loss": 0.0651, "step": 9960 }, { "epoch": 0.041579390975624006, "grad_norm": 1.115340174820244, "learning_rate": 9.809506363438459e-06, "loss": 0.0729, "step": 9965 }, { "epoch": 0.041600253690614285, "grad_norm": 1.412646324687122, "learning_rate": 9.807046052305625e-06, "loss": 0.0867, "step": 9970 }, { "epoch": 0.04162111640560456, "grad_norm": 1.5739475957490887, "learning_rate": 9.804587591448191e-06, "loss": 0.0998, "step": 9975 }, { "epoch": 0.041641979120594835, "grad_norm": 1.2034298750994241, "learning_rate": 9.802130978548147e-06, "loss": 0.0707, "step": 9980 }, { "epoch": 0.041662841835585114, "grad_norm": 1.1522968798171245, "learning_rate": 9.799676211291567e-06, "loss": 0.075, "step": 9985 }, { "epoch": 0.04168370455057539, "grad_norm": 1.5398302745191168, "learning_rate": 9.797223287368561e-06, "loss": 0.0763, "step": 9990 }, { "epoch": 0.04170456726556567, "grad_norm": 1.052760055670072, "learning_rate": 9.794772204473287e-06, "loss": 0.0849, "step": 9995 }, { "epoch": 0.04172542998055595, "grad_norm": 1.9648476836590363, "learning_rate": 9.792322960303949e-06, "loss": 0.0757, "step": 10000 }, { "epoch": 0.04174629269554623, "grad_norm": 1.666497677188363, "learning_rate": 9.789875552562766e-06, "loss": 0.0639, "step": 10005 }, { "epoch": 0.04176715541053651, "grad_norm": 1.1142954212702685, "learning_rate": 9.787429978955984e-06, "loss": 0.0867, "step": 10010 }, { "epoch": 0.04178801812552679, "grad_norm": 2.2567159269013257, "learning_rate": 9.784986237193854e-06, "loss": 0.084, "step": 10015 }, { "epoch": 0.04180888084051706, "grad_norm": 1.4738636894179873, "learning_rate": 9.782544324990623e-06, "loss": 0.077, "step": 10020 }, { "epoch": 0.04182974355550734, "grad_norm": 1.337331599468649, "learning_rate": 9.780104240064537e-06, "loss": 0.0705, "step": 10025 }, { "epoch": 0.041850606270497616, "grad_norm": 1.519039340710865, "learning_rate": 9.777665980137821e-06, "loss": 0.0647, "step": 10030 }, { "epoch": 0.041871468985487895, "grad_norm": 1.639226118040281, "learning_rate": 9.775229542936671e-06, "loss": 0.0823, "step": 10035 }, { "epoch": 0.04189233170047817, "grad_norm": 1.5514003592576198, "learning_rate": 9.772794926191248e-06, "loss": 0.0608, "step": 10040 }, { "epoch": 0.04191319441546845, "grad_norm": 1.3177093116800107, "learning_rate": 9.770362127635668e-06, "loss": 0.0715, "step": 10045 }, { "epoch": 0.04193405713045873, "grad_norm": 1.5568311969808002, "learning_rate": 9.767931145008e-06, "loss": 0.0736, "step": 10050 }, { "epoch": 0.04195491984544901, "grad_norm": 1.8182716360221187, "learning_rate": 9.765501976050237e-06, "loss": 0.1122, "step": 10055 }, { "epoch": 0.04197578256043929, "grad_norm": 1.2104836875445812, "learning_rate": 9.763074618508315e-06, "loss": 0.0649, "step": 10060 }, { "epoch": 0.04199664527542956, "grad_norm": 1.598234217156276, "learning_rate": 9.760649070132082e-06, "loss": 0.0786, "step": 10065 }, { "epoch": 0.04201750799041984, "grad_norm": 1.3365570204403963, "learning_rate": 9.758225328675297e-06, "loss": 0.0668, "step": 10070 }, { "epoch": 0.04203837070541012, "grad_norm": 1.711089905443245, "learning_rate": 9.755803391895626e-06, "loss": 0.0786, "step": 10075 }, { "epoch": 0.042059233420400396, "grad_norm": 1.4264698373399503, "learning_rate": 9.753383257554627e-06, "loss": 0.0682, "step": 10080 }, { "epoch": 0.042080096135390675, "grad_norm": 1.7075033970543512, "learning_rate": 9.75096492341774e-06, "loss": 0.0798, "step": 10085 }, { "epoch": 0.042100958850380954, "grad_norm": 1.8246010886663113, "learning_rate": 9.748548387254286e-06, "loss": 0.078, "step": 10090 }, { "epoch": 0.04212182156537123, "grad_norm": 1.526885315637289, "learning_rate": 9.74613364683745e-06, "loss": 0.0673, "step": 10095 }, { "epoch": 0.04214268428036151, "grad_norm": 1.1607363173083902, "learning_rate": 9.74372069994428e-06, "loss": 0.0754, "step": 10100 }, { "epoch": 0.04216354699535179, "grad_norm": 1.4354673976666137, "learning_rate": 9.74130954435567e-06, "loss": 0.081, "step": 10105 }, { "epoch": 0.04218440971034206, "grad_norm": 1.394069646068763, "learning_rate": 9.738900177856362e-06, "loss": 0.0774, "step": 10110 }, { "epoch": 0.04220527242533234, "grad_norm": 2.006353106845942, "learning_rate": 9.736492598234927e-06, "loss": 0.0951, "step": 10115 }, { "epoch": 0.04222613514032262, "grad_norm": 1.6671981658362842, "learning_rate": 9.73408680328376e-06, "loss": 0.0585, "step": 10120 }, { "epoch": 0.0422469978553129, "grad_norm": 9.515840671016445, "learning_rate": 9.731682790799078e-06, "loss": 0.0772, "step": 10125 }, { "epoch": 0.04226786057030318, "grad_norm": 2.842995336157719, "learning_rate": 9.729280558580899e-06, "loss": 0.0953, "step": 10130 }, { "epoch": 0.042288723285293456, "grad_norm": 2.2312650137067007, "learning_rate": 9.726880104433044e-06, "loss": 0.0821, "step": 10135 }, { "epoch": 0.042309586000283735, "grad_norm": 1.3905180106977155, "learning_rate": 9.724481426163131e-06, "loss": 0.0759, "step": 10140 }, { "epoch": 0.04233044871527401, "grad_norm": 1.5638262866297183, "learning_rate": 9.72208452158255e-06, "loss": 0.0658, "step": 10145 }, { "epoch": 0.04235131143026429, "grad_norm": 1.500928139371398, "learning_rate": 9.719689388506468e-06, "loss": 0.0856, "step": 10150 }, { "epoch": 0.042372174145254564, "grad_norm": 1.426644323037977, "learning_rate": 9.717296024753825e-06, "loss": 0.0755, "step": 10155 }, { "epoch": 0.04239303686024484, "grad_norm": 1.6814135499594536, "learning_rate": 9.714904428147311e-06, "loss": 0.0687, "step": 10160 }, { "epoch": 0.04241389957523512, "grad_norm": 1.1586636878415506, "learning_rate": 9.712514596513369e-06, "loss": 0.0651, "step": 10165 }, { "epoch": 0.0424347622902254, "grad_norm": 2.2680359292478682, "learning_rate": 9.710126527682181e-06, "loss": 0.0879, "step": 10170 }, { "epoch": 0.04245562500521568, "grad_norm": 1.721083196053819, "learning_rate": 9.707740219487663e-06, "loss": 0.0734, "step": 10175 }, { "epoch": 0.04247648772020596, "grad_norm": 2.7048748922953774, "learning_rate": 9.705355669767456e-06, "loss": 0.0893, "step": 10180 }, { "epoch": 0.042497350435196236, "grad_norm": 1.522432526026843, "learning_rate": 9.702972876362913e-06, "loss": 0.0706, "step": 10185 }, { "epoch": 0.042518213150186515, "grad_norm": 1.54629622177458, "learning_rate": 9.700591837119106e-06, "loss": 0.0869, "step": 10190 }, { "epoch": 0.042539075865176794, "grad_norm": 1.951640338151981, "learning_rate": 9.698212549884788e-06, "loss": 0.0713, "step": 10195 }, { "epoch": 0.042559938580167066, "grad_norm": 2.1042917157631393, "learning_rate": 9.695835012512422e-06, "loss": 0.0848, "step": 10200 }, { "epoch": 0.042580801295157344, "grad_norm": 1.5544566281628025, "learning_rate": 9.693459222858145e-06, "loss": 0.0752, "step": 10205 }, { "epoch": 0.04260166401014762, "grad_norm": 1.4060273911178187, "learning_rate": 9.691085178781775e-06, "loss": 0.0738, "step": 10210 }, { "epoch": 0.0426225267251379, "grad_norm": 2.461595350112413, "learning_rate": 9.688712878146789e-06, "loss": 0.0787, "step": 10215 }, { "epoch": 0.04264338944012818, "grad_norm": 1.3002257180196826, "learning_rate": 9.686342318820332e-06, "loss": 0.0913, "step": 10220 }, { "epoch": 0.04266425215511846, "grad_norm": 1.9679342288264428, "learning_rate": 9.683973498673192e-06, "loss": 0.098, "step": 10225 }, { "epoch": 0.04268511487010874, "grad_norm": 1.783679487110866, "learning_rate": 9.681606415579809e-06, "loss": 0.0844, "step": 10230 }, { "epoch": 0.04270597758509902, "grad_norm": 2.5958401440703485, "learning_rate": 9.679241067418252e-06, "loss": 0.0717, "step": 10235 }, { "epoch": 0.042726840300089296, "grad_norm": 1.7613350787824964, "learning_rate": 9.67687745207022e-06, "loss": 0.0932, "step": 10240 }, { "epoch": 0.04274770301507957, "grad_norm": 1.580888789823384, "learning_rate": 9.674515567421025e-06, "loss": 0.0822, "step": 10245 }, { "epoch": 0.042768565730069846, "grad_norm": 1.1247088707616117, "learning_rate": 9.672155411359602e-06, "loss": 0.0777, "step": 10250 }, { "epoch": 0.042789428445060125, "grad_norm": 1.2426479456623283, "learning_rate": 9.66979698177848e-06, "loss": 0.0699, "step": 10255 }, { "epoch": 0.042810291160050404, "grad_norm": 1.523331876903272, "learning_rate": 9.667440276573783e-06, "loss": 0.0718, "step": 10260 }, { "epoch": 0.04283115387504068, "grad_norm": 2.8757705274498355, "learning_rate": 9.665085293645229e-06, "loss": 0.0658, "step": 10265 }, { "epoch": 0.04285201659003096, "grad_norm": 1.0658157402909376, "learning_rate": 9.662732030896109e-06, "loss": 0.0692, "step": 10270 }, { "epoch": 0.04287287930502124, "grad_norm": 2.1170202915066065, "learning_rate": 9.660380486233292e-06, "loss": 0.0734, "step": 10275 }, { "epoch": 0.04289374202001152, "grad_norm": 0.9507770737061001, "learning_rate": 9.658030657567205e-06, "loss": 0.0646, "step": 10280 }, { "epoch": 0.0429146047350018, "grad_norm": 1.5402658054347385, "learning_rate": 9.655682542811837e-06, "loss": 0.074, "step": 10285 }, { "epoch": 0.04293546744999207, "grad_norm": 1.34283524324796, "learning_rate": 9.653336139884721e-06, "loss": 0.0728, "step": 10290 }, { "epoch": 0.04295633016498235, "grad_norm": 1.6887309118196274, "learning_rate": 9.650991446706931e-06, "loss": 0.0548, "step": 10295 }, { "epoch": 0.04297719287997263, "grad_norm": 1.5759094534348328, "learning_rate": 9.648648461203076e-06, "loss": 0.0802, "step": 10300 }, { "epoch": 0.042998055594962906, "grad_norm": 1.4679893843380412, "learning_rate": 9.646307181301292e-06, "loss": 0.0657, "step": 10305 }, { "epoch": 0.043018918309953184, "grad_norm": 2.6859070043873103, "learning_rate": 9.643967604933224e-06, "loss": 0.0682, "step": 10310 }, { "epoch": 0.04303978102494346, "grad_norm": 1.423029674815058, "learning_rate": 9.641629730034037e-06, "loss": 0.07, "step": 10315 }, { "epoch": 0.04306064373993374, "grad_norm": 9.571681413766717, "learning_rate": 9.639293554542393e-06, "loss": 0.0777, "step": 10320 }, { "epoch": 0.04308150645492402, "grad_norm": 1.5513679810835623, "learning_rate": 9.636959076400448e-06, "loss": 0.0701, "step": 10325 }, { "epoch": 0.0431023691699143, "grad_norm": 2.022030707046825, "learning_rate": 9.634626293553847e-06, "loss": 0.0768, "step": 10330 }, { "epoch": 0.04312323188490457, "grad_norm": 1.858759623737835, "learning_rate": 9.632295203951716e-06, "loss": 0.0745, "step": 10335 }, { "epoch": 0.04314409459989485, "grad_norm": 2.245082708776336, "learning_rate": 9.629965805546648e-06, "loss": 0.0875, "step": 10340 }, { "epoch": 0.04316495731488513, "grad_norm": 1.306788237945564, "learning_rate": 9.6276380962947e-06, "loss": 0.0612, "step": 10345 }, { "epoch": 0.04318582002987541, "grad_norm": 1.8442184470182703, "learning_rate": 9.625312074155395e-06, "loss": 0.0816, "step": 10350 }, { "epoch": 0.043206682744865686, "grad_norm": 2.442325154646614, "learning_rate": 9.622987737091695e-06, "loss": 0.0766, "step": 10355 }, { "epoch": 0.043227545459855965, "grad_norm": 1.0854755686786588, "learning_rate": 9.620665083070007e-06, "loss": 0.077, "step": 10360 }, { "epoch": 0.043248408174846244, "grad_norm": 3.382404676055812, "learning_rate": 9.618344110060169e-06, "loss": 0.0965, "step": 10365 }, { "epoch": 0.04326927088983652, "grad_norm": 1.0631009020568858, "learning_rate": 9.616024816035454e-06, "loss": 0.072, "step": 10370 }, { "epoch": 0.043290133604826794, "grad_norm": 1.2740462405983524, "learning_rate": 9.61370719897255e-06, "loss": 0.0656, "step": 10375 }, { "epoch": 0.04331099631981707, "grad_norm": 1.499219631718863, "learning_rate": 9.611391256851555e-06, "loss": 0.0909, "step": 10380 }, { "epoch": 0.04333185903480735, "grad_norm": 1.2958484165343471, "learning_rate": 9.609076987655972e-06, "loss": 0.076, "step": 10385 }, { "epoch": 0.04335272174979763, "grad_norm": 2.8728683890923135, "learning_rate": 9.606764389372702e-06, "loss": 0.0839, "step": 10390 }, { "epoch": 0.04337358446478791, "grad_norm": 1.8565133924295443, "learning_rate": 9.604453459992034e-06, "loss": 0.0795, "step": 10395 }, { "epoch": 0.04339444717977819, "grad_norm": 1.5053083610309006, "learning_rate": 9.602144197507649e-06, "loss": 0.0724, "step": 10400 }, { "epoch": 0.04341530989476847, "grad_norm": 1.2194168479540972, "learning_rate": 9.599836599916588e-06, "loss": 0.0752, "step": 10405 }, { "epoch": 0.043436172609758746, "grad_norm": 1.0439921353742319, "learning_rate": 9.59753066521927e-06, "loss": 0.0676, "step": 10410 }, { "epoch": 0.043457035324749024, "grad_norm": 1.3442614976699205, "learning_rate": 9.59522639141947e-06, "loss": 0.0662, "step": 10415 }, { "epoch": 0.043477898039739296, "grad_norm": 1.546811570394597, "learning_rate": 9.59292377652432e-06, "loss": 0.0728, "step": 10420 }, { "epoch": 0.043498760754729575, "grad_norm": 2.5579195153451, "learning_rate": 9.590622818544295e-06, "loss": 0.0976, "step": 10425 }, { "epoch": 0.043519623469719854, "grad_norm": 1.631442245270429, "learning_rate": 9.588323515493214e-06, "loss": 0.0642, "step": 10430 }, { "epoch": 0.04354048618471013, "grad_norm": 1.789519816638124, "learning_rate": 9.586025865388218e-06, "loss": 0.0751, "step": 10435 }, { "epoch": 0.04356134889970041, "grad_norm": 1.2000096409879557, "learning_rate": 9.58372986624978e-06, "loss": 0.0764, "step": 10440 }, { "epoch": 0.04358221161469069, "grad_norm": 1.8780958670219583, "learning_rate": 9.581435516101691e-06, "loss": 0.0737, "step": 10445 }, { "epoch": 0.04360307432968097, "grad_norm": 1.5498070672659683, "learning_rate": 9.579142812971048e-06, "loss": 0.0823, "step": 10450 }, { "epoch": 0.04362393704467125, "grad_norm": 0.8510196294230824, "learning_rate": 9.576851754888252e-06, "loss": 0.0718, "step": 10455 }, { "epoch": 0.043644799759661526, "grad_norm": 1.3518760415448479, "learning_rate": 9.574562339887002e-06, "loss": 0.0711, "step": 10460 }, { "epoch": 0.0436656624746518, "grad_norm": 1.4825622283193478, "learning_rate": 9.572274566004285e-06, "loss": 0.0721, "step": 10465 }, { "epoch": 0.04368652518964208, "grad_norm": 1.246693854045137, "learning_rate": 9.569988431280364e-06, "loss": 0.0912, "step": 10470 }, { "epoch": 0.043707387904632355, "grad_norm": 0.9796972685561712, "learning_rate": 9.567703933758789e-06, "loss": 0.0814, "step": 10475 }, { "epoch": 0.043728250619622634, "grad_norm": 2.771396165366895, "learning_rate": 9.565421071486368e-06, "loss": 0.0812, "step": 10480 }, { "epoch": 0.04374911333461291, "grad_norm": 1.0397037668322608, "learning_rate": 9.56313984251317e-06, "loss": 0.0728, "step": 10485 }, { "epoch": 0.04376997604960319, "grad_norm": 1.4097525404761722, "learning_rate": 9.560860244892523e-06, "loss": 0.0647, "step": 10490 }, { "epoch": 0.04379083876459347, "grad_norm": 1.6461147112087116, "learning_rate": 9.558582276680996e-06, "loss": 0.0607, "step": 10495 }, { "epoch": 0.04381170147958375, "grad_norm": 0.8680912425592497, "learning_rate": 9.556305935938404e-06, "loss": 0.0788, "step": 10500 }, { "epoch": 0.04383256419457403, "grad_norm": 1.2719345244511957, "learning_rate": 9.554031220727789e-06, "loss": 0.0782, "step": 10505 }, { "epoch": 0.0438534269095643, "grad_norm": 1.130049646509835, "learning_rate": 9.551758129115418e-06, "loss": 0.0636, "step": 10510 }, { "epoch": 0.04387428962455458, "grad_norm": 4.143343370171745, "learning_rate": 9.549486659170786e-06, "loss": 0.0693, "step": 10515 }, { "epoch": 0.04389515233954486, "grad_norm": 1.6486873639872541, "learning_rate": 9.547216808966591e-06, "loss": 0.0743, "step": 10520 }, { "epoch": 0.043916015054535136, "grad_norm": 2.7287198055753796, "learning_rate": 9.544948576578741e-06, "loss": 0.0658, "step": 10525 }, { "epoch": 0.043936877769525415, "grad_norm": 1.3395765346656887, "learning_rate": 9.542681960086337e-06, "loss": 0.0782, "step": 10530 }, { "epoch": 0.043957740484515694, "grad_norm": 1.8306942681321838, "learning_rate": 9.54041695757168e-06, "loss": 0.0657, "step": 10535 }, { "epoch": 0.04397860319950597, "grad_norm": 2.1667849712422944, "learning_rate": 9.538153567120247e-06, "loss": 0.0723, "step": 10540 }, { "epoch": 0.04399946591449625, "grad_norm": 1.571301718277195, "learning_rate": 9.5358917868207e-06, "loss": 0.0636, "step": 10545 }, { "epoch": 0.04402032862948653, "grad_norm": 1.1167822587305563, "learning_rate": 9.53363161476487e-06, "loss": 0.065, "step": 10550 }, { "epoch": 0.0440411913444768, "grad_norm": 1.241675567994467, "learning_rate": 9.531373049047745e-06, "loss": 0.0864, "step": 10555 }, { "epoch": 0.04406205405946708, "grad_norm": 1.960390941780656, "learning_rate": 9.529116087767484e-06, "loss": 0.073, "step": 10560 }, { "epoch": 0.04408291677445736, "grad_norm": 1.7397740349671005, "learning_rate": 9.526860729025387e-06, "loss": 0.0867, "step": 10565 }, { "epoch": 0.04410377948944764, "grad_norm": 1.7357015178395954, "learning_rate": 9.524606970925902e-06, "loss": 0.0878, "step": 10570 }, { "epoch": 0.04412464220443792, "grad_norm": 1.6728536871570872, "learning_rate": 9.522354811576614e-06, "loss": 0.0684, "step": 10575 }, { "epoch": 0.044145504919428195, "grad_norm": 1.3549535552155123, "learning_rate": 9.520104249088237e-06, "loss": 0.0634, "step": 10580 }, { "epoch": 0.044166367634418474, "grad_norm": 1.9154251841181256, "learning_rate": 9.517855281574613e-06, "loss": 0.0809, "step": 10585 }, { "epoch": 0.04418723034940875, "grad_norm": 1.2830064915832182, "learning_rate": 9.515607907152698e-06, "loss": 0.0861, "step": 10590 }, { "epoch": 0.04420809306439903, "grad_norm": 1.752118791392996, "learning_rate": 9.513362123942559e-06, "loss": 0.082, "step": 10595 }, { "epoch": 0.0442289557793893, "grad_norm": 1.5395110362909512, "learning_rate": 9.511117930067371e-06, "loss": 0.0732, "step": 10600 }, { "epoch": 0.04424981849437958, "grad_norm": 1.7068530411841125, "learning_rate": 9.508875323653403e-06, "loss": 0.0568, "step": 10605 }, { "epoch": 0.04427068120936986, "grad_norm": 1.7070790571743164, "learning_rate": 9.506634302830016e-06, "loss": 0.0689, "step": 10610 }, { "epoch": 0.04429154392436014, "grad_norm": 1.5203243923558485, "learning_rate": 9.50439486572966e-06, "loss": 0.1013, "step": 10615 }, { "epoch": 0.04431240663935042, "grad_norm": 1.0099674714331868, "learning_rate": 9.502157010487853e-06, "loss": 0.0698, "step": 10620 }, { "epoch": 0.0443332693543407, "grad_norm": 1.3710953711511733, "learning_rate": 9.499920735243197e-06, "loss": 0.0651, "step": 10625 }, { "epoch": 0.044354132069330976, "grad_norm": 1.9928391365364455, "learning_rate": 9.497686038137347e-06, "loss": 0.0868, "step": 10630 }, { "epoch": 0.044374994784321255, "grad_norm": 1.3559887902995826, "learning_rate": 9.495452917315023e-06, "loss": 0.0991, "step": 10635 }, { "epoch": 0.04439585749931153, "grad_norm": 1.720187520899583, "learning_rate": 9.493221370924001e-06, "loss": 0.0724, "step": 10640 }, { "epoch": 0.044416720214301805, "grad_norm": 1.3101353097888975, "learning_rate": 9.490991397115097e-06, "loss": 0.0673, "step": 10645 }, { "epoch": 0.044437582929292084, "grad_norm": 1.7219467417315055, "learning_rate": 9.488762994042164e-06, "loss": 0.0893, "step": 10650 }, { "epoch": 0.04445844564428236, "grad_norm": 1.9468639236680303, "learning_rate": 9.48653615986209e-06, "loss": 0.0672, "step": 10655 }, { "epoch": 0.04447930835927264, "grad_norm": 1.9605424307186787, "learning_rate": 9.484310892734795e-06, "loss": 0.0769, "step": 10660 }, { "epoch": 0.04450017107426292, "grad_norm": 1.6929822225664397, "learning_rate": 9.48208719082321e-06, "loss": 0.0751, "step": 10665 }, { "epoch": 0.0445210337892532, "grad_norm": 2.0816793105648173, "learning_rate": 9.479865052293288e-06, "loss": 0.0619, "step": 10670 }, { "epoch": 0.04454189650424348, "grad_norm": 1.1394675080511156, "learning_rate": 9.477644475313984e-06, "loss": 0.0707, "step": 10675 }, { "epoch": 0.04456275921923376, "grad_norm": 1.549116324859885, "learning_rate": 9.475425458057258e-06, "loss": 0.0756, "step": 10680 }, { "epoch": 0.044583621934224035, "grad_norm": 1.367451739587858, "learning_rate": 9.473207998698054e-06, "loss": 0.0609, "step": 10685 }, { "epoch": 0.04460448464921431, "grad_norm": 1.2876669710496773, "learning_rate": 9.47099209541432e-06, "loss": 0.0588, "step": 10690 }, { "epoch": 0.044625347364204586, "grad_norm": 1.4779188196850885, "learning_rate": 9.468777746386975e-06, "loss": 0.0641, "step": 10695 }, { "epoch": 0.044646210079194865, "grad_norm": 1.0828530877511318, "learning_rate": 9.466564949799916e-06, "loss": 0.0828, "step": 10700 }, { "epoch": 0.04466707279418514, "grad_norm": 0.7827857317230023, "learning_rate": 9.464353703840012e-06, "loss": 0.0657, "step": 10705 }, { "epoch": 0.04468793550917542, "grad_norm": 0.9602739707852401, "learning_rate": 9.462144006697094e-06, "loss": 0.0735, "step": 10710 }, { "epoch": 0.0447087982241657, "grad_norm": 1.4449981870401818, "learning_rate": 9.459935856563948e-06, "loss": 0.0821, "step": 10715 }, { "epoch": 0.04472966093915598, "grad_norm": 1.910340630120448, "learning_rate": 9.457729251636314e-06, "loss": 0.0785, "step": 10720 }, { "epoch": 0.04475052365414626, "grad_norm": 2.402516766990592, "learning_rate": 9.455524190112878e-06, "loss": 0.0985, "step": 10725 }, { "epoch": 0.04477138636913654, "grad_norm": 2.1409091877540463, "learning_rate": 9.453320670195256e-06, "loss": 0.085, "step": 10730 }, { "epoch": 0.04479224908412681, "grad_norm": 1.3016923576766133, "learning_rate": 9.451118690088006e-06, "loss": 0.0691, "step": 10735 }, { "epoch": 0.04481311179911709, "grad_norm": 1.7451271742626078, "learning_rate": 9.448918247998605e-06, "loss": 0.0748, "step": 10740 }, { "epoch": 0.044833974514107366, "grad_norm": 2.515399649783704, "learning_rate": 9.446719342137457e-06, "loss": 0.0746, "step": 10745 }, { "epoch": 0.044854837229097645, "grad_norm": 1.4164436881351619, "learning_rate": 9.444521970717872e-06, "loss": 0.0692, "step": 10750 }, { "epoch": 0.044875699944087924, "grad_norm": 1.8431710099592755, "learning_rate": 9.442326131956075e-06, "loss": 0.0773, "step": 10755 }, { "epoch": 0.0448965626590782, "grad_norm": 2.8386109472512224, "learning_rate": 9.44013182407119e-06, "loss": 0.0931, "step": 10760 }, { "epoch": 0.04491742537406848, "grad_norm": 1.3865596728921368, "learning_rate": 9.437939045285232e-06, "loss": 0.0687, "step": 10765 }, { "epoch": 0.04493828808905876, "grad_norm": 1.5274722032501347, "learning_rate": 9.435747793823114e-06, "loss": 0.0627, "step": 10770 }, { "epoch": 0.04495915080404904, "grad_norm": 2.4651297568292687, "learning_rate": 9.433558067912627e-06, "loss": 0.0765, "step": 10775 }, { "epoch": 0.04498001351903931, "grad_norm": 1.4922328678355365, "learning_rate": 9.431369865784442e-06, "loss": 0.0784, "step": 10780 }, { "epoch": 0.04500087623402959, "grad_norm": 1.1717309289798337, "learning_rate": 9.4291831856721e-06, "loss": 0.1917, "step": 10785 }, { "epoch": 0.04502173894901987, "grad_norm": 2.088514783367143, "learning_rate": 9.426998025812007e-06, "loss": 0.0791, "step": 10790 }, { "epoch": 0.04504260166401015, "grad_norm": 2.22153259678337, "learning_rate": 9.42481438444343e-06, "loss": 0.0865, "step": 10795 }, { "epoch": 0.045063464379000426, "grad_norm": 1.395239325581135, "learning_rate": 9.422632259808493e-06, "loss": 0.0577, "step": 10800 }, { "epoch": 0.045084327093990705, "grad_norm": 1.2500182313290182, "learning_rate": 9.420451650152158e-06, "loss": 0.0658, "step": 10805 }, { "epoch": 0.04510518980898098, "grad_norm": 2.0115081983331984, "learning_rate": 9.418272553722239e-06, "loss": 0.0748, "step": 10810 }, { "epoch": 0.04512605252397126, "grad_norm": 1.2430197179847573, "learning_rate": 9.416094968769381e-06, "loss": 0.0682, "step": 10815 }, { "epoch": 0.04514691523896154, "grad_norm": 1.3505444090074268, "learning_rate": 9.41391889354706e-06, "loss": 0.0837, "step": 10820 }, { "epoch": 0.04516777795395181, "grad_norm": 2.7245767159093, "learning_rate": 9.411744326311574e-06, "loss": 0.0846, "step": 10825 }, { "epoch": 0.04518864066894209, "grad_norm": 1.663086518225985, "learning_rate": 9.409571265322044e-06, "loss": 0.0884, "step": 10830 }, { "epoch": 0.04520950338393237, "grad_norm": 1.3315642270407269, "learning_rate": 9.407399708840394e-06, "loss": 0.0705, "step": 10835 }, { "epoch": 0.04523036609892265, "grad_norm": 2.2390188415449317, "learning_rate": 9.40522965513137e-06, "loss": 0.0759, "step": 10840 }, { "epoch": 0.04525122881391293, "grad_norm": 1.4986190037241243, "learning_rate": 9.403061102462502e-06, "loss": 0.0823, "step": 10845 }, { "epoch": 0.045272091528903206, "grad_norm": 1.401038173559513, "learning_rate": 9.400894049104123e-06, "loss": 0.0797, "step": 10850 }, { "epoch": 0.045292954243893485, "grad_norm": 1.6023528941711978, "learning_rate": 9.39872849332936e-06, "loss": 0.0753, "step": 10855 }, { "epoch": 0.045313816958883764, "grad_norm": 1.8507429089723724, "learning_rate": 9.396564433414114e-06, "loss": 0.0691, "step": 10860 }, { "epoch": 0.04533467967387404, "grad_norm": 1.7646841612754443, "learning_rate": 9.394401867637065e-06, "loss": 0.0831, "step": 10865 }, { "epoch": 0.045355542388864314, "grad_norm": 1.5295367414582939, "learning_rate": 9.392240794279668e-06, "loss": 0.0727, "step": 10870 }, { "epoch": 0.04537640510385459, "grad_norm": 1.5007659397580813, "learning_rate": 9.390081211626147e-06, "loss": 0.0814, "step": 10875 }, { "epoch": 0.04539726781884487, "grad_norm": 1.6941578171980272, "learning_rate": 9.387923117963478e-06, "loss": 0.073, "step": 10880 }, { "epoch": 0.04541813053383515, "grad_norm": 0.8649992279184073, "learning_rate": 9.385766511581398e-06, "loss": 0.0724, "step": 10885 }, { "epoch": 0.04543899324882543, "grad_norm": 1.4395672360192506, "learning_rate": 9.383611390772392e-06, "loss": 0.0687, "step": 10890 }, { "epoch": 0.04545985596381571, "grad_norm": 2.2247021945761287, "learning_rate": 9.381457753831683e-06, "loss": 0.0765, "step": 10895 }, { "epoch": 0.04548071867880599, "grad_norm": 1.3436358717461827, "learning_rate": 9.379305599057239e-06, "loss": 0.065, "step": 10900 }, { "epoch": 0.045501581393796266, "grad_norm": 1.1024684506931957, "learning_rate": 9.377154924749758e-06, "loss": 0.0659, "step": 10905 }, { "epoch": 0.045522444108786544, "grad_norm": 1.8332638434012325, "learning_rate": 9.37500572921266e-06, "loss": 0.0739, "step": 10910 }, { "epoch": 0.045543306823776816, "grad_norm": 1.6268235864411473, "learning_rate": 9.37285801075209e-06, "loss": 0.0749, "step": 10915 }, { "epoch": 0.045564169538767095, "grad_norm": 1.2106413757619456, "learning_rate": 9.370711767676907e-06, "loss": 0.0807, "step": 10920 }, { "epoch": 0.045585032253757374, "grad_norm": 1.1631383761739462, "learning_rate": 9.368566998298679e-06, "loss": 0.0799, "step": 10925 }, { "epoch": 0.04560589496874765, "grad_norm": 1.0418028093348481, "learning_rate": 9.36642370093168e-06, "loss": 0.072, "step": 10930 }, { "epoch": 0.04562675768373793, "grad_norm": 2.0093008816113596, "learning_rate": 9.364281873892878e-06, "loss": 0.0755, "step": 10935 }, { "epoch": 0.04564762039872821, "grad_norm": 1.6070720055031908, "learning_rate": 9.362141515501934e-06, "loss": 0.0831, "step": 10940 }, { "epoch": 0.04566848311371849, "grad_norm": 1.3583475005233394, "learning_rate": 9.360002624081205e-06, "loss": 0.0546, "step": 10945 }, { "epoch": 0.04568934582870877, "grad_norm": 1.266206274297242, "learning_rate": 9.35786519795572e-06, "loss": 0.0894, "step": 10950 }, { "epoch": 0.04571020854369904, "grad_norm": 1.2990501377140733, "learning_rate": 9.355729235453182e-06, "loss": 0.0686, "step": 10955 }, { "epoch": 0.04573107125868932, "grad_norm": 2.125140351480254, "learning_rate": 9.353594734903981e-06, "loss": 0.0599, "step": 10960 }, { "epoch": 0.0457519339736796, "grad_norm": 1.3928226740043008, "learning_rate": 9.351461694641153e-06, "loss": 0.0844, "step": 10965 }, { "epoch": 0.045772796688669876, "grad_norm": 0.9359033008850547, "learning_rate": 9.349330113000405e-06, "loss": 0.0607, "step": 10970 }, { "epoch": 0.045793659403660154, "grad_norm": 1.1186349768062829, "learning_rate": 9.347199988320094e-06, "loss": 0.0768, "step": 10975 }, { "epoch": 0.04581452211865043, "grad_norm": 1.425305822790663, "learning_rate": 9.345071318941232e-06, "loss": 0.0756, "step": 10980 }, { "epoch": 0.04583538483364071, "grad_norm": 1.5754612199314877, "learning_rate": 9.342944103207467e-06, "loss": 0.072, "step": 10985 }, { "epoch": 0.04585624754863099, "grad_norm": 2.254401361029715, "learning_rate": 9.340818339465084e-06, "loss": 0.0742, "step": 10990 }, { "epoch": 0.04587711026362127, "grad_norm": 1.1518121589992332, "learning_rate": 9.338694026063012e-06, "loss": 0.0718, "step": 10995 }, { "epoch": 0.04589797297861154, "grad_norm": 1.7871221890309728, "learning_rate": 9.336571161352794e-06, "loss": 0.0617, "step": 11000 }, { "epoch": 0.04591883569360182, "grad_norm": 1.9412795612159028, "learning_rate": 9.334449743688603e-06, "loss": 0.0764, "step": 11005 }, { "epoch": 0.0459396984085921, "grad_norm": 1.8958362994044153, "learning_rate": 9.332329771427225e-06, "loss": 0.0814, "step": 11010 }, { "epoch": 0.04596056112358238, "grad_norm": 1.2335693276763107, "learning_rate": 9.33021124292806e-06, "loss": 0.0746, "step": 11015 }, { "epoch": 0.045981423838572656, "grad_norm": 1.382339068789231, "learning_rate": 9.32809415655311e-06, "loss": 0.0684, "step": 11020 }, { "epoch": 0.046002286553562935, "grad_norm": 1.207631947017694, "learning_rate": 9.325978510666984e-06, "loss": 0.07, "step": 11025 }, { "epoch": 0.046023149268553214, "grad_norm": 1.066391735745383, "learning_rate": 9.323864303636877e-06, "loss": 0.0655, "step": 11030 }, { "epoch": 0.04604401198354349, "grad_norm": 2.034625005181826, "learning_rate": 9.32175153383258e-06, "loss": 0.0813, "step": 11035 }, { "epoch": 0.04606487469853377, "grad_norm": 1.5445238176951186, "learning_rate": 9.319640199626471e-06, "loss": 0.0667, "step": 11040 }, { "epoch": 0.04608573741352404, "grad_norm": 1.3813665467882457, "learning_rate": 9.3175302993935e-06, "loss": 0.0743, "step": 11045 }, { "epoch": 0.04610660012851432, "grad_norm": 1.28133948875582, "learning_rate": 9.315421831511195e-06, "loss": 0.0655, "step": 11050 }, { "epoch": 0.0461274628435046, "grad_norm": 1.4347255948830113, "learning_rate": 9.313314794359656e-06, "loss": 0.0686, "step": 11055 }, { "epoch": 0.04614832555849488, "grad_norm": 1.812622220842516, "learning_rate": 9.311209186321542e-06, "loss": 0.0611, "step": 11060 }, { "epoch": 0.04616918827348516, "grad_norm": 2.695054783092747, "learning_rate": 9.309105005782071e-06, "loss": 0.0656, "step": 11065 }, { "epoch": 0.04619005098847544, "grad_norm": 1.3651070476274485, "learning_rate": 9.307002251129016e-06, "loss": 0.071, "step": 11070 }, { "epoch": 0.046210913703465716, "grad_norm": 1.7857596553866169, "learning_rate": 9.3049009207527e-06, "loss": 0.0785, "step": 11075 }, { "epoch": 0.046231776418455994, "grad_norm": 1.770509691189056, "learning_rate": 9.302801013045979e-06, "loss": 0.0752, "step": 11080 }, { "epoch": 0.04625263913344627, "grad_norm": 1.7909207897262556, "learning_rate": 9.30070252640426e-06, "loss": 0.06, "step": 11085 }, { "epoch": 0.046273501848436545, "grad_norm": 1.9459784121882093, "learning_rate": 9.298605459225477e-06, "loss": 0.07, "step": 11090 }, { "epoch": 0.046294364563426824, "grad_norm": 1.2268026739351165, "learning_rate": 9.296509809910086e-06, "loss": 0.0555, "step": 11095 }, { "epoch": 0.0463152272784171, "grad_norm": 1.486119041799247, "learning_rate": 9.294415576861077e-06, "loss": 0.0631, "step": 11100 }, { "epoch": 0.04633608999340738, "grad_norm": 1.3485548647362628, "learning_rate": 9.292322758483944e-06, "loss": 0.0724, "step": 11105 }, { "epoch": 0.04635695270839766, "grad_norm": 1.1334357015552372, "learning_rate": 9.290231353186702e-06, "loss": 0.0714, "step": 11110 }, { "epoch": 0.04637781542338794, "grad_norm": 1.4235082168753426, "learning_rate": 9.28814135937987e-06, "loss": 0.0608, "step": 11115 }, { "epoch": 0.04639867813837822, "grad_norm": 2.0604772106438562, "learning_rate": 9.28605277547647e-06, "loss": 0.0643, "step": 11120 }, { "epoch": 0.046419540853368496, "grad_norm": 2.5887884784446533, "learning_rate": 9.283965599892024e-06, "loss": 0.0749, "step": 11125 }, { "epoch": 0.046440403568358775, "grad_norm": 1.559927642110303, "learning_rate": 9.281879831044534e-06, "loss": 0.0823, "step": 11130 }, { "epoch": 0.04646126628334905, "grad_norm": 2.777169743451401, "learning_rate": 9.279795467354505e-06, "loss": 0.0847, "step": 11135 }, { "epoch": 0.046482128998339325, "grad_norm": 1.4310297502772535, "learning_rate": 9.277712507244908e-06, "loss": 0.0637, "step": 11140 }, { "epoch": 0.046502991713329604, "grad_norm": 1.7775438266167716, "learning_rate": 9.275630949141207e-06, "loss": 0.1012, "step": 11145 }, { "epoch": 0.04652385442831988, "grad_norm": 1.2654665068525168, "learning_rate": 9.273550791471323e-06, "loss": 0.0822, "step": 11150 }, { "epoch": 0.04654471714331016, "grad_norm": 1.6731572029754436, "learning_rate": 9.271472032665652e-06, "loss": 0.067, "step": 11155 }, { "epoch": 0.04656557985830044, "grad_norm": 1.29526245943808, "learning_rate": 9.26939467115705e-06, "loss": 0.0606, "step": 11160 }, { "epoch": 0.04658644257329072, "grad_norm": 1.511140829307886, "learning_rate": 9.26731870538083e-06, "loss": 0.0796, "step": 11165 }, { "epoch": 0.046607305288281, "grad_norm": 1.8023214319604683, "learning_rate": 9.265244133774754e-06, "loss": 0.0635, "step": 11170 }, { "epoch": 0.04662816800327128, "grad_norm": 2.141613386004515, "learning_rate": 9.263170954779039e-06, "loss": 0.0866, "step": 11175 }, { "epoch": 0.04664903071826155, "grad_norm": 1.2926311410184574, "learning_rate": 9.261099166836331e-06, "loss": 0.0646, "step": 11180 }, { "epoch": 0.04666989343325183, "grad_norm": 1.393730769649788, "learning_rate": 9.259028768391724e-06, "loss": 0.0771, "step": 11185 }, { "epoch": 0.046690756148242106, "grad_norm": 1.00382534017907, "learning_rate": 9.256959757892744e-06, "loss": 0.0597, "step": 11190 }, { "epoch": 0.046711618863232385, "grad_norm": 1.3108390616269017, "learning_rate": 9.25489213378934e-06, "loss": 0.0453, "step": 11195 }, { "epoch": 0.046732481578222664, "grad_norm": 2.493794451553075, "learning_rate": 9.25282589453388e-06, "loss": 0.0898, "step": 11200 }, { "epoch": 0.04675334429321294, "grad_norm": 2.0967977399003623, "learning_rate": 9.250761038581156e-06, "loss": 0.0919, "step": 11205 }, { "epoch": 0.04677420700820322, "grad_norm": 2.0318345399140636, "learning_rate": 9.248697564388373e-06, "loss": 0.067, "step": 11210 }, { "epoch": 0.0467950697231935, "grad_norm": 1.762542844782598, "learning_rate": 9.24663547041514e-06, "loss": 0.0643, "step": 11215 }, { "epoch": 0.04681593243818378, "grad_norm": 1.3717658881232453, "learning_rate": 9.244574755123474e-06, "loss": 0.0851, "step": 11220 }, { "epoch": 0.04683679515317405, "grad_norm": 2.4839014321180595, "learning_rate": 9.242515416977782e-06, "loss": 0.0698, "step": 11225 }, { "epoch": 0.04685765786816433, "grad_norm": 1.8146658840195777, "learning_rate": 9.240457454444873e-06, "loss": 0.0732, "step": 11230 }, { "epoch": 0.04687852058315461, "grad_norm": 1.2778761285623874, "learning_rate": 9.238400865993942e-06, "loss": 0.0642, "step": 11235 }, { "epoch": 0.04689938329814489, "grad_norm": 1.7049395774558365, "learning_rate": 9.236345650096562e-06, "loss": 0.0716, "step": 11240 }, { "epoch": 0.046920246013135165, "grad_norm": 1.4352907648431295, "learning_rate": 9.234291805226694e-06, "loss": 0.0742, "step": 11245 }, { "epoch": 0.046941108728125444, "grad_norm": 1.207201966110266, "learning_rate": 9.232239329860665e-06, "loss": 0.0592, "step": 11250 }, { "epoch": 0.04696197144311572, "grad_norm": 1.7389527677290613, "learning_rate": 9.230188222477184e-06, "loss": 0.0877, "step": 11255 }, { "epoch": 0.046982834158106, "grad_norm": 1.2245270988029988, "learning_rate": 9.228138481557309e-06, "loss": 0.0678, "step": 11260 }, { "epoch": 0.04700369687309628, "grad_norm": 1.1205082173958916, "learning_rate": 9.22609010558447e-06, "loss": 0.0504, "step": 11265 }, { "epoch": 0.04702455958808655, "grad_norm": 2.0355936524936817, "learning_rate": 9.224043093044446e-06, "loss": 0.0826, "step": 11270 }, { "epoch": 0.04704542230307683, "grad_norm": 1.7508327255803644, "learning_rate": 9.22199744242537e-06, "loss": 0.0699, "step": 11275 }, { "epoch": 0.04706628501806711, "grad_norm": 1.821761818375813, "learning_rate": 9.219953152217719e-06, "loss": 0.0793, "step": 11280 }, { "epoch": 0.04708714773305739, "grad_norm": 1.3468365147896855, "learning_rate": 9.217910220914315e-06, "loss": 0.0693, "step": 11285 }, { "epoch": 0.04710801044804767, "grad_norm": 1.8002218980490223, "learning_rate": 9.215868647010311e-06, "loss": 0.0757, "step": 11290 }, { "epoch": 0.047128873163037946, "grad_norm": 1.3929115475855003, "learning_rate": 9.213828429003195e-06, "loss": 0.0783, "step": 11295 }, { "epoch": 0.047149735878028225, "grad_norm": 1.132859525011059, "learning_rate": 9.211789565392785e-06, "loss": 0.0853, "step": 11300 }, { "epoch": 0.0471705985930185, "grad_norm": 1.6779671383838535, "learning_rate": 9.209752054681218e-06, "loss": 0.0685, "step": 11305 }, { "epoch": 0.04719146130800878, "grad_norm": 1.2376887319940104, "learning_rate": 9.207715895372946e-06, "loss": 0.086, "step": 11310 }, { "epoch": 0.047212324022999054, "grad_norm": 1.1680798511609574, "learning_rate": 9.205681085974744e-06, "loss": 0.0703, "step": 11315 }, { "epoch": 0.04723318673798933, "grad_norm": 1.553297303814677, "learning_rate": 9.20364762499569e-06, "loss": 0.066, "step": 11320 }, { "epoch": 0.04725404945297961, "grad_norm": 1.372723352292861, "learning_rate": 9.201615510947165e-06, "loss": 0.0607, "step": 11325 }, { "epoch": 0.04727491216796989, "grad_norm": 1.3515311500036604, "learning_rate": 9.19958474234285e-06, "loss": 0.0582, "step": 11330 }, { "epoch": 0.04729577488296017, "grad_norm": 1.115180090987264, "learning_rate": 9.197555317698726e-06, "loss": 0.0664, "step": 11335 }, { "epoch": 0.04731663759795045, "grad_norm": 0.9987867559034218, "learning_rate": 9.19552723553306e-06, "loss": 0.0697, "step": 11340 }, { "epoch": 0.047337500312940727, "grad_norm": 1.4837169081307682, "learning_rate": 9.193500494366408e-06, "loss": 0.0824, "step": 11345 }, { "epoch": 0.047358363027931005, "grad_norm": 1.7219642854074855, "learning_rate": 9.191475092721605e-06, "loss": 0.0537, "step": 11350 }, { "epoch": 0.047379225742921284, "grad_norm": 2.0133134489468145, "learning_rate": 9.18945102912376e-06, "loss": 0.0704, "step": 11355 }, { "epoch": 0.047400088457911556, "grad_norm": 1.376688500280941, "learning_rate": 9.187428302100265e-06, "loss": 0.064, "step": 11360 }, { "epoch": 0.047420951172901835, "grad_norm": 1.5394103773178551, "learning_rate": 9.185406910180771e-06, "loss": 0.0835, "step": 11365 }, { "epoch": 0.04744181388789211, "grad_norm": 1.6704843772126685, "learning_rate": 9.183386851897192e-06, "loss": 0.0758, "step": 11370 }, { "epoch": 0.04746267660288239, "grad_norm": 1.4038067893976838, "learning_rate": 9.181368125783707e-06, "loss": 0.0777, "step": 11375 }, { "epoch": 0.04748353931787267, "grad_norm": 1.2650843758908343, "learning_rate": 9.179350730376748e-06, "loss": 0.0662, "step": 11380 }, { "epoch": 0.04750440203286295, "grad_norm": 2.628416047918583, "learning_rate": 9.177334664214994e-06, "loss": 0.0652, "step": 11385 }, { "epoch": 0.04752526474785323, "grad_norm": 0.9140449512632854, "learning_rate": 9.175319925839368e-06, "loss": 0.0647, "step": 11390 }, { "epoch": 0.04754612746284351, "grad_norm": 1.1187131519218874, "learning_rate": 9.173306513793046e-06, "loss": 0.059, "step": 11395 }, { "epoch": 0.047566990177833786, "grad_norm": 1.379507864213594, "learning_rate": 9.171294426621424e-06, "loss": 0.0813, "step": 11400 }, { "epoch": 0.04758785289282406, "grad_norm": 1.5510505962515362, "learning_rate": 9.169283662872142e-06, "loss": 0.0632, "step": 11405 }, { "epoch": 0.047608715607814336, "grad_norm": 1.5808778181087642, "learning_rate": 9.167274221095066e-06, "loss": 0.0769, "step": 11410 }, { "epoch": 0.047629578322804615, "grad_norm": 1.3235828117591024, "learning_rate": 9.165266099842285e-06, "loss": 0.0556, "step": 11415 }, { "epoch": 0.047650441037794894, "grad_norm": 1.47705507983604, "learning_rate": 9.163259297668102e-06, "loss": 0.0607, "step": 11420 }, { "epoch": 0.04767130375278517, "grad_norm": 2.3132489097548774, "learning_rate": 9.161253813129044e-06, "loss": 0.077, "step": 11425 }, { "epoch": 0.04769216646777545, "grad_norm": 1.9676692339021966, "learning_rate": 9.159249644783842e-06, "loss": 0.0726, "step": 11430 }, { "epoch": 0.04771302918276573, "grad_norm": 1.6013029792673994, "learning_rate": 9.157246791193435e-06, "loss": 0.0583, "step": 11435 }, { "epoch": 0.04773389189775601, "grad_norm": 1.496737412605992, "learning_rate": 9.155245250920966e-06, "loss": 0.0773, "step": 11440 }, { "epoch": 0.04775475461274629, "grad_norm": 0.8703283998619781, "learning_rate": 9.15324502253177e-06, "loss": 0.0665, "step": 11445 }, { "epoch": 0.04777561732773656, "grad_norm": 1.268231277611595, "learning_rate": 9.151246104593382e-06, "loss": 0.0619, "step": 11450 }, { "epoch": 0.04779648004272684, "grad_norm": 1.5501969886721347, "learning_rate": 9.149248495675519e-06, "loss": 0.0617, "step": 11455 }, { "epoch": 0.04781734275771712, "grad_norm": 1.3266518748578078, "learning_rate": 9.147252194350089e-06, "loss": 0.0722, "step": 11460 }, { "epoch": 0.047838205472707396, "grad_norm": 1.3623770282732293, "learning_rate": 9.145257199191174e-06, "loss": 0.0841, "step": 11465 }, { "epoch": 0.047859068187697675, "grad_norm": 1.0484814361483967, "learning_rate": 9.143263508775039e-06, "loss": 0.0701, "step": 11470 }, { "epoch": 0.04787993090268795, "grad_norm": 0.7949180040489485, "learning_rate": 9.141271121680112e-06, "loss": 0.0614, "step": 11475 }, { "epoch": 0.04790079361767823, "grad_norm": 1.5348590140492344, "learning_rate": 9.139280036486996e-06, "loss": 0.0712, "step": 11480 }, { "epoch": 0.04792165633266851, "grad_norm": 1.6326721529281465, "learning_rate": 9.137290251778454e-06, "loss": 0.0685, "step": 11485 }, { "epoch": 0.04794251904765879, "grad_norm": 1.2427628308966814, "learning_rate": 9.135301766139408e-06, "loss": 0.0572, "step": 11490 }, { "epoch": 0.04796338176264906, "grad_norm": 1.313536224366078, "learning_rate": 9.133314578156937e-06, "loss": 0.0685, "step": 11495 }, { "epoch": 0.04798424447763934, "grad_norm": 1.744348370285148, "learning_rate": 9.131328686420267e-06, "loss": 0.0422, "step": 11500 }, { "epoch": 0.04800510719262962, "grad_norm": 1.4104704239390802, "learning_rate": 9.129344089520774e-06, "loss": 0.0755, "step": 11505 }, { "epoch": 0.0480259699076199, "grad_norm": 1.0255773743717858, "learning_rate": 9.127360786051971e-06, "loss": 0.0678, "step": 11510 }, { "epoch": 0.048046832622610176, "grad_norm": 1.7704810685801484, "learning_rate": 9.125378774609512e-06, "loss": 0.0734, "step": 11515 }, { "epoch": 0.048067695337600455, "grad_norm": 1.6970660997952256, "learning_rate": 9.123398053791189e-06, "loss": 0.0829, "step": 11520 }, { "epoch": 0.048088558052590734, "grad_norm": 1.067451003334292, "learning_rate": 9.121418622196914e-06, "loss": 0.0821, "step": 11525 }, { "epoch": 0.04810942076758101, "grad_norm": 1.5371184946995233, "learning_rate": 9.119440478428734e-06, "loss": 0.0667, "step": 11530 }, { "epoch": 0.04813028348257129, "grad_norm": 1.382306717591953, "learning_rate": 9.117463621090809e-06, "loss": 0.0643, "step": 11535 }, { "epoch": 0.04815114619756156, "grad_norm": 1.2617142719095995, "learning_rate": 9.115488048789426e-06, "loss": 0.0767, "step": 11540 }, { "epoch": 0.04817200891255184, "grad_norm": 1.9123490300007222, "learning_rate": 9.113513760132977e-06, "loss": 0.0679, "step": 11545 }, { "epoch": 0.04819287162754212, "grad_norm": 2.15266094213153, "learning_rate": 9.11154075373196e-06, "loss": 0.0616, "step": 11550 }, { "epoch": 0.0482137343425324, "grad_norm": 1.4835809736408905, "learning_rate": 9.109569028198987e-06, "loss": 0.0769, "step": 11555 }, { "epoch": 0.04823459705752268, "grad_norm": 1.3303608673216065, "learning_rate": 9.107598582148769e-06, "loss": 0.0665, "step": 11560 }, { "epoch": 0.04825545977251296, "grad_norm": 1.6106444617795683, "learning_rate": 9.105629414198109e-06, "loss": 0.0651, "step": 11565 }, { "epoch": 0.048276322487503236, "grad_norm": 1.3796188140247894, "learning_rate": 9.103661522965906e-06, "loss": 0.0551, "step": 11570 }, { "epoch": 0.048297185202493514, "grad_norm": 1.9710892437233238, "learning_rate": 9.101694907073141e-06, "loss": 0.058, "step": 11575 }, { "epoch": 0.048318047917483786, "grad_norm": 1.8437704147001546, "learning_rate": 9.099729565142893e-06, "loss": 0.0767, "step": 11580 }, { "epoch": 0.048338910632474065, "grad_norm": 1.747215697209383, "learning_rate": 9.097765495800307e-06, "loss": 0.0643, "step": 11585 }, { "epoch": 0.048359773347464344, "grad_norm": 1.4075836870905907, "learning_rate": 9.095802697672613e-06, "loss": 0.0756, "step": 11590 }, { "epoch": 0.04838063606245462, "grad_norm": 1.3366925918525427, "learning_rate": 9.09384116938911e-06, "loss": 0.0804, "step": 11595 }, { "epoch": 0.0484014987774449, "grad_norm": 1.2265161117961254, "learning_rate": 9.091880909581162e-06, "loss": 0.0681, "step": 11600 }, { "epoch": 0.04842236149243518, "grad_norm": 1.538808676090589, "learning_rate": 9.08992191688221e-06, "loss": 0.0549, "step": 11605 }, { "epoch": 0.04844322420742546, "grad_norm": 1.020816944173477, "learning_rate": 9.08796418992774e-06, "loss": 0.0549, "step": 11610 }, { "epoch": 0.04846408692241574, "grad_norm": 1.3411474292350367, "learning_rate": 9.086007727355298e-06, "loss": 0.052, "step": 11615 }, { "epoch": 0.048484949637406016, "grad_norm": 1.268921533580906, "learning_rate": 9.084052527804492e-06, "loss": 0.081, "step": 11620 }, { "epoch": 0.04850581235239629, "grad_norm": 1.6400335892137041, "learning_rate": 9.08209858991697e-06, "loss": 0.0649, "step": 11625 }, { "epoch": 0.04852667506738657, "grad_norm": 1.2648076733218607, "learning_rate": 9.08014591233642e-06, "loss": 0.0694, "step": 11630 }, { "epoch": 0.048547537782376846, "grad_norm": 0.9738653159592625, "learning_rate": 9.078194493708584e-06, "loss": 0.0706, "step": 11635 }, { "epoch": 0.048568400497367124, "grad_norm": 1.0798058459461926, "learning_rate": 9.076244332681226e-06, "loss": 0.0652, "step": 11640 }, { "epoch": 0.0485892632123574, "grad_norm": 1.350184245844402, "learning_rate": 9.07429542790415e-06, "loss": 0.0653, "step": 11645 }, { "epoch": 0.04861012592734768, "grad_norm": 1.1908873290970061, "learning_rate": 9.072347778029193e-06, "loss": 0.0737, "step": 11650 }, { "epoch": 0.04863098864233796, "grad_norm": 1.2810948618973181, "learning_rate": 9.070401381710204e-06, "loss": 0.071, "step": 11655 }, { "epoch": 0.04865185135732824, "grad_norm": 1.21623038597582, "learning_rate": 9.068456237603065e-06, "loss": 0.0843, "step": 11660 }, { "epoch": 0.04867271407231852, "grad_norm": 1.125171771284484, "learning_rate": 9.066512344365666e-06, "loss": 0.0601, "step": 11665 }, { "epoch": 0.04869357678730879, "grad_norm": 1.364295869868383, "learning_rate": 9.064569700657917e-06, "loss": 0.0613, "step": 11670 }, { "epoch": 0.04871443950229907, "grad_norm": 1.1764284076849716, "learning_rate": 9.06262830514173e-06, "loss": 0.0703, "step": 11675 }, { "epoch": 0.04873530221728935, "grad_norm": 1.270562560175883, "learning_rate": 9.060688156481033e-06, "loss": 0.0624, "step": 11680 }, { "epoch": 0.048756164932279626, "grad_norm": 0.9613085190589771, "learning_rate": 9.058749253341741e-06, "loss": 0.072, "step": 11685 }, { "epoch": 0.048777027647269905, "grad_norm": 0.76649572456327, "learning_rate": 9.056811594391779e-06, "loss": 0.0702, "step": 11690 }, { "epoch": 0.048797890362260184, "grad_norm": 1.278386696805878, "learning_rate": 9.054875178301059e-06, "loss": 0.07, "step": 11695 }, { "epoch": 0.04881875307725046, "grad_norm": 1.5377249530484747, "learning_rate": 9.052940003741486e-06, "loss": 0.0622, "step": 11700 }, { "epoch": 0.04883961579224074, "grad_norm": 1.5106802913064155, "learning_rate": 9.051006069386943e-06, "loss": 0.066, "step": 11705 }, { "epoch": 0.04886047850723102, "grad_norm": 1.1112002959779501, "learning_rate": 9.04907337391331e-06, "loss": 0.0599, "step": 11710 }, { "epoch": 0.04888134122222129, "grad_norm": 1.4351030936714795, "learning_rate": 9.047141915998435e-06, "loss": 0.0755, "step": 11715 }, { "epoch": 0.04890220393721157, "grad_norm": 1.528593225022516, "learning_rate": 9.045211694322139e-06, "loss": 0.0693, "step": 11720 }, { "epoch": 0.04892306665220185, "grad_norm": 2.6458322185593084, "learning_rate": 9.04328270756622e-06, "loss": 0.0771, "step": 11725 }, { "epoch": 0.04894392936719213, "grad_norm": 1.9840537383791488, "learning_rate": 9.041354954414442e-06, "loss": 0.0671, "step": 11730 }, { "epoch": 0.04896479208218241, "grad_norm": 1.3787335866681445, "learning_rate": 9.039428433552527e-06, "loss": 0.0637, "step": 11735 }, { "epoch": 0.048985654797172686, "grad_norm": 1.3675800019416022, "learning_rate": 9.037503143668165e-06, "loss": 0.0791, "step": 11740 }, { "epoch": 0.049006517512162964, "grad_norm": 2.3579719378999964, "learning_rate": 9.03557908345099e-06, "loss": 0.0599, "step": 11745 }, { "epoch": 0.04902738022715324, "grad_norm": 1.424044960882816, "learning_rate": 9.033656251592598e-06, "loss": 0.1057, "step": 11750 }, { "epoch": 0.04904824294214352, "grad_norm": 1.3988001763379692, "learning_rate": 9.031734646786534e-06, "loss": 0.0669, "step": 11755 }, { "epoch": 0.049069105657133794, "grad_norm": 1.9298602540880903, "learning_rate": 9.029814267728278e-06, "loss": 0.0696, "step": 11760 }, { "epoch": 0.04908996837212407, "grad_norm": 0.9230373938141113, "learning_rate": 9.02789511311526e-06, "loss": 0.0552, "step": 11765 }, { "epoch": 0.04911083108711435, "grad_norm": 1.560477538807225, "learning_rate": 9.025977181646842e-06, "loss": 0.0745, "step": 11770 }, { "epoch": 0.04913169380210463, "grad_norm": 2.2931038678645455, "learning_rate": 9.024060472024324e-06, "loss": 0.0811, "step": 11775 }, { "epoch": 0.04915255651709491, "grad_norm": 1.6435462501590712, "learning_rate": 9.022144982950924e-06, "loss": 0.0601, "step": 11780 }, { "epoch": 0.04917341923208519, "grad_norm": 1.0025343773513848, "learning_rate": 9.020230713131803e-06, "loss": 0.0585, "step": 11785 }, { "epoch": 0.049194281947075466, "grad_norm": 2.3064138765304656, "learning_rate": 9.018317661274034e-06, "loss": 0.0882, "step": 11790 }, { "epoch": 0.049215144662065745, "grad_norm": 1.8703558986869169, "learning_rate": 9.016405826086608e-06, "loss": 0.0612, "step": 11795 }, { "epoch": 0.049236007377056024, "grad_norm": 1.7618390811643583, "learning_rate": 9.014495206280432e-06, "loss": 0.0585, "step": 11800 }, { "epoch": 0.049256870092046295, "grad_norm": 3.04867756540641, "learning_rate": 9.012585800568331e-06, "loss": 0.0638, "step": 11805 }, { "epoch": 0.049277732807036574, "grad_norm": 1.8637939991371517, "learning_rate": 9.01067760766503e-06, "loss": 0.0659, "step": 11810 }, { "epoch": 0.04929859552202685, "grad_norm": 1.3208911307791649, "learning_rate": 9.008770626287161e-06, "loss": 0.0749, "step": 11815 }, { "epoch": 0.04931945823701713, "grad_norm": 1.0253819169046483, "learning_rate": 9.006864855153254e-06, "loss": 0.0566, "step": 11820 }, { "epoch": 0.04934032095200741, "grad_norm": 1.5131335551052223, "learning_rate": 9.00496029298374e-06, "loss": 0.0844, "step": 11825 }, { "epoch": 0.04936118366699769, "grad_norm": 1.3951359469789377, "learning_rate": 9.003056938500942e-06, "loss": 0.0596, "step": 11830 }, { "epoch": 0.04938204638198797, "grad_norm": 1.3449640470687938, "learning_rate": 9.001154790429068e-06, "loss": 0.0868, "step": 11835 }, { "epoch": 0.04940290909697825, "grad_norm": 3.9244733218636667, "learning_rate": 8.999253847494221e-06, "loss": 0.0683, "step": 11840 }, { "epoch": 0.049423771811968525, "grad_norm": 1.540660918051378, "learning_rate": 8.997354108424375e-06, "loss": 0.0617, "step": 11845 }, { "epoch": 0.0494446345269588, "grad_norm": 1.4961168347316027, "learning_rate": 8.995455571949391e-06, "loss": 0.0613, "step": 11850 }, { "epoch": 0.049465497241949076, "grad_norm": 1.9106958144785315, "learning_rate": 8.993558236801008e-06, "loss": 0.0647, "step": 11855 }, { "epoch": 0.049486359956939355, "grad_norm": 1.782048785897743, "learning_rate": 8.991662101712826e-06, "loss": 0.0654, "step": 11860 }, { "epoch": 0.049507222671929633, "grad_norm": 1.391247907090555, "learning_rate": 8.989767165420322e-06, "loss": 0.0803, "step": 11865 }, { "epoch": 0.04952808538691991, "grad_norm": 1.6643886900675982, "learning_rate": 8.987873426660836e-06, "loss": 0.0845, "step": 11870 }, { "epoch": 0.04954894810191019, "grad_norm": 0.8863254896712832, "learning_rate": 8.985980884173564e-06, "loss": 0.082, "step": 11875 }, { "epoch": 0.04956981081690047, "grad_norm": 1.5969575947665646, "learning_rate": 8.984089536699568e-06, "loss": 0.0793, "step": 11880 }, { "epoch": 0.04959067353189075, "grad_norm": 1.6424379618898028, "learning_rate": 8.982199382981757e-06, "loss": 0.0724, "step": 11885 }, { "epoch": 0.04961153624688103, "grad_norm": 2.509110146146561, "learning_rate": 8.980310421764893e-06, "loss": 0.0731, "step": 11890 }, { "epoch": 0.0496323989618713, "grad_norm": 1.065116274254203, "learning_rate": 8.978422651795589e-06, "loss": 0.0687, "step": 11895 }, { "epoch": 0.04965326167686158, "grad_norm": 1.0660523619778006, "learning_rate": 8.976536071822293e-06, "loss": 0.0708, "step": 11900 }, { "epoch": 0.04967412439185186, "grad_norm": 1.5669540767286811, "learning_rate": 8.9746506805953e-06, "loss": 0.0747, "step": 11905 }, { "epoch": 0.049694987106842135, "grad_norm": 1.0381001023118421, "learning_rate": 8.97276647686674e-06, "loss": 0.0665, "step": 11910 }, { "epoch": 0.049715849821832414, "grad_norm": 1.4975799143145985, "learning_rate": 8.970883459390578e-06, "loss": 0.0713, "step": 11915 }, { "epoch": 0.04973671253682269, "grad_norm": 0.850240213809226, "learning_rate": 8.969001626922599e-06, "loss": 0.0665, "step": 11920 }, { "epoch": 0.04975757525181297, "grad_norm": 1.5077767910101092, "learning_rate": 8.967120978220432e-06, "loss": 0.0695, "step": 11925 }, { "epoch": 0.04977843796680325, "grad_norm": 1.265154313855315, "learning_rate": 8.96524151204351e-06, "loss": 0.064, "step": 11930 }, { "epoch": 0.04979930068179353, "grad_norm": 1.210168535915105, "learning_rate": 8.963363227153095e-06, "loss": 0.0577, "step": 11935 }, { "epoch": 0.0498201633967838, "grad_norm": 1.518736566635352, "learning_rate": 8.961486122312266e-06, "loss": 0.0612, "step": 11940 }, { "epoch": 0.04984102611177408, "grad_norm": 1.1972207982664478, "learning_rate": 8.959610196285913e-06, "loss": 0.0704, "step": 11945 }, { "epoch": 0.04986188882676436, "grad_norm": 2.1213196859475962, "learning_rate": 8.957735447840732e-06, "loss": 0.0807, "step": 11950 }, { "epoch": 0.04988275154175464, "grad_norm": 1.3977999384921613, "learning_rate": 8.955861875745228e-06, "loss": 0.0898, "step": 11955 }, { "epoch": 0.049903614256744916, "grad_norm": 1.6527007967470897, "learning_rate": 8.953989478769706e-06, "loss": 0.0761, "step": 11960 }, { "epoch": 0.049924476971735195, "grad_norm": 1.542848358141461, "learning_rate": 8.95211825568627e-06, "loss": 0.0666, "step": 11965 }, { "epoch": 0.04994533968672547, "grad_norm": 1.2140252688897994, "learning_rate": 8.950248205268825e-06, "loss": 0.0699, "step": 11970 }, { "epoch": 0.04996620240171575, "grad_norm": 1.2783773116786812, "learning_rate": 8.94837932629306e-06, "loss": 0.0634, "step": 11975 }, { "epoch": 0.04998706511670603, "grad_norm": 1.4208320051839234, "learning_rate": 8.946511617536455e-06, "loss": 0.075, "step": 11980 }, { "epoch": 0.0500079278316963, "grad_norm": 0.8288319055631375, "learning_rate": 8.944645077778282e-06, "loss": 0.0539, "step": 11985 }, { "epoch": 0.05002879054668658, "grad_norm": 1.452263801928728, "learning_rate": 8.942779705799586e-06, "loss": 0.063, "step": 11990 }, { "epoch": 0.05004965326167686, "grad_norm": 1.0782328051142402, "learning_rate": 8.940915500383196e-06, "loss": 0.0709, "step": 11995 }, { "epoch": 0.05007051597666714, "grad_norm": 2.062704565702089, "learning_rate": 8.939052460313715e-06, "loss": 0.0658, "step": 12000 }, { "epoch": 0.05009137869165742, "grad_norm": 1.4725839476952254, "learning_rate": 8.93719058437752e-06, "loss": 0.0611, "step": 12005 }, { "epoch": 0.050112241406647696, "grad_norm": 1.1548612735014858, "learning_rate": 8.935329871362749e-06, "loss": 0.0739, "step": 12010 }, { "epoch": 0.050133104121637975, "grad_norm": 2.4673864085489243, "learning_rate": 8.933470320059317e-06, "loss": 0.0699, "step": 12015 }, { "epoch": 0.050153966836628254, "grad_norm": 4.58871596841674, "learning_rate": 8.931611929258894e-06, "loss": 0.0658, "step": 12020 }, { "epoch": 0.05017482955161853, "grad_norm": 1.5458081977224019, "learning_rate": 8.92975469775491e-06, "loss": 0.0723, "step": 12025 }, { "epoch": 0.050195692266608805, "grad_norm": 2.7512443794885217, "learning_rate": 8.927898624342548e-06, "loss": 0.0756, "step": 12030 }, { "epoch": 0.05021655498159908, "grad_norm": 1.8646270554091893, "learning_rate": 8.926043707818747e-06, "loss": 0.0662, "step": 12035 }, { "epoch": 0.05023741769658936, "grad_norm": 2.011707143737466, "learning_rate": 8.924189946982196e-06, "loss": 0.0764, "step": 12040 }, { "epoch": 0.05025828041157964, "grad_norm": 1.5924391642090445, "learning_rate": 8.922337340633326e-06, "loss": 0.0763, "step": 12045 }, { "epoch": 0.05027914312656992, "grad_norm": 1.1039962849973948, "learning_rate": 8.920485887574315e-06, "loss": 0.0673, "step": 12050 }, { "epoch": 0.0503000058415602, "grad_norm": 1.3054739035339706, "learning_rate": 8.918635586609071e-06, "loss": 0.1026, "step": 12055 }, { "epoch": 0.05032086855655048, "grad_norm": 1.2495401514033697, "learning_rate": 8.916786436543248e-06, "loss": 0.0546, "step": 12060 }, { "epoch": 0.050341731271540756, "grad_norm": 1.3245533006482408, "learning_rate": 8.91493843618423e-06, "loss": 0.0836, "step": 12065 }, { "epoch": 0.050362593986531035, "grad_norm": 1.8288565440172604, "learning_rate": 8.913091584341127e-06, "loss": 0.0841, "step": 12070 }, { "epoch": 0.050383456701521306, "grad_norm": 1.3837474125172429, "learning_rate": 8.911245879824778e-06, "loss": 0.0687, "step": 12075 }, { "epoch": 0.050404319416511585, "grad_norm": 1.4489722391241018, "learning_rate": 8.909401321447744e-06, "loss": 0.06, "step": 12080 }, { "epoch": 0.050425182131501864, "grad_norm": 1.5547885885511223, "learning_rate": 8.907557908024307e-06, "loss": 0.0731, "step": 12085 }, { "epoch": 0.05044604484649214, "grad_norm": 2.02485506630587, "learning_rate": 8.905715638370466e-06, "loss": 0.0647, "step": 12090 }, { "epoch": 0.05046690756148242, "grad_norm": 1.6747108403334858, "learning_rate": 8.903874511303932e-06, "loss": 0.0714, "step": 12095 }, { "epoch": 0.0504877702764727, "grad_norm": 1.89584659643716, "learning_rate": 8.902034525644124e-06, "loss": 0.0763, "step": 12100 }, { "epoch": 0.05050863299146298, "grad_norm": 1.741598568580111, "learning_rate": 8.900195680212173e-06, "loss": 0.0699, "step": 12105 }, { "epoch": 0.05052949570645326, "grad_norm": 1.5901951461685635, "learning_rate": 8.898357973830911e-06, "loss": 0.0828, "step": 12110 }, { "epoch": 0.050550358421443536, "grad_norm": 0.8810511863045395, "learning_rate": 8.896521405324877e-06, "loss": 0.0523, "step": 12115 }, { "epoch": 0.05057122113643381, "grad_norm": 1.5968706442677902, "learning_rate": 8.894685973520291e-06, "loss": 0.064, "step": 12120 }, { "epoch": 0.05059208385142409, "grad_norm": 1.6477008103982402, "learning_rate": 8.892851677245086e-06, "loss": 0.0803, "step": 12125 }, { "epoch": 0.050612946566414366, "grad_norm": 1.1662770586812194, "learning_rate": 8.891018515328875e-06, "loss": 0.0661, "step": 12130 }, { "epoch": 0.050633809281404644, "grad_norm": 0.9309194136029182, "learning_rate": 8.889186486602964e-06, "loss": 0.0708, "step": 12135 }, { "epoch": 0.05065467199639492, "grad_norm": 1.697193522116404, "learning_rate": 8.887355589900346e-06, "loss": 0.0726, "step": 12140 }, { "epoch": 0.0506755347113852, "grad_norm": 1.4496198937851066, "learning_rate": 8.885525824055686e-06, "loss": 0.0549, "step": 12145 }, { "epoch": 0.05069639742637548, "grad_norm": 2.1694733097772776, "learning_rate": 8.88369718790534e-06, "loss": 0.0827, "step": 12150 }, { "epoch": 0.05071726014136576, "grad_norm": 2.134887945855895, "learning_rate": 8.881869680287331e-06, "loss": 0.0776, "step": 12155 }, { "epoch": 0.05073812285635603, "grad_norm": 1.607012706584965, "learning_rate": 8.88004330004136e-06, "loss": 0.0741, "step": 12160 }, { "epoch": 0.05075898557134631, "grad_norm": 1.4348855230358717, "learning_rate": 8.878218046008792e-06, "loss": 0.0709, "step": 12165 }, { "epoch": 0.05077984828633659, "grad_norm": 1.2711300183427237, "learning_rate": 8.876393917032664e-06, "loss": 0.0551, "step": 12170 }, { "epoch": 0.05080071100132687, "grad_norm": 1.3363481275248177, "learning_rate": 8.874570911957671e-06, "loss": 0.0555, "step": 12175 }, { "epoch": 0.050821573716317146, "grad_norm": 1.3285823611334593, "learning_rate": 8.872749029630173e-06, "loss": 0.0662, "step": 12180 }, { "epoch": 0.050842436431307425, "grad_norm": 1.4523102744985683, "learning_rate": 8.870928268898187e-06, "loss": 0.0709, "step": 12185 }, { "epoch": 0.050863299146297704, "grad_norm": 1.4474353045056312, "learning_rate": 8.869108628611378e-06, "loss": 0.0669, "step": 12190 }, { "epoch": 0.05088416186128798, "grad_norm": 1.0134785388480028, "learning_rate": 8.867290107621069e-06, "loss": 0.0758, "step": 12195 }, { "epoch": 0.05090502457627826, "grad_norm": 2.8447083800819732, "learning_rate": 8.865472704780225e-06, "loss": 0.0712, "step": 12200 }, { "epoch": 0.05092588729126853, "grad_norm": 1.0327119339708304, "learning_rate": 8.863656418943466e-06, "loss": 0.058, "step": 12205 }, { "epoch": 0.05094675000625881, "grad_norm": 1.2858998749748705, "learning_rate": 8.86184124896704e-06, "loss": 0.0635, "step": 12210 }, { "epoch": 0.05096761272124909, "grad_norm": 1.7967510953407855, "learning_rate": 8.860027193708844e-06, "loss": 0.0818, "step": 12215 }, { "epoch": 0.05098847543623937, "grad_norm": 1.1979967590439453, "learning_rate": 8.858214252028407e-06, "loss": 0.0812, "step": 12220 }, { "epoch": 0.05100933815122965, "grad_norm": 1.3847120945954994, "learning_rate": 8.856402422786895e-06, "loss": 0.0646, "step": 12225 }, { "epoch": 0.05103020086621993, "grad_norm": 1.2285782581233748, "learning_rate": 8.854591704847095e-06, "loss": 0.0624, "step": 12230 }, { "epoch": 0.051051063581210206, "grad_norm": 1.6502306836053093, "learning_rate": 8.852782097073432e-06, "loss": 0.079, "step": 12235 }, { "epoch": 0.051071926296200484, "grad_norm": 1.061098258049872, "learning_rate": 8.850973598331946e-06, "loss": 0.0662, "step": 12240 }, { "epoch": 0.05109278901119076, "grad_norm": 1.1074203489693513, "learning_rate": 8.849166207490302e-06, "loss": 0.0779, "step": 12245 }, { "epoch": 0.051113651726181035, "grad_norm": 1.6116309115836425, "learning_rate": 8.847359923417786e-06, "loss": 0.0692, "step": 12250 }, { "epoch": 0.051134514441171314, "grad_norm": 0.9499721206075311, "learning_rate": 8.845554744985289e-06, "loss": 0.0648, "step": 12255 }, { "epoch": 0.05115537715616159, "grad_norm": 1.8292465733976315, "learning_rate": 8.843750671065323e-06, "loss": 0.0672, "step": 12260 }, { "epoch": 0.05117623987115187, "grad_norm": 1.7787856095022283, "learning_rate": 8.841947700532008e-06, "loss": 0.0792, "step": 12265 }, { "epoch": 0.05119710258614215, "grad_norm": 1.3726062851944814, "learning_rate": 8.840145832261065e-06, "loss": 0.0597, "step": 12270 }, { "epoch": 0.05121796530113243, "grad_norm": 1.6642224904376142, "learning_rate": 8.838345065129825e-06, "loss": 0.0722, "step": 12275 }, { "epoch": 0.05123882801612271, "grad_norm": 1.3461476542446218, "learning_rate": 8.836545398017211e-06, "loss": 0.0598, "step": 12280 }, { "epoch": 0.051259690731112986, "grad_norm": 2.0825653741674004, "learning_rate": 8.834746829803753e-06, "loss": 0.0838, "step": 12285 }, { "epoch": 0.051280553446103265, "grad_norm": 1.585401877064097, "learning_rate": 8.832949359371567e-06, "loss": 0.0778, "step": 12290 }, { "epoch": 0.05130141616109354, "grad_norm": 1.3726481216350135, "learning_rate": 8.831152985604363e-06, "loss": 0.0655, "step": 12295 }, { "epoch": 0.051322278876083816, "grad_norm": 1.1837940853880657, "learning_rate": 8.829357707387443e-06, "loss": 0.0543, "step": 12300 }, { "epoch": 0.051343141591074094, "grad_norm": 1.620141583257411, "learning_rate": 8.82756352360769e-06, "loss": 0.0648, "step": 12305 }, { "epoch": 0.05136400430606437, "grad_norm": 1.1482784995312167, "learning_rate": 8.825770433153571e-06, "loss": 0.0701, "step": 12310 }, { "epoch": 0.05138486702105465, "grad_norm": 1.3206786498108982, "learning_rate": 8.823978434915135e-06, "loss": 0.0606, "step": 12315 }, { "epoch": 0.05140572973604493, "grad_norm": 2.099250749281463, "learning_rate": 8.822187527784005e-06, "loss": 0.0694, "step": 12320 }, { "epoch": 0.05142659245103521, "grad_norm": 0.9136915319389322, "learning_rate": 8.820397710653378e-06, "loss": 0.0648, "step": 12325 }, { "epoch": 0.05144745516602549, "grad_norm": 2.708183060252021, "learning_rate": 8.818608982418024e-06, "loss": 0.0908, "step": 12330 }, { "epoch": 0.05146831788101577, "grad_norm": 1.891944055275166, "learning_rate": 8.816821341974281e-06, "loss": 0.0671, "step": 12335 }, { "epoch": 0.05148918059600604, "grad_norm": 1.7671918332316003, "learning_rate": 8.815034788220056e-06, "loss": 0.0626, "step": 12340 }, { "epoch": 0.05151004331099632, "grad_norm": 2.013019215749758, "learning_rate": 8.813249320054809e-06, "loss": 0.0697, "step": 12345 }, { "epoch": 0.051530906025986596, "grad_norm": 1.2450455373344398, "learning_rate": 8.811464936379567e-06, "loss": 0.0664, "step": 12350 }, { "epoch": 0.051551768740976875, "grad_norm": 1.1821329285183195, "learning_rate": 8.809681636096915e-06, "loss": 0.0643, "step": 12355 }, { "epoch": 0.051572631455967154, "grad_norm": 1.1456020751796787, "learning_rate": 8.807899418110985e-06, "loss": 0.0629, "step": 12360 }, { "epoch": 0.05159349417095743, "grad_norm": 1.7229777067448029, "learning_rate": 8.80611828132747e-06, "loss": 0.0887, "step": 12365 }, { "epoch": 0.05161435688594771, "grad_norm": 1.361607032951489, "learning_rate": 8.804338224653602e-06, "loss": 0.0711, "step": 12370 }, { "epoch": 0.05163521960093799, "grad_norm": 1.594919093840392, "learning_rate": 8.802559246998164e-06, "loss": 0.0757, "step": 12375 }, { "epoch": 0.05165608231592827, "grad_norm": 1.2875195852797683, "learning_rate": 8.80078134727148e-06, "loss": 0.0689, "step": 12380 }, { "epoch": 0.05167694503091854, "grad_norm": 8.020184668747067, "learning_rate": 8.799004524385418e-06, "loss": 0.0718, "step": 12385 }, { "epoch": 0.05169780774590882, "grad_norm": 1.2888324900529349, "learning_rate": 8.797228777253375e-06, "loss": 0.0681, "step": 12390 }, { "epoch": 0.0517186704608991, "grad_norm": 1.499431157089563, "learning_rate": 8.795454104790288e-06, "loss": 0.0598, "step": 12395 }, { "epoch": 0.05173953317588938, "grad_norm": 1.0465790750083082, "learning_rate": 8.793680505912627e-06, "loss": 0.0642, "step": 12400 }, { "epoch": 0.051760395890879655, "grad_norm": 1.2331940530659506, "learning_rate": 8.791907979538388e-06, "loss": 0.0703, "step": 12405 }, { "epoch": 0.051781258605869934, "grad_norm": 1.2372022386545836, "learning_rate": 8.79013652458709e-06, "loss": 0.0653, "step": 12410 }, { "epoch": 0.05180212132086021, "grad_norm": 1.6872502737401684, "learning_rate": 8.788366139979785e-06, "loss": 0.0688, "step": 12415 }, { "epoch": 0.05182298403585049, "grad_norm": 1.4152207021894236, "learning_rate": 8.786596824639035e-06, "loss": 0.0508, "step": 12420 }, { "epoch": 0.05184384675084077, "grad_norm": 2.005290456170438, "learning_rate": 8.784828577488926e-06, "loss": 0.0622, "step": 12425 }, { "epoch": 0.05186470946583104, "grad_norm": 1.4607578183611258, "learning_rate": 8.783061397455054e-06, "loss": 0.0751, "step": 12430 }, { "epoch": 0.05188557218082132, "grad_norm": 0.9825585748083027, "learning_rate": 8.781295283464537e-06, "loss": 0.068, "step": 12435 }, { "epoch": 0.0519064348958116, "grad_norm": 1.363595135516006, "learning_rate": 8.77953023444599e-06, "loss": 0.0592, "step": 12440 }, { "epoch": 0.05192729761080188, "grad_norm": 1.0101756606843466, "learning_rate": 8.777766249329545e-06, "loss": 0.051, "step": 12445 }, { "epoch": 0.05194816032579216, "grad_norm": 1.4437175338534138, "learning_rate": 8.776003327046833e-06, "loss": 0.0656, "step": 12450 }, { "epoch": 0.051969023040782436, "grad_norm": 1.7248340954890973, "learning_rate": 8.774241466530986e-06, "loss": 0.0547, "step": 12455 }, { "epoch": 0.051989885755772715, "grad_norm": 1.254963710983213, "learning_rate": 8.77248066671664e-06, "loss": 0.0477, "step": 12460 }, { "epoch": 0.052010748470762994, "grad_norm": 1.4743369302645897, "learning_rate": 8.770720926539915e-06, "loss": 0.0551, "step": 12465 }, { "epoch": 0.05203161118575327, "grad_norm": 2.056207023322275, "learning_rate": 8.768962244938441e-06, "loss": 0.0736, "step": 12470 }, { "epoch": 0.052052473900743544, "grad_norm": 1.4186722541301249, "learning_rate": 8.767204620851329e-06, "loss": 0.0703, "step": 12475 }, { "epoch": 0.05207333661573382, "grad_norm": 1.318262292881949, "learning_rate": 8.765448053219174e-06, "loss": 0.0553, "step": 12480 }, { "epoch": 0.0520941993307241, "grad_norm": 1.1366188171463334, "learning_rate": 8.763692540984063e-06, "loss": 0.0564, "step": 12485 }, { "epoch": 0.05211506204571438, "grad_norm": 0.9288093973543017, "learning_rate": 8.761938083089567e-06, "loss": 0.0781, "step": 12490 }, { "epoch": 0.05213592476070466, "grad_norm": 1.5522023321953555, "learning_rate": 8.76018467848073e-06, "loss": 0.061, "step": 12495 }, { "epoch": 0.05215678747569494, "grad_norm": 1.9909091258301181, "learning_rate": 8.758432326104078e-06, "loss": 0.0741, "step": 12500 }, { "epoch": 0.05217765019068522, "grad_norm": 2.464468694411569, "learning_rate": 8.75668102490761e-06, "loss": 0.0642, "step": 12505 }, { "epoch": 0.052198512905675495, "grad_norm": 1.1391510532651745, "learning_rate": 8.754930773840797e-06, "loss": 0.0613, "step": 12510 }, { "epoch": 0.052219375620665774, "grad_norm": 1.3253527171328483, "learning_rate": 8.753181571854578e-06, "loss": 0.0788, "step": 12515 }, { "epoch": 0.052240238335656046, "grad_norm": 1.9014077229923168, "learning_rate": 8.751433417901363e-06, "loss": 0.0688, "step": 12520 }, { "epoch": 0.052261101050646325, "grad_norm": 1.3919450313066148, "learning_rate": 8.74968631093502e-06, "loss": 0.0572, "step": 12525 }, { "epoch": 0.052281963765636603, "grad_norm": 1.2953922706230863, "learning_rate": 8.747940249910882e-06, "loss": 0.0494, "step": 12530 }, { "epoch": 0.05230282648062688, "grad_norm": 1.8051775352385624, "learning_rate": 8.746195233785741e-06, "loss": 0.0634, "step": 12535 }, { "epoch": 0.05232368919561716, "grad_norm": 2.0700828359111183, "learning_rate": 8.744451261517845e-06, "loss": 0.0656, "step": 12540 }, { "epoch": 0.05234455191060744, "grad_norm": 1.3293992267504071, "learning_rate": 8.74270833206689e-06, "loss": 0.0686, "step": 12545 }, { "epoch": 0.05236541462559772, "grad_norm": 1.265687461785446, "learning_rate": 8.740966444394035e-06, "loss": 0.0662, "step": 12550 }, { "epoch": 0.052386277340588, "grad_norm": 1.9937732658227172, "learning_rate": 8.739225597461872e-06, "loss": 0.0703, "step": 12555 }, { "epoch": 0.052407140055578276, "grad_norm": 1.8359635202614906, "learning_rate": 8.73748579023445e-06, "loss": 0.0572, "step": 12560 }, { "epoch": 0.05242800277056855, "grad_norm": 2.4886958243599064, "learning_rate": 8.735747021677259e-06, "loss": 0.068, "step": 12565 }, { "epoch": 0.05244886548555883, "grad_norm": 2.3401218501199863, "learning_rate": 8.734009290757223e-06, "loss": 0.0671, "step": 12570 }, { "epoch": 0.052469728200549105, "grad_norm": 1.207317787239406, "learning_rate": 8.732272596442715e-06, "loss": 0.0612, "step": 12575 }, { "epoch": 0.052490590915539384, "grad_norm": 1.7105209070117082, "learning_rate": 8.730536937703534e-06, "loss": 0.0592, "step": 12580 }, { "epoch": 0.05251145363052966, "grad_norm": 1.534941373716195, "learning_rate": 8.728802313510913e-06, "loss": 0.0619, "step": 12585 }, { "epoch": 0.05253231634551994, "grad_norm": 1.162917918382246, "learning_rate": 8.727068722837521e-06, "loss": 0.0472, "step": 12590 }, { "epoch": 0.05255317906051022, "grad_norm": 2.2139254842579166, "learning_rate": 8.72533616465745e-06, "loss": 0.0638, "step": 12595 }, { "epoch": 0.0525740417755005, "grad_norm": 1.8732880831285281, "learning_rate": 8.723604637946218e-06, "loss": 0.0705, "step": 12600 }, { "epoch": 0.05259490449049078, "grad_norm": 1.2607911951954598, "learning_rate": 8.721874141680766e-06, "loss": 0.0619, "step": 12605 }, { "epoch": 0.05261576720548105, "grad_norm": 1.2499605577170185, "learning_rate": 8.720144674839452e-06, "loss": 0.0902, "step": 12610 }, { "epoch": 0.05263662992047133, "grad_norm": 1.3317074007788654, "learning_rate": 8.718416236402059e-06, "loss": 0.059, "step": 12615 }, { "epoch": 0.05265749263546161, "grad_norm": 1.4118671459446404, "learning_rate": 8.716688825349776e-06, "loss": 0.0494, "step": 12620 }, { "epoch": 0.052678355350451886, "grad_norm": 2.1894687270968305, "learning_rate": 8.714962440665211e-06, "loss": 0.0838, "step": 12625 }, { "epoch": 0.052699218065442165, "grad_norm": 1.9096319376441848, "learning_rate": 8.713237081332379e-06, "loss": 0.0718, "step": 12630 }, { "epoch": 0.05272008078043244, "grad_norm": 2.557302106842901, "learning_rate": 8.711512746336702e-06, "loss": 0.0777, "step": 12635 }, { "epoch": 0.05274094349542272, "grad_norm": 1.1297392486097704, "learning_rate": 8.709789434665012e-06, "loss": 0.0673, "step": 12640 }, { "epoch": 0.052761806210413, "grad_norm": 1.1897880379307697, "learning_rate": 8.708067145305533e-06, "loss": 0.0647, "step": 12645 }, { "epoch": 0.05278266892540328, "grad_norm": 1.539576492123381, "learning_rate": 8.7063458772479e-06, "loss": 0.0792, "step": 12650 }, { "epoch": 0.05280353164039355, "grad_norm": 1.8728687575279046, "learning_rate": 8.70462562948314e-06, "loss": 0.0781, "step": 12655 }, { "epoch": 0.05282439435538383, "grad_norm": 1.7313227069437396, "learning_rate": 8.702906401003674e-06, "loss": 0.0861, "step": 12660 }, { "epoch": 0.05284525707037411, "grad_norm": 1.572581803507502, "learning_rate": 8.701188190803319e-06, "loss": 0.0725, "step": 12665 }, { "epoch": 0.05286611978536439, "grad_norm": 1.4764188273192653, "learning_rate": 8.699470997877279e-06, "loss": 0.0621, "step": 12670 }, { "epoch": 0.052886982500354666, "grad_norm": 1.5886350524413908, "learning_rate": 8.697754821222148e-06, "loss": 0.0473, "step": 12675 }, { "epoch": 0.052907845215344945, "grad_norm": 2.7276064643455227, "learning_rate": 8.696039659835904e-06, "loss": 0.0644, "step": 12680 }, { "epoch": 0.052928707930335224, "grad_norm": 1.6969317837047817, "learning_rate": 8.694325512717905e-06, "loss": 0.0744, "step": 12685 }, { "epoch": 0.0529495706453255, "grad_norm": 1.0328126724354176, "learning_rate": 8.692612378868894e-06, "loss": 0.0554, "step": 12690 }, { "epoch": 0.05297043336031578, "grad_norm": 1.027865177168345, "learning_rate": 8.69090025729099e-06, "loss": 0.0723, "step": 12695 }, { "epoch": 0.05299129607530605, "grad_norm": 2.3747270740174304, "learning_rate": 8.689189146987682e-06, "loss": 0.0655, "step": 12700 }, { "epoch": 0.05301215879029633, "grad_norm": 1.4232212065495933, "learning_rate": 8.687479046963842e-06, "loss": 0.0646, "step": 12705 }, { "epoch": 0.05303302150528661, "grad_norm": 1.1195744672541603, "learning_rate": 8.685769956225705e-06, "loss": 0.0674, "step": 12710 }, { "epoch": 0.05305388422027689, "grad_norm": 1.1118038776528383, "learning_rate": 8.684061873780873e-06, "loss": 0.0586, "step": 12715 }, { "epoch": 0.05307474693526717, "grad_norm": 1.5984192715215229, "learning_rate": 8.68235479863832e-06, "loss": 0.0845, "step": 12720 }, { "epoch": 0.05309560965025745, "grad_norm": 0.8885253795815501, "learning_rate": 8.680648729808379e-06, "loss": 0.0628, "step": 12725 }, { "epoch": 0.053116472365247726, "grad_norm": 1.792032119984295, "learning_rate": 8.678943666302742e-06, "loss": 0.0642, "step": 12730 }, { "epoch": 0.053137335080238005, "grad_norm": 1.3196916452438363, "learning_rate": 8.677239607134464e-06, "loss": 0.0698, "step": 12735 }, { "epoch": 0.053158197795228276, "grad_norm": 1.212316924603072, "learning_rate": 8.675536551317954e-06, "loss": 0.0577, "step": 12740 }, { "epoch": 0.053179060510218555, "grad_norm": 1.4417613624599175, "learning_rate": 8.673834497868971e-06, "loss": 0.0645, "step": 12745 }, { "epoch": 0.053199923225208834, "grad_norm": 1.4780598534032, "learning_rate": 8.672133445804631e-06, "loss": 0.0691, "step": 12750 }, { "epoch": 0.05322078594019911, "grad_norm": 1.253080121077271, "learning_rate": 8.670433394143397e-06, "loss": 0.0634, "step": 12755 }, { "epoch": 0.05324164865518939, "grad_norm": 1.882466374643993, "learning_rate": 8.668734341905071e-06, "loss": 0.0797, "step": 12760 }, { "epoch": 0.05326251137017967, "grad_norm": 5.334524147862459, "learning_rate": 8.667036288110812e-06, "loss": 0.0674, "step": 12765 }, { "epoch": 0.05328337408516995, "grad_norm": 1.8190175910191844, "learning_rate": 8.665339231783114e-06, "loss": 0.0843, "step": 12770 }, { "epoch": 0.05330423680016023, "grad_norm": 1.285325561491848, "learning_rate": 8.663643171945807e-06, "loss": 0.0657, "step": 12775 }, { "epoch": 0.053325099515150506, "grad_norm": 1.480750170444548, "learning_rate": 8.661948107624064e-06, "loss": 0.0709, "step": 12780 }, { "epoch": 0.05334596223014078, "grad_norm": 1.1025089911911528, "learning_rate": 8.660254037844388e-06, "loss": 0.0545, "step": 12785 }, { "epoch": 0.05336682494513106, "grad_norm": 1.4932371164467593, "learning_rate": 8.658560961634615e-06, "loss": 0.0528, "step": 12790 }, { "epoch": 0.053387687660121336, "grad_norm": 1.0939712317815284, "learning_rate": 8.656868878023917e-06, "loss": 0.0693, "step": 12795 }, { "epoch": 0.053408550375111614, "grad_norm": 1.0324560446694653, "learning_rate": 8.655177786042782e-06, "loss": 0.0529, "step": 12800 }, { "epoch": 0.05342941309010189, "grad_norm": 1.9051305433077521, "learning_rate": 8.653487684723036e-06, "loss": 0.0749, "step": 12805 }, { "epoch": 0.05345027580509217, "grad_norm": 1.6374481286422955, "learning_rate": 8.651798573097818e-06, "loss": 0.0664, "step": 12810 }, { "epoch": 0.05347113852008245, "grad_norm": 1.6127353941016263, "learning_rate": 8.65011045020159e-06, "loss": 0.0785, "step": 12815 }, { "epoch": 0.05349200123507273, "grad_norm": 2.0190272427621987, "learning_rate": 8.648423315070135e-06, "loss": 0.081, "step": 12820 }, { "epoch": 0.05351286395006301, "grad_norm": 0.6708031964413143, "learning_rate": 8.646737166740553e-06, "loss": 0.0543, "step": 12825 }, { "epoch": 0.05353372666505328, "grad_norm": 0.9654997553849005, "learning_rate": 8.645052004251253e-06, "loss": 0.0568, "step": 12830 }, { "epoch": 0.05355458938004356, "grad_norm": 1.4007610133760207, "learning_rate": 8.643367826641954e-06, "loss": 0.0683, "step": 12835 }, { "epoch": 0.05357545209503384, "grad_norm": 0.8758174049093422, "learning_rate": 8.641684632953693e-06, "loss": 0.0639, "step": 12840 }, { "epoch": 0.053596314810024116, "grad_norm": 1.1310525188580924, "learning_rate": 8.640002422228805e-06, "loss": 0.0859, "step": 12845 }, { "epoch": 0.053617177525014395, "grad_norm": 1.369187074454694, "learning_rate": 8.63832119351093e-06, "loss": 0.0657, "step": 12850 }, { "epoch": 0.053638040240004674, "grad_norm": 0.9147538612862249, "learning_rate": 8.636640945845018e-06, "loss": 0.0634, "step": 12855 }, { "epoch": 0.05365890295499495, "grad_norm": 1.1197578609844272, "learning_rate": 8.63496167827731e-06, "loss": 0.056, "step": 12860 }, { "epoch": 0.05367976566998523, "grad_norm": 1.2288884353109928, "learning_rate": 8.633283389855348e-06, "loss": 0.0626, "step": 12865 }, { "epoch": 0.05370062838497551, "grad_norm": 1.5190971588881035, "learning_rate": 8.631606079627972e-06, "loss": 0.0579, "step": 12870 }, { "epoch": 0.05372149109996578, "grad_norm": 1.5825683751428652, "learning_rate": 8.629929746645307e-06, "loss": 0.0714, "step": 12875 }, { "epoch": 0.05374235381495606, "grad_norm": 1.781904863626971, "learning_rate": 8.628254389958783e-06, "loss": 0.0696, "step": 12880 }, { "epoch": 0.05376321652994634, "grad_norm": 1.8617313604861643, "learning_rate": 8.626580008621103e-06, "loss": 0.0653, "step": 12885 }, { "epoch": 0.05378407924493662, "grad_norm": 1.398309449600779, "learning_rate": 8.624906601686265e-06, "loss": 0.061, "step": 12890 }, { "epoch": 0.0538049419599269, "grad_norm": 1.15610541611891, "learning_rate": 8.62323416820955e-06, "loss": 0.0505, "step": 12895 }, { "epoch": 0.053825804674917176, "grad_norm": 1.6463291851397053, "learning_rate": 8.621562707247518e-06, "loss": 0.0655, "step": 12900 }, { "epoch": 0.053846667389907454, "grad_norm": 1.6210126352698369, "learning_rate": 8.619892217858014e-06, "loss": 0.083, "step": 12905 }, { "epoch": 0.05386753010489773, "grad_norm": 1.278577771247205, "learning_rate": 8.618222699100154e-06, "loss": 0.0569, "step": 12910 }, { "epoch": 0.05388839281988801, "grad_norm": 1.3457134851125492, "learning_rate": 8.616554150034335e-06, "loss": 0.0545, "step": 12915 }, { "epoch": 0.053909255534878284, "grad_norm": 1.1574801215436124, "learning_rate": 8.614886569722222e-06, "loss": 0.0902, "step": 12920 }, { "epoch": 0.05393011824986856, "grad_norm": 2.0264400029704452, "learning_rate": 8.61321995722675e-06, "loss": 0.0809, "step": 12925 }, { "epoch": 0.05395098096485884, "grad_norm": 1.245889478037745, "learning_rate": 8.611554311612128e-06, "loss": 0.0557, "step": 12930 }, { "epoch": 0.05397184367984912, "grad_norm": 1.2626523327376677, "learning_rate": 8.609889631943833e-06, "loss": 0.0632, "step": 12935 }, { "epoch": 0.0539927063948394, "grad_norm": 1.7473112525979881, "learning_rate": 8.608225917288596e-06, "loss": 0.0794, "step": 12940 }, { "epoch": 0.05401356910982968, "grad_norm": 1.373211478086715, "learning_rate": 8.606563166714414e-06, "loss": 0.0581, "step": 12945 }, { "epoch": 0.054034431824819956, "grad_norm": 0.931846742448053, "learning_rate": 8.60490137929055e-06, "loss": 0.0685, "step": 12950 }, { "epoch": 0.054055294539810235, "grad_norm": 1.2760798887432436, "learning_rate": 8.603240554087516e-06, "loss": 0.0668, "step": 12955 }, { "epoch": 0.054076157254800514, "grad_norm": 1.1505062521888962, "learning_rate": 8.601580690177087e-06, "loss": 0.0726, "step": 12960 }, { "epoch": 0.054097019969790786, "grad_norm": 1.0713168767469954, "learning_rate": 8.599921786632281e-06, "loss": 0.0532, "step": 12965 }, { "epoch": 0.054117882684781064, "grad_norm": 1.0587208953974832, "learning_rate": 8.598263842527377e-06, "loss": 0.0637, "step": 12970 }, { "epoch": 0.05413874539977134, "grad_norm": 2.472562832348732, "learning_rate": 8.596606856937898e-06, "loss": 0.0762, "step": 12975 }, { "epoch": 0.05415960811476162, "grad_norm": 1.3646221122693143, "learning_rate": 8.594950828940613e-06, "loss": 0.0503, "step": 12980 }, { "epoch": 0.0541804708297519, "grad_norm": 2.8930756952973704, "learning_rate": 8.593295757613542e-06, "loss": 0.0624, "step": 12985 }, { "epoch": 0.05420133354474218, "grad_norm": 1.192441773136856, "learning_rate": 8.591641642035935e-06, "loss": 0.0506, "step": 12990 }, { "epoch": 0.05422219625973246, "grad_norm": 1.7020160582786439, "learning_rate": 8.589988481288293e-06, "loss": 0.0677, "step": 12995 }, { "epoch": 0.05424305897472274, "grad_norm": 1.5628669058258515, "learning_rate": 8.588336274452352e-06, "loss": 0.0661, "step": 13000 }, { "epoch": 0.054263921689713016, "grad_norm": 1.5333023357437792, "learning_rate": 8.58668502061108e-06, "loss": 0.0705, "step": 13005 }, { "epoch": 0.05428478440470329, "grad_norm": 1.4972599637756825, "learning_rate": 8.585034718848688e-06, "loss": 0.0647, "step": 13010 }, { "epoch": 0.054305647119693566, "grad_norm": 1.4369254969315164, "learning_rate": 8.583385368250605e-06, "loss": 0.0674, "step": 13015 }, { "epoch": 0.054326509834683845, "grad_norm": 1.3326402604664442, "learning_rate": 8.581736967903502e-06, "loss": 0.0769, "step": 13020 }, { "epoch": 0.054347372549674124, "grad_norm": 2.0935197343400107, "learning_rate": 8.58008951689527e-06, "loss": 0.0562, "step": 13025 }, { "epoch": 0.0543682352646644, "grad_norm": 1.2087943809809827, "learning_rate": 8.57844301431503e-06, "loss": 0.0732, "step": 13030 }, { "epoch": 0.05438909797965468, "grad_norm": 1.4868783546499802, "learning_rate": 8.576797459253123e-06, "loss": 0.0871, "step": 13035 }, { "epoch": 0.05440996069464496, "grad_norm": 1.7672737235710954, "learning_rate": 8.575152850801108e-06, "loss": 0.0841, "step": 13040 }, { "epoch": 0.05443082340963524, "grad_norm": 1.0430493452882799, "learning_rate": 8.57350918805177e-06, "loss": 0.0631, "step": 13045 }, { "epoch": 0.05445168612462552, "grad_norm": 1.1917913059081697, "learning_rate": 8.571866470099105e-06, "loss": 0.0731, "step": 13050 }, { "epoch": 0.05447254883961579, "grad_norm": 4.7058240036509424, "learning_rate": 8.570224696038329e-06, "loss": 0.0738, "step": 13055 }, { "epoch": 0.05449341155460607, "grad_norm": 1.476663155326667, "learning_rate": 8.568583864965866e-06, "loss": 0.0768, "step": 13060 }, { "epoch": 0.05451427426959635, "grad_norm": 1.868042356552023, "learning_rate": 8.566943975979351e-06, "loss": 0.0753, "step": 13065 }, { "epoch": 0.054535136984586625, "grad_norm": 1.2565927240942534, "learning_rate": 8.565305028177631e-06, "loss": 0.0585, "step": 13070 }, { "epoch": 0.054555999699576904, "grad_norm": 1.4617301569223697, "learning_rate": 8.563667020660755e-06, "loss": 0.0674, "step": 13075 }, { "epoch": 0.05457686241456718, "grad_norm": 1.2633723073930743, "learning_rate": 8.562029952529977e-06, "loss": 0.0448, "step": 13080 }, { "epoch": 0.05459772512955746, "grad_norm": 1.1577081740603714, "learning_rate": 8.560393822887758e-06, "loss": 0.0694, "step": 13085 }, { "epoch": 0.05461858784454774, "grad_norm": 2.337787663551455, "learning_rate": 8.558758630837754e-06, "loss": 0.0574, "step": 13090 }, { "epoch": 0.05463945055953802, "grad_norm": 1.176645609167329, "learning_rate": 8.557124375484817e-06, "loss": 0.0645, "step": 13095 }, { "epoch": 0.05466031327452829, "grad_norm": 1.4712898729030546, "learning_rate": 8.555491055935005e-06, "loss": 0.104, "step": 13100 }, { "epoch": 0.05468117598951857, "grad_norm": 1.535401063278884, "learning_rate": 8.553858671295557e-06, "loss": 0.0606, "step": 13105 }, { "epoch": 0.05470203870450885, "grad_norm": 1.2715201976907766, "learning_rate": 8.552227220674912e-06, "loss": 0.0538, "step": 13110 }, { "epoch": 0.05472290141949913, "grad_norm": 1.801301893992312, "learning_rate": 8.550596703182702e-06, "loss": 0.0753, "step": 13115 }, { "epoch": 0.054743764134489406, "grad_norm": 1.5989636632944428, "learning_rate": 8.548967117929738e-06, "loss": 0.0645, "step": 13120 }, { "epoch": 0.054764626849479685, "grad_norm": 0.8252229951806399, "learning_rate": 8.547338464028019e-06, "loss": 0.06, "step": 13125 }, { "epoch": 0.054785489564469964, "grad_norm": 1.0007493855112704, "learning_rate": 8.545710740590734e-06, "loss": 0.0647, "step": 13130 }, { "epoch": 0.05480635227946024, "grad_norm": 1.2456060308953383, "learning_rate": 8.544083946732247e-06, "loss": 0.0651, "step": 13135 }, { "epoch": 0.05482721499445052, "grad_norm": 0.9954961717592821, "learning_rate": 8.542458081568104e-06, "loss": 0.0568, "step": 13140 }, { "epoch": 0.05484807770944079, "grad_norm": 1.1179876877138373, "learning_rate": 8.54083314421503e-06, "loss": 0.0652, "step": 13145 }, { "epoch": 0.05486894042443107, "grad_norm": 0.6682182104474674, "learning_rate": 8.539209133790922e-06, "loss": 0.0514, "step": 13150 }, { "epoch": 0.05488980313942135, "grad_norm": 2.007530266733478, "learning_rate": 8.537586049414855e-06, "loss": 0.0717, "step": 13155 }, { "epoch": 0.05491066585441163, "grad_norm": 0.9231335171877477, "learning_rate": 8.535963890207073e-06, "loss": 0.0457, "step": 13160 }, { "epoch": 0.05493152856940191, "grad_norm": 1.2862451333275304, "learning_rate": 8.534342655288992e-06, "loss": 0.0683, "step": 13165 }, { "epoch": 0.05495239128439219, "grad_norm": 0.9569497213126651, "learning_rate": 8.532722343783191e-06, "loss": 0.0538, "step": 13170 }, { "epoch": 0.054973253999382465, "grad_norm": 2.4228616327900245, "learning_rate": 8.531102954813418e-06, "loss": 0.0822, "step": 13175 }, { "epoch": 0.054994116714372744, "grad_norm": 1.1569443992267516, "learning_rate": 8.529484487504586e-06, "loss": 0.0722, "step": 13180 }, { "epoch": 0.05501497942936302, "grad_norm": 1.8189268384279562, "learning_rate": 8.52786694098277e-06, "loss": 0.0568, "step": 13185 }, { "epoch": 0.055035842144353295, "grad_norm": 2.094941757441483, "learning_rate": 8.526250314375196e-06, "loss": 0.0663, "step": 13190 }, { "epoch": 0.05505670485934357, "grad_norm": 1.6830453407148, "learning_rate": 8.52463460681026e-06, "loss": 0.0681, "step": 13195 }, { "epoch": 0.05507756757433385, "grad_norm": 0.9181571235636881, "learning_rate": 8.523019817417506e-06, "loss": 0.0738, "step": 13200 }, { "epoch": 0.05509843028932413, "grad_norm": 1.5505637518541253, "learning_rate": 8.521405945327632e-06, "loss": 0.0738, "step": 13205 }, { "epoch": 0.05511929300431441, "grad_norm": 1.505783204694762, "learning_rate": 8.519792989672497e-06, "loss": 0.0648, "step": 13210 }, { "epoch": 0.05514015571930469, "grad_norm": 0.9038909897311448, "learning_rate": 8.518180949585093e-06, "loss": 0.062, "step": 13215 }, { "epoch": 0.05516101843429497, "grad_norm": 1.1272096354053331, "learning_rate": 8.516569824199574e-06, "loss": 0.0712, "step": 13220 }, { "epoch": 0.055181881149285246, "grad_norm": 1.8337356071095403, "learning_rate": 8.514959612651236e-06, "loss": 0.066, "step": 13225 }, { "epoch": 0.055202743864275525, "grad_norm": 1.0231943664063876, "learning_rate": 8.513350314076517e-06, "loss": 0.0682, "step": 13230 }, { "epoch": 0.055223606579265797, "grad_norm": 0.9188031590006753, "learning_rate": 8.511741927612998e-06, "loss": 0.0572, "step": 13235 }, { "epoch": 0.055244469294256075, "grad_norm": 1.1936292223282026, "learning_rate": 8.5101344523994e-06, "loss": 0.0555, "step": 13240 }, { "epoch": 0.055265332009246354, "grad_norm": 1.3207528511206943, "learning_rate": 8.508527887575587e-06, "loss": 0.0578, "step": 13245 }, { "epoch": 0.05528619472423663, "grad_norm": 1.513041253089038, "learning_rate": 8.506922232282548e-06, "loss": 0.0665, "step": 13250 }, { "epoch": 0.05530705743922691, "grad_norm": 1.4845745903406893, "learning_rate": 8.50531748566242e-06, "loss": 0.0543, "step": 13255 }, { "epoch": 0.05532792015421719, "grad_norm": 0.8129675153660609, "learning_rate": 8.50371364685846e-06, "loss": 0.0686, "step": 13260 }, { "epoch": 0.05534878286920747, "grad_norm": 1.3182653044359463, "learning_rate": 8.502110715015063e-06, "loss": 0.1153, "step": 13265 }, { "epoch": 0.05536964558419775, "grad_norm": 1.832428293595103, "learning_rate": 8.500508689277753e-06, "loss": 0.0553, "step": 13270 }, { "epoch": 0.05539050829918803, "grad_norm": 1.2334017976963627, "learning_rate": 8.498907568793171e-06, "loss": 0.0616, "step": 13275 }, { "epoch": 0.0554113710141783, "grad_norm": 0.8562236486394892, "learning_rate": 8.497307352709098e-06, "loss": 0.0669, "step": 13280 }, { "epoch": 0.05543223372916858, "grad_norm": 1.5464803712049366, "learning_rate": 8.495708040174425e-06, "loss": 0.0721, "step": 13285 }, { "epoch": 0.055453096444158856, "grad_norm": 1.3288185143467264, "learning_rate": 8.49410963033917e-06, "loss": 0.0733, "step": 13290 }, { "epoch": 0.055473959159149135, "grad_norm": 0.8683935865971556, "learning_rate": 8.49251212235447e-06, "loss": 0.0496, "step": 13295 }, { "epoch": 0.05549482187413941, "grad_norm": 1.363991086178926, "learning_rate": 8.490915515372573e-06, "loss": 0.0479, "step": 13300 }, { "epoch": 0.05551568458912969, "grad_norm": 1.8518574673730381, "learning_rate": 8.48931980854685e-06, "loss": 0.0724, "step": 13305 }, { "epoch": 0.05553654730411997, "grad_norm": 1.2291930439976575, "learning_rate": 8.487725001031783e-06, "loss": 0.0524, "step": 13310 }, { "epoch": 0.05555741001911025, "grad_norm": 3.7132720695177843, "learning_rate": 8.486131091982963e-06, "loss": 0.0441, "step": 13315 }, { "epoch": 0.05557827273410053, "grad_norm": 0.9991760331800437, "learning_rate": 8.484538080557094e-06, "loss": 0.0754, "step": 13320 }, { "epoch": 0.0555991354490908, "grad_norm": 1.6505873367625463, "learning_rate": 8.482945965911985e-06, "loss": 0.0689, "step": 13325 }, { "epoch": 0.05561999816408108, "grad_norm": 1.2086900973355965, "learning_rate": 8.48135474720655e-06, "loss": 0.051, "step": 13330 }, { "epoch": 0.05564086087907136, "grad_norm": 4.327322598957361, "learning_rate": 8.479764423600812e-06, "loss": 0.068, "step": 13335 }, { "epoch": 0.055661723594061636, "grad_norm": 1.53460929207296, "learning_rate": 8.478174994255891e-06, "loss": 0.0563, "step": 13340 }, { "epoch": 0.055682586309051915, "grad_norm": 1.5922662623839035, "learning_rate": 8.47658645833401e-06, "loss": 0.0772, "step": 13345 }, { "epoch": 0.055703449024042194, "grad_norm": 1.4233151346865904, "learning_rate": 8.47499881499849e-06, "loss": 0.0672, "step": 13350 }, { "epoch": 0.05572431173903247, "grad_norm": 1.4209664545347165, "learning_rate": 8.473412063413745e-06, "loss": 0.0563, "step": 13355 }, { "epoch": 0.05574517445402275, "grad_norm": 1.4855925384549014, "learning_rate": 8.47182620274529e-06, "loss": 0.0608, "step": 13360 }, { "epoch": 0.05576603716901302, "grad_norm": 1.5427565574442494, "learning_rate": 8.47024123215973e-06, "loss": 0.061, "step": 13365 }, { "epoch": 0.0557868998840033, "grad_norm": 1.427785159861837, "learning_rate": 8.468657150824757e-06, "loss": 0.0642, "step": 13370 }, { "epoch": 0.05580776259899358, "grad_norm": 1.508671815574487, "learning_rate": 8.467073957909157e-06, "loss": 0.0645, "step": 13375 }, { "epoch": 0.05582862531398386, "grad_norm": 1.148106510876176, "learning_rate": 8.465491652582808e-06, "loss": 0.0575, "step": 13380 }, { "epoch": 0.05584948802897414, "grad_norm": 0.7162862634442287, "learning_rate": 8.46391023401666e-06, "loss": 0.0515, "step": 13385 }, { "epoch": 0.05587035074396442, "grad_norm": 1.564404191395234, "learning_rate": 8.462329701382758e-06, "loss": 0.0827, "step": 13390 }, { "epoch": 0.055891213458954696, "grad_norm": 1.3709497603014165, "learning_rate": 8.460750053854223e-06, "loss": 0.0536, "step": 13395 }, { "epoch": 0.055912076173944975, "grad_norm": 1.3706329722521504, "learning_rate": 8.459171290605262e-06, "loss": 0.0531, "step": 13400 }, { "epoch": 0.05593293888893525, "grad_norm": 0.9849702214261218, "learning_rate": 8.457593410811153e-06, "loss": 0.0527, "step": 13405 }, { "epoch": 0.055953801603925525, "grad_norm": 1.3242747802084374, "learning_rate": 8.456016413648257e-06, "loss": 0.0693, "step": 13410 }, { "epoch": 0.055974664318915804, "grad_norm": 1.4995386952612755, "learning_rate": 8.454440298294005e-06, "loss": 0.0625, "step": 13415 }, { "epoch": 0.05599552703390608, "grad_norm": 1.4055722920577542, "learning_rate": 8.452865063926902e-06, "loss": 0.0571, "step": 13420 }, { "epoch": 0.05601638974889636, "grad_norm": 2.0844112075422507, "learning_rate": 8.451290709726527e-06, "loss": 0.0775, "step": 13425 }, { "epoch": 0.05603725246388664, "grad_norm": 1.4687891300783023, "learning_rate": 8.449717234873522e-06, "loss": 0.0609, "step": 13430 }, { "epoch": 0.05605811517887692, "grad_norm": 1.2855444889571856, "learning_rate": 8.448144638549602e-06, "loss": 0.0845, "step": 13435 }, { "epoch": 0.0560789778938672, "grad_norm": 1.5613284787177213, "learning_rate": 8.446572919937549e-06, "loss": 0.0761, "step": 13440 }, { "epoch": 0.056099840608857476, "grad_norm": 1.7476773930794787, "learning_rate": 8.445002078221199e-06, "loss": 0.0605, "step": 13445 }, { "epoch": 0.056120703323847755, "grad_norm": 1.8782365642241152, "learning_rate": 8.443432112585463e-06, "loss": 0.0467, "step": 13450 }, { "epoch": 0.05614156603883803, "grad_norm": 1.5266966098276797, "learning_rate": 8.441863022216302e-06, "loss": 0.0623, "step": 13455 }, { "epoch": 0.056162428753828306, "grad_norm": 1.392273681031674, "learning_rate": 8.440294806300742e-06, "loss": 0.0609, "step": 13460 }, { "epoch": 0.056183291468818584, "grad_norm": 0.5619833207693304, "learning_rate": 8.438727464026861e-06, "loss": 0.0671, "step": 13465 }, { "epoch": 0.05620415418380886, "grad_norm": 1.7545634344604986, "learning_rate": 8.437160994583798e-06, "loss": 0.0643, "step": 13470 }, { "epoch": 0.05622501689879914, "grad_norm": 1.1714294715788283, "learning_rate": 8.435595397161739e-06, "loss": 0.073, "step": 13475 }, { "epoch": 0.05624587961378942, "grad_norm": 1.3651344663725518, "learning_rate": 8.434030670951924e-06, "loss": 0.0681, "step": 13480 }, { "epoch": 0.0562667423287797, "grad_norm": 1.6441365421438476, "learning_rate": 8.432466815146646e-06, "loss": 0.0647, "step": 13485 }, { "epoch": 0.05628760504376998, "grad_norm": 2.8628397870010147, "learning_rate": 8.430903828939238e-06, "loss": 0.0651, "step": 13490 }, { "epoch": 0.05630846775876026, "grad_norm": 1.1612468164458323, "learning_rate": 8.429341711524089e-06, "loss": 0.0538, "step": 13495 }, { "epoch": 0.05632933047375053, "grad_norm": 1.5528207652132815, "learning_rate": 8.427780462096625e-06, "loss": 0.0514, "step": 13500 }, { "epoch": 0.05635019318874081, "grad_norm": 1.8261617880332883, "learning_rate": 8.426220079853318e-06, "loss": 0.058, "step": 13505 }, { "epoch": 0.056371055903731086, "grad_norm": 1.7270985796010476, "learning_rate": 8.424660563991678e-06, "loss": 0.0826, "step": 13510 }, { "epoch": 0.056391918618721365, "grad_norm": 1.2673557169616907, "learning_rate": 8.423101913710261e-06, "loss": 0.0561, "step": 13515 }, { "epoch": 0.056412781333711644, "grad_norm": 1.3050193080871109, "learning_rate": 8.421544128208654e-06, "loss": 0.0711, "step": 13520 }, { "epoch": 0.05643364404870192, "grad_norm": 1.7501042266032247, "learning_rate": 8.419987206687481e-06, "loss": 0.0598, "step": 13525 }, { "epoch": 0.0564545067636922, "grad_norm": 1.2421916761318839, "learning_rate": 8.418431148348402e-06, "loss": 0.0646, "step": 13530 }, { "epoch": 0.05647536947868248, "grad_norm": 1.0223675587342462, "learning_rate": 8.41687595239411e-06, "loss": 0.0556, "step": 13535 }, { "epoch": 0.05649623219367276, "grad_norm": 1.4276722410279548, "learning_rate": 8.415321618028326e-06, "loss": 0.0793, "step": 13540 }, { "epoch": 0.05651709490866303, "grad_norm": 1.3652860156963955, "learning_rate": 8.413768144455799e-06, "loss": 0.0816, "step": 13545 }, { "epoch": 0.05653795762365331, "grad_norm": 1.098192661295034, "learning_rate": 8.412215530882311e-06, "loss": 0.0549, "step": 13550 }, { "epoch": 0.05655882033864359, "grad_norm": 1.620351400364764, "learning_rate": 8.410663776514665e-06, "loss": 0.07, "step": 13555 }, { "epoch": 0.05657968305363387, "grad_norm": 1.653718279433507, "learning_rate": 8.409112880560688e-06, "loss": 0.0722, "step": 13560 }, { "epoch": 0.056600545768624146, "grad_norm": 1.0469603844030217, "learning_rate": 8.407562842229229e-06, "loss": 0.0634, "step": 13565 }, { "epoch": 0.056621408483614424, "grad_norm": 1.232164104444681, "learning_rate": 8.40601366073016e-06, "loss": 0.0577, "step": 13570 }, { "epoch": 0.0566422711986047, "grad_norm": 0.6071064556927919, "learning_rate": 8.40446533527437e-06, "loss": 0.0489, "step": 13575 }, { "epoch": 0.05666313391359498, "grad_norm": 1.619313748932909, "learning_rate": 8.402917865073762e-06, "loss": 0.0725, "step": 13580 }, { "epoch": 0.05668399662858526, "grad_norm": 2.206132780243311, "learning_rate": 8.401371249341261e-06, "loss": 0.064, "step": 13585 }, { "epoch": 0.05670485934357553, "grad_norm": 1.2489224629259774, "learning_rate": 8.399825487290803e-06, "loss": 0.0628, "step": 13590 }, { "epoch": 0.05672572205856581, "grad_norm": 1.2745913931157633, "learning_rate": 8.398280578137333e-06, "loss": 0.0536, "step": 13595 }, { "epoch": 0.05674658477355609, "grad_norm": 1.2077504808039687, "learning_rate": 8.39673652109681e-06, "loss": 0.048, "step": 13600 }, { "epoch": 0.05676744748854637, "grad_norm": 1.3594659860948695, "learning_rate": 8.395193315386199e-06, "loss": 0.0663, "step": 13605 }, { "epoch": 0.05678831020353665, "grad_norm": 1.1379914361315853, "learning_rate": 8.393650960223472e-06, "loss": 0.0641, "step": 13610 }, { "epoch": 0.056809172918526926, "grad_norm": 2.1251780194261545, "learning_rate": 8.392109454827609e-06, "loss": 0.095, "step": 13615 }, { "epoch": 0.056830035633517205, "grad_norm": 1.2998111196194935, "learning_rate": 8.390568798418597e-06, "loss": 0.0652, "step": 13620 }, { "epoch": 0.056850898348507484, "grad_norm": 1.791195156204558, "learning_rate": 8.389028990217411e-06, "loss": 0.0666, "step": 13625 }, { "epoch": 0.05687176106349776, "grad_norm": 1.7161722523946064, "learning_rate": 8.387490029446043e-06, "loss": 0.0636, "step": 13630 }, { "epoch": 0.056892623778488034, "grad_norm": 2.1422444289664746, "learning_rate": 8.385951915327476e-06, "loss": 0.0843, "step": 13635 }, { "epoch": 0.05691348649347831, "grad_norm": 1.3946145869735256, "learning_rate": 8.384414647085688e-06, "loss": 0.0594, "step": 13640 }, { "epoch": 0.05693434920846859, "grad_norm": 1.2264061064059557, "learning_rate": 8.382878223945652e-06, "loss": 0.0638, "step": 13645 }, { "epoch": 0.05695521192345887, "grad_norm": 1.964556246701527, "learning_rate": 8.381342645133343e-06, "loss": 0.072, "step": 13650 }, { "epoch": 0.05697607463844915, "grad_norm": 1.942047277024423, "learning_rate": 8.37980790987572e-06, "loss": 0.0807, "step": 13655 }, { "epoch": 0.05699693735343943, "grad_norm": 1.2207133851827106, "learning_rate": 8.37827401740074e-06, "loss": 0.0664, "step": 13660 }, { "epoch": 0.05701780006842971, "grad_norm": 1.3147365025772573, "learning_rate": 8.376740966937337e-06, "loss": 0.0644, "step": 13665 }, { "epoch": 0.057038662783419986, "grad_norm": 1.4699354557720565, "learning_rate": 8.375208757715447e-06, "loss": 0.0647, "step": 13670 }, { "epoch": 0.057059525498410264, "grad_norm": 1.097334737594751, "learning_rate": 8.373677388965976e-06, "loss": 0.0554, "step": 13675 }, { "epoch": 0.057080388213400536, "grad_norm": 1.623802120850447, "learning_rate": 8.372146859920828e-06, "loss": 0.0652, "step": 13680 }, { "epoch": 0.057101250928390815, "grad_norm": 1.183894078366817, "learning_rate": 8.370617169812883e-06, "loss": 0.0549, "step": 13685 }, { "epoch": 0.057122113643381094, "grad_norm": 1.5706051568718447, "learning_rate": 8.369088317875998e-06, "loss": 0.0587, "step": 13690 }, { "epoch": 0.05714297635837137, "grad_norm": 1.003985440312083, "learning_rate": 8.36756030334502e-06, "loss": 0.0603, "step": 13695 }, { "epoch": 0.05716383907336165, "grad_norm": 1.2835077137713824, "learning_rate": 8.366033125455762e-06, "loss": 0.065, "step": 13700 }, { "epoch": 0.05718470178835193, "grad_norm": 1.2330863737616355, "learning_rate": 8.364506783445019e-06, "loss": 0.06, "step": 13705 }, { "epoch": 0.05720556450334221, "grad_norm": 1.0327266839019253, "learning_rate": 8.362981276550561e-06, "loss": 0.05, "step": 13710 }, { "epoch": 0.05722642721833249, "grad_norm": 1.8332703389196532, "learning_rate": 8.361456604011127e-06, "loss": 0.0781, "step": 13715 }, { "epoch": 0.057247289933322766, "grad_norm": 1.6064922385682017, "learning_rate": 8.359932765066428e-06, "loss": 0.0704, "step": 13720 }, { "epoch": 0.05726815264831304, "grad_norm": 1.7371380604477649, "learning_rate": 8.358409758957153e-06, "loss": 0.0563, "step": 13725 }, { "epoch": 0.05728901536330332, "grad_norm": 0.9405848755114031, "learning_rate": 8.356887584924942e-06, "loss": 0.0555, "step": 13730 }, { "epoch": 0.057309878078293595, "grad_norm": 0.7455675070409985, "learning_rate": 8.355366242212417e-06, "loss": 0.0503, "step": 13735 }, { "epoch": 0.057330740793283874, "grad_norm": 1.431958407261957, "learning_rate": 8.353845730063158e-06, "loss": 0.0623, "step": 13740 }, { "epoch": 0.05735160350827415, "grad_norm": 0.8550404277732175, "learning_rate": 8.352326047721708e-06, "loss": 0.0434, "step": 13745 }, { "epoch": 0.05737246622326443, "grad_norm": 1.6059945518630192, "learning_rate": 8.350807194433573e-06, "loss": 0.0655, "step": 13750 }, { "epoch": 0.05739332893825471, "grad_norm": 1.5872899548221486, "learning_rate": 8.349289169445222e-06, "loss": 0.0613, "step": 13755 }, { "epoch": 0.05741419165324499, "grad_norm": 1.0409630210609837, "learning_rate": 8.347771972004074e-06, "loss": 0.0721, "step": 13760 }, { "epoch": 0.05743505436823527, "grad_norm": 1.8288891267446952, "learning_rate": 8.346255601358515e-06, "loss": 0.0664, "step": 13765 }, { "epoch": 0.05745591708322554, "grad_norm": 1.3688654251471792, "learning_rate": 8.344740056757878e-06, "loss": 0.0992, "step": 13770 }, { "epoch": 0.05747677979821582, "grad_norm": 1.420820211974812, "learning_rate": 8.343225337452454e-06, "loss": 0.0722, "step": 13775 }, { "epoch": 0.0574976425132061, "grad_norm": 1.2755877652871548, "learning_rate": 8.341711442693491e-06, "loss": 0.0625, "step": 13780 }, { "epoch": 0.057518505228196376, "grad_norm": 1.0875899736300332, "learning_rate": 8.340198371733177e-06, "loss": 0.0563, "step": 13785 }, { "epoch": 0.057539367943186655, "grad_norm": 1.3015900598099657, "learning_rate": 8.338686123824654e-06, "loss": 0.0638, "step": 13790 }, { "epoch": 0.057560230658176934, "grad_norm": 1.1470169956858511, "learning_rate": 8.337174698222016e-06, "loss": 0.0641, "step": 13795 }, { "epoch": 0.05758109337316721, "grad_norm": 0.9983076054085597, "learning_rate": 8.335664094180296e-06, "loss": 0.0655, "step": 13800 }, { "epoch": 0.05760195608815749, "grad_norm": 1.0350893266554129, "learning_rate": 8.334154310955476e-06, "loss": 0.0625, "step": 13805 }, { "epoch": 0.05762281880314777, "grad_norm": 1.7997089942696594, "learning_rate": 8.33264534780448e-06, "loss": 0.0592, "step": 13810 }, { "epoch": 0.05764368151813804, "grad_norm": 1.139390829289658, "learning_rate": 8.331137203985173e-06, "loss": 0.067, "step": 13815 }, { "epoch": 0.05766454423312832, "grad_norm": 3.547626578361589, "learning_rate": 8.32962987875636e-06, "loss": 0.1034, "step": 13820 }, { "epoch": 0.0576854069481186, "grad_norm": 3.0799934072315516, "learning_rate": 8.328123371377786e-06, "loss": 0.0692, "step": 13825 }, { "epoch": 0.05770626966310888, "grad_norm": 3.0052683740536392, "learning_rate": 8.326617681110129e-06, "loss": 0.0548, "step": 13830 }, { "epoch": 0.05772713237809916, "grad_norm": 1.3064335309971762, "learning_rate": 8.325112807215005e-06, "loss": 0.0536, "step": 13835 }, { "epoch": 0.057747995093089435, "grad_norm": 1.5442203802437597, "learning_rate": 8.323608748954965e-06, "loss": 0.055, "step": 13840 }, { "epoch": 0.057768857808079714, "grad_norm": 1.145737651707971, "learning_rate": 8.322105505593487e-06, "loss": 0.0675, "step": 13845 }, { "epoch": 0.05778972052306999, "grad_norm": 1.3000035010151254, "learning_rate": 8.320603076394987e-06, "loss": 0.0563, "step": 13850 }, { "epoch": 0.05781058323806027, "grad_norm": 1.9260085564850602, "learning_rate": 8.319101460624805e-06, "loss": 0.0619, "step": 13855 }, { "epoch": 0.05783144595305054, "grad_norm": 1.7196079881187172, "learning_rate": 8.317600657549214e-06, "loss": 0.0684, "step": 13860 }, { "epoch": 0.05785230866804082, "grad_norm": 1.2045071621705505, "learning_rate": 8.316100666435404e-06, "loss": 0.0636, "step": 13865 }, { "epoch": 0.0578731713830311, "grad_norm": 1.2561442660386348, "learning_rate": 8.3146014865515e-06, "loss": 0.0782, "step": 13870 }, { "epoch": 0.05789403409802138, "grad_norm": 1.01634245961559, "learning_rate": 8.313103117166544e-06, "loss": 0.0564, "step": 13875 }, { "epoch": 0.05791489681301166, "grad_norm": 1.613652295255335, "learning_rate": 8.3116055575505e-06, "loss": 0.0702, "step": 13880 }, { "epoch": 0.05793575952800194, "grad_norm": 0.8433542415673672, "learning_rate": 8.310108806974256e-06, "loss": 0.0632, "step": 13885 }, { "epoch": 0.057956622242992216, "grad_norm": 1.8272484127663302, "learning_rate": 8.308612864709615e-06, "loss": 0.0757, "step": 13890 }, { "epoch": 0.057977484957982495, "grad_norm": 1.648224843168512, "learning_rate": 8.307117730029302e-06, "loss": 0.047, "step": 13895 }, { "epoch": 0.05799834767297277, "grad_norm": 1.2257063147925245, "learning_rate": 8.30562340220695e-06, "loss": 0.0731, "step": 13900 }, { "epoch": 0.058019210387963045, "grad_norm": 2.3485807785770647, "learning_rate": 8.304129880517112e-06, "loss": 0.079, "step": 13905 }, { "epoch": 0.058040073102953324, "grad_norm": 1.0574816931201978, "learning_rate": 8.302637164235254e-06, "loss": 0.0672, "step": 13910 }, { "epoch": 0.0580609358179436, "grad_norm": 1.1886605824981122, "learning_rate": 8.30114525263775e-06, "loss": 0.0582, "step": 13915 }, { "epoch": 0.05808179853293388, "grad_norm": 1.0095396543730086, "learning_rate": 8.299654145001888e-06, "loss": 0.0559, "step": 13920 }, { "epoch": 0.05810266124792416, "grad_norm": 1.1277744418320201, "learning_rate": 8.298163840605863e-06, "loss": 0.0586, "step": 13925 }, { "epoch": 0.05812352396291444, "grad_norm": 0.816755931422259, "learning_rate": 8.296674338728776e-06, "loss": 0.0517, "step": 13930 }, { "epoch": 0.05814438667790472, "grad_norm": 0.8547955730340947, "learning_rate": 8.295185638650628e-06, "loss": 0.0528, "step": 13935 }, { "epoch": 0.058165249392894997, "grad_norm": 1.67814014677209, "learning_rate": 8.293697739652336e-06, "loss": 0.0701, "step": 13940 }, { "epoch": 0.05818611210788527, "grad_norm": 1.3507251983471653, "learning_rate": 8.29221064101571e-06, "loss": 0.0662, "step": 13945 }, { "epoch": 0.05820697482287555, "grad_norm": 9.345202207681696, "learning_rate": 8.290724342023468e-06, "loss": 0.0766, "step": 13950 }, { "epoch": 0.058227837537865826, "grad_norm": 1.144481862983142, "learning_rate": 8.289238841959222e-06, "loss": 0.0564, "step": 13955 }, { "epoch": 0.058248700252856105, "grad_norm": 1.4879593076742306, "learning_rate": 8.28775414010748e-06, "loss": 0.0579, "step": 13960 }, { "epoch": 0.05826956296784638, "grad_norm": 1.6866724651113847, "learning_rate": 8.286270235753657e-06, "loss": 0.0775, "step": 13965 }, { "epoch": 0.05829042568283666, "grad_norm": 1.1133782979115143, "learning_rate": 8.284787128184056e-06, "loss": 0.0695, "step": 13970 }, { "epoch": 0.05831128839782694, "grad_norm": 1.5083662553677204, "learning_rate": 8.283304816685873e-06, "loss": 0.0588, "step": 13975 }, { "epoch": 0.05833215111281722, "grad_norm": 1.5154289247452655, "learning_rate": 8.2818233005472e-06, "loss": 0.0729, "step": 13980 }, { "epoch": 0.0583530138278075, "grad_norm": 1.0228792835953766, "learning_rate": 8.280342579057018e-06, "loss": 0.0528, "step": 13985 }, { "epoch": 0.05837387654279777, "grad_norm": 0.8700131673068173, "learning_rate": 8.278862651505199e-06, "loss": 0.0588, "step": 13990 }, { "epoch": 0.05839473925778805, "grad_norm": 1.1699741462133584, "learning_rate": 8.277383517182498e-06, "loss": 0.0591, "step": 13995 }, { "epoch": 0.05841560197277833, "grad_norm": 1.2744703503127952, "learning_rate": 8.275905175380567e-06, "loss": 0.06, "step": 14000 }, { "epoch": 0.058436464687768606, "grad_norm": 1.3803464225275164, "learning_rate": 8.274427625391934e-06, "loss": 0.0591, "step": 14005 }, { "epoch": 0.058457327402758885, "grad_norm": 2.160964084952754, "learning_rate": 8.272950866510013e-06, "loss": 0.0571, "step": 14010 }, { "epoch": 0.058478190117749164, "grad_norm": 1.5762256709972355, "learning_rate": 8.271474898029102e-06, "loss": 0.0786, "step": 14015 }, { "epoch": 0.05849905283273944, "grad_norm": 1.2861686974068738, "learning_rate": 8.269999719244384e-06, "loss": 0.054, "step": 14020 }, { "epoch": 0.05851991554772972, "grad_norm": 1.9497427300914434, "learning_rate": 8.26852532945191e-06, "loss": 0.0612, "step": 14025 }, { "epoch": 0.05854077826272, "grad_norm": 0.9122153693170179, "learning_rate": 8.26705172794862e-06, "loss": 0.0602, "step": 14030 }, { "epoch": 0.05856164097771027, "grad_norm": 1.4694283591782835, "learning_rate": 8.265578914032326e-06, "loss": 0.0595, "step": 14035 }, { "epoch": 0.05858250369270055, "grad_norm": 1.2618527832121433, "learning_rate": 8.264106887001714e-06, "loss": 0.0445, "step": 14040 }, { "epoch": 0.05860336640769083, "grad_norm": 1.507901455428996, "learning_rate": 8.262635646156352e-06, "loss": 0.0582, "step": 14045 }, { "epoch": 0.05862422912268111, "grad_norm": 2.7499749290340234, "learning_rate": 8.26116519079667e-06, "loss": 0.0562, "step": 14050 }, { "epoch": 0.05864509183767139, "grad_norm": 1.5308786669082508, "learning_rate": 8.259695520223976e-06, "loss": 0.0806, "step": 14055 }, { "epoch": 0.058665954552661666, "grad_norm": 2.4210038745101787, "learning_rate": 8.258226633740447e-06, "loss": 0.0896, "step": 14060 }, { "epoch": 0.058686817267651945, "grad_norm": 1.3220149628661864, "learning_rate": 8.256758530649127e-06, "loss": 0.044, "step": 14065 }, { "epoch": 0.05870767998264222, "grad_norm": 1.1725110161238645, "learning_rate": 8.255291210253922e-06, "loss": 0.0648, "step": 14070 }, { "epoch": 0.0587285426976325, "grad_norm": 1.4313848694457196, "learning_rate": 8.253824671859616e-06, "loss": 0.0585, "step": 14075 }, { "epoch": 0.058749405412622774, "grad_norm": 1.0984253116799894, "learning_rate": 8.252358914771847e-06, "loss": 0.0517, "step": 14080 }, { "epoch": 0.05877026812761305, "grad_norm": 5.3815497696672, "learning_rate": 8.250893938297119e-06, "loss": 0.0672, "step": 14085 }, { "epoch": 0.05879113084260333, "grad_norm": 1.760689742710361, "learning_rate": 8.249429741742797e-06, "loss": 0.0564, "step": 14090 }, { "epoch": 0.05881199355759361, "grad_norm": 1.5948011956950443, "learning_rate": 8.247966324417105e-06, "loss": 0.0717, "step": 14095 }, { "epoch": 0.05883285627258389, "grad_norm": 2.9328779390407367, "learning_rate": 8.246503685629131e-06, "loss": 0.0529, "step": 14100 }, { "epoch": 0.05885371898757417, "grad_norm": 0.8502950807795071, "learning_rate": 8.245041824688815e-06, "loss": 0.0759, "step": 14105 }, { "epoch": 0.058874581702564446, "grad_norm": 1.8130505260875858, "learning_rate": 8.243580740906953e-06, "loss": 0.0773, "step": 14110 }, { "epoch": 0.058895444417554725, "grad_norm": 1.3830188112624762, "learning_rate": 8.242120433595198e-06, "loss": 0.0562, "step": 14115 }, { "epoch": 0.058916307132545004, "grad_norm": 1.5880354057281374, "learning_rate": 8.240660902066058e-06, "loss": 0.0507, "step": 14120 }, { "epoch": 0.058937169847535276, "grad_norm": 0.8125151213739935, "learning_rate": 8.239202145632886e-06, "loss": 0.0495, "step": 14125 }, { "epoch": 0.058958032562525554, "grad_norm": 1.4137348392772056, "learning_rate": 8.23774416360989e-06, "loss": 0.0672, "step": 14130 }, { "epoch": 0.05897889527751583, "grad_norm": 3.1223859591719503, "learning_rate": 8.236286955312135e-06, "loss": 0.0725, "step": 14135 }, { "epoch": 0.05899975799250611, "grad_norm": 1.6295213875496137, "learning_rate": 8.234830520055514e-06, "loss": 0.0596, "step": 14140 }, { "epoch": 0.05902062070749639, "grad_norm": 2.2119784199311665, "learning_rate": 8.233374857156787e-06, "loss": 0.0688, "step": 14145 }, { "epoch": 0.05904148342248667, "grad_norm": 0.7647083053521202, "learning_rate": 8.231919965933547e-06, "loss": 0.0645, "step": 14150 }, { "epoch": 0.05906234613747695, "grad_norm": 1.0210319382603545, "learning_rate": 8.230465845704237e-06, "loss": 0.0492, "step": 14155 }, { "epoch": 0.05908320885246723, "grad_norm": 0.9273217751638728, "learning_rate": 8.229012495788137e-06, "loss": 0.0576, "step": 14160 }, { "epoch": 0.059104071567457506, "grad_norm": 1.2977969549047155, "learning_rate": 8.22755991550537e-06, "loss": 0.0647, "step": 14165 }, { "epoch": 0.05912493428244778, "grad_norm": 1.1665820363499506, "learning_rate": 8.226108104176905e-06, "loss": 0.0732, "step": 14170 }, { "epoch": 0.059145796997438056, "grad_norm": 2.1414488365696607, "learning_rate": 8.224657061124535e-06, "loss": 0.0577, "step": 14175 }, { "epoch": 0.059166659712428335, "grad_norm": 3.9376417414027327, "learning_rate": 8.22320678567091e-06, "loss": 0.0684, "step": 14180 }, { "epoch": 0.059187522427418614, "grad_norm": 5.580121478221672, "learning_rate": 8.221757277139498e-06, "loss": 0.059, "step": 14185 }, { "epoch": 0.05920838514240889, "grad_norm": 1.697543298000446, "learning_rate": 8.220308534854611e-06, "loss": 0.0665, "step": 14190 }, { "epoch": 0.05922924785739917, "grad_norm": 1.4564933117278018, "learning_rate": 8.218860558141389e-06, "loss": 0.0627, "step": 14195 }, { "epoch": 0.05925011057238945, "grad_norm": 1.5380794652273975, "learning_rate": 8.217413346325811e-06, "loss": 0.061, "step": 14200 }, { "epoch": 0.05927097328737973, "grad_norm": 1.4972704837261357, "learning_rate": 8.215966898734679e-06, "loss": 0.0548, "step": 14205 }, { "epoch": 0.05929183600237001, "grad_norm": 1.3486245593276562, "learning_rate": 8.214521214695627e-06, "loss": 0.0545, "step": 14210 }, { "epoch": 0.05931269871736028, "grad_norm": 0.9147054168810212, "learning_rate": 8.213076293537117e-06, "loss": 0.06, "step": 14215 }, { "epoch": 0.05933356143235056, "grad_norm": 1.253326873900794, "learning_rate": 8.211632134588438e-06, "loss": 0.0428, "step": 14220 }, { "epoch": 0.05935442414734084, "grad_norm": 2.572772955368442, "learning_rate": 8.210188737179706e-06, "loss": 0.0716, "step": 14225 }, { "epoch": 0.059375286862331116, "grad_norm": 1.4050508150575542, "learning_rate": 8.208746100641855e-06, "loss": 0.0614, "step": 14230 }, { "epoch": 0.059396149577321394, "grad_norm": 1.5365582903745787, "learning_rate": 8.20730422430665e-06, "loss": 0.0736, "step": 14235 }, { "epoch": 0.05941701229231167, "grad_norm": 1.448496293339434, "learning_rate": 8.205863107506668e-06, "loss": 0.0622, "step": 14240 }, { "epoch": 0.05943787500730195, "grad_norm": 0.9840536204175331, "learning_rate": 8.204422749575314e-06, "loss": 0.0662, "step": 14245 }, { "epoch": 0.05945873772229223, "grad_norm": 1.7526109700337262, "learning_rate": 8.202983149846807e-06, "loss": 0.0565, "step": 14250 }, { "epoch": 0.05947960043728251, "grad_norm": 1.9036093923475417, "learning_rate": 8.201544307656188e-06, "loss": 0.0813, "step": 14255 }, { "epoch": 0.05950046315227278, "grad_norm": 0.9956235056516393, "learning_rate": 8.200106222339306e-06, "loss": 0.0554, "step": 14260 }, { "epoch": 0.05952132586726306, "grad_norm": 1.824485081076898, "learning_rate": 8.198668893232835e-06, "loss": 0.0542, "step": 14265 }, { "epoch": 0.05954218858225334, "grad_norm": 1.875486806523392, "learning_rate": 8.197232319674255e-06, "loss": 0.0573, "step": 14270 }, { "epoch": 0.05956305129724362, "grad_norm": 1.4719954325492621, "learning_rate": 8.195796501001863e-06, "loss": 0.074, "step": 14275 }, { "epoch": 0.059583914012233896, "grad_norm": 1.7690889814351378, "learning_rate": 8.194361436554766e-06, "loss": 0.0817, "step": 14280 }, { "epoch": 0.059604776727224175, "grad_norm": 1.0610665046885202, "learning_rate": 8.192927125672875e-06, "loss": 0.064, "step": 14285 }, { "epoch": 0.059625639442214454, "grad_norm": 1.4856141533359868, "learning_rate": 8.19149356769692e-06, "loss": 0.0849, "step": 14290 }, { "epoch": 0.05964650215720473, "grad_norm": 0.5419508032113682, "learning_rate": 8.190060761968429e-06, "loss": 0.0516, "step": 14295 }, { "epoch": 0.05966736487219501, "grad_norm": 1.2565897163557742, "learning_rate": 8.188628707829738e-06, "loss": 0.0699, "step": 14300 }, { "epoch": 0.05968822758718528, "grad_norm": 2.8637935694231613, "learning_rate": 8.187197404623995e-06, "loss": 0.0547, "step": 14305 }, { "epoch": 0.05970909030217556, "grad_norm": 1.2113313333569518, "learning_rate": 8.185766851695141e-06, "loss": 0.0496, "step": 14310 }, { "epoch": 0.05972995301716584, "grad_norm": 1.0762788889732267, "learning_rate": 8.184337048387928e-06, "loss": 0.0533, "step": 14315 }, { "epoch": 0.05975081573215612, "grad_norm": 1.5674390904419893, "learning_rate": 8.182907994047899e-06, "loss": 0.0739, "step": 14320 }, { "epoch": 0.0597716784471464, "grad_norm": 1.1988437567112997, "learning_rate": 8.181479688021406e-06, "loss": 0.0526, "step": 14325 }, { "epoch": 0.05979254116213668, "grad_norm": 1.6613553667695309, "learning_rate": 8.180052129655596e-06, "loss": 0.0597, "step": 14330 }, { "epoch": 0.059813403877126955, "grad_norm": 2.0957520409110106, "learning_rate": 8.17862531829841e-06, "loss": 0.0551, "step": 14335 }, { "epoch": 0.059834266592117234, "grad_norm": 0.8811642966803844, "learning_rate": 8.177199253298592e-06, "loss": 0.0747, "step": 14340 }, { "epoch": 0.05985512930710751, "grad_norm": 1.1643962028474024, "learning_rate": 8.17577393400567e-06, "loss": 0.0681, "step": 14345 }, { "epoch": 0.059875992022097785, "grad_norm": 1.4703051392212025, "learning_rate": 8.174349359769976e-06, "loss": 0.0492, "step": 14350 }, { "epoch": 0.059896854737088064, "grad_norm": 1.4533233453336787, "learning_rate": 8.17292552994263e-06, "loss": 0.0714, "step": 14355 }, { "epoch": 0.05991771745207834, "grad_norm": 2.214814217005099, "learning_rate": 8.171502443875542e-06, "loss": 0.0741, "step": 14360 }, { "epoch": 0.05993858016706862, "grad_norm": 0.8874090793982411, "learning_rate": 8.170080100921413e-06, "loss": 0.0713, "step": 14365 }, { "epoch": 0.0599594428820589, "grad_norm": 1.0871818342457469, "learning_rate": 8.168658500433728e-06, "loss": 0.054, "step": 14370 }, { "epoch": 0.05998030559704918, "grad_norm": 2.2109453340412255, "learning_rate": 8.167237641766767e-06, "loss": 0.0599, "step": 14375 }, { "epoch": 0.06000116831203946, "grad_norm": 1.9715560523019258, "learning_rate": 8.165817524275592e-06, "loss": 0.0718, "step": 14380 }, { "epoch": 0.060022031027029736, "grad_norm": 1.0653077180243464, "learning_rate": 8.164398147316047e-06, "loss": 0.0492, "step": 14385 }, { "epoch": 0.060042893742020015, "grad_norm": 1.0357287745674606, "learning_rate": 8.162979510244762e-06, "loss": 0.0504, "step": 14390 }, { "epoch": 0.06006375645701029, "grad_norm": 1.7201628089850878, "learning_rate": 8.16156161241915e-06, "loss": 0.0565, "step": 14395 }, { "epoch": 0.060084619172000565, "grad_norm": 1.8989946581101242, "learning_rate": 8.160144453197408e-06, "loss": 0.0836, "step": 14400 }, { "epoch": 0.060105481886990844, "grad_norm": 1.0338224542510748, "learning_rate": 8.158728031938504e-06, "loss": 0.0582, "step": 14405 }, { "epoch": 0.06012634460198112, "grad_norm": 0.7748686517728108, "learning_rate": 8.157312348002194e-06, "loss": 0.0591, "step": 14410 }, { "epoch": 0.0601472073169714, "grad_norm": 1.0897963032020073, "learning_rate": 8.155897400749003e-06, "loss": 0.0735, "step": 14415 }, { "epoch": 0.06016807003196168, "grad_norm": 1.5650056924534892, "learning_rate": 8.15448318954024e-06, "loss": 0.0549, "step": 14420 }, { "epoch": 0.06018893274695196, "grad_norm": 1.404073760050666, "learning_rate": 8.153069713737983e-06, "loss": 0.0707, "step": 14425 }, { "epoch": 0.06020979546194224, "grad_norm": 1.0461589683824375, "learning_rate": 8.151656972705088e-06, "loss": 0.049, "step": 14430 }, { "epoch": 0.06023065817693252, "grad_norm": 1.237473574326247, "learning_rate": 8.15024496580518e-06, "loss": 0.073, "step": 14435 }, { "epoch": 0.06025152089192279, "grad_norm": 1.0111225609395231, "learning_rate": 8.148833692402659e-06, "loss": 0.0613, "step": 14440 }, { "epoch": 0.06027238360691307, "grad_norm": 2.035948886860385, "learning_rate": 8.14742315186269e-06, "loss": 0.0662, "step": 14445 }, { "epoch": 0.060293246321903346, "grad_norm": 1.4149500889535236, "learning_rate": 8.146013343551214e-06, "loss": 0.0675, "step": 14450 }, { "epoch": 0.060314109036893625, "grad_norm": 1.3177406084882304, "learning_rate": 8.144604266834934e-06, "loss": 0.0572, "step": 14455 }, { "epoch": 0.060334971751883903, "grad_norm": 1.3618402876382207, "learning_rate": 8.143195921081321e-06, "loss": 0.0578, "step": 14460 }, { "epoch": 0.06035583446687418, "grad_norm": 2.1951690910422506, "learning_rate": 8.14178830565861e-06, "loss": 0.0744, "step": 14465 }, { "epoch": 0.06037669718186446, "grad_norm": 1.3477355964585958, "learning_rate": 8.140381419935807e-06, "loss": 0.0676, "step": 14470 }, { "epoch": 0.06039755989685474, "grad_norm": 0.659772861865666, "learning_rate": 8.138975263282671e-06, "loss": 0.0461, "step": 14475 }, { "epoch": 0.06041842261184502, "grad_norm": 1.581812426653295, "learning_rate": 8.13756983506973e-06, "loss": 0.0742, "step": 14480 }, { "epoch": 0.06043928532683529, "grad_norm": 1.3552333625113655, "learning_rate": 8.13616513466827e-06, "loss": 0.0599, "step": 14485 }, { "epoch": 0.06046014804182557, "grad_norm": 1.1408437469435961, "learning_rate": 8.134761161450338e-06, "loss": 0.0561, "step": 14490 }, { "epoch": 0.06048101075681585, "grad_norm": 1.0634566133481664, "learning_rate": 8.133357914788737e-06, "loss": 0.0492, "step": 14495 }, { "epoch": 0.06050187347180613, "grad_norm": 1.0163164290020774, "learning_rate": 8.131955394057026e-06, "loss": 0.0634, "step": 14500 }, { "epoch": 0.060522736186796405, "grad_norm": 1.076758150739463, "learning_rate": 8.130553598629527e-06, "loss": 0.0623, "step": 14505 }, { "epoch": 0.060543598901786684, "grad_norm": 1.3712776082665779, "learning_rate": 8.129152527881308e-06, "loss": 0.0492, "step": 14510 }, { "epoch": 0.06056446161677696, "grad_norm": 1.118216085900672, "learning_rate": 8.127752181188193e-06, "loss": 0.0778, "step": 14515 }, { "epoch": 0.06058532433176724, "grad_norm": 1.8642097461460012, "learning_rate": 8.126352557926765e-06, "loss": 0.0614, "step": 14520 }, { "epoch": 0.06060618704675751, "grad_norm": 1.3084040023611305, "learning_rate": 8.124953657474347e-06, "loss": 0.0673, "step": 14525 }, { "epoch": 0.06062704976174779, "grad_norm": 1.6034090748501428, "learning_rate": 8.123555479209022e-06, "loss": 0.079, "step": 14530 }, { "epoch": 0.06064791247673807, "grad_norm": 1.3472596608935727, "learning_rate": 8.122158022509616e-06, "loss": 0.0611, "step": 14535 }, { "epoch": 0.06066877519172835, "grad_norm": 1.670268670323362, "learning_rate": 8.120761286755706e-06, "loss": 0.0521, "step": 14540 }, { "epoch": 0.06068963790671863, "grad_norm": 1.0539614507500021, "learning_rate": 8.119365271327614e-06, "loss": 0.0563, "step": 14545 }, { "epoch": 0.06071050062170891, "grad_norm": 1.4153858990384798, "learning_rate": 8.117969975606405e-06, "loss": 0.0578, "step": 14550 }, { "epoch": 0.060731363336699186, "grad_norm": 1.6045137045496891, "learning_rate": 8.116575398973897e-06, "loss": 0.0735, "step": 14555 }, { "epoch": 0.060752226051689465, "grad_norm": 3.7642716309675, "learning_rate": 8.115181540812639e-06, "loss": 0.0524, "step": 14560 }, { "epoch": 0.06077308876667974, "grad_norm": 0.7902527601700273, "learning_rate": 8.11378840050593e-06, "loss": 0.0629, "step": 14565 }, { "epoch": 0.060793951481670015, "grad_norm": 1.7117718886260742, "learning_rate": 8.11239597743781e-06, "loss": 0.0557, "step": 14570 }, { "epoch": 0.060814814196660294, "grad_norm": 1.8415438605345464, "learning_rate": 8.111004270993052e-06, "loss": 0.0729, "step": 14575 }, { "epoch": 0.06083567691165057, "grad_norm": 2.591619026202506, "learning_rate": 8.109613280557177e-06, "loss": 0.0626, "step": 14580 }, { "epoch": 0.06085653962664085, "grad_norm": 1.7551723243132216, "learning_rate": 8.108223005516435e-06, "loss": 0.0719, "step": 14585 }, { "epoch": 0.06087740234163113, "grad_norm": 1.253005096972957, "learning_rate": 8.106833445257817e-06, "loss": 0.0606, "step": 14590 }, { "epoch": 0.06089826505662141, "grad_norm": 1.1529650692834266, "learning_rate": 8.10544459916905e-06, "loss": 0.0704, "step": 14595 }, { "epoch": 0.06091912777161169, "grad_norm": 1.9604242356206618, "learning_rate": 8.10405646663859e-06, "loss": 0.0691, "step": 14600 }, { "epoch": 0.060939990486601966, "grad_norm": 0.9780087869035429, "learning_rate": 8.102669047055631e-06, "loss": 0.056, "step": 14605 }, { "epoch": 0.060960853201592245, "grad_norm": 1.1063488028202348, "learning_rate": 8.101282339810095e-06, "loss": 0.0678, "step": 14610 }, { "epoch": 0.06098171591658252, "grad_norm": 1.0278422194767516, "learning_rate": 8.099896344292638e-06, "loss": 0.0561, "step": 14615 }, { "epoch": 0.061002578631572796, "grad_norm": 1.4290899965499657, "learning_rate": 8.098511059894639e-06, "loss": 0.0586, "step": 14620 }, { "epoch": 0.061023441346563075, "grad_norm": 1.1459014280045512, "learning_rate": 8.097126486008215e-06, "loss": 0.0763, "step": 14625 }, { "epoch": 0.06104430406155335, "grad_norm": 1.3757541064192833, "learning_rate": 8.095742622026206e-06, "loss": 0.0582, "step": 14630 }, { "epoch": 0.06106516677654363, "grad_norm": 1.613957163387538, "learning_rate": 8.094359467342175e-06, "loss": 0.0611, "step": 14635 }, { "epoch": 0.06108602949153391, "grad_norm": 1.3615289537492667, "learning_rate": 8.09297702135041e-06, "loss": 0.0602, "step": 14640 }, { "epoch": 0.06110689220652419, "grad_norm": 1.3220977166862156, "learning_rate": 8.09159528344593e-06, "loss": 0.0716, "step": 14645 }, { "epoch": 0.06112775492151447, "grad_norm": 1.203075518216315, "learning_rate": 8.09021425302447e-06, "loss": 0.0484, "step": 14650 }, { "epoch": 0.06114861763650475, "grad_norm": 0.9574594331580124, "learning_rate": 8.088833929482489e-06, "loss": 0.0538, "step": 14655 }, { "epoch": 0.06116948035149502, "grad_norm": 1.3571566235278691, "learning_rate": 8.087454312217165e-06, "loss": 0.0566, "step": 14660 }, { "epoch": 0.0611903430664853, "grad_norm": 1.125829780418519, "learning_rate": 8.0860754006264e-06, "loss": 0.0769, "step": 14665 }, { "epoch": 0.061211205781475576, "grad_norm": 1.4112210594633205, "learning_rate": 8.084697194108809e-06, "loss": 0.0623, "step": 14670 }, { "epoch": 0.061232068496465855, "grad_norm": 1.3804680053671878, "learning_rate": 8.083319692063727e-06, "loss": 0.068, "step": 14675 }, { "epoch": 0.061252931211456134, "grad_norm": 3.216102339609913, "learning_rate": 8.081942893891204e-06, "loss": 0.061, "step": 14680 }, { "epoch": 0.06127379392644641, "grad_norm": 1.6063452502303235, "learning_rate": 8.080566798992007e-06, "loss": 0.0768, "step": 14685 }, { "epoch": 0.06129465664143669, "grad_norm": 2.5395178032590255, "learning_rate": 8.079191406767618e-06, "loss": 0.0833, "step": 14690 }, { "epoch": 0.06131551935642697, "grad_norm": 1.9146362885088488, "learning_rate": 8.077816716620224e-06, "loss": 0.0596, "step": 14695 }, { "epoch": 0.06133638207141725, "grad_norm": 1.3226148208511745, "learning_rate": 8.07644272795273e-06, "loss": 0.0594, "step": 14700 }, { "epoch": 0.06135724478640752, "grad_norm": 1.5123168862446421, "learning_rate": 8.075069440168755e-06, "loss": 0.073, "step": 14705 }, { "epoch": 0.0613781075013978, "grad_norm": 1.5624044213585428, "learning_rate": 8.073696852672622e-06, "loss": 0.0599, "step": 14710 }, { "epoch": 0.06139897021638808, "grad_norm": 1.1741972001751664, "learning_rate": 8.072324964869365e-06, "loss": 0.058, "step": 14715 }, { "epoch": 0.06141983293137836, "grad_norm": 0.8524248177934562, "learning_rate": 8.07095377616472e-06, "loss": 0.0913, "step": 14720 }, { "epoch": 0.061440695646368636, "grad_norm": 1.124663301462431, "learning_rate": 8.069583285965142e-06, "loss": 0.0929, "step": 14725 }, { "epoch": 0.061461558361358914, "grad_norm": 0.7496238909123033, "learning_rate": 8.068213493677775e-06, "loss": 0.0518, "step": 14730 }, { "epoch": 0.06148242107634919, "grad_norm": 0.5450815117518568, "learning_rate": 8.066844398710481e-06, "loss": 0.0606, "step": 14735 }, { "epoch": 0.06150328379133947, "grad_norm": 1.133080690880706, "learning_rate": 8.065476000471818e-06, "loss": 0.0603, "step": 14740 }, { "epoch": 0.06152414650632975, "grad_norm": 1.8753189572984996, "learning_rate": 8.064108298371047e-06, "loss": 0.0725, "step": 14745 }, { "epoch": 0.06154500922132002, "grad_norm": 0.9706693374759073, "learning_rate": 8.06274129181813e-06, "loss": 0.0521, "step": 14750 }, { "epoch": 0.0615658719363103, "grad_norm": 1.2828801886077221, "learning_rate": 8.061374980223731e-06, "loss": 0.0486, "step": 14755 }, { "epoch": 0.06158673465130058, "grad_norm": 1.2145226603206092, "learning_rate": 8.06000936299921e-06, "loss": 0.0537, "step": 14760 }, { "epoch": 0.06160759736629086, "grad_norm": 0.9566229691442043, "learning_rate": 8.05864443955663e-06, "loss": 0.0631, "step": 14765 }, { "epoch": 0.06162846008128114, "grad_norm": 0.9518409135221345, "learning_rate": 8.057280209308741e-06, "loss": 0.0717, "step": 14770 }, { "epoch": 0.061649322796271416, "grad_norm": 1.3742818726940746, "learning_rate": 8.055916671668998e-06, "loss": 0.0709, "step": 14775 }, { "epoch": 0.061670185511261695, "grad_norm": 0.947263221137646, "learning_rate": 8.054553826051547e-06, "loss": 0.061, "step": 14780 }, { "epoch": 0.061691048226251974, "grad_norm": 1.4630017226837229, "learning_rate": 8.05319167187123e-06, "loss": 0.056, "step": 14785 }, { "epoch": 0.06171191094124225, "grad_norm": 1.4152196481765087, "learning_rate": 8.051830208543572e-06, "loss": 0.0634, "step": 14790 }, { "epoch": 0.061732773656232524, "grad_norm": 0.9961017723182788, "learning_rate": 8.050469435484804e-06, "loss": 0.055, "step": 14795 }, { "epoch": 0.0617536363712228, "grad_norm": 0.9664396594886601, "learning_rate": 8.049109352111835e-06, "loss": 0.0541, "step": 14800 }, { "epoch": 0.06177449908621308, "grad_norm": 1.237090374231603, "learning_rate": 8.047749957842272e-06, "loss": 0.0592, "step": 14805 }, { "epoch": 0.06179536180120336, "grad_norm": 2.192797195968433, "learning_rate": 8.046391252094405e-06, "loss": 0.0553, "step": 14810 }, { "epoch": 0.06181622451619364, "grad_norm": 1.5213410317146123, "learning_rate": 8.045033234287211e-06, "loss": 0.0689, "step": 14815 }, { "epoch": 0.06183708723118392, "grad_norm": 1.8834511917286019, "learning_rate": 8.04367590384036e-06, "loss": 0.0655, "step": 14820 }, { "epoch": 0.0618579499461742, "grad_norm": 1.7533064509357166, "learning_rate": 8.042319260174196e-06, "loss": 0.0701, "step": 14825 }, { "epoch": 0.061878812661164476, "grad_norm": 1.6140706102101832, "learning_rate": 8.040963302709758e-06, "loss": 0.0634, "step": 14830 }, { "epoch": 0.061899675376154754, "grad_norm": 1.3820677807173465, "learning_rate": 8.039608030868759e-06, "loss": 0.0706, "step": 14835 }, { "epoch": 0.061920538091145026, "grad_norm": 1.942302049194303, "learning_rate": 8.038253444073604e-06, "loss": 0.0574, "step": 14840 }, { "epoch": 0.061941400806135305, "grad_norm": 0.9970547467102909, "learning_rate": 8.03689954174737e-06, "loss": 0.0623, "step": 14845 }, { "epoch": 0.061962263521125584, "grad_norm": 1.1930849811427944, "learning_rate": 8.035546323313815e-06, "loss": 0.0646, "step": 14850 }, { "epoch": 0.06198312623611586, "grad_norm": 1.4646023322873993, "learning_rate": 8.034193788197382e-06, "loss": 0.067, "step": 14855 }, { "epoch": 0.06200398895110614, "grad_norm": 0.8430843542606702, "learning_rate": 8.03284193582319e-06, "loss": 0.0644, "step": 14860 }, { "epoch": 0.06202485166609642, "grad_norm": 1.4723033361288913, "learning_rate": 8.03149076561703e-06, "loss": 0.0612, "step": 14865 }, { "epoch": 0.0620457143810867, "grad_norm": 1.7769190999281588, "learning_rate": 8.030140277005372e-06, "loss": 0.0569, "step": 14870 }, { "epoch": 0.06206657709607698, "grad_norm": 1.6281910889809754, "learning_rate": 8.02879046941536e-06, "loss": 0.0579, "step": 14875 }, { "epoch": 0.062087439811067256, "grad_norm": 1.3249718779254778, "learning_rate": 8.027441342274814e-06, "loss": 0.0565, "step": 14880 }, { "epoch": 0.06210830252605753, "grad_norm": 6.142218560846352, "learning_rate": 8.026092895012228e-06, "loss": 0.06, "step": 14885 }, { "epoch": 0.06212916524104781, "grad_norm": 1.730682812724934, "learning_rate": 8.024745127056763e-06, "loss": 0.0685, "step": 14890 }, { "epoch": 0.062150027956038086, "grad_norm": 0.9745302775606531, "learning_rate": 8.02339803783825e-06, "loss": 0.0456, "step": 14895 }, { "epoch": 0.062170890671028364, "grad_norm": 2.0816198904968073, "learning_rate": 8.022051626787198e-06, "loss": 0.0611, "step": 14900 }, { "epoch": 0.06219175338601864, "grad_norm": 5.356879478879465, "learning_rate": 8.020705893334776e-06, "loss": 0.0504, "step": 14905 }, { "epoch": 0.06221261610100892, "grad_norm": 1.6352893174084395, "learning_rate": 8.019360836912825e-06, "loss": 0.0899, "step": 14910 }, { "epoch": 0.0622334788159992, "grad_norm": 0.9628662587363591, "learning_rate": 8.018016456953855e-06, "loss": 0.0531, "step": 14915 }, { "epoch": 0.06225434153098948, "grad_norm": 0.9034194070208933, "learning_rate": 8.016672752891035e-06, "loss": 0.0731, "step": 14920 }, { "epoch": 0.06227520424597976, "grad_norm": 1.5280377268524181, "learning_rate": 8.015329724158203e-06, "loss": 0.0541, "step": 14925 }, { "epoch": 0.06229606696097003, "grad_norm": 1.6895964024650574, "learning_rate": 8.01398737018986e-06, "loss": 0.0598, "step": 14930 }, { "epoch": 0.06231692967596031, "grad_norm": 1.5953638808186417, "learning_rate": 8.012645690421173e-06, "loss": 0.0576, "step": 14935 }, { "epoch": 0.06233779239095059, "grad_norm": 1.603043832954642, "learning_rate": 8.011304684287965e-06, "loss": 0.0578, "step": 14940 }, { "epoch": 0.062358655105940866, "grad_norm": 1.004596513294741, "learning_rate": 8.009964351226722e-06, "loss": 0.0511, "step": 14945 }, { "epoch": 0.062379517820931145, "grad_norm": 0.95551056327591, "learning_rate": 8.008624690674591e-06, "loss": 0.0496, "step": 14950 }, { "epoch": 0.062400380535921424, "grad_norm": 1.5349796772633253, "learning_rate": 8.007285702069377e-06, "loss": 0.0659, "step": 14955 }, { "epoch": 0.0624212432509117, "grad_norm": 1.444246767202655, "learning_rate": 8.005947384849543e-06, "loss": 0.0622, "step": 14960 }, { "epoch": 0.06244210596590198, "grad_norm": 1.4086373862359434, "learning_rate": 8.004609738454205e-06, "loss": 0.055, "step": 14965 }, { "epoch": 0.06246296868089226, "grad_norm": 1.1033429723826236, "learning_rate": 8.003272762323142e-06, "loss": 0.0664, "step": 14970 }, { "epoch": 0.06248383139588253, "grad_norm": 0.9711826349399432, "learning_rate": 8.001936455896784e-06, "loss": 0.0539, "step": 14975 }, { "epoch": 0.06250469411087281, "grad_norm": 1.2398535837068383, "learning_rate": 8.000600818616211e-06, "loss": 0.0477, "step": 14980 }, { "epoch": 0.0625255568258631, "grad_norm": 1.298150584519416, "learning_rate": 7.999265849923162e-06, "loss": 0.0648, "step": 14985 }, { "epoch": 0.06254641954085337, "grad_norm": 1.3903055461465164, "learning_rate": 7.997931549260026e-06, "loss": 0.0656, "step": 14990 }, { "epoch": 0.06256728225584364, "grad_norm": 1.5250615392202567, "learning_rate": 7.996597916069842e-06, "loss": 0.0636, "step": 14995 }, { "epoch": 0.06258814497083393, "grad_norm": 1.146345567570489, "learning_rate": 7.995264949796297e-06, "loss": 0.0694, "step": 15000 }, { "epoch": 0.0626090076858242, "grad_norm": 0.9179443228395219, "learning_rate": 7.99393264988373e-06, "loss": 0.0734, "step": 15005 }, { "epoch": 0.06262987040081448, "grad_norm": 1.3264258749327877, "learning_rate": 7.992601015777127e-06, "loss": 0.0548, "step": 15010 }, { "epoch": 0.06265073311580475, "grad_norm": 1.1587125190096297, "learning_rate": 7.991270046922122e-06, "loss": 0.0616, "step": 15015 }, { "epoch": 0.06267159583079504, "grad_norm": 1.3509022944691496, "learning_rate": 7.989939742764993e-06, "loss": 0.0528, "step": 15020 }, { "epoch": 0.06269245854578531, "grad_norm": 2.573053928371604, "learning_rate": 7.988610102752665e-06, "loss": 0.0664, "step": 15025 }, { "epoch": 0.0627133212607756, "grad_norm": 1.4632258473768103, "learning_rate": 7.987281126332703e-06, "loss": 0.0745, "step": 15030 }, { "epoch": 0.06273418397576587, "grad_norm": 1.814054420980923, "learning_rate": 7.98595281295332e-06, "loss": 0.0591, "step": 15035 }, { "epoch": 0.06275504669075614, "grad_norm": 1.106814714999339, "learning_rate": 7.984625162063371e-06, "loss": 0.0641, "step": 15040 }, { "epoch": 0.06277590940574643, "grad_norm": 1.1276600314506007, "learning_rate": 7.983298173112347e-06, "loss": 0.0597, "step": 15045 }, { "epoch": 0.0627967721207367, "grad_norm": 1.0395184954401722, "learning_rate": 7.981971845550385e-06, "loss": 0.0553, "step": 15050 }, { "epoch": 0.06281763483572698, "grad_norm": 0.6386364836831819, "learning_rate": 7.98064617882826e-06, "loss": 0.0674, "step": 15055 }, { "epoch": 0.06283849755071726, "grad_norm": 1.5459877691215993, "learning_rate": 7.979321172397379e-06, "loss": 0.0647, "step": 15060 }, { "epoch": 0.06285936026570754, "grad_norm": 1.1151734586702913, "learning_rate": 7.977996825709797e-06, "loss": 0.0651, "step": 15065 }, { "epoch": 0.06288022298069781, "grad_norm": 1.9742152602160008, "learning_rate": 7.976673138218197e-06, "loss": 0.0614, "step": 15070 }, { "epoch": 0.0629010856956881, "grad_norm": 2.7521152134840534, "learning_rate": 7.975350109375902e-06, "loss": 0.0755, "step": 15075 }, { "epoch": 0.06292194841067837, "grad_norm": 1.2519780520022714, "learning_rate": 7.974027738636866e-06, "loss": 0.0699, "step": 15080 }, { "epoch": 0.06294281112566864, "grad_norm": 2.0237744930209143, "learning_rate": 7.972706025455682e-06, "loss": 0.0648, "step": 15085 }, { "epoch": 0.06296367384065893, "grad_norm": 1.3656697710750147, "learning_rate": 7.971384969287568e-06, "loss": 0.0434, "step": 15090 }, { "epoch": 0.0629845365556492, "grad_norm": 0.8736843641708552, "learning_rate": 7.97006456958838e-06, "loss": 0.0567, "step": 15095 }, { "epoch": 0.06300539927063949, "grad_norm": 1.0130288682612847, "learning_rate": 7.9687448258146e-06, "loss": 0.0688, "step": 15100 }, { "epoch": 0.06302626198562976, "grad_norm": 1.6431752575016785, "learning_rate": 7.967425737423346e-06, "loss": 0.065, "step": 15105 }, { "epoch": 0.06304712470062004, "grad_norm": 1.4244706791294166, "learning_rate": 7.966107303872357e-06, "loss": 0.0786, "step": 15110 }, { "epoch": 0.06306798741561032, "grad_norm": 1.0072688974570636, "learning_rate": 7.964789524620009e-06, "loss": 0.0482, "step": 15115 }, { "epoch": 0.0630888501306006, "grad_norm": 1.4839224176043366, "learning_rate": 7.963472399125297e-06, "loss": 0.0753, "step": 15120 }, { "epoch": 0.06310971284559087, "grad_norm": 1.5402208618600801, "learning_rate": 7.962155926847844e-06, "loss": 0.0653, "step": 15125 }, { "epoch": 0.06313057556058115, "grad_norm": 0.8778685968457753, "learning_rate": 7.960840107247901e-06, "loss": 0.0592, "step": 15130 }, { "epoch": 0.06315143827557143, "grad_norm": 1.3801656611504936, "learning_rate": 7.959524939786341e-06, "loss": 0.0599, "step": 15135 }, { "epoch": 0.0631723009905617, "grad_norm": 2.4932791898140403, "learning_rate": 7.95821042392466e-06, "loss": 0.0647, "step": 15140 }, { "epoch": 0.06319316370555199, "grad_norm": 1.7616487241195635, "learning_rate": 7.956896559124979e-06, "loss": 0.0598, "step": 15145 }, { "epoch": 0.06321402642054226, "grad_norm": 1.226965890627051, "learning_rate": 7.955583344850033e-06, "loss": 0.0987, "step": 15150 }, { "epoch": 0.06323488913553255, "grad_norm": 1.1525076797759397, "learning_rate": 7.95427078056319e-06, "loss": 0.0579, "step": 15155 }, { "epoch": 0.06325575185052282, "grad_norm": 1.2001557204022442, "learning_rate": 7.952958865728425e-06, "loss": 0.0518, "step": 15160 }, { "epoch": 0.0632766145655131, "grad_norm": 1.165812504887999, "learning_rate": 7.95164759981034e-06, "loss": 0.0591, "step": 15165 }, { "epoch": 0.06329747728050338, "grad_norm": 1.1851683610922086, "learning_rate": 7.950336982274147e-06, "loss": 0.0482, "step": 15170 }, { "epoch": 0.06331833999549365, "grad_norm": 1.0163517384235798, "learning_rate": 7.949027012585687e-06, "loss": 0.0594, "step": 15175 }, { "epoch": 0.06333920271048393, "grad_norm": 2.0054653793487454, "learning_rate": 7.947717690211406e-06, "loss": 0.06, "step": 15180 }, { "epoch": 0.0633600654254742, "grad_norm": 1.3868434198850839, "learning_rate": 7.946409014618366e-06, "loss": 0.0551, "step": 15185 }, { "epoch": 0.06338092814046449, "grad_norm": 1.3042006642601147, "learning_rate": 7.945100985274246e-06, "loss": 0.0584, "step": 15190 }, { "epoch": 0.06340179085545476, "grad_norm": 1.3697626603847552, "learning_rate": 7.943793601647342e-06, "loss": 0.046, "step": 15195 }, { "epoch": 0.06342265357044505, "grad_norm": 1.2141783476038301, "learning_rate": 7.942486863206554e-06, "loss": 0.061, "step": 15200 }, { "epoch": 0.06344351628543532, "grad_norm": 1.666314807014988, "learning_rate": 7.9411807694214e-06, "loss": 0.0517, "step": 15205 }, { "epoch": 0.0634643790004256, "grad_norm": 1.4024279029630584, "learning_rate": 7.939875319762003e-06, "loss": 0.0762, "step": 15210 }, { "epoch": 0.06348524171541588, "grad_norm": 1.1250136081844917, "learning_rate": 7.938570513699101e-06, "loss": 0.0547, "step": 15215 }, { "epoch": 0.06350610443040615, "grad_norm": 0.8377558046441169, "learning_rate": 7.937266350704034e-06, "loss": 0.0515, "step": 15220 }, { "epoch": 0.06352696714539643, "grad_norm": 1.5991010174608922, "learning_rate": 7.935962830248757e-06, "loss": 0.0589, "step": 15225 }, { "epoch": 0.0635478298603867, "grad_norm": 1.7685962403215523, "learning_rate": 7.934659951805827e-06, "loss": 0.0593, "step": 15230 }, { "epoch": 0.06356869257537699, "grad_norm": 1.6769024507489907, "learning_rate": 7.93335771484841e-06, "loss": 0.0805, "step": 15235 }, { "epoch": 0.06358955529036726, "grad_norm": 1.022680990029911, "learning_rate": 7.932056118850274e-06, "loss": 0.0514, "step": 15240 }, { "epoch": 0.06361041800535755, "grad_norm": 1.1657172983831974, "learning_rate": 7.930755163285794e-06, "loss": 0.0694, "step": 15245 }, { "epoch": 0.06363128072034782, "grad_norm": 1.1699225671368023, "learning_rate": 7.929454847629945e-06, "loss": 0.0603, "step": 15250 }, { "epoch": 0.06365214343533811, "grad_norm": 1.3050943287216288, "learning_rate": 7.928155171358306e-06, "loss": 0.0964, "step": 15255 }, { "epoch": 0.06367300615032838, "grad_norm": 8.316646537187275, "learning_rate": 7.92685613394706e-06, "loss": 0.0523, "step": 15260 }, { "epoch": 0.06369386886531865, "grad_norm": 1.264056907109748, "learning_rate": 7.925557734872988e-06, "loss": 0.0595, "step": 15265 }, { "epoch": 0.06371473158030894, "grad_norm": 1.4170330003390255, "learning_rate": 7.924259973613469e-06, "loss": 0.0649, "step": 15270 }, { "epoch": 0.06373559429529921, "grad_norm": 1.7638588952187715, "learning_rate": 7.922962849646486e-06, "loss": 0.0584, "step": 15275 }, { "epoch": 0.0637564570102895, "grad_norm": 1.844831939234089, "learning_rate": 7.921666362450614e-06, "loss": 0.0558, "step": 15280 }, { "epoch": 0.06377731972527977, "grad_norm": 1.2644369588839741, "learning_rate": 7.92037051150503e-06, "loss": 0.0582, "step": 15285 }, { "epoch": 0.06379818244027005, "grad_norm": 1.7454779720951754, "learning_rate": 7.919075296289506e-06, "loss": 0.0564, "step": 15290 }, { "epoch": 0.06381904515526032, "grad_norm": 1.1874390263808114, "learning_rate": 7.917780716284404e-06, "loss": 0.0495, "step": 15295 }, { "epoch": 0.06383990787025061, "grad_norm": 1.8889915025694142, "learning_rate": 7.91648677097069e-06, "loss": 0.0601, "step": 15300 }, { "epoch": 0.06386077058524088, "grad_norm": 1.3705721581396522, "learning_rate": 7.915193459829914e-06, "loss": 0.0655, "step": 15305 }, { "epoch": 0.06388163330023115, "grad_norm": 2.1259233377824724, "learning_rate": 7.913900782344227e-06, "loss": 0.0677, "step": 15310 }, { "epoch": 0.06390249601522144, "grad_norm": 2.563370474314051, "learning_rate": 7.912608737996367e-06, "loss": 0.083, "step": 15315 }, { "epoch": 0.06392335873021171, "grad_norm": 1.2166689913705313, "learning_rate": 7.911317326269661e-06, "loss": 0.0628, "step": 15320 }, { "epoch": 0.063944221445202, "grad_norm": 1.3551439605104494, "learning_rate": 7.910026546648034e-06, "loss": 0.0695, "step": 15325 }, { "epoch": 0.06396508416019227, "grad_norm": 1.274235055964179, "learning_rate": 7.90873639861599e-06, "loss": 0.0477, "step": 15330 }, { "epoch": 0.06398594687518255, "grad_norm": 1.0241393530350011, "learning_rate": 7.90744688165863e-06, "loss": 0.0543, "step": 15335 }, { "epoch": 0.06400680959017283, "grad_norm": 0.986379864767898, "learning_rate": 7.906157995261637e-06, "loss": 0.0467, "step": 15340 }, { "epoch": 0.06402767230516311, "grad_norm": 1.6432970992750864, "learning_rate": 7.904869738911287e-06, "loss": 0.0564, "step": 15345 }, { "epoch": 0.06404853502015338, "grad_norm": 1.5528633016335103, "learning_rate": 7.903582112094432e-06, "loss": 0.0723, "step": 15350 }, { "epoch": 0.06406939773514365, "grad_norm": 1.1054107594354827, "learning_rate": 7.902295114298518e-06, "loss": 0.0556, "step": 15355 }, { "epoch": 0.06409026045013394, "grad_norm": 1.050215373897982, "learning_rate": 7.901008745011567e-06, "loss": 0.0773, "step": 15360 }, { "epoch": 0.06411112316512421, "grad_norm": 1.804060862859794, "learning_rate": 7.899723003722194e-06, "loss": 0.0617, "step": 15365 }, { "epoch": 0.0641319858801145, "grad_norm": 1.6400089411757715, "learning_rate": 7.89843788991959e-06, "loss": 0.0608, "step": 15370 }, { "epoch": 0.06415284859510477, "grad_norm": 2.169993285642523, "learning_rate": 7.897153403093528e-06, "loss": 0.0667, "step": 15375 }, { "epoch": 0.06417371131009506, "grad_norm": 0.9299072653560372, "learning_rate": 7.89586954273436e-06, "loss": 0.0562, "step": 15380 }, { "epoch": 0.06419457402508533, "grad_norm": 0.9311398202447113, "learning_rate": 7.894586308333019e-06, "loss": 0.0544, "step": 15385 }, { "epoch": 0.06421543674007561, "grad_norm": 1.393661836489861, "learning_rate": 7.893303699381024e-06, "loss": 0.063, "step": 15390 }, { "epoch": 0.06423629945506588, "grad_norm": 1.2452905779678354, "learning_rate": 7.892021715370462e-06, "loss": 0.0537, "step": 15395 }, { "epoch": 0.06425716217005616, "grad_norm": 1.4937185244655395, "learning_rate": 7.890740355794e-06, "loss": 0.076, "step": 15400 }, { "epoch": 0.06427802488504644, "grad_norm": 2.2704265002558968, "learning_rate": 7.889459620144883e-06, "loss": 0.057, "step": 15405 }, { "epoch": 0.06429888760003671, "grad_norm": 1.2013196461250983, "learning_rate": 7.888179507916935e-06, "loss": 0.0583, "step": 15410 }, { "epoch": 0.064319750315027, "grad_norm": 1.2278183683769919, "learning_rate": 7.886900018604546e-06, "loss": 0.0645, "step": 15415 }, { "epoch": 0.06434061303001727, "grad_norm": 1.5574936957266596, "learning_rate": 7.885621151702688e-06, "loss": 0.0549, "step": 15420 }, { "epoch": 0.06436147574500756, "grad_norm": 1.2293874027274674, "learning_rate": 7.884342906706903e-06, "loss": 0.067, "step": 15425 }, { "epoch": 0.06438233845999783, "grad_norm": 1.219612550940808, "learning_rate": 7.8830652831133e-06, "loss": 0.0615, "step": 15430 }, { "epoch": 0.06440320117498811, "grad_norm": 1.2079058752284304, "learning_rate": 7.88178828041857e-06, "loss": 0.0696, "step": 15435 }, { "epoch": 0.06442406388997839, "grad_norm": 1.057362593220452, "learning_rate": 7.88051189811997e-06, "loss": 0.0615, "step": 15440 }, { "epoch": 0.06444492660496866, "grad_norm": 0.8694947447973241, "learning_rate": 7.87923613571532e-06, "loss": 0.0594, "step": 15445 }, { "epoch": 0.06446578931995894, "grad_norm": 1.2191911258856092, "learning_rate": 7.87796099270302e-06, "loss": 0.088, "step": 15450 }, { "epoch": 0.06448665203494922, "grad_norm": 1.0338052600812966, "learning_rate": 7.876686468582028e-06, "loss": 0.0447, "step": 15455 }, { "epoch": 0.0645075147499395, "grad_norm": 1.0316096174563412, "learning_rate": 7.875412562851878e-06, "loss": 0.0491, "step": 15460 }, { "epoch": 0.06452837746492977, "grad_norm": 1.3810018084149907, "learning_rate": 7.874139275012665e-06, "loss": 0.068, "step": 15465 }, { "epoch": 0.06454924017992006, "grad_norm": 1.2470341365940771, "learning_rate": 7.872866604565049e-06, "loss": 0.0517, "step": 15470 }, { "epoch": 0.06457010289491033, "grad_norm": 1.2596022644481601, "learning_rate": 7.871594551010258e-06, "loss": 0.0654, "step": 15475 }, { "epoch": 0.06459096560990062, "grad_norm": 2.004891076170938, "learning_rate": 7.870323113850084e-06, "loss": 0.0614, "step": 15480 }, { "epoch": 0.06461182832489089, "grad_norm": 1.209072196740695, "learning_rate": 7.869052292586879e-06, "loss": 0.062, "step": 15485 }, { "epoch": 0.06463269103988116, "grad_norm": 1.1241511505093351, "learning_rate": 7.867782086723558e-06, "loss": 0.0509, "step": 15490 }, { "epoch": 0.06465355375487145, "grad_norm": 1.6710163047329543, "learning_rate": 7.866512495763603e-06, "loss": 0.0502, "step": 15495 }, { "epoch": 0.06467441646986172, "grad_norm": 1.3571630747142713, "learning_rate": 7.865243519211049e-06, "loss": 0.0816, "step": 15500 }, { "epoch": 0.064695279184852, "grad_norm": 1.3457925578145082, "learning_rate": 7.863975156570492e-06, "loss": 0.0664, "step": 15505 }, { "epoch": 0.06471614189984227, "grad_norm": 1.1371496479711012, "learning_rate": 7.862707407347093e-06, "loss": 0.052, "step": 15510 }, { "epoch": 0.06473700461483256, "grad_norm": 1.088216807217462, "learning_rate": 7.861440271046566e-06, "loss": 0.0727, "step": 15515 }, { "epoch": 0.06475786732982283, "grad_norm": 1.9670033417335684, "learning_rate": 7.860173747175182e-06, "loss": 0.0581, "step": 15520 }, { "epoch": 0.06477873004481312, "grad_norm": 1.1712618948943159, "learning_rate": 7.858907835239773e-06, "loss": 0.0523, "step": 15525 }, { "epoch": 0.06479959275980339, "grad_norm": 1.3509299320557728, "learning_rate": 7.857642534747723e-06, "loss": 0.0553, "step": 15530 }, { "epoch": 0.06482045547479366, "grad_norm": 0.9548823107113215, "learning_rate": 7.856377845206972e-06, "loss": 0.0619, "step": 15535 }, { "epoch": 0.06484131818978395, "grad_norm": 1.3423647603288014, "learning_rate": 7.855113766126015e-06, "loss": 0.0668, "step": 15540 }, { "epoch": 0.06486218090477422, "grad_norm": 1.2453969805645757, "learning_rate": 7.853850297013898e-06, "loss": 0.0602, "step": 15545 }, { "epoch": 0.0648830436197645, "grad_norm": 5.031320643069006, "learning_rate": 7.852587437380224e-06, "loss": 0.0679, "step": 15550 }, { "epoch": 0.06490390633475478, "grad_norm": 1.2902544030075966, "learning_rate": 7.851325186735144e-06, "loss": 0.0631, "step": 15555 }, { "epoch": 0.06492476904974506, "grad_norm": 1.287820903586612, "learning_rate": 7.85006354458936e-06, "loss": 0.0631, "step": 15560 }, { "epoch": 0.06494563176473533, "grad_norm": 1.1193866680113613, "learning_rate": 7.848802510454128e-06, "loss": 0.077, "step": 15565 }, { "epoch": 0.06496649447972562, "grad_norm": 1.132923316399394, "learning_rate": 7.847542083841248e-06, "loss": 0.079, "step": 15570 }, { "epoch": 0.06498735719471589, "grad_norm": 1.6027232471522184, "learning_rate": 7.846282264263076e-06, "loss": 0.0463, "step": 15575 }, { "epoch": 0.06500821990970616, "grad_norm": 2.045580227701862, "learning_rate": 7.845023051232505e-06, "loss": 0.0647, "step": 15580 }, { "epoch": 0.06502908262469645, "grad_norm": 0.9001759981409211, "learning_rate": 7.843764444262983e-06, "loss": 0.0542, "step": 15585 }, { "epoch": 0.06504994533968672, "grad_norm": 1.370487083312202, "learning_rate": 7.842506442868508e-06, "loss": 0.0638, "step": 15590 }, { "epoch": 0.065070808054677, "grad_norm": 1.4612727012324176, "learning_rate": 7.84124904656361e-06, "loss": 0.0595, "step": 15595 }, { "epoch": 0.06509167076966728, "grad_norm": 1.351230963434756, "learning_rate": 7.839992254863375e-06, "loss": 0.0633, "step": 15600 }, { "epoch": 0.06511253348465756, "grad_norm": 0.9871410703863203, "learning_rate": 7.838736067283434e-06, "loss": 0.0602, "step": 15605 }, { "epoch": 0.06513339619964784, "grad_norm": 1.3577849746501953, "learning_rate": 7.837480483339947e-06, "loss": 0.0589, "step": 15610 }, { "epoch": 0.06515425891463812, "grad_norm": 1.4670753480316845, "learning_rate": 7.836225502549635e-06, "loss": 0.0611, "step": 15615 }, { "epoch": 0.0651751216296284, "grad_norm": 1.7528119226726058, "learning_rate": 7.834971124429747e-06, "loss": 0.0607, "step": 15620 }, { "epoch": 0.06519598434461867, "grad_norm": 1.0531476102387844, "learning_rate": 7.833717348498078e-06, "loss": 0.0678, "step": 15625 }, { "epoch": 0.06521684705960895, "grad_norm": 1.0888590963323612, "learning_rate": 7.832464174272963e-06, "loss": 0.0518, "step": 15630 }, { "epoch": 0.06523770977459922, "grad_norm": 1.4551894281737838, "learning_rate": 7.831211601273274e-06, "loss": 0.052, "step": 15635 }, { "epoch": 0.06525857248958951, "grad_norm": 1.1790339155695364, "learning_rate": 7.829959629018423e-06, "loss": 0.0651, "step": 15640 }, { "epoch": 0.06527943520457978, "grad_norm": 1.310691509851878, "learning_rate": 7.828708257028362e-06, "loss": 0.0459, "step": 15645 }, { "epoch": 0.06530029791957007, "grad_norm": 1.535552852921744, "learning_rate": 7.827457484823573e-06, "loss": 0.063, "step": 15650 }, { "epoch": 0.06532116063456034, "grad_norm": 0.7163311111706386, "learning_rate": 7.82620731192508e-06, "loss": 0.0421, "step": 15655 }, { "epoch": 0.06534202334955062, "grad_norm": 1.3223952489918287, "learning_rate": 7.824957737854444e-06, "loss": 0.0779, "step": 15660 }, { "epoch": 0.0653628860645409, "grad_norm": 1.2406836941509949, "learning_rate": 7.823708762133754e-06, "loss": 0.0561, "step": 15665 }, { "epoch": 0.06538374877953117, "grad_norm": 1.3180949808077256, "learning_rate": 7.822460384285637e-06, "loss": 0.0567, "step": 15670 }, { "epoch": 0.06540461149452145, "grad_norm": 1.3259950215888874, "learning_rate": 7.82121260383325e-06, "loss": 0.058, "step": 15675 }, { "epoch": 0.06542547420951172, "grad_norm": 1.2062210612873674, "learning_rate": 7.819965420300288e-06, "loss": 0.0549, "step": 15680 }, { "epoch": 0.06544633692450201, "grad_norm": 0.9948847698747874, "learning_rate": 7.818718833210973e-06, "loss": 0.053, "step": 15685 }, { "epoch": 0.06546719963949228, "grad_norm": 1.278302184055426, "learning_rate": 7.817472842090059e-06, "loss": 0.0659, "step": 15690 }, { "epoch": 0.06548806235448257, "grad_norm": 1.7199518426973417, "learning_rate": 7.816227446462827e-06, "loss": 0.0623, "step": 15695 }, { "epoch": 0.06550892506947284, "grad_norm": 1.2852029983669668, "learning_rate": 7.814982645855092e-06, "loss": 0.0589, "step": 15700 }, { "epoch": 0.06552978778446313, "grad_norm": 0.9394364974609012, "learning_rate": 7.813738439793196e-06, "loss": 0.0462, "step": 15705 }, { "epoch": 0.0655506504994534, "grad_norm": 1.7611510504188699, "learning_rate": 7.812494827804006e-06, "loss": 0.0579, "step": 15710 }, { "epoch": 0.06557151321444367, "grad_norm": 1.2799371773112498, "learning_rate": 7.811251809414918e-06, "loss": 0.066, "step": 15715 }, { "epoch": 0.06559237592943395, "grad_norm": 0.9184089494387988, "learning_rate": 7.810009384153854e-06, "loss": 0.0604, "step": 15720 }, { "epoch": 0.06561323864442423, "grad_norm": 1.6997637776436179, "learning_rate": 7.808767551549268e-06, "loss": 0.0698, "step": 15725 }, { "epoch": 0.06563410135941451, "grad_norm": 1.090528373297535, "learning_rate": 7.807526311130122e-06, "loss": 0.0479, "step": 15730 }, { "epoch": 0.06565496407440478, "grad_norm": 1.5488200289963385, "learning_rate": 7.80628566242592e-06, "loss": 0.0619, "step": 15735 }, { "epoch": 0.06567582678939507, "grad_norm": 1.2161757102287607, "learning_rate": 7.805045604966678e-06, "loss": 0.0686, "step": 15740 }, { "epoch": 0.06569668950438534, "grad_norm": 1.1909178341034679, "learning_rate": 7.803806138282938e-06, "loss": 0.0549, "step": 15745 }, { "epoch": 0.06571755221937563, "grad_norm": 1.044855445630737, "learning_rate": 7.802567261905764e-06, "loss": 0.0598, "step": 15750 }, { "epoch": 0.0657384149343659, "grad_norm": 1.50554121615723, "learning_rate": 7.80132897536674e-06, "loss": 0.0589, "step": 15755 }, { "epoch": 0.06575927764935617, "grad_norm": 1.7404336648236143, "learning_rate": 7.800091278197972e-06, "loss": 0.068, "step": 15760 }, { "epoch": 0.06578014036434646, "grad_norm": 0.9855750185379284, "learning_rate": 7.798854169932084e-06, "loss": 0.0524, "step": 15765 }, { "epoch": 0.06580100307933673, "grad_norm": 1.083150073427197, "learning_rate": 7.797617650102218e-06, "loss": 0.0575, "step": 15770 }, { "epoch": 0.06582186579432701, "grad_norm": 0.7974719508170318, "learning_rate": 7.796381718242035e-06, "loss": 0.055, "step": 15775 }, { "epoch": 0.06584272850931729, "grad_norm": 2.402512843511922, "learning_rate": 7.795146373885713e-06, "loss": 0.0693, "step": 15780 }, { "epoch": 0.06586359122430757, "grad_norm": 1.5177119037650777, "learning_rate": 7.793911616567947e-06, "loss": 0.0556, "step": 15785 }, { "epoch": 0.06588445393929784, "grad_norm": 1.3145329733010784, "learning_rate": 7.792677445823948e-06, "loss": 0.0655, "step": 15790 }, { "epoch": 0.06590531665428813, "grad_norm": 0.8770651027220142, "learning_rate": 7.79144386118944e-06, "loss": 0.0492, "step": 15795 }, { "epoch": 0.0659261793692784, "grad_norm": 1.2113207862747035, "learning_rate": 7.790210862200664e-06, "loss": 0.0676, "step": 15800 }, { "epoch": 0.06594704208426867, "grad_norm": 1.2148198857696397, "learning_rate": 7.788978448394373e-06, "loss": 0.0598, "step": 15805 }, { "epoch": 0.06596790479925896, "grad_norm": 1.1427670714828102, "learning_rate": 7.787746619307834e-06, "loss": 0.0435, "step": 15810 }, { "epoch": 0.06598876751424923, "grad_norm": 1.041892424592497, "learning_rate": 7.786515374478823e-06, "loss": 0.0651, "step": 15815 }, { "epoch": 0.06600963022923952, "grad_norm": 1.4697219318111463, "learning_rate": 7.78528471344563e-06, "loss": 0.0624, "step": 15820 }, { "epoch": 0.06603049294422979, "grad_norm": 1.9032021268320258, "learning_rate": 7.784054635747056e-06, "loss": 0.0653, "step": 15825 }, { "epoch": 0.06605135565922007, "grad_norm": 1.5377881520398216, "learning_rate": 7.782825140922414e-06, "loss": 0.0611, "step": 15830 }, { "epoch": 0.06607221837421035, "grad_norm": 8.228618452929467, "learning_rate": 7.78159622851152e-06, "loss": 0.051, "step": 15835 }, { "epoch": 0.06609308108920063, "grad_norm": 0.7835412351476546, "learning_rate": 7.780367898054701e-06, "loss": 0.0478, "step": 15840 }, { "epoch": 0.0661139438041909, "grad_norm": 1.5855510040587955, "learning_rate": 7.779140149092795e-06, "loss": 0.0622, "step": 15845 }, { "epoch": 0.06613480651918117, "grad_norm": 1.1807196266969193, "learning_rate": 7.777912981167145e-06, "loss": 0.0621, "step": 15850 }, { "epoch": 0.06615566923417146, "grad_norm": 1.3613346870664413, "learning_rate": 7.776686393819596e-06, "loss": 0.0598, "step": 15855 }, { "epoch": 0.06617653194916173, "grad_norm": 1.0875314345107794, "learning_rate": 7.77546038659251e-06, "loss": 0.058, "step": 15860 }, { "epoch": 0.06619739466415202, "grad_norm": 1.1591566703062612, "learning_rate": 7.774234959028738e-06, "loss": 0.0575, "step": 15865 }, { "epoch": 0.06621825737914229, "grad_norm": 1.3104701752162302, "learning_rate": 7.77301011067165e-06, "loss": 0.0665, "step": 15870 }, { "epoch": 0.06623912009413258, "grad_norm": 1.411590814891101, "learning_rate": 7.77178584106511e-06, "loss": 0.0679, "step": 15875 }, { "epoch": 0.06625998280912285, "grad_norm": 1.2203670736840775, "learning_rate": 7.77056214975349e-06, "loss": 0.0558, "step": 15880 }, { "epoch": 0.06628084552411313, "grad_norm": 1.4694745320216038, "learning_rate": 7.769339036281659e-06, "loss": 0.0523, "step": 15885 }, { "epoch": 0.0663017082391034, "grad_norm": 1.196178948779726, "learning_rate": 7.768116500194992e-06, "loss": 0.0641, "step": 15890 }, { "epoch": 0.06632257095409368, "grad_norm": 1.0855269882156728, "learning_rate": 7.766894541039361e-06, "loss": 0.0564, "step": 15895 }, { "epoch": 0.06634343366908396, "grad_norm": 0.9815024148949604, "learning_rate": 7.765673158361143e-06, "loss": 0.0551, "step": 15900 }, { "epoch": 0.06636429638407423, "grad_norm": 1.350078927493442, "learning_rate": 7.764452351707208e-06, "loss": 0.0666, "step": 15905 }, { "epoch": 0.06638515909906452, "grad_norm": 1.1224339528765088, "learning_rate": 7.763232120624928e-06, "loss": 0.0578, "step": 15910 }, { "epoch": 0.06640602181405479, "grad_norm": 1.119286950086383, "learning_rate": 7.762012464662174e-06, "loss": 0.0616, "step": 15915 }, { "epoch": 0.06642688452904508, "grad_norm": 1.2188649113251788, "learning_rate": 7.76079338336731e-06, "loss": 0.0642, "step": 15920 }, { "epoch": 0.06644774724403535, "grad_norm": 1.1437435126510902, "learning_rate": 7.759574876289198e-06, "loss": 0.0576, "step": 15925 }, { "epoch": 0.06646860995902563, "grad_norm": 1.256946561487109, "learning_rate": 7.758356942977197e-06, "loss": 0.0474, "step": 15930 }, { "epoch": 0.0664894726740159, "grad_norm": 1.7874149382691327, "learning_rate": 7.757139582981162e-06, "loss": 0.0629, "step": 15935 }, { "epoch": 0.06651033538900618, "grad_norm": 1.3570832191029885, "learning_rate": 7.755922795851439e-06, "loss": 0.0622, "step": 15940 }, { "epoch": 0.06653119810399646, "grad_norm": 1.4656420292465415, "learning_rate": 7.754706581138868e-06, "loss": 0.0525, "step": 15945 }, { "epoch": 0.06655206081898674, "grad_norm": 1.0016634429874576, "learning_rate": 7.753490938394784e-06, "loss": 0.0703, "step": 15950 }, { "epoch": 0.06657292353397702, "grad_norm": 1.714028965220676, "learning_rate": 7.752275867171014e-06, "loss": 0.0542, "step": 15955 }, { "epoch": 0.0665937862489673, "grad_norm": 0.8546688561203939, "learning_rate": 7.751061367019873e-06, "loss": 0.0569, "step": 15960 }, { "epoch": 0.06661464896395758, "grad_norm": 1.1156298528102908, "learning_rate": 7.749847437494172e-06, "loss": 0.0531, "step": 15965 }, { "epoch": 0.06663551167894785, "grad_norm": 1.6585618148070238, "learning_rate": 7.748634078147208e-06, "loss": 0.0723, "step": 15970 }, { "epoch": 0.06665637439393814, "grad_norm": 1.5336371136493983, "learning_rate": 7.747421288532772e-06, "loss": 0.0646, "step": 15975 }, { "epoch": 0.06667723710892841, "grad_norm": 1.6162457959017344, "learning_rate": 7.746209068205134e-06, "loss": 0.0479, "step": 15980 }, { "epoch": 0.06669809982391868, "grad_norm": 1.1393601797679545, "learning_rate": 7.744997416719066e-06, "loss": 0.058, "step": 15985 }, { "epoch": 0.06671896253890897, "grad_norm": 0.7209642911692388, "learning_rate": 7.743786333629815e-06, "loss": 0.0616, "step": 15990 }, { "epoch": 0.06673982525389924, "grad_norm": 1.045728864605604, "learning_rate": 7.742575818493122e-06, "loss": 0.072, "step": 15995 }, { "epoch": 0.06676068796888952, "grad_norm": 1.403298895282476, "learning_rate": 7.741365870865211e-06, "loss": 0.0677, "step": 16000 }, { "epoch": 0.0667815506838798, "grad_norm": 1.72350422587762, "learning_rate": 7.740156490302791e-06, "loss": 0.0639, "step": 16005 }, { "epoch": 0.06680241339887008, "grad_norm": 3.1979933020795426, "learning_rate": 7.738947676363057e-06, "loss": 0.0598, "step": 16010 }, { "epoch": 0.06682327611386035, "grad_norm": 1.2729832395534355, "learning_rate": 7.737739428603691e-06, "loss": 0.0547, "step": 16015 }, { "epoch": 0.06684413882885062, "grad_norm": 1.3251024361213581, "learning_rate": 7.736531746582849e-06, "loss": 0.0502, "step": 16020 }, { "epoch": 0.06686500154384091, "grad_norm": 1.1614596460044417, "learning_rate": 7.735324629859179e-06, "loss": 0.0634, "step": 16025 }, { "epoch": 0.06688586425883118, "grad_norm": 1.0299205795517592, "learning_rate": 7.734118077991807e-06, "loss": 0.0616, "step": 16030 }, { "epoch": 0.06690672697382147, "grad_norm": 0.8569301644827467, "learning_rate": 7.732912090540339e-06, "loss": 0.0625, "step": 16035 }, { "epoch": 0.06692758968881174, "grad_norm": 1.1174051871596904, "learning_rate": 7.731706667064864e-06, "loss": 0.0639, "step": 16040 }, { "epoch": 0.06694845240380203, "grad_norm": 0.9569205742462709, "learning_rate": 7.73050180712595e-06, "loss": 0.0434, "step": 16045 }, { "epoch": 0.0669693151187923, "grad_norm": 2.0879079181362177, "learning_rate": 7.729297510284644e-06, "loss": 0.0551, "step": 16050 }, { "epoch": 0.06699017783378258, "grad_norm": 1.9002641899790522, "learning_rate": 7.728093776102468e-06, "loss": 0.0568, "step": 16055 }, { "epoch": 0.06701104054877285, "grad_norm": 1.529461952138781, "learning_rate": 7.726890604141433e-06, "loss": 0.0734, "step": 16060 }, { "epoch": 0.06703190326376313, "grad_norm": 1.3841007777458663, "learning_rate": 7.725687993964013e-06, "loss": 0.0655, "step": 16065 }, { "epoch": 0.06705276597875341, "grad_norm": 1.6075625971765584, "learning_rate": 7.724485945133168e-06, "loss": 0.0486, "step": 16070 }, { "epoch": 0.06707362869374368, "grad_norm": 1.6603031065029341, "learning_rate": 7.72328445721233e-06, "loss": 0.0649, "step": 16075 }, { "epoch": 0.06709449140873397, "grad_norm": 0.8397644850648895, "learning_rate": 7.722083529765407e-06, "loss": 0.0525, "step": 16080 }, { "epoch": 0.06711535412372424, "grad_norm": 1.5095391943168048, "learning_rate": 7.720883162356784e-06, "loss": 0.0504, "step": 16085 }, { "epoch": 0.06713621683871453, "grad_norm": 1.0021082184556709, "learning_rate": 7.719683354551315e-06, "loss": 0.0552, "step": 16090 }, { "epoch": 0.0671570795537048, "grad_norm": 1.6935229162103929, "learning_rate": 7.718484105914332e-06, "loss": 0.0534, "step": 16095 }, { "epoch": 0.06717794226869508, "grad_norm": 1.9104629105653121, "learning_rate": 7.717285416011633e-06, "loss": 0.0618, "step": 16100 }, { "epoch": 0.06719880498368536, "grad_norm": 1.5065331506191169, "learning_rate": 7.716087284409496e-06, "loss": 0.0697, "step": 16105 }, { "epoch": 0.06721966769867563, "grad_norm": 0.8182022952544796, "learning_rate": 7.714889710674669e-06, "loss": 0.0602, "step": 16110 }, { "epoch": 0.06724053041366591, "grad_norm": 1.3660998746042048, "learning_rate": 7.713692694374361e-06, "loss": 0.0645, "step": 16115 }, { "epoch": 0.06726139312865619, "grad_norm": 1.3844843662083437, "learning_rate": 7.712496235076263e-06, "loss": 0.0426, "step": 16120 }, { "epoch": 0.06728225584364647, "grad_norm": 1.4296415736849357, "learning_rate": 7.711300332348527e-06, "loss": 0.0639, "step": 16125 }, { "epoch": 0.06730311855863674, "grad_norm": 1.225311802336934, "learning_rate": 7.710104985759778e-06, "loss": 0.0566, "step": 16130 }, { "epoch": 0.06732398127362703, "grad_norm": 1.5523681223322694, "learning_rate": 7.708910194879109e-06, "loss": 0.0555, "step": 16135 }, { "epoch": 0.0673448439886173, "grad_norm": 1.8873926050837322, "learning_rate": 7.707715959276077e-06, "loss": 0.0616, "step": 16140 }, { "epoch": 0.06736570670360759, "grad_norm": 1.3090077527785848, "learning_rate": 7.706522278520706e-06, "loss": 0.0606, "step": 16145 }, { "epoch": 0.06738656941859786, "grad_norm": 1.1438826459432898, "learning_rate": 7.705329152183492e-06, "loss": 0.0573, "step": 16150 }, { "epoch": 0.06740743213358813, "grad_norm": 1.9838185512755588, "learning_rate": 7.704136579835389e-06, "loss": 0.0551, "step": 16155 }, { "epoch": 0.06742829484857842, "grad_norm": 0.8855271238937943, "learning_rate": 7.702944561047818e-06, "loss": 0.0481, "step": 16160 }, { "epoch": 0.06744915756356869, "grad_norm": 1.4868774789308354, "learning_rate": 7.701753095392665e-06, "loss": 0.0484, "step": 16165 }, { "epoch": 0.06747002027855897, "grad_norm": 1.1034078352047128, "learning_rate": 7.70056218244228e-06, "loss": 0.0547, "step": 16170 }, { "epoch": 0.06749088299354924, "grad_norm": 1.371018042489689, "learning_rate": 7.699371821769475e-06, "loss": 0.0557, "step": 16175 }, { "epoch": 0.06751174570853953, "grad_norm": 1.1823904829095095, "learning_rate": 7.698182012947523e-06, "loss": 0.0709, "step": 16180 }, { "epoch": 0.0675326084235298, "grad_norm": 1.2552990695381159, "learning_rate": 7.696992755550157e-06, "loss": 0.0617, "step": 16185 }, { "epoch": 0.06755347113852009, "grad_norm": 1.0275298066702465, "learning_rate": 7.695804049151578e-06, "loss": 0.0522, "step": 16190 }, { "epoch": 0.06757433385351036, "grad_norm": 1.2973359960568378, "learning_rate": 7.694615893326437e-06, "loss": 0.0643, "step": 16195 }, { "epoch": 0.06759519656850063, "grad_norm": 1.2172976430668367, "learning_rate": 7.693428287649853e-06, "loss": 0.075, "step": 16200 }, { "epoch": 0.06761605928349092, "grad_norm": 1.0551086706231323, "learning_rate": 7.6922412316974e-06, "loss": 0.0589, "step": 16205 }, { "epoch": 0.06763692199848119, "grad_norm": 1.1176676981031266, "learning_rate": 7.691054725045111e-06, "loss": 0.0791, "step": 16210 }, { "epoch": 0.06765778471347147, "grad_norm": 1.4546699296519305, "learning_rate": 7.689868767269476e-06, "loss": 0.0522, "step": 16215 }, { "epoch": 0.06767864742846175, "grad_norm": 2.160075719516279, "learning_rate": 7.688683357947445e-06, "loss": 0.0564, "step": 16220 }, { "epoch": 0.06769951014345203, "grad_norm": 1.0675330195464154, "learning_rate": 7.687498496656417e-06, "loss": 0.0554, "step": 16225 }, { "epoch": 0.0677203728584423, "grad_norm": 3.1266677756322556, "learning_rate": 7.686314182974258e-06, "loss": 0.0491, "step": 16230 }, { "epoch": 0.06774123557343259, "grad_norm": 1.1687641195822325, "learning_rate": 7.685130416479277e-06, "loss": 0.0729, "step": 16235 }, { "epoch": 0.06776209828842286, "grad_norm": 0.9820859215084893, "learning_rate": 7.683947196750246e-06, "loss": 0.0694, "step": 16240 }, { "epoch": 0.06778296100341313, "grad_norm": 0.9743237612744313, "learning_rate": 7.682764523366388e-06, "loss": 0.0533, "step": 16245 }, { "epoch": 0.06780382371840342, "grad_norm": 1.1018325978381656, "learning_rate": 7.68158239590738e-06, "loss": 0.0666, "step": 16250 }, { "epoch": 0.06782468643339369, "grad_norm": 1.2975728679182594, "learning_rate": 7.68040081395335e-06, "loss": 0.0675, "step": 16255 }, { "epoch": 0.06784554914838398, "grad_norm": 1.33003468737186, "learning_rate": 7.679219777084878e-06, "loss": 0.0451, "step": 16260 }, { "epoch": 0.06786641186337425, "grad_norm": 2.208641282377405, "learning_rate": 7.678039284882996e-06, "loss": 0.0527, "step": 16265 }, { "epoch": 0.06788727457836453, "grad_norm": 1.4839370897545132, "learning_rate": 7.676859336929188e-06, "loss": 0.0563, "step": 16270 }, { "epoch": 0.0679081372933548, "grad_norm": 0.889939922693288, "learning_rate": 7.675679932805388e-06, "loss": 0.0503, "step": 16275 }, { "epoch": 0.06792900000834509, "grad_norm": 1.5778542916686777, "learning_rate": 7.674501072093978e-06, "loss": 0.0647, "step": 16280 }, { "epoch": 0.06794986272333536, "grad_norm": 1.38957587064698, "learning_rate": 7.673322754377785e-06, "loss": 0.0563, "step": 16285 }, { "epoch": 0.06797072543832564, "grad_norm": 1.4591365313658216, "learning_rate": 7.672144979240095e-06, "loss": 0.0728, "step": 16290 }, { "epoch": 0.06799158815331592, "grad_norm": 2.7934576478512407, "learning_rate": 7.670967746264629e-06, "loss": 0.0736, "step": 16295 }, { "epoch": 0.06801245086830619, "grad_norm": 1.3389002790526225, "learning_rate": 7.669791055035564e-06, "loss": 0.0593, "step": 16300 }, { "epoch": 0.06803331358329648, "grad_norm": 1.3355035340670407, "learning_rate": 7.668614905137522e-06, "loss": 0.0736, "step": 16305 }, { "epoch": 0.06805417629828675, "grad_norm": 1.0566365497869301, "learning_rate": 7.667439296155565e-06, "loss": 0.0498, "step": 16310 }, { "epoch": 0.06807503901327704, "grad_norm": 1.1517489515812507, "learning_rate": 7.666264227675207e-06, "loss": 0.0574, "step": 16315 }, { "epoch": 0.06809590172826731, "grad_norm": 1.1288778090591203, "learning_rate": 7.665089699282406e-06, "loss": 0.0558, "step": 16320 }, { "epoch": 0.0681167644432576, "grad_norm": 1.3474449581602297, "learning_rate": 7.663915710563556e-06, "loss": 0.0632, "step": 16325 }, { "epoch": 0.06813762715824787, "grad_norm": 0.7010925829025865, "learning_rate": 7.662742261105504e-06, "loss": 0.0484, "step": 16330 }, { "epoch": 0.06815848987323814, "grad_norm": 1.493973795661105, "learning_rate": 7.661569350495535e-06, "loss": 0.0482, "step": 16335 }, { "epoch": 0.06817935258822842, "grad_norm": 1.3001159529527677, "learning_rate": 7.660396978321375e-06, "loss": 0.0552, "step": 16340 }, { "epoch": 0.0682002153032187, "grad_norm": 1.4641754349921057, "learning_rate": 7.659225144171196e-06, "loss": 0.0531, "step": 16345 }, { "epoch": 0.06822107801820898, "grad_norm": 2.0591665945502156, "learning_rate": 7.658053847633606e-06, "loss": 0.0549, "step": 16350 }, { "epoch": 0.06824194073319925, "grad_norm": 1.4192450511806087, "learning_rate": 7.656883088297655e-06, "loss": 0.077, "step": 16355 }, { "epoch": 0.06826280344818954, "grad_norm": 1.052051377583659, "learning_rate": 7.655712865752836e-06, "loss": 0.048, "step": 16360 }, { "epoch": 0.06828366616317981, "grad_norm": 1.8617336307912662, "learning_rate": 7.654543179589073e-06, "loss": 0.0673, "step": 16365 }, { "epoch": 0.0683045288781701, "grad_norm": 0.7390841125947266, "learning_rate": 7.653374029396738e-06, "loss": 0.0608, "step": 16370 }, { "epoch": 0.06832539159316037, "grad_norm": 2.1604500496303145, "learning_rate": 7.652205414766635e-06, "loss": 0.0613, "step": 16375 }, { "epoch": 0.06834625430815064, "grad_norm": 1.4891926106153721, "learning_rate": 7.651037335290008e-06, "loss": 0.0681, "step": 16380 }, { "epoch": 0.06836711702314092, "grad_norm": 1.727226224516343, "learning_rate": 7.649869790558531e-06, "loss": 0.0501, "step": 16385 }, { "epoch": 0.0683879797381312, "grad_norm": 1.5285892941700463, "learning_rate": 7.648702780164323e-06, "loss": 0.0507, "step": 16390 }, { "epoch": 0.06840884245312148, "grad_norm": 1.29338845659892, "learning_rate": 7.647536303699936e-06, "loss": 0.0575, "step": 16395 }, { "epoch": 0.06842970516811175, "grad_norm": 0.971261864663667, "learning_rate": 7.646370360758349e-06, "loss": 0.0477, "step": 16400 }, { "epoch": 0.06845056788310204, "grad_norm": 1.2776540124334181, "learning_rate": 7.64520495093299e-06, "loss": 0.0611, "step": 16405 }, { "epoch": 0.06847143059809231, "grad_norm": 0.8696842526741277, "learning_rate": 7.644040073817708e-06, "loss": 0.0497, "step": 16410 }, { "epoch": 0.0684922933130826, "grad_norm": 3.2563668498176686, "learning_rate": 7.64287572900679e-06, "loss": 0.0811, "step": 16415 }, { "epoch": 0.06851315602807287, "grad_norm": 0.8582993507211775, "learning_rate": 7.641711916094955e-06, "loss": 0.0532, "step": 16420 }, { "epoch": 0.06853401874306314, "grad_norm": 1.4547229455652684, "learning_rate": 7.640548634677353e-06, "loss": 0.0626, "step": 16425 }, { "epoch": 0.06855488145805343, "grad_norm": 1.6737683023107561, "learning_rate": 7.639385884349566e-06, "loss": 0.0472, "step": 16430 }, { "epoch": 0.0685757441730437, "grad_norm": 1.0449224806946469, "learning_rate": 7.638223664707608e-06, "loss": 0.0539, "step": 16435 }, { "epoch": 0.06859660688803398, "grad_norm": 1.527905882216333, "learning_rate": 7.63706197534792e-06, "loss": 0.0675, "step": 16440 }, { "epoch": 0.06861746960302426, "grad_norm": 2.7424647767520614, "learning_rate": 7.635900815867377e-06, "loss": 0.0735, "step": 16445 }, { "epoch": 0.06863833231801454, "grad_norm": 2.0686680835759104, "learning_rate": 7.634740185863277e-06, "loss": 0.0599, "step": 16450 }, { "epoch": 0.06865919503300481, "grad_norm": 0.8654860873088043, "learning_rate": 7.63358008493335e-06, "loss": 0.071, "step": 16455 }, { "epoch": 0.0686800577479951, "grad_norm": 1.5149960754311649, "learning_rate": 7.632420512675754e-06, "loss": 0.0546, "step": 16460 }, { "epoch": 0.06870092046298537, "grad_norm": 0.9633855462401618, "learning_rate": 7.631261468689077e-06, "loss": 0.051, "step": 16465 }, { "epoch": 0.06872178317797564, "grad_norm": 1.1131592427344918, "learning_rate": 7.630102952572323e-06, "loss": 0.0594, "step": 16470 }, { "epoch": 0.06874264589296593, "grad_norm": 0.8585768650381053, "learning_rate": 7.628944963924935e-06, "loss": 0.0376, "step": 16475 }, { "epoch": 0.0687635086079562, "grad_norm": 1.1541228560847454, "learning_rate": 7.627787502346771e-06, "loss": 0.0543, "step": 16480 }, { "epoch": 0.06878437132294649, "grad_norm": 1.5507796919331722, "learning_rate": 7.626630567438121e-06, "loss": 0.0489, "step": 16485 }, { "epoch": 0.06880523403793676, "grad_norm": 0.9790948215808731, "learning_rate": 7.625474158799696e-06, "loss": 0.0483, "step": 16490 }, { "epoch": 0.06882609675292704, "grad_norm": 1.276159341592249, "learning_rate": 7.624318276032631e-06, "loss": 0.0597, "step": 16495 }, { "epoch": 0.06884695946791732, "grad_norm": 1.2155532613317384, "learning_rate": 7.623162918738484e-06, "loss": 0.0503, "step": 16500 }, { "epoch": 0.0688678221829076, "grad_norm": 1.302139725874109, "learning_rate": 7.622008086519237e-06, "loss": 0.0589, "step": 16505 }, { "epoch": 0.06888868489789787, "grad_norm": 1.318522620853287, "learning_rate": 7.6208537789772905e-06, "loss": 0.0643, "step": 16510 }, { "epoch": 0.06890954761288814, "grad_norm": 1.2668631370372052, "learning_rate": 7.619699995715469e-06, "loss": 0.0539, "step": 16515 }, { "epoch": 0.06893041032787843, "grad_norm": 1.4756050578910203, "learning_rate": 7.618546736337019e-06, "loss": 0.0427, "step": 16520 }, { "epoch": 0.0689512730428687, "grad_norm": 1.0743415701623609, "learning_rate": 7.617394000445606e-06, "loss": 0.0528, "step": 16525 }, { "epoch": 0.06897213575785899, "grad_norm": 1.7446783213296775, "learning_rate": 7.616241787645311e-06, "loss": 0.0644, "step": 16530 }, { "epoch": 0.06899299847284926, "grad_norm": 1.056546978739511, "learning_rate": 7.615090097540643e-06, "loss": 0.0579, "step": 16535 }, { "epoch": 0.06901386118783955, "grad_norm": 1.9577917752584846, "learning_rate": 7.613938929736521e-06, "loss": 0.0547, "step": 16540 }, { "epoch": 0.06903472390282982, "grad_norm": 1.3585938224520082, "learning_rate": 7.612788283838286e-06, "loss": 0.0644, "step": 16545 }, { "epoch": 0.0690555866178201, "grad_norm": 0.7716598338761497, "learning_rate": 7.611638159451694e-06, "loss": 0.0545, "step": 16550 }, { "epoch": 0.06907644933281037, "grad_norm": 0.9644321335650089, "learning_rate": 7.610488556182923e-06, "loss": 0.0566, "step": 16555 }, { "epoch": 0.06909731204780065, "grad_norm": 1.6423764721394105, "learning_rate": 7.6093394736385625e-06, "loss": 0.0585, "step": 16560 }, { "epoch": 0.06911817476279093, "grad_norm": 2.841931015372157, "learning_rate": 7.608190911425617e-06, "loss": 0.0657, "step": 16565 }, { "epoch": 0.0691390374777812, "grad_norm": 1.2626280762374145, "learning_rate": 7.6070428691515085e-06, "loss": 0.0623, "step": 16570 }, { "epoch": 0.06915990019277149, "grad_norm": 1.8232726656873817, "learning_rate": 7.605895346424076e-06, "loss": 0.0608, "step": 16575 }, { "epoch": 0.06918076290776176, "grad_norm": 1.5290794696534231, "learning_rate": 7.604748342851567e-06, "loss": 0.0463, "step": 16580 }, { "epoch": 0.06920162562275205, "grad_norm": 1.8693280564766916, "learning_rate": 7.603601858042643e-06, "loss": 0.0578, "step": 16585 }, { "epoch": 0.06922248833774232, "grad_norm": 1.3829461312162725, "learning_rate": 7.602455891606384e-06, "loss": 0.0569, "step": 16590 }, { "epoch": 0.0692433510527326, "grad_norm": 1.203666102058566, "learning_rate": 7.601310443152276e-06, "loss": 0.0478, "step": 16595 }, { "epoch": 0.06926421376772288, "grad_norm": 1.5045070539130783, "learning_rate": 7.600165512290222e-06, "loss": 0.0695, "step": 16600 }, { "epoch": 0.06928507648271315, "grad_norm": 0.9817340441468506, "learning_rate": 7.599021098630531e-06, "loss": 0.048, "step": 16605 }, { "epoch": 0.06930593919770343, "grad_norm": 1.7916393614070907, "learning_rate": 7.597877201783925e-06, "loss": 0.0604, "step": 16610 }, { "epoch": 0.0693268019126937, "grad_norm": 1.1803441961103804, "learning_rate": 7.5967338213615385e-06, "loss": 0.0629, "step": 16615 }, { "epoch": 0.06934766462768399, "grad_norm": 1.0914099209101218, "learning_rate": 7.595590956974911e-06, "loss": 0.066, "step": 16620 }, { "epoch": 0.06936852734267426, "grad_norm": 0.9947258914928719, "learning_rate": 7.594448608235995e-06, "loss": 0.0563, "step": 16625 }, { "epoch": 0.06938939005766455, "grad_norm": 0.9845654100045963, "learning_rate": 7.5933067747571475e-06, "loss": 0.0577, "step": 16630 }, { "epoch": 0.06941025277265482, "grad_norm": 1.221654664927355, "learning_rate": 7.592165456151139e-06, "loss": 0.0584, "step": 16635 }, { "epoch": 0.0694311154876451, "grad_norm": 1.0666378514573516, "learning_rate": 7.5910246520311385e-06, "loss": 0.0637, "step": 16640 }, { "epoch": 0.06945197820263538, "grad_norm": 1.4190931422950306, "learning_rate": 7.589884362010731e-06, "loss": 0.0433, "step": 16645 }, { "epoch": 0.06947284091762565, "grad_norm": 1.1208297254533437, "learning_rate": 7.588744585703902e-06, "loss": 0.0627, "step": 16650 }, { "epoch": 0.06949370363261594, "grad_norm": 0.8637387134803863, "learning_rate": 7.5876053227250446e-06, "loss": 0.0482, "step": 16655 }, { "epoch": 0.06951456634760621, "grad_norm": 1.0848957000146087, "learning_rate": 7.5864665726889575e-06, "loss": 0.0565, "step": 16660 }, { "epoch": 0.0695354290625965, "grad_norm": 1.7423686401664884, "learning_rate": 7.585328335210842e-06, "loss": 0.0436, "step": 16665 }, { "epoch": 0.06955629177758677, "grad_norm": 1.1065235402301905, "learning_rate": 7.584190609906305e-06, "loss": 0.0416, "step": 16670 }, { "epoch": 0.06957715449257705, "grad_norm": 1.652364203689983, "learning_rate": 7.583053396391357e-06, "loss": 0.0624, "step": 16675 }, { "epoch": 0.06959801720756732, "grad_norm": 1.9154073016401445, "learning_rate": 7.581916694282412e-06, "loss": 0.0667, "step": 16680 }, { "epoch": 0.06961887992255761, "grad_norm": 1.5181172815104775, "learning_rate": 7.580780503196281e-06, "loss": 0.0557, "step": 16685 }, { "epoch": 0.06963974263754788, "grad_norm": 1.7457600849451458, "learning_rate": 7.579644822750185e-06, "loss": 0.0583, "step": 16690 }, { "epoch": 0.06966060535253815, "grad_norm": 0.9908765876201125, "learning_rate": 7.5785096525617415e-06, "loss": 0.0582, "step": 16695 }, { "epoch": 0.06968146806752844, "grad_norm": 0.8577952530505617, "learning_rate": 7.5773749922489705e-06, "loss": 0.0534, "step": 16700 }, { "epoch": 0.06970233078251871, "grad_norm": 1.270775104068555, "learning_rate": 7.576240841430289e-06, "loss": 0.0585, "step": 16705 }, { "epoch": 0.069723193497509, "grad_norm": 1.2208372071727578, "learning_rate": 7.575107199724518e-06, "loss": 0.0678, "step": 16710 }, { "epoch": 0.06974405621249927, "grad_norm": 0.8234584961016768, "learning_rate": 7.573974066750874e-06, "loss": 0.0668, "step": 16715 }, { "epoch": 0.06976491892748955, "grad_norm": 4.106772270197678, "learning_rate": 7.572841442128977e-06, "loss": 0.0683, "step": 16720 }, { "epoch": 0.06978578164247982, "grad_norm": 1.4113146960520355, "learning_rate": 7.571709325478838e-06, "loss": 0.0486, "step": 16725 }, { "epoch": 0.06980664435747011, "grad_norm": 1.0744403980568318, "learning_rate": 7.57057771642087e-06, "loss": 0.0518, "step": 16730 }, { "epoch": 0.06982750707246038, "grad_norm": 1.2109390740101975, "learning_rate": 7.569446614575885e-06, "loss": 0.0488, "step": 16735 }, { "epoch": 0.06984836978745065, "grad_norm": 1.1715932381963474, "learning_rate": 7.568316019565086e-06, "loss": 0.0545, "step": 16740 }, { "epoch": 0.06986923250244094, "grad_norm": 1.2170385591804762, "learning_rate": 7.5671859310100755e-06, "loss": 0.0498, "step": 16745 }, { "epoch": 0.06989009521743121, "grad_norm": 1.4287628518605442, "learning_rate": 7.566056348532853e-06, "loss": 0.0551, "step": 16750 }, { "epoch": 0.0699109579324215, "grad_norm": 0.7971433606755973, "learning_rate": 7.564927271755807e-06, "loss": 0.0562, "step": 16755 }, { "epoch": 0.06993182064741177, "grad_norm": 1.0337409156573618, "learning_rate": 7.563798700301727e-06, "loss": 0.0556, "step": 16760 }, { "epoch": 0.06995268336240205, "grad_norm": 1.5115049638368456, "learning_rate": 7.56267063379379e-06, "loss": 0.0514, "step": 16765 }, { "epoch": 0.06997354607739233, "grad_norm": 1.6737607215051082, "learning_rate": 7.5615430718555715e-06, "loss": 0.0543, "step": 16770 }, { "epoch": 0.06999440879238261, "grad_norm": 1.1029205262642663, "learning_rate": 7.560416014111037e-06, "loss": 0.0628, "step": 16775 }, { "epoch": 0.07001527150737288, "grad_norm": 0.7857754665078961, "learning_rate": 7.559289460184545e-06, "loss": 0.053, "step": 16780 }, { "epoch": 0.07003613422236316, "grad_norm": 2.007387992084983, "learning_rate": 7.558163409700846e-06, "loss": 0.0585, "step": 16785 }, { "epoch": 0.07005699693735344, "grad_norm": 1.7437699858331792, "learning_rate": 7.557037862285079e-06, "loss": 0.0607, "step": 16790 }, { "epoch": 0.07007785965234371, "grad_norm": 1.5315912142432397, "learning_rate": 7.555912817562779e-06, "loss": 0.0599, "step": 16795 }, { "epoch": 0.070098722367334, "grad_norm": 1.0673714105839558, "learning_rate": 7.554788275159868e-06, "loss": 0.0611, "step": 16800 }, { "epoch": 0.07011958508232427, "grad_norm": 1.0901317314722274, "learning_rate": 7.553664234702654e-06, "loss": 0.065, "step": 16805 }, { "epoch": 0.07014044779731456, "grad_norm": 1.4849584928123567, "learning_rate": 7.5525406958178425e-06, "loss": 0.0634, "step": 16810 }, { "epoch": 0.07016131051230483, "grad_norm": 1.160491817905636, "learning_rate": 7.55141765813252e-06, "loss": 0.0585, "step": 16815 }, { "epoch": 0.07018217322729511, "grad_norm": 2.448536614663826, "learning_rate": 7.5502951212741625e-06, "loss": 0.058, "step": 16820 }, { "epoch": 0.07020303594228539, "grad_norm": 0.9632126553229121, "learning_rate": 7.549173084870637e-06, "loss": 0.0613, "step": 16825 }, { "epoch": 0.07022389865727566, "grad_norm": 0.8870390022143327, "learning_rate": 7.548051548550193e-06, "loss": 0.0497, "step": 16830 }, { "epoch": 0.07024476137226594, "grad_norm": 1.5010380430174581, "learning_rate": 7.546930511941474e-06, "loss": 0.0664, "step": 16835 }, { "epoch": 0.07026562408725621, "grad_norm": 1.4708942789895543, "learning_rate": 7.545809974673498e-06, "loss": 0.0514, "step": 16840 }, { "epoch": 0.0702864868022465, "grad_norm": 1.7183171249913436, "learning_rate": 7.54468993637568e-06, "loss": 0.0643, "step": 16845 }, { "epoch": 0.07030734951723677, "grad_norm": 1.3046933473201292, "learning_rate": 7.543570396677809e-06, "loss": 0.0591, "step": 16850 }, { "epoch": 0.07032821223222706, "grad_norm": 0.9995013937102009, "learning_rate": 7.542451355210071e-06, "loss": 0.0534, "step": 16855 }, { "epoch": 0.07034907494721733, "grad_norm": 1.530099440629528, "learning_rate": 7.541332811603024e-06, "loss": 0.0543, "step": 16860 }, { "epoch": 0.07036993766220762, "grad_norm": 2.0922866734911088, "learning_rate": 7.540214765487614e-06, "loss": 0.0499, "step": 16865 }, { "epoch": 0.07039080037719789, "grad_norm": 1.6051046854595972, "learning_rate": 7.5390972164951725e-06, "loss": 0.0508, "step": 16870 }, { "epoch": 0.07041166309218816, "grad_norm": 1.1089735609967195, "learning_rate": 7.537980164257411e-06, "loss": 0.0455, "step": 16875 }, { "epoch": 0.07043252580717844, "grad_norm": 1.593286624597227, "learning_rate": 7.536863608406423e-06, "loss": 0.0516, "step": 16880 }, { "epoch": 0.07045338852216872, "grad_norm": 0.937100147683422, "learning_rate": 7.535747548574682e-06, "loss": 0.05, "step": 16885 }, { "epoch": 0.070474251237159, "grad_norm": 1.4801744449580572, "learning_rate": 7.534631984395046e-06, "loss": 0.0682, "step": 16890 }, { "epoch": 0.07049511395214927, "grad_norm": 1.7154990631471976, "learning_rate": 7.5335169155007485e-06, "loss": 0.0523, "step": 16895 }, { "epoch": 0.07051597666713956, "grad_norm": 0.8201122325319954, "learning_rate": 7.532402341525407e-06, "loss": 0.0681, "step": 16900 }, { "epoch": 0.07053683938212983, "grad_norm": 1.2426520220088877, "learning_rate": 7.531288262103017e-06, "loss": 0.0525, "step": 16905 }, { "epoch": 0.07055770209712012, "grad_norm": 1.4304915485731655, "learning_rate": 7.530174676867952e-06, "loss": 0.0575, "step": 16910 }, { "epoch": 0.07057856481211039, "grad_norm": 1.1830544602492743, "learning_rate": 7.529061585454965e-06, "loss": 0.0634, "step": 16915 }, { "epoch": 0.07059942752710066, "grad_norm": 1.2226227794793387, "learning_rate": 7.527948987499183e-06, "loss": 0.0629, "step": 16920 }, { "epoch": 0.07062029024209095, "grad_norm": 1.8520199493152023, "learning_rate": 7.5268368826361195e-06, "loss": 0.0825, "step": 16925 }, { "epoch": 0.07064115295708122, "grad_norm": 1.2052753360509332, "learning_rate": 7.525725270501653e-06, "loss": 0.0632, "step": 16930 }, { "epoch": 0.0706620156720715, "grad_norm": 1.4857388749410267, "learning_rate": 7.524614150732047e-06, "loss": 0.0424, "step": 16935 }, { "epoch": 0.07068287838706178, "grad_norm": 0.9291763199666885, "learning_rate": 7.523503522963939e-06, "loss": 0.0517, "step": 16940 }, { "epoch": 0.07070374110205206, "grad_norm": 1.4620398810237827, "learning_rate": 7.522393386834336e-06, "loss": 0.0654, "step": 16945 }, { "epoch": 0.07072460381704233, "grad_norm": 1.4091997268466172, "learning_rate": 7.521283741980629e-06, "loss": 0.0544, "step": 16950 }, { "epoch": 0.07074546653203262, "grad_norm": 1.2929366867035665, "learning_rate": 7.520174588040581e-06, "loss": 0.0551, "step": 16955 }, { "epoch": 0.07076632924702289, "grad_norm": 0.7124734822256681, "learning_rate": 7.51906592465232e-06, "loss": 0.0621, "step": 16960 }, { "epoch": 0.07078719196201316, "grad_norm": 1.0634841420810541, "learning_rate": 7.51795775145436e-06, "loss": 0.0733, "step": 16965 }, { "epoch": 0.07080805467700345, "grad_norm": 1.2127140495754458, "learning_rate": 7.516850068085578e-06, "loss": 0.0404, "step": 16970 }, { "epoch": 0.07082891739199372, "grad_norm": 1.0846022550007133, "learning_rate": 7.515742874185228e-06, "loss": 0.0677, "step": 16975 }, { "epoch": 0.070849780106984, "grad_norm": 1.0635673069636251, "learning_rate": 7.514636169392937e-06, "loss": 0.0635, "step": 16980 }, { "epoch": 0.07087064282197428, "grad_norm": 0.8254337689343829, "learning_rate": 7.5135299533487004e-06, "loss": 0.0612, "step": 16985 }, { "epoch": 0.07089150553696456, "grad_norm": 1.1791256712664442, "learning_rate": 7.512424225692885e-06, "loss": 0.0535, "step": 16990 }, { "epoch": 0.07091236825195484, "grad_norm": 1.0697381751047759, "learning_rate": 7.511318986066227e-06, "loss": 0.055, "step": 16995 }, { "epoch": 0.07093323096694512, "grad_norm": 0.9463842332894898, "learning_rate": 7.5102142341098384e-06, "loss": 0.0729, "step": 17000 }, { "epoch": 0.07095409368193539, "grad_norm": 1.7554967707339795, "learning_rate": 7.509109969465193e-06, "loss": 0.0594, "step": 17005 }, { "epoch": 0.07097495639692566, "grad_norm": 1.1857574769838164, "learning_rate": 7.508006191774134e-06, "loss": 0.0585, "step": 17010 }, { "epoch": 0.07099581911191595, "grad_norm": 1.1719238517752182, "learning_rate": 7.506902900678882e-06, "loss": 0.0419, "step": 17015 }, { "epoch": 0.07101668182690622, "grad_norm": 1.2139648913644103, "learning_rate": 7.5058000958220135e-06, "loss": 0.0636, "step": 17020 }, { "epoch": 0.07103754454189651, "grad_norm": 1.6124439684727878, "learning_rate": 7.504697776846481e-06, "loss": 0.0585, "step": 17025 }, { "epoch": 0.07105840725688678, "grad_norm": 1.315465555408158, "learning_rate": 7.503595943395601e-06, "loss": 0.0549, "step": 17030 }, { "epoch": 0.07107926997187707, "grad_norm": 0.8988498284350397, "learning_rate": 7.502494595113056e-06, "loss": 0.0451, "step": 17035 }, { "epoch": 0.07110013268686734, "grad_norm": 8.276930293001751, "learning_rate": 7.501393731642894e-06, "loss": 0.0671, "step": 17040 }, { "epoch": 0.07112099540185762, "grad_norm": 0.9792041127543627, "learning_rate": 7.50029335262953e-06, "loss": 0.0568, "step": 17045 }, { "epoch": 0.0711418581168479, "grad_norm": 1.0950760059689424, "learning_rate": 7.499193457717745e-06, "loss": 0.0579, "step": 17050 }, { "epoch": 0.07116272083183817, "grad_norm": 2.3614316046712887, "learning_rate": 7.498094046552682e-06, "loss": 0.0635, "step": 17055 }, { "epoch": 0.07118358354682845, "grad_norm": 1.143427445907133, "learning_rate": 7.496995118779849e-06, "loss": 0.0457, "step": 17060 }, { "epoch": 0.07120444626181872, "grad_norm": 1.2921289841118713, "learning_rate": 7.4958966740451165e-06, "loss": 0.039, "step": 17065 }, { "epoch": 0.07122530897680901, "grad_norm": 1.2976965586041282, "learning_rate": 7.494798711994721e-06, "loss": 0.0694, "step": 17070 }, { "epoch": 0.07124617169179928, "grad_norm": 1.0167802318488797, "learning_rate": 7.493701232275259e-06, "loss": 0.0653, "step": 17075 }, { "epoch": 0.07126703440678957, "grad_norm": 1.5015198286795177, "learning_rate": 7.49260423453369e-06, "loss": 0.0575, "step": 17080 }, { "epoch": 0.07128789712177984, "grad_norm": 1.2802658098813082, "learning_rate": 7.491507718417338e-06, "loss": 0.055, "step": 17085 }, { "epoch": 0.07130875983677012, "grad_norm": 1.4535500926315148, "learning_rate": 7.4904116835738805e-06, "loss": 0.0484, "step": 17090 }, { "epoch": 0.0713296225517604, "grad_norm": 1.368460153104637, "learning_rate": 7.489316129651364e-06, "loss": 0.0716, "step": 17095 }, { "epoch": 0.07135048526675067, "grad_norm": 1.3499043973581877, "learning_rate": 7.488221056298193e-06, "loss": 0.0584, "step": 17100 }, { "epoch": 0.07137134798174095, "grad_norm": 1.1884749521785876, "learning_rate": 7.4871264631631265e-06, "loss": 0.0565, "step": 17105 }, { "epoch": 0.07139221069673123, "grad_norm": 1.6406074936144626, "learning_rate": 7.486032349895288e-06, "loss": 0.0889, "step": 17110 }, { "epoch": 0.07141307341172151, "grad_norm": 1.2677715644774588, "learning_rate": 7.484938716144161e-06, "loss": 0.0613, "step": 17115 }, { "epoch": 0.07143393612671178, "grad_norm": 0.9613926858767518, "learning_rate": 7.483845561559584e-06, "loss": 0.0665, "step": 17120 }, { "epoch": 0.07145479884170207, "grad_norm": 0.9266759794872326, "learning_rate": 7.482752885791753e-06, "loss": 0.0526, "step": 17125 }, { "epoch": 0.07147566155669234, "grad_norm": 1.6580625512393183, "learning_rate": 7.4816606884912245e-06, "loss": 0.0597, "step": 17130 }, { "epoch": 0.07149652427168263, "grad_norm": 1.1840034527669632, "learning_rate": 7.48056896930891e-06, "loss": 0.059, "step": 17135 }, { "epoch": 0.0715173869866729, "grad_norm": 1.4111362055839431, "learning_rate": 7.479477727896078e-06, "loss": 0.0533, "step": 17140 }, { "epoch": 0.07153824970166317, "grad_norm": 1.9797430819913877, "learning_rate": 7.478386963904351e-06, "loss": 0.0499, "step": 17145 }, { "epoch": 0.07155911241665346, "grad_norm": 1.3346864206205553, "learning_rate": 7.4772966769857115e-06, "loss": 0.0508, "step": 17150 }, { "epoch": 0.07157997513164373, "grad_norm": 1.2080859067455723, "learning_rate": 7.4762068667924915e-06, "loss": 0.0602, "step": 17155 }, { "epoch": 0.07160083784663401, "grad_norm": 1.7941680843191865, "learning_rate": 7.475117532977383e-06, "loss": 0.052, "step": 17160 }, { "epoch": 0.07162170056162429, "grad_norm": 1.1790386626690494, "learning_rate": 7.4740286751934275e-06, "loss": 0.0657, "step": 17165 }, { "epoch": 0.07164256327661457, "grad_norm": 0.9043503410490226, "learning_rate": 7.472940293094022e-06, "loss": 0.0546, "step": 17170 }, { "epoch": 0.07166342599160484, "grad_norm": 6.6725598806679605, "learning_rate": 7.47185238633292e-06, "loss": 0.0702, "step": 17175 }, { "epoch": 0.07168428870659511, "grad_norm": 1.1430909675769318, "learning_rate": 7.4707649545642225e-06, "loss": 0.0514, "step": 17180 }, { "epoch": 0.0717051514215854, "grad_norm": 1.051477759385868, "learning_rate": 7.469677997442385e-06, "loss": 0.0679, "step": 17185 }, { "epoch": 0.07172601413657567, "grad_norm": 0.8397065001331795, "learning_rate": 7.468591514622215e-06, "loss": 0.0614, "step": 17190 }, { "epoch": 0.07174687685156596, "grad_norm": 1.096009654966484, "learning_rate": 7.4675055057588696e-06, "loss": 0.0597, "step": 17195 }, { "epoch": 0.07176773956655623, "grad_norm": 1.3617333869210697, "learning_rate": 7.466419970507861e-06, "loss": 0.0607, "step": 17200 }, { "epoch": 0.07178860228154652, "grad_norm": 1.1702413700150094, "learning_rate": 7.465334908525048e-06, "loss": 0.0518, "step": 17205 }, { "epoch": 0.07180946499653679, "grad_norm": 1.0312879425048793, "learning_rate": 7.464250319466643e-06, "loss": 0.0522, "step": 17210 }, { "epoch": 0.07183032771152707, "grad_norm": 1.1033964178478821, "learning_rate": 7.463166202989201e-06, "loss": 0.0594, "step": 17215 }, { "epoch": 0.07185119042651734, "grad_norm": 0.9117006420629038, "learning_rate": 7.462082558749632e-06, "loss": 0.0622, "step": 17220 }, { "epoch": 0.07187205314150762, "grad_norm": 1.1246420807747504, "learning_rate": 7.4609993864051955e-06, "loss": 0.0577, "step": 17225 }, { "epoch": 0.0718929158564979, "grad_norm": 0.9778081378727296, "learning_rate": 7.459916685613496e-06, "loss": 0.048, "step": 17230 }, { "epoch": 0.07191377857148817, "grad_norm": 1.3958411244866955, "learning_rate": 7.458834456032483e-06, "loss": 0.0639, "step": 17235 }, { "epoch": 0.07193464128647846, "grad_norm": 1.3661143595684238, "learning_rate": 7.457752697320462e-06, "loss": 0.0599, "step": 17240 }, { "epoch": 0.07195550400146873, "grad_norm": 1.935265912798495, "learning_rate": 7.4566714091360784e-06, "loss": 0.0568, "step": 17245 }, { "epoch": 0.07197636671645902, "grad_norm": 1.5263588969924375, "learning_rate": 7.455590591138324e-06, "loss": 0.0538, "step": 17250 }, { "epoch": 0.07199722943144929, "grad_norm": 1.3021985200015265, "learning_rate": 7.45451024298654e-06, "loss": 0.0634, "step": 17255 }, { "epoch": 0.07201809214643957, "grad_norm": 2.4292896878464596, "learning_rate": 7.453430364340411e-06, "loss": 0.0571, "step": 17260 }, { "epoch": 0.07203895486142985, "grad_norm": 1.6566629124142467, "learning_rate": 7.452350954859965e-06, "loss": 0.0689, "step": 17265 }, { "epoch": 0.07205981757642012, "grad_norm": 0.8840038871146199, "learning_rate": 7.451272014205579e-06, "loss": 0.0599, "step": 17270 }, { "epoch": 0.0720806802914104, "grad_norm": 0.9638085787446132, "learning_rate": 7.450193542037969e-06, "loss": 0.0454, "step": 17275 }, { "epoch": 0.07210154300640068, "grad_norm": 1.0148153246203915, "learning_rate": 7.4491155380181974e-06, "loss": 0.0555, "step": 17280 }, { "epoch": 0.07212240572139096, "grad_norm": 6.633549221062402, "learning_rate": 7.448038001807671e-06, "loss": 0.0544, "step": 17285 }, { "epoch": 0.07214326843638123, "grad_norm": 1.3736581856444074, "learning_rate": 7.446960933068138e-06, "loss": 0.0554, "step": 17290 }, { "epoch": 0.07216413115137152, "grad_norm": 1.4026942142831254, "learning_rate": 7.445884331461688e-06, "loss": 0.0506, "step": 17295 }, { "epoch": 0.07218499386636179, "grad_norm": 0.9059625877406392, "learning_rate": 7.4448081966507525e-06, "loss": 0.0532, "step": 17300 }, { "epoch": 0.07220585658135208, "grad_norm": 1.474591461466042, "learning_rate": 7.443732528298103e-06, "loss": 0.0589, "step": 17305 }, { "epoch": 0.07222671929634235, "grad_norm": 1.176761732389784, "learning_rate": 7.44265732606686e-06, "loss": 0.0698, "step": 17310 }, { "epoch": 0.07224758201133262, "grad_norm": 0.8436617251623392, "learning_rate": 7.441582589620474e-06, "loss": 0.0676, "step": 17315 }, { "epoch": 0.0722684447263229, "grad_norm": 1.492968836480043, "learning_rate": 7.440508318622741e-06, "loss": 0.0777, "step": 17320 }, { "epoch": 0.07228930744131318, "grad_norm": 1.1905738984147631, "learning_rate": 7.439434512737796e-06, "loss": 0.0721, "step": 17325 }, { "epoch": 0.07231017015630346, "grad_norm": 1.2799307126527908, "learning_rate": 7.438361171630114e-06, "loss": 0.0479, "step": 17330 }, { "epoch": 0.07233103287129374, "grad_norm": 1.2149205394735225, "learning_rate": 7.437288294964509e-06, "loss": 0.0597, "step": 17335 }, { "epoch": 0.07235189558628402, "grad_norm": 1.55720792974232, "learning_rate": 7.436215882406127e-06, "loss": 0.0509, "step": 17340 }, { "epoch": 0.07237275830127429, "grad_norm": 1.4927180232218553, "learning_rate": 7.435143933620461e-06, "loss": 0.0568, "step": 17345 }, { "epoch": 0.07239362101626458, "grad_norm": 0.8327391394948134, "learning_rate": 7.434072448273337e-06, "loss": 0.0557, "step": 17350 }, { "epoch": 0.07241448373125485, "grad_norm": 4.423261456488053, "learning_rate": 7.433001426030918e-06, "loss": 0.0551, "step": 17355 }, { "epoch": 0.07243534644624512, "grad_norm": 1.3452072919557851, "learning_rate": 7.431930866559705e-06, "loss": 0.058, "step": 17360 }, { "epoch": 0.07245620916123541, "grad_norm": 1.2866158485486754, "learning_rate": 7.430860769526533e-06, "loss": 0.0524, "step": 17365 }, { "epoch": 0.07247707187622568, "grad_norm": 1.0544414541269191, "learning_rate": 7.429791134598575e-06, "loss": 0.0491, "step": 17370 }, { "epoch": 0.07249793459121597, "grad_norm": 2.546537338968334, "learning_rate": 7.428721961443337e-06, "loss": 0.0614, "step": 17375 }, { "epoch": 0.07251879730620624, "grad_norm": 1.139180314888493, "learning_rate": 7.427653249728661e-06, "loss": 0.0593, "step": 17380 }, { "epoch": 0.07253966002119652, "grad_norm": 0.797129981257584, "learning_rate": 7.4265849991227246e-06, "loss": 0.0629, "step": 17385 }, { "epoch": 0.0725605227361868, "grad_norm": 1.1868460135141536, "learning_rate": 7.425517209294036e-06, "loss": 0.055, "step": 17390 }, { "epoch": 0.07258138545117708, "grad_norm": 1.6955900479057129, "learning_rate": 7.42444987991144e-06, "loss": 0.0551, "step": 17395 }, { "epoch": 0.07260224816616735, "grad_norm": 0.9336746531520532, "learning_rate": 7.423383010644116e-06, "loss": 0.0568, "step": 17400 }, { "epoch": 0.07262311088115762, "grad_norm": 0.9931509039917877, "learning_rate": 7.422316601161571e-06, "loss": 0.0508, "step": 17405 }, { "epoch": 0.07264397359614791, "grad_norm": 1.3805910572409101, "learning_rate": 7.421250651133648e-06, "loss": 0.0515, "step": 17410 }, { "epoch": 0.07266483631113818, "grad_norm": 0.8609898100024328, "learning_rate": 7.420185160230518e-06, "loss": 0.0416, "step": 17415 }, { "epoch": 0.07268569902612847, "grad_norm": 0.8429763008393574, "learning_rate": 7.419120128122691e-06, "loss": 0.0509, "step": 17420 }, { "epoch": 0.07270656174111874, "grad_norm": 0.9767815241185254, "learning_rate": 7.418055554480998e-06, "loss": 0.0467, "step": 17425 }, { "epoch": 0.07272742445610902, "grad_norm": 1.2070864869823372, "learning_rate": 7.4169914389766095e-06, "loss": 0.0507, "step": 17430 }, { "epoch": 0.0727482871710993, "grad_norm": 1.5386727813935657, "learning_rate": 7.4159277812810204e-06, "loss": 0.0589, "step": 17435 }, { "epoch": 0.07276914988608958, "grad_norm": 1.1105200771379784, "learning_rate": 7.414864581066057e-06, "loss": 0.0513, "step": 17440 }, { "epoch": 0.07279001260107985, "grad_norm": 1.3225743295106895, "learning_rate": 7.413801838003875e-06, "loss": 0.0491, "step": 17445 }, { "epoch": 0.07281087531607013, "grad_norm": 0.9583004114484027, "learning_rate": 7.41273955176696e-06, "loss": 0.066, "step": 17450 }, { "epoch": 0.07283173803106041, "grad_norm": 1.8150511810875118, "learning_rate": 7.411677722028123e-06, "loss": 0.0805, "step": 17455 }, { "epoch": 0.07285260074605068, "grad_norm": 1.6624918712989638, "learning_rate": 7.410616348460505e-06, "loss": 0.0621, "step": 17460 }, { "epoch": 0.07287346346104097, "grad_norm": 1.1241239902638263, "learning_rate": 7.4095554307375765e-06, "loss": 0.056, "step": 17465 }, { "epoch": 0.07289432617603124, "grad_norm": 0.9912923479007246, "learning_rate": 7.40849496853313e-06, "loss": 0.0462, "step": 17470 }, { "epoch": 0.07291518889102153, "grad_norm": 1.7932981822492327, "learning_rate": 7.407434961521289e-06, "loss": 0.0703, "step": 17475 }, { "epoch": 0.0729360516060118, "grad_norm": 0.5973848572851616, "learning_rate": 7.406375409376504e-06, "loss": 0.0434, "step": 17480 }, { "epoch": 0.07295691432100208, "grad_norm": 1.0471062559629156, "learning_rate": 7.405316311773547e-06, "loss": 0.0557, "step": 17485 }, { "epoch": 0.07297777703599236, "grad_norm": 2.0347103926405543, "learning_rate": 7.404257668387518e-06, "loss": 0.059, "step": 17490 }, { "epoch": 0.07299863975098263, "grad_norm": 1.4478883082194247, "learning_rate": 7.403199478893842e-06, "loss": 0.0469, "step": 17495 }, { "epoch": 0.07301950246597291, "grad_norm": 1.3158444335743342, "learning_rate": 7.402141742968269e-06, "loss": 0.0606, "step": 17500 }, { "epoch": 0.07304036518096318, "grad_norm": 1.44499422465356, "learning_rate": 7.401084460286872e-06, "loss": 0.0562, "step": 17505 }, { "epoch": 0.07306122789595347, "grad_norm": 1.2785829411900596, "learning_rate": 7.40002763052605e-06, "loss": 0.0715, "step": 17510 }, { "epoch": 0.07308209061094374, "grad_norm": 1.3780878469516984, "learning_rate": 7.398971253362522e-06, "loss": 0.054, "step": 17515 }, { "epoch": 0.07310295332593403, "grad_norm": 0.7463599645929394, "learning_rate": 7.397915328473333e-06, "loss": 0.0523, "step": 17520 }, { "epoch": 0.0731238160409243, "grad_norm": 1.526712262424404, "learning_rate": 7.396859855535848e-06, "loss": 0.0418, "step": 17525 }, { "epoch": 0.07314467875591459, "grad_norm": 1.404642738978061, "learning_rate": 7.395804834227757e-06, "loss": 0.0696, "step": 17530 }, { "epoch": 0.07316554147090486, "grad_norm": 1.6631179343058637, "learning_rate": 7.39475026422707e-06, "loss": 0.0675, "step": 17535 }, { "epoch": 0.07318640418589513, "grad_norm": 1.0114741340754665, "learning_rate": 7.393696145212116e-06, "loss": 0.0608, "step": 17540 }, { "epoch": 0.07320726690088541, "grad_norm": 1.2448472248371911, "learning_rate": 7.392642476861552e-06, "loss": 0.0498, "step": 17545 }, { "epoch": 0.07322812961587569, "grad_norm": 2.5742421401323097, "learning_rate": 7.391589258854346e-06, "loss": 0.0682, "step": 17550 }, { "epoch": 0.07324899233086597, "grad_norm": 2.393158411933289, "learning_rate": 7.390536490869792e-06, "loss": 0.0765, "step": 17555 }, { "epoch": 0.07326985504585624, "grad_norm": 0.9263838127848996, "learning_rate": 7.389484172587505e-06, "loss": 0.0405, "step": 17560 }, { "epoch": 0.07329071776084653, "grad_norm": 0.9297988691231206, "learning_rate": 7.388432303687415e-06, "loss": 0.0622, "step": 17565 }, { "epoch": 0.0733115804758368, "grad_norm": 1.2723603247350448, "learning_rate": 7.387380883849771e-06, "loss": 0.0782, "step": 17570 }, { "epoch": 0.07333244319082709, "grad_norm": 1.0538109778177032, "learning_rate": 7.386329912755148e-06, "loss": 0.0568, "step": 17575 }, { "epoch": 0.07335330590581736, "grad_norm": 1.3811543926015564, "learning_rate": 7.3852793900844266e-06, "loss": 0.0572, "step": 17580 }, { "epoch": 0.07337416862080763, "grad_norm": 0.8803106196548637, "learning_rate": 7.384229315518813e-06, "loss": 0.0672, "step": 17585 }, { "epoch": 0.07339503133579792, "grad_norm": 1.6078290785717182, "learning_rate": 7.38317968873983e-06, "loss": 0.0563, "step": 17590 }, { "epoch": 0.07341589405078819, "grad_norm": 1.2526468403417819, "learning_rate": 7.382130509429316e-06, "loss": 0.0567, "step": 17595 }, { "epoch": 0.07343675676577847, "grad_norm": 1.4118326119009048, "learning_rate": 7.381081777269427e-06, "loss": 0.0582, "step": 17600 }, { "epoch": 0.07345761948076875, "grad_norm": 1.4428673267909875, "learning_rate": 7.3800334919426334e-06, "loss": 0.0581, "step": 17605 }, { "epoch": 0.07347848219575903, "grad_norm": 0.9856420812612504, "learning_rate": 7.378985653131721e-06, "loss": 0.0638, "step": 17610 }, { "epoch": 0.0734993449107493, "grad_norm": 1.4981405311558529, "learning_rate": 7.37793826051979e-06, "loss": 0.0637, "step": 17615 }, { "epoch": 0.07352020762573959, "grad_norm": 1.087021326827189, "learning_rate": 7.376891313790261e-06, "loss": 0.056, "step": 17620 }, { "epoch": 0.07354107034072986, "grad_norm": 0.9433854752726408, "learning_rate": 7.375844812626865e-06, "loss": 0.0569, "step": 17625 }, { "epoch": 0.07356193305572013, "grad_norm": 1.1478739780524807, "learning_rate": 7.374798756713642e-06, "loss": 0.0682, "step": 17630 }, { "epoch": 0.07358279577071042, "grad_norm": 1.0624616310870585, "learning_rate": 7.3737531457349554e-06, "loss": 0.0411, "step": 17635 }, { "epoch": 0.07360365848570069, "grad_norm": 3.085940350119645, "learning_rate": 7.3727079793754744e-06, "loss": 0.0684, "step": 17640 }, { "epoch": 0.07362452120069098, "grad_norm": 0.886476507879088, "learning_rate": 7.3716632573201845e-06, "loss": 0.0702, "step": 17645 }, { "epoch": 0.07364538391568125, "grad_norm": 1.2932276633535216, "learning_rate": 7.3706189792543815e-06, "loss": 0.0612, "step": 17650 }, { "epoch": 0.07366624663067153, "grad_norm": 0.8172063862395055, "learning_rate": 7.3695751448636765e-06, "loss": 0.0444, "step": 17655 }, { "epoch": 0.0736871093456618, "grad_norm": 1.2842074860745702, "learning_rate": 7.368531753833986e-06, "loss": 0.0527, "step": 17660 }, { "epoch": 0.07370797206065209, "grad_norm": 1.9696067928912893, "learning_rate": 7.367488805851546e-06, "loss": 0.072, "step": 17665 }, { "epoch": 0.07372883477564236, "grad_norm": 1.6432149487493695, "learning_rate": 7.366446300602896e-06, "loss": 0.0564, "step": 17670 }, { "epoch": 0.07374969749063263, "grad_norm": 0.6383931275529573, "learning_rate": 7.36540423777489e-06, "loss": 0.0535, "step": 17675 }, { "epoch": 0.07377056020562292, "grad_norm": 1.2859659762448996, "learning_rate": 7.36436261705469e-06, "loss": 0.0672, "step": 17680 }, { "epoch": 0.07379142292061319, "grad_norm": 1.0990682742389393, "learning_rate": 7.363321438129769e-06, "loss": 0.0698, "step": 17685 }, { "epoch": 0.07381228563560348, "grad_norm": 1.0263124535493848, "learning_rate": 7.362280700687908e-06, "loss": 0.0546, "step": 17690 }, { "epoch": 0.07383314835059375, "grad_norm": 1.783380966438647, "learning_rate": 7.361240404417199e-06, "loss": 0.0553, "step": 17695 }, { "epoch": 0.07385401106558404, "grad_norm": 1.0497221479228334, "learning_rate": 7.360200549006039e-06, "loss": 0.0698, "step": 17700 }, { "epoch": 0.07387487378057431, "grad_norm": 1.2209620968095514, "learning_rate": 7.3591611341431355e-06, "loss": 0.0465, "step": 17705 }, { "epoch": 0.07389573649556459, "grad_norm": 1.6053080963041084, "learning_rate": 7.358122159517504e-06, "loss": 0.0667, "step": 17710 }, { "epoch": 0.07391659921055486, "grad_norm": 1.6364650512588135, "learning_rate": 7.357083624818464e-06, "loss": 0.0548, "step": 17715 }, { "epoch": 0.07393746192554514, "grad_norm": 0.7978491835687388, "learning_rate": 7.3560455297356446e-06, "loss": 0.0597, "step": 17720 }, { "epoch": 0.07395832464053542, "grad_norm": 0.9088678550372277, "learning_rate": 7.3550078739589836e-06, "loss": 0.0605, "step": 17725 }, { "epoch": 0.0739791873555257, "grad_norm": 0.7819964785508081, "learning_rate": 7.3539706571787206e-06, "loss": 0.0722, "step": 17730 }, { "epoch": 0.07400005007051598, "grad_norm": 1.687559391747649, "learning_rate": 7.352933879085401e-06, "loss": 0.0536, "step": 17735 }, { "epoch": 0.07402091278550625, "grad_norm": 2.2380101751371435, "learning_rate": 7.351897539369878e-06, "loss": 0.0684, "step": 17740 }, { "epoch": 0.07404177550049654, "grad_norm": 1.9216972692783076, "learning_rate": 7.35086163772331e-06, "loss": 0.0642, "step": 17745 }, { "epoch": 0.07406263821548681, "grad_norm": 1.0057751999832205, "learning_rate": 7.349826173837156e-06, "loss": 0.0493, "step": 17750 }, { "epoch": 0.0740835009304771, "grad_norm": 2.3742988737199835, "learning_rate": 7.3487911474031835e-06, "loss": 0.0706, "step": 17755 }, { "epoch": 0.07410436364546737, "grad_norm": 1.2713543145835222, "learning_rate": 7.347756558113464e-06, "loss": 0.081, "step": 17760 }, { "epoch": 0.07412522636045764, "grad_norm": 1.6823442508441342, "learning_rate": 7.346722405660366e-06, "loss": 0.068, "step": 17765 }, { "epoch": 0.07414608907544792, "grad_norm": 1.3355484511041178, "learning_rate": 7.345688689736568e-06, "loss": 0.0636, "step": 17770 }, { "epoch": 0.0741669517904382, "grad_norm": 0.7775669096316069, "learning_rate": 7.344655410035048e-06, "loss": 0.0401, "step": 17775 }, { "epoch": 0.07418781450542848, "grad_norm": 0.7766132852241305, "learning_rate": 7.343622566249086e-06, "loss": 0.0618, "step": 17780 }, { "epoch": 0.07420867722041875, "grad_norm": 1.5151678240704702, "learning_rate": 7.342590158072266e-06, "loss": 0.0602, "step": 17785 }, { "epoch": 0.07422953993540904, "grad_norm": 0.7995227585905771, "learning_rate": 7.341558185198472e-06, "loss": 0.0506, "step": 17790 }, { "epoch": 0.07425040265039931, "grad_norm": 1.2440603823771608, "learning_rate": 7.340526647321889e-06, "loss": 0.0431, "step": 17795 }, { "epoch": 0.0742712653653896, "grad_norm": 5.207043868862444, "learning_rate": 7.3394955441370005e-06, "loss": 0.071, "step": 17800 }, { "epoch": 0.07429212808037987, "grad_norm": 1.2979052308706334, "learning_rate": 7.338464875338595e-06, "loss": 0.0615, "step": 17805 }, { "epoch": 0.07431299079537014, "grad_norm": 1.2133527776985746, "learning_rate": 7.337434640621759e-06, "loss": 0.0554, "step": 17810 }, { "epoch": 0.07433385351036043, "grad_norm": 1.782633110720586, "learning_rate": 7.336404839681875e-06, "loss": 0.0556, "step": 17815 }, { "epoch": 0.0743547162253507, "grad_norm": 0.8562570920863338, "learning_rate": 7.335375472214632e-06, "loss": 0.0487, "step": 17820 }, { "epoch": 0.07437557894034098, "grad_norm": 1.197153298075527, "learning_rate": 7.334346537916012e-06, "loss": 0.0653, "step": 17825 }, { "epoch": 0.07439644165533126, "grad_norm": 1.8001456337172732, "learning_rate": 7.333318036482294e-06, "loss": 0.0605, "step": 17830 }, { "epoch": 0.07441730437032154, "grad_norm": 0.8799471517105749, "learning_rate": 7.332289967610063e-06, "loss": 0.0542, "step": 17835 }, { "epoch": 0.07443816708531181, "grad_norm": 1.1448613851316007, "learning_rate": 7.331262330996194e-06, "loss": 0.0641, "step": 17840 }, { "epoch": 0.0744590298003021, "grad_norm": 1.3877406657346907, "learning_rate": 7.330235126337862e-06, "loss": 0.0548, "step": 17845 }, { "epoch": 0.07447989251529237, "grad_norm": 0.7716549300245794, "learning_rate": 7.329208353332541e-06, "loss": 0.0509, "step": 17850 }, { "epoch": 0.07450075523028264, "grad_norm": 1.3866358640307301, "learning_rate": 7.328182011677997e-06, "loss": 0.0571, "step": 17855 }, { "epoch": 0.07452161794527293, "grad_norm": 1.4163839240856761, "learning_rate": 7.327156101072293e-06, "loss": 0.0606, "step": 17860 }, { "epoch": 0.0745424806602632, "grad_norm": 1.4582783380132442, "learning_rate": 7.326130621213795e-06, "loss": 0.0521, "step": 17865 }, { "epoch": 0.07456334337525349, "grad_norm": 0.5489789362300338, "learning_rate": 7.325105571801158e-06, "loss": 0.0409, "step": 17870 }, { "epoch": 0.07458420609024376, "grad_norm": 0.9705891579160112, "learning_rate": 7.324080952533328e-06, "loss": 0.0567, "step": 17875 }, { "epoch": 0.07460506880523404, "grad_norm": 1.1180521837866202, "learning_rate": 7.3230567631095554e-06, "loss": 0.0624, "step": 17880 }, { "epoch": 0.07462593152022431, "grad_norm": 1.6034490838707296, "learning_rate": 7.32203300322938e-06, "loss": 0.058, "step": 17885 }, { "epoch": 0.0746467942352146, "grad_norm": 1.4575830538466106, "learning_rate": 7.321009672592632e-06, "loss": 0.0614, "step": 17890 }, { "epoch": 0.07466765695020487, "grad_norm": 0.8729159886080852, "learning_rate": 7.319986770899443e-06, "loss": 0.058, "step": 17895 }, { "epoch": 0.07468851966519514, "grad_norm": 1.1337847098050673, "learning_rate": 7.3189642978502315e-06, "loss": 0.0606, "step": 17900 }, { "epoch": 0.07470938238018543, "grad_norm": 1.609462529776499, "learning_rate": 7.317942253145712e-06, "loss": 0.06, "step": 17905 }, { "epoch": 0.0747302450951757, "grad_norm": 1.0023109973547428, "learning_rate": 7.316920636486891e-06, "loss": 0.0508, "step": 17910 }, { "epoch": 0.07475110781016599, "grad_norm": 1.2787611506356544, "learning_rate": 7.315899447575066e-06, "loss": 0.0525, "step": 17915 }, { "epoch": 0.07477197052515626, "grad_norm": 1.417024175552577, "learning_rate": 7.314878686111826e-06, "loss": 0.0599, "step": 17920 }, { "epoch": 0.07479283324014654, "grad_norm": 1.6096299038149884, "learning_rate": 7.313858351799053e-06, "loss": 0.0526, "step": 17925 }, { "epoch": 0.07481369595513682, "grad_norm": 1.0910419650025511, "learning_rate": 7.312838444338921e-06, "loss": 0.0504, "step": 17930 }, { "epoch": 0.0748345586701271, "grad_norm": 1.0600238914430156, "learning_rate": 7.311818963433891e-06, "loss": 0.0395, "step": 17935 }, { "epoch": 0.07485542138511737, "grad_norm": 0.785092652167734, "learning_rate": 7.310799908786715e-06, "loss": 0.0462, "step": 17940 }, { "epoch": 0.07487628410010765, "grad_norm": 1.7523076330958942, "learning_rate": 7.309781280100441e-06, "loss": 0.044, "step": 17945 }, { "epoch": 0.07489714681509793, "grad_norm": 1.14660541323147, "learning_rate": 7.308763077078395e-06, "loss": 0.0579, "step": 17950 }, { "epoch": 0.0749180095300882, "grad_norm": 1.1273146238634308, "learning_rate": 7.307745299424205e-06, "loss": 0.0709, "step": 17955 }, { "epoch": 0.07493887224507849, "grad_norm": 2.53954002157922, "learning_rate": 7.306727946841777e-06, "loss": 0.0593, "step": 17960 }, { "epoch": 0.07495973496006876, "grad_norm": 1.4486984645163006, "learning_rate": 7.305711019035312e-06, "loss": 0.0618, "step": 17965 }, { "epoch": 0.07498059767505905, "grad_norm": 1.2914878941715155, "learning_rate": 7.304694515709297e-06, "loss": 0.0687, "step": 17970 }, { "epoch": 0.07500146039004932, "grad_norm": 1.0855940259339834, "learning_rate": 7.30367843656851e-06, "loss": 0.0392, "step": 17975 }, { "epoch": 0.0750223231050396, "grad_norm": 1.6220456164266532, "learning_rate": 7.302662781318008e-06, "loss": 0.0499, "step": 17980 }, { "epoch": 0.07504318582002988, "grad_norm": 1.3684238161920648, "learning_rate": 7.301647549663143e-06, "loss": 0.0622, "step": 17985 }, { "epoch": 0.07506404853502015, "grad_norm": 1.1835361678399872, "learning_rate": 7.3006327413095525e-06, "loss": 0.072, "step": 17990 }, { "epoch": 0.07508491125001043, "grad_norm": 1.0277593963271339, "learning_rate": 7.299618355963156e-06, "loss": 0.0539, "step": 17995 }, { "epoch": 0.0751057739650007, "grad_norm": 1.1718157622669938, "learning_rate": 7.298604393330163e-06, "loss": 0.055, "step": 18000 }, { "epoch": 0.07512663667999099, "grad_norm": 0.9189278325482962, "learning_rate": 7.297590853117069e-06, "loss": 0.0397, "step": 18005 }, { "epoch": 0.07514749939498126, "grad_norm": 1.79796028687204, "learning_rate": 7.296577735030649e-06, "loss": 0.0561, "step": 18010 }, { "epoch": 0.07516836210997155, "grad_norm": 1.2947427184990086, "learning_rate": 7.295565038777971e-06, "loss": 0.0838, "step": 18015 }, { "epoch": 0.07518922482496182, "grad_norm": 2.147341442361266, "learning_rate": 7.294552764066382e-06, "loss": 0.053, "step": 18020 }, { "epoch": 0.0752100875399521, "grad_norm": 1.4142112911632392, "learning_rate": 7.293540910603514e-06, "loss": 0.0533, "step": 18025 }, { "epoch": 0.07523095025494238, "grad_norm": 0.9424495817778267, "learning_rate": 7.292529478097286e-06, "loss": 0.0503, "step": 18030 }, { "epoch": 0.07525181296993265, "grad_norm": 1.0500286992028105, "learning_rate": 7.291518466255895e-06, "loss": 0.0408, "step": 18035 }, { "epoch": 0.07527267568492294, "grad_norm": 1.2101690911631424, "learning_rate": 7.290507874787823e-06, "loss": 0.0581, "step": 18040 }, { "epoch": 0.0752935383999132, "grad_norm": 1.2153141341431724, "learning_rate": 7.28949770340184e-06, "loss": 0.0694, "step": 18045 }, { "epoch": 0.07531440111490349, "grad_norm": 1.3509546971958242, "learning_rate": 7.288487951806992e-06, "loss": 0.0558, "step": 18050 }, { "epoch": 0.07533526382989376, "grad_norm": 3.2224796814657273, "learning_rate": 7.287478619712607e-06, "loss": 0.0467, "step": 18055 }, { "epoch": 0.07535612654488405, "grad_norm": 1.0518478795169501, "learning_rate": 7.2864697068283004e-06, "loss": 0.043, "step": 18060 }, { "epoch": 0.07537698925987432, "grad_norm": 1.051960509153995, "learning_rate": 7.2854612128639646e-06, "loss": 0.0577, "step": 18065 }, { "epoch": 0.07539785197486461, "grad_norm": 1.2969715807416782, "learning_rate": 7.284453137529773e-06, "loss": 0.0532, "step": 18070 }, { "epoch": 0.07541871468985488, "grad_norm": 1.1277327922512255, "learning_rate": 7.2834454805361806e-06, "loss": 0.0576, "step": 18075 }, { "epoch": 0.07543957740484515, "grad_norm": 1.2981799592497218, "learning_rate": 7.282438241593922e-06, "loss": 0.0536, "step": 18080 }, { "epoch": 0.07546044011983544, "grad_norm": 0.9268383648426792, "learning_rate": 7.281431420414015e-06, "loss": 0.047, "step": 18085 }, { "epoch": 0.07548130283482571, "grad_norm": 2.0226325867987396, "learning_rate": 7.280425016707751e-06, "loss": 0.0724, "step": 18090 }, { "epoch": 0.075502165549816, "grad_norm": 1.5389043741602104, "learning_rate": 7.279419030186704e-06, "loss": 0.054, "step": 18095 }, { "epoch": 0.07552302826480627, "grad_norm": 1.0406854621330666, "learning_rate": 7.278413460562729e-06, "loss": 0.0507, "step": 18100 }, { "epoch": 0.07554389097979655, "grad_norm": 2.1232591182956457, "learning_rate": 7.277408307547956e-06, "loss": 0.0525, "step": 18105 }, { "epoch": 0.07556475369478682, "grad_norm": 1.2420499634063997, "learning_rate": 7.276403570854796e-06, "loss": 0.0409, "step": 18110 }, { "epoch": 0.07558561640977711, "grad_norm": 1.4139198860906372, "learning_rate": 7.275399250195936e-06, "loss": 0.0621, "step": 18115 }, { "epoch": 0.07560647912476738, "grad_norm": 0.7688195828854538, "learning_rate": 7.274395345284339e-06, "loss": 0.0519, "step": 18120 }, { "epoch": 0.07562734183975765, "grad_norm": 1.8337319920365402, "learning_rate": 7.2733918558332495e-06, "loss": 0.0592, "step": 18125 }, { "epoch": 0.07564820455474794, "grad_norm": 1.161336931957211, "learning_rate": 7.2723887815561854e-06, "loss": 0.0499, "step": 18130 }, { "epoch": 0.07566906726973821, "grad_norm": 1.2410458741213224, "learning_rate": 7.271386122166942e-06, "loss": 0.0567, "step": 18135 }, { "epoch": 0.0756899299847285, "grad_norm": 1.7399593837607226, "learning_rate": 7.2703838773795925e-06, "loss": 0.0578, "step": 18140 }, { "epoch": 0.07571079269971877, "grad_norm": 0.8375347610753656, "learning_rate": 7.269382046908482e-06, "loss": 0.041, "step": 18145 }, { "epoch": 0.07573165541470905, "grad_norm": 1.437562028076721, "learning_rate": 7.268380630468235e-06, "loss": 0.0625, "step": 18150 }, { "epoch": 0.07575251812969933, "grad_norm": 2.6264485873883108, "learning_rate": 7.2673796277737485e-06, "loss": 0.0504, "step": 18155 }, { "epoch": 0.07577338084468961, "grad_norm": 1.2851288408749257, "learning_rate": 7.266379038540195e-06, "loss": 0.0528, "step": 18160 }, { "epoch": 0.07579424355967988, "grad_norm": 0.833701027458204, "learning_rate": 7.265378862483024e-06, "loss": 0.0441, "step": 18165 }, { "epoch": 0.07581510627467015, "grad_norm": 1.4970176015788912, "learning_rate": 7.264379099317954e-06, "loss": 0.051, "step": 18170 }, { "epoch": 0.07583596898966044, "grad_norm": 1.6252534083060939, "learning_rate": 7.263379748760982e-06, "loss": 0.0672, "step": 18175 }, { "epoch": 0.07585683170465071, "grad_norm": 1.591315006019755, "learning_rate": 7.262380810528375e-06, "loss": 0.0593, "step": 18180 }, { "epoch": 0.075877694419641, "grad_norm": 1.6200538930853858, "learning_rate": 7.261382284336675e-06, "loss": 0.0628, "step": 18185 }, { "epoch": 0.07589855713463127, "grad_norm": 1.1029319345928363, "learning_rate": 7.260384169902697e-06, "loss": 0.0562, "step": 18190 }, { "epoch": 0.07591941984962156, "grad_norm": 0.9362973958649938, "learning_rate": 7.2593864669435245e-06, "loss": 0.0432, "step": 18195 }, { "epoch": 0.07594028256461183, "grad_norm": 1.674981230382924, "learning_rate": 7.258389175176521e-06, "loss": 0.0497, "step": 18200 }, { "epoch": 0.07596114527960211, "grad_norm": 2.1689482504727984, "learning_rate": 7.257392294319314e-06, "loss": 0.0688, "step": 18205 }, { "epoch": 0.07598200799459238, "grad_norm": 1.3724642577811013, "learning_rate": 7.256395824089807e-06, "loss": 0.0596, "step": 18210 }, { "epoch": 0.07600287070958266, "grad_norm": 1.4328260805766675, "learning_rate": 7.255399764206172e-06, "loss": 0.0656, "step": 18215 }, { "epoch": 0.07602373342457294, "grad_norm": 2.136437064522971, "learning_rate": 7.254404114386851e-06, "loss": 0.0671, "step": 18220 }, { "epoch": 0.07604459613956321, "grad_norm": 1.3724396584799101, "learning_rate": 7.25340887435056e-06, "loss": 0.0629, "step": 18225 }, { "epoch": 0.0760654588545535, "grad_norm": 1.3162977027087082, "learning_rate": 7.252414043816285e-06, "loss": 0.0558, "step": 18230 }, { "epoch": 0.07608632156954377, "grad_norm": 1.5233423704271736, "learning_rate": 7.2514196225032764e-06, "loss": 0.057, "step": 18235 }, { "epoch": 0.07610718428453406, "grad_norm": 1.3891351169643489, "learning_rate": 7.250425610131057e-06, "loss": 0.0591, "step": 18240 }, { "epoch": 0.07612804699952433, "grad_norm": 1.0178016177130975, "learning_rate": 7.249432006419421e-06, "loss": 0.0462, "step": 18245 }, { "epoch": 0.07614890971451461, "grad_norm": 2.536749615390819, "learning_rate": 7.2484388110884304e-06, "loss": 0.0596, "step": 18250 }, { "epoch": 0.07616977242950489, "grad_norm": 1.187954623478771, "learning_rate": 7.247446023858412e-06, "loss": 0.056, "step": 18255 }, { "epoch": 0.07619063514449516, "grad_norm": 1.123040360613459, "learning_rate": 7.246453644449964e-06, "loss": 0.0468, "step": 18260 }, { "epoch": 0.07621149785948544, "grad_norm": 1.3025412044955496, "learning_rate": 7.245461672583954e-06, "loss": 0.0698, "step": 18265 }, { "epoch": 0.07623236057447572, "grad_norm": 1.1314547983053285, "learning_rate": 7.2444701079815095e-06, "loss": 0.0532, "step": 18270 }, { "epoch": 0.076253223289466, "grad_norm": 0.9396377142298303, "learning_rate": 7.243478950364035e-06, "loss": 0.0784, "step": 18275 }, { "epoch": 0.07627408600445627, "grad_norm": 1.0028599197081718, "learning_rate": 7.242488199453195e-06, "loss": 0.0637, "step": 18280 }, { "epoch": 0.07629494871944656, "grad_norm": 1.0258198458503542, "learning_rate": 7.241497854970923e-06, "loss": 0.0553, "step": 18285 }, { "epoch": 0.07631581143443683, "grad_norm": 1.463228785210115, "learning_rate": 7.240507916639416e-06, "loss": 0.0661, "step": 18290 }, { "epoch": 0.07633667414942712, "grad_norm": 1.0349012995823272, "learning_rate": 7.23951838418114e-06, "loss": 0.0641, "step": 18295 }, { "epoch": 0.07635753686441739, "grad_norm": 1.1780103130704127, "learning_rate": 7.238529257318825e-06, "loss": 0.0621, "step": 18300 }, { "epoch": 0.07637839957940766, "grad_norm": 0.9992446099616613, "learning_rate": 7.237540535775465e-06, "loss": 0.0514, "step": 18305 }, { "epoch": 0.07639926229439795, "grad_norm": 0.997014316481081, "learning_rate": 7.236552219274323e-06, "loss": 0.06, "step": 18310 }, { "epoch": 0.07642012500938822, "grad_norm": 1.0322182138584906, "learning_rate": 7.2355643075389205e-06, "loss": 0.0516, "step": 18315 }, { "epoch": 0.0764409877243785, "grad_norm": 1.769705304765796, "learning_rate": 7.234576800293049e-06, "loss": 0.0567, "step": 18320 }, { "epoch": 0.07646185043936878, "grad_norm": 1.5681607533241917, "learning_rate": 7.233589697260756e-06, "loss": 0.0658, "step": 18325 }, { "epoch": 0.07648271315435906, "grad_norm": 1.747677602905126, "learning_rate": 7.2326029981663624e-06, "loss": 0.0566, "step": 18330 }, { "epoch": 0.07650357586934933, "grad_norm": 1.2700292035411962, "learning_rate": 7.2316167027344455e-06, "loss": 0.0544, "step": 18335 }, { "epoch": 0.07652443858433962, "grad_norm": 1.0537315720488418, "learning_rate": 7.230630810689847e-06, "loss": 0.0633, "step": 18340 }, { "epoch": 0.07654530129932989, "grad_norm": 1.237057223160571, "learning_rate": 7.22964532175767e-06, "loss": 0.071, "step": 18345 }, { "epoch": 0.07656616401432016, "grad_norm": 1.1199347974221943, "learning_rate": 7.2286602356632825e-06, "loss": 0.0417, "step": 18350 }, { "epoch": 0.07658702672931045, "grad_norm": 1.124570017276819, "learning_rate": 7.227675552132314e-06, "loss": 0.0577, "step": 18355 }, { "epoch": 0.07660788944430072, "grad_norm": 1.2475621789493423, "learning_rate": 7.226691270890652e-06, "loss": 0.061, "step": 18360 }, { "epoch": 0.076628752159291, "grad_norm": 2.08111810228162, "learning_rate": 7.2257073916644494e-06, "loss": 0.067, "step": 18365 }, { "epoch": 0.07664961487428128, "grad_norm": 1.1222413242211964, "learning_rate": 7.224723914180119e-06, "loss": 0.0549, "step": 18370 }, { "epoch": 0.07667047758927156, "grad_norm": 0.8849623119765337, "learning_rate": 7.223740838164331e-06, "loss": 0.0431, "step": 18375 }, { "epoch": 0.07669134030426183, "grad_norm": 1.2994170801089906, "learning_rate": 7.22275816334402e-06, "loss": 0.0563, "step": 18380 }, { "epoch": 0.0767122030192521, "grad_norm": 1.1147573196343548, "learning_rate": 7.22177588944638e-06, "loss": 0.0516, "step": 18385 }, { "epoch": 0.07673306573424239, "grad_norm": 1.2844901591457567, "learning_rate": 7.220794016198861e-06, "loss": 0.0465, "step": 18390 }, { "epoch": 0.07675392844923266, "grad_norm": 0.8259779263330836, "learning_rate": 7.219812543329177e-06, "loss": 0.0368, "step": 18395 }, { "epoch": 0.07677479116422295, "grad_norm": 0.792310311443356, "learning_rate": 7.218831470565298e-06, "loss": 0.0484, "step": 18400 }, { "epoch": 0.07679565387921322, "grad_norm": 1.4617435998320096, "learning_rate": 7.217850797635455e-06, "loss": 0.0552, "step": 18405 }, { "epoch": 0.07681651659420351, "grad_norm": 1.6443930438536407, "learning_rate": 7.216870524268132e-06, "loss": 0.0507, "step": 18410 }, { "epoch": 0.07683737930919378, "grad_norm": 1.5915300225821687, "learning_rate": 7.215890650192082e-06, "loss": 0.0507, "step": 18415 }, { "epoch": 0.07685824202418406, "grad_norm": 1.721814168194282, "learning_rate": 7.214911175136302e-06, "loss": 0.05, "step": 18420 }, { "epoch": 0.07687910473917434, "grad_norm": 1.2426991146018709, "learning_rate": 7.213932098830056e-06, "loss": 0.0682, "step": 18425 }, { "epoch": 0.07689996745416461, "grad_norm": 0.984261765821796, "learning_rate": 7.212953421002863e-06, "loss": 0.0441, "step": 18430 }, { "epoch": 0.0769208301691549, "grad_norm": 1.1619677891700027, "learning_rate": 7.211975141384497e-06, "loss": 0.0537, "step": 18435 }, { "epoch": 0.07694169288414517, "grad_norm": 1.3465713945447984, "learning_rate": 7.210997259704989e-06, "loss": 0.0538, "step": 18440 }, { "epoch": 0.07696255559913545, "grad_norm": 1.2570293167352062, "learning_rate": 7.210019775694628e-06, "loss": 0.0755, "step": 18445 }, { "epoch": 0.07698341831412572, "grad_norm": 0.8022140505766642, "learning_rate": 7.209042689083958e-06, "loss": 0.0433, "step": 18450 }, { "epoch": 0.07700428102911601, "grad_norm": 0.8466970543467065, "learning_rate": 7.208065999603776e-06, "loss": 0.0529, "step": 18455 }, { "epoch": 0.07702514374410628, "grad_norm": 1.6712376163720737, "learning_rate": 7.207089706985139e-06, "loss": 0.055, "step": 18460 }, { "epoch": 0.07704600645909657, "grad_norm": 1.279669728158185, "learning_rate": 7.206113810959354e-06, "loss": 0.0473, "step": 18465 }, { "epoch": 0.07706686917408684, "grad_norm": 1.6549835602735925, "learning_rate": 7.2051383112579855e-06, "loss": 0.067, "step": 18470 }, { "epoch": 0.07708773188907711, "grad_norm": 3.5916520426724996, "learning_rate": 7.204163207612853e-06, "loss": 0.0711, "step": 18475 }, { "epoch": 0.0771085946040674, "grad_norm": 1.1848759044557546, "learning_rate": 7.2031884997560275e-06, "loss": 0.0573, "step": 18480 }, { "epoch": 0.07712945731905767, "grad_norm": 1.063910713583753, "learning_rate": 7.202214187419835e-06, "loss": 0.0503, "step": 18485 }, { "epoch": 0.07715032003404795, "grad_norm": 2.0602097410638933, "learning_rate": 7.201240270336855e-06, "loss": 0.0597, "step": 18490 }, { "epoch": 0.07717118274903823, "grad_norm": 1.1114169660082274, "learning_rate": 7.200266748239919e-06, "loss": 0.0537, "step": 18495 }, { "epoch": 0.07719204546402851, "grad_norm": 3.1730492316472367, "learning_rate": 7.1992936208621135e-06, "loss": 0.0595, "step": 18500 }, { "epoch": 0.07721290817901878, "grad_norm": 1.5416364735113306, "learning_rate": 7.198320887936776e-06, "loss": 0.0593, "step": 18505 }, { "epoch": 0.07723377089400907, "grad_norm": 1.0159569998320706, "learning_rate": 7.197348549197494e-06, "loss": 0.0448, "step": 18510 }, { "epoch": 0.07725463360899934, "grad_norm": 1.0210946555525702, "learning_rate": 7.19637660437811e-06, "loss": 0.0548, "step": 18515 }, { "epoch": 0.07727549632398961, "grad_norm": 1.9287275100596286, "learning_rate": 7.19540505321272e-06, "loss": 0.062, "step": 18520 }, { "epoch": 0.0772963590389799, "grad_norm": 1.279765404109537, "learning_rate": 7.194433895435663e-06, "loss": 0.0505, "step": 18525 }, { "epoch": 0.07731722175397017, "grad_norm": 2.923414133659703, "learning_rate": 7.193463130781538e-06, "loss": 0.0621, "step": 18530 }, { "epoch": 0.07733808446896046, "grad_norm": 2.4044444782225263, "learning_rate": 7.19249275898519e-06, "loss": 0.0728, "step": 18535 }, { "epoch": 0.07735894718395073, "grad_norm": 1.302926451989888, "learning_rate": 7.191522779781716e-06, "loss": 0.0451, "step": 18540 }, { "epoch": 0.07737980989894101, "grad_norm": 1.3693053561898794, "learning_rate": 7.190553192906459e-06, "loss": 0.0491, "step": 18545 }, { "epoch": 0.07740067261393128, "grad_norm": 1.287553518489974, "learning_rate": 7.189583998095016e-06, "loss": 0.0506, "step": 18550 }, { "epoch": 0.07742153532892157, "grad_norm": 1.0055761229053022, "learning_rate": 7.188615195083235e-06, "loss": 0.055, "step": 18555 }, { "epoch": 0.07744239804391184, "grad_norm": 1.3973093221109538, "learning_rate": 7.187646783607207e-06, "loss": 0.0581, "step": 18560 }, { "epoch": 0.07746326075890211, "grad_norm": 1.5796960926910468, "learning_rate": 7.186678763403278e-06, "loss": 0.0585, "step": 18565 }, { "epoch": 0.0774841234738924, "grad_norm": 1.1879974373759634, "learning_rate": 7.185711134208035e-06, "loss": 0.0572, "step": 18570 }, { "epoch": 0.07750498618888267, "grad_norm": 0.989385568178678, "learning_rate": 7.184743895758323e-06, "loss": 0.0517, "step": 18575 }, { "epoch": 0.07752584890387296, "grad_norm": 1.2775496601198666, "learning_rate": 7.183777047791227e-06, "loss": 0.0517, "step": 18580 }, { "epoch": 0.07754671161886323, "grad_norm": 1.3087142601682549, "learning_rate": 7.182810590044083e-06, "loss": 0.0583, "step": 18585 }, { "epoch": 0.07756757433385351, "grad_norm": 1.0365201971830085, "learning_rate": 7.181844522254473e-06, "loss": 0.0532, "step": 18590 }, { "epoch": 0.07758843704884379, "grad_norm": 1.138510305778246, "learning_rate": 7.180878844160227e-06, "loss": 0.0473, "step": 18595 }, { "epoch": 0.07760929976383407, "grad_norm": 1.7349104215448008, "learning_rate": 7.179913555499421e-06, "loss": 0.0457, "step": 18600 }, { "epoch": 0.07763016247882434, "grad_norm": 1.5390760143297755, "learning_rate": 7.178948656010377e-06, "loss": 0.0516, "step": 18605 }, { "epoch": 0.07765102519381462, "grad_norm": 1.111130739110161, "learning_rate": 7.177984145431666e-06, "loss": 0.0506, "step": 18610 }, { "epoch": 0.0776718879088049, "grad_norm": 1.011221736492513, "learning_rate": 7.1770200235021e-06, "loss": 0.0481, "step": 18615 }, { "epoch": 0.07769275062379517, "grad_norm": 1.1549509279876853, "learning_rate": 7.176056289960741e-06, "loss": 0.0611, "step": 18620 }, { "epoch": 0.07771361333878546, "grad_norm": 1.2986469237161617, "learning_rate": 7.175092944546893e-06, "loss": 0.0558, "step": 18625 }, { "epoch": 0.07773447605377573, "grad_norm": 1.3689875632042388, "learning_rate": 7.174129987000107e-06, "loss": 0.0454, "step": 18630 }, { "epoch": 0.07775533876876602, "grad_norm": 1.011081488563217, "learning_rate": 7.173167417060178e-06, "loss": 0.0599, "step": 18635 }, { "epoch": 0.07777620148375629, "grad_norm": 0.828680122245641, "learning_rate": 7.172205234467144e-06, "loss": 0.0479, "step": 18640 }, { "epoch": 0.07779706419874657, "grad_norm": 0.7549354157131531, "learning_rate": 7.171243438961289e-06, "loss": 0.0619, "step": 18645 }, { "epoch": 0.07781792691373685, "grad_norm": 1.4328597593025652, "learning_rate": 7.1702820302831386e-06, "loss": 0.0499, "step": 18650 }, { "epoch": 0.07783878962872712, "grad_norm": 1.012732456119607, "learning_rate": 7.169321008173464e-06, "loss": 0.0522, "step": 18655 }, { "epoch": 0.0778596523437174, "grad_norm": 1.0984473255966443, "learning_rate": 7.168360372373281e-06, "loss": 0.0484, "step": 18660 }, { "epoch": 0.07788051505870767, "grad_norm": 0.7041285732708439, "learning_rate": 7.167400122623842e-06, "loss": 0.047, "step": 18665 }, { "epoch": 0.07790137777369796, "grad_norm": 1.515872924864978, "learning_rate": 7.166440258666647e-06, "loss": 0.0603, "step": 18670 }, { "epoch": 0.07792224048868823, "grad_norm": 1.056038025147395, "learning_rate": 7.16548078024344e-06, "loss": 0.0583, "step": 18675 }, { "epoch": 0.07794310320367852, "grad_norm": 1.1707836342091953, "learning_rate": 7.164521687096201e-06, "loss": 0.0497, "step": 18680 }, { "epoch": 0.07796396591866879, "grad_norm": 1.834506266899144, "learning_rate": 7.1635629789671546e-06, "loss": 0.0473, "step": 18685 }, { "epoch": 0.07798482863365908, "grad_norm": 4.588772368173397, "learning_rate": 7.16260465559877e-06, "loss": 0.052, "step": 18690 }, { "epoch": 0.07800569134864935, "grad_norm": 1.0100847246589406, "learning_rate": 7.161646716733751e-06, "loss": 0.0625, "step": 18695 }, { "epoch": 0.07802655406363962, "grad_norm": 1.3638596057916308, "learning_rate": 7.160689162115048e-06, "loss": 0.0407, "step": 18700 }, { "epoch": 0.0780474167786299, "grad_norm": 3.359635143181814, "learning_rate": 7.159731991485849e-06, "loss": 0.0511, "step": 18705 }, { "epoch": 0.07806827949362018, "grad_norm": 0.7738385262286062, "learning_rate": 7.158775204589581e-06, "loss": 0.0485, "step": 18710 }, { "epoch": 0.07808914220861046, "grad_norm": 0.9858141773058797, "learning_rate": 7.157818801169917e-06, "loss": 0.0594, "step": 18715 }, { "epoch": 0.07811000492360073, "grad_norm": 1.988445468988283, "learning_rate": 7.156862780970762e-06, "loss": 0.0575, "step": 18720 }, { "epoch": 0.07813086763859102, "grad_norm": 1.0879457471567162, "learning_rate": 7.155907143736264e-06, "loss": 0.0443, "step": 18725 }, { "epoch": 0.07815173035358129, "grad_norm": 1.2617152248244825, "learning_rate": 7.154951889210811e-06, "loss": 0.0468, "step": 18730 }, { "epoch": 0.07817259306857158, "grad_norm": 1.5329446231889141, "learning_rate": 7.153997017139028e-06, "loss": 0.0551, "step": 18735 }, { "epoch": 0.07819345578356185, "grad_norm": 1.263454137688525, "learning_rate": 7.153042527265781e-06, "loss": 0.0545, "step": 18740 }, { "epoch": 0.07821431849855212, "grad_norm": 0.777066331287109, "learning_rate": 7.1520884193361706e-06, "loss": 0.0447, "step": 18745 }, { "epoch": 0.0782351812135424, "grad_norm": 0.869138779237013, "learning_rate": 7.151134693095538e-06, "loss": 0.0398, "step": 18750 }, { "epoch": 0.07825604392853268, "grad_norm": 0.8792312792480905, "learning_rate": 7.15018134828946e-06, "loss": 0.0555, "step": 18755 }, { "epoch": 0.07827690664352296, "grad_norm": 1.734052634388818, "learning_rate": 7.1492283846637525e-06, "loss": 0.0678, "step": 18760 }, { "epoch": 0.07829776935851324, "grad_norm": 1.2949368106077104, "learning_rate": 7.148275801964469e-06, "loss": 0.0545, "step": 18765 }, { "epoch": 0.07831863207350352, "grad_norm": 1.562154777750896, "learning_rate": 7.1473235999378975e-06, "loss": 0.0511, "step": 18770 }, { "epoch": 0.0783394947884938, "grad_norm": 0.8756070596361867, "learning_rate": 7.146371778330563e-06, "loss": 0.0467, "step": 18775 }, { "epoch": 0.07836035750348408, "grad_norm": 1.0793918290066415, "learning_rate": 7.145420336889229e-06, "loss": 0.0486, "step": 18780 }, { "epoch": 0.07838122021847435, "grad_norm": 1.1202241180559536, "learning_rate": 7.144469275360893e-06, "loss": 0.0468, "step": 18785 }, { "epoch": 0.07840208293346462, "grad_norm": 1.1513347340919213, "learning_rate": 7.14351859349279e-06, "loss": 0.0512, "step": 18790 }, { "epoch": 0.07842294564845491, "grad_norm": 1.1948936875266194, "learning_rate": 7.142568291032386e-06, "loss": 0.0453, "step": 18795 }, { "epoch": 0.07844380836344518, "grad_norm": 1.3340183403943224, "learning_rate": 7.141618367727388e-06, "loss": 0.0604, "step": 18800 }, { "epoch": 0.07846467107843547, "grad_norm": 0.7397463334829133, "learning_rate": 7.140668823325732e-06, "loss": 0.064, "step": 18805 }, { "epoch": 0.07848553379342574, "grad_norm": 0.8616533668927157, "learning_rate": 7.139719657575594e-06, "loss": 0.0561, "step": 18810 }, { "epoch": 0.07850639650841602, "grad_norm": 1.330492986980258, "learning_rate": 7.138770870225379e-06, "loss": 0.0637, "step": 18815 }, { "epoch": 0.0785272592234063, "grad_norm": 2.7933538989555426, "learning_rate": 7.13782246102373e-06, "loss": 0.0731, "step": 18820 }, { "epoch": 0.07854812193839658, "grad_norm": 1.5019222607302951, "learning_rate": 7.136874429719524e-06, "loss": 0.0465, "step": 18825 }, { "epoch": 0.07856898465338685, "grad_norm": 2.288234632462588, "learning_rate": 7.135926776061867e-06, "loss": 0.0697, "step": 18830 }, { "epoch": 0.07858984736837712, "grad_norm": 1.8180743945389832, "learning_rate": 7.134979499800101e-06, "loss": 0.0551, "step": 18835 }, { "epoch": 0.07861071008336741, "grad_norm": 1.1311465578306243, "learning_rate": 7.134032600683803e-06, "loss": 0.0633, "step": 18840 }, { "epoch": 0.07863157279835768, "grad_norm": 1.1968305889161361, "learning_rate": 7.133086078462779e-06, "loss": 0.0538, "step": 18845 }, { "epoch": 0.07865243551334797, "grad_norm": 1.3711671787641944, "learning_rate": 7.132139932887067e-06, "loss": 0.0515, "step": 18850 }, { "epoch": 0.07867329822833824, "grad_norm": 0.9561506859505741, "learning_rate": 7.1311941637069436e-06, "loss": 0.0595, "step": 18855 }, { "epoch": 0.07869416094332853, "grad_norm": 0.950184614325878, "learning_rate": 7.130248770672906e-06, "loss": 0.0404, "step": 18860 }, { "epoch": 0.0787150236583188, "grad_norm": 0.5742443700783126, "learning_rate": 7.1293037535356925e-06, "loss": 0.0454, "step": 18865 }, { "epoch": 0.07873588637330908, "grad_norm": 0.8390414342064618, "learning_rate": 7.128359112046269e-06, "loss": 0.0527, "step": 18870 }, { "epoch": 0.07875674908829935, "grad_norm": 1.2371404270478665, "learning_rate": 7.127414845955833e-06, "loss": 0.0496, "step": 18875 }, { "epoch": 0.07877761180328963, "grad_norm": 1.2789863520128515, "learning_rate": 7.12647095501581e-06, "loss": 0.0596, "step": 18880 }, { "epoch": 0.07879847451827991, "grad_norm": 1.2893071404264869, "learning_rate": 7.1255274389778585e-06, "loss": 0.0545, "step": 18885 }, { "epoch": 0.07881933723327018, "grad_norm": 1.232590392482261, "learning_rate": 7.124584297593868e-06, "loss": 0.0483, "step": 18890 }, { "epoch": 0.07884019994826047, "grad_norm": 1.0383265326600661, "learning_rate": 7.123641530615954e-06, "loss": 0.0485, "step": 18895 }, { "epoch": 0.07886106266325074, "grad_norm": 1.3439594780310358, "learning_rate": 7.122699137796467e-06, "loss": 0.0567, "step": 18900 }, { "epoch": 0.07888192537824103, "grad_norm": 1.3946346310310354, "learning_rate": 7.121757118887983e-06, "loss": 0.0465, "step": 18905 }, { "epoch": 0.0789027880932313, "grad_norm": 1.0907185310788396, "learning_rate": 7.120815473643306e-06, "loss": 0.0616, "step": 18910 }, { "epoch": 0.07892365080822158, "grad_norm": 0.9631135548285136, "learning_rate": 7.119874201815472e-06, "loss": 0.0587, "step": 18915 }, { "epoch": 0.07894451352321186, "grad_norm": 1.1060905777767838, "learning_rate": 7.118933303157742e-06, "loss": 0.0768, "step": 18920 }, { "epoch": 0.07896537623820213, "grad_norm": 1.2448461468779541, "learning_rate": 7.11799277742361e-06, "loss": 0.062, "step": 18925 }, { "epoch": 0.07898623895319241, "grad_norm": 0.8429408216173808, "learning_rate": 7.1170526243667925e-06, "loss": 0.0405, "step": 18930 }, { "epoch": 0.07900710166818269, "grad_norm": 0.8219152069984897, "learning_rate": 7.1161128437412365e-06, "loss": 0.0498, "step": 18935 }, { "epoch": 0.07902796438317297, "grad_norm": 1.4281909415914438, "learning_rate": 7.115173435301117e-06, "loss": 0.059, "step": 18940 }, { "epoch": 0.07904882709816324, "grad_norm": 0.496295295756409, "learning_rate": 7.114234398800836e-06, "loss": 0.0434, "step": 18945 }, { "epoch": 0.07906968981315353, "grad_norm": 1.047465208243944, "learning_rate": 7.11329573399502e-06, "loss": 0.0648, "step": 18950 }, { "epoch": 0.0790905525281438, "grad_norm": 1.0377564313386918, "learning_rate": 7.112357440638522e-06, "loss": 0.0596, "step": 18955 }, { "epoch": 0.07911141524313409, "grad_norm": 1.0762513267240466, "learning_rate": 7.111419518486426e-06, "loss": 0.052, "step": 18960 }, { "epoch": 0.07913227795812436, "grad_norm": 0.7799025692240338, "learning_rate": 7.110481967294036e-06, "loss": 0.05, "step": 18965 }, { "epoch": 0.07915314067311463, "grad_norm": 0.8594408393834266, "learning_rate": 7.109544786816888e-06, "loss": 0.0514, "step": 18970 }, { "epoch": 0.07917400338810492, "grad_norm": 1.1777868338703008, "learning_rate": 7.108607976810738e-06, "loss": 0.0515, "step": 18975 }, { "epoch": 0.07919486610309519, "grad_norm": 1.107213811630784, "learning_rate": 7.107671537031567e-06, "loss": 0.0474, "step": 18980 }, { "epoch": 0.07921572881808547, "grad_norm": 1.462148967914771, "learning_rate": 7.106735467235588e-06, "loss": 0.0606, "step": 18985 }, { "epoch": 0.07923659153307575, "grad_norm": 1.2247829145537061, "learning_rate": 7.105799767179231e-06, "loss": 0.0498, "step": 18990 }, { "epoch": 0.07925745424806603, "grad_norm": 0.8189055626898615, "learning_rate": 7.1048644366191545e-06, "loss": 0.0509, "step": 18995 }, { "epoch": 0.0792783169630563, "grad_norm": 1.1932158910964552, "learning_rate": 7.103929475312238e-06, "loss": 0.0674, "step": 19000 }, { "epoch": 0.07929917967804659, "grad_norm": 0.6967327369898416, "learning_rate": 7.10299488301559e-06, "loss": 0.0677, "step": 19005 }, { "epoch": 0.07932004239303686, "grad_norm": 1.0057123450684144, "learning_rate": 7.102060659486538e-06, "loss": 0.0444, "step": 19010 }, { "epoch": 0.07934090510802713, "grad_norm": 0.9177401945979486, "learning_rate": 7.101126804482635e-06, "loss": 0.0519, "step": 19015 }, { "epoch": 0.07936176782301742, "grad_norm": 1.1117138912906555, "learning_rate": 7.100193317761656e-06, "loss": 0.0542, "step": 19020 }, { "epoch": 0.07938263053800769, "grad_norm": 0.940275895730298, "learning_rate": 7.099260199081598e-06, "loss": 0.0468, "step": 19025 }, { "epoch": 0.07940349325299798, "grad_norm": 1.7624152133925903, "learning_rate": 7.098327448200685e-06, "loss": 0.0533, "step": 19030 }, { "epoch": 0.07942435596798825, "grad_norm": 0.7319824815619795, "learning_rate": 7.097395064877358e-06, "loss": 0.0505, "step": 19035 }, { "epoch": 0.07944521868297853, "grad_norm": 1.0010395870123252, "learning_rate": 7.096463048870283e-06, "loss": 0.0588, "step": 19040 }, { "epoch": 0.0794660813979688, "grad_norm": 1.509855119538173, "learning_rate": 7.095531399938347e-06, "loss": 0.0601, "step": 19045 }, { "epoch": 0.07948694411295909, "grad_norm": 1.3085341662272452, "learning_rate": 7.0946001178406575e-06, "loss": 0.0601, "step": 19050 }, { "epoch": 0.07950780682794936, "grad_norm": 1.207433919252542, "learning_rate": 7.093669202336548e-06, "loss": 0.062, "step": 19055 }, { "epoch": 0.07952866954293963, "grad_norm": 1.3411830335180575, "learning_rate": 7.092738653185565e-06, "loss": 0.042, "step": 19060 }, { "epoch": 0.07954953225792992, "grad_norm": 0.849903077163034, "learning_rate": 7.091808470147482e-06, "loss": 0.0587, "step": 19065 }, { "epoch": 0.07957039497292019, "grad_norm": 0.9147444510334906, "learning_rate": 7.090878652982292e-06, "loss": 0.0445, "step": 19070 }, { "epoch": 0.07959125768791048, "grad_norm": 1.0174761372146042, "learning_rate": 7.089949201450204e-06, "loss": 0.0565, "step": 19075 }, { "epoch": 0.07961212040290075, "grad_norm": 0.793799683322714, "learning_rate": 7.089020115311654e-06, "loss": 0.066, "step": 19080 }, { "epoch": 0.07963298311789103, "grad_norm": 1.2508094883194116, "learning_rate": 7.088091394327293e-06, "loss": 0.0601, "step": 19085 }, { "epoch": 0.0796538458328813, "grad_norm": 2.0253114026751398, "learning_rate": 7.0871630382579915e-06, "loss": 0.0713, "step": 19090 }, { "epoch": 0.07967470854787159, "grad_norm": 1.0969756285482144, "learning_rate": 7.086235046864839e-06, "loss": 0.0701, "step": 19095 }, { "epoch": 0.07969557126286186, "grad_norm": 1.3394413392467812, "learning_rate": 7.085307419909147e-06, "loss": 0.0614, "step": 19100 }, { "epoch": 0.07971643397785214, "grad_norm": 1.419739050166543, "learning_rate": 7.084380157152444e-06, "loss": 0.0585, "step": 19105 }, { "epoch": 0.07973729669284242, "grad_norm": 0.9074626554745717, "learning_rate": 7.083453258356474e-06, "loss": 0.06, "step": 19110 }, { "epoch": 0.0797581594078327, "grad_norm": 1.6218620824420809, "learning_rate": 7.082526723283204e-06, "loss": 0.0503, "step": 19115 }, { "epoch": 0.07977902212282298, "grad_norm": 0.7858132917300749, "learning_rate": 7.081600551694816e-06, "loss": 0.0452, "step": 19120 }, { "epoch": 0.07979988483781325, "grad_norm": 1.3161345535214346, "learning_rate": 7.08067474335371e-06, "loss": 0.0624, "step": 19125 }, { "epoch": 0.07982074755280354, "grad_norm": 1.4748543274373382, "learning_rate": 7.079749298022503e-06, "loss": 0.0489, "step": 19130 }, { "epoch": 0.07984161026779381, "grad_norm": 1.234787466995719, "learning_rate": 7.078824215464031e-06, "loss": 0.0549, "step": 19135 }, { "epoch": 0.0798624729827841, "grad_norm": 0.8278985797910428, "learning_rate": 7.077899495441345e-06, "loss": 0.0426, "step": 19140 }, { "epoch": 0.07988333569777437, "grad_norm": 1.1216892180928049, "learning_rate": 7.076975137717713e-06, "loss": 0.0372, "step": 19145 }, { "epoch": 0.07990419841276464, "grad_norm": 0.9201123076982133, "learning_rate": 7.076051142056621e-06, "loss": 0.0475, "step": 19150 }, { "epoch": 0.07992506112775492, "grad_norm": 2.4286586177290457, "learning_rate": 7.0751275082217664e-06, "loss": 0.0538, "step": 19155 }, { "epoch": 0.0799459238427452, "grad_norm": 0.7598099817908649, "learning_rate": 7.074204235977068e-06, "loss": 0.0513, "step": 19160 }, { "epoch": 0.07996678655773548, "grad_norm": 0.6989145319964207, "learning_rate": 7.073281325086659e-06, "loss": 0.0415, "step": 19165 }, { "epoch": 0.07998764927272575, "grad_norm": 1.4004890040487394, "learning_rate": 7.072358775314884e-06, "loss": 0.055, "step": 19170 }, { "epoch": 0.08000851198771604, "grad_norm": 1.2804975170879958, "learning_rate": 7.0714365864263076e-06, "loss": 0.0504, "step": 19175 }, { "epoch": 0.08002937470270631, "grad_norm": 0.8547670327679722, "learning_rate": 7.070514758185707e-06, "loss": 0.054, "step": 19180 }, { "epoch": 0.0800502374176966, "grad_norm": 1.3623668113062566, "learning_rate": 7.069593290358075e-06, "loss": 0.0614, "step": 19185 }, { "epoch": 0.08007110013268687, "grad_norm": 1.606198057356313, "learning_rate": 7.068672182708615e-06, "loss": 0.0491, "step": 19190 }, { "epoch": 0.08009196284767714, "grad_norm": 1.5255034198284438, "learning_rate": 7.06775143500275e-06, "loss": 0.0603, "step": 19195 }, { "epoch": 0.08011282556266743, "grad_norm": 1.8528190414576389, "learning_rate": 7.066831047006115e-06, "loss": 0.0602, "step": 19200 }, { "epoch": 0.0801336882776577, "grad_norm": 1.7742277985929658, "learning_rate": 7.065911018484555e-06, "loss": 0.0416, "step": 19205 }, { "epoch": 0.08015455099264798, "grad_norm": 1.3159890156477572, "learning_rate": 7.064991349204132e-06, "loss": 0.0599, "step": 19210 }, { "epoch": 0.08017541370763825, "grad_norm": 0.8700428031357853, "learning_rate": 7.064072038931122e-06, "loss": 0.045, "step": 19215 }, { "epoch": 0.08019627642262854, "grad_norm": 1.1610126789053337, "learning_rate": 7.06315308743201e-06, "loss": 0.0583, "step": 19220 }, { "epoch": 0.08021713913761881, "grad_norm": 1.0154542827298467, "learning_rate": 7.062234494473498e-06, "loss": 0.0451, "step": 19225 }, { "epoch": 0.0802380018526091, "grad_norm": 1.0223939962774167, "learning_rate": 7.061316259822496e-06, "loss": 0.0542, "step": 19230 }, { "epoch": 0.08025886456759937, "grad_norm": 1.26462932549484, "learning_rate": 7.060398383246129e-06, "loss": 0.048, "step": 19235 }, { "epoch": 0.08027972728258964, "grad_norm": 1.051542234917002, "learning_rate": 7.059480864511731e-06, "loss": 0.0578, "step": 19240 }, { "epoch": 0.08030058999757993, "grad_norm": 1.0888662374471598, "learning_rate": 7.058563703386853e-06, "loss": 0.0525, "step": 19245 }, { "epoch": 0.0803214527125702, "grad_norm": 1.1414948484479484, "learning_rate": 7.057646899639252e-06, "loss": 0.0555, "step": 19250 }, { "epoch": 0.08034231542756048, "grad_norm": 1.0443523235572076, "learning_rate": 7.056730453036897e-06, "loss": 0.0466, "step": 19255 }, { "epoch": 0.08036317814255076, "grad_norm": 1.3167613813345125, "learning_rate": 7.05581436334797e-06, "loss": 0.0589, "step": 19260 }, { "epoch": 0.08038404085754104, "grad_norm": 0.9367176722795592, "learning_rate": 7.054898630340864e-06, "loss": 0.0506, "step": 19265 }, { "epoch": 0.08040490357253131, "grad_norm": 1.0299884475274816, "learning_rate": 7.053983253784178e-06, "loss": 0.0612, "step": 19270 }, { "epoch": 0.0804257662875216, "grad_norm": 0.9055223330486397, "learning_rate": 7.053068233446724e-06, "loss": 0.0398, "step": 19275 }, { "epoch": 0.08044662900251187, "grad_norm": 2.0109036004369654, "learning_rate": 7.052153569097525e-06, "loss": 0.0596, "step": 19280 }, { "epoch": 0.08046749171750214, "grad_norm": 1.7042771936235024, "learning_rate": 7.051239260505813e-06, "loss": 0.057, "step": 19285 }, { "epoch": 0.08048835443249243, "grad_norm": 0.9773081850159812, "learning_rate": 7.0503253074410285e-06, "loss": 0.0441, "step": 19290 }, { "epoch": 0.0805092171474827, "grad_norm": 1.812244867535606, "learning_rate": 7.049411709672821e-06, "loss": 0.06, "step": 19295 }, { "epoch": 0.08053007986247299, "grad_norm": 1.3151211122776791, "learning_rate": 7.0484984669710476e-06, "loss": 0.0491, "step": 19300 }, { "epoch": 0.08055094257746326, "grad_norm": 2.003900242158473, "learning_rate": 7.047585579105779e-06, "loss": 0.0384, "step": 19305 }, { "epoch": 0.08057180529245354, "grad_norm": 1.1220774155231907, "learning_rate": 7.046673045847287e-06, "loss": 0.0565, "step": 19310 }, { "epoch": 0.08059266800744382, "grad_norm": 0.6372703477347692, "learning_rate": 7.04576086696606e-06, "loss": 0.043, "step": 19315 }, { "epoch": 0.0806135307224341, "grad_norm": 0.8414869784729829, "learning_rate": 7.044849042232789e-06, "loss": 0.0557, "step": 19320 }, { "epoch": 0.08063439343742437, "grad_norm": 1.1474774316356389, "learning_rate": 7.043937571418371e-06, "loss": 0.0646, "step": 19325 }, { "epoch": 0.08065525615241464, "grad_norm": 1.2365395115767297, "learning_rate": 7.043026454293914e-06, "loss": 0.0396, "step": 19330 }, { "epoch": 0.08067611886740493, "grad_norm": 1.011511402949097, "learning_rate": 7.042115690630735e-06, "loss": 0.0529, "step": 19335 }, { "epoch": 0.0806969815823952, "grad_norm": 2.0960487548673292, "learning_rate": 7.041205280200351e-06, "loss": 0.0524, "step": 19340 }, { "epoch": 0.08071784429738549, "grad_norm": 0.9774010849821307, "learning_rate": 7.040295222774492e-06, "loss": 0.0707, "step": 19345 }, { "epoch": 0.08073870701237576, "grad_norm": 1.085921294458076, "learning_rate": 7.039385518125095e-06, "loss": 0.0533, "step": 19350 }, { "epoch": 0.08075956972736605, "grad_norm": 1.399024918046521, "learning_rate": 7.038476166024295e-06, "loss": 0.0502, "step": 19355 }, { "epoch": 0.08078043244235632, "grad_norm": 0.8592874322629787, "learning_rate": 7.037567166244443e-06, "loss": 0.0569, "step": 19360 }, { "epoch": 0.0808012951573466, "grad_norm": 1.0506987117505262, "learning_rate": 7.036658518558089e-06, "loss": 0.0463, "step": 19365 }, { "epoch": 0.08082215787233687, "grad_norm": 1.4418108958639588, "learning_rate": 7.035750222737992e-06, "loss": 0.0511, "step": 19370 }, { "epoch": 0.08084302058732715, "grad_norm": 1.0881460801588243, "learning_rate": 7.034842278557116e-06, "loss": 0.0459, "step": 19375 }, { "epoch": 0.08086388330231743, "grad_norm": 1.0782081434847348, "learning_rate": 7.033934685788628e-06, "loss": 0.0472, "step": 19380 }, { "epoch": 0.0808847460173077, "grad_norm": 1.4433910272796224, "learning_rate": 7.0330274442059e-06, "loss": 0.0636, "step": 19385 }, { "epoch": 0.08090560873229799, "grad_norm": 1.3696695104013972, "learning_rate": 7.032120553582511e-06, "loss": 0.0557, "step": 19390 }, { "epoch": 0.08092647144728826, "grad_norm": 1.4352893308576118, "learning_rate": 7.031214013692243e-06, "loss": 0.0563, "step": 19395 }, { "epoch": 0.08094733416227855, "grad_norm": 1.363577202573486, "learning_rate": 7.0303078243090805e-06, "loss": 0.0516, "step": 19400 }, { "epoch": 0.08096819687726882, "grad_norm": 1.3272391652135356, "learning_rate": 7.029401985207216e-06, "loss": 0.0416, "step": 19405 }, { "epoch": 0.0809890595922591, "grad_norm": 0.8422831396223567, "learning_rate": 7.028496496161038e-06, "loss": 0.0597, "step": 19410 }, { "epoch": 0.08100992230724938, "grad_norm": 0.8162105128250944, "learning_rate": 7.027591356945148e-06, "loss": 0.052, "step": 19415 }, { "epoch": 0.08103078502223965, "grad_norm": 0.7311342257586646, "learning_rate": 7.026686567334347e-06, "loss": 0.0452, "step": 19420 }, { "epoch": 0.08105164773722993, "grad_norm": 1.7186571335077108, "learning_rate": 7.025782127103633e-06, "loss": 0.0504, "step": 19425 }, { "epoch": 0.0810725104522202, "grad_norm": 1.2732309912088706, "learning_rate": 7.024878036028213e-06, "loss": 0.0547, "step": 19430 }, { "epoch": 0.08109337316721049, "grad_norm": 2.3249992513324975, "learning_rate": 7.023974293883497e-06, "loss": 0.058, "step": 19435 }, { "epoch": 0.08111423588220076, "grad_norm": 0.6838178280033126, "learning_rate": 7.0230709004450926e-06, "loss": 0.0507, "step": 19440 }, { "epoch": 0.08113509859719105, "grad_norm": 1.0190635917336315, "learning_rate": 7.022167855488814e-06, "loss": 0.0596, "step": 19445 }, { "epoch": 0.08115596131218132, "grad_norm": 1.0081760694704667, "learning_rate": 7.021265158790675e-06, "loss": 0.0605, "step": 19450 }, { "epoch": 0.0811768240271716, "grad_norm": 0.7219211181503284, "learning_rate": 7.020362810126887e-06, "loss": 0.0469, "step": 19455 }, { "epoch": 0.08119768674216188, "grad_norm": 1.002264059758837, "learning_rate": 7.019460809273873e-06, "loss": 0.0497, "step": 19460 }, { "epoch": 0.08121854945715215, "grad_norm": 1.7083575225168213, "learning_rate": 7.018559156008246e-06, "loss": 0.0515, "step": 19465 }, { "epoch": 0.08123941217214244, "grad_norm": 1.7994971530325945, "learning_rate": 7.017657850106827e-06, "loss": 0.0611, "step": 19470 }, { "epoch": 0.08126027488713271, "grad_norm": 1.1417310374266585, "learning_rate": 7.016756891346632e-06, "loss": 0.0636, "step": 19475 }, { "epoch": 0.081281137602123, "grad_norm": 1.1201566550901294, "learning_rate": 7.0158562795048845e-06, "loss": 0.0615, "step": 19480 }, { "epoch": 0.08130200031711327, "grad_norm": 0.8828589774486015, "learning_rate": 7.014956014359e-06, "loss": 0.0559, "step": 19485 }, { "epoch": 0.08132286303210355, "grad_norm": 0.8764896088579235, "learning_rate": 7.0140560956866e-06, "loss": 0.046, "step": 19490 }, { "epoch": 0.08134372574709382, "grad_norm": 1.104663100786722, "learning_rate": 7.013156523265504e-06, "loss": 0.052, "step": 19495 }, { "epoch": 0.08136458846208411, "grad_norm": 1.0435405673296785, "learning_rate": 7.012257296873729e-06, "loss": 0.0602, "step": 19500 }, { "epoch": 0.08138545117707438, "grad_norm": 1.4604030973711506, "learning_rate": 7.011358416289493e-06, "loss": 0.0498, "step": 19505 }, { "epoch": 0.08140631389206465, "grad_norm": 1.4219879809667229, "learning_rate": 7.010459881291213e-06, "loss": 0.045, "step": 19510 }, { "epoch": 0.08142717660705494, "grad_norm": 1.0303091964564675, "learning_rate": 7.009561691657505e-06, "loss": 0.0457, "step": 19515 }, { "epoch": 0.08144803932204521, "grad_norm": 1.4748250666682179, "learning_rate": 7.00866384716718e-06, "loss": 0.0528, "step": 19520 }, { "epoch": 0.0814689020370355, "grad_norm": 1.4020245391718251, "learning_rate": 7.007766347599253e-06, "loss": 0.0635, "step": 19525 }, { "epoch": 0.08148976475202577, "grad_norm": 1.1642622444876551, "learning_rate": 7.006869192732932e-06, "loss": 0.0519, "step": 19530 }, { "epoch": 0.08151062746701605, "grad_norm": 1.1843297842670202, "learning_rate": 7.005972382347627e-06, "loss": 0.0525, "step": 19535 }, { "epoch": 0.08153149018200632, "grad_norm": 1.1087507788221174, "learning_rate": 7.005075916222942e-06, "loss": 0.0492, "step": 19540 }, { "epoch": 0.0815523528969966, "grad_norm": 1.0749793261315141, "learning_rate": 7.004179794138679e-06, "loss": 0.0646, "step": 19545 }, { "epoch": 0.08157321561198688, "grad_norm": 1.3543787987992462, "learning_rate": 7.0032840158748415e-06, "loss": 0.0541, "step": 19550 }, { "epoch": 0.08159407832697715, "grad_norm": 0.8337965962401079, "learning_rate": 7.002388581211624e-06, "loss": 0.0489, "step": 19555 }, { "epoch": 0.08161494104196744, "grad_norm": 1.1486048354539127, "learning_rate": 7.001493489929417e-06, "loss": 0.0618, "step": 19560 }, { "epoch": 0.08163580375695771, "grad_norm": 1.0011801440632553, "learning_rate": 7.0005987418088164e-06, "loss": 0.051, "step": 19565 }, { "epoch": 0.081656666471948, "grad_norm": 1.0686241833269121, "learning_rate": 6.999704336630605e-06, "loss": 0.0637, "step": 19570 }, { "epoch": 0.08167752918693827, "grad_norm": 1.8575646171190239, "learning_rate": 6.998810274175765e-06, "loss": 0.0513, "step": 19575 }, { "epoch": 0.08169839190192855, "grad_norm": 1.0922150486515219, "learning_rate": 6.997916554225476e-06, "loss": 0.0477, "step": 19580 }, { "epoch": 0.08171925461691883, "grad_norm": 1.690939630227868, "learning_rate": 6.9970231765611105e-06, "loss": 0.0636, "step": 19585 }, { "epoch": 0.0817401173319091, "grad_norm": 1.2105413268293468, "learning_rate": 6.996130140964239e-06, "loss": 0.0531, "step": 19590 }, { "epoch": 0.08176098004689938, "grad_norm": 1.1069212496441632, "learning_rate": 6.9952374472166214e-06, "loss": 0.0488, "step": 19595 }, { "epoch": 0.08178184276188966, "grad_norm": 0.8143501607363579, "learning_rate": 6.994345095100221e-06, "loss": 0.0469, "step": 19600 }, { "epoch": 0.08180270547687994, "grad_norm": 1.6301037749284324, "learning_rate": 6.9934530843971895e-06, "loss": 0.0576, "step": 19605 }, { "epoch": 0.08182356819187021, "grad_norm": 1.2624732570962824, "learning_rate": 6.992561414889875e-06, "loss": 0.0488, "step": 19610 }, { "epoch": 0.0818444309068605, "grad_norm": 1.7143040036796893, "learning_rate": 6.991670086360819e-06, "loss": 0.0773, "step": 19615 }, { "epoch": 0.08186529362185077, "grad_norm": 0.9864775578959215, "learning_rate": 6.990779098592758e-06, "loss": 0.0397, "step": 19620 }, { "epoch": 0.08188615633684106, "grad_norm": 1.073765664697095, "learning_rate": 6.989888451368622e-06, "loss": 0.0551, "step": 19625 }, { "epoch": 0.08190701905183133, "grad_norm": 0.7527757871313548, "learning_rate": 6.988998144471536e-06, "loss": 0.0338, "step": 19630 }, { "epoch": 0.0819278817668216, "grad_norm": 1.137801725335342, "learning_rate": 6.988108177684814e-06, "loss": 0.0459, "step": 19635 }, { "epoch": 0.08194874448181189, "grad_norm": 0.9571769994529796, "learning_rate": 6.987218550791967e-06, "loss": 0.0393, "step": 19640 }, { "epoch": 0.08196960719680216, "grad_norm": 1.2245316697129196, "learning_rate": 6.9863292635766984e-06, "loss": 0.0574, "step": 19645 }, { "epoch": 0.08199046991179244, "grad_norm": 0.8520216810914577, "learning_rate": 6.9854403158229026e-06, "loss": 0.0424, "step": 19650 }, { "epoch": 0.08201133262678272, "grad_norm": 1.3274295955084823, "learning_rate": 6.984551707314667e-06, "loss": 0.0488, "step": 19655 }, { "epoch": 0.082032195341773, "grad_norm": 1.4224300655826039, "learning_rate": 6.983663437836275e-06, "loss": 0.0604, "step": 19660 }, { "epoch": 0.08205305805676327, "grad_norm": 1.2044067359832393, "learning_rate": 6.982775507172192e-06, "loss": 0.0571, "step": 19665 }, { "epoch": 0.08207392077175356, "grad_norm": 2.091217593578264, "learning_rate": 6.981887915107088e-06, "loss": 0.0621, "step": 19670 }, { "epoch": 0.08209478348674383, "grad_norm": 1.051673245760757, "learning_rate": 6.981000661425817e-06, "loss": 0.0633, "step": 19675 }, { "epoch": 0.0821156462017341, "grad_norm": 1.2733159891387411, "learning_rate": 6.980113745913424e-06, "loss": 0.0526, "step": 19680 }, { "epoch": 0.08213650891672439, "grad_norm": 1.1973063989966393, "learning_rate": 6.979227168355147e-06, "loss": 0.0499, "step": 19685 }, { "epoch": 0.08215737163171466, "grad_norm": 2.9870662398019148, "learning_rate": 6.978340928536417e-06, "loss": 0.0606, "step": 19690 }, { "epoch": 0.08217823434670495, "grad_norm": 0.7215316768153155, "learning_rate": 6.97745502624285e-06, "loss": 0.0499, "step": 19695 }, { "epoch": 0.08219909706169522, "grad_norm": 1.1543524399445413, "learning_rate": 6.976569461260256e-06, "loss": 0.0549, "step": 19700 }, { "epoch": 0.0822199597766855, "grad_norm": 2.603453672036056, "learning_rate": 6.97568423337464e-06, "loss": 0.0631, "step": 19705 }, { "epoch": 0.08224082249167577, "grad_norm": 1.5640757803638512, "learning_rate": 6.974799342372186e-06, "loss": 0.062, "step": 19710 }, { "epoch": 0.08226168520666606, "grad_norm": 1.3975376498456777, "learning_rate": 6.973914788039278e-06, "loss": 0.0581, "step": 19715 }, { "epoch": 0.08228254792165633, "grad_norm": 6.782281020190497, "learning_rate": 6.9730305701624835e-06, "loss": 0.0571, "step": 19720 }, { "epoch": 0.0823034106366466, "grad_norm": 1.585465742530864, "learning_rate": 6.97214668852856e-06, "loss": 0.0536, "step": 19725 }, { "epoch": 0.08232427335163689, "grad_norm": 1.321553433249195, "learning_rate": 6.971263142924459e-06, "loss": 0.0467, "step": 19730 }, { "epoch": 0.08234513606662716, "grad_norm": 1.675760560580784, "learning_rate": 6.970379933137317e-06, "loss": 0.0522, "step": 19735 }, { "epoch": 0.08236599878161745, "grad_norm": 1.567404193638466, "learning_rate": 6.969497058954456e-06, "loss": 0.0504, "step": 19740 }, { "epoch": 0.08238686149660772, "grad_norm": 1.2494765396275067, "learning_rate": 6.968614520163395e-06, "loss": 0.0599, "step": 19745 }, { "epoch": 0.082407724211598, "grad_norm": 1.7395566320664135, "learning_rate": 6.967732316551833e-06, "loss": 0.047, "step": 19750 }, { "epoch": 0.08242858692658828, "grad_norm": 1.0543540518794865, "learning_rate": 6.966850447907662e-06, "loss": 0.0516, "step": 19755 }, { "epoch": 0.08244944964157856, "grad_norm": 0.8492519761798021, "learning_rate": 6.965968914018961e-06, "loss": 0.0533, "step": 19760 }, { "epoch": 0.08247031235656883, "grad_norm": 1.4821605281864898, "learning_rate": 6.965087714673996e-06, "loss": 0.0509, "step": 19765 }, { "epoch": 0.0824911750715591, "grad_norm": 1.1300791940378634, "learning_rate": 6.964206849661219e-06, "loss": 0.0414, "step": 19770 }, { "epoch": 0.08251203778654939, "grad_norm": 0.994602870426016, "learning_rate": 6.963326318769271e-06, "loss": 0.0463, "step": 19775 }, { "epoch": 0.08253290050153966, "grad_norm": 0.7795746274626809, "learning_rate": 6.962446121786983e-06, "loss": 0.0455, "step": 19780 }, { "epoch": 0.08255376321652995, "grad_norm": 1.068803944251888, "learning_rate": 6.961566258503366e-06, "loss": 0.0576, "step": 19785 }, { "epoch": 0.08257462593152022, "grad_norm": 1.4632654753620191, "learning_rate": 6.960686728707623e-06, "loss": 0.0705, "step": 19790 }, { "epoch": 0.0825954886465105, "grad_norm": 1.1763680489898036, "learning_rate": 6.959807532189142e-06, "loss": 0.0439, "step": 19795 }, { "epoch": 0.08261635136150078, "grad_norm": 0.6105095368502167, "learning_rate": 6.9589286687374944e-06, "loss": 0.0633, "step": 19800 }, { "epoch": 0.08263721407649106, "grad_norm": 0.6624810659677027, "learning_rate": 6.958050138142443e-06, "loss": 0.0621, "step": 19805 }, { "epoch": 0.08265807679148134, "grad_norm": 1.524986031179908, "learning_rate": 6.957171940193932e-06, "loss": 0.0551, "step": 19810 }, { "epoch": 0.08267893950647161, "grad_norm": 1.2645277954987215, "learning_rate": 6.95629407468209e-06, "loss": 0.0459, "step": 19815 }, { "epoch": 0.0826998022214619, "grad_norm": 1.5076432660373864, "learning_rate": 6.955416541397237e-06, "loss": 0.0612, "step": 19820 }, { "epoch": 0.08272066493645217, "grad_norm": 1.6363947891115511, "learning_rate": 6.954539340129873e-06, "loss": 0.0576, "step": 19825 }, { "epoch": 0.08274152765144245, "grad_norm": 1.0376543322133795, "learning_rate": 6.9536624706706845e-06, "loss": 0.0541, "step": 19830 }, { "epoch": 0.08276239036643272, "grad_norm": 0.9448678662446777, "learning_rate": 6.952785932810542e-06, "loss": 0.0613, "step": 19835 }, { "epoch": 0.08278325308142301, "grad_norm": 1.6661371962890836, "learning_rate": 6.951909726340502e-06, "loss": 0.0642, "step": 19840 }, { "epoch": 0.08280411579641328, "grad_norm": 1.0075735184458814, "learning_rate": 6.951033851051803e-06, "loss": 0.0564, "step": 19845 }, { "epoch": 0.08282497851140357, "grad_norm": 1.684662632623681, "learning_rate": 6.95015830673587e-06, "loss": 0.0654, "step": 19850 }, { "epoch": 0.08284584122639384, "grad_norm": 0.8312061034192282, "learning_rate": 6.94928309318431e-06, "loss": 0.0536, "step": 19855 }, { "epoch": 0.08286670394138411, "grad_norm": 0.8713540130781333, "learning_rate": 6.948408210188915e-06, "loss": 0.0561, "step": 19860 }, { "epoch": 0.0828875666563744, "grad_norm": 1.2152606965819768, "learning_rate": 6.9475336575416585e-06, "loss": 0.0567, "step": 19865 }, { "epoch": 0.08290842937136467, "grad_norm": 1.0999027268072459, "learning_rate": 6.9466594350347015e-06, "loss": 0.0506, "step": 19870 }, { "epoch": 0.08292929208635495, "grad_norm": 1.5014833061188504, "learning_rate": 6.945785542460383e-06, "loss": 0.0606, "step": 19875 }, { "epoch": 0.08295015480134522, "grad_norm": 1.2643598624002494, "learning_rate": 6.9449119796112255e-06, "loss": 0.0383, "step": 19880 }, { "epoch": 0.08297101751633551, "grad_norm": 1.6169895623622945, "learning_rate": 6.944038746279939e-06, "loss": 0.0585, "step": 19885 }, { "epoch": 0.08299188023132578, "grad_norm": 1.4260950783856152, "learning_rate": 6.943165842259409e-06, "loss": 0.0537, "step": 19890 }, { "epoch": 0.08301274294631607, "grad_norm": 1.2012994158718475, "learning_rate": 6.942293267342708e-06, "loss": 0.0549, "step": 19895 }, { "epoch": 0.08303360566130634, "grad_norm": 1.450568046641631, "learning_rate": 6.9414210213230915e-06, "loss": 0.0492, "step": 19900 }, { "epoch": 0.08305446837629661, "grad_norm": 1.1093170890575483, "learning_rate": 6.9405491039939896e-06, "loss": 0.0561, "step": 19905 }, { "epoch": 0.0830753310912869, "grad_norm": 0.9041084649056785, "learning_rate": 6.939677515149022e-06, "loss": 0.052, "step": 19910 }, { "epoch": 0.08309619380627717, "grad_norm": 1.1700513723502806, "learning_rate": 6.938806254581987e-06, "loss": 0.0636, "step": 19915 }, { "epoch": 0.08311705652126745, "grad_norm": 1.4338217005831906, "learning_rate": 6.937935322086862e-06, "loss": 0.0576, "step": 19920 }, { "epoch": 0.08313791923625773, "grad_norm": 1.641165080193623, "learning_rate": 6.937064717457808e-06, "loss": 0.0572, "step": 19925 }, { "epoch": 0.08315878195124801, "grad_norm": 0.88324043160204, "learning_rate": 6.936194440489163e-06, "loss": 0.05, "step": 19930 }, { "epoch": 0.08317964466623828, "grad_norm": 0.9640449738311984, "learning_rate": 6.9353244909754525e-06, "loss": 0.0505, "step": 19935 }, { "epoch": 0.08320050738122857, "grad_norm": 1.1925605111140654, "learning_rate": 6.934454868711377e-06, "loss": 0.0786, "step": 19940 }, { "epoch": 0.08322137009621884, "grad_norm": 1.2900796107609398, "learning_rate": 6.933585573491815e-06, "loss": 0.0508, "step": 19945 }, { "epoch": 0.08324223281120911, "grad_norm": 1.0968430687982191, "learning_rate": 6.932716605111833e-06, "loss": 0.0499, "step": 19950 }, { "epoch": 0.0832630955261994, "grad_norm": 1.181625338035148, "learning_rate": 6.9318479633666694e-06, "loss": 0.0543, "step": 19955 }, { "epoch": 0.08328395824118967, "grad_norm": 1.1724594876400782, "learning_rate": 6.930979648051745e-06, "loss": 0.047, "step": 19960 }, { "epoch": 0.08330482095617996, "grad_norm": 0.7840440764150555, "learning_rate": 6.930111658962664e-06, "loss": 0.039, "step": 19965 }, { "epoch": 0.08332568367117023, "grad_norm": 1.791583936428976, "learning_rate": 6.929243995895203e-06, "loss": 0.0493, "step": 19970 }, { "epoch": 0.08334654638616051, "grad_norm": 1.899983994044768, "learning_rate": 6.928376658645319e-06, "loss": 0.0568, "step": 19975 }, { "epoch": 0.08336740910115079, "grad_norm": 1.1658859168952356, "learning_rate": 6.927509647009153e-06, "loss": 0.051, "step": 19980 }, { "epoch": 0.08338827181614107, "grad_norm": 0.96825896732073, "learning_rate": 6.926642960783019e-06, "loss": 0.0501, "step": 19985 }, { "epoch": 0.08340913453113134, "grad_norm": 2.185121592533153, "learning_rate": 6.92577659976341e-06, "loss": 0.0742, "step": 19990 }, { "epoch": 0.08342999724612161, "grad_norm": 1.0143784193699523, "learning_rate": 6.924910563746999e-06, "loss": 0.0655, "step": 19995 }, { "epoch": 0.0834508599611119, "grad_norm": 1.327269589462532, "learning_rate": 6.924044852530637e-06, "loss": 0.0564, "step": 20000 }, { "epoch": 0.08347172267610217, "grad_norm": 0.9517412607977876, "learning_rate": 6.9231794659113494e-06, "loss": 0.0456, "step": 20005 }, { "epoch": 0.08349258539109246, "grad_norm": 1.2553569131986855, "learning_rate": 6.9223144036863445e-06, "loss": 0.0455, "step": 20010 }, { "epoch": 0.08351344810608273, "grad_norm": 1.2881605459646606, "learning_rate": 6.921449665653005e-06, "loss": 0.048, "step": 20015 }, { "epoch": 0.08353431082107302, "grad_norm": 1.4464290580947525, "learning_rate": 6.920585251608888e-06, "loss": 0.0582, "step": 20020 }, { "epoch": 0.08355517353606329, "grad_norm": 1.1398467833317205, "learning_rate": 6.919721161351734e-06, "loss": 0.0491, "step": 20025 }, { "epoch": 0.08357603625105357, "grad_norm": 0.906294762106776, "learning_rate": 6.918857394679453e-06, "loss": 0.0705, "step": 20030 }, { "epoch": 0.08359689896604384, "grad_norm": 0.8925462222132255, "learning_rate": 6.917993951390136e-06, "loss": 0.059, "step": 20035 }, { "epoch": 0.08361776168103412, "grad_norm": 0.9549787677992416, "learning_rate": 6.917130831282051e-06, "loss": 0.0461, "step": 20040 }, { "epoch": 0.0836386243960244, "grad_norm": 0.8511482665207589, "learning_rate": 6.916268034153639e-06, "loss": 0.0635, "step": 20045 }, { "epoch": 0.08365948711101467, "grad_norm": 0.9936392429873497, "learning_rate": 6.9154055598035194e-06, "loss": 0.0545, "step": 20050 }, { "epoch": 0.08368034982600496, "grad_norm": 1.4505428646190774, "learning_rate": 6.914543408030486e-06, "loss": 0.0504, "step": 20055 }, { "epoch": 0.08370121254099523, "grad_norm": 1.1027514950207178, "learning_rate": 6.91368157863351e-06, "loss": 0.049, "step": 20060 }, { "epoch": 0.08372207525598552, "grad_norm": 3.547435529021494, "learning_rate": 6.912820071411733e-06, "loss": 0.0716, "step": 20065 }, { "epoch": 0.08374293797097579, "grad_norm": 1.0757228729311237, "learning_rate": 6.91195888616448e-06, "loss": 0.0526, "step": 20070 }, { "epoch": 0.08376380068596607, "grad_norm": 0.9422114162306446, "learning_rate": 6.911098022691242e-06, "loss": 0.0409, "step": 20075 }, { "epoch": 0.08378466340095635, "grad_norm": 1.018410709355941, "learning_rate": 6.910237480791692e-06, "loss": 0.0478, "step": 20080 }, { "epoch": 0.08380552611594662, "grad_norm": 1.1613615994354276, "learning_rate": 6.909377260265673e-06, "loss": 0.0479, "step": 20085 }, { "epoch": 0.0838263888309369, "grad_norm": 1.3844299920526286, "learning_rate": 6.908517360913204e-06, "loss": 0.0599, "step": 20090 }, { "epoch": 0.08384725154592718, "grad_norm": 1.2770396676751026, "learning_rate": 6.907657782534478e-06, "loss": 0.0574, "step": 20095 }, { "epoch": 0.08386811426091746, "grad_norm": 1.255032507901536, "learning_rate": 6.906798524929864e-06, "loss": 0.0504, "step": 20100 }, { "epoch": 0.08388897697590773, "grad_norm": 1.489361338044085, "learning_rate": 6.905939587899901e-06, "loss": 0.0673, "step": 20105 }, { "epoch": 0.08390983969089802, "grad_norm": 1.513915701667786, "learning_rate": 6.905080971245304e-06, "loss": 0.0542, "step": 20110 }, { "epoch": 0.08393070240588829, "grad_norm": 0.9374214494076879, "learning_rate": 6.904222674766959e-06, "loss": 0.0608, "step": 20115 }, { "epoch": 0.08395156512087858, "grad_norm": 1.019364262457217, "learning_rate": 6.903364698265933e-06, "loss": 0.0482, "step": 20120 }, { "epoch": 0.08397242783586885, "grad_norm": 1.4936157076776695, "learning_rate": 6.902507041543454e-06, "loss": 0.0627, "step": 20125 }, { "epoch": 0.08399329055085912, "grad_norm": 0.9420021340326431, "learning_rate": 6.90164970440093e-06, "loss": 0.0441, "step": 20130 }, { "epoch": 0.0840141532658494, "grad_norm": 0.6711892086990374, "learning_rate": 6.9007926866399436e-06, "loss": 0.0457, "step": 20135 }, { "epoch": 0.08403501598083968, "grad_norm": 1.1754137690611612, "learning_rate": 6.899935988062243e-06, "loss": 0.0532, "step": 20140 }, { "epoch": 0.08405587869582996, "grad_norm": 1.1979415859416198, "learning_rate": 6.899079608469755e-06, "loss": 0.0502, "step": 20145 }, { "epoch": 0.08407674141082024, "grad_norm": 2.467894294270125, "learning_rate": 6.8982235476645755e-06, "loss": 0.0629, "step": 20150 }, { "epoch": 0.08409760412581052, "grad_norm": 1.0616621691929566, "learning_rate": 6.897367805448971e-06, "loss": 0.0638, "step": 20155 }, { "epoch": 0.08411846684080079, "grad_norm": 1.0510556430914593, "learning_rate": 6.896512381625382e-06, "loss": 0.0474, "step": 20160 }, { "epoch": 0.08413932955579108, "grad_norm": 1.3208297125205462, "learning_rate": 6.895657275996422e-06, "loss": 0.0592, "step": 20165 }, { "epoch": 0.08416019227078135, "grad_norm": 1.3603828821893071, "learning_rate": 6.89480248836487e-06, "loss": 0.0401, "step": 20170 }, { "epoch": 0.08418105498577162, "grad_norm": 1.1436596530503693, "learning_rate": 6.893948018533681e-06, "loss": 0.0582, "step": 20175 }, { "epoch": 0.08420191770076191, "grad_norm": 0.8406787217908924, "learning_rate": 6.893093866305982e-06, "loss": 0.0577, "step": 20180 }, { "epoch": 0.08422278041575218, "grad_norm": 1.1170861844152509, "learning_rate": 6.892240031485064e-06, "loss": 0.0548, "step": 20185 }, { "epoch": 0.08424364313074247, "grad_norm": 1.095484857548494, "learning_rate": 6.891386513874395e-06, "loss": 0.0442, "step": 20190 }, { "epoch": 0.08426450584573274, "grad_norm": 1.1176207628689956, "learning_rate": 6.890533313277612e-06, "loss": 0.0558, "step": 20195 }, { "epoch": 0.08428536856072302, "grad_norm": 1.1885129245074821, "learning_rate": 6.889680429498519e-06, "loss": 0.0448, "step": 20200 }, { "epoch": 0.0843062312757133, "grad_norm": 1.4398214023886664, "learning_rate": 6.888827862341092e-06, "loss": 0.0478, "step": 20205 }, { "epoch": 0.08432709399070358, "grad_norm": 1.1960291246810457, "learning_rate": 6.88797561160948e-06, "loss": 0.0509, "step": 20210 }, { "epoch": 0.08434795670569385, "grad_norm": 0.9632894966438952, "learning_rate": 6.887123677107994e-06, "loss": 0.0484, "step": 20215 }, { "epoch": 0.08436881942068412, "grad_norm": 1.02874717542494, "learning_rate": 6.886272058641122e-06, "loss": 0.0336, "step": 20220 }, { "epoch": 0.08438968213567441, "grad_norm": 1.2497484061144515, "learning_rate": 6.88542075601352e-06, "loss": 0.0547, "step": 20225 }, { "epoch": 0.08441054485066468, "grad_norm": 1.3374696339461265, "learning_rate": 6.884569769030005e-06, "loss": 0.0642, "step": 20230 }, { "epoch": 0.08443140756565497, "grad_norm": 1.6083155700179796, "learning_rate": 6.8837190974955735e-06, "loss": 0.0483, "step": 20235 }, { "epoch": 0.08445227028064524, "grad_norm": 1.3296521554636176, "learning_rate": 6.882868741215385e-06, "loss": 0.0473, "step": 20240 }, { "epoch": 0.08447313299563552, "grad_norm": 1.7822673704332084, "learning_rate": 6.882018699994767e-06, "loss": 0.0637, "step": 20245 }, { "epoch": 0.0844939957106258, "grad_norm": 0.8372819312727482, "learning_rate": 6.881168973639217e-06, "loss": 0.0724, "step": 20250 }, { "epoch": 0.08451485842561608, "grad_norm": 1.6502062846568049, "learning_rate": 6.880319561954403e-06, "loss": 0.0435, "step": 20255 }, { "epoch": 0.08453572114060635, "grad_norm": 1.2617024766435585, "learning_rate": 6.879470464746155e-06, "loss": 0.0632, "step": 20260 }, { "epoch": 0.08455658385559663, "grad_norm": 1.1278687731853405, "learning_rate": 6.878621681820476e-06, "loss": 0.0601, "step": 20265 }, { "epoch": 0.08457744657058691, "grad_norm": 1.0209077993133588, "learning_rate": 6.877773212983532e-06, "loss": 0.0547, "step": 20270 }, { "epoch": 0.08459830928557718, "grad_norm": 0.953501988994671, "learning_rate": 6.87692505804166e-06, "loss": 0.0563, "step": 20275 }, { "epoch": 0.08461917200056747, "grad_norm": 1.051595866282226, "learning_rate": 6.876077216801362e-06, "loss": 0.0589, "step": 20280 }, { "epoch": 0.08464003471555774, "grad_norm": 1.5430893818996099, "learning_rate": 6.87522968906931e-06, "loss": 0.0552, "step": 20285 }, { "epoch": 0.08466089743054803, "grad_norm": 1.2304647750804474, "learning_rate": 6.874382474652338e-06, "loss": 0.0566, "step": 20290 }, { "epoch": 0.0846817601455383, "grad_norm": 1.1733512137253732, "learning_rate": 6.873535573357449e-06, "loss": 0.0546, "step": 20295 }, { "epoch": 0.08470262286052858, "grad_norm": 3.0484322303640323, "learning_rate": 6.872688984991815e-06, "loss": 0.0792, "step": 20300 }, { "epoch": 0.08472348557551886, "grad_norm": 1.0482655428277028, "learning_rate": 6.871842709362768e-06, "loss": 0.0509, "step": 20305 }, { "epoch": 0.08474434829050913, "grad_norm": 0.7758377857804295, "learning_rate": 6.870996746277813e-06, "loss": 0.0715, "step": 20310 }, { "epoch": 0.08476521100549941, "grad_norm": 1.5247084226980614, "learning_rate": 6.870151095544616e-06, "loss": 0.0551, "step": 20315 }, { "epoch": 0.08478607372048969, "grad_norm": 0.95578774450882, "learning_rate": 6.8693057569710095e-06, "loss": 0.0475, "step": 20320 }, { "epoch": 0.08480693643547997, "grad_norm": 1.3950641448653167, "learning_rate": 6.868460730364995e-06, "loss": 0.0562, "step": 20325 }, { "epoch": 0.08482779915047024, "grad_norm": 0.8148836949577244, "learning_rate": 6.867616015534734e-06, "loss": 0.0497, "step": 20330 }, { "epoch": 0.08484866186546053, "grad_norm": 1.4023069687040897, "learning_rate": 6.866771612288555e-06, "loss": 0.0506, "step": 20335 }, { "epoch": 0.0848695245804508, "grad_norm": 1.595756788123086, "learning_rate": 6.865927520434954e-06, "loss": 0.0524, "step": 20340 }, { "epoch": 0.08489038729544109, "grad_norm": 0.9506395601606694, "learning_rate": 6.865083739782588e-06, "loss": 0.0552, "step": 20345 }, { "epoch": 0.08491125001043136, "grad_norm": 1.0757058850236296, "learning_rate": 6.864240270140283e-06, "loss": 0.0511, "step": 20350 }, { "epoch": 0.08493211272542163, "grad_norm": 0.7900811320769793, "learning_rate": 6.863397111317023e-06, "loss": 0.0409, "step": 20355 }, { "epoch": 0.08495297544041192, "grad_norm": 1.3383619043203203, "learning_rate": 6.862554263121964e-06, "loss": 0.049, "step": 20360 }, { "epoch": 0.08497383815540219, "grad_norm": 0.788788065627157, "learning_rate": 6.861711725364418e-06, "loss": 0.0428, "step": 20365 }, { "epoch": 0.08499470087039247, "grad_norm": 1.053494195271304, "learning_rate": 6.860869497853867e-06, "loss": 0.0478, "step": 20370 }, { "epoch": 0.08501556358538274, "grad_norm": 1.3589443480112944, "learning_rate": 6.860027580399954e-06, "loss": 0.0508, "step": 20375 }, { "epoch": 0.08503642630037303, "grad_norm": 1.4663797112162316, "learning_rate": 6.859185972812486e-06, "loss": 0.0527, "step": 20380 }, { "epoch": 0.0850572890153633, "grad_norm": 1.3397463327775345, "learning_rate": 6.858344674901431e-06, "loss": 0.0476, "step": 20385 }, { "epoch": 0.08507815173035359, "grad_norm": 1.4715782190890287, "learning_rate": 6.857503686476927e-06, "loss": 0.0559, "step": 20390 }, { "epoch": 0.08509901444534386, "grad_norm": 1.3332861102007885, "learning_rate": 6.856663007349266e-06, "loss": 0.0605, "step": 20395 }, { "epoch": 0.08511987716033413, "grad_norm": 1.7837702686057468, "learning_rate": 6.8558226373289075e-06, "loss": 0.0549, "step": 20400 }, { "epoch": 0.08514073987532442, "grad_norm": 1.841502087232309, "learning_rate": 6.854982576226475e-06, "loss": 0.0589, "step": 20405 }, { "epoch": 0.08516160259031469, "grad_norm": 0.7722094544392856, "learning_rate": 6.854142823852752e-06, "loss": 0.0634, "step": 20410 }, { "epoch": 0.08518246530530497, "grad_norm": 0.6296705024242962, "learning_rate": 6.853303380018682e-06, "loss": 0.0396, "step": 20415 }, { "epoch": 0.08520332802029525, "grad_norm": 1.4985694452640799, "learning_rate": 6.852464244535378e-06, "loss": 0.0564, "step": 20420 }, { "epoch": 0.08522419073528553, "grad_norm": 1.2464232442313083, "learning_rate": 6.851625417214104e-06, "loss": 0.0418, "step": 20425 }, { "epoch": 0.0852450534502758, "grad_norm": 1.115388702677306, "learning_rate": 6.850786897866295e-06, "loss": 0.0454, "step": 20430 }, { "epoch": 0.08526591616526609, "grad_norm": 1.0263766988297545, "learning_rate": 6.849948686303547e-06, "loss": 0.0623, "step": 20435 }, { "epoch": 0.08528677888025636, "grad_norm": 1.525593269379967, "learning_rate": 6.84911078233761e-06, "loss": 0.0605, "step": 20440 }, { "epoch": 0.08530764159524663, "grad_norm": 1.5211214767628283, "learning_rate": 6.848273185780402e-06, "loss": 0.0518, "step": 20445 }, { "epoch": 0.08532850431023692, "grad_norm": 0.7456316944483796, "learning_rate": 6.847435896443999e-06, "loss": 0.0427, "step": 20450 }, { "epoch": 0.08534936702522719, "grad_norm": 1.1506886434514687, "learning_rate": 6.846598914140638e-06, "loss": 0.0491, "step": 20455 }, { "epoch": 0.08537022974021748, "grad_norm": 0.44878768793800217, "learning_rate": 6.845762238682718e-06, "loss": 0.0399, "step": 20460 }, { "epoch": 0.08539109245520775, "grad_norm": 1.4172736220385866, "learning_rate": 6.844925869882799e-06, "loss": 0.087, "step": 20465 }, { "epoch": 0.08541195517019803, "grad_norm": 1.5561287654377605, "learning_rate": 6.8440898075535956e-06, "loss": 0.0497, "step": 20470 }, { "epoch": 0.0854328178851883, "grad_norm": 0.9761495214485855, "learning_rate": 6.843254051507989e-06, "loss": 0.0407, "step": 20475 }, { "epoch": 0.08545368060017859, "grad_norm": 1.4763278398727153, "learning_rate": 6.84241860155902e-06, "loss": 0.0666, "step": 20480 }, { "epoch": 0.08547454331516886, "grad_norm": 1.551155245016697, "learning_rate": 6.841583457519884e-06, "loss": 0.0619, "step": 20485 }, { "epoch": 0.08549540603015914, "grad_norm": 0.9387306048688726, "learning_rate": 6.840748619203941e-06, "loss": 0.0502, "step": 20490 }, { "epoch": 0.08551626874514942, "grad_norm": 1.3089811232385224, "learning_rate": 6.839914086424707e-06, "loss": 0.0444, "step": 20495 }, { "epoch": 0.08553713146013969, "grad_norm": 0.8517727313316725, "learning_rate": 6.83907985899586e-06, "loss": 0.0408, "step": 20500 }, { "epoch": 0.08555799417512998, "grad_norm": 1.6509525326998475, "learning_rate": 6.838245936731234e-06, "loss": 0.06, "step": 20505 }, { "epoch": 0.08557885689012025, "grad_norm": 1.5161153307511743, "learning_rate": 6.837412319444826e-06, "loss": 0.047, "step": 20510 }, { "epoch": 0.08559971960511054, "grad_norm": 1.1985129657985654, "learning_rate": 6.836579006950786e-06, "loss": 0.0491, "step": 20515 }, { "epoch": 0.08562058232010081, "grad_norm": 0.7522776555980141, "learning_rate": 6.83574599906343e-06, "loss": 0.043, "step": 20520 }, { "epoch": 0.0856414450350911, "grad_norm": 0.9362109229001577, "learning_rate": 6.834913295597225e-06, "loss": 0.0382, "step": 20525 }, { "epoch": 0.08566230775008137, "grad_norm": 0.8972426429659673, "learning_rate": 6.8340808963668005e-06, "loss": 0.0456, "step": 20530 }, { "epoch": 0.08568317046507164, "grad_norm": 1.1872561859951702, "learning_rate": 6.833248801186941e-06, "loss": 0.0437, "step": 20535 }, { "epoch": 0.08570403318006192, "grad_norm": 0.8953709333801625, "learning_rate": 6.832417009872593e-06, "loss": 0.0511, "step": 20540 }, { "epoch": 0.0857248958950522, "grad_norm": 1.4323889056223316, "learning_rate": 6.831585522238857e-06, "loss": 0.0734, "step": 20545 }, { "epoch": 0.08574575861004248, "grad_norm": 1.6236920972919355, "learning_rate": 6.830754338100992e-06, "loss": 0.0488, "step": 20550 }, { "epoch": 0.08576662132503275, "grad_norm": 1.597181333980911, "learning_rate": 6.829923457274415e-06, "loss": 0.0747, "step": 20555 }, { "epoch": 0.08578748404002304, "grad_norm": 1.8608525292748836, "learning_rate": 6.829092879574701e-06, "loss": 0.0569, "step": 20560 }, { "epoch": 0.08580834675501331, "grad_norm": 1.1685459251067547, "learning_rate": 6.828262604817578e-06, "loss": 0.0476, "step": 20565 }, { "epoch": 0.0858292094700036, "grad_norm": 1.1942388005381899, "learning_rate": 6.827432632818934e-06, "loss": 0.0531, "step": 20570 }, { "epoch": 0.08585007218499387, "grad_norm": 0.8597105064352115, "learning_rate": 6.826602963394814e-06, "loss": 0.0478, "step": 20575 }, { "epoch": 0.08587093489998414, "grad_norm": 1.092382346902844, "learning_rate": 6.825773596361418e-06, "loss": 0.0498, "step": 20580 }, { "epoch": 0.08589179761497442, "grad_norm": 1.130708057593766, "learning_rate": 6.824944531535102e-06, "loss": 0.052, "step": 20585 }, { "epoch": 0.0859126603299647, "grad_norm": 1.0329756480787617, "learning_rate": 6.824115768732381e-06, "loss": 0.0458, "step": 20590 }, { "epoch": 0.08593352304495498, "grad_norm": 0.8099800198965558, "learning_rate": 6.823287307769921e-06, "loss": 0.057, "step": 20595 }, { "epoch": 0.08595438575994525, "grad_norm": 1.8714451272236885, "learning_rate": 6.8224591484645475e-06, "loss": 0.0549, "step": 20600 }, { "epoch": 0.08597524847493554, "grad_norm": 0.6307800939557868, "learning_rate": 6.821631290633242e-06, "loss": 0.0459, "step": 20605 }, { "epoch": 0.08599611118992581, "grad_norm": 1.7345616535657806, "learning_rate": 6.820803734093137e-06, "loss": 0.0412, "step": 20610 }, { "epoch": 0.0860169739049161, "grad_norm": 1.439768984099698, "learning_rate": 6.8199764786615255e-06, "loss": 0.0445, "step": 20615 }, { "epoch": 0.08603783661990637, "grad_norm": 0.9004690290978711, "learning_rate": 6.819149524155853e-06, "loss": 0.0429, "step": 20620 }, { "epoch": 0.08605869933489664, "grad_norm": 1.9724573956949365, "learning_rate": 6.818322870393721e-06, "loss": 0.056, "step": 20625 }, { "epoch": 0.08607956204988693, "grad_norm": 1.109155489343971, "learning_rate": 6.817496517192883e-06, "loss": 0.1137, "step": 20630 }, { "epoch": 0.0861004247648772, "grad_norm": 1.2934194200460367, "learning_rate": 6.81667046437125e-06, "loss": 0.052, "step": 20635 }, { "epoch": 0.08612128747986748, "grad_norm": 1.2336281337453663, "learning_rate": 6.8158447117468875e-06, "loss": 0.0545, "step": 20640 }, { "epoch": 0.08614215019485776, "grad_norm": 0.7600956355963375, "learning_rate": 6.815019259138012e-06, "loss": 0.0419, "step": 20645 }, { "epoch": 0.08616301290984804, "grad_norm": 1.4118201234053396, "learning_rate": 6.814194106362998e-06, "loss": 0.0453, "step": 20650 }, { "epoch": 0.08618387562483831, "grad_norm": 1.3593376900754202, "learning_rate": 6.813369253240373e-06, "loss": 0.042, "step": 20655 }, { "epoch": 0.0862047383398286, "grad_norm": 0.9795631108077708, "learning_rate": 6.812544699588814e-06, "loss": 0.0574, "step": 20660 }, { "epoch": 0.08622560105481887, "grad_norm": 0.8473861424357529, "learning_rate": 6.811720445227158e-06, "loss": 0.0474, "step": 20665 }, { "epoch": 0.08624646376980914, "grad_norm": 1.253832891444196, "learning_rate": 6.8108964899743926e-06, "loss": 0.0555, "step": 20670 }, { "epoch": 0.08626732648479943, "grad_norm": 0.877424030227204, "learning_rate": 6.810072833649656e-06, "loss": 0.0433, "step": 20675 }, { "epoch": 0.0862881891997897, "grad_norm": 1.2816507767520366, "learning_rate": 6.809249476072243e-06, "loss": 0.0642, "step": 20680 }, { "epoch": 0.08630905191477999, "grad_norm": 0.9389767106686573, "learning_rate": 6.808426417061601e-06, "loss": 0.0479, "step": 20685 }, { "epoch": 0.08632991462977026, "grad_norm": 0.9065757696858203, "learning_rate": 6.807603656437329e-06, "loss": 0.0506, "step": 20690 }, { "epoch": 0.08635077734476054, "grad_norm": 1.7044402854940746, "learning_rate": 6.806781194019177e-06, "loss": 0.0756, "step": 20695 }, { "epoch": 0.08637164005975081, "grad_norm": 1.3370475859074966, "learning_rate": 6.805959029627052e-06, "loss": 0.0587, "step": 20700 }, { "epoch": 0.0863925027747411, "grad_norm": 2.007911560470265, "learning_rate": 6.805137163081007e-06, "loss": 0.0465, "step": 20705 }, { "epoch": 0.08641336548973137, "grad_norm": 0.8963467198056377, "learning_rate": 6.804315594201254e-06, "loss": 0.0565, "step": 20710 }, { "epoch": 0.08643422820472164, "grad_norm": 1.2182630435375088, "learning_rate": 6.803494322808154e-06, "loss": 0.0653, "step": 20715 }, { "epoch": 0.08645509091971193, "grad_norm": 1.364945997471089, "learning_rate": 6.8026733487222154e-06, "loss": 0.0436, "step": 20720 }, { "epoch": 0.0864759536347022, "grad_norm": 0.9111749185618004, "learning_rate": 6.801852671764105e-06, "loss": 0.0567, "step": 20725 }, { "epoch": 0.08649681634969249, "grad_norm": 0.721051610768398, "learning_rate": 6.801032291754637e-06, "loss": 0.0433, "step": 20730 }, { "epoch": 0.08651767906468276, "grad_norm": 1.808296155360434, "learning_rate": 6.8002122085147774e-06, "loss": 0.0469, "step": 20735 }, { "epoch": 0.08653854177967304, "grad_norm": 1.0249844208841057, "learning_rate": 6.799392421865644e-06, "loss": 0.0435, "step": 20740 }, { "epoch": 0.08655940449466332, "grad_norm": 0.6939030965701961, "learning_rate": 6.798572931628505e-06, "loss": 0.0571, "step": 20745 }, { "epoch": 0.08658026720965359, "grad_norm": 1.111064536616754, "learning_rate": 6.797753737624781e-06, "loss": 0.0521, "step": 20750 }, { "epoch": 0.08660112992464387, "grad_norm": 0.8031275979043762, "learning_rate": 6.796934839676041e-06, "loss": 0.0472, "step": 20755 }, { "epoch": 0.08662199263963415, "grad_norm": 1.058175729954722, "learning_rate": 6.796116237604006e-06, "loss": 0.0482, "step": 20760 }, { "epoch": 0.08664285535462443, "grad_norm": 1.724146807887009, "learning_rate": 6.795297931230542e-06, "loss": 0.0451, "step": 20765 }, { "epoch": 0.0866637180696147, "grad_norm": 0.9618453875740016, "learning_rate": 6.794479920377676e-06, "loss": 0.0483, "step": 20770 }, { "epoch": 0.08668458078460499, "grad_norm": 1.0561917937712746, "learning_rate": 6.793662204867575e-06, "loss": 0.0492, "step": 20775 }, { "epoch": 0.08670544349959526, "grad_norm": 2.520376582428716, "learning_rate": 6.7928447845225594e-06, "loss": 0.0478, "step": 20780 }, { "epoch": 0.08672630621458555, "grad_norm": 1.0407944604617534, "learning_rate": 6.7920276591651e-06, "loss": 0.0551, "step": 20785 }, { "epoch": 0.08674716892957582, "grad_norm": 0.9833502432019096, "learning_rate": 6.791210828617816e-06, "loss": 0.0599, "step": 20790 }, { "epoch": 0.08676803164456609, "grad_norm": 1.0398235163646206, "learning_rate": 6.790394292703475e-06, "loss": 0.038, "step": 20795 }, { "epoch": 0.08678889435955638, "grad_norm": 1.1399533533929398, "learning_rate": 6.789578051244996e-06, "loss": 0.0642, "step": 20800 }, { "epoch": 0.08680975707454665, "grad_norm": 1.2692693309097411, "learning_rate": 6.788762104065445e-06, "loss": 0.0588, "step": 20805 }, { "epoch": 0.08683061978953693, "grad_norm": 3.055273727320767, "learning_rate": 6.787946450988038e-06, "loss": 0.058, "step": 20810 }, { "epoch": 0.0868514825045272, "grad_norm": 1.3859438094597716, "learning_rate": 6.7871310918361384e-06, "loss": 0.0675, "step": 20815 }, { "epoch": 0.08687234521951749, "grad_norm": 1.1981736246420553, "learning_rate": 6.786316026433261e-06, "loss": 0.0535, "step": 20820 }, { "epoch": 0.08689320793450776, "grad_norm": 1.1475560146871016, "learning_rate": 6.785501254603064e-06, "loss": 0.0512, "step": 20825 }, { "epoch": 0.08691407064949805, "grad_norm": 0.7954455318336, "learning_rate": 6.784686776169358e-06, "loss": 0.0397, "step": 20830 }, { "epoch": 0.08693493336448832, "grad_norm": 1.1939379786596267, "learning_rate": 6.7838725909561006e-06, "loss": 0.0486, "step": 20835 }, { "epoch": 0.08695579607947859, "grad_norm": 1.263413685035843, "learning_rate": 6.783058698787396e-06, "loss": 0.042, "step": 20840 }, { "epoch": 0.08697665879446888, "grad_norm": 3.649936566338785, "learning_rate": 6.7822450994874965e-06, "loss": 0.0475, "step": 20845 }, { "epoch": 0.08699752150945915, "grad_norm": 3.826354671197504, "learning_rate": 6.781431792880805e-06, "loss": 0.0826, "step": 20850 }, { "epoch": 0.08701838422444944, "grad_norm": 1.0460657183533995, "learning_rate": 6.780618778791866e-06, "loss": 0.0493, "step": 20855 }, { "epoch": 0.08703924693943971, "grad_norm": 1.3339483375577617, "learning_rate": 6.779806057045374e-06, "loss": 0.0528, "step": 20860 }, { "epoch": 0.08706010965442999, "grad_norm": 1.0247385700006115, "learning_rate": 6.7789936274661736e-06, "loss": 0.0508, "step": 20865 }, { "epoch": 0.08708097236942026, "grad_norm": 0.8710479713042314, "learning_rate": 6.778181489879253e-06, "loss": 0.0778, "step": 20870 }, { "epoch": 0.08710183508441055, "grad_norm": 1.263441861010667, "learning_rate": 6.777369644109746e-06, "loss": 0.0536, "step": 20875 }, { "epoch": 0.08712269779940082, "grad_norm": 1.0645663461953825, "learning_rate": 6.776558089982936e-06, "loss": 0.0584, "step": 20880 }, { "epoch": 0.0871435605143911, "grad_norm": 1.2884266232067352, "learning_rate": 6.77574682732425e-06, "loss": 0.0465, "step": 20885 }, { "epoch": 0.08716442322938138, "grad_norm": 0.8613682697605755, "learning_rate": 6.774935855959265e-06, "loss": 0.0456, "step": 20890 }, { "epoch": 0.08718528594437165, "grad_norm": 2.2311888942064297, "learning_rate": 6.774125175713699e-06, "loss": 0.0691, "step": 20895 }, { "epoch": 0.08720614865936194, "grad_norm": 0.8912798254212402, "learning_rate": 6.773314786413422e-06, "loss": 0.0617, "step": 20900 }, { "epoch": 0.08722701137435221, "grad_norm": 0.8465042116204311, "learning_rate": 6.7725046878844436e-06, "loss": 0.0558, "step": 20905 }, { "epoch": 0.0872478740893425, "grad_norm": 1.1709342995575576, "learning_rate": 6.7716948799529236e-06, "loss": 0.0549, "step": 20910 }, { "epoch": 0.08726873680433277, "grad_norm": 2.7714713585178585, "learning_rate": 6.770885362445166e-06, "loss": 0.0468, "step": 20915 }, { "epoch": 0.08728959951932305, "grad_norm": 1.1660715717051766, "learning_rate": 6.770076135187619e-06, "loss": 0.0599, "step": 20920 }, { "epoch": 0.08731046223431332, "grad_norm": 1.2200431989623353, "learning_rate": 6.769267198006876e-06, "loss": 0.0505, "step": 20925 }, { "epoch": 0.0873313249493036, "grad_norm": 0.7545712345245864, "learning_rate": 6.768458550729676e-06, "loss": 0.0358, "step": 20930 }, { "epoch": 0.08735218766429388, "grad_norm": 1.3243914170304036, "learning_rate": 6.767650193182906e-06, "loss": 0.061, "step": 20935 }, { "epoch": 0.08737305037928415, "grad_norm": 1.562799474322426, "learning_rate": 6.766842125193592e-06, "loss": 0.0444, "step": 20940 }, { "epoch": 0.08739391309427444, "grad_norm": 1.1521699034058714, "learning_rate": 6.766034346588908e-06, "loss": 0.0518, "step": 20945 }, { "epoch": 0.08741477580926471, "grad_norm": 1.5901810937310012, "learning_rate": 6.765226857196171e-06, "loss": 0.0472, "step": 20950 }, { "epoch": 0.087435638524255, "grad_norm": 1.0235333698735747, "learning_rate": 6.764419656842844e-06, "loss": 0.0517, "step": 20955 }, { "epoch": 0.08745650123924527, "grad_norm": 1.2327250531160567, "learning_rate": 6.7636127453565305e-06, "loss": 0.057, "step": 20960 }, { "epoch": 0.08747736395423555, "grad_norm": 1.1196900223133583, "learning_rate": 6.762806122564982e-06, "loss": 0.0467, "step": 20965 }, { "epoch": 0.08749822666922583, "grad_norm": 1.0474979788972651, "learning_rate": 6.761999788296093e-06, "loss": 0.0468, "step": 20970 }, { "epoch": 0.0875190893842161, "grad_norm": 1.0634547624070045, "learning_rate": 6.7611937423779e-06, "loss": 0.0518, "step": 20975 }, { "epoch": 0.08753995209920638, "grad_norm": 0.9601342160080174, "learning_rate": 6.7603879846385825e-06, "loss": 0.0483, "step": 20980 }, { "epoch": 0.08756081481419666, "grad_norm": 1.1118506546773335, "learning_rate": 6.759582514906465e-06, "loss": 0.0542, "step": 20985 }, { "epoch": 0.08758167752918694, "grad_norm": 0.9856029939003527, "learning_rate": 6.758777333010015e-06, "loss": 0.0433, "step": 20990 }, { "epoch": 0.08760254024417721, "grad_norm": 1.7928664875772669, "learning_rate": 6.757972438777842e-06, "loss": 0.0652, "step": 20995 }, { "epoch": 0.0876234029591675, "grad_norm": 0.9818190502804328, "learning_rate": 6.7571678320387014e-06, "loss": 0.0386, "step": 21000 }, { "epoch": 0.08764426567415777, "grad_norm": 1.1825008873254197, "learning_rate": 6.756363512621484e-06, "loss": 0.0576, "step": 21005 }, { "epoch": 0.08766512838914806, "grad_norm": 1.3951589532586393, "learning_rate": 6.755559480355232e-06, "loss": 0.0585, "step": 21010 }, { "epoch": 0.08768599110413833, "grad_norm": 0.9024287455471769, "learning_rate": 6.754755735069125e-06, "loss": 0.0443, "step": 21015 }, { "epoch": 0.0877068538191286, "grad_norm": 0.8302903898316194, "learning_rate": 6.7539522765924844e-06, "loss": 0.0555, "step": 21020 }, { "epoch": 0.08772771653411889, "grad_norm": 1.903319536795054, "learning_rate": 6.753149104754776e-06, "loss": 0.0525, "step": 21025 }, { "epoch": 0.08774857924910916, "grad_norm": 1.262023116034302, "learning_rate": 6.752346219385607e-06, "loss": 0.0448, "step": 21030 }, { "epoch": 0.08776944196409944, "grad_norm": 1.1691992472428288, "learning_rate": 6.751543620314724e-06, "loss": 0.0506, "step": 21035 }, { "epoch": 0.08779030467908971, "grad_norm": 1.4012443408778013, "learning_rate": 6.75074130737202e-06, "loss": 0.0421, "step": 21040 }, { "epoch": 0.08781116739408, "grad_norm": 0.8534193026109653, "learning_rate": 6.749939280387525e-06, "loss": 0.0461, "step": 21045 }, { "epoch": 0.08783203010907027, "grad_norm": 1.6247504031523456, "learning_rate": 6.749137539191413e-06, "loss": 0.0561, "step": 21050 }, { "epoch": 0.08785289282406056, "grad_norm": 1.4052358053985146, "learning_rate": 6.7483360836139955e-06, "loss": 0.0625, "step": 21055 }, { "epoch": 0.08787375553905083, "grad_norm": 1.482193792605042, "learning_rate": 6.74753491348573e-06, "loss": 0.0525, "step": 21060 }, { "epoch": 0.0878946182540411, "grad_norm": 2.4938978439432695, "learning_rate": 6.746734028637212e-06, "loss": 0.0517, "step": 21065 }, { "epoch": 0.08791548096903139, "grad_norm": 1.1959638145950038, "learning_rate": 6.745933428899177e-06, "loss": 0.0607, "step": 21070 }, { "epoch": 0.08793634368402166, "grad_norm": 1.3682715059122783, "learning_rate": 6.745133114102503e-06, "loss": 0.0702, "step": 21075 }, { "epoch": 0.08795720639901194, "grad_norm": 0.6493849342656597, "learning_rate": 6.744333084078209e-06, "loss": 0.0378, "step": 21080 }, { "epoch": 0.08797806911400222, "grad_norm": 1.0499097918928666, "learning_rate": 6.743533338657453e-06, "loss": 0.0459, "step": 21085 }, { "epoch": 0.0879989318289925, "grad_norm": 1.0387784137876932, "learning_rate": 6.74273387767153e-06, "loss": 0.0413, "step": 21090 }, { "epoch": 0.08801979454398277, "grad_norm": 1.3582616089928257, "learning_rate": 6.741934700951881e-06, "loss": 0.045, "step": 21095 }, { "epoch": 0.08804065725897306, "grad_norm": 0.9844401117465105, "learning_rate": 6.741135808330083e-06, "loss": 0.0496, "step": 21100 }, { "epoch": 0.08806151997396333, "grad_norm": 0.956474486866096, "learning_rate": 6.740337199637852e-06, "loss": 0.0613, "step": 21105 }, { "epoch": 0.0880823826889536, "grad_norm": 1.2980201045904813, "learning_rate": 6.739538874707047e-06, "loss": 0.049, "step": 21110 }, { "epoch": 0.08810324540394389, "grad_norm": 1.2030943471893247, "learning_rate": 6.738740833369664e-06, "loss": 0.0491, "step": 21115 }, { "epoch": 0.08812410811893416, "grad_norm": 0.5199075373178856, "learning_rate": 6.737943075457839e-06, "loss": 0.0396, "step": 21120 }, { "epoch": 0.08814497083392445, "grad_norm": 0.862492394601228, "learning_rate": 6.737145600803846e-06, "loss": 0.0435, "step": 21125 }, { "epoch": 0.08816583354891472, "grad_norm": 1.05666839692264, "learning_rate": 6.736348409240097e-06, "loss": 0.0606, "step": 21130 }, { "epoch": 0.088186696263905, "grad_norm": 1.0667786193133377, "learning_rate": 6.735551500599149e-06, "loss": 0.0573, "step": 21135 }, { "epoch": 0.08820755897889528, "grad_norm": 0.6891391746332067, "learning_rate": 6.734754874713688e-06, "loss": 0.0404, "step": 21140 }, { "epoch": 0.08822842169388556, "grad_norm": 0.8591170635289364, "learning_rate": 6.733958531416547e-06, "loss": 0.0463, "step": 21145 }, { "epoch": 0.08824928440887583, "grad_norm": 1.338246497523181, "learning_rate": 6.733162470540691e-06, "loss": 0.0588, "step": 21150 }, { "epoch": 0.0882701471238661, "grad_norm": 0.629154062944583, "learning_rate": 6.7323666919192295e-06, "loss": 0.0445, "step": 21155 }, { "epoch": 0.08829100983885639, "grad_norm": 1.3376565363862256, "learning_rate": 6.731571195385405e-06, "loss": 0.0752, "step": 21160 }, { "epoch": 0.08831187255384666, "grad_norm": 0.9649358380272941, "learning_rate": 6.730775980772597e-06, "loss": 0.0426, "step": 21165 }, { "epoch": 0.08833273526883695, "grad_norm": 0.9670508992553705, "learning_rate": 6.729981047914328e-06, "loss": 0.047, "step": 21170 }, { "epoch": 0.08835359798382722, "grad_norm": 1.0729986438182744, "learning_rate": 6.729186396644255e-06, "loss": 0.0598, "step": 21175 }, { "epoch": 0.0883744606988175, "grad_norm": 0.898840610659713, "learning_rate": 6.728392026796173e-06, "loss": 0.0512, "step": 21180 }, { "epoch": 0.08839532341380778, "grad_norm": 1.3842841645137032, "learning_rate": 6.7275979382040135e-06, "loss": 0.0526, "step": 21185 }, { "epoch": 0.08841618612879806, "grad_norm": 0.8411716739482996, "learning_rate": 6.7268041307018465e-06, "loss": 0.053, "step": 21190 }, { "epoch": 0.08843704884378834, "grad_norm": 0.9566650948624205, "learning_rate": 6.726010604123876e-06, "loss": 0.0769, "step": 21195 }, { "epoch": 0.0884579115587786, "grad_norm": 1.1442968394659367, "learning_rate": 6.7252173583044474e-06, "loss": 0.0445, "step": 21200 }, { "epoch": 0.08847877427376889, "grad_norm": 0.5190191127367142, "learning_rate": 6.724424393078042e-06, "loss": 0.0558, "step": 21205 }, { "epoch": 0.08849963698875916, "grad_norm": 0.9274250283981591, "learning_rate": 6.7236317082792716e-06, "loss": 0.0498, "step": 21210 }, { "epoch": 0.08852049970374945, "grad_norm": 1.1748790529099233, "learning_rate": 6.722839303742893e-06, "loss": 0.0537, "step": 21215 }, { "epoch": 0.08854136241873972, "grad_norm": 0.9820469124262714, "learning_rate": 6.722047179303795e-06, "loss": 0.0526, "step": 21220 }, { "epoch": 0.08856222513373001, "grad_norm": 1.0716816036248324, "learning_rate": 6.721255334797e-06, "loss": 0.0546, "step": 21225 }, { "epoch": 0.08858308784872028, "grad_norm": 1.034433900474703, "learning_rate": 6.720463770057673e-06, "loss": 0.062, "step": 21230 }, { "epoch": 0.08860395056371057, "grad_norm": 0.8109320638451865, "learning_rate": 6.7196724849211085e-06, "loss": 0.0552, "step": 21235 }, { "epoch": 0.08862481327870084, "grad_norm": 0.9164694535413874, "learning_rate": 6.718881479222742e-06, "loss": 0.0569, "step": 21240 }, { "epoch": 0.08864567599369111, "grad_norm": 1.017998457199888, "learning_rate": 6.718090752798138e-06, "loss": 0.0432, "step": 21245 }, { "epoch": 0.0886665387086814, "grad_norm": 0.6841513950878453, "learning_rate": 6.717300305483006e-06, "loss": 0.0602, "step": 21250 }, { "epoch": 0.08868740142367167, "grad_norm": 1.3844135650840665, "learning_rate": 6.71651013711318e-06, "loss": 0.0427, "step": 21255 }, { "epoch": 0.08870826413866195, "grad_norm": 1.7320339668048346, "learning_rate": 6.715720247524637e-06, "loss": 0.0633, "step": 21260 }, { "epoch": 0.08872912685365222, "grad_norm": 1.0244202193288494, "learning_rate": 6.714930636553487e-06, "loss": 0.0441, "step": 21265 }, { "epoch": 0.08874998956864251, "grad_norm": 0.984621406084297, "learning_rate": 6.7141413040359715e-06, "loss": 0.0439, "step": 21270 }, { "epoch": 0.08877085228363278, "grad_norm": 1.1168940291098863, "learning_rate": 6.713352249808472e-06, "loss": 0.0511, "step": 21275 }, { "epoch": 0.08879171499862307, "grad_norm": 1.0234464168079815, "learning_rate": 6.712563473707501e-06, "loss": 0.0534, "step": 21280 }, { "epoch": 0.08881257771361334, "grad_norm": 0.9608665224006986, "learning_rate": 6.711774975569704e-06, "loss": 0.0427, "step": 21285 }, { "epoch": 0.08883344042860361, "grad_norm": 0.8654043711006401, "learning_rate": 6.710986755231868e-06, "loss": 0.0541, "step": 21290 }, { "epoch": 0.0888543031435939, "grad_norm": 0.963363652508762, "learning_rate": 6.710198812530907e-06, "loss": 0.0569, "step": 21295 }, { "epoch": 0.08887516585858417, "grad_norm": 1.651244175068628, "learning_rate": 6.70941114730387e-06, "loss": 0.0581, "step": 21300 }, { "epoch": 0.08889602857357445, "grad_norm": 1.543053936083425, "learning_rate": 6.7086237593879436e-06, "loss": 0.0513, "step": 21305 }, { "epoch": 0.08891689128856473, "grad_norm": 0.8629138426396359, "learning_rate": 6.707836648620443e-06, "loss": 0.0498, "step": 21310 }, { "epoch": 0.08893775400355501, "grad_norm": 1.0482274256234079, "learning_rate": 6.707049814838823e-06, "loss": 0.049, "step": 21315 }, { "epoch": 0.08895861671854528, "grad_norm": 2.966598930432821, "learning_rate": 6.706263257880665e-06, "loss": 0.0564, "step": 21320 }, { "epoch": 0.08897947943353557, "grad_norm": 1.468383896640355, "learning_rate": 6.705476977583691e-06, "loss": 0.0507, "step": 21325 }, { "epoch": 0.08900034214852584, "grad_norm": 1.2779284341734376, "learning_rate": 6.704690973785749e-06, "loss": 0.0488, "step": 21330 }, { "epoch": 0.08902120486351611, "grad_norm": 1.030550253258328, "learning_rate": 6.7039052463248245e-06, "loss": 0.0481, "step": 21335 }, { "epoch": 0.0890420675785064, "grad_norm": 0.9729870027091194, "learning_rate": 6.703119795039037e-06, "loss": 0.0538, "step": 21340 }, { "epoch": 0.08906293029349667, "grad_norm": 1.3896439480787555, "learning_rate": 6.702334619766634e-06, "loss": 0.042, "step": 21345 }, { "epoch": 0.08908379300848696, "grad_norm": 1.3865833509973473, "learning_rate": 6.701549720345999e-06, "loss": 0.0387, "step": 21350 }, { "epoch": 0.08910465572347723, "grad_norm": 0.9965531250182303, "learning_rate": 6.700765096615647e-06, "loss": 0.0286, "step": 21355 }, { "epoch": 0.08912551843846751, "grad_norm": 0.9617818813370217, "learning_rate": 6.699980748414223e-06, "loss": 0.0496, "step": 21360 }, { "epoch": 0.08914638115345778, "grad_norm": 1.5724294320969074, "learning_rate": 6.6991966755805116e-06, "loss": 0.0473, "step": 21365 }, { "epoch": 0.08916724386844807, "grad_norm": 1.3909356115737328, "learning_rate": 6.69841287795342e-06, "loss": 0.0504, "step": 21370 }, { "epoch": 0.08918810658343834, "grad_norm": 1.0790573117763953, "learning_rate": 6.697629355371995e-06, "loss": 0.0515, "step": 21375 }, { "epoch": 0.08920896929842861, "grad_norm": 1.1542810292421686, "learning_rate": 6.6968461076754085e-06, "loss": 0.0521, "step": 21380 }, { "epoch": 0.0892298320134189, "grad_norm": 1.6453535211361505, "learning_rate": 6.696063134702972e-06, "loss": 0.0464, "step": 21385 }, { "epoch": 0.08925069472840917, "grad_norm": 0.9045129704253816, "learning_rate": 6.6952804362941185e-06, "loss": 0.0511, "step": 21390 }, { "epoch": 0.08927155744339946, "grad_norm": 2.204903010957997, "learning_rate": 6.69449801228842e-06, "loss": 0.0575, "step": 21395 }, { "epoch": 0.08929242015838973, "grad_norm": 1.0825074112274446, "learning_rate": 6.693715862525579e-06, "loss": 0.0557, "step": 21400 }, { "epoch": 0.08931328287338001, "grad_norm": 0.8336311579081602, "learning_rate": 6.6929339868454254e-06, "loss": 0.044, "step": 21405 }, { "epoch": 0.08933414558837029, "grad_norm": 1.32681166456364, "learning_rate": 6.692152385087924e-06, "loss": 0.0454, "step": 21410 }, { "epoch": 0.08935500830336057, "grad_norm": 0.9763575146456084, "learning_rate": 6.691371057093168e-06, "loss": 0.0628, "step": 21415 }, { "epoch": 0.08937587101835084, "grad_norm": 1.0341771898959908, "learning_rate": 6.69059000270138e-06, "loss": 0.0443, "step": 21420 }, { "epoch": 0.08939673373334112, "grad_norm": 1.3000425174473402, "learning_rate": 6.689809221752914e-06, "loss": 0.0561, "step": 21425 }, { "epoch": 0.0894175964483314, "grad_norm": 0.9791233483709945, "learning_rate": 6.68902871408826e-06, "loss": 0.0485, "step": 21430 }, { "epoch": 0.08943845916332167, "grad_norm": 1.46023801608721, "learning_rate": 6.68824847954803e-06, "loss": 0.0419, "step": 21435 }, { "epoch": 0.08945932187831196, "grad_norm": 1.3579114562903056, "learning_rate": 6.687468517972972e-06, "loss": 0.0632, "step": 21440 }, { "epoch": 0.08948018459330223, "grad_norm": 0.8917822648453638, "learning_rate": 6.686688829203959e-06, "loss": 0.0477, "step": 21445 }, { "epoch": 0.08950104730829252, "grad_norm": 1.1503266122120361, "learning_rate": 6.685909413081998e-06, "loss": 0.0655, "step": 21450 }, { "epoch": 0.08952191002328279, "grad_norm": 1.8313702568906365, "learning_rate": 6.685130269448222e-06, "loss": 0.0581, "step": 21455 }, { "epoch": 0.08954277273827307, "grad_norm": 1.4944886793036447, "learning_rate": 6.6843513981439016e-06, "loss": 0.0472, "step": 21460 }, { "epoch": 0.08956363545326335, "grad_norm": 1.0168603097783973, "learning_rate": 6.683572799010423e-06, "loss": 0.0404, "step": 21465 }, { "epoch": 0.08958449816825362, "grad_norm": 1.2431088645582862, "learning_rate": 6.682794471889313e-06, "loss": 0.0438, "step": 21470 }, { "epoch": 0.0896053608832439, "grad_norm": 1.1184232662119062, "learning_rate": 6.682016416622228e-06, "loss": 0.0415, "step": 21475 }, { "epoch": 0.08962622359823418, "grad_norm": 1.3937443219370285, "learning_rate": 6.681238633050943e-06, "loss": 0.0582, "step": 21480 }, { "epoch": 0.08964708631322446, "grad_norm": 1.4700727149468278, "learning_rate": 6.680461121017372e-06, "loss": 0.0537, "step": 21485 }, { "epoch": 0.08966794902821473, "grad_norm": 1.3112106957759548, "learning_rate": 6.679683880363556e-06, "loss": 0.0533, "step": 21490 }, { "epoch": 0.08968881174320502, "grad_norm": 2.049124624512921, "learning_rate": 6.6789069109316596e-06, "loss": 0.0527, "step": 21495 }, { "epoch": 0.08970967445819529, "grad_norm": 1.2414786061437832, "learning_rate": 6.6781302125639795e-06, "loss": 0.037, "step": 21500 }, { "epoch": 0.08973053717318558, "grad_norm": 1.1144847167171157, "learning_rate": 6.677353785102943e-06, "loss": 0.0524, "step": 21505 }, { "epoch": 0.08975139988817585, "grad_norm": 1.3977279214478506, "learning_rate": 6.676577628391099e-06, "loss": 0.0629, "step": 21510 }, { "epoch": 0.08977226260316612, "grad_norm": 0.8124387374123635, "learning_rate": 6.67580174227113e-06, "loss": 0.042, "step": 21515 }, { "epoch": 0.0897931253181564, "grad_norm": 2.2719388748522773, "learning_rate": 6.6750261265858475e-06, "loss": 0.0626, "step": 21520 }, { "epoch": 0.08981398803314668, "grad_norm": 0.9848246457198624, "learning_rate": 6.674250781178185e-06, "loss": 0.0448, "step": 21525 }, { "epoch": 0.08983485074813696, "grad_norm": 1.2071059968968911, "learning_rate": 6.673475705891208e-06, "loss": 0.055, "step": 21530 }, { "epoch": 0.08985571346312723, "grad_norm": 1.2586640902931905, "learning_rate": 6.672700900568111e-06, "loss": 0.0499, "step": 21535 }, { "epoch": 0.08987657617811752, "grad_norm": 0.5243891596449995, "learning_rate": 6.671926365052208e-06, "loss": 0.049, "step": 21540 }, { "epoch": 0.08989743889310779, "grad_norm": 1.0978984544616803, "learning_rate": 6.671152099186951e-06, "loss": 0.044, "step": 21545 }, { "epoch": 0.08991830160809808, "grad_norm": 1.4583827026499332, "learning_rate": 6.67037810281591e-06, "loss": 0.0624, "step": 21550 }, { "epoch": 0.08993916432308835, "grad_norm": 1.2505794979135134, "learning_rate": 6.669604375782786e-06, "loss": 0.05, "step": 21555 }, { "epoch": 0.08996002703807862, "grad_norm": 1.2558016227668929, "learning_rate": 6.668830917931409e-06, "loss": 0.0405, "step": 21560 }, { "epoch": 0.08998088975306891, "grad_norm": 1.6956450514489085, "learning_rate": 6.6680577291057335e-06, "loss": 0.0593, "step": 21565 }, { "epoch": 0.09000175246805918, "grad_norm": 0.9171356168235322, "learning_rate": 6.6672848091498375e-06, "loss": 0.0463, "step": 21570 }, { "epoch": 0.09002261518304946, "grad_norm": 0.8004656813723251, "learning_rate": 6.666512157907932e-06, "loss": 0.053, "step": 21575 }, { "epoch": 0.09004347789803974, "grad_norm": 0.6456296428239316, "learning_rate": 6.665739775224347e-06, "loss": 0.05, "step": 21580 }, { "epoch": 0.09006434061303002, "grad_norm": 1.1269160248307861, "learning_rate": 6.664967660943545e-06, "loss": 0.0421, "step": 21585 }, { "epoch": 0.0900852033280203, "grad_norm": 1.3552403895168095, "learning_rate": 6.664195814910114e-06, "loss": 0.0591, "step": 21590 }, { "epoch": 0.09010606604301058, "grad_norm": 1.0881718458537635, "learning_rate": 6.663424236968763e-06, "loss": 0.0461, "step": 21595 }, { "epoch": 0.09012692875800085, "grad_norm": 1.7023973839996631, "learning_rate": 6.66265292696433e-06, "loss": 0.0606, "step": 21600 }, { "epoch": 0.09014779147299112, "grad_norm": 1.8008030549052072, "learning_rate": 6.661881884741781e-06, "loss": 0.0574, "step": 21605 }, { "epoch": 0.09016865418798141, "grad_norm": 1.4793040362537047, "learning_rate": 6.661111110146203e-06, "loss": 0.053, "step": 21610 }, { "epoch": 0.09018951690297168, "grad_norm": 1.3463328483236, "learning_rate": 6.660340603022811e-06, "loss": 0.0541, "step": 21615 }, { "epoch": 0.09021037961796197, "grad_norm": 1.0456953499086217, "learning_rate": 6.659570363216946e-06, "loss": 0.0502, "step": 21620 }, { "epoch": 0.09023124233295224, "grad_norm": 1.2203189937392127, "learning_rate": 6.658800390574074e-06, "loss": 0.0553, "step": 21625 }, { "epoch": 0.09025210504794252, "grad_norm": 1.1981459816244642, "learning_rate": 6.658030684939783e-06, "loss": 0.0522, "step": 21630 }, { "epoch": 0.0902729677629328, "grad_norm": 0.8881010297251557, "learning_rate": 6.6572612461597885e-06, "loss": 0.0477, "step": 21635 }, { "epoch": 0.09029383047792308, "grad_norm": 1.1789051650727669, "learning_rate": 6.65649207407993e-06, "loss": 0.0426, "step": 21640 }, { "epoch": 0.09031469319291335, "grad_norm": 0.684176207595896, "learning_rate": 6.6557231685461735e-06, "loss": 0.0538, "step": 21645 }, { "epoch": 0.09033555590790363, "grad_norm": 1.4890578459178068, "learning_rate": 6.654954529404607e-06, "loss": 0.0432, "step": 21650 }, { "epoch": 0.09035641862289391, "grad_norm": 1.1251988147853527, "learning_rate": 6.6541861565014434e-06, "loss": 0.0494, "step": 21655 }, { "epoch": 0.09037728133788418, "grad_norm": 1.4814086239107491, "learning_rate": 6.653418049683021e-06, "loss": 0.0415, "step": 21660 }, { "epoch": 0.09039814405287447, "grad_norm": 1.4743225044991846, "learning_rate": 6.652650208795803e-06, "loss": 0.0558, "step": 21665 }, { "epoch": 0.09041900676786474, "grad_norm": 1.8399034832769696, "learning_rate": 6.651882633686374e-06, "loss": 0.0538, "step": 21670 }, { "epoch": 0.09043986948285503, "grad_norm": 1.167759583481755, "learning_rate": 6.651115324201443e-06, "loss": 0.0406, "step": 21675 }, { "epoch": 0.0904607321978453, "grad_norm": 1.3282684152776851, "learning_rate": 6.6503482801878434e-06, "loss": 0.0474, "step": 21680 }, { "epoch": 0.09048159491283558, "grad_norm": 1.866200552587812, "learning_rate": 6.649581501492534e-06, "loss": 0.0562, "step": 21685 }, { "epoch": 0.09050245762782586, "grad_norm": 1.8357563809260262, "learning_rate": 6.648814987962594e-06, "loss": 0.0526, "step": 21690 }, { "epoch": 0.09052332034281613, "grad_norm": 1.1031877079778898, "learning_rate": 6.648048739445227e-06, "loss": 0.0577, "step": 21695 }, { "epoch": 0.09054418305780641, "grad_norm": 1.1335097916214063, "learning_rate": 6.647282755787762e-06, "loss": 0.0525, "step": 21700 }, { "epoch": 0.09056504577279668, "grad_norm": 1.0656115301063471, "learning_rate": 6.646517036837647e-06, "loss": 0.0481, "step": 21705 }, { "epoch": 0.09058590848778697, "grad_norm": 10.232200701461563, "learning_rate": 6.645751582442458e-06, "loss": 0.0647, "step": 21710 }, { "epoch": 0.09060677120277724, "grad_norm": 1.0214497303486163, "learning_rate": 6.644986392449887e-06, "loss": 0.0454, "step": 21715 }, { "epoch": 0.09062763391776753, "grad_norm": 2.2219554647505144, "learning_rate": 6.644221466707756e-06, "loss": 0.0594, "step": 21720 }, { "epoch": 0.0906484966327578, "grad_norm": 1.8232544930751517, "learning_rate": 6.643456805064007e-06, "loss": 0.0536, "step": 21725 }, { "epoch": 0.09066935934774809, "grad_norm": 1.4190199122918077, "learning_rate": 6.642692407366701e-06, "loss": 0.0455, "step": 21730 }, { "epoch": 0.09069022206273836, "grad_norm": 1.5177317358345381, "learning_rate": 6.641928273464025e-06, "loss": 0.0876, "step": 21735 }, { "epoch": 0.09071108477772863, "grad_norm": 1.0689927562883683, "learning_rate": 6.64116440320429e-06, "loss": 0.0587, "step": 21740 }, { "epoch": 0.09073194749271891, "grad_norm": 0.9546093754082231, "learning_rate": 6.640400796435924e-06, "loss": 0.0515, "step": 21745 }, { "epoch": 0.09075281020770919, "grad_norm": 1.1458698925340194, "learning_rate": 6.6396374530074795e-06, "loss": 0.0396, "step": 21750 }, { "epoch": 0.09077367292269947, "grad_norm": 0.792607316636232, "learning_rate": 6.638874372767633e-06, "loss": 0.0504, "step": 21755 }, { "epoch": 0.09079453563768974, "grad_norm": 1.602459420866755, "learning_rate": 6.638111555565179e-06, "loss": 0.0489, "step": 21760 }, { "epoch": 0.09081539835268003, "grad_norm": 1.7759565079412278, "learning_rate": 6.637349001249034e-06, "loss": 0.0594, "step": 21765 }, { "epoch": 0.0908362610676703, "grad_norm": 1.1372619335976584, "learning_rate": 6.6365867096682405e-06, "loss": 0.0496, "step": 21770 }, { "epoch": 0.09085712378266059, "grad_norm": 1.4497697954291495, "learning_rate": 6.635824680671955e-06, "loss": 0.05, "step": 21775 }, { "epoch": 0.09087798649765086, "grad_norm": 0.6186958556423232, "learning_rate": 6.635062914109461e-06, "loss": 0.0511, "step": 21780 }, { "epoch": 0.09089884921264113, "grad_norm": 0.9813148079978767, "learning_rate": 6.634301409830163e-06, "loss": 0.0478, "step": 21785 }, { "epoch": 0.09091971192763142, "grad_norm": 1.1158335372315265, "learning_rate": 6.633540167683582e-06, "loss": 0.0415, "step": 21790 }, { "epoch": 0.09094057464262169, "grad_norm": 1.0570242633817466, "learning_rate": 6.632779187519363e-06, "loss": 0.0542, "step": 21795 }, { "epoch": 0.09096143735761197, "grad_norm": 1.287518303478786, "learning_rate": 6.632018469187272e-06, "loss": 0.0475, "step": 21800 }, { "epoch": 0.09098230007260225, "grad_norm": 0.9807890382012755, "learning_rate": 6.631258012537195e-06, "loss": 0.038, "step": 21805 }, { "epoch": 0.09100316278759253, "grad_norm": 1.2611055387817283, "learning_rate": 6.630497817419137e-06, "loss": 0.0476, "step": 21810 }, { "epoch": 0.0910240255025828, "grad_norm": 1.554672972976674, "learning_rate": 6.6297378836832275e-06, "loss": 0.0528, "step": 21815 }, { "epoch": 0.09104488821757309, "grad_norm": 0.6593285704181491, "learning_rate": 6.628978211179709e-06, "loss": 0.0386, "step": 21820 }, { "epoch": 0.09106575093256336, "grad_norm": 1.9050193789432348, "learning_rate": 6.628218799758952e-06, "loss": 0.0564, "step": 21825 }, { "epoch": 0.09108661364755363, "grad_norm": 0.6235967167598679, "learning_rate": 6.627459649271444e-06, "loss": 0.0429, "step": 21830 }, { "epoch": 0.09110747636254392, "grad_norm": 1.33428539891455, "learning_rate": 6.626700759567787e-06, "loss": 0.0561, "step": 21835 }, { "epoch": 0.09112833907753419, "grad_norm": 0.8800138792247495, "learning_rate": 6.6259421304987115e-06, "loss": 0.0498, "step": 21840 }, { "epoch": 0.09114920179252448, "grad_norm": 1.1889117268827887, "learning_rate": 6.625183761915063e-06, "loss": 0.0551, "step": 21845 }, { "epoch": 0.09117006450751475, "grad_norm": 0.9172588072696319, "learning_rate": 6.624425653667805e-06, "loss": 0.0556, "step": 21850 }, { "epoch": 0.09119092722250503, "grad_norm": 0.9355912666276411, "learning_rate": 6.6236678056080254e-06, "loss": 0.0446, "step": 21855 }, { "epoch": 0.0912117899374953, "grad_norm": 0.6456203627428787, "learning_rate": 6.622910217586927e-06, "loss": 0.043, "step": 21860 }, { "epoch": 0.09123265265248559, "grad_norm": 1.5968982384887282, "learning_rate": 6.622152889455832e-06, "loss": 0.0559, "step": 21865 }, { "epoch": 0.09125351536747586, "grad_norm": 5.691211033573355, "learning_rate": 6.6213958210661836e-06, "loss": 0.0486, "step": 21870 }, { "epoch": 0.09127437808246613, "grad_norm": 1.004996061751421, "learning_rate": 6.620639012269543e-06, "loss": 0.0603, "step": 21875 }, { "epoch": 0.09129524079745642, "grad_norm": 1.041376044216997, "learning_rate": 6.619882462917588e-06, "loss": 0.0529, "step": 21880 }, { "epoch": 0.09131610351244669, "grad_norm": 1.2413703266042002, "learning_rate": 6.619126172862119e-06, "loss": 0.0541, "step": 21885 }, { "epoch": 0.09133696622743698, "grad_norm": 1.1677468788145429, "learning_rate": 6.618370141955054e-06, "loss": 0.058, "step": 21890 }, { "epoch": 0.09135782894242725, "grad_norm": 1.06892086454516, "learning_rate": 6.617614370048425e-06, "loss": 0.0695, "step": 21895 }, { "epoch": 0.09137869165741754, "grad_norm": 1.0156548650907284, "learning_rate": 6.6168588569943895e-06, "loss": 0.0436, "step": 21900 }, { "epoch": 0.0913995543724078, "grad_norm": 0.9993997002935102, "learning_rate": 6.616103602645215e-06, "loss": 0.0488, "step": 21905 }, { "epoch": 0.09142041708739808, "grad_norm": 1.5213660327877259, "learning_rate": 6.615348606853294e-06, "loss": 0.0453, "step": 21910 }, { "epoch": 0.09144127980238836, "grad_norm": 1.3203862650784723, "learning_rate": 6.614593869471133e-06, "loss": 0.0584, "step": 21915 }, { "epoch": 0.09146214251737864, "grad_norm": 1.2718952515210802, "learning_rate": 6.613839390351356e-06, "loss": 0.0379, "step": 21920 }, { "epoch": 0.09148300523236892, "grad_norm": 1.0778146420241534, "learning_rate": 6.613085169346709e-06, "loss": 0.0532, "step": 21925 }, { "epoch": 0.0915038679473592, "grad_norm": 0.9471933198795808, "learning_rate": 6.612331206310051e-06, "loss": 0.0467, "step": 21930 }, { "epoch": 0.09152473066234948, "grad_norm": 1.0901309154274301, "learning_rate": 6.611577501094359e-06, "loss": 0.0666, "step": 21935 }, { "epoch": 0.09154559337733975, "grad_norm": 2.3998939437625597, "learning_rate": 6.610824053552728e-06, "loss": 0.055, "step": 21940 }, { "epoch": 0.09156645609233004, "grad_norm": 2.1717354983742903, "learning_rate": 6.610070863538371e-06, "loss": 0.0512, "step": 21945 }, { "epoch": 0.09158731880732031, "grad_norm": 1.8551664699266135, "learning_rate": 6.609317930904619e-06, "loss": 0.054, "step": 21950 }, { "epoch": 0.09160818152231058, "grad_norm": 1.2957903618312578, "learning_rate": 6.608565255504915e-06, "loss": 0.0507, "step": 21955 }, { "epoch": 0.09162904423730087, "grad_norm": 1.0235614482420563, "learning_rate": 6.607812837192824e-06, "loss": 0.0476, "step": 21960 }, { "epoch": 0.09164990695229114, "grad_norm": 1.0832678186544875, "learning_rate": 6.607060675822026e-06, "loss": 0.0679, "step": 21965 }, { "epoch": 0.09167076966728142, "grad_norm": 0.846016898424978, "learning_rate": 6.606308771246317e-06, "loss": 0.038, "step": 21970 }, { "epoch": 0.0916916323822717, "grad_norm": 1.2275023686336763, "learning_rate": 6.60555712331961e-06, "loss": 0.0574, "step": 21975 }, { "epoch": 0.09171249509726198, "grad_norm": 1.2155086225929055, "learning_rate": 6.6048057318959326e-06, "loss": 0.0477, "step": 21980 }, { "epoch": 0.09173335781225225, "grad_norm": 1.4552532361533805, "learning_rate": 6.604054596829431e-06, "loss": 0.0704, "step": 21985 }, { "epoch": 0.09175422052724254, "grad_norm": 1.3352216457870738, "learning_rate": 6.603303717974366e-06, "loss": 0.0505, "step": 21990 }, { "epoch": 0.09177508324223281, "grad_norm": 0.7798809925980683, "learning_rate": 6.602553095185117e-06, "loss": 0.049, "step": 21995 }, { "epoch": 0.09179594595722308, "grad_norm": 0.9022451465231655, "learning_rate": 6.601802728316176e-06, "loss": 0.0416, "step": 22000 }, { "epoch": 0.09181680867221337, "grad_norm": 0.8891107922535109, "learning_rate": 6.60105261722215e-06, "loss": 0.0474, "step": 22005 }, { "epoch": 0.09183767138720364, "grad_norm": 2.028809078741549, "learning_rate": 6.600302761757766e-06, "loss": 0.0559, "step": 22010 }, { "epoch": 0.09185853410219393, "grad_norm": 1.042250522299322, "learning_rate": 6.599553161777863e-06, "loss": 0.0434, "step": 22015 }, { "epoch": 0.0918793968171842, "grad_norm": 1.3523418116909751, "learning_rate": 6.598803817137397e-06, "loss": 0.0538, "step": 22020 }, { "epoch": 0.09190025953217448, "grad_norm": 1.7557773849312146, "learning_rate": 6.598054727691438e-06, "loss": 0.0566, "step": 22025 }, { "epoch": 0.09192112224716475, "grad_norm": 1.8404953546421314, "learning_rate": 6.597305893295173e-06, "loss": 0.0523, "step": 22030 }, { "epoch": 0.09194198496215504, "grad_norm": 1.1777988096943923, "learning_rate": 6.5965573138039e-06, "loss": 0.0482, "step": 22035 }, { "epoch": 0.09196284767714531, "grad_norm": 1.2286970159762438, "learning_rate": 6.595808989073037e-06, "loss": 0.051, "step": 22040 }, { "epoch": 0.09198371039213558, "grad_norm": 0.712052015674927, "learning_rate": 6.595060918958115e-06, "loss": 0.0463, "step": 22045 }, { "epoch": 0.09200457310712587, "grad_norm": 0.9395494514391628, "learning_rate": 6.594313103314776e-06, "loss": 0.0473, "step": 22050 }, { "epoch": 0.09202543582211614, "grad_norm": 1.4155742214368725, "learning_rate": 6.593565541998786e-06, "loss": 0.0419, "step": 22055 }, { "epoch": 0.09204629853710643, "grad_norm": 2.0573924742819694, "learning_rate": 6.59281823486601e-06, "loss": 0.0662, "step": 22060 }, { "epoch": 0.0920671612520967, "grad_norm": 1.2249339933727241, "learning_rate": 6.592071181772444e-06, "loss": 0.0447, "step": 22065 }, { "epoch": 0.09208802396708698, "grad_norm": 2.209106530323067, "learning_rate": 6.591324382574189e-06, "loss": 0.0434, "step": 22070 }, { "epoch": 0.09210888668207726, "grad_norm": 2.1413108439903596, "learning_rate": 6.59057783712746e-06, "loss": 0.0473, "step": 22075 }, { "epoch": 0.09212974939706754, "grad_norm": 0.9820277264236239, "learning_rate": 6.589831545288589e-06, "loss": 0.0436, "step": 22080 }, { "epoch": 0.09215061211205781, "grad_norm": 2.738179524350438, "learning_rate": 6.5890855069140204e-06, "loss": 0.0541, "step": 22085 }, { "epoch": 0.09217147482704809, "grad_norm": 1.7069983165488543, "learning_rate": 6.588339721860311e-06, "loss": 0.0551, "step": 22090 }, { "epoch": 0.09219233754203837, "grad_norm": 1.143291968844949, "learning_rate": 6.587594189984136e-06, "loss": 0.0518, "step": 22095 }, { "epoch": 0.09221320025702864, "grad_norm": 1.353488525956979, "learning_rate": 6.586848911142278e-06, "loss": 0.0529, "step": 22100 }, { "epoch": 0.09223406297201893, "grad_norm": 1.0738341512250156, "learning_rate": 6.586103885191637e-06, "loss": 0.0412, "step": 22105 }, { "epoch": 0.0922549256870092, "grad_norm": 0.794089549501217, "learning_rate": 6.5853591119892245e-06, "loss": 0.0718, "step": 22110 }, { "epoch": 0.09227578840199949, "grad_norm": 2.804853744887304, "learning_rate": 6.584614591392169e-06, "loss": 0.058, "step": 22115 }, { "epoch": 0.09229665111698976, "grad_norm": 1.1300523912948486, "learning_rate": 6.583870323257703e-06, "loss": 0.0583, "step": 22120 }, { "epoch": 0.09231751383198004, "grad_norm": 1.1832756070129267, "learning_rate": 6.5831263074431816e-06, "loss": 0.0545, "step": 22125 }, { "epoch": 0.09233837654697032, "grad_norm": 1.6461400917377191, "learning_rate": 6.5823825438060685e-06, "loss": 0.0812, "step": 22130 }, { "epoch": 0.09235923926196059, "grad_norm": 0.9146701092772632, "learning_rate": 6.58163903220394e-06, "loss": 0.0525, "step": 22135 }, { "epoch": 0.09238010197695087, "grad_norm": 1.2029246667519236, "learning_rate": 6.580895772494485e-06, "loss": 0.0401, "step": 22140 }, { "epoch": 0.09240096469194115, "grad_norm": 0.9364738219119945, "learning_rate": 6.580152764535508e-06, "loss": 0.0566, "step": 22145 }, { "epoch": 0.09242182740693143, "grad_norm": 1.1077114826461918, "learning_rate": 6.57941000818492e-06, "loss": 0.0454, "step": 22150 }, { "epoch": 0.0924426901219217, "grad_norm": 0.921365526517283, "learning_rate": 6.578667503300747e-06, "loss": 0.0512, "step": 22155 }, { "epoch": 0.09246355283691199, "grad_norm": 1.5309524405967405, "learning_rate": 6.577925249741131e-06, "loss": 0.0476, "step": 22160 }, { "epoch": 0.09248441555190226, "grad_norm": 0.9050038115516215, "learning_rate": 6.57718324736432e-06, "loss": 0.0491, "step": 22165 }, { "epoch": 0.09250527826689255, "grad_norm": 1.0667942843633873, "learning_rate": 6.576441496028677e-06, "loss": 0.0572, "step": 22170 }, { "epoch": 0.09252614098188282, "grad_norm": 1.2533581678775185, "learning_rate": 6.575699995592677e-06, "loss": 0.0684, "step": 22175 }, { "epoch": 0.09254700369687309, "grad_norm": 1.2391751956202914, "learning_rate": 6.574958745914904e-06, "loss": 0.0532, "step": 22180 }, { "epoch": 0.09256786641186338, "grad_norm": 1.3797746866376936, "learning_rate": 6.574217746854058e-06, "loss": 0.0485, "step": 22185 }, { "epoch": 0.09258872912685365, "grad_norm": 0.9193687664534878, "learning_rate": 6.573476998268947e-06, "loss": 0.0491, "step": 22190 }, { "epoch": 0.09260959184184393, "grad_norm": 1.5928523538125177, "learning_rate": 6.572736500018492e-06, "loss": 0.0578, "step": 22195 }, { "epoch": 0.0926304545568342, "grad_norm": 1.0000880714523936, "learning_rate": 6.571996251961721e-06, "loss": 0.046, "step": 22200 }, { "epoch": 0.09265131727182449, "grad_norm": 1.1520411113020579, "learning_rate": 6.5712562539577805e-06, "loss": 0.055, "step": 22205 }, { "epoch": 0.09267217998681476, "grad_norm": 0.9275806534520289, "learning_rate": 6.5705165058659236e-06, "loss": 0.0554, "step": 22210 }, { "epoch": 0.09269304270180505, "grad_norm": 1.6727108434628162, "learning_rate": 6.569777007545513e-06, "loss": 0.0485, "step": 22215 }, { "epoch": 0.09271390541679532, "grad_norm": 1.1651298624950845, "learning_rate": 6.569037758856026e-06, "loss": 0.0474, "step": 22220 }, { "epoch": 0.09273476813178559, "grad_norm": 1.751505061169487, "learning_rate": 6.568298759657048e-06, "loss": 0.0427, "step": 22225 }, { "epoch": 0.09275563084677588, "grad_norm": 0.745055385099941, "learning_rate": 6.567560009808276e-06, "loss": 0.0518, "step": 22230 }, { "epoch": 0.09277649356176615, "grad_norm": 0.9231238496500099, "learning_rate": 6.566821509169513e-06, "loss": 0.0518, "step": 22235 }, { "epoch": 0.09279735627675643, "grad_norm": 0.7920681342661444, "learning_rate": 6.566083257600681e-06, "loss": 0.0594, "step": 22240 }, { "epoch": 0.0928182189917467, "grad_norm": 1.1112886328703404, "learning_rate": 6.565345254961806e-06, "loss": 0.0468, "step": 22245 }, { "epoch": 0.09283908170673699, "grad_norm": 1.0719841719118168, "learning_rate": 6.564607501113024e-06, "loss": 0.0579, "step": 22250 }, { "epoch": 0.09285994442172726, "grad_norm": 1.0179767020042927, "learning_rate": 6.563869995914584e-06, "loss": 0.0663, "step": 22255 }, { "epoch": 0.09288080713671755, "grad_norm": 1.7884025398160637, "learning_rate": 6.563132739226845e-06, "loss": 0.0532, "step": 22260 }, { "epoch": 0.09290166985170782, "grad_norm": 0.9175831844935262, "learning_rate": 6.562395730910271e-06, "loss": 0.0539, "step": 22265 }, { "epoch": 0.0929225325666981, "grad_norm": 1.4667433568633872, "learning_rate": 6.561658970825442e-06, "loss": 0.0644, "step": 22270 }, { "epoch": 0.09294339528168838, "grad_norm": 1.4324871080813182, "learning_rate": 6.560922458833043e-06, "loss": 0.0513, "step": 22275 }, { "epoch": 0.09296425799667865, "grad_norm": 0.6437109857958786, "learning_rate": 6.560186194793866e-06, "loss": 0.0433, "step": 22280 }, { "epoch": 0.09298512071166894, "grad_norm": 0.7175773403618555, "learning_rate": 6.559450178568822e-06, "loss": 0.0458, "step": 22285 }, { "epoch": 0.09300598342665921, "grad_norm": 1.1517909232801429, "learning_rate": 6.5587144100189235e-06, "loss": 0.0515, "step": 22290 }, { "epoch": 0.0930268461416495, "grad_norm": 0.5609496624212027, "learning_rate": 6.557978889005293e-06, "loss": 0.0462, "step": 22295 }, { "epoch": 0.09304770885663977, "grad_norm": 1.6446897155928877, "learning_rate": 6.557243615389165e-06, "loss": 0.0536, "step": 22300 }, { "epoch": 0.09306857157163005, "grad_norm": 0.9889629203312584, "learning_rate": 6.556508589031879e-06, "loss": 0.0373, "step": 22305 }, { "epoch": 0.09308943428662032, "grad_norm": 1.2991501524788263, "learning_rate": 6.555773809794884e-06, "loss": 0.0539, "step": 22310 }, { "epoch": 0.0931102970016106, "grad_norm": 1.7180444056795325, "learning_rate": 6.555039277539742e-06, "loss": 0.0563, "step": 22315 }, { "epoch": 0.09313115971660088, "grad_norm": 0.8635341174442841, "learning_rate": 6.554304992128118e-06, "loss": 0.0481, "step": 22320 }, { "epoch": 0.09315202243159115, "grad_norm": 1.1463674381108875, "learning_rate": 6.55357095342179e-06, "loss": 0.0492, "step": 22325 }, { "epoch": 0.09317288514658144, "grad_norm": 1.2944017355791542, "learning_rate": 6.552837161282639e-06, "loss": 0.0526, "step": 22330 }, { "epoch": 0.09319374786157171, "grad_norm": 1.5748385729862218, "learning_rate": 6.55210361557266e-06, "loss": 0.0486, "step": 22335 }, { "epoch": 0.093214610576562, "grad_norm": 1.3372957693166834, "learning_rate": 6.551370316153953e-06, "loss": 0.0488, "step": 22340 }, { "epoch": 0.09323547329155227, "grad_norm": 1.015781797610949, "learning_rate": 6.550637262888726e-06, "loss": 0.0503, "step": 22345 }, { "epoch": 0.09325633600654255, "grad_norm": 1.6905257443049777, "learning_rate": 6.549904455639297e-06, "loss": 0.058, "step": 22350 }, { "epoch": 0.09327719872153283, "grad_norm": 0.7441863836261983, "learning_rate": 6.549171894268086e-06, "loss": 0.0402, "step": 22355 }, { "epoch": 0.0932980614365231, "grad_norm": 1.171725458566095, "learning_rate": 6.548439578637629e-06, "loss": 0.0988, "step": 22360 }, { "epoch": 0.09331892415151338, "grad_norm": 0.9993960534915458, "learning_rate": 6.547707508610565e-06, "loss": 0.0407, "step": 22365 }, { "epoch": 0.09333978686650365, "grad_norm": 0.9986581326799939, "learning_rate": 6.546975684049638e-06, "loss": 0.0497, "step": 22370 }, { "epoch": 0.09336064958149394, "grad_norm": 1.1432306281579383, "learning_rate": 6.546244104817705e-06, "loss": 0.0442, "step": 22375 }, { "epoch": 0.09338151229648421, "grad_norm": 3.2587763552797346, "learning_rate": 6.545512770777728e-06, "loss": 0.0444, "step": 22380 }, { "epoch": 0.0934023750114745, "grad_norm": 1.0632982718838213, "learning_rate": 6.544781681792773e-06, "loss": 0.0602, "step": 22385 }, { "epoch": 0.09342323772646477, "grad_norm": 1.1514871502795643, "learning_rate": 6.544050837726018e-06, "loss": 0.0478, "step": 22390 }, { "epoch": 0.09344410044145506, "grad_norm": 1.52776079142366, "learning_rate": 6.543320238440746e-06, "loss": 0.0471, "step": 22395 }, { "epoch": 0.09346496315644533, "grad_norm": 1.2110188422749821, "learning_rate": 6.542589883800343e-06, "loss": 0.045, "step": 22400 }, { "epoch": 0.0934858258714356, "grad_norm": 2.367280227240418, "learning_rate": 6.5418597736683095e-06, "loss": 0.0595, "step": 22405 }, { "epoch": 0.09350668858642588, "grad_norm": 1.0707775582038044, "learning_rate": 6.541129907908246e-06, "loss": 0.0438, "step": 22410 }, { "epoch": 0.09352755130141616, "grad_norm": 1.0607098442193845, "learning_rate": 6.540400286383862e-06, "loss": 0.0481, "step": 22415 }, { "epoch": 0.09354841401640644, "grad_norm": 1.0851652916048227, "learning_rate": 6.539670908958974e-06, "loss": 0.0491, "step": 22420 }, { "epoch": 0.09356927673139671, "grad_norm": 1.7305559550850573, "learning_rate": 6.538941775497503e-06, "loss": 0.0563, "step": 22425 }, { "epoch": 0.093590139446387, "grad_norm": 0.8105092088414036, "learning_rate": 6.538212885863475e-06, "loss": 0.0456, "step": 22430 }, { "epoch": 0.09361100216137727, "grad_norm": 1.5636074003601614, "learning_rate": 6.5374842399210295e-06, "loss": 0.0547, "step": 22435 }, { "epoch": 0.09363186487636756, "grad_norm": 1.5851448272704347, "learning_rate": 6.536755837534403e-06, "loss": 0.0465, "step": 22440 }, { "epoch": 0.09365272759135783, "grad_norm": 1.091591874475074, "learning_rate": 6.536027678567942e-06, "loss": 0.0487, "step": 22445 }, { "epoch": 0.0936735903063481, "grad_norm": 0.782609372580963, "learning_rate": 6.5352997628860995e-06, "loss": 0.0528, "step": 22450 }, { "epoch": 0.09369445302133839, "grad_norm": 1.2910549005692193, "learning_rate": 6.5345720903534315e-06, "loss": 0.0431, "step": 22455 }, { "epoch": 0.09371531573632866, "grad_norm": 1.5829477468456552, "learning_rate": 6.533844660834602e-06, "loss": 0.0566, "step": 22460 }, { "epoch": 0.09373617845131894, "grad_norm": 1.5609909319075377, "learning_rate": 6.533117474194378e-06, "loss": 0.0497, "step": 22465 }, { "epoch": 0.09375704116630922, "grad_norm": 1.4494837879110098, "learning_rate": 6.532390530297637e-06, "loss": 0.0674, "step": 22470 }, { "epoch": 0.0937779038812995, "grad_norm": 0.8231113393450707, "learning_rate": 6.5316638290093526e-06, "loss": 0.0443, "step": 22475 }, { "epoch": 0.09379876659628977, "grad_norm": 1.1730543150138948, "learning_rate": 6.530937370194612e-06, "loss": 0.0405, "step": 22480 }, { "epoch": 0.09381962931128006, "grad_norm": 0.7003672998202289, "learning_rate": 6.530211153718605e-06, "loss": 0.044, "step": 22485 }, { "epoch": 0.09384049202627033, "grad_norm": 0.9259268350852988, "learning_rate": 6.529485179446624e-06, "loss": 0.0589, "step": 22490 }, { "epoch": 0.0938613547412606, "grad_norm": 1.2011010517049419, "learning_rate": 6.5287594472440675e-06, "loss": 0.0615, "step": 22495 }, { "epoch": 0.09388221745625089, "grad_norm": 1.1370684981201964, "learning_rate": 6.528033956976442e-06, "loss": 0.0579, "step": 22500 }, { "epoch": 0.09390308017124116, "grad_norm": 1.2624711645314977, "learning_rate": 6.527308708509351e-06, "loss": 0.0527, "step": 22505 }, { "epoch": 0.09392394288623145, "grad_norm": 0.9641870899302265, "learning_rate": 6.526583701708512e-06, "loss": 0.0506, "step": 22510 }, { "epoch": 0.09394480560122172, "grad_norm": 1.360608159652598, "learning_rate": 6.525858936439739e-06, "loss": 0.0536, "step": 22515 }, { "epoch": 0.093965668316212, "grad_norm": 7.932670365866446, "learning_rate": 6.525134412568953e-06, "loss": 0.0502, "step": 22520 }, { "epoch": 0.09398653103120228, "grad_norm": 1.0239428283929304, "learning_rate": 6.524410129962181e-06, "loss": 0.0487, "step": 22525 }, { "epoch": 0.09400739374619256, "grad_norm": 1.3796804803521046, "learning_rate": 6.5236860884855526e-06, "loss": 0.0623, "step": 22530 }, { "epoch": 0.09402825646118283, "grad_norm": 1.057633780138851, "learning_rate": 6.522962288005299e-06, "loss": 0.0484, "step": 22535 }, { "epoch": 0.0940491191761731, "grad_norm": 1.2464161273036505, "learning_rate": 6.5222387283877605e-06, "loss": 0.0551, "step": 22540 }, { "epoch": 0.09406998189116339, "grad_norm": 1.0049283836655012, "learning_rate": 6.521515409499378e-06, "loss": 0.0467, "step": 22545 }, { "epoch": 0.09409084460615366, "grad_norm": 1.2490700691177574, "learning_rate": 6.5207923312066935e-06, "loss": 0.0565, "step": 22550 }, { "epoch": 0.09411170732114395, "grad_norm": 1.1227473878180045, "learning_rate": 6.520069493376358e-06, "loss": 0.0487, "step": 22555 }, { "epoch": 0.09413257003613422, "grad_norm": 1.5353386618206923, "learning_rate": 6.519346895875124e-06, "loss": 0.0547, "step": 22560 }, { "epoch": 0.0941534327511245, "grad_norm": 1.2409523947941579, "learning_rate": 6.518624538569842e-06, "loss": 0.0396, "step": 22565 }, { "epoch": 0.09417429546611478, "grad_norm": 0.8530365920298477, "learning_rate": 6.517902421327476e-06, "loss": 0.055, "step": 22570 }, { "epoch": 0.09419515818110506, "grad_norm": 1.1761247371543528, "learning_rate": 6.517180544015084e-06, "loss": 0.0561, "step": 22575 }, { "epoch": 0.09421602089609533, "grad_norm": 0.888180884936315, "learning_rate": 6.516458906499832e-06, "loss": 0.0434, "step": 22580 }, { "epoch": 0.0942368836110856, "grad_norm": 0.9301833157869802, "learning_rate": 6.515737508648987e-06, "loss": 0.0463, "step": 22585 }, { "epoch": 0.09425774632607589, "grad_norm": 0.9231398428413912, "learning_rate": 6.51501635032992e-06, "loss": 0.0512, "step": 22590 }, { "epoch": 0.09427860904106616, "grad_norm": 0.8645665787904198, "learning_rate": 6.514295431410103e-06, "loss": 0.0461, "step": 22595 }, { "epoch": 0.09429947175605645, "grad_norm": 0.8803779189893517, "learning_rate": 6.513574751757114e-06, "loss": 0.0538, "step": 22600 }, { "epoch": 0.09432033447104672, "grad_norm": 2.3765452636974644, "learning_rate": 6.512854311238628e-06, "loss": 0.0532, "step": 22605 }, { "epoch": 0.094341197186037, "grad_norm": 0.9160124100617699, "learning_rate": 6.512134109722427e-06, "loss": 0.0597, "step": 22610 }, { "epoch": 0.09436205990102728, "grad_norm": 1.2949648070669024, "learning_rate": 6.511414147076396e-06, "loss": 0.0397, "step": 22615 }, { "epoch": 0.09438292261601756, "grad_norm": 1.296574917014121, "learning_rate": 6.510694423168518e-06, "loss": 0.0556, "step": 22620 }, { "epoch": 0.09440378533100784, "grad_norm": 1.9091654289467848, "learning_rate": 6.509974937866879e-06, "loss": 0.065, "step": 22625 }, { "epoch": 0.09442464804599811, "grad_norm": 1.1002341174151555, "learning_rate": 6.509255691039673e-06, "loss": 0.0509, "step": 22630 }, { "epoch": 0.0944455107609884, "grad_norm": 1.2780381182110305, "learning_rate": 6.508536682555188e-06, "loss": 0.0496, "step": 22635 }, { "epoch": 0.09446637347597867, "grad_norm": 0.718641732861584, "learning_rate": 6.507817912281817e-06, "loss": 0.0466, "step": 22640 }, { "epoch": 0.09448723619096895, "grad_norm": 0.9072919771059859, "learning_rate": 6.507099380088056e-06, "loss": 0.0481, "step": 22645 }, { "epoch": 0.09450809890595922, "grad_norm": 1.1327637435244007, "learning_rate": 6.506381085842499e-06, "loss": 0.0659, "step": 22650 }, { "epoch": 0.09452896162094951, "grad_norm": 1.5529174606291967, "learning_rate": 6.5056630294138474e-06, "loss": 0.0502, "step": 22655 }, { "epoch": 0.09454982433593978, "grad_norm": 1.2245303849030351, "learning_rate": 6.504945210670899e-06, "loss": 0.0398, "step": 22660 }, { "epoch": 0.09457068705093007, "grad_norm": 1.1859047621254737, "learning_rate": 6.504227629482554e-06, "loss": 0.0445, "step": 22665 }, { "epoch": 0.09459154976592034, "grad_norm": 0.924896807921211, "learning_rate": 6.503510285717816e-06, "loss": 0.0454, "step": 22670 }, { "epoch": 0.09461241248091061, "grad_norm": 1.2507231424613525, "learning_rate": 6.502793179245786e-06, "loss": 0.0694, "step": 22675 }, { "epoch": 0.0946332751959009, "grad_norm": 1.4142520176063988, "learning_rate": 6.50207630993567e-06, "loss": 0.0691, "step": 22680 }, { "epoch": 0.09465413791089117, "grad_norm": 1.4954509436074161, "learning_rate": 6.50135967765677e-06, "loss": 0.0434, "step": 22685 }, { "epoch": 0.09467500062588145, "grad_norm": 1.367947931019436, "learning_rate": 6.500643282278494e-06, "loss": 0.0661, "step": 22690 }, { "epoch": 0.09469586334087172, "grad_norm": 2.500571007465033, "learning_rate": 6.499927123670347e-06, "loss": 0.0697, "step": 22695 }, { "epoch": 0.09471672605586201, "grad_norm": 1.5279560631294542, "learning_rate": 6.4992112017019386e-06, "loss": 0.0467, "step": 22700 }, { "epoch": 0.09473758877085228, "grad_norm": 1.1546747121579715, "learning_rate": 6.498495516242973e-06, "loss": 0.052, "step": 22705 }, { "epoch": 0.09475845148584257, "grad_norm": 0.898368942538384, "learning_rate": 6.497780067163261e-06, "loss": 0.0613, "step": 22710 }, { "epoch": 0.09477931420083284, "grad_norm": 1.347446483586469, "learning_rate": 6.49706485433271e-06, "loss": 0.0526, "step": 22715 }, { "epoch": 0.09480017691582311, "grad_norm": 0.9870357069174307, "learning_rate": 6.496349877621326e-06, "loss": 0.0493, "step": 22720 }, { "epoch": 0.0948210396308134, "grad_norm": 0.9578230363820396, "learning_rate": 6.495635136899222e-06, "loss": 0.0379, "step": 22725 }, { "epoch": 0.09484190234580367, "grad_norm": 0.8632247065570499, "learning_rate": 6.494920632036604e-06, "loss": 0.0398, "step": 22730 }, { "epoch": 0.09486276506079395, "grad_norm": 1.3663321930619972, "learning_rate": 6.4942063629037795e-06, "loss": 0.053, "step": 22735 }, { "epoch": 0.09488362777578423, "grad_norm": 0.6192871374933454, "learning_rate": 6.493492329371161e-06, "loss": 0.0426, "step": 22740 }, { "epoch": 0.09490449049077451, "grad_norm": 0.7416379516426552, "learning_rate": 6.492778531309252e-06, "loss": 0.0316, "step": 22745 }, { "epoch": 0.09492535320576478, "grad_norm": 1.5900217523760714, "learning_rate": 6.4920649685886625e-06, "loss": 0.0623, "step": 22750 }, { "epoch": 0.09494621592075507, "grad_norm": 1.0223761505175974, "learning_rate": 6.491351641080099e-06, "loss": 0.0524, "step": 22755 }, { "epoch": 0.09496707863574534, "grad_norm": 1.211374303940239, "learning_rate": 6.490638548654369e-06, "loss": 0.0456, "step": 22760 }, { "epoch": 0.09498794135073561, "grad_norm": 1.5911864434182832, "learning_rate": 6.489925691182377e-06, "loss": 0.0411, "step": 22765 }, { "epoch": 0.0950088040657259, "grad_norm": 0.8437701002559945, "learning_rate": 6.489213068535131e-06, "loss": 0.0487, "step": 22770 }, { "epoch": 0.09502966678071617, "grad_norm": 0.5617374864824755, "learning_rate": 6.488500680583732e-06, "loss": 0.031, "step": 22775 }, { "epoch": 0.09505052949570646, "grad_norm": 2.27532075571818, "learning_rate": 6.487788527199384e-06, "loss": 0.0585, "step": 22780 }, { "epoch": 0.09507139221069673, "grad_norm": 1.1510522413305595, "learning_rate": 6.487076608253392e-06, "loss": 0.0645, "step": 22785 }, { "epoch": 0.09509225492568701, "grad_norm": 1.0970926338187132, "learning_rate": 6.4863649236171535e-06, "loss": 0.0396, "step": 22790 }, { "epoch": 0.09511311764067729, "grad_norm": 1.8999511093646295, "learning_rate": 6.48565347316217e-06, "loss": 0.0431, "step": 22795 }, { "epoch": 0.09513398035566757, "grad_norm": 1.2770781019546984, "learning_rate": 6.4849422567600405e-06, "loss": 0.0661, "step": 22800 }, { "epoch": 0.09515484307065784, "grad_norm": 0.8587391987883862, "learning_rate": 6.484231274282461e-06, "loss": 0.0412, "step": 22805 }, { "epoch": 0.09517570578564812, "grad_norm": 0.9229677907886825, "learning_rate": 6.483520525601228e-06, "loss": 0.0442, "step": 22810 }, { "epoch": 0.0951965685006384, "grad_norm": 0.9955307352989057, "learning_rate": 6.482810010588234e-06, "loss": 0.0369, "step": 22815 }, { "epoch": 0.09521743121562867, "grad_norm": 1.5751419608898893, "learning_rate": 6.482099729115472e-06, "loss": 0.0524, "step": 22820 }, { "epoch": 0.09523829393061896, "grad_norm": 0.7273263903820849, "learning_rate": 6.481389681055032e-06, "loss": 0.0428, "step": 22825 }, { "epoch": 0.09525915664560923, "grad_norm": 1.824334039551288, "learning_rate": 6.4806798662791025e-06, "loss": 0.0467, "step": 22830 }, { "epoch": 0.09528001936059952, "grad_norm": 1.3114508529062965, "learning_rate": 6.47997028465997e-06, "loss": 0.0623, "step": 22835 }, { "epoch": 0.09530088207558979, "grad_norm": 1.410598485886291, "learning_rate": 6.479260936070018e-06, "loss": 0.0415, "step": 22840 }, { "epoch": 0.09532174479058007, "grad_norm": 1.070417439338488, "learning_rate": 6.4785518203817275e-06, "loss": 0.0424, "step": 22845 }, { "epoch": 0.09534260750557035, "grad_norm": 0.9552679480036013, "learning_rate": 6.47784293746768e-06, "loss": 0.0494, "step": 22850 }, { "epoch": 0.09536347022056062, "grad_norm": 0.9321248722904537, "learning_rate": 6.477134287200552e-06, "loss": 0.0509, "step": 22855 }, { "epoch": 0.0953843329355509, "grad_norm": 0.8491440025854897, "learning_rate": 6.476425869453115e-06, "loss": 0.036, "step": 22860 }, { "epoch": 0.09540519565054117, "grad_norm": 2.2514848648174905, "learning_rate": 6.475717684098247e-06, "loss": 0.0518, "step": 22865 }, { "epoch": 0.09542605836553146, "grad_norm": 1.07686361038987, "learning_rate": 6.475009731008913e-06, "loss": 0.0451, "step": 22870 }, { "epoch": 0.09544692108052173, "grad_norm": 0.711217969378516, "learning_rate": 6.474302010058181e-06, "loss": 0.0589, "step": 22875 }, { "epoch": 0.09546778379551202, "grad_norm": 1.0091957955739164, "learning_rate": 6.473594521119212e-06, "loss": 0.044, "step": 22880 }, { "epoch": 0.09548864651050229, "grad_norm": 0.9449584961874617, "learning_rate": 6.4728872640652705e-06, "loss": 0.0978, "step": 22885 }, { "epoch": 0.09550950922549258, "grad_norm": 0.6833413022549389, "learning_rate": 6.472180238769712e-06, "loss": 0.0529, "step": 22890 }, { "epoch": 0.09553037194048285, "grad_norm": 1.4970263153801782, "learning_rate": 6.47147344510599e-06, "loss": 0.0463, "step": 22895 }, { "epoch": 0.09555123465547312, "grad_norm": 1.1870872523304723, "learning_rate": 6.470766882947655e-06, "loss": 0.0483, "step": 22900 }, { "epoch": 0.0955720973704634, "grad_norm": 1.0772963721219992, "learning_rate": 6.470060552168358e-06, "loss": 0.0487, "step": 22905 }, { "epoch": 0.09559296008545368, "grad_norm": 1.4389631901298998, "learning_rate": 6.469354452641841e-06, "loss": 0.0646, "step": 22910 }, { "epoch": 0.09561382280044396, "grad_norm": 1.1248578369669733, "learning_rate": 6.468648584241943e-06, "loss": 0.0605, "step": 22915 }, { "epoch": 0.09563468551543423, "grad_norm": 1.4391332723800114, "learning_rate": 6.467942946842604e-06, "loss": 0.0433, "step": 22920 }, { "epoch": 0.09565554823042452, "grad_norm": 1.7573302370374075, "learning_rate": 6.467237540317854e-06, "loss": 0.0309, "step": 22925 }, { "epoch": 0.09567641094541479, "grad_norm": 0.9265584742412346, "learning_rate": 6.466532364541826e-06, "loss": 0.0377, "step": 22930 }, { "epoch": 0.09569727366040508, "grad_norm": 0.8970818069769576, "learning_rate": 6.465827419388742e-06, "loss": 0.0613, "step": 22935 }, { "epoch": 0.09571813637539535, "grad_norm": 0.9351448648360605, "learning_rate": 6.465122704732924e-06, "loss": 0.0414, "step": 22940 }, { "epoch": 0.09573899909038562, "grad_norm": 1.1823721975614485, "learning_rate": 6.464418220448789e-06, "loss": 0.0574, "step": 22945 }, { "epoch": 0.0957598618053759, "grad_norm": 1.3233995223695265, "learning_rate": 6.463713966410853e-06, "loss": 0.048, "step": 22950 }, { "epoch": 0.09578072452036618, "grad_norm": 1.2486842029195424, "learning_rate": 6.46300994249372e-06, "loss": 0.0481, "step": 22955 }, { "epoch": 0.09580158723535646, "grad_norm": 1.4592368795977513, "learning_rate": 6.462306148572097e-06, "loss": 0.0465, "step": 22960 }, { "epoch": 0.09582244995034674, "grad_norm": 0.7285583236305276, "learning_rate": 6.461602584520784e-06, "loss": 0.0485, "step": 22965 }, { "epoch": 0.09584331266533702, "grad_norm": 1.805243408422447, "learning_rate": 6.460899250214674e-06, "loss": 0.0517, "step": 22970 }, { "epoch": 0.0958641753803273, "grad_norm": 0.7593122037674621, "learning_rate": 6.460196145528759e-06, "loss": 0.0477, "step": 22975 }, { "epoch": 0.09588503809531758, "grad_norm": 0.7370665712856685, "learning_rate": 6.459493270338123e-06, "loss": 0.0509, "step": 22980 }, { "epoch": 0.09590590081030785, "grad_norm": 1.302305811047883, "learning_rate": 6.458790624517948e-06, "loss": 0.0452, "step": 22985 }, { "epoch": 0.09592676352529812, "grad_norm": 0.9078190451065801, "learning_rate": 6.458088207943512e-06, "loss": 0.0429, "step": 22990 }, { "epoch": 0.09594762624028841, "grad_norm": 1.0034585662884201, "learning_rate": 6.4573860204901796e-06, "loss": 0.0386, "step": 22995 }, { "epoch": 0.09596848895527868, "grad_norm": 1.148116563454503, "learning_rate": 6.45668406203342e-06, "loss": 0.0626, "step": 23000 }, { "epoch": 0.09598935167026897, "grad_norm": 1.0318318044737351, "learning_rate": 6.455982332448795e-06, "loss": 0.0359, "step": 23005 }, { "epoch": 0.09601021438525924, "grad_norm": 1.2791498262360539, "learning_rate": 6.455280831611958e-06, "loss": 0.0604, "step": 23010 }, { "epoch": 0.09603107710024952, "grad_norm": 1.0576799619648334, "learning_rate": 6.454579559398655e-06, "loss": 0.049, "step": 23015 }, { "epoch": 0.0960519398152398, "grad_norm": 1.3224081278734758, "learning_rate": 6.453878515684735e-06, "loss": 0.0508, "step": 23020 }, { "epoch": 0.09607280253023008, "grad_norm": 2.0095719735924455, "learning_rate": 6.4531777003461336e-06, "loss": 0.0413, "step": 23025 }, { "epoch": 0.09609366524522035, "grad_norm": 1.3290355598485146, "learning_rate": 6.4524771132588845e-06, "loss": 0.0496, "step": 23030 }, { "epoch": 0.09611452796021062, "grad_norm": 0.8173614365455438, "learning_rate": 6.4517767542991135e-06, "loss": 0.0533, "step": 23035 }, { "epoch": 0.09613539067520091, "grad_norm": 1.2625587177803783, "learning_rate": 6.451076623343044e-06, "loss": 0.0585, "step": 23040 }, { "epoch": 0.09615625339019118, "grad_norm": 1.3498835003363467, "learning_rate": 6.450376720266985e-06, "loss": 0.0533, "step": 23045 }, { "epoch": 0.09617711610518147, "grad_norm": 1.0000766162055146, "learning_rate": 6.449677044947351e-06, "loss": 0.0455, "step": 23050 }, { "epoch": 0.09619797882017174, "grad_norm": 1.2848662632562142, "learning_rate": 6.4489775972606426e-06, "loss": 0.0505, "step": 23055 }, { "epoch": 0.09621884153516203, "grad_norm": 1.8630255956065351, "learning_rate": 6.448278377083454e-06, "loss": 0.0463, "step": 23060 }, { "epoch": 0.0962397042501523, "grad_norm": 0.7724224999579241, "learning_rate": 6.447579384292479e-06, "loss": 0.0402, "step": 23065 }, { "epoch": 0.09626056696514258, "grad_norm": 0.7790147546370939, "learning_rate": 6.4468806187645e-06, "loss": 0.053, "step": 23070 }, { "epoch": 0.09628142968013285, "grad_norm": 1.8106427664199558, "learning_rate": 6.4461820803763905e-06, "loss": 0.0411, "step": 23075 }, { "epoch": 0.09630229239512313, "grad_norm": 1.1883504759358878, "learning_rate": 6.445483769005126e-06, "loss": 0.0436, "step": 23080 }, { "epoch": 0.09632315511011341, "grad_norm": 1.6876086956054053, "learning_rate": 6.444785684527766e-06, "loss": 0.0575, "step": 23085 }, { "epoch": 0.09634401782510368, "grad_norm": 1.1433529216291032, "learning_rate": 6.444087826821469e-06, "loss": 0.0437, "step": 23090 }, { "epoch": 0.09636488054009397, "grad_norm": 1.2617835826998216, "learning_rate": 6.443390195763486e-06, "loss": 0.039, "step": 23095 }, { "epoch": 0.09638574325508424, "grad_norm": 0.6727941079458232, "learning_rate": 6.442692791231158e-06, "loss": 0.032, "step": 23100 }, { "epoch": 0.09640660597007453, "grad_norm": 1.0821477820374719, "learning_rate": 6.4419956131019205e-06, "loss": 0.041, "step": 23105 }, { "epoch": 0.0964274686850648, "grad_norm": 1.1400291715318425, "learning_rate": 6.441298661253302e-06, "loss": 0.0489, "step": 23110 }, { "epoch": 0.09644833140005507, "grad_norm": 1.2884485851937963, "learning_rate": 6.440601935562927e-06, "loss": 0.0515, "step": 23115 }, { "epoch": 0.09646919411504536, "grad_norm": 0.8161313151882984, "learning_rate": 6.439905435908507e-06, "loss": 0.0442, "step": 23120 }, { "epoch": 0.09649005683003563, "grad_norm": 1.063245920157637, "learning_rate": 6.439209162167847e-06, "loss": 0.0544, "step": 23125 }, { "epoch": 0.09651091954502591, "grad_norm": 0.7085112533913943, "learning_rate": 6.438513114218848e-06, "loss": 0.0495, "step": 23130 }, { "epoch": 0.09653178226001619, "grad_norm": 1.0052521731267035, "learning_rate": 6.437817291939502e-06, "loss": 0.0441, "step": 23135 }, { "epoch": 0.09655264497500647, "grad_norm": 0.7911574550966505, "learning_rate": 6.437121695207892e-06, "loss": 0.0515, "step": 23140 }, { "epoch": 0.09657350768999674, "grad_norm": 0.9551006553099024, "learning_rate": 6.436426323902193e-06, "loss": 0.0454, "step": 23145 }, { "epoch": 0.09659437040498703, "grad_norm": 1.0799725193002132, "learning_rate": 6.435731177900673e-06, "loss": 0.044, "step": 23150 }, { "epoch": 0.0966152331199773, "grad_norm": 1.109294000378354, "learning_rate": 6.435036257081691e-06, "loss": 0.047, "step": 23155 }, { "epoch": 0.09663609583496757, "grad_norm": 1.1961153654514884, "learning_rate": 6.434341561323703e-06, "loss": 0.0548, "step": 23160 }, { "epoch": 0.09665695854995786, "grad_norm": 1.0760463693658264, "learning_rate": 6.4336470905052484e-06, "loss": 0.0466, "step": 23165 }, { "epoch": 0.09667782126494813, "grad_norm": 1.0797335695330477, "learning_rate": 6.4329528445049635e-06, "loss": 0.0436, "step": 23170 }, { "epoch": 0.09669868397993842, "grad_norm": 1.5198107312524785, "learning_rate": 6.432258823201577e-06, "loss": 0.0421, "step": 23175 }, { "epoch": 0.09671954669492869, "grad_norm": 1.179091672414851, "learning_rate": 6.431565026473905e-06, "loss": 0.0522, "step": 23180 }, { "epoch": 0.09674040940991897, "grad_norm": 0.9047573369705508, "learning_rate": 6.430871454200861e-06, "loss": 0.0481, "step": 23185 }, { "epoch": 0.09676127212490925, "grad_norm": 1.408276037010877, "learning_rate": 6.430178106261443e-06, "loss": 0.0503, "step": 23190 }, { "epoch": 0.09678213483989953, "grad_norm": 0.6081038379083114, "learning_rate": 6.429484982534746e-06, "loss": 0.0506, "step": 23195 }, { "epoch": 0.0968029975548898, "grad_norm": 1.1561304421626628, "learning_rate": 6.428792082899954e-06, "loss": 0.0458, "step": 23200 }, { "epoch": 0.09682386026988007, "grad_norm": 0.9697296354727849, "learning_rate": 6.428099407236341e-06, "loss": 0.036, "step": 23205 }, { "epoch": 0.09684472298487036, "grad_norm": 1.0746682653813053, "learning_rate": 6.427406955423273e-06, "loss": 0.047, "step": 23210 }, { "epoch": 0.09686558569986063, "grad_norm": 0.9574390347691348, "learning_rate": 6.426714727340209e-06, "loss": 0.0576, "step": 23215 }, { "epoch": 0.09688644841485092, "grad_norm": 0.9758191030218599, "learning_rate": 6.426022722866695e-06, "loss": 0.0507, "step": 23220 }, { "epoch": 0.09690731112984119, "grad_norm": 1.3655102960831729, "learning_rate": 6.425330941882371e-06, "loss": 0.0544, "step": 23225 }, { "epoch": 0.09692817384483148, "grad_norm": 1.2187995124197761, "learning_rate": 6.424639384266966e-06, "loss": 0.0501, "step": 23230 }, { "epoch": 0.09694903655982175, "grad_norm": 1.2172223992269673, "learning_rate": 6.423948049900299e-06, "loss": 0.055, "step": 23235 }, { "epoch": 0.09696989927481203, "grad_norm": 0.8554510260675424, "learning_rate": 6.423256938662283e-06, "loss": 0.0444, "step": 23240 }, { "epoch": 0.0969907619898023, "grad_norm": 0.9382962266659435, "learning_rate": 6.4225660504329164e-06, "loss": 0.0488, "step": 23245 }, { "epoch": 0.09701162470479258, "grad_norm": 0.9836391635003404, "learning_rate": 6.421875385092292e-06, "loss": 0.0415, "step": 23250 }, { "epoch": 0.09703248741978286, "grad_norm": 1.10488124489437, "learning_rate": 6.421184942520589e-06, "loss": 0.0502, "step": 23255 }, { "epoch": 0.09705335013477313, "grad_norm": 1.3542081177703003, "learning_rate": 6.420494722598085e-06, "loss": 0.0538, "step": 23260 }, { "epoch": 0.09707421284976342, "grad_norm": 1.5110208684392044, "learning_rate": 6.4198047252051345e-06, "loss": 0.0571, "step": 23265 }, { "epoch": 0.09709507556475369, "grad_norm": 0.7012907828580774, "learning_rate": 6.419114950222193e-06, "loss": 0.0513, "step": 23270 }, { "epoch": 0.09711593827974398, "grad_norm": 1.2034082129150436, "learning_rate": 6.418425397529801e-06, "loss": 0.0434, "step": 23275 }, { "epoch": 0.09713680099473425, "grad_norm": 1.2672976108872702, "learning_rate": 6.417736067008591e-06, "loss": 0.0466, "step": 23280 }, { "epoch": 0.09715766370972453, "grad_norm": 1.3355713084484053, "learning_rate": 6.417046958539284e-06, "loss": 0.0449, "step": 23285 }, { "epoch": 0.0971785264247148, "grad_norm": 1.39467911037308, "learning_rate": 6.41635807200269e-06, "loss": 0.0656, "step": 23290 }, { "epoch": 0.09719938913970508, "grad_norm": 1.8510445317490032, "learning_rate": 6.415669407279709e-06, "loss": 0.0492, "step": 23295 }, { "epoch": 0.09722025185469536, "grad_norm": 1.105876346858747, "learning_rate": 6.414980964251332e-06, "loss": 0.0496, "step": 23300 }, { "epoch": 0.09724111456968564, "grad_norm": 0.8921751615230156, "learning_rate": 6.414292742798638e-06, "loss": 0.0689, "step": 23305 }, { "epoch": 0.09726197728467592, "grad_norm": 1.336387165503574, "learning_rate": 6.413604742802794e-06, "loss": 0.06, "step": 23310 }, { "epoch": 0.09728283999966619, "grad_norm": 1.3041102541385763, "learning_rate": 6.412916964145059e-06, "loss": 0.0441, "step": 23315 }, { "epoch": 0.09730370271465648, "grad_norm": 1.7095499173435924, "learning_rate": 6.41222940670678e-06, "loss": 0.0419, "step": 23320 }, { "epoch": 0.09732456542964675, "grad_norm": 1.1153024003614684, "learning_rate": 6.411542070369392e-06, "loss": 0.0499, "step": 23325 }, { "epoch": 0.09734542814463704, "grad_norm": 1.6985864322258037, "learning_rate": 6.410854955014419e-06, "loss": 0.0632, "step": 23330 }, { "epoch": 0.09736629085962731, "grad_norm": 1.3798136822051037, "learning_rate": 6.410168060523477e-06, "loss": 0.0504, "step": 23335 }, { "epoch": 0.09738715357461758, "grad_norm": 1.625637977301895, "learning_rate": 6.409481386778267e-06, "loss": 0.056, "step": 23340 }, { "epoch": 0.09740801628960787, "grad_norm": 1.086932202708564, "learning_rate": 6.4087949336605795e-06, "loss": 0.0532, "step": 23345 }, { "epoch": 0.09742887900459814, "grad_norm": 1.1248885478417525, "learning_rate": 6.408108701052295e-06, "loss": 0.0446, "step": 23350 }, { "epoch": 0.09744974171958842, "grad_norm": 1.1059231736559654, "learning_rate": 6.407422688835381e-06, "loss": 0.0506, "step": 23355 }, { "epoch": 0.0974706044345787, "grad_norm": 0.9134442600498714, "learning_rate": 6.406736896891894e-06, "loss": 0.0539, "step": 23360 }, { "epoch": 0.09749146714956898, "grad_norm": 1.6180060561582688, "learning_rate": 6.406051325103982e-06, "loss": 0.0553, "step": 23365 }, { "epoch": 0.09751232986455925, "grad_norm": 1.0032236067007545, "learning_rate": 6.405365973353872e-06, "loss": 0.0404, "step": 23370 }, { "epoch": 0.09753319257954954, "grad_norm": 2.1434346857732214, "learning_rate": 6.4046808415238925e-06, "loss": 0.054, "step": 23375 }, { "epoch": 0.09755405529453981, "grad_norm": 0.9107622667226715, "learning_rate": 6.4039959294964485e-06, "loss": 0.0451, "step": 23380 }, { "epoch": 0.09757491800953008, "grad_norm": 1.263434673590641, "learning_rate": 6.403311237154037e-06, "loss": 0.0602, "step": 23385 }, { "epoch": 0.09759578072452037, "grad_norm": 1.0299433144034496, "learning_rate": 6.402626764379246e-06, "loss": 0.0397, "step": 23390 }, { "epoch": 0.09761664343951064, "grad_norm": 0.749165031864216, "learning_rate": 6.401942511054747e-06, "loss": 0.0435, "step": 23395 }, { "epoch": 0.09763750615450092, "grad_norm": 1.3486588762021763, "learning_rate": 6.401258477063302e-06, "loss": 0.0537, "step": 23400 }, { "epoch": 0.0976583688694912, "grad_norm": 1.6809596713256112, "learning_rate": 6.400574662287762e-06, "loss": 0.0496, "step": 23405 }, { "epoch": 0.09767923158448148, "grad_norm": 1.069789687979641, "learning_rate": 6.399891066611058e-06, "loss": 0.0481, "step": 23410 }, { "epoch": 0.09770009429947175, "grad_norm": 1.0990944243067833, "learning_rate": 6.399207689916217e-06, "loss": 0.0429, "step": 23415 }, { "epoch": 0.09772095701446204, "grad_norm": 1.1068176647827006, "learning_rate": 6.39852453208635e-06, "loss": 0.053, "step": 23420 }, { "epoch": 0.09774181972945231, "grad_norm": 0.6290124299469801, "learning_rate": 6.397841593004655e-06, "loss": 0.0437, "step": 23425 }, { "epoch": 0.09776268244444258, "grad_norm": 0.7615425913853399, "learning_rate": 6.397158872554417e-06, "loss": 0.0423, "step": 23430 }, { "epoch": 0.09778354515943287, "grad_norm": 1.3616196471984274, "learning_rate": 6.396476370619012e-06, "loss": 0.0502, "step": 23435 }, { "epoch": 0.09780440787442314, "grad_norm": 1.0575040272734113, "learning_rate": 6.395794087081895e-06, "loss": 0.0522, "step": 23440 }, { "epoch": 0.09782527058941343, "grad_norm": 0.7048274978236098, "learning_rate": 6.395112021826616e-06, "loss": 0.0479, "step": 23445 }, { "epoch": 0.0978461333044037, "grad_norm": 2.3293440984976646, "learning_rate": 6.394430174736811e-06, "loss": 0.0386, "step": 23450 }, { "epoch": 0.09786699601939398, "grad_norm": 5.739120827424002, "learning_rate": 6.393748545696196e-06, "loss": 0.048, "step": 23455 }, { "epoch": 0.09788785873438426, "grad_norm": 1.6065182185560574, "learning_rate": 6.393067134588582e-06, "loss": 0.0596, "step": 23460 }, { "epoch": 0.09790872144937454, "grad_norm": 1.047466637638379, "learning_rate": 6.392385941297863e-06, "loss": 0.0441, "step": 23465 }, { "epoch": 0.09792958416436481, "grad_norm": 1.5092730295583072, "learning_rate": 6.391704965708017e-06, "loss": 0.0683, "step": 23470 }, { "epoch": 0.09795044687935509, "grad_norm": 1.2197090453759811, "learning_rate": 6.391024207703115e-06, "loss": 0.0525, "step": 23475 }, { "epoch": 0.09797130959434537, "grad_norm": 1.0352160780491886, "learning_rate": 6.390343667167308e-06, "loss": 0.0289, "step": 23480 }, { "epoch": 0.09799217230933564, "grad_norm": 1.0234343601802371, "learning_rate": 6.389663343984836e-06, "loss": 0.0714, "step": 23485 }, { "epoch": 0.09801303502432593, "grad_norm": 0.8864345427819655, "learning_rate": 6.388983238040025e-06, "loss": 0.0527, "step": 23490 }, { "epoch": 0.0980338977393162, "grad_norm": 1.4242386098180653, "learning_rate": 6.38830334921729e-06, "loss": 0.0496, "step": 23495 }, { "epoch": 0.09805476045430649, "grad_norm": 6.483945424526662, "learning_rate": 6.387623677401128e-06, "loss": 0.0891, "step": 23500 }, { "epoch": 0.09807562316929676, "grad_norm": 1.5108833820104761, "learning_rate": 6.386944222476121e-06, "loss": 0.0488, "step": 23505 }, { "epoch": 0.09809648588428704, "grad_norm": 1.4794203924188658, "learning_rate": 6.386264984326944e-06, "loss": 0.0507, "step": 23510 }, { "epoch": 0.09811734859927732, "grad_norm": 1.184238143442661, "learning_rate": 6.38558596283835e-06, "loss": 0.0381, "step": 23515 }, { "epoch": 0.09813821131426759, "grad_norm": 0.9183946408165201, "learning_rate": 6.38490715789518e-06, "loss": 0.0512, "step": 23520 }, { "epoch": 0.09815907402925787, "grad_norm": 1.0165466191353547, "learning_rate": 6.384228569382367e-06, "loss": 0.044, "step": 23525 }, { "epoch": 0.09817993674424814, "grad_norm": 1.3225794470974632, "learning_rate": 6.383550197184918e-06, "loss": 0.0387, "step": 23530 }, { "epoch": 0.09820079945923843, "grad_norm": 0.8544642717004618, "learning_rate": 6.3828720411879355e-06, "loss": 0.0406, "step": 23535 }, { "epoch": 0.0982216621742287, "grad_norm": 0.9333359487012215, "learning_rate": 6.382194101276603e-06, "loss": 0.041, "step": 23540 }, { "epoch": 0.09824252488921899, "grad_norm": 0.906576687215178, "learning_rate": 6.3815163773361885e-06, "loss": 0.0376, "step": 23545 }, { "epoch": 0.09826338760420926, "grad_norm": 0.9921151036792504, "learning_rate": 6.380838869252049e-06, "loss": 0.0423, "step": 23550 }, { "epoch": 0.09828425031919955, "grad_norm": 1.4388620299295287, "learning_rate": 6.3801615769096235e-06, "loss": 0.049, "step": 23555 }, { "epoch": 0.09830511303418982, "grad_norm": 0.9912375619245186, "learning_rate": 6.379484500194437e-06, "loss": 0.0463, "step": 23560 }, { "epoch": 0.09832597574918009, "grad_norm": 1.32344888666393, "learning_rate": 6.3788076389921e-06, "loss": 0.0425, "step": 23565 }, { "epoch": 0.09834683846417037, "grad_norm": 1.1361614186875808, "learning_rate": 6.378130993188307e-06, "loss": 0.0487, "step": 23570 }, { "epoch": 0.09836770117916065, "grad_norm": 1.116264750486224, "learning_rate": 6.377454562668839e-06, "loss": 0.0394, "step": 23575 }, { "epoch": 0.09838856389415093, "grad_norm": 1.0199996330245684, "learning_rate": 6.3767783473195576e-06, "loss": 0.04, "step": 23580 }, { "epoch": 0.0984094266091412, "grad_norm": 0.9253038887607297, "learning_rate": 6.376102347026417e-06, "loss": 0.0458, "step": 23585 }, { "epoch": 0.09843028932413149, "grad_norm": 1.3586183050000644, "learning_rate": 6.375426561675446e-06, "loss": 0.0627, "step": 23590 }, { "epoch": 0.09845115203912176, "grad_norm": 1.2877138313267384, "learning_rate": 6.374750991152767e-06, "loss": 0.041, "step": 23595 }, { "epoch": 0.09847201475411205, "grad_norm": 0.8654931716474283, "learning_rate": 6.3740756353445835e-06, "loss": 0.0452, "step": 23600 }, { "epoch": 0.09849287746910232, "grad_norm": 1.2300694152760596, "learning_rate": 6.373400494137179e-06, "loss": 0.0518, "step": 23605 }, { "epoch": 0.09851374018409259, "grad_norm": 1.3042648721898416, "learning_rate": 6.372725567416927e-06, "loss": 0.0544, "step": 23610 }, { "epoch": 0.09853460289908288, "grad_norm": 1.04125465683996, "learning_rate": 6.372050855070284e-06, "loss": 0.0488, "step": 23615 }, { "epoch": 0.09855546561407315, "grad_norm": 0.7570234934531131, "learning_rate": 6.37137635698379e-06, "loss": 0.094, "step": 23620 }, { "epoch": 0.09857632832906343, "grad_norm": 1.4726579821144128, "learning_rate": 6.370702073044067e-06, "loss": 0.0581, "step": 23625 }, { "epoch": 0.0985971910440537, "grad_norm": 1.1701492942570346, "learning_rate": 6.370028003137827e-06, "loss": 0.0491, "step": 23630 }, { "epoch": 0.09861805375904399, "grad_norm": 0.6528565062825459, "learning_rate": 6.369354147151858e-06, "loss": 0.0503, "step": 23635 }, { "epoch": 0.09863891647403426, "grad_norm": 1.1210488799467178, "learning_rate": 6.3686805049730385e-06, "loss": 0.0469, "step": 23640 }, { "epoch": 0.09865977918902455, "grad_norm": 0.9195246478300535, "learning_rate": 6.368007076488325e-06, "loss": 0.0517, "step": 23645 }, { "epoch": 0.09868064190401482, "grad_norm": 1.844147912910642, "learning_rate": 6.3673338615847635e-06, "loss": 0.0496, "step": 23650 }, { "epoch": 0.09870150461900509, "grad_norm": 0.6012922033632808, "learning_rate": 6.3666608601494796e-06, "loss": 0.0515, "step": 23655 }, { "epoch": 0.09872236733399538, "grad_norm": 0.9241458967054983, "learning_rate": 6.365988072069682e-06, "loss": 0.0381, "step": 23660 }, { "epoch": 0.09874323004898565, "grad_norm": 1.4695032695710954, "learning_rate": 6.365315497232666e-06, "loss": 0.0472, "step": 23665 }, { "epoch": 0.09876409276397594, "grad_norm": 1.406398554185996, "learning_rate": 6.3646431355258096e-06, "loss": 0.0529, "step": 23670 }, { "epoch": 0.09878495547896621, "grad_norm": 0.8358489890242057, "learning_rate": 6.36397098683657e-06, "loss": 0.0488, "step": 23675 }, { "epoch": 0.0988058181939565, "grad_norm": 0.6984972551901668, "learning_rate": 6.363299051052493e-06, "loss": 0.0488, "step": 23680 }, { "epoch": 0.09882668090894677, "grad_norm": 0.5399515798531429, "learning_rate": 6.362627328061204e-06, "loss": 0.0489, "step": 23685 }, { "epoch": 0.09884754362393705, "grad_norm": 0.842006068216911, "learning_rate": 6.361955817750412e-06, "loss": 0.0425, "step": 23690 }, { "epoch": 0.09886840633892732, "grad_norm": 1.8215054335810572, "learning_rate": 6.361284520007911e-06, "loss": 0.0546, "step": 23695 }, { "epoch": 0.0988892690539176, "grad_norm": 1.2394312001630368, "learning_rate": 6.3606134347215755e-06, "loss": 0.0518, "step": 23700 }, { "epoch": 0.09891013176890788, "grad_norm": 1.0852306542325136, "learning_rate": 6.359942561779363e-06, "loss": 0.0486, "step": 23705 }, { "epoch": 0.09893099448389815, "grad_norm": 0.7173045506608831, "learning_rate": 6.359271901069313e-06, "loss": 0.0362, "step": 23710 }, { "epoch": 0.09895185719888844, "grad_norm": 1.2803521547125727, "learning_rate": 6.358601452479552e-06, "loss": 0.0546, "step": 23715 }, { "epoch": 0.09897271991387871, "grad_norm": 1.0395631014857896, "learning_rate": 6.357931215898286e-06, "loss": 0.0403, "step": 23720 }, { "epoch": 0.098993582628869, "grad_norm": 1.0670995287984053, "learning_rate": 6.357261191213801e-06, "loss": 0.0494, "step": 23725 }, { "epoch": 0.09901444534385927, "grad_norm": 1.146636705288148, "learning_rate": 6.356591378314467e-06, "loss": 0.0354, "step": 23730 }, { "epoch": 0.09903530805884955, "grad_norm": 0.6655445280751201, "learning_rate": 6.35592177708874e-06, "loss": 0.0372, "step": 23735 }, { "epoch": 0.09905617077383982, "grad_norm": 0.7317037921474028, "learning_rate": 6.355252387425155e-06, "loss": 0.0503, "step": 23740 }, { "epoch": 0.0990770334888301, "grad_norm": 1.0374665433411256, "learning_rate": 6.354583209212327e-06, "loss": 0.0439, "step": 23745 }, { "epoch": 0.09909789620382038, "grad_norm": 1.075601872682349, "learning_rate": 6.353914242338959e-06, "loss": 0.0506, "step": 23750 }, { "epoch": 0.09911875891881065, "grad_norm": 1.5776138985195707, "learning_rate": 6.353245486693829e-06, "loss": 0.0533, "step": 23755 }, { "epoch": 0.09913962163380094, "grad_norm": 1.6195510028824756, "learning_rate": 6.352576942165804e-06, "loss": 0.0766, "step": 23760 }, { "epoch": 0.09916048434879121, "grad_norm": 0.8277635740348209, "learning_rate": 6.351908608643826e-06, "loss": 0.0512, "step": 23765 }, { "epoch": 0.0991813470637815, "grad_norm": 0.8568895921560999, "learning_rate": 6.351240486016924e-06, "loss": 0.0415, "step": 23770 }, { "epoch": 0.09920220977877177, "grad_norm": 0.821936005754469, "learning_rate": 6.350572574174209e-06, "loss": 0.0604, "step": 23775 }, { "epoch": 0.09922307249376205, "grad_norm": 0.9487304959532913, "learning_rate": 6.349904873004866e-06, "loss": 0.0388, "step": 23780 }, { "epoch": 0.09924393520875233, "grad_norm": 0.6868901595811034, "learning_rate": 6.349237382398172e-06, "loss": 0.0536, "step": 23785 }, { "epoch": 0.0992647979237426, "grad_norm": 1.210300495153835, "learning_rate": 6.348570102243478e-06, "loss": 0.0443, "step": 23790 }, { "epoch": 0.09928566063873288, "grad_norm": 1.0046126533316835, "learning_rate": 6.347903032430218e-06, "loss": 0.0442, "step": 23795 }, { "epoch": 0.09930652335372316, "grad_norm": 1.351982624475762, "learning_rate": 6.347236172847911e-06, "loss": 0.0575, "step": 23800 }, { "epoch": 0.09932738606871344, "grad_norm": 0.7647516494706044, "learning_rate": 6.346569523386151e-06, "loss": 0.0463, "step": 23805 }, { "epoch": 0.09934824878370371, "grad_norm": 2.4719254734066407, "learning_rate": 6.34590308393462e-06, "loss": 0.057, "step": 23810 }, { "epoch": 0.099369111498694, "grad_norm": 1.35652427037947, "learning_rate": 6.345236854383075e-06, "loss": 0.0432, "step": 23815 }, { "epoch": 0.09938997421368427, "grad_norm": 0.7649140934757958, "learning_rate": 6.344570834621357e-06, "loss": 0.048, "step": 23820 }, { "epoch": 0.09941083692867456, "grad_norm": 0.9485900737436036, "learning_rate": 6.343905024539389e-06, "loss": 0.0462, "step": 23825 }, { "epoch": 0.09943169964366483, "grad_norm": 0.9860368557222838, "learning_rate": 6.34323942402717e-06, "loss": 0.0478, "step": 23830 }, { "epoch": 0.0994525623586551, "grad_norm": 1.3217284765515267, "learning_rate": 6.3425740329747866e-06, "loss": 0.0369, "step": 23835 }, { "epoch": 0.09947342507364539, "grad_norm": 0.7171285289376709, "learning_rate": 6.341908851272402e-06, "loss": 0.0434, "step": 23840 }, { "epoch": 0.09949428778863566, "grad_norm": 2.0184563564353066, "learning_rate": 6.341243878810258e-06, "loss": 0.0427, "step": 23845 }, { "epoch": 0.09951515050362594, "grad_norm": 1.2425851290665209, "learning_rate": 6.340579115478683e-06, "loss": 0.0441, "step": 23850 }, { "epoch": 0.09953601321861621, "grad_norm": 1.0120096196284556, "learning_rate": 6.33991456116808e-06, "loss": 0.0594, "step": 23855 }, { "epoch": 0.0995568759336065, "grad_norm": 1.055400336908411, "learning_rate": 6.3392502157689365e-06, "loss": 0.06, "step": 23860 }, { "epoch": 0.09957773864859677, "grad_norm": 1.1016638312657976, "learning_rate": 6.338586079171817e-06, "loss": 0.0373, "step": 23865 }, { "epoch": 0.09959860136358706, "grad_norm": 1.0453685663054004, "learning_rate": 6.337922151267368e-06, "loss": 0.0458, "step": 23870 }, { "epoch": 0.09961946407857733, "grad_norm": 0.5617132342072225, "learning_rate": 6.337258431946318e-06, "loss": 0.0449, "step": 23875 }, { "epoch": 0.0996403267935676, "grad_norm": 0.8374108488262572, "learning_rate": 6.336594921099471e-06, "loss": 0.0432, "step": 23880 }, { "epoch": 0.09966118950855789, "grad_norm": 0.8882253680544467, "learning_rate": 6.335931618617716e-06, "loss": 0.0398, "step": 23885 }, { "epoch": 0.09968205222354816, "grad_norm": 1.4984110892475935, "learning_rate": 6.335268524392018e-06, "loss": 0.0655, "step": 23890 }, { "epoch": 0.09970291493853844, "grad_norm": 0.8413256226565347, "learning_rate": 6.334605638313423e-06, "loss": 0.0536, "step": 23895 }, { "epoch": 0.09972377765352872, "grad_norm": 1.3412809273877841, "learning_rate": 6.333942960273059e-06, "loss": 0.059, "step": 23900 }, { "epoch": 0.099744640368519, "grad_norm": 0.9137417177588908, "learning_rate": 6.333280490162132e-06, "loss": 0.0357, "step": 23905 }, { "epoch": 0.09976550308350927, "grad_norm": 1.121205311141386, "learning_rate": 6.332618227871924e-06, "loss": 0.063, "step": 23910 }, { "epoch": 0.09978636579849956, "grad_norm": 0.9064823704751486, "learning_rate": 6.331956173293804e-06, "loss": 0.0503, "step": 23915 }, { "epoch": 0.09980722851348983, "grad_norm": 0.9726802092886592, "learning_rate": 6.331294326319215e-06, "loss": 0.0448, "step": 23920 }, { "epoch": 0.0998280912284801, "grad_norm": 1.6495207353709807, "learning_rate": 6.330632686839682e-06, "loss": 0.0747, "step": 23925 }, { "epoch": 0.09984895394347039, "grad_norm": 0.8569519948458199, "learning_rate": 6.329971254746807e-06, "loss": 0.0489, "step": 23930 }, { "epoch": 0.09986981665846066, "grad_norm": 1.1074326272381065, "learning_rate": 6.329310029932272e-06, "loss": 0.0591, "step": 23935 }, { "epoch": 0.09989067937345095, "grad_norm": 0.9577397246389106, "learning_rate": 6.328649012287841e-06, "loss": 0.0476, "step": 23940 }, { "epoch": 0.09991154208844122, "grad_norm": 1.0441873786354714, "learning_rate": 6.327988201705354e-06, "loss": 0.0357, "step": 23945 }, { "epoch": 0.0999324048034315, "grad_norm": 0.8825334339664578, "learning_rate": 6.32732759807673e-06, "loss": 0.0421, "step": 23950 }, { "epoch": 0.09995326751842178, "grad_norm": 0.9171753884075066, "learning_rate": 6.32666720129397e-06, "loss": 0.0476, "step": 23955 }, { "epoch": 0.09997413023341206, "grad_norm": 3.014813913230873, "learning_rate": 6.32600701124915e-06, "loss": 0.0468, "step": 23960 }, { "epoch": 0.09999499294840233, "grad_norm": 1.0854340610448268, "learning_rate": 6.3253470278344255e-06, "loss": 0.049, "step": 23965 }, { "epoch": 0.1000158556633926, "grad_norm": 1.4933953598293734, "learning_rate": 6.324687250942035e-06, "loss": 0.0598, "step": 23970 }, { "epoch": 0.10003671837838289, "grad_norm": 1.4217475583488093, "learning_rate": 6.3240276804642905e-06, "loss": 0.0468, "step": 23975 }, { "epoch": 0.10005758109337316, "grad_norm": 1.2784790425646033, "learning_rate": 6.323368316293585e-06, "loss": 0.061, "step": 23980 }, { "epoch": 0.10007844380836345, "grad_norm": 1.1861210133809341, "learning_rate": 6.322709158322391e-06, "loss": 0.0433, "step": 23985 }, { "epoch": 0.10009930652335372, "grad_norm": 1.2986505892394795, "learning_rate": 6.322050206443254e-06, "loss": 0.064, "step": 23990 }, { "epoch": 0.100120169238344, "grad_norm": 1.4457358548232568, "learning_rate": 6.321391460548807e-06, "loss": 0.0521, "step": 23995 }, { "epoch": 0.10014103195333428, "grad_norm": 1.2552315871349553, "learning_rate": 6.320732920531753e-06, "loss": 0.0432, "step": 24000 }, { "epoch": 0.10016189466832456, "grad_norm": 0.9179457810440477, "learning_rate": 6.320074586284878e-06, "loss": 0.0372, "step": 24005 }, { "epoch": 0.10018275738331484, "grad_norm": 1.690763967787096, "learning_rate": 6.319416457701044e-06, "loss": 0.0388, "step": 24010 }, { "epoch": 0.10020362009830511, "grad_norm": 0.820687825870961, "learning_rate": 6.318758534673191e-06, "loss": 0.0381, "step": 24015 }, { "epoch": 0.10022448281329539, "grad_norm": 0.95752994927993, "learning_rate": 6.31810081709434e-06, "loss": 0.0485, "step": 24020 }, { "epoch": 0.10024534552828566, "grad_norm": 0.6661840604470297, "learning_rate": 6.317443304857583e-06, "loss": 0.0427, "step": 24025 }, { "epoch": 0.10026620824327595, "grad_norm": 1.346404836488482, "learning_rate": 6.316785997856102e-06, "loss": 0.0452, "step": 24030 }, { "epoch": 0.10028707095826622, "grad_norm": 1.7426880550712556, "learning_rate": 6.3161288959831425e-06, "loss": 0.0533, "step": 24035 }, { "epoch": 0.10030793367325651, "grad_norm": 0.4655014891572283, "learning_rate": 6.315471999132036e-06, "loss": 0.0468, "step": 24040 }, { "epoch": 0.10032879638824678, "grad_norm": 1.252112047640261, "learning_rate": 6.314815307196193e-06, "loss": 0.0548, "step": 24045 }, { "epoch": 0.10034965910323707, "grad_norm": 1.1885349208287337, "learning_rate": 6.3141588200690965e-06, "loss": 0.036, "step": 24050 }, { "epoch": 0.10037052181822734, "grad_norm": 1.0235364487251604, "learning_rate": 6.313502537644308e-06, "loss": 0.0539, "step": 24055 }, { "epoch": 0.10039138453321761, "grad_norm": 1.0036153712985525, "learning_rate": 6.312846459815471e-06, "loss": 0.0365, "step": 24060 }, { "epoch": 0.1004122472482079, "grad_norm": 1.513502692580488, "learning_rate": 6.312190586476299e-06, "loss": 0.0491, "step": 24065 }, { "epoch": 0.10043310996319817, "grad_norm": 8.290396533059218, "learning_rate": 6.31153491752059e-06, "loss": 0.0495, "step": 24070 }, { "epoch": 0.10045397267818845, "grad_norm": 0.9252005907725331, "learning_rate": 6.3108794528422155e-06, "loss": 0.0411, "step": 24075 }, { "epoch": 0.10047483539317872, "grad_norm": 1.4841787450640074, "learning_rate": 6.310224192335123e-06, "loss": 0.0582, "step": 24080 }, { "epoch": 0.10049569810816901, "grad_norm": 0.791316423718327, "learning_rate": 6.309569135893341e-06, "loss": 0.0544, "step": 24085 }, { "epoch": 0.10051656082315928, "grad_norm": 1.1488800231308833, "learning_rate": 6.308914283410972e-06, "loss": 0.052, "step": 24090 }, { "epoch": 0.10053742353814957, "grad_norm": 1.8938828336208422, "learning_rate": 6.3082596347821936e-06, "loss": 0.0478, "step": 24095 }, { "epoch": 0.10055828625313984, "grad_norm": 1.5638868571428193, "learning_rate": 6.307605189901265e-06, "loss": 0.0397, "step": 24100 }, { "epoch": 0.10057914896813011, "grad_norm": 2.8463333461351663, "learning_rate": 6.306950948662519e-06, "loss": 0.0595, "step": 24105 }, { "epoch": 0.1006000116831204, "grad_norm": 0.7708824186130644, "learning_rate": 6.306296910960368e-06, "loss": 0.0549, "step": 24110 }, { "epoch": 0.10062087439811067, "grad_norm": 2.653772262637045, "learning_rate": 6.305643076689296e-06, "loss": 0.0368, "step": 24115 }, { "epoch": 0.10064173711310095, "grad_norm": 1.814318807437142, "learning_rate": 6.30498944574387e-06, "loss": 0.0519, "step": 24120 }, { "epoch": 0.10066259982809123, "grad_norm": 0.8684959885246067, "learning_rate": 6.304336018018728e-06, "loss": 0.0465, "step": 24125 }, { "epoch": 0.10068346254308151, "grad_norm": 2.028532074646303, "learning_rate": 6.303682793408584e-06, "loss": 0.056, "step": 24130 }, { "epoch": 0.10070432525807178, "grad_norm": 2.036506085082566, "learning_rate": 6.303029771808238e-06, "loss": 0.0538, "step": 24135 }, { "epoch": 0.10072518797306207, "grad_norm": 1.2759683303869145, "learning_rate": 6.302376953112551e-06, "loss": 0.0388, "step": 24140 }, { "epoch": 0.10074605068805234, "grad_norm": 1.1315548481904876, "learning_rate": 6.3017243372164735e-06, "loss": 0.0488, "step": 24145 }, { "epoch": 0.10076691340304261, "grad_norm": 0.5894494275423625, "learning_rate": 6.301071924015026e-06, "loss": 0.0311, "step": 24150 }, { "epoch": 0.1007877761180329, "grad_norm": 0.9202529443467728, "learning_rate": 6.300419713403305e-06, "loss": 0.0505, "step": 24155 }, { "epoch": 0.10080863883302317, "grad_norm": 1.2633766387478829, "learning_rate": 6.299767705276486e-06, "loss": 0.0579, "step": 24160 }, { "epoch": 0.10082950154801346, "grad_norm": 1.1424039738681178, "learning_rate": 6.299115899529816e-06, "loss": 0.049, "step": 24165 }, { "epoch": 0.10085036426300373, "grad_norm": 0.6220306706277199, "learning_rate": 6.2984642960586215e-06, "loss": 0.0486, "step": 24170 }, { "epoch": 0.10087122697799401, "grad_norm": 1.9151518981162852, "learning_rate": 6.297812894758304e-06, "loss": 0.041, "step": 24175 }, { "epoch": 0.10089208969298429, "grad_norm": 0.913055234955033, "learning_rate": 6.29716169552434e-06, "loss": 0.046, "step": 24180 }, { "epoch": 0.10091295240797457, "grad_norm": 0.721609984058364, "learning_rate": 6.296510698252281e-06, "loss": 0.0458, "step": 24185 }, { "epoch": 0.10093381512296484, "grad_norm": 1.5218493512426332, "learning_rate": 6.295859902837757e-06, "loss": 0.0381, "step": 24190 }, { "epoch": 0.10095467783795511, "grad_norm": 1.7694079478992093, "learning_rate": 6.295209309176468e-06, "loss": 0.0513, "step": 24195 }, { "epoch": 0.1009755405529454, "grad_norm": 0.9099793368922595, "learning_rate": 6.294558917164197e-06, "loss": 0.0456, "step": 24200 }, { "epoch": 0.10099640326793567, "grad_norm": 0.9096679722720701, "learning_rate": 6.293908726696797e-06, "loss": 0.0485, "step": 24205 }, { "epoch": 0.10101726598292596, "grad_norm": 1.3373312784607416, "learning_rate": 6.293258737670196e-06, "loss": 0.0492, "step": 24210 }, { "epoch": 0.10103812869791623, "grad_norm": 1.1501635612082202, "learning_rate": 6.292608949980399e-06, "loss": 0.0451, "step": 24215 }, { "epoch": 0.10105899141290652, "grad_norm": 1.077976480752194, "learning_rate": 6.291959363523488e-06, "loss": 0.0611, "step": 24220 }, { "epoch": 0.10107985412789679, "grad_norm": 1.2628536583822523, "learning_rate": 6.291309978195618e-06, "loss": 0.0448, "step": 24225 }, { "epoch": 0.10110071684288707, "grad_norm": 1.0902270869818846, "learning_rate": 6.290660793893015e-06, "loss": 0.0329, "step": 24230 }, { "epoch": 0.10112157955787734, "grad_norm": 1.339092776702962, "learning_rate": 6.2900118105119876e-06, "loss": 0.0578, "step": 24235 }, { "epoch": 0.10114244227286762, "grad_norm": 2.1049486154534764, "learning_rate": 6.289363027948916e-06, "loss": 0.0392, "step": 24240 }, { "epoch": 0.1011633049878579, "grad_norm": 2.1234150625959947, "learning_rate": 6.288714446100253e-06, "loss": 0.056, "step": 24245 }, { "epoch": 0.10118416770284817, "grad_norm": 1.0899979306540817, "learning_rate": 6.288066064862528e-06, "loss": 0.0454, "step": 24250 }, { "epoch": 0.10120503041783846, "grad_norm": 1.4362069633767618, "learning_rate": 6.287417884132347e-06, "loss": 0.0664, "step": 24255 }, { "epoch": 0.10122589313282873, "grad_norm": 0.9197413772359612, "learning_rate": 6.286769903806386e-06, "loss": 0.0431, "step": 24260 }, { "epoch": 0.10124675584781902, "grad_norm": 0.8447286018066849, "learning_rate": 6.286122123781399e-06, "loss": 0.0582, "step": 24265 }, { "epoch": 0.10126761856280929, "grad_norm": 0.8360465260546511, "learning_rate": 6.2854745439542155e-06, "loss": 0.046, "step": 24270 }, { "epoch": 0.10128848127779956, "grad_norm": 2.1862799807245095, "learning_rate": 6.284827164221735e-06, "loss": 0.0736, "step": 24275 }, { "epoch": 0.10130934399278985, "grad_norm": 2.015143971524058, "learning_rate": 6.2841799844809344e-06, "loss": 0.0609, "step": 24280 }, { "epoch": 0.10133020670778012, "grad_norm": 1.0256775396554552, "learning_rate": 6.283533004628866e-06, "loss": 0.0414, "step": 24285 }, { "epoch": 0.1013510694227704, "grad_norm": 0.984846533536289, "learning_rate": 6.282886224562652e-06, "loss": 0.0594, "step": 24290 }, { "epoch": 0.10137193213776068, "grad_norm": 0.807656582650048, "learning_rate": 6.282239644179492e-06, "loss": 0.0494, "step": 24295 }, { "epoch": 0.10139279485275096, "grad_norm": 1.1031215945998398, "learning_rate": 6.281593263376661e-06, "loss": 0.0398, "step": 24300 }, { "epoch": 0.10141365756774123, "grad_norm": 1.123447990112022, "learning_rate": 6.280947082051502e-06, "loss": 0.0478, "step": 24305 }, { "epoch": 0.10143452028273152, "grad_norm": 1.632880596503014, "learning_rate": 6.280301100101438e-06, "loss": 0.0608, "step": 24310 }, { "epoch": 0.10145538299772179, "grad_norm": 0.7208438795915895, "learning_rate": 6.279655317423965e-06, "loss": 0.0537, "step": 24315 }, { "epoch": 0.10147624571271206, "grad_norm": 0.9782422416320831, "learning_rate": 6.279009733916648e-06, "loss": 0.0555, "step": 24320 }, { "epoch": 0.10149710842770235, "grad_norm": 0.8875289291629636, "learning_rate": 6.27836434947713e-06, "loss": 0.0497, "step": 24325 }, { "epoch": 0.10151797114269262, "grad_norm": 0.9794482875858063, "learning_rate": 6.2777191640031295e-06, "loss": 0.04, "step": 24330 }, { "epoch": 0.1015388338576829, "grad_norm": 0.5543592239892384, "learning_rate": 6.277074177392434e-06, "loss": 0.0345, "step": 24335 }, { "epoch": 0.10155969657267318, "grad_norm": 1.756145778248177, "learning_rate": 6.2764293895429044e-06, "loss": 0.0578, "step": 24340 }, { "epoch": 0.10158055928766346, "grad_norm": 1.8208587912910261, "learning_rate": 6.275784800352479e-06, "loss": 0.0475, "step": 24345 }, { "epoch": 0.10160142200265374, "grad_norm": 1.1779347972222614, "learning_rate": 6.275140409719167e-06, "loss": 0.0495, "step": 24350 }, { "epoch": 0.10162228471764402, "grad_norm": 1.5991796581756983, "learning_rate": 6.27449621754105e-06, "loss": 0.0494, "step": 24355 }, { "epoch": 0.10164314743263429, "grad_norm": 1.1524570441485997, "learning_rate": 6.273852223716284e-06, "loss": 0.0525, "step": 24360 }, { "epoch": 0.10166401014762456, "grad_norm": 1.3300015090205337, "learning_rate": 6.2732084281431e-06, "loss": 0.0525, "step": 24365 }, { "epoch": 0.10168487286261485, "grad_norm": 1.5899193391446231, "learning_rate": 6.2725648307198005e-06, "loss": 0.0509, "step": 24370 }, { "epoch": 0.10170573557760512, "grad_norm": 1.0618764005369488, "learning_rate": 6.271921431344759e-06, "loss": 0.0443, "step": 24375 }, { "epoch": 0.10172659829259541, "grad_norm": 0.8984500280896778, "learning_rate": 6.271278229916423e-06, "loss": 0.0534, "step": 24380 }, { "epoch": 0.10174746100758568, "grad_norm": 1.1129279264876488, "learning_rate": 6.270635226333317e-06, "loss": 0.053, "step": 24385 }, { "epoch": 0.10176832372257597, "grad_norm": 0.8064252045165515, "learning_rate": 6.269992420494032e-06, "loss": 0.0363, "step": 24390 }, { "epoch": 0.10178918643756624, "grad_norm": 0.9975749589499409, "learning_rate": 6.269349812297237e-06, "loss": 0.0362, "step": 24395 }, { "epoch": 0.10181004915255652, "grad_norm": 1.0363709916347388, "learning_rate": 6.268707401641667e-06, "loss": 0.041, "step": 24400 }, { "epoch": 0.1018309118675468, "grad_norm": 1.0009074357392913, "learning_rate": 6.26806518842614e-06, "loss": 0.0373, "step": 24405 }, { "epoch": 0.10185177458253707, "grad_norm": 1.0545458088609319, "learning_rate": 6.267423172549536e-06, "loss": 0.0501, "step": 24410 }, { "epoch": 0.10187263729752735, "grad_norm": 0.882666345203957, "learning_rate": 6.266781353910815e-06, "loss": 0.0469, "step": 24415 }, { "epoch": 0.10189350001251762, "grad_norm": 0.9257143282296131, "learning_rate": 6.266139732409003e-06, "loss": 0.0407, "step": 24420 }, { "epoch": 0.10191436272750791, "grad_norm": 0.9687804794001785, "learning_rate": 6.265498307943205e-06, "loss": 0.056, "step": 24425 }, { "epoch": 0.10193522544249818, "grad_norm": 1.5109956173294017, "learning_rate": 6.264857080412595e-06, "loss": 0.0499, "step": 24430 }, { "epoch": 0.10195608815748847, "grad_norm": 0.9451148654717666, "learning_rate": 6.264216049716418e-06, "loss": 0.0532, "step": 24435 }, { "epoch": 0.10197695087247874, "grad_norm": 1.2454057563163377, "learning_rate": 6.263575215753992e-06, "loss": 0.0504, "step": 24440 }, { "epoch": 0.10199781358746902, "grad_norm": 1.5999739517046567, "learning_rate": 6.262934578424709e-06, "loss": 0.0443, "step": 24445 }, { "epoch": 0.1020186763024593, "grad_norm": 1.2050662210248209, "learning_rate": 6.262294137628031e-06, "loss": 0.0466, "step": 24450 }, { "epoch": 0.10203953901744957, "grad_norm": 0.7948980712435324, "learning_rate": 6.261653893263492e-06, "loss": 0.0429, "step": 24455 }, { "epoch": 0.10206040173243985, "grad_norm": 1.0774931806714945, "learning_rate": 6.261013845230698e-06, "loss": 0.0463, "step": 24460 }, { "epoch": 0.10208126444743013, "grad_norm": 0.7380531171877731, "learning_rate": 6.260373993429328e-06, "loss": 0.0527, "step": 24465 }, { "epoch": 0.10210212716242041, "grad_norm": 1.0897457768327368, "learning_rate": 6.259734337759134e-06, "loss": 0.0537, "step": 24470 }, { "epoch": 0.10212298987741068, "grad_norm": 1.1610809917695035, "learning_rate": 6.259094878119933e-06, "loss": 0.0464, "step": 24475 }, { "epoch": 0.10214385259240097, "grad_norm": 1.3100898181277136, "learning_rate": 6.258455614411623e-06, "loss": 0.0423, "step": 24480 }, { "epoch": 0.10216471530739124, "grad_norm": 1.738954649755846, "learning_rate": 6.257816546534165e-06, "loss": 0.0495, "step": 24485 }, { "epoch": 0.10218557802238153, "grad_norm": 1.1451925480793892, "learning_rate": 6.257177674387599e-06, "loss": 0.0496, "step": 24490 }, { "epoch": 0.1022064407373718, "grad_norm": 1.1916001013075836, "learning_rate": 6.256538997872029e-06, "loss": 0.0324, "step": 24495 }, { "epoch": 0.10222730345236207, "grad_norm": 0.7567253490465313, "learning_rate": 6.255900516887637e-06, "loss": 0.0533, "step": 24500 }, { "epoch": 0.10224816616735236, "grad_norm": 1.1837844501683403, "learning_rate": 6.255262231334671e-06, "loss": 0.0493, "step": 24505 }, { "epoch": 0.10226902888234263, "grad_norm": 1.633684994646343, "learning_rate": 6.254624141113455e-06, "loss": 0.059, "step": 24510 }, { "epoch": 0.10228989159733291, "grad_norm": 3.1591398419921615, "learning_rate": 6.253986246124378e-06, "loss": 0.0724, "step": 24515 }, { "epoch": 0.10231075431232318, "grad_norm": 3.2304817249886346, "learning_rate": 6.25334854626791e-06, "loss": 0.0377, "step": 24520 }, { "epoch": 0.10233161702731347, "grad_norm": 0.9481177829671372, "learning_rate": 6.252711041444581e-06, "loss": 0.0452, "step": 24525 }, { "epoch": 0.10235247974230374, "grad_norm": 2.4347427748475883, "learning_rate": 6.2520737315549975e-06, "loss": 0.0574, "step": 24530 }, { "epoch": 0.10237334245729403, "grad_norm": 0.6617889145035358, "learning_rate": 6.251436616499838e-06, "loss": 0.0494, "step": 24535 }, { "epoch": 0.1023942051722843, "grad_norm": 1.3441461321477413, "learning_rate": 6.25079969617985e-06, "loss": 0.0549, "step": 24540 }, { "epoch": 0.10241506788727457, "grad_norm": 1.3714551958811056, "learning_rate": 6.25016297049585e-06, "loss": 0.0527, "step": 24545 }, { "epoch": 0.10243593060226486, "grad_norm": 1.2543088034070646, "learning_rate": 6.249526439348728e-06, "loss": 0.0547, "step": 24550 }, { "epoch": 0.10245679331725513, "grad_norm": 1.0701155174134132, "learning_rate": 6.248890102639444e-06, "loss": 0.0499, "step": 24555 }, { "epoch": 0.10247765603224541, "grad_norm": 0.895934288606424, "learning_rate": 6.2482539602690285e-06, "loss": 0.0414, "step": 24560 }, { "epoch": 0.10249851874723569, "grad_norm": 0.8152000751591698, "learning_rate": 6.247618012138582e-06, "loss": 0.0379, "step": 24565 }, { "epoch": 0.10251938146222597, "grad_norm": 0.3926169120913383, "learning_rate": 6.2469822581492755e-06, "loss": 0.0422, "step": 24570 }, { "epoch": 0.10254024417721624, "grad_norm": 0.5433183604810279, "learning_rate": 6.2463466982023515e-06, "loss": 0.0358, "step": 24575 }, { "epoch": 0.10256110689220653, "grad_norm": 0.9355238231441156, "learning_rate": 6.24571133219912e-06, "loss": 0.045, "step": 24580 }, { "epoch": 0.1025819696071968, "grad_norm": 1.723318444495844, "learning_rate": 6.2450761600409655e-06, "loss": 0.0423, "step": 24585 }, { "epoch": 0.10260283232218707, "grad_norm": 1.460615336459341, "learning_rate": 6.24444118162934e-06, "loss": 0.0451, "step": 24590 }, { "epoch": 0.10262369503717736, "grad_norm": 1.04416706733206, "learning_rate": 6.243806396865766e-06, "loss": 0.0478, "step": 24595 }, { "epoch": 0.10264455775216763, "grad_norm": 1.3497979500037707, "learning_rate": 6.243171805651834e-06, "loss": 0.0568, "step": 24600 }, { "epoch": 0.10266542046715792, "grad_norm": 1.2181152352940878, "learning_rate": 6.242537407889209e-06, "loss": 0.0497, "step": 24605 }, { "epoch": 0.10268628318214819, "grad_norm": 1.1211105865956548, "learning_rate": 6.2419032034796224e-06, "loss": 0.048, "step": 24610 }, { "epoch": 0.10270714589713847, "grad_norm": 0.9510873560683236, "learning_rate": 6.241269192324877e-06, "loss": 0.0504, "step": 24615 }, { "epoch": 0.10272800861212875, "grad_norm": 0.7437100911074265, "learning_rate": 6.240635374326845e-06, "loss": 0.0584, "step": 24620 }, { "epoch": 0.10274887132711903, "grad_norm": 1.2728205275945526, "learning_rate": 6.24000174938747e-06, "loss": 0.0588, "step": 24625 }, { "epoch": 0.1027697340421093, "grad_norm": 1.0802312756110584, "learning_rate": 6.23936831740876e-06, "loss": 0.0504, "step": 24630 }, { "epoch": 0.10279059675709958, "grad_norm": 1.1059964151075117, "learning_rate": 6.2387350782927994e-06, "loss": 0.0497, "step": 24635 }, { "epoch": 0.10281145947208986, "grad_norm": 0.9659980019835969, "learning_rate": 6.238102031941737e-06, "loss": 0.0468, "step": 24640 }, { "epoch": 0.10283232218708013, "grad_norm": 1.0275509411027317, "learning_rate": 6.237469178257794e-06, "loss": 0.0376, "step": 24645 }, { "epoch": 0.10285318490207042, "grad_norm": 1.1278476276293605, "learning_rate": 6.23683651714326e-06, "loss": 0.0462, "step": 24650 }, { "epoch": 0.10287404761706069, "grad_norm": 1.2885482562487849, "learning_rate": 6.236204048500495e-06, "loss": 0.0492, "step": 24655 }, { "epoch": 0.10289491033205098, "grad_norm": 1.2353025277031688, "learning_rate": 6.235571772231925e-06, "loss": 0.0477, "step": 24660 }, { "epoch": 0.10291577304704125, "grad_norm": 1.0675046569308497, "learning_rate": 6.234939688240049e-06, "loss": 0.0436, "step": 24665 }, { "epoch": 0.10293663576203153, "grad_norm": 1.2381063171762625, "learning_rate": 6.2343077964274345e-06, "loss": 0.0423, "step": 24670 }, { "epoch": 0.1029574984770218, "grad_norm": 0.8229726049067824, "learning_rate": 6.233676096696716e-06, "loss": 0.0469, "step": 24675 }, { "epoch": 0.10297836119201208, "grad_norm": 0.9504742838872013, "learning_rate": 6.2330445889505995e-06, "loss": 0.0619, "step": 24680 }, { "epoch": 0.10299922390700236, "grad_norm": 0.7621446642228991, "learning_rate": 6.23241327309186e-06, "loss": 0.0465, "step": 24685 }, { "epoch": 0.10302008662199263, "grad_norm": 1.4464858649053465, "learning_rate": 6.231782149023336e-06, "loss": 0.0436, "step": 24690 }, { "epoch": 0.10304094933698292, "grad_norm": 0.7895927237496069, "learning_rate": 6.231151216647943e-06, "loss": 0.0543, "step": 24695 }, { "epoch": 0.10306181205197319, "grad_norm": 1.2880309660003282, "learning_rate": 6.230520475868661e-06, "loss": 0.0485, "step": 24700 }, { "epoch": 0.10308267476696348, "grad_norm": 1.049762605643208, "learning_rate": 6.2298899265885375e-06, "loss": 0.0448, "step": 24705 }, { "epoch": 0.10310353748195375, "grad_norm": 1.616984276040133, "learning_rate": 6.229259568710692e-06, "loss": 0.0574, "step": 24710 }, { "epoch": 0.10312440019694404, "grad_norm": 7.362367593496256, "learning_rate": 6.228629402138311e-06, "loss": 0.0565, "step": 24715 }, { "epoch": 0.10314526291193431, "grad_norm": 1.5432070117281003, "learning_rate": 6.227999426774648e-06, "loss": 0.042, "step": 24720 }, { "epoch": 0.10316612562692458, "grad_norm": 0.5973580836059642, "learning_rate": 6.227369642523026e-06, "loss": 0.041, "step": 24725 }, { "epoch": 0.10318698834191486, "grad_norm": 0.6209315959015965, "learning_rate": 6.2267400492868426e-06, "loss": 0.0298, "step": 24730 }, { "epoch": 0.10320785105690514, "grad_norm": 1.0925920955666149, "learning_rate": 6.226110646969551e-06, "loss": 0.0388, "step": 24735 }, { "epoch": 0.10322871377189542, "grad_norm": 0.9495812848648886, "learning_rate": 6.225481435474683e-06, "loss": 0.0426, "step": 24740 }, { "epoch": 0.1032495764868857, "grad_norm": 1.366251688364179, "learning_rate": 6.224852414705835e-06, "loss": 0.0804, "step": 24745 }, { "epoch": 0.10327043920187598, "grad_norm": 1.040855470640818, "learning_rate": 6.224223584566672e-06, "loss": 0.0612, "step": 24750 }, { "epoch": 0.10329130191686625, "grad_norm": 0.724978486631753, "learning_rate": 6.223594944960927e-06, "loss": 0.0505, "step": 24755 }, { "epoch": 0.10331216463185654, "grad_norm": 1.375124536425697, "learning_rate": 6.2229664957924015e-06, "loss": 0.0483, "step": 24760 }, { "epoch": 0.10333302734684681, "grad_norm": 1.3819306321692242, "learning_rate": 6.222338236964964e-06, "loss": 0.0465, "step": 24765 }, { "epoch": 0.10335389006183708, "grad_norm": 0.9073927745806153, "learning_rate": 6.221710168382552e-06, "loss": 0.0587, "step": 24770 }, { "epoch": 0.10337475277682737, "grad_norm": 0.7666153318157546, "learning_rate": 6.221082289949171e-06, "loss": 0.0609, "step": 24775 }, { "epoch": 0.10339561549181764, "grad_norm": 1.0908808119835756, "learning_rate": 6.220454601568892e-06, "loss": 0.0555, "step": 24780 }, { "epoch": 0.10341647820680792, "grad_norm": 1.0903057356732155, "learning_rate": 6.2198271031458556e-06, "loss": 0.0393, "step": 24785 }, { "epoch": 0.1034373409217982, "grad_norm": 3.8033666056662887, "learning_rate": 6.219199794584272e-06, "loss": 0.1114, "step": 24790 }, { "epoch": 0.10345820363678848, "grad_norm": 1.2764714866788645, "learning_rate": 6.218572675788413e-06, "loss": 0.0487, "step": 24795 }, { "epoch": 0.10347906635177875, "grad_norm": 0.8005946981307965, "learning_rate": 6.217945746662627e-06, "loss": 0.0423, "step": 24800 }, { "epoch": 0.10349992906676904, "grad_norm": 0.971051757951178, "learning_rate": 6.217319007111323e-06, "loss": 0.0536, "step": 24805 }, { "epoch": 0.10352079178175931, "grad_norm": 1.0634634334518833, "learning_rate": 6.216692457038976e-06, "loss": 0.0558, "step": 24810 }, { "epoch": 0.10354165449674958, "grad_norm": 0.8848186013075077, "learning_rate": 6.216066096350135e-06, "loss": 0.0465, "step": 24815 }, { "epoch": 0.10356251721173987, "grad_norm": 1.1929940361595133, "learning_rate": 6.2154399249494115e-06, "loss": 0.0612, "step": 24820 }, { "epoch": 0.10358337992673014, "grad_norm": 1.0839400039925497, "learning_rate": 6.214813942741487e-06, "loss": 0.07, "step": 24825 }, { "epoch": 0.10360424264172043, "grad_norm": 1.365040135032933, "learning_rate": 6.214188149631106e-06, "loss": 0.0409, "step": 24830 }, { "epoch": 0.1036251053567107, "grad_norm": 2.4578417892383566, "learning_rate": 6.213562545523086e-06, "loss": 0.0777, "step": 24835 }, { "epoch": 0.10364596807170098, "grad_norm": 1.5510944943300764, "learning_rate": 6.212937130322306e-06, "loss": 0.0452, "step": 24840 }, { "epoch": 0.10366683078669126, "grad_norm": 1.2990511891998655, "learning_rate": 6.212311903933717e-06, "loss": 0.0464, "step": 24845 }, { "epoch": 0.10368769350168154, "grad_norm": 1.0898613349866197, "learning_rate": 6.2116868662623335e-06, "loss": 0.0507, "step": 24850 }, { "epoch": 0.10370855621667181, "grad_norm": 0.9306647337167495, "learning_rate": 6.211062017213237e-06, "loss": 0.0468, "step": 24855 }, { "epoch": 0.10372941893166208, "grad_norm": 0.610003858676106, "learning_rate": 6.2104373566915756e-06, "loss": 0.0625, "step": 24860 }, { "epoch": 0.10375028164665237, "grad_norm": 0.9840512988076243, "learning_rate": 6.209812884602568e-06, "loss": 0.0461, "step": 24865 }, { "epoch": 0.10377114436164264, "grad_norm": 1.7613125649239387, "learning_rate": 6.209188600851496e-06, "loss": 0.0596, "step": 24870 }, { "epoch": 0.10379200707663293, "grad_norm": 1.7064113559926246, "learning_rate": 6.208564505343707e-06, "loss": 0.0563, "step": 24875 }, { "epoch": 0.1038128697916232, "grad_norm": 1.173123730744085, "learning_rate": 6.20794059798462e-06, "loss": 0.0557, "step": 24880 }, { "epoch": 0.10383373250661349, "grad_norm": 1.0542735337941413, "learning_rate": 6.207316878679715e-06, "loss": 0.0362, "step": 24885 }, { "epoch": 0.10385459522160376, "grad_norm": 0.9155670110503722, "learning_rate": 6.206693347334541e-06, "loss": 0.0378, "step": 24890 }, { "epoch": 0.10387545793659404, "grad_norm": 0.8944715193257171, "learning_rate": 6.206070003854714e-06, "loss": 0.047, "step": 24895 }, { "epoch": 0.10389632065158431, "grad_norm": 0.7392327468383668, "learning_rate": 6.2054468481459154e-06, "loss": 0.0438, "step": 24900 }, { "epoch": 0.10391718336657459, "grad_norm": 1.2173495954570293, "learning_rate": 6.204823880113892e-06, "loss": 0.0477, "step": 24905 }, { "epoch": 0.10393804608156487, "grad_norm": 0.633209391939322, "learning_rate": 6.20420109966446e-06, "loss": 0.0375, "step": 24910 }, { "epoch": 0.10395890879655514, "grad_norm": 1.5201762901688496, "learning_rate": 6.203578506703497e-06, "loss": 0.0632, "step": 24915 }, { "epoch": 0.10397977151154543, "grad_norm": 1.159755642793415, "learning_rate": 6.202956101136951e-06, "loss": 0.0404, "step": 24920 }, { "epoch": 0.1040006342265357, "grad_norm": 1.0249810137084894, "learning_rate": 6.202333882870836e-06, "loss": 0.0508, "step": 24925 }, { "epoch": 0.10402149694152599, "grad_norm": 1.0623309549795923, "learning_rate": 6.2017118518112265e-06, "loss": 0.0427, "step": 24930 }, { "epoch": 0.10404235965651626, "grad_norm": 1.0286905274759721, "learning_rate": 6.201090007864269e-06, "loss": 0.0571, "step": 24935 }, { "epoch": 0.10406322237150654, "grad_norm": 0.9765062666408683, "learning_rate": 6.200468350936174e-06, "loss": 0.0371, "step": 24940 }, { "epoch": 0.10408408508649682, "grad_norm": 0.9927348818039837, "learning_rate": 6.199846880933216e-06, "loss": 0.0597, "step": 24945 }, { "epoch": 0.10410494780148709, "grad_norm": 0.9438615596064797, "learning_rate": 6.199225597761737e-06, "loss": 0.0427, "step": 24950 }, { "epoch": 0.10412581051647737, "grad_norm": 1.339857779651184, "learning_rate": 6.198604501328146e-06, "loss": 0.0521, "step": 24955 }, { "epoch": 0.10414667323146765, "grad_norm": 0.9718901252809014, "learning_rate": 6.197983591538914e-06, "loss": 0.0505, "step": 24960 }, { "epoch": 0.10416753594645793, "grad_norm": 1.0297304643739544, "learning_rate": 6.197362868300581e-06, "loss": 0.0552, "step": 24965 }, { "epoch": 0.1041883986614482, "grad_norm": 0.6444616646556696, "learning_rate": 6.19674233151975e-06, "loss": 0.041, "step": 24970 }, { "epoch": 0.10420926137643849, "grad_norm": 1.0505859488276632, "learning_rate": 6.196121981103092e-06, "loss": 0.0424, "step": 24975 }, { "epoch": 0.10423012409142876, "grad_norm": 1.1502551776697134, "learning_rate": 6.19550181695734e-06, "loss": 0.0398, "step": 24980 }, { "epoch": 0.10425098680641905, "grad_norm": 0.8634125329240758, "learning_rate": 6.1948818389892965e-06, "loss": 0.0424, "step": 24985 }, { "epoch": 0.10427184952140932, "grad_norm": 0.5870986531110222, "learning_rate": 6.1942620471058234e-06, "loss": 0.045, "step": 24990 }, { "epoch": 0.10429271223639959, "grad_norm": 0.9597565840287153, "learning_rate": 6.193642441213854e-06, "loss": 0.0577, "step": 24995 }, { "epoch": 0.10431357495138988, "grad_norm": 0.7209339728856596, "learning_rate": 6.193023021220385e-06, "loss": 0.0383, "step": 25000 }, { "epoch": 0.10433443766638015, "grad_norm": 0.5825491429381392, "learning_rate": 6.192403787032475e-06, "loss": 0.0401, "step": 25005 }, { "epoch": 0.10435530038137043, "grad_norm": 0.8230243061907726, "learning_rate": 6.191784738557251e-06, "loss": 0.0444, "step": 25010 }, { "epoch": 0.1043761630963607, "grad_norm": 1.4544183129655455, "learning_rate": 6.191165875701907e-06, "loss": 0.0439, "step": 25015 }, { "epoch": 0.10439702581135099, "grad_norm": 1.3298619334699402, "learning_rate": 6.190547198373693e-06, "loss": 0.0383, "step": 25020 }, { "epoch": 0.10441788852634126, "grad_norm": 0.8494828889594299, "learning_rate": 6.189928706479934e-06, "loss": 0.0266, "step": 25025 }, { "epoch": 0.10443875124133155, "grad_norm": 1.5630048309773104, "learning_rate": 6.189310399928015e-06, "loss": 0.0539, "step": 25030 }, { "epoch": 0.10445961395632182, "grad_norm": 0.9429051663950244, "learning_rate": 6.1886922786253855e-06, "loss": 0.048, "step": 25035 }, { "epoch": 0.10448047667131209, "grad_norm": 0.9697258172444643, "learning_rate": 6.188074342479561e-06, "loss": 0.0434, "step": 25040 }, { "epoch": 0.10450133938630238, "grad_norm": 0.830555350597004, "learning_rate": 6.18745659139812e-06, "loss": 0.0511, "step": 25045 }, { "epoch": 0.10452220210129265, "grad_norm": 3.057993125844297, "learning_rate": 6.186839025288708e-06, "loss": 0.058, "step": 25050 }, { "epoch": 0.10454306481628294, "grad_norm": 1.2957878913376422, "learning_rate": 6.186221644059034e-06, "loss": 0.0429, "step": 25055 }, { "epoch": 0.10456392753127321, "grad_norm": 1.5960163594710366, "learning_rate": 6.18560444761687e-06, "loss": 0.042, "step": 25060 }, { "epoch": 0.10458479024626349, "grad_norm": 1.2788923160123526, "learning_rate": 6.184987435870053e-06, "loss": 0.0576, "step": 25065 }, { "epoch": 0.10460565296125376, "grad_norm": 0.7352379291201547, "learning_rate": 6.184370608726488e-06, "loss": 0.045, "step": 25070 }, { "epoch": 0.10462651567624405, "grad_norm": 1.1905821596077735, "learning_rate": 6.1837539660941385e-06, "loss": 0.0567, "step": 25075 }, { "epoch": 0.10464737839123432, "grad_norm": 0.766489384620778, "learning_rate": 6.1831375078810345e-06, "loss": 0.0374, "step": 25080 }, { "epoch": 0.1046682411062246, "grad_norm": 0.9921755587493274, "learning_rate": 6.182521233995273e-06, "loss": 0.0517, "step": 25085 }, { "epoch": 0.10468910382121488, "grad_norm": 1.334713242725241, "learning_rate": 6.181905144345011e-06, "loss": 0.0508, "step": 25090 }, { "epoch": 0.10470996653620515, "grad_norm": 1.1488086208545158, "learning_rate": 6.181289238838472e-06, "loss": 0.0557, "step": 25095 }, { "epoch": 0.10473082925119544, "grad_norm": 0.7938495976708545, "learning_rate": 6.180673517383942e-06, "loss": 0.0433, "step": 25100 }, { "epoch": 0.10475169196618571, "grad_norm": 1.0656225552617529, "learning_rate": 6.1800579798897705e-06, "loss": 0.0491, "step": 25105 }, { "epoch": 0.104772554681176, "grad_norm": 1.091441119196744, "learning_rate": 6.179442626264376e-06, "loss": 0.049, "step": 25110 }, { "epoch": 0.10479341739616627, "grad_norm": 0.6510538884007098, "learning_rate": 6.178827456416234e-06, "loss": 0.0377, "step": 25115 }, { "epoch": 0.10481428011115655, "grad_norm": 1.76775742257528, "learning_rate": 6.1782124702538855e-06, "loss": 0.0586, "step": 25120 }, { "epoch": 0.10483514282614682, "grad_norm": 1.458810616683472, "learning_rate": 6.177597667685941e-06, "loss": 0.0581, "step": 25125 }, { "epoch": 0.1048560055411371, "grad_norm": 1.072783017356283, "learning_rate": 6.176983048621065e-06, "loss": 0.0376, "step": 25130 }, { "epoch": 0.10487686825612738, "grad_norm": 0.9758734830864895, "learning_rate": 6.176368612967991e-06, "loss": 0.0397, "step": 25135 }, { "epoch": 0.10489773097111765, "grad_norm": 1.1485268315760315, "learning_rate": 6.175754360635519e-06, "loss": 0.0454, "step": 25140 }, { "epoch": 0.10491859368610794, "grad_norm": 1.010671544967426, "learning_rate": 6.175140291532508e-06, "loss": 0.0485, "step": 25145 }, { "epoch": 0.10493945640109821, "grad_norm": 1.1192322591062664, "learning_rate": 6.1745264055678796e-06, "loss": 0.0431, "step": 25150 }, { "epoch": 0.1049603191160885, "grad_norm": 1.0069553959067887, "learning_rate": 6.173912702650623e-06, "loss": 0.0477, "step": 25155 }, { "epoch": 0.10498118183107877, "grad_norm": 1.1427352933253772, "learning_rate": 6.173299182689786e-06, "loss": 0.0534, "step": 25160 }, { "epoch": 0.10500204454606905, "grad_norm": 1.5351237623902687, "learning_rate": 6.172685845594484e-06, "loss": 0.0561, "step": 25165 }, { "epoch": 0.10502290726105933, "grad_norm": 0.6842076090904772, "learning_rate": 6.172072691273892e-06, "loss": 0.0424, "step": 25170 }, { "epoch": 0.1050437699760496, "grad_norm": 0.8016271352495026, "learning_rate": 6.171459719637253e-06, "loss": 0.0319, "step": 25175 }, { "epoch": 0.10506463269103988, "grad_norm": 0.9778779602146115, "learning_rate": 6.170846930593866e-06, "loss": 0.0499, "step": 25180 }, { "epoch": 0.10508549540603015, "grad_norm": 8.0287190751412, "learning_rate": 6.170234324053099e-06, "loss": 0.0478, "step": 25185 }, { "epoch": 0.10510635812102044, "grad_norm": 0.9216434302337185, "learning_rate": 6.169621899924381e-06, "loss": 0.0504, "step": 25190 }, { "epoch": 0.10512722083601071, "grad_norm": 0.8087643885131155, "learning_rate": 6.169009658117203e-06, "loss": 0.0387, "step": 25195 }, { "epoch": 0.105148083551001, "grad_norm": 0.8828298772972333, "learning_rate": 6.1683975985411194e-06, "loss": 0.0459, "step": 25200 }, { "epoch": 0.10516894626599127, "grad_norm": 1.2514933300891127, "learning_rate": 6.167785721105749e-06, "loss": 0.0539, "step": 25205 }, { "epoch": 0.10518980898098156, "grad_norm": 0.7204168636489937, "learning_rate": 6.16717402572077e-06, "loss": 0.0398, "step": 25210 }, { "epoch": 0.10521067169597183, "grad_norm": 1.057518689484874, "learning_rate": 6.166562512295929e-06, "loss": 0.0373, "step": 25215 }, { "epoch": 0.1052315344109621, "grad_norm": 1.992254536106383, "learning_rate": 6.165951180741026e-06, "loss": 0.0519, "step": 25220 }, { "epoch": 0.10525239712595238, "grad_norm": 1.40746868210433, "learning_rate": 6.165340030965934e-06, "loss": 0.0538, "step": 25225 }, { "epoch": 0.10527325984094266, "grad_norm": 0.8188347979005403, "learning_rate": 6.164729062880581e-06, "loss": 0.0488, "step": 25230 }, { "epoch": 0.10529412255593294, "grad_norm": 0.8767702483645519, "learning_rate": 6.164118276394961e-06, "loss": 0.0381, "step": 25235 }, { "epoch": 0.10531498527092321, "grad_norm": 0.9260930820643859, "learning_rate": 6.16350767141913e-06, "loss": 0.0614, "step": 25240 }, { "epoch": 0.1053358479859135, "grad_norm": 1.868551980478146, "learning_rate": 6.162897247863204e-06, "loss": 0.043, "step": 25245 }, { "epoch": 0.10535671070090377, "grad_norm": 0.936564382035435, "learning_rate": 6.162287005637365e-06, "loss": 0.0468, "step": 25250 }, { "epoch": 0.10537757341589406, "grad_norm": 0.7014559933234772, "learning_rate": 6.161676944651855e-06, "loss": 0.0343, "step": 25255 }, { "epoch": 0.10539843613088433, "grad_norm": 0.9722572429820653, "learning_rate": 6.161067064816979e-06, "loss": 0.036, "step": 25260 }, { "epoch": 0.1054192988458746, "grad_norm": 4.625982352316313, "learning_rate": 6.160457366043102e-06, "loss": 0.0499, "step": 25265 }, { "epoch": 0.10544016156086489, "grad_norm": 0.9641277537227076, "learning_rate": 6.1598478482406546e-06, "loss": 0.0327, "step": 25270 }, { "epoch": 0.10546102427585516, "grad_norm": 0.624321797003279, "learning_rate": 6.159238511320126e-06, "loss": 0.0469, "step": 25275 }, { "epoch": 0.10548188699084544, "grad_norm": 1.3982733709301556, "learning_rate": 6.15862935519207e-06, "loss": 0.0475, "step": 25280 }, { "epoch": 0.10550274970583572, "grad_norm": 1.193220454707697, "learning_rate": 6.158020379767102e-06, "loss": 0.0526, "step": 25285 }, { "epoch": 0.105523612420826, "grad_norm": 1.1236793040935775, "learning_rate": 6.157411584955897e-06, "loss": 0.0571, "step": 25290 }, { "epoch": 0.10554447513581627, "grad_norm": 1.1401852468642153, "learning_rate": 6.1568029706691955e-06, "loss": 0.0421, "step": 25295 }, { "epoch": 0.10556533785080656, "grad_norm": 0.923558352814021, "learning_rate": 6.156194536817795e-06, "loss": 0.0719, "step": 25300 }, { "epoch": 0.10558620056579683, "grad_norm": 1.6943055014423356, "learning_rate": 6.155586283312559e-06, "loss": 0.0698, "step": 25305 }, { "epoch": 0.1056070632807871, "grad_norm": 1.2444446915697744, "learning_rate": 6.154978210064412e-06, "loss": 0.042, "step": 25310 }, { "epoch": 0.10562792599577739, "grad_norm": 0.9024736527018289, "learning_rate": 6.154370316984336e-06, "loss": 0.0437, "step": 25315 }, { "epoch": 0.10564878871076766, "grad_norm": 4.651130395410068, "learning_rate": 6.153762603983378e-06, "loss": 0.0516, "step": 25320 }, { "epoch": 0.10566965142575795, "grad_norm": 1.463726647232538, "learning_rate": 6.153155070972649e-06, "loss": 0.0609, "step": 25325 }, { "epoch": 0.10569051414074822, "grad_norm": 1.0783518099752187, "learning_rate": 6.152547717863317e-06, "loss": 0.0599, "step": 25330 }, { "epoch": 0.1057113768557385, "grad_norm": 0.7239046910964755, "learning_rate": 6.151940544566612e-06, "loss": 0.0362, "step": 25335 }, { "epoch": 0.10573223957072878, "grad_norm": 1.4868081762218068, "learning_rate": 6.151333550993825e-06, "loss": 0.0528, "step": 25340 }, { "epoch": 0.10575310228571906, "grad_norm": 1.230847853220983, "learning_rate": 6.150726737056313e-06, "loss": 0.0444, "step": 25345 }, { "epoch": 0.10577396500070933, "grad_norm": 0.8035330114668644, "learning_rate": 6.150120102665488e-06, "loss": 0.0459, "step": 25350 }, { "epoch": 0.1057948277156996, "grad_norm": 1.407243248683294, "learning_rate": 6.149513647732827e-06, "loss": 0.0474, "step": 25355 }, { "epoch": 0.10581569043068989, "grad_norm": 1.1721411961134571, "learning_rate": 6.148907372169865e-06, "loss": 0.0402, "step": 25360 }, { "epoch": 0.10583655314568016, "grad_norm": 1.1280232986279408, "learning_rate": 6.148301275888201e-06, "loss": 0.0558, "step": 25365 }, { "epoch": 0.10585741586067045, "grad_norm": 1.2163810160587154, "learning_rate": 6.147695358799495e-06, "loss": 0.0507, "step": 25370 }, { "epoch": 0.10587827857566072, "grad_norm": 0.7880753049666501, "learning_rate": 6.147089620815464e-06, "loss": 0.0465, "step": 25375 }, { "epoch": 0.105899141290651, "grad_norm": 1.2686230971549626, "learning_rate": 6.14648406184789e-06, "loss": 0.0506, "step": 25380 }, { "epoch": 0.10592000400564128, "grad_norm": 0.9069398083408409, "learning_rate": 6.1458786818086166e-06, "loss": 0.052, "step": 25385 }, { "epoch": 0.10594086672063156, "grad_norm": 1.5499405625187321, "learning_rate": 6.145273480609544e-06, "loss": 0.0368, "step": 25390 }, { "epoch": 0.10596172943562183, "grad_norm": 1.2768746139716842, "learning_rate": 6.144668458162634e-06, "loss": 0.0445, "step": 25395 }, { "epoch": 0.1059825921506121, "grad_norm": 0.9738766655224438, "learning_rate": 6.144063614379913e-06, "loss": 0.0422, "step": 25400 }, { "epoch": 0.10600345486560239, "grad_norm": 1.1903190264995294, "learning_rate": 6.143458949173463e-06, "loss": 0.0487, "step": 25405 }, { "epoch": 0.10602431758059266, "grad_norm": 1.6084276430560305, "learning_rate": 6.1428544624554296e-06, "loss": 0.0854, "step": 25410 }, { "epoch": 0.10604518029558295, "grad_norm": 1.3382120591484639, "learning_rate": 6.142250154138019e-06, "loss": 0.0538, "step": 25415 }, { "epoch": 0.10606604301057322, "grad_norm": 1.0376011467291535, "learning_rate": 6.141646024133494e-06, "loss": 0.0525, "step": 25420 }, { "epoch": 0.10608690572556351, "grad_norm": 0.9053794131378718, "learning_rate": 6.141042072354184e-06, "loss": 0.0469, "step": 25425 }, { "epoch": 0.10610776844055378, "grad_norm": 1.221081661917833, "learning_rate": 6.1404382987124745e-06, "loss": 0.0314, "step": 25430 }, { "epoch": 0.10612863115554406, "grad_norm": 1.404341409965797, "learning_rate": 6.139834703120812e-06, "loss": 0.0452, "step": 25435 }, { "epoch": 0.10614949387053434, "grad_norm": 0.979836997981338, "learning_rate": 6.139231285491702e-06, "loss": 0.0399, "step": 25440 }, { "epoch": 0.10617035658552461, "grad_norm": 0.7329726796511497, "learning_rate": 6.138628045737715e-06, "loss": 0.048, "step": 25445 }, { "epoch": 0.1061912193005149, "grad_norm": 1.556481974902811, "learning_rate": 6.138024983771474e-06, "loss": 0.0564, "step": 25450 }, { "epoch": 0.10621208201550517, "grad_norm": 1.1185549530190908, "learning_rate": 6.137422099505671e-06, "loss": 0.0471, "step": 25455 }, { "epoch": 0.10623294473049545, "grad_norm": 1.0306149380981624, "learning_rate": 6.1368193928530506e-06, "loss": 0.0393, "step": 25460 }, { "epoch": 0.10625380744548572, "grad_norm": 0.5977505028570412, "learning_rate": 6.136216863726419e-06, "loss": 0.0386, "step": 25465 }, { "epoch": 0.10627467016047601, "grad_norm": 0.9142562370432583, "learning_rate": 6.135614512038647e-06, "loss": 0.0492, "step": 25470 }, { "epoch": 0.10629553287546628, "grad_norm": 0.6464801835137611, "learning_rate": 6.135012337702659e-06, "loss": 0.0443, "step": 25475 }, { "epoch": 0.10631639559045655, "grad_norm": 0.9430279735053582, "learning_rate": 6.134410340631443e-06, "loss": 0.0423, "step": 25480 }, { "epoch": 0.10633725830544684, "grad_norm": 0.718424868571003, "learning_rate": 6.133808520738045e-06, "loss": 0.0464, "step": 25485 }, { "epoch": 0.10635812102043711, "grad_norm": 0.7405614557415457, "learning_rate": 6.133206877935573e-06, "loss": 0.0408, "step": 25490 }, { "epoch": 0.1063789837354274, "grad_norm": 0.8236663527663814, "learning_rate": 6.132605412137191e-06, "loss": 0.0484, "step": 25495 }, { "epoch": 0.10639984645041767, "grad_norm": 1.0946525545962997, "learning_rate": 6.132004123256125e-06, "loss": 0.042, "step": 25500 }, { "epoch": 0.10642070916540795, "grad_norm": 0.9387145470842859, "learning_rate": 6.13140301120566e-06, "loss": 0.0528, "step": 25505 }, { "epoch": 0.10644157188039823, "grad_norm": 1.1711699598160585, "learning_rate": 6.130802075899143e-06, "loss": 0.0566, "step": 25510 }, { "epoch": 0.10646243459538851, "grad_norm": 0.6598292446534069, "learning_rate": 6.130201317249975e-06, "loss": 0.044, "step": 25515 }, { "epoch": 0.10648329731037878, "grad_norm": 1.2233385921305804, "learning_rate": 6.129600735171623e-06, "loss": 0.0426, "step": 25520 }, { "epoch": 0.10650416002536905, "grad_norm": 0.8766240508190217, "learning_rate": 6.129000329577606e-06, "loss": 0.0428, "step": 25525 }, { "epoch": 0.10652502274035934, "grad_norm": 0.9461749807417043, "learning_rate": 6.1284001003815074e-06, "loss": 0.0599, "step": 25530 }, { "epoch": 0.10654588545534961, "grad_norm": 1.043107912039932, "learning_rate": 6.12780004749697e-06, "loss": 0.0385, "step": 25535 }, { "epoch": 0.1065667481703399, "grad_norm": 1.6955746675042942, "learning_rate": 6.127200170837693e-06, "loss": 0.0435, "step": 25540 }, { "epoch": 0.10658761088533017, "grad_norm": 1.2336057307995025, "learning_rate": 6.126600470317437e-06, "loss": 0.0475, "step": 25545 }, { "epoch": 0.10660847360032046, "grad_norm": 0.9137094971840849, "learning_rate": 6.126000945850019e-06, "loss": 0.0338, "step": 25550 }, { "epoch": 0.10662933631531073, "grad_norm": 1.007945049482134, "learning_rate": 6.1254015973493185e-06, "loss": 0.0381, "step": 25555 }, { "epoch": 0.10665019903030101, "grad_norm": 1.0993275032353216, "learning_rate": 6.124802424729271e-06, "loss": 0.0526, "step": 25560 }, { "epoch": 0.10667106174529128, "grad_norm": 1.2460067591789896, "learning_rate": 6.124203427903874e-06, "loss": 0.0402, "step": 25565 }, { "epoch": 0.10669192446028156, "grad_norm": 1.1416447569885253, "learning_rate": 6.123604606787179e-06, "loss": 0.0442, "step": 25570 }, { "epoch": 0.10671278717527184, "grad_norm": 1.5462848921286605, "learning_rate": 6.1230059612933025e-06, "loss": 0.0526, "step": 25575 }, { "epoch": 0.10673364989026211, "grad_norm": 0.8988789038039707, "learning_rate": 6.1224074913364154e-06, "loss": 0.0437, "step": 25580 }, { "epoch": 0.1067545126052524, "grad_norm": 0.9194953262972463, "learning_rate": 6.121809196830746e-06, "loss": 0.0596, "step": 25585 }, { "epoch": 0.10677537532024267, "grad_norm": 0.9980345067113635, "learning_rate": 6.121211077690588e-06, "loss": 0.0525, "step": 25590 }, { "epoch": 0.10679623803523296, "grad_norm": 1.434408201031242, "learning_rate": 6.120613133830286e-06, "loss": 0.0473, "step": 25595 }, { "epoch": 0.10681710075022323, "grad_norm": 0.768461911699398, "learning_rate": 6.120015365164248e-06, "loss": 0.0368, "step": 25600 }, { "epoch": 0.10683796346521351, "grad_norm": 1.3147986371027585, "learning_rate": 6.119417771606939e-06, "loss": 0.0562, "step": 25605 }, { "epoch": 0.10685882618020379, "grad_norm": 0.9603745308052127, "learning_rate": 6.118820353072883e-06, "loss": 0.0587, "step": 25610 }, { "epoch": 0.10687968889519406, "grad_norm": 1.1720512359429576, "learning_rate": 6.118223109476662e-06, "loss": 0.0503, "step": 25615 }, { "epoch": 0.10690055161018434, "grad_norm": 1.134545005612737, "learning_rate": 6.117626040732916e-06, "loss": 0.0441, "step": 25620 }, { "epoch": 0.10692141432517462, "grad_norm": 0.6830787286130995, "learning_rate": 6.1170291467563425e-06, "loss": 0.0419, "step": 25625 }, { "epoch": 0.1069422770401649, "grad_norm": 1.308771086838087, "learning_rate": 6.1164324274617e-06, "loss": 0.0539, "step": 25630 }, { "epoch": 0.10696313975515517, "grad_norm": 2.380925013053529, "learning_rate": 6.115835882763803e-06, "loss": 0.0449, "step": 25635 }, { "epoch": 0.10698400247014546, "grad_norm": 2.072456137440257, "learning_rate": 6.1152395125775245e-06, "loss": 0.0463, "step": 25640 }, { "epoch": 0.10700486518513573, "grad_norm": 1.169813035990089, "learning_rate": 6.114643316817797e-06, "loss": 0.0413, "step": 25645 }, { "epoch": 0.10702572790012602, "grad_norm": 2.404547091741639, "learning_rate": 6.114047295399608e-06, "loss": 0.061, "step": 25650 }, { "epoch": 0.10704659061511629, "grad_norm": 1.0613787316541086, "learning_rate": 6.1134514482380045e-06, "loss": 0.0498, "step": 25655 }, { "epoch": 0.10706745333010656, "grad_norm": 0.9214595813710607, "learning_rate": 6.112855775248096e-06, "loss": 0.0395, "step": 25660 }, { "epoch": 0.10708831604509685, "grad_norm": 1.2741287386046742, "learning_rate": 6.11226027634504e-06, "loss": 0.0424, "step": 25665 }, { "epoch": 0.10710917876008712, "grad_norm": 1.1644764843807576, "learning_rate": 6.11166495144406e-06, "loss": 0.0461, "step": 25670 }, { "epoch": 0.1071300414750774, "grad_norm": 0.825365639221874, "learning_rate": 6.111069800460437e-06, "loss": 0.049, "step": 25675 }, { "epoch": 0.10715090419006768, "grad_norm": 1.0643871876473812, "learning_rate": 6.110474823309504e-06, "loss": 0.0459, "step": 25680 }, { "epoch": 0.10717176690505796, "grad_norm": 1.1295454441158155, "learning_rate": 6.1098800199066575e-06, "loss": 0.0442, "step": 25685 }, { "epoch": 0.10719262962004823, "grad_norm": 1.1276831721156981, "learning_rate": 6.109285390167349e-06, "loss": 0.0462, "step": 25690 }, { "epoch": 0.10721349233503852, "grad_norm": 0.9312586700945047, "learning_rate": 6.108690934007086e-06, "loss": 0.0461, "step": 25695 }, { "epoch": 0.10723435505002879, "grad_norm": 0.745747297258799, "learning_rate": 6.108096651341439e-06, "loss": 0.0495, "step": 25700 }, { "epoch": 0.10725521776501906, "grad_norm": 0.8177888087709697, "learning_rate": 6.107502542086029e-06, "loss": 0.0377, "step": 25705 }, { "epoch": 0.10727608048000935, "grad_norm": 0.8667065433168497, "learning_rate": 6.106908606156541e-06, "loss": 0.0393, "step": 25710 }, { "epoch": 0.10729694319499962, "grad_norm": 1.1387172289722125, "learning_rate": 6.106314843468712e-06, "loss": 0.0597, "step": 25715 }, { "epoch": 0.1073178059099899, "grad_norm": 0.9082236311715938, "learning_rate": 6.105721253938338e-06, "loss": 0.0484, "step": 25720 }, { "epoch": 0.10733866862498018, "grad_norm": 0.8508752218023513, "learning_rate": 6.105127837481276e-06, "loss": 0.0426, "step": 25725 }, { "epoch": 0.10735953133997046, "grad_norm": 0.772500087292842, "learning_rate": 6.104534594013436e-06, "loss": 0.0538, "step": 25730 }, { "epoch": 0.10738039405496073, "grad_norm": 1.4331072096659414, "learning_rate": 6.103941523450786e-06, "loss": 0.0595, "step": 25735 }, { "epoch": 0.10740125676995102, "grad_norm": 1.4094772753646185, "learning_rate": 6.103348625709351e-06, "loss": 0.0485, "step": 25740 }, { "epoch": 0.10742211948494129, "grad_norm": 0.7326801637576301, "learning_rate": 6.102755900705214e-06, "loss": 0.0339, "step": 25745 }, { "epoch": 0.10744298219993156, "grad_norm": 1.4482737689870253, "learning_rate": 6.102163348354514e-06, "loss": 0.0561, "step": 25750 }, { "epoch": 0.10746384491492185, "grad_norm": 0.9362917861931829, "learning_rate": 6.101570968573449e-06, "loss": 0.0377, "step": 25755 }, { "epoch": 0.10748470762991212, "grad_norm": 1.5833984702360335, "learning_rate": 6.1009787612782715e-06, "loss": 0.0519, "step": 25760 }, { "epoch": 0.1075055703449024, "grad_norm": 1.1587420761208858, "learning_rate": 6.100386726385291e-06, "loss": 0.0478, "step": 25765 }, { "epoch": 0.10752643305989268, "grad_norm": 0.5351082639708118, "learning_rate": 6.099794863810878e-06, "loss": 0.0425, "step": 25770 }, { "epoch": 0.10754729577488296, "grad_norm": 1.0586758981625757, "learning_rate": 6.099203173471453e-06, "loss": 0.0524, "step": 25775 }, { "epoch": 0.10756815848987324, "grad_norm": 1.5010603953681818, "learning_rate": 6.098611655283499e-06, "loss": 0.048, "step": 25780 }, { "epoch": 0.10758902120486352, "grad_norm": 1.0084813256339014, "learning_rate": 6.0980203091635525e-06, "loss": 0.0373, "step": 25785 }, { "epoch": 0.1076098839198538, "grad_norm": 0.9845145519429399, "learning_rate": 6.0974291350282085e-06, "loss": 0.0392, "step": 25790 }, { "epoch": 0.10763074663484407, "grad_norm": 1.3979290473110229, "learning_rate": 6.096838132794116e-06, "loss": 0.0607, "step": 25795 }, { "epoch": 0.10765160934983435, "grad_norm": 1.0162055374070198, "learning_rate": 6.096247302377984e-06, "loss": 0.0487, "step": 25800 }, { "epoch": 0.10767247206482462, "grad_norm": 1.3622456374147232, "learning_rate": 6.095656643696575e-06, "loss": 0.0411, "step": 25805 }, { "epoch": 0.10769333477981491, "grad_norm": 0.9145642906512337, "learning_rate": 6.095066156666711e-06, "loss": 0.0485, "step": 25810 }, { "epoch": 0.10771419749480518, "grad_norm": 0.9154821720730266, "learning_rate": 6.094475841205266e-06, "loss": 0.0356, "step": 25815 }, { "epoch": 0.10773506020979547, "grad_norm": 1.1780787309714567, "learning_rate": 6.093885697229175e-06, "loss": 0.051, "step": 25820 }, { "epoch": 0.10775592292478574, "grad_norm": 1.2976226752672342, "learning_rate": 6.0932957246554255e-06, "loss": 0.0529, "step": 25825 }, { "epoch": 0.10777678563977602, "grad_norm": 1.3835479138284508, "learning_rate": 6.0927059234010656e-06, "loss": 0.0582, "step": 25830 }, { "epoch": 0.1077976483547663, "grad_norm": 2.5426343746610374, "learning_rate": 6.092116293383194e-06, "loss": 0.0449, "step": 25835 }, { "epoch": 0.10781851106975657, "grad_norm": 0.8868137833382309, "learning_rate": 6.091526834518969e-06, "loss": 0.0353, "step": 25840 }, { "epoch": 0.10783937378474685, "grad_norm": 1.2179256162257794, "learning_rate": 6.090937546725607e-06, "loss": 0.0526, "step": 25845 }, { "epoch": 0.10786023649973712, "grad_norm": 0.6207346954394951, "learning_rate": 6.090348429920375e-06, "loss": 0.0376, "step": 25850 }, { "epoch": 0.10788109921472741, "grad_norm": 0.9387110741220793, "learning_rate": 6.0897594840205995e-06, "loss": 0.0342, "step": 25855 }, { "epoch": 0.10790196192971768, "grad_norm": 1.3144370840972188, "learning_rate": 6.089170708943666e-06, "loss": 0.0476, "step": 25860 }, { "epoch": 0.10792282464470797, "grad_norm": 1.1835423951554682, "learning_rate": 6.0885821046070074e-06, "loss": 0.0352, "step": 25865 }, { "epoch": 0.10794368735969824, "grad_norm": 0.7511514534900748, "learning_rate": 6.087993670928117e-06, "loss": 0.0402, "step": 25870 }, { "epoch": 0.10796455007468853, "grad_norm": 1.9510084295202028, "learning_rate": 6.0874054078245495e-06, "loss": 0.0506, "step": 25875 }, { "epoch": 0.1079854127896788, "grad_norm": 0.5221039699800138, "learning_rate": 6.0868173152139064e-06, "loss": 0.0568, "step": 25880 }, { "epoch": 0.10800627550466907, "grad_norm": 0.8971779597562107, "learning_rate": 6.086229393013849e-06, "loss": 0.0471, "step": 25885 }, { "epoch": 0.10802713821965935, "grad_norm": 0.8689495293277064, "learning_rate": 6.085641641142095e-06, "loss": 0.0437, "step": 25890 }, { "epoch": 0.10804800093464963, "grad_norm": 1.0583983711712468, "learning_rate": 6.0850540595164155e-06, "loss": 0.0458, "step": 25895 }, { "epoch": 0.10806886364963991, "grad_norm": 1.1345470923612033, "learning_rate": 6.084466648054638e-06, "loss": 0.0411, "step": 25900 }, { "epoch": 0.10808972636463018, "grad_norm": 0.8565434531916725, "learning_rate": 6.083879406674647e-06, "loss": 0.0473, "step": 25905 }, { "epoch": 0.10811058907962047, "grad_norm": 0.8218822550404811, "learning_rate": 6.083292335294381e-06, "loss": 0.0625, "step": 25910 }, { "epoch": 0.10813145179461074, "grad_norm": 1.5249531215742618, "learning_rate": 6.082705433831834e-06, "loss": 0.0619, "step": 25915 }, { "epoch": 0.10815231450960103, "grad_norm": 1.442682176765402, "learning_rate": 6.082118702205054e-06, "loss": 0.0533, "step": 25920 }, { "epoch": 0.1081731772245913, "grad_norm": 0.7299328119995576, "learning_rate": 6.081532140332148e-06, "loss": 0.0368, "step": 25925 }, { "epoch": 0.10819403993958157, "grad_norm": 0.9148663184266849, "learning_rate": 6.080945748131276e-06, "loss": 0.044, "step": 25930 }, { "epoch": 0.10821490265457186, "grad_norm": 1.206202864573302, "learning_rate": 6.080359525520652e-06, "loss": 0.0365, "step": 25935 }, { "epoch": 0.10823576536956213, "grad_norm": 1.0510098965987253, "learning_rate": 6.079773472418548e-06, "loss": 0.0384, "step": 25940 }, { "epoch": 0.10825662808455241, "grad_norm": 1.0861262721330849, "learning_rate": 6.079187588743288e-06, "loss": 0.0431, "step": 25945 }, { "epoch": 0.10827749079954269, "grad_norm": 1.218243622130078, "learning_rate": 6.078601874413255e-06, "loss": 0.0396, "step": 25950 }, { "epoch": 0.10829835351453297, "grad_norm": 0.6595512221921666, "learning_rate": 6.078016329346883e-06, "loss": 0.0457, "step": 25955 }, { "epoch": 0.10831921622952324, "grad_norm": 1.7182969616316066, "learning_rate": 6.077430953462663e-06, "loss": 0.0451, "step": 25960 }, { "epoch": 0.10834007894451353, "grad_norm": 1.1327675552009937, "learning_rate": 6.076845746679142e-06, "loss": 0.0475, "step": 25965 }, { "epoch": 0.1083609416595038, "grad_norm": 0.8283321258337086, "learning_rate": 6.076260708914918e-06, "loss": 0.0487, "step": 25970 }, { "epoch": 0.10838180437449407, "grad_norm": 1.069239135648013, "learning_rate": 6.075675840088649e-06, "loss": 0.0442, "step": 25975 }, { "epoch": 0.10840266708948436, "grad_norm": 1.254703875031278, "learning_rate": 6.075091140119045e-06, "loss": 0.0436, "step": 25980 }, { "epoch": 0.10842352980447463, "grad_norm": 1.2068817575609487, "learning_rate": 6.0745066089248705e-06, "loss": 0.061, "step": 25985 }, { "epoch": 0.10844439251946492, "grad_norm": 0.8839399998941374, "learning_rate": 6.0739222464249446e-06, "loss": 0.0371, "step": 25990 }, { "epoch": 0.10846525523445519, "grad_norm": 0.8164614734850157, "learning_rate": 6.073338052538142e-06, "loss": 0.0406, "step": 25995 }, { "epoch": 0.10848611794944547, "grad_norm": 1.1031631422182706, "learning_rate": 6.072754027183392e-06, "loss": 0.0331, "step": 26000 }, { "epoch": 0.10850698066443575, "grad_norm": 0.9271247038410898, "learning_rate": 6.0721701702796785e-06, "loss": 0.0612, "step": 26005 }, { "epoch": 0.10852784337942603, "grad_norm": 0.9205560017691466, "learning_rate": 6.071586481746039e-06, "loss": 0.05, "step": 26010 }, { "epoch": 0.1085487060944163, "grad_norm": 1.1993567001824503, "learning_rate": 6.0710029615015655e-06, "loss": 0.0549, "step": 26015 }, { "epoch": 0.10856956880940657, "grad_norm": 1.3884611713119062, "learning_rate": 6.070419609465406e-06, "loss": 0.0568, "step": 26020 }, { "epoch": 0.10859043152439686, "grad_norm": 0.9890924911687855, "learning_rate": 6.069836425556762e-06, "loss": 0.037, "step": 26025 }, { "epoch": 0.10861129423938713, "grad_norm": 1.2671996571988602, "learning_rate": 6.0692534096948876e-06, "loss": 0.0442, "step": 26030 }, { "epoch": 0.10863215695437742, "grad_norm": 0.7783176908282292, "learning_rate": 6.068670561799094e-06, "loss": 0.0394, "step": 26035 }, { "epoch": 0.10865301966936769, "grad_norm": 0.8970390967718197, "learning_rate": 6.068087881788746e-06, "loss": 0.0448, "step": 26040 }, { "epoch": 0.10867388238435798, "grad_norm": 1.6143147780588292, "learning_rate": 6.067505369583259e-06, "loss": 0.0442, "step": 26045 }, { "epoch": 0.10869474509934825, "grad_norm": 1.2348216657577007, "learning_rate": 6.066923025102108e-06, "loss": 0.0436, "step": 26050 }, { "epoch": 0.10871560781433853, "grad_norm": 0.816156877079831, "learning_rate": 6.066340848264819e-06, "loss": 0.0308, "step": 26055 }, { "epoch": 0.1087364705293288, "grad_norm": 1.5149411185830681, "learning_rate": 6.065758838990973e-06, "loss": 0.0461, "step": 26060 }, { "epoch": 0.10875733324431908, "grad_norm": 1.823077275336832, "learning_rate": 6.065176997200205e-06, "loss": 0.0558, "step": 26065 }, { "epoch": 0.10877819595930936, "grad_norm": 0.6883017295603581, "learning_rate": 6.064595322812201e-06, "loss": 0.0607, "step": 26070 }, { "epoch": 0.10879905867429963, "grad_norm": 0.9208362269528166, "learning_rate": 6.064013815746707e-06, "loss": 0.0554, "step": 26075 }, { "epoch": 0.10881992138928992, "grad_norm": 2.1115161702334864, "learning_rate": 6.063432475923516e-06, "loss": 0.0698, "step": 26080 }, { "epoch": 0.10884078410428019, "grad_norm": 0.6188269829076404, "learning_rate": 6.062851303262481e-06, "loss": 0.0335, "step": 26085 }, { "epoch": 0.10886164681927048, "grad_norm": 1.9904367065600281, "learning_rate": 6.062270297683504e-06, "loss": 0.047, "step": 26090 }, { "epoch": 0.10888250953426075, "grad_norm": 0.8744900341700399, "learning_rate": 6.061689459106542e-06, "loss": 0.0393, "step": 26095 }, { "epoch": 0.10890337224925103, "grad_norm": 0.617811764413413, "learning_rate": 6.061108787451609e-06, "loss": 0.0507, "step": 26100 }, { "epoch": 0.1089242349642413, "grad_norm": 1.2511396341573215, "learning_rate": 6.060528282638769e-06, "loss": 0.0349, "step": 26105 }, { "epoch": 0.10894509767923158, "grad_norm": 1.4579029015276126, "learning_rate": 6.0599479445881395e-06, "loss": 0.0468, "step": 26110 }, { "epoch": 0.10896596039422186, "grad_norm": 2.251406399798208, "learning_rate": 6.059367773219892e-06, "loss": 0.0457, "step": 26115 }, { "epoch": 0.10898682310921214, "grad_norm": 1.1751561596487083, "learning_rate": 6.058787768454254e-06, "loss": 0.0361, "step": 26120 }, { "epoch": 0.10900768582420242, "grad_norm": 1.1292447149904203, "learning_rate": 6.058207930211503e-06, "loss": 0.0377, "step": 26125 }, { "epoch": 0.1090285485391927, "grad_norm": 1.31788824394165, "learning_rate": 6.057628258411973e-06, "loss": 0.0393, "step": 26130 }, { "epoch": 0.10904941125418298, "grad_norm": 1.2648452319612955, "learning_rate": 6.057048752976046e-06, "loss": 0.0459, "step": 26135 }, { "epoch": 0.10907027396917325, "grad_norm": 1.428530629726047, "learning_rate": 6.056469413824165e-06, "loss": 0.0523, "step": 26140 }, { "epoch": 0.10909113668416354, "grad_norm": 2.7537154262353067, "learning_rate": 6.0558902408768215e-06, "loss": 0.0528, "step": 26145 }, { "epoch": 0.10911199939915381, "grad_norm": 0.5567322423757932, "learning_rate": 6.055311234054559e-06, "loss": 0.0473, "step": 26150 }, { "epoch": 0.10913286211414408, "grad_norm": 1.3035966553128457, "learning_rate": 6.054732393277979e-06, "loss": 0.036, "step": 26155 }, { "epoch": 0.10915372482913437, "grad_norm": 0.7175689748166656, "learning_rate": 6.054153718467731e-06, "loss": 0.0356, "step": 26160 }, { "epoch": 0.10917458754412464, "grad_norm": 1.1659210808669305, "learning_rate": 6.053575209544521e-06, "loss": 0.0516, "step": 26165 }, { "epoch": 0.10919545025911492, "grad_norm": 0.7124662434884262, "learning_rate": 6.052996866429106e-06, "loss": 0.0447, "step": 26170 }, { "epoch": 0.1092163129741052, "grad_norm": 1.4751757374932253, "learning_rate": 6.0524186890422985e-06, "loss": 0.0428, "step": 26175 }, { "epoch": 0.10923717568909548, "grad_norm": 0.8569974466608528, "learning_rate": 6.051840677304958e-06, "loss": 0.0343, "step": 26180 }, { "epoch": 0.10925803840408575, "grad_norm": 0.965112260497789, "learning_rate": 6.0512628311380074e-06, "loss": 0.0474, "step": 26185 }, { "epoch": 0.10927890111907604, "grad_norm": 1.0374233941767013, "learning_rate": 6.050685150462413e-06, "loss": 0.0451, "step": 26190 }, { "epoch": 0.10929976383406631, "grad_norm": 0.7648053250327477, "learning_rate": 6.050107635199197e-06, "loss": 0.0421, "step": 26195 }, { "epoch": 0.10932062654905658, "grad_norm": 3.218908051625696, "learning_rate": 6.049530285269435e-06, "loss": 0.0461, "step": 26200 }, { "epoch": 0.10934148926404687, "grad_norm": 0.5425225534677317, "learning_rate": 6.0489531005942535e-06, "loss": 0.0387, "step": 26205 }, { "epoch": 0.10936235197903714, "grad_norm": 0.9571690497416394, "learning_rate": 6.048376081094834e-06, "loss": 0.0461, "step": 26210 }, { "epoch": 0.10938321469402743, "grad_norm": 0.903697292188734, "learning_rate": 6.0477992266924125e-06, "loss": 0.0406, "step": 26215 }, { "epoch": 0.1094040774090177, "grad_norm": 1.2218506089444605, "learning_rate": 6.0472225373082685e-06, "loss": 0.0402, "step": 26220 }, { "epoch": 0.10942494012400798, "grad_norm": 0.9667455389692051, "learning_rate": 6.046646012863746e-06, "loss": 0.0411, "step": 26225 }, { "epoch": 0.10944580283899825, "grad_norm": 1.104399448581952, "learning_rate": 6.046069653280232e-06, "loss": 0.0543, "step": 26230 }, { "epoch": 0.10946666555398854, "grad_norm": 0.9884556394467563, "learning_rate": 6.045493458479171e-06, "loss": 0.0412, "step": 26235 }, { "epoch": 0.10948752826897881, "grad_norm": 1.0586375314027963, "learning_rate": 6.04491742838206e-06, "loss": 0.0438, "step": 26240 }, { "epoch": 0.10950839098396908, "grad_norm": 0.9374484506275106, "learning_rate": 6.044341562910444e-06, "loss": 0.0394, "step": 26245 }, { "epoch": 0.10952925369895937, "grad_norm": 1.5367272214108827, "learning_rate": 6.043765861985924e-06, "loss": 0.0432, "step": 26250 }, { "epoch": 0.10955011641394964, "grad_norm": 0.7576622974501787, "learning_rate": 6.043190325530154e-06, "loss": 0.0406, "step": 26255 }, { "epoch": 0.10957097912893993, "grad_norm": 0.9986770005377679, "learning_rate": 6.042614953464838e-06, "loss": 0.0554, "step": 26260 }, { "epoch": 0.1095918418439302, "grad_norm": 1.1306116203938053, "learning_rate": 6.042039745711732e-06, "loss": 0.0412, "step": 26265 }, { "epoch": 0.10961270455892048, "grad_norm": 1.1297910558933422, "learning_rate": 6.0414647021926455e-06, "loss": 0.0375, "step": 26270 }, { "epoch": 0.10963356727391076, "grad_norm": 1.2437300469032269, "learning_rate": 6.04088982282944e-06, "loss": 0.0359, "step": 26275 }, { "epoch": 0.10965442998890104, "grad_norm": 1.083958093919445, "learning_rate": 6.040315107544027e-06, "loss": 0.0405, "step": 26280 }, { "epoch": 0.10967529270389131, "grad_norm": 1.0456720401504178, "learning_rate": 6.039740556258372e-06, "loss": 0.0465, "step": 26285 }, { "epoch": 0.10969615541888159, "grad_norm": 0.7385516407320504, "learning_rate": 6.0391661688944945e-06, "loss": 0.0361, "step": 26290 }, { "epoch": 0.10971701813387187, "grad_norm": 1.3457806762838587, "learning_rate": 6.038591945374461e-06, "loss": 0.046, "step": 26295 }, { "epoch": 0.10973788084886214, "grad_norm": 1.1707259307410027, "learning_rate": 6.0380178856203915e-06, "loss": 0.0487, "step": 26300 }, { "epoch": 0.10975874356385243, "grad_norm": 0.962583030473976, "learning_rate": 6.037443989554464e-06, "loss": 0.043, "step": 26305 }, { "epoch": 0.1097796062788427, "grad_norm": 1.2840118501747755, "learning_rate": 6.036870257098894e-06, "loss": 0.0482, "step": 26310 }, { "epoch": 0.10980046899383299, "grad_norm": 1.4280104690166282, "learning_rate": 6.036296688175965e-06, "loss": 0.0496, "step": 26315 }, { "epoch": 0.10982133170882326, "grad_norm": 1.0927873671673762, "learning_rate": 6.035723282708003e-06, "loss": 0.039, "step": 26320 }, { "epoch": 0.10984219442381354, "grad_norm": 1.4095749577822225, "learning_rate": 6.035150040617385e-06, "loss": 0.0447, "step": 26325 }, { "epoch": 0.10986305713880382, "grad_norm": 0.8814970609083773, "learning_rate": 6.034576961826544e-06, "loss": 0.0508, "step": 26330 }, { "epoch": 0.10988391985379409, "grad_norm": 1.1909536470203654, "learning_rate": 6.034004046257963e-06, "loss": 0.0498, "step": 26335 }, { "epoch": 0.10990478256878437, "grad_norm": 0.9903222111157592, "learning_rate": 6.0334312938341754e-06, "loss": 0.0395, "step": 26340 }, { "epoch": 0.10992564528377465, "grad_norm": 0.9591543180852251, "learning_rate": 6.032858704477765e-06, "loss": 0.0364, "step": 26345 }, { "epoch": 0.10994650799876493, "grad_norm": 1.0546860971752456, "learning_rate": 6.032286278111372e-06, "loss": 0.0491, "step": 26350 }, { "epoch": 0.1099673707137552, "grad_norm": 1.0334824040934845, "learning_rate": 6.031714014657683e-06, "loss": 0.0361, "step": 26355 }, { "epoch": 0.10998823342874549, "grad_norm": 1.1958999697464787, "learning_rate": 6.031141914039437e-06, "loss": 0.0487, "step": 26360 }, { "epoch": 0.11000909614373576, "grad_norm": 0.9436224744868268, "learning_rate": 6.030569976179427e-06, "loss": 0.0338, "step": 26365 }, { "epoch": 0.11002995885872605, "grad_norm": 0.7102625055912997, "learning_rate": 6.029998201000492e-06, "loss": 0.0472, "step": 26370 }, { "epoch": 0.11005082157371632, "grad_norm": 1.0884776339890012, "learning_rate": 6.029426588425529e-06, "loss": 0.0454, "step": 26375 }, { "epoch": 0.11007168428870659, "grad_norm": 1.2867155900775926, "learning_rate": 6.028855138377481e-06, "loss": 0.05, "step": 26380 }, { "epoch": 0.11009254700369688, "grad_norm": 2.8217741582674556, "learning_rate": 6.0282838507793415e-06, "loss": 0.0554, "step": 26385 }, { "epoch": 0.11011340971868715, "grad_norm": 0.6108162907228063, "learning_rate": 6.027712725554161e-06, "loss": 0.0508, "step": 26390 }, { "epoch": 0.11013427243367743, "grad_norm": 1.5591637275339174, "learning_rate": 6.027141762625036e-06, "loss": 0.0586, "step": 26395 }, { "epoch": 0.1101551351486677, "grad_norm": 0.9777871351004989, "learning_rate": 6.026570961915112e-06, "loss": 0.0362, "step": 26400 }, { "epoch": 0.11017599786365799, "grad_norm": 0.7898246658793828, "learning_rate": 6.0260003233475935e-06, "loss": 0.04, "step": 26405 }, { "epoch": 0.11019686057864826, "grad_norm": 1.9974544439623527, "learning_rate": 6.025429846845729e-06, "loss": 0.0586, "step": 26410 }, { "epoch": 0.11021772329363855, "grad_norm": 0.716711418631619, "learning_rate": 6.0248595323328194e-06, "loss": 0.0464, "step": 26415 }, { "epoch": 0.11023858600862882, "grad_norm": 1.753875683630419, "learning_rate": 6.0242893797322166e-06, "loss": 0.0409, "step": 26420 }, { "epoch": 0.11025944872361909, "grad_norm": 0.6046806120252916, "learning_rate": 6.023719388967326e-06, "loss": 0.038, "step": 26425 }, { "epoch": 0.11028031143860938, "grad_norm": 1.1383900371056541, "learning_rate": 6.023149559961598e-06, "loss": 0.057, "step": 26430 }, { "epoch": 0.11030117415359965, "grad_norm": 2.067512588441204, "learning_rate": 6.02257989263854e-06, "loss": 0.0553, "step": 26435 }, { "epoch": 0.11032203686858993, "grad_norm": 0.953459146272279, "learning_rate": 6.022010386921705e-06, "loss": 0.0392, "step": 26440 }, { "epoch": 0.1103428995835802, "grad_norm": 1.2743867383164187, "learning_rate": 6.0214410427346995e-06, "loss": 0.0541, "step": 26445 }, { "epoch": 0.11036376229857049, "grad_norm": 1.4641542604680107, "learning_rate": 6.0208718600011785e-06, "loss": 0.0562, "step": 26450 }, { "epoch": 0.11038462501356076, "grad_norm": 1.2419096114874046, "learning_rate": 6.0203028386448505e-06, "loss": 0.0536, "step": 26455 }, { "epoch": 0.11040548772855105, "grad_norm": 1.022261674871731, "learning_rate": 6.019733978589472e-06, "loss": 0.0371, "step": 26460 }, { "epoch": 0.11042635044354132, "grad_norm": 0.9271116009800504, "learning_rate": 6.019165279758848e-06, "loss": 0.0435, "step": 26465 }, { "epoch": 0.11044721315853159, "grad_norm": 0.913616878415763, "learning_rate": 6.018596742076841e-06, "loss": 0.0375, "step": 26470 }, { "epoch": 0.11046807587352188, "grad_norm": 1.3001639940265513, "learning_rate": 6.018028365467356e-06, "loss": 0.0383, "step": 26475 }, { "epoch": 0.11048893858851215, "grad_norm": 0.6789813645415667, "learning_rate": 6.017460149854352e-06, "loss": 0.0409, "step": 26480 }, { "epoch": 0.11050980130350244, "grad_norm": 0.9320887730215757, "learning_rate": 6.016892095161841e-06, "loss": 0.0462, "step": 26485 }, { "epoch": 0.11053066401849271, "grad_norm": 1.3374801906222518, "learning_rate": 6.016324201313876e-06, "loss": 0.0399, "step": 26490 }, { "epoch": 0.110551526733483, "grad_norm": 1.0344254980204717, "learning_rate": 6.015756468234571e-06, "loss": 0.0424, "step": 26495 }, { "epoch": 0.11057238944847327, "grad_norm": 1.2390586230106289, "learning_rate": 6.015188895848083e-06, "loss": 0.061, "step": 26500 }, { "epoch": 0.11059325216346355, "grad_norm": 1.2796022258215471, "learning_rate": 6.014621484078622e-06, "loss": 0.0599, "step": 26505 }, { "epoch": 0.11061411487845382, "grad_norm": 1.6573771972933498, "learning_rate": 6.014054232850447e-06, "loss": 0.054, "step": 26510 }, { "epoch": 0.1106349775934441, "grad_norm": 1.467648151512118, "learning_rate": 6.013487142087869e-06, "loss": 0.0416, "step": 26515 }, { "epoch": 0.11065584030843438, "grad_norm": 1.2956397854822517, "learning_rate": 6.012920211715245e-06, "loss": 0.0472, "step": 26520 }, { "epoch": 0.11067670302342465, "grad_norm": 1.164812345409078, "learning_rate": 6.012353441656987e-06, "loss": 0.0464, "step": 26525 }, { "epoch": 0.11069756573841494, "grad_norm": 1.3152359524236645, "learning_rate": 6.011786831837553e-06, "loss": 0.038, "step": 26530 }, { "epoch": 0.11071842845340521, "grad_norm": 0.940269230993905, "learning_rate": 6.0112203821814495e-06, "loss": 0.0419, "step": 26535 }, { "epoch": 0.1107392911683955, "grad_norm": 0.8429532898274656, "learning_rate": 6.010654092613238e-06, "loss": 0.0368, "step": 26540 }, { "epoch": 0.11076015388338577, "grad_norm": 2.132323947916137, "learning_rate": 6.010087963057527e-06, "loss": 0.0672, "step": 26545 }, { "epoch": 0.11078101659837605, "grad_norm": 1.1017950200253706, "learning_rate": 6.0095219934389726e-06, "loss": 0.0398, "step": 26550 }, { "epoch": 0.11080187931336632, "grad_norm": 1.0661525708176904, "learning_rate": 6.008956183682285e-06, "loss": 0.0472, "step": 26555 }, { "epoch": 0.1108227420283566, "grad_norm": 1.1843062252332484, "learning_rate": 6.00839053371222e-06, "loss": 0.0343, "step": 26560 }, { "epoch": 0.11084360474334688, "grad_norm": 1.1589771209563655, "learning_rate": 6.007825043453587e-06, "loss": 0.04, "step": 26565 }, { "epoch": 0.11086446745833715, "grad_norm": 1.212695686135549, "learning_rate": 6.007259712831238e-06, "loss": 0.0499, "step": 26570 }, { "epoch": 0.11088533017332744, "grad_norm": 1.1907943461446924, "learning_rate": 6.0066945417700826e-06, "loss": 0.0428, "step": 26575 }, { "epoch": 0.11090619288831771, "grad_norm": 0.6402685293589874, "learning_rate": 6.006129530195076e-06, "loss": 0.0395, "step": 26580 }, { "epoch": 0.110927055603308, "grad_norm": 0.6867725381501734, "learning_rate": 6.005564678031223e-06, "loss": 0.0415, "step": 26585 }, { "epoch": 0.11094791831829827, "grad_norm": 0.9210979549364511, "learning_rate": 6.004999985203575e-06, "loss": 0.0504, "step": 26590 }, { "epoch": 0.11096878103328855, "grad_norm": 1.1295141567697013, "learning_rate": 6.004435451637239e-06, "loss": 0.0422, "step": 26595 }, { "epoch": 0.11098964374827883, "grad_norm": 1.0874933360887697, "learning_rate": 6.003871077257366e-06, "loss": 0.0525, "step": 26600 }, { "epoch": 0.1110105064632691, "grad_norm": 1.906536765224063, "learning_rate": 6.003306861989161e-06, "loss": 0.0489, "step": 26605 }, { "epoch": 0.11103136917825938, "grad_norm": 1.3091064920163051, "learning_rate": 6.00274280575787e-06, "loss": 0.0496, "step": 26610 }, { "epoch": 0.11105223189324966, "grad_norm": 0.7732095152884501, "learning_rate": 6.002178908488798e-06, "loss": 0.035, "step": 26615 }, { "epoch": 0.11107309460823994, "grad_norm": 1.9615252770886185, "learning_rate": 6.001615170107294e-06, "loss": 0.0485, "step": 26620 }, { "epoch": 0.11109395732323021, "grad_norm": 1.3967437724431853, "learning_rate": 6.0010515905387536e-06, "loss": 0.0486, "step": 26625 }, { "epoch": 0.1111148200382205, "grad_norm": 1.117558237775953, "learning_rate": 6.000488169708626e-06, "loss": 0.0791, "step": 26630 }, { "epoch": 0.11113568275321077, "grad_norm": 1.0512125466555007, "learning_rate": 5.999924907542409e-06, "loss": 0.0469, "step": 26635 }, { "epoch": 0.11115654546820106, "grad_norm": 1.2891311596215758, "learning_rate": 5.999361803965647e-06, "loss": 0.0486, "step": 26640 }, { "epoch": 0.11117740818319133, "grad_norm": 0.7615364940868368, "learning_rate": 5.998798858903935e-06, "loss": 0.0519, "step": 26645 }, { "epoch": 0.1111982708981816, "grad_norm": 0.8633777021111642, "learning_rate": 5.998236072282916e-06, "loss": 0.0386, "step": 26650 }, { "epoch": 0.11121913361317189, "grad_norm": 0.9366204282496576, "learning_rate": 5.9976734440282824e-06, "loss": 0.0317, "step": 26655 }, { "epoch": 0.11123999632816216, "grad_norm": 0.8972059528501986, "learning_rate": 5.997110974065774e-06, "loss": 0.0347, "step": 26660 }, { "epoch": 0.11126085904315244, "grad_norm": 1.4875901980988349, "learning_rate": 5.996548662321182e-06, "loss": 0.0458, "step": 26665 }, { "epoch": 0.11128172175814272, "grad_norm": 1.5273914795365098, "learning_rate": 5.995986508720345e-06, "loss": 0.0522, "step": 26670 }, { "epoch": 0.111302584473133, "grad_norm": 0.8379727087338782, "learning_rate": 5.995424513189149e-06, "loss": 0.0415, "step": 26675 }, { "epoch": 0.11132344718812327, "grad_norm": 1.0711216471144551, "learning_rate": 5.994862675653528e-06, "loss": 0.0413, "step": 26680 }, { "epoch": 0.11134430990311354, "grad_norm": 1.6959684312865944, "learning_rate": 5.994300996039469e-06, "loss": 0.0751, "step": 26685 }, { "epoch": 0.11136517261810383, "grad_norm": 1.2100891218305954, "learning_rate": 5.993739474273005e-06, "loss": 0.0474, "step": 26690 }, { "epoch": 0.1113860353330941, "grad_norm": 1.1049855686215477, "learning_rate": 5.9931781102802155e-06, "loss": 0.039, "step": 26695 }, { "epoch": 0.11140689804808439, "grad_norm": 1.1913276347230934, "learning_rate": 5.99261690398723e-06, "loss": 0.0464, "step": 26700 }, { "epoch": 0.11142776076307466, "grad_norm": 1.397582865539122, "learning_rate": 5.992055855320227e-06, "loss": 0.0546, "step": 26705 }, { "epoch": 0.11144862347806495, "grad_norm": 0.7536240858240952, "learning_rate": 5.991494964205434e-06, "loss": 0.0428, "step": 26710 }, { "epoch": 0.11146948619305522, "grad_norm": 0.704054407951859, "learning_rate": 5.990934230569125e-06, "loss": 0.0396, "step": 26715 }, { "epoch": 0.1114903489080455, "grad_norm": 1.0338313622900832, "learning_rate": 5.990373654337622e-06, "loss": 0.0591, "step": 26720 }, { "epoch": 0.11151121162303577, "grad_norm": 1.3229896235973275, "learning_rate": 5.9898132354373e-06, "loss": 0.0485, "step": 26725 }, { "epoch": 0.11153207433802605, "grad_norm": 0.5235383692227942, "learning_rate": 5.9892529737945745e-06, "loss": 0.0345, "step": 26730 }, { "epoch": 0.11155293705301633, "grad_norm": 0.8441714028411936, "learning_rate": 5.988692869335915e-06, "loss": 0.0427, "step": 26735 }, { "epoch": 0.1115737997680066, "grad_norm": 0.8098517007559235, "learning_rate": 5.988132921987838e-06, "loss": 0.0565, "step": 26740 }, { "epoch": 0.11159466248299689, "grad_norm": 1.695822114779328, "learning_rate": 5.987573131676905e-06, "loss": 0.0522, "step": 26745 }, { "epoch": 0.11161552519798716, "grad_norm": 1.1800816950955193, "learning_rate": 5.987013498329731e-06, "loss": 0.0527, "step": 26750 }, { "epoch": 0.11163638791297745, "grad_norm": 0.8413519649944206, "learning_rate": 5.9864540218729735e-06, "loss": 0.0543, "step": 26755 }, { "epoch": 0.11165725062796772, "grad_norm": 2.7383587713609554, "learning_rate": 5.985894702233343e-06, "loss": 0.0435, "step": 26760 }, { "epoch": 0.111678113342958, "grad_norm": 1.4398092616953706, "learning_rate": 5.985335539337592e-06, "loss": 0.0655, "step": 26765 }, { "epoch": 0.11169897605794828, "grad_norm": 0.8853552248976144, "learning_rate": 5.9847765331125285e-06, "loss": 0.0391, "step": 26770 }, { "epoch": 0.11171983877293855, "grad_norm": 1.1687816292743, "learning_rate": 5.984217683485001e-06, "loss": 0.0361, "step": 26775 }, { "epoch": 0.11174070148792883, "grad_norm": 0.8171333038540807, "learning_rate": 5.98365899038191e-06, "loss": 0.0475, "step": 26780 }, { "epoch": 0.1117615642029191, "grad_norm": 0.8513129060215116, "learning_rate": 5.983100453730201e-06, "loss": 0.0375, "step": 26785 }, { "epoch": 0.11178242691790939, "grad_norm": 1.4531228207972646, "learning_rate": 5.9825420734568705e-06, "loss": 0.0596, "step": 26790 }, { "epoch": 0.11180328963289966, "grad_norm": 1.0647640545011123, "learning_rate": 5.9819838494889614e-06, "loss": 0.0299, "step": 26795 }, { "epoch": 0.11182415234788995, "grad_norm": 0.7754058775679207, "learning_rate": 5.981425781753562e-06, "loss": 0.0374, "step": 26800 }, { "epoch": 0.11184501506288022, "grad_norm": 1.1989794147002275, "learning_rate": 5.980867870177814e-06, "loss": 0.0399, "step": 26805 }, { "epoch": 0.1118658777778705, "grad_norm": 0.8693799509045834, "learning_rate": 5.980310114688899e-06, "loss": 0.0538, "step": 26810 }, { "epoch": 0.11188674049286078, "grad_norm": 0.9684945144577736, "learning_rate": 5.9797525152140526e-06, "loss": 0.0366, "step": 26815 }, { "epoch": 0.11190760320785105, "grad_norm": 1.2794137486525563, "learning_rate": 5.979195071680551e-06, "loss": 0.0468, "step": 26820 }, { "epoch": 0.11192846592284134, "grad_norm": 0.7932573663452723, "learning_rate": 5.978637784015728e-06, "loss": 0.0363, "step": 26825 }, { "epoch": 0.11194932863783161, "grad_norm": 1.3950260761183588, "learning_rate": 5.9780806521469535e-06, "loss": 0.0434, "step": 26830 }, { "epoch": 0.1119701913528219, "grad_norm": 1.013368113056415, "learning_rate": 5.977523676001653e-06, "loss": 0.0336, "step": 26835 }, { "epoch": 0.11199105406781217, "grad_norm": 1.5188262096762872, "learning_rate": 5.976966855507297e-06, "loss": 0.0583, "step": 26840 }, { "epoch": 0.11201191678280245, "grad_norm": 1.0642619752410525, "learning_rate": 5.976410190591401e-06, "loss": 0.0443, "step": 26845 }, { "epoch": 0.11203277949779272, "grad_norm": 0.7923116254967956, "learning_rate": 5.975853681181529e-06, "loss": 0.0417, "step": 26850 }, { "epoch": 0.11205364221278301, "grad_norm": 1.4635133932941113, "learning_rate": 5.975297327205296e-06, "loss": 0.039, "step": 26855 }, { "epoch": 0.11207450492777328, "grad_norm": 0.9830231851736464, "learning_rate": 5.974741128590356e-06, "loss": 0.0471, "step": 26860 }, { "epoch": 0.11209536764276355, "grad_norm": 1.2040921661951558, "learning_rate": 5.974185085264419e-06, "loss": 0.0425, "step": 26865 }, { "epoch": 0.11211623035775384, "grad_norm": 1.1724439550362826, "learning_rate": 5.973629197155236e-06, "loss": 0.0509, "step": 26870 }, { "epoch": 0.11213709307274411, "grad_norm": 1.4171001751026633, "learning_rate": 5.973073464190609e-06, "loss": 0.0413, "step": 26875 }, { "epoch": 0.1121579557877344, "grad_norm": 1.1520307712858633, "learning_rate": 5.972517886298383e-06, "loss": 0.0515, "step": 26880 }, { "epoch": 0.11217881850272467, "grad_norm": 1.4227784153457919, "learning_rate": 5.9719624634064535e-06, "loss": 0.0397, "step": 26885 }, { "epoch": 0.11219968121771495, "grad_norm": 2.102627037295116, "learning_rate": 5.97140719544276e-06, "loss": 0.0542, "step": 26890 }, { "epoch": 0.11222054393270522, "grad_norm": 0.8795355133028057, "learning_rate": 5.970852082335292e-06, "loss": 0.0472, "step": 26895 }, { "epoch": 0.11224140664769551, "grad_norm": 0.5117810955286296, "learning_rate": 5.970297124012084e-06, "loss": 0.0459, "step": 26900 }, { "epoch": 0.11226226936268578, "grad_norm": 0.8857403718380326, "learning_rate": 5.9697423204012175e-06, "loss": 0.0559, "step": 26905 }, { "epoch": 0.11228313207767605, "grad_norm": 1.0700888495406693, "learning_rate": 5.96918767143082e-06, "loss": 0.0423, "step": 26910 }, { "epoch": 0.11230399479266634, "grad_norm": 1.0775465913967293, "learning_rate": 5.968633177029066e-06, "loss": 0.0495, "step": 26915 }, { "epoch": 0.11232485750765661, "grad_norm": 0.9377186656129831, "learning_rate": 5.968078837124181e-06, "loss": 0.0446, "step": 26920 }, { "epoch": 0.1123457202226469, "grad_norm": 0.5679108681925724, "learning_rate": 5.967524651644428e-06, "loss": 0.0388, "step": 26925 }, { "epoch": 0.11236658293763717, "grad_norm": 0.9321675308061625, "learning_rate": 5.966970620518128e-06, "loss": 0.049, "step": 26930 }, { "epoch": 0.11238744565262745, "grad_norm": 1.2905195472526305, "learning_rate": 5.966416743673637e-06, "loss": 0.0479, "step": 26935 }, { "epoch": 0.11240830836761773, "grad_norm": 0.9956916858809929, "learning_rate": 5.965863021039368e-06, "loss": 0.0421, "step": 26940 }, { "epoch": 0.11242917108260801, "grad_norm": 0.6481041609829552, "learning_rate": 5.9653094525437726e-06, "loss": 0.0406, "step": 26945 }, { "epoch": 0.11245003379759828, "grad_norm": 1.054032013746615, "learning_rate": 5.964756038115352e-06, "loss": 0.0378, "step": 26950 }, { "epoch": 0.11247089651258856, "grad_norm": 1.4561777587043365, "learning_rate": 5.964202777682656e-06, "loss": 0.0456, "step": 26955 }, { "epoch": 0.11249175922757884, "grad_norm": 1.1630309018677603, "learning_rate": 5.963649671174277e-06, "loss": 0.0487, "step": 26960 }, { "epoch": 0.11251262194256911, "grad_norm": 0.8859961781224096, "learning_rate": 5.963096718518854e-06, "loss": 0.0497, "step": 26965 }, { "epoch": 0.1125334846575594, "grad_norm": 0.7539713813865925, "learning_rate": 5.962543919645076e-06, "loss": 0.0541, "step": 26970 }, { "epoch": 0.11255434737254967, "grad_norm": 1.842629752373684, "learning_rate": 5.961991274481676e-06, "loss": 0.0459, "step": 26975 }, { "epoch": 0.11257521008753996, "grad_norm": 1.5275170284690613, "learning_rate": 5.96143878295743e-06, "loss": 0.0578, "step": 26980 }, { "epoch": 0.11259607280253023, "grad_norm": 0.7125414861597813, "learning_rate": 5.960886445001166e-06, "loss": 0.0483, "step": 26985 }, { "epoch": 0.11261693551752051, "grad_norm": 1.1406480132647696, "learning_rate": 5.960334260541754e-06, "loss": 0.0496, "step": 26990 }, { "epoch": 0.11263779823251079, "grad_norm": 0.9347051093747749, "learning_rate": 5.959782229508115e-06, "loss": 0.057, "step": 26995 }, { "epoch": 0.11265866094750106, "grad_norm": 1.3485604473651882, "learning_rate": 5.959230351829207e-06, "loss": 0.0484, "step": 27000 }, { "epoch": 0.11267952366249134, "grad_norm": 0.9362912304640495, "learning_rate": 5.958678627434043e-06, "loss": 0.0395, "step": 27005 }, { "epoch": 0.11270038637748162, "grad_norm": 1.1217667343197244, "learning_rate": 5.958127056251679e-06, "loss": 0.0429, "step": 27010 }, { "epoch": 0.1127212490924719, "grad_norm": 0.437678583283163, "learning_rate": 5.9575756382112156e-06, "loss": 0.0365, "step": 27015 }, { "epoch": 0.11274211180746217, "grad_norm": 1.0344420295185441, "learning_rate": 5.957024373241802e-06, "loss": 0.0521, "step": 27020 }, { "epoch": 0.11276297452245246, "grad_norm": 1.028004041841592, "learning_rate": 5.9564732612726285e-06, "loss": 0.0414, "step": 27025 }, { "epoch": 0.11278383723744273, "grad_norm": 1.5877798216472006, "learning_rate": 5.955922302232936e-06, "loss": 0.0464, "step": 27030 }, { "epoch": 0.11280469995243302, "grad_norm": 0.9728745999647335, "learning_rate": 5.95537149605201e-06, "loss": 0.0697, "step": 27035 }, { "epoch": 0.11282556266742329, "grad_norm": 0.8025774442990223, "learning_rate": 5.9548208426591815e-06, "loss": 0.044, "step": 27040 }, { "epoch": 0.11284642538241356, "grad_norm": 2.2492584243358467, "learning_rate": 5.954270341983827e-06, "loss": 0.0462, "step": 27045 }, { "epoch": 0.11286728809740385, "grad_norm": 0.9747826265586429, "learning_rate": 5.953719993955367e-06, "loss": 0.0403, "step": 27050 }, { "epoch": 0.11288815081239412, "grad_norm": 1.2148878856819194, "learning_rate": 5.953169798503272e-06, "loss": 0.0439, "step": 27055 }, { "epoch": 0.1129090135273844, "grad_norm": 1.2705765944257605, "learning_rate": 5.952619755557054e-06, "loss": 0.047, "step": 27060 }, { "epoch": 0.11292987624237467, "grad_norm": 1.2230344720705053, "learning_rate": 5.952069865046274e-06, "loss": 0.0615, "step": 27065 }, { "epoch": 0.11295073895736496, "grad_norm": 1.3265778600728602, "learning_rate": 5.951520126900534e-06, "loss": 0.0456, "step": 27070 }, { "epoch": 0.11297160167235523, "grad_norm": 1.5346325128197307, "learning_rate": 5.950970541049485e-06, "loss": 0.0284, "step": 27075 }, { "epoch": 0.11299246438734552, "grad_norm": 0.8417400766436559, "learning_rate": 5.950421107422824e-06, "loss": 0.037, "step": 27080 }, { "epoch": 0.11301332710233579, "grad_norm": 1.4921695391888177, "learning_rate": 5.949871825950291e-06, "loss": 0.0481, "step": 27085 }, { "epoch": 0.11303418981732606, "grad_norm": 1.08250583321083, "learning_rate": 5.949322696561673e-06, "loss": 0.0482, "step": 27090 }, { "epoch": 0.11305505253231635, "grad_norm": 0.919212356001868, "learning_rate": 5.9487737191868e-06, "loss": 0.0437, "step": 27095 }, { "epoch": 0.11307591524730662, "grad_norm": 1.0597925580382943, "learning_rate": 5.948224893755552e-06, "loss": 0.0463, "step": 27100 }, { "epoch": 0.1130967779622969, "grad_norm": 1.5791974417640433, "learning_rate": 5.947676220197849e-06, "loss": 0.0463, "step": 27105 }, { "epoch": 0.11311764067728718, "grad_norm": 0.8263443988949479, "learning_rate": 5.94712769844366e-06, "loss": 0.0358, "step": 27110 }, { "epoch": 0.11313850339227746, "grad_norm": 1.4147215387476701, "learning_rate": 5.946579328422998e-06, "loss": 0.0453, "step": 27115 }, { "epoch": 0.11315936610726773, "grad_norm": 1.5325056100489112, "learning_rate": 5.946031110065919e-06, "loss": 0.0521, "step": 27120 }, { "epoch": 0.11318022882225802, "grad_norm": 0.7212626013412327, "learning_rate": 5.94548304330253e-06, "loss": 0.0436, "step": 27125 }, { "epoch": 0.11320109153724829, "grad_norm": 1.142065419415482, "learning_rate": 5.944935128062975e-06, "loss": 0.0462, "step": 27130 }, { "epoch": 0.11322195425223856, "grad_norm": 1.3025346964809532, "learning_rate": 5.94438736427745e-06, "loss": 0.0455, "step": 27135 }, { "epoch": 0.11324281696722885, "grad_norm": 0.8691962180999111, "learning_rate": 5.943839751876193e-06, "loss": 0.0466, "step": 27140 }, { "epoch": 0.11326367968221912, "grad_norm": 1.000678845901397, "learning_rate": 5.9432922907894865e-06, "loss": 0.0415, "step": 27145 }, { "epoch": 0.1132845423972094, "grad_norm": 0.9695813586145765, "learning_rate": 5.942744980947659e-06, "loss": 0.0449, "step": 27150 }, { "epoch": 0.11330540511219968, "grad_norm": 1.063377552472584, "learning_rate": 5.942197822281084e-06, "loss": 0.0409, "step": 27155 }, { "epoch": 0.11332626782718996, "grad_norm": 0.9192779569907109, "learning_rate": 5.941650814720178e-06, "loss": 0.0422, "step": 27160 }, { "epoch": 0.11334713054218024, "grad_norm": 1.0007656392150917, "learning_rate": 5.941103958195405e-06, "loss": 0.0377, "step": 27165 }, { "epoch": 0.11336799325717052, "grad_norm": 0.7157744980969352, "learning_rate": 5.940557252637274e-06, "loss": 0.0462, "step": 27170 }, { "epoch": 0.11338885597216079, "grad_norm": 0.8737131636684583, "learning_rate": 5.940010697976335e-06, "loss": 0.0395, "step": 27175 }, { "epoch": 0.11340971868715106, "grad_norm": 0.6794472961713989, "learning_rate": 5.939464294143187e-06, "loss": 0.0349, "step": 27180 }, { "epoch": 0.11343058140214135, "grad_norm": 0.7561268950474279, "learning_rate": 5.93891804106847e-06, "loss": 0.0408, "step": 27185 }, { "epoch": 0.11345144411713162, "grad_norm": 1.7961636631290592, "learning_rate": 5.938371938682871e-06, "loss": 0.0495, "step": 27190 }, { "epoch": 0.11347230683212191, "grad_norm": 0.7407918051633274, "learning_rate": 5.937825986917122e-06, "loss": 0.0394, "step": 27195 }, { "epoch": 0.11349316954711218, "grad_norm": 4.067460682993647, "learning_rate": 5.9372801857019975e-06, "loss": 0.0536, "step": 27200 }, { "epoch": 0.11351403226210247, "grad_norm": 1.4308018491778884, "learning_rate": 5.936734534968318e-06, "loss": 0.054, "step": 27205 }, { "epoch": 0.11353489497709274, "grad_norm": 1.7503704251737542, "learning_rate": 5.936189034646948e-06, "loss": 0.0516, "step": 27210 }, { "epoch": 0.11355575769208302, "grad_norm": 1.1895036499499807, "learning_rate": 5.935643684668797e-06, "loss": 0.0439, "step": 27215 }, { "epoch": 0.1135766204070733, "grad_norm": 0.8873654346787312, "learning_rate": 5.935098484964819e-06, "loss": 0.053, "step": 27220 }, { "epoch": 0.11359748312206357, "grad_norm": 1.1574712233325606, "learning_rate": 5.934553435466009e-06, "loss": 0.0422, "step": 27225 }, { "epoch": 0.11361834583705385, "grad_norm": 1.1437773709090429, "learning_rate": 5.9340085361034125e-06, "loss": 0.0482, "step": 27230 }, { "epoch": 0.11363920855204412, "grad_norm": 1.0230508659179471, "learning_rate": 5.933463786808116e-06, "loss": 0.0641, "step": 27235 }, { "epoch": 0.11366007126703441, "grad_norm": 0.914391371217015, "learning_rate": 5.932919187511248e-06, "loss": 0.0385, "step": 27240 }, { "epoch": 0.11368093398202468, "grad_norm": 1.1286557460968127, "learning_rate": 5.932374738143984e-06, "loss": 0.0446, "step": 27245 }, { "epoch": 0.11370179669701497, "grad_norm": 0.9686530184043028, "learning_rate": 5.931830438637546e-06, "loss": 0.0481, "step": 27250 }, { "epoch": 0.11372265941200524, "grad_norm": 1.0696025593851106, "learning_rate": 5.931286288923194e-06, "loss": 0.0495, "step": 27255 }, { "epoch": 0.11374352212699552, "grad_norm": 1.706911586512449, "learning_rate": 5.930742288932238e-06, "loss": 0.0549, "step": 27260 }, { "epoch": 0.1137643848419858, "grad_norm": 1.456595099943734, "learning_rate": 5.9301984385960275e-06, "loss": 0.0421, "step": 27265 }, { "epoch": 0.11378524755697607, "grad_norm": 0.9435907890960769, "learning_rate": 5.92965473784596e-06, "loss": 0.0449, "step": 27270 }, { "epoch": 0.11380611027196635, "grad_norm": 1.720080633888572, "learning_rate": 5.929111186613476e-06, "loss": 0.0415, "step": 27275 }, { "epoch": 0.11382697298695663, "grad_norm": 1.0371072791519118, "learning_rate": 5.928567784830056e-06, "loss": 0.0445, "step": 27280 }, { "epoch": 0.11384783570194691, "grad_norm": 1.036379225210944, "learning_rate": 5.92802453242723e-06, "loss": 0.0374, "step": 27285 }, { "epoch": 0.11386869841693718, "grad_norm": 1.2827006180592635, "learning_rate": 5.92748142933657e-06, "loss": 0.0429, "step": 27290 }, { "epoch": 0.11388956113192747, "grad_norm": 1.3838864854301367, "learning_rate": 5.926938475489691e-06, "loss": 0.0509, "step": 27295 }, { "epoch": 0.11391042384691774, "grad_norm": 3.334632225460968, "learning_rate": 5.92639567081825e-06, "loss": 0.0385, "step": 27300 }, { "epoch": 0.11393128656190803, "grad_norm": 1.1866418351962242, "learning_rate": 5.925853015253954e-06, "loss": 0.0506, "step": 27305 }, { "epoch": 0.1139521492768983, "grad_norm": 0.832141004710738, "learning_rate": 5.925310508728548e-06, "loss": 0.0339, "step": 27310 }, { "epoch": 0.11397301199188857, "grad_norm": 1.5543467264427187, "learning_rate": 5.924768151173821e-06, "loss": 0.0446, "step": 27315 }, { "epoch": 0.11399387470687886, "grad_norm": 1.2619867799827746, "learning_rate": 5.92422594252161e-06, "loss": 0.0395, "step": 27320 }, { "epoch": 0.11401473742186913, "grad_norm": 0.8127785362927472, "learning_rate": 5.9236838827037915e-06, "loss": 0.0442, "step": 27325 }, { "epoch": 0.11403560013685941, "grad_norm": 1.382980916215266, "learning_rate": 5.9231419716522865e-06, "loss": 0.0387, "step": 27330 }, { "epoch": 0.11405646285184969, "grad_norm": 1.122638485955477, "learning_rate": 5.922600209299062e-06, "loss": 0.0354, "step": 27335 }, { "epoch": 0.11407732556683997, "grad_norm": 1.1129311408354756, "learning_rate": 5.922058595576126e-06, "loss": 0.0609, "step": 27340 }, { "epoch": 0.11409818828183024, "grad_norm": 1.44153918137804, "learning_rate": 5.92151713041553e-06, "loss": 0.0616, "step": 27345 }, { "epoch": 0.11411905099682053, "grad_norm": 1.1424538335676537, "learning_rate": 5.92097581374937e-06, "loss": 0.0533, "step": 27350 }, { "epoch": 0.1141399137118108, "grad_norm": 0.49234518347030176, "learning_rate": 5.920434645509786e-06, "loss": 0.0416, "step": 27355 }, { "epoch": 0.11416077642680107, "grad_norm": 1.2141118344534, "learning_rate": 5.91989362562896e-06, "loss": 0.0392, "step": 27360 }, { "epoch": 0.11418163914179136, "grad_norm": 0.5387304625468182, "learning_rate": 5.919352754039117e-06, "loss": 0.0449, "step": 27365 }, { "epoch": 0.11420250185678163, "grad_norm": 1.1357658728358564, "learning_rate": 5.918812030672529e-06, "loss": 0.0461, "step": 27370 }, { "epoch": 0.11422336457177192, "grad_norm": 3.131702044791217, "learning_rate": 5.918271455461505e-06, "loss": 0.0429, "step": 27375 }, { "epoch": 0.11424422728676219, "grad_norm": 1.5008885850240241, "learning_rate": 5.917731028338403e-06, "loss": 0.0407, "step": 27380 }, { "epoch": 0.11426509000175247, "grad_norm": 1.256799702123049, "learning_rate": 5.917190749235622e-06, "loss": 0.0397, "step": 27385 }, { "epoch": 0.11428595271674274, "grad_norm": 0.8537497698473168, "learning_rate": 5.916650618085604e-06, "loss": 0.0502, "step": 27390 }, { "epoch": 0.11430681543173303, "grad_norm": 0.8398469322466062, "learning_rate": 5.9161106348208345e-06, "loss": 0.0478, "step": 27395 }, { "epoch": 0.1143276781467233, "grad_norm": 1.2335014757169505, "learning_rate": 5.9155707993738424e-06, "loss": 0.0409, "step": 27400 }, { "epoch": 0.11434854086171357, "grad_norm": 1.9038174045060896, "learning_rate": 5.915031111677199e-06, "loss": 0.0602, "step": 27405 }, { "epoch": 0.11436940357670386, "grad_norm": 1.354862192836655, "learning_rate": 5.914491571663517e-06, "loss": 0.0547, "step": 27410 }, { "epoch": 0.11439026629169413, "grad_norm": 1.145985986007692, "learning_rate": 5.9139521792654575e-06, "loss": 0.0445, "step": 27415 }, { "epoch": 0.11441112900668442, "grad_norm": 0.9580572328880358, "learning_rate": 5.913412934415721e-06, "loss": 0.042, "step": 27420 }, { "epoch": 0.11443199172167469, "grad_norm": 1.0796863161608967, "learning_rate": 5.912873837047047e-06, "loss": 0.0314, "step": 27425 }, { "epoch": 0.11445285443666497, "grad_norm": 1.3983782983713178, "learning_rate": 5.912334887092227e-06, "loss": 0.042, "step": 27430 }, { "epoch": 0.11447371715165525, "grad_norm": 0.7010302385115795, "learning_rate": 5.911796084484088e-06, "loss": 0.0458, "step": 27435 }, { "epoch": 0.11449457986664553, "grad_norm": 0.9742528390637875, "learning_rate": 5.9112574291555035e-06, "loss": 0.0418, "step": 27440 }, { "epoch": 0.1145154425816358, "grad_norm": 0.9817363428574952, "learning_rate": 5.9107189210393865e-06, "loss": 0.0428, "step": 27445 }, { "epoch": 0.11453630529662608, "grad_norm": 1.1032798688704437, "learning_rate": 5.910180560068698e-06, "loss": 0.0386, "step": 27450 }, { "epoch": 0.11455716801161636, "grad_norm": 1.0064389823131648, "learning_rate": 5.9096423461764355e-06, "loss": 0.0435, "step": 27455 }, { "epoch": 0.11457803072660663, "grad_norm": 1.10854825667288, "learning_rate": 5.909104279295643e-06, "loss": 0.0459, "step": 27460 }, { "epoch": 0.11459889344159692, "grad_norm": 0.8057486697968756, "learning_rate": 5.908566359359409e-06, "loss": 0.0481, "step": 27465 }, { "epoch": 0.11461975615658719, "grad_norm": 1.8258094626356653, "learning_rate": 5.908028586300858e-06, "loss": 0.0476, "step": 27470 }, { "epoch": 0.11464061887157748, "grad_norm": 2.1193265189133377, "learning_rate": 5.907490960053164e-06, "loss": 0.0447, "step": 27475 }, { "epoch": 0.11466148158656775, "grad_norm": 1.1755375200585978, "learning_rate": 5.906953480549541e-06, "loss": 0.0531, "step": 27480 }, { "epoch": 0.11468234430155803, "grad_norm": 0.8626773179751492, "learning_rate": 5.906416147723243e-06, "loss": 0.0657, "step": 27485 }, { "epoch": 0.1147032070165483, "grad_norm": 1.602957346199925, "learning_rate": 5.905878961507571e-06, "loss": 0.0475, "step": 27490 }, { "epoch": 0.11472406973153858, "grad_norm": 1.3666543047368827, "learning_rate": 5.905341921835866e-06, "loss": 0.0436, "step": 27495 }, { "epoch": 0.11474493244652886, "grad_norm": 0.5640210114147761, "learning_rate": 5.9048050286415095e-06, "loss": 0.0371, "step": 27500 }, { "epoch": 0.11476579516151914, "grad_norm": 0.7563471035519806, "learning_rate": 5.904268281857929e-06, "loss": 0.0337, "step": 27505 }, { "epoch": 0.11478665787650942, "grad_norm": 1.0855775480084588, "learning_rate": 5.903731681418594e-06, "loss": 0.0307, "step": 27510 }, { "epoch": 0.11480752059149969, "grad_norm": 0.6038370873492661, "learning_rate": 5.903195227257014e-06, "loss": 0.0443, "step": 27515 }, { "epoch": 0.11482838330648998, "grad_norm": 0.5639547629166907, "learning_rate": 5.902658919306742e-06, "loss": 0.0468, "step": 27520 }, { "epoch": 0.11484924602148025, "grad_norm": 1.0398717937931274, "learning_rate": 5.902122757501373e-06, "loss": 0.0528, "step": 27525 }, { "epoch": 0.11487010873647054, "grad_norm": 0.9616066180917512, "learning_rate": 5.901586741774545e-06, "loss": 0.0392, "step": 27530 }, { "epoch": 0.11489097145146081, "grad_norm": 0.9765121774775374, "learning_rate": 5.901050872059938e-06, "loss": 0.0366, "step": 27535 }, { "epoch": 0.11491183416645108, "grad_norm": 0.9605071803033629, "learning_rate": 5.9005151482912734e-06, "loss": 0.0597, "step": 27540 }, { "epoch": 0.11493269688144137, "grad_norm": 0.832729156654235, "learning_rate": 5.899979570402314e-06, "loss": 0.0389, "step": 27545 }, { "epoch": 0.11495355959643164, "grad_norm": 1.3106232792824208, "learning_rate": 5.899444138326869e-06, "loss": 0.044, "step": 27550 }, { "epoch": 0.11497442231142192, "grad_norm": 1.0696023771913001, "learning_rate": 5.898908851998783e-06, "loss": 0.0402, "step": 27555 }, { "epoch": 0.1149952850264122, "grad_norm": 1.5613689081169175, "learning_rate": 5.898373711351947e-06, "loss": 0.0615, "step": 27560 }, { "epoch": 0.11501614774140248, "grad_norm": 1.160438687485013, "learning_rate": 5.897838716320294e-06, "loss": 0.0387, "step": 27565 }, { "epoch": 0.11503701045639275, "grad_norm": 1.0957905829063814, "learning_rate": 5.897303866837796e-06, "loss": 0.0495, "step": 27570 }, { "epoch": 0.11505787317138304, "grad_norm": 1.0019499830166985, "learning_rate": 5.89676916283847e-06, "loss": 0.0432, "step": 27575 }, { "epoch": 0.11507873588637331, "grad_norm": 0.949675470822912, "learning_rate": 5.896234604256374e-06, "loss": 0.0481, "step": 27580 }, { "epoch": 0.11509959860136358, "grad_norm": 0.5600834375195043, "learning_rate": 5.89570019102561e-06, "loss": 0.0633, "step": 27585 }, { "epoch": 0.11512046131635387, "grad_norm": 1.0889462726943462, "learning_rate": 5.895165923080314e-06, "loss": 0.0409, "step": 27590 }, { "epoch": 0.11514132403134414, "grad_norm": 1.4369947288259153, "learning_rate": 5.894631800354671e-06, "loss": 0.0487, "step": 27595 }, { "epoch": 0.11516218674633442, "grad_norm": 1.129641031114135, "learning_rate": 5.894097822782909e-06, "loss": 0.0367, "step": 27600 }, { "epoch": 0.1151830494613247, "grad_norm": 0.6620475179358966, "learning_rate": 5.893563990299291e-06, "loss": 0.0427, "step": 27605 }, { "epoch": 0.11520391217631498, "grad_norm": 1.4452943259561242, "learning_rate": 5.8930303028381266e-06, "loss": 0.0383, "step": 27610 }, { "epoch": 0.11522477489130525, "grad_norm": 0.8133432696776737, "learning_rate": 5.892496760333766e-06, "loss": 0.0417, "step": 27615 }, { "epoch": 0.11524563760629554, "grad_norm": 0.7149522026604286, "learning_rate": 5.891963362720601e-06, "loss": 0.0416, "step": 27620 }, { "epoch": 0.11526650032128581, "grad_norm": 0.888664160727332, "learning_rate": 5.891430109933063e-06, "loss": 0.0599, "step": 27625 }, { "epoch": 0.11528736303627608, "grad_norm": 1.0426012409471381, "learning_rate": 5.8908970019056275e-06, "loss": 0.0451, "step": 27630 }, { "epoch": 0.11530822575126637, "grad_norm": 0.8754168924572869, "learning_rate": 5.89036403857281e-06, "loss": 0.0509, "step": 27635 }, { "epoch": 0.11532908846625664, "grad_norm": 1.5659096560682413, "learning_rate": 5.889831219869169e-06, "loss": 0.0577, "step": 27640 }, { "epoch": 0.11534995118124693, "grad_norm": 1.022506538484748, "learning_rate": 5.889298545729303e-06, "loss": 0.0513, "step": 27645 }, { "epoch": 0.1153708138962372, "grad_norm": 0.5909754136204213, "learning_rate": 5.8887660160878515e-06, "loss": 0.0525, "step": 27650 }, { "epoch": 0.11539167661122748, "grad_norm": 0.7943622867397967, "learning_rate": 5.888233630879499e-06, "loss": 0.0458, "step": 27655 }, { "epoch": 0.11541253932621776, "grad_norm": 0.9903957994031438, "learning_rate": 5.887701390038965e-06, "loss": 0.0555, "step": 27660 }, { "epoch": 0.11543340204120804, "grad_norm": 0.6140015632495577, "learning_rate": 5.887169293501015e-06, "loss": 0.0461, "step": 27665 }, { "epoch": 0.11545426475619831, "grad_norm": 0.5474899426199217, "learning_rate": 5.886637341200457e-06, "loss": 0.0396, "step": 27670 }, { "epoch": 0.11547512747118859, "grad_norm": 0.8035320565361627, "learning_rate": 5.886105533072134e-06, "loss": 0.0476, "step": 27675 }, { "epoch": 0.11549599018617887, "grad_norm": 1.17112159306249, "learning_rate": 5.885573869050937e-06, "loss": 0.0361, "step": 27680 }, { "epoch": 0.11551685290116914, "grad_norm": 1.10370394618903, "learning_rate": 5.8850423490717934e-06, "loss": 0.0405, "step": 27685 }, { "epoch": 0.11553771561615943, "grad_norm": 1.5190915421718292, "learning_rate": 5.8845109730696744e-06, "loss": 0.042, "step": 27690 }, { "epoch": 0.1155585783311497, "grad_norm": 1.2591889876736868, "learning_rate": 5.883979740979591e-06, "loss": 0.0483, "step": 27695 }, { "epoch": 0.11557944104613999, "grad_norm": 0.7852722556893534, "learning_rate": 5.883448652736596e-06, "loss": 0.0403, "step": 27700 }, { "epoch": 0.11560030376113026, "grad_norm": 0.8397105065546416, "learning_rate": 5.882917708275782e-06, "loss": 0.0434, "step": 27705 }, { "epoch": 0.11562116647612054, "grad_norm": 1.1215252733395824, "learning_rate": 5.8823869075322855e-06, "loss": 0.0446, "step": 27710 }, { "epoch": 0.11564202919111082, "grad_norm": 1.170550526173117, "learning_rate": 5.881856250441281e-06, "loss": 0.0422, "step": 27715 }, { "epoch": 0.11566289190610109, "grad_norm": 1.2547333466298443, "learning_rate": 5.881325736937985e-06, "loss": 0.0458, "step": 27720 }, { "epoch": 0.11568375462109137, "grad_norm": 0.7400208106369889, "learning_rate": 5.880795366957652e-06, "loss": 0.0418, "step": 27725 }, { "epoch": 0.11570461733608164, "grad_norm": 1.1181810680010624, "learning_rate": 5.880265140435585e-06, "loss": 0.039, "step": 27730 }, { "epoch": 0.11572548005107193, "grad_norm": 0.9663930087916721, "learning_rate": 5.879735057307119e-06, "loss": 0.0539, "step": 27735 }, { "epoch": 0.1157463427660622, "grad_norm": 1.029461154977608, "learning_rate": 5.8792051175076355e-06, "loss": 0.0438, "step": 27740 }, { "epoch": 0.11576720548105249, "grad_norm": 0.8035938448324407, "learning_rate": 5.878675320972554e-06, "loss": 0.0356, "step": 27745 }, { "epoch": 0.11578806819604276, "grad_norm": 1.0899370732375822, "learning_rate": 5.878145667637338e-06, "loss": 0.0517, "step": 27750 }, { "epoch": 0.11580893091103305, "grad_norm": 0.8542592478055313, "learning_rate": 5.877616157437487e-06, "loss": 0.0392, "step": 27755 }, { "epoch": 0.11582979362602332, "grad_norm": 1.2931485811480259, "learning_rate": 5.877086790308546e-06, "loss": 0.047, "step": 27760 }, { "epoch": 0.11585065634101359, "grad_norm": 1.0656376169081612, "learning_rate": 5.876557566186095e-06, "loss": 0.0505, "step": 27765 }, { "epoch": 0.11587151905600387, "grad_norm": 1.006648326387958, "learning_rate": 5.87602848500576e-06, "loss": 0.0409, "step": 27770 }, { "epoch": 0.11589238177099415, "grad_norm": 1.1899632258674133, "learning_rate": 5.875499546703205e-06, "loss": 0.049, "step": 27775 }, { "epoch": 0.11591324448598443, "grad_norm": 1.4669911174144843, "learning_rate": 5.874970751214137e-06, "loss": 0.0444, "step": 27780 }, { "epoch": 0.1159341072009747, "grad_norm": 1.17890561950488, "learning_rate": 5.874442098474297e-06, "loss": 0.0469, "step": 27785 }, { "epoch": 0.11595496991596499, "grad_norm": 2.0169491452031636, "learning_rate": 5.873913588419474e-06, "loss": 0.043, "step": 27790 }, { "epoch": 0.11597583263095526, "grad_norm": 0.9347227266248885, "learning_rate": 5.873385220985494e-06, "loss": 0.0491, "step": 27795 }, { "epoch": 0.11599669534594555, "grad_norm": 0.926619071859626, "learning_rate": 5.8728569961082224e-06, "loss": 0.054, "step": 27800 }, { "epoch": 0.11601755806093582, "grad_norm": 1.2124512401905865, "learning_rate": 5.872328913723568e-06, "loss": 0.0446, "step": 27805 }, { "epoch": 0.11603842077592609, "grad_norm": 1.0722157041069837, "learning_rate": 5.871800973767477e-06, "loss": 0.0511, "step": 27810 }, { "epoch": 0.11605928349091638, "grad_norm": 1.1005943092873633, "learning_rate": 5.871273176175939e-06, "loss": 0.0392, "step": 27815 }, { "epoch": 0.11608014620590665, "grad_norm": 0.702357109299833, "learning_rate": 5.8707455208849795e-06, "loss": 0.0489, "step": 27820 }, { "epoch": 0.11610100892089693, "grad_norm": 1.187876279937452, "learning_rate": 5.870218007830669e-06, "loss": 0.0292, "step": 27825 }, { "epoch": 0.1161218716358872, "grad_norm": 1.4240770887731726, "learning_rate": 5.869690636949113e-06, "loss": 0.0402, "step": 27830 }, { "epoch": 0.11614273435087749, "grad_norm": 1.6731512578551606, "learning_rate": 5.869163408176464e-06, "loss": 0.0566, "step": 27835 }, { "epoch": 0.11616359706586776, "grad_norm": 0.9515067134930694, "learning_rate": 5.868636321448907e-06, "loss": 0.0366, "step": 27840 }, { "epoch": 0.11618445978085803, "grad_norm": 1.2088573357079166, "learning_rate": 5.868109376702674e-06, "loss": 0.0484, "step": 27845 }, { "epoch": 0.11620532249584832, "grad_norm": 1.2431414365640798, "learning_rate": 5.867582573874032e-06, "loss": 0.0342, "step": 27850 }, { "epoch": 0.11622618521083859, "grad_norm": 0.7039924220085955, "learning_rate": 5.86705591289929e-06, "loss": 0.0556, "step": 27855 }, { "epoch": 0.11624704792582888, "grad_norm": 1.5555941899230379, "learning_rate": 5.866529393714798e-06, "loss": 0.0435, "step": 27860 }, { "epoch": 0.11626791064081915, "grad_norm": 0.6321715692325321, "learning_rate": 5.866003016256945e-06, "loss": 0.0521, "step": 27865 }, { "epoch": 0.11628877335580944, "grad_norm": 1.2949263131523363, "learning_rate": 5.865476780462159e-06, "loss": 0.0561, "step": 27870 }, { "epoch": 0.11630963607079971, "grad_norm": 1.0202872926451616, "learning_rate": 5.864950686266909e-06, "loss": 0.0447, "step": 27875 }, { "epoch": 0.11633049878578999, "grad_norm": 0.7879209041700809, "learning_rate": 5.864424733607705e-06, "loss": 0.0374, "step": 27880 }, { "epoch": 0.11635136150078026, "grad_norm": 0.8641735510351879, "learning_rate": 5.863898922421093e-06, "loss": 0.0403, "step": 27885 }, { "epoch": 0.11637222421577054, "grad_norm": 1.5602782007346028, "learning_rate": 5.863373252643664e-06, "loss": 0.0526, "step": 27890 }, { "epoch": 0.11639308693076082, "grad_norm": 1.0952634658695624, "learning_rate": 5.8628477242120444e-06, "loss": 0.0505, "step": 27895 }, { "epoch": 0.1164139496457511, "grad_norm": 0.9635549553848365, "learning_rate": 5.862322337062904e-06, "loss": 0.053, "step": 27900 }, { "epoch": 0.11643481236074138, "grad_norm": 0.8752070727612418, "learning_rate": 5.861797091132947e-06, "loss": 0.0356, "step": 27905 }, { "epoch": 0.11645567507573165, "grad_norm": 0.839240548737514, "learning_rate": 5.861271986358925e-06, "loss": 0.0479, "step": 27910 }, { "epoch": 0.11647653779072194, "grad_norm": 1.5354301566761612, "learning_rate": 5.860747022677622e-06, "loss": 0.0468, "step": 27915 }, { "epoch": 0.11649740050571221, "grad_norm": 1.4102780673129474, "learning_rate": 5.860222200025866e-06, "loss": 0.051, "step": 27920 }, { "epoch": 0.1165182632207025, "grad_norm": 1.1777569624156718, "learning_rate": 5.859697518340522e-06, "loss": 0.0453, "step": 27925 }, { "epoch": 0.11653912593569277, "grad_norm": 1.1261743766271568, "learning_rate": 5.859172977558497e-06, "loss": 0.0399, "step": 27930 }, { "epoch": 0.11655998865068304, "grad_norm": 1.231992679054653, "learning_rate": 5.858648577616735e-06, "loss": 0.0434, "step": 27935 }, { "epoch": 0.11658085136567332, "grad_norm": 0.8319165740601813, "learning_rate": 5.858124318452223e-06, "loss": 0.0373, "step": 27940 }, { "epoch": 0.1166017140806636, "grad_norm": 1.3711157040326545, "learning_rate": 5.857600200001982e-06, "loss": 0.0396, "step": 27945 }, { "epoch": 0.11662257679565388, "grad_norm": 0.5038884909681396, "learning_rate": 5.85707622220308e-06, "loss": 0.0334, "step": 27950 }, { "epoch": 0.11664343951064415, "grad_norm": 0.70384395481974, "learning_rate": 5.856552384992617e-06, "loss": 0.0416, "step": 27955 }, { "epoch": 0.11666430222563444, "grad_norm": 0.9259324230934216, "learning_rate": 5.856028688307736e-06, "loss": 0.0416, "step": 27960 }, { "epoch": 0.11668516494062471, "grad_norm": 0.9757534724970033, "learning_rate": 5.855505132085616e-06, "loss": 0.0479, "step": 27965 }, { "epoch": 0.116706027655615, "grad_norm": 0.8347751561724492, "learning_rate": 5.854981716263484e-06, "loss": 0.0496, "step": 27970 }, { "epoch": 0.11672689037060527, "grad_norm": 1.723637981328071, "learning_rate": 5.854458440778596e-06, "loss": 0.0448, "step": 27975 }, { "epoch": 0.11674775308559554, "grad_norm": 1.5253089033029208, "learning_rate": 5.853935305568254e-06, "loss": 0.0488, "step": 27980 }, { "epoch": 0.11676861580058583, "grad_norm": 1.0985265898415835, "learning_rate": 5.8534123105697945e-06, "loss": 0.0452, "step": 27985 }, { "epoch": 0.1167894785155761, "grad_norm": 1.342280035819917, "learning_rate": 5.852889455720597e-06, "loss": 0.0401, "step": 27990 }, { "epoch": 0.11681034123056638, "grad_norm": 0.8157157277252393, "learning_rate": 5.85236674095808e-06, "loss": 0.0426, "step": 27995 }, { "epoch": 0.11683120394555666, "grad_norm": 0.9244894336067904, "learning_rate": 5.8518441662196966e-06, "loss": 0.0408, "step": 28000 }, { "epoch": 0.11685206666054694, "grad_norm": 0.781206883390059, "learning_rate": 5.851321731442945e-06, "loss": 0.0444, "step": 28005 }, { "epoch": 0.11687292937553721, "grad_norm": 0.5949223524554705, "learning_rate": 5.850799436565356e-06, "loss": 0.0505, "step": 28010 }, { "epoch": 0.1168937920905275, "grad_norm": 1.0231795984558743, "learning_rate": 5.850277281524508e-06, "loss": 0.0547, "step": 28015 }, { "epoch": 0.11691465480551777, "grad_norm": 1.8807122748016232, "learning_rate": 5.84975526625801e-06, "loss": 0.0501, "step": 28020 }, { "epoch": 0.11693551752050804, "grad_norm": 0.8470821423795433, "learning_rate": 5.849233390703513e-06, "loss": 0.0482, "step": 28025 }, { "epoch": 0.11695638023549833, "grad_norm": 1.2929193599585487, "learning_rate": 5.848711654798711e-06, "loss": 0.0429, "step": 28030 }, { "epoch": 0.1169772429504886, "grad_norm": 1.0463790717234847, "learning_rate": 5.84819005848133e-06, "loss": 0.0455, "step": 28035 }, { "epoch": 0.11699810566547889, "grad_norm": 2.5466649643620474, "learning_rate": 5.847668601689138e-06, "loss": 0.0527, "step": 28040 }, { "epoch": 0.11701896838046916, "grad_norm": 0.8946673068654248, "learning_rate": 5.847147284359944e-06, "loss": 0.061, "step": 28045 }, { "epoch": 0.11703983109545944, "grad_norm": 2.379178780039033, "learning_rate": 5.846626106431592e-06, "loss": 0.0693, "step": 28050 }, { "epoch": 0.11706069381044971, "grad_norm": 1.1959411273405445, "learning_rate": 5.8461050678419665e-06, "loss": 0.045, "step": 28055 }, { "epoch": 0.11708155652544, "grad_norm": 1.1208940234522735, "learning_rate": 5.845584168528993e-06, "loss": 0.0455, "step": 28060 }, { "epoch": 0.11710241924043027, "grad_norm": 0.9705659722978749, "learning_rate": 5.84506340843063e-06, "loss": 0.0407, "step": 28065 }, { "epoch": 0.11712328195542054, "grad_norm": 1.3452080987684603, "learning_rate": 5.844542787484881e-06, "loss": 0.0522, "step": 28070 }, { "epoch": 0.11714414467041083, "grad_norm": 1.0263203131203877, "learning_rate": 5.844022305629784e-06, "loss": 0.0433, "step": 28075 }, { "epoch": 0.1171650073854011, "grad_norm": 1.1251193759297238, "learning_rate": 5.843501962803415e-06, "loss": 0.0494, "step": 28080 }, { "epoch": 0.11718587010039139, "grad_norm": 1.1299040537839755, "learning_rate": 5.842981758943895e-06, "loss": 0.0563, "step": 28085 }, { "epoch": 0.11720673281538166, "grad_norm": 1.0964854041103504, "learning_rate": 5.842461693989376e-06, "loss": 0.0398, "step": 28090 }, { "epoch": 0.11722759553037194, "grad_norm": 0.8273134894725658, "learning_rate": 5.8419417678780525e-06, "loss": 0.0451, "step": 28095 }, { "epoch": 0.11724845824536222, "grad_norm": 1.0157179571830093, "learning_rate": 5.841421980548155e-06, "loss": 0.0505, "step": 28100 }, { "epoch": 0.1172693209603525, "grad_norm": 0.8431699254874208, "learning_rate": 5.840902331937956e-06, "loss": 0.0367, "step": 28105 }, { "epoch": 0.11729018367534277, "grad_norm": 1.0302638292661668, "learning_rate": 5.840382821985765e-06, "loss": 0.0394, "step": 28110 }, { "epoch": 0.11731104639033305, "grad_norm": 1.1618259062307275, "learning_rate": 5.8398634506299265e-06, "loss": 0.0344, "step": 28115 }, { "epoch": 0.11733190910532333, "grad_norm": 1.0534918186710278, "learning_rate": 5.839344217808829e-06, "loss": 0.0497, "step": 28120 }, { "epoch": 0.1173527718203136, "grad_norm": 1.1104412289253973, "learning_rate": 5.838825123460895e-06, "loss": 0.0466, "step": 28125 }, { "epoch": 0.11737363453530389, "grad_norm": 1.6880973354695967, "learning_rate": 5.838306167524587e-06, "loss": 0.0425, "step": 28130 }, { "epoch": 0.11739449725029416, "grad_norm": 0.5371139829030688, "learning_rate": 5.837787349938407e-06, "loss": 0.0403, "step": 28135 }, { "epoch": 0.11741535996528445, "grad_norm": 0.9130594410644691, "learning_rate": 5.837268670640891e-06, "loss": 0.0456, "step": 28140 }, { "epoch": 0.11743622268027472, "grad_norm": 0.8712078942157778, "learning_rate": 5.836750129570619e-06, "loss": 0.0387, "step": 28145 }, { "epoch": 0.117457085395265, "grad_norm": 0.8301590028237847, "learning_rate": 5.836231726666206e-06, "loss": 0.0385, "step": 28150 }, { "epoch": 0.11747794811025528, "grad_norm": 0.721969911655987, "learning_rate": 5.835713461866305e-06, "loss": 0.0422, "step": 28155 }, { "epoch": 0.11749881082524555, "grad_norm": 0.8869180002019701, "learning_rate": 5.835195335109605e-06, "loss": 0.0439, "step": 28160 }, { "epoch": 0.11751967354023583, "grad_norm": 1.1554584135334443, "learning_rate": 5.8346773463348414e-06, "loss": 0.0468, "step": 28165 }, { "epoch": 0.1175405362552261, "grad_norm": 0.9500439175100179, "learning_rate": 5.834159495480777e-06, "loss": 0.0511, "step": 28170 }, { "epoch": 0.11756139897021639, "grad_norm": 0.9640280594306972, "learning_rate": 5.8336417824862194e-06, "loss": 0.0452, "step": 28175 }, { "epoch": 0.11758226168520666, "grad_norm": 0.9325750581567833, "learning_rate": 5.833124207290013e-06, "loss": 0.0439, "step": 28180 }, { "epoch": 0.11760312440019695, "grad_norm": 0.5731428083110361, "learning_rate": 5.832606769831039e-06, "loss": 0.0428, "step": 28185 }, { "epoch": 0.11762398711518722, "grad_norm": 0.7812971900502234, "learning_rate": 5.832089470048215e-06, "loss": 0.0412, "step": 28190 }, { "epoch": 0.1176448498301775, "grad_norm": 1.1611157645236512, "learning_rate": 5.831572307880503e-06, "loss": 0.0515, "step": 28195 }, { "epoch": 0.11766571254516778, "grad_norm": 1.2057915819036225, "learning_rate": 5.8310552832668955e-06, "loss": 0.0413, "step": 28200 }, { "epoch": 0.11768657526015805, "grad_norm": 1.1008392978453927, "learning_rate": 5.830538396146427e-06, "loss": 0.0377, "step": 28205 }, { "epoch": 0.11770743797514834, "grad_norm": 1.5073052231448738, "learning_rate": 5.830021646458167e-06, "loss": 0.0456, "step": 28210 }, { "epoch": 0.11772830069013861, "grad_norm": 1.1969148002606131, "learning_rate": 5.829505034141228e-06, "loss": 0.04, "step": 28215 }, { "epoch": 0.11774916340512889, "grad_norm": 1.6156319164753161, "learning_rate": 5.828988559134752e-06, "loss": 0.0357, "step": 28220 }, { "epoch": 0.11777002612011916, "grad_norm": 1.1636837362020196, "learning_rate": 5.8284722213779276e-06, "loss": 0.0461, "step": 28225 }, { "epoch": 0.11779088883510945, "grad_norm": 1.0628141519552765, "learning_rate": 5.827956020809975e-06, "loss": 0.0441, "step": 28230 }, { "epoch": 0.11781175155009972, "grad_norm": 0.7009805612914599, "learning_rate": 5.8274399573701546e-06, "loss": 0.0434, "step": 28235 }, { "epoch": 0.11783261426509001, "grad_norm": 1.0776533807916855, "learning_rate": 5.826924030997765e-06, "loss": 0.0403, "step": 28240 }, { "epoch": 0.11785347698008028, "grad_norm": 0.9531585069808237, "learning_rate": 5.826408241632138e-06, "loss": 0.0455, "step": 28245 }, { "epoch": 0.11787433969507055, "grad_norm": 1.2313735994035486, "learning_rate": 5.82589258921265e-06, "loss": 0.1892, "step": 28250 }, { "epoch": 0.11789520241006084, "grad_norm": 0.7810871080705726, "learning_rate": 5.82537707367871e-06, "loss": 0.0393, "step": 28255 }, { "epoch": 0.11791606512505111, "grad_norm": 1.145006327953935, "learning_rate": 5.824861694969763e-06, "loss": 0.0485, "step": 28260 }, { "epoch": 0.1179369278400414, "grad_norm": 1.1316920733528049, "learning_rate": 5.8243464530253e-06, "loss": 0.0406, "step": 28265 }, { "epoch": 0.11795779055503167, "grad_norm": 1.253764847320548, "learning_rate": 5.82383134778484e-06, "loss": 0.0355, "step": 28270 }, { "epoch": 0.11797865327002195, "grad_norm": 0.846941096470527, "learning_rate": 5.823316379187942e-06, "loss": 0.0429, "step": 28275 }, { "epoch": 0.11799951598501222, "grad_norm": 1.7418201373429685, "learning_rate": 5.822801547174205e-06, "loss": 0.0441, "step": 28280 }, { "epoch": 0.11802037870000251, "grad_norm": 0.8354142650638037, "learning_rate": 5.822286851683267e-06, "loss": 0.0396, "step": 28285 }, { "epoch": 0.11804124141499278, "grad_norm": 2.0697552311273646, "learning_rate": 5.821772292654795e-06, "loss": 0.0769, "step": 28290 }, { "epoch": 0.11806210412998305, "grad_norm": 0.9242302714620794, "learning_rate": 5.821257870028503e-06, "loss": 0.0433, "step": 28295 }, { "epoch": 0.11808296684497334, "grad_norm": 2.124858288279773, "learning_rate": 5.820743583744135e-06, "loss": 0.0525, "step": 28300 }, { "epoch": 0.11810382955996361, "grad_norm": 0.8166609604300394, "learning_rate": 5.820229433741477e-06, "loss": 0.0422, "step": 28305 }, { "epoch": 0.1181246922749539, "grad_norm": 0.8766774624386943, "learning_rate": 5.819715419960348e-06, "loss": 0.0374, "step": 28310 }, { "epoch": 0.11814555498994417, "grad_norm": 0.9036617668199447, "learning_rate": 5.819201542340609e-06, "loss": 0.0387, "step": 28315 }, { "epoch": 0.11816641770493445, "grad_norm": 0.7056390583466141, "learning_rate": 5.818687800822154e-06, "loss": 0.0373, "step": 28320 }, { "epoch": 0.11818728041992473, "grad_norm": 1.6142470604634422, "learning_rate": 5.818174195344919e-06, "loss": 0.0495, "step": 28325 }, { "epoch": 0.11820814313491501, "grad_norm": 1.1235858820432034, "learning_rate": 5.81766072584887e-06, "loss": 0.0557, "step": 28330 }, { "epoch": 0.11822900584990528, "grad_norm": 0.9084317372488515, "learning_rate": 5.8171473922740166e-06, "loss": 0.0428, "step": 28335 }, { "epoch": 0.11824986856489555, "grad_norm": 0.7600268276198447, "learning_rate": 5.816634194560402e-06, "loss": 0.0545, "step": 28340 }, { "epoch": 0.11827073127988584, "grad_norm": 1.1244667410038105, "learning_rate": 5.816121132648107e-06, "loss": 0.0364, "step": 28345 }, { "epoch": 0.11829159399487611, "grad_norm": 1.940672759723032, "learning_rate": 5.815608206477252e-06, "loss": 0.0568, "step": 28350 }, { "epoch": 0.1183124567098664, "grad_norm": 1.322150962949984, "learning_rate": 5.815095415987988e-06, "loss": 0.0417, "step": 28355 }, { "epoch": 0.11833331942485667, "grad_norm": 1.221698215738294, "learning_rate": 5.814582761120511e-06, "loss": 0.047, "step": 28360 }, { "epoch": 0.11835418213984696, "grad_norm": 0.7464334528632577, "learning_rate": 5.814070241815046e-06, "loss": 0.0352, "step": 28365 }, { "epoch": 0.11837504485483723, "grad_norm": 0.8908880433212555, "learning_rate": 5.8135578580118635e-06, "loss": 0.0383, "step": 28370 }, { "epoch": 0.11839590756982751, "grad_norm": 0.8770175426125414, "learning_rate": 5.813045609651262e-06, "loss": 0.0422, "step": 28375 }, { "epoch": 0.11841677028481778, "grad_norm": 1.105498317261855, "learning_rate": 5.812533496673584e-06, "loss": 0.0441, "step": 28380 }, { "epoch": 0.11843763299980806, "grad_norm": 1.2059303789009002, "learning_rate": 5.812021519019203e-06, "loss": 0.0378, "step": 28385 }, { "epoch": 0.11845849571479834, "grad_norm": 0.7419937350438308, "learning_rate": 5.811509676628534e-06, "loss": 0.04, "step": 28390 }, { "epoch": 0.11847935842978861, "grad_norm": 1.236809672400282, "learning_rate": 5.810997969442025e-06, "loss": 0.0514, "step": 28395 }, { "epoch": 0.1185002211447789, "grad_norm": 1.0631035729499825, "learning_rate": 5.810486397400163e-06, "loss": 0.0457, "step": 28400 }, { "epoch": 0.11852108385976917, "grad_norm": 0.6259950168251466, "learning_rate": 5.809974960443474e-06, "loss": 0.0491, "step": 28405 }, { "epoch": 0.11854194657475946, "grad_norm": 0.7912941880474629, "learning_rate": 5.809463658512514e-06, "loss": 0.0327, "step": 28410 }, { "epoch": 0.11856280928974973, "grad_norm": 0.6993289570904173, "learning_rate": 5.80895249154788e-06, "loss": 0.0393, "step": 28415 }, { "epoch": 0.11858367200474002, "grad_norm": 0.9034117522778976, "learning_rate": 5.808441459490206e-06, "loss": 0.048, "step": 28420 }, { "epoch": 0.11860453471973029, "grad_norm": 1.3085862912421868, "learning_rate": 5.807930562280162e-06, "loss": 0.055, "step": 28425 }, { "epoch": 0.11862539743472056, "grad_norm": 1.4309741390280617, "learning_rate": 5.80741979985845e-06, "loss": 0.0452, "step": 28430 }, { "epoch": 0.11864626014971084, "grad_norm": 0.7077705150691588, "learning_rate": 5.806909172165818e-06, "loss": 0.0412, "step": 28435 }, { "epoch": 0.11866712286470112, "grad_norm": 0.8879447045914244, "learning_rate": 5.806398679143042e-06, "loss": 0.0304, "step": 28440 }, { "epoch": 0.1186879855796914, "grad_norm": 1.4634145558460243, "learning_rate": 5.805888320730936e-06, "loss": 0.061, "step": 28445 }, { "epoch": 0.11870884829468167, "grad_norm": 1.197783842814706, "learning_rate": 5.805378096870356e-06, "loss": 0.0404, "step": 28450 }, { "epoch": 0.11872971100967196, "grad_norm": 0.9469988910858188, "learning_rate": 5.804868007502186e-06, "loss": 0.0697, "step": 28455 }, { "epoch": 0.11875057372466223, "grad_norm": 0.5879770888132898, "learning_rate": 5.804358052567352e-06, "loss": 0.04, "step": 28460 }, { "epoch": 0.11877143643965252, "grad_norm": 1.5473449175785097, "learning_rate": 5.803848232006816e-06, "loss": 0.0348, "step": 28465 }, { "epoch": 0.11879229915464279, "grad_norm": 1.1684951615060324, "learning_rate": 5.803338545761573e-06, "loss": 0.038, "step": 28470 }, { "epoch": 0.11881316186963306, "grad_norm": 1.14613728988202, "learning_rate": 5.8028289937726586e-06, "loss": 0.0413, "step": 28475 }, { "epoch": 0.11883402458462335, "grad_norm": 1.0547923176677008, "learning_rate": 5.802319575981141e-06, "loss": 0.0367, "step": 28480 }, { "epoch": 0.11885488729961362, "grad_norm": 0.6908423280617243, "learning_rate": 5.801810292328128e-06, "loss": 0.0427, "step": 28485 }, { "epoch": 0.1188757500146039, "grad_norm": 1.2786205933959354, "learning_rate": 5.801301142754758e-06, "loss": 0.0427, "step": 28490 }, { "epoch": 0.11889661272959418, "grad_norm": 1.513972112376771, "learning_rate": 5.800792127202212e-06, "loss": 0.0454, "step": 28495 }, { "epoch": 0.11891747544458446, "grad_norm": 1.1296649993939216, "learning_rate": 5.800283245611705e-06, "loss": 0.0584, "step": 28500 }, { "epoch": 0.11893833815957473, "grad_norm": 0.6990006562825076, "learning_rate": 5.799774497924483e-06, "loss": 0.0343, "step": 28505 }, { "epoch": 0.11895920087456502, "grad_norm": 1.437274930122164, "learning_rate": 5.799265884081839e-06, "loss": 0.0439, "step": 28510 }, { "epoch": 0.11898006358955529, "grad_norm": 0.7954859747690729, "learning_rate": 5.7987574040250895e-06, "loss": 0.0428, "step": 28515 }, { "epoch": 0.11900092630454556, "grad_norm": 0.8723068491666032, "learning_rate": 5.7982490576955955e-06, "loss": 0.0448, "step": 28520 }, { "epoch": 0.11902178901953585, "grad_norm": 1.6456051347541913, "learning_rate": 5.7977408450347515e-06, "loss": 0.0416, "step": 28525 }, { "epoch": 0.11904265173452612, "grad_norm": 1.0470769095853105, "learning_rate": 5.7972327659839875e-06, "loss": 0.0405, "step": 28530 }, { "epoch": 0.1190635144495164, "grad_norm": 0.9332817433286008, "learning_rate": 5.796724820484771e-06, "loss": 0.0327, "step": 28535 }, { "epoch": 0.11908437716450668, "grad_norm": 0.8457273379448019, "learning_rate": 5.796217008478603e-06, "loss": 0.0472, "step": 28540 }, { "epoch": 0.11910523987949696, "grad_norm": 1.4140670128765775, "learning_rate": 5.795709329907022e-06, "loss": 0.0417, "step": 28545 }, { "epoch": 0.11912610259448723, "grad_norm": 1.0639818648780812, "learning_rate": 5.795201784711602e-06, "loss": 0.0385, "step": 28550 }, { "epoch": 0.11914696530947752, "grad_norm": 1.8054379359738646, "learning_rate": 5.794694372833953e-06, "loss": 0.0549, "step": 28555 }, { "epoch": 0.11916782802446779, "grad_norm": 0.9522915050876667, "learning_rate": 5.794187094215719e-06, "loss": 0.0421, "step": 28560 }, { "epoch": 0.11918869073945806, "grad_norm": 0.6674816506518819, "learning_rate": 5.793679948798585e-06, "loss": 0.0398, "step": 28565 }, { "epoch": 0.11920955345444835, "grad_norm": 0.6342393414545359, "learning_rate": 5.793172936524264e-06, "loss": 0.0379, "step": 28570 }, { "epoch": 0.11923041616943862, "grad_norm": 0.5624200117284884, "learning_rate": 5.792666057334511e-06, "loss": 0.0296, "step": 28575 }, { "epoch": 0.11925127888442891, "grad_norm": 1.4388639884906542, "learning_rate": 5.792159311171113e-06, "loss": 0.0492, "step": 28580 }, { "epoch": 0.11927214159941918, "grad_norm": 1.278936269189978, "learning_rate": 5.791652697975894e-06, "loss": 0.0467, "step": 28585 }, { "epoch": 0.11929300431440946, "grad_norm": 0.8461535786416691, "learning_rate": 5.791146217690715e-06, "loss": 0.0521, "step": 28590 }, { "epoch": 0.11931386702939974, "grad_norm": 0.9210525763762308, "learning_rate": 5.790639870257472e-06, "loss": 0.0392, "step": 28595 }, { "epoch": 0.11933472974439002, "grad_norm": 1.5456279434115048, "learning_rate": 5.790133655618092e-06, "loss": 0.0387, "step": 28600 }, { "epoch": 0.1193555924593803, "grad_norm": 1.307411456102405, "learning_rate": 5.789627573714545e-06, "loss": 0.0526, "step": 28605 }, { "epoch": 0.11937645517437057, "grad_norm": 1.0424430012748958, "learning_rate": 5.78912162448883e-06, "loss": 0.0408, "step": 28610 }, { "epoch": 0.11939731788936085, "grad_norm": 1.1867313631130607, "learning_rate": 5.788615807882987e-06, "loss": 0.0402, "step": 28615 }, { "epoch": 0.11941818060435112, "grad_norm": 1.1963395148918108, "learning_rate": 5.788110123839087e-06, "loss": 0.0372, "step": 28620 }, { "epoch": 0.11943904331934141, "grad_norm": 0.9786881755948903, "learning_rate": 5.787604572299239e-06, "loss": 0.0357, "step": 28625 }, { "epoch": 0.11945990603433168, "grad_norm": 1.3436494248047797, "learning_rate": 5.787099153205586e-06, "loss": 0.0383, "step": 28630 }, { "epoch": 0.11948076874932197, "grad_norm": 1.9272161018938072, "learning_rate": 5.786593866500306e-06, "loss": 0.0508, "step": 28635 }, { "epoch": 0.11950163146431224, "grad_norm": 0.6852137921607256, "learning_rate": 5.786088712125615e-06, "loss": 0.0433, "step": 28640 }, { "epoch": 0.11952249417930252, "grad_norm": 1.6624365126739562, "learning_rate": 5.785583690023762e-06, "loss": 0.0579, "step": 28645 }, { "epoch": 0.1195433568942928, "grad_norm": 0.8578046504316509, "learning_rate": 5.7850788001370305e-06, "loss": 0.038, "step": 28650 }, { "epoch": 0.11956421960928307, "grad_norm": 0.6915625576533164, "learning_rate": 5.7845740424077415e-06, "loss": 0.0409, "step": 28655 }, { "epoch": 0.11958508232427335, "grad_norm": 1.2302602285014876, "learning_rate": 5.784069416778251e-06, "loss": 0.0377, "step": 28660 }, { "epoch": 0.11960594503926363, "grad_norm": 0.6639137726899116, "learning_rate": 5.78356492319095e-06, "loss": 0.0378, "step": 28665 }, { "epoch": 0.11962680775425391, "grad_norm": 1.094493821871956, "learning_rate": 5.78306056158826e-06, "loss": 0.0423, "step": 28670 }, { "epoch": 0.11964767046924418, "grad_norm": 1.0330640930765989, "learning_rate": 5.782556331912646e-06, "loss": 0.0449, "step": 28675 }, { "epoch": 0.11966853318423447, "grad_norm": 1.008543910584885, "learning_rate": 5.7820522341066044e-06, "loss": 0.0347, "step": 28680 }, { "epoch": 0.11968939589922474, "grad_norm": 0.843047310163362, "learning_rate": 5.781548268112664e-06, "loss": 0.0502, "step": 28685 }, { "epoch": 0.11971025861421503, "grad_norm": 1.4036480659867192, "learning_rate": 5.781044433873389e-06, "loss": 0.0435, "step": 28690 }, { "epoch": 0.1197311213292053, "grad_norm": 0.6475827334825975, "learning_rate": 5.780540731331384e-06, "loss": 0.0417, "step": 28695 }, { "epoch": 0.11975198404419557, "grad_norm": 1.3216031182836394, "learning_rate": 5.780037160429285e-06, "loss": 0.0545, "step": 28700 }, { "epoch": 0.11977284675918586, "grad_norm": 1.33457747099565, "learning_rate": 5.779533721109759e-06, "loss": 0.04, "step": 28705 }, { "epoch": 0.11979370947417613, "grad_norm": 0.7016082231719486, "learning_rate": 5.779030413315517e-06, "loss": 0.0287, "step": 28710 }, { "epoch": 0.11981457218916641, "grad_norm": 1.0030403574543967, "learning_rate": 5.778527236989297e-06, "loss": 0.0418, "step": 28715 }, { "epoch": 0.11983543490415668, "grad_norm": 1.2706222762498338, "learning_rate": 5.778024192073876e-06, "loss": 0.0375, "step": 28720 }, { "epoch": 0.11985629761914697, "grad_norm": 2.1688235329170786, "learning_rate": 5.777521278512062e-06, "loss": 0.046, "step": 28725 }, { "epoch": 0.11987716033413724, "grad_norm": 1.4207164089633708, "learning_rate": 5.777018496246704e-06, "loss": 0.0513, "step": 28730 }, { "epoch": 0.11989802304912753, "grad_norm": 1.9156105747478085, "learning_rate": 5.776515845220683e-06, "loss": 0.036, "step": 28735 }, { "epoch": 0.1199188857641178, "grad_norm": 1.2088861511618703, "learning_rate": 5.77601332537691e-06, "loss": 0.0425, "step": 28740 }, { "epoch": 0.11993974847910807, "grad_norm": 0.930027297633612, "learning_rate": 5.7755109366583384e-06, "loss": 0.0374, "step": 28745 }, { "epoch": 0.11996061119409836, "grad_norm": 0.7329578335814465, "learning_rate": 5.77500867900795e-06, "loss": 0.0356, "step": 28750 }, { "epoch": 0.11998147390908863, "grad_norm": 1.3380020173286098, "learning_rate": 5.774506552368765e-06, "loss": 0.0442, "step": 28755 }, { "epoch": 0.12000233662407891, "grad_norm": 0.9007828131082295, "learning_rate": 5.774004556683839e-06, "loss": 0.051, "step": 28760 }, { "epoch": 0.12002319933906919, "grad_norm": 1.30702243906847, "learning_rate": 5.773502691896259e-06, "loss": 0.0571, "step": 28765 }, { "epoch": 0.12004406205405947, "grad_norm": 1.942563957691824, "learning_rate": 5.7730009579491474e-06, "loss": 0.0514, "step": 28770 }, { "epoch": 0.12006492476904974, "grad_norm": 0.9153553295314152, "learning_rate": 5.772499354785665e-06, "loss": 0.0446, "step": 28775 }, { "epoch": 0.12008578748404003, "grad_norm": 1.1987450418682621, "learning_rate": 5.771997882349001e-06, "loss": 0.0454, "step": 28780 }, { "epoch": 0.1201066501990303, "grad_norm": 1.4365636641233448, "learning_rate": 5.771496540582383e-06, "loss": 0.0552, "step": 28785 }, { "epoch": 0.12012751291402057, "grad_norm": 1.1749631595842185, "learning_rate": 5.770995329429075e-06, "loss": 0.0441, "step": 28790 }, { "epoch": 0.12014837562901086, "grad_norm": 1.3377423553965908, "learning_rate": 5.770494248832371e-06, "loss": 0.042, "step": 28795 }, { "epoch": 0.12016923834400113, "grad_norm": 0.8155471983921507, "learning_rate": 5.7699932987356e-06, "loss": 0.0335, "step": 28800 }, { "epoch": 0.12019010105899142, "grad_norm": 1.7276376969651435, "learning_rate": 5.769492479082129e-06, "loss": 0.0557, "step": 28805 }, { "epoch": 0.12021096377398169, "grad_norm": 1.3500372899055968, "learning_rate": 5.768991789815357e-06, "loss": 0.0468, "step": 28810 }, { "epoch": 0.12023182648897197, "grad_norm": 1.6035240581187837, "learning_rate": 5.768491230878717e-06, "loss": 0.0515, "step": 28815 }, { "epoch": 0.12025268920396225, "grad_norm": 1.0846464852286863, "learning_rate": 5.767990802215676e-06, "loss": 0.0363, "step": 28820 }, { "epoch": 0.12027355191895253, "grad_norm": 0.8298211342566127, "learning_rate": 5.767490503769738e-06, "loss": 0.0454, "step": 28825 }, { "epoch": 0.1202944146339428, "grad_norm": 0.7939618985757406, "learning_rate": 5.7669903354844395e-06, "loss": 0.0536, "step": 28830 }, { "epoch": 0.12031527734893308, "grad_norm": 1.3447565484277468, "learning_rate": 5.76649029730335e-06, "loss": 0.0474, "step": 28835 }, { "epoch": 0.12033614006392336, "grad_norm": 1.1004056476342592, "learning_rate": 5.765990389170074e-06, "loss": 0.0448, "step": 28840 }, { "epoch": 0.12035700277891363, "grad_norm": 1.2788834373787406, "learning_rate": 5.765490611028254e-06, "loss": 0.053, "step": 28845 }, { "epoch": 0.12037786549390392, "grad_norm": 0.90919650259397, "learning_rate": 5.764990962821562e-06, "loss": 0.047, "step": 28850 }, { "epoch": 0.12039872820889419, "grad_norm": 0.8909514637847306, "learning_rate": 5.764491444493703e-06, "loss": 0.0617, "step": 28855 }, { "epoch": 0.12041959092388448, "grad_norm": 0.941419453528359, "learning_rate": 5.763992055988421e-06, "loss": 0.0406, "step": 28860 }, { "epoch": 0.12044045363887475, "grad_norm": 0.5407444723475465, "learning_rate": 5.7634927972494935e-06, "loss": 0.0374, "step": 28865 }, { "epoch": 0.12046131635386503, "grad_norm": 0.7632956870517933, "learning_rate": 5.762993668220729e-06, "loss": 0.0453, "step": 28870 }, { "epoch": 0.1204821790688553, "grad_norm": 0.5600097252731304, "learning_rate": 5.762494668845969e-06, "loss": 0.0302, "step": 28875 }, { "epoch": 0.12050304178384558, "grad_norm": 0.650298966095726, "learning_rate": 5.761995799069096e-06, "loss": 0.0469, "step": 28880 }, { "epoch": 0.12052390449883586, "grad_norm": 1.9298921664734716, "learning_rate": 5.761497058834019e-06, "loss": 0.049, "step": 28885 }, { "epoch": 0.12054476721382613, "grad_norm": 0.9206853844050731, "learning_rate": 5.760998448084686e-06, "loss": 0.0453, "step": 28890 }, { "epoch": 0.12056562992881642, "grad_norm": 0.96642714985152, "learning_rate": 5.760499966765076e-06, "loss": 0.0354, "step": 28895 }, { "epoch": 0.12058649264380669, "grad_norm": 1.614638330348862, "learning_rate": 5.760001614819203e-06, "loss": 0.0487, "step": 28900 }, { "epoch": 0.12060735535879698, "grad_norm": 1.058701673105197, "learning_rate": 5.759503392191114e-06, "loss": 0.0446, "step": 28905 }, { "epoch": 0.12062821807378725, "grad_norm": 0.7011483029621765, "learning_rate": 5.759005298824892e-06, "loss": 0.0423, "step": 28910 }, { "epoch": 0.12064908078877754, "grad_norm": 0.7818319415010677, "learning_rate": 5.758507334664653e-06, "loss": 0.0544, "step": 28915 }, { "epoch": 0.12066994350376781, "grad_norm": 1.2281478208047754, "learning_rate": 5.758009499654544e-06, "loss": 0.0616, "step": 28920 }, { "epoch": 0.12069080621875808, "grad_norm": 0.8652489914723462, "learning_rate": 5.75751179373875e-06, "loss": 0.045, "step": 28925 }, { "epoch": 0.12071166893374836, "grad_norm": 0.8401021826765595, "learning_rate": 5.7570142168614876e-06, "loss": 0.0499, "step": 28930 }, { "epoch": 0.12073253164873864, "grad_norm": 0.7385427828636137, "learning_rate": 5.756516768967007e-06, "loss": 0.0306, "step": 28935 }, { "epoch": 0.12075339436372892, "grad_norm": 1.1310787108241962, "learning_rate": 5.756019449999595e-06, "loss": 0.0569, "step": 28940 }, { "epoch": 0.1207742570787192, "grad_norm": 1.0284992388594114, "learning_rate": 5.755522259903565e-06, "loss": 0.0623, "step": 28945 }, { "epoch": 0.12079511979370948, "grad_norm": 0.9513669040806699, "learning_rate": 5.7550251986232715e-06, "loss": 0.048, "step": 28950 }, { "epoch": 0.12081598250869975, "grad_norm": 0.9140833276286622, "learning_rate": 5.754528266103102e-06, "loss": 0.0421, "step": 28955 }, { "epoch": 0.12083684522369004, "grad_norm": 1.7982393635998855, "learning_rate": 5.754031462287471e-06, "loss": 0.0353, "step": 28960 }, { "epoch": 0.12085770793868031, "grad_norm": 0.7921856324376229, "learning_rate": 5.7535347871208345e-06, "loss": 0.0513, "step": 28965 }, { "epoch": 0.12087857065367058, "grad_norm": 0.7656328539474281, "learning_rate": 5.753038240547677e-06, "loss": 0.0536, "step": 28970 }, { "epoch": 0.12089943336866087, "grad_norm": 0.9704621560001239, "learning_rate": 5.752541822512518e-06, "loss": 0.0446, "step": 28975 }, { "epoch": 0.12092029608365114, "grad_norm": 1.144113902368402, "learning_rate": 5.7520455329599125e-06, "loss": 0.0561, "step": 28980 }, { "epoch": 0.12094115879864142, "grad_norm": 0.8985195939194781, "learning_rate": 5.751549371834444e-06, "loss": 0.0362, "step": 28985 }, { "epoch": 0.1209620215136317, "grad_norm": 0.7767953399464256, "learning_rate": 5.751053339080735e-06, "loss": 0.0412, "step": 28990 }, { "epoch": 0.12098288422862198, "grad_norm": 2.109590243758327, "learning_rate": 5.750557434643438e-06, "loss": 0.0591, "step": 28995 }, { "epoch": 0.12100374694361225, "grad_norm": 1.129353977787029, "learning_rate": 5.750061658467242e-06, "loss": 0.038, "step": 29000 }, { "epoch": 0.12102460965860254, "grad_norm": 1.4388613019259537, "learning_rate": 5.7495660104968646e-06, "loss": 0.0408, "step": 29005 }, { "epoch": 0.12104547237359281, "grad_norm": 1.0227245036595187, "learning_rate": 5.749070490677059e-06, "loss": 0.0485, "step": 29010 }, { "epoch": 0.12106633508858308, "grad_norm": 1.4449410019426818, "learning_rate": 5.748575098952614e-06, "loss": 0.0417, "step": 29015 }, { "epoch": 0.12108719780357337, "grad_norm": 0.6737225382801194, "learning_rate": 5.748079835268351e-06, "loss": 0.048, "step": 29020 }, { "epoch": 0.12110806051856364, "grad_norm": 1.0159519176965788, "learning_rate": 5.747584699569121e-06, "loss": 0.0483, "step": 29025 }, { "epoch": 0.12112892323355393, "grad_norm": 1.231529043091947, "learning_rate": 5.747089691799812e-06, "loss": 0.0518, "step": 29030 }, { "epoch": 0.1211497859485442, "grad_norm": 2.310059693416853, "learning_rate": 5.746594811905343e-06, "loss": 0.0455, "step": 29035 }, { "epoch": 0.12117064866353448, "grad_norm": 1.2332898171135531, "learning_rate": 5.7461000598306685e-06, "loss": 0.04, "step": 29040 }, { "epoch": 0.12119151137852475, "grad_norm": 1.6011137427042783, "learning_rate": 5.745605435520774e-06, "loss": 0.0533, "step": 29045 }, { "epoch": 0.12121237409351503, "grad_norm": 1.8138082107611602, "learning_rate": 5.745110938920679e-06, "loss": 0.0405, "step": 29050 }, { "epoch": 0.12123323680850531, "grad_norm": 1.3285771348922453, "learning_rate": 5.7446165699754375e-06, "loss": 0.0439, "step": 29055 }, { "epoch": 0.12125409952349558, "grad_norm": 0.4086260771231395, "learning_rate": 5.744122328630133e-06, "loss": 0.0393, "step": 29060 }, { "epoch": 0.12127496223848587, "grad_norm": 1.3014614453008682, "learning_rate": 5.743628214829885e-06, "loss": 0.0515, "step": 29065 }, { "epoch": 0.12129582495347614, "grad_norm": 1.352248289326538, "learning_rate": 5.743134228519846e-06, "loss": 0.0459, "step": 29070 }, { "epoch": 0.12131668766846643, "grad_norm": 0.9718741812001488, "learning_rate": 5.742640369645201e-06, "loss": 0.0492, "step": 29075 }, { "epoch": 0.1213375503834567, "grad_norm": 1.4992250649298215, "learning_rate": 5.742146638151167e-06, "loss": 0.0509, "step": 29080 }, { "epoch": 0.12135841309844698, "grad_norm": 1.0257105361880705, "learning_rate": 5.741653033982996e-06, "loss": 0.047, "step": 29085 }, { "epoch": 0.12137927581343726, "grad_norm": 1.232745302542122, "learning_rate": 5.741159557085969e-06, "loss": 0.0427, "step": 29090 }, { "epoch": 0.12140013852842753, "grad_norm": 1.1779129685374585, "learning_rate": 5.740666207405405e-06, "loss": 0.0441, "step": 29095 }, { "epoch": 0.12142100124341781, "grad_norm": 0.863747161526065, "learning_rate": 5.7401729848866546e-06, "loss": 0.0451, "step": 29100 }, { "epoch": 0.12144186395840809, "grad_norm": 1.708574302212933, "learning_rate": 5.739679889475097e-06, "loss": 0.0464, "step": 29105 }, { "epoch": 0.12146272667339837, "grad_norm": 1.1027640833208223, "learning_rate": 5.73918692111615e-06, "loss": 0.0418, "step": 29110 }, { "epoch": 0.12148358938838864, "grad_norm": 1.173677486924194, "learning_rate": 5.73869407975526e-06, "loss": 0.0383, "step": 29115 }, { "epoch": 0.12150445210337893, "grad_norm": 1.1420981077664334, "learning_rate": 5.7382013653379095e-06, "loss": 0.0439, "step": 29120 }, { "epoch": 0.1215253148183692, "grad_norm": 1.408461587280151, "learning_rate": 5.7377087778096094e-06, "loss": 0.0463, "step": 29125 }, { "epoch": 0.12154617753335949, "grad_norm": 1.1008277691791462, "learning_rate": 5.737216317115909e-06, "loss": 0.0419, "step": 29130 }, { "epoch": 0.12156704024834976, "grad_norm": 0.9332276557457998, "learning_rate": 5.736723983202386e-06, "loss": 0.0354, "step": 29135 }, { "epoch": 0.12158790296334003, "grad_norm": 1.3020468303812083, "learning_rate": 5.7362317760146514e-06, "loss": 0.0373, "step": 29140 }, { "epoch": 0.12160876567833032, "grad_norm": 2.855102706444573, "learning_rate": 5.7357396954983515e-06, "loss": 0.0387, "step": 29145 }, { "epoch": 0.12162962839332059, "grad_norm": 1.3320895777840747, "learning_rate": 5.7352477415991615e-06, "loss": 0.046, "step": 29150 }, { "epoch": 0.12165049110831087, "grad_norm": 0.9538143059924753, "learning_rate": 5.734755914262791e-06, "loss": 0.0436, "step": 29155 }, { "epoch": 0.12167135382330115, "grad_norm": 0.6640114791521453, "learning_rate": 5.734264213434984e-06, "loss": 0.0346, "step": 29160 }, { "epoch": 0.12169221653829143, "grad_norm": 0.8946964969848574, "learning_rate": 5.733772639061512e-06, "loss": 0.04, "step": 29165 }, { "epoch": 0.1217130792532817, "grad_norm": 1.5611697347064113, "learning_rate": 5.733281191088187e-06, "loss": 0.0384, "step": 29170 }, { "epoch": 0.12173394196827199, "grad_norm": 0.7733957143292748, "learning_rate": 5.732789869460845e-06, "loss": 0.045, "step": 29175 }, { "epoch": 0.12175480468326226, "grad_norm": 0.7977728254656812, "learning_rate": 5.732298674125359e-06, "loss": 0.0453, "step": 29180 }, { "epoch": 0.12177566739825253, "grad_norm": 1.1325559781202486, "learning_rate": 5.731807605027635e-06, "loss": 0.0516, "step": 29185 }, { "epoch": 0.12179653011324282, "grad_norm": 0.8717634847799737, "learning_rate": 5.731316662113609e-06, "loss": 0.0365, "step": 29190 }, { "epoch": 0.12181739282823309, "grad_norm": 0.6525007142905068, "learning_rate": 5.730825845329252e-06, "loss": 0.0481, "step": 29195 }, { "epoch": 0.12183825554322338, "grad_norm": 0.9518095372054144, "learning_rate": 5.7303351546205646e-06, "loss": 0.0371, "step": 29200 }, { "epoch": 0.12185911825821365, "grad_norm": 1.0034195083828794, "learning_rate": 5.729844589933581e-06, "loss": 0.0316, "step": 29205 }, { "epoch": 0.12187998097320393, "grad_norm": 1.1439278867124798, "learning_rate": 5.729354151214369e-06, "loss": 0.0466, "step": 29210 }, { "epoch": 0.1219008436881942, "grad_norm": 0.8547729920169612, "learning_rate": 5.728863838409027e-06, "loss": 0.0571, "step": 29215 }, { "epoch": 0.12192170640318449, "grad_norm": 0.752982816468492, "learning_rate": 5.728373651463686e-06, "loss": 0.0325, "step": 29220 }, { "epoch": 0.12194256911817476, "grad_norm": 0.9884679080484974, "learning_rate": 5.727883590324509e-06, "loss": 0.0458, "step": 29225 }, { "epoch": 0.12196343183316503, "grad_norm": 1.003526564870422, "learning_rate": 5.727393654937694e-06, "loss": 0.0538, "step": 29230 }, { "epoch": 0.12198429454815532, "grad_norm": 0.9151233893983763, "learning_rate": 5.726903845249466e-06, "loss": 0.0578, "step": 29235 }, { "epoch": 0.12200515726314559, "grad_norm": 1.648251725695529, "learning_rate": 5.726414161206087e-06, "loss": 0.0508, "step": 29240 }, { "epoch": 0.12202601997813588, "grad_norm": 1.251598151754408, "learning_rate": 5.725924602753851e-06, "loss": 0.0506, "step": 29245 }, { "epoch": 0.12204688269312615, "grad_norm": 0.9779461237460628, "learning_rate": 5.725435169839079e-06, "loss": 0.043, "step": 29250 }, { "epoch": 0.12206774540811643, "grad_norm": 0.9146763677799669, "learning_rate": 5.7249458624081265e-06, "loss": 0.0392, "step": 29255 }, { "epoch": 0.1220886081231067, "grad_norm": 0.675868280513907, "learning_rate": 5.724456680407388e-06, "loss": 0.0426, "step": 29260 }, { "epoch": 0.12210947083809699, "grad_norm": 0.7628620415799379, "learning_rate": 5.723967623783279e-06, "loss": 0.0392, "step": 29265 }, { "epoch": 0.12213033355308726, "grad_norm": 0.7329425431233462, "learning_rate": 5.723478692482254e-06, "loss": 0.0341, "step": 29270 }, { "epoch": 0.12215119626807754, "grad_norm": 1.1465367070132957, "learning_rate": 5.722989886450799e-06, "loss": 0.0633, "step": 29275 }, { "epoch": 0.12217205898306782, "grad_norm": 0.8917849127789242, "learning_rate": 5.722501205635428e-06, "loss": 0.0488, "step": 29280 }, { "epoch": 0.1221929216980581, "grad_norm": 0.6455967129640297, "learning_rate": 5.722012649982692e-06, "loss": 0.0334, "step": 29285 }, { "epoch": 0.12221378441304838, "grad_norm": 1.5013364551856458, "learning_rate": 5.7215242194391706e-06, "loss": 0.0486, "step": 29290 }, { "epoch": 0.12223464712803865, "grad_norm": 1.0356478110739258, "learning_rate": 5.721035913951477e-06, "loss": 0.045, "step": 29295 }, { "epoch": 0.12225550984302894, "grad_norm": 0.9403137820185292, "learning_rate": 5.720547733466255e-06, "loss": 0.0394, "step": 29300 }, { "epoch": 0.12227637255801921, "grad_norm": 0.7353759842225934, "learning_rate": 5.720059677930181e-06, "loss": 0.034, "step": 29305 }, { "epoch": 0.1222972352730095, "grad_norm": 0.9429753336136215, "learning_rate": 5.719571747289963e-06, "loss": 0.0617, "step": 29310 }, { "epoch": 0.12231809798799977, "grad_norm": 0.8296898990146587, "learning_rate": 5.719083941492341e-06, "loss": 0.0485, "step": 29315 }, { "epoch": 0.12233896070299004, "grad_norm": 1.3310680637243686, "learning_rate": 5.718596260484088e-06, "loss": 0.0441, "step": 29320 }, { "epoch": 0.12235982341798032, "grad_norm": 1.3873311690650934, "learning_rate": 5.718108704212006e-06, "loss": 0.0449, "step": 29325 }, { "epoch": 0.1223806861329706, "grad_norm": 1.0732927063713007, "learning_rate": 5.717621272622932e-06, "loss": 0.0498, "step": 29330 }, { "epoch": 0.12240154884796088, "grad_norm": 1.0979139431242853, "learning_rate": 5.71713396566373e-06, "loss": 0.0471, "step": 29335 }, { "epoch": 0.12242241156295115, "grad_norm": 1.640965027283892, "learning_rate": 5.716646783281301e-06, "loss": 0.0589, "step": 29340 }, { "epoch": 0.12244327427794144, "grad_norm": 1.5384465976393629, "learning_rate": 5.7161597254225745e-06, "loss": 0.0471, "step": 29345 }, { "epoch": 0.12246413699293171, "grad_norm": 0.578692942891741, "learning_rate": 5.715672792034514e-06, "loss": 0.0313, "step": 29350 }, { "epoch": 0.122484999707922, "grad_norm": 1.264041821448578, "learning_rate": 5.71518598306411e-06, "loss": 0.0543, "step": 29355 }, { "epoch": 0.12250586242291227, "grad_norm": 1.3117307657088515, "learning_rate": 5.714699298458391e-06, "loss": 0.052, "step": 29360 }, { "epoch": 0.12252672513790254, "grad_norm": 0.6899641207519751, "learning_rate": 5.7142127381644105e-06, "loss": 0.0413, "step": 29365 }, { "epoch": 0.12254758785289283, "grad_norm": 1.4113206563251612, "learning_rate": 5.71372630212926e-06, "loss": 0.0496, "step": 29370 }, { "epoch": 0.1225684505678831, "grad_norm": 0.9163457879262993, "learning_rate": 5.713239990300059e-06, "loss": 0.039, "step": 29375 }, { "epoch": 0.12258931328287338, "grad_norm": 1.2748354429749584, "learning_rate": 5.712753802623956e-06, "loss": 0.0465, "step": 29380 }, { "epoch": 0.12261017599786365, "grad_norm": 1.6366378197917117, "learning_rate": 5.712267739048137e-06, "loss": 0.056, "step": 29385 }, { "epoch": 0.12263103871285394, "grad_norm": 1.0714119260949295, "learning_rate": 5.711781799519815e-06, "loss": 0.0459, "step": 29390 }, { "epoch": 0.12265190142784421, "grad_norm": 0.8527499865666001, "learning_rate": 5.711295983986234e-06, "loss": 0.0401, "step": 29395 }, { "epoch": 0.1226727641428345, "grad_norm": 0.9389819985294892, "learning_rate": 5.710810292394675e-06, "loss": 0.0366, "step": 29400 }, { "epoch": 0.12269362685782477, "grad_norm": 0.9791543121008462, "learning_rate": 5.710324724692443e-06, "loss": 0.048, "step": 29405 }, { "epoch": 0.12271448957281504, "grad_norm": 1.11762246102217, "learning_rate": 5.709839280826879e-06, "loss": 0.0418, "step": 29410 }, { "epoch": 0.12273535228780533, "grad_norm": 0.797176856312409, "learning_rate": 5.709353960745355e-06, "loss": 0.0494, "step": 29415 }, { "epoch": 0.1227562150027956, "grad_norm": 0.6219078810570464, "learning_rate": 5.708868764395273e-06, "loss": 0.0488, "step": 29420 }, { "epoch": 0.12277707771778588, "grad_norm": 0.773618887748572, "learning_rate": 5.7083836917240654e-06, "loss": 0.0495, "step": 29425 }, { "epoch": 0.12279794043277616, "grad_norm": 0.7920915705564598, "learning_rate": 5.7078987426792e-06, "loss": 0.0478, "step": 29430 }, { "epoch": 0.12281880314776644, "grad_norm": 1.0545123848089692, "learning_rate": 5.7074139172081695e-06, "loss": 0.0412, "step": 29435 }, { "epoch": 0.12283966586275671, "grad_norm": 1.0033354691945813, "learning_rate": 5.706929215258505e-06, "loss": 0.0474, "step": 29440 }, { "epoch": 0.122860528577747, "grad_norm": 0.8620053807860253, "learning_rate": 5.706444636777764e-06, "loss": 0.0305, "step": 29445 }, { "epoch": 0.12288139129273727, "grad_norm": 1.0407113289475014, "learning_rate": 5.705960181713535e-06, "loss": 0.0497, "step": 29450 }, { "epoch": 0.12290225400772754, "grad_norm": 2.1253517954438435, "learning_rate": 5.705475850013439e-06, "loss": 0.0631, "step": 29455 }, { "epoch": 0.12292311672271783, "grad_norm": 0.7714497869211713, "learning_rate": 5.704991641625131e-06, "loss": 0.0394, "step": 29460 }, { "epoch": 0.1229439794377081, "grad_norm": 1.3544818458833774, "learning_rate": 5.70450755649629e-06, "loss": 0.0497, "step": 29465 }, { "epoch": 0.12296484215269839, "grad_norm": 1.6888599687880137, "learning_rate": 5.704023594574634e-06, "loss": 0.0406, "step": 29470 }, { "epoch": 0.12298570486768866, "grad_norm": 1.0237947351538426, "learning_rate": 5.703539755807905e-06, "loss": 0.0367, "step": 29475 }, { "epoch": 0.12300656758267894, "grad_norm": 0.9291918040687436, "learning_rate": 5.7030560401438815e-06, "loss": 0.034, "step": 29480 }, { "epoch": 0.12302743029766922, "grad_norm": 1.3298780241693116, "learning_rate": 5.7025724475303705e-06, "loss": 0.0512, "step": 29485 }, { "epoch": 0.1230482930126595, "grad_norm": 0.898400979757149, "learning_rate": 5.702088977915211e-06, "loss": 0.0403, "step": 29490 }, { "epoch": 0.12306915572764977, "grad_norm": 1.4577938144744882, "learning_rate": 5.70160563124627e-06, "loss": 0.057, "step": 29495 }, { "epoch": 0.12309001844264005, "grad_norm": 0.9947391921010232, "learning_rate": 5.701122407471449e-06, "loss": 0.0341, "step": 29500 }, { "epoch": 0.12311088115763033, "grad_norm": 0.6951152307100226, "learning_rate": 5.70063930653868e-06, "loss": 0.0354, "step": 29505 }, { "epoch": 0.1231317438726206, "grad_norm": 1.1381691645184397, "learning_rate": 5.700156328395924e-06, "loss": 0.0441, "step": 29510 }, { "epoch": 0.12315260658761089, "grad_norm": 0.9292140992160419, "learning_rate": 5.699673472991173e-06, "loss": 0.0483, "step": 29515 }, { "epoch": 0.12317346930260116, "grad_norm": 0.7675015242882309, "learning_rate": 5.699190740272452e-06, "loss": 0.0358, "step": 29520 }, { "epoch": 0.12319433201759145, "grad_norm": 1.0916857576014491, "learning_rate": 5.698708130187814e-06, "loss": 0.0431, "step": 29525 }, { "epoch": 0.12321519473258172, "grad_norm": 0.8448744586503848, "learning_rate": 5.698225642685347e-06, "loss": 0.0394, "step": 29530 }, { "epoch": 0.123236057447572, "grad_norm": 1.1268970178168618, "learning_rate": 5.6977432777131635e-06, "loss": 0.0507, "step": 29535 }, { "epoch": 0.12325692016256228, "grad_norm": 0.7468254959178438, "learning_rate": 5.697261035219413e-06, "loss": 0.0332, "step": 29540 }, { "epoch": 0.12327778287755255, "grad_norm": 1.9332389115869126, "learning_rate": 5.69677891515227e-06, "loss": 0.0589, "step": 29545 }, { "epoch": 0.12329864559254283, "grad_norm": 0.9154225026375421, "learning_rate": 5.696296917459947e-06, "loss": 0.0331, "step": 29550 }, { "epoch": 0.1233195083075331, "grad_norm": 0.5677802575532428, "learning_rate": 5.695815042090679e-06, "loss": 0.0329, "step": 29555 }, { "epoch": 0.12334037102252339, "grad_norm": 0.7335384056574649, "learning_rate": 5.695333288992737e-06, "loss": 0.0348, "step": 29560 }, { "epoch": 0.12336123373751366, "grad_norm": 1.1988649559006541, "learning_rate": 5.694851658114421e-06, "loss": 0.0463, "step": 29565 }, { "epoch": 0.12338209645250395, "grad_norm": 0.8914759415116014, "learning_rate": 5.694370149404062e-06, "loss": 0.037, "step": 29570 }, { "epoch": 0.12340295916749422, "grad_norm": 1.1621776238483144, "learning_rate": 5.69388876281002e-06, "loss": 0.0459, "step": 29575 }, { "epoch": 0.1234238218824845, "grad_norm": 0.9354304098576305, "learning_rate": 5.6934074982806875e-06, "loss": 0.0469, "step": 29580 }, { "epoch": 0.12344468459747478, "grad_norm": 1.23906100751001, "learning_rate": 5.692926355764487e-06, "loss": 0.0516, "step": 29585 }, { "epoch": 0.12346554731246505, "grad_norm": 1.3948254002012574, "learning_rate": 5.692445335209872e-06, "loss": 0.0544, "step": 29590 }, { "epoch": 0.12348641002745533, "grad_norm": 1.139737502110993, "learning_rate": 5.691964436565326e-06, "loss": 0.0445, "step": 29595 }, { "epoch": 0.1235072727424456, "grad_norm": 0.7418104163451383, "learning_rate": 5.691483659779361e-06, "loss": 0.0676, "step": 29600 }, { "epoch": 0.12352813545743589, "grad_norm": 1.034915926542899, "learning_rate": 5.691003004800521e-06, "loss": 0.0467, "step": 29605 }, { "epoch": 0.12354899817242616, "grad_norm": 1.2429415675083846, "learning_rate": 5.690522471577384e-06, "loss": 0.0486, "step": 29610 }, { "epoch": 0.12356986088741645, "grad_norm": 0.8358259426907133, "learning_rate": 5.690042060058551e-06, "loss": 0.0392, "step": 29615 }, { "epoch": 0.12359072360240672, "grad_norm": 9.35148725732678, "learning_rate": 5.689561770192661e-06, "loss": 0.0425, "step": 29620 }, { "epoch": 0.12361158631739701, "grad_norm": 0.8822029911085472, "learning_rate": 5.689081601928378e-06, "loss": 0.047, "step": 29625 }, { "epoch": 0.12363244903238728, "grad_norm": 1.1351578875825088, "learning_rate": 5.688601555214397e-06, "loss": 0.0515, "step": 29630 }, { "epoch": 0.12365331174737755, "grad_norm": 0.6984810288092497, "learning_rate": 5.688121629999446e-06, "loss": 0.045, "step": 29635 }, { "epoch": 0.12367417446236784, "grad_norm": 1.0260940464018171, "learning_rate": 5.6876418262322804e-06, "loss": 0.0372, "step": 29640 }, { "epoch": 0.12369503717735811, "grad_norm": 1.6145382026184862, "learning_rate": 5.687162143861688e-06, "loss": 0.0344, "step": 29645 }, { "epoch": 0.1237158998923484, "grad_norm": 0.5602924617840497, "learning_rate": 5.686682582836487e-06, "loss": 0.0361, "step": 29650 }, { "epoch": 0.12373676260733867, "grad_norm": 0.7959742933609799, "learning_rate": 5.686203143105522e-06, "loss": 0.041, "step": 29655 }, { "epoch": 0.12375762532232895, "grad_norm": 0.8539182782746024, "learning_rate": 5.685723824617673e-06, "loss": 0.0407, "step": 29660 }, { "epoch": 0.12377848803731922, "grad_norm": 0.9604894580793033, "learning_rate": 5.685244627321847e-06, "loss": 0.0394, "step": 29665 }, { "epoch": 0.12379935075230951, "grad_norm": 1.7731672202839115, "learning_rate": 5.68476555116698e-06, "loss": 0.0409, "step": 29670 }, { "epoch": 0.12382021346729978, "grad_norm": 1.6856114200090029, "learning_rate": 5.684286596102041e-06, "loss": 0.0445, "step": 29675 }, { "epoch": 0.12384107618229005, "grad_norm": 0.9817604897520709, "learning_rate": 5.68380776207603e-06, "loss": 0.0575, "step": 29680 }, { "epoch": 0.12386193889728034, "grad_norm": 0.7373269342291825, "learning_rate": 5.6833290490379734e-06, "loss": 0.0422, "step": 29685 }, { "epoch": 0.12388280161227061, "grad_norm": 1.161214037318789, "learning_rate": 5.682850456936928e-06, "loss": 0.0333, "step": 29690 }, { "epoch": 0.1239036643272609, "grad_norm": 0.7943199983015463, "learning_rate": 5.682371985721985e-06, "loss": 0.0448, "step": 29695 }, { "epoch": 0.12392452704225117, "grad_norm": 0.6452520299375716, "learning_rate": 5.681893635342259e-06, "loss": 0.0331, "step": 29700 }, { "epoch": 0.12394538975724145, "grad_norm": 0.6837438122238246, "learning_rate": 5.681415405746902e-06, "loss": 0.031, "step": 29705 }, { "epoch": 0.12396625247223172, "grad_norm": 0.7066593084987081, "learning_rate": 5.680937296885089e-06, "loss": 0.0345, "step": 29710 }, { "epoch": 0.12398711518722201, "grad_norm": 0.9113446900899501, "learning_rate": 5.68045930870603e-06, "loss": 0.0452, "step": 29715 }, { "epoch": 0.12400797790221228, "grad_norm": 1.4697364665306982, "learning_rate": 5.6799814411589616e-06, "loss": 0.0541, "step": 29720 }, { "epoch": 0.12402884061720255, "grad_norm": 1.7449660811520733, "learning_rate": 5.679503694193153e-06, "loss": 0.0531, "step": 29725 }, { "epoch": 0.12404970333219284, "grad_norm": 1.1069005422122598, "learning_rate": 5.679026067757899e-06, "loss": 0.0542, "step": 29730 }, { "epoch": 0.12407056604718311, "grad_norm": 1.2806119174886035, "learning_rate": 5.67854856180253e-06, "loss": 0.0484, "step": 29735 }, { "epoch": 0.1240914287621734, "grad_norm": 1.0668173062892776, "learning_rate": 5.678071176276403e-06, "loss": 0.0366, "step": 29740 }, { "epoch": 0.12411229147716367, "grad_norm": 0.8751358289205965, "learning_rate": 5.677593911128905e-06, "loss": 0.028, "step": 29745 }, { "epoch": 0.12413315419215395, "grad_norm": 0.5217589085678443, "learning_rate": 5.67711676630945e-06, "loss": 0.0463, "step": 29750 }, { "epoch": 0.12415401690714423, "grad_norm": 1.8526444368714832, "learning_rate": 5.6766397417674915e-06, "loss": 0.0475, "step": 29755 }, { "epoch": 0.12417487962213451, "grad_norm": 0.8400610506265414, "learning_rate": 5.676162837452498e-06, "loss": 0.042, "step": 29760 }, { "epoch": 0.12419574233712478, "grad_norm": 1.7552458393654917, "learning_rate": 5.6756860533139806e-06, "loss": 0.0408, "step": 29765 }, { "epoch": 0.12421660505211506, "grad_norm": 1.9248911589846929, "learning_rate": 5.675209389301474e-06, "loss": 0.066, "step": 29770 }, { "epoch": 0.12423746776710534, "grad_norm": 1.0688579788672032, "learning_rate": 5.674732845364542e-06, "loss": 0.0525, "step": 29775 }, { "epoch": 0.12425833048209561, "grad_norm": 0.9630327715521856, "learning_rate": 5.6742564214527826e-06, "loss": 0.0454, "step": 29780 }, { "epoch": 0.1242791931970859, "grad_norm": 0.7996577056989822, "learning_rate": 5.673780117515819e-06, "loss": 0.0456, "step": 29785 }, { "epoch": 0.12430005591207617, "grad_norm": 1.265864903948286, "learning_rate": 5.673303933503304e-06, "loss": 0.0433, "step": 29790 }, { "epoch": 0.12432091862706646, "grad_norm": 1.32835812738498, "learning_rate": 5.672827869364924e-06, "loss": 0.0519, "step": 29795 }, { "epoch": 0.12434178134205673, "grad_norm": 1.1232358835224483, "learning_rate": 5.672351925050391e-06, "loss": 0.0382, "step": 29800 }, { "epoch": 0.12436264405704701, "grad_norm": 0.9210979383076452, "learning_rate": 5.671876100509448e-06, "loss": 0.0411, "step": 29805 }, { "epoch": 0.12438350677203729, "grad_norm": 1.03103616825131, "learning_rate": 5.6714003956918695e-06, "loss": 0.0315, "step": 29810 }, { "epoch": 0.12440436948702756, "grad_norm": 1.1999876169953934, "learning_rate": 5.6709248105474545e-06, "loss": 0.0584, "step": 29815 }, { "epoch": 0.12442523220201784, "grad_norm": 1.3935633327572239, "learning_rate": 5.670449345026036e-06, "loss": 0.0595, "step": 29820 }, { "epoch": 0.12444609491700812, "grad_norm": 0.9059299016739016, "learning_rate": 5.669973999077474e-06, "loss": 0.0461, "step": 29825 }, { "epoch": 0.1244669576319984, "grad_norm": 1.2982751592164448, "learning_rate": 5.66949877265166e-06, "loss": 0.0527, "step": 29830 }, { "epoch": 0.12448782034698867, "grad_norm": 0.9723001335675175, "learning_rate": 5.669023665698511e-06, "loss": 0.0457, "step": 29835 }, { "epoch": 0.12450868306197896, "grad_norm": 1.572329906465817, "learning_rate": 5.66854867816798e-06, "loss": 0.0483, "step": 29840 }, { "epoch": 0.12452954577696923, "grad_norm": 2.046121551931359, "learning_rate": 5.668073810010043e-06, "loss": 0.0341, "step": 29845 }, { "epoch": 0.12455040849195952, "grad_norm": 0.9496845771590153, "learning_rate": 5.667599061174708e-06, "loss": 0.0561, "step": 29850 }, { "epoch": 0.12457127120694979, "grad_norm": 1.1788553197777616, "learning_rate": 5.6671244316120115e-06, "loss": 0.0455, "step": 29855 }, { "epoch": 0.12459213392194006, "grad_norm": 1.3034526414656704, "learning_rate": 5.666649921272021e-06, "loss": 0.0439, "step": 29860 }, { "epoch": 0.12461299663693035, "grad_norm": 1.257679509108284, "learning_rate": 5.66617553010483e-06, "loss": 0.0434, "step": 29865 }, { "epoch": 0.12463385935192062, "grad_norm": 0.9036139705972412, "learning_rate": 5.665701258060566e-06, "loss": 0.0408, "step": 29870 }, { "epoch": 0.1246547220669109, "grad_norm": 0.9917871920405678, "learning_rate": 5.665227105089381e-06, "loss": 0.0512, "step": 29875 }, { "epoch": 0.12467558478190117, "grad_norm": 0.9135474018185301, "learning_rate": 5.66475307114146e-06, "loss": 0.037, "step": 29880 }, { "epoch": 0.12469644749689146, "grad_norm": 2.33724666313833, "learning_rate": 5.664279156167013e-06, "loss": 0.0388, "step": 29885 }, { "epoch": 0.12471731021188173, "grad_norm": 1.2535734487991714, "learning_rate": 5.663805360116283e-06, "loss": 0.0536, "step": 29890 }, { "epoch": 0.12473817292687202, "grad_norm": 1.7168642650222243, "learning_rate": 5.6633316829395425e-06, "loss": 0.049, "step": 29895 }, { "epoch": 0.12475903564186229, "grad_norm": 1.11179935566727, "learning_rate": 5.662858124587088e-06, "loss": 0.0441, "step": 29900 }, { "epoch": 0.12477989835685256, "grad_norm": 0.6294650874223447, "learning_rate": 5.662384685009249e-06, "loss": 0.0454, "step": 29905 }, { "epoch": 0.12480076107184285, "grad_norm": 1.1826295084417309, "learning_rate": 5.661911364156385e-06, "loss": 0.0393, "step": 29910 }, { "epoch": 0.12482162378683312, "grad_norm": 2.3638873478806315, "learning_rate": 5.661438161978881e-06, "loss": 0.0504, "step": 29915 }, { "epoch": 0.1248424865018234, "grad_norm": 0.8828212783316173, "learning_rate": 5.660965078427156e-06, "loss": 0.0333, "step": 29920 }, { "epoch": 0.12486334921681368, "grad_norm": 1.2470682693710156, "learning_rate": 5.6604921134516514e-06, "loss": 0.0526, "step": 29925 }, { "epoch": 0.12488421193180396, "grad_norm": 4.845917219321547, "learning_rate": 5.660019267002843e-06, "loss": 0.0405, "step": 29930 }, { "epoch": 0.12490507464679423, "grad_norm": 0.7324596018385042, "learning_rate": 5.6595465390312345e-06, "loss": 0.0392, "step": 29935 }, { "epoch": 0.12492593736178452, "grad_norm": 1.0001003524852043, "learning_rate": 5.659073929487355e-06, "loss": 0.0489, "step": 29940 }, { "epoch": 0.12494680007677479, "grad_norm": 1.8963937823302117, "learning_rate": 5.658601438321768e-06, "loss": 0.053, "step": 29945 }, { "epoch": 0.12496766279176506, "grad_norm": 1.023072511489903, "learning_rate": 5.658129065485063e-06, "loss": 0.0493, "step": 29950 }, { "epoch": 0.12498852550675535, "grad_norm": 1.0078763504856458, "learning_rate": 5.6576568109278576e-06, "loss": 0.0505, "step": 29955 }, { "epoch": 0.12500938822174562, "grad_norm": 1.3531407520877259, "learning_rate": 5.657184674600799e-06, "loss": 0.0524, "step": 29960 }, { "epoch": 0.1250302509367359, "grad_norm": 0.9708913890304087, "learning_rate": 5.6567126564545625e-06, "loss": 0.0392, "step": 29965 }, { "epoch": 0.1250511136517262, "grad_norm": 0.5574348882617048, "learning_rate": 5.656240756439856e-06, "loss": 0.0398, "step": 29970 }, { "epoch": 0.12507197636671646, "grad_norm": 1.4998803498914186, "learning_rate": 5.6557689745074105e-06, "loss": 0.0312, "step": 29975 }, { "epoch": 0.12509283908170674, "grad_norm": 0.6788644736390536, "learning_rate": 5.65529731060799e-06, "loss": 0.041, "step": 29980 }, { "epoch": 0.125113701796697, "grad_norm": 4.364020849505206, "learning_rate": 5.654825764692385e-06, "loss": 0.0484, "step": 29985 }, { "epoch": 0.12513456451168728, "grad_norm": 1.3713866199967866, "learning_rate": 5.654354336711416e-06, "loss": 0.0446, "step": 29990 }, { "epoch": 0.12515542722667758, "grad_norm": 1.1478799225524718, "learning_rate": 5.653883026615933e-06, "loss": 0.0385, "step": 29995 }, { "epoch": 0.12517628994166785, "grad_norm": 0.8282200902191287, "learning_rate": 5.65341183435681e-06, "loss": 0.0438, "step": 30000 }, { "epoch": 0.12519715265665812, "grad_norm": 0.9450698150408275, "learning_rate": 5.652940759884955e-06, "loss": 0.0553, "step": 30005 }, { "epoch": 0.1252180153716484, "grad_norm": 0.8839245617554577, "learning_rate": 5.652469803151303e-06, "loss": 0.0469, "step": 30010 }, { "epoch": 0.1252388780866387, "grad_norm": 3.1961215116270574, "learning_rate": 5.651998964106819e-06, "loss": 0.0379, "step": 30015 }, { "epoch": 0.12525974080162897, "grad_norm": 1.1757451817790978, "learning_rate": 5.65152824270249e-06, "loss": 0.0538, "step": 30020 }, { "epoch": 0.12528060351661924, "grad_norm": 0.8485064561817899, "learning_rate": 5.65105763888934e-06, "loss": 0.0363, "step": 30025 }, { "epoch": 0.1253014662316095, "grad_norm": 1.080138266216007, "learning_rate": 5.650587152618418e-06, "loss": 0.0397, "step": 30030 }, { "epoch": 0.12532232894659978, "grad_norm": 2.3855946586201178, "learning_rate": 5.650116783840799e-06, "loss": 0.0509, "step": 30035 }, { "epoch": 0.12534319166159008, "grad_norm": 1.3743816208258068, "learning_rate": 5.649646532507591e-06, "loss": 0.0398, "step": 30040 }, { "epoch": 0.12536405437658035, "grad_norm": 0.8668218280086954, "learning_rate": 5.649176398569929e-06, "loss": 0.0588, "step": 30045 }, { "epoch": 0.12538491709157062, "grad_norm": 0.8739210807302875, "learning_rate": 5.6487063819789736e-06, "loss": 0.0701, "step": 30050 }, { "epoch": 0.1254057798065609, "grad_norm": 0.9329217745012152, "learning_rate": 5.648236482685917e-06, "loss": 0.0419, "step": 30055 }, { "epoch": 0.1254266425215512, "grad_norm": 1.0289364285628497, "learning_rate": 5.647766700641981e-06, "loss": 0.0408, "step": 30060 }, { "epoch": 0.12544750523654147, "grad_norm": 1.451631923657129, "learning_rate": 5.647297035798411e-06, "loss": 0.046, "step": 30065 }, { "epoch": 0.12546836795153174, "grad_norm": 1.5624659656347495, "learning_rate": 5.646827488106485e-06, "loss": 0.0521, "step": 30070 }, { "epoch": 0.125489230666522, "grad_norm": 0.9757938770715939, "learning_rate": 5.646358057517509e-06, "loss": 0.0454, "step": 30075 }, { "epoch": 0.12551009338151228, "grad_norm": 1.0691075461080997, "learning_rate": 5.645888743982814e-06, "loss": 0.0375, "step": 30080 }, { "epoch": 0.12553095609650258, "grad_norm": 0.815507633254169, "learning_rate": 5.645419547453762e-06, "loss": 0.0364, "step": 30085 }, { "epoch": 0.12555181881149285, "grad_norm": 1.1672146508466859, "learning_rate": 5.644950467881744e-06, "loss": 0.0442, "step": 30090 }, { "epoch": 0.12557268152648313, "grad_norm": 0.6556153276710309, "learning_rate": 5.644481505218177e-06, "loss": 0.0332, "step": 30095 }, { "epoch": 0.1255935442414734, "grad_norm": 0.7634049884893914, "learning_rate": 5.644012659414507e-06, "loss": 0.037, "step": 30100 }, { "epoch": 0.1256144069564637, "grad_norm": 1.5189941695071096, "learning_rate": 5.6435439304222105e-06, "loss": 0.0483, "step": 30105 }, { "epoch": 0.12563526967145397, "grad_norm": 0.790555446049606, "learning_rate": 5.643075318192788e-06, "loss": 0.0452, "step": 30110 }, { "epoch": 0.12565613238644424, "grad_norm": 1.617387747158015, "learning_rate": 5.642606822677773e-06, "loss": 0.0543, "step": 30115 }, { "epoch": 0.1256769951014345, "grad_norm": 0.7756401253502866, "learning_rate": 5.642138443828722e-06, "loss": 0.0334, "step": 30120 }, { "epoch": 0.12569785781642479, "grad_norm": 1.0193256259839556, "learning_rate": 5.641670181597224e-06, "loss": 0.0415, "step": 30125 }, { "epoch": 0.12571872053141508, "grad_norm": 1.2306362838854856, "learning_rate": 5.641202035934894e-06, "loss": 0.0434, "step": 30130 }, { "epoch": 0.12573958324640536, "grad_norm": 0.8731423757773161, "learning_rate": 5.640734006793375e-06, "loss": 0.0458, "step": 30135 }, { "epoch": 0.12576044596139563, "grad_norm": 0.5815524279096627, "learning_rate": 5.640266094124338e-06, "loss": 0.0393, "step": 30140 }, { "epoch": 0.1257813086763859, "grad_norm": 1.317054267381536, "learning_rate": 5.639798297879485e-06, "loss": 0.0533, "step": 30145 }, { "epoch": 0.1258021713913762, "grad_norm": 1.4181902384259615, "learning_rate": 5.639330618010541e-06, "loss": 0.0485, "step": 30150 }, { "epoch": 0.12582303410636647, "grad_norm": 1.3868717444213863, "learning_rate": 5.638863054469263e-06, "loss": 0.0663, "step": 30155 }, { "epoch": 0.12584389682135674, "grad_norm": 0.8289365498167451, "learning_rate": 5.638395607207437e-06, "loss": 0.0516, "step": 30160 }, { "epoch": 0.12586475953634702, "grad_norm": 1.797943591715332, "learning_rate": 5.637928276176869e-06, "loss": 0.0479, "step": 30165 }, { "epoch": 0.1258856222513373, "grad_norm": 1.2201684077887365, "learning_rate": 5.637461061329403e-06, "loss": 0.04, "step": 30170 }, { "epoch": 0.1259064849663276, "grad_norm": 1.1909269889708718, "learning_rate": 5.6369939626169054e-06, "loss": 0.0514, "step": 30175 }, { "epoch": 0.12592734768131786, "grad_norm": 0.6579080627973262, "learning_rate": 5.636526979991272e-06, "loss": 0.0355, "step": 30180 }, { "epoch": 0.12594821039630813, "grad_norm": 0.8222595999328843, "learning_rate": 5.636060113404425e-06, "loss": 0.051, "step": 30185 }, { "epoch": 0.1259690731112984, "grad_norm": 1.1288478684909948, "learning_rate": 5.635593362808317e-06, "loss": 0.0443, "step": 30190 }, { "epoch": 0.1259899358262887, "grad_norm": 1.4156293363507502, "learning_rate": 5.635126728154925e-06, "loss": 0.0423, "step": 30195 }, { "epoch": 0.12601079854127897, "grad_norm": 0.8050098946462344, "learning_rate": 5.6346602093962585e-06, "loss": 0.0342, "step": 30200 }, { "epoch": 0.12603166125626925, "grad_norm": 0.6925867430358069, "learning_rate": 5.634193806484351e-06, "loss": 0.051, "step": 30205 }, { "epoch": 0.12605252397125952, "grad_norm": 0.8611602358645337, "learning_rate": 5.633727519371265e-06, "loss": 0.0323, "step": 30210 }, { "epoch": 0.1260733866862498, "grad_norm": 1.156626480478061, "learning_rate": 5.633261348009091e-06, "loss": 0.0405, "step": 30215 }, { "epoch": 0.1260942494012401, "grad_norm": 0.935234140447877, "learning_rate": 5.6327952923499465e-06, "loss": 0.0464, "step": 30220 }, { "epoch": 0.12611511211623036, "grad_norm": 0.987739036625582, "learning_rate": 5.6323293523459766e-06, "loss": 0.0366, "step": 30225 }, { "epoch": 0.12613597483122063, "grad_norm": 0.7642108881818858, "learning_rate": 5.631863527949356e-06, "loss": 0.0463, "step": 30230 }, { "epoch": 0.1261568375462109, "grad_norm": 0.6664247012900172, "learning_rate": 5.631397819112285e-06, "loss": 0.0352, "step": 30235 }, { "epoch": 0.1261777002612012, "grad_norm": 1.0456575989761685, "learning_rate": 5.630932225786994e-06, "loss": 0.039, "step": 30240 }, { "epoch": 0.12619856297619148, "grad_norm": 0.8900661161656092, "learning_rate": 5.630466747925737e-06, "loss": 0.0409, "step": 30245 }, { "epoch": 0.12621942569118175, "grad_norm": 1.6123760797922173, "learning_rate": 5.6300013854808e-06, "loss": 0.0409, "step": 30250 }, { "epoch": 0.12624028840617202, "grad_norm": 1.7628526289320559, "learning_rate": 5.6295361384044935e-06, "loss": 0.0738, "step": 30255 }, { "epoch": 0.1262611511211623, "grad_norm": 1.5276474331671188, "learning_rate": 5.629071006649157e-06, "loss": 0.0457, "step": 30260 }, { "epoch": 0.1262820138361526, "grad_norm": 0.9634606331617087, "learning_rate": 5.628605990167157e-06, "loss": 0.0451, "step": 30265 }, { "epoch": 0.12630287655114286, "grad_norm": 0.8923158105630189, "learning_rate": 5.6281410889108886e-06, "loss": 0.0357, "step": 30270 }, { "epoch": 0.12632373926613313, "grad_norm": 1.2610226837158902, "learning_rate": 5.627676302832774e-06, "loss": 0.0585, "step": 30275 }, { "epoch": 0.1263446019811234, "grad_norm": 0.7016329312512861, "learning_rate": 5.62721163188526e-06, "loss": 0.0442, "step": 30280 }, { "epoch": 0.1263654646961137, "grad_norm": 0.5540908393534729, "learning_rate": 5.626747076020827e-06, "loss": 0.0349, "step": 30285 }, { "epoch": 0.12638632741110398, "grad_norm": 1.3951673172887324, "learning_rate": 5.626282635191976e-06, "loss": 0.0404, "step": 30290 }, { "epoch": 0.12640719012609425, "grad_norm": 0.8080134871919313, "learning_rate": 5.625818309351241e-06, "loss": 0.053, "step": 30295 }, { "epoch": 0.12642805284108452, "grad_norm": 1.0460616585119766, "learning_rate": 5.62535409845118e-06, "loss": 0.0495, "step": 30300 }, { "epoch": 0.1264489155560748, "grad_norm": 0.4895551454619648, "learning_rate": 5.62489000244438e-06, "loss": 0.0331, "step": 30305 }, { "epoch": 0.1264697782710651, "grad_norm": 1.1083701463020528, "learning_rate": 5.624426021283455e-06, "loss": 0.0412, "step": 30310 }, { "epoch": 0.12649064098605536, "grad_norm": 1.6280554721038831, "learning_rate": 5.623962154921045e-06, "loss": 0.0452, "step": 30315 }, { "epoch": 0.12651150370104564, "grad_norm": 0.9851291787195866, "learning_rate": 5.623498403309822e-06, "loss": 0.0449, "step": 30320 }, { "epoch": 0.1265323664160359, "grad_norm": 0.9169802356810706, "learning_rate": 5.623034766402477e-06, "loss": 0.034, "step": 30325 }, { "epoch": 0.1265532291310262, "grad_norm": 1.0017838302717288, "learning_rate": 5.622571244151736e-06, "loss": 0.0392, "step": 30330 }, { "epoch": 0.12657409184601648, "grad_norm": 1.45829328727908, "learning_rate": 5.622107836510351e-06, "loss": 0.0499, "step": 30335 }, { "epoch": 0.12659495456100675, "grad_norm": 0.932925456418477, "learning_rate": 5.6216445434310966e-06, "loss": 0.0366, "step": 30340 }, { "epoch": 0.12661581727599702, "grad_norm": 1.136671617040718, "learning_rate": 5.62118136486678e-06, "loss": 0.0346, "step": 30345 }, { "epoch": 0.1266366799909873, "grad_norm": 1.4400412572129013, "learning_rate": 5.620718300770233e-06, "loss": 0.0506, "step": 30350 }, { "epoch": 0.1266575427059776, "grad_norm": 0.9880634519524638, "learning_rate": 5.620255351094313e-06, "loss": 0.0521, "step": 30355 }, { "epoch": 0.12667840542096787, "grad_norm": 0.7227854079983597, "learning_rate": 5.619792515791909e-06, "loss": 0.0452, "step": 30360 }, { "epoch": 0.12669926813595814, "grad_norm": 1.0712869501190159, "learning_rate": 5.619329794815933e-06, "loss": 0.0398, "step": 30365 }, { "epoch": 0.1267201308509484, "grad_norm": 1.3128139790537134, "learning_rate": 5.618867188119327e-06, "loss": 0.0597, "step": 30370 }, { "epoch": 0.1267409935659387, "grad_norm": 0.9263617590028853, "learning_rate": 5.61840469565506e-06, "loss": 0.0424, "step": 30375 }, { "epoch": 0.12676185628092898, "grad_norm": 0.7431332149675497, "learning_rate": 5.617942317376125e-06, "loss": 0.0475, "step": 30380 }, { "epoch": 0.12678271899591925, "grad_norm": 1.1340276583676354, "learning_rate": 5.617480053235544e-06, "loss": 0.041, "step": 30385 }, { "epoch": 0.12680358171090952, "grad_norm": 1.1245143579710846, "learning_rate": 5.6170179031863684e-06, "loss": 0.0556, "step": 30390 }, { "epoch": 0.1268244444258998, "grad_norm": 0.7888998151800354, "learning_rate": 5.616555867181672e-06, "loss": 0.0415, "step": 30395 }, { "epoch": 0.1268453071408901, "grad_norm": 0.6439845384567051, "learning_rate": 5.61609394517456e-06, "loss": 0.0345, "step": 30400 }, { "epoch": 0.12686616985588037, "grad_norm": 1.4316895877225992, "learning_rate": 5.615632137118161e-06, "loss": 0.0405, "step": 30405 }, { "epoch": 0.12688703257087064, "grad_norm": 1.4736185217526248, "learning_rate": 5.615170442965634e-06, "loss": 0.0383, "step": 30410 }, { "epoch": 0.1269078952858609, "grad_norm": 5.042740839414299, "learning_rate": 5.614708862670161e-06, "loss": 0.0482, "step": 30415 }, { "epoch": 0.1269287580008512, "grad_norm": 1.5791944225570618, "learning_rate": 5.6142473961849555e-06, "loss": 0.0594, "step": 30420 }, { "epoch": 0.12694962071584148, "grad_norm": 0.7411352363838536, "learning_rate": 5.613786043463254e-06, "loss": 0.0376, "step": 30425 }, { "epoch": 0.12697048343083175, "grad_norm": 1.227917142662894, "learning_rate": 5.613324804458321e-06, "loss": 0.0377, "step": 30430 }, { "epoch": 0.12699134614582203, "grad_norm": 0.959244887121463, "learning_rate": 5.612863679123448e-06, "loss": 0.0491, "step": 30435 }, { "epoch": 0.1270122088608123, "grad_norm": 0.8459207589569295, "learning_rate": 5.612402667411956e-06, "loss": 0.048, "step": 30440 }, { "epoch": 0.1270330715758026, "grad_norm": 0.9992403622480364, "learning_rate": 5.61194176927719e-06, "loss": 0.041, "step": 30445 }, { "epoch": 0.12705393429079287, "grad_norm": 1.496475887420874, "learning_rate": 5.61148098467252e-06, "loss": 0.0357, "step": 30450 }, { "epoch": 0.12707479700578314, "grad_norm": 2.4899318554107066, "learning_rate": 5.611020313551345e-06, "loss": 0.0419, "step": 30455 }, { "epoch": 0.1270956597207734, "grad_norm": 0.8754685824702082, "learning_rate": 5.610559755867094e-06, "loss": 0.0388, "step": 30460 }, { "epoch": 0.1271165224357637, "grad_norm": 1.1618084476838042, "learning_rate": 5.610099311573218e-06, "loss": 0.0452, "step": 30465 }, { "epoch": 0.12713738515075398, "grad_norm": 1.6620201945453315, "learning_rate": 5.609638980623195e-06, "loss": 0.0368, "step": 30470 }, { "epoch": 0.12715824786574426, "grad_norm": 2.0401685515635126, "learning_rate": 5.609178762970534e-06, "loss": 0.0595, "step": 30475 }, { "epoch": 0.12717911058073453, "grad_norm": 0.9101616828418684, "learning_rate": 5.608718658568765e-06, "loss": 0.0495, "step": 30480 }, { "epoch": 0.1271999732957248, "grad_norm": 1.2242871192680922, "learning_rate": 5.608258667371448e-06, "loss": 0.0517, "step": 30485 }, { "epoch": 0.1272208360107151, "grad_norm": 1.0694078977021015, "learning_rate": 5.6077987893321695e-06, "loss": 0.0366, "step": 30490 }, { "epoch": 0.12724169872570537, "grad_norm": 0.6411702824924705, "learning_rate": 5.6073390244045435e-06, "loss": 0.0319, "step": 30495 }, { "epoch": 0.12726256144069564, "grad_norm": 0.9654470571651937, "learning_rate": 5.606879372542208e-06, "loss": 0.0357, "step": 30500 }, { "epoch": 0.12728342415568591, "grad_norm": 1.034608210694618, "learning_rate": 5.606419833698827e-06, "loss": 0.05, "step": 30505 }, { "epoch": 0.12730428687067621, "grad_norm": 0.697604639588566, "learning_rate": 5.605960407828097e-06, "loss": 0.0603, "step": 30510 }, { "epoch": 0.12732514958566649, "grad_norm": 0.7866581279587103, "learning_rate": 5.605501094883734e-06, "loss": 0.0431, "step": 30515 }, { "epoch": 0.12734601230065676, "grad_norm": 0.6249518046925703, "learning_rate": 5.605041894819486e-06, "loss": 0.0377, "step": 30520 }, { "epoch": 0.12736687501564703, "grad_norm": 1.2270574737970927, "learning_rate": 5.604582807589123e-06, "loss": 0.0477, "step": 30525 }, { "epoch": 0.1273877377306373, "grad_norm": 0.8397175016784929, "learning_rate": 5.604123833146445e-06, "loss": 0.0329, "step": 30530 }, { "epoch": 0.1274086004456276, "grad_norm": 0.9216610120195301, "learning_rate": 5.603664971445277e-06, "loss": 0.0386, "step": 30535 }, { "epoch": 0.12742946316061787, "grad_norm": 0.7109747203854025, "learning_rate": 5.60320622243947e-06, "loss": 0.0443, "step": 30540 }, { "epoch": 0.12745032587560814, "grad_norm": 1.011960907508812, "learning_rate": 5.602747586082902e-06, "loss": 0.0443, "step": 30545 }, { "epoch": 0.12747118859059842, "grad_norm": 1.115394793214712, "learning_rate": 5.602289062329478e-06, "loss": 0.0477, "step": 30550 }, { "epoch": 0.12749205130558872, "grad_norm": 1.3336372987583265, "learning_rate": 5.601830651133128e-06, "loss": 0.0489, "step": 30555 }, { "epoch": 0.127512914020579, "grad_norm": 0.8606990974311213, "learning_rate": 5.60137235244781e-06, "loss": 0.0456, "step": 30560 }, { "epoch": 0.12753377673556926, "grad_norm": 0.7295209351955616, "learning_rate": 5.600914166227508e-06, "loss": 0.0444, "step": 30565 }, { "epoch": 0.12755463945055953, "grad_norm": 1.3038548591204788, "learning_rate": 5.600456092426231e-06, "loss": 0.0474, "step": 30570 }, { "epoch": 0.1275755021655498, "grad_norm": 0.7015439354806615, "learning_rate": 5.599998130998015e-06, "loss": 0.0395, "step": 30575 }, { "epoch": 0.1275963648805401, "grad_norm": 1.0830385097821105, "learning_rate": 5.599540281896924e-06, "loss": 0.0467, "step": 30580 }, { "epoch": 0.12761722759553037, "grad_norm": 1.3726973926034862, "learning_rate": 5.599082545077046e-06, "loss": 0.0609, "step": 30585 }, { "epoch": 0.12763809031052065, "grad_norm": 0.8028467693583312, "learning_rate": 5.598624920492496e-06, "loss": 0.0434, "step": 30590 }, { "epoch": 0.12765895302551092, "grad_norm": 1.1434877959737277, "learning_rate": 5.598167408097416e-06, "loss": 0.0505, "step": 30595 }, { "epoch": 0.12767981574050122, "grad_norm": 0.9408891398629138, "learning_rate": 5.597710007845973e-06, "loss": 0.0485, "step": 30600 }, { "epoch": 0.1277006784554915, "grad_norm": 0.8462191449551683, "learning_rate": 5.5972527196923595e-06, "loss": 0.037, "step": 30605 }, { "epoch": 0.12772154117048176, "grad_norm": 0.6926370731839442, "learning_rate": 5.596795543590799e-06, "loss": 0.0389, "step": 30610 }, { "epoch": 0.12774240388547203, "grad_norm": 1.3393006263328686, "learning_rate": 5.596338479495533e-06, "loss": 0.038, "step": 30615 }, { "epoch": 0.1277632666004623, "grad_norm": 1.0167451305088666, "learning_rate": 5.595881527360839e-06, "loss": 0.0383, "step": 30620 }, { "epoch": 0.1277841293154526, "grad_norm": 1.1983927367256917, "learning_rate": 5.595424687141011e-06, "loss": 0.0341, "step": 30625 }, { "epoch": 0.12780499203044288, "grad_norm": 0.784799520566377, "learning_rate": 5.594967958790377e-06, "loss": 0.0283, "step": 30630 }, { "epoch": 0.12782585474543315, "grad_norm": 0.901325352256309, "learning_rate": 5.594511342263286e-06, "loss": 0.0494, "step": 30635 }, { "epoch": 0.12784671746042342, "grad_norm": 0.9788263357454622, "learning_rate": 5.594054837514113e-06, "loss": 0.0477, "step": 30640 }, { "epoch": 0.12786758017541372, "grad_norm": 1.2133591713029284, "learning_rate": 5.593598444497264e-06, "loss": 0.051, "step": 30645 }, { "epoch": 0.127888442890404, "grad_norm": 0.921255196052495, "learning_rate": 5.593142163167167e-06, "loss": 0.0396, "step": 30650 }, { "epoch": 0.12790930560539426, "grad_norm": 1.7160991482039736, "learning_rate": 5.592685993478275e-06, "loss": 0.0479, "step": 30655 }, { "epoch": 0.12793016832038454, "grad_norm": 1.0379708370615288, "learning_rate": 5.592229935385071e-06, "loss": 0.0485, "step": 30660 }, { "epoch": 0.1279510310353748, "grad_norm": 1.0788954613432258, "learning_rate": 5.59177398884206e-06, "loss": 0.0456, "step": 30665 }, { "epoch": 0.1279718937503651, "grad_norm": 1.7782652726610473, "learning_rate": 5.591318153803775e-06, "loss": 0.0544, "step": 30670 }, { "epoch": 0.12799275646535538, "grad_norm": 0.9758707911287912, "learning_rate": 5.5908624302247776e-06, "loss": 0.0406, "step": 30675 }, { "epoch": 0.12801361918034565, "grad_norm": 0.9359318578871779, "learning_rate": 5.59040681805965e-06, "loss": 0.0367, "step": 30680 }, { "epoch": 0.12803448189533592, "grad_norm": 1.4057012132252495, "learning_rate": 5.589951317263001e-06, "loss": 0.0549, "step": 30685 }, { "epoch": 0.12805534461032622, "grad_norm": 0.7625610902945945, "learning_rate": 5.58949592778947e-06, "loss": 0.0302, "step": 30690 }, { "epoch": 0.1280762073253165, "grad_norm": 3.017875454582285, "learning_rate": 5.589040649593719e-06, "loss": 0.0358, "step": 30695 }, { "epoch": 0.12809707004030677, "grad_norm": 0.6297535114904448, "learning_rate": 5.5885854826304346e-06, "loss": 0.0521, "step": 30700 }, { "epoch": 0.12811793275529704, "grad_norm": 1.4173603156913703, "learning_rate": 5.588130426854333e-06, "loss": 0.0525, "step": 30705 }, { "epoch": 0.1281387954702873, "grad_norm": 0.741867102334376, "learning_rate": 5.58767548222015e-06, "loss": 0.0375, "step": 30710 }, { "epoch": 0.1281596581852776, "grad_norm": 0.7640087758045372, "learning_rate": 5.587220648682655e-06, "loss": 0.0372, "step": 30715 }, { "epoch": 0.12818052090026788, "grad_norm": 0.9615221789719737, "learning_rate": 5.5867659261966386e-06, "loss": 0.0508, "step": 30720 }, { "epoch": 0.12820138361525815, "grad_norm": 0.6538455736190009, "learning_rate": 5.586311314716918e-06, "loss": 0.0345, "step": 30725 }, { "epoch": 0.12822224633024842, "grad_norm": 1.2033334090856562, "learning_rate": 5.585856814198334e-06, "loss": 0.0308, "step": 30730 }, { "epoch": 0.12824310904523872, "grad_norm": 1.0504300218754434, "learning_rate": 5.5854024245957575e-06, "loss": 0.0533, "step": 30735 }, { "epoch": 0.128263971760229, "grad_norm": 0.9872937925628166, "learning_rate": 5.5849481458640805e-06, "loss": 0.0397, "step": 30740 }, { "epoch": 0.12828483447521927, "grad_norm": 0.7350223624885079, "learning_rate": 5.584493977958226e-06, "loss": 0.0366, "step": 30745 }, { "epoch": 0.12830569719020954, "grad_norm": 0.8543699442012026, "learning_rate": 5.584039920833137e-06, "loss": 0.0416, "step": 30750 }, { "epoch": 0.1283265599051998, "grad_norm": 0.8834655176084986, "learning_rate": 5.583585974443784e-06, "loss": 0.0396, "step": 30755 }, { "epoch": 0.1283474226201901, "grad_norm": 1.5736189612318348, "learning_rate": 5.583132138745166e-06, "loss": 0.0402, "step": 30760 }, { "epoch": 0.12836828533518038, "grad_norm": 1.1130612781952898, "learning_rate": 5.582678413692305e-06, "loss": 0.0434, "step": 30765 }, { "epoch": 0.12838914805017065, "grad_norm": 1.2134101393837806, "learning_rate": 5.582224799240248e-06, "loss": 0.0486, "step": 30770 }, { "epoch": 0.12841001076516093, "grad_norm": 0.9290547348656136, "learning_rate": 5.581771295344069e-06, "loss": 0.0453, "step": 30775 }, { "epoch": 0.12843087348015123, "grad_norm": 1.019341386004241, "learning_rate": 5.581317901958868e-06, "loss": 0.0362, "step": 30780 }, { "epoch": 0.1284517361951415, "grad_norm": 1.052319013269508, "learning_rate": 5.580864619039769e-06, "loss": 0.0517, "step": 30785 }, { "epoch": 0.12847259891013177, "grad_norm": 1.093027815696952, "learning_rate": 5.580411446541921e-06, "loss": 0.033, "step": 30790 }, { "epoch": 0.12849346162512204, "grad_norm": 0.9660878522410054, "learning_rate": 5.579958384420502e-06, "loss": 0.0325, "step": 30795 }, { "epoch": 0.1285143243401123, "grad_norm": 0.8761703622647768, "learning_rate": 5.579505432630711e-06, "loss": 0.0315, "step": 30800 }, { "epoch": 0.1285351870551026, "grad_norm": 1.6933937971053035, "learning_rate": 5.579052591127775e-06, "loss": 0.045, "step": 30805 }, { "epoch": 0.12855604977009288, "grad_norm": 0.7337713339016513, "learning_rate": 5.5785998598669465e-06, "loss": 0.0391, "step": 30810 }, { "epoch": 0.12857691248508316, "grad_norm": 1.0637278929988723, "learning_rate": 5.578147238803503e-06, "loss": 0.0429, "step": 30815 }, { "epoch": 0.12859777520007343, "grad_norm": 0.8686320564181684, "learning_rate": 5.577694727892747e-06, "loss": 0.0482, "step": 30820 }, { "epoch": 0.12861863791506373, "grad_norm": 1.009280194008919, "learning_rate": 5.577242327090007e-06, "loss": 0.0505, "step": 30825 }, { "epoch": 0.128639500630054, "grad_norm": 1.1514926650134218, "learning_rate": 5.576790036350636e-06, "loss": 0.0335, "step": 30830 }, { "epoch": 0.12866036334504427, "grad_norm": 1.1218053760758586, "learning_rate": 5.576337855630012e-06, "loss": 0.0415, "step": 30835 }, { "epoch": 0.12868122606003454, "grad_norm": 0.827613687071712, "learning_rate": 5.575885784883542e-06, "loss": 0.0455, "step": 30840 }, { "epoch": 0.12870208877502481, "grad_norm": 1.098695263170949, "learning_rate": 5.575433824066653e-06, "loss": 0.0429, "step": 30845 }, { "epoch": 0.12872295149001511, "grad_norm": 0.8545718800541479, "learning_rate": 5.5749819731348e-06, "loss": 0.0363, "step": 30850 }, { "epoch": 0.12874381420500539, "grad_norm": 1.0075149104064902, "learning_rate": 5.574530232043465e-06, "loss": 0.0418, "step": 30855 }, { "epoch": 0.12876467691999566, "grad_norm": 0.8472483256074199, "learning_rate": 5.57407860074815e-06, "loss": 0.0355, "step": 30860 }, { "epoch": 0.12878553963498593, "grad_norm": 0.9243559772152471, "learning_rate": 5.573627079204388e-06, "loss": 0.0478, "step": 30865 }, { "epoch": 0.12880640234997623, "grad_norm": 0.787226971344649, "learning_rate": 5.573175667367735e-06, "loss": 0.0372, "step": 30870 }, { "epoch": 0.1288272650649665, "grad_norm": 0.7982831243385987, "learning_rate": 5.57272436519377e-06, "loss": 0.036, "step": 30875 }, { "epoch": 0.12884812777995677, "grad_norm": 0.9690957448434803, "learning_rate": 5.572273172638101e-06, "loss": 0.0387, "step": 30880 }, { "epoch": 0.12886899049494704, "grad_norm": 3.633871751674157, "learning_rate": 5.571822089656358e-06, "loss": 0.0412, "step": 30885 }, { "epoch": 0.12888985320993732, "grad_norm": 1.8921917798171624, "learning_rate": 5.571371116204198e-06, "loss": 0.0653, "step": 30890 }, { "epoch": 0.12891071592492762, "grad_norm": 0.6731266415027098, "learning_rate": 5.570920252237302e-06, "loss": 0.0561, "step": 30895 }, { "epoch": 0.1289315786399179, "grad_norm": 1.7241226592525232, "learning_rate": 5.570469497711378e-06, "loss": 0.0517, "step": 30900 }, { "epoch": 0.12895244135490816, "grad_norm": 0.897671913755642, "learning_rate": 5.570018852582157e-06, "loss": 0.0415, "step": 30905 }, { "epoch": 0.12897330406989843, "grad_norm": 1.3785363030764168, "learning_rate": 5.5695683168053935e-06, "loss": 0.0418, "step": 30910 }, { "epoch": 0.12899416678488873, "grad_norm": 0.8620452772736334, "learning_rate": 5.569117890336873e-06, "loss": 0.0495, "step": 30915 }, { "epoch": 0.129015029499879, "grad_norm": 0.9731955196198846, "learning_rate": 5.5686675731323994e-06, "loss": 0.0531, "step": 30920 }, { "epoch": 0.12903589221486927, "grad_norm": 0.9094733597670612, "learning_rate": 5.568217365147805e-06, "loss": 0.0434, "step": 30925 }, { "epoch": 0.12905675492985955, "grad_norm": 1.0390049698733825, "learning_rate": 5.5677672663389495e-06, "loss": 0.0516, "step": 30930 }, { "epoch": 0.12907761764484982, "grad_norm": 0.8646870545962942, "learning_rate": 5.567317276661711e-06, "loss": 0.041, "step": 30935 }, { "epoch": 0.12909848035984012, "grad_norm": 0.8021205370109868, "learning_rate": 5.5668673960719986e-06, "loss": 0.0427, "step": 30940 }, { "epoch": 0.1291193430748304, "grad_norm": 0.9140395058537806, "learning_rate": 5.566417624525744e-06, "loss": 0.0404, "step": 30945 }, { "epoch": 0.12914020578982066, "grad_norm": 0.5821364639324971, "learning_rate": 5.565967961978902e-06, "loss": 0.0342, "step": 30950 }, { "epoch": 0.12916106850481093, "grad_norm": 0.5888743440756392, "learning_rate": 5.565518408387456e-06, "loss": 0.0387, "step": 30955 }, { "epoch": 0.12918193121980123, "grad_norm": 0.6349841929695809, "learning_rate": 5.565068963707411e-06, "loss": 0.0417, "step": 30960 }, { "epoch": 0.1292027939347915, "grad_norm": 0.5798037770474233, "learning_rate": 5.5646196278948e-06, "loss": 0.0367, "step": 30965 }, { "epoch": 0.12922365664978178, "grad_norm": 0.8630390989856487, "learning_rate": 5.564170400905677e-06, "loss": 0.0443, "step": 30970 }, { "epoch": 0.12924451936477205, "grad_norm": 1.0000768118693417, "learning_rate": 5.563721282696124e-06, "loss": 0.0391, "step": 30975 }, { "epoch": 0.12926538207976232, "grad_norm": 1.0729278978338672, "learning_rate": 5.5632722732222465e-06, "loss": 0.0465, "step": 30980 }, { "epoch": 0.12928624479475262, "grad_norm": 1.564451860793383, "learning_rate": 5.562823372440177e-06, "loss": 0.0454, "step": 30985 }, { "epoch": 0.1293071075097429, "grad_norm": 1.126080448895753, "learning_rate": 5.562374580306068e-06, "loss": 0.0498, "step": 30990 }, { "epoch": 0.12932797022473316, "grad_norm": 0.9096606884659065, "learning_rate": 5.561925896776101e-06, "loss": 0.0503, "step": 30995 }, { "epoch": 0.12934883293972343, "grad_norm": 1.3575816833190393, "learning_rate": 5.56147732180648e-06, "loss": 0.0519, "step": 31000 }, { "epoch": 0.12936969565471373, "grad_norm": 0.9720116159587493, "learning_rate": 5.561028855353436e-06, "loss": 0.0434, "step": 31005 }, { "epoch": 0.129390558369704, "grad_norm": 0.8346274804073931, "learning_rate": 5.560580497373221e-06, "loss": 0.0362, "step": 31010 }, { "epoch": 0.12941142108469428, "grad_norm": 1.4434842458735135, "learning_rate": 5.560132247822118e-06, "loss": 0.0394, "step": 31015 }, { "epoch": 0.12943228379968455, "grad_norm": 1.0514738281894203, "learning_rate": 5.559684106656425e-06, "loss": 0.0441, "step": 31020 }, { "epoch": 0.12945314651467482, "grad_norm": 0.6770232387572375, "learning_rate": 5.559236073832476e-06, "loss": 0.0325, "step": 31025 }, { "epoch": 0.12947400922966512, "grad_norm": 1.1690482586375541, "learning_rate": 5.5587881493066184e-06, "loss": 0.0389, "step": 31030 }, { "epoch": 0.1294948719446554, "grad_norm": 0.44576224107550105, "learning_rate": 5.558340333035235e-06, "loss": 0.0419, "step": 31035 }, { "epoch": 0.12951573465964566, "grad_norm": 1.318775396554084, "learning_rate": 5.557892624974723e-06, "loss": 0.0504, "step": 31040 }, { "epoch": 0.12953659737463594, "grad_norm": 0.8377159112576279, "learning_rate": 5.557445025081511e-06, "loss": 0.0338, "step": 31045 }, { "epoch": 0.12955746008962624, "grad_norm": 1.0101653542538125, "learning_rate": 5.556997533312053e-06, "loss": 0.0421, "step": 31050 }, { "epoch": 0.1295783228046165, "grad_norm": 2.462974570084156, "learning_rate": 5.556550149622821e-06, "loss": 0.0583, "step": 31055 }, { "epoch": 0.12959918551960678, "grad_norm": 0.7220727421383719, "learning_rate": 5.5561028739703175e-06, "loss": 0.0461, "step": 31060 }, { "epoch": 0.12962004823459705, "grad_norm": 1.4057396864671086, "learning_rate": 5.555655706311066e-06, "loss": 0.0424, "step": 31065 }, { "epoch": 0.12964091094958732, "grad_norm": 1.6218003696501229, "learning_rate": 5.555208646601616e-06, "loss": 0.0475, "step": 31070 }, { "epoch": 0.12966177366457762, "grad_norm": 0.9047476675561074, "learning_rate": 5.554761694798542e-06, "loss": 0.0441, "step": 31075 }, { "epoch": 0.1296826363795679, "grad_norm": 1.352441880481547, "learning_rate": 5.554314850858441e-06, "loss": 0.0525, "step": 31080 }, { "epoch": 0.12970349909455817, "grad_norm": 0.9901739286925225, "learning_rate": 5.553868114737937e-06, "loss": 0.0441, "step": 31085 }, { "epoch": 0.12972436180954844, "grad_norm": 1.0257532385035868, "learning_rate": 5.553421486393675e-06, "loss": 0.0398, "step": 31090 }, { "epoch": 0.12974522452453874, "grad_norm": 1.4536693026530778, "learning_rate": 5.5529749657823295e-06, "loss": 0.0494, "step": 31095 }, { "epoch": 0.129766087239529, "grad_norm": 1.2385107205210382, "learning_rate": 5.552528552860594e-06, "loss": 0.049, "step": 31100 }, { "epoch": 0.12978694995451928, "grad_norm": 0.9035169511654254, "learning_rate": 5.552082247585191e-06, "loss": 0.043, "step": 31105 }, { "epoch": 0.12980781266950955, "grad_norm": 0.9591702498907384, "learning_rate": 5.551636049912863e-06, "loss": 0.0334, "step": 31110 }, { "epoch": 0.12982867538449983, "grad_norm": 0.758813783781657, "learning_rate": 5.551189959800378e-06, "loss": 0.0368, "step": 31115 }, { "epoch": 0.12984953809949012, "grad_norm": 1.0309456270504733, "learning_rate": 5.550743977204532e-06, "loss": 0.0451, "step": 31120 }, { "epoch": 0.1298704008144804, "grad_norm": 1.4002004266345052, "learning_rate": 5.550298102082141e-06, "loss": 0.0317, "step": 31125 }, { "epoch": 0.12989126352947067, "grad_norm": 1.7195043957485834, "learning_rate": 5.549852334390047e-06, "loss": 0.0542, "step": 31130 }, { "epoch": 0.12991212624446094, "grad_norm": 0.8800159426362901, "learning_rate": 5.549406674085116e-06, "loss": 0.044, "step": 31135 }, { "epoch": 0.12993298895945124, "grad_norm": 1.2026194456363406, "learning_rate": 5.548961121124238e-06, "loss": 0.0464, "step": 31140 }, { "epoch": 0.1299538516744415, "grad_norm": 0.869331818760056, "learning_rate": 5.548515675464328e-06, "loss": 0.0279, "step": 31145 }, { "epoch": 0.12997471438943178, "grad_norm": 0.726104305094563, "learning_rate": 5.548070337062324e-06, "loss": 0.0509, "step": 31150 }, { "epoch": 0.12999557710442206, "grad_norm": 0.7609040267111059, "learning_rate": 5.547625105875191e-06, "loss": 0.0321, "step": 31155 }, { "epoch": 0.13001643981941233, "grad_norm": 1.1923652008698604, "learning_rate": 5.547179981859912e-06, "loss": 0.0369, "step": 31160 }, { "epoch": 0.13003730253440263, "grad_norm": 0.8686725965770978, "learning_rate": 5.546734964973503e-06, "loss": 0.0347, "step": 31165 }, { "epoch": 0.1300581652493929, "grad_norm": 1.1738738535189626, "learning_rate": 5.546290055172997e-06, "loss": 0.0543, "step": 31170 }, { "epoch": 0.13007902796438317, "grad_norm": 1.079905666187882, "learning_rate": 5.5458452524154525e-06, "loss": 0.0424, "step": 31175 }, { "epoch": 0.13009989067937344, "grad_norm": 1.3481244882116192, "learning_rate": 5.545400556657955e-06, "loss": 0.0546, "step": 31180 }, { "epoch": 0.13012075339436374, "grad_norm": 0.8190175491674322, "learning_rate": 5.544955967857612e-06, "loss": 0.0444, "step": 31185 }, { "epoch": 0.130141616109354, "grad_norm": 0.8068083686516739, "learning_rate": 5.544511485971553e-06, "loss": 0.0422, "step": 31190 }, { "epoch": 0.13016247882434429, "grad_norm": 0.5605211390199965, "learning_rate": 5.544067110956937e-06, "loss": 0.0499, "step": 31195 }, { "epoch": 0.13018334153933456, "grad_norm": 0.8715023919127336, "learning_rate": 5.543622842770941e-06, "loss": 0.0463, "step": 31200 }, { "epoch": 0.13020420425432483, "grad_norm": 2.429781091015075, "learning_rate": 5.54317868137077e-06, "loss": 0.0441, "step": 31205 }, { "epoch": 0.13022506696931513, "grad_norm": 1.041603393929608, "learning_rate": 5.542734626713653e-06, "loss": 0.0416, "step": 31210 }, { "epoch": 0.1302459296843054, "grad_norm": 1.2477761208821503, "learning_rate": 5.54229067875684e-06, "loss": 0.0361, "step": 31215 }, { "epoch": 0.13026679239929567, "grad_norm": 0.8926547397398913, "learning_rate": 5.5418468374576074e-06, "loss": 0.0424, "step": 31220 }, { "epoch": 0.13028765511428594, "grad_norm": 0.716638696582881, "learning_rate": 5.541403102773255e-06, "loss": 0.0463, "step": 31225 }, { "epoch": 0.13030851782927624, "grad_norm": 0.4571902812254014, "learning_rate": 5.540959474661108e-06, "loss": 0.036, "step": 31230 }, { "epoch": 0.13032938054426652, "grad_norm": 0.9908257764483116, "learning_rate": 5.540515953078511e-06, "loss": 0.0405, "step": 31235 }, { "epoch": 0.1303502432592568, "grad_norm": 0.9211176807373349, "learning_rate": 5.540072537982837e-06, "loss": 0.0417, "step": 31240 }, { "epoch": 0.13037110597424706, "grad_norm": 1.1693453560128668, "learning_rate": 5.5396292293314824e-06, "loss": 0.0489, "step": 31245 }, { "epoch": 0.13039196868923733, "grad_norm": 0.8812736648647939, "learning_rate": 5.5391860270818645e-06, "loss": 0.0438, "step": 31250 }, { "epoch": 0.13041283140422763, "grad_norm": 0.8755382512610452, "learning_rate": 5.538742931191428e-06, "loss": 0.0404, "step": 31255 }, { "epoch": 0.1304336941192179, "grad_norm": 1.1412672327056888, "learning_rate": 5.5382999416176375e-06, "loss": 0.055, "step": 31260 }, { "epoch": 0.13045455683420817, "grad_norm": 1.1644948221314164, "learning_rate": 5.537857058317986e-06, "loss": 0.0458, "step": 31265 }, { "epoch": 0.13047541954919845, "grad_norm": 0.44100620352466696, "learning_rate": 5.537414281249987e-06, "loss": 0.0594, "step": 31270 }, { "epoch": 0.13049628226418875, "grad_norm": 0.8646010348591199, "learning_rate": 5.536971610371179e-06, "loss": 0.0408, "step": 31275 }, { "epoch": 0.13051714497917902, "grad_norm": 1.2821165776686867, "learning_rate": 5.536529045639124e-06, "loss": 0.0384, "step": 31280 }, { "epoch": 0.1305380076941693, "grad_norm": 0.7680459423199156, "learning_rate": 5.536086587011408e-06, "loss": 0.0388, "step": 31285 }, { "epoch": 0.13055887040915956, "grad_norm": 1.1392535827314798, "learning_rate": 5.5356442344456394e-06, "loss": 0.0434, "step": 31290 }, { "epoch": 0.13057973312414983, "grad_norm": 1.2064670394133437, "learning_rate": 5.535201987899452e-06, "loss": 0.0405, "step": 31295 }, { "epoch": 0.13060059583914013, "grad_norm": 1.0156851886111187, "learning_rate": 5.5347598473305034e-06, "loss": 0.035, "step": 31300 }, { "epoch": 0.1306214585541304, "grad_norm": 1.120799397968593, "learning_rate": 5.534317812696472e-06, "loss": 0.0502, "step": 31305 }, { "epoch": 0.13064232126912068, "grad_norm": 0.696990251329998, "learning_rate": 5.533875883955066e-06, "loss": 0.041, "step": 31310 }, { "epoch": 0.13066318398411095, "grad_norm": 0.9737741458072757, "learning_rate": 5.5334340610640095e-06, "loss": 0.038, "step": 31315 }, { "epoch": 0.13068404669910125, "grad_norm": 0.8898691932951579, "learning_rate": 5.532992343981054e-06, "loss": 0.0366, "step": 31320 }, { "epoch": 0.13070490941409152, "grad_norm": 0.9717611283356008, "learning_rate": 5.532550732663976e-06, "loss": 0.0326, "step": 31325 }, { "epoch": 0.1307257721290818, "grad_norm": 0.6737440074459105, "learning_rate": 5.532109227070575e-06, "loss": 0.044, "step": 31330 }, { "epoch": 0.13074663484407206, "grad_norm": 1.2140451500204814, "learning_rate": 5.531667827158672e-06, "loss": 0.0444, "step": 31335 }, { "epoch": 0.13076749755906233, "grad_norm": 1.3146616314051645, "learning_rate": 5.531226532886112e-06, "loss": 0.0642, "step": 31340 }, { "epoch": 0.13078836027405263, "grad_norm": 0.7443892322987777, "learning_rate": 5.530785344210766e-06, "loss": 0.0413, "step": 31345 }, { "epoch": 0.1308092229890429, "grad_norm": 0.5915696429131266, "learning_rate": 5.5303442610905255e-06, "loss": 0.033, "step": 31350 }, { "epoch": 0.13083008570403318, "grad_norm": 0.8823381863347626, "learning_rate": 5.529903283483309e-06, "loss": 0.0393, "step": 31355 }, { "epoch": 0.13085094841902345, "grad_norm": 0.7413571837535069, "learning_rate": 5.529462411347052e-06, "loss": 0.0384, "step": 31360 }, { "epoch": 0.13087181113401375, "grad_norm": 0.9807652424964437, "learning_rate": 5.529021644639722e-06, "loss": 0.0538, "step": 31365 }, { "epoch": 0.13089267384900402, "grad_norm": 0.9332943570183573, "learning_rate": 5.528580983319303e-06, "loss": 0.0376, "step": 31370 }, { "epoch": 0.1309135365639943, "grad_norm": 1.5898176812630462, "learning_rate": 5.528140427343808e-06, "loss": 0.0494, "step": 31375 }, { "epoch": 0.13093439927898456, "grad_norm": 0.8834873123580677, "learning_rate": 5.527699976671268e-06, "loss": 0.0459, "step": 31380 }, { "epoch": 0.13095526199397484, "grad_norm": 0.7551775756434106, "learning_rate": 5.52725963125974e-06, "loss": 0.0429, "step": 31385 }, { "epoch": 0.13097612470896514, "grad_norm": 0.650741012772861, "learning_rate": 5.526819391067306e-06, "loss": 0.0402, "step": 31390 }, { "epoch": 0.1309969874239554, "grad_norm": 0.8232366611865984, "learning_rate": 5.526379256052069e-06, "loss": 0.0407, "step": 31395 }, { "epoch": 0.13101785013894568, "grad_norm": 0.9830904314399428, "learning_rate": 5.525939226172157e-06, "loss": 0.0449, "step": 31400 }, { "epoch": 0.13103871285393595, "grad_norm": 1.2663700530871016, "learning_rate": 5.525499301385719e-06, "loss": 0.0468, "step": 31405 }, { "epoch": 0.13105957556892625, "grad_norm": 1.647758409117577, "learning_rate": 5.525059481650929e-06, "loss": 0.0442, "step": 31410 }, { "epoch": 0.13108043828391652, "grad_norm": 1.2335193016832275, "learning_rate": 5.5246197669259835e-06, "loss": 0.0492, "step": 31415 }, { "epoch": 0.1311013009989068, "grad_norm": 0.7345556514394163, "learning_rate": 5.524180157169105e-06, "loss": 0.0395, "step": 31420 }, { "epoch": 0.13112216371389707, "grad_norm": 1.1440876811970002, "learning_rate": 5.523740652338536e-06, "loss": 0.0501, "step": 31425 }, { "epoch": 0.13114302642888734, "grad_norm": 0.8134619401531307, "learning_rate": 5.523301252392542e-06, "loss": 0.035, "step": 31430 }, { "epoch": 0.13116388914387764, "grad_norm": 1.5334021087394236, "learning_rate": 5.522861957289416e-06, "loss": 0.0453, "step": 31435 }, { "epoch": 0.1311847518588679, "grad_norm": 0.8141750068360203, "learning_rate": 5.522422766987469e-06, "loss": 0.046, "step": 31440 }, { "epoch": 0.13120561457385818, "grad_norm": 0.9106463644710643, "learning_rate": 5.521983681445036e-06, "loss": 0.0364, "step": 31445 }, { "epoch": 0.13122647728884845, "grad_norm": 2.115352746891772, "learning_rate": 5.52154470062048e-06, "loss": 0.0431, "step": 31450 }, { "epoch": 0.13124734000383875, "grad_norm": 1.1133257939796872, "learning_rate": 5.521105824472184e-06, "loss": 0.0518, "step": 31455 }, { "epoch": 0.13126820271882902, "grad_norm": 1.0688719156124193, "learning_rate": 5.520667052958551e-06, "loss": 0.0448, "step": 31460 }, { "epoch": 0.1312890654338193, "grad_norm": 1.0975572930649409, "learning_rate": 5.520228386038013e-06, "loss": 0.0462, "step": 31465 }, { "epoch": 0.13130992814880957, "grad_norm": 1.950809461225551, "learning_rate": 5.51978982366902e-06, "loss": 0.0315, "step": 31470 }, { "epoch": 0.13133079086379984, "grad_norm": 0.6211923424368153, "learning_rate": 5.519351365810049e-06, "loss": 0.0917, "step": 31475 }, { "epoch": 0.13135165357879014, "grad_norm": 0.7908666642853078, "learning_rate": 5.518913012419596e-06, "loss": 0.0407, "step": 31480 }, { "epoch": 0.1313725162937804, "grad_norm": 1.1395878997692925, "learning_rate": 5.518474763456186e-06, "loss": 0.0446, "step": 31485 }, { "epoch": 0.13139337900877068, "grad_norm": 0.5220477308002461, "learning_rate": 5.518036618878363e-06, "loss": 0.0293, "step": 31490 }, { "epoch": 0.13141424172376096, "grad_norm": 1.0770996898030334, "learning_rate": 5.517598578644692e-06, "loss": 0.0386, "step": 31495 }, { "epoch": 0.13143510443875125, "grad_norm": 1.0884682228893177, "learning_rate": 5.5171606427137655e-06, "loss": 0.0343, "step": 31500 }, { "epoch": 0.13145596715374153, "grad_norm": 1.0514016254661156, "learning_rate": 5.516722811044196e-06, "loss": 0.0448, "step": 31505 }, { "epoch": 0.1314768298687318, "grad_norm": 0.9110904657670164, "learning_rate": 5.516285083594623e-06, "loss": 0.0345, "step": 31510 }, { "epoch": 0.13149769258372207, "grad_norm": 0.7928548791216192, "learning_rate": 5.5158474603237025e-06, "loss": 0.0432, "step": 31515 }, { "epoch": 0.13151855529871234, "grad_norm": 1.1100977765005762, "learning_rate": 5.515409941190118e-06, "loss": 0.0398, "step": 31520 }, { "epoch": 0.13153941801370264, "grad_norm": 0.8759446968675886, "learning_rate": 5.514972526152578e-06, "loss": 0.0505, "step": 31525 }, { "epoch": 0.1315602807286929, "grad_norm": 0.7556349052343985, "learning_rate": 5.514535215169806e-06, "loss": 0.0443, "step": 31530 }, { "epoch": 0.13158114344368319, "grad_norm": 0.7827754223558758, "learning_rate": 5.514098008200557e-06, "loss": 0.0396, "step": 31535 }, { "epoch": 0.13160200615867346, "grad_norm": 1.3073732328032837, "learning_rate": 5.513660905203605e-06, "loss": 0.0546, "step": 31540 }, { "epoch": 0.13162286887366376, "grad_norm": 0.849541781769311, "learning_rate": 5.513223906137745e-06, "loss": 0.0423, "step": 31545 }, { "epoch": 0.13164373158865403, "grad_norm": 1.0272458413648, "learning_rate": 5.5127870109617975e-06, "loss": 0.0335, "step": 31550 }, { "epoch": 0.1316645943036443, "grad_norm": 1.176342919029312, "learning_rate": 5.5123502196346054e-06, "loss": 0.0399, "step": 31555 }, { "epoch": 0.13168545701863457, "grad_norm": 0.9015854196328633, "learning_rate": 5.5119135321150365e-06, "loss": 0.0428, "step": 31560 }, { "epoch": 0.13170631973362484, "grad_norm": 0.7526877491380507, "learning_rate": 5.511476948361976e-06, "loss": 0.0326, "step": 31565 }, { "epoch": 0.13172718244861514, "grad_norm": 0.8529889740740225, "learning_rate": 5.511040468334337e-06, "loss": 0.0403, "step": 31570 }, { "epoch": 0.13174804516360542, "grad_norm": 1.0782599194072706, "learning_rate": 5.510604091991053e-06, "loss": 0.0354, "step": 31575 }, { "epoch": 0.1317689078785957, "grad_norm": 0.9672531295444228, "learning_rate": 5.510167819291079e-06, "loss": 0.0401, "step": 31580 }, { "epoch": 0.13178977059358596, "grad_norm": 0.7146628078786812, "learning_rate": 5.509731650193398e-06, "loss": 0.035, "step": 31585 }, { "epoch": 0.13181063330857626, "grad_norm": 0.8614810812609651, "learning_rate": 5.5092955846570096e-06, "loss": 0.0395, "step": 31590 }, { "epoch": 0.13183149602356653, "grad_norm": 1.1941735725588802, "learning_rate": 5.508859622640938e-06, "loss": 0.0475, "step": 31595 }, { "epoch": 0.1318523587385568, "grad_norm": 1.0313953600156078, "learning_rate": 5.508423764104235e-06, "loss": 0.0338, "step": 31600 }, { "epoch": 0.13187322145354707, "grad_norm": 0.9269756653483117, "learning_rate": 5.507988009005965e-06, "loss": 0.0464, "step": 31605 }, { "epoch": 0.13189408416853735, "grad_norm": 1.4234695285916403, "learning_rate": 5.507552357305226e-06, "loss": 0.0444, "step": 31610 }, { "epoch": 0.13191494688352765, "grad_norm": 1.1468772344003535, "learning_rate": 5.507116808961132e-06, "loss": 0.0374, "step": 31615 }, { "epoch": 0.13193580959851792, "grad_norm": 1.0166873625753425, "learning_rate": 5.506681363932818e-06, "loss": 0.0535, "step": 31620 }, { "epoch": 0.1319566723135082, "grad_norm": 1.1809579490779576, "learning_rate": 5.50624602217945e-06, "loss": 0.0311, "step": 31625 }, { "epoch": 0.13197753502849846, "grad_norm": 1.0176313499976621, "learning_rate": 5.5058107836602085e-06, "loss": 0.0413, "step": 31630 }, { "epoch": 0.13199839774348876, "grad_norm": 0.7853620969764791, "learning_rate": 5.505375648334299e-06, "loss": 0.0489, "step": 31635 }, { "epoch": 0.13201926045847903, "grad_norm": 1.0335398388773642, "learning_rate": 5.504940616160952e-06, "loss": 0.0408, "step": 31640 }, { "epoch": 0.1320401231734693, "grad_norm": 1.0662273129404585, "learning_rate": 5.5045056870994175e-06, "loss": 0.036, "step": 31645 }, { "epoch": 0.13206098588845958, "grad_norm": 0.9551014911860113, "learning_rate": 5.504070861108969e-06, "loss": 0.0509, "step": 31650 }, { "epoch": 0.13208184860344985, "grad_norm": 0.7561147044882791, "learning_rate": 5.503636138148902e-06, "loss": 0.0375, "step": 31655 }, { "epoch": 0.13210271131844015, "grad_norm": 0.7846303768014186, "learning_rate": 5.503201518178538e-06, "loss": 0.0345, "step": 31660 }, { "epoch": 0.13212357403343042, "grad_norm": 1.1449802153093043, "learning_rate": 5.5027670011572145e-06, "loss": 0.0392, "step": 31665 }, { "epoch": 0.1321444367484207, "grad_norm": 1.029630928738791, "learning_rate": 5.502332587044298e-06, "loss": 0.0323, "step": 31670 }, { "epoch": 0.13216529946341096, "grad_norm": 0.592789849292585, "learning_rate": 5.501898275799173e-06, "loss": 0.0372, "step": 31675 }, { "epoch": 0.13218616217840126, "grad_norm": 0.8067601976426982, "learning_rate": 5.501464067381248e-06, "loss": 0.0351, "step": 31680 }, { "epoch": 0.13220702489339153, "grad_norm": 0.7716847300958413, "learning_rate": 5.501029961749954e-06, "loss": 0.0422, "step": 31685 }, { "epoch": 0.1322278876083818, "grad_norm": 1.1639334822476393, "learning_rate": 5.500595958864746e-06, "loss": 0.0454, "step": 31690 }, { "epoch": 0.13224875032337208, "grad_norm": 0.6310174572122471, "learning_rate": 5.500162058685097e-06, "loss": 0.05, "step": 31695 }, { "epoch": 0.13226961303836235, "grad_norm": 1.275534924897289, "learning_rate": 5.499728261170507e-06, "loss": 0.0435, "step": 31700 }, { "epoch": 0.13229047575335265, "grad_norm": 1.7286927138554422, "learning_rate": 5.499294566280497e-06, "loss": 0.0392, "step": 31705 }, { "epoch": 0.13231133846834292, "grad_norm": 0.9093412577042821, "learning_rate": 5.498860973974606e-06, "loss": 0.0351, "step": 31710 }, { "epoch": 0.1323322011833332, "grad_norm": 1.2031699430934204, "learning_rate": 5.498427484212403e-06, "loss": 0.0444, "step": 31715 }, { "epoch": 0.13235306389832346, "grad_norm": 1.6205555865697316, "learning_rate": 5.497994096953475e-06, "loss": 0.0384, "step": 31720 }, { "epoch": 0.13237392661331376, "grad_norm": 0.8803917942903445, "learning_rate": 5.497560812157431e-06, "loss": 0.0391, "step": 31725 }, { "epoch": 0.13239478932830404, "grad_norm": 0.7475568600169389, "learning_rate": 5.4971276297839015e-06, "loss": 0.0526, "step": 31730 }, { "epoch": 0.1324156520432943, "grad_norm": 0.8524906738183814, "learning_rate": 5.496694549792543e-06, "loss": 0.0413, "step": 31735 }, { "epoch": 0.13243651475828458, "grad_norm": 0.9427725384198165, "learning_rate": 5.496261572143031e-06, "loss": 0.0395, "step": 31740 }, { "epoch": 0.13245737747327485, "grad_norm": 1.5828418388079861, "learning_rate": 5.495828696795065e-06, "loss": 0.0404, "step": 31745 }, { "epoch": 0.13247824018826515, "grad_norm": 1.1874323932220878, "learning_rate": 5.495395923708364e-06, "loss": 0.0628, "step": 31750 }, { "epoch": 0.13249910290325542, "grad_norm": 0.705352653582988, "learning_rate": 5.494963252842673e-06, "loss": 0.031, "step": 31755 }, { "epoch": 0.1325199656182457, "grad_norm": 0.9629420654116765, "learning_rate": 5.494530684157757e-06, "loss": 0.0475, "step": 31760 }, { "epoch": 0.13254082833323597, "grad_norm": 0.6369246806174474, "learning_rate": 5.494098217613404e-06, "loss": 0.042, "step": 31765 }, { "epoch": 0.13256169104822627, "grad_norm": 0.7206538575052048, "learning_rate": 5.493665853169422e-06, "loss": 0.0383, "step": 31770 }, { "epoch": 0.13258255376321654, "grad_norm": 0.8734190999388423, "learning_rate": 5.493233590785644e-06, "loss": 0.0388, "step": 31775 }, { "epoch": 0.1326034164782068, "grad_norm": 1.1695060775605626, "learning_rate": 5.492801430421924e-06, "loss": 0.0496, "step": 31780 }, { "epoch": 0.13262427919319708, "grad_norm": 1.1868387860062812, "learning_rate": 5.492369372038136e-06, "loss": 0.0437, "step": 31785 }, { "epoch": 0.13264514190818735, "grad_norm": 2.7083793042300126, "learning_rate": 5.4919374155941815e-06, "loss": 0.0515, "step": 31790 }, { "epoch": 0.13266600462317765, "grad_norm": 1.025371010220217, "learning_rate": 5.491505561049979e-06, "loss": 0.0408, "step": 31795 }, { "epoch": 0.13268686733816792, "grad_norm": 0.8432186873732326, "learning_rate": 5.49107380836547e-06, "loss": 0.0446, "step": 31800 }, { "epoch": 0.1327077300531582, "grad_norm": 0.9368561229862582, "learning_rate": 5.490642157500619e-06, "loss": 0.0444, "step": 31805 }, { "epoch": 0.13272859276814847, "grad_norm": 1.0949388525711732, "learning_rate": 5.490210608415414e-06, "loss": 0.0404, "step": 31810 }, { "epoch": 0.13274945548313877, "grad_norm": 1.0595714035286463, "learning_rate": 5.489779161069861e-06, "loss": 0.0418, "step": 31815 }, { "epoch": 0.13277031819812904, "grad_norm": 0.8098963679745058, "learning_rate": 5.489347815423992e-06, "loss": 0.0303, "step": 31820 }, { "epoch": 0.1327911809131193, "grad_norm": 1.6672798078671562, "learning_rate": 5.488916571437858e-06, "loss": 0.0375, "step": 31825 }, { "epoch": 0.13281204362810958, "grad_norm": 1.621278901148849, "learning_rate": 5.488485429071535e-06, "loss": 0.0408, "step": 31830 }, { "epoch": 0.13283290634309985, "grad_norm": 0.6907563879555655, "learning_rate": 5.4880543882851165e-06, "loss": 0.0419, "step": 31835 }, { "epoch": 0.13285376905809015, "grad_norm": 0.9328311134151825, "learning_rate": 5.487623449038723e-06, "loss": 0.0501, "step": 31840 }, { "epoch": 0.13287463177308043, "grad_norm": 0.8904309914425534, "learning_rate": 5.487192611292494e-06, "loss": 0.045, "step": 31845 }, { "epoch": 0.1328954944880707, "grad_norm": 0.6806470016006381, "learning_rate": 5.486761875006591e-06, "loss": 0.0326, "step": 31850 }, { "epoch": 0.13291635720306097, "grad_norm": 0.8921487855641788, "learning_rate": 5.486331240141198e-06, "loss": 0.0401, "step": 31855 }, { "epoch": 0.13293721991805127, "grad_norm": 1.3677357024979513, "learning_rate": 5.485900706656522e-06, "loss": 0.039, "step": 31860 }, { "epoch": 0.13295808263304154, "grad_norm": 1.6218228626190472, "learning_rate": 5.485470274512788e-06, "loss": 0.0348, "step": 31865 }, { "epoch": 0.1329789453480318, "grad_norm": 1.0687303898776317, "learning_rate": 5.485039943670248e-06, "loss": 0.042, "step": 31870 }, { "epoch": 0.13299980806302208, "grad_norm": 0.6713278624299557, "learning_rate": 5.48460971408917e-06, "loss": 0.0383, "step": 31875 }, { "epoch": 0.13302067077801236, "grad_norm": 0.7550609807402678, "learning_rate": 5.48417958572985e-06, "loss": 0.0468, "step": 31880 }, { "epoch": 0.13304153349300266, "grad_norm": 1.281021099567335, "learning_rate": 5.483749558552603e-06, "loss": 0.0482, "step": 31885 }, { "epoch": 0.13306239620799293, "grad_norm": 0.5425940380987095, "learning_rate": 5.483319632517763e-06, "loss": 0.0439, "step": 31890 }, { "epoch": 0.1330832589229832, "grad_norm": 2.2255494658099084, "learning_rate": 5.482889807585691e-06, "loss": 0.0464, "step": 31895 }, { "epoch": 0.13310412163797347, "grad_norm": 1.186730170080588, "learning_rate": 5.482460083716765e-06, "loss": 0.0469, "step": 31900 }, { "epoch": 0.13312498435296377, "grad_norm": 0.9057630836467199, "learning_rate": 5.482030460871388e-06, "loss": 0.0466, "step": 31905 }, { "epoch": 0.13314584706795404, "grad_norm": 0.9276641635025418, "learning_rate": 5.481600939009986e-06, "loss": 0.048, "step": 31910 }, { "epoch": 0.13316670978294431, "grad_norm": 1.0305164342235502, "learning_rate": 5.4811715180929995e-06, "loss": 0.0384, "step": 31915 }, { "epoch": 0.1331875724979346, "grad_norm": 0.9058134744285149, "learning_rate": 5.4807421980808974e-06, "loss": 0.0371, "step": 31920 }, { "epoch": 0.13320843521292486, "grad_norm": 1.3957077364493171, "learning_rate": 5.480312978934169e-06, "loss": 0.0355, "step": 31925 }, { "epoch": 0.13322929792791516, "grad_norm": 0.5612250473385169, "learning_rate": 5.479883860613326e-06, "loss": 0.0421, "step": 31930 }, { "epoch": 0.13325016064290543, "grad_norm": 0.5932898308720989, "learning_rate": 5.4794548430788965e-06, "loss": 0.0459, "step": 31935 }, { "epoch": 0.1332710233578957, "grad_norm": 2.83673251225865, "learning_rate": 5.479025926291437e-06, "loss": 0.0417, "step": 31940 }, { "epoch": 0.13329188607288597, "grad_norm": 1.3051122332093474, "learning_rate": 5.478597110211522e-06, "loss": 0.0429, "step": 31945 }, { "epoch": 0.13331274878787627, "grad_norm": 0.8580838482854541, "learning_rate": 5.478168394799748e-06, "loss": 0.0387, "step": 31950 }, { "epoch": 0.13333361150286654, "grad_norm": 0.5107533326529795, "learning_rate": 5.477739780016734e-06, "loss": 0.0408, "step": 31955 }, { "epoch": 0.13335447421785682, "grad_norm": 1.7287388559404897, "learning_rate": 5.477311265823118e-06, "loss": 0.0596, "step": 31960 }, { "epoch": 0.1333753369328471, "grad_norm": 0.7826592118995304, "learning_rate": 5.476882852179565e-06, "loss": 0.0341, "step": 31965 }, { "epoch": 0.13339619964783736, "grad_norm": 0.8961138792259239, "learning_rate": 5.476454539046755e-06, "loss": 0.0398, "step": 31970 }, { "epoch": 0.13341706236282766, "grad_norm": 1.1079634946692332, "learning_rate": 5.476026326385394e-06, "loss": 0.0309, "step": 31975 }, { "epoch": 0.13343792507781793, "grad_norm": 0.8137649612987953, "learning_rate": 5.475598214156207e-06, "loss": 0.0523, "step": 31980 }, { "epoch": 0.1334587877928082, "grad_norm": 0.6619266327593532, "learning_rate": 5.475170202319942e-06, "loss": 0.041, "step": 31985 }, { "epoch": 0.13347965050779848, "grad_norm": 0.562463826670011, "learning_rate": 5.4747422908373685e-06, "loss": 0.0385, "step": 31990 }, { "epoch": 0.13350051322278875, "grad_norm": 1.3807845321216075, "learning_rate": 5.4743144796692746e-06, "loss": 0.0545, "step": 31995 }, { "epoch": 0.13352137593777905, "grad_norm": 1.059209578272097, "learning_rate": 5.4738867687764765e-06, "loss": 0.0388, "step": 32000 }, { "epoch": 0.13354223865276932, "grad_norm": 0.9018712981239868, "learning_rate": 5.473459158119804e-06, "loss": 0.0335, "step": 32005 }, { "epoch": 0.1335631013677596, "grad_norm": 1.0011648504387711, "learning_rate": 5.473031647660113e-06, "loss": 0.0381, "step": 32010 }, { "epoch": 0.13358396408274986, "grad_norm": 0.6708204469025653, "learning_rate": 5.47260423735828e-06, "loss": 0.045, "step": 32015 }, { "epoch": 0.13360482679774016, "grad_norm": 1.0254662594106327, "learning_rate": 5.4721769271752026e-06, "loss": 0.0333, "step": 32020 }, { "epoch": 0.13362568951273043, "grad_norm": 0.6986033811431628, "learning_rate": 5.471749717071798e-06, "loss": 0.0275, "step": 32025 }, { "epoch": 0.1336465522277207, "grad_norm": 0.8883082258324593, "learning_rate": 5.471322607009007e-06, "loss": 0.0386, "step": 32030 }, { "epoch": 0.13366741494271098, "grad_norm": 1.151917507691108, "learning_rate": 5.4708955969477936e-06, "loss": 0.0722, "step": 32035 }, { "epoch": 0.13368827765770125, "grad_norm": 1.5136140276169892, "learning_rate": 5.470468686849138e-06, "loss": 0.0526, "step": 32040 }, { "epoch": 0.13370914037269155, "grad_norm": 0.654483356333684, "learning_rate": 5.470041876674047e-06, "loss": 0.0334, "step": 32045 }, { "epoch": 0.13373000308768182, "grad_norm": 0.5921316867667208, "learning_rate": 5.469615166383543e-06, "loss": 0.032, "step": 32050 }, { "epoch": 0.1337508658026721, "grad_norm": 1.2361276830346997, "learning_rate": 5.4691885559386745e-06, "loss": 0.0357, "step": 32055 }, { "epoch": 0.13377172851766236, "grad_norm": 1.1918222606747246, "learning_rate": 5.46876204530051e-06, "loss": 0.0356, "step": 32060 }, { "epoch": 0.13379259123265266, "grad_norm": 0.9097667498790584, "learning_rate": 5.468335634430138e-06, "loss": 0.0345, "step": 32065 }, { "epoch": 0.13381345394764294, "grad_norm": 1.0124837105417062, "learning_rate": 5.467909323288667e-06, "loss": 0.0539, "step": 32070 }, { "epoch": 0.1338343166626332, "grad_norm": 0.6958787294192886, "learning_rate": 5.467483111837234e-06, "loss": 0.0409, "step": 32075 }, { "epoch": 0.13385517937762348, "grad_norm": 1.1875932343281865, "learning_rate": 5.467057000036987e-06, "loss": 0.0417, "step": 32080 }, { "epoch": 0.13387604209261375, "grad_norm": 0.6753747074275458, "learning_rate": 5.466630987849103e-06, "loss": 0.0513, "step": 32085 }, { "epoch": 0.13389690480760405, "grad_norm": 0.7833593903414867, "learning_rate": 5.4662050752347765e-06, "loss": 0.0439, "step": 32090 }, { "epoch": 0.13391776752259432, "grad_norm": 1.2286520156554503, "learning_rate": 5.465779262155223e-06, "loss": 0.0543, "step": 32095 }, { "epoch": 0.1339386302375846, "grad_norm": 0.9376368118666416, "learning_rate": 5.465353548571683e-06, "loss": 0.0391, "step": 32100 }, { "epoch": 0.13395949295257487, "grad_norm": 0.7325148216906809, "learning_rate": 5.464927934445411e-06, "loss": 0.0427, "step": 32105 }, { "epoch": 0.13398035566756517, "grad_norm": 1.3089431563924168, "learning_rate": 5.464502419737691e-06, "loss": 0.0363, "step": 32110 }, { "epoch": 0.13400121838255544, "grad_norm": 0.9591962100377733, "learning_rate": 5.4640770044098216e-06, "loss": 0.0463, "step": 32115 }, { "epoch": 0.1340220810975457, "grad_norm": 1.1305887514022195, "learning_rate": 5.463651688423125e-06, "loss": 0.0473, "step": 32120 }, { "epoch": 0.13404294381253598, "grad_norm": 0.7387046729840349, "learning_rate": 5.463226471738945e-06, "loss": 0.0356, "step": 32125 }, { "epoch": 0.13406380652752625, "grad_norm": 0.6226825221188782, "learning_rate": 5.462801354318645e-06, "loss": 0.0359, "step": 32130 }, { "epoch": 0.13408466924251655, "grad_norm": 0.8402911994383755, "learning_rate": 5.462376336123612e-06, "loss": 0.0466, "step": 32135 }, { "epoch": 0.13410553195750682, "grad_norm": 1.1509316966531433, "learning_rate": 5.46195141711525e-06, "loss": 0.047, "step": 32140 }, { "epoch": 0.1341263946724971, "grad_norm": 1.2873895151238692, "learning_rate": 5.461526597254988e-06, "loss": 0.0514, "step": 32145 }, { "epoch": 0.13414725738748737, "grad_norm": 2.418066674885674, "learning_rate": 5.461101876504273e-06, "loss": 0.0291, "step": 32150 }, { "epoch": 0.13416812010247767, "grad_norm": 0.8788132047303211, "learning_rate": 5.4606772548245755e-06, "loss": 0.0396, "step": 32155 }, { "epoch": 0.13418898281746794, "grad_norm": 1.214900746480523, "learning_rate": 5.460252732177386e-06, "loss": 0.0435, "step": 32160 }, { "epoch": 0.1342098455324582, "grad_norm": 0.9706936060990516, "learning_rate": 5.459828308524213e-06, "loss": 0.0493, "step": 32165 }, { "epoch": 0.13423070824744848, "grad_norm": 1.7806625521872292, "learning_rate": 5.459403983826591e-06, "loss": 0.0462, "step": 32170 }, { "epoch": 0.13425157096243875, "grad_norm": 1.1951480450488134, "learning_rate": 5.458979758046074e-06, "loss": 0.0462, "step": 32175 }, { "epoch": 0.13427243367742905, "grad_norm": 0.8003783507149402, "learning_rate": 5.458555631144232e-06, "loss": 0.0506, "step": 32180 }, { "epoch": 0.13429329639241933, "grad_norm": 1.108932448920822, "learning_rate": 5.4581316030826635e-06, "loss": 0.0494, "step": 32185 }, { "epoch": 0.1343141591074096, "grad_norm": 0.912023599956867, "learning_rate": 5.457707673822983e-06, "loss": 0.044, "step": 32190 }, { "epoch": 0.13433502182239987, "grad_norm": 1.360793727752271, "learning_rate": 5.457283843326827e-06, "loss": 0.0368, "step": 32195 }, { "epoch": 0.13435588453739017, "grad_norm": 0.7365098164816949, "learning_rate": 5.456860111555853e-06, "loss": 0.0438, "step": 32200 }, { "epoch": 0.13437674725238044, "grad_norm": 0.944747207298484, "learning_rate": 5.456436478471739e-06, "loss": 0.04, "step": 32205 }, { "epoch": 0.1343976099673707, "grad_norm": 0.5378369067445176, "learning_rate": 5.456012944036184e-06, "loss": 0.0354, "step": 32210 }, { "epoch": 0.13441847268236098, "grad_norm": 0.6862236825130014, "learning_rate": 5.455589508210909e-06, "loss": 0.0453, "step": 32215 }, { "epoch": 0.13443933539735126, "grad_norm": 0.8197067582840712, "learning_rate": 5.455166170957654e-06, "loss": 0.031, "step": 32220 }, { "epoch": 0.13446019811234156, "grad_norm": 1.2134316508729313, "learning_rate": 5.4547429322381805e-06, "loss": 0.0472, "step": 32225 }, { "epoch": 0.13448106082733183, "grad_norm": 1.4120479885275146, "learning_rate": 5.454319792014271e-06, "loss": 0.0484, "step": 32230 }, { "epoch": 0.1345019235423221, "grad_norm": 0.6404138125499917, "learning_rate": 5.453896750247728e-06, "loss": 0.0423, "step": 32235 }, { "epoch": 0.13452278625731237, "grad_norm": 0.6835056229896632, "learning_rate": 5.453473806900375e-06, "loss": 0.0501, "step": 32240 }, { "epoch": 0.13454364897230267, "grad_norm": 1.3793370470202233, "learning_rate": 5.453050961934058e-06, "loss": 0.051, "step": 32245 }, { "epoch": 0.13456451168729294, "grad_norm": 1.0118271752798, "learning_rate": 5.452628215310639e-06, "loss": 0.0323, "step": 32250 }, { "epoch": 0.13458537440228321, "grad_norm": 1.5778056383153078, "learning_rate": 5.452205566992008e-06, "loss": 0.0412, "step": 32255 }, { "epoch": 0.1346062371172735, "grad_norm": 0.8567109685166933, "learning_rate": 5.451783016940069e-06, "loss": 0.0437, "step": 32260 }, { "epoch": 0.13462709983226376, "grad_norm": 1.1446185879777238, "learning_rate": 5.451360565116749e-06, "loss": 0.0405, "step": 32265 }, { "epoch": 0.13464796254725406, "grad_norm": 0.5887563446208731, "learning_rate": 5.450938211483996e-06, "loss": 0.0419, "step": 32270 }, { "epoch": 0.13466882526224433, "grad_norm": 0.6087081652771518, "learning_rate": 5.45051595600378e-06, "loss": 0.0331, "step": 32275 }, { "epoch": 0.1346896879772346, "grad_norm": 0.5750234910377067, "learning_rate": 5.450093798638089e-06, "loss": 0.0386, "step": 32280 }, { "epoch": 0.13471055069222487, "grad_norm": 0.8817268845183193, "learning_rate": 5.449671739348932e-06, "loss": 0.0336, "step": 32285 }, { "epoch": 0.13473141340721517, "grad_norm": 0.9842309472496468, "learning_rate": 5.44924977809834e-06, "loss": 0.0282, "step": 32290 }, { "epoch": 0.13475227612220544, "grad_norm": 0.8784034093528391, "learning_rate": 5.448827914848363e-06, "loss": 0.0458, "step": 32295 }, { "epoch": 0.13477313883719572, "grad_norm": 1.1104261818255257, "learning_rate": 5.448406149561075e-06, "loss": 0.047, "step": 32300 }, { "epoch": 0.134794001552186, "grad_norm": 0.7879315170345524, "learning_rate": 5.447984482198566e-06, "loss": 0.0383, "step": 32305 }, { "epoch": 0.13481486426717626, "grad_norm": 0.7904082520174668, "learning_rate": 5.447562912722948e-06, "loss": 0.0308, "step": 32310 }, { "epoch": 0.13483572698216656, "grad_norm": 1.0270275029092064, "learning_rate": 5.4471414410963566e-06, "loss": 0.0397, "step": 32315 }, { "epoch": 0.13485658969715683, "grad_norm": 1.0072277712966324, "learning_rate": 5.4467200672809404e-06, "loss": 0.0385, "step": 32320 }, { "epoch": 0.1348774524121471, "grad_norm": 1.928312412211536, "learning_rate": 5.446298791238879e-06, "loss": 0.0452, "step": 32325 }, { "epoch": 0.13489831512713737, "grad_norm": 0.5598952829270721, "learning_rate": 5.445877612932363e-06, "loss": 0.045, "step": 32330 }, { "epoch": 0.13491917784212767, "grad_norm": 0.8141998971535077, "learning_rate": 5.44545653232361e-06, "loss": 0.034, "step": 32335 }, { "epoch": 0.13494004055711795, "grad_norm": 0.9934083090837551, "learning_rate": 5.445035549374853e-06, "loss": 0.041, "step": 32340 }, { "epoch": 0.13496090327210822, "grad_norm": 1.2834018402343323, "learning_rate": 5.44461466404835e-06, "loss": 0.0403, "step": 32345 }, { "epoch": 0.1349817659870985, "grad_norm": 0.7427797963224669, "learning_rate": 5.444193876306375e-06, "loss": 0.0411, "step": 32350 }, { "epoch": 0.13500262870208876, "grad_norm": 0.955341479313791, "learning_rate": 5.4437731861112275e-06, "loss": 0.0434, "step": 32355 }, { "epoch": 0.13502349141707906, "grad_norm": 1.3338810383597568, "learning_rate": 5.443352593425222e-06, "loss": 0.0468, "step": 32360 }, { "epoch": 0.13504435413206933, "grad_norm": 0.8948622521798163, "learning_rate": 5.442932098210697e-06, "loss": 0.0439, "step": 32365 }, { "epoch": 0.1350652168470596, "grad_norm": 0.8513351178180901, "learning_rate": 5.4425117004300095e-06, "loss": 0.0447, "step": 32370 }, { "epoch": 0.13508607956204988, "grad_norm": 1.4285859094490185, "learning_rate": 5.442091400045539e-06, "loss": 0.0543, "step": 32375 }, { "epoch": 0.13510694227704018, "grad_norm": 0.9661208822118383, "learning_rate": 5.4416711970196846e-06, "loss": 0.044, "step": 32380 }, { "epoch": 0.13512780499203045, "grad_norm": 1.025325461397152, "learning_rate": 5.441251091314863e-06, "loss": 0.0483, "step": 32385 }, { "epoch": 0.13514866770702072, "grad_norm": 0.8024027335835291, "learning_rate": 5.440831082893514e-06, "loss": 0.0415, "step": 32390 }, { "epoch": 0.135169530422011, "grad_norm": 0.7095749210396334, "learning_rate": 5.440411171718096e-06, "loss": 0.0345, "step": 32395 }, { "epoch": 0.13519039313700126, "grad_norm": 0.5053005340420705, "learning_rate": 5.439991357751091e-06, "loss": 0.0354, "step": 32400 }, { "epoch": 0.13521125585199156, "grad_norm": 1.5837809345793565, "learning_rate": 5.439571640954998e-06, "loss": 0.0481, "step": 32405 }, { "epoch": 0.13523211856698183, "grad_norm": 1.1873263327529706, "learning_rate": 5.439152021292336e-06, "loss": 0.0349, "step": 32410 }, { "epoch": 0.1352529812819721, "grad_norm": 1.120690266047845, "learning_rate": 5.438732498725646e-06, "loss": 0.0462, "step": 32415 }, { "epoch": 0.13527384399696238, "grad_norm": 0.944116815565054, "learning_rate": 5.438313073217491e-06, "loss": 0.0654, "step": 32420 }, { "epoch": 0.13529470671195268, "grad_norm": 0.9878893842001353, "learning_rate": 5.437893744730448e-06, "loss": 0.0377, "step": 32425 }, { "epoch": 0.13531556942694295, "grad_norm": 0.9036385134863537, "learning_rate": 5.437474513227121e-06, "loss": 0.0453, "step": 32430 }, { "epoch": 0.13533643214193322, "grad_norm": 0.7820658688271909, "learning_rate": 5.43705537867013e-06, "loss": 0.0427, "step": 32435 }, { "epoch": 0.1353572948569235, "grad_norm": 0.9587520847784917, "learning_rate": 5.436636341022115e-06, "loss": 0.0447, "step": 32440 }, { "epoch": 0.13537815757191377, "grad_norm": 0.914491392763691, "learning_rate": 5.4362174002457405e-06, "loss": 0.0364, "step": 32445 }, { "epoch": 0.13539902028690406, "grad_norm": 0.9388009088380692, "learning_rate": 5.435798556303687e-06, "loss": 0.047, "step": 32450 }, { "epoch": 0.13541988300189434, "grad_norm": 0.7469175341045138, "learning_rate": 5.435379809158655e-06, "loss": 0.0503, "step": 32455 }, { "epoch": 0.1354407457168846, "grad_norm": 0.9293438630388428, "learning_rate": 5.434961158773369e-06, "loss": 0.0479, "step": 32460 }, { "epoch": 0.13546160843187488, "grad_norm": 1.1055194481954902, "learning_rate": 5.434542605110568e-06, "loss": 0.0495, "step": 32465 }, { "epoch": 0.13548247114686518, "grad_norm": 1.096876197754288, "learning_rate": 5.434124148133016e-06, "loss": 0.0742, "step": 32470 }, { "epoch": 0.13550333386185545, "grad_norm": 1.1156717491241461, "learning_rate": 5.433705787803494e-06, "loss": 0.0574, "step": 32475 }, { "epoch": 0.13552419657684572, "grad_norm": 0.7800566415867832, "learning_rate": 5.433287524084806e-06, "loss": 0.0594, "step": 32480 }, { "epoch": 0.135545059291836, "grad_norm": 0.7431713917954667, "learning_rate": 5.432869356939772e-06, "loss": 0.0404, "step": 32485 }, { "epoch": 0.13556592200682627, "grad_norm": 1.1454329821858087, "learning_rate": 5.432451286331237e-06, "loss": 0.0457, "step": 32490 }, { "epoch": 0.13558678472181657, "grad_norm": 0.9742103255715248, "learning_rate": 5.432033312222061e-06, "loss": 0.0345, "step": 32495 }, { "epoch": 0.13560764743680684, "grad_norm": 0.9116402109211762, "learning_rate": 5.431615434575127e-06, "loss": 0.0548, "step": 32500 }, { "epoch": 0.1356285101517971, "grad_norm": 1.1308341496667305, "learning_rate": 5.431197653353337e-06, "loss": 0.0417, "step": 32505 }, { "epoch": 0.13564937286678738, "grad_norm": 1.2133197889362597, "learning_rate": 5.430779968519614e-06, "loss": 0.0578, "step": 32510 }, { "epoch": 0.13567023558177768, "grad_norm": 1.6032112862763062, "learning_rate": 5.4303623800369e-06, "loss": 0.0459, "step": 32515 }, { "epoch": 0.13569109829676795, "grad_norm": 1.1258967074131312, "learning_rate": 5.429944887868157e-06, "loss": 0.0309, "step": 32520 }, { "epoch": 0.13571196101175823, "grad_norm": 1.0900524300802772, "learning_rate": 5.4295274919763666e-06, "loss": 0.0448, "step": 32525 }, { "epoch": 0.1357328237267485, "grad_norm": 1.0905763585874018, "learning_rate": 5.429110192324531e-06, "loss": 0.0502, "step": 32530 }, { "epoch": 0.13575368644173877, "grad_norm": 0.9617389034501272, "learning_rate": 5.4286929888756734e-06, "loss": 0.0347, "step": 32535 }, { "epoch": 0.13577454915672907, "grad_norm": 1.0384278330072605, "learning_rate": 5.428275881592833e-06, "loss": 0.0366, "step": 32540 }, { "epoch": 0.13579541187171934, "grad_norm": 1.3100580628665273, "learning_rate": 5.427858870439073e-06, "loss": 0.0389, "step": 32545 }, { "epoch": 0.1358162745867096, "grad_norm": 1.1846357420933942, "learning_rate": 5.427441955377476e-06, "loss": 0.0342, "step": 32550 }, { "epoch": 0.13583713730169988, "grad_norm": 0.7311068266064824, "learning_rate": 5.4270251363711415e-06, "loss": 0.037, "step": 32555 }, { "epoch": 0.13585800001669018, "grad_norm": 0.807180022980106, "learning_rate": 5.426608413383192e-06, "loss": 0.0315, "step": 32560 }, { "epoch": 0.13587886273168046, "grad_norm": 0.6825404984471123, "learning_rate": 5.426191786376767e-06, "loss": 0.0439, "step": 32565 }, { "epoch": 0.13589972544667073, "grad_norm": 0.8004905818109135, "learning_rate": 5.425775255315029e-06, "loss": 0.0565, "step": 32570 }, { "epoch": 0.135920588161661, "grad_norm": 0.7106001586740738, "learning_rate": 5.425358820161157e-06, "loss": 0.0383, "step": 32575 }, { "epoch": 0.13594145087665127, "grad_norm": 1.3713026922728087, "learning_rate": 5.424942480878353e-06, "loss": 0.0494, "step": 32580 }, { "epoch": 0.13596231359164157, "grad_norm": 0.8513982123532654, "learning_rate": 5.424526237429837e-06, "loss": 0.0509, "step": 32585 }, { "epoch": 0.13598317630663184, "grad_norm": 0.874919600627179, "learning_rate": 5.424110089778847e-06, "loss": 0.048, "step": 32590 }, { "epoch": 0.13600403902162211, "grad_norm": 0.9942948445553799, "learning_rate": 5.423694037888645e-06, "loss": 0.0413, "step": 32595 }, { "epoch": 0.13602490173661239, "grad_norm": 0.9241566697823654, "learning_rate": 5.42327808172251e-06, "loss": 0.0364, "step": 32600 }, { "epoch": 0.13604576445160269, "grad_norm": 0.7994012673393313, "learning_rate": 5.422862221243737e-06, "loss": 0.0304, "step": 32605 }, { "epoch": 0.13606662716659296, "grad_norm": 1.0270338831226897, "learning_rate": 5.422446456415652e-06, "loss": 0.0512, "step": 32610 }, { "epoch": 0.13608748988158323, "grad_norm": 1.2187877561313, "learning_rate": 5.422030787201589e-06, "loss": 0.0573, "step": 32615 }, { "epoch": 0.1361083525965735, "grad_norm": 1.1687046763755677, "learning_rate": 5.421615213564906e-06, "loss": 0.0393, "step": 32620 }, { "epoch": 0.13612921531156377, "grad_norm": 0.9403478973868393, "learning_rate": 5.42119973546898e-06, "loss": 0.0359, "step": 32625 }, { "epoch": 0.13615007802655407, "grad_norm": 2.164610940747552, "learning_rate": 5.420784352877213e-06, "loss": 0.0384, "step": 32630 }, { "epoch": 0.13617094074154434, "grad_norm": 0.934697446763424, "learning_rate": 5.420369065753017e-06, "loss": 0.0418, "step": 32635 }, { "epoch": 0.13619180345653462, "grad_norm": 1.3993311302567166, "learning_rate": 5.419953874059833e-06, "loss": 0.0472, "step": 32640 }, { "epoch": 0.1362126661715249, "grad_norm": 0.8152023003760965, "learning_rate": 5.419538777761113e-06, "loss": 0.0492, "step": 32645 }, { "epoch": 0.1362335288865152, "grad_norm": 1.0219515656047604, "learning_rate": 5.4191237768203355e-06, "loss": 0.0422, "step": 32650 }, { "epoch": 0.13625439160150546, "grad_norm": 0.7104753016147284, "learning_rate": 5.418708871200996e-06, "loss": 0.0381, "step": 32655 }, { "epoch": 0.13627525431649573, "grad_norm": 1.0112110070830427, "learning_rate": 5.418294060866608e-06, "loss": 0.0419, "step": 32660 }, { "epoch": 0.136296117031486, "grad_norm": 0.9500313282777073, "learning_rate": 5.417879345780708e-06, "loss": 0.0366, "step": 32665 }, { "epoch": 0.13631697974647627, "grad_norm": 1.2681483176254083, "learning_rate": 5.417464725906847e-06, "loss": 0.0511, "step": 32670 }, { "epoch": 0.13633784246146657, "grad_norm": 1.3615844473226788, "learning_rate": 5.4170502012086025e-06, "loss": 0.0426, "step": 32675 }, { "epoch": 0.13635870517645685, "grad_norm": 1.2477525475578084, "learning_rate": 5.416635771649564e-06, "loss": 0.0328, "step": 32680 }, { "epoch": 0.13637956789144712, "grad_norm": 2.525626227175421, "learning_rate": 5.416221437193348e-06, "loss": 0.0479, "step": 32685 }, { "epoch": 0.1364004306064374, "grad_norm": 1.1650134299570742, "learning_rate": 5.415807197803582e-06, "loss": 0.0511, "step": 32690 }, { "epoch": 0.1364212933214277, "grad_norm": 5.237436676599152, "learning_rate": 5.4153930534439215e-06, "loss": 0.0545, "step": 32695 }, { "epoch": 0.13644215603641796, "grad_norm": 0.7226334792936512, "learning_rate": 5.414979004078036e-06, "loss": 0.0409, "step": 32700 }, { "epoch": 0.13646301875140823, "grad_norm": 0.7249742128761693, "learning_rate": 5.4145650496696155e-06, "loss": 0.0399, "step": 32705 }, { "epoch": 0.1364838814663985, "grad_norm": 0.8784857522152317, "learning_rate": 5.4141511901823695e-06, "loss": 0.0516, "step": 32710 }, { "epoch": 0.13650474418138878, "grad_norm": 1.1387619324826839, "learning_rate": 5.4137374255800304e-06, "loss": 0.0432, "step": 32715 }, { "epoch": 0.13652560689637908, "grad_norm": 0.9386927203679271, "learning_rate": 5.413323755826342e-06, "loss": 0.0482, "step": 32720 }, { "epoch": 0.13654646961136935, "grad_norm": 0.8561677554741407, "learning_rate": 5.412910180885076e-06, "loss": 0.0517, "step": 32725 }, { "epoch": 0.13656733232635962, "grad_norm": 0.836365741433471, "learning_rate": 5.412496700720018e-06, "loss": 0.0403, "step": 32730 }, { "epoch": 0.1365881950413499, "grad_norm": 1.3794874967586785, "learning_rate": 5.412083315294977e-06, "loss": 0.0401, "step": 32735 }, { "epoch": 0.1366090577563402, "grad_norm": 1.0807668343844121, "learning_rate": 5.411670024573777e-06, "loss": 0.0322, "step": 32740 }, { "epoch": 0.13662992047133046, "grad_norm": 0.7505812547238944, "learning_rate": 5.411256828520265e-06, "loss": 0.0474, "step": 32745 }, { "epoch": 0.13665078318632073, "grad_norm": 0.8375635891764124, "learning_rate": 5.410843727098304e-06, "loss": 0.0527, "step": 32750 }, { "epoch": 0.136671645901311, "grad_norm": 2.372645526357968, "learning_rate": 5.4104307202717785e-06, "loss": 0.0326, "step": 32755 }, { "epoch": 0.13669250861630128, "grad_norm": 0.7035137301519611, "learning_rate": 5.410017808004595e-06, "loss": 0.0445, "step": 32760 }, { "epoch": 0.13671337133129158, "grad_norm": 0.5719471609538856, "learning_rate": 5.409604990260671e-06, "loss": 0.0388, "step": 32765 }, { "epoch": 0.13673423404628185, "grad_norm": 0.6658335414124381, "learning_rate": 5.409192267003953e-06, "loss": 0.0442, "step": 32770 }, { "epoch": 0.13675509676127212, "grad_norm": 1.0853420600211479, "learning_rate": 5.4087796381984e-06, "loss": 0.0483, "step": 32775 }, { "epoch": 0.1367759594762624, "grad_norm": 0.742164561696049, "learning_rate": 5.408367103807994e-06, "loss": 0.0428, "step": 32780 }, { "epoch": 0.1367968221912527, "grad_norm": 0.981601661070309, "learning_rate": 5.407954663796732e-06, "loss": 0.0448, "step": 32785 }, { "epoch": 0.13681768490624296, "grad_norm": 1.0461262190515175, "learning_rate": 5.407542318128635e-06, "loss": 0.0432, "step": 32790 }, { "epoch": 0.13683854762123324, "grad_norm": 1.0278982518298214, "learning_rate": 5.40713006676774e-06, "loss": 0.0435, "step": 32795 }, { "epoch": 0.1368594103362235, "grad_norm": 1.132830022079425, "learning_rate": 5.4067179096781065e-06, "loss": 0.0474, "step": 32800 }, { "epoch": 0.13688027305121378, "grad_norm": 0.6949701960439789, "learning_rate": 5.406305846823809e-06, "loss": 0.0291, "step": 32805 }, { "epoch": 0.13690113576620408, "grad_norm": 1.0820250558823046, "learning_rate": 5.405893878168942e-06, "loss": 0.0437, "step": 32810 }, { "epoch": 0.13692199848119435, "grad_norm": 0.8954905193331277, "learning_rate": 5.405482003677623e-06, "loss": 0.047, "step": 32815 }, { "epoch": 0.13694286119618462, "grad_norm": 0.9042766895441243, "learning_rate": 5.405070223313985e-06, "loss": 0.0419, "step": 32820 }, { "epoch": 0.1369637239111749, "grad_norm": 0.8071313469947635, "learning_rate": 5.40465853704218e-06, "loss": 0.0499, "step": 32825 }, { "epoch": 0.1369845866261652, "grad_norm": 0.8286631401018033, "learning_rate": 5.404246944826381e-06, "loss": 0.0376, "step": 32830 }, { "epoch": 0.13700544934115547, "grad_norm": 0.699900433285816, "learning_rate": 5.4038354466307785e-06, "loss": 0.0419, "step": 32835 }, { "epoch": 0.13702631205614574, "grad_norm": 0.5520216746685922, "learning_rate": 5.403424042419584e-06, "loss": 0.0405, "step": 32840 }, { "epoch": 0.137047174771136, "grad_norm": 1.1409479089975274, "learning_rate": 5.403012732157027e-06, "loss": 0.0594, "step": 32845 }, { "epoch": 0.13706803748612628, "grad_norm": 1.3534285923401184, "learning_rate": 5.402601515807354e-06, "loss": 0.0375, "step": 32850 }, { "epoch": 0.13708890020111658, "grad_norm": 1.0232281219379527, "learning_rate": 5.402190393334834e-06, "loss": 0.0389, "step": 32855 }, { "epoch": 0.13710976291610685, "grad_norm": 0.8657212969067034, "learning_rate": 5.4017793647037546e-06, "loss": 0.0416, "step": 32860 }, { "epoch": 0.13713062563109712, "grad_norm": 1.2932503028710078, "learning_rate": 5.401368429878418e-06, "loss": 0.056, "step": 32865 }, { "epoch": 0.1371514883460874, "grad_norm": 0.8044817071913775, "learning_rate": 5.400957588823152e-06, "loss": 0.0413, "step": 32870 }, { "epoch": 0.1371723510610777, "grad_norm": 0.9141212889316124, "learning_rate": 5.4005468415022975e-06, "loss": 0.0372, "step": 32875 }, { "epoch": 0.13719321377606797, "grad_norm": 0.9820197967682394, "learning_rate": 5.400136187880221e-06, "loss": 0.0508, "step": 32880 }, { "epoch": 0.13721407649105824, "grad_norm": 0.6851368019187571, "learning_rate": 5.399725627921299e-06, "loss": 0.0427, "step": 32885 }, { "epoch": 0.1372349392060485, "grad_norm": 0.7813625165149225, "learning_rate": 5.399315161589935e-06, "loss": 0.0337, "step": 32890 }, { "epoch": 0.13725580192103878, "grad_norm": 0.9419857181399284, "learning_rate": 5.39890478885055e-06, "loss": 0.0424, "step": 32895 }, { "epoch": 0.13727666463602908, "grad_norm": 1.3995750555683344, "learning_rate": 5.398494509667577e-06, "loss": 0.0427, "step": 32900 }, { "epoch": 0.13729752735101936, "grad_norm": 0.8073376775712899, "learning_rate": 5.398084324005477e-06, "loss": 0.0411, "step": 32905 }, { "epoch": 0.13731839006600963, "grad_norm": 0.9718174581609473, "learning_rate": 5.397674231828725e-06, "loss": 0.045, "step": 32910 }, { "epoch": 0.1373392527809999, "grad_norm": 3.76652091887992, "learning_rate": 5.397264233101819e-06, "loss": 0.0374, "step": 32915 }, { "epoch": 0.1373601154959902, "grad_norm": 0.6335213538458282, "learning_rate": 5.396854327789268e-06, "loss": 0.0436, "step": 32920 }, { "epoch": 0.13738097821098047, "grad_norm": 0.9583052548009883, "learning_rate": 5.396444515855608e-06, "loss": 0.0455, "step": 32925 }, { "epoch": 0.13740184092597074, "grad_norm": 1.0974284579051765, "learning_rate": 5.3960347972653886e-06, "loss": 0.0592, "step": 32930 }, { "epoch": 0.137422703640961, "grad_norm": 0.891558540595523, "learning_rate": 5.395625171983182e-06, "loss": 0.0397, "step": 32935 }, { "epoch": 0.13744356635595129, "grad_norm": 0.6051038178255261, "learning_rate": 5.3952156399735755e-06, "loss": 0.0388, "step": 32940 }, { "epoch": 0.13746442907094159, "grad_norm": 1.3939085343026008, "learning_rate": 5.394806201201179e-06, "loss": 0.0377, "step": 32945 }, { "epoch": 0.13748529178593186, "grad_norm": 2.5742686286628915, "learning_rate": 5.394396855630619e-06, "loss": 0.0496, "step": 32950 }, { "epoch": 0.13750615450092213, "grad_norm": 1.2124929755503842, "learning_rate": 5.393987603226543e-06, "loss": 0.033, "step": 32955 }, { "epoch": 0.1375270172159124, "grad_norm": 1.1869591968265496, "learning_rate": 5.39357844395361e-06, "loss": 0.0602, "step": 32960 }, { "epoch": 0.1375478799309027, "grad_norm": 0.7912503466336904, "learning_rate": 5.393169377776507e-06, "loss": 0.0374, "step": 32965 }, { "epoch": 0.13756874264589297, "grad_norm": 1.298235770683012, "learning_rate": 5.392760404659937e-06, "loss": 0.0572, "step": 32970 }, { "epoch": 0.13758960536088324, "grad_norm": 0.7570746223912187, "learning_rate": 5.3923515245686185e-06, "loss": 0.0364, "step": 32975 }, { "epoch": 0.13761046807587352, "grad_norm": 0.879178671360894, "learning_rate": 5.3919427374672915e-06, "loss": 0.0448, "step": 32980 }, { "epoch": 0.1376313307908638, "grad_norm": 1.0516923332293244, "learning_rate": 5.3915340433207145e-06, "loss": 0.0422, "step": 32985 }, { "epoch": 0.1376521935058541, "grad_norm": 1.0365677797841417, "learning_rate": 5.391125442093663e-06, "loss": 0.0417, "step": 32990 }, { "epoch": 0.13767305622084436, "grad_norm": 0.9747616239242205, "learning_rate": 5.390716933750934e-06, "loss": 0.0701, "step": 32995 }, { "epoch": 0.13769391893583463, "grad_norm": 0.7507871541052231, "learning_rate": 5.39030851825734e-06, "loss": 0.0326, "step": 33000 }, { "epoch": 0.1377147816508249, "grad_norm": 0.537314916324993, "learning_rate": 5.389900195577715e-06, "loss": 0.0392, "step": 33005 }, { "epoch": 0.1377356443658152, "grad_norm": 0.8012376377369816, "learning_rate": 5.389491965676912e-06, "loss": 0.0448, "step": 33010 }, { "epoch": 0.13775650708080547, "grad_norm": 0.8493281876638482, "learning_rate": 5.389083828519798e-06, "loss": 0.0344, "step": 33015 }, { "epoch": 0.13777736979579575, "grad_norm": 0.8635631430376594, "learning_rate": 5.3886757840712634e-06, "loss": 0.0436, "step": 33020 }, { "epoch": 0.13779823251078602, "grad_norm": 1.3284380875392452, "learning_rate": 5.388267832296214e-06, "loss": 0.047, "step": 33025 }, { "epoch": 0.1378190952257763, "grad_norm": 0.6884412938751213, "learning_rate": 5.3878599731595785e-06, "loss": 0.0492, "step": 33030 }, { "epoch": 0.1378399579407666, "grad_norm": 0.8597320259054065, "learning_rate": 5.387452206626299e-06, "loss": 0.0388, "step": 33035 }, { "epoch": 0.13786082065575686, "grad_norm": 0.9287026840922925, "learning_rate": 5.387044532661338e-06, "loss": 0.0429, "step": 33040 }, { "epoch": 0.13788168337074713, "grad_norm": 0.9103409219897739, "learning_rate": 5.3866369512296816e-06, "loss": 0.0406, "step": 33045 }, { "epoch": 0.1379025460857374, "grad_norm": 0.8425609037504718, "learning_rate": 5.386229462296324e-06, "loss": 0.0471, "step": 33050 }, { "epoch": 0.1379234088007277, "grad_norm": 0.8393683123782181, "learning_rate": 5.3858220658262875e-06, "loss": 0.047, "step": 33055 }, { "epoch": 0.13794427151571798, "grad_norm": 1.1136179337462135, "learning_rate": 5.385414761784607e-06, "loss": 0.0449, "step": 33060 }, { "epoch": 0.13796513423070825, "grad_norm": 1.5840891985427141, "learning_rate": 5.3850075501363416e-06, "loss": 0.0581, "step": 33065 }, { "epoch": 0.13798599694569852, "grad_norm": 1.0324963291965326, "learning_rate": 5.384600430846564e-06, "loss": 0.041, "step": 33070 }, { "epoch": 0.1380068596606888, "grad_norm": 1.1494634220094058, "learning_rate": 5.3841934038803645e-06, "loss": 0.045, "step": 33075 }, { "epoch": 0.1380277223756791, "grad_norm": 0.8117327838612659, "learning_rate": 5.3837864692028576e-06, "loss": 0.0386, "step": 33080 }, { "epoch": 0.13804858509066936, "grad_norm": 1.1921108687256425, "learning_rate": 5.383379626779171e-06, "loss": 0.0395, "step": 33085 }, { "epoch": 0.13806944780565963, "grad_norm": 1.3021216427121078, "learning_rate": 5.382972876574453e-06, "loss": 0.0423, "step": 33090 }, { "epoch": 0.1380903105206499, "grad_norm": 1.030291959306012, "learning_rate": 5.38256621855387e-06, "loss": 0.0358, "step": 33095 }, { "epoch": 0.1381111732356402, "grad_norm": 0.6489606023401786, "learning_rate": 5.382159652682608e-06, "loss": 0.0339, "step": 33100 }, { "epoch": 0.13813203595063048, "grad_norm": 1.1756761754811644, "learning_rate": 5.381753178925868e-06, "loss": 0.0486, "step": 33105 }, { "epoch": 0.13815289866562075, "grad_norm": 1.095330204796634, "learning_rate": 5.381346797248874e-06, "loss": 0.0536, "step": 33110 }, { "epoch": 0.13817376138061102, "grad_norm": 1.2649474465187434, "learning_rate": 5.380940507616865e-06, "loss": 0.0442, "step": 33115 }, { "epoch": 0.1381946240956013, "grad_norm": 1.5899584815556294, "learning_rate": 5.380534309995101e-06, "loss": 0.0521, "step": 33120 }, { "epoch": 0.1382154868105916, "grad_norm": 0.8903135204611675, "learning_rate": 5.380128204348857e-06, "loss": 0.03, "step": 33125 }, { "epoch": 0.13823634952558186, "grad_norm": 1.2110398427430624, "learning_rate": 5.379722190643428e-06, "loss": 0.0426, "step": 33130 }, { "epoch": 0.13825721224057214, "grad_norm": 1.3321726492961932, "learning_rate": 5.379316268844127e-06, "loss": 0.0501, "step": 33135 }, { "epoch": 0.1382780749555624, "grad_norm": 0.9024093036897953, "learning_rate": 5.378910438916288e-06, "loss": 0.0465, "step": 33140 }, { "epoch": 0.1382989376705527, "grad_norm": 1.2063925003309364, "learning_rate": 5.378504700825261e-06, "loss": 0.0452, "step": 33145 }, { "epoch": 0.13831980038554298, "grad_norm": 0.5416356166081566, "learning_rate": 5.378099054536412e-06, "loss": 0.0495, "step": 33150 }, { "epoch": 0.13834066310053325, "grad_norm": 1.0384423963026401, "learning_rate": 5.377693500015129e-06, "loss": 0.0415, "step": 33155 }, { "epoch": 0.13836152581552352, "grad_norm": 0.7220950078253632, "learning_rate": 5.3772880372268175e-06, "loss": 0.0384, "step": 33160 }, { "epoch": 0.1383823885305138, "grad_norm": 1.416377243336094, "learning_rate": 5.376882666136899e-06, "loss": 0.0498, "step": 33165 }, { "epoch": 0.1384032512455041, "grad_norm": 1.287776329863224, "learning_rate": 5.376477386710817e-06, "loss": 0.0449, "step": 33170 }, { "epoch": 0.13842411396049437, "grad_norm": 0.6764012649643869, "learning_rate": 5.376072198914031e-06, "loss": 0.0251, "step": 33175 }, { "epoch": 0.13844497667548464, "grad_norm": 1.408777047356645, "learning_rate": 5.375667102712016e-06, "loss": 0.0352, "step": 33180 }, { "epoch": 0.1384658393904749, "grad_norm": 1.0901782085083813, "learning_rate": 5.375262098070272e-06, "loss": 0.0446, "step": 33185 }, { "epoch": 0.1384867021054652, "grad_norm": 0.8110261187419372, "learning_rate": 5.374857184954311e-06, "loss": 0.0367, "step": 33190 }, { "epoch": 0.13850756482045548, "grad_norm": 1.2414648049947754, "learning_rate": 5.374452363329665e-06, "loss": 0.0513, "step": 33195 }, { "epoch": 0.13852842753544575, "grad_norm": 1.5650981655028033, "learning_rate": 5.3740476331618875e-06, "loss": 0.0386, "step": 33200 }, { "epoch": 0.13854929025043602, "grad_norm": 0.7159798937581124, "learning_rate": 5.373642994416543e-06, "loss": 0.0372, "step": 33205 }, { "epoch": 0.1385701529654263, "grad_norm": 0.9593261893561268, "learning_rate": 5.373238447059223e-06, "loss": 0.0409, "step": 33210 }, { "epoch": 0.1385910156804166, "grad_norm": 0.8967993736602156, "learning_rate": 5.3728339910555296e-06, "loss": 0.0416, "step": 33215 }, { "epoch": 0.13861187839540687, "grad_norm": 0.8572242335023158, "learning_rate": 5.372429626371086e-06, "loss": 0.042, "step": 33220 }, { "epoch": 0.13863274111039714, "grad_norm": 1.0154743653759017, "learning_rate": 5.372025352971535e-06, "loss": 0.0446, "step": 33225 }, { "epoch": 0.1386536038253874, "grad_norm": 1.015831076247227, "learning_rate": 5.371621170822535e-06, "loss": 0.0433, "step": 33230 }, { "epoch": 0.1386744665403777, "grad_norm": 0.6735880110788999, "learning_rate": 5.371217079889763e-06, "loss": 0.0444, "step": 33235 }, { "epoch": 0.13869532925536798, "grad_norm": 0.6674611354565603, "learning_rate": 5.3708130801389154e-06, "loss": 0.0494, "step": 33240 }, { "epoch": 0.13871619197035825, "grad_norm": 0.8812712337057097, "learning_rate": 5.3704091715357075e-06, "loss": 0.0378, "step": 33245 }, { "epoch": 0.13873705468534853, "grad_norm": 0.7167502626444743, "learning_rate": 5.370005354045868e-06, "loss": 0.0295, "step": 33250 }, { "epoch": 0.1387579174003388, "grad_norm": 0.821985277258711, "learning_rate": 5.369601627635147e-06, "loss": 0.0458, "step": 33255 }, { "epoch": 0.1387787801153291, "grad_norm": 0.6512345971065959, "learning_rate": 5.369197992269313e-06, "loss": 0.0339, "step": 33260 }, { "epoch": 0.13879964283031937, "grad_norm": 0.9132052346507027, "learning_rate": 5.368794447914152e-06, "loss": 0.0453, "step": 33265 }, { "epoch": 0.13882050554530964, "grad_norm": 1.1704773219073676, "learning_rate": 5.368390994535468e-06, "loss": 0.0385, "step": 33270 }, { "epoch": 0.1388413682602999, "grad_norm": 0.8322483062475318, "learning_rate": 5.367987632099081e-06, "loss": 0.0519, "step": 33275 }, { "epoch": 0.1388622309752902, "grad_norm": 0.8539474433740283, "learning_rate": 5.3675843605708325e-06, "loss": 0.049, "step": 33280 }, { "epoch": 0.13888309369028048, "grad_norm": 1.2109781965893607, "learning_rate": 5.367181179916578e-06, "loss": 0.0521, "step": 33285 }, { "epoch": 0.13890395640527076, "grad_norm": 1.4576229626889723, "learning_rate": 5.366778090102196e-06, "loss": 0.0353, "step": 33290 }, { "epoch": 0.13892481912026103, "grad_norm": 1.1413567293263547, "learning_rate": 5.366375091093579e-06, "loss": 0.0442, "step": 33295 }, { "epoch": 0.1389456818352513, "grad_norm": 0.9643058037485438, "learning_rate": 5.365972182856636e-06, "loss": 0.0389, "step": 33300 }, { "epoch": 0.1389665445502416, "grad_norm": 1.2835572945497378, "learning_rate": 5.365569365357299e-06, "loss": 0.0463, "step": 33305 }, { "epoch": 0.13898740726523187, "grad_norm": 1.1559551932985312, "learning_rate": 5.3651666385615144e-06, "loss": 0.0536, "step": 33310 }, { "epoch": 0.13900826998022214, "grad_norm": 1.2315394490207279, "learning_rate": 5.364764002435247e-06, "loss": 0.0513, "step": 33315 }, { "epoch": 0.13902913269521242, "grad_norm": 0.878588858112793, "learning_rate": 5.364361456944482e-06, "loss": 0.0471, "step": 33320 }, { "epoch": 0.13904999541020271, "grad_norm": 1.3323370014293847, "learning_rate": 5.363959002055218e-06, "loss": 0.0447, "step": 33325 }, { "epoch": 0.139070858125193, "grad_norm": 1.223169096825462, "learning_rate": 5.3635566377334745e-06, "loss": 0.0372, "step": 33330 }, { "epoch": 0.13909172084018326, "grad_norm": 0.8629281924629215, "learning_rate": 5.3631543639452865e-06, "loss": 0.0562, "step": 33335 }, { "epoch": 0.13911258355517353, "grad_norm": 0.9188086856586285, "learning_rate": 5.362752180656711e-06, "loss": 0.0425, "step": 33340 }, { "epoch": 0.1391334462701638, "grad_norm": 1.284202245595332, "learning_rate": 5.36235008783382e-06, "loss": 0.0464, "step": 33345 }, { "epoch": 0.1391543089851541, "grad_norm": 0.8184838084137196, "learning_rate": 5.361948085442701e-06, "loss": 0.0395, "step": 33350 }, { "epoch": 0.13917517170014437, "grad_norm": 0.5831503900820194, "learning_rate": 5.3615461734494636e-06, "loss": 0.0289, "step": 33355 }, { "epoch": 0.13919603441513465, "grad_norm": 1.3150244824446693, "learning_rate": 5.361144351820234e-06, "loss": 0.043, "step": 33360 }, { "epoch": 0.13921689713012492, "grad_norm": 0.7593153551715518, "learning_rate": 5.360742620521153e-06, "loss": 0.0352, "step": 33365 }, { "epoch": 0.13923775984511522, "grad_norm": 0.6247139883462536, "learning_rate": 5.360340979518386e-06, "loss": 0.0453, "step": 33370 }, { "epoch": 0.1392586225601055, "grad_norm": 0.9582652148725376, "learning_rate": 5.3599394287781095e-06, "loss": 0.0367, "step": 33375 }, { "epoch": 0.13927948527509576, "grad_norm": 1.7739887436172017, "learning_rate": 5.359537968266519e-06, "loss": 0.052, "step": 33380 }, { "epoch": 0.13930034799008603, "grad_norm": 1.0954694818858222, "learning_rate": 5.359136597949829e-06, "loss": 0.048, "step": 33385 }, { "epoch": 0.1393212107050763, "grad_norm": 1.0820762881437938, "learning_rate": 5.358735317794274e-06, "loss": 0.047, "step": 33390 }, { "epoch": 0.1393420734200666, "grad_norm": 0.9622366783891171, "learning_rate": 5.3583341277661005e-06, "loss": 0.037, "step": 33395 }, { "epoch": 0.13936293613505688, "grad_norm": 0.770200177308564, "learning_rate": 5.357933027831579e-06, "loss": 0.0377, "step": 33400 }, { "epoch": 0.13938379885004715, "grad_norm": 1.2018615686393832, "learning_rate": 5.357532017956993e-06, "loss": 0.0548, "step": 33405 }, { "epoch": 0.13940466156503742, "grad_norm": 0.9494442912841796, "learning_rate": 5.3571310981086445e-06, "loss": 0.0396, "step": 33410 }, { "epoch": 0.13942552428002772, "grad_norm": 0.7978448499028876, "learning_rate": 5.3567302682528554e-06, "loss": 0.0479, "step": 33415 }, { "epoch": 0.139446386995018, "grad_norm": 1.063143441451397, "learning_rate": 5.356329528355963e-06, "loss": 0.0466, "step": 33420 }, { "epoch": 0.13946724971000826, "grad_norm": 0.8194758786247647, "learning_rate": 5.355928878384323e-06, "loss": 0.0358, "step": 33425 }, { "epoch": 0.13948811242499853, "grad_norm": 1.290569481682852, "learning_rate": 5.3555283183043095e-06, "loss": 0.0372, "step": 33430 }, { "epoch": 0.1395089751399888, "grad_norm": 1.1359738883665218, "learning_rate": 5.355127848082312e-06, "loss": 0.0419, "step": 33435 }, { "epoch": 0.1395298378549791, "grad_norm": 0.8478112996060995, "learning_rate": 5.35472746768474e-06, "loss": 0.0505, "step": 33440 }, { "epoch": 0.13955070056996938, "grad_norm": 0.8819471106359374, "learning_rate": 5.3543271770780206e-06, "loss": 0.0382, "step": 33445 }, { "epoch": 0.13957156328495965, "grad_norm": 0.8633718026092391, "learning_rate": 5.353926976228593e-06, "loss": 0.0287, "step": 33450 }, { "epoch": 0.13959242599994992, "grad_norm": 0.9009103687977638, "learning_rate": 5.353526865102923e-06, "loss": 0.0408, "step": 33455 }, { "epoch": 0.13961328871494022, "grad_norm": 1.0079903250410236, "learning_rate": 5.353126843667488e-06, "loss": 0.0398, "step": 33460 }, { "epoch": 0.1396341514299305, "grad_norm": 0.90254191102662, "learning_rate": 5.3527269118887835e-06, "loss": 0.0404, "step": 33465 }, { "epoch": 0.13965501414492076, "grad_norm": 1.2110692468582793, "learning_rate": 5.352327069733323e-06, "loss": 0.0478, "step": 33470 }, { "epoch": 0.13967587685991104, "grad_norm": 0.8240929905696248, "learning_rate": 5.35192731716764e-06, "loss": 0.04, "step": 33475 }, { "epoch": 0.1396967395749013, "grad_norm": 0.6557344023911624, "learning_rate": 5.351527654158281e-06, "loss": 0.0437, "step": 33480 }, { "epoch": 0.1397176022898916, "grad_norm": 0.763916640647975, "learning_rate": 5.351128080671813e-06, "loss": 0.0426, "step": 33485 }, { "epoch": 0.13973846500488188, "grad_norm": 0.9231312217400407, "learning_rate": 5.350728596674819e-06, "loss": 0.0419, "step": 33490 }, { "epoch": 0.13975932771987215, "grad_norm": 0.47999859691367813, "learning_rate": 5.350329202133902e-06, "loss": 0.0374, "step": 33495 }, { "epoch": 0.13978019043486242, "grad_norm": 1.0142104332659456, "learning_rate": 5.3499298970156775e-06, "loss": 0.0427, "step": 33500 }, { "epoch": 0.13980105314985272, "grad_norm": 1.3649284557560037, "learning_rate": 5.349530681286785e-06, "loss": 0.0508, "step": 33505 }, { "epoch": 0.139821915864843, "grad_norm": 0.7409990159920302, "learning_rate": 5.349131554913877e-06, "loss": 0.0434, "step": 33510 }, { "epoch": 0.13984277857983327, "grad_norm": 1.0939709646223634, "learning_rate": 5.348732517863623e-06, "loss": 0.0344, "step": 33515 }, { "epoch": 0.13986364129482354, "grad_norm": 0.5850187523995801, "learning_rate": 5.348333570102713e-06, "loss": 0.0334, "step": 33520 }, { "epoch": 0.1398845040098138, "grad_norm": 1.0566837576856734, "learning_rate": 5.3479347115978506e-06, "loss": 0.0499, "step": 33525 }, { "epoch": 0.1399053667248041, "grad_norm": 0.6542854855603534, "learning_rate": 5.347535942315761e-06, "loss": 0.033, "step": 33530 }, { "epoch": 0.13992622943979438, "grad_norm": 0.5526917081609433, "learning_rate": 5.347137262223185e-06, "loss": 0.0306, "step": 33535 }, { "epoch": 0.13994709215478465, "grad_norm": 1.287254893353116, "learning_rate": 5.3467386712868775e-06, "loss": 0.0511, "step": 33540 }, { "epoch": 0.13996795486977492, "grad_norm": 1.070534799292256, "learning_rate": 5.346340169473615e-06, "loss": 0.0333, "step": 33545 }, { "epoch": 0.13998881758476522, "grad_norm": 0.812588439555541, "learning_rate": 5.345941756750191e-06, "loss": 0.0442, "step": 33550 }, { "epoch": 0.1400096802997555, "grad_norm": 0.8647272569333017, "learning_rate": 5.345543433083415e-06, "loss": 0.0302, "step": 33555 }, { "epoch": 0.14003054301474577, "grad_norm": 0.9297415916649893, "learning_rate": 5.345145198440114e-06, "loss": 0.0283, "step": 33560 }, { "epoch": 0.14005140572973604, "grad_norm": 1.534879471619912, "learning_rate": 5.3447470527871315e-06, "loss": 0.0577, "step": 33565 }, { "epoch": 0.1400722684447263, "grad_norm": 0.8660233364255006, "learning_rate": 5.34434899609133e-06, "loss": 0.0328, "step": 33570 }, { "epoch": 0.1400931311597166, "grad_norm": 1.1841443905445714, "learning_rate": 5.343951028319587e-06, "loss": 0.0499, "step": 33575 }, { "epoch": 0.14011399387470688, "grad_norm": 1.194286584194318, "learning_rate": 5.343553149438801e-06, "loss": 0.0446, "step": 33580 }, { "epoch": 0.14013485658969715, "grad_norm": 1.4266540302917812, "learning_rate": 5.343155359415885e-06, "loss": 0.0467, "step": 33585 }, { "epoch": 0.14015571930468743, "grad_norm": 1.585374858069648, "learning_rate": 5.342757658217769e-06, "loss": 0.044, "step": 33590 }, { "epoch": 0.14017658201967773, "grad_norm": 1.0485417666956258, "learning_rate": 5.3423600458114e-06, "loss": 0.0372, "step": 33595 }, { "epoch": 0.140197444734668, "grad_norm": 1.0932063867019879, "learning_rate": 5.341962522163744e-06, "loss": 0.0468, "step": 33600 }, { "epoch": 0.14021830744965827, "grad_norm": 1.8548719628131207, "learning_rate": 5.341565087241785e-06, "loss": 0.0559, "step": 33605 }, { "epoch": 0.14023917016464854, "grad_norm": 1.1231179822031705, "learning_rate": 5.3411677410125195e-06, "loss": 0.04, "step": 33610 }, { "epoch": 0.1402600328796388, "grad_norm": 1.1125481803912673, "learning_rate": 5.340770483442966e-06, "loss": 0.0365, "step": 33615 }, { "epoch": 0.1402808955946291, "grad_norm": 0.9220748916670202, "learning_rate": 5.340373314500159e-06, "loss": 0.0388, "step": 33620 }, { "epoch": 0.14030175830961938, "grad_norm": 1.4267035137725674, "learning_rate": 5.339976234151148e-06, "loss": 0.0434, "step": 33625 }, { "epoch": 0.14032262102460966, "grad_norm": 1.7195171175405584, "learning_rate": 5.339579242363002e-06, "loss": 0.0492, "step": 33630 }, { "epoch": 0.14034348373959993, "grad_norm": 1.1220550779499123, "learning_rate": 5.339182339102806e-06, "loss": 0.0392, "step": 33635 }, { "epoch": 0.14036434645459023, "grad_norm": 0.671116137769757, "learning_rate": 5.338785524337664e-06, "loss": 0.0383, "step": 33640 }, { "epoch": 0.1403852091695805, "grad_norm": 1.1054353080599897, "learning_rate": 5.338388798034693e-06, "loss": 0.0334, "step": 33645 }, { "epoch": 0.14040607188457077, "grad_norm": 0.5726977943000617, "learning_rate": 5.337992160161031e-06, "loss": 0.0407, "step": 33650 }, { "epoch": 0.14042693459956104, "grad_norm": 0.6777663551155961, "learning_rate": 5.337595610683831e-06, "loss": 0.0256, "step": 33655 }, { "epoch": 0.14044779731455131, "grad_norm": 1.2213914134965471, "learning_rate": 5.3371991495702655e-06, "loss": 0.0308, "step": 33660 }, { "epoch": 0.14046866002954161, "grad_norm": 1.5041565337352574, "learning_rate": 5.336802776787522e-06, "loss": 0.0529, "step": 33665 }, { "epoch": 0.1404895227445319, "grad_norm": 2.7136178619554365, "learning_rate": 5.336406492302804e-06, "loss": 0.0516, "step": 33670 }, { "epoch": 0.14051038545952216, "grad_norm": 0.7740785798364398, "learning_rate": 5.336010296083334e-06, "loss": 0.0364, "step": 33675 }, { "epoch": 0.14053124817451243, "grad_norm": 0.7482691398124734, "learning_rate": 5.335614188096352e-06, "loss": 0.0492, "step": 33680 }, { "epoch": 0.14055211088950273, "grad_norm": 1.1351466828371575, "learning_rate": 5.3352181683091145e-06, "loss": 0.0389, "step": 33685 }, { "epoch": 0.140572973604493, "grad_norm": 0.8269998510995696, "learning_rate": 5.334822236688891e-06, "loss": 0.0386, "step": 33690 }, { "epoch": 0.14059383631948327, "grad_norm": 0.8787083093355853, "learning_rate": 5.334426393202976e-06, "loss": 0.0492, "step": 33695 }, { "epoch": 0.14061469903447354, "grad_norm": 0.8844318250597811, "learning_rate": 5.334030637818674e-06, "loss": 0.0374, "step": 33700 }, { "epoch": 0.14063556174946382, "grad_norm": 0.5982365327454139, "learning_rate": 5.33363497050331e-06, "loss": 0.0383, "step": 33705 }, { "epoch": 0.14065642446445412, "grad_norm": 0.7093024881379836, "learning_rate": 5.333239391224224e-06, "loss": 0.0331, "step": 33710 }, { "epoch": 0.1406772871794444, "grad_norm": 0.7073773469526282, "learning_rate": 5.332843899948775e-06, "loss": 0.0306, "step": 33715 }, { "epoch": 0.14069814989443466, "grad_norm": 1.2015158264039238, "learning_rate": 5.332448496644337e-06, "loss": 0.0541, "step": 33720 }, { "epoch": 0.14071901260942493, "grad_norm": 0.9554176815170502, "learning_rate": 5.332053181278304e-06, "loss": 0.0433, "step": 33725 }, { "epoch": 0.14073987532441523, "grad_norm": 2.3761981781534764, "learning_rate": 5.331657953818081e-06, "loss": 0.0426, "step": 33730 }, { "epoch": 0.1407607380394055, "grad_norm": 0.7007692801226224, "learning_rate": 5.331262814231098e-06, "loss": 0.0456, "step": 33735 }, { "epoch": 0.14078160075439577, "grad_norm": 1.3997780058413016, "learning_rate": 5.330867762484792e-06, "loss": 0.0531, "step": 33740 }, { "epoch": 0.14080246346938605, "grad_norm": 0.4658801611336331, "learning_rate": 5.330472798546628e-06, "loss": 0.0326, "step": 33745 }, { "epoch": 0.14082332618437632, "grad_norm": 1.0868600838477491, "learning_rate": 5.33007792238408e-06, "loss": 0.0357, "step": 33750 }, { "epoch": 0.14084418889936662, "grad_norm": 1.1696619245314896, "learning_rate": 5.329683133964641e-06, "loss": 0.0336, "step": 33755 }, { "epoch": 0.1408650516143569, "grad_norm": 1.0676076152205267, "learning_rate": 5.329288433255819e-06, "loss": 0.0382, "step": 33760 }, { "epoch": 0.14088591432934716, "grad_norm": 1.0401498576946637, "learning_rate": 5.328893820225145e-06, "loss": 0.039, "step": 33765 }, { "epoch": 0.14090677704433743, "grad_norm": 2.2635654115118595, "learning_rate": 5.32849929484016e-06, "loss": 0.0706, "step": 33770 }, { "epoch": 0.14092763975932773, "grad_norm": 1.1373943460961136, "learning_rate": 5.328104857068426e-06, "loss": 0.0291, "step": 33775 }, { "epoch": 0.140948502474318, "grad_norm": 1.0162781737491609, "learning_rate": 5.327710506877519e-06, "loss": 0.0508, "step": 33780 }, { "epoch": 0.14096936518930828, "grad_norm": 0.7827924975272958, "learning_rate": 5.327316244235034e-06, "loss": 0.035, "step": 33785 }, { "epoch": 0.14099022790429855, "grad_norm": 1.174937299530887, "learning_rate": 5.3269220691085796e-06, "loss": 0.0387, "step": 33790 }, { "epoch": 0.14101109061928882, "grad_norm": 0.6461751263792109, "learning_rate": 5.326527981465788e-06, "loss": 0.0352, "step": 33795 }, { "epoch": 0.14103195333427912, "grad_norm": 1.0879019517794093, "learning_rate": 5.326133981274298e-06, "loss": 0.0352, "step": 33800 }, { "epoch": 0.1410528160492694, "grad_norm": 0.7240570087501749, "learning_rate": 5.325740068501775e-06, "loss": 0.0393, "step": 33805 }, { "epoch": 0.14107367876425966, "grad_norm": 1.2728434196270697, "learning_rate": 5.325346243115897e-06, "loss": 0.0469, "step": 33810 }, { "epoch": 0.14109454147924994, "grad_norm": 1.3713741953390917, "learning_rate": 5.324952505084355e-06, "loss": 0.0382, "step": 33815 }, { "epoch": 0.14111540419424023, "grad_norm": 1.301326709565265, "learning_rate": 5.324558854374865e-06, "loss": 0.0432, "step": 33820 }, { "epoch": 0.1411362669092305, "grad_norm": 1.0429836418607576, "learning_rate": 5.3241652909551515e-06, "loss": 0.0458, "step": 33825 }, { "epoch": 0.14115712962422078, "grad_norm": 1.1110505855381758, "learning_rate": 5.323771814792959e-06, "loss": 0.0455, "step": 33830 }, { "epoch": 0.14117799233921105, "grad_norm": 1.231174449349154, "learning_rate": 5.323378425856053e-06, "loss": 0.0478, "step": 33835 }, { "epoch": 0.14119885505420132, "grad_norm": 1.0254872123482548, "learning_rate": 5.322985124112207e-06, "loss": 0.0434, "step": 33840 }, { "epoch": 0.14121971776919162, "grad_norm": 0.9734781190425301, "learning_rate": 5.322591909529219e-06, "loss": 0.0311, "step": 33845 }, { "epoch": 0.1412405804841819, "grad_norm": 1.1013657686666172, "learning_rate": 5.322198782074898e-06, "loss": 0.0454, "step": 33850 }, { "epoch": 0.14126144319917217, "grad_norm": 1.4086322167164138, "learning_rate": 5.321805741717074e-06, "loss": 0.0413, "step": 33855 }, { "epoch": 0.14128230591416244, "grad_norm": 1.4327002904342783, "learning_rate": 5.321412788423591e-06, "loss": 0.045, "step": 33860 }, { "epoch": 0.14130316862915274, "grad_norm": 1.3520968702772587, "learning_rate": 5.321019922162311e-06, "loss": 0.0298, "step": 33865 }, { "epoch": 0.141324031344143, "grad_norm": 1.3724425213867462, "learning_rate": 5.320627142901109e-06, "loss": 0.0558, "step": 33870 }, { "epoch": 0.14134489405913328, "grad_norm": 1.3480967355620346, "learning_rate": 5.320234450607884e-06, "loss": 0.0283, "step": 33875 }, { "epoch": 0.14136575677412355, "grad_norm": 1.3694616303367348, "learning_rate": 5.319841845250544e-06, "loss": 0.0362, "step": 33880 }, { "epoch": 0.14138661948911382, "grad_norm": 0.5301421217886421, "learning_rate": 5.319449326797017e-06, "loss": 0.0347, "step": 33885 }, { "epoch": 0.14140748220410412, "grad_norm": 0.823533627065245, "learning_rate": 5.3190568952152485e-06, "loss": 0.033, "step": 33890 }, { "epoch": 0.1414283449190944, "grad_norm": 0.921208220276419, "learning_rate": 5.3186645504731974e-06, "loss": 0.0423, "step": 33895 }, { "epoch": 0.14144920763408467, "grad_norm": 1.1643284841789128, "learning_rate": 5.318272292538843e-06, "loss": 0.0366, "step": 33900 }, { "epoch": 0.14147007034907494, "grad_norm": 1.3189462233965148, "learning_rate": 5.317880121380179e-06, "loss": 0.0371, "step": 33905 }, { "epoch": 0.14149093306406524, "grad_norm": 0.958748958906602, "learning_rate": 5.317488036965214e-06, "loss": 0.0428, "step": 33910 }, { "epoch": 0.1415117957790555, "grad_norm": 1.0090357937882486, "learning_rate": 5.317096039261976e-06, "loss": 0.043, "step": 33915 }, { "epoch": 0.14153265849404578, "grad_norm": 1.3262492388731966, "learning_rate": 5.316704128238509e-06, "loss": 0.0406, "step": 33920 }, { "epoch": 0.14155352120903605, "grad_norm": 1.5135153309035128, "learning_rate": 5.316312303862871e-06, "loss": 0.0478, "step": 33925 }, { "epoch": 0.14157438392402633, "grad_norm": 1.0626854169314763, "learning_rate": 5.315920566103141e-06, "loss": 0.0502, "step": 33930 }, { "epoch": 0.14159524663901663, "grad_norm": 1.6830907161185116, "learning_rate": 5.31552891492741e-06, "loss": 0.0455, "step": 33935 }, { "epoch": 0.1416161093540069, "grad_norm": 1.124344765084532, "learning_rate": 5.315137350303787e-06, "loss": 0.0494, "step": 33940 }, { "epoch": 0.14163697206899717, "grad_norm": 1.1100388453269832, "learning_rate": 5.314745872200398e-06, "loss": 0.0459, "step": 33945 }, { "epoch": 0.14165783478398744, "grad_norm": 0.7002231729259543, "learning_rate": 5.314354480585387e-06, "loss": 0.0344, "step": 33950 }, { "epoch": 0.14167869749897774, "grad_norm": 0.8717389792635755, "learning_rate": 5.313963175426911e-06, "loss": 0.0361, "step": 33955 }, { "epoch": 0.141699560213968, "grad_norm": 0.6745608092876003, "learning_rate": 5.313571956693145e-06, "loss": 0.0591, "step": 33960 }, { "epoch": 0.14172042292895828, "grad_norm": 2.1759628217447013, "learning_rate": 5.313180824352279e-06, "loss": 0.0518, "step": 33965 }, { "epoch": 0.14174128564394856, "grad_norm": 1.0763108204604237, "learning_rate": 5.312789778372521e-06, "loss": 0.0311, "step": 33970 }, { "epoch": 0.14176214835893883, "grad_norm": 1.2335871323293237, "learning_rate": 5.312398818722099e-06, "loss": 0.0556, "step": 33975 }, { "epoch": 0.14178301107392913, "grad_norm": 1.7534285399809377, "learning_rate": 5.31200794536925e-06, "loss": 0.0451, "step": 33980 }, { "epoch": 0.1418038737889194, "grad_norm": 0.9432961205019292, "learning_rate": 5.31161715828223e-06, "loss": 0.0361, "step": 33985 }, { "epoch": 0.14182473650390967, "grad_norm": 1.196424906993436, "learning_rate": 5.311226457429315e-06, "loss": 0.0398, "step": 33990 }, { "epoch": 0.14184559921889994, "grad_norm": 0.7308629635145897, "learning_rate": 5.310835842778793e-06, "loss": 0.0415, "step": 33995 }, { "epoch": 0.14186646193389024, "grad_norm": 1.0257176172770843, "learning_rate": 5.310445314298971e-06, "loss": 0.0504, "step": 34000 }, { "epoch": 0.14188732464888051, "grad_norm": 0.6670681351270547, "learning_rate": 5.310054871958169e-06, "loss": 0.0301, "step": 34005 }, { "epoch": 0.14190818736387079, "grad_norm": 1.1956610390595894, "learning_rate": 5.309664515724728e-06, "loss": 0.0423, "step": 34010 }, { "epoch": 0.14192905007886106, "grad_norm": 0.8473122340965779, "learning_rate": 5.309274245567e-06, "loss": 0.0401, "step": 34015 }, { "epoch": 0.14194991279385133, "grad_norm": 1.1283951717127483, "learning_rate": 5.308884061453358e-06, "loss": 0.0498, "step": 34020 }, { "epoch": 0.14197077550884163, "grad_norm": 0.9615520817537112, "learning_rate": 5.308493963352189e-06, "loss": 0.0434, "step": 34025 }, { "epoch": 0.1419916382238319, "grad_norm": 0.704071192582095, "learning_rate": 5.308103951231897e-06, "loss": 0.031, "step": 34030 }, { "epoch": 0.14201250093882217, "grad_norm": 0.9243201114940897, "learning_rate": 5.3077140250608995e-06, "loss": 0.0356, "step": 34035 }, { "epoch": 0.14203336365381244, "grad_norm": 0.6387942107587394, "learning_rate": 5.307324184807635e-06, "loss": 0.0441, "step": 34040 }, { "epoch": 0.14205422636880274, "grad_norm": 0.7367971167632481, "learning_rate": 5.306934430440556e-06, "loss": 0.0391, "step": 34045 }, { "epoch": 0.14207508908379302, "grad_norm": 0.8717000065131227, "learning_rate": 5.306544761928129e-06, "loss": 0.0405, "step": 34050 }, { "epoch": 0.1420959517987833, "grad_norm": 0.794287264818921, "learning_rate": 5.306155179238839e-06, "loss": 0.0384, "step": 34055 }, { "epoch": 0.14211681451377356, "grad_norm": 1.6855806970340883, "learning_rate": 5.305765682341188e-06, "loss": 0.0466, "step": 34060 }, { "epoch": 0.14213767722876383, "grad_norm": 1.3674265035532167, "learning_rate": 5.305376271203693e-06, "loss": 0.0422, "step": 34065 }, { "epoch": 0.14215853994375413, "grad_norm": 0.5780960338777066, "learning_rate": 5.304986945794887e-06, "loss": 0.0379, "step": 34070 }, { "epoch": 0.1421794026587444, "grad_norm": 0.8842266172118981, "learning_rate": 5.304597706083318e-06, "loss": 0.0386, "step": 34075 }, { "epoch": 0.14220026537373467, "grad_norm": 0.7676301187830453, "learning_rate": 5.3042085520375546e-06, "loss": 0.0509, "step": 34080 }, { "epoch": 0.14222112808872495, "grad_norm": 0.8960701748160457, "learning_rate": 5.303819483626175e-06, "loss": 0.031, "step": 34085 }, { "epoch": 0.14224199080371525, "grad_norm": 0.8338649725935978, "learning_rate": 5.303430500817779e-06, "loss": 0.0449, "step": 34090 }, { "epoch": 0.14226285351870552, "grad_norm": 1.211221831291809, "learning_rate": 5.303041603580979e-06, "loss": 0.0429, "step": 34095 }, { "epoch": 0.1422837162336958, "grad_norm": 0.9905626774856496, "learning_rate": 5.302652791884409e-06, "loss": 0.0422, "step": 34100 }, { "epoch": 0.14230457894868606, "grad_norm": 1.0122431794206486, "learning_rate": 5.3022640656967105e-06, "loss": 0.0524, "step": 34105 }, { "epoch": 0.14232544166367633, "grad_norm": 0.7196310814033746, "learning_rate": 5.3018754249865475e-06, "loss": 0.0457, "step": 34110 }, { "epoch": 0.14234630437866663, "grad_norm": 0.7220656090923682, "learning_rate": 5.301486869722599e-06, "loss": 0.0322, "step": 34115 }, { "epoch": 0.1423671670936569, "grad_norm": 0.9631622732546362, "learning_rate": 5.301098399873558e-06, "loss": 0.038, "step": 34120 }, { "epoch": 0.14238802980864718, "grad_norm": 0.8640508471039828, "learning_rate": 5.300710015408137e-06, "loss": 0.039, "step": 34125 }, { "epoch": 0.14240889252363745, "grad_norm": 0.9815796185593907, "learning_rate": 5.30032171629506e-06, "loss": 0.0527, "step": 34130 }, { "epoch": 0.14242975523862775, "grad_norm": 1.1976501315202674, "learning_rate": 5.299933502503072e-06, "loss": 0.0461, "step": 34135 }, { "epoch": 0.14245061795361802, "grad_norm": 1.057966256659201, "learning_rate": 5.2995453740009295e-06, "loss": 0.0401, "step": 34140 }, { "epoch": 0.1424714806686083, "grad_norm": 0.9730562658693376, "learning_rate": 5.2991573307574076e-06, "loss": 0.034, "step": 34145 }, { "epoch": 0.14249234338359856, "grad_norm": 1.590048073297425, "learning_rate": 5.2987693727412956e-06, "loss": 0.0341, "step": 34150 }, { "epoch": 0.14251320609858883, "grad_norm": 0.8870773966967442, "learning_rate": 5.298381499921403e-06, "loss": 0.0429, "step": 34155 }, { "epoch": 0.14253406881357913, "grad_norm": 0.8548704049540186, "learning_rate": 5.297993712266551e-06, "loss": 0.0398, "step": 34160 }, { "epoch": 0.1425549315285694, "grad_norm": 1.4189847524435593, "learning_rate": 5.297606009745577e-06, "loss": 0.0475, "step": 34165 }, { "epoch": 0.14257579424355968, "grad_norm": 1.2297832919472191, "learning_rate": 5.2972183923273356e-06, "loss": 0.0599, "step": 34170 }, { "epoch": 0.14259665695854995, "grad_norm": 0.8268133901912126, "learning_rate": 5.296830859980699e-06, "loss": 0.0407, "step": 34175 }, { "epoch": 0.14261751967354025, "grad_norm": 1.0172320324048318, "learning_rate": 5.296443412674553e-06, "loss": 0.0312, "step": 34180 }, { "epoch": 0.14263838238853052, "grad_norm": 0.7423691774512259, "learning_rate": 5.296056050377797e-06, "loss": 0.0393, "step": 34185 }, { "epoch": 0.1426592451035208, "grad_norm": 0.6874347517695486, "learning_rate": 5.295668773059353e-06, "loss": 0.0374, "step": 34190 }, { "epoch": 0.14268010781851106, "grad_norm": 1.0354859869869069, "learning_rate": 5.295281580688154e-06, "loss": 0.0641, "step": 34195 }, { "epoch": 0.14270097053350134, "grad_norm": 1.1224505430510585, "learning_rate": 5.294894473233149e-06, "loss": 0.0526, "step": 34200 }, { "epoch": 0.14272183324849164, "grad_norm": 0.9619451474721622, "learning_rate": 5.2945074506633035e-06, "loss": 0.0317, "step": 34205 }, { "epoch": 0.1427426959634819, "grad_norm": 0.967888402050677, "learning_rate": 5.2941205129476e-06, "loss": 0.0353, "step": 34210 }, { "epoch": 0.14276355867847218, "grad_norm": 0.6418588632527711, "learning_rate": 5.293733660055037e-06, "loss": 0.0356, "step": 34215 }, { "epoch": 0.14278442139346245, "grad_norm": 1.0307728703718997, "learning_rate": 5.293346891954627e-06, "loss": 0.0447, "step": 34220 }, { "epoch": 0.14280528410845275, "grad_norm": 0.5813748034491082, "learning_rate": 5.2929602086153996e-06, "loss": 0.0465, "step": 34225 }, { "epoch": 0.14282614682344302, "grad_norm": 0.7694610400861112, "learning_rate": 5.292573610006401e-06, "loss": 0.0386, "step": 34230 }, { "epoch": 0.1428470095384333, "grad_norm": 1.3354184173823809, "learning_rate": 5.292187096096689e-06, "loss": 0.0347, "step": 34235 }, { "epoch": 0.14286787225342357, "grad_norm": 1.0512842396298205, "learning_rate": 5.291800666855344e-06, "loss": 0.0559, "step": 34240 }, { "epoch": 0.14288873496841384, "grad_norm": 1.1092392869676173, "learning_rate": 5.291414322251457e-06, "loss": 0.0426, "step": 34245 }, { "epoch": 0.14290959768340414, "grad_norm": 1.6493091735302405, "learning_rate": 5.2910280622541355e-06, "loss": 0.0449, "step": 34250 }, { "epoch": 0.1429304603983944, "grad_norm": 1.1894931265186977, "learning_rate": 5.290641886832505e-06, "loss": 0.0331, "step": 34255 }, { "epoch": 0.14295132311338468, "grad_norm": 1.3362551912432084, "learning_rate": 5.290255795955705e-06, "loss": 0.0457, "step": 34260 }, { "epoch": 0.14297218582837495, "grad_norm": 0.9622715501470294, "learning_rate": 5.28986978959289e-06, "loss": 0.0424, "step": 34265 }, { "epoch": 0.14299304854336525, "grad_norm": 1.28438167130751, "learning_rate": 5.2894838677132356e-06, "loss": 0.0322, "step": 34270 }, { "epoch": 0.14301391125835552, "grad_norm": 1.2061257015216107, "learning_rate": 5.289098030285924e-06, "loss": 0.0356, "step": 34275 }, { "epoch": 0.1430347739733458, "grad_norm": 0.9659427215122386, "learning_rate": 5.2887122772801615e-06, "loss": 0.0406, "step": 34280 }, { "epoch": 0.14305563668833607, "grad_norm": 0.9962437766601422, "learning_rate": 5.288326608665164e-06, "loss": 0.0436, "step": 34285 }, { "epoch": 0.14307649940332634, "grad_norm": 0.5790883118833615, "learning_rate": 5.287941024410169e-06, "loss": 0.0348, "step": 34290 }, { "epoch": 0.14309736211831664, "grad_norm": 0.9451342311568947, "learning_rate": 5.287555524484423e-06, "loss": 0.0372, "step": 34295 }, { "epoch": 0.1431182248333069, "grad_norm": 0.6143138435641896, "learning_rate": 5.287170108857196e-06, "loss": 0.0392, "step": 34300 }, { "epoch": 0.14313908754829718, "grad_norm": 0.8172466035046356, "learning_rate": 5.286784777497766e-06, "loss": 0.0386, "step": 34305 }, { "epoch": 0.14315995026328746, "grad_norm": 1.3661534729837603, "learning_rate": 5.28639953037543e-06, "loss": 0.0387, "step": 34310 }, { "epoch": 0.14318081297827775, "grad_norm": 0.9068971871468219, "learning_rate": 5.2860143674595035e-06, "loss": 0.0379, "step": 34315 }, { "epoch": 0.14320167569326803, "grad_norm": 1.1386511818000242, "learning_rate": 5.285629288719312e-06, "loss": 0.0423, "step": 34320 }, { "epoch": 0.1432225384082583, "grad_norm": 1.607694928355752, "learning_rate": 5.285244294124202e-06, "loss": 0.0539, "step": 34325 }, { "epoch": 0.14324340112324857, "grad_norm": 0.9947641963795424, "learning_rate": 5.284859383643531e-06, "loss": 0.0522, "step": 34330 }, { "epoch": 0.14326426383823884, "grad_norm": 0.9012985259020226, "learning_rate": 5.284474557246676e-06, "loss": 0.0415, "step": 34335 }, { "epoch": 0.14328512655322914, "grad_norm": 1.0655981868556894, "learning_rate": 5.284089814903027e-06, "loss": 0.0447, "step": 34340 }, { "epoch": 0.1433059892682194, "grad_norm": 1.000641833092595, "learning_rate": 5.283705156581992e-06, "loss": 0.037, "step": 34345 }, { "epoch": 0.14332685198320969, "grad_norm": 0.8082851342047499, "learning_rate": 5.283320582252991e-06, "loss": 0.0383, "step": 34350 }, { "epoch": 0.14334771469819996, "grad_norm": 3.756865636274118, "learning_rate": 5.282936091885462e-06, "loss": 0.0296, "step": 34355 }, { "epoch": 0.14336857741319023, "grad_norm": 0.8228547307846706, "learning_rate": 5.282551685448859e-06, "loss": 0.0417, "step": 34360 }, { "epoch": 0.14338944012818053, "grad_norm": 1.2233770697928528, "learning_rate": 5.28216736291265e-06, "loss": 0.0382, "step": 34365 }, { "epoch": 0.1434103028431708, "grad_norm": 0.8545212164702156, "learning_rate": 5.28178312424632e-06, "loss": 0.0305, "step": 34370 }, { "epoch": 0.14343116555816107, "grad_norm": 0.7708600249561591, "learning_rate": 5.28139896941937e-06, "loss": 0.0394, "step": 34375 }, { "epoch": 0.14345202827315134, "grad_norm": 1.3066091125850927, "learning_rate": 5.281014898401314e-06, "loss": 0.0342, "step": 34380 }, { "epoch": 0.14347289098814164, "grad_norm": 0.9945184292743969, "learning_rate": 5.280630911161681e-06, "loss": 0.0338, "step": 34385 }, { "epoch": 0.14349375370313192, "grad_norm": 1.0842844588653537, "learning_rate": 5.28024700767002e-06, "loss": 0.0526, "step": 34390 }, { "epoch": 0.1435146164181222, "grad_norm": 1.2156317913997954, "learning_rate": 5.279863187895893e-06, "loss": 0.0363, "step": 34395 }, { "epoch": 0.14353547913311246, "grad_norm": 2.1834076221055825, "learning_rate": 5.279479451808877e-06, "loss": 0.0505, "step": 34400 }, { "epoch": 0.14355634184810273, "grad_norm": 0.904721345332256, "learning_rate": 5.279095799378564e-06, "loss": 0.0423, "step": 34405 }, { "epoch": 0.14357720456309303, "grad_norm": 1.4769879974914644, "learning_rate": 5.278712230574563e-06, "loss": 0.0504, "step": 34410 }, { "epoch": 0.1435980672780833, "grad_norm": 1.2335912407527176, "learning_rate": 5.278328745366498e-06, "loss": 0.0383, "step": 34415 }, { "epoch": 0.14361892999307357, "grad_norm": 0.947508040869288, "learning_rate": 5.277945343724007e-06, "loss": 0.054, "step": 34420 }, { "epoch": 0.14363979270806385, "grad_norm": 1.059969201814218, "learning_rate": 5.277562025616746e-06, "loss": 0.0387, "step": 34425 }, { "epoch": 0.14366065542305415, "grad_norm": 0.7059213163686373, "learning_rate": 5.277178791014386e-06, "loss": 0.041, "step": 34430 }, { "epoch": 0.14368151813804442, "grad_norm": 1.0028573583041567, "learning_rate": 5.27679563988661e-06, "loss": 0.0564, "step": 34435 }, { "epoch": 0.1437023808530347, "grad_norm": 1.3197555675042705, "learning_rate": 5.2764125722031205e-06, "loss": 0.0582, "step": 34440 }, { "epoch": 0.14372324356802496, "grad_norm": 0.543042460953947, "learning_rate": 5.276029587933632e-06, "loss": 0.0383, "step": 34445 }, { "epoch": 0.14374410628301523, "grad_norm": 2.496714128842531, "learning_rate": 5.27564668704788e-06, "loss": 0.0438, "step": 34450 }, { "epoch": 0.14376496899800553, "grad_norm": 1.0347645234337985, "learning_rate": 5.275263869515608e-06, "loss": 0.0359, "step": 34455 }, { "epoch": 0.1437858317129958, "grad_norm": 1.7223138260769906, "learning_rate": 5.27488113530658e-06, "loss": 0.0394, "step": 34460 }, { "epoch": 0.14380669442798608, "grad_norm": 0.8314125649660769, "learning_rate": 5.274498484390572e-06, "loss": 0.0419, "step": 34465 }, { "epoch": 0.14382755714297635, "grad_norm": 1.1701101049343223, "learning_rate": 5.27411591673738e-06, "loss": 0.0414, "step": 34470 }, { "epoch": 0.14384841985796665, "grad_norm": 1.146360343279018, "learning_rate": 5.273733432316811e-06, "loss": 0.0485, "step": 34475 }, { "epoch": 0.14386928257295692, "grad_norm": 1.0309574598474354, "learning_rate": 5.273351031098688e-06, "loss": 0.0356, "step": 34480 }, { "epoch": 0.1438901452879472, "grad_norm": 0.9985192607858985, "learning_rate": 5.272968713052852e-06, "loss": 0.0444, "step": 34485 }, { "epoch": 0.14391100800293746, "grad_norm": 0.969357425344631, "learning_rate": 5.272586478149155e-06, "loss": 0.0376, "step": 34490 }, { "epoch": 0.14393187071792773, "grad_norm": 0.5583395342877617, "learning_rate": 5.272204326357468e-06, "loss": 0.0446, "step": 34495 }, { "epoch": 0.14395273343291803, "grad_norm": 0.6156480855542307, "learning_rate": 5.2718222576476785e-06, "loss": 0.0397, "step": 34500 }, { "epoch": 0.1439735961479083, "grad_norm": 1.1132034225300127, "learning_rate": 5.271440271989683e-06, "loss": 0.0411, "step": 34505 }, { "epoch": 0.14399445886289858, "grad_norm": 1.118261865027108, "learning_rate": 5.271058369353397e-06, "loss": 0.0484, "step": 34510 }, { "epoch": 0.14401532157788885, "grad_norm": 0.6926342073372724, "learning_rate": 5.270676549708753e-06, "loss": 0.0284, "step": 34515 }, { "epoch": 0.14403618429287915, "grad_norm": 4.056207942053806, "learning_rate": 5.270294813025697e-06, "loss": 0.0455, "step": 34520 }, { "epoch": 0.14405704700786942, "grad_norm": 0.9767433063218446, "learning_rate": 5.269913159274191e-06, "loss": 0.036, "step": 34525 }, { "epoch": 0.1440779097228597, "grad_norm": 5.384259050910838, "learning_rate": 5.269531588424209e-06, "loss": 0.036, "step": 34530 }, { "epoch": 0.14409877243784996, "grad_norm": 0.9085320600271329, "learning_rate": 5.2691501004457444e-06, "loss": 0.0315, "step": 34535 }, { "epoch": 0.14411963515284024, "grad_norm": 0.8411878795796089, "learning_rate": 5.2687686953088035e-06, "loss": 0.0328, "step": 34540 }, { "epoch": 0.14414049786783054, "grad_norm": 0.49427190791730286, "learning_rate": 5.268387372983409e-06, "loss": 0.0522, "step": 34545 }, { "epoch": 0.1441613605828208, "grad_norm": 1.0625655359858857, "learning_rate": 5.2680061334395986e-06, "loss": 0.0428, "step": 34550 }, { "epoch": 0.14418222329781108, "grad_norm": 0.56768985529731, "learning_rate": 5.267624976647423e-06, "loss": 0.0339, "step": 34555 }, { "epoch": 0.14420308601280135, "grad_norm": 1.4132861606152383, "learning_rate": 5.267243902576951e-06, "loss": 0.0502, "step": 34560 }, { "epoch": 0.14422394872779165, "grad_norm": 0.6732511875555679, "learning_rate": 5.266862911198265e-06, "loss": 0.0335, "step": 34565 }, { "epoch": 0.14424481144278192, "grad_norm": 1.4617316660897997, "learning_rate": 5.266482002481464e-06, "loss": 0.0531, "step": 34570 }, { "epoch": 0.1442656741577722, "grad_norm": 1.081357301662304, "learning_rate": 5.26610117639666e-06, "loss": 0.0448, "step": 34575 }, { "epoch": 0.14428653687276247, "grad_norm": 1.4693605764028885, "learning_rate": 5.265720432913981e-06, "loss": 0.045, "step": 34580 }, { "epoch": 0.14430739958775274, "grad_norm": 1.3658207071181205, "learning_rate": 5.26533977200357e-06, "loss": 0.0376, "step": 34585 }, { "epoch": 0.14432826230274304, "grad_norm": 1.208115719018267, "learning_rate": 5.264959193635588e-06, "loss": 0.0336, "step": 34590 }, { "epoch": 0.1443491250177333, "grad_norm": 0.7363831018895833, "learning_rate": 5.264578697780205e-06, "loss": 0.0423, "step": 34595 }, { "epoch": 0.14436998773272358, "grad_norm": 1.5641889063377274, "learning_rate": 5.264198284407612e-06, "loss": 0.0613, "step": 34600 }, { "epoch": 0.14439085044771385, "grad_norm": 1.251301188949858, "learning_rate": 5.263817953488012e-06, "loss": 0.0472, "step": 34605 }, { "epoch": 0.14441171316270415, "grad_norm": 0.8867878546160841, "learning_rate": 5.263437704991624e-06, "loss": 0.0523, "step": 34610 }, { "epoch": 0.14443257587769442, "grad_norm": 1.3470222101697882, "learning_rate": 5.26305753888868e-06, "loss": 0.0319, "step": 34615 }, { "epoch": 0.1444534385926847, "grad_norm": 0.8472032541096952, "learning_rate": 5.262677455149432e-06, "loss": 0.06, "step": 34620 }, { "epoch": 0.14447430130767497, "grad_norm": 1.1712548552084505, "learning_rate": 5.262297453744141e-06, "loss": 0.0396, "step": 34625 }, { "epoch": 0.14449516402266524, "grad_norm": 0.8969770063924886, "learning_rate": 5.261917534643088e-06, "loss": 0.1026, "step": 34630 }, { "epoch": 0.14451602673765554, "grad_norm": 1.1033133481254547, "learning_rate": 5.261537697816565e-06, "loss": 0.0404, "step": 34635 }, { "epoch": 0.1445368894526458, "grad_norm": 0.5236370754065319, "learning_rate": 5.261157943234882e-06, "loss": 0.0377, "step": 34640 }, { "epoch": 0.14455775216763608, "grad_norm": 0.8985574671147425, "learning_rate": 5.2607782708683645e-06, "loss": 0.0421, "step": 34645 }, { "epoch": 0.14457861488262636, "grad_norm": 0.9343226899150763, "learning_rate": 5.2603986806873485e-06, "loss": 0.0381, "step": 34650 }, { "epoch": 0.14459947759761665, "grad_norm": 1.0436355036782001, "learning_rate": 5.260019172662189e-06, "loss": 0.0512, "step": 34655 }, { "epoch": 0.14462034031260693, "grad_norm": 1.8567763181951769, "learning_rate": 5.259639746763255e-06, "loss": 0.0411, "step": 34660 }, { "epoch": 0.1446412030275972, "grad_norm": 0.6143201676135716, "learning_rate": 5.2592604029609325e-06, "loss": 0.0505, "step": 34665 }, { "epoch": 0.14466206574258747, "grad_norm": 0.793509071353362, "learning_rate": 5.258881141225617e-06, "loss": 0.0369, "step": 34670 }, { "epoch": 0.14468292845757774, "grad_norm": 0.8011831239463068, "learning_rate": 5.258501961527722e-06, "loss": 0.0347, "step": 34675 }, { "epoch": 0.14470379117256804, "grad_norm": 1.1004394603383028, "learning_rate": 5.258122863837679e-06, "loss": 0.0321, "step": 34680 }, { "epoch": 0.1447246538875583, "grad_norm": 1.088007037388669, "learning_rate": 5.25774384812593e-06, "loss": 0.0422, "step": 34685 }, { "epoch": 0.14474551660254859, "grad_norm": 1.2298815471929847, "learning_rate": 5.2573649143629335e-06, "loss": 0.0435, "step": 34690 }, { "epoch": 0.14476637931753886, "grad_norm": 0.9810637507423865, "learning_rate": 5.256986062519163e-06, "loss": 0.035, "step": 34695 }, { "epoch": 0.14478724203252916, "grad_norm": 0.7091300613019208, "learning_rate": 5.256607292565107e-06, "loss": 0.0258, "step": 34700 }, { "epoch": 0.14480810474751943, "grad_norm": 0.9655981482965135, "learning_rate": 5.256228604471268e-06, "loss": 0.0369, "step": 34705 }, { "epoch": 0.1448289674625097, "grad_norm": 1.2507623180915368, "learning_rate": 5.255849998208164e-06, "loss": 0.0359, "step": 34710 }, { "epoch": 0.14484983017749997, "grad_norm": 0.6716057451600762, "learning_rate": 5.25547147374633e-06, "loss": 0.0402, "step": 34715 }, { "epoch": 0.14487069289249024, "grad_norm": 0.6902795642194454, "learning_rate": 5.25509303105631e-06, "loss": 0.042, "step": 34720 }, { "epoch": 0.14489155560748054, "grad_norm": 0.7357927215253853, "learning_rate": 5.25471467010867e-06, "loss": 0.0342, "step": 34725 }, { "epoch": 0.14491241832247082, "grad_norm": 0.9457867340364048, "learning_rate": 5.254336390873986e-06, "loss": 0.037, "step": 34730 }, { "epoch": 0.1449332810374611, "grad_norm": 1.1218415274665647, "learning_rate": 5.253958193322851e-06, "loss": 0.0386, "step": 34735 }, { "epoch": 0.14495414375245136, "grad_norm": 0.8804641637748447, "learning_rate": 5.253580077425872e-06, "loss": 0.0306, "step": 34740 }, { "epoch": 0.14497500646744166, "grad_norm": 0.8260377989386632, "learning_rate": 5.253202043153669e-06, "loss": 0.0391, "step": 34745 }, { "epoch": 0.14499586918243193, "grad_norm": 1.3149784850181618, "learning_rate": 5.25282409047688e-06, "loss": 0.0365, "step": 34750 }, { "epoch": 0.1450167318974222, "grad_norm": 0.5724188285615843, "learning_rate": 5.252446219366157e-06, "loss": 0.0341, "step": 34755 }, { "epoch": 0.14503759461241247, "grad_norm": 0.6436827465917236, "learning_rate": 5.252068429792165e-06, "loss": 0.0391, "step": 34760 }, { "epoch": 0.14505845732740275, "grad_norm": 0.9440539203218986, "learning_rate": 5.251690721725588e-06, "loss": 0.0371, "step": 34765 }, { "epoch": 0.14507932004239305, "grad_norm": 1.1133895479826654, "learning_rate": 5.251313095137116e-06, "loss": 0.0493, "step": 34770 }, { "epoch": 0.14510018275738332, "grad_norm": 1.2359232196812096, "learning_rate": 5.250935549997466e-06, "loss": 0.0391, "step": 34775 }, { "epoch": 0.1451210454723736, "grad_norm": 0.6499743239675128, "learning_rate": 5.250558086277358e-06, "loss": 0.0409, "step": 34780 }, { "epoch": 0.14514190818736386, "grad_norm": 1.0901232196188848, "learning_rate": 5.250180703947535e-06, "loss": 0.0461, "step": 34785 }, { "epoch": 0.14516277090235416, "grad_norm": 0.9357725342872306, "learning_rate": 5.249803402978751e-06, "loss": 0.042, "step": 34790 }, { "epoch": 0.14518363361734443, "grad_norm": 0.8544148822389667, "learning_rate": 5.249426183341776e-06, "loss": 0.0447, "step": 34795 }, { "epoch": 0.1452044963323347, "grad_norm": 1.1306392805752192, "learning_rate": 5.249049045007393e-06, "loss": 0.0327, "step": 34800 }, { "epoch": 0.14522535904732498, "grad_norm": 0.9013627694272469, "learning_rate": 5.2486719879464e-06, "loss": 0.0377, "step": 34805 }, { "epoch": 0.14524622176231525, "grad_norm": 0.6791405682396366, "learning_rate": 5.2482950121296125e-06, "loss": 0.0374, "step": 34810 }, { "epoch": 0.14526708447730555, "grad_norm": 0.9416776048280665, "learning_rate": 5.247918117527857e-06, "loss": 0.0289, "step": 34815 }, { "epoch": 0.14528794719229582, "grad_norm": 1.0416586912881067, "learning_rate": 5.247541304111979e-06, "loss": 0.0433, "step": 34820 }, { "epoch": 0.1453088099072861, "grad_norm": 0.8021466109717322, "learning_rate": 5.2471645718528316e-06, "loss": 0.0381, "step": 34825 }, { "epoch": 0.14532967262227636, "grad_norm": 0.8027185774022154, "learning_rate": 5.2467879207212914e-06, "loss": 0.04, "step": 34830 }, { "epoch": 0.14535053533726666, "grad_norm": 1.4322804959719548, "learning_rate": 5.2464113506882415e-06, "loss": 0.0384, "step": 34835 }, { "epoch": 0.14537139805225693, "grad_norm": 1.2769183708310248, "learning_rate": 5.246034861724586e-06, "loss": 0.0375, "step": 34840 }, { "epoch": 0.1453922607672472, "grad_norm": 0.4636847278986996, "learning_rate": 5.245658453801239e-06, "loss": 0.043, "step": 34845 }, { "epoch": 0.14541312348223748, "grad_norm": 0.9271969188505527, "learning_rate": 5.2452821268891315e-06, "loss": 0.0433, "step": 34850 }, { "epoch": 0.14543398619722775, "grad_norm": 0.9387821409388369, "learning_rate": 5.24490588095921e-06, "loss": 0.0476, "step": 34855 }, { "epoch": 0.14545484891221805, "grad_norm": 0.6069945442669215, "learning_rate": 5.244529715982433e-06, "loss": 0.0417, "step": 34860 }, { "epoch": 0.14547571162720832, "grad_norm": 1.0212246732353478, "learning_rate": 5.244153631929775e-06, "loss": 0.0359, "step": 34865 }, { "epoch": 0.1454965743421986, "grad_norm": 0.8339267370021872, "learning_rate": 5.243777628772224e-06, "loss": 0.051, "step": 34870 }, { "epoch": 0.14551743705718886, "grad_norm": 0.5189198559264693, "learning_rate": 5.243401706480785e-06, "loss": 0.0421, "step": 34875 }, { "epoch": 0.14553829977217916, "grad_norm": 0.8011762934548293, "learning_rate": 5.2430258650264754e-06, "loss": 0.0339, "step": 34880 }, { "epoch": 0.14555916248716944, "grad_norm": 1.002439052446086, "learning_rate": 5.242650104380329e-06, "loss": 0.046, "step": 34885 }, { "epoch": 0.1455800252021597, "grad_norm": 1.6801200237460259, "learning_rate": 5.24227442451339e-06, "loss": 0.042, "step": 34890 }, { "epoch": 0.14560088791714998, "grad_norm": 0.9641840006426472, "learning_rate": 5.241898825396723e-06, "loss": 0.0332, "step": 34895 }, { "epoch": 0.14562175063214025, "grad_norm": 1.1153732095372444, "learning_rate": 5.2415233070014015e-06, "loss": 0.0345, "step": 34900 }, { "epoch": 0.14564261334713055, "grad_norm": 1.459532890698798, "learning_rate": 5.2411478692985185e-06, "loss": 0.0473, "step": 34905 }, { "epoch": 0.14566347606212082, "grad_norm": 1.2328011771581486, "learning_rate": 5.240772512259178e-06, "loss": 0.0459, "step": 34910 }, { "epoch": 0.1456843387771111, "grad_norm": 1.343319157890569, "learning_rate": 5.2403972358545e-06, "loss": 0.0538, "step": 34915 }, { "epoch": 0.14570520149210137, "grad_norm": 0.7973820108996237, "learning_rate": 5.240022040055617e-06, "loss": 0.0322, "step": 34920 }, { "epoch": 0.14572606420709167, "grad_norm": 0.9101810506401984, "learning_rate": 5.239646924833679e-06, "loss": 0.0439, "step": 34925 }, { "epoch": 0.14574692692208194, "grad_norm": 1.9480124602678437, "learning_rate": 5.23927189015985e-06, "loss": 0.0478, "step": 34930 }, { "epoch": 0.1457677896370722, "grad_norm": 0.6443296038143883, "learning_rate": 5.238896936005304e-06, "loss": 0.051, "step": 34935 }, { "epoch": 0.14578865235206248, "grad_norm": 1.1845213639568999, "learning_rate": 5.238522062341237e-06, "loss": 0.0449, "step": 34940 }, { "epoch": 0.14580951506705275, "grad_norm": 0.88806230698303, "learning_rate": 5.2381472691388526e-06, "loss": 0.0368, "step": 34945 }, { "epoch": 0.14583037778204305, "grad_norm": 1.2631053919057524, "learning_rate": 5.237772556369372e-06, "loss": 0.0471, "step": 34950 }, { "epoch": 0.14585124049703332, "grad_norm": 0.6814394818707825, "learning_rate": 5.237397924004032e-06, "loss": 0.0422, "step": 34955 }, { "epoch": 0.1458721032120236, "grad_norm": 1.2608685476966557, "learning_rate": 5.23702337201408e-06, "loss": 0.07, "step": 34960 }, { "epoch": 0.14589296592701387, "grad_norm": 1.2580332887533778, "learning_rate": 5.23664890037078e-06, "loss": 0.036, "step": 34965 }, { "epoch": 0.14591382864200417, "grad_norm": 1.0544531969896167, "learning_rate": 5.23627450904541e-06, "loss": 0.0497, "step": 34970 }, { "epoch": 0.14593469135699444, "grad_norm": 0.8753914203456465, "learning_rate": 5.235900198009266e-06, "loss": 0.0381, "step": 34975 }, { "epoch": 0.1459555540719847, "grad_norm": 0.7412820342365187, "learning_rate": 5.235525967233652e-06, "loss": 0.036, "step": 34980 }, { "epoch": 0.14597641678697498, "grad_norm": 0.9603580082716345, "learning_rate": 5.23515181668989e-06, "loss": 0.036, "step": 34985 }, { "epoch": 0.14599727950196525, "grad_norm": 1.0307274413214482, "learning_rate": 5.234777746349315e-06, "loss": 0.0498, "step": 34990 }, { "epoch": 0.14601814221695555, "grad_norm": 0.943520227030923, "learning_rate": 5.234403756183278e-06, "loss": 0.043, "step": 34995 }, { "epoch": 0.14603900493194583, "grad_norm": 0.7297711697413065, "learning_rate": 5.234029846163145e-06, "loss": 0.0368, "step": 35000 }, { "epoch": 0.1460598676469361, "grad_norm": 1.4763865216039844, "learning_rate": 5.233656016260291e-06, "loss": 0.0338, "step": 35005 }, { "epoch": 0.14608073036192637, "grad_norm": 1.2281229072704816, "learning_rate": 5.233282266446112e-06, "loss": 0.045, "step": 35010 }, { "epoch": 0.14610159307691667, "grad_norm": 0.8211935706818115, "learning_rate": 5.232908596692015e-06, "loss": 0.0288, "step": 35015 }, { "epoch": 0.14612245579190694, "grad_norm": 1.205569648978951, "learning_rate": 5.2325350069694195e-06, "loss": 0.0399, "step": 35020 }, { "epoch": 0.1461433185068972, "grad_norm": 1.027929708039861, "learning_rate": 5.232161497249764e-06, "loss": 0.0378, "step": 35025 }, { "epoch": 0.14616418122188748, "grad_norm": 0.9676983263476849, "learning_rate": 5.231788067504497e-06, "loss": 0.0384, "step": 35030 }, { "epoch": 0.14618504393687776, "grad_norm": 2.7470329060772567, "learning_rate": 5.2314147177050824e-06, "loss": 0.049, "step": 35035 }, { "epoch": 0.14620590665186806, "grad_norm": 1.5045486977895746, "learning_rate": 5.231041447823e-06, "loss": 0.0605, "step": 35040 }, { "epoch": 0.14622676936685833, "grad_norm": 1.4912544565453747, "learning_rate": 5.2306682578297426e-06, "loss": 0.0435, "step": 35045 }, { "epoch": 0.1462476320818486, "grad_norm": 0.713006123687488, "learning_rate": 5.230295147696819e-06, "loss": 0.0479, "step": 35050 }, { "epoch": 0.14626849479683887, "grad_norm": 0.7748462029470293, "learning_rate": 5.229922117395748e-06, "loss": 0.032, "step": 35055 }, { "epoch": 0.14628935751182917, "grad_norm": 1.6754196963822927, "learning_rate": 5.229549166898066e-06, "loss": 0.037, "step": 35060 }, { "epoch": 0.14631022022681944, "grad_norm": 1.044981915956778, "learning_rate": 5.229176296175323e-06, "loss": 0.0414, "step": 35065 }, { "epoch": 0.14633108294180971, "grad_norm": 1.122606062589433, "learning_rate": 5.2288035051990825e-06, "loss": 0.039, "step": 35070 }, { "epoch": 0.1463519456568, "grad_norm": 0.9352389864620383, "learning_rate": 5.228430793940923e-06, "loss": 0.0342, "step": 35075 }, { "epoch": 0.14637280837179026, "grad_norm": 0.5800569532809914, "learning_rate": 5.228058162372437e-06, "loss": 0.0387, "step": 35080 }, { "epoch": 0.14639367108678056, "grad_norm": 0.9289389012320175, "learning_rate": 5.227685610465232e-06, "loss": 0.0476, "step": 35085 }, { "epoch": 0.14641453380177083, "grad_norm": 0.775814300665212, "learning_rate": 5.227313138190926e-06, "loss": 0.0383, "step": 35090 }, { "epoch": 0.1464353965167611, "grad_norm": 1.6970005482033412, "learning_rate": 5.226940745521157e-06, "loss": 0.0518, "step": 35095 }, { "epoch": 0.14645625923175137, "grad_norm": 0.8892207932918462, "learning_rate": 5.226568432427571e-06, "loss": 0.0357, "step": 35100 }, { "epoch": 0.14647712194674167, "grad_norm": 1.016453023830461, "learning_rate": 5.226196198881834e-06, "loss": 0.0453, "step": 35105 }, { "epoch": 0.14649798466173194, "grad_norm": 0.8686069066966324, "learning_rate": 5.225824044855622e-06, "loss": 0.0427, "step": 35110 }, { "epoch": 0.14651884737672222, "grad_norm": 0.8081610982206727, "learning_rate": 5.225451970320625e-06, "loss": 0.0378, "step": 35115 }, { "epoch": 0.1465397100917125, "grad_norm": 0.8166565019997193, "learning_rate": 5.22507997524855e-06, "loss": 0.0361, "step": 35120 }, { "epoch": 0.14656057280670276, "grad_norm": 0.9549603073611186, "learning_rate": 5.224708059611116e-06, "loss": 0.0452, "step": 35125 }, { "epoch": 0.14658143552169306, "grad_norm": 1.3331270442596248, "learning_rate": 5.2243362233800585e-06, "loss": 0.0342, "step": 35130 }, { "epoch": 0.14660229823668333, "grad_norm": 0.9457431507891376, "learning_rate": 5.223964466527123e-06, "loss": 0.0334, "step": 35135 }, { "epoch": 0.1466231609516736, "grad_norm": 1.3635542050102274, "learning_rate": 5.223592789024071e-06, "loss": 0.0432, "step": 35140 }, { "epoch": 0.14664402366666388, "grad_norm": 1.2548191241659623, "learning_rate": 5.22322119084268e-06, "loss": 0.0308, "step": 35145 }, { "epoch": 0.14666488638165417, "grad_norm": 0.8179045881072653, "learning_rate": 5.22284967195474e-06, "loss": 0.0406, "step": 35150 }, { "epoch": 0.14668574909664445, "grad_norm": 0.7340394707584511, "learning_rate": 5.222478232332052e-06, "loss": 0.0426, "step": 35155 }, { "epoch": 0.14670661181163472, "grad_norm": 0.776258602333053, "learning_rate": 5.222106871946438e-06, "loss": 0.0301, "step": 35160 }, { "epoch": 0.146727474526625, "grad_norm": 1.087265918585045, "learning_rate": 5.221735590769728e-06, "loss": 0.0432, "step": 35165 }, { "epoch": 0.14674833724161526, "grad_norm": 0.9583212155367578, "learning_rate": 5.2213643887737685e-06, "loss": 0.0422, "step": 35170 }, { "epoch": 0.14676919995660556, "grad_norm": 0.7810364720224399, "learning_rate": 5.220993265930418e-06, "loss": 0.0488, "step": 35175 }, { "epoch": 0.14679006267159583, "grad_norm": 0.8537195862499759, "learning_rate": 5.220622222211553e-06, "loss": 0.047, "step": 35180 }, { "epoch": 0.1468109253865861, "grad_norm": 0.7947952494179468, "learning_rate": 5.220251257589061e-06, "loss": 0.042, "step": 35185 }, { "epoch": 0.14683178810157638, "grad_norm": 1.3132569038134319, "learning_rate": 5.219880372034842e-06, "loss": 0.0328, "step": 35190 }, { "epoch": 0.14685265081656668, "grad_norm": 1.4096043796560473, "learning_rate": 5.219509565520814e-06, "loss": 0.0488, "step": 35195 }, { "epoch": 0.14687351353155695, "grad_norm": 0.6372088682218022, "learning_rate": 5.219138838018907e-06, "loss": 0.0354, "step": 35200 }, { "epoch": 0.14689437624654722, "grad_norm": 0.9055121738959423, "learning_rate": 5.2187681895010645e-06, "loss": 0.0357, "step": 35205 }, { "epoch": 0.1469152389615375, "grad_norm": 0.9784391470627565, "learning_rate": 5.2183976199392425e-06, "loss": 0.0313, "step": 35210 }, { "epoch": 0.14693610167652776, "grad_norm": 1.0752144912719803, "learning_rate": 5.218027129305417e-06, "loss": 0.038, "step": 35215 }, { "epoch": 0.14695696439151806, "grad_norm": 1.1702466857262872, "learning_rate": 5.217656717571569e-06, "loss": 0.043, "step": 35220 }, { "epoch": 0.14697782710650834, "grad_norm": 1.6502904110724599, "learning_rate": 5.217286384709701e-06, "loss": 0.0483, "step": 35225 }, { "epoch": 0.1469986898214986, "grad_norm": 1.0979370324326463, "learning_rate": 5.216916130691826e-06, "loss": 0.0395, "step": 35230 }, { "epoch": 0.14701955253648888, "grad_norm": 1.026717005854701, "learning_rate": 5.216545955489971e-06, "loss": 0.0431, "step": 35235 }, { "epoch": 0.14704041525147918, "grad_norm": 0.7383454003285568, "learning_rate": 5.216175859076178e-06, "loss": 0.0421, "step": 35240 }, { "epoch": 0.14706127796646945, "grad_norm": 0.7230359675808083, "learning_rate": 5.215805841422502e-06, "loss": 0.0448, "step": 35245 }, { "epoch": 0.14708214068145972, "grad_norm": 1.069337159193828, "learning_rate": 5.215435902501012e-06, "loss": 0.0351, "step": 35250 }, { "epoch": 0.14710300339645, "grad_norm": 1.0962305903372414, "learning_rate": 5.2150660422837905e-06, "loss": 0.0436, "step": 35255 }, { "epoch": 0.14712386611144027, "grad_norm": 0.7175470107387353, "learning_rate": 5.214696260742935e-06, "loss": 0.0362, "step": 35260 }, { "epoch": 0.14714472882643057, "grad_norm": 1.291870065722606, "learning_rate": 5.2143265578505565e-06, "loss": 0.0391, "step": 35265 }, { "epoch": 0.14716559154142084, "grad_norm": 0.666069791739579, "learning_rate": 5.213956933578778e-06, "loss": 0.034, "step": 35270 }, { "epoch": 0.1471864542564111, "grad_norm": 1.12085355636235, "learning_rate": 5.213587387899739e-06, "loss": 0.0433, "step": 35275 }, { "epoch": 0.14720731697140138, "grad_norm": 0.46979220994945825, "learning_rate": 5.213217920785592e-06, "loss": 0.0384, "step": 35280 }, { "epoch": 0.14722817968639168, "grad_norm": 0.7744076434961981, "learning_rate": 5.212848532208501e-06, "loss": 0.0363, "step": 35285 }, { "epoch": 0.14724904240138195, "grad_norm": 1.1190222973853101, "learning_rate": 5.21247922214065e-06, "loss": 0.0379, "step": 35290 }, { "epoch": 0.14726990511637222, "grad_norm": 1.283079379015603, "learning_rate": 5.212109990554227e-06, "loss": 0.0388, "step": 35295 }, { "epoch": 0.1472907678313625, "grad_norm": 0.75707886369269, "learning_rate": 5.211740837421443e-06, "loss": 0.0417, "step": 35300 }, { "epoch": 0.14731163054635277, "grad_norm": 0.841414962723081, "learning_rate": 5.211371762714518e-06, "loss": 0.0405, "step": 35305 }, { "epoch": 0.14733249326134307, "grad_norm": 1.1125187801168814, "learning_rate": 5.211002766405687e-06, "loss": 0.0331, "step": 35310 }, { "epoch": 0.14735335597633334, "grad_norm": 1.3046394050573138, "learning_rate": 5.210633848467199e-06, "loss": 0.0413, "step": 35315 }, { "epoch": 0.1473742186913236, "grad_norm": 0.8510701665628121, "learning_rate": 5.2102650088713166e-06, "loss": 0.0391, "step": 35320 }, { "epoch": 0.14739508140631388, "grad_norm": 0.9600729348058096, "learning_rate": 5.2098962475903145e-06, "loss": 0.0341, "step": 35325 }, { "epoch": 0.14741594412130418, "grad_norm": 0.5660856842706871, "learning_rate": 5.209527564596486e-06, "loss": 0.0361, "step": 35330 }, { "epoch": 0.14743680683629445, "grad_norm": 1.1564191101113526, "learning_rate": 5.2091589598621295e-06, "loss": 0.0355, "step": 35335 }, { "epoch": 0.14745766955128473, "grad_norm": 1.355382122084323, "learning_rate": 5.208790433359566e-06, "loss": 0.0444, "step": 35340 }, { "epoch": 0.147478532266275, "grad_norm": 1.0341302157623873, "learning_rate": 5.208421985061127e-06, "loss": 0.0415, "step": 35345 }, { "epoch": 0.14749939498126527, "grad_norm": 0.9760299576714195, "learning_rate": 5.208053614939154e-06, "loss": 0.0545, "step": 35350 }, { "epoch": 0.14752025769625557, "grad_norm": 1.0219042962830902, "learning_rate": 5.20768532296601e-06, "loss": 0.036, "step": 35355 }, { "epoch": 0.14754112041124584, "grad_norm": 0.869859501443437, "learning_rate": 5.207317109114062e-06, "loss": 0.0525, "step": 35360 }, { "epoch": 0.1475619831262361, "grad_norm": 1.2413074438626073, "learning_rate": 5.2069489733556985e-06, "loss": 0.0345, "step": 35365 }, { "epoch": 0.14758284584122638, "grad_norm": 1.1510908029151588, "learning_rate": 5.20658091566332e-06, "loss": 0.0473, "step": 35370 }, { "epoch": 0.14760370855621668, "grad_norm": 1.0239525818355644, "learning_rate": 5.206212936009337e-06, "loss": 0.0392, "step": 35375 }, { "epoch": 0.14762457127120696, "grad_norm": 1.1721993750984938, "learning_rate": 5.205845034366178e-06, "loss": 0.0456, "step": 35380 }, { "epoch": 0.14764543398619723, "grad_norm": 1.1105660361456822, "learning_rate": 5.205477210706282e-06, "loss": 0.0378, "step": 35385 }, { "epoch": 0.1476662967011875, "grad_norm": 0.527791534342126, "learning_rate": 5.205109465002105e-06, "loss": 0.0335, "step": 35390 }, { "epoch": 0.14768715941617777, "grad_norm": 1.2800170485594002, "learning_rate": 5.204741797226114e-06, "loss": 0.0319, "step": 35395 }, { "epoch": 0.14770802213116807, "grad_norm": 1.278202935428518, "learning_rate": 5.2043742073507875e-06, "loss": 0.0414, "step": 35400 }, { "epoch": 0.14772888484615834, "grad_norm": 1.2653329356998413, "learning_rate": 5.204006695348624e-06, "loss": 0.0478, "step": 35405 }, { "epoch": 0.14774974756114861, "grad_norm": 0.889958755808329, "learning_rate": 5.20363926119213e-06, "loss": 0.0371, "step": 35410 }, { "epoch": 0.1477706102761389, "grad_norm": 2.454578858070905, "learning_rate": 5.203271904853829e-06, "loss": 0.0512, "step": 35415 }, { "epoch": 0.14779147299112919, "grad_norm": 0.8052351954970761, "learning_rate": 5.202904626306255e-06, "loss": 0.0368, "step": 35420 }, { "epoch": 0.14781233570611946, "grad_norm": 1.425430992138167, "learning_rate": 5.202537425521958e-06, "loss": 0.053, "step": 35425 }, { "epoch": 0.14783319842110973, "grad_norm": 0.939084015790805, "learning_rate": 5.202170302473501e-06, "loss": 0.0433, "step": 35430 }, { "epoch": 0.1478540611361, "grad_norm": 1.0319558991299265, "learning_rate": 5.201803257133459e-06, "loss": 0.0455, "step": 35435 }, { "epoch": 0.14787492385109027, "grad_norm": 0.8406900201782743, "learning_rate": 5.201436289474422e-06, "loss": 0.0315, "step": 35440 }, { "epoch": 0.14789578656608057, "grad_norm": 0.7953445636037073, "learning_rate": 5.201069399468994e-06, "loss": 0.0338, "step": 35445 }, { "epoch": 0.14791664928107084, "grad_norm": 0.9071372339621808, "learning_rate": 5.200702587089793e-06, "loss": 0.041, "step": 35450 }, { "epoch": 0.14793751199606112, "grad_norm": 1.266101913310412, "learning_rate": 5.200335852309448e-06, "loss": 0.0528, "step": 35455 }, { "epoch": 0.1479583747110514, "grad_norm": 0.9944353853968462, "learning_rate": 5.1999691951006025e-06, "loss": 0.0368, "step": 35460 }, { "epoch": 0.1479792374260417, "grad_norm": 0.6589538139158355, "learning_rate": 5.1996026154359145e-06, "loss": 0.0385, "step": 35465 }, { "epoch": 0.14800010014103196, "grad_norm": 0.773297936031672, "learning_rate": 5.199236113288057e-06, "loss": 0.0287, "step": 35470 }, { "epoch": 0.14802096285602223, "grad_norm": 1.2422312746050013, "learning_rate": 5.1988696886297085e-06, "loss": 0.0431, "step": 35475 }, { "epoch": 0.1480418255710125, "grad_norm": 1.3145832030793696, "learning_rate": 5.198503341433573e-06, "loss": 0.0478, "step": 35480 }, { "epoch": 0.14806268828600277, "grad_norm": 0.9565693805865788, "learning_rate": 5.198137071672359e-06, "loss": 0.0427, "step": 35485 }, { "epoch": 0.14808355100099307, "grad_norm": 1.8073586467532083, "learning_rate": 5.197770879318792e-06, "loss": 0.0804, "step": 35490 }, { "epoch": 0.14810441371598335, "grad_norm": 1.5263879225667298, "learning_rate": 5.19740476434561e-06, "loss": 0.0414, "step": 35495 }, { "epoch": 0.14812527643097362, "grad_norm": 1.27046905786832, "learning_rate": 5.197038726725564e-06, "loss": 0.0489, "step": 35500 }, { "epoch": 0.1481461391459639, "grad_norm": 2.3753027755552063, "learning_rate": 5.1966727664314195e-06, "loss": 0.0298, "step": 35505 }, { "epoch": 0.1481670018609542, "grad_norm": 1.5288169767322415, "learning_rate": 5.196306883435956e-06, "loss": 0.0461, "step": 35510 }, { "epoch": 0.14818786457594446, "grad_norm": 1.1257427229499206, "learning_rate": 5.1959410777119646e-06, "loss": 0.0378, "step": 35515 }, { "epoch": 0.14820872729093473, "grad_norm": 0.8010344780252854, "learning_rate": 5.195575349232251e-06, "loss": 0.0447, "step": 35520 }, { "epoch": 0.148229590005925, "grad_norm": 0.6842587537391144, "learning_rate": 5.195209697969633e-06, "loss": 0.0494, "step": 35525 }, { "epoch": 0.14825045272091528, "grad_norm": 0.8167292562308339, "learning_rate": 5.1948441238969425e-06, "loss": 0.0337, "step": 35530 }, { "epoch": 0.14827131543590558, "grad_norm": 0.7822917301826313, "learning_rate": 5.194478626987027e-06, "loss": 0.039, "step": 35535 }, { "epoch": 0.14829217815089585, "grad_norm": 0.9571614373310667, "learning_rate": 5.1941132072127434e-06, "loss": 0.0503, "step": 35540 }, { "epoch": 0.14831304086588612, "grad_norm": 1.371355241001216, "learning_rate": 5.1937478645469655e-06, "loss": 0.0496, "step": 35545 }, { "epoch": 0.1483339035808764, "grad_norm": 0.9492264891494169, "learning_rate": 5.1933825989625774e-06, "loss": 0.0412, "step": 35550 }, { "epoch": 0.1483547662958667, "grad_norm": 1.0588648691736189, "learning_rate": 5.193017410432479e-06, "loss": 0.0649, "step": 35555 }, { "epoch": 0.14837562901085696, "grad_norm": 0.9097272637983198, "learning_rate": 5.192652298929582e-06, "loss": 0.0386, "step": 35560 }, { "epoch": 0.14839649172584723, "grad_norm": 0.9190657234065552, "learning_rate": 5.192287264426813e-06, "loss": 0.0531, "step": 35565 }, { "epoch": 0.1484173544408375, "grad_norm": 0.7923150862599034, "learning_rate": 5.191922306897109e-06, "loss": 0.0346, "step": 35570 }, { "epoch": 0.14843821715582778, "grad_norm": 0.6596968604243519, "learning_rate": 5.191557426313425e-06, "loss": 0.0376, "step": 35575 }, { "epoch": 0.14845907987081808, "grad_norm": 0.9342294273501287, "learning_rate": 5.191192622648724e-06, "loss": 0.0368, "step": 35580 }, { "epoch": 0.14847994258580835, "grad_norm": 0.8438415172424459, "learning_rate": 5.190827895875986e-06, "loss": 0.0432, "step": 35585 }, { "epoch": 0.14850080530079862, "grad_norm": 1.1811206768802696, "learning_rate": 5.190463245968204e-06, "loss": 0.0446, "step": 35590 }, { "epoch": 0.1485216680157889, "grad_norm": 1.3994846037609823, "learning_rate": 5.190098672898381e-06, "loss": 0.0414, "step": 35595 }, { "epoch": 0.1485425307307792, "grad_norm": 0.7016146334122255, "learning_rate": 5.189734176639537e-06, "loss": 0.0373, "step": 35600 }, { "epoch": 0.14856339344576946, "grad_norm": 0.7080047872188798, "learning_rate": 5.189369757164705e-06, "loss": 0.0467, "step": 35605 }, { "epoch": 0.14858425616075974, "grad_norm": 0.8380005310721206, "learning_rate": 5.1890054144469295e-06, "loss": 0.0331, "step": 35610 }, { "epoch": 0.14860511887575, "grad_norm": 1.1187319568262144, "learning_rate": 5.188641148459267e-06, "loss": 0.0395, "step": 35615 }, { "epoch": 0.14862598159074028, "grad_norm": 1.0491442266287887, "learning_rate": 5.188276959174792e-06, "loss": 0.0467, "step": 35620 }, { "epoch": 0.14864684430573058, "grad_norm": 0.7233061348120366, "learning_rate": 5.187912846566587e-06, "loss": 0.0407, "step": 35625 }, { "epoch": 0.14866770702072085, "grad_norm": 0.80952894791574, "learning_rate": 5.1875488106077535e-06, "loss": 0.0389, "step": 35630 }, { "epoch": 0.14868856973571112, "grad_norm": 1.1064890682714776, "learning_rate": 5.187184851271398e-06, "loss": 0.0333, "step": 35635 }, { "epoch": 0.1487094324507014, "grad_norm": 0.7750024106224799, "learning_rate": 5.186820968530648e-06, "loss": 0.0519, "step": 35640 }, { "epoch": 0.1487302951656917, "grad_norm": 0.9312510381761062, "learning_rate": 5.186457162358643e-06, "loss": 0.0413, "step": 35645 }, { "epoch": 0.14875115788068197, "grad_norm": 0.9192795354589501, "learning_rate": 5.186093432728531e-06, "loss": 0.0397, "step": 35650 }, { "epoch": 0.14877202059567224, "grad_norm": 1.0024332492440295, "learning_rate": 5.185729779613475e-06, "loss": 0.0397, "step": 35655 }, { "epoch": 0.1487928833106625, "grad_norm": 1.0099949544519116, "learning_rate": 5.1853662029866565e-06, "loss": 0.0407, "step": 35660 }, { "epoch": 0.14881374602565278, "grad_norm": 0.689967298462823, "learning_rate": 5.185002702821262e-06, "loss": 0.0305, "step": 35665 }, { "epoch": 0.14883460874064308, "grad_norm": 0.7654183989063511, "learning_rate": 5.184639279090497e-06, "loss": 0.0482, "step": 35670 }, { "epoch": 0.14885547145563335, "grad_norm": 1.1158925003248115, "learning_rate": 5.184275931767579e-06, "loss": 0.0476, "step": 35675 }, { "epoch": 0.14887633417062363, "grad_norm": 0.7563026488659857, "learning_rate": 5.183912660825737e-06, "loss": 0.0494, "step": 35680 }, { "epoch": 0.1488971968856139, "grad_norm": 0.84046319249226, "learning_rate": 5.183549466238212e-06, "loss": 0.0362, "step": 35685 }, { "epoch": 0.1489180596006042, "grad_norm": 0.49015252291642675, "learning_rate": 5.183186347978264e-06, "loss": 0.0508, "step": 35690 }, { "epoch": 0.14893892231559447, "grad_norm": 1.0115841930371365, "learning_rate": 5.182823306019159e-06, "loss": 0.0445, "step": 35695 }, { "epoch": 0.14895978503058474, "grad_norm": 1.2316782084896447, "learning_rate": 5.182460340334181e-06, "loss": 0.0524, "step": 35700 }, { "epoch": 0.148980647745575, "grad_norm": 1.0249062274390361, "learning_rate": 5.182097450896625e-06, "loss": 0.0418, "step": 35705 }, { "epoch": 0.14900151046056528, "grad_norm": 3.5290736237721454, "learning_rate": 5.181734637679798e-06, "loss": 0.061, "step": 35710 }, { "epoch": 0.14902237317555558, "grad_norm": 0.7994067683818361, "learning_rate": 5.181371900657024e-06, "loss": 0.0351, "step": 35715 }, { "epoch": 0.14904323589054586, "grad_norm": 0.9017720203427361, "learning_rate": 5.181009239801637e-06, "loss": 0.0415, "step": 35720 }, { "epoch": 0.14906409860553613, "grad_norm": 1.3262231334207382, "learning_rate": 5.180646655086985e-06, "loss": 0.0289, "step": 35725 }, { "epoch": 0.1490849613205264, "grad_norm": 0.9115869576124619, "learning_rate": 5.180284146486427e-06, "loss": 0.0274, "step": 35730 }, { "epoch": 0.1491058240355167, "grad_norm": 0.5278506327749058, "learning_rate": 5.179921713973338e-06, "loss": 0.036, "step": 35735 }, { "epoch": 0.14912668675050697, "grad_norm": 0.8701185484199968, "learning_rate": 5.179559357521106e-06, "loss": 0.0393, "step": 35740 }, { "epoch": 0.14914754946549724, "grad_norm": 0.9037775165747995, "learning_rate": 5.179197077103129e-06, "loss": 0.0309, "step": 35745 }, { "epoch": 0.14916841218048751, "grad_norm": 0.886652157086967, "learning_rate": 5.178834872692821e-06, "loss": 0.0421, "step": 35750 }, { "epoch": 0.14918927489547779, "grad_norm": 0.9810277880881959, "learning_rate": 5.1784727442636075e-06, "loss": 0.0442, "step": 35755 }, { "epoch": 0.14921013761046809, "grad_norm": 0.960984021379609, "learning_rate": 5.178110691788928e-06, "loss": 0.0377, "step": 35760 }, { "epoch": 0.14923100032545836, "grad_norm": 1.1506697594678736, "learning_rate": 5.177748715242234e-06, "loss": 0.0473, "step": 35765 }, { "epoch": 0.14925186304044863, "grad_norm": 1.03871253069085, "learning_rate": 5.177386814596989e-06, "loss": 0.044, "step": 35770 }, { "epoch": 0.1492727257554389, "grad_norm": 1.5160827482405397, "learning_rate": 5.177024989826674e-06, "loss": 0.045, "step": 35775 }, { "epoch": 0.1492935884704292, "grad_norm": 1.0762617761664839, "learning_rate": 5.176663240904779e-06, "loss": 0.0491, "step": 35780 }, { "epoch": 0.14931445118541947, "grad_norm": 1.0820465096354523, "learning_rate": 5.176301567804806e-06, "loss": 0.0398, "step": 35785 }, { "epoch": 0.14933531390040974, "grad_norm": 1.045689245368014, "learning_rate": 5.175939970500272e-06, "loss": 0.0467, "step": 35790 }, { "epoch": 0.14935617661540002, "grad_norm": 1.358625603509686, "learning_rate": 5.17557844896471e-06, "loss": 0.0394, "step": 35795 }, { "epoch": 0.1493770393303903, "grad_norm": 0.9655519337144153, "learning_rate": 5.175217003171659e-06, "loss": 0.0343, "step": 35800 }, { "epoch": 0.1493979020453806, "grad_norm": 0.9112413513057367, "learning_rate": 5.174855633094675e-06, "loss": 0.0335, "step": 35805 }, { "epoch": 0.14941876476037086, "grad_norm": 0.8799013008562276, "learning_rate": 5.174494338707329e-06, "loss": 0.0462, "step": 35810 }, { "epoch": 0.14943962747536113, "grad_norm": 1.338820149713067, "learning_rate": 5.174133119983201e-06, "loss": 0.0401, "step": 35815 }, { "epoch": 0.1494604901903514, "grad_norm": 0.971381921301849, "learning_rate": 5.173771976895885e-06, "loss": 0.0374, "step": 35820 }, { "epoch": 0.1494813529053417, "grad_norm": 1.0932379933182907, "learning_rate": 5.173410909418987e-06, "loss": 0.0367, "step": 35825 }, { "epoch": 0.14950221562033197, "grad_norm": 1.1015288715331044, "learning_rate": 5.173049917526132e-06, "loss": 0.0446, "step": 35830 }, { "epoch": 0.14952307833532225, "grad_norm": 1.387592547784188, "learning_rate": 5.172689001190948e-06, "loss": 0.0376, "step": 35835 }, { "epoch": 0.14954394105031252, "grad_norm": 0.8633026548986498, "learning_rate": 5.172328160387083e-06, "loss": 0.0401, "step": 35840 }, { "epoch": 0.1495648037653028, "grad_norm": 1.0631058239212383, "learning_rate": 5.171967395088196e-06, "loss": 0.0419, "step": 35845 }, { "epoch": 0.1495856664802931, "grad_norm": 0.6369017441476389, "learning_rate": 5.171606705267958e-06, "loss": 0.0374, "step": 35850 }, { "epoch": 0.14960652919528336, "grad_norm": 0.9963405121515602, "learning_rate": 5.171246090900053e-06, "loss": 0.0326, "step": 35855 }, { "epoch": 0.14962739191027363, "grad_norm": 1.3558467514358552, "learning_rate": 5.1708855519581795e-06, "loss": 0.0364, "step": 35860 }, { "epoch": 0.1496482546252639, "grad_norm": 1.485067922150521, "learning_rate": 5.170525088416048e-06, "loss": 0.0469, "step": 35865 }, { "epoch": 0.1496691173402542, "grad_norm": 0.851892395434147, "learning_rate": 5.17016470024738e-06, "loss": 0.038, "step": 35870 }, { "epoch": 0.14968998005524448, "grad_norm": 1.1876629690846268, "learning_rate": 5.169804387425912e-06, "loss": 0.0412, "step": 35875 }, { "epoch": 0.14971084277023475, "grad_norm": 0.6036394596123068, "learning_rate": 5.169444149925392e-06, "loss": 0.0386, "step": 35880 }, { "epoch": 0.14973170548522502, "grad_norm": 0.792331652314188, "learning_rate": 5.169083987719583e-06, "loss": 0.0326, "step": 35885 }, { "epoch": 0.1497525682002153, "grad_norm": 0.6769721723657542, "learning_rate": 5.168723900782259e-06, "loss": 0.0396, "step": 35890 }, { "epoch": 0.1497734309152056, "grad_norm": 0.7343036313326773, "learning_rate": 5.1683638890872056e-06, "loss": 0.0397, "step": 35895 }, { "epoch": 0.14979429363019586, "grad_norm": 1.4888293780167312, "learning_rate": 5.1680039526082224e-06, "loss": 0.0487, "step": 35900 }, { "epoch": 0.14981515634518613, "grad_norm": 0.766371508569667, "learning_rate": 5.167644091319124e-06, "loss": 0.0456, "step": 35905 }, { "epoch": 0.1498360190601764, "grad_norm": 0.6048309016134653, "learning_rate": 5.167284305193735e-06, "loss": 0.0359, "step": 35910 }, { "epoch": 0.1498568817751667, "grad_norm": 0.6903715231692361, "learning_rate": 5.166924594205892e-06, "loss": 0.0381, "step": 35915 }, { "epoch": 0.14987774449015698, "grad_norm": 1.0385265685217815, "learning_rate": 5.166564958329448e-06, "loss": 0.0308, "step": 35920 }, { "epoch": 0.14989860720514725, "grad_norm": 1.2176272561092187, "learning_rate": 5.1662053975382655e-06, "loss": 0.0395, "step": 35925 }, { "epoch": 0.14991946992013752, "grad_norm": 0.5555371267657387, "learning_rate": 5.1658459118062195e-06, "loss": 0.0339, "step": 35930 }, { "epoch": 0.1499403326351278, "grad_norm": 1.5626281529349464, "learning_rate": 5.165486501107202e-06, "loss": 0.0511, "step": 35935 }, { "epoch": 0.1499611953501181, "grad_norm": 0.9014223206095527, "learning_rate": 5.165127165415112e-06, "loss": 0.0395, "step": 35940 }, { "epoch": 0.14998205806510836, "grad_norm": 0.5704427687740897, "learning_rate": 5.164767904703867e-06, "loss": 0.0422, "step": 35945 }, { "epoch": 0.15000292078009864, "grad_norm": 1.1062792091116247, "learning_rate": 5.164408718947389e-06, "loss": 0.0341, "step": 35950 }, { "epoch": 0.1500237834950889, "grad_norm": 1.1464768131919545, "learning_rate": 5.1640496081196235e-06, "loss": 0.0437, "step": 35955 }, { "epoch": 0.1500446462100792, "grad_norm": 0.32679806278804047, "learning_rate": 5.16369057219452e-06, "loss": 0.0309, "step": 35960 }, { "epoch": 0.15006550892506948, "grad_norm": 1.0073159751287697, "learning_rate": 5.163331611146043e-06, "loss": 0.0382, "step": 35965 }, { "epoch": 0.15008637164005975, "grad_norm": 1.287545423614059, "learning_rate": 5.162972724948173e-06, "loss": 0.0401, "step": 35970 }, { "epoch": 0.15010723435505002, "grad_norm": 1.2842978689223123, "learning_rate": 5.1626139135748985e-06, "loss": 0.0418, "step": 35975 }, { "epoch": 0.1501280970700403, "grad_norm": 0.8866850267863798, "learning_rate": 5.162255177000223e-06, "loss": 0.0411, "step": 35980 }, { "epoch": 0.1501489597850306, "grad_norm": 1.2295939250560475, "learning_rate": 5.161896515198164e-06, "loss": 0.0399, "step": 35985 }, { "epoch": 0.15016982250002087, "grad_norm": 1.6161429584184397, "learning_rate": 5.161537928142747e-06, "loss": 0.0531, "step": 35990 }, { "epoch": 0.15019068521501114, "grad_norm": 0.9553806947476066, "learning_rate": 5.1611794158080154e-06, "loss": 0.039, "step": 35995 }, { "epoch": 0.1502115479300014, "grad_norm": 0.7677677486818124, "learning_rate": 5.160820978168022e-06, "loss": 0.0333, "step": 36000 }, { "epoch": 0.1502324106449917, "grad_norm": 0.5338003373206438, "learning_rate": 5.160462615196833e-06, "loss": 0.0302, "step": 36005 }, { "epoch": 0.15025327335998198, "grad_norm": 0.7267327396742789, "learning_rate": 5.160104326868527e-06, "loss": 0.0503, "step": 36010 }, { "epoch": 0.15027413607497225, "grad_norm": 0.5438975871731156, "learning_rate": 5.159746113157197e-06, "loss": 0.0306, "step": 36015 }, { "epoch": 0.15029499878996253, "grad_norm": 0.5218595093967813, "learning_rate": 5.159387974036946e-06, "loss": 0.0427, "step": 36020 }, { "epoch": 0.1503158615049528, "grad_norm": 1.3900664118229369, "learning_rate": 5.159029909481891e-06, "loss": 0.0471, "step": 36025 }, { "epoch": 0.1503367242199431, "grad_norm": 1.1113089888166634, "learning_rate": 5.158671919466159e-06, "loss": 0.0596, "step": 36030 }, { "epoch": 0.15035758693493337, "grad_norm": 1.1326225856723526, "learning_rate": 5.158314003963897e-06, "loss": 0.0385, "step": 36035 }, { "epoch": 0.15037844964992364, "grad_norm": 0.9823686694377108, "learning_rate": 5.157956162949253e-06, "loss": 0.0431, "step": 36040 }, { "epoch": 0.1503993123649139, "grad_norm": 0.9909146055483268, "learning_rate": 5.1575983963964005e-06, "loss": 0.0412, "step": 36045 }, { "epoch": 0.1504201750799042, "grad_norm": 1.9019032697569176, "learning_rate": 5.157240704279514e-06, "loss": 0.0396, "step": 36050 }, { "epoch": 0.15044103779489448, "grad_norm": 0.7381937032049056, "learning_rate": 5.156883086572787e-06, "loss": 0.0515, "step": 36055 }, { "epoch": 0.15046190050988476, "grad_norm": 1.0701924221219408, "learning_rate": 5.156525543250423e-06, "loss": 0.0408, "step": 36060 }, { "epoch": 0.15048276322487503, "grad_norm": 1.0755448812489752, "learning_rate": 5.156168074286642e-06, "loss": 0.0485, "step": 36065 }, { "epoch": 0.1505036259398653, "grad_norm": 0.6122997790138642, "learning_rate": 5.1558106796556706e-06, "loss": 0.0447, "step": 36070 }, { "epoch": 0.1505244886548556, "grad_norm": 1.1494905381336598, "learning_rate": 5.155453359331754e-06, "loss": 0.0515, "step": 36075 }, { "epoch": 0.15054535136984587, "grad_norm": 0.7504521035522481, "learning_rate": 5.155096113289142e-06, "loss": 0.041, "step": 36080 }, { "epoch": 0.15056621408483614, "grad_norm": 0.895690756313428, "learning_rate": 5.1547389415021055e-06, "loss": 0.0381, "step": 36085 }, { "epoch": 0.1505870767998264, "grad_norm": 0.7582839461801197, "learning_rate": 5.154381843944923e-06, "loss": 0.0444, "step": 36090 }, { "epoch": 0.1506079395148167, "grad_norm": 0.8495560825588491, "learning_rate": 5.154024820591886e-06, "loss": 0.039, "step": 36095 }, { "epoch": 0.15062880222980699, "grad_norm": 0.6965725001630153, "learning_rate": 5.1536678714173e-06, "loss": 0.0365, "step": 36100 }, { "epoch": 0.15064966494479726, "grad_norm": 0.880942378892145, "learning_rate": 5.1533109963954805e-06, "loss": 0.05, "step": 36105 }, { "epoch": 0.15067052765978753, "grad_norm": 0.7070324282211761, "learning_rate": 5.152954195500757e-06, "loss": 0.0523, "step": 36110 }, { "epoch": 0.1506913903747778, "grad_norm": 0.4722024823033176, "learning_rate": 5.152597468707473e-06, "loss": 0.0341, "step": 36115 }, { "epoch": 0.1507122530897681, "grad_norm": 0.9215390774422118, "learning_rate": 5.152240815989981e-06, "loss": 0.0408, "step": 36120 }, { "epoch": 0.15073311580475837, "grad_norm": 0.6671061918840109, "learning_rate": 5.1518842373226485e-06, "loss": 0.035, "step": 36125 }, { "epoch": 0.15075397851974864, "grad_norm": 1.2509030708494893, "learning_rate": 5.151527732679853e-06, "loss": 0.0497, "step": 36130 }, { "epoch": 0.15077484123473892, "grad_norm": 0.9215680208294048, "learning_rate": 5.151171302035989e-06, "loss": 0.034, "step": 36135 }, { "epoch": 0.15079570394972922, "grad_norm": 0.8950715313993325, "learning_rate": 5.150814945365457e-06, "loss": 0.047, "step": 36140 }, { "epoch": 0.1508165666647195, "grad_norm": 1.0051831454731235, "learning_rate": 5.1504586626426755e-06, "loss": 0.0444, "step": 36145 }, { "epoch": 0.15083742937970976, "grad_norm": 3.535955235211434, "learning_rate": 5.150102453842072e-06, "loss": 0.0476, "step": 36150 }, { "epoch": 0.15085829209470003, "grad_norm": 1.065539041909471, "learning_rate": 5.149746318938088e-06, "loss": 0.0481, "step": 36155 }, { "epoch": 0.1508791548096903, "grad_norm": 0.7004171207262642, "learning_rate": 5.149390257905177e-06, "loss": 0.0422, "step": 36160 }, { "epoch": 0.1509000175246806, "grad_norm": 1.4305634822342563, "learning_rate": 5.1490342707178055e-06, "loss": 0.0484, "step": 36165 }, { "epoch": 0.15092088023967087, "grad_norm": 1.1248017435318332, "learning_rate": 5.148678357350449e-06, "loss": 0.0541, "step": 36170 }, { "epoch": 0.15094174295466115, "grad_norm": 0.9567167043981609, "learning_rate": 5.1483225177776005e-06, "loss": 0.0359, "step": 36175 }, { "epoch": 0.15096260566965142, "grad_norm": 1.0260730810904015, "learning_rate": 5.147966751973763e-06, "loss": 0.0421, "step": 36180 }, { "epoch": 0.15098346838464172, "grad_norm": 1.743001184148106, "learning_rate": 5.14761105991345e-06, "loss": 0.0546, "step": 36185 }, { "epoch": 0.151004331099632, "grad_norm": 0.6369081801663268, "learning_rate": 5.147255441571189e-06, "loss": 0.024, "step": 36190 }, { "epoch": 0.15102519381462226, "grad_norm": 0.9051954578747181, "learning_rate": 5.146899896921521e-06, "loss": 0.0468, "step": 36195 }, { "epoch": 0.15104605652961253, "grad_norm": 1.3180201921747334, "learning_rate": 5.146544425938997e-06, "loss": 0.0443, "step": 36200 }, { "epoch": 0.1510669192446028, "grad_norm": 0.9188857069210036, "learning_rate": 5.146189028598183e-06, "loss": 0.0433, "step": 36205 }, { "epoch": 0.1510877819595931, "grad_norm": 1.2090949882115871, "learning_rate": 5.145833704873653e-06, "loss": 0.0405, "step": 36210 }, { "epoch": 0.15110864467458338, "grad_norm": 0.549008288307239, "learning_rate": 5.1454784547399986e-06, "loss": 0.0313, "step": 36215 }, { "epoch": 0.15112950738957365, "grad_norm": 0.9518769595696767, "learning_rate": 5.145123278171819e-06, "loss": 0.0368, "step": 36220 }, { "epoch": 0.15115037010456392, "grad_norm": 1.3435432986899203, "learning_rate": 5.144768175143729e-06, "loss": 0.0404, "step": 36225 }, { "epoch": 0.15117123281955422, "grad_norm": 1.0280638501333184, "learning_rate": 5.144413145630353e-06, "loss": 0.0364, "step": 36230 }, { "epoch": 0.1511920955345445, "grad_norm": 0.7675547023831376, "learning_rate": 5.144058189606333e-06, "loss": 0.0376, "step": 36235 }, { "epoch": 0.15121295824953476, "grad_norm": 0.897758661083735, "learning_rate": 5.143703307046314e-06, "loss": 0.0551, "step": 36240 }, { "epoch": 0.15123382096452503, "grad_norm": 1.0865524559989745, "learning_rate": 5.14334849792496e-06, "loss": 0.0394, "step": 36245 }, { "epoch": 0.1512546836795153, "grad_norm": 0.7728726604779784, "learning_rate": 5.142993762216948e-06, "loss": 0.0425, "step": 36250 }, { "epoch": 0.1512755463945056, "grad_norm": 1.2678499140649995, "learning_rate": 5.142639099896963e-06, "loss": 0.0436, "step": 36255 }, { "epoch": 0.15129640910949588, "grad_norm": 1.3606724307289861, "learning_rate": 5.142284510939706e-06, "loss": 0.0299, "step": 36260 }, { "epoch": 0.15131727182448615, "grad_norm": 1.0278372546924235, "learning_rate": 5.141929995319886e-06, "loss": 0.0409, "step": 36265 }, { "epoch": 0.15133813453947642, "grad_norm": 0.7544270092147469, "learning_rate": 5.141575553012227e-06, "loss": 0.0301, "step": 36270 }, { "epoch": 0.15135899725446672, "grad_norm": 0.5372605172705709, "learning_rate": 5.141221183991466e-06, "loss": 0.0418, "step": 36275 }, { "epoch": 0.151379859969457, "grad_norm": 2.471278757508529, "learning_rate": 5.140866888232351e-06, "loss": 0.0445, "step": 36280 }, { "epoch": 0.15140072268444726, "grad_norm": 2.052377926177332, "learning_rate": 5.1405126657096415e-06, "loss": 0.0499, "step": 36285 }, { "epoch": 0.15142158539943754, "grad_norm": 0.8982080182009571, "learning_rate": 5.140158516398111e-06, "loss": 0.0394, "step": 36290 }, { "epoch": 0.1514424481144278, "grad_norm": 1.2502997681404733, "learning_rate": 5.1398044402725425e-06, "loss": 0.0586, "step": 36295 }, { "epoch": 0.1514633108294181, "grad_norm": 0.7088111181973283, "learning_rate": 5.1394504373077315e-06, "loss": 0.0306, "step": 36300 }, { "epoch": 0.15148417354440838, "grad_norm": 3.0233667363482106, "learning_rate": 5.13909650747849e-06, "loss": 0.054, "step": 36305 }, { "epoch": 0.15150503625939865, "grad_norm": 1.3340892801584587, "learning_rate": 5.138742650759637e-06, "loss": 0.0358, "step": 36310 }, { "epoch": 0.15152589897438892, "grad_norm": 0.9275941636699625, "learning_rate": 5.138388867126007e-06, "loss": 0.0457, "step": 36315 }, { "epoch": 0.15154676168937922, "grad_norm": 0.8327846550994574, "learning_rate": 5.138035156552443e-06, "loss": 0.0439, "step": 36320 }, { "epoch": 0.1515676244043695, "grad_norm": 1.1084873882906123, "learning_rate": 5.137681519013805e-06, "loss": 0.0371, "step": 36325 }, { "epoch": 0.15158848711935977, "grad_norm": 1.2816844825780374, "learning_rate": 5.13732795448496e-06, "loss": 0.0437, "step": 36330 }, { "epoch": 0.15160934983435004, "grad_norm": 2.633378584298763, "learning_rate": 5.13697446294079e-06, "loss": 0.0439, "step": 36335 }, { "epoch": 0.1516302125493403, "grad_norm": 0.9598167029627404, "learning_rate": 5.136621044356191e-06, "loss": 0.0421, "step": 36340 }, { "epoch": 0.1516510752643306, "grad_norm": 0.9277091478956266, "learning_rate": 5.136267698706065e-06, "loss": 0.048, "step": 36345 }, { "epoch": 0.15167193797932088, "grad_norm": 0.632864143516541, "learning_rate": 5.135914425965333e-06, "loss": 0.0443, "step": 36350 }, { "epoch": 0.15169280069431115, "grad_norm": 1.1841785479630444, "learning_rate": 5.1355612261089235e-06, "loss": 0.0346, "step": 36355 }, { "epoch": 0.15171366340930142, "grad_norm": 0.9849663047261789, "learning_rate": 5.135208099111778e-06, "loss": 0.0433, "step": 36360 }, { "epoch": 0.15173452612429172, "grad_norm": 1.0339816382986662, "learning_rate": 5.134855044948852e-06, "loss": 0.044, "step": 36365 }, { "epoch": 0.151755388839282, "grad_norm": 1.0002321192442307, "learning_rate": 5.13450206359511e-06, "loss": 0.0376, "step": 36370 }, { "epoch": 0.15177625155427227, "grad_norm": 0.5603299232422037, "learning_rate": 5.1341491550255315e-06, "loss": 0.0379, "step": 36375 }, { "epoch": 0.15179711426926254, "grad_norm": 0.5439042612064192, "learning_rate": 5.1337963192151055e-06, "loss": 0.034, "step": 36380 }, { "epoch": 0.1518179769842528, "grad_norm": 0.9359616806513527, "learning_rate": 5.133443556138835e-06, "loss": 0.0546, "step": 36385 }, { "epoch": 0.1518388396992431, "grad_norm": 1.2821781540080863, "learning_rate": 5.133090865771734e-06, "loss": 0.0375, "step": 36390 }, { "epoch": 0.15185970241423338, "grad_norm": 1.0483714864080436, "learning_rate": 5.132738248088827e-06, "loss": 0.0341, "step": 36395 }, { "epoch": 0.15188056512922365, "grad_norm": 1.0920404606807759, "learning_rate": 5.132385703065156e-06, "loss": 0.0357, "step": 36400 }, { "epoch": 0.15190142784421393, "grad_norm": 0.8816666418704165, "learning_rate": 5.132033230675768e-06, "loss": 0.0517, "step": 36405 }, { "epoch": 0.15192229055920423, "grad_norm": 0.9551235613287063, "learning_rate": 5.131680830895727e-06, "loss": 0.0296, "step": 36410 }, { "epoch": 0.1519431532741945, "grad_norm": 1.5820369949601614, "learning_rate": 5.131328503700105e-06, "loss": 0.0445, "step": 36415 }, { "epoch": 0.15196401598918477, "grad_norm": 1.0759567067636455, "learning_rate": 5.13097624906399e-06, "loss": 0.0348, "step": 36420 }, { "epoch": 0.15198487870417504, "grad_norm": 0.9427733997061806, "learning_rate": 5.1306240669624805e-06, "loss": 0.0396, "step": 36425 }, { "epoch": 0.1520057414191653, "grad_norm": 0.9861882843164317, "learning_rate": 5.130271957370685e-06, "loss": 0.0314, "step": 36430 }, { "epoch": 0.1520266041341556, "grad_norm": 0.5895427764629195, "learning_rate": 5.129919920263727e-06, "loss": 0.034, "step": 36435 }, { "epoch": 0.15204746684914588, "grad_norm": 1.2957843947018561, "learning_rate": 5.129567955616741e-06, "loss": 0.0377, "step": 36440 }, { "epoch": 0.15206832956413616, "grad_norm": 1.3464689806936867, "learning_rate": 5.12921606340487e-06, "loss": 0.0383, "step": 36445 }, { "epoch": 0.15208919227912643, "grad_norm": 0.7280730841841773, "learning_rate": 5.128864243603276e-06, "loss": 0.0336, "step": 36450 }, { "epoch": 0.15211005499411673, "grad_norm": 1.7689437707757003, "learning_rate": 5.1285124961871245e-06, "loss": 0.0463, "step": 36455 }, { "epoch": 0.152130917709107, "grad_norm": 1.3628964091410443, "learning_rate": 5.128160821131599e-06, "loss": 0.0271, "step": 36460 }, { "epoch": 0.15215178042409727, "grad_norm": 1.2581572926335556, "learning_rate": 5.127809218411896e-06, "loss": 0.041, "step": 36465 }, { "epoch": 0.15217264313908754, "grad_norm": 0.7556423593793745, "learning_rate": 5.1274576880032165e-06, "loss": 0.0406, "step": 36470 }, { "epoch": 0.15219350585407782, "grad_norm": 1.2082117284961362, "learning_rate": 5.127106229880781e-06, "loss": 0.0455, "step": 36475 }, { "epoch": 0.15221436856906811, "grad_norm": 0.5903059136376919, "learning_rate": 5.126754844019818e-06, "loss": 0.0353, "step": 36480 }, { "epoch": 0.1522352312840584, "grad_norm": 0.8559125076950517, "learning_rate": 5.126403530395568e-06, "loss": 0.0487, "step": 36485 }, { "epoch": 0.15225609399904866, "grad_norm": 1.059236549804927, "learning_rate": 5.1260522889832855e-06, "loss": 0.0419, "step": 36490 }, { "epoch": 0.15227695671403893, "grad_norm": 0.6726467070811328, "learning_rate": 5.125701119758234e-06, "loss": 0.0349, "step": 36495 }, { "epoch": 0.15229781942902923, "grad_norm": 3.0500073602837325, "learning_rate": 5.125350022695691e-06, "loss": 0.0421, "step": 36500 }, { "epoch": 0.1523186821440195, "grad_norm": 1.2669763614592655, "learning_rate": 5.124998997770945e-06, "loss": 0.049, "step": 36505 }, { "epoch": 0.15233954485900977, "grad_norm": 1.643651885765016, "learning_rate": 5.124648044959297e-06, "loss": 0.0495, "step": 36510 }, { "epoch": 0.15236040757400005, "grad_norm": 1.0966883112228798, "learning_rate": 5.1242971642360584e-06, "loss": 0.0441, "step": 36515 }, { "epoch": 0.15238127028899032, "grad_norm": 0.7019851492252415, "learning_rate": 5.123946355576556e-06, "loss": 0.0375, "step": 36520 }, { "epoch": 0.15240213300398062, "grad_norm": 0.7812552490861351, "learning_rate": 5.123595618956121e-06, "loss": 0.0358, "step": 36525 }, { "epoch": 0.1524229957189709, "grad_norm": 0.9630708214319585, "learning_rate": 5.123244954350105e-06, "loss": 0.0446, "step": 36530 }, { "epoch": 0.15244385843396116, "grad_norm": 0.783749402653439, "learning_rate": 5.1228943617338675e-06, "loss": 0.0425, "step": 36535 }, { "epoch": 0.15246472114895143, "grad_norm": 0.659128439634796, "learning_rate": 5.122543841082779e-06, "loss": 0.0316, "step": 36540 }, { "epoch": 0.15248558386394173, "grad_norm": 1.1781890909965824, "learning_rate": 5.122193392372222e-06, "loss": 0.0316, "step": 36545 }, { "epoch": 0.152506446578932, "grad_norm": 0.6605786369840614, "learning_rate": 5.121843015577592e-06, "loss": 0.0347, "step": 36550 }, { "epoch": 0.15252730929392228, "grad_norm": 0.9069314103889353, "learning_rate": 5.1214927106742976e-06, "loss": 0.0436, "step": 36555 }, { "epoch": 0.15254817200891255, "grad_norm": 0.997984522795973, "learning_rate": 5.121142477637755e-06, "loss": 0.0419, "step": 36560 }, { "epoch": 0.15256903472390282, "grad_norm": 0.8142666158508847, "learning_rate": 5.120792316443396e-06, "loss": 0.0321, "step": 36565 }, { "epoch": 0.15258989743889312, "grad_norm": 0.9805431702498582, "learning_rate": 5.1204422270666624e-06, "loss": 0.0354, "step": 36570 }, { "epoch": 0.1526107601538834, "grad_norm": 1.205638206067895, "learning_rate": 5.1200922094830065e-06, "loss": 0.0488, "step": 36575 }, { "epoch": 0.15263162286887366, "grad_norm": 0.8537467324870214, "learning_rate": 5.1197422636678965e-06, "loss": 0.0512, "step": 36580 }, { "epoch": 0.15265248558386393, "grad_norm": 3.280419807415457, "learning_rate": 5.1193923895968074e-06, "loss": 0.042, "step": 36585 }, { "epoch": 0.15267334829885423, "grad_norm": 0.9112132489497831, "learning_rate": 5.11904258724523e-06, "loss": 0.0354, "step": 36590 }, { "epoch": 0.1526942110138445, "grad_norm": 0.747726098490795, "learning_rate": 5.118692856588665e-06, "loss": 0.0407, "step": 36595 }, { "epoch": 0.15271507372883478, "grad_norm": 0.8306841432272448, "learning_rate": 5.118343197602623e-06, "loss": 0.0473, "step": 36600 }, { "epoch": 0.15273593644382505, "grad_norm": 0.8055843305795792, "learning_rate": 5.1179936102626305e-06, "loss": 0.0367, "step": 36605 }, { "epoch": 0.15275679915881532, "grad_norm": 0.7597000123830476, "learning_rate": 5.117644094544222e-06, "loss": 0.0485, "step": 36610 }, { "epoch": 0.15277766187380562, "grad_norm": 0.9447710919686932, "learning_rate": 5.117294650422945e-06, "loss": 0.0442, "step": 36615 }, { "epoch": 0.1527985245887959, "grad_norm": 0.6204330611511965, "learning_rate": 5.116945277874359e-06, "loss": 0.0366, "step": 36620 }, { "epoch": 0.15281938730378616, "grad_norm": 1.2439158417380922, "learning_rate": 5.116595976874036e-06, "loss": 0.046, "step": 36625 }, { "epoch": 0.15284025001877644, "grad_norm": 1.2929159176654788, "learning_rate": 5.116246747397557e-06, "loss": 0.0476, "step": 36630 }, { "epoch": 0.15286111273376674, "grad_norm": 0.6520605443792071, "learning_rate": 5.115897589420519e-06, "loss": 0.0503, "step": 36635 }, { "epoch": 0.152881975448757, "grad_norm": 1.1013477663204552, "learning_rate": 5.115548502918524e-06, "loss": 0.0417, "step": 36640 }, { "epoch": 0.15290283816374728, "grad_norm": 0.8637657359642527, "learning_rate": 5.115199487867193e-06, "loss": 0.0406, "step": 36645 }, { "epoch": 0.15292370087873755, "grad_norm": 1.5992248805170883, "learning_rate": 5.1148505442421524e-06, "loss": 0.036, "step": 36650 }, { "epoch": 0.15294456359372782, "grad_norm": 2.0597131121726733, "learning_rate": 5.114501672019046e-06, "loss": 0.0382, "step": 36655 }, { "epoch": 0.15296542630871812, "grad_norm": 0.6027898702713914, "learning_rate": 5.114152871173525e-06, "loss": 0.0353, "step": 36660 }, { "epoch": 0.1529862890237084, "grad_norm": 0.5912278285910126, "learning_rate": 5.113804141681252e-06, "loss": 0.0374, "step": 36665 }, { "epoch": 0.15300715173869867, "grad_norm": 1.0025754568974132, "learning_rate": 5.113455483517906e-06, "loss": 0.0508, "step": 36670 }, { "epoch": 0.15302801445368894, "grad_norm": 0.7664974721645447, "learning_rate": 5.113106896659171e-06, "loss": 0.0378, "step": 36675 }, { "epoch": 0.15304887716867924, "grad_norm": 0.6697248782742552, "learning_rate": 5.112758381080748e-06, "loss": 0.0386, "step": 36680 }, { "epoch": 0.1530697398836695, "grad_norm": 2.107113019307976, "learning_rate": 5.112409936758348e-06, "loss": 0.0548, "step": 36685 }, { "epoch": 0.15309060259865978, "grad_norm": 1.1335936829157836, "learning_rate": 5.112061563667692e-06, "loss": 0.0354, "step": 36690 }, { "epoch": 0.15311146531365005, "grad_norm": 0.6534525329350853, "learning_rate": 5.111713261784512e-06, "loss": 0.039, "step": 36695 }, { "epoch": 0.15313232802864032, "grad_norm": 1.117546325861611, "learning_rate": 5.111365031084557e-06, "loss": 0.0437, "step": 36700 }, { "epoch": 0.15315319074363062, "grad_norm": 1.1288875219891734, "learning_rate": 5.1110168715435814e-06, "loss": 0.0464, "step": 36705 }, { "epoch": 0.1531740534586209, "grad_norm": 0.7009818783460623, "learning_rate": 5.110668783137355e-06, "loss": 0.0401, "step": 36710 }, { "epoch": 0.15319491617361117, "grad_norm": 0.8899908584407102, "learning_rate": 5.110320765841657e-06, "loss": 0.04, "step": 36715 }, { "epoch": 0.15321577888860144, "grad_norm": 1.5855135504283746, "learning_rate": 5.109972819632279e-06, "loss": 0.0447, "step": 36720 }, { "epoch": 0.1532366416035917, "grad_norm": 0.4815770447788003, "learning_rate": 5.109624944485022e-06, "loss": 0.0332, "step": 36725 }, { "epoch": 0.153257504318582, "grad_norm": 0.7753645596463544, "learning_rate": 5.109277140375703e-06, "loss": 0.0303, "step": 36730 }, { "epoch": 0.15327836703357228, "grad_norm": 1.706426829362904, "learning_rate": 5.10892940728015e-06, "loss": 0.0488, "step": 36735 }, { "epoch": 0.15329922974856255, "grad_norm": 1.1584761273502049, "learning_rate": 5.1085817451741956e-06, "loss": 0.0442, "step": 36740 }, { "epoch": 0.15332009246355283, "grad_norm": 0.9785484156218356, "learning_rate": 5.108234154033693e-06, "loss": 0.0359, "step": 36745 }, { "epoch": 0.15334095517854313, "grad_norm": 0.8856417688872541, "learning_rate": 5.107886633834501e-06, "loss": 0.0386, "step": 36750 }, { "epoch": 0.1533618178935334, "grad_norm": 0.8178763920680858, "learning_rate": 5.107539184552492e-06, "loss": 0.0381, "step": 36755 }, { "epoch": 0.15338268060852367, "grad_norm": 0.9477921941988117, "learning_rate": 5.1071918061635495e-06, "loss": 0.0389, "step": 36760 }, { "epoch": 0.15340354332351394, "grad_norm": 0.7725901265308353, "learning_rate": 5.106844498643568e-06, "loss": 0.0565, "step": 36765 }, { "epoch": 0.1534244060385042, "grad_norm": 1.5785463079289868, "learning_rate": 5.106497261968456e-06, "loss": 0.0499, "step": 36770 }, { "epoch": 0.1534452687534945, "grad_norm": 0.9432329406388049, "learning_rate": 5.106150096114131e-06, "loss": 0.0385, "step": 36775 }, { "epoch": 0.15346613146848478, "grad_norm": 1.8358501091376622, "learning_rate": 5.10580300105652e-06, "loss": 0.0555, "step": 36780 }, { "epoch": 0.15348699418347506, "grad_norm": 1.0817070865077107, "learning_rate": 5.105455976771566e-06, "loss": 0.0503, "step": 36785 }, { "epoch": 0.15350785689846533, "grad_norm": 0.9939577669251161, "learning_rate": 5.105109023235222e-06, "loss": 0.0317, "step": 36790 }, { "epoch": 0.15352871961345563, "grad_norm": 0.9172426463574155, "learning_rate": 5.10476214042345e-06, "loss": 0.0537, "step": 36795 }, { "epoch": 0.1535495823284459, "grad_norm": 0.8185293326991043, "learning_rate": 5.104415328312225e-06, "loss": 0.0298, "step": 36800 }, { "epoch": 0.15357044504343617, "grad_norm": 0.8810894090762456, "learning_rate": 5.1040685868775365e-06, "loss": 0.0588, "step": 36805 }, { "epoch": 0.15359130775842644, "grad_norm": 0.8666408968456952, "learning_rate": 5.1037219160953795e-06, "loss": 0.045, "step": 36810 }, { "epoch": 0.15361217047341671, "grad_norm": 1.0314268122488526, "learning_rate": 5.1033753159417664e-06, "loss": 0.0375, "step": 36815 }, { "epoch": 0.15363303318840701, "grad_norm": 1.0198587275586597, "learning_rate": 5.103028786392715e-06, "loss": 0.0368, "step": 36820 }, { "epoch": 0.1536538959033973, "grad_norm": 1.1394728698625443, "learning_rate": 5.102682327424261e-06, "loss": 0.0442, "step": 36825 }, { "epoch": 0.15367475861838756, "grad_norm": 0.8718602318362282, "learning_rate": 5.102335939012445e-06, "loss": 0.0351, "step": 36830 }, { "epoch": 0.15369562133337783, "grad_norm": 0.6565211891487858, "learning_rate": 5.101989621133323e-06, "loss": 0.0353, "step": 36835 }, { "epoch": 0.15371648404836813, "grad_norm": 0.8975274567754845, "learning_rate": 5.101643373762961e-06, "loss": 0.0435, "step": 36840 }, { "epoch": 0.1537373467633584, "grad_norm": 0.7991939828831421, "learning_rate": 5.101297196877439e-06, "loss": 0.0397, "step": 36845 }, { "epoch": 0.15375820947834867, "grad_norm": 0.8234265533617485, "learning_rate": 5.1009510904528435e-06, "loss": 0.0373, "step": 36850 }, { "epoch": 0.15377907219333894, "grad_norm": 0.8024065794369657, "learning_rate": 5.1006050544652776e-06, "loss": 0.0384, "step": 36855 }, { "epoch": 0.15379993490832922, "grad_norm": 0.5250009526550091, "learning_rate": 5.100259088890851e-06, "loss": 0.0357, "step": 36860 }, { "epoch": 0.15382079762331952, "grad_norm": 1.3177579412451403, "learning_rate": 5.099913193705687e-06, "loss": 0.0388, "step": 36865 }, { "epoch": 0.1538416603383098, "grad_norm": 0.9132136842666221, "learning_rate": 5.0995673688859216e-06, "loss": 0.0392, "step": 36870 }, { "epoch": 0.15386252305330006, "grad_norm": 0.6451396581133381, "learning_rate": 5.099221614407699e-06, "loss": 0.0455, "step": 36875 }, { "epoch": 0.15388338576829033, "grad_norm": 1.0824511239160781, "learning_rate": 5.098875930247177e-06, "loss": 0.0479, "step": 36880 }, { "epoch": 0.15390424848328063, "grad_norm": 1.6592744187092445, "learning_rate": 5.098530316380525e-06, "loss": 0.0501, "step": 36885 }, { "epoch": 0.1539251111982709, "grad_norm": 0.6960506041170117, "learning_rate": 5.098184772783923e-06, "loss": 0.032, "step": 36890 }, { "epoch": 0.15394597391326117, "grad_norm": 2.421094966362576, "learning_rate": 5.09783929943356e-06, "loss": 0.0374, "step": 36895 }, { "epoch": 0.15396683662825145, "grad_norm": 1.0618639638760154, "learning_rate": 5.09749389630564e-06, "loss": 0.0418, "step": 36900 }, { "epoch": 0.15398769934324172, "grad_norm": 0.8143467604314402, "learning_rate": 5.0971485633763765e-06, "loss": 0.0407, "step": 36905 }, { "epoch": 0.15400856205823202, "grad_norm": 2.1850197842511285, "learning_rate": 5.096803300621994e-06, "loss": 0.0499, "step": 36910 }, { "epoch": 0.1540294247732223, "grad_norm": 1.300253590766443, "learning_rate": 5.096458108018729e-06, "loss": 0.0417, "step": 36915 }, { "epoch": 0.15405028748821256, "grad_norm": 1.3986589871514363, "learning_rate": 5.096112985542828e-06, "loss": 0.0454, "step": 36920 }, { "epoch": 0.15407115020320283, "grad_norm": 1.1233978209213078, "learning_rate": 5.095767933170553e-06, "loss": 0.0347, "step": 36925 }, { "epoch": 0.15409201291819313, "grad_norm": 0.9465154981886185, "learning_rate": 5.095422950878171e-06, "loss": 0.048, "step": 36930 }, { "epoch": 0.1541128756331834, "grad_norm": 0.7388579855214732, "learning_rate": 5.095078038641963e-06, "loss": 0.0416, "step": 36935 }, { "epoch": 0.15413373834817368, "grad_norm": 1.3860183049565673, "learning_rate": 5.094733196438223e-06, "loss": 0.0333, "step": 36940 }, { "epoch": 0.15415460106316395, "grad_norm": 1.214768924549379, "learning_rate": 5.094388424243254e-06, "loss": 0.0539, "step": 36945 }, { "epoch": 0.15417546377815422, "grad_norm": 0.4111826423440323, "learning_rate": 5.094043722033371e-06, "loss": 0.0295, "step": 36950 }, { "epoch": 0.15419632649314452, "grad_norm": 1.0921039011771687, "learning_rate": 5.093699089784902e-06, "loss": 0.039, "step": 36955 }, { "epoch": 0.1542171892081348, "grad_norm": 0.9301389352339039, "learning_rate": 5.093354527474181e-06, "loss": 0.0433, "step": 36960 }, { "epoch": 0.15423805192312506, "grad_norm": 1.0376549963188961, "learning_rate": 5.093010035077559e-06, "loss": 0.049, "step": 36965 }, { "epoch": 0.15425891463811534, "grad_norm": 1.1371499359918884, "learning_rate": 5.092665612571395e-06, "loss": 0.0372, "step": 36970 }, { "epoch": 0.15427977735310563, "grad_norm": 0.9687675324472846, "learning_rate": 5.0923212599320595e-06, "loss": 0.0448, "step": 36975 }, { "epoch": 0.1543006400680959, "grad_norm": 1.1659597521978498, "learning_rate": 5.091976977135935e-06, "loss": 0.05, "step": 36980 }, { "epoch": 0.15432150278308618, "grad_norm": 0.8435501414858633, "learning_rate": 5.091632764159416e-06, "loss": 0.0421, "step": 36985 }, { "epoch": 0.15434236549807645, "grad_norm": 1.2299188128423584, "learning_rate": 5.091288620978905e-06, "loss": 0.0466, "step": 36990 }, { "epoch": 0.15436322821306672, "grad_norm": 1.4174696927666544, "learning_rate": 5.090944547570819e-06, "loss": 0.0478, "step": 36995 }, { "epoch": 0.15438409092805702, "grad_norm": 1.7050465557723768, "learning_rate": 5.090600543911584e-06, "loss": 0.0439, "step": 37000 }, { "epoch": 0.1544049536430473, "grad_norm": 0.9960198769270353, "learning_rate": 5.0902566099776385e-06, "loss": 0.0329, "step": 37005 }, { "epoch": 0.15442581635803757, "grad_norm": 0.8464191028363912, "learning_rate": 5.0899127457454315e-06, "loss": 0.034, "step": 37010 }, { "epoch": 0.15444667907302784, "grad_norm": 0.9022566566264663, "learning_rate": 5.089568951191423e-06, "loss": 0.0483, "step": 37015 }, { "epoch": 0.15446754178801814, "grad_norm": 0.6163425936498456, "learning_rate": 5.089225226292085e-06, "loss": 0.0511, "step": 37020 }, { "epoch": 0.1544884045030084, "grad_norm": 1.10947562895338, "learning_rate": 5.088881571023898e-06, "loss": 0.0406, "step": 37025 }, { "epoch": 0.15450926721799868, "grad_norm": 1.5090178428987917, "learning_rate": 5.088537985363359e-06, "loss": 0.0339, "step": 37030 }, { "epoch": 0.15453012993298895, "grad_norm": 0.8530636941120983, "learning_rate": 5.088194469286969e-06, "loss": 0.0314, "step": 37035 }, { "epoch": 0.15455099264797922, "grad_norm": 2.966617751437831, "learning_rate": 5.087851022771246e-06, "loss": 0.0455, "step": 37040 }, { "epoch": 0.15457185536296952, "grad_norm": 0.7771075623821816, "learning_rate": 5.0875076457927175e-06, "loss": 0.0321, "step": 37045 }, { "epoch": 0.1545927180779598, "grad_norm": 0.8954034972878575, "learning_rate": 5.087164338327919e-06, "loss": 0.0346, "step": 37050 }, { "epoch": 0.15461358079295007, "grad_norm": 1.2346394908528, "learning_rate": 5.0868211003534025e-06, "loss": 0.0368, "step": 37055 }, { "epoch": 0.15463444350794034, "grad_norm": 1.449114111398911, "learning_rate": 5.086477931845726e-06, "loss": 0.0341, "step": 37060 }, { "epoch": 0.15465530622293064, "grad_norm": 0.838244028015444, "learning_rate": 5.086134832781461e-06, "loss": 0.032, "step": 37065 }, { "epoch": 0.1546761689379209, "grad_norm": 0.40903273613463, "learning_rate": 5.08579180313719e-06, "loss": 0.0445, "step": 37070 }, { "epoch": 0.15469703165291118, "grad_norm": 1.022430627708385, "learning_rate": 5.085448842889507e-06, "loss": 0.0419, "step": 37075 }, { "epoch": 0.15471789436790145, "grad_norm": 0.9119217653004668, "learning_rate": 5.085105952015016e-06, "loss": 0.0569, "step": 37080 }, { "epoch": 0.15473875708289173, "grad_norm": 0.7527895670866209, "learning_rate": 5.084763130490331e-06, "loss": 0.038, "step": 37085 }, { "epoch": 0.15475961979788203, "grad_norm": 0.6844253542496618, "learning_rate": 5.084420378292081e-06, "loss": 0.0321, "step": 37090 }, { "epoch": 0.1547804825128723, "grad_norm": 1.1664212046309452, "learning_rate": 5.084077695396902e-06, "loss": 0.0471, "step": 37095 }, { "epoch": 0.15480134522786257, "grad_norm": 0.8727253934083578, "learning_rate": 5.083735081781441e-06, "loss": 0.0326, "step": 37100 }, { "epoch": 0.15482220794285284, "grad_norm": 1.3613475055023814, "learning_rate": 5.083392537422362e-06, "loss": 0.045, "step": 37105 }, { "epoch": 0.15484307065784314, "grad_norm": 0.7804767300396513, "learning_rate": 5.083050062296329e-06, "loss": 0.039, "step": 37110 }, { "epoch": 0.1548639333728334, "grad_norm": 1.020873645995451, "learning_rate": 5.08270765638003e-06, "loss": 0.025, "step": 37115 }, { "epoch": 0.15488479608782368, "grad_norm": 1.1501043603210392, "learning_rate": 5.0823653196501544e-06, "loss": 0.0611, "step": 37120 }, { "epoch": 0.15490565880281396, "grad_norm": 1.6587977532793188, "learning_rate": 5.082023052083405e-06, "loss": 0.0363, "step": 37125 }, { "epoch": 0.15492652151780423, "grad_norm": 0.8470235168546367, "learning_rate": 5.081680853656498e-06, "loss": 0.0443, "step": 37130 }, { "epoch": 0.15494738423279453, "grad_norm": 0.7624187992754181, "learning_rate": 5.081338724346157e-06, "loss": 0.0398, "step": 37135 }, { "epoch": 0.1549682469477848, "grad_norm": 0.8450860890556223, "learning_rate": 5.080996664129121e-06, "loss": 0.0397, "step": 37140 }, { "epoch": 0.15498910966277507, "grad_norm": 0.9099959156873755, "learning_rate": 5.080654672982135e-06, "loss": 0.0404, "step": 37145 }, { "epoch": 0.15500997237776534, "grad_norm": 1.0857191760744636, "learning_rate": 5.080312750881958e-06, "loss": 0.0489, "step": 37150 }, { "epoch": 0.15503083509275564, "grad_norm": 0.6053971027941223, "learning_rate": 5.07997089780536e-06, "loss": 0.0363, "step": 37155 }, { "epoch": 0.15505169780774591, "grad_norm": 0.7191720240868461, "learning_rate": 5.07962911372912e-06, "loss": 0.0339, "step": 37160 }, { "epoch": 0.15507256052273619, "grad_norm": 0.8273139780547377, "learning_rate": 5.079287398630031e-06, "loss": 0.0312, "step": 37165 }, { "epoch": 0.15509342323772646, "grad_norm": 0.8185400757325852, "learning_rate": 5.078945752484893e-06, "loss": 0.0313, "step": 37170 }, { "epoch": 0.15511428595271673, "grad_norm": 0.9709677034160046, "learning_rate": 5.078604175270521e-06, "loss": 0.0316, "step": 37175 }, { "epoch": 0.15513514866770703, "grad_norm": 0.32986319413493337, "learning_rate": 5.078262666963737e-06, "loss": 0.0336, "step": 37180 }, { "epoch": 0.1551560113826973, "grad_norm": 1.2295814601280877, "learning_rate": 5.077921227541377e-06, "loss": 0.0363, "step": 37185 }, { "epoch": 0.15517687409768757, "grad_norm": 0.9801469907870158, "learning_rate": 5.077579856980286e-06, "loss": 0.0418, "step": 37190 }, { "epoch": 0.15519773681267784, "grad_norm": 1.394097782650023, "learning_rate": 5.077238555257322e-06, "loss": 0.042, "step": 37195 }, { "epoch": 0.15521859952766814, "grad_norm": 1.0708483037211642, "learning_rate": 5.07689732234935e-06, "loss": 0.0345, "step": 37200 }, { "epoch": 0.15523946224265842, "grad_norm": 0.4627681439709754, "learning_rate": 5.076556158233252e-06, "loss": 0.0416, "step": 37205 }, { "epoch": 0.1552603249576487, "grad_norm": 1.185257203666419, "learning_rate": 5.076215062885914e-06, "loss": 0.046, "step": 37210 }, { "epoch": 0.15528118767263896, "grad_norm": 0.7310155367977104, "learning_rate": 5.075874036284237e-06, "loss": 0.0435, "step": 37215 }, { "epoch": 0.15530205038762923, "grad_norm": 1.1226318625268863, "learning_rate": 5.075533078405132e-06, "loss": 0.0411, "step": 37220 }, { "epoch": 0.15532291310261953, "grad_norm": 1.0423886982491488, "learning_rate": 5.075192189225523e-06, "loss": 0.0462, "step": 37225 }, { "epoch": 0.1553437758176098, "grad_norm": 0.8460498476875769, "learning_rate": 5.074851368722341e-06, "loss": 0.0458, "step": 37230 }, { "epoch": 0.15536463853260007, "grad_norm": 0.7486961842755824, "learning_rate": 5.074510616872528e-06, "loss": 0.0389, "step": 37235 }, { "epoch": 0.15538550124759035, "grad_norm": 0.5674961592861533, "learning_rate": 5.07416993365304e-06, "loss": 0.0462, "step": 37240 }, { "epoch": 0.15540636396258065, "grad_norm": 0.7532285434426034, "learning_rate": 5.073829319040843e-06, "loss": 0.0461, "step": 37245 }, { "epoch": 0.15542722667757092, "grad_norm": 0.9398786298142435, "learning_rate": 5.073488773012911e-06, "loss": 0.0396, "step": 37250 }, { "epoch": 0.1554480893925612, "grad_norm": 0.6454912470721645, "learning_rate": 5.073148295546233e-06, "loss": 0.0299, "step": 37255 }, { "epoch": 0.15546895210755146, "grad_norm": 0.6934241769629337, "learning_rate": 5.072807886617805e-06, "loss": 0.0347, "step": 37260 }, { "epoch": 0.15548981482254173, "grad_norm": 1.03208141668992, "learning_rate": 5.0724675462046355e-06, "loss": 0.0391, "step": 37265 }, { "epoch": 0.15551067753753203, "grad_norm": 0.7752767787321543, "learning_rate": 5.072127274283745e-06, "loss": 0.0413, "step": 37270 }, { "epoch": 0.1555315402525223, "grad_norm": 0.7945594212456918, "learning_rate": 5.071787070832162e-06, "loss": 0.0449, "step": 37275 }, { "epoch": 0.15555240296751258, "grad_norm": 0.8919476022765294, "learning_rate": 5.071446935826928e-06, "loss": 0.0364, "step": 37280 }, { "epoch": 0.15557326568250285, "grad_norm": 1.0119635507251357, "learning_rate": 5.071106869245096e-06, "loss": 0.0396, "step": 37285 }, { "epoch": 0.15559412839749315, "grad_norm": 1.2775778296851181, "learning_rate": 5.070766871063727e-06, "loss": 0.0438, "step": 37290 }, { "epoch": 0.15561499111248342, "grad_norm": 1.3200702510975524, "learning_rate": 5.0704269412598926e-06, "loss": 0.0487, "step": 37295 }, { "epoch": 0.1556358538274737, "grad_norm": 0.9497383561026745, "learning_rate": 5.07008707981068e-06, "loss": 0.0393, "step": 37300 }, { "epoch": 0.15565671654246396, "grad_norm": 0.8516691305614705, "learning_rate": 5.069747286693182e-06, "loss": 0.0304, "step": 37305 }, { "epoch": 0.15567757925745423, "grad_norm": 1.1193048097285399, "learning_rate": 5.069407561884504e-06, "loss": 0.0474, "step": 37310 }, { "epoch": 0.15569844197244453, "grad_norm": 0.715933856647941, "learning_rate": 5.069067905361762e-06, "loss": 0.0355, "step": 37315 }, { "epoch": 0.1557193046874348, "grad_norm": 1.5348938980200812, "learning_rate": 5.068728317102083e-06, "loss": 0.0444, "step": 37320 }, { "epoch": 0.15574016740242508, "grad_norm": 0.8560858205834914, "learning_rate": 5.068388797082605e-06, "loss": 0.0458, "step": 37325 }, { "epoch": 0.15576103011741535, "grad_norm": 0.9671538941716237, "learning_rate": 5.0680493452804765e-06, "loss": 0.036, "step": 37330 }, { "epoch": 0.15578189283240565, "grad_norm": 1.142189996709564, "learning_rate": 5.067709961672856e-06, "loss": 0.0377, "step": 37335 }, { "epoch": 0.15580275554739592, "grad_norm": 1.0317389907584897, "learning_rate": 5.067370646236912e-06, "loss": 0.0383, "step": 37340 }, { "epoch": 0.1558236182623862, "grad_norm": 1.3006641379248765, "learning_rate": 5.067031398949828e-06, "loss": 0.049, "step": 37345 }, { "epoch": 0.15584448097737646, "grad_norm": 1.180141332528247, "learning_rate": 5.066692219788793e-06, "loss": 0.0521, "step": 37350 }, { "epoch": 0.15586534369236674, "grad_norm": 0.703770534333396, "learning_rate": 5.066353108731008e-06, "loss": 0.0401, "step": 37355 }, { "epoch": 0.15588620640735704, "grad_norm": 1.3196404760110783, "learning_rate": 5.066014065753687e-06, "loss": 0.0401, "step": 37360 }, { "epoch": 0.1559070691223473, "grad_norm": 0.9683508849189701, "learning_rate": 5.065675090834053e-06, "loss": 0.0361, "step": 37365 }, { "epoch": 0.15592793183733758, "grad_norm": 1.1897640756652814, "learning_rate": 5.0653361839493385e-06, "loss": 0.0458, "step": 37370 }, { "epoch": 0.15594879455232785, "grad_norm": 1.0454193931791114, "learning_rate": 5.064997345076791e-06, "loss": 0.0437, "step": 37375 }, { "epoch": 0.15596965726731815, "grad_norm": 0.8134776590380358, "learning_rate": 5.064658574193661e-06, "loss": 0.0401, "step": 37380 }, { "epoch": 0.15599051998230842, "grad_norm": 0.6696664116467449, "learning_rate": 5.06431987127722e-06, "loss": 0.0328, "step": 37385 }, { "epoch": 0.1560113826972987, "grad_norm": 0.9856802168624491, "learning_rate": 5.063981236304739e-06, "loss": 0.0376, "step": 37390 }, { "epoch": 0.15603224541228897, "grad_norm": 1.5630302170443646, "learning_rate": 5.063642669253509e-06, "loss": 0.0479, "step": 37395 }, { "epoch": 0.15605310812727924, "grad_norm": 1.2883669384545309, "learning_rate": 5.063304170100825e-06, "loss": 0.0397, "step": 37400 }, { "epoch": 0.15607397084226954, "grad_norm": 0.6280574309827932, "learning_rate": 5.062965738823996e-06, "loss": 0.0326, "step": 37405 }, { "epoch": 0.1560948335572598, "grad_norm": 0.9894531541186056, "learning_rate": 5.062627375400342e-06, "loss": 0.0312, "step": 37410 }, { "epoch": 0.15611569627225008, "grad_norm": 1.453763885090701, "learning_rate": 5.0622890798071924e-06, "loss": 0.0489, "step": 37415 }, { "epoch": 0.15613655898724035, "grad_norm": 0.9342874723380133, "learning_rate": 5.0619508520218865e-06, "loss": 0.0359, "step": 37420 }, { "epoch": 0.15615742170223065, "grad_norm": 0.5582479191327681, "learning_rate": 5.0616126920217745e-06, "loss": 0.0342, "step": 37425 }, { "epoch": 0.15617828441722093, "grad_norm": 1.2144405583702693, "learning_rate": 5.0612745997842195e-06, "loss": 0.05, "step": 37430 }, { "epoch": 0.1561991471322112, "grad_norm": 1.2241381630571835, "learning_rate": 5.060936575286591e-06, "loss": 0.0382, "step": 37435 }, { "epoch": 0.15622000984720147, "grad_norm": 0.5793852670024924, "learning_rate": 5.060598618506274e-06, "loss": 0.0413, "step": 37440 }, { "epoch": 0.15624087256219174, "grad_norm": 0.5194945138111945, "learning_rate": 5.060260729420661e-06, "loss": 0.0341, "step": 37445 }, { "epoch": 0.15626173527718204, "grad_norm": 0.9620655691384555, "learning_rate": 5.059922908007153e-06, "loss": 0.0358, "step": 37450 }, { "epoch": 0.1562825979921723, "grad_norm": 1.0094136456187215, "learning_rate": 5.059585154243169e-06, "loss": 0.0441, "step": 37455 }, { "epoch": 0.15630346070716258, "grad_norm": 0.9907516811362705, "learning_rate": 5.059247468106129e-06, "loss": 0.0327, "step": 37460 }, { "epoch": 0.15632432342215286, "grad_norm": 1.24658495356778, "learning_rate": 5.05890984957347e-06, "loss": 0.0325, "step": 37465 }, { "epoch": 0.15634518613714316, "grad_norm": 1.2816680798417188, "learning_rate": 5.058572298622638e-06, "loss": 0.0389, "step": 37470 }, { "epoch": 0.15636604885213343, "grad_norm": 1.7596978532322178, "learning_rate": 5.058234815231091e-06, "loss": 0.0597, "step": 37475 }, { "epoch": 0.1563869115671237, "grad_norm": 0.7997545196000009, "learning_rate": 5.0578973993762926e-06, "loss": 0.0365, "step": 37480 }, { "epoch": 0.15640777428211397, "grad_norm": 1.0775119289408857, "learning_rate": 5.0575600510357225e-06, "loss": 0.0387, "step": 37485 }, { "epoch": 0.15642863699710424, "grad_norm": 0.7414661860043947, "learning_rate": 5.057222770186868e-06, "loss": 0.0337, "step": 37490 }, { "epoch": 0.15644949971209454, "grad_norm": 0.9033385748694365, "learning_rate": 5.056885556807227e-06, "loss": 0.042, "step": 37495 }, { "epoch": 0.1564703624270848, "grad_norm": 0.9677262166086879, "learning_rate": 5.05654841087431e-06, "loss": 0.0318, "step": 37500 }, { "epoch": 0.15649122514207509, "grad_norm": 0.6152097817901792, "learning_rate": 5.0562113323656345e-06, "loss": 0.0377, "step": 37505 }, { "epoch": 0.15651208785706536, "grad_norm": 0.9865158822693517, "learning_rate": 5.055874321258731e-06, "loss": 0.0432, "step": 37510 }, { "epoch": 0.15653295057205566, "grad_norm": 1.609026635810884, "learning_rate": 5.055537377531141e-06, "loss": 0.0393, "step": 37515 }, { "epoch": 0.15655381328704593, "grad_norm": 1.1178677877239531, "learning_rate": 5.055200501160413e-06, "loss": 0.0416, "step": 37520 }, { "epoch": 0.1565746760020362, "grad_norm": 0.8770955856560932, "learning_rate": 5.05486369212411e-06, "loss": 0.0443, "step": 37525 }, { "epoch": 0.15659553871702647, "grad_norm": 0.8936947457999623, "learning_rate": 5.054526950399804e-06, "loss": 0.0451, "step": 37530 }, { "epoch": 0.15661640143201674, "grad_norm": 0.9558697505769905, "learning_rate": 5.0541902759650765e-06, "loss": 0.044, "step": 37535 }, { "epoch": 0.15663726414700704, "grad_norm": 1.048570540010706, "learning_rate": 5.0538536687975206e-06, "loss": 0.0481, "step": 37540 }, { "epoch": 0.15665812686199732, "grad_norm": 0.6390064962572589, "learning_rate": 5.05351712887474e-06, "loss": 0.0339, "step": 37545 }, { "epoch": 0.1566789895769876, "grad_norm": 0.8890233044405923, "learning_rate": 5.053180656174346e-06, "loss": 0.0332, "step": 37550 }, { "epoch": 0.15669985229197786, "grad_norm": 0.862035515125423, "learning_rate": 5.052844250673965e-06, "loss": 0.0498, "step": 37555 }, { "epoch": 0.15672071500696816, "grad_norm": 1.5577232873915206, "learning_rate": 5.052507912351231e-06, "loss": 0.033, "step": 37560 }, { "epoch": 0.15674157772195843, "grad_norm": 0.8880417238552627, "learning_rate": 5.052171641183787e-06, "loss": 0.0415, "step": 37565 }, { "epoch": 0.1567624404369487, "grad_norm": 0.7144793487640031, "learning_rate": 5.0518354371492914e-06, "loss": 0.041, "step": 37570 }, { "epoch": 0.15678330315193897, "grad_norm": 1.2752312235552397, "learning_rate": 5.051499300225408e-06, "loss": 0.0515, "step": 37575 }, { "epoch": 0.15680416586692925, "grad_norm": 0.7411424923873372, "learning_rate": 5.051163230389813e-06, "loss": 0.0375, "step": 37580 }, { "epoch": 0.15682502858191955, "grad_norm": 1.0238494078432376, "learning_rate": 5.050827227620193e-06, "loss": 0.0418, "step": 37585 }, { "epoch": 0.15684589129690982, "grad_norm": 1.2166494404312063, "learning_rate": 5.050491291894244e-06, "loss": 0.0387, "step": 37590 }, { "epoch": 0.1568667540119001, "grad_norm": 0.678906206494148, "learning_rate": 5.0501554231896745e-06, "loss": 0.0416, "step": 37595 }, { "epoch": 0.15688761672689036, "grad_norm": 0.5965298009565961, "learning_rate": 5.049819621484202e-06, "loss": 0.0361, "step": 37600 }, { "epoch": 0.15690847944188066, "grad_norm": 0.6853355238510278, "learning_rate": 5.049483886755555e-06, "loss": 0.0299, "step": 37605 }, { "epoch": 0.15692934215687093, "grad_norm": 0.9836858228287177, "learning_rate": 5.04914821898147e-06, "loss": 0.0399, "step": 37610 }, { "epoch": 0.1569502048718612, "grad_norm": 0.9262242432246638, "learning_rate": 5.0488126181396966e-06, "loss": 0.0407, "step": 37615 }, { "epoch": 0.15697106758685148, "grad_norm": 1.2551181096217587, "learning_rate": 5.048477084207994e-06, "loss": 0.0421, "step": 37620 }, { "epoch": 0.15699193030184175, "grad_norm": 3.2535353550175743, "learning_rate": 5.048141617164133e-06, "loss": 0.0384, "step": 37625 }, { "epoch": 0.15701279301683205, "grad_norm": 1.8463595869616045, "learning_rate": 5.047806216985891e-06, "loss": 0.0519, "step": 37630 }, { "epoch": 0.15703365573182232, "grad_norm": 0.9947768020888472, "learning_rate": 5.047470883651059e-06, "loss": 0.0471, "step": 37635 }, { "epoch": 0.1570545184468126, "grad_norm": 1.0092511008007932, "learning_rate": 5.047135617137439e-06, "loss": 0.0411, "step": 37640 }, { "epoch": 0.15707538116180286, "grad_norm": 0.894561960921546, "learning_rate": 5.046800417422838e-06, "loss": 0.0344, "step": 37645 }, { "epoch": 0.15709624387679316, "grad_norm": 1.4578283050701157, "learning_rate": 5.046465284485081e-06, "loss": 0.0502, "step": 37650 }, { "epoch": 0.15711710659178343, "grad_norm": 0.9666726805859015, "learning_rate": 5.046130218301997e-06, "loss": 0.0314, "step": 37655 }, { "epoch": 0.1571379693067737, "grad_norm": 1.4093095794175436, "learning_rate": 5.0457952188514285e-06, "loss": 0.0352, "step": 37660 }, { "epoch": 0.15715883202176398, "grad_norm": 1.4354587911397323, "learning_rate": 5.045460286111227e-06, "loss": 0.0421, "step": 37665 }, { "epoch": 0.15717969473675425, "grad_norm": 0.8812674911004319, "learning_rate": 5.045125420059255e-06, "loss": 0.0276, "step": 37670 }, { "epoch": 0.15720055745174455, "grad_norm": 0.8122279157857056, "learning_rate": 5.044790620673384e-06, "loss": 0.0419, "step": 37675 }, { "epoch": 0.15722142016673482, "grad_norm": 0.7047470815109345, "learning_rate": 5.044455887931499e-06, "loss": 0.0381, "step": 37680 }, { "epoch": 0.1572422828817251, "grad_norm": 0.9261841393687806, "learning_rate": 5.044121221811492e-06, "loss": 0.0375, "step": 37685 }, { "epoch": 0.15726314559671536, "grad_norm": 0.7129712870797236, "learning_rate": 5.043786622291266e-06, "loss": 0.0444, "step": 37690 }, { "epoch": 0.15728400831170566, "grad_norm": 0.804103910616178, "learning_rate": 5.043452089348735e-06, "loss": 0.0319, "step": 37695 }, { "epoch": 0.15730487102669594, "grad_norm": 0.6992089331278644, "learning_rate": 5.043117622961822e-06, "loss": 0.0317, "step": 37700 }, { "epoch": 0.1573257337416862, "grad_norm": 0.8333600861831423, "learning_rate": 5.042783223108462e-06, "loss": 0.0355, "step": 37705 }, { "epoch": 0.15734659645667648, "grad_norm": 2.086310363913551, "learning_rate": 5.042448889766599e-06, "loss": 0.0537, "step": 37710 }, { "epoch": 0.15736745917166675, "grad_norm": 0.959302028283694, "learning_rate": 5.0421146229141885e-06, "loss": 0.0338, "step": 37715 }, { "epoch": 0.15738832188665705, "grad_norm": 0.719563853295021, "learning_rate": 5.041780422529193e-06, "loss": 0.0318, "step": 37720 }, { "epoch": 0.15740918460164732, "grad_norm": 0.8987492951360186, "learning_rate": 5.041446288589591e-06, "loss": 0.0447, "step": 37725 }, { "epoch": 0.1574300473166376, "grad_norm": 0.867626480391632, "learning_rate": 5.041112221073364e-06, "loss": 0.0301, "step": 37730 }, { "epoch": 0.15745091003162787, "grad_norm": 1.2291565232449861, "learning_rate": 5.04077821995851e-06, "loss": 0.0432, "step": 37735 }, { "epoch": 0.15747177274661817, "grad_norm": 0.6125449225284202, "learning_rate": 5.040444285223034e-06, "loss": 0.0346, "step": 37740 }, { "epoch": 0.15749263546160844, "grad_norm": 1.3034925940540323, "learning_rate": 5.04011041684495e-06, "loss": 0.0411, "step": 37745 }, { "epoch": 0.1575134981765987, "grad_norm": 1.4660338811532019, "learning_rate": 5.039776614802287e-06, "loss": 0.0513, "step": 37750 }, { "epoch": 0.15753436089158898, "grad_norm": 1.5461413848138263, "learning_rate": 5.039442879073081e-06, "loss": 0.0474, "step": 37755 }, { "epoch": 0.15755522360657925, "grad_norm": 0.8509466295493904, "learning_rate": 5.039109209635377e-06, "loss": 0.0323, "step": 37760 }, { "epoch": 0.15757608632156955, "grad_norm": 1.1416588628484217, "learning_rate": 5.03877560646723e-06, "loss": 0.0544, "step": 37765 }, { "epoch": 0.15759694903655982, "grad_norm": 0.6462859752843105, "learning_rate": 5.038442069546711e-06, "loss": 0.0365, "step": 37770 }, { "epoch": 0.1576178117515501, "grad_norm": 0.8265044647166546, "learning_rate": 5.038108598851893e-06, "loss": 0.0366, "step": 37775 }, { "epoch": 0.15763867446654037, "grad_norm": 1.0906049592842362, "learning_rate": 5.0377751943608656e-06, "loss": 0.0429, "step": 37780 }, { "epoch": 0.15765953718153067, "grad_norm": 1.0173768817184297, "learning_rate": 5.0374418560517245e-06, "loss": 0.0378, "step": 37785 }, { "epoch": 0.15768039989652094, "grad_norm": 1.222954646919942, "learning_rate": 5.037108583902578e-06, "loss": 0.0468, "step": 37790 }, { "epoch": 0.1577012626115112, "grad_norm": 4.734572855547699, "learning_rate": 5.036775377891544e-06, "loss": 0.0428, "step": 37795 }, { "epoch": 0.15772212532650148, "grad_norm": 0.9864544207535422, "learning_rate": 5.036442237996748e-06, "loss": 0.0402, "step": 37800 }, { "epoch": 0.15774298804149176, "grad_norm": 0.7174102836628481, "learning_rate": 5.03610916419633e-06, "loss": 0.0323, "step": 37805 }, { "epoch": 0.15776385075648205, "grad_norm": 0.6270517809839669, "learning_rate": 5.035776156468437e-06, "loss": 0.0275, "step": 37810 }, { "epoch": 0.15778471347147233, "grad_norm": 1.4692198176472806, "learning_rate": 5.0354432147912266e-06, "loss": 0.0388, "step": 37815 }, { "epoch": 0.1578055761864626, "grad_norm": 1.8941602298937774, "learning_rate": 5.035110339142868e-06, "loss": 0.0462, "step": 37820 }, { "epoch": 0.15782643890145287, "grad_norm": 1.1235434966148243, "learning_rate": 5.034777529501538e-06, "loss": 0.0381, "step": 37825 }, { "epoch": 0.15784730161644317, "grad_norm": 1.4842582492804144, "learning_rate": 5.034444785845426e-06, "loss": 0.0346, "step": 37830 }, { "epoch": 0.15786816433143344, "grad_norm": 1.0804644806449484, "learning_rate": 5.03411210815273e-06, "loss": 0.0405, "step": 37835 }, { "epoch": 0.1578890270464237, "grad_norm": 1.0028039740560768, "learning_rate": 5.033779496401657e-06, "loss": 0.0417, "step": 37840 }, { "epoch": 0.15790988976141399, "grad_norm": 0.9532077766384327, "learning_rate": 5.033446950570429e-06, "loss": 0.0571, "step": 37845 }, { "epoch": 0.15793075247640426, "grad_norm": 0.8892363650661033, "learning_rate": 5.0331144706372715e-06, "loss": 0.0441, "step": 37850 }, { "epoch": 0.15795161519139456, "grad_norm": 0.535553014167085, "learning_rate": 5.032782056580424e-06, "loss": 0.0339, "step": 37855 }, { "epoch": 0.15797247790638483, "grad_norm": 0.9380005212282647, "learning_rate": 5.0324497083781365e-06, "loss": 0.0369, "step": 37860 }, { "epoch": 0.1579933406213751, "grad_norm": 0.6826939238462931, "learning_rate": 5.032117426008666e-06, "loss": 0.0365, "step": 37865 }, { "epoch": 0.15801420333636537, "grad_norm": 0.9601104280635355, "learning_rate": 5.0317852094502826e-06, "loss": 0.0344, "step": 37870 }, { "epoch": 0.15803506605135567, "grad_norm": 1.7235921462324126, "learning_rate": 5.031453058681265e-06, "loss": 0.0427, "step": 37875 }, { "epoch": 0.15805592876634594, "grad_norm": 0.861383663678039, "learning_rate": 5.0311209736799e-06, "loss": 0.0417, "step": 37880 }, { "epoch": 0.15807679148133622, "grad_norm": 1.5383065953734545, "learning_rate": 5.03078895442449e-06, "loss": 0.0594, "step": 37885 }, { "epoch": 0.1580976541963265, "grad_norm": 1.0261003919750764, "learning_rate": 5.030457000893343e-06, "loss": 0.0378, "step": 37890 }, { "epoch": 0.15811851691131676, "grad_norm": 0.47167377766808627, "learning_rate": 5.0301251130647754e-06, "loss": 0.0331, "step": 37895 }, { "epoch": 0.15813937962630706, "grad_norm": 0.7193368418518056, "learning_rate": 5.029793290917119e-06, "loss": 0.0387, "step": 37900 }, { "epoch": 0.15816024234129733, "grad_norm": 0.797258659758996, "learning_rate": 5.029461534428713e-06, "loss": 0.0417, "step": 37905 }, { "epoch": 0.1581811050562876, "grad_norm": 1.1046372088975134, "learning_rate": 5.029129843577905e-06, "loss": 0.0353, "step": 37910 }, { "epoch": 0.15820196777127787, "grad_norm": 0.9975641417465186, "learning_rate": 5.028798218343052e-06, "loss": 0.0575, "step": 37915 }, { "epoch": 0.15822283048626817, "grad_norm": 0.8456892259256451, "learning_rate": 5.028466658702528e-06, "loss": 0.0316, "step": 37920 }, { "epoch": 0.15824369320125845, "grad_norm": 1.307584547172949, "learning_rate": 5.0281351646347075e-06, "loss": 0.0456, "step": 37925 }, { "epoch": 0.15826455591624872, "grad_norm": 0.9643719018951052, "learning_rate": 5.027803736117982e-06, "loss": 0.039, "step": 37930 }, { "epoch": 0.158285418631239, "grad_norm": 0.9068261734611008, "learning_rate": 5.02747237313075e-06, "loss": 0.0439, "step": 37935 }, { "epoch": 0.15830628134622926, "grad_norm": 0.5365905391637908, "learning_rate": 5.02714107565142e-06, "loss": 0.0394, "step": 37940 }, { "epoch": 0.15832714406121956, "grad_norm": 0.9578010426888947, "learning_rate": 5.026809843658409e-06, "loss": 0.0374, "step": 37945 }, { "epoch": 0.15834800677620983, "grad_norm": 0.8826092051903383, "learning_rate": 5.026478677130149e-06, "loss": 0.0392, "step": 37950 }, { "epoch": 0.1583688694912001, "grad_norm": 0.5501114993856173, "learning_rate": 5.026147576045077e-06, "loss": 0.0314, "step": 37955 }, { "epoch": 0.15838973220619038, "grad_norm": 1.0601132423031394, "learning_rate": 5.025816540381643e-06, "loss": 0.0337, "step": 37960 }, { "epoch": 0.15841059492118068, "grad_norm": 0.7280211723511373, "learning_rate": 5.025485570118304e-06, "loss": 0.0362, "step": 37965 }, { "epoch": 0.15843145763617095, "grad_norm": 0.5916573412647623, "learning_rate": 5.025154665233528e-06, "loss": 0.0348, "step": 37970 }, { "epoch": 0.15845232035116122, "grad_norm": 1.452526387632592, "learning_rate": 5.024823825705795e-06, "loss": 0.0351, "step": 37975 }, { "epoch": 0.1584731830661515, "grad_norm": 1.1427444071985176, "learning_rate": 5.024493051513594e-06, "loss": 0.0456, "step": 37980 }, { "epoch": 0.15849404578114176, "grad_norm": 1.245527824727753, "learning_rate": 5.024162342635422e-06, "loss": 0.0389, "step": 37985 }, { "epoch": 0.15851490849613206, "grad_norm": 3.849045739198038, "learning_rate": 5.0238316990497886e-06, "loss": 0.071, "step": 37990 }, { "epoch": 0.15853577121112233, "grad_norm": 1.184881025680497, "learning_rate": 5.0235011207352094e-06, "loss": 0.0456, "step": 37995 }, { "epoch": 0.1585566339261126, "grad_norm": 1.333727834923048, "learning_rate": 5.023170607670213e-06, "loss": 0.0376, "step": 38000 }, { "epoch": 0.15857749664110288, "grad_norm": 0.914190780572481, "learning_rate": 5.02284015983334e-06, "loss": 0.04, "step": 38005 }, { "epoch": 0.15859835935609318, "grad_norm": 1.1982492117148278, "learning_rate": 5.022509777203135e-06, "loss": 0.0376, "step": 38010 }, { "epoch": 0.15861922207108345, "grad_norm": 1.200731305282563, "learning_rate": 5.022179459758157e-06, "loss": 0.0369, "step": 38015 }, { "epoch": 0.15864008478607372, "grad_norm": 1.0166007564975257, "learning_rate": 5.021849207476973e-06, "loss": 0.0377, "step": 38020 }, { "epoch": 0.158660947501064, "grad_norm": 0.9385410909056291, "learning_rate": 5.021519020338161e-06, "loss": 0.0358, "step": 38025 }, { "epoch": 0.15868181021605426, "grad_norm": 0.7922219878521135, "learning_rate": 5.0211888983203085e-06, "loss": 0.0425, "step": 38030 }, { "epoch": 0.15870267293104456, "grad_norm": 1.240844301577848, "learning_rate": 5.020858841402011e-06, "loss": 0.0385, "step": 38035 }, { "epoch": 0.15872353564603484, "grad_norm": 1.1144654101767562, "learning_rate": 5.020528849561877e-06, "loss": 0.0424, "step": 38040 }, { "epoch": 0.1587443983610251, "grad_norm": 1.083358026499174, "learning_rate": 5.020198922778523e-06, "loss": 0.0432, "step": 38045 }, { "epoch": 0.15876526107601538, "grad_norm": 0.8186762665679158, "learning_rate": 5.019869061030575e-06, "loss": 0.0386, "step": 38050 }, { "epoch": 0.15878612379100568, "grad_norm": 1.0520725377480769, "learning_rate": 5.01953926429667e-06, "loss": 0.0434, "step": 38055 }, { "epoch": 0.15880698650599595, "grad_norm": 4.058916571350199, "learning_rate": 5.0192095325554524e-06, "loss": 0.0332, "step": 38060 }, { "epoch": 0.15882784922098622, "grad_norm": 0.6655215219512144, "learning_rate": 5.018879865785581e-06, "loss": 0.0344, "step": 38065 }, { "epoch": 0.1588487119359765, "grad_norm": 0.8771164985685173, "learning_rate": 5.018550263965721e-06, "loss": 0.0371, "step": 38070 }, { "epoch": 0.15886957465096677, "grad_norm": 0.4887712412685655, "learning_rate": 5.018220727074547e-06, "loss": 0.0346, "step": 38075 }, { "epoch": 0.15889043736595707, "grad_norm": 0.8930023886392637, "learning_rate": 5.017891255090745e-06, "loss": 0.0367, "step": 38080 }, { "epoch": 0.15891130008094734, "grad_norm": 0.9762536785934658, "learning_rate": 5.017561847993011e-06, "loss": 0.0352, "step": 38085 }, { "epoch": 0.1589321627959376, "grad_norm": 0.6707789439285429, "learning_rate": 5.017232505760049e-06, "loss": 0.0314, "step": 38090 }, { "epoch": 0.15895302551092788, "grad_norm": 0.8472460636697162, "learning_rate": 5.016903228370574e-06, "loss": 0.0384, "step": 38095 }, { "epoch": 0.15897388822591818, "grad_norm": 1.1551078552148697, "learning_rate": 5.016574015803312e-06, "loss": 0.0449, "step": 38100 }, { "epoch": 0.15899475094090845, "grad_norm": 0.777319285165886, "learning_rate": 5.016244868036996e-06, "loss": 0.0305, "step": 38105 }, { "epoch": 0.15901561365589872, "grad_norm": 1.2599701006891764, "learning_rate": 5.01591578505037e-06, "loss": 0.0366, "step": 38110 }, { "epoch": 0.159036476370889, "grad_norm": 0.8752901416841526, "learning_rate": 5.015586766822189e-06, "loss": 0.0456, "step": 38115 }, { "epoch": 0.15905733908587927, "grad_norm": 1.3915410453314165, "learning_rate": 5.0152578133312155e-06, "loss": 0.042, "step": 38120 }, { "epoch": 0.15907820180086957, "grad_norm": 0.9250789615258721, "learning_rate": 5.014928924556225e-06, "loss": 0.0337, "step": 38125 }, { "epoch": 0.15909906451585984, "grad_norm": 0.9530457774133778, "learning_rate": 5.014600100475997e-06, "loss": 0.0345, "step": 38130 }, { "epoch": 0.1591199272308501, "grad_norm": 0.9421196841475226, "learning_rate": 5.0142713410693285e-06, "loss": 0.0303, "step": 38135 }, { "epoch": 0.15914078994584038, "grad_norm": 1.0116796670295722, "learning_rate": 5.0139426463150195e-06, "loss": 0.0361, "step": 38140 }, { "epoch": 0.15916165266083068, "grad_norm": 0.6564113627676583, "learning_rate": 5.013614016191883e-06, "loss": 0.041, "step": 38145 }, { "epoch": 0.15918251537582095, "grad_norm": 0.9205423595304709, "learning_rate": 5.013285450678742e-06, "loss": 0.0406, "step": 38150 }, { "epoch": 0.15920337809081123, "grad_norm": 1.3607679270264947, "learning_rate": 5.012956949754427e-06, "loss": 0.0378, "step": 38155 }, { "epoch": 0.1592242408058015, "grad_norm": 0.7638430120644596, "learning_rate": 5.012628513397781e-06, "loss": 0.035, "step": 38160 }, { "epoch": 0.15924510352079177, "grad_norm": 0.9253860466335656, "learning_rate": 5.0123001415876535e-06, "loss": 0.0679, "step": 38165 }, { "epoch": 0.15926596623578207, "grad_norm": 1.1565915217574405, "learning_rate": 5.011971834302906e-06, "loss": 0.044, "step": 38170 }, { "epoch": 0.15928682895077234, "grad_norm": 0.6772396318141958, "learning_rate": 5.011643591522409e-06, "loss": 0.0339, "step": 38175 }, { "epoch": 0.1593076916657626, "grad_norm": 1.4930077281158645, "learning_rate": 5.0113154132250445e-06, "loss": 0.0463, "step": 38180 }, { "epoch": 0.15932855438075288, "grad_norm": 0.6987018971802578, "learning_rate": 5.0109872993897e-06, "loss": 0.0351, "step": 38185 }, { "epoch": 0.15934941709574318, "grad_norm": 0.6485831487222834, "learning_rate": 5.0106592499952764e-06, "loss": 0.0401, "step": 38190 }, { "epoch": 0.15937027981073346, "grad_norm": 0.9956616165468598, "learning_rate": 5.010331265020684e-06, "loss": 0.0312, "step": 38195 }, { "epoch": 0.15939114252572373, "grad_norm": 1.2176754249970672, "learning_rate": 5.0100033444448386e-06, "loss": 0.0286, "step": 38200 }, { "epoch": 0.159412005240714, "grad_norm": 0.9887485964779144, "learning_rate": 5.009675488246673e-06, "loss": 0.0437, "step": 38205 }, { "epoch": 0.15943286795570427, "grad_norm": 0.932997461298755, "learning_rate": 5.009347696405121e-06, "loss": 0.053, "step": 38210 }, { "epoch": 0.15945373067069457, "grad_norm": 1.629374058652775, "learning_rate": 5.009019968899133e-06, "loss": 0.0435, "step": 38215 }, { "epoch": 0.15947459338568484, "grad_norm": 0.8505459368812142, "learning_rate": 5.008692305707668e-06, "loss": 0.0416, "step": 38220 }, { "epoch": 0.15949545610067511, "grad_norm": 0.5922385280264092, "learning_rate": 5.00836470680969e-06, "loss": 0.0349, "step": 38225 }, { "epoch": 0.1595163188156654, "grad_norm": 0.5914504693099255, "learning_rate": 5.008037172184178e-06, "loss": 0.0371, "step": 38230 }, { "epoch": 0.1595371815306557, "grad_norm": 0.8202224541763835, "learning_rate": 5.007709701810118e-06, "loss": 0.0666, "step": 38235 }, { "epoch": 0.15955804424564596, "grad_norm": 0.8322686591973143, "learning_rate": 5.007382295666506e-06, "loss": 0.0344, "step": 38240 }, { "epoch": 0.15957890696063623, "grad_norm": 0.8128918193891624, "learning_rate": 5.0070549537323475e-06, "loss": 0.044, "step": 38245 }, { "epoch": 0.1595997696756265, "grad_norm": 0.856371185085405, "learning_rate": 5.006727675986658e-06, "loss": 0.0471, "step": 38250 }, { "epoch": 0.15962063239061677, "grad_norm": 0.9449285856698137, "learning_rate": 5.0064004624084615e-06, "loss": 0.0344, "step": 38255 }, { "epoch": 0.15964149510560707, "grad_norm": 1.165096964292596, "learning_rate": 5.006073312976795e-06, "loss": 0.0417, "step": 38260 }, { "epoch": 0.15966235782059734, "grad_norm": 0.6980097837175986, "learning_rate": 5.005746227670699e-06, "loss": 0.0386, "step": 38265 }, { "epoch": 0.15968322053558762, "grad_norm": 1.1908187802671872, "learning_rate": 5.00541920646923e-06, "loss": 0.0368, "step": 38270 }, { "epoch": 0.1597040832505779, "grad_norm": 1.4611449574287774, "learning_rate": 5.00509224935145e-06, "loss": 0.0472, "step": 38275 }, { "epoch": 0.1597249459655682, "grad_norm": 0.8609120611689622, "learning_rate": 5.004765356296432e-06, "loss": 0.0388, "step": 38280 }, { "epoch": 0.15974580868055846, "grad_norm": 0.8369860219678852, "learning_rate": 5.0044385272832576e-06, "loss": 0.0327, "step": 38285 }, { "epoch": 0.15976667139554873, "grad_norm": 1.0685842593833605, "learning_rate": 5.00411176229102e-06, "loss": 0.044, "step": 38290 }, { "epoch": 0.159787534110539, "grad_norm": 0.6642214238577254, "learning_rate": 5.003785061298821e-06, "loss": 0.028, "step": 38295 }, { "epoch": 0.15980839682552928, "grad_norm": 1.195684635012585, "learning_rate": 5.003458424285769e-06, "loss": 0.037, "step": 38300 }, { "epoch": 0.15982925954051957, "grad_norm": 1.0678753456372132, "learning_rate": 5.003131851230988e-06, "loss": 0.0401, "step": 38305 }, { "epoch": 0.15985012225550985, "grad_norm": 0.8511417126402838, "learning_rate": 5.002805342113606e-06, "loss": 0.0398, "step": 38310 }, { "epoch": 0.15987098497050012, "grad_norm": 0.8516797447315406, "learning_rate": 5.002478896912762e-06, "loss": 0.0359, "step": 38315 }, { "epoch": 0.1598918476854904, "grad_norm": 1.168581147147439, "learning_rate": 5.002152515607608e-06, "loss": 0.0449, "step": 38320 }, { "epoch": 0.1599127104004807, "grad_norm": 0.9059155503642318, "learning_rate": 5.001826198177301e-06, "loss": 0.0386, "step": 38325 }, { "epoch": 0.15993357311547096, "grad_norm": 1.0032754383675295, "learning_rate": 5.001499944601007e-06, "loss": 0.0394, "step": 38330 }, { "epoch": 0.15995443583046123, "grad_norm": 0.8932752827217125, "learning_rate": 5.001173754857908e-06, "loss": 0.0337, "step": 38335 }, { "epoch": 0.1599752985454515, "grad_norm": 0.8117126086711939, "learning_rate": 5.000847628927189e-06, "loss": 0.0424, "step": 38340 }, { "epoch": 0.15999616126044178, "grad_norm": 0.7598471208073431, "learning_rate": 5.000521566788046e-06, "loss": 0.0528, "step": 38345 }, { "epoch": 0.16001702397543208, "grad_norm": 1.014781794299067, "learning_rate": 5.000195568419687e-06, "loss": 0.0424, "step": 38350 }, { "epoch": 0.16003788669042235, "grad_norm": 0.8068999614631388, "learning_rate": 4.999869633801325e-06, "loss": 0.0378, "step": 38355 }, { "epoch": 0.16005874940541262, "grad_norm": 1.1192387144059146, "learning_rate": 4.99954376291219e-06, "loss": 0.0334, "step": 38360 }, { "epoch": 0.1600796121204029, "grad_norm": 1.1024916348456766, "learning_rate": 4.9992179557315116e-06, "loss": 0.0397, "step": 38365 }, { "epoch": 0.1601004748353932, "grad_norm": 0.6798534141748231, "learning_rate": 4.998892212238536e-06, "loss": 0.0511, "step": 38370 }, { "epoch": 0.16012133755038346, "grad_norm": 0.9962373725517796, "learning_rate": 4.998566532412519e-06, "loss": 0.0426, "step": 38375 }, { "epoch": 0.16014220026537374, "grad_norm": 1.8290457477567477, "learning_rate": 4.9982409162327185e-06, "loss": 0.0342, "step": 38380 }, { "epoch": 0.160163062980364, "grad_norm": 1.0073532650161123, "learning_rate": 4.9979153636784115e-06, "loss": 0.0352, "step": 38385 }, { "epoch": 0.16018392569535428, "grad_norm": 1.7436209312929702, "learning_rate": 4.997589874728878e-06, "loss": 0.0364, "step": 38390 }, { "epoch": 0.16020478841034458, "grad_norm": 1.0984954239297704, "learning_rate": 4.997264449363411e-06, "loss": 0.034, "step": 38395 }, { "epoch": 0.16022565112533485, "grad_norm": 0.8466616681881483, "learning_rate": 4.996939087561309e-06, "loss": 0.0383, "step": 38400 }, { "epoch": 0.16024651384032512, "grad_norm": 1.2947866367138663, "learning_rate": 4.996613789301884e-06, "loss": 0.0415, "step": 38405 }, { "epoch": 0.1602673765553154, "grad_norm": 0.5197328581096989, "learning_rate": 4.996288554564455e-06, "loss": 0.0336, "step": 38410 }, { "epoch": 0.1602882392703057, "grad_norm": 0.7889395356557297, "learning_rate": 4.995963383328351e-06, "loss": 0.0276, "step": 38415 }, { "epoch": 0.16030910198529597, "grad_norm": 0.9888590658330175, "learning_rate": 4.995638275572911e-06, "loss": 0.0331, "step": 38420 }, { "epoch": 0.16032996470028624, "grad_norm": 0.9095524288678049, "learning_rate": 4.995313231277483e-06, "loss": 0.0372, "step": 38425 }, { "epoch": 0.1603508274152765, "grad_norm": 1.0685111332030555, "learning_rate": 4.994988250421425e-06, "loss": 0.0348, "step": 38430 }, { "epoch": 0.16037169013026678, "grad_norm": 1.0332558101867377, "learning_rate": 4.994663332984102e-06, "loss": 0.0478, "step": 38435 }, { "epoch": 0.16039255284525708, "grad_norm": 0.9103033009356141, "learning_rate": 4.994338478944891e-06, "loss": 0.041, "step": 38440 }, { "epoch": 0.16041341556024735, "grad_norm": 1.1211106831782416, "learning_rate": 4.994013688283178e-06, "loss": 0.0379, "step": 38445 }, { "epoch": 0.16043427827523762, "grad_norm": 0.5423860786205374, "learning_rate": 4.993688960978359e-06, "loss": 0.0527, "step": 38450 }, { "epoch": 0.1604551409902279, "grad_norm": 0.746020174666607, "learning_rate": 4.993364297009835e-06, "loss": 0.0357, "step": 38455 }, { "epoch": 0.1604760037052182, "grad_norm": 0.6028296340779615, "learning_rate": 4.993039696357021e-06, "loss": 0.0385, "step": 38460 }, { "epoch": 0.16049686642020847, "grad_norm": 0.5109857233962306, "learning_rate": 4.992715158999343e-06, "loss": 0.0372, "step": 38465 }, { "epoch": 0.16051772913519874, "grad_norm": 0.7414875267578717, "learning_rate": 4.99239068491623e-06, "loss": 0.0357, "step": 38470 }, { "epoch": 0.160538591850189, "grad_norm": 1.20856747519279, "learning_rate": 4.992066274087124e-06, "loss": 0.0457, "step": 38475 }, { "epoch": 0.16055945456517928, "grad_norm": 0.9793045430850936, "learning_rate": 4.991741926491478e-06, "loss": 0.0296, "step": 38480 }, { "epoch": 0.16058031728016958, "grad_norm": 0.9255121546144854, "learning_rate": 4.991417642108751e-06, "loss": 0.0386, "step": 38485 }, { "epoch": 0.16060117999515985, "grad_norm": 0.40992764238281293, "learning_rate": 4.991093420918412e-06, "loss": 0.0411, "step": 38490 }, { "epoch": 0.16062204271015013, "grad_norm": 1.4164190600933488, "learning_rate": 4.990769262899943e-06, "loss": 0.0373, "step": 38495 }, { "epoch": 0.1606429054251404, "grad_norm": 0.9950465254060873, "learning_rate": 4.990445168032829e-06, "loss": 0.0466, "step": 38500 }, { "epoch": 0.1606637681401307, "grad_norm": 0.7065467947339363, "learning_rate": 4.990121136296571e-06, "loss": 0.0333, "step": 38505 }, { "epoch": 0.16068463085512097, "grad_norm": 0.9132280758068272, "learning_rate": 4.989797167670674e-06, "loss": 0.0402, "step": 38510 }, { "epoch": 0.16070549357011124, "grad_norm": 1.366422873608676, "learning_rate": 4.9894732621346546e-06, "loss": 0.0364, "step": 38515 }, { "epoch": 0.1607263562851015, "grad_norm": 1.0225194251553187, "learning_rate": 4.98914941966804e-06, "loss": 0.0473, "step": 38520 }, { "epoch": 0.16074721900009178, "grad_norm": 0.8596764867540262, "learning_rate": 4.988825640250362e-06, "loss": 0.0364, "step": 38525 }, { "epoch": 0.16076808171508208, "grad_norm": 0.8544622352653318, "learning_rate": 4.988501923861168e-06, "loss": 0.0385, "step": 38530 }, { "epoch": 0.16078894443007236, "grad_norm": 1.1015237975525896, "learning_rate": 4.988178270480013e-06, "loss": 0.0382, "step": 38535 }, { "epoch": 0.16080980714506263, "grad_norm": 0.8759864306923865, "learning_rate": 4.987854680086455e-06, "loss": 0.0349, "step": 38540 }, { "epoch": 0.1608306698600529, "grad_norm": 0.9777924162896982, "learning_rate": 4.987531152660069e-06, "loss": 0.0367, "step": 38545 }, { "epoch": 0.1608515325750432, "grad_norm": 1.3047165513936465, "learning_rate": 4.987207688180437e-06, "loss": 0.0454, "step": 38550 }, { "epoch": 0.16087239529003347, "grad_norm": 0.7865525983928192, "learning_rate": 4.986884286627148e-06, "loss": 0.0416, "step": 38555 }, { "epoch": 0.16089325800502374, "grad_norm": 0.8712255570126601, "learning_rate": 4.9865609479798025e-06, "loss": 0.0393, "step": 38560 }, { "epoch": 0.16091412072001401, "grad_norm": 0.512970966581183, "learning_rate": 4.98623767221801e-06, "loss": 0.0477, "step": 38565 }, { "epoch": 0.1609349834350043, "grad_norm": 0.976451160495713, "learning_rate": 4.985914459321389e-06, "loss": 0.0342, "step": 38570 }, { "epoch": 0.16095584614999459, "grad_norm": 0.7242602237348936, "learning_rate": 4.985591309269568e-06, "loss": 0.0289, "step": 38575 }, { "epoch": 0.16097670886498486, "grad_norm": 0.8295214176963654, "learning_rate": 4.985268222042184e-06, "loss": 0.0294, "step": 38580 }, { "epoch": 0.16099757157997513, "grad_norm": 1.2945714341960788, "learning_rate": 4.98494519761888e-06, "loss": 0.0343, "step": 38585 }, { "epoch": 0.1610184342949654, "grad_norm": 0.9739479951337799, "learning_rate": 4.984622235979315e-06, "loss": 0.0429, "step": 38590 }, { "epoch": 0.1610392970099557, "grad_norm": 0.6086818084521105, "learning_rate": 4.984299337103153e-06, "loss": 0.0271, "step": 38595 }, { "epoch": 0.16106015972494597, "grad_norm": 0.8770121621433276, "learning_rate": 4.983976500970066e-06, "loss": 0.027, "step": 38600 }, { "epoch": 0.16108102243993624, "grad_norm": 0.6346273417099808, "learning_rate": 4.9836537275597395e-06, "loss": 0.0306, "step": 38605 }, { "epoch": 0.16110188515492652, "grad_norm": 0.8963122874897731, "learning_rate": 4.9833310168518645e-06, "loss": 0.0356, "step": 38610 }, { "epoch": 0.1611227478699168, "grad_norm": 0.8065987706628212, "learning_rate": 4.983008368826143e-06, "loss": 0.0471, "step": 38615 }, { "epoch": 0.1611436105849071, "grad_norm": 0.7961897246390277, "learning_rate": 4.982685783462285e-06, "loss": 0.0408, "step": 38620 }, { "epoch": 0.16116447329989736, "grad_norm": 0.7975550613374427, "learning_rate": 4.98236326074001e-06, "loss": 0.0283, "step": 38625 }, { "epoch": 0.16118533601488763, "grad_norm": 1.0368302785620287, "learning_rate": 4.9820408006390505e-06, "loss": 0.0562, "step": 38630 }, { "epoch": 0.1612061987298779, "grad_norm": 0.8649328138470941, "learning_rate": 4.9817184031391404e-06, "loss": 0.0395, "step": 38635 }, { "epoch": 0.1612270614448682, "grad_norm": 0.8741040632726012, "learning_rate": 4.981396068220029e-06, "loss": 0.0334, "step": 38640 }, { "epoch": 0.16124792415985847, "grad_norm": 0.9157651167245897, "learning_rate": 4.981073795861474e-06, "loss": 0.0274, "step": 38645 }, { "epoch": 0.16126878687484875, "grad_norm": 0.8664355524783365, "learning_rate": 4.980751586043238e-06, "loss": 0.0353, "step": 38650 }, { "epoch": 0.16128964958983902, "grad_norm": 0.7447168624511598, "learning_rate": 4.9804294387451e-06, "loss": 0.0305, "step": 38655 }, { "epoch": 0.1613105123048293, "grad_norm": 0.7202934311278457, "learning_rate": 4.980107353946841e-06, "loss": 0.0431, "step": 38660 }, { "epoch": 0.1613313750198196, "grad_norm": 0.9028276650706241, "learning_rate": 4.979785331628257e-06, "loss": 0.0386, "step": 38665 }, { "epoch": 0.16135223773480986, "grad_norm": 1.3750031068424373, "learning_rate": 4.979463371769146e-06, "loss": 0.0315, "step": 38670 }, { "epoch": 0.16137310044980013, "grad_norm": 0.9307010665962359, "learning_rate": 4.979141474349325e-06, "loss": 0.0356, "step": 38675 }, { "epoch": 0.1613939631647904, "grad_norm": 0.7112695653987114, "learning_rate": 4.97881963934861e-06, "loss": 0.0378, "step": 38680 }, { "epoch": 0.1614148258797807, "grad_norm": 0.8870777976842827, "learning_rate": 4.9784978667468336e-06, "loss": 0.0421, "step": 38685 }, { "epoch": 0.16143568859477098, "grad_norm": 0.6649002508781003, "learning_rate": 4.978176156523834e-06, "loss": 0.0343, "step": 38690 }, { "epoch": 0.16145655130976125, "grad_norm": 0.992156417318707, "learning_rate": 4.977854508659459e-06, "loss": 0.0469, "step": 38695 }, { "epoch": 0.16147741402475152, "grad_norm": 0.9018632494818929, "learning_rate": 4.9775329231335655e-06, "loss": 0.0379, "step": 38700 }, { "epoch": 0.1614982767397418, "grad_norm": 1.0624018008757419, "learning_rate": 4.97721139992602e-06, "loss": 0.0371, "step": 38705 }, { "epoch": 0.1615191394547321, "grad_norm": 1.3451768544782157, "learning_rate": 4.976889939016698e-06, "loss": 0.0345, "step": 38710 }, { "epoch": 0.16154000216972236, "grad_norm": 0.9965789473575258, "learning_rate": 4.976568540385486e-06, "loss": 0.0311, "step": 38715 }, { "epoch": 0.16156086488471263, "grad_norm": 0.6240147385159485, "learning_rate": 4.976247204012274e-06, "loss": 0.0405, "step": 38720 }, { "epoch": 0.1615817275997029, "grad_norm": 1.5590577542208375, "learning_rate": 4.975925929876966e-06, "loss": 0.0381, "step": 38725 }, { "epoch": 0.1616025903146932, "grad_norm": 0.6759215784402602, "learning_rate": 4.9756047179594735e-06, "loss": 0.0565, "step": 38730 }, { "epoch": 0.16162345302968348, "grad_norm": 1.2828239138342148, "learning_rate": 4.975283568239718e-06, "loss": 0.031, "step": 38735 }, { "epoch": 0.16164431574467375, "grad_norm": 2.0172051432282228, "learning_rate": 4.974962480697629e-06, "loss": 0.0293, "step": 38740 }, { "epoch": 0.16166517845966402, "grad_norm": 0.9977827829203698, "learning_rate": 4.974641455313146e-06, "loss": 0.0365, "step": 38745 }, { "epoch": 0.1616860411746543, "grad_norm": 0.970678401612547, "learning_rate": 4.974320492066216e-06, "loss": 0.0454, "step": 38750 }, { "epoch": 0.1617069038896446, "grad_norm": 0.9411149371220788, "learning_rate": 4.973999590936797e-06, "loss": 0.0355, "step": 38755 }, { "epoch": 0.16172776660463486, "grad_norm": 0.8926688510293853, "learning_rate": 4.973678751904855e-06, "loss": 0.0459, "step": 38760 }, { "epoch": 0.16174862931962514, "grad_norm": 1.0137038531575413, "learning_rate": 4.973357974950363e-06, "loss": 0.0401, "step": 38765 }, { "epoch": 0.1617694920346154, "grad_norm": 0.8809470932075761, "learning_rate": 4.973037260053308e-06, "loss": 0.0305, "step": 38770 }, { "epoch": 0.1617903547496057, "grad_norm": 0.7923277771782364, "learning_rate": 4.972716607193683e-06, "loss": 0.0271, "step": 38775 }, { "epoch": 0.16181121746459598, "grad_norm": 1.048048521344508, "learning_rate": 4.972396016351488e-06, "loss": 0.0499, "step": 38780 }, { "epoch": 0.16183208017958625, "grad_norm": 0.7354007131277418, "learning_rate": 4.9720754875067365e-06, "loss": 0.0401, "step": 38785 }, { "epoch": 0.16185294289457652, "grad_norm": 0.937791245564764, "learning_rate": 4.9717550206394475e-06, "loss": 0.038, "step": 38790 }, { "epoch": 0.1618738056095668, "grad_norm": 0.8275521732662167, "learning_rate": 4.9714346157296505e-06, "loss": 0.0425, "step": 38795 }, { "epoch": 0.1618946683245571, "grad_norm": 1.2369590571354008, "learning_rate": 4.9711142727573845e-06, "loss": 0.0466, "step": 38800 }, { "epoch": 0.16191553103954737, "grad_norm": 0.7630495114897315, "learning_rate": 4.9707939917026975e-06, "loss": 0.0327, "step": 38805 }, { "epoch": 0.16193639375453764, "grad_norm": 0.8160651469479266, "learning_rate": 4.9704737725456445e-06, "loss": 0.0543, "step": 38810 }, { "epoch": 0.1619572564695279, "grad_norm": 1.3992368662733357, "learning_rate": 4.97015361526629e-06, "loss": 0.0361, "step": 38815 }, { "epoch": 0.1619781191845182, "grad_norm": 1.0027422302121614, "learning_rate": 4.969833519844709e-06, "loss": 0.03, "step": 38820 }, { "epoch": 0.16199898189950848, "grad_norm": 1.2001851743634637, "learning_rate": 4.969513486260986e-06, "loss": 0.0386, "step": 38825 }, { "epoch": 0.16201984461449875, "grad_norm": 1.0603664532370534, "learning_rate": 4.969193514495212e-06, "loss": 0.0276, "step": 38830 }, { "epoch": 0.16204070732948903, "grad_norm": 4.213479152633104, "learning_rate": 4.9688736045274884e-06, "loss": 0.0437, "step": 38835 }, { "epoch": 0.1620615700444793, "grad_norm": 0.8299339026813369, "learning_rate": 4.968553756337926e-06, "loss": 0.0417, "step": 38840 }, { "epoch": 0.1620824327594696, "grad_norm": 0.7848490308853202, "learning_rate": 4.968233969906644e-06, "loss": 0.0307, "step": 38845 }, { "epoch": 0.16210329547445987, "grad_norm": 0.5193165232559672, "learning_rate": 4.967914245213768e-06, "loss": 0.0336, "step": 38850 }, { "epoch": 0.16212415818945014, "grad_norm": 0.9219477491810614, "learning_rate": 4.967594582239439e-06, "loss": 0.05, "step": 38855 }, { "epoch": 0.1621450209044404, "grad_norm": 0.45525471567254044, "learning_rate": 4.967274980963799e-06, "loss": 0.0269, "step": 38860 }, { "epoch": 0.1621658836194307, "grad_norm": 0.85303154449562, "learning_rate": 4.966955441367007e-06, "loss": 0.0326, "step": 38865 }, { "epoch": 0.16218674633442098, "grad_norm": 0.5047425719074132, "learning_rate": 4.966635963429225e-06, "loss": 0.0289, "step": 38870 }, { "epoch": 0.16220760904941126, "grad_norm": 1.4423584590681822, "learning_rate": 4.966316547130624e-06, "loss": 0.0426, "step": 38875 }, { "epoch": 0.16222847176440153, "grad_norm": 0.5912816672902482, "learning_rate": 4.9659971924513885e-06, "loss": 0.0309, "step": 38880 }, { "epoch": 0.1622493344793918, "grad_norm": 1.214955899184631, "learning_rate": 4.965677899371708e-06, "loss": 0.0491, "step": 38885 }, { "epoch": 0.1622701971943821, "grad_norm": 1.2293434883203047, "learning_rate": 4.965358667871782e-06, "loss": 0.0473, "step": 38890 }, { "epoch": 0.16229105990937237, "grad_norm": 1.451226314175653, "learning_rate": 4.965039497931819e-06, "loss": 0.045, "step": 38895 }, { "epoch": 0.16231192262436264, "grad_norm": 0.8852412926659382, "learning_rate": 4.964720389532035e-06, "loss": 0.0349, "step": 38900 }, { "epoch": 0.16233278533935291, "grad_norm": 4.372513179982491, "learning_rate": 4.96440134265266e-06, "loss": 0.046, "step": 38905 }, { "epoch": 0.1623536480543432, "grad_norm": 1.0039364922331764, "learning_rate": 4.964082357273925e-06, "loss": 0.0375, "step": 38910 }, { "epoch": 0.16237451076933349, "grad_norm": 0.8932664489891133, "learning_rate": 4.963763433376077e-06, "loss": 0.0568, "step": 38915 }, { "epoch": 0.16239537348432376, "grad_norm": 1.6617984825310104, "learning_rate": 4.963444570939367e-06, "loss": 0.0432, "step": 38920 }, { "epoch": 0.16241623619931403, "grad_norm": 1.6771637848033976, "learning_rate": 4.96312576994406e-06, "loss": 0.0565, "step": 38925 }, { "epoch": 0.1624370989143043, "grad_norm": 1.3080312878590556, "learning_rate": 4.962807030370422e-06, "loss": 0.0464, "step": 38930 }, { "epoch": 0.1624579616292946, "grad_norm": 1.0038708588451883, "learning_rate": 4.962488352198736e-06, "loss": 0.0262, "step": 38935 }, { "epoch": 0.16247882434428487, "grad_norm": 0.8862310190865589, "learning_rate": 4.962169735409289e-06, "loss": 0.0296, "step": 38940 }, { "epoch": 0.16249968705927514, "grad_norm": 0.6691507579608078, "learning_rate": 4.9618511799823786e-06, "loss": 0.0361, "step": 38945 }, { "epoch": 0.16252054977426542, "grad_norm": 0.9101371797279881, "learning_rate": 4.961532685898311e-06, "loss": 0.0397, "step": 38950 }, { "epoch": 0.16254141248925572, "grad_norm": 0.941191342073226, "learning_rate": 4.961214253137402e-06, "loss": 0.0353, "step": 38955 }, { "epoch": 0.162562275204246, "grad_norm": 0.7722965176422033, "learning_rate": 4.960895881679974e-06, "loss": 0.0376, "step": 38960 }, { "epoch": 0.16258313791923626, "grad_norm": 0.8162884756484823, "learning_rate": 4.96057757150636e-06, "loss": 0.0337, "step": 38965 }, { "epoch": 0.16260400063422653, "grad_norm": 0.796742822700196, "learning_rate": 4.9602593225969035e-06, "loss": 0.0291, "step": 38970 }, { "epoch": 0.1626248633492168, "grad_norm": 1.2624061334934638, "learning_rate": 4.959941134931951e-06, "loss": 0.0409, "step": 38975 }, { "epoch": 0.1626457260642071, "grad_norm": 1.156104924468458, "learning_rate": 4.959623008491864e-06, "loss": 0.0396, "step": 38980 }, { "epoch": 0.16266658877919737, "grad_norm": 0.9611424680775726, "learning_rate": 4.959304943257011e-06, "loss": 0.0416, "step": 38985 }, { "epoch": 0.16268745149418765, "grad_norm": 0.48486322573069796, "learning_rate": 4.9589869392077675e-06, "loss": 0.0341, "step": 38990 }, { "epoch": 0.16270831420917792, "grad_norm": 1.4892462496846404, "learning_rate": 4.95866899632452e-06, "loss": 0.0314, "step": 38995 }, { "epoch": 0.16272917692416822, "grad_norm": 0.5747480976821276, "learning_rate": 4.958351114587662e-06, "loss": 0.0308, "step": 39000 }, { "epoch": 0.1627500396391585, "grad_norm": 0.673106851336215, "learning_rate": 4.9580332939775965e-06, "loss": 0.0318, "step": 39005 }, { "epoch": 0.16277090235414876, "grad_norm": 0.6324614332672887, "learning_rate": 4.957715534474737e-06, "loss": 0.0436, "step": 39010 }, { "epoch": 0.16279176506913903, "grad_norm": 0.8371416391837674, "learning_rate": 4.957397836059504e-06, "loss": 0.0327, "step": 39015 }, { "epoch": 0.1628126277841293, "grad_norm": 1.0588408019815474, "learning_rate": 4.957080198712326e-06, "loss": 0.0332, "step": 39020 }, { "epoch": 0.1628334904991196, "grad_norm": 1.2421549141316237, "learning_rate": 4.956762622413641e-06, "loss": 0.0416, "step": 39025 }, { "epoch": 0.16285435321410988, "grad_norm": 1.091299840680218, "learning_rate": 4.9564451071438964e-06, "loss": 0.0438, "step": 39030 }, { "epoch": 0.16287521592910015, "grad_norm": 1.4624940798388686, "learning_rate": 4.956127652883549e-06, "loss": 0.0495, "step": 39035 }, { "epoch": 0.16289607864409042, "grad_norm": 0.6052580490607269, "learning_rate": 4.955810259613064e-06, "loss": 0.0367, "step": 39040 }, { "epoch": 0.16291694135908072, "grad_norm": 1.1330629927463518, "learning_rate": 4.955492927312912e-06, "loss": 0.0456, "step": 39045 }, { "epoch": 0.162937804074071, "grad_norm": 0.8764083525024118, "learning_rate": 4.955175655963578e-06, "loss": 0.0301, "step": 39050 }, { "epoch": 0.16295866678906126, "grad_norm": 0.6111743500893277, "learning_rate": 4.954858445545551e-06, "loss": 0.035, "step": 39055 }, { "epoch": 0.16297952950405153, "grad_norm": 1.1171058817470099, "learning_rate": 4.954541296039332e-06, "loss": 0.0523, "step": 39060 }, { "epoch": 0.1630003922190418, "grad_norm": 0.7104283841168467, "learning_rate": 4.9542242074254276e-06, "loss": 0.0368, "step": 39065 }, { "epoch": 0.1630212549340321, "grad_norm": 0.8370900816283172, "learning_rate": 4.953907179684356e-06, "loss": 0.0306, "step": 39070 }, { "epoch": 0.16304211764902238, "grad_norm": 0.96524685165036, "learning_rate": 4.953590212796644e-06, "loss": 0.035, "step": 39075 }, { "epoch": 0.16306298036401265, "grad_norm": 0.8392448593315565, "learning_rate": 4.9532733067428225e-06, "loss": 0.0495, "step": 39080 }, { "epoch": 0.16308384307900292, "grad_norm": 1.1651709284214455, "learning_rate": 4.95295646150344e-06, "loss": 0.0399, "step": 39085 }, { "epoch": 0.1631047057939932, "grad_norm": 0.52196837143423, "learning_rate": 4.952639677059044e-06, "loss": 0.0344, "step": 39090 }, { "epoch": 0.1631255685089835, "grad_norm": 1.0894560971333762, "learning_rate": 4.9523229533901965e-06, "loss": 0.0394, "step": 39095 }, { "epoch": 0.16314643122397376, "grad_norm": 0.9911597578185108, "learning_rate": 4.952006290477469e-06, "loss": 0.0341, "step": 39100 }, { "epoch": 0.16316729393896404, "grad_norm": 1.2921785178085092, "learning_rate": 4.9516896883014356e-06, "loss": 0.0477, "step": 39105 }, { "epoch": 0.1631881566539543, "grad_norm": 1.0091460903466551, "learning_rate": 4.951373146842687e-06, "loss": 0.0369, "step": 39110 }, { "epoch": 0.1632090193689446, "grad_norm": 0.8428093640513551, "learning_rate": 4.951056666081816e-06, "loss": 0.0272, "step": 39115 }, { "epoch": 0.16322988208393488, "grad_norm": 1.24429227667943, "learning_rate": 4.950740245999428e-06, "loss": 0.0536, "step": 39120 }, { "epoch": 0.16325074479892515, "grad_norm": 0.8544131034101662, "learning_rate": 4.950423886576135e-06, "loss": 0.0337, "step": 39125 }, { "epoch": 0.16327160751391542, "grad_norm": 0.6149176066108368, "learning_rate": 4.9501075877925585e-06, "loss": 0.0338, "step": 39130 }, { "epoch": 0.1632924702289057, "grad_norm": 0.742525140197145, "learning_rate": 4.94979134962933e-06, "loss": 0.0364, "step": 39135 }, { "epoch": 0.163313332943896, "grad_norm": 0.9173416906369273, "learning_rate": 4.949475172067085e-06, "loss": 0.0338, "step": 39140 }, { "epoch": 0.16333419565888627, "grad_norm": 1.7914445402620816, "learning_rate": 4.949159055086476e-06, "loss": 0.0469, "step": 39145 }, { "epoch": 0.16335505837387654, "grad_norm": 0.915935967248363, "learning_rate": 4.948842998668154e-06, "loss": 0.0466, "step": 39150 }, { "epoch": 0.1633759210888668, "grad_norm": 0.9223189479451785, "learning_rate": 4.948527002792787e-06, "loss": 0.0421, "step": 39155 }, { "epoch": 0.1633967838038571, "grad_norm": 1.447110213625798, "learning_rate": 4.9482110674410474e-06, "loss": 0.042, "step": 39160 }, { "epoch": 0.16341764651884738, "grad_norm": 1.866424398826368, "learning_rate": 4.947895192593617e-06, "loss": 0.0543, "step": 39165 }, { "epoch": 0.16343850923383765, "grad_norm": 1.411213697315188, "learning_rate": 4.947579378231187e-06, "loss": 0.0529, "step": 39170 }, { "epoch": 0.16345937194882793, "grad_norm": 0.9781557392874312, "learning_rate": 4.9472636243344554e-06, "loss": 0.0448, "step": 39175 }, { "epoch": 0.1634802346638182, "grad_norm": 0.6306714507474165, "learning_rate": 4.946947930884131e-06, "loss": 0.0289, "step": 39180 }, { "epoch": 0.1635010973788085, "grad_norm": 0.6545576246826352, "learning_rate": 4.946632297860931e-06, "loss": 0.0354, "step": 39185 }, { "epoch": 0.16352196009379877, "grad_norm": 0.7097348457976572, "learning_rate": 4.94631672524558e-06, "loss": 0.0418, "step": 39190 }, { "epoch": 0.16354282280878904, "grad_norm": 0.743683968898755, "learning_rate": 4.9460012130188116e-06, "loss": 0.0331, "step": 39195 }, { "epoch": 0.1635636855237793, "grad_norm": 1.050811239753583, "learning_rate": 4.945685761161368e-06, "loss": 0.0337, "step": 39200 }, { "epoch": 0.1635845482387696, "grad_norm": 1.7491643974409798, "learning_rate": 4.945370369654e-06, "loss": 0.0343, "step": 39205 }, { "epoch": 0.16360541095375988, "grad_norm": 1.4006412989456276, "learning_rate": 4.945055038477467e-06, "loss": 0.0388, "step": 39210 }, { "epoch": 0.16362627366875016, "grad_norm": 0.8352822995244524, "learning_rate": 4.944739767612537e-06, "loss": 0.0424, "step": 39215 }, { "epoch": 0.16364713638374043, "grad_norm": 0.7831307088325795, "learning_rate": 4.944424557039988e-06, "loss": 0.0569, "step": 39220 }, { "epoch": 0.1636679990987307, "grad_norm": 1.4200243670234711, "learning_rate": 4.944109406740604e-06, "loss": 0.0392, "step": 39225 }, { "epoch": 0.163688861813721, "grad_norm": 1.3195095415247036, "learning_rate": 4.9437943166951796e-06, "loss": 0.0386, "step": 39230 }, { "epoch": 0.16370972452871127, "grad_norm": 0.7466382797638172, "learning_rate": 4.9434792868845165e-06, "loss": 0.0363, "step": 39235 }, { "epoch": 0.16373058724370154, "grad_norm": 1.9456434758285788, "learning_rate": 4.9431643172894265e-06, "loss": 0.0407, "step": 39240 }, { "epoch": 0.1637514499586918, "grad_norm": 0.9574911371948364, "learning_rate": 4.9428494078907285e-06, "loss": 0.0355, "step": 39245 }, { "epoch": 0.1637723126736821, "grad_norm": 0.7731287731222691, "learning_rate": 4.942534558669251e-06, "loss": 0.0387, "step": 39250 }, { "epoch": 0.16379317538867239, "grad_norm": 0.887397313685773, "learning_rate": 4.942219769605829e-06, "loss": 0.0355, "step": 39255 }, { "epoch": 0.16381403810366266, "grad_norm": 0.9046942894751607, "learning_rate": 4.94190504068131e-06, "loss": 0.0334, "step": 39260 }, { "epoch": 0.16383490081865293, "grad_norm": 0.7343862630955964, "learning_rate": 4.941590371876547e-06, "loss": 0.0393, "step": 39265 }, { "epoch": 0.1638557635336432, "grad_norm": 0.9061331329749887, "learning_rate": 4.941275763172401e-06, "loss": 0.045, "step": 39270 }, { "epoch": 0.1638766262486335, "grad_norm": 1.2444729197790159, "learning_rate": 4.940961214549744e-06, "loss": 0.0363, "step": 39275 }, { "epoch": 0.16389748896362377, "grad_norm": 1.087494078173023, "learning_rate": 4.940646725989455e-06, "loss": 0.0436, "step": 39280 }, { "epoch": 0.16391835167861404, "grad_norm": 0.7441383785989849, "learning_rate": 4.940332297472423e-06, "loss": 0.0306, "step": 39285 }, { "epoch": 0.16393921439360432, "grad_norm": 0.9676897073858046, "learning_rate": 4.940017928979541e-06, "loss": 0.0425, "step": 39290 }, { "epoch": 0.16396007710859462, "grad_norm": 0.7599585783857801, "learning_rate": 4.939703620491717e-06, "loss": 0.0327, "step": 39295 }, { "epoch": 0.1639809398235849, "grad_norm": 0.6238777070006767, "learning_rate": 4.939389371989863e-06, "loss": 0.0361, "step": 39300 }, { "epoch": 0.16400180253857516, "grad_norm": 0.9310431376452328, "learning_rate": 4.939075183454903e-06, "loss": 0.0331, "step": 39305 }, { "epoch": 0.16402266525356543, "grad_norm": 0.7827024049361005, "learning_rate": 4.938761054867763e-06, "loss": 0.037, "step": 39310 }, { "epoch": 0.1640435279685557, "grad_norm": 1.0099437432759693, "learning_rate": 4.938446986209384e-06, "loss": 0.0306, "step": 39315 }, { "epoch": 0.164064390683546, "grad_norm": 1.159245297257036, "learning_rate": 4.9381329774607155e-06, "loss": 0.0462, "step": 39320 }, { "epoch": 0.16408525339853627, "grad_norm": 0.873935064921658, "learning_rate": 4.93781902860271e-06, "loss": 0.0404, "step": 39325 }, { "epoch": 0.16410611611352655, "grad_norm": 0.843294315101099, "learning_rate": 4.937505139616332e-06, "loss": 0.0358, "step": 39330 }, { "epoch": 0.16412697882851682, "grad_norm": 1.3060509245047742, "learning_rate": 4.937191310482557e-06, "loss": 0.0364, "step": 39335 }, { "epoch": 0.16414784154350712, "grad_norm": 0.8225904706858143, "learning_rate": 4.936877541182364e-06, "loss": 0.0372, "step": 39340 }, { "epoch": 0.1641687042584974, "grad_norm": 1.1158274316100458, "learning_rate": 4.936563831696743e-06, "loss": 0.0431, "step": 39345 }, { "epoch": 0.16418956697348766, "grad_norm": 0.7622375689540363, "learning_rate": 4.936250182006693e-06, "loss": 0.0369, "step": 39350 }, { "epoch": 0.16421042968847793, "grad_norm": 0.8767773733010109, "learning_rate": 4.935936592093219e-06, "loss": 0.0416, "step": 39355 }, { "epoch": 0.1642312924034682, "grad_norm": 0.934537797312884, "learning_rate": 4.935623061937336e-06, "loss": 0.0508, "step": 39360 }, { "epoch": 0.1642521551184585, "grad_norm": 0.6631569353776583, "learning_rate": 4.935309591520069e-06, "loss": 0.0383, "step": 39365 }, { "epoch": 0.16427301783344878, "grad_norm": 1.2688552637898043, "learning_rate": 4.934996180822449e-06, "loss": 0.0328, "step": 39370 }, { "epoch": 0.16429388054843905, "grad_norm": 1.227631782579134, "learning_rate": 4.934682829825516e-06, "loss": 0.0391, "step": 39375 }, { "epoch": 0.16431474326342932, "grad_norm": 2.7619557760874196, "learning_rate": 4.934369538510319e-06, "loss": 0.0424, "step": 39380 }, { "epoch": 0.16433560597841962, "grad_norm": 0.8725148197673694, "learning_rate": 4.934056306857915e-06, "loss": 0.036, "step": 39385 }, { "epoch": 0.1643564686934099, "grad_norm": 1.4243887205374868, "learning_rate": 4.933743134849371e-06, "loss": 0.0394, "step": 39390 }, { "epoch": 0.16437733140840016, "grad_norm": 0.9518865485747472, "learning_rate": 4.933430022465758e-06, "loss": 0.0403, "step": 39395 }, { "epoch": 0.16439819412339043, "grad_norm": 0.891178828323075, "learning_rate": 4.933116969688161e-06, "loss": 0.0319, "step": 39400 }, { "epoch": 0.1644190568383807, "grad_norm": 0.7248694371325662, "learning_rate": 4.932803976497669e-06, "loss": 0.0346, "step": 39405 }, { "epoch": 0.164439919553371, "grad_norm": 1.2059487236164668, "learning_rate": 4.932491042875383e-06, "loss": 0.0401, "step": 39410 }, { "epoch": 0.16446078226836128, "grad_norm": 0.7946691059850568, "learning_rate": 4.932178168802409e-06, "loss": 0.0333, "step": 39415 }, { "epoch": 0.16448164498335155, "grad_norm": 1.0150132430322387, "learning_rate": 4.931865354259863e-06, "loss": 0.0444, "step": 39420 }, { "epoch": 0.16450250769834182, "grad_norm": 0.7627873030489396, "learning_rate": 4.931552599228872e-06, "loss": 0.0447, "step": 39425 }, { "epoch": 0.16452337041333212, "grad_norm": 1.0278644334499143, "learning_rate": 4.931239903690566e-06, "loss": 0.0385, "step": 39430 }, { "epoch": 0.1645442331283224, "grad_norm": 0.602180452734211, "learning_rate": 4.930927267626086e-06, "loss": 0.0301, "step": 39435 }, { "epoch": 0.16456509584331266, "grad_norm": 0.8267955293345114, "learning_rate": 4.930614691016582e-06, "loss": 0.0373, "step": 39440 }, { "epoch": 0.16458595855830294, "grad_norm": 2.0203143685828677, "learning_rate": 4.930302173843213e-06, "loss": 0.0425, "step": 39445 }, { "epoch": 0.1646068212732932, "grad_norm": 0.6826997081832702, "learning_rate": 4.9299897160871425e-06, "loss": 0.0472, "step": 39450 }, { "epoch": 0.1646276839882835, "grad_norm": 1.0844968518086175, "learning_rate": 4.929677317729549e-06, "loss": 0.0588, "step": 39455 }, { "epoch": 0.16464854670327378, "grad_norm": 0.6187257951502322, "learning_rate": 4.929364978751612e-06, "loss": 0.0543, "step": 39460 }, { "epoch": 0.16466940941826405, "grad_norm": 0.9540787196100664, "learning_rate": 4.929052699134524e-06, "loss": 0.037, "step": 39465 }, { "epoch": 0.16469027213325432, "grad_norm": 0.9818308459078495, "learning_rate": 4.928740478859484e-06, "loss": 0.0386, "step": 39470 }, { "epoch": 0.16471113484824462, "grad_norm": 1.0531549238972415, "learning_rate": 4.9284283179077e-06, "loss": 0.0298, "step": 39475 }, { "epoch": 0.1647319975632349, "grad_norm": 1.873035212397234, "learning_rate": 4.9281162162603905e-06, "loss": 0.0507, "step": 39480 }, { "epoch": 0.16475286027822517, "grad_norm": 1.1521869603721877, "learning_rate": 4.927804173898775e-06, "loss": 0.0311, "step": 39485 }, { "epoch": 0.16477372299321544, "grad_norm": 0.586383365379307, "learning_rate": 4.927492190804093e-06, "loss": 0.0287, "step": 39490 }, { "epoch": 0.1647945857082057, "grad_norm": 0.8474063532542916, "learning_rate": 4.92718026695758e-06, "loss": 0.0399, "step": 39495 }, { "epoch": 0.164815448423196, "grad_norm": 1.0712417854955434, "learning_rate": 4.926868402340488e-06, "loss": 0.0388, "step": 39500 }, { "epoch": 0.16483631113818628, "grad_norm": 0.9068434562371034, "learning_rate": 4.926556596934075e-06, "loss": 0.0317, "step": 39505 }, { "epoch": 0.16485717385317655, "grad_norm": 1.5026200490996053, "learning_rate": 4.926244850719607e-06, "loss": 0.0466, "step": 39510 }, { "epoch": 0.16487803656816682, "grad_norm": 1.0704481515149442, "learning_rate": 4.925933163678357e-06, "loss": 0.0418, "step": 39515 }, { "epoch": 0.16489889928315712, "grad_norm": 2.502762775974059, "learning_rate": 4.9256215357916105e-06, "loss": 0.041, "step": 39520 }, { "epoch": 0.1649197619981474, "grad_norm": 1.502352926541444, "learning_rate": 4.925309967040655e-06, "loss": 0.0557, "step": 39525 }, { "epoch": 0.16494062471313767, "grad_norm": 1.0850381433249663, "learning_rate": 4.924998457406792e-06, "loss": 0.0493, "step": 39530 }, { "epoch": 0.16496148742812794, "grad_norm": 4.146161697830224, "learning_rate": 4.92468700687133e-06, "loss": 0.0362, "step": 39535 }, { "epoch": 0.1649823501431182, "grad_norm": 1.0152667783975398, "learning_rate": 4.924375615415582e-06, "loss": 0.0377, "step": 39540 }, { "epoch": 0.1650032128581085, "grad_norm": 2.139953233133477, "learning_rate": 4.9240642830208736e-06, "loss": 0.0475, "step": 39545 }, { "epoch": 0.16502407557309878, "grad_norm": 0.5491058192794135, "learning_rate": 4.923753009668538e-06, "loss": 0.0294, "step": 39550 }, { "epoch": 0.16504493828808905, "grad_norm": 1.1591245282660523, "learning_rate": 4.923441795339914e-06, "loss": 0.0403, "step": 39555 }, { "epoch": 0.16506580100307933, "grad_norm": 0.9115951177748058, "learning_rate": 4.923130640016352e-06, "loss": 0.0307, "step": 39560 }, { "epoch": 0.16508666371806963, "grad_norm": 0.6996737051353621, "learning_rate": 4.922819543679208e-06, "loss": 0.027, "step": 39565 }, { "epoch": 0.1651075264330599, "grad_norm": 0.9057914447156135, "learning_rate": 4.922508506309849e-06, "loss": 0.0353, "step": 39570 }, { "epoch": 0.16512838914805017, "grad_norm": 1.0014856380723354, "learning_rate": 4.9221975278896464e-06, "loss": 0.0449, "step": 39575 }, { "epoch": 0.16514925186304044, "grad_norm": 1.3625382372561066, "learning_rate": 4.921886608399984e-06, "loss": 0.0647, "step": 39580 }, { "epoch": 0.1651701145780307, "grad_norm": 0.5811701910339615, "learning_rate": 4.921575747822251e-06, "loss": 0.0337, "step": 39585 }, { "epoch": 0.165190977293021, "grad_norm": 1.1759265777473034, "learning_rate": 4.921264946137846e-06, "loss": 0.0369, "step": 39590 }, { "epoch": 0.16521184000801128, "grad_norm": 0.7111606173030146, "learning_rate": 4.920954203328174e-06, "loss": 0.0278, "step": 39595 }, { "epoch": 0.16523270272300156, "grad_norm": 1.113568273261318, "learning_rate": 4.920643519374652e-06, "loss": 0.042, "step": 39600 }, { "epoch": 0.16525356543799183, "grad_norm": 1.3949576077071624, "learning_rate": 4.920332894258701e-06, "loss": 0.057, "step": 39605 }, { "epoch": 0.16527442815298213, "grad_norm": 0.6319667049749448, "learning_rate": 4.920022327961756e-06, "loss": 0.0398, "step": 39610 }, { "epoch": 0.1652952908679724, "grad_norm": 1.0820390864473102, "learning_rate": 4.9197118204652496e-06, "loss": 0.0394, "step": 39615 }, { "epoch": 0.16531615358296267, "grad_norm": 1.5093062492061187, "learning_rate": 4.919401371750636e-06, "loss": 0.0335, "step": 39620 }, { "epoch": 0.16533701629795294, "grad_norm": 0.8572010612325869, "learning_rate": 4.919090981799368e-06, "loss": 0.0321, "step": 39625 }, { "epoch": 0.16535787901294322, "grad_norm": 0.5676978043714532, "learning_rate": 4.918780650592909e-06, "loss": 0.0294, "step": 39630 }, { "epoch": 0.16537874172793351, "grad_norm": 0.8809861818138461, "learning_rate": 4.918470378112733e-06, "loss": 0.0333, "step": 39635 }, { "epoch": 0.1653996044429238, "grad_norm": 1.1854790423058237, "learning_rate": 4.918160164340319e-06, "loss": 0.0343, "step": 39640 }, { "epoch": 0.16542046715791406, "grad_norm": 0.7599703701592222, "learning_rate": 4.917850009257155e-06, "loss": 0.0472, "step": 39645 }, { "epoch": 0.16544132987290433, "grad_norm": 0.5653957711562609, "learning_rate": 4.91753991284474e-06, "loss": 0.0234, "step": 39650 }, { "epoch": 0.16546219258789463, "grad_norm": 1.0258015597490786, "learning_rate": 4.9172298750845756e-06, "loss": 0.0314, "step": 39655 }, { "epoch": 0.1654830553028849, "grad_norm": 0.776392705105926, "learning_rate": 4.916919895958177e-06, "loss": 0.0366, "step": 39660 }, { "epoch": 0.16550391801787517, "grad_norm": 0.8350876328586267, "learning_rate": 4.916609975447066e-06, "loss": 0.0518, "step": 39665 }, { "epoch": 0.16552478073286545, "grad_norm": 0.48830756044123574, "learning_rate": 4.916300113532769e-06, "loss": 0.0291, "step": 39670 }, { "epoch": 0.16554564344785572, "grad_norm": 1.4900643419892732, "learning_rate": 4.9159903101968266e-06, "loss": 0.0424, "step": 39675 }, { "epoch": 0.16556650616284602, "grad_norm": 0.816069101842617, "learning_rate": 4.915680565420782e-06, "loss": 0.0384, "step": 39680 }, { "epoch": 0.1655873688778363, "grad_norm": 0.9210915460631605, "learning_rate": 4.915370879186191e-06, "loss": 0.0351, "step": 39685 }, { "epoch": 0.16560823159282656, "grad_norm": 0.7856771353671833, "learning_rate": 4.915061251474613e-06, "loss": 0.0292, "step": 39690 }, { "epoch": 0.16562909430781683, "grad_norm": 0.950018253327748, "learning_rate": 4.91475168226762e-06, "loss": 0.0286, "step": 39695 }, { "epoch": 0.16564995702280713, "grad_norm": 0.9018556952796921, "learning_rate": 4.914442171546789e-06, "loss": 0.0372, "step": 39700 }, { "epoch": 0.1656708197377974, "grad_norm": 1.001589721258725, "learning_rate": 4.914132719293707e-06, "loss": 0.0353, "step": 39705 }, { "epoch": 0.16569168245278768, "grad_norm": 0.9564340692512191, "learning_rate": 4.913823325489969e-06, "loss": 0.0361, "step": 39710 }, { "epoch": 0.16571254516777795, "grad_norm": 1.6487437517430321, "learning_rate": 4.913513990117175e-06, "loss": 0.0382, "step": 39715 }, { "epoch": 0.16573340788276822, "grad_norm": 1.4021490982642466, "learning_rate": 4.913204713156938e-06, "loss": 0.0432, "step": 39720 }, { "epoch": 0.16575427059775852, "grad_norm": 1.485753664896256, "learning_rate": 4.912895494590876e-06, "loss": 0.0367, "step": 39725 }, { "epoch": 0.1657751333127488, "grad_norm": 0.6496825131163367, "learning_rate": 4.912586334400615e-06, "loss": 0.0433, "step": 39730 }, { "epoch": 0.16579599602773906, "grad_norm": 1.0103242693071932, "learning_rate": 4.912277232567791e-06, "loss": 0.0375, "step": 39735 }, { "epoch": 0.16581685874272933, "grad_norm": 0.565792340484431, "learning_rate": 4.911968189074045e-06, "loss": 0.0303, "step": 39740 }, { "epoch": 0.16583772145771963, "grad_norm": 0.7884328201873027, "learning_rate": 4.911659203901031e-06, "loss": 0.0391, "step": 39745 }, { "epoch": 0.1658585841727099, "grad_norm": 0.7295759014990842, "learning_rate": 4.911350277030406e-06, "loss": 0.0292, "step": 39750 }, { "epoch": 0.16587944688770018, "grad_norm": 0.7844790048391228, "learning_rate": 4.911041408443837e-06, "loss": 0.0345, "step": 39755 }, { "epoch": 0.16590030960269045, "grad_norm": 1.4879669069374752, "learning_rate": 4.9107325981229995e-06, "loss": 0.0515, "step": 39760 }, { "epoch": 0.16592117231768072, "grad_norm": 1.1603663556784054, "learning_rate": 4.910423846049578e-06, "loss": 0.0473, "step": 39765 }, { "epoch": 0.16594203503267102, "grad_norm": 1.1762229632800894, "learning_rate": 4.910115152205263e-06, "loss": 0.0468, "step": 39770 }, { "epoch": 0.1659628977476613, "grad_norm": 0.6631041726264545, "learning_rate": 4.909806516571755e-06, "loss": 0.0348, "step": 39775 }, { "epoch": 0.16598376046265156, "grad_norm": 0.5540672508894875, "learning_rate": 4.9094979391307594e-06, "loss": 0.0363, "step": 39780 }, { "epoch": 0.16600462317764184, "grad_norm": 0.6659104731074732, "learning_rate": 4.9091894198639924e-06, "loss": 0.0389, "step": 39785 }, { "epoch": 0.16602548589263214, "grad_norm": 0.6664814784303674, "learning_rate": 4.9088809587531795e-06, "loss": 0.0409, "step": 39790 }, { "epoch": 0.1660463486076224, "grad_norm": 0.45751578202471055, "learning_rate": 4.90857255578005e-06, "loss": 0.0346, "step": 39795 }, { "epoch": 0.16606721132261268, "grad_norm": 1.5444001472086224, "learning_rate": 4.908264210926344e-06, "loss": 0.0459, "step": 39800 }, { "epoch": 0.16608807403760295, "grad_norm": 1.1478593963405357, "learning_rate": 4.907955924173811e-06, "loss": 0.0356, "step": 39805 }, { "epoch": 0.16610893675259322, "grad_norm": 0.5944305462855939, "learning_rate": 4.907647695504204e-06, "loss": 0.0387, "step": 39810 }, { "epoch": 0.16612979946758352, "grad_norm": 0.5975211898917603, "learning_rate": 4.907339524899288e-06, "loss": 0.0323, "step": 39815 }, { "epoch": 0.1661506621825738, "grad_norm": 0.6758227347762792, "learning_rate": 4.907031412340835e-06, "loss": 0.0298, "step": 39820 }, { "epoch": 0.16617152489756407, "grad_norm": 1.0762307752736666, "learning_rate": 4.906723357810626e-06, "loss": 0.0327, "step": 39825 }, { "epoch": 0.16619238761255434, "grad_norm": 0.9112722294282177, "learning_rate": 4.906415361290445e-06, "loss": 0.0349, "step": 39830 }, { "epoch": 0.16621325032754464, "grad_norm": 0.7315222426574434, "learning_rate": 4.906107422762091e-06, "loss": 0.0388, "step": 39835 }, { "epoch": 0.1662341130425349, "grad_norm": 0.4478933312849481, "learning_rate": 4.9057995422073655e-06, "loss": 0.0317, "step": 39840 }, { "epoch": 0.16625497575752518, "grad_norm": 1.0258811430618908, "learning_rate": 4.905491719608082e-06, "loss": 0.0413, "step": 39845 }, { "epoch": 0.16627583847251545, "grad_norm": 0.9444313786173362, "learning_rate": 4.9051839549460605e-06, "loss": 0.0333, "step": 39850 }, { "epoch": 0.16629670118750572, "grad_norm": 1.0801041869092247, "learning_rate": 4.9048762482031275e-06, "loss": 0.0352, "step": 39855 }, { "epoch": 0.16631756390249602, "grad_norm": 0.9469108673596124, "learning_rate": 4.904568599361119e-06, "loss": 0.036, "step": 39860 }, { "epoch": 0.1663384266174863, "grad_norm": 0.6294229615975455, "learning_rate": 4.904261008401878e-06, "loss": 0.0304, "step": 39865 }, { "epoch": 0.16635928933247657, "grad_norm": 1.2850223447486122, "learning_rate": 4.903953475307258e-06, "loss": 0.0458, "step": 39870 }, { "epoch": 0.16638015204746684, "grad_norm": 1.7012713977343237, "learning_rate": 4.903646000059118e-06, "loss": 0.0409, "step": 39875 }, { "epoch": 0.16640101476245714, "grad_norm": 1.1245035409583195, "learning_rate": 4.903338582639322e-06, "loss": 0.0455, "step": 39880 }, { "epoch": 0.1664218774774474, "grad_norm": 2.415179488612274, "learning_rate": 4.903031223029752e-06, "loss": 0.0389, "step": 39885 }, { "epoch": 0.16644274019243768, "grad_norm": 1.0317268503097468, "learning_rate": 4.9027239212122854e-06, "loss": 0.0539, "step": 39890 }, { "epoch": 0.16646360290742795, "grad_norm": 0.6155184365128581, "learning_rate": 4.902416677168817e-06, "loss": 0.0318, "step": 39895 }, { "epoch": 0.16648446562241823, "grad_norm": 1.3836179979429257, "learning_rate": 4.902109490881246e-06, "loss": 0.0342, "step": 39900 }, { "epoch": 0.16650532833740853, "grad_norm": 0.6265550580507316, "learning_rate": 4.901802362331479e-06, "loss": 0.0481, "step": 39905 }, { "epoch": 0.1665261910523988, "grad_norm": 1.2812663669196163, "learning_rate": 4.901495291501431e-06, "loss": 0.0582, "step": 39910 }, { "epoch": 0.16654705376738907, "grad_norm": 0.8134959088973643, "learning_rate": 4.901188278373025e-06, "loss": 0.0345, "step": 39915 }, { "epoch": 0.16656791648237934, "grad_norm": 1.0583797856109094, "learning_rate": 4.900881322928193e-06, "loss": 0.0257, "step": 39920 }, { "epoch": 0.16658877919736964, "grad_norm": 0.6494786991188555, "learning_rate": 4.900574425148872e-06, "loss": 0.0423, "step": 39925 }, { "epoch": 0.1666096419123599, "grad_norm": 1.1940087971045068, "learning_rate": 4.900267585017013e-06, "loss": 0.0369, "step": 39930 }, { "epoch": 0.16663050462735018, "grad_norm": 0.8391544497064626, "learning_rate": 4.899960802514568e-06, "loss": 0.0644, "step": 39935 }, { "epoch": 0.16665136734234046, "grad_norm": 1.0576485898160626, "learning_rate": 4.8996540776234984e-06, "loss": 0.0432, "step": 39940 }, { "epoch": 0.16667223005733073, "grad_norm": 0.9971072611913565, "learning_rate": 4.899347410325777e-06, "loss": 0.0381, "step": 39945 }, { "epoch": 0.16669309277232103, "grad_norm": 1.2650998837845142, "learning_rate": 4.899040800603381e-06, "loss": 0.0373, "step": 39950 }, { "epoch": 0.1667139554873113, "grad_norm": 1.1780085782722338, "learning_rate": 4.8987342484383e-06, "loss": 0.0395, "step": 39955 }, { "epoch": 0.16673481820230157, "grad_norm": 1.015441381582978, "learning_rate": 4.898427753812523e-06, "loss": 0.0408, "step": 39960 }, { "epoch": 0.16675568091729184, "grad_norm": 0.873163401236175, "learning_rate": 4.898121316708058e-06, "loss": 0.036, "step": 39965 }, { "epoch": 0.16677654363228214, "grad_norm": 0.7081068780110051, "learning_rate": 4.89781493710691e-06, "loss": 0.0425, "step": 39970 }, { "epoch": 0.16679740634727241, "grad_norm": 0.6979232988616156, "learning_rate": 4.8975086149911e-06, "loss": 0.0263, "step": 39975 }, { "epoch": 0.1668182690622627, "grad_norm": 1.4360864289150606, "learning_rate": 4.897202350342653e-06, "loss": 0.0445, "step": 39980 }, { "epoch": 0.16683913177725296, "grad_norm": 1.190204450286391, "learning_rate": 4.896896143143603e-06, "loss": 0.0392, "step": 39985 }, { "epoch": 0.16685999449224323, "grad_norm": 1.269992550980971, "learning_rate": 4.8965899933759914e-06, "loss": 0.0345, "step": 39990 }, { "epoch": 0.16688085720723353, "grad_norm": 1.0946129531849729, "learning_rate": 4.896283901021867e-06, "loss": 0.0361, "step": 39995 }, { "epoch": 0.1669017199222238, "grad_norm": 1.1324089631971435, "learning_rate": 4.895977866063289e-06, "loss": 0.0436, "step": 40000 }, { "epoch": 0.16692258263721407, "grad_norm": 1.0477117994119236, "learning_rate": 4.895671888482319e-06, "loss": 0.0391, "step": 40005 }, { "epoch": 0.16694344535220434, "grad_norm": 0.8669400680051973, "learning_rate": 4.8953659682610335e-06, "loss": 0.0414, "step": 40010 }, { "epoch": 0.16696430806719464, "grad_norm": 1.5336875247055968, "learning_rate": 4.895060105381511e-06, "loss": 0.0394, "step": 40015 }, { "epoch": 0.16698517078218492, "grad_norm": 1.0266792116380434, "learning_rate": 4.8947542998258415e-06, "loss": 0.0424, "step": 40020 }, { "epoch": 0.1670060334971752, "grad_norm": 1.102761624966984, "learning_rate": 4.894448551576121e-06, "loss": 0.0482, "step": 40025 }, { "epoch": 0.16702689621216546, "grad_norm": 0.8173817099732542, "learning_rate": 4.894142860614453e-06, "loss": 0.0315, "step": 40030 }, { "epoch": 0.16704775892715573, "grad_norm": 1.3883695472729338, "learning_rate": 4.8938372269229514e-06, "loss": 0.0391, "step": 40035 }, { "epoch": 0.16706862164214603, "grad_norm": 0.8096921648264394, "learning_rate": 4.8935316504837355e-06, "loss": 0.0434, "step": 40040 }, { "epoch": 0.1670894843571363, "grad_norm": 1.1715351800445828, "learning_rate": 4.893226131278932e-06, "loss": 0.0419, "step": 40045 }, { "epoch": 0.16711034707212657, "grad_norm": 1.2197789831567705, "learning_rate": 4.892920669290678e-06, "loss": 0.0408, "step": 40050 }, { "epoch": 0.16713120978711685, "grad_norm": 0.6819023078026757, "learning_rate": 4.892615264501116e-06, "loss": 0.0389, "step": 40055 }, { "epoch": 0.16715207250210715, "grad_norm": 0.5110883589602845, "learning_rate": 4.892309916892396e-06, "loss": 0.0389, "step": 40060 }, { "epoch": 0.16717293521709742, "grad_norm": 0.8619600767804223, "learning_rate": 4.892004626446681e-06, "loss": 0.0472, "step": 40065 }, { "epoch": 0.1671937979320877, "grad_norm": 1.2943879691135423, "learning_rate": 4.891699393146133e-06, "loss": 0.0479, "step": 40070 }, { "epoch": 0.16721466064707796, "grad_norm": 1.067276558838963, "learning_rate": 4.891394216972928e-06, "loss": 0.0371, "step": 40075 }, { "epoch": 0.16723552336206823, "grad_norm": 0.702077384981716, "learning_rate": 4.8910890979092494e-06, "loss": 0.0305, "step": 40080 }, { "epoch": 0.16725638607705853, "grad_norm": 0.6584247307320488, "learning_rate": 4.890784035937287e-06, "loss": 0.047, "step": 40085 }, { "epoch": 0.1672772487920488, "grad_norm": 0.9091550563633953, "learning_rate": 4.8904790310392395e-06, "loss": 0.0394, "step": 40090 }, { "epoch": 0.16729811150703908, "grad_norm": 0.9820621152896326, "learning_rate": 4.8901740831973116e-06, "loss": 0.0361, "step": 40095 }, { "epoch": 0.16731897422202935, "grad_norm": 1.3356247584875405, "learning_rate": 4.889869192393716e-06, "loss": 0.0436, "step": 40100 }, { "epoch": 0.16733983693701965, "grad_norm": 1.007104383122374, "learning_rate": 4.889564358610675e-06, "loss": 0.0376, "step": 40105 }, { "epoch": 0.16736069965200992, "grad_norm": 0.9649258206555744, "learning_rate": 4.889259581830416e-06, "loss": 0.0315, "step": 40110 }, { "epoch": 0.1673815623670002, "grad_norm": 0.57732867351345, "learning_rate": 4.888954862035178e-06, "loss": 0.047, "step": 40115 }, { "epoch": 0.16740242508199046, "grad_norm": 0.7823686218347665, "learning_rate": 4.8886501992072035e-06, "loss": 0.0325, "step": 40120 }, { "epoch": 0.16742328779698074, "grad_norm": 1.3587332380053616, "learning_rate": 4.888345593328747e-06, "loss": 0.0487, "step": 40125 }, { "epoch": 0.16744415051197103, "grad_norm": 7.915515878236124, "learning_rate": 4.888041044382065e-06, "loss": 0.0368, "step": 40130 }, { "epoch": 0.1674650132269613, "grad_norm": 1.1189021827945729, "learning_rate": 4.887736552349428e-06, "loss": 0.0384, "step": 40135 }, { "epoch": 0.16748587594195158, "grad_norm": 1.8150641465027575, "learning_rate": 4.8874321172131105e-06, "loss": 0.0372, "step": 40140 }, { "epoch": 0.16750673865694185, "grad_norm": 0.6213435701961787, "learning_rate": 4.887127738955396e-06, "loss": 0.0501, "step": 40145 }, { "epoch": 0.16752760137193215, "grad_norm": 0.9334266073905327, "learning_rate": 4.886823417558574e-06, "loss": 0.0391, "step": 40150 }, { "epoch": 0.16754846408692242, "grad_norm": 1.5724775860097289, "learning_rate": 4.886519153004946e-06, "loss": 0.0412, "step": 40155 }, { "epoch": 0.1675693268019127, "grad_norm": 0.8406942066628017, "learning_rate": 4.886214945276814e-06, "loss": 0.0415, "step": 40160 }, { "epoch": 0.16759018951690297, "grad_norm": 0.8669594968103168, "learning_rate": 4.885910794356495e-06, "loss": 0.0356, "step": 40165 }, { "epoch": 0.16761105223189324, "grad_norm": 1.2472984589298253, "learning_rate": 4.8856067002263116e-06, "loss": 0.0422, "step": 40170 }, { "epoch": 0.16763191494688354, "grad_norm": 0.6759004330878563, "learning_rate": 4.885302662868591e-06, "loss": 0.0401, "step": 40175 }, { "epoch": 0.1676527776618738, "grad_norm": 1.4633255058165195, "learning_rate": 4.884998682265669e-06, "loss": 0.0418, "step": 40180 }, { "epoch": 0.16767364037686408, "grad_norm": 1.0642360371843826, "learning_rate": 4.884694758399894e-06, "loss": 0.0537, "step": 40185 }, { "epoch": 0.16769450309185435, "grad_norm": 0.9189257754769813, "learning_rate": 4.884390891253616e-06, "loss": 0.0313, "step": 40190 }, { "epoch": 0.16771536580684465, "grad_norm": 1.070901061773523, "learning_rate": 4.884087080809197e-06, "loss": 0.036, "step": 40195 }, { "epoch": 0.16773622852183492, "grad_norm": 1.0028476833124491, "learning_rate": 4.883783327049003e-06, "loss": 0.0471, "step": 40200 }, { "epoch": 0.1677570912368252, "grad_norm": 0.90180223373494, "learning_rate": 4.883479629955411e-06, "loss": 0.0362, "step": 40205 }, { "epoch": 0.16777795395181547, "grad_norm": 0.8987706095924222, "learning_rate": 4.8831759895108025e-06, "loss": 0.0385, "step": 40210 }, { "epoch": 0.16779881666680574, "grad_norm": 1.0081932602297075, "learning_rate": 4.88287240569757e-06, "loss": 0.0615, "step": 40215 }, { "epoch": 0.16781967938179604, "grad_norm": 0.8771717398566424, "learning_rate": 4.882568878498111e-06, "loss": 0.0384, "step": 40220 }, { "epoch": 0.1678405420967863, "grad_norm": 1.2299521673781635, "learning_rate": 4.882265407894832e-06, "loss": 0.0326, "step": 40225 }, { "epoch": 0.16786140481177658, "grad_norm": 0.6138711350069057, "learning_rate": 4.881961993870147e-06, "loss": 0.0477, "step": 40230 }, { "epoch": 0.16788226752676685, "grad_norm": 1.0212308450835776, "learning_rate": 4.881658636406478e-06, "loss": 0.0471, "step": 40235 }, { "epoch": 0.16790313024175715, "grad_norm": 1.0614364908014198, "learning_rate": 4.881355335486253e-06, "loss": 0.0484, "step": 40240 }, { "epoch": 0.16792399295674743, "grad_norm": 0.6700378887465208, "learning_rate": 4.881052091091908e-06, "loss": 0.0319, "step": 40245 }, { "epoch": 0.1679448556717377, "grad_norm": 0.7176084654706909, "learning_rate": 4.88074890320589e-06, "loss": 0.0312, "step": 40250 }, { "epoch": 0.16796571838672797, "grad_norm": 0.805718825857078, "learning_rate": 4.8804457718106495e-06, "loss": 0.0392, "step": 40255 }, { "epoch": 0.16798658110171824, "grad_norm": 0.9627488850625229, "learning_rate": 4.880142696888646e-06, "loss": 0.0474, "step": 40260 }, { "epoch": 0.16800744381670854, "grad_norm": 0.8356297167520031, "learning_rate": 4.879839678422347e-06, "loss": 0.0347, "step": 40265 }, { "epoch": 0.1680283065316988, "grad_norm": 0.8050434022109875, "learning_rate": 4.879536716394227e-06, "loss": 0.0362, "step": 40270 }, { "epoch": 0.16804916924668908, "grad_norm": 0.8362643937024906, "learning_rate": 4.87923381078677e-06, "loss": 0.0336, "step": 40275 }, { "epoch": 0.16807003196167936, "grad_norm": 1.244997443115073, "learning_rate": 4.878930961582464e-06, "loss": 0.0333, "step": 40280 }, { "epoch": 0.16809089467666966, "grad_norm": 0.9279804862906825, "learning_rate": 4.8786281687638084e-06, "loss": 0.0354, "step": 40285 }, { "epoch": 0.16811175739165993, "grad_norm": 0.8907501385757826, "learning_rate": 4.8783254323133085e-06, "loss": 0.0402, "step": 40290 }, { "epoch": 0.1681326201066502, "grad_norm": 0.7427573832002601, "learning_rate": 4.878022752213476e-06, "loss": 0.042, "step": 40295 }, { "epoch": 0.16815348282164047, "grad_norm": 1.1805974457202477, "learning_rate": 4.877720128446832e-06, "loss": 0.0292, "step": 40300 }, { "epoch": 0.16817434553663074, "grad_norm": 0.6992970030864155, "learning_rate": 4.877417560995904e-06, "loss": 0.0395, "step": 40305 }, { "epoch": 0.16819520825162104, "grad_norm": 0.870363816672475, "learning_rate": 4.87711504984323e-06, "loss": 0.0378, "step": 40310 }, { "epoch": 0.16821607096661131, "grad_norm": 1.02471403814034, "learning_rate": 4.8768125949713505e-06, "loss": 0.0461, "step": 40315 }, { "epoch": 0.16823693368160159, "grad_norm": 1.232111270605462, "learning_rate": 4.876510196362819e-06, "loss": 0.0487, "step": 40320 }, { "epoch": 0.16825779639659186, "grad_norm": 0.5592088815227539, "learning_rate": 4.876207854000192e-06, "loss": 0.0292, "step": 40325 }, { "epoch": 0.16827865911158216, "grad_norm": 2.2737858423268538, "learning_rate": 4.875905567866036e-06, "loss": 0.0423, "step": 40330 }, { "epoch": 0.16829952182657243, "grad_norm": 0.6577616108165799, "learning_rate": 4.875603337942925e-06, "loss": 0.0365, "step": 40335 }, { "epoch": 0.1683203845415627, "grad_norm": 0.9053955408403174, "learning_rate": 4.875301164213438e-06, "loss": 0.0347, "step": 40340 }, { "epoch": 0.16834124725655297, "grad_norm": 1.036114084758403, "learning_rate": 4.874999046660167e-06, "loss": 0.0292, "step": 40345 }, { "epoch": 0.16836210997154324, "grad_norm": 1.3434146934757778, "learning_rate": 4.874696985265707e-06, "loss": 0.04, "step": 40350 }, { "epoch": 0.16838297268653354, "grad_norm": 4.7422772562534705, "learning_rate": 4.874394980012661e-06, "loss": 0.0442, "step": 40355 }, { "epoch": 0.16840383540152382, "grad_norm": 0.7242600021185617, "learning_rate": 4.874093030883641e-06, "loss": 0.0373, "step": 40360 }, { "epoch": 0.1684246981165141, "grad_norm": 0.7725304951309167, "learning_rate": 4.873791137861265e-06, "loss": 0.0388, "step": 40365 }, { "epoch": 0.16844556083150436, "grad_norm": 0.7712405653253858, "learning_rate": 4.873489300928161e-06, "loss": 0.0344, "step": 40370 }, { "epoch": 0.16846642354649466, "grad_norm": 1.0988806572907162, "learning_rate": 4.87318752006696e-06, "loss": 0.0468, "step": 40375 }, { "epoch": 0.16848728626148493, "grad_norm": 0.9263194168147889, "learning_rate": 4.872885795260307e-06, "loss": 0.0362, "step": 40380 }, { "epoch": 0.1685081489764752, "grad_norm": 0.9016143584705998, "learning_rate": 4.872584126490849e-06, "loss": 0.0344, "step": 40385 }, { "epoch": 0.16852901169146547, "grad_norm": 0.7661325162254455, "learning_rate": 4.872282513741243e-06, "loss": 0.0355, "step": 40390 }, { "epoch": 0.16854987440645575, "grad_norm": 1.593563164735578, "learning_rate": 4.8719809569941525e-06, "loss": 0.041, "step": 40395 }, { "epoch": 0.16857073712144605, "grad_norm": 0.881604615166125, "learning_rate": 4.871679456232251e-06, "loss": 0.0365, "step": 40400 }, { "epoch": 0.16859159983643632, "grad_norm": 1.014819074804279, "learning_rate": 4.871378011438214e-06, "loss": 0.0226, "step": 40405 }, { "epoch": 0.1686124625514266, "grad_norm": 0.6191497123850195, "learning_rate": 4.871076622594732e-06, "loss": 0.0278, "step": 40410 }, { "epoch": 0.16863332526641686, "grad_norm": 1.0962891223976192, "learning_rate": 4.8707752896844954e-06, "loss": 0.0301, "step": 40415 }, { "epoch": 0.16865418798140716, "grad_norm": 0.8571480633347432, "learning_rate": 4.8704740126902086e-06, "loss": 0.0376, "step": 40420 }, { "epoch": 0.16867505069639743, "grad_norm": 1.1414584515923993, "learning_rate": 4.870172791594579e-06, "loss": 0.0275, "step": 40425 }, { "epoch": 0.1686959134113877, "grad_norm": 0.9778346047780173, "learning_rate": 4.869871626380323e-06, "loss": 0.0307, "step": 40430 }, { "epoch": 0.16871677612637798, "grad_norm": 0.7580488777261791, "learning_rate": 4.869570517030165e-06, "loss": 0.0381, "step": 40435 }, { "epoch": 0.16873763884136825, "grad_norm": 0.9407217547684551, "learning_rate": 4.869269463526837e-06, "loss": 0.0404, "step": 40440 }, { "epoch": 0.16875850155635855, "grad_norm": 1.0285211808066583, "learning_rate": 4.868968465853078e-06, "loss": 0.0335, "step": 40445 }, { "epoch": 0.16877936427134882, "grad_norm": 2.710766616041418, "learning_rate": 4.868667523991632e-06, "loss": 0.0645, "step": 40450 }, { "epoch": 0.1688002269863391, "grad_norm": 0.9338457764509124, "learning_rate": 4.868366637925256e-06, "loss": 0.0321, "step": 40455 }, { "epoch": 0.16882108970132936, "grad_norm": 0.8722781013791646, "learning_rate": 4.86806580763671e-06, "loss": 0.0303, "step": 40460 }, { "epoch": 0.16884195241631966, "grad_norm": 0.8568074152203038, "learning_rate": 4.867765033108762e-06, "loss": 0.0342, "step": 40465 }, { "epoch": 0.16886281513130993, "grad_norm": 1.13006659587554, "learning_rate": 4.86746431432419e-06, "loss": 0.0263, "step": 40470 }, { "epoch": 0.1688836778463002, "grad_norm": 1.2454965201962434, "learning_rate": 4.867163651265776e-06, "loss": 0.0309, "step": 40475 }, { "epoch": 0.16890454056129048, "grad_norm": 1.0836330080614283, "learning_rate": 4.8668630439163124e-06, "loss": 0.035, "step": 40480 }, { "epoch": 0.16892540327628075, "grad_norm": 0.8071374789578759, "learning_rate": 4.866562492258596e-06, "loss": 0.0377, "step": 40485 }, { "epoch": 0.16894626599127105, "grad_norm": 2.882819320203799, "learning_rate": 4.866261996275435e-06, "loss": 0.0373, "step": 40490 }, { "epoch": 0.16896712870626132, "grad_norm": 0.48339453378055625, "learning_rate": 4.865961555949642e-06, "loss": 0.0347, "step": 40495 }, { "epoch": 0.1689879914212516, "grad_norm": 0.9402532583109277, "learning_rate": 4.865661171264036e-06, "loss": 0.0356, "step": 40500 }, { "epoch": 0.16900885413624187, "grad_norm": 0.8723174238780242, "learning_rate": 4.865360842201448e-06, "loss": 0.0607, "step": 40505 }, { "epoch": 0.16902971685123216, "grad_norm": 1.236201240010697, "learning_rate": 4.865060568744712e-06, "loss": 0.0453, "step": 40510 }, { "epoch": 0.16905057956622244, "grad_norm": 0.7373882970753298, "learning_rate": 4.864760350876671e-06, "loss": 0.0436, "step": 40515 }, { "epoch": 0.1690714422812127, "grad_norm": 1.0619323051042637, "learning_rate": 4.8644601885801775e-06, "loss": 0.0314, "step": 40520 }, { "epoch": 0.16909230499620298, "grad_norm": 0.9786226399408494, "learning_rate": 4.864160081838088e-06, "loss": 0.0435, "step": 40525 }, { "epoch": 0.16911316771119325, "grad_norm": 1.0551067604180813, "learning_rate": 4.8638600306332674e-06, "loss": 0.0331, "step": 40530 }, { "epoch": 0.16913403042618355, "grad_norm": 0.9610001291109372, "learning_rate": 4.86356003494859e-06, "loss": 0.0378, "step": 40535 }, { "epoch": 0.16915489314117382, "grad_norm": 0.5889593283842961, "learning_rate": 4.863260094766933e-06, "loss": 0.0288, "step": 40540 }, { "epoch": 0.1691757558561641, "grad_norm": 0.976830901015817, "learning_rate": 4.8629602100711865e-06, "loss": 0.0449, "step": 40545 }, { "epoch": 0.16919661857115437, "grad_norm": 1.0178273791163468, "learning_rate": 4.862660380844245e-06, "loss": 0.0355, "step": 40550 }, { "epoch": 0.16921748128614467, "grad_norm": 0.7951530293731472, "learning_rate": 4.862360607069009e-06, "loss": 0.0359, "step": 40555 }, { "epoch": 0.16923834400113494, "grad_norm": 1.2336638302613268, "learning_rate": 4.862060888728391e-06, "loss": 0.0343, "step": 40560 }, { "epoch": 0.1692592067161252, "grad_norm": 1.9424793414567307, "learning_rate": 4.861761225805306e-06, "loss": 0.032, "step": 40565 }, { "epoch": 0.16928006943111548, "grad_norm": 1.356534061066047, "learning_rate": 4.86146161828268e-06, "loss": 0.0533, "step": 40570 }, { "epoch": 0.16930093214610575, "grad_norm": 0.8479014539683356, "learning_rate": 4.861162066143442e-06, "loss": 0.0375, "step": 40575 }, { "epoch": 0.16932179486109605, "grad_norm": 1.3328530377709074, "learning_rate": 4.860862569370532e-06, "loss": 0.0373, "step": 40580 }, { "epoch": 0.16934265757608633, "grad_norm": 0.9655746701364569, "learning_rate": 4.860563127946899e-06, "loss": 0.0349, "step": 40585 }, { "epoch": 0.1693635202910766, "grad_norm": 1.1310160842951118, "learning_rate": 4.860263741855496e-06, "loss": 0.0324, "step": 40590 }, { "epoch": 0.16938438300606687, "grad_norm": 0.9993711024920549, "learning_rate": 4.859964411079282e-06, "loss": 0.0347, "step": 40595 }, { "epoch": 0.16940524572105717, "grad_norm": 1.2917441607481617, "learning_rate": 4.859665135601226e-06, "loss": 0.0313, "step": 40600 }, { "epoch": 0.16942610843604744, "grad_norm": 2.1412927605971865, "learning_rate": 4.859365915404306e-06, "loss": 0.0412, "step": 40605 }, { "epoch": 0.1694469711510377, "grad_norm": 0.8056690548644394, "learning_rate": 4.859066750471503e-06, "loss": 0.0371, "step": 40610 }, { "epoch": 0.16946783386602798, "grad_norm": 0.7228062314167781, "learning_rate": 4.858767640785809e-06, "loss": 0.0424, "step": 40615 }, { "epoch": 0.16948869658101826, "grad_norm": 1.21302233724728, "learning_rate": 4.85846858633022e-06, "loss": 0.0345, "step": 40620 }, { "epoch": 0.16950955929600856, "grad_norm": 0.8065668208273485, "learning_rate": 4.8581695870877435e-06, "loss": 0.0318, "step": 40625 }, { "epoch": 0.16953042201099883, "grad_norm": 0.7739421203462626, "learning_rate": 4.857870643041391e-06, "loss": 0.0368, "step": 40630 }, { "epoch": 0.1695512847259891, "grad_norm": 1.0994407757644076, "learning_rate": 4.857571754174182e-06, "loss": 0.0308, "step": 40635 }, { "epoch": 0.16957214744097937, "grad_norm": 1.7887357819593588, "learning_rate": 4.857272920469143e-06, "loss": 0.0474, "step": 40640 }, { "epoch": 0.16959301015596967, "grad_norm": 0.8854689992671431, "learning_rate": 4.85697414190931e-06, "loss": 0.0423, "step": 40645 }, { "epoch": 0.16961387287095994, "grad_norm": 0.9042765351703677, "learning_rate": 4.856675418477724e-06, "loss": 0.0316, "step": 40650 }, { "epoch": 0.1696347355859502, "grad_norm": 1.0030225719434966, "learning_rate": 4.856376750157434e-06, "loss": 0.04, "step": 40655 }, { "epoch": 0.16965559830094049, "grad_norm": 0.9509224909883098, "learning_rate": 4.856078136931496e-06, "loss": 0.0335, "step": 40660 }, { "epoch": 0.16967646101593076, "grad_norm": 1.2733576555467736, "learning_rate": 4.855779578782974e-06, "loss": 0.0391, "step": 40665 }, { "epoch": 0.16969732373092106, "grad_norm": 0.7887458883984858, "learning_rate": 4.855481075694939e-06, "loss": 0.0428, "step": 40670 }, { "epoch": 0.16971818644591133, "grad_norm": 1.2742730724595157, "learning_rate": 4.85518262765047e-06, "loss": 0.0411, "step": 40675 }, { "epoch": 0.1697390491609016, "grad_norm": 0.3717686485706474, "learning_rate": 4.8548842346326515e-06, "loss": 0.0307, "step": 40680 }, { "epoch": 0.16975991187589187, "grad_norm": 0.9425353155283777, "learning_rate": 4.8545858966245756e-06, "loss": 0.0309, "step": 40685 }, { "epoch": 0.16978077459088217, "grad_norm": 1.120248071859966, "learning_rate": 4.854287613609343e-06, "loss": 0.0326, "step": 40690 }, { "epoch": 0.16980163730587244, "grad_norm": 0.9179557138915695, "learning_rate": 4.853989385570061e-06, "loss": 0.0321, "step": 40695 }, { "epoch": 0.16982250002086272, "grad_norm": 0.9044405547326121, "learning_rate": 4.853691212489845e-06, "loss": 0.027, "step": 40700 }, { "epoch": 0.169843362735853, "grad_norm": 0.4680204119175515, "learning_rate": 4.853393094351815e-06, "loss": 0.0327, "step": 40705 }, { "epoch": 0.16986422545084326, "grad_norm": 0.7409237045450555, "learning_rate": 4.853095031139103e-06, "loss": 0.0392, "step": 40710 }, { "epoch": 0.16988508816583356, "grad_norm": 1.2127503160094868, "learning_rate": 4.8527970228348415e-06, "loss": 0.0445, "step": 40715 }, { "epoch": 0.16990595088082383, "grad_norm": 0.8968101175427428, "learning_rate": 4.852499069422178e-06, "loss": 0.0259, "step": 40720 }, { "epoch": 0.1699268135958141, "grad_norm": 0.9153929479952584, "learning_rate": 4.852201170884259e-06, "loss": 0.0441, "step": 40725 }, { "epoch": 0.16994767631080437, "grad_norm": 0.7039296728253714, "learning_rate": 4.851903327204247e-06, "loss": 0.0291, "step": 40730 }, { "epoch": 0.16996853902579467, "grad_norm": 0.7798247125620249, "learning_rate": 4.851605538365304e-06, "loss": 0.0443, "step": 40735 }, { "epoch": 0.16998940174078495, "grad_norm": 0.6223736083668467, "learning_rate": 4.851307804350604e-06, "loss": 0.0404, "step": 40740 }, { "epoch": 0.17001026445577522, "grad_norm": 1.276702002553064, "learning_rate": 4.851010125143327e-06, "loss": 0.0311, "step": 40745 }, { "epoch": 0.1700311271707655, "grad_norm": 1.2902753359051289, "learning_rate": 4.850712500726659e-06, "loss": 0.0392, "step": 40750 }, { "epoch": 0.17005198988575576, "grad_norm": 1.010765325498752, "learning_rate": 4.8504149310837955e-06, "loss": 0.0531, "step": 40755 }, { "epoch": 0.17007285260074606, "grad_norm": 0.6277876786539663, "learning_rate": 4.850117416197937e-06, "loss": 0.0367, "step": 40760 }, { "epoch": 0.17009371531573633, "grad_norm": 0.8999682117898631, "learning_rate": 4.849819956052292e-06, "loss": 0.0402, "step": 40765 }, { "epoch": 0.1701145780307266, "grad_norm": 0.8981744978224978, "learning_rate": 4.849522550630077e-06, "loss": 0.031, "step": 40770 }, { "epoch": 0.17013544074571688, "grad_norm": 0.7237784234583375, "learning_rate": 4.849225199914514e-06, "loss": 0.0313, "step": 40775 }, { "epoch": 0.17015630346070718, "grad_norm": 1.3245131516748352, "learning_rate": 4.848927903888833e-06, "loss": 0.0358, "step": 40780 }, { "epoch": 0.17017716617569745, "grad_norm": 0.73360793297864, "learning_rate": 4.848630662536274e-06, "loss": 0.0244, "step": 40785 }, { "epoch": 0.17019802889068772, "grad_norm": 1.1741709852330682, "learning_rate": 4.848333475840078e-06, "loss": 0.036, "step": 40790 }, { "epoch": 0.170218891605678, "grad_norm": 0.6072610934332, "learning_rate": 4.848036343783501e-06, "loss": 0.0288, "step": 40795 }, { "epoch": 0.17023975432066826, "grad_norm": 0.690369699490428, "learning_rate": 4.847739266349798e-06, "loss": 0.0324, "step": 40800 }, { "epoch": 0.17026061703565856, "grad_norm": 1.3241967050359926, "learning_rate": 4.847442243522237e-06, "loss": 0.036, "step": 40805 }, { "epoch": 0.17028147975064883, "grad_norm": 1.1696459976850535, "learning_rate": 4.847145275284091e-06, "loss": 0.0495, "step": 40810 }, { "epoch": 0.1703023424656391, "grad_norm": 0.9859445065208299, "learning_rate": 4.846848361618643e-06, "loss": 0.0451, "step": 40815 }, { "epoch": 0.17032320518062938, "grad_norm": 0.7590885847781822, "learning_rate": 4.846551502509175e-06, "loss": 0.0353, "step": 40820 }, { "epoch": 0.17034406789561968, "grad_norm": 1.3042875233370377, "learning_rate": 4.8462546979389885e-06, "loss": 0.0345, "step": 40825 }, { "epoch": 0.17036493061060995, "grad_norm": 0.49957405457980186, "learning_rate": 4.84595794789138e-06, "loss": 0.0302, "step": 40830 }, { "epoch": 0.17038579332560022, "grad_norm": 0.5661855205153679, "learning_rate": 4.8456612523496625e-06, "loss": 0.0393, "step": 40835 }, { "epoch": 0.1704066560405905, "grad_norm": 1.5774418331358724, "learning_rate": 4.845364611297151e-06, "loss": 0.0518, "step": 40840 }, { "epoch": 0.17042751875558076, "grad_norm": 0.7375542642625565, "learning_rate": 4.845068024717167e-06, "loss": 0.0331, "step": 40845 }, { "epoch": 0.17044838147057106, "grad_norm": 0.5762599941864245, "learning_rate": 4.844771492593046e-06, "loss": 0.0293, "step": 40850 }, { "epoch": 0.17046924418556134, "grad_norm": 0.6125391561887088, "learning_rate": 4.84447501490812e-06, "loss": 0.0352, "step": 40855 }, { "epoch": 0.1704901069005516, "grad_norm": 0.6379190353280756, "learning_rate": 4.844178591645737e-06, "loss": 0.0332, "step": 40860 }, { "epoch": 0.17051096961554188, "grad_norm": 1.5126324693958733, "learning_rate": 4.8438822227892505e-06, "loss": 0.0339, "step": 40865 }, { "epoch": 0.17053183233053218, "grad_norm": 5.317436422704404, "learning_rate": 4.8435859083220155e-06, "loss": 0.0391, "step": 40870 }, { "epoch": 0.17055269504552245, "grad_norm": 0.6916306081727084, "learning_rate": 4.843289648227402e-06, "loss": 0.0364, "step": 40875 }, { "epoch": 0.17057355776051272, "grad_norm": 0.7921682956638759, "learning_rate": 4.842993442488782e-06, "loss": 0.0346, "step": 40880 }, { "epoch": 0.170594420475503, "grad_norm": 0.7629940729557945, "learning_rate": 4.8426972910895355e-06, "loss": 0.0309, "step": 40885 }, { "epoch": 0.17061528319049327, "grad_norm": 1.5872585786307436, "learning_rate": 4.8424011940130505e-06, "loss": 0.0438, "step": 40890 }, { "epoch": 0.17063614590548357, "grad_norm": 1.2226243442834102, "learning_rate": 4.842105151242722e-06, "loss": 0.04, "step": 40895 }, { "epoch": 0.17065700862047384, "grad_norm": 0.8579022240910853, "learning_rate": 4.841809162761953e-06, "loss": 0.0253, "step": 40900 }, { "epoch": 0.1706778713354641, "grad_norm": 1.0270441178999417, "learning_rate": 4.841513228554149e-06, "loss": 0.0331, "step": 40905 }, { "epoch": 0.17069873405045438, "grad_norm": 1.1327406502667157, "learning_rate": 4.841217348602729e-06, "loss": 0.0376, "step": 40910 }, { "epoch": 0.17071959676544468, "grad_norm": 0.6801196968341886, "learning_rate": 4.840921522891116e-06, "loss": 0.032, "step": 40915 }, { "epoch": 0.17074045948043495, "grad_norm": 0.558479005644025, "learning_rate": 4.840625751402739e-06, "loss": 0.0299, "step": 40920 }, { "epoch": 0.17076132219542522, "grad_norm": 0.974763498861238, "learning_rate": 4.840330034121037e-06, "loss": 0.0377, "step": 40925 }, { "epoch": 0.1707821849104155, "grad_norm": 1.4045412378072206, "learning_rate": 4.840034371029453e-06, "loss": 0.0381, "step": 40930 }, { "epoch": 0.17080304762540577, "grad_norm": 0.9648911223065061, "learning_rate": 4.839738762111438e-06, "loss": 0.0504, "step": 40935 }, { "epoch": 0.17082391034039607, "grad_norm": 0.8464192825644, "learning_rate": 4.839443207350452e-06, "loss": 0.0317, "step": 40940 }, { "epoch": 0.17084477305538634, "grad_norm": 0.9914700107042417, "learning_rate": 4.8391477067299595e-06, "loss": 0.038, "step": 40945 }, { "epoch": 0.1708656357703766, "grad_norm": 0.8132589263583767, "learning_rate": 4.838852260233433e-06, "loss": 0.0357, "step": 40950 }, { "epoch": 0.17088649848536688, "grad_norm": 1.1369707877260367, "learning_rate": 4.838556867844355e-06, "loss": 0.0372, "step": 40955 }, { "epoch": 0.17090736120035718, "grad_norm": 1.242897061128524, "learning_rate": 4.83826152954621e-06, "loss": 0.0422, "step": 40960 }, { "epoch": 0.17092822391534745, "grad_norm": 0.5334912133877889, "learning_rate": 4.83796624532249e-06, "loss": 0.0402, "step": 40965 }, { "epoch": 0.17094908663033773, "grad_norm": 0.879178775381688, "learning_rate": 4.8376710151567e-06, "loss": 0.0415, "step": 40970 }, { "epoch": 0.170969949345328, "grad_norm": 0.6024955556902076, "learning_rate": 4.837375839032345e-06, "loss": 0.034, "step": 40975 }, { "epoch": 0.17099081206031827, "grad_norm": 1.043474955757502, "learning_rate": 4.837080716932942e-06, "loss": 0.0349, "step": 40980 }, { "epoch": 0.17101167477530857, "grad_norm": 1.3633162630995028, "learning_rate": 4.836785648842012e-06, "loss": 0.0424, "step": 40985 }, { "epoch": 0.17103253749029884, "grad_norm": 1.0175627817301343, "learning_rate": 4.836490634743083e-06, "loss": 0.0314, "step": 40990 }, { "epoch": 0.1710534002052891, "grad_norm": 0.6417650967786686, "learning_rate": 4.836195674619693e-06, "loss": 0.0354, "step": 40995 }, { "epoch": 0.17107426292027939, "grad_norm": 1.112027938694224, "learning_rate": 4.8359007684553835e-06, "loss": 0.0327, "step": 41000 }, { "epoch": 0.17109512563526968, "grad_norm": 0.7676464256489882, "learning_rate": 4.835605916233708e-06, "loss": 0.0362, "step": 41005 }, { "epoch": 0.17111598835025996, "grad_norm": 1.9565439818189563, "learning_rate": 4.835311117938219e-06, "loss": 0.0285, "step": 41010 }, { "epoch": 0.17113685106525023, "grad_norm": 1.0325537905265285, "learning_rate": 4.8350163735524835e-06, "loss": 0.0348, "step": 41015 }, { "epoch": 0.1711577137802405, "grad_norm": 1.1793261794871468, "learning_rate": 4.834721683060072e-06, "loss": 0.037, "step": 41020 }, { "epoch": 0.17117857649523077, "grad_norm": 1.0477537841038835, "learning_rate": 4.834427046444563e-06, "loss": 0.0477, "step": 41025 }, { "epoch": 0.17119943921022107, "grad_norm": 0.8201005869024046, "learning_rate": 4.834132463689541e-06, "loss": 0.033, "step": 41030 }, { "epoch": 0.17122030192521134, "grad_norm": 1.1689846662466232, "learning_rate": 4.8338379347786e-06, "loss": 0.0306, "step": 41035 }, { "epoch": 0.17124116464020162, "grad_norm": 0.7424356368767512, "learning_rate": 4.8335434596953376e-06, "loss": 0.0371, "step": 41040 }, { "epoch": 0.1712620273551919, "grad_norm": 0.8168390437364773, "learning_rate": 4.83324903842336e-06, "loss": 0.0393, "step": 41045 }, { "epoch": 0.1712828900701822, "grad_norm": 1.34919279975969, "learning_rate": 4.832954670946281e-06, "loss": 0.034, "step": 41050 }, { "epoch": 0.17130375278517246, "grad_norm": 1.9864358381214697, "learning_rate": 4.832660357247721e-06, "loss": 0.0382, "step": 41055 }, { "epoch": 0.17132461550016273, "grad_norm": 0.5088629566083948, "learning_rate": 4.8323660973113065e-06, "loss": 0.0332, "step": 41060 }, { "epoch": 0.171345478215153, "grad_norm": 1.8609783507527407, "learning_rate": 4.832071891120672e-06, "loss": 0.0407, "step": 41065 }, { "epoch": 0.17136634093014327, "grad_norm": 1.3108710960461367, "learning_rate": 4.83177773865946e-06, "loss": 0.0458, "step": 41070 }, { "epoch": 0.17138720364513357, "grad_norm": 0.7266400206719009, "learning_rate": 4.831483639911314e-06, "loss": 0.0354, "step": 41075 }, { "epoch": 0.17140806636012385, "grad_norm": 0.8770732040471734, "learning_rate": 4.831189594859894e-06, "loss": 0.042, "step": 41080 }, { "epoch": 0.17142892907511412, "grad_norm": 1.0487978276295225, "learning_rate": 4.830895603488861e-06, "loss": 0.0424, "step": 41085 }, { "epoch": 0.1714497917901044, "grad_norm": 0.9131191139170678, "learning_rate": 4.830601665781883e-06, "loss": 0.0352, "step": 41090 }, { "epoch": 0.1714706545050947, "grad_norm": 1.1226425016711288, "learning_rate": 4.8303077817226355e-06, "loss": 0.0418, "step": 41095 }, { "epoch": 0.17149151722008496, "grad_norm": 0.57384597421876, "learning_rate": 4.830013951294803e-06, "loss": 0.0267, "step": 41100 }, { "epoch": 0.17151237993507523, "grad_norm": 1.3650063878397627, "learning_rate": 4.829720174482073e-06, "loss": 0.0498, "step": 41105 }, { "epoch": 0.1715332426500655, "grad_norm": 1.2161166400456276, "learning_rate": 4.829426451268145e-06, "loss": 0.0407, "step": 41110 }, { "epoch": 0.17155410536505578, "grad_norm": 1.2534027794980693, "learning_rate": 4.82913278163672e-06, "loss": 0.041, "step": 41115 }, { "epoch": 0.17157496808004608, "grad_norm": 0.7186761437607427, "learning_rate": 4.828839165571511e-06, "loss": 0.0366, "step": 41120 }, { "epoch": 0.17159583079503635, "grad_norm": 1.1735255542261633, "learning_rate": 4.828545603056235e-06, "loss": 0.0389, "step": 41125 }, { "epoch": 0.17161669351002662, "grad_norm": 1.6528725634264634, "learning_rate": 4.828252094074616e-06, "loss": 0.0365, "step": 41130 }, { "epoch": 0.1716375562250169, "grad_norm": 1.105744129010123, "learning_rate": 4.827958638610386e-06, "loss": 0.0438, "step": 41135 }, { "epoch": 0.1716584189400072, "grad_norm": 0.6523818533724162, "learning_rate": 4.827665236647282e-06, "loss": 0.0272, "step": 41140 }, { "epoch": 0.17167928165499746, "grad_norm": 1.2195178040044485, "learning_rate": 4.82737188816905e-06, "loss": 0.0339, "step": 41145 }, { "epoch": 0.17170014436998773, "grad_norm": 0.4612865535315258, "learning_rate": 4.827078593159442e-06, "loss": 0.0302, "step": 41150 }, { "epoch": 0.171721007084978, "grad_norm": 1.7806190809920708, "learning_rate": 4.826785351602219e-06, "loss": 0.0437, "step": 41155 }, { "epoch": 0.17174186979996828, "grad_norm": 0.7660169098825599, "learning_rate": 4.826492163481145e-06, "loss": 0.0451, "step": 41160 }, { "epoch": 0.17176273251495858, "grad_norm": 1.195110971753867, "learning_rate": 4.826199028779993e-06, "loss": 0.0347, "step": 41165 }, { "epoch": 0.17178359522994885, "grad_norm": 0.7463602770580405, "learning_rate": 4.825905947482544e-06, "loss": 0.0372, "step": 41170 }, { "epoch": 0.17180445794493912, "grad_norm": 1.1955803548147692, "learning_rate": 4.825612919572584e-06, "loss": 0.034, "step": 41175 }, { "epoch": 0.1718253206599294, "grad_norm": 0.6799421477022356, "learning_rate": 4.825319945033905e-06, "loss": 0.0372, "step": 41180 }, { "epoch": 0.1718461833749197, "grad_norm": 0.5747437731706432, "learning_rate": 4.825027023850311e-06, "loss": 0.0269, "step": 41185 }, { "epoch": 0.17186704608990996, "grad_norm": 0.8047221517893081, "learning_rate": 4.824734156005606e-06, "loss": 0.0309, "step": 41190 }, { "epoch": 0.17188790880490024, "grad_norm": 1.0236870810786682, "learning_rate": 4.824441341483606e-06, "loss": 0.0422, "step": 41195 }, { "epoch": 0.1719087715198905, "grad_norm": 1.0738629068408023, "learning_rate": 4.8241485802681305e-06, "loss": 0.0377, "step": 41200 }, { "epoch": 0.17192963423488078, "grad_norm": 0.7627897103654537, "learning_rate": 4.82385587234301e-06, "loss": 0.0301, "step": 41205 }, { "epoch": 0.17195049694987108, "grad_norm": 0.8627885389513573, "learning_rate": 4.823563217692078e-06, "loss": 0.0399, "step": 41210 }, { "epoch": 0.17197135966486135, "grad_norm": 1.2015954162932647, "learning_rate": 4.8232706162991765e-06, "loss": 0.0483, "step": 41215 }, { "epoch": 0.17199222237985162, "grad_norm": 1.2079829267732642, "learning_rate": 4.822978068148153e-06, "loss": 0.049, "step": 41220 }, { "epoch": 0.1720130850948419, "grad_norm": 0.8018863623198339, "learning_rate": 4.822685573222863e-06, "loss": 0.0372, "step": 41225 }, { "epoch": 0.1720339478098322, "grad_norm": 0.5175817383654598, "learning_rate": 4.8223931315071715e-06, "loss": 0.047, "step": 41230 }, { "epoch": 0.17205481052482247, "grad_norm": 1.2059167367454393, "learning_rate": 4.822100742984945e-06, "loss": 0.0346, "step": 41235 }, { "epoch": 0.17207567323981274, "grad_norm": 1.1380585146778468, "learning_rate": 4.821808407640059e-06, "loss": 0.0364, "step": 41240 }, { "epoch": 0.172096535954803, "grad_norm": 0.7664763116554184, "learning_rate": 4.821516125456399e-06, "loss": 0.0375, "step": 41245 }, { "epoch": 0.17211739866979328, "grad_norm": 1.284850842361742, "learning_rate": 4.8212238964178515e-06, "loss": 0.0451, "step": 41250 }, { "epoch": 0.17213826138478358, "grad_norm": 0.8397568671680498, "learning_rate": 4.820931720508315e-06, "loss": 0.039, "step": 41255 }, { "epoch": 0.17215912409977385, "grad_norm": 2.6409505810520013, "learning_rate": 4.820639597711694e-06, "loss": 0.0287, "step": 41260 }, { "epoch": 0.17217998681476412, "grad_norm": 0.8616096235229034, "learning_rate": 4.820347528011896e-06, "loss": 0.0412, "step": 41265 }, { "epoch": 0.1722008495297544, "grad_norm": 0.6883714207922588, "learning_rate": 4.8200555113928385e-06, "loss": 0.0335, "step": 41270 }, { "epoch": 0.1722217122447447, "grad_norm": 0.9334017756394468, "learning_rate": 4.8197635478384474e-06, "loss": 0.0419, "step": 41275 }, { "epoch": 0.17224257495973497, "grad_norm": 0.8464139912757667, "learning_rate": 4.819471637332651e-06, "loss": 0.0423, "step": 41280 }, { "epoch": 0.17226343767472524, "grad_norm": 1.512000209618615, "learning_rate": 4.819179779859386e-06, "loss": 0.0325, "step": 41285 }, { "epoch": 0.1722843003897155, "grad_norm": 0.7754477785838945, "learning_rate": 4.8188879754026005e-06, "loss": 0.0316, "step": 41290 }, { "epoch": 0.17230516310470578, "grad_norm": 0.889376660059524, "learning_rate": 4.818596223946241e-06, "loss": 0.0345, "step": 41295 }, { "epoch": 0.17232602581969608, "grad_norm": 0.7305096271026235, "learning_rate": 4.8183045254742685e-06, "loss": 0.0392, "step": 41300 }, { "epoch": 0.17234688853468635, "grad_norm": 1.0170811441369298, "learning_rate": 4.818012879970647e-06, "loss": 0.0326, "step": 41305 }, { "epoch": 0.17236775124967663, "grad_norm": 0.4743127628511829, "learning_rate": 4.817721287419348e-06, "loss": 0.0333, "step": 41310 }, { "epoch": 0.1723886139646669, "grad_norm": 0.8425213174574969, "learning_rate": 4.817429747804348e-06, "loss": 0.0395, "step": 41315 }, { "epoch": 0.1724094766796572, "grad_norm": 0.9830758036424251, "learning_rate": 4.817138261109633e-06, "loss": 0.0366, "step": 41320 }, { "epoch": 0.17243033939464747, "grad_norm": 1.1670277876666908, "learning_rate": 4.816846827319196e-06, "loss": 0.0384, "step": 41325 }, { "epoch": 0.17245120210963774, "grad_norm": 0.6988187515936697, "learning_rate": 4.816555446417034e-06, "loss": 0.0411, "step": 41330 }, { "epoch": 0.172472064824628, "grad_norm": 1.2460318526092713, "learning_rate": 4.816264118387153e-06, "loss": 0.0399, "step": 41335 }, { "epoch": 0.17249292753961828, "grad_norm": 0.6108333059574336, "learning_rate": 4.815972843213566e-06, "loss": 0.0507, "step": 41340 }, { "epoch": 0.17251379025460858, "grad_norm": 1.1698402885272978, "learning_rate": 4.815681620880289e-06, "loss": 0.0406, "step": 41345 }, { "epoch": 0.17253465296959886, "grad_norm": 1.2925161864899102, "learning_rate": 4.815390451371351e-06, "loss": 0.0378, "step": 41350 }, { "epoch": 0.17255551568458913, "grad_norm": 0.8018240359944037, "learning_rate": 4.815099334670783e-06, "loss": 0.035, "step": 41355 }, { "epoch": 0.1725763783995794, "grad_norm": 0.9902967783984545, "learning_rate": 4.814808270762624e-06, "loss": 0.0337, "step": 41360 }, { "epoch": 0.1725972411145697, "grad_norm": 1.0827252440585262, "learning_rate": 4.8145172596309195e-06, "loss": 0.0378, "step": 41365 }, { "epoch": 0.17261810382955997, "grad_norm": 1.2125260727882425, "learning_rate": 4.814226301259723e-06, "loss": 0.0346, "step": 41370 }, { "epoch": 0.17263896654455024, "grad_norm": 1.1973705884470844, "learning_rate": 4.813935395633093e-06, "loss": 0.037, "step": 41375 }, { "epoch": 0.17265982925954051, "grad_norm": 0.8598880609864185, "learning_rate": 4.813644542735097e-06, "loss": 0.0245, "step": 41380 }, { "epoch": 0.1726806919745308, "grad_norm": 1.1089379543874212, "learning_rate": 4.813353742549806e-06, "loss": 0.0299, "step": 41385 }, { "epoch": 0.1727015546895211, "grad_norm": 1.0722990154775283, "learning_rate": 4.8130629950613026e-06, "loss": 0.0426, "step": 41390 }, { "epoch": 0.17272241740451136, "grad_norm": 0.5226038004189059, "learning_rate": 4.812772300253671e-06, "loss": 0.0277, "step": 41395 }, { "epoch": 0.17274328011950163, "grad_norm": 0.735559187774154, "learning_rate": 4.812481658111003e-06, "loss": 0.0376, "step": 41400 }, { "epoch": 0.1727641428344919, "grad_norm": 0.8716970966971614, "learning_rate": 4.8121910686174e-06, "loss": 0.0318, "step": 41405 }, { "epoch": 0.1727850055494822, "grad_norm": 1.222254495530191, "learning_rate": 4.81190053175697e-06, "loss": 0.0343, "step": 41410 }, { "epoch": 0.17280586826447247, "grad_norm": 0.9832622584596824, "learning_rate": 4.811610047513822e-06, "loss": 0.0395, "step": 41415 }, { "epoch": 0.17282673097946274, "grad_norm": 1.0242369301400522, "learning_rate": 4.811319615872082e-06, "loss": 0.0316, "step": 41420 }, { "epoch": 0.17284759369445302, "grad_norm": 1.5077484997973614, "learning_rate": 4.811029236815871e-06, "loss": 0.0236, "step": 41425 }, { "epoch": 0.1728684564094433, "grad_norm": 1.1690177243897169, "learning_rate": 4.810738910329326e-06, "loss": 0.0421, "step": 41430 }, { "epoch": 0.1728893191244336, "grad_norm": 1.126801195140832, "learning_rate": 4.8104486363965854e-06, "loss": 0.0415, "step": 41435 }, { "epoch": 0.17291018183942386, "grad_norm": 0.6346019220815711, "learning_rate": 4.810158415001795e-06, "loss": 0.0386, "step": 41440 }, { "epoch": 0.17293104455441413, "grad_norm": 0.7542595055586756, "learning_rate": 4.80986824612911e-06, "loss": 0.0417, "step": 41445 }, { "epoch": 0.1729519072694044, "grad_norm": 1.9974864100488274, "learning_rate": 4.809578129762692e-06, "loss": 0.0417, "step": 41450 }, { "epoch": 0.17297276998439468, "grad_norm": 1.1094735501310369, "learning_rate": 4.809288065886704e-06, "loss": 0.0361, "step": 41455 }, { "epoch": 0.17299363269938497, "grad_norm": 3.470685268022465, "learning_rate": 4.808998054485322e-06, "loss": 0.0352, "step": 41460 }, { "epoch": 0.17301449541437525, "grad_norm": 0.5562194201936523, "learning_rate": 4.808708095542725e-06, "loss": 0.0309, "step": 41465 }, { "epoch": 0.17303535812936552, "grad_norm": 0.8666065309550705, "learning_rate": 4.8084181890431015e-06, "loss": 0.0358, "step": 41470 }, { "epoch": 0.1730562208443558, "grad_norm": 1.4452151281093824, "learning_rate": 4.808128334970644e-06, "loss": 0.0494, "step": 41475 }, { "epoch": 0.1730770835593461, "grad_norm": 5.025578152561554, "learning_rate": 4.807838533309553e-06, "loss": 0.0314, "step": 41480 }, { "epoch": 0.17309794627433636, "grad_norm": 0.8197525910919309, "learning_rate": 4.807548784044034e-06, "loss": 0.0308, "step": 41485 }, { "epoch": 0.17311880898932663, "grad_norm": 0.8319242967660505, "learning_rate": 4.807259087158302e-06, "loss": 0.0364, "step": 41490 }, { "epoch": 0.1731396717043169, "grad_norm": 1.1233489322720105, "learning_rate": 4.806969442636576e-06, "loss": 0.0465, "step": 41495 }, { "epoch": 0.17316053441930718, "grad_norm": 0.568353912082789, "learning_rate": 4.806679850463086e-06, "loss": 0.0438, "step": 41500 }, { "epoch": 0.17318139713429748, "grad_norm": 0.9498081511712985, "learning_rate": 4.806390310622061e-06, "loss": 0.032, "step": 41505 }, { "epoch": 0.17320225984928775, "grad_norm": 0.9346068070047564, "learning_rate": 4.806100823097745e-06, "loss": 0.0307, "step": 41510 }, { "epoch": 0.17322312256427802, "grad_norm": 1.0641047524597984, "learning_rate": 4.8058113878743825e-06, "loss": 0.0347, "step": 41515 }, { "epoch": 0.1732439852792683, "grad_norm": 0.9502261640550786, "learning_rate": 4.805522004936228e-06, "loss": 0.0398, "step": 41520 }, { "epoch": 0.1732648479942586, "grad_norm": 0.9135126658358371, "learning_rate": 4.80523267426754e-06, "loss": 0.0293, "step": 41525 }, { "epoch": 0.17328571070924886, "grad_norm": 0.8302691316406379, "learning_rate": 4.804943395852587e-06, "loss": 0.0317, "step": 41530 }, { "epoch": 0.17330657342423914, "grad_norm": 0.6679298008303454, "learning_rate": 4.804654169675643e-06, "loss": 0.0342, "step": 41535 }, { "epoch": 0.1733274361392294, "grad_norm": 0.6692732677427892, "learning_rate": 4.804364995720986e-06, "loss": 0.0326, "step": 41540 }, { "epoch": 0.17334829885421968, "grad_norm": 0.8388928864034556, "learning_rate": 4.804075873972902e-06, "loss": 0.0373, "step": 41545 }, { "epoch": 0.17336916156920998, "grad_norm": 0.7957879269817237, "learning_rate": 4.803786804415687e-06, "loss": 0.0492, "step": 41550 }, { "epoch": 0.17339002428420025, "grad_norm": 1.326352346596562, "learning_rate": 4.803497787033639e-06, "loss": 0.0342, "step": 41555 }, { "epoch": 0.17341088699919052, "grad_norm": 0.7809096278643589, "learning_rate": 4.803208821811064e-06, "loss": 0.0339, "step": 41560 }, { "epoch": 0.1734317497141808, "grad_norm": 1.350393153446896, "learning_rate": 4.802919908732276e-06, "loss": 0.0438, "step": 41565 }, { "epoch": 0.1734526124291711, "grad_norm": 1.253382812008309, "learning_rate": 4.802631047781595e-06, "loss": 0.0481, "step": 41570 }, { "epoch": 0.17347347514416137, "grad_norm": 0.48820088938426953, "learning_rate": 4.802342238943346e-06, "loss": 0.0303, "step": 41575 }, { "epoch": 0.17349433785915164, "grad_norm": 0.562582982407067, "learning_rate": 4.802053482201862e-06, "loss": 0.0315, "step": 41580 }, { "epoch": 0.1735152005741419, "grad_norm": 1.005334811201769, "learning_rate": 4.8017647775414835e-06, "loss": 0.0466, "step": 41585 }, { "epoch": 0.17353606328913218, "grad_norm": 0.6788092663052206, "learning_rate": 4.801476124946557e-06, "loss": 0.0469, "step": 41590 }, { "epoch": 0.17355692600412248, "grad_norm": 0.843629797225406, "learning_rate": 4.8011875244014326e-06, "loss": 0.025, "step": 41595 }, { "epoch": 0.17357778871911275, "grad_norm": 1.3926990808358533, "learning_rate": 4.800898975890471e-06, "loss": 0.0446, "step": 41600 }, { "epoch": 0.17359865143410302, "grad_norm": 0.8146438648321706, "learning_rate": 4.800610479398038e-06, "loss": 0.0456, "step": 41605 }, { "epoch": 0.1736195141490933, "grad_norm": 0.7405597072865379, "learning_rate": 4.800322034908506e-06, "loss": 0.0338, "step": 41610 }, { "epoch": 0.1736403768640836, "grad_norm": 0.6321906625941711, "learning_rate": 4.800033642406254e-06, "loss": 0.0363, "step": 41615 }, { "epoch": 0.17366123957907387, "grad_norm": 0.7846908241782271, "learning_rate": 4.799745301875667e-06, "loss": 0.0324, "step": 41620 }, { "epoch": 0.17368210229406414, "grad_norm": 0.5600923106480685, "learning_rate": 4.799457013301137e-06, "loss": 0.0359, "step": 41625 }, { "epoch": 0.1737029650090544, "grad_norm": 0.9171689352714979, "learning_rate": 4.799168776667063e-06, "loss": 0.0494, "step": 41630 }, { "epoch": 0.17372382772404468, "grad_norm": 1.4862415995623743, "learning_rate": 4.79888059195785e-06, "loss": 0.039, "step": 41635 }, { "epoch": 0.17374469043903498, "grad_norm": 0.9174196307628358, "learning_rate": 4.798592459157911e-06, "loss": 0.0433, "step": 41640 }, { "epoch": 0.17376555315402525, "grad_norm": 1.0778844748143526, "learning_rate": 4.798304378251661e-06, "loss": 0.0374, "step": 41645 }, { "epoch": 0.17378641586901553, "grad_norm": 1.132995148207494, "learning_rate": 4.798016349223527e-06, "loss": 0.0412, "step": 41650 }, { "epoch": 0.1738072785840058, "grad_norm": 0.7517020269542151, "learning_rate": 4.797728372057941e-06, "loss": 0.0431, "step": 41655 }, { "epoch": 0.1738281412989961, "grad_norm": 0.737557401245612, "learning_rate": 4.797440446739342e-06, "loss": 0.0258, "step": 41660 }, { "epoch": 0.17384900401398637, "grad_norm": 0.9843713586923492, "learning_rate": 4.79715257325217e-06, "loss": 0.033, "step": 41665 }, { "epoch": 0.17386986672897664, "grad_norm": 0.896047653509314, "learning_rate": 4.796864751580879e-06, "loss": 0.0343, "step": 41670 }, { "epoch": 0.1738907294439669, "grad_norm": 0.4197778983740042, "learning_rate": 4.796576981709926e-06, "loss": 0.0341, "step": 41675 }, { "epoch": 0.17391159215895718, "grad_norm": 0.4420497146293974, "learning_rate": 4.796289263623776e-06, "loss": 0.0306, "step": 41680 }, { "epoch": 0.17393245487394748, "grad_norm": 0.8302867195033824, "learning_rate": 4.796001597306897e-06, "loss": 0.0308, "step": 41685 }, { "epoch": 0.17395331758893776, "grad_norm": 0.7242335720106097, "learning_rate": 4.79571398274377e-06, "loss": 0.0404, "step": 41690 }, { "epoch": 0.17397418030392803, "grad_norm": 1.1655809151821401, "learning_rate": 4.795426419918874e-06, "loss": 0.0357, "step": 41695 }, { "epoch": 0.1739950430189183, "grad_norm": 1.359769616943344, "learning_rate": 4.795138908816703e-06, "loss": 0.043, "step": 41700 }, { "epoch": 0.1740159057339086, "grad_norm": 0.6452090212404773, "learning_rate": 4.794851449421751e-06, "loss": 0.0337, "step": 41705 }, { "epoch": 0.17403676844889887, "grad_norm": 1.1323417998586056, "learning_rate": 4.794564041718523e-06, "loss": 0.0399, "step": 41710 }, { "epoch": 0.17405763116388914, "grad_norm": 0.6233854890453068, "learning_rate": 4.794276685691527e-06, "loss": 0.0391, "step": 41715 }, { "epoch": 0.17407849387887941, "grad_norm": 1.0668838574994182, "learning_rate": 4.7939893813252815e-06, "loss": 0.034, "step": 41720 }, { "epoch": 0.1740993565938697, "grad_norm": 0.8037869330813203, "learning_rate": 4.793702128604306e-06, "loss": 0.0289, "step": 41725 }, { "epoch": 0.17412021930885999, "grad_norm": 0.8357191568062441, "learning_rate": 4.793414927513131e-06, "loss": 0.0312, "step": 41730 }, { "epoch": 0.17414108202385026, "grad_norm": 0.6744357275556117, "learning_rate": 4.7931277780362925e-06, "loss": 0.0458, "step": 41735 }, { "epoch": 0.17416194473884053, "grad_norm": 0.7718901150624803, "learning_rate": 4.792840680158332e-06, "loss": 0.0447, "step": 41740 }, { "epoch": 0.1741828074538308, "grad_norm": 0.8693322748192168, "learning_rate": 4.792553633863798e-06, "loss": 0.0271, "step": 41745 }, { "epoch": 0.1742036701688211, "grad_norm": 0.8911156754646067, "learning_rate": 4.792266639137247e-06, "loss": 0.0335, "step": 41750 }, { "epoch": 0.17422453288381137, "grad_norm": 2.156639710465209, "learning_rate": 4.791979695963239e-06, "loss": 0.0491, "step": 41755 }, { "epoch": 0.17424539559880164, "grad_norm": 1.0607767138864939, "learning_rate": 4.791692804326341e-06, "loss": 0.045, "step": 41760 }, { "epoch": 0.17426625831379192, "grad_norm": 1.73333487693421, "learning_rate": 4.791405964211129e-06, "loss": 0.0369, "step": 41765 }, { "epoch": 0.1742871210287822, "grad_norm": 0.5216844122673187, "learning_rate": 4.791119175602184e-06, "loss": 0.0462, "step": 41770 }, { "epoch": 0.1743079837437725, "grad_norm": 0.7755261711082878, "learning_rate": 4.790832438484094e-06, "loss": 0.0254, "step": 41775 }, { "epoch": 0.17432884645876276, "grad_norm": 0.8446472600737671, "learning_rate": 4.790545752841451e-06, "loss": 0.0336, "step": 41780 }, { "epoch": 0.17434970917375303, "grad_norm": 0.9641344276924971, "learning_rate": 4.790259118658856e-06, "loss": 0.0392, "step": 41785 }, { "epoch": 0.1743705718887433, "grad_norm": 1.0586839371281973, "learning_rate": 4.7899725359209144e-06, "loss": 0.0421, "step": 41790 }, { "epoch": 0.1743914346037336, "grad_norm": 0.9776156059844804, "learning_rate": 4.789686004612244e-06, "loss": 0.0315, "step": 41795 }, { "epoch": 0.17441229731872387, "grad_norm": 0.6158020288450414, "learning_rate": 4.789399524717458e-06, "loss": 0.0337, "step": 41800 }, { "epoch": 0.17443316003371415, "grad_norm": 0.720604883436232, "learning_rate": 4.789113096221187e-06, "loss": 0.0374, "step": 41805 }, { "epoch": 0.17445402274870442, "grad_norm": 1.9682505644666508, "learning_rate": 4.788826719108062e-06, "loss": 0.0453, "step": 41810 }, { "epoch": 0.1744748854636947, "grad_norm": 0.7103928816446711, "learning_rate": 4.788540393362723e-06, "loss": 0.0312, "step": 41815 }, { "epoch": 0.174495748178685, "grad_norm": 0.9105802007819427, "learning_rate": 4.788254118969811e-06, "loss": 0.0361, "step": 41820 }, { "epoch": 0.17451661089367526, "grad_norm": 0.6119240878768227, "learning_rate": 4.787967895913984e-06, "loss": 0.042, "step": 41825 }, { "epoch": 0.17453747360866553, "grad_norm": 0.6662296353526682, "learning_rate": 4.787681724179896e-06, "loss": 0.0327, "step": 41830 }, { "epoch": 0.1745583363236558, "grad_norm": 1.1406588198022176, "learning_rate": 4.787395603752215e-06, "loss": 0.0518, "step": 41835 }, { "epoch": 0.1745791990386461, "grad_norm": 0.6385955072888074, "learning_rate": 4.787109534615608e-06, "loss": 0.0365, "step": 41840 }, { "epoch": 0.17460006175363638, "grad_norm": 0.7664753161199067, "learning_rate": 4.7868235167547545e-06, "loss": 0.0399, "step": 41845 }, { "epoch": 0.17462092446862665, "grad_norm": 2.1626386797134587, "learning_rate": 4.786537550154339e-06, "loss": 0.0388, "step": 41850 }, { "epoch": 0.17464178718361692, "grad_norm": 1.027621724888331, "learning_rate": 4.786251634799049e-06, "loss": 0.0397, "step": 41855 }, { "epoch": 0.1746626498986072, "grad_norm": 0.9452112892811808, "learning_rate": 4.785965770673585e-06, "loss": 0.0304, "step": 41860 }, { "epoch": 0.1746835126135975, "grad_norm": 0.5737230072172724, "learning_rate": 4.785679957762647e-06, "loss": 0.0282, "step": 41865 }, { "epoch": 0.17470437532858776, "grad_norm": 0.8871580412075892, "learning_rate": 4.785394196050946e-06, "loss": 0.0456, "step": 41870 }, { "epoch": 0.17472523804357804, "grad_norm": 0.7148510367870823, "learning_rate": 4.785108485523197e-06, "loss": 0.0399, "step": 41875 }, { "epoch": 0.1747461007585683, "grad_norm": 0.7362748727107838, "learning_rate": 4.7848228261641236e-06, "loss": 0.0355, "step": 41880 }, { "epoch": 0.1747669634735586, "grad_norm": 0.8073589492765026, "learning_rate": 4.784537217958453e-06, "loss": 0.0287, "step": 41885 }, { "epoch": 0.17478782618854888, "grad_norm": 0.8246697235771717, "learning_rate": 4.78425166089092e-06, "loss": 0.0383, "step": 41890 }, { "epoch": 0.17480868890353915, "grad_norm": 1.4295561674374468, "learning_rate": 4.783966154946267e-06, "loss": 0.0428, "step": 41895 }, { "epoch": 0.17482955161852942, "grad_norm": 0.5252426230335064, "learning_rate": 4.7836807001092405e-06, "loss": 0.0318, "step": 41900 }, { "epoch": 0.1748504143335197, "grad_norm": 1.002645381689307, "learning_rate": 4.783395296364595e-06, "loss": 0.0341, "step": 41905 }, { "epoch": 0.17487127704851, "grad_norm": 0.6491521110451647, "learning_rate": 4.783109943697092e-06, "loss": 0.0274, "step": 41910 }, { "epoch": 0.17489213976350027, "grad_norm": 0.8063972493788678, "learning_rate": 4.782824642091498e-06, "loss": 0.0347, "step": 41915 }, { "epoch": 0.17491300247849054, "grad_norm": 2.5912987082457386, "learning_rate": 4.782539391532585e-06, "loss": 0.0313, "step": 41920 }, { "epoch": 0.1749338651934808, "grad_norm": 0.939906995432071, "learning_rate": 4.782254192005134e-06, "loss": 0.0334, "step": 41925 }, { "epoch": 0.1749547279084711, "grad_norm": 1.1545892633270962, "learning_rate": 4.78196904349393e-06, "loss": 0.048, "step": 41930 }, { "epoch": 0.17497559062346138, "grad_norm": 1.1064053268872072, "learning_rate": 4.781683945983765e-06, "loss": 0.0422, "step": 41935 }, { "epoch": 0.17499645333845165, "grad_norm": 1.0207361396202752, "learning_rate": 4.781398899459438e-06, "loss": 0.033, "step": 41940 }, { "epoch": 0.17501731605344192, "grad_norm": 0.5425567242451125, "learning_rate": 4.781113903905755e-06, "loss": 0.0354, "step": 41945 }, { "epoch": 0.1750381787684322, "grad_norm": 1.3359976777642903, "learning_rate": 4.780828959307525e-06, "loss": 0.0388, "step": 41950 }, { "epoch": 0.1750590414834225, "grad_norm": 0.7884292798885151, "learning_rate": 4.780544065649569e-06, "loss": 0.0307, "step": 41955 }, { "epoch": 0.17507990419841277, "grad_norm": 0.8457288676001943, "learning_rate": 4.780259222916707e-06, "loss": 0.0308, "step": 41960 }, { "epoch": 0.17510076691340304, "grad_norm": 0.9254747588295137, "learning_rate": 4.77997443109377e-06, "loss": 0.0325, "step": 41965 }, { "epoch": 0.1751216296283933, "grad_norm": 0.7934106254797912, "learning_rate": 4.779689690165597e-06, "loss": 0.0379, "step": 41970 }, { "epoch": 0.1751424923433836, "grad_norm": 1.3643107057498323, "learning_rate": 4.77940500011703e-06, "loss": 0.0434, "step": 41975 }, { "epoch": 0.17516335505837388, "grad_norm": 0.9026229444804738, "learning_rate": 4.779120360932917e-06, "loss": 0.0403, "step": 41980 }, { "epoch": 0.17518421777336415, "grad_norm": 0.9640589699089048, "learning_rate": 4.778835772598115e-06, "loss": 0.0327, "step": 41985 }, { "epoch": 0.17520508048835443, "grad_norm": 0.8955735098862347, "learning_rate": 4.778551235097484e-06, "loss": 0.0419, "step": 41990 }, { "epoch": 0.1752259432033447, "grad_norm": 0.8575354452321863, "learning_rate": 4.7782667484158954e-06, "loss": 0.0353, "step": 41995 }, { "epoch": 0.175246805918335, "grad_norm": 0.9737570230686511, "learning_rate": 4.77798231253822e-06, "loss": 0.0358, "step": 42000 }, { "epoch": 0.17526766863332527, "grad_norm": 1.2622083489871283, "learning_rate": 4.77769792744934e-06, "loss": 0.0434, "step": 42005 }, { "epoch": 0.17528853134831554, "grad_norm": 1.5481914916034125, "learning_rate": 4.777413593134143e-06, "loss": 0.0323, "step": 42010 }, { "epoch": 0.1753093940633058, "grad_norm": 0.9798005198610963, "learning_rate": 4.777129309577523e-06, "loss": 0.0359, "step": 42015 }, { "epoch": 0.1753302567782961, "grad_norm": 0.898746830129635, "learning_rate": 4.77684507676438e-06, "loss": 0.03, "step": 42020 }, { "epoch": 0.17535111949328638, "grad_norm": 0.9979340369036501, "learning_rate": 4.776560894679615e-06, "loss": 0.0378, "step": 42025 }, { "epoch": 0.17537198220827666, "grad_norm": 1.0934611596561807, "learning_rate": 4.776276763308146e-06, "loss": 0.0478, "step": 42030 }, { "epoch": 0.17539284492326693, "grad_norm": 0.7605427377942664, "learning_rate": 4.775992682634889e-06, "loss": 0.0317, "step": 42035 }, { "epoch": 0.1754137076382572, "grad_norm": 1.1400137024761967, "learning_rate": 4.77570865264477e-06, "loss": 0.0446, "step": 42040 }, { "epoch": 0.1754345703532475, "grad_norm": 0.8189602240202639, "learning_rate": 4.77542467332272e-06, "loss": 0.0371, "step": 42045 }, { "epoch": 0.17545543306823777, "grad_norm": 0.53107562618054, "learning_rate": 4.775140744653674e-06, "loss": 0.0259, "step": 42050 }, { "epoch": 0.17547629578322804, "grad_norm": 0.9900317367700985, "learning_rate": 4.7748568666225775e-06, "loss": 0.0421, "step": 42055 }, { "epoch": 0.17549715849821831, "grad_norm": 0.7603139074855432, "learning_rate": 4.77457303921438e-06, "loss": 0.0382, "step": 42060 }, { "epoch": 0.1755180212132086, "grad_norm": 0.699061073286809, "learning_rate": 4.774289262414039e-06, "loss": 0.0298, "step": 42065 }, { "epoch": 0.17553888392819889, "grad_norm": 1.1886551017950433, "learning_rate": 4.774005536206514e-06, "loss": 0.0275, "step": 42070 }, { "epoch": 0.17555974664318916, "grad_norm": 0.9971555011414585, "learning_rate": 4.773721860576777e-06, "loss": 0.0358, "step": 42075 }, { "epoch": 0.17558060935817943, "grad_norm": 0.6518407908384933, "learning_rate": 4.773438235509799e-06, "loss": 0.0381, "step": 42080 }, { "epoch": 0.1756014720731697, "grad_norm": 0.9317218951816348, "learning_rate": 4.773154660990566e-06, "loss": 0.0343, "step": 42085 }, { "epoch": 0.17562233478816, "grad_norm": 0.8966770910560381, "learning_rate": 4.772871137004062e-06, "loss": 0.0417, "step": 42090 }, { "epoch": 0.17564319750315027, "grad_norm": 0.7589522133028312, "learning_rate": 4.77258766353528e-06, "loss": 0.0397, "step": 42095 }, { "epoch": 0.17566406021814054, "grad_norm": 1.9244510237106078, "learning_rate": 4.772304240569222e-06, "loss": 0.0389, "step": 42100 }, { "epoch": 0.17568492293313082, "grad_norm": 1.315927196003933, "learning_rate": 4.772020868090894e-06, "loss": 0.0448, "step": 42105 }, { "epoch": 0.17570578564812112, "grad_norm": 0.7587138062590709, "learning_rate": 4.771737546085307e-06, "loss": 0.0283, "step": 42110 }, { "epoch": 0.1757266483631114, "grad_norm": 0.8617533672985995, "learning_rate": 4.771454274537481e-06, "loss": 0.0412, "step": 42115 }, { "epoch": 0.17574751107810166, "grad_norm": 0.8477917116786939, "learning_rate": 4.771171053432439e-06, "loss": 0.0355, "step": 42120 }, { "epoch": 0.17576837379309193, "grad_norm": 1.3300359434993587, "learning_rate": 4.770887882755214e-06, "loss": 0.0445, "step": 42125 }, { "epoch": 0.1757892365080822, "grad_norm": 0.6228639181622075, "learning_rate": 4.770604762490842e-06, "loss": 0.0425, "step": 42130 }, { "epoch": 0.1758100992230725, "grad_norm": 0.8180621534904777, "learning_rate": 4.770321692624368e-06, "loss": 0.0317, "step": 42135 }, { "epoch": 0.17583096193806277, "grad_norm": 0.8360499685063222, "learning_rate": 4.77003867314084e-06, "loss": 0.031, "step": 42140 }, { "epoch": 0.17585182465305305, "grad_norm": 0.9387157158313637, "learning_rate": 4.769755704025313e-06, "loss": 0.0443, "step": 42145 }, { "epoch": 0.17587268736804332, "grad_norm": 0.5534330491166879, "learning_rate": 4.769472785262851e-06, "loss": 0.0326, "step": 42150 }, { "epoch": 0.17589355008303362, "grad_norm": 0.875054831837108, "learning_rate": 4.769189916838522e-06, "loss": 0.034, "step": 42155 }, { "epoch": 0.1759144127980239, "grad_norm": 0.6373799483584816, "learning_rate": 4.7689070987374e-06, "loss": 0.0339, "step": 42160 }, { "epoch": 0.17593527551301416, "grad_norm": 1.0676612632364355, "learning_rate": 4.768624330944566e-06, "loss": 0.0468, "step": 42165 }, { "epoch": 0.17595613822800443, "grad_norm": 0.7351764253096813, "learning_rate": 4.768341613445106e-06, "loss": 0.0279, "step": 42170 }, { "epoch": 0.1759770009429947, "grad_norm": 0.8231085223528367, "learning_rate": 4.768058946224115e-06, "loss": 0.0339, "step": 42175 }, { "epoch": 0.175997863657985, "grad_norm": 0.7627982502046806, "learning_rate": 4.7677763292666895e-06, "loss": 0.0349, "step": 42180 }, { "epoch": 0.17601872637297528, "grad_norm": 0.772669984903997, "learning_rate": 4.767493762557937e-06, "loss": 0.0342, "step": 42185 }, { "epoch": 0.17603958908796555, "grad_norm": 0.7290658888796789, "learning_rate": 4.767211246082968e-06, "loss": 0.0347, "step": 42190 }, { "epoch": 0.17606045180295582, "grad_norm": 1.3251667711070048, "learning_rate": 4.766928779826902e-06, "loss": 0.0402, "step": 42195 }, { "epoch": 0.17608131451794612, "grad_norm": 0.8244957044894891, "learning_rate": 4.76664636377486e-06, "loss": 0.0449, "step": 42200 }, { "epoch": 0.1761021772329364, "grad_norm": 0.9255433178898984, "learning_rate": 4.766363997911974e-06, "loss": 0.0314, "step": 42205 }, { "epoch": 0.17612303994792666, "grad_norm": 0.6617438570463838, "learning_rate": 4.766081682223381e-06, "loss": 0.0291, "step": 42210 }, { "epoch": 0.17614390266291693, "grad_norm": 1.2858880732723794, "learning_rate": 4.765799416694221e-06, "loss": 0.0323, "step": 42215 }, { "epoch": 0.1761647653779072, "grad_norm": 2.0081877740749996, "learning_rate": 4.765517201309646e-06, "loss": 0.0448, "step": 42220 }, { "epoch": 0.1761856280928975, "grad_norm": 0.730716259725461, "learning_rate": 4.7652350360548074e-06, "loss": 0.0331, "step": 42225 }, { "epoch": 0.17620649080788778, "grad_norm": 0.9624766837255473, "learning_rate": 4.764952920914869e-06, "loss": 0.0434, "step": 42230 }, { "epoch": 0.17622735352287805, "grad_norm": 1.0758274273787447, "learning_rate": 4.7646708558749945e-06, "loss": 0.03, "step": 42235 }, { "epoch": 0.17624821623786832, "grad_norm": 0.89191721432387, "learning_rate": 4.7643888409203596e-06, "loss": 0.0332, "step": 42240 }, { "epoch": 0.17626907895285862, "grad_norm": 0.8104259000777646, "learning_rate": 4.7641068760361435e-06, "loss": 0.0326, "step": 42245 }, { "epoch": 0.1762899416678489, "grad_norm": 3.9380706699617787, "learning_rate": 4.76382496120753e-06, "loss": 0.0414, "step": 42250 }, { "epoch": 0.17631080438283916, "grad_norm": 1.3279027760936983, "learning_rate": 4.763543096419714e-06, "loss": 0.0376, "step": 42255 }, { "epoch": 0.17633166709782944, "grad_norm": 1.0024800439458923, "learning_rate": 4.76326128165789e-06, "loss": 0.0297, "step": 42260 }, { "epoch": 0.1763525298128197, "grad_norm": 1.154125950172277, "learning_rate": 4.762979516907263e-06, "loss": 0.0335, "step": 42265 }, { "epoch": 0.17637339252781, "grad_norm": 1.4486193512241596, "learning_rate": 4.762697802153042e-06, "loss": 0.0474, "step": 42270 }, { "epoch": 0.17639425524280028, "grad_norm": 0.6784215944811459, "learning_rate": 4.762416137380445e-06, "loss": 0.0302, "step": 42275 }, { "epoch": 0.17641511795779055, "grad_norm": 1.1592000335366852, "learning_rate": 4.762134522574694e-06, "loss": 0.0439, "step": 42280 }, { "epoch": 0.17643598067278082, "grad_norm": 0.9846177408226517, "learning_rate": 4.761852957721017e-06, "loss": 0.0399, "step": 42285 }, { "epoch": 0.17645684338777112, "grad_norm": 1.2148872746001709, "learning_rate": 4.761571442804647e-06, "loss": 0.0393, "step": 42290 }, { "epoch": 0.1764777061027614, "grad_norm": 1.077152125785742, "learning_rate": 4.7612899778108265e-06, "loss": 0.0489, "step": 42295 }, { "epoch": 0.17649856881775167, "grad_norm": 0.7668320473740342, "learning_rate": 4.761008562724801e-06, "loss": 0.031, "step": 42300 }, { "epoch": 0.17651943153274194, "grad_norm": 0.9634997308954224, "learning_rate": 4.760727197531824e-06, "loss": 0.0457, "step": 42305 }, { "epoch": 0.1765402942477322, "grad_norm": 1.0645626704419962, "learning_rate": 4.760445882217154e-06, "loss": 0.0366, "step": 42310 }, { "epoch": 0.1765611569627225, "grad_norm": 1.3184542621503772, "learning_rate": 4.760164616766057e-06, "loss": 0.0318, "step": 42315 }, { "epoch": 0.17658201967771278, "grad_norm": 0.8522825090719921, "learning_rate": 4.759883401163802e-06, "loss": 0.0251, "step": 42320 }, { "epoch": 0.17660288239270305, "grad_norm": 0.8030896155477404, "learning_rate": 4.759602235395669e-06, "loss": 0.0308, "step": 42325 }, { "epoch": 0.17662374510769333, "grad_norm": 0.8316426843828063, "learning_rate": 4.759321119446938e-06, "loss": 0.0336, "step": 42330 }, { "epoch": 0.17664460782268362, "grad_norm": 0.888314004836691, "learning_rate": 4.759040053302901e-06, "loss": 0.0402, "step": 42335 }, { "epoch": 0.1766654705376739, "grad_norm": 0.7840974706939298, "learning_rate": 4.758759036948852e-06, "loss": 0.0386, "step": 42340 }, { "epoch": 0.17668633325266417, "grad_norm": 0.6379491074922634, "learning_rate": 4.758478070370094e-06, "loss": 0.0454, "step": 42345 }, { "epoch": 0.17670719596765444, "grad_norm": 0.9057185837196452, "learning_rate": 4.7581971535519315e-06, "loss": 0.0421, "step": 42350 }, { "epoch": 0.1767280586826447, "grad_norm": 3.4509608178668163, "learning_rate": 4.757916286479682e-06, "loss": 0.0373, "step": 42355 }, { "epoch": 0.176748921397635, "grad_norm": 0.7871007500549873, "learning_rate": 4.7576354691386625e-06, "loss": 0.0289, "step": 42360 }, { "epoch": 0.17676978411262528, "grad_norm": 0.741642476410803, "learning_rate": 4.7573547015142e-06, "loss": 0.0449, "step": 42365 }, { "epoch": 0.17679064682761556, "grad_norm": 0.8574598136493794, "learning_rate": 4.757073983591626e-06, "loss": 0.0367, "step": 42370 }, { "epoch": 0.17681150954260583, "grad_norm": 0.9365688629125387, "learning_rate": 4.7567933153562785e-06, "loss": 0.0346, "step": 42375 }, { "epoch": 0.17683237225759613, "grad_norm": 1.3839872561942748, "learning_rate": 4.756512696793501e-06, "loss": 0.0336, "step": 42380 }, { "epoch": 0.1768532349725864, "grad_norm": 0.8402714439458372, "learning_rate": 4.756232127888644e-06, "loss": 0.0374, "step": 42385 }, { "epoch": 0.17687409768757667, "grad_norm": 0.6939003684898486, "learning_rate": 4.755951608627063e-06, "loss": 0.0347, "step": 42390 }, { "epoch": 0.17689496040256694, "grad_norm": 1.1757558371483174, "learning_rate": 4.755671138994122e-06, "loss": 0.0335, "step": 42395 }, { "epoch": 0.1769158231175572, "grad_norm": 0.6138740681174145, "learning_rate": 4.7553907189751855e-06, "loss": 0.0336, "step": 42400 }, { "epoch": 0.1769366858325475, "grad_norm": 0.5431324480621925, "learning_rate": 4.755110348555632e-06, "loss": 0.0369, "step": 42405 }, { "epoch": 0.17695754854753779, "grad_norm": 0.9026409822927521, "learning_rate": 4.754830027720838e-06, "loss": 0.0373, "step": 42410 }, { "epoch": 0.17697841126252806, "grad_norm": 0.6452939470801369, "learning_rate": 4.754549756456191e-06, "loss": 0.0579, "step": 42415 }, { "epoch": 0.17699927397751833, "grad_norm": 0.7988273028825694, "learning_rate": 4.754269534747084e-06, "loss": 0.0374, "step": 42420 }, { "epoch": 0.17702013669250863, "grad_norm": 0.73225808990308, "learning_rate": 4.7539893625789156e-06, "loss": 0.0376, "step": 42425 }, { "epoch": 0.1770409994074989, "grad_norm": 0.6012444531171847, "learning_rate": 4.753709239937089e-06, "loss": 0.0488, "step": 42430 }, { "epoch": 0.17706186212248917, "grad_norm": 1.4424225832084059, "learning_rate": 4.753429166807015e-06, "loss": 0.0362, "step": 42435 }, { "epoch": 0.17708272483747944, "grad_norm": 1.0469332668793447, "learning_rate": 4.75314914317411e-06, "loss": 0.0347, "step": 42440 }, { "epoch": 0.17710358755246972, "grad_norm": 0.48306293844791887, "learning_rate": 4.752869169023796e-06, "loss": 0.0337, "step": 42445 }, { "epoch": 0.17712445026746002, "grad_norm": 1.005940528086335, "learning_rate": 4.752589244341501e-06, "loss": 0.036, "step": 42450 }, { "epoch": 0.1771453129824503, "grad_norm": 1.3445545354240847, "learning_rate": 4.75230936911266e-06, "loss": 0.0341, "step": 42455 }, { "epoch": 0.17716617569744056, "grad_norm": 0.9849159261029621, "learning_rate": 4.752029543322715e-06, "loss": 0.0359, "step": 42460 }, { "epoch": 0.17718703841243083, "grad_norm": 0.6230906383096567, "learning_rate": 4.75174976695711e-06, "loss": 0.0303, "step": 42465 }, { "epoch": 0.17720790112742113, "grad_norm": 0.8640048754436257, "learning_rate": 4.751470040001298e-06, "loss": 0.0379, "step": 42470 }, { "epoch": 0.1772287638424114, "grad_norm": 0.9276132563779, "learning_rate": 4.7511903624407366e-06, "loss": 0.0534, "step": 42475 }, { "epoch": 0.17724962655740167, "grad_norm": 0.8963220394990713, "learning_rate": 4.750910734260892e-06, "loss": 0.031, "step": 42480 }, { "epoch": 0.17727048927239195, "grad_norm": 0.8914875205718705, "learning_rate": 4.750631155447234e-06, "loss": 0.0335, "step": 42485 }, { "epoch": 0.17729135198738222, "grad_norm": 0.9148320530821864, "learning_rate": 4.7503516259852395e-06, "loss": 0.0261, "step": 42490 }, { "epoch": 0.17731221470237252, "grad_norm": 0.9872338813974066, "learning_rate": 4.750072145860389e-06, "loss": 0.031, "step": 42495 }, { "epoch": 0.1773330774173628, "grad_norm": 0.5807593469239469, "learning_rate": 4.7497927150581714e-06, "loss": 0.0369, "step": 42500 }, { "epoch": 0.17735394013235306, "grad_norm": 0.8905137357374736, "learning_rate": 4.749513333564082e-06, "loss": 0.0334, "step": 42505 }, { "epoch": 0.17737480284734333, "grad_norm": 1.2432080198086362, "learning_rate": 4.7492340013636196e-06, "loss": 0.0358, "step": 42510 }, { "epoch": 0.17739566556233363, "grad_norm": 1.1041758786837137, "learning_rate": 4.748954718442292e-06, "loss": 0.0363, "step": 42515 }, { "epoch": 0.1774165282773239, "grad_norm": 0.8953477129485793, "learning_rate": 4.748675484785611e-06, "loss": 0.0351, "step": 42520 }, { "epoch": 0.17743739099231418, "grad_norm": 1.1796463635386027, "learning_rate": 4.748396300379094e-06, "loss": 0.0351, "step": 42525 }, { "epoch": 0.17745825370730445, "grad_norm": 0.5178945936195579, "learning_rate": 4.748117165208265e-06, "loss": 0.0289, "step": 42530 }, { "epoch": 0.17747911642229472, "grad_norm": 1.2160882793307008, "learning_rate": 4.747838079258655e-06, "loss": 0.0524, "step": 42535 }, { "epoch": 0.17749997913728502, "grad_norm": 0.7987140159347538, "learning_rate": 4.747559042515799e-06, "loss": 0.0379, "step": 42540 }, { "epoch": 0.1775208418522753, "grad_norm": 0.8542874253254016, "learning_rate": 4.747280054965242e-06, "loss": 0.042, "step": 42545 }, { "epoch": 0.17754170456726556, "grad_norm": 0.7952879575868804, "learning_rate": 4.747001116592527e-06, "loss": 0.0441, "step": 42550 }, { "epoch": 0.17756256728225583, "grad_norm": 0.7365976941066027, "learning_rate": 4.746722227383211e-06, "loss": 0.0379, "step": 42555 }, { "epoch": 0.17758342999724613, "grad_norm": 0.704406123314991, "learning_rate": 4.746443387322853e-06, "loss": 0.0361, "step": 42560 }, { "epoch": 0.1776042927122364, "grad_norm": 0.9864624976401933, "learning_rate": 4.746164596397018e-06, "loss": 0.0354, "step": 42565 }, { "epoch": 0.17762515542722668, "grad_norm": 0.9370668045898078, "learning_rate": 4.745885854591281e-06, "loss": 0.0391, "step": 42570 }, { "epoch": 0.17764601814221695, "grad_norm": 0.9525998480633429, "learning_rate": 4.745607161891215e-06, "loss": 0.0279, "step": 42575 }, { "epoch": 0.17766688085720722, "grad_norm": 0.8001967014901403, "learning_rate": 4.745328518282406e-06, "loss": 0.0312, "step": 42580 }, { "epoch": 0.17768774357219752, "grad_norm": 0.981345921781407, "learning_rate": 4.7450499237504435e-06, "loss": 0.0401, "step": 42585 }, { "epoch": 0.1777086062871878, "grad_norm": 1.066827521376782, "learning_rate": 4.744771378280923e-06, "loss": 0.043, "step": 42590 }, { "epoch": 0.17772946900217806, "grad_norm": 1.0069725328687569, "learning_rate": 4.7444928818594445e-06, "loss": 0.0377, "step": 42595 }, { "epoch": 0.17775033171716834, "grad_norm": 0.9389781611877186, "learning_rate": 4.744214434471616e-06, "loss": 0.0423, "step": 42600 }, { "epoch": 0.17777119443215864, "grad_norm": 0.7208622296711451, "learning_rate": 4.743936036103051e-06, "loss": 0.0421, "step": 42605 }, { "epoch": 0.1777920571471489, "grad_norm": 1.3516887242132787, "learning_rate": 4.743657686739368e-06, "loss": 0.0498, "step": 42610 }, { "epoch": 0.17781291986213918, "grad_norm": 0.7126086102139978, "learning_rate": 4.7433793863661916e-06, "loss": 0.0238, "step": 42615 }, { "epoch": 0.17783378257712945, "grad_norm": 1.1552505217099227, "learning_rate": 4.7431011349691535e-06, "loss": 0.0462, "step": 42620 }, { "epoch": 0.17785464529211972, "grad_norm": 0.5425946405123349, "learning_rate": 4.742822932533891e-06, "loss": 0.0311, "step": 42625 }, { "epoch": 0.17787550800711002, "grad_norm": 0.8852087810405556, "learning_rate": 4.742544779046044e-06, "loss": 0.0446, "step": 42630 }, { "epoch": 0.1778963707221003, "grad_norm": 0.8673641451193598, "learning_rate": 4.742266674491264e-06, "loss": 0.0344, "step": 42635 }, { "epoch": 0.17791723343709057, "grad_norm": 1.0719582364605524, "learning_rate": 4.741988618855205e-06, "loss": 0.0374, "step": 42640 }, { "epoch": 0.17793809615208084, "grad_norm": 0.7960760059255634, "learning_rate": 4.741710612123524e-06, "loss": 0.0412, "step": 42645 }, { "epoch": 0.17795895886707114, "grad_norm": 1.0402167572490464, "learning_rate": 4.741432654281892e-06, "loss": 0.0372, "step": 42650 }, { "epoch": 0.1779798215820614, "grad_norm": 0.7412793921022786, "learning_rate": 4.741154745315978e-06, "loss": 0.0387, "step": 42655 }, { "epoch": 0.17800068429705168, "grad_norm": 0.60943013641369, "learning_rate": 4.74087688521146e-06, "loss": 0.0288, "step": 42660 }, { "epoch": 0.17802154701204195, "grad_norm": 1.0082389300531294, "learning_rate": 4.7405990739540245e-06, "loss": 0.0273, "step": 42665 }, { "epoch": 0.17804240972703222, "grad_norm": 0.8692239291334382, "learning_rate": 4.740321311529358e-06, "loss": 0.0305, "step": 42670 }, { "epoch": 0.17806327244202252, "grad_norm": 0.8297097152272873, "learning_rate": 4.740043597923158e-06, "loss": 0.0405, "step": 42675 }, { "epoch": 0.1780841351570128, "grad_norm": 0.42846871151096216, "learning_rate": 4.739765933121124e-06, "loss": 0.0279, "step": 42680 }, { "epoch": 0.17810499787200307, "grad_norm": 1.9673289208410356, "learning_rate": 4.7394883171089665e-06, "loss": 0.0311, "step": 42685 }, { "epoch": 0.17812586058699334, "grad_norm": 0.6935186083686475, "learning_rate": 4.739210749872394e-06, "loss": 0.0342, "step": 42690 }, { "epoch": 0.17814672330198364, "grad_norm": 0.5603257255712651, "learning_rate": 4.73893323139713e-06, "loss": 0.0281, "step": 42695 }, { "epoch": 0.1781675860169739, "grad_norm": 1.1837938588631645, "learning_rate": 4.738655761668898e-06, "loss": 0.0306, "step": 42700 }, { "epoch": 0.17818844873196418, "grad_norm": 1.1099961823379056, "learning_rate": 4.738378340673428e-06, "loss": 0.0283, "step": 42705 }, { "epoch": 0.17820931144695445, "grad_norm": 1.2534829255251347, "learning_rate": 4.738100968396456e-06, "loss": 0.0406, "step": 42710 }, { "epoch": 0.17823017416194473, "grad_norm": 1.0739899228132375, "learning_rate": 4.737823644823725e-06, "loss": 0.0528, "step": 42715 }, { "epoch": 0.17825103687693503, "grad_norm": 0.9005254655247554, "learning_rate": 4.737546369940985e-06, "loss": 0.0325, "step": 42720 }, { "epoch": 0.1782718995919253, "grad_norm": 0.6594471655008457, "learning_rate": 4.737269143733987e-06, "loss": 0.0388, "step": 42725 }, { "epoch": 0.17829276230691557, "grad_norm": 1.049630812534166, "learning_rate": 4.736991966188494e-06, "loss": 0.0509, "step": 42730 }, { "epoch": 0.17831362502190584, "grad_norm": 0.8463246224068769, "learning_rate": 4.736714837290269e-06, "loss": 0.0392, "step": 42735 }, { "epoch": 0.17833448773689614, "grad_norm": 0.6512313536261217, "learning_rate": 4.7364377570250855e-06, "loss": 0.0436, "step": 42740 }, { "epoch": 0.1783553504518864, "grad_norm": 0.680205113406048, "learning_rate": 4.73616072537872e-06, "loss": 0.0361, "step": 42745 }, { "epoch": 0.17837621316687668, "grad_norm": 0.5645845880908202, "learning_rate": 4.735883742336955e-06, "loss": 0.0411, "step": 42750 }, { "epoch": 0.17839707588186696, "grad_norm": 1.1230126217573588, "learning_rate": 4.735606807885581e-06, "loss": 0.0377, "step": 42755 }, { "epoch": 0.17841793859685723, "grad_norm": 0.8212963857898206, "learning_rate": 4.735329922010394e-06, "loss": 0.0315, "step": 42760 }, { "epoch": 0.17843880131184753, "grad_norm": 0.8871533412126654, "learning_rate": 4.735053084697191e-06, "loss": 0.0331, "step": 42765 }, { "epoch": 0.1784596640268378, "grad_norm": 1.076163372485179, "learning_rate": 4.734776295931782e-06, "loss": 0.0405, "step": 42770 }, { "epoch": 0.17848052674182807, "grad_norm": 1.7941107486986418, "learning_rate": 4.7344995556999765e-06, "loss": 0.0362, "step": 42775 }, { "epoch": 0.17850138945681834, "grad_norm": 0.9999083019126452, "learning_rate": 4.734222863987595e-06, "loss": 0.0416, "step": 42780 }, { "epoch": 0.17852225217180864, "grad_norm": 1.007314905209099, "learning_rate": 4.733946220780459e-06, "loss": 0.0313, "step": 42785 }, { "epoch": 0.17854311488679891, "grad_norm": 1.0030279291391313, "learning_rate": 4.7336696260644e-06, "loss": 0.0414, "step": 42790 }, { "epoch": 0.1785639776017892, "grad_norm": 1.2546213503929027, "learning_rate": 4.7333930798252535e-06, "loss": 0.0387, "step": 42795 }, { "epoch": 0.17858484031677946, "grad_norm": 0.8359148313683791, "learning_rate": 4.733116582048859e-06, "loss": 0.0447, "step": 42800 }, { "epoch": 0.17860570303176973, "grad_norm": 1.5981855062658898, "learning_rate": 4.732840132721066e-06, "loss": 0.0417, "step": 42805 }, { "epoch": 0.17862656574676003, "grad_norm": 0.9597480647200237, "learning_rate": 4.732563731827725e-06, "loss": 0.0373, "step": 42810 }, { "epoch": 0.1786474284617503, "grad_norm": 1.1248376897681391, "learning_rate": 4.732287379354696e-06, "loss": 0.0504, "step": 42815 }, { "epoch": 0.17866829117674057, "grad_norm": 0.8481281129914104, "learning_rate": 4.732011075287845e-06, "loss": 0.0405, "step": 42820 }, { "epoch": 0.17868915389173085, "grad_norm": 2.028703768290902, "learning_rate": 4.731734819613038e-06, "loss": 0.0436, "step": 42825 }, { "epoch": 0.17871001660672114, "grad_norm": 1.105983149282102, "learning_rate": 4.731458612316154e-06, "loss": 0.0327, "step": 42830 }, { "epoch": 0.17873087932171142, "grad_norm": 1.3895658449339436, "learning_rate": 4.731182453383074e-06, "loss": 0.0436, "step": 42835 }, { "epoch": 0.1787517420367017, "grad_norm": 0.8028912390916463, "learning_rate": 4.7309063427996874e-06, "loss": 0.0366, "step": 42840 }, { "epoch": 0.17877260475169196, "grad_norm": 1.0265688511755173, "learning_rate": 4.730630280551883e-06, "loss": 0.0521, "step": 42845 }, { "epoch": 0.17879346746668223, "grad_norm": 0.82061950656082, "learning_rate": 4.730354266625565e-06, "loss": 0.036, "step": 42850 }, { "epoch": 0.17881433018167253, "grad_norm": 1.0311442981409646, "learning_rate": 4.730078301006635e-06, "loss": 0.0372, "step": 42855 }, { "epoch": 0.1788351928966628, "grad_norm": 0.9967400598918393, "learning_rate": 4.7298023836810025e-06, "loss": 0.0326, "step": 42860 }, { "epoch": 0.17885605561165308, "grad_norm": 1.0569703029719275, "learning_rate": 4.7295265146345865e-06, "loss": 0.0362, "step": 42865 }, { "epoch": 0.17887691832664335, "grad_norm": 1.3260302211425345, "learning_rate": 4.729250693853308e-06, "loss": 0.042, "step": 42870 }, { "epoch": 0.17889778104163365, "grad_norm": 1.1226695669134312, "learning_rate": 4.7289749213230956e-06, "loss": 0.042, "step": 42875 }, { "epoch": 0.17891864375662392, "grad_norm": 0.8929431761946308, "learning_rate": 4.728699197029882e-06, "loss": 0.0501, "step": 42880 }, { "epoch": 0.1789395064716142, "grad_norm": 1.213473196344095, "learning_rate": 4.728423520959606e-06, "loss": 0.0421, "step": 42885 }, { "epoch": 0.17896036918660446, "grad_norm": 1.023865764617593, "learning_rate": 4.728147893098213e-06, "loss": 0.0396, "step": 42890 }, { "epoch": 0.17898123190159473, "grad_norm": 0.8936944544873148, "learning_rate": 4.727872313431654e-06, "loss": 0.0344, "step": 42895 }, { "epoch": 0.17900209461658503, "grad_norm": 0.9862233356301299, "learning_rate": 4.727596781945887e-06, "loss": 0.0345, "step": 42900 }, { "epoch": 0.1790229573315753, "grad_norm": 0.8763216608204024, "learning_rate": 4.727321298626871e-06, "loss": 0.0379, "step": 42905 }, { "epoch": 0.17904382004656558, "grad_norm": 1.4688250017419242, "learning_rate": 4.727045863460576e-06, "loss": 0.0441, "step": 42910 }, { "epoch": 0.17906468276155585, "grad_norm": 0.562754755303037, "learning_rate": 4.726770476432975e-06, "loss": 0.0288, "step": 42915 }, { "epoch": 0.17908554547654615, "grad_norm": 0.4962447031464347, "learning_rate": 4.726495137530048e-06, "loss": 0.0243, "step": 42920 }, { "epoch": 0.17910640819153642, "grad_norm": 1.3099688059764818, "learning_rate": 4.726219846737779e-06, "loss": 0.0361, "step": 42925 }, { "epoch": 0.1791272709065267, "grad_norm": 0.5607494886935065, "learning_rate": 4.72594460404216e-06, "loss": 0.0266, "step": 42930 }, { "epoch": 0.17914813362151696, "grad_norm": 0.3217101247398785, "learning_rate": 4.725669409429186e-06, "loss": 0.0461, "step": 42935 }, { "epoch": 0.17916899633650724, "grad_norm": 1.0145472595536398, "learning_rate": 4.725394262884862e-06, "loss": 0.0357, "step": 42940 }, { "epoch": 0.17918985905149754, "grad_norm": 0.722447898000794, "learning_rate": 4.725119164395194e-06, "loss": 0.04, "step": 42945 }, { "epoch": 0.1792107217664878, "grad_norm": 0.7526049155824743, "learning_rate": 4.7248441139461946e-06, "loss": 0.034, "step": 42950 }, { "epoch": 0.17923158448147808, "grad_norm": 0.987352237368645, "learning_rate": 4.724569111523884e-06, "loss": 0.0353, "step": 42955 }, { "epoch": 0.17925244719646835, "grad_norm": 0.5578947133285332, "learning_rate": 4.724294157114289e-06, "loss": 0.0259, "step": 42960 }, { "epoch": 0.17927330991145865, "grad_norm": 1.149847449229335, "learning_rate": 4.724019250703437e-06, "loss": 0.0342, "step": 42965 }, { "epoch": 0.17929417262644892, "grad_norm": 0.5528783187929079, "learning_rate": 4.723744392277369e-06, "loss": 0.0314, "step": 42970 }, { "epoch": 0.1793150353414392, "grad_norm": 0.9529988265882402, "learning_rate": 4.723469581822122e-06, "loss": 0.0446, "step": 42975 }, { "epoch": 0.17933589805642947, "grad_norm": 0.6821111990836297, "learning_rate": 4.723194819323746e-06, "loss": 0.0434, "step": 42980 }, { "epoch": 0.17935676077141974, "grad_norm": 0.7991816803452196, "learning_rate": 4.7229201047682955e-06, "loss": 0.0313, "step": 42985 }, { "epoch": 0.17937762348641004, "grad_norm": 0.9989337480590378, "learning_rate": 4.722645438141829e-06, "loss": 0.041, "step": 42990 }, { "epoch": 0.1793984862014003, "grad_norm": 0.8612460403440862, "learning_rate": 4.72237081943041e-06, "loss": 0.0428, "step": 42995 }, { "epoch": 0.17941934891639058, "grad_norm": 0.9402084185201967, "learning_rate": 4.722096248620112e-06, "loss": 0.0431, "step": 43000 }, { "epoch": 0.17944021163138085, "grad_norm": 1.3316942292609488, "learning_rate": 4.721821725697006e-06, "loss": 0.0353, "step": 43005 }, { "epoch": 0.17946107434637115, "grad_norm": 0.8335623800230039, "learning_rate": 4.7215472506471794e-06, "loss": 0.0334, "step": 43010 }, { "epoch": 0.17948193706136142, "grad_norm": 0.5474304432023706, "learning_rate": 4.721272823456715e-06, "loss": 0.03, "step": 43015 }, { "epoch": 0.1795027997763517, "grad_norm": 1.0933945083619392, "learning_rate": 4.72099844411171e-06, "loss": 0.0359, "step": 43020 }, { "epoch": 0.17952366249134197, "grad_norm": 0.7348727015572488, "learning_rate": 4.720724112598262e-06, "loss": 0.0388, "step": 43025 }, { "epoch": 0.17954452520633224, "grad_norm": 0.919363108965399, "learning_rate": 4.7204498289024734e-06, "loss": 0.0327, "step": 43030 }, { "epoch": 0.17956538792132254, "grad_norm": 0.7055944009772882, "learning_rate": 4.720175593010455e-06, "loss": 0.0354, "step": 43035 }, { "epoch": 0.1795862506363128, "grad_norm": 1.0071742709755147, "learning_rate": 4.719901404908325e-06, "loss": 0.03, "step": 43040 }, { "epoch": 0.17960711335130308, "grad_norm": 0.9328172571101383, "learning_rate": 4.7196272645822025e-06, "loss": 0.0312, "step": 43045 }, { "epoch": 0.17962797606629335, "grad_norm": 0.889443077742896, "learning_rate": 4.719353172018214e-06, "loss": 0.0275, "step": 43050 }, { "epoch": 0.17964883878128365, "grad_norm": 1.350597108744417, "learning_rate": 4.719079127202495e-06, "loss": 0.0406, "step": 43055 }, { "epoch": 0.17966970149627393, "grad_norm": 1.0921964510555957, "learning_rate": 4.718805130121181e-06, "loss": 0.0305, "step": 43060 }, { "epoch": 0.1796905642112642, "grad_norm": 0.5955090410687899, "learning_rate": 4.718531180760417e-06, "loss": 0.0356, "step": 43065 }, { "epoch": 0.17971142692625447, "grad_norm": 0.8379760470657408, "learning_rate": 4.7182572791063525e-06, "loss": 0.0372, "step": 43070 }, { "epoch": 0.17973228964124474, "grad_norm": 0.7693177592130375, "learning_rate": 4.7179834251451415e-06, "loss": 0.0393, "step": 43075 }, { "epoch": 0.17975315235623504, "grad_norm": 0.49865899879760806, "learning_rate": 4.7177096188629475e-06, "loss": 0.0298, "step": 43080 }, { "epoch": 0.1797740150712253, "grad_norm": 1.0800926335366992, "learning_rate": 4.717435860245936e-06, "loss": 0.033, "step": 43085 }, { "epoch": 0.17979487778621558, "grad_norm": 1.0099199775745418, "learning_rate": 4.717162149280277e-06, "loss": 0.0565, "step": 43090 }, { "epoch": 0.17981574050120586, "grad_norm": 0.7202445094367718, "learning_rate": 4.716888485952152e-06, "loss": 0.0707, "step": 43095 }, { "epoch": 0.17983660321619616, "grad_norm": 0.873696755920834, "learning_rate": 4.716614870247739e-06, "loss": 0.0338, "step": 43100 }, { "epoch": 0.17985746593118643, "grad_norm": 0.6424181578714729, "learning_rate": 4.716341302153232e-06, "loss": 0.029, "step": 43105 }, { "epoch": 0.1798783286461767, "grad_norm": 1.0631697789808146, "learning_rate": 4.716067781654822e-06, "loss": 0.0392, "step": 43110 }, { "epoch": 0.17989919136116697, "grad_norm": 0.9098926737739818, "learning_rate": 4.715794308738712e-06, "loss": 0.0377, "step": 43115 }, { "epoch": 0.17992005407615724, "grad_norm": 0.7770065837214188, "learning_rate": 4.715520883391105e-06, "loss": 0.0342, "step": 43120 }, { "epoch": 0.17994091679114754, "grad_norm": 1.2374940842862068, "learning_rate": 4.715247505598215e-06, "loss": 0.0308, "step": 43125 }, { "epoch": 0.17996177950613781, "grad_norm": 0.8286509779022306, "learning_rate": 4.714974175346256e-06, "loss": 0.037, "step": 43130 }, { "epoch": 0.1799826422211281, "grad_norm": 1.0151630130978493, "learning_rate": 4.714700892621452e-06, "loss": 0.0357, "step": 43135 }, { "epoch": 0.18000350493611836, "grad_norm": 0.9005259380884706, "learning_rate": 4.7144276574100325e-06, "loss": 0.0262, "step": 43140 }, { "epoch": 0.18002436765110866, "grad_norm": 1.8241712602953986, "learning_rate": 4.714154469698229e-06, "loss": 0.0651, "step": 43145 }, { "epoch": 0.18004523036609893, "grad_norm": 1.1268026032225704, "learning_rate": 4.713881329472281e-06, "loss": 0.037, "step": 43150 }, { "epoch": 0.1800660930810892, "grad_norm": 1.1383145662598224, "learning_rate": 4.713608236718434e-06, "loss": 0.0407, "step": 43155 }, { "epoch": 0.18008695579607947, "grad_norm": 0.9850711239788355, "learning_rate": 4.713335191422938e-06, "loss": 0.0448, "step": 43160 }, { "epoch": 0.18010781851106974, "grad_norm": 0.8404653390125466, "learning_rate": 4.71306219357205e-06, "loss": 0.032, "step": 43165 }, { "epoch": 0.18012868122606004, "grad_norm": 1.33900089229854, "learning_rate": 4.71278924315203e-06, "loss": 0.0343, "step": 43170 }, { "epoch": 0.18014954394105032, "grad_norm": 0.45128197757415656, "learning_rate": 4.712516340149146e-06, "loss": 0.0322, "step": 43175 }, { "epoch": 0.1801704066560406, "grad_norm": 1.030539479016103, "learning_rate": 4.71224348454967e-06, "loss": 0.0298, "step": 43180 }, { "epoch": 0.18019126937103086, "grad_norm": 0.5576024209094474, "learning_rate": 4.711970676339882e-06, "loss": 0.033, "step": 43185 }, { "epoch": 0.18021213208602116, "grad_norm": 0.9649395702493261, "learning_rate": 4.711697915506064e-06, "loss": 0.0374, "step": 43190 }, { "epoch": 0.18023299480101143, "grad_norm": 0.9459885471163901, "learning_rate": 4.7114252020345065e-06, "loss": 0.0298, "step": 43195 }, { "epoch": 0.1802538575160017, "grad_norm": 0.5672220721481883, "learning_rate": 4.7111525359115025e-06, "loss": 0.0326, "step": 43200 }, { "epoch": 0.18027472023099197, "grad_norm": 1.7562296712539298, "learning_rate": 4.710879917123356e-06, "loss": 0.0403, "step": 43205 }, { "epoch": 0.18029558294598225, "grad_norm": 0.8127818320490345, "learning_rate": 4.710607345656369e-06, "loss": 0.0458, "step": 43210 }, { "epoch": 0.18031644566097255, "grad_norm": 1.1986814696539685, "learning_rate": 4.710334821496857e-06, "loss": 0.0326, "step": 43215 }, { "epoch": 0.18033730837596282, "grad_norm": 1.0983447690621881, "learning_rate": 4.710062344631134e-06, "loss": 0.0447, "step": 43220 }, { "epoch": 0.1803581710909531, "grad_norm": 0.6242429070188924, "learning_rate": 4.709789915045524e-06, "loss": 0.0228, "step": 43225 }, { "epoch": 0.18037903380594336, "grad_norm": 0.6060613819250655, "learning_rate": 4.7095175327263545e-06, "loss": 0.0491, "step": 43230 }, { "epoch": 0.18039989652093366, "grad_norm": 1.2661295525420122, "learning_rate": 4.709245197659962e-06, "loss": 0.0481, "step": 43235 }, { "epoch": 0.18042075923592393, "grad_norm": 0.9941301113738094, "learning_rate": 4.70897290983268e-06, "loss": 0.0532, "step": 43240 }, { "epoch": 0.1804416219509142, "grad_norm": 0.9402071505050134, "learning_rate": 4.708700669230859e-06, "loss": 0.0373, "step": 43245 }, { "epoch": 0.18046248466590448, "grad_norm": 0.8319148938773411, "learning_rate": 4.708428475840846e-06, "loss": 0.0401, "step": 43250 }, { "epoch": 0.18048334738089475, "grad_norm": 1.0170772974925468, "learning_rate": 4.708156329648999e-06, "loss": 0.0435, "step": 43255 }, { "epoch": 0.18050421009588505, "grad_norm": 1.035685634023895, "learning_rate": 4.7078842306416775e-06, "loss": 0.0322, "step": 43260 }, { "epoch": 0.18052507281087532, "grad_norm": 1.1197870958959708, "learning_rate": 4.70761217880525e-06, "loss": 0.041, "step": 43265 }, { "epoch": 0.1805459355258656, "grad_norm": 0.5664517987211235, "learning_rate": 4.707340174126087e-06, "loss": 0.0317, "step": 43270 }, { "epoch": 0.18056679824085586, "grad_norm": 0.7832545079578328, "learning_rate": 4.707068216590568e-06, "loss": 0.0296, "step": 43275 }, { "epoch": 0.18058766095584616, "grad_norm": 1.0631956064485737, "learning_rate": 4.706796306185075e-06, "loss": 0.0405, "step": 43280 }, { "epoch": 0.18060852367083643, "grad_norm": 0.5956382309431718, "learning_rate": 4.706524442895999e-06, "loss": 0.0307, "step": 43285 }, { "epoch": 0.1806293863858267, "grad_norm": 1.1354293555663337, "learning_rate": 4.706252626709732e-06, "loss": 0.0458, "step": 43290 }, { "epoch": 0.18065024910081698, "grad_norm": 1.081812136800876, "learning_rate": 4.705980857612676e-06, "loss": 0.0366, "step": 43295 }, { "epoch": 0.18067111181580725, "grad_norm": 0.510166453591473, "learning_rate": 4.705709135591236e-06, "loss": 0.0255, "step": 43300 }, { "epoch": 0.18069197453079755, "grad_norm": 1.8103619851518935, "learning_rate": 4.705437460631821e-06, "loss": 0.0476, "step": 43305 }, { "epoch": 0.18071283724578782, "grad_norm": 0.6975318100536624, "learning_rate": 4.705165832720849e-06, "loss": 0.0368, "step": 43310 }, { "epoch": 0.1807336999607781, "grad_norm": 0.7116246254524847, "learning_rate": 4.704894251844743e-06, "loss": 0.0328, "step": 43315 }, { "epoch": 0.18075456267576837, "grad_norm": 0.6095903293383511, "learning_rate": 4.704622717989927e-06, "loss": 0.0388, "step": 43320 }, { "epoch": 0.18077542539075867, "grad_norm": 0.8545901430782283, "learning_rate": 4.704351231142839e-06, "loss": 0.0433, "step": 43325 }, { "epoch": 0.18079628810574894, "grad_norm": 2.0221453215592566, "learning_rate": 4.704079791289911e-06, "loss": 0.0349, "step": 43330 }, { "epoch": 0.1808171508207392, "grad_norm": 0.8916071208903772, "learning_rate": 4.7038083984175906e-06, "loss": 0.0343, "step": 43335 }, { "epoch": 0.18083801353572948, "grad_norm": 0.7059026452266162, "learning_rate": 4.703537052512327e-06, "loss": 0.0311, "step": 43340 }, { "epoch": 0.18085887625071975, "grad_norm": 0.8426681953605619, "learning_rate": 4.703265753560576e-06, "loss": 0.036, "step": 43345 }, { "epoch": 0.18087973896571005, "grad_norm": 1.4255857576823172, "learning_rate": 4.702994501548794e-06, "loss": 0.0408, "step": 43350 }, { "epoch": 0.18090060168070032, "grad_norm": 1.5201416703636659, "learning_rate": 4.702723296463451e-06, "loss": 0.0402, "step": 43355 }, { "epoch": 0.1809214643956906, "grad_norm": 1.2158095566686455, "learning_rate": 4.702452138291015e-06, "loss": 0.0333, "step": 43360 }, { "epoch": 0.18094232711068087, "grad_norm": 1.022152226149975, "learning_rate": 4.702181027017964e-06, "loss": 0.0363, "step": 43365 }, { "epoch": 0.18096318982567117, "grad_norm": 0.6403767568559874, "learning_rate": 4.70190996263078e-06, "loss": 0.0309, "step": 43370 }, { "epoch": 0.18098405254066144, "grad_norm": 0.5630829400170584, "learning_rate": 4.701638945115949e-06, "loss": 0.0424, "step": 43375 }, { "epoch": 0.1810049152556517, "grad_norm": 1.22642344758307, "learning_rate": 4.701367974459966e-06, "loss": 0.0474, "step": 43380 }, { "epoch": 0.18102577797064198, "grad_norm": 2.20638670210377, "learning_rate": 4.70109705064933e-06, "loss": 0.0312, "step": 43385 }, { "epoch": 0.18104664068563225, "grad_norm": 0.652847174195971, "learning_rate": 4.7008261736705425e-06, "loss": 0.0359, "step": 43390 }, { "epoch": 0.18106750340062255, "grad_norm": 1.221985090737216, "learning_rate": 4.700555343510114e-06, "loss": 0.0398, "step": 43395 }, { "epoch": 0.18108836611561283, "grad_norm": 1.1459561385436732, "learning_rate": 4.70028456015456e-06, "loss": 0.0388, "step": 43400 }, { "epoch": 0.1811092288306031, "grad_norm": 0.6118405434519489, "learning_rate": 4.700013823590399e-06, "loss": 0.0375, "step": 43405 }, { "epoch": 0.18113009154559337, "grad_norm": 0.8188580353066301, "learning_rate": 4.699743133804157e-06, "loss": 0.0322, "step": 43410 }, { "epoch": 0.18115095426058367, "grad_norm": 0.6304499434223513, "learning_rate": 4.699472490782366e-06, "loss": 0.0358, "step": 43415 }, { "epoch": 0.18117181697557394, "grad_norm": 0.899605784342928, "learning_rate": 4.699201894511562e-06, "loss": 0.0355, "step": 43420 }, { "epoch": 0.1811926796905642, "grad_norm": 1.3729818937680456, "learning_rate": 4.698931344978288e-06, "loss": 0.0416, "step": 43425 }, { "epoch": 0.18121354240555448, "grad_norm": 1.1187182288910622, "learning_rate": 4.698660842169089e-06, "loss": 0.0376, "step": 43430 }, { "epoch": 0.18123440512054476, "grad_norm": 0.5744452173849532, "learning_rate": 4.698390386070519e-06, "loss": 0.0368, "step": 43435 }, { "epoch": 0.18125526783553506, "grad_norm": 0.7154212197600266, "learning_rate": 4.698119976669137e-06, "loss": 0.0313, "step": 43440 }, { "epoch": 0.18127613055052533, "grad_norm": 1.4176988173304261, "learning_rate": 4.697849613951505e-06, "loss": 0.0497, "step": 43445 }, { "epoch": 0.1812969932655156, "grad_norm": 0.8119330817187974, "learning_rate": 4.697579297904194e-06, "loss": 0.0391, "step": 43450 }, { "epoch": 0.18131785598050587, "grad_norm": 1.4677483421273283, "learning_rate": 4.6973090285137755e-06, "loss": 0.0282, "step": 43455 }, { "epoch": 0.18133871869549617, "grad_norm": 1.3687962907631286, "learning_rate": 4.697038805766832e-06, "loss": 0.0381, "step": 43460 }, { "epoch": 0.18135958141048644, "grad_norm": 0.9052078669198903, "learning_rate": 4.696768629649946e-06, "loss": 0.0426, "step": 43465 }, { "epoch": 0.18138044412547671, "grad_norm": 0.9044546317826351, "learning_rate": 4.696498500149711e-06, "loss": 0.0454, "step": 43470 }, { "epoch": 0.18140130684046699, "grad_norm": 0.7829139396733874, "learning_rate": 4.6962284172527225e-06, "loss": 0.0356, "step": 43475 }, { "epoch": 0.18142216955545726, "grad_norm": 0.9297750636492664, "learning_rate": 4.69595838094558e-06, "loss": 0.0379, "step": 43480 }, { "epoch": 0.18144303227044756, "grad_norm": 0.8831820337523416, "learning_rate": 4.695688391214893e-06, "loss": 0.0369, "step": 43485 }, { "epoch": 0.18146389498543783, "grad_norm": 1.1767049793973572, "learning_rate": 4.695418448047271e-06, "loss": 0.038, "step": 43490 }, { "epoch": 0.1814847577004281, "grad_norm": 0.3616029177757447, "learning_rate": 4.695148551429334e-06, "loss": 0.026, "step": 43495 }, { "epoch": 0.18150562041541837, "grad_norm": 0.6713962356102006, "learning_rate": 4.694878701347702e-06, "loss": 0.0357, "step": 43500 }, { "epoch": 0.18152648313040867, "grad_norm": 1.571739060906729, "learning_rate": 4.694608897789007e-06, "loss": 0.0447, "step": 43505 }, { "epoch": 0.18154734584539894, "grad_norm": 1.5453983034131173, "learning_rate": 4.69433914073988e-06, "loss": 0.0354, "step": 43510 }, { "epoch": 0.18156820856038922, "grad_norm": 2.9025364017981823, "learning_rate": 4.6940694301869604e-06, "loss": 0.0355, "step": 43515 }, { "epoch": 0.1815890712753795, "grad_norm": 0.9662367361504755, "learning_rate": 4.693799766116893e-06, "loss": 0.0447, "step": 43520 }, { "epoch": 0.18160993399036976, "grad_norm": 1.0731626135519674, "learning_rate": 4.693530148516329e-06, "loss": 0.0433, "step": 43525 }, { "epoch": 0.18163079670536006, "grad_norm": 1.1259411538059039, "learning_rate": 4.693260577371921e-06, "loss": 0.0379, "step": 43530 }, { "epoch": 0.18165165942035033, "grad_norm": 1.3753145828033047, "learning_rate": 4.6929910526703325e-06, "loss": 0.0372, "step": 43535 }, { "epoch": 0.1816725221353406, "grad_norm": 0.5394013286079731, "learning_rate": 4.692721574398227e-06, "loss": 0.0277, "step": 43540 }, { "epoch": 0.18169338485033087, "grad_norm": 2.475999755567325, "learning_rate": 4.692452142542278e-06, "loss": 0.0383, "step": 43545 }, { "epoch": 0.18171424756532117, "grad_norm": 0.6773725462442983, "learning_rate": 4.6921827570891584e-06, "loss": 0.0289, "step": 43550 }, { "epoch": 0.18173511028031145, "grad_norm": 0.7933857318194495, "learning_rate": 4.691913418025555e-06, "loss": 0.045, "step": 43555 }, { "epoch": 0.18175597299530172, "grad_norm": 1.1291090548573246, "learning_rate": 4.6916441253381525e-06, "loss": 0.0334, "step": 43560 }, { "epoch": 0.181776835710292, "grad_norm": 0.8316848424734031, "learning_rate": 4.691374879013644e-06, "loss": 0.0342, "step": 43565 }, { "epoch": 0.18179769842528226, "grad_norm": 0.5936066549788402, "learning_rate": 4.691105679038726e-06, "loss": 0.0304, "step": 43570 }, { "epoch": 0.18181856114027256, "grad_norm": 0.7898061757487799, "learning_rate": 4.690836525400104e-06, "loss": 0.0549, "step": 43575 }, { "epoch": 0.18183942385526283, "grad_norm": 1.0585725964193329, "learning_rate": 4.690567418084487e-06, "loss": 0.0407, "step": 43580 }, { "epoch": 0.1818602865702531, "grad_norm": 0.840018547007949, "learning_rate": 4.690298357078587e-06, "loss": 0.033, "step": 43585 }, { "epoch": 0.18188114928524338, "grad_norm": 0.7209564989936622, "learning_rate": 4.690029342369126e-06, "loss": 0.0323, "step": 43590 }, { "epoch": 0.18190201200023368, "grad_norm": 0.5696567302705919, "learning_rate": 4.689760373942827e-06, "loss": 0.0297, "step": 43595 }, { "epoch": 0.18192287471522395, "grad_norm": 0.45954139032736363, "learning_rate": 4.68949145178642e-06, "loss": 0.0342, "step": 43600 }, { "epoch": 0.18194373743021422, "grad_norm": 1.2885939711984116, "learning_rate": 4.68922257588664e-06, "loss": 0.0321, "step": 43605 }, { "epoch": 0.1819646001452045, "grad_norm": 1.2052944583765226, "learning_rate": 4.6889537462302295e-06, "loss": 0.0289, "step": 43610 }, { "epoch": 0.18198546286019476, "grad_norm": 0.7773647986497969, "learning_rate": 4.688684962803933e-06, "loss": 0.0381, "step": 43615 }, { "epoch": 0.18200632557518506, "grad_norm": 0.585002388131144, "learning_rate": 4.6884162255945025e-06, "loss": 0.0366, "step": 43620 }, { "epoch": 0.18202718829017533, "grad_norm": 0.7853007296198318, "learning_rate": 4.6881475345886954e-06, "loss": 0.0442, "step": 43625 }, { "epoch": 0.1820480510051656, "grad_norm": 0.9089989778648084, "learning_rate": 4.687878889773271e-06, "loss": 0.0413, "step": 43630 }, { "epoch": 0.18206891372015588, "grad_norm": 1.3832325230419718, "learning_rate": 4.687610291134999e-06, "loss": 0.0355, "step": 43635 }, { "epoch": 0.18208977643514618, "grad_norm": 0.8965502090613795, "learning_rate": 4.687341738660651e-06, "loss": 0.0423, "step": 43640 }, { "epoch": 0.18211063915013645, "grad_norm": 1.5294877101894, "learning_rate": 4.687073232337006e-06, "loss": 0.0438, "step": 43645 }, { "epoch": 0.18213150186512672, "grad_norm": 0.8990251860247712, "learning_rate": 4.686804772150848e-06, "loss": 0.0308, "step": 43650 }, { "epoch": 0.182152364580117, "grad_norm": 1.2900651963419452, "learning_rate": 4.686536358088961e-06, "loss": 0.0413, "step": 43655 }, { "epoch": 0.18217322729510727, "grad_norm": 0.9534954385583524, "learning_rate": 4.686267990138144e-06, "loss": 0.0278, "step": 43660 }, { "epoch": 0.18219409001009756, "grad_norm": 0.7071405451295091, "learning_rate": 4.6859996682851915e-06, "loss": 0.0413, "step": 43665 }, { "epoch": 0.18221495272508784, "grad_norm": 1.0015589442835349, "learning_rate": 4.685731392516912e-06, "loss": 0.0322, "step": 43670 }, { "epoch": 0.1822358154400781, "grad_norm": 0.9333087362612876, "learning_rate": 4.685463162820113e-06, "loss": 0.0449, "step": 43675 }, { "epoch": 0.18225667815506838, "grad_norm": 0.6765752206606691, "learning_rate": 4.68519497918161e-06, "loss": 0.0358, "step": 43680 }, { "epoch": 0.18227754087005868, "grad_norm": 0.6324640039626264, "learning_rate": 4.684926841588221e-06, "loss": 0.0329, "step": 43685 }, { "epoch": 0.18229840358504895, "grad_norm": 0.9349968062544838, "learning_rate": 4.684658750026776e-06, "loss": 0.0423, "step": 43690 }, { "epoch": 0.18231926630003922, "grad_norm": 0.7835616041538945, "learning_rate": 4.684390704484102e-06, "loss": 0.0306, "step": 43695 }, { "epoch": 0.1823401290150295, "grad_norm": 1.0518054602049094, "learning_rate": 4.684122704947037e-06, "loss": 0.0364, "step": 43700 }, { "epoch": 0.18236099173001977, "grad_norm": 0.768146777756695, "learning_rate": 4.683854751402421e-06, "loss": 0.0405, "step": 43705 }, { "epoch": 0.18238185444501007, "grad_norm": 0.6975016105559392, "learning_rate": 4.6835868438371025e-06, "loss": 0.0353, "step": 43710 }, { "epoch": 0.18240271716000034, "grad_norm": 0.5688850344843871, "learning_rate": 4.683318982237931e-06, "loss": 0.036, "step": 43715 }, { "epoch": 0.1824235798749906, "grad_norm": 1.030783380391923, "learning_rate": 4.683051166591765e-06, "loss": 0.0357, "step": 43720 }, { "epoch": 0.18244444258998088, "grad_norm": 1.0974520323683292, "learning_rate": 4.682783396885467e-06, "loss": 0.0367, "step": 43725 }, { "epoch": 0.18246530530497118, "grad_norm": 0.8684580605150274, "learning_rate": 4.682515673105904e-06, "loss": 0.0282, "step": 43730 }, { "epoch": 0.18248616801996145, "grad_norm": 1.4261412694769084, "learning_rate": 4.682247995239949e-06, "loss": 0.0381, "step": 43735 }, { "epoch": 0.18250703073495173, "grad_norm": 0.8372221011620958, "learning_rate": 4.681980363274481e-06, "loss": 0.0252, "step": 43740 }, { "epoch": 0.182527893449942, "grad_norm": 0.881365260475684, "learning_rate": 4.681712777196383e-06, "loss": 0.0315, "step": 43745 }, { "epoch": 0.18254875616493227, "grad_norm": 0.5821619068217553, "learning_rate": 4.681445236992542e-06, "loss": 0.0368, "step": 43750 }, { "epoch": 0.18256961887992257, "grad_norm": 1.0087562347994734, "learning_rate": 4.681177742649855e-06, "loss": 0.0343, "step": 43755 }, { "epoch": 0.18259048159491284, "grad_norm": 0.842576239731945, "learning_rate": 4.680910294155218e-06, "loss": 0.0382, "step": 43760 }, { "epoch": 0.1826113443099031, "grad_norm": 0.6692865471935721, "learning_rate": 4.680642891495538e-06, "loss": 0.0294, "step": 43765 }, { "epoch": 0.18263220702489338, "grad_norm": 0.660765557873548, "learning_rate": 4.680375534657723e-06, "loss": 0.0398, "step": 43770 }, { "epoch": 0.18265306973988368, "grad_norm": 1.123309488973141, "learning_rate": 4.6801082236286875e-06, "loss": 0.0306, "step": 43775 }, { "epoch": 0.18267393245487396, "grad_norm": 0.7940581172564464, "learning_rate": 4.679840958395354e-06, "loss": 0.0347, "step": 43780 }, { "epoch": 0.18269479516986423, "grad_norm": 0.9784329415823925, "learning_rate": 4.679573738944645e-06, "loss": 0.0422, "step": 43785 }, { "epoch": 0.1827156578848545, "grad_norm": 0.7030362271637556, "learning_rate": 4.679306565263492e-06, "loss": 0.0288, "step": 43790 }, { "epoch": 0.18273652059984477, "grad_norm": 1.235045238896839, "learning_rate": 4.679039437338831e-06, "loss": 0.0388, "step": 43795 }, { "epoch": 0.18275738331483507, "grad_norm": 0.7064799726760266, "learning_rate": 4.678772355157603e-06, "loss": 0.0518, "step": 43800 }, { "epoch": 0.18277824602982534, "grad_norm": 0.4719749450111301, "learning_rate": 4.678505318706754e-06, "loss": 0.0467, "step": 43805 }, { "epoch": 0.1827991087448156, "grad_norm": 1.1850300032758239, "learning_rate": 4.678238327973235e-06, "loss": 0.0367, "step": 43810 }, { "epoch": 0.18281997145980589, "grad_norm": 0.9910776131338411, "learning_rate": 4.6779713829440035e-06, "loss": 0.0318, "step": 43815 }, { "epoch": 0.18284083417479616, "grad_norm": 1.0982543489554735, "learning_rate": 4.677704483606022e-06, "loss": 0.0383, "step": 43820 }, { "epoch": 0.18286169688978646, "grad_norm": 0.5947686390550456, "learning_rate": 4.6774376299462556e-06, "loss": 0.0302, "step": 43825 }, { "epoch": 0.18288255960477673, "grad_norm": 2.9187260249657987, "learning_rate": 4.677170821951677e-06, "loss": 0.0581, "step": 43830 }, { "epoch": 0.182903422319767, "grad_norm": 1.100424094853187, "learning_rate": 4.6769040596092645e-06, "loss": 0.0455, "step": 43835 }, { "epoch": 0.18292428503475727, "grad_norm": 0.8520604559813453, "learning_rate": 4.676637342906e-06, "loss": 0.0264, "step": 43840 }, { "epoch": 0.18294514774974757, "grad_norm": 0.8427311029413519, "learning_rate": 4.6763706718288715e-06, "loss": 0.0371, "step": 43845 }, { "epoch": 0.18296601046473784, "grad_norm": 0.9140912596950148, "learning_rate": 4.676104046364872e-06, "loss": 0.0264, "step": 43850 }, { "epoch": 0.18298687317972812, "grad_norm": 1.2923052721717583, "learning_rate": 4.675837466501e-06, "loss": 0.0331, "step": 43855 }, { "epoch": 0.1830077358947184, "grad_norm": 0.9674951132284896, "learning_rate": 4.675570932224259e-06, "loss": 0.0337, "step": 43860 }, { "epoch": 0.18302859860970866, "grad_norm": 0.5961557677780851, "learning_rate": 4.675304443521657e-06, "loss": 0.0408, "step": 43865 }, { "epoch": 0.18304946132469896, "grad_norm": 1.430752099822346, "learning_rate": 4.6750380003802084e-06, "loss": 0.0324, "step": 43870 }, { "epoch": 0.18307032403968923, "grad_norm": 1.1154864330578156, "learning_rate": 4.6747716027869315e-06, "loss": 0.04, "step": 43875 }, { "epoch": 0.1830911867546795, "grad_norm": 0.9589707078537205, "learning_rate": 4.674505250728851e-06, "loss": 0.0427, "step": 43880 }, { "epoch": 0.18311204946966977, "grad_norm": 1.1019021492091332, "learning_rate": 4.674238944192995e-06, "loss": 0.0344, "step": 43885 }, { "epoch": 0.18313291218466007, "grad_norm": 0.8451094418801628, "learning_rate": 4.6739726831664e-06, "loss": 0.0428, "step": 43890 }, { "epoch": 0.18315377489965035, "grad_norm": 0.9996048614032513, "learning_rate": 4.6737064676361045e-06, "loss": 0.0253, "step": 43895 }, { "epoch": 0.18317463761464062, "grad_norm": 0.8898100711686949, "learning_rate": 4.673440297589152e-06, "loss": 0.0408, "step": 43900 }, { "epoch": 0.1831955003296309, "grad_norm": 0.8680966160596556, "learning_rate": 4.673174173012596e-06, "loss": 0.0345, "step": 43905 }, { "epoch": 0.18321636304462116, "grad_norm": 1.9886984551977451, "learning_rate": 4.672908093893488e-06, "loss": 0.0621, "step": 43910 }, { "epoch": 0.18323722575961146, "grad_norm": 0.8332352284049827, "learning_rate": 4.672642060218889e-06, "loss": 0.0331, "step": 43915 }, { "epoch": 0.18325808847460173, "grad_norm": 0.8272802254196493, "learning_rate": 4.672376071975868e-06, "loss": 0.0344, "step": 43920 }, { "epoch": 0.183278951189592, "grad_norm": 1.0103045500832997, "learning_rate": 4.6721101291514905e-06, "loss": 0.0421, "step": 43925 }, { "epoch": 0.18329981390458228, "grad_norm": 0.6783161746520424, "learning_rate": 4.671844231732835e-06, "loss": 0.0398, "step": 43930 }, { "epoch": 0.18332067661957258, "grad_norm": 1.2335039339206735, "learning_rate": 4.671578379706982e-06, "loss": 0.0372, "step": 43935 }, { "epoch": 0.18334153933456285, "grad_norm": 0.9085846584328642, "learning_rate": 4.671312573061018e-06, "loss": 0.0315, "step": 43940 }, { "epoch": 0.18336240204955312, "grad_norm": 1.1925682746626811, "learning_rate": 4.671046811782034e-06, "loss": 0.0295, "step": 43945 }, { "epoch": 0.1833832647645434, "grad_norm": 0.5266813049611138, "learning_rate": 4.6707810958571245e-06, "loss": 0.0334, "step": 43950 }, { "epoch": 0.18340412747953366, "grad_norm": 0.5304504923071759, "learning_rate": 4.670515425273393e-06, "loss": 0.0317, "step": 43955 }, { "epoch": 0.18342499019452396, "grad_norm": 0.7106454573987886, "learning_rate": 4.670249800017946e-06, "loss": 0.0254, "step": 43960 }, { "epoch": 0.18344585290951423, "grad_norm": 0.7531388153653632, "learning_rate": 4.669984220077895e-06, "loss": 0.0453, "step": 43965 }, { "epoch": 0.1834667156245045, "grad_norm": 1.2248388542259934, "learning_rate": 4.669718685440357e-06, "loss": 0.0462, "step": 43970 }, { "epoch": 0.18348757833949478, "grad_norm": 0.8693238433138372, "learning_rate": 4.669453196092453e-06, "loss": 0.0409, "step": 43975 }, { "epoch": 0.18350844105448508, "grad_norm": 0.8825874280958776, "learning_rate": 4.669187752021311e-06, "loss": 0.0337, "step": 43980 }, { "epoch": 0.18352930376947535, "grad_norm": 1.137105575189914, "learning_rate": 4.668922353214063e-06, "loss": 0.0316, "step": 43985 }, { "epoch": 0.18355016648446562, "grad_norm": 0.9478492315809287, "learning_rate": 4.668656999657846e-06, "loss": 0.0412, "step": 43990 }, { "epoch": 0.1835710291994559, "grad_norm": 0.4724773908393064, "learning_rate": 4.668391691339804e-06, "loss": 0.0377, "step": 43995 }, { "epoch": 0.18359189191444616, "grad_norm": 0.7824035199325464, "learning_rate": 4.668126428247082e-06, "loss": 0.0321, "step": 44000 }, { "epoch": 0.18361275462943646, "grad_norm": 0.7980978138777429, "learning_rate": 4.667861210366837e-06, "loss": 0.0338, "step": 44005 }, { "epoch": 0.18363361734442674, "grad_norm": 0.8490265222524185, "learning_rate": 4.667596037686222e-06, "loss": 0.0489, "step": 44010 }, { "epoch": 0.183654480059417, "grad_norm": 0.9749730890820938, "learning_rate": 4.667330910192402e-06, "loss": 0.0348, "step": 44015 }, { "epoch": 0.18367534277440728, "grad_norm": 0.7039474719270938, "learning_rate": 4.667065827872544e-06, "loss": 0.0427, "step": 44020 }, { "epoch": 0.18369620548939758, "grad_norm": 0.9845365415888399, "learning_rate": 4.666800790713823e-06, "loss": 0.0314, "step": 44025 }, { "epoch": 0.18371706820438785, "grad_norm": 1.0440804518696802, "learning_rate": 4.666535798703416e-06, "loss": 0.0316, "step": 44030 }, { "epoch": 0.18373793091937812, "grad_norm": 0.6088796563970788, "learning_rate": 4.666270851828507e-06, "loss": 0.0391, "step": 44035 }, { "epoch": 0.1837587936343684, "grad_norm": 0.7749823255620499, "learning_rate": 4.6660059500762834e-06, "loss": 0.0405, "step": 44040 }, { "epoch": 0.18377965634935867, "grad_norm": 1.1619135055699386, "learning_rate": 4.66574109343394e-06, "loss": 0.0458, "step": 44045 }, { "epoch": 0.18380051906434897, "grad_norm": 1.235830002835478, "learning_rate": 4.665476281888674e-06, "loss": 0.0471, "step": 44050 }, { "epoch": 0.18382138177933924, "grad_norm": 1.1486199007437683, "learning_rate": 4.665211515427689e-06, "loss": 0.0434, "step": 44055 }, { "epoch": 0.1838422444943295, "grad_norm": 0.6907741959888237, "learning_rate": 4.664946794038196e-06, "loss": 0.0274, "step": 44060 }, { "epoch": 0.18386310720931978, "grad_norm": 0.5840493925172264, "learning_rate": 4.664682117707407e-06, "loss": 0.0389, "step": 44065 }, { "epoch": 0.18388396992431008, "grad_norm": 0.6716354020397298, "learning_rate": 4.664417486422541e-06, "loss": 0.0327, "step": 44070 }, { "epoch": 0.18390483263930035, "grad_norm": 0.5578150542807088, "learning_rate": 4.664152900170823e-06, "loss": 0.0333, "step": 44075 }, { "epoch": 0.18392569535429062, "grad_norm": 1.1114859831850898, "learning_rate": 4.6638883589394805e-06, "loss": 0.0494, "step": 44080 }, { "epoch": 0.1839465580692809, "grad_norm": 0.9975464490821334, "learning_rate": 4.663623862715748e-06, "loss": 0.0363, "step": 44085 }, { "epoch": 0.18396742078427117, "grad_norm": 0.9484906459581254, "learning_rate": 4.663359411486866e-06, "loss": 0.0443, "step": 44090 }, { "epoch": 0.18398828349926147, "grad_norm": 0.5355875916656048, "learning_rate": 4.663095005240077e-06, "loss": 0.0313, "step": 44095 }, { "epoch": 0.18400914621425174, "grad_norm": 1.0519651763271685, "learning_rate": 4.662830643962632e-06, "loss": 0.0359, "step": 44100 }, { "epoch": 0.184030008929242, "grad_norm": 0.7119965176904363, "learning_rate": 4.6625663276417846e-06, "loss": 0.0356, "step": 44105 }, { "epoch": 0.18405087164423228, "grad_norm": 0.8343236578613935, "learning_rate": 4.662302056264793e-06, "loss": 0.0387, "step": 44110 }, { "epoch": 0.18407173435922258, "grad_norm": 0.6108268984037488, "learning_rate": 4.662037829818924e-06, "loss": 0.0286, "step": 44115 }, { "epoch": 0.18409259707421285, "grad_norm": 0.9660432501266255, "learning_rate": 4.6617736482914455e-06, "loss": 0.0361, "step": 44120 }, { "epoch": 0.18411345978920313, "grad_norm": 0.5426091113147808, "learning_rate": 4.6615095116696325e-06, "loss": 0.0249, "step": 44125 }, { "epoch": 0.1841343225041934, "grad_norm": 1.1438626376748182, "learning_rate": 4.661245419940766e-06, "loss": 0.0412, "step": 44130 }, { "epoch": 0.18415518521918367, "grad_norm": 0.9444337802729666, "learning_rate": 4.660981373092128e-06, "loss": 0.0423, "step": 44135 }, { "epoch": 0.18417604793417397, "grad_norm": 1.3772664219034423, "learning_rate": 4.66071737111101e-06, "loss": 0.0449, "step": 44140 }, { "epoch": 0.18419691064916424, "grad_norm": 0.6397061894874724, "learning_rate": 4.660453413984707e-06, "loss": 0.0388, "step": 44145 }, { "epoch": 0.1842177733641545, "grad_norm": 0.731064985438869, "learning_rate": 4.660189501700518e-06, "loss": 0.0459, "step": 44150 }, { "epoch": 0.18423863607914479, "grad_norm": 0.4348600915793774, "learning_rate": 4.6599256342457485e-06, "loss": 0.0294, "step": 44155 }, { "epoch": 0.18425949879413508, "grad_norm": 1.148746543594941, "learning_rate": 4.659661811607708e-06, "loss": 0.047, "step": 44160 }, { "epoch": 0.18428036150912536, "grad_norm": 1.048260214991867, "learning_rate": 4.65939803377371e-06, "loss": 0.0283, "step": 44165 }, { "epoch": 0.18430122422411563, "grad_norm": 1.0164856475794752, "learning_rate": 4.659134300731077e-06, "loss": 0.0315, "step": 44170 }, { "epoch": 0.1843220869391059, "grad_norm": 2.5389216651637545, "learning_rate": 4.658870612467133e-06, "loss": 0.0587, "step": 44175 }, { "epoch": 0.18434294965409617, "grad_norm": 1.1413040529364755, "learning_rate": 4.658606968969207e-06, "loss": 0.044, "step": 44180 }, { "epoch": 0.18436381236908647, "grad_norm": 0.5794783659243508, "learning_rate": 4.658343370224635e-06, "loss": 0.0396, "step": 44185 }, { "epoch": 0.18438467508407674, "grad_norm": 0.7603491328547591, "learning_rate": 4.658079816220757e-06, "loss": 0.0383, "step": 44190 }, { "epoch": 0.18440553779906702, "grad_norm": 0.7805164486678365, "learning_rate": 4.657816306944917e-06, "loss": 0.0359, "step": 44195 }, { "epoch": 0.1844264005140573, "grad_norm": 0.5231220687292969, "learning_rate": 4.657552842384465e-06, "loss": 0.0353, "step": 44200 }, { "epoch": 0.1844472632290476, "grad_norm": 1.180062191471027, "learning_rate": 4.657289422526758e-06, "loss": 0.0413, "step": 44205 }, { "epoch": 0.18446812594403786, "grad_norm": 1.246478984142749, "learning_rate": 4.6570260473591544e-06, "loss": 0.0417, "step": 44210 }, { "epoch": 0.18448898865902813, "grad_norm": 0.9044279256750951, "learning_rate": 4.65676271686902e-06, "loss": 0.0397, "step": 44215 }, { "epoch": 0.1845098513740184, "grad_norm": 0.5240096342201979, "learning_rate": 4.656499431043724e-06, "loss": 0.0329, "step": 44220 }, { "epoch": 0.18453071408900867, "grad_norm": 0.8466472709598287, "learning_rate": 4.656236189870642e-06, "loss": 0.0375, "step": 44225 }, { "epoch": 0.18455157680399897, "grad_norm": 1.0262057621119873, "learning_rate": 4.655972993337154e-06, "loss": 0.0271, "step": 44230 }, { "epoch": 0.18457243951898925, "grad_norm": 0.9998618431685208, "learning_rate": 4.655709841430645e-06, "loss": 0.0463, "step": 44235 }, { "epoch": 0.18459330223397952, "grad_norm": 1.3015268680769763, "learning_rate": 4.655446734138505e-06, "loss": 0.0374, "step": 44240 }, { "epoch": 0.1846141649489698, "grad_norm": 0.7460659818406316, "learning_rate": 4.655183671448128e-06, "loss": 0.0301, "step": 44245 }, { "epoch": 0.1846350276639601, "grad_norm": 0.6494397964513322, "learning_rate": 4.654920653346916e-06, "loss": 0.04, "step": 44250 }, { "epoch": 0.18465589037895036, "grad_norm": 1.1483188158581599, "learning_rate": 4.654657679822273e-06, "loss": 0.0302, "step": 44255 }, { "epoch": 0.18467675309394063, "grad_norm": 1.3312968340111317, "learning_rate": 4.654394750861609e-06, "loss": 0.0338, "step": 44260 }, { "epoch": 0.1846976158089309, "grad_norm": 0.5849745670434425, "learning_rate": 4.654131866452339e-06, "loss": 0.0354, "step": 44265 }, { "epoch": 0.18471847852392118, "grad_norm": 0.9753948896654145, "learning_rate": 4.653869026581882e-06, "loss": 0.0323, "step": 44270 }, { "epoch": 0.18473934123891148, "grad_norm": 0.671625481953636, "learning_rate": 4.653606231237664e-06, "loss": 0.0319, "step": 44275 }, { "epoch": 0.18476020395390175, "grad_norm": 0.784033449515608, "learning_rate": 4.653343480407114e-06, "loss": 0.0404, "step": 44280 }, { "epoch": 0.18478106666889202, "grad_norm": 0.8075370637772492, "learning_rate": 4.653080774077668e-06, "loss": 0.0319, "step": 44285 }, { "epoch": 0.1848019293838823, "grad_norm": 1.0970899210063434, "learning_rate": 4.6528181122367646e-06, "loss": 0.0406, "step": 44290 }, { "epoch": 0.1848227920988726, "grad_norm": 1.6393421179333647, "learning_rate": 4.652555494871849e-06, "loss": 0.0314, "step": 44295 }, { "epoch": 0.18484365481386286, "grad_norm": 1.5877436279951134, "learning_rate": 4.6522929219703715e-06, "loss": 0.0368, "step": 44300 }, { "epoch": 0.18486451752885313, "grad_norm": 0.7869773974719604, "learning_rate": 4.652030393519786e-06, "loss": 0.0422, "step": 44305 }, { "epoch": 0.1848853802438434, "grad_norm": 0.5860233685474692, "learning_rate": 4.651767909507553e-06, "loss": 0.0477, "step": 44310 }, { "epoch": 0.18490624295883368, "grad_norm": 0.6995473486752193, "learning_rate": 4.6515054699211345e-06, "loss": 0.0284, "step": 44315 }, { "epoch": 0.18492710567382398, "grad_norm": 1.139884088175734, "learning_rate": 4.651243074748003e-06, "loss": 0.0344, "step": 44320 }, { "epoch": 0.18494796838881425, "grad_norm": 0.8876209302697553, "learning_rate": 4.650980723975632e-06, "loss": 0.0301, "step": 44325 }, { "epoch": 0.18496883110380452, "grad_norm": 0.9229912818112205, "learning_rate": 4.6507184175915e-06, "loss": 0.038, "step": 44330 }, { "epoch": 0.1849896938187948, "grad_norm": 0.7500843417115128, "learning_rate": 4.650456155583093e-06, "loss": 0.0412, "step": 44335 }, { "epoch": 0.1850105565337851, "grad_norm": 0.6000493744649595, "learning_rate": 4.650193937937899e-06, "loss": 0.0366, "step": 44340 }, { "epoch": 0.18503141924877536, "grad_norm": 0.7155501532392642, "learning_rate": 4.649931764643412e-06, "loss": 0.0374, "step": 44345 }, { "epoch": 0.18505228196376564, "grad_norm": 0.7680909143101698, "learning_rate": 4.649669635687133e-06, "loss": 0.0398, "step": 44350 }, { "epoch": 0.1850731446787559, "grad_norm": 0.6454687954764583, "learning_rate": 4.649407551056564e-06, "loss": 0.0288, "step": 44355 }, { "epoch": 0.18509400739374618, "grad_norm": 6.546423137040281, "learning_rate": 4.649145510739216e-06, "loss": 0.0512, "step": 44360 }, { "epoch": 0.18511487010873648, "grad_norm": 1.3109096688648998, "learning_rate": 4.648883514722601e-06, "loss": 0.0332, "step": 44365 }, { "epoch": 0.18513573282372675, "grad_norm": 1.3427817535882562, "learning_rate": 4.6486215629942396e-06, "loss": 0.0404, "step": 44370 }, { "epoch": 0.18515659553871702, "grad_norm": 0.8731928148289001, "learning_rate": 4.648359655541654e-06, "loss": 0.0428, "step": 44375 }, { "epoch": 0.1851774582537073, "grad_norm": 0.9242407320012583, "learning_rate": 4.648097792352373e-06, "loss": 0.0302, "step": 44380 }, { "epoch": 0.1851983209686976, "grad_norm": 1.0695263971785043, "learning_rate": 4.647835973413932e-06, "loss": 0.0448, "step": 44385 }, { "epoch": 0.18521918368368787, "grad_norm": 1.2145693619938926, "learning_rate": 4.647574198713869e-06, "loss": 0.0331, "step": 44390 }, { "epoch": 0.18524004639867814, "grad_norm": 0.6642571905869871, "learning_rate": 4.647312468239727e-06, "loss": 0.0313, "step": 44395 }, { "epoch": 0.1852609091136684, "grad_norm": 0.7114990877434713, "learning_rate": 4.647050781979053e-06, "loss": 0.0419, "step": 44400 }, { "epoch": 0.18528177182865868, "grad_norm": 0.5927327014118412, "learning_rate": 4.646789139919402e-06, "loss": 0.0365, "step": 44405 }, { "epoch": 0.18530263454364898, "grad_norm": 1.1826229357103477, "learning_rate": 4.646527542048332e-06, "loss": 0.0411, "step": 44410 }, { "epoch": 0.18532349725863925, "grad_norm": 0.9562920343401808, "learning_rate": 4.646265988353406e-06, "loss": 0.0396, "step": 44415 }, { "epoch": 0.18534435997362952, "grad_norm": 0.7260989942948122, "learning_rate": 4.646004478822192e-06, "loss": 0.0424, "step": 44420 }, { "epoch": 0.1853652226886198, "grad_norm": 1.3666799459416399, "learning_rate": 4.645743013442263e-06, "loss": 0.0317, "step": 44425 }, { "epoch": 0.1853860854036101, "grad_norm": 0.5567469423167211, "learning_rate": 4.6454815922011965e-06, "loss": 0.0297, "step": 44430 }, { "epoch": 0.18540694811860037, "grad_norm": 0.678553105946452, "learning_rate": 4.6452202150865746e-06, "loss": 0.0337, "step": 44435 }, { "epoch": 0.18542781083359064, "grad_norm": 0.6784003285971701, "learning_rate": 4.644958882085986e-06, "loss": 0.0362, "step": 44440 }, { "epoch": 0.1854486735485809, "grad_norm": 0.7034187485548684, "learning_rate": 4.644697593187023e-06, "loss": 0.0309, "step": 44445 }, { "epoch": 0.18546953626357118, "grad_norm": 1.0056789527879413, "learning_rate": 4.644436348377282e-06, "loss": 0.0394, "step": 44450 }, { "epoch": 0.18549039897856148, "grad_norm": 0.6623039644315669, "learning_rate": 4.644175147644366e-06, "loss": 0.0346, "step": 44455 }, { "epoch": 0.18551126169355175, "grad_norm": 0.5644673225241793, "learning_rate": 4.643913990975883e-06, "loss": 0.0284, "step": 44460 }, { "epoch": 0.18553212440854203, "grad_norm": 1.253755642422159, "learning_rate": 4.643652878359443e-06, "loss": 0.0487, "step": 44465 }, { "epoch": 0.1855529871235323, "grad_norm": 0.6891241091876789, "learning_rate": 4.643391809782663e-06, "loss": 0.0389, "step": 44470 }, { "epoch": 0.1855738498385226, "grad_norm": 0.8403582882483609, "learning_rate": 4.643130785233167e-06, "loss": 0.0515, "step": 44475 }, { "epoch": 0.18559471255351287, "grad_norm": 1.1246695910416866, "learning_rate": 4.64286980469858e-06, "loss": 0.0377, "step": 44480 }, { "epoch": 0.18561557526850314, "grad_norm": 1.1109029876360392, "learning_rate": 4.642608868166532e-06, "loss": 0.0453, "step": 44485 }, { "epoch": 0.1856364379834934, "grad_norm": 0.9826929971750661, "learning_rate": 4.642347975624662e-06, "loss": 0.0295, "step": 44490 }, { "epoch": 0.18565730069848368, "grad_norm": 0.799846432617119, "learning_rate": 4.64208712706061e-06, "loss": 0.0422, "step": 44495 }, { "epoch": 0.18567816341347398, "grad_norm": 1.6630943928567603, "learning_rate": 4.641826322462023e-06, "loss": 0.0484, "step": 44500 }, { "epoch": 0.18569902612846426, "grad_norm": 1.0548703788527987, "learning_rate": 4.64156556181655e-06, "loss": 0.0429, "step": 44505 }, { "epoch": 0.18571988884345453, "grad_norm": 0.8791814077266064, "learning_rate": 4.641304845111848e-06, "loss": 0.0358, "step": 44510 }, { "epoch": 0.1857407515584448, "grad_norm": 3.4351517453302325, "learning_rate": 4.641044172335578e-06, "loss": 0.0358, "step": 44515 }, { "epoch": 0.1857616142734351, "grad_norm": 0.7810883290491931, "learning_rate": 4.640783543475404e-06, "loss": 0.0283, "step": 44520 }, { "epoch": 0.18578247698842537, "grad_norm": 0.9923508281908117, "learning_rate": 4.640522958518998e-06, "loss": 0.0336, "step": 44525 }, { "epoch": 0.18580333970341564, "grad_norm": 0.8929014768964839, "learning_rate": 4.640262417454034e-06, "loss": 0.0311, "step": 44530 }, { "epoch": 0.18582420241840591, "grad_norm": 0.6865244768863774, "learning_rate": 4.6400019202681925e-06, "loss": 0.0328, "step": 44535 }, { "epoch": 0.1858450651333962, "grad_norm": 0.5182297880371499, "learning_rate": 4.639741466949158e-06, "loss": 0.0302, "step": 44540 }, { "epoch": 0.1858659278483865, "grad_norm": 0.797185725840819, "learning_rate": 4.639481057484622e-06, "loss": 0.0389, "step": 44545 }, { "epoch": 0.18588679056337676, "grad_norm": 2.8617136917537436, "learning_rate": 4.639220691862276e-06, "loss": 0.0471, "step": 44550 }, { "epoch": 0.18590765327836703, "grad_norm": 0.9272202510761011, "learning_rate": 4.638960370069822e-06, "loss": 0.0328, "step": 44555 }, { "epoch": 0.1859285159933573, "grad_norm": 1.2840841591276915, "learning_rate": 4.638700092094962e-06, "loss": 0.0423, "step": 44560 }, { "epoch": 0.1859493787083476, "grad_norm": 0.8368778422459733, "learning_rate": 4.638439857925406e-06, "loss": 0.0395, "step": 44565 }, { "epoch": 0.18597024142333787, "grad_norm": 1.0201559032589962, "learning_rate": 4.638179667548869e-06, "loss": 0.0305, "step": 44570 }, { "epoch": 0.18599110413832814, "grad_norm": 1.3741851902440685, "learning_rate": 4.637919520953068e-06, "loss": 0.0395, "step": 44575 }, { "epoch": 0.18601196685331842, "grad_norm": 0.9794020227122444, "learning_rate": 4.637659418125726e-06, "loss": 0.0358, "step": 44580 }, { "epoch": 0.1860328295683087, "grad_norm": 0.9018442830845619, "learning_rate": 4.637399359054574e-06, "loss": 0.0363, "step": 44585 }, { "epoch": 0.186053692283299, "grad_norm": 0.9117356214735173, "learning_rate": 4.637139343727343e-06, "loss": 0.0303, "step": 44590 }, { "epoch": 0.18607455499828926, "grad_norm": 0.6672760372452915, "learning_rate": 4.636879372131771e-06, "loss": 0.0358, "step": 44595 }, { "epoch": 0.18609541771327953, "grad_norm": 0.7047493898672443, "learning_rate": 4.636619444255601e-06, "loss": 0.0418, "step": 44600 }, { "epoch": 0.1861162804282698, "grad_norm": 0.82235919955641, "learning_rate": 4.636359560086582e-06, "loss": 0.0318, "step": 44605 }, { "epoch": 0.1861371431432601, "grad_norm": 1.1645361056790176, "learning_rate": 4.636099719612465e-06, "loss": 0.0425, "step": 44610 }, { "epoch": 0.18615800585825037, "grad_norm": 5.061014044099924, "learning_rate": 4.635839922821006e-06, "loss": 0.0338, "step": 44615 }, { "epoch": 0.18617886857324065, "grad_norm": 1.1765769229261747, "learning_rate": 4.63558016969997e-06, "loss": 0.0364, "step": 44620 }, { "epoch": 0.18619973128823092, "grad_norm": 1.673627790159876, "learning_rate": 4.63532046023712e-06, "loss": 0.0398, "step": 44625 }, { "epoch": 0.1862205940032212, "grad_norm": 1.1261760111027324, "learning_rate": 4.635060794420231e-06, "loss": 0.043, "step": 44630 }, { "epoch": 0.1862414567182115, "grad_norm": 0.5926873947842736, "learning_rate": 4.634801172237077e-06, "loss": 0.0341, "step": 44635 }, { "epoch": 0.18626231943320176, "grad_norm": 1.1014792355836684, "learning_rate": 4.634541593675441e-06, "loss": 0.0359, "step": 44640 }, { "epoch": 0.18628318214819203, "grad_norm": 0.7084191860254759, "learning_rate": 4.6342820587231075e-06, "loss": 0.0275, "step": 44645 }, { "epoch": 0.1863040448631823, "grad_norm": 0.5672662964827503, "learning_rate": 4.634022567367868e-06, "loss": 0.0298, "step": 44650 }, { "epoch": 0.1863249075781726, "grad_norm": 0.5918470753231313, "learning_rate": 4.633763119597518e-06, "loss": 0.0381, "step": 44655 }, { "epoch": 0.18634577029316288, "grad_norm": 0.7478904084357649, "learning_rate": 4.633503715399858e-06, "loss": 0.0404, "step": 44660 }, { "epoch": 0.18636663300815315, "grad_norm": 1.1623302146377037, "learning_rate": 4.633244354762692e-06, "loss": 0.0399, "step": 44665 }, { "epoch": 0.18638749572314342, "grad_norm": 1.1222594759330453, "learning_rate": 4.632985037673831e-06, "loss": 0.0452, "step": 44670 }, { "epoch": 0.1864083584381337, "grad_norm": 0.8650568952046794, "learning_rate": 4.63272576412109e-06, "loss": 0.0444, "step": 44675 }, { "epoch": 0.186429221153124, "grad_norm": 0.9476553477170843, "learning_rate": 4.632466534092287e-06, "loss": 0.0414, "step": 44680 }, { "epoch": 0.18645008386811426, "grad_norm": 2.0356193785595833, "learning_rate": 4.6322073475752454e-06, "loss": 0.0523, "step": 44685 }, { "epoch": 0.18647094658310454, "grad_norm": 0.9249187212488108, "learning_rate": 4.631948204557797e-06, "loss": 0.033, "step": 44690 }, { "epoch": 0.1864918092980948, "grad_norm": 0.8376012168747886, "learning_rate": 4.631689105027773e-06, "loss": 0.0333, "step": 44695 }, { "epoch": 0.1865126720130851, "grad_norm": 0.7356839967187769, "learning_rate": 4.631430048973014e-06, "loss": 0.0356, "step": 44700 }, { "epoch": 0.18653353472807538, "grad_norm": 0.47556959888207084, "learning_rate": 4.6311710363813615e-06, "loss": 0.0376, "step": 44705 }, { "epoch": 0.18655439744306565, "grad_norm": 0.4686901757855494, "learning_rate": 4.630912067240664e-06, "loss": 0.0294, "step": 44710 }, { "epoch": 0.18657526015805592, "grad_norm": 0.740064452475355, "learning_rate": 4.6306531415387745e-06, "loss": 0.083, "step": 44715 }, { "epoch": 0.1865961228730462, "grad_norm": 1.1499510784416127, "learning_rate": 4.630394259263551e-06, "loss": 0.0396, "step": 44720 }, { "epoch": 0.1866169855880365, "grad_norm": 0.7067747640537049, "learning_rate": 4.630135420402855e-06, "loss": 0.0423, "step": 44725 }, { "epoch": 0.18663784830302677, "grad_norm": 0.8278963267872887, "learning_rate": 4.629876624944554e-06, "loss": 0.0384, "step": 44730 }, { "epoch": 0.18665871101801704, "grad_norm": 0.9658075335283818, "learning_rate": 4.629617872876518e-06, "loss": 0.0342, "step": 44735 }, { "epoch": 0.1866795737330073, "grad_norm": 1.2192054993578891, "learning_rate": 4.629359164186626e-06, "loss": 0.0481, "step": 44740 }, { "epoch": 0.1867004364479976, "grad_norm": 1.284463660878554, "learning_rate": 4.6291004988627575e-06, "loss": 0.0412, "step": 44745 }, { "epoch": 0.18672129916298788, "grad_norm": 1.0768940313003548, "learning_rate": 4.6288418768928e-06, "loss": 0.0366, "step": 44750 }, { "epoch": 0.18674216187797815, "grad_norm": 0.7587048505352956, "learning_rate": 4.628583298264642e-06, "loss": 0.0324, "step": 44755 }, { "epoch": 0.18676302459296842, "grad_norm": 0.5553591361789985, "learning_rate": 4.628324762966182e-06, "loss": 0.0341, "step": 44760 }, { "epoch": 0.1867838873079587, "grad_norm": 1.0205736344625054, "learning_rate": 4.6280662709853175e-06, "loss": 0.0293, "step": 44765 }, { "epoch": 0.186804750022949, "grad_norm": 0.6201026446921168, "learning_rate": 4.6278078223099545e-06, "loss": 0.033, "step": 44770 }, { "epoch": 0.18682561273793927, "grad_norm": 0.8335931922708537, "learning_rate": 4.627549416928002e-06, "loss": 0.0496, "step": 44775 }, { "epoch": 0.18684647545292954, "grad_norm": 1.145422650264776, "learning_rate": 4.6272910548273755e-06, "loss": 0.0357, "step": 44780 }, { "epoch": 0.1868673381679198, "grad_norm": 0.8619670634008595, "learning_rate": 4.627032735995994e-06, "loss": 0.0346, "step": 44785 }, { "epoch": 0.1868882008829101, "grad_norm": 0.9004416332944535, "learning_rate": 4.62677446042178e-06, "loss": 0.0373, "step": 44790 }, { "epoch": 0.18690906359790038, "grad_norm": 0.9364441344792128, "learning_rate": 4.626516228092662e-06, "loss": 0.047, "step": 44795 }, { "epoch": 0.18692992631289065, "grad_norm": 0.49159553396407135, "learning_rate": 4.626258038996575e-06, "loss": 0.0266, "step": 44800 }, { "epoch": 0.18695078902788093, "grad_norm": 1.132341052049304, "learning_rate": 4.625999893121456e-06, "loss": 0.04, "step": 44805 }, { "epoch": 0.1869716517428712, "grad_norm": 0.7752078999036102, "learning_rate": 4.625741790455248e-06, "loss": 0.0296, "step": 44810 }, { "epoch": 0.1869925144578615, "grad_norm": 0.7440171435987496, "learning_rate": 4.6254837309858965e-06, "loss": 0.0368, "step": 44815 }, { "epoch": 0.18701337717285177, "grad_norm": 0.5148364453264068, "learning_rate": 4.625225714701356e-06, "loss": 0.0292, "step": 44820 }, { "epoch": 0.18703423988784204, "grad_norm": 0.8824217929521604, "learning_rate": 4.624967741589583e-06, "loss": 0.037, "step": 44825 }, { "epoch": 0.1870551026028323, "grad_norm": 0.8425467768956142, "learning_rate": 4.624709811638538e-06, "loss": 0.036, "step": 44830 }, { "epoch": 0.1870759653178226, "grad_norm": 1.1932974801963634, "learning_rate": 4.624451924836187e-06, "loss": 0.0455, "step": 44835 }, { "epoch": 0.18709682803281288, "grad_norm": 1.0431988307985223, "learning_rate": 4.624194081170502e-06, "loss": 0.0437, "step": 44840 }, { "epoch": 0.18711769074780316, "grad_norm": 1.2758580862272066, "learning_rate": 4.623936280629458e-06, "loss": 0.047, "step": 44845 }, { "epoch": 0.18713855346279343, "grad_norm": 1.230031181881226, "learning_rate": 4.623678523201034e-06, "loss": 0.0496, "step": 44850 }, { "epoch": 0.1871594161777837, "grad_norm": 1.0317470604650287, "learning_rate": 4.623420808873218e-06, "loss": 0.0331, "step": 44855 }, { "epoch": 0.187180278892774, "grad_norm": 1.158646684677868, "learning_rate": 4.623163137633998e-06, "loss": 0.0322, "step": 44860 }, { "epoch": 0.18720114160776427, "grad_norm": 1.0848243171374716, "learning_rate": 4.622905509471367e-06, "loss": 0.0387, "step": 44865 }, { "epoch": 0.18722200432275454, "grad_norm": 0.9198786723263511, "learning_rate": 4.622647924373325e-06, "loss": 0.028, "step": 44870 }, { "epoch": 0.18724286703774481, "grad_norm": 1.7250041089502723, "learning_rate": 4.622390382327877e-06, "loss": 0.0307, "step": 44875 }, { "epoch": 0.18726372975273511, "grad_norm": 2.016540295473071, "learning_rate": 4.62213288332303e-06, "loss": 0.0318, "step": 44880 }, { "epoch": 0.18728459246772539, "grad_norm": 0.9088756276444742, "learning_rate": 4.621875427346796e-06, "loss": 0.0364, "step": 44885 }, { "epoch": 0.18730545518271566, "grad_norm": 0.7881397054571624, "learning_rate": 4.621618014387195e-06, "loss": 0.0369, "step": 44890 }, { "epoch": 0.18732631789770593, "grad_norm": 0.4468945483447305, "learning_rate": 4.621360644432249e-06, "loss": 0.0309, "step": 44895 }, { "epoch": 0.1873471806126962, "grad_norm": 0.8321963875109288, "learning_rate": 4.621103317469984e-06, "loss": 0.0368, "step": 44900 }, { "epoch": 0.1873680433276865, "grad_norm": 0.7337006332040633, "learning_rate": 4.6208460334884325e-06, "loss": 0.032, "step": 44905 }, { "epoch": 0.18738890604267677, "grad_norm": 1.4083631876737779, "learning_rate": 4.62058879247563e-06, "loss": 0.0364, "step": 44910 }, { "epoch": 0.18740976875766704, "grad_norm": 1.2032738416278883, "learning_rate": 4.620331594419618e-06, "loss": 0.0444, "step": 44915 }, { "epoch": 0.18743063147265732, "grad_norm": 0.7360102228577456, "learning_rate": 4.620074439308443e-06, "loss": 0.0355, "step": 44920 }, { "epoch": 0.18745149418764762, "grad_norm": 1.1471347358354615, "learning_rate": 4.6198173271301535e-06, "loss": 0.0507, "step": 44925 }, { "epoch": 0.1874723569026379, "grad_norm": 0.7904710999128141, "learning_rate": 4.619560257872807e-06, "loss": 0.0338, "step": 44930 }, { "epoch": 0.18749321961762816, "grad_norm": 0.7635491055695021, "learning_rate": 4.619303231524462e-06, "loss": 0.039, "step": 44935 }, { "epoch": 0.18751408233261843, "grad_norm": 0.7216615108251889, "learning_rate": 4.619046248073182e-06, "loss": 0.0304, "step": 44940 }, { "epoch": 0.1875349450476087, "grad_norm": 1.245425849283411, "learning_rate": 4.618789307507037e-06, "loss": 0.0364, "step": 44945 }, { "epoch": 0.187555807762599, "grad_norm": 0.8911204958750794, "learning_rate": 4.6185324098141e-06, "loss": 0.0261, "step": 44950 }, { "epoch": 0.18757667047758927, "grad_norm": 0.8765423151715792, "learning_rate": 4.61827555498245e-06, "loss": 0.0373, "step": 44955 }, { "epoch": 0.18759753319257955, "grad_norm": 1.2925446888434686, "learning_rate": 4.618018743000169e-06, "loss": 0.0419, "step": 44960 }, { "epoch": 0.18761839590756982, "grad_norm": 0.6619971657926869, "learning_rate": 4.617761973855346e-06, "loss": 0.0283, "step": 44965 }, { "epoch": 0.18763925862256012, "grad_norm": 0.737800654066781, "learning_rate": 4.617505247536071e-06, "loss": 0.0281, "step": 44970 }, { "epoch": 0.1876601213375504, "grad_norm": 0.7765802983541047, "learning_rate": 4.617248564030442e-06, "loss": 0.027, "step": 44975 }, { "epoch": 0.18768098405254066, "grad_norm": 0.830809560864979, "learning_rate": 4.6169919233265605e-06, "loss": 0.0317, "step": 44980 }, { "epoch": 0.18770184676753093, "grad_norm": 1.025758224700569, "learning_rate": 4.616735325412533e-06, "loss": 0.0356, "step": 44985 }, { "epoch": 0.1877227094825212, "grad_norm": 0.946263146838089, "learning_rate": 4.61647877027647e-06, "loss": 0.0328, "step": 44990 }, { "epoch": 0.1877435721975115, "grad_norm": 0.8046608845260211, "learning_rate": 4.616222257906486e-06, "loss": 0.0336, "step": 44995 }, { "epoch": 0.18776443491250178, "grad_norm": 1.7140264604309796, "learning_rate": 4.615965788290701e-06, "loss": 0.0397, "step": 45000 }, { "epoch": 0.18778529762749205, "grad_norm": 0.7841313233431734, "learning_rate": 4.615709361417241e-06, "loss": 0.0407, "step": 45005 }, { "epoch": 0.18780616034248232, "grad_norm": 0.8092408903469416, "learning_rate": 4.615452977274234e-06, "loss": 0.032, "step": 45010 }, { "epoch": 0.18782702305747262, "grad_norm": 1.5353596937478318, "learning_rate": 4.615196635849812e-06, "loss": 0.0409, "step": 45015 }, { "epoch": 0.1878478857724629, "grad_norm": 0.49686564622981316, "learning_rate": 4.614940337132117e-06, "loss": 0.0351, "step": 45020 }, { "epoch": 0.18786874848745316, "grad_norm": 0.9573153229200908, "learning_rate": 4.614684081109291e-06, "loss": 0.0275, "step": 45025 }, { "epoch": 0.18788961120244344, "grad_norm": 0.5627081344668545, "learning_rate": 4.61442786776948e-06, "loss": 0.0317, "step": 45030 }, { "epoch": 0.1879104739174337, "grad_norm": 1.023171614412792, "learning_rate": 4.6141716971008375e-06, "loss": 0.033, "step": 45035 }, { "epoch": 0.187931336632424, "grad_norm": 0.9759295857900034, "learning_rate": 4.613915569091519e-06, "loss": 0.0408, "step": 45040 }, { "epoch": 0.18795219934741428, "grad_norm": 0.7258263864164952, "learning_rate": 4.6136594837296885e-06, "loss": 0.0264, "step": 45045 }, { "epoch": 0.18797306206240455, "grad_norm": 0.8657221825885116, "learning_rate": 4.613403441003511e-06, "loss": 0.0312, "step": 45050 }, { "epoch": 0.18799392477739482, "grad_norm": 0.963991386470213, "learning_rate": 4.613147440901156e-06, "loss": 0.0314, "step": 45055 }, { "epoch": 0.18801478749238512, "grad_norm": 0.5262202136976254, "learning_rate": 4.612891483410798e-06, "loss": 0.0343, "step": 45060 }, { "epoch": 0.1880356502073754, "grad_norm": 0.6716984452882694, "learning_rate": 4.61263556852062e-06, "loss": 0.0254, "step": 45065 }, { "epoch": 0.18805651292236567, "grad_norm": 3.2126992728257466, "learning_rate": 4.612379696218804e-06, "loss": 0.0332, "step": 45070 }, { "epoch": 0.18807737563735594, "grad_norm": 0.7072823883860512, "learning_rate": 4.612123866493538e-06, "loss": 0.0303, "step": 45075 }, { "epoch": 0.1880982383523462, "grad_norm": 1.0377960944629667, "learning_rate": 4.611868079333019e-06, "loss": 0.039, "step": 45080 }, { "epoch": 0.1881191010673365, "grad_norm": 0.8100090787127011, "learning_rate": 4.611612334725444e-06, "loss": 0.0292, "step": 45085 }, { "epoch": 0.18813996378232678, "grad_norm": 0.8759857564710278, "learning_rate": 4.611356632659014e-06, "loss": 0.0382, "step": 45090 }, { "epoch": 0.18816082649731705, "grad_norm": 0.747799171733069, "learning_rate": 4.611100973121938e-06, "loss": 0.0382, "step": 45095 }, { "epoch": 0.18818168921230732, "grad_norm": 0.7653973840574673, "learning_rate": 4.610845356102426e-06, "loss": 0.0445, "step": 45100 }, { "epoch": 0.18820255192729762, "grad_norm": 1.8542406513473997, "learning_rate": 4.610589781588697e-06, "loss": 0.0409, "step": 45105 }, { "epoch": 0.1882234146422879, "grad_norm": 1.123322933924099, "learning_rate": 4.6103342495689704e-06, "loss": 0.0343, "step": 45110 }, { "epoch": 0.18824427735727817, "grad_norm": 0.9687442581026916, "learning_rate": 4.6100787600314725e-06, "loss": 0.0414, "step": 45115 }, { "epoch": 0.18826514007226844, "grad_norm": 1.1660575347808186, "learning_rate": 4.609823312964433e-06, "loss": 0.0371, "step": 45120 }, { "epoch": 0.1882860027872587, "grad_norm": 0.7039736914594531, "learning_rate": 4.609567908356086e-06, "loss": 0.0269, "step": 45125 }, { "epoch": 0.188306865502249, "grad_norm": 0.6825165668687616, "learning_rate": 4.609312546194672e-06, "loss": 0.0416, "step": 45130 }, { "epoch": 0.18832772821723928, "grad_norm": 0.940007980525199, "learning_rate": 4.609057226468436e-06, "loss": 0.0449, "step": 45135 }, { "epoch": 0.18834859093222955, "grad_norm": 0.8531718803991281, "learning_rate": 4.608801949165624e-06, "loss": 0.0386, "step": 45140 }, { "epoch": 0.18836945364721983, "grad_norm": 0.7020393542967842, "learning_rate": 4.608546714274489e-06, "loss": 0.0331, "step": 45145 }, { "epoch": 0.18839031636221013, "grad_norm": 1.0107014875509548, "learning_rate": 4.608291521783291e-06, "loss": 0.0479, "step": 45150 }, { "epoch": 0.1884111790772004, "grad_norm": 0.7040663869066834, "learning_rate": 4.608036371680289e-06, "loss": 0.0383, "step": 45155 }, { "epoch": 0.18843204179219067, "grad_norm": 0.9517676857910672, "learning_rate": 4.607781263953752e-06, "loss": 0.0296, "step": 45160 }, { "epoch": 0.18845290450718094, "grad_norm": 0.6830045782661422, "learning_rate": 4.6075261985919506e-06, "loss": 0.0321, "step": 45165 }, { "epoch": 0.1884737672221712, "grad_norm": 0.703383312517609, "learning_rate": 4.60727117558316e-06, "loss": 0.0291, "step": 45170 }, { "epoch": 0.1884946299371615, "grad_norm": 1.2855480911835013, "learning_rate": 4.607016194915661e-06, "loss": 0.0303, "step": 45175 }, { "epoch": 0.18851549265215178, "grad_norm": 0.9000503349994586, "learning_rate": 4.606761256577737e-06, "loss": 0.0452, "step": 45180 }, { "epoch": 0.18853635536714206, "grad_norm": 1.3166546197935796, "learning_rate": 4.60650636055768e-06, "loss": 0.0403, "step": 45185 }, { "epoch": 0.18855721808213233, "grad_norm": 1.2547360510849996, "learning_rate": 4.6062515068437815e-06, "loss": 0.0389, "step": 45190 }, { "epoch": 0.18857808079712263, "grad_norm": 0.355819838355174, "learning_rate": 4.6059966954243415e-06, "loss": 0.041, "step": 45195 }, { "epoch": 0.1885989435121129, "grad_norm": 0.827590575687968, "learning_rate": 4.605741926287661e-06, "loss": 0.0337, "step": 45200 }, { "epoch": 0.18861980622710317, "grad_norm": 1.144896026846622, "learning_rate": 4.6054871994220495e-06, "loss": 0.0433, "step": 45205 }, { "epoch": 0.18864066894209344, "grad_norm": 0.6665452553565203, "learning_rate": 4.605232514815818e-06, "loss": 0.0261, "step": 45210 }, { "epoch": 0.18866153165708371, "grad_norm": 0.8167602136262132, "learning_rate": 4.604977872457283e-06, "loss": 0.0413, "step": 45215 }, { "epoch": 0.188682394372074, "grad_norm": 1.1034849155338275, "learning_rate": 4.604723272334767e-06, "loss": 0.0351, "step": 45220 }, { "epoch": 0.18870325708706429, "grad_norm": 1.1977973645593687, "learning_rate": 4.604468714436593e-06, "loss": 0.0284, "step": 45225 }, { "epoch": 0.18872411980205456, "grad_norm": 0.8033031103020782, "learning_rate": 4.604214198751093e-06, "loss": 0.0384, "step": 45230 }, { "epoch": 0.18874498251704483, "grad_norm": 1.0543987451558248, "learning_rate": 4.603959725266601e-06, "loss": 0.0344, "step": 45235 }, { "epoch": 0.18876584523203513, "grad_norm": 0.5813798701575036, "learning_rate": 4.603705293971455e-06, "loss": 0.0314, "step": 45240 }, { "epoch": 0.1887867079470254, "grad_norm": 1.0976979504068893, "learning_rate": 4.6034509048540005e-06, "loss": 0.0407, "step": 45245 }, { "epoch": 0.18880757066201567, "grad_norm": 0.8195395446584166, "learning_rate": 4.603196557902585e-06, "loss": 0.0373, "step": 45250 }, { "epoch": 0.18882843337700594, "grad_norm": 0.6968303714302606, "learning_rate": 4.602942253105561e-06, "loss": 0.0339, "step": 45255 }, { "epoch": 0.18884929609199622, "grad_norm": 0.6806849416767397, "learning_rate": 4.6026879904512854e-06, "loss": 0.0362, "step": 45260 }, { "epoch": 0.18887015880698652, "grad_norm": 0.904923691791513, "learning_rate": 4.6024337699281204e-06, "loss": 0.0322, "step": 45265 }, { "epoch": 0.1888910215219768, "grad_norm": 1.0213269709414055, "learning_rate": 4.602179591524431e-06, "loss": 0.0334, "step": 45270 }, { "epoch": 0.18891188423696706, "grad_norm": 0.6959390513217464, "learning_rate": 4.601925455228588e-06, "loss": 0.028, "step": 45275 }, { "epoch": 0.18893274695195733, "grad_norm": 0.8168477234583191, "learning_rate": 4.601671361028968e-06, "loss": 0.0307, "step": 45280 }, { "epoch": 0.18895360966694763, "grad_norm": 1.2066648275149099, "learning_rate": 4.6014173089139505e-06, "loss": 0.0366, "step": 45285 }, { "epoch": 0.1889744723819379, "grad_norm": 0.49215674997706976, "learning_rate": 4.601163298871919e-06, "loss": 0.0319, "step": 45290 }, { "epoch": 0.18899533509692817, "grad_norm": 0.8744123116565164, "learning_rate": 4.60090933089126e-06, "loss": 0.039, "step": 45295 }, { "epoch": 0.18901619781191845, "grad_norm": 1.1689686179534162, "learning_rate": 4.6006554049603705e-06, "loss": 0.0395, "step": 45300 }, { "epoch": 0.18903706052690872, "grad_norm": 0.9894668300342147, "learning_rate": 4.600401521067645e-06, "loss": 0.0427, "step": 45305 }, { "epoch": 0.18905792324189902, "grad_norm": 0.9033123995171928, "learning_rate": 4.600147679201486e-06, "loss": 0.0405, "step": 45310 }, { "epoch": 0.1890787859568893, "grad_norm": 0.8728239192255137, "learning_rate": 4.599893879350302e-06, "loss": 0.0482, "step": 45315 }, { "epoch": 0.18909964867187956, "grad_norm": 0.6127563245044615, "learning_rate": 4.5996401215025024e-06, "loss": 0.0412, "step": 45320 }, { "epoch": 0.18912051138686983, "grad_norm": 1.2581281570332208, "learning_rate": 4.599386405646503e-06, "loss": 0.0282, "step": 45325 }, { "epoch": 0.18914137410186013, "grad_norm": 0.9347762169575328, "learning_rate": 4.599132731770724e-06, "loss": 0.0277, "step": 45330 }, { "epoch": 0.1891622368168504, "grad_norm": 1.0635034570506672, "learning_rate": 4.598879099863589e-06, "loss": 0.0374, "step": 45335 }, { "epoch": 0.18918309953184068, "grad_norm": 0.9659919607347274, "learning_rate": 4.598625509913528e-06, "loss": 0.0371, "step": 45340 }, { "epoch": 0.18920396224683095, "grad_norm": 0.7492397506916406, "learning_rate": 4.598371961908973e-06, "loss": 0.029, "step": 45345 }, { "epoch": 0.18922482496182122, "grad_norm": 0.7968623619843919, "learning_rate": 4.598118455838363e-06, "loss": 0.0371, "step": 45350 }, { "epoch": 0.18924568767681152, "grad_norm": 1.2356691192581253, "learning_rate": 4.59786499169014e-06, "loss": 0.0283, "step": 45355 }, { "epoch": 0.1892665503918018, "grad_norm": 0.7949777099751021, "learning_rate": 4.59761156945275e-06, "loss": 0.037, "step": 45360 }, { "epoch": 0.18928741310679206, "grad_norm": 0.8666297395101419, "learning_rate": 4.597358189114645e-06, "loss": 0.0356, "step": 45365 }, { "epoch": 0.18930827582178233, "grad_norm": 1.2951362219677074, "learning_rate": 4.5971048506642805e-06, "loss": 0.0351, "step": 45370 }, { "epoch": 0.18932913853677263, "grad_norm": 0.45185480327954186, "learning_rate": 4.596851554090119e-06, "loss": 0.0395, "step": 45375 }, { "epoch": 0.1893500012517629, "grad_norm": 1.3407718233358654, "learning_rate": 4.596598299380619e-06, "loss": 0.0385, "step": 45380 }, { "epoch": 0.18937086396675318, "grad_norm": 0.8701620809076565, "learning_rate": 4.596345086524255e-06, "loss": 0.042, "step": 45385 }, { "epoch": 0.18939172668174345, "grad_norm": 0.8252119082308633, "learning_rate": 4.596091915509498e-06, "loss": 0.0419, "step": 45390 }, { "epoch": 0.18941258939673372, "grad_norm": 0.8234440809275347, "learning_rate": 4.595838786324827e-06, "loss": 0.0298, "step": 45395 }, { "epoch": 0.18943345211172402, "grad_norm": 0.5118115172786617, "learning_rate": 4.595585698958722e-06, "loss": 0.0309, "step": 45400 }, { "epoch": 0.1894543148267143, "grad_norm": 1.3388400064611465, "learning_rate": 4.595332653399675e-06, "loss": 0.034, "step": 45405 }, { "epoch": 0.18947517754170456, "grad_norm": 0.8744720788555366, "learning_rate": 4.595079649636171e-06, "loss": 0.0419, "step": 45410 }, { "epoch": 0.18949604025669484, "grad_norm": 0.6262530351026103, "learning_rate": 4.594826687656709e-06, "loss": 0.032, "step": 45415 }, { "epoch": 0.18951690297168514, "grad_norm": 1.055491974124529, "learning_rate": 4.594573767449789e-06, "loss": 0.0336, "step": 45420 }, { "epoch": 0.1895377656866754, "grad_norm": 1.3429741651992058, "learning_rate": 4.594320889003915e-06, "loss": 0.0542, "step": 45425 }, { "epoch": 0.18955862840166568, "grad_norm": 1.139900934222946, "learning_rate": 4.594068052307596e-06, "loss": 0.0381, "step": 45430 }, { "epoch": 0.18957949111665595, "grad_norm": 0.9996290223806464, "learning_rate": 4.593815257349346e-06, "loss": 0.0313, "step": 45435 }, { "epoch": 0.18960035383164622, "grad_norm": 0.9023080123398903, "learning_rate": 4.5935625041176816e-06, "loss": 0.0768, "step": 45440 }, { "epoch": 0.18962121654663652, "grad_norm": 0.7984309271042168, "learning_rate": 4.593309792601125e-06, "loss": 0.0347, "step": 45445 }, { "epoch": 0.1896420792616268, "grad_norm": 0.7984429973407414, "learning_rate": 4.593057122788204e-06, "loss": 0.0292, "step": 45450 }, { "epoch": 0.18966294197661707, "grad_norm": 1.2213826035983433, "learning_rate": 4.592804494667448e-06, "loss": 0.0323, "step": 45455 }, { "epoch": 0.18968380469160734, "grad_norm": 1.6767202546492614, "learning_rate": 4.592551908227395e-06, "loss": 0.0332, "step": 45460 }, { "epoch": 0.18970466740659764, "grad_norm": 0.4044371840465506, "learning_rate": 4.592299363456583e-06, "loss": 0.0339, "step": 45465 }, { "epoch": 0.1897255301215879, "grad_norm": 0.9847923594007533, "learning_rate": 4.592046860343556e-06, "loss": 0.0358, "step": 45470 }, { "epoch": 0.18974639283657818, "grad_norm": 0.7188427229995183, "learning_rate": 4.591794398876864e-06, "loss": 0.0291, "step": 45475 }, { "epoch": 0.18976725555156845, "grad_norm": 0.7714742704810543, "learning_rate": 4.5915419790450595e-06, "loss": 0.0336, "step": 45480 }, { "epoch": 0.18978811826655873, "grad_norm": 1.2715317611733519, "learning_rate": 4.591289600836698e-06, "loss": 0.0517, "step": 45485 }, { "epoch": 0.18980898098154902, "grad_norm": 0.7968898480649836, "learning_rate": 4.591037264240346e-06, "loss": 0.0323, "step": 45490 }, { "epoch": 0.1898298436965393, "grad_norm": 0.7995113668688164, "learning_rate": 4.5907849692445665e-06, "loss": 0.0347, "step": 45495 }, { "epoch": 0.18985070641152957, "grad_norm": 1.3197271381213873, "learning_rate": 4.59053271583793e-06, "loss": 0.0419, "step": 45500 }, { "epoch": 0.18987156912651984, "grad_norm": 1.052870357143129, "learning_rate": 4.590280504009013e-06, "loss": 0.0347, "step": 45505 }, { "epoch": 0.18989243184151014, "grad_norm": 1.1518054086449152, "learning_rate": 4.590028333746394e-06, "loss": 0.0412, "step": 45510 }, { "epoch": 0.1899132945565004, "grad_norm": 0.515582862945991, "learning_rate": 4.589776205038657e-06, "loss": 0.0304, "step": 45515 }, { "epoch": 0.18993415727149068, "grad_norm": 1.8250123445082396, "learning_rate": 4.589524117874392e-06, "loss": 0.0389, "step": 45520 }, { "epoch": 0.18995501998648096, "grad_norm": 1.476293231961299, "learning_rate": 4.58927207224219e-06, "loss": 0.0435, "step": 45525 }, { "epoch": 0.18997588270147123, "grad_norm": 2.0617458174503964, "learning_rate": 4.589020068130649e-06, "loss": 0.0569, "step": 45530 }, { "epoch": 0.18999674541646153, "grad_norm": 0.8432258436249789, "learning_rate": 4.588768105528368e-06, "loss": 0.0252, "step": 45535 }, { "epoch": 0.1900176081314518, "grad_norm": 1.8800197165389396, "learning_rate": 4.588516184423955e-06, "loss": 0.0445, "step": 45540 }, { "epoch": 0.19003847084644207, "grad_norm": 0.9161659229425998, "learning_rate": 4.58826430480602e-06, "loss": 0.0371, "step": 45545 }, { "epoch": 0.19005933356143234, "grad_norm": 1.3325163503829627, "learning_rate": 4.588012466663178e-06, "loss": 0.0416, "step": 45550 }, { "epoch": 0.19008019627642264, "grad_norm": 0.45530805639940447, "learning_rate": 4.587760669984046e-06, "loss": 0.0312, "step": 45555 }, { "epoch": 0.1901010589914129, "grad_norm": 1.1179819636323378, "learning_rate": 4.58750891475725e-06, "loss": 0.0491, "step": 45560 }, { "epoch": 0.19012192170640319, "grad_norm": 0.6753693586082634, "learning_rate": 4.587257200971415e-06, "loss": 0.0374, "step": 45565 }, { "epoch": 0.19014278442139346, "grad_norm": 1.1519295537540901, "learning_rate": 4.587005528615176e-06, "loss": 0.0331, "step": 45570 }, { "epoch": 0.19016364713638373, "grad_norm": 0.921421933462531, "learning_rate": 4.586753897677166e-06, "loss": 0.0324, "step": 45575 }, { "epoch": 0.19018450985137403, "grad_norm": 1.7283626717093608, "learning_rate": 4.586502308146028e-06, "loss": 0.0328, "step": 45580 }, { "epoch": 0.1902053725663643, "grad_norm": 1.5477020492182647, "learning_rate": 4.586250760010407e-06, "loss": 0.0286, "step": 45585 }, { "epoch": 0.19022623528135457, "grad_norm": 0.8340716301739753, "learning_rate": 4.585999253258953e-06, "loss": 0.0335, "step": 45590 }, { "epoch": 0.19024709799634484, "grad_norm": 0.9522783871714664, "learning_rate": 4.585747787880316e-06, "loss": 0.0264, "step": 45595 }, { "epoch": 0.19026796071133514, "grad_norm": 0.612794425988705, "learning_rate": 4.58549636386316e-06, "loss": 0.0359, "step": 45600 }, { "epoch": 0.19028882342632542, "grad_norm": 1.1437835573815684, "learning_rate": 4.585244981196144e-06, "loss": 0.0455, "step": 45605 }, { "epoch": 0.1903096861413157, "grad_norm": 0.6608227219386706, "learning_rate": 4.584993639867935e-06, "loss": 0.035, "step": 45610 }, { "epoch": 0.19033054885630596, "grad_norm": 1.0499080427461094, "learning_rate": 4.584742339867206e-06, "loss": 0.031, "step": 45615 }, { "epoch": 0.19035141157129623, "grad_norm": 0.837552139443547, "learning_rate": 4.584491081182631e-06, "loss": 0.0299, "step": 45620 }, { "epoch": 0.19037227428628653, "grad_norm": 1.1459175730544573, "learning_rate": 4.584239863802891e-06, "loss": 0.036, "step": 45625 }, { "epoch": 0.1903931370012768, "grad_norm": 0.9509503255116201, "learning_rate": 4.5839886877166684e-06, "loss": 0.0317, "step": 45630 }, { "epoch": 0.19041399971626707, "grad_norm": 0.7871700823568822, "learning_rate": 4.583737552912653e-06, "loss": 0.0338, "step": 45635 }, { "epoch": 0.19043486243125735, "grad_norm": 0.7842017893416559, "learning_rate": 4.58348645937954e-06, "loss": 0.0321, "step": 45640 }, { "epoch": 0.19045572514624765, "grad_norm": 0.6655925152876714, "learning_rate": 4.583235407106024e-06, "loss": 0.0369, "step": 45645 }, { "epoch": 0.19047658786123792, "grad_norm": 1.3188769540888763, "learning_rate": 4.582984396080807e-06, "loss": 0.0333, "step": 45650 }, { "epoch": 0.1904974505762282, "grad_norm": 0.6744309641894511, "learning_rate": 4.582733426292594e-06, "loss": 0.0302, "step": 45655 }, { "epoch": 0.19051831329121846, "grad_norm": 0.7158851428330323, "learning_rate": 4.582482497730098e-06, "loss": 0.048, "step": 45660 }, { "epoch": 0.19053917600620873, "grad_norm": 0.9862360999195782, "learning_rate": 4.582231610382031e-06, "loss": 0.0354, "step": 45665 }, { "epoch": 0.19056003872119903, "grad_norm": 1.3195663574867562, "learning_rate": 4.581980764237115e-06, "loss": 0.0439, "step": 45670 }, { "epoch": 0.1905809014361893, "grad_norm": 0.9125454964412906, "learning_rate": 4.58172995928407e-06, "loss": 0.0464, "step": 45675 }, { "epoch": 0.19060176415117958, "grad_norm": 0.8068074883168015, "learning_rate": 4.581479195511624e-06, "loss": 0.045, "step": 45680 }, { "epoch": 0.19062262686616985, "grad_norm": 0.6569185178179472, "learning_rate": 4.5812284729085125e-06, "loss": 0.04, "step": 45685 }, { "epoch": 0.19064348958116015, "grad_norm": 1.3285398111386075, "learning_rate": 4.580977791463467e-06, "loss": 0.0457, "step": 45690 }, { "epoch": 0.19066435229615042, "grad_norm": 0.7182545803717925, "learning_rate": 4.5807271511652315e-06, "loss": 0.0328, "step": 45695 }, { "epoch": 0.1906852150111407, "grad_norm": 0.5046748610303726, "learning_rate": 4.580476552002549e-06, "loss": 0.0383, "step": 45700 }, { "epoch": 0.19070607772613096, "grad_norm": 0.9144514259436006, "learning_rate": 4.580225993964168e-06, "loss": 0.0326, "step": 45705 }, { "epoch": 0.19072694044112123, "grad_norm": 1.3651498453875213, "learning_rate": 4.579975477038844e-06, "loss": 0.0718, "step": 45710 }, { "epoch": 0.19074780315611153, "grad_norm": 1.4865847464755784, "learning_rate": 4.579725001215335e-06, "loss": 0.0474, "step": 45715 }, { "epoch": 0.1907686658711018, "grad_norm": 1.010579046553038, "learning_rate": 4.5794745664824e-06, "loss": 0.0352, "step": 45720 }, { "epoch": 0.19078952858609208, "grad_norm": 0.7213573422692138, "learning_rate": 4.579224172828809e-06, "loss": 0.0336, "step": 45725 }, { "epoch": 0.19081039130108235, "grad_norm": 0.808585657180413, "learning_rate": 4.57897382024333e-06, "loss": 0.0318, "step": 45730 }, { "epoch": 0.19083125401607265, "grad_norm": 0.799319369317272, "learning_rate": 4.57872350871474e-06, "loss": 0.0282, "step": 45735 }, { "epoch": 0.19085211673106292, "grad_norm": 0.8964418374537024, "learning_rate": 4.578473238231816e-06, "loss": 0.0302, "step": 45740 }, { "epoch": 0.1908729794460532, "grad_norm": 1.3872711207372792, "learning_rate": 4.578223008783343e-06, "loss": 0.0451, "step": 45745 }, { "epoch": 0.19089384216104346, "grad_norm": 1.4763839891950525, "learning_rate": 4.577972820358109e-06, "loss": 0.0433, "step": 45750 }, { "epoch": 0.19091470487603374, "grad_norm": 0.8693572344817004, "learning_rate": 4.5777226729449045e-06, "loss": 0.0426, "step": 45755 }, { "epoch": 0.19093556759102404, "grad_norm": 0.8109223893745193, "learning_rate": 4.577472566532528e-06, "loss": 0.0263, "step": 45760 }, { "epoch": 0.1909564303060143, "grad_norm": 0.7308251122313895, "learning_rate": 4.577222501109781e-06, "loss": 0.0414, "step": 45765 }, { "epoch": 0.19097729302100458, "grad_norm": 1.002212536661611, "learning_rate": 4.576972476665466e-06, "loss": 0.0343, "step": 45770 }, { "epoch": 0.19099815573599485, "grad_norm": 3.8777907735363732, "learning_rate": 4.576722493188393e-06, "loss": 0.0281, "step": 45775 }, { "epoch": 0.19101901845098515, "grad_norm": 1.023013245246756, "learning_rate": 4.576472550667375e-06, "loss": 0.0264, "step": 45780 }, { "epoch": 0.19103988116597542, "grad_norm": 0.8084059954231576, "learning_rate": 4.576222649091231e-06, "loss": 0.029, "step": 45785 }, { "epoch": 0.1910607438809657, "grad_norm": 0.7661300008916344, "learning_rate": 4.575972788448783e-06, "loss": 0.0307, "step": 45790 }, { "epoch": 0.19108160659595597, "grad_norm": 0.9009037589717882, "learning_rate": 4.575722968728857e-06, "loss": 0.0245, "step": 45795 }, { "epoch": 0.19110246931094624, "grad_norm": 0.7542764028237454, "learning_rate": 4.575473189920284e-06, "loss": 0.0306, "step": 45800 }, { "epoch": 0.19112333202593654, "grad_norm": 1.184359820840765, "learning_rate": 4.575223452011898e-06, "loss": 0.0336, "step": 45805 }, { "epoch": 0.1911441947409268, "grad_norm": 0.578672264472448, "learning_rate": 4.574973754992539e-06, "loss": 0.0348, "step": 45810 }, { "epoch": 0.19116505745591708, "grad_norm": 1.0846185392477148, "learning_rate": 4.57472409885105e-06, "loss": 0.0426, "step": 45815 }, { "epoch": 0.19118592017090735, "grad_norm": 0.8500799717970571, "learning_rate": 4.574474483576279e-06, "loss": 0.0343, "step": 45820 }, { "epoch": 0.19120678288589765, "grad_norm": 0.6292395167951036, "learning_rate": 4.574224909157078e-06, "loss": 0.0305, "step": 45825 }, { "epoch": 0.19122764560088792, "grad_norm": 0.7191860892056806, "learning_rate": 4.573975375582303e-06, "loss": 0.0353, "step": 45830 }, { "epoch": 0.1912485083158782, "grad_norm": 0.9921347367278888, "learning_rate": 4.573725882840814e-06, "loss": 0.0351, "step": 45835 }, { "epoch": 0.19126937103086847, "grad_norm": 0.7591098386645452, "learning_rate": 4.573476430921476e-06, "loss": 0.0232, "step": 45840 }, { "epoch": 0.19129023374585874, "grad_norm": 0.5537853337480252, "learning_rate": 4.57322701981316e-06, "loss": 0.0286, "step": 45845 }, { "epoch": 0.19131109646084904, "grad_norm": 0.6929454713894229, "learning_rate": 4.572977649504736e-06, "loss": 0.0365, "step": 45850 }, { "epoch": 0.1913319591758393, "grad_norm": 0.924799631572936, "learning_rate": 4.5727283199850834e-06, "loss": 0.0409, "step": 45855 }, { "epoch": 0.19135282189082958, "grad_norm": 0.7656245007391492, "learning_rate": 4.572479031243083e-06, "loss": 0.0331, "step": 45860 }, { "epoch": 0.19137368460581985, "grad_norm": 0.9233376304659352, "learning_rate": 4.572229783267621e-06, "loss": 0.0278, "step": 45865 }, { "epoch": 0.19139454732081015, "grad_norm": 0.7229774285750749, "learning_rate": 4.571980576047589e-06, "loss": 0.035, "step": 45870 }, { "epoch": 0.19141541003580043, "grad_norm": 0.7805591808402661, "learning_rate": 4.57173140957188e-06, "loss": 0.0337, "step": 45875 }, { "epoch": 0.1914362727507907, "grad_norm": 0.7031903967508922, "learning_rate": 4.571482283829391e-06, "loss": 0.0389, "step": 45880 }, { "epoch": 0.19145713546578097, "grad_norm": 1.2643712503438267, "learning_rate": 4.571233198809029e-06, "loss": 0.0466, "step": 45885 }, { "epoch": 0.19147799818077124, "grad_norm": 0.6968936820687642, "learning_rate": 4.570984154499698e-06, "loss": 0.0334, "step": 45890 }, { "epoch": 0.19149886089576154, "grad_norm": 1.1971533213109153, "learning_rate": 4.5707351508903095e-06, "loss": 0.0419, "step": 45895 }, { "epoch": 0.1915197236107518, "grad_norm": 1.1047490847824002, "learning_rate": 4.570486187969781e-06, "loss": 0.0402, "step": 45900 }, { "epoch": 0.19154058632574208, "grad_norm": 0.9417655784882965, "learning_rate": 4.57023726572703e-06, "loss": 0.0296, "step": 45905 }, { "epoch": 0.19156144904073236, "grad_norm": 0.2453826777741659, "learning_rate": 4.569988384150984e-06, "loss": 0.026, "step": 45910 }, { "epoch": 0.19158231175572266, "grad_norm": 0.7922841432000792, "learning_rate": 4.569739543230567e-06, "loss": 0.0326, "step": 45915 }, { "epoch": 0.19160317447071293, "grad_norm": 0.9417350282733618, "learning_rate": 4.569490742954715e-06, "loss": 0.038, "step": 45920 }, { "epoch": 0.1916240371857032, "grad_norm": 0.7355150095996753, "learning_rate": 4.569241983312362e-06, "loss": 0.0323, "step": 45925 }, { "epoch": 0.19164489990069347, "grad_norm": 0.7201660284430956, "learning_rate": 4.56899326429245e-06, "loss": 0.0281, "step": 45930 }, { "epoch": 0.19166576261568374, "grad_norm": 0.9550982540601499, "learning_rate": 4.568744585883925e-06, "loss": 0.0329, "step": 45935 }, { "epoch": 0.19168662533067404, "grad_norm": 0.7511627350688568, "learning_rate": 4.5684959480757354e-06, "loss": 0.0301, "step": 45940 }, { "epoch": 0.19170748804566431, "grad_norm": 0.7711859613715806, "learning_rate": 4.5682473508568346e-06, "loss": 0.0385, "step": 45945 }, { "epoch": 0.1917283507606546, "grad_norm": 0.7901694230161488, "learning_rate": 4.56799879421618e-06, "loss": 0.0422, "step": 45950 }, { "epoch": 0.19174921347564486, "grad_norm": 0.8951773264131643, "learning_rate": 4.567750278142736e-06, "loss": 0.0298, "step": 45955 }, { "epoch": 0.19177007619063516, "grad_norm": 1.0217942036917904, "learning_rate": 4.567501802625466e-06, "loss": 0.0446, "step": 45960 }, { "epoch": 0.19179093890562543, "grad_norm": 0.8349182415209163, "learning_rate": 4.567253367653342e-06, "loss": 0.0385, "step": 45965 }, { "epoch": 0.1918118016206157, "grad_norm": 1.0129644275783787, "learning_rate": 4.567004973215338e-06, "loss": 0.03, "step": 45970 }, { "epoch": 0.19183266433560597, "grad_norm": 1.4299778150781206, "learning_rate": 4.566756619300432e-06, "loss": 0.0318, "step": 45975 }, { "epoch": 0.19185352705059625, "grad_norm": 1.3475858512230705, "learning_rate": 4.566508305897608e-06, "loss": 0.0423, "step": 45980 }, { "epoch": 0.19187438976558654, "grad_norm": 0.9831753214500262, "learning_rate": 4.566260032995853e-06, "loss": 0.0366, "step": 45985 }, { "epoch": 0.19189525248057682, "grad_norm": 0.5669622656839947, "learning_rate": 4.5660118005841576e-06, "loss": 0.0348, "step": 45990 }, { "epoch": 0.1919161151955671, "grad_norm": 0.5417323236928008, "learning_rate": 4.565763608651518e-06, "loss": 0.0408, "step": 45995 }, { "epoch": 0.19193697791055736, "grad_norm": 0.9706473187014086, "learning_rate": 4.5655154571869345e-06, "loss": 0.0241, "step": 46000 }, { "epoch": 0.19195784062554766, "grad_norm": 0.7187987050593174, "learning_rate": 4.56526734617941e-06, "loss": 0.0362, "step": 46005 }, { "epoch": 0.19197870334053793, "grad_norm": 0.5949182196883639, "learning_rate": 4.565019275617953e-06, "loss": 0.0539, "step": 46010 }, { "epoch": 0.1919995660555282, "grad_norm": 1.6756730029094977, "learning_rate": 4.564771245491576e-06, "loss": 0.0467, "step": 46015 }, { "epoch": 0.19202042877051848, "grad_norm": 0.9097985746084791, "learning_rate": 4.564523255789295e-06, "loss": 0.0316, "step": 46020 }, { "epoch": 0.19204129148550875, "grad_norm": 1.5617633839414204, "learning_rate": 4.56427530650013e-06, "loss": 0.0459, "step": 46025 }, { "epoch": 0.19206215420049905, "grad_norm": 0.7187836571627321, "learning_rate": 4.564027397613108e-06, "loss": 0.0293, "step": 46030 }, { "epoch": 0.19208301691548932, "grad_norm": 1.1758122799152486, "learning_rate": 4.563779529117256e-06, "loss": 0.0421, "step": 46035 }, { "epoch": 0.1921038796304796, "grad_norm": 0.7417585262983329, "learning_rate": 4.563531701001607e-06, "loss": 0.0333, "step": 46040 }, { "epoch": 0.19212474234546986, "grad_norm": 0.5102062218731798, "learning_rate": 4.563283913255199e-06, "loss": 0.0385, "step": 46045 }, { "epoch": 0.19214560506046016, "grad_norm": 1.2078744967480488, "learning_rate": 4.563036165867074e-06, "loss": 0.0496, "step": 46050 }, { "epoch": 0.19216646777545043, "grad_norm": 0.8547436277257963, "learning_rate": 4.5627884588262764e-06, "loss": 0.0373, "step": 46055 }, { "epoch": 0.1921873304904407, "grad_norm": 1.1864389530683834, "learning_rate": 4.562540792121857e-06, "loss": 0.0405, "step": 46060 }, { "epoch": 0.19220819320543098, "grad_norm": 0.8270568172824898, "learning_rate": 4.562293165742869e-06, "loss": 0.0352, "step": 46065 }, { "epoch": 0.19222905592042125, "grad_norm": 1.0091390409399061, "learning_rate": 4.562045579678372e-06, "loss": 0.0241, "step": 46070 }, { "epoch": 0.19224991863541155, "grad_norm": 1.6270400670814074, "learning_rate": 4.5617980339174265e-06, "loss": 0.041, "step": 46075 }, { "epoch": 0.19227078135040182, "grad_norm": 1.0490898108686475, "learning_rate": 4.561550528449101e-06, "loss": 0.0329, "step": 46080 }, { "epoch": 0.1922916440653921, "grad_norm": 0.717007503865517, "learning_rate": 4.561303063262462e-06, "loss": 0.0486, "step": 46085 }, { "epoch": 0.19231250678038236, "grad_norm": 1.1660417831778316, "learning_rate": 4.5610556383465894e-06, "loss": 0.0369, "step": 46090 }, { "epoch": 0.19233336949537266, "grad_norm": 0.7228979989027009, "learning_rate": 4.5608082536905595e-06, "loss": 0.0331, "step": 46095 }, { "epoch": 0.19235423221036294, "grad_norm": 0.5353545445321488, "learning_rate": 4.560560909283454e-06, "loss": 0.0273, "step": 46100 }, { "epoch": 0.1923750949253532, "grad_norm": 0.9240166204214101, "learning_rate": 4.5603136051143625e-06, "loss": 0.0355, "step": 46105 }, { "epoch": 0.19239595764034348, "grad_norm": 1.0412383704478057, "learning_rate": 4.560066341172376e-06, "loss": 0.031, "step": 46110 }, { "epoch": 0.19241682035533375, "grad_norm": 0.595587836419388, "learning_rate": 4.559819117446587e-06, "loss": 0.0302, "step": 46115 }, { "epoch": 0.19243768307032405, "grad_norm": 0.6024046430113581, "learning_rate": 4.5595719339261e-06, "loss": 0.0343, "step": 46120 }, { "epoch": 0.19245854578531432, "grad_norm": 0.9774575269192665, "learning_rate": 4.5593247906000145e-06, "loss": 0.0425, "step": 46125 }, { "epoch": 0.1924794085003046, "grad_norm": 1.413381994367834, "learning_rate": 4.55907768745744e-06, "loss": 0.0369, "step": 46130 }, { "epoch": 0.19250027121529487, "grad_norm": 1.1428242937166075, "learning_rate": 4.558830624487489e-06, "loss": 0.0382, "step": 46135 }, { "epoch": 0.19252113393028517, "grad_norm": 1.1520342708198767, "learning_rate": 4.558583601679276e-06, "loss": 0.0485, "step": 46140 }, { "epoch": 0.19254199664527544, "grad_norm": 1.0724448433345555, "learning_rate": 4.558336619021925e-06, "loss": 0.0353, "step": 46145 }, { "epoch": 0.1925628593602657, "grad_norm": 0.5046404439666147, "learning_rate": 4.558089676504555e-06, "loss": 0.0342, "step": 46150 }, { "epoch": 0.19258372207525598, "grad_norm": 0.9468827817983911, "learning_rate": 4.557842774116299e-06, "loss": 0.0333, "step": 46155 }, { "epoch": 0.19260458479024625, "grad_norm": 1.0941146326950753, "learning_rate": 4.557595911846287e-06, "loss": 0.067, "step": 46160 }, { "epoch": 0.19262544750523655, "grad_norm": 1.1129153720005982, "learning_rate": 4.5573490896836564e-06, "loss": 0.0422, "step": 46165 }, { "epoch": 0.19264631022022682, "grad_norm": 1.307995760844897, "learning_rate": 4.557102307617548e-06, "loss": 0.0333, "step": 46170 }, { "epoch": 0.1926671729352171, "grad_norm": 1.0671714606073988, "learning_rate": 4.556855565637107e-06, "loss": 0.0397, "step": 46175 }, { "epoch": 0.19268803565020737, "grad_norm": 1.0592973905648706, "learning_rate": 4.5566088637314835e-06, "loss": 0.04, "step": 46180 }, { "epoch": 0.19270889836519764, "grad_norm": 1.3036000907016505, "learning_rate": 4.556362201889828e-06, "loss": 0.0565, "step": 46185 }, { "epoch": 0.19272976108018794, "grad_norm": 0.7022523255922251, "learning_rate": 4.5561155801013006e-06, "loss": 0.0353, "step": 46190 }, { "epoch": 0.1927506237951782, "grad_norm": 0.8352161107699526, "learning_rate": 4.555868998355062e-06, "loss": 0.0376, "step": 46195 }, { "epoch": 0.19277148651016848, "grad_norm": 1.363397703963315, "learning_rate": 4.555622456640277e-06, "loss": 0.0375, "step": 46200 }, { "epoch": 0.19279234922515875, "grad_norm": 0.8811357038525578, "learning_rate": 4.555375954946114e-06, "loss": 0.035, "step": 46205 }, { "epoch": 0.19281321194014905, "grad_norm": 0.8800916187766632, "learning_rate": 4.555129493261749e-06, "loss": 0.0398, "step": 46210 }, { "epoch": 0.19283407465513933, "grad_norm": 0.6628800385026553, "learning_rate": 4.5548830715763605e-06, "loss": 0.038, "step": 46215 }, { "epoch": 0.1928549373701296, "grad_norm": 1.2128572969523175, "learning_rate": 4.554636689879127e-06, "loss": 0.0404, "step": 46220 }, { "epoch": 0.19287580008511987, "grad_norm": 0.8364003877732403, "learning_rate": 4.554390348159238e-06, "loss": 0.0349, "step": 46225 }, { "epoch": 0.19289666280011014, "grad_norm": 0.5764943772597129, "learning_rate": 4.554144046405881e-06, "loss": 0.0244, "step": 46230 }, { "epoch": 0.19291752551510044, "grad_norm": 0.6274204405507186, "learning_rate": 4.553897784608253e-06, "loss": 0.0232, "step": 46235 }, { "epoch": 0.1929383882300907, "grad_norm": 1.598110281569327, "learning_rate": 4.553651562755549e-06, "loss": 0.0403, "step": 46240 }, { "epoch": 0.19295925094508098, "grad_norm": 1.4748363887927607, "learning_rate": 4.5534053808369735e-06, "loss": 0.0304, "step": 46245 }, { "epoch": 0.19298011366007126, "grad_norm": 0.6063487391175227, "learning_rate": 4.553159238841733e-06, "loss": 0.0328, "step": 46250 }, { "epoch": 0.19300097637506156, "grad_norm": 0.6390035189340574, "learning_rate": 4.552913136759036e-06, "loss": 0.0553, "step": 46255 }, { "epoch": 0.19302183909005183, "grad_norm": 0.6320205081056621, "learning_rate": 4.5526670745781e-06, "loss": 0.0242, "step": 46260 }, { "epoch": 0.1930427018050421, "grad_norm": 1.646687984138119, "learning_rate": 4.552421052288142e-06, "loss": 0.0385, "step": 46265 }, { "epoch": 0.19306356452003237, "grad_norm": 0.31492998639375835, "learning_rate": 4.552175069878385e-06, "loss": 0.0285, "step": 46270 }, { "epoch": 0.19308442723502264, "grad_norm": 0.9468588337361412, "learning_rate": 4.551929127338057e-06, "loss": 0.0286, "step": 46275 }, { "epoch": 0.19310528995001294, "grad_norm": 0.7429570355427201, "learning_rate": 4.551683224656386e-06, "loss": 0.0323, "step": 46280 }, { "epoch": 0.19312615266500321, "grad_norm": 0.7364781354209574, "learning_rate": 4.551437361822611e-06, "loss": 0.0306, "step": 46285 }, { "epoch": 0.1931470153799935, "grad_norm": 0.7569504555948995, "learning_rate": 4.551191538825968e-06, "loss": 0.0369, "step": 46290 }, { "epoch": 0.19316787809498376, "grad_norm": 0.6046422252258356, "learning_rate": 4.550945755655701e-06, "loss": 0.0392, "step": 46295 }, { "epoch": 0.19318874080997406, "grad_norm": 0.9376380795983817, "learning_rate": 4.5507000123010575e-06, "loss": 0.0411, "step": 46300 }, { "epoch": 0.19320960352496433, "grad_norm": 0.7486539811326327, "learning_rate": 4.55045430875129e-06, "loss": 0.0422, "step": 46305 }, { "epoch": 0.1932304662399546, "grad_norm": 0.8671157153243075, "learning_rate": 4.550208644995651e-06, "loss": 0.032, "step": 46310 }, { "epoch": 0.19325132895494487, "grad_norm": 0.8341199017288037, "learning_rate": 4.549963021023402e-06, "loss": 0.0336, "step": 46315 }, { "epoch": 0.19327219166993514, "grad_norm": 0.6829717112262048, "learning_rate": 4.549717436823806e-06, "loss": 0.0359, "step": 46320 }, { "epoch": 0.19329305438492544, "grad_norm": 1.0428430491030378, "learning_rate": 4.54947189238613e-06, "loss": 0.0309, "step": 46325 }, { "epoch": 0.19331391709991572, "grad_norm": 0.7842118188362439, "learning_rate": 4.549226387699646e-06, "loss": 0.0392, "step": 46330 }, { "epoch": 0.193334779814906, "grad_norm": 1.0920412448169534, "learning_rate": 4.548980922753631e-06, "loss": 0.035, "step": 46335 }, { "epoch": 0.19335564252989626, "grad_norm": 0.8804394838571942, "learning_rate": 4.548735497537362e-06, "loss": 0.0357, "step": 46340 }, { "epoch": 0.19337650524488656, "grad_norm": 0.5351819785527907, "learning_rate": 4.548490112040123e-06, "loss": 0.0381, "step": 46345 }, { "epoch": 0.19339736795987683, "grad_norm": 1.0921229136672925, "learning_rate": 4.5482447662512045e-06, "loss": 0.0286, "step": 46350 }, { "epoch": 0.1934182306748671, "grad_norm": 0.936274642674016, "learning_rate": 4.547999460159896e-06, "loss": 0.0279, "step": 46355 }, { "epoch": 0.19343909338985738, "grad_norm": 0.5750076362719447, "learning_rate": 4.547754193755494e-06, "loss": 0.027, "step": 46360 }, { "epoch": 0.19345995610484765, "grad_norm": 0.8385745513039911, "learning_rate": 4.547508967027299e-06, "loss": 0.0365, "step": 46365 }, { "epoch": 0.19348081881983795, "grad_norm": 1.0635258793353606, "learning_rate": 4.547263779964613e-06, "loss": 0.0415, "step": 46370 }, { "epoch": 0.19350168153482822, "grad_norm": 1.3584592450683242, "learning_rate": 4.547018632556746e-06, "loss": 0.0307, "step": 46375 }, { "epoch": 0.1935225442498185, "grad_norm": 0.932091266586843, "learning_rate": 4.546773524793009e-06, "loss": 0.0383, "step": 46380 }, { "epoch": 0.19354340696480876, "grad_norm": 0.5934018147808816, "learning_rate": 4.546528456662718e-06, "loss": 0.0307, "step": 46385 }, { "epoch": 0.19356426967979906, "grad_norm": 1.8056134247579434, "learning_rate": 4.546283428155195e-06, "loss": 0.0461, "step": 46390 }, { "epoch": 0.19358513239478933, "grad_norm": 0.7424742787380708, "learning_rate": 4.546038439259761e-06, "loss": 0.0304, "step": 46395 }, { "epoch": 0.1936059951097796, "grad_norm": 0.5244541570575466, "learning_rate": 4.545793489965745e-06, "loss": 0.0291, "step": 46400 }, { "epoch": 0.19362685782476988, "grad_norm": 1.2811145943016686, "learning_rate": 4.54554858026248e-06, "loss": 0.0308, "step": 46405 }, { "epoch": 0.19364772053976015, "grad_norm": 0.9907194513524819, "learning_rate": 4.545303710139303e-06, "loss": 0.0462, "step": 46410 }, { "epoch": 0.19366858325475045, "grad_norm": 0.7370623464125795, "learning_rate": 4.545058879585552e-06, "loss": 0.0411, "step": 46415 }, { "epoch": 0.19368944596974072, "grad_norm": 0.6927308153912175, "learning_rate": 4.544814088590573e-06, "loss": 0.0347, "step": 46420 }, { "epoch": 0.193710308684731, "grad_norm": 0.5537425724045053, "learning_rate": 4.544569337143712e-06, "loss": 0.0338, "step": 46425 }, { "epoch": 0.19373117139972126, "grad_norm": 1.064316138532021, "learning_rate": 4.544324625234325e-06, "loss": 0.0322, "step": 46430 }, { "epoch": 0.19375203411471156, "grad_norm": 0.9728564701971085, "learning_rate": 4.544079952851764e-06, "loss": 0.029, "step": 46435 }, { "epoch": 0.19377289682970184, "grad_norm": 0.8016532769775888, "learning_rate": 4.543835319985392e-06, "loss": 0.0315, "step": 46440 }, { "epoch": 0.1937937595446921, "grad_norm": 0.6024193107998702, "learning_rate": 4.543590726624573e-06, "loss": 0.0426, "step": 46445 }, { "epoch": 0.19381462225968238, "grad_norm": 2.024625469125198, "learning_rate": 4.543346172758675e-06, "loss": 0.0306, "step": 46450 }, { "epoch": 0.19383548497467265, "grad_norm": 1.0894379409604433, "learning_rate": 4.543101658377069e-06, "loss": 0.0346, "step": 46455 }, { "epoch": 0.19385634768966295, "grad_norm": 0.7351067048121204, "learning_rate": 4.542857183469133e-06, "loss": 0.0372, "step": 46460 }, { "epoch": 0.19387721040465322, "grad_norm": 0.7627486774113843, "learning_rate": 4.542612748024247e-06, "loss": 0.0349, "step": 46465 }, { "epoch": 0.1938980731196435, "grad_norm": 1.0333454696110544, "learning_rate": 4.5423683520317945e-06, "loss": 0.0354, "step": 46470 }, { "epoch": 0.19391893583463377, "grad_norm": 0.5333732431635859, "learning_rate": 4.542123995481165e-06, "loss": 0.037, "step": 46475 }, { "epoch": 0.19393979854962407, "grad_norm": 0.9674974201028766, "learning_rate": 4.541879678361749e-06, "loss": 0.0363, "step": 46480 }, { "epoch": 0.19396066126461434, "grad_norm": 0.6349152934812679, "learning_rate": 4.541635400662944e-06, "loss": 0.0359, "step": 46485 }, { "epoch": 0.1939815239796046, "grad_norm": 1.2844318528222076, "learning_rate": 4.54139116237415e-06, "loss": 0.035, "step": 46490 }, { "epoch": 0.19400238669459488, "grad_norm": 0.6619196318117774, "learning_rate": 4.541146963484772e-06, "loss": 0.0292, "step": 46495 }, { "epoch": 0.19402324940958515, "grad_norm": 2.605080355659787, "learning_rate": 4.540902803984218e-06, "loss": 0.0445, "step": 46500 }, { "epoch": 0.19404411212457545, "grad_norm": 0.5899993888149304, "learning_rate": 4.540658683861899e-06, "loss": 0.0309, "step": 46505 }, { "epoch": 0.19406497483956572, "grad_norm": 1.0500341081477433, "learning_rate": 4.540414603107232e-06, "loss": 0.0298, "step": 46510 }, { "epoch": 0.194085837554556, "grad_norm": 0.7815048183712077, "learning_rate": 4.540170561709638e-06, "loss": 0.0391, "step": 46515 }, { "epoch": 0.19410670026954627, "grad_norm": 0.8586713889708572, "learning_rate": 4.53992655965854e-06, "loss": 0.0347, "step": 46520 }, { "epoch": 0.19412756298453657, "grad_norm": 2.145400449814435, "learning_rate": 4.539682596943366e-06, "loss": 0.0472, "step": 46525 }, { "epoch": 0.19414842569952684, "grad_norm": 0.5616460669585316, "learning_rate": 4.539438673553549e-06, "loss": 0.0249, "step": 46530 }, { "epoch": 0.1941692884145171, "grad_norm": 1.4070328458224683, "learning_rate": 4.539194789478525e-06, "loss": 0.0288, "step": 46535 }, { "epoch": 0.19419015112950738, "grad_norm": 1.0740475679019197, "learning_rate": 4.538950944707733e-06, "loss": 0.0501, "step": 46540 }, { "epoch": 0.19421101384449765, "grad_norm": 0.9887369685029443, "learning_rate": 4.538707139230619e-06, "loss": 0.0429, "step": 46545 }, { "epoch": 0.19423187655948795, "grad_norm": 0.8771208082402995, "learning_rate": 4.538463373036631e-06, "loss": 0.0285, "step": 46550 }, { "epoch": 0.19425273927447823, "grad_norm": 0.8814332384465124, "learning_rate": 4.538219646115218e-06, "loss": 0.0344, "step": 46555 }, { "epoch": 0.1942736019894685, "grad_norm": 0.6653051399424035, "learning_rate": 4.537975958455838e-06, "loss": 0.0295, "step": 46560 }, { "epoch": 0.19429446470445877, "grad_norm": 1.1053855147710963, "learning_rate": 4.5377323100479525e-06, "loss": 0.0341, "step": 46565 }, { "epoch": 0.19431532741944907, "grad_norm": 1.1435840529238745, "learning_rate": 4.537488700881023e-06, "loss": 0.0382, "step": 46570 }, { "epoch": 0.19433619013443934, "grad_norm": 1.2271707692536584, "learning_rate": 4.5372451309445175e-06, "loss": 0.0301, "step": 46575 }, { "epoch": 0.1943570528494296, "grad_norm": 3.037089822372729, "learning_rate": 4.537001600227908e-06, "loss": 0.0534, "step": 46580 }, { "epoch": 0.19437791556441988, "grad_norm": 1.17652463245113, "learning_rate": 4.536758108720671e-06, "loss": 0.0496, "step": 46585 }, { "epoch": 0.19439877827941016, "grad_norm": 0.8408289196518736, "learning_rate": 4.536514656412286e-06, "loss": 0.0395, "step": 46590 }, { "epoch": 0.19441964099440046, "grad_norm": 0.8439431452984848, "learning_rate": 4.5362712432922374e-06, "loss": 0.0364, "step": 46595 }, { "epoch": 0.19444050370939073, "grad_norm": 0.8010136622548133, "learning_rate": 4.53602786935001e-06, "loss": 0.0347, "step": 46600 }, { "epoch": 0.194461366424381, "grad_norm": 1.046465203783772, "learning_rate": 4.535784534575098e-06, "loss": 0.0269, "step": 46605 }, { "epoch": 0.19448222913937127, "grad_norm": 0.8493038592693496, "learning_rate": 4.5355412389569955e-06, "loss": 0.0494, "step": 46610 }, { "epoch": 0.19450309185436157, "grad_norm": 1.026007106768293, "learning_rate": 4.5352979824852034e-06, "loss": 0.0423, "step": 46615 }, { "epoch": 0.19452395456935184, "grad_norm": 1.0881678967601107, "learning_rate": 4.535054765149224e-06, "loss": 0.0362, "step": 46620 }, { "epoch": 0.19454481728434211, "grad_norm": 0.658683605234944, "learning_rate": 4.534811586938565e-06, "loss": 0.0376, "step": 46625 }, { "epoch": 0.19456567999933239, "grad_norm": 1.133420514321255, "learning_rate": 4.534568447842736e-06, "loss": 0.0502, "step": 46630 }, { "epoch": 0.19458654271432266, "grad_norm": 0.7070585188540937, "learning_rate": 4.534325347851256e-06, "loss": 0.0281, "step": 46635 }, { "epoch": 0.19460740542931296, "grad_norm": 1.4562154340764482, "learning_rate": 4.5340822869536395e-06, "loss": 0.032, "step": 46640 }, { "epoch": 0.19462826814430323, "grad_norm": 0.7146606620230076, "learning_rate": 4.5338392651394124e-06, "loss": 0.037, "step": 46645 }, { "epoch": 0.1946491308592935, "grad_norm": 1.0175568245887228, "learning_rate": 4.533596282398101e-06, "loss": 0.0275, "step": 46650 }, { "epoch": 0.19466999357428377, "grad_norm": 1.0252760312349738, "learning_rate": 4.533353338719237e-06, "loss": 0.0355, "step": 46655 }, { "epoch": 0.19469085628927407, "grad_norm": 0.6997257210386408, "learning_rate": 4.533110434092356e-06, "loss": 0.0338, "step": 46660 }, { "epoch": 0.19471171900426434, "grad_norm": 0.9644747302772215, "learning_rate": 4.532867568506993e-06, "loss": 0.0503, "step": 46665 }, { "epoch": 0.19473258171925462, "grad_norm": 0.9986766224745699, "learning_rate": 4.532624741952693e-06, "loss": 0.0391, "step": 46670 }, { "epoch": 0.1947534444342449, "grad_norm": 0.9977935512602594, "learning_rate": 4.532381954419004e-06, "loss": 0.0367, "step": 46675 }, { "epoch": 0.19477430714923516, "grad_norm": 0.5842988066283464, "learning_rate": 4.532139205895475e-06, "loss": 0.0444, "step": 46680 }, { "epoch": 0.19479516986422546, "grad_norm": 0.7356626411489741, "learning_rate": 4.53189649637166e-06, "loss": 0.0343, "step": 46685 }, { "epoch": 0.19481603257921573, "grad_norm": 0.6704673491281598, "learning_rate": 4.531653825837118e-06, "loss": 0.0535, "step": 46690 }, { "epoch": 0.194836895294206, "grad_norm": 1.3018669813715207, "learning_rate": 4.5314111942814115e-06, "loss": 0.0334, "step": 46695 }, { "epoch": 0.19485775800919627, "grad_norm": 1.048189155293446, "learning_rate": 4.531168601694106e-06, "loss": 0.0341, "step": 46700 }, { "epoch": 0.19487862072418657, "grad_norm": 1.2628498445254794, "learning_rate": 4.530926048064773e-06, "loss": 0.0345, "step": 46705 }, { "epoch": 0.19489948343917685, "grad_norm": 1.2648610279003414, "learning_rate": 4.530683533382986e-06, "loss": 0.0462, "step": 46710 }, { "epoch": 0.19492034615416712, "grad_norm": 0.7354703285285986, "learning_rate": 4.5304410576383215e-06, "loss": 0.0339, "step": 46715 }, { "epoch": 0.1949412088691574, "grad_norm": 1.0154603257968204, "learning_rate": 4.530198620820362e-06, "loss": 0.0418, "step": 46720 }, { "epoch": 0.19496207158414766, "grad_norm": 0.913855235269483, "learning_rate": 4.529956222918694e-06, "loss": 0.03, "step": 46725 }, { "epoch": 0.19498293429913796, "grad_norm": 1.0361808511395159, "learning_rate": 4.529713863922906e-06, "loss": 0.0329, "step": 46730 }, { "epoch": 0.19500379701412823, "grad_norm": 0.8260110512691269, "learning_rate": 4.529471543822594e-06, "loss": 0.0282, "step": 46735 }, { "epoch": 0.1950246597291185, "grad_norm": 1.0359057544066068, "learning_rate": 4.529229262607352e-06, "loss": 0.054, "step": 46740 }, { "epoch": 0.19504552244410878, "grad_norm": 0.9646141136378676, "learning_rate": 4.528987020266783e-06, "loss": 0.0315, "step": 46745 }, { "epoch": 0.19506638515909908, "grad_norm": 0.9576586850203586, "learning_rate": 4.528744816790494e-06, "loss": 0.0367, "step": 46750 }, { "epoch": 0.19508724787408935, "grad_norm": 0.4730103456469495, "learning_rate": 4.528502652168089e-06, "loss": 0.0366, "step": 46755 }, { "epoch": 0.19510811058907962, "grad_norm": 0.5125795045429381, "learning_rate": 4.528260526389187e-06, "loss": 0.0286, "step": 46760 }, { "epoch": 0.1951289733040699, "grad_norm": 1.2211747688401926, "learning_rate": 4.5280184394434e-06, "loss": 0.0358, "step": 46765 }, { "epoch": 0.19514983601906016, "grad_norm": 1.1698359028168697, "learning_rate": 4.527776391320352e-06, "loss": 0.043, "step": 46770 }, { "epoch": 0.19517069873405046, "grad_norm": 1.1819101001513188, "learning_rate": 4.527534382009666e-06, "loss": 0.0508, "step": 46775 }, { "epoch": 0.19519156144904073, "grad_norm": 0.7512003303302265, "learning_rate": 4.527292411500971e-06, "loss": 0.0293, "step": 46780 }, { "epoch": 0.195212424164031, "grad_norm": 0.7317913440667466, "learning_rate": 4.527050479783899e-06, "loss": 0.035, "step": 46785 }, { "epoch": 0.19523328687902128, "grad_norm": 1.0520728277180569, "learning_rate": 4.526808586848087e-06, "loss": 0.0321, "step": 46790 }, { "epoch": 0.19525414959401158, "grad_norm": 0.9082385349545569, "learning_rate": 4.5265667326831744e-06, "loss": 0.0484, "step": 46795 }, { "epoch": 0.19527501230900185, "grad_norm": 1.2648481541212528, "learning_rate": 4.526324917278807e-06, "loss": 0.0252, "step": 46800 }, { "epoch": 0.19529587502399212, "grad_norm": 1.1545600753734857, "learning_rate": 4.52608314062463e-06, "loss": 0.0351, "step": 46805 }, { "epoch": 0.1953167377389824, "grad_norm": 0.6734208439454261, "learning_rate": 4.525841402710297e-06, "loss": 0.0276, "step": 46810 }, { "epoch": 0.19533760045397267, "grad_norm": 0.581127441809623, "learning_rate": 4.525599703525463e-06, "loss": 0.0253, "step": 46815 }, { "epoch": 0.19535846316896296, "grad_norm": 0.9495591541762846, "learning_rate": 4.525358043059787e-06, "loss": 0.0463, "step": 46820 }, { "epoch": 0.19537932588395324, "grad_norm": 1.004102174447948, "learning_rate": 4.525116421302934e-06, "loss": 0.03, "step": 46825 }, { "epoch": 0.1954001885989435, "grad_norm": 1.0002286352611247, "learning_rate": 4.52487483824457e-06, "loss": 0.0352, "step": 46830 }, { "epoch": 0.19542105131393378, "grad_norm": 0.8798452164659979, "learning_rate": 4.524633293874365e-06, "loss": 0.0316, "step": 46835 }, { "epoch": 0.19544191402892408, "grad_norm": 1.093758968758979, "learning_rate": 4.5243917881819966e-06, "loss": 0.0421, "step": 46840 }, { "epoch": 0.19546277674391435, "grad_norm": 0.7672237387405808, "learning_rate": 4.524150321157141e-06, "loss": 0.0279, "step": 46845 }, { "epoch": 0.19548363945890462, "grad_norm": 1.1043052737251813, "learning_rate": 4.523908892789484e-06, "loss": 0.0406, "step": 46850 }, { "epoch": 0.1955045021738949, "grad_norm": 1.6716518558420845, "learning_rate": 4.5236675030687096e-06, "loss": 0.0345, "step": 46855 }, { "epoch": 0.19552536488888517, "grad_norm": 0.7562476564467866, "learning_rate": 4.523426151984508e-06, "loss": 0.0255, "step": 46860 }, { "epoch": 0.19554622760387547, "grad_norm": 1.177685445675007, "learning_rate": 4.523184839526576e-06, "loss": 0.0385, "step": 46865 }, { "epoch": 0.19556709031886574, "grad_norm": 0.939578381806238, "learning_rate": 4.5229435656846075e-06, "loss": 0.0324, "step": 46870 }, { "epoch": 0.195587953033856, "grad_norm": 1.2577907170143805, "learning_rate": 4.522702330448309e-06, "loss": 0.0485, "step": 46875 }, { "epoch": 0.19560881574884628, "grad_norm": 0.6697499069879274, "learning_rate": 4.522461133807382e-06, "loss": 0.0398, "step": 46880 }, { "epoch": 0.19562967846383658, "grad_norm": 0.49893260567137393, "learning_rate": 4.52221997575154e-06, "loss": 0.034, "step": 46885 }, { "epoch": 0.19565054117882685, "grad_norm": 0.9443487836357726, "learning_rate": 4.521978856270494e-06, "loss": 0.0301, "step": 46890 }, { "epoch": 0.19567140389381713, "grad_norm": 1.0951229658481676, "learning_rate": 4.521737775353963e-06, "loss": 0.039, "step": 46895 }, { "epoch": 0.1956922666088074, "grad_norm": 0.6930947788046216, "learning_rate": 4.521496732991665e-06, "loss": 0.0297, "step": 46900 }, { "epoch": 0.19571312932379767, "grad_norm": 0.6395348186672543, "learning_rate": 4.5212557291733276e-06, "loss": 0.0359, "step": 46905 }, { "epoch": 0.19573399203878797, "grad_norm": 0.4907665696110486, "learning_rate": 4.521014763888679e-06, "loss": 0.0322, "step": 46910 }, { "epoch": 0.19575485475377824, "grad_norm": 0.8167683767948847, "learning_rate": 4.520773837127453e-06, "loss": 0.0295, "step": 46915 }, { "epoch": 0.1957757174687685, "grad_norm": 0.749650653584937, "learning_rate": 4.5205329488793835e-06, "loss": 0.0305, "step": 46920 }, { "epoch": 0.19579658018375878, "grad_norm": 1.2239276272821094, "learning_rate": 4.520292099134212e-06, "loss": 0.0495, "step": 46925 }, { "epoch": 0.19581744289874908, "grad_norm": 1.171965133872088, "learning_rate": 4.5200512878816835e-06, "loss": 0.0325, "step": 46930 }, { "epoch": 0.19583830561373936, "grad_norm": 0.6270958264597, "learning_rate": 4.519810515111545e-06, "loss": 0.0409, "step": 46935 }, { "epoch": 0.19585916832872963, "grad_norm": 1.1134119925447235, "learning_rate": 4.519569780813549e-06, "loss": 0.0427, "step": 46940 }, { "epoch": 0.1958800310437199, "grad_norm": 0.7402887093634131, "learning_rate": 4.51932908497745e-06, "loss": 0.0382, "step": 46945 }, { "epoch": 0.19590089375871017, "grad_norm": 1.2730046046107144, "learning_rate": 4.519088427593007e-06, "loss": 0.0483, "step": 46950 }, { "epoch": 0.19592175647370047, "grad_norm": 0.6306673194106482, "learning_rate": 4.518847808649985e-06, "loss": 0.0251, "step": 46955 }, { "epoch": 0.19594261918869074, "grad_norm": 0.7780674931168596, "learning_rate": 4.518607228138151e-06, "loss": 0.027, "step": 46960 }, { "epoch": 0.195963481903681, "grad_norm": 0.566817681420852, "learning_rate": 4.518366686047272e-06, "loss": 0.0315, "step": 46965 }, { "epoch": 0.19598434461867129, "grad_norm": 0.732797959675725, "learning_rate": 4.518126182367127e-06, "loss": 0.0344, "step": 46970 }, { "epoch": 0.19600520733366159, "grad_norm": 0.5819381875570607, "learning_rate": 4.5178857170874936e-06, "loss": 0.035, "step": 46975 }, { "epoch": 0.19602607004865186, "grad_norm": 1.058373649720104, "learning_rate": 4.5176452901981535e-06, "loss": 0.0282, "step": 46980 }, { "epoch": 0.19604693276364213, "grad_norm": 0.6912266174623943, "learning_rate": 4.517404901688892e-06, "loss": 0.0361, "step": 46985 }, { "epoch": 0.1960677954786324, "grad_norm": 0.9466015181255862, "learning_rate": 4.5171645515495e-06, "loss": 0.0384, "step": 46990 }, { "epoch": 0.19608865819362267, "grad_norm": 1.2744650944910743, "learning_rate": 4.516924239769771e-06, "loss": 0.0369, "step": 46995 }, { "epoch": 0.19610952090861297, "grad_norm": 1.0230465919100318, "learning_rate": 4.516683966339503e-06, "loss": 0.0388, "step": 47000 }, { "epoch": 0.19613038362360324, "grad_norm": 0.579240888908951, "learning_rate": 4.516443731248496e-06, "loss": 0.0288, "step": 47005 }, { "epoch": 0.19615124633859352, "grad_norm": 0.5368871477280385, "learning_rate": 4.516203534486555e-06, "loss": 0.0272, "step": 47010 }, { "epoch": 0.1961721090535838, "grad_norm": 1.464874735311225, "learning_rate": 4.51596337604349e-06, "loss": 0.0416, "step": 47015 }, { "epoch": 0.1961929717685741, "grad_norm": 1.0180964765137224, "learning_rate": 4.515723255909113e-06, "loss": 0.0362, "step": 47020 }, { "epoch": 0.19621383448356436, "grad_norm": 0.7391144105834788, "learning_rate": 4.515483174073241e-06, "loss": 0.0379, "step": 47025 }, { "epoch": 0.19623469719855463, "grad_norm": 0.9088762794082345, "learning_rate": 4.515243130525694e-06, "loss": 0.0393, "step": 47030 }, { "epoch": 0.1962555599135449, "grad_norm": 0.535627315601598, "learning_rate": 4.515003125256296e-06, "loss": 0.0312, "step": 47035 }, { "epoch": 0.19627642262853517, "grad_norm": 0.887925299794633, "learning_rate": 4.514763158254875e-06, "loss": 0.0339, "step": 47040 }, { "epoch": 0.19629728534352547, "grad_norm": 0.6494644528177974, "learning_rate": 4.5145232295112615e-06, "loss": 0.028, "step": 47045 }, { "epoch": 0.19631814805851575, "grad_norm": 0.8856536809685746, "learning_rate": 4.514283339015292e-06, "loss": 0.0301, "step": 47050 }, { "epoch": 0.19633901077350602, "grad_norm": 1.3078623671027485, "learning_rate": 4.514043486756805e-06, "loss": 0.0515, "step": 47055 }, { "epoch": 0.1963598734884963, "grad_norm": 0.7858408081711351, "learning_rate": 4.513803672725644e-06, "loss": 0.0338, "step": 47060 }, { "epoch": 0.1963807362034866, "grad_norm": 0.8949071295692255, "learning_rate": 4.513563896911656e-06, "loss": 0.0349, "step": 47065 }, { "epoch": 0.19640159891847686, "grad_norm": 0.8748020172890272, "learning_rate": 4.513324159304691e-06, "loss": 0.0378, "step": 47070 }, { "epoch": 0.19642246163346713, "grad_norm": 0.7876406987807143, "learning_rate": 4.513084459894604e-06, "loss": 0.0341, "step": 47075 }, { "epoch": 0.1964433243484574, "grad_norm": 1.128599735594352, "learning_rate": 4.51284479867125e-06, "loss": 0.0357, "step": 47080 }, { "epoch": 0.19646418706344768, "grad_norm": 0.9953111003301804, "learning_rate": 4.512605175624496e-06, "loss": 0.0401, "step": 47085 }, { "epoch": 0.19648504977843798, "grad_norm": 0.9443350090966651, "learning_rate": 4.512365590744202e-06, "loss": 0.0326, "step": 47090 }, { "epoch": 0.19650591249342825, "grad_norm": 0.7998021976832685, "learning_rate": 4.512126044020241e-06, "loss": 0.0403, "step": 47095 }, { "epoch": 0.19652677520841852, "grad_norm": 1.298388931619896, "learning_rate": 4.511886535442486e-06, "loss": 0.0287, "step": 47100 }, { "epoch": 0.1965476379234088, "grad_norm": 0.7682193052426377, "learning_rate": 4.511647065000812e-06, "loss": 0.0353, "step": 47105 }, { "epoch": 0.1965685006383991, "grad_norm": 0.7815714933923196, "learning_rate": 4.511407632685101e-06, "loss": 0.0319, "step": 47110 }, { "epoch": 0.19658936335338936, "grad_norm": 0.826627416641684, "learning_rate": 4.511168238485236e-06, "loss": 0.0418, "step": 47115 }, { "epoch": 0.19661022606837963, "grad_norm": 1.0658718781017358, "learning_rate": 4.510928882391107e-06, "loss": 0.0396, "step": 47120 }, { "epoch": 0.1966310887833699, "grad_norm": 0.9713797696685005, "learning_rate": 4.510689564392605e-06, "loss": 0.0431, "step": 47125 }, { "epoch": 0.19665195149836018, "grad_norm": 1.0620723972555302, "learning_rate": 4.510450284479626e-06, "loss": 0.0321, "step": 47130 }, { "epoch": 0.19667281421335048, "grad_norm": 0.6857243126672048, "learning_rate": 4.510211042642068e-06, "loss": 0.0412, "step": 47135 }, { "epoch": 0.19669367692834075, "grad_norm": 1.0337216320162825, "learning_rate": 4.5099718388698365e-06, "loss": 0.0345, "step": 47140 }, { "epoch": 0.19671453964333102, "grad_norm": 0.8010047178823466, "learning_rate": 4.5097326731528365e-06, "loss": 0.0321, "step": 47145 }, { "epoch": 0.1967354023583213, "grad_norm": 0.7936281805651095, "learning_rate": 4.509493545480979e-06, "loss": 0.0404, "step": 47150 }, { "epoch": 0.1967562650733116, "grad_norm": 0.7201410414248244, "learning_rate": 4.509254455844179e-06, "loss": 0.0357, "step": 47155 }, { "epoch": 0.19677712778830186, "grad_norm": 0.9245547336244647, "learning_rate": 4.509015404232355e-06, "loss": 0.0306, "step": 47160 }, { "epoch": 0.19679799050329214, "grad_norm": 1.0795216512990196, "learning_rate": 4.508776390635426e-06, "loss": 0.033, "step": 47165 }, { "epoch": 0.1968188532182824, "grad_norm": 1.2239560782592935, "learning_rate": 4.508537415043322e-06, "loss": 0.0461, "step": 47170 }, { "epoch": 0.19683971593327268, "grad_norm": 1.150437972320434, "learning_rate": 4.50829847744597e-06, "loss": 0.0325, "step": 47175 }, { "epoch": 0.19686057864826298, "grad_norm": 0.6641308895272102, "learning_rate": 4.508059577833301e-06, "loss": 0.026, "step": 47180 }, { "epoch": 0.19688144136325325, "grad_norm": 0.9557202472322652, "learning_rate": 4.507820716195255e-06, "loss": 0.0445, "step": 47185 }, { "epoch": 0.19690230407824352, "grad_norm": 0.5823110963813131, "learning_rate": 4.507581892521772e-06, "loss": 0.0267, "step": 47190 }, { "epoch": 0.1969231667932338, "grad_norm": 1.6538850313476845, "learning_rate": 4.507343106802797e-06, "loss": 0.0463, "step": 47195 }, { "epoch": 0.1969440295082241, "grad_norm": 0.8899107590244556, "learning_rate": 4.5071043590282755e-06, "loss": 0.0401, "step": 47200 }, { "epoch": 0.19696489222321437, "grad_norm": 0.5148498599708216, "learning_rate": 4.506865649188161e-06, "loss": 0.0316, "step": 47205 }, { "epoch": 0.19698575493820464, "grad_norm": 0.5443297666944571, "learning_rate": 4.5066269772724084e-06, "loss": 0.0481, "step": 47210 }, { "epoch": 0.1970066176531949, "grad_norm": 1.1078157800209172, "learning_rate": 4.506388343270976e-06, "loss": 0.0328, "step": 47215 }, { "epoch": 0.19702748036818518, "grad_norm": 0.69876728426903, "learning_rate": 4.50614974717383e-06, "loss": 0.0289, "step": 47220 }, { "epoch": 0.19704834308317548, "grad_norm": 1.8716299039166588, "learning_rate": 4.5059111889709325e-06, "loss": 0.0371, "step": 47225 }, { "epoch": 0.19706920579816575, "grad_norm": 1.1027915465305436, "learning_rate": 4.505672668652257e-06, "loss": 0.0442, "step": 47230 }, { "epoch": 0.19709006851315602, "grad_norm": 1.3345437365502315, "learning_rate": 4.505434186207777e-06, "loss": 0.0532, "step": 47235 }, { "epoch": 0.1971109312281463, "grad_norm": 0.8438590955827235, "learning_rate": 4.50519574162747e-06, "loss": 0.0339, "step": 47240 }, { "epoch": 0.1971317939431366, "grad_norm": 1.0945350658078332, "learning_rate": 4.504957334901317e-06, "loss": 0.0321, "step": 47245 }, { "epoch": 0.19715265665812687, "grad_norm": 1.0269441063063869, "learning_rate": 4.5047189660193045e-06, "loss": 0.0369, "step": 47250 }, { "epoch": 0.19717351937311714, "grad_norm": 0.6538062913478041, "learning_rate": 4.50448063497142e-06, "loss": 0.0307, "step": 47255 }, { "epoch": 0.1971943820881074, "grad_norm": 0.5455087344105551, "learning_rate": 4.504242341747657e-06, "loss": 0.0457, "step": 47260 }, { "epoch": 0.19721524480309768, "grad_norm": 0.8350700400767672, "learning_rate": 4.50400408633801e-06, "loss": 0.0311, "step": 47265 }, { "epoch": 0.19723610751808798, "grad_norm": 1.2087020659861127, "learning_rate": 4.503765868732482e-06, "loss": 0.0261, "step": 47270 }, { "epoch": 0.19725697023307825, "grad_norm": 0.5034244365576477, "learning_rate": 4.5035276889210744e-06, "loss": 0.0262, "step": 47275 }, { "epoch": 0.19727783294806853, "grad_norm": 0.8679477629703608, "learning_rate": 4.5032895468937965e-06, "loss": 0.0346, "step": 47280 }, { "epoch": 0.1972986956630588, "grad_norm": 0.6837562625356366, "learning_rate": 4.5030514426406576e-06, "loss": 0.0426, "step": 47285 }, { "epoch": 0.1973195583780491, "grad_norm": 0.9548433813010653, "learning_rate": 4.502813376151673e-06, "loss": 0.0297, "step": 47290 }, { "epoch": 0.19734042109303937, "grad_norm": 1.035205083164677, "learning_rate": 4.502575347416862e-06, "loss": 0.0375, "step": 47295 }, { "epoch": 0.19736128380802964, "grad_norm": 0.7522879389754366, "learning_rate": 4.502337356426245e-06, "loss": 0.0355, "step": 47300 }, { "epoch": 0.1973821465230199, "grad_norm": 1.3128834053825913, "learning_rate": 4.5020994031698506e-06, "loss": 0.0304, "step": 47305 }, { "epoch": 0.19740300923801019, "grad_norm": 1.0316738475370437, "learning_rate": 4.501861487637707e-06, "loss": 0.0389, "step": 47310 }, { "epoch": 0.19742387195300048, "grad_norm": 1.044010979555771, "learning_rate": 4.501623609819847e-06, "loss": 0.0352, "step": 47315 }, { "epoch": 0.19744473466799076, "grad_norm": 1.0119246296641795, "learning_rate": 4.501385769706309e-06, "loss": 0.0358, "step": 47320 }, { "epoch": 0.19746559738298103, "grad_norm": 1.0810921062984864, "learning_rate": 4.5011479672871315e-06, "loss": 0.0393, "step": 47325 }, { "epoch": 0.1974864600979713, "grad_norm": 0.8154589496328316, "learning_rate": 4.500910202552359e-06, "loss": 0.0435, "step": 47330 }, { "epoch": 0.1975073228129616, "grad_norm": 1.694612535785258, "learning_rate": 4.5006724754920424e-06, "loss": 0.0328, "step": 47335 }, { "epoch": 0.19752818552795187, "grad_norm": 1.403309015179382, "learning_rate": 4.500434786096231e-06, "loss": 0.0355, "step": 47340 }, { "epoch": 0.19754904824294214, "grad_norm": 1.536747644105109, "learning_rate": 4.50019713435498e-06, "loss": 0.0377, "step": 47345 }, { "epoch": 0.19756991095793242, "grad_norm": 1.711913229012384, "learning_rate": 4.499959520258351e-06, "loss": 0.0378, "step": 47350 }, { "epoch": 0.1975907736729227, "grad_norm": 0.9001766440438245, "learning_rate": 4.499721943796403e-06, "loss": 0.0362, "step": 47355 }, { "epoch": 0.197611636387913, "grad_norm": 0.9148799495629751, "learning_rate": 4.499484404959204e-06, "loss": 0.042, "step": 47360 }, { "epoch": 0.19763249910290326, "grad_norm": 0.8352096285183932, "learning_rate": 4.499246903736825e-06, "loss": 0.0334, "step": 47365 }, { "epoch": 0.19765336181789353, "grad_norm": 0.7858236678849367, "learning_rate": 4.499009440119339e-06, "loss": 0.0402, "step": 47370 }, { "epoch": 0.1976742245328838, "grad_norm": 0.9985952172449608, "learning_rate": 4.498772014096823e-06, "loss": 0.0395, "step": 47375 }, { "epoch": 0.1976950872478741, "grad_norm": 1.0446536462736549, "learning_rate": 4.498534625659358e-06, "loss": 0.0383, "step": 47380 }, { "epoch": 0.19771594996286437, "grad_norm": 0.48990920034894003, "learning_rate": 4.498297274797031e-06, "loss": 0.0289, "step": 47385 }, { "epoch": 0.19773681267785465, "grad_norm": 0.5807248923013022, "learning_rate": 4.498059961499927e-06, "loss": 0.0347, "step": 47390 }, { "epoch": 0.19775767539284492, "grad_norm": 1.4042324132108892, "learning_rate": 4.497822685758141e-06, "loss": 0.0357, "step": 47395 }, { "epoch": 0.1977785381078352, "grad_norm": 5.679899694472468, "learning_rate": 4.497585447561766e-06, "loss": 0.0347, "step": 47400 }, { "epoch": 0.1977994008228255, "grad_norm": 0.8342818786036066, "learning_rate": 4.497348246900903e-06, "loss": 0.0302, "step": 47405 }, { "epoch": 0.19782026353781576, "grad_norm": 0.9311264926559116, "learning_rate": 4.4971110837656544e-06, "loss": 0.0365, "step": 47410 }, { "epoch": 0.19784112625280603, "grad_norm": 1.1221956954081218, "learning_rate": 4.496873958146128e-06, "loss": 0.0451, "step": 47415 }, { "epoch": 0.1978619889677963, "grad_norm": 0.656613723802619, "learning_rate": 4.496636870032432e-06, "loss": 0.0332, "step": 47420 }, { "epoch": 0.1978828516827866, "grad_norm": 0.4484517425406317, "learning_rate": 4.496399819414682e-06, "loss": 0.0269, "step": 47425 }, { "epoch": 0.19790371439777688, "grad_norm": 0.35777754825778296, "learning_rate": 4.4961628062829965e-06, "loss": 0.0298, "step": 47430 }, { "epoch": 0.19792457711276715, "grad_norm": 0.6680169071474981, "learning_rate": 4.495925830627494e-06, "loss": 0.0439, "step": 47435 }, { "epoch": 0.19794543982775742, "grad_norm": 0.9056181709340168, "learning_rate": 4.495688892438302e-06, "loss": 0.0319, "step": 47440 }, { "epoch": 0.1979663025427477, "grad_norm": 0.7814165147401453, "learning_rate": 4.4954519917055475e-06, "loss": 0.0541, "step": 47445 }, { "epoch": 0.197987165257738, "grad_norm": 0.7469832218634657, "learning_rate": 4.495215128419362e-06, "loss": 0.0277, "step": 47450 }, { "epoch": 0.19800802797272826, "grad_norm": 0.9336595876251936, "learning_rate": 4.494978302569883e-06, "loss": 0.0358, "step": 47455 }, { "epoch": 0.19802889068771853, "grad_norm": 1.3080485298204936, "learning_rate": 4.494741514147251e-06, "loss": 0.047, "step": 47460 }, { "epoch": 0.1980497534027088, "grad_norm": 0.6471319464297857, "learning_rate": 4.494504763141606e-06, "loss": 0.0275, "step": 47465 }, { "epoch": 0.1980706161176991, "grad_norm": 0.936117169957264, "learning_rate": 4.494268049543097e-06, "loss": 0.0323, "step": 47470 }, { "epoch": 0.19809147883268938, "grad_norm": 0.6475115515278294, "learning_rate": 4.4940313733418725e-06, "loss": 0.0249, "step": 47475 }, { "epoch": 0.19811234154767965, "grad_norm": 0.7408429046078959, "learning_rate": 4.493794734528088e-06, "loss": 0.0256, "step": 47480 }, { "epoch": 0.19813320426266992, "grad_norm": 0.7002088249520689, "learning_rate": 4.4935581330919015e-06, "loss": 0.0475, "step": 47485 }, { "epoch": 0.1981540669776602, "grad_norm": 0.7235559379727161, "learning_rate": 4.4933215690234734e-06, "loss": 0.0292, "step": 47490 }, { "epoch": 0.1981749296926505, "grad_norm": 1.1953708057820827, "learning_rate": 4.493085042312968e-06, "loss": 0.0402, "step": 47495 }, { "epoch": 0.19819579240764076, "grad_norm": 1.9420492748633937, "learning_rate": 4.492848552950554e-06, "loss": 0.0302, "step": 47500 }, { "epoch": 0.19821665512263104, "grad_norm": 1.577444137187086, "learning_rate": 4.492612100926405e-06, "loss": 0.0318, "step": 47505 }, { "epoch": 0.1982375178376213, "grad_norm": 5.341775950894509, "learning_rate": 4.492375686230696e-06, "loss": 0.0347, "step": 47510 }, { "epoch": 0.1982583805526116, "grad_norm": 0.8672568724257859, "learning_rate": 4.4921393088536055e-06, "loss": 0.0378, "step": 47515 }, { "epoch": 0.19827924326760188, "grad_norm": 1.3023580646751958, "learning_rate": 4.491902968785317e-06, "loss": 0.027, "step": 47520 }, { "epoch": 0.19830010598259215, "grad_norm": 1.1054081546210603, "learning_rate": 4.491666666016017e-06, "loss": 0.0378, "step": 47525 }, { "epoch": 0.19832096869758242, "grad_norm": 1.077536764992031, "learning_rate": 4.491430400535897e-06, "loss": 0.0392, "step": 47530 }, { "epoch": 0.1983418314125727, "grad_norm": 0.7002244145522167, "learning_rate": 4.491194172335148e-06, "loss": 0.0366, "step": 47535 }, { "epoch": 0.198362694127563, "grad_norm": 0.8278010634862828, "learning_rate": 4.490957981403971e-06, "loss": 0.0345, "step": 47540 }, { "epoch": 0.19838355684255327, "grad_norm": 0.8947484975440729, "learning_rate": 4.490721827732565e-06, "loss": 0.0283, "step": 47545 }, { "epoch": 0.19840441955754354, "grad_norm": 1.1189108407038388, "learning_rate": 4.490485711311133e-06, "loss": 0.0333, "step": 47550 }, { "epoch": 0.1984252822725338, "grad_norm": 0.7097583063617177, "learning_rate": 4.490249632129887e-06, "loss": 0.0404, "step": 47555 }, { "epoch": 0.1984461449875241, "grad_norm": 0.4859212554476236, "learning_rate": 4.490013590179036e-06, "loss": 0.0294, "step": 47560 }, { "epoch": 0.19846700770251438, "grad_norm": 0.6058637326869662, "learning_rate": 4.489777585448795e-06, "loss": 0.0234, "step": 47565 }, { "epoch": 0.19848787041750465, "grad_norm": 1.1255938572213742, "learning_rate": 4.489541617929386e-06, "loss": 0.0311, "step": 47570 }, { "epoch": 0.19850873313249492, "grad_norm": 0.8361558096636262, "learning_rate": 4.4893056876110304e-06, "loss": 0.0404, "step": 47575 }, { "epoch": 0.1985295958474852, "grad_norm": 1.0695748444092994, "learning_rate": 4.489069794483954e-06, "loss": 0.0329, "step": 47580 }, { "epoch": 0.1985504585624755, "grad_norm": 0.902360993959902, "learning_rate": 4.488833938538387e-06, "loss": 0.0392, "step": 47585 }, { "epoch": 0.19857132127746577, "grad_norm": 0.8962425273088327, "learning_rate": 4.488598119764562e-06, "loss": 0.0381, "step": 47590 }, { "epoch": 0.19859218399245604, "grad_norm": 0.6093568588326783, "learning_rate": 4.4883623381527154e-06, "loss": 0.0304, "step": 47595 }, { "epoch": 0.1986130467074463, "grad_norm": 1.1612803410112007, "learning_rate": 4.488126593693091e-06, "loss": 0.0339, "step": 47600 }, { "epoch": 0.1986339094224366, "grad_norm": 1.2486065985990247, "learning_rate": 4.487890886375931e-06, "loss": 0.0396, "step": 47605 }, { "epoch": 0.19865477213742688, "grad_norm": 0.8241498858367892, "learning_rate": 4.487655216191482e-06, "loss": 0.0434, "step": 47610 }, { "epoch": 0.19867563485241715, "grad_norm": 0.9906376723720052, "learning_rate": 4.4874195831299975e-06, "loss": 0.0345, "step": 47615 }, { "epoch": 0.19869649756740743, "grad_norm": 0.7408168188549141, "learning_rate": 4.487183987181731e-06, "loss": 0.036, "step": 47620 }, { "epoch": 0.1987173602823977, "grad_norm": 0.9153199075325863, "learning_rate": 4.486948428336942e-06, "loss": 0.0418, "step": 47625 }, { "epoch": 0.198738222997388, "grad_norm": 0.8561062215227269, "learning_rate": 4.486712906585892e-06, "loss": 0.0307, "step": 47630 }, { "epoch": 0.19875908571237827, "grad_norm": 1.3163494783561607, "learning_rate": 4.486477421918846e-06, "loss": 0.0411, "step": 47635 }, { "epoch": 0.19877994842736854, "grad_norm": 0.8708965270949993, "learning_rate": 4.486241974326076e-06, "loss": 0.0349, "step": 47640 }, { "epoch": 0.1988008111423588, "grad_norm": 0.8973580621069308, "learning_rate": 4.486006563797853e-06, "loss": 0.0468, "step": 47645 }, { "epoch": 0.1988216738573491, "grad_norm": 0.8404393414986442, "learning_rate": 4.485771190324452e-06, "loss": 0.0406, "step": 47650 }, { "epoch": 0.19884253657233938, "grad_norm": 0.9195897739585092, "learning_rate": 4.485535853896155e-06, "loss": 0.0326, "step": 47655 }, { "epoch": 0.19886339928732966, "grad_norm": 0.8015868203878854, "learning_rate": 4.4853005545032445e-06, "loss": 0.031, "step": 47660 }, { "epoch": 0.19888426200231993, "grad_norm": 0.8872565057630504, "learning_rate": 4.485065292136009e-06, "loss": 0.0304, "step": 47665 }, { "epoch": 0.1989051247173102, "grad_norm": 1.255666884479541, "learning_rate": 4.484830066784738e-06, "loss": 0.0308, "step": 47670 }, { "epoch": 0.1989259874323005, "grad_norm": 1.1225322752127058, "learning_rate": 4.484594878439726e-06, "loss": 0.0356, "step": 47675 }, { "epoch": 0.19894685014729077, "grad_norm": 0.8077937338868666, "learning_rate": 4.484359727091271e-06, "loss": 0.0288, "step": 47680 }, { "epoch": 0.19896771286228104, "grad_norm": 2.0785484265535277, "learning_rate": 4.484124612729675e-06, "loss": 0.0433, "step": 47685 }, { "epoch": 0.19898857557727131, "grad_norm": 0.6677210242686458, "learning_rate": 4.483889535345241e-06, "loss": 0.0363, "step": 47690 }, { "epoch": 0.19900943829226161, "grad_norm": 0.8856092749997739, "learning_rate": 4.483654494928278e-06, "loss": 0.0325, "step": 47695 }, { "epoch": 0.1990303010072519, "grad_norm": 1.664736424604811, "learning_rate": 4.4834194914691e-06, "loss": 0.0369, "step": 47700 }, { "epoch": 0.19905116372224216, "grad_norm": 0.766482336278506, "learning_rate": 4.483184524958022e-06, "loss": 0.0366, "step": 47705 }, { "epoch": 0.19907202643723243, "grad_norm": 0.8345906131877399, "learning_rate": 4.4829495953853605e-06, "loss": 0.0242, "step": 47710 }, { "epoch": 0.1990928891522227, "grad_norm": 0.7077502305554573, "learning_rate": 4.482714702741442e-06, "loss": 0.0337, "step": 47715 }, { "epoch": 0.199113751867213, "grad_norm": 0.8475300535788317, "learning_rate": 4.48247984701659e-06, "loss": 0.0434, "step": 47720 }, { "epoch": 0.19913461458220327, "grad_norm": 0.6849108974370609, "learning_rate": 4.482245028201136e-06, "loss": 0.0308, "step": 47725 }, { "epoch": 0.19915547729719354, "grad_norm": 1.0268882396524164, "learning_rate": 4.482010246285412e-06, "loss": 0.0299, "step": 47730 }, { "epoch": 0.19917634001218382, "grad_norm": 0.464513508150137, "learning_rate": 4.481775501259756e-06, "loss": 0.0305, "step": 47735 }, { "epoch": 0.19919720272717412, "grad_norm": 0.6615590484059614, "learning_rate": 4.481540793114507e-06, "loss": 0.0285, "step": 47740 }, { "epoch": 0.1992180654421644, "grad_norm": 1.5380476759717552, "learning_rate": 4.4813061218400105e-06, "loss": 0.0496, "step": 47745 }, { "epoch": 0.19923892815715466, "grad_norm": 0.9730657700839407, "learning_rate": 4.481071487426613e-06, "loss": 0.0344, "step": 47750 }, { "epoch": 0.19925979087214493, "grad_norm": 0.7791977753398276, "learning_rate": 4.480836889864668e-06, "loss": 0.0483, "step": 47755 }, { "epoch": 0.1992806535871352, "grad_norm": 0.6324476062915138, "learning_rate": 4.480602329144525e-06, "loss": 0.0318, "step": 47760 }, { "epoch": 0.1993015163021255, "grad_norm": 2.384223094556328, "learning_rate": 4.480367805256547e-06, "loss": 0.0403, "step": 47765 }, { "epoch": 0.19932237901711577, "grad_norm": 1.0010649397187137, "learning_rate": 4.480133318191094e-06, "loss": 0.0403, "step": 47770 }, { "epoch": 0.19934324173210605, "grad_norm": 1.5543627734797374, "learning_rate": 4.479898867938531e-06, "loss": 0.0334, "step": 47775 }, { "epoch": 0.19936410444709632, "grad_norm": 1.6600513863846584, "learning_rate": 4.4796644544892256e-06, "loss": 0.0373, "step": 47780 }, { "epoch": 0.19938496716208662, "grad_norm": 0.78417785408279, "learning_rate": 4.479430077833553e-06, "loss": 0.0295, "step": 47785 }, { "epoch": 0.1994058298770769, "grad_norm": 1.0665668130553994, "learning_rate": 4.479195737961886e-06, "loss": 0.0293, "step": 47790 }, { "epoch": 0.19942669259206716, "grad_norm": 0.8514495656645039, "learning_rate": 4.478961434864604e-06, "loss": 0.0271, "step": 47795 }, { "epoch": 0.19944755530705743, "grad_norm": 0.989458728457799, "learning_rate": 4.478727168532093e-06, "loss": 0.0462, "step": 47800 }, { "epoch": 0.1994684180220477, "grad_norm": 2.106816532895123, "learning_rate": 4.478492938954736e-06, "loss": 0.0308, "step": 47805 }, { "epoch": 0.199489280737038, "grad_norm": 0.5009867067155132, "learning_rate": 4.478258746122924e-06, "loss": 0.0335, "step": 47810 }, { "epoch": 0.19951014345202828, "grad_norm": 4.9157635597790446, "learning_rate": 4.4780245900270515e-06, "loss": 0.0456, "step": 47815 }, { "epoch": 0.19953100616701855, "grad_norm": 0.5099706223878373, "learning_rate": 4.477790470657513e-06, "loss": 0.0305, "step": 47820 }, { "epoch": 0.19955186888200882, "grad_norm": 0.8206912545337425, "learning_rate": 4.477556388004711e-06, "loss": 0.0455, "step": 47825 }, { "epoch": 0.19957273159699912, "grad_norm": 0.695860421586087, "learning_rate": 4.477322342059049e-06, "loss": 0.043, "step": 47830 }, { "epoch": 0.1995935943119894, "grad_norm": 1.233768235595586, "learning_rate": 4.4770883328109336e-06, "loss": 0.045, "step": 47835 }, { "epoch": 0.19961445702697966, "grad_norm": 0.7806180360406392, "learning_rate": 4.476854360250776e-06, "loss": 0.0389, "step": 47840 }, { "epoch": 0.19963531974196994, "grad_norm": 0.521712043735111, "learning_rate": 4.476620424368991e-06, "loss": 0.0302, "step": 47845 }, { "epoch": 0.1996561824569602, "grad_norm": 0.4516309812576061, "learning_rate": 4.4763865251559965e-06, "loss": 0.0349, "step": 47850 }, { "epoch": 0.1996770451719505, "grad_norm": 0.5128755238154364, "learning_rate": 4.476152662602214e-06, "loss": 0.0328, "step": 47855 }, { "epoch": 0.19969790788694078, "grad_norm": 0.7799701526032556, "learning_rate": 4.475918836698068e-06, "loss": 0.0392, "step": 47860 }, { "epoch": 0.19971877060193105, "grad_norm": 0.8421225513435485, "learning_rate": 4.475685047433987e-06, "loss": 0.0317, "step": 47865 }, { "epoch": 0.19973963331692132, "grad_norm": 0.6508033036293891, "learning_rate": 4.475451294800403e-06, "loss": 0.0337, "step": 47870 }, { "epoch": 0.19976049603191162, "grad_norm": 0.5834699763614484, "learning_rate": 4.4752175787877525e-06, "loss": 0.0377, "step": 47875 }, { "epoch": 0.1997813587469019, "grad_norm": 0.8997865787911147, "learning_rate": 4.474983899386474e-06, "loss": 0.0266, "step": 47880 }, { "epoch": 0.19980222146189217, "grad_norm": 1.4367747130011537, "learning_rate": 4.474750256587008e-06, "loss": 0.0308, "step": 47885 }, { "epoch": 0.19982308417688244, "grad_norm": 1.4008122003009564, "learning_rate": 4.474516650379803e-06, "loss": 0.0371, "step": 47890 }, { "epoch": 0.1998439468918727, "grad_norm": 0.9946651386818692, "learning_rate": 4.474283080755308e-06, "loss": 0.0536, "step": 47895 }, { "epoch": 0.199864809606863, "grad_norm": 0.6793960874186264, "learning_rate": 4.474049547703974e-06, "loss": 0.0384, "step": 47900 }, { "epoch": 0.19988567232185328, "grad_norm": 1.0389864387559673, "learning_rate": 4.4738160512162595e-06, "loss": 0.0348, "step": 47905 }, { "epoch": 0.19990653503684355, "grad_norm": 0.6553415026349927, "learning_rate": 4.4735825912826234e-06, "loss": 0.0519, "step": 47910 }, { "epoch": 0.19992739775183382, "grad_norm": 0.7413485287123475, "learning_rate": 4.47334916789353e-06, "loss": 0.0377, "step": 47915 }, { "epoch": 0.19994826046682412, "grad_norm": 0.4701752520855453, "learning_rate": 4.473115781039444e-06, "loss": 0.03, "step": 47920 }, { "epoch": 0.1999691231818144, "grad_norm": 0.7769073881220041, "learning_rate": 4.4728824307108385e-06, "loss": 0.0408, "step": 47925 }, { "epoch": 0.19998998589680467, "grad_norm": 1.0379230309804786, "learning_rate": 4.472649116898184e-06, "loss": 0.0357, "step": 47930 }, { "epoch": 0.20001084861179494, "grad_norm": 0.8045535917382443, "learning_rate": 4.472415839591962e-06, "loss": 0.0307, "step": 47935 }, { "epoch": 0.2000317113267852, "grad_norm": 0.6216634721031618, "learning_rate": 4.472182598782651e-06, "loss": 0.0296, "step": 47940 }, { "epoch": 0.2000525740417755, "grad_norm": 0.8517688109255809, "learning_rate": 4.471949394460734e-06, "loss": 0.0338, "step": 47945 }, { "epoch": 0.20007343675676578, "grad_norm": 0.7135696444083023, "learning_rate": 4.4717162266167e-06, "loss": 0.038, "step": 47950 }, { "epoch": 0.20009429947175605, "grad_norm": 0.9461787199816357, "learning_rate": 4.471483095241041e-06, "loss": 0.0303, "step": 47955 }, { "epoch": 0.20011516218674633, "grad_norm": 1.1270616732591954, "learning_rate": 4.471250000324251e-06, "loss": 0.034, "step": 47960 }, { "epoch": 0.20013602490173663, "grad_norm": 1.0944092108555232, "learning_rate": 4.471016941856828e-06, "loss": 0.0378, "step": 47965 }, { "epoch": 0.2001568876167269, "grad_norm": 0.8720781967727876, "learning_rate": 4.4707839198292735e-06, "loss": 0.0353, "step": 47970 }, { "epoch": 0.20017775033171717, "grad_norm": 0.5393022402091228, "learning_rate": 4.470550934232092e-06, "loss": 0.0352, "step": 47975 }, { "epoch": 0.20019861304670744, "grad_norm": 0.6109658080103127, "learning_rate": 4.470317985055794e-06, "loss": 0.0334, "step": 47980 }, { "epoch": 0.2002194757616977, "grad_norm": 1.6443591429657212, "learning_rate": 4.470085072290889e-06, "loss": 0.0469, "step": 47985 }, { "epoch": 0.200240338476688, "grad_norm": 0.8761783892241746, "learning_rate": 4.469852195927894e-06, "loss": 0.0308, "step": 47990 }, { "epoch": 0.20026120119167828, "grad_norm": 1.3342492247628934, "learning_rate": 4.469619355957327e-06, "loss": 0.0414, "step": 47995 }, { "epoch": 0.20028206390666856, "grad_norm": 0.8361491539270431, "learning_rate": 4.469386552369712e-06, "loss": 0.037, "step": 48000 }, { "epoch": 0.20030292662165883, "grad_norm": 0.9123516728188905, "learning_rate": 4.4691537851555736e-06, "loss": 0.0373, "step": 48005 }, { "epoch": 0.20032378933664913, "grad_norm": 1.0638498959246234, "learning_rate": 4.468921054305441e-06, "loss": 0.0359, "step": 48010 }, { "epoch": 0.2003446520516394, "grad_norm": 0.7330016954316385, "learning_rate": 4.468688359809846e-06, "loss": 0.0263, "step": 48015 }, { "epoch": 0.20036551476662967, "grad_norm": 0.73915763997371, "learning_rate": 4.468455701659327e-06, "loss": 0.0297, "step": 48020 }, { "epoch": 0.20038637748161994, "grad_norm": 0.9883080425240991, "learning_rate": 4.468223079844422e-06, "loss": 0.0478, "step": 48025 }, { "epoch": 0.20040724019661021, "grad_norm": 0.9663394172574811, "learning_rate": 4.467990494355676e-06, "loss": 0.0333, "step": 48030 }, { "epoch": 0.20042810291160051, "grad_norm": 1.3013063001261684, "learning_rate": 4.4677579451836325e-06, "loss": 0.0403, "step": 48035 }, { "epoch": 0.20044896562659079, "grad_norm": 0.3991827875508219, "learning_rate": 4.4675254323188435e-06, "loss": 0.0241, "step": 48040 }, { "epoch": 0.20046982834158106, "grad_norm": 0.7302963058881331, "learning_rate": 4.4672929557518615e-06, "loss": 0.0347, "step": 48045 }, { "epoch": 0.20049069105657133, "grad_norm": 1.0243340942965014, "learning_rate": 4.467060515473244e-06, "loss": 0.0415, "step": 48050 }, { "epoch": 0.20051155377156163, "grad_norm": 1.007571729333961, "learning_rate": 4.466828111473551e-06, "loss": 0.0381, "step": 48055 }, { "epoch": 0.2005324164865519, "grad_norm": 0.7747282868479308, "learning_rate": 4.466595743743347e-06, "loss": 0.0364, "step": 48060 }, { "epoch": 0.20055327920154217, "grad_norm": 0.802942684907909, "learning_rate": 4.466363412273197e-06, "loss": 0.0278, "step": 48065 }, { "epoch": 0.20057414191653244, "grad_norm": 1.366798065930631, "learning_rate": 4.466131117053674e-06, "loss": 0.0373, "step": 48070 }, { "epoch": 0.20059500463152272, "grad_norm": 0.9736077682835795, "learning_rate": 4.46589885807535e-06, "loss": 0.0471, "step": 48075 }, { "epoch": 0.20061586734651302, "grad_norm": 0.9226312207625542, "learning_rate": 4.465666635328806e-06, "loss": 0.0387, "step": 48080 }, { "epoch": 0.2006367300615033, "grad_norm": 0.7452411452789556, "learning_rate": 4.465434448804618e-06, "loss": 0.0322, "step": 48085 }, { "epoch": 0.20065759277649356, "grad_norm": 0.9699654386267902, "learning_rate": 4.465202298493373e-06, "loss": 0.0283, "step": 48090 }, { "epoch": 0.20067845549148383, "grad_norm": 1.0116989488772707, "learning_rate": 4.464970184385659e-06, "loss": 0.0383, "step": 48095 }, { "epoch": 0.20069931820647413, "grad_norm": 1.2768859776789985, "learning_rate": 4.4647381064720665e-06, "loss": 0.0425, "step": 48100 }, { "epoch": 0.2007201809214644, "grad_norm": 1.2910244414923817, "learning_rate": 4.464506064743189e-06, "loss": 0.0502, "step": 48105 }, { "epoch": 0.20074104363645467, "grad_norm": 0.6335508871384771, "learning_rate": 4.464274059189628e-06, "loss": 0.0303, "step": 48110 }, { "epoch": 0.20076190635144495, "grad_norm": 1.1703711357451914, "learning_rate": 4.464042089801981e-06, "loss": 0.0356, "step": 48115 }, { "epoch": 0.20078276906643522, "grad_norm": 2.8669110296506037, "learning_rate": 4.463810156570855e-06, "loss": 0.0354, "step": 48120 }, { "epoch": 0.20080363178142552, "grad_norm": 0.658915729577203, "learning_rate": 4.463578259486856e-06, "loss": 0.0348, "step": 48125 }, { "epoch": 0.2008244944964158, "grad_norm": 0.47790824049845515, "learning_rate": 4.4633463985405985e-06, "loss": 0.0306, "step": 48130 }, { "epoch": 0.20084535721140606, "grad_norm": 0.9173534924835305, "learning_rate": 4.463114573722697e-06, "loss": 0.0343, "step": 48135 }, { "epoch": 0.20086621992639633, "grad_norm": 0.6946891374150662, "learning_rate": 4.462882785023768e-06, "loss": 0.0364, "step": 48140 }, { "epoch": 0.20088708264138663, "grad_norm": 0.8891052622944131, "learning_rate": 4.462651032434436e-06, "loss": 0.0383, "step": 48145 }, { "epoch": 0.2009079453563769, "grad_norm": 1.0300163516563556, "learning_rate": 4.462419315945325e-06, "loss": 0.0386, "step": 48150 }, { "epoch": 0.20092880807136718, "grad_norm": 0.9832368572429581, "learning_rate": 4.462187635547063e-06, "loss": 0.0382, "step": 48155 }, { "epoch": 0.20094967078635745, "grad_norm": 0.9941218709994588, "learning_rate": 4.461955991230284e-06, "loss": 0.0364, "step": 48160 }, { "epoch": 0.20097053350134772, "grad_norm": 0.8851061136644628, "learning_rate": 4.4617243829856225e-06, "loss": 0.033, "step": 48165 }, { "epoch": 0.20099139621633802, "grad_norm": 1.0393831982263044, "learning_rate": 4.461492810803717e-06, "loss": 0.0354, "step": 48170 }, { "epoch": 0.2010122589313283, "grad_norm": 0.8650536931396947, "learning_rate": 4.4612612746752105e-06, "loss": 0.0319, "step": 48175 }, { "epoch": 0.20103312164631856, "grad_norm": 0.7106743753171119, "learning_rate": 4.461029774590748e-06, "loss": 0.032, "step": 48180 }, { "epoch": 0.20105398436130884, "grad_norm": 0.9061862576516581, "learning_rate": 4.460798310540979e-06, "loss": 0.0296, "step": 48185 }, { "epoch": 0.20107484707629913, "grad_norm": 0.44815484495399444, "learning_rate": 4.460566882516557e-06, "loss": 0.0332, "step": 48190 }, { "epoch": 0.2010957097912894, "grad_norm": 0.8949787568203501, "learning_rate": 4.460335490508136e-06, "loss": 0.0419, "step": 48195 }, { "epoch": 0.20111657250627968, "grad_norm": 0.8582869447141499, "learning_rate": 4.460104134506379e-06, "loss": 0.0309, "step": 48200 }, { "epoch": 0.20113743522126995, "grad_norm": 0.9324901329859646, "learning_rate": 4.459872814501943e-06, "loss": 0.04, "step": 48205 }, { "epoch": 0.20115829793626022, "grad_norm": 1.2635216143136105, "learning_rate": 4.459641530485499e-06, "loss": 0.0495, "step": 48210 }, { "epoch": 0.20117916065125052, "grad_norm": 0.7584062119286021, "learning_rate": 4.459410282447714e-06, "loss": 0.0307, "step": 48215 }, { "epoch": 0.2012000233662408, "grad_norm": 1.3937410141159494, "learning_rate": 4.459179070379262e-06, "loss": 0.0337, "step": 48220 }, { "epoch": 0.20122088608123107, "grad_norm": 1.096580678540363, "learning_rate": 4.458947894270818e-06, "loss": 0.0352, "step": 48225 }, { "epoch": 0.20124174879622134, "grad_norm": 1.0264905495699421, "learning_rate": 4.458716754113063e-06, "loss": 0.034, "step": 48230 }, { "epoch": 0.20126261151121164, "grad_norm": 0.6537692926537615, "learning_rate": 4.458485649896681e-06, "loss": 0.0274, "step": 48235 }, { "epoch": 0.2012834742262019, "grad_norm": 0.7664991814122597, "learning_rate": 4.4582545816123545e-06, "loss": 0.0299, "step": 48240 }, { "epoch": 0.20130433694119218, "grad_norm": 0.6301554428264673, "learning_rate": 4.458023549250777e-06, "loss": 0.0307, "step": 48245 }, { "epoch": 0.20132519965618245, "grad_norm": 0.7656877982579238, "learning_rate": 4.457792552802639e-06, "loss": 0.0314, "step": 48250 }, { "epoch": 0.20134606237117272, "grad_norm": 0.618684650720241, "learning_rate": 4.457561592258639e-06, "loss": 0.0287, "step": 48255 }, { "epoch": 0.20136692508616302, "grad_norm": 0.6511328194002682, "learning_rate": 4.4573306676094775e-06, "loss": 0.0309, "step": 48260 }, { "epoch": 0.2013877878011533, "grad_norm": 1.0911293672541444, "learning_rate": 4.4570997788458555e-06, "loss": 0.0348, "step": 48265 }, { "epoch": 0.20140865051614357, "grad_norm": 1.4543462961288454, "learning_rate": 4.456868925958481e-06, "loss": 0.0299, "step": 48270 }, { "epoch": 0.20142951323113384, "grad_norm": 0.7835793249990819, "learning_rate": 4.456638108938063e-06, "loss": 0.0363, "step": 48275 }, { "epoch": 0.20145037594612414, "grad_norm": 1.0621759388589063, "learning_rate": 4.456407327775316e-06, "loss": 0.0344, "step": 48280 }, { "epoch": 0.2014712386611144, "grad_norm": 1.817032628668737, "learning_rate": 4.456176582460955e-06, "loss": 0.0358, "step": 48285 }, { "epoch": 0.20149210137610468, "grad_norm": 0.6664020533766112, "learning_rate": 4.455945872985703e-06, "loss": 0.0276, "step": 48290 }, { "epoch": 0.20151296409109495, "grad_norm": 0.7724506986986456, "learning_rate": 4.455715199340282e-06, "loss": 0.0319, "step": 48295 }, { "epoch": 0.20153382680608523, "grad_norm": 0.8962172717383998, "learning_rate": 4.455484561515419e-06, "loss": 0.0386, "step": 48300 }, { "epoch": 0.20155468952107553, "grad_norm": 0.8619634905056318, "learning_rate": 4.455253959501842e-06, "loss": 0.0469, "step": 48305 }, { "epoch": 0.2015755522360658, "grad_norm": 1.3502477364233707, "learning_rate": 4.455023393290288e-06, "loss": 0.0395, "step": 48310 }, { "epoch": 0.20159641495105607, "grad_norm": 0.8579944881255709, "learning_rate": 4.454792862871492e-06, "loss": 0.0297, "step": 48315 }, { "epoch": 0.20161727766604634, "grad_norm": 0.6471400177319859, "learning_rate": 4.4545623682361955e-06, "loss": 0.0321, "step": 48320 }, { "epoch": 0.20163814038103664, "grad_norm": 1.0591186134096917, "learning_rate": 4.454331909375139e-06, "loss": 0.0331, "step": 48325 }, { "epoch": 0.2016590030960269, "grad_norm": 0.792684488694675, "learning_rate": 4.4541014862790744e-06, "loss": 0.0292, "step": 48330 }, { "epoch": 0.20167986581101718, "grad_norm": 1.4831199777595858, "learning_rate": 4.453871098938748e-06, "loss": 0.0306, "step": 48335 }, { "epoch": 0.20170072852600746, "grad_norm": 1.0949294720227185, "learning_rate": 4.453640747344916e-06, "loss": 0.0333, "step": 48340 }, { "epoch": 0.20172159124099773, "grad_norm": 1.2717716269748403, "learning_rate": 4.453410431488332e-06, "loss": 0.04, "step": 48345 }, { "epoch": 0.20174245395598803, "grad_norm": 1.0459381940360535, "learning_rate": 4.453180151359761e-06, "loss": 0.0434, "step": 48350 }, { "epoch": 0.2017633166709783, "grad_norm": 0.9287190536493125, "learning_rate": 4.452949906949964e-06, "loss": 0.0278, "step": 48355 }, { "epoch": 0.20178417938596857, "grad_norm": 0.8353198773402173, "learning_rate": 4.452719698249707e-06, "loss": 0.0501, "step": 48360 }, { "epoch": 0.20180504210095884, "grad_norm": 0.7135720482640763, "learning_rate": 4.452489525249762e-06, "loss": 0.0327, "step": 48365 }, { "epoch": 0.20182590481594914, "grad_norm": 1.2059198973736656, "learning_rate": 4.4522593879409036e-06, "loss": 0.0346, "step": 48370 }, { "epoch": 0.2018467675309394, "grad_norm": 1.201664109661957, "learning_rate": 4.4520292863139075e-06, "loss": 0.0433, "step": 48375 }, { "epoch": 0.20186763024592969, "grad_norm": 0.8492656534978554, "learning_rate": 4.451799220359554e-06, "loss": 0.0307, "step": 48380 }, { "epoch": 0.20188849296091996, "grad_norm": 0.7681972460994136, "learning_rate": 4.4515691900686284e-06, "loss": 0.042, "step": 48385 }, { "epoch": 0.20190935567591023, "grad_norm": 0.5489975380403359, "learning_rate": 4.451339195431915e-06, "loss": 0.0307, "step": 48390 }, { "epoch": 0.20193021839090053, "grad_norm": 0.7660822072064163, "learning_rate": 4.451109236440206e-06, "loss": 0.0273, "step": 48395 }, { "epoch": 0.2019510811058908, "grad_norm": 1.4518065529647, "learning_rate": 4.450879313084296e-06, "loss": 0.0416, "step": 48400 }, { "epoch": 0.20197194382088107, "grad_norm": 0.5522499801620642, "learning_rate": 4.45064942535498e-06, "loss": 0.0287, "step": 48405 }, { "epoch": 0.20199280653587134, "grad_norm": 1.1042873254985897, "learning_rate": 4.45041957324306e-06, "loss": 0.0303, "step": 48410 }, { "epoch": 0.20201366925086164, "grad_norm": 1.1521994156904338, "learning_rate": 4.450189756739339e-06, "loss": 0.0334, "step": 48415 }, { "epoch": 0.20203453196585192, "grad_norm": 0.5783725835339459, "learning_rate": 4.449959975834624e-06, "loss": 0.0319, "step": 48420 }, { "epoch": 0.2020553946808422, "grad_norm": 0.7131615128520815, "learning_rate": 4.4497302305197256e-06, "loss": 0.0338, "step": 48425 }, { "epoch": 0.20207625739583246, "grad_norm": 1.3619709252727894, "learning_rate": 4.4495005207854576e-06, "loss": 0.0414, "step": 48430 }, { "epoch": 0.20209712011082273, "grad_norm": 0.5046814001682715, "learning_rate": 4.4492708466226355e-06, "loss": 0.0341, "step": 48435 }, { "epoch": 0.20211798282581303, "grad_norm": 0.9484644573120216, "learning_rate": 4.449041208022083e-06, "loss": 0.0323, "step": 48440 }, { "epoch": 0.2021388455408033, "grad_norm": 0.7141298411890298, "learning_rate": 4.4488116049746205e-06, "loss": 0.0316, "step": 48445 }, { "epoch": 0.20215970825579357, "grad_norm": 1.3157036800972757, "learning_rate": 4.448582037471076e-06, "loss": 0.0433, "step": 48450 }, { "epoch": 0.20218057097078385, "grad_norm": 1.5090468516823852, "learning_rate": 4.448352505502279e-06, "loss": 0.0409, "step": 48455 }, { "epoch": 0.20220143368577415, "grad_norm": 0.7464799887637973, "learning_rate": 4.448123009059066e-06, "loss": 0.0315, "step": 48460 }, { "epoch": 0.20222229640076442, "grad_norm": 0.9041753498204186, "learning_rate": 4.44789354813227e-06, "loss": 0.0344, "step": 48465 }, { "epoch": 0.2022431591157547, "grad_norm": 1.3293683400616605, "learning_rate": 4.447664122712734e-06, "loss": 0.0467, "step": 48470 }, { "epoch": 0.20226402183074496, "grad_norm": 0.8604177020306287, "learning_rate": 4.4474347327913e-06, "loss": 0.0424, "step": 48475 }, { "epoch": 0.20228488454573523, "grad_norm": 0.7114196783662712, "learning_rate": 4.447205378358815e-06, "loss": 0.0453, "step": 48480 }, { "epoch": 0.20230574726072553, "grad_norm": 0.8534063564207932, "learning_rate": 4.446976059406131e-06, "loss": 0.0297, "step": 48485 }, { "epoch": 0.2023266099757158, "grad_norm": 1.1993121670548672, "learning_rate": 4.446746775924099e-06, "loss": 0.0331, "step": 48490 }, { "epoch": 0.20234747269070608, "grad_norm": 0.8027308972436972, "learning_rate": 4.446517527903576e-06, "loss": 0.0379, "step": 48495 }, { "epoch": 0.20236833540569635, "grad_norm": 3.0010774322463383, "learning_rate": 4.4462883153354235e-06, "loss": 0.0331, "step": 48500 }, { "epoch": 0.20238919812068665, "grad_norm": 1.1940345873846265, "learning_rate": 4.446059138210503e-06, "loss": 0.0331, "step": 48505 }, { "epoch": 0.20241006083567692, "grad_norm": 1.3295187134573003, "learning_rate": 4.445829996519682e-06, "loss": 0.0422, "step": 48510 }, { "epoch": 0.2024309235506672, "grad_norm": 1.0955088377507742, "learning_rate": 4.445600890253831e-06, "loss": 0.0394, "step": 48515 }, { "epoch": 0.20245178626565746, "grad_norm": 0.6161342517950505, "learning_rate": 4.445371819403822e-06, "loss": 0.031, "step": 48520 }, { "epoch": 0.20247264898064773, "grad_norm": 0.8627892430046381, "learning_rate": 4.445142783960532e-06, "loss": 0.0347, "step": 48525 }, { "epoch": 0.20249351169563803, "grad_norm": 0.5479787251434207, "learning_rate": 4.4449137839148414e-06, "loss": 0.0296, "step": 48530 }, { "epoch": 0.2025143744106283, "grad_norm": 0.654971220146963, "learning_rate": 4.444684819257633e-06, "loss": 0.0401, "step": 48535 }, { "epoch": 0.20253523712561858, "grad_norm": 0.78468220584783, "learning_rate": 4.444455889979792e-06, "loss": 0.0401, "step": 48540 }, { "epoch": 0.20255609984060885, "grad_norm": 0.5875466810223376, "learning_rate": 4.444226996072209e-06, "loss": 0.0316, "step": 48545 }, { "epoch": 0.20257696255559912, "grad_norm": 0.8451833914879909, "learning_rate": 4.443998137525777e-06, "loss": 0.0299, "step": 48550 }, { "epoch": 0.20259782527058942, "grad_norm": 0.7893357177783178, "learning_rate": 4.443769314331392e-06, "loss": 0.0366, "step": 48555 }, { "epoch": 0.2026186879855797, "grad_norm": 0.48881796292219387, "learning_rate": 4.443540526479953e-06, "loss": 0.0305, "step": 48560 }, { "epoch": 0.20263955070056996, "grad_norm": 0.6955179653527979, "learning_rate": 4.443311773962365e-06, "loss": 0.0345, "step": 48565 }, { "epoch": 0.20266041341556024, "grad_norm": 1.112555490282734, "learning_rate": 4.4430830567695305e-06, "loss": 0.0314, "step": 48570 }, { "epoch": 0.20268127613055054, "grad_norm": 0.9054084519158259, "learning_rate": 4.442854374892361e-06, "loss": 0.0281, "step": 48575 }, { "epoch": 0.2027021388455408, "grad_norm": 1.1771626501770491, "learning_rate": 4.442625728321768e-06, "loss": 0.036, "step": 48580 }, { "epoch": 0.20272300156053108, "grad_norm": 0.7474868251363588, "learning_rate": 4.442397117048669e-06, "loss": 0.0281, "step": 48585 }, { "epoch": 0.20274386427552135, "grad_norm": 0.993404626214945, "learning_rate": 4.442168541063983e-06, "loss": 0.0315, "step": 48590 }, { "epoch": 0.20276472699051162, "grad_norm": 0.8169370771833977, "learning_rate": 4.4419400003586295e-06, "loss": 0.0407, "step": 48595 }, { "epoch": 0.20278558970550192, "grad_norm": 0.5888746701048361, "learning_rate": 4.4417114949235375e-06, "loss": 0.0318, "step": 48600 }, { "epoch": 0.2028064524204922, "grad_norm": 0.7781224387071498, "learning_rate": 4.441483024749635e-06, "loss": 0.0303, "step": 48605 }, { "epoch": 0.20282731513548247, "grad_norm": 0.8054991859646868, "learning_rate": 4.441254589827853e-06, "loss": 0.0352, "step": 48610 }, { "epoch": 0.20284817785047274, "grad_norm": 0.5832865692214436, "learning_rate": 4.441026190149129e-06, "loss": 0.0266, "step": 48615 }, { "epoch": 0.20286904056546304, "grad_norm": 0.6255366202208414, "learning_rate": 4.4407978257044e-06, "loss": 0.0314, "step": 48620 }, { "epoch": 0.2028899032804533, "grad_norm": 0.994845863643911, "learning_rate": 4.440569496484609e-06, "loss": 0.0413, "step": 48625 }, { "epoch": 0.20291076599544358, "grad_norm": 0.851616358183208, "learning_rate": 4.440341202480701e-06, "loss": 0.0373, "step": 48630 }, { "epoch": 0.20293162871043385, "grad_norm": 1.0162895809784176, "learning_rate": 4.440112943683624e-06, "loss": 0.0366, "step": 48635 }, { "epoch": 0.20295249142542413, "grad_norm": 0.7907395693599356, "learning_rate": 4.439884720084331e-06, "loss": 0.034, "step": 48640 }, { "epoch": 0.20297335414041442, "grad_norm": 1.0672360310445177, "learning_rate": 4.439656531673776e-06, "loss": 0.028, "step": 48645 }, { "epoch": 0.2029942168554047, "grad_norm": 2.0101847590951887, "learning_rate": 4.439428378442917e-06, "loss": 0.038, "step": 48650 }, { "epoch": 0.20301507957039497, "grad_norm": 0.5930809524111773, "learning_rate": 4.439200260382716e-06, "loss": 0.042, "step": 48655 }, { "epoch": 0.20303594228538524, "grad_norm": 0.6868055123736211, "learning_rate": 4.438972177484137e-06, "loss": 0.0363, "step": 48660 }, { "epoch": 0.20305680500037554, "grad_norm": 0.7599723820000772, "learning_rate": 4.438744129738151e-06, "loss": 0.0307, "step": 48665 }, { "epoch": 0.2030776677153658, "grad_norm": 0.6363998378925524, "learning_rate": 4.438516117135726e-06, "loss": 0.0315, "step": 48670 }, { "epoch": 0.20309853043035608, "grad_norm": 0.663260640897339, "learning_rate": 4.438288139667838e-06, "loss": 0.0508, "step": 48675 }, { "epoch": 0.20311939314534636, "grad_norm": 0.7727786177721163, "learning_rate": 4.438060197325464e-06, "loss": 0.0408, "step": 48680 }, { "epoch": 0.20314025586033663, "grad_norm": 0.7538225968668192, "learning_rate": 4.437832290099586e-06, "loss": 0.0328, "step": 48685 }, { "epoch": 0.20316111857532693, "grad_norm": 1.0605860809858676, "learning_rate": 4.437604417981188e-06, "loss": 0.0384, "step": 48690 }, { "epoch": 0.2031819812903172, "grad_norm": 0.6846621067860599, "learning_rate": 4.437376580961255e-06, "loss": 0.0307, "step": 48695 }, { "epoch": 0.20320284400530747, "grad_norm": 0.9790279751995806, "learning_rate": 4.437148779030782e-06, "loss": 0.0467, "step": 48700 }, { "epoch": 0.20322370672029774, "grad_norm": 0.7675053524686202, "learning_rate": 4.436921012180761e-06, "loss": 0.0309, "step": 48705 }, { "epoch": 0.20324456943528804, "grad_norm": 0.6531667441281518, "learning_rate": 4.436693280402189e-06, "loss": 0.0226, "step": 48710 }, { "epoch": 0.2032654321502783, "grad_norm": 0.7918109462643346, "learning_rate": 4.436465583686065e-06, "loss": 0.0382, "step": 48715 }, { "epoch": 0.20328629486526859, "grad_norm": 0.7950973826882423, "learning_rate": 4.436237922023396e-06, "loss": 0.0308, "step": 48720 }, { "epoch": 0.20330715758025886, "grad_norm": 1.5125184600524382, "learning_rate": 4.436010295405187e-06, "loss": 0.0361, "step": 48725 }, { "epoch": 0.20332802029524913, "grad_norm": 1.0035217698267704, "learning_rate": 4.435782703822447e-06, "loss": 0.0365, "step": 48730 }, { "epoch": 0.20334888301023943, "grad_norm": 0.5829728198642139, "learning_rate": 4.435555147266192e-06, "loss": 0.0265, "step": 48735 }, { "epoch": 0.2033697457252297, "grad_norm": 0.865925874392567, "learning_rate": 4.435327625727437e-06, "loss": 0.0398, "step": 48740 }, { "epoch": 0.20339060844021997, "grad_norm": 1.4513551655607624, "learning_rate": 4.435100139197202e-06, "loss": 0.0351, "step": 48745 }, { "epoch": 0.20341147115521024, "grad_norm": 0.6852348661727629, "learning_rate": 4.43487268766651e-06, "loss": 0.0287, "step": 48750 }, { "epoch": 0.20343233387020054, "grad_norm": 0.9018365312714877, "learning_rate": 4.434645271126389e-06, "loss": 0.029, "step": 48755 }, { "epoch": 0.20345319658519082, "grad_norm": 1.0052151585957696, "learning_rate": 4.434417889567865e-06, "loss": 0.0405, "step": 48760 }, { "epoch": 0.2034740593001811, "grad_norm": 0.5666355525333423, "learning_rate": 4.434190542981973e-06, "loss": 0.0355, "step": 48765 }, { "epoch": 0.20349492201517136, "grad_norm": 0.8099382111001021, "learning_rate": 4.433963231359747e-06, "loss": 0.0315, "step": 48770 }, { "epoch": 0.20351578473016163, "grad_norm": 0.8064575616426323, "learning_rate": 4.43373595469223e-06, "loss": 0.042, "step": 48775 }, { "epoch": 0.20353664744515193, "grad_norm": 1.2262177188422163, "learning_rate": 4.433508712970461e-06, "loss": 0.0337, "step": 48780 }, { "epoch": 0.2035575101601422, "grad_norm": 1.2169324906436172, "learning_rate": 4.433281506185487e-06, "loss": 0.0243, "step": 48785 }, { "epoch": 0.20357837287513247, "grad_norm": 0.8873020812709554, "learning_rate": 4.433054334328356e-06, "loss": 0.0394, "step": 48790 }, { "epoch": 0.20359923559012275, "grad_norm": 0.7060526546071336, "learning_rate": 4.43282719739012e-06, "loss": 0.0302, "step": 48795 }, { "epoch": 0.20362009830511305, "grad_norm": 1.4048762296543953, "learning_rate": 4.432600095361835e-06, "loss": 0.0507, "step": 48800 }, { "epoch": 0.20364096102010332, "grad_norm": 0.7271373126746029, "learning_rate": 4.43237302823456e-06, "loss": 0.0293, "step": 48805 }, { "epoch": 0.2036618237350936, "grad_norm": 0.6562501819729805, "learning_rate": 4.4321459959993544e-06, "loss": 0.0356, "step": 48810 }, { "epoch": 0.20368268645008386, "grad_norm": 0.9371242145780602, "learning_rate": 4.4319189986472835e-06, "loss": 0.038, "step": 48815 }, { "epoch": 0.20370354916507413, "grad_norm": 1.2356063244241038, "learning_rate": 4.4316920361694165e-06, "loss": 0.0461, "step": 48820 }, { "epoch": 0.20372441188006443, "grad_norm": 0.8574372517752072, "learning_rate": 4.431465108556824e-06, "loss": 0.0431, "step": 48825 }, { "epoch": 0.2037452745950547, "grad_norm": 1.4352086234031465, "learning_rate": 4.431238215800582e-06, "loss": 0.0386, "step": 48830 }, { "epoch": 0.20376613731004498, "grad_norm": 1.2953750105612947, "learning_rate": 4.431011357891765e-06, "loss": 0.0412, "step": 48835 }, { "epoch": 0.20378700002503525, "grad_norm": 1.1484137773426886, "learning_rate": 4.430784534821456e-06, "loss": 0.0319, "step": 48840 }, { "epoch": 0.20380786274002555, "grad_norm": 1.0668624835338203, "learning_rate": 4.430557746580738e-06, "loss": 0.0387, "step": 48845 }, { "epoch": 0.20382872545501582, "grad_norm": 1.1252665785198073, "learning_rate": 4.430330993160699e-06, "loss": 0.0406, "step": 48850 }, { "epoch": 0.2038495881700061, "grad_norm": 0.5349755118174456, "learning_rate": 4.43010427455243e-06, "loss": 0.0353, "step": 48855 }, { "epoch": 0.20387045088499636, "grad_norm": 1.1530980783853035, "learning_rate": 4.429877590747023e-06, "loss": 0.0417, "step": 48860 }, { "epoch": 0.20389131359998663, "grad_norm": 0.9351335454841235, "learning_rate": 4.429650941735575e-06, "loss": 0.0312, "step": 48865 }, { "epoch": 0.20391217631497693, "grad_norm": 0.9581401461096193, "learning_rate": 4.429424327509186e-06, "loss": 0.0466, "step": 48870 }, { "epoch": 0.2039330390299672, "grad_norm": 1.1364202165618529, "learning_rate": 4.429197748058961e-06, "loss": 0.0286, "step": 48875 }, { "epoch": 0.20395390174495748, "grad_norm": 0.7557724786197193, "learning_rate": 4.428971203376004e-06, "loss": 0.0245, "step": 48880 }, { "epoch": 0.20397476445994775, "grad_norm": 0.9381908238663564, "learning_rate": 4.428744693451426e-06, "loss": 0.0291, "step": 48885 }, { "epoch": 0.20399562717493805, "grad_norm": 1.3607819842077133, "learning_rate": 4.428518218276338e-06, "loss": 0.0347, "step": 48890 }, { "epoch": 0.20401648988992832, "grad_norm": 0.6015105098356979, "learning_rate": 4.428291777841858e-06, "loss": 0.0425, "step": 48895 }, { "epoch": 0.2040373526049186, "grad_norm": 0.6909372268540169, "learning_rate": 4.428065372139104e-06, "loss": 0.0407, "step": 48900 }, { "epoch": 0.20405821531990886, "grad_norm": 0.43327250582474147, "learning_rate": 4.427839001159199e-06, "loss": 0.0331, "step": 48905 }, { "epoch": 0.20407907803489914, "grad_norm": 0.6967294702470441, "learning_rate": 4.4276126648932665e-06, "loss": 0.0518, "step": 48910 }, { "epoch": 0.20409994074988944, "grad_norm": 0.9291559556675175, "learning_rate": 4.427386363332438e-06, "loss": 0.0367, "step": 48915 }, { "epoch": 0.2041208034648797, "grad_norm": 1.054688166777036, "learning_rate": 4.4271600964678415e-06, "loss": 0.0325, "step": 48920 }, { "epoch": 0.20414166617986998, "grad_norm": 0.9996291215844602, "learning_rate": 4.426933864290615e-06, "loss": 0.0333, "step": 48925 }, { "epoch": 0.20416252889486025, "grad_norm": 0.9847046716405352, "learning_rate": 4.426707666791897e-06, "loss": 0.023, "step": 48930 }, { "epoch": 0.20418339160985055, "grad_norm": 0.9471184999323367, "learning_rate": 4.426481503962826e-06, "loss": 0.0424, "step": 48935 }, { "epoch": 0.20420425432484082, "grad_norm": 0.5657163324412962, "learning_rate": 4.426255375794549e-06, "loss": 0.0329, "step": 48940 }, { "epoch": 0.2042251170398311, "grad_norm": 1.0005565402408518, "learning_rate": 4.426029282278211e-06, "loss": 0.0397, "step": 48945 }, { "epoch": 0.20424597975482137, "grad_norm": 0.8070184780242452, "learning_rate": 4.425803223404966e-06, "loss": 0.0317, "step": 48950 }, { "epoch": 0.20426684246981164, "grad_norm": 1.0011678947119296, "learning_rate": 4.4255771991659655e-06, "loss": 0.0228, "step": 48955 }, { "epoch": 0.20428770518480194, "grad_norm": 0.9895122962660279, "learning_rate": 4.4253512095523675e-06, "loss": 0.034, "step": 48960 }, { "epoch": 0.2043085678997922, "grad_norm": 0.7350874060153354, "learning_rate": 4.425125254555332e-06, "loss": 0.0386, "step": 48965 }, { "epoch": 0.20432943061478248, "grad_norm": 1.073621347033697, "learning_rate": 4.424899334166023e-06, "loss": 0.0394, "step": 48970 }, { "epoch": 0.20435029332977275, "grad_norm": 0.8036643283633492, "learning_rate": 4.424673448375606e-06, "loss": 0.0305, "step": 48975 }, { "epoch": 0.20437115604476305, "grad_norm": 0.7234889746876837, "learning_rate": 4.424447597175253e-06, "loss": 0.0351, "step": 48980 }, { "epoch": 0.20439201875975332, "grad_norm": 0.7681450684284905, "learning_rate": 4.424221780556134e-06, "loss": 0.0333, "step": 48985 }, { "epoch": 0.2044128814747436, "grad_norm": 0.5335929295026878, "learning_rate": 4.4239959985094255e-06, "loss": 0.025, "step": 48990 }, { "epoch": 0.20443374418973387, "grad_norm": 0.6244801204709656, "learning_rate": 4.4237702510263085e-06, "loss": 0.0381, "step": 48995 }, { "epoch": 0.20445460690472414, "grad_norm": 1.42438036162807, "learning_rate": 4.423544538097965e-06, "loss": 0.0352, "step": 49000 }, { "epoch": 0.20447546961971444, "grad_norm": 0.9194604399377007, "learning_rate": 4.42331885971558e-06, "loss": 0.0252, "step": 49005 }, { "epoch": 0.2044963323347047, "grad_norm": 0.8516036957037108, "learning_rate": 4.423093215870341e-06, "loss": 0.037, "step": 49010 }, { "epoch": 0.20451719504969498, "grad_norm": 0.6010741297328601, "learning_rate": 4.422867606553442e-06, "loss": 0.0295, "step": 49015 }, { "epoch": 0.20453805776468525, "grad_norm": 1.2678070205455398, "learning_rate": 4.422642031756076e-06, "loss": 0.0375, "step": 49020 }, { "epoch": 0.20455892047967555, "grad_norm": 0.39429126523187585, "learning_rate": 4.422416491469443e-06, "loss": 0.0312, "step": 49025 }, { "epoch": 0.20457978319466583, "grad_norm": 0.7468318423567624, "learning_rate": 4.422190985684741e-06, "loss": 0.0361, "step": 49030 }, { "epoch": 0.2046006459096561, "grad_norm": 0.9005998224959406, "learning_rate": 4.4219655143931785e-06, "loss": 0.0342, "step": 49035 }, { "epoch": 0.20462150862464637, "grad_norm": 0.9243622862287125, "learning_rate": 4.42174007758596e-06, "loss": 0.0263, "step": 49040 }, { "epoch": 0.20464237133963664, "grad_norm": 1.9078299474108125, "learning_rate": 4.421514675254297e-06, "loss": 0.0446, "step": 49045 }, { "epoch": 0.20466323405462694, "grad_norm": 0.6471152511411679, "learning_rate": 4.4212893073894045e-06, "loss": 0.037, "step": 49050 }, { "epoch": 0.2046840967696172, "grad_norm": 1.1388750233269767, "learning_rate": 4.421063973982497e-06, "loss": 0.038, "step": 49055 }, { "epoch": 0.20470495948460748, "grad_norm": 0.6364149996040965, "learning_rate": 4.420838675024796e-06, "loss": 0.0297, "step": 49060 }, { "epoch": 0.20472582219959776, "grad_norm": 1.0118204331294238, "learning_rate": 4.420613410507526e-06, "loss": 0.0292, "step": 49065 }, { "epoch": 0.20474668491458806, "grad_norm": 0.9531720326256521, "learning_rate": 4.4203881804219105e-06, "loss": 0.0427, "step": 49070 }, { "epoch": 0.20476754762957833, "grad_norm": 0.9436908641258231, "learning_rate": 4.420162984759181e-06, "loss": 0.0317, "step": 49075 }, { "epoch": 0.2047884103445686, "grad_norm": 1.1923365796627383, "learning_rate": 4.41993782351057e-06, "loss": 0.0348, "step": 49080 }, { "epoch": 0.20480927305955887, "grad_norm": 0.9278598139548034, "learning_rate": 4.41971269666731e-06, "loss": 0.0323, "step": 49085 }, { "epoch": 0.20483013577454914, "grad_norm": 1.333569312089622, "learning_rate": 4.419487604220645e-06, "loss": 0.0366, "step": 49090 }, { "epoch": 0.20485099848953944, "grad_norm": 1.056820753576171, "learning_rate": 4.419262546161813e-06, "loss": 0.0356, "step": 49095 }, { "epoch": 0.20487186120452971, "grad_norm": 1.6274236776194266, "learning_rate": 4.41903752248206e-06, "loss": 0.0345, "step": 49100 }, { "epoch": 0.20489272391952, "grad_norm": 0.36797387401833276, "learning_rate": 4.418812533172636e-06, "loss": 0.0307, "step": 49105 }, { "epoch": 0.20491358663451026, "grad_norm": 0.8005295198900099, "learning_rate": 4.41858757822479e-06, "loss": 0.0298, "step": 49110 }, { "epoch": 0.20493444934950056, "grad_norm": 1.030116813497807, "learning_rate": 4.418362657629777e-06, "loss": 0.0421, "step": 49115 }, { "epoch": 0.20495531206449083, "grad_norm": 0.6826756964263017, "learning_rate": 4.418137771378854e-06, "loss": 0.0329, "step": 49120 }, { "epoch": 0.2049761747794811, "grad_norm": 0.46015509858471476, "learning_rate": 4.4179129194632834e-06, "loss": 0.0383, "step": 49125 }, { "epoch": 0.20499703749447137, "grad_norm": 1.3151316913331421, "learning_rate": 4.417688101874329e-06, "loss": 0.0301, "step": 49130 }, { "epoch": 0.20501790020946165, "grad_norm": 0.6317229180169174, "learning_rate": 4.417463318603254e-06, "loss": 0.0297, "step": 49135 }, { "epoch": 0.20503876292445194, "grad_norm": 0.9042470938132542, "learning_rate": 4.417238569641333e-06, "loss": 0.0423, "step": 49140 }, { "epoch": 0.20505962563944222, "grad_norm": 0.802337096923629, "learning_rate": 4.417013854979837e-06, "loss": 0.0409, "step": 49145 }, { "epoch": 0.2050804883544325, "grad_norm": 0.9128570934272289, "learning_rate": 4.416789174610041e-06, "loss": 0.0412, "step": 49150 }, { "epoch": 0.20510135106942276, "grad_norm": 0.8292876640105263, "learning_rate": 4.416564528523228e-06, "loss": 0.0365, "step": 49155 }, { "epoch": 0.20512221378441306, "grad_norm": 0.809324288787246, "learning_rate": 4.416339916710676e-06, "loss": 0.0329, "step": 49160 }, { "epoch": 0.20514307649940333, "grad_norm": 0.543201846878251, "learning_rate": 4.416115339163673e-06, "loss": 0.029, "step": 49165 }, { "epoch": 0.2051639392143936, "grad_norm": 0.7364662601492977, "learning_rate": 4.415890795873508e-06, "loss": 0.0239, "step": 49170 }, { "epoch": 0.20518480192938388, "grad_norm": 1.0314400930701102, "learning_rate": 4.41566628683147e-06, "loss": 0.0352, "step": 49175 }, { "epoch": 0.20520566464437415, "grad_norm": 0.638202152669189, "learning_rate": 4.415441812028857e-06, "loss": 0.035, "step": 49180 }, { "epoch": 0.20522652735936445, "grad_norm": 0.7345407229020798, "learning_rate": 4.415217371456966e-06, "loss": 0.0279, "step": 49185 }, { "epoch": 0.20524739007435472, "grad_norm": 0.794079511027874, "learning_rate": 4.414992965107097e-06, "loss": 0.0319, "step": 49190 }, { "epoch": 0.205268252789345, "grad_norm": 0.6233198660560176, "learning_rate": 4.414768592970554e-06, "loss": 0.0342, "step": 49195 }, { "epoch": 0.20528911550433526, "grad_norm": 0.93165108480015, "learning_rate": 4.414544255038646e-06, "loss": 0.0396, "step": 49200 }, { "epoch": 0.20530997821932556, "grad_norm": 1.5737945529883313, "learning_rate": 4.414319951302682e-06, "loss": 0.0261, "step": 49205 }, { "epoch": 0.20533084093431583, "grad_norm": 0.8726666563219426, "learning_rate": 4.414095681753974e-06, "loss": 0.0406, "step": 49210 }, { "epoch": 0.2053517036493061, "grad_norm": 1.344895155467855, "learning_rate": 4.4138714463838425e-06, "loss": 0.0376, "step": 49215 }, { "epoch": 0.20537256636429638, "grad_norm": 0.9183241539153146, "learning_rate": 4.413647245183602e-06, "loss": 0.0343, "step": 49220 }, { "epoch": 0.20539342907928665, "grad_norm": 0.5106838153530163, "learning_rate": 4.41342307814458e-06, "loss": 0.0336, "step": 49225 }, { "epoch": 0.20541429179427695, "grad_norm": 0.8209580370900739, "learning_rate": 4.413198945258098e-06, "loss": 0.0341, "step": 49230 }, { "epoch": 0.20543515450926722, "grad_norm": 0.6038786945762799, "learning_rate": 4.412974846515487e-06, "loss": 0.0426, "step": 49235 }, { "epoch": 0.2054560172242575, "grad_norm": 1.449482178821028, "learning_rate": 4.41275078190808e-06, "loss": 0.0489, "step": 49240 }, { "epoch": 0.20547687993924776, "grad_norm": 1.0072235019723375, "learning_rate": 4.412526751427209e-06, "loss": 0.0437, "step": 49245 }, { "epoch": 0.20549774265423806, "grad_norm": 1.3387418266799944, "learning_rate": 4.4123027550642144e-06, "loss": 0.0353, "step": 49250 }, { "epoch": 0.20551860536922834, "grad_norm": 0.8268371297691586, "learning_rate": 4.412078792810437e-06, "loss": 0.0325, "step": 49255 }, { "epoch": 0.2055394680842186, "grad_norm": 1.61089291655736, "learning_rate": 4.411854864657219e-06, "loss": 0.0365, "step": 49260 }, { "epoch": 0.20556033079920888, "grad_norm": 0.8483753786027888, "learning_rate": 4.411630970595911e-06, "loss": 0.0426, "step": 49265 }, { "epoch": 0.20558119351419915, "grad_norm": 1.6031454531070826, "learning_rate": 4.411407110617859e-06, "loss": 0.0429, "step": 49270 }, { "epoch": 0.20560205622918945, "grad_norm": 0.935549653622862, "learning_rate": 4.411183284714422e-06, "loss": 0.0295, "step": 49275 }, { "epoch": 0.20562291894417972, "grad_norm": 0.792083748408871, "learning_rate": 4.4109594928769525e-06, "loss": 0.0254, "step": 49280 }, { "epoch": 0.20564378165917, "grad_norm": 0.4584413394810335, "learning_rate": 4.41073573509681e-06, "loss": 0.0345, "step": 49285 }, { "epoch": 0.20566464437416027, "grad_norm": 1.1148821812154135, "learning_rate": 4.410512011365359e-06, "loss": 0.0498, "step": 49290 }, { "epoch": 0.20568550708915057, "grad_norm": 0.6913487876236051, "learning_rate": 4.410288321673964e-06, "loss": 0.0204, "step": 49295 }, { "epoch": 0.20570636980414084, "grad_norm": 0.6275062766810614, "learning_rate": 4.410064666013994e-06, "loss": 0.0383, "step": 49300 }, { "epoch": 0.2057272325191311, "grad_norm": 0.8988995861691085, "learning_rate": 4.409841044376822e-06, "loss": 0.037, "step": 49305 }, { "epoch": 0.20574809523412138, "grad_norm": 1.0254773533099957, "learning_rate": 4.409617456753821e-06, "loss": 0.0314, "step": 49310 }, { "epoch": 0.20576895794911165, "grad_norm": 0.9120591244189669, "learning_rate": 4.40939390313637e-06, "loss": 0.0274, "step": 49315 }, { "epoch": 0.20578982066410195, "grad_norm": 3.4692483970958996, "learning_rate": 4.409170383515849e-06, "loss": 0.0251, "step": 49320 }, { "epoch": 0.20581068337909222, "grad_norm": 0.9228821898043749, "learning_rate": 4.408946897883643e-06, "loss": 0.0367, "step": 49325 }, { "epoch": 0.2058315460940825, "grad_norm": 0.7438026253062634, "learning_rate": 4.40872344623114e-06, "loss": 0.0414, "step": 49330 }, { "epoch": 0.20585240880907277, "grad_norm": 0.47232175364460793, "learning_rate": 4.4085000285497285e-06, "loss": 0.0346, "step": 49335 }, { "epoch": 0.20587327152406307, "grad_norm": 0.8301130359069674, "learning_rate": 4.4082766448308014e-06, "loss": 0.0544, "step": 49340 }, { "epoch": 0.20589413423905334, "grad_norm": 0.67119876103686, "learning_rate": 4.4080532950657576e-06, "loss": 0.0291, "step": 49345 }, { "epoch": 0.2059149969540436, "grad_norm": 0.629496635629997, "learning_rate": 4.407829979245994e-06, "loss": 0.0491, "step": 49350 }, { "epoch": 0.20593585966903388, "grad_norm": 0.8188769655082646, "learning_rate": 4.407606697362913e-06, "loss": 0.0345, "step": 49355 }, { "epoch": 0.20595672238402415, "grad_norm": 1.1802250410827833, "learning_rate": 4.4073834494079225e-06, "loss": 0.0272, "step": 49360 }, { "epoch": 0.20597758509901445, "grad_norm": 0.46863194253181156, "learning_rate": 4.407160235372428e-06, "loss": 0.0271, "step": 49365 }, { "epoch": 0.20599844781400473, "grad_norm": 0.857831283883178, "learning_rate": 4.406937055247843e-06, "loss": 0.0327, "step": 49370 }, { "epoch": 0.206019310528995, "grad_norm": 0.8601490994934067, "learning_rate": 4.4067139090255804e-06, "loss": 0.0406, "step": 49375 }, { "epoch": 0.20604017324398527, "grad_norm": 0.9133174042535738, "learning_rate": 4.40649079669706e-06, "loss": 0.0317, "step": 49380 }, { "epoch": 0.20606103595897557, "grad_norm": 0.587371525747779, "learning_rate": 4.406267718253701e-06, "loss": 0.0365, "step": 49385 }, { "epoch": 0.20608189867396584, "grad_norm": 0.5817920365474223, "learning_rate": 4.406044673686927e-06, "loss": 0.0288, "step": 49390 }, { "epoch": 0.2061027613889561, "grad_norm": 0.892079418431717, "learning_rate": 4.405821662988165e-06, "loss": 0.0347, "step": 49395 }, { "epoch": 0.20612362410394638, "grad_norm": 0.4583823074738502, "learning_rate": 4.405598686148845e-06, "loss": 0.0304, "step": 49400 }, { "epoch": 0.20614448681893666, "grad_norm": 0.5967958617090237, "learning_rate": 4.4053757431604e-06, "loss": 0.0249, "step": 49405 }, { "epoch": 0.20616534953392696, "grad_norm": 0.7133148511393069, "learning_rate": 4.4051528340142644e-06, "loss": 0.0496, "step": 49410 }, { "epoch": 0.20618621224891723, "grad_norm": 1.0147967746612938, "learning_rate": 4.404929958701879e-06, "loss": 0.0375, "step": 49415 }, { "epoch": 0.2062070749639075, "grad_norm": 0.5731391738341247, "learning_rate": 4.4047071172146846e-06, "loss": 0.0297, "step": 49420 }, { "epoch": 0.20622793767889777, "grad_norm": 0.7626597028011878, "learning_rate": 4.404484309544127e-06, "loss": 0.0467, "step": 49425 }, { "epoch": 0.20624880039388807, "grad_norm": 0.5416688167965423, "learning_rate": 4.404261535681652e-06, "loss": 0.0329, "step": 49430 }, { "epoch": 0.20626966310887834, "grad_norm": 1.0296334724373677, "learning_rate": 4.4040387956187134e-06, "loss": 0.0325, "step": 49435 }, { "epoch": 0.20629052582386861, "grad_norm": 1.778416867403107, "learning_rate": 4.403816089346763e-06, "loss": 0.0485, "step": 49440 }, { "epoch": 0.2063113885388589, "grad_norm": 1.6353308400550868, "learning_rate": 4.4035934168572595e-06, "loss": 0.0283, "step": 49445 }, { "epoch": 0.20633225125384916, "grad_norm": 0.9315172745390153, "learning_rate": 4.403370778141663e-06, "loss": 0.0335, "step": 49450 }, { "epoch": 0.20635311396883946, "grad_norm": 1.9588364464034353, "learning_rate": 4.403148173191434e-06, "loss": 0.0322, "step": 49455 }, { "epoch": 0.20637397668382973, "grad_norm": 1.1718399498699794, "learning_rate": 4.402925601998043e-06, "loss": 0.0467, "step": 49460 }, { "epoch": 0.20639483939882, "grad_norm": 0.6839557808082787, "learning_rate": 4.402703064552954e-06, "loss": 0.0323, "step": 49465 }, { "epoch": 0.20641570211381027, "grad_norm": 0.7885483496493538, "learning_rate": 4.402480560847642e-06, "loss": 0.0367, "step": 49470 }, { "epoch": 0.20643656482880057, "grad_norm": 0.9378113963399946, "learning_rate": 4.402258090873583e-06, "loss": 0.0355, "step": 49475 }, { "epoch": 0.20645742754379084, "grad_norm": 0.7491098086796735, "learning_rate": 4.4020356546222544e-06, "loss": 0.0369, "step": 49480 }, { "epoch": 0.20647829025878112, "grad_norm": 0.6320602008095422, "learning_rate": 4.401813252085136e-06, "loss": 0.0292, "step": 49485 }, { "epoch": 0.2064991529737714, "grad_norm": 1.164161330686766, "learning_rate": 4.401590883253714e-06, "loss": 0.0422, "step": 49490 }, { "epoch": 0.20652001568876166, "grad_norm": 1.5311646133151626, "learning_rate": 4.401368548119473e-06, "loss": 0.0394, "step": 49495 }, { "epoch": 0.20654087840375196, "grad_norm": 1.1339848555746095, "learning_rate": 4.4011462466739065e-06, "loss": 0.0321, "step": 49500 }, { "epoch": 0.20656174111874223, "grad_norm": 1.5750198472676344, "learning_rate": 4.400923978908505e-06, "loss": 0.0289, "step": 49505 }, { "epoch": 0.2065826038337325, "grad_norm": 1.5263103101259374, "learning_rate": 4.400701744814767e-06, "loss": 0.036, "step": 49510 }, { "epoch": 0.20660346654872278, "grad_norm": 0.9955988071235471, "learning_rate": 4.40047954438419e-06, "loss": 0.0382, "step": 49515 }, { "epoch": 0.20662432926371307, "grad_norm": 1.0570570029021835, "learning_rate": 4.400257377608277e-06, "loss": 0.0286, "step": 49520 }, { "epoch": 0.20664519197870335, "grad_norm": 0.9186703510566931, "learning_rate": 4.4000352444785336e-06, "loss": 0.0278, "step": 49525 }, { "epoch": 0.20666605469369362, "grad_norm": 0.8115198840174344, "learning_rate": 4.399813144986466e-06, "loss": 0.0395, "step": 49530 }, { "epoch": 0.2066869174086839, "grad_norm": 0.6597839087378982, "learning_rate": 4.399591079123588e-06, "loss": 0.0285, "step": 49535 }, { "epoch": 0.20670778012367416, "grad_norm": 0.6856064956133299, "learning_rate": 4.3993690468814135e-06, "loss": 0.0368, "step": 49540 }, { "epoch": 0.20672864283866446, "grad_norm": 1.0474501358319057, "learning_rate": 4.399147048251457e-06, "loss": 0.0344, "step": 49545 }, { "epoch": 0.20674950555365473, "grad_norm": 0.8343719162278737, "learning_rate": 4.398925083225243e-06, "loss": 0.0295, "step": 49550 }, { "epoch": 0.206770368268645, "grad_norm": 1.1701407077714638, "learning_rate": 4.3987031517942925e-06, "loss": 0.0391, "step": 49555 }, { "epoch": 0.20679123098363528, "grad_norm": 0.756625565180181, "learning_rate": 4.39848125395013e-06, "loss": 0.0276, "step": 49560 }, { "epoch": 0.20681209369862558, "grad_norm": 1.5927136936358242, "learning_rate": 4.398259389684288e-06, "loss": 0.04, "step": 49565 }, { "epoch": 0.20683295641361585, "grad_norm": 1.7070283102100605, "learning_rate": 4.398037558988299e-06, "loss": 0.0419, "step": 49570 }, { "epoch": 0.20685381912860612, "grad_norm": 0.811100606813053, "learning_rate": 4.3978157618536935e-06, "loss": 0.029, "step": 49575 }, { "epoch": 0.2068746818435964, "grad_norm": 0.6099364328019945, "learning_rate": 4.397593998272013e-06, "loss": 0.0339, "step": 49580 }, { "epoch": 0.20689554455858666, "grad_norm": 1.0632209282061547, "learning_rate": 4.3973722682347995e-06, "loss": 0.0398, "step": 49585 }, { "epoch": 0.20691640727357696, "grad_norm": 0.7353705270592578, "learning_rate": 4.397150571733596e-06, "loss": 0.0405, "step": 49590 }, { "epoch": 0.20693726998856724, "grad_norm": 0.6157723003320007, "learning_rate": 4.396928908759949e-06, "loss": 0.027, "step": 49595 }, { "epoch": 0.2069581327035575, "grad_norm": 0.4596304455363805, "learning_rate": 4.39670727930541e-06, "loss": 0.0331, "step": 49600 }, { "epoch": 0.20697899541854778, "grad_norm": 1.1117053284290777, "learning_rate": 4.396485683361531e-06, "loss": 0.0344, "step": 49605 }, { "epoch": 0.20699985813353808, "grad_norm": 1.0572362793971415, "learning_rate": 4.3962641209198685e-06, "loss": 0.0389, "step": 49610 }, { "epoch": 0.20702072084852835, "grad_norm": 0.6325718297131884, "learning_rate": 4.396042591971981e-06, "loss": 0.0302, "step": 49615 }, { "epoch": 0.20704158356351862, "grad_norm": 0.6141945898461036, "learning_rate": 4.395821096509432e-06, "loss": 0.0362, "step": 49620 }, { "epoch": 0.2070624462785089, "grad_norm": 0.4689254643823809, "learning_rate": 4.395599634523785e-06, "loss": 0.0354, "step": 49625 }, { "epoch": 0.20708330899349917, "grad_norm": 0.9299452274961704, "learning_rate": 4.395378206006609e-06, "loss": 0.0321, "step": 49630 }, { "epoch": 0.20710417170848947, "grad_norm": 1.3899150155841993, "learning_rate": 4.395156810949475e-06, "loss": 0.0386, "step": 49635 }, { "epoch": 0.20712503442347974, "grad_norm": 0.9102550383428123, "learning_rate": 4.3949354493439566e-06, "loss": 0.0409, "step": 49640 }, { "epoch": 0.20714589713847, "grad_norm": 0.35054349603704005, "learning_rate": 4.3947141211816305e-06, "loss": 0.029, "step": 49645 }, { "epoch": 0.20716675985346028, "grad_norm": 0.9509935889421253, "learning_rate": 4.394492826454076e-06, "loss": 0.037, "step": 49650 }, { "epoch": 0.20718762256845058, "grad_norm": 0.7606336530077883, "learning_rate": 4.394271565152877e-06, "loss": 0.0342, "step": 49655 }, { "epoch": 0.20720848528344085, "grad_norm": 0.8979677976224112, "learning_rate": 4.394050337269619e-06, "loss": 0.0415, "step": 49660 }, { "epoch": 0.20722934799843112, "grad_norm": 0.48617091780218963, "learning_rate": 4.393829142795892e-06, "loss": 0.0302, "step": 49665 }, { "epoch": 0.2072502107134214, "grad_norm": 1.1907054207607801, "learning_rate": 4.393607981723285e-06, "loss": 0.046, "step": 49670 }, { "epoch": 0.20727107342841167, "grad_norm": 1.1227472182021614, "learning_rate": 4.3933868540433945e-06, "loss": 0.0456, "step": 49675 }, { "epoch": 0.20729193614340197, "grad_norm": 0.9858523880890595, "learning_rate": 4.393165759747818e-06, "loss": 0.0364, "step": 49680 }, { "epoch": 0.20731279885839224, "grad_norm": 1.3834129326052071, "learning_rate": 4.392944698828157e-06, "loss": 0.0397, "step": 49685 }, { "epoch": 0.2073336615733825, "grad_norm": 0.5887333919668027, "learning_rate": 4.392723671276014e-06, "loss": 0.0343, "step": 49690 }, { "epoch": 0.20735452428837278, "grad_norm": 0.9485874317074332, "learning_rate": 4.392502677082994e-06, "loss": 0.0377, "step": 49695 }, { "epoch": 0.20737538700336308, "grad_norm": 0.8326394397948776, "learning_rate": 4.3922817162407094e-06, "loss": 0.0359, "step": 49700 }, { "epoch": 0.20739624971835335, "grad_norm": 1.070360794582284, "learning_rate": 4.392060788740772e-06, "loss": 0.0383, "step": 49705 }, { "epoch": 0.20741711243334363, "grad_norm": 0.8193632218546953, "learning_rate": 4.391839894574796e-06, "loss": 0.034, "step": 49710 }, { "epoch": 0.2074379751483339, "grad_norm": 0.6382090038770318, "learning_rate": 4.3916190337344e-06, "loss": 0.0357, "step": 49715 }, { "epoch": 0.20745883786332417, "grad_norm": 0.6980786837473693, "learning_rate": 4.3913982062112065e-06, "loss": 0.026, "step": 49720 }, { "epoch": 0.20747970057831447, "grad_norm": 0.5303498482818324, "learning_rate": 4.391177411996838e-06, "loss": 0.0297, "step": 49725 }, { "epoch": 0.20750056329330474, "grad_norm": 0.7615236307548022, "learning_rate": 4.390956651082923e-06, "loss": 0.0345, "step": 49730 }, { "epoch": 0.207521426008295, "grad_norm": 1.3493523803618195, "learning_rate": 4.390735923461092e-06, "loss": 0.0325, "step": 49735 }, { "epoch": 0.20754228872328528, "grad_norm": 0.70353260099946, "learning_rate": 4.390515229122976e-06, "loss": 0.033, "step": 49740 }, { "epoch": 0.20756315143827558, "grad_norm": 1.426472438731763, "learning_rate": 4.390294568060214e-06, "loss": 0.0521, "step": 49745 }, { "epoch": 0.20758401415326586, "grad_norm": 1.0109041167257449, "learning_rate": 4.3900739402644424e-06, "loss": 0.033, "step": 49750 }, { "epoch": 0.20760487686825613, "grad_norm": 0.8210813622050174, "learning_rate": 4.389853345727305e-06, "loss": 0.0341, "step": 49755 }, { "epoch": 0.2076257395832464, "grad_norm": 0.9857857137278895, "learning_rate": 4.389632784440444e-06, "loss": 0.0405, "step": 49760 }, { "epoch": 0.20764660229823667, "grad_norm": 1.0081752700449682, "learning_rate": 4.389412256395511e-06, "loss": 0.0306, "step": 49765 }, { "epoch": 0.20766746501322697, "grad_norm": 1.4050092799323461, "learning_rate": 4.389191761584155e-06, "loss": 0.0323, "step": 49770 }, { "epoch": 0.20768832772821724, "grad_norm": 0.8762268246852052, "learning_rate": 4.388971299998028e-06, "loss": 0.0248, "step": 49775 }, { "epoch": 0.20770919044320751, "grad_norm": 1.6581772426782087, "learning_rate": 4.388750871628788e-06, "loss": 0.0345, "step": 49780 }, { "epoch": 0.20773005315819779, "grad_norm": 1.214102953445634, "learning_rate": 4.388530476468095e-06, "loss": 0.0292, "step": 49785 }, { "epoch": 0.20775091587318809, "grad_norm": 1.1159776862830708, "learning_rate": 4.388310114507612e-06, "loss": 0.041, "step": 49790 }, { "epoch": 0.20777177858817836, "grad_norm": 0.8276337110263724, "learning_rate": 4.388089785739002e-06, "loss": 0.0289, "step": 49795 }, { "epoch": 0.20779264130316863, "grad_norm": 0.7216669020621754, "learning_rate": 4.387869490153935e-06, "loss": 0.0324, "step": 49800 }, { "epoch": 0.2078135040181589, "grad_norm": 1.0124093103295293, "learning_rate": 4.387649227744082e-06, "loss": 0.0291, "step": 49805 }, { "epoch": 0.20783436673314917, "grad_norm": 0.7493844051552907, "learning_rate": 4.3874289985011175e-06, "loss": 0.039, "step": 49810 }, { "epoch": 0.20785522944813947, "grad_norm": 1.1297117158003989, "learning_rate": 4.387208802416718e-06, "loss": 0.0318, "step": 49815 }, { "epoch": 0.20787609216312974, "grad_norm": 0.7474105936525279, "learning_rate": 4.3869886394825645e-06, "loss": 0.0334, "step": 49820 }, { "epoch": 0.20789695487812002, "grad_norm": 0.8099446648803716, "learning_rate": 4.386768509690339e-06, "loss": 0.0436, "step": 49825 }, { "epoch": 0.2079178175931103, "grad_norm": 0.5896769485527488, "learning_rate": 4.386548413031727e-06, "loss": 0.038, "step": 49830 }, { "epoch": 0.2079386803081006, "grad_norm": 0.5284892403826034, "learning_rate": 4.386328349498418e-06, "loss": 0.0359, "step": 49835 }, { "epoch": 0.20795954302309086, "grad_norm": 0.819415082866106, "learning_rate": 4.386108319082105e-06, "loss": 0.0377, "step": 49840 }, { "epoch": 0.20798040573808113, "grad_norm": 0.9346665442392432, "learning_rate": 4.3858883217744804e-06, "loss": 0.0351, "step": 49845 }, { "epoch": 0.2080012684530714, "grad_norm": 0.8380303360510811, "learning_rate": 4.385668357567242e-06, "loss": 0.034, "step": 49850 }, { "epoch": 0.20802213116806167, "grad_norm": 0.6357452753514932, "learning_rate": 4.3854484264520925e-06, "loss": 0.0359, "step": 49855 }, { "epoch": 0.20804299388305197, "grad_norm": 0.7906648212013121, "learning_rate": 4.385228528420732e-06, "loss": 0.0377, "step": 49860 }, { "epoch": 0.20806385659804225, "grad_norm": 0.7805503568942448, "learning_rate": 4.38500866346487e-06, "loss": 0.0296, "step": 49865 }, { "epoch": 0.20808471931303252, "grad_norm": 0.8997083360517646, "learning_rate": 4.384788831576213e-06, "loss": 0.0358, "step": 49870 }, { "epoch": 0.2081055820280228, "grad_norm": 1.3077058853312127, "learning_rate": 4.3845690327464755e-06, "loss": 0.0557, "step": 49875 }, { "epoch": 0.2081264447430131, "grad_norm": 0.9967075591460589, "learning_rate": 4.3843492669673695e-06, "loss": 0.0382, "step": 49880 }, { "epoch": 0.20814730745800336, "grad_norm": 1.0376334182948233, "learning_rate": 4.3841295342306164e-06, "loss": 0.0385, "step": 49885 }, { "epoch": 0.20816817017299363, "grad_norm": 1.2864360089368712, "learning_rate": 4.383909834527935e-06, "loss": 0.0549, "step": 49890 }, { "epoch": 0.2081890328879839, "grad_norm": 0.5023669568178926, "learning_rate": 4.383690167851049e-06, "loss": 0.0182, "step": 49895 }, { "epoch": 0.20820989560297418, "grad_norm": 0.928190208017293, "learning_rate": 4.383470534191685e-06, "loss": 0.0293, "step": 49900 }, { "epoch": 0.20823075831796448, "grad_norm": 0.6458693226444293, "learning_rate": 4.383250933541573e-06, "loss": 0.0241, "step": 49905 }, { "epoch": 0.20825162103295475, "grad_norm": 1.098255956209014, "learning_rate": 4.383031365892447e-06, "loss": 0.0351, "step": 49910 }, { "epoch": 0.20827248374794502, "grad_norm": 1.4440706318096515, "learning_rate": 4.382811831236039e-06, "loss": 0.0425, "step": 49915 }, { "epoch": 0.2082933464629353, "grad_norm": 1.0098623788058436, "learning_rate": 4.38259232956409e-06, "loss": 0.0422, "step": 49920 }, { "epoch": 0.2083142091779256, "grad_norm": 1.0480729755809464, "learning_rate": 4.38237286086834e-06, "loss": 0.0304, "step": 49925 }, { "epoch": 0.20833507189291586, "grad_norm": 0.949805013068102, "learning_rate": 4.382153425140532e-06, "loss": 0.0327, "step": 49930 }, { "epoch": 0.20835593460790613, "grad_norm": 0.7971518654515198, "learning_rate": 4.3819340223724144e-06, "loss": 0.0342, "step": 49935 }, { "epoch": 0.2083767973228964, "grad_norm": 0.3969015434020748, "learning_rate": 4.381714652555737e-06, "loss": 0.0333, "step": 49940 }, { "epoch": 0.20839766003788668, "grad_norm": 0.8922154423821337, "learning_rate": 4.381495315682251e-06, "loss": 0.0276, "step": 49945 }, { "epoch": 0.20841852275287698, "grad_norm": 0.9340369955005339, "learning_rate": 4.381276011743715e-06, "loss": 0.0334, "step": 49950 }, { "epoch": 0.20843938546786725, "grad_norm": 0.5645715526890139, "learning_rate": 4.3810567407318835e-06, "loss": 0.0325, "step": 49955 }, { "epoch": 0.20846024818285752, "grad_norm": 0.6816359065488466, "learning_rate": 4.380837502638522e-06, "loss": 0.0361, "step": 49960 }, { "epoch": 0.2084811108978478, "grad_norm": 0.8787401793634717, "learning_rate": 4.38061829745539e-06, "loss": 0.0336, "step": 49965 }, { "epoch": 0.2085019736128381, "grad_norm": 1.006512048288993, "learning_rate": 4.38039912517426e-06, "loss": 0.0418, "step": 49970 }, { "epoch": 0.20852283632782836, "grad_norm": 0.7534831164467131, "learning_rate": 4.3801799857868976e-06, "loss": 0.0278, "step": 49975 }, { "epoch": 0.20854369904281864, "grad_norm": 0.9487685227659295, "learning_rate": 4.379960879285078e-06, "loss": 0.0331, "step": 49980 }, { "epoch": 0.2085645617578089, "grad_norm": 0.8360776037918994, "learning_rate": 4.379741805660576e-06, "loss": 0.0413, "step": 49985 }, { "epoch": 0.20858542447279918, "grad_norm": 1.0726458097999307, "learning_rate": 4.379522764905172e-06, "loss": 0.0336, "step": 49990 }, { "epoch": 0.20860628718778948, "grad_norm": 0.5804991401242715, "learning_rate": 4.3793037570106435e-06, "loss": 0.0368, "step": 49995 }, { "epoch": 0.20862714990277975, "grad_norm": 0.9449745938804545, "learning_rate": 4.37908478196878e-06, "loss": 0.0362, "step": 50000 }, { "epoch": 0.20864801261777002, "grad_norm": 0.4082351305521886, "learning_rate": 4.378865839771366e-06, "loss": 0.033, "step": 50005 }, { "epoch": 0.2086688753327603, "grad_norm": 0.8302740883831643, "learning_rate": 4.378646930410191e-06, "loss": 0.0255, "step": 50010 }, { "epoch": 0.2086897380477506, "grad_norm": 0.7726442358757133, "learning_rate": 4.3784280538770496e-06, "loss": 0.0379, "step": 50015 }, { "epoch": 0.20871060076274087, "grad_norm": 0.495231704606475, "learning_rate": 4.378209210163738e-06, "loss": 0.0273, "step": 50020 }, { "epoch": 0.20873146347773114, "grad_norm": 0.9294067533935256, "learning_rate": 4.377990399262052e-06, "loss": 0.032, "step": 50025 }, { "epoch": 0.2087523261927214, "grad_norm": 1.1271209348289315, "learning_rate": 4.377771621163797e-06, "loss": 0.0318, "step": 50030 }, { "epoch": 0.20877318890771168, "grad_norm": 0.48648754763074503, "learning_rate": 4.377552875860776e-06, "loss": 0.0325, "step": 50035 }, { "epoch": 0.20879405162270198, "grad_norm": 0.5951228753086948, "learning_rate": 4.377334163344796e-06, "loss": 0.0281, "step": 50040 }, { "epoch": 0.20881491433769225, "grad_norm": 0.8286401491675838, "learning_rate": 4.377115483607668e-06, "loss": 0.0302, "step": 50045 }, { "epoch": 0.20883577705268253, "grad_norm": 0.6577166106517981, "learning_rate": 4.3768968366412034e-06, "loss": 0.0307, "step": 50050 }, { "epoch": 0.2088566397676728, "grad_norm": 0.9474423022080297, "learning_rate": 4.37667822243722e-06, "loss": 0.0275, "step": 50055 }, { "epoch": 0.2088775024826631, "grad_norm": 0.8045365751151471, "learning_rate": 4.376459640987536e-06, "loss": 0.0235, "step": 50060 }, { "epoch": 0.20889836519765337, "grad_norm": 0.5170490353533735, "learning_rate": 4.376241092283973e-06, "loss": 0.0323, "step": 50065 }, { "epoch": 0.20891922791264364, "grad_norm": 0.9099962828148817, "learning_rate": 4.3760225763183555e-06, "loss": 0.0324, "step": 50070 }, { "epoch": 0.2089400906276339, "grad_norm": 0.9560295533459543, "learning_rate": 4.375804093082512e-06, "loss": 0.0329, "step": 50075 }, { "epoch": 0.20896095334262418, "grad_norm": 0.8138675958540302, "learning_rate": 4.375585642568271e-06, "loss": 0.0525, "step": 50080 }, { "epoch": 0.20898181605761448, "grad_norm": 0.7363916366181823, "learning_rate": 4.375367224767467e-06, "loss": 0.0266, "step": 50085 }, { "epoch": 0.20900267877260476, "grad_norm": 0.8932141349628767, "learning_rate": 4.375148839671935e-06, "loss": 0.0267, "step": 50090 }, { "epoch": 0.20902354148759503, "grad_norm": 0.8505722940631256, "learning_rate": 4.374930487273514e-06, "loss": 0.0444, "step": 50095 }, { "epoch": 0.2090444042025853, "grad_norm": 1.4051756714182022, "learning_rate": 4.3747121675640464e-06, "loss": 0.0393, "step": 50100 }, { "epoch": 0.2090652669175756, "grad_norm": 1.007762778248197, "learning_rate": 4.374493880535376e-06, "loss": 0.034, "step": 50105 }, { "epoch": 0.20908612963256587, "grad_norm": 1.2132437129588767, "learning_rate": 4.37427562617935e-06, "loss": 0.0372, "step": 50110 }, { "epoch": 0.20910699234755614, "grad_norm": 0.7539425261795845, "learning_rate": 4.37405740448782e-06, "loss": 0.0342, "step": 50115 }, { "epoch": 0.20912785506254641, "grad_norm": 0.577666890117115, "learning_rate": 4.3738392154526375e-06, "loss": 0.0345, "step": 50120 }, { "epoch": 0.20914871777753669, "grad_norm": 1.3210542768742257, "learning_rate": 4.373621059065659e-06, "loss": 0.0304, "step": 50125 }, { "epoch": 0.20916958049252699, "grad_norm": 0.6492592190248531, "learning_rate": 4.373402935318742e-06, "loss": 0.0247, "step": 50130 }, { "epoch": 0.20919044320751726, "grad_norm": 0.4417263983761755, "learning_rate": 4.373184844203752e-06, "loss": 0.0264, "step": 50135 }, { "epoch": 0.20921130592250753, "grad_norm": 0.7631964215935093, "learning_rate": 4.372966785712548e-06, "loss": 0.0246, "step": 50140 }, { "epoch": 0.2092321686374978, "grad_norm": 0.9366539771538318, "learning_rate": 4.3727487598370016e-06, "loss": 0.0278, "step": 50145 }, { "epoch": 0.2092530313524881, "grad_norm": 1.408263473825457, "learning_rate": 4.37253076656898e-06, "loss": 0.05, "step": 50150 }, { "epoch": 0.20927389406747837, "grad_norm": 1.180633412610809, "learning_rate": 4.37231280590036e-06, "loss": 0.0298, "step": 50155 }, { "epoch": 0.20929475678246864, "grad_norm": 0.8046535358482437, "learning_rate": 4.372094877823012e-06, "loss": 0.0327, "step": 50160 }, { "epoch": 0.20931561949745892, "grad_norm": 0.9856182225654445, "learning_rate": 4.371876982328819e-06, "loss": 0.0234, "step": 50165 }, { "epoch": 0.2093364822124492, "grad_norm": 0.797929607130647, "learning_rate": 4.371659119409661e-06, "loss": 0.0301, "step": 50170 }, { "epoch": 0.2093573449274395, "grad_norm": 1.3179163510086795, "learning_rate": 4.3714412890574216e-06, "loss": 0.0415, "step": 50175 }, { "epoch": 0.20937820764242976, "grad_norm": 0.7009277221607053, "learning_rate": 4.371223491263988e-06, "loss": 0.0315, "step": 50180 }, { "epoch": 0.20939907035742003, "grad_norm": 0.8737582155177721, "learning_rate": 4.371005726021252e-06, "loss": 0.0481, "step": 50185 }, { "epoch": 0.2094199330724103, "grad_norm": 0.7493270923190564, "learning_rate": 4.370787993321104e-06, "loss": 0.0308, "step": 50190 }, { "epoch": 0.2094407957874006, "grad_norm": 0.8263548050688106, "learning_rate": 4.370570293155441e-06, "loss": 0.0313, "step": 50195 }, { "epoch": 0.20946165850239087, "grad_norm": 0.6346577916883567, "learning_rate": 4.370352625516161e-06, "loss": 0.0433, "step": 50200 }, { "epoch": 0.20948252121738115, "grad_norm": 1.0088147154653422, "learning_rate": 4.370134990395166e-06, "loss": 0.0407, "step": 50205 }, { "epoch": 0.20950338393237142, "grad_norm": 0.7506067462027401, "learning_rate": 4.369917387784358e-06, "loss": 0.0327, "step": 50210 }, { "epoch": 0.2095242466473617, "grad_norm": 5.114413569554755, "learning_rate": 4.369699817675646e-06, "loss": 0.0485, "step": 50215 }, { "epoch": 0.209545109362352, "grad_norm": 0.7175342150536375, "learning_rate": 4.3694822800609384e-06, "loss": 0.0277, "step": 50220 }, { "epoch": 0.20956597207734226, "grad_norm": 0.727182225408167, "learning_rate": 4.36926477493215e-06, "loss": 0.0371, "step": 50225 }, { "epoch": 0.20958683479233253, "grad_norm": 0.6036788655452183, "learning_rate": 4.369047302281194e-06, "loss": 0.0334, "step": 50230 }, { "epoch": 0.2096076975073228, "grad_norm": 0.7680538909668037, "learning_rate": 4.368829862099988e-06, "loss": 0.0335, "step": 50235 }, { "epoch": 0.2096285602223131, "grad_norm": 0.6736026926769934, "learning_rate": 4.368612454380454e-06, "loss": 0.0286, "step": 50240 }, { "epoch": 0.20964942293730338, "grad_norm": 0.6181241778590456, "learning_rate": 4.368395079114515e-06, "loss": 0.0337, "step": 50245 }, { "epoch": 0.20967028565229365, "grad_norm": 0.9165908240725713, "learning_rate": 4.3681777362941e-06, "loss": 0.0274, "step": 50250 }, { "epoch": 0.20969114836728392, "grad_norm": 1.4714656585806432, "learning_rate": 4.367960425911136e-06, "loss": 0.0395, "step": 50255 }, { "epoch": 0.2097120110822742, "grad_norm": 0.9430318025522566, "learning_rate": 4.367743147957556e-06, "loss": 0.0283, "step": 50260 }, { "epoch": 0.2097328737972645, "grad_norm": 0.9186790253115003, "learning_rate": 4.367525902425295e-06, "loss": 0.0334, "step": 50265 }, { "epoch": 0.20975373651225476, "grad_norm": 1.0562236924134345, "learning_rate": 4.36730868930629e-06, "loss": 0.045, "step": 50270 }, { "epoch": 0.20977459922724503, "grad_norm": 0.5431129749573202, "learning_rate": 4.367091508592484e-06, "loss": 0.0317, "step": 50275 }, { "epoch": 0.2097954619422353, "grad_norm": 0.4800316242634882, "learning_rate": 4.366874360275819e-06, "loss": 0.0277, "step": 50280 }, { "epoch": 0.2098163246572256, "grad_norm": 0.637338436875672, "learning_rate": 4.36665724434824e-06, "loss": 0.0346, "step": 50285 }, { "epoch": 0.20983718737221588, "grad_norm": 1.0270896060124457, "learning_rate": 4.366440160801698e-06, "loss": 0.0379, "step": 50290 }, { "epoch": 0.20985805008720615, "grad_norm": 0.7469269627550587, "learning_rate": 4.366223109628145e-06, "loss": 0.0389, "step": 50295 }, { "epoch": 0.20987891280219642, "grad_norm": 1.209435343081328, "learning_rate": 4.366006090819533e-06, "loss": 0.0369, "step": 50300 }, { "epoch": 0.2098997755171867, "grad_norm": 0.9625326061882314, "learning_rate": 4.365789104367822e-06, "loss": 0.0331, "step": 50305 }, { "epoch": 0.209920638232177, "grad_norm": 1.5155620664251417, "learning_rate": 4.365572150264973e-06, "loss": 0.049, "step": 50310 }, { "epoch": 0.20994150094716726, "grad_norm": 0.9416921960032502, "learning_rate": 4.365355228502946e-06, "loss": 0.0342, "step": 50315 }, { "epoch": 0.20996236366215754, "grad_norm": 0.5742118937452898, "learning_rate": 4.36513833907371e-06, "loss": 0.0366, "step": 50320 }, { "epoch": 0.2099832263771478, "grad_norm": 0.43848560491266886, "learning_rate": 4.36492148196923e-06, "loss": 0.0391, "step": 50325 }, { "epoch": 0.2100040890921381, "grad_norm": 0.7186393064674619, "learning_rate": 4.364704657181481e-06, "loss": 0.0218, "step": 50330 }, { "epoch": 0.21002495180712838, "grad_norm": 1.1649164203913875, "learning_rate": 4.364487864702435e-06, "loss": 0.0347, "step": 50335 }, { "epoch": 0.21004581452211865, "grad_norm": 1.5797987863025054, "learning_rate": 4.36427110452407e-06, "loss": 0.0322, "step": 50340 }, { "epoch": 0.21006667723710892, "grad_norm": 0.9196671264204741, "learning_rate": 4.364054376638366e-06, "loss": 0.0491, "step": 50345 }, { "epoch": 0.2100875399520992, "grad_norm": 1.9566936818670082, "learning_rate": 4.363837681037305e-06, "loss": 0.0384, "step": 50350 }, { "epoch": 0.2101084026670895, "grad_norm": 3.277341972654824, "learning_rate": 4.363621017712873e-06, "loss": 0.0419, "step": 50355 }, { "epoch": 0.21012926538207977, "grad_norm": 0.8488573367663239, "learning_rate": 4.363404386657057e-06, "loss": 0.0281, "step": 50360 }, { "epoch": 0.21015012809707004, "grad_norm": 1.2606619526891412, "learning_rate": 4.363187787861849e-06, "loss": 0.035, "step": 50365 }, { "epoch": 0.2101709908120603, "grad_norm": 0.9395256837008706, "learning_rate": 4.3629712213192426e-06, "loss": 0.039, "step": 50370 }, { "epoch": 0.2101918535270506, "grad_norm": 0.7372717665036246, "learning_rate": 4.362754687021234e-06, "loss": 0.0362, "step": 50375 }, { "epoch": 0.21021271624204088, "grad_norm": 0.9016282443169328, "learning_rate": 4.362538184959823e-06, "loss": 0.0317, "step": 50380 }, { "epoch": 0.21023357895703115, "grad_norm": 0.7062123326647327, "learning_rate": 4.362321715127011e-06, "loss": 0.028, "step": 50385 }, { "epoch": 0.21025444167202142, "grad_norm": 0.6615091243967226, "learning_rate": 4.362105277514802e-06, "loss": 0.0308, "step": 50390 }, { "epoch": 0.2102753043870117, "grad_norm": 1.039540391715396, "learning_rate": 4.361888872115206e-06, "loss": 0.0379, "step": 50395 }, { "epoch": 0.210296167102002, "grad_norm": 1.4521991828270822, "learning_rate": 4.361672498920232e-06, "loss": 0.0512, "step": 50400 }, { "epoch": 0.21031702981699227, "grad_norm": 1.1400921728131908, "learning_rate": 4.361456157921894e-06, "loss": 0.039, "step": 50405 }, { "epoch": 0.21033789253198254, "grad_norm": 0.8961376793616986, "learning_rate": 4.361239849112206e-06, "loss": 0.0349, "step": 50410 }, { "epoch": 0.2103587552469728, "grad_norm": 1.1678597018911165, "learning_rate": 4.36102357248319e-06, "loss": 0.0407, "step": 50415 }, { "epoch": 0.2103796179619631, "grad_norm": 0.6096369549124869, "learning_rate": 4.360807328026865e-06, "loss": 0.0376, "step": 50420 }, { "epoch": 0.21040048067695338, "grad_norm": 0.41962228312265903, "learning_rate": 4.360591115735254e-06, "loss": 0.0424, "step": 50425 }, { "epoch": 0.21042134339194365, "grad_norm": 1.0675889889438566, "learning_rate": 4.360374935600387e-06, "loss": 0.0307, "step": 50430 }, { "epoch": 0.21044220610693393, "grad_norm": 1.0512832249859967, "learning_rate": 4.360158787614293e-06, "loss": 0.0331, "step": 50435 }, { "epoch": 0.2104630688219242, "grad_norm": 1.8514218854373514, "learning_rate": 4.359942671769004e-06, "loss": 0.0276, "step": 50440 }, { "epoch": 0.2104839315369145, "grad_norm": 0.9441905800890995, "learning_rate": 4.359726588056555e-06, "loss": 0.0344, "step": 50445 }, { "epoch": 0.21050479425190477, "grad_norm": 1.179365584902909, "learning_rate": 4.359510536468984e-06, "loss": 0.0414, "step": 50450 }, { "epoch": 0.21052565696689504, "grad_norm": 1.1486407041690414, "learning_rate": 4.359294516998335e-06, "loss": 0.0442, "step": 50455 }, { "epoch": 0.2105465196818853, "grad_norm": 1.6777675620085846, "learning_rate": 4.359078529636648e-06, "loss": 0.0423, "step": 50460 }, { "epoch": 0.2105673823968756, "grad_norm": 0.5299445595067673, "learning_rate": 4.358862574375969e-06, "loss": 0.0341, "step": 50465 }, { "epoch": 0.21058824511186588, "grad_norm": 0.8208902101192547, "learning_rate": 4.35864665120835e-06, "loss": 0.0391, "step": 50470 }, { "epoch": 0.21060910782685616, "grad_norm": 0.9059466921767418, "learning_rate": 4.3584307601258406e-06, "loss": 0.0328, "step": 50475 }, { "epoch": 0.21062997054184643, "grad_norm": 1.1928141787395743, "learning_rate": 4.358214901120497e-06, "loss": 0.0426, "step": 50480 }, { "epoch": 0.2106508332568367, "grad_norm": 0.6891244823894759, "learning_rate": 4.357999074184376e-06, "loss": 0.0336, "step": 50485 }, { "epoch": 0.210671695971827, "grad_norm": 0.9932927308165722, "learning_rate": 4.357783279309537e-06, "loss": 0.033, "step": 50490 }, { "epoch": 0.21069255868681727, "grad_norm": 0.9883571410439884, "learning_rate": 4.357567516488043e-06, "loss": 0.0272, "step": 50495 }, { "epoch": 0.21071342140180754, "grad_norm": 1.0394923204727249, "learning_rate": 4.357351785711962e-06, "loss": 0.0319, "step": 50500 }, { "epoch": 0.21073428411679782, "grad_norm": 0.7995444976185601, "learning_rate": 4.357136086973359e-06, "loss": 0.0309, "step": 50505 }, { "epoch": 0.21075514683178811, "grad_norm": 0.9679723248847859, "learning_rate": 4.356920420264307e-06, "loss": 0.0313, "step": 50510 }, { "epoch": 0.2107760095467784, "grad_norm": 0.9649672111095989, "learning_rate": 4.3567047855768794e-06, "loss": 0.0347, "step": 50515 }, { "epoch": 0.21079687226176866, "grad_norm": 1.3412882529485926, "learning_rate": 4.3564891829031545e-06, "loss": 0.0394, "step": 50520 }, { "epoch": 0.21081773497675893, "grad_norm": 1.1624677494645483, "learning_rate": 4.35627361223521e-06, "loss": 0.0355, "step": 50525 }, { "epoch": 0.2108385976917492, "grad_norm": 0.9686320678668616, "learning_rate": 4.356058073565127e-06, "loss": 0.0461, "step": 50530 }, { "epoch": 0.2108594604067395, "grad_norm": 1.2185540835283595, "learning_rate": 4.355842566884992e-06, "loss": 0.0299, "step": 50535 }, { "epoch": 0.21088032312172977, "grad_norm": 0.7185395039379519, "learning_rate": 4.3556270921868935e-06, "loss": 0.0301, "step": 50540 }, { "epoch": 0.21090118583672005, "grad_norm": 1.769824966359221, "learning_rate": 4.35541164946292e-06, "loss": 0.0557, "step": 50545 }, { "epoch": 0.21092204855171032, "grad_norm": 0.8968459522187381, "learning_rate": 4.355196238705164e-06, "loss": 0.0471, "step": 50550 }, { "epoch": 0.21094291126670062, "grad_norm": 0.6298206007395344, "learning_rate": 4.354980859905723e-06, "loss": 0.0273, "step": 50555 }, { "epoch": 0.2109637739816909, "grad_norm": 1.137771950934849, "learning_rate": 4.354765513056696e-06, "loss": 0.0413, "step": 50560 }, { "epoch": 0.21098463669668116, "grad_norm": 0.7436399193444567, "learning_rate": 4.354550198150183e-06, "loss": 0.0261, "step": 50565 }, { "epoch": 0.21100549941167143, "grad_norm": 0.9642543492425909, "learning_rate": 4.354334915178287e-06, "loss": 0.0295, "step": 50570 }, { "epoch": 0.2110263621266617, "grad_norm": 0.9831247002365651, "learning_rate": 4.3541196641331186e-06, "loss": 0.038, "step": 50575 }, { "epoch": 0.211047224841652, "grad_norm": 0.35415271735841725, "learning_rate": 4.353904445006784e-06, "loss": 0.0384, "step": 50580 }, { "epoch": 0.21106808755664228, "grad_norm": 0.7710958609433345, "learning_rate": 4.353689257791396e-06, "loss": 0.0245, "step": 50585 }, { "epoch": 0.21108895027163255, "grad_norm": 0.7930197268029899, "learning_rate": 4.353474102479069e-06, "loss": 0.0335, "step": 50590 }, { "epoch": 0.21110981298662282, "grad_norm": 1.2876407488610861, "learning_rate": 4.353258979061923e-06, "loss": 0.0341, "step": 50595 }, { "epoch": 0.21113067570161312, "grad_norm": 0.8555446275776458, "learning_rate": 4.353043887532077e-06, "loss": 0.0329, "step": 50600 }, { "epoch": 0.2111515384166034, "grad_norm": 0.7772773676246315, "learning_rate": 4.352828827881653e-06, "loss": 0.046, "step": 50605 }, { "epoch": 0.21117240113159366, "grad_norm": 3.4459839159034855, "learning_rate": 4.352613800102779e-06, "loss": 0.0533, "step": 50610 }, { "epoch": 0.21119326384658393, "grad_norm": 0.4766008063761091, "learning_rate": 4.352398804187582e-06, "loss": 0.039, "step": 50615 }, { "epoch": 0.2112141265615742, "grad_norm": 0.5787721634798103, "learning_rate": 4.352183840128193e-06, "loss": 0.0248, "step": 50620 }, { "epoch": 0.2112349892765645, "grad_norm": 0.8982665459951886, "learning_rate": 4.351968907916747e-06, "loss": 0.0298, "step": 50625 }, { "epoch": 0.21125585199155478, "grad_norm": 0.8023121025628553, "learning_rate": 4.351754007545381e-06, "loss": 0.0402, "step": 50630 }, { "epoch": 0.21127671470654505, "grad_norm": 1.899537950409887, "learning_rate": 4.3515391390062345e-06, "loss": 0.0295, "step": 50635 }, { "epoch": 0.21129757742153532, "grad_norm": 0.5804566714467314, "learning_rate": 4.351324302291449e-06, "loss": 0.0373, "step": 50640 }, { "epoch": 0.21131844013652562, "grad_norm": 1.2748877233991815, "learning_rate": 4.351109497393169e-06, "loss": 0.0356, "step": 50645 }, { "epoch": 0.2113393028515159, "grad_norm": 1.201999496615098, "learning_rate": 4.350894724303543e-06, "loss": 0.0357, "step": 50650 }, { "epoch": 0.21136016556650616, "grad_norm": 1.8767453774483236, "learning_rate": 4.35067998301472e-06, "loss": 0.0378, "step": 50655 }, { "epoch": 0.21138102828149644, "grad_norm": 0.7138787087106461, "learning_rate": 4.350465273518855e-06, "loss": 0.0368, "step": 50660 }, { "epoch": 0.2114018909964867, "grad_norm": 0.7640066607464784, "learning_rate": 4.350250595808103e-06, "loss": 0.0351, "step": 50665 }, { "epoch": 0.211422753711477, "grad_norm": 0.9650622484308239, "learning_rate": 4.350035949874622e-06, "loss": 0.038, "step": 50670 }, { "epoch": 0.21144361642646728, "grad_norm": 0.3930044339660212, "learning_rate": 4.349821335710573e-06, "loss": 0.0294, "step": 50675 }, { "epoch": 0.21146447914145755, "grad_norm": 0.4785535107641354, "learning_rate": 4.34960675330812e-06, "loss": 0.0307, "step": 50680 }, { "epoch": 0.21148534185644782, "grad_norm": 1.6537286132414735, "learning_rate": 4.34939220265943e-06, "loss": 0.0322, "step": 50685 }, { "epoch": 0.21150620457143812, "grad_norm": 0.6577431804549677, "learning_rate": 4.349177683756673e-06, "loss": 0.0368, "step": 50690 }, { "epoch": 0.2115270672864284, "grad_norm": 0.7542742970774855, "learning_rate": 4.348963196592019e-06, "loss": 0.0287, "step": 50695 }, { "epoch": 0.21154793000141867, "grad_norm": 0.5076245310523156, "learning_rate": 4.348748741157645e-06, "loss": 0.0287, "step": 50700 }, { "epoch": 0.21156879271640894, "grad_norm": 0.9205168773131914, "learning_rate": 4.3485343174457255e-06, "loss": 0.0353, "step": 50705 }, { "epoch": 0.2115896554313992, "grad_norm": 0.7894079307142413, "learning_rate": 4.348319925448442e-06, "loss": 0.0468, "step": 50710 }, { "epoch": 0.2116105181463895, "grad_norm": 0.717430528049778, "learning_rate": 4.348105565157978e-06, "loss": 0.0374, "step": 50715 }, { "epoch": 0.21163138086137978, "grad_norm": 0.6585324859573736, "learning_rate": 4.347891236566518e-06, "loss": 0.036, "step": 50720 }, { "epoch": 0.21165224357637005, "grad_norm": 1.6387534209371053, "learning_rate": 4.347676939666252e-06, "loss": 0.036, "step": 50725 }, { "epoch": 0.21167310629136032, "grad_norm": 0.7099497222788789, "learning_rate": 4.347462674449369e-06, "loss": 0.0327, "step": 50730 }, { "epoch": 0.21169396900635062, "grad_norm": 1.1913933303539443, "learning_rate": 4.3472484409080625e-06, "loss": 0.0344, "step": 50735 }, { "epoch": 0.2117148317213409, "grad_norm": 1.322328678931078, "learning_rate": 4.347034239034529e-06, "loss": 0.0404, "step": 50740 }, { "epoch": 0.21173569443633117, "grad_norm": 1.2981533579670057, "learning_rate": 4.346820068820967e-06, "loss": 0.0323, "step": 50745 }, { "epoch": 0.21175655715132144, "grad_norm": 0.6507843560840452, "learning_rate": 4.346605930259579e-06, "loss": 0.0306, "step": 50750 }, { "epoch": 0.2117774198663117, "grad_norm": 0.7247639489059184, "learning_rate": 4.346391823342571e-06, "loss": 0.0336, "step": 50755 }, { "epoch": 0.211798282581302, "grad_norm": 1.4086475358842239, "learning_rate": 4.346177748062147e-06, "loss": 0.037, "step": 50760 }, { "epoch": 0.21181914529629228, "grad_norm": 0.6425913980701409, "learning_rate": 4.345963704410517e-06, "loss": 0.0314, "step": 50765 }, { "epoch": 0.21184000801128255, "grad_norm": 1.459944384144982, "learning_rate": 4.345749692379895e-06, "loss": 0.0429, "step": 50770 }, { "epoch": 0.21186087072627283, "grad_norm": 0.797098243405018, "learning_rate": 4.345535711962495e-06, "loss": 0.0336, "step": 50775 }, { "epoch": 0.21188173344126313, "grad_norm": 0.976208200632963, "learning_rate": 4.3453217631505355e-06, "loss": 0.0345, "step": 50780 }, { "epoch": 0.2119025961562534, "grad_norm": 0.7528632153674799, "learning_rate": 4.345107845936236e-06, "loss": 0.0355, "step": 50785 }, { "epoch": 0.21192345887124367, "grad_norm": 1.450928671680769, "learning_rate": 4.34489396031182e-06, "loss": 0.0325, "step": 50790 }, { "epoch": 0.21194432158623394, "grad_norm": 4.828899003587996, "learning_rate": 4.344680106269514e-06, "loss": 0.0552, "step": 50795 }, { "epoch": 0.2119651843012242, "grad_norm": 0.7717837420415007, "learning_rate": 4.344466283801546e-06, "loss": 0.0379, "step": 50800 }, { "epoch": 0.2119860470162145, "grad_norm": 0.9476286874879513, "learning_rate": 4.344252492900145e-06, "loss": 0.0346, "step": 50805 }, { "epoch": 0.21200690973120478, "grad_norm": 0.7590951948278779, "learning_rate": 4.344038733557549e-06, "loss": 0.0455, "step": 50810 }, { "epoch": 0.21202777244619506, "grad_norm": 0.6136534514365272, "learning_rate": 4.343825005765991e-06, "loss": 0.0289, "step": 50815 }, { "epoch": 0.21204863516118533, "grad_norm": 0.8507310078190417, "learning_rate": 4.343611309517712e-06, "loss": 0.0323, "step": 50820 }, { "epoch": 0.21206949787617563, "grad_norm": 0.5668414154993452, "learning_rate": 4.343397644804953e-06, "loss": 0.0298, "step": 50825 }, { "epoch": 0.2120903605911659, "grad_norm": 1.3192752990655894, "learning_rate": 4.343184011619959e-06, "loss": 0.0255, "step": 50830 }, { "epoch": 0.21211122330615617, "grad_norm": 1.5809961718421466, "learning_rate": 4.3429704099549774e-06, "loss": 0.0393, "step": 50835 }, { "epoch": 0.21213208602114644, "grad_norm": 1.147423175635582, "learning_rate": 4.342756839802257e-06, "loss": 0.0312, "step": 50840 }, { "epoch": 0.21215294873613672, "grad_norm": 0.5758230488542978, "learning_rate": 4.342543301154051e-06, "loss": 0.0367, "step": 50845 }, { "epoch": 0.21217381145112701, "grad_norm": 0.8418212536189226, "learning_rate": 4.342329794002616e-06, "loss": 0.0401, "step": 50850 }, { "epoch": 0.2121946741661173, "grad_norm": 0.6718958611171759, "learning_rate": 4.342116318340206e-06, "loss": 0.028, "step": 50855 }, { "epoch": 0.21221553688110756, "grad_norm": 1.3760276943679886, "learning_rate": 4.3419028741590854e-06, "loss": 0.0506, "step": 50860 }, { "epoch": 0.21223639959609783, "grad_norm": 0.7488733868196118, "learning_rate": 4.341689461451515e-06, "loss": 0.0282, "step": 50865 }, { "epoch": 0.21225726231108813, "grad_norm": 0.5829522856257859, "learning_rate": 4.341476080209762e-06, "loss": 0.0337, "step": 50870 }, { "epoch": 0.2122781250260784, "grad_norm": 0.8830946542310693, "learning_rate": 4.341262730426094e-06, "loss": 0.0293, "step": 50875 }, { "epoch": 0.21229898774106867, "grad_norm": 1.4451763165216098, "learning_rate": 4.341049412092783e-06, "loss": 0.0321, "step": 50880 }, { "epoch": 0.21231985045605895, "grad_norm": 1.2884461036934407, "learning_rate": 4.340836125202103e-06, "loss": 0.0274, "step": 50885 }, { "epoch": 0.21234071317104922, "grad_norm": 0.9309121169746561, "learning_rate": 4.340622869746329e-06, "loss": 0.0359, "step": 50890 }, { "epoch": 0.21236157588603952, "grad_norm": 0.6662547602436867, "learning_rate": 4.340409645717741e-06, "loss": 0.0334, "step": 50895 }, { "epoch": 0.2123824386010298, "grad_norm": 0.5347141737965614, "learning_rate": 4.340196453108621e-06, "loss": 0.0262, "step": 50900 }, { "epoch": 0.21240330131602006, "grad_norm": 0.7456325206050639, "learning_rate": 4.339983291911253e-06, "loss": 0.0356, "step": 50905 }, { "epoch": 0.21242416403101033, "grad_norm": 0.6641343694600603, "learning_rate": 4.339770162117925e-06, "loss": 0.0304, "step": 50910 }, { "epoch": 0.21244502674600063, "grad_norm": 0.6425030668931383, "learning_rate": 4.339557063720925e-06, "loss": 0.0288, "step": 50915 }, { "epoch": 0.2124658894609909, "grad_norm": 0.9713849431937419, "learning_rate": 4.339343996712547e-06, "loss": 0.0369, "step": 50920 }, { "epoch": 0.21248675217598118, "grad_norm": 1.0123227155809829, "learning_rate": 4.339130961085086e-06, "loss": 0.0416, "step": 50925 }, { "epoch": 0.21250761489097145, "grad_norm": 0.48570677424628733, "learning_rate": 4.338917956830837e-06, "loss": 0.0383, "step": 50930 }, { "epoch": 0.21252847760596172, "grad_norm": 1.5963046685410307, "learning_rate": 4.338704983942104e-06, "loss": 0.0386, "step": 50935 }, { "epoch": 0.21254934032095202, "grad_norm": 0.7102465186557755, "learning_rate": 4.33849204241119e-06, "loss": 0.0267, "step": 50940 }, { "epoch": 0.2125702030359423, "grad_norm": 0.7664867794400968, "learning_rate": 4.338279132230398e-06, "loss": 0.0349, "step": 50945 }, { "epoch": 0.21259106575093256, "grad_norm": 1.051724633816881, "learning_rate": 4.338066253392037e-06, "loss": 0.0409, "step": 50950 }, { "epoch": 0.21261192846592283, "grad_norm": 0.6239690759719321, "learning_rate": 4.337853405888417e-06, "loss": 0.0312, "step": 50955 }, { "epoch": 0.2126327911809131, "grad_norm": 0.6868818899496727, "learning_rate": 4.337640589711855e-06, "loss": 0.03, "step": 50960 }, { "epoch": 0.2126536538959034, "grad_norm": 0.8136231888220664, "learning_rate": 4.3374278048546645e-06, "loss": 0.0374, "step": 50965 }, { "epoch": 0.21267451661089368, "grad_norm": 0.6285516831937629, "learning_rate": 4.337215051309165e-06, "loss": 0.0307, "step": 50970 }, { "epoch": 0.21269537932588395, "grad_norm": 0.7770162132151414, "learning_rate": 4.337002329067677e-06, "loss": 0.0263, "step": 50975 }, { "epoch": 0.21271624204087422, "grad_norm": 1.279675008583582, "learning_rate": 4.3367896381225255e-06, "loss": 0.0343, "step": 50980 }, { "epoch": 0.21273710475586452, "grad_norm": 0.7840062066075479, "learning_rate": 4.336576978466037e-06, "loss": 0.0379, "step": 50985 }, { "epoch": 0.2127579674708548, "grad_norm": 0.8669811510012654, "learning_rate": 4.336364350090541e-06, "loss": 0.0365, "step": 50990 }, { "epoch": 0.21277883018584506, "grad_norm": 1.0758340392341275, "learning_rate": 4.336151752988369e-06, "loss": 0.0362, "step": 50995 }, { "epoch": 0.21279969290083534, "grad_norm": 0.7352908696315007, "learning_rate": 4.335939187151857e-06, "loss": 0.0329, "step": 51000 }, { "epoch": 0.2128205556158256, "grad_norm": 1.151643899214283, "learning_rate": 4.335726652573342e-06, "loss": 0.0384, "step": 51005 }, { "epoch": 0.2128414183308159, "grad_norm": 0.8623577369874545, "learning_rate": 4.335514149245163e-06, "loss": 0.0347, "step": 51010 }, { "epoch": 0.21286228104580618, "grad_norm": 0.7089304910066562, "learning_rate": 4.335301677159662e-06, "loss": 0.0363, "step": 51015 }, { "epoch": 0.21288314376079645, "grad_norm": 0.49448673865814985, "learning_rate": 4.335089236309185e-06, "loss": 0.028, "step": 51020 }, { "epoch": 0.21290400647578672, "grad_norm": 0.8596123559616107, "learning_rate": 4.334876826686082e-06, "loss": 0.0443, "step": 51025 }, { "epoch": 0.21292486919077702, "grad_norm": 0.6017541985967778, "learning_rate": 4.334664448282698e-06, "loss": 0.0247, "step": 51030 }, { "epoch": 0.2129457319057673, "grad_norm": 0.8640615388445282, "learning_rate": 4.33445210109139e-06, "loss": 0.0319, "step": 51035 }, { "epoch": 0.21296659462075757, "grad_norm": 1.189484894372266, "learning_rate": 4.3342397851045134e-06, "loss": 0.0263, "step": 51040 }, { "epoch": 0.21298745733574784, "grad_norm": 1.0617090809571252, "learning_rate": 4.334027500314425e-06, "loss": 0.0416, "step": 51045 }, { "epoch": 0.2130083200507381, "grad_norm": 1.2831231680191586, "learning_rate": 4.333815246713487e-06, "loss": 0.0385, "step": 51050 }, { "epoch": 0.2130291827657284, "grad_norm": 0.9855592979514478, "learning_rate": 4.333603024294063e-06, "loss": 0.0351, "step": 51055 }, { "epoch": 0.21305004548071868, "grad_norm": 1.0214545339787477, "learning_rate": 4.333390833048517e-06, "loss": 0.0291, "step": 51060 }, { "epoch": 0.21307090819570895, "grad_norm": 0.72138307052357, "learning_rate": 4.333178672969221e-06, "loss": 0.0303, "step": 51065 }, { "epoch": 0.21309177091069922, "grad_norm": 0.6392806684244896, "learning_rate": 4.332966544048544e-06, "loss": 0.0343, "step": 51070 }, { "epoch": 0.21311263362568952, "grad_norm": 0.9226651922149423, "learning_rate": 4.33275444627886e-06, "loss": 0.0245, "step": 51075 }, { "epoch": 0.2131334963406798, "grad_norm": 1.6768969482829135, "learning_rate": 4.332542379652546e-06, "loss": 0.0357, "step": 51080 }, { "epoch": 0.21315435905567007, "grad_norm": 0.6730069431281296, "learning_rate": 4.332330344161982e-06, "loss": 0.0264, "step": 51085 }, { "epoch": 0.21317522177066034, "grad_norm": 1.0345655612203921, "learning_rate": 4.33211833979955e-06, "loss": 0.0286, "step": 51090 }, { "epoch": 0.2131960844856506, "grad_norm": 0.7824340487719612, "learning_rate": 4.331906366557632e-06, "loss": 0.0307, "step": 51095 }, { "epoch": 0.2132169472006409, "grad_norm": 0.8343269009532206, "learning_rate": 4.331694424428617e-06, "loss": 0.031, "step": 51100 }, { "epoch": 0.21323780991563118, "grad_norm": 0.5408451653730132, "learning_rate": 4.331482513404894e-06, "loss": 0.0317, "step": 51105 }, { "epoch": 0.21325867263062145, "grad_norm": 1.0767560522529547, "learning_rate": 4.331270633478855e-06, "loss": 0.0412, "step": 51110 }, { "epoch": 0.21327953534561173, "grad_norm": 1.3631036858540482, "learning_rate": 4.331058784642896e-06, "loss": 0.0349, "step": 51115 }, { "epoch": 0.21330039806060203, "grad_norm": 3.0188417323876173, "learning_rate": 4.330846966889414e-06, "loss": 0.0251, "step": 51120 }, { "epoch": 0.2133212607755923, "grad_norm": 1.051246675175104, "learning_rate": 4.3306351802108085e-06, "loss": 0.0315, "step": 51125 }, { "epoch": 0.21334212349058257, "grad_norm": 0.8426091374643137, "learning_rate": 4.330423424599481e-06, "loss": 0.0322, "step": 51130 }, { "epoch": 0.21336298620557284, "grad_norm": 1.3196199536642124, "learning_rate": 4.33021170004784e-06, "loss": 0.0354, "step": 51135 }, { "epoch": 0.2133838489205631, "grad_norm": 0.7740168879404021, "learning_rate": 4.33000000654829e-06, "loss": 0.0386, "step": 51140 }, { "epoch": 0.2134047116355534, "grad_norm": 0.8404617164381096, "learning_rate": 4.329788344093243e-06, "loss": 0.0361, "step": 51145 }, { "epoch": 0.21342557435054368, "grad_norm": 0.9134311084269818, "learning_rate": 4.329576712675113e-06, "loss": 0.0304, "step": 51150 }, { "epoch": 0.21344643706553396, "grad_norm": 0.6960698914433795, "learning_rate": 4.3293651122863125e-06, "loss": 0.0314, "step": 51155 }, { "epoch": 0.21346729978052423, "grad_norm": 0.8200404965764075, "learning_rate": 4.329153542919264e-06, "loss": 0.0228, "step": 51160 }, { "epoch": 0.21348816249551453, "grad_norm": 0.7411884033593311, "learning_rate": 4.328942004566384e-06, "loss": 0.0323, "step": 51165 }, { "epoch": 0.2135090252105048, "grad_norm": 0.909338466970786, "learning_rate": 4.3287304972200995e-06, "loss": 0.0314, "step": 51170 }, { "epoch": 0.21352988792549507, "grad_norm": 1.1386380873430564, "learning_rate": 4.3285190208728345e-06, "loss": 0.0319, "step": 51175 }, { "epoch": 0.21355075064048534, "grad_norm": 0.9193906875625829, "learning_rate": 4.328307575517018e-06, "loss": 0.0353, "step": 51180 }, { "epoch": 0.21357161335547561, "grad_norm": 0.9064876987529422, "learning_rate": 4.3280961611450816e-06, "loss": 0.0382, "step": 51185 }, { "epoch": 0.21359247607046591, "grad_norm": 0.5422645120106593, "learning_rate": 4.327884777749458e-06, "loss": 0.0251, "step": 51190 }, { "epoch": 0.21361333878545619, "grad_norm": 0.8831107182521821, "learning_rate": 4.327673425322585e-06, "loss": 0.0521, "step": 51195 }, { "epoch": 0.21363420150044646, "grad_norm": 0.8471012392918599, "learning_rate": 4.3274621038569e-06, "loss": 0.0307, "step": 51200 }, { "epoch": 0.21365506421543673, "grad_norm": 0.9597260186685219, "learning_rate": 4.327250813344846e-06, "loss": 0.0334, "step": 51205 }, { "epoch": 0.21367592693042703, "grad_norm": 0.8986406138739365, "learning_rate": 4.3270395537788655e-06, "loss": 0.0388, "step": 51210 }, { "epoch": 0.2136967896454173, "grad_norm": 0.6634658302981854, "learning_rate": 4.326828325151407e-06, "loss": 0.0293, "step": 51215 }, { "epoch": 0.21371765236040757, "grad_norm": 0.9097131760128205, "learning_rate": 4.326617127454919e-06, "loss": 0.0326, "step": 51220 }, { "epoch": 0.21373851507539784, "grad_norm": 0.6472561779731225, "learning_rate": 4.3264059606818524e-06, "loss": 0.0267, "step": 51225 }, { "epoch": 0.21375937779038812, "grad_norm": 1.1378322027036043, "learning_rate": 4.326194824824663e-06, "loss": 0.0482, "step": 51230 }, { "epoch": 0.21378024050537842, "grad_norm": 1.1042374875320735, "learning_rate": 4.325983719875806e-06, "loss": 0.0226, "step": 51235 }, { "epoch": 0.2138011032203687, "grad_norm": 0.810672362951588, "learning_rate": 4.325772645827743e-06, "loss": 0.043, "step": 51240 }, { "epoch": 0.21382196593535896, "grad_norm": 1.06147596723209, "learning_rate": 4.325561602672936e-06, "loss": 0.0333, "step": 51245 }, { "epoch": 0.21384282865034923, "grad_norm": 0.8150451015543869, "learning_rate": 4.32535059040385e-06, "loss": 0.034, "step": 51250 }, { "epoch": 0.21386369136533953, "grad_norm": 0.9438091673757202, "learning_rate": 4.32513960901295e-06, "loss": 0.0372, "step": 51255 }, { "epoch": 0.2138845540803298, "grad_norm": 0.4877015139126881, "learning_rate": 4.324928658492707e-06, "loss": 0.0311, "step": 51260 }, { "epoch": 0.21390541679532007, "grad_norm": 0.7810427645988463, "learning_rate": 4.324717738835595e-06, "loss": 0.0356, "step": 51265 }, { "epoch": 0.21392627951031035, "grad_norm": 0.7284575774753469, "learning_rate": 4.324506850034086e-06, "loss": 0.0339, "step": 51270 }, { "epoch": 0.21394714222530062, "grad_norm": 0.7518887667350089, "learning_rate": 4.32429599208066e-06, "loss": 0.0339, "step": 51275 }, { "epoch": 0.21396800494029092, "grad_norm": 1.050484338641913, "learning_rate": 4.324085164967798e-06, "loss": 0.0325, "step": 51280 }, { "epoch": 0.2139888676552812, "grad_norm": 0.7334927497707152, "learning_rate": 4.323874368687979e-06, "loss": 0.0259, "step": 51285 }, { "epoch": 0.21400973037027146, "grad_norm": 1.0743775948809369, "learning_rate": 4.323663603233691e-06, "loss": 0.035, "step": 51290 }, { "epoch": 0.21403059308526173, "grad_norm": 0.6383112076884836, "learning_rate": 4.3234528685974216e-06, "loss": 0.0289, "step": 51295 }, { "epoch": 0.21405145580025203, "grad_norm": 0.5968490318729835, "learning_rate": 4.323242164771661e-06, "loss": 0.038, "step": 51300 }, { "epoch": 0.2140723185152423, "grad_norm": 1.0262662378195426, "learning_rate": 4.323031491748902e-06, "loss": 0.0399, "step": 51305 }, { "epoch": 0.21409318123023258, "grad_norm": 0.786136918541521, "learning_rate": 4.32282084952164e-06, "loss": 0.0296, "step": 51310 }, { "epoch": 0.21411404394522285, "grad_norm": 0.9695993817913371, "learning_rate": 4.322610238082374e-06, "loss": 0.0434, "step": 51315 }, { "epoch": 0.21413490666021312, "grad_norm": 0.7918582304020075, "learning_rate": 4.322399657423603e-06, "loss": 0.0315, "step": 51320 }, { "epoch": 0.21415576937520342, "grad_norm": 1.0077285281877362, "learning_rate": 4.322189107537832e-06, "loss": 0.0387, "step": 51325 }, { "epoch": 0.2141766320901937, "grad_norm": 1.762885331812824, "learning_rate": 4.321978588417566e-06, "loss": 0.0511, "step": 51330 }, { "epoch": 0.21419749480518396, "grad_norm": 0.4754607113974241, "learning_rate": 4.321768100055313e-06, "loss": 0.0247, "step": 51335 }, { "epoch": 0.21421835752017424, "grad_norm": 0.800141467409355, "learning_rate": 4.321557642443583e-06, "loss": 0.0345, "step": 51340 }, { "epoch": 0.21423922023516453, "grad_norm": 0.6220874017479793, "learning_rate": 4.321347215574892e-06, "loss": 0.0354, "step": 51345 }, { "epoch": 0.2142600829501548, "grad_norm": 0.6626625222039689, "learning_rate": 4.321136819441754e-06, "loss": 0.0365, "step": 51350 }, { "epoch": 0.21428094566514508, "grad_norm": 0.783468951951899, "learning_rate": 4.320926454036688e-06, "loss": 0.04, "step": 51355 }, { "epoch": 0.21430180838013535, "grad_norm": 1.1219314586079072, "learning_rate": 4.3207161193522155e-06, "loss": 0.0545, "step": 51360 }, { "epoch": 0.21432267109512562, "grad_norm": 0.5701750000243515, "learning_rate": 4.320505815380859e-06, "loss": 0.0353, "step": 51365 }, { "epoch": 0.21434353381011592, "grad_norm": 0.9178810960125654, "learning_rate": 4.320295542115145e-06, "loss": 0.0317, "step": 51370 }, { "epoch": 0.2143643965251062, "grad_norm": 1.2569209321399992, "learning_rate": 4.320085299547603e-06, "loss": 0.0773, "step": 51375 }, { "epoch": 0.21438525924009647, "grad_norm": 0.9084117717475221, "learning_rate": 4.319875087670764e-06, "loss": 0.0396, "step": 51380 }, { "epoch": 0.21440612195508674, "grad_norm": 1.0017078122031071, "learning_rate": 4.319664906477162e-06, "loss": 0.0351, "step": 51385 }, { "epoch": 0.21442698467007704, "grad_norm": 0.6996052418752312, "learning_rate": 4.319454755959334e-06, "loss": 0.0315, "step": 51390 }, { "epoch": 0.2144478473850673, "grad_norm": 1.4341958420258338, "learning_rate": 4.319244636109816e-06, "loss": 0.0365, "step": 51395 }, { "epoch": 0.21446871010005758, "grad_norm": 1.216698292601978, "learning_rate": 4.319034546921152e-06, "loss": 0.0282, "step": 51400 }, { "epoch": 0.21448957281504785, "grad_norm": 0.7264023777156315, "learning_rate": 4.3188244883858844e-06, "loss": 0.0344, "step": 51405 }, { "epoch": 0.21451043553003812, "grad_norm": 0.6750992509372995, "learning_rate": 4.318614460496561e-06, "loss": 0.0323, "step": 51410 }, { "epoch": 0.21453129824502842, "grad_norm": 1.0743351442251106, "learning_rate": 4.318404463245731e-06, "loss": 0.0364, "step": 51415 }, { "epoch": 0.2145521609600187, "grad_norm": 1.0291073433651807, "learning_rate": 4.318194496625946e-06, "loss": 0.041, "step": 51420 }, { "epoch": 0.21457302367500897, "grad_norm": 0.6954117845101914, "learning_rate": 4.317984560629758e-06, "loss": 0.0289, "step": 51425 }, { "epoch": 0.21459388638999924, "grad_norm": 0.7158903965205474, "learning_rate": 4.317774655249726e-06, "loss": 0.0359, "step": 51430 }, { "epoch": 0.21461474910498954, "grad_norm": 0.8290472908346228, "learning_rate": 4.317564780478407e-06, "loss": 0.0358, "step": 51435 }, { "epoch": 0.2146356118199798, "grad_norm": 0.9795370787673563, "learning_rate": 4.317354936308367e-06, "loss": 0.0353, "step": 51440 }, { "epoch": 0.21465647453497008, "grad_norm": 0.8338826075734176, "learning_rate": 4.317145122732165e-06, "loss": 0.0352, "step": 51445 }, { "epoch": 0.21467733724996035, "grad_norm": 1.3548586524285189, "learning_rate": 4.31693533974237e-06, "loss": 0.0403, "step": 51450 }, { "epoch": 0.21469819996495063, "grad_norm": 1.1115227605060842, "learning_rate": 4.316725587331552e-06, "loss": 0.0415, "step": 51455 }, { "epoch": 0.21471906267994093, "grad_norm": 1.2299392937368485, "learning_rate": 4.316515865492282e-06, "loss": 0.0433, "step": 51460 }, { "epoch": 0.2147399253949312, "grad_norm": 1.2062109553468674, "learning_rate": 4.316306174217134e-06, "loss": 0.0425, "step": 51465 }, { "epoch": 0.21476078810992147, "grad_norm": 1.0345768517572067, "learning_rate": 4.316096513498686e-06, "loss": 0.036, "step": 51470 }, { "epoch": 0.21478165082491174, "grad_norm": 0.7199380897291561, "learning_rate": 4.3158868833295166e-06, "loss": 0.0256, "step": 51475 }, { "epoch": 0.21480251353990204, "grad_norm": 1.091752840847239, "learning_rate": 4.315677283702208e-06, "loss": 0.0448, "step": 51480 }, { "epoch": 0.2148233762548923, "grad_norm": 0.39464114382128423, "learning_rate": 4.3154677146093445e-06, "loss": 0.0359, "step": 51485 }, { "epoch": 0.21484423896988258, "grad_norm": 0.8602462742761388, "learning_rate": 4.315258176043513e-06, "loss": 0.0344, "step": 51490 }, { "epoch": 0.21486510168487286, "grad_norm": 1.3332683682717383, "learning_rate": 4.315048667997304e-06, "loss": 0.0401, "step": 51495 }, { "epoch": 0.21488596439986313, "grad_norm": 1.1539290334638506, "learning_rate": 4.314839190463307e-06, "loss": 0.0377, "step": 51500 }, { "epoch": 0.21490682711485343, "grad_norm": 0.6310718638262721, "learning_rate": 4.31462974343412e-06, "loss": 0.0324, "step": 51505 }, { "epoch": 0.2149276898298437, "grad_norm": 0.8924151448013306, "learning_rate": 4.3144203269023366e-06, "loss": 0.0325, "step": 51510 }, { "epoch": 0.21494855254483397, "grad_norm": 1.2740894503425562, "learning_rate": 4.314210940860559e-06, "loss": 0.0302, "step": 51515 }, { "epoch": 0.21496941525982424, "grad_norm": 1.2173397502820529, "learning_rate": 4.314001585301387e-06, "loss": 0.041, "step": 51520 }, { "epoch": 0.21499027797481454, "grad_norm": 0.54010472764558, "learning_rate": 4.313792260217427e-06, "loss": 0.033, "step": 51525 }, { "epoch": 0.2150111406898048, "grad_norm": 0.44314473673259996, "learning_rate": 4.313582965601285e-06, "loss": 0.0307, "step": 51530 }, { "epoch": 0.21503200340479509, "grad_norm": 1.1768554820870787, "learning_rate": 4.313373701445573e-06, "loss": 0.0383, "step": 51535 }, { "epoch": 0.21505286611978536, "grad_norm": 0.7533427446312673, "learning_rate": 4.313164467742899e-06, "loss": 0.0323, "step": 51540 }, { "epoch": 0.21507372883477563, "grad_norm": 0.9365706558315149, "learning_rate": 4.3129552644858805e-06, "loss": 0.0407, "step": 51545 }, { "epoch": 0.21509459154976593, "grad_norm": 0.5333760885524239, "learning_rate": 4.312746091667132e-06, "loss": 0.0283, "step": 51550 }, { "epoch": 0.2151154542647562, "grad_norm": 1.9340720031489147, "learning_rate": 4.312536949279278e-06, "loss": 0.0372, "step": 51555 }, { "epoch": 0.21513631697974647, "grad_norm": 0.9522085448229625, "learning_rate": 4.3123278373149364e-06, "loss": 0.0302, "step": 51560 }, { "epoch": 0.21515717969473674, "grad_norm": 0.5074543874742419, "learning_rate": 4.3121187557667335e-06, "loss": 0.0324, "step": 51565 }, { "epoch": 0.21517804240972704, "grad_norm": 0.9956783387501508, "learning_rate": 4.3119097046272955e-06, "loss": 0.045, "step": 51570 }, { "epoch": 0.21519890512471732, "grad_norm": 0.6608506483859449, "learning_rate": 4.311700683889251e-06, "loss": 0.0264, "step": 51575 }, { "epoch": 0.2152197678397076, "grad_norm": 0.8560578362311014, "learning_rate": 4.311491693545236e-06, "loss": 0.0335, "step": 51580 }, { "epoch": 0.21524063055469786, "grad_norm": 0.6372045955577783, "learning_rate": 4.311282733587882e-06, "loss": 0.0324, "step": 51585 }, { "epoch": 0.21526149326968813, "grad_norm": 0.8892918021088747, "learning_rate": 4.311073804009827e-06, "loss": 0.0464, "step": 51590 }, { "epoch": 0.21528235598467843, "grad_norm": 0.5313542134818267, "learning_rate": 4.310864904803712e-06, "loss": 0.0215, "step": 51595 }, { "epoch": 0.2153032186996687, "grad_norm": 1.2922610777505588, "learning_rate": 4.310656035962176e-06, "loss": 0.0304, "step": 51600 }, { "epoch": 0.21532408141465897, "grad_norm": 0.9197780975530152, "learning_rate": 4.310447197477867e-06, "loss": 0.0448, "step": 51605 }, { "epoch": 0.21534494412964925, "grad_norm": 1.0444630600824207, "learning_rate": 4.310238389343429e-06, "loss": 0.04, "step": 51610 }, { "epoch": 0.21536580684463955, "grad_norm": 0.7521872939515724, "learning_rate": 4.3100296115515135e-06, "loss": 0.0323, "step": 51615 }, { "epoch": 0.21538666955962982, "grad_norm": 0.6826636612192001, "learning_rate": 4.309820864094773e-06, "loss": 0.0416, "step": 51620 }, { "epoch": 0.2154075322746201, "grad_norm": 0.9267370711180619, "learning_rate": 4.309612146965861e-06, "loss": 0.0333, "step": 51625 }, { "epoch": 0.21542839498961036, "grad_norm": 0.6997161431860106, "learning_rate": 4.309403460157434e-06, "loss": 0.0306, "step": 51630 }, { "epoch": 0.21544925770460063, "grad_norm": 0.715930156766889, "learning_rate": 4.309194803662152e-06, "loss": 0.0359, "step": 51635 }, { "epoch": 0.21547012041959093, "grad_norm": 0.5698260920819194, "learning_rate": 4.30898617747268e-06, "loss": 0.0347, "step": 51640 }, { "epoch": 0.2154909831345812, "grad_norm": 0.5557784818673579, "learning_rate": 4.308777581581679e-06, "loss": 0.0355, "step": 51645 }, { "epoch": 0.21551184584957148, "grad_norm": 0.7234511905638441, "learning_rate": 4.308569015981817e-06, "loss": 0.0225, "step": 51650 }, { "epoch": 0.21553270856456175, "grad_norm": 1.133837372460424, "learning_rate": 4.3083604806657636e-06, "loss": 0.0362, "step": 51655 }, { "epoch": 0.21555357127955205, "grad_norm": 0.7342486340805766, "learning_rate": 4.308151975626192e-06, "loss": 0.0342, "step": 51660 }, { "epoch": 0.21557443399454232, "grad_norm": 1.0786160271986827, "learning_rate": 4.307943500855774e-06, "loss": 0.0427, "step": 51665 }, { "epoch": 0.2155952967095326, "grad_norm": 1.0198003048782553, "learning_rate": 4.307735056347189e-06, "loss": 0.037, "step": 51670 }, { "epoch": 0.21561615942452286, "grad_norm": 0.7828120937337646, "learning_rate": 4.307526642093116e-06, "loss": 0.0316, "step": 51675 }, { "epoch": 0.21563702213951313, "grad_norm": 0.8465848338785934, "learning_rate": 4.307318258086236e-06, "loss": 0.0355, "step": 51680 }, { "epoch": 0.21565788485450343, "grad_norm": 0.7225895024949234, "learning_rate": 4.307109904319234e-06, "loss": 0.0456, "step": 51685 }, { "epoch": 0.2156787475694937, "grad_norm": 1.1023951853137564, "learning_rate": 4.306901580784797e-06, "loss": 0.0379, "step": 51690 }, { "epoch": 0.21569961028448398, "grad_norm": 0.3446772384669198, "learning_rate": 4.306693287475615e-06, "loss": 0.0343, "step": 51695 }, { "epoch": 0.21572047299947425, "grad_norm": 0.49714367160365713, "learning_rate": 4.306485024384378e-06, "loss": 0.0383, "step": 51700 }, { "epoch": 0.21574133571446455, "grad_norm": 0.7645595806365146, "learning_rate": 4.30627679150378e-06, "loss": 0.0271, "step": 51705 }, { "epoch": 0.21576219842945482, "grad_norm": 1.0129736021071887, "learning_rate": 4.306068588826522e-06, "loss": 0.0288, "step": 51710 }, { "epoch": 0.2157830611444451, "grad_norm": 1.1200739194756004, "learning_rate": 4.305860416345299e-06, "loss": 0.0333, "step": 51715 }, { "epoch": 0.21580392385943536, "grad_norm": 0.7739941363627975, "learning_rate": 4.305652274052815e-06, "loss": 0.0329, "step": 51720 }, { "epoch": 0.21582478657442564, "grad_norm": 1.213527628906835, "learning_rate": 4.305444161941772e-06, "loss": 0.0323, "step": 51725 }, { "epoch": 0.21584564928941594, "grad_norm": 0.9146131956933753, "learning_rate": 4.305236080004878e-06, "loss": 0.0371, "step": 51730 }, { "epoch": 0.2158665120044062, "grad_norm": 0.8944043924155016, "learning_rate": 4.305028028234843e-06, "loss": 0.0419, "step": 51735 }, { "epoch": 0.21588737471939648, "grad_norm": 0.6665852993862815, "learning_rate": 4.304820006624377e-06, "loss": 0.0384, "step": 51740 }, { "epoch": 0.21590823743438675, "grad_norm": 0.7150658358126366, "learning_rate": 4.304612015166195e-06, "loss": 0.0316, "step": 51745 }, { "epoch": 0.21592910014937705, "grad_norm": 1.5583664147214635, "learning_rate": 4.304404053853014e-06, "loss": 0.0374, "step": 51750 }, { "epoch": 0.21594996286436732, "grad_norm": 1.371685111197167, "learning_rate": 4.304196122677552e-06, "loss": 0.0392, "step": 51755 }, { "epoch": 0.2159708255793576, "grad_norm": 0.902124879952976, "learning_rate": 4.3039882216325305e-06, "loss": 0.0326, "step": 51760 }, { "epoch": 0.21599168829434787, "grad_norm": 1.0805827044412335, "learning_rate": 4.303780350710673e-06, "loss": 0.0274, "step": 51765 }, { "epoch": 0.21601255100933814, "grad_norm": 0.9144069539920017, "learning_rate": 4.303572509904707e-06, "loss": 0.0265, "step": 51770 }, { "epoch": 0.21603341372432844, "grad_norm": 0.7998924259139145, "learning_rate": 4.3033646992073606e-06, "loss": 0.0253, "step": 51775 }, { "epoch": 0.2160542764393187, "grad_norm": 0.9729992702350095, "learning_rate": 4.303156918611366e-06, "loss": 0.0356, "step": 51780 }, { "epoch": 0.21607513915430898, "grad_norm": 0.7971507744223605, "learning_rate": 4.302949168109456e-06, "loss": 0.0282, "step": 51785 }, { "epoch": 0.21609600186929925, "grad_norm": 0.5288956666275504, "learning_rate": 4.302741447694366e-06, "loss": 0.0284, "step": 51790 }, { "epoch": 0.21611686458428955, "grad_norm": 0.7988658652381955, "learning_rate": 4.3025337573588365e-06, "loss": 0.0339, "step": 51795 }, { "epoch": 0.21613772729927982, "grad_norm": 0.94625318973369, "learning_rate": 4.3023260970956076e-06, "loss": 0.0423, "step": 51800 }, { "epoch": 0.2161585900142701, "grad_norm": 0.8386042457399799, "learning_rate": 4.302118466897423e-06, "loss": 0.0375, "step": 51805 }, { "epoch": 0.21617945272926037, "grad_norm": 1.0647754547003805, "learning_rate": 4.301910866757028e-06, "loss": 0.0325, "step": 51810 }, { "epoch": 0.21620031544425064, "grad_norm": 0.9896914472159482, "learning_rate": 4.301703296667173e-06, "loss": 0.0383, "step": 51815 }, { "epoch": 0.21622117815924094, "grad_norm": 0.9054172746634773, "learning_rate": 4.301495756620608e-06, "loss": 0.0358, "step": 51820 }, { "epoch": 0.2162420408742312, "grad_norm": 1.4003486747908869, "learning_rate": 4.3012882466100845e-06, "loss": 0.0269, "step": 51825 }, { "epoch": 0.21626290358922148, "grad_norm": 0.5854319759781287, "learning_rate": 4.301080766628361e-06, "loss": 0.0347, "step": 51830 }, { "epoch": 0.21628376630421176, "grad_norm": 1.892437865478986, "learning_rate": 4.300873316668195e-06, "loss": 0.0496, "step": 51835 }, { "epoch": 0.21630462901920205, "grad_norm": 0.9120049755542745, "learning_rate": 4.3006658967223464e-06, "loss": 0.0332, "step": 51840 }, { "epoch": 0.21632549173419233, "grad_norm": 0.862454878082057, "learning_rate": 4.300458506783579e-06, "loss": 0.0382, "step": 51845 }, { "epoch": 0.2163463544491826, "grad_norm": 0.9777105666027539, "learning_rate": 4.300251146844658e-06, "loss": 0.0385, "step": 51850 }, { "epoch": 0.21636721716417287, "grad_norm": 0.8090947387739974, "learning_rate": 4.30004381689835e-06, "loss": 0.0381, "step": 51855 }, { "epoch": 0.21638807987916314, "grad_norm": 0.6058028812849265, "learning_rate": 4.299836516937428e-06, "loss": 0.0337, "step": 51860 }, { "epoch": 0.21640894259415344, "grad_norm": 1.0030719646180202, "learning_rate": 4.2996292469546645e-06, "loss": 0.033, "step": 51865 }, { "epoch": 0.2164298053091437, "grad_norm": 0.9030518478426085, "learning_rate": 4.2994220069428335e-06, "loss": 0.0286, "step": 51870 }, { "epoch": 0.21645066802413399, "grad_norm": 0.7156328188258757, "learning_rate": 4.299214796894714e-06, "loss": 0.0237, "step": 51875 }, { "epoch": 0.21647153073912426, "grad_norm": 1.6064855123259612, "learning_rate": 4.299007616803085e-06, "loss": 0.0472, "step": 51880 }, { "epoch": 0.21649239345411456, "grad_norm": 0.765509798953302, "learning_rate": 4.2988004666607315e-06, "loss": 0.0385, "step": 51885 }, { "epoch": 0.21651325616910483, "grad_norm": 0.9539144183742481, "learning_rate": 4.298593346460435e-06, "loss": 0.0295, "step": 51890 }, { "epoch": 0.2165341188840951, "grad_norm": 0.6115042053847073, "learning_rate": 4.2983862561949865e-06, "loss": 0.0282, "step": 51895 }, { "epoch": 0.21655498159908537, "grad_norm": 1.1761574806077872, "learning_rate": 4.2981791958571734e-06, "loss": 0.0394, "step": 51900 }, { "epoch": 0.21657584431407564, "grad_norm": 1.0621183525293323, "learning_rate": 4.2979721654397896e-06, "loss": 0.0346, "step": 51905 }, { "epoch": 0.21659670702906594, "grad_norm": 1.3251900289044432, "learning_rate": 4.297765164935629e-06, "loss": 0.0483, "step": 51910 }, { "epoch": 0.21661756974405622, "grad_norm": 0.5023599555633448, "learning_rate": 4.29755819433749e-06, "loss": 0.0252, "step": 51915 }, { "epoch": 0.2166384324590465, "grad_norm": 0.7295306636906482, "learning_rate": 4.297351253638171e-06, "loss": 0.0295, "step": 51920 }, { "epoch": 0.21665929517403676, "grad_norm": 0.8899973482376606, "learning_rate": 4.2971443428304735e-06, "loss": 0.0465, "step": 51925 }, { "epoch": 0.21668015788902706, "grad_norm": 1.1687994166314628, "learning_rate": 4.296937461907205e-06, "loss": 0.0326, "step": 51930 }, { "epoch": 0.21670102060401733, "grad_norm": 1.4567321265967628, "learning_rate": 4.29673061086117e-06, "loss": 0.0383, "step": 51935 }, { "epoch": 0.2167218833190076, "grad_norm": 1.0092783254973, "learning_rate": 4.296523789685178e-06, "loss": 0.032, "step": 51940 }, { "epoch": 0.21674274603399787, "grad_norm": 0.7199435927948828, "learning_rate": 4.296316998372041e-06, "loss": 0.0311, "step": 51945 }, { "epoch": 0.21676360874898815, "grad_norm": 0.7064945859393236, "learning_rate": 4.296110236914573e-06, "loss": 0.0338, "step": 51950 }, { "epoch": 0.21678447146397845, "grad_norm": 1.040542562679127, "learning_rate": 4.295903505305592e-06, "loss": 0.0377, "step": 51955 }, { "epoch": 0.21680533417896872, "grad_norm": 1.348661399486571, "learning_rate": 4.295696803537916e-06, "loss": 0.0432, "step": 51960 }, { "epoch": 0.216826196893959, "grad_norm": 0.9579992676703556, "learning_rate": 4.295490131604366e-06, "loss": 0.0256, "step": 51965 }, { "epoch": 0.21684705960894926, "grad_norm": 1.1782765854203407, "learning_rate": 4.2952834894977664e-06, "loss": 0.0411, "step": 51970 }, { "epoch": 0.21686792232393956, "grad_norm": 0.9719988460491409, "learning_rate": 4.295076877210944e-06, "loss": 0.0352, "step": 51975 }, { "epoch": 0.21688878503892983, "grad_norm": 0.6153368976768414, "learning_rate": 4.2948702947367274e-06, "loss": 0.0302, "step": 51980 }, { "epoch": 0.2169096477539201, "grad_norm": 0.8583984307419855, "learning_rate": 4.294663742067947e-06, "loss": 0.0357, "step": 51985 }, { "epoch": 0.21693051046891038, "grad_norm": 0.5483217125085768, "learning_rate": 4.294457219197437e-06, "loss": 0.0223, "step": 51990 }, { "epoch": 0.21695137318390065, "grad_norm": 1.1984208057181749, "learning_rate": 4.294250726118032e-06, "loss": 0.037, "step": 51995 }, { "epoch": 0.21697223589889095, "grad_norm": 0.7235800799215646, "learning_rate": 4.294044262822573e-06, "loss": 0.0287, "step": 52000 }, { "epoch": 0.21699309861388122, "grad_norm": 1.0269262097212501, "learning_rate": 4.293837829303897e-06, "loss": 0.0337, "step": 52005 }, { "epoch": 0.2170139613288715, "grad_norm": 1.0515774308468349, "learning_rate": 4.293631425554851e-06, "loss": 0.0286, "step": 52010 }, { "epoch": 0.21703482404386176, "grad_norm": 1.0328444591309653, "learning_rate": 4.293425051568279e-06, "loss": 0.0393, "step": 52015 }, { "epoch": 0.21705568675885206, "grad_norm": 0.8572095566976332, "learning_rate": 4.2932187073370285e-06, "loss": 0.0418, "step": 52020 }, { "epoch": 0.21707654947384233, "grad_norm": 1.0429847116985793, "learning_rate": 4.293012392853951e-06, "loss": 0.0428, "step": 52025 }, { "epoch": 0.2170974121888326, "grad_norm": 0.8822468668980437, "learning_rate": 4.2928061081118985e-06, "loss": 0.0359, "step": 52030 }, { "epoch": 0.21711827490382288, "grad_norm": 0.6657372092442794, "learning_rate": 4.2925998531037255e-06, "loss": 0.0429, "step": 52035 }, { "epoch": 0.21713913761881315, "grad_norm": 0.7544489494688946, "learning_rate": 4.292393627822292e-06, "loss": 0.0339, "step": 52040 }, { "epoch": 0.21716000033380345, "grad_norm": 0.9805556282649359, "learning_rate": 4.292187432260455e-06, "loss": 0.0346, "step": 52045 }, { "epoch": 0.21718086304879372, "grad_norm": 0.7034146472485179, "learning_rate": 4.29198126641108e-06, "loss": 0.0327, "step": 52050 }, { "epoch": 0.217201725763784, "grad_norm": 0.5719001831954205, "learning_rate": 4.291775130267029e-06, "loss": 0.0268, "step": 52055 }, { "epoch": 0.21722258847877426, "grad_norm": 0.7536028221527956, "learning_rate": 4.291569023821172e-06, "loss": 0.0391, "step": 52060 }, { "epoch": 0.21724345119376456, "grad_norm": 1.1204307837391763, "learning_rate": 4.291362947066376e-06, "loss": 0.0339, "step": 52065 }, { "epoch": 0.21726431390875484, "grad_norm": 0.5101912469095113, "learning_rate": 4.2911568999955146e-06, "loss": 0.0303, "step": 52070 }, { "epoch": 0.2172851766237451, "grad_norm": 0.9370958742237128, "learning_rate": 4.2909508826014616e-06, "loss": 0.0371, "step": 52075 }, { "epoch": 0.21730603933873538, "grad_norm": 0.8455795351645989, "learning_rate": 4.290744894877094e-06, "loss": 0.0354, "step": 52080 }, { "epoch": 0.21732690205372565, "grad_norm": 0.6728355815502868, "learning_rate": 4.290538936815291e-06, "loss": 0.0313, "step": 52085 }, { "epoch": 0.21734776476871595, "grad_norm": 0.6111479306200778, "learning_rate": 4.2903330084089345e-06, "loss": 0.0299, "step": 52090 }, { "epoch": 0.21736862748370622, "grad_norm": 0.981078243711205, "learning_rate": 4.290127109650907e-06, "loss": 0.0354, "step": 52095 }, { "epoch": 0.2173894901986965, "grad_norm": 1.1733195515836314, "learning_rate": 4.289921240534097e-06, "loss": 0.0439, "step": 52100 }, { "epoch": 0.21741035291368677, "grad_norm": 1.3602779486380887, "learning_rate": 4.289715401051392e-06, "loss": 0.0423, "step": 52105 }, { "epoch": 0.21743121562867707, "grad_norm": 0.7425051954170746, "learning_rate": 4.289509591195683e-06, "loss": 0.0362, "step": 52110 }, { "epoch": 0.21745207834366734, "grad_norm": 0.9403363227084424, "learning_rate": 4.289303810959865e-06, "loss": 0.0332, "step": 52115 }, { "epoch": 0.2174729410586576, "grad_norm": 0.6349029941811857, "learning_rate": 4.2890980603368314e-06, "loss": 0.0244, "step": 52120 }, { "epoch": 0.21749380377364788, "grad_norm": 0.6041615535896903, "learning_rate": 4.288892339319483e-06, "loss": 0.0314, "step": 52125 }, { "epoch": 0.21751466648863815, "grad_norm": 0.9388870001283652, "learning_rate": 4.28868664790072e-06, "loss": 0.0357, "step": 52130 }, { "epoch": 0.21753552920362845, "grad_norm": 0.39816541154838986, "learning_rate": 4.2884809860734445e-06, "loss": 0.026, "step": 52135 }, { "epoch": 0.21755639191861872, "grad_norm": 1.1899353578429055, "learning_rate": 4.288275353830561e-06, "loss": 0.035, "step": 52140 }, { "epoch": 0.217577254633609, "grad_norm": 0.6797655073943465, "learning_rate": 4.2880697511649806e-06, "loss": 0.0356, "step": 52145 }, { "epoch": 0.21759811734859927, "grad_norm": 0.6440024729208405, "learning_rate": 4.28786417806961e-06, "loss": 0.026, "step": 52150 }, { "epoch": 0.21761898006358957, "grad_norm": 1.7311343507500017, "learning_rate": 4.287658634537364e-06, "loss": 0.0358, "step": 52155 }, { "epoch": 0.21763984277857984, "grad_norm": 0.6391114515878303, "learning_rate": 4.287453120561158e-06, "loss": 0.0352, "step": 52160 }, { "epoch": 0.2176607054935701, "grad_norm": 0.7939730955240872, "learning_rate": 4.287247636133908e-06, "loss": 0.0376, "step": 52165 }, { "epoch": 0.21768156820856038, "grad_norm": 0.8734966958544481, "learning_rate": 4.287042181248532e-06, "loss": 0.0382, "step": 52170 }, { "epoch": 0.21770243092355065, "grad_norm": 0.9469250834696739, "learning_rate": 4.286836755897956e-06, "loss": 0.0673, "step": 52175 }, { "epoch": 0.21772329363854095, "grad_norm": 0.7203183591162607, "learning_rate": 4.286631360075104e-06, "loss": 0.0296, "step": 52180 }, { "epoch": 0.21774415635353123, "grad_norm": 0.6741603872128058, "learning_rate": 4.286425993772899e-06, "loss": 0.0339, "step": 52185 }, { "epoch": 0.2177650190685215, "grad_norm": 0.9238400097358535, "learning_rate": 4.286220656984274e-06, "loss": 0.0264, "step": 52190 }, { "epoch": 0.21778588178351177, "grad_norm": 1.3352097974410118, "learning_rate": 4.286015349702159e-06, "loss": 0.0328, "step": 52195 }, { "epoch": 0.21780674449850207, "grad_norm": 0.9804962100288467, "learning_rate": 4.285810071919488e-06, "loss": 0.0431, "step": 52200 }, { "epoch": 0.21782760721349234, "grad_norm": 0.7118675330107861, "learning_rate": 4.285604823629198e-06, "loss": 0.0394, "step": 52205 }, { "epoch": 0.2178484699284826, "grad_norm": 0.8340564704007492, "learning_rate": 4.285399604824227e-06, "loss": 0.0337, "step": 52210 }, { "epoch": 0.21786933264347288, "grad_norm": 0.6495494002172086, "learning_rate": 4.285194415497517e-06, "loss": 0.0352, "step": 52215 }, { "epoch": 0.21789019535846316, "grad_norm": 1.4446547273749049, "learning_rate": 4.28498925564201e-06, "loss": 0.0282, "step": 52220 }, { "epoch": 0.21791105807345346, "grad_norm": 0.8417596351327592, "learning_rate": 4.284784125250653e-06, "loss": 0.0404, "step": 52225 }, { "epoch": 0.21793192078844373, "grad_norm": 0.97938385291811, "learning_rate": 4.284579024316394e-06, "loss": 0.0332, "step": 52230 }, { "epoch": 0.217952783503434, "grad_norm": 0.7251937593805897, "learning_rate": 4.284373952832182e-06, "loss": 0.0388, "step": 52235 }, { "epoch": 0.21797364621842427, "grad_norm": 0.9418512649838573, "learning_rate": 4.284168910790973e-06, "loss": 0.0336, "step": 52240 }, { "epoch": 0.21799450893341457, "grad_norm": 1.3288114105512592, "learning_rate": 4.28396389818572e-06, "loss": 0.0281, "step": 52245 }, { "epoch": 0.21801537164840484, "grad_norm": 0.8169628050758962, "learning_rate": 4.28375891500938e-06, "loss": 0.0355, "step": 52250 }, { "epoch": 0.21803623436339511, "grad_norm": 1.0067585367294298, "learning_rate": 4.283553961254914e-06, "loss": 0.0484, "step": 52255 }, { "epoch": 0.2180570970783854, "grad_norm": 1.2336245247037105, "learning_rate": 4.283349036915284e-06, "loss": 0.041, "step": 52260 }, { "epoch": 0.21807795979337566, "grad_norm": 0.7312466516408224, "learning_rate": 4.2831441419834565e-06, "loss": 0.032, "step": 52265 }, { "epoch": 0.21809882250836596, "grad_norm": 0.7371268047347296, "learning_rate": 4.282939276452396e-06, "loss": 0.0232, "step": 52270 }, { "epoch": 0.21811968522335623, "grad_norm": 0.7264900531686839, "learning_rate": 4.282734440315073e-06, "loss": 0.0397, "step": 52275 }, { "epoch": 0.2181405479383465, "grad_norm": 1.0956897073753238, "learning_rate": 4.282529633564458e-06, "loss": 0.0338, "step": 52280 }, { "epoch": 0.21816141065333677, "grad_norm": 0.767792650111386, "learning_rate": 4.282324856193528e-06, "loss": 0.0338, "step": 52285 }, { "epoch": 0.21818227336832707, "grad_norm": 0.9105545755921275, "learning_rate": 4.282120108195258e-06, "loss": 0.0306, "step": 52290 }, { "epoch": 0.21820313608331735, "grad_norm": 0.8190869735795142, "learning_rate": 4.281915389562623e-06, "loss": 0.0371, "step": 52295 }, { "epoch": 0.21822399879830762, "grad_norm": 1.0960500796164165, "learning_rate": 4.281710700288611e-06, "loss": 0.0421, "step": 52300 }, { "epoch": 0.2182448615132979, "grad_norm": 0.9279406921221448, "learning_rate": 4.2815060403662015e-06, "loss": 0.0282, "step": 52305 }, { "epoch": 0.21826572422828816, "grad_norm": 0.5771932756166321, "learning_rate": 4.28130140978838e-06, "loss": 0.0375, "step": 52310 }, { "epoch": 0.21828658694327846, "grad_norm": 0.8620114600350913, "learning_rate": 4.281096808548136e-06, "loss": 0.0391, "step": 52315 }, { "epoch": 0.21830744965826873, "grad_norm": 0.8004242689990717, "learning_rate": 4.280892236638459e-06, "loss": 0.0343, "step": 52320 }, { "epoch": 0.218328312373259, "grad_norm": 0.7370390186565093, "learning_rate": 4.280687694052342e-06, "loss": 0.0332, "step": 52325 }, { "epoch": 0.21834917508824928, "grad_norm": 0.5348911521107006, "learning_rate": 4.280483180782782e-06, "loss": 0.0343, "step": 52330 }, { "epoch": 0.21837003780323958, "grad_norm": 0.875366862753627, "learning_rate": 4.280278696822776e-06, "loss": 0.0381, "step": 52335 }, { "epoch": 0.21839090051822985, "grad_norm": 1.3736802358136657, "learning_rate": 4.280074242165322e-06, "loss": 0.0334, "step": 52340 }, { "epoch": 0.21841176323322012, "grad_norm": 0.6835742326885557, "learning_rate": 4.279869816803423e-06, "loss": 0.0258, "step": 52345 }, { "epoch": 0.2184326259482104, "grad_norm": 1.0063717583274416, "learning_rate": 4.279665420730084e-06, "loss": 0.0374, "step": 52350 }, { "epoch": 0.21845348866320066, "grad_norm": 1.1844620610044667, "learning_rate": 4.279461053938312e-06, "loss": 0.0429, "step": 52355 }, { "epoch": 0.21847435137819096, "grad_norm": 0.8080344406178244, "learning_rate": 4.279256716421117e-06, "loss": 0.028, "step": 52360 }, { "epoch": 0.21849521409318123, "grad_norm": 1.2892802911847447, "learning_rate": 4.279052408171509e-06, "loss": 0.033, "step": 52365 }, { "epoch": 0.2185160768081715, "grad_norm": 1.114865210949483, "learning_rate": 4.2788481291825026e-06, "loss": 0.0334, "step": 52370 }, { "epoch": 0.21853693952316178, "grad_norm": 0.6401048721161636, "learning_rate": 4.278643879447114e-06, "loss": 0.0498, "step": 52375 }, { "epoch": 0.21855780223815208, "grad_norm": 0.9035001490516739, "learning_rate": 4.278439658958364e-06, "loss": 0.0413, "step": 52380 }, { "epoch": 0.21857866495314235, "grad_norm": 1.1460131046690336, "learning_rate": 4.278235467709269e-06, "loss": 0.0321, "step": 52385 }, { "epoch": 0.21859952766813262, "grad_norm": 1.5059858222658977, "learning_rate": 4.278031305692856e-06, "loss": 0.0504, "step": 52390 }, { "epoch": 0.2186203903831229, "grad_norm": 0.9899698041662744, "learning_rate": 4.277827172902149e-06, "loss": 0.0283, "step": 52395 }, { "epoch": 0.21864125309811316, "grad_norm": 0.6230469829043658, "learning_rate": 4.277623069330176e-06, "loss": 0.0357, "step": 52400 }, { "epoch": 0.21866211581310346, "grad_norm": 0.6245451909756662, "learning_rate": 4.277418994969967e-06, "loss": 0.0266, "step": 52405 }, { "epoch": 0.21868297852809374, "grad_norm": 0.8936631756612622, "learning_rate": 4.277214949814556e-06, "loss": 0.0388, "step": 52410 }, { "epoch": 0.218703841243084, "grad_norm": 0.4216113394845304, "learning_rate": 4.277010933856977e-06, "loss": 0.0319, "step": 52415 }, { "epoch": 0.21872470395807428, "grad_norm": 0.6615199325062449, "learning_rate": 4.276806947090267e-06, "loss": 0.0337, "step": 52420 }, { "epoch": 0.21874556667306458, "grad_norm": 0.5877612923097241, "learning_rate": 4.2766029895074655e-06, "loss": 0.0355, "step": 52425 }, { "epoch": 0.21876642938805485, "grad_norm": 0.7103351140045628, "learning_rate": 4.276399061101614e-06, "loss": 0.0315, "step": 52430 }, { "epoch": 0.21878729210304512, "grad_norm": 0.8432088546061978, "learning_rate": 4.276195161865758e-06, "loss": 0.0293, "step": 52435 }, { "epoch": 0.2188081548180354, "grad_norm": 1.148188431896708, "learning_rate": 4.275991291792943e-06, "loss": 0.0302, "step": 52440 }, { "epoch": 0.21882901753302567, "grad_norm": 1.286885361853975, "learning_rate": 4.275787450876219e-06, "loss": 0.0378, "step": 52445 }, { "epoch": 0.21884988024801597, "grad_norm": 0.836483324715212, "learning_rate": 4.275583639108635e-06, "loss": 0.0326, "step": 52450 }, { "epoch": 0.21887074296300624, "grad_norm": 0.7045136077204988, "learning_rate": 4.275379856483248e-06, "loss": 0.0313, "step": 52455 }, { "epoch": 0.2188916056779965, "grad_norm": 0.9688208379058277, "learning_rate": 4.275176102993109e-06, "loss": 0.0377, "step": 52460 }, { "epoch": 0.21891246839298678, "grad_norm": 0.9666486559165196, "learning_rate": 4.274972378631281e-06, "loss": 0.0347, "step": 52465 }, { "epoch": 0.21893333110797708, "grad_norm": 0.6078019872163654, "learning_rate": 4.27476868339082e-06, "loss": 0.0315, "step": 52470 }, { "epoch": 0.21895419382296735, "grad_norm": 0.7855294542975194, "learning_rate": 4.274565017264792e-06, "loss": 0.0302, "step": 52475 }, { "epoch": 0.21897505653795762, "grad_norm": 0.7911371282876148, "learning_rate": 4.274361380246261e-06, "loss": 0.0409, "step": 52480 }, { "epoch": 0.2189959192529479, "grad_norm": 1.0083403825445514, "learning_rate": 4.274157772328294e-06, "loss": 0.032, "step": 52485 }, { "epoch": 0.21901678196793817, "grad_norm": 0.7046794363517043, "learning_rate": 4.273954193503961e-06, "loss": 0.0354, "step": 52490 }, { "epoch": 0.21903764468292847, "grad_norm": 0.9678128017985342, "learning_rate": 4.273750643766334e-06, "loss": 0.0224, "step": 52495 }, { "epoch": 0.21905850739791874, "grad_norm": 1.1142387742961242, "learning_rate": 4.273547123108487e-06, "loss": 0.0319, "step": 52500 }, { "epoch": 0.219079370112909, "grad_norm": 0.627847450027935, "learning_rate": 4.2733436315234975e-06, "loss": 0.0313, "step": 52505 }, { "epoch": 0.21910023282789928, "grad_norm": 0.9852566942380279, "learning_rate": 4.273140169004444e-06, "loss": 0.0388, "step": 52510 }, { "epoch": 0.21912109554288958, "grad_norm": 0.7348696358832676, "learning_rate": 4.272936735544406e-06, "loss": 0.026, "step": 52515 }, { "epoch": 0.21914195825787985, "grad_norm": 0.6231377388744243, "learning_rate": 4.27273333113647e-06, "loss": 0.0277, "step": 52520 }, { "epoch": 0.21916282097287013, "grad_norm": 0.8030576001615273, "learning_rate": 4.272529955773719e-06, "loss": 0.0385, "step": 52525 }, { "epoch": 0.2191836836878604, "grad_norm": 1.2405962275889948, "learning_rate": 4.272326609449243e-06, "loss": 0.0277, "step": 52530 }, { "epoch": 0.21920454640285067, "grad_norm": 0.743902818940592, "learning_rate": 4.272123292156133e-06, "loss": 0.0318, "step": 52535 }, { "epoch": 0.21922540911784097, "grad_norm": 2.080417413249893, "learning_rate": 4.271920003887479e-06, "loss": 0.038, "step": 52540 }, { "epoch": 0.21924627183283124, "grad_norm": 1.0220516243271154, "learning_rate": 4.271716744636379e-06, "loss": 0.0477, "step": 52545 }, { "epoch": 0.2192671345478215, "grad_norm": 0.8902536517415621, "learning_rate": 4.271513514395929e-06, "loss": 0.0327, "step": 52550 }, { "epoch": 0.21928799726281178, "grad_norm": 0.9635183120993893, "learning_rate": 4.2713103131592275e-06, "loss": 0.0352, "step": 52555 }, { "epoch": 0.21930885997780208, "grad_norm": 0.7397825649357616, "learning_rate": 4.271107140919379e-06, "loss": 0.0338, "step": 52560 }, { "epoch": 0.21932972269279236, "grad_norm": 0.7266875743830412, "learning_rate": 4.270903997669485e-06, "loss": 0.0474, "step": 52565 }, { "epoch": 0.21935058540778263, "grad_norm": 0.9113002883153573, "learning_rate": 4.270700883402653e-06, "loss": 0.0418, "step": 52570 }, { "epoch": 0.2193714481227729, "grad_norm": 1.1952162285076628, "learning_rate": 4.270497798111994e-06, "loss": 0.04, "step": 52575 }, { "epoch": 0.21939231083776317, "grad_norm": 0.8082100674681791, "learning_rate": 4.270294741790616e-06, "loss": 0.0396, "step": 52580 }, { "epoch": 0.21941317355275347, "grad_norm": 0.8285606829312395, "learning_rate": 4.270091714431635e-06, "loss": 0.0364, "step": 52585 }, { "epoch": 0.21943403626774374, "grad_norm": 0.43516037208660147, "learning_rate": 4.2698887160281635e-06, "loss": 0.028, "step": 52590 }, { "epoch": 0.21945489898273401, "grad_norm": 0.9205815697893122, "learning_rate": 4.2696857465733224e-06, "loss": 0.0318, "step": 52595 }, { "epoch": 0.2194757616977243, "grad_norm": 0.7448915346884268, "learning_rate": 4.269482806060231e-06, "loss": 0.034, "step": 52600 }, { "epoch": 0.21949662441271459, "grad_norm": 0.7003012943545676, "learning_rate": 4.269279894482011e-06, "loss": 0.035, "step": 52605 }, { "epoch": 0.21951748712770486, "grad_norm": 1.2630605115862852, "learning_rate": 4.26907701183179e-06, "loss": 0.0365, "step": 52610 }, { "epoch": 0.21953834984269513, "grad_norm": 1.062708131575517, "learning_rate": 4.268874158102691e-06, "loss": 0.0351, "step": 52615 }, { "epoch": 0.2195592125576854, "grad_norm": 0.500282986464059, "learning_rate": 4.268671333287847e-06, "loss": 0.0227, "step": 52620 }, { "epoch": 0.21958007527267567, "grad_norm": 0.7106551646625185, "learning_rate": 4.268468537380388e-06, "loss": 0.0334, "step": 52625 }, { "epoch": 0.21960093798766597, "grad_norm": 0.9139505615145999, "learning_rate": 4.268265770373448e-06, "loss": 0.0326, "step": 52630 }, { "epoch": 0.21962180070265624, "grad_norm": 4.458497867902504, "learning_rate": 4.268063032260164e-06, "loss": 0.0419, "step": 52635 }, { "epoch": 0.21964266341764652, "grad_norm": 0.9740426811254492, "learning_rate": 4.267860323033674e-06, "loss": 0.0452, "step": 52640 }, { "epoch": 0.2196635261326368, "grad_norm": 0.6367614861942198, "learning_rate": 4.267657642687119e-06, "loss": 0.0389, "step": 52645 }, { "epoch": 0.2196843888476271, "grad_norm": 1.053104523367812, "learning_rate": 4.267454991213643e-06, "loss": 0.0369, "step": 52650 }, { "epoch": 0.21970525156261736, "grad_norm": 0.7112129671932396, "learning_rate": 4.267252368606389e-06, "loss": 0.0383, "step": 52655 }, { "epoch": 0.21972611427760763, "grad_norm": 1.2050341110200287, "learning_rate": 4.267049774858507e-06, "loss": 0.0368, "step": 52660 }, { "epoch": 0.2197469769925979, "grad_norm": 1.0751113303962119, "learning_rate": 4.266847209963147e-06, "loss": 0.0479, "step": 52665 }, { "epoch": 0.21976783970758818, "grad_norm": 0.5598102531200001, "learning_rate": 4.2666446739134595e-06, "loss": 0.0258, "step": 52670 }, { "epoch": 0.21978870242257847, "grad_norm": 0.9472035965137, "learning_rate": 4.2664421667026005e-06, "loss": 0.0341, "step": 52675 }, { "epoch": 0.21980956513756875, "grad_norm": 0.7566080046934989, "learning_rate": 4.2662396883237266e-06, "loss": 0.0333, "step": 52680 }, { "epoch": 0.21983042785255902, "grad_norm": 0.5648804374620813, "learning_rate": 4.2660372387699945e-06, "loss": 0.0463, "step": 52685 }, { "epoch": 0.2198512905675493, "grad_norm": 0.9541207130161864, "learning_rate": 4.265834818034569e-06, "loss": 0.0325, "step": 52690 }, { "epoch": 0.2198721532825396, "grad_norm": 0.8039948283812157, "learning_rate": 4.265632426110612e-06, "loss": 0.0528, "step": 52695 }, { "epoch": 0.21989301599752986, "grad_norm": 0.8353378488613623, "learning_rate": 4.265430062991289e-06, "loss": 0.0287, "step": 52700 }, { "epoch": 0.21991387871252013, "grad_norm": 1.07976064990311, "learning_rate": 4.26522772866977e-06, "loss": 0.0325, "step": 52705 }, { "epoch": 0.2199347414275104, "grad_norm": 1.3657050120496748, "learning_rate": 4.265025423139223e-06, "loss": 0.0351, "step": 52710 }, { "epoch": 0.21995560414250068, "grad_norm": 1.0504885458244488, "learning_rate": 4.264823146392822e-06, "loss": 0.0314, "step": 52715 }, { "epoch": 0.21997646685749098, "grad_norm": 0.7563643137097135, "learning_rate": 4.264620898423742e-06, "loss": 0.0428, "step": 52720 }, { "epoch": 0.21999732957248125, "grad_norm": 0.7720561326406853, "learning_rate": 4.26441867922516e-06, "loss": 0.0283, "step": 52725 }, { "epoch": 0.22001819228747152, "grad_norm": 0.9486897141636357, "learning_rate": 4.264216488790255e-06, "loss": 0.0287, "step": 52730 }, { "epoch": 0.2200390550024618, "grad_norm": 1.4272095063553571, "learning_rate": 4.264014327112208e-06, "loss": 0.0358, "step": 52735 }, { "epoch": 0.2200599177174521, "grad_norm": 0.5438805709383145, "learning_rate": 4.263812194184206e-06, "loss": 0.0343, "step": 52740 }, { "epoch": 0.22008078043244236, "grad_norm": 0.7851811077989165, "learning_rate": 4.2636100899994334e-06, "loss": 0.0316, "step": 52745 }, { "epoch": 0.22010164314743264, "grad_norm": 1.1477868878387938, "learning_rate": 4.263408014551078e-06, "loss": 0.032, "step": 52750 }, { "epoch": 0.2201225058624229, "grad_norm": 0.8597105845158847, "learning_rate": 4.263205967832332e-06, "loss": 0.0242, "step": 52755 }, { "epoch": 0.22014336857741318, "grad_norm": 0.5997820758482234, "learning_rate": 4.263003949836387e-06, "loss": 0.0502, "step": 52760 }, { "epoch": 0.22016423129240348, "grad_norm": 0.573964242997734, "learning_rate": 4.262801960556438e-06, "loss": 0.0262, "step": 52765 }, { "epoch": 0.22018509400739375, "grad_norm": 1.0917931839871373, "learning_rate": 4.262599999985685e-06, "loss": 0.049, "step": 52770 }, { "epoch": 0.22020595672238402, "grad_norm": 1.0015457142297681, "learning_rate": 4.262398068117325e-06, "loss": 0.0364, "step": 52775 }, { "epoch": 0.2202268194373743, "grad_norm": 1.5719676196200558, "learning_rate": 4.262196164944563e-06, "loss": 0.0362, "step": 52780 }, { "epoch": 0.2202476821523646, "grad_norm": 0.6565061414336661, "learning_rate": 4.2619942904606015e-06, "loss": 0.0274, "step": 52785 }, { "epoch": 0.22026854486735487, "grad_norm": 0.4317501261555755, "learning_rate": 4.261792444658647e-06, "loss": 0.0272, "step": 52790 }, { "epoch": 0.22028940758234514, "grad_norm": 1.2433744671986469, "learning_rate": 4.261590627531909e-06, "loss": 0.0462, "step": 52795 }, { "epoch": 0.2203102702973354, "grad_norm": 1.2382616147921455, "learning_rate": 4.261388839073597e-06, "loss": 0.0373, "step": 52800 }, { "epoch": 0.22033113301232568, "grad_norm": 0.9543705818862755, "learning_rate": 4.261187079276927e-06, "loss": 0.038, "step": 52805 }, { "epoch": 0.22035199572731598, "grad_norm": 1.7901216177700467, "learning_rate": 4.260985348135113e-06, "loss": 0.0336, "step": 52810 }, { "epoch": 0.22037285844230625, "grad_norm": 0.9038677376753065, "learning_rate": 4.2607836456413714e-06, "loss": 0.0344, "step": 52815 }, { "epoch": 0.22039372115729652, "grad_norm": 0.8392319305706354, "learning_rate": 4.260581971788924e-06, "loss": 0.0247, "step": 52820 }, { "epoch": 0.2204145838722868, "grad_norm": 0.9945499250918752, "learning_rate": 4.260380326570993e-06, "loss": 0.0361, "step": 52825 }, { "epoch": 0.2204354465872771, "grad_norm": 0.7625428913432105, "learning_rate": 4.260178709980805e-06, "loss": 0.026, "step": 52830 }, { "epoch": 0.22045630930226737, "grad_norm": 0.7509985430299714, "learning_rate": 4.259977122011583e-06, "loss": 0.0249, "step": 52835 }, { "epoch": 0.22047717201725764, "grad_norm": 2.8884412728648594, "learning_rate": 4.259775562656558e-06, "loss": 0.0346, "step": 52840 }, { "epoch": 0.2204980347322479, "grad_norm": 0.5850195813543276, "learning_rate": 4.25957403190896e-06, "loss": 0.0308, "step": 52845 }, { "epoch": 0.22051889744723818, "grad_norm": 2.08831245473974, "learning_rate": 4.259372529762024e-06, "loss": 0.038, "step": 52850 }, { "epoch": 0.22053976016222848, "grad_norm": 1.101864693017542, "learning_rate": 4.259171056208986e-06, "loss": 0.0365, "step": 52855 }, { "epoch": 0.22056062287721875, "grad_norm": 1.2741781243708188, "learning_rate": 4.2589696112430826e-06, "loss": 0.05, "step": 52860 }, { "epoch": 0.22058148559220903, "grad_norm": 0.8619045611106494, "learning_rate": 4.258768194857554e-06, "loss": 0.0296, "step": 52865 }, { "epoch": 0.2206023483071993, "grad_norm": 1.005363313849764, "learning_rate": 4.258566807045645e-06, "loss": 0.0319, "step": 52870 }, { "epoch": 0.2206232110221896, "grad_norm": 0.823361274579477, "learning_rate": 4.258365447800597e-06, "loss": 0.0386, "step": 52875 }, { "epoch": 0.22064407373717987, "grad_norm": 0.6175074028545376, "learning_rate": 4.25816411711566e-06, "loss": 0.0432, "step": 52880 }, { "epoch": 0.22066493645217014, "grad_norm": 0.9502092910777846, "learning_rate": 4.257962814984081e-06, "loss": 0.0413, "step": 52885 }, { "epoch": 0.2206857991671604, "grad_norm": 0.5018104827875662, "learning_rate": 4.257761541399113e-06, "loss": 0.0379, "step": 52890 }, { "epoch": 0.22070666188215068, "grad_norm": 0.7162452493763201, "learning_rate": 4.257560296354008e-06, "loss": 0.0328, "step": 52895 }, { "epoch": 0.22072752459714098, "grad_norm": 1.0313569639491549, "learning_rate": 4.257359079842024e-06, "loss": 0.038, "step": 52900 }, { "epoch": 0.22074838731213126, "grad_norm": 0.673207530785589, "learning_rate": 4.2571578918564176e-06, "loss": 0.0319, "step": 52905 }, { "epoch": 0.22076925002712153, "grad_norm": 0.9908169857493981, "learning_rate": 4.256956732390449e-06, "loss": 0.0375, "step": 52910 }, { "epoch": 0.2207901127421118, "grad_norm": 0.5638461995507502, "learning_rate": 4.256755601437381e-06, "loss": 0.0277, "step": 52915 }, { "epoch": 0.2208109754571021, "grad_norm": 1.2813735203868555, "learning_rate": 4.25655449899048e-06, "loss": 0.0403, "step": 52920 }, { "epoch": 0.22083183817209237, "grad_norm": 0.6507046019022948, "learning_rate": 4.25635342504301e-06, "loss": 0.0439, "step": 52925 }, { "epoch": 0.22085270088708264, "grad_norm": 0.4135690425898258, "learning_rate": 4.256152379588244e-06, "loss": 0.0327, "step": 52930 }, { "epoch": 0.22087356360207291, "grad_norm": 0.6551953694268123, "learning_rate": 4.255951362619451e-06, "loss": 0.026, "step": 52935 }, { "epoch": 0.22089442631706319, "grad_norm": 0.8480486487485311, "learning_rate": 4.255750374129904e-06, "loss": 0.0279, "step": 52940 }, { "epoch": 0.22091528903205349, "grad_norm": 0.7624231818156846, "learning_rate": 4.255549414112882e-06, "loss": 0.0371, "step": 52945 }, { "epoch": 0.22093615174704376, "grad_norm": 1.6987746760939688, "learning_rate": 4.255348482561659e-06, "loss": 0.0502, "step": 52950 }, { "epoch": 0.22095701446203403, "grad_norm": 0.680039113800718, "learning_rate": 4.255147579469519e-06, "loss": 0.0316, "step": 52955 }, { "epoch": 0.2209778771770243, "grad_norm": 0.43756237299262096, "learning_rate": 4.254946704829743e-06, "loss": 0.0289, "step": 52960 }, { "epoch": 0.2209987398920146, "grad_norm": 1.171790903004205, "learning_rate": 4.2547458586356154e-06, "loss": 0.0341, "step": 52965 }, { "epoch": 0.22101960260700487, "grad_norm": 1.0412308226399876, "learning_rate": 4.254545040880425e-06, "loss": 0.0346, "step": 52970 }, { "epoch": 0.22104046532199514, "grad_norm": 1.10783574650525, "learning_rate": 4.254344251557459e-06, "loss": 0.0381, "step": 52975 }, { "epoch": 0.22106132803698542, "grad_norm": 1.0233242234317679, "learning_rate": 4.25414349066001e-06, "loss": 0.0427, "step": 52980 }, { "epoch": 0.2210821907519757, "grad_norm": 0.9512106973359564, "learning_rate": 4.253942758181372e-06, "loss": 0.0254, "step": 52985 }, { "epoch": 0.221103053466966, "grad_norm": 0.879311498750552, "learning_rate": 4.253742054114841e-06, "loss": 0.0326, "step": 52990 }, { "epoch": 0.22112391618195626, "grad_norm": 1.4188835016530719, "learning_rate": 4.253541378453712e-06, "loss": 0.0376, "step": 52995 }, { "epoch": 0.22114477889694653, "grad_norm": 0.7133367302287668, "learning_rate": 4.253340731191291e-06, "loss": 0.0317, "step": 53000 }, { "epoch": 0.2211656416119368, "grad_norm": 1.0321360953736143, "learning_rate": 4.253140112320876e-06, "loss": 0.0439, "step": 53005 }, { "epoch": 0.2211865043269271, "grad_norm": 1.3164870050686956, "learning_rate": 4.2529395218357726e-06, "loss": 0.0395, "step": 53010 }, { "epoch": 0.22120736704191737, "grad_norm": 0.7385540383728806, "learning_rate": 4.252738959729289e-06, "loss": 0.0347, "step": 53015 }, { "epoch": 0.22122822975690765, "grad_norm": 1.0357648122098302, "learning_rate": 4.252538425994733e-06, "loss": 0.0405, "step": 53020 }, { "epoch": 0.22124909247189792, "grad_norm": 0.8039089365431488, "learning_rate": 4.252337920625418e-06, "loss": 0.03, "step": 53025 }, { "epoch": 0.2212699551868882, "grad_norm": 0.8226609264553523, "learning_rate": 4.2521374436146544e-06, "loss": 0.0322, "step": 53030 }, { "epoch": 0.2212908179018785, "grad_norm": 0.6938944665775233, "learning_rate": 4.2519369949557615e-06, "loss": 0.0308, "step": 53035 }, { "epoch": 0.22131168061686876, "grad_norm": 0.6409115734107663, "learning_rate": 4.2517365746420546e-06, "loss": 0.0281, "step": 53040 }, { "epoch": 0.22133254333185903, "grad_norm": 0.7003401095832439, "learning_rate": 4.251536182666855e-06, "loss": 0.0321, "step": 53045 }, { "epoch": 0.2213534060468493, "grad_norm": 0.8075149880341261, "learning_rate": 4.251335819023486e-06, "loss": 0.0304, "step": 53050 }, { "epoch": 0.2213742687618396, "grad_norm": 0.8310259284741106, "learning_rate": 4.25113548370527e-06, "loss": 0.0333, "step": 53055 }, { "epoch": 0.22139513147682988, "grad_norm": 0.7758067764968325, "learning_rate": 4.250935176705535e-06, "loss": 0.0259, "step": 53060 }, { "epoch": 0.22141599419182015, "grad_norm": 0.5951452767086272, "learning_rate": 4.2507348980176115e-06, "loss": 0.028, "step": 53065 }, { "epoch": 0.22143685690681042, "grad_norm": 0.7191808325349103, "learning_rate": 4.250534647634828e-06, "loss": 0.0306, "step": 53070 }, { "epoch": 0.2214577196218007, "grad_norm": 0.844431693074774, "learning_rate": 4.250334425550519e-06, "loss": 0.036, "step": 53075 }, { "epoch": 0.221478582336791, "grad_norm": 0.8019954837269185, "learning_rate": 4.250134231758021e-06, "loss": 0.0301, "step": 53080 }, { "epoch": 0.22149944505178126, "grad_norm": 1.0307880710526593, "learning_rate": 4.249934066250671e-06, "loss": 0.041, "step": 53085 }, { "epoch": 0.22152030776677153, "grad_norm": 0.6136232133462163, "learning_rate": 4.249733929021809e-06, "loss": 0.0357, "step": 53090 }, { "epoch": 0.2215411704817618, "grad_norm": 0.8508141259433134, "learning_rate": 4.249533820064778e-06, "loss": 0.0314, "step": 53095 }, { "epoch": 0.2215620331967521, "grad_norm": 0.5644638613764204, "learning_rate": 4.2493337393729205e-06, "loss": 0.0246, "step": 53100 }, { "epoch": 0.22158289591174238, "grad_norm": 0.7583762730576409, "learning_rate": 4.249133686939585e-06, "loss": 0.0418, "step": 53105 }, { "epoch": 0.22160375862673265, "grad_norm": 2.0569430936357365, "learning_rate": 4.248933662758119e-06, "loss": 0.0377, "step": 53110 }, { "epoch": 0.22162462134172292, "grad_norm": 0.5603579552822339, "learning_rate": 4.248733666821874e-06, "loss": 0.0218, "step": 53115 }, { "epoch": 0.2216454840567132, "grad_norm": 1.5427728159503291, "learning_rate": 4.248533699124204e-06, "loss": 0.0245, "step": 53120 }, { "epoch": 0.2216663467717035, "grad_norm": 0.9036454005501126, "learning_rate": 4.248333759658462e-06, "loss": 0.0356, "step": 53125 }, { "epoch": 0.22168720948669376, "grad_norm": 1.0122142401884109, "learning_rate": 4.248133848418008e-06, "loss": 0.0403, "step": 53130 }, { "epoch": 0.22170807220168404, "grad_norm": 0.8182419272103132, "learning_rate": 4.247933965396201e-06, "loss": 0.0316, "step": 53135 }, { "epoch": 0.2217289349166743, "grad_norm": 1.050239622168119, "learning_rate": 4.247734110586404e-06, "loss": 0.0373, "step": 53140 }, { "epoch": 0.2217497976316646, "grad_norm": 0.7014423083413849, "learning_rate": 4.2475342839819775e-06, "loss": 0.0215, "step": 53145 }, { "epoch": 0.22177066034665488, "grad_norm": 1.0376368918467276, "learning_rate": 4.247334485576291e-06, "loss": 0.028, "step": 53150 }, { "epoch": 0.22179152306164515, "grad_norm": 0.662335533098801, "learning_rate": 4.247134715362712e-06, "loss": 0.0355, "step": 53155 }, { "epoch": 0.22181238577663542, "grad_norm": 1.0237210508571308, "learning_rate": 4.246934973334611e-06, "loss": 0.032, "step": 53160 }, { "epoch": 0.2218332484916257, "grad_norm": 1.1008100857050482, "learning_rate": 4.246735259485361e-06, "loss": 0.0333, "step": 53165 }, { "epoch": 0.221854111206616, "grad_norm": 0.8271168731512003, "learning_rate": 4.246535573808337e-06, "loss": 0.0501, "step": 53170 }, { "epoch": 0.22187497392160627, "grad_norm": 0.6542759165735649, "learning_rate": 4.246335916296917e-06, "loss": 0.0287, "step": 53175 }, { "epoch": 0.22189583663659654, "grad_norm": 0.8800534110079727, "learning_rate": 4.246136286944479e-06, "loss": 0.0317, "step": 53180 }, { "epoch": 0.2219166993515868, "grad_norm": 0.5676201984264619, "learning_rate": 4.245936685744406e-06, "loss": 0.0269, "step": 53185 }, { "epoch": 0.2219375620665771, "grad_norm": 0.4094202911780651, "learning_rate": 4.245737112690079e-06, "loss": 0.035, "step": 53190 }, { "epoch": 0.22195842478156738, "grad_norm": 1.3794986250627925, "learning_rate": 4.245537567774888e-06, "loss": 0.0306, "step": 53195 }, { "epoch": 0.22197928749655765, "grad_norm": 0.8638145299184123, "learning_rate": 4.245338050992217e-06, "loss": 0.0417, "step": 53200 }, { "epoch": 0.22200015021154793, "grad_norm": 1.2565144517093152, "learning_rate": 4.245138562335459e-06, "loss": 0.0349, "step": 53205 }, { "epoch": 0.2220210129265382, "grad_norm": 0.7755380036752987, "learning_rate": 4.244939101798005e-06, "loss": 0.0311, "step": 53210 }, { "epoch": 0.2220418756415285, "grad_norm": 0.9367475480356027, "learning_rate": 4.244739669373251e-06, "loss": 0.0316, "step": 53215 }, { "epoch": 0.22206273835651877, "grad_norm": 0.8277256447853095, "learning_rate": 4.244540265054592e-06, "loss": 0.0373, "step": 53220 }, { "epoch": 0.22208360107150904, "grad_norm": 0.6226323220219111, "learning_rate": 4.244340888835428e-06, "loss": 0.0331, "step": 53225 }, { "epoch": 0.2221044637864993, "grad_norm": 0.5936111452929422, "learning_rate": 4.2441415407091615e-06, "loss": 0.0298, "step": 53230 }, { "epoch": 0.2221253265014896, "grad_norm": 0.9069338344454249, "learning_rate": 4.243942220669193e-06, "loss": 0.0364, "step": 53235 }, { "epoch": 0.22214618921647988, "grad_norm": 0.5276948335232052, "learning_rate": 4.243742928708929e-06, "loss": 0.0296, "step": 53240 }, { "epoch": 0.22216705193147016, "grad_norm": 0.5556268354406403, "learning_rate": 4.243543664821778e-06, "loss": 0.0323, "step": 53245 }, { "epoch": 0.22218791464646043, "grad_norm": 0.7116468981434987, "learning_rate": 4.243344429001148e-06, "loss": 0.0318, "step": 53250 }, { "epoch": 0.2222087773614507, "grad_norm": 0.7309991016543114, "learning_rate": 4.243145221240454e-06, "loss": 0.0253, "step": 53255 }, { "epoch": 0.222229640076441, "grad_norm": 1.2496691838605047, "learning_rate": 4.2429460415331066e-06, "loss": 0.0505, "step": 53260 }, { "epoch": 0.22225050279143127, "grad_norm": 0.8742531953507824, "learning_rate": 4.242746889872525e-06, "loss": 0.0267, "step": 53265 }, { "epoch": 0.22227136550642154, "grad_norm": 1.137379055671762, "learning_rate": 4.242547766252126e-06, "loss": 0.033, "step": 53270 }, { "epoch": 0.22229222822141181, "grad_norm": 0.8307712482421243, "learning_rate": 4.24234867066533e-06, "loss": 0.0365, "step": 53275 }, { "epoch": 0.2223130909364021, "grad_norm": 0.700250571706974, "learning_rate": 4.242149603105561e-06, "loss": 0.0345, "step": 53280 }, { "epoch": 0.22233395365139239, "grad_norm": 0.8418219734605974, "learning_rate": 4.241950563566243e-06, "loss": 0.0422, "step": 53285 }, { "epoch": 0.22235481636638266, "grad_norm": 0.6643484461974815, "learning_rate": 4.2417515520408035e-06, "loss": 0.0329, "step": 53290 }, { "epoch": 0.22237567908137293, "grad_norm": 0.6988337976519576, "learning_rate": 4.241552568522671e-06, "loss": 0.0274, "step": 53295 }, { "epoch": 0.2223965417963632, "grad_norm": 0.8110467196320312, "learning_rate": 4.241353613005278e-06, "loss": 0.0304, "step": 53300 }, { "epoch": 0.2224174045113535, "grad_norm": 1.2742255857780973, "learning_rate": 4.2411546854820575e-06, "loss": 0.0383, "step": 53305 }, { "epoch": 0.22243826722634377, "grad_norm": 1.3347814539213914, "learning_rate": 4.240955785946445e-06, "loss": 0.0452, "step": 53310 }, { "epoch": 0.22245912994133404, "grad_norm": 0.7802642773761775, "learning_rate": 4.240756914391878e-06, "loss": 0.0281, "step": 53315 }, { "epoch": 0.22247999265632432, "grad_norm": 0.8703434249686993, "learning_rate": 4.240558070811799e-06, "loss": 0.039, "step": 53320 }, { "epoch": 0.2225008553713146, "grad_norm": 0.8666969321445478, "learning_rate": 4.2403592551996476e-06, "loss": 0.0345, "step": 53325 }, { "epoch": 0.2225217180863049, "grad_norm": 0.7087451296993451, "learning_rate": 4.2401604675488675e-06, "loss": 0.0273, "step": 53330 }, { "epoch": 0.22254258080129516, "grad_norm": 1.0103145189041547, "learning_rate": 4.239961707852908e-06, "loss": 0.0336, "step": 53335 }, { "epoch": 0.22256344351628543, "grad_norm": 1.1020613398218049, "learning_rate": 4.239762976105217e-06, "loss": 0.0403, "step": 53340 }, { "epoch": 0.2225843062312757, "grad_norm": 1.2944446499385254, "learning_rate": 4.239564272299243e-06, "loss": 0.0396, "step": 53345 }, { "epoch": 0.222605168946266, "grad_norm": 1.012094669505213, "learning_rate": 4.239365596428442e-06, "loss": 0.0333, "step": 53350 }, { "epoch": 0.22262603166125627, "grad_norm": 1.102186508197835, "learning_rate": 4.239166948486266e-06, "loss": 0.0315, "step": 53355 }, { "epoch": 0.22264689437624655, "grad_norm": 1.1379400417101697, "learning_rate": 4.238968328466175e-06, "loss": 0.0319, "step": 53360 }, { "epoch": 0.22266775709123682, "grad_norm": 1.3192937035150423, "learning_rate": 4.238769736361627e-06, "loss": 0.0328, "step": 53365 }, { "epoch": 0.2226886198062271, "grad_norm": 1.1887166884552294, "learning_rate": 4.238571172166083e-06, "loss": 0.0396, "step": 53370 }, { "epoch": 0.2227094825212174, "grad_norm": 0.8931029326718339, "learning_rate": 4.238372635873008e-06, "loss": 0.0255, "step": 53375 }, { "epoch": 0.22273034523620766, "grad_norm": 0.5668315413516384, "learning_rate": 4.238174127475867e-06, "loss": 0.0376, "step": 53380 }, { "epoch": 0.22275120795119793, "grad_norm": 0.6650778504584638, "learning_rate": 4.237975646968128e-06, "loss": 0.0298, "step": 53385 }, { "epoch": 0.2227720706661882, "grad_norm": 1.0883130207135585, "learning_rate": 4.237777194343261e-06, "loss": 0.0396, "step": 53390 }, { "epoch": 0.2227929333811785, "grad_norm": 0.551542627892306, "learning_rate": 4.2375787695947385e-06, "loss": 0.0334, "step": 53395 }, { "epoch": 0.22281379609616878, "grad_norm": 1.02570846097668, "learning_rate": 4.237380372716036e-06, "loss": 0.0299, "step": 53400 }, { "epoch": 0.22283465881115905, "grad_norm": 0.8298307076267075, "learning_rate": 4.237182003700627e-06, "loss": 0.0318, "step": 53405 }, { "epoch": 0.22285552152614932, "grad_norm": 0.8452508923219825, "learning_rate": 4.236983662541993e-06, "loss": 0.0366, "step": 53410 }, { "epoch": 0.2228763842411396, "grad_norm": 0.5590410066388847, "learning_rate": 4.236785349233613e-06, "loss": 0.0269, "step": 53415 }, { "epoch": 0.2228972469561299, "grad_norm": 0.8212044002634641, "learning_rate": 4.236587063768971e-06, "loss": 0.0318, "step": 53420 }, { "epoch": 0.22291810967112016, "grad_norm": 0.6708125864192788, "learning_rate": 4.236388806141551e-06, "loss": 0.037, "step": 53425 }, { "epoch": 0.22293897238611043, "grad_norm": 2.069780428119693, "learning_rate": 4.236190576344842e-06, "loss": 0.0343, "step": 53430 }, { "epoch": 0.2229598351011007, "grad_norm": 0.7748027534855094, "learning_rate": 4.235992374372332e-06, "loss": 0.0289, "step": 53435 }, { "epoch": 0.222980697816091, "grad_norm": 1.1320976593203467, "learning_rate": 4.235794200217512e-06, "loss": 0.0505, "step": 53440 }, { "epoch": 0.22300156053108128, "grad_norm": 0.7187666311654339, "learning_rate": 4.235596053873876e-06, "loss": 0.0391, "step": 53445 }, { "epoch": 0.22302242324607155, "grad_norm": 0.7869436961389618, "learning_rate": 4.23539793533492e-06, "loss": 0.0291, "step": 53450 }, { "epoch": 0.22304328596106182, "grad_norm": 0.6632863775104297, "learning_rate": 4.2351998445941415e-06, "loss": 0.0334, "step": 53455 }, { "epoch": 0.2230641486760521, "grad_norm": 0.7049811270161984, "learning_rate": 4.2350017816450415e-06, "loss": 0.0322, "step": 53460 }, { "epoch": 0.2230850113910424, "grad_norm": 0.6954199794701855, "learning_rate": 4.234803746481122e-06, "loss": 0.0333, "step": 53465 }, { "epoch": 0.22310587410603266, "grad_norm": 1.7889915221487471, "learning_rate": 4.234605739095884e-06, "loss": 0.0283, "step": 53470 }, { "epoch": 0.22312673682102294, "grad_norm": 0.565115620345196, "learning_rate": 4.234407759482838e-06, "loss": 0.0317, "step": 53475 }, { "epoch": 0.2231475995360132, "grad_norm": 0.7199422157682014, "learning_rate": 4.23420980763549e-06, "loss": 0.0304, "step": 53480 }, { "epoch": 0.2231684622510035, "grad_norm": 1.2383920693238561, "learning_rate": 4.234011883547352e-06, "loss": 0.034, "step": 53485 }, { "epoch": 0.22318932496599378, "grad_norm": 0.5533901956292306, "learning_rate": 4.233813987211936e-06, "loss": 0.0372, "step": 53490 }, { "epoch": 0.22321018768098405, "grad_norm": 0.8280062903802837, "learning_rate": 4.233616118622758e-06, "loss": 0.0314, "step": 53495 }, { "epoch": 0.22323105039597432, "grad_norm": 0.6839179539682423, "learning_rate": 4.233418277773331e-06, "loss": 0.0375, "step": 53500 }, { "epoch": 0.2232519131109646, "grad_norm": 0.820293608936038, "learning_rate": 4.23322046465718e-06, "loss": 0.0304, "step": 53505 }, { "epoch": 0.2232727758259549, "grad_norm": 1.7661790056929136, "learning_rate": 4.233022679267822e-06, "loss": 0.0341, "step": 53510 }, { "epoch": 0.22329363854094517, "grad_norm": 0.7787154556134627, "learning_rate": 4.232824921598782e-06, "loss": 0.0397, "step": 53515 }, { "epoch": 0.22331450125593544, "grad_norm": 1.0404846087466955, "learning_rate": 4.232627191643583e-06, "loss": 0.0323, "step": 53520 }, { "epoch": 0.2233353639709257, "grad_norm": 0.7542863695755422, "learning_rate": 4.232429489395757e-06, "loss": 0.0268, "step": 53525 }, { "epoch": 0.223356226685916, "grad_norm": 1.009903338510149, "learning_rate": 4.23223181484883e-06, "loss": 0.0358, "step": 53530 }, { "epoch": 0.22337708940090628, "grad_norm": 1.0117199884305534, "learning_rate": 4.232034167996335e-06, "loss": 0.0307, "step": 53535 }, { "epoch": 0.22339795211589655, "grad_norm": 1.0011669874057239, "learning_rate": 4.2318365488318066e-06, "loss": 0.0389, "step": 53540 }, { "epoch": 0.22341881483088682, "grad_norm": 0.7353132122611525, "learning_rate": 4.231638957348779e-06, "loss": 0.0334, "step": 53545 }, { "epoch": 0.2234396775458771, "grad_norm": 1.2282851702184654, "learning_rate": 4.231441393540792e-06, "loss": 0.0301, "step": 53550 }, { "epoch": 0.2234605402608674, "grad_norm": 1.8659268913265328, "learning_rate": 4.231243857401386e-06, "loss": 0.0299, "step": 53555 }, { "epoch": 0.22348140297585767, "grad_norm": 0.8389104413287904, "learning_rate": 4.231046348924101e-06, "loss": 0.0418, "step": 53560 }, { "epoch": 0.22350226569084794, "grad_norm": 0.7189048432374, "learning_rate": 4.230848868102484e-06, "loss": 0.0365, "step": 53565 }, { "epoch": 0.2235231284058382, "grad_norm": 0.8950466225073967, "learning_rate": 4.230651414930081e-06, "loss": 0.0288, "step": 53570 }, { "epoch": 0.2235439911208285, "grad_norm": 1.1688654294095384, "learning_rate": 4.23045398940044e-06, "loss": 0.0341, "step": 53575 }, { "epoch": 0.22356485383581878, "grad_norm": 0.5303253303407424, "learning_rate": 4.230256591507111e-06, "loss": 0.0278, "step": 53580 }, { "epoch": 0.22358571655080905, "grad_norm": 0.4543552830335312, "learning_rate": 4.23005922124365e-06, "loss": 0.0309, "step": 53585 }, { "epoch": 0.22360657926579933, "grad_norm": 1.309246903527117, "learning_rate": 4.229861878603608e-06, "loss": 0.03, "step": 53590 }, { "epoch": 0.2236274419807896, "grad_norm": 1.1634965471541376, "learning_rate": 4.229664563580545e-06, "loss": 0.0302, "step": 53595 }, { "epoch": 0.2236483046957799, "grad_norm": 1.0236042044295688, "learning_rate": 4.229467276168019e-06, "loss": 0.0435, "step": 53600 }, { "epoch": 0.22366916741077017, "grad_norm": 0.8178040752448399, "learning_rate": 4.229270016359592e-06, "loss": 0.0324, "step": 53605 }, { "epoch": 0.22369003012576044, "grad_norm": 1.2075474081532636, "learning_rate": 4.229072784148827e-06, "loss": 0.0357, "step": 53610 }, { "epoch": 0.2237108928407507, "grad_norm": 0.7734246437406038, "learning_rate": 4.22887557952929e-06, "loss": 0.0356, "step": 53615 }, { "epoch": 0.223731755555741, "grad_norm": 1.1478135110576992, "learning_rate": 4.228678402494546e-06, "loss": 0.0419, "step": 53620 }, { "epoch": 0.22375261827073128, "grad_norm": 1.4312375851500994, "learning_rate": 4.228481253038169e-06, "loss": 0.0462, "step": 53625 }, { "epoch": 0.22377348098572156, "grad_norm": 1.14554506258268, "learning_rate": 4.228284131153728e-06, "loss": 0.0236, "step": 53630 }, { "epoch": 0.22379434370071183, "grad_norm": 1.2289784251961833, "learning_rate": 4.228087036834797e-06, "loss": 0.0371, "step": 53635 }, { "epoch": 0.2238152064157021, "grad_norm": 1.4147268452924637, "learning_rate": 4.2278899700749536e-06, "loss": 0.0359, "step": 53640 }, { "epoch": 0.2238360691306924, "grad_norm": 0.943358259258784, "learning_rate": 4.227692930867774e-06, "loss": 0.0383, "step": 53645 }, { "epoch": 0.22385693184568267, "grad_norm": 0.772810148313652, "learning_rate": 4.2274959192068386e-06, "loss": 0.0265, "step": 53650 }, { "epoch": 0.22387779456067294, "grad_norm": 0.8847000767866878, "learning_rate": 4.227298935085731e-06, "loss": 0.0391, "step": 53655 }, { "epoch": 0.22389865727566322, "grad_norm": 0.9034194179805662, "learning_rate": 4.227101978498036e-06, "loss": 0.0342, "step": 53660 }, { "epoch": 0.22391951999065351, "grad_norm": 1.0216360832162876, "learning_rate": 4.226905049437337e-06, "loss": 0.0398, "step": 53665 }, { "epoch": 0.2239403827056438, "grad_norm": 0.9944102268339422, "learning_rate": 4.226708147897225e-06, "loss": 0.0345, "step": 53670 }, { "epoch": 0.22396124542063406, "grad_norm": 1.0443699880770951, "learning_rate": 4.226511273871292e-06, "loss": 0.0348, "step": 53675 }, { "epoch": 0.22398210813562433, "grad_norm": 0.5166183788886075, "learning_rate": 4.226314427353127e-06, "loss": 0.0281, "step": 53680 }, { "epoch": 0.2240029708506146, "grad_norm": 0.6073131111559152, "learning_rate": 4.226117608336327e-06, "loss": 0.0364, "step": 53685 }, { "epoch": 0.2240238335656049, "grad_norm": 0.9020278420155587, "learning_rate": 4.225920816814488e-06, "loss": 0.0412, "step": 53690 }, { "epoch": 0.22404469628059517, "grad_norm": 0.6916511712741783, "learning_rate": 4.225724052781211e-06, "loss": 0.0372, "step": 53695 }, { "epoch": 0.22406555899558545, "grad_norm": 1.0800152887329648, "learning_rate": 4.225527316230095e-06, "loss": 0.0269, "step": 53700 }, { "epoch": 0.22408642171057572, "grad_norm": 0.8217882750177097, "learning_rate": 4.225330607154744e-06, "loss": 0.0424, "step": 53705 }, { "epoch": 0.22410728442556602, "grad_norm": 1.2550385045256438, "learning_rate": 4.2251339255487636e-06, "loss": 0.0318, "step": 53710 }, { "epoch": 0.2241281471405563, "grad_norm": 1.0162840900249348, "learning_rate": 4.22493727140576e-06, "loss": 0.0421, "step": 53715 }, { "epoch": 0.22414900985554656, "grad_norm": 1.7690500148146475, "learning_rate": 4.224740644719344e-06, "loss": 0.0273, "step": 53720 }, { "epoch": 0.22416987257053683, "grad_norm": 0.7695304259368233, "learning_rate": 4.224544045483127e-06, "loss": 0.0515, "step": 53725 }, { "epoch": 0.2241907352855271, "grad_norm": 1.1992251337807245, "learning_rate": 4.224347473690721e-06, "loss": 0.0392, "step": 53730 }, { "epoch": 0.2242115980005174, "grad_norm": 1.210471040061313, "learning_rate": 4.224150929335742e-06, "loss": 0.0367, "step": 53735 }, { "epoch": 0.22423246071550768, "grad_norm": 0.9741000128146967, "learning_rate": 4.22395441241181e-06, "loss": 0.0284, "step": 53740 }, { "epoch": 0.22425332343049795, "grad_norm": 0.6243177362307647, "learning_rate": 4.223757922912543e-06, "loss": 0.033, "step": 53745 }, { "epoch": 0.22427418614548822, "grad_norm": 1.3914113405017146, "learning_rate": 4.223561460831564e-06, "loss": 0.0348, "step": 53750 }, { "epoch": 0.22429504886047852, "grad_norm": 0.9318006341681475, "learning_rate": 4.223365026162494e-06, "loss": 0.0354, "step": 53755 }, { "epoch": 0.2243159115754688, "grad_norm": 0.8284881649872331, "learning_rate": 4.223168618898963e-06, "loss": 0.0383, "step": 53760 }, { "epoch": 0.22433677429045906, "grad_norm": 1.0863604485359033, "learning_rate": 4.222972239034596e-06, "loss": 0.0348, "step": 53765 }, { "epoch": 0.22435763700544933, "grad_norm": 0.9529500382549667, "learning_rate": 4.222775886563026e-06, "loss": 0.0468, "step": 53770 }, { "epoch": 0.2243784997204396, "grad_norm": 0.8925929582770179, "learning_rate": 4.222579561477884e-06, "loss": 0.0284, "step": 53775 }, { "epoch": 0.2243993624354299, "grad_norm": 0.3434151802851611, "learning_rate": 4.222383263772803e-06, "loss": 0.0228, "step": 53780 }, { "epoch": 0.22442022515042018, "grad_norm": 0.7075797378111656, "learning_rate": 4.222186993441421e-06, "loss": 0.0321, "step": 53785 }, { "epoch": 0.22444108786541045, "grad_norm": 0.6036878741053432, "learning_rate": 4.221990750477375e-06, "loss": 0.0274, "step": 53790 }, { "epoch": 0.22446195058040072, "grad_norm": 0.876976370041455, "learning_rate": 4.221794534874309e-06, "loss": 0.0284, "step": 53795 }, { "epoch": 0.22448281329539102, "grad_norm": 0.9558362781957122, "learning_rate": 4.221598346625862e-06, "loss": 0.0345, "step": 53800 }, { "epoch": 0.2245036760103813, "grad_norm": 0.5780910158031016, "learning_rate": 4.221402185725679e-06, "loss": 0.0287, "step": 53805 }, { "epoch": 0.22452453872537156, "grad_norm": 0.8168408522008362, "learning_rate": 4.221206052167409e-06, "loss": 0.0304, "step": 53810 }, { "epoch": 0.22454540144036184, "grad_norm": 1.4139456439590221, "learning_rate": 4.2210099459447e-06, "loss": 0.037, "step": 53815 }, { "epoch": 0.2245662641553521, "grad_norm": 0.7061084351659466, "learning_rate": 4.220813867051201e-06, "loss": 0.0296, "step": 53820 }, { "epoch": 0.2245871268703424, "grad_norm": 0.9582171029007371, "learning_rate": 4.220617815480568e-06, "loss": 0.0236, "step": 53825 }, { "epoch": 0.22460798958533268, "grad_norm": 1.1226734944789407, "learning_rate": 4.220421791226454e-06, "loss": 0.0368, "step": 53830 }, { "epoch": 0.22462885230032295, "grad_norm": 0.6512309701412741, "learning_rate": 4.220225794282517e-06, "loss": 0.0304, "step": 53835 }, { "epoch": 0.22464971501531322, "grad_norm": 0.9235312820618657, "learning_rate": 4.220029824642415e-06, "loss": 0.0412, "step": 53840 }, { "epoch": 0.22467057773030352, "grad_norm": 0.5664665773927795, "learning_rate": 4.2198338822998095e-06, "loss": 0.033, "step": 53845 }, { "epoch": 0.2246914404452938, "grad_norm": 0.7381910710758832, "learning_rate": 4.219637967248365e-06, "loss": 0.0252, "step": 53850 }, { "epoch": 0.22471230316028407, "grad_norm": 0.6987139016536964, "learning_rate": 4.219442079481746e-06, "loss": 0.021, "step": 53855 }, { "epoch": 0.22473316587527434, "grad_norm": 0.8390273795881806, "learning_rate": 4.2192462189936195e-06, "loss": 0.0359, "step": 53860 }, { "epoch": 0.2247540285902646, "grad_norm": 0.8863756670644052, "learning_rate": 4.219050385777655e-06, "loss": 0.0329, "step": 53865 }, { "epoch": 0.2247748913052549, "grad_norm": 0.694739181349767, "learning_rate": 4.218854579827526e-06, "loss": 0.04, "step": 53870 }, { "epoch": 0.22479575402024518, "grad_norm": 1.8247722239044923, "learning_rate": 4.2186588011369024e-06, "loss": 0.0361, "step": 53875 }, { "epoch": 0.22481661673523545, "grad_norm": 1.2897534114451135, "learning_rate": 4.218463049699463e-06, "loss": 0.0337, "step": 53880 }, { "epoch": 0.22483747945022572, "grad_norm": 1.0888380912743763, "learning_rate": 4.218267325508884e-06, "loss": 0.0359, "step": 53885 }, { "epoch": 0.22485834216521602, "grad_norm": 0.4540719793474201, "learning_rate": 4.218071628558845e-06, "loss": 0.0328, "step": 53890 }, { "epoch": 0.2248792048802063, "grad_norm": 0.7246881156644505, "learning_rate": 4.217875958843028e-06, "loss": 0.0375, "step": 53895 }, { "epoch": 0.22490006759519657, "grad_norm": 1.0690996459818918, "learning_rate": 4.217680316355118e-06, "loss": 0.0398, "step": 53900 }, { "epoch": 0.22492093031018684, "grad_norm": 0.678155455870161, "learning_rate": 4.217484701088799e-06, "loss": 0.0275, "step": 53905 }, { "epoch": 0.2249417930251771, "grad_norm": 1.2254559621860674, "learning_rate": 4.217289113037761e-06, "loss": 0.0344, "step": 53910 }, { "epoch": 0.2249626557401674, "grad_norm": 0.7386332913052982, "learning_rate": 4.217093552195692e-06, "loss": 0.0339, "step": 53915 }, { "epoch": 0.22498351845515768, "grad_norm": 1.6253897346897312, "learning_rate": 4.216898018556283e-06, "loss": 0.0378, "step": 53920 }, { "epoch": 0.22500438117014795, "grad_norm": 1.0188271410213419, "learning_rate": 4.216702512113233e-06, "loss": 0.0306, "step": 53925 }, { "epoch": 0.22502524388513823, "grad_norm": 0.6095883267092042, "learning_rate": 4.2165070328602326e-06, "loss": 0.0238, "step": 53930 }, { "epoch": 0.22504610660012853, "grad_norm": 0.6105004393161978, "learning_rate": 4.216311580790983e-06, "loss": 0.0281, "step": 53935 }, { "epoch": 0.2250669693151188, "grad_norm": 0.6551002989738671, "learning_rate": 4.216116155899184e-06, "loss": 0.0437, "step": 53940 }, { "epoch": 0.22508783203010907, "grad_norm": 0.6653994031726689, "learning_rate": 4.215920758178538e-06, "loss": 0.0289, "step": 53945 }, { "epoch": 0.22510869474509934, "grad_norm": 0.7372982594010099, "learning_rate": 4.215725387622748e-06, "loss": 0.0265, "step": 53950 }, { "epoch": 0.2251295574600896, "grad_norm": 0.672008856779097, "learning_rate": 4.2155300442255215e-06, "loss": 0.0312, "step": 53955 }, { "epoch": 0.2251504201750799, "grad_norm": 1.063466665402627, "learning_rate": 4.215334727980566e-06, "loss": 0.0364, "step": 53960 }, { "epoch": 0.22517128289007018, "grad_norm": 1.1696039803875795, "learning_rate": 4.215139438881593e-06, "loss": 0.0263, "step": 53965 }, { "epoch": 0.22519214560506046, "grad_norm": 0.8604133036718701, "learning_rate": 4.214944176922316e-06, "loss": 0.0268, "step": 53970 }, { "epoch": 0.22521300832005073, "grad_norm": 0.5530778668481495, "learning_rate": 4.214748942096445e-06, "loss": 0.0308, "step": 53975 }, { "epoch": 0.22523387103504103, "grad_norm": 0.7708760436147956, "learning_rate": 4.214553734397701e-06, "loss": 0.0422, "step": 53980 }, { "epoch": 0.2252547337500313, "grad_norm": 0.47876460003065974, "learning_rate": 4.2143585538198006e-06, "loss": 0.0271, "step": 53985 }, { "epoch": 0.22527559646502157, "grad_norm": 0.8412443823096645, "learning_rate": 4.214163400356465e-06, "loss": 0.0287, "step": 53990 }, { "epoch": 0.22529645918001184, "grad_norm": 0.9909010504555191, "learning_rate": 4.213968274001417e-06, "loss": 0.0396, "step": 53995 }, { "epoch": 0.22531732189500212, "grad_norm": 0.9343125369625976, "learning_rate": 4.21377317474838e-06, "loss": 0.0355, "step": 54000 }, { "epoch": 0.22533818460999241, "grad_norm": 1.0462107448079583, "learning_rate": 4.213578102591083e-06, "loss": 0.0466, "step": 54005 }, { "epoch": 0.2253590473249827, "grad_norm": 0.9324923037483221, "learning_rate": 4.213383057523252e-06, "loss": 0.0439, "step": 54010 }, { "epoch": 0.22537991003997296, "grad_norm": 0.7483721632492554, "learning_rate": 4.21318803953862e-06, "loss": 0.0309, "step": 54015 }, { "epoch": 0.22540077275496323, "grad_norm": 0.8822093341152958, "learning_rate": 4.212993048630918e-06, "loss": 0.04, "step": 54020 }, { "epoch": 0.22542163546995353, "grad_norm": 0.8687723068041051, "learning_rate": 4.2127980847938824e-06, "loss": 0.0343, "step": 54025 }, { "epoch": 0.2254424981849438, "grad_norm": 1.2024574101294623, "learning_rate": 4.21260314802125e-06, "loss": 0.0344, "step": 54030 }, { "epoch": 0.22546336089993407, "grad_norm": 0.7691790492309486, "learning_rate": 4.212408238306758e-06, "loss": 0.0258, "step": 54035 }, { "epoch": 0.22548422361492435, "grad_norm": 0.6094851512776134, "learning_rate": 4.2122133556441494e-06, "loss": 0.0411, "step": 54040 }, { "epoch": 0.22550508632991462, "grad_norm": 1.181786550177397, "learning_rate": 4.2120185000271655e-06, "loss": 0.0308, "step": 54045 }, { "epoch": 0.22552594904490492, "grad_norm": 0.6202156716961369, "learning_rate": 4.211823671449552e-06, "loss": 0.0283, "step": 54050 }, { "epoch": 0.2255468117598952, "grad_norm": 0.6854736385585983, "learning_rate": 4.2116288699050565e-06, "loss": 0.0255, "step": 54055 }, { "epoch": 0.22556767447488546, "grad_norm": 0.6868491033705528, "learning_rate": 4.211434095387427e-06, "loss": 0.0267, "step": 54060 }, { "epoch": 0.22558853718987573, "grad_norm": 0.5986519185991549, "learning_rate": 4.211239347890415e-06, "loss": 0.0381, "step": 54065 }, { "epoch": 0.22560939990486603, "grad_norm": 0.4218094934560839, "learning_rate": 4.211044627407773e-06, "loss": 0.0274, "step": 54070 }, { "epoch": 0.2256302626198563, "grad_norm": 1.16151108770715, "learning_rate": 4.210849933933257e-06, "loss": 0.0352, "step": 54075 }, { "epoch": 0.22565112533484658, "grad_norm": 0.5383821227508472, "learning_rate": 4.210655267460623e-06, "loss": 0.0312, "step": 54080 }, { "epoch": 0.22567198804983685, "grad_norm": 0.47677375382231063, "learning_rate": 4.210460627983631e-06, "loss": 0.0286, "step": 54085 }, { "epoch": 0.22569285076482712, "grad_norm": 0.8837681555393823, "learning_rate": 4.210266015496043e-06, "loss": 0.0319, "step": 54090 }, { "epoch": 0.22571371347981742, "grad_norm": 0.7444552980871074, "learning_rate": 4.21007142999162e-06, "loss": 0.0303, "step": 54095 }, { "epoch": 0.2257345761948077, "grad_norm": 1.2470700319520773, "learning_rate": 4.209876871464129e-06, "loss": 0.0344, "step": 54100 }, { "epoch": 0.22575543890979796, "grad_norm": 1.23301986641422, "learning_rate": 4.209682339907335e-06, "loss": 0.0344, "step": 54105 }, { "epoch": 0.22577630162478823, "grad_norm": 0.8939596531913249, "learning_rate": 4.20948783531501e-06, "loss": 0.0401, "step": 54110 }, { "epoch": 0.22579716433977853, "grad_norm": 0.7282946341083868, "learning_rate": 4.209293357680924e-06, "loss": 0.0348, "step": 54115 }, { "epoch": 0.2258180270547688, "grad_norm": 0.9086480592259544, "learning_rate": 4.209098906998849e-06, "loss": 0.0266, "step": 54120 }, { "epoch": 0.22583888976975908, "grad_norm": 0.9121430980989482, "learning_rate": 4.208904483262563e-06, "loss": 0.0349, "step": 54125 }, { "epoch": 0.22585975248474935, "grad_norm": 0.6422384274932977, "learning_rate": 4.208710086465841e-06, "loss": 0.0328, "step": 54130 }, { "epoch": 0.22588061519973962, "grad_norm": 0.9493575301554148, "learning_rate": 4.2085157166024635e-06, "loss": 0.0284, "step": 54135 }, { "epoch": 0.22590147791472992, "grad_norm": 0.500536976685575, "learning_rate": 4.20832137366621e-06, "loss": 0.0352, "step": 54140 }, { "epoch": 0.2259223406297202, "grad_norm": 0.7371302577293197, "learning_rate": 4.208127057650867e-06, "loss": 0.0285, "step": 54145 }, { "epoch": 0.22594320334471046, "grad_norm": 1.6352000002186162, "learning_rate": 4.207932768550217e-06, "loss": 0.0422, "step": 54150 }, { "epoch": 0.22596406605970074, "grad_norm": 1.1647441049418952, "learning_rate": 4.207738506358047e-06, "loss": 0.0364, "step": 54155 }, { "epoch": 0.22598492877469104, "grad_norm": 1.0093709436653324, "learning_rate": 4.207544271068148e-06, "loss": 0.0335, "step": 54160 }, { "epoch": 0.2260057914896813, "grad_norm": 0.9508124501288594, "learning_rate": 4.207350062674311e-06, "loss": 0.0282, "step": 54165 }, { "epoch": 0.22602665420467158, "grad_norm": 0.7095470533430484, "learning_rate": 4.20715588117033e-06, "loss": 0.0236, "step": 54170 }, { "epoch": 0.22604751691966185, "grad_norm": 0.9126383494079393, "learning_rate": 4.20696172655e-06, "loss": 0.0337, "step": 54175 }, { "epoch": 0.22606837963465212, "grad_norm": 0.736123861474932, "learning_rate": 4.206767598807116e-06, "loss": 0.0292, "step": 54180 }, { "epoch": 0.22608924234964242, "grad_norm": 0.7419908495895032, "learning_rate": 4.206573497935481e-06, "loss": 0.0339, "step": 54185 }, { "epoch": 0.2261101050646327, "grad_norm": 0.5023236821048697, "learning_rate": 4.206379423928895e-06, "loss": 0.0291, "step": 54190 }, { "epoch": 0.22613096777962297, "grad_norm": 0.549655092819907, "learning_rate": 4.206185376781159e-06, "loss": 0.0379, "step": 54195 }, { "epoch": 0.22615183049461324, "grad_norm": 0.6871549708134406, "learning_rate": 4.205991356486081e-06, "loss": 0.0395, "step": 54200 }, { "epoch": 0.22617269320960354, "grad_norm": 1.2458323745506215, "learning_rate": 4.205797363037468e-06, "loss": 0.0465, "step": 54205 }, { "epoch": 0.2261935559245938, "grad_norm": 0.8748996136148935, "learning_rate": 4.20560339642913e-06, "loss": 0.0475, "step": 54210 }, { "epoch": 0.22621441863958408, "grad_norm": 0.397225981168161, "learning_rate": 4.205409456654876e-06, "loss": 0.0317, "step": 54215 }, { "epoch": 0.22623528135457435, "grad_norm": 0.9156182781427014, "learning_rate": 4.205215543708521e-06, "loss": 0.0298, "step": 54220 }, { "epoch": 0.22625614406956462, "grad_norm": 0.9957860039566877, "learning_rate": 4.205021657583881e-06, "loss": 0.0227, "step": 54225 }, { "epoch": 0.22627700678455492, "grad_norm": 0.8926613725184515, "learning_rate": 4.204827798274771e-06, "loss": 0.0321, "step": 54230 }, { "epoch": 0.2262978694995452, "grad_norm": 0.48945975512431067, "learning_rate": 4.204633965775013e-06, "loss": 0.029, "step": 54235 }, { "epoch": 0.22631873221453547, "grad_norm": 0.8682806805742741, "learning_rate": 4.204440160078427e-06, "loss": 0.0308, "step": 54240 }, { "epoch": 0.22633959492952574, "grad_norm": 1.310610458669705, "learning_rate": 4.204246381178836e-06, "loss": 0.0268, "step": 54245 }, { "epoch": 0.22636045764451604, "grad_norm": 1.1847509683085793, "learning_rate": 4.204052629070066e-06, "loss": 0.0341, "step": 54250 }, { "epoch": 0.2263813203595063, "grad_norm": 0.7163079579296978, "learning_rate": 4.203858903745944e-06, "loss": 0.0311, "step": 54255 }, { "epoch": 0.22640218307449658, "grad_norm": 0.874106154279292, "learning_rate": 4.2036652052002995e-06, "loss": 0.039, "step": 54260 }, { "epoch": 0.22642304578948685, "grad_norm": 0.5617847724652274, "learning_rate": 4.2034715334269635e-06, "loss": 0.0328, "step": 54265 }, { "epoch": 0.22644390850447713, "grad_norm": 0.9578346208379936, "learning_rate": 4.20327788841977e-06, "loss": 0.0379, "step": 54270 }, { "epoch": 0.22646477121946743, "grad_norm": 0.9002469277955347, "learning_rate": 4.203084270172554e-06, "loss": 0.0293, "step": 54275 }, { "epoch": 0.2264856339344577, "grad_norm": 0.6937277418945641, "learning_rate": 4.202890678679151e-06, "loss": 0.0375, "step": 54280 }, { "epoch": 0.22650649664944797, "grad_norm": 0.8604581037984959, "learning_rate": 4.202697113933403e-06, "loss": 0.0304, "step": 54285 }, { "epoch": 0.22652735936443824, "grad_norm": 1.2995057983655547, "learning_rate": 4.202503575929149e-06, "loss": 0.0334, "step": 54290 }, { "epoch": 0.22654822207942854, "grad_norm": 1.1259386129095668, "learning_rate": 4.202310064660235e-06, "loss": 0.0478, "step": 54295 }, { "epoch": 0.2265690847944188, "grad_norm": 0.7647036920997252, "learning_rate": 4.2021165801205035e-06, "loss": 0.0328, "step": 54300 }, { "epoch": 0.22658994750940908, "grad_norm": 1.1177104801988376, "learning_rate": 4.201923122303801e-06, "loss": 0.0299, "step": 54305 }, { "epoch": 0.22661081022439936, "grad_norm": 0.5029224362507738, "learning_rate": 4.20172969120398e-06, "loss": 0.0329, "step": 54310 }, { "epoch": 0.22663167293938963, "grad_norm": 0.5386421306498879, "learning_rate": 4.20153628681489e-06, "loss": 0.0382, "step": 54315 }, { "epoch": 0.22665253565437993, "grad_norm": 1.2037839224907445, "learning_rate": 4.201342909130383e-06, "loss": 0.0242, "step": 54320 }, { "epoch": 0.2266733983693702, "grad_norm": 0.9558883892255892, "learning_rate": 4.2011495581443165e-06, "loss": 0.0344, "step": 54325 }, { "epoch": 0.22669426108436047, "grad_norm": 0.6785171837744851, "learning_rate": 4.200956233850545e-06, "loss": 0.0306, "step": 54330 }, { "epoch": 0.22671512379935074, "grad_norm": 0.6438513872320341, "learning_rate": 4.200762936242929e-06, "loss": 0.0318, "step": 54335 }, { "epoch": 0.22673598651434104, "grad_norm": 0.7759059511356511, "learning_rate": 4.20056966531533e-06, "loss": 0.026, "step": 54340 }, { "epoch": 0.22675684922933131, "grad_norm": 1.136943859565808, "learning_rate": 4.200376421061609e-06, "loss": 0.0366, "step": 54345 }, { "epoch": 0.22677771194432159, "grad_norm": 0.9845928494232319, "learning_rate": 4.200183203475634e-06, "loss": 0.0258, "step": 54350 }, { "epoch": 0.22679857465931186, "grad_norm": 1.0747803318378701, "learning_rate": 4.19999001255127e-06, "loss": 0.0285, "step": 54355 }, { "epoch": 0.22681943737430213, "grad_norm": 0.7939807443818726, "learning_rate": 4.199796848282386e-06, "loss": 0.0281, "step": 54360 }, { "epoch": 0.22684030008929243, "grad_norm": 0.8008353730932657, "learning_rate": 4.199603710662852e-06, "loss": 0.031, "step": 54365 }, { "epoch": 0.2268611628042827, "grad_norm": 0.7218486886266435, "learning_rate": 4.199410599686544e-06, "loss": 0.0259, "step": 54370 }, { "epoch": 0.22688202551927297, "grad_norm": 0.6030744371438658, "learning_rate": 4.199217515347334e-06, "loss": 0.0352, "step": 54375 }, { "epoch": 0.22690288823426324, "grad_norm": 1.0078672620769284, "learning_rate": 4.199024457639101e-06, "loss": 0.0291, "step": 54380 }, { "epoch": 0.22692375094925354, "grad_norm": 0.9528743526120128, "learning_rate": 4.198831426555722e-06, "loss": 0.0419, "step": 54385 }, { "epoch": 0.22694461366424382, "grad_norm": 1.2999743106038761, "learning_rate": 4.198638422091078e-06, "loss": 0.0316, "step": 54390 }, { "epoch": 0.2269654763792341, "grad_norm": 0.7433604735637976, "learning_rate": 4.198445444239052e-06, "loss": 0.0318, "step": 54395 }, { "epoch": 0.22698633909422436, "grad_norm": 0.5586519621188769, "learning_rate": 4.198252492993529e-06, "loss": 0.0387, "step": 54400 }, { "epoch": 0.22700720180921463, "grad_norm": 0.6020283884617715, "learning_rate": 4.198059568348395e-06, "loss": 0.0246, "step": 54405 }, { "epoch": 0.22702806452420493, "grad_norm": 0.9026876419732627, "learning_rate": 4.19786667029754e-06, "loss": 0.0325, "step": 54410 }, { "epoch": 0.2270489272391952, "grad_norm": 0.8371706770949071, "learning_rate": 4.197673798834854e-06, "loss": 0.0427, "step": 54415 }, { "epoch": 0.22706978995418547, "grad_norm": 1.3166402061288767, "learning_rate": 4.19748095395423e-06, "loss": 0.0293, "step": 54420 }, { "epoch": 0.22709065266917575, "grad_norm": 8.232671458487367, "learning_rate": 4.19728813564956e-06, "loss": 0.0472, "step": 54425 }, { "epoch": 0.22711151538416605, "grad_norm": 1.419093393648822, "learning_rate": 4.197095343914744e-06, "loss": 0.0367, "step": 54430 }, { "epoch": 0.22713237809915632, "grad_norm": 0.7071166397449729, "learning_rate": 4.196902578743678e-06, "loss": 0.0371, "step": 54435 }, { "epoch": 0.2271532408141466, "grad_norm": 0.8278594246419473, "learning_rate": 4.1967098401302645e-06, "loss": 0.0374, "step": 54440 }, { "epoch": 0.22717410352913686, "grad_norm": 0.8661727418846036, "learning_rate": 4.196517128068404e-06, "loss": 0.034, "step": 54445 }, { "epoch": 0.22719496624412713, "grad_norm": 1.222040277952303, "learning_rate": 4.1963244425520015e-06, "loss": 0.0372, "step": 54450 }, { "epoch": 0.22721582895911743, "grad_norm": 0.8848506969704344, "learning_rate": 4.1961317835749636e-06, "loss": 0.0334, "step": 54455 }, { "epoch": 0.2272366916741077, "grad_norm": 1.0121952030671053, "learning_rate": 4.195939151131198e-06, "loss": 0.0393, "step": 54460 }, { "epoch": 0.22725755438909798, "grad_norm": 0.5798058947672112, "learning_rate": 4.195746545214615e-06, "loss": 0.0343, "step": 54465 }, { "epoch": 0.22727841710408825, "grad_norm": 1.4092376159148075, "learning_rate": 4.195553965819128e-06, "loss": 0.0264, "step": 54470 }, { "epoch": 0.22729927981907855, "grad_norm": 0.7193956185370058, "learning_rate": 4.19536141293865e-06, "loss": 0.0291, "step": 54475 }, { "epoch": 0.22732014253406882, "grad_norm": 1.9531460896613133, "learning_rate": 4.195168886567097e-06, "loss": 0.0322, "step": 54480 }, { "epoch": 0.2273410052490591, "grad_norm": 1.070233735546763, "learning_rate": 4.1949763866983865e-06, "loss": 0.0323, "step": 54485 }, { "epoch": 0.22736186796404936, "grad_norm": 1.3700618895040024, "learning_rate": 4.194783913326441e-06, "loss": 0.0416, "step": 54490 }, { "epoch": 0.22738273067903964, "grad_norm": 0.7536966252966968, "learning_rate": 4.19459146644518e-06, "loss": 0.0306, "step": 54495 }, { "epoch": 0.22740359339402993, "grad_norm": 0.7927032299533864, "learning_rate": 4.1943990460485275e-06, "loss": 0.0335, "step": 54500 }, { "epoch": 0.2274244561090202, "grad_norm": 0.509695888965352, "learning_rate": 4.194206652130411e-06, "loss": 0.0308, "step": 54505 }, { "epoch": 0.22744531882401048, "grad_norm": 0.871151510545481, "learning_rate": 4.194014284684757e-06, "loss": 0.0392, "step": 54510 }, { "epoch": 0.22746618153900075, "grad_norm": 0.4163066076964163, "learning_rate": 4.193821943705496e-06, "loss": 0.0334, "step": 54515 }, { "epoch": 0.22748704425399105, "grad_norm": 0.7171182659402469, "learning_rate": 4.193629629186559e-06, "loss": 0.0337, "step": 54520 }, { "epoch": 0.22750790696898132, "grad_norm": 0.7477119874997102, "learning_rate": 4.19343734112188e-06, "loss": 0.0278, "step": 54525 }, { "epoch": 0.2275287696839716, "grad_norm": 0.5656038298355544, "learning_rate": 4.193245079505395e-06, "loss": 0.0348, "step": 54530 }, { "epoch": 0.22754963239896187, "grad_norm": 1.2334594641970842, "learning_rate": 4.19305284433104e-06, "loss": 0.0423, "step": 54535 }, { "epoch": 0.22757049511395214, "grad_norm": 0.9406539692742372, "learning_rate": 4.192860635592757e-06, "loss": 0.0354, "step": 54540 }, { "epoch": 0.22759135782894244, "grad_norm": 0.7814917980758297, "learning_rate": 4.192668453284485e-06, "loss": 0.0273, "step": 54545 }, { "epoch": 0.2276122205439327, "grad_norm": 0.691745386643772, "learning_rate": 4.192476297400169e-06, "loss": 0.0425, "step": 54550 }, { "epoch": 0.22763308325892298, "grad_norm": 1.0195165698428514, "learning_rate": 4.192284167933753e-06, "loss": 0.0337, "step": 54555 }, { "epoch": 0.22765394597391325, "grad_norm": 0.995497616560225, "learning_rate": 4.1920920648791855e-06, "loss": 0.0358, "step": 54560 }, { "epoch": 0.22767480868890355, "grad_norm": 0.6822413240277685, "learning_rate": 4.191899988230415e-06, "loss": 0.026, "step": 54565 }, { "epoch": 0.22769567140389382, "grad_norm": 0.6854232242734295, "learning_rate": 4.191707937981393e-06, "loss": 0.0379, "step": 54570 }, { "epoch": 0.2277165341188841, "grad_norm": 1.1341282203150265, "learning_rate": 4.191515914126072e-06, "loss": 0.0316, "step": 54575 }, { "epoch": 0.22773739683387437, "grad_norm": 1.0372054356229148, "learning_rate": 4.191323916658408e-06, "loss": 0.0333, "step": 54580 }, { "epoch": 0.22775825954886464, "grad_norm": 1.2517244132026777, "learning_rate": 4.191131945572357e-06, "loss": 0.0367, "step": 54585 }, { "epoch": 0.22777912226385494, "grad_norm": 0.5550763993635365, "learning_rate": 4.1909400008618774e-06, "loss": 0.029, "step": 54590 }, { "epoch": 0.2277999849788452, "grad_norm": 0.8812406817787396, "learning_rate": 4.190748082520934e-06, "loss": 0.0469, "step": 54595 }, { "epoch": 0.22782084769383548, "grad_norm": 0.31185950607766066, "learning_rate": 4.190556190543483e-06, "loss": 0.0295, "step": 54600 }, { "epoch": 0.22784171040882575, "grad_norm": 0.75428002795976, "learning_rate": 4.190364324923495e-06, "loss": 0.0372, "step": 54605 }, { "epoch": 0.22786257312381605, "grad_norm": 0.801953201569355, "learning_rate": 4.190172485654933e-06, "loss": 0.0365, "step": 54610 }, { "epoch": 0.22788343583880633, "grad_norm": 0.6794431351839763, "learning_rate": 4.189980672731767e-06, "loss": 0.0415, "step": 54615 }, { "epoch": 0.2279042985537966, "grad_norm": 0.7655000870877167, "learning_rate": 4.189788886147968e-06, "loss": 0.0278, "step": 54620 }, { "epoch": 0.22792516126878687, "grad_norm": 0.5892488175400147, "learning_rate": 4.189597125897507e-06, "loss": 0.0341, "step": 54625 }, { "epoch": 0.22794602398377714, "grad_norm": 0.7469185075546672, "learning_rate": 4.18940539197436e-06, "loss": 0.0298, "step": 54630 }, { "epoch": 0.22796688669876744, "grad_norm": 0.8056927141322405, "learning_rate": 4.189213684372501e-06, "loss": 0.0269, "step": 54635 }, { "epoch": 0.2279877494137577, "grad_norm": 0.5314906061181877, "learning_rate": 4.189022003085911e-06, "loss": 0.0376, "step": 54640 }, { "epoch": 0.22800861212874798, "grad_norm": 0.7573356345246205, "learning_rate": 4.188830348108568e-06, "loss": 0.028, "step": 54645 }, { "epoch": 0.22802947484373826, "grad_norm": 1.6713206755688865, "learning_rate": 4.188638719434455e-06, "loss": 0.0467, "step": 54650 }, { "epoch": 0.22805033755872856, "grad_norm": 0.7573437856210548, "learning_rate": 4.188447117057556e-06, "loss": 0.0339, "step": 54655 }, { "epoch": 0.22807120027371883, "grad_norm": 1.0717528698709047, "learning_rate": 4.188255540971857e-06, "loss": 0.028, "step": 54660 }, { "epoch": 0.2280920629887091, "grad_norm": 0.9321494222107531, "learning_rate": 4.1880639911713456e-06, "loss": 0.0305, "step": 54665 }, { "epoch": 0.22811292570369937, "grad_norm": 0.6244503020449791, "learning_rate": 4.187872467650012e-06, "loss": 0.0294, "step": 54670 }, { "epoch": 0.22813378841868964, "grad_norm": 0.8749868744153273, "learning_rate": 4.187680970401847e-06, "loss": 0.034, "step": 54675 }, { "epoch": 0.22815465113367994, "grad_norm": 0.6729785953674727, "learning_rate": 4.187489499420845e-06, "loss": 0.0291, "step": 54680 }, { "epoch": 0.22817551384867021, "grad_norm": 1.027985899227728, "learning_rate": 4.1872980547010016e-06, "loss": 0.0335, "step": 54685 }, { "epoch": 0.22819637656366049, "grad_norm": 1.475273022347219, "learning_rate": 4.187106636236314e-06, "loss": 0.0457, "step": 54690 }, { "epoch": 0.22821723927865076, "grad_norm": 1.1459706205319324, "learning_rate": 4.1869152440207814e-06, "loss": 0.0391, "step": 54695 }, { "epoch": 0.22823810199364106, "grad_norm": 0.713999024835147, "learning_rate": 4.186723878048405e-06, "loss": 0.033, "step": 54700 }, { "epoch": 0.22825896470863133, "grad_norm": 0.915771084574104, "learning_rate": 4.186532538313189e-06, "loss": 0.0404, "step": 54705 }, { "epoch": 0.2282798274236216, "grad_norm": 1.0276129324048149, "learning_rate": 4.186341224809138e-06, "loss": 0.0304, "step": 54710 }, { "epoch": 0.22830069013861187, "grad_norm": 0.8551964494981935, "learning_rate": 4.18614993753026e-06, "loss": 0.0317, "step": 54715 }, { "epoch": 0.22832155285360214, "grad_norm": 2.7984689933189233, "learning_rate": 4.185958676470561e-06, "loss": 0.0294, "step": 54720 }, { "epoch": 0.22834241556859244, "grad_norm": 1.114243987727534, "learning_rate": 4.1857674416240555e-06, "loss": 0.0276, "step": 54725 }, { "epoch": 0.22836327828358272, "grad_norm": 0.6749261235006406, "learning_rate": 4.185576232984754e-06, "loss": 0.0258, "step": 54730 }, { "epoch": 0.228384140998573, "grad_norm": 0.9292991862865967, "learning_rate": 4.185385050546673e-06, "loss": 0.0383, "step": 54735 }, { "epoch": 0.22840500371356326, "grad_norm": 1.0828699855661545, "learning_rate": 4.185193894303827e-06, "loss": 0.0282, "step": 54740 }, { "epoch": 0.22842586642855356, "grad_norm": 0.6474177288842464, "learning_rate": 4.1850027642502364e-06, "loss": 0.0419, "step": 54745 }, { "epoch": 0.22844672914354383, "grad_norm": 0.6472107637410899, "learning_rate": 4.18481166037992e-06, "loss": 0.03, "step": 54750 }, { "epoch": 0.2284675918585341, "grad_norm": 0.5246585457648388, "learning_rate": 4.184620582686902e-06, "loss": 0.031, "step": 54755 }, { "epoch": 0.22848845457352437, "grad_norm": 0.8881281840595364, "learning_rate": 4.184429531165205e-06, "loss": 0.0452, "step": 54760 }, { "epoch": 0.22850931728851465, "grad_norm": 0.4447864100612616, "learning_rate": 4.184238505808857e-06, "loss": 0.0354, "step": 54765 }, { "epoch": 0.22853018000350495, "grad_norm": 0.9959109795597567, "learning_rate": 4.184047506611885e-06, "loss": 0.0324, "step": 54770 }, { "epoch": 0.22855104271849522, "grad_norm": 0.5781949099677951, "learning_rate": 4.183856533568319e-06, "loss": 0.0372, "step": 54775 }, { "epoch": 0.2285719054334855, "grad_norm": 0.29822990549354766, "learning_rate": 4.1836655866721915e-06, "loss": 0.0264, "step": 54780 }, { "epoch": 0.22859276814847576, "grad_norm": 1.534638183438586, "learning_rate": 4.1834746659175355e-06, "loss": 0.0341, "step": 54785 }, { "epoch": 0.22861363086346606, "grad_norm": 0.7833451857445265, "learning_rate": 4.183283771298388e-06, "loss": 0.0309, "step": 54790 }, { "epoch": 0.22863449357845633, "grad_norm": 0.9306942705007765, "learning_rate": 4.183092902808784e-06, "loss": 0.0348, "step": 54795 }, { "epoch": 0.2286553562934466, "grad_norm": 0.5643293350808393, "learning_rate": 4.182902060442766e-06, "loss": 0.0239, "step": 54800 }, { "epoch": 0.22867621900843688, "grad_norm": 1.08576439102622, "learning_rate": 4.182711244194375e-06, "loss": 0.0334, "step": 54805 }, { "epoch": 0.22869708172342715, "grad_norm": 0.7266631557986855, "learning_rate": 4.1825204540576525e-06, "loss": 0.0346, "step": 54810 }, { "epoch": 0.22871794443841745, "grad_norm": 1.0187615241135846, "learning_rate": 4.182329690026646e-06, "loss": 0.0324, "step": 54815 }, { "epoch": 0.22873880715340772, "grad_norm": 0.50666421754513, "learning_rate": 4.1821389520954005e-06, "loss": 0.0433, "step": 54820 }, { "epoch": 0.228759669868398, "grad_norm": 1.23662437481553, "learning_rate": 4.181948240257967e-06, "loss": 0.032, "step": 54825 }, { "epoch": 0.22878053258338826, "grad_norm": 0.944499248806237, "learning_rate": 4.181757554508395e-06, "loss": 0.0382, "step": 54830 }, { "epoch": 0.22880139529837856, "grad_norm": 1.0681762445006382, "learning_rate": 4.181566894840738e-06, "loss": 0.0281, "step": 54835 }, { "epoch": 0.22882225801336883, "grad_norm": 1.0905866178931145, "learning_rate": 4.181376261249051e-06, "loss": 0.0302, "step": 54840 }, { "epoch": 0.2288431207283591, "grad_norm": 0.7731797898005535, "learning_rate": 4.181185653727391e-06, "loss": 0.0294, "step": 54845 }, { "epoch": 0.22886398344334938, "grad_norm": 0.9380019606273059, "learning_rate": 4.180995072269815e-06, "loss": 0.026, "step": 54850 }, { "epoch": 0.22888484615833965, "grad_norm": 1.2109663459015818, "learning_rate": 4.1808045168703845e-06, "loss": 0.036, "step": 54855 }, { "epoch": 0.22890570887332995, "grad_norm": 1.2151871510547543, "learning_rate": 4.1806139875231626e-06, "loss": 0.0327, "step": 54860 }, { "epoch": 0.22892657158832022, "grad_norm": 1.254434127339774, "learning_rate": 4.180423484222212e-06, "loss": 0.0336, "step": 54865 }, { "epoch": 0.2289474343033105, "grad_norm": 0.841979094312156, "learning_rate": 4.180233006961599e-06, "loss": 0.0306, "step": 54870 }, { "epoch": 0.22896829701830076, "grad_norm": 0.6523057435473122, "learning_rate": 4.180042555735392e-06, "loss": 0.0324, "step": 54875 }, { "epoch": 0.22898915973329106, "grad_norm": 0.7749960543879004, "learning_rate": 4.179852130537662e-06, "loss": 0.0303, "step": 54880 }, { "epoch": 0.22901002244828134, "grad_norm": 1.1831218647328956, "learning_rate": 4.179661731362479e-06, "loss": 0.0376, "step": 54885 }, { "epoch": 0.2290308851632716, "grad_norm": 0.8638932763807124, "learning_rate": 4.179471358203917e-06, "loss": 0.0281, "step": 54890 }, { "epoch": 0.22905174787826188, "grad_norm": 0.884787046636116, "learning_rate": 4.179281011056053e-06, "loss": 0.0309, "step": 54895 }, { "epoch": 0.22907261059325215, "grad_norm": 1.103528013530892, "learning_rate": 4.179090689912963e-06, "loss": 0.0294, "step": 54900 }, { "epoch": 0.22909347330824245, "grad_norm": 0.6613767798681186, "learning_rate": 4.178900394768728e-06, "loss": 0.0346, "step": 54905 }, { "epoch": 0.22911433602323272, "grad_norm": 1.2171290665385464, "learning_rate": 4.1787101256174266e-06, "loss": 0.0347, "step": 54910 }, { "epoch": 0.229135198738223, "grad_norm": 0.6118883912380326, "learning_rate": 4.178519882453144e-06, "loss": 0.0256, "step": 54915 }, { "epoch": 0.22915606145321327, "grad_norm": 0.8173059026496541, "learning_rate": 4.178329665269965e-06, "loss": 0.0291, "step": 54920 }, { "epoch": 0.22917692416820357, "grad_norm": 1.1030372504679813, "learning_rate": 4.178139474061975e-06, "loss": 0.03, "step": 54925 }, { "epoch": 0.22919778688319384, "grad_norm": 0.5100596808963049, "learning_rate": 4.177949308823266e-06, "loss": 0.0345, "step": 54930 }, { "epoch": 0.2292186495981841, "grad_norm": 1.121379383498908, "learning_rate": 4.177759169547925e-06, "loss": 0.039, "step": 54935 }, { "epoch": 0.22923951231317438, "grad_norm": 0.5986877712097721, "learning_rate": 4.177569056230046e-06, "loss": 0.0278, "step": 54940 }, { "epoch": 0.22926037502816465, "grad_norm": 0.5243260981014072, "learning_rate": 4.1773789688637245e-06, "loss": 0.0357, "step": 54945 }, { "epoch": 0.22928123774315495, "grad_norm": 0.7300008312310878, "learning_rate": 4.177188907443055e-06, "loss": 0.038, "step": 54950 }, { "epoch": 0.22930210045814522, "grad_norm": 0.7884402394520305, "learning_rate": 4.176998871962138e-06, "loss": 0.0358, "step": 54955 }, { "epoch": 0.2293229631731355, "grad_norm": 0.7967768208161438, "learning_rate": 4.176808862415071e-06, "loss": 0.0324, "step": 54960 }, { "epoch": 0.22934382588812577, "grad_norm": 0.7930335690454751, "learning_rate": 4.176618878795958e-06, "loss": 0.0291, "step": 54965 }, { "epoch": 0.22936468860311607, "grad_norm": 0.5801957156925619, "learning_rate": 4.176428921098901e-06, "loss": 0.0255, "step": 54970 }, { "epoch": 0.22938555131810634, "grad_norm": 0.4802799875510498, "learning_rate": 4.1762389893180065e-06, "loss": 0.0335, "step": 54975 }, { "epoch": 0.2294064140330966, "grad_norm": 0.7114911751262081, "learning_rate": 4.176049083447382e-06, "loss": 0.0304, "step": 54980 }, { "epoch": 0.22942727674808688, "grad_norm": 0.8528814673125876, "learning_rate": 4.175859203481139e-06, "loss": 0.0411, "step": 54985 }, { "epoch": 0.22944813946307716, "grad_norm": 0.9167554645880451, "learning_rate": 4.1756693494133845e-06, "loss": 0.0326, "step": 54990 }, { "epoch": 0.22946900217806745, "grad_norm": 1.1978252282731603, "learning_rate": 4.175479521238236e-06, "loss": 0.0384, "step": 54995 }, { "epoch": 0.22948986489305773, "grad_norm": 0.9226279930495346, "learning_rate": 4.175289718949807e-06, "loss": 0.0277, "step": 55000 }, { "epoch": 0.229510727608048, "grad_norm": 0.7960405636627036, "learning_rate": 4.175099942542213e-06, "loss": 0.0299, "step": 55005 }, { "epoch": 0.22953159032303827, "grad_norm": 0.5834982726802114, "learning_rate": 4.174910192009573e-06, "loss": 0.0241, "step": 55010 }, { "epoch": 0.22955245303802857, "grad_norm": 1.167911822311973, "learning_rate": 4.174720467346011e-06, "loss": 0.033, "step": 55015 }, { "epoch": 0.22957331575301884, "grad_norm": 1.1622726654701097, "learning_rate": 4.174530768545647e-06, "loss": 0.0372, "step": 55020 }, { "epoch": 0.2295941784680091, "grad_norm": 1.1616167748434587, "learning_rate": 4.174341095602605e-06, "loss": 0.0322, "step": 55025 }, { "epoch": 0.22961504118299939, "grad_norm": 0.864316620323313, "learning_rate": 4.174151448511012e-06, "loss": 0.0292, "step": 55030 }, { "epoch": 0.22963590389798966, "grad_norm": 0.9291214867627716, "learning_rate": 4.173961827264997e-06, "loss": 0.029, "step": 55035 }, { "epoch": 0.22965676661297996, "grad_norm": 0.6762856890001164, "learning_rate": 4.1737722318586885e-06, "loss": 0.0305, "step": 55040 }, { "epoch": 0.22967762932797023, "grad_norm": 1.1401673864484996, "learning_rate": 4.173582662286219e-06, "loss": 0.0337, "step": 55045 }, { "epoch": 0.2296984920429605, "grad_norm": 0.4053929904572872, "learning_rate": 4.173393118541724e-06, "loss": 0.0226, "step": 55050 }, { "epoch": 0.22971935475795077, "grad_norm": 0.5199460112036512, "learning_rate": 4.173203600619335e-06, "loss": 0.0265, "step": 55055 }, { "epoch": 0.22974021747294107, "grad_norm": 3.082257160307594, "learning_rate": 4.173014108513194e-06, "loss": 0.0402, "step": 55060 }, { "epoch": 0.22976108018793134, "grad_norm": 0.7746674104079501, "learning_rate": 4.172824642217437e-06, "loss": 0.0283, "step": 55065 }, { "epoch": 0.22978194290292162, "grad_norm": 0.8963767565318806, "learning_rate": 4.172635201726208e-06, "loss": 0.0286, "step": 55070 }, { "epoch": 0.2298028056179119, "grad_norm": 0.743519821468489, "learning_rate": 4.172445787033648e-06, "loss": 0.0267, "step": 55075 }, { "epoch": 0.22982366833290216, "grad_norm": 1.1725020302330367, "learning_rate": 4.172256398133902e-06, "loss": 0.0342, "step": 55080 }, { "epoch": 0.22984453104789246, "grad_norm": 1.0532130119632166, "learning_rate": 4.172067035021118e-06, "loss": 0.0388, "step": 55085 }, { "epoch": 0.22986539376288273, "grad_norm": 0.9662504255881911, "learning_rate": 4.171877697689445e-06, "loss": 0.0365, "step": 55090 }, { "epoch": 0.229886256477873, "grad_norm": 0.6725435636916811, "learning_rate": 4.17168838613303e-06, "loss": 0.032, "step": 55095 }, { "epoch": 0.22990711919286327, "grad_norm": 0.8059379045759705, "learning_rate": 4.17149910034603e-06, "loss": 0.0407, "step": 55100 }, { "epoch": 0.22992798190785357, "grad_norm": 2.1726949722296687, "learning_rate": 4.171309840322597e-06, "loss": 0.0383, "step": 55105 }, { "epoch": 0.22994884462284385, "grad_norm": 0.9651685034243689, "learning_rate": 4.171120606056887e-06, "loss": 0.0426, "step": 55110 }, { "epoch": 0.22996970733783412, "grad_norm": 0.866509366134073, "learning_rate": 4.170931397543058e-06, "loss": 0.0414, "step": 55115 }, { "epoch": 0.2299905700528244, "grad_norm": 0.6557788367713839, "learning_rate": 4.17074221477527e-06, "loss": 0.0382, "step": 55120 }, { "epoch": 0.23001143276781466, "grad_norm": 1.1533364533806902, "learning_rate": 4.170553057747686e-06, "loss": 0.0375, "step": 55125 }, { "epoch": 0.23003229548280496, "grad_norm": 0.9593896905192671, "learning_rate": 4.170363926454468e-06, "loss": 0.033, "step": 55130 }, { "epoch": 0.23005315819779523, "grad_norm": 0.576177369253226, "learning_rate": 4.17017482088978e-06, "loss": 0.0402, "step": 55135 }, { "epoch": 0.2300740209127855, "grad_norm": 0.7513321598617089, "learning_rate": 4.169985741047793e-06, "loss": 0.027, "step": 55140 }, { "epoch": 0.23009488362777578, "grad_norm": 0.5708474213853196, "learning_rate": 4.1697966869226725e-06, "loss": 0.0254, "step": 55145 }, { "epoch": 0.23011574634276608, "grad_norm": 0.9891724050458867, "learning_rate": 4.16960765850859e-06, "loss": 0.0431, "step": 55150 }, { "epoch": 0.23013660905775635, "grad_norm": 1.3474325408788386, "learning_rate": 4.16941865579972e-06, "loss": 0.0336, "step": 55155 }, { "epoch": 0.23015747177274662, "grad_norm": 0.8434512700966396, "learning_rate": 4.169229678790237e-06, "loss": 0.0321, "step": 55160 }, { "epoch": 0.2301783344877369, "grad_norm": 0.7134637773881833, "learning_rate": 4.169040727474316e-06, "loss": 0.0432, "step": 55165 }, { "epoch": 0.23019919720272716, "grad_norm": 0.45484885739878483, "learning_rate": 4.168851801846136e-06, "loss": 0.0329, "step": 55170 }, { "epoch": 0.23022005991771746, "grad_norm": 0.8316208831926114, "learning_rate": 4.168662901899877e-06, "loss": 0.0434, "step": 55175 }, { "epoch": 0.23024092263270773, "grad_norm": 0.8724970604223607, "learning_rate": 4.168474027629721e-06, "loss": 0.0345, "step": 55180 }, { "epoch": 0.230261785347698, "grad_norm": 0.7782778790862109, "learning_rate": 4.168285179029851e-06, "loss": 0.0283, "step": 55185 }, { "epoch": 0.23028264806268828, "grad_norm": 1.7418101761201523, "learning_rate": 4.168096356094455e-06, "loss": 0.036, "step": 55190 }, { "epoch": 0.23030351077767858, "grad_norm": 2.1613556079750493, "learning_rate": 4.167907558817719e-06, "loss": 0.0404, "step": 55195 }, { "epoch": 0.23032437349266885, "grad_norm": 1.859663952832726, "learning_rate": 4.167718787193832e-06, "loss": 0.0321, "step": 55200 }, { "epoch": 0.23034523620765912, "grad_norm": 0.7677865633442095, "learning_rate": 4.1675300412169865e-06, "loss": 0.0311, "step": 55205 }, { "epoch": 0.2303660989226494, "grad_norm": 0.7393107147840203, "learning_rate": 4.167341320881375e-06, "loss": 0.0287, "step": 55210 }, { "epoch": 0.23038696163763966, "grad_norm": 1.2321160418394899, "learning_rate": 4.167152626181191e-06, "loss": 0.038, "step": 55215 }, { "epoch": 0.23040782435262996, "grad_norm": 0.9939173908792249, "learning_rate": 4.166963957110633e-06, "loss": 0.028, "step": 55220 }, { "epoch": 0.23042868706762024, "grad_norm": 1.0454219798473932, "learning_rate": 4.1667753136638995e-06, "loss": 0.0294, "step": 55225 }, { "epoch": 0.2304495497826105, "grad_norm": 1.0673620514743758, "learning_rate": 4.16658669583519e-06, "loss": 0.0357, "step": 55230 }, { "epoch": 0.23047041249760078, "grad_norm": 1.0630304363663665, "learning_rate": 4.166398103618708e-06, "loss": 0.0343, "step": 55235 }, { "epoch": 0.23049127521259108, "grad_norm": 0.518730488965498, "learning_rate": 4.166209537008656e-06, "loss": 0.0281, "step": 55240 }, { "epoch": 0.23051213792758135, "grad_norm": 1.2027866295574694, "learning_rate": 4.166020995999241e-06, "loss": 0.0363, "step": 55245 }, { "epoch": 0.23053300064257162, "grad_norm": 1.0650814087372273, "learning_rate": 4.165832480584671e-06, "loss": 0.038, "step": 55250 }, { "epoch": 0.2305538633575619, "grad_norm": 0.9812677467524173, "learning_rate": 4.165643990759154e-06, "loss": 0.0308, "step": 55255 }, { "epoch": 0.23057472607255217, "grad_norm": 1.012519632392117, "learning_rate": 4.165455526516904e-06, "loss": 0.0365, "step": 55260 }, { "epoch": 0.23059558878754247, "grad_norm": 1.0214619357282015, "learning_rate": 4.165267087852132e-06, "loss": 0.0316, "step": 55265 }, { "epoch": 0.23061645150253274, "grad_norm": 0.7900096842949117, "learning_rate": 4.165078674759055e-06, "loss": 0.0357, "step": 55270 }, { "epoch": 0.230637314217523, "grad_norm": 1.0808110791697354, "learning_rate": 4.164890287231888e-06, "loss": 0.0291, "step": 55275 }, { "epoch": 0.23065817693251328, "grad_norm": 0.7561290222913621, "learning_rate": 4.1647019252648505e-06, "loss": 0.0262, "step": 55280 }, { "epoch": 0.23067903964750358, "grad_norm": 0.9530624840197638, "learning_rate": 4.164513588852163e-06, "loss": 0.0375, "step": 55285 }, { "epoch": 0.23069990236249385, "grad_norm": 0.909571999526383, "learning_rate": 4.164325277988048e-06, "loss": 0.0261, "step": 55290 }, { "epoch": 0.23072076507748412, "grad_norm": 0.6278325521509657, "learning_rate": 4.16413699266673e-06, "loss": 0.0264, "step": 55295 }, { "epoch": 0.2307416277924744, "grad_norm": 0.8840404079576116, "learning_rate": 4.163948732882436e-06, "loss": 0.0444, "step": 55300 }, { "epoch": 0.23076249050746467, "grad_norm": 0.6805305816566873, "learning_rate": 4.1637604986293916e-06, "loss": 0.0392, "step": 55305 }, { "epoch": 0.23078335322245497, "grad_norm": 0.3938951338448334, "learning_rate": 4.163572289901829e-06, "loss": 0.0322, "step": 55310 }, { "epoch": 0.23080421593744524, "grad_norm": 0.8193023372864213, "learning_rate": 4.163384106693977e-06, "loss": 0.0305, "step": 55315 }, { "epoch": 0.2308250786524355, "grad_norm": 0.5529359652332134, "learning_rate": 4.163195949000071e-06, "loss": 0.0377, "step": 55320 }, { "epoch": 0.23084594136742578, "grad_norm": 0.8006267870742263, "learning_rate": 4.163007816814347e-06, "loss": 0.0351, "step": 55325 }, { "epoch": 0.23086680408241608, "grad_norm": 0.6545321675071407, "learning_rate": 4.162819710131039e-06, "loss": 0.0399, "step": 55330 }, { "epoch": 0.23088766679740635, "grad_norm": 0.924386744082494, "learning_rate": 4.162631628944389e-06, "loss": 0.0344, "step": 55335 }, { "epoch": 0.23090852951239663, "grad_norm": 0.520910872347395, "learning_rate": 4.162443573248636e-06, "loss": 0.0234, "step": 55340 }, { "epoch": 0.2309293922273869, "grad_norm": 0.780655268057438, "learning_rate": 4.162255543038022e-06, "loss": 0.0438, "step": 55345 }, { "epoch": 0.23095025494237717, "grad_norm": 0.5411878133167539, "learning_rate": 4.162067538306793e-06, "loss": 0.034, "step": 55350 }, { "epoch": 0.23097111765736747, "grad_norm": 1.106082184493813, "learning_rate": 4.161879559049193e-06, "loss": 0.0329, "step": 55355 }, { "epoch": 0.23099198037235774, "grad_norm": 0.5503396535263556, "learning_rate": 4.161691605259473e-06, "loss": 0.0307, "step": 55360 }, { "epoch": 0.231012843087348, "grad_norm": 1.4194000493517656, "learning_rate": 4.161503676931879e-06, "loss": 0.0345, "step": 55365 }, { "epoch": 0.23103370580233829, "grad_norm": 1.1960003550747391, "learning_rate": 4.161315774060666e-06, "loss": 0.0398, "step": 55370 }, { "epoch": 0.23105456851732858, "grad_norm": 0.9794269420234154, "learning_rate": 4.161127896640085e-06, "loss": 0.0334, "step": 55375 }, { "epoch": 0.23107543123231886, "grad_norm": 0.6305414029527596, "learning_rate": 4.160940044664393e-06, "loss": 0.0277, "step": 55380 }, { "epoch": 0.23109629394730913, "grad_norm": 0.7377357513743039, "learning_rate": 4.160752218127846e-06, "loss": 0.0326, "step": 55385 }, { "epoch": 0.2311171566622994, "grad_norm": 0.5759453210920502, "learning_rate": 4.1605644170247025e-06, "loss": 0.0377, "step": 55390 }, { "epoch": 0.23113801937728967, "grad_norm": 0.7814909624163424, "learning_rate": 4.160376641349224e-06, "loss": 0.0326, "step": 55395 }, { "epoch": 0.23115888209227997, "grad_norm": 0.964400891883297, "learning_rate": 4.160188891095672e-06, "loss": 0.0295, "step": 55400 }, { "epoch": 0.23117974480727024, "grad_norm": 0.9266486285487449, "learning_rate": 4.160001166258313e-06, "loss": 0.0377, "step": 55405 }, { "epoch": 0.23120060752226052, "grad_norm": 0.9370037007272242, "learning_rate": 4.159813466831411e-06, "loss": 0.0344, "step": 55410 }, { "epoch": 0.2312214702372508, "grad_norm": 0.7027442751640199, "learning_rate": 4.1596257928092344e-06, "loss": 0.0272, "step": 55415 }, { "epoch": 0.2312423329522411, "grad_norm": 0.7198266375254523, "learning_rate": 4.159438144186053e-06, "loss": 0.0345, "step": 55420 }, { "epoch": 0.23126319566723136, "grad_norm": 1.7087010182191222, "learning_rate": 4.159250520956138e-06, "loss": 0.035, "step": 55425 }, { "epoch": 0.23128405838222163, "grad_norm": 1.2075321875640768, "learning_rate": 4.159062923113763e-06, "loss": 0.0393, "step": 55430 }, { "epoch": 0.2313049210972119, "grad_norm": 0.6533097138438082, "learning_rate": 4.1588753506532045e-06, "loss": 0.0455, "step": 55435 }, { "epoch": 0.23132578381220217, "grad_norm": 0.6769950130010284, "learning_rate": 4.1586878035687365e-06, "loss": 0.0286, "step": 55440 }, { "epoch": 0.23134664652719247, "grad_norm": 0.48905641364874375, "learning_rate": 4.158500281854641e-06, "loss": 0.0263, "step": 55445 }, { "epoch": 0.23136750924218275, "grad_norm": 1.1376200378782682, "learning_rate": 4.1583127855051965e-06, "loss": 0.0345, "step": 55450 }, { "epoch": 0.23138837195717302, "grad_norm": 1.3045612319325877, "learning_rate": 4.1581253145146845e-06, "loss": 0.0304, "step": 55455 }, { "epoch": 0.2314092346721633, "grad_norm": 0.8813154599427917, "learning_rate": 4.157937868877393e-06, "loss": 0.0321, "step": 55460 }, { "epoch": 0.2314300973871536, "grad_norm": 1.0761384333508361, "learning_rate": 4.157750448587605e-06, "loss": 0.0271, "step": 55465 }, { "epoch": 0.23145096010214386, "grad_norm": 0.5900639239430929, "learning_rate": 4.157563053639607e-06, "loss": 0.0317, "step": 55470 }, { "epoch": 0.23147182281713413, "grad_norm": 0.8463689586118747, "learning_rate": 4.157375684027692e-06, "loss": 0.0241, "step": 55475 }, { "epoch": 0.2314926855321244, "grad_norm": 1.860992420328118, "learning_rate": 4.1571883397461496e-06, "loss": 0.0345, "step": 55480 }, { "epoch": 0.23151354824711468, "grad_norm": 0.9674089251377259, "learning_rate": 4.157001020789272e-06, "loss": 0.0377, "step": 55485 }, { "epoch": 0.23153441096210498, "grad_norm": 0.6206560138276112, "learning_rate": 4.156813727151357e-06, "loss": 0.0274, "step": 55490 }, { "epoch": 0.23155527367709525, "grad_norm": 1.1685995693324607, "learning_rate": 4.156626458826699e-06, "loss": 0.0445, "step": 55495 }, { "epoch": 0.23157613639208552, "grad_norm": 1.2872903487934138, "learning_rate": 4.1564392158095985e-06, "loss": 0.0403, "step": 55500 }, { "epoch": 0.2315969991070758, "grad_norm": 1.207262818062828, "learning_rate": 4.156251998094354e-06, "loss": 0.043, "step": 55505 }, { "epoch": 0.2316178618220661, "grad_norm": 0.791743009671188, "learning_rate": 4.156064805675268e-06, "loss": 0.0328, "step": 55510 }, { "epoch": 0.23163872453705636, "grad_norm": 1.1397799370735964, "learning_rate": 4.1558776385466444e-06, "loss": 0.0372, "step": 55515 }, { "epoch": 0.23165958725204663, "grad_norm": 0.49576370933934266, "learning_rate": 4.155690496702791e-06, "loss": 0.0318, "step": 55520 }, { "epoch": 0.2316804499670369, "grad_norm": 0.5023432115800288, "learning_rate": 4.155503380138013e-06, "loss": 0.0222, "step": 55525 }, { "epoch": 0.23170131268202718, "grad_norm": 0.901361731982149, "learning_rate": 4.155316288846621e-06, "loss": 0.0369, "step": 55530 }, { "epoch": 0.23172217539701748, "grad_norm": 1.244223621466287, "learning_rate": 4.155129222822925e-06, "loss": 0.0385, "step": 55535 }, { "epoch": 0.23174303811200775, "grad_norm": 0.7841732533151211, "learning_rate": 4.154942182061239e-06, "loss": 0.0407, "step": 55540 }, { "epoch": 0.23176390082699802, "grad_norm": 0.9731324925399973, "learning_rate": 4.154755166555878e-06, "loss": 0.0359, "step": 55545 }, { "epoch": 0.2317847635419883, "grad_norm": 0.6275731454442739, "learning_rate": 4.154568176301157e-06, "loss": 0.0273, "step": 55550 }, { "epoch": 0.2318056262569786, "grad_norm": 0.7572664094221242, "learning_rate": 4.154381211291396e-06, "loss": 0.024, "step": 55555 }, { "epoch": 0.23182648897196886, "grad_norm": 1.165111932999995, "learning_rate": 4.154194271520914e-06, "loss": 0.0437, "step": 55560 }, { "epoch": 0.23184735168695914, "grad_norm": 1.1222850846362624, "learning_rate": 4.154007356984034e-06, "loss": 0.038, "step": 55565 }, { "epoch": 0.2318682144019494, "grad_norm": 0.3661152657865531, "learning_rate": 4.153820467675078e-06, "loss": 0.0228, "step": 55570 }, { "epoch": 0.23188907711693968, "grad_norm": 1.1152011271281588, "learning_rate": 4.153633603588373e-06, "loss": 0.0277, "step": 55575 }, { "epoch": 0.23190993983192998, "grad_norm": 0.9636706204889826, "learning_rate": 4.1534467647182455e-06, "loss": 0.0351, "step": 55580 }, { "epoch": 0.23193080254692025, "grad_norm": 0.807729829715009, "learning_rate": 4.153259951059025e-06, "loss": 0.0358, "step": 55585 }, { "epoch": 0.23195166526191052, "grad_norm": 0.6601082171773925, "learning_rate": 4.1530731626050415e-06, "loss": 0.0205, "step": 55590 }, { "epoch": 0.2319725279769008, "grad_norm": 1.2087188272589862, "learning_rate": 4.152886399350628e-06, "loss": 0.0349, "step": 55595 }, { "epoch": 0.2319933906918911, "grad_norm": 0.7243280091472679, "learning_rate": 4.152699661290121e-06, "loss": 0.0367, "step": 55600 }, { "epoch": 0.23201425340688137, "grad_norm": 0.8329392448043617, "learning_rate": 4.152512948417854e-06, "loss": 0.0304, "step": 55605 }, { "epoch": 0.23203511612187164, "grad_norm": 0.6855024416308304, "learning_rate": 4.152326260728165e-06, "loss": 0.0275, "step": 55610 }, { "epoch": 0.2320559788368619, "grad_norm": 1.2607421767248448, "learning_rate": 4.152139598215394e-06, "loss": 0.0393, "step": 55615 }, { "epoch": 0.23207684155185218, "grad_norm": 1.0336942233273454, "learning_rate": 4.151952960873884e-06, "loss": 0.0337, "step": 55620 }, { "epoch": 0.23209770426684248, "grad_norm": 0.6558134064293276, "learning_rate": 4.151766348697977e-06, "loss": 0.0245, "step": 55625 }, { "epoch": 0.23211856698183275, "grad_norm": 0.5950712935406864, "learning_rate": 4.151579761682019e-06, "loss": 0.0295, "step": 55630 }, { "epoch": 0.23213942969682302, "grad_norm": 1.0224584074256042, "learning_rate": 4.151393199820356e-06, "loss": 0.0432, "step": 55635 }, { "epoch": 0.2321602924118133, "grad_norm": 0.8523624338455797, "learning_rate": 4.151206663107336e-06, "loss": 0.0328, "step": 55640 }, { "epoch": 0.2321811551268036, "grad_norm": 0.7922359295129959, "learning_rate": 4.151020151537311e-06, "loss": 0.0416, "step": 55645 }, { "epoch": 0.23220201784179387, "grad_norm": 0.9817220300432403, "learning_rate": 4.150833665104633e-06, "loss": 0.0379, "step": 55650 }, { "epoch": 0.23222288055678414, "grad_norm": 0.7744628195842806, "learning_rate": 4.150647203803655e-06, "loss": 0.037, "step": 55655 }, { "epoch": 0.2322437432717744, "grad_norm": 1.0990528457302828, "learning_rate": 4.150460767628733e-06, "loss": 0.0264, "step": 55660 }, { "epoch": 0.23226460598676468, "grad_norm": 0.8973236764395337, "learning_rate": 4.150274356574225e-06, "loss": 0.0359, "step": 55665 }, { "epoch": 0.23228546870175498, "grad_norm": 1.4625710998748989, "learning_rate": 4.150087970634491e-06, "loss": 0.033, "step": 55670 }, { "epoch": 0.23230633141674525, "grad_norm": 0.800839038939238, "learning_rate": 4.14990160980389e-06, "loss": 0.0323, "step": 55675 }, { "epoch": 0.23232719413173553, "grad_norm": 0.6024967539153296, "learning_rate": 4.149715274076786e-06, "loss": 0.0246, "step": 55680 }, { "epoch": 0.2323480568467258, "grad_norm": 1.3779457889715314, "learning_rate": 4.149528963447544e-06, "loss": 0.0329, "step": 55685 }, { "epoch": 0.23236891956171607, "grad_norm": 1.0647802873978733, "learning_rate": 4.149342677910531e-06, "loss": 0.0298, "step": 55690 }, { "epoch": 0.23238978227670637, "grad_norm": 1.0560512029194669, "learning_rate": 4.149156417460113e-06, "loss": 0.0433, "step": 55695 }, { "epoch": 0.23241064499169664, "grad_norm": 0.5927828254114185, "learning_rate": 4.148970182090661e-06, "loss": 0.0234, "step": 55700 }, { "epoch": 0.2324315077066869, "grad_norm": 1.0765520886730715, "learning_rate": 4.148783971796548e-06, "loss": 0.0357, "step": 55705 }, { "epoch": 0.23245237042167718, "grad_norm": 0.7032152982343803, "learning_rate": 4.148597786572145e-06, "loss": 0.0241, "step": 55710 }, { "epoch": 0.23247323313666748, "grad_norm": 1.2905298116898316, "learning_rate": 4.148411626411829e-06, "loss": 0.0403, "step": 55715 }, { "epoch": 0.23249409585165776, "grad_norm": 0.6129427954107597, "learning_rate": 4.1482254913099766e-06, "loss": 0.0448, "step": 55720 }, { "epoch": 0.23251495856664803, "grad_norm": 0.7803597534347385, "learning_rate": 4.148039381260965e-06, "loss": 0.0434, "step": 55725 }, { "epoch": 0.2325358212816383, "grad_norm": 0.9689210100962513, "learning_rate": 4.147853296259178e-06, "loss": 0.0346, "step": 55730 }, { "epoch": 0.23255668399662857, "grad_norm": 0.5415241018767503, "learning_rate": 4.147667236298995e-06, "loss": 0.0277, "step": 55735 }, { "epoch": 0.23257754671161887, "grad_norm": 0.5820547404748679, "learning_rate": 4.1474812013748015e-06, "loss": 0.0365, "step": 55740 }, { "epoch": 0.23259840942660914, "grad_norm": 0.6813335305206444, "learning_rate": 4.1472951914809835e-06, "loss": 0.0359, "step": 55745 }, { "epoch": 0.23261927214159941, "grad_norm": 0.7518043499651885, "learning_rate": 4.147109206611928e-06, "loss": 0.0307, "step": 55750 }, { "epoch": 0.2326401348565897, "grad_norm": 0.4378589463849454, "learning_rate": 4.146923246762024e-06, "loss": 0.0246, "step": 55755 }, { "epoch": 0.23266099757157999, "grad_norm": 0.7012662162669878, "learning_rate": 4.146737311925662e-06, "loss": 0.0293, "step": 55760 }, { "epoch": 0.23268186028657026, "grad_norm": 0.8741456309104063, "learning_rate": 4.146551402097237e-06, "loss": 0.0313, "step": 55765 }, { "epoch": 0.23270272300156053, "grad_norm": 1.102322572261402, "learning_rate": 4.146365517271142e-06, "loss": 0.0431, "step": 55770 }, { "epoch": 0.2327235857165508, "grad_norm": 1.0105337106147412, "learning_rate": 4.146179657441775e-06, "loss": 0.0345, "step": 55775 }, { "epoch": 0.23274444843154107, "grad_norm": 0.3902887562339119, "learning_rate": 4.145993822603531e-06, "loss": 0.0212, "step": 55780 }, { "epoch": 0.23276531114653137, "grad_norm": 0.8904520920676668, "learning_rate": 4.145808012750813e-06, "loss": 0.0402, "step": 55785 }, { "epoch": 0.23278617386152164, "grad_norm": 0.5895946619544739, "learning_rate": 4.145622227878022e-06, "loss": 0.0324, "step": 55790 }, { "epoch": 0.23280703657651192, "grad_norm": 1.2117446411459805, "learning_rate": 4.14543646797956e-06, "loss": 0.0373, "step": 55795 }, { "epoch": 0.2328278992915022, "grad_norm": 0.7018664926779186, "learning_rate": 4.145250733049833e-06, "loss": 0.0502, "step": 55800 }, { "epoch": 0.2328487620064925, "grad_norm": 0.930404349538817, "learning_rate": 4.145065023083249e-06, "loss": 0.0302, "step": 55805 }, { "epoch": 0.23286962472148276, "grad_norm": 0.9057298441428155, "learning_rate": 4.144879338074215e-06, "loss": 0.0288, "step": 55810 }, { "epoch": 0.23289048743647303, "grad_norm": 1.1654602080423182, "learning_rate": 4.144693678017143e-06, "loss": 0.0372, "step": 55815 }, { "epoch": 0.2329113501514633, "grad_norm": 0.8073720555167527, "learning_rate": 4.144508042906443e-06, "loss": 0.0488, "step": 55820 }, { "epoch": 0.23293221286645358, "grad_norm": 0.6945991543836557, "learning_rate": 4.14432243273653e-06, "loss": 0.0272, "step": 55825 }, { "epoch": 0.23295307558144387, "grad_norm": 0.8426554757446753, "learning_rate": 4.1441368475018215e-06, "loss": 0.0325, "step": 55830 }, { "epoch": 0.23297393829643415, "grad_norm": 0.8658667065672601, "learning_rate": 4.143951287196733e-06, "loss": 0.0325, "step": 55835 }, { "epoch": 0.23299480101142442, "grad_norm": 0.547287437217784, "learning_rate": 4.143765751815683e-06, "loss": 0.0356, "step": 55840 }, { "epoch": 0.2330156637264147, "grad_norm": 0.5748668200693015, "learning_rate": 4.143580241353095e-06, "loss": 0.0248, "step": 55845 }, { "epoch": 0.233036526441405, "grad_norm": 1.2969202195348903, "learning_rate": 4.1433947558033884e-06, "loss": 0.0337, "step": 55850 }, { "epoch": 0.23305738915639526, "grad_norm": 1.3730400799050813, "learning_rate": 4.143209295160991e-06, "loss": 0.0363, "step": 55855 }, { "epoch": 0.23307825187138553, "grad_norm": 1.2472565674675828, "learning_rate": 4.143023859420327e-06, "loss": 0.032, "step": 55860 }, { "epoch": 0.2330991145863758, "grad_norm": 0.6635317869688663, "learning_rate": 4.142838448575824e-06, "loss": 0.0309, "step": 55865 }, { "epoch": 0.23311997730136608, "grad_norm": 0.7679591972678583, "learning_rate": 4.142653062621912e-06, "loss": 0.0365, "step": 55870 }, { "epoch": 0.23314084001635638, "grad_norm": 1.106533521567446, "learning_rate": 4.142467701553024e-06, "loss": 0.0332, "step": 55875 }, { "epoch": 0.23316170273134665, "grad_norm": 1.0340349747253885, "learning_rate": 4.142282365363592e-06, "loss": 0.0253, "step": 55880 }, { "epoch": 0.23318256544633692, "grad_norm": 0.5259962116199308, "learning_rate": 4.142097054048051e-06, "loss": 0.0308, "step": 55885 }, { "epoch": 0.2332034281613272, "grad_norm": 0.7436423879867154, "learning_rate": 4.141911767600836e-06, "loss": 0.0325, "step": 55890 }, { "epoch": 0.2332242908763175, "grad_norm": 0.8938029443126215, "learning_rate": 4.1417265060163874e-06, "loss": 0.042, "step": 55895 }, { "epoch": 0.23324515359130776, "grad_norm": 0.8086480848291155, "learning_rate": 4.141541269289145e-06, "loss": 0.0357, "step": 55900 }, { "epoch": 0.23326601630629804, "grad_norm": 0.8727229611246723, "learning_rate": 4.141356057413551e-06, "loss": 0.0315, "step": 55905 }, { "epoch": 0.2332868790212883, "grad_norm": 0.8450144989980299, "learning_rate": 4.141170870384047e-06, "loss": 0.0422, "step": 55910 }, { "epoch": 0.23330774173627858, "grad_norm": 1.0212569222569132, "learning_rate": 4.14098570819508e-06, "loss": 0.0439, "step": 55915 }, { "epoch": 0.23332860445126888, "grad_norm": 0.737811797367842, "learning_rate": 4.140800570841097e-06, "loss": 0.0313, "step": 55920 }, { "epoch": 0.23334946716625915, "grad_norm": 0.5116020478062886, "learning_rate": 4.140615458316548e-06, "loss": 0.0402, "step": 55925 }, { "epoch": 0.23337032988124942, "grad_norm": 1.0273856153566518, "learning_rate": 4.14043037061588e-06, "loss": 0.0298, "step": 55930 }, { "epoch": 0.2333911925962397, "grad_norm": 0.6537899184366432, "learning_rate": 4.14024530773355e-06, "loss": 0.0386, "step": 55935 }, { "epoch": 0.23341205531123, "grad_norm": 0.8329230819845574, "learning_rate": 4.140060269664008e-06, "loss": 0.0276, "step": 55940 }, { "epoch": 0.23343291802622027, "grad_norm": 1.0918740732881533, "learning_rate": 4.139875256401712e-06, "loss": 0.0425, "step": 55945 }, { "epoch": 0.23345378074121054, "grad_norm": 0.8385309264824442, "learning_rate": 4.139690267941118e-06, "loss": 0.0449, "step": 55950 }, { "epoch": 0.2334746434562008, "grad_norm": 0.6780424032895705, "learning_rate": 4.139505304276687e-06, "loss": 0.0407, "step": 55955 }, { "epoch": 0.23349550617119108, "grad_norm": 0.7225942140188707, "learning_rate": 4.139320365402879e-06, "loss": 0.0363, "step": 55960 }, { "epoch": 0.23351636888618138, "grad_norm": 1.324095618119354, "learning_rate": 4.139135451314156e-06, "loss": 0.028, "step": 55965 }, { "epoch": 0.23353723160117165, "grad_norm": 0.5868189264008287, "learning_rate": 4.138950562004983e-06, "loss": 0.0326, "step": 55970 }, { "epoch": 0.23355809431616192, "grad_norm": 0.5643511217000146, "learning_rate": 4.138765697469827e-06, "loss": 0.0401, "step": 55975 }, { "epoch": 0.2335789570311522, "grad_norm": 0.5359110744452279, "learning_rate": 4.138580857703154e-06, "loss": 0.0376, "step": 55980 }, { "epoch": 0.2335998197461425, "grad_norm": 0.6736223168835713, "learning_rate": 4.1383960426994365e-06, "loss": 0.028, "step": 55985 }, { "epoch": 0.23362068246113277, "grad_norm": 0.8368365403878295, "learning_rate": 4.1382112524531425e-06, "loss": 0.0322, "step": 55990 }, { "epoch": 0.23364154517612304, "grad_norm": 0.7543299763990524, "learning_rate": 4.138026486958748e-06, "loss": 0.0304, "step": 55995 }, { "epoch": 0.2336624078911133, "grad_norm": 0.5937708597088113, "learning_rate": 4.137841746210727e-06, "loss": 0.0584, "step": 56000 }, { "epoch": 0.23368327060610358, "grad_norm": 0.31254530692762333, "learning_rate": 4.137657030203553e-06, "loss": 0.0329, "step": 56005 }, { "epoch": 0.23370413332109388, "grad_norm": 1.3633370417712962, "learning_rate": 4.137472338931708e-06, "loss": 0.0301, "step": 56010 }, { "epoch": 0.23372499603608415, "grad_norm": 0.5939122054988334, "learning_rate": 4.137287672389672e-06, "loss": 0.0295, "step": 56015 }, { "epoch": 0.23374585875107443, "grad_norm": 0.9588549559131045, "learning_rate": 4.137103030571924e-06, "loss": 0.0303, "step": 56020 }, { "epoch": 0.2337667214660647, "grad_norm": 1.129839587552243, "learning_rate": 4.136918413472949e-06, "loss": 0.0368, "step": 56025 }, { "epoch": 0.233787584181055, "grad_norm": 0.8404951719647394, "learning_rate": 4.136733821087233e-06, "loss": 0.0314, "step": 56030 }, { "epoch": 0.23380844689604527, "grad_norm": 0.7424521542807642, "learning_rate": 4.136549253409262e-06, "loss": 0.0365, "step": 56035 }, { "epoch": 0.23382930961103554, "grad_norm": 0.765964130288333, "learning_rate": 4.136364710433523e-06, "loss": 0.0264, "step": 56040 }, { "epoch": 0.2338501723260258, "grad_norm": 0.9159032993856878, "learning_rate": 4.136180192154509e-06, "loss": 0.0365, "step": 56045 }, { "epoch": 0.23387103504101608, "grad_norm": 0.5430428079191203, "learning_rate": 4.13599569856671e-06, "loss": 0.0309, "step": 56050 }, { "epoch": 0.23389189775600638, "grad_norm": 0.8265858363760847, "learning_rate": 4.135811229664622e-06, "loss": 0.0395, "step": 56055 }, { "epoch": 0.23391276047099666, "grad_norm": 0.6093222458362484, "learning_rate": 4.135626785442738e-06, "loss": 0.0313, "step": 56060 }, { "epoch": 0.23393362318598693, "grad_norm": 0.7379553925822212, "learning_rate": 4.135442365895557e-06, "loss": 0.0245, "step": 56065 }, { "epoch": 0.2339544859009772, "grad_norm": 0.739110769241904, "learning_rate": 4.135257971017577e-06, "loss": 0.0299, "step": 56070 }, { "epoch": 0.2339753486159675, "grad_norm": 0.5810459523763245, "learning_rate": 4.135073600803298e-06, "loss": 0.0269, "step": 56075 }, { "epoch": 0.23399621133095777, "grad_norm": 1.0238134633713472, "learning_rate": 4.134889255247224e-06, "loss": 0.032, "step": 56080 }, { "epoch": 0.23401707404594804, "grad_norm": 0.9197063990930259, "learning_rate": 4.134704934343857e-06, "loss": 0.0302, "step": 56085 }, { "epoch": 0.23403793676093831, "grad_norm": 1.45158808367181, "learning_rate": 4.134520638087706e-06, "loss": 0.0407, "step": 56090 }, { "epoch": 0.2340587994759286, "grad_norm": 1.1161269860913514, "learning_rate": 4.134336366473274e-06, "loss": 0.0393, "step": 56095 }, { "epoch": 0.23407966219091889, "grad_norm": 1.0856350936162027, "learning_rate": 4.134152119495075e-06, "loss": 0.0299, "step": 56100 }, { "epoch": 0.23410052490590916, "grad_norm": 0.6265496802639763, "learning_rate": 4.133967897147616e-06, "loss": 0.0306, "step": 56105 }, { "epoch": 0.23412138762089943, "grad_norm": 0.8263390901046535, "learning_rate": 4.1337836994254125e-06, "loss": 0.0322, "step": 56110 }, { "epoch": 0.2341422503358897, "grad_norm": 1.3998129357511335, "learning_rate": 4.133599526322977e-06, "loss": 0.0377, "step": 56115 }, { "epoch": 0.23416311305088, "grad_norm": 1.7966219456581223, "learning_rate": 4.133415377834827e-06, "loss": 0.0287, "step": 56120 }, { "epoch": 0.23418397576587027, "grad_norm": 0.8945343964620683, "learning_rate": 4.133231253955477e-06, "loss": 0.0379, "step": 56125 }, { "epoch": 0.23420483848086054, "grad_norm": 1.4331773444953797, "learning_rate": 4.133047154679451e-06, "loss": 0.0469, "step": 56130 }, { "epoch": 0.23422570119585082, "grad_norm": 0.6048315941840431, "learning_rate": 4.132863080001267e-06, "loss": 0.0257, "step": 56135 }, { "epoch": 0.2342465639108411, "grad_norm": 0.8138567259843349, "learning_rate": 4.132679029915448e-06, "loss": 0.0311, "step": 56140 }, { "epoch": 0.2342674266258314, "grad_norm": 0.6518040274214335, "learning_rate": 4.132495004416522e-06, "loss": 0.0351, "step": 56145 }, { "epoch": 0.23428828934082166, "grad_norm": 0.8075127304463837, "learning_rate": 4.13231100349901e-06, "loss": 0.0272, "step": 56150 }, { "epoch": 0.23430915205581193, "grad_norm": 0.9046576593196941, "learning_rate": 4.1321270271574445e-06, "loss": 0.045, "step": 56155 }, { "epoch": 0.2343300147708022, "grad_norm": 1.645930207514262, "learning_rate": 4.131943075386353e-06, "loss": 0.0322, "step": 56160 }, { "epoch": 0.2343508774857925, "grad_norm": 1.185145868682273, "learning_rate": 4.131759148180268e-06, "loss": 0.0462, "step": 56165 }, { "epoch": 0.23437174020078277, "grad_norm": 0.8910671821100579, "learning_rate": 4.131575245533721e-06, "loss": 0.0334, "step": 56170 }, { "epoch": 0.23439260291577305, "grad_norm": 3.341284873831044, "learning_rate": 4.131391367441248e-06, "loss": 0.0465, "step": 56175 }, { "epoch": 0.23441346563076332, "grad_norm": 1.2002677211202684, "learning_rate": 4.1312075138973865e-06, "loss": 0.0327, "step": 56180 }, { "epoch": 0.2344343283457536, "grad_norm": 1.1796042304122512, "learning_rate": 4.131023684896672e-06, "loss": 0.03, "step": 56185 }, { "epoch": 0.2344551910607439, "grad_norm": 1.0542223345576784, "learning_rate": 4.130839880433647e-06, "loss": 0.0289, "step": 56190 }, { "epoch": 0.23447605377573416, "grad_norm": 0.6920043930236651, "learning_rate": 4.130656100502851e-06, "loss": 0.0276, "step": 56195 }, { "epoch": 0.23449691649072443, "grad_norm": 1.4153594735531518, "learning_rate": 4.13047234509883e-06, "loss": 0.0292, "step": 56200 }, { "epoch": 0.2345177792057147, "grad_norm": 0.6299217277228378, "learning_rate": 4.130288614216126e-06, "loss": 0.0421, "step": 56205 }, { "epoch": 0.234538641920705, "grad_norm": 4.274629543723917, "learning_rate": 4.130104907849288e-06, "loss": 0.035, "step": 56210 }, { "epoch": 0.23455950463569528, "grad_norm": 0.6988559235238482, "learning_rate": 4.1299212259928635e-06, "loss": 0.0336, "step": 56215 }, { "epoch": 0.23458036735068555, "grad_norm": 0.7461176650587003, "learning_rate": 4.129737568641402e-06, "loss": 0.0278, "step": 56220 }, { "epoch": 0.23460123006567582, "grad_norm": 0.49794564848942047, "learning_rate": 4.129553935789457e-06, "loss": 0.0338, "step": 56225 }, { "epoch": 0.2346220927806661, "grad_norm": 0.9186325990631385, "learning_rate": 4.129370327431581e-06, "loss": 0.0443, "step": 56230 }, { "epoch": 0.2346429554956564, "grad_norm": 0.386927260751759, "learning_rate": 4.129186743562329e-06, "loss": 0.0516, "step": 56235 }, { "epoch": 0.23466381821064666, "grad_norm": 0.9284884800036166, "learning_rate": 4.1290031841762585e-06, "loss": 0.0597, "step": 56240 }, { "epoch": 0.23468468092563693, "grad_norm": 0.7246454421759514, "learning_rate": 4.128819649267927e-06, "loss": 0.0358, "step": 56245 }, { "epoch": 0.2347055436406272, "grad_norm": 6.9755192890708075, "learning_rate": 4.128636138831896e-06, "loss": 0.0246, "step": 56250 }, { "epoch": 0.2347264063556175, "grad_norm": 0.7430740748667932, "learning_rate": 4.128452652862727e-06, "loss": 0.0487, "step": 56255 }, { "epoch": 0.23474726907060778, "grad_norm": 0.8591446381739146, "learning_rate": 4.1282691913549835e-06, "loss": 0.0484, "step": 56260 }, { "epoch": 0.23476813178559805, "grad_norm": 0.34467907162516886, "learning_rate": 4.128085754303231e-06, "loss": 0.0368, "step": 56265 }, { "epoch": 0.23478899450058832, "grad_norm": 0.5806378953458011, "learning_rate": 4.127902341702037e-06, "loss": 0.0247, "step": 56270 }, { "epoch": 0.2348098572155786, "grad_norm": 0.9547544735768342, "learning_rate": 4.127718953545969e-06, "loss": 0.0337, "step": 56275 }, { "epoch": 0.2348307199305689, "grad_norm": 0.7418515182565749, "learning_rate": 4.1275355898295994e-06, "loss": 0.0274, "step": 56280 }, { "epoch": 0.23485158264555916, "grad_norm": 0.8905857292996278, "learning_rate": 4.127352250547498e-06, "loss": 0.0306, "step": 56285 }, { "epoch": 0.23487244536054944, "grad_norm": 1.196676955321664, "learning_rate": 4.127168935694241e-06, "loss": 0.037, "step": 56290 }, { "epoch": 0.2348933080755397, "grad_norm": 1.0321503883671854, "learning_rate": 4.126985645264401e-06, "loss": 0.0332, "step": 56295 }, { "epoch": 0.23491417079053, "grad_norm": 0.9875205734382331, "learning_rate": 4.126802379252558e-06, "loss": 0.0234, "step": 56300 }, { "epoch": 0.23493503350552028, "grad_norm": 0.7291411661697886, "learning_rate": 4.12661913765329e-06, "loss": 0.0337, "step": 56305 }, { "epoch": 0.23495589622051055, "grad_norm": 0.7682982743095371, "learning_rate": 4.126435920461176e-06, "loss": 0.0354, "step": 56310 }, { "epoch": 0.23497675893550082, "grad_norm": 1.1214043871154393, "learning_rate": 4.1262527276708e-06, "loss": 0.0278, "step": 56315 }, { "epoch": 0.2349976216504911, "grad_norm": 0.3566794143229693, "learning_rate": 4.1260695592767456e-06, "loss": 0.0248, "step": 56320 }, { "epoch": 0.2350184843654814, "grad_norm": 2.375876596095662, "learning_rate": 4.1258864152735985e-06, "loss": 0.0316, "step": 56325 }, { "epoch": 0.23503934708047167, "grad_norm": 0.8385938199700814, "learning_rate": 4.125703295655946e-06, "loss": 0.0292, "step": 56330 }, { "epoch": 0.23506020979546194, "grad_norm": 0.6730999547200601, "learning_rate": 4.125520200418375e-06, "loss": 0.0267, "step": 56335 }, { "epoch": 0.2350810725104522, "grad_norm": 0.6395601881269982, "learning_rate": 4.1253371295554796e-06, "loss": 0.0407, "step": 56340 }, { "epoch": 0.2351019352254425, "grad_norm": 0.9530662387278891, "learning_rate": 4.125154083061849e-06, "loss": 0.0307, "step": 56345 }, { "epoch": 0.23512279794043278, "grad_norm": 0.6007815535936332, "learning_rate": 4.12497106093208e-06, "loss": 0.0317, "step": 56350 }, { "epoch": 0.23514366065542305, "grad_norm": 1.0154871452075473, "learning_rate": 4.1247880631607655e-06, "loss": 0.0331, "step": 56355 }, { "epoch": 0.23516452337041333, "grad_norm": 1.0089520385153752, "learning_rate": 4.124605089742505e-06, "loss": 0.0379, "step": 56360 }, { "epoch": 0.2351853860854036, "grad_norm": 0.7544401568979832, "learning_rate": 4.124422140671897e-06, "loss": 0.0337, "step": 56365 }, { "epoch": 0.2352062488003939, "grad_norm": 0.8783858661851142, "learning_rate": 4.124239215943542e-06, "loss": 0.0374, "step": 56370 }, { "epoch": 0.23522711151538417, "grad_norm": 0.7577648621625923, "learning_rate": 4.124056315552041e-06, "loss": 0.0391, "step": 56375 }, { "epoch": 0.23524797423037444, "grad_norm": 0.5808552297370736, "learning_rate": 4.123873439492001e-06, "loss": 0.0269, "step": 56380 }, { "epoch": 0.2352688369453647, "grad_norm": 0.8185831001291869, "learning_rate": 4.123690587758026e-06, "loss": 0.0328, "step": 56385 }, { "epoch": 0.235289699660355, "grad_norm": 0.9346739649346301, "learning_rate": 4.1235077603447235e-06, "loss": 0.0334, "step": 56390 }, { "epoch": 0.23531056237534528, "grad_norm": 1.1124672980524881, "learning_rate": 4.123324957246702e-06, "loss": 0.0367, "step": 56395 }, { "epoch": 0.23533142509033556, "grad_norm": 1.7099417552979972, "learning_rate": 4.123142178458573e-06, "loss": 0.0506, "step": 56400 }, { "epoch": 0.23535228780532583, "grad_norm": 1.2281286603110617, "learning_rate": 4.12295942397495e-06, "loss": 0.0376, "step": 56405 }, { "epoch": 0.2353731505203161, "grad_norm": 0.7931880936214826, "learning_rate": 4.122776693790445e-06, "loss": 0.0336, "step": 56410 }, { "epoch": 0.2353940132353064, "grad_norm": 0.7756754305157936, "learning_rate": 4.122593987899675e-06, "loss": 0.0345, "step": 56415 }, { "epoch": 0.23541487595029667, "grad_norm": 1.0215201710732273, "learning_rate": 4.122411306297257e-06, "loss": 0.0365, "step": 56420 }, { "epoch": 0.23543573866528694, "grad_norm": 1.1589114145453807, "learning_rate": 4.1222286489778104e-06, "loss": 0.0398, "step": 56425 }, { "epoch": 0.23545660138027721, "grad_norm": 0.9307820994239833, "learning_rate": 4.122046015935956e-06, "loss": 0.0347, "step": 56430 }, { "epoch": 0.2354774640952675, "grad_norm": 0.6267329886359578, "learning_rate": 4.121863407166317e-06, "loss": 0.0403, "step": 56435 }, { "epoch": 0.23549832681025779, "grad_norm": 0.9843256695788818, "learning_rate": 4.121680822663516e-06, "loss": 0.0284, "step": 56440 }, { "epoch": 0.23551918952524806, "grad_norm": 0.9926021854524486, "learning_rate": 4.121498262422178e-06, "loss": 0.0341, "step": 56445 }, { "epoch": 0.23554005224023833, "grad_norm": 5.512138145394718, "learning_rate": 4.121315726436934e-06, "loss": 0.0332, "step": 56450 }, { "epoch": 0.2355609149552286, "grad_norm": 1.0362413624898636, "learning_rate": 4.1211332147024095e-06, "loss": 0.0414, "step": 56455 }, { "epoch": 0.2355817776702189, "grad_norm": 0.856742698095313, "learning_rate": 4.120950727213237e-06, "loss": 0.0381, "step": 56460 }, { "epoch": 0.23560264038520917, "grad_norm": 0.8926005768505061, "learning_rate": 4.12076826396405e-06, "loss": 0.0349, "step": 56465 }, { "epoch": 0.23562350310019944, "grad_norm": 0.48431938338243746, "learning_rate": 4.120585824949481e-06, "loss": 0.033, "step": 56470 }, { "epoch": 0.23564436581518972, "grad_norm": 1.9593261658676893, "learning_rate": 4.120403410164164e-06, "loss": 0.0363, "step": 56475 }, { "epoch": 0.23566522853018002, "grad_norm": 1.1810459086913172, "learning_rate": 4.120221019602741e-06, "loss": 0.0387, "step": 56480 }, { "epoch": 0.2356860912451703, "grad_norm": 0.5341398482751152, "learning_rate": 4.1200386532598475e-06, "loss": 0.0296, "step": 56485 }, { "epoch": 0.23570695396016056, "grad_norm": 0.5748289882908646, "learning_rate": 4.119856311130126e-06, "loss": 0.0277, "step": 56490 }, { "epoch": 0.23572781667515083, "grad_norm": 0.9367984719393802, "learning_rate": 4.119673993208217e-06, "loss": 0.1387, "step": 56495 }, { "epoch": 0.2357486793901411, "grad_norm": 1.8833716674139778, "learning_rate": 4.119491699488768e-06, "loss": 0.0324, "step": 56500 }, { "epoch": 0.2357695421051314, "grad_norm": 1.2943442338888744, "learning_rate": 4.119309429966421e-06, "loss": 0.0333, "step": 56505 }, { "epoch": 0.23579040482012167, "grad_norm": 0.4421512220088101, "learning_rate": 4.119127184635826e-06, "loss": 0.0381, "step": 56510 }, { "epoch": 0.23581126753511195, "grad_norm": 0.9471974314850952, "learning_rate": 4.118944963491631e-06, "loss": 0.0342, "step": 56515 }, { "epoch": 0.23583213025010222, "grad_norm": 1.1380312354093678, "learning_rate": 4.118762766528486e-06, "loss": 0.0339, "step": 56520 }, { "epoch": 0.23585299296509252, "grad_norm": 0.852855195210617, "learning_rate": 4.118580593741044e-06, "loss": 0.0305, "step": 56525 }, { "epoch": 0.2358738556800828, "grad_norm": 1.39237907561369, "learning_rate": 4.11839844512396e-06, "loss": 0.031, "step": 56530 }, { "epoch": 0.23589471839507306, "grad_norm": 1.3602460749407903, "learning_rate": 4.118216320671888e-06, "loss": 0.0371, "step": 56535 }, { "epoch": 0.23591558111006333, "grad_norm": 1.2788370269566807, "learning_rate": 4.118034220379487e-06, "loss": 0.0508, "step": 56540 }, { "epoch": 0.2359364438250536, "grad_norm": 0.9478304041864989, "learning_rate": 4.117852144241415e-06, "loss": 0.0325, "step": 56545 }, { "epoch": 0.2359573065400439, "grad_norm": 0.4927417841406773, "learning_rate": 4.117670092252332e-06, "loss": 0.024, "step": 56550 }, { "epoch": 0.23597816925503418, "grad_norm": 0.5809819794404352, "learning_rate": 4.117488064406902e-06, "loss": 0.0268, "step": 56555 }, { "epoch": 0.23599903197002445, "grad_norm": 0.7208996390217461, "learning_rate": 4.117306060699788e-06, "loss": 0.0357, "step": 56560 }, { "epoch": 0.23601989468501472, "grad_norm": 0.5250811029542458, "learning_rate": 4.117124081125656e-06, "loss": 0.0328, "step": 56565 }, { "epoch": 0.23604075740000502, "grad_norm": 0.7451123449433394, "learning_rate": 4.116942125679171e-06, "loss": 0.0389, "step": 56570 }, { "epoch": 0.2360616201149953, "grad_norm": 0.3797000167757058, "learning_rate": 4.116760194355005e-06, "loss": 0.0291, "step": 56575 }, { "epoch": 0.23608248282998556, "grad_norm": 0.8107886476659882, "learning_rate": 4.116578287147827e-06, "loss": 0.0463, "step": 56580 }, { "epoch": 0.23610334554497583, "grad_norm": 1.4565973189888344, "learning_rate": 4.116396404052309e-06, "loss": 0.031, "step": 56585 }, { "epoch": 0.2361242082599661, "grad_norm": 1.1411741663925028, "learning_rate": 4.116214545063127e-06, "loss": 0.0405, "step": 56590 }, { "epoch": 0.2361450709749564, "grad_norm": 0.6707556711317397, "learning_rate": 4.116032710174953e-06, "loss": 0.0259, "step": 56595 }, { "epoch": 0.23616593368994668, "grad_norm": 1.5413012497209115, "learning_rate": 4.1158508993824664e-06, "loss": 0.0421, "step": 56600 }, { "epoch": 0.23618679640493695, "grad_norm": 0.8595226880717962, "learning_rate": 4.115669112680345e-06, "loss": 0.0298, "step": 56605 }, { "epoch": 0.23620765911992722, "grad_norm": 1.399524368468523, "learning_rate": 4.115487350063271e-06, "loss": 0.0374, "step": 56610 }, { "epoch": 0.23622852183491752, "grad_norm": 0.6821434233682456, "learning_rate": 4.115305611525924e-06, "loss": 0.0319, "step": 56615 }, { "epoch": 0.2362493845499078, "grad_norm": 0.9108295431117595, "learning_rate": 4.115123897062989e-06, "loss": 0.0317, "step": 56620 }, { "epoch": 0.23627024726489806, "grad_norm": 0.9231762838765122, "learning_rate": 4.114942206669151e-06, "loss": 0.0355, "step": 56625 }, { "epoch": 0.23629110997988834, "grad_norm": 0.6914799104206559, "learning_rate": 4.114760540339098e-06, "loss": 0.0285, "step": 56630 }, { "epoch": 0.2363119726948786, "grad_norm": 0.7091740796727064, "learning_rate": 4.114578898067518e-06, "loss": 0.0294, "step": 56635 }, { "epoch": 0.2363328354098689, "grad_norm": 0.8072189600148234, "learning_rate": 4.1143972798491e-06, "loss": 0.0286, "step": 56640 }, { "epoch": 0.23635369812485918, "grad_norm": 0.8928412873062956, "learning_rate": 4.114215685678537e-06, "loss": 0.0349, "step": 56645 }, { "epoch": 0.23637456083984945, "grad_norm": 1.5248112432378962, "learning_rate": 4.114034115550524e-06, "loss": 0.0362, "step": 56650 }, { "epoch": 0.23639542355483972, "grad_norm": 1.0440700318787388, "learning_rate": 4.113852569459753e-06, "loss": 0.0232, "step": 56655 }, { "epoch": 0.23641628626983002, "grad_norm": 1.3861443419945239, "learning_rate": 4.113671047400923e-06, "loss": 0.0349, "step": 56660 }, { "epoch": 0.2364371489848203, "grad_norm": 1.0136273185825837, "learning_rate": 4.113489549368733e-06, "loss": 0.0402, "step": 56665 }, { "epoch": 0.23645801169981057, "grad_norm": 0.7097455548075498, "learning_rate": 4.113308075357881e-06, "loss": 0.0313, "step": 56670 }, { "epoch": 0.23647887441480084, "grad_norm": 0.7699475298169303, "learning_rate": 4.1131266253630705e-06, "loss": 0.0349, "step": 56675 }, { "epoch": 0.2364997371297911, "grad_norm": 0.7964148596058528, "learning_rate": 4.112945199379005e-06, "loss": 0.0314, "step": 56680 }, { "epoch": 0.2365205998447814, "grad_norm": 0.6147379926571102, "learning_rate": 4.112763797400388e-06, "loss": 0.0229, "step": 56685 }, { "epoch": 0.23654146255977168, "grad_norm": 1.0121000220635648, "learning_rate": 4.1125824194219275e-06, "loss": 0.0334, "step": 56690 }, { "epoch": 0.23656232527476195, "grad_norm": 1.017179683299771, "learning_rate": 4.11240106543833e-06, "loss": 0.0305, "step": 56695 }, { "epoch": 0.23658318798975222, "grad_norm": 1.1589026024452305, "learning_rate": 4.112219735444307e-06, "loss": 0.0335, "step": 56700 }, { "epoch": 0.23660405070474252, "grad_norm": 0.7937153584156849, "learning_rate": 4.11203842943457e-06, "loss": 0.0343, "step": 56705 }, { "epoch": 0.2366249134197328, "grad_norm": 0.7437190467031537, "learning_rate": 4.111857147403832e-06, "loss": 0.0302, "step": 56710 }, { "epoch": 0.23664577613472307, "grad_norm": 0.6716241064531944, "learning_rate": 4.111675889346808e-06, "loss": 0.0312, "step": 56715 }, { "epoch": 0.23666663884971334, "grad_norm": 0.959090831050845, "learning_rate": 4.111494655258214e-06, "loss": 0.034, "step": 56720 }, { "epoch": 0.2366875015647036, "grad_norm": 1.0502218286392528, "learning_rate": 4.111313445132768e-06, "loss": 0.0388, "step": 56725 }, { "epoch": 0.2367083642796939, "grad_norm": 0.8329746101595856, "learning_rate": 4.11113225896519e-06, "loss": 0.033, "step": 56730 }, { "epoch": 0.23672922699468418, "grad_norm": 0.8003032318764924, "learning_rate": 4.110951096750202e-06, "loss": 0.028, "step": 56735 }, { "epoch": 0.23675008970967445, "grad_norm": 0.475035698050295, "learning_rate": 4.110769958482525e-06, "loss": 0.022, "step": 56740 }, { "epoch": 0.23677095242466473, "grad_norm": 0.951945115156791, "learning_rate": 4.110588844156886e-06, "loss": 0.0344, "step": 56745 }, { "epoch": 0.23679181513965503, "grad_norm": 0.8871579958453361, "learning_rate": 4.110407753768008e-06, "loss": 0.0296, "step": 56750 }, { "epoch": 0.2368126778546453, "grad_norm": 0.7970140819025182, "learning_rate": 4.110226687310623e-06, "loss": 0.0404, "step": 56755 }, { "epoch": 0.23683354056963557, "grad_norm": 1.1692206996172185, "learning_rate": 4.110045644779457e-06, "loss": 0.0345, "step": 56760 }, { "epoch": 0.23685440328462584, "grad_norm": 1.0349589478820846, "learning_rate": 4.1098646261692436e-06, "loss": 0.0429, "step": 56765 }, { "epoch": 0.2368752659996161, "grad_norm": 0.8432702791358777, "learning_rate": 4.109683631474713e-06, "loss": 0.0278, "step": 56770 }, { "epoch": 0.2368961287146064, "grad_norm": 1.1961521509457531, "learning_rate": 4.109502660690601e-06, "loss": 0.0365, "step": 56775 }, { "epoch": 0.23691699142959669, "grad_norm": 1.2717041635235635, "learning_rate": 4.109321713811645e-06, "loss": 0.0351, "step": 56780 }, { "epoch": 0.23693785414458696, "grad_norm": 0.6418695906359048, "learning_rate": 4.109140790832579e-06, "loss": 0.0259, "step": 56785 }, { "epoch": 0.23695871685957723, "grad_norm": 0.7467667289792009, "learning_rate": 4.1089598917481455e-06, "loss": 0.0286, "step": 56790 }, { "epoch": 0.23697957957456753, "grad_norm": 0.6253924246784512, "learning_rate": 4.108779016553084e-06, "loss": 0.0296, "step": 56795 }, { "epoch": 0.2370004422895578, "grad_norm": 1.2673480382969815, "learning_rate": 4.108598165242137e-06, "loss": 0.0311, "step": 56800 }, { "epoch": 0.23702130500454807, "grad_norm": 1.2748496454715228, "learning_rate": 4.108417337810048e-06, "loss": 0.0337, "step": 56805 }, { "epoch": 0.23704216771953834, "grad_norm": 0.8538174965705684, "learning_rate": 4.108236534251563e-06, "loss": 0.0399, "step": 56810 }, { "epoch": 0.23706303043452862, "grad_norm": 0.5764369747810367, "learning_rate": 4.10805575456143e-06, "loss": 0.0308, "step": 56815 }, { "epoch": 0.23708389314951892, "grad_norm": 1.0955904375940182, "learning_rate": 4.107874998734397e-06, "loss": 0.0393, "step": 56820 }, { "epoch": 0.2371047558645092, "grad_norm": 1.2430362802449773, "learning_rate": 4.107694266765216e-06, "loss": 0.034, "step": 56825 }, { "epoch": 0.23712561857949946, "grad_norm": 0.60681569054482, "learning_rate": 4.107513558648638e-06, "loss": 0.0202, "step": 56830 }, { "epoch": 0.23714648129448973, "grad_norm": 1.047044469039861, "learning_rate": 4.107332874379416e-06, "loss": 0.0296, "step": 56835 }, { "epoch": 0.23716734400948003, "grad_norm": 1.3518511175038248, "learning_rate": 4.107152213952308e-06, "loss": 0.0386, "step": 56840 }, { "epoch": 0.2371882067244703, "grad_norm": 0.929485316039672, "learning_rate": 4.106971577362068e-06, "loss": 0.0318, "step": 56845 }, { "epoch": 0.23720906943946057, "grad_norm": 0.808019818713537, "learning_rate": 4.106790964603457e-06, "loss": 0.0296, "step": 56850 }, { "epoch": 0.23722993215445085, "grad_norm": 0.7800309665146042, "learning_rate": 4.106610375671232e-06, "loss": 0.0397, "step": 56855 }, { "epoch": 0.23725079486944112, "grad_norm": 1.0512327003040762, "learning_rate": 4.1064298105601595e-06, "loss": 0.0264, "step": 56860 }, { "epoch": 0.23727165758443142, "grad_norm": 0.4771421265327321, "learning_rate": 4.106249269264999e-06, "loss": 0.0358, "step": 56865 }, { "epoch": 0.2372925202994217, "grad_norm": 0.922026143902832, "learning_rate": 4.106068751780517e-06, "loss": 0.0418, "step": 56870 }, { "epoch": 0.23731338301441196, "grad_norm": 0.822379856386348, "learning_rate": 4.105888258101481e-06, "loss": 0.028, "step": 56875 }, { "epoch": 0.23733424572940223, "grad_norm": 0.9622731744377588, "learning_rate": 4.105707788222658e-06, "loss": 0.0377, "step": 56880 }, { "epoch": 0.23735510844439253, "grad_norm": 0.565284461329109, "learning_rate": 4.10552734213882e-06, "loss": 0.0333, "step": 56885 }, { "epoch": 0.2373759711593828, "grad_norm": 0.7047274907823596, "learning_rate": 4.105346919844735e-06, "loss": 0.0408, "step": 56890 }, { "epoch": 0.23739683387437308, "grad_norm": 0.45604350218066214, "learning_rate": 4.105166521335179e-06, "loss": 0.0309, "step": 56895 }, { "epoch": 0.23741769658936335, "grad_norm": 0.971808463061305, "learning_rate": 4.104986146604925e-06, "loss": 0.0339, "step": 56900 }, { "epoch": 0.23743855930435362, "grad_norm": 0.6963233790903658, "learning_rate": 4.10480579564875e-06, "loss": 0.0331, "step": 56905 }, { "epoch": 0.23745942201934392, "grad_norm": 0.7347371196446912, "learning_rate": 4.104625468461432e-06, "loss": 0.0393, "step": 56910 }, { "epoch": 0.2374802847343342, "grad_norm": 0.9479956107443415, "learning_rate": 4.104445165037752e-06, "loss": 0.0336, "step": 56915 }, { "epoch": 0.23750114744932446, "grad_norm": 1.2377703727998124, "learning_rate": 4.104264885372487e-06, "loss": 0.0289, "step": 56920 }, { "epoch": 0.23752201016431473, "grad_norm": 0.6429795878653694, "learning_rate": 4.104084629460425e-06, "loss": 0.0292, "step": 56925 }, { "epoch": 0.23754287287930503, "grad_norm": 0.592185720369109, "learning_rate": 4.1039043972963476e-06, "loss": 0.0264, "step": 56930 }, { "epoch": 0.2375637355942953, "grad_norm": 1.0429534593524215, "learning_rate": 4.10372418887504e-06, "loss": 0.0297, "step": 56935 }, { "epoch": 0.23758459830928558, "grad_norm": 0.7753694696355216, "learning_rate": 4.10354400419129e-06, "loss": 0.0204, "step": 56940 }, { "epoch": 0.23760546102427585, "grad_norm": 1.0378264397981536, "learning_rate": 4.103363843239889e-06, "loss": 0.0365, "step": 56945 }, { "epoch": 0.23762632373926612, "grad_norm": 1.3194730326681408, "learning_rate": 4.103183706015626e-06, "loss": 0.0316, "step": 56950 }, { "epoch": 0.23764718645425642, "grad_norm": 1.2818208770744568, "learning_rate": 4.103003592513293e-06, "loss": 0.0321, "step": 56955 }, { "epoch": 0.2376680491692467, "grad_norm": 1.4491974605205094, "learning_rate": 4.102823502727685e-06, "loss": 0.0333, "step": 56960 }, { "epoch": 0.23768891188423696, "grad_norm": 1.5850683464362654, "learning_rate": 4.102643436653597e-06, "loss": 0.0395, "step": 56965 }, { "epoch": 0.23770977459922724, "grad_norm": 0.8183817414116009, "learning_rate": 4.102463394285826e-06, "loss": 0.0309, "step": 56970 }, { "epoch": 0.23773063731421754, "grad_norm": 0.5702444099045259, "learning_rate": 4.102283375619172e-06, "loss": 0.0341, "step": 56975 }, { "epoch": 0.2377515000292078, "grad_norm": 0.5671040817552689, "learning_rate": 4.102103380648433e-06, "loss": 0.0325, "step": 56980 }, { "epoch": 0.23777236274419808, "grad_norm": 1.1121514334300728, "learning_rate": 4.101923409368413e-06, "loss": 0.0327, "step": 56985 }, { "epoch": 0.23779322545918835, "grad_norm": 0.81659949961696, "learning_rate": 4.1017434617739146e-06, "loss": 0.0359, "step": 56990 }, { "epoch": 0.23781408817417862, "grad_norm": 1.347477809343321, "learning_rate": 4.1015635378597435e-06, "loss": 0.0326, "step": 56995 }, { "epoch": 0.23783495088916892, "grad_norm": 0.7295870843450188, "learning_rate": 4.101383637620707e-06, "loss": 0.0301, "step": 57000 }, { "epoch": 0.2378558136041592, "grad_norm": 0.6094292768978573, "learning_rate": 4.101203761051612e-06, "loss": 0.0307, "step": 57005 }, { "epoch": 0.23787667631914947, "grad_norm": 0.5260857913644884, "learning_rate": 4.1010239081472695e-06, "loss": 0.0323, "step": 57010 }, { "epoch": 0.23789753903413974, "grad_norm": 1.1059341615156095, "learning_rate": 4.10084407890249e-06, "loss": 0.0334, "step": 57015 }, { "epoch": 0.23791840174913004, "grad_norm": 0.8120586403301027, "learning_rate": 4.100664273312087e-06, "loss": 0.0418, "step": 57020 }, { "epoch": 0.2379392644641203, "grad_norm": 1.0076264949454115, "learning_rate": 4.100484491370876e-06, "loss": 0.0367, "step": 57025 }, { "epoch": 0.23796012717911058, "grad_norm": 0.9053112742850665, "learning_rate": 4.100304733073672e-06, "loss": 0.0296, "step": 57030 }, { "epoch": 0.23798098989410085, "grad_norm": 0.8101490779776606, "learning_rate": 4.1001249984152945e-06, "loss": 0.0377, "step": 57035 }, { "epoch": 0.23800185260909112, "grad_norm": 0.8953633951867092, "learning_rate": 4.09994528739056e-06, "loss": 0.0429, "step": 57040 }, { "epoch": 0.23802271532408142, "grad_norm": 0.46624165701561654, "learning_rate": 4.099765599994294e-06, "loss": 0.0471, "step": 57045 }, { "epoch": 0.2380435780390717, "grad_norm": 0.8410044579076297, "learning_rate": 4.099585936221316e-06, "loss": 0.0297, "step": 57050 }, { "epoch": 0.23806444075406197, "grad_norm": 2.952150177075026, "learning_rate": 4.099406296066451e-06, "loss": 0.0808, "step": 57055 }, { "epoch": 0.23808530346905224, "grad_norm": 0.47929793761863515, "learning_rate": 4.099226679524525e-06, "loss": 0.0358, "step": 57060 }, { "epoch": 0.23810616618404254, "grad_norm": 0.9673806352508483, "learning_rate": 4.099047086590365e-06, "loss": 0.035, "step": 57065 }, { "epoch": 0.2381270288990328, "grad_norm": 1.5286581014045928, "learning_rate": 4.0988675172588e-06, "loss": 0.0304, "step": 57070 }, { "epoch": 0.23814789161402308, "grad_norm": 1.3522933957857144, "learning_rate": 4.098687971524662e-06, "loss": 0.0393, "step": 57075 }, { "epoch": 0.23816875432901335, "grad_norm": 1.0154730615140926, "learning_rate": 4.098508449382781e-06, "loss": 0.0336, "step": 57080 }, { "epoch": 0.23818961704400363, "grad_norm": 1.0796284105836873, "learning_rate": 4.098328950827993e-06, "loss": 0.0299, "step": 57085 }, { "epoch": 0.23821047975899393, "grad_norm": 1.0937196639061226, "learning_rate": 4.09814947585513e-06, "loss": 0.0354, "step": 57090 }, { "epoch": 0.2382313424739842, "grad_norm": 0.8526602805983811, "learning_rate": 4.0979700244590335e-06, "loss": 0.0398, "step": 57095 }, { "epoch": 0.23825220518897447, "grad_norm": 1.1034866389069393, "learning_rate": 4.097790596634538e-06, "loss": 0.0377, "step": 57100 }, { "epoch": 0.23827306790396474, "grad_norm": 0.592171730082363, "learning_rate": 4.097611192376486e-06, "loss": 0.0271, "step": 57105 }, { "epoch": 0.23829393061895504, "grad_norm": 4.129924355296742, "learning_rate": 4.097431811679719e-06, "loss": 0.0218, "step": 57110 }, { "epoch": 0.2383147933339453, "grad_norm": 1.1861925846920651, "learning_rate": 4.0972524545390774e-06, "loss": 0.025, "step": 57115 }, { "epoch": 0.23833565604893558, "grad_norm": 0.5622951320926481, "learning_rate": 4.0970731209494105e-06, "loss": 0.036, "step": 57120 }, { "epoch": 0.23835651876392586, "grad_norm": 1.6743475730746746, "learning_rate": 4.096893810905561e-06, "loss": 0.052, "step": 57125 }, { "epoch": 0.23837738147891613, "grad_norm": 0.7573670852763884, "learning_rate": 4.096714524402379e-06, "loss": 0.0315, "step": 57130 }, { "epoch": 0.23839824419390643, "grad_norm": 0.6010421324388318, "learning_rate": 4.096535261434713e-06, "loss": 0.0321, "step": 57135 }, { "epoch": 0.2384191069088967, "grad_norm": 0.6809489601422692, "learning_rate": 4.096356021997414e-06, "loss": 0.0276, "step": 57140 }, { "epoch": 0.23843996962388697, "grad_norm": 0.842595563688044, "learning_rate": 4.096176806085337e-06, "loss": 0.0328, "step": 57145 }, { "epoch": 0.23846083233887724, "grad_norm": 0.802627767389502, "learning_rate": 4.095997613693333e-06, "loss": 0.0397, "step": 57150 }, { "epoch": 0.23848169505386754, "grad_norm": 0.7505615609151922, "learning_rate": 4.0958184448162605e-06, "loss": 0.0281, "step": 57155 }, { "epoch": 0.23850255776885781, "grad_norm": 0.6150956048042878, "learning_rate": 4.095639299448975e-06, "loss": 0.0292, "step": 57160 }, { "epoch": 0.2385234204838481, "grad_norm": 0.5424752593751869, "learning_rate": 4.095460177586336e-06, "loss": 0.0445, "step": 57165 }, { "epoch": 0.23854428319883836, "grad_norm": 1.1432307044319967, "learning_rate": 4.095281079223206e-06, "loss": 0.033, "step": 57170 }, { "epoch": 0.23856514591382863, "grad_norm": 0.7300526620380218, "learning_rate": 4.095102004354445e-06, "loss": 0.0349, "step": 57175 }, { "epoch": 0.23858600862881893, "grad_norm": 1.4073105538427646, "learning_rate": 4.094922952974918e-06, "loss": 0.0345, "step": 57180 }, { "epoch": 0.2386068713438092, "grad_norm": 0.751237109094994, "learning_rate": 4.09474392507949e-06, "loss": 0.023, "step": 57185 }, { "epoch": 0.23862773405879947, "grad_norm": 0.5833058670008053, "learning_rate": 4.0945649206630264e-06, "loss": 0.0248, "step": 57190 }, { "epoch": 0.23864859677378975, "grad_norm": 1.0819484790443636, "learning_rate": 4.0943859397203965e-06, "loss": 0.0361, "step": 57195 }, { "epoch": 0.23866945948878004, "grad_norm": 1.0165443966664383, "learning_rate": 4.094206982246472e-06, "loss": 0.0335, "step": 57200 }, { "epoch": 0.23869032220377032, "grad_norm": 1.0445058984392555, "learning_rate": 4.0940280482361225e-06, "loss": 0.0247, "step": 57205 }, { "epoch": 0.2387111849187606, "grad_norm": 1.0462447814416813, "learning_rate": 4.093849137684222e-06, "loss": 0.028, "step": 57210 }, { "epoch": 0.23873204763375086, "grad_norm": 2.087763863887082, "learning_rate": 4.093670250585646e-06, "loss": 0.0341, "step": 57215 }, { "epoch": 0.23875291034874113, "grad_norm": 0.6936114756807977, "learning_rate": 4.093491386935269e-06, "loss": 0.0332, "step": 57220 }, { "epoch": 0.23877377306373143, "grad_norm": 0.9895042285876595, "learning_rate": 4.09331254672797e-06, "loss": 0.0489, "step": 57225 }, { "epoch": 0.2387946357787217, "grad_norm": 0.8024251586618518, "learning_rate": 4.093133729958628e-06, "loss": 0.0369, "step": 57230 }, { "epoch": 0.23881549849371198, "grad_norm": 0.8819948503700122, "learning_rate": 4.092954936622124e-06, "loss": 0.0262, "step": 57235 }, { "epoch": 0.23883636120870225, "grad_norm": 0.7837642031929155, "learning_rate": 4.092776166713342e-06, "loss": 0.0301, "step": 57240 }, { "epoch": 0.23885722392369255, "grad_norm": 1.1671524669744664, "learning_rate": 4.092597420227164e-06, "loss": 0.0491, "step": 57245 }, { "epoch": 0.23887808663868282, "grad_norm": 0.6437478369628359, "learning_rate": 4.092418697158476e-06, "loss": 0.0286, "step": 57250 }, { "epoch": 0.2388989493536731, "grad_norm": 0.746681269473092, "learning_rate": 4.092239997502167e-06, "loss": 0.0282, "step": 57255 }, { "epoch": 0.23891981206866336, "grad_norm": 1.3558461054874835, "learning_rate": 4.092061321253124e-06, "loss": 0.0369, "step": 57260 }, { "epoch": 0.23894067478365363, "grad_norm": 1.0523189561182065, "learning_rate": 4.091882668406238e-06, "loss": 0.0417, "step": 57265 }, { "epoch": 0.23896153749864393, "grad_norm": 0.8404514530758236, "learning_rate": 4.091704038956401e-06, "loss": 0.0331, "step": 57270 }, { "epoch": 0.2389824002136342, "grad_norm": 0.5823860311751505, "learning_rate": 4.091525432898508e-06, "loss": 0.0347, "step": 57275 }, { "epoch": 0.23900326292862448, "grad_norm": 1.2130500155727741, "learning_rate": 4.091346850227451e-06, "loss": 0.035, "step": 57280 }, { "epoch": 0.23902412564361475, "grad_norm": 1.016651705355884, "learning_rate": 4.091168290938128e-06, "loss": 0.0374, "step": 57285 }, { "epoch": 0.23904498835860505, "grad_norm": 0.6877854812942642, "learning_rate": 4.090989755025438e-06, "loss": 0.0367, "step": 57290 }, { "epoch": 0.23906585107359532, "grad_norm": 0.7739900886826552, "learning_rate": 4.090811242484279e-06, "loss": 0.0256, "step": 57295 }, { "epoch": 0.2390867137885856, "grad_norm": 2.6079821365455778, "learning_rate": 4.090632753309555e-06, "loss": 0.0335, "step": 57300 }, { "epoch": 0.23910757650357586, "grad_norm": 0.8368768693368946, "learning_rate": 4.090454287496166e-06, "loss": 0.0286, "step": 57305 }, { "epoch": 0.23912843921856614, "grad_norm": 0.5920806981858973, "learning_rate": 4.090275845039019e-06, "loss": 0.0306, "step": 57310 }, { "epoch": 0.23914930193355644, "grad_norm": 0.7162680231637165, "learning_rate": 4.090097425933018e-06, "loss": 0.0292, "step": 57315 }, { "epoch": 0.2391701646485467, "grad_norm": 1.082656191646809, "learning_rate": 4.089919030173071e-06, "loss": 0.0272, "step": 57320 }, { "epoch": 0.23919102736353698, "grad_norm": 0.6101924631920831, "learning_rate": 4.089740657754088e-06, "loss": 0.0243, "step": 57325 }, { "epoch": 0.23921189007852725, "grad_norm": 0.7334744777787215, "learning_rate": 4.089562308670978e-06, "loss": 0.0302, "step": 57330 }, { "epoch": 0.23923275279351755, "grad_norm": 0.7694745694057283, "learning_rate": 4.089383982918654e-06, "loss": 0.0298, "step": 57335 }, { "epoch": 0.23925361550850782, "grad_norm": 0.8316290437148126, "learning_rate": 4.08920568049203e-06, "loss": 0.0268, "step": 57340 }, { "epoch": 0.2392744782234981, "grad_norm": 1.334052411913936, "learning_rate": 4.089027401386021e-06, "loss": 0.0369, "step": 57345 }, { "epoch": 0.23929534093848837, "grad_norm": 0.7805979321283324, "learning_rate": 4.088849145595544e-06, "loss": 0.0336, "step": 57350 }, { "epoch": 0.23931620365347864, "grad_norm": 1.1350758694666452, "learning_rate": 4.088670913115518e-06, "loss": 0.0337, "step": 57355 }, { "epoch": 0.23933706636846894, "grad_norm": 0.6719512008437486, "learning_rate": 4.0884927039408615e-06, "loss": 0.0295, "step": 57360 }, { "epoch": 0.2393579290834592, "grad_norm": 1.3329375625188071, "learning_rate": 4.088314518066497e-06, "loss": 0.0393, "step": 57365 }, { "epoch": 0.23937879179844948, "grad_norm": 1.0990729350093655, "learning_rate": 4.088136355487347e-06, "loss": 0.0419, "step": 57370 }, { "epoch": 0.23939965451343975, "grad_norm": 0.5708730209463316, "learning_rate": 4.087958216198337e-06, "loss": 0.0291, "step": 57375 }, { "epoch": 0.23942051722843005, "grad_norm": 2.04541239755116, "learning_rate": 4.087780100194392e-06, "loss": 0.0392, "step": 57380 }, { "epoch": 0.23944137994342032, "grad_norm": 0.6895420438300105, "learning_rate": 4.08760200747044e-06, "loss": 0.0284, "step": 57385 }, { "epoch": 0.2394622426584106, "grad_norm": 0.6431558702760284, "learning_rate": 4.0874239380214116e-06, "loss": 0.0314, "step": 57390 }, { "epoch": 0.23948310537340087, "grad_norm": 0.6606797113755861, "learning_rate": 4.087245891842235e-06, "loss": 0.0331, "step": 57395 }, { "epoch": 0.23950396808839114, "grad_norm": 1.2648318338500222, "learning_rate": 4.087067868927845e-06, "loss": 0.0443, "step": 57400 }, { "epoch": 0.23952483080338144, "grad_norm": 0.9220595272771028, "learning_rate": 4.086889869273173e-06, "loss": 0.0243, "step": 57405 }, { "epoch": 0.2395456935183717, "grad_norm": 0.6406329654801922, "learning_rate": 4.086711892873158e-06, "loss": 0.0299, "step": 57410 }, { "epoch": 0.23956655623336198, "grad_norm": 1.2233126141166113, "learning_rate": 4.0865339397227336e-06, "loss": 0.0419, "step": 57415 }, { "epoch": 0.23958741894835225, "grad_norm": 0.7278152088655604, "learning_rate": 4.086356009816839e-06, "loss": 0.0333, "step": 57420 }, { "epoch": 0.23960828166334255, "grad_norm": 1.05390076428266, "learning_rate": 4.086178103150416e-06, "loss": 0.0375, "step": 57425 }, { "epoch": 0.23962914437833283, "grad_norm": 0.6530658230137583, "learning_rate": 4.086000219718404e-06, "loss": 0.0296, "step": 57430 }, { "epoch": 0.2396500070933231, "grad_norm": 0.9089102277259429, "learning_rate": 4.085822359515747e-06, "loss": 0.0311, "step": 57435 }, { "epoch": 0.23967086980831337, "grad_norm": 2.43633865539716, "learning_rate": 4.08564452253739e-06, "loss": 0.0362, "step": 57440 }, { "epoch": 0.23969173252330364, "grad_norm": 0.9609749749967275, "learning_rate": 4.085466708778279e-06, "loss": 0.0357, "step": 57445 }, { "epoch": 0.23971259523829394, "grad_norm": 0.8590683247922403, "learning_rate": 4.085288918233361e-06, "loss": 0.0316, "step": 57450 }, { "epoch": 0.2397334579532842, "grad_norm": 0.5761580158389173, "learning_rate": 4.0851111508975865e-06, "loss": 0.0267, "step": 57455 }, { "epoch": 0.23975432066827448, "grad_norm": 0.998830107794971, "learning_rate": 4.0849334067659066e-06, "loss": 0.0366, "step": 57460 }, { "epoch": 0.23977518338326476, "grad_norm": 1.0691057316008412, "learning_rate": 4.0847556858332715e-06, "loss": 0.0321, "step": 57465 }, { "epoch": 0.23979604609825506, "grad_norm": 0.9128189544748793, "learning_rate": 4.0845779880946365e-06, "loss": 0.0291, "step": 57470 }, { "epoch": 0.23981690881324533, "grad_norm": 1.474795780637868, "learning_rate": 4.084400313544958e-06, "loss": 0.0265, "step": 57475 }, { "epoch": 0.2398377715282356, "grad_norm": 0.781776546773773, "learning_rate": 4.0842226621791916e-06, "loss": 0.0393, "step": 57480 }, { "epoch": 0.23985863424322587, "grad_norm": 0.8852688713095461, "learning_rate": 4.084045033992296e-06, "loss": 0.0308, "step": 57485 }, { "epoch": 0.23987949695821614, "grad_norm": 0.7949121594480693, "learning_rate": 4.083867428979231e-06, "loss": 0.0314, "step": 57490 }, { "epoch": 0.23990035967320644, "grad_norm": 0.8171526107075344, "learning_rate": 4.083689847134959e-06, "loss": 0.0297, "step": 57495 }, { "epoch": 0.23992122238819671, "grad_norm": 1.080244935593125, "learning_rate": 4.083512288454442e-06, "loss": 0.0399, "step": 57500 }, { "epoch": 0.239942085103187, "grad_norm": 1.0077462165332511, "learning_rate": 4.083334752932645e-06, "loss": 0.0427, "step": 57505 }, { "epoch": 0.23996294781817726, "grad_norm": 1.15897626514407, "learning_rate": 4.0831572405645365e-06, "loss": 0.0319, "step": 57510 }, { "epoch": 0.23998381053316756, "grad_norm": 0.6412986016254415, "learning_rate": 4.082979751345081e-06, "loss": 0.0313, "step": 57515 }, { "epoch": 0.24000467324815783, "grad_norm": 1.28300821671755, "learning_rate": 4.082802285269249e-06, "loss": 0.0355, "step": 57520 }, { "epoch": 0.2400255359631481, "grad_norm": 1.0955636767366275, "learning_rate": 4.082624842332012e-06, "loss": 0.029, "step": 57525 }, { "epoch": 0.24004639867813837, "grad_norm": 0.9041164691467788, "learning_rate": 4.08244742252834e-06, "loss": 0.0457, "step": 57530 }, { "epoch": 0.24006726139312864, "grad_norm": 0.9488540502603944, "learning_rate": 4.082270025853209e-06, "loss": 0.0253, "step": 57535 }, { "epoch": 0.24008812410811894, "grad_norm": 1.0961183893511894, "learning_rate": 4.082092652301594e-06, "loss": 0.0378, "step": 57540 }, { "epoch": 0.24010898682310922, "grad_norm": 1.0243117663781058, "learning_rate": 4.0819153018684715e-06, "loss": 0.0453, "step": 57545 }, { "epoch": 0.2401298495380995, "grad_norm": 0.5373319884210759, "learning_rate": 4.08173797454882e-06, "loss": 0.0298, "step": 57550 }, { "epoch": 0.24015071225308976, "grad_norm": 0.5491757281476651, "learning_rate": 4.0815606703376196e-06, "loss": 0.0322, "step": 57555 }, { "epoch": 0.24017157496808006, "grad_norm": 0.6946497902329308, "learning_rate": 4.08138338922985e-06, "loss": 0.0279, "step": 57560 }, { "epoch": 0.24019243768307033, "grad_norm": 0.7750284982022593, "learning_rate": 4.081206131220497e-06, "loss": 0.0392, "step": 57565 }, { "epoch": 0.2402133003980606, "grad_norm": 1.058478029898316, "learning_rate": 4.081028896304544e-06, "loss": 0.03, "step": 57570 }, { "epoch": 0.24023416311305087, "grad_norm": 0.7351138494792834, "learning_rate": 4.080851684476978e-06, "loss": 0.0306, "step": 57575 }, { "epoch": 0.24025502582804115, "grad_norm": 0.6229395572025344, "learning_rate": 4.0806744957327835e-06, "loss": 0.0387, "step": 57580 }, { "epoch": 0.24027588854303145, "grad_norm": 0.8658356571997344, "learning_rate": 4.0804973300669524e-06, "loss": 0.0494, "step": 57585 }, { "epoch": 0.24029675125802172, "grad_norm": 0.7371120436669818, "learning_rate": 4.080320187474474e-06, "loss": 0.0309, "step": 57590 }, { "epoch": 0.240317613973012, "grad_norm": 0.7842696138231, "learning_rate": 4.080143067950342e-06, "loss": 0.0253, "step": 57595 }, { "epoch": 0.24033847668800226, "grad_norm": 0.9272635557044383, "learning_rate": 4.079965971489548e-06, "loss": 0.028, "step": 57600 }, { "epoch": 0.24035933940299256, "grad_norm": 0.4581221322639126, "learning_rate": 4.079788898087088e-06, "loss": 0.0309, "step": 57605 }, { "epoch": 0.24038020211798283, "grad_norm": 0.5960417842888281, "learning_rate": 4.07961184773796e-06, "loss": 0.0389, "step": 57610 }, { "epoch": 0.2404010648329731, "grad_norm": 1.4059211161930756, "learning_rate": 4.07943482043716e-06, "loss": 0.0404, "step": 57615 }, { "epoch": 0.24042192754796338, "grad_norm": 1.007262239239792, "learning_rate": 4.079257816179689e-06, "loss": 0.0295, "step": 57620 }, { "epoch": 0.24044279026295365, "grad_norm": 0.8180956114497393, "learning_rate": 4.079080834960548e-06, "loss": 0.0437, "step": 57625 }, { "epoch": 0.24046365297794395, "grad_norm": 0.5869507341989564, "learning_rate": 4.0789038767747405e-06, "loss": 0.0302, "step": 57630 }, { "epoch": 0.24048451569293422, "grad_norm": 0.625229708605132, "learning_rate": 4.07872694161727e-06, "loss": 0.0304, "step": 57635 }, { "epoch": 0.2405053784079245, "grad_norm": 1.2097763055348694, "learning_rate": 4.078550029483141e-06, "loss": 0.0306, "step": 57640 }, { "epoch": 0.24052624112291476, "grad_norm": 0.6882399129155901, "learning_rate": 4.0783731403673634e-06, "loss": 0.0274, "step": 57645 }, { "epoch": 0.24054710383790506, "grad_norm": 0.5121365420697184, "learning_rate": 4.078196274264945e-06, "loss": 0.0277, "step": 57650 }, { "epoch": 0.24056796655289533, "grad_norm": 0.690622799130809, "learning_rate": 4.0780194311708944e-06, "loss": 0.0263, "step": 57655 }, { "epoch": 0.2405888292678856, "grad_norm": 1.300595493960876, "learning_rate": 4.077842611080227e-06, "loss": 0.0362, "step": 57660 }, { "epoch": 0.24060969198287588, "grad_norm": 0.701267873639345, "learning_rate": 4.077665813987954e-06, "loss": 0.0378, "step": 57665 }, { "epoch": 0.24063055469786615, "grad_norm": 0.9625408341490574, "learning_rate": 4.07748903988909e-06, "loss": 0.031, "step": 57670 }, { "epoch": 0.24065141741285645, "grad_norm": 0.5978343579778875, "learning_rate": 4.077312288778652e-06, "loss": 0.0451, "step": 57675 }, { "epoch": 0.24067228012784672, "grad_norm": 0.6038788405007018, "learning_rate": 4.077135560651658e-06, "loss": 0.0272, "step": 57680 }, { "epoch": 0.240693142842837, "grad_norm": 1.067115853073573, "learning_rate": 4.076958855503127e-06, "loss": 0.0722, "step": 57685 }, { "epoch": 0.24071400555782727, "grad_norm": 0.958582875206541, "learning_rate": 4.07678217332808e-06, "loss": 0.031, "step": 57690 }, { "epoch": 0.24073486827281756, "grad_norm": 1.201207697337393, "learning_rate": 4.07660551412154e-06, "loss": 0.0318, "step": 57695 }, { "epoch": 0.24075573098780784, "grad_norm": 0.899519281738958, "learning_rate": 4.076428877878531e-06, "loss": 0.0285, "step": 57700 }, { "epoch": 0.2407765937027981, "grad_norm": 0.9456308518460717, "learning_rate": 4.0762522645940775e-06, "loss": 0.028, "step": 57705 }, { "epoch": 0.24079745641778838, "grad_norm": 0.6054681980284466, "learning_rate": 4.076075674263208e-06, "loss": 0.0293, "step": 57710 }, { "epoch": 0.24081831913277865, "grad_norm": 0.38308343500330966, "learning_rate": 4.075899106880949e-06, "loss": 0.027, "step": 57715 }, { "epoch": 0.24083918184776895, "grad_norm": 0.4119703567248704, "learning_rate": 4.075722562442333e-06, "loss": 0.0275, "step": 57720 }, { "epoch": 0.24086004456275922, "grad_norm": 0.44517582828416624, "learning_rate": 4.075546040942389e-06, "loss": 0.0388, "step": 57725 }, { "epoch": 0.2408809072777495, "grad_norm": 0.7018369686200904, "learning_rate": 4.075369542376151e-06, "loss": 0.0383, "step": 57730 }, { "epoch": 0.24090176999273977, "grad_norm": 0.5389236428562252, "learning_rate": 4.075193066738654e-06, "loss": 0.027, "step": 57735 }, { "epoch": 0.24092263270773007, "grad_norm": 1.6040977776402434, "learning_rate": 4.075016614024933e-06, "loss": 0.0367, "step": 57740 }, { "epoch": 0.24094349542272034, "grad_norm": 1.1361439601163266, "learning_rate": 4.074840184230026e-06, "loss": 0.0397, "step": 57745 }, { "epoch": 0.2409643581377106, "grad_norm": 0.8010044449159273, "learning_rate": 4.074663777348974e-06, "loss": 0.0297, "step": 57750 }, { "epoch": 0.24098522085270088, "grad_norm": 0.8519360799031919, "learning_rate": 4.074487393376814e-06, "loss": 0.0291, "step": 57755 }, { "epoch": 0.24100608356769115, "grad_norm": 0.6074929380165273, "learning_rate": 4.07431103230859e-06, "loss": 0.026, "step": 57760 }, { "epoch": 0.24102694628268145, "grad_norm": 0.7057490826755443, "learning_rate": 4.074134694139344e-06, "loss": 0.035, "step": 57765 }, { "epoch": 0.24104780899767173, "grad_norm": 1.2751825757946087, "learning_rate": 4.073958378864124e-06, "loss": 0.048, "step": 57770 }, { "epoch": 0.241068671712662, "grad_norm": 1.170572186180124, "learning_rate": 4.073782086477975e-06, "loss": 0.0313, "step": 57775 }, { "epoch": 0.24108953442765227, "grad_norm": 0.7455034983629895, "learning_rate": 4.073605816975943e-06, "loss": 0.0436, "step": 57780 }, { "epoch": 0.24111039714264257, "grad_norm": 0.7510412011202473, "learning_rate": 4.073429570353082e-06, "loss": 0.0303, "step": 57785 }, { "epoch": 0.24113125985763284, "grad_norm": 0.6207142553902626, "learning_rate": 4.073253346604438e-06, "loss": 0.0351, "step": 57790 }, { "epoch": 0.2411521225726231, "grad_norm": 0.8657729476876764, "learning_rate": 4.073077145725067e-06, "loss": 0.0293, "step": 57795 }, { "epoch": 0.24117298528761338, "grad_norm": 1.0731569690927343, "learning_rate": 4.072900967710021e-06, "loss": 0.0285, "step": 57800 }, { "epoch": 0.24119384800260366, "grad_norm": 0.9465082365232568, "learning_rate": 4.0727248125543574e-06, "loss": 0.0338, "step": 57805 }, { "epoch": 0.24121471071759396, "grad_norm": 0.6384554178701736, "learning_rate": 4.072548680253133e-06, "loss": 0.0275, "step": 57810 }, { "epoch": 0.24123557343258423, "grad_norm": 1.0884602095401652, "learning_rate": 4.072372570801405e-06, "loss": 0.0276, "step": 57815 }, { "epoch": 0.2412564361475745, "grad_norm": 0.6145675567693135, "learning_rate": 4.072196484194234e-06, "loss": 0.0315, "step": 57820 }, { "epoch": 0.24127729886256477, "grad_norm": 1.9184386021417879, "learning_rate": 4.072020420426682e-06, "loss": 0.0371, "step": 57825 }, { "epoch": 0.24129816157755507, "grad_norm": 0.7876468962696037, "learning_rate": 4.071844379493811e-06, "loss": 0.0267, "step": 57830 }, { "epoch": 0.24131902429254534, "grad_norm": 0.4471975291963401, "learning_rate": 4.071668361390686e-06, "loss": 0.0209, "step": 57835 }, { "epoch": 0.24133988700753561, "grad_norm": 0.7703000941467514, "learning_rate": 4.071492366112374e-06, "loss": 0.0261, "step": 57840 }, { "epoch": 0.24136074972252589, "grad_norm": 0.8433030271268895, "learning_rate": 4.071316393653941e-06, "loss": 0.031, "step": 57845 }, { "epoch": 0.24138161243751616, "grad_norm": 0.7043837223755308, "learning_rate": 4.071140444010455e-06, "loss": 0.0306, "step": 57850 }, { "epoch": 0.24140247515250646, "grad_norm": 0.563820434254424, "learning_rate": 4.0709645171769905e-06, "loss": 0.0305, "step": 57855 }, { "epoch": 0.24142333786749673, "grad_norm": 0.7636893467963081, "learning_rate": 4.0707886131486155e-06, "loss": 0.0228, "step": 57860 }, { "epoch": 0.241444200582487, "grad_norm": 0.8854414483139129, "learning_rate": 4.070612731920406e-06, "loss": 0.0261, "step": 57865 }, { "epoch": 0.24146506329747727, "grad_norm": 1.059957570748889, "learning_rate": 4.070436873487435e-06, "loss": 0.0347, "step": 57870 }, { "epoch": 0.24148592601246757, "grad_norm": 1.0834664391092836, "learning_rate": 4.07026103784478e-06, "loss": 0.0375, "step": 57875 }, { "epoch": 0.24150678872745784, "grad_norm": 0.8778940877958405, "learning_rate": 4.0700852249875176e-06, "loss": 0.0295, "step": 57880 }, { "epoch": 0.24152765144244812, "grad_norm": 0.6096886462304079, "learning_rate": 4.06990943491073e-06, "loss": 0.0303, "step": 57885 }, { "epoch": 0.2415485141574384, "grad_norm": 1.998578720840925, "learning_rate": 4.069733667609496e-06, "loss": 0.0376, "step": 57890 }, { "epoch": 0.24156937687242866, "grad_norm": 0.6605139807747652, "learning_rate": 4.069557923078898e-06, "loss": 0.0235, "step": 57895 }, { "epoch": 0.24159023958741896, "grad_norm": 0.6317160124996125, "learning_rate": 4.06938220131402e-06, "loss": 0.0355, "step": 57900 }, { "epoch": 0.24161110230240923, "grad_norm": 0.9866981471906806, "learning_rate": 4.069206502309948e-06, "loss": 0.0325, "step": 57905 }, { "epoch": 0.2416319650173995, "grad_norm": 1.108140043210363, "learning_rate": 4.069030826061769e-06, "loss": 0.0309, "step": 57910 }, { "epoch": 0.24165282773238977, "grad_norm": 0.663474625882247, "learning_rate": 4.06885517256457e-06, "loss": 0.0339, "step": 57915 }, { "epoch": 0.24167369044738007, "grad_norm": 0.7023091749489746, "learning_rate": 4.068679541813442e-06, "loss": 0.0277, "step": 57920 }, { "epoch": 0.24169455316237035, "grad_norm": 0.8332301726229904, "learning_rate": 4.068503933803476e-06, "loss": 0.0361, "step": 57925 }, { "epoch": 0.24171541587736062, "grad_norm": 0.7194478534442992, "learning_rate": 4.0683283485297645e-06, "loss": 0.0365, "step": 57930 }, { "epoch": 0.2417362785923509, "grad_norm": 0.8972213212102539, "learning_rate": 4.068152785987401e-06, "loss": 0.0311, "step": 57935 }, { "epoch": 0.24175714130734116, "grad_norm": 0.7948334915376575, "learning_rate": 4.067977246171483e-06, "loss": 0.0408, "step": 57940 }, { "epoch": 0.24177800402233146, "grad_norm": 0.9995326186269085, "learning_rate": 4.067801729077107e-06, "loss": 0.0336, "step": 57945 }, { "epoch": 0.24179886673732173, "grad_norm": 0.7476922412923235, "learning_rate": 4.067626234699371e-06, "loss": 0.0378, "step": 57950 }, { "epoch": 0.241819729452312, "grad_norm": 1.1774274540575134, "learning_rate": 4.067450763033377e-06, "loss": 0.034, "step": 57955 }, { "epoch": 0.24184059216730228, "grad_norm": 0.6799438380677795, "learning_rate": 4.067275314074225e-06, "loss": 0.0278, "step": 57960 }, { "epoch": 0.24186145488229258, "grad_norm": 1.006739041157041, "learning_rate": 4.067099887817017e-06, "loss": 0.0383, "step": 57965 }, { "epoch": 0.24188231759728285, "grad_norm": 6.642816270151602, "learning_rate": 4.0669244842568605e-06, "loss": 0.028, "step": 57970 }, { "epoch": 0.24190318031227312, "grad_norm": 0.6897911542593826, "learning_rate": 4.0667491033888604e-06, "loss": 0.0358, "step": 57975 }, { "epoch": 0.2419240430272634, "grad_norm": 1.3793666227895636, "learning_rate": 4.0665737452081246e-06, "loss": 0.0408, "step": 57980 }, { "epoch": 0.24194490574225366, "grad_norm": 0.8661487709284347, "learning_rate": 4.0663984097097604e-06, "loss": 0.0349, "step": 57985 }, { "epoch": 0.24196576845724396, "grad_norm": 1.0045552741165233, "learning_rate": 4.066223096888881e-06, "loss": 0.026, "step": 57990 }, { "epoch": 0.24198663117223423, "grad_norm": 0.9946880674452603, "learning_rate": 4.066047806740595e-06, "loss": 0.0349, "step": 57995 }, { "epoch": 0.2420074938872245, "grad_norm": 0.6747394304123562, "learning_rate": 4.065872539260019e-06, "loss": 0.0309, "step": 58000 }, { "epoch": 0.24202835660221478, "grad_norm": 1.3102624567660277, "learning_rate": 4.065697294442268e-06, "loss": 0.045, "step": 58005 }, { "epoch": 0.24204921931720508, "grad_norm": 0.5649446566563557, "learning_rate": 4.065522072282456e-06, "loss": 0.034, "step": 58010 }, { "epoch": 0.24207008203219535, "grad_norm": 0.6910387531243146, "learning_rate": 4.065346872775702e-06, "loss": 0.0313, "step": 58015 }, { "epoch": 0.24209094474718562, "grad_norm": 1.1666875480261314, "learning_rate": 4.065171695917126e-06, "loss": 0.0353, "step": 58020 }, { "epoch": 0.2421118074621759, "grad_norm": 0.5496029840360847, "learning_rate": 4.064996541701848e-06, "loss": 0.0227, "step": 58025 }, { "epoch": 0.24213267017716616, "grad_norm": 1.2691477570705354, "learning_rate": 4.064821410124991e-06, "loss": 0.0323, "step": 58030 }, { "epoch": 0.24215353289215646, "grad_norm": 0.46315294461788825, "learning_rate": 4.0646463011816786e-06, "loss": 0.0228, "step": 58035 }, { "epoch": 0.24217439560714674, "grad_norm": 0.5755197047580785, "learning_rate": 4.0644712148670346e-06, "loss": 0.0252, "step": 58040 }, { "epoch": 0.242195258322137, "grad_norm": 0.650361666282141, "learning_rate": 4.064296151176189e-06, "loss": 0.0307, "step": 58045 }, { "epoch": 0.24221612103712728, "grad_norm": 0.5404575189510695, "learning_rate": 4.064121110104267e-06, "loss": 0.0271, "step": 58050 }, { "epoch": 0.24223698375211755, "grad_norm": 0.496596363797182, "learning_rate": 4.063946091646399e-06, "loss": 0.0294, "step": 58055 }, { "epoch": 0.24225784646710785, "grad_norm": 0.8308977651777649, "learning_rate": 4.063771095797717e-06, "loss": 0.0335, "step": 58060 }, { "epoch": 0.24227870918209812, "grad_norm": 1.2791402547975688, "learning_rate": 4.063596122553353e-06, "loss": 0.0448, "step": 58065 }, { "epoch": 0.2422995718970884, "grad_norm": 0.8605324178282369, "learning_rate": 4.063421171908441e-06, "loss": 0.0339, "step": 58070 }, { "epoch": 0.24232043461207867, "grad_norm": 0.833454743977243, "learning_rate": 4.063246243858117e-06, "loss": 0.0451, "step": 58075 }, { "epoch": 0.24234129732706897, "grad_norm": 0.8158601300895667, "learning_rate": 4.063071338397517e-06, "loss": 0.034, "step": 58080 }, { "epoch": 0.24236216004205924, "grad_norm": 0.6249145686020577, "learning_rate": 4.06289645552178e-06, "loss": 0.0364, "step": 58085 }, { "epoch": 0.2423830227570495, "grad_norm": 0.6640651342697945, "learning_rate": 4.062721595226047e-06, "loss": 0.0256, "step": 58090 }, { "epoch": 0.24240388547203978, "grad_norm": 0.641397150193073, "learning_rate": 4.0625467575054565e-06, "loss": 0.039, "step": 58095 }, { "epoch": 0.24242474818703005, "grad_norm": 0.9258279680898228, "learning_rate": 4.0623719423551545e-06, "loss": 0.0379, "step": 58100 }, { "epoch": 0.24244561090202035, "grad_norm": 1.2445394891993848, "learning_rate": 4.062197149770284e-06, "loss": 0.0316, "step": 58105 }, { "epoch": 0.24246647361701062, "grad_norm": 0.778514133679037, "learning_rate": 4.0620223797459914e-06, "loss": 0.0241, "step": 58110 }, { "epoch": 0.2424873363320009, "grad_norm": 0.8690724468321486, "learning_rate": 4.0618476322774226e-06, "loss": 0.0369, "step": 58115 }, { "epoch": 0.24250819904699117, "grad_norm": 1.22367787783838, "learning_rate": 4.061672907359727e-06, "loss": 0.041, "step": 58120 }, { "epoch": 0.24252906176198147, "grad_norm": 1.0763514843291968, "learning_rate": 4.0614982049880544e-06, "loss": 0.0378, "step": 58125 }, { "epoch": 0.24254992447697174, "grad_norm": 0.6511387775528479, "learning_rate": 4.0613235251575575e-06, "loss": 0.0447, "step": 58130 }, { "epoch": 0.242570787191962, "grad_norm": 1.420060011436836, "learning_rate": 4.0611488678633895e-06, "loss": 0.0339, "step": 58135 }, { "epoch": 0.24259164990695228, "grad_norm": 0.5244592181378063, "learning_rate": 4.060974233100703e-06, "loss": 0.0328, "step": 58140 }, { "epoch": 0.24261251262194256, "grad_norm": 0.8994795296180113, "learning_rate": 4.060799620864656e-06, "loss": 0.0287, "step": 58145 }, { "epoch": 0.24263337533693285, "grad_norm": 0.47413222627174356, "learning_rate": 4.060625031150404e-06, "loss": 0.0285, "step": 58150 }, { "epoch": 0.24265423805192313, "grad_norm": 0.6126250679153237, "learning_rate": 4.0604504639531085e-06, "loss": 0.0335, "step": 58155 }, { "epoch": 0.2426751007669134, "grad_norm": 0.6447364886106368, "learning_rate": 4.060275919267927e-06, "loss": 0.0281, "step": 58160 }, { "epoch": 0.24269596348190367, "grad_norm": 0.8586014601629023, "learning_rate": 4.0601013970900245e-06, "loss": 0.0407, "step": 58165 }, { "epoch": 0.24271682619689397, "grad_norm": 0.43186488324642586, "learning_rate": 4.05992689741456e-06, "loss": 0.0323, "step": 58170 }, { "epoch": 0.24273768891188424, "grad_norm": 0.7449193977265709, "learning_rate": 4.059752420236704e-06, "loss": 0.03, "step": 58175 }, { "epoch": 0.2427585516268745, "grad_norm": 0.5728718764251003, "learning_rate": 4.059577965551617e-06, "loss": 0.0254, "step": 58180 }, { "epoch": 0.24277941434186479, "grad_norm": 0.7495619203952405, "learning_rate": 4.05940353335447e-06, "loss": 0.0324, "step": 58185 }, { "epoch": 0.24280027705685506, "grad_norm": 0.6981143306185656, "learning_rate": 4.059229123640431e-06, "loss": 0.0339, "step": 58190 }, { "epoch": 0.24282113977184536, "grad_norm": 0.5259868157112414, "learning_rate": 4.059054736404671e-06, "loss": 0.0266, "step": 58195 }, { "epoch": 0.24284200248683563, "grad_norm": 0.9067887032437871, "learning_rate": 4.058880371642361e-06, "loss": 0.0307, "step": 58200 }, { "epoch": 0.2428628652018259, "grad_norm": 1.0371472728017568, "learning_rate": 4.058706029348676e-06, "loss": 0.0267, "step": 58205 }, { "epoch": 0.24288372791681617, "grad_norm": 0.5694517146853931, "learning_rate": 4.058531709518791e-06, "loss": 0.0491, "step": 58210 }, { "epoch": 0.24290459063180647, "grad_norm": 0.7851353722603494, "learning_rate": 4.058357412147879e-06, "loss": 0.027, "step": 58215 }, { "epoch": 0.24292545334679674, "grad_norm": 1.5362761960617735, "learning_rate": 4.058183137231122e-06, "loss": 0.0344, "step": 58220 }, { "epoch": 0.24294631606178702, "grad_norm": 1.3666552755802852, "learning_rate": 4.058008884763697e-06, "loss": 0.0268, "step": 58225 }, { "epoch": 0.2429671787767773, "grad_norm": 0.9057672839911929, "learning_rate": 4.057834654740786e-06, "loss": 0.0418, "step": 58230 }, { "epoch": 0.24298804149176756, "grad_norm": 0.7742364180648359, "learning_rate": 4.05766044715757e-06, "loss": 0.0308, "step": 58235 }, { "epoch": 0.24300890420675786, "grad_norm": 1.0622062474308613, "learning_rate": 4.057486262009233e-06, "loss": 0.0359, "step": 58240 }, { "epoch": 0.24302976692174813, "grad_norm": 0.6758106220014416, "learning_rate": 4.05731209929096e-06, "loss": 0.0419, "step": 58245 }, { "epoch": 0.2430506296367384, "grad_norm": 0.5637726244171449, "learning_rate": 4.057137958997937e-06, "loss": 0.0254, "step": 58250 }, { "epoch": 0.24307149235172867, "grad_norm": 0.7984726463612332, "learning_rate": 4.056963841125354e-06, "loss": 0.0298, "step": 58255 }, { "epoch": 0.24309235506671897, "grad_norm": 0.711377626954913, "learning_rate": 4.056789745668397e-06, "loss": 0.0292, "step": 58260 }, { "epoch": 0.24311321778170925, "grad_norm": 0.49677678928497115, "learning_rate": 4.0566156726222596e-06, "loss": 0.0297, "step": 58265 }, { "epoch": 0.24313408049669952, "grad_norm": 1.013146087828528, "learning_rate": 4.056441621982134e-06, "loss": 0.032, "step": 58270 }, { "epoch": 0.2431549432116898, "grad_norm": 1.5611213350516209, "learning_rate": 4.056267593743211e-06, "loss": 0.0496, "step": 58275 }, { "epoch": 0.24317580592668006, "grad_norm": 0.7553335133038798, "learning_rate": 4.0560935879006905e-06, "loss": 0.0305, "step": 58280 }, { "epoch": 0.24319666864167036, "grad_norm": 0.4910503025220042, "learning_rate": 4.055919604449764e-06, "loss": 0.0293, "step": 58285 }, { "epoch": 0.24321753135666063, "grad_norm": 0.3625933697622916, "learning_rate": 4.055745643385634e-06, "loss": 0.0296, "step": 58290 }, { "epoch": 0.2432383940716509, "grad_norm": 0.7112729583089635, "learning_rate": 4.055571704703498e-06, "loss": 0.0362, "step": 58295 }, { "epoch": 0.24325925678664118, "grad_norm": 0.603161174150959, "learning_rate": 4.0553977883985555e-06, "loss": 0.0237, "step": 58300 }, { "epoch": 0.24328011950163148, "grad_norm": 1.0507482355999496, "learning_rate": 4.055223894466011e-06, "loss": 0.0304, "step": 58305 }, { "epoch": 0.24330098221662175, "grad_norm": 1.099559521088894, "learning_rate": 4.055050022901067e-06, "loss": 0.0368, "step": 58310 }, { "epoch": 0.24332184493161202, "grad_norm": 0.7729178999596712, "learning_rate": 4.054876173698931e-06, "loss": 0.0307, "step": 58315 }, { "epoch": 0.2433427076466023, "grad_norm": 1.6035600127169483, "learning_rate": 4.054702346854807e-06, "loss": 0.0335, "step": 58320 }, { "epoch": 0.24336357036159256, "grad_norm": 1.2493925276431104, "learning_rate": 4.054528542363904e-06, "loss": 0.0356, "step": 58325 }, { "epoch": 0.24338443307658286, "grad_norm": 0.8076849531115577, "learning_rate": 4.054354760221432e-06, "loss": 0.0191, "step": 58330 }, { "epoch": 0.24340529579157313, "grad_norm": 0.7342200960798274, "learning_rate": 4.0541810004226014e-06, "loss": 0.0283, "step": 58335 }, { "epoch": 0.2434261585065634, "grad_norm": 0.607245906410714, "learning_rate": 4.054007262962627e-06, "loss": 0.0421, "step": 58340 }, { "epoch": 0.24344702122155368, "grad_norm": 0.599704770809249, "learning_rate": 4.0538335478367185e-06, "loss": 0.0366, "step": 58345 }, { "epoch": 0.24346788393654398, "grad_norm": 0.6626971247586719, "learning_rate": 4.053659855040094e-06, "loss": 0.0283, "step": 58350 }, { "epoch": 0.24348874665153425, "grad_norm": 0.8104856694563006, "learning_rate": 4.05348618456797e-06, "loss": 0.0434, "step": 58355 }, { "epoch": 0.24350960936652452, "grad_norm": 1.0542420832384862, "learning_rate": 4.053312536415564e-06, "loss": 0.034, "step": 58360 }, { "epoch": 0.2435304720815148, "grad_norm": 0.8157307730378507, "learning_rate": 4.053138910578096e-06, "loss": 0.0356, "step": 58365 }, { "epoch": 0.24355133479650506, "grad_norm": 0.8238249615195861, "learning_rate": 4.052965307050787e-06, "loss": 0.0385, "step": 58370 }, { "epoch": 0.24357219751149536, "grad_norm": 0.6566727459775192, "learning_rate": 4.052791725828859e-06, "loss": 0.0377, "step": 58375 }, { "epoch": 0.24359306022648564, "grad_norm": 1.8019939984745286, "learning_rate": 4.052618166907537e-06, "loss": 0.0449, "step": 58380 }, { "epoch": 0.2436139229414759, "grad_norm": 0.9575600380950439, "learning_rate": 4.052444630282045e-06, "loss": 0.0288, "step": 58385 }, { "epoch": 0.24363478565646618, "grad_norm": 0.9254757463503174, "learning_rate": 4.052271115947611e-06, "loss": 0.0494, "step": 58390 }, { "epoch": 0.24365564837145648, "grad_norm": 1.1378190145670413, "learning_rate": 4.052097623899463e-06, "loss": 0.0394, "step": 58395 }, { "epoch": 0.24367651108644675, "grad_norm": 0.6202008822097539, "learning_rate": 4.0519241541328294e-06, "loss": 0.0351, "step": 58400 }, { "epoch": 0.24369737380143702, "grad_norm": 0.6594547915859313, "learning_rate": 4.051750706642944e-06, "loss": 0.0292, "step": 58405 }, { "epoch": 0.2437182365164273, "grad_norm": 1.1202594033699198, "learning_rate": 4.051577281425036e-06, "loss": 0.0323, "step": 58410 }, { "epoch": 0.24373909923141757, "grad_norm": 1.034582773528599, "learning_rate": 4.051403878474341e-06, "loss": 0.0268, "step": 58415 }, { "epoch": 0.24375996194640787, "grad_norm": 0.8409227216617519, "learning_rate": 4.051230497786094e-06, "loss": 0.046, "step": 58420 }, { "epoch": 0.24378082466139814, "grad_norm": 0.7089082753729338, "learning_rate": 4.0510571393555325e-06, "loss": 0.0311, "step": 58425 }, { "epoch": 0.2438016873763884, "grad_norm": 0.7570916645097607, "learning_rate": 4.050883803177894e-06, "loss": 0.0297, "step": 58430 }, { "epoch": 0.24382255009137868, "grad_norm": 0.8356037501233601, "learning_rate": 4.050710489248418e-06, "loss": 0.0342, "step": 58435 }, { "epoch": 0.24384341280636898, "grad_norm": 1.2837734062381887, "learning_rate": 4.050537197562347e-06, "loss": 0.0444, "step": 58440 }, { "epoch": 0.24386427552135925, "grad_norm": 1.2205885857108902, "learning_rate": 4.050363928114921e-06, "loss": 0.0343, "step": 58445 }, { "epoch": 0.24388513823634952, "grad_norm": 1.3364413857877298, "learning_rate": 4.050190680901384e-06, "loss": 0.0347, "step": 58450 }, { "epoch": 0.2439060009513398, "grad_norm": 1.0555528510921843, "learning_rate": 4.050017455916985e-06, "loss": 0.0411, "step": 58455 }, { "epoch": 0.24392686366633007, "grad_norm": 0.8195353826664497, "learning_rate": 4.049844253156968e-06, "loss": 0.0281, "step": 58460 }, { "epoch": 0.24394772638132037, "grad_norm": 0.3929742327847403, "learning_rate": 4.04967107261658e-06, "loss": 0.03, "step": 58465 }, { "epoch": 0.24396858909631064, "grad_norm": 1.6106956311813592, "learning_rate": 4.049497914291073e-06, "loss": 0.037, "step": 58470 }, { "epoch": 0.2439894518113009, "grad_norm": 0.7119811793791752, "learning_rate": 4.049324778175697e-06, "loss": 0.0286, "step": 58475 }, { "epoch": 0.24401031452629118, "grad_norm": 0.6492498234556653, "learning_rate": 4.049151664265704e-06, "loss": 0.0382, "step": 58480 }, { "epoch": 0.24403117724128148, "grad_norm": 1.0886914707357174, "learning_rate": 4.048978572556349e-06, "loss": 0.036, "step": 58485 }, { "epoch": 0.24405203995627175, "grad_norm": 0.864278901127684, "learning_rate": 4.048805503042887e-06, "loss": 0.0302, "step": 58490 }, { "epoch": 0.24407290267126203, "grad_norm": 0.429817035556581, "learning_rate": 4.0486324557205745e-06, "loss": 0.0361, "step": 58495 }, { "epoch": 0.2440937653862523, "grad_norm": 0.6019379707068985, "learning_rate": 4.048459430584669e-06, "loss": 0.0306, "step": 58500 }, { "epoch": 0.24411462810124257, "grad_norm": 1.4282728930612587, "learning_rate": 4.04828642763043e-06, "loss": 0.046, "step": 58505 }, { "epoch": 0.24413549081623287, "grad_norm": 1.108549127725015, "learning_rate": 4.048113446853119e-06, "loss": 0.0319, "step": 58510 }, { "epoch": 0.24415635353122314, "grad_norm": 1.2150405304373835, "learning_rate": 4.047940488247999e-06, "loss": 0.0368, "step": 58515 }, { "epoch": 0.2441772162462134, "grad_norm": 0.6030572057577203, "learning_rate": 4.047767551810333e-06, "loss": 0.0299, "step": 58520 }, { "epoch": 0.24419807896120369, "grad_norm": 0.8958869147045253, "learning_rate": 4.047594637535386e-06, "loss": 0.0277, "step": 58525 }, { "epoch": 0.24421894167619398, "grad_norm": 1.1096676356260415, "learning_rate": 4.047421745418425e-06, "loss": 0.0348, "step": 58530 }, { "epoch": 0.24423980439118426, "grad_norm": 0.8654094736993929, "learning_rate": 4.047248875454718e-06, "loss": 0.0244, "step": 58535 }, { "epoch": 0.24426066710617453, "grad_norm": 0.59560417998632, "learning_rate": 4.047076027639535e-06, "loss": 0.0355, "step": 58540 }, { "epoch": 0.2442815298211648, "grad_norm": 1.0729327686559074, "learning_rate": 4.046903201968145e-06, "loss": 0.0277, "step": 58545 }, { "epoch": 0.24430239253615507, "grad_norm": 1.5372738419740948, "learning_rate": 4.046730398435823e-06, "loss": 0.0286, "step": 58550 }, { "epoch": 0.24432325525114537, "grad_norm": 0.979989401019903, "learning_rate": 4.0465576170378414e-06, "loss": 0.0289, "step": 58555 }, { "epoch": 0.24434411796613564, "grad_norm": 0.7177217356007629, "learning_rate": 4.046384857769474e-06, "loss": 0.0234, "step": 58560 }, { "epoch": 0.24436498068112592, "grad_norm": 2.625166698665745, "learning_rate": 4.046212120626e-06, "loss": 0.045, "step": 58565 }, { "epoch": 0.2443858433961162, "grad_norm": 0.7262511317057252, "learning_rate": 4.046039405602695e-06, "loss": 0.0267, "step": 58570 }, { "epoch": 0.2444067061111065, "grad_norm": 0.6572112017406443, "learning_rate": 4.045866712694838e-06, "loss": 0.0282, "step": 58575 }, { "epoch": 0.24442756882609676, "grad_norm": 0.9170575428648919, "learning_rate": 4.0456940418977126e-06, "loss": 0.0321, "step": 58580 }, { "epoch": 0.24444843154108703, "grad_norm": 0.710874123936424, "learning_rate": 4.045521393206599e-06, "loss": 0.024, "step": 58585 }, { "epoch": 0.2444692942560773, "grad_norm": 0.9573090985360219, "learning_rate": 4.045348766616781e-06, "loss": 0.0364, "step": 58590 }, { "epoch": 0.24449015697106757, "grad_norm": 1.2212204775681035, "learning_rate": 4.045176162123543e-06, "loss": 0.0396, "step": 58595 }, { "epoch": 0.24451101968605787, "grad_norm": 0.6745678607736824, "learning_rate": 4.0450035797221724e-06, "loss": 0.0422, "step": 58600 }, { "epoch": 0.24453188240104815, "grad_norm": 0.4144663569602431, "learning_rate": 4.044831019407956e-06, "loss": 0.0366, "step": 58605 }, { "epoch": 0.24455274511603842, "grad_norm": 1.1047499349206844, "learning_rate": 4.0446584811761855e-06, "loss": 0.0434, "step": 58610 }, { "epoch": 0.2445736078310287, "grad_norm": 2.4722214161132303, "learning_rate": 4.044485965022148e-06, "loss": 0.0415, "step": 58615 }, { "epoch": 0.244594470546019, "grad_norm": 0.8460696636043353, "learning_rate": 4.044313470941138e-06, "loss": 0.0291, "step": 58620 }, { "epoch": 0.24461533326100926, "grad_norm": 1.1355700688251096, "learning_rate": 4.044140998928448e-06, "loss": 0.038, "step": 58625 }, { "epoch": 0.24463619597599953, "grad_norm": 1.7543141400705689, "learning_rate": 4.043968548979371e-06, "loss": 0.031, "step": 58630 }, { "epoch": 0.2446570586909898, "grad_norm": 0.7681141828985425, "learning_rate": 4.043796121089206e-06, "loss": 0.0309, "step": 58635 }, { "epoch": 0.24467792140598008, "grad_norm": 0.8586270516150436, "learning_rate": 4.043623715253251e-06, "loss": 0.0349, "step": 58640 }, { "epoch": 0.24469878412097038, "grad_norm": 0.5330158652439707, "learning_rate": 4.0434513314668025e-06, "loss": 0.0303, "step": 58645 }, { "epoch": 0.24471964683596065, "grad_norm": 1.097828944071486, "learning_rate": 4.043278969725163e-06, "loss": 0.0299, "step": 58650 }, { "epoch": 0.24474050955095092, "grad_norm": 1.0070805422109366, "learning_rate": 4.043106630023633e-06, "loss": 0.0301, "step": 58655 }, { "epoch": 0.2447613722659412, "grad_norm": 1.499642251307652, "learning_rate": 4.042934312357516e-06, "loss": 0.0365, "step": 58660 }, { "epoch": 0.2447822349809315, "grad_norm": 4.692875481550903, "learning_rate": 4.042762016722118e-06, "loss": 0.0319, "step": 58665 }, { "epoch": 0.24480309769592176, "grad_norm": 1.0651027192140583, "learning_rate": 4.042589743112743e-06, "loss": 0.0379, "step": 58670 }, { "epoch": 0.24482396041091203, "grad_norm": 0.657684937269595, "learning_rate": 4.0424174915247e-06, "loss": 0.0344, "step": 58675 }, { "epoch": 0.2448448231259023, "grad_norm": 0.41805062284137834, "learning_rate": 4.042245261953297e-06, "loss": 0.0304, "step": 58680 }, { "epoch": 0.24486568584089258, "grad_norm": 0.6796002398127245, "learning_rate": 4.042073054393844e-06, "loss": 0.0285, "step": 58685 }, { "epoch": 0.24488654855588288, "grad_norm": 0.7803725245047733, "learning_rate": 4.041900868841653e-06, "loss": 0.0359, "step": 58690 }, { "epoch": 0.24490741127087315, "grad_norm": 1.30914454774473, "learning_rate": 4.041728705292038e-06, "loss": 0.0355, "step": 58695 }, { "epoch": 0.24492827398586342, "grad_norm": 0.9703913419866681, "learning_rate": 4.041556563740312e-06, "loss": 0.0512, "step": 58700 }, { "epoch": 0.2449491367008537, "grad_norm": 0.8490197789803026, "learning_rate": 4.041384444181793e-06, "loss": 0.0329, "step": 58705 }, { "epoch": 0.244969999415844, "grad_norm": 1.0729683736465052, "learning_rate": 4.041212346611795e-06, "loss": 0.0308, "step": 58710 }, { "epoch": 0.24499086213083426, "grad_norm": 1.1494209970457183, "learning_rate": 4.041040271025638e-06, "loss": 0.0492, "step": 58715 }, { "epoch": 0.24501172484582454, "grad_norm": 1.1253205121841334, "learning_rate": 4.040868217418643e-06, "loss": 0.027, "step": 58720 }, { "epoch": 0.2450325875608148, "grad_norm": 0.44972856265889055, "learning_rate": 4.04069618578613e-06, "loss": 0.0365, "step": 58725 }, { "epoch": 0.24505345027580508, "grad_norm": 0.9905782049549846, "learning_rate": 4.040524176123423e-06, "loss": 0.0286, "step": 58730 }, { "epoch": 0.24507431299079538, "grad_norm": 0.6576341560204222, "learning_rate": 4.040352188425846e-06, "loss": 0.0367, "step": 58735 }, { "epoch": 0.24509517570578565, "grad_norm": 0.8084617633967401, "learning_rate": 4.040180222688724e-06, "loss": 0.0289, "step": 58740 }, { "epoch": 0.24511603842077592, "grad_norm": 1.4657541751014092, "learning_rate": 4.040008278907384e-06, "loss": 0.0371, "step": 58745 }, { "epoch": 0.2451369011357662, "grad_norm": 1.5821043135151593, "learning_rate": 4.039836357077156e-06, "loss": 0.0249, "step": 58750 }, { "epoch": 0.2451577638507565, "grad_norm": 1.256738438020821, "learning_rate": 4.039664457193366e-06, "loss": 0.0326, "step": 58755 }, { "epoch": 0.24517862656574677, "grad_norm": 3.1109711901208548, "learning_rate": 4.03949257925135e-06, "loss": 0.0324, "step": 58760 }, { "epoch": 0.24519948928073704, "grad_norm": 0.7857438000831006, "learning_rate": 4.039320723246437e-06, "loss": 0.0458, "step": 58765 }, { "epoch": 0.2452203519957273, "grad_norm": 1.1044008894214725, "learning_rate": 4.039148889173962e-06, "loss": 0.0421, "step": 58770 }, { "epoch": 0.24524121471071758, "grad_norm": 1.038289933243263, "learning_rate": 4.03897707702926e-06, "loss": 0.0344, "step": 58775 }, { "epoch": 0.24526207742570788, "grad_norm": 1.1955714266469903, "learning_rate": 4.038805286807669e-06, "loss": 0.0368, "step": 58780 }, { "epoch": 0.24528294014069815, "grad_norm": 0.9794449150201687, "learning_rate": 4.0386335185045254e-06, "loss": 0.0315, "step": 58785 }, { "epoch": 0.24530380285568842, "grad_norm": 1.079649797451297, "learning_rate": 4.03846177211517e-06, "loss": 0.034, "step": 58790 }, { "epoch": 0.2453246655706787, "grad_norm": 0.8699026974791332, "learning_rate": 4.0382900476349435e-06, "loss": 0.0344, "step": 58795 }, { "epoch": 0.245345528285669, "grad_norm": 0.5805773670232304, "learning_rate": 4.038118345059187e-06, "loss": 0.0361, "step": 58800 }, { "epoch": 0.24536639100065927, "grad_norm": 1.067573604751573, "learning_rate": 4.037946664383246e-06, "loss": 0.0373, "step": 58805 }, { "epoch": 0.24538725371564954, "grad_norm": 0.9606863780319513, "learning_rate": 4.037775005602464e-06, "loss": 0.0346, "step": 58810 }, { "epoch": 0.2454081164306398, "grad_norm": 0.7398231243187493, "learning_rate": 4.037603368712188e-06, "loss": 0.0324, "step": 58815 }, { "epoch": 0.24542897914563008, "grad_norm": 1.0965830087898265, "learning_rate": 4.037431753707767e-06, "loss": 0.0367, "step": 58820 }, { "epoch": 0.24544984186062038, "grad_norm": 0.5097972437231564, "learning_rate": 4.037260160584547e-06, "loss": 0.0196, "step": 58825 }, { "epoch": 0.24547070457561065, "grad_norm": 0.9492065281369014, "learning_rate": 4.037088589337882e-06, "loss": 0.0379, "step": 58830 }, { "epoch": 0.24549156729060093, "grad_norm": 1.0052153829887407, "learning_rate": 4.036917039963122e-06, "loss": 0.0364, "step": 58835 }, { "epoch": 0.2455124300055912, "grad_norm": 0.6298359780599707, "learning_rate": 4.036745512455622e-06, "loss": 0.0232, "step": 58840 }, { "epoch": 0.2455332927205815, "grad_norm": 0.643381507272085, "learning_rate": 4.036574006810735e-06, "loss": 0.0275, "step": 58845 }, { "epoch": 0.24555415543557177, "grad_norm": 0.5311109261958737, "learning_rate": 4.036402523023817e-06, "loss": 0.023, "step": 58850 }, { "epoch": 0.24557501815056204, "grad_norm": 0.9891380142648049, "learning_rate": 4.036231061090226e-06, "loss": 0.0307, "step": 58855 }, { "epoch": 0.2455958808655523, "grad_norm": 0.7967169620390158, "learning_rate": 4.036059621005322e-06, "loss": 0.0319, "step": 58860 }, { "epoch": 0.24561674358054258, "grad_norm": 0.6976039684416337, "learning_rate": 4.035888202764464e-06, "loss": 0.0328, "step": 58865 }, { "epoch": 0.24563760629553288, "grad_norm": 0.6082816422791069, "learning_rate": 4.035716806363014e-06, "loss": 0.0412, "step": 58870 }, { "epoch": 0.24565846901052316, "grad_norm": 0.9370016763088084, "learning_rate": 4.0355454317963345e-06, "loss": 0.0332, "step": 58875 }, { "epoch": 0.24567933172551343, "grad_norm": 0.8064567374933642, "learning_rate": 4.035374079059792e-06, "loss": 0.0455, "step": 58880 }, { "epoch": 0.2457001944405037, "grad_norm": 1.2093748445660872, "learning_rate": 4.0352027481487485e-06, "loss": 0.0255, "step": 58885 }, { "epoch": 0.245721057155494, "grad_norm": 0.5675836533908868, "learning_rate": 4.035031439058575e-06, "loss": 0.0336, "step": 58890 }, { "epoch": 0.24574191987048427, "grad_norm": 0.650219900963628, "learning_rate": 4.034860151784636e-06, "loss": 0.025, "step": 58895 }, { "epoch": 0.24576278258547454, "grad_norm": 0.5878807940040891, "learning_rate": 4.034688886322305e-06, "loss": 0.0315, "step": 58900 }, { "epoch": 0.24578364530046481, "grad_norm": 1.3308567852323512, "learning_rate": 4.034517642666952e-06, "loss": 0.0415, "step": 58905 }, { "epoch": 0.2458045080154551, "grad_norm": 0.9827099210583475, "learning_rate": 4.03434642081395e-06, "loss": 0.0398, "step": 58910 }, { "epoch": 0.24582537073044539, "grad_norm": 1.2400282444461712, "learning_rate": 4.034175220758672e-06, "loss": 0.0267, "step": 58915 }, { "epoch": 0.24584623344543566, "grad_norm": 0.9033827893537256, "learning_rate": 4.034004042496494e-06, "loss": 0.0246, "step": 58920 }, { "epoch": 0.24586709616042593, "grad_norm": 1.5675667807630556, "learning_rate": 4.033832886022792e-06, "loss": 0.0299, "step": 58925 }, { "epoch": 0.2458879588754162, "grad_norm": 0.9957311690931316, "learning_rate": 4.0336617513329465e-06, "loss": 0.0278, "step": 58930 }, { "epoch": 0.2459088215904065, "grad_norm": 0.7381189817702235, "learning_rate": 4.033490638422334e-06, "loss": 0.0279, "step": 58935 }, { "epoch": 0.24592968430539677, "grad_norm": 0.9823816210729042, "learning_rate": 4.033319547286338e-06, "loss": 0.0361, "step": 58940 }, { "epoch": 0.24595054702038704, "grad_norm": 0.5054769529895229, "learning_rate": 4.0331484779203385e-06, "loss": 0.0334, "step": 58945 }, { "epoch": 0.24597140973537732, "grad_norm": 0.41562796266450014, "learning_rate": 4.032977430319721e-06, "loss": 0.025, "step": 58950 }, { "epoch": 0.2459922724503676, "grad_norm": 0.37506347593432887, "learning_rate": 4.032806404479868e-06, "loss": 0.0298, "step": 58955 }, { "epoch": 0.2460131351653579, "grad_norm": 0.6412586675371112, "learning_rate": 4.032635400396169e-06, "loss": 0.0402, "step": 58960 }, { "epoch": 0.24603399788034816, "grad_norm": 0.6471420813491099, "learning_rate": 4.03246441806401e-06, "loss": 0.0297, "step": 58965 }, { "epoch": 0.24605486059533843, "grad_norm": 0.8632950901752924, "learning_rate": 4.03229345747878e-06, "loss": 0.0335, "step": 58970 }, { "epoch": 0.2460757233103287, "grad_norm": 1.0329523381581565, "learning_rate": 4.03212251863587e-06, "loss": 0.0426, "step": 58975 }, { "epoch": 0.246096586025319, "grad_norm": 1.7400337837989268, "learning_rate": 4.031951601530672e-06, "loss": 0.0507, "step": 58980 }, { "epoch": 0.24611744874030927, "grad_norm": 0.555596498136839, "learning_rate": 4.0317807061585775e-06, "loss": 0.0302, "step": 58985 }, { "epoch": 0.24613831145529955, "grad_norm": 0.4394796107490281, "learning_rate": 4.031609832514984e-06, "loss": 0.024, "step": 58990 }, { "epoch": 0.24615917417028982, "grad_norm": 0.5044540003352083, "learning_rate": 4.031438980595285e-06, "loss": 0.0311, "step": 58995 }, { "epoch": 0.2461800368852801, "grad_norm": 1.0917585269897738, "learning_rate": 4.031268150394879e-06, "loss": 0.039, "step": 59000 }, { "epoch": 0.2462008996002704, "grad_norm": 0.5349364224397453, "learning_rate": 4.0310973419091635e-06, "loss": 0.0361, "step": 59005 }, { "epoch": 0.24622176231526066, "grad_norm": 0.5404435148745036, "learning_rate": 4.03092655513354e-06, "loss": 0.0283, "step": 59010 }, { "epoch": 0.24624262503025093, "grad_norm": 0.9226396023899024, "learning_rate": 4.03075579006341e-06, "loss": 0.0374, "step": 59015 }, { "epoch": 0.2462634877452412, "grad_norm": 0.8732030169111038, "learning_rate": 4.030585046694173e-06, "loss": 0.0305, "step": 59020 }, { "epoch": 0.2462843504602315, "grad_norm": 0.8818420106607152, "learning_rate": 4.030414325021239e-06, "loss": 0.0424, "step": 59025 }, { "epoch": 0.24630521317522178, "grad_norm": 0.5166023368524768, "learning_rate": 4.030243625040008e-06, "loss": 0.0311, "step": 59030 }, { "epoch": 0.24632607589021205, "grad_norm": 0.6933286542002061, "learning_rate": 4.03007294674589e-06, "loss": 0.0305, "step": 59035 }, { "epoch": 0.24634693860520232, "grad_norm": 1.154447359918807, "learning_rate": 4.029902290134292e-06, "loss": 0.0279, "step": 59040 }, { "epoch": 0.2463678013201926, "grad_norm": 1.175014663536945, "learning_rate": 4.029731655200623e-06, "loss": 0.0304, "step": 59045 }, { "epoch": 0.2463886640351829, "grad_norm": 0.7614284058517077, "learning_rate": 4.0295610419402955e-06, "loss": 0.0268, "step": 59050 }, { "epoch": 0.24640952675017316, "grad_norm": 0.7813516302485828, "learning_rate": 4.029390450348721e-06, "loss": 0.0327, "step": 59055 }, { "epoch": 0.24643038946516344, "grad_norm": 0.9980176508593818, "learning_rate": 4.029219880421313e-06, "loss": 0.0335, "step": 59060 }, { "epoch": 0.2464512521801537, "grad_norm": 1.065308376657031, "learning_rate": 4.029049332153486e-06, "loss": 0.0269, "step": 59065 }, { "epoch": 0.246472114895144, "grad_norm": 0.9984888302864924, "learning_rate": 4.0288788055406575e-06, "loss": 0.0346, "step": 59070 }, { "epoch": 0.24649297761013428, "grad_norm": 0.7026768842958991, "learning_rate": 4.028708300578246e-06, "loss": 0.0287, "step": 59075 }, { "epoch": 0.24651384032512455, "grad_norm": 0.6108927428163401, "learning_rate": 4.028537817261667e-06, "loss": 0.0347, "step": 59080 }, { "epoch": 0.24653470304011482, "grad_norm": 0.9363705870287407, "learning_rate": 4.028367355586345e-06, "loss": 0.0434, "step": 59085 }, { "epoch": 0.2465555657551051, "grad_norm": 0.7901070105397641, "learning_rate": 4.0281969155477006e-06, "loss": 0.0309, "step": 59090 }, { "epoch": 0.2465764284700954, "grad_norm": 0.8286575080334219, "learning_rate": 4.028026497141155e-06, "loss": 0.0435, "step": 59095 }, { "epoch": 0.24659729118508567, "grad_norm": 0.8937979642849151, "learning_rate": 4.027856100362134e-06, "loss": 0.0324, "step": 59100 }, { "epoch": 0.24661815390007594, "grad_norm": 0.45263365005274936, "learning_rate": 4.027685725206064e-06, "loss": 0.032, "step": 59105 }, { "epoch": 0.2466390166150662, "grad_norm": 1.88071700085272, "learning_rate": 4.027515371668372e-06, "loss": 0.029, "step": 59110 }, { "epoch": 0.2466598793300565, "grad_norm": 0.7482119554374104, "learning_rate": 4.027345039744486e-06, "loss": 0.0345, "step": 59115 }, { "epoch": 0.24668074204504678, "grad_norm": 0.5899443590089877, "learning_rate": 4.027174729429837e-06, "loss": 0.0414, "step": 59120 }, { "epoch": 0.24670160476003705, "grad_norm": 0.9336547883573255, "learning_rate": 4.027004440719856e-06, "loss": 0.0393, "step": 59125 }, { "epoch": 0.24672246747502732, "grad_norm": 0.8434909221326411, "learning_rate": 4.026834173609973e-06, "loss": 0.0267, "step": 59130 }, { "epoch": 0.2467433301900176, "grad_norm": 0.8941274907010539, "learning_rate": 4.026663928095626e-06, "loss": 0.0234, "step": 59135 }, { "epoch": 0.2467641929050079, "grad_norm": 0.6096825345497424, "learning_rate": 4.026493704172248e-06, "loss": 0.0289, "step": 59140 }, { "epoch": 0.24678505561999817, "grad_norm": 0.8773511642294654, "learning_rate": 4.026323501835277e-06, "loss": 0.0279, "step": 59145 }, { "epoch": 0.24680591833498844, "grad_norm": 0.7662779240533045, "learning_rate": 4.02615332108015e-06, "loss": 0.0346, "step": 59150 }, { "epoch": 0.2468267810499787, "grad_norm": 1.264538476114459, "learning_rate": 4.025983161902306e-06, "loss": 0.0364, "step": 59155 }, { "epoch": 0.246847643764969, "grad_norm": 0.8750105122467771, "learning_rate": 4.025813024297187e-06, "loss": 0.0303, "step": 59160 }, { "epoch": 0.24686850647995928, "grad_norm": 0.7120779005515722, "learning_rate": 4.025642908260234e-06, "loss": 0.0285, "step": 59165 }, { "epoch": 0.24688936919494955, "grad_norm": 1.2811847780450236, "learning_rate": 4.0254728137868904e-06, "loss": 0.0345, "step": 59170 }, { "epoch": 0.24691023190993983, "grad_norm": 0.6072939946002907, "learning_rate": 4.025302740872602e-06, "loss": 0.0234, "step": 59175 }, { "epoch": 0.2469310946249301, "grad_norm": 0.9742015529826746, "learning_rate": 4.025132689512814e-06, "loss": 0.0319, "step": 59180 }, { "epoch": 0.2469519573399204, "grad_norm": 1.3310558953195821, "learning_rate": 4.024962659702975e-06, "loss": 0.0434, "step": 59185 }, { "epoch": 0.24697282005491067, "grad_norm": 1.3690058387780153, "learning_rate": 4.024792651438532e-06, "loss": 0.029, "step": 59190 }, { "epoch": 0.24699368276990094, "grad_norm": 0.8522777285265503, "learning_rate": 4.024622664714938e-06, "loss": 0.0271, "step": 59195 }, { "epoch": 0.2470145454848912, "grad_norm": 0.8418443284386972, "learning_rate": 4.024452699527641e-06, "loss": 0.0321, "step": 59200 }, { "epoch": 0.2470354081998815, "grad_norm": 0.9294253865505973, "learning_rate": 4.024282755872097e-06, "loss": 0.0268, "step": 59205 }, { "epoch": 0.24705627091487178, "grad_norm": 1.3798206027496966, "learning_rate": 4.024112833743757e-06, "loss": 0.0425, "step": 59210 }, { "epoch": 0.24707713362986206, "grad_norm": 0.8519349363640507, "learning_rate": 4.023942933138079e-06, "loss": 0.0405, "step": 59215 }, { "epoch": 0.24709799634485233, "grad_norm": 0.9299093564079256, "learning_rate": 4.023773054050521e-06, "loss": 0.0464, "step": 59220 }, { "epoch": 0.2471188590598426, "grad_norm": 0.6954043670337998, "learning_rate": 4.023603196476537e-06, "loss": 0.0438, "step": 59225 }, { "epoch": 0.2471397217748329, "grad_norm": 0.8080189264900894, "learning_rate": 4.02343336041159e-06, "loss": 0.0248, "step": 59230 }, { "epoch": 0.24716058448982317, "grad_norm": 0.7953198639755398, "learning_rate": 4.02326354585114e-06, "loss": 0.0469, "step": 59235 }, { "epoch": 0.24718144720481344, "grad_norm": 0.9736177087475938, "learning_rate": 4.023093752790649e-06, "loss": 0.0498, "step": 59240 }, { "epoch": 0.24720230991980371, "grad_norm": 0.9266978680687227, "learning_rate": 4.022923981225581e-06, "loss": 0.0363, "step": 59245 }, { "epoch": 0.24722317263479401, "grad_norm": 1.0522469100283909, "learning_rate": 4.0227542311514e-06, "loss": 0.0385, "step": 59250 }, { "epoch": 0.24724403534978429, "grad_norm": 0.8882976239206832, "learning_rate": 4.022584502563573e-06, "loss": 0.0288, "step": 59255 }, { "epoch": 0.24726489806477456, "grad_norm": 0.8135334606758253, "learning_rate": 4.022414795457567e-06, "loss": 0.0288, "step": 59260 }, { "epoch": 0.24728576077976483, "grad_norm": 0.5561531143410928, "learning_rate": 4.0222451098288525e-06, "loss": 0.0251, "step": 59265 }, { "epoch": 0.2473066234947551, "grad_norm": 0.9098835138763309, "learning_rate": 4.022075445672899e-06, "loss": 0.0322, "step": 59270 }, { "epoch": 0.2473274862097454, "grad_norm": 0.7607395344186224, "learning_rate": 4.021905802985177e-06, "loss": 0.028, "step": 59275 }, { "epoch": 0.24734834892473567, "grad_norm": 0.6885132702828322, "learning_rate": 4.0217361817611606e-06, "loss": 0.0351, "step": 59280 }, { "epoch": 0.24736921163972594, "grad_norm": 0.5626398880274855, "learning_rate": 4.021566581996324e-06, "loss": 0.0381, "step": 59285 }, { "epoch": 0.24739007435471622, "grad_norm": 0.9711672787376139, "learning_rate": 4.021397003686142e-06, "loss": 0.0303, "step": 59290 }, { "epoch": 0.24741093706970652, "grad_norm": 0.9890221897407948, "learning_rate": 4.021227446826093e-06, "loss": 0.0334, "step": 59295 }, { "epoch": 0.2474317997846968, "grad_norm": 0.6053289101287532, "learning_rate": 4.021057911411655e-06, "loss": 0.0334, "step": 59300 }, { "epoch": 0.24745266249968706, "grad_norm": 0.8554202289145818, "learning_rate": 4.020888397438307e-06, "loss": 0.025, "step": 59305 }, { "epoch": 0.24747352521467733, "grad_norm": 1.0175806066408253, "learning_rate": 4.020718904901529e-06, "loss": 0.0323, "step": 59310 }, { "epoch": 0.2474943879296676, "grad_norm": 1.14520455332732, "learning_rate": 4.020549433796805e-06, "loss": 0.0285, "step": 59315 }, { "epoch": 0.2475152506446579, "grad_norm": 0.7481701739662628, "learning_rate": 4.020379984119618e-06, "loss": 0.0306, "step": 59320 }, { "epoch": 0.24753611335964817, "grad_norm": 0.5901152913026564, "learning_rate": 4.020210555865453e-06, "loss": 0.027, "step": 59325 }, { "epoch": 0.24755697607463845, "grad_norm": 0.8653771804496787, "learning_rate": 4.020041149029797e-06, "loss": 0.0337, "step": 59330 }, { "epoch": 0.24757783878962872, "grad_norm": 1.1643625913444506, "learning_rate": 4.019871763608136e-06, "loss": 0.0293, "step": 59335 }, { "epoch": 0.24759870150461902, "grad_norm": 0.7179807161482392, "learning_rate": 4.019702399595959e-06, "loss": 0.0269, "step": 59340 }, { "epoch": 0.2476195642196093, "grad_norm": 0.5630894055858785, "learning_rate": 4.019533056988759e-06, "loss": 0.0249, "step": 59345 }, { "epoch": 0.24764042693459956, "grad_norm": 1.1981173263232454, "learning_rate": 4.019363735782025e-06, "loss": 0.0367, "step": 59350 }, { "epoch": 0.24766128964958983, "grad_norm": 0.9781791499790778, "learning_rate": 4.019194435971252e-06, "loss": 0.0328, "step": 59355 }, { "epoch": 0.2476821523645801, "grad_norm": 0.7574032464035798, "learning_rate": 4.019025157551932e-06, "loss": 0.0288, "step": 59360 }, { "epoch": 0.2477030150795704, "grad_norm": 0.798375157679182, "learning_rate": 4.018855900519562e-06, "loss": 0.0275, "step": 59365 }, { "epoch": 0.24772387779456068, "grad_norm": 0.9994322492391158, "learning_rate": 4.018686664869638e-06, "loss": 0.03, "step": 59370 }, { "epoch": 0.24774474050955095, "grad_norm": 0.8130875327082028, "learning_rate": 4.018517450597659e-06, "loss": 0.0364, "step": 59375 }, { "epoch": 0.24776560322454122, "grad_norm": 0.8422406190355607, "learning_rate": 4.018348257699125e-06, "loss": 0.0325, "step": 59380 }, { "epoch": 0.24778646593953152, "grad_norm": 1.1687282265801853, "learning_rate": 4.018179086169536e-06, "loss": 0.0394, "step": 59385 }, { "epoch": 0.2478073286545218, "grad_norm": 1.1378264745562203, "learning_rate": 4.018009936004394e-06, "loss": 0.0314, "step": 59390 }, { "epoch": 0.24782819136951206, "grad_norm": 0.45362781281418146, "learning_rate": 4.0178408071992046e-06, "loss": 0.0308, "step": 59395 }, { "epoch": 0.24784905408450233, "grad_norm": 1.0709480973339194, "learning_rate": 4.0176716997494704e-06, "loss": 0.0292, "step": 59400 }, { "epoch": 0.2478699167994926, "grad_norm": 0.46604431122843315, "learning_rate": 4.0175026136506985e-06, "loss": 0.0302, "step": 59405 }, { "epoch": 0.2478907795144829, "grad_norm": 0.502902532445877, "learning_rate": 4.017333548898396e-06, "loss": 0.0309, "step": 59410 }, { "epoch": 0.24791164222947318, "grad_norm": 0.33121828789988056, "learning_rate": 4.017164505488073e-06, "loss": 0.02, "step": 59415 }, { "epoch": 0.24793250494446345, "grad_norm": 0.46356282277112504, "learning_rate": 4.016995483415238e-06, "loss": 0.0329, "step": 59420 }, { "epoch": 0.24795336765945372, "grad_norm": 1.0326792499540094, "learning_rate": 4.016826482675404e-06, "loss": 0.0273, "step": 59425 }, { "epoch": 0.24797423037444402, "grad_norm": 0.5811303919330663, "learning_rate": 4.016657503264083e-06, "loss": 0.0345, "step": 59430 }, { "epoch": 0.2479950930894343, "grad_norm": 0.7401741956462168, "learning_rate": 4.016488545176789e-06, "loss": 0.0383, "step": 59435 }, { "epoch": 0.24801595580442456, "grad_norm": 0.5014532742148367, "learning_rate": 4.016319608409038e-06, "loss": 0.0265, "step": 59440 }, { "epoch": 0.24803681851941484, "grad_norm": 0.7291346546368069, "learning_rate": 4.016150692956348e-06, "loss": 0.0296, "step": 59445 }, { "epoch": 0.2480576812344051, "grad_norm": 0.8824630594984216, "learning_rate": 4.015981798814234e-06, "loss": 0.0304, "step": 59450 }, { "epoch": 0.2480785439493954, "grad_norm": 1.4371406238980569, "learning_rate": 4.015812925978217e-06, "loss": 0.0296, "step": 59455 }, { "epoch": 0.24809940666438568, "grad_norm": 0.7934126380491273, "learning_rate": 4.015644074443819e-06, "loss": 0.0326, "step": 59460 }, { "epoch": 0.24812026937937595, "grad_norm": 0.7980787056230765, "learning_rate": 4.01547524420656e-06, "loss": 0.0231, "step": 59465 }, { "epoch": 0.24814113209436622, "grad_norm": 0.4840737115476414, "learning_rate": 4.0153064352619645e-06, "loss": 0.031, "step": 59470 }, { "epoch": 0.24816199480935652, "grad_norm": 0.8873743792187273, "learning_rate": 4.015137647605558e-06, "loss": 0.0366, "step": 59475 }, { "epoch": 0.2481828575243468, "grad_norm": 0.7032388432087844, "learning_rate": 4.014968881232865e-06, "loss": 0.0314, "step": 59480 }, { "epoch": 0.24820372023933707, "grad_norm": 0.7783977886581827, "learning_rate": 4.014800136139414e-06, "loss": 0.0287, "step": 59485 }, { "epoch": 0.24822458295432734, "grad_norm": 1.3772262978678496, "learning_rate": 4.014631412320731e-06, "loss": 0.0351, "step": 59490 }, { "epoch": 0.2482454456693176, "grad_norm": 1.0052805250006969, "learning_rate": 4.01446270977235e-06, "loss": 0.0322, "step": 59495 }, { "epoch": 0.2482663083843079, "grad_norm": 0.8492025000522919, "learning_rate": 4.0142940284898e-06, "loss": 0.0355, "step": 59500 }, { "epoch": 0.24828717109929818, "grad_norm": 1.4481216445561322, "learning_rate": 4.014125368468613e-06, "loss": 0.0379, "step": 59505 }, { "epoch": 0.24830803381428845, "grad_norm": 0.6819798956965192, "learning_rate": 4.013956729704325e-06, "loss": 0.0237, "step": 59510 }, { "epoch": 0.24832889652927873, "grad_norm": 0.9453615142241999, "learning_rate": 4.013788112192468e-06, "loss": 0.0366, "step": 59515 }, { "epoch": 0.24834975924426902, "grad_norm": 0.8911540480176348, "learning_rate": 4.0136195159285825e-06, "loss": 0.0374, "step": 59520 }, { "epoch": 0.2483706219592593, "grad_norm": 0.5697253448744324, "learning_rate": 4.013450940908203e-06, "loss": 0.0316, "step": 59525 }, { "epoch": 0.24839148467424957, "grad_norm": 1.309216840121676, "learning_rate": 4.013282387126871e-06, "loss": 0.0381, "step": 59530 }, { "epoch": 0.24841234738923984, "grad_norm": 0.789546376293278, "learning_rate": 4.013113854580127e-06, "loss": 0.0271, "step": 59535 }, { "epoch": 0.2484332101042301, "grad_norm": 0.7644651749221173, "learning_rate": 4.01294534326351e-06, "loss": 0.0298, "step": 59540 }, { "epoch": 0.2484540728192204, "grad_norm": 0.6761660602717672, "learning_rate": 4.012776853172566e-06, "loss": 0.0317, "step": 59545 }, { "epoch": 0.24847493553421068, "grad_norm": 0.5754301916242716, "learning_rate": 4.012608384302838e-06, "loss": 0.0308, "step": 59550 }, { "epoch": 0.24849579824920096, "grad_norm": 1.0349484654972567, "learning_rate": 4.0124399366498716e-06, "loss": 0.0302, "step": 59555 }, { "epoch": 0.24851666096419123, "grad_norm": 0.7161957005031216, "learning_rate": 4.0122715102092145e-06, "loss": 0.027, "step": 59560 }, { "epoch": 0.24853752367918153, "grad_norm": 0.8153413757875179, "learning_rate": 4.012103104976415e-06, "loss": 0.0286, "step": 59565 }, { "epoch": 0.2485583863941718, "grad_norm": 1.7038564325849435, "learning_rate": 4.011934720947022e-06, "loss": 0.0236, "step": 59570 }, { "epoch": 0.24857924910916207, "grad_norm": 0.7558037570587973, "learning_rate": 4.011766358116587e-06, "loss": 0.0269, "step": 59575 }, { "epoch": 0.24860011182415234, "grad_norm": 0.8947174235769165, "learning_rate": 4.011598016480663e-06, "loss": 0.0324, "step": 59580 }, { "epoch": 0.24862097453914261, "grad_norm": 0.936766792413184, "learning_rate": 4.0114296960348024e-06, "loss": 0.0408, "step": 59585 }, { "epoch": 0.2486418372541329, "grad_norm": 2.1305259241314882, "learning_rate": 4.011261396774559e-06, "loss": 0.0466, "step": 59590 }, { "epoch": 0.24866269996912319, "grad_norm": 0.7986109921719525, "learning_rate": 4.011093118695492e-06, "loss": 0.0354, "step": 59595 }, { "epoch": 0.24868356268411346, "grad_norm": 0.7651934433429303, "learning_rate": 4.010924861793157e-06, "loss": 0.0374, "step": 59600 }, { "epoch": 0.24870442539910373, "grad_norm": 1.1344789041099055, "learning_rate": 4.010756626063112e-06, "loss": 0.0377, "step": 59605 }, { "epoch": 0.24872528811409403, "grad_norm": 1.017869837786755, "learning_rate": 4.01058841150092e-06, "loss": 0.0435, "step": 59610 }, { "epoch": 0.2487461508290843, "grad_norm": 0.8812927444782308, "learning_rate": 4.010420218102139e-06, "loss": 0.0321, "step": 59615 }, { "epoch": 0.24876701354407457, "grad_norm": 0.46464750471419075, "learning_rate": 4.010252045862334e-06, "loss": 0.0441, "step": 59620 }, { "epoch": 0.24878787625906484, "grad_norm": 0.7264569980120067, "learning_rate": 4.010083894777067e-06, "loss": 0.0262, "step": 59625 }, { "epoch": 0.24880873897405512, "grad_norm": 0.6957976908705558, "learning_rate": 4.009915764841905e-06, "loss": 0.0245, "step": 59630 }, { "epoch": 0.24882960168904542, "grad_norm": 1.0168869601464583, "learning_rate": 4.009747656052414e-06, "loss": 0.0392, "step": 59635 }, { "epoch": 0.2488504644040357, "grad_norm": 0.6282715152185971, "learning_rate": 4.009579568404163e-06, "loss": 0.0245, "step": 59640 }, { "epoch": 0.24887132711902596, "grad_norm": 0.9992174215043235, "learning_rate": 4.009411501892719e-06, "loss": 0.0307, "step": 59645 }, { "epoch": 0.24889218983401623, "grad_norm": 0.33870518538474137, "learning_rate": 4.009243456513654e-06, "loss": 0.0227, "step": 59650 }, { "epoch": 0.24891305254900653, "grad_norm": 0.6584106128950951, "learning_rate": 4.009075432262539e-06, "loss": 0.0283, "step": 59655 }, { "epoch": 0.2489339152639968, "grad_norm": 1.193128683426284, "learning_rate": 4.008907429134947e-06, "loss": 0.0377, "step": 59660 }, { "epoch": 0.24895477797898707, "grad_norm": 0.8955489423991171, "learning_rate": 4.008739447126454e-06, "loss": 0.0252, "step": 59665 }, { "epoch": 0.24897564069397735, "grad_norm": 0.671273408801072, "learning_rate": 4.008571486232634e-06, "loss": 0.0285, "step": 59670 }, { "epoch": 0.24899650340896762, "grad_norm": 0.6540968404278417, "learning_rate": 4.008403546449065e-06, "loss": 0.0345, "step": 59675 }, { "epoch": 0.24901736612395792, "grad_norm": 1.066944319937288, "learning_rate": 4.008235627771325e-06, "loss": 0.03, "step": 59680 }, { "epoch": 0.2490382288389482, "grad_norm": 0.8938782512798294, "learning_rate": 4.008067730194993e-06, "loss": 0.0292, "step": 59685 }, { "epoch": 0.24905909155393846, "grad_norm": 0.9223910070947884, "learning_rate": 4.00789985371565e-06, "loss": 0.0374, "step": 59690 }, { "epoch": 0.24907995426892873, "grad_norm": 0.8707506320205318, "learning_rate": 4.007731998328879e-06, "loss": 0.0312, "step": 59695 }, { "epoch": 0.24910081698391903, "grad_norm": 0.9475289385781069, "learning_rate": 4.007564164030263e-06, "loss": 0.0439, "step": 59700 }, { "epoch": 0.2491216796989093, "grad_norm": 1.000790613350616, "learning_rate": 4.007396350815387e-06, "loss": 0.0382, "step": 59705 }, { "epoch": 0.24914254241389958, "grad_norm": 0.4526839119571765, "learning_rate": 4.007228558679836e-06, "loss": 0.0459, "step": 59710 }, { "epoch": 0.24916340512888985, "grad_norm": 0.5401711996099949, "learning_rate": 4.007060787619198e-06, "loss": 0.0227, "step": 59715 }, { "epoch": 0.24918426784388012, "grad_norm": 0.7541759613782268, "learning_rate": 4.006893037629062e-06, "loss": 0.023, "step": 59720 }, { "epoch": 0.24920513055887042, "grad_norm": 0.9755301064031675, "learning_rate": 4.0067253087050186e-06, "loss": 0.0284, "step": 59725 }, { "epoch": 0.2492259932738607, "grad_norm": 0.8092529838556854, "learning_rate": 4.006557600842657e-06, "loss": 0.025, "step": 59730 }, { "epoch": 0.24924685598885096, "grad_norm": 1.0373478499056317, "learning_rate": 4.006389914037571e-06, "loss": 0.0396, "step": 59735 }, { "epoch": 0.24926771870384123, "grad_norm": 0.7319347239876406, "learning_rate": 4.0062222482853545e-06, "loss": 0.0302, "step": 59740 }, { "epoch": 0.24928858141883153, "grad_norm": 1.182445620116978, "learning_rate": 4.006054603581602e-06, "loss": 0.037, "step": 59745 }, { "epoch": 0.2493094441338218, "grad_norm": 0.4076524590171625, "learning_rate": 4.00588697992191e-06, "loss": 0.0408, "step": 59750 }, { "epoch": 0.24933030684881208, "grad_norm": 0.7141078935657817, "learning_rate": 4.005719377301877e-06, "loss": 0.0414, "step": 59755 }, { "epoch": 0.24935116956380235, "grad_norm": 0.822130001565001, "learning_rate": 4.0055517957171015e-06, "loss": 0.0339, "step": 59760 }, { "epoch": 0.24937203227879262, "grad_norm": 0.6619309269326037, "learning_rate": 4.005384235163183e-06, "loss": 0.0259, "step": 59765 }, { "epoch": 0.24939289499378292, "grad_norm": 0.8183686609450478, "learning_rate": 4.005216695635724e-06, "loss": 0.0323, "step": 59770 }, { "epoch": 0.2494137577087732, "grad_norm": 0.8106070961038938, "learning_rate": 4.005049177130328e-06, "loss": 0.0325, "step": 59775 }, { "epoch": 0.24943462042376346, "grad_norm": 0.9746704405270313, "learning_rate": 4.004881679642596e-06, "loss": 0.0299, "step": 59780 }, { "epoch": 0.24945548313875374, "grad_norm": 0.8907598681780248, "learning_rate": 4.004714203168137e-06, "loss": 0.025, "step": 59785 }, { "epoch": 0.24947634585374404, "grad_norm": 0.7059479130295224, "learning_rate": 4.004546747702556e-06, "loss": 0.0305, "step": 59790 }, { "epoch": 0.2494972085687343, "grad_norm": 0.7191616221155352, "learning_rate": 4.004379313241461e-06, "loss": 0.0297, "step": 59795 }, { "epoch": 0.24951807128372458, "grad_norm": 0.8784719826458127, "learning_rate": 4.004211899780462e-06, "loss": 0.03, "step": 59800 }, { "epoch": 0.24953893399871485, "grad_norm": 1.2158366985388562, "learning_rate": 4.004044507315168e-06, "loss": 0.0392, "step": 59805 }, { "epoch": 0.24955979671370512, "grad_norm": 1.02344998232972, "learning_rate": 4.003877135841192e-06, "loss": 0.0247, "step": 59810 }, { "epoch": 0.24958065942869542, "grad_norm": 0.5698636138414865, "learning_rate": 4.003709785354148e-06, "loss": 0.031, "step": 59815 }, { "epoch": 0.2496015221436857, "grad_norm": 0.6794527677240658, "learning_rate": 4.003542455849649e-06, "loss": 0.0257, "step": 59820 }, { "epoch": 0.24962238485867597, "grad_norm": 0.7105820477951189, "learning_rate": 4.003375147323311e-06, "loss": 0.03, "step": 59825 }, { "epoch": 0.24964324757366624, "grad_norm": 0.9153427653593935, "learning_rate": 4.003207859770751e-06, "loss": 0.0346, "step": 59830 }, { "epoch": 0.24966411028865654, "grad_norm": 0.5653309709099014, "learning_rate": 4.003040593187587e-06, "loss": 0.0227, "step": 59835 }, { "epoch": 0.2496849730036468, "grad_norm": 1.0156934038166419, "learning_rate": 4.002873347569439e-06, "loss": 0.0295, "step": 59840 }, { "epoch": 0.24970583571863708, "grad_norm": 1.202081003014146, "learning_rate": 4.0027061229119265e-06, "loss": 0.0455, "step": 59845 }, { "epoch": 0.24972669843362735, "grad_norm": 1.065185746223694, "learning_rate": 4.002538919210675e-06, "loss": 0.041, "step": 59850 }, { "epoch": 0.24974756114861763, "grad_norm": 0.8236150239090982, "learning_rate": 4.002371736461304e-06, "loss": 0.0345, "step": 59855 }, { "epoch": 0.24976842386360792, "grad_norm": 0.9594344559128316, "learning_rate": 4.002204574659441e-06, "loss": 0.0383, "step": 59860 }, { "epoch": 0.2497892865785982, "grad_norm": 0.7500202930870217, "learning_rate": 4.002037433800709e-06, "loss": 0.0256, "step": 59865 }, { "epoch": 0.24981014929358847, "grad_norm": 2.2079619404247848, "learning_rate": 4.001870313880738e-06, "loss": 0.0336, "step": 59870 }, { "epoch": 0.24983101200857874, "grad_norm": 0.8652459191300768, "learning_rate": 4.001703214895155e-06, "loss": 0.0293, "step": 59875 }, { "epoch": 0.24985187472356904, "grad_norm": 0.5680911587225881, "learning_rate": 4.00153613683959e-06, "loss": 0.0247, "step": 59880 }, { "epoch": 0.2498727374385593, "grad_norm": 1.4986198069974013, "learning_rate": 4.001369079709674e-06, "loss": 0.0346, "step": 59885 }, { "epoch": 0.24989360015354958, "grad_norm": 0.7653356516395281, "learning_rate": 4.00120204350104e-06, "loss": 0.032, "step": 59890 }, { "epoch": 0.24991446286853986, "grad_norm": 0.8983863258649635, "learning_rate": 4.00103502820932e-06, "loss": 0.0347, "step": 59895 }, { "epoch": 0.24993532558353013, "grad_norm": 0.7238745703390881, "learning_rate": 4.000868033830152e-06, "loss": 0.026, "step": 59900 }, { "epoch": 0.24995618829852043, "grad_norm": 1.315442429150368, "learning_rate": 4.000701060359169e-06, "loss": 0.0363, "step": 59905 }, { "epoch": 0.2499770510135107, "grad_norm": 1.0982632809190094, "learning_rate": 4.00053410779201e-06, "loss": 0.0279, "step": 59910 }, { "epoch": 0.24999791372850097, "grad_norm": 0.26700097070338946, "learning_rate": 4.000367176124312e-06, "loss": 0.0244, "step": 59915 }, { "epoch": 0.25001877644349124, "grad_norm": 0.7145634121411593, "learning_rate": 4.000200265351717e-06, "loss": 0.0305, "step": 59920 }, { "epoch": 0.25003963915848154, "grad_norm": 0.7343406853105238, "learning_rate": 4.000033375469865e-06, "loss": 0.0302, "step": 59925 }, { "epoch": 0.2500605018734718, "grad_norm": 0.4089471767718381, "learning_rate": 3.999866506474399e-06, "loss": 0.0381, "step": 59930 }, { "epoch": 0.2500813645884621, "grad_norm": 0.8586045538332056, "learning_rate": 3.9996996583609625e-06, "loss": 0.0315, "step": 59935 }, { "epoch": 0.2501022273034524, "grad_norm": 0.44583923955410354, "learning_rate": 3.999532831125202e-06, "loss": 0.0187, "step": 59940 }, { "epoch": 0.25012309001844263, "grad_norm": 0.745237564975963, "learning_rate": 3.999366024762761e-06, "loss": 0.0386, "step": 59945 }, { "epoch": 0.25014395273343293, "grad_norm": 1.4170452609419053, "learning_rate": 3.9991992392692905e-06, "loss": 0.0323, "step": 59950 }, { "epoch": 0.2501648154484232, "grad_norm": 0.9863159841934648, "learning_rate": 3.9990324746404365e-06, "loss": 0.0336, "step": 59955 }, { "epoch": 0.25018567816341347, "grad_norm": 1.1886835973296994, "learning_rate": 3.99886573087185e-06, "loss": 0.0402, "step": 59960 }, { "epoch": 0.25020654087840377, "grad_norm": 0.6509913203022255, "learning_rate": 3.998699007959183e-06, "loss": 0.0293, "step": 59965 }, { "epoch": 0.250227403593394, "grad_norm": 1.3681767637863245, "learning_rate": 3.9985323058980875e-06, "loss": 0.0417, "step": 59970 }, { "epoch": 0.2502482663083843, "grad_norm": 0.6229278123480896, "learning_rate": 3.998365624684218e-06, "loss": 0.0306, "step": 59975 }, { "epoch": 0.25026912902337456, "grad_norm": 1.0006442491424363, "learning_rate": 3.99819896431323e-06, "loss": 0.0313, "step": 59980 }, { "epoch": 0.25028999173836486, "grad_norm": 0.5895787608980405, "learning_rate": 3.998032324780779e-06, "loss": 0.0339, "step": 59985 }, { "epoch": 0.25031085445335516, "grad_norm": 0.667979042744934, "learning_rate": 3.997865706082523e-06, "loss": 0.0245, "step": 59990 }, { "epoch": 0.2503317171683454, "grad_norm": 0.5702929293692822, "learning_rate": 3.997699108214122e-06, "loss": 0.0214, "step": 59995 }, { "epoch": 0.2503525798833357, "grad_norm": 0.3869623130461876, "learning_rate": 3.997532531171235e-06, "loss": 0.0223, "step": 60000 }, { "epoch": 0.250373442598326, "grad_norm": 1.8234833809588122, "learning_rate": 3.997365974949524e-06, "loss": 0.0251, "step": 60005 }, { "epoch": 0.25039430531331625, "grad_norm": 0.7279516606537971, "learning_rate": 3.997199439544652e-06, "loss": 0.0253, "step": 60010 }, { "epoch": 0.25041516802830655, "grad_norm": 0.5098622137291251, "learning_rate": 3.997032924952282e-06, "loss": 0.0298, "step": 60015 }, { "epoch": 0.2504360307432968, "grad_norm": 1.3265823573148585, "learning_rate": 3.9968664311680815e-06, "loss": 0.0247, "step": 60020 }, { "epoch": 0.2504568934582871, "grad_norm": 0.7645626847673458, "learning_rate": 3.9966999581877155e-06, "loss": 0.0332, "step": 60025 }, { "epoch": 0.2504777561732774, "grad_norm": 0.46902836914425877, "learning_rate": 3.996533506006853e-06, "loss": 0.0367, "step": 60030 }, { "epoch": 0.25049861888826763, "grad_norm": 1.9005054133749997, "learning_rate": 3.996367074621162e-06, "loss": 0.0395, "step": 60035 }, { "epoch": 0.25051948160325793, "grad_norm": 0.9919348636690353, "learning_rate": 3.996200664026313e-06, "loss": 0.0312, "step": 60040 }, { "epoch": 0.2505403443182482, "grad_norm": 0.8197376191583093, "learning_rate": 3.996034274217979e-06, "loss": 0.0365, "step": 60045 }, { "epoch": 0.2505612070332385, "grad_norm": 4.939002533140685, "learning_rate": 3.99586790519183e-06, "loss": 0.0264, "step": 60050 }, { "epoch": 0.2505820697482288, "grad_norm": 1.3969324643776613, "learning_rate": 3.995701556943543e-06, "loss": 0.0305, "step": 60055 }, { "epoch": 0.250602932463219, "grad_norm": 1.1889432926932144, "learning_rate": 3.995535229468793e-06, "loss": 0.0395, "step": 60060 }, { "epoch": 0.2506237951782093, "grad_norm": 0.9464151213455647, "learning_rate": 3.995368922763256e-06, "loss": 0.0347, "step": 60065 }, { "epoch": 0.25064465789319956, "grad_norm": 0.7908342029909078, "learning_rate": 3.9952026368226106e-06, "loss": 0.0241, "step": 60070 }, { "epoch": 0.25066552060818986, "grad_norm": 0.6730416100789282, "learning_rate": 3.995036371642534e-06, "loss": 0.0342, "step": 60075 }, { "epoch": 0.25068638332318016, "grad_norm": 0.5759108686302319, "learning_rate": 3.99487012721871e-06, "loss": 0.024, "step": 60080 }, { "epoch": 0.2507072460381704, "grad_norm": 0.8991953299844175, "learning_rate": 3.9947039035468186e-06, "loss": 0.0311, "step": 60085 }, { "epoch": 0.2507281087531607, "grad_norm": 0.7251310789972465, "learning_rate": 3.994537700622541e-06, "loss": 0.0265, "step": 60090 }, { "epoch": 0.250748971468151, "grad_norm": 0.4810002379502796, "learning_rate": 3.994371518441565e-06, "loss": 0.0249, "step": 60095 }, { "epoch": 0.25076983418314125, "grad_norm": 0.7103390710642346, "learning_rate": 3.9942053569995744e-06, "loss": 0.0329, "step": 60100 }, { "epoch": 0.25079069689813155, "grad_norm": 0.9642284940112994, "learning_rate": 3.994039216292255e-06, "loss": 0.0358, "step": 60105 }, { "epoch": 0.2508115596131218, "grad_norm": 0.6060358598787988, "learning_rate": 3.993873096315296e-06, "loss": 0.0322, "step": 60110 }, { "epoch": 0.2508324223281121, "grad_norm": 0.7906076548003259, "learning_rate": 3.993706997064387e-06, "loss": 0.0294, "step": 60115 }, { "epoch": 0.2508532850431024, "grad_norm": 0.6613273282972966, "learning_rate": 3.993540918535217e-06, "loss": 0.0377, "step": 60120 }, { "epoch": 0.25087414775809264, "grad_norm": 1.1339987898001387, "learning_rate": 3.993374860723479e-06, "loss": 0.0323, "step": 60125 }, { "epoch": 0.25089501047308294, "grad_norm": 0.8495960750279745, "learning_rate": 3.993208823624867e-06, "loss": 0.025, "step": 60130 }, { "epoch": 0.2509158731880732, "grad_norm": 0.8050293513279302, "learning_rate": 3.9930428072350725e-06, "loss": 0.0281, "step": 60135 }, { "epoch": 0.2509367359030635, "grad_norm": 0.9369721484493451, "learning_rate": 3.992876811549793e-06, "loss": 0.0358, "step": 60140 }, { "epoch": 0.2509575986180538, "grad_norm": 0.7611635867085951, "learning_rate": 3.992710836564725e-06, "loss": 0.0387, "step": 60145 }, { "epoch": 0.250978461333044, "grad_norm": 1.1301401157517585, "learning_rate": 3.992544882275567e-06, "loss": 0.0318, "step": 60150 }, { "epoch": 0.2509993240480343, "grad_norm": 0.7047933274029894, "learning_rate": 3.992378948678018e-06, "loss": 0.0264, "step": 60155 }, { "epoch": 0.25102018676302457, "grad_norm": 1.0498973901407607, "learning_rate": 3.992213035767777e-06, "loss": 0.0365, "step": 60160 }, { "epoch": 0.25104104947801487, "grad_norm": 1.1392332083007624, "learning_rate": 3.992047143540548e-06, "loss": 0.0281, "step": 60165 }, { "epoch": 0.25106191219300517, "grad_norm": 1.4405021826313023, "learning_rate": 3.991881271992033e-06, "loss": 0.0301, "step": 60170 }, { "epoch": 0.2510827749079954, "grad_norm": 1.2319883433745924, "learning_rate": 3.991715421117937e-06, "loss": 0.0298, "step": 60175 }, { "epoch": 0.2511036376229857, "grad_norm": 0.8158626224359524, "learning_rate": 3.991549590913964e-06, "loss": 0.0315, "step": 60180 }, { "epoch": 0.251124500337976, "grad_norm": 0.862939203720767, "learning_rate": 3.9913837813758225e-06, "loss": 0.0339, "step": 60185 }, { "epoch": 0.25114536305296625, "grad_norm": 1.2848257842210853, "learning_rate": 3.99121799249922e-06, "loss": 0.0374, "step": 60190 }, { "epoch": 0.25116622576795655, "grad_norm": 1.3986168808606738, "learning_rate": 3.991052224279866e-06, "loss": 0.0359, "step": 60195 }, { "epoch": 0.2511870884829468, "grad_norm": 1.370758174015536, "learning_rate": 3.99088647671347e-06, "loss": 0.0412, "step": 60200 }, { "epoch": 0.2512079511979371, "grad_norm": 0.8522359639508481, "learning_rate": 3.990720749795745e-06, "loss": 0.033, "step": 60205 }, { "epoch": 0.2512288139129274, "grad_norm": 1.3009299415353348, "learning_rate": 3.990555043522403e-06, "loss": 0.0287, "step": 60210 }, { "epoch": 0.25124967662791764, "grad_norm": 0.7750637748965445, "learning_rate": 3.9903893578891595e-06, "loss": 0.0322, "step": 60215 }, { "epoch": 0.25127053934290794, "grad_norm": 0.7233061297014775, "learning_rate": 3.990223692891729e-06, "loss": 0.0304, "step": 60220 }, { "epoch": 0.2512914020578982, "grad_norm": 0.9130121586644369, "learning_rate": 3.990058048525828e-06, "loss": 0.0314, "step": 60225 }, { "epoch": 0.2513122647728885, "grad_norm": 0.5398422634621386, "learning_rate": 3.989892424787176e-06, "loss": 0.0197, "step": 60230 }, { "epoch": 0.2513331274878788, "grad_norm": 1.401356074312604, "learning_rate": 3.989726821671491e-06, "loss": 0.0388, "step": 60235 }, { "epoch": 0.251353990202869, "grad_norm": 0.9906488868792888, "learning_rate": 3.989561239174494e-06, "loss": 0.0278, "step": 60240 }, { "epoch": 0.2513748529178593, "grad_norm": 0.9459876933445628, "learning_rate": 3.989395677291908e-06, "loss": 0.0379, "step": 60245 }, { "epoch": 0.25139571563284957, "grad_norm": 1.0147380522183653, "learning_rate": 3.989230136019452e-06, "loss": 0.0344, "step": 60250 }, { "epoch": 0.25141657834783987, "grad_norm": 0.9944725409932598, "learning_rate": 3.989064615352855e-06, "loss": 0.0344, "step": 60255 }, { "epoch": 0.25143744106283017, "grad_norm": 1.1235581965447419, "learning_rate": 3.988899115287839e-06, "loss": 0.0232, "step": 60260 }, { "epoch": 0.2514583037778204, "grad_norm": 0.8094783277001927, "learning_rate": 3.9887336358201335e-06, "loss": 0.0319, "step": 60265 }, { "epoch": 0.2514791664928107, "grad_norm": 1.357031219578637, "learning_rate": 3.988568176945464e-06, "loss": 0.0455, "step": 60270 }, { "epoch": 0.251500029207801, "grad_norm": 0.5706156938662142, "learning_rate": 3.988402738659561e-06, "loss": 0.0249, "step": 60275 }, { "epoch": 0.25152089192279126, "grad_norm": 0.7430488974757182, "learning_rate": 3.988237320958155e-06, "loss": 0.0263, "step": 60280 }, { "epoch": 0.25154175463778156, "grad_norm": 1.0844170472518617, "learning_rate": 3.988071923836978e-06, "loss": 0.0307, "step": 60285 }, { "epoch": 0.2515626173527718, "grad_norm": 0.598226544507207, "learning_rate": 3.987906547291761e-06, "loss": 0.0299, "step": 60290 }, { "epoch": 0.2515834800677621, "grad_norm": 0.7573101201462933, "learning_rate": 3.98774119131824e-06, "loss": 0.0351, "step": 60295 }, { "epoch": 0.2516043427827524, "grad_norm": 1.1474383281529228, "learning_rate": 3.987575855912149e-06, "loss": 0.0349, "step": 60300 }, { "epoch": 0.25162520549774264, "grad_norm": 1.1322467304616388, "learning_rate": 3.9874105410692265e-06, "loss": 0.0395, "step": 60305 }, { "epoch": 0.25164606821273294, "grad_norm": 0.5832983913627301, "learning_rate": 3.9872452467852094e-06, "loss": 0.0264, "step": 60310 }, { "epoch": 0.2516669309277232, "grad_norm": 0.7872821152449256, "learning_rate": 3.987079973055836e-06, "loss": 0.0295, "step": 60315 }, { "epoch": 0.2516877936427135, "grad_norm": 0.787871264398326, "learning_rate": 3.986914719876848e-06, "loss": 0.0321, "step": 60320 }, { "epoch": 0.2517086563577038, "grad_norm": 0.7577704978666941, "learning_rate": 3.986749487243986e-06, "loss": 0.033, "step": 60325 }, { "epoch": 0.25172951907269403, "grad_norm": 0.6624581143371402, "learning_rate": 3.9865842751529935e-06, "loss": 0.0254, "step": 60330 }, { "epoch": 0.25175038178768433, "grad_norm": 0.6128981037869652, "learning_rate": 3.986419083599613e-06, "loss": 0.0359, "step": 60335 }, { "epoch": 0.2517712445026746, "grad_norm": 0.6869994531523989, "learning_rate": 3.986253912579592e-06, "loss": 0.0292, "step": 60340 }, { "epoch": 0.2517921072176649, "grad_norm": 1.3129037556391734, "learning_rate": 3.986088762088677e-06, "loss": 0.0264, "step": 60345 }, { "epoch": 0.2518129699326552, "grad_norm": 0.634397217545173, "learning_rate": 3.985923632122613e-06, "loss": 0.0288, "step": 60350 }, { "epoch": 0.2518338326476454, "grad_norm": 0.7767081913446936, "learning_rate": 3.985758522677151e-06, "loss": 0.0352, "step": 60355 }, { "epoch": 0.2518546953626357, "grad_norm": 1.1691752811749128, "learning_rate": 3.985593433748042e-06, "loss": 0.0354, "step": 60360 }, { "epoch": 0.251875558077626, "grad_norm": 0.8568833044132013, "learning_rate": 3.985428365331036e-06, "loss": 0.0257, "step": 60365 }, { "epoch": 0.25189642079261626, "grad_norm": 1.0516362299006956, "learning_rate": 3.9852633174218855e-06, "loss": 0.0356, "step": 60370 }, { "epoch": 0.25191728350760656, "grad_norm": 0.7465781986917516, "learning_rate": 3.985098290016346e-06, "loss": 0.0301, "step": 60375 }, { "epoch": 0.2519381462225968, "grad_norm": 0.7722833391465084, "learning_rate": 3.9849332831101705e-06, "loss": 0.0333, "step": 60380 }, { "epoch": 0.2519590089375871, "grad_norm": 1.012167766653144, "learning_rate": 3.984768296699117e-06, "loss": 0.0445, "step": 60385 }, { "epoch": 0.2519798716525774, "grad_norm": 0.7457012535039848, "learning_rate": 3.9846033307789425e-06, "loss": 0.031, "step": 60390 }, { "epoch": 0.25200073436756765, "grad_norm": 1.313073793236645, "learning_rate": 3.984438385345406e-06, "loss": 0.0386, "step": 60395 }, { "epoch": 0.25202159708255795, "grad_norm": 0.6659324745647424, "learning_rate": 3.9842734603942676e-06, "loss": 0.0335, "step": 60400 }, { "epoch": 0.2520424597975482, "grad_norm": 0.6634548466602704, "learning_rate": 3.984108555921288e-06, "loss": 0.0287, "step": 60405 }, { "epoch": 0.2520633225125385, "grad_norm": 0.8089536731541672, "learning_rate": 3.983943671922231e-06, "loss": 0.0312, "step": 60410 }, { "epoch": 0.2520841852275288, "grad_norm": 0.7487176411030692, "learning_rate": 3.983778808392858e-06, "loss": 0.0352, "step": 60415 }, { "epoch": 0.25210504794251903, "grad_norm": 0.42654180562556965, "learning_rate": 3.983613965328937e-06, "loss": 0.029, "step": 60420 }, { "epoch": 0.25212591065750933, "grad_norm": 0.572501522340735, "learning_rate": 3.983449142726232e-06, "loss": 0.0303, "step": 60425 }, { "epoch": 0.2521467733724996, "grad_norm": 0.9980542178203466, "learning_rate": 3.9832843405805115e-06, "loss": 0.025, "step": 60430 }, { "epoch": 0.2521676360874899, "grad_norm": 0.6436285578324576, "learning_rate": 3.983119558887543e-06, "loss": 0.0324, "step": 60435 }, { "epoch": 0.2521884988024802, "grad_norm": 0.6863175135572629, "learning_rate": 3.982954797643098e-06, "loss": 0.0274, "step": 60440 }, { "epoch": 0.2522093615174704, "grad_norm": 0.6364733684718842, "learning_rate": 3.982790056842947e-06, "loss": 0.0229, "step": 60445 }, { "epoch": 0.2522302242324607, "grad_norm": 0.6017032614769678, "learning_rate": 3.982625336482861e-06, "loss": 0.0329, "step": 60450 }, { "epoch": 0.252251086947451, "grad_norm": 0.7956739594260434, "learning_rate": 3.982460636558615e-06, "loss": 0.025, "step": 60455 }, { "epoch": 0.25227194966244126, "grad_norm": 0.6787491604216558, "learning_rate": 3.982295957065983e-06, "loss": 0.0328, "step": 60460 }, { "epoch": 0.25229281237743156, "grad_norm": 1.0258331605916495, "learning_rate": 3.982131298000741e-06, "loss": 0.0248, "step": 60465 }, { "epoch": 0.2523136750924218, "grad_norm": 0.8596742557333891, "learning_rate": 3.981966659358667e-06, "loss": 0.0335, "step": 60470 }, { "epoch": 0.2523345378074121, "grad_norm": 0.7685446818241671, "learning_rate": 3.98180204113554e-06, "loss": 0.0328, "step": 60475 }, { "epoch": 0.2523554005224024, "grad_norm": 1.2646371897941227, "learning_rate": 3.981637443327137e-06, "loss": 0.0395, "step": 60480 }, { "epoch": 0.25237626323739265, "grad_norm": 0.637485393368199, "learning_rate": 3.981472865929243e-06, "loss": 0.0272, "step": 60485 }, { "epoch": 0.25239712595238295, "grad_norm": 0.8522545680950093, "learning_rate": 3.981308308937635e-06, "loss": 0.0347, "step": 60490 }, { "epoch": 0.2524179886673732, "grad_norm": 0.4557603094311869, "learning_rate": 3.9811437723481e-06, "loss": 0.0338, "step": 60495 }, { "epoch": 0.2524388513823635, "grad_norm": 0.95614600643419, "learning_rate": 3.980979256156422e-06, "loss": 0.031, "step": 60500 }, { "epoch": 0.2524597140973538, "grad_norm": 0.7408313414542148, "learning_rate": 3.980814760358385e-06, "loss": 0.0323, "step": 60505 }, { "epoch": 0.25248057681234404, "grad_norm": 1.187414577910668, "learning_rate": 3.980650284949778e-06, "loss": 0.0364, "step": 60510 }, { "epoch": 0.25250143952733434, "grad_norm": 0.9616639170128347, "learning_rate": 3.9804858299263885e-06, "loss": 0.0299, "step": 60515 }, { "epoch": 0.2525223022423246, "grad_norm": 0.8573873682489154, "learning_rate": 3.9803213952840055e-06, "loss": 0.0342, "step": 60520 }, { "epoch": 0.2525431649573149, "grad_norm": 1.2004458376425498, "learning_rate": 3.9801569810184206e-06, "loss": 0.0317, "step": 60525 }, { "epoch": 0.2525640276723052, "grad_norm": 0.7583387131066816, "learning_rate": 3.979992587125425e-06, "loss": 0.0233, "step": 60530 }, { "epoch": 0.2525848903872954, "grad_norm": 1.1522841748608301, "learning_rate": 3.979828213600811e-06, "loss": 0.0337, "step": 60535 }, { "epoch": 0.2526057531022857, "grad_norm": 0.9366739436113395, "learning_rate": 3.979663860440374e-06, "loss": 0.032, "step": 60540 }, { "epoch": 0.252626615817276, "grad_norm": 0.6847338984792349, "learning_rate": 3.979499527639909e-06, "loss": 0.0326, "step": 60545 }, { "epoch": 0.25264747853226627, "grad_norm": 0.6660012986182056, "learning_rate": 3.979335215195213e-06, "loss": 0.025, "step": 60550 }, { "epoch": 0.25266834124725657, "grad_norm": 0.5565141815154289, "learning_rate": 3.979170923102084e-06, "loss": 0.0345, "step": 60555 }, { "epoch": 0.2526892039622468, "grad_norm": 1.0761004129448772, "learning_rate": 3.979006651356321e-06, "loss": 0.0319, "step": 60560 }, { "epoch": 0.2527100666772371, "grad_norm": 0.7605382861432037, "learning_rate": 3.978842399953724e-06, "loss": 0.026, "step": 60565 }, { "epoch": 0.2527309293922274, "grad_norm": 0.7965353285117892, "learning_rate": 3.9786781688900945e-06, "loss": 0.026, "step": 60570 }, { "epoch": 0.25275179210721765, "grad_norm": 0.4341229199277931, "learning_rate": 3.978513958161236e-06, "loss": 0.034, "step": 60575 }, { "epoch": 0.25277265482220795, "grad_norm": 0.9372367396418346, "learning_rate": 3.9783497677629525e-06, "loss": 0.0304, "step": 60580 }, { "epoch": 0.2527935175371982, "grad_norm": 0.821133515013601, "learning_rate": 3.978185597691048e-06, "loss": 0.0346, "step": 60585 }, { "epoch": 0.2528143802521885, "grad_norm": 0.822540385268559, "learning_rate": 3.978021447941331e-06, "loss": 0.0366, "step": 60590 }, { "epoch": 0.2528352429671788, "grad_norm": 0.603452177393705, "learning_rate": 3.977857318509606e-06, "loss": 0.028, "step": 60595 }, { "epoch": 0.25285610568216904, "grad_norm": 0.5244256205027754, "learning_rate": 3.977693209391686e-06, "loss": 0.0231, "step": 60600 }, { "epoch": 0.25287696839715934, "grad_norm": 0.7349878976119472, "learning_rate": 3.977529120583378e-06, "loss": 0.0377, "step": 60605 }, { "epoch": 0.2528978311121496, "grad_norm": 0.5062646926135046, "learning_rate": 3.977365052080493e-06, "loss": 0.031, "step": 60610 }, { "epoch": 0.2529186938271399, "grad_norm": 0.7292926783743768, "learning_rate": 3.977201003878846e-06, "loss": 0.0264, "step": 60615 }, { "epoch": 0.2529395565421302, "grad_norm": 0.7636366996016197, "learning_rate": 3.977036975974248e-06, "loss": 0.0313, "step": 60620 }, { "epoch": 0.25296041925712043, "grad_norm": 0.5592066083272973, "learning_rate": 3.976872968362514e-06, "loss": 0.0229, "step": 60625 }, { "epoch": 0.2529812819721107, "grad_norm": 0.6714635053708657, "learning_rate": 3.976708981039463e-06, "loss": 0.0311, "step": 60630 }, { "epoch": 0.253002144687101, "grad_norm": 0.6916884034377215, "learning_rate": 3.976545014000911e-06, "loss": 0.0369, "step": 60635 }, { "epoch": 0.25302300740209127, "grad_norm": 1.327662831679654, "learning_rate": 3.976381067242675e-06, "loss": 0.0395, "step": 60640 }, { "epoch": 0.25304387011708157, "grad_norm": 0.7662012457613334, "learning_rate": 3.9762171407605755e-06, "loss": 0.0357, "step": 60645 }, { "epoch": 0.2530647328320718, "grad_norm": 0.6071870791093964, "learning_rate": 3.976053234550434e-06, "loss": 0.0262, "step": 60650 }, { "epoch": 0.2530855955470621, "grad_norm": 1.0274005551842464, "learning_rate": 3.975889348608072e-06, "loss": 0.0317, "step": 60655 }, { "epoch": 0.2531064582620524, "grad_norm": 0.7622010157557818, "learning_rate": 3.975725482929313e-06, "loss": 0.0349, "step": 60660 }, { "epoch": 0.25312732097704266, "grad_norm": 0.9966431756571926, "learning_rate": 3.975561637509982e-06, "loss": 0.0385, "step": 60665 }, { "epoch": 0.25314818369203296, "grad_norm": 0.5807778785503374, "learning_rate": 3.975397812345903e-06, "loss": 0.034, "step": 60670 }, { "epoch": 0.2531690464070232, "grad_norm": 1.0148940127571182, "learning_rate": 3.975234007432906e-06, "loss": 0.042, "step": 60675 }, { "epoch": 0.2531899091220135, "grad_norm": 0.9645891724109862, "learning_rate": 3.975070222766816e-06, "loss": 0.0403, "step": 60680 }, { "epoch": 0.2532107718370038, "grad_norm": 4.300046195178967, "learning_rate": 3.974906458343464e-06, "loss": 0.0345, "step": 60685 }, { "epoch": 0.25323163455199404, "grad_norm": 1.279583121721665, "learning_rate": 3.974742714158681e-06, "loss": 0.0415, "step": 60690 }, { "epoch": 0.25325249726698434, "grad_norm": 1.2257374974692192, "learning_rate": 3.974578990208298e-06, "loss": 0.0412, "step": 60695 }, { "epoch": 0.2532733599819746, "grad_norm": 1.2798130024531689, "learning_rate": 3.974415286488148e-06, "loss": 0.0408, "step": 60700 }, { "epoch": 0.2532942226969649, "grad_norm": 0.7700951478893057, "learning_rate": 3.974251602994065e-06, "loss": 0.0259, "step": 60705 }, { "epoch": 0.2533150854119552, "grad_norm": 1.0991874854792518, "learning_rate": 3.974087939721885e-06, "loss": 0.0246, "step": 60710 }, { "epoch": 0.25333594812694543, "grad_norm": 0.3578958806748102, "learning_rate": 3.973924296667444e-06, "loss": 0.0242, "step": 60715 }, { "epoch": 0.25335681084193573, "grad_norm": 1.121081175721543, "learning_rate": 3.9737606738265796e-06, "loss": 0.035, "step": 60720 }, { "epoch": 0.25337767355692603, "grad_norm": 0.7372346041296491, "learning_rate": 3.973597071195132e-06, "loss": 0.0627, "step": 60725 }, { "epoch": 0.2533985362719163, "grad_norm": 0.7860755226517496, "learning_rate": 3.973433488768939e-06, "loss": 0.0269, "step": 60730 }, { "epoch": 0.2534193989869066, "grad_norm": 0.6720389552480146, "learning_rate": 3.973269926543844e-06, "loss": 0.0317, "step": 60735 }, { "epoch": 0.2534402617018968, "grad_norm": 1.3611798623421856, "learning_rate": 3.97310638451569e-06, "loss": 0.0299, "step": 60740 }, { "epoch": 0.2534611244168871, "grad_norm": 0.5947933147767199, "learning_rate": 3.972942862680319e-06, "loss": 0.049, "step": 60745 }, { "epoch": 0.2534819871318774, "grad_norm": 1.0185971094332262, "learning_rate": 3.972779361033577e-06, "loss": 0.0302, "step": 60750 }, { "epoch": 0.25350284984686766, "grad_norm": 0.720901016870622, "learning_rate": 3.972615879571308e-06, "loss": 0.0361, "step": 60755 }, { "epoch": 0.25352371256185796, "grad_norm": 0.6836907124455953, "learning_rate": 3.972452418289362e-06, "loss": 0.0349, "step": 60760 }, { "epoch": 0.2535445752768482, "grad_norm": 0.599284297612538, "learning_rate": 3.972288977183588e-06, "loss": 0.0357, "step": 60765 }, { "epoch": 0.2535654379918385, "grad_norm": 0.9639405591753208, "learning_rate": 3.972125556249833e-06, "loss": 0.0295, "step": 60770 }, { "epoch": 0.2535863007068288, "grad_norm": 0.5776433402302048, "learning_rate": 3.97196215548395e-06, "loss": 0.0273, "step": 60775 }, { "epoch": 0.25360716342181905, "grad_norm": 0.6419797519739187, "learning_rate": 3.97179877488179e-06, "loss": 0.0295, "step": 60780 }, { "epoch": 0.25362802613680935, "grad_norm": 0.8042780809567576, "learning_rate": 3.971635414439208e-06, "loss": 0.0232, "step": 60785 }, { "epoch": 0.2536488888517996, "grad_norm": 1.4002598574186738, "learning_rate": 3.971472074152056e-06, "loss": 0.0457, "step": 60790 }, { "epoch": 0.2536697515667899, "grad_norm": 0.962546672784401, "learning_rate": 3.971308754016191e-06, "loss": 0.0363, "step": 60795 }, { "epoch": 0.2536906142817802, "grad_norm": 0.6550934727706114, "learning_rate": 3.971145454027471e-06, "loss": 0.0363, "step": 60800 }, { "epoch": 0.25371147699677044, "grad_norm": 0.612559129193012, "learning_rate": 3.970982174181753e-06, "loss": 0.0329, "step": 60805 }, { "epoch": 0.25373233971176073, "grad_norm": 0.9022754089897592, "learning_rate": 3.970818914474895e-06, "loss": 0.0296, "step": 60810 }, { "epoch": 0.25375320242675103, "grad_norm": 1.1093883890718794, "learning_rate": 3.970655674902759e-06, "loss": 0.0342, "step": 60815 }, { "epoch": 0.2537740651417413, "grad_norm": 1.0361638320614266, "learning_rate": 3.9704924554612075e-06, "loss": 0.0389, "step": 60820 }, { "epoch": 0.2537949278567316, "grad_norm": 1.1772717342699353, "learning_rate": 3.970329256146102e-06, "loss": 0.0303, "step": 60825 }, { "epoch": 0.2538157905717218, "grad_norm": 0.5420836589183241, "learning_rate": 3.970166076953306e-06, "loss": 0.0302, "step": 60830 }, { "epoch": 0.2538366532867121, "grad_norm": 0.7684245528036018, "learning_rate": 3.970002917878686e-06, "loss": 0.0338, "step": 60835 }, { "epoch": 0.2538575160017024, "grad_norm": 0.6123612792831326, "learning_rate": 3.969839778918109e-06, "loss": 0.0449, "step": 60840 }, { "epoch": 0.25387837871669267, "grad_norm": 0.46991124771920284, "learning_rate": 3.9696766600674415e-06, "loss": 0.0271, "step": 60845 }, { "epoch": 0.25389924143168296, "grad_norm": 0.5570963591130411, "learning_rate": 3.969513561322551e-06, "loss": 0.0328, "step": 60850 }, { "epoch": 0.2539201041466732, "grad_norm": 0.46647683806702556, "learning_rate": 3.96935048267931e-06, "loss": 0.0258, "step": 60855 }, { "epoch": 0.2539409668616635, "grad_norm": 0.6084849224540709, "learning_rate": 3.969187424133589e-06, "loss": 0.0275, "step": 60860 }, { "epoch": 0.2539618295766538, "grad_norm": 1.110028668371793, "learning_rate": 3.969024385681259e-06, "loss": 0.0279, "step": 60865 }, { "epoch": 0.25398269229164405, "grad_norm": 0.5210919254075865, "learning_rate": 3.968861367318195e-06, "loss": 0.0222, "step": 60870 }, { "epoch": 0.25400355500663435, "grad_norm": 0.9912307367869797, "learning_rate": 3.9686983690402715e-06, "loss": 0.0341, "step": 60875 }, { "epoch": 0.2540244177216246, "grad_norm": 0.771081223856608, "learning_rate": 3.968535390843364e-06, "loss": 0.0347, "step": 60880 }, { "epoch": 0.2540452804366149, "grad_norm": 0.6771472006685815, "learning_rate": 3.96837243272335e-06, "loss": 0.0272, "step": 60885 }, { "epoch": 0.2540661431516052, "grad_norm": 1.0992730927230303, "learning_rate": 3.968209494676107e-06, "loss": 0.0335, "step": 60890 }, { "epoch": 0.25408700586659544, "grad_norm": 0.8760036816480026, "learning_rate": 3.968046576697515e-06, "loss": 0.0355, "step": 60895 }, { "epoch": 0.25410786858158574, "grad_norm": 0.8878863380279897, "learning_rate": 3.967883678783456e-06, "loss": 0.0305, "step": 60900 }, { "epoch": 0.25412873129657604, "grad_norm": 0.5291580409885894, "learning_rate": 3.96772080092981e-06, "loss": 0.0303, "step": 60905 }, { "epoch": 0.2541495940115663, "grad_norm": 1.0050612100121343, "learning_rate": 3.967557943132461e-06, "loss": 0.0327, "step": 60910 }, { "epoch": 0.2541704567265566, "grad_norm": 0.7474646710442185, "learning_rate": 3.967395105387292e-06, "loss": 0.0299, "step": 60915 }, { "epoch": 0.2541913194415468, "grad_norm": 1.3601722506113019, "learning_rate": 3.96723228769019e-06, "loss": 0.0229, "step": 60920 }, { "epoch": 0.2542121821565371, "grad_norm": 0.558203391871987, "learning_rate": 3.967069490037041e-06, "loss": 0.0354, "step": 60925 }, { "epoch": 0.2542330448715274, "grad_norm": 0.9457622304774878, "learning_rate": 3.966906712423732e-06, "loss": 0.0233, "step": 60930 }, { "epoch": 0.25425390758651767, "grad_norm": 0.9685218500418671, "learning_rate": 3.9667439548461525e-06, "loss": 0.0331, "step": 60935 }, { "epoch": 0.25427477030150797, "grad_norm": 0.6421972958947684, "learning_rate": 3.966581217300193e-06, "loss": 0.0271, "step": 60940 }, { "epoch": 0.2542956330164982, "grad_norm": 0.8849365390647649, "learning_rate": 3.9664184997817455e-06, "loss": 0.0333, "step": 60945 }, { "epoch": 0.2543164957314885, "grad_norm": 1.1837163623176972, "learning_rate": 3.9662558022866995e-06, "loss": 0.0383, "step": 60950 }, { "epoch": 0.2543373584464788, "grad_norm": 0.7583458811772169, "learning_rate": 3.966093124810953e-06, "loss": 0.0314, "step": 60955 }, { "epoch": 0.25435822116146906, "grad_norm": 0.5778361665212521, "learning_rate": 3.9659304673503965e-06, "loss": 0.0307, "step": 60960 }, { "epoch": 0.25437908387645936, "grad_norm": 1.1785134570494717, "learning_rate": 3.965767829900929e-06, "loss": 0.0323, "step": 60965 }, { "epoch": 0.2543999465914496, "grad_norm": 0.9946233140105603, "learning_rate": 3.965605212458447e-06, "loss": 0.0361, "step": 60970 }, { "epoch": 0.2544208093064399, "grad_norm": 0.7467027030483301, "learning_rate": 3.965442615018848e-06, "loss": 0.0259, "step": 60975 }, { "epoch": 0.2544416720214302, "grad_norm": 0.8347882588733936, "learning_rate": 3.965280037578031e-06, "loss": 0.0358, "step": 60980 }, { "epoch": 0.25446253473642044, "grad_norm": 0.6961899438241556, "learning_rate": 3.965117480131899e-06, "loss": 0.0296, "step": 60985 }, { "epoch": 0.25448339745141074, "grad_norm": 0.8139756924986357, "learning_rate": 3.964954942676354e-06, "loss": 0.0298, "step": 60990 }, { "epoch": 0.25450426016640104, "grad_norm": 1.2622655087968582, "learning_rate": 3.964792425207296e-06, "loss": 0.0422, "step": 60995 }, { "epoch": 0.2545251228813913, "grad_norm": 1.0965993526903657, "learning_rate": 3.964629927720631e-06, "loss": 0.0309, "step": 61000 }, { "epoch": 0.2545459855963816, "grad_norm": 0.9970630665639376, "learning_rate": 3.964467450212266e-06, "loss": 0.0355, "step": 61005 }, { "epoch": 0.25456684831137183, "grad_norm": 0.7477984711247962, "learning_rate": 3.964304992678104e-06, "loss": 0.0476, "step": 61010 }, { "epoch": 0.25458771102636213, "grad_norm": 0.7927241833372822, "learning_rate": 3.9641425551140566e-06, "loss": 0.0224, "step": 61015 }, { "epoch": 0.25460857374135243, "grad_norm": 0.6061160838364595, "learning_rate": 3.963980137516031e-06, "loss": 0.0508, "step": 61020 }, { "epoch": 0.2546294364563427, "grad_norm": 0.8772262408640829, "learning_rate": 3.963817739879938e-06, "loss": 0.0451, "step": 61025 }, { "epoch": 0.25465029917133297, "grad_norm": 0.687949133459708, "learning_rate": 3.9636553622016865e-06, "loss": 0.0379, "step": 61030 }, { "epoch": 0.2546711618863232, "grad_norm": 0.5222213456796315, "learning_rate": 3.963493004477192e-06, "loss": 0.0295, "step": 61035 }, { "epoch": 0.2546920246013135, "grad_norm": 1.8690461168142978, "learning_rate": 3.963330666702366e-06, "loss": 0.0255, "step": 61040 }, { "epoch": 0.2547128873163038, "grad_norm": 1.3631649211974133, "learning_rate": 3.9631683488731245e-06, "loss": 0.0375, "step": 61045 }, { "epoch": 0.25473375003129406, "grad_norm": 1.0494002319435665, "learning_rate": 3.9630060509853845e-06, "loss": 0.0402, "step": 61050 }, { "epoch": 0.25475461274628436, "grad_norm": 0.6843900208352733, "learning_rate": 3.96284377303506e-06, "loss": 0.0306, "step": 61055 }, { "epoch": 0.2547754754612746, "grad_norm": 0.7486035593592616, "learning_rate": 3.9626815150180725e-06, "loss": 0.0239, "step": 61060 }, { "epoch": 0.2547963381762649, "grad_norm": 1.0139235273637432, "learning_rate": 3.96251927693034e-06, "loss": 0.0333, "step": 61065 }, { "epoch": 0.2548172008912552, "grad_norm": 0.9173350370408762, "learning_rate": 3.962357058767783e-06, "loss": 0.027, "step": 61070 }, { "epoch": 0.25483806360624545, "grad_norm": 1.0403553385489877, "learning_rate": 3.962194860526325e-06, "loss": 0.0264, "step": 61075 }, { "epoch": 0.25485892632123575, "grad_norm": 0.531109173464441, "learning_rate": 3.962032682201886e-06, "loss": 0.0257, "step": 61080 }, { "epoch": 0.25487978903622605, "grad_norm": 0.9637448338737934, "learning_rate": 3.961870523790394e-06, "loss": 0.0243, "step": 61085 }, { "epoch": 0.2549006517512163, "grad_norm": 0.850271761397258, "learning_rate": 3.961708385287769e-06, "loss": 0.0435, "step": 61090 }, { "epoch": 0.2549215144662066, "grad_norm": 0.9633228287469315, "learning_rate": 3.961546266689943e-06, "loss": 0.0422, "step": 61095 }, { "epoch": 0.25494237718119683, "grad_norm": 0.91769110480249, "learning_rate": 3.961384167992841e-06, "loss": 0.0349, "step": 61100 }, { "epoch": 0.25496323989618713, "grad_norm": 0.8704929434518958, "learning_rate": 3.961222089192391e-06, "loss": 0.0326, "step": 61105 }, { "epoch": 0.25498410261117743, "grad_norm": 0.489845785273589, "learning_rate": 3.961060030284526e-06, "loss": 0.0256, "step": 61110 }, { "epoch": 0.2550049653261677, "grad_norm": 0.6845485065296835, "learning_rate": 3.960897991265174e-06, "loss": 0.0348, "step": 61115 }, { "epoch": 0.255025828041158, "grad_norm": 0.9016463396971237, "learning_rate": 3.960735972130271e-06, "loss": 0.0308, "step": 61120 }, { "epoch": 0.2550466907561482, "grad_norm": 1.0562597109725738, "learning_rate": 3.960573972875746e-06, "loss": 0.0352, "step": 61125 }, { "epoch": 0.2550675534711385, "grad_norm": 1.0631318402479837, "learning_rate": 3.960411993497537e-06, "loss": 0.0447, "step": 61130 }, { "epoch": 0.2550884161861288, "grad_norm": 0.9085513734129379, "learning_rate": 3.960250033991579e-06, "loss": 0.0261, "step": 61135 }, { "epoch": 0.25510927890111906, "grad_norm": 0.6966509954940501, "learning_rate": 3.960088094353808e-06, "loss": 0.0336, "step": 61140 }, { "epoch": 0.25513014161610936, "grad_norm": 0.6428031307876145, "learning_rate": 3.959926174580163e-06, "loss": 0.0253, "step": 61145 }, { "epoch": 0.2551510043310996, "grad_norm": 0.907549278045595, "learning_rate": 3.959764274666583e-06, "loss": 0.0305, "step": 61150 }, { "epoch": 0.2551718670460899, "grad_norm": 0.773851587185663, "learning_rate": 3.959602394609009e-06, "loss": 0.028, "step": 61155 }, { "epoch": 0.2551927297610802, "grad_norm": 0.7724850000384159, "learning_rate": 3.959440534403383e-06, "loss": 0.0261, "step": 61160 }, { "epoch": 0.25521359247607045, "grad_norm": 0.8814951365037209, "learning_rate": 3.959278694045646e-06, "loss": 0.0323, "step": 61165 }, { "epoch": 0.25523445519106075, "grad_norm": 0.8869726378318044, "learning_rate": 3.959116873531745e-06, "loss": 0.0273, "step": 61170 }, { "epoch": 0.25525531790605105, "grad_norm": 0.7044441007173545, "learning_rate": 3.958955072857621e-06, "loss": 0.0298, "step": 61175 }, { "epoch": 0.2552761806210413, "grad_norm": 0.9610883218253711, "learning_rate": 3.958793292019223e-06, "loss": 0.0355, "step": 61180 }, { "epoch": 0.2552970433360316, "grad_norm": 0.6302210795261259, "learning_rate": 3.958631531012499e-06, "loss": 0.0252, "step": 61185 }, { "epoch": 0.25531790605102184, "grad_norm": 0.7370631753403476, "learning_rate": 3.958469789833396e-06, "loss": 0.0295, "step": 61190 }, { "epoch": 0.25533876876601214, "grad_norm": 0.7654479186757355, "learning_rate": 3.958308068477864e-06, "loss": 0.0318, "step": 61195 }, { "epoch": 0.25535963148100244, "grad_norm": 1.0720069695513172, "learning_rate": 3.958146366941855e-06, "loss": 0.0325, "step": 61200 }, { "epoch": 0.2553804941959927, "grad_norm": 0.8526977393785613, "learning_rate": 3.95798468522132e-06, "loss": 0.0366, "step": 61205 }, { "epoch": 0.255401356910983, "grad_norm": 0.8393833967247847, "learning_rate": 3.957823023312212e-06, "loss": 0.0323, "step": 61210 }, { "epoch": 0.2554222196259732, "grad_norm": 0.4144876030602404, "learning_rate": 3.957661381210487e-06, "loss": 0.0247, "step": 61215 }, { "epoch": 0.2554430823409635, "grad_norm": 0.9839505986390706, "learning_rate": 3.957499758912099e-06, "loss": 0.0266, "step": 61220 }, { "epoch": 0.2554639450559538, "grad_norm": 1.4107659814146052, "learning_rate": 3.957338156413004e-06, "loss": 0.0358, "step": 61225 }, { "epoch": 0.25548480777094407, "grad_norm": 0.5247498950393072, "learning_rate": 3.957176573709163e-06, "loss": 0.0303, "step": 61230 }, { "epoch": 0.25550567048593437, "grad_norm": 1.061196470583051, "learning_rate": 3.9570150107965316e-06, "loss": 0.0293, "step": 61235 }, { "epoch": 0.2555265332009246, "grad_norm": 0.6085109176446858, "learning_rate": 3.956853467671072e-06, "loss": 0.0287, "step": 61240 }, { "epoch": 0.2555473959159149, "grad_norm": 0.7293218211216611, "learning_rate": 3.956691944328745e-06, "loss": 0.0292, "step": 61245 }, { "epoch": 0.2555682586309052, "grad_norm": 1.1910943642022118, "learning_rate": 3.956530440765512e-06, "loss": 0.041, "step": 61250 }, { "epoch": 0.25558912134589545, "grad_norm": 0.8272986436718405, "learning_rate": 3.956368956977339e-06, "loss": 0.0466, "step": 61255 }, { "epoch": 0.25560998406088575, "grad_norm": 0.6520283595070117, "learning_rate": 3.95620749296019e-06, "loss": 0.0325, "step": 61260 }, { "epoch": 0.25563084677587605, "grad_norm": 1.1361510444097398, "learning_rate": 3.95604604871003e-06, "loss": 0.0301, "step": 61265 }, { "epoch": 0.2556517094908663, "grad_norm": 0.9988441468476703, "learning_rate": 3.955884624222825e-06, "loss": 0.0278, "step": 61270 }, { "epoch": 0.2556725722058566, "grad_norm": 0.6647532997696891, "learning_rate": 3.955723219494546e-06, "loss": 0.0319, "step": 61275 }, { "epoch": 0.25569343492084684, "grad_norm": 0.8101649078204097, "learning_rate": 3.955561834521161e-06, "loss": 0.0248, "step": 61280 }, { "epoch": 0.25571429763583714, "grad_norm": 0.8522858093490759, "learning_rate": 3.95540046929864e-06, "loss": 0.0336, "step": 61285 }, { "epoch": 0.25573516035082744, "grad_norm": 0.9230643841144237, "learning_rate": 3.955239123822956e-06, "loss": 0.0279, "step": 61290 }, { "epoch": 0.2557560230658177, "grad_norm": 1.7306524724055647, "learning_rate": 3.955077798090082e-06, "loss": 0.0392, "step": 61295 }, { "epoch": 0.255776885780808, "grad_norm": 0.7604267206177246, "learning_rate": 3.95491649209599e-06, "loss": 0.0328, "step": 61300 }, { "epoch": 0.2557977484957982, "grad_norm": 3.137391000403998, "learning_rate": 3.954755205836656e-06, "loss": 0.0343, "step": 61305 }, { "epoch": 0.2558186112107885, "grad_norm": 0.6138005901013802, "learning_rate": 3.954593939308057e-06, "loss": 0.0233, "step": 61310 }, { "epoch": 0.2558394739257788, "grad_norm": 0.7747669098737257, "learning_rate": 3.95443269250617e-06, "loss": 0.0358, "step": 61315 }, { "epoch": 0.25586033664076907, "grad_norm": 0.8751081314514977, "learning_rate": 3.954271465426975e-06, "loss": 0.0353, "step": 61320 }, { "epoch": 0.25588119935575937, "grad_norm": 0.5797749602438191, "learning_rate": 3.9541102580664494e-06, "loss": 0.0222, "step": 61325 }, { "epoch": 0.2559020620707496, "grad_norm": 0.973792908067392, "learning_rate": 3.953949070420575e-06, "loss": 0.0295, "step": 61330 }, { "epoch": 0.2559229247857399, "grad_norm": 1.1213786850275793, "learning_rate": 3.9537879024853335e-06, "loss": 0.0329, "step": 61335 }, { "epoch": 0.2559437875007302, "grad_norm": 0.8781342783463562, "learning_rate": 3.953626754256709e-06, "loss": 0.0392, "step": 61340 }, { "epoch": 0.25596465021572046, "grad_norm": 1.2562483780227551, "learning_rate": 3.953465625730686e-06, "loss": 0.0332, "step": 61345 }, { "epoch": 0.25598551293071076, "grad_norm": 0.7747615051823534, "learning_rate": 3.953304516903248e-06, "loss": 0.0286, "step": 61350 }, { "epoch": 0.25600637564570106, "grad_norm": 0.9775124639699659, "learning_rate": 3.953143427770384e-06, "loss": 0.034, "step": 61355 }, { "epoch": 0.2560272383606913, "grad_norm": 0.32722720727275534, "learning_rate": 3.952982358328079e-06, "loss": 0.0319, "step": 61360 }, { "epoch": 0.2560481010756816, "grad_norm": 1.4285998384091916, "learning_rate": 3.952821308572324e-06, "loss": 0.0376, "step": 61365 }, { "epoch": 0.25606896379067184, "grad_norm": 0.5139077025367589, "learning_rate": 3.9526602784991095e-06, "loss": 0.0269, "step": 61370 }, { "epoch": 0.25608982650566214, "grad_norm": 0.901130108954747, "learning_rate": 3.952499268104426e-06, "loss": 0.0281, "step": 61375 }, { "epoch": 0.25611068922065244, "grad_norm": 0.5141208004556349, "learning_rate": 3.952338277384265e-06, "loss": 0.0245, "step": 61380 }, { "epoch": 0.2561315519356427, "grad_norm": 0.6674329298000671, "learning_rate": 3.95217730633462e-06, "loss": 0.0361, "step": 61385 }, { "epoch": 0.256152414650633, "grad_norm": 1.1891767546039789, "learning_rate": 3.952016354951488e-06, "loss": 0.0282, "step": 61390 }, { "epoch": 0.25617327736562323, "grad_norm": 0.806802287740713, "learning_rate": 3.9518554232308605e-06, "loss": 0.0307, "step": 61395 }, { "epoch": 0.25619414008061353, "grad_norm": 0.8909760782005408, "learning_rate": 3.951694511168738e-06, "loss": 0.0276, "step": 61400 }, { "epoch": 0.25621500279560383, "grad_norm": 0.8225280284922188, "learning_rate": 3.951533618761118e-06, "loss": 0.0367, "step": 61405 }, { "epoch": 0.2562358655105941, "grad_norm": 1.2485911977660091, "learning_rate": 3.951372746003999e-06, "loss": 0.0329, "step": 61410 }, { "epoch": 0.2562567282255844, "grad_norm": 0.626653306844502, "learning_rate": 3.951211892893382e-06, "loss": 0.0277, "step": 61415 }, { "epoch": 0.2562775909405746, "grad_norm": 1.2639887641843983, "learning_rate": 3.951051059425266e-06, "loss": 0.0323, "step": 61420 }, { "epoch": 0.2562984536555649, "grad_norm": 0.5139522870105414, "learning_rate": 3.950890245595656e-06, "loss": 0.0294, "step": 61425 }, { "epoch": 0.2563193163705552, "grad_norm": 0.58790760995579, "learning_rate": 3.9507294514005555e-06, "loss": 0.0302, "step": 61430 }, { "epoch": 0.25634017908554546, "grad_norm": 0.77888857935771, "learning_rate": 3.95056867683597e-06, "loss": 0.0318, "step": 61435 }, { "epoch": 0.25636104180053576, "grad_norm": 0.591142868687713, "learning_rate": 3.950407921897903e-06, "loss": 0.0294, "step": 61440 }, { "epoch": 0.25638190451552606, "grad_norm": 0.9956125955901144, "learning_rate": 3.9502471865823645e-06, "loss": 0.0392, "step": 61445 }, { "epoch": 0.2564027672305163, "grad_norm": 0.8293606355061177, "learning_rate": 3.950086470885361e-06, "loss": 0.029, "step": 61450 }, { "epoch": 0.2564236299455066, "grad_norm": 1.0893172599962135, "learning_rate": 3.949925774802902e-06, "loss": 0.0303, "step": 61455 }, { "epoch": 0.25644449266049685, "grad_norm": 0.8420303046520285, "learning_rate": 3.949765098330999e-06, "loss": 0.028, "step": 61460 }, { "epoch": 0.25646535537548715, "grad_norm": 0.7030377844752882, "learning_rate": 3.949604441465664e-06, "loss": 0.0309, "step": 61465 }, { "epoch": 0.25648621809047745, "grad_norm": 0.8123334326705158, "learning_rate": 3.949443804202908e-06, "loss": 0.0313, "step": 61470 }, { "epoch": 0.2565070808054677, "grad_norm": 5.7763032190747, "learning_rate": 3.949283186538746e-06, "loss": 0.0329, "step": 61475 }, { "epoch": 0.256527943520458, "grad_norm": 0.5936808070305117, "learning_rate": 3.949122588469194e-06, "loss": 0.0342, "step": 61480 }, { "epoch": 0.25654880623544823, "grad_norm": 0.47635787975471244, "learning_rate": 3.9489620099902674e-06, "loss": 0.0287, "step": 61485 }, { "epoch": 0.25656966895043853, "grad_norm": 0.9441435269168148, "learning_rate": 3.948801451097982e-06, "loss": 0.033, "step": 61490 }, { "epoch": 0.25659053166542883, "grad_norm": 0.5491185828573801, "learning_rate": 3.9486409117883594e-06, "loss": 0.0312, "step": 61495 }, { "epoch": 0.2566113943804191, "grad_norm": 1.7070319560596259, "learning_rate": 3.948480392057417e-06, "loss": 0.0319, "step": 61500 }, { "epoch": 0.2566322570954094, "grad_norm": 0.6234969244786899, "learning_rate": 3.948319891901176e-06, "loss": 0.0312, "step": 61505 }, { "epoch": 0.2566531198103996, "grad_norm": 1.059406952196989, "learning_rate": 3.9481594113156595e-06, "loss": 0.0345, "step": 61510 }, { "epoch": 0.2566739825253899, "grad_norm": 1.0598348328943392, "learning_rate": 3.947998950296889e-06, "loss": 0.03, "step": 61515 }, { "epoch": 0.2566948452403802, "grad_norm": 0.8114987504094602, "learning_rate": 3.94783850884089e-06, "loss": 0.0267, "step": 61520 }, { "epoch": 0.25671570795537046, "grad_norm": 0.6682711696355403, "learning_rate": 3.947678086943687e-06, "loss": 0.031, "step": 61525 }, { "epoch": 0.25673657067036076, "grad_norm": 1.4130057530160132, "learning_rate": 3.947517684601305e-06, "loss": 0.0328, "step": 61530 }, { "epoch": 0.25675743338535106, "grad_norm": 0.7591269489246258, "learning_rate": 3.947357301809774e-06, "loss": 0.0335, "step": 61535 }, { "epoch": 0.2567782961003413, "grad_norm": 0.7694126240391874, "learning_rate": 3.947196938565122e-06, "loss": 0.0336, "step": 61540 }, { "epoch": 0.2567991588153316, "grad_norm": 0.9226893377315936, "learning_rate": 3.947036594863378e-06, "loss": 0.0325, "step": 61545 }, { "epoch": 0.25682002153032185, "grad_norm": 0.9407361748982263, "learning_rate": 3.946876270700575e-06, "loss": 0.0405, "step": 61550 }, { "epoch": 0.25684088424531215, "grad_norm": 0.5919804149432393, "learning_rate": 3.946715966072742e-06, "loss": 0.0242, "step": 61555 }, { "epoch": 0.25686174696030245, "grad_norm": 0.5683434570486333, "learning_rate": 3.946555680975914e-06, "loss": 0.0306, "step": 61560 }, { "epoch": 0.2568826096752927, "grad_norm": 0.5219741383461027, "learning_rate": 3.946395415406125e-06, "loss": 0.0265, "step": 61565 }, { "epoch": 0.256903472390283, "grad_norm": 1.6388880806571084, "learning_rate": 3.946235169359412e-06, "loss": 0.0296, "step": 61570 }, { "epoch": 0.25692433510527324, "grad_norm": 1.5302336075481813, "learning_rate": 3.946074942831808e-06, "loss": 0.0388, "step": 61575 }, { "epoch": 0.25694519782026354, "grad_norm": 1.2816683279911742, "learning_rate": 3.945914735819355e-06, "loss": 0.0319, "step": 61580 }, { "epoch": 0.25696606053525384, "grad_norm": 0.8585728196681167, "learning_rate": 3.945754548318088e-06, "loss": 0.0402, "step": 61585 }, { "epoch": 0.2569869232502441, "grad_norm": 0.7968577117607124, "learning_rate": 3.945594380324049e-06, "loss": 0.0319, "step": 61590 }, { "epoch": 0.2570077859652344, "grad_norm": 0.7307953380730486, "learning_rate": 3.945434231833279e-06, "loss": 0.0249, "step": 61595 }, { "epoch": 0.2570286486802246, "grad_norm": 1.1277092462141522, "learning_rate": 3.94527410284182e-06, "loss": 0.0239, "step": 61600 }, { "epoch": 0.2570495113952149, "grad_norm": 1.8026434436942305, "learning_rate": 3.945113993345716e-06, "loss": 0.0287, "step": 61605 }, { "epoch": 0.2570703741102052, "grad_norm": 0.9499268981093961, "learning_rate": 3.944953903341009e-06, "loss": 0.0357, "step": 61610 }, { "epoch": 0.25709123682519547, "grad_norm": 1.2614849878944234, "learning_rate": 3.9447938328237476e-06, "loss": 0.0366, "step": 61615 }, { "epoch": 0.25711209954018577, "grad_norm": 1.18341437970893, "learning_rate": 3.944633781789977e-06, "loss": 0.0272, "step": 61620 }, { "epoch": 0.257132962255176, "grad_norm": 0.8481438683167811, "learning_rate": 3.944473750235745e-06, "loss": 0.0267, "step": 61625 }, { "epoch": 0.2571538249701663, "grad_norm": 0.5569321828125527, "learning_rate": 3.944313738157101e-06, "loss": 0.0324, "step": 61630 }, { "epoch": 0.2571746876851566, "grad_norm": 0.9119937779903565, "learning_rate": 3.944153745550096e-06, "loss": 0.0264, "step": 61635 }, { "epoch": 0.25719555040014686, "grad_norm": 2.9059258552098863, "learning_rate": 3.9439937724107785e-06, "loss": 0.0556, "step": 61640 }, { "epoch": 0.25721641311513715, "grad_norm": 0.7440985584926758, "learning_rate": 3.943833818735203e-06, "loss": 0.0327, "step": 61645 }, { "epoch": 0.25723727583012745, "grad_norm": 0.5972166203557472, "learning_rate": 3.943673884519422e-06, "loss": 0.0273, "step": 61650 }, { "epoch": 0.2572581385451177, "grad_norm": 2.061082288744573, "learning_rate": 3.943513969759492e-06, "loss": 0.029, "step": 61655 }, { "epoch": 0.257279001260108, "grad_norm": 0.9331574323606806, "learning_rate": 3.9433540744514655e-06, "loss": 0.0275, "step": 61660 }, { "epoch": 0.25729986397509824, "grad_norm": 0.7843265495910873, "learning_rate": 3.943194198591401e-06, "loss": 0.0367, "step": 61665 }, { "epoch": 0.25732072669008854, "grad_norm": 2.011033588050964, "learning_rate": 3.943034342175356e-06, "loss": 0.0411, "step": 61670 }, { "epoch": 0.25734158940507884, "grad_norm": 0.6324488686113515, "learning_rate": 3.942874505199391e-06, "loss": 0.0281, "step": 61675 }, { "epoch": 0.2573624521200691, "grad_norm": 0.5781834690507194, "learning_rate": 3.942714687659564e-06, "loss": 0.0412, "step": 61680 }, { "epoch": 0.2573833148350594, "grad_norm": 0.6201861610176532, "learning_rate": 3.942554889551937e-06, "loss": 0.0338, "step": 61685 }, { "epoch": 0.25740417755004963, "grad_norm": 1.5292806341638037, "learning_rate": 3.9423951108725735e-06, "loss": 0.0245, "step": 61690 }, { "epoch": 0.25742504026503993, "grad_norm": 0.776471050395961, "learning_rate": 3.9422353516175355e-06, "loss": 0.0312, "step": 61695 }, { "epoch": 0.25744590298003023, "grad_norm": 1.0948430751289402, "learning_rate": 3.942075611782889e-06, "loss": 0.034, "step": 61700 }, { "epoch": 0.25746676569502047, "grad_norm": 0.6069518427928525, "learning_rate": 3.941915891364699e-06, "loss": 0.0225, "step": 61705 }, { "epoch": 0.25748762841001077, "grad_norm": 0.9160234448171083, "learning_rate": 3.941756190359032e-06, "loss": 0.0288, "step": 61710 }, { "epoch": 0.257508491125001, "grad_norm": 1.2908199847350368, "learning_rate": 3.941596508761955e-06, "loss": 0.0351, "step": 61715 }, { "epoch": 0.2575293538399913, "grad_norm": 0.7205484365334184, "learning_rate": 3.94143684656954e-06, "loss": 0.0238, "step": 61720 }, { "epoch": 0.2575502165549816, "grad_norm": 0.7314461150490396, "learning_rate": 3.941277203777855e-06, "loss": 0.0236, "step": 61725 }, { "epoch": 0.25757107926997186, "grad_norm": 0.7273515281453006, "learning_rate": 3.9411175803829725e-06, "loss": 0.0294, "step": 61730 }, { "epoch": 0.25759194198496216, "grad_norm": 0.7810826611690943, "learning_rate": 3.940957976380964e-06, "loss": 0.041, "step": 61735 }, { "epoch": 0.25761280469995246, "grad_norm": 0.6103108221765169, "learning_rate": 3.9407983917679025e-06, "loss": 0.0363, "step": 61740 }, { "epoch": 0.2576336674149427, "grad_norm": 0.8142698184558411, "learning_rate": 3.940638826539864e-06, "loss": 0.032, "step": 61745 }, { "epoch": 0.257654530129933, "grad_norm": 1.147084464134297, "learning_rate": 3.940479280692925e-06, "loss": 0.0301, "step": 61750 }, { "epoch": 0.25767539284492325, "grad_norm": 2.708783058517753, "learning_rate": 3.94031975422316e-06, "loss": 0.0519, "step": 61755 }, { "epoch": 0.25769625555991355, "grad_norm": 1.0737412332704923, "learning_rate": 3.940160247126649e-06, "loss": 0.0281, "step": 61760 }, { "epoch": 0.25771711827490384, "grad_norm": 0.8601197840342365, "learning_rate": 3.940000759399469e-06, "loss": 0.0326, "step": 61765 }, { "epoch": 0.2577379809898941, "grad_norm": 0.8458008554013775, "learning_rate": 3.939841291037702e-06, "loss": 0.0318, "step": 61770 }, { "epoch": 0.2577588437048844, "grad_norm": 0.8811278004230545, "learning_rate": 3.939681842037429e-06, "loss": 0.027, "step": 61775 }, { "epoch": 0.25777970641987463, "grad_norm": 0.6660162456700571, "learning_rate": 3.9395224123947315e-06, "loss": 0.0313, "step": 61780 }, { "epoch": 0.25780056913486493, "grad_norm": 0.47731250895247745, "learning_rate": 3.939363002105695e-06, "loss": 0.0287, "step": 61785 }, { "epoch": 0.25782143184985523, "grad_norm": 0.8030195752149822, "learning_rate": 3.939203611166401e-06, "loss": 0.0195, "step": 61790 }, { "epoch": 0.2578422945648455, "grad_norm": 0.5881569550494034, "learning_rate": 3.939044239572938e-06, "loss": 0.0597, "step": 61795 }, { "epoch": 0.2578631572798358, "grad_norm": 0.7544595341859399, "learning_rate": 3.938884887321393e-06, "loss": 0.0295, "step": 61800 }, { "epoch": 0.257884019994826, "grad_norm": 0.6357540350798573, "learning_rate": 3.938725554407852e-06, "loss": 0.0312, "step": 61805 }, { "epoch": 0.2579048827098163, "grad_norm": 1.0585739783407904, "learning_rate": 3.938566240828406e-06, "loss": 0.0352, "step": 61810 }, { "epoch": 0.2579257454248066, "grad_norm": 0.6988322175578147, "learning_rate": 3.938406946579143e-06, "loss": 0.0245, "step": 61815 }, { "epoch": 0.25794660813979686, "grad_norm": 0.6038079666168135, "learning_rate": 3.938247671656155e-06, "loss": 0.0321, "step": 61820 }, { "epoch": 0.25796747085478716, "grad_norm": 0.7472801238132112, "learning_rate": 3.938088416055537e-06, "loss": 0.0366, "step": 61825 }, { "epoch": 0.25798833356977746, "grad_norm": 1.534011534214789, "learning_rate": 3.93792917977338e-06, "loss": 0.034, "step": 61830 }, { "epoch": 0.2580091962847677, "grad_norm": 0.6619421115677306, "learning_rate": 3.937769962805778e-06, "loss": 0.0251, "step": 61835 }, { "epoch": 0.258030058999758, "grad_norm": 0.8631513858732514, "learning_rate": 3.937610765148828e-06, "loss": 0.0313, "step": 61840 }, { "epoch": 0.25805092171474825, "grad_norm": 0.8149914604521064, "learning_rate": 3.937451586798627e-06, "loss": 0.0254, "step": 61845 }, { "epoch": 0.25807178442973855, "grad_norm": 0.8005160335314678, "learning_rate": 3.9372924277512735e-06, "loss": 0.0315, "step": 61850 }, { "epoch": 0.25809264714472885, "grad_norm": 0.7473663512160249, "learning_rate": 3.937133288002864e-06, "loss": 0.028, "step": 61855 }, { "epoch": 0.2581135098597191, "grad_norm": 0.9288525845097002, "learning_rate": 3.936974167549501e-06, "loss": 0.0282, "step": 61860 }, { "epoch": 0.2581343725747094, "grad_norm": 0.6905585957681896, "learning_rate": 3.936815066387285e-06, "loss": 0.0434, "step": 61865 }, { "epoch": 0.25815523528969964, "grad_norm": 0.9351784742139282, "learning_rate": 3.936655984512317e-06, "loss": 0.0371, "step": 61870 }, { "epoch": 0.25817609800468994, "grad_norm": 0.38539409982855927, "learning_rate": 3.936496921920703e-06, "loss": 0.0355, "step": 61875 }, { "epoch": 0.25819696071968024, "grad_norm": 2.316410792185878, "learning_rate": 3.936337878608547e-06, "loss": 0.0382, "step": 61880 }, { "epoch": 0.2582178234346705, "grad_norm": 0.9143622947308284, "learning_rate": 3.9361788545719525e-06, "loss": 0.0347, "step": 61885 }, { "epoch": 0.2582386861496608, "grad_norm": 1.3336455583319067, "learning_rate": 3.936019849807029e-06, "loss": 0.0236, "step": 61890 }, { "epoch": 0.258259548864651, "grad_norm": 0.6564488056258387, "learning_rate": 3.935860864309881e-06, "loss": 0.0329, "step": 61895 }, { "epoch": 0.2582804115796413, "grad_norm": 0.586145306370903, "learning_rate": 3.935701898076621e-06, "loss": 0.0323, "step": 61900 }, { "epoch": 0.2583012742946316, "grad_norm": 0.9414670768821202, "learning_rate": 3.935542951103357e-06, "loss": 0.0256, "step": 61905 }, { "epoch": 0.25832213700962187, "grad_norm": 0.6669428312202982, "learning_rate": 3.935384023386201e-06, "loss": 0.0509, "step": 61910 }, { "epoch": 0.25834299972461217, "grad_norm": 0.5549521920375186, "learning_rate": 3.9352251149212655e-06, "loss": 0.0246, "step": 61915 }, { "epoch": 0.25836386243960247, "grad_norm": 1.1660948035181478, "learning_rate": 3.935066225704662e-06, "loss": 0.0328, "step": 61920 }, { "epoch": 0.2583847251545927, "grad_norm": 0.5876628887382408, "learning_rate": 3.934907355732507e-06, "loss": 0.0277, "step": 61925 }, { "epoch": 0.258405587869583, "grad_norm": 0.7613831795849211, "learning_rate": 3.9347485050009155e-06, "loss": 0.0217, "step": 61930 }, { "epoch": 0.25842645058457325, "grad_norm": 1.0340159968419684, "learning_rate": 3.934589673506004e-06, "loss": 0.0301, "step": 61935 }, { "epoch": 0.25844731329956355, "grad_norm": 0.8657310892345182, "learning_rate": 3.93443086124389e-06, "loss": 0.0376, "step": 61940 }, { "epoch": 0.25846817601455385, "grad_norm": 0.978470802300676, "learning_rate": 3.9342720682106915e-06, "loss": 0.0367, "step": 61945 }, { "epoch": 0.2584890387295441, "grad_norm": 0.8156588504182227, "learning_rate": 3.93411329440253e-06, "loss": 0.025, "step": 61950 }, { "epoch": 0.2585099014445344, "grad_norm": 1.2991347680052845, "learning_rate": 3.933954539815526e-06, "loss": 0.0322, "step": 61955 }, { "epoch": 0.25853076415952464, "grad_norm": 0.5876336188930784, "learning_rate": 3.933795804445802e-06, "loss": 0.0233, "step": 61960 }, { "epoch": 0.25855162687451494, "grad_norm": 0.7222322530329134, "learning_rate": 3.93363708828948e-06, "loss": 0.0246, "step": 61965 }, { "epoch": 0.25857248958950524, "grad_norm": 0.6667753741762458, "learning_rate": 3.933478391342686e-06, "loss": 0.0331, "step": 61970 }, { "epoch": 0.2585933523044955, "grad_norm": 0.5453453881351715, "learning_rate": 3.933319713601543e-06, "loss": 0.0315, "step": 61975 }, { "epoch": 0.2586142150194858, "grad_norm": 0.6510199794219567, "learning_rate": 3.9331610550621804e-06, "loss": 0.0233, "step": 61980 }, { "epoch": 0.258635077734476, "grad_norm": 1.2348514218921958, "learning_rate": 3.9330024157207245e-06, "loss": 0.0341, "step": 61985 }, { "epoch": 0.2586559404494663, "grad_norm": 0.6852022884958663, "learning_rate": 3.932843795573303e-06, "loss": 0.0492, "step": 61990 }, { "epoch": 0.2586768031644566, "grad_norm": 0.6036503669275837, "learning_rate": 3.932685194616047e-06, "loss": 0.0329, "step": 61995 }, { "epoch": 0.25869766587944687, "grad_norm": 0.6050290161548614, "learning_rate": 3.932526612845087e-06, "loss": 0.0368, "step": 62000 }, { "epoch": 0.25871852859443717, "grad_norm": 0.8589523881532902, "learning_rate": 3.932368050256555e-06, "loss": 0.0222, "step": 62005 }, { "epoch": 0.25873939130942747, "grad_norm": 0.5576418543986741, "learning_rate": 3.932209506846584e-06, "loss": 0.0278, "step": 62010 }, { "epoch": 0.2587602540244177, "grad_norm": 0.8775517097854293, "learning_rate": 3.932050982611308e-06, "loss": 0.0437, "step": 62015 }, { "epoch": 0.258781116739408, "grad_norm": 0.8079332713036348, "learning_rate": 3.931892477546862e-06, "loss": 0.0327, "step": 62020 }, { "epoch": 0.25880197945439826, "grad_norm": 1.2334555751283185, "learning_rate": 3.931733991649383e-06, "loss": 0.0351, "step": 62025 }, { "epoch": 0.25882284216938856, "grad_norm": 1.0002104870589168, "learning_rate": 3.9315755249150085e-06, "loss": 0.0348, "step": 62030 }, { "epoch": 0.25884370488437886, "grad_norm": 0.6339282015712502, "learning_rate": 3.931417077339877e-06, "loss": 0.0328, "step": 62035 }, { "epoch": 0.2588645675993691, "grad_norm": 0.6708289031312242, "learning_rate": 3.931258648920126e-06, "loss": 0.0287, "step": 62040 }, { "epoch": 0.2588854303143594, "grad_norm": 0.9685181372195857, "learning_rate": 3.9311002396518995e-06, "loss": 0.0299, "step": 62045 }, { "epoch": 0.25890629302934964, "grad_norm": 3.508564110917575, "learning_rate": 3.930941849531337e-06, "loss": 0.0331, "step": 62050 }, { "epoch": 0.25892715574433994, "grad_norm": 0.8696539435415654, "learning_rate": 3.930783478554583e-06, "loss": 0.033, "step": 62055 }, { "epoch": 0.25894801845933024, "grad_norm": 1.0277773940404387, "learning_rate": 3.93062512671778e-06, "loss": 0.0273, "step": 62060 }, { "epoch": 0.2589688811743205, "grad_norm": 1.001836157018918, "learning_rate": 3.930466794017072e-06, "loss": 0.0346, "step": 62065 }, { "epoch": 0.2589897438893108, "grad_norm": 0.5932848086560181, "learning_rate": 3.930308480448609e-06, "loss": 0.0256, "step": 62070 }, { "epoch": 0.25901060660430103, "grad_norm": 1.2714959907174261, "learning_rate": 3.930150186008535e-06, "loss": 0.0289, "step": 62075 }, { "epoch": 0.25903146931929133, "grad_norm": 0.7050260501604694, "learning_rate": 3.9299919106929985e-06, "loss": 0.0295, "step": 62080 }, { "epoch": 0.25905233203428163, "grad_norm": 0.7368583100431714, "learning_rate": 3.929833654498151e-06, "loss": 0.0344, "step": 62085 }, { "epoch": 0.2590731947492719, "grad_norm": 0.5096228778723754, "learning_rate": 3.92967541742014e-06, "loss": 0.0279, "step": 62090 }, { "epoch": 0.2590940574642622, "grad_norm": 0.7731423095723705, "learning_rate": 3.92951719945512e-06, "loss": 0.0326, "step": 62095 }, { "epoch": 0.2591149201792525, "grad_norm": 1.8360119003918598, "learning_rate": 3.9293590005992415e-06, "loss": 0.0507, "step": 62100 }, { "epoch": 0.2591357828942427, "grad_norm": 0.9817592519333096, "learning_rate": 3.92920082084866e-06, "loss": 0.0437, "step": 62105 }, { "epoch": 0.259156645609233, "grad_norm": 1.3372705518114547, "learning_rate": 3.929042660199527e-06, "loss": 0.039, "step": 62110 }, { "epoch": 0.25917750832422326, "grad_norm": 0.7304014490090439, "learning_rate": 3.928884518648002e-06, "loss": 0.0191, "step": 62115 }, { "epoch": 0.25919837103921356, "grad_norm": 0.8092617029609155, "learning_rate": 3.928726396190242e-06, "loss": 0.0302, "step": 62120 }, { "epoch": 0.25921923375420386, "grad_norm": 0.7357697852573675, "learning_rate": 3.928568292822403e-06, "loss": 0.0282, "step": 62125 }, { "epoch": 0.2592400964691941, "grad_norm": 0.6237795567602629, "learning_rate": 3.9284102085406435e-06, "loss": 0.0297, "step": 62130 }, { "epoch": 0.2592609591841844, "grad_norm": 0.6191091246658946, "learning_rate": 3.9282521433411255e-06, "loss": 0.024, "step": 62135 }, { "epoch": 0.25928182189917465, "grad_norm": 0.6401595640704605, "learning_rate": 3.928094097220011e-06, "loss": 0.0278, "step": 62140 }, { "epoch": 0.25930268461416495, "grad_norm": 0.7199037151363534, "learning_rate": 3.9279360701734595e-06, "loss": 0.0317, "step": 62145 }, { "epoch": 0.25932354732915525, "grad_norm": 0.8204424998145046, "learning_rate": 3.927778062197638e-06, "loss": 0.0458, "step": 62150 }, { "epoch": 0.2593444100441455, "grad_norm": 0.9336146121678298, "learning_rate": 3.927620073288708e-06, "loss": 0.0389, "step": 62155 }, { "epoch": 0.2593652727591358, "grad_norm": 1.869249369664988, "learning_rate": 3.927462103442838e-06, "loss": 0.0385, "step": 62160 }, { "epoch": 0.25938613547412603, "grad_norm": 1.0205891165125769, "learning_rate": 3.927304152656191e-06, "loss": 0.0351, "step": 62165 }, { "epoch": 0.25940699818911633, "grad_norm": 0.622986108482983, "learning_rate": 3.927146220924938e-06, "loss": 0.0363, "step": 62170 }, { "epoch": 0.25942786090410663, "grad_norm": 0.7452912220963656, "learning_rate": 3.926988308245247e-06, "loss": 0.0387, "step": 62175 }, { "epoch": 0.2594487236190969, "grad_norm": 1.136771926404533, "learning_rate": 3.926830414613287e-06, "loss": 0.0294, "step": 62180 }, { "epoch": 0.2594695863340872, "grad_norm": 0.6050170826870684, "learning_rate": 3.9266725400252305e-06, "loss": 0.0347, "step": 62185 }, { "epoch": 0.2594904490490775, "grad_norm": 0.9386062252881212, "learning_rate": 3.926514684477248e-06, "loss": 0.0294, "step": 62190 }, { "epoch": 0.2595113117640677, "grad_norm": 0.6256711932078817, "learning_rate": 3.9263568479655134e-06, "loss": 0.0349, "step": 62195 }, { "epoch": 0.259532174479058, "grad_norm": 0.4749230146201359, "learning_rate": 3.926199030486202e-06, "loss": 0.0286, "step": 62200 }, { "epoch": 0.25955303719404826, "grad_norm": 1.0231785525294068, "learning_rate": 3.926041232035487e-06, "loss": 0.0381, "step": 62205 }, { "epoch": 0.25957389990903856, "grad_norm": 0.9007650161721213, "learning_rate": 3.925883452609547e-06, "loss": 0.027, "step": 62210 }, { "epoch": 0.25959476262402886, "grad_norm": 0.9490128396240981, "learning_rate": 3.925725692204558e-06, "loss": 0.0306, "step": 62215 }, { "epoch": 0.2596156253390191, "grad_norm": 0.8015704265833337, "learning_rate": 3.925567950816698e-06, "loss": 0.0408, "step": 62220 }, { "epoch": 0.2596364880540094, "grad_norm": 1.384467487380572, "learning_rate": 3.9254102284421495e-06, "loss": 0.0284, "step": 62225 }, { "epoch": 0.25965735076899965, "grad_norm": 0.41665615203087053, "learning_rate": 3.925252525077089e-06, "loss": 0.0275, "step": 62230 }, { "epoch": 0.25967821348398995, "grad_norm": 0.9473411508180151, "learning_rate": 3.925094840717703e-06, "loss": 0.025, "step": 62235 }, { "epoch": 0.25969907619898025, "grad_norm": 0.8278650740027134, "learning_rate": 3.9249371753601704e-06, "loss": 0.0382, "step": 62240 }, { "epoch": 0.2597199389139705, "grad_norm": 0.6830438602443535, "learning_rate": 3.924779529000677e-06, "loss": 0.0247, "step": 62245 }, { "epoch": 0.2597408016289608, "grad_norm": 0.6145224407859678, "learning_rate": 3.924621901635408e-06, "loss": 0.0346, "step": 62250 }, { "epoch": 0.25976166434395104, "grad_norm": 0.4555577839331045, "learning_rate": 3.924464293260548e-06, "loss": 0.0278, "step": 62255 }, { "epoch": 0.25978252705894134, "grad_norm": 0.9344810778510678, "learning_rate": 3.924306703872285e-06, "loss": 0.0284, "step": 62260 }, { "epoch": 0.25980338977393164, "grad_norm": 1.0199926086193627, "learning_rate": 3.924149133466807e-06, "loss": 0.0358, "step": 62265 }, { "epoch": 0.2598242524889219, "grad_norm": 0.8097191268690489, "learning_rate": 3.9239915820403046e-06, "loss": 0.0313, "step": 62270 }, { "epoch": 0.2598451152039122, "grad_norm": 0.799717045342608, "learning_rate": 3.923834049588966e-06, "loss": 0.0396, "step": 62275 }, { "epoch": 0.2598659779189025, "grad_norm": 0.9778928590854798, "learning_rate": 3.923676536108986e-06, "loss": 0.0366, "step": 62280 }, { "epoch": 0.2598868406338927, "grad_norm": 0.5896189130602623, "learning_rate": 3.923519041596552e-06, "loss": 0.0335, "step": 62285 }, { "epoch": 0.259907703348883, "grad_norm": 0.7347974888363404, "learning_rate": 3.923361566047862e-06, "loss": 0.0254, "step": 62290 }, { "epoch": 0.25992856606387327, "grad_norm": 1.344607183140075, "learning_rate": 3.923204109459109e-06, "loss": 0.0464, "step": 62295 }, { "epoch": 0.25994942877886357, "grad_norm": 0.5438105803491601, "learning_rate": 3.923046671826488e-06, "loss": 0.0237, "step": 62300 }, { "epoch": 0.25997029149385387, "grad_norm": 1.1830242015586585, "learning_rate": 3.922889253146197e-06, "loss": 0.0423, "step": 62305 }, { "epoch": 0.2599911542088441, "grad_norm": 0.6917034188777775, "learning_rate": 3.922731853414433e-06, "loss": 0.0289, "step": 62310 }, { "epoch": 0.2600120169238344, "grad_norm": 0.7284720832436447, "learning_rate": 3.922574472627395e-06, "loss": 0.0334, "step": 62315 }, { "epoch": 0.26003287963882465, "grad_norm": 0.8062061427331638, "learning_rate": 3.922417110781283e-06, "loss": 0.0333, "step": 62320 }, { "epoch": 0.26005374235381495, "grad_norm": 0.8895396646584901, "learning_rate": 3.9222597678723e-06, "loss": 0.0315, "step": 62325 }, { "epoch": 0.26007460506880525, "grad_norm": 0.7113147575798744, "learning_rate": 3.922102443896645e-06, "loss": 0.0307, "step": 62330 }, { "epoch": 0.2600954677837955, "grad_norm": 0.7624927213122247, "learning_rate": 3.921945138850522e-06, "loss": 0.0436, "step": 62335 }, { "epoch": 0.2601163304987858, "grad_norm": 0.7922804799270532, "learning_rate": 3.921787852730136e-06, "loss": 0.0241, "step": 62340 }, { "epoch": 0.26013719321377604, "grad_norm": 0.9850775830031946, "learning_rate": 3.921630585531693e-06, "loss": 0.0338, "step": 62345 }, { "epoch": 0.26015805592876634, "grad_norm": 0.8627730458961431, "learning_rate": 3.921473337251398e-06, "loss": 0.0253, "step": 62350 }, { "epoch": 0.26017891864375664, "grad_norm": 1.0511130183885316, "learning_rate": 3.921316107885459e-06, "loss": 0.0382, "step": 62355 }, { "epoch": 0.2601997813587469, "grad_norm": 2.996031360270928, "learning_rate": 3.921158897430084e-06, "loss": 0.0342, "step": 62360 }, { "epoch": 0.2602206440737372, "grad_norm": 0.7322280677446477, "learning_rate": 3.9210017058814834e-06, "loss": 0.0245, "step": 62365 }, { "epoch": 0.2602415067887275, "grad_norm": 0.7140929225376058, "learning_rate": 3.920844533235866e-06, "loss": 0.0321, "step": 62370 }, { "epoch": 0.2602623695037177, "grad_norm": 1.86156055787931, "learning_rate": 3.920687379489447e-06, "loss": 0.0459, "step": 62375 }, { "epoch": 0.260283232218708, "grad_norm": 0.8197135736802633, "learning_rate": 3.920530244638435e-06, "loss": 0.0277, "step": 62380 }, { "epoch": 0.26030409493369827, "grad_norm": 0.6905227067298647, "learning_rate": 3.9203731286790465e-06, "loss": 0.0267, "step": 62385 }, { "epoch": 0.26032495764868857, "grad_norm": 0.5613608160436931, "learning_rate": 3.920216031607496e-06, "loss": 0.0325, "step": 62390 }, { "epoch": 0.26034582036367887, "grad_norm": 0.5469797112022488, "learning_rate": 3.920058953419999e-06, "loss": 0.0338, "step": 62395 }, { "epoch": 0.2603666830786691, "grad_norm": 0.786261018519791, "learning_rate": 3.919901894112773e-06, "loss": 0.0255, "step": 62400 }, { "epoch": 0.2603875457936594, "grad_norm": 1.0601171494292183, "learning_rate": 3.919744853682035e-06, "loss": 0.0375, "step": 62405 }, { "epoch": 0.26040840850864966, "grad_norm": 0.6511982566691993, "learning_rate": 3.919587832124005e-06, "loss": 0.0274, "step": 62410 }, { "epoch": 0.26042927122363996, "grad_norm": 2.386201518816941, "learning_rate": 3.919430829434904e-06, "loss": 0.0275, "step": 62415 }, { "epoch": 0.26045013393863026, "grad_norm": 1.285946259052004, "learning_rate": 3.919273845610951e-06, "loss": 0.0326, "step": 62420 }, { "epoch": 0.2604709966536205, "grad_norm": 1.3077519832081423, "learning_rate": 3.9191168806483695e-06, "loss": 0.036, "step": 62425 }, { "epoch": 0.2604918593686108, "grad_norm": 0.6545916804382759, "learning_rate": 3.918959934543384e-06, "loss": 0.0313, "step": 62430 }, { "epoch": 0.26051272208360104, "grad_norm": 0.789299422094952, "learning_rate": 3.918803007292217e-06, "loss": 0.0301, "step": 62435 }, { "epoch": 0.26053358479859134, "grad_norm": 0.6728080127373933, "learning_rate": 3.918646098891095e-06, "loss": 0.0295, "step": 62440 }, { "epoch": 0.26055444751358164, "grad_norm": 0.8447001537393675, "learning_rate": 3.918489209336244e-06, "loss": 0.0303, "step": 62445 }, { "epoch": 0.2605753102285719, "grad_norm": 0.5195933604120435, "learning_rate": 3.918332338623892e-06, "loss": 0.0254, "step": 62450 }, { "epoch": 0.2605961729435622, "grad_norm": 0.3788992356277084, "learning_rate": 3.918175486750269e-06, "loss": 0.0284, "step": 62455 }, { "epoch": 0.2606170356585525, "grad_norm": 0.5922656399705145, "learning_rate": 3.918018653711602e-06, "loss": 0.0305, "step": 62460 }, { "epoch": 0.26063789837354273, "grad_norm": 1.1231729443892609, "learning_rate": 3.917861839504122e-06, "loss": 0.0386, "step": 62465 }, { "epoch": 0.26065876108853303, "grad_norm": 0.8707046031090871, "learning_rate": 3.917705044124063e-06, "loss": 0.0402, "step": 62470 }, { "epoch": 0.2606796238035233, "grad_norm": 0.659597030243123, "learning_rate": 3.917548267567657e-06, "loss": 0.0423, "step": 62475 }, { "epoch": 0.2607004865185136, "grad_norm": 0.5187568243966298, "learning_rate": 3.917391509831138e-06, "loss": 0.0305, "step": 62480 }, { "epoch": 0.2607213492335039, "grad_norm": 0.8329468560060884, "learning_rate": 3.917234770910739e-06, "loss": 0.0341, "step": 62485 }, { "epoch": 0.2607422119484941, "grad_norm": 1.237075784975427, "learning_rate": 3.917078050802699e-06, "loss": 0.0329, "step": 62490 }, { "epoch": 0.2607630746634844, "grad_norm": 0.8089291502981704, "learning_rate": 3.916921349503254e-06, "loss": 0.0246, "step": 62495 }, { "epoch": 0.26078393737847466, "grad_norm": 0.6405101845169459, "learning_rate": 3.916764667008641e-06, "loss": 0.041, "step": 62500 }, { "epoch": 0.26080480009346496, "grad_norm": 0.7718925757719037, "learning_rate": 3.916608003315101e-06, "loss": 0.0361, "step": 62505 }, { "epoch": 0.26082566280845526, "grad_norm": 0.8025645686741736, "learning_rate": 3.916451358418873e-06, "loss": 0.0317, "step": 62510 }, { "epoch": 0.2608465255234455, "grad_norm": 0.9943390916254491, "learning_rate": 3.9162947323161974e-06, "loss": 0.0481, "step": 62515 }, { "epoch": 0.2608673882384358, "grad_norm": 0.9249163943092674, "learning_rate": 3.916138125003319e-06, "loss": 0.0314, "step": 62520 }, { "epoch": 0.26088825095342605, "grad_norm": 1.1973316654260475, "learning_rate": 3.915981536476481e-06, "loss": 0.0322, "step": 62525 }, { "epoch": 0.26090911366841635, "grad_norm": 0.8927814242827483, "learning_rate": 3.915824966731925e-06, "loss": 0.0305, "step": 62530 }, { "epoch": 0.26092997638340665, "grad_norm": 0.961400225127417, "learning_rate": 3.915668415765901e-06, "loss": 0.0342, "step": 62535 }, { "epoch": 0.2609508390983969, "grad_norm": 1.0555551928889142, "learning_rate": 3.91551188357465e-06, "loss": 0.0367, "step": 62540 }, { "epoch": 0.2609717018133872, "grad_norm": 0.8141540247935636, "learning_rate": 3.915355370154425e-06, "loss": 0.0303, "step": 62545 }, { "epoch": 0.2609925645283775, "grad_norm": 0.5509179949300481, "learning_rate": 3.91519887550147e-06, "loss": 0.0247, "step": 62550 }, { "epoch": 0.26101342724336773, "grad_norm": 0.4844990685974799, "learning_rate": 3.915042399612039e-06, "loss": 0.0319, "step": 62555 }, { "epoch": 0.26103428995835803, "grad_norm": 0.9022291694600396, "learning_rate": 3.914885942482379e-06, "loss": 0.0383, "step": 62560 }, { "epoch": 0.2610551526733483, "grad_norm": 0.6538896400277043, "learning_rate": 3.914729504108745e-06, "loss": 0.0317, "step": 62565 }, { "epoch": 0.2610760153883386, "grad_norm": 1.2623949642012142, "learning_rate": 3.914573084487387e-06, "loss": 0.0423, "step": 62570 }, { "epoch": 0.2610968781033289, "grad_norm": 0.7011871664352245, "learning_rate": 3.914416683614561e-06, "loss": 0.0249, "step": 62575 }, { "epoch": 0.2611177408183191, "grad_norm": 1.6386558702908853, "learning_rate": 3.914260301486521e-06, "loss": 0.0305, "step": 62580 }, { "epoch": 0.2611386035333094, "grad_norm": 1.127428538766846, "learning_rate": 3.914103938099524e-06, "loss": 0.0327, "step": 62585 }, { "epoch": 0.26115946624829967, "grad_norm": 0.6058707355751312, "learning_rate": 3.913947593449825e-06, "loss": 0.0362, "step": 62590 }, { "epoch": 0.26118032896328996, "grad_norm": 1.028529798725281, "learning_rate": 3.9137912675336844e-06, "loss": 0.0285, "step": 62595 }, { "epoch": 0.26120119167828026, "grad_norm": 0.8543560000670326, "learning_rate": 3.91363496034736e-06, "loss": 0.0316, "step": 62600 }, { "epoch": 0.2612220543932705, "grad_norm": 0.7854909106433758, "learning_rate": 3.913478671887113e-06, "loss": 0.0357, "step": 62605 }, { "epoch": 0.2612429171082608, "grad_norm": 0.7642801181542452, "learning_rate": 3.913322402149202e-06, "loss": 0.0322, "step": 62610 }, { "epoch": 0.26126377982325105, "grad_norm": 0.8898433573067587, "learning_rate": 3.913166151129893e-06, "loss": 0.0432, "step": 62615 }, { "epoch": 0.26128464253824135, "grad_norm": 0.7264979124439146, "learning_rate": 3.9130099188254466e-06, "loss": 0.0281, "step": 62620 }, { "epoch": 0.26130550525323165, "grad_norm": 0.7727710216145469, "learning_rate": 3.912853705232129e-06, "loss": 0.0345, "step": 62625 }, { "epoch": 0.2613263679682219, "grad_norm": 1.0364816256678102, "learning_rate": 3.912697510346205e-06, "loss": 0.0261, "step": 62630 }, { "epoch": 0.2613472306832122, "grad_norm": 0.5631380538773074, "learning_rate": 3.9125413341639395e-06, "loss": 0.033, "step": 62635 }, { "epoch": 0.2613680933982025, "grad_norm": 0.8205836432814025, "learning_rate": 3.912385176681601e-06, "loss": 0.0373, "step": 62640 }, { "epoch": 0.26138895611319274, "grad_norm": 0.8223678097336363, "learning_rate": 3.912229037895459e-06, "loss": 0.0375, "step": 62645 }, { "epoch": 0.26140981882818304, "grad_norm": 0.8682391588917118, "learning_rate": 3.912072917801783e-06, "loss": 0.0392, "step": 62650 }, { "epoch": 0.2614306815431733, "grad_norm": 0.635234434661812, "learning_rate": 3.9119168163968415e-06, "loss": 0.0366, "step": 62655 }, { "epoch": 0.2614515442581636, "grad_norm": 0.4990564069397744, "learning_rate": 3.911760733676909e-06, "loss": 0.0308, "step": 62660 }, { "epoch": 0.2614724069731539, "grad_norm": 0.9079078440661037, "learning_rate": 3.911604669638256e-06, "loss": 0.0318, "step": 62665 }, { "epoch": 0.2614932696881441, "grad_norm": 0.6924115299350623, "learning_rate": 3.911448624277157e-06, "loss": 0.033, "step": 62670 }, { "epoch": 0.2615141324031344, "grad_norm": 0.9148180599637711, "learning_rate": 3.911292597589888e-06, "loss": 0.0389, "step": 62675 }, { "epoch": 0.26153499511812467, "grad_norm": 0.5858517487132167, "learning_rate": 3.911136589572722e-06, "loss": 0.033, "step": 62680 }, { "epoch": 0.26155585783311497, "grad_norm": 0.7259853565033713, "learning_rate": 3.91098060022194e-06, "loss": 0.0327, "step": 62685 }, { "epoch": 0.26157672054810527, "grad_norm": 0.6414644510950437, "learning_rate": 3.910824629533816e-06, "loss": 0.0414, "step": 62690 }, { "epoch": 0.2615975832630955, "grad_norm": 0.9729126455629495, "learning_rate": 3.9106686775046295e-06, "loss": 0.0299, "step": 62695 }, { "epoch": 0.2616184459780858, "grad_norm": 0.7204826127054018, "learning_rate": 3.910512744130663e-06, "loss": 0.0357, "step": 62700 }, { "epoch": 0.26163930869307606, "grad_norm": 0.6460811288740815, "learning_rate": 3.910356829408195e-06, "loss": 0.0318, "step": 62705 }, { "epoch": 0.26166017140806636, "grad_norm": 0.8251131297891005, "learning_rate": 3.910200933333509e-06, "loss": 0.0289, "step": 62710 }, { "epoch": 0.26168103412305666, "grad_norm": 0.6340520031279636, "learning_rate": 3.9100450559028875e-06, "loss": 0.0245, "step": 62715 }, { "epoch": 0.2617018968380469, "grad_norm": 0.5949670767685485, "learning_rate": 3.909889197112615e-06, "loss": 0.0256, "step": 62720 }, { "epoch": 0.2617227595530372, "grad_norm": 1.0632230331387538, "learning_rate": 3.9097333569589754e-06, "loss": 0.0334, "step": 62725 }, { "epoch": 0.2617436222680275, "grad_norm": 0.8245776285143575, "learning_rate": 3.909577535438256e-06, "loss": 0.0301, "step": 62730 }, { "epoch": 0.26176448498301774, "grad_norm": 0.8652849800733752, "learning_rate": 3.909421732546745e-06, "loss": 0.033, "step": 62735 }, { "epoch": 0.26178534769800804, "grad_norm": 0.8704714436730717, "learning_rate": 3.9092659482807285e-06, "loss": 0.0339, "step": 62740 }, { "epoch": 0.2618062104129983, "grad_norm": 1.042452872693841, "learning_rate": 3.909110182636498e-06, "loss": 0.0334, "step": 62745 }, { "epoch": 0.2618270731279886, "grad_norm": 0.7656278232194592, "learning_rate": 3.9089544356103425e-06, "loss": 0.0291, "step": 62750 }, { "epoch": 0.2618479358429789, "grad_norm": 0.6803736331927613, "learning_rate": 3.908798707198553e-06, "loss": 0.0381, "step": 62755 }, { "epoch": 0.26186879855796913, "grad_norm": 0.9835771716876618, "learning_rate": 3.908642997397423e-06, "loss": 0.0379, "step": 62760 }, { "epoch": 0.26188966127295943, "grad_norm": 0.583319471374112, "learning_rate": 3.908487306203245e-06, "loss": 0.0262, "step": 62765 }, { "epoch": 0.2619105239879497, "grad_norm": 0.8221311714336931, "learning_rate": 3.908331633612314e-06, "loss": 0.0313, "step": 62770 }, { "epoch": 0.26193138670294, "grad_norm": 0.6744028024911696, "learning_rate": 3.908175979620927e-06, "loss": 0.0364, "step": 62775 }, { "epoch": 0.26195224941793027, "grad_norm": 0.8134595751857283, "learning_rate": 3.9080203442253775e-06, "loss": 0.0313, "step": 62780 }, { "epoch": 0.2619731121329205, "grad_norm": 1.1035967695423805, "learning_rate": 3.907864727421965e-06, "loss": 0.0408, "step": 62785 }, { "epoch": 0.2619939748479108, "grad_norm": 1.4547383404279886, "learning_rate": 3.907709129206988e-06, "loss": 0.0285, "step": 62790 }, { "epoch": 0.26201483756290106, "grad_norm": 0.8501838715187039, "learning_rate": 3.907553549576746e-06, "loss": 0.0249, "step": 62795 }, { "epoch": 0.26203570027789136, "grad_norm": 1.1529102317996005, "learning_rate": 3.907397988527539e-06, "loss": 0.0301, "step": 62800 }, { "epoch": 0.26205656299288166, "grad_norm": 0.6176516286287685, "learning_rate": 3.90724244605567e-06, "loss": 0.0395, "step": 62805 }, { "epoch": 0.2620774257078719, "grad_norm": 0.4124434139324788, "learning_rate": 3.907086922157441e-06, "loss": 0.0692, "step": 62810 }, { "epoch": 0.2620982884228622, "grad_norm": 0.7184432511117906, "learning_rate": 3.906931416829155e-06, "loss": 0.0359, "step": 62815 }, { "epoch": 0.2621191511378525, "grad_norm": 0.5717002482214538, "learning_rate": 3.906775930067118e-06, "loss": 0.0257, "step": 62820 }, { "epoch": 0.26214001385284275, "grad_norm": 1.2232196089680794, "learning_rate": 3.906620461867636e-06, "loss": 0.0427, "step": 62825 }, { "epoch": 0.26216087656783305, "grad_norm": 0.5891120907298271, "learning_rate": 3.9064650122270145e-06, "loss": 0.031, "step": 62830 }, { "epoch": 0.2621817392828233, "grad_norm": 0.6443406876828136, "learning_rate": 3.906309581141562e-06, "loss": 0.0418, "step": 62835 }, { "epoch": 0.2622026019978136, "grad_norm": 0.49001949351784907, "learning_rate": 3.906154168607589e-06, "loss": 0.0243, "step": 62840 }, { "epoch": 0.2622234647128039, "grad_norm": 0.4289029833614735, "learning_rate": 3.905998774621403e-06, "loss": 0.0377, "step": 62845 }, { "epoch": 0.26224432742779413, "grad_norm": 1.0772524181344718, "learning_rate": 3.905843399179316e-06, "loss": 0.0366, "step": 62850 }, { "epoch": 0.26226519014278443, "grad_norm": 0.8405918008242849, "learning_rate": 3.90568804227764e-06, "loss": 0.0266, "step": 62855 }, { "epoch": 0.2622860528577747, "grad_norm": 0.5384224002068722, "learning_rate": 3.905532703912688e-06, "loss": 0.0256, "step": 62860 }, { "epoch": 0.262306915572765, "grad_norm": 1.322557223354447, "learning_rate": 3.905377384080775e-06, "loss": 0.0312, "step": 62865 }, { "epoch": 0.2623277782877553, "grad_norm": 1.0798094731834078, "learning_rate": 3.905222082778214e-06, "loss": 0.0361, "step": 62870 }, { "epoch": 0.2623486410027455, "grad_norm": 0.865258534949156, "learning_rate": 3.905066800001322e-06, "loss": 0.0495, "step": 62875 }, { "epoch": 0.2623695037177358, "grad_norm": 0.8869969290140677, "learning_rate": 3.904911535746417e-06, "loss": 0.0353, "step": 62880 }, { "epoch": 0.26239036643272606, "grad_norm": 1.626142917492057, "learning_rate": 3.904756290009816e-06, "loss": 0.0328, "step": 62885 }, { "epoch": 0.26241122914771636, "grad_norm": 0.7158896768786638, "learning_rate": 3.904601062787839e-06, "loss": 0.0323, "step": 62890 }, { "epoch": 0.26243209186270666, "grad_norm": 1.016400787483309, "learning_rate": 3.904445854076806e-06, "loss": 0.0274, "step": 62895 }, { "epoch": 0.2624529545776969, "grad_norm": 0.7085128255340326, "learning_rate": 3.904290663873039e-06, "loss": 0.0914, "step": 62900 }, { "epoch": 0.2624738172926872, "grad_norm": 0.8269242529640658, "learning_rate": 3.904135492172858e-06, "loss": 0.0294, "step": 62905 }, { "epoch": 0.2624946800076775, "grad_norm": 0.7013487813501876, "learning_rate": 3.903980338972588e-06, "loss": 0.0325, "step": 62910 }, { "epoch": 0.26251554272266775, "grad_norm": 0.50208809770866, "learning_rate": 3.903825204268553e-06, "loss": 0.0242, "step": 62915 }, { "epoch": 0.26253640543765805, "grad_norm": 0.6972383645103998, "learning_rate": 3.903670088057078e-06, "loss": 0.0259, "step": 62920 }, { "epoch": 0.2625572681526483, "grad_norm": 0.7154409667955249, "learning_rate": 3.90351499033449e-06, "loss": 0.0404, "step": 62925 }, { "epoch": 0.2625781308676386, "grad_norm": 0.9506968847545504, "learning_rate": 3.9033599110971154e-06, "loss": 0.0327, "step": 62930 }, { "epoch": 0.2625989935826289, "grad_norm": 0.6275253458026603, "learning_rate": 3.903204850341283e-06, "loss": 0.0234, "step": 62935 }, { "epoch": 0.26261985629761914, "grad_norm": 0.8895136784364461, "learning_rate": 3.9030498080633225e-06, "loss": 0.0322, "step": 62940 }, { "epoch": 0.26264071901260944, "grad_norm": 0.8060265420856593, "learning_rate": 3.902894784259564e-06, "loss": 0.0382, "step": 62945 }, { "epoch": 0.2626615817275997, "grad_norm": 0.9281158869403565, "learning_rate": 3.90273977892634e-06, "loss": 0.0296, "step": 62950 }, { "epoch": 0.26268244444259, "grad_norm": 0.9202901863231631, "learning_rate": 3.9025847920599805e-06, "loss": 0.0262, "step": 62955 }, { "epoch": 0.2627033071575803, "grad_norm": 0.5027969897405942, "learning_rate": 3.902429823656822e-06, "loss": 0.0379, "step": 62960 }, { "epoch": 0.2627241698725705, "grad_norm": 0.8557210128366353, "learning_rate": 3.902274873713196e-06, "loss": 0.0328, "step": 62965 }, { "epoch": 0.2627450325875608, "grad_norm": 1.0315400014618643, "learning_rate": 3.90211994222544e-06, "loss": 0.0401, "step": 62970 }, { "epoch": 0.26276589530255107, "grad_norm": 1.0258634087742717, "learning_rate": 3.9019650291898906e-06, "loss": 0.0353, "step": 62975 }, { "epoch": 0.26278675801754137, "grad_norm": 0.7816183996364758, "learning_rate": 3.901810134602884e-06, "loss": 0.0209, "step": 62980 }, { "epoch": 0.26280762073253167, "grad_norm": 0.5579252714826238, "learning_rate": 3.901655258460762e-06, "loss": 0.0269, "step": 62985 }, { "epoch": 0.2628284834475219, "grad_norm": 0.8068911217464395, "learning_rate": 3.901500400759859e-06, "loss": 0.0301, "step": 62990 }, { "epoch": 0.2628493461625122, "grad_norm": 0.5467598059331535, "learning_rate": 3.9013455614965195e-06, "loss": 0.0202, "step": 62995 }, { "epoch": 0.2628702088775025, "grad_norm": 1.1070020379089416, "learning_rate": 3.901190740667084e-06, "loss": 0.0183, "step": 63000 }, { "epoch": 0.26289107159249275, "grad_norm": 0.6852199851889419, "learning_rate": 3.901035938267895e-06, "loss": 0.0248, "step": 63005 }, { "epoch": 0.26291193430748305, "grad_norm": 1.0693938669536476, "learning_rate": 3.900881154295296e-06, "loss": 0.0395, "step": 63010 }, { "epoch": 0.2629327970224733, "grad_norm": 0.8546804446113192, "learning_rate": 3.900726388745632e-06, "loss": 0.0298, "step": 63015 }, { "epoch": 0.2629536597374636, "grad_norm": 0.8844711735472409, "learning_rate": 3.900571641615249e-06, "loss": 0.03, "step": 63020 }, { "epoch": 0.2629745224524539, "grad_norm": 0.9887163483468928, "learning_rate": 3.900416912900494e-06, "loss": 0.0382, "step": 63025 }, { "epoch": 0.26299538516744414, "grad_norm": 0.6463825524193015, "learning_rate": 3.900262202597713e-06, "loss": 0.025, "step": 63030 }, { "epoch": 0.26301624788243444, "grad_norm": 0.8132288396204229, "learning_rate": 3.900107510703256e-06, "loss": 0.0275, "step": 63035 }, { "epoch": 0.2630371105974247, "grad_norm": 0.8153348054176482, "learning_rate": 3.8999528372134725e-06, "loss": 0.0337, "step": 63040 }, { "epoch": 0.263057973312415, "grad_norm": 0.5928632096030201, "learning_rate": 3.899798182124714e-06, "loss": 0.0283, "step": 63045 }, { "epoch": 0.2630788360274053, "grad_norm": 0.5647367468060176, "learning_rate": 3.89964354543333e-06, "loss": 0.0404, "step": 63050 }, { "epoch": 0.2630996987423955, "grad_norm": 1.0982404291687393, "learning_rate": 3.899488927135676e-06, "loss": 0.025, "step": 63055 }, { "epoch": 0.2631205614573858, "grad_norm": 1.5167233618647071, "learning_rate": 3.899334327228105e-06, "loss": 0.0264, "step": 63060 }, { "epoch": 0.26314142417237607, "grad_norm": 0.7685949042823413, "learning_rate": 3.8991797457069705e-06, "loss": 0.0276, "step": 63065 }, { "epoch": 0.26316228688736637, "grad_norm": 0.6954028997653899, "learning_rate": 3.89902518256863e-06, "loss": 0.0323, "step": 63070 }, { "epoch": 0.26318314960235667, "grad_norm": 0.7889369478527255, "learning_rate": 3.898870637809438e-06, "loss": 0.0298, "step": 63075 }, { "epoch": 0.2632040123173469, "grad_norm": 0.8536262913056528, "learning_rate": 3.898716111425756e-06, "loss": 0.0309, "step": 63080 }, { "epoch": 0.2632248750323372, "grad_norm": 1.1941090144522977, "learning_rate": 3.89856160341394e-06, "loss": 0.0318, "step": 63085 }, { "epoch": 0.2632457377473275, "grad_norm": 1.443036678778717, "learning_rate": 3.898407113770351e-06, "loss": 0.0379, "step": 63090 }, { "epoch": 0.26326660046231776, "grad_norm": 0.6133328069426279, "learning_rate": 3.898252642491349e-06, "loss": 0.0281, "step": 63095 }, { "epoch": 0.26328746317730806, "grad_norm": 0.5224693667295249, "learning_rate": 3.898098189573296e-06, "loss": 0.03, "step": 63100 }, { "epoch": 0.2633083258922983, "grad_norm": 0.9392242478079406, "learning_rate": 3.8979437550125556e-06, "loss": 0.0334, "step": 63105 }, { "epoch": 0.2633291886072886, "grad_norm": 1.1582779227745477, "learning_rate": 3.897789338805491e-06, "loss": 0.0297, "step": 63110 }, { "epoch": 0.2633500513222789, "grad_norm": 1.0114078243140021, "learning_rate": 3.897634940948468e-06, "loss": 0.0306, "step": 63115 }, { "epoch": 0.26337091403726914, "grad_norm": 0.7933316471480532, "learning_rate": 3.897480561437852e-06, "loss": 0.0326, "step": 63120 }, { "epoch": 0.26339177675225944, "grad_norm": 1.0022778061764979, "learning_rate": 3.8973262002700105e-06, "loss": 0.0311, "step": 63125 }, { "epoch": 0.2634126394672497, "grad_norm": 0.8479413234547193, "learning_rate": 3.897171857441309e-06, "loss": 0.0498, "step": 63130 }, { "epoch": 0.26343350218224, "grad_norm": 0.6346346220694609, "learning_rate": 3.897017532948119e-06, "loss": 0.0292, "step": 63135 }, { "epoch": 0.2634543648972303, "grad_norm": 0.9996754270897192, "learning_rate": 3.8968632267868116e-06, "loss": 0.0274, "step": 63140 }, { "epoch": 0.26347522761222053, "grad_norm": 0.8301527936458579, "learning_rate": 3.8967089389537535e-06, "loss": 0.0318, "step": 63145 }, { "epoch": 0.26349609032721083, "grad_norm": 0.607163443056083, "learning_rate": 3.896554669445319e-06, "loss": 0.0304, "step": 63150 }, { "epoch": 0.2635169530422011, "grad_norm": 0.7555475642171422, "learning_rate": 3.896400418257883e-06, "loss": 0.0256, "step": 63155 }, { "epoch": 0.2635378157571914, "grad_norm": 0.5254523278530617, "learning_rate": 3.896246185387816e-06, "loss": 0.0424, "step": 63160 }, { "epoch": 0.2635586784721817, "grad_norm": 0.6009176269562423, "learning_rate": 3.896091970831494e-06, "loss": 0.0236, "step": 63165 }, { "epoch": 0.2635795411871719, "grad_norm": 0.8812239483798102, "learning_rate": 3.895937774585294e-06, "loss": 0.0281, "step": 63170 }, { "epoch": 0.2636004039021622, "grad_norm": 1.0002748423858696, "learning_rate": 3.895783596645593e-06, "loss": 0.0353, "step": 63175 }, { "epoch": 0.2636212666171525, "grad_norm": 0.6947075652364956, "learning_rate": 3.895629437008768e-06, "loss": 0.0307, "step": 63180 }, { "epoch": 0.26364212933214276, "grad_norm": 0.7078149941995547, "learning_rate": 3.8954752956711975e-06, "loss": 0.0311, "step": 63185 }, { "epoch": 0.26366299204713306, "grad_norm": 1.013257482488838, "learning_rate": 3.895321172629263e-06, "loss": 0.033, "step": 63190 }, { "epoch": 0.2636838547621233, "grad_norm": 0.46803048003053244, "learning_rate": 3.895167067879346e-06, "loss": 0.0241, "step": 63195 }, { "epoch": 0.2637047174771136, "grad_norm": 0.5971888539963645, "learning_rate": 3.895012981417826e-06, "loss": 0.0251, "step": 63200 }, { "epoch": 0.2637255801921039, "grad_norm": 0.7859590463782617, "learning_rate": 3.894858913241088e-06, "loss": 0.0258, "step": 63205 }, { "epoch": 0.26374644290709415, "grad_norm": 0.5856158205226397, "learning_rate": 3.894704863345515e-06, "loss": 0.0178, "step": 63210 }, { "epoch": 0.26376730562208445, "grad_norm": 0.7737913566744661, "learning_rate": 3.894550831727491e-06, "loss": 0.0286, "step": 63215 }, { "epoch": 0.2637881683370747, "grad_norm": 0.9413996259324818, "learning_rate": 3.894396818383405e-06, "loss": 0.0293, "step": 63220 }, { "epoch": 0.263809031052065, "grad_norm": 0.8155322794013002, "learning_rate": 3.894242823309641e-06, "loss": 0.036, "step": 63225 }, { "epoch": 0.2638298937670553, "grad_norm": 0.7501257836161046, "learning_rate": 3.89408884650259e-06, "loss": 0.0383, "step": 63230 }, { "epoch": 0.26385075648204553, "grad_norm": 1.9818163145958894, "learning_rate": 3.893934887958638e-06, "loss": 0.0368, "step": 63235 }, { "epoch": 0.26387161919703583, "grad_norm": 0.9723840873653522, "learning_rate": 3.893780947674176e-06, "loss": 0.03, "step": 63240 }, { "epoch": 0.2638924819120261, "grad_norm": 0.9555358288895809, "learning_rate": 3.893627025645596e-06, "loss": 0.0352, "step": 63245 }, { "epoch": 0.2639133446270164, "grad_norm": 0.5387966668644933, "learning_rate": 3.89347312186929e-06, "loss": 0.0291, "step": 63250 }, { "epoch": 0.2639342073420067, "grad_norm": 1.1438488252345618, "learning_rate": 3.893319236341649e-06, "loss": 0.0348, "step": 63255 }, { "epoch": 0.2639550700569969, "grad_norm": 1.0731798187739165, "learning_rate": 3.893165369059068e-06, "loss": 0.0332, "step": 63260 }, { "epoch": 0.2639759327719872, "grad_norm": 0.7961774546272413, "learning_rate": 3.893011520017943e-06, "loss": 0.0403, "step": 63265 }, { "epoch": 0.2639967954869775, "grad_norm": 0.5609539319356976, "learning_rate": 3.892857689214668e-06, "loss": 0.0281, "step": 63270 }, { "epoch": 0.26401765820196776, "grad_norm": 0.6148234566524858, "learning_rate": 3.892703876645643e-06, "loss": 0.0251, "step": 63275 }, { "epoch": 0.26403852091695806, "grad_norm": 0.8509512194390997, "learning_rate": 3.8925500823072635e-06, "loss": 0.04, "step": 63280 }, { "epoch": 0.2640593836319483, "grad_norm": 1.43971282202732, "learning_rate": 3.892396306195929e-06, "loss": 0.0473, "step": 63285 }, { "epoch": 0.2640802463469386, "grad_norm": 0.7145521993734515, "learning_rate": 3.892242548308039e-06, "loss": 0.0327, "step": 63290 }, { "epoch": 0.2641011090619289, "grad_norm": 0.6895668950387781, "learning_rate": 3.892088808639995e-06, "loss": 0.0325, "step": 63295 }, { "epoch": 0.26412197177691915, "grad_norm": 0.8998056611909178, "learning_rate": 3.8919350871882e-06, "loss": 0.0287, "step": 63300 }, { "epoch": 0.26414283449190945, "grad_norm": 1.0721026583504096, "learning_rate": 3.891781383949055e-06, "loss": 0.0376, "step": 63305 }, { "epoch": 0.2641636972068997, "grad_norm": 0.7968253682074589, "learning_rate": 3.891627698918965e-06, "loss": 0.0354, "step": 63310 }, { "epoch": 0.26418455992189, "grad_norm": 0.8919731318245743, "learning_rate": 3.891474032094335e-06, "loss": 0.0316, "step": 63315 }, { "epoch": 0.2642054226368803, "grad_norm": 1.8675604414636156, "learning_rate": 3.891320383471571e-06, "loss": 0.0372, "step": 63320 }, { "epoch": 0.26422628535187054, "grad_norm": 0.500373803631166, "learning_rate": 3.891166753047079e-06, "loss": 0.0304, "step": 63325 }, { "epoch": 0.26424714806686084, "grad_norm": 0.8248053663644133, "learning_rate": 3.8910131408172685e-06, "loss": 0.0276, "step": 63330 }, { "epoch": 0.2642680107818511, "grad_norm": 0.9570680866810765, "learning_rate": 3.890859546778548e-06, "loss": 0.0295, "step": 63335 }, { "epoch": 0.2642888734968414, "grad_norm": 1.2699310965718098, "learning_rate": 3.890705970927325e-06, "loss": 0.0468, "step": 63340 }, { "epoch": 0.2643097362118317, "grad_norm": 0.6203085222232014, "learning_rate": 3.890552413260013e-06, "loss": 0.0301, "step": 63345 }, { "epoch": 0.2643305989268219, "grad_norm": 1.2684751821055693, "learning_rate": 3.890398873773024e-06, "loss": 0.0229, "step": 63350 }, { "epoch": 0.2643514616418122, "grad_norm": 0.665908519723836, "learning_rate": 3.8902453524627695e-06, "loss": 0.0258, "step": 63355 }, { "epoch": 0.2643723243568025, "grad_norm": 0.7670462802458179, "learning_rate": 3.8900918493256645e-06, "loss": 0.0317, "step": 63360 }, { "epoch": 0.26439318707179277, "grad_norm": 1.2528431065526555, "learning_rate": 3.889938364358123e-06, "loss": 0.0392, "step": 63365 }, { "epoch": 0.26441404978678307, "grad_norm": 0.888154554148472, "learning_rate": 3.88978489755656e-06, "loss": 0.0343, "step": 63370 }, { "epoch": 0.2644349125017733, "grad_norm": 0.9012093019059549, "learning_rate": 3.889631448917396e-06, "loss": 0.0276, "step": 63375 }, { "epoch": 0.2644557752167636, "grad_norm": 0.5310443452632465, "learning_rate": 3.889478018437045e-06, "loss": 0.0363, "step": 63380 }, { "epoch": 0.2644766379317539, "grad_norm": 0.8328988917631815, "learning_rate": 3.889324606111926e-06, "loss": 0.0332, "step": 63385 }, { "epoch": 0.26449750064674415, "grad_norm": 0.7694985216596223, "learning_rate": 3.8891712119384615e-06, "loss": 0.0276, "step": 63390 }, { "epoch": 0.26451836336173445, "grad_norm": 0.7152400462884435, "learning_rate": 3.88901783591307e-06, "loss": 0.0367, "step": 63395 }, { "epoch": 0.2645392260767247, "grad_norm": 1.1739140257959515, "learning_rate": 3.888864478032174e-06, "loss": 0.0316, "step": 63400 }, { "epoch": 0.264560088791715, "grad_norm": 0.9146360339838658, "learning_rate": 3.888711138292196e-06, "loss": 0.03, "step": 63405 }, { "epoch": 0.2645809515067053, "grad_norm": 0.7294408009012756, "learning_rate": 3.888557816689561e-06, "loss": 0.029, "step": 63410 }, { "epoch": 0.26460181422169554, "grad_norm": 0.8212934402145733, "learning_rate": 3.888404513220692e-06, "loss": 0.0363, "step": 63415 }, { "epoch": 0.26462267693668584, "grad_norm": 0.9366663410241477, "learning_rate": 3.888251227882015e-06, "loss": 0.0384, "step": 63420 }, { "epoch": 0.2646435396516761, "grad_norm": 0.7877707484093863, "learning_rate": 3.8880979606699585e-06, "loss": 0.0331, "step": 63425 }, { "epoch": 0.2646644023666664, "grad_norm": 0.8050889489122383, "learning_rate": 3.887944711580949e-06, "loss": 0.0375, "step": 63430 }, { "epoch": 0.2646852650816567, "grad_norm": 0.7581805822507506, "learning_rate": 3.887791480611415e-06, "loss": 0.0261, "step": 63435 }, { "epoch": 0.26470612779664693, "grad_norm": 0.7701157765712678, "learning_rate": 3.887638267757786e-06, "loss": 0.0383, "step": 63440 }, { "epoch": 0.26472699051163723, "grad_norm": 1.3829393588632775, "learning_rate": 3.887485073016493e-06, "loss": 0.0334, "step": 63445 }, { "epoch": 0.2647478532266275, "grad_norm": 0.6977822622206622, "learning_rate": 3.887331896383968e-06, "loss": 0.0283, "step": 63450 }, { "epoch": 0.26476871594161777, "grad_norm": 0.8629226397457509, "learning_rate": 3.887178737856643e-06, "loss": 0.0298, "step": 63455 }, { "epoch": 0.26478957865660807, "grad_norm": 0.6562129645420175, "learning_rate": 3.8870255974309515e-06, "loss": 0.024, "step": 63460 }, { "epoch": 0.2648104413715983, "grad_norm": 1.1433262997054363, "learning_rate": 3.886872475103328e-06, "loss": 0.0317, "step": 63465 }, { "epoch": 0.2648313040865886, "grad_norm": 0.5963822685523211, "learning_rate": 3.88671937087021e-06, "loss": 0.0273, "step": 63470 }, { "epoch": 0.2648521668015789, "grad_norm": 1.0264875225266106, "learning_rate": 3.886566284728032e-06, "loss": 0.0378, "step": 63475 }, { "epoch": 0.26487302951656916, "grad_norm": 0.7050436041322007, "learning_rate": 3.886413216673231e-06, "loss": 0.0251, "step": 63480 }, { "epoch": 0.26489389223155946, "grad_norm": 0.8975089624467006, "learning_rate": 3.886260166702248e-06, "loss": 0.0303, "step": 63485 }, { "epoch": 0.2649147549465497, "grad_norm": 0.5849548478423511, "learning_rate": 3.88610713481152e-06, "loss": 0.0305, "step": 63490 }, { "epoch": 0.26493561766154, "grad_norm": 0.8119850552533194, "learning_rate": 3.885954120997489e-06, "loss": 0.0403, "step": 63495 }, { "epoch": 0.2649564803765303, "grad_norm": 1.0514216175107083, "learning_rate": 3.885801125256597e-06, "loss": 0.0331, "step": 63500 }, { "epoch": 0.26497734309152055, "grad_norm": 0.4516245696110116, "learning_rate": 3.8856481475852845e-06, "loss": 0.0414, "step": 63505 }, { "epoch": 0.26499820580651084, "grad_norm": 1.048332642390231, "learning_rate": 3.8854951879799975e-06, "loss": 0.0264, "step": 63510 }, { "epoch": 0.2650190685215011, "grad_norm": 0.7701040779299377, "learning_rate": 3.8853422464371775e-06, "loss": 0.0303, "step": 63515 }, { "epoch": 0.2650399312364914, "grad_norm": 0.7137406213426812, "learning_rate": 3.885189322953272e-06, "loss": 0.0411, "step": 63520 }, { "epoch": 0.2650607939514817, "grad_norm": 0.883099363758844, "learning_rate": 3.885036417524726e-06, "loss": 0.0252, "step": 63525 }, { "epoch": 0.26508165666647193, "grad_norm": 0.5372215948514725, "learning_rate": 3.8848835301479886e-06, "loss": 0.0312, "step": 63530 }, { "epoch": 0.26510251938146223, "grad_norm": 0.7523514421836026, "learning_rate": 3.884730660819506e-06, "loss": 0.0478, "step": 63535 }, { "epoch": 0.26512338209645253, "grad_norm": 0.7686313596705124, "learning_rate": 3.884577809535729e-06, "loss": 0.04, "step": 63540 }, { "epoch": 0.2651442448114428, "grad_norm": 2.3950418284642616, "learning_rate": 3.884424976293107e-06, "loss": 0.0567, "step": 63545 }, { "epoch": 0.2651651075264331, "grad_norm": 0.7596581127389403, "learning_rate": 3.884272161088093e-06, "loss": 0.0299, "step": 63550 }, { "epoch": 0.2651859702414233, "grad_norm": 1.4924209156697061, "learning_rate": 3.884119363917137e-06, "loss": 0.0399, "step": 63555 }, { "epoch": 0.2652068329564136, "grad_norm": 0.7560143016035442, "learning_rate": 3.883966584776692e-06, "loss": 0.0358, "step": 63560 }, { "epoch": 0.2652276956714039, "grad_norm": 0.5727218011099727, "learning_rate": 3.883813823663215e-06, "loss": 0.0356, "step": 63565 }, { "epoch": 0.26524855838639416, "grad_norm": 1.0210089620896392, "learning_rate": 3.883661080573159e-06, "loss": 0.0284, "step": 63570 }, { "epoch": 0.26526942110138446, "grad_norm": 1.0885898889737986, "learning_rate": 3.883508355502979e-06, "loss": 0.0268, "step": 63575 }, { "epoch": 0.2652902838163747, "grad_norm": 0.3276056765315007, "learning_rate": 3.883355648449137e-06, "loss": 0.0223, "step": 63580 }, { "epoch": 0.265311146531365, "grad_norm": 0.4134135225045591, "learning_rate": 3.883202959408086e-06, "loss": 0.0247, "step": 63585 }, { "epoch": 0.2653320092463553, "grad_norm": 0.6767603952135024, "learning_rate": 3.883050288376287e-06, "loss": 0.0339, "step": 63590 }, { "epoch": 0.26535287196134555, "grad_norm": 0.5417113447592456, "learning_rate": 3.8828976353501995e-06, "loss": 0.0299, "step": 63595 }, { "epoch": 0.26537373467633585, "grad_norm": 0.4259199069437646, "learning_rate": 3.8827450003262854e-06, "loss": 0.0378, "step": 63600 }, { "epoch": 0.2653945973913261, "grad_norm": 0.881530305099463, "learning_rate": 3.8825923833010075e-06, "loss": 0.0311, "step": 63605 }, { "epoch": 0.2654154601063164, "grad_norm": 0.8097502960657098, "learning_rate": 3.882439784270826e-06, "loss": 0.0418, "step": 63610 }, { "epoch": 0.2654363228213067, "grad_norm": 0.830194004503973, "learning_rate": 3.882287203232207e-06, "loss": 0.0393, "step": 63615 }, { "epoch": 0.26545718553629694, "grad_norm": 1.0179555621674121, "learning_rate": 3.882134640181614e-06, "loss": 0.029, "step": 63620 }, { "epoch": 0.26547804825128724, "grad_norm": 1.186902925424778, "learning_rate": 3.881982095115514e-06, "loss": 0.0431, "step": 63625 }, { "epoch": 0.26549891096627753, "grad_norm": 0.7097988934473258, "learning_rate": 3.881829568030373e-06, "loss": 0.026, "step": 63630 }, { "epoch": 0.2655197736812678, "grad_norm": 0.8816659738129475, "learning_rate": 3.881677058922661e-06, "loss": 0.0379, "step": 63635 }, { "epoch": 0.2655406363962581, "grad_norm": 0.9186281325494231, "learning_rate": 3.881524567788845e-06, "loss": 0.0351, "step": 63640 }, { "epoch": 0.2655614991112483, "grad_norm": 0.7633071911928646, "learning_rate": 3.881372094625394e-06, "loss": 0.0266, "step": 63645 }, { "epoch": 0.2655823618262386, "grad_norm": 0.6306877536483284, "learning_rate": 3.881219639428779e-06, "loss": 0.0311, "step": 63650 }, { "epoch": 0.2656032245412289, "grad_norm": 0.9704689789808797, "learning_rate": 3.881067202195473e-06, "loss": 0.0256, "step": 63655 }, { "epoch": 0.26562408725621917, "grad_norm": 0.6643494434009537, "learning_rate": 3.880914782921949e-06, "loss": 0.0331, "step": 63660 }, { "epoch": 0.26564494997120947, "grad_norm": 1.049864391586234, "learning_rate": 3.880762381604678e-06, "loss": 0.0353, "step": 63665 }, { "epoch": 0.2656658126861997, "grad_norm": 1.1504653199426458, "learning_rate": 3.880609998240137e-06, "loss": 0.031, "step": 63670 }, { "epoch": 0.26568667540119, "grad_norm": 0.6742789906531548, "learning_rate": 3.880457632824801e-06, "loss": 0.0292, "step": 63675 }, { "epoch": 0.2657075381161803, "grad_norm": 0.8556988632402084, "learning_rate": 3.880305285355147e-06, "loss": 0.0332, "step": 63680 }, { "epoch": 0.26572840083117055, "grad_norm": 0.8976101463707495, "learning_rate": 3.8801529558276506e-06, "loss": 0.042, "step": 63685 }, { "epoch": 0.26574926354616085, "grad_norm": 0.8709124635007759, "learning_rate": 3.8800006442387925e-06, "loss": 0.0323, "step": 63690 }, { "epoch": 0.2657701262611511, "grad_norm": 1.08310068873852, "learning_rate": 3.879848350585051e-06, "loss": 0.0332, "step": 63695 }, { "epoch": 0.2657909889761414, "grad_norm": 0.6906392890235742, "learning_rate": 3.8796960748629074e-06, "loss": 0.023, "step": 63700 }, { "epoch": 0.2658118516911317, "grad_norm": 0.654684400070575, "learning_rate": 3.8795438170688425e-06, "loss": 0.0312, "step": 63705 }, { "epoch": 0.26583271440612194, "grad_norm": 1.5196904596455325, "learning_rate": 3.879391577199338e-06, "loss": 0.0426, "step": 63710 }, { "epoch": 0.26585357712111224, "grad_norm": 0.7564172751261588, "learning_rate": 3.879239355250877e-06, "loss": 0.0287, "step": 63715 }, { "epoch": 0.26587443983610254, "grad_norm": 0.8453867435647539, "learning_rate": 3.879087151219946e-06, "loss": 0.0302, "step": 63720 }, { "epoch": 0.2658953025510928, "grad_norm": 0.5559455017680126, "learning_rate": 3.87893496510303e-06, "loss": 0.0234, "step": 63725 }, { "epoch": 0.2659161652660831, "grad_norm": 0.7262694512148834, "learning_rate": 3.878782796896612e-06, "loss": 0.0314, "step": 63730 }, { "epoch": 0.2659370279810733, "grad_norm": 0.9203027607947485, "learning_rate": 3.878630646597183e-06, "loss": 0.0268, "step": 63735 }, { "epoch": 0.2659578906960636, "grad_norm": 0.7530337772090228, "learning_rate": 3.878478514201229e-06, "loss": 0.0286, "step": 63740 }, { "epoch": 0.2659787534110539, "grad_norm": 0.5661689695424289, "learning_rate": 3.878326399705239e-06, "loss": 0.0295, "step": 63745 }, { "epoch": 0.26599961612604417, "grad_norm": 1.24570339654288, "learning_rate": 3.878174303105705e-06, "loss": 0.0425, "step": 63750 }, { "epoch": 0.26602047884103447, "grad_norm": 0.73832466587335, "learning_rate": 3.878022224399116e-06, "loss": 0.032, "step": 63755 }, { "epoch": 0.2660413415560247, "grad_norm": 0.578763834377482, "learning_rate": 3.877870163581964e-06, "loss": 0.027, "step": 63760 }, { "epoch": 0.266062204271015, "grad_norm": 1.0232906398951298, "learning_rate": 3.877718120650744e-06, "loss": 0.0405, "step": 63765 }, { "epoch": 0.2660830669860053, "grad_norm": 0.5668675877632094, "learning_rate": 3.877566095601948e-06, "loss": 0.0258, "step": 63770 }, { "epoch": 0.26610392970099556, "grad_norm": 1.2001020880737188, "learning_rate": 3.877414088432071e-06, "loss": 0.0312, "step": 63775 }, { "epoch": 0.26612479241598586, "grad_norm": 0.8567816281726078, "learning_rate": 3.877262099137611e-06, "loss": 0.0394, "step": 63780 }, { "epoch": 0.2661456551309761, "grad_norm": 0.5232923015714287, "learning_rate": 3.877110127715062e-06, "loss": 0.0302, "step": 63785 }, { "epoch": 0.2661665178459664, "grad_norm": 0.1756012377237956, "learning_rate": 3.876958174160924e-06, "loss": 0.0248, "step": 63790 }, { "epoch": 0.2661873805609567, "grad_norm": 1.1155661025709758, "learning_rate": 3.876806238471694e-06, "loss": 0.0397, "step": 63795 }, { "epoch": 0.26620824327594694, "grad_norm": 0.6151399277549924, "learning_rate": 3.876654320643872e-06, "loss": 0.033, "step": 63800 }, { "epoch": 0.26622910599093724, "grad_norm": 0.44079526017960746, "learning_rate": 3.8765024206739595e-06, "loss": 0.0291, "step": 63805 }, { "epoch": 0.26624996870592754, "grad_norm": 0.9582294028194196, "learning_rate": 3.876350538558458e-06, "loss": 0.0287, "step": 63810 }, { "epoch": 0.2662708314209178, "grad_norm": 0.9657261188159054, "learning_rate": 3.87619867429387e-06, "loss": 0.0412, "step": 63815 }, { "epoch": 0.2662916941359081, "grad_norm": 1.4183102896843969, "learning_rate": 3.876046827876698e-06, "loss": 0.0283, "step": 63820 }, { "epoch": 0.26631255685089833, "grad_norm": 0.6546039754689943, "learning_rate": 3.875894999303448e-06, "loss": 0.0298, "step": 63825 }, { "epoch": 0.26633341956588863, "grad_norm": 0.7223281874005708, "learning_rate": 3.875743188570624e-06, "loss": 0.0293, "step": 63830 }, { "epoch": 0.26635428228087893, "grad_norm": 1.0610797966301868, "learning_rate": 3.875591395674734e-06, "loss": 0.0352, "step": 63835 }, { "epoch": 0.2663751449958692, "grad_norm": 0.44153236802352086, "learning_rate": 3.875439620612284e-06, "loss": 0.0306, "step": 63840 }, { "epoch": 0.2663960077108595, "grad_norm": 0.7306206247899567, "learning_rate": 3.875287863379784e-06, "loss": 0.0336, "step": 63845 }, { "epoch": 0.2664168704258497, "grad_norm": 1.3022480275373352, "learning_rate": 3.875136123973741e-06, "loss": 0.0304, "step": 63850 }, { "epoch": 0.26643773314084, "grad_norm": 0.6561848830587353, "learning_rate": 3.874984402390666e-06, "loss": 0.0312, "step": 63855 }, { "epoch": 0.2664585958558303, "grad_norm": 1.1847753313040366, "learning_rate": 3.874832698627072e-06, "loss": 0.0407, "step": 63860 }, { "epoch": 0.26647945857082056, "grad_norm": 0.7505542875412518, "learning_rate": 3.874681012679468e-06, "loss": 0.0234, "step": 63865 }, { "epoch": 0.26650032128581086, "grad_norm": 0.7738646535880943, "learning_rate": 3.874529344544371e-06, "loss": 0.0347, "step": 63870 }, { "epoch": 0.2665211840008011, "grad_norm": 0.9548400460134211, "learning_rate": 3.874377694218292e-06, "loss": 0.0303, "step": 63875 }, { "epoch": 0.2665420467157914, "grad_norm": 0.6980701974227617, "learning_rate": 3.8742260616977464e-06, "loss": 0.0284, "step": 63880 }, { "epoch": 0.2665629094307817, "grad_norm": 1.3167977855378306, "learning_rate": 3.874074446979251e-06, "loss": 0.0341, "step": 63885 }, { "epoch": 0.26658377214577195, "grad_norm": 0.48705238960444125, "learning_rate": 3.873922850059322e-06, "loss": 0.0332, "step": 63890 }, { "epoch": 0.26660463486076225, "grad_norm": 0.8576253915565383, "learning_rate": 3.873771270934479e-06, "loss": 0.0283, "step": 63895 }, { "epoch": 0.26662549757575255, "grad_norm": 1.1552278575565849, "learning_rate": 3.873619709601239e-06, "loss": 0.035, "step": 63900 }, { "epoch": 0.2666463602907428, "grad_norm": 0.9717330881484264, "learning_rate": 3.873468166056122e-06, "loss": 0.0337, "step": 63905 }, { "epoch": 0.2666672230057331, "grad_norm": 0.7186394310669809, "learning_rate": 3.873316640295649e-06, "loss": 0.0289, "step": 63910 }, { "epoch": 0.26668808572072333, "grad_norm": 0.6814164268907363, "learning_rate": 3.873165132316343e-06, "loss": 0.0338, "step": 63915 }, { "epoch": 0.26670894843571363, "grad_norm": 0.9126339896883198, "learning_rate": 3.873013642114724e-06, "loss": 0.0319, "step": 63920 }, { "epoch": 0.26672981115070393, "grad_norm": 0.9279132332216572, "learning_rate": 3.872862169687317e-06, "loss": 0.0287, "step": 63925 }, { "epoch": 0.2667506738656942, "grad_norm": 0.7613814609762518, "learning_rate": 3.872710715030648e-06, "loss": 0.0287, "step": 63930 }, { "epoch": 0.2667715365806845, "grad_norm": 0.735769893937476, "learning_rate": 3.872559278141241e-06, "loss": 0.0346, "step": 63935 }, { "epoch": 0.2667923992956747, "grad_norm": 0.8540638190735635, "learning_rate": 3.872407859015622e-06, "loss": 0.0373, "step": 63940 }, { "epoch": 0.266813262010665, "grad_norm": 0.7424948475202585, "learning_rate": 3.872256457650318e-06, "loss": 0.0256, "step": 63945 }, { "epoch": 0.2668341247256553, "grad_norm": 1.2170762896226208, "learning_rate": 3.872105074041859e-06, "loss": 0.0242, "step": 63950 }, { "epoch": 0.26685498744064556, "grad_norm": 0.8492870900138032, "learning_rate": 3.871953708186774e-06, "loss": 0.0323, "step": 63955 }, { "epoch": 0.26687585015563586, "grad_norm": 0.8630616511254995, "learning_rate": 3.871802360081592e-06, "loss": 0.022, "step": 63960 }, { "epoch": 0.2668967128706261, "grad_norm": 1.2839195085895494, "learning_rate": 3.871651029722847e-06, "loss": 0.028, "step": 63965 }, { "epoch": 0.2669175755856164, "grad_norm": 0.7406499973588311, "learning_rate": 3.871499717107067e-06, "loss": 0.027, "step": 63970 }, { "epoch": 0.2669384383006067, "grad_norm": 0.5307394458537749, "learning_rate": 3.871348422230789e-06, "loss": 0.0235, "step": 63975 }, { "epoch": 0.26695930101559695, "grad_norm": 1.0368147476285614, "learning_rate": 3.871197145090544e-06, "loss": 0.0441, "step": 63980 }, { "epoch": 0.26698016373058725, "grad_norm": 1.1129448005948854, "learning_rate": 3.871045885682869e-06, "loss": 0.0352, "step": 63985 }, { "epoch": 0.2670010264455775, "grad_norm": 0.8796499553335114, "learning_rate": 3.8708946440043e-06, "loss": 0.0277, "step": 63990 }, { "epoch": 0.2670218891605678, "grad_norm": 1.1457601710369656, "learning_rate": 3.870743420051372e-06, "loss": 0.0339, "step": 63995 }, { "epoch": 0.2670427518755581, "grad_norm": 1.0768867717666517, "learning_rate": 3.8705922138206244e-06, "loss": 0.0303, "step": 64000 }, { "epoch": 0.26706361459054834, "grad_norm": 1.1360147782434071, "learning_rate": 3.870441025308596e-06, "loss": 0.0348, "step": 64005 }, { "epoch": 0.26708447730553864, "grad_norm": 0.7045466790232241, "learning_rate": 3.8702898545118254e-06, "loss": 0.0342, "step": 64010 }, { "epoch": 0.26710534002052894, "grad_norm": 1.2302954286834233, "learning_rate": 3.870138701426855e-06, "loss": 0.0356, "step": 64015 }, { "epoch": 0.2671262027355192, "grad_norm": 0.5917199936731551, "learning_rate": 3.869987566050224e-06, "loss": 0.0289, "step": 64020 }, { "epoch": 0.2671470654505095, "grad_norm": 1.3679429042944418, "learning_rate": 3.869836448378478e-06, "loss": 0.0442, "step": 64025 }, { "epoch": 0.2671679281654997, "grad_norm": 1.0315966447906606, "learning_rate": 3.869685348408158e-06, "loss": 0.0265, "step": 64030 }, { "epoch": 0.26718879088049, "grad_norm": 0.6681369237094857, "learning_rate": 3.86953426613581e-06, "loss": 0.0302, "step": 64035 }, { "epoch": 0.2672096535954803, "grad_norm": 0.8986646449240543, "learning_rate": 3.869383201557979e-06, "loss": 0.029, "step": 64040 }, { "epoch": 0.26723051631047057, "grad_norm": 1.2220799596769285, "learning_rate": 3.869232154671211e-06, "loss": 0.0347, "step": 64045 }, { "epoch": 0.26725137902546087, "grad_norm": 0.7010394021281801, "learning_rate": 3.869081125472052e-06, "loss": 0.0356, "step": 64050 }, { "epoch": 0.2672722417404511, "grad_norm": 1.0219996082717584, "learning_rate": 3.868930113957054e-06, "loss": 0.0302, "step": 64055 }, { "epoch": 0.2672931044554414, "grad_norm": 0.8290208982460094, "learning_rate": 3.868779120122762e-06, "loss": 0.0226, "step": 64060 }, { "epoch": 0.2673139671704317, "grad_norm": 0.7488435957753481, "learning_rate": 3.868628143965728e-06, "loss": 0.0284, "step": 64065 }, { "epoch": 0.26733482988542195, "grad_norm": 0.5055103591306781, "learning_rate": 3.868477185482504e-06, "loss": 0.0288, "step": 64070 }, { "epoch": 0.26735569260041225, "grad_norm": 1.540469571496742, "learning_rate": 3.868326244669641e-06, "loss": 0.0295, "step": 64075 }, { "epoch": 0.2673765553154025, "grad_norm": 0.7816291774139061, "learning_rate": 3.868175321523692e-06, "loss": 0.0326, "step": 64080 }, { "epoch": 0.2673974180303928, "grad_norm": 0.8915270638122725, "learning_rate": 3.86802441604121e-06, "loss": 0.046, "step": 64085 }, { "epoch": 0.2674182807453831, "grad_norm": 1.1047757806239142, "learning_rate": 3.867873528218751e-06, "loss": 0.0386, "step": 64090 }, { "epoch": 0.26743914346037334, "grad_norm": 0.6982250240155912, "learning_rate": 3.867722658052871e-06, "loss": 0.0307, "step": 64095 }, { "epoch": 0.26746000617536364, "grad_norm": 0.8246848627878929, "learning_rate": 3.867571805540126e-06, "loss": 0.0356, "step": 64100 }, { "epoch": 0.26748086889035394, "grad_norm": 0.9843644573225839, "learning_rate": 3.867420970677074e-06, "loss": 0.0296, "step": 64105 }, { "epoch": 0.2675017316053442, "grad_norm": 1.0736254517662076, "learning_rate": 3.867270153460273e-06, "loss": 0.0341, "step": 64110 }, { "epoch": 0.2675225943203345, "grad_norm": 0.6465231291659498, "learning_rate": 3.867119353886284e-06, "loss": 0.0343, "step": 64115 }, { "epoch": 0.2675434570353247, "grad_norm": 0.8673993911786063, "learning_rate": 3.866968571951664e-06, "loss": 0.0354, "step": 64120 }, { "epoch": 0.267564319750315, "grad_norm": 0.3792517278813353, "learning_rate": 3.866817807652979e-06, "loss": 0.0272, "step": 64125 }, { "epoch": 0.2675851824653053, "grad_norm": 0.48985601630587994, "learning_rate": 3.866667060986788e-06, "loss": 0.0272, "step": 64130 }, { "epoch": 0.26760604518029557, "grad_norm": 0.9936640111987929, "learning_rate": 3.8665163319496554e-06, "loss": 0.0299, "step": 64135 }, { "epoch": 0.26762690789528587, "grad_norm": 0.7902961714018045, "learning_rate": 3.8663656205381465e-06, "loss": 0.0233, "step": 64140 }, { "epoch": 0.2676477706102761, "grad_norm": 0.8164617391819342, "learning_rate": 3.866214926748824e-06, "loss": 0.0292, "step": 64145 }, { "epoch": 0.2676686333252664, "grad_norm": 0.5512404875457798, "learning_rate": 3.8660642505782565e-06, "loss": 0.0272, "step": 64150 }, { "epoch": 0.2676894960402567, "grad_norm": 0.6900121698321287, "learning_rate": 3.86591359202301e-06, "loss": 0.0275, "step": 64155 }, { "epoch": 0.26771035875524696, "grad_norm": 1.4522244211030289, "learning_rate": 3.865762951079651e-06, "loss": 0.0353, "step": 64160 }, { "epoch": 0.26773122147023726, "grad_norm": 1.0327321591392034, "learning_rate": 3.865612327744751e-06, "loss": 0.0336, "step": 64165 }, { "epoch": 0.2677520841852275, "grad_norm": 0.950812966196307, "learning_rate": 3.8654617220148785e-06, "loss": 0.0437, "step": 64170 }, { "epoch": 0.2677729469002178, "grad_norm": 0.4977291825248882, "learning_rate": 3.865311133886605e-06, "loss": 0.0324, "step": 64175 }, { "epoch": 0.2677938096152081, "grad_norm": 0.6342131713688388, "learning_rate": 3.865160563356501e-06, "loss": 0.0249, "step": 64180 }, { "epoch": 0.26781467233019834, "grad_norm": 1.9273869722176482, "learning_rate": 3.86501001042114e-06, "loss": 0.035, "step": 64185 }, { "epoch": 0.26783553504518864, "grad_norm": 0.8789872897276932, "learning_rate": 3.864859475077095e-06, "loss": 0.0205, "step": 64190 }, { "epoch": 0.26785639776017894, "grad_norm": 1.3633608486673736, "learning_rate": 3.864708957320941e-06, "loss": 0.0445, "step": 64195 }, { "epoch": 0.2678772604751692, "grad_norm": 0.882468550161048, "learning_rate": 3.864558457149253e-06, "loss": 0.0295, "step": 64200 }, { "epoch": 0.2678981231901595, "grad_norm": 0.4824368559540919, "learning_rate": 3.864407974558607e-06, "loss": 0.0215, "step": 64205 }, { "epoch": 0.26791898590514973, "grad_norm": 1.048866948685728, "learning_rate": 3.864257509545583e-06, "loss": 0.03, "step": 64210 }, { "epoch": 0.26793984862014003, "grad_norm": 0.8779426328272234, "learning_rate": 3.864107062106756e-06, "loss": 0.0429, "step": 64215 }, { "epoch": 0.26796071133513033, "grad_norm": 0.4562619527100711, "learning_rate": 3.863956632238707e-06, "loss": 0.0323, "step": 64220 }, { "epoch": 0.2679815740501206, "grad_norm": 0.6543219210209879, "learning_rate": 3.863806219938014e-06, "loss": 0.0263, "step": 64225 }, { "epoch": 0.2680024367651109, "grad_norm": 0.9665903284337969, "learning_rate": 3.863655825201261e-06, "loss": 0.0341, "step": 64230 }, { "epoch": 0.2680232994801011, "grad_norm": 0.4973116054225536, "learning_rate": 3.863505448025029e-06, "loss": 0.0227, "step": 64235 }, { "epoch": 0.2680441621950914, "grad_norm": 3.3362138467416225, "learning_rate": 3.8633550884059e-06, "loss": 0.0376, "step": 64240 }, { "epoch": 0.2680650249100817, "grad_norm": 0.5540737137300166, "learning_rate": 3.863204746340458e-06, "loss": 0.029, "step": 64245 }, { "epoch": 0.26808588762507196, "grad_norm": 0.665872629316349, "learning_rate": 3.863054421825288e-06, "loss": 0.0297, "step": 64250 }, { "epoch": 0.26810675034006226, "grad_norm": 0.8461542552730518, "learning_rate": 3.862904114856976e-06, "loss": 0.0262, "step": 64255 }, { "epoch": 0.2681276130550525, "grad_norm": 0.9748861174519162, "learning_rate": 3.862753825432108e-06, "loss": 0.0389, "step": 64260 }, { "epoch": 0.2681484757700428, "grad_norm": 0.6187495725360762, "learning_rate": 3.862603553547272e-06, "loss": 0.0301, "step": 64265 }, { "epoch": 0.2681693384850331, "grad_norm": 0.770669043438193, "learning_rate": 3.862453299199057e-06, "loss": 0.0342, "step": 64270 }, { "epoch": 0.26819020120002335, "grad_norm": 0.6878697575475812, "learning_rate": 3.86230306238405e-06, "loss": 0.0289, "step": 64275 }, { "epoch": 0.26821106391501365, "grad_norm": 1.196427638533703, "learning_rate": 3.8621528430988456e-06, "loss": 0.0316, "step": 64280 }, { "epoch": 0.26823192663000395, "grad_norm": 0.43052214033459363, "learning_rate": 3.862002641340032e-06, "loss": 0.0328, "step": 64285 }, { "epoch": 0.2682527893449942, "grad_norm": 0.8901786690026752, "learning_rate": 3.861852457104201e-06, "loss": 0.0365, "step": 64290 }, { "epoch": 0.2682736520599845, "grad_norm": 1.4888737001974903, "learning_rate": 3.861702290387947e-06, "loss": 0.0397, "step": 64295 }, { "epoch": 0.26829451477497473, "grad_norm": 1.4696459193291656, "learning_rate": 3.8615521411878655e-06, "loss": 0.0387, "step": 64300 }, { "epoch": 0.26831537748996503, "grad_norm": 0.6826961766435612, "learning_rate": 3.861402009500548e-06, "loss": 0.0265, "step": 64305 }, { "epoch": 0.26833624020495533, "grad_norm": 0.9200868882575869, "learning_rate": 3.861251895322593e-06, "loss": 0.038, "step": 64310 }, { "epoch": 0.2683571029199456, "grad_norm": 1.084216072062962, "learning_rate": 3.861101798650596e-06, "loss": 0.0311, "step": 64315 }, { "epoch": 0.2683779656349359, "grad_norm": 1.2262325131744838, "learning_rate": 3.860951719481156e-06, "loss": 0.0345, "step": 64320 }, { "epoch": 0.2683988283499261, "grad_norm": 0.5125711355229902, "learning_rate": 3.8608016578108705e-06, "loss": 0.0329, "step": 64325 }, { "epoch": 0.2684196910649164, "grad_norm": 0.7320713149496901, "learning_rate": 3.86065161363634e-06, "loss": 0.0356, "step": 64330 }, { "epoch": 0.2684405537799067, "grad_norm": 0.6168890024949929, "learning_rate": 3.860501586954164e-06, "loss": 0.0245, "step": 64335 }, { "epoch": 0.26846141649489697, "grad_norm": 0.7091102351863732, "learning_rate": 3.860351577760945e-06, "loss": 0.0357, "step": 64340 }, { "epoch": 0.26848227920988726, "grad_norm": 0.7245854129404083, "learning_rate": 3.860201586053285e-06, "loss": 0.0312, "step": 64345 }, { "epoch": 0.2685031419248775, "grad_norm": 1.4520898569052925, "learning_rate": 3.860051611827788e-06, "loss": 0.0294, "step": 64350 }, { "epoch": 0.2685240046398678, "grad_norm": 1.4808740648758243, "learning_rate": 3.859901655081056e-06, "loss": 0.0346, "step": 64355 }, { "epoch": 0.2685448673548581, "grad_norm": 0.9584020600682177, "learning_rate": 3.859751715809697e-06, "loss": 0.0379, "step": 64360 }, { "epoch": 0.26856573006984835, "grad_norm": 0.8083308682650169, "learning_rate": 3.859601794010314e-06, "loss": 0.0388, "step": 64365 }, { "epoch": 0.26858659278483865, "grad_norm": 0.6001800129957132, "learning_rate": 3.859451889679517e-06, "loss": 0.0266, "step": 64370 }, { "epoch": 0.26860745549982895, "grad_norm": 1.0692739539258542, "learning_rate": 3.859302002813913e-06, "loss": 0.037, "step": 64375 }, { "epoch": 0.2686283182148192, "grad_norm": 1.201914839388998, "learning_rate": 3.859152133410111e-06, "loss": 0.0356, "step": 64380 }, { "epoch": 0.2686491809298095, "grad_norm": 0.8463949153082327, "learning_rate": 3.859002281464719e-06, "loss": 0.0358, "step": 64385 }, { "epoch": 0.26867004364479974, "grad_norm": 0.8877266320451873, "learning_rate": 3.858852446974349e-06, "loss": 0.0256, "step": 64390 }, { "epoch": 0.26869090635979004, "grad_norm": 0.7873014718858136, "learning_rate": 3.858702629935613e-06, "loss": 0.0281, "step": 64395 }, { "epoch": 0.26871176907478034, "grad_norm": 1.3320420261665102, "learning_rate": 3.858552830345123e-06, "loss": 0.0362, "step": 64400 }, { "epoch": 0.2687326317897706, "grad_norm": 0.6864427841061476, "learning_rate": 3.858403048199493e-06, "loss": 0.0308, "step": 64405 }, { "epoch": 0.2687534945047609, "grad_norm": 0.7795029713570437, "learning_rate": 3.858253283495336e-06, "loss": 0.0365, "step": 64410 }, { "epoch": 0.2687743572197511, "grad_norm": 0.8961065944980219, "learning_rate": 3.8581035362292695e-06, "loss": 0.0313, "step": 64415 }, { "epoch": 0.2687952199347414, "grad_norm": 1.675905637988219, "learning_rate": 3.857953806397907e-06, "loss": 0.034, "step": 64420 }, { "epoch": 0.2688160826497317, "grad_norm": 0.985988515077283, "learning_rate": 3.8578040939978676e-06, "loss": 0.0354, "step": 64425 }, { "epoch": 0.26883694536472197, "grad_norm": 0.27979302234684844, "learning_rate": 3.85765439902577e-06, "loss": 0.0329, "step": 64430 }, { "epoch": 0.26885780807971227, "grad_norm": 0.5394627408245651, "learning_rate": 3.85750472147823e-06, "loss": 0.0214, "step": 64435 }, { "epoch": 0.2688786707947025, "grad_norm": 1.110387045109909, "learning_rate": 3.85735506135187e-06, "loss": 0.0257, "step": 64440 }, { "epoch": 0.2688995335096928, "grad_norm": 1.0742253389656835, "learning_rate": 3.857205418643311e-06, "loss": 0.0316, "step": 64445 }, { "epoch": 0.2689203962246831, "grad_norm": 0.7406080844472379, "learning_rate": 3.857055793349174e-06, "loss": 0.0296, "step": 64450 }, { "epoch": 0.26894125893967336, "grad_norm": 1.0418550526440282, "learning_rate": 3.856906185466082e-06, "loss": 0.0329, "step": 64455 }, { "epoch": 0.26896212165466366, "grad_norm": 0.6677940728293493, "learning_rate": 3.8567565949906575e-06, "loss": 0.031, "step": 64460 }, { "epoch": 0.26898298436965395, "grad_norm": 0.6166581264023355, "learning_rate": 3.856607021919526e-06, "loss": 0.0323, "step": 64465 }, { "epoch": 0.2690038470846442, "grad_norm": 0.9810856901931306, "learning_rate": 3.856457466249311e-06, "loss": 0.0359, "step": 64470 }, { "epoch": 0.2690247097996345, "grad_norm": 1.7223586663071475, "learning_rate": 3.856307927976642e-06, "loss": 0.0371, "step": 64475 }, { "epoch": 0.26904557251462474, "grad_norm": 0.7021458203175082, "learning_rate": 3.856158407098145e-06, "loss": 0.0309, "step": 64480 }, { "epoch": 0.26906643522961504, "grad_norm": 0.6147705364720337, "learning_rate": 3.856008903610446e-06, "loss": 0.0284, "step": 64485 }, { "epoch": 0.26908729794460534, "grad_norm": 1.144405641500802, "learning_rate": 3.855859417510177e-06, "loss": 0.0229, "step": 64490 }, { "epoch": 0.2691081606595956, "grad_norm": 0.7425966212478924, "learning_rate": 3.855709948793966e-06, "loss": 0.0326, "step": 64495 }, { "epoch": 0.2691290233745859, "grad_norm": 1.0439283978592597, "learning_rate": 3.855560497458446e-06, "loss": 0.0312, "step": 64500 }, { "epoch": 0.26914988608957613, "grad_norm": 0.7539506586133531, "learning_rate": 3.855411063500245e-06, "loss": 0.0249, "step": 64505 }, { "epoch": 0.26917074880456643, "grad_norm": 0.8523924448932364, "learning_rate": 3.855261646915999e-06, "loss": 0.0439, "step": 64510 }, { "epoch": 0.26919161151955673, "grad_norm": 0.7198826047356222, "learning_rate": 3.855112247702342e-06, "loss": 0.0331, "step": 64515 }, { "epoch": 0.269212474234547, "grad_norm": 0.8306836352532914, "learning_rate": 3.854962865855905e-06, "loss": 0.0379, "step": 64520 }, { "epoch": 0.26923333694953727, "grad_norm": 0.7720146557052059, "learning_rate": 3.854813501373328e-06, "loss": 0.0311, "step": 64525 }, { "epoch": 0.2692541996645275, "grad_norm": 0.8525923967203578, "learning_rate": 3.854664154251245e-06, "loss": 0.0394, "step": 64530 }, { "epoch": 0.2692750623795178, "grad_norm": 1.108015204060077, "learning_rate": 3.8545148244862915e-06, "loss": 0.0375, "step": 64535 }, { "epoch": 0.2692959250945081, "grad_norm": 0.9895758096034584, "learning_rate": 3.854365512075109e-06, "loss": 0.0335, "step": 64540 }, { "epoch": 0.26931678780949836, "grad_norm": 0.6012308610737316, "learning_rate": 3.854216217014334e-06, "loss": 0.0281, "step": 64545 }, { "epoch": 0.26933765052448866, "grad_norm": 0.5546612651069548, "learning_rate": 3.854066939300608e-06, "loss": 0.0362, "step": 64550 }, { "epoch": 0.26935851323947896, "grad_norm": 0.7070946308987882, "learning_rate": 3.853917678930572e-06, "loss": 0.0296, "step": 64555 }, { "epoch": 0.2693793759544692, "grad_norm": 0.5864198427172918, "learning_rate": 3.853768435900867e-06, "loss": 0.0205, "step": 64560 }, { "epoch": 0.2694002386694595, "grad_norm": 1.001386083130817, "learning_rate": 3.853619210208135e-06, "loss": 0.0372, "step": 64565 }, { "epoch": 0.26942110138444975, "grad_norm": 0.9621665810407624, "learning_rate": 3.853470001849023e-06, "loss": 0.0276, "step": 64570 }, { "epoch": 0.26944196409944005, "grad_norm": 0.7116305667004854, "learning_rate": 3.853320810820172e-06, "loss": 0.028, "step": 64575 }, { "epoch": 0.26946282681443035, "grad_norm": 0.8099951593393231, "learning_rate": 3.853171637118229e-06, "loss": 0.032, "step": 64580 }, { "epoch": 0.2694836895294206, "grad_norm": 0.7909126060980566, "learning_rate": 3.853022480739839e-06, "loss": 0.0315, "step": 64585 }, { "epoch": 0.2695045522444109, "grad_norm": 0.5114056522274115, "learning_rate": 3.852873341681652e-06, "loss": 0.0391, "step": 64590 }, { "epoch": 0.26952541495940113, "grad_norm": 0.7472122527581356, "learning_rate": 3.852724219940314e-06, "loss": 0.0321, "step": 64595 }, { "epoch": 0.26954627767439143, "grad_norm": 0.5734771907419374, "learning_rate": 3.852575115512475e-06, "loss": 0.0298, "step": 64600 }, { "epoch": 0.26956714038938173, "grad_norm": 2.1065240284924878, "learning_rate": 3.852426028394783e-06, "loss": 0.0482, "step": 64605 }, { "epoch": 0.269588003104372, "grad_norm": 1.2348228681532571, "learning_rate": 3.852276958583893e-06, "loss": 0.0261, "step": 64610 }, { "epoch": 0.2696088658193623, "grad_norm": 0.7687144903447684, "learning_rate": 3.852127906076454e-06, "loss": 0.0342, "step": 64615 }, { "epoch": 0.2696297285343525, "grad_norm": 0.807259106108173, "learning_rate": 3.851978870869119e-06, "loss": 0.0285, "step": 64620 }, { "epoch": 0.2696505912493428, "grad_norm": 0.25832137556567947, "learning_rate": 3.851829852958541e-06, "loss": 0.0352, "step": 64625 }, { "epoch": 0.2696714539643331, "grad_norm": 3.1105813776964353, "learning_rate": 3.851680852341376e-06, "loss": 0.0336, "step": 64630 }, { "epoch": 0.26969231667932336, "grad_norm": 1.2273784805956192, "learning_rate": 3.851531869014278e-06, "loss": 0.033, "step": 64635 }, { "epoch": 0.26971317939431366, "grad_norm": 0.47579138998330217, "learning_rate": 3.851382902973905e-06, "loss": 0.0291, "step": 64640 }, { "epoch": 0.26973404210930396, "grad_norm": 0.7171617489724087, "learning_rate": 3.851233954216912e-06, "loss": 0.027, "step": 64645 }, { "epoch": 0.2697549048242942, "grad_norm": 0.777280772904877, "learning_rate": 3.851085022739961e-06, "loss": 0.0352, "step": 64650 }, { "epoch": 0.2697757675392845, "grad_norm": 0.8791292687184193, "learning_rate": 3.8509361085397065e-06, "loss": 0.0263, "step": 64655 }, { "epoch": 0.26979663025427475, "grad_norm": 0.6732713201521858, "learning_rate": 3.850787211612812e-06, "loss": 0.0331, "step": 64660 }, { "epoch": 0.26981749296926505, "grad_norm": 1.493374519511408, "learning_rate": 3.850638331955936e-06, "loss": 0.0316, "step": 64665 }, { "epoch": 0.26983835568425535, "grad_norm": 0.516301949214794, "learning_rate": 3.850489469565742e-06, "loss": 0.0357, "step": 64670 }, { "epoch": 0.2698592183992456, "grad_norm": 0.5297239814077732, "learning_rate": 3.850340624438891e-06, "loss": 0.0286, "step": 64675 }, { "epoch": 0.2698800811142359, "grad_norm": 0.9147417146961754, "learning_rate": 3.850191796572049e-06, "loss": 0.0349, "step": 64680 }, { "epoch": 0.26990094382922614, "grad_norm": 0.6718948425571613, "learning_rate": 3.850042985961878e-06, "loss": 0.0205, "step": 64685 }, { "epoch": 0.26992180654421644, "grad_norm": 1.1693861167425539, "learning_rate": 3.849894192605044e-06, "loss": 0.0344, "step": 64690 }, { "epoch": 0.26994266925920674, "grad_norm": 1.0804368241879843, "learning_rate": 3.849745416498215e-06, "loss": 0.0308, "step": 64695 }, { "epoch": 0.269963531974197, "grad_norm": 0.7707217152640787, "learning_rate": 3.849596657638056e-06, "loss": 0.0298, "step": 64700 }, { "epoch": 0.2699843946891873, "grad_norm": 1.5281543458117122, "learning_rate": 3.8494479160212365e-06, "loss": 0.0432, "step": 64705 }, { "epoch": 0.2700052574041775, "grad_norm": 1.2936024474581427, "learning_rate": 3.849299191644424e-06, "loss": 0.0387, "step": 64710 }, { "epoch": 0.2700261201191678, "grad_norm": 0.7587194149310532, "learning_rate": 3.849150484504291e-06, "loss": 0.0321, "step": 64715 }, { "epoch": 0.2700469828341581, "grad_norm": 0.5677210671187516, "learning_rate": 3.849001794597506e-06, "loss": 0.0236, "step": 64720 }, { "epoch": 0.27006784554914837, "grad_norm": 0.6954991046549163, "learning_rate": 3.848853121920741e-06, "loss": 0.0302, "step": 64725 }, { "epoch": 0.27008870826413867, "grad_norm": 0.5599894716368016, "learning_rate": 3.848704466470668e-06, "loss": 0.0325, "step": 64730 }, { "epoch": 0.27010957097912897, "grad_norm": 1.2875524293222813, "learning_rate": 3.8485558282439634e-06, "loss": 0.0361, "step": 64735 }, { "epoch": 0.2701304336941192, "grad_norm": 0.9982074724015716, "learning_rate": 3.848407207237299e-06, "loss": 0.0311, "step": 64740 }, { "epoch": 0.2701512964091095, "grad_norm": 0.9833459162500644, "learning_rate": 3.84825860344735e-06, "loss": 0.0272, "step": 64745 }, { "epoch": 0.27017215912409975, "grad_norm": 0.6808227347153635, "learning_rate": 3.848110016870794e-06, "loss": 0.0314, "step": 64750 }, { "epoch": 0.27019302183909005, "grad_norm": 0.8989206294684939, "learning_rate": 3.847961447504307e-06, "loss": 0.0256, "step": 64755 }, { "epoch": 0.27021388455408035, "grad_norm": 1.10816662325499, "learning_rate": 3.8478128953445685e-06, "loss": 0.0336, "step": 64760 }, { "epoch": 0.2702347472690706, "grad_norm": 0.6693235731256363, "learning_rate": 3.847664360388256e-06, "loss": 0.0547, "step": 64765 }, { "epoch": 0.2702556099840609, "grad_norm": 1.0321038503067455, "learning_rate": 3.847515842632049e-06, "loss": 0.0364, "step": 64770 }, { "epoch": 0.27027647269905114, "grad_norm": 0.8114015235274731, "learning_rate": 3.847367342072629e-06, "loss": 0.0297, "step": 64775 }, { "epoch": 0.27029733541404144, "grad_norm": 1.9326914409459082, "learning_rate": 3.847218858706678e-06, "loss": 0.0412, "step": 64780 }, { "epoch": 0.27031819812903174, "grad_norm": 0.604878033111649, "learning_rate": 3.847070392530877e-06, "loss": 0.0335, "step": 64785 }, { "epoch": 0.270339060844022, "grad_norm": 0.7618643699960753, "learning_rate": 3.8469219435419106e-06, "loss": 0.0319, "step": 64790 }, { "epoch": 0.2703599235590123, "grad_norm": 0.9946482203037695, "learning_rate": 3.846773511736463e-06, "loss": 0.0327, "step": 64795 }, { "epoch": 0.2703807862740025, "grad_norm": 1.1601673837583735, "learning_rate": 3.846625097111219e-06, "loss": 0.0429, "step": 64800 }, { "epoch": 0.2704016489889928, "grad_norm": 0.7019090963777962, "learning_rate": 3.846476699662864e-06, "loss": 0.0323, "step": 64805 }, { "epoch": 0.2704225117039831, "grad_norm": 0.6271786365181026, "learning_rate": 3.846328319388087e-06, "loss": 0.0318, "step": 64810 }, { "epoch": 0.27044337441897337, "grad_norm": 0.6608559426106548, "learning_rate": 3.8461799562835725e-06, "loss": 0.0273, "step": 64815 }, { "epoch": 0.27046423713396367, "grad_norm": 2.0234549525346806, "learning_rate": 3.846031610346012e-06, "loss": 0.0281, "step": 64820 }, { "epoch": 0.27048509984895397, "grad_norm": 0.5446474738568219, "learning_rate": 3.845883281572097e-06, "loss": 0.0275, "step": 64825 }, { "epoch": 0.2705059625639442, "grad_norm": 0.4479350682128502, "learning_rate": 3.845734969958511e-06, "loss": 0.0256, "step": 64830 }, { "epoch": 0.2705268252789345, "grad_norm": 0.9354711932456883, "learning_rate": 3.845586675501953e-06, "loss": 0.0346, "step": 64835 }, { "epoch": 0.27054768799392476, "grad_norm": 0.8575648753296408, "learning_rate": 3.845438398199111e-06, "loss": 0.0331, "step": 64840 }, { "epoch": 0.27056855070891506, "grad_norm": 0.6454853703406888, "learning_rate": 3.84529013804668e-06, "loss": 0.0376, "step": 64845 }, { "epoch": 0.27058941342390536, "grad_norm": 0.9951424611927732, "learning_rate": 3.845141895041353e-06, "loss": 0.0315, "step": 64850 }, { "epoch": 0.2706102761388956, "grad_norm": 0.7603156206571571, "learning_rate": 3.844993669179826e-06, "loss": 0.0274, "step": 64855 }, { "epoch": 0.2706311388538859, "grad_norm": 0.3653242867587993, "learning_rate": 3.844845460458794e-06, "loss": 0.0298, "step": 64860 }, { "epoch": 0.27065200156887614, "grad_norm": 1.053834958493759, "learning_rate": 3.844697268874953e-06, "loss": 0.0317, "step": 64865 }, { "epoch": 0.27067286428386644, "grad_norm": 1.0113350267397345, "learning_rate": 3.844549094425003e-06, "loss": 0.0549, "step": 64870 }, { "epoch": 0.27069372699885674, "grad_norm": 0.6845925828014783, "learning_rate": 3.844400937105641e-06, "loss": 0.0279, "step": 64875 }, { "epoch": 0.270714589713847, "grad_norm": 0.8803135890568888, "learning_rate": 3.8442527969135674e-06, "loss": 0.0383, "step": 64880 }, { "epoch": 0.2707354524288373, "grad_norm": 0.8747602299126949, "learning_rate": 3.844104673845481e-06, "loss": 0.0298, "step": 64885 }, { "epoch": 0.27075631514382753, "grad_norm": 0.7887238037626833, "learning_rate": 3.843956567898085e-06, "loss": 0.0317, "step": 64890 }, { "epoch": 0.27077717785881783, "grad_norm": 0.716170517050251, "learning_rate": 3.84380847906808e-06, "loss": 0.0335, "step": 64895 }, { "epoch": 0.27079804057380813, "grad_norm": 1.136959184822322, "learning_rate": 3.843660407352169e-06, "loss": 0.0257, "step": 64900 }, { "epoch": 0.2708189032887984, "grad_norm": 0.9552568110859043, "learning_rate": 3.843512352747057e-06, "loss": 0.0283, "step": 64905 }, { "epoch": 0.2708397660037887, "grad_norm": 0.7697472475110018, "learning_rate": 3.843364315249448e-06, "loss": 0.0311, "step": 64910 }, { "epoch": 0.270860628718779, "grad_norm": 0.6248114659845855, "learning_rate": 3.843216294856048e-06, "loss": 0.0208, "step": 64915 }, { "epoch": 0.2708814914337692, "grad_norm": 0.8401653027995625, "learning_rate": 3.843068291563563e-06, "loss": 0.0464, "step": 64920 }, { "epoch": 0.2709023541487595, "grad_norm": 0.7796065042097879, "learning_rate": 3.842920305368701e-06, "loss": 0.0263, "step": 64925 }, { "epoch": 0.27092321686374976, "grad_norm": 0.7393411942552004, "learning_rate": 3.842772336268171e-06, "loss": 0.0413, "step": 64930 }, { "epoch": 0.27094407957874006, "grad_norm": 1.1526669883162155, "learning_rate": 3.84262438425868e-06, "loss": 0.0315, "step": 64935 }, { "epoch": 0.27096494229373036, "grad_norm": 1.5822815354600628, "learning_rate": 3.84247644933694e-06, "loss": 0.0437, "step": 64940 }, { "epoch": 0.2709858050087206, "grad_norm": 0.8325415257672386, "learning_rate": 3.842328531499662e-06, "loss": 0.0288, "step": 64945 }, { "epoch": 0.2710066677237109, "grad_norm": 0.713612081111429, "learning_rate": 3.842180630743557e-06, "loss": 0.0275, "step": 64950 }, { "epoch": 0.27102753043870115, "grad_norm": 0.6932716089913668, "learning_rate": 3.842032747065339e-06, "loss": 0.0235, "step": 64955 }, { "epoch": 0.27104839315369145, "grad_norm": 0.9093570697794477, "learning_rate": 3.84188488046172e-06, "loss": 0.0327, "step": 64960 }, { "epoch": 0.27106925586868175, "grad_norm": 1.4096541021370346, "learning_rate": 3.841737030929415e-06, "loss": 0.0264, "step": 64965 }, { "epoch": 0.271090118583672, "grad_norm": 1.0990390387152518, "learning_rate": 3.84158919846514e-06, "loss": 0.0333, "step": 64970 }, { "epoch": 0.2711109812986623, "grad_norm": 0.9161676406522137, "learning_rate": 3.841441383065613e-06, "loss": 0.0309, "step": 64975 }, { "epoch": 0.27113184401365253, "grad_norm": 1.1375777158389295, "learning_rate": 3.8412935847275475e-06, "loss": 0.05, "step": 64980 }, { "epoch": 0.27115270672864283, "grad_norm": 0.888299144382542, "learning_rate": 3.8411458034476635e-06, "loss": 0.0405, "step": 64985 }, { "epoch": 0.27117356944363313, "grad_norm": 1.8542131797116328, "learning_rate": 3.840998039222679e-06, "loss": 0.0422, "step": 64990 }, { "epoch": 0.2711944321586234, "grad_norm": 0.7164506479155899, "learning_rate": 3.840850292049316e-06, "loss": 0.021, "step": 64995 }, { "epoch": 0.2712152948736137, "grad_norm": 0.928216371686294, "learning_rate": 3.840702561924294e-06, "loss": 0.0483, "step": 65000 }, { "epoch": 0.271236157588604, "grad_norm": 1.2721797284323058, "learning_rate": 3.840554848844334e-06, "loss": 0.0301, "step": 65005 }, { "epoch": 0.2712570203035942, "grad_norm": 0.6588093595238088, "learning_rate": 3.84040715280616e-06, "loss": 0.0404, "step": 65010 }, { "epoch": 0.2712778830185845, "grad_norm": 0.554432069546227, "learning_rate": 3.840259473806493e-06, "loss": 0.0299, "step": 65015 }, { "epoch": 0.27129874573357476, "grad_norm": 0.4608154387951588, "learning_rate": 3.840111811842059e-06, "loss": 0.0429, "step": 65020 }, { "epoch": 0.27131960844856506, "grad_norm": 1.0797405328359277, "learning_rate": 3.839964166909583e-06, "loss": 0.0391, "step": 65025 }, { "epoch": 0.27134047116355536, "grad_norm": 0.9381843867260304, "learning_rate": 3.83981653900579e-06, "loss": 0.0302, "step": 65030 }, { "epoch": 0.2713613338785456, "grad_norm": 0.9567591873184901, "learning_rate": 3.83966892812741e-06, "loss": 0.0284, "step": 65035 }, { "epoch": 0.2713821965935359, "grad_norm": 0.8476380135680767, "learning_rate": 3.8395213342711665e-06, "loss": 0.0352, "step": 65040 }, { "epoch": 0.27140305930852615, "grad_norm": 0.9931948242099081, "learning_rate": 3.839373757433791e-06, "loss": 0.029, "step": 65045 }, { "epoch": 0.27142392202351645, "grad_norm": 0.580573003474319, "learning_rate": 3.8392261976120115e-06, "loss": 0.0248, "step": 65050 }, { "epoch": 0.27144478473850675, "grad_norm": 0.9739182042421474, "learning_rate": 3.8390786548025596e-06, "loss": 0.026, "step": 65055 }, { "epoch": 0.271465647453497, "grad_norm": 0.9480121503251135, "learning_rate": 3.838931129002166e-06, "loss": 0.024, "step": 65060 }, { "epoch": 0.2714865101684873, "grad_norm": 0.969635606219969, "learning_rate": 3.838783620207564e-06, "loss": 0.0353, "step": 65065 }, { "epoch": 0.27150737288347754, "grad_norm": 0.5396869915810759, "learning_rate": 3.838636128415485e-06, "loss": 0.0275, "step": 65070 }, { "epoch": 0.27152823559846784, "grad_norm": 1.3937430883883717, "learning_rate": 3.838488653622662e-06, "loss": 0.0375, "step": 65075 }, { "epoch": 0.27154909831345814, "grad_norm": 0.6978004661676206, "learning_rate": 3.838341195825834e-06, "loss": 0.0275, "step": 65080 }, { "epoch": 0.2715699610284484, "grad_norm": 0.8371382684825593, "learning_rate": 3.838193755021733e-06, "loss": 0.033, "step": 65085 }, { "epoch": 0.2715908237434387, "grad_norm": 0.6225236429287526, "learning_rate": 3.838046331207096e-06, "loss": 0.0311, "step": 65090 }, { "epoch": 0.271611686458429, "grad_norm": 1.9848625984257398, "learning_rate": 3.837898924378662e-06, "loss": 0.0494, "step": 65095 }, { "epoch": 0.2716325491734192, "grad_norm": 0.9134885107405784, "learning_rate": 3.837751534533169e-06, "loss": 0.0285, "step": 65100 }, { "epoch": 0.2716534118884095, "grad_norm": 0.7110430080434006, "learning_rate": 3.837604161667354e-06, "loss": 0.0317, "step": 65105 }, { "epoch": 0.27167427460339977, "grad_norm": 0.9623954804446456, "learning_rate": 3.837456805777961e-06, "loss": 0.0332, "step": 65110 }, { "epoch": 0.27169513731839007, "grad_norm": 0.8290099516760197, "learning_rate": 3.837309466861727e-06, "loss": 0.0414, "step": 65115 }, { "epoch": 0.27171600003338037, "grad_norm": 0.948584663384744, "learning_rate": 3.8371621449153955e-06, "loss": 0.0282, "step": 65120 }, { "epoch": 0.2717368627483706, "grad_norm": 1.2414974070974654, "learning_rate": 3.8370148399357105e-06, "loss": 0.033, "step": 65125 }, { "epoch": 0.2717577254633609, "grad_norm": 0.7657019769827832, "learning_rate": 3.836867551919414e-06, "loss": 0.0261, "step": 65130 }, { "epoch": 0.27177858817835115, "grad_norm": 1.9033683223069158, "learning_rate": 3.83672028086325e-06, "loss": 0.0289, "step": 65135 }, { "epoch": 0.27179945089334145, "grad_norm": 0.9574758191572802, "learning_rate": 3.836573026763965e-06, "loss": 0.0421, "step": 65140 }, { "epoch": 0.27182031360833175, "grad_norm": 0.6179316922849767, "learning_rate": 3.836425789618305e-06, "loss": 0.041, "step": 65145 }, { "epoch": 0.271841176323322, "grad_norm": 0.7882514438545928, "learning_rate": 3.836278569423017e-06, "loss": 0.0328, "step": 65150 }, { "epoch": 0.2718620390383123, "grad_norm": 1.107440821090538, "learning_rate": 3.836131366174849e-06, "loss": 0.0398, "step": 65155 }, { "epoch": 0.27188290175330254, "grad_norm": 0.9790184190926364, "learning_rate": 3.83598417987055e-06, "loss": 0.034, "step": 65160 }, { "epoch": 0.27190376446829284, "grad_norm": 0.6933984995831725, "learning_rate": 3.835837010506869e-06, "loss": 0.0288, "step": 65165 }, { "epoch": 0.27192462718328314, "grad_norm": 0.9467360673566012, "learning_rate": 3.835689858080557e-06, "loss": 0.055, "step": 65170 }, { "epoch": 0.2719454898982734, "grad_norm": 1.0202388770099748, "learning_rate": 3.835542722588365e-06, "loss": 0.0226, "step": 65175 }, { "epoch": 0.2719663526132637, "grad_norm": 2.816362880464008, "learning_rate": 3.835395604027046e-06, "loss": 0.0272, "step": 65180 }, { "epoch": 0.271987215328254, "grad_norm": 1.1221832721820781, "learning_rate": 3.835248502393354e-06, "loss": 0.0293, "step": 65185 }, { "epoch": 0.27200807804324423, "grad_norm": 1.941466493843271, "learning_rate": 3.83510141768404e-06, "loss": 0.0256, "step": 65190 }, { "epoch": 0.2720289407582345, "grad_norm": 1.1845265895308115, "learning_rate": 3.834954349895863e-06, "loss": 0.0307, "step": 65195 }, { "epoch": 0.27204980347322477, "grad_norm": 0.7704590830722396, "learning_rate": 3.834807299025576e-06, "loss": 0.0322, "step": 65200 }, { "epoch": 0.27207066618821507, "grad_norm": 0.8137091565800436, "learning_rate": 3.834660265069936e-06, "loss": 0.03, "step": 65205 }, { "epoch": 0.27209152890320537, "grad_norm": 1.0448505711841891, "learning_rate": 3.8345132480257e-06, "loss": 0.0272, "step": 65210 }, { "epoch": 0.2721123916181956, "grad_norm": 0.8698286087025479, "learning_rate": 3.8343662478896295e-06, "loss": 0.0248, "step": 65215 }, { "epoch": 0.2721332543331859, "grad_norm": 1.3764861390576066, "learning_rate": 3.834219264658481e-06, "loss": 0.0373, "step": 65220 }, { "epoch": 0.27215411704817616, "grad_norm": 0.8838552003895727, "learning_rate": 3.834072298329015e-06, "loss": 0.0329, "step": 65225 }, { "epoch": 0.27217497976316646, "grad_norm": 1.2160215104108674, "learning_rate": 3.833925348897993e-06, "loss": 0.0317, "step": 65230 }, { "epoch": 0.27219584247815676, "grad_norm": 1.1175411784501699, "learning_rate": 3.833778416362177e-06, "loss": 0.0399, "step": 65235 }, { "epoch": 0.272216705193147, "grad_norm": 0.5276242181896895, "learning_rate": 3.833631500718329e-06, "loss": 0.0339, "step": 65240 }, { "epoch": 0.2722375679081373, "grad_norm": 0.6398207381309284, "learning_rate": 3.833484601963214e-06, "loss": 0.0274, "step": 65245 }, { "epoch": 0.27225843062312755, "grad_norm": 1.2078014184217896, "learning_rate": 3.833337720093595e-06, "loss": 0.053, "step": 65250 }, { "epoch": 0.27227929333811784, "grad_norm": 10.398191965876595, "learning_rate": 3.833190855106238e-06, "loss": 0.0245, "step": 65255 }, { "epoch": 0.27230015605310814, "grad_norm": 0.6766236476555004, "learning_rate": 3.83304400699791e-06, "loss": 0.0265, "step": 65260 }, { "epoch": 0.2723210187680984, "grad_norm": 1.3164739521432673, "learning_rate": 3.832897175765376e-06, "loss": 0.0275, "step": 65265 }, { "epoch": 0.2723418814830887, "grad_norm": 0.9548202424715829, "learning_rate": 3.832750361405406e-06, "loss": 0.0497, "step": 65270 }, { "epoch": 0.272362744198079, "grad_norm": 1.1221844976546058, "learning_rate": 3.832603563914768e-06, "loss": 0.0284, "step": 65275 }, { "epoch": 0.27238360691306923, "grad_norm": 0.6917530922503747, "learning_rate": 3.832456783290232e-06, "loss": 0.0215, "step": 65280 }, { "epoch": 0.27240446962805953, "grad_norm": 0.6463762782198819, "learning_rate": 3.832310019528569e-06, "loss": 0.0373, "step": 65285 }, { "epoch": 0.2724253323430498, "grad_norm": 0.8772247935915708, "learning_rate": 3.832163272626549e-06, "loss": 0.0365, "step": 65290 }, { "epoch": 0.2724461950580401, "grad_norm": 0.814238813794731, "learning_rate": 3.832016542580945e-06, "loss": 0.0279, "step": 65295 }, { "epoch": 0.2724670577730304, "grad_norm": 0.7533115519109833, "learning_rate": 3.831869829388532e-06, "loss": 0.0252, "step": 65300 }, { "epoch": 0.2724879204880206, "grad_norm": 0.5418861541099464, "learning_rate": 3.831723133046081e-06, "loss": 0.0289, "step": 65305 }, { "epoch": 0.2725087832030109, "grad_norm": 0.9838810458466827, "learning_rate": 3.831576453550369e-06, "loss": 0.0284, "step": 65310 }, { "epoch": 0.27252964591800116, "grad_norm": 0.8856011892966852, "learning_rate": 3.831429790898169e-06, "loss": 0.0273, "step": 65315 }, { "epoch": 0.27255050863299146, "grad_norm": 0.9583801910377698, "learning_rate": 3.831283145086261e-06, "loss": 0.0271, "step": 65320 }, { "epoch": 0.27257137134798176, "grad_norm": 0.6280675764911073, "learning_rate": 3.831136516111421e-06, "loss": 0.0326, "step": 65325 }, { "epoch": 0.272592234062972, "grad_norm": 0.5630532439900253, "learning_rate": 3.8309899039704284e-06, "loss": 0.0359, "step": 65330 }, { "epoch": 0.2726130967779623, "grad_norm": 0.3702034785942198, "learning_rate": 3.8308433086600595e-06, "loss": 0.0342, "step": 65335 }, { "epoch": 0.27263395949295255, "grad_norm": 1.3181315891695067, "learning_rate": 3.830696730177098e-06, "loss": 0.036, "step": 65340 }, { "epoch": 0.27265482220794285, "grad_norm": 0.6786420858679441, "learning_rate": 3.830550168518323e-06, "loss": 0.0296, "step": 65345 }, { "epoch": 0.27267568492293315, "grad_norm": 0.6951245997235969, "learning_rate": 3.8304036236805155e-06, "loss": 0.0371, "step": 65350 }, { "epoch": 0.2726965476379234, "grad_norm": 0.6822134501655648, "learning_rate": 3.83025709566046e-06, "loss": 0.0238, "step": 65355 }, { "epoch": 0.2727174103529137, "grad_norm": 1.077840792594912, "learning_rate": 3.830110584454939e-06, "loss": 0.0381, "step": 65360 }, { "epoch": 0.272738273067904, "grad_norm": 1.138526723217453, "learning_rate": 3.829964090060736e-06, "loss": 0.0292, "step": 65365 }, { "epoch": 0.27275913578289424, "grad_norm": 1.2835297783713868, "learning_rate": 3.829817612474639e-06, "loss": 0.0317, "step": 65370 }, { "epoch": 0.27277999849788453, "grad_norm": 0.5672207183910835, "learning_rate": 3.829671151693431e-06, "loss": 0.0328, "step": 65375 }, { "epoch": 0.2728008612128748, "grad_norm": 0.9801523579622934, "learning_rate": 3.829524707713901e-06, "loss": 0.0342, "step": 65380 }, { "epoch": 0.2728217239278651, "grad_norm": 1.2759779772590338, "learning_rate": 3.8293782805328355e-06, "loss": 0.0298, "step": 65385 }, { "epoch": 0.2728425866428554, "grad_norm": 0.7982751965178859, "learning_rate": 3.829231870147024e-06, "loss": 0.0325, "step": 65390 }, { "epoch": 0.2728634493578456, "grad_norm": 0.930348325085594, "learning_rate": 3.829085476553256e-06, "loss": 0.0317, "step": 65395 }, { "epoch": 0.2728843120728359, "grad_norm": 0.6767846675821498, "learning_rate": 3.828939099748323e-06, "loss": 0.0329, "step": 65400 }, { "epoch": 0.27290517478782617, "grad_norm": 0.9922477358985535, "learning_rate": 3.828792739729014e-06, "loss": 0.0354, "step": 65405 }, { "epoch": 0.27292603750281647, "grad_norm": 0.5902645242952406, "learning_rate": 3.828646396492121e-06, "loss": 0.0276, "step": 65410 }, { "epoch": 0.27294690021780676, "grad_norm": 0.5493463501745935, "learning_rate": 3.828500070034439e-06, "loss": 0.0349, "step": 65415 }, { "epoch": 0.272967762932797, "grad_norm": 0.9601891873790639, "learning_rate": 3.828353760352762e-06, "loss": 0.0269, "step": 65420 }, { "epoch": 0.2729886256477873, "grad_norm": 0.8278899129182099, "learning_rate": 3.828207467443883e-06, "loss": 0.0267, "step": 65425 }, { "epoch": 0.27300948836277755, "grad_norm": 1.221357200600851, "learning_rate": 3.8280611913045975e-06, "loss": 0.0264, "step": 65430 }, { "epoch": 0.27303035107776785, "grad_norm": 0.7456528167402343, "learning_rate": 3.8279149319317035e-06, "loss": 0.0435, "step": 65435 }, { "epoch": 0.27305121379275815, "grad_norm": 1.6433951275908887, "learning_rate": 3.8277686893219964e-06, "loss": 0.044, "step": 65440 }, { "epoch": 0.2730720765077484, "grad_norm": 1.1880323134565955, "learning_rate": 3.827622463472276e-06, "loss": 0.0354, "step": 65445 }, { "epoch": 0.2730929392227387, "grad_norm": 0.5844046695432, "learning_rate": 3.827476254379341e-06, "loss": 0.0347, "step": 65450 }, { "epoch": 0.273113801937729, "grad_norm": 0.9959081513352227, "learning_rate": 3.827330062039991e-06, "loss": 0.0292, "step": 65455 }, { "epoch": 0.27313466465271924, "grad_norm": 1.2973439318387638, "learning_rate": 3.827183886451025e-06, "loss": 0.0288, "step": 65460 }, { "epoch": 0.27315552736770954, "grad_norm": 0.43537489910232613, "learning_rate": 3.827037727609247e-06, "loss": 0.0206, "step": 65465 }, { "epoch": 0.2731763900826998, "grad_norm": 0.8744055206640123, "learning_rate": 3.826891585511458e-06, "loss": 0.0268, "step": 65470 }, { "epoch": 0.2731972527976901, "grad_norm": 0.7106469314384574, "learning_rate": 3.826745460154462e-06, "loss": 0.0235, "step": 65475 }, { "epoch": 0.2732181155126804, "grad_norm": 0.8929317213491952, "learning_rate": 3.826599351535063e-06, "loss": 0.0264, "step": 65480 }, { "epoch": 0.2732389782276706, "grad_norm": 0.5980557253919868, "learning_rate": 3.826453259650064e-06, "loss": 0.0285, "step": 65485 }, { "epoch": 0.2732598409426609, "grad_norm": 0.7792157444291226, "learning_rate": 3.826307184496275e-06, "loss": 0.028, "step": 65490 }, { "epoch": 0.27328070365765117, "grad_norm": 0.594915562792577, "learning_rate": 3.826161126070499e-06, "loss": 0.0289, "step": 65495 }, { "epoch": 0.27330156637264147, "grad_norm": 0.8737183395778875, "learning_rate": 3.826015084369546e-06, "loss": 0.0297, "step": 65500 }, { "epoch": 0.27332242908763177, "grad_norm": 0.779496847099214, "learning_rate": 3.825869059390221e-06, "loss": 0.0344, "step": 65505 }, { "epoch": 0.273343291802622, "grad_norm": 0.8140237070878812, "learning_rate": 3.825723051129337e-06, "loss": 0.0313, "step": 65510 }, { "epoch": 0.2733641545176123, "grad_norm": 0.9244238939121942, "learning_rate": 3.825577059583702e-06, "loss": 0.0254, "step": 65515 }, { "epoch": 0.27338501723260256, "grad_norm": 0.5333077656840727, "learning_rate": 3.825431084750126e-06, "loss": 0.0213, "step": 65520 }, { "epoch": 0.27340587994759286, "grad_norm": 0.6700262602866819, "learning_rate": 3.825285126625425e-06, "loss": 0.028, "step": 65525 }, { "epoch": 0.27342674266258316, "grad_norm": 0.6905589063969619, "learning_rate": 3.825139185206406e-06, "loss": 0.0274, "step": 65530 }, { "epoch": 0.2734476053775734, "grad_norm": 1.3911624779326301, "learning_rate": 3.824993260489887e-06, "loss": 0.0359, "step": 65535 }, { "epoch": 0.2734684680925637, "grad_norm": 0.7615229082620832, "learning_rate": 3.82484735247268e-06, "loss": 0.039, "step": 65540 }, { "epoch": 0.273489330807554, "grad_norm": 1.2221556056861627, "learning_rate": 3.824701461151601e-06, "loss": 0.0341, "step": 65545 }, { "epoch": 0.27351019352254424, "grad_norm": 0.8633941844167886, "learning_rate": 3.824555586523466e-06, "loss": 0.0401, "step": 65550 }, { "epoch": 0.27353105623753454, "grad_norm": 0.8470422704974022, "learning_rate": 3.8244097285850904e-06, "loss": 0.0301, "step": 65555 }, { "epoch": 0.2735519189525248, "grad_norm": 1.4289097418835115, "learning_rate": 3.824263887333295e-06, "loss": 0.0355, "step": 65560 }, { "epoch": 0.2735727816675151, "grad_norm": 0.7976118430394753, "learning_rate": 3.824118062764896e-06, "loss": 0.0311, "step": 65565 }, { "epoch": 0.2735936443825054, "grad_norm": 0.9589672318803678, "learning_rate": 3.823972254876713e-06, "loss": 0.0388, "step": 65570 }, { "epoch": 0.27361450709749563, "grad_norm": 0.8345763401691683, "learning_rate": 3.823826463665567e-06, "loss": 0.0232, "step": 65575 }, { "epoch": 0.27363536981248593, "grad_norm": 0.8553360464559233, "learning_rate": 3.82368068912828e-06, "loss": 0.0271, "step": 65580 }, { "epoch": 0.2736562325274762, "grad_norm": 0.7200696310877482, "learning_rate": 3.823534931261672e-06, "loss": 0.025, "step": 65585 }, { "epoch": 0.2736770952424665, "grad_norm": 0.6967865690554957, "learning_rate": 3.823389190062566e-06, "loss": 0.0284, "step": 65590 }, { "epoch": 0.2736979579574568, "grad_norm": 0.5520123966834971, "learning_rate": 3.823243465527787e-06, "loss": 0.0302, "step": 65595 }, { "epoch": 0.273718820672447, "grad_norm": 0.8201819333188325, "learning_rate": 3.8230977576541586e-06, "loss": 0.0276, "step": 65600 }, { "epoch": 0.2737396833874373, "grad_norm": 0.7353863556468642, "learning_rate": 3.8229520664385065e-06, "loss": 0.0295, "step": 65605 }, { "epoch": 0.27376054610242756, "grad_norm": 1.0495186187347367, "learning_rate": 3.8228063918776575e-06, "loss": 0.026, "step": 65610 }, { "epoch": 0.27378140881741786, "grad_norm": 0.9352975512654387, "learning_rate": 3.822660733968438e-06, "loss": 0.0321, "step": 65615 }, { "epoch": 0.27380227153240816, "grad_norm": 0.6498878699638884, "learning_rate": 3.822515092707675e-06, "loss": 0.0308, "step": 65620 }, { "epoch": 0.2738231342473984, "grad_norm": 0.7469545533875681, "learning_rate": 3.822369468092199e-06, "loss": 0.0317, "step": 65625 }, { "epoch": 0.2738439969623887, "grad_norm": 0.700893271716341, "learning_rate": 3.822223860118839e-06, "loss": 0.0294, "step": 65630 }, { "epoch": 0.273864859677379, "grad_norm": 0.43967718370713854, "learning_rate": 3.822078268784424e-06, "loss": 0.022, "step": 65635 }, { "epoch": 0.27388572239236925, "grad_norm": 0.5694104395712943, "learning_rate": 3.821932694085788e-06, "loss": 0.0501, "step": 65640 }, { "epoch": 0.27390658510735955, "grad_norm": 0.8035093049699306, "learning_rate": 3.821787136019761e-06, "loss": 0.028, "step": 65645 }, { "epoch": 0.2739274478223498, "grad_norm": 0.958988937512293, "learning_rate": 3.821641594583178e-06, "loss": 0.0305, "step": 65650 }, { "epoch": 0.2739483105373401, "grad_norm": 0.6966957613097782, "learning_rate": 3.821496069772869e-06, "loss": 0.0296, "step": 65655 }, { "epoch": 0.2739691732523304, "grad_norm": 1.1966167191783528, "learning_rate": 3.821350561585673e-06, "loss": 0.0281, "step": 65660 }, { "epoch": 0.27399003596732063, "grad_norm": 0.5442495270341585, "learning_rate": 3.821205070018424e-06, "loss": 0.0266, "step": 65665 }, { "epoch": 0.27401089868231093, "grad_norm": 0.42874930402373823, "learning_rate": 3.821059595067957e-06, "loss": 0.0261, "step": 65670 }, { "epoch": 0.2740317613973012, "grad_norm": 0.8472735690795248, "learning_rate": 3.82091413673111e-06, "loss": 0.0264, "step": 65675 }, { "epoch": 0.2740526241122915, "grad_norm": 0.6236893164474903, "learning_rate": 3.820768695004723e-06, "loss": 0.0312, "step": 65680 }, { "epoch": 0.2740734868272818, "grad_norm": 0.8173627982242905, "learning_rate": 3.820623269885631e-06, "loss": 0.033, "step": 65685 }, { "epoch": 0.274094349542272, "grad_norm": 0.8099979945742181, "learning_rate": 3.820477861370677e-06, "loss": 0.0268, "step": 65690 }, { "epoch": 0.2741152122572623, "grad_norm": 0.937784907198219, "learning_rate": 3.8203324694567e-06, "loss": 0.0328, "step": 65695 }, { "epoch": 0.27413607497225256, "grad_norm": 0.8059188636212814, "learning_rate": 3.8201870941405425e-06, "loss": 0.0319, "step": 65700 }, { "epoch": 0.27415693768724286, "grad_norm": 0.9424980701767123, "learning_rate": 3.820041735419046e-06, "loss": 0.0295, "step": 65705 }, { "epoch": 0.27417780040223316, "grad_norm": 0.5967573444924543, "learning_rate": 3.819896393289054e-06, "loss": 0.0286, "step": 65710 }, { "epoch": 0.2741986631172234, "grad_norm": 1.1244755412125813, "learning_rate": 3.81975106774741e-06, "loss": 0.0374, "step": 65715 }, { "epoch": 0.2742195258322137, "grad_norm": 1.2708391626753663, "learning_rate": 3.819605758790959e-06, "loss": 0.0305, "step": 65720 }, { "epoch": 0.274240388547204, "grad_norm": 0.5956594981151825, "learning_rate": 3.819460466416545e-06, "loss": 0.0334, "step": 65725 }, { "epoch": 0.27426125126219425, "grad_norm": 0.7977028784718903, "learning_rate": 3.8193151906210186e-06, "loss": 0.0265, "step": 65730 }, { "epoch": 0.27428211397718455, "grad_norm": 1.08767431287293, "learning_rate": 3.8191699314012235e-06, "loss": 0.0314, "step": 65735 }, { "epoch": 0.2743029766921748, "grad_norm": 0.6355519171770858, "learning_rate": 3.8190246887540075e-06, "loss": 0.0456, "step": 65740 }, { "epoch": 0.2743238394071651, "grad_norm": 0.4878912569372641, "learning_rate": 3.818879462676222e-06, "loss": 0.0303, "step": 65745 }, { "epoch": 0.2743447021221554, "grad_norm": 0.8248488581527509, "learning_rate": 3.818734253164717e-06, "loss": 0.0239, "step": 65750 }, { "epoch": 0.27436556483714564, "grad_norm": 0.6755031961431676, "learning_rate": 3.81858906021634e-06, "loss": 0.0329, "step": 65755 }, { "epoch": 0.27438642755213594, "grad_norm": 1.0197613765383773, "learning_rate": 3.818443883827945e-06, "loss": 0.0303, "step": 65760 }, { "epoch": 0.2744072902671262, "grad_norm": 0.9164488274604634, "learning_rate": 3.818298723996383e-06, "loss": 0.0301, "step": 65765 }, { "epoch": 0.2744281529821165, "grad_norm": 1.0524867478725721, "learning_rate": 3.818153580718508e-06, "loss": 0.0354, "step": 65770 }, { "epoch": 0.2744490156971068, "grad_norm": 0.8021739035019505, "learning_rate": 3.818008453991175e-06, "loss": 0.0442, "step": 65775 }, { "epoch": 0.274469878412097, "grad_norm": 0.29386867022653773, "learning_rate": 3.8178633438112365e-06, "loss": 0.0373, "step": 65780 }, { "epoch": 0.2744907411270873, "grad_norm": 0.34109487483676904, "learning_rate": 3.817718250175549e-06, "loss": 0.0217, "step": 65785 }, { "epoch": 0.27451160384207757, "grad_norm": 0.8745411900063819, "learning_rate": 3.817573173080971e-06, "loss": 0.0268, "step": 65790 }, { "epoch": 0.27453246655706787, "grad_norm": 0.9868548479465338, "learning_rate": 3.817428112524357e-06, "loss": 0.032, "step": 65795 }, { "epoch": 0.27455332927205817, "grad_norm": 0.9061226986459042, "learning_rate": 3.8172830685025665e-06, "loss": 0.0315, "step": 65800 }, { "epoch": 0.2745741919870484, "grad_norm": 1.186803463573145, "learning_rate": 3.817138041012459e-06, "loss": 0.0389, "step": 65805 }, { "epoch": 0.2745950547020387, "grad_norm": 0.9466294148702933, "learning_rate": 3.816993030050893e-06, "loss": 0.0463, "step": 65810 }, { "epoch": 0.274615917417029, "grad_norm": 0.8948996227484592, "learning_rate": 3.81684803561473e-06, "loss": 0.0343, "step": 65815 }, { "epoch": 0.27463678013201925, "grad_norm": 0.8481069519108999, "learning_rate": 3.8167030577008315e-06, "loss": 0.0241, "step": 65820 }, { "epoch": 0.27465764284700955, "grad_norm": 1.0059955593973424, "learning_rate": 3.8165580963060614e-06, "loss": 0.0295, "step": 65825 }, { "epoch": 0.2746785055619998, "grad_norm": 1.1981125209634114, "learning_rate": 3.816413151427279e-06, "loss": 0.0308, "step": 65830 }, { "epoch": 0.2746993682769901, "grad_norm": 0.5929935431692998, "learning_rate": 3.816268223061351e-06, "loss": 0.0285, "step": 65835 }, { "epoch": 0.2747202309919804, "grad_norm": 1.1098698460456746, "learning_rate": 3.816123311205143e-06, "loss": 0.0268, "step": 65840 }, { "epoch": 0.27474109370697064, "grad_norm": 0.9328230500076401, "learning_rate": 3.815978415855519e-06, "loss": 0.0334, "step": 65845 }, { "epoch": 0.27476195642196094, "grad_norm": 1.1614943662811867, "learning_rate": 3.815833537009347e-06, "loss": 0.0297, "step": 65850 }, { "epoch": 0.2747828191369512, "grad_norm": 0.8586850167362852, "learning_rate": 3.815688674663493e-06, "loss": 0.0329, "step": 65855 }, { "epoch": 0.2748036818519415, "grad_norm": 0.7634682417466793, "learning_rate": 3.815543828814824e-06, "loss": 0.0243, "step": 65860 }, { "epoch": 0.2748245445669318, "grad_norm": 0.717374012586396, "learning_rate": 3.815398999460213e-06, "loss": 0.027, "step": 65865 }, { "epoch": 0.274845407281922, "grad_norm": 0.6793351695864567, "learning_rate": 3.815254186596526e-06, "loss": 0.0258, "step": 65870 }, { "epoch": 0.2748662699969123, "grad_norm": 0.5341198365914072, "learning_rate": 3.815109390220636e-06, "loss": 0.0332, "step": 65875 }, { "epoch": 0.27488713271190257, "grad_norm": 0.6319077105756273, "learning_rate": 3.814964610329413e-06, "loss": 0.0292, "step": 65880 }, { "epoch": 0.27490799542689287, "grad_norm": 0.8426008559045343, "learning_rate": 3.814819846919731e-06, "loss": 0.0286, "step": 65885 }, { "epoch": 0.27492885814188317, "grad_norm": 1.4323116208044022, "learning_rate": 3.8146750999884613e-06, "loss": 0.0292, "step": 65890 }, { "epoch": 0.2749497208568734, "grad_norm": 0.48971217742389667, "learning_rate": 3.8145303695324797e-06, "loss": 0.0255, "step": 65895 }, { "epoch": 0.2749705835718637, "grad_norm": 0.7696002663802379, "learning_rate": 3.8143856555486597e-06, "loss": 0.0286, "step": 65900 }, { "epoch": 0.274991446286854, "grad_norm": 0.9231345231426371, "learning_rate": 3.814240958033878e-06, "loss": 0.0366, "step": 65905 }, { "epoch": 0.27501230900184426, "grad_norm": 1.092314521556576, "learning_rate": 3.8140962769850105e-06, "loss": 0.0297, "step": 65910 }, { "epoch": 0.27503317171683456, "grad_norm": 0.5450759181372906, "learning_rate": 3.8139516123989335e-06, "loss": 0.02, "step": 65915 }, { "epoch": 0.2750540344318248, "grad_norm": 0.6802339724303692, "learning_rate": 3.8138069642725274e-06, "loss": 0.0261, "step": 65920 }, { "epoch": 0.2750748971468151, "grad_norm": 0.8514426778103015, "learning_rate": 3.8136623326026696e-06, "loss": 0.0316, "step": 65925 }, { "epoch": 0.2750957598618054, "grad_norm": 1.0159268386715168, "learning_rate": 3.8135177173862405e-06, "loss": 0.0389, "step": 65930 }, { "epoch": 0.27511662257679564, "grad_norm": 0.8402490585447645, "learning_rate": 3.8133731186201207e-06, "loss": 0.0364, "step": 65935 }, { "epoch": 0.27513748529178594, "grad_norm": 3.8399995378278953, "learning_rate": 3.8132285363011913e-06, "loss": 0.0297, "step": 65940 }, { "epoch": 0.2751583480067762, "grad_norm": 0.8529360665981417, "learning_rate": 3.813083970426335e-06, "loss": 0.0292, "step": 65945 }, { "epoch": 0.2751792107217665, "grad_norm": 0.9865140719687076, "learning_rate": 3.812939420992435e-06, "loss": 0.0342, "step": 65950 }, { "epoch": 0.2752000734367568, "grad_norm": 0.8470423451896794, "learning_rate": 3.812794887996375e-06, "loss": 0.0306, "step": 65955 }, { "epoch": 0.27522093615174703, "grad_norm": 1.2279655576036914, "learning_rate": 3.8126503714350395e-06, "loss": 0.0354, "step": 65960 }, { "epoch": 0.27524179886673733, "grad_norm": 0.614372815011438, "learning_rate": 3.8125058713053144e-06, "loss": 0.0345, "step": 65965 }, { "epoch": 0.2752626615817276, "grad_norm": 1.3322364897510977, "learning_rate": 3.8123613876040865e-06, "loss": 0.0339, "step": 65970 }, { "epoch": 0.2752835242967179, "grad_norm": 1.0838319255887692, "learning_rate": 3.8122169203282427e-06, "loss": 0.0367, "step": 65975 }, { "epoch": 0.2753043870117082, "grad_norm": 0.8504623115277664, "learning_rate": 3.8120724694746708e-06, "loss": 0.0236, "step": 65980 }, { "epoch": 0.2753252497266984, "grad_norm": 0.7885513418181428, "learning_rate": 3.8119280350402604e-06, "loss": 0.0326, "step": 65985 }, { "epoch": 0.2753461124416887, "grad_norm": 1.112204171068743, "learning_rate": 3.8117836170219007e-06, "loss": 0.0324, "step": 65990 }, { "epoch": 0.275366975156679, "grad_norm": 1.0629458891266768, "learning_rate": 3.811639215416482e-06, "loss": 0.0361, "step": 65995 }, { "epoch": 0.27538783787166926, "grad_norm": 1.6727584360092125, "learning_rate": 3.8114948302208964e-06, "loss": 0.037, "step": 66000 }, { "epoch": 0.27540870058665956, "grad_norm": 0.6537074831141637, "learning_rate": 3.811350461432036e-06, "loss": 0.0334, "step": 66005 }, { "epoch": 0.2754295633016498, "grad_norm": 0.6320952345112517, "learning_rate": 3.811206109046793e-06, "loss": 0.0279, "step": 66010 }, { "epoch": 0.2754504260166401, "grad_norm": 1.135970058006808, "learning_rate": 3.811061773062063e-06, "loss": 0.0366, "step": 66015 }, { "epoch": 0.2754712887316304, "grad_norm": 0.6969933013224109, "learning_rate": 3.8109174534747386e-06, "loss": 0.0337, "step": 66020 }, { "epoch": 0.27549215144662065, "grad_norm": 0.8322667478978453, "learning_rate": 3.810773150281716e-06, "loss": 0.0248, "step": 66025 }, { "epoch": 0.27551301416161095, "grad_norm": 1.6866665484211207, "learning_rate": 3.8106288634798926e-06, "loss": 0.0287, "step": 66030 }, { "epoch": 0.2755338768766012, "grad_norm": 0.6198644204869436, "learning_rate": 3.8104845930661644e-06, "loss": 0.0228, "step": 66035 }, { "epoch": 0.2755547395915915, "grad_norm": 0.6096043598255056, "learning_rate": 3.810340339037429e-06, "loss": 0.0399, "step": 66040 }, { "epoch": 0.2755756023065818, "grad_norm": 0.6401159519001897, "learning_rate": 3.810196101390587e-06, "loss": 0.0337, "step": 66045 }, { "epoch": 0.27559646502157203, "grad_norm": 0.8197517046437234, "learning_rate": 3.810051880122536e-06, "loss": 0.0393, "step": 66050 }, { "epoch": 0.27561732773656233, "grad_norm": 0.9936636645293806, "learning_rate": 3.8099076752301784e-06, "loss": 0.0431, "step": 66055 }, { "epoch": 0.2756381904515526, "grad_norm": 0.96554059616166, "learning_rate": 3.809763486710413e-06, "loss": 0.0264, "step": 66060 }, { "epoch": 0.2756590531665429, "grad_norm": 0.7825348335381899, "learning_rate": 3.8096193145601445e-06, "loss": 0.0307, "step": 66065 }, { "epoch": 0.2756799158815332, "grad_norm": 1.5734385252271847, "learning_rate": 3.8094751587762735e-06, "loss": 0.0765, "step": 66070 }, { "epoch": 0.2757007785965234, "grad_norm": 1.2042163433479858, "learning_rate": 3.809331019355706e-06, "loss": 0.036, "step": 66075 }, { "epoch": 0.2757216413115137, "grad_norm": 0.681564097206094, "learning_rate": 3.8091868962953445e-06, "loss": 0.03, "step": 66080 }, { "epoch": 0.275742504026504, "grad_norm": 1.2579339921270611, "learning_rate": 3.809042789592095e-06, "loss": 0.0382, "step": 66085 }, { "epoch": 0.27576336674149426, "grad_norm": 1.105844008370987, "learning_rate": 3.808898699242864e-06, "loss": 0.0289, "step": 66090 }, { "epoch": 0.27578422945648456, "grad_norm": 1.044623929834151, "learning_rate": 3.8087546252445582e-06, "loss": 0.0332, "step": 66095 }, { "epoch": 0.2758050921714748, "grad_norm": 0.630409908232286, "learning_rate": 3.8086105675940854e-06, "loss": 0.0266, "step": 66100 }, { "epoch": 0.2758259548864651, "grad_norm": 0.6039972850150268, "learning_rate": 3.808466526288356e-06, "loss": 0.0283, "step": 66105 }, { "epoch": 0.2758468176014554, "grad_norm": 0.4652010246956501, "learning_rate": 3.8083225013242758e-06, "loss": 0.0314, "step": 66110 }, { "epoch": 0.27586768031644565, "grad_norm": 1.4332852682079285, "learning_rate": 3.808178492698758e-06, "loss": 0.0422, "step": 66115 }, { "epoch": 0.27588854303143595, "grad_norm": 4.755332790221708, "learning_rate": 3.808034500408713e-06, "loss": 0.0264, "step": 66120 }, { "epoch": 0.2759094057464262, "grad_norm": 0.5521858797961822, "learning_rate": 3.8078905244510513e-06, "loss": 0.0255, "step": 66125 }, { "epoch": 0.2759302684614165, "grad_norm": 1.3881444439443151, "learning_rate": 3.8077465648226874e-06, "loss": 0.0478, "step": 66130 }, { "epoch": 0.2759511311764068, "grad_norm": 0.4003341314859159, "learning_rate": 3.8076026215205346e-06, "loss": 0.0268, "step": 66135 }, { "epoch": 0.27597199389139704, "grad_norm": 0.5523782564204623, "learning_rate": 3.807458694541507e-06, "loss": 0.0269, "step": 66140 }, { "epoch": 0.27599285660638734, "grad_norm": 1.5456244824729755, "learning_rate": 3.8073147838825197e-06, "loss": 0.0437, "step": 66145 }, { "epoch": 0.2760137193213776, "grad_norm": 1.1961067908404064, "learning_rate": 3.807170889540488e-06, "loss": 0.0415, "step": 66150 }, { "epoch": 0.2760345820363679, "grad_norm": 1.025002207084963, "learning_rate": 3.807027011512329e-06, "loss": 0.0385, "step": 66155 }, { "epoch": 0.2760554447513582, "grad_norm": 0.8070483221703576, "learning_rate": 3.8068831497949617e-06, "loss": 0.0308, "step": 66160 }, { "epoch": 0.2760763074663484, "grad_norm": 0.5150231862550974, "learning_rate": 3.8067393043853036e-06, "loss": 0.0305, "step": 66165 }, { "epoch": 0.2760971701813387, "grad_norm": 0.7376031441285015, "learning_rate": 3.806595475280273e-06, "loss": 0.0261, "step": 66170 }, { "epoch": 0.276118032896329, "grad_norm": 0.8558761875420116, "learning_rate": 3.8064516624767916e-06, "loss": 0.0322, "step": 66175 }, { "epoch": 0.27613889561131927, "grad_norm": 1.8761998996962477, "learning_rate": 3.8063078659717797e-06, "loss": 0.0442, "step": 66180 }, { "epoch": 0.27615975832630957, "grad_norm": 0.7997886508298352, "learning_rate": 3.806164085762158e-06, "loss": 0.0255, "step": 66185 }, { "epoch": 0.2761806210412998, "grad_norm": 0.733039056383895, "learning_rate": 3.8060203218448493e-06, "loss": 0.0312, "step": 66190 }, { "epoch": 0.2762014837562901, "grad_norm": 1.2518348535639587, "learning_rate": 3.8058765742167784e-06, "loss": 0.0371, "step": 66195 }, { "epoch": 0.2762223464712804, "grad_norm": 0.8201175150868735, "learning_rate": 3.8057328428748687e-06, "loss": 0.0376, "step": 66200 }, { "epoch": 0.27624320918627066, "grad_norm": 0.787506832365163, "learning_rate": 3.8055891278160443e-06, "loss": 0.0252, "step": 66205 }, { "epoch": 0.27626407190126095, "grad_norm": 1.0758140055002687, "learning_rate": 3.8054454290372313e-06, "loss": 0.0291, "step": 66210 }, { "epoch": 0.2762849346162512, "grad_norm": 0.6965298581663544, "learning_rate": 3.805301746535357e-06, "loss": 0.0302, "step": 66215 }, { "epoch": 0.2763057973312415, "grad_norm": 0.7167293752457558, "learning_rate": 3.8051580803073484e-06, "loss": 0.0261, "step": 66220 }, { "epoch": 0.2763266600462318, "grad_norm": 0.5445385446829526, "learning_rate": 3.8050144303501334e-06, "loss": 0.028, "step": 66225 }, { "epoch": 0.27634752276122204, "grad_norm": 4.830636040902994, "learning_rate": 3.8048707966606414e-06, "loss": 0.03, "step": 66230 }, { "epoch": 0.27636838547621234, "grad_norm": 0.9492492439677944, "learning_rate": 3.8047271792358015e-06, "loss": 0.0282, "step": 66235 }, { "epoch": 0.2763892481912026, "grad_norm": 1.2749681763620062, "learning_rate": 3.8045835780725453e-06, "loss": 0.0384, "step": 66240 }, { "epoch": 0.2764101109061929, "grad_norm": 1.2162446815102117, "learning_rate": 3.804439993167804e-06, "loss": 0.0286, "step": 66245 }, { "epoch": 0.2764309736211832, "grad_norm": 0.8254926709279098, "learning_rate": 3.8042964245185094e-06, "loss": 0.0286, "step": 66250 }, { "epoch": 0.27645183633617343, "grad_norm": 1.135156647253314, "learning_rate": 3.8041528721215946e-06, "loss": 0.0355, "step": 66255 }, { "epoch": 0.27647269905116373, "grad_norm": 0.8643638298854878, "learning_rate": 3.804009335973994e-06, "loss": 0.0285, "step": 66260 }, { "epoch": 0.27649356176615403, "grad_norm": 1.1603169552202572, "learning_rate": 3.8038658160726418e-06, "loss": 0.0308, "step": 66265 }, { "epoch": 0.27651442448114427, "grad_norm": 0.4473024850054798, "learning_rate": 3.803722312414474e-06, "loss": 0.028, "step": 66270 }, { "epoch": 0.27653528719613457, "grad_norm": 0.6904854649757377, "learning_rate": 3.803578824996426e-06, "loss": 0.0328, "step": 66275 }, { "epoch": 0.2765561499111248, "grad_norm": 0.8733357218399953, "learning_rate": 3.8034353538154358e-06, "loss": 0.0315, "step": 66280 }, { "epoch": 0.2765770126261151, "grad_norm": 0.7802963879413193, "learning_rate": 3.803291898868441e-06, "loss": 0.0296, "step": 66285 }, { "epoch": 0.2765978753411054, "grad_norm": 0.5708370612106476, "learning_rate": 3.8031484601523805e-06, "loss": 0.0327, "step": 66290 }, { "epoch": 0.27661873805609566, "grad_norm": 1.4072014443238423, "learning_rate": 3.8030050376641935e-06, "loss": 0.0465, "step": 66295 }, { "epoch": 0.27663960077108596, "grad_norm": 1.0863674968720909, "learning_rate": 3.8028616314008205e-06, "loss": 0.0296, "step": 66300 }, { "epoch": 0.2766604634860762, "grad_norm": 0.7541838602005593, "learning_rate": 3.802718241359202e-06, "loss": 0.0238, "step": 66305 }, { "epoch": 0.2766813262010665, "grad_norm": 1.7196294967308328, "learning_rate": 3.8025748675362818e-06, "loss": 0.0276, "step": 66310 }, { "epoch": 0.2767021889160568, "grad_norm": 0.7080711487058453, "learning_rate": 3.802431509929001e-06, "loss": 0.0285, "step": 66315 }, { "epoch": 0.27672305163104705, "grad_norm": 0.6717106636598996, "learning_rate": 3.802288168534303e-06, "loss": 0.0319, "step": 66320 }, { "epoch": 0.27674391434603735, "grad_norm": 0.9897562867458327, "learning_rate": 3.802144843349133e-06, "loss": 0.0307, "step": 66325 }, { "epoch": 0.2767647770610276, "grad_norm": 0.4749793283319135, "learning_rate": 3.8020015343704368e-06, "loss": 0.0288, "step": 66330 }, { "epoch": 0.2767856397760179, "grad_norm": 0.8599430444616594, "learning_rate": 3.8018582415951592e-06, "loss": 0.0251, "step": 66335 }, { "epoch": 0.2768065024910082, "grad_norm": 0.8033626058454713, "learning_rate": 3.8017149650202474e-06, "loss": 0.0358, "step": 66340 }, { "epoch": 0.27682736520599843, "grad_norm": 0.9274966289578234, "learning_rate": 3.8015717046426486e-06, "loss": 0.0294, "step": 66345 }, { "epoch": 0.27684822792098873, "grad_norm": 0.8528310636837677, "learning_rate": 3.8014284604593117e-06, "loss": 0.0289, "step": 66350 }, { "epoch": 0.276869090635979, "grad_norm": 1.005136309181686, "learning_rate": 3.8012852324671862e-06, "loss": 0.0285, "step": 66355 }, { "epoch": 0.2768899533509693, "grad_norm": 0.7799104472762648, "learning_rate": 3.8011420206632216e-06, "loss": 0.0283, "step": 66360 }, { "epoch": 0.2769108160659596, "grad_norm": 1.1386889787824925, "learning_rate": 3.8009988250443684e-06, "loss": 0.0325, "step": 66365 }, { "epoch": 0.2769316787809498, "grad_norm": 0.6747014106225004, "learning_rate": 3.800855645607579e-06, "loss": 0.0253, "step": 66370 }, { "epoch": 0.2769525414959401, "grad_norm": 0.4652329979882962, "learning_rate": 3.800712482349806e-06, "loss": 0.0241, "step": 66375 }, { "epoch": 0.2769734042109304, "grad_norm": 0.7083257500099909, "learning_rate": 3.800569335268001e-06, "loss": 0.0339, "step": 66380 }, { "epoch": 0.27699426692592066, "grad_norm": 1.2166943051832217, "learning_rate": 3.80042620435912e-06, "loss": 0.028, "step": 66385 }, { "epoch": 0.27701512964091096, "grad_norm": 1.043391207534038, "learning_rate": 3.800283089620117e-06, "loss": 0.0377, "step": 66390 }, { "epoch": 0.2770359923559012, "grad_norm": 0.9316975341695077, "learning_rate": 3.800139991047947e-06, "loss": 0.0286, "step": 66395 }, { "epoch": 0.2770568550708915, "grad_norm": 0.6014150267173841, "learning_rate": 3.7999969086395673e-06, "loss": 0.0259, "step": 66400 }, { "epoch": 0.2770777177858818, "grad_norm": 0.5834103128680385, "learning_rate": 3.799853842391935e-06, "loss": 0.0314, "step": 66405 }, { "epoch": 0.27709858050087205, "grad_norm": 1.128793203977095, "learning_rate": 3.7997107923020083e-06, "loss": 0.0342, "step": 66410 }, { "epoch": 0.27711944321586235, "grad_norm": 0.7474158008320811, "learning_rate": 3.7995677583667455e-06, "loss": 0.0333, "step": 66415 }, { "epoch": 0.2771403059308526, "grad_norm": 1.4103374508926514, "learning_rate": 3.7994247405831063e-06, "loss": 0.0384, "step": 66420 }, { "epoch": 0.2771611686458429, "grad_norm": 0.9806172311134931, "learning_rate": 3.799281738948052e-06, "loss": 0.03, "step": 66425 }, { "epoch": 0.2771820313608332, "grad_norm": 0.7407227921471515, "learning_rate": 3.7991387534585426e-06, "loss": 0.0229, "step": 66430 }, { "epoch": 0.27720289407582344, "grad_norm": 1.2739335709489081, "learning_rate": 3.7989957841115415e-06, "loss": 0.0351, "step": 66435 }, { "epoch": 0.27722375679081374, "grad_norm": 0.7991536371416044, "learning_rate": 3.7988528309040107e-06, "loss": 0.0294, "step": 66440 }, { "epoch": 0.277244619505804, "grad_norm": 0.6406993766857038, "learning_rate": 3.7987098938329137e-06, "loss": 0.0258, "step": 66445 }, { "epoch": 0.2772654822207943, "grad_norm": 1.0864515138043846, "learning_rate": 3.798566972895215e-06, "loss": 0.0379, "step": 66450 }, { "epoch": 0.2772863449357846, "grad_norm": 1.0326796055824925, "learning_rate": 3.798424068087881e-06, "loss": 0.0283, "step": 66455 }, { "epoch": 0.2773072076507748, "grad_norm": 1.1754281790448504, "learning_rate": 3.7982811794078766e-06, "loss": 0.0323, "step": 66460 }, { "epoch": 0.2773280703657651, "grad_norm": 0.8764245737697474, "learning_rate": 3.798138306852168e-06, "loss": 0.0372, "step": 66465 }, { "epoch": 0.2773489330807554, "grad_norm": 0.8064468942778866, "learning_rate": 3.7979954504177246e-06, "loss": 0.0338, "step": 66470 }, { "epoch": 0.27736979579574567, "grad_norm": 0.5507968575305823, "learning_rate": 3.7978526101015134e-06, "loss": 0.0249, "step": 66475 }, { "epoch": 0.27739065851073597, "grad_norm": 0.5413013907417469, "learning_rate": 3.797709785900505e-06, "loss": 0.0316, "step": 66480 }, { "epoch": 0.2774115212257262, "grad_norm": 0.4247081032551957, "learning_rate": 3.7975669778116687e-06, "loss": 0.0257, "step": 66485 }, { "epoch": 0.2774323839407165, "grad_norm": 0.8875787143836432, "learning_rate": 3.797424185831975e-06, "loss": 0.0395, "step": 66490 }, { "epoch": 0.2774532466557068, "grad_norm": 0.8790418968747888, "learning_rate": 3.7972814099583956e-06, "loss": 0.0262, "step": 66495 }, { "epoch": 0.27747410937069705, "grad_norm": 0.7868669360988103, "learning_rate": 3.797138650187903e-06, "loss": 0.0526, "step": 66500 }, { "epoch": 0.27749497208568735, "grad_norm": 0.8296270867629193, "learning_rate": 3.796995906517471e-06, "loss": 0.0379, "step": 66505 }, { "epoch": 0.2775158348006776, "grad_norm": 1.1998005813014938, "learning_rate": 3.796853178944073e-06, "loss": 0.0321, "step": 66510 }, { "epoch": 0.2775366975156679, "grad_norm": 1.102318458553332, "learning_rate": 3.7967104674646844e-06, "loss": 0.0292, "step": 66515 }, { "epoch": 0.2775575602306582, "grad_norm": 0.3550693341358112, "learning_rate": 3.7965677720762806e-06, "loss": 0.0279, "step": 66520 }, { "epoch": 0.27757842294564844, "grad_norm": 0.9223642806146217, "learning_rate": 3.7964250927758374e-06, "loss": 0.0303, "step": 66525 }, { "epoch": 0.27759928566063874, "grad_norm": 1.361966269018098, "learning_rate": 3.7962824295603327e-06, "loss": 0.0354, "step": 66530 }, { "epoch": 0.277620148375629, "grad_norm": 0.6797220644171307, "learning_rate": 3.796139782426745e-06, "loss": 0.0268, "step": 66535 }, { "epoch": 0.2776410110906193, "grad_norm": 1.8587097137995967, "learning_rate": 3.7959971513720513e-06, "loss": 0.0383, "step": 66540 }, { "epoch": 0.2776618738056096, "grad_norm": 0.6611556003269421, "learning_rate": 3.795854536393233e-06, "loss": 0.0207, "step": 66545 }, { "epoch": 0.2776827365205998, "grad_norm": 0.5772360034604841, "learning_rate": 3.795711937487269e-06, "loss": 0.0234, "step": 66550 }, { "epoch": 0.2777035992355901, "grad_norm": 0.721020847797803, "learning_rate": 3.795569354651142e-06, "loss": 0.0284, "step": 66555 }, { "epoch": 0.2777244619505804, "grad_norm": 0.5985215941046415, "learning_rate": 3.7954267878818337e-06, "loss": 0.0318, "step": 66560 }, { "epoch": 0.27774532466557067, "grad_norm": 0.7902806251477031, "learning_rate": 3.7952842371763254e-06, "loss": 0.0481, "step": 66565 }, { "epoch": 0.27776618738056097, "grad_norm": 0.7298484522988501, "learning_rate": 3.795141702531602e-06, "loss": 0.0246, "step": 66570 }, { "epoch": 0.2777870500955512, "grad_norm": 0.6238193927913169, "learning_rate": 3.794999183944647e-06, "loss": 0.0309, "step": 66575 }, { "epoch": 0.2778079128105415, "grad_norm": 1.036723303260201, "learning_rate": 3.794856681412447e-06, "loss": 0.0346, "step": 66580 }, { "epoch": 0.2778287755255318, "grad_norm": 0.4833133621335101, "learning_rate": 3.7947141949319864e-06, "loss": 0.0202, "step": 66585 }, { "epoch": 0.27784963824052206, "grad_norm": 1.0768096843030568, "learning_rate": 3.794571724500253e-06, "loss": 0.0326, "step": 66590 }, { "epoch": 0.27787050095551236, "grad_norm": 1.1311680032905926, "learning_rate": 3.794429270114233e-06, "loss": 0.0283, "step": 66595 }, { "epoch": 0.2778913636705026, "grad_norm": 1.211833193931325, "learning_rate": 3.7942868317709165e-06, "loss": 0.0328, "step": 66600 }, { "epoch": 0.2779122263854929, "grad_norm": 1.544063835125909, "learning_rate": 3.7941444094672914e-06, "loss": 0.0349, "step": 66605 }, { "epoch": 0.2779330891004832, "grad_norm": 0.6321209040137697, "learning_rate": 3.7940020032003476e-06, "loss": 0.0273, "step": 66610 }, { "epoch": 0.27795395181547344, "grad_norm": 0.7005656833332987, "learning_rate": 3.793859612967077e-06, "loss": 0.0321, "step": 66615 }, { "epoch": 0.27797481453046374, "grad_norm": 1.5110344704383556, "learning_rate": 3.7937172387644687e-06, "loss": 0.0336, "step": 66620 }, { "epoch": 0.277995677245454, "grad_norm": 1.2272129621899845, "learning_rate": 3.7935748805895175e-06, "loss": 0.0437, "step": 66625 }, { "epoch": 0.2780165399604443, "grad_norm": 0.6138613364755199, "learning_rate": 3.793432538439215e-06, "loss": 0.029, "step": 66630 }, { "epoch": 0.2780374026754346, "grad_norm": 0.8544826642410805, "learning_rate": 3.793290212310555e-06, "loss": 0.0216, "step": 66635 }, { "epoch": 0.27805826539042483, "grad_norm": 0.3645832774234217, "learning_rate": 3.7931479022005343e-06, "loss": 0.0275, "step": 66640 }, { "epoch": 0.27807912810541513, "grad_norm": 0.7498475377523502, "learning_rate": 3.793005608106145e-06, "loss": 0.0293, "step": 66645 }, { "epoch": 0.27809999082040543, "grad_norm": 0.6273493334518547, "learning_rate": 3.792863330024385e-06, "loss": 0.0234, "step": 66650 }, { "epoch": 0.2781208535353957, "grad_norm": 1.4458147393540726, "learning_rate": 3.7927210679522514e-06, "loss": 0.0272, "step": 66655 }, { "epoch": 0.278141716250386, "grad_norm": 1.008975413439369, "learning_rate": 3.792578821886742e-06, "loss": 0.0398, "step": 66660 }, { "epoch": 0.2781625789653762, "grad_norm": 0.36155391668152637, "learning_rate": 3.7924365918248553e-06, "loss": 0.0251, "step": 66665 }, { "epoch": 0.2781834416803665, "grad_norm": 1.1911717315897077, "learning_rate": 3.79229437776359e-06, "loss": 0.0347, "step": 66670 }, { "epoch": 0.2782043043953568, "grad_norm": 0.7348593367016748, "learning_rate": 3.792152179699948e-06, "loss": 0.0263, "step": 66675 }, { "epoch": 0.27822516711034706, "grad_norm": 0.5261895733351096, "learning_rate": 3.7920099976309278e-06, "loss": 0.0312, "step": 66680 }, { "epoch": 0.27824602982533736, "grad_norm": 0.6685914198028718, "learning_rate": 3.791867831553533e-06, "loss": 0.0304, "step": 66685 }, { "epoch": 0.2782668925403276, "grad_norm": 1.2364144051109398, "learning_rate": 3.791725681464765e-06, "loss": 0.0282, "step": 66690 }, { "epoch": 0.2782877552553179, "grad_norm": 0.6992411253248662, "learning_rate": 3.791583547361628e-06, "loss": 0.025, "step": 66695 }, { "epoch": 0.2783086179703082, "grad_norm": 0.6278284386833796, "learning_rate": 3.791441429241126e-06, "loss": 0.0245, "step": 66700 }, { "epoch": 0.27832948068529845, "grad_norm": 0.9823306416265817, "learning_rate": 3.7912993271002635e-06, "loss": 0.0214, "step": 66705 }, { "epoch": 0.27835034340028875, "grad_norm": 0.25900171905417435, "learning_rate": 3.7911572409360452e-06, "loss": 0.0284, "step": 66710 }, { "epoch": 0.278371206115279, "grad_norm": 0.6994455302129707, "learning_rate": 3.79101517074548e-06, "loss": 0.0242, "step": 66715 }, { "epoch": 0.2783920688302693, "grad_norm": 0.8191338569989318, "learning_rate": 3.7908731165255724e-06, "loss": 0.0383, "step": 66720 }, { "epoch": 0.2784129315452596, "grad_norm": 0.8744585548038968, "learning_rate": 3.7907310782733327e-06, "loss": 0.0264, "step": 66725 }, { "epoch": 0.27843379426024983, "grad_norm": 1.2451558875137387, "learning_rate": 3.790589055985769e-06, "loss": 0.04, "step": 66730 }, { "epoch": 0.27845465697524013, "grad_norm": 0.34582048900817025, "learning_rate": 3.79044704965989e-06, "loss": 0.0285, "step": 66735 }, { "epoch": 0.27847551969023043, "grad_norm": 0.8656153587222505, "learning_rate": 3.7903050592927064e-06, "loss": 0.0229, "step": 66740 }, { "epoch": 0.2784963824052207, "grad_norm": 0.7901725188115253, "learning_rate": 3.790163084881231e-06, "loss": 0.0347, "step": 66745 }, { "epoch": 0.278517245120211, "grad_norm": 1.1037947668501247, "learning_rate": 3.790021126422473e-06, "loss": 0.0292, "step": 66750 }, { "epoch": 0.2785381078352012, "grad_norm": 0.7959966123947824, "learning_rate": 3.789879183913447e-06, "loss": 0.0273, "step": 66755 }, { "epoch": 0.2785589705501915, "grad_norm": 0.9707569480893374, "learning_rate": 3.789737257351167e-06, "loss": 0.0303, "step": 66760 }, { "epoch": 0.2785798332651818, "grad_norm": 0.6387521277849381, "learning_rate": 3.7895953467326453e-06, "loss": 0.0322, "step": 66765 }, { "epoch": 0.27860069598017206, "grad_norm": 0.9870500093625891, "learning_rate": 3.7894534520548975e-06, "loss": 0.0299, "step": 66770 }, { "epoch": 0.27862155869516236, "grad_norm": 0.7585588818817304, "learning_rate": 3.7893115733149406e-06, "loss": 0.0297, "step": 66775 }, { "epoch": 0.2786424214101526, "grad_norm": 1.3870123745067253, "learning_rate": 3.7891697105097907e-06, "loss": 0.0326, "step": 66780 }, { "epoch": 0.2786632841251429, "grad_norm": 0.6009448216397654, "learning_rate": 3.789027863636465e-06, "loss": 0.0331, "step": 66785 }, { "epoch": 0.2786841468401332, "grad_norm": 0.9352079818225162, "learning_rate": 3.788886032691982e-06, "loss": 0.0321, "step": 66790 }, { "epoch": 0.27870500955512345, "grad_norm": 0.6351265923296883, "learning_rate": 3.7887442176733603e-06, "loss": 0.0274, "step": 66795 }, { "epoch": 0.27872587227011375, "grad_norm": 0.9173793864735369, "learning_rate": 3.78860241857762e-06, "loss": 0.025, "step": 66800 }, { "epoch": 0.278746734985104, "grad_norm": 1.4612595913465163, "learning_rate": 3.788460635401781e-06, "loss": 0.0322, "step": 66805 }, { "epoch": 0.2787675977000943, "grad_norm": 0.39273896362823574, "learning_rate": 3.7883188681428653e-06, "loss": 0.0316, "step": 66810 }, { "epoch": 0.2787884604150846, "grad_norm": 0.4983237914736175, "learning_rate": 3.788177116797895e-06, "loss": 0.0335, "step": 66815 }, { "epoch": 0.27880932313007484, "grad_norm": 0.9409978250521955, "learning_rate": 3.7880353813638925e-06, "loss": 0.0248, "step": 66820 }, { "epoch": 0.27883018584506514, "grad_norm": 0.9609981702925944, "learning_rate": 3.787893661837882e-06, "loss": 0.0287, "step": 66825 }, { "epoch": 0.27885104856005544, "grad_norm": 0.9527910191859164, "learning_rate": 3.7877519582168874e-06, "loss": 0.0392, "step": 66830 }, { "epoch": 0.2788719112750457, "grad_norm": 0.8843555816648875, "learning_rate": 3.7876102704979352e-06, "loss": 0.04, "step": 66835 }, { "epoch": 0.278892773990036, "grad_norm": 0.7627143507856621, "learning_rate": 3.787468598678049e-06, "loss": 0.0245, "step": 66840 }, { "epoch": 0.2789136367050262, "grad_norm": 1.248093355007327, "learning_rate": 3.7873269427542577e-06, "loss": 0.0227, "step": 66845 }, { "epoch": 0.2789344994200165, "grad_norm": 0.482827215494306, "learning_rate": 3.7871853027235882e-06, "loss": 0.022, "step": 66850 }, { "epoch": 0.2789553621350068, "grad_norm": 1.0567683298350015, "learning_rate": 3.787043678583069e-06, "loss": 0.0295, "step": 66855 }, { "epoch": 0.27897622484999707, "grad_norm": 0.9165651903533406, "learning_rate": 3.786902070329728e-06, "loss": 0.0326, "step": 66860 }, { "epoch": 0.27899708756498737, "grad_norm": 0.9201292075761531, "learning_rate": 3.7867604779605963e-06, "loss": 0.0281, "step": 66865 }, { "epoch": 0.2790179502799776, "grad_norm": 0.799038089266772, "learning_rate": 3.786618901472705e-06, "loss": 0.03, "step": 66870 }, { "epoch": 0.2790388129949679, "grad_norm": 6.24023307318616, "learning_rate": 3.786477340863085e-06, "loss": 0.0405, "step": 66875 }, { "epoch": 0.2790596757099582, "grad_norm": 0.8611038649912149, "learning_rate": 3.786335796128768e-06, "loss": 0.0399, "step": 66880 }, { "epoch": 0.27908053842494845, "grad_norm": 1.1339822348221962, "learning_rate": 3.7861942672667873e-06, "loss": 0.0319, "step": 66885 }, { "epoch": 0.27910140113993875, "grad_norm": 0.8344702626120293, "learning_rate": 3.786052754274177e-06, "loss": 0.0291, "step": 66890 }, { "epoch": 0.279122263854929, "grad_norm": 0.6451070952870129, "learning_rate": 3.7859112571479713e-06, "loss": 0.0248, "step": 66895 }, { "epoch": 0.2791431265699193, "grad_norm": 1.0788015496222598, "learning_rate": 3.7857697758852057e-06, "loss": 0.034, "step": 66900 }, { "epoch": 0.2791639892849096, "grad_norm": 1.031923123772327, "learning_rate": 3.785628310482916e-06, "loss": 0.0347, "step": 66905 }, { "epoch": 0.27918485199989984, "grad_norm": 0.7785258153490696, "learning_rate": 3.785486860938141e-06, "loss": 0.0348, "step": 66910 }, { "epoch": 0.27920571471489014, "grad_norm": 0.9867753397716749, "learning_rate": 3.7853454272479144e-06, "loss": 0.0254, "step": 66915 }, { "epoch": 0.27922657742988044, "grad_norm": 0.8372738808687038, "learning_rate": 3.785204009409278e-06, "loss": 0.0275, "step": 66920 }, { "epoch": 0.2792474401448707, "grad_norm": 0.85378694263208, "learning_rate": 3.78506260741927e-06, "loss": 0.0275, "step": 66925 }, { "epoch": 0.279268302859861, "grad_norm": 0.6557719286112786, "learning_rate": 3.7849212212749297e-06, "loss": 0.0349, "step": 66930 }, { "epoch": 0.27928916557485123, "grad_norm": 0.8246696433830134, "learning_rate": 3.7847798509732986e-06, "loss": 0.0335, "step": 66935 }, { "epoch": 0.2793100282898415, "grad_norm": 1.1845388705260798, "learning_rate": 3.7846384965114186e-06, "loss": 0.0476, "step": 66940 }, { "epoch": 0.2793308910048318, "grad_norm": 0.39796415103716576, "learning_rate": 3.784497157886331e-06, "loss": 0.0231, "step": 66945 }, { "epoch": 0.27935175371982207, "grad_norm": 0.5012968428594251, "learning_rate": 3.784355835095079e-06, "loss": 0.0262, "step": 66950 }, { "epoch": 0.27937261643481237, "grad_norm": 0.8393514039017089, "learning_rate": 3.7842145281347078e-06, "loss": 0.04, "step": 66955 }, { "epoch": 0.2793934791498026, "grad_norm": 2.0206874254727185, "learning_rate": 3.78407323700226e-06, "loss": 0.0378, "step": 66960 }, { "epoch": 0.2794143418647929, "grad_norm": 0.5644958100841339, "learning_rate": 3.7839319616947823e-06, "loss": 0.0273, "step": 66965 }, { "epoch": 0.2794352045797832, "grad_norm": 0.4840640150704224, "learning_rate": 3.78379070220932e-06, "loss": 0.0332, "step": 66970 }, { "epoch": 0.27945606729477346, "grad_norm": 0.579803122896629, "learning_rate": 3.7836494585429216e-06, "loss": 0.0258, "step": 66975 }, { "epoch": 0.27947693000976376, "grad_norm": 0.9718991958957598, "learning_rate": 3.783508230692633e-06, "loss": 0.0357, "step": 66980 }, { "epoch": 0.279497792724754, "grad_norm": 0.41204982160066356, "learning_rate": 3.7833670186555036e-06, "loss": 0.0207, "step": 66985 }, { "epoch": 0.2795186554397443, "grad_norm": 0.5677684185798113, "learning_rate": 3.783225822428583e-06, "loss": 0.0523, "step": 66990 }, { "epoch": 0.2795395181547346, "grad_norm": 0.9541101694122042, "learning_rate": 3.7830846420089195e-06, "loss": 0.0317, "step": 66995 }, { "epoch": 0.27956038086972484, "grad_norm": 0.9077014446409127, "learning_rate": 3.7829434773935665e-06, "loss": 0.0331, "step": 67000 }, { "epoch": 0.27958124358471514, "grad_norm": 0.7154221295812851, "learning_rate": 3.782802328579573e-06, "loss": 0.0302, "step": 67005 }, { "epoch": 0.27960210629970544, "grad_norm": 0.9802227609754419, "learning_rate": 3.782661195563993e-06, "loss": 0.0397, "step": 67010 }, { "epoch": 0.2796229690146957, "grad_norm": 1.2171339202484504, "learning_rate": 3.782520078343879e-06, "loss": 0.0327, "step": 67015 }, { "epoch": 0.279643831729686, "grad_norm": 0.8569391148772746, "learning_rate": 3.782378976916285e-06, "loss": 0.0345, "step": 67020 }, { "epoch": 0.27966469444467623, "grad_norm": 1.4491971211250712, "learning_rate": 3.7822378912782655e-06, "loss": 0.0371, "step": 67025 }, { "epoch": 0.27968555715966653, "grad_norm": 0.9481873954924672, "learning_rate": 3.7820968214268754e-06, "loss": 0.0262, "step": 67030 }, { "epoch": 0.27970641987465683, "grad_norm": 1.0984773287121996, "learning_rate": 3.7819557673591717e-06, "loss": 0.0287, "step": 67035 }, { "epoch": 0.2797272825896471, "grad_norm": 0.5169067860764613, "learning_rate": 3.781814729072211e-06, "loss": 0.034, "step": 67040 }, { "epoch": 0.2797481453046374, "grad_norm": 0.9132882749142226, "learning_rate": 3.7816737065630514e-06, "loss": 0.0346, "step": 67045 }, { "epoch": 0.2797690080196276, "grad_norm": 0.5505304713599897, "learning_rate": 3.781532699828751e-06, "loss": 0.0294, "step": 67050 }, { "epoch": 0.2797898707346179, "grad_norm": 0.6819825520429962, "learning_rate": 3.7813917088663693e-06, "loss": 0.0198, "step": 67055 }, { "epoch": 0.2798107334496082, "grad_norm": 1.4288148332897077, "learning_rate": 3.781250733672965e-06, "loss": 0.0284, "step": 67060 }, { "epoch": 0.27983159616459846, "grad_norm": 0.9728263327894587, "learning_rate": 3.7811097742456016e-06, "loss": 0.0273, "step": 67065 }, { "epoch": 0.27985245887958876, "grad_norm": 0.8562578373761839, "learning_rate": 3.7809688305813387e-06, "loss": 0.0296, "step": 67070 }, { "epoch": 0.279873321594579, "grad_norm": 0.6295233150531618, "learning_rate": 3.780827902677238e-06, "loss": 0.0312, "step": 67075 }, { "epoch": 0.2798941843095693, "grad_norm": 0.7504178734598814, "learning_rate": 3.7806869905303643e-06, "loss": 0.0257, "step": 67080 }, { "epoch": 0.2799150470245596, "grad_norm": 1.0594736395825948, "learning_rate": 3.7805460941377803e-06, "loss": 0.0351, "step": 67085 }, { "epoch": 0.27993590973954985, "grad_norm": 0.6206644109229158, "learning_rate": 3.7804052134965506e-06, "loss": 0.0297, "step": 67090 }, { "epoch": 0.27995677245454015, "grad_norm": 1.0715107016897483, "learning_rate": 3.7802643486037417e-06, "loss": 0.029, "step": 67095 }, { "epoch": 0.27997763516953045, "grad_norm": 0.5912970924042881, "learning_rate": 3.780123499456419e-06, "loss": 0.0302, "step": 67100 }, { "epoch": 0.2799984978845207, "grad_norm": 0.8586712604929334, "learning_rate": 3.7799826660516493e-06, "loss": 0.0355, "step": 67105 }, { "epoch": 0.280019360599511, "grad_norm": 1.2664906715763689, "learning_rate": 3.7798418483865003e-06, "loss": 0.0332, "step": 67110 }, { "epoch": 0.28004022331450124, "grad_norm": 0.8009783689621969, "learning_rate": 3.7797010464580405e-06, "loss": 0.0259, "step": 67115 }, { "epoch": 0.28006108602949153, "grad_norm": 0.5080234255190574, "learning_rate": 3.7795602602633383e-06, "loss": 0.027, "step": 67120 }, { "epoch": 0.28008194874448183, "grad_norm": 0.6990297112960037, "learning_rate": 3.7794194897994657e-06, "loss": 0.0318, "step": 67125 }, { "epoch": 0.2801028114594721, "grad_norm": 0.9992204112195846, "learning_rate": 3.779278735063492e-06, "loss": 0.0409, "step": 67130 }, { "epoch": 0.2801236741744624, "grad_norm": 0.7894191088054621, "learning_rate": 3.7791379960524882e-06, "loss": 0.0272, "step": 67135 }, { "epoch": 0.2801445368894526, "grad_norm": 0.9671143679438374, "learning_rate": 3.7789972727635278e-06, "loss": 0.0228, "step": 67140 }, { "epoch": 0.2801653996044429, "grad_norm": 1.1041421251757944, "learning_rate": 3.7788565651936832e-06, "loss": 0.0359, "step": 67145 }, { "epoch": 0.2801862623194332, "grad_norm": 1.1111804759492316, "learning_rate": 3.7787158733400285e-06, "loss": 0.0235, "step": 67150 }, { "epoch": 0.28020712503442347, "grad_norm": 0.7575689632453367, "learning_rate": 3.7785751971996375e-06, "loss": 0.0223, "step": 67155 }, { "epoch": 0.28022798774941376, "grad_norm": 0.672320841847447, "learning_rate": 3.7784345367695862e-06, "loss": 0.0282, "step": 67160 }, { "epoch": 0.280248850464404, "grad_norm": 0.5997951508843526, "learning_rate": 3.778293892046951e-06, "loss": 0.0208, "step": 67165 }, { "epoch": 0.2802697131793943, "grad_norm": 1.0656008560902814, "learning_rate": 3.7781532630288077e-06, "loss": 0.0326, "step": 67170 }, { "epoch": 0.2802905758943846, "grad_norm": 0.5237349148026145, "learning_rate": 3.7780126497122345e-06, "loss": 0.0292, "step": 67175 }, { "epoch": 0.28031143860937485, "grad_norm": 0.6863011771066599, "learning_rate": 3.7778720520943096e-06, "loss": 0.0313, "step": 67180 }, { "epoch": 0.28033230132436515, "grad_norm": 0.6490660861761732, "learning_rate": 3.7777314701721123e-06, "loss": 0.0257, "step": 67185 }, { "epoch": 0.28035316403935545, "grad_norm": 0.7257600986830374, "learning_rate": 3.777590903942722e-06, "loss": 0.0319, "step": 67190 }, { "epoch": 0.2803740267543457, "grad_norm": 0.8595911852471388, "learning_rate": 3.77745035340322e-06, "loss": 0.0352, "step": 67195 }, { "epoch": 0.280394889469336, "grad_norm": 0.5026956001451786, "learning_rate": 3.7773098185506875e-06, "loss": 0.03, "step": 67200 }, { "epoch": 0.28041575218432624, "grad_norm": 1.1808063328443343, "learning_rate": 3.7771692993822064e-06, "loss": 0.0273, "step": 67205 }, { "epoch": 0.28043661489931654, "grad_norm": 0.6472182284157413, "learning_rate": 3.7770287958948598e-06, "loss": 0.0288, "step": 67210 }, { "epoch": 0.28045747761430684, "grad_norm": 0.6303798320902222, "learning_rate": 3.776888308085731e-06, "loss": 0.0311, "step": 67215 }, { "epoch": 0.2804783403292971, "grad_norm": 0.9264479240459836, "learning_rate": 3.776747835951905e-06, "loss": 0.0301, "step": 67220 }, { "epoch": 0.2804992030442874, "grad_norm": 0.890153960643246, "learning_rate": 3.7766073794904665e-06, "loss": 0.0392, "step": 67225 }, { "epoch": 0.2805200657592776, "grad_norm": 1.054113307913783, "learning_rate": 3.776466938698502e-06, "loss": 0.0343, "step": 67230 }, { "epoch": 0.2805409284742679, "grad_norm": 1.3621056237761953, "learning_rate": 3.7763265135730988e-06, "loss": 0.0276, "step": 67235 }, { "epoch": 0.2805617911892582, "grad_norm": 1.1931785374598263, "learning_rate": 3.7761861041113424e-06, "loss": 0.0293, "step": 67240 }, { "epoch": 0.28058265390424847, "grad_norm": 0.5651626841529636, "learning_rate": 3.7760457103103227e-06, "loss": 0.0354, "step": 67245 }, { "epoch": 0.28060351661923877, "grad_norm": 1.2958478665823823, "learning_rate": 3.7759053321671272e-06, "loss": 0.0334, "step": 67250 }, { "epoch": 0.280624379334229, "grad_norm": 1.124435209754677, "learning_rate": 3.7757649696788467e-06, "loss": 0.0251, "step": 67255 }, { "epoch": 0.2806452420492193, "grad_norm": 0.6822559570972441, "learning_rate": 3.7756246228425725e-06, "loss": 0.0247, "step": 67260 }, { "epoch": 0.2806661047642096, "grad_norm": 0.6587786763568508, "learning_rate": 3.7754842916553947e-06, "loss": 0.0307, "step": 67265 }, { "epoch": 0.28068696747919986, "grad_norm": 0.4508428383194307, "learning_rate": 3.775343976114405e-06, "loss": 0.0257, "step": 67270 }, { "epoch": 0.28070783019419016, "grad_norm": 0.6894006548762427, "learning_rate": 3.7752036762166965e-06, "loss": 0.0253, "step": 67275 }, { "epoch": 0.28072869290918046, "grad_norm": 0.7671981922069748, "learning_rate": 3.775063391959363e-06, "loss": 0.0241, "step": 67280 }, { "epoch": 0.2807495556241707, "grad_norm": 1.0179068814002652, "learning_rate": 3.774923123339499e-06, "loss": 0.0383, "step": 67285 }, { "epoch": 0.280770418339161, "grad_norm": 1.0620200471696055, "learning_rate": 3.7747828703541993e-06, "loss": 0.0292, "step": 67290 }, { "epoch": 0.28079128105415124, "grad_norm": 0.8672104411686177, "learning_rate": 3.7746426330005605e-06, "loss": 0.0506, "step": 67295 }, { "epoch": 0.28081214376914154, "grad_norm": 1.1204114118428392, "learning_rate": 3.7745024112756766e-06, "loss": 0.0326, "step": 67300 }, { "epoch": 0.28083300648413184, "grad_norm": 0.620520313679588, "learning_rate": 3.7743622051766477e-06, "loss": 0.0248, "step": 67305 }, { "epoch": 0.2808538691991221, "grad_norm": 0.8363928262392903, "learning_rate": 3.7742220147005698e-06, "loss": 0.0286, "step": 67310 }, { "epoch": 0.2808747319141124, "grad_norm": 0.26324705810245136, "learning_rate": 3.7740818398445434e-06, "loss": 0.0235, "step": 67315 }, { "epoch": 0.28089559462910263, "grad_norm": 1.2523596812543079, "learning_rate": 3.773941680605667e-06, "loss": 0.0351, "step": 67320 }, { "epoch": 0.28091645734409293, "grad_norm": 0.6226865857662998, "learning_rate": 3.7738015369810414e-06, "loss": 0.0241, "step": 67325 }, { "epoch": 0.28093732005908323, "grad_norm": 1.755223050425833, "learning_rate": 3.7736614089677675e-06, "loss": 0.033, "step": 67330 }, { "epoch": 0.2809581827740735, "grad_norm": 1.872532340352745, "learning_rate": 3.773521296562947e-06, "loss": 0.0365, "step": 67335 }, { "epoch": 0.2809790454890638, "grad_norm": 0.9991361144201879, "learning_rate": 3.7733811997636826e-06, "loss": 0.0331, "step": 67340 }, { "epoch": 0.280999908204054, "grad_norm": 0.7089111262977982, "learning_rate": 3.773241118567078e-06, "loss": 0.0319, "step": 67345 }, { "epoch": 0.2810207709190443, "grad_norm": 0.45484833460135493, "learning_rate": 3.7731010529702368e-06, "loss": 0.0296, "step": 67350 }, { "epoch": 0.2810416336340346, "grad_norm": 0.8955118998310793, "learning_rate": 3.772961002970264e-06, "loss": 0.0293, "step": 67355 }, { "epoch": 0.28106249634902486, "grad_norm": 0.7004676184381305, "learning_rate": 3.772820968564265e-06, "loss": 0.0292, "step": 67360 }, { "epoch": 0.28108335906401516, "grad_norm": 0.5783681550905757, "learning_rate": 3.7726809497493465e-06, "loss": 0.0265, "step": 67365 }, { "epoch": 0.28110422177900546, "grad_norm": 0.7035777600266231, "learning_rate": 3.7725409465226146e-06, "loss": 0.0358, "step": 67370 }, { "epoch": 0.2811250844939957, "grad_norm": 0.8805099069284578, "learning_rate": 3.772400958881179e-06, "loss": 0.0317, "step": 67375 }, { "epoch": 0.281145947208986, "grad_norm": 0.6185664570004249, "learning_rate": 3.772260986822147e-06, "loss": 0.0396, "step": 67380 }, { "epoch": 0.28116680992397625, "grad_norm": 0.7461074800167747, "learning_rate": 3.772121030342628e-06, "loss": 0.0299, "step": 67385 }, { "epoch": 0.28118767263896655, "grad_norm": 0.2631876374962334, "learning_rate": 3.7719810894397325e-06, "loss": 0.0215, "step": 67390 }, { "epoch": 0.28120853535395685, "grad_norm": 0.8452562630700182, "learning_rate": 3.771841164110571e-06, "loss": 0.0274, "step": 67395 }, { "epoch": 0.2812293980689471, "grad_norm": 0.7492722303198929, "learning_rate": 3.771701254352256e-06, "loss": 0.0326, "step": 67400 }, { "epoch": 0.2812502607839374, "grad_norm": 0.6265418089098951, "learning_rate": 3.7715613601618977e-06, "loss": 0.0357, "step": 67405 }, { "epoch": 0.28127112349892763, "grad_norm": 1.0293622981785358, "learning_rate": 3.7714214815366118e-06, "loss": 0.0269, "step": 67410 }, { "epoch": 0.28129198621391793, "grad_norm": 0.6000304039957269, "learning_rate": 3.7712816184735103e-06, "loss": 0.0205, "step": 67415 }, { "epoch": 0.28131284892890823, "grad_norm": 0.9868084754538712, "learning_rate": 3.7711417709697086e-06, "loss": 0.0323, "step": 67420 }, { "epoch": 0.2813337116438985, "grad_norm": 1.0138591596005124, "learning_rate": 3.7710019390223218e-06, "loss": 0.0311, "step": 67425 }, { "epoch": 0.2813545743588888, "grad_norm": 0.9642672370269638, "learning_rate": 3.7708621226284665e-06, "loss": 0.0286, "step": 67430 }, { "epoch": 0.281375437073879, "grad_norm": 1.196089754763262, "learning_rate": 3.7707223217852586e-06, "loss": 0.0309, "step": 67435 }, { "epoch": 0.2813962997888693, "grad_norm": 1.1377229116670011, "learning_rate": 3.7705825364898164e-06, "loss": 0.0286, "step": 67440 }, { "epoch": 0.2814171625038596, "grad_norm": 0.768138060371576, "learning_rate": 3.7704427667392584e-06, "loss": 0.0338, "step": 67445 }, { "epoch": 0.28143802521884986, "grad_norm": 0.6384908998654296, "learning_rate": 3.7703030125307035e-06, "loss": 0.0282, "step": 67450 }, { "epoch": 0.28145888793384016, "grad_norm": 0.9125629421666595, "learning_rate": 3.7701632738612703e-06, "loss": 0.0292, "step": 67455 }, { "epoch": 0.28147975064883046, "grad_norm": 0.7773044953143625, "learning_rate": 3.770023550728081e-06, "loss": 0.0258, "step": 67460 }, { "epoch": 0.2815006133638207, "grad_norm": 0.7182760741492992, "learning_rate": 3.769883843128257e-06, "loss": 0.0271, "step": 67465 }, { "epoch": 0.281521476078811, "grad_norm": 0.7658554415698317, "learning_rate": 3.7697441510589195e-06, "loss": 0.0318, "step": 67470 }, { "epoch": 0.28154233879380125, "grad_norm": 0.6774642248866194, "learning_rate": 3.7696044745171913e-06, "loss": 0.0331, "step": 67475 }, { "epoch": 0.28156320150879155, "grad_norm": 0.5739373879354313, "learning_rate": 3.7694648135001965e-06, "loss": 0.0224, "step": 67480 }, { "epoch": 0.28158406422378185, "grad_norm": 1.1185618262491095, "learning_rate": 3.769325168005059e-06, "loss": 0.0251, "step": 67485 }, { "epoch": 0.2816049269387721, "grad_norm": 0.9646405580086067, "learning_rate": 3.769185538028904e-06, "loss": 0.0282, "step": 67490 }, { "epoch": 0.2816257896537624, "grad_norm": 0.9708866386095774, "learning_rate": 3.7690459235688577e-06, "loss": 0.0211, "step": 67495 }, { "epoch": 0.28164665236875264, "grad_norm": 0.6591547241525851, "learning_rate": 3.768906324622046e-06, "loss": 0.0269, "step": 67500 }, { "epoch": 0.28166751508374294, "grad_norm": 0.8070789577151073, "learning_rate": 3.7687667411855965e-06, "loss": 0.0301, "step": 67505 }, { "epoch": 0.28168837779873324, "grad_norm": 0.8466053838921205, "learning_rate": 3.768627173256637e-06, "loss": 0.0255, "step": 67510 }, { "epoch": 0.2817092405137235, "grad_norm": 1.698679701347587, "learning_rate": 3.7684876208322967e-06, "loss": 0.0317, "step": 67515 }, { "epoch": 0.2817301032287138, "grad_norm": 0.5860553977114562, "learning_rate": 3.7683480839097054e-06, "loss": 0.0273, "step": 67520 }, { "epoch": 0.281750965943704, "grad_norm": 0.9928217698146743, "learning_rate": 3.7682085624859925e-06, "loss": 0.0295, "step": 67525 }, { "epoch": 0.2817718286586943, "grad_norm": 0.8404740749941508, "learning_rate": 3.7680690565582895e-06, "loss": 0.023, "step": 67530 }, { "epoch": 0.2817926913736846, "grad_norm": 0.628692211500946, "learning_rate": 3.7679295661237285e-06, "loss": 0.0317, "step": 67535 }, { "epoch": 0.28181355408867487, "grad_norm": 0.5084206605767204, "learning_rate": 3.7677900911794413e-06, "loss": 0.0242, "step": 67540 }, { "epoch": 0.28183441680366517, "grad_norm": 0.7473688316751754, "learning_rate": 3.7676506317225616e-06, "loss": 0.0247, "step": 67545 }, { "epoch": 0.28185527951865547, "grad_norm": 0.4717397190499318, "learning_rate": 3.767511187750223e-06, "loss": 0.0285, "step": 67550 }, { "epoch": 0.2818761422336457, "grad_norm": 0.30281456817045516, "learning_rate": 3.7673717592595604e-06, "loss": 0.0162, "step": 67555 }, { "epoch": 0.281897004948636, "grad_norm": 1.0363357313401618, "learning_rate": 3.7672323462477088e-06, "loss": 0.0314, "step": 67560 }, { "epoch": 0.28191786766362625, "grad_norm": 1.9717373664266198, "learning_rate": 3.7670929487118056e-06, "loss": 0.0273, "step": 67565 }, { "epoch": 0.28193873037861655, "grad_norm": 0.8282332142846577, "learning_rate": 3.7669535666489878e-06, "loss": 0.04, "step": 67570 }, { "epoch": 0.28195959309360685, "grad_norm": 1.0831395253438068, "learning_rate": 3.766814200056391e-06, "loss": 0.0289, "step": 67575 }, { "epoch": 0.2819804558085971, "grad_norm": 0.7966146421812257, "learning_rate": 3.766674848931156e-06, "loss": 0.0222, "step": 67580 }, { "epoch": 0.2820013185235874, "grad_norm": 0.4974869751281322, "learning_rate": 3.766535513270421e-06, "loss": 0.0268, "step": 67585 }, { "epoch": 0.28202218123857764, "grad_norm": 0.8604416176623413, "learning_rate": 3.7663961930713262e-06, "loss": 0.0248, "step": 67590 }, { "epoch": 0.28204304395356794, "grad_norm": 0.8455113479506464, "learning_rate": 3.7662568883310113e-06, "loss": 0.0258, "step": 67595 }, { "epoch": 0.28206390666855824, "grad_norm": 0.9056023363102117, "learning_rate": 3.766117599046619e-06, "loss": 0.0252, "step": 67600 }, { "epoch": 0.2820847693835485, "grad_norm": 1.6789931907598605, "learning_rate": 3.7659783252152904e-06, "loss": 0.0379, "step": 67605 }, { "epoch": 0.2821056320985388, "grad_norm": 0.5403030874710436, "learning_rate": 3.7658390668341695e-06, "loss": 0.0322, "step": 67610 }, { "epoch": 0.282126494813529, "grad_norm": 1.179161208144174, "learning_rate": 3.765699823900398e-06, "loss": 0.0465, "step": 67615 }, { "epoch": 0.2821473575285193, "grad_norm": 0.5982316485837595, "learning_rate": 3.765560596411123e-06, "loss": 0.0261, "step": 67620 }, { "epoch": 0.2821682202435096, "grad_norm": 0.6548395796647765, "learning_rate": 3.7654213843634873e-06, "loss": 0.0299, "step": 67625 }, { "epoch": 0.28218908295849987, "grad_norm": 0.869893626784917, "learning_rate": 3.7652821877546382e-06, "loss": 0.0407, "step": 67630 }, { "epoch": 0.28220994567349017, "grad_norm": 0.7277485182351731, "learning_rate": 3.7651430065817207e-06, "loss": 0.0307, "step": 67635 }, { "epoch": 0.28223080838848047, "grad_norm": 0.8324794148757015, "learning_rate": 3.7650038408418837e-06, "loss": 0.0343, "step": 67640 }, { "epoch": 0.2822516711034707, "grad_norm": 0.9864313626743391, "learning_rate": 3.764864690532274e-06, "loss": 0.0337, "step": 67645 }, { "epoch": 0.282272533818461, "grad_norm": 0.5739567794944076, "learning_rate": 3.764725555650042e-06, "loss": 0.0283, "step": 67650 }, { "epoch": 0.28229339653345126, "grad_norm": 0.5155874400604575, "learning_rate": 3.7645864361923356e-06, "loss": 0.0261, "step": 67655 }, { "epoch": 0.28231425924844156, "grad_norm": 0.37589697079014733, "learning_rate": 3.7644473321563056e-06, "loss": 0.0283, "step": 67660 }, { "epoch": 0.28233512196343186, "grad_norm": 0.8633029121705618, "learning_rate": 3.764308243539103e-06, "loss": 0.034, "step": 67665 }, { "epoch": 0.2823559846784221, "grad_norm": 1.2488743760838859, "learning_rate": 3.7641691703378807e-06, "loss": 0.0322, "step": 67670 }, { "epoch": 0.2823768473934124, "grad_norm": 0.8426901905125717, "learning_rate": 3.764030112549789e-06, "loss": 0.0416, "step": 67675 }, { "epoch": 0.28239771010840264, "grad_norm": 0.4103684721016286, "learning_rate": 3.763891070171983e-06, "loss": 0.0346, "step": 67680 }, { "epoch": 0.28241857282339294, "grad_norm": 1.1112374800981042, "learning_rate": 3.7637520432016155e-06, "loss": 0.0328, "step": 67685 }, { "epoch": 0.28243943553838324, "grad_norm": 0.8883429548055729, "learning_rate": 3.7636130316358416e-06, "loss": 0.0328, "step": 67690 }, { "epoch": 0.2824602982533735, "grad_norm": 1.0320000866183623, "learning_rate": 3.7634740354718164e-06, "loss": 0.0343, "step": 67695 }, { "epoch": 0.2824811609683638, "grad_norm": 0.48168939875236255, "learning_rate": 3.7633350547066965e-06, "loss": 0.0256, "step": 67700 }, { "epoch": 0.28250202368335403, "grad_norm": 0.4250654424398107, "learning_rate": 3.763196089337639e-06, "loss": 0.0316, "step": 67705 }, { "epoch": 0.28252288639834433, "grad_norm": 0.8028806560924109, "learning_rate": 3.763057139361801e-06, "loss": 0.029, "step": 67710 }, { "epoch": 0.28254374911333463, "grad_norm": 0.7710053649905566, "learning_rate": 3.762918204776341e-06, "loss": 0.0293, "step": 67715 }, { "epoch": 0.2825646118283249, "grad_norm": 0.6150663296806644, "learning_rate": 3.7627792855784184e-06, "loss": 0.0268, "step": 67720 }, { "epoch": 0.2825854745433152, "grad_norm": 0.7899943552223899, "learning_rate": 3.762640381765192e-06, "loss": 0.04, "step": 67725 }, { "epoch": 0.2826063372583055, "grad_norm": 0.6292899077442641, "learning_rate": 3.7625014933338235e-06, "loss": 0.022, "step": 67730 }, { "epoch": 0.2826271999732957, "grad_norm": 0.8986329054096972, "learning_rate": 3.7623626202814734e-06, "loss": 0.0252, "step": 67735 }, { "epoch": 0.282648062688286, "grad_norm": 0.7515523500402322, "learning_rate": 3.7622237626053047e-06, "loss": 0.0387, "step": 67740 }, { "epoch": 0.28266892540327626, "grad_norm": 0.5482525854649277, "learning_rate": 3.7620849203024796e-06, "loss": 0.0223, "step": 67745 }, { "epoch": 0.28268978811826656, "grad_norm": 1.1622810439558262, "learning_rate": 3.761946093370161e-06, "loss": 0.0347, "step": 67750 }, { "epoch": 0.28271065083325686, "grad_norm": 0.7951833421586074, "learning_rate": 3.7618072818055145e-06, "loss": 0.0282, "step": 67755 }, { "epoch": 0.2827315135482471, "grad_norm": 0.5473667635978176, "learning_rate": 3.7616684856057038e-06, "loss": 0.0277, "step": 67760 }, { "epoch": 0.2827523762632374, "grad_norm": 0.7528527879447915, "learning_rate": 3.761529704767895e-06, "loss": 0.0346, "step": 67765 }, { "epoch": 0.28277323897822765, "grad_norm": 0.6535947386782596, "learning_rate": 3.7613909392892558e-06, "loss": 0.0251, "step": 67770 }, { "epoch": 0.28279410169321795, "grad_norm": 0.6799247027696007, "learning_rate": 3.7612521891669506e-06, "loss": 0.0292, "step": 67775 }, { "epoch": 0.28281496440820825, "grad_norm": 0.44837089861953844, "learning_rate": 3.7611134543981493e-06, "loss": 0.0324, "step": 67780 }, { "epoch": 0.2828358271231985, "grad_norm": 1.0210937351572928, "learning_rate": 3.76097473498002e-06, "loss": 0.0394, "step": 67785 }, { "epoch": 0.2828566898381888, "grad_norm": 1.3646266216943728, "learning_rate": 3.7608360309097325e-06, "loss": 0.0343, "step": 67790 }, { "epoch": 0.28287755255317903, "grad_norm": 0.6955070242433959, "learning_rate": 3.760697342184456e-06, "loss": 0.0435, "step": 67795 }, { "epoch": 0.28289841526816933, "grad_norm": 0.9052563685617222, "learning_rate": 3.7605586688013622e-06, "loss": 0.0246, "step": 67800 }, { "epoch": 0.28291927798315963, "grad_norm": 1.0202472739939017, "learning_rate": 3.7604200107576222e-06, "loss": 0.0294, "step": 67805 }, { "epoch": 0.2829401406981499, "grad_norm": 0.8595351477831362, "learning_rate": 3.7602813680504086e-06, "loss": 0.0314, "step": 67810 }, { "epoch": 0.2829610034131402, "grad_norm": 0.6827158902770414, "learning_rate": 3.760142740676893e-06, "loss": 0.0325, "step": 67815 }, { "epoch": 0.2829818661281305, "grad_norm": 0.6175168410657441, "learning_rate": 3.760004128634251e-06, "loss": 0.0271, "step": 67820 }, { "epoch": 0.2830027288431207, "grad_norm": 0.5389596168338783, "learning_rate": 3.7598655319196565e-06, "loss": 0.0323, "step": 67825 }, { "epoch": 0.283023591558111, "grad_norm": 0.6237425137560028, "learning_rate": 3.759726950530285e-06, "loss": 0.0322, "step": 67830 }, { "epoch": 0.28304445427310126, "grad_norm": 0.681452759621641, "learning_rate": 3.7595883844633107e-06, "loss": 0.027, "step": 67835 }, { "epoch": 0.28306531698809156, "grad_norm": 0.7516035828108495, "learning_rate": 3.759449833715912e-06, "loss": 0.0281, "step": 67840 }, { "epoch": 0.28308617970308186, "grad_norm": 2.1128753707447716, "learning_rate": 3.7593112982852653e-06, "loss": 0.0301, "step": 67845 }, { "epoch": 0.2831070424180721, "grad_norm": 1.2220054521492176, "learning_rate": 3.75917277816855e-06, "loss": 0.0254, "step": 67850 }, { "epoch": 0.2831279051330624, "grad_norm": 0.7427714002339735, "learning_rate": 3.759034273362943e-06, "loss": 0.0348, "step": 67855 }, { "epoch": 0.28314876784805265, "grad_norm": 0.7680084983230104, "learning_rate": 3.758895783865626e-06, "loss": 0.029, "step": 67860 }, { "epoch": 0.28316963056304295, "grad_norm": 0.7200116846724085, "learning_rate": 3.758757309673778e-06, "loss": 0.0347, "step": 67865 }, { "epoch": 0.28319049327803325, "grad_norm": 0.5037368018038547, "learning_rate": 3.7586188507845794e-06, "loss": 0.0203, "step": 67870 }, { "epoch": 0.2832113559930235, "grad_norm": 0.7905297870403163, "learning_rate": 3.7584804071952134e-06, "loss": 0.0332, "step": 67875 }, { "epoch": 0.2832322187080138, "grad_norm": 0.8581781644117209, "learning_rate": 3.758341978902862e-06, "loss": 0.0303, "step": 67880 }, { "epoch": 0.28325308142300404, "grad_norm": 0.7520317260671058, "learning_rate": 3.7582035659047074e-06, "loss": 0.0286, "step": 67885 }, { "epoch": 0.28327394413799434, "grad_norm": 0.9907362479626713, "learning_rate": 3.758065168197935e-06, "loss": 0.0401, "step": 67890 }, { "epoch": 0.28329480685298464, "grad_norm": 1.6687013638626933, "learning_rate": 3.757926785779729e-06, "loss": 0.0353, "step": 67895 }, { "epoch": 0.2833156695679749, "grad_norm": 0.8969183154436945, "learning_rate": 3.757788418647274e-06, "loss": 0.0377, "step": 67900 }, { "epoch": 0.2833365322829652, "grad_norm": 0.7672285244866388, "learning_rate": 3.7576500667977568e-06, "loss": 0.0281, "step": 67905 }, { "epoch": 0.2833573949979555, "grad_norm": 0.410524299972749, "learning_rate": 3.7575117302283635e-06, "loss": 0.0281, "step": 67910 }, { "epoch": 0.2833782577129457, "grad_norm": 0.7047688478318163, "learning_rate": 3.7573734089362834e-06, "loss": 0.0284, "step": 67915 }, { "epoch": 0.283399120427936, "grad_norm": 0.9091194339347047, "learning_rate": 3.7572351029187032e-06, "loss": 0.0354, "step": 67920 }, { "epoch": 0.28341998314292627, "grad_norm": 0.9386694760177919, "learning_rate": 3.757096812172812e-06, "loss": 0.0458, "step": 67925 }, { "epoch": 0.28344084585791657, "grad_norm": 0.5219286919931413, "learning_rate": 3.7569585366957996e-06, "loss": 0.0238, "step": 67930 }, { "epoch": 0.28346170857290687, "grad_norm": 1.2520980532023482, "learning_rate": 3.756820276484857e-06, "loss": 0.0272, "step": 67935 }, { "epoch": 0.2834825712878971, "grad_norm": 0.6475993260694143, "learning_rate": 3.756682031537175e-06, "loss": 0.0309, "step": 67940 }, { "epoch": 0.2835034340028874, "grad_norm": 0.7704970382393155, "learning_rate": 3.756543801849945e-06, "loss": 0.046, "step": 67945 }, { "epoch": 0.28352429671787766, "grad_norm": 1.4394599243208375, "learning_rate": 3.756405587420361e-06, "loss": 0.0323, "step": 67950 }, { "epoch": 0.28354515943286795, "grad_norm": 0.5800943221029131, "learning_rate": 3.7562673882456148e-06, "loss": 0.0274, "step": 67955 }, { "epoch": 0.28356602214785825, "grad_norm": 1.0802649883038742, "learning_rate": 3.756129204322901e-06, "loss": 0.0279, "step": 67960 }, { "epoch": 0.2835868848628485, "grad_norm": 0.8070616223494834, "learning_rate": 3.7559910356494145e-06, "loss": 0.0299, "step": 67965 }, { "epoch": 0.2836077475778388, "grad_norm": 0.847698740212497, "learning_rate": 3.755852882222351e-06, "loss": 0.0395, "step": 67970 }, { "epoch": 0.28362861029282904, "grad_norm": 0.4439518295564232, "learning_rate": 3.7557147440389063e-06, "loss": 0.023, "step": 67975 }, { "epoch": 0.28364947300781934, "grad_norm": 1.1856936735898107, "learning_rate": 3.755576621096278e-06, "loss": 0.0411, "step": 67980 }, { "epoch": 0.28367033572280964, "grad_norm": 0.9159192819531293, "learning_rate": 3.7554385133916633e-06, "loss": 0.0363, "step": 67985 }, { "epoch": 0.2836911984377999, "grad_norm": 0.8013635021110908, "learning_rate": 3.7553004209222597e-06, "loss": 0.0298, "step": 67990 }, { "epoch": 0.2837120611527902, "grad_norm": 0.9081423065068075, "learning_rate": 3.755162343685268e-06, "loss": 0.0249, "step": 67995 }, { "epoch": 0.2837329238677805, "grad_norm": 0.8707247684770772, "learning_rate": 3.755024281677887e-06, "loss": 0.0233, "step": 68000 }, { "epoch": 0.28375378658277073, "grad_norm": 1.23267546387824, "learning_rate": 3.7548862348973176e-06, "loss": 0.0302, "step": 68005 }, { "epoch": 0.28377464929776103, "grad_norm": 0.5935992257945247, "learning_rate": 3.7547482033407612e-06, "loss": 0.0287, "step": 68010 }, { "epoch": 0.28379551201275127, "grad_norm": 2.320450271817563, "learning_rate": 3.754610187005419e-06, "loss": 0.0403, "step": 68015 }, { "epoch": 0.28381637472774157, "grad_norm": 0.36847280723135734, "learning_rate": 3.754472185888495e-06, "loss": 0.024, "step": 68020 }, { "epoch": 0.28383723744273187, "grad_norm": 0.8303580620133058, "learning_rate": 3.7543341999871913e-06, "loss": 0.03, "step": 68025 }, { "epoch": 0.2838581001577221, "grad_norm": 0.7052325982914991, "learning_rate": 3.7541962292987127e-06, "loss": 0.0226, "step": 68030 }, { "epoch": 0.2838789628727124, "grad_norm": 0.6878624517315746, "learning_rate": 3.7540582738202645e-06, "loss": 0.0344, "step": 68035 }, { "epoch": 0.28389982558770266, "grad_norm": 0.6126455462647341, "learning_rate": 3.7539203335490516e-06, "loss": 0.0262, "step": 68040 }, { "epoch": 0.28392068830269296, "grad_norm": 0.9555314232237679, "learning_rate": 3.7537824084822803e-06, "loss": 0.0377, "step": 68045 }, { "epoch": 0.28394155101768326, "grad_norm": 0.8274969348406841, "learning_rate": 3.753644498617158e-06, "loss": 0.0254, "step": 68050 }, { "epoch": 0.2839624137326735, "grad_norm": 0.566756525785319, "learning_rate": 3.7535066039508924e-06, "loss": 0.0337, "step": 68055 }, { "epoch": 0.2839832764476638, "grad_norm": 0.8769670992849464, "learning_rate": 3.753368724480692e-06, "loss": 0.0239, "step": 68060 }, { "epoch": 0.28400413916265405, "grad_norm": 0.6250739929377923, "learning_rate": 3.7532308602037653e-06, "loss": 0.0255, "step": 68065 }, { "epoch": 0.28402500187764435, "grad_norm": 0.4676177009651796, "learning_rate": 3.7530930111173237e-06, "loss": 0.0263, "step": 68070 }, { "epoch": 0.28404586459263464, "grad_norm": 1.0035264860223485, "learning_rate": 3.752955177218576e-06, "loss": 0.0291, "step": 68075 }, { "epoch": 0.2840667273076249, "grad_norm": 0.6654918080295047, "learning_rate": 3.7528173585047346e-06, "loss": 0.0307, "step": 68080 }, { "epoch": 0.2840875900226152, "grad_norm": 0.6961814118612245, "learning_rate": 3.7526795549730116e-06, "loss": 0.026, "step": 68085 }, { "epoch": 0.2841084527376055, "grad_norm": 0.7195247569352677, "learning_rate": 3.7525417666206187e-06, "loss": 0.0329, "step": 68090 }, { "epoch": 0.28412931545259573, "grad_norm": 1.44523794326602, "learning_rate": 3.752403993444771e-06, "loss": 0.044, "step": 68095 }, { "epoch": 0.28415017816758603, "grad_norm": 1.0385519737679128, "learning_rate": 3.752266235442682e-06, "loss": 0.0368, "step": 68100 }, { "epoch": 0.2841710408825763, "grad_norm": 1.065145488676698, "learning_rate": 3.7521284926115663e-06, "loss": 0.0405, "step": 68105 }, { "epoch": 0.2841919035975666, "grad_norm": 1.1328538003720823, "learning_rate": 3.75199076494864e-06, "loss": 0.0348, "step": 68110 }, { "epoch": 0.2842127663125569, "grad_norm": 0.9587371647819777, "learning_rate": 3.7518530524511187e-06, "loss": 0.0264, "step": 68115 }, { "epoch": 0.2842336290275471, "grad_norm": 0.9090793132706286, "learning_rate": 3.75171535511622e-06, "loss": 0.034, "step": 68120 }, { "epoch": 0.2842544917425374, "grad_norm": 0.896655933316112, "learning_rate": 3.7515776729411617e-06, "loss": 0.0292, "step": 68125 }, { "epoch": 0.28427535445752766, "grad_norm": 0.8166081726213628, "learning_rate": 3.7514400059231625e-06, "loss": 0.0379, "step": 68130 }, { "epoch": 0.28429621717251796, "grad_norm": 0.7081436821150906, "learning_rate": 3.7513023540594412e-06, "loss": 0.0319, "step": 68135 }, { "epoch": 0.28431707988750826, "grad_norm": 0.5688516091939453, "learning_rate": 3.751164717347218e-06, "loss": 0.0253, "step": 68140 }, { "epoch": 0.2843379426024985, "grad_norm": 1.0151981580670286, "learning_rate": 3.751027095783713e-06, "loss": 0.0293, "step": 68145 }, { "epoch": 0.2843588053174888, "grad_norm": 0.6600695907501319, "learning_rate": 3.750889489366148e-06, "loss": 0.0375, "step": 68150 }, { "epoch": 0.28437966803247905, "grad_norm": 1.263900302970041, "learning_rate": 3.7507518980917446e-06, "loss": 0.0356, "step": 68155 }, { "epoch": 0.28440053074746935, "grad_norm": 0.828553807920367, "learning_rate": 3.750614321957726e-06, "loss": 0.0355, "step": 68160 }, { "epoch": 0.28442139346245965, "grad_norm": 0.8121350397728787, "learning_rate": 3.7504767609613166e-06, "loss": 0.0326, "step": 68165 }, { "epoch": 0.2844422561774499, "grad_norm": 1.0597685153074892, "learning_rate": 3.7503392150997394e-06, "loss": 0.0235, "step": 68170 }, { "epoch": 0.2844631188924402, "grad_norm": 1.021667245616708, "learning_rate": 3.750201684370219e-06, "loss": 0.0275, "step": 68175 }, { "epoch": 0.2844839816074305, "grad_norm": 0.7346996731496012, "learning_rate": 3.7500641687699813e-06, "loss": 0.0313, "step": 68180 }, { "epoch": 0.28450484432242074, "grad_norm": 1.5917323794483862, "learning_rate": 3.7499266682962536e-06, "loss": 0.0259, "step": 68185 }, { "epoch": 0.28452570703741104, "grad_norm": 0.7839772626661146, "learning_rate": 3.749789182946262e-06, "loss": 0.0289, "step": 68190 }, { "epoch": 0.2845465697524013, "grad_norm": 1.0571122459082258, "learning_rate": 3.7496517127172345e-06, "loss": 0.0351, "step": 68195 }, { "epoch": 0.2845674324673916, "grad_norm": 1.2201714569897224, "learning_rate": 3.749514257606399e-06, "loss": 0.0337, "step": 68200 }, { "epoch": 0.2845882951823819, "grad_norm": 0.7066015777328342, "learning_rate": 3.749376817610986e-06, "loss": 0.0222, "step": 68205 }, { "epoch": 0.2846091578973721, "grad_norm": 0.9509917548729405, "learning_rate": 3.7492393927282245e-06, "loss": 0.0365, "step": 68210 }, { "epoch": 0.2846300206123624, "grad_norm": 0.9868962789803716, "learning_rate": 3.7491019829553456e-06, "loss": 0.0296, "step": 68215 }, { "epoch": 0.28465088332735267, "grad_norm": 0.6614575682583237, "learning_rate": 3.7489645882895797e-06, "loss": 0.0269, "step": 68220 }, { "epoch": 0.28467174604234297, "grad_norm": 3.5362070564642187, "learning_rate": 3.7488272087281595e-06, "loss": 0.0393, "step": 68225 }, { "epoch": 0.28469260875733327, "grad_norm": 0.665963120185677, "learning_rate": 3.748689844268318e-06, "loss": 0.0293, "step": 68230 }, { "epoch": 0.2847134714723235, "grad_norm": 0.807578351684666, "learning_rate": 3.748552494907288e-06, "loss": 0.0315, "step": 68235 }, { "epoch": 0.2847343341873138, "grad_norm": 0.6575889533841285, "learning_rate": 3.7484151606423037e-06, "loss": 0.0298, "step": 68240 }, { "epoch": 0.28475519690230405, "grad_norm": 0.81845210019355, "learning_rate": 3.748277841470601e-06, "loss": 0.0254, "step": 68245 }, { "epoch": 0.28477605961729435, "grad_norm": 0.7826979856142583, "learning_rate": 3.7481405373894142e-06, "loss": 0.0347, "step": 68250 }, { "epoch": 0.28479692233228465, "grad_norm": 0.6699824657953456, "learning_rate": 3.74800324839598e-06, "loss": 0.0213, "step": 68255 }, { "epoch": 0.2848177850472749, "grad_norm": 0.7309622518966156, "learning_rate": 3.747865974487535e-06, "loss": 0.035, "step": 68260 }, { "epoch": 0.2848386477622652, "grad_norm": 0.7239825554806368, "learning_rate": 3.747728715661318e-06, "loss": 0.0316, "step": 68265 }, { "epoch": 0.2848595104772555, "grad_norm": 1.186910856347485, "learning_rate": 3.747591471914566e-06, "loss": 0.0354, "step": 68270 }, { "epoch": 0.28488037319224574, "grad_norm": 0.7293740009194407, "learning_rate": 3.74745424324452e-06, "loss": 0.03, "step": 68275 }, { "epoch": 0.28490123590723604, "grad_norm": 0.3779573009351396, "learning_rate": 3.747317029648418e-06, "loss": 0.0268, "step": 68280 }, { "epoch": 0.2849220986222263, "grad_norm": 0.7312988116104949, "learning_rate": 3.7471798311235013e-06, "loss": 0.03, "step": 68285 }, { "epoch": 0.2849429613372166, "grad_norm": 1.0375210505955987, "learning_rate": 3.74704264766701e-06, "loss": 0.0319, "step": 68290 }, { "epoch": 0.2849638240522069, "grad_norm": 2.6650208956392265, "learning_rate": 3.746905479276189e-06, "loss": 0.0444, "step": 68295 }, { "epoch": 0.2849846867671971, "grad_norm": 0.6047910727993194, "learning_rate": 3.746768325948277e-06, "loss": 0.0209, "step": 68300 }, { "epoch": 0.2850055494821874, "grad_norm": 0.8019762003444947, "learning_rate": 3.7466311876805215e-06, "loss": 0.0224, "step": 68305 }, { "epoch": 0.28502641219717767, "grad_norm": 0.6988091701155292, "learning_rate": 3.746494064470163e-06, "loss": 0.0278, "step": 68310 }, { "epoch": 0.28504727491216797, "grad_norm": 0.5795105040275593, "learning_rate": 3.7463569563144482e-06, "loss": 0.0216, "step": 68315 }, { "epoch": 0.28506813762715827, "grad_norm": 0.5656560954547775, "learning_rate": 3.7462198632106215e-06, "loss": 0.0313, "step": 68320 }, { "epoch": 0.2850890003421485, "grad_norm": 0.6865593149390269, "learning_rate": 3.7460827851559304e-06, "loss": 0.0298, "step": 68325 }, { "epoch": 0.2851098630571388, "grad_norm": 0.734541044808865, "learning_rate": 3.7459457221476214e-06, "loss": 0.0297, "step": 68330 }, { "epoch": 0.28513072577212906, "grad_norm": 0.8638635628067294, "learning_rate": 3.7458086741829406e-06, "loss": 0.0312, "step": 68335 }, { "epoch": 0.28515158848711936, "grad_norm": 1.2508953047392983, "learning_rate": 3.7456716412591386e-06, "loss": 0.0326, "step": 68340 }, { "epoch": 0.28517245120210966, "grad_norm": 0.5909802512690467, "learning_rate": 3.7455346233734622e-06, "loss": 0.0333, "step": 68345 }, { "epoch": 0.2851933139170999, "grad_norm": 0.6396155214075603, "learning_rate": 3.7453976205231627e-06, "loss": 0.0329, "step": 68350 }, { "epoch": 0.2852141766320902, "grad_norm": 0.5978836741522429, "learning_rate": 3.7452606327054902e-06, "loss": 0.0331, "step": 68355 }, { "epoch": 0.2852350393470805, "grad_norm": 0.5728922967432672, "learning_rate": 3.745123659917695e-06, "loss": 0.0317, "step": 68360 }, { "epoch": 0.28525590206207074, "grad_norm": 0.8045272795108322, "learning_rate": 3.7449867021570297e-06, "loss": 0.029, "step": 68365 }, { "epoch": 0.28527676477706104, "grad_norm": 1.2628609919201392, "learning_rate": 3.7448497594207466e-06, "loss": 0.0317, "step": 68370 }, { "epoch": 0.2852976274920513, "grad_norm": 1.4497781833785865, "learning_rate": 3.744712831706099e-06, "loss": 0.0417, "step": 68375 }, { "epoch": 0.2853184902070416, "grad_norm": 0.9808004931665527, "learning_rate": 3.7445759190103403e-06, "loss": 0.025, "step": 68380 }, { "epoch": 0.2853393529220319, "grad_norm": 0.8742696606879741, "learning_rate": 3.744439021330726e-06, "loss": 0.0293, "step": 68385 }, { "epoch": 0.28536021563702213, "grad_norm": 1.116122730863087, "learning_rate": 3.7443021386645107e-06, "loss": 0.0393, "step": 68390 }, { "epoch": 0.28538107835201243, "grad_norm": 0.6194437040523986, "learning_rate": 3.7441652710089514e-06, "loss": 0.0277, "step": 68395 }, { "epoch": 0.2854019410670027, "grad_norm": 0.5025079496580221, "learning_rate": 3.7440284183613028e-06, "loss": 0.0299, "step": 68400 }, { "epoch": 0.285422803781993, "grad_norm": 0.9948581834405749, "learning_rate": 3.7438915807188254e-06, "loss": 0.033, "step": 68405 }, { "epoch": 0.2854436664969833, "grad_norm": 1.2299600796072834, "learning_rate": 3.743754758078775e-06, "loss": 0.0263, "step": 68410 }, { "epoch": 0.2854645292119735, "grad_norm": 1.1231006872449094, "learning_rate": 3.74361795043841e-06, "loss": 0.0474, "step": 68415 }, { "epoch": 0.2854853919269638, "grad_norm": 3.118005869297382, "learning_rate": 3.743481157794992e-06, "loss": 0.0244, "step": 68420 }, { "epoch": 0.28550625464195406, "grad_norm": 0.4831173009678367, "learning_rate": 3.743344380145781e-06, "loss": 0.0307, "step": 68425 }, { "epoch": 0.28552711735694436, "grad_norm": 1.40158281483862, "learning_rate": 3.7432076174880354e-06, "loss": 0.0323, "step": 68430 }, { "epoch": 0.28554798007193466, "grad_norm": 2.202550884939359, "learning_rate": 3.7430708698190198e-06, "loss": 0.0272, "step": 68435 }, { "epoch": 0.2855688427869249, "grad_norm": 0.7021014908663522, "learning_rate": 3.742934137135995e-06, "loss": 0.0257, "step": 68440 }, { "epoch": 0.2855897055019152, "grad_norm": 1.0619540374281493, "learning_rate": 3.742797419436224e-06, "loss": 0.0291, "step": 68445 }, { "epoch": 0.2856105682169055, "grad_norm": 0.8637406666091282, "learning_rate": 3.742660716716972e-06, "loss": 0.0313, "step": 68450 }, { "epoch": 0.28563143093189575, "grad_norm": 1.076263817391328, "learning_rate": 3.7425240289755025e-06, "loss": 0.0332, "step": 68455 }, { "epoch": 0.28565229364688605, "grad_norm": 0.9126578938989196, "learning_rate": 3.7423873562090797e-06, "loss": 0.0281, "step": 68460 }, { "epoch": 0.2856731563618763, "grad_norm": 0.7550807406198449, "learning_rate": 3.74225069841497e-06, "loss": 0.0268, "step": 68465 }, { "epoch": 0.2856940190768666, "grad_norm": 0.8597507220492637, "learning_rate": 3.7421140555904417e-06, "loss": 0.031, "step": 68470 }, { "epoch": 0.2857148817918569, "grad_norm": 0.41853669343702504, "learning_rate": 3.7419774277327596e-06, "loss": 0.0237, "step": 68475 }, { "epoch": 0.28573574450684713, "grad_norm": 0.8172435895301001, "learning_rate": 3.7418408148391927e-06, "loss": 0.0317, "step": 68480 }, { "epoch": 0.28575660722183743, "grad_norm": 0.6509842304960947, "learning_rate": 3.7417042169070094e-06, "loss": 0.0326, "step": 68485 }, { "epoch": 0.2857774699368277, "grad_norm": 0.4582105760371635, "learning_rate": 3.7415676339334795e-06, "loss": 0.0257, "step": 68490 }, { "epoch": 0.285798332651818, "grad_norm": 0.6427348937736895, "learning_rate": 3.7414310659158724e-06, "loss": 0.0232, "step": 68495 }, { "epoch": 0.2858191953668083, "grad_norm": 0.5882319260124594, "learning_rate": 3.741294512851459e-06, "loss": 0.0267, "step": 68500 }, { "epoch": 0.2858400580817985, "grad_norm": 0.9101614329826574, "learning_rate": 3.7411579747375117e-06, "loss": 0.0358, "step": 68505 }, { "epoch": 0.2858609207967888, "grad_norm": 0.8250260495214153, "learning_rate": 3.741021451571301e-06, "loss": 0.0414, "step": 68510 }, { "epoch": 0.28588178351177906, "grad_norm": 0.8738043292126505, "learning_rate": 3.740884943350101e-06, "loss": 0.035, "step": 68515 }, { "epoch": 0.28590264622676936, "grad_norm": 0.8287799019142926, "learning_rate": 3.7407484500711845e-06, "loss": 0.0296, "step": 68520 }, { "epoch": 0.28592350894175966, "grad_norm": 1.2126392389957454, "learning_rate": 3.740611971731825e-06, "loss": 0.0261, "step": 68525 }, { "epoch": 0.2859443716567499, "grad_norm": 0.730405239882414, "learning_rate": 3.7404755083292994e-06, "loss": 0.0286, "step": 68530 }, { "epoch": 0.2859652343717402, "grad_norm": 0.9802787318184771, "learning_rate": 3.740339059860882e-06, "loss": 0.0349, "step": 68535 }, { "epoch": 0.2859860970867305, "grad_norm": 1.0082304651783827, "learning_rate": 3.7402026263238493e-06, "loss": 0.0324, "step": 68540 }, { "epoch": 0.28600695980172075, "grad_norm": 1.2347671766684218, "learning_rate": 3.7400662077154785e-06, "loss": 0.0388, "step": 68545 }, { "epoch": 0.28602782251671105, "grad_norm": 0.6922919933971173, "learning_rate": 3.7399298040330467e-06, "loss": 0.0235, "step": 68550 }, { "epoch": 0.2860486852317013, "grad_norm": 1.1396441429942825, "learning_rate": 3.7397934152738336e-06, "loss": 0.0265, "step": 68555 }, { "epoch": 0.2860695479466916, "grad_norm": 0.9031424299905556, "learning_rate": 3.7396570414351165e-06, "loss": 0.036, "step": 68560 }, { "epoch": 0.2860904106616819, "grad_norm": 1.0319821824050361, "learning_rate": 3.739520682514176e-06, "loss": 0.029, "step": 68565 }, { "epoch": 0.28611127337667214, "grad_norm": 1.145011001930759, "learning_rate": 3.739384338508293e-06, "loss": 0.0458, "step": 68570 }, { "epoch": 0.28613213609166244, "grad_norm": 0.5064516369949215, "learning_rate": 3.739248009414748e-06, "loss": 0.0332, "step": 68575 }, { "epoch": 0.2861529988066527, "grad_norm": 0.6301800554546432, "learning_rate": 3.7391116952308238e-06, "loss": 0.0255, "step": 68580 }, { "epoch": 0.286173861521643, "grad_norm": 1.2763144052620057, "learning_rate": 3.7389753959538018e-06, "loss": 0.0299, "step": 68585 }, { "epoch": 0.2861947242366333, "grad_norm": 1.5725082829295336, "learning_rate": 3.7388391115809653e-06, "loss": 0.0295, "step": 68590 }, { "epoch": 0.2862155869516235, "grad_norm": 1.1123961028502733, "learning_rate": 3.7387028421095987e-06, "loss": 0.0356, "step": 68595 }, { "epoch": 0.2862364496666138, "grad_norm": 0.5415936806588474, "learning_rate": 3.738566587536987e-06, "loss": 0.0314, "step": 68600 }, { "epoch": 0.28625731238160407, "grad_norm": 0.7389669279067503, "learning_rate": 3.738430347860416e-06, "loss": 0.0352, "step": 68605 }, { "epoch": 0.28627817509659437, "grad_norm": 0.9037259038261575, "learning_rate": 3.738294123077169e-06, "loss": 0.0338, "step": 68610 }, { "epoch": 0.28629903781158467, "grad_norm": 1.2713878755406913, "learning_rate": 3.7381579131845356e-06, "loss": 0.035, "step": 68615 }, { "epoch": 0.2863199005265749, "grad_norm": 0.7934443872614758, "learning_rate": 3.7380217181798016e-06, "loss": 0.029, "step": 68620 }, { "epoch": 0.2863407632415652, "grad_norm": 0.6430337612462218, "learning_rate": 3.737885538060256e-06, "loss": 0.0253, "step": 68625 }, { "epoch": 0.2863616259565555, "grad_norm": 0.8349937809012531, "learning_rate": 3.7377493728231875e-06, "loss": 0.0228, "step": 68630 }, { "epoch": 0.28638248867154575, "grad_norm": 1.090308586297855, "learning_rate": 3.737613222465885e-06, "loss": 0.0314, "step": 68635 }, { "epoch": 0.28640335138653605, "grad_norm": 0.9096027848244304, "learning_rate": 3.7374770869856388e-06, "loss": 0.0548, "step": 68640 }, { "epoch": 0.2864242141015263, "grad_norm": 1.6041379366494903, "learning_rate": 3.73734096637974e-06, "loss": 0.0273, "step": 68645 }, { "epoch": 0.2864450768165166, "grad_norm": 0.971327366613047, "learning_rate": 3.7372048606454796e-06, "loss": 0.0237, "step": 68650 }, { "epoch": 0.2864659395315069, "grad_norm": 1.0017646848453812, "learning_rate": 3.737068769780151e-06, "loss": 0.0293, "step": 68655 }, { "epoch": 0.28648680224649714, "grad_norm": 1.0167988116212814, "learning_rate": 3.736932693781046e-06, "loss": 0.0399, "step": 68660 }, { "epoch": 0.28650766496148744, "grad_norm": 0.998475228427395, "learning_rate": 3.736796632645459e-06, "loss": 0.0256, "step": 68665 }, { "epoch": 0.2865285276764777, "grad_norm": 0.7311835125982105, "learning_rate": 3.7366605863706833e-06, "loss": 0.0273, "step": 68670 }, { "epoch": 0.286549390391468, "grad_norm": 1.3743213788866582, "learning_rate": 3.7365245549540145e-06, "loss": 0.0323, "step": 68675 }, { "epoch": 0.2865702531064583, "grad_norm": 1.1288038276545975, "learning_rate": 3.7363885383927485e-06, "loss": 0.038, "step": 68680 }, { "epoch": 0.2865911158214485, "grad_norm": 0.8018414871860614, "learning_rate": 3.7362525366841823e-06, "loss": 0.0479, "step": 68685 }, { "epoch": 0.2866119785364388, "grad_norm": 0.6542566080015402, "learning_rate": 3.736116549825611e-06, "loss": 0.0346, "step": 68690 }, { "epoch": 0.28663284125142907, "grad_norm": 0.858379468736295, "learning_rate": 3.735980577814334e-06, "loss": 0.0309, "step": 68695 }, { "epoch": 0.28665370396641937, "grad_norm": 0.8876265232602091, "learning_rate": 3.735844620647649e-06, "loss": 0.0297, "step": 68700 }, { "epoch": 0.28667456668140967, "grad_norm": 0.7674336647884789, "learning_rate": 3.735708678322855e-06, "loss": 0.0252, "step": 68705 }, { "epoch": 0.2866954293963999, "grad_norm": 0.7953320485919142, "learning_rate": 3.735572750837253e-06, "loss": 0.0248, "step": 68710 }, { "epoch": 0.2867162921113902, "grad_norm": 0.6362415303843248, "learning_rate": 3.7354368381881424e-06, "loss": 0.0204, "step": 68715 }, { "epoch": 0.28673715482638046, "grad_norm": 1.244274727506211, "learning_rate": 3.7353009403728246e-06, "loss": 0.0353, "step": 68720 }, { "epoch": 0.28675801754137076, "grad_norm": 1.1704058932840002, "learning_rate": 3.735165057388602e-06, "loss": 0.0321, "step": 68725 }, { "epoch": 0.28677888025636106, "grad_norm": 0.7529984427681906, "learning_rate": 3.735029189232775e-06, "loss": 0.0311, "step": 68730 }, { "epoch": 0.2867997429713513, "grad_norm": 0.6649650440082894, "learning_rate": 3.73489333590265e-06, "loss": 0.0208, "step": 68735 }, { "epoch": 0.2868206056863416, "grad_norm": 0.42715949936832337, "learning_rate": 3.7347574973955293e-06, "loss": 0.0223, "step": 68740 }, { "epoch": 0.2868414684013319, "grad_norm": 0.7924630861589982, "learning_rate": 3.7346216737087175e-06, "loss": 0.0315, "step": 68745 }, { "epoch": 0.28686233111632214, "grad_norm": 1.139945400026456, "learning_rate": 3.7344858648395206e-06, "loss": 0.0232, "step": 68750 }, { "epoch": 0.28688319383131244, "grad_norm": 0.9407520265503676, "learning_rate": 3.7343500707852437e-06, "loss": 0.0286, "step": 68755 }, { "epoch": 0.2869040565463027, "grad_norm": 0.3803377619074309, "learning_rate": 3.734214291543194e-06, "loss": 0.0283, "step": 68760 }, { "epoch": 0.286924919261293, "grad_norm": 0.7377921524627918, "learning_rate": 3.7340785271106785e-06, "loss": 0.0297, "step": 68765 }, { "epoch": 0.2869457819762833, "grad_norm": 1.274019744064546, "learning_rate": 3.733942777485005e-06, "loss": 0.0279, "step": 68770 }, { "epoch": 0.28696664469127353, "grad_norm": 0.7844585228642007, "learning_rate": 3.7338070426634835e-06, "loss": 0.0281, "step": 68775 }, { "epoch": 0.28698750740626383, "grad_norm": 0.6781798626915185, "learning_rate": 3.733671322643423e-06, "loss": 0.0289, "step": 68780 }, { "epoch": 0.2870083701212541, "grad_norm": 0.5642213181924111, "learning_rate": 3.733535617422133e-06, "loss": 0.037, "step": 68785 }, { "epoch": 0.2870292328362444, "grad_norm": 0.6358902320749085, "learning_rate": 3.7333999269969244e-06, "loss": 0.0288, "step": 68790 }, { "epoch": 0.2870500955512347, "grad_norm": 0.4803320122875771, "learning_rate": 3.7332642513651092e-06, "loss": 0.0265, "step": 68795 }, { "epoch": 0.2870709582662249, "grad_norm": 0.6311647619787928, "learning_rate": 3.7331285905239983e-06, "loss": 0.0292, "step": 68800 }, { "epoch": 0.2870918209812152, "grad_norm": 1.0569195120577286, "learning_rate": 3.7329929444709066e-06, "loss": 0.0407, "step": 68805 }, { "epoch": 0.28711268369620546, "grad_norm": 1.0624006779594075, "learning_rate": 3.732857313203146e-06, "loss": 0.0324, "step": 68810 }, { "epoch": 0.28713354641119576, "grad_norm": 0.9393791500705602, "learning_rate": 3.7327216967180313e-06, "loss": 0.0414, "step": 68815 }, { "epoch": 0.28715440912618606, "grad_norm": 0.8541288839527783, "learning_rate": 3.7325860950128767e-06, "loss": 0.034, "step": 68820 }, { "epoch": 0.2871752718411763, "grad_norm": 0.5356261772980928, "learning_rate": 3.7324505080849988e-06, "loss": 0.0295, "step": 68825 }, { "epoch": 0.2871961345561666, "grad_norm": 0.7408384913430303, "learning_rate": 3.7323149359317133e-06, "loss": 0.0326, "step": 68830 }, { "epoch": 0.2872169972711569, "grad_norm": 0.5838423828799402, "learning_rate": 3.7321793785503376e-06, "loss": 0.0314, "step": 68835 }, { "epoch": 0.28723785998614715, "grad_norm": 0.7266673451516487, "learning_rate": 3.7320438359381884e-06, "loss": 0.0316, "step": 68840 }, { "epoch": 0.28725872270113745, "grad_norm": 1.0424337256257115, "learning_rate": 3.7319083080925856e-06, "loss": 0.0289, "step": 68845 }, { "epoch": 0.2872795854161277, "grad_norm": 0.8848632931469493, "learning_rate": 3.7317727950108455e-06, "loss": 0.0335, "step": 68850 }, { "epoch": 0.287300448131118, "grad_norm": 1.227222637503749, "learning_rate": 3.731637296690291e-06, "loss": 0.0313, "step": 68855 }, { "epoch": 0.2873213108461083, "grad_norm": 0.5775214077058552, "learning_rate": 3.73150181312824e-06, "loss": 0.04, "step": 68860 }, { "epoch": 0.28734217356109854, "grad_norm": 1.2057284030388353, "learning_rate": 3.7313663443220143e-06, "loss": 0.0337, "step": 68865 }, { "epoch": 0.28736303627608883, "grad_norm": 0.8162081937081602, "learning_rate": 3.7312308902689363e-06, "loss": 0.0305, "step": 68870 }, { "epoch": 0.2873838989910791, "grad_norm": 0.790485700376367, "learning_rate": 3.7310954509663278e-06, "loss": 0.0263, "step": 68875 }, { "epoch": 0.2874047617060694, "grad_norm": 0.9684866153526839, "learning_rate": 3.7309600264115115e-06, "loss": 0.0362, "step": 68880 }, { "epoch": 0.2874256244210597, "grad_norm": 0.6674622122477387, "learning_rate": 3.7308246166018113e-06, "loss": 0.0282, "step": 68885 }, { "epoch": 0.2874464871360499, "grad_norm": 0.7779785294489511, "learning_rate": 3.7306892215345526e-06, "loss": 0.0341, "step": 68890 }, { "epoch": 0.2874673498510402, "grad_norm": 0.8535717442433244, "learning_rate": 3.730553841207059e-06, "loss": 0.0249, "step": 68895 }, { "epoch": 0.28748821256603047, "grad_norm": 0.817284827532727, "learning_rate": 3.730418475616658e-06, "loss": 0.0293, "step": 68900 }, { "epoch": 0.28750907528102077, "grad_norm": 0.9187179187100988, "learning_rate": 3.730283124760674e-06, "loss": 0.0302, "step": 68905 }, { "epoch": 0.28752993799601106, "grad_norm": 1.4687280238323024, "learning_rate": 3.7301477886364366e-06, "loss": 0.0334, "step": 68910 }, { "epoch": 0.2875508007110013, "grad_norm": 0.7887069729459422, "learning_rate": 3.7300124672412713e-06, "loss": 0.035, "step": 68915 }, { "epoch": 0.2875716634259916, "grad_norm": 0.6525090267690691, "learning_rate": 3.7298771605725077e-06, "loss": 0.03, "step": 68920 }, { "epoch": 0.2875925261409819, "grad_norm": 0.9991716820458361, "learning_rate": 3.729741868627475e-06, "loss": 0.0314, "step": 68925 }, { "epoch": 0.28761338885597215, "grad_norm": 0.651673634375886, "learning_rate": 3.7296065914035033e-06, "loss": 0.0329, "step": 68930 }, { "epoch": 0.28763425157096245, "grad_norm": 0.5282758602074096, "learning_rate": 3.729471328897923e-06, "loss": 0.0476, "step": 68935 }, { "epoch": 0.2876551142859527, "grad_norm": 1.0651106674319728, "learning_rate": 3.7293360811080644e-06, "loss": 0.0273, "step": 68940 }, { "epoch": 0.287675977000943, "grad_norm": 0.8662693423004455, "learning_rate": 3.7292008480312597e-06, "loss": 0.0391, "step": 68945 }, { "epoch": 0.2876968397159333, "grad_norm": 1.1632421030809716, "learning_rate": 3.7290656296648427e-06, "loss": 0.0371, "step": 68950 }, { "epoch": 0.28771770243092354, "grad_norm": 0.8885448830458897, "learning_rate": 3.7289304260061453e-06, "loss": 0.0371, "step": 68955 }, { "epoch": 0.28773856514591384, "grad_norm": 0.9392705635331143, "learning_rate": 3.7287952370525027e-06, "loss": 0.0358, "step": 68960 }, { "epoch": 0.2877594278609041, "grad_norm": 0.8151308150393408, "learning_rate": 3.728660062801248e-06, "loss": 0.0321, "step": 68965 }, { "epoch": 0.2877802905758944, "grad_norm": 0.592312831679626, "learning_rate": 3.728524903249718e-06, "loss": 0.0249, "step": 68970 }, { "epoch": 0.2878011532908847, "grad_norm": 0.6875664254563111, "learning_rate": 3.7283897583952472e-06, "loss": 0.0268, "step": 68975 }, { "epoch": 0.2878220160058749, "grad_norm": 1.315823334536022, "learning_rate": 3.7282546282351733e-06, "loss": 0.0228, "step": 68980 }, { "epoch": 0.2878428787208652, "grad_norm": 0.5342396611719555, "learning_rate": 3.728119512766833e-06, "loss": 0.0265, "step": 68985 }, { "epoch": 0.28786374143585547, "grad_norm": 0.5648699342825303, "learning_rate": 3.727984411987564e-06, "loss": 0.0267, "step": 68990 }, { "epoch": 0.28788460415084577, "grad_norm": 0.5888476080492364, "learning_rate": 3.7278493258947063e-06, "loss": 0.0271, "step": 68995 }, { "epoch": 0.28790546686583607, "grad_norm": 0.7855855448503898, "learning_rate": 3.7277142544855986e-06, "loss": 0.0289, "step": 69000 }, { "epoch": 0.2879263295808263, "grad_norm": 0.7222940405525188, "learning_rate": 3.72757919775758e-06, "loss": 0.031, "step": 69005 }, { "epoch": 0.2879471922958166, "grad_norm": 0.6392134439354514, "learning_rate": 3.727444155707992e-06, "loss": 0.0302, "step": 69010 }, { "epoch": 0.2879680550108069, "grad_norm": 0.6159940139860881, "learning_rate": 3.727309128334176e-06, "loss": 0.0314, "step": 69015 }, { "epoch": 0.28798891772579716, "grad_norm": 0.8800467882963925, "learning_rate": 3.7271741156334734e-06, "loss": 0.0358, "step": 69020 }, { "epoch": 0.28800978044078746, "grad_norm": 0.9452567302856292, "learning_rate": 3.7270391176032276e-06, "loss": 0.0257, "step": 69025 }, { "epoch": 0.2880306431557777, "grad_norm": 0.775792228959309, "learning_rate": 3.7269041342407825e-06, "loss": 0.0259, "step": 69030 }, { "epoch": 0.288051505870768, "grad_norm": 0.458614471447515, "learning_rate": 3.72676916554348e-06, "loss": 0.0242, "step": 69035 }, { "epoch": 0.2880723685857583, "grad_norm": 0.7140316322214085, "learning_rate": 3.726634211508667e-06, "loss": 0.0305, "step": 69040 }, { "epoch": 0.28809323130074854, "grad_norm": 0.8873994156911579, "learning_rate": 3.7264992721336886e-06, "loss": 0.0308, "step": 69045 }, { "epoch": 0.28811409401573884, "grad_norm": 0.8478625523997736, "learning_rate": 3.72636434741589e-06, "loss": 0.0272, "step": 69050 }, { "epoch": 0.2881349567307291, "grad_norm": 0.5288140249314753, "learning_rate": 3.726229437352618e-06, "loss": 0.0281, "step": 69055 }, { "epoch": 0.2881558194457194, "grad_norm": 1.1091918569858126, "learning_rate": 3.7260945419412202e-06, "loss": 0.0354, "step": 69060 }, { "epoch": 0.2881766821607097, "grad_norm": 0.4618016270033239, "learning_rate": 3.7259596611790455e-06, "loss": 0.0323, "step": 69065 }, { "epoch": 0.28819754487569993, "grad_norm": 0.6792158323962656, "learning_rate": 3.7258247950634413e-06, "loss": 0.0183, "step": 69070 }, { "epoch": 0.28821840759069023, "grad_norm": 0.504821703963854, "learning_rate": 3.725689943591758e-06, "loss": 0.0329, "step": 69075 }, { "epoch": 0.2882392703056805, "grad_norm": 1.001250807758053, "learning_rate": 3.725555106761346e-06, "loss": 0.0328, "step": 69080 }, { "epoch": 0.2882601330206708, "grad_norm": 0.21650434236171004, "learning_rate": 3.7254202845695545e-06, "loss": 0.0261, "step": 69085 }, { "epoch": 0.28828099573566107, "grad_norm": 1.0244377102422826, "learning_rate": 3.7252854770137366e-06, "loss": 0.0257, "step": 69090 }, { "epoch": 0.2883018584506513, "grad_norm": 0.7094381386883022, "learning_rate": 3.7251506840912434e-06, "loss": 0.0282, "step": 69095 }, { "epoch": 0.2883227211656416, "grad_norm": 0.7569870470760435, "learning_rate": 3.725015905799428e-06, "loss": 0.0284, "step": 69100 }, { "epoch": 0.2883435838806319, "grad_norm": 0.5133128696064513, "learning_rate": 3.724881142135644e-06, "loss": 0.0398, "step": 69105 }, { "epoch": 0.28836444659562216, "grad_norm": 0.5719256538840409, "learning_rate": 3.724746393097246e-06, "loss": 0.0336, "step": 69110 }, { "epoch": 0.28838530931061246, "grad_norm": 0.8345773947525986, "learning_rate": 3.724611658681587e-06, "loss": 0.0335, "step": 69115 }, { "epoch": 0.2884061720256027, "grad_norm": 0.4824794783220696, "learning_rate": 3.7244769388860253e-06, "loss": 0.0243, "step": 69120 }, { "epoch": 0.288427034740593, "grad_norm": 0.4708397387849822, "learning_rate": 3.7243422337079144e-06, "loss": 0.0252, "step": 69125 }, { "epoch": 0.2884478974555833, "grad_norm": 0.9356870879737085, "learning_rate": 3.7242075431446116e-06, "loss": 0.0282, "step": 69130 }, { "epoch": 0.28846876017057355, "grad_norm": 1.1166988123317845, "learning_rate": 3.7240728671934754e-06, "loss": 0.037, "step": 69135 }, { "epoch": 0.28848962288556385, "grad_norm": 0.8111943314521589, "learning_rate": 3.7239382058518636e-06, "loss": 0.0262, "step": 69140 }, { "epoch": 0.2885104856005541, "grad_norm": 0.40089933532470645, "learning_rate": 3.723803559117135e-06, "loss": 0.0272, "step": 69145 }, { "epoch": 0.2885313483155444, "grad_norm": 0.8657909149566451, "learning_rate": 3.7236689269866487e-06, "loss": 0.0305, "step": 69150 }, { "epoch": 0.2885522110305347, "grad_norm": 0.9100786640570353, "learning_rate": 3.7235343094577652e-06, "loss": 0.0281, "step": 69155 }, { "epoch": 0.28857307374552493, "grad_norm": 1.3257918878007355, "learning_rate": 3.723399706527845e-06, "loss": 0.0352, "step": 69160 }, { "epoch": 0.28859393646051523, "grad_norm": 1.4254402155174941, "learning_rate": 3.723265118194249e-06, "loss": 0.0356, "step": 69165 }, { "epoch": 0.2886147991755055, "grad_norm": 0.479358851810369, "learning_rate": 3.723130544454342e-06, "loss": 0.0308, "step": 69170 }, { "epoch": 0.2886356618904958, "grad_norm": 2.2568908890024186, "learning_rate": 3.7229959853054836e-06, "loss": 0.0277, "step": 69175 }, { "epoch": 0.2886565246054861, "grad_norm": 0.5154056977043913, "learning_rate": 3.7228614407450386e-06, "loss": 0.0298, "step": 69180 }, { "epoch": 0.2886773873204763, "grad_norm": 0.7682496148639549, "learning_rate": 3.7227269107703718e-06, "loss": 0.0311, "step": 69185 }, { "epoch": 0.2886982500354666, "grad_norm": 1.0639875320358203, "learning_rate": 3.7225923953788467e-06, "loss": 0.0293, "step": 69190 }, { "epoch": 0.2887191127504569, "grad_norm": 0.6530713774701583, "learning_rate": 3.7224578945678304e-06, "loss": 0.0314, "step": 69195 }, { "epoch": 0.28873997546544716, "grad_norm": 1.2146661505944387, "learning_rate": 3.722323408334688e-06, "loss": 0.032, "step": 69200 }, { "epoch": 0.28876083818043746, "grad_norm": 0.4214674270140818, "learning_rate": 3.7221889366767855e-06, "loss": 0.0332, "step": 69205 }, { "epoch": 0.2887817008954277, "grad_norm": 1.3753340133202683, "learning_rate": 3.7220544795914916e-06, "loss": 0.0351, "step": 69210 }, { "epoch": 0.288802563610418, "grad_norm": 1.0527950203083913, "learning_rate": 3.721920037076175e-06, "loss": 0.0298, "step": 69215 }, { "epoch": 0.2888234263254083, "grad_norm": 1.2389621400032704, "learning_rate": 3.721785609128203e-06, "loss": 0.0294, "step": 69220 }, { "epoch": 0.28884428904039855, "grad_norm": 0.5594972686575562, "learning_rate": 3.7216511957449464e-06, "loss": 0.0252, "step": 69225 }, { "epoch": 0.28886515175538885, "grad_norm": 0.3593753374313927, "learning_rate": 3.7215167969237744e-06, "loss": 0.0255, "step": 69230 }, { "epoch": 0.2888860144703791, "grad_norm": 0.7415735690582475, "learning_rate": 3.7213824126620573e-06, "loss": 0.0526, "step": 69235 }, { "epoch": 0.2889068771853694, "grad_norm": 0.9538274152825684, "learning_rate": 3.7212480429571685e-06, "loss": 0.0255, "step": 69240 }, { "epoch": 0.2889277399003597, "grad_norm": 0.5520363243108555, "learning_rate": 3.7211136878064783e-06, "loss": 0.0274, "step": 69245 }, { "epoch": 0.28894860261534994, "grad_norm": 0.6599618158173273, "learning_rate": 3.7209793472073606e-06, "loss": 0.0355, "step": 69250 }, { "epoch": 0.28896946533034024, "grad_norm": 1.186417152535034, "learning_rate": 3.7208450211571887e-06, "loss": 0.0292, "step": 69255 }, { "epoch": 0.2889903280453305, "grad_norm": 0.8065145896901217, "learning_rate": 3.7207107096533373e-06, "loss": 0.0257, "step": 69260 }, { "epoch": 0.2890111907603208, "grad_norm": 1.102558834411549, "learning_rate": 3.720576412693179e-06, "loss": 0.0414, "step": 69265 }, { "epoch": 0.2890320534753111, "grad_norm": 0.4502617068803065, "learning_rate": 3.720442130274091e-06, "loss": 0.0212, "step": 69270 }, { "epoch": 0.2890529161903013, "grad_norm": 0.6853629104111225, "learning_rate": 3.7203078623934495e-06, "loss": 0.0241, "step": 69275 }, { "epoch": 0.2890737789052916, "grad_norm": 1.1877335497775985, "learning_rate": 3.7201736090486306e-06, "loss": 0.0395, "step": 69280 }, { "epoch": 0.2890946416202819, "grad_norm": 0.78039319939029, "learning_rate": 3.720039370237013e-06, "loss": 0.0292, "step": 69285 }, { "epoch": 0.28911550433527217, "grad_norm": 0.6533066874063583, "learning_rate": 3.7199051459559733e-06, "loss": 0.0294, "step": 69290 }, { "epoch": 0.28913636705026247, "grad_norm": 0.7762814269200614, "learning_rate": 3.7197709362028898e-06, "loss": 0.0273, "step": 69295 }, { "epoch": 0.2891572297652527, "grad_norm": 1.6831055982045782, "learning_rate": 3.719636740975144e-06, "loss": 0.0345, "step": 69300 }, { "epoch": 0.289178092480243, "grad_norm": 1.1895693942706822, "learning_rate": 3.719502560270115e-06, "loss": 0.0301, "step": 69305 }, { "epoch": 0.2891989551952333, "grad_norm": 0.4179771628400945, "learning_rate": 3.7193683940851834e-06, "loss": 0.024, "step": 69310 }, { "epoch": 0.28921981791022355, "grad_norm": 0.8052801348079283, "learning_rate": 3.7192342424177306e-06, "loss": 0.026, "step": 69315 }, { "epoch": 0.28924068062521385, "grad_norm": 1.3728400794555327, "learning_rate": 3.7191001052651395e-06, "loss": 0.0352, "step": 69320 }, { "epoch": 0.2892615433402041, "grad_norm": 0.8937031911754703, "learning_rate": 3.718965982624791e-06, "loss": 0.0368, "step": 69325 }, { "epoch": 0.2892824060551944, "grad_norm": 1.0043964986836913, "learning_rate": 3.7188318744940697e-06, "loss": 0.0223, "step": 69330 }, { "epoch": 0.2893032687701847, "grad_norm": 0.9006446080983159, "learning_rate": 3.71869778087036e-06, "loss": 0.0216, "step": 69335 }, { "epoch": 0.28932413148517494, "grad_norm": 0.8170377804484268, "learning_rate": 3.718563701751046e-06, "loss": 0.0282, "step": 69340 }, { "epoch": 0.28934499420016524, "grad_norm": 1.2340243339454093, "learning_rate": 3.7184296371335137e-06, "loss": 0.0378, "step": 69345 }, { "epoch": 0.2893658569151555, "grad_norm": 1.1726041250098311, "learning_rate": 3.718295587015149e-06, "loss": 0.0395, "step": 69350 }, { "epoch": 0.2893867196301458, "grad_norm": 0.6554507074880921, "learning_rate": 3.7181615513933378e-06, "loss": 0.0239, "step": 69355 }, { "epoch": 0.2894075823451361, "grad_norm": 1.4472606866267441, "learning_rate": 3.7180275302654685e-06, "loss": 0.0363, "step": 69360 }, { "epoch": 0.2894284450601263, "grad_norm": 0.9782082738302568, "learning_rate": 3.717893523628928e-06, "loss": 0.0396, "step": 69365 }, { "epoch": 0.2894493077751166, "grad_norm": 0.771906988400434, "learning_rate": 3.7177595314811055e-06, "loss": 0.031, "step": 69370 }, { "epoch": 0.2894701704901069, "grad_norm": 1.0408489232550777, "learning_rate": 3.7176255538193913e-06, "loss": 0.0408, "step": 69375 }, { "epoch": 0.28949103320509717, "grad_norm": 0.814861100387697, "learning_rate": 3.717491590641174e-06, "loss": 0.0228, "step": 69380 }, { "epoch": 0.28951189592008747, "grad_norm": 0.6395113208753771, "learning_rate": 3.7173576419438443e-06, "loss": 0.0279, "step": 69385 }, { "epoch": 0.2895327586350777, "grad_norm": 0.6674973029233019, "learning_rate": 3.7172237077247945e-06, "loss": 0.0395, "step": 69390 }, { "epoch": 0.289553621350068, "grad_norm": 0.8037904077863052, "learning_rate": 3.7170897879814167e-06, "loss": 0.0294, "step": 69395 }, { "epoch": 0.2895744840650583, "grad_norm": 1.3622403908695977, "learning_rate": 3.716955882711102e-06, "loss": 0.0251, "step": 69400 }, { "epoch": 0.28959534678004856, "grad_norm": 0.7264172675934939, "learning_rate": 3.716821991911244e-06, "loss": 0.0236, "step": 69405 }, { "epoch": 0.28961620949503886, "grad_norm": 0.8913583820230099, "learning_rate": 3.7166881155792385e-06, "loss": 0.027, "step": 69410 }, { "epoch": 0.2896370722100291, "grad_norm": 0.6949192210553156, "learning_rate": 3.716554253712478e-06, "loss": 0.0292, "step": 69415 }, { "epoch": 0.2896579349250194, "grad_norm": 0.5756266853115828, "learning_rate": 3.716420406308359e-06, "loss": 0.0273, "step": 69420 }, { "epoch": 0.2896787976400097, "grad_norm": 0.6383594900108343, "learning_rate": 3.7162865733642765e-06, "loss": 0.0257, "step": 69425 }, { "epoch": 0.28969966035499994, "grad_norm": 0.9859272512171412, "learning_rate": 3.7161527548776282e-06, "loss": 0.0276, "step": 69430 }, { "epoch": 0.28972052306999024, "grad_norm": 0.5330442980081873, "learning_rate": 3.7160189508458107e-06, "loss": 0.0253, "step": 69435 }, { "epoch": 0.2897413857849805, "grad_norm": 1.043247801151627, "learning_rate": 3.7158851612662213e-06, "loss": 0.0311, "step": 69440 }, { "epoch": 0.2897622484999708, "grad_norm": 0.7653236906039478, "learning_rate": 3.715751386136259e-06, "loss": 0.0317, "step": 69445 }, { "epoch": 0.2897831112149611, "grad_norm": 0.7639297733123851, "learning_rate": 3.715617625453323e-06, "loss": 0.0239, "step": 69450 }, { "epoch": 0.28980397392995133, "grad_norm": 0.8152197417217998, "learning_rate": 3.715483879214814e-06, "loss": 0.0333, "step": 69455 }, { "epoch": 0.28982483664494163, "grad_norm": 4.61041275950277, "learning_rate": 3.7153501474181308e-06, "loss": 0.0418, "step": 69460 }, { "epoch": 0.28984569935993193, "grad_norm": 0.7827583910206831, "learning_rate": 3.7152164300606765e-06, "loss": 0.0296, "step": 69465 }, { "epoch": 0.2898665620749222, "grad_norm": 0.9234784211644899, "learning_rate": 3.7150827271398506e-06, "loss": 0.0308, "step": 69470 }, { "epoch": 0.2898874247899125, "grad_norm": 0.5596013167231796, "learning_rate": 3.7149490386530578e-06, "loss": 0.0321, "step": 69475 }, { "epoch": 0.2899082875049027, "grad_norm": 0.6638421380403154, "learning_rate": 3.7148153645976997e-06, "loss": 0.0234, "step": 69480 }, { "epoch": 0.289929150219893, "grad_norm": 1.2507354929020802, "learning_rate": 3.7146817049711808e-06, "loss": 0.0442, "step": 69485 }, { "epoch": 0.2899500129348833, "grad_norm": 0.9453625322007975, "learning_rate": 3.7145480597709056e-06, "loss": 0.0305, "step": 69490 }, { "epoch": 0.28997087564987356, "grad_norm": 0.5217631648968473, "learning_rate": 3.7144144289942784e-06, "loss": 0.0367, "step": 69495 }, { "epoch": 0.28999173836486386, "grad_norm": 1.5768650243031124, "learning_rate": 3.7142808126387065e-06, "loss": 0.0345, "step": 69500 }, { "epoch": 0.2900126010798541, "grad_norm": 0.947957690521677, "learning_rate": 3.714147210701594e-06, "loss": 0.0311, "step": 69505 }, { "epoch": 0.2900334637948444, "grad_norm": 0.9720701812445198, "learning_rate": 3.7140136231803497e-06, "loss": 0.0336, "step": 69510 }, { "epoch": 0.2900543265098347, "grad_norm": 0.8712053811786037, "learning_rate": 3.7138800500723815e-06, "loss": 0.0372, "step": 69515 }, { "epoch": 0.29007518922482495, "grad_norm": 0.5744860058398142, "learning_rate": 3.7137464913750966e-06, "loss": 0.0338, "step": 69520 }, { "epoch": 0.29009605193981525, "grad_norm": 0.6554722707145043, "learning_rate": 3.713612947085904e-06, "loss": 0.0248, "step": 69525 }, { "epoch": 0.2901169146548055, "grad_norm": 0.771517115920344, "learning_rate": 3.713479417202214e-06, "loss": 0.0241, "step": 69530 }, { "epoch": 0.2901377773697958, "grad_norm": 0.950461159330294, "learning_rate": 3.7133459017214375e-06, "loss": 0.0354, "step": 69535 }, { "epoch": 0.2901586400847861, "grad_norm": 1.24125667585688, "learning_rate": 3.7132124006409837e-06, "loss": 0.0336, "step": 69540 }, { "epoch": 0.29017950279977633, "grad_norm": 0.7630445237023673, "learning_rate": 3.713078913958265e-06, "loss": 0.0269, "step": 69545 }, { "epoch": 0.29020036551476663, "grad_norm": 0.6251517806774565, "learning_rate": 3.7129454416706943e-06, "loss": 0.0271, "step": 69550 }, { "epoch": 0.29022122822975693, "grad_norm": 1.698443252794706, "learning_rate": 3.712811983775685e-06, "loss": 0.036, "step": 69555 }, { "epoch": 0.2902420909447472, "grad_norm": 0.7296866968741974, "learning_rate": 3.7126785402706483e-06, "loss": 0.0392, "step": 69560 }, { "epoch": 0.2902629536597375, "grad_norm": 0.4403943513778855, "learning_rate": 3.7125451111530003e-06, "loss": 0.0337, "step": 69565 }, { "epoch": 0.2902838163747277, "grad_norm": 1.2322036020775997, "learning_rate": 3.712411696420155e-06, "loss": 0.0354, "step": 69570 }, { "epoch": 0.290304679089718, "grad_norm": 0.8244001253127091, "learning_rate": 3.7122782960695288e-06, "loss": 0.0385, "step": 69575 }, { "epoch": 0.2903255418047083, "grad_norm": 0.9494281569390742, "learning_rate": 3.7121449100985367e-06, "loss": 0.027, "step": 69580 }, { "epoch": 0.29034640451969856, "grad_norm": 1.182394550385885, "learning_rate": 3.7120115385045974e-06, "loss": 0.0288, "step": 69585 }, { "epoch": 0.29036726723468886, "grad_norm": 0.614878868012175, "learning_rate": 3.711878181285126e-06, "loss": 0.0358, "step": 69590 }, { "epoch": 0.2903881299496791, "grad_norm": 0.6810703604748829, "learning_rate": 3.7117448384375414e-06, "loss": 0.037, "step": 69595 }, { "epoch": 0.2904089926646694, "grad_norm": 1.126289568528501, "learning_rate": 3.711611509959264e-06, "loss": 0.0348, "step": 69600 }, { "epoch": 0.2904298553796597, "grad_norm": 0.27868997232577836, "learning_rate": 3.7114781958477103e-06, "loss": 0.029, "step": 69605 }, { "epoch": 0.29045071809464995, "grad_norm": 0.7473359118597014, "learning_rate": 3.711344896100303e-06, "loss": 0.0302, "step": 69610 }, { "epoch": 0.29047158080964025, "grad_norm": 0.6713657936305252, "learning_rate": 3.7112116107144614e-06, "loss": 0.0283, "step": 69615 }, { "epoch": 0.2904924435246305, "grad_norm": 0.8260801368451518, "learning_rate": 3.711078339687607e-06, "loss": 0.0254, "step": 69620 }, { "epoch": 0.2905133062396208, "grad_norm": 0.42564281466924614, "learning_rate": 3.7109450830171622e-06, "loss": 0.0261, "step": 69625 }, { "epoch": 0.2905341689546111, "grad_norm": 0.6542041630465606, "learning_rate": 3.7108118407005495e-06, "loss": 0.0232, "step": 69630 }, { "epoch": 0.29055503166960134, "grad_norm": 0.6264361014284946, "learning_rate": 3.710678612735192e-06, "loss": 0.0379, "step": 69635 }, { "epoch": 0.29057589438459164, "grad_norm": 1.1575373840521874, "learning_rate": 3.7105453991185142e-06, "loss": 0.0395, "step": 69640 }, { "epoch": 0.29059675709958194, "grad_norm": 2.0088475894486093, "learning_rate": 3.7104121998479397e-06, "loss": 0.029, "step": 69645 }, { "epoch": 0.2906176198145722, "grad_norm": 0.7131053539953373, "learning_rate": 3.7102790149208944e-06, "loss": 0.0222, "step": 69650 }, { "epoch": 0.2906384825295625, "grad_norm": 0.6785879819138642, "learning_rate": 3.7101458443348043e-06, "loss": 0.0265, "step": 69655 }, { "epoch": 0.2906593452445527, "grad_norm": 0.8613142070644159, "learning_rate": 3.7100126880870957e-06, "loss": 0.0239, "step": 69660 }, { "epoch": 0.290680207959543, "grad_norm": 1.5351611605509135, "learning_rate": 3.7098795461751964e-06, "loss": 0.0339, "step": 69665 }, { "epoch": 0.2907010706745333, "grad_norm": 0.3158202236917108, "learning_rate": 3.7097464185965325e-06, "loss": 0.0246, "step": 69670 }, { "epoch": 0.29072193338952357, "grad_norm": 0.9936308128099733, "learning_rate": 3.7096133053485345e-06, "loss": 0.0299, "step": 69675 }, { "epoch": 0.29074279610451387, "grad_norm": 0.9568686027855541, "learning_rate": 3.7094802064286302e-06, "loss": 0.0295, "step": 69680 }, { "epoch": 0.2907636588195041, "grad_norm": 0.6734383851837756, "learning_rate": 3.70934712183425e-06, "loss": 0.0296, "step": 69685 }, { "epoch": 0.2907845215344944, "grad_norm": 0.6860261552682386, "learning_rate": 3.709214051562824e-06, "loss": 0.0253, "step": 69690 }, { "epoch": 0.2908053842494847, "grad_norm": 0.7534255363689074, "learning_rate": 3.7090809956117835e-06, "loss": 0.0249, "step": 69695 }, { "epoch": 0.29082624696447495, "grad_norm": 1.1329858526818617, "learning_rate": 3.7089479539785593e-06, "loss": 0.0387, "step": 69700 }, { "epoch": 0.29084710967946525, "grad_norm": 0.7507007364592151, "learning_rate": 3.7088149266605854e-06, "loss": 0.0271, "step": 69705 }, { "epoch": 0.2908679723944555, "grad_norm": 0.993835519446931, "learning_rate": 3.708681913655293e-06, "loss": 0.0326, "step": 69710 }, { "epoch": 0.2908888351094458, "grad_norm": 1.3224972355772169, "learning_rate": 3.7085489149601177e-06, "loss": 0.0374, "step": 69715 }, { "epoch": 0.2909096978244361, "grad_norm": 0.9719413101832008, "learning_rate": 3.708415930572492e-06, "loss": 0.0324, "step": 69720 }, { "epoch": 0.29093056053942634, "grad_norm": 0.966362395276088, "learning_rate": 3.708282960489852e-06, "loss": 0.0317, "step": 69725 }, { "epoch": 0.29095142325441664, "grad_norm": 0.8223293084450902, "learning_rate": 3.7081500047096317e-06, "loss": 0.037, "step": 69730 }, { "epoch": 0.29097228596940694, "grad_norm": 0.4523942538609479, "learning_rate": 3.7080170632292694e-06, "loss": 0.0257, "step": 69735 }, { "epoch": 0.2909931486843972, "grad_norm": 0.747026804395548, "learning_rate": 3.7078841360462007e-06, "loss": 0.0251, "step": 69740 }, { "epoch": 0.2910140113993875, "grad_norm": 1.3548715125319015, "learning_rate": 3.7077512231578633e-06, "loss": 0.0258, "step": 69745 }, { "epoch": 0.29103487411437773, "grad_norm": 0.9733992466775043, "learning_rate": 3.7076183245616958e-06, "loss": 0.0245, "step": 69750 }, { "epoch": 0.29105573682936803, "grad_norm": 1.5750803220807321, "learning_rate": 3.707485440255136e-06, "loss": 0.0362, "step": 69755 }, { "epoch": 0.2910765995443583, "grad_norm": 0.4682357504836043, "learning_rate": 3.7073525702356234e-06, "loss": 0.0231, "step": 69760 }, { "epoch": 0.29109746225934857, "grad_norm": 0.9270513060791545, "learning_rate": 3.7072197145005994e-06, "loss": 0.0326, "step": 69765 }, { "epoch": 0.29111832497433887, "grad_norm": 0.4597933303091442, "learning_rate": 3.7070868730475035e-06, "loss": 0.0273, "step": 69770 }, { "epoch": 0.2911391876893291, "grad_norm": 0.7688111589409757, "learning_rate": 3.706954045873778e-06, "loss": 0.0281, "step": 69775 }, { "epoch": 0.2911600504043194, "grad_norm": 0.9273140594985124, "learning_rate": 3.7068212329768634e-06, "loss": 0.0365, "step": 69780 }, { "epoch": 0.2911809131193097, "grad_norm": 0.9812451257239823, "learning_rate": 3.7066884343542035e-06, "loss": 0.0362, "step": 69785 }, { "epoch": 0.29120177583429996, "grad_norm": 0.5118840081313155, "learning_rate": 3.7065556500032412e-06, "loss": 0.0348, "step": 69790 }, { "epoch": 0.29122263854929026, "grad_norm": 1.1522638107829146, "learning_rate": 3.7064228799214214e-06, "loss": 0.0305, "step": 69795 }, { "epoch": 0.2912435012642805, "grad_norm": 0.9050527998869584, "learning_rate": 3.7062901241061868e-06, "loss": 0.0317, "step": 69800 }, { "epoch": 0.2912643639792708, "grad_norm": 0.7060626961201998, "learning_rate": 3.7061573825549836e-06, "loss": 0.0332, "step": 69805 }, { "epoch": 0.2912852266942611, "grad_norm": 1.0042945607984421, "learning_rate": 3.706024655265258e-06, "loss": 0.0309, "step": 69810 }, { "epoch": 0.29130608940925135, "grad_norm": 1.1717390211389762, "learning_rate": 3.705891942234456e-06, "loss": 0.0334, "step": 69815 }, { "epoch": 0.29132695212424164, "grad_norm": 0.8022592712335672, "learning_rate": 3.7057592434600242e-06, "loss": 0.0345, "step": 69820 }, { "epoch": 0.29134781483923194, "grad_norm": 0.6937071098918267, "learning_rate": 3.7056265589394113e-06, "loss": 0.0291, "step": 69825 }, { "epoch": 0.2913686775542222, "grad_norm": 0.9082789929333165, "learning_rate": 3.705493888670066e-06, "loss": 0.0319, "step": 69830 }, { "epoch": 0.2913895402692125, "grad_norm": 0.5820881204862486, "learning_rate": 3.705361232649436e-06, "loss": 0.0399, "step": 69835 }, { "epoch": 0.29141040298420273, "grad_norm": 0.7401491878545368, "learning_rate": 3.7052285908749713e-06, "loss": 0.0286, "step": 69840 }, { "epoch": 0.29143126569919303, "grad_norm": 0.9406955959994391, "learning_rate": 3.7050959633441226e-06, "loss": 0.0301, "step": 69845 }, { "epoch": 0.29145212841418333, "grad_norm": 0.9846031996151808, "learning_rate": 3.7049633500543417e-06, "loss": 0.0332, "step": 69850 }, { "epoch": 0.2914729911291736, "grad_norm": 1.1994813990533373, "learning_rate": 3.7048307510030785e-06, "loss": 0.0361, "step": 69855 }, { "epoch": 0.2914938538441639, "grad_norm": 0.8940529368465636, "learning_rate": 3.7046981661877866e-06, "loss": 0.0255, "step": 69860 }, { "epoch": 0.2915147165591541, "grad_norm": 1.118642863742813, "learning_rate": 3.7045655956059186e-06, "loss": 0.0315, "step": 69865 }, { "epoch": 0.2915355792741444, "grad_norm": 1.2396183933569966, "learning_rate": 3.7044330392549266e-06, "loss": 0.0329, "step": 69870 }, { "epoch": 0.2915564419891347, "grad_norm": 0.5837922450097366, "learning_rate": 3.7043004971322666e-06, "loss": 0.0262, "step": 69875 }, { "epoch": 0.29157730470412496, "grad_norm": 0.8258726072670912, "learning_rate": 3.704167969235392e-06, "loss": 0.0272, "step": 69880 }, { "epoch": 0.29159816741911526, "grad_norm": 0.7719633067885305, "learning_rate": 3.7040354555617603e-06, "loss": 0.0279, "step": 69885 }, { "epoch": 0.2916190301341055, "grad_norm": 0.5003002297576256, "learning_rate": 3.7039029561088246e-06, "loss": 0.032, "step": 69890 }, { "epoch": 0.2916398928490958, "grad_norm": 0.8744949751751306, "learning_rate": 3.7037704708740442e-06, "loss": 0.0241, "step": 69895 }, { "epoch": 0.2916607555640861, "grad_norm": 0.9230269270119086, "learning_rate": 3.7036379998548742e-06, "loss": 0.0354, "step": 69900 }, { "epoch": 0.29168161827907635, "grad_norm": 0.7714895095289034, "learning_rate": 3.7035055430487748e-06, "loss": 0.0366, "step": 69905 }, { "epoch": 0.29170248099406665, "grad_norm": 0.794731736243493, "learning_rate": 3.7033731004532027e-06, "loss": 0.0322, "step": 69910 }, { "epoch": 0.29172334370905695, "grad_norm": 0.6731454370384158, "learning_rate": 3.7032406720656185e-06, "loss": 0.0375, "step": 69915 }, { "epoch": 0.2917442064240472, "grad_norm": 1.0911705349425698, "learning_rate": 3.703108257883481e-06, "loss": 0.0356, "step": 69920 }, { "epoch": 0.2917650691390375, "grad_norm": 1.020986197722554, "learning_rate": 3.7029758579042512e-06, "loss": 0.0235, "step": 69925 }, { "epoch": 0.29178593185402774, "grad_norm": 0.6101808970376689, "learning_rate": 3.702843472125391e-06, "loss": 0.0299, "step": 69930 }, { "epoch": 0.29180679456901804, "grad_norm": 0.8816482248449647, "learning_rate": 3.702711100544361e-06, "loss": 0.0327, "step": 69935 }, { "epoch": 0.29182765728400833, "grad_norm": 0.9654184388216863, "learning_rate": 3.7025787431586243e-06, "loss": 0.0309, "step": 69940 }, { "epoch": 0.2918485199989986, "grad_norm": 0.7312691196303903, "learning_rate": 3.702446399965644e-06, "loss": 0.0289, "step": 69945 }, { "epoch": 0.2918693827139889, "grad_norm": 0.7345517482619073, "learning_rate": 3.702314070962883e-06, "loss": 0.0346, "step": 69950 }, { "epoch": 0.2918902454289791, "grad_norm": 1.0067145478433517, "learning_rate": 3.702181756147806e-06, "loss": 0.0439, "step": 69955 }, { "epoch": 0.2919111081439694, "grad_norm": 1.0924739111002564, "learning_rate": 3.7020494555178783e-06, "loss": 0.0348, "step": 69960 }, { "epoch": 0.2919319708589597, "grad_norm": 0.9972198282390355, "learning_rate": 3.701917169070565e-06, "loss": 0.0301, "step": 69965 }, { "epoch": 0.29195283357394997, "grad_norm": 0.622892841351918, "learning_rate": 3.7017848968033334e-06, "loss": 0.0288, "step": 69970 }, { "epoch": 0.29197369628894027, "grad_norm": 0.42137841022108535, "learning_rate": 3.7016526387136485e-06, "loss": 0.0198, "step": 69975 }, { "epoch": 0.2919945590039305, "grad_norm": 1.0439066327799535, "learning_rate": 3.7015203947989796e-06, "loss": 0.0374, "step": 69980 }, { "epoch": 0.2920154217189208, "grad_norm": 0.9008828095681972, "learning_rate": 3.7013881650567936e-06, "loss": 0.0306, "step": 69985 }, { "epoch": 0.2920362844339111, "grad_norm": 0.7870860643354393, "learning_rate": 3.7012559494845602e-06, "loss": 0.0288, "step": 69990 }, { "epoch": 0.29205714714890135, "grad_norm": 0.6123978013762919, "learning_rate": 3.701123748079748e-06, "loss": 0.0298, "step": 69995 }, { "epoch": 0.29207800986389165, "grad_norm": 0.7360077888228792, "learning_rate": 3.7009915608398273e-06, "loss": 0.032, "step": 70000 }, { "epoch": 0.29209887257888195, "grad_norm": 0.8808742299653477, "learning_rate": 3.700859387762269e-06, "loss": 0.0348, "step": 70005 }, { "epoch": 0.2921197352938722, "grad_norm": 1.0178597460541374, "learning_rate": 3.700727228844544e-06, "loss": 0.0353, "step": 70010 }, { "epoch": 0.2921405980088625, "grad_norm": 1.5275732748438529, "learning_rate": 3.700595084084124e-06, "loss": 0.0302, "step": 70015 }, { "epoch": 0.29216146072385274, "grad_norm": 1.8915842201597461, "learning_rate": 3.700462953478483e-06, "loss": 0.0452, "step": 70020 }, { "epoch": 0.29218232343884304, "grad_norm": 0.6947910209207128, "learning_rate": 3.700330837025093e-06, "loss": 0.0223, "step": 70025 }, { "epoch": 0.29220318615383334, "grad_norm": 0.9814329800558891, "learning_rate": 3.7001987347214273e-06, "loss": 0.0296, "step": 70030 }, { "epoch": 0.2922240488688236, "grad_norm": 0.7126767606892916, "learning_rate": 3.700066646564961e-06, "loss": 0.0319, "step": 70035 }, { "epoch": 0.2922449115838139, "grad_norm": 0.40303014683213867, "learning_rate": 3.6999345725531692e-06, "loss": 0.0274, "step": 70040 }, { "epoch": 0.2922657742988041, "grad_norm": 1.0738365574072273, "learning_rate": 3.6998025126835284e-06, "loss": 0.0323, "step": 70045 }, { "epoch": 0.2922866370137944, "grad_norm": 0.4665511404037253, "learning_rate": 3.6996704669535128e-06, "loss": 0.0285, "step": 70050 }, { "epoch": 0.2923074997287847, "grad_norm": 0.47548321735497884, "learning_rate": 3.699538435360602e-06, "loss": 0.0206, "step": 70055 }, { "epoch": 0.29232836244377497, "grad_norm": 0.3794371309581609, "learning_rate": 3.6994064179022713e-06, "loss": 0.0231, "step": 70060 }, { "epoch": 0.29234922515876527, "grad_norm": 0.5462401956638779, "learning_rate": 3.6992744145760007e-06, "loss": 0.0334, "step": 70065 }, { "epoch": 0.2923700878737555, "grad_norm": 1.1244103821225617, "learning_rate": 3.6991424253792674e-06, "loss": 0.0266, "step": 70070 }, { "epoch": 0.2923909505887458, "grad_norm": 0.7539760913709054, "learning_rate": 3.6990104503095512e-06, "loss": 0.027, "step": 70075 }, { "epoch": 0.2924118133037361, "grad_norm": 0.7740693836811897, "learning_rate": 3.6988784893643344e-06, "loss": 0.0305, "step": 70080 }, { "epoch": 0.29243267601872636, "grad_norm": 0.7326634189712349, "learning_rate": 3.6987465425410956e-06, "loss": 0.0234, "step": 70085 }, { "epoch": 0.29245353873371666, "grad_norm": 1.4561960849947286, "learning_rate": 3.6986146098373166e-06, "loss": 0.045, "step": 70090 }, { "epoch": 0.29247440144870696, "grad_norm": 1.1409855486290015, "learning_rate": 3.6984826912504797e-06, "loss": 0.0307, "step": 70095 }, { "epoch": 0.2924952641636972, "grad_norm": 0.7082129241663249, "learning_rate": 3.6983507867780667e-06, "loss": 0.0401, "step": 70100 }, { "epoch": 0.2925161268786875, "grad_norm": 0.5684704590835169, "learning_rate": 3.6982188964175614e-06, "loss": 0.0239, "step": 70105 }, { "epoch": 0.29253698959367774, "grad_norm": 1.1119800521993306, "learning_rate": 3.698087020166449e-06, "loss": 0.04, "step": 70110 }, { "epoch": 0.29255785230866804, "grad_norm": 0.5377439921401268, "learning_rate": 3.697955158022211e-06, "loss": 0.0309, "step": 70115 }, { "epoch": 0.29257871502365834, "grad_norm": 0.7970344548651899, "learning_rate": 3.6978233099823356e-06, "loss": 0.0346, "step": 70120 }, { "epoch": 0.2925995777386486, "grad_norm": 0.48799024939498864, "learning_rate": 3.6976914760443068e-06, "loss": 0.027, "step": 70125 }, { "epoch": 0.2926204404536389, "grad_norm": 0.8046533767453328, "learning_rate": 3.6975596562056115e-06, "loss": 0.0292, "step": 70130 }, { "epoch": 0.29264130316862913, "grad_norm": 0.5104566548376794, "learning_rate": 3.6974278504637374e-06, "loss": 0.0236, "step": 70135 }, { "epoch": 0.29266216588361943, "grad_norm": 0.9732304733408398, "learning_rate": 3.697296058816171e-06, "loss": 0.0256, "step": 70140 }, { "epoch": 0.29268302859860973, "grad_norm": 0.6734490132076955, "learning_rate": 3.6971642812604015e-06, "loss": 0.0327, "step": 70145 }, { "epoch": 0.2927038913136, "grad_norm": 0.8590637799305375, "learning_rate": 3.6970325177939164e-06, "loss": 0.0355, "step": 70150 }, { "epoch": 0.2927247540285903, "grad_norm": 1.1974226104527843, "learning_rate": 3.696900768414207e-06, "loss": 0.0396, "step": 70155 }, { "epoch": 0.2927456167435805, "grad_norm": 0.9512116587599462, "learning_rate": 3.6967690331187626e-06, "loss": 0.033, "step": 70160 }, { "epoch": 0.2927664794585708, "grad_norm": 0.9144193260245146, "learning_rate": 3.696637311905074e-06, "loss": 0.0285, "step": 70165 }, { "epoch": 0.2927873421735611, "grad_norm": 0.9356011627502901, "learning_rate": 3.696505604770633e-06, "loss": 0.03, "step": 70170 }, { "epoch": 0.29280820488855136, "grad_norm": 0.7091104245165472, "learning_rate": 3.6963739117129305e-06, "loss": 0.0312, "step": 70175 }, { "epoch": 0.29282906760354166, "grad_norm": 0.8182378361168863, "learning_rate": 3.696242232729461e-06, "loss": 0.033, "step": 70180 }, { "epoch": 0.29284993031853196, "grad_norm": 0.5778964000287429, "learning_rate": 3.6961105678177163e-06, "loss": 0.0291, "step": 70185 }, { "epoch": 0.2928707930335222, "grad_norm": 0.9543347211013712, "learning_rate": 3.6959789169751903e-06, "loss": 0.028, "step": 70190 }, { "epoch": 0.2928916557485125, "grad_norm": 2.0713141093123446, "learning_rate": 3.6958472801993782e-06, "loss": 0.0412, "step": 70195 }, { "epoch": 0.29291251846350275, "grad_norm": 1.2475305932799237, "learning_rate": 3.6957156574877758e-06, "loss": 0.0379, "step": 70200 }, { "epoch": 0.29293338117849305, "grad_norm": 0.5255899873658639, "learning_rate": 3.6955840488378773e-06, "loss": 0.0266, "step": 70205 }, { "epoch": 0.29295424389348335, "grad_norm": 0.7296252054181951, "learning_rate": 3.6954524542471796e-06, "loss": 0.0306, "step": 70210 }, { "epoch": 0.2929751066084736, "grad_norm": 0.8457319339897835, "learning_rate": 3.695320873713181e-06, "loss": 0.035, "step": 70215 }, { "epoch": 0.2929959693234639, "grad_norm": 0.8359981167597129, "learning_rate": 3.695189307233378e-06, "loss": 0.0328, "step": 70220 }, { "epoch": 0.29301683203845413, "grad_norm": 0.38004350336326365, "learning_rate": 3.695057754805268e-06, "loss": 0.0408, "step": 70225 }, { "epoch": 0.29303769475344443, "grad_norm": 0.6687435654628945, "learning_rate": 3.694926216426352e-06, "loss": 0.0244, "step": 70230 }, { "epoch": 0.29305855746843473, "grad_norm": 1.5665149546283144, "learning_rate": 3.6947946920941286e-06, "loss": 0.0321, "step": 70235 }, { "epoch": 0.293079420183425, "grad_norm": 0.678085355311407, "learning_rate": 3.6946631818060972e-06, "loss": 0.0214, "step": 70240 }, { "epoch": 0.2931002828984153, "grad_norm": 1.2622391054541593, "learning_rate": 3.694531685559759e-06, "loss": 0.0252, "step": 70245 }, { "epoch": 0.2931211456134055, "grad_norm": 0.8429308164231171, "learning_rate": 3.6944002033526166e-06, "loss": 0.0326, "step": 70250 }, { "epoch": 0.2931420083283958, "grad_norm": 0.8648500807487716, "learning_rate": 3.69426873518217e-06, "loss": 0.0311, "step": 70255 }, { "epoch": 0.2931628710433861, "grad_norm": 2.2200918183832274, "learning_rate": 3.6941372810459235e-06, "loss": 0.0303, "step": 70260 }, { "epoch": 0.29318373375837636, "grad_norm": 0.9833808024681383, "learning_rate": 3.69400584094138e-06, "loss": 0.0264, "step": 70265 }, { "epoch": 0.29320459647336666, "grad_norm": 0.6532071423399237, "learning_rate": 3.6938744148660425e-06, "loss": 0.0281, "step": 70270 }, { "epoch": 0.29322545918835696, "grad_norm": 0.5736990791620323, "learning_rate": 3.6937430028174167e-06, "loss": 0.0305, "step": 70275 }, { "epoch": 0.2932463219033472, "grad_norm": 1.3939796839080982, "learning_rate": 3.6936116047930065e-06, "loss": 0.0392, "step": 70280 }, { "epoch": 0.2932671846183375, "grad_norm": 0.4676364758379091, "learning_rate": 3.693480220790319e-06, "loss": 0.0252, "step": 70285 }, { "epoch": 0.29328804733332775, "grad_norm": 0.7959152077980001, "learning_rate": 3.6933488508068593e-06, "loss": 0.0268, "step": 70290 }, { "epoch": 0.29330891004831805, "grad_norm": 1.3749257469848453, "learning_rate": 3.6932174948401358e-06, "loss": 0.0271, "step": 70295 }, { "epoch": 0.29332977276330835, "grad_norm": 0.6911959846009016, "learning_rate": 3.6930861528876545e-06, "loss": 0.0261, "step": 70300 }, { "epoch": 0.2933506354782986, "grad_norm": 1.3169645276277056, "learning_rate": 3.6929548249469253e-06, "loss": 0.0332, "step": 70305 }, { "epoch": 0.2933714981932889, "grad_norm": 1.1190682688698514, "learning_rate": 3.6928235110154553e-06, "loss": 0.0285, "step": 70310 }, { "epoch": 0.29339236090827914, "grad_norm": 0.6787400559819373, "learning_rate": 3.6926922110907554e-06, "loss": 0.0245, "step": 70315 }, { "epoch": 0.29341322362326944, "grad_norm": 0.9722109932343512, "learning_rate": 3.692560925170335e-06, "loss": 0.0293, "step": 70320 }, { "epoch": 0.29343408633825974, "grad_norm": 0.5444331281686199, "learning_rate": 3.6924296532517046e-06, "loss": 0.0348, "step": 70325 }, { "epoch": 0.29345494905325, "grad_norm": 0.857176130811146, "learning_rate": 3.6922983953323766e-06, "loss": 0.0364, "step": 70330 }, { "epoch": 0.2934758117682403, "grad_norm": 0.9070494197694852, "learning_rate": 3.692167151409862e-06, "loss": 0.0327, "step": 70335 }, { "epoch": 0.2934966744832305, "grad_norm": 0.6585885762008105, "learning_rate": 3.692035921481673e-06, "loss": 0.032, "step": 70340 }, { "epoch": 0.2935175371982208, "grad_norm": 0.6344273873952389, "learning_rate": 3.691904705545324e-06, "loss": 0.0338, "step": 70345 }, { "epoch": 0.2935383999132111, "grad_norm": 1.1093138554980164, "learning_rate": 3.691773503598328e-06, "loss": 0.0366, "step": 70350 }, { "epoch": 0.29355926262820137, "grad_norm": 0.8352573395129594, "learning_rate": 3.6916423156381997e-06, "loss": 0.0312, "step": 70355 }, { "epoch": 0.29358012534319167, "grad_norm": 1.002487766730689, "learning_rate": 3.6915111416624545e-06, "loss": 0.0385, "step": 70360 }, { "epoch": 0.29360098805818197, "grad_norm": 0.6745723298459657, "learning_rate": 3.6913799816686076e-06, "loss": 0.0264, "step": 70365 }, { "epoch": 0.2936218507731722, "grad_norm": 1.3342857414277147, "learning_rate": 3.6912488356541747e-06, "loss": 0.0378, "step": 70370 }, { "epoch": 0.2936427134881625, "grad_norm": 0.9476973897111487, "learning_rate": 3.6911177036166735e-06, "loss": 0.0344, "step": 70375 }, { "epoch": 0.29366357620315275, "grad_norm": 1.0734244534089015, "learning_rate": 3.6909865855536214e-06, "loss": 0.0287, "step": 70380 }, { "epoch": 0.29368443891814305, "grad_norm": 0.7530276682131736, "learning_rate": 3.6908554814625375e-06, "loss": 0.0249, "step": 70385 }, { "epoch": 0.29370530163313335, "grad_norm": 0.8427006389627267, "learning_rate": 3.6907243913409385e-06, "loss": 0.0369, "step": 70390 }, { "epoch": 0.2937261643481236, "grad_norm": 0.522607909547364, "learning_rate": 3.6905933151863454e-06, "loss": 0.025, "step": 70395 }, { "epoch": 0.2937470270631139, "grad_norm": 0.4344199713136493, "learning_rate": 3.6904622529962763e-06, "loss": 0.0332, "step": 70400 }, { "epoch": 0.29376788977810414, "grad_norm": 0.7345595036630123, "learning_rate": 3.6903312047682544e-06, "loss": 0.0226, "step": 70405 }, { "epoch": 0.29378875249309444, "grad_norm": 0.5561106649920957, "learning_rate": 3.6902001704997992e-06, "loss": 0.029, "step": 70410 }, { "epoch": 0.29380961520808474, "grad_norm": 0.9804018226786662, "learning_rate": 3.690069150188433e-06, "loss": 0.0355, "step": 70415 }, { "epoch": 0.293830477923075, "grad_norm": 0.6308834244467296, "learning_rate": 3.689938143831678e-06, "loss": 0.0288, "step": 70420 }, { "epoch": 0.2938513406380653, "grad_norm": 0.5108703086595038, "learning_rate": 3.689807151427057e-06, "loss": 0.0262, "step": 70425 }, { "epoch": 0.29387220335305553, "grad_norm": 1.420264678533496, "learning_rate": 3.6896761729720952e-06, "loss": 0.0329, "step": 70430 }, { "epoch": 0.2938930660680458, "grad_norm": 0.7955796815427263, "learning_rate": 3.689545208464315e-06, "loss": 0.0277, "step": 70435 }, { "epoch": 0.2939139287830361, "grad_norm": 1.077206597288656, "learning_rate": 3.6894142579012428e-06, "loss": 0.043, "step": 70440 }, { "epoch": 0.29393479149802637, "grad_norm": 0.6996029159749415, "learning_rate": 3.689283321280403e-06, "loss": 0.0225, "step": 70445 }, { "epoch": 0.29395565421301667, "grad_norm": 0.617295186492546, "learning_rate": 3.689152398599322e-06, "loss": 0.0359, "step": 70450 }, { "epoch": 0.29397651692800697, "grad_norm": 0.8620298936827094, "learning_rate": 3.689021489855527e-06, "loss": 0.0259, "step": 70455 }, { "epoch": 0.2939973796429972, "grad_norm": 0.983904457458904, "learning_rate": 3.6888905950465456e-06, "loss": 0.0299, "step": 70460 }, { "epoch": 0.2940182423579875, "grad_norm": 0.7444879093622605, "learning_rate": 3.6887597141699046e-06, "loss": 0.0276, "step": 70465 }, { "epoch": 0.29403910507297776, "grad_norm": 0.5973751179463198, "learning_rate": 3.6886288472231333e-06, "loss": 0.0377, "step": 70470 }, { "epoch": 0.29405996778796806, "grad_norm": 1.0912895334902926, "learning_rate": 3.6884979942037615e-06, "loss": 0.0311, "step": 70475 }, { "epoch": 0.29408083050295836, "grad_norm": 1.6487940500527045, "learning_rate": 3.688367155109318e-06, "loss": 0.0493, "step": 70480 }, { "epoch": 0.2941016932179486, "grad_norm": 1.3936927463760929, "learning_rate": 3.6882363299373344e-06, "loss": 0.0387, "step": 70485 }, { "epoch": 0.2941225559329389, "grad_norm": 0.5798235841348703, "learning_rate": 3.6881055186853397e-06, "loss": 0.0323, "step": 70490 }, { "epoch": 0.29414341864792914, "grad_norm": 0.8420751138695818, "learning_rate": 3.687974721350867e-06, "loss": 0.0334, "step": 70495 }, { "epoch": 0.29416428136291944, "grad_norm": 0.6758510593628551, "learning_rate": 3.6878439379314496e-06, "loss": 0.0376, "step": 70500 }, { "epoch": 0.29418514407790974, "grad_norm": 0.7388162725917432, "learning_rate": 3.6877131684246187e-06, "loss": 0.0325, "step": 70505 }, { "epoch": 0.2942060067929, "grad_norm": 0.5530392527235872, "learning_rate": 3.687582412827908e-06, "loss": 0.0218, "step": 70510 }, { "epoch": 0.2942268695078903, "grad_norm": 0.6868045473251397, "learning_rate": 3.6874516711388524e-06, "loss": 0.0324, "step": 70515 }, { "epoch": 0.29424773222288053, "grad_norm": 0.705214614984318, "learning_rate": 3.687320943354986e-06, "loss": 0.0296, "step": 70520 }, { "epoch": 0.29426859493787083, "grad_norm": 0.4716997348606151, "learning_rate": 3.6871902294738443e-06, "loss": 0.0273, "step": 70525 }, { "epoch": 0.29428945765286113, "grad_norm": 0.9156972043177242, "learning_rate": 3.687059529492963e-06, "loss": 0.0344, "step": 70530 }, { "epoch": 0.2943103203678514, "grad_norm": 0.8163972749694866, "learning_rate": 3.68692884340988e-06, "loss": 0.0347, "step": 70535 }, { "epoch": 0.2943311830828417, "grad_norm": 0.8116345138914738, "learning_rate": 3.68679817122213e-06, "loss": 0.0303, "step": 70540 }, { "epoch": 0.294352045797832, "grad_norm": 0.6996446196654292, "learning_rate": 3.6866675129272526e-06, "loss": 0.0229, "step": 70545 }, { "epoch": 0.2943729085128222, "grad_norm": 0.7492747343508773, "learning_rate": 3.686536868522786e-06, "loss": 0.0279, "step": 70550 }, { "epoch": 0.2943937712278125, "grad_norm": 1.325311215011333, "learning_rate": 3.6864062380062693e-06, "loss": 0.0353, "step": 70555 }, { "epoch": 0.29441463394280276, "grad_norm": 1.1627236076456569, "learning_rate": 3.686275621375241e-06, "loss": 0.0335, "step": 70560 }, { "epoch": 0.29443549665779306, "grad_norm": 1.3307182172855292, "learning_rate": 3.686145018627243e-06, "loss": 0.0496, "step": 70565 }, { "epoch": 0.29445635937278336, "grad_norm": 1.4228150516031783, "learning_rate": 3.686014429759815e-06, "loss": 0.0254, "step": 70570 }, { "epoch": 0.2944772220877736, "grad_norm": 0.8336145581070711, "learning_rate": 3.685883854770498e-06, "loss": 0.0276, "step": 70575 }, { "epoch": 0.2944980848027639, "grad_norm": 0.5256298855317747, "learning_rate": 3.6857532936568353e-06, "loss": 0.0255, "step": 70580 }, { "epoch": 0.29451894751775415, "grad_norm": 1.0485435797716542, "learning_rate": 3.6856227464163684e-06, "loss": 0.0293, "step": 70585 }, { "epoch": 0.29453981023274445, "grad_norm": 0.7287531844176839, "learning_rate": 3.6854922130466416e-06, "loss": 0.0293, "step": 70590 }, { "epoch": 0.29456067294773475, "grad_norm": 0.9142180942204131, "learning_rate": 3.6853616935451993e-06, "loss": 0.0275, "step": 70595 }, { "epoch": 0.294581535662725, "grad_norm": 0.8293486310412602, "learning_rate": 3.685231187909584e-06, "loss": 0.0272, "step": 70600 }, { "epoch": 0.2946023983777153, "grad_norm": 0.9522783038272087, "learning_rate": 3.6851006961373414e-06, "loss": 0.0346, "step": 70605 }, { "epoch": 0.29462326109270554, "grad_norm": 0.5284616962029706, "learning_rate": 3.6849702182260185e-06, "loss": 0.0283, "step": 70610 }, { "epoch": 0.29464412380769583, "grad_norm": 1.9195986459510463, "learning_rate": 3.6848397541731605e-06, "loss": 0.0297, "step": 70615 }, { "epoch": 0.29466498652268613, "grad_norm": 0.8869439954228452, "learning_rate": 3.6847093039763143e-06, "loss": 0.0322, "step": 70620 }, { "epoch": 0.2946858492376764, "grad_norm": 1.5568190110539977, "learning_rate": 3.684578867633028e-06, "loss": 0.0373, "step": 70625 }, { "epoch": 0.2947067119526667, "grad_norm": 0.8870927438329307, "learning_rate": 3.684448445140849e-06, "loss": 0.0278, "step": 70630 }, { "epoch": 0.294727574667657, "grad_norm": 0.8505911884486085, "learning_rate": 3.6843180364973276e-06, "loss": 0.0448, "step": 70635 }, { "epoch": 0.2947484373826472, "grad_norm": 0.6770620768603682, "learning_rate": 3.684187641700011e-06, "loss": 0.024, "step": 70640 }, { "epoch": 0.2947693000976375, "grad_norm": 1.1445608723533198, "learning_rate": 3.6840572607464503e-06, "loss": 0.0351, "step": 70645 }, { "epoch": 0.29479016281262777, "grad_norm": 0.704901852444101, "learning_rate": 3.6839268936341965e-06, "loss": 0.0294, "step": 70650 }, { "epoch": 0.29481102552761806, "grad_norm": 0.8557264433019275, "learning_rate": 3.6837965403608e-06, "loss": 0.0259, "step": 70655 }, { "epoch": 0.29483188824260836, "grad_norm": 0.7701187355980887, "learning_rate": 3.683666200923813e-06, "loss": 0.0354, "step": 70660 }, { "epoch": 0.2948527509575986, "grad_norm": 0.7681820304895153, "learning_rate": 3.683535875320787e-06, "loss": 0.0319, "step": 70665 }, { "epoch": 0.2948736136725889, "grad_norm": 1.0505410990351975, "learning_rate": 3.683405563549276e-06, "loss": 0.0407, "step": 70670 }, { "epoch": 0.29489447638757915, "grad_norm": 0.654977367333654, "learning_rate": 3.683275265606834e-06, "loss": 0.0259, "step": 70675 }, { "epoch": 0.29491533910256945, "grad_norm": 1.0495128289057472, "learning_rate": 3.6831449814910135e-06, "loss": 0.0281, "step": 70680 }, { "epoch": 0.29493620181755975, "grad_norm": 1.0821145634986542, "learning_rate": 3.6830147111993715e-06, "loss": 0.0281, "step": 70685 }, { "epoch": 0.29495706453255, "grad_norm": 3.2388226363812174, "learning_rate": 3.682884454729461e-06, "loss": 0.0368, "step": 70690 }, { "epoch": 0.2949779272475403, "grad_norm": 0.7564883917399559, "learning_rate": 3.68275421207884e-06, "loss": 0.0269, "step": 70695 }, { "epoch": 0.29499878996253054, "grad_norm": 0.8218808661961278, "learning_rate": 3.682623983245064e-06, "loss": 0.0328, "step": 70700 }, { "epoch": 0.29501965267752084, "grad_norm": 0.6488512238872887, "learning_rate": 3.6824937682256904e-06, "loss": 0.0217, "step": 70705 }, { "epoch": 0.29504051539251114, "grad_norm": 1.6337409449648674, "learning_rate": 3.682363567018278e-06, "loss": 0.0381, "step": 70710 }, { "epoch": 0.2950613781075014, "grad_norm": 1.0939573659741852, "learning_rate": 3.6822333796203844e-06, "loss": 0.0275, "step": 70715 }, { "epoch": 0.2950822408224917, "grad_norm": 0.5787423360549723, "learning_rate": 3.6821032060295685e-06, "loss": 0.0289, "step": 70720 }, { "epoch": 0.295103103537482, "grad_norm": 1.0531676662151304, "learning_rate": 3.6819730462433893e-06, "loss": 0.0299, "step": 70725 }, { "epoch": 0.2951239662524722, "grad_norm": 0.5637837764479633, "learning_rate": 3.681842900259409e-06, "loss": 0.0234, "step": 70730 }, { "epoch": 0.2951448289674625, "grad_norm": 1.0686604678177374, "learning_rate": 3.681712768075187e-06, "loss": 0.0397, "step": 70735 }, { "epoch": 0.29516569168245277, "grad_norm": 0.788503730067806, "learning_rate": 3.6815826496882855e-06, "loss": 0.0225, "step": 70740 }, { "epoch": 0.29518655439744307, "grad_norm": 0.6636067101997557, "learning_rate": 3.681452545096266e-06, "loss": 0.0235, "step": 70745 }, { "epoch": 0.29520741711243337, "grad_norm": 0.7517737659562466, "learning_rate": 3.681322454296691e-06, "loss": 0.0219, "step": 70750 }, { "epoch": 0.2952282798274236, "grad_norm": 0.7872310693899353, "learning_rate": 3.6811923772871245e-06, "loss": 0.0372, "step": 70755 }, { "epoch": 0.2952491425424139, "grad_norm": 1.1634831703626645, "learning_rate": 3.6810623140651297e-06, "loss": 0.0351, "step": 70760 }, { "epoch": 0.29527000525740416, "grad_norm": 0.8803582705153602, "learning_rate": 3.6809322646282714e-06, "loss": 0.0261, "step": 70765 }, { "epoch": 0.29529086797239446, "grad_norm": 0.6148026192065573, "learning_rate": 3.680802228974115e-06, "loss": 0.0297, "step": 70770 }, { "epoch": 0.29531173068738475, "grad_norm": 0.8180708096766562, "learning_rate": 3.680672207100226e-06, "loss": 0.0282, "step": 70775 }, { "epoch": 0.295332593402375, "grad_norm": 0.9031129791178698, "learning_rate": 3.68054219900417e-06, "loss": 0.034, "step": 70780 }, { "epoch": 0.2953534561173653, "grad_norm": 0.9695580515852729, "learning_rate": 3.6804122046835143e-06, "loss": 0.0308, "step": 70785 }, { "epoch": 0.29537431883235554, "grad_norm": 0.43663645462172646, "learning_rate": 3.6802822241358264e-06, "loss": 0.0277, "step": 70790 }, { "epoch": 0.29539518154734584, "grad_norm": 1.232801137585031, "learning_rate": 3.6801522573586747e-06, "loss": 0.0318, "step": 70795 }, { "epoch": 0.29541604426233614, "grad_norm": 0.551146099774068, "learning_rate": 3.6800223043496276e-06, "loss": 0.042, "step": 70800 }, { "epoch": 0.2954369069773264, "grad_norm": 0.8173332595374605, "learning_rate": 3.6798923651062546e-06, "loss": 0.0301, "step": 70805 }, { "epoch": 0.2954577696923167, "grad_norm": 0.540589158469023, "learning_rate": 3.6797624396261256e-06, "loss": 0.0294, "step": 70810 }, { "epoch": 0.295478632407307, "grad_norm": 0.5006521863749259, "learning_rate": 3.67963252790681e-06, "loss": 0.0296, "step": 70815 }, { "epoch": 0.29549949512229723, "grad_norm": 1.1579590049354167, "learning_rate": 3.6795026299458798e-06, "loss": 0.0373, "step": 70820 }, { "epoch": 0.29552035783728753, "grad_norm": 0.9152096521633496, "learning_rate": 3.6793727457409078e-06, "loss": 0.0295, "step": 70825 }, { "epoch": 0.2955412205522778, "grad_norm": 1.0199513876711046, "learning_rate": 3.6792428752894643e-06, "loss": 0.0238, "step": 70830 }, { "epoch": 0.29556208326726807, "grad_norm": 1.3843087677144243, "learning_rate": 3.679113018589123e-06, "loss": 0.0462, "step": 70835 }, { "epoch": 0.29558294598225837, "grad_norm": 0.6713881344060179, "learning_rate": 3.6789831756374574e-06, "loss": 0.0267, "step": 70840 }, { "epoch": 0.2956038086972486, "grad_norm": 1.161085310782677, "learning_rate": 3.678853346432042e-06, "loss": 0.0357, "step": 70845 }, { "epoch": 0.2956246714122389, "grad_norm": 0.8268683765963386, "learning_rate": 3.6787235309704504e-06, "loss": 0.0382, "step": 70850 }, { "epoch": 0.29564553412722916, "grad_norm": 0.6561571192583696, "learning_rate": 3.6785937292502595e-06, "loss": 0.0291, "step": 70855 }, { "epoch": 0.29566639684221946, "grad_norm": 1.1593327390423347, "learning_rate": 3.678463941269043e-06, "loss": 0.0325, "step": 70860 }, { "epoch": 0.29568725955720976, "grad_norm": 0.6943223590454756, "learning_rate": 3.6783341670243805e-06, "loss": 0.0251, "step": 70865 }, { "epoch": 0.2957081222722, "grad_norm": 0.8924401567318909, "learning_rate": 3.678204406513846e-06, "loss": 0.0346, "step": 70870 }, { "epoch": 0.2957289849871903, "grad_norm": 0.8150039921638772, "learning_rate": 3.678074659735018e-06, "loss": 0.0312, "step": 70875 }, { "epoch": 0.29574984770218055, "grad_norm": 0.5767539136264896, "learning_rate": 3.677944926685476e-06, "loss": 0.0207, "step": 70880 }, { "epoch": 0.29577071041717085, "grad_norm": 0.8940968366432007, "learning_rate": 3.677815207362798e-06, "loss": 0.0244, "step": 70885 }, { "epoch": 0.29579157313216115, "grad_norm": 0.5540861936664196, "learning_rate": 3.677685501764563e-06, "loss": 0.0333, "step": 70890 }, { "epoch": 0.2958124358471514, "grad_norm": 0.6906854246381694, "learning_rate": 3.677555809888352e-06, "loss": 0.0336, "step": 70895 }, { "epoch": 0.2958332985621417, "grad_norm": 1.123099077242058, "learning_rate": 3.677426131731745e-06, "loss": 0.0276, "step": 70900 }, { "epoch": 0.295854161277132, "grad_norm": 0.38298263034354896, "learning_rate": 3.6772964672923233e-06, "loss": 0.0218, "step": 70905 }, { "epoch": 0.29587502399212223, "grad_norm": 0.7676864984133912, "learning_rate": 3.6771668165676692e-06, "loss": 0.0286, "step": 70910 }, { "epoch": 0.29589588670711253, "grad_norm": 0.4320550879755172, "learning_rate": 3.677037179555365e-06, "loss": 0.0254, "step": 70915 }, { "epoch": 0.2959167494221028, "grad_norm": 1.08701944251868, "learning_rate": 3.6769075562529937e-06, "loss": 0.0498, "step": 70920 }, { "epoch": 0.2959376121370931, "grad_norm": 0.6011681526618714, "learning_rate": 3.6767779466581395e-06, "loss": 0.0287, "step": 70925 }, { "epoch": 0.2959584748520834, "grad_norm": 0.8524536234245625, "learning_rate": 3.6766483507683846e-06, "loss": 0.0373, "step": 70930 }, { "epoch": 0.2959793375670736, "grad_norm": 1.553705767695468, "learning_rate": 3.6765187685813168e-06, "loss": 0.0277, "step": 70935 }, { "epoch": 0.2960002002820639, "grad_norm": 1.802688281079245, "learning_rate": 3.676389200094519e-06, "loss": 0.0358, "step": 70940 }, { "epoch": 0.29602106299705416, "grad_norm": 0.39858857136007203, "learning_rate": 3.676259645305579e-06, "loss": 0.0156, "step": 70945 }, { "epoch": 0.29604192571204446, "grad_norm": 1.0365952928316742, "learning_rate": 3.676130104212083e-06, "loss": 0.0341, "step": 70950 }, { "epoch": 0.29606278842703476, "grad_norm": 0.609574251736491, "learning_rate": 3.676000576811617e-06, "loss": 0.0369, "step": 70955 }, { "epoch": 0.296083651142025, "grad_norm": 0.6299108925859939, "learning_rate": 3.67587106310177e-06, "loss": 0.0299, "step": 70960 }, { "epoch": 0.2961045138570153, "grad_norm": 0.7368529947673838, "learning_rate": 3.67574156308013e-06, "loss": 0.0264, "step": 70965 }, { "epoch": 0.29612537657200555, "grad_norm": 0.870510067290996, "learning_rate": 3.6756120767442876e-06, "loss": 0.0372, "step": 70970 }, { "epoch": 0.29614623928699585, "grad_norm": 0.7279967605867909, "learning_rate": 3.67548260409183e-06, "loss": 0.0263, "step": 70975 }, { "epoch": 0.29616710200198615, "grad_norm": 0.6802405904431597, "learning_rate": 3.6753531451203483e-06, "loss": 0.0282, "step": 70980 }, { "epoch": 0.2961879647169764, "grad_norm": 0.8205880958480413, "learning_rate": 3.675223699827433e-06, "loss": 0.027, "step": 70985 }, { "epoch": 0.2962088274319667, "grad_norm": 0.690606799058399, "learning_rate": 3.6750942682106767e-06, "loss": 0.025, "step": 70990 }, { "epoch": 0.296229690146957, "grad_norm": 1.0226356042916565, "learning_rate": 3.6749648502676698e-06, "loss": 0.0426, "step": 70995 }, { "epoch": 0.29625055286194724, "grad_norm": 1.7829483193167284, "learning_rate": 3.6748354459960062e-06, "loss": 0.0336, "step": 71000 }, { "epoch": 0.29627141557693754, "grad_norm": 0.6536866580253832, "learning_rate": 3.6747060553932784e-06, "loss": 0.0287, "step": 71005 }, { "epoch": 0.2962922782919278, "grad_norm": 0.7639548976737396, "learning_rate": 3.6745766784570804e-06, "loss": 0.0291, "step": 71010 }, { "epoch": 0.2963131410069181, "grad_norm": 1.1480344689739153, "learning_rate": 3.6744473151850057e-06, "loss": 0.0278, "step": 71015 }, { "epoch": 0.2963340037219084, "grad_norm": 0.9100129782156566, "learning_rate": 3.674317965574651e-06, "loss": 0.0249, "step": 71020 }, { "epoch": 0.2963548664368986, "grad_norm": 1.1841796715258992, "learning_rate": 3.6741886296236103e-06, "loss": 0.0355, "step": 71025 }, { "epoch": 0.2963757291518889, "grad_norm": 0.5144676360111751, "learning_rate": 3.67405930732948e-06, "loss": 0.0273, "step": 71030 }, { "epoch": 0.29639659186687917, "grad_norm": 1.2004137302531057, "learning_rate": 3.673929998689857e-06, "loss": 0.0318, "step": 71035 }, { "epoch": 0.29641745458186947, "grad_norm": 0.7164411075186005, "learning_rate": 3.673800703702339e-06, "loss": 0.0235, "step": 71040 }, { "epoch": 0.29643831729685977, "grad_norm": 1.1965078925110584, "learning_rate": 3.673671422364523e-06, "loss": 0.0345, "step": 71045 }, { "epoch": 0.29645918001185, "grad_norm": 1.5392834293851716, "learning_rate": 3.673542154674009e-06, "loss": 0.0363, "step": 71050 }, { "epoch": 0.2964800427268403, "grad_norm": 0.7421147904627213, "learning_rate": 3.6734129006283943e-06, "loss": 0.0336, "step": 71055 }, { "epoch": 0.29650090544183055, "grad_norm": 1.0467927598017739, "learning_rate": 3.67328366022528e-06, "loss": 0.033, "step": 71060 }, { "epoch": 0.29652176815682085, "grad_norm": 1.1131153371335474, "learning_rate": 3.673154433462265e-06, "loss": 0.0338, "step": 71065 }, { "epoch": 0.29654263087181115, "grad_norm": 0.8217095409545744, "learning_rate": 3.6730252203369516e-06, "loss": 0.0373, "step": 71070 }, { "epoch": 0.2965634935868014, "grad_norm": 0.5059387950178618, "learning_rate": 3.6728960208469406e-06, "loss": 0.0252, "step": 71075 }, { "epoch": 0.2965843563017917, "grad_norm": 0.5474537154733692, "learning_rate": 3.6727668349898342e-06, "loss": 0.0226, "step": 71080 }, { "epoch": 0.29660521901678194, "grad_norm": 0.8748767456308529, "learning_rate": 3.672637662763234e-06, "loss": 0.0372, "step": 71085 }, { "epoch": 0.29662608173177224, "grad_norm": 0.7824074710867542, "learning_rate": 3.6725085041647453e-06, "loss": 0.0306, "step": 71090 }, { "epoch": 0.29664694444676254, "grad_norm": 0.9432579878498849, "learning_rate": 3.6723793591919702e-06, "loss": 0.0317, "step": 71095 }, { "epoch": 0.2966678071617528, "grad_norm": 0.8038752396183072, "learning_rate": 3.6722502278425144e-06, "loss": 0.0339, "step": 71100 }, { "epoch": 0.2966886698767431, "grad_norm": 0.7941119812280225, "learning_rate": 3.6721211101139815e-06, "loss": 0.0164, "step": 71105 }, { "epoch": 0.2967095325917334, "grad_norm": 1.2339201733011773, "learning_rate": 3.671992006003978e-06, "loss": 0.0319, "step": 71110 }, { "epoch": 0.2967303953067236, "grad_norm": 1.9409447609725252, "learning_rate": 3.671862915510109e-06, "loss": 0.035, "step": 71115 }, { "epoch": 0.2967512580217139, "grad_norm": 1.1625286525395613, "learning_rate": 3.671733838629982e-06, "loss": 0.0325, "step": 71120 }, { "epoch": 0.29677212073670417, "grad_norm": 1.3182169924921185, "learning_rate": 3.6716047753612055e-06, "loss": 0.0256, "step": 71125 }, { "epoch": 0.29679298345169447, "grad_norm": 0.9543218120957279, "learning_rate": 3.6714757257013855e-06, "loss": 0.0303, "step": 71130 }, { "epoch": 0.29681384616668477, "grad_norm": 0.6023823990695593, "learning_rate": 3.671346689648132e-06, "loss": 0.0219, "step": 71135 }, { "epoch": 0.296834708881675, "grad_norm": 0.7138239998036959, "learning_rate": 3.6712176671990535e-06, "loss": 0.0332, "step": 71140 }, { "epoch": 0.2968555715966653, "grad_norm": 0.9379981552041814, "learning_rate": 3.6710886583517593e-06, "loss": 0.0308, "step": 71145 }, { "epoch": 0.29687643431165556, "grad_norm": 0.7135310139049936, "learning_rate": 3.67095966310386e-06, "loss": 0.0248, "step": 71150 }, { "epoch": 0.29689729702664586, "grad_norm": 0.8413390449293686, "learning_rate": 3.6708306814529664e-06, "loss": 0.0294, "step": 71155 }, { "epoch": 0.29691815974163616, "grad_norm": 0.8293446187041347, "learning_rate": 3.6707017133966906e-06, "loss": 0.0223, "step": 71160 }, { "epoch": 0.2969390224566264, "grad_norm": 0.6738041030771323, "learning_rate": 3.6705727589326444e-06, "loss": 0.0353, "step": 71165 }, { "epoch": 0.2969598851716167, "grad_norm": 1.400014472583279, "learning_rate": 3.67044381805844e-06, "loss": 0.0332, "step": 71170 }, { "epoch": 0.29698074788660694, "grad_norm": 0.7020847207763179, "learning_rate": 3.6703148907716905e-06, "loss": 0.0331, "step": 71175 }, { "epoch": 0.29700161060159724, "grad_norm": 0.8450172498072656, "learning_rate": 3.670185977070011e-06, "loss": 0.034, "step": 71180 }, { "epoch": 0.29702247331658754, "grad_norm": 0.5649356251725233, "learning_rate": 3.6700570769510142e-06, "loss": 0.0238, "step": 71185 }, { "epoch": 0.2970433360315778, "grad_norm": 0.9306859672268437, "learning_rate": 3.669928190412316e-06, "loss": 0.0359, "step": 71190 }, { "epoch": 0.2970641987465681, "grad_norm": 1.618448254079124, "learning_rate": 3.6697993174515325e-06, "loss": 0.0221, "step": 71195 }, { "epoch": 0.2970850614615584, "grad_norm": 0.8538094444705101, "learning_rate": 3.6696704580662778e-06, "loss": 0.0303, "step": 71200 }, { "epoch": 0.29710592417654863, "grad_norm": 0.41029990057858073, "learning_rate": 3.6695416122541717e-06, "loss": 0.032, "step": 71205 }, { "epoch": 0.29712678689153893, "grad_norm": 1.9589611964847764, "learning_rate": 3.669412780012828e-06, "loss": 0.0327, "step": 71210 }, { "epoch": 0.2971476496065292, "grad_norm": 1.486268863912822, "learning_rate": 3.6692839613398685e-06, "loss": 0.0399, "step": 71215 }, { "epoch": 0.2971685123215195, "grad_norm": 0.5724403298687275, "learning_rate": 3.6691551562329086e-06, "loss": 0.0277, "step": 71220 }, { "epoch": 0.2971893750365098, "grad_norm": 0.9702529999070553, "learning_rate": 3.669026364689569e-06, "loss": 0.0354, "step": 71225 }, { "epoch": 0.2972102377515, "grad_norm": 0.5702437277510071, "learning_rate": 3.6688975867074687e-06, "loss": 0.0246, "step": 71230 }, { "epoch": 0.2972311004664903, "grad_norm": 0.7197784167142577, "learning_rate": 3.6687688222842275e-06, "loss": 0.0255, "step": 71235 }, { "epoch": 0.29725196318148056, "grad_norm": 0.6917639642726063, "learning_rate": 3.6686400714174675e-06, "loss": 0.0231, "step": 71240 }, { "epoch": 0.29727282589647086, "grad_norm": 1.386012932286619, "learning_rate": 3.6685113341048097e-06, "loss": 0.0356, "step": 71245 }, { "epoch": 0.29729368861146116, "grad_norm": 0.865870862958744, "learning_rate": 3.6683826103438756e-06, "loss": 0.0334, "step": 71250 }, { "epoch": 0.2973145513264514, "grad_norm": 0.7242620832226593, "learning_rate": 3.6682539001322885e-06, "loss": 0.0221, "step": 71255 }, { "epoch": 0.2973354140414417, "grad_norm": 0.9633157719022093, "learning_rate": 3.6681252034676707e-06, "loss": 0.0343, "step": 71260 }, { "epoch": 0.29735627675643195, "grad_norm": 0.9185032200628789, "learning_rate": 3.6679965203476464e-06, "loss": 0.0333, "step": 71265 }, { "epoch": 0.29737713947142225, "grad_norm": 0.6534696318920443, "learning_rate": 3.667867850769841e-06, "loss": 0.0384, "step": 71270 }, { "epoch": 0.29739800218641255, "grad_norm": 0.7730938429717099, "learning_rate": 3.6677391947318773e-06, "loss": 0.0352, "step": 71275 }, { "epoch": 0.2974188649014028, "grad_norm": 0.661745856097346, "learning_rate": 3.6676105522313827e-06, "loss": 0.0259, "step": 71280 }, { "epoch": 0.2974397276163931, "grad_norm": 0.6274787283959978, "learning_rate": 3.6674819232659825e-06, "loss": 0.0354, "step": 71285 }, { "epoch": 0.2974605903313834, "grad_norm": 0.7123803978413399, "learning_rate": 3.667353307833303e-06, "loss": 0.0343, "step": 71290 }, { "epoch": 0.29748145304637363, "grad_norm": 0.8610580420868996, "learning_rate": 3.667224705930972e-06, "loss": 0.0513, "step": 71295 }, { "epoch": 0.29750231576136393, "grad_norm": 0.5177243728414611, "learning_rate": 3.6670961175566173e-06, "loss": 0.0305, "step": 71300 }, { "epoch": 0.2975231784763542, "grad_norm": 0.5879214903635689, "learning_rate": 3.666967542707867e-06, "loss": 0.0256, "step": 71305 }, { "epoch": 0.2975440411913445, "grad_norm": 0.6573634378590494, "learning_rate": 3.666838981382351e-06, "loss": 0.0295, "step": 71310 }, { "epoch": 0.2975649039063348, "grad_norm": 1.2271500914495332, "learning_rate": 3.666710433577698e-06, "loss": 0.0318, "step": 71315 }, { "epoch": 0.297585766621325, "grad_norm": 0.9682925563615756, "learning_rate": 3.666581899291538e-06, "loss": 0.0327, "step": 71320 }, { "epoch": 0.2976066293363153, "grad_norm": 0.8017394560956722, "learning_rate": 3.666453378521503e-06, "loss": 0.0232, "step": 71325 }, { "epoch": 0.29762749205130556, "grad_norm": 0.4296519018698793, "learning_rate": 3.6663248712652226e-06, "loss": 0.0228, "step": 71330 }, { "epoch": 0.29764835476629586, "grad_norm": 0.7915009363245276, "learning_rate": 3.6661963775203307e-06, "loss": 0.0317, "step": 71335 }, { "epoch": 0.29766921748128616, "grad_norm": 0.4821421632782023, "learning_rate": 3.6660678972844576e-06, "loss": 0.0237, "step": 71340 }, { "epoch": 0.2976900801962764, "grad_norm": 0.6711028203266568, "learning_rate": 3.6659394305552386e-06, "loss": 0.0235, "step": 71345 }, { "epoch": 0.2977109429112667, "grad_norm": 1.401143679940764, "learning_rate": 3.665810977330305e-06, "loss": 0.0498, "step": 71350 }, { "epoch": 0.29773180562625695, "grad_norm": 0.636568839097064, "learning_rate": 3.6656825376072934e-06, "loss": 0.0344, "step": 71355 }, { "epoch": 0.29775266834124725, "grad_norm": 0.7338522709190838, "learning_rate": 3.6655541113838374e-06, "loss": 0.0242, "step": 71360 }, { "epoch": 0.29777353105623755, "grad_norm": 0.94366932705722, "learning_rate": 3.6654256986575728e-06, "loss": 0.0359, "step": 71365 }, { "epoch": 0.2977943937712278, "grad_norm": 0.8185359649465068, "learning_rate": 3.665297299426135e-06, "loss": 0.0371, "step": 71370 }, { "epoch": 0.2978152564862181, "grad_norm": 0.8395726812842882, "learning_rate": 3.6651689136871604e-06, "loss": 0.0261, "step": 71375 }, { "epoch": 0.2978361192012084, "grad_norm": 0.6779691564842253, "learning_rate": 3.6650405414382867e-06, "loss": 0.0239, "step": 71380 }, { "epoch": 0.29785698191619864, "grad_norm": 0.6114575583229097, "learning_rate": 3.6649121826771522e-06, "loss": 0.027, "step": 71385 }, { "epoch": 0.29787784463118894, "grad_norm": 0.8613135038520219, "learning_rate": 3.664783837401394e-06, "loss": 0.0342, "step": 71390 }, { "epoch": 0.2978987073461792, "grad_norm": 0.6862660358202417, "learning_rate": 3.6646555056086513e-06, "loss": 0.0277, "step": 71395 }, { "epoch": 0.2979195700611695, "grad_norm": 1.5499605976023976, "learning_rate": 3.664527187296564e-06, "loss": 0.031, "step": 71400 }, { "epoch": 0.2979404327761598, "grad_norm": 1.3729373571449717, "learning_rate": 3.6643988824627718e-06, "loss": 0.0357, "step": 71405 }, { "epoch": 0.29796129549115, "grad_norm": 1.138964256097481, "learning_rate": 3.664270591104915e-06, "loss": 0.0377, "step": 71410 }, { "epoch": 0.2979821582061403, "grad_norm": 0.712531533583787, "learning_rate": 3.6641423132206363e-06, "loss": 0.0314, "step": 71415 }, { "epoch": 0.29800302092113057, "grad_norm": 0.4813266553693639, "learning_rate": 3.6640140488075753e-06, "loss": 0.0249, "step": 71420 }, { "epoch": 0.29802388363612087, "grad_norm": 0.6413737032620145, "learning_rate": 3.6638857978633763e-06, "loss": 0.0279, "step": 71425 }, { "epoch": 0.29804474635111117, "grad_norm": 0.6472278799231145, "learning_rate": 3.6637575603856805e-06, "loss": 0.0433, "step": 71430 }, { "epoch": 0.2980656090661014, "grad_norm": 0.9260337389018394, "learning_rate": 3.663629336372133e-06, "loss": 0.0333, "step": 71435 }, { "epoch": 0.2980864717810917, "grad_norm": 1.0193720278995186, "learning_rate": 3.6635011258203763e-06, "loss": 0.0337, "step": 71440 }, { "epoch": 0.29810733449608195, "grad_norm": 1.1323949951194794, "learning_rate": 3.663372928728056e-06, "loss": 0.028, "step": 71445 }, { "epoch": 0.29812819721107225, "grad_norm": 0.7045109402730027, "learning_rate": 3.663244745092818e-06, "loss": 0.025, "step": 71450 }, { "epoch": 0.29814905992606255, "grad_norm": 0.6708123634145148, "learning_rate": 3.663116574912306e-06, "loss": 0.0298, "step": 71455 }, { "epoch": 0.2981699226410528, "grad_norm": 0.6148482819204163, "learning_rate": 3.6629884181841683e-06, "loss": 0.0307, "step": 71460 }, { "epoch": 0.2981907853560431, "grad_norm": 0.6024163232258323, "learning_rate": 3.6628602749060515e-06, "loss": 0.0295, "step": 71465 }, { "epoch": 0.2982116480710334, "grad_norm": 0.8781350839875889, "learning_rate": 3.6627321450756025e-06, "loss": 0.028, "step": 71470 }, { "epoch": 0.29823251078602364, "grad_norm": 0.7771491984730332, "learning_rate": 3.6626040286904696e-06, "loss": 0.028, "step": 71475 }, { "epoch": 0.29825337350101394, "grad_norm": 0.5341022313417177, "learning_rate": 3.662475925748302e-06, "loss": 0.0263, "step": 71480 }, { "epoch": 0.2982742362160042, "grad_norm": 0.7087523152829186, "learning_rate": 3.6623478362467487e-06, "loss": 0.0237, "step": 71485 }, { "epoch": 0.2982950989309945, "grad_norm": 0.6103570147592372, "learning_rate": 3.6622197601834597e-06, "loss": 0.0372, "step": 71490 }, { "epoch": 0.2983159616459848, "grad_norm": 0.6436008046181422, "learning_rate": 3.6620916975560842e-06, "loss": 0.0269, "step": 71495 }, { "epoch": 0.29833682436097503, "grad_norm": 0.49452512930625997, "learning_rate": 3.6619636483622744e-06, "loss": 0.0233, "step": 71500 }, { "epoch": 0.2983576870759653, "grad_norm": 1.1113880752310008, "learning_rate": 3.6618356125996822e-06, "loss": 0.0279, "step": 71505 }, { "epoch": 0.29837854979095557, "grad_norm": 0.5794986019762615, "learning_rate": 3.6617075902659593e-06, "loss": 0.0326, "step": 71510 }, { "epoch": 0.29839941250594587, "grad_norm": 0.6286708329066202, "learning_rate": 3.661579581358758e-06, "loss": 0.0318, "step": 71515 }, { "epoch": 0.29842027522093617, "grad_norm": 0.674932410283134, "learning_rate": 3.6614515858757317e-06, "loss": 0.0214, "step": 71520 }, { "epoch": 0.2984411379359264, "grad_norm": 0.3934411698903933, "learning_rate": 3.6613236038145346e-06, "loss": 0.0325, "step": 71525 }, { "epoch": 0.2984620006509167, "grad_norm": 0.5302483387234035, "learning_rate": 3.6611956351728207e-06, "loss": 0.028, "step": 71530 }, { "epoch": 0.29848286336590696, "grad_norm": 0.6625528106337369, "learning_rate": 3.661067679948246e-06, "loss": 0.0255, "step": 71535 }, { "epoch": 0.29850372608089726, "grad_norm": 1.8964478221674148, "learning_rate": 3.660939738138465e-06, "loss": 0.0225, "step": 71540 }, { "epoch": 0.29852458879588756, "grad_norm": 0.5540122249563484, "learning_rate": 3.6608118097411345e-06, "loss": 0.0375, "step": 71545 }, { "epoch": 0.2985454515108778, "grad_norm": 1.184992189697309, "learning_rate": 3.66068389475391e-06, "loss": 0.03, "step": 71550 }, { "epoch": 0.2985663142258681, "grad_norm": 0.7772276144236995, "learning_rate": 3.6605559931744508e-06, "loss": 0.0286, "step": 71555 }, { "epoch": 0.2985871769408584, "grad_norm": 2.012084028725331, "learning_rate": 3.6604281050004133e-06, "loss": 0.043, "step": 71560 }, { "epoch": 0.29860803965584864, "grad_norm": 0.7494602256715023, "learning_rate": 3.660300230229456e-06, "loss": 0.0332, "step": 71565 }, { "epoch": 0.29862890237083894, "grad_norm": 0.7423179270766957, "learning_rate": 3.660172368859239e-06, "loss": 0.0259, "step": 71570 }, { "epoch": 0.2986497650858292, "grad_norm": 0.740628713258528, "learning_rate": 3.660044520887421e-06, "loss": 0.0294, "step": 71575 }, { "epoch": 0.2986706278008195, "grad_norm": 0.974494601762437, "learning_rate": 3.659916686311662e-06, "loss": 0.0259, "step": 71580 }, { "epoch": 0.2986914905158098, "grad_norm": 0.8525020119892556, "learning_rate": 3.659788865129624e-06, "loss": 0.0167, "step": 71585 }, { "epoch": 0.29871235323080003, "grad_norm": 1.3249615071595753, "learning_rate": 3.6596610573389665e-06, "loss": 0.0359, "step": 71590 }, { "epoch": 0.29873321594579033, "grad_norm": 0.6988425239691121, "learning_rate": 3.659533262937353e-06, "loss": 0.0343, "step": 71595 }, { "epoch": 0.2987540786607806, "grad_norm": 0.9632763383243348, "learning_rate": 3.6594054819224446e-06, "loss": 0.0253, "step": 71600 }, { "epoch": 0.2987749413757709, "grad_norm": 0.46640307328805986, "learning_rate": 3.6592777142919054e-06, "loss": 0.0232, "step": 71605 }, { "epoch": 0.2987958040907612, "grad_norm": 1.3466890902988413, "learning_rate": 3.6591499600433984e-06, "loss": 0.0282, "step": 71610 }, { "epoch": 0.2988166668057514, "grad_norm": 0.649062991975908, "learning_rate": 3.659022219174588e-06, "loss": 0.0239, "step": 71615 }, { "epoch": 0.2988375295207417, "grad_norm": 0.9473902251284867, "learning_rate": 3.658894491683139e-06, "loss": 0.0227, "step": 71620 }, { "epoch": 0.29885839223573196, "grad_norm": 0.7850077395548991, "learning_rate": 3.658766777566717e-06, "loss": 0.0329, "step": 71625 }, { "epoch": 0.29887925495072226, "grad_norm": 0.66842099999657, "learning_rate": 3.658639076822987e-06, "loss": 0.0225, "step": 71630 }, { "epoch": 0.29890011766571256, "grad_norm": 1.218009994126418, "learning_rate": 3.6585113894496164e-06, "loss": 0.0331, "step": 71635 }, { "epoch": 0.2989209803807028, "grad_norm": 0.8313261057258031, "learning_rate": 3.658383715444271e-06, "loss": 0.0307, "step": 71640 }, { "epoch": 0.2989418430956931, "grad_norm": 0.7074793035667356, "learning_rate": 3.6582560548046193e-06, "loss": 0.0245, "step": 71645 }, { "epoch": 0.2989627058106834, "grad_norm": 0.9619803001922437, "learning_rate": 3.6581284075283296e-06, "loss": 0.0295, "step": 71650 }, { "epoch": 0.29898356852567365, "grad_norm": 0.722524466386825, "learning_rate": 3.6580007736130703e-06, "loss": 0.0347, "step": 71655 }, { "epoch": 0.29900443124066395, "grad_norm": 0.7534774593711141, "learning_rate": 3.6578731530565103e-06, "loss": 0.0245, "step": 71660 }, { "epoch": 0.2990252939556542, "grad_norm": 0.5445671975467142, "learning_rate": 3.6577455458563203e-06, "loss": 0.0978, "step": 71665 }, { "epoch": 0.2990461566706445, "grad_norm": 0.8739671045540228, "learning_rate": 3.65761795201017e-06, "loss": 0.0255, "step": 71670 }, { "epoch": 0.2990670193856348, "grad_norm": 0.5851769478016039, "learning_rate": 3.6574903715157313e-06, "loss": 0.0316, "step": 71675 }, { "epoch": 0.29908788210062504, "grad_norm": 0.903700163531566, "learning_rate": 3.657362804370675e-06, "loss": 0.0215, "step": 71680 }, { "epoch": 0.29910874481561533, "grad_norm": 0.6813718033774255, "learning_rate": 3.657235250572673e-06, "loss": 0.0217, "step": 71685 }, { "epoch": 0.2991296075306056, "grad_norm": 0.5294283038805709, "learning_rate": 3.6571077101193985e-06, "loss": 0.0251, "step": 71690 }, { "epoch": 0.2991504702455959, "grad_norm": 1.703885360998092, "learning_rate": 3.656980183008525e-06, "loss": 0.041, "step": 71695 }, { "epoch": 0.2991713329605862, "grad_norm": 0.34387834423825636, "learning_rate": 3.6568526692377264e-06, "loss": 0.0205, "step": 71700 }, { "epoch": 0.2991921956755764, "grad_norm": 0.914754506177867, "learning_rate": 3.656725168804676e-06, "loss": 0.0413, "step": 71705 }, { "epoch": 0.2992130583905667, "grad_norm": 0.9208993970528943, "learning_rate": 3.6565976817070504e-06, "loss": 0.0361, "step": 71710 }, { "epoch": 0.29923392110555697, "grad_norm": 0.8537894712791948, "learning_rate": 3.6564702079425233e-06, "loss": 0.0342, "step": 71715 }, { "epoch": 0.29925478382054727, "grad_norm": 0.8207317408179132, "learning_rate": 3.656342747508773e-06, "loss": 0.0339, "step": 71720 }, { "epoch": 0.29927564653553757, "grad_norm": 1.1296051746488363, "learning_rate": 3.6562153004034747e-06, "loss": 0.0314, "step": 71725 }, { "epoch": 0.2992965092505278, "grad_norm": 0.4665342013843255, "learning_rate": 3.656087866624306e-06, "loss": 0.0284, "step": 71730 }, { "epoch": 0.2993173719655181, "grad_norm": 0.8924666724159089, "learning_rate": 3.6559604461689442e-06, "loss": 0.0286, "step": 71735 }, { "epoch": 0.2993382346805084, "grad_norm": 1.218961591112572, "learning_rate": 3.6558330390350683e-06, "loss": 0.0281, "step": 71740 }, { "epoch": 0.29935909739549865, "grad_norm": 1.1220136506835041, "learning_rate": 3.655705645220357e-06, "loss": 0.0193, "step": 71745 }, { "epoch": 0.29937996011048895, "grad_norm": 0.5699253972470584, "learning_rate": 3.65557826472249e-06, "loss": 0.0243, "step": 71750 }, { "epoch": 0.2994008228254792, "grad_norm": 0.7325072349025468, "learning_rate": 3.655450897539148e-06, "loss": 0.0371, "step": 71755 }, { "epoch": 0.2994216855404695, "grad_norm": 1.1194434875851553, "learning_rate": 3.65532354366801e-06, "loss": 0.0331, "step": 71760 }, { "epoch": 0.2994425482554598, "grad_norm": 0.5798332487313121, "learning_rate": 3.655196203106759e-06, "loss": 0.0242, "step": 71765 }, { "epoch": 0.29946341097045004, "grad_norm": 0.5828203237168011, "learning_rate": 3.655068875853075e-06, "loss": 0.0226, "step": 71770 }, { "epoch": 0.29948427368544034, "grad_norm": 0.8026931006000055, "learning_rate": 3.6549415619046424e-06, "loss": 0.0356, "step": 71775 }, { "epoch": 0.2995051364004306, "grad_norm": 0.916874806535511, "learning_rate": 3.6548142612591426e-06, "loss": 0.0271, "step": 71780 }, { "epoch": 0.2995259991154209, "grad_norm": 0.7464972605464314, "learning_rate": 3.6546869739142594e-06, "loss": 0.0499, "step": 71785 }, { "epoch": 0.2995468618304112, "grad_norm": 1.699076489660354, "learning_rate": 3.654559699867676e-06, "loss": 0.0483, "step": 71790 }, { "epoch": 0.2995677245454014, "grad_norm": 0.6537051369191589, "learning_rate": 3.654432439117079e-06, "loss": 0.0221, "step": 71795 }, { "epoch": 0.2995885872603917, "grad_norm": 0.6348562505849584, "learning_rate": 3.654305191660153e-06, "loss": 0.0261, "step": 71800 }, { "epoch": 0.29960944997538197, "grad_norm": 0.8705464274150577, "learning_rate": 3.6541779574945818e-06, "loss": 0.0229, "step": 71805 }, { "epoch": 0.29963031269037227, "grad_norm": 0.9256413389470424, "learning_rate": 3.6540507366180544e-06, "loss": 0.0281, "step": 71810 }, { "epoch": 0.29965117540536257, "grad_norm": 0.6977303038670789, "learning_rate": 3.653923529028256e-06, "loss": 0.0388, "step": 71815 }, { "epoch": 0.2996720381203528, "grad_norm": 0.6718506913600847, "learning_rate": 3.6537963347228733e-06, "loss": 0.0279, "step": 71820 }, { "epoch": 0.2996929008353431, "grad_norm": 0.7240836808345444, "learning_rate": 3.653669153699596e-06, "loss": 0.029, "step": 71825 }, { "epoch": 0.2997137635503334, "grad_norm": 0.4336367998370058, "learning_rate": 3.653541985956112e-06, "loss": 0.0215, "step": 71830 }, { "epoch": 0.29973462626532366, "grad_norm": 2.29466187197344, "learning_rate": 3.653414831490111e-06, "loss": 0.0249, "step": 71835 }, { "epoch": 0.29975548898031396, "grad_norm": 0.8160569078636675, "learning_rate": 3.653287690299282e-06, "loss": 0.0307, "step": 71840 }, { "epoch": 0.2997763516953042, "grad_norm": 0.5200080374160353, "learning_rate": 3.6531605623813155e-06, "loss": 0.0328, "step": 71845 }, { "epoch": 0.2997972144102945, "grad_norm": 0.740786621395096, "learning_rate": 3.6530334477339014e-06, "loss": 0.0259, "step": 71850 }, { "epoch": 0.2998180771252848, "grad_norm": 0.6101131581260297, "learning_rate": 3.652906346354732e-06, "loss": 0.0341, "step": 71855 }, { "epoch": 0.29983893984027504, "grad_norm": 1.3337317388860328, "learning_rate": 3.6527792582415e-06, "loss": 0.0322, "step": 71860 }, { "epoch": 0.29985980255526534, "grad_norm": 1.545062844390684, "learning_rate": 3.652652183391896e-06, "loss": 0.0271, "step": 71865 }, { "epoch": 0.2998806652702556, "grad_norm": 0.9282936560589924, "learning_rate": 3.6525251218036138e-06, "loss": 0.034, "step": 71870 }, { "epoch": 0.2999015279852459, "grad_norm": 0.3918127517514724, "learning_rate": 3.6523980734743474e-06, "loss": 0.0239, "step": 71875 }, { "epoch": 0.2999223907002362, "grad_norm": 1.0685895805114962, "learning_rate": 3.6522710384017913e-06, "loss": 0.0288, "step": 71880 }, { "epoch": 0.29994325341522643, "grad_norm": 0.6259713011244241, "learning_rate": 3.6521440165836393e-06, "loss": 0.0329, "step": 71885 }, { "epoch": 0.29996411613021673, "grad_norm": 0.7205547025369846, "learning_rate": 3.6520170080175876e-06, "loss": 0.0233, "step": 71890 }, { "epoch": 0.299984978845207, "grad_norm": 0.546554931806096, "learning_rate": 3.65189001270133e-06, "loss": 0.0224, "step": 71895 }, { "epoch": 0.3000058415601973, "grad_norm": 0.6452420096150826, "learning_rate": 3.6517630306325658e-06, "loss": 0.0343, "step": 71900 }, { "epoch": 0.3000267042751876, "grad_norm": 1.293295757430275, "learning_rate": 3.6516360618089907e-06, "loss": 0.0383, "step": 71905 }, { "epoch": 0.3000475669901778, "grad_norm": 0.5351298380455091, "learning_rate": 3.651509106228302e-06, "loss": 0.0252, "step": 71910 }, { "epoch": 0.3000684297051681, "grad_norm": 0.45330956990662635, "learning_rate": 3.6513821638881976e-06, "loss": 0.0192, "step": 71915 }, { "epoch": 0.3000892924201584, "grad_norm": 0.8607333930082299, "learning_rate": 3.6512552347863766e-06, "loss": 0.0262, "step": 71920 }, { "epoch": 0.30011015513514866, "grad_norm": 0.48768534697598764, "learning_rate": 3.651128318920538e-06, "loss": 0.0249, "step": 71925 }, { "epoch": 0.30013101785013896, "grad_norm": 0.9631648203196822, "learning_rate": 3.651001416288382e-06, "loss": 0.0297, "step": 71930 }, { "epoch": 0.3001518805651292, "grad_norm": 1.0198370138288146, "learning_rate": 3.650874526887609e-06, "loss": 0.0406, "step": 71935 }, { "epoch": 0.3001727432801195, "grad_norm": 1.1926969042286637, "learning_rate": 3.650747650715919e-06, "loss": 0.0336, "step": 71940 }, { "epoch": 0.3001936059951098, "grad_norm": 0.7094065413216796, "learning_rate": 3.6506207877710142e-06, "loss": 0.032, "step": 71945 }, { "epoch": 0.30021446871010005, "grad_norm": 2.215139823424999, "learning_rate": 3.6504939380505965e-06, "loss": 0.0302, "step": 71950 }, { "epoch": 0.30023533142509035, "grad_norm": 0.9074520902095827, "learning_rate": 3.650367101552368e-06, "loss": 0.0302, "step": 71955 }, { "epoch": 0.3002561941400806, "grad_norm": 1.0031680372913232, "learning_rate": 3.650240278274033e-06, "loss": 0.0381, "step": 71960 }, { "epoch": 0.3002770568550709, "grad_norm": 0.6405718078653416, "learning_rate": 3.6501134682132945e-06, "loss": 0.0266, "step": 71965 }, { "epoch": 0.3002979195700612, "grad_norm": 0.9105916131186463, "learning_rate": 3.649986671367856e-06, "loss": 0.0323, "step": 71970 }, { "epoch": 0.30031878228505143, "grad_norm": 0.645843861379567, "learning_rate": 3.6498598877354235e-06, "loss": 0.0255, "step": 71975 }, { "epoch": 0.30033964500004173, "grad_norm": 1.0999534346799356, "learning_rate": 3.6497331173137018e-06, "loss": 0.0338, "step": 71980 }, { "epoch": 0.300360507715032, "grad_norm": 0.8477144163344672, "learning_rate": 3.6496063601003967e-06, "loss": 0.021, "step": 71985 }, { "epoch": 0.3003813704300223, "grad_norm": 0.6733742858528592, "learning_rate": 3.6494796160932144e-06, "loss": 0.0352, "step": 71990 }, { "epoch": 0.3004022331450126, "grad_norm": 0.7353399537119367, "learning_rate": 3.6493528852898634e-06, "loss": 0.0227, "step": 71995 }, { "epoch": 0.3004230958600028, "grad_norm": 0.5942234682670322, "learning_rate": 3.64922616768805e-06, "loss": 0.0282, "step": 72000 }, { "epoch": 0.3004439585749931, "grad_norm": 0.8909239629352017, "learning_rate": 3.649099463285482e-06, "loss": 0.0309, "step": 72005 }, { "epoch": 0.3004648212899834, "grad_norm": 0.9057423767875643, "learning_rate": 3.648972772079869e-06, "loss": 0.0282, "step": 72010 }, { "epoch": 0.30048568400497366, "grad_norm": 1.285938139329921, "learning_rate": 3.6488460940689208e-06, "loss": 0.0294, "step": 72015 }, { "epoch": 0.30050654671996396, "grad_norm": 1.4791590096751204, "learning_rate": 3.6487194292503456e-06, "loss": 0.0403, "step": 72020 }, { "epoch": 0.3005274094349542, "grad_norm": 1.0552021953247614, "learning_rate": 3.648592777621855e-06, "loss": 0.0315, "step": 72025 }, { "epoch": 0.3005482721499445, "grad_norm": 0.8486462160258569, "learning_rate": 3.64846613918116e-06, "loss": 0.0254, "step": 72030 }, { "epoch": 0.3005691348649348, "grad_norm": 0.4050803869616481, "learning_rate": 3.648339513925971e-06, "loss": 0.0289, "step": 72035 }, { "epoch": 0.30058999757992505, "grad_norm": 0.7671862220372274, "learning_rate": 3.648212901854001e-06, "loss": 0.026, "step": 72040 }, { "epoch": 0.30061086029491535, "grad_norm": 0.4927758434233662, "learning_rate": 3.648086302962962e-06, "loss": 0.0271, "step": 72045 }, { "epoch": 0.3006317230099056, "grad_norm": 0.7438090867053089, "learning_rate": 3.6479597172505684e-06, "loss": 0.0283, "step": 72050 }, { "epoch": 0.3006525857248959, "grad_norm": 0.5509700744685812, "learning_rate": 3.647833144714532e-06, "loss": 0.0249, "step": 72055 }, { "epoch": 0.3006734484398862, "grad_norm": 0.6062192370268309, "learning_rate": 3.6477065853525685e-06, "loss": 0.0216, "step": 72060 }, { "epoch": 0.30069431115487644, "grad_norm": 0.7686669026967492, "learning_rate": 3.6475800391623916e-06, "loss": 0.0253, "step": 72065 }, { "epoch": 0.30071517386986674, "grad_norm": 0.7686715196559022, "learning_rate": 3.6474535061417183e-06, "loss": 0.0301, "step": 72070 }, { "epoch": 0.300736036584857, "grad_norm": 0.8821154322568506, "learning_rate": 3.647326986288263e-06, "loss": 0.0314, "step": 72075 }, { "epoch": 0.3007568992998473, "grad_norm": 0.9427192799236773, "learning_rate": 3.6472004795997435e-06, "loss": 0.0341, "step": 72080 }, { "epoch": 0.3007777620148376, "grad_norm": 1.0242318656256022, "learning_rate": 3.6470739860738757e-06, "loss": 0.0347, "step": 72085 }, { "epoch": 0.3007986247298278, "grad_norm": 0.5496402287706595, "learning_rate": 3.646947505708378e-06, "loss": 0.0283, "step": 72090 }, { "epoch": 0.3008194874448181, "grad_norm": 0.893844710570712, "learning_rate": 3.6468210385009677e-06, "loss": 0.0303, "step": 72095 }, { "epoch": 0.3008403501598084, "grad_norm": 0.5529527186516305, "learning_rate": 3.646694584449364e-06, "loss": 0.0271, "step": 72100 }, { "epoch": 0.30086121287479867, "grad_norm": 0.9559982110938992, "learning_rate": 3.646568143551287e-06, "loss": 0.0421, "step": 72105 }, { "epoch": 0.30088207558978897, "grad_norm": 0.2365894164529889, "learning_rate": 3.6464417158044545e-06, "loss": 0.0265, "step": 72110 }, { "epoch": 0.3009029383047792, "grad_norm": 0.9600874241646348, "learning_rate": 3.646315301206589e-06, "loss": 0.0314, "step": 72115 }, { "epoch": 0.3009238010197695, "grad_norm": 1.0168049057838346, "learning_rate": 3.6461888997554102e-06, "loss": 0.0309, "step": 72120 }, { "epoch": 0.3009446637347598, "grad_norm": 0.709457894867043, "learning_rate": 3.6460625114486397e-06, "loss": 0.02, "step": 72125 }, { "epoch": 0.30096552644975005, "grad_norm": 1.495392315780351, "learning_rate": 3.6459361362839995e-06, "loss": 0.0317, "step": 72130 }, { "epoch": 0.30098638916474035, "grad_norm": 0.8829385328747564, "learning_rate": 3.6458097742592124e-06, "loss": 0.0478, "step": 72135 }, { "epoch": 0.3010072518797306, "grad_norm": 0.7243061226784663, "learning_rate": 3.6456834253720025e-06, "loss": 0.0259, "step": 72140 }, { "epoch": 0.3010281145947209, "grad_norm": 1.5768267445425324, "learning_rate": 3.6455570896200916e-06, "loss": 0.0288, "step": 72145 }, { "epoch": 0.3010489773097112, "grad_norm": 0.42822606846357264, "learning_rate": 3.6454307670012047e-06, "loss": 0.0378, "step": 72150 }, { "epoch": 0.30106984002470144, "grad_norm": 1.0262658524647172, "learning_rate": 3.645304457513067e-06, "loss": 0.0274, "step": 72155 }, { "epoch": 0.30109070273969174, "grad_norm": 1.244391680349616, "learning_rate": 3.6451781611534033e-06, "loss": 0.0258, "step": 72160 }, { "epoch": 0.301111565454682, "grad_norm": 1.174060427543078, "learning_rate": 3.6450518779199395e-06, "loss": 0.0318, "step": 72165 }, { "epoch": 0.3011324281696723, "grad_norm": 0.9343416384531094, "learning_rate": 3.644925607810403e-06, "loss": 0.0324, "step": 72170 }, { "epoch": 0.3011532908846626, "grad_norm": 1.1938199887794216, "learning_rate": 3.644799350822519e-06, "loss": 0.0363, "step": 72175 }, { "epoch": 0.3011741535996528, "grad_norm": 0.701138481332947, "learning_rate": 3.644673106954017e-06, "loss": 0.0239, "step": 72180 }, { "epoch": 0.3011950163146431, "grad_norm": 0.6815663405575865, "learning_rate": 3.6445468762026245e-06, "loss": 0.0269, "step": 72185 }, { "epoch": 0.3012158790296334, "grad_norm": 1.1527440152278992, "learning_rate": 3.6444206585660687e-06, "loss": 0.0251, "step": 72190 }, { "epoch": 0.30123674174462367, "grad_norm": 0.4269687857903228, "learning_rate": 3.6442944540420806e-06, "loss": 0.0322, "step": 72195 }, { "epoch": 0.30125760445961397, "grad_norm": 0.6241346733516367, "learning_rate": 3.6441682626283897e-06, "loss": 0.0242, "step": 72200 }, { "epoch": 0.3012784671746042, "grad_norm": 0.846387685869413, "learning_rate": 3.6440420843227254e-06, "loss": 0.037, "step": 72205 }, { "epoch": 0.3012993298895945, "grad_norm": 0.6884073082728489, "learning_rate": 3.6439159191228183e-06, "loss": 0.0246, "step": 72210 }, { "epoch": 0.3013201926045848, "grad_norm": 0.8626199972029286, "learning_rate": 3.6437897670264016e-06, "loss": 0.0323, "step": 72215 }, { "epoch": 0.30134105531957506, "grad_norm": 0.7653174220286922, "learning_rate": 3.643663628031206e-06, "loss": 0.017, "step": 72220 }, { "epoch": 0.30136191803456536, "grad_norm": 0.4381908847964288, "learning_rate": 3.6435375021349635e-06, "loss": 0.0291, "step": 72225 }, { "epoch": 0.3013827807495556, "grad_norm": 0.6958190478295826, "learning_rate": 3.643411389335409e-06, "loss": 0.0326, "step": 72230 }, { "epoch": 0.3014036434645459, "grad_norm": 1.605123197612521, "learning_rate": 3.643285289630274e-06, "loss": 0.0371, "step": 72235 }, { "epoch": 0.3014245061795362, "grad_norm": 1.1461591576389207, "learning_rate": 3.643159203017294e-06, "loss": 0.0294, "step": 72240 }, { "epoch": 0.30144536889452644, "grad_norm": 0.795861859914141, "learning_rate": 3.6430331294942034e-06, "loss": 0.0365, "step": 72245 }, { "epoch": 0.30146623160951674, "grad_norm": 1.1218862210202063, "learning_rate": 3.642907069058737e-06, "loss": 0.0272, "step": 72250 }, { "epoch": 0.301487094324507, "grad_norm": 1.2547210726532336, "learning_rate": 3.6427810217086305e-06, "loss": 0.0421, "step": 72255 }, { "epoch": 0.3015079570394973, "grad_norm": 1.1603054813730618, "learning_rate": 3.6426549874416215e-06, "loss": 0.0328, "step": 72260 }, { "epoch": 0.3015288197544876, "grad_norm": 0.6187024495584196, "learning_rate": 3.6425289662554458e-06, "loss": 0.023, "step": 72265 }, { "epoch": 0.30154968246947783, "grad_norm": 0.2899558263051071, "learning_rate": 3.6424029581478403e-06, "loss": 0.0361, "step": 72270 }, { "epoch": 0.30157054518446813, "grad_norm": 1.0232949767038426, "learning_rate": 3.642276963116545e-06, "loss": 0.0306, "step": 72275 }, { "epoch": 0.30159140789945843, "grad_norm": 1.0737274694183063, "learning_rate": 3.6421509811592964e-06, "loss": 0.0379, "step": 72280 }, { "epoch": 0.3016122706144487, "grad_norm": 0.916921129552536, "learning_rate": 3.642025012273835e-06, "loss": 0.039, "step": 72285 }, { "epoch": 0.301633133329439, "grad_norm": 0.632569517791584, "learning_rate": 3.6418990564578994e-06, "loss": 0.0362, "step": 72290 }, { "epoch": 0.3016539960444292, "grad_norm": 1.2482158048589045, "learning_rate": 3.6417731137092294e-06, "loss": 0.0394, "step": 72295 }, { "epoch": 0.3016748587594195, "grad_norm": 0.6980600389668287, "learning_rate": 3.641647184025567e-06, "loss": 0.0358, "step": 72300 }, { "epoch": 0.3016957214744098, "grad_norm": 0.46998364974904655, "learning_rate": 3.6415212674046534e-06, "loss": 0.0334, "step": 72305 }, { "epoch": 0.30171658418940006, "grad_norm": 0.6108813486501974, "learning_rate": 3.6413953638442295e-06, "loss": 0.0256, "step": 72310 }, { "epoch": 0.30173744690439036, "grad_norm": 1.0597504845622303, "learning_rate": 3.6412694733420377e-06, "loss": 0.028, "step": 72315 }, { "epoch": 0.3017583096193806, "grad_norm": 0.6808017103189267, "learning_rate": 3.6411435958958225e-06, "loss": 0.0307, "step": 72320 }, { "epoch": 0.3017791723343709, "grad_norm": 0.6471748437816965, "learning_rate": 3.641017731503325e-06, "loss": 0.0268, "step": 72325 }, { "epoch": 0.3018000350493612, "grad_norm": 0.8026678009095205, "learning_rate": 3.640891880162292e-06, "loss": 0.0303, "step": 72330 }, { "epoch": 0.30182089776435145, "grad_norm": 0.4467884408757157, "learning_rate": 3.6407660418704643e-06, "loss": 0.0372, "step": 72335 }, { "epoch": 0.30184176047934175, "grad_norm": 0.5360650650939124, "learning_rate": 3.6406402166255907e-06, "loss": 0.0365, "step": 72340 }, { "epoch": 0.301862623194332, "grad_norm": 1.7284282858319862, "learning_rate": 3.640514404425415e-06, "loss": 0.0262, "step": 72345 }, { "epoch": 0.3018834859093223, "grad_norm": 0.5501518627177683, "learning_rate": 3.6403886052676827e-06, "loss": 0.0236, "step": 72350 }, { "epoch": 0.3019043486243126, "grad_norm": 0.6444916853995597, "learning_rate": 3.640262819150142e-06, "loss": 0.0253, "step": 72355 }, { "epoch": 0.30192521133930283, "grad_norm": 1.1296123508127414, "learning_rate": 3.6401370460705405e-06, "loss": 0.0353, "step": 72360 }, { "epoch": 0.30194607405429313, "grad_norm": 1.0802229736100408, "learning_rate": 3.640011286026624e-06, "loss": 0.0341, "step": 72365 }, { "epoch": 0.30196693676928343, "grad_norm": 1.461884690888968, "learning_rate": 3.6398855390161426e-06, "loss": 0.0288, "step": 72370 }, { "epoch": 0.3019877994842737, "grad_norm": 0.5202112321127313, "learning_rate": 3.6397598050368443e-06, "loss": 0.0271, "step": 72375 }, { "epoch": 0.302008662199264, "grad_norm": 0.9415196519950771, "learning_rate": 3.6396340840864785e-06, "loss": 0.0319, "step": 72380 }, { "epoch": 0.3020295249142542, "grad_norm": 0.6960079736479022, "learning_rate": 3.6395083761627963e-06, "loss": 0.0291, "step": 72385 }, { "epoch": 0.3020503876292445, "grad_norm": 0.5611985053246578, "learning_rate": 3.6393826812635467e-06, "loss": 0.0389, "step": 72390 }, { "epoch": 0.3020712503442348, "grad_norm": 0.8080006892373767, "learning_rate": 3.6392569993864825e-06, "loss": 0.0385, "step": 72395 }, { "epoch": 0.30209211305922506, "grad_norm": 0.7829314016717782, "learning_rate": 3.6391313305293536e-06, "loss": 0.0351, "step": 72400 }, { "epoch": 0.30211297577421536, "grad_norm": 0.9387346082284217, "learning_rate": 3.6390056746899133e-06, "loss": 0.0292, "step": 72405 }, { "epoch": 0.3021338384892056, "grad_norm": 0.6907601604332239, "learning_rate": 3.638880031865914e-06, "loss": 0.0254, "step": 72410 }, { "epoch": 0.3021547012041959, "grad_norm": 0.8845073388016365, "learning_rate": 3.638754402055109e-06, "loss": 0.0254, "step": 72415 }, { "epoch": 0.3021755639191862, "grad_norm": 0.8715480117711383, "learning_rate": 3.638628785255253e-06, "loss": 0.0275, "step": 72420 }, { "epoch": 0.30219642663417645, "grad_norm": 0.5217123939676601, "learning_rate": 3.6385031814640974e-06, "loss": 0.0308, "step": 72425 }, { "epoch": 0.30221728934916675, "grad_norm": 1.7145759380323828, "learning_rate": 3.6383775906794e-06, "loss": 0.0346, "step": 72430 }, { "epoch": 0.302238152064157, "grad_norm": 0.5725227908851314, "learning_rate": 3.6382520128989154e-06, "loss": 0.0312, "step": 72435 }, { "epoch": 0.3022590147791473, "grad_norm": 0.8423913534949697, "learning_rate": 3.638126448120399e-06, "loss": 0.0354, "step": 72440 }, { "epoch": 0.3022798774941376, "grad_norm": 2.736636032776356, "learning_rate": 3.638000896341608e-06, "loss": 0.0258, "step": 72445 }, { "epoch": 0.30230074020912784, "grad_norm": 0.9993686620421858, "learning_rate": 3.6378753575602994e-06, "loss": 0.0252, "step": 72450 }, { "epoch": 0.30232160292411814, "grad_norm": 0.4840297983034285, "learning_rate": 3.6377498317742304e-06, "loss": 0.0262, "step": 72455 }, { "epoch": 0.30234246563910844, "grad_norm": 0.4632865098572177, "learning_rate": 3.637624318981159e-06, "loss": 0.0335, "step": 72460 }, { "epoch": 0.3023633283540987, "grad_norm": 1.111535510685719, "learning_rate": 3.6374988191788446e-06, "loss": 0.0382, "step": 72465 }, { "epoch": 0.302384191069089, "grad_norm": 0.6068558687121537, "learning_rate": 3.637373332365046e-06, "loss": 0.0315, "step": 72470 }, { "epoch": 0.3024050537840792, "grad_norm": 0.7756199311123089, "learning_rate": 3.6372478585375236e-06, "loss": 0.0224, "step": 72475 }, { "epoch": 0.3024259164990695, "grad_norm": 0.5749569812261061, "learning_rate": 3.637122397694036e-06, "loss": 0.032, "step": 72480 }, { "epoch": 0.3024467792140598, "grad_norm": 1.3087561081369865, "learning_rate": 3.636996949832346e-06, "loss": 0.0294, "step": 72485 }, { "epoch": 0.30246764192905007, "grad_norm": 0.675147083616801, "learning_rate": 3.636871514950213e-06, "loss": 0.0304, "step": 72490 }, { "epoch": 0.30248850464404037, "grad_norm": 0.9777433903700988, "learning_rate": 3.6367460930454015e-06, "loss": 0.0384, "step": 72495 }, { "epoch": 0.3025093673590306, "grad_norm": 0.582363166871614, "learning_rate": 3.636620684115672e-06, "loss": 0.0399, "step": 72500 }, { "epoch": 0.3025302300740209, "grad_norm": 0.6237802007898232, "learning_rate": 3.6364952881587874e-06, "loss": 0.0262, "step": 72505 }, { "epoch": 0.3025510927890112, "grad_norm": 1.2809507860134728, "learning_rate": 3.6363699051725114e-06, "loss": 0.034, "step": 72510 }, { "epoch": 0.30257195550400146, "grad_norm": 1.5212800015954822, "learning_rate": 3.6362445351546092e-06, "loss": 0.0447, "step": 72515 }, { "epoch": 0.30259281821899175, "grad_norm": 0.3774870764169181, "learning_rate": 3.636119178102846e-06, "loss": 0.0335, "step": 72520 }, { "epoch": 0.302613680933982, "grad_norm": 0.6652796106366935, "learning_rate": 3.6359938340149843e-06, "loss": 0.0217, "step": 72525 }, { "epoch": 0.3026345436489723, "grad_norm": 0.47472887692415744, "learning_rate": 3.6358685028887913e-06, "loss": 0.0185, "step": 72530 }, { "epoch": 0.3026554063639626, "grad_norm": 1.531154603108602, "learning_rate": 3.6357431847220326e-06, "loss": 0.0327, "step": 72535 }, { "epoch": 0.30267626907895284, "grad_norm": 1.2163454801124933, "learning_rate": 3.6356178795124762e-06, "loss": 0.0345, "step": 72540 }, { "epoch": 0.30269713179394314, "grad_norm": 0.52898156822067, "learning_rate": 3.635492587257889e-06, "loss": 0.0272, "step": 72545 }, { "epoch": 0.30271799450893344, "grad_norm": 0.827230169458275, "learning_rate": 3.635367307956038e-06, "loss": 0.0297, "step": 72550 }, { "epoch": 0.3027388572239237, "grad_norm": 0.8029426389124968, "learning_rate": 3.6352420416046925e-06, "loss": 0.0233, "step": 72555 }, { "epoch": 0.302759719938914, "grad_norm": 0.7744664022536621, "learning_rate": 3.6351167882016214e-06, "loss": 0.0442, "step": 72560 }, { "epoch": 0.30278058265390423, "grad_norm": 0.6076755334186194, "learning_rate": 3.6349915477445935e-06, "loss": 0.034, "step": 72565 }, { "epoch": 0.30280144536889453, "grad_norm": 1.5682044905482908, "learning_rate": 3.6348663202313793e-06, "loss": 0.0364, "step": 72570 }, { "epoch": 0.30282230808388483, "grad_norm": 0.9710020061463738, "learning_rate": 3.6347411056597485e-06, "loss": 0.0305, "step": 72575 }, { "epoch": 0.30284317079887507, "grad_norm": 0.6543710113376002, "learning_rate": 3.6346159040274735e-06, "loss": 0.0261, "step": 72580 }, { "epoch": 0.30286403351386537, "grad_norm": 0.5950204190019467, "learning_rate": 3.6344907153323257e-06, "loss": 0.0246, "step": 72585 }, { "epoch": 0.3028848962288556, "grad_norm": 0.512563087622781, "learning_rate": 3.6343655395720767e-06, "loss": 0.0286, "step": 72590 }, { "epoch": 0.3029057589438459, "grad_norm": 0.6025423397047401, "learning_rate": 3.634240376744499e-06, "loss": 0.0333, "step": 72595 }, { "epoch": 0.3029266216588362, "grad_norm": 0.9253068494463204, "learning_rate": 3.6341152268473667e-06, "loss": 0.028, "step": 72600 }, { "epoch": 0.30294748437382646, "grad_norm": 0.8013087636373926, "learning_rate": 3.633990089878453e-06, "loss": 0.0248, "step": 72605 }, { "epoch": 0.30296834708881676, "grad_norm": 0.8604428535763572, "learning_rate": 3.633864965835532e-06, "loss": 0.039, "step": 72610 }, { "epoch": 0.302989209803807, "grad_norm": 1.2742521243658131, "learning_rate": 3.633739854716379e-06, "loss": 0.022, "step": 72615 }, { "epoch": 0.3030100725187973, "grad_norm": 0.44366885234202513, "learning_rate": 3.633614756518769e-06, "loss": 0.0325, "step": 72620 }, { "epoch": 0.3030309352337876, "grad_norm": 1.3798210469321148, "learning_rate": 3.633489671240478e-06, "loss": 0.0312, "step": 72625 }, { "epoch": 0.30305179794877785, "grad_norm": 0.5724198326496288, "learning_rate": 3.633364598879283e-06, "loss": 0.0307, "step": 72630 }, { "epoch": 0.30307266066376815, "grad_norm": 0.8858511495556324, "learning_rate": 3.6332395394329602e-06, "loss": 0.0313, "step": 72635 }, { "epoch": 0.30309352337875844, "grad_norm": 1.6671008409534245, "learning_rate": 3.6331144928992877e-06, "loss": 0.0418, "step": 72640 }, { "epoch": 0.3031143860937487, "grad_norm": 0.6790454196298211, "learning_rate": 3.6329894592760436e-06, "loss": 0.0281, "step": 72645 }, { "epoch": 0.303135248808739, "grad_norm": 0.9886428625497048, "learning_rate": 3.6328644385610055e-06, "loss": 0.0271, "step": 72650 }, { "epoch": 0.30315611152372923, "grad_norm": 0.6997984574263689, "learning_rate": 3.6327394307519544e-06, "loss": 0.0343, "step": 72655 }, { "epoch": 0.30317697423871953, "grad_norm": 0.8231397098498576, "learning_rate": 3.6326144358466676e-06, "loss": 0.0221, "step": 72660 }, { "epoch": 0.30319783695370983, "grad_norm": 0.6171712704600262, "learning_rate": 3.6324894538429267e-06, "loss": 0.0245, "step": 72665 }, { "epoch": 0.3032186996687001, "grad_norm": 1.522338780730167, "learning_rate": 3.6323644847385124e-06, "loss": 0.0339, "step": 72670 }, { "epoch": 0.3032395623836904, "grad_norm": 2.163311568217216, "learning_rate": 3.632239528531206e-06, "loss": 0.0409, "step": 72675 }, { "epoch": 0.3032604250986806, "grad_norm": 0.6770674684229453, "learning_rate": 3.6321145852187897e-06, "loss": 0.0339, "step": 72680 }, { "epoch": 0.3032812878136709, "grad_norm": 0.6270427226320225, "learning_rate": 3.631989654799044e-06, "loss": 0.0284, "step": 72685 }, { "epoch": 0.3033021505286612, "grad_norm": 0.7770621890362813, "learning_rate": 3.631864737269754e-06, "loss": 0.0374, "step": 72690 }, { "epoch": 0.30332301324365146, "grad_norm": 0.6510143644894844, "learning_rate": 3.6317398326287013e-06, "loss": 0.0297, "step": 72695 }, { "epoch": 0.30334387595864176, "grad_norm": 0.9594315974095186, "learning_rate": 3.6316149408736705e-06, "loss": 0.0243, "step": 72700 }, { "epoch": 0.303364738673632, "grad_norm": 0.7121592415423021, "learning_rate": 3.6314900620024467e-06, "loss": 0.0213, "step": 72705 }, { "epoch": 0.3033856013886223, "grad_norm": 0.6685648208826629, "learning_rate": 3.631365196012815e-06, "loss": 0.0278, "step": 72710 }, { "epoch": 0.3034064641036126, "grad_norm": 0.9188481266340538, "learning_rate": 3.6312403429025594e-06, "loss": 0.0334, "step": 72715 }, { "epoch": 0.30342732681860285, "grad_norm": 0.595566615082366, "learning_rate": 3.6311155026694673e-06, "loss": 0.0267, "step": 72720 }, { "epoch": 0.30344818953359315, "grad_norm": 0.9702681063429688, "learning_rate": 3.630990675311325e-06, "loss": 0.0383, "step": 72725 }, { "epoch": 0.30346905224858345, "grad_norm": 0.7056747427850425, "learning_rate": 3.630865860825919e-06, "loss": 0.0266, "step": 72730 }, { "epoch": 0.3034899149635737, "grad_norm": 0.7340208308805773, "learning_rate": 3.630741059211038e-06, "loss": 0.0311, "step": 72735 }, { "epoch": 0.303510777678564, "grad_norm": 1.4674887504629759, "learning_rate": 3.63061627046447e-06, "loss": 0.0458, "step": 72740 }, { "epoch": 0.30353164039355424, "grad_norm": 0.33801689759167175, "learning_rate": 3.6304914945840026e-06, "loss": 0.0249, "step": 72745 }, { "epoch": 0.30355250310854454, "grad_norm": 1.0809421709647395, "learning_rate": 3.6303667315674262e-06, "loss": 0.0336, "step": 72750 }, { "epoch": 0.30357336582353484, "grad_norm": 1.0208989765282392, "learning_rate": 3.6302419814125306e-06, "loss": 0.0306, "step": 72755 }, { "epoch": 0.3035942285385251, "grad_norm": 1.1563154071290243, "learning_rate": 3.6301172441171058e-06, "loss": 0.0291, "step": 72760 }, { "epoch": 0.3036150912535154, "grad_norm": 1.2519527519169085, "learning_rate": 3.6299925196789427e-06, "loss": 0.0504, "step": 72765 }, { "epoch": 0.3036359539685056, "grad_norm": 0.6918464146172508, "learning_rate": 3.6298678080958323e-06, "loss": 0.0229, "step": 72770 }, { "epoch": 0.3036568166834959, "grad_norm": 0.5474019113231052, "learning_rate": 3.629743109365566e-06, "loss": 0.0263, "step": 72775 }, { "epoch": 0.3036776793984862, "grad_norm": 1.2025868053361877, "learning_rate": 3.629618423485938e-06, "loss": 0.0237, "step": 72780 }, { "epoch": 0.30369854211347647, "grad_norm": 1.033170942147867, "learning_rate": 3.629493750454741e-06, "loss": 0.0281, "step": 72785 }, { "epoch": 0.30371940482846677, "grad_norm": 0.8695537264563332, "learning_rate": 3.629369090269767e-06, "loss": 0.033, "step": 72790 }, { "epoch": 0.303740267543457, "grad_norm": 0.8236258222336109, "learning_rate": 3.6292444429288114e-06, "loss": 0.0323, "step": 72795 }, { "epoch": 0.3037611302584473, "grad_norm": 0.9527750325820907, "learning_rate": 3.6291198084296684e-06, "loss": 0.0486, "step": 72800 }, { "epoch": 0.3037819929734376, "grad_norm": 0.6956840113597159, "learning_rate": 3.628995186770133e-06, "loss": 0.0279, "step": 72805 }, { "epoch": 0.30380285568842785, "grad_norm": 0.5740054591963585, "learning_rate": 3.628870577948001e-06, "loss": 0.0316, "step": 72810 }, { "epoch": 0.30382371840341815, "grad_norm": 0.6990103268753816, "learning_rate": 3.6287459819610676e-06, "loss": 0.0297, "step": 72815 }, { "epoch": 0.30384458111840845, "grad_norm": 0.40582922135042343, "learning_rate": 3.628621398807131e-06, "loss": 0.0284, "step": 72820 }, { "epoch": 0.3038654438333987, "grad_norm": 0.511184584213057, "learning_rate": 3.6284968284839883e-06, "loss": 0.0261, "step": 72825 }, { "epoch": 0.303886306548389, "grad_norm": 0.918631692842961, "learning_rate": 3.628372270989436e-06, "loss": 0.0336, "step": 72830 }, { "epoch": 0.30390716926337924, "grad_norm": 0.8909915685924671, "learning_rate": 3.6282477263212728e-06, "loss": 0.0244, "step": 72835 }, { "epoch": 0.30392803197836954, "grad_norm": 0.5690365958551562, "learning_rate": 3.6281231944772977e-06, "loss": 0.0225, "step": 72840 }, { "epoch": 0.30394889469335984, "grad_norm": 0.45890860033971614, "learning_rate": 3.627998675455311e-06, "loss": 0.0263, "step": 72845 }, { "epoch": 0.3039697574083501, "grad_norm": 0.7859563448070933, "learning_rate": 3.6278741692531106e-06, "loss": 0.023, "step": 72850 }, { "epoch": 0.3039906201233404, "grad_norm": 0.7863211764731456, "learning_rate": 3.6277496758684988e-06, "loss": 0.0265, "step": 72855 }, { "epoch": 0.3040114828383306, "grad_norm": 1.22585641594362, "learning_rate": 3.6276251952992748e-06, "loss": 0.03, "step": 72860 }, { "epoch": 0.3040323455533209, "grad_norm": 1.1378700907661081, "learning_rate": 3.627500727543242e-06, "loss": 0.0313, "step": 72865 }, { "epoch": 0.3040532082683112, "grad_norm": 0.7246089673389335, "learning_rate": 3.6273762725982e-06, "loss": 0.0337, "step": 72870 }, { "epoch": 0.30407407098330147, "grad_norm": 0.5991093681464771, "learning_rate": 3.6272518304619535e-06, "loss": 0.0284, "step": 72875 }, { "epoch": 0.30409493369829177, "grad_norm": 0.6699177046978807, "learning_rate": 3.627127401132304e-06, "loss": 0.029, "step": 72880 }, { "epoch": 0.304115796413282, "grad_norm": 1.4020805547647948, "learning_rate": 3.6270029846070575e-06, "loss": 0.0438, "step": 72885 }, { "epoch": 0.3041366591282723, "grad_norm": 0.5963811038842585, "learning_rate": 3.626878580884015e-06, "loss": 0.0228, "step": 72890 }, { "epoch": 0.3041575218432626, "grad_norm": 0.7833699797050565, "learning_rate": 3.626754189960982e-06, "loss": 0.0306, "step": 72895 }, { "epoch": 0.30417838455825286, "grad_norm": 0.8835357870204525, "learning_rate": 3.6266298118357653e-06, "loss": 0.0305, "step": 72900 }, { "epoch": 0.30419924727324316, "grad_norm": 0.4765925328556605, "learning_rate": 3.6265054465061682e-06, "loss": 0.0265, "step": 72905 }, { "epoch": 0.30422010998823346, "grad_norm": 0.6402963132691875, "learning_rate": 3.6263810939699985e-06, "loss": 0.0255, "step": 72910 }, { "epoch": 0.3042409727032237, "grad_norm": 0.9580769596333398, "learning_rate": 3.6262567542250625e-06, "loss": 0.0274, "step": 72915 }, { "epoch": 0.304261835418214, "grad_norm": 0.9139999788815668, "learning_rate": 3.626132427269167e-06, "loss": 0.0233, "step": 72920 }, { "epoch": 0.30428269813320424, "grad_norm": 0.7620912306076344, "learning_rate": 3.6260081131001208e-06, "loss": 0.0281, "step": 72925 }, { "epoch": 0.30430356084819454, "grad_norm": 0.41811553115862965, "learning_rate": 3.6258838117157307e-06, "loss": 0.0218, "step": 72930 }, { "epoch": 0.30432442356318484, "grad_norm": 0.8440822752094455, "learning_rate": 3.6257595231138072e-06, "loss": 0.0199, "step": 72935 }, { "epoch": 0.3043452862781751, "grad_norm": 0.6436771219387873, "learning_rate": 3.625635247292159e-06, "loss": 0.0282, "step": 72940 }, { "epoch": 0.3043661489931654, "grad_norm": 0.7130851753638668, "learning_rate": 3.625510984248595e-06, "loss": 0.0291, "step": 72945 }, { "epoch": 0.30438701170815563, "grad_norm": 1.0125351866932935, "learning_rate": 3.6253867339809274e-06, "loss": 0.0306, "step": 72950 }, { "epoch": 0.30440787442314593, "grad_norm": 0.7174802525256958, "learning_rate": 3.6252624964869653e-06, "loss": 0.0306, "step": 72955 }, { "epoch": 0.30442873713813623, "grad_norm": 0.7265388000224399, "learning_rate": 3.6251382717645216e-06, "loss": 0.0292, "step": 72960 }, { "epoch": 0.3044495998531265, "grad_norm": 1.0413537021372863, "learning_rate": 3.6250140598114075e-06, "loss": 0.0355, "step": 72965 }, { "epoch": 0.3044704625681168, "grad_norm": 0.6456507124623896, "learning_rate": 3.624889860625436e-06, "loss": 0.0286, "step": 72970 }, { "epoch": 0.304491325283107, "grad_norm": 0.6290583656189165, "learning_rate": 3.6247656742044198e-06, "loss": 0.0277, "step": 72975 }, { "epoch": 0.3045121879980973, "grad_norm": 1.2867675549038138, "learning_rate": 3.624641500546172e-06, "loss": 0.0242, "step": 72980 }, { "epoch": 0.3045330507130876, "grad_norm": 0.9022049213931828, "learning_rate": 3.624517339648507e-06, "loss": 0.032, "step": 72985 }, { "epoch": 0.30455391342807786, "grad_norm": 0.7321378625036746, "learning_rate": 3.624393191509241e-06, "loss": 0.0335, "step": 72990 }, { "epoch": 0.30457477614306816, "grad_norm": 0.5891279081328796, "learning_rate": 3.624269056126186e-06, "loss": 0.0232, "step": 72995 }, { "epoch": 0.30459563885805846, "grad_norm": 0.5038812365308368, "learning_rate": 3.6241449334971605e-06, "loss": 0.0302, "step": 73000 }, { "epoch": 0.3046165015730487, "grad_norm": 1.119327505360522, "learning_rate": 3.6240208236199796e-06, "loss": 0.0334, "step": 73005 }, { "epoch": 0.304637364288039, "grad_norm": 0.7102339799507525, "learning_rate": 3.623896726492459e-06, "loss": 0.0355, "step": 73010 }, { "epoch": 0.30465822700302925, "grad_norm": 0.8955159076725239, "learning_rate": 3.6237726421124174e-06, "loss": 0.0361, "step": 73015 }, { "epoch": 0.30467908971801955, "grad_norm": 0.7785513223076042, "learning_rate": 3.623648570477672e-06, "loss": 0.0358, "step": 73020 }, { "epoch": 0.30469995243300985, "grad_norm": 0.9071716888145471, "learning_rate": 3.623524511586041e-06, "loss": 0.027, "step": 73025 }, { "epoch": 0.3047208151480001, "grad_norm": 0.6431809453679689, "learning_rate": 3.6234004654353434e-06, "loss": 0.0358, "step": 73030 }, { "epoch": 0.3047416778629904, "grad_norm": 1.0806839539956385, "learning_rate": 3.623276432023398e-06, "loss": 0.0313, "step": 73035 }, { "epoch": 0.30476254057798063, "grad_norm": 0.7820594403516994, "learning_rate": 3.623152411348025e-06, "loss": 0.0245, "step": 73040 }, { "epoch": 0.30478340329297093, "grad_norm": 0.9693753116770646, "learning_rate": 3.623028403407045e-06, "loss": 0.0408, "step": 73045 }, { "epoch": 0.30480426600796123, "grad_norm": 0.36884347958164815, "learning_rate": 3.622904408198279e-06, "loss": 0.0228, "step": 73050 }, { "epoch": 0.3048251287229515, "grad_norm": 0.7127803791482352, "learning_rate": 3.622780425719547e-06, "loss": 0.0223, "step": 73055 }, { "epoch": 0.3048459914379418, "grad_norm": 0.636242997373836, "learning_rate": 3.6226564559686726e-06, "loss": 0.0223, "step": 73060 }, { "epoch": 0.304866854152932, "grad_norm": 0.5128045185922343, "learning_rate": 3.622532498943478e-06, "loss": 0.0265, "step": 73065 }, { "epoch": 0.3048877168679223, "grad_norm": 0.511709510216603, "learning_rate": 3.622408554641785e-06, "loss": 0.0229, "step": 73070 }, { "epoch": 0.3049085795829126, "grad_norm": 1.1019500466072598, "learning_rate": 3.622284623061418e-06, "loss": 0.0283, "step": 73075 }, { "epoch": 0.30492944229790286, "grad_norm": 0.6997411896081653, "learning_rate": 3.622160704200201e-06, "loss": 0.0312, "step": 73080 }, { "epoch": 0.30495030501289316, "grad_norm": 0.8846761135641661, "learning_rate": 3.6220367980559585e-06, "loss": 0.0296, "step": 73085 }, { "epoch": 0.30497116772788346, "grad_norm": 0.7202897727438825, "learning_rate": 3.621912904626515e-06, "loss": 0.0227, "step": 73090 }, { "epoch": 0.3049920304428737, "grad_norm": 0.7693400500817699, "learning_rate": 3.621789023909697e-06, "loss": 0.0332, "step": 73095 }, { "epoch": 0.305012893157864, "grad_norm": 1.1145593759019663, "learning_rate": 3.62166515590333e-06, "loss": 0.0231, "step": 73100 }, { "epoch": 0.30503375587285425, "grad_norm": 1.3123587373557222, "learning_rate": 3.62154130060524e-06, "loss": 0.0285, "step": 73105 }, { "epoch": 0.30505461858784455, "grad_norm": 0.8746739872722796, "learning_rate": 3.6214174580132554e-06, "loss": 0.033, "step": 73110 }, { "epoch": 0.30507548130283485, "grad_norm": 1.5623677356827594, "learning_rate": 3.621293628125204e-06, "loss": 0.0372, "step": 73115 }, { "epoch": 0.3050963440178251, "grad_norm": 0.7354677648434722, "learning_rate": 3.6211698109389124e-06, "loss": 0.0273, "step": 73120 }, { "epoch": 0.3051172067328154, "grad_norm": 0.7592498064939737, "learning_rate": 3.6210460064522105e-06, "loss": 0.0233, "step": 73125 }, { "epoch": 0.30513806944780564, "grad_norm": 0.5949004331066586, "learning_rate": 3.6209222146629268e-06, "loss": 0.0252, "step": 73130 }, { "epoch": 0.30515893216279594, "grad_norm": 0.6329056631067557, "learning_rate": 3.620798435568892e-06, "loss": 0.0286, "step": 73135 }, { "epoch": 0.30517979487778624, "grad_norm": 0.6404559656099366, "learning_rate": 3.620674669167935e-06, "loss": 0.0265, "step": 73140 }, { "epoch": 0.3052006575927765, "grad_norm": 1.6459308773901913, "learning_rate": 3.620550915457888e-06, "loss": 0.023, "step": 73145 }, { "epoch": 0.3052215203077668, "grad_norm": 0.6517166572967045, "learning_rate": 3.6204271744365805e-06, "loss": 0.0321, "step": 73150 }, { "epoch": 0.305242383022757, "grad_norm": 0.8372322633419141, "learning_rate": 3.620303446101847e-06, "loss": 0.0381, "step": 73155 }, { "epoch": 0.3052632457377473, "grad_norm": 0.5182193517399666, "learning_rate": 3.620179730451517e-06, "loss": 0.026, "step": 73160 }, { "epoch": 0.3052841084527376, "grad_norm": 1.09603496051477, "learning_rate": 3.620056027483425e-06, "loss": 0.0321, "step": 73165 }, { "epoch": 0.30530497116772787, "grad_norm": 1.079691290270276, "learning_rate": 3.6199323371954043e-06, "loss": 0.0279, "step": 73170 }, { "epoch": 0.30532583388271817, "grad_norm": 0.5663897882503508, "learning_rate": 3.619808659585288e-06, "loss": 0.0291, "step": 73175 }, { "epoch": 0.30534669659770847, "grad_norm": 1.214875274364195, "learning_rate": 3.619684994650911e-06, "loss": 0.0255, "step": 73180 }, { "epoch": 0.3053675593126987, "grad_norm": 0.8873700629392257, "learning_rate": 3.6195613423901084e-06, "loss": 0.0391, "step": 73185 }, { "epoch": 0.305388422027689, "grad_norm": 0.6647905623037877, "learning_rate": 3.619437702800715e-06, "loss": 0.0301, "step": 73190 }, { "epoch": 0.30540928474267925, "grad_norm": 0.7968412996461848, "learning_rate": 3.619314075880567e-06, "loss": 0.0379, "step": 73195 }, { "epoch": 0.30543014745766955, "grad_norm": 1.1093975298609209, "learning_rate": 3.6191904616275015e-06, "loss": 0.0296, "step": 73200 }, { "epoch": 0.30545101017265985, "grad_norm": 1.069030282024403, "learning_rate": 3.619066860039355e-06, "loss": 0.0333, "step": 73205 }, { "epoch": 0.3054718728876501, "grad_norm": 0.4994697959104834, "learning_rate": 3.618943271113965e-06, "loss": 0.0224, "step": 73210 }, { "epoch": 0.3054927356026404, "grad_norm": 1.0181256471894866, "learning_rate": 3.618819694849169e-06, "loss": 0.0287, "step": 73215 }, { "epoch": 0.30551359831763064, "grad_norm": 0.5378785204851574, "learning_rate": 3.6186961312428065e-06, "loss": 0.0242, "step": 73220 }, { "epoch": 0.30553446103262094, "grad_norm": 0.6732689269855893, "learning_rate": 3.6185725802927156e-06, "loss": 0.0281, "step": 73225 }, { "epoch": 0.30555532374761124, "grad_norm": 1.0170343029362243, "learning_rate": 3.6184490419967366e-06, "loss": 0.0216, "step": 73230 }, { "epoch": 0.3055761864626015, "grad_norm": 0.7856787454406624, "learning_rate": 3.618325516352709e-06, "loss": 0.033, "step": 73235 }, { "epoch": 0.3055970491775918, "grad_norm": 0.5753474967831272, "learning_rate": 3.6182020033584746e-06, "loss": 0.0361, "step": 73240 }, { "epoch": 0.30561791189258203, "grad_norm": 1.1510238753906525, "learning_rate": 3.618078503011874e-06, "loss": 0.0274, "step": 73245 }, { "epoch": 0.3056387746075723, "grad_norm": 1.2689833716848542, "learning_rate": 3.6179550153107475e-06, "loss": 0.0337, "step": 73250 }, { "epoch": 0.3056596373225626, "grad_norm": 0.8970548766682177, "learning_rate": 3.617831540252939e-06, "loss": 0.0324, "step": 73255 }, { "epoch": 0.30568050003755287, "grad_norm": 0.7100926868151094, "learning_rate": 3.6177080778362893e-06, "loss": 0.0271, "step": 73260 }, { "epoch": 0.30570136275254317, "grad_norm": 0.8020163834273362, "learning_rate": 3.6175846280586435e-06, "loss": 0.0331, "step": 73265 }, { "epoch": 0.30572222546753347, "grad_norm": 3.706022118698965, "learning_rate": 3.6174611909178446e-06, "loss": 0.0392, "step": 73270 }, { "epoch": 0.3057430881825237, "grad_norm": 0.948974064734472, "learning_rate": 3.6173377664117364e-06, "loss": 0.0383, "step": 73275 }, { "epoch": 0.305763950897514, "grad_norm": 0.6340834893383259, "learning_rate": 3.6172143545381645e-06, "loss": 0.0245, "step": 73280 }, { "epoch": 0.30578481361250426, "grad_norm": 0.7175734559266295, "learning_rate": 3.617090955294973e-06, "loss": 0.0257, "step": 73285 }, { "epoch": 0.30580567632749456, "grad_norm": 0.7295016354493088, "learning_rate": 3.6169675686800086e-06, "loss": 0.0296, "step": 73290 }, { "epoch": 0.30582653904248486, "grad_norm": 0.7823589540392575, "learning_rate": 3.616844194691117e-06, "loss": 0.0225, "step": 73295 }, { "epoch": 0.3058474017574751, "grad_norm": 0.7960993070672621, "learning_rate": 3.616720833326145e-06, "loss": 0.0209, "step": 73300 }, { "epoch": 0.3058682644724654, "grad_norm": 1.0526722485522535, "learning_rate": 3.6165974845829404e-06, "loss": 0.0329, "step": 73305 }, { "epoch": 0.30588912718745565, "grad_norm": 1.6671361773662212, "learning_rate": 3.616474148459351e-06, "loss": 0.0333, "step": 73310 }, { "epoch": 0.30590998990244594, "grad_norm": 0.4665182987245898, "learning_rate": 3.6163508249532248e-06, "loss": 0.0289, "step": 73315 }, { "epoch": 0.30593085261743624, "grad_norm": 0.6967305236133763, "learning_rate": 3.6162275140624105e-06, "loss": 0.0346, "step": 73320 }, { "epoch": 0.3059517153324265, "grad_norm": 1.0706293307274253, "learning_rate": 3.6161042157847563e-06, "loss": 0.0323, "step": 73325 }, { "epoch": 0.3059725780474168, "grad_norm": 1.5393419027172075, "learning_rate": 3.615980930118115e-06, "loss": 0.0317, "step": 73330 }, { "epoch": 0.30599344076240703, "grad_norm": 0.8618752539842616, "learning_rate": 3.615857657060335e-06, "loss": 0.0233, "step": 73335 }, { "epoch": 0.30601430347739733, "grad_norm": 0.5246546928383355, "learning_rate": 3.615734396609267e-06, "loss": 0.0296, "step": 73340 }, { "epoch": 0.30603516619238763, "grad_norm": 0.5301708041776889, "learning_rate": 3.6156111487627637e-06, "loss": 0.0235, "step": 73345 }, { "epoch": 0.3060560289073779, "grad_norm": 0.7586263295250956, "learning_rate": 3.615487913518676e-06, "loss": 0.0305, "step": 73350 }, { "epoch": 0.3060768916223682, "grad_norm": 0.7265400593765917, "learning_rate": 3.615364690874857e-06, "loss": 0.0258, "step": 73355 }, { "epoch": 0.3060977543373585, "grad_norm": 0.9386582659464867, "learning_rate": 3.615241480829159e-06, "loss": 0.0365, "step": 73360 }, { "epoch": 0.3061186170523487, "grad_norm": 0.9619549282754859, "learning_rate": 3.615118283379435e-06, "loss": 0.035, "step": 73365 }, { "epoch": 0.306139479767339, "grad_norm": 0.7336663194355504, "learning_rate": 3.6149950985235412e-06, "loss": 0.0266, "step": 73370 }, { "epoch": 0.30616034248232926, "grad_norm": 1.0372510197200053, "learning_rate": 3.6148719262593284e-06, "loss": 0.0284, "step": 73375 }, { "epoch": 0.30618120519731956, "grad_norm": 0.40014079927738155, "learning_rate": 3.614748766584656e-06, "loss": 0.0322, "step": 73380 }, { "epoch": 0.30620206791230986, "grad_norm": 0.9085584714601239, "learning_rate": 3.6146256194973756e-06, "loss": 0.026, "step": 73385 }, { "epoch": 0.3062229306273001, "grad_norm": 0.8536733930742445, "learning_rate": 3.614502484995346e-06, "loss": 0.0305, "step": 73390 }, { "epoch": 0.3062437933422904, "grad_norm": 0.790902266031467, "learning_rate": 3.6143793630764217e-06, "loss": 0.0323, "step": 73395 }, { "epoch": 0.30626465605728065, "grad_norm": 1.040586369714415, "learning_rate": 3.6142562537384606e-06, "loss": 0.0367, "step": 73400 }, { "epoch": 0.30628551877227095, "grad_norm": 0.6925957143413674, "learning_rate": 3.6141331569793203e-06, "loss": 0.0354, "step": 73405 }, { "epoch": 0.30630638148726125, "grad_norm": 0.6079575995473819, "learning_rate": 3.6140100727968595e-06, "loss": 0.0307, "step": 73410 }, { "epoch": 0.3063272442022515, "grad_norm": 0.766881768418521, "learning_rate": 3.6138870011889354e-06, "loss": 0.0319, "step": 73415 }, { "epoch": 0.3063481069172418, "grad_norm": 0.6049913613053997, "learning_rate": 3.613763942153408e-06, "loss": 0.0255, "step": 73420 }, { "epoch": 0.30636896963223204, "grad_norm": 1.0451679969912153, "learning_rate": 3.613640895688137e-06, "loss": 0.0283, "step": 73425 }, { "epoch": 0.30638983234722234, "grad_norm": 0.2610744189771532, "learning_rate": 3.6135178617909815e-06, "loss": 0.0257, "step": 73430 }, { "epoch": 0.30641069506221263, "grad_norm": 0.8335150340472458, "learning_rate": 3.6133948404598033e-06, "loss": 0.0358, "step": 73435 }, { "epoch": 0.3064315577772029, "grad_norm": 0.5152806281527434, "learning_rate": 3.6132718316924623e-06, "loss": 0.0199, "step": 73440 }, { "epoch": 0.3064524204921932, "grad_norm": 0.869199574232581, "learning_rate": 3.613148835486821e-06, "loss": 0.0342, "step": 73445 }, { "epoch": 0.3064732832071834, "grad_norm": 0.5099361982884786, "learning_rate": 3.6130258518407423e-06, "loss": 0.0256, "step": 73450 }, { "epoch": 0.3064941459221737, "grad_norm": 0.5806838971802318, "learning_rate": 3.6129028807520865e-06, "loss": 0.0236, "step": 73455 }, { "epoch": 0.306515008637164, "grad_norm": 0.9316669581687593, "learning_rate": 3.6127799222187193e-06, "loss": 0.0389, "step": 73460 }, { "epoch": 0.30653587135215427, "grad_norm": 0.9231113812046513, "learning_rate": 3.6126569762385024e-06, "loss": 0.0265, "step": 73465 }, { "epoch": 0.30655673406714457, "grad_norm": 0.7088054595741684, "learning_rate": 3.6125340428093008e-06, "loss": 0.0263, "step": 73470 }, { "epoch": 0.30657759678213486, "grad_norm": 1.0131303296470513, "learning_rate": 3.612411121928979e-06, "loss": 0.0468, "step": 73475 }, { "epoch": 0.3065984594971251, "grad_norm": 0.9245896925297471, "learning_rate": 3.6122882135954036e-06, "loss": 0.0309, "step": 73480 }, { "epoch": 0.3066193222121154, "grad_norm": 0.6840466750455645, "learning_rate": 3.6121653178064382e-06, "loss": 0.0348, "step": 73485 }, { "epoch": 0.30664018492710565, "grad_norm": 0.800003414198285, "learning_rate": 3.6120424345599496e-06, "loss": 0.0328, "step": 73490 }, { "epoch": 0.30666104764209595, "grad_norm": 0.8540699348929531, "learning_rate": 3.611919563853805e-06, "loss": 0.0265, "step": 73495 }, { "epoch": 0.30668191035708625, "grad_norm": 1.001778421954645, "learning_rate": 3.611796705685871e-06, "loss": 0.0295, "step": 73500 }, { "epoch": 0.3067027730720765, "grad_norm": 0.7192116732266594, "learning_rate": 3.611673860054016e-06, "loss": 0.0344, "step": 73505 }, { "epoch": 0.3067236357870668, "grad_norm": 0.6872970325970162, "learning_rate": 3.6115510269561082e-06, "loss": 0.028, "step": 73510 }, { "epoch": 0.30674449850205704, "grad_norm": 1.4744986163879983, "learning_rate": 3.6114282063900158e-06, "loss": 0.0397, "step": 73515 }, { "epoch": 0.30676536121704734, "grad_norm": 0.6132188040078884, "learning_rate": 3.6113053983536082e-06, "loss": 0.0283, "step": 73520 }, { "epoch": 0.30678622393203764, "grad_norm": 0.6619224908001891, "learning_rate": 3.611182602844756e-06, "loss": 0.0262, "step": 73525 }, { "epoch": 0.3068070866470279, "grad_norm": 0.7933489035326408, "learning_rate": 3.6110598198613284e-06, "loss": 0.0289, "step": 73530 }, { "epoch": 0.3068279493620182, "grad_norm": 1.3379042891378814, "learning_rate": 3.6109370494011964e-06, "loss": 0.0312, "step": 73535 }, { "epoch": 0.3068488120770084, "grad_norm": 1.4344258273788564, "learning_rate": 3.610814291462232e-06, "loss": 0.0456, "step": 73540 }, { "epoch": 0.3068696747919987, "grad_norm": 0.5855954274751272, "learning_rate": 3.6106915460423055e-06, "loss": 0.037, "step": 73545 }, { "epoch": 0.306890537506989, "grad_norm": 0.6481767717362025, "learning_rate": 3.610568813139291e-06, "loss": 0.0302, "step": 73550 }, { "epoch": 0.30691140022197927, "grad_norm": 1.6611135311045782, "learning_rate": 3.610446092751059e-06, "loss": 0.0469, "step": 73555 }, { "epoch": 0.30693226293696957, "grad_norm": 0.49799405074230835, "learning_rate": 3.610323384875486e-06, "loss": 0.0196, "step": 73560 }, { "epoch": 0.30695312565195987, "grad_norm": 0.7487832937440871, "learning_rate": 3.6102006895104437e-06, "loss": 0.0306, "step": 73565 }, { "epoch": 0.3069739883669501, "grad_norm": 1.3841812447587605, "learning_rate": 3.610078006653806e-06, "loss": 0.0335, "step": 73570 }, { "epoch": 0.3069948510819404, "grad_norm": 0.5348371236014203, "learning_rate": 3.609955336303449e-06, "loss": 0.0209, "step": 73575 }, { "epoch": 0.30701571379693066, "grad_norm": 0.9231348106084666, "learning_rate": 3.609832678457247e-06, "loss": 0.0325, "step": 73580 }, { "epoch": 0.30703657651192096, "grad_norm": 0.5025130515518076, "learning_rate": 3.609710033113077e-06, "loss": 0.0354, "step": 73585 }, { "epoch": 0.30705743922691126, "grad_norm": 0.7715536492504027, "learning_rate": 3.609587400268814e-06, "loss": 0.0157, "step": 73590 }, { "epoch": 0.3070783019419015, "grad_norm": 0.805484298960009, "learning_rate": 3.6094647799223365e-06, "loss": 0.0343, "step": 73595 }, { "epoch": 0.3070991646568918, "grad_norm": 0.9046640438627855, "learning_rate": 3.60934217207152e-06, "loss": 0.0307, "step": 73600 }, { "epoch": 0.30712002737188204, "grad_norm": 0.7583756229888733, "learning_rate": 3.6092195767142436e-06, "loss": 0.0314, "step": 73605 }, { "epoch": 0.30714089008687234, "grad_norm": 0.5467318170746834, "learning_rate": 3.609096993848385e-06, "loss": 0.023, "step": 73610 }, { "epoch": 0.30716175280186264, "grad_norm": 0.6721308813434864, "learning_rate": 3.6089744234718234e-06, "loss": 0.0213, "step": 73615 }, { "epoch": 0.3071826155168529, "grad_norm": 0.7521397389977991, "learning_rate": 3.6088518655824386e-06, "loss": 0.0305, "step": 73620 }, { "epoch": 0.3072034782318432, "grad_norm": 0.5644195341598456, "learning_rate": 3.6087293201781093e-06, "loss": 0.0201, "step": 73625 }, { "epoch": 0.30722434094683343, "grad_norm": 0.6455991674475097, "learning_rate": 3.6086067872567177e-06, "loss": 0.0353, "step": 73630 }, { "epoch": 0.30724520366182373, "grad_norm": 0.42254927239766155, "learning_rate": 3.6084842668161422e-06, "loss": 0.0224, "step": 73635 }, { "epoch": 0.30726606637681403, "grad_norm": 0.5693430858425806, "learning_rate": 3.608361758854266e-06, "loss": 0.0278, "step": 73640 }, { "epoch": 0.3072869290918043, "grad_norm": 1.120499987164973, "learning_rate": 3.6082392633689708e-06, "loss": 0.0343, "step": 73645 }, { "epoch": 0.3073077918067946, "grad_norm": 0.9262603189691053, "learning_rate": 3.6081167803581384e-06, "loss": 0.0283, "step": 73650 }, { "epoch": 0.30732865452178487, "grad_norm": 1.0495560061975293, "learning_rate": 3.607994309819652e-06, "loss": 0.0317, "step": 73655 }, { "epoch": 0.3073495172367751, "grad_norm": 0.5513398228974773, "learning_rate": 3.6078718517513956e-06, "loss": 0.0273, "step": 73660 }, { "epoch": 0.3073703799517654, "grad_norm": 1.0287194963822883, "learning_rate": 3.6077494061512518e-06, "loss": 0.0298, "step": 73665 }, { "epoch": 0.30739124266675566, "grad_norm": 1.317582168472839, "learning_rate": 3.607626973017106e-06, "loss": 0.0411, "step": 73670 }, { "epoch": 0.30741210538174596, "grad_norm": 0.6231041087661275, "learning_rate": 3.607504552346843e-06, "loss": 0.0255, "step": 73675 }, { "epoch": 0.30743296809673626, "grad_norm": 1.0245900897042342, "learning_rate": 3.607382144138347e-06, "loss": 0.0299, "step": 73680 }, { "epoch": 0.3074538308117265, "grad_norm": 1.250651632132219, "learning_rate": 3.6072597483895063e-06, "loss": 0.0304, "step": 73685 }, { "epoch": 0.3074746935267168, "grad_norm": 0.7922581890436893, "learning_rate": 3.607137365098205e-06, "loss": 0.0268, "step": 73690 }, { "epoch": 0.30749555624170705, "grad_norm": 0.6013761783084918, "learning_rate": 3.6070149942623315e-06, "loss": 0.0236, "step": 73695 }, { "epoch": 0.30751641895669735, "grad_norm": 1.1214693950979144, "learning_rate": 3.606892635879772e-06, "loss": 0.027, "step": 73700 }, { "epoch": 0.30753728167168765, "grad_norm": 0.38432379500253966, "learning_rate": 3.606770289948415e-06, "loss": 0.0366, "step": 73705 }, { "epoch": 0.3075581443866779, "grad_norm": 1.3868328997670596, "learning_rate": 3.60664795646615e-06, "loss": 0.0422, "step": 73710 }, { "epoch": 0.3075790071016682, "grad_norm": 0.42564848794749316, "learning_rate": 3.6065256354308637e-06, "loss": 0.0376, "step": 73715 }, { "epoch": 0.30759986981665843, "grad_norm": 0.815963566895888, "learning_rate": 3.6064033268404476e-06, "loss": 0.0225, "step": 73720 }, { "epoch": 0.30762073253164873, "grad_norm": 0.9628596596976761, "learning_rate": 3.606281030692791e-06, "loss": 0.0386, "step": 73725 }, { "epoch": 0.30764159524663903, "grad_norm": 0.7920706608842342, "learning_rate": 3.606158746985783e-06, "loss": 0.0259, "step": 73730 }, { "epoch": 0.3076624579616293, "grad_norm": 0.8128815888733878, "learning_rate": 3.606036475717316e-06, "loss": 0.023, "step": 73735 }, { "epoch": 0.3076833206766196, "grad_norm": 1.37475889121615, "learning_rate": 3.6059142168852807e-06, "loss": 0.0275, "step": 73740 }, { "epoch": 0.3077041833916099, "grad_norm": 0.8235353999295811, "learning_rate": 3.60579197048757e-06, "loss": 0.0278, "step": 73745 }, { "epoch": 0.3077250461066001, "grad_norm": 0.6659656516638509, "learning_rate": 3.6056697365220755e-06, "loss": 0.0258, "step": 73750 }, { "epoch": 0.3077459088215904, "grad_norm": 1.2357265910491375, "learning_rate": 3.6055475149866903e-06, "loss": 0.0319, "step": 73755 }, { "epoch": 0.30776677153658066, "grad_norm": 0.9964021458856571, "learning_rate": 3.6054253058793075e-06, "loss": 0.0293, "step": 73760 }, { "epoch": 0.30778763425157096, "grad_norm": 0.8287094961548872, "learning_rate": 3.6053031091978213e-06, "loss": 0.0248, "step": 73765 }, { "epoch": 0.30780849696656126, "grad_norm": 0.7876334535949043, "learning_rate": 3.6051809249401257e-06, "loss": 0.0292, "step": 73770 }, { "epoch": 0.3078293596815515, "grad_norm": 0.8008456168830517, "learning_rate": 3.605058753104117e-06, "loss": 0.0251, "step": 73775 }, { "epoch": 0.3078502223965418, "grad_norm": 0.5156582941514399, "learning_rate": 3.6049365936876896e-06, "loss": 0.0347, "step": 73780 }, { "epoch": 0.30787108511153205, "grad_norm": 0.4699545649233853, "learning_rate": 3.6048144466887393e-06, "loss": 0.0254, "step": 73785 }, { "epoch": 0.30789194782652235, "grad_norm": 0.7272137323990401, "learning_rate": 3.6046923121051626e-06, "loss": 0.0267, "step": 73790 }, { "epoch": 0.30791281054151265, "grad_norm": 0.7904527133156065, "learning_rate": 3.6045701899348566e-06, "loss": 0.0328, "step": 73795 }, { "epoch": 0.3079336732565029, "grad_norm": 0.7879540793385817, "learning_rate": 3.6044480801757188e-06, "loss": 0.0363, "step": 73800 }, { "epoch": 0.3079545359714932, "grad_norm": 1.1868286923710094, "learning_rate": 3.6043259828256466e-06, "loss": 0.0313, "step": 73805 }, { "epoch": 0.30797539868648344, "grad_norm": 0.4764510273402237, "learning_rate": 3.604203897882539e-06, "loss": 0.0268, "step": 73810 }, { "epoch": 0.30799626140147374, "grad_norm": 0.749261044001231, "learning_rate": 3.604081825344295e-06, "loss": 0.0394, "step": 73815 }, { "epoch": 0.30801712411646404, "grad_norm": 0.9594641409542677, "learning_rate": 3.6039597652088133e-06, "loss": 0.0249, "step": 73820 }, { "epoch": 0.3080379868314543, "grad_norm": 1.5659542534828426, "learning_rate": 3.603837717473994e-06, "loss": 0.04, "step": 73825 }, { "epoch": 0.3080588495464446, "grad_norm": 1.450043042418401, "learning_rate": 3.6037156821377384e-06, "loss": 0.035, "step": 73830 }, { "epoch": 0.3080797122614349, "grad_norm": 0.4684026160658666, "learning_rate": 3.6035936591979466e-06, "loss": 0.0248, "step": 73835 }, { "epoch": 0.3081005749764251, "grad_norm": 0.9892268545634847, "learning_rate": 3.6034716486525194e-06, "loss": 0.0343, "step": 73840 }, { "epoch": 0.3081214376914154, "grad_norm": 1.1248526736601434, "learning_rate": 3.6033496504993604e-06, "loss": 0.0395, "step": 73845 }, { "epoch": 0.30814230040640567, "grad_norm": 0.8913176548026465, "learning_rate": 3.6032276647363703e-06, "loss": 0.0247, "step": 73850 }, { "epoch": 0.30816316312139597, "grad_norm": 0.6176017957880209, "learning_rate": 3.603105691361453e-06, "loss": 0.0281, "step": 73855 }, { "epoch": 0.30818402583638627, "grad_norm": 0.5601913858529781, "learning_rate": 3.6029837303725117e-06, "loss": 0.0312, "step": 73860 }, { "epoch": 0.3082048885513765, "grad_norm": 0.5994012740967037, "learning_rate": 3.6028617817674504e-06, "loss": 0.0301, "step": 73865 }, { "epoch": 0.3082257512663668, "grad_norm": 0.7942452904900729, "learning_rate": 3.602739845544173e-06, "loss": 0.0227, "step": 73870 }, { "epoch": 0.30824661398135705, "grad_norm": 0.9505767404947862, "learning_rate": 3.6026179217005845e-06, "loss": 0.0238, "step": 73875 }, { "epoch": 0.30826747669634735, "grad_norm": 0.9465967689459809, "learning_rate": 3.602496010234591e-06, "loss": 0.0338, "step": 73880 }, { "epoch": 0.30828833941133765, "grad_norm": 0.6298225401880295, "learning_rate": 3.602374111144097e-06, "loss": 0.0263, "step": 73885 }, { "epoch": 0.3083092021263279, "grad_norm": 1.5658300199853963, "learning_rate": 3.60225222442701e-06, "loss": 0.0289, "step": 73890 }, { "epoch": 0.3083300648413182, "grad_norm": 0.7035096733155236, "learning_rate": 3.6021303500812364e-06, "loss": 0.0233, "step": 73895 }, { "epoch": 0.30835092755630844, "grad_norm": 1.0153408898445486, "learning_rate": 3.6020084881046847e-06, "loss": 0.03, "step": 73900 }, { "epoch": 0.30837179027129874, "grad_norm": 0.8129802993107708, "learning_rate": 3.6018866384952604e-06, "loss": 0.0321, "step": 73905 }, { "epoch": 0.30839265298628904, "grad_norm": 0.8411341992305726, "learning_rate": 3.6017648012508743e-06, "loss": 0.028, "step": 73910 }, { "epoch": 0.3084135157012793, "grad_norm": 0.6247935507113236, "learning_rate": 3.6016429763694334e-06, "loss": 0.0226, "step": 73915 }, { "epoch": 0.3084343784162696, "grad_norm": 0.8096363290852773, "learning_rate": 3.6015211638488477e-06, "loss": 0.0304, "step": 73920 }, { "epoch": 0.3084552411312599, "grad_norm": 1.1743050454653838, "learning_rate": 3.6013993636870283e-06, "loss": 0.0373, "step": 73925 }, { "epoch": 0.3084761038462501, "grad_norm": 0.43506657833671486, "learning_rate": 3.601277575881883e-06, "loss": 0.0243, "step": 73930 }, { "epoch": 0.3084969665612404, "grad_norm": 1.2116361085468725, "learning_rate": 3.6011558004313245e-06, "loss": 0.0333, "step": 73935 }, { "epoch": 0.30851782927623067, "grad_norm": 0.83237048727312, "learning_rate": 3.6010340373332635e-06, "loss": 0.0321, "step": 73940 }, { "epoch": 0.30853869199122097, "grad_norm": 0.9636988721904837, "learning_rate": 3.6009122865856123e-06, "loss": 0.0305, "step": 73945 }, { "epoch": 0.30855955470621127, "grad_norm": 0.9399514849165184, "learning_rate": 3.600790548186283e-06, "loss": 0.0303, "step": 73950 }, { "epoch": 0.3085804174212015, "grad_norm": 1.5679175527203297, "learning_rate": 3.6006688221331878e-06, "loss": 0.0246, "step": 73955 }, { "epoch": 0.3086012801361918, "grad_norm": 0.6593790052583617, "learning_rate": 3.60054710842424e-06, "loss": 0.0459, "step": 73960 }, { "epoch": 0.30862214285118206, "grad_norm": 1.136751046502901, "learning_rate": 3.600425407057354e-06, "loss": 0.0391, "step": 73965 }, { "epoch": 0.30864300556617236, "grad_norm": 0.8818598869040343, "learning_rate": 3.600303718030444e-06, "loss": 0.038, "step": 73970 }, { "epoch": 0.30866386828116266, "grad_norm": 0.5911405808272814, "learning_rate": 3.6001820413414244e-06, "loss": 0.023, "step": 73975 }, { "epoch": 0.3086847309961529, "grad_norm": 0.8917150451702501, "learning_rate": 3.6000603769882116e-06, "loss": 0.0328, "step": 73980 }, { "epoch": 0.3087055937111432, "grad_norm": 0.8999956510368742, "learning_rate": 3.59993872496872e-06, "loss": 0.027, "step": 73985 }, { "epoch": 0.30872645642613344, "grad_norm": 0.7798561353744838, "learning_rate": 3.5998170852808667e-06, "loss": 0.0344, "step": 73990 }, { "epoch": 0.30874731914112374, "grad_norm": 0.790142208443627, "learning_rate": 3.599695457922568e-06, "loss": 0.0255, "step": 73995 }, { "epoch": 0.30876818185611404, "grad_norm": 0.49003530013287533, "learning_rate": 3.59957384289174e-06, "loss": 0.0307, "step": 74000 }, { "epoch": 0.3087890445711043, "grad_norm": 0.9955141220212843, "learning_rate": 3.5994522401863036e-06, "loss": 0.028, "step": 74005 }, { "epoch": 0.3088099072860946, "grad_norm": 0.9622509294961529, "learning_rate": 3.5993306498041736e-06, "loss": 0.033, "step": 74010 }, { "epoch": 0.3088307700010849, "grad_norm": 0.7000034102143358, "learning_rate": 3.5992090717432716e-06, "loss": 0.0276, "step": 74015 }, { "epoch": 0.30885163271607513, "grad_norm": 1.4414951505530376, "learning_rate": 3.5990875060015147e-06, "loss": 0.0274, "step": 74020 }, { "epoch": 0.30887249543106543, "grad_norm": 0.9307653301335429, "learning_rate": 3.5989659525768234e-06, "loss": 0.0321, "step": 74025 }, { "epoch": 0.3088933581460557, "grad_norm": 1.17371989961199, "learning_rate": 3.598844411467118e-06, "loss": 0.0372, "step": 74030 }, { "epoch": 0.308914220861046, "grad_norm": 0.6449067155451296, "learning_rate": 3.598722882670318e-06, "loss": 0.0273, "step": 74035 }, { "epoch": 0.3089350835760363, "grad_norm": 0.6813869243062785, "learning_rate": 3.598601366184347e-06, "loss": 0.0264, "step": 74040 }, { "epoch": 0.3089559462910265, "grad_norm": 0.6658587035645758, "learning_rate": 3.5984798620071248e-06, "loss": 0.0261, "step": 74045 }, { "epoch": 0.3089768090060168, "grad_norm": 0.5332902453004572, "learning_rate": 3.5983583701365736e-06, "loss": 0.0323, "step": 74050 }, { "epoch": 0.30899767172100706, "grad_norm": 1.3977911711710995, "learning_rate": 3.5982368905706168e-06, "loss": 0.0395, "step": 74055 }, { "epoch": 0.30901853443599736, "grad_norm": 0.7870400737312502, "learning_rate": 3.5981154233071774e-06, "loss": 0.0194, "step": 74060 }, { "epoch": 0.30903939715098766, "grad_norm": 1.1048772024609874, "learning_rate": 3.5979939683441785e-06, "loss": 0.0286, "step": 74065 }, { "epoch": 0.3090602598659779, "grad_norm": 0.9477359822098376, "learning_rate": 3.597872525679545e-06, "loss": 0.0346, "step": 74070 }, { "epoch": 0.3090811225809682, "grad_norm": 0.9615990693208609, "learning_rate": 3.597751095311201e-06, "loss": 0.0324, "step": 74075 }, { "epoch": 0.30910198529595845, "grad_norm": 0.5650294239116714, "learning_rate": 3.597629677237072e-06, "loss": 0.0259, "step": 74080 }, { "epoch": 0.30912284801094875, "grad_norm": 0.5384102565830401, "learning_rate": 3.5975082714550823e-06, "loss": 0.0234, "step": 74085 }, { "epoch": 0.30914371072593905, "grad_norm": 0.7735326033905493, "learning_rate": 3.5973868779631597e-06, "loss": 0.0286, "step": 74090 }, { "epoch": 0.3091645734409293, "grad_norm": 0.9436530489379485, "learning_rate": 3.5972654967592296e-06, "loss": 0.026, "step": 74095 }, { "epoch": 0.3091854361559196, "grad_norm": 0.779435274985855, "learning_rate": 3.59714412784122e-06, "loss": 0.0299, "step": 74100 }, { "epoch": 0.3092062988709099, "grad_norm": 0.7292271033275965, "learning_rate": 3.597022771207058e-06, "loss": 0.0247, "step": 74105 }, { "epoch": 0.30922716158590013, "grad_norm": 0.7353603671845929, "learning_rate": 3.5969014268546716e-06, "loss": 0.0256, "step": 74110 }, { "epoch": 0.30924802430089043, "grad_norm": 0.6759628963204567, "learning_rate": 3.5967800947819885e-06, "loss": 0.0333, "step": 74115 }, { "epoch": 0.3092688870158807, "grad_norm": 0.6868002500927866, "learning_rate": 3.59665877498694e-06, "loss": 0.031, "step": 74120 }, { "epoch": 0.309289749730871, "grad_norm": 0.5774100557667908, "learning_rate": 3.596537467467453e-06, "loss": 0.0289, "step": 74125 }, { "epoch": 0.3093106124458613, "grad_norm": 0.6487235407226906, "learning_rate": 3.596416172221459e-06, "loss": 0.0199, "step": 74130 }, { "epoch": 0.3093314751608515, "grad_norm": 1.0526965323463573, "learning_rate": 3.5962948892468887e-06, "loss": 0.0385, "step": 74135 }, { "epoch": 0.3093523378758418, "grad_norm": 0.8500913511309335, "learning_rate": 3.596173618541672e-06, "loss": 0.0252, "step": 74140 }, { "epoch": 0.30937320059083206, "grad_norm": 0.4989592823322899, "learning_rate": 3.5960523601037405e-06, "loss": 0.0298, "step": 74145 }, { "epoch": 0.30939406330582236, "grad_norm": 0.7150665732773517, "learning_rate": 3.595931113931027e-06, "loss": 0.0399, "step": 74150 }, { "epoch": 0.30941492602081266, "grad_norm": 0.5100523869790009, "learning_rate": 3.595809880021464e-06, "loss": 0.0282, "step": 74155 }, { "epoch": 0.3094357887358029, "grad_norm": 0.6985534322147565, "learning_rate": 3.595688658372983e-06, "loss": 0.0254, "step": 74160 }, { "epoch": 0.3094566514507932, "grad_norm": 0.7650464405738205, "learning_rate": 3.595567448983519e-06, "loss": 0.0262, "step": 74165 }, { "epoch": 0.30947751416578345, "grad_norm": 0.685544595488337, "learning_rate": 3.595446251851005e-06, "loss": 0.0218, "step": 74170 }, { "epoch": 0.30949837688077375, "grad_norm": 0.8423981079970588, "learning_rate": 3.5953250669733757e-06, "loss": 0.0339, "step": 74175 }, { "epoch": 0.30951923959576405, "grad_norm": 1.0582588640125365, "learning_rate": 3.5952038943485658e-06, "loss": 0.0395, "step": 74180 }, { "epoch": 0.3095401023107543, "grad_norm": 0.814789960555564, "learning_rate": 3.595082733974511e-06, "loss": 0.0355, "step": 74185 }, { "epoch": 0.3095609650257446, "grad_norm": 0.7709296166848096, "learning_rate": 3.594961585849147e-06, "loss": 0.0294, "step": 74190 }, { "epoch": 0.3095818277407349, "grad_norm": 0.8701307280281393, "learning_rate": 3.59484044997041e-06, "loss": 0.0323, "step": 74195 }, { "epoch": 0.30960269045572514, "grad_norm": 1.538168022339345, "learning_rate": 3.5947193263362366e-06, "loss": 0.0272, "step": 74200 }, { "epoch": 0.30962355317071544, "grad_norm": 0.691777105356085, "learning_rate": 3.5945982149445645e-06, "loss": 0.0238, "step": 74205 }, { "epoch": 0.3096444158857057, "grad_norm": 0.8550156388455133, "learning_rate": 3.5944771157933316e-06, "loss": 0.033, "step": 74210 }, { "epoch": 0.309665278600696, "grad_norm": 0.3638798617815569, "learning_rate": 3.594356028880476e-06, "loss": 0.027, "step": 74215 }, { "epoch": 0.3096861413156863, "grad_norm": 0.8992691305824644, "learning_rate": 3.594234954203936e-06, "loss": 0.0255, "step": 74220 }, { "epoch": 0.3097070040306765, "grad_norm": 0.5911626778587729, "learning_rate": 3.5941138917616517e-06, "loss": 0.0301, "step": 74225 }, { "epoch": 0.3097278667456668, "grad_norm": 1.3714039852071742, "learning_rate": 3.593992841551563e-06, "loss": 0.038, "step": 74230 }, { "epoch": 0.30974872946065707, "grad_norm": 0.4828347557913419, "learning_rate": 3.593871803571609e-06, "loss": 0.03, "step": 74235 }, { "epoch": 0.30976959217564737, "grad_norm": 0.6372562194898136, "learning_rate": 3.5937507778197304e-06, "loss": 0.0258, "step": 74240 }, { "epoch": 0.30979045489063767, "grad_norm": 0.7834443768816025, "learning_rate": 3.59362976429387e-06, "loss": 0.0218, "step": 74245 }, { "epoch": 0.3098113176056279, "grad_norm": 0.7816623236174903, "learning_rate": 3.593508762991968e-06, "loss": 0.0348, "step": 74250 }, { "epoch": 0.3098321803206182, "grad_norm": 0.9252494901062531, "learning_rate": 3.5933877739119675e-06, "loss": 0.0288, "step": 74255 }, { "epoch": 0.30985304303560846, "grad_norm": 0.6494667543400559, "learning_rate": 3.59326679705181e-06, "loss": 0.049, "step": 74260 }, { "epoch": 0.30987390575059875, "grad_norm": 0.8058942965237992, "learning_rate": 3.5931458324094402e-06, "loss": 0.0268, "step": 74265 }, { "epoch": 0.30989476846558905, "grad_norm": 0.5220174463103892, "learning_rate": 3.5930248799828004e-06, "loss": 0.0341, "step": 74270 }, { "epoch": 0.3099156311805793, "grad_norm": 0.7081825593276418, "learning_rate": 3.5929039397698356e-06, "loss": 0.0323, "step": 74275 }, { "epoch": 0.3099364938955696, "grad_norm": 0.7811061438003147, "learning_rate": 3.5927830117684897e-06, "loss": 0.036, "step": 74280 }, { "epoch": 0.3099573566105599, "grad_norm": 0.7977253790826008, "learning_rate": 3.5926620959767084e-06, "loss": 0.038, "step": 74285 }, { "epoch": 0.30997821932555014, "grad_norm": 0.6208374763826072, "learning_rate": 3.5925411923924365e-06, "loss": 0.0317, "step": 74290 }, { "epoch": 0.30999908204054044, "grad_norm": 0.7353213879283492, "learning_rate": 3.5924203010136207e-06, "loss": 0.0357, "step": 74295 }, { "epoch": 0.3100199447555307, "grad_norm": 0.8238657519456359, "learning_rate": 3.5922994218382067e-06, "loss": 0.0338, "step": 74300 }, { "epoch": 0.310040807470521, "grad_norm": 0.7082332365179157, "learning_rate": 3.592178554864143e-06, "loss": 0.0362, "step": 74305 }, { "epoch": 0.3100616701855113, "grad_norm": 0.7193309927002691, "learning_rate": 3.5920577000893757e-06, "loss": 0.0256, "step": 74310 }, { "epoch": 0.31008253290050153, "grad_norm": 0.8337618049204498, "learning_rate": 3.5919368575118536e-06, "loss": 0.0289, "step": 74315 }, { "epoch": 0.31010339561549183, "grad_norm": 0.4609453470958483, "learning_rate": 3.5918160271295248e-06, "loss": 0.0284, "step": 74320 }, { "epoch": 0.31012425833048207, "grad_norm": 0.8999730250887086, "learning_rate": 3.5916952089403385e-06, "loss": 0.0273, "step": 74325 }, { "epoch": 0.31014512104547237, "grad_norm": 0.5742209382429949, "learning_rate": 3.5915744029422434e-06, "loss": 0.0293, "step": 74330 }, { "epoch": 0.31016598376046267, "grad_norm": 0.5897952268106047, "learning_rate": 3.5914536091331904e-06, "loss": 0.0331, "step": 74335 }, { "epoch": 0.3101868464754529, "grad_norm": 0.8102085906247871, "learning_rate": 3.5913328275111293e-06, "loss": 0.0388, "step": 74340 }, { "epoch": 0.3102077091904432, "grad_norm": 0.4916733020901051, "learning_rate": 3.591212058074011e-06, "loss": 0.0266, "step": 74345 }, { "epoch": 0.31022857190543346, "grad_norm": 0.842791423713764, "learning_rate": 3.5910913008197873e-06, "loss": 0.0297, "step": 74350 }, { "epoch": 0.31024943462042376, "grad_norm": 0.6897194678901264, "learning_rate": 3.5909705557464093e-06, "loss": 0.0377, "step": 74355 }, { "epoch": 0.31027029733541406, "grad_norm": 0.6500314650126445, "learning_rate": 3.59084982285183e-06, "loss": 0.0314, "step": 74360 }, { "epoch": 0.3102911600504043, "grad_norm": 0.7026511668331662, "learning_rate": 3.590729102134002e-06, "loss": 0.0364, "step": 74365 }, { "epoch": 0.3103120227653946, "grad_norm": 0.45426396454721674, "learning_rate": 3.5906083935908786e-06, "loss": 0.0265, "step": 74370 }, { "epoch": 0.3103328854803849, "grad_norm": 0.8119017272547728, "learning_rate": 3.5904876972204136e-06, "loss": 0.0298, "step": 74375 }, { "epoch": 0.31035374819537515, "grad_norm": 0.3389593440150548, "learning_rate": 3.5903670130205605e-06, "loss": 0.0204, "step": 74380 }, { "epoch": 0.31037461091036544, "grad_norm": 0.8190735052231475, "learning_rate": 3.5902463409892755e-06, "loss": 0.0246, "step": 74385 }, { "epoch": 0.3103954736253557, "grad_norm": 1.0215529084549566, "learning_rate": 3.5901256811245126e-06, "loss": 0.0312, "step": 74390 }, { "epoch": 0.310416336340346, "grad_norm": 0.6744764229408783, "learning_rate": 3.5900050334242283e-06, "loss": 0.0238, "step": 74395 }, { "epoch": 0.3104371990553363, "grad_norm": 0.7897749671901744, "learning_rate": 3.589884397886378e-06, "loss": 0.0309, "step": 74400 }, { "epoch": 0.31045806177032653, "grad_norm": 0.6490483635279308, "learning_rate": 3.5897637745089186e-06, "loss": 0.0356, "step": 74405 }, { "epoch": 0.31047892448531683, "grad_norm": 0.607513190369229, "learning_rate": 3.5896431632898075e-06, "loss": 0.0317, "step": 74410 }, { "epoch": 0.3104997872003071, "grad_norm": 1.056286571917625, "learning_rate": 3.5895225642270025e-06, "loss": 0.0328, "step": 74415 }, { "epoch": 0.3105206499152974, "grad_norm": 0.7016903712094925, "learning_rate": 3.5894019773184613e-06, "loss": 0.0237, "step": 74420 }, { "epoch": 0.3105415126302877, "grad_norm": 0.6451124046225273, "learning_rate": 3.5892814025621425e-06, "loss": 0.0217, "step": 74425 }, { "epoch": 0.3105623753452779, "grad_norm": 2.3715500876741147, "learning_rate": 3.5891608399560057e-06, "loss": 0.0261, "step": 74430 }, { "epoch": 0.3105832380602682, "grad_norm": 1.196985443101757, "learning_rate": 3.589040289498009e-06, "loss": 0.0356, "step": 74435 }, { "epoch": 0.31060410077525846, "grad_norm": 0.6537248576768572, "learning_rate": 3.5889197511861133e-06, "loss": 0.0246, "step": 74440 }, { "epoch": 0.31062496349024876, "grad_norm": 0.8596733173480048, "learning_rate": 3.58879922501828e-06, "loss": 0.0323, "step": 74445 }, { "epoch": 0.31064582620523906, "grad_norm": 0.5003195431328947, "learning_rate": 3.588678710992469e-06, "loss": 0.0258, "step": 74450 }, { "epoch": 0.3106666889202293, "grad_norm": 0.7638974947009559, "learning_rate": 3.5885582091066425e-06, "loss": 0.0279, "step": 74455 }, { "epoch": 0.3106875516352196, "grad_norm": 0.7435768846584957, "learning_rate": 3.588437719358761e-06, "loss": 0.0183, "step": 74460 }, { "epoch": 0.3107084143502099, "grad_norm": 0.6897515524045892, "learning_rate": 3.588317241746788e-06, "loss": 0.0341, "step": 74465 }, { "epoch": 0.31072927706520015, "grad_norm": 0.6884717586437665, "learning_rate": 3.588196776268687e-06, "loss": 0.028, "step": 74470 }, { "epoch": 0.31075013978019045, "grad_norm": 0.7177021495158727, "learning_rate": 3.5880763229224196e-06, "loss": 0.0297, "step": 74475 }, { "epoch": 0.3107710024951807, "grad_norm": 0.9055341477947784, "learning_rate": 3.5879558817059513e-06, "loss": 0.0344, "step": 74480 }, { "epoch": 0.310791865210171, "grad_norm": 0.44364296534518183, "learning_rate": 3.5878354526172452e-06, "loss": 0.0303, "step": 74485 }, { "epoch": 0.3108127279251613, "grad_norm": 1.0537945425009538, "learning_rate": 3.5877150356542673e-06, "loss": 0.0258, "step": 74490 }, { "epoch": 0.31083359064015154, "grad_norm": 0.43881745244846165, "learning_rate": 3.5875946308149825e-06, "loss": 0.0283, "step": 74495 }, { "epoch": 0.31085445335514184, "grad_norm": 0.6339933420599227, "learning_rate": 3.587474238097356e-06, "loss": 0.0281, "step": 74500 }, { "epoch": 0.3108753160701321, "grad_norm": 0.7790807596515034, "learning_rate": 3.5873538574993543e-06, "loss": 0.027, "step": 74505 }, { "epoch": 0.3108961787851224, "grad_norm": 0.6806960991967831, "learning_rate": 3.5872334890189434e-06, "loss": 0.0177, "step": 74510 }, { "epoch": 0.3109170415001127, "grad_norm": 0.5682617404542843, "learning_rate": 3.587113132654093e-06, "loss": 0.0211, "step": 74515 }, { "epoch": 0.3109379042151029, "grad_norm": 1.3859172931628254, "learning_rate": 3.586992788402768e-06, "loss": 0.0342, "step": 74520 }, { "epoch": 0.3109587669300932, "grad_norm": 1.1922636481656856, "learning_rate": 3.586872456262938e-06, "loss": 0.0357, "step": 74525 }, { "epoch": 0.31097962964508347, "grad_norm": 1.4827116417907331, "learning_rate": 3.586752136232571e-06, "loss": 0.0387, "step": 74530 }, { "epoch": 0.31100049236007377, "grad_norm": 0.8699979515006632, "learning_rate": 3.5866318283096362e-06, "loss": 0.0292, "step": 74535 }, { "epoch": 0.31102135507506407, "grad_norm": 1.2858584640910267, "learning_rate": 3.5865115324921035e-06, "loss": 0.0347, "step": 74540 }, { "epoch": 0.3110422177900543, "grad_norm": 0.3813057664799528, "learning_rate": 3.5863912487779427e-06, "loss": 0.031, "step": 74545 }, { "epoch": 0.3110630805050446, "grad_norm": 0.6655992279991388, "learning_rate": 3.5862709771651248e-06, "loss": 0.0287, "step": 74550 }, { "epoch": 0.3110839432200349, "grad_norm": 0.9145233602308357, "learning_rate": 3.5861507176516202e-06, "loss": 0.032, "step": 74555 }, { "epoch": 0.31110480593502515, "grad_norm": 0.4701929766604658, "learning_rate": 3.5860304702354005e-06, "loss": 0.0269, "step": 74560 }, { "epoch": 0.31112566865001545, "grad_norm": 0.5238898686570012, "learning_rate": 3.585910234914438e-06, "loss": 0.0285, "step": 74565 }, { "epoch": 0.3111465313650057, "grad_norm": 0.49907014719873216, "learning_rate": 3.585790011686705e-06, "loss": 0.0285, "step": 74570 }, { "epoch": 0.311167394079996, "grad_norm": 0.980201452635774, "learning_rate": 3.5856698005501737e-06, "loss": 0.0314, "step": 74575 }, { "epoch": 0.3111882567949863, "grad_norm": 0.5629973688667502, "learning_rate": 3.5855496015028183e-06, "loss": 0.0257, "step": 74580 }, { "epoch": 0.31120911950997654, "grad_norm": 0.9481720795258973, "learning_rate": 3.585429414542613e-06, "loss": 0.0383, "step": 74585 }, { "epoch": 0.31122998222496684, "grad_norm": 0.43889259779078954, "learning_rate": 3.5853092396675314e-06, "loss": 0.0221, "step": 74590 }, { "epoch": 0.3112508449399571, "grad_norm": 0.7757876545965968, "learning_rate": 3.585189076875548e-06, "loss": 0.0288, "step": 74595 }, { "epoch": 0.3112717076549474, "grad_norm": 0.9838589837874094, "learning_rate": 3.5850689261646394e-06, "loss": 0.0304, "step": 74600 }, { "epoch": 0.3112925703699377, "grad_norm": 0.5638987996431164, "learning_rate": 3.5849487875327806e-06, "loss": 0.0278, "step": 74605 }, { "epoch": 0.3113134330849279, "grad_norm": 0.9941592793719936, "learning_rate": 3.5848286609779476e-06, "loss": 0.0315, "step": 74610 }, { "epoch": 0.3113342957999182, "grad_norm": 0.46182283848479233, "learning_rate": 3.584708546498117e-06, "loss": 0.0322, "step": 74615 }, { "epoch": 0.31135515851490847, "grad_norm": 0.3720418324981827, "learning_rate": 3.5845884440912665e-06, "loss": 0.0202, "step": 74620 }, { "epoch": 0.31137602122989877, "grad_norm": 0.6785604764786591, "learning_rate": 3.584468353755374e-06, "loss": 0.0249, "step": 74625 }, { "epoch": 0.31139688394488907, "grad_norm": 0.8269809299326764, "learning_rate": 3.5843482754884166e-06, "loss": 0.0292, "step": 74630 }, { "epoch": 0.3114177466598793, "grad_norm": 0.628778196248518, "learning_rate": 3.584228209288374e-06, "loss": 0.0229, "step": 74635 }, { "epoch": 0.3114386093748696, "grad_norm": 0.576909735736913, "learning_rate": 3.5841081551532246e-06, "loss": 0.029, "step": 74640 }, { "epoch": 0.3114594720898599, "grad_norm": 1.0097985401126661, "learning_rate": 3.583988113080949e-06, "loss": 0.0323, "step": 74645 }, { "epoch": 0.31148033480485016, "grad_norm": 1.1019981903018097, "learning_rate": 3.5838680830695254e-06, "loss": 0.035, "step": 74650 }, { "epoch": 0.31150119751984046, "grad_norm": 0.8846121840217612, "learning_rate": 3.5837480651169353e-06, "loss": 0.0216, "step": 74655 }, { "epoch": 0.3115220602348307, "grad_norm": 0.8341191631945553, "learning_rate": 3.583628059221161e-06, "loss": 0.0306, "step": 74660 }, { "epoch": 0.311542922949821, "grad_norm": 0.8971597485160736, "learning_rate": 3.5835080653801814e-06, "loss": 0.031, "step": 74665 }, { "epoch": 0.3115637856648113, "grad_norm": 0.4807732570053224, "learning_rate": 3.58338808359198e-06, "loss": 0.034, "step": 74670 }, { "epoch": 0.31158464837980154, "grad_norm": 0.6107039236901205, "learning_rate": 3.5832681138545393e-06, "loss": 0.025, "step": 74675 }, { "epoch": 0.31160551109479184, "grad_norm": 1.1708483554362472, "learning_rate": 3.583148156165842e-06, "loss": 0.0413, "step": 74680 }, { "epoch": 0.3116263738097821, "grad_norm": 1.1500803887669062, "learning_rate": 3.5830282105238707e-06, "loss": 0.0326, "step": 74685 }, { "epoch": 0.3116472365247724, "grad_norm": 0.8972485666275261, "learning_rate": 3.5829082769266096e-06, "loss": 0.028, "step": 74690 }, { "epoch": 0.3116680992397627, "grad_norm": 0.9768964071725752, "learning_rate": 3.582788355372043e-06, "loss": 0.0303, "step": 74695 }, { "epoch": 0.31168896195475293, "grad_norm": 1.263344756985826, "learning_rate": 3.582668445858157e-06, "loss": 0.0357, "step": 74700 }, { "epoch": 0.31170982466974323, "grad_norm": 0.9751352661373857, "learning_rate": 3.582548548382934e-06, "loss": 0.0313, "step": 74705 }, { "epoch": 0.3117306873847335, "grad_norm": 1.1139621103519193, "learning_rate": 3.5824286629443627e-06, "loss": 0.0357, "step": 74710 }, { "epoch": 0.3117515500997238, "grad_norm": 1.0933331582077115, "learning_rate": 3.582308789540427e-06, "loss": 0.0291, "step": 74715 }, { "epoch": 0.3117724128147141, "grad_norm": 0.723197180312047, "learning_rate": 3.5821889281691146e-06, "loss": 0.0345, "step": 74720 }, { "epoch": 0.3117932755297043, "grad_norm": 0.7820408271587653, "learning_rate": 3.582069078828413e-06, "loss": 0.0248, "step": 74725 }, { "epoch": 0.3118141382446946, "grad_norm": 1.2399036747098364, "learning_rate": 3.581949241516309e-06, "loss": 0.0201, "step": 74730 }, { "epoch": 0.3118350009596849, "grad_norm": 0.5837095065826468, "learning_rate": 3.5818294162307908e-06, "loss": 0.0325, "step": 74735 }, { "epoch": 0.31185586367467516, "grad_norm": 0.6953395651655342, "learning_rate": 3.581709602969848e-06, "loss": 0.035, "step": 74740 }, { "epoch": 0.31187672638966546, "grad_norm": 0.5253096354982808, "learning_rate": 3.5815898017314675e-06, "loss": 0.0316, "step": 74745 }, { "epoch": 0.3118975891046557, "grad_norm": 0.853447795189156, "learning_rate": 3.5814700125136408e-06, "loss": 0.028, "step": 74750 }, { "epoch": 0.311918451819646, "grad_norm": 2.67450369599558, "learning_rate": 3.5813502353143565e-06, "loss": 0.0334, "step": 74755 }, { "epoch": 0.3119393145346363, "grad_norm": 0.8926790496866727, "learning_rate": 3.581230470131606e-06, "loss": 0.0321, "step": 74760 }, { "epoch": 0.31196017724962655, "grad_norm": 0.8846729188849086, "learning_rate": 3.581110716963379e-06, "loss": 0.0258, "step": 74765 }, { "epoch": 0.31198103996461685, "grad_norm": 0.6939275991086403, "learning_rate": 3.5809909758076684e-06, "loss": 0.0257, "step": 74770 }, { "epoch": 0.3120019026796071, "grad_norm": 2.3870835392227674, "learning_rate": 3.5808712466624644e-06, "loss": 0.0346, "step": 74775 }, { "epoch": 0.3120227653945974, "grad_norm": 1.028673204474997, "learning_rate": 3.5807515295257605e-06, "loss": 0.0251, "step": 74780 }, { "epoch": 0.3120436281095877, "grad_norm": 0.7474124907953321, "learning_rate": 3.5806318243955495e-06, "loss": 0.0294, "step": 74785 }, { "epoch": 0.31206449082457793, "grad_norm": 0.6816024567206848, "learning_rate": 3.580512131269824e-06, "loss": 0.0265, "step": 74790 }, { "epoch": 0.31208535353956823, "grad_norm": 2.712208574267605, "learning_rate": 3.5803924501465773e-06, "loss": 0.0332, "step": 74795 }, { "epoch": 0.3121062162545585, "grad_norm": 0.7226104369406454, "learning_rate": 3.5802727810238047e-06, "loss": 0.0288, "step": 74800 }, { "epoch": 0.3121270789695488, "grad_norm": 1.0342827723947108, "learning_rate": 3.5801531238995006e-06, "loss": 0.0383, "step": 74805 }, { "epoch": 0.3121479416845391, "grad_norm": 1.1478331033967948, "learning_rate": 3.5800334787716594e-06, "loss": 0.0258, "step": 74810 }, { "epoch": 0.3121688043995293, "grad_norm": 0.9804914067057025, "learning_rate": 3.579913845638277e-06, "loss": 0.0278, "step": 74815 }, { "epoch": 0.3121896671145196, "grad_norm": 0.3765913508096299, "learning_rate": 3.57979422449735e-06, "loss": 0.0313, "step": 74820 }, { "epoch": 0.3122105298295099, "grad_norm": 0.6454724408379985, "learning_rate": 3.579674615346875e-06, "loss": 0.0313, "step": 74825 }, { "epoch": 0.31223139254450016, "grad_norm": 0.8245861245301079, "learning_rate": 3.5795550181848477e-06, "loss": 0.0226, "step": 74830 }, { "epoch": 0.31225225525949046, "grad_norm": 1.0160843877339438, "learning_rate": 3.5794354330092666e-06, "loss": 0.0219, "step": 74835 }, { "epoch": 0.3122731179744807, "grad_norm": 1.175943288762462, "learning_rate": 3.5793158598181294e-06, "loss": 0.0248, "step": 74840 }, { "epoch": 0.312293980689471, "grad_norm": 0.7104698152811589, "learning_rate": 3.5791962986094354e-06, "loss": 0.0296, "step": 74845 }, { "epoch": 0.3123148434044613, "grad_norm": 0.7831781906812141, "learning_rate": 3.5790767493811813e-06, "loss": 0.0301, "step": 74850 }, { "epoch": 0.31233570611945155, "grad_norm": 0.8985340845093371, "learning_rate": 3.578957212131368e-06, "loss": 0.0241, "step": 74855 }, { "epoch": 0.31235656883444185, "grad_norm": 1.1458636767032284, "learning_rate": 3.5788376868579953e-06, "loss": 0.0352, "step": 74860 }, { "epoch": 0.3123774315494321, "grad_norm": 0.8898655453654158, "learning_rate": 3.5787181735590632e-06, "loss": 0.0433, "step": 74865 }, { "epoch": 0.3123982942644224, "grad_norm": 0.976385848155349, "learning_rate": 3.578598672232572e-06, "loss": 0.0429, "step": 74870 }, { "epoch": 0.3124191569794127, "grad_norm": 0.42083671244181015, "learning_rate": 3.5784791828765235e-06, "loss": 0.02, "step": 74875 }, { "epoch": 0.31244001969440294, "grad_norm": 0.7961423971661065, "learning_rate": 3.5783597054889194e-06, "loss": 0.0273, "step": 74880 }, { "epoch": 0.31246088240939324, "grad_norm": 0.8044344576136706, "learning_rate": 3.578240240067762e-06, "loss": 0.0242, "step": 74885 }, { "epoch": 0.3124817451243835, "grad_norm": 0.8209138878146494, "learning_rate": 3.578120786611053e-06, "loss": 0.0307, "step": 74890 }, { "epoch": 0.3125026078393738, "grad_norm": 0.7416179613771712, "learning_rate": 3.5780013451167957e-06, "loss": 0.0276, "step": 74895 }, { "epoch": 0.3125234705543641, "grad_norm": 1.2833407407372908, "learning_rate": 3.5778819155829935e-06, "loss": 0.038, "step": 74900 }, { "epoch": 0.3125443332693543, "grad_norm": 1.4985942498518192, "learning_rate": 3.577762498007652e-06, "loss": 0.0349, "step": 74905 }, { "epoch": 0.3125651959843446, "grad_norm": 0.7373398500991551, "learning_rate": 3.577643092388774e-06, "loss": 0.03, "step": 74910 }, { "epoch": 0.3125860586993349, "grad_norm": 0.8361746144977416, "learning_rate": 3.5775236987243646e-06, "loss": 0.031, "step": 74915 }, { "epoch": 0.31260692141432517, "grad_norm": 1.0326303989437284, "learning_rate": 3.57740431701243e-06, "loss": 0.0275, "step": 74920 }, { "epoch": 0.31262778412931547, "grad_norm": 0.7613003812431777, "learning_rate": 3.5772849472509756e-06, "loss": 0.0388, "step": 74925 }, { "epoch": 0.3126486468443057, "grad_norm": 1.3994885861855184, "learning_rate": 3.5771655894380073e-06, "loss": 0.0265, "step": 74930 }, { "epoch": 0.312669509559296, "grad_norm": 0.6838963855613932, "learning_rate": 3.577046243571533e-06, "loss": 0.0298, "step": 74935 }, { "epoch": 0.3126903722742863, "grad_norm": 0.9702791824942941, "learning_rate": 3.576926909649559e-06, "loss": 0.0341, "step": 74940 }, { "epoch": 0.31271123498927655, "grad_norm": 1.2161411528222796, "learning_rate": 3.576807587670093e-06, "loss": 0.0318, "step": 74945 }, { "epoch": 0.31273209770426685, "grad_norm": 0.6817147727352073, "learning_rate": 3.5766882776311434e-06, "loss": 0.0263, "step": 74950 }, { "epoch": 0.3127529604192571, "grad_norm": 0.8169122023975145, "learning_rate": 3.576568979530719e-06, "loss": 0.0392, "step": 74955 }, { "epoch": 0.3127738231342474, "grad_norm": 0.7780288859948333, "learning_rate": 3.5764496933668296e-06, "loss": 0.0278, "step": 74960 }, { "epoch": 0.3127946858492377, "grad_norm": 0.822634573844372, "learning_rate": 3.576330419137483e-06, "loss": 0.0254, "step": 74965 }, { "epoch": 0.31281554856422794, "grad_norm": 0.6488475726609118, "learning_rate": 3.576211156840692e-06, "loss": 0.03, "step": 74970 }, { "epoch": 0.31283641127921824, "grad_norm": 0.7391171617213843, "learning_rate": 3.5760919064744643e-06, "loss": 0.0293, "step": 74975 }, { "epoch": 0.3128572739942085, "grad_norm": 0.7305626060590549, "learning_rate": 3.5759726680368117e-06, "loss": 0.0282, "step": 74980 }, { "epoch": 0.3128781367091988, "grad_norm": 0.9613119181302341, "learning_rate": 3.575853441525747e-06, "loss": 0.0303, "step": 74985 }, { "epoch": 0.3128989994241891, "grad_norm": 0.6860938264489737, "learning_rate": 3.57573422693928e-06, "loss": 0.0301, "step": 74990 }, { "epoch": 0.31291986213917933, "grad_norm": 0.8737175177213419, "learning_rate": 3.575615024275425e-06, "loss": 0.0347, "step": 74995 }, { "epoch": 0.3129407248541696, "grad_norm": 0.6564771579480331, "learning_rate": 3.5754958335321937e-06, "loss": 0.03, "step": 75000 }, { "epoch": 0.3129615875691599, "grad_norm": 0.6064728410005309, "learning_rate": 3.5753766547076002e-06, "loss": 0.0326, "step": 75005 }, { "epoch": 0.31298245028415017, "grad_norm": 1.0833426942863833, "learning_rate": 3.575257487799657e-06, "loss": 0.0388, "step": 75010 }, { "epoch": 0.31300331299914047, "grad_norm": 0.4525247799312022, "learning_rate": 3.575138332806379e-06, "loss": 0.0211, "step": 75015 }, { "epoch": 0.3130241757141307, "grad_norm": 0.7176168477420211, "learning_rate": 3.575019189725782e-06, "loss": 0.0292, "step": 75020 }, { "epoch": 0.313045038429121, "grad_norm": 0.5830972107343214, "learning_rate": 3.5749000585558796e-06, "loss": 0.0296, "step": 75025 }, { "epoch": 0.3130659011441113, "grad_norm": 1.51924026954401, "learning_rate": 3.5747809392946875e-06, "loss": 0.0334, "step": 75030 }, { "epoch": 0.31308676385910156, "grad_norm": 0.7911117964261897, "learning_rate": 3.574661831940223e-06, "loss": 0.0298, "step": 75035 }, { "epoch": 0.31310762657409186, "grad_norm": 0.8804758381562665, "learning_rate": 3.574542736490502e-06, "loss": 0.0253, "step": 75040 }, { "epoch": 0.3131284892890821, "grad_norm": 1.0823169232568732, "learning_rate": 3.5744236529435403e-06, "loss": 0.0342, "step": 75045 }, { "epoch": 0.3131493520040724, "grad_norm": 0.6816558543684258, "learning_rate": 3.5743045812973575e-06, "loss": 0.0356, "step": 75050 }, { "epoch": 0.3131702147190627, "grad_norm": 0.8878564342484879, "learning_rate": 3.57418552154997e-06, "loss": 0.0341, "step": 75055 }, { "epoch": 0.31319107743405294, "grad_norm": 0.7660835572567858, "learning_rate": 3.5740664736993973e-06, "loss": 0.0259, "step": 75060 }, { "epoch": 0.31321194014904324, "grad_norm": 0.9393033798541485, "learning_rate": 3.5739474377436575e-06, "loss": 0.0316, "step": 75065 }, { "epoch": 0.3132328028640335, "grad_norm": 0.8225157479808538, "learning_rate": 3.5738284136807693e-06, "loss": 0.0384, "step": 75070 }, { "epoch": 0.3132536655790238, "grad_norm": 0.9569958050020488, "learning_rate": 3.5737094015087537e-06, "loss": 0.0358, "step": 75075 }, { "epoch": 0.3132745282940141, "grad_norm": 0.5205984229892643, "learning_rate": 3.5735904012256307e-06, "loss": 0.0345, "step": 75080 }, { "epoch": 0.31329539100900433, "grad_norm": 0.9083272183815055, "learning_rate": 3.5734714128294212e-06, "loss": 0.0264, "step": 75085 }, { "epoch": 0.31331625372399463, "grad_norm": 0.4721745840548626, "learning_rate": 3.573352436318145e-06, "loss": 0.0309, "step": 75090 }, { "epoch": 0.31333711643898493, "grad_norm": 0.6829505811090107, "learning_rate": 3.573233471689825e-06, "loss": 0.0222, "step": 75095 }, { "epoch": 0.3133579791539752, "grad_norm": 0.886756201794494, "learning_rate": 3.573114518942483e-06, "loss": 0.0303, "step": 75100 }, { "epoch": 0.3133788418689655, "grad_norm": 0.8546407410631761, "learning_rate": 3.5729955780741414e-06, "loss": 0.0339, "step": 75105 }, { "epoch": 0.3133997045839557, "grad_norm": 0.757247555425654, "learning_rate": 3.5728766490828238e-06, "loss": 0.0295, "step": 75110 }, { "epoch": 0.313420567298946, "grad_norm": 1.189582814913934, "learning_rate": 3.5727577319665523e-06, "loss": 0.0274, "step": 75115 }, { "epoch": 0.3134414300139363, "grad_norm": 0.6251443565885607, "learning_rate": 3.572638826723353e-06, "loss": 0.0328, "step": 75120 }, { "epoch": 0.31346229272892656, "grad_norm": 1.118714544646641, "learning_rate": 3.572519933351248e-06, "loss": 0.0391, "step": 75125 }, { "epoch": 0.31348315544391686, "grad_norm": 0.9139216001460806, "learning_rate": 3.5724010518482633e-06, "loss": 0.0305, "step": 75130 }, { "epoch": 0.3135040181589071, "grad_norm": 0.9133940832760972, "learning_rate": 3.5722821822124245e-06, "loss": 0.029, "step": 75135 }, { "epoch": 0.3135248808738974, "grad_norm": 0.8206451671664307, "learning_rate": 3.5721633244417563e-06, "loss": 0.036, "step": 75140 }, { "epoch": 0.3135457435888877, "grad_norm": 1.0395671757067138, "learning_rate": 3.5720444785342856e-06, "loss": 0.0332, "step": 75145 }, { "epoch": 0.31356660630387795, "grad_norm": 0.48921052676668825, "learning_rate": 3.571925644488039e-06, "loss": 0.0194, "step": 75150 }, { "epoch": 0.31358746901886825, "grad_norm": 0.8342409031559468, "learning_rate": 3.571806822301044e-06, "loss": 0.0273, "step": 75155 }, { "epoch": 0.3136083317338585, "grad_norm": 0.7813681035016177, "learning_rate": 3.571688011971328e-06, "loss": 0.0324, "step": 75160 }, { "epoch": 0.3136291944488488, "grad_norm": 0.7681349783778543, "learning_rate": 3.571569213496919e-06, "loss": 0.0246, "step": 75165 }, { "epoch": 0.3136500571638391, "grad_norm": 0.8039742600125567, "learning_rate": 3.5714504268758456e-06, "loss": 0.0261, "step": 75170 }, { "epoch": 0.31367091987882934, "grad_norm": 1.24908510583035, "learning_rate": 3.5713316521061362e-06, "loss": 0.0395, "step": 75175 }, { "epoch": 0.31369178259381963, "grad_norm": 0.6241017105978376, "learning_rate": 3.571212889185821e-06, "loss": 0.0253, "step": 75180 }, { "epoch": 0.31371264530880993, "grad_norm": 1.2359731277913715, "learning_rate": 3.5710941381129295e-06, "loss": 0.0305, "step": 75185 }, { "epoch": 0.3137335080238002, "grad_norm": 0.2844198603068583, "learning_rate": 3.570975398885492e-06, "loss": 0.0209, "step": 75190 }, { "epoch": 0.3137543707387905, "grad_norm": 0.843956864700928, "learning_rate": 3.5708566715015397e-06, "loss": 0.0288, "step": 75195 }, { "epoch": 0.3137752334537807, "grad_norm": 0.7777882963861183, "learning_rate": 3.5707379559591036e-06, "loss": 0.0345, "step": 75200 }, { "epoch": 0.313796096168771, "grad_norm": 0.3292090605522796, "learning_rate": 3.570619252256216e-06, "loss": 0.028, "step": 75205 }, { "epoch": 0.3138169588837613, "grad_norm": 0.6564510280407116, "learning_rate": 3.570500560390908e-06, "loss": 0.0212, "step": 75210 }, { "epoch": 0.31383782159875157, "grad_norm": 0.3199233261295082, "learning_rate": 3.5703818803612135e-06, "loss": 0.0239, "step": 75215 }, { "epoch": 0.31385868431374186, "grad_norm": 0.5434692582595277, "learning_rate": 3.5702632121651637e-06, "loss": 0.0206, "step": 75220 }, { "epoch": 0.3138795470287321, "grad_norm": 0.4256903105666395, "learning_rate": 3.570144555800794e-06, "loss": 0.0307, "step": 75225 }, { "epoch": 0.3139004097437224, "grad_norm": 1.214757965328915, "learning_rate": 3.5700259112661384e-06, "loss": 0.0372, "step": 75230 }, { "epoch": 0.3139212724587127, "grad_norm": 0.7148205568564869, "learning_rate": 3.5699072785592305e-06, "loss": 0.0289, "step": 75235 }, { "epoch": 0.31394213517370295, "grad_norm": 1.1368590062959334, "learning_rate": 3.5697886576781053e-06, "loss": 0.0314, "step": 75240 }, { "epoch": 0.31396299788869325, "grad_norm": 0.5498035602294254, "learning_rate": 3.569670048620799e-06, "loss": 0.0277, "step": 75245 }, { "epoch": 0.3139838606036835, "grad_norm": 1.3086040191674089, "learning_rate": 3.5695514513853465e-06, "loss": 0.0301, "step": 75250 }, { "epoch": 0.3140047233186738, "grad_norm": 0.5922134792160872, "learning_rate": 3.5694328659697843e-06, "loss": 0.017, "step": 75255 }, { "epoch": 0.3140255860336641, "grad_norm": 1.7025181313796107, "learning_rate": 3.5693142923721495e-06, "loss": 0.0317, "step": 75260 }, { "epoch": 0.31404644874865434, "grad_norm": 0.48392648600682364, "learning_rate": 3.5691957305904803e-06, "loss": 0.0343, "step": 75265 }, { "epoch": 0.31406731146364464, "grad_norm": 1.3894667853097344, "learning_rate": 3.569077180622812e-06, "loss": 0.0426, "step": 75270 }, { "epoch": 0.31408817417863494, "grad_norm": 1.600014944163297, "learning_rate": 3.5689586424671846e-06, "loss": 0.0294, "step": 75275 }, { "epoch": 0.3141090368936252, "grad_norm": 0.49028230466113865, "learning_rate": 3.568840116121635e-06, "loss": 0.0333, "step": 75280 }, { "epoch": 0.3141298996086155, "grad_norm": 0.7973236823701257, "learning_rate": 3.5687216015842047e-06, "loss": 0.0254, "step": 75285 }, { "epoch": 0.3141507623236057, "grad_norm": 0.8001349097137193, "learning_rate": 3.5686030988529313e-06, "loss": 0.0348, "step": 75290 }, { "epoch": 0.314171625038596, "grad_norm": 1.3104538585238636, "learning_rate": 3.5684846079258557e-06, "loss": 0.021, "step": 75295 }, { "epoch": 0.3141924877535863, "grad_norm": 0.7763996113790022, "learning_rate": 3.5683661288010174e-06, "loss": 0.0186, "step": 75300 }, { "epoch": 0.31421335046857657, "grad_norm": 1.3263668346967836, "learning_rate": 3.568247661476458e-06, "loss": 0.0279, "step": 75305 }, { "epoch": 0.31423421318356687, "grad_norm": 1.1197516759719786, "learning_rate": 3.568129205950219e-06, "loss": 0.0315, "step": 75310 }, { "epoch": 0.3142550758985571, "grad_norm": 0.7278647620413645, "learning_rate": 3.5680107622203415e-06, "loss": 0.0285, "step": 75315 }, { "epoch": 0.3142759386135474, "grad_norm": 0.3782035567798241, "learning_rate": 3.567892330284868e-06, "loss": 0.0276, "step": 75320 }, { "epoch": 0.3142968013285377, "grad_norm": 1.1509003661743338, "learning_rate": 3.567773910141841e-06, "loss": 0.0371, "step": 75325 }, { "epoch": 0.31431766404352796, "grad_norm": 0.7756006278016584, "learning_rate": 3.5676555017893043e-06, "loss": 0.0216, "step": 75330 }, { "epoch": 0.31433852675851826, "grad_norm": 0.4795503193104469, "learning_rate": 3.567537105225301e-06, "loss": 0.022, "step": 75335 }, { "epoch": 0.3143593894735085, "grad_norm": 0.3780108820960219, "learning_rate": 3.567418720447875e-06, "loss": 0.0191, "step": 75340 }, { "epoch": 0.3143802521884988, "grad_norm": 1.0593649965138678, "learning_rate": 3.5673003474550712e-06, "loss": 0.0237, "step": 75345 }, { "epoch": 0.3144011149034891, "grad_norm": 0.5450762141622013, "learning_rate": 3.5671819862449336e-06, "loss": 0.0276, "step": 75350 }, { "epoch": 0.31442197761847934, "grad_norm": 1.2707729625961808, "learning_rate": 3.5670636368155093e-06, "loss": 0.0223, "step": 75355 }, { "epoch": 0.31444284033346964, "grad_norm": 0.6359764021622841, "learning_rate": 3.5669452991648434e-06, "loss": 0.0363, "step": 75360 }, { "epoch": 0.31446370304845994, "grad_norm": 1.3476827643697815, "learning_rate": 3.5668269732909823e-06, "loss": 0.0344, "step": 75365 }, { "epoch": 0.3144845657634502, "grad_norm": 0.8092278173939497, "learning_rate": 3.5667086591919714e-06, "loss": 0.0378, "step": 75370 }, { "epoch": 0.3145054284784405, "grad_norm": 0.6194913809324288, "learning_rate": 3.56659035686586e-06, "loss": 0.0239, "step": 75375 }, { "epoch": 0.31452629119343073, "grad_norm": 1.112037957034357, "learning_rate": 3.5664720663106944e-06, "loss": 0.0277, "step": 75380 }, { "epoch": 0.31454715390842103, "grad_norm": 1.0899238800257394, "learning_rate": 3.5663537875245235e-06, "loss": 0.0338, "step": 75385 }, { "epoch": 0.31456801662341133, "grad_norm": 0.698677591648902, "learning_rate": 3.5662355205053957e-06, "loss": 0.0242, "step": 75390 }, { "epoch": 0.3145888793384016, "grad_norm": 0.5696921730653535, "learning_rate": 3.56611726525136e-06, "loss": 0.0283, "step": 75395 }, { "epoch": 0.31460974205339187, "grad_norm": 1.007120956319927, "learning_rate": 3.5659990217604657e-06, "loss": 0.0351, "step": 75400 }, { "epoch": 0.3146306047683821, "grad_norm": 1.058967833956322, "learning_rate": 3.5658807900307626e-06, "loss": 0.0282, "step": 75405 }, { "epoch": 0.3146514674833724, "grad_norm": 0.3805180102999267, "learning_rate": 3.5657625700603017e-06, "loss": 0.0275, "step": 75410 }, { "epoch": 0.3146723301983627, "grad_norm": 1.1008696061096024, "learning_rate": 3.565644361847134e-06, "loss": 0.0378, "step": 75415 }, { "epoch": 0.31469319291335296, "grad_norm": 0.6986086223710961, "learning_rate": 3.565526165389309e-06, "loss": 0.0245, "step": 75420 }, { "epoch": 0.31471405562834326, "grad_norm": 1.0112049182552403, "learning_rate": 3.565407980684881e-06, "loss": 0.0243, "step": 75425 }, { "epoch": 0.3147349183433335, "grad_norm": 0.4925569945143071, "learning_rate": 3.565289807731901e-06, "loss": 0.0379, "step": 75430 }, { "epoch": 0.3147557810583238, "grad_norm": 0.6630204500636661, "learning_rate": 3.5651716465284215e-06, "loss": 0.0236, "step": 75435 }, { "epoch": 0.3147766437733141, "grad_norm": 0.9845076795201707, "learning_rate": 3.5650534970724947e-06, "loss": 0.0266, "step": 75440 }, { "epoch": 0.31479750648830435, "grad_norm": 0.8611278801511464, "learning_rate": 3.5649353593621765e-06, "loss": 0.0278, "step": 75445 }, { "epoch": 0.31481836920329465, "grad_norm": 1.042853715936608, "learning_rate": 3.5648172333955194e-06, "loss": 0.0311, "step": 75450 }, { "epoch": 0.31483923191828495, "grad_norm": 0.579107861356161, "learning_rate": 3.5646991191705783e-06, "loss": 0.0309, "step": 75455 }, { "epoch": 0.3148600946332752, "grad_norm": 0.44700914333256336, "learning_rate": 3.564581016685408e-06, "loss": 0.025, "step": 75460 }, { "epoch": 0.3148809573482655, "grad_norm": 0.8714361885793008, "learning_rate": 3.5644629259380636e-06, "loss": 0.0343, "step": 75465 }, { "epoch": 0.31490182006325573, "grad_norm": 1.0287702008283115, "learning_rate": 3.5643448469266013e-06, "loss": 0.0271, "step": 75470 }, { "epoch": 0.31492268277824603, "grad_norm": 0.4806500297940384, "learning_rate": 3.5642267796490775e-06, "loss": 0.0229, "step": 75475 }, { "epoch": 0.31494354549323633, "grad_norm": 1.1092389771396545, "learning_rate": 3.564108724103548e-06, "loss": 0.0327, "step": 75480 }, { "epoch": 0.3149644082082266, "grad_norm": 1.0425221347570461, "learning_rate": 3.563990680288071e-06, "loss": 0.0282, "step": 75485 }, { "epoch": 0.3149852709232169, "grad_norm": 0.6709519979835882, "learning_rate": 3.5638726482007034e-06, "loss": 0.0364, "step": 75490 }, { "epoch": 0.3150061336382071, "grad_norm": 1.0387041545267817, "learning_rate": 3.563754627839504e-06, "loss": 0.0355, "step": 75495 }, { "epoch": 0.3150269963531974, "grad_norm": 0.8744311301229304, "learning_rate": 3.5636366192025307e-06, "loss": 0.0341, "step": 75500 }, { "epoch": 0.3150478590681877, "grad_norm": 0.815680104707386, "learning_rate": 3.5635186222878432e-06, "loss": 0.029, "step": 75505 }, { "epoch": 0.31506872178317796, "grad_norm": 0.679326639606841, "learning_rate": 3.5634006370935e-06, "loss": 0.0286, "step": 75510 }, { "epoch": 0.31508958449816826, "grad_norm": 0.43668106389753775, "learning_rate": 3.5632826636175617e-06, "loss": 0.0221, "step": 75515 }, { "epoch": 0.3151104472131585, "grad_norm": 0.8560757861602519, "learning_rate": 3.5631647018580874e-06, "loss": 0.0323, "step": 75520 }, { "epoch": 0.3151313099281488, "grad_norm": 1.1592138746934146, "learning_rate": 3.5630467518131397e-06, "loss": 0.0305, "step": 75525 }, { "epoch": 0.3151521726431391, "grad_norm": 0.9069090385790851, "learning_rate": 3.562928813480779e-06, "loss": 0.0317, "step": 75530 }, { "epoch": 0.31517303535812935, "grad_norm": 0.8112767922230463, "learning_rate": 3.5628108868590666e-06, "loss": 0.0243, "step": 75535 }, { "epoch": 0.31519389807311965, "grad_norm": 0.5378045675930955, "learning_rate": 3.5626929719460653e-06, "loss": 0.0223, "step": 75540 }, { "epoch": 0.31521476078810995, "grad_norm": 0.851293190976821, "learning_rate": 3.562575068739837e-06, "loss": 0.0326, "step": 75545 }, { "epoch": 0.3152356235031002, "grad_norm": 0.7314208116530979, "learning_rate": 3.5624571772384446e-06, "loss": 0.0353, "step": 75550 }, { "epoch": 0.3152564862180905, "grad_norm": 1.0560876766231633, "learning_rate": 3.562339297439952e-06, "loss": 0.0315, "step": 75555 }, { "epoch": 0.31527734893308074, "grad_norm": 1.1536670963511644, "learning_rate": 3.562221429342424e-06, "loss": 0.035, "step": 75560 }, { "epoch": 0.31529821164807104, "grad_norm": 1.0635721642494051, "learning_rate": 3.5621035729439233e-06, "loss": 0.0277, "step": 75565 }, { "epoch": 0.31531907436306134, "grad_norm": 0.6809541819182924, "learning_rate": 3.561985728242516e-06, "loss": 0.0357, "step": 75570 }, { "epoch": 0.3153399370780516, "grad_norm": 0.8312968326308343, "learning_rate": 3.561867895236266e-06, "loss": 0.0271, "step": 75575 }, { "epoch": 0.3153607997930419, "grad_norm": 0.7341162002899975, "learning_rate": 3.56175007392324e-06, "loss": 0.0342, "step": 75580 }, { "epoch": 0.3153816625080321, "grad_norm": 0.8223895917727737, "learning_rate": 3.5616322643015044e-06, "loss": 0.0194, "step": 75585 }, { "epoch": 0.3154025252230224, "grad_norm": 0.760482100986354, "learning_rate": 3.561514466369126e-06, "loss": 0.0281, "step": 75590 }, { "epoch": 0.3154233879380127, "grad_norm": 0.7344298716702213, "learning_rate": 3.5613966801241705e-06, "loss": 0.0313, "step": 75595 }, { "epoch": 0.31544425065300297, "grad_norm": 1.0980602015391325, "learning_rate": 3.5612789055647064e-06, "loss": 0.0305, "step": 75600 }, { "epoch": 0.31546511336799327, "grad_norm": 0.1812973304410655, "learning_rate": 3.5611611426888006e-06, "loss": 0.0242, "step": 75605 }, { "epoch": 0.3154859760829835, "grad_norm": 0.6478165886123801, "learning_rate": 3.561043391494523e-06, "loss": 0.0269, "step": 75610 }, { "epoch": 0.3155068387979738, "grad_norm": 1.5799838773832082, "learning_rate": 3.560925651979942e-06, "loss": 0.0352, "step": 75615 }, { "epoch": 0.3155277015129641, "grad_norm": 0.7958741270057414, "learning_rate": 3.560807924143126e-06, "loss": 0.0273, "step": 75620 }, { "epoch": 0.31554856422795435, "grad_norm": 0.8390497896238565, "learning_rate": 3.5606902079821455e-06, "loss": 0.0296, "step": 75625 }, { "epoch": 0.31556942694294465, "grad_norm": 0.5030336415425138, "learning_rate": 3.5605725034950706e-06, "loss": 0.03, "step": 75630 }, { "epoch": 0.31559028965793495, "grad_norm": 0.7897094362728314, "learning_rate": 3.560454810679971e-06, "loss": 0.0292, "step": 75635 }, { "epoch": 0.3156111523729252, "grad_norm": 0.6706875600880572, "learning_rate": 3.5603371295349194e-06, "loss": 0.0261, "step": 75640 }, { "epoch": 0.3156320150879155, "grad_norm": 0.5656405661592155, "learning_rate": 3.560219460057987e-06, "loss": 0.0258, "step": 75645 }, { "epoch": 0.31565287780290574, "grad_norm": 0.9365182858674684, "learning_rate": 3.5601018022472442e-06, "loss": 0.0349, "step": 75650 }, { "epoch": 0.31567374051789604, "grad_norm": 0.5988939345184227, "learning_rate": 3.5599841561007655e-06, "loss": 0.0323, "step": 75655 }, { "epoch": 0.31569460323288634, "grad_norm": 0.5681916888079477, "learning_rate": 3.559866521616622e-06, "loss": 0.0248, "step": 75660 }, { "epoch": 0.3157154659478766, "grad_norm": 0.9527353068402079, "learning_rate": 3.5597488987928886e-06, "loss": 0.0397, "step": 75665 }, { "epoch": 0.3157363286628669, "grad_norm": 0.9494022859298054, "learning_rate": 3.5596312876276377e-06, "loss": 0.0329, "step": 75670 }, { "epoch": 0.3157571913778571, "grad_norm": 0.9494994875139986, "learning_rate": 3.5595136881189443e-06, "loss": 0.0318, "step": 75675 }, { "epoch": 0.3157780540928474, "grad_norm": 0.9738218013108254, "learning_rate": 3.5593961002648825e-06, "loss": 0.0249, "step": 75680 }, { "epoch": 0.3157989168078377, "grad_norm": 0.7332981804006008, "learning_rate": 3.559278524063527e-06, "loss": 0.023, "step": 75685 }, { "epoch": 0.31581977952282797, "grad_norm": 0.7195496793604426, "learning_rate": 3.5591609595129544e-06, "loss": 0.04, "step": 75690 }, { "epoch": 0.31584064223781827, "grad_norm": 0.5243379697404889, "learning_rate": 3.5590434066112413e-06, "loss": 0.0375, "step": 75695 }, { "epoch": 0.3158615049528085, "grad_norm": 0.7159479876569433, "learning_rate": 3.5589258653564616e-06, "loss": 0.028, "step": 75700 }, { "epoch": 0.3158823676677988, "grad_norm": 0.9408306322875111, "learning_rate": 3.5588083357466942e-06, "loss": 0.0322, "step": 75705 }, { "epoch": 0.3159032303827891, "grad_norm": 0.8142591120431132, "learning_rate": 3.558690817780016e-06, "loss": 0.0367, "step": 75710 }, { "epoch": 0.31592409309777936, "grad_norm": 1.3614461717741855, "learning_rate": 3.5585733114545047e-06, "loss": 0.0269, "step": 75715 }, { "epoch": 0.31594495581276966, "grad_norm": 1.4323394784987529, "learning_rate": 3.5584558167682383e-06, "loss": 0.0347, "step": 75720 }, { "epoch": 0.31596581852775996, "grad_norm": 0.7902415512355658, "learning_rate": 3.5583383337192944e-06, "loss": 0.0269, "step": 75725 }, { "epoch": 0.3159866812427502, "grad_norm": 0.9058158922586289, "learning_rate": 3.5582208623057545e-06, "loss": 0.0265, "step": 75730 }, { "epoch": 0.3160075439577405, "grad_norm": 0.9755659366431882, "learning_rate": 3.5581034025256967e-06, "loss": 0.0363, "step": 75735 }, { "epoch": 0.31602840667273074, "grad_norm": 0.6670090856456039, "learning_rate": 3.557985954377201e-06, "loss": 0.0363, "step": 75740 }, { "epoch": 0.31604926938772104, "grad_norm": 0.7852727994721378, "learning_rate": 3.557868517858348e-06, "loss": 0.0294, "step": 75745 }, { "epoch": 0.31607013210271134, "grad_norm": 0.7444137262576795, "learning_rate": 3.5577510929672177e-06, "loss": 0.0282, "step": 75750 }, { "epoch": 0.3160909948177016, "grad_norm": 0.9832847017554338, "learning_rate": 3.5576336797018933e-06, "loss": 0.0402, "step": 75755 }, { "epoch": 0.3161118575326919, "grad_norm": 2.4912865974373064, "learning_rate": 3.557516278060455e-06, "loss": 0.0297, "step": 75760 }, { "epoch": 0.31613272024768213, "grad_norm": 1.6101221192027673, "learning_rate": 3.5573988880409848e-06, "loss": 0.0227, "step": 75765 }, { "epoch": 0.31615358296267243, "grad_norm": 1.0595677055508985, "learning_rate": 3.557281509641567e-06, "loss": 0.0324, "step": 75770 }, { "epoch": 0.31617444567766273, "grad_norm": 0.7858103887363254, "learning_rate": 3.5571641428602833e-06, "loss": 0.0333, "step": 75775 }, { "epoch": 0.316195308392653, "grad_norm": 1.9702536432700684, "learning_rate": 3.557046787695217e-06, "loss": 0.0376, "step": 75780 }, { "epoch": 0.3162161711076433, "grad_norm": 0.8236466583314522, "learning_rate": 3.5569294441444537e-06, "loss": 0.0298, "step": 75785 }, { "epoch": 0.3162370338226335, "grad_norm": 0.7274604808153763, "learning_rate": 3.5568121122060758e-06, "loss": 0.0273, "step": 75790 }, { "epoch": 0.3162578965376238, "grad_norm": 0.8978542595490931, "learning_rate": 3.556694791878169e-06, "loss": 0.032, "step": 75795 }, { "epoch": 0.3162787592526141, "grad_norm": 0.8121864770524176, "learning_rate": 3.556577483158819e-06, "loss": 0.0332, "step": 75800 }, { "epoch": 0.31629962196760436, "grad_norm": 1.1104669810599332, "learning_rate": 3.556460186046111e-06, "loss": 0.0325, "step": 75805 }, { "epoch": 0.31632048468259466, "grad_norm": 0.8087596258323, "learning_rate": 3.556342900538132e-06, "loss": 0.0279, "step": 75810 }, { "epoch": 0.3163413473975849, "grad_norm": 0.8167793834263626, "learning_rate": 3.556225626632967e-06, "loss": 0.0328, "step": 75815 }, { "epoch": 0.3163622101125752, "grad_norm": 0.7631981895741005, "learning_rate": 3.5561083643287047e-06, "loss": 0.0317, "step": 75820 }, { "epoch": 0.3163830728275655, "grad_norm": 1.220109175831552, "learning_rate": 3.5559911136234317e-06, "loss": 0.0265, "step": 75825 }, { "epoch": 0.31640393554255575, "grad_norm": 0.6627353568276175, "learning_rate": 3.5558738745152366e-06, "loss": 0.0223, "step": 75830 }, { "epoch": 0.31642479825754605, "grad_norm": 0.5323481559716763, "learning_rate": 3.555756647002207e-06, "loss": 0.0202, "step": 75835 }, { "epoch": 0.31644566097253635, "grad_norm": 0.664333797202568, "learning_rate": 3.5556394310824314e-06, "loss": 0.027, "step": 75840 }, { "epoch": 0.3164665236875266, "grad_norm": 0.4901269418903781, "learning_rate": 3.555522226754e-06, "loss": 0.0224, "step": 75845 }, { "epoch": 0.3164873864025169, "grad_norm": 2.0136218130877888, "learning_rate": 3.5554050340150028e-06, "loss": 0.0174, "step": 75850 }, { "epoch": 0.31650824911750713, "grad_norm": 0.30551471771969896, "learning_rate": 3.5552878528635285e-06, "loss": 0.0336, "step": 75855 }, { "epoch": 0.31652911183249743, "grad_norm": 0.7955196881699549, "learning_rate": 3.5551706832976697e-06, "loss": 0.0269, "step": 75860 }, { "epoch": 0.31654997454748773, "grad_norm": 1.5945410145582768, "learning_rate": 3.5550535253155155e-06, "loss": 0.0396, "step": 75865 }, { "epoch": 0.316570837262478, "grad_norm": 0.9196295391101817, "learning_rate": 3.554936378915158e-06, "loss": 0.0294, "step": 75870 }, { "epoch": 0.3165916999774683, "grad_norm": 0.8588007516346116, "learning_rate": 3.5548192440946896e-06, "loss": 0.0333, "step": 75875 }, { "epoch": 0.3166125626924585, "grad_norm": 0.4645688615381543, "learning_rate": 3.554702120852202e-06, "loss": 0.021, "step": 75880 }, { "epoch": 0.3166334254074488, "grad_norm": 1.0272492578546024, "learning_rate": 3.554585009185789e-06, "loss": 0.026, "step": 75885 }, { "epoch": 0.3166542881224391, "grad_norm": 0.7997988966209045, "learning_rate": 3.554467909093543e-06, "loss": 0.0256, "step": 75890 }, { "epoch": 0.31667515083742936, "grad_norm": 0.7729334137272079, "learning_rate": 3.554350820573557e-06, "loss": 0.032, "step": 75895 }, { "epoch": 0.31669601355241966, "grad_norm": 0.338447262913644, "learning_rate": 3.554233743623926e-06, "loss": 0.0341, "step": 75900 }, { "epoch": 0.3167168762674099, "grad_norm": 0.9911921327688116, "learning_rate": 3.5541166782427445e-06, "loss": 0.0341, "step": 75905 }, { "epoch": 0.3167377389824002, "grad_norm": 1.0762827489230655, "learning_rate": 3.553999624428107e-06, "loss": 0.0381, "step": 75910 }, { "epoch": 0.3167586016973905, "grad_norm": 0.7731386596227134, "learning_rate": 3.55388258217811e-06, "loss": 0.0227, "step": 75915 }, { "epoch": 0.31677946441238075, "grad_norm": 0.758605124978848, "learning_rate": 3.5537655514908487e-06, "loss": 0.0216, "step": 75920 }, { "epoch": 0.31680032712737105, "grad_norm": 0.4269235166706431, "learning_rate": 3.5536485323644186e-06, "loss": 0.0289, "step": 75925 }, { "epoch": 0.31682118984236135, "grad_norm": 0.5576982573115284, "learning_rate": 3.5535315247969178e-06, "loss": 0.0262, "step": 75930 }, { "epoch": 0.3168420525573516, "grad_norm": 1.206830669937043, "learning_rate": 3.553414528786443e-06, "loss": 0.0293, "step": 75935 }, { "epoch": 0.3168629152723419, "grad_norm": 0.4487266257604058, "learning_rate": 3.553297544331091e-06, "loss": 0.0225, "step": 75940 }, { "epoch": 0.31688377798733214, "grad_norm": 1.2441185268364436, "learning_rate": 3.5531805714289613e-06, "loss": 0.045, "step": 75945 }, { "epoch": 0.31690464070232244, "grad_norm": 2.08701709881316, "learning_rate": 3.5530636100781518e-06, "loss": 0.0302, "step": 75950 }, { "epoch": 0.31692550341731274, "grad_norm": 0.9852258137747192, "learning_rate": 3.5529466602767606e-06, "loss": 0.0241, "step": 75955 }, { "epoch": 0.316946366132303, "grad_norm": 0.8175264755241902, "learning_rate": 3.5528297220228874e-06, "loss": 0.0261, "step": 75960 }, { "epoch": 0.3169672288472933, "grad_norm": 0.6907118760269253, "learning_rate": 3.5527127953146327e-06, "loss": 0.0261, "step": 75965 }, { "epoch": 0.3169880915622835, "grad_norm": 0.6924712584996647, "learning_rate": 3.552595880150097e-06, "loss": 0.0274, "step": 75970 }, { "epoch": 0.3170089542772738, "grad_norm": 0.8709289247177031, "learning_rate": 3.55247897652738e-06, "loss": 0.0264, "step": 75975 }, { "epoch": 0.3170298169922641, "grad_norm": 0.5769877765645203, "learning_rate": 3.5523620844445828e-06, "loss": 0.0228, "step": 75980 }, { "epoch": 0.31705067970725437, "grad_norm": 0.5388604762621022, "learning_rate": 3.552245203899808e-06, "loss": 0.0322, "step": 75985 }, { "epoch": 0.31707154242224467, "grad_norm": 1.5544476146354302, "learning_rate": 3.552128334891156e-06, "loss": 0.0403, "step": 75990 }, { "epoch": 0.3170924051372349, "grad_norm": 1.1406312510699668, "learning_rate": 3.5520114774167314e-06, "loss": 0.0341, "step": 75995 }, { "epoch": 0.3171132678522252, "grad_norm": 0.8403654508917472, "learning_rate": 3.551894631474635e-06, "loss": 0.0337, "step": 76000 }, { "epoch": 0.3171341305672155, "grad_norm": 1.6852349548823682, "learning_rate": 3.5517777970629712e-06, "loss": 0.0438, "step": 76005 }, { "epoch": 0.31715499328220575, "grad_norm": 0.9194633124894551, "learning_rate": 3.5516609741798432e-06, "loss": 0.0216, "step": 76010 }, { "epoch": 0.31717585599719605, "grad_norm": 0.6505717672971162, "learning_rate": 3.551544162823356e-06, "loss": 0.0221, "step": 76015 }, { "epoch": 0.31719671871218635, "grad_norm": 0.43980643050121865, "learning_rate": 3.551427362991613e-06, "loss": 0.0243, "step": 76020 }, { "epoch": 0.3172175814271766, "grad_norm": 1.0238669424607159, "learning_rate": 3.5513105746827202e-06, "loss": 0.0269, "step": 76025 }, { "epoch": 0.3172384441421669, "grad_norm": 0.7108196577352798, "learning_rate": 3.5511937978947826e-06, "loss": 0.024, "step": 76030 }, { "epoch": 0.31725930685715714, "grad_norm": 0.6859470666624207, "learning_rate": 3.5510770326259064e-06, "loss": 0.0251, "step": 76035 }, { "epoch": 0.31728016957214744, "grad_norm": 0.35043245096235665, "learning_rate": 3.5509602788741986e-06, "loss": 0.0255, "step": 76040 }, { "epoch": 0.31730103228713774, "grad_norm": 0.6314942419329522, "learning_rate": 3.550843536637765e-06, "loss": 0.0214, "step": 76045 }, { "epoch": 0.317321895002128, "grad_norm": 1.4196742025122886, "learning_rate": 3.550726805914712e-06, "loss": 0.0309, "step": 76050 }, { "epoch": 0.3173427577171183, "grad_norm": 0.9299528132665874, "learning_rate": 3.550610086703149e-06, "loss": 0.0304, "step": 76055 }, { "epoch": 0.31736362043210853, "grad_norm": 2.9454364680197527, "learning_rate": 3.5504933790011837e-06, "loss": 0.0452, "step": 76060 }, { "epoch": 0.31738448314709883, "grad_norm": 0.8552560432881697, "learning_rate": 3.550376682806925e-06, "loss": 0.0297, "step": 76065 }, { "epoch": 0.3174053458620891, "grad_norm": 1.2635708770637677, "learning_rate": 3.55025999811848e-06, "loss": 0.0245, "step": 76070 }, { "epoch": 0.31742620857707937, "grad_norm": 0.42741424442959414, "learning_rate": 3.5501433249339607e-06, "loss": 0.0221, "step": 76075 }, { "epoch": 0.31744707129206967, "grad_norm": 0.7089422957285458, "learning_rate": 3.5500266632514745e-06, "loss": 0.0358, "step": 76080 }, { "epoch": 0.3174679340070599, "grad_norm": 0.6445990803941213, "learning_rate": 3.5499100130691334e-06, "loss": 0.0306, "step": 76085 }, { "epoch": 0.3174887967220502, "grad_norm": 0.5446528205318824, "learning_rate": 3.549793374385047e-06, "loss": 0.027, "step": 76090 }, { "epoch": 0.3175096594370405, "grad_norm": 0.7578438649613168, "learning_rate": 3.5496767471973275e-06, "loss": 0.0222, "step": 76095 }, { "epoch": 0.31753052215203076, "grad_norm": 0.7226321025920299, "learning_rate": 3.549560131504086e-06, "loss": 0.0313, "step": 76100 }, { "epoch": 0.31755138486702106, "grad_norm": 0.9908546697700218, "learning_rate": 3.5494435273034346e-06, "loss": 0.0305, "step": 76105 }, { "epoch": 0.31757224758201136, "grad_norm": 1.1181504979106751, "learning_rate": 3.5493269345934844e-06, "loss": 0.0384, "step": 76110 }, { "epoch": 0.3175931102970016, "grad_norm": 0.7312043059906346, "learning_rate": 3.5492103533723506e-06, "loss": 0.0229, "step": 76115 }, { "epoch": 0.3176139730119919, "grad_norm": 0.9396141006049303, "learning_rate": 3.549093783638145e-06, "loss": 0.0297, "step": 76120 }, { "epoch": 0.31763483572698215, "grad_norm": 0.7252767776763188, "learning_rate": 3.5489772253889814e-06, "loss": 0.028, "step": 76125 }, { "epoch": 0.31765569844197244, "grad_norm": 0.7154424058852477, "learning_rate": 3.5488606786229744e-06, "loss": 0.0301, "step": 76130 }, { "epoch": 0.31767656115696274, "grad_norm": 0.8590879988511595, "learning_rate": 3.54874414333824e-06, "loss": 0.0306, "step": 76135 }, { "epoch": 0.317697423871953, "grad_norm": 9.90598707637262, "learning_rate": 3.54862761953289e-06, "loss": 0.0325, "step": 76140 }, { "epoch": 0.3177182865869433, "grad_norm": 0.6027682379994304, "learning_rate": 3.5485111072050427e-06, "loss": 0.0301, "step": 76145 }, { "epoch": 0.31773914930193353, "grad_norm": 0.8183235174707723, "learning_rate": 3.5483946063528124e-06, "loss": 0.0383, "step": 76150 }, { "epoch": 0.31776001201692383, "grad_norm": 0.8127233266164681, "learning_rate": 3.5482781169743164e-06, "loss": 0.0214, "step": 76155 }, { "epoch": 0.31778087473191413, "grad_norm": 0.5142122557058267, "learning_rate": 3.5481616390676708e-06, "loss": 0.0212, "step": 76160 }, { "epoch": 0.3178017374469044, "grad_norm": 0.874619076465106, "learning_rate": 3.5480451726309927e-06, "loss": 0.0281, "step": 76165 }, { "epoch": 0.3178226001618947, "grad_norm": 0.6890561048477933, "learning_rate": 3.5479287176624016e-06, "loss": 0.0239, "step": 76170 }, { "epoch": 0.3178434628768849, "grad_norm": 1.4217657650734452, "learning_rate": 3.547812274160013e-06, "loss": 0.0282, "step": 76175 }, { "epoch": 0.3178643255918752, "grad_norm": 1.1441815444441978, "learning_rate": 3.5476958421219472e-06, "loss": 0.0291, "step": 76180 }, { "epoch": 0.3178851883068655, "grad_norm": 0.5076027975104838, "learning_rate": 3.547579421546322e-06, "loss": 0.0321, "step": 76185 }, { "epoch": 0.31790605102185576, "grad_norm": 0.9129987456851432, "learning_rate": 3.547463012431258e-06, "loss": 0.0363, "step": 76190 }, { "epoch": 0.31792691373684606, "grad_norm": 0.9368602361508173, "learning_rate": 3.5473466147748736e-06, "loss": 0.0331, "step": 76195 }, { "epoch": 0.31794777645183636, "grad_norm": 1.6373683901927536, "learning_rate": 3.5472302285752896e-06, "loss": 0.0332, "step": 76200 }, { "epoch": 0.3179686391668266, "grad_norm": 1.1722289628245335, "learning_rate": 3.5471138538306277e-06, "loss": 0.0308, "step": 76205 }, { "epoch": 0.3179895018818169, "grad_norm": 0.5329214004025228, "learning_rate": 3.5469974905390074e-06, "loss": 0.0324, "step": 76210 }, { "epoch": 0.31801036459680715, "grad_norm": 0.9583602742909064, "learning_rate": 3.546881138698551e-06, "loss": 0.0223, "step": 76215 }, { "epoch": 0.31803122731179745, "grad_norm": 0.7987052491709404, "learning_rate": 3.5467647983073807e-06, "loss": 0.0323, "step": 76220 }, { "epoch": 0.31805209002678775, "grad_norm": 1.0143394867962572, "learning_rate": 3.546648469363618e-06, "loss": 0.034, "step": 76225 }, { "epoch": 0.318072952741778, "grad_norm": 0.9648517440870101, "learning_rate": 3.5465321518653874e-06, "loss": 0.0333, "step": 76230 }, { "epoch": 0.3180938154567683, "grad_norm": 0.9947071948344112, "learning_rate": 3.54641584581081e-06, "loss": 0.0268, "step": 76235 }, { "epoch": 0.31811467817175854, "grad_norm": 0.6704589711884833, "learning_rate": 3.5462995511980113e-06, "loss": 0.0266, "step": 76240 }, { "epoch": 0.31813554088674884, "grad_norm": 0.7970643233264202, "learning_rate": 3.5461832680251144e-06, "loss": 0.0204, "step": 76245 }, { "epoch": 0.31815640360173914, "grad_norm": 0.3938640673269745, "learning_rate": 3.5460669962902443e-06, "loss": 0.0406, "step": 76250 }, { "epoch": 0.3181772663167294, "grad_norm": 0.6016517861148661, "learning_rate": 3.545950735991526e-06, "loss": 0.0329, "step": 76255 }, { "epoch": 0.3181981290317197, "grad_norm": 0.7967203317321254, "learning_rate": 3.5458344871270844e-06, "loss": 0.0346, "step": 76260 }, { "epoch": 0.3182189917467099, "grad_norm": 0.8922055856496018, "learning_rate": 3.545718249695046e-06, "loss": 0.0286, "step": 76265 }, { "epoch": 0.3182398544617002, "grad_norm": 0.39807142782282595, "learning_rate": 3.5456020236935364e-06, "loss": 0.0208, "step": 76270 }, { "epoch": 0.3182607171766905, "grad_norm": 0.6609667752022517, "learning_rate": 3.545485809120683e-06, "loss": 0.0229, "step": 76275 }, { "epoch": 0.31828157989168077, "grad_norm": 1.4108143395803636, "learning_rate": 3.5453696059746123e-06, "loss": 0.0343, "step": 76280 }, { "epoch": 0.31830244260667107, "grad_norm": 0.48602167214899267, "learning_rate": 3.545253414253452e-06, "loss": 0.0274, "step": 76285 }, { "epoch": 0.31832330532166137, "grad_norm": 0.8113071893398434, "learning_rate": 3.5451372339553308e-06, "loss": 0.0317, "step": 76290 }, { "epoch": 0.3183441680366516, "grad_norm": 0.6347513712332271, "learning_rate": 3.545021065078376e-06, "loss": 0.0339, "step": 76295 }, { "epoch": 0.3183650307516419, "grad_norm": 1.0160052510667028, "learning_rate": 3.5449049076207174e-06, "loss": 0.033, "step": 76300 }, { "epoch": 0.31838589346663215, "grad_norm": 0.8586602874177423, "learning_rate": 3.5447887615804845e-06, "loss": 0.0294, "step": 76305 }, { "epoch": 0.31840675618162245, "grad_norm": 0.7054537575053323, "learning_rate": 3.5446726269558057e-06, "loss": 0.0365, "step": 76310 }, { "epoch": 0.31842761889661275, "grad_norm": 1.0402228153952864, "learning_rate": 3.5445565037448115e-06, "loss": 0.0279, "step": 76315 }, { "epoch": 0.318448481611603, "grad_norm": 0.5441835616125422, "learning_rate": 3.544440391945633e-06, "loss": 0.0319, "step": 76320 }, { "epoch": 0.3184693443265933, "grad_norm": 0.7155413841545109, "learning_rate": 3.5443242915564018e-06, "loss": 0.0283, "step": 76325 }, { "epoch": 0.31849020704158354, "grad_norm": 0.4785346068398307, "learning_rate": 3.544208202575248e-06, "loss": 0.0268, "step": 76330 }, { "epoch": 0.31851106975657384, "grad_norm": 0.7537137664843448, "learning_rate": 3.5440921250003043e-06, "loss": 0.0252, "step": 76335 }, { "epoch": 0.31853193247156414, "grad_norm": 0.6286280006844835, "learning_rate": 3.543976058829702e-06, "loss": 0.0249, "step": 76340 }, { "epoch": 0.3185527951865544, "grad_norm": 0.731831253796816, "learning_rate": 3.543860004061574e-06, "loss": 0.0271, "step": 76345 }, { "epoch": 0.3185736579015447, "grad_norm": 1.44315002823458, "learning_rate": 3.543743960694055e-06, "loss": 0.0346, "step": 76350 }, { "epoch": 0.3185945206165349, "grad_norm": 0.7283555224242051, "learning_rate": 3.5436279287252774e-06, "loss": 0.0223, "step": 76355 }, { "epoch": 0.3186153833315252, "grad_norm": 0.7380040561687734, "learning_rate": 3.543511908153375e-06, "loss": 0.0414, "step": 76360 }, { "epoch": 0.3186362460465155, "grad_norm": 1.692110462526232, "learning_rate": 3.543395898976482e-06, "loss": 0.028, "step": 76365 }, { "epoch": 0.31865710876150577, "grad_norm": 1.0660580245382052, "learning_rate": 3.543279901192734e-06, "loss": 0.0203, "step": 76370 }, { "epoch": 0.31867797147649607, "grad_norm": 1.1301155877108942, "learning_rate": 3.543163914800266e-06, "loss": 0.0341, "step": 76375 }, { "epoch": 0.31869883419148637, "grad_norm": 0.5226700813171274, "learning_rate": 3.543047939797214e-06, "loss": 0.0333, "step": 76380 }, { "epoch": 0.3187196969064766, "grad_norm": 0.5733770790138396, "learning_rate": 3.542931976181713e-06, "loss": 0.0182, "step": 76385 }, { "epoch": 0.3187405596214669, "grad_norm": 1.0234412552839656, "learning_rate": 3.5428160239519015e-06, "loss": 0.0298, "step": 76390 }, { "epoch": 0.31876142233645716, "grad_norm": 0.3913577411247109, "learning_rate": 3.542700083105915e-06, "loss": 0.0427, "step": 76395 }, { "epoch": 0.31878228505144746, "grad_norm": 0.8721845298255145, "learning_rate": 3.5425841536418902e-06, "loss": 0.0253, "step": 76400 }, { "epoch": 0.31880314776643776, "grad_norm": 0.41771522570426006, "learning_rate": 3.542468235557967e-06, "loss": 0.027, "step": 76405 }, { "epoch": 0.318824010481428, "grad_norm": 0.6834453084434875, "learning_rate": 3.5423523288522833e-06, "loss": 0.027, "step": 76410 }, { "epoch": 0.3188448731964183, "grad_norm": 0.7393010845238053, "learning_rate": 3.542236433522976e-06, "loss": 0.0288, "step": 76415 }, { "epoch": 0.31886573591140854, "grad_norm": 0.7209869661284677, "learning_rate": 3.5421205495681853e-06, "loss": 0.0288, "step": 76420 }, { "epoch": 0.31888659862639884, "grad_norm": 0.5815257206655704, "learning_rate": 3.5420046769860515e-06, "loss": 0.0347, "step": 76425 }, { "epoch": 0.31890746134138914, "grad_norm": 1.054088195116924, "learning_rate": 3.541888815774714e-06, "loss": 0.0244, "step": 76430 }, { "epoch": 0.3189283240563794, "grad_norm": 0.4061975493460099, "learning_rate": 3.541772965932312e-06, "loss": 0.0309, "step": 76435 }, { "epoch": 0.3189491867713697, "grad_norm": 0.5478654855733938, "learning_rate": 3.5416571274569877e-06, "loss": 0.0283, "step": 76440 }, { "epoch": 0.31897004948635993, "grad_norm": 0.9447958058427472, "learning_rate": 3.541541300346883e-06, "loss": 0.0306, "step": 76445 }, { "epoch": 0.31899091220135023, "grad_norm": 0.9335670882248155, "learning_rate": 3.5414254846001375e-06, "loss": 0.0319, "step": 76450 }, { "epoch": 0.31901177491634053, "grad_norm": 0.987668716490152, "learning_rate": 3.5413096802148942e-06, "loss": 0.0265, "step": 76455 }, { "epoch": 0.3190326376313308, "grad_norm": 0.7498833652772741, "learning_rate": 3.541193887189297e-06, "loss": 0.0261, "step": 76460 }, { "epoch": 0.3190535003463211, "grad_norm": 0.8428562454174215, "learning_rate": 3.5410781055214872e-06, "loss": 0.0382, "step": 76465 }, { "epoch": 0.3190743630613114, "grad_norm": 0.6401559277700324, "learning_rate": 3.540962335209608e-06, "loss": 0.0352, "step": 76470 }, { "epoch": 0.3190952257763016, "grad_norm": 0.6948630436543728, "learning_rate": 3.5408465762518047e-06, "loss": 0.0325, "step": 76475 }, { "epoch": 0.3191160884912919, "grad_norm": 0.8930320757177067, "learning_rate": 3.5407308286462195e-06, "loss": 0.0263, "step": 76480 }, { "epoch": 0.31913695120628216, "grad_norm": 0.9005539475251847, "learning_rate": 3.5406150923909994e-06, "loss": 0.0459, "step": 76485 }, { "epoch": 0.31915781392127246, "grad_norm": 0.5249604867795312, "learning_rate": 3.5404993674842874e-06, "loss": 0.0245, "step": 76490 }, { "epoch": 0.31917867663626276, "grad_norm": 0.661772436074228, "learning_rate": 3.54038365392423e-06, "loss": 0.0245, "step": 76495 }, { "epoch": 0.319199539351253, "grad_norm": 0.7694043197933671, "learning_rate": 3.5402679517089726e-06, "loss": 0.0297, "step": 76500 }, { "epoch": 0.3192204020662433, "grad_norm": 0.42858644751930675, "learning_rate": 3.5401522608366628e-06, "loss": 0.0253, "step": 76505 }, { "epoch": 0.31924126478123355, "grad_norm": 1.0923475875742594, "learning_rate": 3.5400365813054456e-06, "loss": 0.0273, "step": 76510 }, { "epoch": 0.31926212749622385, "grad_norm": 0.5716317926665009, "learning_rate": 3.53992091311347e-06, "loss": 0.0217, "step": 76515 }, { "epoch": 0.31928299021121415, "grad_norm": 0.6861120364505413, "learning_rate": 3.539805256258882e-06, "loss": 0.0283, "step": 76520 }, { "epoch": 0.3193038529262044, "grad_norm": 0.6670942999204531, "learning_rate": 3.53968961073983e-06, "loss": 0.0255, "step": 76525 }, { "epoch": 0.3193247156411947, "grad_norm": 0.6761117167627846, "learning_rate": 3.5395739765544635e-06, "loss": 0.0331, "step": 76530 }, { "epoch": 0.31934557835618493, "grad_norm": 1.0088717161741882, "learning_rate": 3.5394583537009297e-06, "loss": 0.0382, "step": 76535 }, { "epoch": 0.31936644107117523, "grad_norm": 0.5629186385855569, "learning_rate": 3.5393427421773797e-06, "loss": 0.0252, "step": 76540 }, { "epoch": 0.31938730378616553, "grad_norm": 0.8390420910421554, "learning_rate": 3.5392271419819624e-06, "loss": 0.0323, "step": 76545 }, { "epoch": 0.3194081665011558, "grad_norm": 0.593827646159511, "learning_rate": 3.5391115531128273e-06, "loss": 0.0279, "step": 76550 }, { "epoch": 0.3194290292161461, "grad_norm": 0.9280563670914129, "learning_rate": 3.538995975568126e-06, "loss": 0.0295, "step": 76555 }, { "epoch": 0.3194498919311364, "grad_norm": 0.8668969753812944, "learning_rate": 3.538880409346009e-06, "loss": 0.0255, "step": 76560 }, { "epoch": 0.3194707546461266, "grad_norm": 1.0314741433994625, "learning_rate": 3.538764854444628e-06, "loss": 0.0341, "step": 76565 }, { "epoch": 0.3194916173611169, "grad_norm": 0.588849930752011, "learning_rate": 3.538649310862134e-06, "loss": 0.0295, "step": 76570 }, { "epoch": 0.31951248007610716, "grad_norm": 0.6674022334248004, "learning_rate": 3.538533778596681e-06, "loss": 0.0226, "step": 76575 }, { "epoch": 0.31953334279109746, "grad_norm": 0.7829232176303961, "learning_rate": 3.5384182576464195e-06, "loss": 0.0288, "step": 76580 }, { "epoch": 0.31955420550608776, "grad_norm": 0.9619470095376504, "learning_rate": 3.5383027480095044e-06, "loss": 0.0353, "step": 76585 }, { "epoch": 0.319575068221078, "grad_norm": 1.36713298133244, "learning_rate": 3.5381872496840885e-06, "loss": 0.0298, "step": 76590 }, { "epoch": 0.3195959309360683, "grad_norm": 1.4561937240027498, "learning_rate": 3.5380717626683254e-06, "loss": 0.0338, "step": 76595 }, { "epoch": 0.31961679365105855, "grad_norm": 0.7589101154691098, "learning_rate": 3.53795628696037e-06, "loss": 0.0211, "step": 76600 }, { "epoch": 0.31963765636604885, "grad_norm": 0.6913394544933464, "learning_rate": 3.537840822558377e-06, "loss": 0.0309, "step": 76605 }, { "epoch": 0.31965851908103915, "grad_norm": 1.6705324422440682, "learning_rate": 3.5377253694605018e-06, "loss": 0.0203, "step": 76610 }, { "epoch": 0.3196793817960294, "grad_norm": 0.7879372407560641, "learning_rate": 3.5376099276649e-06, "loss": 0.0307, "step": 76615 }, { "epoch": 0.3197002445110197, "grad_norm": 0.9087288422605982, "learning_rate": 3.537494497169728e-06, "loss": 0.0347, "step": 76620 }, { "epoch": 0.31972110722600994, "grad_norm": 0.8003386691336748, "learning_rate": 3.537379077973141e-06, "loss": 0.0312, "step": 76625 }, { "epoch": 0.31974196994100024, "grad_norm": 0.8434168712385168, "learning_rate": 3.5372636700732964e-06, "loss": 0.026, "step": 76630 }, { "epoch": 0.31976283265599054, "grad_norm": 0.9078888072734468, "learning_rate": 3.537148273468352e-06, "loss": 0.0293, "step": 76635 }, { "epoch": 0.3197836953709808, "grad_norm": 1.4047671206937098, "learning_rate": 3.5370328881564665e-06, "loss": 0.037, "step": 76640 }, { "epoch": 0.3198045580859711, "grad_norm": 0.9276514696854479, "learning_rate": 3.5369175141357952e-06, "loss": 0.0374, "step": 76645 }, { "epoch": 0.3198254208009614, "grad_norm": 0.5252580335863032, "learning_rate": 3.5368021514044993e-06, "loss": 0.0237, "step": 76650 }, { "epoch": 0.3198462835159516, "grad_norm": 0.9188017560685797, "learning_rate": 3.536686799960737e-06, "loss": 0.0325, "step": 76655 }, { "epoch": 0.3198671462309419, "grad_norm": 0.7789487624890538, "learning_rate": 3.5365714598026675e-06, "loss": 0.03, "step": 76660 }, { "epoch": 0.31988800894593217, "grad_norm": 0.6337172193965525, "learning_rate": 3.5364561309284513e-06, "loss": 0.0252, "step": 76665 }, { "epoch": 0.31990887166092247, "grad_norm": 0.9250436424456927, "learning_rate": 3.536340813336247e-06, "loss": 0.0252, "step": 76670 }, { "epoch": 0.31992973437591277, "grad_norm": 0.5070779286798099, "learning_rate": 3.5362255070242175e-06, "loss": 0.0363, "step": 76675 }, { "epoch": 0.319950597090903, "grad_norm": 0.8414471179063361, "learning_rate": 3.5361102119905222e-06, "loss": 0.0266, "step": 76680 }, { "epoch": 0.3199714598058933, "grad_norm": 1.1105254617548899, "learning_rate": 3.535994928233323e-06, "loss": 0.0415, "step": 76685 }, { "epoch": 0.31999232252088355, "grad_norm": 0.7798194717994882, "learning_rate": 3.5358796557507828e-06, "loss": 0.0265, "step": 76690 }, { "epoch": 0.32001318523587385, "grad_norm": 0.3706637526974113, "learning_rate": 3.5357643945410634e-06, "loss": 0.0243, "step": 76695 }, { "epoch": 0.32003404795086415, "grad_norm": 1.8790210164168362, "learning_rate": 3.5356491446023273e-06, "loss": 0.0296, "step": 76700 }, { "epoch": 0.3200549106658544, "grad_norm": 0.8109545143941681, "learning_rate": 3.5355339059327378e-06, "loss": 0.0236, "step": 76705 }, { "epoch": 0.3200757733808447, "grad_norm": 0.6093085185617906, "learning_rate": 3.535418678530459e-06, "loss": 0.0247, "step": 76710 }, { "epoch": 0.32009663609583494, "grad_norm": 1.3803867879904046, "learning_rate": 3.535303462393654e-06, "loss": 0.025, "step": 76715 }, { "epoch": 0.32011749881082524, "grad_norm": 0.577375001855535, "learning_rate": 3.5351882575204883e-06, "loss": 0.0242, "step": 76720 }, { "epoch": 0.32013836152581554, "grad_norm": 0.965666275435756, "learning_rate": 3.5350730639091256e-06, "loss": 0.0329, "step": 76725 }, { "epoch": 0.3201592242408058, "grad_norm": 0.7734526282279005, "learning_rate": 3.5349578815577323e-06, "loss": 0.03, "step": 76730 }, { "epoch": 0.3201800869557961, "grad_norm": 0.7176947324646427, "learning_rate": 3.534842710464474e-06, "loss": 0.0279, "step": 76735 }, { "epoch": 0.3202009496707864, "grad_norm": 0.6045437683857154, "learning_rate": 3.534727550627516e-06, "loss": 0.0363, "step": 76740 }, { "epoch": 0.3202218123857766, "grad_norm": 0.860463726843296, "learning_rate": 3.5346124020450256e-06, "loss": 0.0322, "step": 76745 }, { "epoch": 0.3202426751007669, "grad_norm": 0.3623648528369991, "learning_rate": 3.5344972647151694e-06, "loss": 0.0186, "step": 76750 }, { "epoch": 0.32026353781575717, "grad_norm": 1.391477204812292, "learning_rate": 3.5343821386361156e-06, "loss": 0.0336, "step": 76755 }, { "epoch": 0.32028440053074747, "grad_norm": 1.190304890729486, "learning_rate": 3.53426702380603e-06, "loss": 0.0319, "step": 76760 }, { "epoch": 0.32030526324573777, "grad_norm": 0.7305452358617068, "learning_rate": 3.534151920223084e-06, "loss": 0.0306, "step": 76765 }, { "epoch": 0.320326125960728, "grad_norm": 1.0744219763844738, "learning_rate": 3.5340368278854436e-06, "loss": 0.0187, "step": 76770 }, { "epoch": 0.3203469886757183, "grad_norm": 0.5164229797926424, "learning_rate": 3.5339217467912783e-06, "loss": 0.0295, "step": 76775 }, { "epoch": 0.32036785139070856, "grad_norm": 0.5242874711126763, "learning_rate": 3.5338066769387586e-06, "loss": 0.0191, "step": 76780 }, { "epoch": 0.32038871410569886, "grad_norm": 0.5401976364231368, "learning_rate": 3.5336916183260527e-06, "loss": 0.0314, "step": 76785 }, { "epoch": 0.32040957682068916, "grad_norm": 1.5611737432438664, "learning_rate": 3.533576570951333e-06, "loss": 0.034, "step": 76790 }, { "epoch": 0.3204304395356794, "grad_norm": 0.4591530936226998, "learning_rate": 3.5334615348127693e-06, "loss": 0.0226, "step": 76795 }, { "epoch": 0.3204513022506697, "grad_norm": 1.135227852118419, "learning_rate": 3.533346509908532e-06, "loss": 0.0262, "step": 76800 }, { "epoch": 0.32047216496565994, "grad_norm": 2.915627926019982, "learning_rate": 3.533231496236794e-06, "loss": 0.0341, "step": 76805 }, { "epoch": 0.32049302768065024, "grad_norm": 0.7586162711306963, "learning_rate": 3.5331164937957253e-06, "loss": 0.0284, "step": 76810 }, { "epoch": 0.32051389039564054, "grad_norm": 1.191138615274024, "learning_rate": 3.5330015025835007e-06, "loss": 0.0291, "step": 76815 }, { "epoch": 0.3205347531106308, "grad_norm": 0.48520068638019914, "learning_rate": 3.5328865225982917e-06, "loss": 0.0321, "step": 76820 }, { "epoch": 0.3205556158256211, "grad_norm": 1.2422221649310676, "learning_rate": 3.5327715538382716e-06, "loss": 0.0286, "step": 76825 }, { "epoch": 0.3205764785406114, "grad_norm": 0.6618776719913619, "learning_rate": 3.532656596301614e-06, "loss": 0.0267, "step": 76830 }, { "epoch": 0.32059734125560163, "grad_norm": 1.0319483964881389, "learning_rate": 3.532541649986493e-06, "loss": 0.0354, "step": 76835 }, { "epoch": 0.32061820397059193, "grad_norm": 0.7440385297780348, "learning_rate": 3.532426714891084e-06, "loss": 0.0207, "step": 76840 }, { "epoch": 0.3206390666855822, "grad_norm": 0.6474207394273707, "learning_rate": 3.5323117910135603e-06, "loss": 0.0325, "step": 76845 }, { "epoch": 0.3206599294005725, "grad_norm": 0.738757039265858, "learning_rate": 3.532196878352098e-06, "loss": 0.0273, "step": 76850 }, { "epoch": 0.3206807921155628, "grad_norm": 0.6562171336111204, "learning_rate": 3.5320819769048737e-06, "loss": 0.0279, "step": 76855 }, { "epoch": 0.320701654830553, "grad_norm": 1.2090107068105855, "learning_rate": 3.5319670866700616e-06, "loss": 0.0272, "step": 76860 }, { "epoch": 0.3207225175455433, "grad_norm": 0.9138501658898697, "learning_rate": 3.5318522076458393e-06, "loss": 0.0227, "step": 76865 }, { "epoch": 0.32074338026053356, "grad_norm": 2.340825043437806, "learning_rate": 3.5317373398303845e-06, "loss": 0.0323, "step": 76870 }, { "epoch": 0.32076424297552386, "grad_norm": 0.6173423091390382, "learning_rate": 3.5316224832218736e-06, "loss": 0.024, "step": 76875 }, { "epoch": 0.32078510569051416, "grad_norm": 0.7339989433182016, "learning_rate": 3.531507637818484e-06, "loss": 0.024, "step": 76880 }, { "epoch": 0.3208059684055044, "grad_norm": 0.8930548925626742, "learning_rate": 3.531392803618396e-06, "loss": 0.0374, "step": 76885 }, { "epoch": 0.3208268311204947, "grad_norm": 0.9685661030802124, "learning_rate": 3.531277980619786e-06, "loss": 0.0263, "step": 76890 }, { "epoch": 0.32084769383548495, "grad_norm": 0.7727113088012851, "learning_rate": 3.5311631688208338e-06, "loss": 0.0359, "step": 76895 }, { "epoch": 0.32086855655047525, "grad_norm": 0.6000306962837509, "learning_rate": 3.531048368219719e-06, "loss": 0.0259, "step": 76900 }, { "epoch": 0.32088941926546555, "grad_norm": 1.2340675084523984, "learning_rate": 3.530933578814621e-06, "loss": 0.0343, "step": 76905 }, { "epoch": 0.3209102819804558, "grad_norm": 0.7691954699229875, "learning_rate": 3.5308188006037208e-06, "loss": 0.0354, "step": 76910 }, { "epoch": 0.3209311446954461, "grad_norm": 0.7195594785546627, "learning_rate": 3.530704033585198e-06, "loss": 0.0294, "step": 76915 }, { "epoch": 0.3209520074104364, "grad_norm": 0.6121296518765191, "learning_rate": 3.5305892777572353e-06, "loss": 0.0248, "step": 76920 }, { "epoch": 0.32097287012542663, "grad_norm": 1.0511799121819918, "learning_rate": 3.5304745331180135e-06, "loss": 0.0327, "step": 76925 }, { "epoch": 0.32099373284041693, "grad_norm": 0.8466939901234951, "learning_rate": 3.530359799665714e-06, "loss": 0.0294, "step": 76930 }, { "epoch": 0.3210145955554072, "grad_norm": 2.52122835176157, "learning_rate": 3.5302450773985193e-06, "loss": 0.0335, "step": 76935 }, { "epoch": 0.3210354582703975, "grad_norm": 0.3498607757277926, "learning_rate": 3.530130366314613e-06, "loss": 0.0262, "step": 76940 }, { "epoch": 0.3210563209853878, "grad_norm": 0.9028488925631012, "learning_rate": 3.5300156664121776e-06, "loss": 0.0216, "step": 76945 }, { "epoch": 0.321077183700378, "grad_norm": 0.48125257048198006, "learning_rate": 3.529900977689397e-06, "loss": 0.0306, "step": 76950 }, { "epoch": 0.3210980464153683, "grad_norm": 1.3746063925622192, "learning_rate": 3.529786300144454e-06, "loss": 0.0191, "step": 76955 }, { "epoch": 0.32111890913035857, "grad_norm": 0.6520187942700041, "learning_rate": 3.5296716337755355e-06, "loss": 0.0418, "step": 76960 }, { "epoch": 0.32113977184534886, "grad_norm": 0.6940150668484232, "learning_rate": 3.529556978580824e-06, "loss": 0.024, "step": 76965 }, { "epoch": 0.32116063456033916, "grad_norm": 0.8800399040136074, "learning_rate": 3.529442334558506e-06, "loss": 0.0329, "step": 76970 }, { "epoch": 0.3211814972753294, "grad_norm": 1.057794774875508, "learning_rate": 3.5293277017067667e-06, "loss": 0.037, "step": 76975 }, { "epoch": 0.3212023599903197, "grad_norm": 0.6349340143869204, "learning_rate": 3.5292130800237923e-06, "loss": 0.0266, "step": 76980 }, { "epoch": 0.32122322270530995, "grad_norm": 1.0736414436888382, "learning_rate": 3.5290984695077684e-06, "loss": 0.0338, "step": 76985 }, { "epoch": 0.32124408542030025, "grad_norm": 0.6672680646523166, "learning_rate": 3.528983870156884e-06, "loss": 0.0238, "step": 76990 }, { "epoch": 0.32126494813529055, "grad_norm": 0.8800239494589661, "learning_rate": 3.5288692819693242e-06, "loss": 0.026, "step": 76995 }, { "epoch": 0.3212858108502808, "grad_norm": 1.3092714900757982, "learning_rate": 3.528754704943278e-06, "loss": 0.0284, "step": 77000 }, { "epoch": 0.3213066735652711, "grad_norm": 0.8283336170847951, "learning_rate": 3.528640139076934e-06, "loss": 0.031, "step": 77005 }, { "epoch": 0.3213275362802614, "grad_norm": 0.2727823948773332, "learning_rate": 3.528525584368479e-06, "loss": 0.0263, "step": 77010 }, { "epoch": 0.32134839899525164, "grad_norm": 0.7973795134337173, "learning_rate": 3.528411040816102e-06, "loss": 0.031, "step": 77015 }, { "epoch": 0.32136926171024194, "grad_norm": 0.8229433322646986, "learning_rate": 3.528296508417995e-06, "loss": 0.0287, "step": 77020 }, { "epoch": 0.3213901244252322, "grad_norm": 0.6884128615370378, "learning_rate": 3.5281819871723445e-06, "loss": 0.0298, "step": 77025 }, { "epoch": 0.3214109871402225, "grad_norm": 1.015923042505121, "learning_rate": 3.5280674770773437e-06, "loss": 0.026, "step": 77030 }, { "epoch": 0.3214318498552128, "grad_norm": 1.3355880753741012, "learning_rate": 3.527952978131181e-06, "loss": 0.0263, "step": 77035 }, { "epoch": 0.321452712570203, "grad_norm": 0.7111320767271254, "learning_rate": 3.527838490332048e-06, "loss": 0.0315, "step": 77040 }, { "epoch": 0.3214735752851933, "grad_norm": 0.9748545632464962, "learning_rate": 3.527724013678136e-06, "loss": 0.0252, "step": 77045 }, { "epoch": 0.32149443800018357, "grad_norm": 0.5485253475987752, "learning_rate": 3.5276095481676374e-06, "loss": 0.0378, "step": 77050 }, { "epoch": 0.32151530071517387, "grad_norm": 0.6534635291137213, "learning_rate": 3.527495093798744e-06, "loss": 0.0402, "step": 77055 }, { "epoch": 0.32153616343016417, "grad_norm": 1.0855258120049374, "learning_rate": 3.527380650569649e-06, "loss": 0.0258, "step": 77060 }, { "epoch": 0.3215570261451544, "grad_norm": 1.385384271275589, "learning_rate": 3.527266218478545e-06, "loss": 0.0292, "step": 77065 }, { "epoch": 0.3215778888601447, "grad_norm": 0.9963643142619554, "learning_rate": 3.5271517975236254e-06, "loss": 0.0286, "step": 77070 }, { "epoch": 0.32159875157513496, "grad_norm": 0.9948068976643066, "learning_rate": 3.527037387703083e-06, "loss": 0.0294, "step": 77075 }, { "epoch": 0.32161961429012526, "grad_norm": 0.547048344525018, "learning_rate": 3.5269229890151152e-06, "loss": 0.0209, "step": 77080 }, { "epoch": 0.32164047700511555, "grad_norm": 1.0656385519278835, "learning_rate": 3.5268086014579132e-06, "loss": 0.035, "step": 77085 }, { "epoch": 0.3216613397201058, "grad_norm": 0.8688637597907224, "learning_rate": 3.5266942250296747e-06, "loss": 0.0332, "step": 77090 }, { "epoch": 0.3216822024350961, "grad_norm": 0.6011637551006641, "learning_rate": 3.5265798597285946e-06, "loss": 0.027, "step": 77095 }, { "epoch": 0.3217030651500864, "grad_norm": 0.6734176324507686, "learning_rate": 3.5264655055528675e-06, "loss": 0.0322, "step": 77100 }, { "epoch": 0.32172392786507664, "grad_norm": 0.9019540917988184, "learning_rate": 3.526351162500691e-06, "loss": 0.0325, "step": 77105 }, { "epoch": 0.32174479058006694, "grad_norm": 0.6840550306763186, "learning_rate": 3.5262368305702617e-06, "loss": 0.0256, "step": 77110 }, { "epoch": 0.3217656532950572, "grad_norm": 0.9232596679472337, "learning_rate": 3.526122509759777e-06, "loss": 0.0403, "step": 77115 }, { "epoch": 0.3217865160100475, "grad_norm": 0.5974476606493868, "learning_rate": 3.526008200067433e-06, "loss": 0.0312, "step": 77120 }, { "epoch": 0.3218073787250378, "grad_norm": 0.7604237626921949, "learning_rate": 3.5258939014914297e-06, "loss": 0.0285, "step": 77125 }, { "epoch": 0.32182824144002803, "grad_norm": 0.4347655049603671, "learning_rate": 3.525779614029964e-06, "loss": 0.0237, "step": 77130 }, { "epoch": 0.32184910415501833, "grad_norm": 0.7066031322863636, "learning_rate": 3.5256653376812355e-06, "loss": 0.0265, "step": 77135 }, { "epoch": 0.3218699668700086, "grad_norm": 1.0128846991972207, "learning_rate": 3.5255510724434426e-06, "loss": 0.0305, "step": 77140 }, { "epoch": 0.32189082958499887, "grad_norm": 0.6916968886500405, "learning_rate": 3.5254368183147863e-06, "loss": 0.0329, "step": 77145 }, { "epoch": 0.32191169229998917, "grad_norm": 0.572432045031323, "learning_rate": 3.5253225752934654e-06, "loss": 0.0184, "step": 77150 }, { "epoch": 0.3219325550149794, "grad_norm": 1.0391418606122556, "learning_rate": 3.5252083433776814e-06, "loss": 0.0221, "step": 77155 }, { "epoch": 0.3219534177299697, "grad_norm": 1.1400338370601582, "learning_rate": 3.5250941225656338e-06, "loss": 0.0279, "step": 77160 }, { "epoch": 0.32197428044495996, "grad_norm": 0.9206309195766827, "learning_rate": 3.5249799128555246e-06, "loss": 0.0324, "step": 77165 }, { "epoch": 0.32199514315995026, "grad_norm": 0.7696734050592481, "learning_rate": 3.524865714245555e-06, "loss": 0.0317, "step": 77170 }, { "epoch": 0.32201600587494056, "grad_norm": 0.6971175755212352, "learning_rate": 3.5247515267339276e-06, "loss": 0.0254, "step": 77175 }, { "epoch": 0.3220368685899308, "grad_norm": 0.7467995567807788, "learning_rate": 3.524637350318845e-06, "loss": 0.0199, "step": 77180 }, { "epoch": 0.3220577313049211, "grad_norm": 0.9924492851624245, "learning_rate": 3.52452318499851e-06, "loss": 0.0358, "step": 77185 }, { "epoch": 0.3220785940199114, "grad_norm": 0.7987421318972505, "learning_rate": 3.5244090307711253e-06, "loss": 0.0308, "step": 77190 }, { "epoch": 0.32209945673490165, "grad_norm": 0.8348537782759903, "learning_rate": 3.5242948876348953e-06, "loss": 0.0299, "step": 77195 }, { "epoch": 0.32212031944989195, "grad_norm": 0.7888367471420612, "learning_rate": 3.5241807555880236e-06, "loss": 0.0294, "step": 77200 }, { "epoch": 0.3221411821648822, "grad_norm": 1.0923103487000492, "learning_rate": 3.5240666346287146e-06, "loss": 0.0268, "step": 77205 }, { "epoch": 0.3221620448798725, "grad_norm": 0.7024071863527006, "learning_rate": 3.5239525247551737e-06, "loss": 0.0291, "step": 77210 }, { "epoch": 0.3221829075948628, "grad_norm": 0.3872096147439307, "learning_rate": 3.523838425965606e-06, "loss": 0.0241, "step": 77215 }, { "epoch": 0.32220377030985303, "grad_norm": 0.5054289123496044, "learning_rate": 3.523724338258217e-06, "loss": 0.0403, "step": 77220 }, { "epoch": 0.32222463302484333, "grad_norm": 0.6386566052114493, "learning_rate": 3.5236102616312135e-06, "loss": 0.0294, "step": 77225 }, { "epoch": 0.3222454957398336, "grad_norm": 0.8420147571818619, "learning_rate": 3.523496196082801e-06, "loss": 0.0326, "step": 77230 }, { "epoch": 0.3222663584548239, "grad_norm": 0.9723145770734484, "learning_rate": 3.523382141611188e-06, "loss": 0.0291, "step": 77235 }, { "epoch": 0.3222872211698142, "grad_norm": 0.9293775985990492, "learning_rate": 3.5232680982145796e-06, "loss": 0.033, "step": 77240 }, { "epoch": 0.3223080838848044, "grad_norm": 0.48116398556072815, "learning_rate": 3.5231540658911855e-06, "loss": 0.0244, "step": 77245 }, { "epoch": 0.3223289465997947, "grad_norm": 0.3984300807198615, "learning_rate": 3.523040044639214e-06, "loss": 0.0433, "step": 77250 }, { "epoch": 0.32234980931478496, "grad_norm": 0.9387497147493932, "learning_rate": 3.5229260344568718e-06, "loss": 0.0303, "step": 77255 }, { "epoch": 0.32237067202977526, "grad_norm": 0.7675511195307826, "learning_rate": 3.5228120353423696e-06, "loss": 0.0319, "step": 77260 }, { "epoch": 0.32239153474476556, "grad_norm": 0.9810280724978113, "learning_rate": 3.522698047293916e-06, "loss": 0.036, "step": 77265 }, { "epoch": 0.3224123974597558, "grad_norm": 1.2056738564123317, "learning_rate": 3.522584070309721e-06, "loss": 0.0257, "step": 77270 }, { "epoch": 0.3224332601747461, "grad_norm": 1.346335299717149, "learning_rate": 3.522470104387995e-06, "loss": 0.0379, "step": 77275 }, { "epoch": 0.3224541228897364, "grad_norm": 0.9294256596347487, "learning_rate": 3.5223561495269477e-06, "loss": 0.028, "step": 77280 }, { "epoch": 0.32247498560472665, "grad_norm": 0.9118908403717642, "learning_rate": 3.522242205724791e-06, "loss": 0.0327, "step": 77285 }, { "epoch": 0.32249584831971695, "grad_norm": 1.2844170606424345, "learning_rate": 3.5221282729797357e-06, "loss": 0.0302, "step": 77290 }, { "epoch": 0.3225167110347072, "grad_norm": 0.9140597484360732, "learning_rate": 3.5220143512899944e-06, "loss": 0.0437, "step": 77295 }, { "epoch": 0.3225375737496975, "grad_norm": 0.5917660728842732, "learning_rate": 3.5219004406537794e-06, "loss": 0.0303, "step": 77300 }, { "epoch": 0.3225584364646878, "grad_norm": 0.5457545776784329, "learning_rate": 3.5217865410693018e-06, "loss": 0.0277, "step": 77305 }, { "epoch": 0.32257929917967804, "grad_norm": 0.9615451846522514, "learning_rate": 3.5216726525347768e-06, "loss": 0.033, "step": 77310 }, { "epoch": 0.32260016189466834, "grad_norm": 0.46084632757575494, "learning_rate": 3.521558775048416e-06, "loss": 0.0274, "step": 77315 }, { "epoch": 0.3226210246096586, "grad_norm": 1.064784552541588, "learning_rate": 3.5214449086084336e-06, "loss": 0.0269, "step": 77320 }, { "epoch": 0.3226418873246489, "grad_norm": 0.5821163358924016, "learning_rate": 3.5213310532130444e-06, "loss": 0.0323, "step": 77325 }, { "epoch": 0.3226627500396392, "grad_norm": 0.6379224669638464, "learning_rate": 3.521217208860463e-06, "loss": 0.0293, "step": 77330 }, { "epoch": 0.3226836127546294, "grad_norm": 0.777311609640903, "learning_rate": 3.521103375548904e-06, "loss": 0.029, "step": 77335 }, { "epoch": 0.3227044754696197, "grad_norm": 0.6290021329778922, "learning_rate": 3.5209895532765837e-06, "loss": 0.0309, "step": 77340 }, { "epoch": 0.32272533818460997, "grad_norm": 0.8717676631002446, "learning_rate": 3.5208757420417167e-06, "loss": 0.0203, "step": 77345 }, { "epoch": 0.32274620089960027, "grad_norm": 0.7586781325597965, "learning_rate": 3.5207619418425197e-06, "loss": 0.03, "step": 77350 }, { "epoch": 0.32276706361459057, "grad_norm": 1.3114223399039087, "learning_rate": 3.520648152677211e-06, "loss": 0.039, "step": 77355 }, { "epoch": 0.3227879263295808, "grad_norm": 0.9354656318922805, "learning_rate": 3.520534374544005e-06, "loss": 0.0287, "step": 77360 }, { "epoch": 0.3228087890445711, "grad_norm": 0.9448271797102421, "learning_rate": 3.520420607441121e-06, "loss": 0.0314, "step": 77365 }, { "epoch": 0.3228296517595614, "grad_norm": 0.7710053232278679, "learning_rate": 3.5203068513667754e-06, "loss": 0.0362, "step": 77370 }, { "epoch": 0.32285051447455165, "grad_norm": 0.7967672320508129, "learning_rate": 3.520193106319188e-06, "loss": 0.0338, "step": 77375 }, { "epoch": 0.32287137718954195, "grad_norm": 0.6065752860902884, "learning_rate": 3.5200793722965777e-06, "loss": 0.0265, "step": 77380 }, { "epoch": 0.3228922399045322, "grad_norm": 1.0380240218535897, "learning_rate": 3.519965649297161e-06, "loss": 0.0357, "step": 77385 }, { "epoch": 0.3229131026195225, "grad_norm": 1.1309666060998071, "learning_rate": 3.5198519373191607e-06, "loss": 0.0282, "step": 77390 }, { "epoch": 0.3229339653345128, "grad_norm": 0.5679220203313257, "learning_rate": 3.5197382363607947e-06, "loss": 0.0237, "step": 77395 }, { "epoch": 0.32295482804950304, "grad_norm": 0.8415942192955491, "learning_rate": 3.5196245464202834e-06, "loss": 0.0238, "step": 77400 }, { "epoch": 0.32297569076449334, "grad_norm": 1.2154999147902594, "learning_rate": 3.5195108674958477e-06, "loss": 0.0422, "step": 77405 }, { "epoch": 0.3229965534794836, "grad_norm": 0.4854849629554482, "learning_rate": 3.519397199585709e-06, "loss": 0.0315, "step": 77410 }, { "epoch": 0.3230174161944739, "grad_norm": 1.2688365692892891, "learning_rate": 3.519283542688089e-06, "loss": 0.0329, "step": 77415 }, { "epoch": 0.3230382789094642, "grad_norm": 1.415476681902359, "learning_rate": 3.519169896801209e-06, "loss": 0.0346, "step": 77420 }, { "epoch": 0.3230591416244544, "grad_norm": 1.0608406178500651, "learning_rate": 3.519056261923291e-06, "loss": 0.0385, "step": 77425 }, { "epoch": 0.3230800043394447, "grad_norm": 0.563713454165983, "learning_rate": 3.5189426380525586e-06, "loss": 0.0301, "step": 77430 }, { "epoch": 0.32310086705443497, "grad_norm": 0.8688171674176325, "learning_rate": 3.5188290251872343e-06, "loss": 0.0335, "step": 77435 }, { "epoch": 0.32312172976942527, "grad_norm": 0.5296660873327064, "learning_rate": 3.5187154233255423e-06, "loss": 0.0301, "step": 77440 }, { "epoch": 0.32314259248441557, "grad_norm": 0.3804624825117741, "learning_rate": 3.5186018324657054e-06, "loss": 0.0266, "step": 77445 }, { "epoch": 0.3231634551994058, "grad_norm": 0.43227555693193925, "learning_rate": 3.5184882526059496e-06, "loss": 0.0314, "step": 77450 }, { "epoch": 0.3231843179143961, "grad_norm": 1.5087182792183567, "learning_rate": 3.5183746837444977e-06, "loss": 0.0344, "step": 77455 }, { "epoch": 0.3232051806293864, "grad_norm": 0.6569820024357343, "learning_rate": 3.5182611258795757e-06, "loss": 0.0353, "step": 77460 }, { "epoch": 0.32322604334437666, "grad_norm": 1.1170840961478226, "learning_rate": 3.5181475790094094e-06, "loss": 0.0304, "step": 77465 }, { "epoch": 0.32324690605936696, "grad_norm": 0.6205852887231851, "learning_rate": 3.518034043132224e-06, "loss": 0.0224, "step": 77470 }, { "epoch": 0.3232677687743572, "grad_norm": 0.9025684332284918, "learning_rate": 3.517920518246246e-06, "loss": 0.0307, "step": 77475 }, { "epoch": 0.3232886314893475, "grad_norm": 0.9252405954228068, "learning_rate": 3.5178070043497035e-06, "loss": 0.0317, "step": 77480 }, { "epoch": 0.3233094942043378, "grad_norm": 1.7440198862403782, "learning_rate": 3.5176935014408216e-06, "loss": 0.0451, "step": 77485 }, { "epoch": 0.32333035691932804, "grad_norm": 1.047148505257945, "learning_rate": 3.5175800095178287e-06, "loss": 0.0253, "step": 77490 }, { "epoch": 0.32335121963431834, "grad_norm": 0.7833051327613195, "learning_rate": 3.517466528578952e-06, "loss": 0.0302, "step": 77495 }, { "epoch": 0.3233720823493086, "grad_norm": 1.7726925939895906, "learning_rate": 3.5173530586224216e-06, "loss": 0.0355, "step": 77500 }, { "epoch": 0.3233929450642989, "grad_norm": 1.1384118144973219, "learning_rate": 3.517239599646464e-06, "loss": 0.032, "step": 77505 }, { "epoch": 0.3234138077792892, "grad_norm": 0.8275284236391679, "learning_rate": 3.5171261516493103e-06, "loss": 0.0269, "step": 77510 }, { "epoch": 0.32343467049427943, "grad_norm": 0.749076527770516, "learning_rate": 3.5170127146291893e-06, "loss": 0.0371, "step": 77515 }, { "epoch": 0.32345553320926973, "grad_norm": 0.7099315196868552, "learning_rate": 3.5168992885843305e-06, "loss": 0.0298, "step": 77520 }, { "epoch": 0.32347639592426, "grad_norm": 0.6385985692839613, "learning_rate": 3.5167858735129647e-06, "loss": 0.0194, "step": 77525 }, { "epoch": 0.3234972586392503, "grad_norm": 1.1001676433541219, "learning_rate": 3.516672469413322e-06, "loss": 0.0278, "step": 77530 }, { "epoch": 0.3235181213542406, "grad_norm": 0.6759183629102558, "learning_rate": 3.516559076283634e-06, "loss": 0.0283, "step": 77535 }, { "epoch": 0.3235389840692308, "grad_norm": 1.2822793244440058, "learning_rate": 3.516445694122132e-06, "loss": 0.0257, "step": 77540 }, { "epoch": 0.3235598467842211, "grad_norm": 0.8900334205796894, "learning_rate": 3.5163323229270477e-06, "loss": 0.033, "step": 77545 }, { "epoch": 0.3235807094992114, "grad_norm": 0.6441566690670036, "learning_rate": 3.5162189626966146e-06, "loss": 0.0418, "step": 77550 }, { "epoch": 0.32360157221420166, "grad_norm": 0.6744897191700002, "learning_rate": 3.5161056134290646e-06, "loss": 0.0378, "step": 77555 }, { "epoch": 0.32362243492919196, "grad_norm": 0.7335110107424505, "learning_rate": 3.51599227512263e-06, "loss": 0.0263, "step": 77560 }, { "epoch": 0.3236432976441822, "grad_norm": 1.3549699711834258, "learning_rate": 3.515878947775545e-06, "loss": 0.0334, "step": 77565 }, { "epoch": 0.3236641603591725, "grad_norm": 1.01128611218355, "learning_rate": 3.515765631386044e-06, "loss": 0.0333, "step": 77570 }, { "epoch": 0.3236850230741628, "grad_norm": 0.6826004872436834, "learning_rate": 3.5156523259523613e-06, "loss": 0.0356, "step": 77575 }, { "epoch": 0.32370588578915305, "grad_norm": 0.7551198356513102, "learning_rate": 3.5155390314727302e-06, "loss": 0.0275, "step": 77580 }, { "epoch": 0.32372674850414335, "grad_norm": 0.6722159572759554, "learning_rate": 3.515425747945387e-06, "loss": 0.0241, "step": 77585 }, { "epoch": 0.3237476112191336, "grad_norm": 0.5140412129504279, "learning_rate": 3.5153124753685674e-06, "loss": 0.0299, "step": 77590 }, { "epoch": 0.3237684739341239, "grad_norm": 0.8074260680341641, "learning_rate": 3.5151992137405074e-06, "loss": 0.0297, "step": 77595 }, { "epoch": 0.3237893366491142, "grad_norm": 0.2990703835839886, "learning_rate": 3.515085963059442e-06, "loss": 0.0235, "step": 77600 }, { "epoch": 0.32381019936410443, "grad_norm": 1.2633305955836234, "learning_rate": 3.5149727233236086e-06, "loss": 0.0286, "step": 77605 }, { "epoch": 0.32383106207909473, "grad_norm": 0.6971643421399047, "learning_rate": 3.514859494531245e-06, "loss": 0.0326, "step": 77610 }, { "epoch": 0.323851924794085, "grad_norm": 0.43943226980803196, "learning_rate": 3.5147462766805873e-06, "loss": 0.0266, "step": 77615 }, { "epoch": 0.3238727875090753, "grad_norm": 0.7324259781427153, "learning_rate": 3.5146330697698744e-06, "loss": 0.0356, "step": 77620 }, { "epoch": 0.3238936502240656, "grad_norm": 1.1805270416209035, "learning_rate": 3.5145198737973447e-06, "loss": 0.0347, "step": 77625 }, { "epoch": 0.3239145129390558, "grad_norm": 0.8300989076556051, "learning_rate": 3.514406688761236e-06, "loss": 0.0242, "step": 77630 }, { "epoch": 0.3239353756540461, "grad_norm": 0.7336914728295177, "learning_rate": 3.5142935146597878e-06, "loss": 0.0248, "step": 77635 }, { "epoch": 0.3239562383690364, "grad_norm": 0.6927080236954549, "learning_rate": 3.51418035149124e-06, "loss": 0.0345, "step": 77640 }, { "epoch": 0.32397710108402666, "grad_norm": 0.5914207233619128, "learning_rate": 3.514067199253832e-06, "loss": 0.0227, "step": 77645 }, { "epoch": 0.32399796379901696, "grad_norm": 0.5842712015437735, "learning_rate": 3.513954057945805e-06, "loss": 0.0213, "step": 77650 }, { "epoch": 0.3240188265140072, "grad_norm": 0.958257901292536, "learning_rate": 3.513840927565398e-06, "loss": 0.0274, "step": 77655 }, { "epoch": 0.3240396892289975, "grad_norm": 0.8702507957090995, "learning_rate": 3.513727808110853e-06, "loss": 0.028, "step": 77660 }, { "epoch": 0.3240605519439878, "grad_norm": 0.6684766928278382, "learning_rate": 3.513614699580411e-06, "loss": 0.0194, "step": 77665 }, { "epoch": 0.32408141465897805, "grad_norm": 5.576179055347718, "learning_rate": 3.5135016019723143e-06, "loss": 0.022, "step": 77670 }, { "epoch": 0.32410227737396835, "grad_norm": 1.0449589133108248, "learning_rate": 3.5133885152848056e-06, "loss": 0.0326, "step": 77675 }, { "epoch": 0.3241231400889586, "grad_norm": 0.8320734789789307, "learning_rate": 3.5132754395161267e-06, "loss": 0.0349, "step": 77680 }, { "epoch": 0.3241440028039489, "grad_norm": 0.3978654985117738, "learning_rate": 3.513162374664521e-06, "loss": 0.0221, "step": 77685 }, { "epoch": 0.3241648655189392, "grad_norm": 0.8346260152455741, "learning_rate": 3.513049320728232e-06, "loss": 0.0355, "step": 77690 }, { "epoch": 0.32418572823392944, "grad_norm": 1.012328259424357, "learning_rate": 3.5129362777055035e-06, "loss": 0.0308, "step": 77695 }, { "epoch": 0.32420659094891974, "grad_norm": 1.120437611545498, "learning_rate": 3.51282324559458e-06, "loss": 0.0361, "step": 77700 }, { "epoch": 0.32422745366391, "grad_norm": 0.7110453577769128, "learning_rate": 3.5127102243937057e-06, "loss": 0.0298, "step": 77705 }, { "epoch": 0.3242483163789003, "grad_norm": 1.0986967266801846, "learning_rate": 3.512597214101125e-06, "loss": 0.0325, "step": 77710 }, { "epoch": 0.3242691790938906, "grad_norm": 0.7900899458216831, "learning_rate": 3.5124842147150846e-06, "loss": 0.0309, "step": 77715 }, { "epoch": 0.3242900418088808, "grad_norm": 0.5332951123114604, "learning_rate": 3.5123712262338294e-06, "loss": 0.0262, "step": 77720 }, { "epoch": 0.3243109045238711, "grad_norm": 1.1980413890635446, "learning_rate": 3.512258248655606e-06, "loss": 0.023, "step": 77725 }, { "epoch": 0.3243317672388614, "grad_norm": 0.5357830326477239, "learning_rate": 3.512145281978661e-06, "loss": 0.0185, "step": 77730 }, { "epoch": 0.32435262995385167, "grad_norm": 0.6334068044204798, "learning_rate": 3.5120323262012412e-06, "loss": 0.0297, "step": 77735 }, { "epoch": 0.32437349266884197, "grad_norm": 1.7083300768335117, "learning_rate": 3.5119193813215952e-06, "loss": 0.0418, "step": 77740 }, { "epoch": 0.3243943553838322, "grad_norm": 0.6061505994712924, "learning_rate": 3.511806447337968e-06, "loss": 0.0223, "step": 77745 }, { "epoch": 0.3244152180988225, "grad_norm": 0.849752754170964, "learning_rate": 3.5116935242486105e-06, "loss": 0.0249, "step": 77750 }, { "epoch": 0.3244360808138128, "grad_norm": 1.0011098676457195, "learning_rate": 3.5115806120517698e-06, "loss": 0.0298, "step": 77755 }, { "epoch": 0.32445694352880305, "grad_norm": 0.39630652168858616, "learning_rate": 3.5114677107456953e-06, "loss": 0.0311, "step": 77760 }, { "epoch": 0.32447780624379335, "grad_norm": 0.7068844876250335, "learning_rate": 3.511354820328637e-06, "loss": 0.0297, "step": 77765 }, { "epoch": 0.3244986689587836, "grad_norm": 0.704227002071931, "learning_rate": 3.5112419407988436e-06, "loss": 0.0263, "step": 77770 }, { "epoch": 0.3245195316737739, "grad_norm": 0.7786099091039858, "learning_rate": 3.511129072154566e-06, "loss": 0.0328, "step": 77775 }, { "epoch": 0.3245403943887642, "grad_norm": 0.9528844734149652, "learning_rate": 3.5110162143940534e-06, "loss": 0.0265, "step": 77780 }, { "epoch": 0.32456125710375444, "grad_norm": 1.2373836434596608, "learning_rate": 3.510903367515559e-06, "loss": 0.0421, "step": 77785 }, { "epoch": 0.32458211981874474, "grad_norm": 0.6938337926815418, "learning_rate": 3.5107905315173317e-06, "loss": 0.024, "step": 77790 }, { "epoch": 0.324602982533735, "grad_norm": 0.7947830546620165, "learning_rate": 3.5106777063976256e-06, "loss": 0.0234, "step": 77795 }, { "epoch": 0.3246238452487253, "grad_norm": 0.618963347086792, "learning_rate": 3.5105648921546907e-06, "loss": 0.0265, "step": 77800 }, { "epoch": 0.3246447079637156, "grad_norm": 0.8058789801039349, "learning_rate": 3.5104520887867805e-06, "loss": 0.0235, "step": 77805 }, { "epoch": 0.32466557067870583, "grad_norm": 0.8005060993918369, "learning_rate": 3.5103392962921488e-06, "loss": 0.0342, "step": 77810 }, { "epoch": 0.32468643339369613, "grad_norm": 0.4359707081305932, "learning_rate": 3.5102265146690473e-06, "loss": 0.0245, "step": 77815 }, { "epoch": 0.3247072961086864, "grad_norm": 0.7104583362577241, "learning_rate": 3.51011374391573e-06, "loss": 0.0242, "step": 77820 }, { "epoch": 0.32472815882367667, "grad_norm": 0.6796034691977636, "learning_rate": 3.5100009840304517e-06, "loss": 0.0269, "step": 77825 }, { "epoch": 0.32474902153866697, "grad_norm": 0.6874417638180733, "learning_rate": 3.509888235011466e-06, "loss": 0.0279, "step": 77830 }, { "epoch": 0.3247698842536572, "grad_norm": 1.0889878331883196, "learning_rate": 3.5097754968570287e-06, "loss": 0.0363, "step": 77835 }, { "epoch": 0.3247907469686475, "grad_norm": 0.7470475166582797, "learning_rate": 3.509662769565395e-06, "loss": 0.0214, "step": 77840 }, { "epoch": 0.3248116096836378, "grad_norm": 1.3789943636110382, "learning_rate": 3.509550053134819e-06, "loss": 0.0323, "step": 77845 }, { "epoch": 0.32483247239862806, "grad_norm": 1.5784684143873486, "learning_rate": 3.5094373475635584e-06, "loss": 0.0383, "step": 77850 }, { "epoch": 0.32485333511361836, "grad_norm": 0.7242978709206827, "learning_rate": 3.5093246528498693e-06, "loss": 0.0291, "step": 77855 }, { "epoch": 0.3248741978286086, "grad_norm": 0.2926021948111561, "learning_rate": 3.5092119689920086e-06, "loss": 0.0179, "step": 77860 }, { "epoch": 0.3248950605435989, "grad_norm": 0.6879112641929362, "learning_rate": 3.5090992959882326e-06, "loss": 0.0364, "step": 77865 }, { "epoch": 0.3249159232585892, "grad_norm": 1.2146525707916427, "learning_rate": 3.5089866338367994e-06, "loss": 0.0282, "step": 77870 }, { "epoch": 0.32493678597357945, "grad_norm": 0.8195013390001872, "learning_rate": 3.508873982535968e-06, "loss": 0.0299, "step": 77875 }, { "epoch": 0.32495764868856974, "grad_norm": 1.3291530258485804, "learning_rate": 3.508761342083995e-06, "loss": 0.0336, "step": 77880 }, { "epoch": 0.32497851140356, "grad_norm": 0.362495653679441, "learning_rate": 3.5086487124791405e-06, "loss": 0.0193, "step": 77885 }, { "epoch": 0.3249993741185503, "grad_norm": 1.533766889583322, "learning_rate": 3.5085360937196637e-06, "loss": 0.0287, "step": 77890 }, { "epoch": 0.3250202368335406, "grad_norm": 0.8744801200620488, "learning_rate": 3.5084234858038234e-06, "loss": 0.0244, "step": 77895 }, { "epoch": 0.32504109954853083, "grad_norm": 1.735485376046679, "learning_rate": 3.5083108887298807e-06, "loss": 0.0364, "step": 77900 }, { "epoch": 0.32506196226352113, "grad_norm": 1.076243614696156, "learning_rate": 3.508198302496094e-06, "loss": 0.0288, "step": 77905 }, { "epoch": 0.32508282497851143, "grad_norm": 1.035565442539257, "learning_rate": 3.508085727100726e-06, "loss": 0.0267, "step": 77910 }, { "epoch": 0.3251036876935017, "grad_norm": 1.0108580384174095, "learning_rate": 3.5079731625420367e-06, "loss": 0.0312, "step": 77915 }, { "epoch": 0.325124550408492, "grad_norm": 0.9957170402251247, "learning_rate": 3.5078606088182886e-06, "loss": 0.0292, "step": 77920 }, { "epoch": 0.3251454131234822, "grad_norm": 0.6114309439072505, "learning_rate": 3.5077480659277426e-06, "loss": 0.0234, "step": 77925 }, { "epoch": 0.3251662758384725, "grad_norm": 1.172029846627437, "learning_rate": 3.5076355338686612e-06, "loss": 0.0332, "step": 77930 }, { "epoch": 0.3251871385534628, "grad_norm": 0.9388057996933299, "learning_rate": 3.507523012639307e-06, "loss": 0.0354, "step": 77935 }, { "epoch": 0.32520800126845306, "grad_norm": 2.032342979644479, "learning_rate": 3.507410502237944e-06, "loss": 0.0243, "step": 77940 }, { "epoch": 0.32522886398344336, "grad_norm": 0.8791841767941699, "learning_rate": 3.5072980026628346e-06, "loss": 0.0321, "step": 77945 }, { "epoch": 0.3252497266984336, "grad_norm": 1.2848956891800742, "learning_rate": 3.507185513912243e-06, "loss": 0.0291, "step": 77950 }, { "epoch": 0.3252705894134239, "grad_norm": 0.754378240189731, "learning_rate": 3.5070730359844335e-06, "loss": 0.028, "step": 77955 }, { "epoch": 0.3252914521284142, "grad_norm": 1.26661654977034, "learning_rate": 3.5069605688776716e-06, "loss": 0.0343, "step": 77960 }, { "epoch": 0.32531231484340445, "grad_norm": 0.5446287154314582, "learning_rate": 3.5068481125902203e-06, "loss": 0.0363, "step": 77965 }, { "epoch": 0.32533317755839475, "grad_norm": 0.8081719470263151, "learning_rate": 3.5067356671203466e-06, "loss": 0.0342, "step": 77970 }, { "epoch": 0.325354040273385, "grad_norm": 0.38307304086622485, "learning_rate": 3.5066232324663162e-06, "loss": 0.0273, "step": 77975 }, { "epoch": 0.3253749029883753, "grad_norm": 0.4628164289627907, "learning_rate": 3.506510808626395e-06, "loss": 0.0207, "step": 77980 }, { "epoch": 0.3253957657033656, "grad_norm": 1.1534709373489338, "learning_rate": 3.50639839559885e-06, "loss": 0.0379, "step": 77985 }, { "epoch": 0.32541662841835584, "grad_norm": 0.26296842951824306, "learning_rate": 3.506285993381947e-06, "loss": 0.0353, "step": 77990 }, { "epoch": 0.32543749113334614, "grad_norm": 1.031211279669121, "learning_rate": 3.506173601973955e-06, "loss": 0.0218, "step": 77995 }, { "epoch": 0.32545835384833643, "grad_norm": 0.8273961920617429, "learning_rate": 3.50606122137314e-06, "loss": 0.035, "step": 78000 }, { "epoch": 0.3254792165633267, "grad_norm": 0.7119854377002772, "learning_rate": 3.505948851577772e-06, "loss": 0.03, "step": 78005 }, { "epoch": 0.325500079278317, "grad_norm": 0.8153822844460528, "learning_rate": 3.5058364925861178e-06, "loss": 0.0255, "step": 78010 }, { "epoch": 0.3255209419933072, "grad_norm": 0.6846842216830605, "learning_rate": 3.5057241443964466e-06, "loss": 0.0292, "step": 78015 }, { "epoch": 0.3255418047082975, "grad_norm": 0.42576894539744436, "learning_rate": 3.50561180700703e-06, "loss": 0.0253, "step": 78020 }, { "epoch": 0.3255626674232878, "grad_norm": 1.1956492452731184, "learning_rate": 3.505499480416134e-06, "loss": 0.0358, "step": 78025 }, { "epoch": 0.32558353013827807, "grad_norm": 0.3673554279577539, "learning_rate": 3.5053871646220317e-06, "loss": 0.0303, "step": 78030 }, { "epoch": 0.32560439285326837, "grad_norm": 0.6886391728226784, "learning_rate": 3.505274859622992e-06, "loss": 0.0175, "step": 78035 }, { "epoch": 0.3256252555682586, "grad_norm": 0.6839978150657102, "learning_rate": 3.505162565417286e-06, "loss": 0.0269, "step": 78040 }, { "epoch": 0.3256461182832489, "grad_norm": 0.7083697278868866, "learning_rate": 3.505050282003186e-06, "loss": 0.0253, "step": 78045 }, { "epoch": 0.3256669809982392, "grad_norm": 0.6300949302034147, "learning_rate": 3.504938009378962e-06, "loss": 0.0278, "step": 78050 }, { "epoch": 0.32568784371322945, "grad_norm": 0.32732719363725027, "learning_rate": 3.5048257475428878e-06, "loss": 0.0163, "step": 78055 }, { "epoch": 0.32570870642821975, "grad_norm": 0.6700110042454768, "learning_rate": 3.504713496493234e-06, "loss": 0.0276, "step": 78060 }, { "epoch": 0.32572956914321, "grad_norm": 0.6869492186532656, "learning_rate": 3.5046012562282743e-06, "loss": 0.0425, "step": 78065 }, { "epoch": 0.3257504318582003, "grad_norm": 0.6733578860800509, "learning_rate": 3.5044890267462816e-06, "loss": 0.024, "step": 78070 }, { "epoch": 0.3257712945731906, "grad_norm": 0.8821998290284289, "learning_rate": 3.5043768080455297e-06, "loss": 0.0323, "step": 78075 }, { "epoch": 0.32579215728818084, "grad_norm": 0.7397016583482449, "learning_rate": 3.504264600124293e-06, "loss": 0.0265, "step": 78080 }, { "epoch": 0.32581302000317114, "grad_norm": 0.6457762738339129, "learning_rate": 3.504152402980845e-06, "loss": 0.0253, "step": 78085 }, { "epoch": 0.32583388271816144, "grad_norm": 1.2438553253574527, "learning_rate": 3.5040402166134607e-06, "loss": 0.0296, "step": 78090 }, { "epoch": 0.3258547454331517, "grad_norm": 0.4527244835688911, "learning_rate": 3.503928041020415e-06, "loss": 0.0373, "step": 78095 }, { "epoch": 0.325875608148142, "grad_norm": 1.0969884654536821, "learning_rate": 3.503815876199985e-06, "loss": 0.0206, "step": 78100 }, { "epoch": 0.3258964708631322, "grad_norm": 1.1090091502494483, "learning_rate": 3.503703722150445e-06, "loss": 0.0254, "step": 78105 }, { "epoch": 0.3259173335781225, "grad_norm": 0.8253193639790729, "learning_rate": 3.5035915788700705e-06, "loss": 0.0302, "step": 78110 }, { "epoch": 0.3259381962931128, "grad_norm": 0.7880404587254441, "learning_rate": 3.50347944635714e-06, "loss": 0.0315, "step": 78115 }, { "epoch": 0.32595905900810307, "grad_norm": 0.7342291560161772, "learning_rate": 3.5033673246099296e-06, "loss": 0.0282, "step": 78120 }, { "epoch": 0.32597992172309337, "grad_norm": 0.7995287508401847, "learning_rate": 3.503255213626717e-06, "loss": 0.0206, "step": 78125 }, { "epoch": 0.3260007844380836, "grad_norm": 1.5627551640476478, "learning_rate": 3.5031431134057804e-06, "loss": 0.0266, "step": 78130 }, { "epoch": 0.3260216471530739, "grad_norm": 0.7472020327727258, "learning_rate": 3.503031023945397e-06, "loss": 0.0239, "step": 78135 }, { "epoch": 0.3260425098680642, "grad_norm": 0.6951600657963359, "learning_rate": 3.502918945243846e-06, "loss": 0.0285, "step": 78140 }, { "epoch": 0.32606337258305446, "grad_norm": 0.7647663545727372, "learning_rate": 3.5028068772994067e-06, "loss": 0.0249, "step": 78145 }, { "epoch": 0.32608423529804476, "grad_norm": 0.9523920430092074, "learning_rate": 3.5026948201103576e-06, "loss": 0.0356, "step": 78150 }, { "epoch": 0.326105098013035, "grad_norm": 0.5742351444363397, "learning_rate": 3.5025827736749796e-06, "loss": 0.0293, "step": 78155 }, { "epoch": 0.3261259607280253, "grad_norm": 0.6695738297748094, "learning_rate": 3.502470737991552e-06, "loss": 0.028, "step": 78160 }, { "epoch": 0.3261468234430156, "grad_norm": 0.919359111521558, "learning_rate": 3.5023587130583552e-06, "loss": 0.0202, "step": 78165 }, { "epoch": 0.32616768615800584, "grad_norm": 1.1359578902069178, "learning_rate": 3.5022466988736703e-06, "loss": 0.0361, "step": 78170 }, { "epoch": 0.32618854887299614, "grad_norm": 1.281684967566387, "learning_rate": 3.5021346954357795e-06, "loss": 0.0357, "step": 78175 }, { "epoch": 0.3262094115879864, "grad_norm": 0.6923814046840772, "learning_rate": 3.5020227027429627e-06, "loss": 0.0259, "step": 78180 }, { "epoch": 0.3262302743029767, "grad_norm": 0.9068207034760528, "learning_rate": 3.5019107207935037e-06, "loss": 0.0324, "step": 78185 }, { "epoch": 0.326251137017967, "grad_norm": 1.325888703722802, "learning_rate": 3.501798749585683e-06, "loss": 0.024, "step": 78190 }, { "epoch": 0.32627199973295723, "grad_norm": 1.134624326964003, "learning_rate": 3.501686789117786e-06, "loss": 0.0328, "step": 78195 }, { "epoch": 0.32629286244794753, "grad_norm": 1.04958649007244, "learning_rate": 3.501574839388094e-06, "loss": 0.0284, "step": 78200 }, { "epoch": 0.32631372516293783, "grad_norm": 0.5886001799725388, "learning_rate": 3.50146290039489e-06, "loss": 0.0261, "step": 78205 }, { "epoch": 0.3263345878779281, "grad_norm": 0.8212596266025055, "learning_rate": 3.501350972136461e-06, "loss": 0.0281, "step": 78210 }, { "epoch": 0.3263554505929184, "grad_norm": 0.7462248267025329, "learning_rate": 3.501239054611088e-06, "loss": 0.0314, "step": 78215 }, { "epoch": 0.3263763133079086, "grad_norm": 0.7563774117101082, "learning_rate": 3.501127147817058e-06, "loss": 0.026, "step": 78220 }, { "epoch": 0.3263971760228989, "grad_norm": 0.59355587724401, "learning_rate": 3.5010152517526552e-06, "loss": 0.0255, "step": 78225 }, { "epoch": 0.3264180387378892, "grad_norm": 0.7363442044816483, "learning_rate": 3.5009033664161646e-06, "loss": 0.0292, "step": 78230 }, { "epoch": 0.32643890145287946, "grad_norm": 0.655463086933393, "learning_rate": 3.5007914918058732e-06, "loss": 0.0277, "step": 78235 }, { "epoch": 0.32645976416786976, "grad_norm": 0.3888652579277118, "learning_rate": 3.5006796279200668e-06, "loss": 0.0247, "step": 78240 }, { "epoch": 0.32648062688286, "grad_norm": 0.910115151593661, "learning_rate": 3.500567774757032e-06, "loss": 0.027, "step": 78245 }, { "epoch": 0.3265014895978503, "grad_norm": 1.2392378352543656, "learning_rate": 3.500455932315056e-06, "loss": 0.0337, "step": 78250 }, { "epoch": 0.3265223523128406, "grad_norm": 1.2688402780503392, "learning_rate": 3.5003441005924268e-06, "loss": 0.0358, "step": 78255 }, { "epoch": 0.32654321502783085, "grad_norm": 0.8064169492085107, "learning_rate": 3.5002322795874303e-06, "loss": 0.0239, "step": 78260 }, { "epoch": 0.32656407774282115, "grad_norm": 0.6913562707396667, "learning_rate": 3.5001204692983565e-06, "loss": 0.0297, "step": 78265 }, { "epoch": 0.3265849404578114, "grad_norm": 0.7630527520689231, "learning_rate": 3.500008669723495e-06, "loss": 0.0252, "step": 78270 }, { "epoch": 0.3266058031728017, "grad_norm": 0.733534951892128, "learning_rate": 3.4998968808611304e-06, "loss": 0.023, "step": 78275 }, { "epoch": 0.326626665887792, "grad_norm": 0.6335471375928928, "learning_rate": 3.4997851027095564e-06, "loss": 0.0274, "step": 78280 }, { "epoch": 0.32664752860278223, "grad_norm": 0.4908238544139339, "learning_rate": 3.4996733352670615e-06, "loss": 0.0417, "step": 78285 }, { "epoch": 0.32666839131777253, "grad_norm": 1.2386269431662102, "learning_rate": 3.4995615785319347e-06, "loss": 0.0248, "step": 78290 }, { "epoch": 0.32668925403276283, "grad_norm": 0.47992338288279046, "learning_rate": 3.499449832502468e-06, "loss": 0.027, "step": 78295 }, { "epoch": 0.3267101167477531, "grad_norm": 0.8290769617803676, "learning_rate": 3.499338097176951e-06, "loss": 0.0283, "step": 78300 }, { "epoch": 0.3267309794627434, "grad_norm": 0.9032956080451907, "learning_rate": 3.4992263725536756e-06, "loss": 0.036, "step": 78305 }, { "epoch": 0.3267518421777336, "grad_norm": 0.796969753613164, "learning_rate": 3.4991146586309334e-06, "loss": 0.0443, "step": 78310 }, { "epoch": 0.3267727048927239, "grad_norm": 0.6818881945862076, "learning_rate": 3.499002955407016e-06, "loss": 0.0273, "step": 78315 }, { "epoch": 0.3267935676077142, "grad_norm": 1.0692364744346283, "learning_rate": 3.4988912628802175e-06, "loss": 0.0243, "step": 78320 }, { "epoch": 0.32681443032270446, "grad_norm": 0.7241629913291137, "learning_rate": 3.498779581048828e-06, "loss": 0.0286, "step": 78325 }, { "epoch": 0.32683529303769476, "grad_norm": 0.625679061464406, "learning_rate": 3.498667909911142e-06, "loss": 0.0281, "step": 78330 }, { "epoch": 0.326856155752685, "grad_norm": 0.4758664859305828, "learning_rate": 3.498556249465454e-06, "loss": 0.0437, "step": 78335 }, { "epoch": 0.3268770184676753, "grad_norm": 1.0261344713857157, "learning_rate": 3.4984445997100562e-06, "loss": 0.0265, "step": 78340 }, { "epoch": 0.3268978811826656, "grad_norm": 1.1712861966240007, "learning_rate": 3.498332960643244e-06, "loss": 0.0355, "step": 78345 }, { "epoch": 0.32691874389765585, "grad_norm": 0.45997046165417627, "learning_rate": 3.4982213322633126e-06, "loss": 0.0291, "step": 78350 }, { "epoch": 0.32693960661264615, "grad_norm": 0.6876561853611509, "learning_rate": 3.498109714568555e-06, "loss": 0.0241, "step": 78355 }, { "epoch": 0.3269604693276364, "grad_norm": 0.8865244152455724, "learning_rate": 3.4979981075572683e-06, "loss": 0.0273, "step": 78360 }, { "epoch": 0.3269813320426267, "grad_norm": 0.8821200447329739, "learning_rate": 3.4978865112277476e-06, "loss": 0.0237, "step": 78365 }, { "epoch": 0.327002194757617, "grad_norm": 0.6140567341088271, "learning_rate": 3.49777492557829e-06, "loss": 0.0358, "step": 78370 }, { "epoch": 0.32702305747260724, "grad_norm": 2.165839798995878, "learning_rate": 3.497663350607191e-06, "loss": 0.0425, "step": 78375 }, { "epoch": 0.32704392018759754, "grad_norm": 0.27138762856008886, "learning_rate": 3.497551786312749e-06, "loss": 0.032, "step": 78380 }, { "epoch": 0.32706478290258784, "grad_norm": 0.5964112021261878, "learning_rate": 3.49744023269326e-06, "loss": 0.0247, "step": 78385 }, { "epoch": 0.3270856456175781, "grad_norm": 0.4064927877361058, "learning_rate": 3.4973286897470222e-06, "loss": 0.0275, "step": 78390 }, { "epoch": 0.3271065083325684, "grad_norm": 0.655344500099133, "learning_rate": 3.4972171574723336e-06, "loss": 0.031, "step": 78395 }, { "epoch": 0.3271273710475586, "grad_norm": 0.6265802810851643, "learning_rate": 3.497105635867493e-06, "loss": 0.0235, "step": 78400 }, { "epoch": 0.3271482337625489, "grad_norm": 0.7553978515595882, "learning_rate": 3.4969941249307986e-06, "loss": 0.0187, "step": 78405 }, { "epoch": 0.3271690964775392, "grad_norm": 0.611217509219111, "learning_rate": 3.49688262466055e-06, "loss": 0.0293, "step": 78410 }, { "epoch": 0.32718995919252947, "grad_norm": 0.6973233282631747, "learning_rate": 3.4967711350550475e-06, "loss": 0.0275, "step": 78415 }, { "epoch": 0.32721082190751977, "grad_norm": 0.6771115359182157, "learning_rate": 3.496659656112591e-06, "loss": 0.0314, "step": 78420 }, { "epoch": 0.32723168462251, "grad_norm": 0.7253985815491121, "learning_rate": 3.49654818783148e-06, "loss": 0.0319, "step": 78425 }, { "epoch": 0.3272525473375003, "grad_norm": 0.9512681646227825, "learning_rate": 3.4964367302100156e-06, "loss": 0.0337, "step": 78430 }, { "epoch": 0.3272734100524906, "grad_norm": 0.5961655901842446, "learning_rate": 3.496325283246499e-06, "loss": 0.0432, "step": 78435 }, { "epoch": 0.32729427276748085, "grad_norm": 1.3174391906443474, "learning_rate": 3.4962138469392326e-06, "loss": 0.0305, "step": 78440 }, { "epoch": 0.32731513548247115, "grad_norm": 0.8541412546850758, "learning_rate": 3.496102421286516e-06, "loss": 0.0311, "step": 78445 }, { "epoch": 0.3273359981974614, "grad_norm": 0.8063096134590072, "learning_rate": 3.4959910062866544e-06, "loss": 0.0268, "step": 78450 }, { "epoch": 0.3273568609124517, "grad_norm": 0.8801126800996913, "learning_rate": 3.4958796019379482e-06, "loss": 0.0255, "step": 78455 }, { "epoch": 0.327377723627442, "grad_norm": 0.9201316617928295, "learning_rate": 3.495768208238702e-06, "loss": 0.0272, "step": 78460 }, { "epoch": 0.32739858634243224, "grad_norm": 0.7312109678945942, "learning_rate": 3.4956568251872182e-06, "loss": 0.0301, "step": 78465 }, { "epoch": 0.32741944905742254, "grad_norm": 0.7156656191759563, "learning_rate": 3.4955454527818008e-06, "loss": 0.0337, "step": 78470 }, { "epoch": 0.32744031177241284, "grad_norm": 0.6813643559208163, "learning_rate": 3.495434091020754e-06, "loss": 0.0336, "step": 78475 }, { "epoch": 0.3274611744874031, "grad_norm": 0.7989845053729746, "learning_rate": 3.4953227399023826e-06, "loss": 0.0186, "step": 78480 }, { "epoch": 0.3274820372023934, "grad_norm": 0.7359451454965809, "learning_rate": 3.4952113994249907e-06, "loss": 0.0298, "step": 78485 }, { "epoch": 0.3275028999173836, "grad_norm": 1.0012324498644354, "learning_rate": 3.4951000695868858e-06, "loss": 0.0254, "step": 78490 }, { "epoch": 0.3275237626323739, "grad_norm": 0.4024371933482865, "learning_rate": 3.4949887503863716e-06, "loss": 0.0228, "step": 78495 }, { "epoch": 0.3275446253473642, "grad_norm": 0.5943436282519251, "learning_rate": 3.494877441821754e-06, "loss": 0.0223, "step": 78500 }, { "epoch": 0.32756548806235447, "grad_norm": 0.8137238048762498, "learning_rate": 3.4947661438913405e-06, "loss": 0.0281, "step": 78505 }, { "epoch": 0.32758635077734477, "grad_norm": 0.7807494910815098, "learning_rate": 3.4946548565934375e-06, "loss": 0.038, "step": 78510 }, { "epoch": 0.327607213492335, "grad_norm": 0.4152348655991783, "learning_rate": 3.494543579926352e-06, "loss": 0.0246, "step": 78515 }, { "epoch": 0.3276280762073253, "grad_norm": 1.3857663319882947, "learning_rate": 3.4944323138883916e-06, "loss": 0.0407, "step": 78520 }, { "epoch": 0.3276489389223156, "grad_norm": 0.5676637113907468, "learning_rate": 3.494321058477866e-06, "loss": 0.0357, "step": 78525 }, { "epoch": 0.32766980163730586, "grad_norm": 0.49862225536943244, "learning_rate": 3.4942098136930803e-06, "loss": 0.0263, "step": 78530 }, { "epoch": 0.32769066435229616, "grad_norm": 0.296251426959092, "learning_rate": 3.4940985795323456e-06, "loss": 0.0192, "step": 78535 }, { "epoch": 0.3277115270672864, "grad_norm": 0.6125139576434929, "learning_rate": 3.49398735599397e-06, "loss": 0.0249, "step": 78540 }, { "epoch": 0.3277323897822767, "grad_norm": 0.7309961829875484, "learning_rate": 3.4938761430762635e-06, "loss": 0.0362, "step": 78545 }, { "epoch": 0.327753252497267, "grad_norm": 0.6597277230887345, "learning_rate": 3.493764940777536e-06, "loss": 0.0185, "step": 78550 }, { "epoch": 0.32777411521225724, "grad_norm": 0.5993810642794342, "learning_rate": 3.4936537490960963e-06, "loss": 0.0297, "step": 78555 }, { "epoch": 0.32779497792724754, "grad_norm": 0.343391548829969, "learning_rate": 3.4935425680302572e-06, "loss": 0.0257, "step": 78560 }, { "epoch": 0.32781584064223784, "grad_norm": 1.0444776733127845, "learning_rate": 3.493431397578328e-06, "loss": 0.0325, "step": 78565 }, { "epoch": 0.3278367033572281, "grad_norm": 0.6849456687348525, "learning_rate": 3.493320237738621e-06, "loss": 0.0254, "step": 78570 }, { "epoch": 0.3278575660722184, "grad_norm": 0.6827041545649103, "learning_rate": 3.4932090885094473e-06, "loss": 0.0337, "step": 78575 }, { "epoch": 0.32787842878720863, "grad_norm": 1.353742885021153, "learning_rate": 3.493097949889119e-06, "loss": 0.0335, "step": 78580 }, { "epoch": 0.32789929150219893, "grad_norm": 0.5168251056850011, "learning_rate": 3.4929868218759496e-06, "loss": 0.0264, "step": 78585 }, { "epoch": 0.32792015421718923, "grad_norm": 0.7666174973801841, "learning_rate": 3.4928757044682504e-06, "loss": 0.028, "step": 78590 }, { "epoch": 0.3279410169321795, "grad_norm": 0.6880425810637354, "learning_rate": 3.492764597664336e-06, "loss": 0.0198, "step": 78595 }, { "epoch": 0.3279618796471698, "grad_norm": 0.7375305635395947, "learning_rate": 3.4926535014625183e-06, "loss": 0.0281, "step": 78600 }, { "epoch": 0.32798274236216, "grad_norm": 0.6952392344472349, "learning_rate": 3.4925424158611126e-06, "loss": 0.0425, "step": 78605 }, { "epoch": 0.3280036050771503, "grad_norm": 1.039999722862127, "learning_rate": 3.4924313408584336e-06, "loss": 0.028, "step": 78610 }, { "epoch": 0.3280244677921406, "grad_norm": 1.1207492135817871, "learning_rate": 3.4923202764527953e-06, "loss": 0.0353, "step": 78615 }, { "epoch": 0.32804533050713086, "grad_norm": 0.7530472989440061, "learning_rate": 3.492209222642513e-06, "loss": 0.0319, "step": 78620 }, { "epoch": 0.32806619322212116, "grad_norm": 0.7051126186883111, "learning_rate": 3.492098179425901e-06, "loss": 0.027, "step": 78625 }, { "epoch": 0.3280870559371114, "grad_norm": 0.5496291493517488, "learning_rate": 3.491987146801278e-06, "loss": 0.0231, "step": 78630 }, { "epoch": 0.3281079186521017, "grad_norm": 1.1104305182148924, "learning_rate": 3.4918761247669568e-06, "loss": 0.0311, "step": 78635 }, { "epoch": 0.328128781367092, "grad_norm": 0.6791718061823967, "learning_rate": 3.4917651133212573e-06, "loss": 0.032, "step": 78640 }, { "epoch": 0.32814964408208225, "grad_norm": 0.9276758885573363, "learning_rate": 3.4916541124624943e-06, "loss": 0.0289, "step": 78645 }, { "epoch": 0.32817050679707255, "grad_norm": 0.8840631158211971, "learning_rate": 3.4915431221889844e-06, "loss": 0.0251, "step": 78650 }, { "epoch": 0.32819136951206285, "grad_norm": 0.5781683084254752, "learning_rate": 3.491432142499048e-06, "loss": 0.0316, "step": 78655 }, { "epoch": 0.3282122322270531, "grad_norm": 1.1623235790547595, "learning_rate": 3.4913211733910005e-06, "loss": 0.0304, "step": 78660 }, { "epoch": 0.3282330949420434, "grad_norm": 0.6637583522513315, "learning_rate": 3.491210214863164e-06, "loss": 0.0331, "step": 78665 }, { "epoch": 0.32825395765703363, "grad_norm": 0.7497864728990312, "learning_rate": 3.491099266913853e-06, "loss": 0.0197, "step": 78670 }, { "epoch": 0.32827482037202393, "grad_norm": 0.5620820605315543, "learning_rate": 3.490988329541389e-06, "loss": 0.0249, "step": 78675 }, { "epoch": 0.32829568308701423, "grad_norm": 1.2019661186654405, "learning_rate": 3.490877402744092e-06, "loss": 0.0369, "step": 78680 }, { "epoch": 0.3283165458020045, "grad_norm": 0.6924181619452564, "learning_rate": 3.490766486520281e-06, "loss": 0.0232, "step": 78685 }, { "epoch": 0.3283374085169948, "grad_norm": 0.5867605796531463, "learning_rate": 3.4906555808682775e-06, "loss": 0.0303, "step": 78690 }, { "epoch": 0.328358271231985, "grad_norm": 0.9744238035381655, "learning_rate": 3.4905446857863994e-06, "loss": 0.0293, "step": 78695 }, { "epoch": 0.3283791339469753, "grad_norm": 1.0833030492441944, "learning_rate": 3.490433801272971e-06, "loss": 0.0311, "step": 78700 }, { "epoch": 0.3283999966619656, "grad_norm": 0.38650058219334193, "learning_rate": 3.4903229273263128e-06, "loss": 0.023, "step": 78705 }, { "epoch": 0.32842085937695586, "grad_norm": 1.2079048260577903, "learning_rate": 3.4902120639447457e-06, "loss": 0.0351, "step": 78710 }, { "epoch": 0.32844172209194616, "grad_norm": 1.05725974458455, "learning_rate": 3.490101211126593e-06, "loss": 0.0324, "step": 78715 }, { "epoch": 0.3284625848069364, "grad_norm": 1.3020269351877816, "learning_rate": 3.489990368870177e-06, "loss": 0.0327, "step": 78720 }, { "epoch": 0.3284834475219267, "grad_norm": 0.8119451583316248, "learning_rate": 3.4898795371738197e-06, "loss": 0.0286, "step": 78725 }, { "epoch": 0.328504310236917, "grad_norm": 0.4249470280282424, "learning_rate": 3.489768716035846e-06, "loss": 0.0179, "step": 78730 }, { "epoch": 0.32852517295190725, "grad_norm": 0.6756675978657847, "learning_rate": 3.4896579054545788e-06, "loss": 0.0245, "step": 78735 }, { "epoch": 0.32854603566689755, "grad_norm": 0.6575593922979097, "learning_rate": 3.4895471054283415e-06, "loss": 0.0269, "step": 78740 }, { "epoch": 0.32856689838188785, "grad_norm": 0.9465786641704425, "learning_rate": 3.4894363159554596e-06, "loss": 0.05, "step": 78745 }, { "epoch": 0.3285877610968781, "grad_norm": 0.4711260908712176, "learning_rate": 3.489325537034258e-06, "loss": 0.0265, "step": 78750 }, { "epoch": 0.3286086238118684, "grad_norm": 1.0097063321719222, "learning_rate": 3.4892147686630605e-06, "loss": 0.0307, "step": 78755 }, { "epoch": 0.32862948652685864, "grad_norm": 0.6285037297280431, "learning_rate": 3.489104010840194e-06, "loss": 0.0275, "step": 78760 }, { "epoch": 0.32865034924184894, "grad_norm": 0.5104340222768324, "learning_rate": 3.4889932635639844e-06, "loss": 0.0247, "step": 78765 }, { "epoch": 0.32867121195683924, "grad_norm": 0.5800903087169162, "learning_rate": 3.4888825268327576e-06, "loss": 0.0247, "step": 78770 }, { "epoch": 0.3286920746718295, "grad_norm": 0.9214271531719461, "learning_rate": 3.48877180064484e-06, "loss": 0.0314, "step": 78775 }, { "epoch": 0.3287129373868198, "grad_norm": 0.702341236390416, "learning_rate": 3.488661084998559e-06, "loss": 0.0242, "step": 78780 }, { "epoch": 0.32873380010181, "grad_norm": 0.8865970618786968, "learning_rate": 3.4885503798922417e-06, "loss": 0.025, "step": 78785 }, { "epoch": 0.3287546628168003, "grad_norm": 0.6970666760070132, "learning_rate": 3.488439685324217e-06, "loss": 0.0301, "step": 78790 }, { "epoch": 0.3287755255317906, "grad_norm": 0.38120635775124356, "learning_rate": 3.488329001292811e-06, "loss": 0.0201, "step": 78795 }, { "epoch": 0.32879638824678087, "grad_norm": 0.6439011904647884, "learning_rate": 3.488218327796355e-06, "loss": 0.0269, "step": 78800 }, { "epoch": 0.32881725096177117, "grad_norm": 0.5900771266483409, "learning_rate": 3.488107664833175e-06, "loss": 0.0317, "step": 78805 }, { "epoch": 0.3288381136767614, "grad_norm": 1.16539447648313, "learning_rate": 3.4879970124016025e-06, "loss": 0.0296, "step": 78810 }, { "epoch": 0.3288589763917517, "grad_norm": 0.5320901559933895, "learning_rate": 3.487886370499966e-06, "loss": 0.0339, "step": 78815 }, { "epoch": 0.328879839106742, "grad_norm": 1.9274184766152138, "learning_rate": 3.487775739126596e-06, "loss": 0.0255, "step": 78820 }, { "epoch": 0.32890070182173226, "grad_norm": 0.5909363891553362, "learning_rate": 3.487665118279823e-06, "loss": 0.0243, "step": 78825 }, { "epoch": 0.32892156453672255, "grad_norm": 0.6490767003487916, "learning_rate": 3.4875545079579766e-06, "loss": 0.03, "step": 78830 }, { "epoch": 0.32894242725171285, "grad_norm": 0.7283225967099133, "learning_rate": 3.48744390815939e-06, "loss": 0.0299, "step": 78835 }, { "epoch": 0.3289632899667031, "grad_norm": 0.7066857239203368, "learning_rate": 3.4873333188823915e-06, "loss": 0.0309, "step": 78840 }, { "epoch": 0.3289841526816934, "grad_norm": 0.8151443407692297, "learning_rate": 3.4872227401253166e-06, "loss": 0.0315, "step": 78845 }, { "epoch": 0.32900501539668364, "grad_norm": 0.7935431876168872, "learning_rate": 3.487112171886496e-06, "loss": 0.045, "step": 78850 }, { "epoch": 0.32902587811167394, "grad_norm": 1.1108810673933924, "learning_rate": 3.487001614164261e-06, "loss": 0.0223, "step": 78855 }, { "epoch": 0.32904674082666424, "grad_norm": 1.099867576552013, "learning_rate": 3.4868910669569473e-06, "loss": 0.0351, "step": 78860 }, { "epoch": 0.3290676035416545, "grad_norm": 0.4496591988680832, "learning_rate": 3.486780530262886e-06, "loss": 0.0187, "step": 78865 }, { "epoch": 0.3290884662566448, "grad_norm": 0.6849934285328836, "learning_rate": 3.4866700040804114e-06, "loss": 0.0308, "step": 78870 }, { "epoch": 0.32910932897163503, "grad_norm": 0.7569674990243254, "learning_rate": 3.4865594884078576e-06, "loss": 0.0325, "step": 78875 }, { "epoch": 0.32913019168662533, "grad_norm": 0.7139162327966976, "learning_rate": 3.4864489832435598e-06, "loss": 0.0267, "step": 78880 }, { "epoch": 0.32915105440161563, "grad_norm": 0.5944453686665324, "learning_rate": 3.486338488585852e-06, "loss": 0.0312, "step": 78885 }, { "epoch": 0.32917191711660587, "grad_norm": 0.7738169712891757, "learning_rate": 3.4862280044330694e-06, "loss": 0.0309, "step": 78890 }, { "epoch": 0.32919277983159617, "grad_norm": 0.8414956489118343, "learning_rate": 3.486117530783548e-06, "loss": 0.0312, "step": 78895 }, { "epoch": 0.3292136425465864, "grad_norm": 0.8004054792901022, "learning_rate": 3.4860070676356238e-06, "loss": 0.0342, "step": 78900 }, { "epoch": 0.3292345052615767, "grad_norm": 0.6701911937773001, "learning_rate": 3.4858966149876334e-06, "loss": 0.0249, "step": 78905 }, { "epoch": 0.329255367976567, "grad_norm": 0.6680203080822001, "learning_rate": 3.485786172837912e-06, "loss": 0.0268, "step": 78910 }, { "epoch": 0.32927623069155726, "grad_norm": 1.139680485502262, "learning_rate": 3.485675741184798e-06, "loss": 0.033, "step": 78915 }, { "epoch": 0.32929709340654756, "grad_norm": 1.1648237360706, "learning_rate": 3.4855653200266283e-06, "loss": 0.034, "step": 78920 }, { "epoch": 0.32931795612153786, "grad_norm": 1.281131258917151, "learning_rate": 3.485454909361741e-06, "loss": 0.0369, "step": 78925 }, { "epoch": 0.3293388188365281, "grad_norm": 1.0722255512489942, "learning_rate": 3.4853445091884736e-06, "loss": 0.0373, "step": 78930 }, { "epoch": 0.3293596815515184, "grad_norm": 0.5923935977990665, "learning_rate": 3.485234119505165e-06, "loss": 0.026, "step": 78935 }, { "epoch": 0.32938054426650865, "grad_norm": 1.075597794492804, "learning_rate": 3.485123740310154e-06, "loss": 0.0354, "step": 78940 }, { "epoch": 0.32940140698149895, "grad_norm": 0.42160414814289465, "learning_rate": 3.485013371601781e-06, "loss": 0.0283, "step": 78945 }, { "epoch": 0.32942226969648924, "grad_norm": 0.6799751278174807, "learning_rate": 3.484903013378384e-06, "loss": 0.0221, "step": 78950 }, { "epoch": 0.3294431324114795, "grad_norm": 0.6295912640932887, "learning_rate": 3.4847926656383037e-06, "loss": 0.0239, "step": 78955 }, { "epoch": 0.3294639951264698, "grad_norm": 1.1976106812896459, "learning_rate": 3.4846823283798796e-06, "loss": 0.0381, "step": 78960 }, { "epoch": 0.32948485784146003, "grad_norm": 0.8277118352478667, "learning_rate": 3.4845720016014544e-06, "loss": 0.0305, "step": 78965 }, { "epoch": 0.32950572055645033, "grad_norm": 0.7368950267462442, "learning_rate": 3.484461685301367e-06, "loss": 0.0252, "step": 78970 }, { "epoch": 0.32952658327144063, "grad_norm": 0.7723227507719915, "learning_rate": 3.484351379477961e-06, "loss": 0.031, "step": 78975 }, { "epoch": 0.3295474459864309, "grad_norm": 1.126014091331029, "learning_rate": 3.4842410841295765e-06, "loss": 0.0308, "step": 78980 }, { "epoch": 0.3295683087014212, "grad_norm": 0.9744764860055233, "learning_rate": 3.4841307992545557e-06, "loss": 0.0335, "step": 78985 }, { "epoch": 0.3295891714164114, "grad_norm": 0.8264731670164686, "learning_rate": 3.4840205248512426e-06, "loss": 0.0279, "step": 78990 }, { "epoch": 0.3296100341314017, "grad_norm": 1.5674069324412787, "learning_rate": 3.4839102609179793e-06, "loss": 0.0383, "step": 78995 }, { "epoch": 0.329630896846392, "grad_norm": 0.5423198061233355, "learning_rate": 3.4838000074531085e-06, "loss": 0.0319, "step": 79000 }, { "epoch": 0.32965175956138226, "grad_norm": 0.5815600340471501, "learning_rate": 3.4836897644549748e-06, "loss": 0.0199, "step": 79005 }, { "epoch": 0.32967262227637256, "grad_norm": 0.5171615038706535, "learning_rate": 3.4835795319219216e-06, "loss": 0.0267, "step": 79010 }, { "epoch": 0.32969348499136286, "grad_norm": 1.0958072131252343, "learning_rate": 3.4834693098522944e-06, "loss": 0.0296, "step": 79015 }, { "epoch": 0.3297143477063531, "grad_norm": 0.592655473364052, "learning_rate": 3.483359098244436e-06, "loss": 0.0232, "step": 79020 }, { "epoch": 0.3297352104213434, "grad_norm": 1.2554885068731814, "learning_rate": 3.4832488970966932e-06, "loss": 0.0298, "step": 79025 }, { "epoch": 0.32975607313633365, "grad_norm": 0.9390131606980789, "learning_rate": 3.483138706407412e-06, "loss": 0.0313, "step": 79030 }, { "epoch": 0.32977693585132395, "grad_norm": 0.36800754279204345, "learning_rate": 3.483028526174935e-06, "loss": 0.0309, "step": 79035 }, { "epoch": 0.32979779856631425, "grad_norm": 0.5261225816399161, "learning_rate": 3.4829183563976128e-06, "loss": 0.0313, "step": 79040 }, { "epoch": 0.3298186612813045, "grad_norm": 0.7221578626708549, "learning_rate": 3.4828081970737888e-06, "loss": 0.0284, "step": 79045 }, { "epoch": 0.3298395239962948, "grad_norm": 0.8371321899481882, "learning_rate": 3.482698048201811e-06, "loss": 0.0219, "step": 79050 }, { "epoch": 0.32986038671128504, "grad_norm": 1.0600240880473326, "learning_rate": 3.4825879097800275e-06, "loss": 0.0445, "step": 79055 }, { "epoch": 0.32988124942627534, "grad_norm": 1.3413951550119216, "learning_rate": 3.4824777818067844e-06, "loss": 0.047, "step": 79060 }, { "epoch": 0.32990211214126564, "grad_norm": 1.1764284099401918, "learning_rate": 3.4823676642804318e-06, "loss": 0.0305, "step": 79065 }, { "epoch": 0.3299229748562559, "grad_norm": 0.5951177421581921, "learning_rate": 3.4822575571993155e-06, "loss": 0.032, "step": 79070 }, { "epoch": 0.3299438375712462, "grad_norm": 0.5741433638795485, "learning_rate": 3.482147460561786e-06, "loss": 0.03, "step": 79075 }, { "epoch": 0.3299647002862364, "grad_norm": 0.9875399690122463, "learning_rate": 3.482037374366193e-06, "loss": 0.0329, "step": 79080 }, { "epoch": 0.3299855630012267, "grad_norm": 0.7704442248252193, "learning_rate": 3.4819272986108854e-06, "loss": 0.04, "step": 79085 }, { "epoch": 0.330006425716217, "grad_norm": 0.9193452155585582, "learning_rate": 3.4818172332942122e-06, "loss": 0.0288, "step": 79090 }, { "epoch": 0.33002728843120727, "grad_norm": 0.71929950707571, "learning_rate": 3.481707178414524e-06, "loss": 0.0345, "step": 79095 }, { "epoch": 0.33004815114619757, "grad_norm": 0.6371069191246356, "learning_rate": 3.4815971339701732e-06, "loss": 0.0204, "step": 79100 }, { "epoch": 0.33006901386118787, "grad_norm": 0.8692582152356959, "learning_rate": 3.481487099959508e-06, "loss": 0.0326, "step": 79105 }, { "epoch": 0.3300898765761781, "grad_norm": 0.7231486053019517, "learning_rate": 3.481377076380882e-06, "loss": 0.0247, "step": 79110 }, { "epoch": 0.3301107392911684, "grad_norm": 1.0811988285686365, "learning_rate": 3.481267063232646e-06, "loss": 0.0306, "step": 79115 }, { "epoch": 0.33013160200615865, "grad_norm": 0.46314602310922015, "learning_rate": 3.4811570605131515e-06, "loss": 0.028, "step": 79120 }, { "epoch": 0.33015246472114895, "grad_norm": 0.6518678692369483, "learning_rate": 3.4810470682207527e-06, "loss": 0.0192, "step": 79125 }, { "epoch": 0.33017332743613925, "grad_norm": 0.6103141975631509, "learning_rate": 3.4809370863538002e-06, "loss": 0.0317, "step": 79130 }, { "epoch": 0.3301941901511295, "grad_norm": 0.9280670970899793, "learning_rate": 3.4808271149106486e-06, "loss": 0.0383, "step": 79135 }, { "epoch": 0.3302150528661198, "grad_norm": 0.9328464636713256, "learning_rate": 3.4807171538896504e-06, "loss": 0.027, "step": 79140 }, { "epoch": 0.33023591558111004, "grad_norm": 1.0177866781622853, "learning_rate": 3.4806072032891615e-06, "loss": 0.0301, "step": 79145 }, { "epoch": 0.33025677829610034, "grad_norm": 0.4035330326177192, "learning_rate": 3.480497263107534e-06, "loss": 0.027, "step": 79150 }, { "epoch": 0.33027764101109064, "grad_norm": 0.812138556411648, "learning_rate": 3.4803873333431236e-06, "loss": 0.0287, "step": 79155 }, { "epoch": 0.3302985037260809, "grad_norm": 0.6290291259057774, "learning_rate": 3.480277413994285e-06, "loss": 0.0253, "step": 79160 }, { "epoch": 0.3303193664410712, "grad_norm": 0.8883953336659879, "learning_rate": 3.4801675050593735e-06, "loss": 0.0236, "step": 79165 }, { "epoch": 0.3303402291560614, "grad_norm": 0.5620574097890628, "learning_rate": 3.4800576065367454e-06, "loss": 0.0365, "step": 79170 }, { "epoch": 0.3303610918710517, "grad_norm": 0.6335968968379804, "learning_rate": 3.479947718424756e-06, "loss": 0.0344, "step": 79175 }, { "epoch": 0.330381954586042, "grad_norm": 1.701000929103523, "learning_rate": 3.479837840721762e-06, "loss": 0.0296, "step": 79180 }, { "epoch": 0.33040281730103227, "grad_norm": 1.2352032227909666, "learning_rate": 3.4797279734261203e-06, "loss": 0.0291, "step": 79185 }, { "epoch": 0.33042368001602257, "grad_norm": 0.4719894146352189, "learning_rate": 3.479618116536188e-06, "loss": 0.0252, "step": 79190 }, { "epoch": 0.33044454273101287, "grad_norm": 0.7165346219732436, "learning_rate": 3.4795082700503223e-06, "loss": 0.0234, "step": 79195 }, { "epoch": 0.3304654054460031, "grad_norm": 0.8569947558803488, "learning_rate": 3.479398433966882e-06, "loss": 0.0259, "step": 79200 }, { "epoch": 0.3304862681609934, "grad_norm": 0.6415966115673175, "learning_rate": 3.479288608284225e-06, "loss": 0.0428, "step": 79205 }, { "epoch": 0.33050713087598366, "grad_norm": 0.7771829830458533, "learning_rate": 3.479178793000709e-06, "loss": 0.0284, "step": 79210 }, { "epoch": 0.33052799359097396, "grad_norm": 0.46802256709312007, "learning_rate": 3.479068988114694e-06, "loss": 0.0292, "step": 79215 }, { "epoch": 0.33054885630596426, "grad_norm": 0.48209143312175723, "learning_rate": 3.47895919362454e-06, "loss": 0.0298, "step": 79220 }, { "epoch": 0.3305697190209545, "grad_norm": 0.9590194105949034, "learning_rate": 3.4788494095286045e-06, "loss": 0.0362, "step": 79225 }, { "epoch": 0.3305905817359448, "grad_norm": 1.1537074799608402, "learning_rate": 3.47873963582525e-06, "loss": 0.0316, "step": 79230 }, { "epoch": 0.33061144445093504, "grad_norm": 0.5365361871992118, "learning_rate": 3.4786298725128352e-06, "loss": 0.0351, "step": 79235 }, { "epoch": 0.33063230716592534, "grad_norm": 0.65262550438904, "learning_rate": 3.4785201195897218e-06, "loss": 0.0242, "step": 79240 }, { "epoch": 0.33065316988091564, "grad_norm": 1.123791586964935, "learning_rate": 3.4784103770542705e-06, "loss": 0.0469, "step": 79245 }, { "epoch": 0.3306740325959059, "grad_norm": 0.6784795511703215, "learning_rate": 3.478300644904843e-06, "loss": 0.0195, "step": 79250 }, { "epoch": 0.3306948953108962, "grad_norm": 2.7540916366358665, "learning_rate": 3.478190923139801e-06, "loss": 0.0414, "step": 79255 }, { "epoch": 0.33071575802588643, "grad_norm": 0.8167356539327107, "learning_rate": 3.478081211757506e-06, "loss": 0.0232, "step": 79260 }, { "epoch": 0.33073662074087673, "grad_norm": 1.1798823494466628, "learning_rate": 3.4779715107563235e-06, "loss": 0.0263, "step": 79265 }, { "epoch": 0.33075748345586703, "grad_norm": 0.8032853811536322, "learning_rate": 3.477861820134613e-06, "loss": 0.0305, "step": 79270 }, { "epoch": 0.3307783461708573, "grad_norm": 1.2099663466340822, "learning_rate": 3.4777521398907404e-06, "loss": 0.0261, "step": 79275 }, { "epoch": 0.3307992088858476, "grad_norm": 0.705811855378828, "learning_rate": 3.477642470023067e-06, "loss": 0.0327, "step": 79280 }, { "epoch": 0.3308200716008379, "grad_norm": 1.12572627734833, "learning_rate": 3.4775328105299593e-06, "loss": 0.0329, "step": 79285 }, { "epoch": 0.3308409343158281, "grad_norm": 0.6506658906236046, "learning_rate": 3.47742316140978e-06, "loss": 0.0295, "step": 79290 }, { "epoch": 0.3308617970308184, "grad_norm": 0.5403103739267532, "learning_rate": 3.477313522660895e-06, "loss": 0.0272, "step": 79295 }, { "epoch": 0.33088265974580866, "grad_norm": 1.1061404385156637, "learning_rate": 3.4772038942816684e-06, "loss": 0.0217, "step": 79300 }, { "epoch": 0.33090352246079896, "grad_norm": 0.6186328401212805, "learning_rate": 3.477094276270466e-06, "loss": 0.0264, "step": 79305 }, { "epoch": 0.33092438517578926, "grad_norm": 1.8772288430373734, "learning_rate": 3.4769846686256543e-06, "loss": 0.0344, "step": 79310 }, { "epoch": 0.3309452478907795, "grad_norm": 0.5223980058982025, "learning_rate": 3.4768750713455983e-06, "loss": 0.026, "step": 79315 }, { "epoch": 0.3309661106057698, "grad_norm": 0.5937020316369083, "learning_rate": 3.4767654844286656e-06, "loss": 0.0277, "step": 79320 }, { "epoch": 0.33098697332076005, "grad_norm": 0.7970336466290633, "learning_rate": 3.4766559078732236e-06, "loss": 0.0328, "step": 79325 }, { "epoch": 0.33100783603575035, "grad_norm": 1.2862650284149668, "learning_rate": 3.4765463416776377e-06, "loss": 0.031, "step": 79330 }, { "epoch": 0.33102869875074065, "grad_norm": 0.7991765909160611, "learning_rate": 3.4764367858402777e-06, "loss": 0.0298, "step": 79335 }, { "epoch": 0.3310495614657309, "grad_norm": 1.0256932757322264, "learning_rate": 3.4763272403595106e-06, "loss": 0.0287, "step": 79340 }, { "epoch": 0.3310704241807212, "grad_norm": 1.1059707255248803, "learning_rate": 3.4762177052337045e-06, "loss": 0.0256, "step": 79345 }, { "epoch": 0.33109128689571143, "grad_norm": 0.828773535890462, "learning_rate": 3.4761081804612286e-06, "loss": 0.0289, "step": 79350 }, { "epoch": 0.33111214961070173, "grad_norm": 1.1043176078162489, "learning_rate": 3.4759986660404516e-06, "loss": 0.0336, "step": 79355 }, { "epoch": 0.33113301232569203, "grad_norm": 0.9042018966012046, "learning_rate": 3.4758891619697426e-06, "loss": 0.0321, "step": 79360 }, { "epoch": 0.3311538750406823, "grad_norm": 0.7516272267620846, "learning_rate": 3.475779668247473e-06, "loss": 0.0414, "step": 79365 }, { "epoch": 0.3311747377556726, "grad_norm": 0.8581820023941235, "learning_rate": 3.4756701848720114e-06, "loss": 0.0283, "step": 79370 }, { "epoch": 0.3311956004706629, "grad_norm": 0.9006645281502716, "learning_rate": 3.4755607118417294e-06, "loss": 0.0304, "step": 79375 }, { "epoch": 0.3312164631856531, "grad_norm": 0.61177710469875, "learning_rate": 3.475451249154997e-06, "loss": 0.0187, "step": 79380 }, { "epoch": 0.3312373259006434, "grad_norm": 0.4674928839917617, "learning_rate": 3.475341796810186e-06, "loss": 0.0266, "step": 79385 }, { "epoch": 0.33125818861563366, "grad_norm": 0.7930889569735536, "learning_rate": 3.475232354805668e-06, "loss": 0.0332, "step": 79390 }, { "epoch": 0.33127905133062396, "grad_norm": 0.8283161409845222, "learning_rate": 3.475122923139815e-06, "loss": 0.025, "step": 79395 }, { "epoch": 0.33129991404561426, "grad_norm": 0.9575776874692273, "learning_rate": 3.475013501810999e-06, "loss": 0.0348, "step": 79400 }, { "epoch": 0.3313207767606045, "grad_norm": 0.8178502432573496, "learning_rate": 3.4749040908175924e-06, "loss": 0.0263, "step": 79405 }, { "epoch": 0.3313416394755948, "grad_norm": 0.7499807980486052, "learning_rate": 3.4747946901579693e-06, "loss": 0.0278, "step": 79410 }, { "epoch": 0.33136250219058505, "grad_norm": 0.58976544633396, "learning_rate": 3.4746852998305023e-06, "loss": 0.0321, "step": 79415 }, { "epoch": 0.33138336490557535, "grad_norm": 0.7248921246051505, "learning_rate": 3.4745759198335653e-06, "loss": 0.0254, "step": 79420 }, { "epoch": 0.33140422762056565, "grad_norm": 0.596946808529056, "learning_rate": 3.4744665501655326e-06, "loss": 0.0308, "step": 79425 }, { "epoch": 0.3314250903355559, "grad_norm": 0.7902698818696757, "learning_rate": 3.4743571908247793e-06, "loss": 0.0291, "step": 79430 }, { "epoch": 0.3314459530505462, "grad_norm": 0.8007258838205105, "learning_rate": 3.4742478418096787e-06, "loss": 0.0273, "step": 79435 }, { "epoch": 0.33146681576553644, "grad_norm": 0.6359179987362564, "learning_rate": 3.474138503118607e-06, "loss": 0.0387, "step": 79440 }, { "epoch": 0.33148767848052674, "grad_norm": 0.7383716974994761, "learning_rate": 3.47402917474994e-06, "loss": 0.0445, "step": 79445 }, { "epoch": 0.33150854119551704, "grad_norm": 0.508666316488805, "learning_rate": 3.4739198567020526e-06, "loss": 0.021, "step": 79450 }, { "epoch": 0.3315294039105073, "grad_norm": 0.778008800020767, "learning_rate": 3.4738105489733216e-06, "loss": 0.0311, "step": 79455 }, { "epoch": 0.3315502666254976, "grad_norm": 0.8437431187199491, "learning_rate": 3.4737012515621244e-06, "loss": 0.0294, "step": 79460 }, { "epoch": 0.3315711293404879, "grad_norm": 0.24154520202491003, "learning_rate": 3.473591964466837e-06, "loss": 0.02, "step": 79465 }, { "epoch": 0.3315919920554781, "grad_norm": 0.4869957437206492, "learning_rate": 3.473482687685837e-06, "loss": 0.0284, "step": 79470 }, { "epoch": 0.3316128547704684, "grad_norm": 0.8797891811969328, "learning_rate": 3.473373421217502e-06, "loss": 0.0316, "step": 79475 }, { "epoch": 0.33163371748545867, "grad_norm": 0.8076725154900695, "learning_rate": 3.47326416506021e-06, "loss": 0.0442, "step": 79480 }, { "epoch": 0.33165458020044897, "grad_norm": 0.7816932328056405, "learning_rate": 3.4731549192123403e-06, "loss": 0.0282, "step": 79485 }, { "epoch": 0.33167544291543927, "grad_norm": 0.6022957677976812, "learning_rate": 3.4730456836722704e-06, "loss": 0.028, "step": 79490 }, { "epoch": 0.3316963056304295, "grad_norm": 1.3643241051583228, "learning_rate": 3.47293645843838e-06, "loss": 0.0283, "step": 79495 }, { "epoch": 0.3317171683454198, "grad_norm": 0.6802135814489089, "learning_rate": 3.472827243509049e-06, "loss": 0.0304, "step": 79500 }, { "epoch": 0.33173803106041005, "grad_norm": 0.8513934108876323, "learning_rate": 3.4727180388826564e-06, "loss": 0.0287, "step": 79505 }, { "epoch": 0.33175889377540035, "grad_norm": 0.5136830263754242, "learning_rate": 3.4726088445575835e-06, "loss": 0.0246, "step": 79510 }, { "epoch": 0.33177975649039065, "grad_norm": 0.7061774024448815, "learning_rate": 3.4724996605322093e-06, "loss": 0.0338, "step": 79515 }, { "epoch": 0.3318006192053809, "grad_norm": 0.4538195876269579, "learning_rate": 3.472390486804916e-06, "loss": 0.0217, "step": 79520 }, { "epoch": 0.3318214819203712, "grad_norm": 0.5688936375937934, "learning_rate": 3.4722813233740844e-06, "loss": 0.0287, "step": 79525 }, { "epoch": 0.33184234463536144, "grad_norm": 1.3525088070625941, "learning_rate": 3.4721721702380968e-06, "loss": 0.0253, "step": 79530 }, { "epoch": 0.33186320735035174, "grad_norm": 1.3241636627030229, "learning_rate": 3.4720630273953336e-06, "loss": 0.0232, "step": 79535 }, { "epoch": 0.33188407006534204, "grad_norm": 0.9261727634225108, "learning_rate": 3.4719538948441782e-06, "loss": 0.0336, "step": 79540 }, { "epoch": 0.3319049327803323, "grad_norm": 0.8910941517130321, "learning_rate": 3.471844772583014e-06, "loss": 0.0341, "step": 79545 }, { "epoch": 0.3319257954953226, "grad_norm": 1.192443612143626, "learning_rate": 3.4717356606102228e-06, "loss": 0.0267, "step": 79550 }, { "epoch": 0.3319466582103129, "grad_norm": 0.55302541848816, "learning_rate": 3.4716265589241887e-06, "loss": 0.0256, "step": 79555 }, { "epoch": 0.33196752092530313, "grad_norm": 1.1885690511000289, "learning_rate": 3.471517467523295e-06, "loss": 0.0289, "step": 79560 }, { "epoch": 0.3319883836402934, "grad_norm": 0.8955372729395763, "learning_rate": 3.4714083864059258e-06, "loss": 0.0231, "step": 79565 }, { "epoch": 0.33200924635528367, "grad_norm": 0.8792431251965199, "learning_rate": 3.471299315570466e-06, "loss": 0.0333, "step": 79570 }, { "epoch": 0.33203010907027397, "grad_norm": 0.5929391679339212, "learning_rate": 3.4711902550152998e-06, "loss": 0.0338, "step": 79575 }, { "epoch": 0.33205097178526427, "grad_norm": 1.7472874333927324, "learning_rate": 3.471081204738813e-06, "loss": 0.0348, "step": 79580 }, { "epoch": 0.3320718345002545, "grad_norm": 1.2632022708539523, "learning_rate": 3.4709721647393917e-06, "loss": 0.0349, "step": 79585 }, { "epoch": 0.3320926972152448, "grad_norm": 0.8631896975116559, "learning_rate": 3.4708631350154205e-06, "loss": 0.0398, "step": 79590 }, { "epoch": 0.33211355993023506, "grad_norm": 0.7088615170277386, "learning_rate": 3.470754115565287e-06, "loss": 0.0278, "step": 79595 }, { "epoch": 0.33213442264522536, "grad_norm": 0.8198705792676912, "learning_rate": 3.4706451063873755e-06, "loss": 0.0357, "step": 79600 }, { "epoch": 0.33215528536021566, "grad_norm": 0.7292733421219678, "learning_rate": 3.4705361074800753e-06, "loss": 0.0319, "step": 79605 }, { "epoch": 0.3321761480752059, "grad_norm": 0.8231885163597221, "learning_rate": 3.470427118841772e-06, "loss": 0.0318, "step": 79610 }, { "epoch": 0.3321970107901962, "grad_norm": 1.1785227814213737, "learning_rate": 3.470318140470855e-06, "loss": 0.0366, "step": 79615 }, { "epoch": 0.33221787350518645, "grad_norm": 1.8117350180399563, "learning_rate": 3.4702091723657116e-06, "loss": 0.0275, "step": 79620 }, { "epoch": 0.33223873622017674, "grad_norm": 0.97757976594119, "learning_rate": 3.470100214524729e-06, "loss": 0.025, "step": 79625 }, { "epoch": 0.33225959893516704, "grad_norm": 0.8384759275093266, "learning_rate": 3.4699912669462977e-06, "loss": 0.0271, "step": 79630 }, { "epoch": 0.3322804616501573, "grad_norm": 0.6737331629862936, "learning_rate": 3.4698823296288053e-06, "loss": 0.0302, "step": 79635 }, { "epoch": 0.3323013243651476, "grad_norm": 0.9214445142767863, "learning_rate": 3.4697734025706432e-06, "loss": 0.035, "step": 79640 }, { "epoch": 0.3323221870801379, "grad_norm": 0.807864589342783, "learning_rate": 3.469664485770199e-06, "loss": 0.0224, "step": 79645 }, { "epoch": 0.33234304979512813, "grad_norm": 0.7145020388227421, "learning_rate": 3.469555579225864e-06, "loss": 0.0254, "step": 79650 }, { "epoch": 0.33236391251011843, "grad_norm": 0.4505577007301368, "learning_rate": 3.4694466829360285e-06, "loss": 0.0194, "step": 79655 }, { "epoch": 0.3323847752251087, "grad_norm": 1.0763311772594597, "learning_rate": 3.4693377968990826e-06, "loss": 0.0338, "step": 79660 }, { "epoch": 0.332405637940099, "grad_norm": 0.8350125667720077, "learning_rate": 3.469228921113419e-06, "loss": 0.0325, "step": 79665 }, { "epoch": 0.3324265006550893, "grad_norm": 0.5782160177012143, "learning_rate": 3.4691200555774287e-06, "loss": 0.0315, "step": 79670 }, { "epoch": 0.3324473633700795, "grad_norm": 0.5728647596103662, "learning_rate": 3.469011200289503e-06, "loss": 0.0317, "step": 79675 }, { "epoch": 0.3324682260850698, "grad_norm": 0.6032871232269476, "learning_rate": 3.4689023552480342e-06, "loss": 0.0227, "step": 79680 }, { "epoch": 0.33248908880006006, "grad_norm": 0.42275860480868144, "learning_rate": 3.468793520451416e-06, "loss": 0.0247, "step": 79685 }, { "epoch": 0.33250995151505036, "grad_norm": 0.9149208552784815, "learning_rate": 3.4686846958980404e-06, "loss": 0.032, "step": 79690 }, { "epoch": 0.33253081423004066, "grad_norm": 0.9552203998827216, "learning_rate": 3.4685758815863007e-06, "loss": 0.0309, "step": 79695 }, { "epoch": 0.3325516769450309, "grad_norm": 1.4765364067822948, "learning_rate": 3.4684670775145914e-06, "loss": 0.0238, "step": 79700 }, { "epoch": 0.3325725396600212, "grad_norm": 0.9289935182340371, "learning_rate": 3.468358283681305e-06, "loss": 0.0463, "step": 79705 }, { "epoch": 0.33259340237501145, "grad_norm": 0.4365115131853772, "learning_rate": 3.4682495000848376e-06, "loss": 0.0292, "step": 79710 }, { "epoch": 0.33261426509000175, "grad_norm": 0.7519147954791263, "learning_rate": 3.4681407267235834e-06, "loss": 0.0358, "step": 79715 }, { "epoch": 0.33263512780499205, "grad_norm": 1.476379235220901, "learning_rate": 3.468031963595937e-06, "loss": 0.0362, "step": 79720 }, { "epoch": 0.3326559905199823, "grad_norm": 0.7269098761160525, "learning_rate": 3.4679232107002934e-06, "loss": 0.0265, "step": 79725 }, { "epoch": 0.3326768532349726, "grad_norm": 0.5426936433966824, "learning_rate": 3.4678144680350503e-06, "loss": 0.0307, "step": 79730 }, { "epoch": 0.3326977159499629, "grad_norm": 0.8080226927016817, "learning_rate": 3.467705735598602e-06, "loss": 0.026, "step": 79735 }, { "epoch": 0.33271857866495314, "grad_norm": 0.8369273417577368, "learning_rate": 3.4675970133893456e-06, "loss": 0.0401, "step": 79740 }, { "epoch": 0.33273944137994343, "grad_norm": 0.6660199232527381, "learning_rate": 3.467488301405678e-06, "loss": 0.0237, "step": 79745 }, { "epoch": 0.3327603040949337, "grad_norm": 0.8881543245673844, "learning_rate": 3.4673795996459967e-06, "loss": 0.027, "step": 79750 }, { "epoch": 0.332781166809924, "grad_norm": 1.477012424633569, "learning_rate": 3.4672709081086982e-06, "loss": 0.0314, "step": 79755 }, { "epoch": 0.3328020295249143, "grad_norm": 0.5365990941141195, "learning_rate": 3.4671622267921825e-06, "loss": 0.0207, "step": 79760 }, { "epoch": 0.3328228922399045, "grad_norm": 0.767612052275286, "learning_rate": 3.467053555694845e-06, "loss": 0.0255, "step": 79765 }, { "epoch": 0.3328437549548948, "grad_norm": 0.7006699874108723, "learning_rate": 3.4669448948150865e-06, "loss": 0.0221, "step": 79770 }, { "epoch": 0.33286461766988507, "grad_norm": 0.9598091693296047, "learning_rate": 3.466836244151305e-06, "loss": 0.0279, "step": 79775 }, { "epoch": 0.33288548038487537, "grad_norm": 0.9209409143624556, "learning_rate": 3.4667276037019003e-06, "loss": 0.0246, "step": 79780 }, { "epoch": 0.33290634309986566, "grad_norm": 0.6202729557983648, "learning_rate": 3.4666189734652715e-06, "loss": 0.0294, "step": 79785 }, { "epoch": 0.3329272058148559, "grad_norm": 0.831144018923096, "learning_rate": 3.4665103534398193e-06, "loss": 0.0267, "step": 79790 }, { "epoch": 0.3329480685298462, "grad_norm": 0.9215436511563693, "learning_rate": 3.466401743623943e-06, "loss": 0.0233, "step": 79795 }, { "epoch": 0.33296893124483645, "grad_norm": 0.6451915904600278, "learning_rate": 3.4662931440160448e-06, "loss": 0.0296, "step": 79800 }, { "epoch": 0.33298979395982675, "grad_norm": 0.8502824403899681, "learning_rate": 3.4661845546145246e-06, "loss": 0.0255, "step": 79805 }, { "epoch": 0.33301065667481705, "grad_norm": 0.7779908124903516, "learning_rate": 3.466075975417784e-06, "loss": 0.0365, "step": 79810 }, { "epoch": 0.3330315193898073, "grad_norm": 1.2230764401044332, "learning_rate": 3.4659674064242255e-06, "loss": 0.0323, "step": 79815 }, { "epoch": 0.3330523821047976, "grad_norm": 0.4839522094199271, "learning_rate": 3.4658588476322504e-06, "loss": 0.0238, "step": 79820 }, { "epoch": 0.3330732448197879, "grad_norm": 0.5707578996551339, "learning_rate": 3.465750299040261e-06, "loss": 0.0282, "step": 79825 }, { "epoch": 0.33309410753477814, "grad_norm": 0.7470844227493671, "learning_rate": 3.4656417606466607e-06, "loss": 0.0279, "step": 79830 }, { "epoch": 0.33311497024976844, "grad_norm": 0.9431679522030666, "learning_rate": 3.4655332324498524e-06, "loss": 0.0272, "step": 79835 }, { "epoch": 0.3331358329647587, "grad_norm": 0.6975807129576437, "learning_rate": 3.46542471444824e-06, "loss": 0.0333, "step": 79840 }, { "epoch": 0.333156695679749, "grad_norm": 0.9586574820579289, "learning_rate": 3.4653162066402263e-06, "loss": 0.0273, "step": 79845 }, { "epoch": 0.3331775583947393, "grad_norm": 0.6342126169678359, "learning_rate": 3.465207709024217e-06, "loss": 0.0311, "step": 79850 }, { "epoch": 0.3331984211097295, "grad_norm": 0.6904621593848621, "learning_rate": 3.4650992215986157e-06, "loss": 0.0251, "step": 79855 }, { "epoch": 0.3332192838247198, "grad_norm": 0.5417401342749042, "learning_rate": 3.4649907443618277e-06, "loss": 0.0269, "step": 79860 }, { "epoch": 0.33324014653971007, "grad_norm": 0.6983013030382677, "learning_rate": 3.4648822773122583e-06, "loss": 0.0247, "step": 79865 }, { "epoch": 0.33326100925470037, "grad_norm": 1.2322088906365054, "learning_rate": 3.4647738204483123e-06, "loss": 0.0288, "step": 79870 }, { "epoch": 0.33328187196969067, "grad_norm": 0.6210185055248804, "learning_rate": 3.4646653737683966e-06, "loss": 0.022, "step": 79875 }, { "epoch": 0.3333027346846809, "grad_norm": 0.9224932702665406, "learning_rate": 3.464556937270917e-06, "loss": 0.0242, "step": 79880 }, { "epoch": 0.3333235973996712, "grad_norm": 0.3859585679600419, "learning_rate": 3.4644485109542803e-06, "loss": 0.0259, "step": 79885 }, { "epoch": 0.33334446011466146, "grad_norm": 0.9191270421793842, "learning_rate": 3.464340094816894e-06, "loss": 0.0396, "step": 79890 }, { "epoch": 0.33336532282965176, "grad_norm": 1.6935780995650744, "learning_rate": 3.4642316888571643e-06, "loss": 0.0488, "step": 79895 }, { "epoch": 0.33338618554464206, "grad_norm": 0.7798454994857743, "learning_rate": 3.4641232930734997e-06, "loss": 0.0318, "step": 79900 }, { "epoch": 0.3334070482596323, "grad_norm": 0.6490380423647184, "learning_rate": 3.4640149074643085e-06, "loss": 0.0328, "step": 79905 }, { "epoch": 0.3334279109746226, "grad_norm": 0.5717412633523183, "learning_rate": 3.4639065320279986e-06, "loss": 0.0282, "step": 79910 }, { "epoch": 0.3334487736896129, "grad_norm": 0.7528428735632127, "learning_rate": 3.4637981667629794e-06, "loss": 0.0286, "step": 79915 }, { "epoch": 0.33346963640460314, "grad_norm": 0.4677837816098194, "learning_rate": 3.4636898116676597e-06, "loss": 0.0223, "step": 79920 }, { "epoch": 0.33349049911959344, "grad_norm": 0.897158259406063, "learning_rate": 3.463581466740448e-06, "loss": 0.0367, "step": 79925 }, { "epoch": 0.3335113618345837, "grad_norm": 0.6425306458059756, "learning_rate": 3.4634731319797554e-06, "loss": 0.0317, "step": 79930 }, { "epoch": 0.333532224549574, "grad_norm": 1.0012701671910273, "learning_rate": 3.463364807383991e-06, "loss": 0.0352, "step": 79935 }, { "epoch": 0.3335530872645643, "grad_norm": 2.3264609898226096, "learning_rate": 3.4632564929515662e-06, "loss": 0.0224, "step": 79940 }, { "epoch": 0.33357394997955453, "grad_norm": 0.7847019439810079, "learning_rate": 3.463148188680891e-06, "loss": 0.0224, "step": 79945 }, { "epoch": 0.33359481269454483, "grad_norm": 0.5696212508557191, "learning_rate": 3.4630398945703774e-06, "loss": 0.0237, "step": 79950 }, { "epoch": 0.3336156754095351, "grad_norm": 0.6331095533236085, "learning_rate": 3.462931610618437e-06, "loss": 0.0208, "step": 79955 }, { "epoch": 0.3336365381245254, "grad_norm": 1.0258910798200407, "learning_rate": 3.4628233368234804e-06, "loss": 0.0328, "step": 79960 }, { "epoch": 0.33365740083951567, "grad_norm": 0.9466676417553653, "learning_rate": 3.462715073183921e-06, "loss": 0.0306, "step": 79965 }, { "epoch": 0.3336782635545059, "grad_norm": 1.0397952843340348, "learning_rate": 3.462606819698171e-06, "loss": 0.0293, "step": 79970 }, { "epoch": 0.3336991262694962, "grad_norm": 0.5390620851494639, "learning_rate": 3.462498576364644e-06, "loss": 0.0266, "step": 79975 }, { "epoch": 0.33371998898448646, "grad_norm": 1.225294059286554, "learning_rate": 3.462390343181752e-06, "loss": 0.0364, "step": 79980 }, { "epoch": 0.33374085169947676, "grad_norm": 0.5966117326678251, "learning_rate": 3.462282120147909e-06, "loss": 0.0226, "step": 79985 }, { "epoch": 0.33376171441446706, "grad_norm": 0.4519233347864057, "learning_rate": 3.4621739072615303e-06, "loss": 0.0348, "step": 79990 }, { "epoch": 0.3337825771294573, "grad_norm": 0.6144590993866236, "learning_rate": 3.462065704521028e-06, "loss": 0.0243, "step": 79995 }, { "epoch": 0.3338034398444476, "grad_norm": 0.7116638631562389, "learning_rate": 3.4619575119248187e-06, "loss": 0.0238, "step": 80000 }, { "epoch": 0.3338243025594379, "grad_norm": 0.6701700137252379, "learning_rate": 3.4618493294713164e-06, "loss": 0.0308, "step": 80005 }, { "epoch": 0.33384516527442815, "grad_norm": 1.0485979343743483, "learning_rate": 3.461741157158937e-06, "loss": 0.0233, "step": 80010 }, { "epoch": 0.33386602798941845, "grad_norm": 0.745358603281882, "learning_rate": 3.461632994986096e-06, "loss": 0.0324, "step": 80015 }, { "epoch": 0.3338868907044087, "grad_norm": 0.8762155982987885, "learning_rate": 3.461524842951209e-06, "loss": 0.0271, "step": 80020 }, { "epoch": 0.333907753419399, "grad_norm": 0.8988139856019047, "learning_rate": 3.4614167010526926e-06, "loss": 0.0279, "step": 80025 }, { "epoch": 0.3339286161343893, "grad_norm": 0.8975753405498792, "learning_rate": 3.4613085692889635e-06, "loss": 0.0305, "step": 80030 }, { "epoch": 0.33394947884937953, "grad_norm": 1.080318019147228, "learning_rate": 3.4612004476584394e-06, "loss": 0.0331, "step": 80035 }, { "epoch": 0.33397034156436983, "grad_norm": 0.5791308295672853, "learning_rate": 3.4610923361595373e-06, "loss": 0.0227, "step": 80040 }, { "epoch": 0.3339912042793601, "grad_norm": 1.228589746425869, "learning_rate": 3.4609842347906754e-06, "loss": 0.0319, "step": 80045 }, { "epoch": 0.3340120669943504, "grad_norm": 0.886718378891008, "learning_rate": 3.460876143550271e-06, "loss": 0.0399, "step": 80050 }, { "epoch": 0.3340329297093407, "grad_norm": 0.4893017522148438, "learning_rate": 3.460768062436742e-06, "loss": 0.0336, "step": 80055 }, { "epoch": 0.3340537924243309, "grad_norm": 0.739324353720535, "learning_rate": 3.4606599914485094e-06, "loss": 0.0316, "step": 80060 }, { "epoch": 0.3340746551393212, "grad_norm": 0.8422773881648522, "learning_rate": 3.460551930583991e-06, "loss": 0.0273, "step": 80065 }, { "epoch": 0.33409551785431146, "grad_norm": 1.4846999825717193, "learning_rate": 3.4604438798416064e-06, "loss": 0.0315, "step": 80070 }, { "epoch": 0.33411638056930176, "grad_norm": 0.6763090058968331, "learning_rate": 3.4603358392197756e-06, "loss": 0.0227, "step": 80075 }, { "epoch": 0.33413724328429206, "grad_norm": 0.9009603553448177, "learning_rate": 3.4602278087169185e-06, "loss": 0.0236, "step": 80080 }, { "epoch": 0.3341581059992823, "grad_norm": 0.49758466138014346, "learning_rate": 3.4601197883314558e-06, "loss": 0.0387, "step": 80085 }, { "epoch": 0.3341789687142726, "grad_norm": 0.6475655887965062, "learning_rate": 3.460011778061808e-06, "loss": 0.024, "step": 80090 }, { "epoch": 0.3341998314292629, "grad_norm": 0.5064214882271018, "learning_rate": 3.4599037779063983e-06, "loss": 0.0241, "step": 80095 }, { "epoch": 0.33422069414425315, "grad_norm": 0.4586069379859568, "learning_rate": 3.4597957878636458e-06, "loss": 0.0319, "step": 80100 }, { "epoch": 0.33424155685924345, "grad_norm": 1.0901255937306629, "learning_rate": 3.4596878079319734e-06, "loss": 0.0361, "step": 80105 }, { "epoch": 0.3342624195742337, "grad_norm": 1.0175614123250358, "learning_rate": 3.4595798381098032e-06, "loss": 0.0343, "step": 80110 }, { "epoch": 0.334283282289224, "grad_norm": 0.6067712338762402, "learning_rate": 3.4594718783955584e-06, "loss": 0.0239, "step": 80115 }, { "epoch": 0.3343041450042143, "grad_norm": 1.2602542925015163, "learning_rate": 3.4593639287876617e-06, "loss": 0.0251, "step": 80120 }, { "epoch": 0.33432500771920454, "grad_norm": 0.7161552159357586, "learning_rate": 3.4592559892845353e-06, "loss": 0.0294, "step": 80125 }, { "epoch": 0.33434587043419484, "grad_norm": 0.535311891779868, "learning_rate": 3.4591480598846044e-06, "loss": 0.0264, "step": 80130 }, { "epoch": 0.3343667331491851, "grad_norm": 0.6433220043055258, "learning_rate": 3.4590401405862927e-06, "loss": 0.0234, "step": 80135 }, { "epoch": 0.3343875958641754, "grad_norm": 0.671693980387931, "learning_rate": 3.4589322313880234e-06, "loss": 0.0317, "step": 80140 }, { "epoch": 0.3344084585791657, "grad_norm": 0.5520485504838789, "learning_rate": 3.4588243322882224e-06, "loss": 0.036, "step": 80145 }, { "epoch": 0.3344293212941559, "grad_norm": 0.40835868943757375, "learning_rate": 3.458716443285315e-06, "loss": 0.0219, "step": 80150 }, { "epoch": 0.3344501840091462, "grad_norm": 0.5636112204657282, "learning_rate": 3.458608564377725e-06, "loss": 0.0282, "step": 80155 }, { "epoch": 0.33447104672413647, "grad_norm": 1.1269648399311802, "learning_rate": 3.4585006955638797e-06, "loss": 0.026, "step": 80160 }, { "epoch": 0.33449190943912677, "grad_norm": 0.48723454839992364, "learning_rate": 3.4583928368422032e-06, "loss": 0.0297, "step": 80165 }, { "epoch": 0.33451277215411707, "grad_norm": 0.8381574388096902, "learning_rate": 3.458284988211124e-06, "loss": 0.0317, "step": 80170 }, { "epoch": 0.3345336348691073, "grad_norm": 0.45066274018478764, "learning_rate": 3.458177149669068e-06, "loss": 0.0223, "step": 80175 }, { "epoch": 0.3345544975840976, "grad_norm": 0.8511346133419202, "learning_rate": 3.4580693212144615e-06, "loss": 0.0379, "step": 80180 }, { "epoch": 0.3345753602990879, "grad_norm": 0.6780370061259537, "learning_rate": 3.457961502845734e-06, "loss": 0.0428, "step": 80185 }, { "epoch": 0.33459622301407815, "grad_norm": 0.9894022072232399, "learning_rate": 3.4578536945613113e-06, "loss": 0.0188, "step": 80190 }, { "epoch": 0.33461708572906845, "grad_norm": 0.5606311664078204, "learning_rate": 3.4577458963596222e-06, "loss": 0.033, "step": 80195 }, { "epoch": 0.3346379484440587, "grad_norm": 0.8771045450771371, "learning_rate": 3.457638108239095e-06, "loss": 0.0301, "step": 80200 }, { "epoch": 0.334658811159049, "grad_norm": 1.0727313602924955, "learning_rate": 3.4575303301981587e-06, "loss": 0.0331, "step": 80205 }, { "epoch": 0.3346796738740393, "grad_norm": 1.2175627811874639, "learning_rate": 3.4574225622352427e-06, "loss": 0.0283, "step": 80210 }, { "epoch": 0.33470053658902954, "grad_norm": 0.5412654791277901, "learning_rate": 3.457314804348776e-06, "loss": 0.0277, "step": 80215 }, { "epoch": 0.33472139930401984, "grad_norm": 0.731592183983687, "learning_rate": 3.4572070565371886e-06, "loss": 0.0277, "step": 80220 }, { "epoch": 0.3347422620190101, "grad_norm": 0.6219281134718899, "learning_rate": 3.45709931879891e-06, "loss": 0.0254, "step": 80225 }, { "epoch": 0.3347631247340004, "grad_norm": 0.36256954310535217, "learning_rate": 3.456991591132372e-06, "loss": 0.0263, "step": 80230 }, { "epoch": 0.3347839874489907, "grad_norm": 0.8526867978650307, "learning_rate": 3.456883873536005e-06, "loss": 0.0311, "step": 80235 }, { "epoch": 0.3348048501639809, "grad_norm": 0.5151996953177931, "learning_rate": 3.4567761660082406e-06, "loss": 0.0278, "step": 80240 }, { "epoch": 0.3348257128789712, "grad_norm": 0.5652958698888058, "learning_rate": 3.456668468547509e-06, "loss": 0.0228, "step": 80245 }, { "epoch": 0.33484657559396147, "grad_norm": 1.864523424522383, "learning_rate": 3.4565607811522423e-06, "loss": 0.0277, "step": 80250 }, { "epoch": 0.33486743830895177, "grad_norm": 0.864392732628993, "learning_rate": 3.456453103820874e-06, "loss": 0.0241, "step": 80255 }, { "epoch": 0.33488830102394207, "grad_norm": 0.9017885427240391, "learning_rate": 3.4563454365518355e-06, "loss": 0.041, "step": 80260 }, { "epoch": 0.3349091637389323, "grad_norm": 0.7856534475026175, "learning_rate": 3.4562377793435608e-06, "loss": 0.0345, "step": 80265 }, { "epoch": 0.3349300264539226, "grad_norm": 0.6974518342148555, "learning_rate": 3.4561301321944824e-06, "loss": 0.0278, "step": 80270 }, { "epoch": 0.3349508891689129, "grad_norm": 0.9077106294492663, "learning_rate": 3.4560224951030335e-06, "loss": 0.0266, "step": 80275 }, { "epoch": 0.33497175188390316, "grad_norm": 0.5781499044492477, "learning_rate": 3.455914868067649e-06, "loss": 0.0304, "step": 80280 }, { "epoch": 0.33499261459889346, "grad_norm": 0.8234421656833283, "learning_rate": 3.4558072510867618e-06, "loss": 0.0241, "step": 80285 }, { "epoch": 0.3350134773138837, "grad_norm": 0.4773635245816553, "learning_rate": 3.4556996441588087e-06, "loss": 0.0387, "step": 80290 }, { "epoch": 0.335034340028874, "grad_norm": 0.9092025129141551, "learning_rate": 3.4555920472822225e-06, "loss": 0.0263, "step": 80295 }, { "epoch": 0.3350552027438643, "grad_norm": 0.5427757466107979, "learning_rate": 3.45548446045544e-06, "loss": 0.0257, "step": 80300 }, { "epoch": 0.33507606545885454, "grad_norm": 0.7452024996187793, "learning_rate": 3.4553768836768963e-06, "loss": 0.0302, "step": 80305 }, { "epoch": 0.33509692817384484, "grad_norm": 0.6281140551619215, "learning_rate": 3.4552693169450265e-06, "loss": 0.0299, "step": 80310 }, { "epoch": 0.3351177908888351, "grad_norm": 0.3474242889459026, "learning_rate": 3.455161760258268e-06, "loss": 0.0197, "step": 80315 }, { "epoch": 0.3351386536038254, "grad_norm": 0.6121982153236574, "learning_rate": 3.4550542136150573e-06, "loss": 0.028, "step": 80320 }, { "epoch": 0.3351595163188157, "grad_norm": 0.9484852741188682, "learning_rate": 3.454946677013831e-06, "loss": 0.0337, "step": 80325 }, { "epoch": 0.33518037903380593, "grad_norm": 0.4095594623574127, "learning_rate": 3.454839150453027e-06, "loss": 0.0238, "step": 80330 }, { "epoch": 0.33520124174879623, "grad_norm": 0.4720248929227952, "learning_rate": 3.454731633931083e-06, "loss": 0.0286, "step": 80335 }, { "epoch": 0.3352221044637865, "grad_norm": 0.589760186575488, "learning_rate": 3.4546241274464366e-06, "loss": 0.028, "step": 80340 }, { "epoch": 0.3352429671787768, "grad_norm": 0.535955319731156, "learning_rate": 3.4545166309975254e-06, "loss": 0.0347, "step": 80345 }, { "epoch": 0.3352638298937671, "grad_norm": 1.3697491779875965, "learning_rate": 3.45440914458279e-06, "loss": 0.0403, "step": 80350 }, { "epoch": 0.3352846926087573, "grad_norm": 1.1327225733441986, "learning_rate": 3.454301668200668e-06, "loss": 0.0318, "step": 80355 }, { "epoch": 0.3353055553237476, "grad_norm": 1.0746364994684068, "learning_rate": 3.4541942018496e-06, "loss": 0.0275, "step": 80360 }, { "epoch": 0.3353264180387379, "grad_norm": 0.7883705778334856, "learning_rate": 3.454086745528024e-06, "loss": 0.0256, "step": 80365 }, { "epoch": 0.33534728075372816, "grad_norm": 0.7585423763070572, "learning_rate": 3.4539792992343806e-06, "loss": 0.0291, "step": 80370 }, { "epoch": 0.33536814346871846, "grad_norm": 0.6188472225691363, "learning_rate": 3.4538718629671116e-06, "loss": 0.0251, "step": 80375 }, { "epoch": 0.3353890061837087, "grad_norm": 0.6859674178295072, "learning_rate": 3.4537644367246558e-06, "loss": 0.0318, "step": 80380 }, { "epoch": 0.335409868898699, "grad_norm": 0.8178344189282512, "learning_rate": 3.4536570205054563e-06, "loss": 0.0267, "step": 80385 }, { "epoch": 0.3354307316136893, "grad_norm": 0.3753297278138658, "learning_rate": 3.4535496143079526e-06, "loss": 0.0224, "step": 80390 }, { "epoch": 0.33545159432867955, "grad_norm": 1.3319308170608366, "learning_rate": 3.453442218130588e-06, "loss": 0.0323, "step": 80395 }, { "epoch": 0.33547245704366985, "grad_norm": 0.6873861066131641, "learning_rate": 3.453334831971804e-06, "loss": 0.0298, "step": 80400 }, { "epoch": 0.3354933197586601, "grad_norm": 0.5807184376162743, "learning_rate": 3.4532274558300415e-06, "loss": 0.0181, "step": 80405 }, { "epoch": 0.3355141824736504, "grad_norm": 0.7176929549344846, "learning_rate": 3.4531200897037463e-06, "loss": 0.0287, "step": 80410 }, { "epoch": 0.3355350451886407, "grad_norm": 0.6525933818196418, "learning_rate": 3.453012733591359e-06, "loss": 0.0252, "step": 80415 }, { "epoch": 0.33555590790363093, "grad_norm": 0.7268024736662456, "learning_rate": 3.452905387491324e-06, "loss": 0.0272, "step": 80420 }, { "epoch": 0.33557677061862123, "grad_norm": 0.783495777175426, "learning_rate": 3.4527980514020866e-06, "loss": 0.0233, "step": 80425 }, { "epoch": 0.3355976333336115, "grad_norm": 0.8708220693032732, "learning_rate": 3.452690725322088e-06, "loss": 0.0399, "step": 80430 }, { "epoch": 0.3356184960486018, "grad_norm": 0.6871762532519758, "learning_rate": 3.452583409249774e-06, "loss": 0.0242, "step": 80435 }, { "epoch": 0.3356393587635921, "grad_norm": 0.9893575835558592, "learning_rate": 3.4524761031835895e-06, "loss": 0.0266, "step": 80440 }, { "epoch": 0.3356602214785823, "grad_norm": 0.8834741625383515, "learning_rate": 3.45236880712198e-06, "loss": 0.0227, "step": 80445 }, { "epoch": 0.3356810841935726, "grad_norm": 0.8561988167165528, "learning_rate": 3.4522615210633907e-06, "loss": 0.0277, "step": 80450 }, { "epoch": 0.3357019469085629, "grad_norm": 1.3089922421104454, "learning_rate": 3.4521542450062673e-06, "loss": 0.0275, "step": 80455 }, { "epoch": 0.33572280962355316, "grad_norm": 0.9728812539322005, "learning_rate": 3.4520469789490565e-06, "loss": 0.0275, "step": 80460 }, { "epoch": 0.33574367233854346, "grad_norm": 0.7131246028762271, "learning_rate": 3.451939722890203e-06, "loss": 0.0266, "step": 80465 }, { "epoch": 0.3357645350535337, "grad_norm": 1.3070434372176372, "learning_rate": 3.4518324768281558e-06, "loss": 0.0441, "step": 80470 }, { "epoch": 0.335785397768524, "grad_norm": 0.6578817036747997, "learning_rate": 3.451725240761361e-06, "loss": 0.0335, "step": 80475 }, { "epoch": 0.3358062604835143, "grad_norm": 1.4008628670210697, "learning_rate": 3.451618014688267e-06, "loss": 0.0228, "step": 80480 }, { "epoch": 0.33582712319850455, "grad_norm": 0.6344657700922672, "learning_rate": 3.45151079860732e-06, "loss": 0.0296, "step": 80485 }, { "epoch": 0.33584798591349485, "grad_norm": 0.8017904616310457, "learning_rate": 3.4514035925169696e-06, "loss": 0.0311, "step": 80490 }, { "epoch": 0.3358688486284851, "grad_norm": 0.65342059875672, "learning_rate": 3.4512963964156636e-06, "loss": 0.03, "step": 80495 }, { "epoch": 0.3358897113434754, "grad_norm": 1.2631145028983035, "learning_rate": 3.4511892103018518e-06, "loss": 0.029, "step": 80500 }, { "epoch": 0.3359105740584657, "grad_norm": 1.679822238906965, "learning_rate": 3.451082034173982e-06, "loss": 0.0296, "step": 80505 }, { "epoch": 0.33593143677345594, "grad_norm": 1.1465328693157721, "learning_rate": 3.450974868030505e-06, "loss": 0.0274, "step": 80510 }, { "epoch": 0.33595229948844624, "grad_norm": 0.8561552235956215, "learning_rate": 3.4508677118698704e-06, "loss": 0.0271, "step": 80515 }, { "epoch": 0.3359731622034365, "grad_norm": 0.6718049589700854, "learning_rate": 3.450760565690528e-06, "loss": 0.0321, "step": 80520 }, { "epoch": 0.3359940249184268, "grad_norm": 0.5383302187185449, "learning_rate": 3.4506534294909282e-06, "loss": 0.0238, "step": 80525 }, { "epoch": 0.3360148876334171, "grad_norm": 0.3605827671971639, "learning_rate": 3.4505463032695223e-06, "loss": 0.0327, "step": 80530 }, { "epoch": 0.3360357503484073, "grad_norm": 1.140173771789324, "learning_rate": 3.4504391870247617e-06, "loss": 0.0269, "step": 80535 }, { "epoch": 0.3360566130633976, "grad_norm": 1.1832058680068223, "learning_rate": 3.450332080755098e-06, "loss": 0.0288, "step": 80540 }, { "epoch": 0.33607747577838787, "grad_norm": 0.6193654137205001, "learning_rate": 3.4502249844589826e-06, "loss": 0.0216, "step": 80545 }, { "epoch": 0.33609833849337817, "grad_norm": 13.694169506293038, "learning_rate": 3.4501178981348673e-06, "loss": 0.032, "step": 80550 }, { "epoch": 0.33611920120836847, "grad_norm": 0.5330996463675497, "learning_rate": 3.450010821781206e-06, "loss": 0.0337, "step": 80555 }, { "epoch": 0.3361400639233587, "grad_norm": 1.0933325407207861, "learning_rate": 3.4499037553964503e-06, "loss": 0.0329, "step": 80560 }, { "epoch": 0.336160926638349, "grad_norm": 0.6935603842787706, "learning_rate": 3.4497966989790545e-06, "loss": 0.0236, "step": 80565 }, { "epoch": 0.3361817893533393, "grad_norm": 2.0225489024480896, "learning_rate": 3.449689652527471e-06, "loss": 0.0278, "step": 80570 }, { "epoch": 0.33620265206832955, "grad_norm": 0.5658965656638681, "learning_rate": 3.4495826160401553e-06, "loss": 0.0258, "step": 80575 }, { "epoch": 0.33622351478331985, "grad_norm": 0.8376677532327161, "learning_rate": 3.44947558951556e-06, "loss": 0.0284, "step": 80580 }, { "epoch": 0.3362443774983101, "grad_norm": 0.598737911774715, "learning_rate": 3.4493685729521416e-06, "loss": 0.0271, "step": 80585 }, { "epoch": 0.3362652402133004, "grad_norm": 1.217774956415502, "learning_rate": 3.449261566348353e-06, "loss": 0.0295, "step": 80590 }, { "epoch": 0.3362861029282907, "grad_norm": 0.9433228639020158, "learning_rate": 3.44915456970265e-06, "loss": 0.0263, "step": 80595 }, { "epoch": 0.33630696564328094, "grad_norm": 1.0060156338213695, "learning_rate": 3.449047583013489e-06, "loss": 0.0401, "step": 80600 }, { "epoch": 0.33632782835827124, "grad_norm": 1.1930081132619332, "learning_rate": 3.448940606279326e-06, "loss": 0.022, "step": 80605 }, { "epoch": 0.3363486910732615, "grad_norm": 0.9232863773634763, "learning_rate": 3.4488336394986153e-06, "loss": 0.0193, "step": 80610 }, { "epoch": 0.3363695537882518, "grad_norm": 0.9051251164341769, "learning_rate": 3.4487266826698164e-06, "loss": 0.0267, "step": 80615 }, { "epoch": 0.3363904165032421, "grad_norm": 0.6494626432638766, "learning_rate": 3.448619735791383e-06, "loss": 0.0264, "step": 80620 }, { "epoch": 0.33641127921823233, "grad_norm": 0.7415155046698342, "learning_rate": 3.4485127988617757e-06, "loss": 0.0288, "step": 80625 }, { "epoch": 0.33643214193322263, "grad_norm": 0.6529363847734682, "learning_rate": 3.4484058718794494e-06, "loss": 0.0205, "step": 80630 }, { "epoch": 0.33645300464821287, "grad_norm": 1.2160512760054991, "learning_rate": 3.4482989548428637e-06, "loss": 0.0455, "step": 80635 }, { "epoch": 0.33647386736320317, "grad_norm": 0.4666934881110026, "learning_rate": 3.4481920477504766e-06, "loss": 0.0236, "step": 80640 }, { "epoch": 0.33649473007819347, "grad_norm": 0.9031673890302913, "learning_rate": 3.448085150600745e-06, "loss": 0.0309, "step": 80645 }, { "epoch": 0.3365155927931837, "grad_norm": 0.9507501400020228, "learning_rate": 3.4479782633921304e-06, "loss": 0.0353, "step": 80650 }, { "epoch": 0.336536455508174, "grad_norm": 1.050850494464897, "learning_rate": 3.4478713861230906e-06, "loss": 0.0405, "step": 80655 }, { "epoch": 0.3365573182231643, "grad_norm": 0.9407775390155363, "learning_rate": 3.4477645187920856e-06, "loss": 0.0248, "step": 80660 }, { "epoch": 0.33657818093815456, "grad_norm": 1.4528706773570188, "learning_rate": 3.4476576613975753e-06, "loss": 0.0229, "step": 80665 }, { "epoch": 0.33659904365314486, "grad_norm": 0.7740737113115619, "learning_rate": 3.447550813938019e-06, "loss": 0.0312, "step": 80670 }, { "epoch": 0.3366199063681351, "grad_norm": 0.8134575641589266, "learning_rate": 3.4474439764118796e-06, "loss": 0.0276, "step": 80675 }, { "epoch": 0.3366407690831254, "grad_norm": 0.680989042278794, "learning_rate": 3.447337148817616e-06, "loss": 0.0266, "step": 80680 }, { "epoch": 0.3366616317981157, "grad_norm": 0.6059575088464059, "learning_rate": 3.44723033115369e-06, "loss": 0.0223, "step": 80685 }, { "epoch": 0.33668249451310595, "grad_norm": 1.3479945720316415, "learning_rate": 3.4471235234185633e-06, "loss": 0.0323, "step": 80690 }, { "epoch": 0.33670335722809625, "grad_norm": 0.6310865380121055, "learning_rate": 3.4470167256106983e-06, "loss": 0.0346, "step": 80695 }, { "epoch": 0.3367242199430865, "grad_norm": 0.7037372368884035, "learning_rate": 3.446909937728557e-06, "loss": 0.0304, "step": 80700 }, { "epoch": 0.3367450826580768, "grad_norm": 0.8209297648767067, "learning_rate": 3.4468031597706012e-06, "loss": 0.028, "step": 80705 }, { "epoch": 0.3367659453730671, "grad_norm": 0.5922874788596736, "learning_rate": 3.4466963917352954e-06, "loss": 0.034, "step": 80710 }, { "epoch": 0.33678680808805733, "grad_norm": 0.77635001724502, "learning_rate": 3.4465896336211006e-06, "loss": 0.0281, "step": 80715 }, { "epoch": 0.33680767080304763, "grad_norm": 0.4951696851440371, "learning_rate": 3.4464828854264832e-06, "loss": 0.0199, "step": 80720 }, { "epoch": 0.3368285335180379, "grad_norm": 0.5909167496221741, "learning_rate": 3.446376147149905e-06, "loss": 0.0202, "step": 80725 }, { "epoch": 0.3368493962330282, "grad_norm": 0.6298513434593487, "learning_rate": 3.4462694187898317e-06, "loss": 0.0257, "step": 80730 }, { "epoch": 0.3368702589480185, "grad_norm": 0.7057338104233402, "learning_rate": 3.4461627003447267e-06, "loss": 0.0292, "step": 80735 }, { "epoch": 0.3368911216630087, "grad_norm": 0.9981270515001491, "learning_rate": 3.4460559918130555e-06, "loss": 0.0344, "step": 80740 }, { "epoch": 0.336911984377999, "grad_norm": 0.6573660183916777, "learning_rate": 3.4459492931932835e-06, "loss": 0.028, "step": 80745 }, { "epoch": 0.3369328470929893, "grad_norm": 0.6487985212441376, "learning_rate": 3.445842604483876e-06, "loss": 0.0476, "step": 80750 }, { "epoch": 0.33695370980797956, "grad_norm": 1.0040699637244874, "learning_rate": 3.4457359256832994e-06, "loss": 0.031, "step": 80755 }, { "epoch": 0.33697457252296986, "grad_norm": 0.7510767135029599, "learning_rate": 3.4456292567900195e-06, "loss": 0.0307, "step": 80760 }, { "epoch": 0.3369954352379601, "grad_norm": 1.2718287284546976, "learning_rate": 3.445522597802503e-06, "loss": 0.0226, "step": 80765 }, { "epoch": 0.3370162979529504, "grad_norm": 0.5797459456019755, "learning_rate": 3.4454159487192173e-06, "loss": 0.0254, "step": 80770 }, { "epoch": 0.3370371606679407, "grad_norm": 0.5194703367433805, "learning_rate": 3.4453093095386287e-06, "loss": 0.0303, "step": 80775 }, { "epoch": 0.33705802338293095, "grad_norm": 1.1807304955537852, "learning_rate": 3.445202680259206e-06, "loss": 0.0247, "step": 80780 }, { "epoch": 0.33707888609792125, "grad_norm": 0.7611282655443403, "learning_rate": 3.4450960608794167e-06, "loss": 0.0313, "step": 80785 }, { "epoch": 0.3370997488129115, "grad_norm": 0.7850968109645271, "learning_rate": 3.444989451397728e-06, "loss": 0.0336, "step": 80790 }, { "epoch": 0.3371206115279018, "grad_norm": 1.096233670133671, "learning_rate": 3.44488285181261e-06, "loss": 0.0279, "step": 80795 }, { "epoch": 0.3371414742428921, "grad_norm": 1.1144827338545782, "learning_rate": 3.4447762621225306e-06, "loss": 0.0277, "step": 80800 }, { "epoch": 0.33716233695788234, "grad_norm": 0.6021314399581739, "learning_rate": 3.44466968232596e-06, "loss": 0.0274, "step": 80805 }, { "epoch": 0.33718319967287264, "grad_norm": 0.7117856618402981, "learning_rate": 3.4445631124213667e-06, "loss": 0.0249, "step": 80810 }, { "epoch": 0.3372040623878629, "grad_norm": 0.41920007929281905, "learning_rate": 3.4444565524072214e-06, "loss": 0.0213, "step": 80815 }, { "epoch": 0.3372249251028532, "grad_norm": 0.6560857666114115, "learning_rate": 3.4443500022819947e-06, "loss": 0.0457, "step": 80820 }, { "epoch": 0.3372457878178435, "grad_norm": 0.7328008776328422, "learning_rate": 3.4442434620441557e-06, "loss": 0.024, "step": 80825 }, { "epoch": 0.3372666505328337, "grad_norm": 0.6443091201273985, "learning_rate": 3.4441369316921767e-06, "loss": 0.0301, "step": 80830 }, { "epoch": 0.337287513247824, "grad_norm": 0.7748028401779746, "learning_rate": 3.444030411224529e-06, "loss": 0.0294, "step": 80835 }, { "epoch": 0.3373083759628143, "grad_norm": 0.9623783758084093, "learning_rate": 3.443923900639683e-06, "loss": 0.0206, "step": 80840 }, { "epoch": 0.33732923867780457, "grad_norm": 0.7734239859341951, "learning_rate": 3.4438173999361112e-06, "loss": 0.0216, "step": 80845 }, { "epoch": 0.33735010139279487, "grad_norm": 0.776118081131467, "learning_rate": 3.4437109091122867e-06, "loss": 0.0291, "step": 80850 }, { "epoch": 0.3373709641077851, "grad_norm": 0.7046977554101305, "learning_rate": 3.4436044281666808e-06, "loss": 0.0311, "step": 80855 }, { "epoch": 0.3373918268227754, "grad_norm": 0.6295667630860793, "learning_rate": 3.4434979570977665e-06, "loss": 0.0242, "step": 80860 }, { "epoch": 0.3374126895377657, "grad_norm": 0.6295169107155429, "learning_rate": 3.4433914959040176e-06, "loss": 0.0334, "step": 80865 }, { "epoch": 0.33743355225275595, "grad_norm": 1.0076675568323665, "learning_rate": 3.443285044583908e-06, "loss": 0.0313, "step": 80870 }, { "epoch": 0.33745441496774625, "grad_norm": 0.6015537383522954, "learning_rate": 3.443178603135911e-06, "loss": 0.0248, "step": 80875 }, { "epoch": 0.3374752776827365, "grad_norm": 0.5455674124824612, "learning_rate": 3.4430721715585e-06, "loss": 0.0245, "step": 80880 }, { "epoch": 0.3374961403977268, "grad_norm": 1.0291444693909013, "learning_rate": 3.4429657498501514e-06, "loss": 0.0315, "step": 80885 }, { "epoch": 0.3375170031127171, "grad_norm": 0.6435055736719267, "learning_rate": 3.4428593380093387e-06, "loss": 0.0294, "step": 80890 }, { "epoch": 0.33753786582770734, "grad_norm": 1.0482420970050796, "learning_rate": 3.442752936034538e-06, "loss": 0.0365, "step": 80895 }, { "epoch": 0.33755872854269764, "grad_norm": 0.7320454453454065, "learning_rate": 3.4426465439242246e-06, "loss": 0.0281, "step": 80900 }, { "epoch": 0.3375795912576879, "grad_norm": 0.6947333369939406, "learning_rate": 3.4425401616768733e-06, "loss": 0.0245, "step": 80905 }, { "epoch": 0.3376004539726782, "grad_norm": 0.7723716633500882, "learning_rate": 3.4424337892909617e-06, "loss": 0.0209, "step": 80910 }, { "epoch": 0.3376213166876685, "grad_norm": 0.3676316665615145, "learning_rate": 3.442327426764966e-06, "loss": 0.0369, "step": 80915 }, { "epoch": 0.3376421794026587, "grad_norm": 0.9891316593670171, "learning_rate": 3.442221074097362e-06, "loss": 0.0272, "step": 80920 }, { "epoch": 0.337663042117649, "grad_norm": 0.8801510419299496, "learning_rate": 3.442114731286629e-06, "loss": 0.0229, "step": 80925 }, { "epoch": 0.3376839048326393, "grad_norm": 0.7931909265675042, "learning_rate": 3.4420083983312424e-06, "loss": 0.0236, "step": 80930 }, { "epoch": 0.33770476754762957, "grad_norm": 0.8050319867790732, "learning_rate": 3.441902075229681e-06, "loss": 0.0347, "step": 80935 }, { "epoch": 0.33772563026261987, "grad_norm": 0.7144385569139403, "learning_rate": 3.4417957619804233e-06, "loss": 0.0292, "step": 80940 }, { "epoch": 0.3377464929776101, "grad_norm": 0.8633753762443018, "learning_rate": 3.4416894585819473e-06, "loss": 0.0268, "step": 80945 }, { "epoch": 0.3377673556926004, "grad_norm": 0.81352028694347, "learning_rate": 3.441583165032732e-06, "loss": 0.0244, "step": 80950 }, { "epoch": 0.3377882184075907, "grad_norm": 0.8305626961440696, "learning_rate": 3.4414768813312554e-06, "loss": 0.0271, "step": 80955 }, { "epoch": 0.33780908112258096, "grad_norm": 1.0441693474406852, "learning_rate": 3.4413706074759995e-06, "loss": 0.0313, "step": 80960 }, { "epoch": 0.33782994383757126, "grad_norm": 0.7690937280065245, "learning_rate": 3.4412643434654424e-06, "loss": 0.0452, "step": 80965 }, { "epoch": 0.3378508065525615, "grad_norm": 1.1270764955850048, "learning_rate": 3.4411580892980646e-06, "loss": 0.0363, "step": 80970 }, { "epoch": 0.3378716692675518, "grad_norm": 0.6684742711323433, "learning_rate": 3.441051844972346e-06, "loss": 0.0265, "step": 80975 }, { "epoch": 0.3378925319825421, "grad_norm": 1.194019820350574, "learning_rate": 3.4409456104867683e-06, "loss": 0.0298, "step": 80980 }, { "epoch": 0.33791339469753234, "grad_norm": 0.7360402272813188, "learning_rate": 3.4408393858398126e-06, "loss": 0.0299, "step": 80985 }, { "epoch": 0.33793425741252264, "grad_norm": 0.7544120390205453, "learning_rate": 3.44073317102996e-06, "loss": 0.0287, "step": 80990 }, { "epoch": 0.3379551201275129, "grad_norm": 0.8524486281058004, "learning_rate": 3.4406269660556923e-06, "loss": 0.0306, "step": 80995 }, { "epoch": 0.3379759828425032, "grad_norm": 0.7769485493146535, "learning_rate": 3.440520770915492e-06, "loss": 0.0232, "step": 81000 }, { "epoch": 0.3379968455574935, "grad_norm": 0.7179073548407671, "learning_rate": 3.44041458560784e-06, "loss": 0.0311, "step": 81005 }, { "epoch": 0.33801770827248373, "grad_norm": 0.9184688094363601, "learning_rate": 3.4403084101312216e-06, "loss": 0.0378, "step": 81010 }, { "epoch": 0.33803857098747403, "grad_norm": 0.9412824419026994, "learning_rate": 3.440202244484118e-06, "loss": 0.0382, "step": 81015 }, { "epoch": 0.33805943370246433, "grad_norm": 0.5738572666564888, "learning_rate": 3.4400960886650137e-06, "loss": 0.0276, "step": 81020 }, { "epoch": 0.3380802964174546, "grad_norm": 0.4818949684023654, "learning_rate": 3.439989942672392e-06, "loss": 0.0259, "step": 81025 }, { "epoch": 0.3381011591324449, "grad_norm": 0.9680345783687856, "learning_rate": 3.439883806504736e-06, "loss": 0.0322, "step": 81030 }, { "epoch": 0.3381220218474351, "grad_norm": 0.9696569302553829, "learning_rate": 3.4397776801605327e-06, "loss": 0.031, "step": 81035 }, { "epoch": 0.3381428845624254, "grad_norm": 0.841987758777804, "learning_rate": 3.4396715636382636e-06, "loss": 0.0262, "step": 81040 }, { "epoch": 0.3381637472774157, "grad_norm": 0.7578772375317938, "learning_rate": 3.4395654569364166e-06, "loss": 0.0285, "step": 81045 }, { "epoch": 0.33818460999240596, "grad_norm": 0.54652479502636, "learning_rate": 3.439459360053476e-06, "loss": 0.0225, "step": 81050 }, { "epoch": 0.33820547270739626, "grad_norm": 0.8521910416461723, "learning_rate": 3.4393532729879262e-06, "loss": 0.0221, "step": 81055 }, { "epoch": 0.3382263354223865, "grad_norm": 1.0782025280619143, "learning_rate": 3.4392471957382555e-06, "loss": 0.0332, "step": 81060 }, { "epoch": 0.3382471981373768, "grad_norm": 1.3359480117332803, "learning_rate": 3.4391411283029493e-06, "loss": 0.0259, "step": 81065 }, { "epoch": 0.3382680608523671, "grad_norm": 0.8856940013546957, "learning_rate": 3.4390350706804944e-06, "loss": 0.0305, "step": 81070 }, { "epoch": 0.33828892356735735, "grad_norm": 6.133121231748945, "learning_rate": 3.4389290228693767e-06, "loss": 0.0284, "step": 81075 }, { "epoch": 0.33830978628234765, "grad_norm": 0.5486028944500163, "learning_rate": 3.4388229848680855e-06, "loss": 0.0294, "step": 81080 }, { "epoch": 0.3383306489973379, "grad_norm": 0.4746789731229122, "learning_rate": 3.438716956675107e-06, "loss": 0.0274, "step": 81085 }, { "epoch": 0.3383515117123282, "grad_norm": 0.9541393108567279, "learning_rate": 3.4386109382889294e-06, "loss": 0.0272, "step": 81090 }, { "epoch": 0.3383723744273185, "grad_norm": 0.5201925126069181, "learning_rate": 3.4385049297080426e-06, "loss": 0.0361, "step": 81095 }, { "epoch": 0.33839323714230873, "grad_norm": 0.8598933975898221, "learning_rate": 3.438398930930933e-06, "loss": 0.0237, "step": 81100 }, { "epoch": 0.33841409985729903, "grad_norm": 0.47015521634686147, "learning_rate": 3.4382929419560913e-06, "loss": 0.0288, "step": 81105 }, { "epoch": 0.33843496257228933, "grad_norm": 0.5217585769508026, "learning_rate": 3.4381869627820057e-06, "loss": 0.0234, "step": 81110 }, { "epoch": 0.3384558252872796, "grad_norm": 1.1684579100722685, "learning_rate": 3.4380809934071663e-06, "loss": 0.0287, "step": 81115 }, { "epoch": 0.3384766880022699, "grad_norm": 0.6390256792068181, "learning_rate": 3.4379750338300634e-06, "loss": 0.0305, "step": 81120 }, { "epoch": 0.3384975507172601, "grad_norm": 0.8165029821606239, "learning_rate": 3.437869084049186e-06, "loss": 0.0356, "step": 81125 }, { "epoch": 0.3385184134322504, "grad_norm": 0.6878672109968982, "learning_rate": 3.437763144063027e-06, "loss": 0.0246, "step": 81130 }, { "epoch": 0.3385392761472407, "grad_norm": 0.7914546897711123, "learning_rate": 3.4376572138700755e-06, "loss": 0.0233, "step": 81135 }, { "epoch": 0.33856013886223096, "grad_norm": 0.6594292429070832, "learning_rate": 3.4375512934688227e-06, "loss": 0.0228, "step": 81140 }, { "epoch": 0.33858100157722126, "grad_norm": 0.8674744951197724, "learning_rate": 3.4374453828577615e-06, "loss": 0.0313, "step": 81145 }, { "epoch": 0.3386018642922115, "grad_norm": 0.7402344237877684, "learning_rate": 3.4373394820353827e-06, "loss": 0.0265, "step": 81150 }, { "epoch": 0.3386227270072018, "grad_norm": 1.026252579950698, "learning_rate": 3.4372335910001786e-06, "loss": 0.0319, "step": 81155 }, { "epoch": 0.3386435897221921, "grad_norm": 1.214525867881713, "learning_rate": 3.437127709750643e-06, "loss": 0.028, "step": 81160 }, { "epoch": 0.33866445243718235, "grad_norm": 0.6980740027199215, "learning_rate": 3.4370218382852672e-06, "loss": 0.0281, "step": 81165 }, { "epoch": 0.33868531515217265, "grad_norm": 1.1523378635437005, "learning_rate": 3.436915976602546e-06, "loss": 0.0347, "step": 81170 }, { "epoch": 0.3387061778671629, "grad_norm": 0.4934829424258224, "learning_rate": 3.4368101247009703e-06, "loss": 0.0309, "step": 81175 }, { "epoch": 0.3387270405821532, "grad_norm": 0.6960411154715606, "learning_rate": 3.436704282579037e-06, "loss": 0.0303, "step": 81180 }, { "epoch": 0.3387479032971435, "grad_norm": 0.6383340218611376, "learning_rate": 3.4365984502352388e-06, "loss": 0.0255, "step": 81185 }, { "epoch": 0.33876876601213374, "grad_norm": 0.5013931837246594, "learning_rate": 3.4364926276680706e-06, "loss": 0.0266, "step": 81190 }, { "epoch": 0.33878962872712404, "grad_norm": 0.5043975325416857, "learning_rate": 3.4363868148760265e-06, "loss": 0.0237, "step": 81195 }, { "epoch": 0.33881049144211434, "grad_norm": 1.0616313117728553, "learning_rate": 3.436281011857603e-06, "loss": 0.0346, "step": 81200 }, { "epoch": 0.3388313541571046, "grad_norm": 0.7036449633602897, "learning_rate": 3.436175218611294e-06, "loss": 0.0248, "step": 81205 }, { "epoch": 0.3388522168720949, "grad_norm": 0.6996417142951615, "learning_rate": 3.4360694351355966e-06, "loss": 0.0265, "step": 81210 }, { "epoch": 0.3388730795870851, "grad_norm": 0.8098207100236826, "learning_rate": 3.435963661429007e-06, "loss": 0.0337, "step": 81215 }, { "epoch": 0.3388939423020754, "grad_norm": 0.7189903993136287, "learning_rate": 3.4358578974900205e-06, "loss": 0.0287, "step": 81220 }, { "epoch": 0.3389148050170657, "grad_norm": 0.9805575685417609, "learning_rate": 3.435752143317135e-06, "loss": 0.0251, "step": 81225 }, { "epoch": 0.33893566773205597, "grad_norm": 0.6749837903501161, "learning_rate": 3.4356463989088466e-06, "loss": 0.022, "step": 81230 }, { "epoch": 0.33895653044704627, "grad_norm": 1.1119571049887809, "learning_rate": 3.4355406642636535e-06, "loss": 0.0237, "step": 81235 }, { "epoch": 0.3389773931620365, "grad_norm": 1.153687590147549, "learning_rate": 3.4354349393800535e-06, "loss": 0.0342, "step": 81240 }, { "epoch": 0.3389982558770268, "grad_norm": 0.5164857754251527, "learning_rate": 3.435329224256544e-06, "loss": 0.0255, "step": 81245 }, { "epoch": 0.3390191185920171, "grad_norm": 0.5163458106556639, "learning_rate": 3.435223518891624e-06, "loss": 0.0347, "step": 81250 }, { "epoch": 0.33903998130700735, "grad_norm": 0.8096789662477686, "learning_rate": 3.4351178232837924e-06, "loss": 0.0317, "step": 81255 }, { "epoch": 0.33906084402199765, "grad_norm": 1.2545888293468805, "learning_rate": 3.4350121374315476e-06, "loss": 0.0329, "step": 81260 }, { "epoch": 0.3390817067369879, "grad_norm": 0.8358461732536386, "learning_rate": 3.434906461333389e-06, "loss": 0.0387, "step": 81265 }, { "epoch": 0.3391025694519782, "grad_norm": 0.7876101861827673, "learning_rate": 3.434800794987817e-06, "loss": 0.022, "step": 81270 }, { "epoch": 0.3391234321669685, "grad_norm": 0.977964694316402, "learning_rate": 3.4346951383933315e-06, "loss": 0.0248, "step": 81275 }, { "epoch": 0.33914429488195874, "grad_norm": 1.316605904591472, "learning_rate": 3.4345894915484316e-06, "loss": 0.0364, "step": 81280 }, { "epoch": 0.33916515759694904, "grad_norm": 0.9988885379272838, "learning_rate": 3.4344838544516197e-06, "loss": 0.026, "step": 81285 }, { "epoch": 0.33918602031193934, "grad_norm": 0.6078988206872252, "learning_rate": 3.4343782271013966e-06, "loss": 0.0204, "step": 81290 }, { "epoch": 0.3392068830269296, "grad_norm": 0.7569709192451834, "learning_rate": 3.4342726094962613e-06, "loss": 0.0337, "step": 81295 }, { "epoch": 0.3392277457419199, "grad_norm": 0.9926042695263605, "learning_rate": 3.4341670016347188e-06, "loss": 0.0323, "step": 81300 }, { "epoch": 0.33924860845691013, "grad_norm": 0.779118881135176, "learning_rate": 3.4340614035152685e-06, "loss": 0.0294, "step": 81305 }, { "epoch": 0.3392694711719004, "grad_norm": 1.1828294971667308, "learning_rate": 3.433955815136414e-06, "loss": 0.0338, "step": 81310 }, { "epoch": 0.3392903338868907, "grad_norm": 0.8802811122685874, "learning_rate": 3.4338502364966575e-06, "loss": 0.0218, "step": 81315 }, { "epoch": 0.33931119660188097, "grad_norm": 0.6879502593834865, "learning_rate": 3.4337446675945013e-06, "loss": 0.0246, "step": 81320 }, { "epoch": 0.33933205931687127, "grad_norm": 0.44556109396657395, "learning_rate": 3.4336391084284496e-06, "loss": 0.0298, "step": 81325 }, { "epoch": 0.3393529220318615, "grad_norm": 0.6910734311952568, "learning_rate": 3.433533558997006e-06, "loss": 0.0403, "step": 81330 }, { "epoch": 0.3393737847468518, "grad_norm": 0.8737968528440776, "learning_rate": 3.433428019298674e-06, "loss": 0.028, "step": 81335 }, { "epoch": 0.3393946474618421, "grad_norm": 0.6662826836328694, "learning_rate": 3.4333224893319577e-06, "loss": 0.0198, "step": 81340 }, { "epoch": 0.33941551017683236, "grad_norm": 1.3246430092904042, "learning_rate": 3.433216969095362e-06, "loss": 0.0332, "step": 81345 }, { "epoch": 0.33943637289182266, "grad_norm": 0.7212426329838942, "learning_rate": 3.4331114585873916e-06, "loss": 0.0289, "step": 81350 }, { "epoch": 0.3394572356068129, "grad_norm": 1.2771633824585578, "learning_rate": 3.433005957806551e-06, "loss": 0.0321, "step": 81355 }, { "epoch": 0.3394780983218032, "grad_norm": 1.8598526669789985, "learning_rate": 3.432900466751347e-06, "loss": 0.0404, "step": 81360 }, { "epoch": 0.3394989610367935, "grad_norm": 0.3812755702879307, "learning_rate": 3.4327949854202845e-06, "loss": 0.029, "step": 81365 }, { "epoch": 0.33951982375178374, "grad_norm": 0.7043548685467087, "learning_rate": 3.4326895138118702e-06, "loss": 0.0186, "step": 81370 }, { "epoch": 0.33954068646677404, "grad_norm": 0.6941600169300033, "learning_rate": 3.43258405192461e-06, "loss": 0.0364, "step": 81375 }, { "epoch": 0.33956154918176434, "grad_norm": 1.106780150612031, "learning_rate": 3.4324785997570104e-06, "loss": 0.0339, "step": 81380 }, { "epoch": 0.3395824118967546, "grad_norm": 0.5021397745989417, "learning_rate": 3.4323731573075804e-06, "loss": 0.0279, "step": 81385 }, { "epoch": 0.3396032746117449, "grad_norm": 1.1985056543514108, "learning_rate": 3.432267724574825e-06, "loss": 0.029, "step": 81390 }, { "epoch": 0.33962413732673513, "grad_norm": 0.46687510072049493, "learning_rate": 3.432162301557253e-06, "loss": 0.0367, "step": 81395 }, { "epoch": 0.33964500004172543, "grad_norm": 0.5676879479132348, "learning_rate": 3.432056888253373e-06, "loss": 0.0305, "step": 81400 }, { "epoch": 0.33966586275671573, "grad_norm": 0.8253215305838979, "learning_rate": 3.4319514846616925e-06, "loss": 0.0295, "step": 81405 }, { "epoch": 0.339686725471706, "grad_norm": 0.7284789750580066, "learning_rate": 3.431846090780721e-06, "loss": 0.0378, "step": 81410 }, { "epoch": 0.3397075881866963, "grad_norm": 0.6313140907818222, "learning_rate": 3.4317407066089665e-06, "loss": 0.0275, "step": 81415 }, { "epoch": 0.3397284509016865, "grad_norm": 0.7586379115980294, "learning_rate": 3.4316353321449398e-06, "loss": 0.0236, "step": 81420 }, { "epoch": 0.3397493136166768, "grad_norm": 0.5907463812193297, "learning_rate": 3.4315299673871488e-06, "loss": 0.0175, "step": 81425 }, { "epoch": 0.3397701763316671, "grad_norm": 0.7735168394095128, "learning_rate": 3.431424612334105e-06, "loss": 0.0238, "step": 81430 }, { "epoch": 0.33979103904665736, "grad_norm": 0.5998588029965929, "learning_rate": 3.4313192669843182e-06, "loss": 0.0274, "step": 81435 }, { "epoch": 0.33981190176164766, "grad_norm": 0.5885448743051372, "learning_rate": 3.4312139313362993e-06, "loss": 0.023, "step": 81440 }, { "epoch": 0.3398327644766379, "grad_norm": 1.160222874096444, "learning_rate": 3.431108605388559e-06, "loss": 0.0243, "step": 81445 }, { "epoch": 0.3398536271916282, "grad_norm": 0.9449823529536496, "learning_rate": 3.4310032891396074e-06, "loss": 0.027, "step": 81450 }, { "epoch": 0.3398744899066185, "grad_norm": 0.8357260947586875, "learning_rate": 3.430897982587958e-06, "loss": 0.0452, "step": 81455 }, { "epoch": 0.33989535262160875, "grad_norm": 0.5835130817696542, "learning_rate": 3.4307926857321218e-06, "loss": 0.0275, "step": 81460 }, { "epoch": 0.33991621533659905, "grad_norm": 0.9893207890158627, "learning_rate": 3.4306873985706112e-06, "loss": 0.0239, "step": 81465 }, { "epoch": 0.33993707805158935, "grad_norm": 0.6187163971208677, "learning_rate": 3.4305821211019384e-06, "loss": 0.036, "step": 81470 }, { "epoch": 0.3399579407665796, "grad_norm": 0.7994930661551541, "learning_rate": 3.4304768533246167e-06, "loss": 0.0224, "step": 81475 }, { "epoch": 0.3399788034815699, "grad_norm": 0.48573032484152545, "learning_rate": 3.4303715952371597e-06, "loss": 0.026, "step": 81480 }, { "epoch": 0.33999966619656014, "grad_norm": 0.9163759741510799, "learning_rate": 3.43026634683808e-06, "loss": 0.0283, "step": 81485 }, { "epoch": 0.34002052891155043, "grad_norm": 1.295500405299667, "learning_rate": 3.4301611081258917e-06, "loss": 0.0343, "step": 81490 }, { "epoch": 0.34004139162654073, "grad_norm": 0.9115698914170182, "learning_rate": 3.4300558790991093e-06, "loss": 0.0227, "step": 81495 }, { "epoch": 0.340062254341531, "grad_norm": 0.7816481203790544, "learning_rate": 3.4299506597562467e-06, "loss": 0.0282, "step": 81500 }, { "epoch": 0.3400831170565213, "grad_norm": 0.628775228165295, "learning_rate": 3.4298454500958194e-06, "loss": 0.0319, "step": 81505 }, { "epoch": 0.3401039797715115, "grad_norm": 0.8671058376605155, "learning_rate": 3.4297402501163417e-06, "loss": 0.0311, "step": 81510 }, { "epoch": 0.3401248424865018, "grad_norm": 1.0300900171932676, "learning_rate": 3.4296350598163297e-06, "loss": 0.0312, "step": 81515 }, { "epoch": 0.3401457052014921, "grad_norm": 0.7142229035925637, "learning_rate": 3.4295298791942988e-06, "loss": 0.0247, "step": 81520 }, { "epoch": 0.34016656791648237, "grad_norm": 0.6885903242417165, "learning_rate": 3.429424708248765e-06, "loss": 0.0297, "step": 81525 }, { "epoch": 0.34018743063147266, "grad_norm": 0.7324556362078269, "learning_rate": 3.4293195469782454e-06, "loss": 0.0297, "step": 81530 }, { "epoch": 0.3402082933464629, "grad_norm": 0.5655180271888162, "learning_rate": 3.429214395381256e-06, "loss": 0.0293, "step": 81535 }, { "epoch": 0.3402291560614532, "grad_norm": 0.39608264634142665, "learning_rate": 3.4291092534563135e-06, "loss": 0.0253, "step": 81540 }, { "epoch": 0.3402500187764435, "grad_norm": 0.8152708420874465, "learning_rate": 3.4290041212019353e-06, "loss": 0.027, "step": 81545 }, { "epoch": 0.34027088149143375, "grad_norm": 2.335319251229651, "learning_rate": 3.4288989986166403e-06, "loss": 0.0296, "step": 81550 }, { "epoch": 0.34029174420642405, "grad_norm": 0.4776707214457247, "learning_rate": 3.428793885698945e-06, "loss": 0.0303, "step": 81555 }, { "epoch": 0.34031260692141435, "grad_norm": 0.49234417528889807, "learning_rate": 3.4286887824473686e-06, "loss": 0.0328, "step": 81560 }, { "epoch": 0.3403334696364046, "grad_norm": 0.6250668951952321, "learning_rate": 3.4285836888604294e-06, "loss": 0.0312, "step": 81565 }, { "epoch": 0.3403543323513949, "grad_norm": 0.9178546438107448, "learning_rate": 3.4284786049366463e-06, "loss": 0.0326, "step": 81570 }, { "epoch": 0.34037519506638514, "grad_norm": 0.5550028698384896, "learning_rate": 3.4283735306745385e-06, "loss": 0.0317, "step": 81575 }, { "epoch": 0.34039605778137544, "grad_norm": 0.8776962103148688, "learning_rate": 3.4282684660726255e-06, "loss": 0.0251, "step": 81580 }, { "epoch": 0.34041692049636574, "grad_norm": 0.7950415499397442, "learning_rate": 3.4281634111294275e-06, "loss": 0.0237, "step": 81585 }, { "epoch": 0.340437783211356, "grad_norm": 0.8180642190043814, "learning_rate": 3.428058365843464e-06, "loss": 0.0256, "step": 81590 }, { "epoch": 0.3404586459263463, "grad_norm": 0.6092384727685054, "learning_rate": 3.4279533302132566e-06, "loss": 0.0319, "step": 81595 }, { "epoch": 0.3404795086413365, "grad_norm": 0.4130784615067847, "learning_rate": 3.4278483042373254e-06, "loss": 0.0273, "step": 81600 }, { "epoch": 0.3405003713563268, "grad_norm": 0.8486045939336663, "learning_rate": 3.427743287914192e-06, "loss": 0.0296, "step": 81605 }, { "epoch": 0.3405212340713171, "grad_norm": 0.9317771347616728, "learning_rate": 3.427638281242377e-06, "loss": 0.0272, "step": 81610 }, { "epoch": 0.34054209678630737, "grad_norm": 0.9606354988501016, "learning_rate": 3.427533284220403e-06, "loss": 0.0357, "step": 81615 }, { "epoch": 0.34056295950129767, "grad_norm": 0.7762190190121578, "learning_rate": 3.4274282968467913e-06, "loss": 0.0241, "step": 81620 }, { "epoch": 0.3405838222162879, "grad_norm": 0.7524282610635579, "learning_rate": 3.4273233191200654e-06, "loss": 0.0328, "step": 81625 }, { "epoch": 0.3406046849312782, "grad_norm": 0.8650035366895835, "learning_rate": 3.4272183510387473e-06, "loss": 0.0249, "step": 81630 }, { "epoch": 0.3406255476462685, "grad_norm": 0.9458717956837841, "learning_rate": 3.4271133926013604e-06, "loss": 0.0418, "step": 81635 }, { "epoch": 0.34064641036125876, "grad_norm": 0.924135025141854, "learning_rate": 3.4270084438064277e-06, "loss": 0.0235, "step": 81640 }, { "epoch": 0.34066727307624906, "grad_norm": 0.6584769161434201, "learning_rate": 3.4269035046524735e-06, "loss": 0.022, "step": 81645 }, { "epoch": 0.34068813579123935, "grad_norm": 0.660911267241824, "learning_rate": 3.426798575138021e-06, "loss": 0.0311, "step": 81650 }, { "epoch": 0.3407089985062296, "grad_norm": 1.2391746743678755, "learning_rate": 3.426693655261595e-06, "loss": 0.0281, "step": 81655 }, { "epoch": 0.3407298612212199, "grad_norm": 1.1122494872839095, "learning_rate": 3.4265887450217193e-06, "loss": 0.0279, "step": 81660 }, { "epoch": 0.34075072393621014, "grad_norm": 0.732210665266606, "learning_rate": 3.42648384441692e-06, "loss": 0.0226, "step": 81665 }, { "epoch": 0.34077158665120044, "grad_norm": 0.5368484968479685, "learning_rate": 3.426378953445722e-06, "loss": 0.0394, "step": 81670 }, { "epoch": 0.34079244936619074, "grad_norm": 0.4318437053916812, "learning_rate": 3.4262740721066517e-06, "loss": 0.0218, "step": 81675 }, { "epoch": 0.340813312081181, "grad_norm": 0.7254325981906993, "learning_rate": 3.4261692003982334e-06, "loss": 0.0296, "step": 81680 }, { "epoch": 0.3408341747961713, "grad_norm": 0.9871782824722579, "learning_rate": 3.4260643383189936e-06, "loss": 0.0258, "step": 81685 }, { "epoch": 0.34085503751116153, "grad_norm": 0.6118222846044169, "learning_rate": 3.425959485867459e-06, "loss": 0.0281, "step": 81690 }, { "epoch": 0.34087590022615183, "grad_norm": 0.8681159764650836, "learning_rate": 3.4258546430421572e-06, "loss": 0.0235, "step": 81695 }, { "epoch": 0.34089676294114213, "grad_norm": 0.7780477923462693, "learning_rate": 3.425749809841615e-06, "loss": 0.0326, "step": 81700 }, { "epoch": 0.3409176256561324, "grad_norm": 1.325992828212644, "learning_rate": 3.4256449862643596e-06, "loss": 0.0302, "step": 81705 }, { "epoch": 0.34093848837112267, "grad_norm": 0.7866130828712674, "learning_rate": 3.4255401723089194e-06, "loss": 0.024, "step": 81710 }, { "epoch": 0.3409593510861129, "grad_norm": 0.7813338849440111, "learning_rate": 3.4254353679738206e-06, "loss": 0.0277, "step": 81715 }, { "epoch": 0.3409802138011032, "grad_norm": 0.6251635921827342, "learning_rate": 3.425330573257594e-06, "loss": 0.0313, "step": 81720 }, { "epoch": 0.3410010765160935, "grad_norm": 0.5207969219960922, "learning_rate": 3.4252257881587676e-06, "loss": 0.0319, "step": 81725 }, { "epoch": 0.34102193923108376, "grad_norm": 1.0303873467386235, "learning_rate": 3.42512101267587e-06, "loss": 0.0357, "step": 81730 }, { "epoch": 0.34104280194607406, "grad_norm": 0.7080484479785529, "learning_rate": 3.4250162468074304e-06, "loss": 0.0262, "step": 81735 }, { "epoch": 0.34106366466106436, "grad_norm": 1.9937754661886695, "learning_rate": 3.4249114905519787e-06, "loss": 0.0242, "step": 81740 }, { "epoch": 0.3410845273760546, "grad_norm": 0.7106091745332956, "learning_rate": 3.424806743908046e-06, "loss": 0.0239, "step": 81745 }, { "epoch": 0.3411053900910449, "grad_norm": 0.8448407671837049, "learning_rate": 3.4247020068741603e-06, "loss": 0.0306, "step": 81750 }, { "epoch": 0.34112625280603515, "grad_norm": 0.782875353215426, "learning_rate": 3.424597279448854e-06, "loss": 0.0276, "step": 81755 }, { "epoch": 0.34114711552102545, "grad_norm": 0.665779293333686, "learning_rate": 3.424492561630658e-06, "loss": 0.0253, "step": 81760 }, { "epoch": 0.34116797823601575, "grad_norm": 0.8443234132752019, "learning_rate": 3.4243878534181036e-06, "loss": 0.0287, "step": 81765 }, { "epoch": 0.341188840951006, "grad_norm": 0.4521650309044187, "learning_rate": 3.4242831548097216e-06, "loss": 0.0214, "step": 81770 }, { "epoch": 0.3412097036659963, "grad_norm": 0.6316099024801941, "learning_rate": 3.424178465804044e-06, "loss": 0.0198, "step": 81775 }, { "epoch": 0.34123056638098653, "grad_norm": 1.0201020831750789, "learning_rate": 3.4240737863996026e-06, "loss": 0.0262, "step": 81780 }, { "epoch": 0.34125142909597683, "grad_norm": 0.6478156429807652, "learning_rate": 3.423969116594931e-06, "loss": 0.033, "step": 81785 }, { "epoch": 0.34127229181096713, "grad_norm": 0.6896058627624991, "learning_rate": 3.4238644563885613e-06, "loss": 0.0353, "step": 81790 }, { "epoch": 0.3412931545259574, "grad_norm": 0.7613595495714301, "learning_rate": 3.423759805779028e-06, "loss": 0.0237, "step": 81795 }, { "epoch": 0.3413140172409477, "grad_norm": 1.367963746910669, "learning_rate": 3.4236551647648618e-06, "loss": 0.0261, "step": 81800 }, { "epoch": 0.3413348799559379, "grad_norm": 0.9661614930217192, "learning_rate": 3.4235505333445994e-06, "loss": 0.0389, "step": 81805 }, { "epoch": 0.3413557426709282, "grad_norm": 0.7930678903918525, "learning_rate": 3.423445911516773e-06, "loss": 0.0389, "step": 81810 }, { "epoch": 0.3413766053859185, "grad_norm": 0.684173522549386, "learning_rate": 3.4233412992799176e-06, "loss": 0.0322, "step": 81815 }, { "epoch": 0.34139746810090876, "grad_norm": 0.8936071220983763, "learning_rate": 3.4232366966325683e-06, "loss": 0.0344, "step": 81820 }, { "epoch": 0.34141833081589906, "grad_norm": 0.5813658693728606, "learning_rate": 3.423132103573259e-06, "loss": 0.0327, "step": 81825 }, { "epoch": 0.34143919353088936, "grad_norm": 0.5356232284434286, "learning_rate": 3.423027520100526e-06, "loss": 0.0188, "step": 81830 }, { "epoch": 0.3414600562458796, "grad_norm": 0.9947483230930704, "learning_rate": 3.422922946212905e-06, "loss": 0.033, "step": 81835 }, { "epoch": 0.3414809189608699, "grad_norm": 0.42356525298741865, "learning_rate": 3.4228183819089305e-06, "loss": 0.0273, "step": 81840 }, { "epoch": 0.34150178167586015, "grad_norm": 1.098785657023104, "learning_rate": 3.4227138271871406e-06, "loss": 0.0261, "step": 81845 }, { "epoch": 0.34152264439085045, "grad_norm": 0.8717171150689391, "learning_rate": 3.4226092820460715e-06, "loss": 0.0348, "step": 81850 }, { "epoch": 0.34154350710584075, "grad_norm": 0.6156205202203007, "learning_rate": 3.4225047464842597e-06, "loss": 0.0333, "step": 81855 }, { "epoch": 0.341564369820831, "grad_norm": 1.1034733956678728, "learning_rate": 3.422400220500242e-06, "loss": 0.0249, "step": 81860 }, { "epoch": 0.3415852325358213, "grad_norm": 1.0312376042498856, "learning_rate": 3.422295704092557e-06, "loss": 0.0264, "step": 81865 }, { "epoch": 0.34160609525081154, "grad_norm": 0.6721384669751559, "learning_rate": 3.422191197259741e-06, "loss": 0.0277, "step": 81870 }, { "epoch": 0.34162695796580184, "grad_norm": 0.9746624532243755, "learning_rate": 3.422086700000334e-06, "loss": 0.0259, "step": 81875 }, { "epoch": 0.34164782068079214, "grad_norm": 0.6260981316166765, "learning_rate": 3.421982212312873e-06, "loss": 0.0289, "step": 81880 }, { "epoch": 0.3416686833957824, "grad_norm": 1.6455042761322525, "learning_rate": 3.4218777341958977e-06, "loss": 0.0266, "step": 81885 }, { "epoch": 0.3416895461107727, "grad_norm": 0.758351298083129, "learning_rate": 3.421773265647947e-06, "loss": 0.029, "step": 81890 }, { "epoch": 0.3417104088257629, "grad_norm": 0.9251541821262849, "learning_rate": 3.421668806667559e-06, "loss": 0.0325, "step": 81895 }, { "epoch": 0.3417312715407532, "grad_norm": 0.8231629932359475, "learning_rate": 3.4215643572532753e-06, "loss": 0.0256, "step": 81900 }, { "epoch": 0.3417521342557435, "grad_norm": 0.9962793787587345, "learning_rate": 3.421459917403635e-06, "loss": 0.0241, "step": 81905 }, { "epoch": 0.34177299697073377, "grad_norm": 0.46901332921835637, "learning_rate": 3.421355487117179e-06, "loss": 0.0276, "step": 81910 }, { "epoch": 0.34179385968572407, "grad_norm": 0.8304043520744947, "learning_rate": 3.421251066392447e-06, "loss": 0.0221, "step": 81915 }, { "epoch": 0.34181472240071437, "grad_norm": 0.36601628259182073, "learning_rate": 3.4211466552279803e-06, "loss": 0.0244, "step": 81920 }, { "epoch": 0.3418355851157046, "grad_norm": 1.181382336973752, "learning_rate": 3.4210422536223204e-06, "loss": 0.0282, "step": 81925 }, { "epoch": 0.3418564478306949, "grad_norm": 0.712122701554271, "learning_rate": 3.420937861574009e-06, "loss": 0.0278, "step": 81930 }, { "epoch": 0.34187731054568515, "grad_norm": 0.6001057942170428, "learning_rate": 3.4208334790815883e-06, "loss": 0.0274, "step": 81935 }, { "epoch": 0.34189817326067545, "grad_norm": 1.1330304653397334, "learning_rate": 3.4207291061435992e-06, "loss": 0.0283, "step": 81940 }, { "epoch": 0.34191903597566575, "grad_norm": 0.6816331659071238, "learning_rate": 3.4206247427585854e-06, "loss": 0.0297, "step": 81945 }, { "epoch": 0.341939898690656, "grad_norm": 0.5889367864528238, "learning_rate": 3.4205203889250887e-06, "loss": 0.0282, "step": 81950 }, { "epoch": 0.3419607614056463, "grad_norm": 0.8593909801476478, "learning_rate": 3.4204160446416534e-06, "loss": 0.018, "step": 81955 }, { "epoch": 0.34198162412063654, "grad_norm": 0.9120603951788439, "learning_rate": 3.420311709906823e-06, "loss": 0.0259, "step": 81960 }, { "epoch": 0.34200248683562684, "grad_norm": 0.4829353235944794, "learning_rate": 3.42020738471914e-06, "loss": 0.0217, "step": 81965 }, { "epoch": 0.34202334955061714, "grad_norm": 0.5433988564081518, "learning_rate": 3.420103069077149e-06, "loss": 0.0238, "step": 81970 }, { "epoch": 0.3420442122656074, "grad_norm": 1.0619786962941882, "learning_rate": 3.4199987629793952e-06, "loss": 0.0271, "step": 81975 }, { "epoch": 0.3420650749805977, "grad_norm": 1.187994184710778, "learning_rate": 3.419894466424422e-06, "loss": 0.0194, "step": 81980 }, { "epoch": 0.3420859376955879, "grad_norm": 0.62149615213615, "learning_rate": 3.4197901794107753e-06, "loss": 0.0331, "step": 81985 }, { "epoch": 0.3421068004105782, "grad_norm": 0.7974549600174999, "learning_rate": 3.4196859019369993e-06, "loss": 0.0264, "step": 81990 }, { "epoch": 0.3421276631255685, "grad_norm": 0.8609301994660407, "learning_rate": 3.419581634001642e-06, "loss": 0.0178, "step": 81995 }, { "epoch": 0.34214852584055877, "grad_norm": 0.7880960479776092, "learning_rate": 3.419477375603247e-06, "loss": 0.0448, "step": 82000 }, { "epoch": 0.34216938855554907, "grad_norm": 0.7907698570999201, "learning_rate": 3.4193731267403614e-06, "loss": 0.0291, "step": 82005 }, { "epoch": 0.34219025127053937, "grad_norm": 1.5588082069768652, "learning_rate": 3.4192688874115313e-06, "loss": 0.0296, "step": 82010 }, { "epoch": 0.3422111139855296, "grad_norm": 0.6726534187817962, "learning_rate": 3.4191646576153045e-06, "loss": 0.0311, "step": 82015 }, { "epoch": 0.3422319767005199, "grad_norm": 0.5976485999060547, "learning_rate": 3.4190604373502273e-06, "loss": 0.0303, "step": 82020 }, { "epoch": 0.34225283941551016, "grad_norm": 1.1754163515326759, "learning_rate": 3.4189562266148473e-06, "loss": 0.0293, "step": 82025 }, { "epoch": 0.34227370213050046, "grad_norm": 0.6427941640602397, "learning_rate": 3.4188520254077133e-06, "loss": 0.0245, "step": 82030 }, { "epoch": 0.34229456484549076, "grad_norm": 0.49176898101475464, "learning_rate": 3.4187478337273728e-06, "loss": 0.0211, "step": 82035 }, { "epoch": 0.342315427560481, "grad_norm": 0.5531126828727577, "learning_rate": 3.4186436515723726e-06, "loss": 0.0213, "step": 82040 }, { "epoch": 0.3423362902754713, "grad_norm": 0.9556935707335935, "learning_rate": 3.418539478941264e-06, "loss": 0.0289, "step": 82045 }, { "epoch": 0.34235715299046154, "grad_norm": 0.7741716907417879, "learning_rate": 3.418435315832594e-06, "loss": 0.0314, "step": 82050 }, { "epoch": 0.34237801570545184, "grad_norm": 0.6080991787321699, "learning_rate": 3.4183311622449133e-06, "loss": 0.0221, "step": 82055 }, { "epoch": 0.34239887842044214, "grad_norm": 1.0382391153798354, "learning_rate": 3.4182270181767717e-06, "loss": 0.0263, "step": 82060 }, { "epoch": 0.3424197411354324, "grad_norm": 1.2126243305823894, "learning_rate": 3.4181228836267176e-06, "loss": 0.0272, "step": 82065 }, { "epoch": 0.3424406038504227, "grad_norm": 1.4974517095385296, "learning_rate": 3.4180187585933027e-06, "loss": 0.0319, "step": 82070 }, { "epoch": 0.34246146656541293, "grad_norm": 0.7505104137953542, "learning_rate": 3.4179146430750765e-06, "loss": 0.0282, "step": 82075 }, { "epoch": 0.34248232928040323, "grad_norm": 1.050556157905366, "learning_rate": 3.417810537070591e-06, "loss": 0.0354, "step": 82080 }, { "epoch": 0.34250319199539353, "grad_norm": 0.6812487482781989, "learning_rate": 3.4177064405783964e-06, "loss": 0.0239, "step": 82085 }, { "epoch": 0.3425240547103838, "grad_norm": 1.1961846138470118, "learning_rate": 3.417602353597046e-06, "loss": 0.0303, "step": 82090 }, { "epoch": 0.3425449174253741, "grad_norm": 0.7275060243380073, "learning_rate": 3.417498276125089e-06, "loss": 0.0254, "step": 82095 }, { "epoch": 0.3425657801403644, "grad_norm": 0.9294034854668499, "learning_rate": 3.417394208161079e-06, "loss": 0.0288, "step": 82100 }, { "epoch": 0.3425866428553546, "grad_norm": 0.9456705366164724, "learning_rate": 3.417290149703568e-06, "loss": 0.0289, "step": 82105 }, { "epoch": 0.3426075055703449, "grad_norm": 0.6996271553883275, "learning_rate": 3.417186100751109e-06, "loss": 0.0348, "step": 82110 }, { "epoch": 0.34262836828533516, "grad_norm": 0.6443994724010085, "learning_rate": 3.4170820613022564e-06, "loss": 0.0243, "step": 82115 }, { "epoch": 0.34264923100032546, "grad_norm": 0.640715838026205, "learning_rate": 3.416978031355561e-06, "loss": 0.0253, "step": 82120 }, { "epoch": 0.34267009371531576, "grad_norm": 0.6757002326032778, "learning_rate": 3.4168740109095777e-06, "loss": 0.0258, "step": 82125 }, { "epoch": 0.342690956430306, "grad_norm": 0.6371406387454035, "learning_rate": 3.4167699999628602e-06, "loss": 0.0232, "step": 82130 }, { "epoch": 0.3427118191452963, "grad_norm": 1.9180158344526819, "learning_rate": 3.4166659985139634e-06, "loss": 0.0254, "step": 82135 }, { "epoch": 0.34273268186028655, "grad_norm": 0.8752870248606721, "learning_rate": 3.4165620065614415e-06, "loss": 0.0238, "step": 82140 }, { "epoch": 0.34275354457527685, "grad_norm": 1.1776693148191206, "learning_rate": 3.4164580241038492e-06, "loss": 0.0335, "step": 82145 }, { "epoch": 0.34277440729026715, "grad_norm": 0.8397656865920808, "learning_rate": 3.416354051139742e-06, "loss": 0.027, "step": 82150 }, { "epoch": 0.3427952700052574, "grad_norm": 0.4650402424589656, "learning_rate": 3.416250087667676e-06, "loss": 0.0247, "step": 82155 }, { "epoch": 0.3428161327202477, "grad_norm": 0.9237741368785175, "learning_rate": 3.4161461336862053e-06, "loss": 0.0303, "step": 82160 }, { "epoch": 0.34283699543523793, "grad_norm": 0.5640765355351356, "learning_rate": 3.416042189193887e-06, "loss": 0.026, "step": 82165 }, { "epoch": 0.34285785815022823, "grad_norm": 0.4478012836443254, "learning_rate": 3.415938254189278e-06, "loss": 0.0262, "step": 82170 }, { "epoch": 0.34287872086521853, "grad_norm": 1.28745350791953, "learning_rate": 3.4158343286709344e-06, "loss": 0.0308, "step": 82175 }, { "epoch": 0.3428995835802088, "grad_norm": 1.0859088314106966, "learning_rate": 3.4157304126374143e-06, "loss": 0.0279, "step": 82180 }, { "epoch": 0.3429204462951991, "grad_norm": 0.9925677756481778, "learning_rate": 3.4156265060872734e-06, "loss": 0.0294, "step": 82185 }, { "epoch": 0.3429413090101894, "grad_norm": 1.1598148764686478, "learning_rate": 3.415522609019071e-06, "loss": 0.031, "step": 82190 }, { "epoch": 0.3429621717251796, "grad_norm": 1.2034334739994774, "learning_rate": 3.4154187214313634e-06, "loss": 0.0192, "step": 82195 }, { "epoch": 0.3429830344401699, "grad_norm": 0.7827625711208748, "learning_rate": 3.41531484332271e-06, "loss": 0.023, "step": 82200 }, { "epoch": 0.34300389715516016, "grad_norm": 0.8089787628020125, "learning_rate": 3.4152109746916695e-06, "loss": 0.0306, "step": 82205 }, { "epoch": 0.34302475987015046, "grad_norm": 1.205797262165444, "learning_rate": 3.4151071155368006e-06, "loss": 0.0286, "step": 82210 }, { "epoch": 0.34304562258514076, "grad_norm": 0.6048963073447329, "learning_rate": 3.415003265856662e-06, "loss": 0.0449, "step": 82215 }, { "epoch": 0.343066485300131, "grad_norm": 0.5292029873199855, "learning_rate": 3.4148994256498132e-06, "loss": 0.0226, "step": 82220 }, { "epoch": 0.3430873480151213, "grad_norm": 3.417360129576145, "learning_rate": 3.4147955949148144e-06, "loss": 0.0243, "step": 82225 }, { "epoch": 0.34310821073011155, "grad_norm": 1.1573475426188977, "learning_rate": 3.414691773650226e-06, "loss": 0.0404, "step": 82230 }, { "epoch": 0.34312907344510185, "grad_norm": 0.7593264638876931, "learning_rate": 3.414587961854608e-06, "loss": 0.0305, "step": 82235 }, { "epoch": 0.34314993616009215, "grad_norm": 0.39744032724665773, "learning_rate": 3.4144841595265215e-06, "loss": 0.0239, "step": 82240 }, { "epoch": 0.3431707988750824, "grad_norm": 0.6079792182073167, "learning_rate": 3.4143803666645274e-06, "loss": 0.0328, "step": 82245 }, { "epoch": 0.3431916615900727, "grad_norm": 1.0788801090266191, "learning_rate": 3.4142765832671863e-06, "loss": 0.0335, "step": 82250 }, { "epoch": 0.34321252430506294, "grad_norm": 0.746800049982255, "learning_rate": 3.4141728093330608e-06, "loss": 0.0244, "step": 82255 }, { "epoch": 0.34323338702005324, "grad_norm": 1.1006495098398026, "learning_rate": 3.414069044860713e-06, "loss": 0.0241, "step": 82260 }, { "epoch": 0.34325424973504354, "grad_norm": 0.6772732678101381, "learning_rate": 3.413965289848704e-06, "loss": 0.0297, "step": 82265 }, { "epoch": 0.3432751124500338, "grad_norm": 0.7202598876346293, "learning_rate": 3.4138615442955974e-06, "loss": 0.0252, "step": 82270 }, { "epoch": 0.3432959751650241, "grad_norm": 0.7333762321956158, "learning_rate": 3.4137578081999558e-06, "loss": 0.0293, "step": 82275 }, { "epoch": 0.3433168378800144, "grad_norm": 1.5375654964865593, "learning_rate": 3.413654081560342e-06, "loss": 0.0418, "step": 82280 }, { "epoch": 0.3433377005950046, "grad_norm": 0.6295986363843628, "learning_rate": 3.4135503643753196e-06, "loss": 0.023, "step": 82285 }, { "epoch": 0.3433585633099949, "grad_norm": 1.411363156905341, "learning_rate": 3.4134466566434533e-06, "loss": 0.0207, "step": 82290 }, { "epoch": 0.34337942602498517, "grad_norm": 1.6622506092760345, "learning_rate": 3.4133429583633066e-06, "loss": 0.0334, "step": 82295 }, { "epoch": 0.34340028873997547, "grad_norm": 0.553913558373419, "learning_rate": 3.4132392695334434e-06, "loss": 0.0289, "step": 82300 }, { "epoch": 0.34342115145496577, "grad_norm": 0.8881748696236694, "learning_rate": 3.4131355901524284e-06, "loss": 0.0242, "step": 82305 }, { "epoch": 0.343442014169956, "grad_norm": 0.4691967227014926, "learning_rate": 3.413031920218827e-06, "loss": 0.0348, "step": 82310 }, { "epoch": 0.3434628768849463, "grad_norm": 0.8583406394320214, "learning_rate": 3.412928259731205e-06, "loss": 0.0265, "step": 82315 }, { "epoch": 0.34348373959993656, "grad_norm": 0.6677067047585463, "learning_rate": 3.412824608688128e-06, "loss": 0.0225, "step": 82320 }, { "epoch": 0.34350460231492685, "grad_norm": 0.7797171048335771, "learning_rate": 3.4127209670881608e-06, "loss": 0.022, "step": 82325 }, { "epoch": 0.34352546502991715, "grad_norm": 0.8610513005907128, "learning_rate": 3.4126173349298704e-06, "loss": 0.0259, "step": 82330 }, { "epoch": 0.3435463277449074, "grad_norm": 1.5773919402902365, "learning_rate": 3.4125137122118236e-06, "loss": 0.0294, "step": 82335 }, { "epoch": 0.3435671904598977, "grad_norm": 0.34937565907543994, "learning_rate": 3.4124100989325864e-06, "loss": 0.0218, "step": 82340 }, { "epoch": 0.34358805317488794, "grad_norm": 1.113578492140413, "learning_rate": 3.4123064950907264e-06, "loss": 0.033, "step": 82345 }, { "epoch": 0.34360891588987824, "grad_norm": 0.5540437018427418, "learning_rate": 3.4122029006848114e-06, "loss": 0.028, "step": 82350 }, { "epoch": 0.34362977860486854, "grad_norm": 0.6395658196555105, "learning_rate": 3.4120993157134085e-06, "loss": 0.0337, "step": 82355 }, { "epoch": 0.3436506413198588, "grad_norm": 0.6043673151751371, "learning_rate": 3.411995740175087e-06, "loss": 0.0231, "step": 82360 }, { "epoch": 0.3436715040348491, "grad_norm": 2.4294156895507832, "learning_rate": 3.4118921740684137e-06, "loss": 0.034, "step": 82365 }, { "epoch": 0.3436923667498394, "grad_norm": 0.6617067708782137, "learning_rate": 3.411788617391958e-06, "loss": 0.0202, "step": 82370 }, { "epoch": 0.34371322946482963, "grad_norm": 1.587087999266949, "learning_rate": 3.4116850701442884e-06, "loss": 0.0452, "step": 82375 }, { "epoch": 0.34373409217981993, "grad_norm": 0.7536299814100162, "learning_rate": 3.411581532323975e-06, "loss": 0.0254, "step": 82380 }, { "epoch": 0.34375495489481017, "grad_norm": 0.6074484029268474, "learning_rate": 3.411478003929587e-06, "loss": 0.0329, "step": 82385 }, { "epoch": 0.34377581760980047, "grad_norm": 1.3109715816724874, "learning_rate": 3.411374484959694e-06, "loss": 0.0442, "step": 82390 }, { "epoch": 0.34379668032479077, "grad_norm": 1.2318844989893158, "learning_rate": 3.411270975412867e-06, "loss": 0.0282, "step": 82395 }, { "epoch": 0.343817543039781, "grad_norm": 0.7174970922293178, "learning_rate": 3.4111674752876756e-06, "loss": 0.0298, "step": 82400 }, { "epoch": 0.3438384057547713, "grad_norm": 0.3438721759578607, "learning_rate": 3.411063984582691e-06, "loss": 0.0222, "step": 82405 }, { "epoch": 0.34385926846976156, "grad_norm": 0.4212578173279624, "learning_rate": 3.410960503296483e-06, "loss": 0.0295, "step": 82410 }, { "epoch": 0.34388013118475186, "grad_norm": 0.9004238743738495, "learning_rate": 3.4108570314276258e-06, "loss": 0.024, "step": 82415 }, { "epoch": 0.34390099389974216, "grad_norm": 1.8243251735352062, "learning_rate": 3.4107535689746894e-06, "loss": 0.0356, "step": 82420 }, { "epoch": 0.3439218566147324, "grad_norm": 0.6229582194719432, "learning_rate": 3.4106501159362452e-06, "loss": 0.0348, "step": 82425 }, { "epoch": 0.3439427193297227, "grad_norm": 0.8072925446427599, "learning_rate": 3.4105466723108667e-06, "loss": 0.0338, "step": 82430 }, { "epoch": 0.34396358204471295, "grad_norm": 0.6372207649104663, "learning_rate": 3.410443238097126e-06, "loss": 0.0325, "step": 82435 }, { "epoch": 0.34398444475970325, "grad_norm": 0.7060734714438671, "learning_rate": 3.410339813293596e-06, "loss": 0.0423, "step": 82440 }, { "epoch": 0.34400530747469354, "grad_norm": 0.5743918879162638, "learning_rate": 3.41023639789885e-06, "loss": 0.0315, "step": 82445 }, { "epoch": 0.3440261701896838, "grad_norm": 0.787782360036194, "learning_rate": 3.4101329919114613e-06, "loss": 0.0234, "step": 82450 }, { "epoch": 0.3440470329046741, "grad_norm": 1.0499150439511626, "learning_rate": 3.4100295953300043e-06, "loss": 0.0307, "step": 82455 }, { "epoch": 0.3440678956196644, "grad_norm": 4.242622031916441, "learning_rate": 3.4099262081530524e-06, "loss": 0.0264, "step": 82460 }, { "epoch": 0.34408875833465463, "grad_norm": 0.6727846293485759, "learning_rate": 3.4098228303791815e-06, "loss": 0.0429, "step": 82465 }, { "epoch": 0.34410962104964493, "grad_norm": 0.4799635561218645, "learning_rate": 3.409719462006964e-06, "loss": 0.024, "step": 82470 }, { "epoch": 0.3441304837646352, "grad_norm": 0.9232461030480851, "learning_rate": 3.4096161030349765e-06, "loss": 0.0355, "step": 82475 }, { "epoch": 0.3441513464796255, "grad_norm": 2.12264804628335, "learning_rate": 3.4095127534617944e-06, "loss": 0.0332, "step": 82480 }, { "epoch": 0.3441722091946158, "grad_norm": 1.0246329099130096, "learning_rate": 3.4094094132859923e-06, "loss": 0.0253, "step": 82485 }, { "epoch": 0.344193071909606, "grad_norm": 1.1893350901496342, "learning_rate": 3.409306082506147e-06, "loss": 0.0349, "step": 82490 }, { "epoch": 0.3442139346245963, "grad_norm": 0.6250700378260161, "learning_rate": 3.4092027611208344e-06, "loss": 0.0319, "step": 82495 }, { "epoch": 0.34423479733958656, "grad_norm": 0.5464963810949762, "learning_rate": 3.409099449128632e-06, "loss": 0.0305, "step": 82500 }, { "epoch": 0.34425566005457686, "grad_norm": 0.4500776070815752, "learning_rate": 3.408996146528116e-06, "loss": 0.0239, "step": 82505 }, { "epoch": 0.34427652276956716, "grad_norm": 0.7525180531715574, "learning_rate": 3.4088928533178617e-06, "loss": 0.0347, "step": 82510 }, { "epoch": 0.3442973854845574, "grad_norm": 0.7816577763251858, "learning_rate": 3.4087895694964494e-06, "loss": 0.0287, "step": 82515 }, { "epoch": 0.3443182481995477, "grad_norm": 0.9467426115576258, "learning_rate": 3.4086862950624556e-06, "loss": 0.0362, "step": 82520 }, { "epoch": 0.34433911091453795, "grad_norm": 0.9583130460561075, "learning_rate": 3.4085830300144583e-06, "loss": 0.0433, "step": 82525 }, { "epoch": 0.34435997362952825, "grad_norm": 0.7506114524314479, "learning_rate": 3.408479774351036e-06, "loss": 0.0247, "step": 82530 }, { "epoch": 0.34438083634451855, "grad_norm": 0.6202530000279438, "learning_rate": 3.408376528070768e-06, "loss": 0.0266, "step": 82535 }, { "epoch": 0.3444016990595088, "grad_norm": 0.7793453222688388, "learning_rate": 3.408273291172232e-06, "loss": 0.0328, "step": 82540 }, { "epoch": 0.3444225617744991, "grad_norm": 0.7436011176722074, "learning_rate": 3.408170063654008e-06, "loss": 0.0326, "step": 82545 }, { "epoch": 0.3444434244894894, "grad_norm": 0.603271474783972, "learning_rate": 3.4080668455146754e-06, "loss": 0.025, "step": 82550 }, { "epoch": 0.34446428720447964, "grad_norm": 0.7913313255035501, "learning_rate": 3.407963636752814e-06, "loss": 0.0195, "step": 82555 }, { "epoch": 0.34448514991946994, "grad_norm": 0.5574598578224076, "learning_rate": 3.4078604373670045e-06, "loss": 0.0344, "step": 82560 }, { "epoch": 0.3445060126344602, "grad_norm": 1.0250557913769693, "learning_rate": 3.407757247355826e-06, "loss": 0.0454, "step": 82565 }, { "epoch": 0.3445268753494505, "grad_norm": 0.8031420990091777, "learning_rate": 3.4076540667178613e-06, "loss": 0.029, "step": 82570 }, { "epoch": 0.3445477380644408, "grad_norm": 0.9018269750732999, "learning_rate": 3.40755089545169e-06, "loss": 0.0399, "step": 82575 }, { "epoch": 0.344568600779431, "grad_norm": 0.5999202167398527, "learning_rate": 3.4074477335558938e-06, "loss": 0.0271, "step": 82580 }, { "epoch": 0.3445894634944213, "grad_norm": 0.7795411237548527, "learning_rate": 3.4073445810290547e-06, "loss": 0.0325, "step": 82585 }, { "epoch": 0.34461032620941157, "grad_norm": 0.8251880744467258, "learning_rate": 3.407241437869754e-06, "loss": 0.0342, "step": 82590 }, { "epoch": 0.34463118892440187, "grad_norm": 0.5856405846023787, "learning_rate": 3.4071383040765743e-06, "loss": 0.022, "step": 82595 }, { "epoch": 0.34465205163939217, "grad_norm": 0.5926006791498879, "learning_rate": 3.4070351796480988e-06, "loss": 0.0295, "step": 82600 }, { "epoch": 0.3446729143543824, "grad_norm": 0.6247145362288516, "learning_rate": 3.4069320645829092e-06, "loss": 0.0322, "step": 82605 }, { "epoch": 0.3446937770693727, "grad_norm": 0.8214999402453707, "learning_rate": 3.4068289588795895e-06, "loss": 0.0252, "step": 82610 }, { "epoch": 0.34471463978436295, "grad_norm": 0.7569492581570585, "learning_rate": 3.4067258625367228e-06, "loss": 0.0267, "step": 82615 }, { "epoch": 0.34473550249935325, "grad_norm": 0.510542574196508, "learning_rate": 3.4066227755528934e-06, "loss": 0.0307, "step": 82620 }, { "epoch": 0.34475636521434355, "grad_norm": 0.8812801834312591, "learning_rate": 3.406519697926685e-06, "loss": 0.0318, "step": 82625 }, { "epoch": 0.3447772279293338, "grad_norm": 0.6650947299946608, "learning_rate": 3.4064166296566815e-06, "loss": 0.0218, "step": 82630 }, { "epoch": 0.3447980906443241, "grad_norm": 0.6659846787953642, "learning_rate": 3.4063135707414687e-06, "loss": 0.0244, "step": 82635 }, { "epoch": 0.3448189533593144, "grad_norm": 0.7368464532842649, "learning_rate": 3.40621052117963e-06, "loss": 0.0238, "step": 82640 }, { "epoch": 0.34483981607430464, "grad_norm": 0.655377414381577, "learning_rate": 3.406107480969752e-06, "loss": 0.0231, "step": 82645 }, { "epoch": 0.34486067878929494, "grad_norm": 1.0916684067394096, "learning_rate": 3.4060044501104196e-06, "loss": 0.0308, "step": 82650 }, { "epoch": 0.3448815415042852, "grad_norm": 0.8816110041143727, "learning_rate": 3.405901428600219e-06, "loss": 0.0256, "step": 82655 }, { "epoch": 0.3449024042192755, "grad_norm": 0.9689133219204689, "learning_rate": 3.4057984164377367e-06, "loss": 0.0248, "step": 82660 }, { "epoch": 0.3449232669342658, "grad_norm": 0.5736284571222885, "learning_rate": 3.405695413621558e-06, "loss": 0.0246, "step": 82665 }, { "epoch": 0.344944129649256, "grad_norm": 0.9771309382633167, "learning_rate": 3.405592420150271e-06, "loss": 0.0291, "step": 82670 }, { "epoch": 0.3449649923642463, "grad_norm": 0.6781063259183479, "learning_rate": 3.4054894360224617e-06, "loss": 0.0305, "step": 82675 }, { "epoch": 0.34498585507923657, "grad_norm": 1.0606093261224536, "learning_rate": 3.405386461236718e-06, "loss": 0.0271, "step": 82680 }, { "epoch": 0.34500671779422687, "grad_norm": 1.3376235087298483, "learning_rate": 3.4052834957916287e-06, "loss": 0.0375, "step": 82685 }, { "epoch": 0.34502758050921717, "grad_norm": 0.9390325804281294, "learning_rate": 3.405180539685779e-06, "loss": 0.0303, "step": 82690 }, { "epoch": 0.3450484432242074, "grad_norm": 0.9966649712473343, "learning_rate": 3.405077592917759e-06, "loss": 0.0467, "step": 82695 }, { "epoch": 0.3450693059391977, "grad_norm": 1.1372458492086333, "learning_rate": 3.404974655486157e-06, "loss": 0.0302, "step": 82700 }, { "epoch": 0.34509016865418796, "grad_norm": 0.5136360129721466, "learning_rate": 3.404871727389563e-06, "loss": 0.0248, "step": 82705 }, { "epoch": 0.34511103136917826, "grad_norm": 0.5330856597397275, "learning_rate": 3.404768808626564e-06, "loss": 0.0336, "step": 82710 }, { "epoch": 0.34513189408416856, "grad_norm": 0.5381798618417687, "learning_rate": 3.4046658991957504e-06, "loss": 0.0273, "step": 82715 }, { "epoch": 0.3451527567991588, "grad_norm": 0.6632279590716993, "learning_rate": 3.404562999095713e-06, "loss": 0.03, "step": 82720 }, { "epoch": 0.3451736195141491, "grad_norm": 0.7486054248378122, "learning_rate": 3.4044601083250395e-06, "loss": 0.0323, "step": 82725 }, { "epoch": 0.3451944822291394, "grad_norm": 0.7600812163097079, "learning_rate": 3.404357226882322e-06, "loss": 0.0255, "step": 82730 }, { "epoch": 0.34521534494412964, "grad_norm": 0.7235726915951185, "learning_rate": 3.404254354766151e-06, "loss": 0.029, "step": 82735 }, { "epoch": 0.34523620765911994, "grad_norm": 0.7993568416024204, "learning_rate": 3.4041514919751177e-06, "loss": 0.0252, "step": 82740 }, { "epoch": 0.3452570703741102, "grad_norm": 1.1517757622201608, "learning_rate": 3.4040486385078123e-06, "loss": 0.0272, "step": 82745 }, { "epoch": 0.3452779330891005, "grad_norm": 0.2591337325168101, "learning_rate": 3.4039457943628266e-06, "loss": 0.0255, "step": 82750 }, { "epoch": 0.3452987958040908, "grad_norm": 1.077730563250578, "learning_rate": 3.4038429595387536e-06, "loss": 0.0278, "step": 82755 }, { "epoch": 0.34531965851908103, "grad_norm": 0.6453043184230466, "learning_rate": 3.4037401340341835e-06, "loss": 0.0222, "step": 82760 }, { "epoch": 0.34534052123407133, "grad_norm": 0.9259613867181757, "learning_rate": 3.403637317847711e-06, "loss": 0.0332, "step": 82765 }, { "epoch": 0.3453613839490616, "grad_norm": 0.831484872588135, "learning_rate": 3.403534510977926e-06, "loss": 0.0444, "step": 82770 }, { "epoch": 0.3453822466640519, "grad_norm": 0.9110396797268153, "learning_rate": 3.4034317134234246e-06, "loss": 0.0276, "step": 82775 }, { "epoch": 0.3454031093790422, "grad_norm": 0.6904549325274206, "learning_rate": 3.403328925182799e-06, "loss": 0.031, "step": 82780 }, { "epoch": 0.3454239720940324, "grad_norm": 1.0712531524446094, "learning_rate": 3.403226146254642e-06, "loss": 0.0313, "step": 82785 }, { "epoch": 0.3454448348090227, "grad_norm": 0.7108747650739705, "learning_rate": 3.4031233766375475e-06, "loss": 0.0384, "step": 82790 }, { "epoch": 0.34546569752401296, "grad_norm": 2.00289345657209, "learning_rate": 3.4030206163301104e-06, "loss": 0.035, "step": 82795 }, { "epoch": 0.34548656023900326, "grad_norm": 1.0012627958170779, "learning_rate": 3.402917865330926e-06, "loss": 0.0268, "step": 82800 }, { "epoch": 0.34550742295399356, "grad_norm": 0.9156097996200463, "learning_rate": 3.4028151236385876e-06, "loss": 0.0261, "step": 82805 }, { "epoch": 0.3455282856689838, "grad_norm": 1.2355179086681376, "learning_rate": 3.4027123912516914e-06, "loss": 0.0317, "step": 82810 }, { "epoch": 0.3455491483839741, "grad_norm": 0.5408825178042295, "learning_rate": 3.402609668168832e-06, "loss": 0.0307, "step": 82815 }, { "epoch": 0.3455700110989644, "grad_norm": 0.7866643721100355, "learning_rate": 3.402506954388605e-06, "loss": 0.0353, "step": 82820 }, { "epoch": 0.34559087381395465, "grad_norm": 0.8952942571372536, "learning_rate": 3.402404249909607e-06, "loss": 0.0251, "step": 82825 }, { "epoch": 0.34561173652894495, "grad_norm": 0.9757610872047979, "learning_rate": 3.4023015547304346e-06, "loss": 0.0264, "step": 82830 }, { "epoch": 0.3456325992439352, "grad_norm": 0.850549221303322, "learning_rate": 3.4021988688496837e-06, "loss": 0.0324, "step": 82835 }, { "epoch": 0.3456534619589255, "grad_norm": 1.0486425328923974, "learning_rate": 3.4020961922659524e-06, "loss": 0.0364, "step": 82840 }, { "epoch": 0.3456743246739158, "grad_norm": 0.602301878736749, "learning_rate": 3.401993524977835e-06, "loss": 0.0287, "step": 82845 }, { "epoch": 0.34569518738890603, "grad_norm": 0.7112477866577291, "learning_rate": 3.401890866983933e-06, "loss": 0.0316, "step": 82850 }, { "epoch": 0.34571605010389633, "grad_norm": 0.678343316507612, "learning_rate": 3.4017882182828404e-06, "loss": 0.0337, "step": 82855 }, { "epoch": 0.3457369128188866, "grad_norm": 0.46878422382811324, "learning_rate": 3.4016855788731575e-06, "loss": 0.0225, "step": 82860 }, { "epoch": 0.3457577755338769, "grad_norm": 1.057927628248057, "learning_rate": 3.401582948753483e-06, "loss": 0.0257, "step": 82865 }, { "epoch": 0.3457786382488672, "grad_norm": 0.6510663529417169, "learning_rate": 3.4014803279224136e-06, "loss": 0.026, "step": 82870 }, { "epoch": 0.3457995009638574, "grad_norm": 0.775704470444115, "learning_rate": 3.40137771637855e-06, "loss": 0.0262, "step": 82875 }, { "epoch": 0.3458203636788477, "grad_norm": 0.7031196734599497, "learning_rate": 3.40127511412049e-06, "loss": 0.0299, "step": 82880 }, { "epoch": 0.34584122639383796, "grad_norm": 0.4830360230213523, "learning_rate": 3.4011725211468347e-06, "loss": 0.0237, "step": 82885 }, { "epoch": 0.34586208910882826, "grad_norm": 0.7281269507265289, "learning_rate": 3.4010699374561836e-06, "loss": 0.0322, "step": 82890 }, { "epoch": 0.34588295182381856, "grad_norm": 0.6435029270233906, "learning_rate": 3.4009673630471357e-06, "loss": 0.026, "step": 82895 }, { "epoch": 0.3459038145388088, "grad_norm": 1.0273571615121746, "learning_rate": 3.400864797918293e-06, "loss": 0.0254, "step": 82900 }, { "epoch": 0.3459246772537991, "grad_norm": 0.49822812949723366, "learning_rate": 3.4007622420682545e-06, "loss": 0.0275, "step": 82905 }, { "epoch": 0.34594553996878935, "grad_norm": 0.8455028827959865, "learning_rate": 3.4006596954956235e-06, "loss": 0.0286, "step": 82910 }, { "epoch": 0.34596640268377965, "grad_norm": 0.8320315405621566, "learning_rate": 3.400557158198999e-06, "loss": 0.0336, "step": 82915 }, { "epoch": 0.34598726539876995, "grad_norm": 0.9268800226813367, "learning_rate": 3.400454630176984e-06, "loss": 0.024, "step": 82920 }, { "epoch": 0.3460081281137602, "grad_norm": 0.64108029032426, "learning_rate": 3.4003521114281808e-06, "loss": 0.0251, "step": 82925 }, { "epoch": 0.3460289908287505, "grad_norm": 0.8074788443746926, "learning_rate": 3.40024960195119e-06, "loss": 0.0253, "step": 82930 }, { "epoch": 0.3460498535437408, "grad_norm": 0.7442310618190128, "learning_rate": 3.4001471017446147e-06, "loss": 0.0278, "step": 82935 }, { "epoch": 0.34607071625873104, "grad_norm": 0.3011442734845227, "learning_rate": 3.400044610807059e-06, "loss": 0.0183, "step": 82940 }, { "epoch": 0.34609157897372134, "grad_norm": 0.8021052248371485, "learning_rate": 3.399942129137124e-06, "loss": 0.0331, "step": 82945 }, { "epoch": 0.3461124416887116, "grad_norm": 0.7808976320403149, "learning_rate": 3.3998396567334152e-06, "loss": 0.0225, "step": 82950 }, { "epoch": 0.3461333044037019, "grad_norm": 1.0459620852665745, "learning_rate": 3.399737193594535e-06, "loss": 0.0324, "step": 82955 }, { "epoch": 0.3461541671186922, "grad_norm": 0.8719808589103598, "learning_rate": 3.3996347397190873e-06, "loss": 0.0353, "step": 82960 }, { "epoch": 0.3461750298336824, "grad_norm": 0.5394117728214974, "learning_rate": 3.399532295105677e-06, "loss": 0.024, "step": 82965 }, { "epoch": 0.3461958925486727, "grad_norm": 0.7554556795764216, "learning_rate": 3.3994298597529073e-06, "loss": 0.0189, "step": 82970 }, { "epoch": 0.34621675526366297, "grad_norm": 0.5511050250099956, "learning_rate": 3.399327433659385e-06, "loss": 0.0188, "step": 82975 }, { "epoch": 0.34623761797865327, "grad_norm": 0.5112197853658142, "learning_rate": 3.3992250168237147e-06, "loss": 0.0271, "step": 82980 }, { "epoch": 0.34625848069364357, "grad_norm": 0.6312354398056469, "learning_rate": 3.399122609244501e-06, "loss": 0.0319, "step": 82985 }, { "epoch": 0.3462793434086338, "grad_norm": 0.8589589454962406, "learning_rate": 3.3990202109203502e-06, "loss": 0.0293, "step": 82990 }, { "epoch": 0.3463002061236241, "grad_norm": 1.632698630829308, "learning_rate": 3.398917821849868e-06, "loss": 0.0361, "step": 82995 }, { "epoch": 0.34632106883861435, "grad_norm": 0.4414546507769409, "learning_rate": 3.398815442031662e-06, "loss": 0.027, "step": 83000 }, { "epoch": 0.34634193155360465, "grad_norm": 0.5418760352904711, "learning_rate": 3.3987130714643374e-06, "loss": 0.0298, "step": 83005 }, { "epoch": 0.34636279426859495, "grad_norm": 1.2713286980886591, "learning_rate": 3.3986107101465016e-06, "loss": 0.0302, "step": 83010 }, { "epoch": 0.3463836569835852, "grad_norm": 0.5902148533061464, "learning_rate": 3.398508358076762e-06, "loss": 0.0275, "step": 83015 }, { "epoch": 0.3464045196985755, "grad_norm": 0.6203915097978778, "learning_rate": 3.3984060152537256e-06, "loss": 0.0312, "step": 83020 }, { "epoch": 0.3464253824135658, "grad_norm": 0.8449641555992938, "learning_rate": 3.3983036816760003e-06, "loss": 0.026, "step": 83025 }, { "epoch": 0.34644624512855604, "grad_norm": 1.1063656626324663, "learning_rate": 3.398201357342195e-06, "loss": 0.0289, "step": 83030 }, { "epoch": 0.34646710784354634, "grad_norm": 0.35720671554241146, "learning_rate": 3.398099042250917e-06, "loss": 0.0226, "step": 83035 }, { "epoch": 0.3464879705585366, "grad_norm": 0.575010464711817, "learning_rate": 3.3979967364007765e-06, "loss": 0.0244, "step": 83040 }, { "epoch": 0.3465088332735269, "grad_norm": 1.1289815173323394, "learning_rate": 3.397894439790381e-06, "loss": 0.0244, "step": 83045 }, { "epoch": 0.3465296959885172, "grad_norm": 0.7787473917578465, "learning_rate": 3.39779215241834e-06, "loss": 0.0284, "step": 83050 }, { "epoch": 0.3465505587035074, "grad_norm": 2.4052975693251013, "learning_rate": 3.3976898742832635e-06, "loss": 0.0453, "step": 83055 }, { "epoch": 0.3465714214184977, "grad_norm": 0.3180052621043509, "learning_rate": 3.3975876053837615e-06, "loss": 0.0279, "step": 83060 }, { "epoch": 0.34659228413348797, "grad_norm": 0.4741871752385747, "learning_rate": 3.3974853457184438e-06, "loss": 0.0238, "step": 83065 }, { "epoch": 0.34661314684847827, "grad_norm": 0.7085895356131292, "learning_rate": 3.3973830952859207e-06, "loss": 0.0335, "step": 83070 }, { "epoch": 0.34663400956346857, "grad_norm": 0.9629029071985874, "learning_rate": 3.397280854084803e-06, "loss": 0.0351, "step": 83075 }, { "epoch": 0.3466548722784588, "grad_norm": 0.6268103408271642, "learning_rate": 3.3971786221137015e-06, "loss": 0.0241, "step": 83080 }, { "epoch": 0.3466757349934491, "grad_norm": 0.7615116159308443, "learning_rate": 3.3970763993712284e-06, "loss": 0.0244, "step": 83085 }, { "epoch": 0.34669659770843936, "grad_norm": 0.9231486004924776, "learning_rate": 3.396974185855995e-06, "loss": 0.026, "step": 83090 }, { "epoch": 0.34671746042342966, "grad_norm": 0.7896569431944581, "learning_rate": 3.3968719815666125e-06, "loss": 0.0286, "step": 83095 }, { "epoch": 0.34673832313841996, "grad_norm": 0.8622752818027797, "learning_rate": 3.3967697865016948e-06, "loss": 0.0188, "step": 83100 }, { "epoch": 0.3467591858534102, "grad_norm": 0.6419101101852123, "learning_rate": 3.396667600659852e-06, "loss": 0.0242, "step": 83105 }, { "epoch": 0.3467800485684005, "grad_norm": 0.7565038266437252, "learning_rate": 3.3965654240396985e-06, "loss": 0.0254, "step": 83110 }, { "epoch": 0.3468009112833908, "grad_norm": 0.4613956641217449, "learning_rate": 3.3964632566398463e-06, "loss": 0.0292, "step": 83115 }, { "epoch": 0.34682177399838104, "grad_norm": 0.9029401323675245, "learning_rate": 3.39636109845891e-06, "loss": 0.0265, "step": 83120 }, { "epoch": 0.34684263671337134, "grad_norm": 0.8942835117112392, "learning_rate": 3.396258949495503e-06, "loss": 0.0397, "step": 83125 }, { "epoch": 0.3468634994283616, "grad_norm": 0.8133674879691434, "learning_rate": 3.396156809748239e-06, "loss": 0.0159, "step": 83130 }, { "epoch": 0.3468843621433519, "grad_norm": 0.8594204455846187, "learning_rate": 3.3960546792157313e-06, "loss": 0.0239, "step": 83135 }, { "epoch": 0.3469052248583422, "grad_norm": 0.9943096946904104, "learning_rate": 3.3959525578965957e-06, "loss": 0.0218, "step": 83140 }, { "epoch": 0.34692608757333243, "grad_norm": 0.774033000646815, "learning_rate": 3.3958504457894467e-06, "loss": 0.0284, "step": 83145 }, { "epoch": 0.34694695028832273, "grad_norm": 0.7141592427202744, "learning_rate": 3.3957483428928995e-06, "loss": 0.024, "step": 83150 }, { "epoch": 0.346967813003313, "grad_norm": 0.829660663466899, "learning_rate": 3.395646249205569e-06, "loss": 0.025, "step": 83155 }, { "epoch": 0.3469886757183033, "grad_norm": 1.2984824634897854, "learning_rate": 3.3955441647260716e-06, "loss": 0.0253, "step": 83160 }, { "epoch": 0.3470095384332936, "grad_norm": 0.7150024744025035, "learning_rate": 3.395442089453023e-06, "loss": 0.0291, "step": 83165 }, { "epoch": 0.3470304011482838, "grad_norm": 0.5569095437197785, "learning_rate": 3.395340023385039e-06, "loss": 0.0289, "step": 83170 }, { "epoch": 0.3470512638632741, "grad_norm": 0.8532659808464452, "learning_rate": 3.3952379665207368e-06, "loss": 0.0233, "step": 83175 }, { "epoch": 0.34707212657826436, "grad_norm": 0.8201928001256823, "learning_rate": 3.3951359188587334e-06, "loss": 0.0258, "step": 83180 }, { "epoch": 0.34709298929325466, "grad_norm": 1.0226399298085105, "learning_rate": 3.3950338803976457e-06, "loss": 0.0301, "step": 83185 }, { "epoch": 0.34711385200824496, "grad_norm": 0.670977840314061, "learning_rate": 3.3949318511360906e-06, "loss": 0.028, "step": 83190 }, { "epoch": 0.3471347147232352, "grad_norm": 0.8499584561888884, "learning_rate": 3.394829831072687e-06, "loss": 0.0229, "step": 83195 }, { "epoch": 0.3471555774382255, "grad_norm": 0.9762015941370418, "learning_rate": 3.3947278202060518e-06, "loss": 0.0187, "step": 83200 }, { "epoch": 0.3471764401532158, "grad_norm": 1.0714001876861188, "learning_rate": 3.3946258185348034e-06, "loss": 0.0272, "step": 83205 }, { "epoch": 0.34719730286820605, "grad_norm": 0.6988758847006236, "learning_rate": 3.3945238260575615e-06, "loss": 0.0255, "step": 83210 }, { "epoch": 0.34721816558319635, "grad_norm": 0.3571534510804378, "learning_rate": 3.3944218427729437e-06, "loss": 0.0325, "step": 83215 }, { "epoch": 0.3472390282981866, "grad_norm": 0.7891772791066082, "learning_rate": 3.3943198686795702e-06, "loss": 0.0294, "step": 83220 }, { "epoch": 0.3472598910131769, "grad_norm": 0.6753756831137764, "learning_rate": 3.3942179037760604e-06, "loss": 0.0308, "step": 83225 }, { "epoch": 0.3472807537281672, "grad_norm": 0.7790076315430899, "learning_rate": 3.3941159480610326e-06, "loss": 0.0295, "step": 83230 }, { "epoch": 0.34730161644315743, "grad_norm": 0.5440479356722355, "learning_rate": 3.3940140015331085e-06, "loss": 0.0374, "step": 83235 }, { "epoch": 0.34732247915814773, "grad_norm": 0.8474341819812735, "learning_rate": 3.393912064190908e-06, "loss": 0.0284, "step": 83240 }, { "epoch": 0.347343341873138, "grad_norm": 0.8705833763910996, "learning_rate": 3.393810136033052e-06, "loss": 0.0295, "step": 83245 }, { "epoch": 0.3473642045881283, "grad_norm": 0.8111448180921239, "learning_rate": 3.393708217058161e-06, "loss": 0.0341, "step": 83250 }, { "epoch": 0.3473850673031186, "grad_norm": 0.585096616248014, "learning_rate": 3.3936063072648555e-06, "loss": 0.0272, "step": 83255 }, { "epoch": 0.3474059300181088, "grad_norm": 1.423344644361808, "learning_rate": 3.393504406651759e-06, "loss": 0.0418, "step": 83260 }, { "epoch": 0.3474267927330991, "grad_norm": 1.028771432735293, "learning_rate": 3.3934025152174917e-06, "loss": 0.0265, "step": 83265 }, { "epoch": 0.34744765544808937, "grad_norm": 0.7550878499190514, "learning_rate": 3.393300632960676e-06, "loss": 0.0205, "step": 83270 }, { "epoch": 0.34746851816307966, "grad_norm": 0.8439026307210952, "learning_rate": 3.3931987598799345e-06, "loss": 0.0277, "step": 83275 }, { "epoch": 0.34748938087806996, "grad_norm": 0.8424783927551028, "learning_rate": 3.3930968959738897e-06, "loss": 0.0226, "step": 83280 }, { "epoch": 0.3475102435930602, "grad_norm": 0.9481263124292766, "learning_rate": 3.392995041241165e-06, "loss": 0.0257, "step": 83285 }, { "epoch": 0.3475311063080505, "grad_norm": 0.6522914785509192, "learning_rate": 3.3928931956803836e-06, "loss": 0.0218, "step": 83290 }, { "epoch": 0.3475519690230408, "grad_norm": 0.9348120084966334, "learning_rate": 3.3927913592901677e-06, "loss": 0.0255, "step": 83295 }, { "epoch": 0.34757283173803105, "grad_norm": 3.6800985912348634, "learning_rate": 3.3926895320691427e-06, "loss": 0.0335, "step": 83300 }, { "epoch": 0.34759369445302135, "grad_norm": 0.570117387827841, "learning_rate": 3.392587714015932e-06, "loss": 0.0201, "step": 83305 }, { "epoch": 0.3476145571680116, "grad_norm": 1.1010540650477594, "learning_rate": 3.392485905129161e-06, "loss": 0.0269, "step": 83310 }, { "epoch": 0.3476354198830019, "grad_norm": 1.4694628876187636, "learning_rate": 3.3923841054074522e-06, "loss": 0.032, "step": 83315 }, { "epoch": 0.3476562825979922, "grad_norm": 0.923190691859011, "learning_rate": 3.392282314849433e-06, "loss": 0.0366, "step": 83320 }, { "epoch": 0.34767714531298244, "grad_norm": 0.7591296619234855, "learning_rate": 3.392180533453727e-06, "loss": 0.029, "step": 83325 }, { "epoch": 0.34769800802797274, "grad_norm": 1.068129346031444, "learning_rate": 3.3920787612189614e-06, "loss": 0.0428, "step": 83330 }, { "epoch": 0.347718870742963, "grad_norm": 0.3680291228325206, "learning_rate": 3.39197699814376e-06, "loss": 0.0288, "step": 83335 }, { "epoch": 0.3477397334579533, "grad_norm": 1.2467580434762462, "learning_rate": 3.3918752442267504e-06, "loss": 0.0302, "step": 83340 }, { "epoch": 0.3477605961729436, "grad_norm": 0.6247652712330025, "learning_rate": 3.3917734994665593e-06, "loss": 0.0289, "step": 83345 }, { "epoch": 0.3477814588879338, "grad_norm": 1.139772167012032, "learning_rate": 3.3916717638618114e-06, "loss": 0.0482, "step": 83350 }, { "epoch": 0.3478023216029241, "grad_norm": 0.9049977651059731, "learning_rate": 3.3915700374111353e-06, "loss": 0.033, "step": 83355 }, { "epoch": 0.34782318431791437, "grad_norm": 0.7031220263995701, "learning_rate": 3.391468320113159e-06, "loss": 0.0305, "step": 83360 }, { "epoch": 0.34784404703290467, "grad_norm": 0.6013028482849502, "learning_rate": 3.3913666119665086e-06, "loss": 0.0212, "step": 83365 }, { "epoch": 0.34786490974789497, "grad_norm": 0.8689691228438565, "learning_rate": 3.3912649129698123e-06, "loss": 0.0262, "step": 83370 }, { "epoch": 0.3478857724628852, "grad_norm": 0.7070846087953249, "learning_rate": 3.3911632231216982e-06, "loss": 0.0246, "step": 83375 }, { "epoch": 0.3479066351778755, "grad_norm": 1.0272876043465355, "learning_rate": 3.3910615424207954e-06, "loss": 0.0304, "step": 83380 }, { "epoch": 0.3479274978928658, "grad_norm": 0.7554652042338518, "learning_rate": 3.390959870865732e-06, "loss": 0.0235, "step": 83385 }, { "epoch": 0.34794836060785606, "grad_norm": 0.7404621905361211, "learning_rate": 3.390858208455137e-06, "loss": 0.0342, "step": 83390 }, { "epoch": 0.34796922332284635, "grad_norm": 0.8580722337902037, "learning_rate": 3.3907565551876405e-06, "loss": 0.028, "step": 83395 }, { "epoch": 0.3479900860378366, "grad_norm": 1.124433571644995, "learning_rate": 3.3906549110618715e-06, "loss": 0.0395, "step": 83400 }, { "epoch": 0.3480109487528269, "grad_norm": 0.7047727688072036, "learning_rate": 3.3905532760764597e-06, "loss": 0.0246, "step": 83405 }, { "epoch": 0.3480318114678172, "grad_norm": 0.7016179959060985, "learning_rate": 3.390451650230035e-06, "loss": 0.0204, "step": 83410 }, { "epoch": 0.34805267418280744, "grad_norm": 0.9540584729471763, "learning_rate": 3.3903500335212286e-06, "loss": 0.0286, "step": 83415 }, { "epoch": 0.34807353689779774, "grad_norm": 0.648148725311575, "learning_rate": 3.3902484259486714e-06, "loss": 0.0198, "step": 83420 }, { "epoch": 0.348094399612788, "grad_norm": 0.7413789340710285, "learning_rate": 3.390146827510994e-06, "loss": 0.0271, "step": 83425 }, { "epoch": 0.3481152623277783, "grad_norm": 0.8300621421484083, "learning_rate": 3.3900452382068274e-06, "loss": 0.0235, "step": 83430 }, { "epoch": 0.3481361250427686, "grad_norm": 0.8449944739855816, "learning_rate": 3.3899436580348034e-06, "loss": 0.0236, "step": 83435 }, { "epoch": 0.34815698775775883, "grad_norm": 0.669501096386432, "learning_rate": 3.3898420869935545e-06, "loss": 0.0225, "step": 83440 }, { "epoch": 0.34817785047274913, "grad_norm": 0.7942475781773825, "learning_rate": 3.389740525081712e-06, "loss": 0.0264, "step": 83445 }, { "epoch": 0.3481987131877394, "grad_norm": 0.5976728353950453, "learning_rate": 3.389638972297909e-06, "loss": 0.0397, "step": 83450 }, { "epoch": 0.34821957590272967, "grad_norm": 1.0676114245245263, "learning_rate": 3.389537428640777e-06, "loss": 0.0323, "step": 83455 }, { "epoch": 0.34824043861771997, "grad_norm": 0.4355437396289043, "learning_rate": 3.389435894108952e-06, "loss": 0.0177, "step": 83460 }, { "epoch": 0.3482613013327102, "grad_norm": 0.3533433836048698, "learning_rate": 3.3893343687010642e-06, "loss": 0.0208, "step": 83465 }, { "epoch": 0.3482821640477005, "grad_norm": 0.6156599357723848, "learning_rate": 3.389232852415748e-06, "loss": 0.0294, "step": 83470 }, { "epoch": 0.3483030267626908, "grad_norm": 1.3013176343406647, "learning_rate": 3.389131345251639e-06, "loss": 0.0343, "step": 83475 }, { "epoch": 0.34832388947768106, "grad_norm": 0.48611640744491524, "learning_rate": 3.3890298472073685e-06, "loss": 0.0296, "step": 83480 }, { "epoch": 0.34834475219267136, "grad_norm": 0.6044580954597123, "learning_rate": 3.388928358281574e-06, "loss": 0.0247, "step": 83485 }, { "epoch": 0.3483656149076616, "grad_norm": 1.2160168787968493, "learning_rate": 3.3888268784728883e-06, "loss": 0.0289, "step": 83490 }, { "epoch": 0.3483864776226519, "grad_norm": 1.1975208371241635, "learning_rate": 3.388725407779947e-06, "loss": 0.0426, "step": 83495 }, { "epoch": 0.3484073403376422, "grad_norm": 0.42421042779310836, "learning_rate": 3.3886239462013854e-06, "loss": 0.0341, "step": 83500 }, { "epoch": 0.34842820305263245, "grad_norm": 0.9386378813066566, "learning_rate": 3.388522493735839e-06, "loss": 0.0316, "step": 83505 }, { "epoch": 0.34844906576762275, "grad_norm": 0.8244476210154543, "learning_rate": 3.3884210503819436e-06, "loss": 0.0215, "step": 83510 }, { "epoch": 0.348469928482613, "grad_norm": 0.9779154751718856, "learning_rate": 3.388319616138336e-06, "loss": 0.0302, "step": 83515 }, { "epoch": 0.3484907911976033, "grad_norm": 0.7765799136625974, "learning_rate": 3.388218191003652e-06, "loss": 0.0277, "step": 83520 }, { "epoch": 0.3485116539125936, "grad_norm": 0.9072202510422794, "learning_rate": 3.3881167749765293e-06, "loss": 0.0341, "step": 83525 }, { "epoch": 0.34853251662758383, "grad_norm": 1.0750261997477406, "learning_rate": 3.3880153680556033e-06, "loss": 0.0208, "step": 83530 }, { "epoch": 0.34855337934257413, "grad_norm": 0.633327462105173, "learning_rate": 3.3879139702395132e-06, "loss": 0.032, "step": 83535 }, { "epoch": 0.3485742420575644, "grad_norm": 0.3481695006556632, "learning_rate": 3.387812581526895e-06, "loss": 0.0243, "step": 83540 }, { "epoch": 0.3485951047725547, "grad_norm": 0.8795289271015089, "learning_rate": 3.3877112019163872e-06, "loss": 0.0268, "step": 83545 }, { "epoch": 0.348615967487545, "grad_norm": 0.41330854449486637, "learning_rate": 3.3876098314066293e-06, "loss": 0.0246, "step": 83550 }, { "epoch": 0.3486368302025352, "grad_norm": 0.417872968169423, "learning_rate": 3.3875084699962576e-06, "loss": 0.0324, "step": 83555 }, { "epoch": 0.3486576929175255, "grad_norm": 0.7954469183192613, "learning_rate": 3.387407117683912e-06, "loss": 0.0277, "step": 83560 }, { "epoch": 0.3486785556325158, "grad_norm": 1.022773636568858, "learning_rate": 3.3873057744682315e-06, "loss": 0.0305, "step": 83565 }, { "epoch": 0.34869941834750606, "grad_norm": 0.8934754552149775, "learning_rate": 3.3872044403478556e-06, "loss": 0.0386, "step": 83570 }, { "epoch": 0.34872028106249636, "grad_norm": 0.605624389821438, "learning_rate": 3.3871031153214233e-06, "loss": 0.0338, "step": 83575 }, { "epoch": 0.3487411437774866, "grad_norm": 1.0214341882545606, "learning_rate": 3.387001799387575e-06, "loss": 0.027, "step": 83580 }, { "epoch": 0.3487620064924769, "grad_norm": 0.4448510464341378, "learning_rate": 3.38690049254495e-06, "loss": 0.0282, "step": 83585 }, { "epoch": 0.3487828692074672, "grad_norm": 0.7516549214428544, "learning_rate": 3.3867991947921907e-06, "loss": 0.0175, "step": 83590 }, { "epoch": 0.34880373192245745, "grad_norm": 0.6157084592825456, "learning_rate": 3.386697906127936e-06, "loss": 0.03, "step": 83595 }, { "epoch": 0.34882459463744775, "grad_norm": 0.5886194522072875, "learning_rate": 3.386596626550827e-06, "loss": 0.0267, "step": 83600 }, { "epoch": 0.348845457352438, "grad_norm": 1.0047522132880093, "learning_rate": 3.3864953560595064e-06, "loss": 0.0274, "step": 83605 }, { "epoch": 0.3488663200674283, "grad_norm": 0.5194391022281213, "learning_rate": 3.3863940946526146e-06, "loss": 0.0246, "step": 83610 }, { "epoch": 0.3488871827824186, "grad_norm": 0.6132589820147728, "learning_rate": 3.3862928423287942e-06, "loss": 0.027, "step": 83615 }, { "epoch": 0.34890804549740884, "grad_norm": 1.0146523362911473, "learning_rate": 3.3861915990866875e-06, "loss": 0.0329, "step": 83620 }, { "epoch": 0.34892890821239914, "grad_norm": 1.5888546289997332, "learning_rate": 3.3860903649249357e-06, "loss": 0.0275, "step": 83625 }, { "epoch": 0.3489497709273894, "grad_norm": 0.5070574475070534, "learning_rate": 3.3859891398421824e-06, "loss": 0.0257, "step": 83630 }, { "epoch": 0.3489706336423797, "grad_norm": 0.9314629439057516, "learning_rate": 3.3858879238370714e-06, "loss": 0.0318, "step": 83635 }, { "epoch": 0.34899149635737, "grad_norm": 0.8557498785843465, "learning_rate": 3.385786716908244e-06, "loss": 0.035, "step": 83640 }, { "epoch": 0.3490123590723602, "grad_norm": 0.7968889618774281, "learning_rate": 3.3856855190543458e-06, "loss": 0.0255, "step": 83645 }, { "epoch": 0.3490332217873505, "grad_norm": 0.5396894085553939, "learning_rate": 3.3855843302740193e-06, "loss": 0.025, "step": 83650 }, { "epoch": 0.3490540845023408, "grad_norm": 1.0304516510153405, "learning_rate": 3.3854831505659104e-06, "loss": 0.033, "step": 83655 }, { "epoch": 0.34907494721733107, "grad_norm": 1.61724819944487, "learning_rate": 3.385381979928661e-06, "loss": 0.0293, "step": 83660 }, { "epoch": 0.34909580993232137, "grad_norm": 0.579821062324051, "learning_rate": 3.3852808183609183e-06, "loss": 0.0244, "step": 83665 }, { "epoch": 0.3491166726473116, "grad_norm": 0.6252883201608413, "learning_rate": 3.385179665861326e-06, "loss": 0.0213, "step": 83670 }, { "epoch": 0.3491375353623019, "grad_norm": 0.6804067895119448, "learning_rate": 3.385078522428528e-06, "loss": 0.033, "step": 83675 }, { "epoch": 0.3491583980772922, "grad_norm": 0.6523238911827307, "learning_rate": 3.384977388061173e-06, "loss": 0.0276, "step": 83680 }, { "epoch": 0.34917926079228245, "grad_norm": 0.7250214766170135, "learning_rate": 3.3848762627579052e-06, "loss": 0.0264, "step": 83685 }, { "epoch": 0.34920012350727275, "grad_norm": 0.632880822538893, "learning_rate": 3.384775146517371e-06, "loss": 0.0261, "step": 83690 }, { "epoch": 0.349220986222263, "grad_norm": 1.100562382278883, "learning_rate": 3.3846740393382168e-06, "loss": 0.0267, "step": 83695 }, { "epoch": 0.3492418489372533, "grad_norm": 0.9097927759128007, "learning_rate": 3.3845729412190886e-06, "loss": 0.0269, "step": 83700 }, { "epoch": 0.3492627116522436, "grad_norm": 0.743787218534503, "learning_rate": 3.3844718521586344e-06, "loss": 0.0579, "step": 83705 }, { "epoch": 0.34928357436723384, "grad_norm": 0.6596010968788464, "learning_rate": 3.384370772155501e-06, "loss": 0.0251, "step": 83710 }, { "epoch": 0.34930443708222414, "grad_norm": 1.162716205075025, "learning_rate": 3.3842697012083363e-06, "loss": 0.0281, "step": 83715 }, { "epoch": 0.3493252997972144, "grad_norm": 0.5178204310795184, "learning_rate": 3.384168639315788e-06, "loss": 0.0261, "step": 83720 }, { "epoch": 0.3493461625122047, "grad_norm": 0.6523820398245369, "learning_rate": 3.3840675864765033e-06, "loss": 0.0304, "step": 83725 }, { "epoch": 0.349367025227195, "grad_norm": 0.755084779227207, "learning_rate": 3.383966542689132e-06, "loss": 0.0338, "step": 83730 }, { "epoch": 0.3493878879421852, "grad_norm": 0.7587298821174469, "learning_rate": 3.3838655079523224e-06, "loss": 0.0331, "step": 83735 }, { "epoch": 0.3494087506571755, "grad_norm": 0.782963958458701, "learning_rate": 3.3837644822647234e-06, "loss": 0.0317, "step": 83740 }, { "epoch": 0.3494296133721658, "grad_norm": 0.7200792156526236, "learning_rate": 3.3836634656249835e-06, "loss": 0.0314, "step": 83745 }, { "epoch": 0.34945047608715607, "grad_norm": 0.7452784302928795, "learning_rate": 3.3835624580317532e-06, "loss": 0.0295, "step": 83750 }, { "epoch": 0.34947133880214637, "grad_norm": 0.6021975907332398, "learning_rate": 3.3834614594836824e-06, "loss": 0.0266, "step": 83755 }, { "epoch": 0.3494922015171366, "grad_norm": 0.7543304553815101, "learning_rate": 3.3833604699794205e-06, "loss": 0.031, "step": 83760 }, { "epoch": 0.3495130642321269, "grad_norm": 0.6501877912750519, "learning_rate": 3.3832594895176178e-06, "loss": 0.0227, "step": 83765 }, { "epoch": 0.3495339269471172, "grad_norm": 1.0428903922300752, "learning_rate": 3.383158518096926e-06, "loss": 0.0254, "step": 83770 }, { "epoch": 0.34955478966210746, "grad_norm": 0.8711419309505891, "learning_rate": 3.383057555715995e-06, "loss": 0.0227, "step": 83775 }, { "epoch": 0.34957565237709776, "grad_norm": 0.5359317903671889, "learning_rate": 3.3829566023734767e-06, "loss": 0.0262, "step": 83780 }, { "epoch": 0.349596515092088, "grad_norm": 1.1014068598170603, "learning_rate": 3.3828556580680224e-06, "loss": 0.0315, "step": 83785 }, { "epoch": 0.3496173778070783, "grad_norm": 0.8203321853088484, "learning_rate": 3.3827547227982836e-06, "loss": 0.0283, "step": 83790 }, { "epoch": 0.3496382405220686, "grad_norm": 0.5256465378437801, "learning_rate": 3.3826537965629126e-06, "loss": 0.0275, "step": 83795 }, { "epoch": 0.34965910323705884, "grad_norm": 1.1876333674764847, "learning_rate": 3.3825528793605615e-06, "loss": 0.0377, "step": 83800 }, { "epoch": 0.34967996595204914, "grad_norm": 1.1621604397501386, "learning_rate": 3.382451971189884e-06, "loss": 0.033, "step": 83805 }, { "epoch": 0.3497008286670394, "grad_norm": 0.5042313900994114, "learning_rate": 3.382351072049531e-06, "loss": 0.0243, "step": 83810 }, { "epoch": 0.3497216913820297, "grad_norm": 0.541344396205816, "learning_rate": 3.3822501819381587e-06, "loss": 0.0279, "step": 83815 }, { "epoch": 0.34974255409702, "grad_norm": 2.1680351310232955, "learning_rate": 3.3821493008544173e-06, "loss": 0.0342, "step": 83820 }, { "epoch": 0.34976341681201023, "grad_norm": 0.8663152977276051, "learning_rate": 3.382048428796963e-06, "loss": 0.0291, "step": 83825 }, { "epoch": 0.34978427952700053, "grad_norm": 0.5494803180637565, "learning_rate": 3.381947565764448e-06, "loss": 0.0247, "step": 83830 }, { "epoch": 0.34980514224199083, "grad_norm": 0.5064799622835467, "learning_rate": 3.381846711755528e-06, "loss": 0.0237, "step": 83835 }, { "epoch": 0.3498260049569811, "grad_norm": 0.7785663213420226, "learning_rate": 3.381745866768857e-06, "loss": 0.0246, "step": 83840 }, { "epoch": 0.3498468676719714, "grad_norm": 0.7547548962010614, "learning_rate": 3.3816450308030906e-06, "loss": 0.0247, "step": 83845 }, { "epoch": 0.3498677303869616, "grad_norm": 0.8583116082869506, "learning_rate": 3.3815442038568823e-06, "loss": 0.0281, "step": 83850 }, { "epoch": 0.3498885931019519, "grad_norm": 0.8556636173540223, "learning_rate": 3.381443385928889e-06, "loss": 0.0282, "step": 83855 }, { "epoch": 0.3499094558169422, "grad_norm": 0.9226213767737014, "learning_rate": 3.3813425770177664e-06, "loss": 0.0324, "step": 83860 }, { "epoch": 0.34993031853193246, "grad_norm": 1.204106126843164, "learning_rate": 3.3812417771221703e-06, "loss": 0.0355, "step": 83865 }, { "epoch": 0.34995118124692276, "grad_norm": 1.7023832993926733, "learning_rate": 3.3811409862407563e-06, "loss": 0.0293, "step": 83870 }, { "epoch": 0.349972043961913, "grad_norm": 1.1070748984665943, "learning_rate": 3.3810402043721825e-06, "loss": 0.0279, "step": 83875 }, { "epoch": 0.3499929066769033, "grad_norm": 0.7970999429588819, "learning_rate": 3.380939431515104e-06, "loss": 0.0307, "step": 83880 }, { "epoch": 0.3500137693918936, "grad_norm": 1.0168566587860572, "learning_rate": 3.3808386676681786e-06, "loss": 0.0275, "step": 83885 }, { "epoch": 0.35003463210688385, "grad_norm": 0.5694952953037695, "learning_rate": 3.380737912830064e-06, "loss": 0.0198, "step": 83890 }, { "epoch": 0.35005549482187415, "grad_norm": 0.3443606906257938, "learning_rate": 3.3806371669994174e-06, "loss": 0.0224, "step": 83895 }, { "epoch": 0.3500763575368644, "grad_norm": 0.5252279239125788, "learning_rate": 3.3805364301748974e-06, "loss": 0.0323, "step": 83900 }, { "epoch": 0.3500972202518547, "grad_norm": 0.7938978951669932, "learning_rate": 3.380435702355162e-06, "loss": 0.0241, "step": 83905 }, { "epoch": 0.350118082966845, "grad_norm": 0.8791606024121013, "learning_rate": 3.38033498353887e-06, "loss": 0.0288, "step": 83910 }, { "epoch": 0.35013894568183523, "grad_norm": 0.7535835988983826, "learning_rate": 3.380234273724679e-06, "loss": 0.0243, "step": 83915 }, { "epoch": 0.35015980839682553, "grad_norm": 1.2157030888046698, "learning_rate": 3.3801335729112493e-06, "loss": 0.0243, "step": 83920 }, { "epoch": 0.35018067111181583, "grad_norm": 0.6112005128090627, "learning_rate": 3.38003288109724e-06, "loss": 0.0295, "step": 83925 }, { "epoch": 0.3502015338268061, "grad_norm": 0.9473442140769684, "learning_rate": 3.3799321982813104e-06, "loss": 0.0287, "step": 83930 }, { "epoch": 0.3502223965417964, "grad_norm": 0.2567785342832165, "learning_rate": 3.3798315244621215e-06, "loss": 0.0202, "step": 83935 }, { "epoch": 0.3502432592567866, "grad_norm": 1.058643916346969, "learning_rate": 3.3797308596383315e-06, "loss": 0.0311, "step": 83940 }, { "epoch": 0.3502641219717769, "grad_norm": 0.6081101308878187, "learning_rate": 3.3796302038086033e-06, "loss": 0.0307, "step": 83945 }, { "epoch": 0.3502849846867672, "grad_norm": 1.1094435056658798, "learning_rate": 3.379529556971596e-06, "loss": 0.0256, "step": 83950 }, { "epoch": 0.35030584740175746, "grad_norm": 1.13409759165616, "learning_rate": 3.3794289191259705e-06, "loss": 0.0433, "step": 83955 }, { "epoch": 0.35032671011674776, "grad_norm": 0.5176122536381952, "learning_rate": 3.379328290270389e-06, "loss": 0.0269, "step": 83960 }, { "epoch": 0.350347572831738, "grad_norm": 0.8671333093879959, "learning_rate": 3.3792276704035133e-06, "loss": 0.033, "step": 83965 }, { "epoch": 0.3503684355467283, "grad_norm": 1.210004968437563, "learning_rate": 3.3791270595240038e-06, "loss": 0.0341, "step": 83970 }, { "epoch": 0.3503892982617186, "grad_norm": 0.7181551668519083, "learning_rate": 3.379026457630525e-06, "loss": 0.0303, "step": 83975 }, { "epoch": 0.35041016097670885, "grad_norm": 1.1777708869928085, "learning_rate": 3.378925864721737e-06, "loss": 0.0282, "step": 83980 }, { "epoch": 0.35043102369169915, "grad_norm": 1.1773515789881752, "learning_rate": 3.3788252807963033e-06, "loss": 0.0312, "step": 83985 }, { "epoch": 0.3504518864066894, "grad_norm": 0.45701268122194455, "learning_rate": 3.3787247058528877e-06, "loss": 0.0268, "step": 83990 }, { "epoch": 0.3504727491216797, "grad_norm": 0.4233638027755921, "learning_rate": 3.378624139890153e-06, "loss": 0.029, "step": 83995 }, { "epoch": 0.35049361183667, "grad_norm": 0.7422130239531736, "learning_rate": 3.3785235829067625e-06, "loss": 0.0336, "step": 84000 }, { "epoch": 0.35051447455166024, "grad_norm": 0.8509587679157924, "learning_rate": 3.3784230349013803e-06, "loss": 0.0255, "step": 84005 }, { "epoch": 0.35053533726665054, "grad_norm": 0.7373833079106654, "learning_rate": 3.3783224958726695e-06, "loss": 0.0276, "step": 84010 }, { "epoch": 0.35055619998164084, "grad_norm": 1.1076734171655573, "learning_rate": 3.378221965819296e-06, "loss": 0.0247, "step": 84015 }, { "epoch": 0.3505770626966311, "grad_norm": 0.8959498071683594, "learning_rate": 3.378121444739924e-06, "loss": 0.0295, "step": 84020 }, { "epoch": 0.3505979254116214, "grad_norm": 0.9089450642836447, "learning_rate": 3.3780209326332188e-06, "loss": 0.0253, "step": 84025 }, { "epoch": 0.3506187881266116, "grad_norm": 0.8398469256248218, "learning_rate": 3.3779204294978442e-06, "loss": 0.0365, "step": 84030 }, { "epoch": 0.3506396508416019, "grad_norm": 0.6965698090135382, "learning_rate": 3.377819935332467e-06, "loss": 0.0365, "step": 84035 }, { "epoch": 0.3506605135565922, "grad_norm": 1.256707524483468, "learning_rate": 3.3777194501357526e-06, "loss": 0.0244, "step": 84040 }, { "epoch": 0.35068137627158247, "grad_norm": 0.5575399672274862, "learning_rate": 3.3776189739063665e-06, "loss": 0.0342, "step": 84045 }, { "epoch": 0.35070223898657277, "grad_norm": 1.0579867451390486, "learning_rate": 3.377518506642976e-06, "loss": 0.0287, "step": 84050 }, { "epoch": 0.350723101701563, "grad_norm": 0.7631911334654381, "learning_rate": 3.377418048344247e-06, "loss": 0.022, "step": 84055 }, { "epoch": 0.3507439644165533, "grad_norm": 0.6996318747007926, "learning_rate": 3.377317599008847e-06, "loss": 0.0305, "step": 84060 }, { "epoch": 0.3507648271315436, "grad_norm": 1.0672726146440228, "learning_rate": 3.377217158635443e-06, "loss": 0.037, "step": 84065 }, { "epoch": 0.35078568984653385, "grad_norm": 0.4813143789515288, "learning_rate": 3.377116727222702e-06, "loss": 0.0266, "step": 84070 }, { "epoch": 0.35080655256152415, "grad_norm": 0.3830826159308478, "learning_rate": 3.377016304769292e-06, "loss": 0.0197, "step": 84075 }, { "epoch": 0.3508274152765144, "grad_norm": 0.5873831038887214, "learning_rate": 3.376915891273881e-06, "loss": 0.0316, "step": 84080 }, { "epoch": 0.3508482779915047, "grad_norm": 0.7796761782582967, "learning_rate": 3.376815486735137e-06, "loss": 0.0282, "step": 84085 }, { "epoch": 0.350869140706495, "grad_norm": 0.5280906018831762, "learning_rate": 3.3767150911517294e-06, "loss": 0.0278, "step": 84090 }, { "epoch": 0.35089000342148524, "grad_norm": 0.7595233512368887, "learning_rate": 3.3766147045223262e-06, "loss": 0.0259, "step": 84095 }, { "epoch": 0.35091086613647554, "grad_norm": 0.527813098260647, "learning_rate": 3.376514326845597e-06, "loss": 0.0301, "step": 84100 }, { "epoch": 0.35093172885146584, "grad_norm": 0.6274242885780387, "learning_rate": 3.3764139581202105e-06, "loss": 0.0344, "step": 84105 }, { "epoch": 0.3509525915664561, "grad_norm": 1.4228812015158685, "learning_rate": 3.376313598344837e-06, "loss": 0.0282, "step": 84110 }, { "epoch": 0.3509734542814464, "grad_norm": 0.9439891374277476, "learning_rate": 3.3762132475181464e-06, "loss": 0.0255, "step": 84115 }, { "epoch": 0.35099431699643663, "grad_norm": 1.106030718932594, "learning_rate": 3.376112905638809e-06, "loss": 0.0323, "step": 84120 }, { "epoch": 0.35101517971142693, "grad_norm": 0.7343198246619623, "learning_rate": 3.3760125727054944e-06, "loss": 0.0225, "step": 84125 }, { "epoch": 0.3510360424264172, "grad_norm": 0.7393348084889321, "learning_rate": 3.3759122487168746e-06, "loss": 0.0231, "step": 84130 }, { "epoch": 0.35105690514140747, "grad_norm": 0.7221839903877831, "learning_rate": 3.37581193367162e-06, "loss": 0.0276, "step": 84135 }, { "epoch": 0.35107776785639777, "grad_norm": 0.9037446820528814, "learning_rate": 3.3757116275684015e-06, "loss": 0.0303, "step": 84140 }, { "epoch": 0.351098630571388, "grad_norm": 1.3353398826380372, "learning_rate": 3.375611330405891e-06, "loss": 0.0371, "step": 84145 }, { "epoch": 0.3511194932863783, "grad_norm": 0.7188381211178839, "learning_rate": 3.375511042182762e-06, "loss": 0.0303, "step": 84150 }, { "epoch": 0.3511403560013686, "grad_norm": 0.49729072895701665, "learning_rate": 3.3754107628976844e-06, "loss": 0.0307, "step": 84155 }, { "epoch": 0.35116121871635886, "grad_norm": 1.1391057113708127, "learning_rate": 3.3753104925493314e-06, "loss": 0.038, "step": 84160 }, { "epoch": 0.35118208143134916, "grad_norm": 0.9880733321027744, "learning_rate": 3.375210231136375e-06, "loss": 0.0469, "step": 84165 }, { "epoch": 0.3512029441463394, "grad_norm": 0.9055243483117779, "learning_rate": 3.3751099786574903e-06, "loss": 0.0382, "step": 84170 }, { "epoch": 0.3512238068613297, "grad_norm": 0.40497585105357825, "learning_rate": 3.375009735111348e-06, "loss": 0.0353, "step": 84175 }, { "epoch": 0.35124466957632, "grad_norm": 0.4481594939925941, "learning_rate": 3.3749095004966237e-06, "loss": 0.0191, "step": 84180 }, { "epoch": 0.35126553229131025, "grad_norm": 0.8914548319780956, "learning_rate": 3.3748092748119898e-06, "loss": 0.029, "step": 84185 }, { "epoch": 0.35128639500630054, "grad_norm": 0.687141808733426, "learning_rate": 3.374709058056121e-06, "loss": 0.0387, "step": 84190 }, { "epoch": 0.35130725772129084, "grad_norm": 1.1187938359919234, "learning_rate": 3.3746088502276917e-06, "loss": 0.0337, "step": 84195 }, { "epoch": 0.3513281204362811, "grad_norm": 0.9418910317907614, "learning_rate": 3.3745086513253765e-06, "loss": 0.0247, "step": 84200 }, { "epoch": 0.3513489831512714, "grad_norm": 1.3103550809130673, "learning_rate": 3.37440846134785e-06, "loss": 0.0284, "step": 84205 }, { "epoch": 0.35136984586626163, "grad_norm": 1.6232533499586335, "learning_rate": 3.3743082802937874e-06, "loss": 0.0327, "step": 84210 }, { "epoch": 0.35139070858125193, "grad_norm": 0.9423868464212118, "learning_rate": 3.374208108161864e-06, "loss": 0.0328, "step": 84215 }, { "epoch": 0.35141157129624223, "grad_norm": 1.0168082088212893, "learning_rate": 3.374107944950757e-06, "loss": 0.0288, "step": 84220 }, { "epoch": 0.3514324340112325, "grad_norm": 0.7348518620101617, "learning_rate": 3.37400779065914e-06, "loss": 0.0207, "step": 84225 }, { "epoch": 0.3514532967262228, "grad_norm": 0.9778641759385067, "learning_rate": 3.373907645285691e-06, "loss": 0.0236, "step": 84230 }, { "epoch": 0.351474159441213, "grad_norm": 1.03168716162305, "learning_rate": 3.3738075088290868e-06, "loss": 0.0291, "step": 84235 }, { "epoch": 0.3514950221562033, "grad_norm": 0.7736324088398961, "learning_rate": 3.3737073812880026e-06, "loss": 0.0375, "step": 84240 }, { "epoch": 0.3515158848711936, "grad_norm": 0.8211115005165214, "learning_rate": 3.3736072626611173e-06, "loss": 0.0251, "step": 84245 }, { "epoch": 0.35153674758618386, "grad_norm": 0.6326396959739339, "learning_rate": 3.373507152947107e-06, "loss": 0.028, "step": 84250 }, { "epoch": 0.35155761030117416, "grad_norm": 0.5165859461649163, "learning_rate": 3.37340705214465e-06, "loss": 0.025, "step": 84255 }, { "epoch": 0.3515784730161644, "grad_norm": 1.0184113202973182, "learning_rate": 3.3733069602524237e-06, "loss": 0.0327, "step": 84260 }, { "epoch": 0.3515993357311547, "grad_norm": 0.8371334271773233, "learning_rate": 3.373206877269106e-06, "loss": 0.0182, "step": 84265 }, { "epoch": 0.351620198446145, "grad_norm": 0.856937643189651, "learning_rate": 3.3731068031933778e-06, "loss": 0.0215, "step": 84270 }, { "epoch": 0.35164106116113525, "grad_norm": 0.7130770388120706, "learning_rate": 3.3730067380239145e-06, "loss": 0.0269, "step": 84275 }, { "epoch": 0.35166192387612555, "grad_norm": 1.2795554843544739, "learning_rate": 3.3729066817593976e-06, "loss": 0.0287, "step": 84280 }, { "epoch": 0.35168278659111585, "grad_norm": 0.6198646466747346, "learning_rate": 3.372806634398505e-06, "loss": 0.0229, "step": 84285 }, { "epoch": 0.3517036493061061, "grad_norm": 0.9939810371671833, "learning_rate": 3.3727065959399175e-06, "loss": 0.0325, "step": 84290 }, { "epoch": 0.3517245120210964, "grad_norm": 0.8229881723759251, "learning_rate": 3.3726065663823134e-06, "loss": 0.0281, "step": 84295 }, { "epoch": 0.35174537473608664, "grad_norm": 0.575653968076005, "learning_rate": 3.3725065457243743e-06, "loss": 0.0247, "step": 84300 }, { "epoch": 0.35176623745107694, "grad_norm": 1.4829532943960135, "learning_rate": 3.37240653396478e-06, "loss": 0.0304, "step": 84305 }, { "epoch": 0.35178710016606723, "grad_norm": 0.5449745763132027, "learning_rate": 3.37230653110221e-06, "loss": 0.0258, "step": 84310 }, { "epoch": 0.3518079628810575, "grad_norm": 0.9204072942673918, "learning_rate": 3.3722065371353474e-06, "loss": 0.0279, "step": 84315 }, { "epoch": 0.3518288255960478, "grad_norm": 0.5242953819379338, "learning_rate": 3.3721065520628726e-06, "loss": 0.0304, "step": 84320 }, { "epoch": 0.351849688311038, "grad_norm": 0.9261456431575916, "learning_rate": 3.3720065758834663e-06, "loss": 0.0248, "step": 84325 }, { "epoch": 0.3518705510260283, "grad_norm": 0.4619714048032417, "learning_rate": 3.371906608595811e-06, "loss": 0.0265, "step": 84330 }, { "epoch": 0.3518914137410186, "grad_norm": 0.8718530557272918, "learning_rate": 3.3718066501985884e-06, "loss": 0.0266, "step": 84335 }, { "epoch": 0.35191227645600887, "grad_norm": 1.1850654014968487, "learning_rate": 3.3717067006904826e-06, "loss": 0.0242, "step": 84340 }, { "epoch": 0.35193313917099917, "grad_norm": 1.2823268642217862, "learning_rate": 3.371606760070173e-06, "loss": 0.0305, "step": 84345 }, { "epoch": 0.3519540018859894, "grad_norm": 0.8678652167341449, "learning_rate": 3.3715068283363445e-06, "loss": 0.0317, "step": 84350 }, { "epoch": 0.3519748646009797, "grad_norm": 1.3162394083560751, "learning_rate": 3.37140690548768e-06, "loss": 0.0338, "step": 84355 }, { "epoch": 0.35199572731597, "grad_norm": 0.7058766086298144, "learning_rate": 3.3713069915228623e-06, "loss": 0.0214, "step": 84360 }, { "epoch": 0.35201659003096025, "grad_norm": 0.642938162271759, "learning_rate": 3.3712070864405767e-06, "loss": 0.0283, "step": 84365 }, { "epoch": 0.35203745274595055, "grad_norm": 0.6500949866142279, "learning_rate": 3.3711071902395043e-06, "loss": 0.0238, "step": 84370 }, { "epoch": 0.35205831546094085, "grad_norm": 0.7170568986121542, "learning_rate": 3.371007302918333e-06, "loss": 0.0353, "step": 84375 }, { "epoch": 0.3520791781759311, "grad_norm": 0.4921497791523425, "learning_rate": 3.370907424475744e-06, "loss": 0.0212, "step": 84380 }, { "epoch": 0.3521000408909214, "grad_norm": 0.916347364299662, "learning_rate": 3.370807554910423e-06, "loss": 0.0411, "step": 84385 }, { "epoch": 0.35212090360591164, "grad_norm": 0.6487091068319653, "learning_rate": 3.3707076942210565e-06, "loss": 0.0208, "step": 84390 }, { "epoch": 0.35214176632090194, "grad_norm": 0.8205502624223011, "learning_rate": 3.3706078424063283e-06, "loss": 0.03, "step": 84395 }, { "epoch": 0.35216262903589224, "grad_norm": 1.5137865123688459, "learning_rate": 3.370507999464924e-06, "loss": 0.0322, "step": 84400 }, { "epoch": 0.3521834917508825, "grad_norm": 0.7657938946435706, "learning_rate": 3.37040816539553e-06, "loss": 0.0324, "step": 84405 }, { "epoch": 0.3522043544658728, "grad_norm": 0.6807169535892869, "learning_rate": 3.3703083401968334e-06, "loss": 0.0204, "step": 84410 }, { "epoch": 0.352225217180863, "grad_norm": 1.3453391735345197, "learning_rate": 3.3702085238675185e-06, "loss": 0.0288, "step": 84415 }, { "epoch": 0.3522460798958533, "grad_norm": 0.4839070225188493, "learning_rate": 3.3701087164062734e-06, "loss": 0.0257, "step": 84420 }, { "epoch": 0.3522669426108436, "grad_norm": 0.8059676236821589, "learning_rate": 3.370008917811785e-06, "loss": 0.0266, "step": 84425 }, { "epoch": 0.35228780532583387, "grad_norm": 0.45593333464338925, "learning_rate": 3.36990912808274e-06, "loss": 0.0246, "step": 84430 }, { "epoch": 0.35230866804082417, "grad_norm": 1.3410371810141815, "learning_rate": 3.369809347217827e-06, "loss": 0.0256, "step": 84435 }, { "epoch": 0.3523295307558144, "grad_norm": 0.704727375990376, "learning_rate": 3.3697095752157315e-06, "loss": 0.0236, "step": 84440 }, { "epoch": 0.3523503934708047, "grad_norm": 1.0300382549521288, "learning_rate": 3.3696098120751436e-06, "loss": 0.0324, "step": 84445 }, { "epoch": 0.352371256185795, "grad_norm": 0.8305294026055046, "learning_rate": 3.3695100577947507e-06, "loss": 0.024, "step": 84450 }, { "epoch": 0.35239211890078526, "grad_norm": 0.9645766470562018, "learning_rate": 3.3694103123732417e-06, "loss": 0.0354, "step": 84455 }, { "epoch": 0.35241298161577556, "grad_norm": 0.8378543411879518, "learning_rate": 3.3693105758093057e-06, "loss": 0.0263, "step": 84460 }, { "epoch": 0.35243384433076586, "grad_norm": 0.7670054604891003, "learning_rate": 3.3692108481016315e-06, "loss": 0.0327, "step": 84465 }, { "epoch": 0.3524547070457561, "grad_norm": 1.0395354185617514, "learning_rate": 3.3691111292489084e-06, "loss": 0.0272, "step": 84470 }, { "epoch": 0.3524755697607464, "grad_norm": 0.8600704506215642, "learning_rate": 3.3690114192498263e-06, "loss": 0.0279, "step": 84475 }, { "epoch": 0.35249643247573664, "grad_norm": 0.8347548199140362, "learning_rate": 3.368911718103075e-06, "loss": 0.0327, "step": 84480 }, { "epoch": 0.35251729519072694, "grad_norm": 0.5937363467439364, "learning_rate": 3.3688120258073454e-06, "loss": 0.0275, "step": 84485 }, { "epoch": 0.35253815790571724, "grad_norm": 0.9757172215442081, "learning_rate": 3.368712342361326e-06, "loss": 0.0234, "step": 84490 }, { "epoch": 0.3525590206207075, "grad_norm": 0.29861189606299776, "learning_rate": 3.368612667763709e-06, "loss": 0.0228, "step": 84495 }, { "epoch": 0.3525798833356978, "grad_norm": 0.9144342712606884, "learning_rate": 3.3685130020131866e-06, "loss": 0.0287, "step": 84500 }, { "epoch": 0.35260074605068803, "grad_norm": 1.6243183822309237, "learning_rate": 3.3684133451084484e-06, "loss": 0.0256, "step": 84505 }, { "epoch": 0.35262160876567833, "grad_norm": 0.43893556780502674, "learning_rate": 3.368313697048186e-06, "loss": 0.0235, "step": 84510 }, { "epoch": 0.35264247148066863, "grad_norm": 1.3312510550574654, "learning_rate": 3.3682140578310925e-06, "loss": 0.0334, "step": 84515 }, { "epoch": 0.3526633341956589, "grad_norm": 0.7322590490086428, "learning_rate": 3.3681144274558586e-06, "loss": 0.0225, "step": 84520 }, { "epoch": 0.3526841969106492, "grad_norm": 0.7999027018872368, "learning_rate": 3.3680148059211776e-06, "loss": 0.018, "step": 84525 }, { "epoch": 0.3527050596256394, "grad_norm": 0.5128672734526584, "learning_rate": 3.3679151932257414e-06, "loss": 0.0222, "step": 84530 }, { "epoch": 0.3527259223406297, "grad_norm": 0.7078950457263917, "learning_rate": 3.3678155893682436e-06, "loss": 0.0243, "step": 84535 }, { "epoch": 0.35274678505562, "grad_norm": 0.3684372927648888, "learning_rate": 3.367715994347377e-06, "loss": 0.0363, "step": 84540 }, { "epoch": 0.35276764777061026, "grad_norm": 0.8714596748431218, "learning_rate": 3.367616408161835e-06, "loss": 0.0225, "step": 84545 }, { "epoch": 0.35278851048560056, "grad_norm": 0.8644099614410816, "learning_rate": 3.3675168308103117e-06, "loss": 0.0202, "step": 84550 }, { "epoch": 0.35280937320059086, "grad_norm": 0.719512676351279, "learning_rate": 3.367417262291501e-06, "loss": 0.0301, "step": 84555 }, { "epoch": 0.3528302359155811, "grad_norm": 0.5624505557392849, "learning_rate": 3.3673177026040975e-06, "loss": 0.0224, "step": 84560 }, { "epoch": 0.3528510986305714, "grad_norm": 0.7558559210463938, "learning_rate": 3.367218151746795e-06, "loss": 0.0284, "step": 84565 }, { "epoch": 0.35287196134556165, "grad_norm": 1.0748366425849092, "learning_rate": 3.367118609718289e-06, "loss": 0.0245, "step": 84570 }, { "epoch": 0.35289282406055195, "grad_norm": 1.0395714322444267, "learning_rate": 3.367019076517274e-06, "loss": 0.0314, "step": 84575 }, { "epoch": 0.35291368677554225, "grad_norm": 1.029369665863219, "learning_rate": 3.3669195521424456e-06, "loss": 0.0406, "step": 84580 }, { "epoch": 0.3529345494905325, "grad_norm": 0.8541128441068073, "learning_rate": 3.3668200365924995e-06, "loss": 0.0316, "step": 84585 }, { "epoch": 0.3529554122055228, "grad_norm": 0.6204781204203753, "learning_rate": 3.3667205298661316e-06, "loss": 0.0234, "step": 84590 }, { "epoch": 0.35297627492051303, "grad_norm": 0.8906681242312384, "learning_rate": 3.3666210319620387e-06, "loss": 0.029, "step": 84595 }, { "epoch": 0.35299713763550333, "grad_norm": 0.4602818243005837, "learning_rate": 3.3665215428789156e-06, "loss": 0.0231, "step": 84600 }, { "epoch": 0.35301800035049363, "grad_norm": 0.6207203413957065, "learning_rate": 3.3664220626154603e-06, "loss": 0.0237, "step": 84605 }, { "epoch": 0.3530388630654839, "grad_norm": 0.26992493705521525, "learning_rate": 3.3663225911703694e-06, "loss": 0.0274, "step": 84610 }, { "epoch": 0.3530597257804742, "grad_norm": 1.0146136320188608, "learning_rate": 3.366223128542341e-06, "loss": 0.0342, "step": 84615 }, { "epoch": 0.3530805884954644, "grad_norm": 1.2407230777290033, "learning_rate": 3.3661236747300708e-06, "loss": 0.0353, "step": 84620 }, { "epoch": 0.3531014512104547, "grad_norm": 0.7476823268103374, "learning_rate": 3.366024229732258e-06, "loss": 0.0248, "step": 84625 }, { "epoch": 0.353122313925445, "grad_norm": 0.6132371727565141, "learning_rate": 3.3659247935476007e-06, "loss": 0.0308, "step": 84630 }, { "epoch": 0.35314317664043526, "grad_norm": 0.9548396011273532, "learning_rate": 3.365825366174796e-06, "loss": 0.0274, "step": 84635 }, { "epoch": 0.35316403935542556, "grad_norm": 0.7673661733865395, "learning_rate": 3.3657259476125438e-06, "loss": 0.0302, "step": 84640 }, { "epoch": 0.35318490207041586, "grad_norm": 0.72890221900907, "learning_rate": 3.365626537859542e-06, "loss": 0.0368, "step": 84645 }, { "epoch": 0.3532057647854061, "grad_norm": 0.6368638916913955, "learning_rate": 3.36552713691449e-06, "loss": 0.0236, "step": 84650 }, { "epoch": 0.3532266275003964, "grad_norm": 1.2835630335634682, "learning_rate": 3.365427744776089e-06, "loss": 0.0364, "step": 84655 }, { "epoch": 0.35324749021538665, "grad_norm": 0.7822579241955159, "learning_rate": 3.3653283614430356e-06, "loss": 0.0283, "step": 84660 }, { "epoch": 0.35326835293037695, "grad_norm": 1.880789135825762, "learning_rate": 3.365228986914032e-06, "loss": 0.0307, "step": 84665 }, { "epoch": 0.35328921564536725, "grad_norm": 0.6899402156212615, "learning_rate": 3.3651296211877766e-06, "loss": 0.0274, "step": 84670 }, { "epoch": 0.3533100783603575, "grad_norm": 0.721671840675456, "learning_rate": 3.365030264262971e-06, "loss": 0.0337, "step": 84675 }, { "epoch": 0.3533309410753478, "grad_norm": 0.986207172843381, "learning_rate": 3.364930916138317e-06, "loss": 0.0274, "step": 84680 }, { "epoch": 0.35335180379033804, "grad_norm": 0.5461860315561211, "learning_rate": 3.3648315768125127e-06, "loss": 0.0217, "step": 84685 }, { "epoch": 0.35337266650532834, "grad_norm": 0.7354615492420061, "learning_rate": 3.3647322462842626e-06, "loss": 0.0305, "step": 84690 }, { "epoch": 0.35339352922031864, "grad_norm": 0.8254440023352553, "learning_rate": 3.364632924552266e-06, "loss": 0.0262, "step": 84695 }, { "epoch": 0.3534143919353089, "grad_norm": 0.9072623149510856, "learning_rate": 3.3645336116152256e-06, "loss": 0.0285, "step": 84700 }, { "epoch": 0.3534352546502992, "grad_norm": 0.6815485352348402, "learning_rate": 3.3644343074718435e-06, "loss": 0.0277, "step": 84705 }, { "epoch": 0.3534561173652894, "grad_norm": 0.4109483622570308, "learning_rate": 3.3643350121208213e-06, "loss": 0.0259, "step": 84710 }, { "epoch": 0.3534769800802797, "grad_norm": 1.4453295596225517, "learning_rate": 3.364235725560863e-06, "loss": 0.0301, "step": 84715 }, { "epoch": 0.35349784279527, "grad_norm": 1.6710449517871957, "learning_rate": 3.36413644779067e-06, "loss": 0.0314, "step": 84720 }, { "epoch": 0.35351870551026027, "grad_norm": 0.733286439158352, "learning_rate": 3.3640371788089472e-06, "loss": 0.0186, "step": 84725 }, { "epoch": 0.35353956822525057, "grad_norm": 0.7433782198243788, "learning_rate": 3.3639379186143956e-06, "loss": 0.0296, "step": 84730 }, { "epoch": 0.35356043094024087, "grad_norm": 0.5680020154751199, "learning_rate": 3.3638386672057215e-06, "loss": 0.0272, "step": 84735 }, { "epoch": 0.3535812936552311, "grad_norm": 1.2334934258892345, "learning_rate": 3.3637394245816274e-06, "loss": 0.0288, "step": 84740 }, { "epoch": 0.3536021563702214, "grad_norm": 1.0333658776190848, "learning_rate": 3.3636401907408173e-06, "loss": 0.0245, "step": 84745 }, { "epoch": 0.35362301908521165, "grad_norm": 1.358273868583405, "learning_rate": 3.3635409656819965e-06, "loss": 0.0389, "step": 84750 }, { "epoch": 0.35364388180020195, "grad_norm": 0.6441065029653157, "learning_rate": 3.3634417494038695e-06, "loss": 0.0241, "step": 84755 }, { "epoch": 0.35366474451519225, "grad_norm": 1.2719469004197121, "learning_rate": 3.363342541905141e-06, "loss": 0.0312, "step": 84760 }, { "epoch": 0.3536856072301825, "grad_norm": 0.7427363096330855, "learning_rate": 3.3632433431845173e-06, "loss": 0.0206, "step": 84765 }, { "epoch": 0.3537064699451728, "grad_norm": 0.80498120362647, "learning_rate": 3.3631441532407026e-06, "loss": 0.0319, "step": 84770 }, { "epoch": 0.35372733266016304, "grad_norm": 0.6459365424897886, "learning_rate": 3.3630449720724034e-06, "loss": 0.0226, "step": 84775 }, { "epoch": 0.35374819537515334, "grad_norm": 1.00586562976821, "learning_rate": 3.362945799678326e-06, "loss": 0.0329, "step": 84780 }, { "epoch": 0.35376905809014364, "grad_norm": 0.5924709709501209, "learning_rate": 3.3628466360571764e-06, "loss": 0.0335, "step": 84785 }, { "epoch": 0.3537899208051339, "grad_norm": 0.690559943925454, "learning_rate": 3.362747481207661e-06, "loss": 0.0295, "step": 84790 }, { "epoch": 0.3538107835201242, "grad_norm": 0.619549031954977, "learning_rate": 3.3626483351284876e-06, "loss": 0.021, "step": 84795 }, { "epoch": 0.3538316462351144, "grad_norm": 0.6917336094378481, "learning_rate": 3.3625491978183626e-06, "loss": 0.0357, "step": 84800 }, { "epoch": 0.3538525089501047, "grad_norm": 0.6984659480002406, "learning_rate": 3.362450069275994e-06, "loss": 0.0222, "step": 84805 }, { "epoch": 0.353873371665095, "grad_norm": 0.7977863609704733, "learning_rate": 3.3623509495000887e-06, "loss": 0.0317, "step": 84810 }, { "epoch": 0.35389423438008527, "grad_norm": 0.6731583421137083, "learning_rate": 3.3622518384893555e-06, "loss": 0.0258, "step": 84815 }, { "epoch": 0.35391509709507557, "grad_norm": 0.7257878266046265, "learning_rate": 3.362152736242502e-06, "loss": 0.0328, "step": 84820 }, { "epoch": 0.35393595981006587, "grad_norm": 0.8437867949799857, "learning_rate": 3.362053642758237e-06, "loss": 0.023, "step": 84825 }, { "epoch": 0.3539568225250561, "grad_norm": 0.6925903979527344, "learning_rate": 3.361954558035269e-06, "loss": 0.0228, "step": 84830 }, { "epoch": 0.3539776852400464, "grad_norm": 3.1219520363497164, "learning_rate": 3.3618554820723083e-06, "loss": 0.0227, "step": 84835 }, { "epoch": 0.35399854795503666, "grad_norm": 0.9082214923242443, "learning_rate": 3.361756414868062e-06, "loss": 0.0393, "step": 84840 }, { "epoch": 0.35401941067002696, "grad_norm": 0.8244774131379712, "learning_rate": 3.3616573564212416e-06, "loss": 0.0325, "step": 84845 }, { "epoch": 0.35404027338501726, "grad_norm": 0.5321465906424097, "learning_rate": 3.3615583067305556e-06, "loss": 0.028, "step": 84850 }, { "epoch": 0.3540611361000075, "grad_norm": 0.7240818660280971, "learning_rate": 3.3614592657947154e-06, "loss": 0.0214, "step": 84855 }, { "epoch": 0.3540819988149978, "grad_norm": 1.0266905559143522, "learning_rate": 3.36136023361243e-06, "loss": 0.0308, "step": 84860 }, { "epoch": 0.35410286152998804, "grad_norm": 0.7078405239866001, "learning_rate": 3.361261210182411e-06, "loss": 0.045, "step": 84865 }, { "epoch": 0.35412372424497834, "grad_norm": 0.688084312244895, "learning_rate": 3.3611621955033687e-06, "loss": 0.0303, "step": 84870 }, { "epoch": 0.35414458695996864, "grad_norm": 0.8416182493203965, "learning_rate": 3.3610631895740144e-06, "loss": 0.0233, "step": 84875 }, { "epoch": 0.3541654496749589, "grad_norm": 0.7452355691847866, "learning_rate": 3.3609641923930604e-06, "loss": 0.0314, "step": 84880 }, { "epoch": 0.3541863123899492, "grad_norm": 0.4873678684050007, "learning_rate": 3.360865203959217e-06, "loss": 0.0228, "step": 84885 }, { "epoch": 0.35420717510493943, "grad_norm": 0.6804914851270014, "learning_rate": 3.360766224271197e-06, "loss": 0.055, "step": 84890 }, { "epoch": 0.35422803781992973, "grad_norm": 1.031034383372769, "learning_rate": 3.3606672533277124e-06, "loss": 0.0291, "step": 84895 }, { "epoch": 0.35424890053492003, "grad_norm": 0.44794675412638707, "learning_rate": 3.360568291127476e-06, "loss": 0.0329, "step": 84900 }, { "epoch": 0.3542697632499103, "grad_norm": 0.9880683380606787, "learning_rate": 3.3604693376692e-06, "loss": 0.0334, "step": 84905 }, { "epoch": 0.3542906259649006, "grad_norm": 0.4457680588680328, "learning_rate": 3.3603703929515973e-06, "loss": 0.0206, "step": 84910 }, { "epoch": 0.3543114886798909, "grad_norm": 0.5598360658198261, "learning_rate": 3.360271456973382e-06, "loss": 0.0247, "step": 84915 }, { "epoch": 0.3543323513948811, "grad_norm": 1.4087770562550015, "learning_rate": 3.3601725297332678e-06, "loss": 0.0412, "step": 84920 }, { "epoch": 0.3543532141098714, "grad_norm": 0.7951351379170442, "learning_rate": 3.360073611229967e-06, "loss": 0.0275, "step": 84925 }, { "epoch": 0.35437407682486166, "grad_norm": 0.5059716494275703, "learning_rate": 3.3599747014621954e-06, "loss": 0.0322, "step": 84930 }, { "epoch": 0.35439493953985196, "grad_norm": 1.0389646954062706, "learning_rate": 3.3598758004286657e-06, "loss": 0.0227, "step": 84935 }, { "epoch": 0.35441580225484226, "grad_norm": 1.2930354002695246, "learning_rate": 3.359776908128094e-06, "loss": 0.0263, "step": 84940 }, { "epoch": 0.3544366649698325, "grad_norm": 0.6015327401214404, "learning_rate": 3.3596780245591947e-06, "loss": 0.0219, "step": 84945 }, { "epoch": 0.3544575276848228, "grad_norm": 0.8499179078747188, "learning_rate": 3.3595791497206824e-06, "loss": 0.0296, "step": 84950 }, { "epoch": 0.35447839039981305, "grad_norm": 0.6233759941758282, "learning_rate": 3.359480283611273e-06, "loss": 0.023, "step": 84955 }, { "epoch": 0.35449925311480335, "grad_norm": 0.6454463417874038, "learning_rate": 3.359381426229682e-06, "loss": 0.0268, "step": 84960 }, { "epoch": 0.35452011582979365, "grad_norm": 0.8113435550048779, "learning_rate": 3.359282577574626e-06, "loss": 0.0349, "step": 84965 }, { "epoch": 0.3545409785447839, "grad_norm": 0.8512447743218563, "learning_rate": 3.35918373764482e-06, "loss": 0.0295, "step": 84970 }, { "epoch": 0.3545618412597742, "grad_norm": 0.6714090917741783, "learning_rate": 3.359084906438982e-06, "loss": 0.0217, "step": 84975 }, { "epoch": 0.35458270397476443, "grad_norm": 1.1068546716627226, "learning_rate": 3.358986083955827e-06, "loss": 0.0321, "step": 84980 }, { "epoch": 0.35460356668975473, "grad_norm": 0.8863598148041211, "learning_rate": 3.358887270194073e-06, "loss": 0.0328, "step": 84985 }, { "epoch": 0.35462442940474503, "grad_norm": 0.7647959165850718, "learning_rate": 3.3587884651524366e-06, "loss": 0.0261, "step": 84990 }, { "epoch": 0.3546452921197353, "grad_norm": 0.9240751970760296, "learning_rate": 3.358689668829636e-06, "loss": 0.0308, "step": 84995 }, { "epoch": 0.3546661548347256, "grad_norm": 0.5555898739289767, "learning_rate": 3.358590881224389e-06, "loss": 0.0263, "step": 85000 }, { "epoch": 0.3546870175497159, "grad_norm": 0.5552934548928095, "learning_rate": 3.3584921023354132e-06, "loss": 0.0261, "step": 85005 }, { "epoch": 0.3547078802647061, "grad_norm": 0.4202384091961166, "learning_rate": 3.3583933321614274e-06, "loss": 0.0259, "step": 85010 }, { "epoch": 0.3547287429796964, "grad_norm": 0.8920464099668597, "learning_rate": 3.35829457070115e-06, "loss": 0.0331, "step": 85015 }, { "epoch": 0.35474960569468666, "grad_norm": 0.700438676556915, "learning_rate": 3.358195817953299e-06, "loss": 0.0244, "step": 85020 }, { "epoch": 0.35477046840967696, "grad_norm": 0.5005408793689365, "learning_rate": 3.358097073916595e-06, "loss": 0.0196, "step": 85025 }, { "epoch": 0.35479133112466726, "grad_norm": 1.260604648570458, "learning_rate": 3.3579983385897564e-06, "loss": 0.0421, "step": 85030 }, { "epoch": 0.3548121938396575, "grad_norm": 0.9468852621749378, "learning_rate": 3.3578996119715028e-06, "loss": 0.0277, "step": 85035 }, { "epoch": 0.3548330565546478, "grad_norm": 0.8674550250745316, "learning_rate": 3.357800894060555e-06, "loss": 0.0311, "step": 85040 }, { "epoch": 0.35485391926963805, "grad_norm": 0.8149354658304353, "learning_rate": 3.357702184855631e-06, "loss": 0.0374, "step": 85045 }, { "epoch": 0.35487478198462835, "grad_norm": 1.1381265364121864, "learning_rate": 3.3576034843554546e-06, "loss": 0.027, "step": 85050 }, { "epoch": 0.35489564469961865, "grad_norm": 0.7302063336850869, "learning_rate": 3.3575047925587432e-06, "loss": 0.0255, "step": 85055 }, { "epoch": 0.3549165074146089, "grad_norm": 0.43172793089116407, "learning_rate": 3.35740610946422e-06, "loss": 0.0225, "step": 85060 }, { "epoch": 0.3549373701295992, "grad_norm": 0.6864916707127198, "learning_rate": 3.3573074350706053e-06, "loss": 0.0331, "step": 85065 }, { "epoch": 0.35495823284458944, "grad_norm": 0.7052077025527549, "learning_rate": 3.35720876937662e-06, "loss": 0.0252, "step": 85070 }, { "epoch": 0.35497909555957974, "grad_norm": 0.99520698982466, "learning_rate": 3.3571101123809872e-06, "loss": 0.0263, "step": 85075 }, { "epoch": 0.35499995827457004, "grad_norm": 0.4839453564931744, "learning_rate": 3.3570114640824276e-06, "loss": 0.0237, "step": 85080 }, { "epoch": 0.3550208209895603, "grad_norm": 0.5444460117171182, "learning_rate": 3.356912824479664e-06, "loss": 0.0276, "step": 85085 }, { "epoch": 0.3550416837045506, "grad_norm": 1.005963568046938, "learning_rate": 3.3568141935714193e-06, "loss": 0.0289, "step": 85090 }, { "epoch": 0.3550625464195409, "grad_norm": 0.6814412558484036, "learning_rate": 3.356715571356416e-06, "loss": 0.0249, "step": 85095 }, { "epoch": 0.3550834091345311, "grad_norm": 0.6948164606304792, "learning_rate": 3.356616957833377e-06, "loss": 0.0292, "step": 85100 }, { "epoch": 0.3551042718495214, "grad_norm": 0.679934458365759, "learning_rate": 3.3565183530010253e-06, "loss": 0.0228, "step": 85105 }, { "epoch": 0.35512513456451167, "grad_norm": 0.8354042434842078, "learning_rate": 3.356419756858085e-06, "loss": 0.0244, "step": 85110 }, { "epoch": 0.35514599727950197, "grad_norm": 0.5954000521240326, "learning_rate": 3.35632116940328e-06, "loss": 0.0268, "step": 85115 }, { "epoch": 0.35516685999449227, "grad_norm": 0.6586779424486076, "learning_rate": 3.356222590635334e-06, "loss": 0.0174, "step": 85120 }, { "epoch": 0.3551877227094825, "grad_norm": 0.6278661136592641, "learning_rate": 3.356124020552972e-06, "loss": 0.0295, "step": 85125 }, { "epoch": 0.3552085854244728, "grad_norm": 1.2218521395785857, "learning_rate": 3.356025459154918e-06, "loss": 0.0259, "step": 85130 }, { "epoch": 0.35522944813946306, "grad_norm": 0.9714459250550218, "learning_rate": 3.3559269064398976e-06, "loss": 0.0244, "step": 85135 }, { "epoch": 0.35525031085445335, "grad_norm": 0.8664962327344407, "learning_rate": 3.3558283624066346e-06, "loss": 0.0277, "step": 85140 }, { "epoch": 0.35527117356944365, "grad_norm": 0.3313289384887014, "learning_rate": 3.355729827053856e-06, "loss": 0.0258, "step": 85145 }, { "epoch": 0.3552920362844339, "grad_norm": 0.8368849758327394, "learning_rate": 3.3556313003802858e-06, "loss": 0.0278, "step": 85150 }, { "epoch": 0.3553128989994242, "grad_norm": 0.7024411671177501, "learning_rate": 3.3555327823846512e-06, "loss": 0.0226, "step": 85155 }, { "epoch": 0.35533376171441444, "grad_norm": 0.5883450916085987, "learning_rate": 3.3554342730656787e-06, "loss": 0.0262, "step": 85160 }, { "epoch": 0.35535462442940474, "grad_norm": 0.9980100705911683, "learning_rate": 3.3553357724220935e-06, "loss": 0.0326, "step": 85165 }, { "epoch": 0.35537548714439504, "grad_norm": 0.6331155339297709, "learning_rate": 3.355237280452623e-06, "loss": 0.0379, "step": 85170 }, { "epoch": 0.3553963498593853, "grad_norm": 0.522599395162163, "learning_rate": 3.355138797155994e-06, "loss": 0.0311, "step": 85175 }, { "epoch": 0.3554172125743756, "grad_norm": 1.0339668577462426, "learning_rate": 3.3550403225309342e-06, "loss": 0.027, "step": 85180 }, { "epoch": 0.3554380752893659, "grad_norm": 0.5813347319337999, "learning_rate": 3.354941856576171e-06, "loss": 0.0207, "step": 85185 }, { "epoch": 0.35545893800435613, "grad_norm": 0.7572734470344381, "learning_rate": 3.354843399290431e-06, "loss": 0.0271, "step": 85190 }, { "epoch": 0.35547980071934643, "grad_norm": 0.5516133836732494, "learning_rate": 3.3547449506724443e-06, "loss": 0.03, "step": 85195 }, { "epoch": 0.3555006634343367, "grad_norm": 3.0404673071389787, "learning_rate": 3.354646510720937e-06, "loss": 0.0258, "step": 85200 }, { "epoch": 0.35552152614932697, "grad_norm": 0.618749253810376, "learning_rate": 3.3545480794346395e-06, "loss": 0.0289, "step": 85205 }, { "epoch": 0.35554238886431727, "grad_norm": 0.46537465435416414, "learning_rate": 3.3544496568122798e-06, "loss": 0.0288, "step": 85210 }, { "epoch": 0.3555632515793075, "grad_norm": 0.6986749533428007, "learning_rate": 3.3543512428525865e-06, "loss": 0.0286, "step": 85215 }, { "epoch": 0.3555841142942978, "grad_norm": 0.6440893471580842, "learning_rate": 3.35425283755429e-06, "loss": 0.0244, "step": 85220 }, { "epoch": 0.35560497700928806, "grad_norm": 0.9349282961684918, "learning_rate": 3.3541544409161186e-06, "loss": 0.0331, "step": 85225 }, { "epoch": 0.35562583972427836, "grad_norm": 0.5360695560177781, "learning_rate": 3.3540560529368032e-06, "loss": 0.0251, "step": 85230 }, { "epoch": 0.35564670243926866, "grad_norm": 0.929526882634274, "learning_rate": 3.3539576736150735e-06, "loss": 0.0283, "step": 85235 }, { "epoch": 0.3556675651542589, "grad_norm": 0.6037427313118663, "learning_rate": 3.35385930294966e-06, "loss": 0.0266, "step": 85240 }, { "epoch": 0.3556884278692492, "grad_norm": 0.7462379649548752, "learning_rate": 3.353760940939294e-06, "loss": 0.0302, "step": 85245 }, { "epoch": 0.35570929058423945, "grad_norm": 0.5468993997692906, "learning_rate": 3.353662587582705e-06, "loss": 0.0247, "step": 85250 }, { "epoch": 0.35573015329922975, "grad_norm": 0.8916777767959133, "learning_rate": 3.3535642428786252e-06, "loss": 0.0232, "step": 85255 }, { "epoch": 0.35575101601422005, "grad_norm": 0.44892557700366553, "learning_rate": 3.3534659068257846e-06, "loss": 0.0308, "step": 85260 }, { "epoch": 0.3557718787292103, "grad_norm": 1.1414449578586248, "learning_rate": 3.3533675794229174e-06, "loss": 0.0307, "step": 85265 }, { "epoch": 0.3557927414442006, "grad_norm": 0.8580012912211261, "learning_rate": 3.3532692606687537e-06, "loss": 0.0292, "step": 85270 }, { "epoch": 0.35581360415919083, "grad_norm": 0.7658490510034306, "learning_rate": 3.3531709505620256e-06, "loss": 0.0283, "step": 85275 }, { "epoch": 0.35583446687418113, "grad_norm": 0.9602576667640741, "learning_rate": 3.3530726491014664e-06, "loss": 0.0276, "step": 85280 }, { "epoch": 0.35585532958917143, "grad_norm": 0.5323434226746939, "learning_rate": 3.3529743562858085e-06, "loss": 0.0169, "step": 85285 }, { "epoch": 0.3558761923041617, "grad_norm": 0.7651320259544683, "learning_rate": 3.352876072113785e-06, "loss": 0.0271, "step": 85290 }, { "epoch": 0.355897055019152, "grad_norm": 0.8527598215951391, "learning_rate": 3.3527777965841286e-06, "loss": 0.0335, "step": 85295 }, { "epoch": 0.3559179177341423, "grad_norm": 0.44405020848157234, "learning_rate": 3.352679529695573e-06, "loss": 0.0268, "step": 85300 }, { "epoch": 0.3559387804491325, "grad_norm": 0.7077655832674807, "learning_rate": 3.352581271446853e-06, "loss": 0.0276, "step": 85305 }, { "epoch": 0.3559596431641228, "grad_norm": 0.3992356993749162, "learning_rate": 3.352483021836701e-06, "loss": 0.02, "step": 85310 }, { "epoch": 0.35598050587911306, "grad_norm": 0.6991178311069297, "learning_rate": 3.3523847808638526e-06, "loss": 0.0246, "step": 85315 }, { "epoch": 0.35600136859410336, "grad_norm": 0.7264257246395955, "learning_rate": 3.3522865485270412e-06, "loss": 0.029, "step": 85320 }, { "epoch": 0.35602223130909366, "grad_norm": 1.0917643348745178, "learning_rate": 3.3521883248250027e-06, "loss": 0.0339, "step": 85325 }, { "epoch": 0.3560430940240839, "grad_norm": 0.8855050691341452, "learning_rate": 3.3520901097564716e-06, "loss": 0.0353, "step": 85330 }, { "epoch": 0.3560639567390742, "grad_norm": 0.5286071812273097, "learning_rate": 3.351991903320183e-06, "loss": 0.0288, "step": 85335 }, { "epoch": 0.35608481945406445, "grad_norm": 0.660188927583081, "learning_rate": 3.351893705514873e-06, "loss": 0.0222, "step": 85340 }, { "epoch": 0.35610568216905475, "grad_norm": 0.9478480584247987, "learning_rate": 3.3517955163392774e-06, "loss": 0.0272, "step": 85345 }, { "epoch": 0.35612654488404505, "grad_norm": 0.567264739019441, "learning_rate": 3.351697335792132e-06, "loss": 0.0319, "step": 85350 }, { "epoch": 0.3561474075990353, "grad_norm": 0.757224821040529, "learning_rate": 3.3515991638721723e-06, "loss": 0.0239, "step": 85355 }, { "epoch": 0.3561682703140256, "grad_norm": 0.6386949815254084, "learning_rate": 3.3515010005781367e-06, "loss": 0.0346, "step": 85360 }, { "epoch": 0.35618913302901584, "grad_norm": 0.6251916354418349, "learning_rate": 3.3514028459087616e-06, "loss": 0.0262, "step": 85365 }, { "epoch": 0.35620999574400614, "grad_norm": 2.1591344160675936, "learning_rate": 3.3513046998627833e-06, "loss": 0.0247, "step": 85370 }, { "epoch": 0.35623085845899644, "grad_norm": 0.6596313093954851, "learning_rate": 3.3512065624389393e-06, "loss": 0.0189, "step": 85375 }, { "epoch": 0.3562517211739867, "grad_norm": 1.0976547494621443, "learning_rate": 3.351108433635968e-06, "loss": 0.0276, "step": 85380 }, { "epoch": 0.356272583888977, "grad_norm": 0.6548333160052509, "learning_rate": 3.3510103134526068e-06, "loss": 0.0227, "step": 85385 }, { "epoch": 0.3562934466039673, "grad_norm": 0.6920096603728132, "learning_rate": 3.350912201887594e-06, "loss": 0.0207, "step": 85390 }, { "epoch": 0.3563143093189575, "grad_norm": 1.023300873009347, "learning_rate": 3.350814098939668e-06, "loss": 0.0304, "step": 85395 }, { "epoch": 0.3563351720339478, "grad_norm": 0.4522401644002551, "learning_rate": 3.350716004607568e-06, "loss": 0.0238, "step": 85400 }, { "epoch": 0.35635603474893807, "grad_norm": 0.7514622602409661, "learning_rate": 3.350617918890031e-06, "loss": 0.0261, "step": 85405 }, { "epoch": 0.35637689746392837, "grad_norm": 0.5394259508112477, "learning_rate": 3.3505198417857986e-06, "loss": 0.0216, "step": 85410 }, { "epoch": 0.35639776017891867, "grad_norm": 0.8632249853691939, "learning_rate": 3.350421773293609e-06, "loss": 0.0251, "step": 85415 }, { "epoch": 0.3564186228939089, "grad_norm": 0.6280823714996097, "learning_rate": 3.3503237134122025e-06, "loss": 0.0195, "step": 85420 }, { "epoch": 0.3564394856088992, "grad_norm": 1.4618496157779493, "learning_rate": 3.3502256621403185e-06, "loss": 0.0303, "step": 85425 }, { "epoch": 0.35646034832388945, "grad_norm": 0.864031439928621, "learning_rate": 3.3501276194766973e-06, "loss": 0.038, "step": 85430 }, { "epoch": 0.35648121103887975, "grad_norm": 0.519156414737797, "learning_rate": 3.35002958542008e-06, "loss": 0.0187, "step": 85435 }, { "epoch": 0.35650207375387005, "grad_norm": 0.7950059603586918, "learning_rate": 3.3499315599692066e-06, "loss": 0.031, "step": 85440 }, { "epoch": 0.3565229364688603, "grad_norm": 1.3721512041683799, "learning_rate": 3.3498335431228184e-06, "loss": 0.0269, "step": 85445 }, { "epoch": 0.3565437991838506, "grad_norm": 0.9327498070762061, "learning_rate": 3.349735534879658e-06, "loss": 0.0387, "step": 85450 }, { "epoch": 0.35656466189884084, "grad_norm": 0.9710881540629642, "learning_rate": 3.3496375352384643e-06, "loss": 0.0198, "step": 85455 }, { "epoch": 0.35658552461383114, "grad_norm": 0.5765994103899784, "learning_rate": 3.34953954419798e-06, "loss": 0.0221, "step": 85460 }, { "epoch": 0.35660638732882144, "grad_norm": 0.9536102791842607, "learning_rate": 3.349441561756949e-06, "loss": 0.0253, "step": 85465 }, { "epoch": 0.3566272500438117, "grad_norm": 0.9329680117073329, "learning_rate": 3.3493435879141113e-06, "loss": 0.0275, "step": 85470 }, { "epoch": 0.356648112758802, "grad_norm": 0.6004216651409828, "learning_rate": 3.34924562266821e-06, "loss": 0.0323, "step": 85475 }, { "epoch": 0.3566689754737923, "grad_norm": 0.6150637770079128, "learning_rate": 3.349147666017988e-06, "loss": 0.0275, "step": 85480 }, { "epoch": 0.3566898381887825, "grad_norm": 0.8197525444493015, "learning_rate": 3.34904971796219e-06, "loss": 0.0225, "step": 85485 }, { "epoch": 0.3567107009037728, "grad_norm": 0.7091911935155251, "learning_rate": 3.348951778499557e-06, "loss": 0.0231, "step": 85490 }, { "epoch": 0.35673156361876307, "grad_norm": 0.7123811318239702, "learning_rate": 3.3488538476288337e-06, "loss": 0.0233, "step": 85495 }, { "epoch": 0.35675242633375337, "grad_norm": 2.468933385860548, "learning_rate": 3.3487559253487634e-06, "loss": 0.0384, "step": 85500 }, { "epoch": 0.35677328904874367, "grad_norm": 0.9204542039843677, "learning_rate": 3.348658011658091e-06, "loss": 0.0331, "step": 85505 }, { "epoch": 0.3567941517637339, "grad_norm": 0.4895711199345325, "learning_rate": 3.34856010655556e-06, "loss": 0.0198, "step": 85510 }, { "epoch": 0.3568150144787242, "grad_norm": 0.633356062067231, "learning_rate": 3.3484622100399157e-06, "loss": 0.0281, "step": 85515 }, { "epoch": 0.35683587719371446, "grad_norm": 0.9483795339326131, "learning_rate": 3.348364322109902e-06, "loss": 0.0369, "step": 85520 }, { "epoch": 0.35685673990870476, "grad_norm": 0.4602248978862036, "learning_rate": 3.3482664427642653e-06, "loss": 0.0272, "step": 85525 }, { "epoch": 0.35687760262369506, "grad_norm": 0.9057987787970677, "learning_rate": 3.3481685720017503e-06, "loss": 0.0282, "step": 85530 }, { "epoch": 0.3568984653386853, "grad_norm": 0.914173064347596, "learning_rate": 3.3480707098211024e-06, "loss": 0.0337, "step": 85535 }, { "epoch": 0.3569193280536756, "grad_norm": 1.0344586638434858, "learning_rate": 3.3479728562210686e-06, "loss": 0.0282, "step": 85540 }, { "epoch": 0.35694019076866584, "grad_norm": 0.5078232562068141, "learning_rate": 3.347875011200394e-06, "loss": 0.025, "step": 85545 }, { "epoch": 0.35696105348365614, "grad_norm": 0.9827426935041125, "learning_rate": 3.3477771747578242e-06, "loss": 0.0285, "step": 85550 }, { "epoch": 0.35698191619864644, "grad_norm": 1.1947699735500088, "learning_rate": 3.3476793468921078e-06, "loss": 0.0279, "step": 85555 }, { "epoch": 0.3570027789136367, "grad_norm": 0.9562222347255861, "learning_rate": 3.34758152760199e-06, "loss": 0.0326, "step": 85560 }, { "epoch": 0.357023641628627, "grad_norm": 1.0844150583681464, "learning_rate": 3.34748371688622e-06, "loss": 0.025, "step": 85565 }, { "epoch": 0.3570445043436173, "grad_norm": 0.83061002642091, "learning_rate": 3.347385914743544e-06, "loss": 0.0209, "step": 85570 }, { "epoch": 0.35706536705860753, "grad_norm": 0.7517526148348498, "learning_rate": 3.347288121172709e-06, "loss": 0.0261, "step": 85575 }, { "epoch": 0.35708622977359783, "grad_norm": 0.5996969800323619, "learning_rate": 3.347190336172464e-06, "loss": 0.0252, "step": 85580 }, { "epoch": 0.3571070924885881, "grad_norm": 0.8699832044782435, "learning_rate": 3.347092559741557e-06, "loss": 0.0282, "step": 85585 }, { "epoch": 0.3571279552035784, "grad_norm": 0.8249777015182487, "learning_rate": 3.3469947918787367e-06, "loss": 0.0257, "step": 85590 }, { "epoch": 0.3571488179185687, "grad_norm": 0.9642434662225988, "learning_rate": 3.3468970325827497e-06, "loss": 0.0212, "step": 85595 }, { "epoch": 0.3571696806335589, "grad_norm": 0.6797716658670261, "learning_rate": 3.3467992818523486e-06, "loss": 0.0325, "step": 85600 }, { "epoch": 0.3571905433485492, "grad_norm": 0.8247966259150206, "learning_rate": 3.3467015396862808e-06, "loss": 0.0292, "step": 85605 }, { "epoch": 0.35721140606353946, "grad_norm": 0.9456419777546647, "learning_rate": 3.3466038060832946e-06, "loss": 0.0348, "step": 85610 }, { "epoch": 0.35723226877852976, "grad_norm": 0.8167214933507416, "learning_rate": 3.346506081042141e-06, "loss": 0.037, "step": 85615 }, { "epoch": 0.35725313149352006, "grad_norm": 0.8216267300692076, "learning_rate": 3.3464083645615703e-06, "loss": 0.0325, "step": 85620 }, { "epoch": 0.3572739942085103, "grad_norm": 0.40983700893087155, "learning_rate": 3.3463106566403325e-06, "loss": 0.0239, "step": 85625 }, { "epoch": 0.3572948569235006, "grad_norm": 0.6744886182231468, "learning_rate": 3.3462129572771777e-06, "loss": 0.0273, "step": 85630 }, { "epoch": 0.35731571963849085, "grad_norm": 0.7481444928572112, "learning_rate": 3.346115266470857e-06, "loss": 0.037, "step": 85635 }, { "epoch": 0.35733658235348115, "grad_norm": 0.9622502874277524, "learning_rate": 3.346017584220121e-06, "loss": 0.0332, "step": 85640 }, { "epoch": 0.35735744506847145, "grad_norm": 0.896653116466072, "learning_rate": 3.345919910523721e-06, "loss": 0.0281, "step": 85645 }, { "epoch": 0.3573783077834617, "grad_norm": 0.5792076600086431, "learning_rate": 3.345822245380409e-06, "loss": 0.0303, "step": 85650 }, { "epoch": 0.357399170498452, "grad_norm": 1.167868604134201, "learning_rate": 3.3457245887889365e-06, "loss": 0.0338, "step": 85655 }, { "epoch": 0.3574200332134423, "grad_norm": 0.7563587986710085, "learning_rate": 3.3456269407480564e-06, "loss": 0.0238, "step": 85660 }, { "epoch": 0.35744089592843253, "grad_norm": 0.8420018529099135, "learning_rate": 3.345529301256519e-06, "loss": 0.0248, "step": 85665 }, { "epoch": 0.35746175864342283, "grad_norm": 0.8648297547466474, "learning_rate": 3.3454316703130783e-06, "loss": 0.0257, "step": 85670 }, { "epoch": 0.3574826213584131, "grad_norm": 0.9663372913704545, "learning_rate": 3.3453340479164874e-06, "loss": 0.0355, "step": 85675 }, { "epoch": 0.3575034840734034, "grad_norm": 0.6284314597146349, "learning_rate": 3.3452364340654985e-06, "loss": 0.0302, "step": 85680 }, { "epoch": 0.3575243467883937, "grad_norm": 0.86009162843658, "learning_rate": 3.3451388287588655e-06, "loss": 0.0275, "step": 85685 }, { "epoch": 0.3575452095033839, "grad_norm": 0.9303100526152037, "learning_rate": 3.3450412319953407e-06, "loss": 0.03, "step": 85690 }, { "epoch": 0.3575660722183742, "grad_norm": 0.6437538087736381, "learning_rate": 3.3449436437736794e-06, "loss": 0.023, "step": 85695 }, { "epoch": 0.35758693493336446, "grad_norm": 1.003023766073153, "learning_rate": 3.344846064092635e-06, "loss": 0.0247, "step": 85700 }, { "epoch": 0.35760779764835476, "grad_norm": 1.6307161755913424, "learning_rate": 3.3447484929509623e-06, "loss": 0.0434, "step": 85705 }, { "epoch": 0.35762866036334506, "grad_norm": 0.5057370191832722, "learning_rate": 3.3446509303474157e-06, "loss": 0.0345, "step": 85710 }, { "epoch": 0.3576495230783353, "grad_norm": 1.2875186377855843, "learning_rate": 3.344553376280749e-06, "loss": 0.0352, "step": 85715 }, { "epoch": 0.3576703857933256, "grad_norm": 1.0039974037797137, "learning_rate": 3.344455830749719e-06, "loss": 0.0284, "step": 85720 }, { "epoch": 0.35769124850831585, "grad_norm": 0.581033196660283, "learning_rate": 3.3443582937530795e-06, "loss": 0.0288, "step": 85725 }, { "epoch": 0.35771211122330615, "grad_norm": 0.7846648257390862, "learning_rate": 3.3442607652895868e-06, "loss": 0.0335, "step": 85730 }, { "epoch": 0.35773297393829645, "grad_norm": 1.09338915071659, "learning_rate": 3.3441632453579977e-06, "loss": 0.0329, "step": 85735 }, { "epoch": 0.3577538366532867, "grad_norm": 0.6110853813365797, "learning_rate": 3.344065733957066e-06, "loss": 0.0301, "step": 85740 }, { "epoch": 0.357774699368277, "grad_norm": 1.2637520505008955, "learning_rate": 3.3439682310855505e-06, "loss": 0.0377, "step": 85745 }, { "epoch": 0.3577955620832673, "grad_norm": 0.33676266962527673, "learning_rate": 3.3438707367422063e-06, "loss": 0.0224, "step": 85750 }, { "epoch": 0.35781642479825754, "grad_norm": 0.836064608623383, "learning_rate": 3.3437732509257903e-06, "loss": 0.0306, "step": 85755 }, { "epoch": 0.35783728751324784, "grad_norm": 0.5879578344302272, "learning_rate": 3.3436757736350612e-06, "loss": 0.0267, "step": 85760 }, { "epoch": 0.3578581502282381, "grad_norm": 0.8150269476895134, "learning_rate": 3.343578304868774e-06, "loss": 0.0325, "step": 85765 }, { "epoch": 0.3578790129432284, "grad_norm": 0.6307154783608595, "learning_rate": 3.343480844625688e-06, "loss": 0.0254, "step": 85770 }, { "epoch": 0.3578998756582187, "grad_norm": 0.677362575435064, "learning_rate": 3.3433833929045604e-06, "loss": 0.0223, "step": 85775 }, { "epoch": 0.3579207383732089, "grad_norm": 0.7328146256989335, "learning_rate": 3.34328594970415e-06, "loss": 0.0345, "step": 85780 }, { "epoch": 0.3579416010881992, "grad_norm": 0.9153677357650744, "learning_rate": 3.343188515023214e-06, "loss": 0.0441, "step": 85785 }, { "epoch": 0.35796246380318947, "grad_norm": 0.661078023688894, "learning_rate": 3.3430910888605123e-06, "loss": 0.025, "step": 85790 }, { "epoch": 0.35798332651817977, "grad_norm": 0.8211346875973335, "learning_rate": 3.3429936712148025e-06, "loss": 0.0313, "step": 85795 }, { "epoch": 0.35800418923317007, "grad_norm": 0.5055602885678121, "learning_rate": 3.3428962620848447e-06, "loss": 0.0232, "step": 85800 }, { "epoch": 0.3580250519481603, "grad_norm": 0.5771539255092859, "learning_rate": 3.342798861469399e-06, "loss": 0.0274, "step": 85805 }, { "epoch": 0.3580459146631506, "grad_norm": 0.7836158053589622, "learning_rate": 3.342701469367224e-06, "loss": 0.0257, "step": 85810 }, { "epoch": 0.35806677737814085, "grad_norm": 1.2098038499410153, "learning_rate": 3.3426040857770785e-06, "loss": 0.0327, "step": 85815 }, { "epoch": 0.35808764009313115, "grad_norm": 0.8704308127774533, "learning_rate": 3.342506710697725e-06, "loss": 0.0231, "step": 85820 }, { "epoch": 0.35810850280812145, "grad_norm": 0.5443921832681027, "learning_rate": 3.3424093441279227e-06, "loss": 0.0214, "step": 85825 }, { "epoch": 0.3581293655231117, "grad_norm": 0.7863624498491899, "learning_rate": 3.342311986066432e-06, "loss": 0.0285, "step": 85830 }, { "epoch": 0.358150228238102, "grad_norm": 1.1040090496811388, "learning_rate": 3.3422146365120137e-06, "loss": 0.0289, "step": 85835 }, { "epoch": 0.3581710909530923, "grad_norm": 0.601866505708238, "learning_rate": 3.3421172954634307e-06, "loss": 0.0245, "step": 85840 }, { "epoch": 0.35819195366808254, "grad_norm": 0.5704256894746592, "learning_rate": 3.3420199629194425e-06, "loss": 0.0267, "step": 85845 }, { "epoch": 0.35821281638307284, "grad_norm": 0.9786513503999242, "learning_rate": 3.341922638878812e-06, "loss": 0.025, "step": 85850 }, { "epoch": 0.3582336790980631, "grad_norm": 0.5726888502412892, "learning_rate": 3.3418253233403e-06, "loss": 0.0259, "step": 85855 }, { "epoch": 0.3582545418130534, "grad_norm": 0.9088321621372659, "learning_rate": 3.3417280163026695e-06, "loss": 0.0247, "step": 85860 }, { "epoch": 0.3582754045280437, "grad_norm": 0.4527311088886715, "learning_rate": 3.341630717764683e-06, "loss": 0.0306, "step": 85865 }, { "epoch": 0.35829626724303393, "grad_norm": 1.639716505566222, "learning_rate": 3.341533427725103e-06, "loss": 0.031, "step": 85870 }, { "epoch": 0.3583171299580242, "grad_norm": 0.9582973931526804, "learning_rate": 3.341436146182691e-06, "loss": 0.0235, "step": 85875 }, { "epoch": 0.35833799267301447, "grad_norm": 1.26944772173869, "learning_rate": 3.341338873136212e-06, "loss": 0.0298, "step": 85880 }, { "epoch": 0.35835885538800477, "grad_norm": 0.9708213568859662, "learning_rate": 3.3412416085844295e-06, "loss": 0.0275, "step": 85885 }, { "epoch": 0.35837971810299507, "grad_norm": 1.3479288258977895, "learning_rate": 3.3411443525261066e-06, "loss": 0.0356, "step": 85890 }, { "epoch": 0.3584005808179853, "grad_norm": 0.5818234939862442, "learning_rate": 3.3410471049600073e-06, "loss": 0.0341, "step": 85895 }, { "epoch": 0.3584214435329756, "grad_norm": 0.6772260862171401, "learning_rate": 3.340949865884896e-06, "loss": 0.0245, "step": 85900 }, { "epoch": 0.35844230624796586, "grad_norm": 1.0555643895365379, "learning_rate": 3.340852635299537e-06, "loss": 0.0383, "step": 85905 }, { "epoch": 0.35846316896295616, "grad_norm": 0.7847930779619597, "learning_rate": 3.3407554132026943e-06, "loss": 0.0244, "step": 85910 }, { "epoch": 0.35848403167794646, "grad_norm": 0.8630646041075779, "learning_rate": 3.3406581995931333e-06, "loss": 0.0256, "step": 85915 }, { "epoch": 0.3585048943929367, "grad_norm": 0.8591925995717801, "learning_rate": 3.34056099446962e-06, "loss": 0.0265, "step": 85920 }, { "epoch": 0.358525757107927, "grad_norm": 0.889366903154548, "learning_rate": 3.340463797830919e-06, "loss": 0.0313, "step": 85925 }, { "epoch": 0.3585466198229173, "grad_norm": 1.1326434202810327, "learning_rate": 3.3403666096757965e-06, "loss": 0.0265, "step": 85930 }, { "epoch": 0.35856748253790754, "grad_norm": 0.655598972069702, "learning_rate": 3.340269430003018e-06, "loss": 0.024, "step": 85935 }, { "epoch": 0.35858834525289784, "grad_norm": 0.6613647522367007, "learning_rate": 3.34017225881135e-06, "loss": 0.0355, "step": 85940 }, { "epoch": 0.3586092079678881, "grad_norm": 0.4540656256044257, "learning_rate": 3.340075096099559e-06, "loss": 0.0232, "step": 85945 }, { "epoch": 0.3586300706828784, "grad_norm": 0.6933651394869048, "learning_rate": 3.3399779418664106e-06, "loss": 0.0235, "step": 85950 }, { "epoch": 0.3586509333978687, "grad_norm": 1.060139968810384, "learning_rate": 3.3398807961106744e-06, "loss": 0.0312, "step": 85955 }, { "epoch": 0.35867179611285893, "grad_norm": 0.5278335994610944, "learning_rate": 3.3397836588311142e-06, "loss": 0.0243, "step": 85960 }, { "epoch": 0.35869265882784923, "grad_norm": 0.692817044268952, "learning_rate": 3.3396865300265003e-06, "loss": 0.0302, "step": 85965 }, { "epoch": 0.3587135215428395, "grad_norm": 0.8933459693638844, "learning_rate": 3.339589409695599e-06, "loss": 0.0312, "step": 85970 }, { "epoch": 0.3587343842578298, "grad_norm": 0.623611119021937, "learning_rate": 3.3394922978371784e-06, "loss": 0.0295, "step": 85975 }, { "epoch": 0.3587552469728201, "grad_norm": 0.7011937937727243, "learning_rate": 3.3393951944500073e-06, "loss": 0.0258, "step": 85980 }, { "epoch": 0.3587761096878103, "grad_norm": 0.7800093838544344, "learning_rate": 3.339298099532854e-06, "loss": 0.0286, "step": 85985 }, { "epoch": 0.3587969724028006, "grad_norm": 1.6818968611318221, "learning_rate": 3.339201013084487e-06, "loss": 0.0236, "step": 85990 }, { "epoch": 0.35881783511779086, "grad_norm": 0.47993126824411864, "learning_rate": 3.3391039351036746e-06, "loss": 0.0291, "step": 85995 }, { "epoch": 0.35883869783278116, "grad_norm": 0.8632349778885609, "learning_rate": 3.339006865589187e-06, "loss": 0.0256, "step": 86000 }, { "epoch": 0.35885956054777146, "grad_norm": 0.7383409330127767, "learning_rate": 3.3389098045397932e-06, "loss": 0.0302, "step": 86005 }, { "epoch": 0.3588804232627617, "grad_norm": 0.5254640536362774, "learning_rate": 3.3388127519542634e-06, "loss": 0.0311, "step": 86010 }, { "epoch": 0.358901285977752, "grad_norm": 0.8882190662586924, "learning_rate": 3.3387157078313677e-06, "loss": 0.0427, "step": 86015 }, { "epoch": 0.3589221486927423, "grad_norm": 0.5855366205919001, "learning_rate": 3.338618672169875e-06, "loss": 0.028, "step": 86020 }, { "epoch": 0.35894301140773255, "grad_norm": 0.6648801733495766, "learning_rate": 3.338521644968557e-06, "loss": 0.0227, "step": 86025 }, { "epoch": 0.35896387412272285, "grad_norm": 0.7707100030624412, "learning_rate": 3.338424626226184e-06, "loss": 0.033, "step": 86030 }, { "epoch": 0.3589847368377131, "grad_norm": 0.6660932228655067, "learning_rate": 3.338327615941527e-06, "loss": 0.0332, "step": 86035 }, { "epoch": 0.3590055995527034, "grad_norm": 1.089851608888166, "learning_rate": 3.338230614113357e-06, "loss": 0.0282, "step": 86040 }, { "epoch": 0.3590264622676937, "grad_norm": 1.518904190810575, "learning_rate": 3.338133620740446e-06, "loss": 0.0374, "step": 86045 }, { "epoch": 0.35904732498268394, "grad_norm": 0.8753446904402931, "learning_rate": 3.338036635821565e-06, "loss": 0.0254, "step": 86050 }, { "epoch": 0.35906818769767423, "grad_norm": 0.44060705889416124, "learning_rate": 3.337939659355487e-06, "loss": 0.0269, "step": 86055 }, { "epoch": 0.3590890504126645, "grad_norm": 0.5963131313461606, "learning_rate": 3.3378426913409833e-06, "loss": 0.0331, "step": 86060 }, { "epoch": 0.3591099131276548, "grad_norm": 1.4710014916458005, "learning_rate": 3.3377457317768264e-06, "loss": 0.031, "step": 86065 }, { "epoch": 0.3591307758426451, "grad_norm": 0.7890434919705201, "learning_rate": 3.33764878066179e-06, "loss": 0.03, "step": 86070 }, { "epoch": 0.3591516385576353, "grad_norm": 0.5649798336276456, "learning_rate": 3.3375518379946463e-06, "loss": 0.0238, "step": 86075 }, { "epoch": 0.3591725012726256, "grad_norm": 0.6951141753910912, "learning_rate": 3.3374549037741684e-06, "loss": 0.0356, "step": 86080 }, { "epoch": 0.35919336398761587, "grad_norm": 1.050012921270261, "learning_rate": 3.33735797799913e-06, "loss": 0.0343, "step": 86085 }, { "epoch": 0.35921422670260617, "grad_norm": 0.7222481341746317, "learning_rate": 3.3372610606683043e-06, "loss": 0.027, "step": 86090 }, { "epoch": 0.35923508941759646, "grad_norm": 0.7840641111014104, "learning_rate": 3.3371641517804664e-06, "loss": 0.0272, "step": 86095 }, { "epoch": 0.3592559521325867, "grad_norm": 1.0110056230105118, "learning_rate": 3.3370672513343892e-06, "loss": 0.0259, "step": 86100 }, { "epoch": 0.359276814847577, "grad_norm": 0.7390305150586871, "learning_rate": 3.3369703593288484e-06, "loss": 0.0229, "step": 86105 }, { "epoch": 0.3592976775625673, "grad_norm": 0.6883083296781298, "learning_rate": 3.3368734757626183e-06, "loss": 0.0265, "step": 86110 }, { "epoch": 0.35931854027755755, "grad_norm": 0.5845065304806359, "learning_rate": 3.336776600634473e-06, "loss": 0.0301, "step": 86115 }, { "epoch": 0.35933940299254785, "grad_norm": 0.8128702910230168, "learning_rate": 3.336679733943189e-06, "loss": 0.0287, "step": 86120 }, { "epoch": 0.3593602657075381, "grad_norm": 0.9526105949842715, "learning_rate": 3.3365828756875402e-06, "loss": 0.0215, "step": 86125 }, { "epoch": 0.3593811284225284, "grad_norm": 0.41211551060556056, "learning_rate": 3.3364860258663044e-06, "loss": 0.032, "step": 86130 }, { "epoch": 0.3594019911375187, "grad_norm": 1.03455604569908, "learning_rate": 3.3363891844782565e-06, "loss": 0.0269, "step": 86135 }, { "epoch": 0.35942285385250894, "grad_norm": 0.6711222519306341, "learning_rate": 3.3362923515221717e-06, "loss": 0.0273, "step": 86140 }, { "epoch": 0.35944371656749924, "grad_norm": 0.6202510134705567, "learning_rate": 3.3361955269968278e-06, "loss": 0.0233, "step": 86145 }, { "epoch": 0.3594645792824895, "grad_norm": 1.154148751743799, "learning_rate": 3.336098710901001e-06, "loss": 0.0327, "step": 86150 }, { "epoch": 0.3594854419974798, "grad_norm": 0.8898459842687222, "learning_rate": 3.336001903233469e-06, "loss": 0.0256, "step": 86155 }, { "epoch": 0.3595063047124701, "grad_norm": 0.5311732500951978, "learning_rate": 3.3359051039930073e-06, "loss": 0.0229, "step": 86160 }, { "epoch": 0.3595271674274603, "grad_norm": 0.7082834042517062, "learning_rate": 3.335808313178395e-06, "loss": 0.0254, "step": 86165 }, { "epoch": 0.3595480301424506, "grad_norm": 0.5697773160336894, "learning_rate": 3.3357115307884096e-06, "loss": 0.0254, "step": 86170 }, { "epoch": 0.35956889285744087, "grad_norm": 0.5205879112887799, "learning_rate": 3.335614756821828e-06, "loss": 0.0274, "step": 86175 }, { "epoch": 0.35958975557243117, "grad_norm": 1.0578673517102868, "learning_rate": 3.3355179912774294e-06, "loss": 0.0378, "step": 86180 }, { "epoch": 0.35961061828742147, "grad_norm": 0.8100589555960032, "learning_rate": 3.3354212341539923e-06, "loss": 0.0262, "step": 86185 }, { "epoch": 0.3596314810024117, "grad_norm": 0.793330866539247, "learning_rate": 3.335324485450294e-06, "loss": 0.0289, "step": 86190 }, { "epoch": 0.359652343717402, "grad_norm": 0.39550911722517484, "learning_rate": 3.335227745165115e-06, "loss": 0.03, "step": 86195 }, { "epoch": 0.3596732064323923, "grad_norm": 0.6209542744257988, "learning_rate": 3.335131013297233e-06, "loss": 0.0414, "step": 86200 }, { "epoch": 0.35969406914738256, "grad_norm": 0.5641505663692559, "learning_rate": 3.3350342898454295e-06, "loss": 0.0275, "step": 86205 }, { "epoch": 0.35971493186237286, "grad_norm": 0.6309737247724757, "learning_rate": 3.334937574808482e-06, "loss": 0.0391, "step": 86210 }, { "epoch": 0.3597357945773631, "grad_norm": 0.8274575090149279, "learning_rate": 3.3348408681851722e-06, "loss": 0.0319, "step": 86215 }, { "epoch": 0.3597566572923534, "grad_norm": 0.5167272891143158, "learning_rate": 3.3347441699742794e-06, "loss": 0.0341, "step": 86220 }, { "epoch": 0.3597775200073437, "grad_norm": 0.7897807918853602, "learning_rate": 3.3346474801745837e-06, "loss": 0.0308, "step": 86225 }, { "epoch": 0.35979838272233394, "grad_norm": 0.9838543646763445, "learning_rate": 3.334550798784867e-06, "loss": 0.021, "step": 86230 }, { "epoch": 0.35981924543732424, "grad_norm": 0.8943225328064809, "learning_rate": 3.3344541258039087e-06, "loss": 0.0235, "step": 86235 }, { "epoch": 0.3598401081523145, "grad_norm": 0.6637471555813325, "learning_rate": 3.3343574612304907e-06, "loss": 0.0266, "step": 86240 }, { "epoch": 0.3598609708673048, "grad_norm": 0.4079113704701519, "learning_rate": 3.3342608050633945e-06, "loss": 0.0215, "step": 86245 }, { "epoch": 0.3598818335822951, "grad_norm": 1.4433544638147784, "learning_rate": 3.3341641573014024e-06, "loss": 0.0291, "step": 86250 }, { "epoch": 0.35990269629728533, "grad_norm": 0.8236011001336442, "learning_rate": 3.3340675179432946e-06, "loss": 0.0309, "step": 86255 }, { "epoch": 0.35992355901227563, "grad_norm": 0.9304756069200583, "learning_rate": 3.3339708869878545e-06, "loss": 0.036, "step": 86260 }, { "epoch": 0.3599444217272659, "grad_norm": 1.5750220773023795, "learning_rate": 3.3338742644338645e-06, "loss": 0.0293, "step": 86265 }, { "epoch": 0.3599652844422562, "grad_norm": 0.46891429077426194, "learning_rate": 3.333777650280107e-06, "loss": 0.0289, "step": 86270 }, { "epoch": 0.35998614715724647, "grad_norm": 0.6869365187723209, "learning_rate": 3.3336810445253644e-06, "loss": 0.0309, "step": 86275 }, { "epoch": 0.3600070098722367, "grad_norm": 1.0024389723363782, "learning_rate": 3.33358444716842e-06, "loss": 0.0339, "step": 86280 }, { "epoch": 0.360027872587227, "grad_norm": 0.7274796407888748, "learning_rate": 3.333487858208058e-06, "loss": 0.0327, "step": 86285 }, { "epoch": 0.3600487353022173, "grad_norm": 0.7190743131875099, "learning_rate": 3.333391277643062e-06, "loss": 0.0317, "step": 86290 }, { "epoch": 0.36006959801720756, "grad_norm": 0.6065080083034119, "learning_rate": 3.3332947054722146e-06, "loss": 0.0276, "step": 86295 }, { "epoch": 0.36009046073219786, "grad_norm": 0.7839165005240036, "learning_rate": 3.3331981416943015e-06, "loss": 0.0219, "step": 86300 }, { "epoch": 0.3601113234471881, "grad_norm": 0.971255301422715, "learning_rate": 3.333101586308106e-06, "loss": 0.028, "step": 86305 }, { "epoch": 0.3601321861621784, "grad_norm": 0.8636971435031936, "learning_rate": 3.333005039312413e-06, "loss": 0.0349, "step": 86310 }, { "epoch": 0.3601530488771687, "grad_norm": 0.775241233732223, "learning_rate": 3.3329085007060073e-06, "loss": 0.0219, "step": 86315 }, { "epoch": 0.36017391159215895, "grad_norm": 0.5653151402570843, "learning_rate": 3.3328119704876742e-06, "loss": 0.0249, "step": 86320 }, { "epoch": 0.36019477430714925, "grad_norm": 0.7995730960597434, "learning_rate": 3.3327154486562e-06, "loss": 0.0213, "step": 86325 }, { "epoch": 0.3602156370221395, "grad_norm": 2.4211538453165216, "learning_rate": 3.3326189352103677e-06, "loss": 0.0274, "step": 86330 }, { "epoch": 0.3602364997371298, "grad_norm": 0.9080550668494287, "learning_rate": 3.3325224301489657e-06, "loss": 0.0238, "step": 86335 }, { "epoch": 0.3602573624521201, "grad_norm": 0.6746410379415964, "learning_rate": 3.332425933470779e-06, "loss": 0.022, "step": 86340 }, { "epoch": 0.36027822516711033, "grad_norm": 1.5478133101770337, "learning_rate": 3.332329445174594e-06, "loss": 0.0319, "step": 86345 }, { "epoch": 0.36029908788210063, "grad_norm": 0.8603900085025858, "learning_rate": 3.3322329652591977e-06, "loss": 0.0243, "step": 86350 }, { "epoch": 0.3603199505970909, "grad_norm": 0.6826433217491307, "learning_rate": 3.3321364937233763e-06, "loss": 0.0308, "step": 86355 }, { "epoch": 0.3603408133120812, "grad_norm": 1.2025443135062752, "learning_rate": 3.3320400305659173e-06, "loss": 0.0244, "step": 86360 }, { "epoch": 0.3603616760270715, "grad_norm": 0.7504015390764012, "learning_rate": 3.331943575785608e-06, "loss": 0.0303, "step": 86365 }, { "epoch": 0.3603825387420617, "grad_norm": 0.6800001633894315, "learning_rate": 3.3318471293812353e-06, "loss": 0.0304, "step": 86370 }, { "epoch": 0.360403401457052, "grad_norm": 0.6506389474293256, "learning_rate": 3.3317506913515885e-06, "loss": 0.0237, "step": 86375 }, { "epoch": 0.3604242641720423, "grad_norm": 0.4991202914701575, "learning_rate": 3.3316542616954545e-06, "loss": 0.0218, "step": 86380 }, { "epoch": 0.36044512688703256, "grad_norm": 0.9176622204259581, "learning_rate": 3.3315578404116224e-06, "loss": 0.0291, "step": 86385 }, { "epoch": 0.36046598960202286, "grad_norm": 0.6184427722029898, "learning_rate": 3.3314614274988793e-06, "loss": 0.0349, "step": 86390 }, { "epoch": 0.3604868523170131, "grad_norm": 1.2835461516236233, "learning_rate": 3.3313650229560155e-06, "loss": 0.0389, "step": 86395 }, { "epoch": 0.3605077150320034, "grad_norm": 0.5014821186650092, "learning_rate": 3.33126862678182e-06, "loss": 0.0368, "step": 86400 }, { "epoch": 0.3605285777469937, "grad_norm": 0.5324795237964739, "learning_rate": 3.3311722389750813e-06, "loss": 0.0233, "step": 86405 }, { "epoch": 0.36054944046198395, "grad_norm": 0.8003257789228152, "learning_rate": 3.331075859534589e-06, "loss": 0.0212, "step": 86410 }, { "epoch": 0.36057030317697425, "grad_norm": 0.609286160901281, "learning_rate": 3.3309794884591334e-06, "loss": 0.0198, "step": 86415 }, { "epoch": 0.3605911658919645, "grad_norm": 1.2073338233444706, "learning_rate": 3.3308831257475043e-06, "loss": 0.0292, "step": 86420 }, { "epoch": 0.3606120286069548, "grad_norm": 0.9757911102486694, "learning_rate": 3.330786771398492e-06, "loss": 0.0329, "step": 86425 }, { "epoch": 0.3606328913219451, "grad_norm": 1.7481809641772286, "learning_rate": 3.330690425410887e-06, "loss": 0.0401, "step": 86430 }, { "epoch": 0.36065375403693534, "grad_norm": 0.95397122648611, "learning_rate": 3.3305940877834802e-06, "loss": 0.0273, "step": 86435 }, { "epoch": 0.36067461675192564, "grad_norm": 1.0075788339033862, "learning_rate": 3.3304977585150615e-06, "loss": 0.0269, "step": 86440 }, { "epoch": 0.3606954794669159, "grad_norm": 0.3846092750006928, "learning_rate": 3.330401437604424e-06, "loss": 0.0264, "step": 86445 }, { "epoch": 0.3607163421819062, "grad_norm": 0.9920807174542896, "learning_rate": 3.330305125050358e-06, "loss": 0.0272, "step": 86450 }, { "epoch": 0.3607372048968965, "grad_norm": 0.7813200406618095, "learning_rate": 3.3302088208516558e-06, "loss": 0.0218, "step": 86455 }, { "epoch": 0.3607580676118867, "grad_norm": 1.1282226723225703, "learning_rate": 3.3301125250071093e-06, "loss": 0.0325, "step": 86460 }, { "epoch": 0.360778930326877, "grad_norm": 0.9112134196853513, "learning_rate": 3.330016237515509e-06, "loss": 0.0228, "step": 86465 }, { "epoch": 0.3607997930418673, "grad_norm": 0.7567668297857827, "learning_rate": 3.329919958375651e-06, "loss": 0.0243, "step": 86470 }, { "epoch": 0.36082065575685757, "grad_norm": 0.9147596183503869, "learning_rate": 3.329823687586325e-06, "loss": 0.0326, "step": 86475 }, { "epoch": 0.36084151847184787, "grad_norm": 0.633802378359685, "learning_rate": 3.3297274251463253e-06, "loss": 0.0208, "step": 86480 }, { "epoch": 0.3608623811868381, "grad_norm": 0.9193453931216155, "learning_rate": 3.3296311710544443e-06, "loss": 0.032, "step": 86485 }, { "epoch": 0.3608832439018284, "grad_norm": 1.047947050705991, "learning_rate": 3.329534925309476e-06, "loss": 0.0298, "step": 86490 }, { "epoch": 0.3609041066168187, "grad_norm": 1.0601236431412233, "learning_rate": 3.329438687910215e-06, "loss": 0.0327, "step": 86495 }, { "epoch": 0.36092496933180895, "grad_norm": 3.291211985508962, "learning_rate": 3.3293424588554526e-06, "loss": 0.0252, "step": 86500 }, { "epoch": 0.36094583204679925, "grad_norm": 0.8336726581276467, "learning_rate": 3.3292462381439855e-06, "loss": 0.0265, "step": 86505 }, { "epoch": 0.3609666947617895, "grad_norm": 0.7962591582593593, "learning_rate": 3.3291500257746067e-06, "loss": 0.029, "step": 86510 }, { "epoch": 0.3609875574767798, "grad_norm": 1.1522601008546787, "learning_rate": 3.329053821746112e-06, "loss": 0.0243, "step": 86515 }, { "epoch": 0.3610084201917701, "grad_norm": 1.109968006675825, "learning_rate": 3.328957626057296e-06, "loss": 0.0402, "step": 86520 }, { "epoch": 0.36102928290676034, "grad_norm": 0.7453528833939554, "learning_rate": 3.3288614387069524e-06, "loss": 0.0216, "step": 86525 }, { "epoch": 0.36105014562175064, "grad_norm": 0.30767657715326857, "learning_rate": 3.3287652596938785e-06, "loss": 0.0214, "step": 86530 }, { "epoch": 0.3610710083367409, "grad_norm": 0.6658643266305165, "learning_rate": 3.3286690890168695e-06, "loss": 0.0307, "step": 86535 }, { "epoch": 0.3610918710517312, "grad_norm": 0.843397153264085, "learning_rate": 3.3285729266747206e-06, "loss": 0.0269, "step": 86540 }, { "epoch": 0.3611127337667215, "grad_norm": 1.0178078446496965, "learning_rate": 3.3284767726662287e-06, "loss": 0.0273, "step": 86545 }, { "epoch": 0.3611335964817117, "grad_norm": 1.5918532459777908, "learning_rate": 3.328380626990189e-06, "loss": 0.0298, "step": 86550 }, { "epoch": 0.361154459196702, "grad_norm": 0.4675050633034331, "learning_rate": 3.328284489645401e-06, "loss": 0.0443, "step": 86555 }, { "epoch": 0.3611753219116923, "grad_norm": 0.5619236019701277, "learning_rate": 3.3281883606306574e-06, "loss": 0.0294, "step": 86560 }, { "epoch": 0.36119618462668257, "grad_norm": 0.6757459727302059, "learning_rate": 3.3280922399447584e-06, "loss": 0.0222, "step": 86565 }, { "epoch": 0.36121704734167287, "grad_norm": 0.6848472814039774, "learning_rate": 3.3279961275865e-06, "loss": 0.0357, "step": 86570 }, { "epoch": 0.3612379100566631, "grad_norm": 0.5956725718663873, "learning_rate": 3.327900023554681e-06, "loss": 0.0264, "step": 86575 }, { "epoch": 0.3612587727716534, "grad_norm": 0.8857879896350168, "learning_rate": 3.3278039278480978e-06, "loss": 0.0313, "step": 86580 }, { "epoch": 0.3612796354866437, "grad_norm": 0.7239563050342306, "learning_rate": 3.327707840465549e-06, "loss": 0.0263, "step": 86585 }, { "epoch": 0.36130049820163396, "grad_norm": 0.6250534903142091, "learning_rate": 3.327611761405834e-06, "loss": 0.0279, "step": 86590 }, { "epoch": 0.36132136091662426, "grad_norm": 0.7474787401149859, "learning_rate": 3.327515690667749e-06, "loss": 0.0268, "step": 86595 }, { "epoch": 0.3613422236316145, "grad_norm": 0.605938589369516, "learning_rate": 3.3274196282500955e-06, "loss": 0.0339, "step": 86600 }, { "epoch": 0.3613630863466048, "grad_norm": 0.4476451277362033, "learning_rate": 3.32732357415167e-06, "loss": 0.026, "step": 86605 }, { "epoch": 0.3613839490615951, "grad_norm": 1.0187365967435866, "learning_rate": 3.327227528371274e-06, "loss": 0.0387, "step": 86610 }, { "epoch": 0.36140481177658534, "grad_norm": 0.532047923574769, "learning_rate": 3.3271314909077063e-06, "loss": 0.0276, "step": 86615 }, { "epoch": 0.36142567449157564, "grad_norm": 0.9860986447535312, "learning_rate": 3.3270354617597655e-06, "loss": 0.0303, "step": 86620 }, { "epoch": 0.3614465372065659, "grad_norm": 0.926406722077316, "learning_rate": 3.3269394409262536e-06, "loss": 0.0385, "step": 86625 }, { "epoch": 0.3614673999215562, "grad_norm": 0.6488197077158339, "learning_rate": 3.3268434284059694e-06, "loss": 0.0244, "step": 86630 }, { "epoch": 0.3614882626365465, "grad_norm": 0.679215908399178, "learning_rate": 3.326747424197714e-06, "loss": 0.0231, "step": 86635 }, { "epoch": 0.36150912535153673, "grad_norm": 0.8982247901638162, "learning_rate": 3.3266514283002883e-06, "loss": 0.0312, "step": 86640 }, { "epoch": 0.36152998806652703, "grad_norm": 0.5858832474382936, "learning_rate": 3.3265554407124917e-06, "loss": 0.0184, "step": 86645 }, { "epoch": 0.36155085078151733, "grad_norm": 1.4831732179980077, "learning_rate": 3.3264594614331287e-06, "loss": 0.0329, "step": 86650 }, { "epoch": 0.3615717134965076, "grad_norm": 0.8622230159827704, "learning_rate": 3.326363490460997e-06, "loss": 0.0271, "step": 86655 }, { "epoch": 0.3615925762114979, "grad_norm": 0.6274266078222478, "learning_rate": 3.3262675277949013e-06, "loss": 0.0271, "step": 86660 }, { "epoch": 0.3616134389264881, "grad_norm": 0.950265718368593, "learning_rate": 3.326171573433642e-06, "loss": 0.0282, "step": 86665 }, { "epoch": 0.3616343016414784, "grad_norm": 1.1014533872269567, "learning_rate": 3.3260756273760212e-06, "loss": 0.0345, "step": 86670 }, { "epoch": 0.3616551643564687, "grad_norm": 0.9737772904399016, "learning_rate": 3.3259796896208433e-06, "loss": 0.0215, "step": 86675 }, { "epoch": 0.36167602707145896, "grad_norm": 0.8405262999272719, "learning_rate": 3.325883760166908e-06, "loss": 0.0316, "step": 86680 }, { "epoch": 0.36169688978644926, "grad_norm": 0.575368424851297, "learning_rate": 3.325787839013021e-06, "loss": 0.0243, "step": 86685 }, { "epoch": 0.3617177525014395, "grad_norm": 1.1763441934711554, "learning_rate": 3.325691926157984e-06, "loss": 0.0303, "step": 86690 }, { "epoch": 0.3617386152164298, "grad_norm": 1.0211276962529021, "learning_rate": 3.3255960216005996e-06, "loss": 0.0386, "step": 86695 }, { "epoch": 0.3617594779314201, "grad_norm": 0.8804796610905936, "learning_rate": 3.3255001253396734e-06, "loss": 0.0365, "step": 86700 }, { "epoch": 0.36178034064641035, "grad_norm": 0.4619676171239612, "learning_rate": 3.3254042373740082e-06, "loss": 0.0288, "step": 86705 }, { "epoch": 0.36180120336140065, "grad_norm": 0.577969521298103, "learning_rate": 3.3253083577024083e-06, "loss": 0.0302, "step": 86710 }, { "epoch": 0.3618220660763909, "grad_norm": 1.4289304887141403, "learning_rate": 3.3252124863236785e-06, "loss": 0.0318, "step": 86715 }, { "epoch": 0.3618429287913812, "grad_norm": 0.7389924203001803, "learning_rate": 3.325116623236623e-06, "loss": 0.0325, "step": 86720 }, { "epoch": 0.3618637915063715, "grad_norm": 0.5177308834702247, "learning_rate": 3.3250207684400454e-06, "loss": 0.0213, "step": 86725 }, { "epoch": 0.36188465422136173, "grad_norm": 1.6812540171509, "learning_rate": 3.3249249219327533e-06, "loss": 0.0293, "step": 86730 }, { "epoch": 0.36190551693635203, "grad_norm": 0.5473219079166902, "learning_rate": 3.324829083713551e-06, "loss": 0.0202, "step": 86735 }, { "epoch": 0.36192637965134233, "grad_norm": 0.5766183661066019, "learning_rate": 3.3247332537812434e-06, "loss": 0.0294, "step": 86740 }, { "epoch": 0.3619472423663326, "grad_norm": 0.7753196803045318, "learning_rate": 3.324637432134637e-06, "loss": 0.035, "step": 86745 }, { "epoch": 0.3619681050813229, "grad_norm": 0.7838913705095091, "learning_rate": 3.324541618772537e-06, "loss": 0.0199, "step": 86750 }, { "epoch": 0.3619889677963131, "grad_norm": 1.1415719126326207, "learning_rate": 3.3244458136937513e-06, "loss": 0.0362, "step": 86755 }, { "epoch": 0.3620098305113034, "grad_norm": 1.2962267682053923, "learning_rate": 3.324350016897085e-06, "loss": 0.0332, "step": 86760 }, { "epoch": 0.3620306932262937, "grad_norm": 0.41643373755068813, "learning_rate": 3.3242542283813457e-06, "loss": 0.0555, "step": 86765 }, { "epoch": 0.36205155594128396, "grad_norm": 0.4797475010088884, "learning_rate": 3.32415844814534e-06, "loss": 0.0334, "step": 86770 }, { "epoch": 0.36207241865627426, "grad_norm": 1.7765233957422137, "learning_rate": 3.324062676187875e-06, "loss": 0.0363, "step": 86775 }, { "epoch": 0.3620932813712645, "grad_norm": 1.1536621276700485, "learning_rate": 3.323966912507759e-06, "loss": 0.0398, "step": 86780 }, { "epoch": 0.3621141440862548, "grad_norm": 1.1342255725216532, "learning_rate": 3.323871157103799e-06, "loss": 0.0219, "step": 86785 }, { "epoch": 0.3621350068012451, "grad_norm": 0.6013760164296647, "learning_rate": 3.3237754099748044e-06, "loss": 0.0215, "step": 86790 }, { "epoch": 0.36215586951623535, "grad_norm": 0.4977033491964021, "learning_rate": 3.323679671119581e-06, "loss": 0.0376, "step": 86795 }, { "epoch": 0.36217673223122565, "grad_norm": 1.1486167195537753, "learning_rate": 3.3235839405369384e-06, "loss": 0.021, "step": 86800 }, { "epoch": 0.3621975949462159, "grad_norm": 0.6816623930018983, "learning_rate": 3.3234882182256863e-06, "loss": 0.0292, "step": 86805 }, { "epoch": 0.3622184576612062, "grad_norm": 0.8643299729688319, "learning_rate": 3.3233925041846326e-06, "loss": 0.0279, "step": 86810 }, { "epoch": 0.3622393203761965, "grad_norm": 0.4282609220370327, "learning_rate": 3.323296798412587e-06, "loss": 0.0341, "step": 86815 }, { "epoch": 0.36226018309118674, "grad_norm": 0.6076552184444358, "learning_rate": 3.323201100908358e-06, "loss": 0.0362, "step": 86820 }, { "epoch": 0.36228104580617704, "grad_norm": 0.7748604401749218, "learning_rate": 3.3231054116707563e-06, "loss": 0.0253, "step": 86825 }, { "epoch": 0.36230190852116734, "grad_norm": 1.0831689533895703, "learning_rate": 3.323009730698591e-06, "loss": 0.043, "step": 86830 }, { "epoch": 0.3623227712361576, "grad_norm": 0.7405077723329279, "learning_rate": 3.322914057990673e-06, "loss": 0.0328, "step": 86835 }, { "epoch": 0.3623436339511479, "grad_norm": 0.5059249238095647, "learning_rate": 3.3228183935458127e-06, "loss": 0.0308, "step": 86840 }, { "epoch": 0.3623644966661381, "grad_norm": 0.6790211465340694, "learning_rate": 3.322722737362819e-06, "loss": 0.0206, "step": 86845 }, { "epoch": 0.3623853593811284, "grad_norm": 0.8511923432760944, "learning_rate": 3.322627089440506e-06, "loss": 0.0269, "step": 86850 }, { "epoch": 0.3624062220961187, "grad_norm": 0.684991824789218, "learning_rate": 3.322531449777682e-06, "loss": 0.0209, "step": 86855 }, { "epoch": 0.36242708481110897, "grad_norm": 0.7796569345582316, "learning_rate": 3.3224358183731594e-06, "loss": 0.0198, "step": 86860 }, { "epoch": 0.36244794752609927, "grad_norm": 0.8805679555326532, "learning_rate": 3.32234019522575e-06, "loss": 0.0234, "step": 86865 }, { "epoch": 0.3624688102410895, "grad_norm": 0.9223559278521881, "learning_rate": 3.322244580334265e-06, "loss": 0.033, "step": 86870 }, { "epoch": 0.3624896729560798, "grad_norm": 1.2021986539840115, "learning_rate": 3.3221489736975172e-06, "loss": 0.0285, "step": 86875 }, { "epoch": 0.3625105356710701, "grad_norm": 0.8189690063682651, "learning_rate": 3.322053375314318e-06, "loss": 0.0322, "step": 86880 }, { "epoch": 0.36253139838606036, "grad_norm": 0.3796139192545076, "learning_rate": 3.3219577851834807e-06, "loss": 0.0286, "step": 86885 }, { "epoch": 0.36255226110105065, "grad_norm": 0.5904899180096959, "learning_rate": 3.3218622033038177e-06, "loss": 0.0168, "step": 86890 }, { "epoch": 0.3625731238160409, "grad_norm": 1.396770026292447, "learning_rate": 3.3217666296741426e-06, "loss": 0.0313, "step": 86895 }, { "epoch": 0.3625939865310312, "grad_norm": 0.27596615638899796, "learning_rate": 3.3216710642932678e-06, "loss": 0.018, "step": 86900 }, { "epoch": 0.3626148492460215, "grad_norm": 0.6392866561808609, "learning_rate": 3.3215755071600074e-06, "loss": 0.0215, "step": 86905 }, { "epoch": 0.36263571196101174, "grad_norm": 0.7901436755721148, "learning_rate": 3.321479958273175e-06, "loss": 0.0222, "step": 86910 }, { "epoch": 0.36265657467600204, "grad_norm": 0.6163973756551939, "learning_rate": 3.3213844176315844e-06, "loss": 0.0259, "step": 86915 }, { "epoch": 0.36267743739099234, "grad_norm": 0.7395396423990762, "learning_rate": 3.3212888852340497e-06, "loss": 0.0279, "step": 86920 }, { "epoch": 0.3626983001059826, "grad_norm": 1.3799863793162943, "learning_rate": 3.321193361079386e-06, "loss": 0.0384, "step": 86925 }, { "epoch": 0.3627191628209729, "grad_norm": 0.8839091154434631, "learning_rate": 3.3210978451664072e-06, "loss": 0.0255, "step": 86930 }, { "epoch": 0.36274002553596313, "grad_norm": 1.3886775266879374, "learning_rate": 3.3210023374939294e-06, "loss": 0.0404, "step": 86935 }, { "epoch": 0.36276088825095343, "grad_norm": 1.0681473163763895, "learning_rate": 3.320906838060767e-06, "loss": 0.0228, "step": 86940 }, { "epoch": 0.36278175096594373, "grad_norm": 0.670388647930204, "learning_rate": 3.320811346865735e-06, "loss": 0.0236, "step": 86945 }, { "epoch": 0.36280261368093397, "grad_norm": 0.4623546633563989, "learning_rate": 3.320715863907649e-06, "loss": 0.0168, "step": 86950 }, { "epoch": 0.36282347639592427, "grad_norm": 0.7905779913083889, "learning_rate": 3.3206203891853255e-06, "loss": 0.0359, "step": 86955 }, { "epoch": 0.3628443391109145, "grad_norm": 2.169444026843581, "learning_rate": 3.320524922697581e-06, "loss": 0.0294, "step": 86960 }, { "epoch": 0.3628652018259048, "grad_norm": 0.7994856003744085, "learning_rate": 3.320429464443231e-06, "loss": 0.0239, "step": 86965 }, { "epoch": 0.3628860645408951, "grad_norm": 1.8751551894442104, "learning_rate": 3.320334014421093e-06, "loss": 0.0271, "step": 86970 }, { "epoch": 0.36290692725588536, "grad_norm": 0.9874767297236426, "learning_rate": 3.320238572629983e-06, "loss": 0.035, "step": 86975 }, { "epoch": 0.36292778997087566, "grad_norm": 0.7800558434701698, "learning_rate": 3.320143139068718e-06, "loss": 0.0242, "step": 86980 }, { "epoch": 0.3629486526858659, "grad_norm": 0.6845359930538333, "learning_rate": 3.3200477137361163e-06, "loss": 0.0222, "step": 86985 }, { "epoch": 0.3629695154008562, "grad_norm": 0.8995275054766481, "learning_rate": 3.3199522966309944e-06, "loss": 0.0317, "step": 86990 }, { "epoch": 0.3629903781158465, "grad_norm": 0.47017781261393504, "learning_rate": 3.319856887752171e-06, "loss": 0.0242, "step": 86995 }, { "epoch": 0.36301124083083675, "grad_norm": 0.9309978127441209, "learning_rate": 3.3197614870984635e-06, "loss": 0.0318, "step": 87000 }, { "epoch": 0.36303210354582705, "grad_norm": 0.8417061320921099, "learning_rate": 3.31966609466869e-06, "loss": 0.0284, "step": 87005 }, { "epoch": 0.36305296626081734, "grad_norm": 0.9870163420485621, "learning_rate": 3.3195707104616696e-06, "loss": 0.0281, "step": 87010 }, { "epoch": 0.3630738289758076, "grad_norm": 0.9145227322741144, "learning_rate": 3.3194753344762204e-06, "loss": 0.0251, "step": 87015 }, { "epoch": 0.3630946916907979, "grad_norm": 1.1535611985391583, "learning_rate": 3.3193799667111624e-06, "loss": 0.027, "step": 87020 }, { "epoch": 0.36311555440578813, "grad_norm": 0.5504313186500607, "learning_rate": 3.3192846071653136e-06, "loss": 0.03, "step": 87025 }, { "epoch": 0.36313641712077843, "grad_norm": 0.6176927870855583, "learning_rate": 3.3191892558374945e-06, "loss": 0.0239, "step": 87030 }, { "epoch": 0.36315727983576873, "grad_norm": 0.7932410200715285, "learning_rate": 3.3190939127265244e-06, "loss": 0.0319, "step": 87035 }, { "epoch": 0.363178142550759, "grad_norm": 0.34617755737646155, "learning_rate": 3.318998577831223e-06, "loss": 0.0413, "step": 87040 }, { "epoch": 0.3631990052657493, "grad_norm": 1.0793041962416767, "learning_rate": 3.31890325115041e-06, "loss": 0.0224, "step": 87045 }, { "epoch": 0.3632198679807395, "grad_norm": 0.5804853498486052, "learning_rate": 3.3188079326829066e-06, "loss": 0.0279, "step": 87050 }, { "epoch": 0.3632407306957298, "grad_norm": 0.574666281858723, "learning_rate": 3.318712622427534e-06, "loss": 0.0199, "step": 87055 }, { "epoch": 0.3632615934107201, "grad_norm": 0.935249792708293, "learning_rate": 3.318617320383112e-06, "loss": 0.0254, "step": 87060 }, { "epoch": 0.36328245612571036, "grad_norm": 0.5408294303790202, "learning_rate": 3.318522026548462e-06, "loss": 0.0238, "step": 87065 }, { "epoch": 0.36330331884070066, "grad_norm": 0.5873503851239558, "learning_rate": 3.3184267409224056e-06, "loss": 0.0268, "step": 87070 }, { "epoch": 0.3633241815556909, "grad_norm": 0.9817891433180779, "learning_rate": 3.318331463503764e-06, "loss": 0.0238, "step": 87075 }, { "epoch": 0.3633450442706812, "grad_norm": 0.9081655160130059, "learning_rate": 3.3182361942913595e-06, "loss": 0.0399, "step": 87080 }, { "epoch": 0.3633659069856715, "grad_norm": 0.769176133534939, "learning_rate": 3.3181409332840137e-06, "loss": 0.0207, "step": 87085 }, { "epoch": 0.36338676970066175, "grad_norm": 0.5289164679610739, "learning_rate": 3.3180456804805493e-06, "loss": 0.0273, "step": 87090 }, { "epoch": 0.36340763241565205, "grad_norm": 0.7843081523392724, "learning_rate": 3.3179504358797887e-06, "loss": 0.0227, "step": 87095 }, { "epoch": 0.36342849513064235, "grad_norm": 0.8923255937471807, "learning_rate": 3.3178551994805542e-06, "loss": 0.0246, "step": 87100 }, { "epoch": 0.3634493578456326, "grad_norm": 1.050266556188554, "learning_rate": 3.3177599712816694e-06, "loss": 0.0305, "step": 87105 }, { "epoch": 0.3634702205606229, "grad_norm": 0.8370442499437438, "learning_rate": 3.3176647512819575e-06, "loss": 0.0284, "step": 87110 }, { "epoch": 0.36349108327561314, "grad_norm": 0.4345897098198886, "learning_rate": 3.3175695394802414e-06, "loss": 0.0239, "step": 87115 }, { "epoch": 0.36351194599060344, "grad_norm": 0.7434333714740761, "learning_rate": 3.317474335875346e-06, "loss": 0.0284, "step": 87120 }, { "epoch": 0.36353280870559374, "grad_norm": 0.7956483229804829, "learning_rate": 3.3173791404660938e-06, "loss": 0.0254, "step": 87125 }, { "epoch": 0.363553671420584, "grad_norm": 0.30858336215925386, "learning_rate": 3.31728395325131e-06, "loss": 0.0263, "step": 87130 }, { "epoch": 0.3635745341355743, "grad_norm": 0.616784360284187, "learning_rate": 3.317188774229819e-06, "loss": 0.0227, "step": 87135 }, { "epoch": 0.3635953968505645, "grad_norm": 0.631193433974932, "learning_rate": 3.317093603400445e-06, "loss": 0.0279, "step": 87140 }, { "epoch": 0.3636162595655548, "grad_norm": 0.5454658721055327, "learning_rate": 3.3169984407620137e-06, "loss": 0.0291, "step": 87145 }, { "epoch": 0.3636371222805451, "grad_norm": 0.5210982590776415, "learning_rate": 3.3169032863133486e-06, "loss": 0.0213, "step": 87150 }, { "epoch": 0.36365798499553537, "grad_norm": 0.6215971342441068, "learning_rate": 3.3168081400532763e-06, "loss": 0.0265, "step": 87155 }, { "epoch": 0.36367884771052567, "grad_norm": 0.5301796664760957, "learning_rate": 3.3167130019806227e-06, "loss": 0.0223, "step": 87160 }, { "epoch": 0.3636997104255159, "grad_norm": 0.5454311448944809, "learning_rate": 3.316617872094214e-06, "loss": 0.021, "step": 87165 }, { "epoch": 0.3637205731405062, "grad_norm": 0.7366516724268204, "learning_rate": 3.316522750392874e-06, "loss": 0.0356, "step": 87170 }, { "epoch": 0.3637414358554965, "grad_norm": 0.9413507040607061, "learning_rate": 3.3164276368754306e-06, "loss": 0.0238, "step": 87175 }, { "epoch": 0.36376229857048675, "grad_norm": 0.768841827474729, "learning_rate": 3.316332531540711e-06, "loss": 0.0227, "step": 87180 }, { "epoch": 0.36378316128547705, "grad_norm": 0.7619463535166539, "learning_rate": 3.31623743438754e-06, "loss": 0.0279, "step": 87185 }, { "epoch": 0.36380402400046735, "grad_norm": 0.9348472461060173, "learning_rate": 3.3161423454147472e-06, "loss": 0.0312, "step": 87190 }, { "epoch": 0.3638248867154576, "grad_norm": 0.9506556396791842, "learning_rate": 3.3160472646211572e-06, "loss": 0.0246, "step": 87195 }, { "epoch": 0.3638457494304479, "grad_norm": 0.9718330711566112, "learning_rate": 3.315952192005599e-06, "loss": 0.0338, "step": 87200 }, { "epoch": 0.36386661214543814, "grad_norm": 0.7859609654193025, "learning_rate": 3.3158571275669017e-06, "loss": 0.0257, "step": 87205 }, { "epoch": 0.36388747486042844, "grad_norm": 0.9874008895225437, "learning_rate": 3.3157620713038894e-06, "loss": 0.0175, "step": 87210 }, { "epoch": 0.36390833757541874, "grad_norm": 0.42722219485255214, "learning_rate": 3.3156670232153943e-06, "loss": 0.0432, "step": 87215 }, { "epoch": 0.363929200290409, "grad_norm": 1.1366538405550008, "learning_rate": 3.315571983300242e-06, "loss": 0.0388, "step": 87220 }, { "epoch": 0.3639500630053993, "grad_norm": 1.3640184312275987, "learning_rate": 3.315476951557263e-06, "loss": 0.031, "step": 87225 }, { "epoch": 0.3639709257203895, "grad_norm": 1.0857979656207573, "learning_rate": 3.3153819279852846e-06, "loss": 0.0308, "step": 87230 }, { "epoch": 0.3639917884353798, "grad_norm": 0.8287771262853678, "learning_rate": 3.315286912583138e-06, "loss": 0.0423, "step": 87235 }, { "epoch": 0.3640126511503701, "grad_norm": 0.7612453234110718, "learning_rate": 3.31519190534965e-06, "loss": 0.029, "step": 87240 }, { "epoch": 0.36403351386536037, "grad_norm": 0.6321004774473057, "learning_rate": 3.3150969062836525e-06, "loss": 0.0222, "step": 87245 }, { "epoch": 0.36405437658035067, "grad_norm": 0.8431748789923956, "learning_rate": 3.315001915383974e-06, "loss": 0.0326, "step": 87250 }, { "epoch": 0.3640752392953409, "grad_norm": 0.762420023135566, "learning_rate": 3.3149069326494455e-06, "loss": 0.023, "step": 87255 }, { "epoch": 0.3640961020103312, "grad_norm": 1.2235234626985418, "learning_rate": 3.314811958078896e-06, "loss": 0.0272, "step": 87260 }, { "epoch": 0.3641169647253215, "grad_norm": 0.8257497990283221, "learning_rate": 3.314716991671158e-06, "loss": 0.0334, "step": 87265 }, { "epoch": 0.36413782744031176, "grad_norm": 1.3026340854529652, "learning_rate": 3.31462203342506e-06, "loss": 0.0253, "step": 87270 }, { "epoch": 0.36415869015530206, "grad_norm": 1.2073212830357245, "learning_rate": 3.3145270833394348e-06, "loss": 0.0251, "step": 87275 }, { "epoch": 0.36417955287029236, "grad_norm": 0.9153122984925639, "learning_rate": 3.3144321414131124e-06, "loss": 0.0289, "step": 87280 }, { "epoch": 0.3642004155852826, "grad_norm": 1.0476159706084138, "learning_rate": 3.3143372076449255e-06, "loss": 0.0328, "step": 87285 }, { "epoch": 0.3642212783002729, "grad_norm": 1.0551751363797481, "learning_rate": 3.314242282033705e-06, "loss": 0.0315, "step": 87290 }, { "epoch": 0.36424214101526314, "grad_norm": 1.4071524450493078, "learning_rate": 3.314147364578283e-06, "loss": 0.022, "step": 87295 }, { "epoch": 0.36426300373025344, "grad_norm": 1.2107902876724606, "learning_rate": 3.3140524552774924e-06, "loss": 0.0382, "step": 87300 }, { "epoch": 0.36428386644524374, "grad_norm": 0.7218518922066041, "learning_rate": 3.313957554130164e-06, "loss": 0.028, "step": 87305 }, { "epoch": 0.364304729160234, "grad_norm": 0.810148483099432, "learning_rate": 3.3138626611351315e-06, "loss": 0.0229, "step": 87310 }, { "epoch": 0.3643255918752243, "grad_norm": 0.49186979300203454, "learning_rate": 3.3137677762912285e-06, "loss": 0.0198, "step": 87315 }, { "epoch": 0.36434645459021453, "grad_norm": 0.8202836736016005, "learning_rate": 3.3136728995972867e-06, "loss": 0.0298, "step": 87320 }, { "epoch": 0.36436731730520483, "grad_norm": 0.6194166229930888, "learning_rate": 3.3135780310521403e-06, "loss": 0.0287, "step": 87325 }, { "epoch": 0.36438818002019513, "grad_norm": 0.8130048347887111, "learning_rate": 3.313483170654622e-06, "loss": 0.0252, "step": 87330 }, { "epoch": 0.3644090427351854, "grad_norm": 0.999767916629311, "learning_rate": 3.3133883184035677e-06, "loss": 0.024, "step": 87335 }, { "epoch": 0.3644299054501757, "grad_norm": 0.6696370910301416, "learning_rate": 3.313293474297809e-06, "loss": 0.0228, "step": 87340 }, { "epoch": 0.3644507681651659, "grad_norm": 2.893212661553271, "learning_rate": 3.3131986383361813e-06, "loss": 0.0407, "step": 87345 }, { "epoch": 0.3644716308801562, "grad_norm": 0.8425048552444178, "learning_rate": 3.313103810517519e-06, "loss": 0.0274, "step": 87350 }, { "epoch": 0.3644924935951465, "grad_norm": 0.9384055079715412, "learning_rate": 3.313008990840657e-06, "loss": 0.0291, "step": 87355 }, { "epoch": 0.36451335631013676, "grad_norm": 0.8332778203917268, "learning_rate": 3.3129141793044312e-06, "loss": 0.0343, "step": 87360 }, { "epoch": 0.36453421902512706, "grad_norm": 0.48304388632525097, "learning_rate": 3.312819375907675e-06, "loss": 0.0158, "step": 87365 }, { "epoch": 0.36455508174011736, "grad_norm": 0.4515349198471772, "learning_rate": 3.3127245806492248e-06, "loss": 0.0235, "step": 87370 }, { "epoch": 0.3645759444551076, "grad_norm": 0.4327712826202857, "learning_rate": 3.3126297935279156e-06, "loss": 0.0227, "step": 87375 }, { "epoch": 0.3645968071700979, "grad_norm": 0.44622935696309246, "learning_rate": 3.312535014542585e-06, "loss": 0.036, "step": 87380 }, { "epoch": 0.36461766988508815, "grad_norm": 1.2073191226849527, "learning_rate": 3.3124402436920676e-06, "loss": 0.046, "step": 87385 }, { "epoch": 0.36463853260007845, "grad_norm": 0.9807937678543521, "learning_rate": 3.3123454809752e-06, "loss": 0.0314, "step": 87390 }, { "epoch": 0.36465939531506875, "grad_norm": 0.7646373543299556, "learning_rate": 3.3122507263908195e-06, "loss": 0.0237, "step": 87395 }, { "epoch": 0.364680258030059, "grad_norm": 0.508464086373454, "learning_rate": 3.3121559799377624e-06, "loss": 0.0219, "step": 87400 }, { "epoch": 0.3647011207450493, "grad_norm": 0.9394405923179919, "learning_rate": 3.312061241614866e-06, "loss": 0.0334, "step": 87405 }, { "epoch": 0.36472198346003953, "grad_norm": 0.2125843719546824, "learning_rate": 3.311966511420967e-06, "loss": 0.0315, "step": 87410 }, { "epoch": 0.36474284617502983, "grad_norm": 0.9281858723877731, "learning_rate": 3.311871789354904e-06, "loss": 0.0204, "step": 87415 }, { "epoch": 0.36476370889002013, "grad_norm": 0.8956890836186763, "learning_rate": 3.3117770754155147e-06, "loss": 0.0275, "step": 87420 }, { "epoch": 0.3647845716050104, "grad_norm": 1.0793357309001324, "learning_rate": 3.311682369601636e-06, "loss": 0.0196, "step": 87425 }, { "epoch": 0.3648054343200007, "grad_norm": 0.4647471793626186, "learning_rate": 3.3115876719121076e-06, "loss": 0.0242, "step": 87430 }, { "epoch": 0.3648262970349909, "grad_norm": 1.1586298238958792, "learning_rate": 3.311492982345767e-06, "loss": 0.0291, "step": 87435 }, { "epoch": 0.3648471597499812, "grad_norm": 0.8613985569369362, "learning_rate": 3.3113983009014535e-06, "loss": 0.0296, "step": 87440 }, { "epoch": 0.3648680224649715, "grad_norm": 0.5323535942714058, "learning_rate": 3.311303627578005e-06, "loss": 0.0323, "step": 87445 }, { "epoch": 0.36488888517996176, "grad_norm": 0.7787210265545089, "learning_rate": 3.3112089623742618e-06, "loss": 0.0256, "step": 87450 }, { "epoch": 0.36490974789495206, "grad_norm": 0.6757981716998229, "learning_rate": 3.311114305289063e-06, "loss": 0.0367, "step": 87455 }, { "epoch": 0.36493061060994236, "grad_norm": 0.7010977626508056, "learning_rate": 3.3110196563212477e-06, "loss": 0.0258, "step": 87460 }, { "epoch": 0.3649514733249326, "grad_norm": 1.1768440261740514, "learning_rate": 3.3109250154696566e-06, "loss": 0.0292, "step": 87465 }, { "epoch": 0.3649723360399229, "grad_norm": 0.7031513125172848, "learning_rate": 3.3108303827331295e-06, "loss": 0.0264, "step": 87470 }, { "epoch": 0.36499319875491315, "grad_norm": 0.7321616786801398, "learning_rate": 3.310735758110507e-06, "loss": 0.0198, "step": 87475 }, { "epoch": 0.36501406146990345, "grad_norm": 0.6575043700996568, "learning_rate": 3.3106411416006295e-06, "loss": 0.0318, "step": 87480 }, { "epoch": 0.36503492418489375, "grad_norm": 0.7432242457254726, "learning_rate": 3.3105465332023367e-06, "loss": 0.0263, "step": 87485 }, { "epoch": 0.365055786899884, "grad_norm": 0.6917945544802332, "learning_rate": 3.310451932914472e-06, "loss": 0.0236, "step": 87490 }, { "epoch": 0.3650766496148743, "grad_norm": 0.5736229246982438, "learning_rate": 3.310357340735874e-06, "loss": 0.0283, "step": 87495 }, { "epoch": 0.36509751232986454, "grad_norm": 0.4284054892405791, "learning_rate": 3.3102627566653866e-06, "loss": 0.0235, "step": 87500 }, { "epoch": 0.36511837504485484, "grad_norm": 0.9323457331899759, "learning_rate": 3.3101681807018497e-06, "loss": 0.0287, "step": 87505 }, { "epoch": 0.36513923775984514, "grad_norm": 1.8587651810327015, "learning_rate": 3.3100736128441063e-06, "loss": 0.0248, "step": 87510 }, { "epoch": 0.3651601004748354, "grad_norm": 0.620611515279176, "learning_rate": 3.3099790530909987e-06, "loss": 0.0213, "step": 87515 }, { "epoch": 0.3651809631898257, "grad_norm": 0.9763727263075901, "learning_rate": 3.3098845014413684e-06, "loss": 0.0281, "step": 87520 }, { "epoch": 0.3652018259048159, "grad_norm": 0.5055876631680777, "learning_rate": 3.309789957894059e-06, "loss": 0.03, "step": 87525 }, { "epoch": 0.3652226886198062, "grad_norm": 0.8093016626515733, "learning_rate": 3.3096954224479123e-06, "loss": 0.028, "step": 87530 }, { "epoch": 0.3652435513347965, "grad_norm": 0.4347453853588923, "learning_rate": 3.309600895101773e-06, "loss": 0.0192, "step": 87535 }, { "epoch": 0.36526441404978677, "grad_norm": 0.590171533093699, "learning_rate": 3.309506375854483e-06, "loss": 0.028, "step": 87540 }, { "epoch": 0.36528527676477707, "grad_norm": 0.6796780809397062, "learning_rate": 3.3094118647048867e-06, "loss": 0.0201, "step": 87545 }, { "epoch": 0.36530613947976737, "grad_norm": 0.934035731739015, "learning_rate": 3.3093173616518274e-06, "loss": 0.0269, "step": 87550 }, { "epoch": 0.3653270021947576, "grad_norm": 0.6881899381649579, "learning_rate": 3.30922286669415e-06, "loss": 0.029, "step": 87555 }, { "epoch": 0.3653478649097479, "grad_norm": 0.5655318132269591, "learning_rate": 3.309128379830698e-06, "loss": 0.0297, "step": 87560 }, { "epoch": 0.36536872762473815, "grad_norm": 0.6908837912427506, "learning_rate": 3.309033901060316e-06, "loss": 0.0291, "step": 87565 }, { "epoch": 0.36538959033972845, "grad_norm": 0.6856031523133416, "learning_rate": 3.308939430381849e-06, "loss": 0.0343, "step": 87570 }, { "epoch": 0.36541045305471875, "grad_norm": 0.717449337139422, "learning_rate": 3.308844967794142e-06, "loss": 0.0344, "step": 87575 }, { "epoch": 0.365431315769709, "grad_norm": 0.6946250607995301, "learning_rate": 3.3087505132960392e-06, "loss": 0.0288, "step": 87580 }, { "epoch": 0.3654521784846993, "grad_norm": 0.48420336549112397, "learning_rate": 3.3086560668863876e-06, "loss": 0.0265, "step": 87585 }, { "epoch": 0.36547304119968954, "grad_norm": 0.5659581193597659, "learning_rate": 3.3085616285640326e-06, "loss": 0.0318, "step": 87590 }, { "epoch": 0.36549390391467984, "grad_norm": 1.3849954312619235, "learning_rate": 3.3084671983278187e-06, "loss": 0.0198, "step": 87595 }, { "epoch": 0.36551476662967014, "grad_norm": 0.5342835171115673, "learning_rate": 3.308372776176593e-06, "loss": 0.0393, "step": 87600 }, { "epoch": 0.3655356293446604, "grad_norm": 0.6270734129739548, "learning_rate": 3.3082783621092023e-06, "loss": 0.0258, "step": 87605 }, { "epoch": 0.3655564920596507, "grad_norm": 0.737129260306404, "learning_rate": 3.308183956124493e-06, "loss": 0.0243, "step": 87610 }, { "epoch": 0.36557735477464093, "grad_norm": 0.6407372447921187, "learning_rate": 3.308089558221311e-06, "loss": 0.0228, "step": 87615 }, { "epoch": 0.3655982174896312, "grad_norm": 1.5516712777427146, "learning_rate": 3.307995168398504e-06, "loss": 0.0412, "step": 87620 }, { "epoch": 0.3656190802046215, "grad_norm": 0.34402561810608123, "learning_rate": 3.3079007866549197e-06, "loss": 0.0292, "step": 87625 }, { "epoch": 0.36563994291961177, "grad_norm": 0.7277197552639405, "learning_rate": 3.3078064129894042e-06, "loss": 0.0407, "step": 87630 }, { "epoch": 0.36566080563460207, "grad_norm": 0.6698500577817464, "learning_rate": 3.3077120474008074e-06, "loss": 0.0232, "step": 87635 }, { "epoch": 0.3656816683495923, "grad_norm": 0.49119357123502977, "learning_rate": 3.3076176898879747e-06, "loss": 0.0222, "step": 87640 }, { "epoch": 0.3657025310645826, "grad_norm": 0.8529869615025599, "learning_rate": 3.307523340449757e-06, "loss": 0.0298, "step": 87645 }, { "epoch": 0.3657233937795729, "grad_norm": 0.6370118852480663, "learning_rate": 3.3074289990850007e-06, "loss": 0.0226, "step": 87650 }, { "epoch": 0.36574425649456316, "grad_norm": 0.4986788520077184, "learning_rate": 3.3073346657925555e-06, "loss": 0.0276, "step": 87655 }, { "epoch": 0.36576511920955346, "grad_norm": 0.7887782875219967, "learning_rate": 3.3072403405712696e-06, "loss": 0.0292, "step": 87660 }, { "epoch": 0.36578598192454376, "grad_norm": 1.0063698535565466, "learning_rate": 3.3071460234199926e-06, "loss": 0.0328, "step": 87665 }, { "epoch": 0.365806844639534, "grad_norm": 0.5860627916588452, "learning_rate": 3.3070517143375736e-06, "loss": 0.0261, "step": 87670 }, { "epoch": 0.3658277073545243, "grad_norm": 0.383104440662744, "learning_rate": 3.306957413322862e-06, "loss": 0.0221, "step": 87675 }, { "epoch": 0.36584857006951454, "grad_norm": 0.7411780200472774, "learning_rate": 3.306863120374709e-06, "loss": 0.0345, "step": 87680 }, { "epoch": 0.36586943278450484, "grad_norm": 0.47500052503921697, "learning_rate": 3.3067688354919626e-06, "loss": 0.0228, "step": 87685 }, { "epoch": 0.36589029549949514, "grad_norm": 0.8989021836247444, "learning_rate": 3.306674558673474e-06, "loss": 0.0374, "step": 87690 }, { "epoch": 0.3659111582144854, "grad_norm": 1.03234739667161, "learning_rate": 3.306580289918094e-06, "loss": 0.0337, "step": 87695 }, { "epoch": 0.3659320209294757, "grad_norm": 0.46507633909180995, "learning_rate": 3.3064860292246726e-06, "loss": 0.0214, "step": 87700 }, { "epoch": 0.36595288364446593, "grad_norm": 0.4979286119583933, "learning_rate": 3.3063917765920617e-06, "loss": 0.0203, "step": 87705 }, { "epoch": 0.36597374635945623, "grad_norm": 1.0914236642931296, "learning_rate": 3.3062975320191116e-06, "loss": 0.0331, "step": 87710 }, { "epoch": 0.36599460907444653, "grad_norm": 0.9190230930010601, "learning_rate": 3.306203295504674e-06, "loss": 0.0351, "step": 87715 }, { "epoch": 0.3660154717894368, "grad_norm": 0.5865088337431623, "learning_rate": 3.306109067047601e-06, "loss": 0.0199, "step": 87720 }, { "epoch": 0.3660363345044271, "grad_norm": 0.7278674508870125, "learning_rate": 3.3060148466467436e-06, "loss": 0.0302, "step": 87725 }, { "epoch": 0.3660571972194173, "grad_norm": 0.6087099125157708, "learning_rate": 3.3059206343009547e-06, "loss": 0.0236, "step": 87730 }, { "epoch": 0.3660780599344076, "grad_norm": 0.5653019960160801, "learning_rate": 3.305826430009086e-06, "loss": 0.0258, "step": 87735 }, { "epoch": 0.3660989226493979, "grad_norm": 0.7482038032229192, "learning_rate": 3.3057322337699905e-06, "loss": 0.0288, "step": 87740 }, { "epoch": 0.36611978536438816, "grad_norm": 0.5796407349004459, "learning_rate": 3.305638045582521e-06, "loss": 0.02, "step": 87745 }, { "epoch": 0.36614064807937846, "grad_norm": 0.5175944052267473, "learning_rate": 3.3055438654455297e-06, "loss": 0.0233, "step": 87750 }, { "epoch": 0.36616151079436876, "grad_norm": 0.3596102324749783, "learning_rate": 3.3054496933578717e-06, "loss": 0.0241, "step": 87755 }, { "epoch": 0.366182373509359, "grad_norm": 0.37191864738073926, "learning_rate": 3.305355529318398e-06, "loss": 0.0257, "step": 87760 }, { "epoch": 0.3662032362243493, "grad_norm": 0.9541825226545501, "learning_rate": 3.305261373325964e-06, "loss": 0.0253, "step": 87765 }, { "epoch": 0.36622409893933955, "grad_norm": 0.8677284057505533, "learning_rate": 3.305167225379424e-06, "loss": 0.0249, "step": 87770 }, { "epoch": 0.36624496165432985, "grad_norm": 0.46226125488628483, "learning_rate": 3.3050730854776305e-06, "loss": 0.0271, "step": 87775 }, { "epoch": 0.36626582436932015, "grad_norm": 0.9951605190306737, "learning_rate": 3.304978953619439e-06, "loss": 0.0245, "step": 87780 }, { "epoch": 0.3662866870843104, "grad_norm": 0.778399907546226, "learning_rate": 3.304884829803704e-06, "loss": 0.025, "step": 87785 }, { "epoch": 0.3663075497993007, "grad_norm": 0.8964751425345888, "learning_rate": 3.3047907140292795e-06, "loss": 0.0288, "step": 87790 }, { "epoch": 0.36632841251429094, "grad_norm": 0.766348720901753, "learning_rate": 3.3046966062950225e-06, "loss": 0.0248, "step": 87795 }, { "epoch": 0.36634927522928123, "grad_norm": 0.5011554749668259, "learning_rate": 3.3046025065997862e-06, "loss": 0.0319, "step": 87800 }, { "epoch": 0.36637013794427153, "grad_norm": 0.6739539381534859, "learning_rate": 3.304508414942427e-06, "loss": 0.0229, "step": 87805 }, { "epoch": 0.3663910006592618, "grad_norm": 1.736901140516314, "learning_rate": 3.3044143313218006e-06, "loss": 0.0367, "step": 87810 }, { "epoch": 0.3664118633742521, "grad_norm": 0.9485411154821609, "learning_rate": 3.304320255736764e-06, "loss": 0.0263, "step": 87815 }, { "epoch": 0.3664327260892423, "grad_norm": 1.2931962331528877, "learning_rate": 3.3042261881861717e-06, "loss": 0.0403, "step": 87820 }, { "epoch": 0.3664535888042326, "grad_norm": 0.5589039391301658, "learning_rate": 3.304132128668881e-06, "loss": 0.0193, "step": 87825 }, { "epoch": 0.3664744515192229, "grad_norm": 0.9414667846364715, "learning_rate": 3.304038077183749e-06, "loss": 0.0286, "step": 87830 }, { "epoch": 0.36649531423421317, "grad_norm": 0.8163153638057582, "learning_rate": 3.303944033729631e-06, "loss": 0.0333, "step": 87835 }, { "epoch": 0.36651617694920346, "grad_norm": 0.5674324792577542, "learning_rate": 3.303849998305386e-06, "loss": 0.025, "step": 87840 }, { "epoch": 0.36653703966419376, "grad_norm": 0.7404196849359892, "learning_rate": 3.30375597090987e-06, "loss": 0.0323, "step": 87845 }, { "epoch": 0.366557902379184, "grad_norm": 0.9788588161537941, "learning_rate": 3.3036619515419414e-06, "loss": 0.0384, "step": 87850 }, { "epoch": 0.3665787650941743, "grad_norm": 0.6656539593008979, "learning_rate": 3.3035679402004577e-06, "loss": 0.0299, "step": 87855 }, { "epoch": 0.36659962780916455, "grad_norm": 0.6322455998455893, "learning_rate": 3.303473936884277e-06, "loss": 0.023, "step": 87860 }, { "epoch": 0.36662049052415485, "grad_norm": 0.6982428472388768, "learning_rate": 3.303379941592258e-06, "loss": 0.0272, "step": 87865 }, { "epoch": 0.36664135323914515, "grad_norm": 0.8694818479070947, "learning_rate": 3.303285954323258e-06, "loss": 0.033, "step": 87870 }, { "epoch": 0.3666622159541354, "grad_norm": 0.936084892660985, "learning_rate": 3.303191975076137e-06, "loss": 0.0285, "step": 87875 }, { "epoch": 0.3666830786691257, "grad_norm": 0.6772447805080936, "learning_rate": 3.303098003849753e-06, "loss": 0.0307, "step": 87880 }, { "epoch": 0.36670394138411594, "grad_norm": 0.7871926733084347, "learning_rate": 3.303004040642966e-06, "loss": 0.0476, "step": 87885 }, { "epoch": 0.36672480409910624, "grad_norm": 0.8850773452985419, "learning_rate": 3.302910085454634e-06, "loss": 0.0321, "step": 87890 }, { "epoch": 0.36674566681409654, "grad_norm": 0.6688033699877046, "learning_rate": 3.3028161382836182e-06, "loss": 0.0272, "step": 87895 }, { "epoch": 0.3667665295290868, "grad_norm": 1.1606691008277163, "learning_rate": 3.3027221991287777e-06, "loss": 0.0287, "step": 87900 }, { "epoch": 0.3667873922440771, "grad_norm": 1.8314476353353342, "learning_rate": 3.3026282679889725e-06, "loss": 0.0389, "step": 87905 }, { "epoch": 0.3668082549590673, "grad_norm": 0.8788786118851282, "learning_rate": 3.3025343448630637e-06, "loss": 0.0294, "step": 87910 }, { "epoch": 0.3668291176740576, "grad_norm": 0.3345699632869742, "learning_rate": 3.3024404297499104e-06, "loss": 0.0124, "step": 87915 }, { "epoch": 0.3668499803890479, "grad_norm": 0.6378584123010149, "learning_rate": 3.3023465226483746e-06, "loss": 0.0186, "step": 87920 }, { "epoch": 0.36687084310403817, "grad_norm": 1.0063133708886647, "learning_rate": 3.3022526235573175e-06, "loss": 0.0371, "step": 87925 }, { "epoch": 0.36689170581902847, "grad_norm": 0.9994593542025185, "learning_rate": 3.302158732475599e-06, "loss": 0.0328, "step": 87930 }, { "epoch": 0.36691256853401877, "grad_norm": 0.8659512264036305, "learning_rate": 3.3020648494020814e-06, "loss": 0.042, "step": 87935 }, { "epoch": 0.366933431249009, "grad_norm": 0.6943533968064078, "learning_rate": 3.301970974335626e-06, "loss": 0.0344, "step": 87940 }, { "epoch": 0.3669542939639993, "grad_norm": 0.8250223393662678, "learning_rate": 3.3018771072750953e-06, "loss": 0.0283, "step": 87945 }, { "epoch": 0.36697515667898956, "grad_norm": 0.6230252406939598, "learning_rate": 3.3017832482193507e-06, "loss": 0.0234, "step": 87950 }, { "epoch": 0.36699601939397986, "grad_norm": 0.9187941240241071, "learning_rate": 3.3016893971672548e-06, "loss": 0.0255, "step": 87955 }, { "epoch": 0.36701688210897015, "grad_norm": 0.718202976972091, "learning_rate": 3.3015955541176703e-06, "loss": 0.0257, "step": 87960 }, { "epoch": 0.3670377448239604, "grad_norm": 0.8580673999944849, "learning_rate": 3.3015017190694597e-06, "loss": 0.032, "step": 87965 }, { "epoch": 0.3670586075389507, "grad_norm": 0.8712914095353879, "learning_rate": 3.301407892021487e-06, "loss": 0.0324, "step": 87970 }, { "epoch": 0.36707947025394094, "grad_norm": 0.6882217275792561, "learning_rate": 3.301314072972614e-06, "loss": 0.0242, "step": 87975 }, { "epoch": 0.36710033296893124, "grad_norm": 0.9645238065062759, "learning_rate": 3.301220261921705e-06, "loss": 0.0285, "step": 87980 }, { "epoch": 0.36712119568392154, "grad_norm": 0.8287821012657467, "learning_rate": 3.3011264588676244e-06, "loss": 0.0352, "step": 87985 }, { "epoch": 0.3671420583989118, "grad_norm": 0.644464939806371, "learning_rate": 3.3010326638092344e-06, "loss": 0.0269, "step": 87990 }, { "epoch": 0.3671629211139021, "grad_norm": 0.42618806061328257, "learning_rate": 3.3009388767454004e-06, "loss": 0.0232, "step": 87995 }, { "epoch": 0.36718378382889233, "grad_norm": 0.9257208961652018, "learning_rate": 3.300845097674986e-06, "loss": 0.0335, "step": 88000 }, { "epoch": 0.36720464654388263, "grad_norm": 0.4185109322395688, "learning_rate": 3.3007513265968567e-06, "loss": 0.0272, "step": 88005 }, { "epoch": 0.36722550925887293, "grad_norm": 0.4047368921102356, "learning_rate": 3.300657563509877e-06, "loss": 0.0221, "step": 88010 }, { "epoch": 0.3672463719738632, "grad_norm": 0.7372728948161377, "learning_rate": 3.3005638084129114e-06, "loss": 0.0318, "step": 88015 }, { "epoch": 0.36726723468885347, "grad_norm": 0.8490426530933196, "learning_rate": 3.3004700613048257e-06, "loss": 0.0261, "step": 88020 }, { "epoch": 0.36728809740384377, "grad_norm": 0.6939191303331652, "learning_rate": 3.300376322184486e-06, "loss": 0.0312, "step": 88025 }, { "epoch": 0.367308960118834, "grad_norm": 0.7263566995183209, "learning_rate": 3.3002825910507564e-06, "loss": 0.0265, "step": 88030 }, { "epoch": 0.3673298228338243, "grad_norm": 0.8029541421916258, "learning_rate": 3.3001888679025044e-06, "loss": 0.0253, "step": 88035 }, { "epoch": 0.36735068554881456, "grad_norm": 0.8005872390397808, "learning_rate": 3.3000951527385955e-06, "loss": 0.038, "step": 88040 }, { "epoch": 0.36737154826380486, "grad_norm": 0.4206257923775457, "learning_rate": 3.300001445557896e-06, "loss": 0.0211, "step": 88045 }, { "epoch": 0.36739241097879516, "grad_norm": 1.058878305163066, "learning_rate": 3.299907746359273e-06, "loss": 0.035, "step": 88050 }, { "epoch": 0.3674132736937854, "grad_norm": 1.9309032553027305, "learning_rate": 3.2998140551415934e-06, "loss": 0.0304, "step": 88055 }, { "epoch": 0.3674341364087757, "grad_norm": 0.4762132492538773, "learning_rate": 3.2997203719037234e-06, "loss": 0.0257, "step": 88060 }, { "epoch": 0.36745499912376595, "grad_norm": 1.0900578484875676, "learning_rate": 3.299626696644532e-06, "loss": 0.0243, "step": 88065 }, { "epoch": 0.36747586183875625, "grad_norm": 0.49778239043319555, "learning_rate": 3.2995330293628848e-06, "loss": 0.0217, "step": 88070 }, { "epoch": 0.36749672455374655, "grad_norm": 1.1437700872071335, "learning_rate": 3.29943937005765e-06, "loss": 0.0286, "step": 88075 }, { "epoch": 0.3675175872687368, "grad_norm": 0.2635166015836596, "learning_rate": 3.2993457187276965e-06, "loss": 0.0239, "step": 88080 }, { "epoch": 0.3675384499837271, "grad_norm": 1.316208850253697, "learning_rate": 3.299252075371892e-06, "loss": 0.0363, "step": 88085 }, { "epoch": 0.36755931269871733, "grad_norm": 0.38540917622862536, "learning_rate": 3.299158439989105e-06, "loss": 0.0205, "step": 88090 }, { "epoch": 0.36758017541370763, "grad_norm": 0.6477081361958757, "learning_rate": 3.2990648125782047e-06, "loss": 0.0302, "step": 88095 }, { "epoch": 0.36760103812869793, "grad_norm": 0.5989351499990679, "learning_rate": 3.2989711931380593e-06, "loss": 0.0261, "step": 88100 }, { "epoch": 0.3676219008436882, "grad_norm": 0.4051670867685821, "learning_rate": 3.298877581667537e-06, "loss": 0.0275, "step": 88105 }, { "epoch": 0.3676427635586785, "grad_norm": 1.1400375604054973, "learning_rate": 3.298783978165509e-06, "loss": 0.0361, "step": 88110 }, { "epoch": 0.3676636262736688, "grad_norm": 0.5321084313178706, "learning_rate": 3.298690382630844e-06, "loss": 0.0205, "step": 88115 }, { "epoch": 0.367684488988659, "grad_norm": 1.3346162413742302, "learning_rate": 3.2985967950624115e-06, "loss": 0.0353, "step": 88120 }, { "epoch": 0.3677053517036493, "grad_norm": 0.44368993897497994, "learning_rate": 3.2985032154590814e-06, "loss": 0.0268, "step": 88125 }, { "epoch": 0.36772621441863956, "grad_norm": 0.5966281675721242, "learning_rate": 3.2984096438197254e-06, "loss": 0.0281, "step": 88130 }, { "epoch": 0.36774707713362986, "grad_norm": 0.7361685082613989, "learning_rate": 3.2983160801432122e-06, "loss": 0.0254, "step": 88135 }, { "epoch": 0.36776793984862016, "grad_norm": 0.5874977967092574, "learning_rate": 3.298222524428413e-06, "loss": 0.0196, "step": 88140 }, { "epoch": 0.3677888025636104, "grad_norm": 1.4885564820131068, "learning_rate": 3.298128976674199e-06, "loss": 0.0208, "step": 88145 }, { "epoch": 0.3678096652786007, "grad_norm": 0.9759946630265499, "learning_rate": 3.298035436879442e-06, "loss": 0.0299, "step": 88150 }, { "epoch": 0.36783052799359095, "grad_norm": 1.4306755954350105, "learning_rate": 3.2979419050430124e-06, "loss": 0.0353, "step": 88155 }, { "epoch": 0.36785139070858125, "grad_norm": 0.2900842237737827, "learning_rate": 3.2978483811637807e-06, "loss": 0.0238, "step": 88160 }, { "epoch": 0.36787225342357155, "grad_norm": 1.298167286080273, "learning_rate": 3.297754865240621e-06, "loss": 0.0261, "step": 88165 }, { "epoch": 0.3678931161385618, "grad_norm": 0.7745045774245743, "learning_rate": 3.297661357272404e-06, "loss": 0.0304, "step": 88170 }, { "epoch": 0.3679139788535521, "grad_norm": 0.8222130820623396, "learning_rate": 3.297567857258002e-06, "loss": 0.0235, "step": 88175 }, { "epoch": 0.36793484156854234, "grad_norm": 0.732058313425704, "learning_rate": 3.2974743651962883e-06, "loss": 0.0264, "step": 88180 }, { "epoch": 0.36795570428353264, "grad_norm": 0.7778019470495408, "learning_rate": 3.2973808810861348e-06, "loss": 0.025, "step": 88185 }, { "epoch": 0.36797656699852294, "grad_norm": 1.3938858702780588, "learning_rate": 3.297287404926414e-06, "loss": 0.0358, "step": 88190 }, { "epoch": 0.3679974297135132, "grad_norm": 0.3517907180182696, "learning_rate": 3.2971939367160003e-06, "loss": 0.0199, "step": 88195 }, { "epoch": 0.3680182924285035, "grad_norm": 1.5916526557019894, "learning_rate": 3.2971004764537668e-06, "loss": 0.0384, "step": 88200 }, { "epoch": 0.3680391551434938, "grad_norm": 0.7552829815083482, "learning_rate": 3.297007024138586e-06, "loss": 0.0297, "step": 88205 }, { "epoch": 0.368060017858484, "grad_norm": 0.666833799791318, "learning_rate": 3.296913579769333e-06, "loss": 0.022, "step": 88210 }, { "epoch": 0.3680808805734743, "grad_norm": 0.6852546250521229, "learning_rate": 3.296820143344881e-06, "loss": 0.0251, "step": 88215 }, { "epoch": 0.36810174328846457, "grad_norm": 1.0367760301367523, "learning_rate": 3.2967267148641043e-06, "loss": 0.027, "step": 88220 }, { "epoch": 0.36812260600345487, "grad_norm": 1.1153988557322034, "learning_rate": 3.2966332943258783e-06, "loss": 0.0296, "step": 88225 }, { "epoch": 0.36814346871844517, "grad_norm": 0.8990544730070731, "learning_rate": 3.2965398817290762e-06, "loss": 0.0214, "step": 88230 }, { "epoch": 0.3681643314334354, "grad_norm": 0.49792545723117904, "learning_rate": 3.2964464770725745e-06, "loss": 0.0268, "step": 88235 }, { "epoch": 0.3681851941484257, "grad_norm": 1.14374013281516, "learning_rate": 3.296353080355247e-06, "loss": 0.031, "step": 88240 }, { "epoch": 0.36820605686341595, "grad_norm": 0.9254128392936993, "learning_rate": 3.2962596915759702e-06, "loss": 0.0229, "step": 88245 }, { "epoch": 0.36822691957840625, "grad_norm": 0.6965187719887226, "learning_rate": 3.296166310733619e-06, "loss": 0.0381, "step": 88250 }, { "epoch": 0.36824778229339655, "grad_norm": 0.4981950810568741, "learning_rate": 3.2960729378270693e-06, "loss": 0.0217, "step": 88255 }, { "epoch": 0.3682686450083868, "grad_norm": 0.9489119886461108, "learning_rate": 3.295979572855198e-06, "loss": 0.0311, "step": 88260 }, { "epoch": 0.3682895077233771, "grad_norm": 0.6074510492049106, "learning_rate": 3.2958862158168796e-06, "loss": 0.0204, "step": 88265 }, { "epoch": 0.36831037043836734, "grad_norm": 0.705413175821904, "learning_rate": 3.2957928667109923e-06, "loss": 0.0286, "step": 88270 }, { "epoch": 0.36833123315335764, "grad_norm": 0.9490273913202427, "learning_rate": 3.2956995255364123e-06, "loss": 0.0305, "step": 88275 }, { "epoch": 0.36835209586834794, "grad_norm": 0.4298491585458397, "learning_rate": 3.2956061922920157e-06, "loss": 0.0188, "step": 88280 }, { "epoch": 0.3683729585833382, "grad_norm": 0.6996494560278222, "learning_rate": 3.295512866976681e-06, "loss": 0.0245, "step": 88285 }, { "epoch": 0.3683938212983285, "grad_norm": 0.6312087271452016, "learning_rate": 3.295419549589285e-06, "loss": 0.0313, "step": 88290 }, { "epoch": 0.3684146840133188, "grad_norm": 0.5444217588055641, "learning_rate": 3.295326240128705e-06, "loss": 0.0259, "step": 88295 }, { "epoch": 0.368435546728309, "grad_norm": 1.472729816994238, "learning_rate": 3.2952329385938184e-06, "loss": 0.0264, "step": 88300 }, { "epoch": 0.3684564094432993, "grad_norm": 0.8449795153875416, "learning_rate": 3.295139644983505e-06, "loss": 0.0275, "step": 88305 }, { "epoch": 0.36847727215828957, "grad_norm": 0.36944726385683235, "learning_rate": 3.295046359296642e-06, "loss": 0.0279, "step": 88310 }, { "epoch": 0.36849813487327987, "grad_norm": 0.5932132795445566, "learning_rate": 3.2949530815321075e-06, "loss": 0.0237, "step": 88315 }, { "epoch": 0.36851899758827017, "grad_norm": 1.2064028513354388, "learning_rate": 3.2948598116887805e-06, "loss": 0.0472, "step": 88320 }, { "epoch": 0.3685398603032604, "grad_norm": 0.4561299963033043, "learning_rate": 3.29476654976554e-06, "loss": 0.0285, "step": 88325 }, { "epoch": 0.3685607230182507, "grad_norm": 0.5473437214586249, "learning_rate": 3.2946732957612656e-06, "loss": 0.0303, "step": 88330 }, { "epoch": 0.36858158573324096, "grad_norm": 1.0359145103600333, "learning_rate": 3.294580049674836e-06, "loss": 0.0284, "step": 88335 }, { "epoch": 0.36860244844823126, "grad_norm": 1.360741148962947, "learning_rate": 3.294486811505131e-06, "loss": 0.0272, "step": 88340 }, { "epoch": 0.36862331116322156, "grad_norm": 1.9629401334746623, "learning_rate": 3.2943935812510314e-06, "loss": 0.0289, "step": 88345 }, { "epoch": 0.3686441738782118, "grad_norm": 0.7613236056926064, "learning_rate": 3.2943003589114157e-06, "loss": 0.0244, "step": 88350 }, { "epoch": 0.3686650365932021, "grad_norm": 1.140821679746065, "learning_rate": 3.2942071444851646e-06, "loss": 0.0264, "step": 88355 }, { "epoch": 0.36868589930819234, "grad_norm": 0.6084234420788542, "learning_rate": 3.294113937971159e-06, "loss": 0.022, "step": 88360 }, { "epoch": 0.36870676202318264, "grad_norm": 0.43438601489211676, "learning_rate": 3.29402073936828e-06, "loss": 0.0253, "step": 88365 }, { "epoch": 0.36872762473817294, "grad_norm": 0.48259407768648954, "learning_rate": 3.293927548675408e-06, "loss": 0.0262, "step": 88370 }, { "epoch": 0.3687484874531632, "grad_norm": 0.8185605576770614, "learning_rate": 3.2938343658914236e-06, "loss": 0.0213, "step": 88375 }, { "epoch": 0.3687693501681535, "grad_norm": 1.7518671321673542, "learning_rate": 3.293741191015209e-06, "loss": 0.0324, "step": 88380 }, { "epoch": 0.3687902128831438, "grad_norm": 0.6977910090582177, "learning_rate": 3.293648024045645e-06, "loss": 0.0243, "step": 88385 }, { "epoch": 0.36881107559813403, "grad_norm": 0.6160199012683969, "learning_rate": 3.2935548649816153e-06, "loss": 0.0294, "step": 88390 }, { "epoch": 0.36883193831312433, "grad_norm": 0.5208867382926785, "learning_rate": 3.293461713822e-06, "loss": 0.0258, "step": 88395 }, { "epoch": 0.3688528010281146, "grad_norm": 0.8450706763338788, "learning_rate": 3.293368570565681e-06, "loss": 0.0292, "step": 88400 }, { "epoch": 0.3688736637431049, "grad_norm": 0.5806520813575042, "learning_rate": 3.2932754352115433e-06, "loss": 0.0254, "step": 88405 }, { "epoch": 0.3688945264580952, "grad_norm": 0.8878188920184918, "learning_rate": 3.2931823077584667e-06, "loss": 0.0302, "step": 88410 }, { "epoch": 0.3689153891730854, "grad_norm": 0.7367939381409203, "learning_rate": 3.293089188205336e-06, "loss": 0.0317, "step": 88415 }, { "epoch": 0.3689362518880757, "grad_norm": 0.8813903151132647, "learning_rate": 3.2929960765510346e-06, "loss": 0.0277, "step": 88420 }, { "epoch": 0.36895711460306596, "grad_norm": 0.9445379772078915, "learning_rate": 3.2929029727944445e-06, "loss": 0.0334, "step": 88425 }, { "epoch": 0.36897797731805626, "grad_norm": 1.2881481331233218, "learning_rate": 3.29280987693445e-06, "loss": 0.0286, "step": 88430 }, { "epoch": 0.36899884003304656, "grad_norm": 0.6867692950701093, "learning_rate": 3.2927167889699346e-06, "loss": 0.0298, "step": 88435 }, { "epoch": 0.3690197027480368, "grad_norm": 0.8701088297805732, "learning_rate": 3.2926237088997833e-06, "loss": 0.0374, "step": 88440 }, { "epoch": 0.3690405654630271, "grad_norm": 0.5746380625362885, "learning_rate": 3.2925306367228787e-06, "loss": 0.0213, "step": 88445 }, { "epoch": 0.36906142817801735, "grad_norm": 0.5542940299017672, "learning_rate": 3.2924375724381066e-06, "loss": 0.0226, "step": 88450 }, { "epoch": 0.36908229089300765, "grad_norm": 0.813161698005603, "learning_rate": 3.2923445160443514e-06, "loss": 0.0239, "step": 88455 }, { "epoch": 0.36910315360799795, "grad_norm": 1.5503012711409752, "learning_rate": 3.2922514675404977e-06, "loss": 0.0324, "step": 88460 }, { "epoch": 0.3691240163229882, "grad_norm": 0.5882339859256449, "learning_rate": 3.2921584269254315e-06, "loss": 0.0236, "step": 88465 }, { "epoch": 0.3691448790379785, "grad_norm": 0.6522169364386008, "learning_rate": 3.292065394198036e-06, "loss": 0.0331, "step": 88470 }, { "epoch": 0.3691657417529688, "grad_norm": 0.9544844369421821, "learning_rate": 3.2919723693571998e-06, "loss": 0.0236, "step": 88475 }, { "epoch": 0.36918660446795903, "grad_norm": 0.9073857579153404, "learning_rate": 3.291879352401806e-06, "loss": 0.022, "step": 88480 }, { "epoch": 0.36920746718294933, "grad_norm": 0.7588725985721715, "learning_rate": 3.2917863433307427e-06, "loss": 0.0269, "step": 88485 }, { "epoch": 0.3692283298979396, "grad_norm": 0.7498139085877554, "learning_rate": 3.2916933421428953e-06, "loss": 0.0185, "step": 88490 }, { "epoch": 0.3692491926129299, "grad_norm": 1.2796430279402002, "learning_rate": 3.29160034883715e-06, "loss": 0.0261, "step": 88495 }, { "epoch": 0.3692700553279202, "grad_norm": 0.931068322507051, "learning_rate": 3.2915073634123935e-06, "loss": 0.0237, "step": 88500 }, { "epoch": 0.3692909180429104, "grad_norm": 1.1409009248207507, "learning_rate": 3.291414385867513e-06, "loss": 0.0253, "step": 88505 }, { "epoch": 0.3693117807579007, "grad_norm": 0.7443887193596248, "learning_rate": 3.2913214162013955e-06, "loss": 0.0182, "step": 88510 }, { "epoch": 0.36933264347289096, "grad_norm": 0.7698393666252511, "learning_rate": 3.2912284544129286e-06, "loss": 0.0279, "step": 88515 }, { "epoch": 0.36935350618788126, "grad_norm": 1.5950754950639152, "learning_rate": 3.2911355005009986e-06, "loss": 0.0314, "step": 88520 }, { "epoch": 0.36937436890287156, "grad_norm": 1.3811012995934953, "learning_rate": 3.291042554464496e-06, "loss": 0.0274, "step": 88525 }, { "epoch": 0.3693952316178618, "grad_norm": 0.8287744335813367, "learning_rate": 3.2909496163023057e-06, "loss": 0.022, "step": 88530 }, { "epoch": 0.3694160943328521, "grad_norm": 0.4945998714938902, "learning_rate": 3.290856686013318e-06, "loss": 0.022, "step": 88535 }, { "epoch": 0.36943695704784235, "grad_norm": 0.8208686984187935, "learning_rate": 3.2907637635964203e-06, "loss": 0.0283, "step": 88540 }, { "epoch": 0.36945781976283265, "grad_norm": 0.4483501192740141, "learning_rate": 3.290670849050501e-06, "loss": 0.0204, "step": 88545 }, { "epoch": 0.36947868247782295, "grad_norm": 0.5349853121030058, "learning_rate": 3.290577942374451e-06, "loss": 0.0246, "step": 88550 }, { "epoch": 0.3694995451928132, "grad_norm": 0.7997124372611683, "learning_rate": 3.290485043567157e-06, "loss": 0.0239, "step": 88555 }, { "epoch": 0.3695204079078035, "grad_norm": 1.3892211897322038, "learning_rate": 3.29039215262751e-06, "loss": 0.0324, "step": 88560 }, { "epoch": 0.3695412706227938, "grad_norm": 0.9389055677598016, "learning_rate": 3.290299269554398e-06, "loss": 0.0281, "step": 88565 }, { "epoch": 0.36956213333778404, "grad_norm": 0.5213721509814094, "learning_rate": 3.290206394346712e-06, "loss": 0.0209, "step": 88570 }, { "epoch": 0.36958299605277434, "grad_norm": 1.240785940508175, "learning_rate": 3.2901135270033425e-06, "loss": 0.0302, "step": 88575 }, { "epoch": 0.3696038587677646, "grad_norm": 0.9316765594808116, "learning_rate": 3.290020667523178e-06, "loss": 0.029, "step": 88580 }, { "epoch": 0.3696247214827549, "grad_norm": 0.83945156349266, "learning_rate": 3.2899278159051096e-06, "loss": 0.0296, "step": 88585 }, { "epoch": 0.3696455841977452, "grad_norm": 0.5039812925642678, "learning_rate": 3.289834972148027e-06, "loss": 0.0288, "step": 88590 }, { "epoch": 0.3696664469127354, "grad_norm": 0.6577781535916406, "learning_rate": 3.289742136250824e-06, "loss": 0.0313, "step": 88595 }, { "epoch": 0.3696873096277257, "grad_norm": 0.6719424102569093, "learning_rate": 3.289649308212389e-06, "loss": 0.0246, "step": 88600 }, { "epoch": 0.36970817234271597, "grad_norm": 1.0236224672609213, "learning_rate": 3.2895564880316133e-06, "loss": 0.0322, "step": 88605 }, { "epoch": 0.36972903505770627, "grad_norm": 1.3134917703490534, "learning_rate": 3.28946367570739e-06, "loss": 0.0352, "step": 88610 }, { "epoch": 0.36974989777269657, "grad_norm": 0.8093995568800536, "learning_rate": 3.28937087123861e-06, "loss": 0.0275, "step": 88615 }, { "epoch": 0.3697707604876868, "grad_norm": 0.5327398138454718, "learning_rate": 3.289278074624165e-06, "loss": 0.0247, "step": 88620 }, { "epoch": 0.3697916232026771, "grad_norm": 0.7328372565949676, "learning_rate": 3.2891852858629475e-06, "loss": 0.0282, "step": 88625 }, { "epoch": 0.36981248591766736, "grad_norm": 0.7388316178167647, "learning_rate": 3.2890925049538496e-06, "loss": 0.0287, "step": 88630 }, { "epoch": 0.36983334863265765, "grad_norm": 0.40083478532411915, "learning_rate": 3.2889997318957646e-06, "loss": 0.0183, "step": 88635 }, { "epoch": 0.36985421134764795, "grad_norm": 0.722095886824507, "learning_rate": 3.288906966687584e-06, "loss": 0.0265, "step": 88640 }, { "epoch": 0.3698750740626382, "grad_norm": 0.5816247068722525, "learning_rate": 3.2888142093282023e-06, "loss": 0.026, "step": 88645 }, { "epoch": 0.3698959367776285, "grad_norm": 0.6791389911066601, "learning_rate": 3.2887214598165114e-06, "loss": 0.0318, "step": 88650 }, { "epoch": 0.3699167994926188, "grad_norm": 0.6170726641028224, "learning_rate": 3.288628718151406e-06, "loss": 0.0252, "step": 88655 }, { "epoch": 0.36993766220760904, "grad_norm": 0.3839170143839961, "learning_rate": 3.2885359843317794e-06, "loss": 0.0276, "step": 88660 }, { "epoch": 0.36995852492259934, "grad_norm": 1.8119989326550532, "learning_rate": 3.288443258356525e-06, "loss": 0.0368, "step": 88665 }, { "epoch": 0.3699793876375896, "grad_norm": 0.6984888098999347, "learning_rate": 3.288350540224538e-06, "loss": 0.0232, "step": 88670 }, { "epoch": 0.3700002503525799, "grad_norm": 0.8896424207206809, "learning_rate": 3.288257829934711e-06, "loss": 0.0229, "step": 88675 }, { "epoch": 0.3700211130675702, "grad_norm": 0.5532809160599803, "learning_rate": 3.288165127485941e-06, "loss": 0.0268, "step": 88680 }, { "epoch": 0.37004197578256043, "grad_norm": 0.657573807389353, "learning_rate": 3.2880724328771207e-06, "loss": 0.0318, "step": 88685 }, { "epoch": 0.37006283849755073, "grad_norm": 0.8240558582012892, "learning_rate": 3.287979746107146e-06, "loss": 0.0273, "step": 88690 }, { "epoch": 0.37008370121254097, "grad_norm": 0.6776414120042724, "learning_rate": 3.2878870671749124e-06, "loss": 0.0329, "step": 88695 }, { "epoch": 0.37010456392753127, "grad_norm": 0.9188039843605024, "learning_rate": 3.287794396079314e-06, "loss": 0.0293, "step": 88700 }, { "epoch": 0.37012542664252157, "grad_norm": 1.009573616086084, "learning_rate": 3.287701732819248e-06, "loss": 0.0351, "step": 88705 }, { "epoch": 0.3701462893575118, "grad_norm": 0.7121190712334704, "learning_rate": 3.2876090773936093e-06, "loss": 0.0255, "step": 88710 }, { "epoch": 0.3701671520725021, "grad_norm": 0.5601928802010575, "learning_rate": 3.2875164298012953e-06, "loss": 0.0256, "step": 88715 }, { "epoch": 0.37018801478749236, "grad_norm": 0.521358748002474, "learning_rate": 3.2874237900412003e-06, "loss": 0.0235, "step": 88720 }, { "epoch": 0.37020887750248266, "grad_norm": 0.9606676575294666, "learning_rate": 3.287331158112222e-06, "loss": 0.0307, "step": 88725 }, { "epoch": 0.37022974021747296, "grad_norm": 0.6876778959095461, "learning_rate": 3.287238534013257e-06, "loss": 0.0388, "step": 88730 }, { "epoch": 0.3702506029324632, "grad_norm": 0.5909483499810964, "learning_rate": 3.287145917743203e-06, "loss": 0.0249, "step": 88735 }, { "epoch": 0.3702714656474535, "grad_norm": 0.8690715742199072, "learning_rate": 3.2870533093009565e-06, "loss": 0.0283, "step": 88740 }, { "epoch": 0.3702923283624438, "grad_norm": 0.45186485308004887, "learning_rate": 3.286960708685414e-06, "loss": 0.0297, "step": 88745 }, { "epoch": 0.37031319107743405, "grad_norm": 1.2434534856154225, "learning_rate": 3.286868115895475e-06, "loss": 0.0328, "step": 88750 }, { "epoch": 0.37033405379242434, "grad_norm": 0.7140863779174702, "learning_rate": 3.2867755309300362e-06, "loss": 0.0253, "step": 88755 }, { "epoch": 0.3703549165074146, "grad_norm": 0.9756612584382394, "learning_rate": 3.2866829537879953e-06, "loss": 0.0364, "step": 88760 }, { "epoch": 0.3703757792224049, "grad_norm": 1.288815886483723, "learning_rate": 3.2865903844682515e-06, "loss": 0.0276, "step": 88765 }, { "epoch": 0.3703966419373952, "grad_norm": 0.7408173011562954, "learning_rate": 3.2864978229697025e-06, "loss": 0.0235, "step": 88770 }, { "epoch": 0.37041750465238543, "grad_norm": 0.6816936566930899, "learning_rate": 3.286405269291248e-06, "loss": 0.0246, "step": 88775 }, { "epoch": 0.37043836736737573, "grad_norm": 0.4573113671862073, "learning_rate": 3.286312723431786e-06, "loss": 0.0177, "step": 88780 }, { "epoch": 0.370459230082366, "grad_norm": 0.5771123178353869, "learning_rate": 3.286220185390216e-06, "loss": 0.0213, "step": 88785 }, { "epoch": 0.3704800927973563, "grad_norm": 0.8593932139058069, "learning_rate": 3.2861276551654368e-06, "loss": 0.0246, "step": 88790 }, { "epoch": 0.3705009555123466, "grad_norm": 0.7361808304894079, "learning_rate": 3.2860351327563487e-06, "loss": 0.0296, "step": 88795 }, { "epoch": 0.3705218182273368, "grad_norm": 0.5125554153790883, "learning_rate": 3.2859426181618514e-06, "loss": 0.0328, "step": 88800 }, { "epoch": 0.3705426809423271, "grad_norm": 0.7621683684731102, "learning_rate": 3.285850111380845e-06, "loss": 0.0295, "step": 88805 }, { "epoch": 0.37056354365731736, "grad_norm": 0.9147286805231926, "learning_rate": 3.2857576124122297e-06, "loss": 0.031, "step": 88810 }, { "epoch": 0.37058440637230766, "grad_norm": 0.5920285888365886, "learning_rate": 3.285665121254905e-06, "loss": 0.0408, "step": 88815 }, { "epoch": 0.37060526908729796, "grad_norm": 4.9956555070981565, "learning_rate": 3.285572637907773e-06, "loss": 0.0378, "step": 88820 }, { "epoch": 0.3706261318022882, "grad_norm": 0.5158304735299103, "learning_rate": 3.2854801623697337e-06, "loss": 0.0264, "step": 88825 }, { "epoch": 0.3706469945172785, "grad_norm": 0.6822393435291789, "learning_rate": 3.2853876946396877e-06, "loss": 0.0265, "step": 88830 }, { "epoch": 0.3706678572322688, "grad_norm": 0.8423621761966804, "learning_rate": 3.2852952347165373e-06, "loss": 0.0285, "step": 88835 }, { "epoch": 0.37068871994725905, "grad_norm": 1.027335213126171, "learning_rate": 3.285202782599184e-06, "loss": 0.0302, "step": 88840 }, { "epoch": 0.37070958266224935, "grad_norm": 0.534838719344742, "learning_rate": 3.285110338286529e-06, "loss": 0.0288, "step": 88845 }, { "epoch": 0.3707304453772396, "grad_norm": 0.8933150719705275, "learning_rate": 3.2850179017774745e-06, "loss": 0.0279, "step": 88850 }, { "epoch": 0.3707513080922299, "grad_norm": 0.8548159754550707, "learning_rate": 3.284925473070923e-06, "loss": 0.0394, "step": 88855 }, { "epoch": 0.3707721708072202, "grad_norm": 0.9260096979596438, "learning_rate": 3.284833052165776e-06, "loss": 0.0241, "step": 88860 }, { "epoch": 0.37079303352221044, "grad_norm": 0.8294392904859299, "learning_rate": 3.284740639060937e-06, "loss": 0.0294, "step": 88865 }, { "epoch": 0.37081389623720074, "grad_norm": 0.6915805715073197, "learning_rate": 3.284648233755309e-06, "loss": 0.0293, "step": 88870 }, { "epoch": 0.370834758952191, "grad_norm": 0.6246595866897865, "learning_rate": 3.2845558362477937e-06, "loss": 0.0331, "step": 88875 }, { "epoch": 0.3708556216671813, "grad_norm": 1.028667365238976, "learning_rate": 3.284463446537295e-06, "loss": 0.0318, "step": 88880 }, { "epoch": 0.3708764843821716, "grad_norm": 0.6787192858156477, "learning_rate": 3.2843710646227165e-06, "loss": 0.0268, "step": 88885 }, { "epoch": 0.3708973470971618, "grad_norm": 0.8551898611178665, "learning_rate": 3.2842786905029627e-06, "loss": 0.0438, "step": 88890 }, { "epoch": 0.3709182098121521, "grad_norm": 0.5912590154620962, "learning_rate": 3.284186324176936e-06, "loss": 0.0311, "step": 88895 }, { "epoch": 0.37093907252714237, "grad_norm": 0.8065851756897849, "learning_rate": 3.2840939656435422e-06, "loss": 0.032, "step": 88900 }, { "epoch": 0.37095993524213267, "grad_norm": 0.5746355480889228, "learning_rate": 3.2840016149016835e-06, "loss": 0.0203, "step": 88905 }, { "epoch": 0.37098079795712297, "grad_norm": 0.5469169191799266, "learning_rate": 3.2839092719502662e-06, "loss": 0.0324, "step": 88910 }, { "epoch": 0.3710016606721132, "grad_norm": 0.6278892166794439, "learning_rate": 3.2838169367881945e-06, "loss": 0.0265, "step": 88915 }, { "epoch": 0.3710225233871035, "grad_norm": 1.1928144874438746, "learning_rate": 3.2837246094143734e-06, "loss": 0.0263, "step": 88920 }, { "epoch": 0.3710433861020938, "grad_norm": 0.7650914838207237, "learning_rate": 3.283632289827708e-06, "loss": 0.0232, "step": 88925 }, { "epoch": 0.37106424881708405, "grad_norm": 0.9413245754107961, "learning_rate": 3.2835399780271037e-06, "loss": 0.0226, "step": 88930 }, { "epoch": 0.37108511153207435, "grad_norm": 0.6654536165419501, "learning_rate": 3.2834476740114664e-06, "loss": 0.0265, "step": 88935 }, { "epoch": 0.3711059742470646, "grad_norm": 1.012771153826514, "learning_rate": 3.283355377779701e-06, "loss": 0.0263, "step": 88940 }, { "epoch": 0.3711268369620549, "grad_norm": 0.7904243637477768, "learning_rate": 3.283263089330715e-06, "loss": 0.0252, "step": 88945 }, { "epoch": 0.3711476996770452, "grad_norm": 0.526528108813607, "learning_rate": 3.2831708086634133e-06, "loss": 0.0222, "step": 88950 }, { "epoch": 0.37116856239203544, "grad_norm": 0.6494417781942375, "learning_rate": 3.2830785357767035e-06, "loss": 0.0369, "step": 88955 }, { "epoch": 0.37118942510702574, "grad_norm": 0.474928484717162, "learning_rate": 3.282986270669492e-06, "loss": 0.0335, "step": 88960 }, { "epoch": 0.371210287822016, "grad_norm": 0.8474527668910387, "learning_rate": 3.282894013340685e-06, "loss": 0.0259, "step": 88965 }, { "epoch": 0.3712311505370063, "grad_norm": 0.8078723107243942, "learning_rate": 3.282801763789191e-06, "loss": 0.0211, "step": 88970 }, { "epoch": 0.3712520132519966, "grad_norm": 0.7799969095941304, "learning_rate": 3.282709522013916e-06, "loss": 0.0363, "step": 88975 }, { "epoch": 0.3712728759669868, "grad_norm": 0.707457500017912, "learning_rate": 3.2826172880137687e-06, "loss": 0.0302, "step": 88980 }, { "epoch": 0.3712937386819771, "grad_norm": 0.7727725017954444, "learning_rate": 3.2825250617876547e-06, "loss": 0.0264, "step": 88985 }, { "epoch": 0.37131460139696737, "grad_norm": 0.5539292435320569, "learning_rate": 3.2824328433344848e-06, "loss": 0.0343, "step": 88990 }, { "epoch": 0.37133546411195767, "grad_norm": 0.7362981642129611, "learning_rate": 3.2823406326531665e-06, "loss": 0.0348, "step": 88995 }, { "epoch": 0.37135632682694797, "grad_norm": 0.9091762708920912, "learning_rate": 3.2822484297426064e-06, "loss": 0.0253, "step": 89000 }, { "epoch": 0.3713771895419382, "grad_norm": 0.7577991718881815, "learning_rate": 3.282156234601715e-06, "loss": 0.0358, "step": 89005 }, { "epoch": 0.3713980522569285, "grad_norm": 0.4377445933633528, "learning_rate": 3.2820640472294003e-06, "loss": 0.0182, "step": 89010 }, { "epoch": 0.3714189149719188, "grad_norm": 0.5318297549565139, "learning_rate": 3.2819718676245722e-06, "loss": 0.0264, "step": 89015 }, { "epoch": 0.37143977768690906, "grad_norm": 0.9701291727229361, "learning_rate": 3.281879695786139e-06, "loss": 0.0333, "step": 89020 }, { "epoch": 0.37146064040189936, "grad_norm": 1.3594656210382918, "learning_rate": 3.281787531713011e-06, "loss": 0.0279, "step": 89025 }, { "epoch": 0.3714815031168896, "grad_norm": 1.0222601124372328, "learning_rate": 3.281695375404097e-06, "loss": 0.0279, "step": 89030 }, { "epoch": 0.3715023658318799, "grad_norm": 0.9194292317255863, "learning_rate": 3.2816032268583075e-06, "loss": 0.0273, "step": 89035 }, { "epoch": 0.3715232285468702, "grad_norm": 1.1134634710386908, "learning_rate": 3.281511086074553e-06, "loss": 0.0271, "step": 89040 }, { "epoch": 0.37154409126186044, "grad_norm": 0.8623174246692483, "learning_rate": 3.2814189530517428e-06, "loss": 0.0273, "step": 89045 }, { "epoch": 0.37156495397685074, "grad_norm": 3.889367266159292, "learning_rate": 3.2813268277887883e-06, "loss": 0.0235, "step": 89050 }, { "epoch": 0.371585816691841, "grad_norm": 0.830846924432812, "learning_rate": 3.2812347102846005e-06, "loss": 0.0277, "step": 89055 }, { "epoch": 0.3716066794068313, "grad_norm": 1.1115276212817566, "learning_rate": 3.2811426005380893e-06, "loss": 0.0307, "step": 89060 }, { "epoch": 0.3716275421218216, "grad_norm": 0.9481551515628586, "learning_rate": 3.281050498548167e-06, "loss": 0.0305, "step": 89065 }, { "epoch": 0.37164840483681183, "grad_norm": 1.0635486090378652, "learning_rate": 3.2809584043137443e-06, "loss": 0.0231, "step": 89070 }, { "epoch": 0.37166926755180213, "grad_norm": 0.7075774431535113, "learning_rate": 3.2808663178337336e-06, "loss": 0.0272, "step": 89075 }, { "epoch": 0.3716901302667924, "grad_norm": 0.8654920997054342, "learning_rate": 3.2807742391070457e-06, "loss": 0.029, "step": 89080 }, { "epoch": 0.3717109929817827, "grad_norm": 1.4238769207383304, "learning_rate": 3.280682168132594e-06, "loss": 0.0304, "step": 89085 }, { "epoch": 0.371731855696773, "grad_norm": 1.3412056604385258, "learning_rate": 3.280590104909289e-06, "loss": 0.0239, "step": 89090 }, { "epoch": 0.3717527184117632, "grad_norm": 0.9303456478519997, "learning_rate": 3.280498049436045e-06, "loss": 0.0254, "step": 89095 }, { "epoch": 0.3717735811267535, "grad_norm": 0.7309053866505011, "learning_rate": 3.2804060017117733e-06, "loss": 0.0256, "step": 89100 }, { "epoch": 0.3717944438417438, "grad_norm": 0.626597199340659, "learning_rate": 3.280313961735388e-06, "loss": 0.0193, "step": 89105 }, { "epoch": 0.37181530655673406, "grad_norm": 0.803098418179389, "learning_rate": 3.2802219295058013e-06, "loss": 0.0254, "step": 89110 }, { "epoch": 0.37183616927172436, "grad_norm": 0.7811961120352369, "learning_rate": 3.2801299050219273e-06, "loss": 0.0276, "step": 89115 }, { "epoch": 0.3718570319867146, "grad_norm": 0.9864289891024555, "learning_rate": 3.2800378882826783e-06, "loss": 0.0248, "step": 89120 }, { "epoch": 0.3718778947017049, "grad_norm": 0.5808727100379812, "learning_rate": 3.2799458792869695e-06, "loss": 0.0248, "step": 89125 }, { "epoch": 0.3718987574166952, "grad_norm": 1.026039128870829, "learning_rate": 3.279853878033714e-06, "loss": 0.0268, "step": 89130 }, { "epoch": 0.37191962013168545, "grad_norm": 0.7197626993347964, "learning_rate": 3.2797618845218267e-06, "loss": 0.0234, "step": 89135 }, { "epoch": 0.37194048284667575, "grad_norm": 1.840546697027144, "learning_rate": 3.2796698987502213e-06, "loss": 0.0272, "step": 89140 }, { "epoch": 0.371961345561666, "grad_norm": 0.8671102659678117, "learning_rate": 3.2795779207178124e-06, "loss": 0.0269, "step": 89145 }, { "epoch": 0.3719822082766563, "grad_norm": 0.9599321888689752, "learning_rate": 3.2794859504235154e-06, "loss": 0.0241, "step": 89150 }, { "epoch": 0.3720030709916466, "grad_norm": 1.048968889128117, "learning_rate": 3.2793939878662456e-06, "loss": 0.0359, "step": 89155 }, { "epoch": 0.37202393370663683, "grad_norm": 1.0422756283118535, "learning_rate": 3.279302033044917e-06, "loss": 0.0288, "step": 89160 }, { "epoch": 0.37204479642162713, "grad_norm": 9.841535500955693, "learning_rate": 3.2792100859584457e-06, "loss": 0.027, "step": 89165 }, { "epoch": 0.3720656591366174, "grad_norm": 0.9955305174459008, "learning_rate": 3.279118146605749e-06, "loss": 0.0221, "step": 89170 }, { "epoch": 0.3720865218516077, "grad_norm": 0.4303299073838551, "learning_rate": 3.27902621498574e-06, "loss": 0.0194, "step": 89175 }, { "epoch": 0.372107384566598, "grad_norm": 1.420657848192718, "learning_rate": 3.2789342910973364e-06, "loss": 0.0305, "step": 89180 }, { "epoch": 0.3721282472815882, "grad_norm": 1.4160016921917982, "learning_rate": 3.278842374939455e-06, "loss": 0.0238, "step": 89185 }, { "epoch": 0.3721491099965785, "grad_norm": 1.1283268914444793, "learning_rate": 3.2787504665110103e-06, "loss": 0.0367, "step": 89190 }, { "epoch": 0.3721699727115688, "grad_norm": 0.5934782169686907, "learning_rate": 3.278658565810921e-06, "loss": 0.0227, "step": 89195 }, { "epoch": 0.37219083542655906, "grad_norm": 1.3367782535202168, "learning_rate": 3.278566672838104e-06, "loss": 0.0296, "step": 89200 }, { "epoch": 0.37221169814154936, "grad_norm": 1.0171949481862943, "learning_rate": 3.278474787591475e-06, "loss": 0.0285, "step": 89205 }, { "epoch": 0.3722325608565396, "grad_norm": 0.4849276463343103, "learning_rate": 3.2783829100699525e-06, "loss": 0.0292, "step": 89210 }, { "epoch": 0.3722534235715299, "grad_norm": 1.2104874508862502, "learning_rate": 3.2782910402724538e-06, "loss": 0.0341, "step": 89215 }, { "epoch": 0.3722742862865202, "grad_norm": 0.9745080939689554, "learning_rate": 3.2781991781978974e-06, "loss": 0.0242, "step": 89220 }, { "epoch": 0.37229514900151045, "grad_norm": 0.720712155496809, "learning_rate": 3.2781073238452e-06, "loss": 0.0249, "step": 89225 }, { "epoch": 0.37231601171650075, "grad_norm": 0.7497878260680678, "learning_rate": 3.278015477213281e-06, "loss": 0.0233, "step": 89230 }, { "epoch": 0.372336874431491, "grad_norm": 0.6293698871193933, "learning_rate": 3.2779236383010588e-06, "loss": 0.0318, "step": 89235 }, { "epoch": 0.3723577371464813, "grad_norm": 0.9425925223383345, "learning_rate": 3.2778318071074504e-06, "loss": 0.0299, "step": 89240 }, { "epoch": 0.3723785998614716, "grad_norm": 0.8314469224233042, "learning_rate": 3.2777399836313774e-06, "loss": 0.0278, "step": 89245 }, { "epoch": 0.37239946257646184, "grad_norm": 0.6435981496400908, "learning_rate": 3.2776481678717562e-06, "loss": 0.0289, "step": 89250 }, { "epoch": 0.37242032529145214, "grad_norm": 0.7762861560694193, "learning_rate": 3.2775563598275084e-06, "loss": 0.0218, "step": 89255 }, { "epoch": 0.3724411880064424, "grad_norm": 0.9213027879573499, "learning_rate": 3.2774645594975516e-06, "loss": 0.0208, "step": 89260 }, { "epoch": 0.3724620507214327, "grad_norm": 0.8020184907534621, "learning_rate": 3.2773727668808065e-06, "loss": 0.0366, "step": 89265 }, { "epoch": 0.372482913436423, "grad_norm": 1.146534077577, "learning_rate": 3.277280981976193e-06, "loss": 0.0306, "step": 89270 }, { "epoch": 0.3725037761514132, "grad_norm": 1.0630324072840571, "learning_rate": 3.277189204782631e-06, "loss": 0.0223, "step": 89275 }, { "epoch": 0.3725246388664035, "grad_norm": 0.7755070182408693, "learning_rate": 3.2770974352990416e-06, "loss": 0.0244, "step": 89280 }, { "epoch": 0.3725455015813938, "grad_norm": 0.48491226693751716, "learning_rate": 3.2770056735243446e-06, "loss": 0.0257, "step": 89285 }, { "epoch": 0.37256636429638407, "grad_norm": 1.0810419684410106, "learning_rate": 3.27691391945746e-06, "loss": 0.0253, "step": 89290 }, { "epoch": 0.37258722701137437, "grad_norm": 0.3050451267863059, "learning_rate": 3.2768221730973105e-06, "loss": 0.0387, "step": 89295 }, { "epoch": 0.3726080897263646, "grad_norm": 1.20435398881812, "learning_rate": 3.2767304344428165e-06, "loss": 0.0303, "step": 89300 }, { "epoch": 0.3726289524413549, "grad_norm": 0.5447483402065288, "learning_rate": 3.276638703492899e-06, "loss": 0.0224, "step": 89305 }, { "epoch": 0.3726498151563452, "grad_norm": 0.7838854400631621, "learning_rate": 3.2765469802464807e-06, "loss": 0.0263, "step": 89310 }, { "epoch": 0.37267067787133545, "grad_norm": 0.6022256454924197, "learning_rate": 3.2764552647024828e-06, "loss": 0.02, "step": 89315 }, { "epoch": 0.37269154058632575, "grad_norm": 0.7884158102589667, "learning_rate": 3.2763635568598274e-06, "loss": 0.0215, "step": 89320 }, { "epoch": 0.372712403301316, "grad_norm": 0.5736224214093077, "learning_rate": 3.2762718567174363e-06, "loss": 0.0252, "step": 89325 }, { "epoch": 0.3727332660163063, "grad_norm": 1.1997613422742337, "learning_rate": 3.2761801642742325e-06, "loss": 0.0219, "step": 89330 }, { "epoch": 0.3727541287312966, "grad_norm": 0.3761918447526109, "learning_rate": 3.276088479529138e-06, "loss": 0.0289, "step": 89335 }, { "epoch": 0.37277499144628684, "grad_norm": 0.5404888658132009, "learning_rate": 3.275996802481077e-06, "loss": 0.0201, "step": 89340 }, { "epoch": 0.37279585416127714, "grad_norm": 1.0493229765831158, "learning_rate": 3.275905133128972e-06, "loss": 0.0329, "step": 89345 }, { "epoch": 0.3728167168762674, "grad_norm": 0.5879115441226574, "learning_rate": 3.275813471471746e-06, "loss": 0.037, "step": 89350 }, { "epoch": 0.3728375795912577, "grad_norm": 0.7704909847104952, "learning_rate": 3.2757218175083223e-06, "loss": 0.0253, "step": 89355 }, { "epoch": 0.372858442306248, "grad_norm": 1.2172631231920557, "learning_rate": 3.2756301712376252e-06, "loss": 0.0227, "step": 89360 }, { "epoch": 0.3728793050212382, "grad_norm": 1.4615741839118848, "learning_rate": 3.2755385326585785e-06, "loss": 0.0382, "step": 89365 }, { "epoch": 0.3729001677362285, "grad_norm": 0.6878213934492596, "learning_rate": 3.2754469017701065e-06, "loss": 0.0304, "step": 89370 }, { "epoch": 0.3729210304512188, "grad_norm": 0.5920705285422903, "learning_rate": 3.2753552785711333e-06, "loss": 0.0378, "step": 89375 }, { "epoch": 0.37294189316620907, "grad_norm": 15.018862808485732, "learning_rate": 3.275263663060584e-06, "loss": 0.0316, "step": 89380 }, { "epoch": 0.37296275588119937, "grad_norm": 1.4460716790703958, "learning_rate": 3.275172055237382e-06, "loss": 0.0261, "step": 89385 }, { "epoch": 0.3729836185961896, "grad_norm": 0.785222955687483, "learning_rate": 3.2750804551004544e-06, "loss": 0.0299, "step": 89390 }, { "epoch": 0.3730044813111799, "grad_norm": 0.6794190077994567, "learning_rate": 3.2749888626487244e-06, "loss": 0.024, "step": 89395 }, { "epoch": 0.3730253440261702, "grad_norm": 0.5489928219306115, "learning_rate": 3.2748972778811188e-06, "loss": 0.0226, "step": 89400 }, { "epoch": 0.37304620674116046, "grad_norm": 0.6365434650303712, "learning_rate": 3.2748057007965632e-06, "loss": 0.0268, "step": 89405 }, { "epoch": 0.37306706945615076, "grad_norm": 0.4726806858215567, "learning_rate": 3.2747141313939816e-06, "loss": 0.0285, "step": 89410 }, { "epoch": 0.373087932171141, "grad_norm": 0.6105284250352201, "learning_rate": 3.2746225696723027e-06, "loss": 0.0195, "step": 89415 }, { "epoch": 0.3731087948861313, "grad_norm": 0.6182888074406248, "learning_rate": 3.274531015630451e-06, "loss": 0.0218, "step": 89420 }, { "epoch": 0.3731296576011216, "grad_norm": 1.6451755916466253, "learning_rate": 3.2744394692673535e-06, "loss": 0.0298, "step": 89425 }, { "epoch": 0.37315052031611184, "grad_norm": 1.0087390079832388, "learning_rate": 3.274347930581937e-06, "loss": 0.0243, "step": 89430 }, { "epoch": 0.37317138303110214, "grad_norm": 1.2703087882758533, "learning_rate": 3.274256399573128e-06, "loss": 0.0375, "step": 89435 }, { "epoch": 0.3731922457460924, "grad_norm": 0.7867866754992208, "learning_rate": 3.2741648762398543e-06, "loss": 0.0351, "step": 89440 }, { "epoch": 0.3732131084610827, "grad_norm": 1.0209792282869312, "learning_rate": 3.274073360581042e-06, "loss": 0.0258, "step": 89445 }, { "epoch": 0.373233971176073, "grad_norm": 0.6704709192854225, "learning_rate": 3.27398185259562e-06, "loss": 0.0253, "step": 89450 }, { "epoch": 0.37325483389106323, "grad_norm": 0.3348070498332485, "learning_rate": 3.273890352282515e-06, "loss": 0.0191, "step": 89455 }, { "epoch": 0.37327569660605353, "grad_norm": 0.6160367456155434, "learning_rate": 3.2737988596406556e-06, "loss": 0.0382, "step": 89460 }, { "epoch": 0.37329655932104383, "grad_norm": 1.2181791323749391, "learning_rate": 3.2737073746689696e-06, "loss": 0.0225, "step": 89465 }, { "epoch": 0.3733174220360341, "grad_norm": 0.6200062894277443, "learning_rate": 3.2736158973663856e-06, "loss": 0.0316, "step": 89470 }, { "epoch": 0.3733382847510244, "grad_norm": 0.704836338814572, "learning_rate": 3.2735244277318325e-06, "loss": 0.0281, "step": 89475 }, { "epoch": 0.3733591474660146, "grad_norm": 0.6922057392464321, "learning_rate": 3.273432965764237e-06, "loss": 0.0311, "step": 89480 }, { "epoch": 0.3733800101810049, "grad_norm": 0.645596334904675, "learning_rate": 3.2733415114625312e-06, "loss": 0.0272, "step": 89485 }, { "epoch": 0.3734008728959952, "grad_norm": 0.7751557357748069, "learning_rate": 3.273250064825642e-06, "loss": 0.0317, "step": 89490 }, { "epoch": 0.37342173561098546, "grad_norm": 0.4630751285028632, "learning_rate": 3.2731586258525e-06, "loss": 0.0251, "step": 89495 }, { "epoch": 0.37344259832597576, "grad_norm": 0.900611087425118, "learning_rate": 3.273067194542035e-06, "loss": 0.0223, "step": 89500 }, { "epoch": 0.373463461040966, "grad_norm": 1.057508127679336, "learning_rate": 3.272975770893175e-06, "loss": 0.0236, "step": 89505 }, { "epoch": 0.3734843237559563, "grad_norm": 1.1297972995380259, "learning_rate": 3.272884354904852e-06, "loss": 0.0263, "step": 89510 }, { "epoch": 0.3735051864709466, "grad_norm": 1.2619742552944795, "learning_rate": 3.2727929465759954e-06, "loss": 0.0327, "step": 89515 }, { "epoch": 0.37352604918593685, "grad_norm": 0.634119716267926, "learning_rate": 3.2727015459055354e-06, "loss": 0.0278, "step": 89520 }, { "epoch": 0.37354691190092715, "grad_norm": 1.4385550565056018, "learning_rate": 3.2726101528924036e-06, "loss": 0.0357, "step": 89525 }, { "epoch": 0.3735677746159174, "grad_norm": 0.9322467449097563, "learning_rate": 3.2725187675355302e-06, "loss": 0.0348, "step": 89530 }, { "epoch": 0.3735886373309077, "grad_norm": 0.5493917759089617, "learning_rate": 3.2724273898338467e-06, "loss": 0.0383, "step": 89535 }, { "epoch": 0.373609500045898, "grad_norm": 0.5503371537788252, "learning_rate": 3.2723360197862836e-06, "loss": 0.0287, "step": 89540 }, { "epoch": 0.37363036276088823, "grad_norm": 0.7006726353469444, "learning_rate": 3.2722446573917725e-06, "loss": 0.038, "step": 89545 }, { "epoch": 0.37365122547587853, "grad_norm": 0.5836455035612892, "learning_rate": 3.2721533026492465e-06, "loss": 0.0268, "step": 89550 }, { "epoch": 0.37367208819086883, "grad_norm": 0.7634895198041474, "learning_rate": 3.2720619555576362e-06, "loss": 0.031, "step": 89555 }, { "epoch": 0.3736929509058591, "grad_norm": 0.868499667041372, "learning_rate": 3.2719706161158736e-06, "loss": 0.0206, "step": 89560 }, { "epoch": 0.3737138136208494, "grad_norm": 0.624845977319026, "learning_rate": 3.271879284322892e-06, "loss": 0.0291, "step": 89565 }, { "epoch": 0.3737346763358396, "grad_norm": 0.549973480214795, "learning_rate": 3.271787960177623e-06, "loss": 0.0217, "step": 89570 }, { "epoch": 0.3737555390508299, "grad_norm": 1.318385215566505, "learning_rate": 3.271696643679e-06, "loss": 0.0272, "step": 89575 }, { "epoch": 0.3737764017658202, "grad_norm": 0.7603256128669975, "learning_rate": 3.2716053348259556e-06, "loss": 0.0345, "step": 89580 }, { "epoch": 0.37379726448081046, "grad_norm": 0.8365422253158159, "learning_rate": 3.2715140336174234e-06, "loss": 0.0252, "step": 89585 }, { "epoch": 0.37381812719580076, "grad_norm": 1.2620525154078182, "learning_rate": 3.271422740052336e-06, "loss": 0.0351, "step": 89590 }, { "epoch": 0.373838989910791, "grad_norm": 0.6824178406034458, "learning_rate": 3.271331454129628e-06, "loss": 0.0221, "step": 89595 }, { "epoch": 0.3738598526257813, "grad_norm": 1.1673980485119522, "learning_rate": 3.271240175848232e-06, "loss": 0.0419, "step": 89600 }, { "epoch": 0.3738807153407716, "grad_norm": 0.880631534432141, "learning_rate": 3.271148905207083e-06, "loss": 0.0214, "step": 89605 }, { "epoch": 0.37390157805576185, "grad_norm": 1.1813364889389126, "learning_rate": 3.2710576422051147e-06, "loss": 0.0429, "step": 89610 }, { "epoch": 0.37392244077075215, "grad_norm": 0.5511854902989464, "learning_rate": 3.270966386841262e-06, "loss": 0.0277, "step": 89615 }, { "epoch": 0.3739433034857424, "grad_norm": 0.9179035865779674, "learning_rate": 3.2708751391144596e-06, "loss": 0.0289, "step": 89620 }, { "epoch": 0.3739641662007327, "grad_norm": 0.8982587155048907, "learning_rate": 3.2707838990236414e-06, "loss": 0.027, "step": 89625 }, { "epoch": 0.373985028915723, "grad_norm": 0.6963278411894309, "learning_rate": 3.270692666567743e-06, "loss": 0.0246, "step": 89630 }, { "epoch": 0.37400589163071324, "grad_norm": 0.6739779602104845, "learning_rate": 3.2706014417456995e-06, "loss": 0.0322, "step": 89635 }, { "epoch": 0.37402675434570354, "grad_norm": 0.704998405846967, "learning_rate": 3.270510224556446e-06, "loss": 0.0267, "step": 89640 }, { "epoch": 0.37404761706069384, "grad_norm": 0.8989585819149317, "learning_rate": 3.2704190149989197e-06, "loss": 0.0369, "step": 89645 }, { "epoch": 0.3740684797756841, "grad_norm": 0.6713799216851589, "learning_rate": 3.2703278130720552e-06, "loss": 0.0254, "step": 89650 }, { "epoch": 0.3740893424906744, "grad_norm": 0.5315900689331812, "learning_rate": 3.2702366187747888e-06, "loss": 0.0239, "step": 89655 }, { "epoch": 0.3741102052056646, "grad_norm": 1.007277946558951, "learning_rate": 3.270145432106056e-06, "loss": 0.0347, "step": 89660 }, { "epoch": 0.3741310679206549, "grad_norm": 0.8296159235516558, "learning_rate": 3.270054253064795e-06, "loss": 0.0212, "step": 89665 }, { "epoch": 0.3741519306356452, "grad_norm": 1.0237310789576641, "learning_rate": 3.2699630816499413e-06, "loss": 0.0347, "step": 89670 }, { "epoch": 0.37417279335063547, "grad_norm": 0.644384855648302, "learning_rate": 3.269871917860432e-06, "loss": 0.0247, "step": 89675 }, { "epoch": 0.37419365606562577, "grad_norm": 0.7856850131383535, "learning_rate": 3.2697807616952044e-06, "loss": 0.0263, "step": 89680 }, { "epoch": 0.374214518780616, "grad_norm": 0.8721561383672474, "learning_rate": 3.2696896131531958e-06, "loss": 0.0255, "step": 89685 }, { "epoch": 0.3742353814956063, "grad_norm": 0.8188828690850225, "learning_rate": 3.269598472233344e-06, "loss": 0.0287, "step": 89690 }, { "epoch": 0.3742562442105966, "grad_norm": 0.768196056149341, "learning_rate": 3.2695073389345856e-06, "loss": 0.0279, "step": 89695 }, { "epoch": 0.37427710692558686, "grad_norm": 0.929686001218559, "learning_rate": 3.26941621325586e-06, "loss": 0.0284, "step": 89700 }, { "epoch": 0.37429796964057716, "grad_norm": 0.6412580923927093, "learning_rate": 3.269325095196105e-06, "loss": 0.0263, "step": 89705 }, { "epoch": 0.3743188323555674, "grad_norm": 0.8428968927054213, "learning_rate": 3.2692339847542574e-06, "loss": 0.0267, "step": 89710 }, { "epoch": 0.3743396950705577, "grad_norm": 0.9163539009576672, "learning_rate": 3.269142881929259e-06, "loss": 0.0255, "step": 89715 }, { "epoch": 0.374360557785548, "grad_norm": 0.551427171332662, "learning_rate": 3.269051786720045e-06, "loss": 0.0277, "step": 89720 }, { "epoch": 0.37438142050053824, "grad_norm": 0.4011322972641819, "learning_rate": 3.2689606991255563e-06, "loss": 0.023, "step": 89725 }, { "epoch": 0.37440228321552854, "grad_norm": 0.5276380890265703, "learning_rate": 3.268869619144732e-06, "loss": 0.0262, "step": 89730 }, { "epoch": 0.37442314593051884, "grad_norm": 0.5903429124757213, "learning_rate": 3.2687785467765114e-06, "loss": 0.0259, "step": 89735 }, { "epoch": 0.3744440086455091, "grad_norm": 0.7662223356473493, "learning_rate": 3.2686874820198344e-06, "loss": 0.0276, "step": 89740 }, { "epoch": 0.3744648713604994, "grad_norm": 1.2387277047575214, "learning_rate": 3.2685964248736396e-06, "loss": 0.0348, "step": 89745 }, { "epoch": 0.37448573407548963, "grad_norm": 0.7125373242351682, "learning_rate": 3.2685053753368682e-06, "loss": 0.026, "step": 89750 }, { "epoch": 0.37450659679047993, "grad_norm": 0.46580277284626975, "learning_rate": 3.26841433340846e-06, "loss": 0.0193, "step": 89755 }, { "epoch": 0.37452745950547023, "grad_norm": 0.5521064854926628, "learning_rate": 3.268323299087356e-06, "loss": 0.027, "step": 89760 }, { "epoch": 0.3745483222204605, "grad_norm": 0.6786003370933044, "learning_rate": 3.2682322723724964e-06, "loss": 0.0233, "step": 89765 }, { "epoch": 0.37456918493545077, "grad_norm": 1.5021597624546987, "learning_rate": 3.268141253262821e-06, "loss": 0.0349, "step": 89770 }, { "epoch": 0.374590047650441, "grad_norm": 0.9453916245029276, "learning_rate": 3.2680502417572724e-06, "loss": 0.0273, "step": 89775 }, { "epoch": 0.3746109103654313, "grad_norm": 1.152331762757582, "learning_rate": 3.2679592378547915e-06, "loss": 0.0342, "step": 89780 }, { "epoch": 0.3746317730804216, "grad_norm": 0.8165499198841734, "learning_rate": 3.267868241554319e-06, "loss": 0.0295, "step": 89785 }, { "epoch": 0.37465263579541186, "grad_norm": 1.003541358354018, "learning_rate": 3.267777252854797e-06, "loss": 0.0268, "step": 89790 }, { "epoch": 0.37467349851040216, "grad_norm": 0.7661208098138711, "learning_rate": 3.2676862717551683e-06, "loss": 0.0226, "step": 89795 }, { "epoch": 0.3746943612253924, "grad_norm": 0.4077094655512808, "learning_rate": 3.267595298254374e-06, "loss": 0.0273, "step": 89800 }, { "epoch": 0.3747152239403827, "grad_norm": 0.6102774343440507, "learning_rate": 3.267504332351356e-06, "loss": 0.026, "step": 89805 }, { "epoch": 0.374736086655373, "grad_norm": 0.6464644413374837, "learning_rate": 3.267413374045058e-06, "loss": 0.025, "step": 89810 }, { "epoch": 0.37475694937036325, "grad_norm": 0.7375831867286823, "learning_rate": 3.2673224233344215e-06, "loss": 0.0289, "step": 89815 }, { "epoch": 0.37477781208535355, "grad_norm": 0.6132188427480304, "learning_rate": 3.26723148021839e-06, "loss": 0.0225, "step": 89820 }, { "epoch": 0.37479867480034385, "grad_norm": 1.2420136056725555, "learning_rate": 3.2671405446959076e-06, "loss": 0.0296, "step": 89825 }, { "epoch": 0.3748195375153341, "grad_norm": 0.9703997701094617, "learning_rate": 3.2670496167659154e-06, "loss": 0.0313, "step": 89830 }, { "epoch": 0.3748404002303244, "grad_norm": 0.8588317135916782, "learning_rate": 3.2669586964273585e-06, "loss": 0.0247, "step": 89835 }, { "epoch": 0.37486126294531463, "grad_norm": 0.4928384059412262, "learning_rate": 3.26686778367918e-06, "loss": 0.0272, "step": 89840 }, { "epoch": 0.37488212566030493, "grad_norm": 0.6048676106274788, "learning_rate": 3.2667768785203247e-06, "loss": 0.0252, "step": 89845 }, { "epoch": 0.37490298837529523, "grad_norm": 0.3949519394169445, "learning_rate": 3.266685980949736e-06, "loss": 0.0159, "step": 89850 }, { "epoch": 0.3749238510902855, "grad_norm": 0.6436036901119879, "learning_rate": 3.266595090966358e-06, "loss": 0.0234, "step": 89855 }, { "epoch": 0.3749447138052758, "grad_norm": 0.9659751309550032, "learning_rate": 3.266504208569136e-06, "loss": 0.0257, "step": 89860 }, { "epoch": 0.374965576520266, "grad_norm": 0.8120536830014203, "learning_rate": 3.266413333757014e-06, "loss": 0.0282, "step": 89865 }, { "epoch": 0.3749864392352563, "grad_norm": 1.3077127902802128, "learning_rate": 3.2663224665289373e-06, "loss": 0.0409, "step": 89870 }, { "epoch": 0.3750073019502466, "grad_norm": 0.6604862757962771, "learning_rate": 3.266231606883851e-06, "loss": 0.028, "step": 89875 }, { "epoch": 0.37502816466523686, "grad_norm": 0.727583697392428, "learning_rate": 3.2661407548207014e-06, "loss": 0.0255, "step": 89880 }, { "epoch": 0.37504902738022716, "grad_norm": 0.8350717054904555, "learning_rate": 3.2660499103384326e-06, "loss": 0.039, "step": 89885 }, { "epoch": 0.3750698900952174, "grad_norm": 0.5363758609355322, "learning_rate": 3.2659590734359914e-06, "loss": 0.0259, "step": 89890 }, { "epoch": 0.3750907528102077, "grad_norm": 0.8387392757302385, "learning_rate": 3.2658682441123237e-06, "loss": 0.0267, "step": 89895 }, { "epoch": 0.375111615525198, "grad_norm": 0.4439683178886889, "learning_rate": 3.265777422366374e-06, "loss": 0.0308, "step": 89900 }, { "epoch": 0.37513247824018825, "grad_norm": 0.791174820643112, "learning_rate": 3.265686608197092e-06, "loss": 0.0235, "step": 89905 }, { "epoch": 0.37515334095517855, "grad_norm": 0.6839433206140887, "learning_rate": 3.2655958016034212e-06, "loss": 0.0242, "step": 89910 }, { "epoch": 0.37517420367016885, "grad_norm": 0.4387053567965585, "learning_rate": 3.26550500258431e-06, "loss": 0.0308, "step": 89915 }, { "epoch": 0.3751950663851591, "grad_norm": 0.47351352109893213, "learning_rate": 3.265414211138705e-06, "loss": 0.0302, "step": 89920 }, { "epoch": 0.3752159291001494, "grad_norm": 0.7384733645842378, "learning_rate": 3.2653234272655536e-06, "loss": 0.023, "step": 89925 }, { "epoch": 0.37523679181513964, "grad_norm": 0.6780227374379013, "learning_rate": 3.2652326509638025e-06, "loss": 0.022, "step": 89930 }, { "epoch": 0.37525765453012994, "grad_norm": 0.9050779193648474, "learning_rate": 3.265141882232401e-06, "loss": 0.0244, "step": 89935 }, { "epoch": 0.37527851724512024, "grad_norm": 0.7110925759449004, "learning_rate": 3.2650511210702956e-06, "loss": 0.0318, "step": 89940 }, { "epoch": 0.3752993799601105, "grad_norm": 0.9539270061169691, "learning_rate": 3.2649603674764344e-06, "loss": 0.0318, "step": 89945 }, { "epoch": 0.3753202426751008, "grad_norm": 0.6387359825883385, "learning_rate": 3.264869621449766e-06, "loss": 0.0277, "step": 89950 }, { "epoch": 0.375341105390091, "grad_norm": 0.43229376871146935, "learning_rate": 3.2647788829892387e-06, "loss": 0.0248, "step": 89955 }, { "epoch": 0.3753619681050813, "grad_norm": 1.1064721833996716, "learning_rate": 3.2646881520938013e-06, "loss": 0.0287, "step": 89960 }, { "epoch": 0.3753828308200716, "grad_norm": 0.7485074424061133, "learning_rate": 3.264597428762402e-06, "loss": 0.0389, "step": 89965 }, { "epoch": 0.37540369353506187, "grad_norm": 0.6595533163509024, "learning_rate": 3.264506712993991e-06, "loss": 0.0269, "step": 89970 }, { "epoch": 0.37542455625005217, "grad_norm": 0.6647478406665763, "learning_rate": 3.2644160047875167e-06, "loss": 0.0356, "step": 89975 }, { "epoch": 0.3754454189650424, "grad_norm": 3.0091367048772373, "learning_rate": 3.2643253041419285e-06, "loss": 0.0329, "step": 89980 }, { "epoch": 0.3754662816800327, "grad_norm": 0.4749692532542241, "learning_rate": 3.2642346110561772e-06, "loss": 0.021, "step": 89985 }, { "epoch": 0.375487144395023, "grad_norm": 0.5342864641351843, "learning_rate": 3.264143925529211e-06, "loss": 0.0172, "step": 89990 }, { "epoch": 0.37550800711001325, "grad_norm": 1.136750529853334, "learning_rate": 3.2640532475599812e-06, "loss": 0.03, "step": 89995 }, { "epoch": 0.37552886982500355, "grad_norm": 0.491496154102024, "learning_rate": 3.263962577147438e-06, "loss": 0.027, "step": 90000 }, { "epoch": 0.3755497325399938, "grad_norm": 0.9270915690684327, "learning_rate": 3.2638719142905316e-06, "loss": 0.023, "step": 90005 }, { "epoch": 0.3755705952549841, "grad_norm": 0.8801274855091962, "learning_rate": 3.263781258988213e-06, "loss": 0.0267, "step": 90010 }, { "epoch": 0.3755914579699744, "grad_norm": 0.6439824915715401, "learning_rate": 3.263690611239432e-06, "loss": 0.0214, "step": 90015 }, { "epoch": 0.37561232068496464, "grad_norm": 0.6806994288478541, "learning_rate": 3.263599971043141e-06, "loss": 0.0253, "step": 90020 }, { "epoch": 0.37563318339995494, "grad_norm": 0.6364558033185963, "learning_rate": 3.2635093383982914e-06, "loss": 0.022, "step": 90025 }, { "epoch": 0.37565404611494524, "grad_norm": 0.5071518419762894, "learning_rate": 3.2634187133038337e-06, "loss": 0.024, "step": 90030 }, { "epoch": 0.3756749088299355, "grad_norm": 0.5197492006865965, "learning_rate": 3.2633280957587197e-06, "loss": 0.0301, "step": 90035 }, { "epoch": 0.3756957715449258, "grad_norm": 0.9794094391589917, "learning_rate": 3.263237485761902e-06, "loss": 0.0297, "step": 90040 }, { "epoch": 0.375716634259916, "grad_norm": 0.8358471717516103, "learning_rate": 3.2631468833123325e-06, "loss": 0.0261, "step": 90045 }, { "epoch": 0.3757374969749063, "grad_norm": 0.7477937179159102, "learning_rate": 3.2630562884089636e-06, "loss": 0.0202, "step": 90050 }, { "epoch": 0.3757583596898966, "grad_norm": 0.889937299686873, "learning_rate": 3.2629657010507478e-06, "loss": 0.033, "step": 90055 }, { "epoch": 0.37577922240488687, "grad_norm": 1.2612166831778324, "learning_rate": 3.2628751212366373e-06, "loss": 0.0432, "step": 90060 }, { "epoch": 0.37580008511987717, "grad_norm": 0.6867503316099274, "learning_rate": 3.2627845489655856e-06, "loss": 0.0308, "step": 90065 }, { "epoch": 0.3758209478348674, "grad_norm": 0.395982357312323, "learning_rate": 3.2626939842365453e-06, "loss": 0.0274, "step": 90070 }, { "epoch": 0.3758418105498577, "grad_norm": 0.8265133674881101, "learning_rate": 3.2626034270484703e-06, "loss": 0.0296, "step": 90075 }, { "epoch": 0.375862673264848, "grad_norm": 0.5293336997193291, "learning_rate": 3.262512877400314e-06, "loss": 0.0214, "step": 90080 }, { "epoch": 0.37588353597983826, "grad_norm": 0.3939486356493365, "learning_rate": 3.2624223352910305e-06, "loss": 0.0273, "step": 90085 }, { "epoch": 0.37590439869482856, "grad_norm": 2.1047072897175836, "learning_rate": 3.262331800719574e-06, "loss": 0.0182, "step": 90090 }, { "epoch": 0.3759252614098188, "grad_norm": 0.9537851656992853, "learning_rate": 3.262241273684897e-06, "loss": 0.0231, "step": 90095 }, { "epoch": 0.3759461241248091, "grad_norm": 0.7060962357255811, "learning_rate": 3.2621507541859552e-06, "loss": 0.0307, "step": 90100 }, { "epoch": 0.3759669868397994, "grad_norm": 0.5521998937540552, "learning_rate": 3.262060242221703e-06, "loss": 0.0237, "step": 90105 }, { "epoch": 0.37598784955478964, "grad_norm": 1.3958029591028687, "learning_rate": 3.2619697377910946e-06, "loss": 0.031, "step": 90110 }, { "epoch": 0.37600871226977994, "grad_norm": 0.6324565101449954, "learning_rate": 3.261879240893086e-06, "loss": 0.023, "step": 90115 }, { "epoch": 0.37602957498477024, "grad_norm": 0.8919873114133523, "learning_rate": 3.2617887515266315e-06, "loss": 0.0251, "step": 90120 }, { "epoch": 0.3760504376997605, "grad_norm": 0.7843323825720125, "learning_rate": 3.2616982696906868e-06, "loss": 0.0271, "step": 90125 }, { "epoch": 0.3760713004147508, "grad_norm": 1.1659537758109317, "learning_rate": 3.2616077953842068e-06, "loss": 0.0386, "step": 90130 }, { "epoch": 0.37609216312974103, "grad_norm": 1.0304311463610234, "learning_rate": 3.2615173286061493e-06, "loss": 0.0295, "step": 90135 }, { "epoch": 0.37611302584473133, "grad_norm": 1.0915916006716295, "learning_rate": 3.2614268693554677e-06, "loss": 0.0316, "step": 90140 }, { "epoch": 0.37613388855972163, "grad_norm": 0.6090457787760956, "learning_rate": 3.2613364176311203e-06, "loss": 0.0248, "step": 90145 }, { "epoch": 0.3761547512747119, "grad_norm": 1.022700423662465, "learning_rate": 3.261245973432062e-06, "loss": 0.0309, "step": 90150 }, { "epoch": 0.3761756139897022, "grad_norm": 0.5873212693601912, "learning_rate": 3.26115553675725e-06, "loss": 0.0264, "step": 90155 }, { "epoch": 0.3761964767046924, "grad_norm": 0.7574975287148, "learning_rate": 3.261065107605641e-06, "loss": 0.0216, "step": 90160 }, { "epoch": 0.3762173394196827, "grad_norm": 1.1994649460432467, "learning_rate": 3.260974685976192e-06, "loss": 0.0287, "step": 90165 }, { "epoch": 0.376238202134673, "grad_norm": 0.8853800212638642, "learning_rate": 3.260884271867861e-06, "loss": 0.0339, "step": 90170 }, { "epoch": 0.37625906484966326, "grad_norm": 1.2372137133219239, "learning_rate": 3.260793865279604e-06, "loss": 0.0251, "step": 90175 }, { "epoch": 0.37627992756465356, "grad_norm": 0.929810062831008, "learning_rate": 3.2607034662103793e-06, "loss": 0.0231, "step": 90180 }, { "epoch": 0.3763007902796438, "grad_norm": 0.6140597721373773, "learning_rate": 3.2606130746591453e-06, "loss": 0.0203, "step": 90185 }, { "epoch": 0.3763216529946341, "grad_norm": 0.7433676875951387, "learning_rate": 3.2605226906248586e-06, "loss": 0.0293, "step": 90190 }, { "epoch": 0.3763425157096244, "grad_norm": 0.9618793358710035, "learning_rate": 3.2604323141064787e-06, "loss": 0.0324, "step": 90195 }, { "epoch": 0.37636337842461465, "grad_norm": 0.9235010829959637, "learning_rate": 3.2603419451029627e-06, "loss": 0.043, "step": 90200 }, { "epoch": 0.37638424113960495, "grad_norm": 0.7348435066270074, "learning_rate": 3.2602515836132715e-06, "loss": 0.0248, "step": 90205 }, { "epoch": 0.37640510385459525, "grad_norm": 0.9521583313321381, "learning_rate": 3.260161229636361e-06, "loss": 0.0279, "step": 90210 }, { "epoch": 0.3764259665695855, "grad_norm": 0.9476528189782596, "learning_rate": 3.260070883171192e-06, "loss": 0.0287, "step": 90215 }, { "epoch": 0.3764468292845758, "grad_norm": 1.5265468252710106, "learning_rate": 3.2599805442167242e-06, "loss": 0.0293, "step": 90220 }, { "epoch": 0.37646769199956603, "grad_norm": 0.719334946913138, "learning_rate": 3.259890212771915e-06, "loss": 0.0341, "step": 90225 }, { "epoch": 0.37648855471455633, "grad_norm": 0.6275687519636972, "learning_rate": 3.2597998888357265e-06, "loss": 0.021, "step": 90230 }, { "epoch": 0.37650941742954663, "grad_norm": 0.8513709538404027, "learning_rate": 3.259709572407116e-06, "loss": 0.0266, "step": 90235 }, { "epoch": 0.3765302801445369, "grad_norm": 0.6693440870805347, "learning_rate": 3.2596192634850456e-06, "loss": 0.0304, "step": 90240 }, { "epoch": 0.3765511428595272, "grad_norm": 0.9284298099607301, "learning_rate": 3.2595289620684743e-06, "loss": 0.0254, "step": 90245 }, { "epoch": 0.3765720055745174, "grad_norm": 0.42578494809434836, "learning_rate": 3.2594386681563633e-06, "loss": 0.0297, "step": 90250 }, { "epoch": 0.3765928682895077, "grad_norm": 0.7946897156989773, "learning_rate": 3.259348381747673e-06, "loss": 0.0211, "step": 90255 }, { "epoch": 0.376613731004498, "grad_norm": 0.5544320764337815, "learning_rate": 3.2592581028413635e-06, "loss": 0.0252, "step": 90260 }, { "epoch": 0.37663459371948826, "grad_norm": 0.5492793010277449, "learning_rate": 3.259167831436397e-06, "loss": 0.0331, "step": 90265 }, { "epoch": 0.37665545643447856, "grad_norm": 0.4780102276798295, "learning_rate": 3.2590775675317337e-06, "loss": 0.0203, "step": 90270 }, { "epoch": 0.3766763191494688, "grad_norm": 0.8414281330752709, "learning_rate": 3.2589873111263353e-06, "loss": 0.0208, "step": 90275 }, { "epoch": 0.3766971818644591, "grad_norm": 0.8787108952748628, "learning_rate": 3.258897062219164e-06, "loss": 0.03, "step": 90280 }, { "epoch": 0.3767180445794494, "grad_norm": 1.0784440266227135, "learning_rate": 3.258806820809182e-06, "loss": 0.0257, "step": 90285 }, { "epoch": 0.37673890729443965, "grad_norm": 0.95367319672114, "learning_rate": 3.2587165868953495e-06, "loss": 0.0243, "step": 90290 }, { "epoch": 0.37675977000942995, "grad_norm": 0.8895530866141487, "learning_rate": 3.25862636047663e-06, "loss": 0.0235, "step": 90295 }, { "epoch": 0.37678063272442025, "grad_norm": 0.7112886864645696, "learning_rate": 3.2585361415519867e-06, "loss": 0.0321, "step": 90300 }, { "epoch": 0.3768014954394105, "grad_norm": 1.1246282640759786, "learning_rate": 3.2584459301203807e-06, "loss": 0.0266, "step": 90305 }, { "epoch": 0.3768223581544008, "grad_norm": 0.7053077402234658, "learning_rate": 3.2583557261807756e-06, "loss": 0.0321, "step": 90310 }, { "epoch": 0.37684322086939104, "grad_norm": 0.4939519723171452, "learning_rate": 3.2582655297321346e-06, "loss": 0.0254, "step": 90315 }, { "epoch": 0.37686408358438134, "grad_norm": 0.5908970894963166, "learning_rate": 3.2581753407734206e-06, "loss": 0.0382, "step": 90320 }, { "epoch": 0.37688494629937164, "grad_norm": 0.6155685650265087, "learning_rate": 3.2580851593035976e-06, "loss": 0.0287, "step": 90325 }, { "epoch": 0.3769058090143619, "grad_norm": 1.3237174678123609, "learning_rate": 3.257994985321628e-06, "loss": 0.0347, "step": 90330 }, { "epoch": 0.3769266717293522, "grad_norm": 0.8220758070195393, "learning_rate": 3.2579048188264772e-06, "loss": 0.0264, "step": 90335 }, { "epoch": 0.3769475344443424, "grad_norm": 0.8660089065595304, "learning_rate": 3.257814659817108e-06, "loss": 0.0314, "step": 90340 }, { "epoch": 0.3769683971593327, "grad_norm": 0.6692533924940804, "learning_rate": 3.2577245082924853e-06, "loss": 0.0234, "step": 90345 }, { "epoch": 0.376989259874323, "grad_norm": 0.6916847499684654, "learning_rate": 3.2576343642515735e-06, "loss": 0.0246, "step": 90350 }, { "epoch": 0.37701012258931327, "grad_norm": 0.7476025935782393, "learning_rate": 3.2575442276933373e-06, "loss": 0.0285, "step": 90355 }, { "epoch": 0.37703098530430357, "grad_norm": 1.1841981522790763, "learning_rate": 3.2574540986167407e-06, "loss": 0.0385, "step": 90360 }, { "epoch": 0.3770518480192938, "grad_norm": 1.0306714255663985, "learning_rate": 3.2573639770207506e-06, "loss": 0.0239, "step": 90365 }, { "epoch": 0.3770727107342841, "grad_norm": 0.8882534837257383, "learning_rate": 3.257273862904331e-06, "loss": 0.0277, "step": 90370 }, { "epoch": 0.3770935734492744, "grad_norm": 0.4983283709021811, "learning_rate": 3.2571837562664464e-06, "loss": 0.0288, "step": 90375 }, { "epoch": 0.37711443616426465, "grad_norm": 0.27988169401729773, "learning_rate": 3.2570936571060645e-06, "loss": 0.0203, "step": 90380 }, { "epoch": 0.37713529887925495, "grad_norm": 0.3290501471823556, "learning_rate": 3.25700356542215e-06, "loss": 0.0267, "step": 90385 }, { "epoch": 0.37715616159424525, "grad_norm": 0.8238935124082639, "learning_rate": 3.2569134812136693e-06, "loss": 0.0235, "step": 90390 }, { "epoch": 0.3771770243092355, "grad_norm": 1.626638504106887, "learning_rate": 3.256823404479589e-06, "loss": 0.0251, "step": 90395 }, { "epoch": 0.3771978870242258, "grad_norm": 0.8744591510084754, "learning_rate": 3.256733335218874e-06, "loss": 0.0279, "step": 90400 }, { "epoch": 0.37721874973921604, "grad_norm": 0.7911079039721697, "learning_rate": 3.256643273430492e-06, "loss": 0.0258, "step": 90405 }, { "epoch": 0.37723961245420634, "grad_norm": 0.6373770218421597, "learning_rate": 3.2565532191134106e-06, "loss": 0.0262, "step": 90410 }, { "epoch": 0.37726047516919664, "grad_norm": 0.6365691410482037, "learning_rate": 3.2564631722665963e-06, "loss": 0.0277, "step": 90415 }, { "epoch": 0.3772813378841869, "grad_norm": 1.0117855950163759, "learning_rate": 3.256373132889016e-06, "loss": 0.0276, "step": 90420 }, { "epoch": 0.3773022005991772, "grad_norm": 0.6685379657640017, "learning_rate": 3.2562831009796374e-06, "loss": 0.0227, "step": 90425 }, { "epoch": 0.37732306331416743, "grad_norm": 0.7943989778050639, "learning_rate": 3.2561930765374277e-06, "loss": 0.0262, "step": 90430 }, { "epoch": 0.37734392602915773, "grad_norm": 0.5503299740303651, "learning_rate": 3.2561030595613557e-06, "loss": 0.0194, "step": 90435 }, { "epoch": 0.377364788744148, "grad_norm": 0.7431957407149717, "learning_rate": 3.2560130500503882e-06, "loss": 0.0317, "step": 90440 }, { "epoch": 0.37738565145913827, "grad_norm": 0.6507716481554323, "learning_rate": 3.2559230480034946e-06, "loss": 0.0281, "step": 90445 }, { "epoch": 0.37740651417412857, "grad_norm": 0.9526664072681208, "learning_rate": 3.2558330534196436e-06, "loss": 0.0295, "step": 90450 }, { "epoch": 0.3774273768891188, "grad_norm": 1.0970184468389774, "learning_rate": 3.2557430662978026e-06, "loss": 0.0197, "step": 90455 }, { "epoch": 0.3774482396041091, "grad_norm": 0.8416345829468761, "learning_rate": 3.2556530866369407e-06, "loss": 0.0364, "step": 90460 }, { "epoch": 0.3774691023190994, "grad_norm": 0.9378766219897291, "learning_rate": 3.255563114436027e-06, "loss": 0.027, "step": 90465 }, { "epoch": 0.37748996503408966, "grad_norm": 0.7596543377106122, "learning_rate": 3.2554731496940324e-06, "loss": 0.0295, "step": 90470 }, { "epoch": 0.37751082774907996, "grad_norm": 0.9758046282823803, "learning_rate": 3.2553831924099244e-06, "loss": 0.0297, "step": 90475 }, { "epoch": 0.37753169046407026, "grad_norm": 1.0818485009775403, "learning_rate": 3.2552932425826734e-06, "loss": 0.028, "step": 90480 }, { "epoch": 0.3775525531790605, "grad_norm": 0.7808173301367566, "learning_rate": 3.255203300211248e-06, "loss": 0.0255, "step": 90485 }, { "epoch": 0.3775734158940508, "grad_norm": 0.5276773086758519, "learning_rate": 3.2551133652946206e-06, "loss": 0.0252, "step": 90490 }, { "epoch": 0.37759427860904105, "grad_norm": 1.3294519815298895, "learning_rate": 3.25502343783176e-06, "loss": 0.026, "step": 90495 }, { "epoch": 0.37761514132403134, "grad_norm": 0.7339754637558121, "learning_rate": 3.2549335178216367e-06, "loss": 0.0302, "step": 90500 }, { "epoch": 0.37763600403902164, "grad_norm": 1.6510227736151613, "learning_rate": 3.2548436052632217e-06, "loss": 0.0304, "step": 90505 }, { "epoch": 0.3776568667540119, "grad_norm": 1.1030717643230121, "learning_rate": 3.254753700155486e-06, "loss": 0.0262, "step": 90510 }, { "epoch": 0.3776777294690022, "grad_norm": 0.6660963115930749, "learning_rate": 3.254663802497399e-06, "loss": 0.0217, "step": 90515 }, { "epoch": 0.37769859218399243, "grad_norm": 0.5520425409865175, "learning_rate": 3.2545739122879344e-06, "loss": 0.0209, "step": 90520 }, { "epoch": 0.37771945489898273, "grad_norm": 0.5596553178141892, "learning_rate": 3.254484029526062e-06, "loss": 0.0247, "step": 90525 }, { "epoch": 0.37774031761397303, "grad_norm": 0.9328561144581605, "learning_rate": 3.254394154210754e-06, "loss": 0.0275, "step": 90530 }, { "epoch": 0.3777611803289633, "grad_norm": 0.47214552123240267, "learning_rate": 3.2543042863409824e-06, "loss": 0.0252, "step": 90535 }, { "epoch": 0.3777820430439536, "grad_norm": 0.8315606537727563, "learning_rate": 3.2542144259157187e-06, "loss": 0.0302, "step": 90540 }, { "epoch": 0.3778029057589438, "grad_norm": 0.5431429636469561, "learning_rate": 3.254124572933936e-06, "loss": 0.0243, "step": 90545 }, { "epoch": 0.3778237684739341, "grad_norm": 0.6148248959605688, "learning_rate": 3.2540347273946055e-06, "loss": 0.0362, "step": 90550 }, { "epoch": 0.3778446311889244, "grad_norm": 0.47766806419287405, "learning_rate": 3.253944889296701e-06, "loss": 0.0182, "step": 90555 }, { "epoch": 0.37786549390391466, "grad_norm": 1.0179209147654111, "learning_rate": 3.253855058639195e-06, "loss": 0.0281, "step": 90560 }, { "epoch": 0.37788635661890496, "grad_norm": 0.9948524020668378, "learning_rate": 3.2537652354210595e-06, "loss": 0.0302, "step": 90565 }, { "epoch": 0.37790721933389526, "grad_norm": 0.45723727090429866, "learning_rate": 3.2536754196412693e-06, "loss": 0.0309, "step": 90570 }, { "epoch": 0.3779280820488855, "grad_norm": 0.5843187178894985, "learning_rate": 3.2535856112987973e-06, "loss": 0.0241, "step": 90575 }, { "epoch": 0.3779489447638758, "grad_norm": 0.4152424013482842, "learning_rate": 3.253495810392617e-06, "loss": 0.0242, "step": 90580 }, { "epoch": 0.37796980747886605, "grad_norm": 0.6696472541373019, "learning_rate": 3.2534060169217013e-06, "loss": 0.021, "step": 90585 }, { "epoch": 0.37799067019385635, "grad_norm": 0.8815689677712043, "learning_rate": 3.2533162308850262e-06, "loss": 0.0281, "step": 90590 }, { "epoch": 0.37801153290884665, "grad_norm": 0.6756067297464013, "learning_rate": 3.253226452281564e-06, "loss": 0.0273, "step": 90595 }, { "epoch": 0.3780323956238369, "grad_norm": 0.3228738670349386, "learning_rate": 3.253136681110291e-06, "loss": 0.0248, "step": 90600 }, { "epoch": 0.3780532583388272, "grad_norm": 0.9616276766001196, "learning_rate": 3.25304691737018e-06, "loss": 0.0319, "step": 90605 }, { "epoch": 0.37807412105381744, "grad_norm": 0.5880864584421478, "learning_rate": 3.2529571610602066e-06, "loss": 0.0231, "step": 90610 }, { "epoch": 0.37809498376880774, "grad_norm": 0.915706735984187, "learning_rate": 3.2528674121793463e-06, "loss": 0.0289, "step": 90615 }, { "epoch": 0.37811584648379803, "grad_norm": 0.5915595052000444, "learning_rate": 3.2527776707265735e-06, "loss": 0.0252, "step": 90620 }, { "epoch": 0.3781367091987883, "grad_norm": 0.9536542331387367, "learning_rate": 3.252687936700864e-06, "loss": 0.0396, "step": 90625 }, { "epoch": 0.3781575719137786, "grad_norm": 2.0549506518611955, "learning_rate": 3.252598210101194e-06, "loss": 0.0363, "step": 90630 }, { "epoch": 0.3781784346287688, "grad_norm": 0.8692436936681431, "learning_rate": 3.2525084909265374e-06, "loss": 0.0274, "step": 90635 }, { "epoch": 0.3781992973437591, "grad_norm": 0.6832357379972702, "learning_rate": 3.252418779175872e-06, "loss": 0.0285, "step": 90640 }, { "epoch": 0.3782201600587494, "grad_norm": 0.6348786314472676, "learning_rate": 3.2523290748481735e-06, "loss": 0.03, "step": 90645 }, { "epoch": 0.37824102277373967, "grad_norm": 0.8056343403857419, "learning_rate": 3.252239377942418e-06, "loss": 0.0207, "step": 90650 }, { "epoch": 0.37826188548872997, "grad_norm": 0.551814583884431, "learning_rate": 3.2521496884575833e-06, "loss": 0.0248, "step": 90655 }, { "epoch": 0.37828274820372026, "grad_norm": 0.8452563916337043, "learning_rate": 3.2520600063926444e-06, "loss": 0.0343, "step": 90660 }, { "epoch": 0.3783036109187105, "grad_norm": 0.4699131695857003, "learning_rate": 3.25197033174658e-06, "loss": 0.0299, "step": 90665 }, { "epoch": 0.3783244736337008, "grad_norm": 0.8571985777695195, "learning_rate": 3.2518806645183654e-06, "loss": 0.0248, "step": 90670 }, { "epoch": 0.37834533634869105, "grad_norm": 0.5265000650909234, "learning_rate": 3.25179100470698e-06, "loss": 0.0288, "step": 90675 }, { "epoch": 0.37836619906368135, "grad_norm": 0.6997326701290074, "learning_rate": 3.2517013523113993e-06, "loss": 0.031, "step": 90680 }, { "epoch": 0.37838706177867165, "grad_norm": 0.6892573374521324, "learning_rate": 3.251611707330603e-06, "loss": 0.0287, "step": 90685 }, { "epoch": 0.3784079244936619, "grad_norm": 1.0082005798740072, "learning_rate": 3.2515220697635685e-06, "loss": 0.0295, "step": 90690 }, { "epoch": 0.3784287872086522, "grad_norm": 0.7199961372586956, "learning_rate": 3.2514324396092733e-06, "loss": 0.0232, "step": 90695 }, { "epoch": 0.37844964992364244, "grad_norm": 1.1607639722764012, "learning_rate": 3.251342816866696e-06, "loss": 0.0309, "step": 90700 }, { "epoch": 0.37847051263863274, "grad_norm": 0.8115701160138751, "learning_rate": 3.2512532015348154e-06, "loss": 0.0316, "step": 90705 }, { "epoch": 0.37849137535362304, "grad_norm": 0.8188782455864178, "learning_rate": 3.2511635936126105e-06, "loss": 0.0225, "step": 90710 }, { "epoch": 0.3785122380686133, "grad_norm": 0.6425536594563712, "learning_rate": 3.25107399309906e-06, "loss": 0.0272, "step": 90715 }, { "epoch": 0.3785331007836036, "grad_norm": 0.8805930056265705, "learning_rate": 3.250984399993143e-06, "loss": 0.0239, "step": 90720 }, { "epoch": 0.3785539634985938, "grad_norm": 0.9356539480786168, "learning_rate": 3.2508948142938397e-06, "loss": 0.0231, "step": 90725 }, { "epoch": 0.3785748262135841, "grad_norm": 0.9094607684083387, "learning_rate": 3.250805236000128e-06, "loss": 0.0248, "step": 90730 }, { "epoch": 0.3785956889285744, "grad_norm": 1.6004706743725092, "learning_rate": 3.250715665110989e-06, "loss": 0.0356, "step": 90735 }, { "epoch": 0.37861655164356467, "grad_norm": 0.5454249347695829, "learning_rate": 3.250626101625402e-06, "loss": 0.0298, "step": 90740 }, { "epoch": 0.37863741435855497, "grad_norm": 0.49799335127383787, "learning_rate": 3.250536545542347e-06, "loss": 0.0225, "step": 90745 }, { "epoch": 0.37865827707354527, "grad_norm": 0.6195022835710684, "learning_rate": 3.250446996860805e-06, "loss": 0.0357, "step": 90750 }, { "epoch": 0.3786791397885355, "grad_norm": 1.1485978332221531, "learning_rate": 3.250357455579756e-06, "loss": 0.0254, "step": 90755 }, { "epoch": 0.3787000025035258, "grad_norm": 0.48817759459212146, "learning_rate": 3.2502679216981813e-06, "loss": 0.0238, "step": 90760 }, { "epoch": 0.37872086521851606, "grad_norm": 0.8000193384829712, "learning_rate": 3.2501783952150616e-06, "loss": 0.0297, "step": 90765 }, { "epoch": 0.37874172793350636, "grad_norm": 1.054253006442679, "learning_rate": 3.2500888761293783e-06, "loss": 0.0265, "step": 90770 }, { "epoch": 0.37876259064849666, "grad_norm": 1.0706558088276545, "learning_rate": 3.2499993644401116e-06, "loss": 0.029, "step": 90775 }, { "epoch": 0.3787834533634869, "grad_norm": 0.7857520618278375, "learning_rate": 3.2499098601462442e-06, "loss": 0.0325, "step": 90780 }, { "epoch": 0.3788043160784772, "grad_norm": 0.8735056153472263, "learning_rate": 3.249820363246757e-06, "loss": 0.0264, "step": 90785 }, { "epoch": 0.37882517879346744, "grad_norm": 0.5983461993356182, "learning_rate": 3.2497308737406325e-06, "loss": 0.0263, "step": 90790 }, { "epoch": 0.37884604150845774, "grad_norm": 0.6771422183273105, "learning_rate": 3.2496413916268528e-06, "loss": 0.0353, "step": 90795 }, { "epoch": 0.37886690422344804, "grad_norm": 0.7842157977784594, "learning_rate": 3.2495519169043994e-06, "loss": 0.0342, "step": 90800 }, { "epoch": 0.3788877669384383, "grad_norm": 0.92924585248988, "learning_rate": 3.249462449572256e-06, "loss": 0.0203, "step": 90805 }, { "epoch": 0.3789086296534286, "grad_norm": 0.7986294178198713, "learning_rate": 3.2493729896294047e-06, "loss": 0.025, "step": 90810 }, { "epoch": 0.37892949236841883, "grad_norm": 1.0896165734471697, "learning_rate": 3.2492835370748276e-06, "loss": 0.0203, "step": 90815 }, { "epoch": 0.37895035508340913, "grad_norm": 1.699183033349412, "learning_rate": 3.24919409190751e-06, "loss": 0.0636, "step": 90820 }, { "epoch": 0.37897121779839943, "grad_norm": 0.9769907893257622, "learning_rate": 3.2491046541264325e-06, "loss": 0.0357, "step": 90825 }, { "epoch": 0.3789920805133897, "grad_norm": 0.4161456058873024, "learning_rate": 3.2490152237305807e-06, "loss": 0.0181, "step": 90830 }, { "epoch": 0.37901294322838, "grad_norm": 0.7682996105475532, "learning_rate": 3.2489258007189373e-06, "loss": 0.0244, "step": 90835 }, { "epoch": 0.37903380594337027, "grad_norm": 0.7702863327545951, "learning_rate": 3.248836385090486e-06, "loss": 0.0254, "step": 90840 }, { "epoch": 0.3790546686583605, "grad_norm": 0.9991767695967637, "learning_rate": 3.2487469768442115e-06, "loss": 0.048, "step": 90845 }, { "epoch": 0.3790755313733508, "grad_norm": 0.8315447858296738, "learning_rate": 3.248657575979097e-06, "loss": 0.0228, "step": 90850 }, { "epoch": 0.37909639408834106, "grad_norm": 0.7127215915019134, "learning_rate": 3.248568182494129e-06, "loss": 0.0331, "step": 90855 }, { "epoch": 0.37911725680333136, "grad_norm": 0.7410875140940887, "learning_rate": 3.2484787963882897e-06, "loss": 0.0214, "step": 90860 }, { "epoch": 0.37913811951832166, "grad_norm": 0.7178565034193405, "learning_rate": 3.2483894176605664e-06, "loss": 0.0188, "step": 90865 }, { "epoch": 0.3791589822333119, "grad_norm": 1.139862351904393, "learning_rate": 3.248300046309942e-06, "loss": 0.0358, "step": 90870 }, { "epoch": 0.3791798449483022, "grad_norm": 1.001521260365904, "learning_rate": 3.2482106823354027e-06, "loss": 0.025, "step": 90875 }, { "epoch": 0.37920070766329245, "grad_norm": 0.6045625256793958, "learning_rate": 3.248121325735934e-06, "loss": 0.0267, "step": 90880 }, { "epoch": 0.37922157037828275, "grad_norm": 0.41247131264732745, "learning_rate": 3.2480319765105208e-06, "loss": 0.0286, "step": 90885 }, { "epoch": 0.37924243309327305, "grad_norm": 0.5676248884791268, "learning_rate": 3.24794263465815e-06, "loss": 0.0258, "step": 90890 }, { "epoch": 0.3792632958082633, "grad_norm": 0.6736136198726431, "learning_rate": 3.2478533001778077e-06, "loss": 0.0326, "step": 90895 }, { "epoch": 0.3792841585232536, "grad_norm": 1.1154639778027986, "learning_rate": 3.2477639730684785e-06, "loss": 0.0264, "step": 90900 }, { "epoch": 0.37930502123824383, "grad_norm": 0.9410422123094784, "learning_rate": 3.2476746533291503e-06, "loss": 0.0298, "step": 90905 }, { "epoch": 0.37932588395323413, "grad_norm": 0.5858596536285995, "learning_rate": 3.2475853409588098e-06, "loss": 0.0409, "step": 90910 }, { "epoch": 0.37934674666822443, "grad_norm": 0.472850346522191, "learning_rate": 3.247496035956443e-06, "loss": 0.0282, "step": 90915 }, { "epoch": 0.3793676093832147, "grad_norm": 0.6492334108539696, "learning_rate": 3.2474067383210366e-06, "loss": 0.0289, "step": 90920 }, { "epoch": 0.379388472098205, "grad_norm": 1.0059330641977007, "learning_rate": 3.2473174480515796e-06, "loss": 0.0429, "step": 90925 }, { "epoch": 0.3794093348131953, "grad_norm": 0.6745880534086868, "learning_rate": 3.247228165147057e-06, "loss": 0.0346, "step": 90930 }, { "epoch": 0.3794301975281855, "grad_norm": 0.5260605276553516, "learning_rate": 3.2471388896064575e-06, "loss": 0.0321, "step": 90935 }, { "epoch": 0.3794510602431758, "grad_norm": 0.9265936405994171, "learning_rate": 3.24704962142877e-06, "loss": 0.0252, "step": 90940 }, { "epoch": 0.37947192295816606, "grad_norm": 0.6074847928882244, "learning_rate": 3.2469603606129814e-06, "loss": 0.0231, "step": 90945 }, { "epoch": 0.37949278567315636, "grad_norm": 0.3571650392336947, "learning_rate": 3.246871107158079e-06, "loss": 0.0276, "step": 90950 }, { "epoch": 0.37951364838814666, "grad_norm": 1.1953936680402895, "learning_rate": 3.2467818610630524e-06, "loss": 0.0282, "step": 90955 }, { "epoch": 0.3795345111031369, "grad_norm": 0.5700844141393597, "learning_rate": 3.24669262232689e-06, "loss": 0.0236, "step": 90960 }, { "epoch": 0.3795553738181272, "grad_norm": 0.6886632696569229, "learning_rate": 3.24660339094858e-06, "loss": 0.0243, "step": 90965 }, { "epoch": 0.37957623653311745, "grad_norm": 0.5948927611272286, "learning_rate": 3.2465141669271115e-06, "loss": 0.0252, "step": 90970 }, { "epoch": 0.37959709924810775, "grad_norm": 0.5380184629161421, "learning_rate": 3.246424950261475e-06, "loss": 0.0231, "step": 90975 }, { "epoch": 0.37961796196309805, "grad_norm": 0.3915723324102676, "learning_rate": 3.246335740950658e-06, "loss": 0.0221, "step": 90980 }, { "epoch": 0.3796388246780883, "grad_norm": 0.5237135386557682, "learning_rate": 3.2462465389936505e-06, "loss": 0.0274, "step": 90985 }, { "epoch": 0.3796596873930786, "grad_norm": 1.0505037305700102, "learning_rate": 3.2461573443894422e-06, "loss": 0.0308, "step": 90990 }, { "epoch": 0.37968055010806884, "grad_norm": 0.8018042285950229, "learning_rate": 3.246068157137024e-06, "loss": 0.0244, "step": 90995 }, { "epoch": 0.37970141282305914, "grad_norm": 0.620728406441338, "learning_rate": 3.245978977235385e-06, "loss": 0.0309, "step": 91000 }, { "epoch": 0.37972227553804944, "grad_norm": 0.5251007441448763, "learning_rate": 3.2458898046835158e-06, "loss": 0.0222, "step": 91005 }, { "epoch": 0.3797431382530397, "grad_norm": 0.7388669355548682, "learning_rate": 3.2458006394804072e-06, "loss": 0.027, "step": 91010 }, { "epoch": 0.37976400096803, "grad_norm": 0.7254318738936043, "learning_rate": 3.245711481625049e-06, "loss": 0.0238, "step": 91015 }, { "epoch": 0.3797848636830203, "grad_norm": 0.579026845629078, "learning_rate": 3.2456223311164324e-06, "loss": 0.0251, "step": 91020 }, { "epoch": 0.3798057263980105, "grad_norm": 1.787371036365665, "learning_rate": 3.2455331879535493e-06, "loss": 0.0442, "step": 91025 }, { "epoch": 0.3798265891130008, "grad_norm": 0.6585174676867729, "learning_rate": 3.24544405213539e-06, "loss": 0.0289, "step": 91030 }, { "epoch": 0.37984745182799107, "grad_norm": 0.6360619985406903, "learning_rate": 3.2453549236609474e-06, "loss": 0.0212, "step": 91035 }, { "epoch": 0.37986831454298137, "grad_norm": 0.6206469703183536, "learning_rate": 3.2452658025292117e-06, "loss": 0.0245, "step": 91040 }, { "epoch": 0.37988917725797167, "grad_norm": 0.8438225664273268, "learning_rate": 3.245176688739175e-06, "loss": 0.0287, "step": 91045 }, { "epoch": 0.3799100399729619, "grad_norm": 0.7169947548085562, "learning_rate": 3.24508758228983e-06, "loss": 0.0226, "step": 91050 }, { "epoch": 0.3799309026879522, "grad_norm": 0.48419311906529194, "learning_rate": 3.2449984831801677e-06, "loss": 0.0309, "step": 91055 }, { "epoch": 0.37995176540294245, "grad_norm": 0.8709399300542783, "learning_rate": 3.244909391409182e-06, "loss": 0.0377, "step": 91060 }, { "epoch": 0.37997262811793275, "grad_norm": 1.018328496573116, "learning_rate": 3.244820306975865e-06, "loss": 0.0288, "step": 91065 }, { "epoch": 0.37999349083292305, "grad_norm": 0.5840465592719172, "learning_rate": 3.2447312298792093e-06, "loss": 0.0261, "step": 91070 }, { "epoch": 0.3800143535479133, "grad_norm": 0.47915270431314283, "learning_rate": 3.2446421601182075e-06, "loss": 0.0225, "step": 91075 }, { "epoch": 0.3800352162629036, "grad_norm": 1.2198631976780892, "learning_rate": 3.2445530976918542e-06, "loss": 0.0336, "step": 91080 }, { "epoch": 0.38005607897789384, "grad_norm": 1.373657643840403, "learning_rate": 3.244464042599142e-06, "loss": 0.0319, "step": 91085 }, { "epoch": 0.38007694169288414, "grad_norm": 1.1081501066475241, "learning_rate": 3.244374994839064e-06, "loss": 0.0271, "step": 91090 }, { "epoch": 0.38009780440787444, "grad_norm": 0.46805258925365273, "learning_rate": 3.244285954410615e-06, "loss": 0.0238, "step": 91095 }, { "epoch": 0.3801186671228647, "grad_norm": 0.8315066978859372, "learning_rate": 3.2441969213127885e-06, "loss": 0.0282, "step": 91100 }, { "epoch": 0.380139529837855, "grad_norm": 0.6136990031806141, "learning_rate": 3.244107895544578e-06, "loss": 0.024, "step": 91105 }, { "epoch": 0.3801603925528453, "grad_norm": 0.538169720588577, "learning_rate": 3.244018877104979e-06, "loss": 0.0262, "step": 91110 }, { "epoch": 0.3801812552678355, "grad_norm": 0.8646798518041614, "learning_rate": 3.2439298659929854e-06, "loss": 0.0294, "step": 91115 }, { "epoch": 0.3802021179828258, "grad_norm": 0.4162805965247215, "learning_rate": 3.2438408622075925e-06, "loss": 0.0224, "step": 91120 }, { "epoch": 0.38022298069781607, "grad_norm": 0.9024882981744662, "learning_rate": 3.243751865747794e-06, "loss": 0.0411, "step": 91125 }, { "epoch": 0.38024384341280637, "grad_norm": 1.0196525309717228, "learning_rate": 3.2436628766125865e-06, "loss": 0.0332, "step": 91130 }, { "epoch": 0.38026470612779667, "grad_norm": 1.0283459644919455, "learning_rate": 3.2435738948009655e-06, "loss": 0.0353, "step": 91135 }, { "epoch": 0.3802855688427869, "grad_norm": 1.3817897289772576, "learning_rate": 3.2434849203119253e-06, "loss": 0.0357, "step": 91140 }, { "epoch": 0.3803064315577772, "grad_norm": 0.9549535976459108, "learning_rate": 3.2433959531444612e-06, "loss": 0.0316, "step": 91145 }, { "epoch": 0.38032729427276746, "grad_norm": 0.6636478853659163, "learning_rate": 3.2433069932975714e-06, "loss": 0.024, "step": 91150 }, { "epoch": 0.38034815698775776, "grad_norm": 0.5463275589612808, "learning_rate": 3.24321804077025e-06, "loss": 0.0273, "step": 91155 }, { "epoch": 0.38036901970274806, "grad_norm": 1.0805774371673085, "learning_rate": 3.243129095561495e-06, "loss": 0.0289, "step": 91160 }, { "epoch": 0.3803898824177383, "grad_norm": 0.5843695101106513, "learning_rate": 3.2430401576703008e-06, "loss": 0.0229, "step": 91165 }, { "epoch": 0.3804107451327286, "grad_norm": 0.26654217572226013, "learning_rate": 3.242951227095666e-06, "loss": 0.0326, "step": 91170 }, { "epoch": 0.38043160784771884, "grad_norm": 0.44292076937740954, "learning_rate": 3.2428623038365857e-06, "loss": 0.0286, "step": 91175 }, { "epoch": 0.38045247056270914, "grad_norm": 0.6937226750649538, "learning_rate": 3.2427733878920585e-06, "loss": 0.023, "step": 91180 }, { "epoch": 0.38047333327769944, "grad_norm": 0.7662074684857267, "learning_rate": 3.2426844792610814e-06, "loss": 0.0264, "step": 91185 }, { "epoch": 0.3804941959926897, "grad_norm": 1.2064622097065956, "learning_rate": 3.242595577942651e-06, "loss": 0.03, "step": 91190 }, { "epoch": 0.38051505870768, "grad_norm": 0.5620396330876976, "learning_rate": 3.242506683935766e-06, "loss": 0.0311, "step": 91195 }, { "epoch": 0.3805359214226703, "grad_norm": 0.8639525703034082, "learning_rate": 3.2424177972394233e-06, "loss": 0.0292, "step": 91200 }, { "epoch": 0.38055678413766053, "grad_norm": 0.6852217658037897, "learning_rate": 3.2423289178526217e-06, "loss": 0.0218, "step": 91205 }, { "epoch": 0.38057764685265083, "grad_norm": 0.5112902472984531, "learning_rate": 3.2422400457743596e-06, "loss": 0.0238, "step": 91210 }, { "epoch": 0.3805985095676411, "grad_norm": 0.6133501095639072, "learning_rate": 3.2421511810036345e-06, "loss": 0.0245, "step": 91215 }, { "epoch": 0.3806193722826314, "grad_norm": 1.1680612767757714, "learning_rate": 3.2420623235394457e-06, "loss": 0.0275, "step": 91220 }, { "epoch": 0.3806402349976217, "grad_norm": 0.5723334648410685, "learning_rate": 3.241973473380791e-06, "loss": 0.0244, "step": 91225 }, { "epoch": 0.3806610977126119, "grad_norm": 0.8656039108882649, "learning_rate": 3.241884630526671e-06, "loss": 0.0268, "step": 91230 }, { "epoch": 0.3806819604276022, "grad_norm": 0.7627101318207329, "learning_rate": 3.2417957949760833e-06, "loss": 0.0262, "step": 91235 }, { "epoch": 0.38070282314259246, "grad_norm": 0.5931042303169348, "learning_rate": 3.241706966728029e-06, "loss": 0.0251, "step": 91240 }, { "epoch": 0.38072368585758276, "grad_norm": 1.1309905959838749, "learning_rate": 3.241618145781506e-06, "loss": 0.0219, "step": 91245 }, { "epoch": 0.38074454857257306, "grad_norm": 0.7657738671793601, "learning_rate": 3.241529332135515e-06, "loss": 0.0266, "step": 91250 }, { "epoch": 0.3807654112875633, "grad_norm": 0.5606779479954658, "learning_rate": 3.2414405257890556e-06, "loss": 0.0203, "step": 91255 }, { "epoch": 0.3807862740025536, "grad_norm": 1.0039623690782191, "learning_rate": 3.241351726741128e-06, "loss": 0.0344, "step": 91260 }, { "epoch": 0.38080713671754385, "grad_norm": 0.7405763994722108, "learning_rate": 3.241262934990733e-06, "loss": 0.0297, "step": 91265 }, { "epoch": 0.38082799943253415, "grad_norm": 0.8034201250023466, "learning_rate": 3.2411741505368703e-06, "loss": 0.0195, "step": 91270 }, { "epoch": 0.38084886214752445, "grad_norm": 0.8755312772395692, "learning_rate": 3.241085373378542e-06, "loss": 0.027, "step": 91275 }, { "epoch": 0.3808697248625147, "grad_norm": 0.8295132668305063, "learning_rate": 3.240996603514748e-06, "loss": 0.0406, "step": 91280 }, { "epoch": 0.380890587577505, "grad_norm": 0.3502022874178272, "learning_rate": 3.240907840944489e-06, "loss": 0.0255, "step": 91285 }, { "epoch": 0.3809114502924953, "grad_norm": 0.8523135984934389, "learning_rate": 3.2408190856667666e-06, "loss": 0.0225, "step": 91290 }, { "epoch": 0.38093231300748553, "grad_norm": 1.1958538574419222, "learning_rate": 3.2407303376805825e-06, "loss": 0.0297, "step": 91295 }, { "epoch": 0.38095317572247583, "grad_norm": 0.5312643464757664, "learning_rate": 3.2406415969849384e-06, "loss": 0.0242, "step": 91300 }, { "epoch": 0.3809740384374661, "grad_norm": 0.4041771589875987, "learning_rate": 3.2405528635788362e-06, "loss": 0.0255, "step": 91305 }, { "epoch": 0.3809949011524564, "grad_norm": 0.7920544011435066, "learning_rate": 3.240464137461279e-06, "loss": 0.0231, "step": 91310 }, { "epoch": 0.3810157638674467, "grad_norm": 1.1424136545005437, "learning_rate": 3.2403754186312665e-06, "loss": 0.0355, "step": 91315 }, { "epoch": 0.3810366265824369, "grad_norm": 0.6515944363102352, "learning_rate": 3.2402867070878037e-06, "loss": 0.0227, "step": 91320 }, { "epoch": 0.3810574892974272, "grad_norm": 1.029128961764504, "learning_rate": 3.240198002829891e-06, "loss": 0.0292, "step": 91325 }, { "epoch": 0.38107835201241747, "grad_norm": 1.101181003190238, "learning_rate": 3.2401093058565336e-06, "loss": 0.0292, "step": 91330 }, { "epoch": 0.38109921472740776, "grad_norm": 0.6813063783798846, "learning_rate": 3.2400206161667326e-06, "loss": 0.0258, "step": 91335 }, { "epoch": 0.38112007744239806, "grad_norm": 0.7452691379710796, "learning_rate": 3.239931933759492e-06, "loss": 0.0256, "step": 91340 }, { "epoch": 0.3811409401573883, "grad_norm": 0.4898239534523872, "learning_rate": 3.239843258633815e-06, "loss": 0.0268, "step": 91345 }, { "epoch": 0.3811618028723786, "grad_norm": 0.7766499410720874, "learning_rate": 3.2397545907887055e-06, "loss": 0.023, "step": 91350 }, { "epoch": 0.38118266558736885, "grad_norm": 0.9541296361798758, "learning_rate": 3.239665930223167e-06, "loss": 0.0295, "step": 91355 }, { "epoch": 0.38120352830235915, "grad_norm": 0.7562619632461643, "learning_rate": 3.239577276936204e-06, "loss": 0.0287, "step": 91360 }, { "epoch": 0.38122439101734945, "grad_norm": 0.7233776835443058, "learning_rate": 3.2394886309268198e-06, "loss": 0.0293, "step": 91365 }, { "epoch": 0.3812452537323397, "grad_norm": 1.2552574510981358, "learning_rate": 3.2393999921940195e-06, "loss": 0.0228, "step": 91370 }, { "epoch": 0.38126611644733, "grad_norm": 1.0329250673964743, "learning_rate": 3.239311360736807e-06, "loss": 0.0279, "step": 91375 }, { "epoch": 0.3812869791623203, "grad_norm": 0.5366179914592686, "learning_rate": 3.239222736554188e-06, "loss": 0.0258, "step": 91380 }, { "epoch": 0.38130784187731054, "grad_norm": 0.7393552963577419, "learning_rate": 3.2391341196451658e-06, "loss": 0.0226, "step": 91385 }, { "epoch": 0.38132870459230084, "grad_norm": 0.6145659351009989, "learning_rate": 3.239045510008747e-06, "loss": 0.0261, "step": 91390 }, { "epoch": 0.3813495673072911, "grad_norm": 0.7089778398434173, "learning_rate": 3.2389569076439364e-06, "loss": 0.0354, "step": 91395 }, { "epoch": 0.3813704300222814, "grad_norm": 1.4382014014014695, "learning_rate": 3.23886831254974e-06, "loss": 0.0403, "step": 91400 }, { "epoch": 0.3813912927372717, "grad_norm": 0.7459789538242677, "learning_rate": 3.2387797247251622e-06, "loss": 0.0231, "step": 91405 }, { "epoch": 0.3814121554522619, "grad_norm": 1.0342626316223196, "learning_rate": 3.2386911441692106e-06, "loss": 0.0406, "step": 91410 }, { "epoch": 0.3814330181672522, "grad_norm": 0.8589654077015474, "learning_rate": 3.2386025708808893e-06, "loss": 0.029, "step": 91415 }, { "epoch": 0.38145388088224247, "grad_norm": 0.5449671723369559, "learning_rate": 3.238514004859207e-06, "loss": 0.0209, "step": 91420 }, { "epoch": 0.38147474359723277, "grad_norm": 0.6225071249532218, "learning_rate": 3.2384254461031673e-06, "loss": 0.0262, "step": 91425 }, { "epoch": 0.38149560631222307, "grad_norm": 1.4610001031396078, "learning_rate": 3.2383368946117795e-06, "loss": 0.0325, "step": 91430 }, { "epoch": 0.3815164690272133, "grad_norm": 0.8769421522470656, "learning_rate": 3.2382483503840486e-06, "loss": 0.0314, "step": 91435 }, { "epoch": 0.3815373317422036, "grad_norm": 0.4919510789261801, "learning_rate": 3.2381598134189823e-06, "loss": 0.023, "step": 91440 }, { "epoch": 0.38155819445719386, "grad_norm": 0.6952126614810266, "learning_rate": 3.2380712837155888e-06, "loss": 0.0215, "step": 91445 }, { "epoch": 0.38157905717218416, "grad_norm": 0.5029017098321963, "learning_rate": 3.237982761272873e-06, "loss": 0.023, "step": 91450 }, { "epoch": 0.38159991988717445, "grad_norm": 0.5779057809589683, "learning_rate": 3.237894246089845e-06, "loss": 0.0229, "step": 91455 }, { "epoch": 0.3816207826021647, "grad_norm": 0.3554061116709051, "learning_rate": 3.237805738165511e-06, "loss": 0.0206, "step": 91460 }, { "epoch": 0.381641645317155, "grad_norm": 0.8610534227514458, "learning_rate": 3.2377172374988796e-06, "loss": 0.0277, "step": 91465 }, { "epoch": 0.3816625080321453, "grad_norm": 0.7827452617213181, "learning_rate": 3.23762874408896e-06, "loss": 0.0196, "step": 91470 }, { "epoch": 0.38168337074713554, "grad_norm": 0.6012371910399237, "learning_rate": 3.2375402579347578e-06, "loss": 0.0265, "step": 91475 }, { "epoch": 0.38170423346212584, "grad_norm": 0.8739497732748879, "learning_rate": 3.237451779035284e-06, "loss": 0.0322, "step": 91480 }, { "epoch": 0.3817250961771161, "grad_norm": 0.9377098630001742, "learning_rate": 3.237363307389546e-06, "loss": 0.0347, "step": 91485 }, { "epoch": 0.3817459588921064, "grad_norm": 1.2157027812160122, "learning_rate": 3.237274842996554e-06, "loss": 0.0321, "step": 91490 }, { "epoch": 0.3817668216070967, "grad_norm": 0.743552383245495, "learning_rate": 3.237186385855316e-06, "loss": 0.0225, "step": 91495 }, { "epoch": 0.38178768432208693, "grad_norm": 1.2617325905837453, "learning_rate": 3.237097935964842e-06, "loss": 0.0331, "step": 91500 }, { "epoch": 0.38180854703707723, "grad_norm": 0.780573695610552, "learning_rate": 3.23700949332414e-06, "loss": 0.019, "step": 91505 }, { "epoch": 0.3818294097520675, "grad_norm": 0.6286102557498209, "learning_rate": 3.236921057932222e-06, "loss": 0.021, "step": 91510 }, { "epoch": 0.38185027246705777, "grad_norm": 0.5684647393084222, "learning_rate": 3.2368326297880957e-06, "loss": 0.0243, "step": 91515 }, { "epoch": 0.38187113518204807, "grad_norm": 0.9141353653056984, "learning_rate": 3.2367442088907734e-06, "loss": 0.0199, "step": 91520 }, { "epoch": 0.3818919978970383, "grad_norm": 0.5854621353007118, "learning_rate": 3.236655795239263e-06, "loss": 0.0242, "step": 91525 }, { "epoch": 0.3819128606120286, "grad_norm": 0.3446427264902301, "learning_rate": 3.2365673888325766e-06, "loss": 0.0188, "step": 91530 }, { "epoch": 0.38193372332701886, "grad_norm": 0.7788286248689343, "learning_rate": 3.236478989669724e-06, "loss": 0.0288, "step": 91535 }, { "epoch": 0.38195458604200916, "grad_norm": 0.6879857224817855, "learning_rate": 3.2363905977497166e-06, "loss": 0.0189, "step": 91540 }, { "epoch": 0.38197544875699946, "grad_norm": 0.869204562121722, "learning_rate": 3.236302213071565e-06, "loss": 0.0363, "step": 91545 }, { "epoch": 0.3819963114719897, "grad_norm": 0.8309879181343712, "learning_rate": 3.23621383563428e-06, "loss": 0.0265, "step": 91550 }, { "epoch": 0.38201717418698, "grad_norm": 1.3434796872218873, "learning_rate": 3.236125465436874e-06, "loss": 0.0262, "step": 91555 }, { "epoch": 0.3820380369019703, "grad_norm": 1.1728828577484511, "learning_rate": 3.236037102478357e-06, "loss": 0.0272, "step": 91560 }, { "epoch": 0.38205889961696055, "grad_norm": 0.9242469802133448, "learning_rate": 3.2359487467577428e-06, "loss": 0.0253, "step": 91565 }, { "epoch": 0.38207976233195085, "grad_norm": 0.8155570859512369, "learning_rate": 3.2358603982740418e-06, "loss": 0.0339, "step": 91570 }, { "epoch": 0.3821006250469411, "grad_norm": 0.7985353020907062, "learning_rate": 3.2357720570262667e-06, "loss": 0.0278, "step": 91575 }, { "epoch": 0.3821214877619314, "grad_norm": 0.6001064905883168, "learning_rate": 3.23568372301343e-06, "loss": 0.0218, "step": 91580 }, { "epoch": 0.3821423504769217, "grad_norm": 0.53595770264718, "learning_rate": 3.2355953962345437e-06, "loss": 0.0198, "step": 91585 }, { "epoch": 0.38216321319191193, "grad_norm": 0.40056823532498204, "learning_rate": 3.235507076688621e-06, "loss": 0.0227, "step": 91590 }, { "epoch": 0.38218407590690223, "grad_norm": 0.7448095513084348, "learning_rate": 3.2354187643746743e-06, "loss": 0.0231, "step": 91595 }, { "epoch": 0.3822049386218925, "grad_norm": 1.2092063143033345, "learning_rate": 3.235330459291717e-06, "loss": 0.0254, "step": 91600 }, { "epoch": 0.3822258013368828, "grad_norm": 0.5713246272639606, "learning_rate": 3.2352421614387625e-06, "loss": 0.0292, "step": 91605 }, { "epoch": 0.3822466640518731, "grad_norm": 0.7979787501065011, "learning_rate": 3.235153870814824e-06, "loss": 0.0266, "step": 91610 }, { "epoch": 0.3822675267668633, "grad_norm": 1.433814136171829, "learning_rate": 3.2350655874189153e-06, "loss": 0.0252, "step": 91615 }, { "epoch": 0.3822883894818536, "grad_norm": 0.8839727762681325, "learning_rate": 3.23497731125005e-06, "loss": 0.027, "step": 91620 }, { "epoch": 0.38230925219684386, "grad_norm": 0.8561424178139021, "learning_rate": 3.2348890423072426e-06, "loss": 0.0285, "step": 91625 }, { "epoch": 0.38233011491183416, "grad_norm": 0.9397310704859071, "learning_rate": 3.2348007805895063e-06, "loss": 0.0356, "step": 91630 }, { "epoch": 0.38235097762682446, "grad_norm": 0.97457370847193, "learning_rate": 3.2347125260958566e-06, "loss": 0.0252, "step": 91635 }, { "epoch": 0.3823718403418147, "grad_norm": 0.8032129019141779, "learning_rate": 3.234624278825308e-06, "loss": 0.0332, "step": 91640 }, { "epoch": 0.382392703056805, "grad_norm": 0.551977996804539, "learning_rate": 3.2345360387768747e-06, "loss": 0.0248, "step": 91645 }, { "epoch": 0.3824135657717953, "grad_norm": 0.7403672158743905, "learning_rate": 3.2344478059495714e-06, "loss": 0.0257, "step": 91650 }, { "epoch": 0.38243442848678555, "grad_norm": 0.5616310637374793, "learning_rate": 3.2343595803424146e-06, "loss": 0.0241, "step": 91655 }, { "epoch": 0.38245529120177585, "grad_norm": 0.7823481305108207, "learning_rate": 3.234271361954418e-06, "loss": 0.0226, "step": 91660 }, { "epoch": 0.3824761539167661, "grad_norm": 1.028621613652971, "learning_rate": 3.234183150784599e-06, "loss": 0.0331, "step": 91665 }, { "epoch": 0.3824970166317564, "grad_norm": 0.8138933340939543, "learning_rate": 3.234094946831971e-06, "loss": 0.0294, "step": 91670 }, { "epoch": 0.3825178793467467, "grad_norm": 1.0550517943955893, "learning_rate": 3.2340067500955517e-06, "loss": 0.05, "step": 91675 }, { "epoch": 0.38253874206173694, "grad_norm": 0.9542413022300861, "learning_rate": 3.233918560574356e-06, "loss": 0.0347, "step": 91680 }, { "epoch": 0.38255960477672724, "grad_norm": 0.6441207116504051, "learning_rate": 3.2338303782674017e-06, "loss": 0.0281, "step": 91685 }, { "epoch": 0.3825804674917175, "grad_norm": 0.6954138526156959, "learning_rate": 3.2337422031737044e-06, "loss": 0.0311, "step": 91690 }, { "epoch": 0.3826013302067078, "grad_norm": 0.7366674676784117, "learning_rate": 3.233654035292281e-06, "loss": 0.0449, "step": 91695 }, { "epoch": 0.3826221929216981, "grad_norm": 1.1602109046110303, "learning_rate": 3.233565874622147e-06, "loss": 0.0248, "step": 91700 }, { "epoch": 0.3826430556366883, "grad_norm": 0.6178323655395841, "learning_rate": 3.2334777211623216e-06, "loss": 0.0311, "step": 91705 }, { "epoch": 0.3826639183516786, "grad_norm": 0.8081938335152411, "learning_rate": 3.2333895749118194e-06, "loss": 0.0212, "step": 91710 }, { "epoch": 0.38268478106666887, "grad_norm": 1.1153532742727115, "learning_rate": 3.233301435869661e-06, "loss": 0.0239, "step": 91715 }, { "epoch": 0.38270564378165917, "grad_norm": 0.9518434704852551, "learning_rate": 3.233213304034861e-06, "loss": 0.0282, "step": 91720 }, { "epoch": 0.38272650649664947, "grad_norm": 1.0751479454225095, "learning_rate": 3.2331251794064394e-06, "loss": 0.0254, "step": 91725 }, { "epoch": 0.3827473692116397, "grad_norm": 0.44931655838741136, "learning_rate": 3.2330370619834127e-06, "loss": 0.0252, "step": 91730 }, { "epoch": 0.38276823192663, "grad_norm": 0.7105196174381485, "learning_rate": 3.2329489517648e-06, "loss": 0.0394, "step": 91735 }, { "epoch": 0.3827890946416203, "grad_norm": 0.5885622837137503, "learning_rate": 3.232860848749619e-06, "loss": 0.0295, "step": 91740 }, { "epoch": 0.38280995735661055, "grad_norm": 0.891477306127031, "learning_rate": 3.2327727529368884e-06, "loss": 0.0236, "step": 91745 }, { "epoch": 0.38283082007160085, "grad_norm": 1.035000182852182, "learning_rate": 3.2326846643256272e-06, "loss": 0.0255, "step": 91750 }, { "epoch": 0.3828516827865911, "grad_norm": 0.8881696344646535, "learning_rate": 3.2325965829148543e-06, "loss": 0.0321, "step": 91755 }, { "epoch": 0.3828725455015814, "grad_norm": 0.5067862307551024, "learning_rate": 3.2325085087035883e-06, "loss": 0.0207, "step": 91760 }, { "epoch": 0.3828934082165717, "grad_norm": 1.1871698668737671, "learning_rate": 3.2324204416908482e-06, "loss": 0.0258, "step": 91765 }, { "epoch": 0.38291427093156194, "grad_norm": 0.8253158155841356, "learning_rate": 3.232332381875655e-06, "loss": 0.0351, "step": 91770 }, { "epoch": 0.38293513364655224, "grad_norm": 0.48389549680925276, "learning_rate": 3.232244329257027e-06, "loss": 0.0416, "step": 91775 }, { "epoch": 0.3829559963615425, "grad_norm": 1.0245895999626347, "learning_rate": 3.232156283833985e-06, "loss": 0.0248, "step": 91780 }, { "epoch": 0.3829768590765328, "grad_norm": 0.6736598205318586, "learning_rate": 3.232068245605548e-06, "loss": 0.0319, "step": 91785 }, { "epoch": 0.3829977217915231, "grad_norm": 0.6585437326141708, "learning_rate": 3.2319802145707365e-06, "loss": 0.0253, "step": 91790 }, { "epoch": 0.3830185845065133, "grad_norm": 0.3816783616723026, "learning_rate": 3.231892190728571e-06, "loss": 0.0253, "step": 91795 }, { "epoch": 0.3830394472215036, "grad_norm": 0.9055272427888119, "learning_rate": 3.231804174078072e-06, "loss": 0.0319, "step": 91800 }, { "epoch": 0.38306030993649387, "grad_norm": 0.8756145736996785, "learning_rate": 3.231716164618261e-06, "loss": 0.0205, "step": 91805 }, { "epoch": 0.38308117265148417, "grad_norm": 0.41629349288587075, "learning_rate": 3.231628162348158e-06, "loss": 0.02, "step": 91810 }, { "epoch": 0.38310203536647447, "grad_norm": 1.1886028583136201, "learning_rate": 3.2315401672667845e-06, "loss": 0.0324, "step": 91815 }, { "epoch": 0.3831228980814647, "grad_norm": 0.7792957900399529, "learning_rate": 3.2314521793731622e-06, "loss": 0.036, "step": 91820 }, { "epoch": 0.383143760796455, "grad_norm": 0.8213450068834169, "learning_rate": 3.2313641986663113e-06, "loss": 0.0369, "step": 91825 }, { "epoch": 0.3831646235114453, "grad_norm": 0.42940578928931566, "learning_rate": 3.2312762251452555e-06, "loss": 0.0414, "step": 91830 }, { "epoch": 0.38318548622643556, "grad_norm": 0.9605295125948159, "learning_rate": 3.2311882588090148e-06, "loss": 0.0245, "step": 91835 }, { "epoch": 0.38320634894142586, "grad_norm": 1.2934107487380646, "learning_rate": 3.2311002996566134e-06, "loss": 0.0369, "step": 91840 }, { "epoch": 0.3832272116564161, "grad_norm": 1.0738299788155548, "learning_rate": 3.231012347687072e-06, "loss": 0.026, "step": 91845 }, { "epoch": 0.3832480743714064, "grad_norm": 0.5387783429811572, "learning_rate": 3.2309244028994123e-06, "loss": 0.0219, "step": 91850 }, { "epoch": 0.3832689370863967, "grad_norm": 0.5514644133748002, "learning_rate": 3.230836465292659e-06, "loss": 0.024, "step": 91855 }, { "epoch": 0.38328979980138694, "grad_norm": 0.6434418145640076, "learning_rate": 3.2307485348658328e-06, "loss": 0.0328, "step": 91860 }, { "epoch": 0.38331066251637724, "grad_norm": 1.2140563732589338, "learning_rate": 3.230660611617959e-06, "loss": 0.0343, "step": 91865 }, { "epoch": 0.3833315252313675, "grad_norm": 2.9017906866160974, "learning_rate": 3.2305726955480593e-06, "loss": 0.026, "step": 91870 }, { "epoch": 0.3833523879463578, "grad_norm": 0.931947319050916, "learning_rate": 3.230484786655157e-06, "loss": 0.028, "step": 91875 }, { "epoch": 0.3833732506613481, "grad_norm": 0.8407640118047716, "learning_rate": 3.2303968849382768e-06, "loss": 0.0229, "step": 91880 }, { "epoch": 0.38339411337633833, "grad_norm": 1.1280567011105807, "learning_rate": 3.2303089903964407e-06, "loss": 0.0415, "step": 91885 }, { "epoch": 0.38341497609132863, "grad_norm": 0.46589945290611917, "learning_rate": 3.2302211030286746e-06, "loss": 0.0268, "step": 91890 }, { "epoch": 0.3834358388063189, "grad_norm": 0.714870363271357, "learning_rate": 3.230133222834001e-06, "loss": 0.0337, "step": 91895 }, { "epoch": 0.3834567015213092, "grad_norm": 0.4929124646404595, "learning_rate": 3.2300453498114453e-06, "loss": 0.0295, "step": 91900 }, { "epoch": 0.3834775642362995, "grad_norm": 0.7810743834080833, "learning_rate": 3.229957483960032e-06, "loss": 0.0367, "step": 91905 }, { "epoch": 0.3834984269512897, "grad_norm": 0.792102677973242, "learning_rate": 3.2298696252787844e-06, "loss": 0.0247, "step": 91910 }, { "epoch": 0.38351928966628, "grad_norm": 0.49031636377170046, "learning_rate": 3.2297817737667285e-06, "loss": 0.0313, "step": 91915 }, { "epoch": 0.3835401523812703, "grad_norm": 0.7347242824387807, "learning_rate": 3.2296939294228897e-06, "loss": 0.0337, "step": 91920 }, { "epoch": 0.38356101509626056, "grad_norm": 0.6358852050126238, "learning_rate": 3.2296060922462922e-06, "loss": 0.0298, "step": 91925 }, { "epoch": 0.38358187781125086, "grad_norm": 0.6329529139825978, "learning_rate": 3.2295182622359628e-06, "loss": 0.0367, "step": 91930 }, { "epoch": 0.3836027405262411, "grad_norm": 1.0594221243143096, "learning_rate": 3.2294304393909255e-06, "loss": 0.0254, "step": 91935 }, { "epoch": 0.3836236032412314, "grad_norm": 0.7409636275523174, "learning_rate": 3.229342623710207e-06, "loss": 0.0258, "step": 91940 }, { "epoch": 0.3836444659562217, "grad_norm": 1.4091250981524406, "learning_rate": 3.2292548151928332e-06, "loss": 0.0246, "step": 91945 }, { "epoch": 0.38366532867121195, "grad_norm": 0.9320703471332664, "learning_rate": 3.22916701383783e-06, "loss": 0.0303, "step": 91950 }, { "epoch": 0.38368619138620225, "grad_norm": 0.29034937340670586, "learning_rate": 3.229079219644224e-06, "loss": 0.0196, "step": 91955 }, { "epoch": 0.3837070541011925, "grad_norm": 1.2872197778467374, "learning_rate": 3.228991432611042e-06, "loss": 0.027, "step": 91960 }, { "epoch": 0.3837279168161828, "grad_norm": 0.6935225169437075, "learning_rate": 3.2289036527373107e-06, "loss": 0.0302, "step": 91965 }, { "epoch": 0.3837487795311731, "grad_norm": 0.6166971736073245, "learning_rate": 3.2288158800220566e-06, "loss": 0.025, "step": 91970 }, { "epoch": 0.38376964224616333, "grad_norm": 0.8996568755328668, "learning_rate": 3.2287281144643064e-06, "loss": 0.0426, "step": 91975 }, { "epoch": 0.38379050496115363, "grad_norm": 1.3556718627053042, "learning_rate": 3.228640356063088e-06, "loss": 0.0255, "step": 91980 }, { "epoch": 0.3838113676761439, "grad_norm": 0.6411324811365374, "learning_rate": 3.2285526048174295e-06, "loss": 0.0241, "step": 91985 }, { "epoch": 0.3838322303911342, "grad_norm": 0.7873886463744767, "learning_rate": 3.2284648607263563e-06, "loss": 0.0247, "step": 91990 }, { "epoch": 0.3838530931061245, "grad_norm": 0.9720787949161488, "learning_rate": 3.228377123788899e-06, "loss": 0.0399, "step": 91995 }, { "epoch": 0.3838739558211147, "grad_norm": 1.0963254004180936, "learning_rate": 3.2282893940040843e-06, "loss": 0.0211, "step": 92000 }, { "epoch": 0.383894818536105, "grad_norm": 0.5552821124630837, "learning_rate": 3.2282016713709404e-06, "loss": 0.0309, "step": 92005 }, { "epoch": 0.3839156812510953, "grad_norm": 0.7191282481910958, "learning_rate": 3.2281139558884956e-06, "loss": 0.0231, "step": 92010 }, { "epoch": 0.38393654396608556, "grad_norm": 0.5238745074576545, "learning_rate": 3.2280262475557776e-06, "loss": 0.0223, "step": 92015 }, { "epoch": 0.38395740668107586, "grad_norm": 1.0045354295407813, "learning_rate": 3.2279385463718177e-06, "loss": 0.03, "step": 92020 }, { "epoch": 0.3839782693960661, "grad_norm": 0.5203851041949147, "learning_rate": 3.227850852335643e-06, "loss": 0.0211, "step": 92025 }, { "epoch": 0.3839991321110564, "grad_norm": 0.7178653929445447, "learning_rate": 3.2277631654462825e-06, "loss": 0.0242, "step": 92030 }, { "epoch": 0.3840199948260467, "grad_norm": 0.6201126499454701, "learning_rate": 3.2276754857027667e-06, "loss": 0.0249, "step": 92035 }, { "epoch": 0.38404085754103695, "grad_norm": 1.474137979027752, "learning_rate": 3.227587813104123e-06, "loss": 0.0282, "step": 92040 }, { "epoch": 0.38406172025602725, "grad_norm": 1.3148935474285808, "learning_rate": 3.2275001476493835e-06, "loss": 0.029, "step": 92045 }, { "epoch": 0.3840825829710175, "grad_norm": 0.7282857543541823, "learning_rate": 3.227412489337576e-06, "loss": 0.025, "step": 92050 }, { "epoch": 0.3841034456860078, "grad_norm": 0.7543899718251061, "learning_rate": 3.227324838167733e-06, "loss": 0.0272, "step": 92055 }, { "epoch": 0.3841243084009981, "grad_norm": 0.5188724381914235, "learning_rate": 3.2272371941388823e-06, "loss": 0.0328, "step": 92060 }, { "epoch": 0.38414517111598834, "grad_norm": 0.7800148925292166, "learning_rate": 3.2271495572500556e-06, "loss": 0.0291, "step": 92065 }, { "epoch": 0.38416603383097864, "grad_norm": 0.31404565948955715, "learning_rate": 3.2270619275002835e-06, "loss": 0.0222, "step": 92070 }, { "epoch": 0.3841868965459689, "grad_norm": 0.7531577098303505, "learning_rate": 3.2269743048885953e-06, "loss": 0.0186, "step": 92075 }, { "epoch": 0.3842077592609592, "grad_norm": 1.3794561396457912, "learning_rate": 3.2268866894140245e-06, "loss": 0.0306, "step": 92080 }, { "epoch": 0.3842286219759495, "grad_norm": 0.4809240399887239, "learning_rate": 3.2267990810756008e-06, "loss": 0.023, "step": 92085 }, { "epoch": 0.3842494846909397, "grad_norm": 1.072307981516191, "learning_rate": 3.2267114798723548e-06, "loss": 0.0192, "step": 92090 }, { "epoch": 0.38427034740593, "grad_norm": 0.4971810124763093, "learning_rate": 3.2266238858033196e-06, "loss": 0.0188, "step": 92095 }, { "epoch": 0.3842912101209203, "grad_norm": 0.6601359651005141, "learning_rate": 3.2265362988675257e-06, "loss": 0.0236, "step": 92100 }, { "epoch": 0.38431207283591057, "grad_norm": 1.8570354715025272, "learning_rate": 3.2264487190640056e-06, "loss": 0.0231, "step": 92105 }, { "epoch": 0.38433293555090087, "grad_norm": 0.44049528531603294, "learning_rate": 3.2263611463917917e-06, "loss": 0.0271, "step": 92110 }, { "epoch": 0.3843537982658911, "grad_norm": 0.6742375335019126, "learning_rate": 3.226273580849915e-06, "loss": 0.0271, "step": 92115 }, { "epoch": 0.3843746609808814, "grad_norm": 1.3889242450963029, "learning_rate": 3.2261860224374095e-06, "loss": 0.0305, "step": 92120 }, { "epoch": 0.3843955236958717, "grad_norm": 0.5944350424242069, "learning_rate": 3.226098471153306e-06, "loss": 0.0303, "step": 92125 }, { "epoch": 0.38441638641086195, "grad_norm": 0.4548809064064784, "learning_rate": 3.2260109269966394e-06, "loss": 0.0383, "step": 92130 }, { "epoch": 0.38443724912585225, "grad_norm": 0.6389742127539338, "learning_rate": 3.2259233899664417e-06, "loss": 0.0431, "step": 92135 }, { "epoch": 0.3844581118408425, "grad_norm": 0.857447313006547, "learning_rate": 3.225835860061745e-06, "loss": 0.0339, "step": 92140 }, { "epoch": 0.3844789745558328, "grad_norm": 0.7094114999222095, "learning_rate": 3.2257483372815853e-06, "loss": 0.0269, "step": 92145 }, { "epoch": 0.3844998372708231, "grad_norm": 0.5976948636415238, "learning_rate": 3.2256608216249934e-06, "loss": 0.0323, "step": 92150 }, { "epoch": 0.38452069998581334, "grad_norm": 0.9078861898849583, "learning_rate": 3.2255733130910045e-06, "loss": 0.022, "step": 92155 }, { "epoch": 0.38454156270080364, "grad_norm": 0.8915826511102074, "learning_rate": 3.225485811678652e-06, "loss": 0.0289, "step": 92160 }, { "epoch": 0.3845624254157939, "grad_norm": 1.135602731881065, "learning_rate": 3.22539831738697e-06, "loss": 0.0287, "step": 92165 }, { "epoch": 0.3845832881307842, "grad_norm": 0.9419395345806796, "learning_rate": 3.2253108302149943e-06, "loss": 0.0304, "step": 92170 }, { "epoch": 0.3846041508457745, "grad_norm": 0.950718811263687, "learning_rate": 3.225223350161757e-06, "loss": 0.0315, "step": 92175 }, { "epoch": 0.38462501356076473, "grad_norm": 0.4768803215651235, "learning_rate": 3.2251358772262936e-06, "loss": 0.019, "step": 92180 }, { "epoch": 0.384645876275755, "grad_norm": 0.6246573931882377, "learning_rate": 3.2250484114076393e-06, "loss": 0.0247, "step": 92185 }, { "epoch": 0.3846667389907453, "grad_norm": 0.7010302521467678, "learning_rate": 3.224960952704829e-06, "loss": 0.0272, "step": 92190 }, { "epoch": 0.38468760170573557, "grad_norm": 0.43669950521251505, "learning_rate": 3.2248735011168975e-06, "loss": 0.0232, "step": 92195 }, { "epoch": 0.38470846442072587, "grad_norm": 0.6818529933775485, "learning_rate": 3.2247860566428813e-06, "loss": 0.0276, "step": 92200 }, { "epoch": 0.3847293271357161, "grad_norm": 0.4657615549082522, "learning_rate": 3.224698619281814e-06, "loss": 0.0252, "step": 92205 }, { "epoch": 0.3847501898507064, "grad_norm": 0.5733704076465689, "learning_rate": 3.2246111890327334e-06, "loss": 0.0272, "step": 92210 }, { "epoch": 0.3847710525656967, "grad_norm": 0.7746703624971862, "learning_rate": 3.2245237658946742e-06, "loss": 0.0307, "step": 92215 }, { "epoch": 0.38479191528068696, "grad_norm": 1.0843617058118602, "learning_rate": 3.224436349866673e-06, "loss": 0.0302, "step": 92220 }, { "epoch": 0.38481277799567726, "grad_norm": 0.6051448432909996, "learning_rate": 3.2243489409477658e-06, "loss": 0.03, "step": 92225 }, { "epoch": 0.3848336407106675, "grad_norm": 0.5944926740513248, "learning_rate": 3.2242615391369895e-06, "loss": 0.0311, "step": 92230 }, { "epoch": 0.3848545034256578, "grad_norm": 1.269215866071251, "learning_rate": 3.2241741444333804e-06, "loss": 0.0258, "step": 92235 }, { "epoch": 0.3848753661406481, "grad_norm": 1.0506457219319008, "learning_rate": 3.2240867568359745e-06, "loss": 0.0244, "step": 92240 }, { "epoch": 0.38489622885563834, "grad_norm": 0.7400853285515457, "learning_rate": 3.223999376343811e-06, "loss": 0.0282, "step": 92245 }, { "epoch": 0.38491709157062864, "grad_norm": 1.5754128449337592, "learning_rate": 3.2239120029559253e-06, "loss": 0.026, "step": 92250 }, { "epoch": 0.3849379542856189, "grad_norm": 1.001426804202917, "learning_rate": 3.223824636671355e-06, "loss": 0.0334, "step": 92255 }, { "epoch": 0.3849588170006092, "grad_norm": 0.6887842484740716, "learning_rate": 3.2237372774891384e-06, "loss": 0.0231, "step": 92260 }, { "epoch": 0.3849796797155995, "grad_norm": 0.5446329012372118, "learning_rate": 3.2236499254083127e-06, "loss": 0.0364, "step": 92265 }, { "epoch": 0.38500054243058973, "grad_norm": 0.7034337779961126, "learning_rate": 3.2235625804279157e-06, "loss": 0.0238, "step": 92270 }, { "epoch": 0.38502140514558003, "grad_norm": 0.6379514528418526, "learning_rate": 3.2234752425469863e-06, "loss": 0.028, "step": 92275 }, { "epoch": 0.38504226786057033, "grad_norm": 0.7460964355568741, "learning_rate": 3.2233879117645622e-06, "loss": 0.0283, "step": 92280 }, { "epoch": 0.3850631305755606, "grad_norm": 0.8729583381278064, "learning_rate": 3.223300588079682e-06, "loss": 0.0207, "step": 92285 }, { "epoch": 0.3850839932905509, "grad_norm": 0.820405060364745, "learning_rate": 3.223213271491384e-06, "loss": 0.0283, "step": 92290 }, { "epoch": 0.3851048560055411, "grad_norm": 0.3899628243992813, "learning_rate": 3.2231259619987075e-06, "loss": 0.0238, "step": 92295 }, { "epoch": 0.3851257187205314, "grad_norm": 0.8483411809408548, "learning_rate": 3.2230386596006917e-06, "loss": 0.0307, "step": 92300 }, { "epoch": 0.3851465814355217, "grad_norm": 1.5766377133645864, "learning_rate": 3.222951364296375e-06, "loss": 0.0339, "step": 92305 }, { "epoch": 0.38516744415051196, "grad_norm": 0.8525660188285732, "learning_rate": 3.222864076084798e-06, "loss": 0.0216, "step": 92310 }, { "epoch": 0.38518830686550226, "grad_norm": 0.5658589141659123, "learning_rate": 3.222776794964999e-06, "loss": 0.0217, "step": 92315 }, { "epoch": 0.3852091695804925, "grad_norm": 0.9877724739930854, "learning_rate": 3.2226895209360194e-06, "loss": 0.0202, "step": 92320 }, { "epoch": 0.3852300322954828, "grad_norm": 0.9179266720365175, "learning_rate": 3.2226022539968975e-06, "loss": 0.0253, "step": 92325 }, { "epoch": 0.3852508950104731, "grad_norm": 0.3526293568661482, "learning_rate": 3.222514994146674e-06, "loss": 0.0237, "step": 92330 }, { "epoch": 0.38527175772546335, "grad_norm": 0.8559075937557441, "learning_rate": 3.222427741384389e-06, "loss": 0.0211, "step": 92335 }, { "epoch": 0.38529262044045365, "grad_norm": 0.9079636991372773, "learning_rate": 3.2223404957090836e-06, "loss": 0.0282, "step": 92340 }, { "epoch": 0.3853134831554439, "grad_norm": 0.7391151985630926, "learning_rate": 3.2222532571197978e-06, "loss": 0.0269, "step": 92345 }, { "epoch": 0.3853343458704342, "grad_norm": 1.0519944140532158, "learning_rate": 3.2221660256155733e-06, "loss": 0.0359, "step": 92350 }, { "epoch": 0.3853552085854245, "grad_norm": 0.9395659120751128, "learning_rate": 3.22207880119545e-06, "loss": 0.0278, "step": 92355 }, { "epoch": 0.38537607130041474, "grad_norm": 0.4830858564705459, "learning_rate": 3.2219915838584703e-06, "loss": 0.017, "step": 92360 }, { "epoch": 0.38539693401540503, "grad_norm": 1.3364390156986408, "learning_rate": 3.221904373603675e-06, "loss": 0.0271, "step": 92365 }, { "epoch": 0.3854177967303953, "grad_norm": 0.5386868894392208, "learning_rate": 3.2218171704301047e-06, "loss": 0.0399, "step": 92370 }, { "epoch": 0.3854386594453856, "grad_norm": 1.3511674228724018, "learning_rate": 3.221729974336803e-06, "loss": 0.032, "step": 92375 }, { "epoch": 0.3854595221603759, "grad_norm": 0.9912075340712557, "learning_rate": 3.221642785322811e-06, "loss": 0.034, "step": 92380 }, { "epoch": 0.3854803848753661, "grad_norm": 0.651610097124037, "learning_rate": 3.221555603387171e-06, "loss": 0.0248, "step": 92385 }, { "epoch": 0.3855012475903564, "grad_norm": 0.5650231099457605, "learning_rate": 3.2214684285289245e-06, "loss": 0.0188, "step": 92390 }, { "epoch": 0.3855221103053467, "grad_norm": 0.6888848410980491, "learning_rate": 3.2213812607471152e-06, "loss": 0.0188, "step": 92395 }, { "epoch": 0.38554297302033697, "grad_norm": 0.9767227033238817, "learning_rate": 3.2212941000407844e-06, "loss": 0.021, "step": 92400 }, { "epoch": 0.38556383573532726, "grad_norm": 0.7603246658812352, "learning_rate": 3.2212069464089756e-06, "loss": 0.0267, "step": 92405 }, { "epoch": 0.3855846984503175, "grad_norm": 0.5261896266945066, "learning_rate": 3.221119799850733e-06, "loss": 0.0199, "step": 92410 }, { "epoch": 0.3856055611653078, "grad_norm": 0.8081821378963655, "learning_rate": 3.2210326603650975e-06, "loss": 0.0313, "step": 92415 }, { "epoch": 0.3856264238802981, "grad_norm": 1.0686018458633728, "learning_rate": 3.2209455279511147e-06, "loss": 0.0373, "step": 92420 }, { "epoch": 0.38564728659528835, "grad_norm": 0.7526004852635836, "learning_rate": 3.220858402607827e-06, "loss": 0.0233, "step": 92425 }, { "epoch": 0.38566814931027865, "grad_norm": 0.7429668877841921, "learning_rate": 3.220771284334278e-06, "loss": 0.0274, "step": 92430 }, { "epoch": 0.3856890120252689, "grad_norm": 0.5239849960562668, "learning_rate": 3.2206841731295126e-06, "loss": 0.0298, "step": 92435 }, { "epoch": 0.3857098747402592, "grad_norm": 0.644397973153597, "learning_rate": 3.220597068992574e-06, "loss": 0.0231, "step": 92440 }, { "epoch": 0.3857307374552495, "grad_norm": 1.0939843366831639, "learning_rate": 3.2205099719225067e-06, "loss": 0.0323, "step": 92445 }, { "epoch": 0.38575160017023974, "grad_norm": 0.6740356765320576, "learning_rate": 3.2204228819183552e-06, "loss": 0.0246, "step": 92450 }, { "epoch": 0.38577246288523004, "grad_norm": 0.907071043240261, "learning_rate": 3.220335798979165e-06, "loss": 0.0277, "step": 92455 }, { "epoch": 0.3857933256002203, "grad_norm": 0.7378012316750875, "learning_rate": 3.2202487231039796e-06, "loss": 0.0243, "step": 92460 }, { "epoch": 0.3858141883152106, "grad_norm": 0.7764147460578475, "learning_rate": 3.2201616542918445e-06, "loss": 0.0339, "step": 92465 }, { "epoch": 0.3858350510302009, "grad_norm": 0.5918796014219658, "learning_rate": 3.2200745925418055e-06, "loss": 0.0186, "step": 92470 }, { "epoch": 0.3858559137451911, "grad_norm": 0.9322229159078648, "learning_rate": 3.2199875378529068e-06, "loss": 0.027, "step": 92475 }, { "epoch": 0.3858767764601814, "grad_norm": 0.6103560916212657, "learning_rate": 3.2199004902241954e-06, "loss": 0.0249, "step": 92480 }, { "epoch": 0.3858976391751717, "grad_norm": 0.7757188585897006, "learning_rate": 3.219813449654715e-06, "loss": 0.035, "step": 92485 }, { "epoch": 0.38591850189016197, "grad_norm": 0.21805178894073277, "learning_rate": 3.2197264161435144e-06, "loss": 0.0218, "step": 92490 }, { "epoch": 0.38593936460515227, "grad_norm": 0.5755843976288147, "learning_rate": 3.2196393896896365e-06, "loss": 0.0262, "step": 92495 }, { "epoch": 0.3859602273201425, "grad_norm": 0.713255844647193, "learning_rate": 3.219552370292131e-06, "loss": 0.0237, "step": 92500 }, { "epoch": 0.3859810900351328, "grad_norm": 0.803041759078488, "learning_rate": 3.2194653579500417e-06, "loss": 0.0352, "step": 92505 }, { "epoch": 0.3860019527501231, "grad_norm": 1.3317344618071036, "learning_rate": 3.2193783526624156e-06, "loss": 0.0293, "step": 92510 }, { "epoch": 0.38602281546511336, "grad_norm": 0.7932410465552545, "learning_rate": 3.219291354428301e-06, "loss": 0.0251, "step": 92515 }, { "epoch": 0.38604367818010366, "grad_norm": 0.9199908540430551, "learning_rate": 3.2192043632467435e-06, "loss": 0.025, "step": 92520 }, { "epoch": 0.3860645408950939, "grad_norm": 0.8578425669178782, "learning_rate": 3.219117379116791e-06, "loss": 0.0287, "step": 92525 }, { "epoch": 0.3860854036100842, "grad_norm": 1.0723488002129165, "learning_rate": 3.21903040203749e-06, "loss": 0.0349, "step": 92530 }, { "epoch": 0.3861062663250745, "grad_norm": 0.6249365484981957, "learning_rate": 3.2189434320078886e-06, "loss": 0.0253, "step": 92535 }, { "epoch": 0.38612712904006474, "grad_norm": 0.277710579858013, "learning_rate": 3.2188564690270353e-06, "loss": 0.0164, "step": 92540 }, { "epoch": 0.38614799175505504, "grad_norm": 0.8585768571676589, "learning_rate": 3.218769513093977e-06, "loss": 0.0251, "step": 92545 }, { "epoch": 0.3861688544700453, "grad_norm": 0.6688342420753446, "learning_rate": 3.218682564207762e-06, "loss": 0.0252, "step": 92550 }, { "epoch": 0.3861897171850356, "grad_norm": 0.4302978195657286, "learning_rate": 3.2185956223674386e-06, "loss": 0.028, "step": 92555 }, { "epoch": 0.3862105799000259, "grad_norm": 0.9758050055612494, "learning_rate": 3.2185086875720556e-06, "loss": 0.0269, "step": 92560 }, { "epoch": 0.38623144261501613, "grad_norm": 0.7637782220972766, "learning_rate": 3.218421759820661e-06, "loss": 0.0332, "step": 92565 }, { "epoch": 0.38625230533000643, "grad_norm": 0.5353701535805314, "learning_rate": 3.218334839112304e-06, "loss": 0.0268, "step": 92570 }, { "epoch": 0.38627316804499673, "grad_norm": 0.9531741161062918, "learning_rate": 3.2182479254460336e-06, "loss": 0.0308, "step": 92575 }, { "epoch": 0.386294030759987, "grad_norm": 0.907989680073895, "learning_rate": 3.2181610188208984e-06, "loss": 0.0278, "step": 92580 }, { "epoch": 0.3863148934749773, "grad_norm": 0.4425104019480396, "learning_rate": 3.218074119235948e-06, "loss": 0.0363, "step": 92585 }, { "epoch": 0.3863357561899675, "grad_norm": 0.7717931260818238, "learning_rate": 3.2179872266902336e-06, "loss": 0.0275, "step": 92590 }, { "epoch": 0.3863566189049578, "grad_norm": 0.46224054876955945, "learning_rate": 3.217900341182802e-06, "loss": 0.0258, "step": 92595 }, { "epoch": 0.3863774816199481, "grad_norm": 0.6597896838500741, "learning_rate": 3.217813462712705e-06, "loss": 0.0219, "step": 92600 }, { "epoch": 0.38639834433493836, "grad_norm": 1.0966862830133863, "learning_rate": 3.2177265912789925e-06, "loss": 0.0266, "step": 92605 }, { "epoch": 0.38641920704992866, "grad_norm": 1.0416183623884772, "learning_rate": 3.217639726880714e-06, "loss": 0.0318, "step": 92610 }, { "epoch": 0.3864400697649189, "grad_norm": 0.7188320543534977, "learning_rate": 3.21755286951692e-06, "loss": 0.0251, "step": 92615 }, { "epoch": 0.3864609324799092, "grad_norm": 0.7222513941756155, "learning_rate": 3.2174660191866625e-06, "loss": 0.0272, "step": 92620 }, { "epoch": 0.3864817951948995, "grad_norm": 0.7979971376171696, "learning_rate": 3.2173791758889906e-06, "loss": 0.029, "step": 92625 }, { "epoch": 0.38650265790988975, "grad_norm": 1.1637102769330803, "learning_rate": 3.2172923396229563e-06, "loss": 0.026, "step": 92630 }, { "epoch": 0.38652352062488005, "grad_norm": 0.407942039957047, "learning_rate": 3.21720551038761e-06, "loss": 0.0187, "step": 92635 }, { "epoch": 0.3865443833398703, "grad_norm": 0.32198962431187284, "learning_rate": 3.2171186881820025e-06, "loss": 0.0263, "step": 92640 }, { "epoch": 0.3865652460548606, "grad_norm": 0.6360583472755024, "learning_rate": 3.2170318730051874e-06, "loss": 0.0263, "step": 92645 }, { "epoch": 0.3865861087698509, "grad_norm": 0.6659528513051867, "learning_rate": 3.216945064856215e-06, "loss": 0.0202, "step": 92650 }, { "epoch": 0.38660697148484113, "grad_norm": 0.5836043844591589, "learning_rate": 3.2168582637341374e-06, "loss": 0.0295, "step": 92655 }, { "epoch": 0.38662783419983143, "grad_norm": 0.6307136997380591, "learning_rate": 3.216771469638006e-06, "loss": 0.0255, "step": 92660 }, { "epoch": 0.38664869691482173, "grad_norm": 0.6167471722180415, "learning_rate": 3.2166846825668736e-06, "loss": 0.0244, "step": 92665 }, { "epoch": 0.386669559629812, "grad_norm": 0.5861146301777396, "learning_rate": 3.216597902519793e-06, "loss": 0.0226, "step": 92670 }, { "epoch": 0.3866904223448023, "grad_norm": 0.8552264771522529, "learning_rate": 3.216511129495816e-06, "loss": 0.0356, "step": 92675 }, { "epoch": 0.3867112850597925, "grad_norm": 0.3796864823074408, "learning_rate": 3.2164243634939958e-06, "loss": 0.0261, "step": 92680 }, { "epoch": 0.3867321477747828, "grad_norm": 0.960373288866785, "learning_rate": 3.216337604513385e-06, "loss": 0.0276, "step": 92685 }, { "epoch": 0.3867530104897731, "grad_norm": 0.8474268930566607, "learning_rate": 3.2162508525530367e-06, "loss": 0.0265, "step": 92690 }, { "epoch": 0.38677387320476336, "grad_norm": 0.8757676664516219, "learning_rate": 3.2161641076120047e-06, "loss": 0.0276, "step": 92695 }, { "epoch": 0.38679473591975366, "grad_norm": 0.6435434286290567, "learning_rate": 3.216077369689342e-06, "loss": 0.0238, "step": 92700 }, { "epoch": 0.3868155986347439, "grad_norm": 1.0064204775666938, "learning_rate": 3.215990638784103e-06, "loss": 0.0347, "step": 92705 }, { "epoch": 0.3868364613497342, "grad_norm": 0.5759453721178331, "learning_rate": 3.215903914895341e-06, "loss": 0.0228, "step": 92710 }, { "epoch": 0.3868573240647245, "grad_norm": 0.9071099664341534, "learning_rate": 3.2158171980221085e-06, "loss": 0.0266, "step": 92715 }, { "epoch": 0.38687818677971475, "grad_norm": 0.8145350161515502, "learning_rate": 3.215730488163462e-06, "loss": 0.024, "step": 92720 }, { "epoch": 0.38689904949470505, "grad_norm": 0.6368048406653642, "learning_rate": 3.2156437853184553e-06, "loss": 0.0295, "step": 92725 }, { "epoch": 0.3869199122096953, "grad_norm": 0.6196722192873767, "learning_rate": 3.2155570894861425e-06, "loss": 0.0253, "step": 92730 }, { "epoch": 0.3869407749246856, "grad_norm": 0.3222241203229264, "learning_rate": 3.2154704006655783e-06, "loss": 0.0274, "step": 92735 }, { "epoch": 0.3869616376396759, "grad_norm": 0.6325277163053634, "learning_rate": 3.2153837188558173e-06, "loss": 0.0287, "step": 92740 }, { "epoch": 0.38698250035466614, "grad_norm": 0.6315677700862276, "learning_rate": 3.2152970440559154e-06, "loss": 0.0162, "step": 92745 }, { "epoch": 0.38700336306965644, "grad_norm": 0.9947962868734036, "learning_rate": 3.2152103762649274e-06, "loss": 0.034, "step": 92750 }, { "epoch": 0.38702422578464674, "grad_norm": 0.7322139699474353, "learning_rate": 3.2151237154819093e-06, "loss": 0.0285, "step": 92755 }, { "epoch": 0.387045088499637, "grad_norm": 0.7700007374757678, "learning_rate": 3.2150370617059158e-06, "loss": 0.0348, "step": 92760 }, { "epoch": 0.3870659512146273, "grad_norm": 1.035911735505291, "learning_rate": 3.214950414936003e-06, "loss": 0.031, "step": 92765 }, { "epoch": 0.3870868139296175, "grad_norm": 0.461507388000835, "learning_rate": 3.2148637751712268e-06, "loss": 0.022, "step": 92770 }, { "epoch": 0.3871076766446078, "grad_norm": 0.6065237473448121, "learning_rate": 3.2147771424106437e-06, "loss": 0.0268, "step": 92775 }, { "epoch": 0.3871285393595981, "grad_norm": 1.1779373240989057, "learning_rate": 3.21469051665331e-06, "loss": 0.0286, "step": 92780 }, { "epoch": 0.38714940207458837, "grad_norm": 0.7192462903501237, "learning_rate": 3.214603897898282e-06, "loss": 0.0299, "step": 92785 }, { "epoch": 0.38717026478957867, "grad_norm": 0.5733691308059589, "learning_rate": 3.214517286144616e-06, "loss": 0.0279, "step": 92790 }, { "epoch": 0.3871911275045689, "grad_norm": 0.6416173986008599, "learning_rate": 3.2144306813913694e-06, "loss": 0.029, "step": 92795 }, { "epoch": 0.3872119902195592, "grad_norm": 0.6047392330263806, "learning_rate": 3.2143440836375988e-06, "loss": 0.0302, "step": 92800 }, { "epoch": 0.3872328529345495, "grad_norm": 0.5234286196558962, "learning_rate": 3.214257492882362e-06, "loss": 0.0172, "step": 92805 }, { "epoch": 0.38725371564953975, "grad_norm": 0.9535173382530931, "learning_rate": 3.2141709091247166e-06, "loss": 0.0252, "step": 92810 }, { "epoch": 0.38727457836453005, "grad_norm": 0.8237993217332418, "learning_rate": 3.2140843323637187e-06, "loss": 0.0226, "step": 92815 }, { "epoch": 0.3872954410795203, "grad_norm": 0.7663922615647908, "learning_rate": 3.2139977625984277e-06, "loss": 0.0253, "step": 92820 }, { "epoch": 0.3873163037945106, "grad_norm": 0.482479452687366, "learning_rate": 3.2139111998279005e-06, "loss": 0.0276, "step": 92825 }, { "epoch": 0.3873371665095009, "grad_norm": 0.4188454238012509, "learning_rate": 3.2138246440511956e-06, "loss": 0.0206, "step": 92830 }, { "epoch": 0.38735802922449114, "grad_norm": 1.3797409202661748, "learning_rate": 3.2137380952673707e-06, "loss": 0.0369, "step": 92835 }, { "epoch": 0.38737889193948144, "grad_norm": 0.23512083913049392, "learning_rate": 3.2136515534754855e-06, "loss": 0.0214, "step": 92840 }, { "epoch": 0.38739975465447174, "grad_norm": 0.8873722375387919, "learning_rate": 3.213565018674597e-06, "loss": 0.0224, "step": 92845 }, { "epoch": 0.387420617369462, "grad_norm": 0.4562109479405716, "learning_rate": 3.2134784908637656e-06, "loss": 0.0275, "step": 92850 }, { "epoch": 0.3874414800844523, "grad_norm": 0.8451588737499011, "learning_rate": 3.2133919700420496e-06, "loss": 0.0243, "step": 92855 }, { "epoch": 0.3874623427994425, "grad_norm": 0.3338468248963741, "learning_rate": 3.2133054562085074e-06, "loss": 0.0271, "step": 92860 }, { "epoch": 0.3874832055144328, "grad_norm": 0.6616834702206804, "learning_rate": 3.2132189493621998e-06, "loss": 0.0215, "step": 92865 }, { "epoch": 0.3875040682294231, "grad_norm": 1.0161278634207969, "learning_rate": 3.2131324495021856e-06, "loss": 0.0406, "step": 92870 }, { "epoch": 0.38752493094441337, "grad_norm": 0.6667702001952145, "learning_rate": 3.213045956627524e-06, "loss": 0.0297, "step": 92875 }, { "epoch": 0.38754579365940367, "grad_norm": 1.3106139758103126, "learning_rate": 3.2129594707372754e-06, "loss": 0.0322, "step": 92880 }, { "epoch": 0.3875666563743939, "grad_norm": 1.0970195720549702, "learning_rate": 3.2128729918305003e-06, "loss": 0.0266, "step": 92885 }, { "epoch": 0.3875875190893842, "grad_norm": 0.9401453315187234, "learning_rate": 3.2127865199062574e-06, "loss": 0.0296, "step": 92890 }, { "epoch": 0.3876083818043745, "grad_norm": 0.7729152406521369, "learning_rate": 3.212700054963609e-06, "loss": 0.0246, "step": 92895 }, { "epoch": 0.38762924451936476, "grad_norm": 0.5842737780805337, "learning_rate": 3.212613597001615e-06, "loss": 0.0193, "step": 92900 }, { "epoch": 0.38765010723435506, "grad_norm": 0.6356173691314493, "learning_rate": 3.2125271460193354e-06, "loss": 0.0282, "step": 92905 }, { "epoch": 0.3876709699493453, "grad_norm": 0.9005312277494149, "learning_rate": 3.2124407020158315e-06, "loss": 0.0337, "step": 92910 }, { "epoch": 0.3876918326643356, "grad_norm": 0.7584072803970926, "learning_rate": 3.2123542649901655e-06, "loss": 0.0234, "step": 92915 }, { "epoch": 0.3877126953793259, "grad_norm": 0.596675054767715, "learning_rate": 3.2122678349413972e-06, "loss": 0.0206, "step": 92920 }, { "epoch": 0.38773355809431614, "grad_norm": 0.639255956396555, "learning_rate": 3.212181411868589e-06, "loss": 0.0209, "step": 92925 }, { "epoch": 0.38775442080930644, "grad_norm": 0.8703852308042729, "learning_rate": 3.212094995770802e-06, "loss": 0.0266, "step": 92930 }, { "epoch": 0.38777528352429674, "grad_norm": 0.46363683111822207, "learning_rate": 3.212008586647098e-06, "loss": 0.0306, "step": 92935 }, { "epoch": 0.387796146239287, "grad_norm": 0.62720647986496, "learning_rate": 3.2119221844965393e-06, "loss": 0.024, "step": 92940 }, { "epoch": 0.3878170089542773, "grad_norm": 0.9005902339654651, "learning_rate": 3.2118357893181884e-06, "loss": 0.0317, "step": 92945 }, { "epoch": 0.38783787166926753, "grad_norm": 1.1084072401965026, "learning_rate": 3.211749401111107e-06, "loss": 0.0317, "step": 92950 }, { "epoch": 0.38785873438425783, "grad_norm": 0.651177517071917, "learning_rate": 3.211663019874358e-06, "loss": 0.0269, "step": 92955 }, { "epoch": 0.38787959709924813, "grad_norm": 0.5003413845492387, "learning_rate": 3.2115766456070035e-06, "loss": 0.0175, "step": 92960 }, { "epoch": 0.3879004598142384, "grad_norm": 0.8499874876098784, "learning_rate": 3.2114902783081076e-06, "loss": 0.0341, "step": 92965 }, { "epoch": 0.3879213225292287, "grad_norm": 0.6978481259016286, "learning_rate": 3.2114039179767327e-06, "loss": 0.0274, "step": 92970 }, { "epoch": 0.3879421852442189, "grad_norm": 1.0495693320997077, "learning_rate": 3.211317564611941e-06, "loss": 0.0259, "step": 92975 }, { "epoch": 0.3879630479592092, "grad_norm": 1.0767839383833124, "learning_rate": 3.2112312182127973e-06, "loss": 0.03, "step": 92980 }, { "epoch": 0.3879839106741995, "grad_norm": 0.5567802613361851, "learning_rate": 3.2111448787783644e-06, "loss": 0.0273, "step": 92985 }, { "epoch": 0.38800477338918976, "grad_norm": 0.4545280936297464, "learning_rate": 3.211058546307707e-06, "loss": 0.0248, "step": 92990 }, { "epoch": 0.38802563610418006, "grad_norm": 0.7707345607516289, "learning_rate": 3.2109722207998882e-06, "loss": 0.0155, "step": 92995 }, { "epoch": 0.3880464988191703, "grad_norm": 0.7094008894102998, "learning_rate": 3.210885902253972e-06, "loss": 0.0258, "step": 93000 }, { "epoch": 0.3880673615341606, "grad_norm": 0.27079689300297516, "learning_rate": 3.2107995906690237e-06, "loss": 0.0256, "step": 93005 }, { "epoch": 0.3880882242491509, "grad_norm": 0.5862155131604063, "learning_rate": 3.2107132860441064e-06, "loss": 0.0212, "step": 93010 }, { "epoch": 0.38810908696414115, "grad_norm": 0.9938280222296506, "learning_rate": 3.210626988378286e-06, "loss": 0.0268, "step": 93015 }, { "epoch": 0.38812994967913145, "grad_norm": 0.5676159644711658, "learning_rate": 3.2105406976706266e-06, "loss": 0.0351, "step": 93020 }, { "epoch": 0.38815081239412175, "grad_norm": 0.49870275313603224, "learning_rate": 3.210454413920193e-06, "loss": 0.0211, "step": 93025 }, { "epoch": 0.388171675109112, "grad_norm": 0.5185529482061373, "learning_rate": 3.210368137126051e-06, "loss": 0.0258, "step": 93030 }, { "epoch": 0.3881925378241023, "grad_norm": 1.0952205024168873, "learning_rate": 3.210281867287266e-06, "loss": 0.0311, "step": 93035 }, { "epoch": 0.38821340053909253, "grad_norm": 1.244401671891144, "learning_rate": 3.2101956044029026e-06, "loss": 0.034, "step": 93040 }, { "epoch": 0.38823426325408283, "grad_norm": 0.7501946535238223, "learning_rate": 3.2101093484720273e-06, "loss": 0.0258, "step": 93045 }, { "epoch": 0.38825512596907313, "grad_norm": 0.8945881919409537, "learning_rate": 3.210023099493705e-06, "loss": 0.0316, "step": 93050 }, { "epoch": 0.3882759886840634, "grad_norm": 0.7299692775754286, "learning_rate": 3.2099368574670033e-06, "loss": 0.0287, "step": 93055 }, { "epoch": 0.3882968513990537, "grad_norm": 1.0669389701905403, "learning_rate": 3.2098506223909876e-06, "loss": 0.0301, "step": 93060 }, { "epoch": 0.3883177141140439, "grad_norm": 1.0583458293972468, "learning_rate": 3.2097643942647244e-06, "loss": 0.027, "step": 93065 }, { "epoch": 0.3883385768290342, "grad_norm": 1.2516094719698865, "learning_rate": 3.20967817308728e-06, "loss": 0.0365, "step": 93070 }, { "epoch": 0.3883594395440245, "grad_norm": 1.1511181072678112, "learning_rate": 3.209591958857721e-06, "loss": 0.0256, "step": 93075 }, { "epoch": 0.38838030225901476, "grad_norm": 0.8520509881553114, "learning_rate": 3.2095057515751148e-06, "loss": 0.0216, "step": 93080 }, { "epoch": 0.38840116497400506, "grad_norm": 0.5974913441556093, "learning_rate": 3.2094195512385283e-06, "loss": 0.0264, "step": 93085 }, { "epoch": 0.3884220276889953, "grad_norm": 0.5565149202921271, "learning_rate": 3.2093333578470292e-06, "loss": 0.0259, "step": 93090 }, { "epoch": 0.3884428904039856, "grad_norm": 0.832360068465049, "learning_rate": 3.209247171399684e-06, "loss": 0.0322, "step": 93095 }, { "epoch": 0.3884637531189759, "grad_norm": 0.9099856410561468, "learning_rate": 3.2091609918955613e-06, "loss": 0.0255, "step": 93100 }, { "epoch": 0.38848461583396615, "grad_norm": 0.8102097948230935, "learning_rate": 3.209074819333729e-06, "loss": 0.0354, "step": 93105 }, { "epoch": 0.38850547854895645, "grad_norm": 0.5913088084932108, "learning_rate": 3.2089886537132538e-06, "loss": 0.0286, "step": 93110 }, { "epoch": 0.38852634126394675, "grad_norm": 0.7919329933365361, "learning_rate": 3.2089024950332047e-06, "loss": 0.0244, "step": 93115 }, { "epoch": 0.388547203978937, "grad_norm": 0.9682603147157234, "learning_rate": 3.20881634329265e-06, "loss": 0.0346, "step": 93120 }, { "epoch": 0.3885680666939273, "grad_norm": 0.7635648376114236, "learning_rate": 3.208730198490658e-06, "loss": 0.0216, "step": 93125 }, { "epoch": 0.38858892940891754, "grad_norm": 0.48863463833727544, "learning_rate": 3.208644060626298e-06, "loss": 0.0343, "step": 93130 }, { "epoch": 0.38860979212390784, "grad_norm": 1.0614955513882314, "learning_rate": 3.208557929698638e-06, "loss": 0.0317, "step": 93135 }, { "epoch": 0.38863065483889814, "grad_norm": 1.014313340666114, "learning_rate": 3.208471805706748e-06, "loss": 0.0339, "step": 93140 }, { "epoch": 0.3886515175538884, "grad_norm": 0.680485127788754, "learning_rate": 3.2083856886496957e-06, "loss": 0.0213, "step": 93145 }, { "epoch": 0.3886723802688787, "grad_norm": 0.723512353113011, "learning_rate": 3.2082995785265523e-06, "loss": 0.0274, "step": 93150 }, { "epoch": 0.3886932429838689, "grad_norm": 1.126129725718283, "learning_rate": 3.2082134753363863e-06, "loss": 0.0284, "step": 93155 }, { "epoch": 0.3887141056988592, "grad_norm": 0.8224142249328417, "learning_rate": 3.208127379078267e-06, "loss": 0.0379, "step": 93160 }, { "epoch": 0.3887349684138495, "grad_norm": 0.8868488176117786, "learning_rate": 3.2080412897512655e-06, "loss": 0.0296, "step": 93165 }, { "epoch": 0.38875583112883977, "grad_norm": 0.5001395103113127, "learning_rate": 3.20795520735445e-06, "loss": 0.0265, "step": 93170 }, { "epoch": 0.38877669384383007, "grad_norm": 0.8581867838012436, "learning_rate": 3.2078691318868936e-06, "loss": 0.0233, "step": 93175 }, { "epoch": 0.3887975565588203, "grad_norm": 1.3749933829196104, "learning_rate": 3.2077830633476643e-06, "loss": 0.0288, "step": 93180 }, { "epoch": 0.3888184192738106, "grad_norm": 0.8144633829571767, "learning_rate": 3.2076970017358334e-06, "loss": 0.0236, "step": 93185 }, { "epoch": 0.3888392819888009, "grad_norm": 0.5912585898625519, "learning_rate": 3.2076109470504726e-06, "loss": 0.0241, "step": 93190 }, { "epoch": 0.38886014470379116, "grad_norm": 3.0083328513942718, "learning_rate": 3.207524899290651e-06, "loss": 0.0359, "step": 93195 }, { "epoch": 0.38888100741878145, "grad_norm": 1.1075957640341803, "learning_rate": 3.2074388584554413e-06, "loss": 0.0252, "step": 93200 }, { "epoch": 0.38890187013377175, "grad_norm": 0.6773030172166487, "learning_rate": 3.207352824543914e-06, "loss": 0.0258, "step": 93205 }, { "epoch": 0.388922732848762, "grad_norm": 0.5690623511546019, "learning_rate": 3.2072667975551415e-06, "loss": 0.0205, "step": 93210 }, { "epoch": 0.3889435955637523, "grad_norm": 0.6420770895503279, "learning_rate": 3.2071807774881942e-06, "loss": 0.0269, "step": 93215 }, { "epoch": 0.38896445827874254, "grad_norm": 0.5603633570505383, "learning_rate": 3.207094764342145e-06, "loss": 0.0202, "step": 93220 }, { "epoch": 0.38898532099373284, "grad_norm": 0.5687055422710259, "learning_rate": 3.207008758116065e-06, "loss": 0.0299, "step": 93225 }, { "epoch": 0.38900618370872314, "grad_norm": 0.8667999346844804, "learning_rate": 3.206922758809026e-06, "loss": 0.0237, "step": 93230 }, { "epoch": 0.3890270464237134, "grad_norm": 0.9275045968952202, "learning_rate": 3.2068367664201026e-06, "loss": 0.0272, "step": 93235 }, { "epoch": 0.3890479091387037, "grad_norm": 0.5068175148933405, "learning_rate": 3.2067507809483645e-06, "loss": 0.0272, "step": 93240 }, { "epoch": 0.38906877185369393, "grad_norm": 0.6667383191309558, "learning_rate": 3.2066648023928866e-06, "loss": 0.0228, "step": 93245 }, { "epoch": 0.38908963456868423, "grad_norm": 0.7488251949432624, "learning_rate": 3.2065788307527412e-06, "loss": 0.02, "step": 93250 }, { "epoch": 0.38911049728367453, "grad_norm": 0.6866459372265173, "learning_rate": 3.2064928660269995e-06, "loss": 0.0192, "step": 93255 }, { "epoch": 0.38913135999866477, "grad_norm": 0.5175876493542407, "learning_rate": 3.2064069082147377e-06, "loss": 0.0204, "step": 93260 }, { "epoch": 0.38915222271365507, "grad_norm": 0.4977293395348825, "learning_rate": 3.2063209573150266e-06, "loss": 0.0322, "step": 93265 }, { "epoch": 0.3891730854286453, "grad_norm": 0.5162687490756, "learning_rate": 3.206235013326942e-06, "loss": 0.0246, "step": 93270 }, { "epoch": 0.3891939481436356, "grad_norm": 0.7275582475683053, "learning_rate": 3.206149076249556e-06, "loss": 0.0309, "step": 93275 }, { "epoch": 0.3892148108586259, "grad_norm": 0.7667282969813592, "learning_rate": 3.2060631460819424e-06, "loss": 0.0241, "step": 93280 }, { "epoch": 0.38923567357361616, "grad_norm": 0.7010480161592387, "learning_rate": 3.205977222823177e-06, "loss": 0.0192, "step": 93285 }, { "epoch": 0.38925653628860646, "grad_norm": 1.078259594907641, "learning_rate": 3.2058913064723324e-06, "loss": 0.031, "step": 93290 }, { "epoch": 0.38927739900359676, "grad_norm": 0.8022286406038064, "learning_rate": 3.2058053970284835e-06, "loss": 0.0282, "step": 93295 }, { "epoch": 0.389298261718587, "grad_norm": 1.1260072830234724, "learning_rate": 3.2057194944907055e-06, "loss": 0.0299, "step": 93300 }, { "epoch": 0.3893191244335773, "grad_norm": 0.842104528339635, "learning_rate": 3.2056335988580723e-06, "loss": 0.0239, "step": 93305 }, { "epoch": 0.38933998714856755, "grad_norm": 0.46147992846511365, "learning_rate": 3.2055477101296597e-06, "loss": 0.0296, "step": 93310 }, { "epoch": 0.38936084986355785, "grad_norm": 0.6531467155073375, "learning_rate": 3.2054618283045415e-06, "loss": 0.0281, "step": 93315 }, { "epoch": 0.38938171257854814, "grad_norm": 1.981290314499888, "learning_rate": 3.205375953381794e-06, "loss": 0.0378, "step": 93320 }, { "epoch": 0.3894025752935384, "grad_norm": 0.8309469138862965, "learning_rate": 3.2052900853604925e-06, "loss": 0.0253, "step": 93325 }, { "epoch": 0.3894234380085287, "grad_norm": 0.6087555978567032, "learning_rate": 3.2052042242397136e-06, "loss": 0.031, "step": 93330 }, { "epoch": 0.38944430072351893, "grad_norm": 0.743800915483426, "learning_rate": 3.2051183700185313e-06, "loss": 0.024, "step": 93335 }, { "epoch": 0.38946516343850923, "grad_norm": 0.6290093085429093, "learning_rate": 3.205032522696022e-06, "loss": 0.023, "step": 93340 }, { "epoch": 0.38948602615349953, "grad_norm": 0.9855394167625556, "learning_rate": 3.2049466822712637e-06, "loss": 0.0282, "step": 93345 }, { "epoch": 0.3895068888684898, "grad_norm": 0.6070954989042767, "learning_rate": 3.20486084874333e-06, "loss": 0.0307, "step": 93350 }, { "epoch": 0.3895277515834801, "grad_norm": 1.5951756245428133, "learning_rate": 3.2047750221112993e-06, "loss": 0.0201, "step": 93355 }, { "epoch": 0.3895486142984703, "grad_norm": 0.6127388787905312, "learning_rate": 3.204689202374249e-06, "loss": 0.026, "step": 93360 }, { "epoch": 0.3895694770134606, "grad_norm": 0.7023732559921375, "learning_rate": 3.204603389531253e-06, "loss": 0.0301, "step": 93365 }, { "epoch": 0.3895903397284509, "grad_norm": 0.34481776280613047, "learning_rate": 3.20451758358139e-06, "loss": 0.016, "step": 93370 }, { "epoch": 0.38961120244344116, "grad_norm": 1.1797637671059216, "learning_rate": 3.204431784523738e-06, "loss": 0.0306, "step": 93375 }, { "epoch": 0.38963206515843146, "grad_norm": 0.7798341107913669, "learning_rate": 3.2043459923573737e-06, "loss": 0.0318, "step": 93380 }, { "epoch": 0.38965292787342176, "grad_norm": 0.8188358947532457, "learning_rate": 3.2042602070813746e-06, "loss": 0.0237, "step": 93385 }, { "epoch": 0.389673790588412, "grad_norm": 0.5599815601718963, "learning_rate": 3.204174428694818e-06, "loss": 0.0234, "step": 93390 }, { "epoch": 0.3896946533034023, "grad_norm": 0.6140939759732298, "learning_rate": 3.204088657196782e-06, "loss": 0.0346, "step": 93395 }, { "epoch": 0.38971551601839255, "grad_norm": 0.7804323212402784, "learning_rate": 3.2040028925863455e-06, "loss": 0.0275, "step": 93400 }, { "epoch": 0.38973637873338285, "grad_norm": 0.9957696780728157, "learning_rate": 3.2039171348625858e-06, "loss": 0.0338, "step": 93405 }, { "epoch": 0.38975724144837315, "grad_norm": 0.5767169972633677, "learning_rate": 3.203831384024581e-06, "loss": 0.0236, "step": 93410 }, { "epoch": 0.3897781041633634, "grad_norm": 0.8944429031615653, "learning_rate": 3.203745640071411e-06, "loss": 0.0223, "step": 93415 }, { "epoch": 0.3897989668783537, "grad_norm": 0.9837180310519453, "learning_rate": 3.2036599030021543e-06, "loss": 0.0281, "step": 93420 }, { "epoch": 0.38981982959334394, "grad_norm": 0.4571069497307217, "learning_rate": 3.203574172815888e-06, "loss": 0.0241, "step": 93425 }, { "epoch": 0.38984069230833424, "grad_norm": 1.1466817286562603, "learning_rate": 3.2034884495116937e-06, "loss": 0.0269, "step": 93430 }, { "epoch": 0.38986155502332454, "grad_norm": 0.7548883573269334, "learning_rate": 3.203402733088649e-06, "loss": 0.0239, "step": 93435 }, { "epoch": 0.3898824177383148, "grad_norm": 0.7256396292414412, "learning_rate": 3.2033170235458343e-06, "loss": 0.0241, "step": 93440 }, { "epoch": 0.3899032804533051, "grad_norm": 0.6823920102201275, "learning_rate": 3.2032313208823286e-06, "loss": 0.024, "step": 93445 }, { "epoch": 0.3899241431682953, "grad_norm": 1.130597688339265, "learning_rate": 3.203145625097212e-06, "loss": 0.026, "step": 93450 }, { "epoch": 0.3899450058832856, "grad_norm": 0.9611150458172842, "learning_rate": 3.203059936189564e-06, "loss": 0.0354, "step": 93455 }, { "epoch": 0.3899658685982759, "grad_norm": 0.4038754985948235, "learning_rate": 3.2029742541584655e-06, "loss": 0.0244, "step": 93460 }, { "epoch": 0.38998673131326617, "grad_norm": 0.6204733509008119, "learning_rate": 3.202888579002997e-06, "loss": 0.0371, "step": 93465 }, { "epoch": 0.39000759402825647, "grad_norm": 1.3771272017562617, "learning_rate": 3.202802910722237e-06, "loss": 0.0335, "step": 93470 }, { "epoch": 0.39002845674324677, "grad_norm": 0.7059709423549875, "learning_rate": 3.2027172493152687e-06, "loss": 0.0243, "step": 93475 }, { "epoch": 0.390049319458237, "grad_norm": 1.2716163941310008, "learning_rate": 3.2026315947811716e-06, "loss": 0.0279, "step": 93480 }, { "epoch": 0.3900701821732273, "grad_norm": 0.7916657167287606, "learning_rate": 3.202545947119026e-06, "loss": 0.0247, "step": 93485 }, { "epoch": 0.39009104488821755, "grad_norm": 0.9523963871543443, "learning_rate": 3.202460306327915e-06, "loss": 0.0324, "step": 93490 }, { "epoch": 0.39011190760320785, "grad_norm": 1.2505345405429877, "learning_rate": 3.202374672406918e-06, "loss": 0.031, "step": 93495 }, { "epoch": 0.39013277031819815, "grad_norm": 0.607235034817925, "learning_rate": 3.2022890453551175e-06, "loss": 0.0268, "step": 93500 }, { "epoch": 0.3901536330331884, "grad_norm": 0.8587446690100408, "learning_rate": 3.2022034251715954e-06, "loss": 0.0322, "step": 93505 }, { "epoch": 0.3901744957481787, "grad_norm": 1.0172425201756248, "learning_rate": 3.2021178118554335e-06, "loss": 0.034, "step": 93510 }, { "epoch": 0.39019535846316894, "grad_norm": 0.5129593095049714, "learning_rate": 3.2020322054057142e-06, "loss": 0.0269, "step": 93515 }, { "epoch": 0.39021622117815924, "grad_norm": 0.7687971348834636, "learning_rate": 3.2019466058215175e-06, "loss": 0.0281, "step": 93520 }, { "epoch": 0.39023708389314954, "grad_norm": 0.6857450944433213, "learning_rate": 3.2018610131019283e-06, "loss": 0.0275, "step": 93525 }, { "epoch": 0.3902579466081398, "grad_norm": 0.619189535977091, "learning_rate": 3.201775427246028e-06, "loss": 0.0313, "step": 93530 }, { "epoch": 0.3902788093231301, "grad_norm": 1.087182538809613, "learning_rate": 3.2016898482528993e-06, "loss": 0.0276, "step": 93535 }, { "epoch": 0.3902996720381203, "grad_norm": 0.4592855083322687, "learning_rate": 3.2016042761216253e-06, "loss": 0.029, "step": 93540 }, { "epoch": 0.3903205347531106, "grad_norm": 0.8228719187982835, "learning_rate": 3.201518710851289e-06, "loss": 0.019, "step": 93545 }, { "epoch": 0.3903413974681009, "grad_norm": 0.6730115913385767, "learning_rate": 3.201433152440974e-06, "loss": 0.0278, "step": 93550 }, { "epoch": 0.39036226018309117, "grad_norm": 0.5616256745912747, "learning_rate": 3.2013476008897633e-06, "loss": 0.0212, "step": 93555 }, { "epoch": 0.39038312289808147, "grad_norm": 0.8960329412038621, "learning_rate": 3.2012620561967405e-06, "loss": 0.028, "step": 93560 }, { "epoch": 0.39040398561307177, "grad_norm": 2.7355419135747137, "learning_rate": 3.2011765183609895e-06, "loss": 0.0315, "step": 93565 }, { "epoch": 0.390424848328062, "grad_norm": 0.4034166083438902, "learning_rate": 3.2010909873815936e-06, "loss": 0.0176, "step": 93570 }, { "epoch": 0.3904457110430523, "grad_norm": 0.5074118548253284, "learning_rate": 3.201005463257638e-06, "loss": 0.0226, "step": 93575 }, { "epoch": 0.39046657375804256, "grad_norm": 0.7033032231754708, "learning_rate": 3.200919945988206e-06, "loss": 0.025, "step": 93580 }, { "epoch": 0.39048743647303286, "grad_norm": 0.3672232052000562, "learning_rate": 3.200834435572383e-06, "loss": 0.024, "step": 93585 }, { "epoch": 0.39050829918802316, "grad_norm": 0.8276539582541581, "learning_rate": 3.2007489320092523e-06, "loss": 0.0273, "step": 93590 }, { "epoch": 0.3905291619030134, "grad_norm": 0.7740509886877889, "learning_rate": 3.2006634352979e-06, "loss": 0.0275, "step": 93595 }, { "epoch": 0.3905500246180037, "grad_norm": 0.8113611784303622, "learning_rate": 3.20057794543741e-06, "loss": 0.0349, "step": 93600 }, { "epoch": 0.39057088733299394, "grad_norm": 1.0848502523059504, "learning_rate": 3.200492462426868e-06, "loss": 0.0247, "step": 93605 }, { "epoch": 0.39059175004798424, "grad_norm": 1.162171381466982, "learning_rate": 3.2004069862653585e-06, "loss": 0.0265, "step": 93610 }, { "epoch": 0.39061261276297454, "grad_norm": 1.1727535630985575, "learning_rate": 3.2003215169519687e-06, "loss": 0.0299, "step": 93615 }, { "epoch": 0.3906334754779648, "grad_norm": 0.5199657888824829, "learning_rate": 3.2002360544857823e-06, "loss": 0.0317, "step": 93620 }, { "epoch": 0.3906543381929551, "grad_norm": 1.0361107950465045, "learning_rate": 3.200150598865886e-06, "loss": 0.0253, "step": 93625 }, { "epoch": 0.39067520090794533, "grad_norm": 0.8648861587464022, "learning_rate": 3.2000651500913653e-06, "loss": 0.0277, "step": 93630 }, { "epoch": 0.39069606362293563, "grad_norm": 0.7245757545436778, "learning_rate": 3.1999797081613076e-06, "loss": 0.0316, "step": 93635 }, { "epoch": 0.39071692633792593, "grad_norm": 0.7662263521697031, "learning_rate": 3.199894273074798e-06, "loss": 0.0266, "step": 93640 }, { "epoch": 0.3907377890529162, "grad_norm": 0.58067560104073, "learning_rate": 3.199808844830923e-06, "loss": 0.0249, "step": 93645 }, { "epoch": 0.3907586517679065, "grad_norm": 1.0296843905789173, "learning_rate": 3.1997234234287693e-06, "loss": 0.0273, "step": 93650 }, { "epoch": 0.3907795144828968, "grad_norm": 0.5444984234391628, "learning_rate": 3.199638008867425e-06, "loss": 0.0322, "step": 93655 }, { "epoch": 0.390800377197887, "grad_norm": 0.9123400378161426, "learning_rate": 3.1995526011459753e-06, "loss": 0.0291, "step": 93660 }, { "epoch": 0.3908212399128773, "grad_norm": 0.9099059246181695, "learning_rate": 3.199467200263508e-06, "loss": 0.0259, "step": 93665 }, { "epoch": 0.39084210262786756, "grad_norm": 0.7122266909938957, "learning_rate": 3.1993818062191106e-06, "loss": 0.0338, "step": 93670 }, { "epoch": 0.39086296534285786, "grad_norm": 0.665316844404174, "learning_rate": 3.199296419011871e-06, "loss": 0.0285, "step": 93675 }, { "epoch": 0.39088382805784816, "grad_norm": 0.7589398105289655, "learning_rate": 3.199211038640876e-06, "loss": 0.0262, "step": 93680 }, { "epoch": 0.3909046907728384, "grad_norm": 0.6466470189114836, "learning_rate": 3.1991256651052144e-06, "loss": 0.0313, "step": 93685 }, { "epoch": 0.3909255534878287, "grad_norm": 0.22032084770546986, "learning_rate": 3.199040298403974e-06, "loss": 0.0247, "step": 93690 }, { "epoch": 0.39094641620281895, "grad_norm": 0.6561789713270036, "learning_rate": 3.1989549385362417e-06, "loss": 0.0224, "step": 93695 }, { "epoch": 0.39096727891780925, "grad_norm": 0.835054335191945, "learning_rate": 3.198869585501107e-06, "loss": 0.0248, "step": 93700 }, { "epoch": 0.39098814163279955, "grad_norm": 0.9557612787732034, "learning_rate": 3.198784239297659e-06, "loss": 0.0317, "step": 93705 }, { "epoch": 0.3910090043477898, "grad_norm": 0.6149994278797438, "learning_rate": 3.1986988999249853e-06, "loss": 0.024, "step": 93710 }, { "epoch": 0.3910298670627801, "grad_norm": 0.8858570514424035, "learning_rate": 3.198613567382175e-06, "loss": 0.0276, "step": 93715 }, { "epoch": 0.39105072977777033, "grad_norm": 0.5828557145784264, "learning_rate": 3.1985282416683177e-06, "loss": 0.0255, "step": 93720 }, { "epoch": 0.39107159249276063, "grad_norm": 0.596560556262866, "learning_rate": 3.1984429227825015e-06, "loss": 0.0305, "step": 93725 }, { "epoch": 0.39109245520775093, "grad_norm": 1.0782293876281372, "learning_rate": 3.1983576107238167e-06, "loss": 0.0267, "step": 93730 }, { "epoch": 0.3911133179227412, "grad_norm": 0.39707487681664144, "learning_rate": 3.198272305491353e-06, "loss": 0.0212, "step": 93735 }, { "epoch": 0.3911341806377315, "grad_norm": 0.5447479128651376, "learning_rate": 3.1981870070842e-06, "loss": 0.0307, "step": 93740 }, { "epoch": 0.3911550433527218, "grad_norm": 1.7696244299971817, "learning_rate": 3.198101715501446e-06, "loss": 0.0241, "step": 93745 }, { "epoch": 0.391175906067712, "grad_norm": 0.9683716716700047, "learning_rate": 3.1980164307421836e-06, "loss": 0.0266, "step": 93750 }, { "epoch": 0.3911967687827023, "grad_norm": 0.7390181487982924, "learning_rate": 3.197931152805501e-06, "loss": 0.0315, "step": 93755 }, { "epoch": 0.39121763149769256, "grad_norm": 1.103834102228651, "learning_rate": 3.1978458816904902e-06, "loss": 0.0397, "step": 93760 }, { "epoch": 0.39123849421268286, "grad_norm": 0.9223674773920978, "learning_rate": 3.1977606173962406e-06, "loss": 0.024, "step": 93765 }, { "epoch": 0.39125935692767316, "grad_norm": 0.4924066071140236, "learning_rate": 3.1976753599218437e-06, "loss": 0.0312, "step": 93770 }, { "epoch": 0.3912802196426634, "grad_norm": 1.1297992757250879, "learning_rate": 3.19759010926639e-06, "loss": 0.0339, "step": 93775 }, { "epoch": 0.3913010823576537, "grad_norm": 0.5388680951537215, "learning_rate": 3.1975048654289697e-06, "loss": 0.0233, "step": 93780 }, { "epoch": 0.39132194507264395, "grad_norm": 0.6470805224281364, "learning_rate": 3.1974196284086757e-06, "loss": 0.0257, "step": 93785 }, { "epoch": 0.39134280778763425, "grad_norm": 0.6434959043235672, "learning_rate": 3.1973343982045986e-06, "loss": 0.0262, "step": 93790 }, { "epoch": 0.39136367050262455, "grad_norm": 0.5130048362926338, "learning_rate": 3.1972491748158295e-06, "loss": 0.0159, "step": 93795 }, { "epoch": 0.3913845332176148, "grad_norm": 0.982565427259162, "learning_rate": 3.1971639582414615e-06, "loss": 0.0273, "step": 93800 }, { "epoch": 0.3914053959326051, "grad_norm": 1.5606171788407566, "learning_rate": 3.1970787484805856e-06, "loss": 0.0316, "step": 93805 }, { "epoch": 0.39142625864759534, "grad_norm": 0.6070366344938863, "learning_rate": 3.196993545532293e-06, "loss": 0.0294, "step": 93810 }, { "epoch": 0.39144712136258564, "grad_norm": 1.8331690643422824, "learning_rate": 3.1969083493956787e-06, "loss": 0.0275, "step": 93815 }, { "epoch": 0.39146798407757594, "grad_norm": 1.0220354731933412, "learning_rate": 3.1968231600698326e-06, "loss": 0.0289, "step": 93820 }, { "epoch": 0.3914888467925662, "grad_norm": 1.2415578631633846, "learning_rate": 3.1967379775538476e-06, "loss": 0.0297, "step": 93825 }, { "epoch": 0.3915097095075565, "grad_norm": 0.589734928063744, "learning_rate": 3.1966528018468174e-06, "loss": 0.0326, "step": 93830 }, { "epoch": 0.3915305722225468, "grad_norm": 0.8394111442701215, "learning_rate": 3.1965676329478344e-06, "loss": 0.0267, "step": 93835 }, { "epoch": 0.391551434937537, "grad_norm": 1.0692239651996476, "learning_rate": 3.1964824708559915e-06, "loss": 0.0282, "step": 93840 }, { "epoch": 0.3915722976525273, "grad_norm": 0.8778094672426032, "learning_rate": 3.196397315570383e-06, "loss": 0.0318, "step": 93845 }, { "epoch": 0.39159316036751757, "grad_norm": 0.6222120257423509, "learning_rate": 3.1963121670901023e-06, "loss": 0.0407, "step": 93850 }, { "epoch": 0.39161402308250787, "grad_norm": 0.9787936379019687, "learning_rate": 3.1962270254142407e-06, "loss": 0.0281, "step": 93855 }, { "epoch": 0.39163488579749817, "grad_norm": 0.9910274237294168, "learning_rate": 3.196141890541895e-06, "loss": 0.0322, "step": 93860 }, { "epoch": 0.3916557485124884, "grad_norm": 1.756693182555526, "learning_rate": 3.196056762472157e-06, "loss": 0.0292, "step": 93865 }, { "epoch": 0.3916766112274787, "grad_norm": 0.9866424989024425, "learning_rate": 3.195971641204122e-06, "loss": 0.0317, "step": 93870 }, { "epoch": 0.39169747394246895, "grad_norm": 0.5724326369705535, "learning_rate": 3.1958865267368837e-06, "loss": 0.0305, "step": 93875 }, { "epoch": 0.39171833665745925, "grad_norm": 0.6100874300455582, "learning_rate": 3.1958014190695373e-06, "loss": 0.0228, "step": 93880 }, { "epoch": 0.39173919937244955, "grad_norm": 0.7945306750426409, "learning_rate": 3.1957163182011765e-06, "loss": 0.027, "step": 93885 }, { "epoch": 0.3917600620874398, "grad_norm": 0.8626345320711576, "learning_rate": 3.1956312241308964e-06, "loss": 0.0269, "step": 93890 }, { "epoch": 0.3917809248024301, "grad_norm": 0.6721509681382803, "learning_rate": 3.1955461368577926e-06, "loss": 0.0355, "step": 93895 }, { "epoch": 0.39180178751742034, "grad_norm": 1.133045477819798, "learning_rate": 3.1954610563809598e-06, "loss": 0.0318, "step": 93900 }, { "epoch": 0.39182265023241064, "grad_norm": 0.47665002148753627, "learning_rate": 3.1953759826994924e-06, "loss": 0.0425, "step": 93905 }, { "epoch": 0.39184351294740094, "grad_norm": 0.546925152677955, "learning_rate": 3.1952909158124875e-06, "loss": 0.0245, "step": 93910 }, { "epoch": 0.3918643756623912, "grad_norm": 0.5105444790220216, "learning_rate": 3.195205855719039e-06, "loss": 0.0223, "step": 93915 }, { "epoch": 0.3918852383773815, "grad_norm": 1.6004964077400063, "learning_rate": 3.1951208024182445e-06, "loss": 0.0294, "step": 93920 }, { "epoch": 0.3919061010923718, "grad_norm": 0.7951161246331491, "learning_rate": 3.195035755909199e-06, "loss": 0.0381, "step": 93925 }, { "epoch": 0.391926963807362, "grad_norm": 0.9765641478683231, "learning_rate": 3.1949507161909986e-06, "loss": 0.0327, "step": 93930 }, { "epoch": 0.3919478265223523, "grad_norm": 0.5517789915486502, "learning_rate": 3.19486568326274e-06, "loss": 0.0324, "step": 93935 }, { "epoch": 0.39196868923734257, "grad_norm": 0.48657214776066693, "learning_rate": 3.1947806571235198e-06, "loss": 0.0226, "step": 93940 }, { "epoch": 0.39198955195233287, "grad_norm": 0.8243084635348502, "learning_rate": 3.1946956377724337e-06, "loss": 0.0311, "step": 93945 }, { "epoch": 0.39201041466732317, "grad_norm": 0.8651702188139963, "learning_rate": 3.194610625208579e-06, "loss": 0.0248, "step": 93950 }, { "epoch": 0.3920312773823134, "grad_norm": 0.8043107477192373, "learning_rate": 3.1945256194310537e-06, "loss": 0.0221, "step": 93955 }, { "epoch": 0.3920521400973037, "grad_norm": 0.6545904967351992, "learning_rate": 3.1944406204389535e-06, "loss": 0.0302, "step": 93960 }, { "epoch": 0.39207300281229396, "grad_norm": 1.0008083849157898, "learning_rate": 3.1943556282313766e-06, "loss": 0.0282, "step": 93965 }, { "epoch": 0.39209386552728426, "grad_norm": 0.557071118628456, "learning_rate": 3.19427064280742e-06, "loss": 0.0297, "step": 93970 }, { "epoch": 0.39211472824227456, "grad_norm": 0.8632934167916434, "learning_rate": 3.194185664166181e-06, "loss": 0.0236, "step": 93975 }, { "epoch": 0.3921355909572648, "grad_norm": 0.8536173858030577, "learning_rate": 3.194100692306759e-06, "loss": 0.0231, "step": 93980 }, { "epoch": 0.3921564536722551, "grad_norm": 0.5678482154734287, "learning_rate": 3.1940157272282506e-06, "loss": 0.0209, "step": 93985 }, { "epoch": 0.39217731638724534, "grad_norm": 0.5721810710159723, "learning_rate": 3.1939307689297544e-06, "loss": 0.036, "step": 93990 }, { "epoch": 0.39219817910223564, "grad_norm": 0.825475984215023, "learning_rate": 3.1938458174103687e-06, "loss": 0.0333, "step": 93995 }, { "epoch": 0.39221904181722594, "grad_norm": 0.8402987097470056, "learning_rate": 3.193760872669192e-06, "loss": 0.0346, "step": 94000 }, { "epoch": 0.3922399045322162, "grad_norm": 0.776196004914032, "learning_rate": 3.1936759347053232e-06, "loss": 0.0267, "step": 94005 }, { "epoch": 0.3922607672472065, "grad_norm": 0.8498356774886997, "learning_rate": 3.19359100351786e-06, "loss": 0.0362, "step": 94010 }, { "epoch": 0.3922816299621968, "grad_norm": 0.9668376854870118, "learning_rate": 3.1935060791059042e-06, "loss": 0.0326, "step": 94015 }, { "epoch": 0.39230249267718703, "grad_norm": 0.9698776890722727, "learning_rate": 3.1934211614685517e-06, "loss": 0.03, "step": 94020 }, { "epoch": 0.39232335539217733, "grad_norm": 0.5587869815297724, "learning_rate": 3.193336250604903e-06, "loss": 0.0338, "step": 94025 }, { "epoch": 0.3923442181071676, "grad_norm": 2.317529857111651, "learning_rate": 3.1932513465140587e-06, "loss": 0.0499, "step": 94030 }, { "epoch": 0.3923650808221579, "grad_norm": 0.7408015722425237, "learning_rate": 3.193166449195117e-06, "loss": 0.0238, "step": 94035 }, { "epoch": 0.3923859435371482, "grad_norm": 0.45264573443389405, "learning_rate": 3.1930815586471787e-06, "loss": 0.0218, "step": 94040 }, { "epoch": 0.3924068062521384, "grad_norm": 0.4969694577283264, "learning_rate": 3.192996674869343e-06, "loss": 0.017, "step": 94045 }, { "epoch": 0.3924276689671287, "grad_norm": 1.140389007000737, "learning_rate": 3.192911797860711e-06, "loss": 0.0288, "step": 94050 }, { "epoch": 0.39244853168211896, "grad_norm": 0.4794226384155472, "learning_rate": 3.1928269276203827e-06, "loss": 0.0231, "step": 94055 }, { "epoch": 0.39246939439710926, "grad_norm": 1.0492481927933046, "learning_rate": 3.1927420641474583e-06, "loss": 0.0242, "step": 94060 }, { "epoch": 0.39249025711209956, "grad_norm": 1.1436962355710423, "learning_rate": 3.1926572074410384e-06, "loss": 0.0357, "step": 94065 }, { "epoch": 0.3925111198270898, "grad_norm": 0.7740946436853149, "learning_rate": 3.1925723575002243e-06, "loss": 0.0239, "step": 94070 }, { "epoch": 0.3925319825420801, "grad_norm": 0.7418757351949165, "learning_rate": 3.192487514324117e-06, "loss": 0.0315, "step": 94075 }, { "epoch": 0.39255284525707035, "grad_norm": 0.985916185255818, "learning_rate": 3.1924026779118174e-06, "loss": 0.0261, "step": 94080 }, { "epoch": 0.39257370797206065, "grad_norm": 0.6154034662784751, "learning_rate": 3.1923178482624268e-06, "loss": 0.0293, "step": 94085 }, { "epoch": 0.39259457068705095, "grad_norm": 0.6031346437561941, "learning_rate": 3.192233025375047e-06, "loss": 0.0271, "step": 94090 }, { "epoch": 0.3926154334020412, "grad_norm": 0.3200082137709514, "learning_rate": 3.1921482092487797e-06, "loss": 0.0197, "step": 94095 }, { "epoch": 0.3926362961170315, "grad_norm": 0.8552740128428684, "learning_rate": 3.1920633998827267e-06, "loss": 0.0272, "step": 94100 }, { "epoch": 0.3926571588320218, "grad_norm": 0.8613784437532321, "learning_rate": 3.19197859727599e-06, "loss": 0.0209, "step": 94105 }, { "epoch": 0.39267802154701203, "grad_norm": 0.7396596130429197, "learning_rate": 3.1918938014276713e-06, "loss": 0.0355, "step": 94110 }, { "epoch": 0.39269888426200233, "grad_norm": 0.701020048771455, "learning_rate": 3.1918090123368732e-06, "loss": 0.0239, "step": 94115 }, { "epoch": 0.3927197469769926, "grad_norm": 0.9430554544899188, "learning_rate": 3.1917242300026985e-06, "loss": 0.0316, "step": 94120 }, { "epoch": 0.3927406096919829, "grad_norm": 1.0634439268130336, "learning_rate": 3.1916394544242507e-06, "loss": 0.0313, "step": 94125 }, { "epoch": 0.3927614724069732, "grad_norm": 0.3405005320860251, "learning_rate": 3.1915546856006305e-06, "loss": 0.0266, "step": 94130 }, { "epoch": 0.3927823351219634, "grad_norm": 0.4212638500744346, "learning_rate": 3.191469923530943e-06, "loss": 0.0214, "step": 94135 }, { "epoch": 0.3928031978369537, "grad_norm": 0.8193457596172189, "learning_rate": 3.19138516821429e-06, "loss": 0.0298, "step": 94140 }, { "epoch": 0.39282406055194397, "grad_norm": 1.0718402814177264, "learning_rate": 3.1913004196497753e-06, "loss": 0.0268, "step": 94145 }, { "epoch": 0.39284492326693427, "grad_norm": 0.7350142807336023, "learning_rate": 3.1912156778365035e-06, "loss": 0.0272, "step": 94150 }, { "epoch": 0.39286578598192456, "grad_norm": 0.4616202642590621, "learning_rate": 3.1911309427735758e-06, "loss": 0.0246, "step": 94155 }, { "epoch": 0.3928866486969148, "grad_norm": 0.9895138011921464, "learning_rate": 3.1910462144600983e-06, "loss": 0.0339, "step": 94160 }, { "epoch": 0.3929075114119051, "grad_norm": 0.9224164453515562, "learning_rate": 3.190961492895174e-06, "loss": 0.0307, "step": 94165 }, { "epoch": 0.39292837412689535, "grad_norm": 0.5101977478767347, "learning_rate": 3.1908767780779076e-06, "loss": 0.0281, "step": 94170 }, { "epoch": 0.39294923684188565, "grad_norm": 0.32127167582280014, "learning_rate": 3.1907920700074037e-06, "loss": 0.0198, "step": 94175 }, { "epoch": 0.39297009955687595, "grad_norm": 0.9136954089540342, "learning_rate": 3.1907073686827656e-06, "loss": 0.0226, "step": 94180 }, { "epoch": 0.3929909622718662, "grad_norm": 0.8789019969555669, "learning_rate": 3.1906226741030986e-06, "loss": 0.0478, "step": 94185 }, { "epoch": 0.3930118249868565, "grad_norm": 0.6058199063196722, "learning_rate": 3.190537986267508e-06, "loss": 0.026, "step": 94190 }, { "epoch": 0.3930326877018468, "grad_norm": 0.71789605111917, "learning_rate": 3.190453305175098e-06, "loss": 0.0233, "step": 94195 }, { "epoch": 0.39305355041683704, "grad_norm": 0.784722980546569, "learning_rate": 3.190368630824975e-06, "loss": 0.0349, "step": 94200 }, { "epoch": 0.39307441313182734, "grad_norm": 0.4592197226220462, "learning_rate": 3.190283963216243e-06, "loss": 0.0228, "step": 94205 }, { "epoch": 0.3930952758468176, "grad_norm": 0.6323468647926528, "learning_rate": 3.1901993023480083e-06, "loss": 0.023, "step": 94210 }, { "epoch": 0.3931161385618079, "grad_norm": 1.3813112123850169, "learning_rate": 3.1901146482193764e-06, "loss": 0.0307, "step": 94215 }, { "epoch": 0.3931370012767982, "grad_norm": 0.5831705495114118, "learning_rate": 3.1900300008294527e-06, "loss": 0.0191, "step": 94220 }, { "epoch": 0.3931578639917884, "grad_norm": 0.7817675005794241, "learning_rate": 3.1899453601773445e-06, "loss": 0.0199, "step": 94225 }, { "epoch": 0.3931787267067787, "grad_norm": 1.298861877228672, "learning_rate": 3.189860726262156e-06, "loss": 0.0265, "step": 94230 }, { "epoch": 0.39319958942176897, "grad_norm": 0.5928779153776617, "learning_rate": 3.189776099082996e-06, "loss": 0.0261, "step": 94235 }, { "epoch": 0.39322045213675927, "grad_norm": 0.996480456798971, "learning_rate": 3.189691478638968e-06, "loss": 0.0245, "step": 94240 }, { "epoch": 0.39324131485174957, "grad_norm": 0.5541580218815889, "learning_rate": 3.189606864929182e-06, "loss": 0.0209, "step": 94245 }, { "epoch": 0.3932621775667398, "grad_norm": 0.679775663118253, "learning_rate": 3.1895222579527425e-06, "loss": 0.0232, "step": 94250 }, { "epoch": 0.3932830402817301, "grad_norm": 1.110180772246441, "learning_rate": 3.189437657708757e-06, "loss": 0.0294, "step": 94255 }, { "epoch": 0.39330390299672036, "grad_norm": 0.8881155696066783, "learning_rate": 3.1893530641963332e-06, "loss": 0.0268, "step": 94260 }, { "epoch": 0.39332476571171066, "grad_norm": 0.7280781255069795, "learning_rate": 3.1892684774145777e-06, "loss": 0.0282, "step": 94265 }, { "epoch": 0.39334562842670096, "grad_norm": 0.6837670443824294, "learning_rate": 3.189183897362599e-06, "loss": 0.0398, "step": 94270 }, { "epoch": 0.3933664911416912, "grad_norm": 0.4723245033126976, "learning_rate": 3.1890993240395037e-06, "loss": 0.0196, "step": 94275 }, { "epoch": 0.3933873538566815, "grad_norm": 0.8860297503658084, "learning_rate": 3.1890147574444004e-06, "loss": 0.0232, "step": 94280 }, { "epoch": 0.3934082165716718, "grad_norm": 0.7609134674803103, "learning_rate": 3.188930197576397e-06, "loss": 0.0287, "step": 94285 }, { "epoch": 0.39342907928666204, "grad_norm": 0.6532715341610783, "learning_rate": 3.188845644434601e-06, "loss": 0.0199, "step": 94290 }, { "epoch": 0.39344994200165234, "grad_norm": 0.7302589617265266, "learning_rate": 3.1887610980181214e-06, "loss": 0.0242, "step": 94295 }, { "epoch": 0.3934708047166426, "grad_norm": 0.6521031532310931, "learning_rate": 3.188676558326067e-06, "loss": 0.0215, "step": 94300 }, { "epoch": 0.3934916674316329, "grad_norm": 0.9761885596346404, "learning_rate": 3.1885920253575466e-06, "loss": 0.0255, "step": 94305 }, { "epoch": 0.3935125301466232, "grad_norm": 0.745308309395803, "learning_rate": 3.188507499111667e-06, "loss": 0.0295, "step": 94310 }, { "epoch": 0.39353339286161343, "grad_norm": 0.7243035881824895, "learning_rate": 3.188422979587539e-06, "loss": 0.0314, "step": 94315 }, { "epoch": 0.39355425557660373, "grad_norm": 0.5546104680966409, "learning_rate": 3.188338466784272e-06, "loss": 0.0222, "step": 94320 }, { "epoch": 0.393575118291594, "grad_norm": 0.9394431007236267, "learning_rate": 3.1882539607009743e-06, "loss": 0.0299, "step": 94325 }, { "epoch": 0.3935959810065843, "grad_norm": 0.6136181071135128, "learning_rate": 3.188169461336756e-06, "loss": 0.0244, "step": 94330 }, { "epoch": 0.39361684372157457, "grad_norm": 0.555084140783644, "learning_rate": 3.1880849686907263e-06, "loss": 0.0207, "step": 94335 }, { "epoch": 0.3936377064365648, "grad_norm": 1.034838380224134, "learning_rate": 3.188000482761996e-06, "loss": 0.0313, "step": 94340 }, { "epoch": 0.3936585691515551, "grad_norm": 1.1505299908142745, "learning_rate": 3.1879160035496733e-06, "loss": 0.0253, "step": 94345 }, { "epoch": 0.39367943186654536, "grad_norm": 1.8330973738814806, "learning_rate": 3.18783153105287e-06, "loss": 0.0334, "step": 94350 }, { "epoch": 0.39370029458153566, "grad_norm": 0.6333189188234406, "learning_rate": 3.187747065270696e-06, "loss": 0.0342, "step": 94355 }, { "epoch": 0.39372115729652596, "grad_norm": 0.5478034243245824, "learning_rate": 3.1876626062022614e-06, "loss": 0.0294, "step": 94360 }, { "epoch": 0.3937420200115162, "grad_norm": 0.6884000236204065, "learning_rate": 3.1875781538466777e-06, "loss": 0.0297, "step": 94365 }, { "epoch": 0.3937628827265065, "grad_norm": 0.8587470898447337, "learning_rate": 3.1874937082030546e-06, "loss": 0.0324, "step": 94370 }, { "epoch": 0.3937837454414968, "grad_norm": 0.7503325489400839, "learning_rate": 3.1874092692705028e-06, "loss": 0.0179, "step": 94375 }, { "epoch": 0.39380460815648705, "grad_norm": 0.5722385936362995, "learning_rate": 3.1873248370481358e-06, "loss": 0.0242, "step": 94380 }, { "epoch": 0.39382547087147735, "grad_norm": 1.0490422288100734, "learning_rate": 3.1872404115350617e-06, "loss": 0.0207, "step": 94385 }, { "epoch": 0.3938463335864676, "grad_norm": 1.555734756693863, "learning_rate": 3.1871559927303946e-06, "loss": 0.0429, "step": 94390 }, { "epoch": 0.3938671963014579, "grad_norm": 5.552978818720892, "learning_rate": 3.1870715806332446e-06, "loss": 0.0308, "step": 94395 }, { "epoch": 0.3938880590164482, "grad_norm": 0.6088427348957216, "learning_rate": 3.1869871752427252e-06, "loss": 0.0289, "step": 94400 }, { "epoch": 0.39390892173143843, "grad_norm": 1.0045961143069213, "learning_rate": 3.1869027765579464e-06, "loss": 0.0294, "step": 94405 }, { "epoch": 0.39392978444642873, "grad_norm": 1.2292816836614677, "learning_rate": 3.186818384578021e-06, "loss": 0.0298, "step": 94410 }, { "epoch": 0.393950647161419, "grad_norm": 0.6653941942716276, "learning_rate": 3.1867339993020614e-06, "loss": 0.0312, "step": 94415 }, { "epoch": 0.3939715098764093, "grad_norm": 0.4506103689717684, "learning_rate": 3.18664962072918e-06, "loss": 0.0231, "step": 94420 }, { "epoch": 0.3939923725913996, "grad_norm": 0.3930499029152649, "learning_rate": 3.1865652488584898e-06, "loss": 0.0317, "step": 94425 }, { "epoch": 0.3940132353063898, "grad_norm": 0.9520670249564538, "learning_rate": 3.186480883689103e-06, "loss": 0.0243, "step": 94430 }, { "epoch": 0.3940340980213801, "grad_norm": 1.1738614801927836, "learning_rate": 3.1863965252201334e-06, "loss": 0.0257, "step": 94435 }, { "epoch": 0.39405496073637036, "grad_norm": 1.169654547523627, "learning_rate": 3.1863121734506934e-06, "loss": 0.0348, "step": 94440 }, { "epoch": 0.39407582345136066, "grad_norm": 0.8567804547652615, "learning_rate": 3.1862278283798958e-06, "loss": 0.0254, "step": 94445 }, { "epoch": 0.39409668616635096, "grad_norm": 0.6272466858237647, "learning_rate": 3.1861434900068553e-06, "loss": 0.0299, "step": 94450 }, { "epoch": 0.3941175488813412, "grad_norm": 0.5864657924733797, "learning_rate": 3.1860591583306848e-06, "loss": 0.0291, "step": 94455 }, { "epoch": 0.3941384115963315, "grad_norm": 0.776061161862538, "learning_rate": 3.1859748333504986e-06, "loss": 0.0221, "step": 94460 }, { "epoch": 0.3941592743113218, "grad_norm": 0.6266723371711724, "learning_rate": 3.1858905150654103e-06, "loss": 0.0193, "step": 94465 }, { "epoch": 0.39418013702631205, "grad_norm": 0.4908469734237946, "learning_rate": 3.185806203474533e-06, "loss": 0.0233, "step": 94470 }, { "epoch": 0.39420099974130235, "grad_norm": 0.6729260369377533, "learning_rate": 3.185721898576982e-06, "loss": 0.0242, "step": 94475 }, { "epoch": 0.3942218624562926, "grad_norm": 0.7699899875273524, "learning_rate": 3.185637600371872e-06, "loss": 0.0289, "step": 94480 }, { "epoch": 0.3942427251712829, "grad_norm": 0.7624301922845239, "learning_rate": 3.185553308858317e-06, "loss": 0.0319, "step": 94485 }, { "epoch": 0.3942635878862732, "grad_norm": 0.7327932670702029, "learning_rate": 3.1854690240354323e-06, "loss": 0.0336, "step": 94490 }, { "epoch": 0.39428445060126344, "grad_norm": 0.4659203252949656, "learning_rate": 3.1853847459023324e-06, "loss": 0.0394, "step": 94495 }, { "epoch": 0.39430531331625374, "grad_norm": 0.7798402278760485, "learning_rate": 3.1853004744581322e-06, "loss": 0.0265, "step": 94500 }, { "epoch": 0.394326176031244, "grad_norm": 0.8666413528933369, "learning_rate": 3.185216209701948e-06, "loss": 0.0324, "step": 94505 }, { "epoch": 0.3943470387462343, "grad_norm": 0.48566119825753823, "learning_rate": 3.185131951632894e-06, "loss": 0.021, "step": 94510 }, { "epoch": 0.3943679014612246, "grad_norm": 0.5316418633473268, "learning_rate": 3.185047700250086e-06, "loss": 0.0216, "step": 94515 }, { "epoch": 0.3943887641762148, "grad_norm": 0.7061454991023146, "learning_rate": 3.18496345555264e-06, "loss": 0.0321, "step": 94520 }, { "epoch": 0.3944096268912051, "grad_norm": 0.7496562408570026, "learning_rate": 3.184879217539672e-06, "loss": 0.0261, "step": 94525 }, { "epoch": 0.39443048960619537, "grad_norm": 0.630525291016658, "learning_rate": 3.1847949862102977e-06, "loss": 0.033, "step": 94530 }, { "epoch": 0.39445135232118567, "grad_norm": 1.0705963337046887, "learning_rate": 3.184710761563634e-06, "loss": 0.029, "step": 94535 }, { "epoch": 0.39447221503617597, "grad_norm": 0.6623894302833192, "learning_rate": 3.184626543598796e-06, "loss": 0.0306, "step": 94540 }, { "epoch": 0.3944930777511662, "grad_norm": 0.8354794098498347, "learning_rate": 3.1845423323149023e-06, "loss": 0.0242, "step": 94545 }, { "epoch": 0.3945139404661565, "grad_norm": 0.878072533856653, "learning_rate": 3.184458127711068e-06, "loss": 0.0246, "step": 94550 }, { "epoch": 0.3945348031811468, "grad_norm": 1.2787964053658547, "learning_rate": 3.18437392978641e-06, "loss": 0.034, "step": 94555 }, { "epoch": 0.39455566589613705, "grad_norm": 0.6243432895213971, "learning_rate": 3.1842897385400466e-06, "loss": 0.0236, "step": 94560 }, { "epoch": 0.39457652861112735, "grad_norm": 0.8442204459813313, "learning_rate": 3.1842055539710937e-06, "loss": 0.0249, "step": 94565 }, { "epoch": 0.3945973913261176, "grad_norm": 0.6443628702542635, "learning_rate": 3.1841213760786692e-06, "loss": 0.0358, "step": 94570 }, { "epoch": 0.3946182540411079, "grad_norm": 0.936680428842705, "learning_rate": 3.1840372048618905e-06, "loss": 0.0284, "step": 94575 }, { "epoch": 0.3946391167560982, "grad_norm": 0.3480539947149294, "learning_rate": 3.183953040319876e-06, "loss": 0.0149, "step": 94580 }, { "epoch": 0.39465997947108844, "grad_norm": 0.2319360818072069, "learning_rate": 3.1838688824517427e-06, "loss": 0.0197, "step": 94585 }, { "epoch": 0.39468084218607874, "grad_norm": 2.2799306142964175, "learning_rate": 3.1837847312566087e-06, "loss": 0.0299, "step": 94590 }, { "epoch": 0.394701704901069, "grad_norm": 0.809828595274185, "learning_rate": 3.1837005867335923e-06, "loss": 0.0277, "step": 94595 }, { "epoch": 0.3947225676160593, "grad_norm": 0.4746236574474054, "learning_rate": 3.1836164488818124e-06, "loss": 0.0363, "step": 94600 }, { "epoch": 0.3947434303310496, "grad_norm": 0.8285008095467732, "learning_rate": 3.183532317700387e-06, "loss": 0.0223, "step": 94605 }, { "epoch": 0.3947642930460398, "grad_norm": 0.7212764721922887, "learning_rate": 3.1834481931884353e-06, "loss": 0.0363, "step": 94610 }, { "epoch": 0.3947851557610301, "grad_norm": 0.6656065980392436, "learning_rate": 3.183364075345075e-06, "loss": 0.0274, "step": 94615 }, { "epoch": 0.39480601847602037, "grad_norm": 0.785853425627069, "learning_rate": 3.1832799641694263e-06, "loss": 0.0279, "step": 94620 }, { "epoch": 0.39482688119101067, "grad_norm": 0.669185016491617, "learning_rate": 3.1831958596606075e-06, "loss": 0.0225, "step": 94625 }, { "epoch": 0.39484774390600097, "grad_norm": 0.5943736477783448, "learning_rate": 3.1831117618177386e-06, "loss": 0.0285, "step": 94630 }, { "epoch": 0.3948686066209912, "grad_norm": 1.0721775964891755, "learning_rate": 3.183027670639939e-06, "loss": 0.0248, "step": 94635 }, { "epoch": 0.3948894693359815, "grad_norm": 0.6819910166465024, "learning_rate": 3.182943586126328e-06, "loss": 0.0299, "step": 94640 }, { "epoch": 0.3949103320509718, "grad_norm": 0.5347745578496023, "learning_rate": 3.1828595082760257e-06, "loss": 0.0196, "step": 94645 }, { "epoch": 0.39493119476596206, "grad_norm": 0.99741805452284, "learning_rate": 3.1827754370881516e-06, "loss": 0.0257, "step": 94650 }, { "epoch": 0.39495205748095236, "grad_norm": 1.2030574191099488, "learning_rate": 3.182691372561827e-06, "loss": 0.0385, "step": 94655 }, { "epoch": 0.3949729201959426, "grad_norm": 0.8829607136376652, "learning_rate": 3.1826073146961708e-06, "loss": 0.0306, "step": 94660 }, { "epoch": 0.3949937829109329, "grad_norm": 0.8298774039336667, "learning_rate": 3.182523263490305e-06, "loss": 0.0267, "step": 94665 }, { "epoch": 0.3950146456259232, "grad_norm": 0.68467684967727, "learning_rate": 3.182439218943348e-06, "loss": 0.0244, "step": 94670 }, { "epoch": 0.39503550834091344, "grad_norm": 0.5242386789082266, "learning_rate": 3.182355181054423e-06, "loss": 0.0199, "step": 94675 }, { "epoch": 0.39505637105590374, "grad_norm": 1.0573860996602131, "learning_rate": 3.18227114982265e-06, "loss": 0.0276, "step": 94680 }, { "epoch": 0.395077233770894, "grad_norm": 0.7057517607070031, "learning_rate": 3.18218712524715e-06, "loss": 0.0299, "step": 94685 }, { "epoch": 0.3950980964858843, "grad_norm": 0.6852776310800959, "learning_rate": 3.1821031073270436e-06, "loss": 0.0299, "step": 94690 }, { "epoch": 0.3951189592008746, "grad_norm": 0.6034312160620168, "learning_rate": 3.1820190960614535e-06, "loss": 0.0201, "step": 94695 }, { "epoch": 0.39513982191586483, "grad_norm": 1.0518369133482248, "learning_rate": 3.181935091449501e-06, "loss": 0.0261, "step": 94700 }, { "epoch": 0.39516068463085513, "grad_norm": 0.625974975386494, "learning_rate": 3.181851093490308e-06, "loss": 0.025, "step": 94705 }, { "epoch": 0.3951815473458454, "grad_norm": 0.635525194250619, "learning_rate": 3.1817671021829948e-06, "loss": 0.0226, "step": 94710 }, { "epoch": 0.3952024100608357, "grad_norm": 0.9153257635807319, "learning_rate": 3.181683117526686e-06, "loss": 0.0275, "step": 94715 }, { "epoch": 0.395223272775826, "grad_norm": 0.4056428919777683, "learning_rate": 3.181599139520502e-06, "loss": 0.0307, "step": 94720 }, { "epoch": 0.3952441354908162, "grad_norm": 0.6703487806998394, "learning_rate": 3.181515168163567e-06, "loss": 0.0245, "step": 94725 }, { "epoch": 0.3952649982058065, "grad_norm": 0.7736366503717396, "learning_rate": 3.1814312034550015e-06, "loss": 0.0279, "step": 94730 }, { "epoch": 0.39528586092079676, "grad_norm": 0.9373370567665668, "learning_rate": 3.1813472453939293e-06, "loss": 0.0275, "step": 94735 }, { "epoch": 0.39530672363578706, "grad_norm": 1.14223619373537, "learning_rate": 3.1812632939794734e-06, "loss": 0.0333, "step": 94740 }, { "epoch": 0.39532758635077736, "grad_norm": 1.0336136285964252, "learning_rate": 3.1811793492107567e-06, "loss": 0.0253, "step": 94745 }, { "epoch": 0.3953484490657676, "grad_norm": 0.8424378281811357, "learning_rate": 3.1810954110869025e-06, "loss": 0.0346, "step": 94750 }, { "epoch": 0.3953693117807579, "grad_norm": 0.6123005016957606, "learning_rate": 3.1810114796070333e-06, "loss": 0.0224, "step": 94755 }, { "epoch": 0.3953901744957482, "grad_norm": 0.6920610243800088, "learning_rate": 3.180927554770275e-06, "loss": 0.0309, "step": 94760 }, { "epoch": 0.39541103721073845, "grad_norm": 0.9632944014319814, "learning_rate": 3.1808436365757495e-06, "loss": 0.0241, "step": 94765 }, { "epoch": 0.39543189992572875, "grad_norm": 0.6019728019808531, "learning_rate": 3.1807597250225802e-06, "loss": 0.0324, "step": 94770 }, { "epoch": 0.395452762640719, "grad_norm": 0.579485543067555, "learning_rate": 3.1806758201098926e-06, "loss": 0.0216, "step": 94775 }, { "epoch": 0.3954736253557093, "grad_norm": 0.5782773540877892, "learning_rate": 3.1805919218368096e-06, "loss": 0.0269, "step": 94780 }, { "epoch": 0.3954944880706996, "grad_norm": 0.868434768509149, "learning_rate": 3.1805080302024565e-06, "loss": 0.0287, "step": 94785 }, { "epoch": 0.39551535078568983, "grad_norm": 0.8440563389064168, "learning_rate": 3.1804241452059576e-06, "loss": 0.0247, "step": 94790 }, { "epoch": 0.39553621350068013, "grad_norm": 0.9740330842915104, "learning_rate": 3.180340266846438e-06, "loss": 0.0355, "step": 94795 }, { "epoch": 0.3955570762156704, "grad_norm": 1.1491493436613387, "learning_rate": 3.1802563951230216e-06, "loss": 0.0332, "step": 94800 }, { "epoch": 0.3955779389306607, "grad_norm": 0.7718520624057156, "learning_rate": 3.1801725300348335e-06, "loss": 0.0175, "step": 94805 }, { "epoch": 0.395598801645651, "grad_norm": 0.6397414681246878, "learning_rate": 3.1800886715809997e-06, "loss": 0.0282, "step": 94810 }, { "epoch": 0.3956196643606412, "grad_norm": 0.8727407139125625, "learning_rate": 3.1800048197606447e-06, "loss": 0.0238, "step": 94815 }, { "epoch": 0.3956405270756315, "grad_norm": 1.004880232395917, "learning_rate": 3.179920974572895e-06, "loss": 0.0323, "step": 94820 }, { "epoch": 0.39566138979062176, "grad_norm": 0.8587024901156987, "learning_rate": 3.1798371360168755e-06, "loss": 0.0319, "step": 94825 }, { "epoch": 0.39568225250561206, "grad_norm": 1.4158976414651272, "learning_rate": 3.1797533040917117e-06, "loss": 0.0253, "step": 94830 }, { "epoch": 0.39570311522060236, "grad_norm": 0.5850280009134753, "learning_rate": 3.1796694787965306e-06, "loss": 0.0206, "step": 94835 }, { "epoch": 0.3957239779355926, "grad_norm": 1.5065892290800083, "learning_rate": 3.179585660130457e-06, "loss": 0.0283, "step": 94840 }, { "epoch": 0.3957448406505829, "grad_norm": 0.7315260987892384, "learning_rate": 3.1795018480926185e-06, "loss": 0.0273, "step": 94845 }, { "epoch": 0.3957657033655732, "grad_norm": 0.863261598526113, "learning_rate": 3.1794180426821415e-06, "loss": 0.036, "step": 94850 }, { "epoch": 0.39578656608056345, "grad_norm": 1.3200561388183452, "learning_rate": 3.179334243898151e-06, "loss": 0.0301, "step": 94855 }, { "epoch": 0.39580742879555375, "grad_norm": 0.8548237241690918, "learning_rate": 3.1792504517397767e-06, "loss": 0.0303, "step": 94860 }, { "epoch": 0.395828291510544, "grad_norm": 0.5733955450787902, "learning_rate": 3.179166666206142e-06, "loss": 0.0241, "step": 94865 }, { "epoch": 0.3958491542255343, "grad_norm": 0.8177630516386479, "learning_rate": 3.179082887296377e-06, "loss": 0.0234, "step": 94870 }, { "epoch": 0.3958700169405246, "grad_norm": 0.6417835037082922, "learning_rate": 3.1789991150096073e-06, "loss": 0.024, "step": 94875 }, { "epoch": 0.39589087965551484, "grad_norm": 1.0602416501918617, "learning_rate": 3.1789153493449608e-06, "loss": 0.0277, "step": 94880 }, { "epoch": 0.39591174237050514, "grad_norm": 0.7353449249413198, "learning_rate": 3.1788315903015653e-06, "loss": 0.0161, "step": 94885 }, { "epoch": 0.3959326050854954, "grad_norm": 0.5464672315400204, "learning_rate": 3.1787478378785484e-06, "loss": 0.0278, "step": 94890 }, { "epoch": 0.3959534678004857, "grad_norm": 1.0451747469077173, "learning_rate": 3.1786640920750377e-06, "loss": 0.0246, "step": 94895 }, { "epoch": 0.395974330515476, "grad_norm": 0.8725853908315708, "learning_rate": 3.1785803528901616e-06, "loss": 0.0275, "step": 94900 }, { "epoch": 0.3959951932304662, "grad_norm": 0.3493524262073046, "learning_rate": 3.178496620323049e-06, "loss": 0.0346, "step": 94905 }, { "epoch": 0.3960160559454565, "grad_norm": 0.8186216456233075, "learning_rate": 3.178412894372827e-06, "loss": 0.0221, "step": 94910 }, { "epoch": 0.39603691866044677, "grad_norm": 0.5331200316140493, "learning_rate": 3.178329175038625e-06, "loss": 0.0206, "step": 94915 }, { "epoch": 0.39605778137543707, "grad_norm": 0.6330538326939263, "learning_rate": 3.1782454623195706e-06, "loss": 0.0258, "step": 94920 }, { "epoch": 0.39607864409042737, "grad_norm": 0.6730929474125638, "learning_rate": 3.1781617562147937e-06, "loss": 0.0221, "step": 94925 }, { "epoch": 0.3960995068054176, "grad_norm": 1.2115254554777612, "learning_rate": 3.178078056723424e-06, "loss": 0.0284, "step": 94930 }, { "epoch": 0.3961203695204079, "grad_norm": 0.9477730514949104, "learning_rate": 3.177994363844589e-06, "loss": 0.0217, "step": 94935 }, { "epoch": 0.3961412322353982, "grad_norm": 0.6959610560788396, "learning_rate": 3.177910677577419e-06, "loss": 0.0296, "step": 94940 }, { "epoch": 0.39616209495038845, "grad_norm": 0.71170122441759, "learning_rate": 3.177826997921044e-06, "loss": 0.0258, "step": 94945 }, { "epoch": 0.39618295766537875, "grad_norm": 0.6846194711067622, "learning_rate": 3.177743324874593e-06, "loss": 0.0293, "step": 94950 }, { "epoch": 0.396203820380369, "grad_norm": 1.3547433716910777, "learning_rate": 3.177659658437196e-06, "loss": 0.0223, "step": 94955 }, { "epoch": 0.3962246830953593, "grad_norm": 0.5740262003320883, "learning_rate": 3.1775759986079823e-06, "loss": 0.0246, "step": 94960 }, { "epoch": 0.3962455458103496, "grad_norm": 1.2113975120444738, "learning_rate": 3.1774923453860833e-06, "loss": 0.023, "step": 94965 }, { "epoch": 0.39626640852533984, "grad_norm": 0.6643706371852778, "learning_rate": 3.177408698770629e-06, "loss": 0.0203, "step": 94970 }, { "epoch": 0.39628727124033014, "grad_norm": 1.292562583472773, "learning_rate": 3.177325058760749e-06, "loss": 0.0271, "step": 94975 }, { "epoch": 0.3963081339553204, "grad_norm": 0.6476589346044241, "learning_rate": 3.1772414253555756e-06, "loss": 0.0193, "step": 94980 }, { "epoch": 0.3963289966703107, "grad_norm": 1.1719594718478794, "learning_rate": 3.1771577985542372e-06, "loss": 0.0276, "step": 94985 }, { "epoch": 0.396349859385301, "grad_norm": 0.9558423265582346, "learning_rate": 3.1770741783558667e-06, "loss": 0.0292, "step": 94990 }, { "epoch": 0.39637072210029123, "grad_norm": 0.7102707518591646, "learning_rate": 3.1769905647595958e-06, "loss": 0.0271, "step": 94995 }, { "epoch": 0.39639158481528153, "grad_norm": 0.6895826503090381, "learning_rate": 3.176906957764553e-06, "loss": 0.0249, "step": 95000 }, { "epoch": 0.39641244753027177, "grad_norm": 0.3533998235943077, "learning_rate": 3.176823357369872e-06, "loss": 0.0206, "step": 95005 }, { "epoch": 0.39643331024526207, "grad_norm": 0.9177780757395732, "learning_rate": 3.1767397635746834e-06, "loss": 0.0385, "step": 95010 }, { "epoch": 0.39645417296025237, "grad_norm": 0.684121288011764, "learning_rate": 3.1766561763781196e-06, "loss": 0.0196, "step": 95015 }, { "epoch": 0.3964750356752426, "grad_norm": 0.5722183282532335, "learning_rate": 3.176572595779312e-06, "loss": 0.0227, "step": 95020 }, { "epoch": 0.3964958983902329, "grad_norm": 0.26805644676241064, "learning_rate": 3.1764890217773936e-06, "loss": 0.0181, "step": 95025 }, { "epoch": 0.3965167611052232, "grad_norm": 0.656304015050541, "learning_rate": 3.176405454371496e-06, "loss": 0.0258, "step": 95030 }, { "epoch": 0.39653762382021346, "grad_norm": 1.1784268685908321, "learning_rate": 3.1763218935607507e-06, "loss": 0.0443, "step": 95035 }, { "epoch": 0.39655848653520376, "grad_norm": 0.6265469199500483, "learning_rate": 3.176238339344292e-06, "loss": 0.0348, "step": 95040 }, { "epoch": 0.396579349250194, "grad_norm": 0.7010511677398161, "learning_rate": 3.1761547917212517e-06, "loss": 0.0293, "step": 95045 }, { "epoch": 0.3966002119651843, "grad_norm": 0.7506745949894129, "learning_rate": 3.1760712506907626e-06, "loss": 0.019, "step": 95050 }, { "epoch": 0.3966210746801746, "grad_norm": 0.537073458312989, "learning_rate": 3.175987716251958e-06, "loss": 0.0335, "step": 95055 }, { "epoch": 0.39664193739516485, "grad_norm": 0.5266835976738861, "learning_rate": 3.1759041884039702e-06, "loss": 0.0339, "step": 95060 }, { "epoch": 0.39666280011015514, "grad_norm": 0.39546144507457326, "learning_rate": 3.175820667145934e-06, "loss": 0.0223, "step": 95065 }, { "epoch": 0.3966836628251454, "grad_norm": 0.7234681014358852, "learning_rate": 3.1757371524769825e-06, "loss": 0.0295, "step": 95070 }, { "epoch": 0.3967045255401357, "grad_norm": 0.6909256726278399, "learning_rate": 3.1756536443962487e-06, "loss": 0.0247, "step": 95075 }, { "epoch": 0.396725388255126, "grad_norm": 1.1340304642612362, "learning_rate": 3.175570142902867e-06, "loss": 0.0413, "step": 95080 }, { "epoch": 0.39674625097011623, "grad_norm": 0.7957498750392876, "learning_rate": 3.1754866479959717e-06, "loss": 0.0353, "step": 95085 }, { "epoch": 0.39676711368510653, "grad_norm": 0.7555831930748348, "learning_rate": 3.1754031596746963e-06, "loss": 0.0245, "step": 95090 }, { "epoch": 0.3967879764000968, "grad_norm": 1.3410519761340243, "learning_rate": 3.175319677938175e-06, "loss": 0.0351, "step": 95095 }, { "epoch": 0.3968088391150871, "grad_norm": 0.7176688072254944, "learning_rate": 3.1752362027855425e-06, "loss": 0.0213, "step": 95100 }, { "epoch": 0.3968297018300774, "grad_norm": 0.7648498992235205, "learning_rate": 3.1751527342159345e-06, "loss": 0.027, "step": 95105 }, { "epoch": 0.3968505645450676, "grad_norm": 0.2201320829589204, "learning_rate": 3.1750692722284844e-06, "loss": 0.0151, "step": 95110 }, { "epoch": 0.3968714272600579, "grad_norm": 1.1399021223994301, "learning_rate": 3.1749858168223274e-06, "loss": 0.0333, "step": 95115 }, { "epoch": 0.3968922899750482, "grad_norm": 0.45987476532137295, "learning_rate": 3.174902367996599e-06, "loss": 0.0271, "step": 95120 }, { "epoch": 0.39691315269003846, "grad_norm": 0.5579913670078659, "learning_rate": 3.1748189257504348e-06, "loss": 0.0292, "step": 95125 }, { "epoch": 0.39693401540502876, "grad_norm": 0.7069313880859717, "learning_rate": 3.1747354900829686e-06, "loss": 0.0285, "step": 95130 }, { "epoch": 0.396954878120019, "grad_norm": 0.5962136068137596, "learning_rate": 3.174652060993338e-06, "loss": 0.0321, "step": 95135 }, { "epoch": 0.3969757408350093, "grad_norm": 0.7830783042856979, "learning_rate": 3.1745686384806772e-06, "loss": 0.0204, "step": 95140 }, { "epoch": 0.3969966035499996, "grad_norm": 1.0754638315169793, "learning_rate": 3.1744852225441236e-06, "loss": 0.0327, "step": 95145 }, { "epoch": 0.39701746626498985, "grad_norm": 1.03944318353235, "learning_rate": 3.1744018131828123e-06, "loss": 0.0301, "step": 95150 }, { "epoch": 0.39703832897998015, "grad_norm": 1.2942913831361809, "learning_rate": 3.174318410395879e-06, "loss": 0.0306, "step": 95155 }, { "epoch": 0.3970591916949704, "grad_norm": 1.309130952949457, "learning_rate": 3.1742350141824617e-06, "loss": 0.0387, "step": 95160 }, { "epoch": 0.3970800544099607, "grad_norm": 1.24837234424597, "learning_rate": 3.1741516245416952e-06, "loss": 0.0349, "step": 95165 }, { "epoch": 0.397100917124951, "grad_norm": 1.248968268208486, "learning_rate": 3.1740682414727174e-06, "loss": 0.0266, "step": 95170 }, { "epoch": 0.39712177983994124, "grad_norm": 0.7061241417604327, "learning_rate": 3.1739848649746645e-06, "loss": 0.0245, "step": 95175 }, { "epoch": 0.39714264255493154, "grad_norm": 0.5418303747502748, "learning_rate": 3.173901495046674e-06, "loss": 0.0274, "step": 95180 }, { "epoch": 0.3971635052699218, "grad_norm": 0.5425937274308675, "learning_rate": 3.173818131687883e-06, "loss": 0.0284, "step": 95185 }, { "epoch": 0.3971843679849121, "grad_norm": 0.6507835200638304, "learning_rate": 3.1737347748974284e-06, "loss": 0.0212, "step": 95190 }, { "epoch": 0.3972052306999024, "grad_norm": 0.5623925225831878, "learning_rate": 3.1736514246744476e-06, "loss": 0.0253, "step": 95195 }, { "epoch": 0.3972260934148926, "grad_norm": 0.547945939290406, "learning_rate": 3.1735680810180792e-06, "loss": 0.03, "step": 95200 }, { "epoch": 0.3972469561298829, "grad_norm": 0.5098818442497228, "learning_rate": 3.17348474392746e-06, "loss": 0.0244, "step": 95205 }, { "epoch": 0.3972678188448732, "grad_norm": 0.8310230434891852, "learning_rate": 3.1734014134017294e-06, "loss": 0.0332, "step": 95210 }, { "epoch": 0.39728868155986347, "grad_norm": 0.8748356182524601, "learning_rate": 3.173318089440024e-06, "loss": 0.0328, "step": 95215 }, { "epoch": 0.39730954427485377, "grad_norm": 0.7418732488748219, "learning_rate": 3.173234772041483e-06, "loss": 0.0282, "step": 95220 }, { "epoch": 0.397330406989844, "grad_norm": 0.9275747722058278, "learning_rate": 3.1731514612052432e-06, "loss": 0.0243, "step": 95225 }, { "epoch": 0.3973512697048343, "grad_norm": 1.530366703326382, "learning_rate": 3.1730681569304456e-06, "loss": 0.022, "step": 95230 }, { "epoch": 0.3973721324198246, "grad_norm": 1.070485262356896, "learning_rate": 3.172984859216228e-06, "loss": 0.0217, "step": 95235 }, { "epoch": 0.39739299513481485, "grad_norm": 0.6350453942155557, "learning_rate": 3.1729015680617287e-06, "loss": 0.0231, "step": 95240 }, { "epoch": 0.39741385784980515, "grad_norm": 0.9113199388031855, "learning_rate": 3.1728182834660874e-06, "loss": 0.0336, "step": 95245 }, { "epoch": 0.3974347205647954, "grad_norm": 0.7838086600482356, "learning_rate": 3.1727350054284428e-06, "loss": 0.0306, "step": 95250 }, { "epoch": 0.3974555832797857, "grad_norm": 0.512985523227876, "learning_rate": 3.172651733947935e-06, "loss": 0.0184, "step": 95255 }, { "epoch": 0.397476445994776, "grad_norm": 0.9147641646751037, "learning_rate": 3.172568469023703e-06, "loss": 0.0279, "step": 95260 }, { "epoch": 0.39749730870976624, "grad_norm": 1.1792250195333303, "learning_rate": 3.1724852106548874e-06, "loss": 0.0317, "step": 95265 }, { "epoch": 0.39751817142475654, "grad_norm": 0.8882948795514275, "learning_rate": 3.1724019588406273e-06, "loss": 0.0262, "step": 95270 }, { "epoch": 0.3975390341397468, "grad_norm": 0.674323030052952, "learning_rate": 3.1723187135800627e-06, "loss": 0.0307, "step": 95275 }, { "epoch": 0.3975598968547371, "grad_norm": 0.643369328631131, "learning_rate": 3.1722354748723334e-06, "loss": 0.0216, "step": 95280 }, { "epoch": 0.3975807595697274, "grad_norm": 0.4959131149453975, "learning_rate": 3.1721522427165806e-06, "loss": 0.0247, "step": 95285 }, { "epoch": 0.3976016222847176, "grad_norm": 2.0629049248284956, "learning_rate": 3.172069017111945e-06, "loss": 0.0269, "step": 95290 }, { "epoch": 0.3976224849997079, "grad_norm": 0.8679552781444154, "learning_rate": 3.1719857980575663e-06, "loss": 0.0228, "step": 95295 }, { "epoch": 0.3976433477146982, "grad_norm": 0.6813463689451839, "learning_rate": 3.1719025855525853e-06, "loss": 0.0273, "step": 95300 }, { "epoch": 0.39766421042968847, "grad_norm": 0.7028964501730434, "learning_rate": 3.171819379596144e-06, "loss": 0.0239, "step": 95305 }, { "epoch": 0.39768507314467877, "grad_norm": 1.077338980117891, "learning_rate": 3.1717361801873823e-06, "loss": 0.0232, "step": 95310 }, { "epoch": 0.397705935859669, "grad_norm": 0.830765627470408, "learning_rate": 3.1716529873254435e-06, "loss": 0.02, "step": 95315 }, { "epoch": 0.3977267985746593, "grad_norm": 1.222767839158307, "learning_rate": 3.171569801009467e-06, "loss": 0.0285, "step": 95320 }, { "epoch": 0.3977476612896496, "grad_norm": 1.2044728022063667, "learning_rate": 3.171486621238595e-06, "loss": 0.0332, "step": 95325 }, { "epoch": 0.39776852400463986, "grad_norm": 0.35864990597693475, "learning_rate": 3.1714034480119695e-06, "loss": 0.0176, "step": 95330 }, { "epoch": 0.39778938671963016, "grad_norm": 0.8001991539202478, "learning_rate": 3.171320281328732e-06, "loss": 0.0331, "step": 95335 }, { "epoch": 0.3978102494346204, "grad_norm": 1.5995771765152547, "learning_rate": 3.171237121188025e-06, "loss": 0.0203, "step": 95340 }, { "epoch": 0.3978311121496107, "grad_norm": 0.8950852727887633, "learning_rate": 3.171153967588991e-06, "loss": 0.0274, "step": 95345 }, { "epoch": 0.397851974864601, "grad_norm": 1.3216222038197023, "learning_rate": 3.1710708205307716e-06, "loss": 0.0343, "step": 95350 }, { "epoch": 0.39787283757959124, "grad_norm": 1.086830802101143, "learning_rate": 3.1709876800125094e-06, "loss": 0.0308, "step": 95355 }, { "epoch": 0.39789370029458154, "grad_norm": 0.34744835827960246, "learning_rate": 3.1709045460333474e-06, "loss": 0.0184, "step": 95360 }, { "epoch": 0.3979145630095718, "grad_norm": 0.9643610929042271, "learning_rate": 3.170821418592429e-06, "loss": 0.0257, "step": 95365 }, { "epoch": 0.3979354257245621, "grad_norm": 0.9482047303093194, "learning_rate": 3.1707382976888973e-06, "loss": 0.0257, "step": 95370 }, { "epoch": 0.3979562884395524, "grad_norm": 0.6864824106714722, "learning_rate": 3.1706551833218947e-06, "loss": 0.0215, "step": 95375 }, { "epoch": 0.39797715115454263, "grad_norm": 0.6820750197429711, "learning_rate": 3.170572075490564e-06, "loss": 0.0256, "step": 95380 }, { "epoch": 0.39799801386953293, "grad_norm": 0.9876008990807794, "learning_rate": 3.1704889741940493e-06, "loss": 0.0322, "step": 95385 }, { "epoch": 0.39801887658452323, "grad_norm": 0.5781426686236842, "learning_rate": 3.1704058794314955e-06, "loss": 0.0377, "step": 95390 }, { "epoch": 0.3980397392995135, "grad_norm": 0.7708164176673119, "learning_rate": 3.1703227912020446e-06, "loss": 0.0249, "step": 95395 }, { "epoch": 0.3980606020145038, "grad_norm": 1.0963275715557528, "learning_rate": 3.1702397095048415e-06, "loss": 0.0239, "step": 95400 }, { "epoch": 0.398081464729494, "grad_norm": 1.0325159176912593, "learning_rate": 3.1701566343390294e-06, "loss": 0.0292, "step": 95405 }, { "epoch": 0.3981023274444843, "grad_norm": 0.794013016977945, "learning_rate": 3.170073565703754e-06, "loss": 0.0245, "step": 95410 }, { "epoch": 0.3981231901594746, "grad_norm": 0.4035444623078234, "learning_rate": 3.169990503598159e-06, "loss": 0.0229, "step": 95415 }, { "epoch": 0.39814405287446486, "grad_norm": 0.7472624668589283, "learning_rate": 3.169907448021389e-06, "loss": 0.0388, "step": 95420 }, { "epoch": 0.39816491558945516, "grad_norm": 0.9024655937393209, "learning_rate": 3.1698243989725885e-06, "loss": 0.0223, "step": 95425 }, { "epoch": 0.3981857783044454, "grad_norm": 0.504129728821226, "learning_rate": 3.1697413564509027e-06, "loss": 0.0194, "step": 95430 }, { "epoch": 0.3982066410194357, "grad_norm": 0.5705321954506611, "learning_rate": 3.169658320455477e-06, "loss": 0.0263, "step": 95435 }, { "epoch": 0.398227503734426, "grad_norm": 1.083482489162741, "learning_rate": 3.169575290985456e-06, "loss": 0.0275, "step": 95440 }, { "epoch": 0.39824836644941625, "grad_norm": 0.8855331133729639, "learning_rate": 3.1694922680399853e-06, "loss": 0.028, "step": 95445 }, { "epoch": 0.39826922916440655, "grad_norm": 1.0491238360561546, "learning_rate": 3.1694092516182107e-06, "loss": 0.0232, "step": 95450 }, { "epoch": 0.3982900918793968, "grad_norm": 1.8412064666100612, "learning_rate": 3.169326241719277e-06, "loss": 0.0267, "step": 95455 }, { "epoch": 0.3983109545943871, "grad_norm": 0.9635173567480517, "learning_rate": 3.169243238342331e-06, "loss": 0.0325, "step": 95460 }, { "epoch": 0.3983318173093774, "grad_norm": 0.5899101820739482, "learning_rate": 3.1691602414865185e-06, "loss": 0.0224, "step": 95465 }, { "epoch": 0.39835268002436763, "grad_norm": 1.5391404329454983, "learning_rate": 3.1690772511509858e-06, "loss": 0.0332, "step": 95470 }, { "epoch": 0.39837354273935793, "grad_norm": 0.8848490739085545, "learning_rate": 3.168994267334879e-06, "loss": 0.0227, "step": 95475 }, { "epoch": 0.39839440545434823, "grad_norm": 0.5838992094460883, "learning_rate": 3.1689112900373443e-06, "loss": 0.0263, "step": 95480 }, { "epoch": 0.3984152681693385, "grad_norm": 0.607011006222043, "learning_rate": 3.1688283192575285e-06, "loss": 0.0219, "step": 95485 }, { "epoch": 0.3984361308843288, "grad_norm": 0.5647507957484726, "learning_rate": 3.1687453549945783e-06, "loss": 0.0249, "step": 95490 }, { "epoch": 0.398456993599319, "grad_norm": 0.608319808091064, "learning_rate": 3.1686623972476412e-06, "loss": 0.018, "step": 95495 }, { "epoch": 0.3984778563143093, "grad_norm": 1.07481575477905, "learning_rate": 3.1685794460158635e-06, "loss": 0.0247, "step": 95500 }, { "epoch": 0.3984987190292996, "grad_norm": 3.2551965531755944, "learning_rate": 3.1684965012983933e-06, "loss": 0.0222, "step": 95505 }, { "epoch": 0.39851958174428986, "grad_norm": 0.8061759317367825, "learning_rate": 3.1684135630943775e-06, "loss": 0.0269, "step": 95510 }, { "epoch": 0.39854044445928016, "grad_norm": 0.7162941548512775, "learning_rate": 3.1683306314029637e-06, "loss": 0.0296, "step": 95515 }, { "epoch": 0.3985613071742704, "grad_norm": 0.9661616736655908, "learning_rate": 3.168247706223299e-06, "loss": 0.0348, "step": 95520 }, { "epoch": 0.3985821698892607, "grad_norm": 0.6531565553964985, "learning_rate": 3.168164787554533e-06, "loss": 0.0306, "step": 95525 }, { "epoch": 0.398603032604251, "grad_norm": 0.6420910407401592, "learning_rate": 3.168081875395812e-06, "loss": 0.0192, "step": 95530 }, { "epoch": 0.39862389531924125, "grad_norm": 0.5460751224209428, "learning_rate": 3.1679989697462855e-06, "loss": 0.0268, "step": 95535 }, { "epoch": 0.39864475803423155, "grad_norm": 1.3150931998269064, "learning_rate": 3.1679160706051003e-06, "loss": 0.0341, "step": 95540 }, { "epoch": 0.3986656207492218, "grad_norm": 0.5152865374257958, "learning_rate": 3.1678331779714065e-06, "loss": 0.0241, "step": 95545 }, { "epoch": 0.3986864834642121, "grad_norm": 0.7832406347132711, "learning_rate": 3.167750291844352e-06, "loss": 0.0292, "step": 95550 }, { "epoch": 0.3987073461792024, "grad_norm": 0.43699615663017255, "learning_rate": 3.1676674122230853e-06, "loss": 0.0221, "step": 95555 }, { "epoch": 0.39872820889419264, "grad_norm": 1.5193685677771058, "learning_rate": 3.167584539106756e-06, "loss": 0.0308, "step": 95560 }, { "epoch": 0.39874907160918294, "grad_norm": 1.1321294530365913, "learning_rate": 3.167501672494513e-06, "loss": 0.027, "step": 95565 }, { "epoch": 0.39876993432417324, "grad_norm": 0.4992866211824056, "learning_rate": 3.1674188123855054e-06, "loss": 0.032, "step": 95570 }, { "epoch": 0.3987907970391635, "grad_norm": 1.0409779413396232, "learning_rate": 3.167335958778883e-06, "loss": 0.0293, "step": 95575 }, { "epoch": 0.3988116597541538, "grad_norm": 0.8958371803686419, "learning_rate": 3.1672531116737952e-06, "loss": 0.031, "step": 95580 }, { "epoch": 0.398832522469144, "grad_norm": 0.8115541505509996, "learning_rate": 3.167170271069392e-06, "loss": 0.0286, "step": 95585 }, { "epoch": 0.3988533851841343, "grad_norm": 0.7030901926339114, "learning_rate": 3.167087436964823e-06, "loss": 0.0302, "step": 95590 }, { "epoch": 0.3988742478991246, "grad_norm": 0.8361549291196155, "learning_rate": 3.167004609359238e-06, "loss": 0.0187, "step": 95595 }, { "epoch": 0.39889511061411487, "grad_norm": 0.44056333229846245, "learning_rate": 3.1669217882517877e-06, "loss": 0.0228, "step": 95600 }, { "epoch": 0.39891597332910517, "grad_norm": 0.4529754629178532, "learning_rate": 3.166838973641623e-06, "loss": 0.0214, "step": 95605 }, { "epoch": 0.3989368360440954, "grad_norm": 0.8131519353601268, "learning_rate": 3.166756165527893e-06, "loss": 0.0305, "step": 95610 }, { "epoch": 0.3989576987590857, "grad_norm": 1.6747216770099236, "learning_rate": 3.1666733639097487e-06, "loss": 0.0336, "step": 95615 }, { "epoch": 0.398978561474076, "grad_norm": 0.705168388587501, "learning_rate": 3.1665905687863426e-06, "loss": 0.0216, "step": 95620 }, { "epoch": 0.39899942418906625, "grad_norm": 0.781122413807714, "learning_rate": 3.1665077801568234e-06, "loss": 0.0267, "step": 95625 }, { "epoch": 0.39902028690405655, "grad_norm": 1.9239349493738718, "learning_rate": 3.1664249980203437e-06, "loss": 0.037, "step": 95630 }, { "epoch": 0.3990411496190468, "grad_norm": 0.5488588433144864, "learning_rate": 3.1663422223760544e-06, "loss": 0.0288, "step": 95635 }, { "epoch": 0.3990620123340371, "grad_norm": 0.5327147474069108, "learning_rate": 3.166259453223107e-06, "loss": 0.0234, "step": 95640 }, { "epoch": 0.3990828750490274, "grad_norm": 1.1034027713061665, "learning_rate": 3.1661766905606526e-06, "loss": 0.0294, "step": 95645 }, { "epoch": 0.39910373776401764, "grad_norm": 0.5949673134968165, "learning_rate": 3.1660939343878444e-06, "loss": 0.0297, "step": 95650 }, { "epoch": 0.39912460047900794, "grad_norm": 1.726090040921141, "learning_rate": 3.1660111847038333e-06, "loss": 0.0308, "step": 95655 }, { "epoch": 0.39914546319399824, "grad_norm": 1.0368772771613022, "learning_rate": 3.165928441507771e-06, "loss": 0.0276, "step": 95660 }, { "epoch": 0.3991663259089885, "grad_norm": 0.3827084430491593, "learning_rate": 3.1658457047988105e-06, "loss": 0.0195, "step": 95665 }, { "epoch": 0.3991871886239788, "grad_norm": 0.36667556202146684, "learning_rate": 3.1657629745761035e-06, "loss": 0.0186, "step": 95670 }, { "epoch": 0.399208051338969, "grad_norm": 0.8606129292493956, "learning_rate": 3.1656802508388033e-06, "loss": 0.0201, "step": 95675 }, { "epoch": 0.3992289140539593, "grad_norm": 0.82798565638768, "learning_rate": 3.165597533586062e-06, "loss": 0.0261, "step": 95680 }, { "epoch": 0.3992497767689496, "grad_norm": 0.9237473467380014, "learning_rate": 3.1655148228170334e-06, "loss": 0.0269, "step": 95685 }, { "epoch": 0.39927063948393987, "grad_norm": 0.5040525917477853, "learning_rate": 3.1654321185308695e-06, "loss": 0.0226, "step": 95690 }, { "epoch": 0.39929150219893017, "grad_norm": 0.7398788706123645, "learning_rate": 3.165349420726723e-06, "loss": 0.0279, "step": 95695 }, { "epoch": 0.3993123649139204, "grad_norm": 0.5954198233575411, "learning_rate": 3.165266729403749e-06, "loss": 0.036, "step": 95700 }, { "epoch": 0.3993332276289107, "grad_norm": 0.34349202773736126, "learning_rate": 3.1651840445611e-06, "loss": 0.0213, "step": 95705 }, { "epoch": 0.399354090343901, "grad_norm": 0.9526857379115715, "learning_rate": 3.1651013661979292e-06, "loss": 0.0255, "step": 95710 }, { "epoch": 0.39937495305889126, "grad_norm": 0.5786035927791725, "learning_rate": 3.1650186943133908e-06, "loss": 0.0274, "step": 95715 }, { "epoch": 0.39939581577388156, "grad_norm": 0.49660540384289786, "learning_rate": 3.164936028906639e-06, "loss": 0.0227, "step": 95720 }, { "epoch": 0.3994166784888718, "grad_norm": 0.671018404065046, "learning_rate": 3.1648533699768273e-06, "loss": 0.0218, "step": 95725 }, { "epoch": 0.3994375412038621, "grad_norm": 0.920373590477084, "learning_rate": 3.1647707175231103e-06, "loss": 0.022, "step": 95730 }, { "epoch": 0.3994584039188524, "grad_norm": 0.7552081476299556, "learning_rate": 3.1646880715446426e-06, "loss": 0.0224, "step": 95735 }, { "epoch": 0.39947926663384264, "grad_norm": 1.491956308805199, "learning_rate": 3.1646054320405788e-06, "loss": 0.0387, "step": 95740 }, { "epoch": 0.39950012934883294, "grad_norm": 0.668061940393281, "learning_rate": 3.164522799010073e-06, "loss": 0.0248, "step": 95745 }, { "epoch": 0.39952099206382324, "grad_norm": 0.8018974386249178, "learning_rate": 3.1644401724522806e-06, "loss": 0.0289, "step": 95750 }, { "epoch": 0.3995418547788135, "grad_norm": 0.7730860745307361, "learning_rate": 3.164357552366356e-06, "loss": 0.0266, "step": 95755 }, { "epoch": 0.3995627174938038, "grad_norm": 0.9821660128243586, "learning_rate": 3.1642749387514553e-06, "loss": 0.0263, "step": 95760 }, { "epoch": 0.39958358020879403, "grad_norm": 0.5195951206588302, "learning_rate": 3.1641923316067335e-06, "loss": 0.0284, "step": 95765 }, { "epoch": 0.39960444292378433, "grad_norm": 1.457162721878665, "learning_rate": 3.164109730931346e-06, "loss": 0.0302, "step": 95770 }, { "epoch": 0.39962530563877463, "grad_norm": 0.5786044128301311, "learning_rate": 3.164027136724448e-06, "loss": 0.0266, "step": 95775 }, { "epoch": 0.3996461683537649, "grad_norm": 0.4133403637192662, "learning_rate": 3.163944548985196e-06, "loss": 0.0257, "step": 95780 }, { "epoch": 0.3996670310687552, "grad_norm": 0.5341212669680704, "learning_rate": 3.1638619677127453e-06, "loss": 0.026, "step": 95785 }, { "epoch": 0.3996878937837454, "grad_norm": 1.4818543143844745, "learning_rate": 3.1637793929062527e-06, "loss": 0.0215, "step": 95790 }, { "epoch": 0.3997087564987357, "grad_norm": 0.8454954418521633, "learning_rate": 3.1636968245648738e-06, "loss": 0.0325, "step": 95795 }, { "epoch": 0.399729619213726, "grad_norm": 0.3704171047284684, "learning_rate": 3.1636142626877657e-06, "loss": 0.025, "step": 95800 }, { "epoch": 0.39975048192871626, "grad_norm": 0.789670142497836, "learning_rate": 3.163531707274084e-06, "loss": 0.0349, "step": 95805 }, { "epoch": 0.39977134464370656, "grad_norm": 0.8413832765410014, "learning_rate": 3.1634491583229867e-06, "loss": 0.0313, "step": 95810 }, { "epoch": 0.3997922073586968, "grad_norm": 0.39967186240574504, "learning_rate": 3.163366615833629e-06, "loss": 0.0235, "step": 95815 }, { "epoch": 0.3998130700736871, "grad_norm": 0.887477103566393, "learning_rate": 3.1632840798051694e-06, "loss": 0.0263, "step": 95820 }, { "epoch": 0.3998339327886774, "grad_norm": 1.156246205589553, "learning_rate": 3.1632015502367647e-06, "loss": 0.0278, "step": 95825 }, { "epoch": 0.39985479550366765, "grad_norm": 0.8332009719010792, "learning_rate": 3.163119027127572e-06, "loss": 0.0291, "step": 95830 }, { "epoch": 0.39987565821865795, "grad_norm": 0.8156237981759555, "learning_rate": 3.1630365104767492e-06, "loss": 0.0219, "step": 95835 }, { "epoch": 0.39989652093364825, "grad_norm": 0.5335991454624156, "learning_rate": 3.1629540002834537e-06, "loss": 0.0237, "step": 95840 }, { "epoch": 0.3999173836486385, "grad_norm": 0.6759808713498598, "learning_rate": 3.1628714965468425e-06, "loss": 0.0217, "step": 95845 }, { "epoch": 0.3999382463636288, "grad_norm": 0.4574198991946599, "learning_rate": 3.1627889992660743e-06, "loss": 0.0326, "step": 95850 }, { "epoch": 0.39995910907861904, "grad_norm": 0.3028141966272517, "learning_rate": 3.162706508440308e-06, "loss": 0.0205, "step": 95855 }, { "epoch": 0.39997997179360933, "grad_norm": 1.232495466128313, "learning_rate": 3.1626240240687006e-06, "loss": 0.0476, "step": 95860 }, { "epoch": 0.40000083450859963, "grad_norm": 1.3282438826653669, "learning_rate": 3.162541546150411e-06, "loss": 0.0294, "step": 95865 }, { "epoch": 0.4000216972235899, "grad_norm": 0.8301637587954261, "learning_rate": 3.162459074684598e-06, "loss": 0.0302, "step": 95870 }, { "epoch": 0.4000425599385802, "grad_norm": 0.7592007121846486, "learning_rate": 3.1623766096704195e-06, "loss": 0.0323, "step": 95875 }, { "epoch": 0.4000634226535704, "grad_norm": 0.38603459486074243, "learning_rate": 3.1622941511070347e-06, "loss": 0.028, "step": 95880 }, { "epoch": 0.4000842853685607, "grad_norm": 0.7671167909157834, "learning_rate": 3.1622116989936034e-06, "loss": 0.029, "step": 95885 }, { "epoch": 0.400105148083551, "grad_norm": 0.38694336368750065, "learning_rate": 3.162129253329285e-06, "loss": 0.0219, "step": 95890 }, { "epoch": 0.40012601079854127, "grad_norm": 0.46045952531003925, "learning_rate": 3.1620468141132366e-06, "loss": 0.0268, "step": 95895 }, { "epoch": 0.40014687351353156, "grad_norm": 1.0036576603718814, "learning_rate": 3.16196438134462e-06, "loss": 0.0364, "step": 95900 }, { "epoch": 0.4001677362285218, "grad_norm": 0.5115910346073864, "learning_rate": 3.161881955022594e-06, "loss": 0.0177, "step": 95905 }, { "epoch": 0.4001885989435121, "grad_norm": 1.1113031064369765, "learning_rate": 3.161799535146318e-06, "loss": 0.0352, "step": 95910 }, { "epoch": 0.4002094616585024, "grad_norm": 1.0780334966816325, "learning_rate": 3.1617171217149525e-06, "loss": 0.0309, "step": 95915 }, { "epoch": 0.40023032437349265, "grad_norm": 0.8214447353946268, "learning_rate": 3.161634714727657e-06, "loss": 0.0246, "step": 95920 }, { "epoch": 0.40025118708848295, "grad_norm": 1.4649483320994516, "learning_rate": 3.1615523141835922e-06, "loss": 0.0368, "step": 95925 }, { "epoch": 0.40027204980347325, "grad_norm": 0.8719633652195549, "learning_rate": 3.1614699200819194e-06, "loss": 0.0274, "step": 95930 }, { "epoch": 0.4002929125184635, "grad_norm": 0.2644529710269318, "learning_rate": 3.161387532421797e-06, "loss": 0.0253, "step": 95935 }, { "epoch": 0.4003137752334538, "grad_norm": 0.7059575123710418, "learning_rate": 3.161305151202388e-06, "loss": 0.0228, "step": 95940 }, { "epoch": 0.40033463794844404, "grad_norm": 0.6522282360144668, "learning_rate": 3.1612227764228515e-06, "loss": 0.0269, "step": 95945 }, { "epoch": 0.40035550066343434, "grad_norm": 0.6842223524535513, "learning_rate": 3.16114040808235e-06, "loss": 0.0327, "step": 95950 }, { "epoch": 0.40037636337842464, "grad_norm": 0.4046981518551201, "learning_rate": 3.1610580461800434e-06, "loss": 0.02, "step": 95955 }, { "epoch": 0.4003972260934149, "grad_norm": 0.4317572322273775, "learning_rate": 3.1609756907150936e-06, "loss": 0.0208, "step": 95960 }, { "epoch": 0.4004180888084052, "grad_norm": 2.55152529283782, "learning_rate": 3.1608933416866616e-06, "loss": 0.0269, "step": 95965 }, { "epoch": 0.4004389515233954, "grad_norm": 0.8744311743423533, "learning_rate": 3.1608109990939093e-06, "loss": 0.0335, "step": 95970 }, { "epoch": 0.4004598142383857, "grad_norm": 0.4916678036889124, "learning_rate": 3.1607286629359994e-06, "loss": 0.0263, "step": 95975 }, { "epoch": 0.400480676953376, "grad_norm": 0.42853932028694225, "learning_rate": 3.160646333212093e-06, "loss": 0.0219, "step": 95980 }, { "epoch": 0.40050153966836627, "grad_norm": 1.0129156400819628, "learning_rate": 3.160564009921352e-06, "loss": 0.0282, "step": 95985 }, { "epoch": 0.40052240238335657, "grad_norm": 0.8275968190454992, "learning_rate": 3.160481693062939e-06, "loss": 0.0281, "step": 95990 }, { "epoch": 0.4005432650983468, "grad_norm": 0.5627194631155618, "learning_rate": 3.1603993826360162e-06, "loss": 0.0237, "step": 95995 }, { "epoch": 0.4005641278133371, "grad_norm": 0.572686200167562, "learning_rate": 3.160317078639746e-06, "loss": 0.0283, "step": 96000 }, { "epoch": 0.4005849905283274, "grad_norm": 0.46324178317125453, "learning_rate": 3.1602347810732913e-06, "loss": 0.0192, "step": 96005 }, { "epoch": 0.40060585324331766, "grad_norm": 3.3933208888382485, "learning_rate": 3.1601524899358153e-06, "loss": 0.0368, "step": 96010 }, { "epoch": 0.40062671595830796, "grad_norm": 0.5596608917683119, "learning_rate": 3.1600702052264808e-06, "loss": 0.0237, "step": 96015 }, { "epoch": 0.40064757867329825, "grad_norm": 0.638225775278338, "learning_rate": 3.1599879269444505e-06, "loss": 0.0228, "step": 96020 }, { "epoch": 0.4006684413882885, "grad_norm": 0.9432761003943358, "learning_rate": 3.1599056550888884e-06, "loss": 0.0345, "step": 96025 }, { "epoch": 0.4006893041032788, "grad_norm": 0.9398461780433498, "learning_rate": 3.159823389658957e-06, "loss": 0.023, "step": 96030 }, { "epoch": 0.40071016681826904, "grad_norm": 0.7157741686113008, "learning_rate": 3.1597411306538213e-06, "loss": 0.0281, "step": 96035 }, { "epoch": 0.40073102953325934, "grad_norm": 0.8788231730934308, "learning_rate": 3.1596588780726438e-06, "loss": 0.0259, "step": 96040 }, { "epoch": 0.40075189224824964, "grad_norm": 1.016694628994065, "learning_rate": 3.1595766319145887e-06, "loss": 0.0286, "step": 96045 }, { "epoch": 0.4007727549632399, "grad_norm": 0.5540693667488279, "learning_rate": 3.159494392178821e-06, "loss": 0.0248, "step": 96050 }, { "epoch": 0.4007936176782302, "grad_norm": 0.4687099899669583, "learning_rate": 3.1594121588645035e-06, "loss": 0.025, "step": 96055 }, { "epoch": 0.40081448039322043, "grad_norm": 0.5259086827468656, "learning_rate": 3.1593299319708016e-06, "loss": 0.038, "step": 96060 }, { "epoch": 0.40083534310821073, "grad_norm": 1.828618548534608, "learning_rate": 3.1592477114968794e-06, "loss": 0.0359, "step": 96065 }, { "epoch": 0.40085620582320103, "grad_norm": 0.8445105646788855, "learning_rate": 3.1591654974419016e-06, "loss": 0.026, "step": 96070 }, { "epoch": 0.4008770685381913, "grad_norm": 0.37462939608475043, "learning_rate": 3.159083289805034e-06, "loss": 0.0278, "step": 96075 }, { "epoch": 0.40089793125318157, "grad_norm": 0.9876079195880525, "learning_rate": 3.1590010885854394e-06, "loss": 0.0249, "step": 96080 }, { "epoch": 0.4009187939681718, "grad_norm": 1.0395267837338267, "learning_rate": 3.1589188937822847e-06, "loss": 0.0288, "step": 96085 }, { "epoch": 0.4009396566831621, "grad_norm": 0.5642135502642799, "learning_rate": 3.1588367053947345e-06, "loss": 0.0321, "step": 96090 }, { "epoch": 0.4009605193981524, "grad_norm": 0.7748537030736256, "learning_rate": 3.158754523421955e-06, "loss": 0.0313, "step": 96095 }, { "epoch": 0.40098138211314266, "grad_norm": 1.0566757345818598, "learning_rate": 3.1586723478631114e-06, "loss": 0.0338, "step": 96100 }, { "epoch": 0.40100224482813296, "grad_norm": 0.7914333655668818, "learning_rate": 3.158590178717369e-06, "loss": 0.0258, "step": 96105 }, { "epoch": 0.40102310754312326, "grad_norm": 0.378923824294084, "learning_rate": 3.1585080159838943e-06, "loss": 0.0254, "step": 96110 }, { "epoch": 0.4010439702581135, "grad_norm": 0.902336401266407, "learning_rate": 3.158425859661852e-06, "loss": 0.03, "step": 96115 }, { "epoch": 0.4010648329731038, "grad_norm": 0.39818272154088225, "learning_rate": 3.1583437097504106e-06, "loss": 0.0406, "step": 96120 }, { "epoch": 0.40108569568809405, "grad_norm": 0.7055131916692423, "learning_rate": 3.1582615662487346e-06, "loss": 0.0216, "step": 96125 }, { "epoch": 0.40110655840308435, "grad_norm": 0.6143792876985597, "learning_rate": 3.158179429155991e-06, "loss": 0.0271, "step": 96130 }, { "epoch": 0.40112742111807465, "grad_norm": 0.5769726420622758, "learning_rate": 3.1580972984713476e-06, "loss": 0.0321, "step": 96135 }, { "epoch": 0.4011482838330649, "grad_norm": 0.9915846462010173, "learning_rate": 3.158015174193969e-06, "loss": 0.027, "step": 96140 }, { "epoch": 0.4011691465480552, "grad_norm": 0.6491790518431035, "learning_rate": 3.1579330563230236e-06, "loss": 0.021, "step": 96145 }, { "epoch": 0.40119000926304543, "grad_norm": 0.43696742592060817, "learning_rate": 3.1578509448576787e-06, "loss": 0.0247, "step": 96150 }, { "epoch": 0.40121087197803573, "grad_norm": 0.5004155344256095, "learning_rate": 3.1577688397971006e-06, "loss": 0.0255, "step": 96155 }, { "epoch": 0.40123173469302603, "grad_norm": 1.0122599997138642, "learning_rate": 3.157686741140458e-06, "loss": 0.0451, "step": 96160 }, { "epoch": 0.4012525974080163, "grad_norm": 0.5776201054678827, "learning_rate": 3.157604648886917e-06, "loss": 0.0244, "step": 96165 }, { "epoch": 0.4012734601230066, "grad_norm": 0.744092549129122, "learning_rate": 3.157522563035646e-06, "loss": 0.0266, "step": 96170 }, { "epoch": 0.4012943228379968, "grad_norm": 0.5044566051908073, "learning_rate": 3.157440483585813e-06, "loss": 0.0227, "step": 96175 }, { "epoch": 0.4013151855529871, "grad_norm": 0.5198839337874452, "learning_rate": 3.1573584105365862e-06, "loss": 0.0251, "step": 96180 }, { "epoch": 0.4013360482679774, "grad_norm": 0.5338128726969593, "learning_rate": 3.157276343887133e-06, "loss": 0.0242, "step": 96185 }, { "epoch": 0.40135691098296766, "grad_norm": 0.8290319656321642, "learning_rate": 3.1571942836366225e-06, "loss": 0.0255, "step": 96190 }, { "epoch": 0.40137777369795796, "grad_norm": 0.7996487530143445, "learning_rate": 3.1571122297842225e-06, "loss": 0.0258, "step": 96195 }, { "epoch": 0.40139863641294826, "grad_norm": 0.6352046147056764, "learning_rate": 3.1570301823291024e-06, "loss": 0.0218, "step": 96200 }, { "epoch": 0.4014194991279385, "grad_norm": 0.5005726277079242, "learning_rate": 3.1569481412704308e-06, "loss": 0.0279, "step": 96205 }, { "epoch": 0.4014403618429288, "grad_norm": 0.9944082030795861, "learning_rate": 3.156866106607376e-06, "loss": 0.0263, "step": 96210 }, { "epoch": 0.40146122455791905, "grad_norm": 0.949213220819584, "learning_rate": 3.1567840783391076e-06, "loss": 0.0297, "step": 96215 }, { "epoch": 0.40148208727290935, "grad_norm": 1.144833636066839, "learning_rate": 3.156702056464794e-06, "loss": 0.0283, "step": 96220 }, { "epoch": 0.40150294998789965, "grad_norm": 1.0360682644196983, "learning_rate": 3.1566200409836063e-06, "loss": 0.0328, "step": 96225 }, { "epoch": 0.4015238127028899, "grad_norm": 0.7926252779026761, "learning_rate": 3.1565380318947126e-06, "loss": 0.0286, "step": 96230 }, { "epoch": 0.4015446754178802, "grad_norm": 0.7740878128155696, "learning_rate": 3.1564560291972827e-06, "loss": 0.0268, "step": 96235 }, { "epoch": 0.40156553813287044, "grad_norm": 0.49682547746281375, "learning_rate": 3.156374032890487e-06, "loss": 0.025, "step": 96240 }, { "epoch": 0.40158640084786074, "grad_norm": 0.8068418811719253, "learning_rate": 3.1562920429734957e-06, "loss": 0.0216, "step": 96245 }, { "epoch": 0.40160726356285104, "grad_norm": 0.5286014688741486, "learning_rate": 3.1562100594454776e-06, "loss": 0.0347, "step": 96250 }, { "epoch": 0.4016281262778413, "grad_norm": 0.6809935478743123, "learning_rate": 3.1561280823056046e-06, "loss": 0.0256, "step": 96255 }, { "epoch": 0.4016489889928316, "grad_norm": 1.3478181073354532, "learning_rate": 3.1560461115530456e-06, "loss": 0.0312, "step": 96260 }, { "epoch": 0.4016698517078218, "grad_norm": 0.8780242491561859, "learning_rate": 3.155964147186973e-06, "loss": 0.0287, "step": 96265 }, { "epoch": 0.4016907144228121, "grad_norm": 0.5796323301479174, "learning_rate": 3.1558821892065556e-06, "loss": 0.0269, "step": 96270 }, { "epoch": 0.4017115771378024, "grad_norm": 0.5713253755563019, "learning_rate": 3.155800237610965e-06, "loss": 0.0268, "step": 96275 }, { "epoch": 0.40173243985279267, "grad_norm": 1.0782641461613978, "learning_rate": 3.1557182923993735e-06, "loss": 0.0229, "step": 96280 }, { "epoch": 0.40175330256778297, "grad_norm": 0.4491529337420398, "learning_rate": 3.1556363535709506e-06, "loss": 0.0227, "step": 96285 }, { "epoch": 0.40177416528277327, "grad_norm": 0.7505683307605783, "learning_rate": 3.155554421124868e-06, "loss": 0.0214, "step": 96290 }, { "epoch": 0.4017950279977635, "grad_norm": 1.1155742548347916, "learning_rate": 3.155472495060298e-06, "loss": 0.0367, "step": 96295 }, { "epoch": 0.4018158907127538, "grad_norm": 0.3339959531617448, "learning_rate": 3.1553905753764114e-06, "loss": 0.0335, "step": 96300 }, { "epoch": 0.40183675342774405, "grad_norm": 0.6147882484194654, "learning_rate": 3.1553086620723806e-06, "loss": 0.0205, "step": 96305 }, { "epoch": 0.40185761614273435, "grad_norm": 1.0722519140178814, "learning_rate": 3.155226755147377e-06, "loss": 0.0304, "step": 96310 }, { "epoch": 0.40187847885772465, "grad_norm": 0.7425926511604541, "learning_rate": 3.155144854600573e-06, "loss": 0.0341, "step": 96315 }, { "epoch": 0.4018993415727149, "grad_norm": 0.9401191156015647, "learning_rate": 3.1550629604311406e-06, "loss": 0.0236, "step": 96320 }, { "epoch": 0.4019202042877052, "grad_norm": 0.6840554427304714, "learning_rate": 3.154981072638253e-06, "loss": 0.024, "step": 96325 }, { "epoch": 0.40194106700269544, "grad_norm": 0.5361284427163622, "learning_rate": 3.1548991912210813e-06, "loss": 0.0257, "step": 96330 }, { "epoch": 0.40196192971768574, "grad_norm": 0.7898530833719929, "learning_rate": 3.1548173161787997e-06, "loss": 0.0282, "step": 96335 }, { "epoch": 0.40198279243267604, "grad_norm": 0.7014328257834079, "learning_rate": 3.15473544751058e-06, "loss": 0.0326, "step": 96340 }, { "epoch": 0.4020036551476663, "grad_norm": 0.7493102124435362, "learning_rate": 3.154653585215596e-06, "loss": 0.0201, "step": 96345 }, { "epoch": 0.4020245178626566, "grad_norm": 0.47048157414211356, "learning_rate": 3.1545717292930193e-06, "loss": 0.0191, "step": 96350 }, { "epoch": 0.4020453805776468, "grad_norm": 0.8327679920819586, "learning_rate": 3.1544898797420253e-06, "loss": 0.0252, "step": 96355 }, { "epoch": 0.4020662432926371, "grad_norm": 0.7077424228341699, "learning_rate": 3.1544080365617863e-06, "loss": 0.0252, "step": 96360 }, { "epoch": 0.4020871060076274, "grad_norm": 0.6542290024807333, "learning_rate": 3.154326199751476e-06, "loss": 0.0196, "step": 96365 }, { "epoch": 0.40210796872261767, "grad_norm": 1.0699137804519794, "learning_rate": 3.1542443693102683e-06, "loss": 0.0266, "step": 96370 }, { "epoch": 0.40212883143760797, "grad_norm": 0.915229994035697, "learning_rate": 3.154162545237337e-06, "loss": 0.0264, "step": 96375 }, { "epoch": 0.40214969415259827, "grad_norm": 0.6709010741840989, "learning_rate": 3.154080727531856e-06, "loss": 0.029, "step": 96380 }, { "epoch": 0.4021705568675885, "grad_norm": 1.0535789353756757, "learning_rate": 3.1539989161929995e-06, "loss": 0.0281, "step": 96385 }, { "epoch": 0.4021914195825788, "grad_norm": 0.8555715605082722, "learning_rate": 3.153917111219942e-06, "loss": 0.0318, "step": 96390 }, { "epoch": 0.40221228229756906, "grad_norm": 0.6937712609834973, "learning_rate": 3.153835312611858e-06, "loss": 0.0256, "step": 96395 }, { "epoch": 0.40223314501255936, "grad_norm": 0.6036392006980829, "learning_rate": 3.1537535203679225e-06, "loss": 0.0241, "step": 96400 }, { "epoch": 0.40225400772754966, "grad_norm": 0.9407783063776617, "learning_rate": 3.1536717344873096e-06, "loss": 0.0255, "step": 96405 }, { "epoch": 0.4022748704425399, "grad_norm": 1.648652746381349, "learning_rate": 3.153589954969195e-06, "loss": 0.0254, "step": 96410 }, { "epoch": 0.4022957331575302, "grad_norm": 0.4033236560046639, "learning_rate": 3.1535081818127526e-06, "loss": 0.0225, "step": 96415 }, { "epoch": 0.40231659587252044, "grad_norm": 0.8491092669444448, "learning_rate": 3.153426415017159e-06, "loss": 0.0269, "step": 96420 }, { "epoch": 0.40233745858751074, "grad_norm": 1.2349951629511169, "learning_rate": 3.153344654581589e-06, "loss": 0.0219, "step": 96425 }, { "epoch": 0.40235832130250104, "grad_norm": 0.8396886340776328, "learning_rate": 3.153262900505218e-06, "loss": 0.0188, "step": 96430 }, { "epoch": 0.4023791840174913, "grad_norm": 0.6978047389538165, "learning_rate": 3.153181152787222e-06, "loss": 0.029, "step": 96435 }, { "epoch": 0.4024000467324816, "grad_norm": 0.8246752280641934, "learning_rate": 3.1530994114267767e-06, "loss": 0.0211, "step": 96440 }, { "epoch": 0.40242090944747183, "grad_norm": 0.5686025409268541, "learning_rate": 3.153017676423058e-06, "loss": 0.0193, "step": 96445 }, { "epoch": 0.40244177216246213, "grad_norm": 0.44008835965236426, "learning_rate": 3.152935947775242e-06, "loss": 0.0276, "step": 96450 }, { "epoch": 0.40246263487745243, "grad_norm": 1.013660624832829, "learning_rate": 3.152854225482505e-06, "loss": 0.0351, "step": 96455 }, { "epoch": 0.4024834975924427, "grad_norm": 0.5349215734987025, "learning_rate": 3.152772509544024e-06, "loss": 0.0238, "step": 96460 }, { "epoch": 0.402504360307433, "grad_norm": 0.9436417728189542, "learning_rate": 3.1526907999589752e-06, "loss": 0.0315, "step": 96465 }, { "epoch": 0.4025252230224233, "grad_norm": 0.7863989458578844, "learning_rate": 3.1526090967265354e-06, "loss": 0.0271, "step": 96470 }, { "epoch": 0.4025460857374135, "grad_norm": 0.637268421602477, "learning_rate": 3.1525273998458813e-06, "loss": 0.0241, "step": 96475 }, { "epoch": 0.4025669484524038, "grad_norm": 0.525349717684482, "learning_rate": 3.1524457093161896e-06, "loss": 0.0252, "step": 96480 }, { "epoch": 0.40258781116739406, "grad_norm": 0.6164939351417412, "learning_rate": 3.152364025136638e-06, "loss": 0.02, "step": 96485 }, { "epoch": 0.40260867388238436, "grad_norm": 1.0888375455959816, "learning_rate": 3.152282347306404e-06, "loss": 0.0348, "step": 96490 }, { "epoch": 0.40262953659737466, "grad_norm": 1.2180786349466666, "learning_rate": 3.152200675824665e-06, "loss": 0.0316, "step": 96495 }, { "epoch": 0.4026503993123649, "grad_norm": 0.7031121513834582, "learning_rate": 3.152119010690598e-06, "loss": 0.019, "step": 96500 }, { "epoch": 0.4026712620273552, "grad_norm": 1.158095230149372, "learning_rate": 3.1520373519033815e-06, "loss": 0.0316, "step": 96505 }, { "epoch": 0.40269212474234545, "grad_norm": 0.6241497361078258, "learning_rate": 3.1519556994621932e-06, "loss": 0.0199, "step": 96510 }, { "epoch": 0.40271298745733575, "grad_norm": 0.7631883462486928, "learning_rate": 3.1518740533662114e-06, "loss": 0.0274, "step": 96515 }, { "epoch": 0.40273385017232605, "grad_norm": 0.7444241916723596, "learning_rate": 3.1517924136146143e-06, "loss": 0.0234, "step": 96520 }, { "epoch": 0.4027547128873163, "grad_norm": 0.5507729495421393, "learning_rate": 3.1517107802065794e-06, "loss": 0.0212, "step": 96525 }, { "epoch": 0.4027755756023066, "grad_norm": 0.6463194072377044, "learning_rate": 3.151629153141287e-06, "loss": 0.0318, "step": 96530 }, { "epoch": 0.40279643831729683, "grad_norm": 0.7762460823305499, "learning_rate": 3.1515475324179136e-06, "loss": 0.0271, "step": 96535 }, { "epoch": 0.40281730103228713, "grad_norm": 0.5654237128168526, "learning_rate": 3.15146591803564e-06, "loss": 0.0263, "step": 96540 }, { "epoch": 0.40283816374727743, "grad_norm": 0.6395452392729912, "learning_rate": 3.1513843099936444e-06, "loss": 0.0286, "step": 96545 }, { "epoch": 0.4028590264622677, "grad_norm": 0.8041256914292406, "learning_rate": 3.151302708291105e-06, "loss": 0.0264, "step": 96550 }, { "epoch": 0.402879889177258, "grad_norm": 1.4393351411872024, "learning_rate": 3.1512211129272024e-06, "loss": 0.0284, "step": 96555 }, { "epoch": 0.4029007518922483, "grad_norm": 0.7753563292404252, "learning_rate": 3.1511395239011157e-06, "loss": 0.023, "step": 96560 }, { "epoch": 0.4029216146072385, "grad_norm": 0.7448972885916039, "learning_rate": 3.151057941212024e-06, "loss": 0.0331, "step": 96565 }, { "epoch": 0.4029424773222288, "grad_norm": 0.6613149458599374, "learning_rate": 3.150976364859107e-06, "loss": 0.0297, "step": 96570 }, { "epoch": 0.40296334003721906, "grad_norm": 1.5141683646171256, "learning_rate": 3.150894794841546e-06, "loss": 0.0369, "step": 96575 }, { "epoch": 0.40298420275220936, "grad_norm": 0.7921864177700954, "learning_rate": 3.1508132311585195e-06, "loss": 0.0292, "step": 96580 }, { "epoch": 0.40300506546719966, "grad_norm": 0.7510471325901482, "learning_rate": 3.1507316738092076e-06, "loss": 0.0276, "step": 96585 }, { "epoch": 0.4030259281821899, "grad_norm": 0.33604046274474575, "learning_rate": 3.150650122792792e-06, "loss": 0.0306, "step": 96590 }, { "epoch": 0.4030467908971802, "grad_norm": 0.42141848230524664, "learning_rate": 3.1505685781084512e-06, "loss": 0.0239, "step": 96595 }, { "epoch": 0.40306765361217045, "grad_norm": 1.2077121943370475, "learning_rate": 3.1504870397553677e-06, "loss": 0.0253, "step": 96600 }, { "epoch": 0.40308851632716075, "grad_norm": 0.4952517243600784, "learning_rate": 3.1504055077327217e-06, "loss": 0.0221, "step": 96605 }, { "epoch": 0.40310937904215105, "grad_norm": 0.5420321193026836, "learning_rate": 3.150323982039693e-06, "loss": 0.0273, "step": 96610 }, { "epoch": 0.4031302417571413, "grad_norm": 0.6852420545020391, "learning_rate": 3.150242462675464e-06, "loss": 0.0196, "step": 96615 }, { "epoch": 0.4031511044721316, "grad_norm": 0.7206342088734639, "learning_rate": 3.1501609496392156e-06, "loss": 0.0242, "step": 96620 }, { "epoch": 0.40317196718712184, "grad_norm": 0.5757706609871263, "learning_rate": 3.1500794429301285e-06, "loss": 0.0217, "step": 96625 }, { "epoch": 0.40319282990211214, "grad_norm": 0.6051013306369031, "learning_rate": 3.149997942547385e-06, "loss": 0.0359, "step": 96630 }, { "epoch": 0.40321369261710244, "grad_norm": 0.6402565799222213, "learning_rate": 3.1499164484901666e-06, "loss": 0.0229, "step": 96635 }, { "epoch": 0.4032345553320927, "grad_norm": 1.1585439644082556, "learning_rate": 3.149834960757655e-06, "loss": 0.0272, "step": 96640 }, { "epoch": 0.403255418047083, "grad_norm": 0.5779034766726718, "learning_rate": 3.149753479349032e-06, "loss": 0.0301, "step": 96645 }, { "epoch": 0.4032762807620733, "grad_norm": 0.5630317008349937, "learning_rate": 3.14967200426348e-06, "loss": 0.0275, "step": 96650 }, { "epoch": 0.4032971434770635, "grad_norm": 1.4612814337769762, "learning_rate": 3.14959053550018e-06, "loss": 0.024, "step": 96655 }, { "epoch": 0.4033180061920538, "grad_norm": 0.7683161233505511, "learning_rate": 3.149509073058316e-06, "loss": 0.0308, "step": 96660 }, { "epoch": 0.40333886890704407, "grad_norm": 0.8303475398170048, "learning_rate": 3.149427616937071e-06, "loss": 0.0274, "step": 96665 }, { "epoch": 0.40335973162203437, "grad_norm": 1.3227385006546366, "learning_rate": 3.149346167135625e-06, "loss": 0.0282, "step": 96670 }, { "epoch": 0.40338059433702467, "grad_norm": 0.5206529999527608, "learning_rate": 3.1492647236531633e-06, "loss": 0.0167, "step": 96675 }, { "epoch": 0.4034014570520149, "grad_norm": 0.518725823381113, "learning_rate": 3.1491832864888684e-06, "loss": 0.0259, "step": 96680 }, { "epoch": 0.4034223197670052, "grad_norm": 0.7014306731949068, "learning_rate": 3.1491018556419223e-06, "loss": 0.022, "step": 96685 }, { "epoch": 0.40344318248199545, "grad_norm": 0.6659944938794229, "learning_rate": 3.1490204311115097e-06, "loss": 0.0195, "step": 96690 }, { "epoch": 0.40346404519698575, "grad_norm": 0.7548531491168934, "learning_rate": 3.1489390128968128e-06, "loss": 0.021, "step": 96695 }, { "epoch": 0.40348490791197605, "grad_norm": 0.45181350676026405, "learning_rate": 3.1488576009970166e-06, "loss": 0.021, "step": 96700 }, { "epoch": 0.4035057706269663, "grad_norm": 0.7359346196839496, "learning_rate": 3.1487761954113035e-06, "loss": 0.0184, "step": 96705 }, { "epoch": 0.4035266333419566, "grad_norm": 0.635529203442035, "learning_rate": 3.148694796138858e-06, "loss": 0.0312, "step": 96710 }, { "epoch": 0.40354749605694684, "grad_norm": 0.3973866780977571, "learning_rate": 3.148613403178864e-06, "loss": 0.0234, "step": 96715 }, { "epoch": 0.40356835877193714, "grad_norm": 0.7645572472715756, "learning_rate": 3.1485320165305057e-06, "loss": 0.0241, "step": 96720 }, { "epoch": 0.40358922148692744, "grad_norm": 0.618983823675498, "learning_rate": 3.1484506361929673e-06, "loss": 0.0287, "step": 96725 }, { "epoch": 0.4036100842019177, "grad_norm": 0.9393140092050556, "learning_rate": 3.1483692621654335e-06, "loss": 0.0339, "step": 96730 }, { "epoch": 0.403630946916908, "grad_norm": 0.648031151813503, "learning_rate": 3.1482878944470885e-06, "loss": 0.02, "step": 96735 }, { "epoch": 0.4036518096318983, "grad_norm": 0.38742221449254804, "learning_rate": 3.148206533037117e-06, "loss": 0.0204, "step": 96740 }, { "epoch": 0.40367267234688853, "grad_norm": 0.4260548202034852, "learning_rate": 3.148125177934705e-06, "loss": 0.02, "step": 96745 }, { "epoch": 0.4036935350618788, "grad_norm": 0.9482056003638127, "learning_rate": 3.1480438291390365e-06, "loss": 0.0239, "step": 96750 }, { "epoch": 0.40371439777686907, "grad_norm": 0.6674609648791814, "learning_rate": 3.1479624866492963e-06, "loss": 0.0242, "step": 96755 }, { "epoch": 0.40373526049185937, "grad_norm": 0.5968834525043005, "learning_rate": 3.1478811504646712e-06, "loss": 0.03, "step": 96760 }, { "epoch": 0.40375612320684967, "grad_norm": 0.41389613536131925, "learning_rate": 3.147799820584346e-06, "loss": 0.0272, "step": 96765 }, { "epoch": 0.4037769859218399, "grad_norm": 1.022044910989136, "learning_rate": 3.147718497007505e-06, "loss": 0.027, "step": 96770 }, { "epoch": 0.4037978486368302, "grad_norm": 0.5283988871403038, "learning_rate": 3.1476371797333366e-06, "loss": 0.0282, "step": 96775 }, { "epoch": 0.40381871135182046, "grad_norm": 0.9837132933345332, "learning_rate": 3.1475558687610245e-06, "loss": 0.0301, "step": 96780 }, { "epoch": 0.40383957406681076, "grad_norm": 0.5128327944735382, "learning_rate": 3.1474745640897563e-06, "loss": 0.0296, "step": 96785 }, { "epoch": 0.40386043678180106, "grad_norm": 1.0497596819594706, "learning_rate": 3.1473932657187174e-06, "loss": 0.029, "step": 96790 }, { "epoch": 0.4038812994967913, "grad_norm": 1.074697444298481, "learning_rate": 3.147311973647094e-06, "loss": 0.0276, "step": 96795 }, { "epoch": 0.4039021622117816, "grad_norm": 0.7871815912689808, "learning_rate": 3.1472306878740734e-06, "loss": 0.0259, "step": 96800 }, { "epoch": 0.40392302492677185, "grad_norm": 1.073573230342049, "learning_rate": 3.147149408398841e-06, "loss": 0.0253, "step": 96805 }, { "epoch": 0.40394388764176214, "grad_norm": 0.5920318971890958, "learning_rate": 3.147068135220585e-06, "loss": 0.0198, "step": 96810 }, { "epoch": 0.40396475035675244, "grad_norm": 0.4093593478637276, "learning_rate": 3.146986868338492e-06, "loss": 0.0218, "step": 96815 }, { "epoch": 0.4039856130717427, "grad_norm": 0.7681921475301208, "learning_rate": 3.1469056077517485e-06, "loss": 0.0227, "step": 96820 }, { "epoch": 0.404006475786733, "grad_norm": 0.6729929237552055, "learning_rate": 3.1468243534595427e-06, "loss": 0.0258, "step": 96825 }, { "epoch": 0.4040273385017233, "grad_norm": 0.9387622300211554, "learning_rate": 3.1467431054610615e-06, "loss": 0.0402, "step": 96830 }, { "epoch": 0.40404820121671353, "grad_norm": 0.6279587033781412, "learning_rate": 3.1466618637554917e-06, "loss": 0.0212, "step": 96835 }, { "epoch": 0.40406906393170383, "grad_norm": 0.7282193519742667, "learning_rate": 3.1465806283420227e-06, "loss": 0.025, "step": 96840 }, { "epoch": 0.4040899266466941, "grad_norm": 0.4335283327537058, "learning_rate": 3.146499399219841e-06, "loss": 0.0262, "step": 96845 }, { "epoch": 0.4041107893616844, "grad_norm": 0.7832230683612916, "learning_rate": 3.1464181763881346e-06, "loss": 0.0249, "step": 96850 }, { "epoch": 0.4041316520766747, "grad_norm": 1.0525252989186327, "learning_rate": 3.1463369598460925e-06, "loss": 0.0249, "step": 96855 }, { "epoch": 0.4041525147916649, "grad_norm": 0.6891084295782024, "learning_rate": 3.1462557495929025e-06, "loss": 0.0254, "step": 96860 }, { "epoch": 0.4041733775066552, "grad_norm": 0.5690699076928769, "learning_rate": 3.1461745456277526e-06, "loss": 0.0315, "step": 96865 }, { "epoch": 0.40419424022164546, "grad_norm": 1.7999649719212014, "learning_rate": 3.146093347949832e-06, "loss": 0.027, "step": 96870 }, { "epoch": 0.40421510293663576, "grad_norm": 0.4998763088006823, "learning_rate": 3.1460121565583303e-06, "loss": 0.0216, "step": 96875 }, { "epoch": 0.40423596565162606, "grad_norm": 0.7457011578544872, "learning_rate": 3.145930971452434e-06, "loss": 0.0196, "step": 96880 }, { "epoch": 0.4042568283666163, "grad_norm": 0.9930369794926074, "learning_rate": 3.1458497926313337e-06, "loss": 0.0279, "step": 96885 }, { "epoch": 0.4042776910816066, "grad_norm": 0.843199892576442, "learning_rate": 3.145768620094219e-06, "loss": 0.0276, "step": 96890 }, { "epoch": 0.40429855379659685, "grad_norm": 0.5444903859439251, "learning_rate": 3.1456874538402778e-06, "loss": 0.0199, "step": 96895 }, { "epoch": 0.40431941651158715, "grad_norm": 0.650698049397733, "learning_rate": 3.1456062938687003e-06, "loss": 0.0214, "step": 96900 }, { "epoch": 0.40434027922657745, "grad_norm": 0.6126607908507462, "learning_rate": 3.1455251401786764e-06, "loss": 0.0194, "step": 96905 }, { "epoch": 0.4043611419415677, "grad_norm": 0.7900443000620845, "learning_rate": 3.145443992769395e-06, "loss": 0.0311, "step": 96910 }, { "epoch": 0.404382004656558, "grad_norm": 0.8989041298595084, "learning_rate": 3.145362851640047e-06, "loss": 0.0256, "step": 96915 }, { "epoch": 0.4044028673715483, "grad_norm": 0.509946556706587, "learning_rate": 3.145281716789822e-06, "loss": 0.0234, "step": 96920 }, { "epoch": 0.40442373008653854, "grad_norm": 0.7011065768360161, "learning_rate": 3.14520058821791e-06, "loss": 0.0272, "step": 96925 }, { "epoch": 0.40444459280152883, "grad_norm": 0.7947335253135912, "learning_rate": 3.1451194659235013e-06, "loss": 0.0211, "step": 96930 }, { "epoch": 0.4044654555165191, "grad_norm": 0.5439171618868939, "learning_rate": 3.145038349905787e-06, "loss": 0.02, "step": 96935 }, { "epoch": 0.4044863182315094, "grad_norm": 1.135432075692225, "learning_rate": 3.144957240163957e-06, "loss": 0.0248, "step": 96940 }, { "epoch": 0.4045071809464997, "grad_norm": 0.44111353208331494, "learning_rate": 3.1448761366972026e-06, "loss": 0.0263, "step": 96945 }, { "epoch": 0.4045280436614899, "grad_norm": 1.0176277044144955, "learning_rate": 3.1447950395047146e-06, "loss": 0.0282, "step": 96950 }, { "epoch": 0.4045489063764802, "grad_norm": 0.5587089728264094, "learning_rate": 3.144713948585683e-06, "loss": 0.0225, "step": 96955 }, { "epoch": 0.40456976909147047, "grad_norm": 0.8042919873923619, "learning_rate": 3.144632863939301e-06, "loss": 0.023, "step": 96960 }, { "epoch": 0.40459063180646077, "grad_norm": 0.5906557347954052, "learning_rate": 3.144551785564759e-06, "loss": 0.0275, "step": 96965 }, { "epoch": 0.40461149452145106, "grad_norm": 0.7355715923226271, "learning_rate": 3.144470713461248e-06, "loss": 0.024, "step": 96970 }, { "epoch": 0.4046323572364413, "grad_norm": 0.543659792919377, "learning_rate": 3.14438964762796e-06, "loss": 0.0291, "step": 96975 }, { "epoch": 0.4046532199514316, "grad_norm": 0.7951279532588136, "learning_rate": 3.1443085880640873e-06, "loss": 0.0301, "step": 96980 }, { "epoch": 0.40467408266642185, "grad_norm": 0.919297519854573, "learning_rate": 3.1442275347688214e-06, "loss": 0.036, "step": 96985 }, { "epoch": 0.40469494538141215, "grad_norm": 1.029536592623115, "learning_rate": 3.144146487741354e-06, "loss": 0.0339, "step": 96990 }, { "epoch": 0.40471580809640245, "grad_norm": 0.7100555728582832, "learning_rate": 3.144065446980878e-06, "loss": 0.0215, "step": 96995 }, { "epoch": 0.4047366708113927, "grad_norm": 0.8244290560841455, "learning_rate": 3.1439844124865854e-06, "loss": 0.0224, "step": 97000 }, { "epoch": 0.404757533526383, "grad_norm": 0.8502495701982133, "learning_rate": 3.1439033842576688e-06, "loss": 0.0331, "step": 97005 }, { "epoch": 0.4047783962413733, "grad_norm": 0.6930419316529495, "learning_rate": 3.143822362293321e-06, "loss": 0.0295, "step": 97010 }, { "epoch": 0.40479925895636354, "grad_norm": 1.128192025782853, "learning_rate": 3.1437413465927354e-06, "loss": 0.0207, "step": 97015 }, { "epoch": 0.40482012167135384, "grad_norm": 0.6236465766008994, "learning_rate": 3.1436603371551037e-06, "loss": 0.0244, "step": 97020 }, { "epoch": 0.4048409843863441, "grad_norm": 0.4242067381983993, "learning_rate": 3.14357933397962e-06, "loss": 0.0242, "step": 97025 }, { "epoch": 0.4048618471013344, "grad_norm": 1.5375597847406832, "learning_rate": 3.1434983370654763e-06, "loss": 0.0354, "step": 97030 }, { "epoch": 0.4048827098163247, "grad_norm": 0.6486053357556069, "learning_rate": 3.143417346411867e-06, "loss": 0.0248, "step": 97035 }, { "epoch": 0.4049035725313149, "grad_norm": 0.9985227261855046, "learning_rate": 3.1433363620179863e-06, "loss": 0.0292, "step": 97040 }, { "epoch": 0.4049244352463052, "grad_norm": 1.0106854139034283, "learning_rate": 3.1432553838830273e-06, "loss": 0.0217, "step": 97045 }, { "epoch": 0.40494529796129547, "grad_norm": 0.53950554396571, "learning_rate": 3.143174412006183e-06, "loss": 0.0237, "step": 97050 }, { "epoch": 0.40496616067628577, "grad_norm": 0.381273419386373, "learning_rate": 3.1430934463866474e-06, "loss": 0.0246, "step": 97055 }, { "epoch": 0.40498702339127607, "grad_norm": 0.9103574579795223, "learning_rate": 3.143012487023616e-06, "loss": 0.0244, "step": 97060 }, { "epoch": 0.4050078861062663, "grad_norm": 0.2997755704335006, "learning_rate": 3.1429315339162825e-06, "loss": 0.0202, "step": 97065 }, { "epoch": 0.4050287488212566, "grad_norm": 0.9355020343196851, "learning_rate": 3.142850587063841e-06, "loss": 0.022, "step": 97070 }, { "epoch": 0.40504961153624686, "grad_norm": 0.6036379307936588, "learning_rate": 3.1427696464654857e-06, "loss": 0.0244, "step": 97075 }, { "epoch": 0.40507047425123716, "grad_norm": 0.8896248762214654, "learning_rate": 3.1426887121204126e-06, "loss": 0.0301, "step": 97080 }, { "epoch": 0.40509133696622746, "grad_norm": 1.1537255195873921, "learning_rate": 3.1426077840278154e-06, "loss": 0.0348, "step": 97085 }, { "epoch": 0.4051121996812177, "grad_norm": 1.6851176190763948, "learning_rate": 3.1425268621868887e-06, "loss": 0.0241, "step": 97090 }, { "epoch": 0.405133062396208, "grad_norm": 1.290101250164205, "learning_rate": 3.142445946596829e-06, "loss": 0.0272, "step": 97095 }, { "epoch": 0.40515392511119824, "grad_norm": 1.282960288193815, "learning_rate": 3.142365037256831e-06, "loss": 0.0311, "step": 97100 }, { "epoch": 0.40517478782618854, "grad_norm": 0.6897919151908709, "learning_rate": 3.1422841341660903e-06, "loss": 0.0269, "step": 97105 }, { "epoch": 0.40519565054117884, "grad_norm": 0.8790674366092333, "learning_rate": 3.142203237323802e-06, "loss": 0.0231, "step": 97110 }, { "epoch": 0.4052165132561691, "grad_norm": 0.8085188652073945, "learning_rate": 3.142122346729162e-06, "loss": 0.0256, "step": 97115 }, { "epoch": 0.4052373759711594, "grad_norm": 0.7172798022813058, "learning_rate": 3.142041462381367e-06, "loss": 0.0352, "step": 97120 }, { "epoch": 0.4052582386861497, "grad_norm": 0.5940550967775224, "learning_rate": 3.141960584279611e-06, "loss": 0.0214, "step": 97125 }, { "epoch": 0.40527910140113993, "grad_norm": 0.9948073073184102, "learning_rate": 3.141879712423092e-06, "loss": 0.0223, "step": 97130 }, { "epoch": 0.40529996411613023, "grad_norm": 0.6033420415596626, "learning_rate": 3.141798846811006e-06, "loss": 0.0296, "step": 97135 }, { "epoch": 0.4053208268311205, "grad_norm": 0.7019340955471925, "learning_rate": 3.1417179874425498e-06, "loss": 0.031, "step": 97140 }, { "epoch": 0.4053416895461108, "grad_norm": 0.8271731971008822, "learning_rate": 3.141637134316918e-06, "loss": 0.0337, "step": 97145 }, { "epoch": 0.4053625522611011, "grad_norm": 0.5301460273352498, "learning_rate": 3.1415562874333095e-06, "loss": 0.0247, "step": 97150 }, { "epoch": 0.4053834149760913, "grad_norm": 0.7599011829087604, "learning_rate": 3.14147544679092e-06, "loss": 0.0243, "step": 97155 }, { "epoch": 0.4054042776910816, "grad_norm": 0.569271649098027, "learning_rate": 3.141394612388947e-06, "loss": 0.0247, "step": 97160 }, { "epoch": 0.40542514040607186, "grad_norm": 1.9724170169766402, "learning_rate": 3.141313784226588e-06, "loss": 0.0421, "step": 97165 }, { "epoch": 0.40544600312106216, "grad_norm": 0.7592552170242267, "learning_rate": 3.1412329623030396e-06, "loss": 0.0259, "step": 97170 }, { "epoch": 0.40546686583605246, "grad_norm": 0.6840764969233272, "learning_rate": 3.1411521466174992e-06, "loss": 0.0297, "step": 97175 }, { "epoch": 0.4054877285510427, "grad_norm": 0.8181606876282255, "learning_rate": 3.141071337169166e-06, "loss": 0.0261, "step": 97180 }, { "epoch": 0.405508591266033, "grad_norm": 0.5459189137623788, "learning_rate": 3.1409905339572357e-06, "loss": 0.0272, "step": 97185 }, { "epoch": 0.40552945398102325, "grad_norm": 0.9419587916282693, "learning_rate": 3.1409097369809065e-06, "loss": 0.0256, "step": 97190 }, { "epoch": 0.40555031669601355, "grad_norm": 0.6503427234474494, "learning_rate": 3.1408289462393773e-06, "loss": 0.033, "step": 97195 }, { "epoch": 0.40557117941100385, "grad_norm": 0.37409931024020754, "learning_rate": 3.1407481617318457e-06, "loss": 0.0224, "step": 97200 }, { "epoch": 0.4055920421259941, "grad_norm": 0.6932956028625713, "learning_rate": 3.1406673834575115e-06, "loss": 0.0245, "step": 97205 }, { "epoch": 0.4056129048409844, "grad_norm": 0.2986784248175613, "learning_rate": 3.1405866114155708e-06, "loss": 0.0209, "step": 97210 }, { "epoch": 0.4056337675559747, "grad_norm": 0.2532642887453639, "learning_rate": 3.1405058456052234e-06, "loss": 0.026, "step": 97215 }, { "epoch": 0.40565463027096493, "grad_norm": 0.6259629821043102, "learning_rate": 3.140425086025668e-06, "loss": 0.0248, "step": 97220 }, { "epoch": 0.40567549298595523, "grad_norm": 0.4551117634902714, "learning_rate": 3.1403443326761035e-06, "loss": 0.0251, "step": 97225 }, { "epoch": 0.4056963557009455, "grad_norm": 0.46661464208172976, "learning_rate": 3.140263585555729e-06, "loss": 0.0311, "step": 97230 }, { "epoch": 0.4057172184159358, "grad_norm": 1.1701194576045688, "learning_rate": 3.140182844663744e-06, "loss": 0.0261, "step": 97235 }, { "epoch": 0.4057380811309261, "grad_norm": 0.8205401694331879, "learning_rate": 3.1401021099993466e-06, "loss": 0.0318, "step": 97240 }, { "epoch": 0.4057589438459163, "grad_norm": 1.177037480498967, "learning_rate": 3.1400213815617382e-06, "loss": 0.027, "step": 97245 }, { "epoch": 0.4057798065609066, "grad_norm": 1.309612782414394, "learning_rate": 3.1399406593501164e-06, "loss": 0.0278, "step": 97250 }, { "epoch": 0.40580066927589686, "grad_norm": 0.5099059302334684, "learning_rate": 3.1398599433636828e-06, "loss": 0.0346, "step": 97255 }, { "epoch": 0.40582153199088716, "grad_norm": 0.7974526350847457, "learning_rate": 3.139779233601636e-06, "loss": 0.0336, "step": 97260 }, { "epoch": 0.40584239470587746, "grad_norm": 0.982631472461399, "learning_rate": 3.1396985300631765e-06, "loss": 0.0328, "step": 97265 }, { "epoch": 0.4058632574208677, "grad_norm": 1.644495753464341, "learning_rate": 3.139617832747504e-06, "loss": 0.0398, "step": 97270 }, { "epoch": 0.405884120135858, "grad_norm": 0.6832219690907106, "learning_rate": 3.13953714165382e-06, "loss": 0.0222, "step": 97275 }, { "epoch": 0.40590498285084825, "grad_norm": 0.8377037015370533, "learning_rate": 3.139456456781324e-06, "loss": 0.0377, "step": 97280 }, { "epoch": 0.40592584556583855, "grad_norm": 0.7446997394070286, "learning_rate": 3.139375778129218e-06, "loss": 0.0208, "step": 97285 }, { "epoch": 0.40594670828082885, "grad_norm": 0.9564305862699718, "learning_rate": 3.1392951056967008e-06, "loss": 0.0299, "step": 97290 }, { "epoch": 0.4059675709958191, "grad_norm": 0.7814939251844423, "learning_rate": 3.139214439482974e-06, "loss": 0.0271, "step": 97295 }, { "epoch": 0.4059884337108094, "grad_norm": 1.232071094819054, "learning_rate": 3.139133779487239e-06, "loss": 0.0312, "step": 97300 }, { "epoch": 0.4060092964257997, "grad_norm": 1.0978103517783553, "learning_rate": 3.139053125708697e-06, "loss": 0.0238, "step": 97305 }, { "epoch": 0.40603015914078994, "grad_norm": 1.763384698354572, "learning_rate": 3.1389724781465497e-06, "loss": 0.033, "step": 97310 }, { "epoch": 0.40605102185578024, "grad_norm": 1.4126233297114204, "learning_rate": 3.1388918367999982e-06, "loss": 0.025, "step": 97315 }, { "epoch": 0.4060718845707705, "grad_norm": 1.2698440665645006, "learning_rate": 3.1388112016682436e-06, "loss": 0.0324, "step": 97320 }, { "epoch": 0.4060927472857608, "grad_norm": 0.5957452509577372, "learning_rate": 3.1387305727504886e-06, "loss": 0.0219, "step": 97325 }, { "epoch": 0.4061136100007511, "grad_norm": 0.4871539280849538, "learning_rate": 3.138649950045934e-06, "loss": 0.0215, "step": 97330 }, { "epoch": 0.4061344727157413, "grad_norm": 0.9448232881637454, "learning_rate": 3.1385693335537833e-06, "loss": 0.0221, "step": 97335 }, { "epoch": 0.4061553354307316, "grad_norm": 0.8962960912514759, "learning_rate": 3.1384887232732375e-06, "loss": 0.0299, "step": 97340 }, { "epoch": 0.40617619814572187, "grad_norm": 0.40104415900100737, "learning_rate": 3.1384081192035e-06, "loss": 0.0236, "step": 97345 }, { "epoch": 0.40619706086071217, "grad_norm": 1.0948687523510743, "learning_rate": 3.138327521343772e-06, "loss": 0.0231, "step": 97350 }, { "epoch": 0.40621792357570247, "grad_norm": 0.9543349435241917, "learning_rate": 3.1382469296932572e-06, "loss": 0.022, "step": 97355 }, { "epoch": 0.4062387862906927, "grad_norm": 0.5657518436327382, "learning_rate": 3.1381663442511586e-06, "loss": 0.0249, "step": 97360 }, { "epoch": 0.406259649005683, "grad_norm": 0.6944191472726112, "learning_rate": 3.1380857650166772e-06, "loss": 0.0274, "step": 97365 }, { "epoch": 0.40628051172067325, "grad_norm": 0.922857577594364, "learning_rate": 3.1380051919890187e-06, "loss": 0.0274, "step": 97370 }, { "epoch": 0.40630137443566355, "grad_norm": 0.6770791277972747, "learning_rate": 3.137924625167385e-06, "loss": 0.0296, "step": 97375 }, { "epoch": 0.40632223715065385, "grad_norm": 0.678689436806901, "learning_rate": 3.137844064550979e-06, "loss": 0.026, "step": 97380 }, { "epoch": 0.4063430998656441, "grad_norm": 0.8176206874927708, "learning_rate": 3.1377635101390047e-06, "loss": 0.0338, "step": 97385 }, { "epoch": 0.4063639625806344, "grad_norm": 0.737931167166423, "learning_rate": 3.137682961930666e-06, "loss": 0.0286, "step": 97390 }, { "epoch": 0.4063848252956247, "grad_norm": 0.5268604570172225, "learning_rate": 3.1376024199251665e-06, "loss": 0.0314, "step": 97395 }, { "epoch": 0.40640568801061494, "grad_norm": 0.6622356581015832, "learning_rate": 3.13752188412171e-06, "loss": 0.022, "step": 97400 }, { "epoch": 0.40642655072560524, "grad_norm": 0.8963783890116307, "learning_rate": 3.1374413545195002e-06, "loss": 0.0172, "step": 97405 }, { "epoch": 0.4064474134405955, "grad_norm": 0.6202642720922896, "learning_rate": 3.1373608311177424e-06, "loss": 0.0189, "step": 97410 }, { "epoch": 0.4064682761555858, "grad_norm": 0.974836475007266, "learning_rate": 3.1372803139156406e-06, "loss": 0.0334, "step": 97415 }, { "epoch": 0.4064891388705761, "grad_norm": 0.6923249893503464, "learning_rate": 3.1371998029123986e-06, "loss": 0.0223, "step": 97420 }, { "epoch": 0.4065100015855663, "grad_norm": 0.6120552782163685, "learning_rate": 3.137119298107221e-06, "loss": 0.0249, "step": 97425 }, { "epoch": 0.4065308643005566, "grad_norm": 0.7156198686819037, "learning_rate": 3.1370387994993132e-06, "loss": 0.0246, "step": 97430 }, { "epoch": 0.40655172701554687, "grad_norm": 0.44315758859916166, "learning_rate": 3.1369583070878808e-06, "loss": 0.0271, "step": 97435 }, { "epoch": 0.40657258973053717, "grad_norm": 0.6261828089850289, "learning_rate": 3.136877820872127e-06, "loss": 0.0299, "step": 97440 }, { "epoch": 0.40659345244552747, "grad_norm": 0.6628794676287304, "learning_rate": 3.136797340851259e-06, "loss": 0.0198, "step": 97445 }, { "epoch": 0.4066143151605177, "grad_norm": 0.5834118113745013, "learning_rate": 3.136716867024481e-06, "loss": 0.0301, "step": 97450 }, { "epoch": 0.406635177875508, "grad_norm": 0.9259489831568593, "learning_rate": 3.1366363993909986e-06, "loss": 0.0295, "step": 97455 }, { "epoch": 0.40665604059049826, "grad_norm": 0.768679743684306, "learning_rate": 3.1365559379500172e-06, "loss": 0.0226, "step": 97460 }, { "epoch": 0.40667690330548856, "grad_norm": 1.078995195545767, "learning_rate": 3.1364754827007437e-06, "loss": 0.0319, "step": 97465 }, { "epoch": 0.40669776602047886, "grad_norm": 0.960585000718432, "learning_rate": 3.1363950336423833e-06, "loss": 0.0269, "step": 97470 }, { "epoch": 0.4067186287354691, "grad_norm": 0.5179532609270892, "learning_rate": 3.1363145907741417e-06, "loss": 0.023, "step": 97475 }, { "epoch": 0.4067394914504594, "grad_norm": 0.4596991252174314, "learning_rate": 3.1362341540952253e-06, "loss": 0.0176, "step": 97480 }, { "epoch": 0.4067603541654497, "grad_norm": 0.3155619009238483, "learning_rate": 3.1361537236048405e-06, "loss": 0.0274, "step": 97485 }, { "epoch": 0.40678121688043994, "grad_norm": 0.8348710740583324, "learning_rate": 3.136073299302194e-06, "loss": 0.026, "step": 97490 }, { "epoch": 0.40680207959543024, "grad_norm": 1.3114076008708018, "learning_rate": 3.135992881186493e-06, "loss": 0.029, "step": 97495 }, { "epoch": 0.4068229423104205, "grad_norm": 1.1228780265907319, "learning_rate": 3.1359124692569434e-06, "loss": 0.0218, "step": 97500 }, { "epoch": 0.4068438050254108, "grad_norm": 0.6437681012923894, "learning_rate": 3.135832063512752e-06, "loss": 0.0275, "step": 97505 }, { "epoch": 0.4068646677404011, "grad_norm": 0.4735844934215337, "learning_rate": 3.1357516639531266e-06, "loss": 0.0236, "step": 97510 }, { "epoch": 0.40688553045539133, "grad_norm": 1.1132918215000076, "learning_rate": 3.135671270577274e-06, "loss": 0.0306, "step": 97515 }, { "epoch": 0.40690639317038163, "grad_norm": 0.5360085258653449, "learning_rate": 3.135590883384401e-06, "loss": 0.0247, "step": 97520 }, { "epoch": 0.4069272558853719, "grad_norm": 0.6782368564271987, "learning_rate": 3.135510502373717e-06, "loss": 0.0271, "step": 97525 }, { "epoch": 0.4069481186003622, "grad_norm": 1.0553436009664394, "learning_rate": 3.135430127544428e-06, "loss": 0.0267, "step": 97530 }, { "epoch": 0.4069689813153525, "grad_norm": 0.8786454538612956, "learning_rate": 3.1353497588957417e-06, "loss": 0.0361, "step": 97535 }, { "epoch": 0.4069898440303427, "grad_norm": 0.5522690207582621, "learning_rate": 3.1352693964268667e-06, "loss": 0.0312, "step": 97540 }, { "epoch": 0.407010706745333, "grad_norm": 0.5839699422805519, "learning_rate": 3.13518904013701e-06, "loss": 0.0351, "step": 97545 }, { "epoch": 0.40703156946032326, "grad_norm": 0.6735954732102645, "learning_rate": 3.135108690025382e-06, "loss": 0.0261, "step": 97550 }, { "epoch": 0.40705243217531356, "grad_norm": 0.7071792630965591, "learning_rate": 3.1350283460911884e-06, "loss": 0.0298, "step": 97555 }, { "epoch": 0.40707329489030386, "grad_norm": 0.9435000439040867, "learning_rate": 3.13494800833364e-06, "loss": 0.0218, "step": 97560 }, { "epoch": 0.4070941576052941, "grad_norm": 1.0149495108847753, "learning_rate": 3.1348676767519437e-06, "loss": 0.0238, "step": 97565 }, { "epoch": 0.4071150203202844, "grad_norm": 0.47245163397680023, "learning_rate": 3.1347873513453087e-06, "loss": 0.0175, "step": 97570 }, { "epoch": 0.4071358830352747, "grad_norm": 0.6942174382502231, "learning_rate": 3.1347070321129446e-06, "loss": 0.0253, "step": 97575 }, { "epoch": 0.40715674575026495, "grad_norm": 0.8297637636021017, "learning_rate": 3.1346267190540603e-06, "loss": 0.0306, "step": 97580 }, { "epoch": 0.40717760846525525, "grad_norm": 1.0173780148576048, "learning_rate": 3.134546412167865e-06, "loss": 0.023, "step": 97585 }, { "epoch": 0.4071984711802455, "grad_norm": 0.543419670727289, "learning_rate": 3.1344661114535673e-06, "loss": 0.0265, "step": 97590 }, { "epoch": 0.4072193338952358, "grad_norm": 0.7303544224038083, "learning_rate": 3.134385816910377e-06, "loss": 0.034, "step": 97595 }, { "epoch": 0.4072401966102261, "grad_norm": 0.46203231550821294, "learning_rate": 3.134305528537504e-06, "loss": 0.0144, "step": 97600 }, { "epoch": 0.40726105932521633, "grad_norm": 0.9919599341497033, "learning_rate": 3.1342252463341578e-06, "loss": 0.0302, "step": 97605 }, { "epoch": 0.40728192204020663, "grad_norm": 0.5876384478408218, "learning_rate": 3.134144970299549e-06, "loss": 0.0232, "step": 97610 }, { "epoch": 0.4073027847551969, "grad_norm": 1.0105289500389398, "learning_rate": 3.134064700432887e-06, "loss": 0.0312, "step": 97615 }, { "epoch": 0.4073236474701872, "grad_norm": 0.8031798090468627, "learning_rate": 3.133984436733381e-06, "loss": 0.0311, "step": 97620 }, { "epoch": 0.4073445101851775, "grad_norm": 0.819172320593021, "learning_rate": 3.133904179200244e-06, "loss": 0.031, "step": 97625 }, { "epoch": 0.4073653729001677, "grad_norm": 1.156356425420966, "learning_rate": 3.133823927832684e-06, "loss": 0.0367, "step": 97630 }, { "epoch": 0.407386235615158, "grad_norm": 0.43023658892991534, "learning_rate": 3.133743682629913e-06, "loss": 0.0241, "step": 97635 }, { "epoch": 0.40740709833014827, "grad_norm": 0.5994481286882226, "learning_rate": 3.1336634435911407e-06, "loss": 0.0182, "step": 97640 }, { "epoch": 0.40742796104513856, "grad_norm": 0.42424826972679264, "learning_rate": 3.1335832107155788e-06, "loss": 0.0279, "step": 97645 }, { "epoch": 0.40744882376012886, "grad_norm": 0.3856613746047731, "learning_rate": 3.133502984002438e-06, "loss": 0.0238, "step": 97650 }, { "epoch": 0.4074696864751191, "grad_norm": 0.7603194546211498, "learning_rate": 3.1334227634509303e-06, "loss": 0.0256, "step": 97655 }, { "epoch": 0.4074905491901094, "grad_norm": 0.9006192239225269, "learning_rate": 3.1333425490602658e-06, "loss": 0.0259, "step": 97660 }, { "epoch": 0.4075114119050997, "grad_norm": 1.3198558253134098, "learning_rate": 3.1332623408296564e-06, "loss": 0.0399, "step": 97665 }, { "epoch": 0.40753227462008995, "grad_norm": 0.6805980173535191, "learning_rate": 3.133182138758314e-06, "loss": 0.028, "step": 97670 }, { "epoch": 0.40755313733508025, "grad_norm": 1.1576167605281313, "learning_rate": 3.13310194284545e-06, "loss": 0.026, "step": 97675 }, { "epoch": 0.4075740000500705, "grad_norm": 1.0723567200857922, "learning_rate": 3.1330217530902763e-06, "loss": 0.0332, "step": 97680 }, { "epoch": 0.4075948627650608, "grad_norm": 0.37690779202889707, "learning_rate": 3.132941569492006e-06, "loss": 0.0179, "step": 97685 }, { "epoch": 0.4076157254800511, "grad_norm": 0.7863836722201258, "learning_rate": 3.132861392049849e-06, "loss": 0.0243, "step": 97690 }, { "epoch": 0.40763658819504134, "grad_norm": 0.6138086503172638, "learning_rate": 3.13278122076302e-06, "loss": 0.0271, "step": 97695 }, { "epoch": 0.40765745091003164, "grad_norm": 1.003420432344705, "learning_rate": 3.1327010556307297e-06, "loss": 0.0247, "step": 97700 }, { "epoch": 0.4076783136250219, "grad_norm": 0.9585945977449991, "learning_rate": 3.1326208966521915e-06, "loss": 0.0362, "step": 97705 }, { "epoch": 0.4076991763400122, "grad_norm": 0.5740069615677716, "learning_rate": 3.1325407438266188e-06, "loss": 0.0304, "step": 97710 }, { "epoch": 0.4077200390550025, "grad_norm": 0.8022009433011248, "learning_rate": 3.1324605971532225e-06, "loss": 0.0353, "step": 97715 }, { "epoch": 0.4077409017699927, "grad_norm": 0.842412764491188, "learning_rate": 3.1323804566312175e-06, "loss": 0.03, "step": 97720 }, { "epoch": 0.407761764484983, "grad_norm": 0.9012962849973405, "learning_rate": 3.1323003222598157e-06, "loss": 0.0313, "step": 97725 }, { "epoch": 0.40778262719997327, "grad_norm": 0.7834917162626892, "learning_rate": 3.1322201940382317e-06, "loss": 0.0302, "step": 97730 }, { "epoch": 0.40780348991496357, "grad_norm": 1.0200754183063958, "learning_rate": 3.132140071965679e-06, "loss": 0.0229, "step": 97735 }, { "epoch": 0.40782435262995387, "grad_norm": 0.37068081326993185, "learning_rate": 3.1320599560413687e-06, "loss": 0.0293, "step": 97740 }, { "epoch": 0.4078452153449441, "grad_norm": 0.9097703924117934, "learning_rate": 3.1319798462645167e-06, "loss": 0.0308, "step": 97745 }, { "epoch": 0.4078660780599344, "grad_norm": 0.6165685745868286, "learning_rate": 3.1318997426343368e-06, "loss": 0.0254, "step": 97750 }, { "epoch": 0.4078869407749247, "grad_norm": 0.8268164127298037, "learning_rate": 3.131819645150042e-06, "loss": 0.0285, "step": 97755 }, { "epoch": 0.40790780348991496, "grad_norm": 0.6202425458974306, "learning_rate": 3.1317395538108475e-06, "loss": 0.0217, "step": 97760 }, { "epoch": 0.40792866620490525, "grad_norm": 0.5914093036077976, "learning_rate": 3.131659468615967e-06, "loss": 0.0189, "step": 97765 }, { "epoch": 0.4079495289198955, "grad_norm": 0.7699334391584143, "learning_rate": 3.131579389564615e-06, "loss": 0.0234, "step": 97770 }, { "epoch": 0.4079703916348858, "grad_norm": 0.7564076586908506, "learning_rate": 3.131499316656006e-06, "loss": 0.0249, "step": 97775 }, { "epoch": 0.4079912543498761, "grad_norm": 0.7156264912850898, "learning_rate": 3.1314192498893546e-06, "loss": 0.025, "step": 97780 }, { "epoch": 0.40801211706486634, "grad_norm": 1.1299889981627562, "learning_rate": 3.1313391892638763e-06, "loss": 0.0283, "step": 97785 }, { "epoch": 0.40803297977985664, "grad_norm": 0.5875328931840821, "learning_rate": 3.1312591347787856e-06, "loss": 0.0205, "step": 97790 }, { "epoch": 0.4080538424948469, "grad_norm": 0.9867724097272343, "learning_rate": 3.131179086433298e-06, "loss": 0.0299, "step": 97795 }, { "epoch": 0.4080747052098372, "grad_norm": 0.9019079292277933, "learning_rate": 3.131099044226628e-06, "loss": 0.0267, "step": 97800 }, { "epoch": 0.4080955679248275, "grad_norm": 0.6141444059637686, "learning_rate": 3.1310190081579912e-06, "loss": 0.0215, "step": 97805 }, { "epoch": 0.40811643063981773, "grad_norm": 0.5423764163373102, "learning_rate": 3.130938978226603e-06, "loss": 0.0208, "step": 97810 }, { "epoch": 0.40813729335480803, "grad_norm": 0.5363198033684627, "learning_rate": 3.13085895443168e-06, "loss": 0.0264, "step": 97815 }, { "epoch": 0.4081581560697983, "grad_norm": 0.8845689438674338, "learning_rate": 3.1307789367724386e-06, "loss": 0.0297, "step": 97820 }, { "epoch": 0.40817901878478857, "grad_norm": 1.1784308489670416, "learning_rate": 3.1306989252480923e-06, "loss": 0.0302, "step": 97825 }, { "epoch": 0.40819988149977887, "grad_norm": 1.2284115483949607, "learning_rate": 3.130618919857859e-06, "loss": 0.0343, "step": 97830 }, { "epoch": 0.4082207442147691, "grad_norm": 0.950860517853473, "learning_rate": 3.130538920600954e-06, "loss": 0.046, "step": 97835 }, { "epoch": 0.4082416069297594, "grad_norm": 1.2001450553707933, "learning_rate": 3.130458927476595e-06, "loss": 0.0347, "step": 97840 }, { "epoch": 0.4082624696447497, "grad_norm": 0.5708387159743017, "learning_rate": 3.1303789404839976e-06, "loss": 0.0277, "step": 97845 }, { "epoch": 0.40828333235973996, "grad_norm": 1.1769563461680679, "learning_rate": 3.1302989596223787e-06, "loss": 0.0222, "step": 97850 }, { "epoch": 0.40830419507473026, "grad_norm": 0.5436959257471572, "learning_rate": 3.1302189848909554e-06, "loss": 0.0279, "step": 97855 }, { "epoch": 0.4083250577897205, "grad_norm": 0.6310612104137001, "learning_rate": 3.130139016288943e-06, "loss": 0.0223, "step": 97860 }, { "epoch": 0.4083459205047108, "grad_norm": 0.7210718533806029, "learning_rate": 3.1300590538155605e-06, "loss": 0.0216, "step": 97865 }, { "epoch": 0.4083667832197011, "grad_norm": 0.3245965239207366, "learning_rate": 3.1299790974700245e-06, "loss": 0.0171, "step": 97870 }, { "epoch": 0.40838764593469135, "grad_norm": 1.1900177544754917, "learning_rate": 3.1298991472515526e-06, "loss": 0.0221, "step": 97875 }, { "epoch": 0.40840850864968165, "grad_norm": 0.7495802896133607, "learning_rate": 3.129819203159363e-06, "loss": 0.0201, "step": 97880 }, { "epoch": 0.4084293713646719, "grad_norm": 1.7924989484057392, "learning_rate": 3.1297392651926707e-06, "loss": 0.0351, "step": 97885 }, { "epoch": 0.4084502340796622, "grad_norm": 0.9716466281907062, "learning_rate": 3.1296593333506963e-06, "loss": 0.0262, "step": 97890 }, { "epoch": 0.4084710967946525, "grad_norm": 0.623040162604147, "learning_rate": 3.129579407632656e-06, "loss": 0.0255, "step": 97895 }, { "epoch": 0.40849195950964273, "grad_norm": 0.7208123200315492, "learning_rate": 3.1294994880377693e-06, "loss": 0.0267, "step": 97900 }, { "epoch": 0.40851282222463303, "grad_norm": 0.7056948899676463, "learning_rate": 3.129419574565253e-06, "loss": 0.0242, "step": 97905 }, { "epoch": 0.4085336849396233, "grad_norm": 1.7288921615401984, "learning_rate": 3.1293396672143267e-06, "loss": 0.0289, "step": 97910 }, { "epoch": 0.4085545476546136, "grad_norm": 0.8669882838378657, "learning_rate": 3.1292597659842077e-06, "loss": 0.0287, "step": 97915 }, { "epoch": 0.4085754103696039, "grad_norm": 1.0463714789101861, "learning_rate": 3.1291798708741156e-06, "loss": 0.0285, "step": 97920 }, { "epoch": 0.4085962730845941, "grad_norm": 0.4288865935065338, "learning_rate": 3.1290999818832687e-06, "loss": 0.0209, "step": 97925 }, { "epoch": 0.4086171357995844, "grad_norm": 0.7898810778886428, "learning_rate": 3.129020099010886e-06, "loss": 0.0282, "step": 97930 }, { "epoch": 0.4086379985145747, "grad_norm": 1.0448262975643838, "learning_rate": 3.128940222256186e-06, "loss": 0.0234, "step": 97935 }, { "epoch": 0.40865886122956496, "grad_norm": 0.40582338803201723, "learning_rate": 3.128860351618389e-06, "loss": 0.0266, "step": 97940 }, { "epoch": 0.40867972394455526, "grad_norm": 0.671131049844293, "learning_rate": 3.128780487096714e-06, "loss": 0.0303, "step": 97945 }, { "epoch": 0.4087005866595455, "grad_norm": 0.6598830812922769, "learning_rate": 3.1287006286903797e-06, "loss": 0.0248, "step": 97950 }, { "epoch": 0.4087214493745358, "grad_norm": 0.9808647725619029, "learning_rate": 3.1286207763986065e-06, "loss": 0.0279, "step": 97955 }, { "epoch": 0.4087423120895261, "grad_norm": 0.5923803449306628, "learning_rate": 3.1285409302206138e-06, "loss": 0.0257, "step": 97960 }, { "epoch": 0.40876317480451635, "grad_norm": 0.549059169355055, "learning_rate": 3.128461090155621e-06, "loss": 0.0329, "step": 97965 }, { "epoch": 0.40878403751950665, "grad_norm": 0.8566069132579781, "learning_rate": 3.1283812562028493e-06, "loss": 0.027, "step": 97970 }, { "epoch": 0.4088049002344969, "grad_norm": 0.8453258780144948, "learning_rate": 3.128301428361518e-06, "loss": 0.0371, "step": 97975 }, { "epoch": 0.4088257629494872, "grad_norm": 0.9174937627342513, "learning_rate": 3.128221606630848e-06, "loss": 0.0355, "step": 97980 }, { "epoch": 0.4088466256644775, "grad_norm": 2.8978328831689564, "learning_rate": 3.128141791010059e-06, "loss": 0.0206, "step": 97985 }, { "epoch": 0.40886748837946774, "grad_norm": 0.9675164852795356, "learning_rate": 3.128061981498372e-06, "loss": 0.0258, "step": 97990 }, { "epoch": 0.40888835109445804, "grad_norm": 0.6010564088417204, "learning_rate": 3.127982178095008e-06, "loss": 0.0224, "step": 97995 }, { "epoch": 0.4089092138094483, "grad_norm": 0.7118482990124968, "learning_rate": 3.1279023807991878e-06, "loss": 0.0213, "step": 98000 }, { "epoch": 0.4089300765244386, "grad_norm": 0.6288566149941349, "learning_rate": 3.127822589610131e-06, "loss": 0.0211, "step": 98005 }, { "epoch": 0.4089509392394289, "grad_norm": 0.9461392973537827, "learning_rate": 3.1277428045270607e-06, "loss": 0.0391, "step": 98010 }, { "epoch": 0.4089718019544191, "grad_norm": 0.5510624452348578, "learning_rate": 3.1276630255491973e-06, "loss": 0.019, "step": 98015 }, { "epoch": 0.4089926646694094, "grad_norm": 0.7723851336060857, "learning_rate": 3.127583252675762e-06, "loss": 0.028, "step": 98020 }, { "epoch": 0.4090135273843997, "grad_norm": 0.8951417542279523, "learning_rate": 3.1275034859059767e-06, "loss": 0.0277, "step": 98025 }, { "epoch": 0.40903439009938997, "grad_norm": 0.8689740039844148, "learning_rate": 3.127423725239063e-06, "loss": 0.0361, "step": 98030 }, { "epoch": 0.40905525281438027, "grad_norm": 0.6894575450866155, "learning_rate": 3.127343970674243e-06, "loss": 0.0278, "step": 98035 }, { "epoch": 0.4090761155293705, "grad_norm": 0.6866029973673289, "learning_rate": 3.1272642222107385e-06, "loss": 0.0201, "step": 98040 }, { "epoch": 0.4090969782443608, "grad_norm": 1.0505713283529923, "learning_rate": 3.127184479847772e-06, "loss": 0.031, "step": 98045 }, { "epoch": 0.4091178409593511, "grad_norm": 0.5118405301115113, "learning_rate": 3.127104743584564e-06, "loss": 0.0227, "step": 98050 }, { "epoch": 0.40913870367434135, "grad_norm": 0.5582062289902224, "learning_rate": 3.127025013420339e-06, "loss": 0.0295, "step": 98055 }, { "epoch": 0.40915956638933165, "grad_norm": 0.6632698949418669, "learning_rate": 3.126945289354319e-06, "loss": 0.0266, "step": 98060 }, { "epoch": 0.4091804291043219, "grad_norm": 0.48137681874852456, "learning_rate": 3.1268655713857255e-06, "loss": 0.0243, "step": 98065 }, { "epoch": 0.4092012918193122, "grad_norm": 0.9254920103539965, "learning_rate": 3.126785859513782e-06, "loss": 0.0247, "step": 98070 }, { "epoch": 0.4092221545343025, "grad_norm": 0.9032096755905313, "learning_rate": 3.1267061537377126e-06, "loss": 0.0265, "step": 98075 }, { "epoch": 0.40924301724929274, "grad_norm": 1.017819977593881, "learning_rate": 3.126626454056739e-06, "loss": 0.0291, "step": 98080 }, { "epoch": 0.40926387996428304, "grad_norm": 0.7108402772859699, "learning_rate": 3.1265467604700845e-06, "loss": 0.0332, "step": 98085 }, { "epoch": 0.4092847426792733, "grad_norm": 1.0711232500985277, "learning_rate": 3.126467072976973e-06, "loss": 0.0225, "step": 98090 }, { "epoch": 0.4093056053942636, "grad_norm": 0.6880033424326514, "learning_rate": 3.1263873915766278e-06, "loss": 0.017, "step": 98095 }, { "epoch": 0.4093264681092539, "grad_norm": 0.681269584644866, "learning_rate": 3.1263077162682728e-06, "loss": 0.0302, "step": 98100 }, { "epoch": 0.4093473308242441, "grad_norm": 0.4816131995953612, "learning_rate": 3.1262280470511307e-06, "loss": 0.0211, "step": 98105 }, { "epoch": 0.4093681935392344, "grad_norm": 0.7421163364335661, "learning_rate": 3.1261483839244266e-06, "loss": 0.027, "step": 98110 }, { "epoch": 0.4093890562542247, "grad_norm": 0.648664464889339, "learning_rate": 3.1260687268873846e-06, "loss": 0.0229, "step": 98115 }, { "epoch": 0.40940991896921497, "grad_norm": 2.017152213168829, "learning_rate": 3.125989075939228e-06, "loss": 0.0375, "step": 98120 }, { "epoch": 0.40943078168420527, "grad_norm": 1.007055604594676, "learning_rate": 3.1259094310791815e-06, "loss": 0.0291, "step": 98125 }, { "epoch": 0.4094516443991955, "grad_norm": 0.6434600820958868, "learning_rate": 3.12582979230647e-06, "loss": 0.0255, "step": 98130 }, { "epoch": 0.4094725071141858, "grad_norm": 1.650248715338596, "learning_rate": 3.1257501596203173e-06, "loss": 0.0304, "step": 98135 }, { "epoch": 0.4094933698291761, "grad_norm": 1.1267563451023455, "learning_rate": 3.1256705330199486e-06, "loss": 0.0296, "step": 98140 }, { "epoch": 0.40951423254416636, "grad_norm": 0.9642686070312233, "learning_rate": 3.1255909125045892e-06, "loss": 0.0334, "step": 98145 }, { "epoch": 0.40953509525915666, "grad_norm": 0.671551808539916, "learning_rate": 3.1255112980734633e-06, "loss": 0.0279, "step": 98150 }, { "epoch": 0.4095559579741469, "grad_norm": 0.4626028187807243, "learning_rate": 3.1254316897257965e-06, "loss": 0.0381, "step": 98155 }, { "epoch": 0.4095768206891372, "grad_norm": 0.47252590035005265, "learning_rate": 3.125352087460814e-06, "loss": 0.0235, "step": 98160 }, { "epoch": 0.4095976834041275, "grad_norm": 0.617878847453925, "learning_rate": 3.125272491277741e-06, "loss": 0.026, "step": 98165 }, { "epoch": 0.40961854611911774, "grad_norm": 0.643324643224863, "learning_rate": 3.1251929011758035e-06, "loss": 0.0262, "step": 98170 }, { "epoch": 0.40963940883410804, "grad_norm": 0.4925551091705905, "learning_rate": 3.1251133171542276e-06, "loss": 0.025, "step": 98175 }, { "epoch": 0.4096602715490983, "grad_norm": 0.8376024704071371, "learning_rate": 3.1250337392122388e-06, "loss": 0.0318, "step": 98180 }, { "epoch": 0.4096811342640886, "grad_norm": 1.2422577230144793, "learning_rate": 3.1249541673490614e-06, "loss": 0.0324, "step": 98185 }, { "epoch": 0.4097019969790789, "grad_norm": 0.696394805154879, "learning_rate": 3.124874601563924e-06, "loss": 0.0308, "step": 98190 }, { "epoch": 0.40972285969406913, "grad_norm": 0.6624358951663325, "learning_rate": 3.124795041856052e-06, "loss": 0.0306, "step": 98195 }, { "epoch": 0.40974372240905943, "grad_norm": 0.8621906483056662, "learning_rate": 3.1247154882246714e-06, "loss": 0.0234, "step": 98200 }, { "epoch": 0.40976458512404973, "grad_norm": 0.5472460371922798, "learning_rate": 3.124635940669009e-06, "loss": 0.0184, "step": 98205 }, { "epoch": 0.40978544783904, "grad_norm": 0.4921850974847001, "learning_rate": 3.1245563991882916e-06, "loss": 0.0221, "step": 98210 }, { "epoch": 0.4098063105540303, "grad_norm": 0.6761828072715453, "learning_rate": 3.1244768637817453e-06, "loss": 0.0204, "step": 98215 }, { "epoch": 0.4098271732690205, "grad_norm": 0.34021426026697105, "learning_rate": 3.1243973344485984e-06, "loss": 0.0248, "step": 98220 }, { "epoch": 0.4098480359840108, "grad_norm": 0.8184919826121348, "learning_rate": 3.1243178111880767e-06, "loss": 0.0264, "step": 98225 }, { "epoch": 0.4098688986990011, "grad_norm": 0.8012985713916879, "learning_rate": 3.1242382939994082e-06, "loss": 0.0264, "step": 98230 }, { "epoch": 0.40988976141399136, "grad_norm": 0.6664897998355568, "learning_rate": 3.12415878288182e-06, "loss": 0.0235, "step": 98235 }, { "epoch": 0.40991062412898166, "grad_norm": 0.8055287357897886, "learning_rate": 3.1240792778345396e-06, "loss": 0.0336, "step": 98240 }, { "epoch": 0.4099314868439719, "grad_norm": 0.964355366783336, "learning_rate": 3.123999778856795e-06, "loss": 0.0223, "step": 98245 }, { "epoch": 0.4099523495589622, "grad_norm": 0.5582577746422902, "learning_rate": 3.1239202859478134e-06, "loss": 0.026, "step": 98250 }, { "epoch": 0.4099732122739525, "grad_norm": 0.6862385550091871, "learning_rate": 3.1238407991068226e-06, "loss": 0.0247, "step": 98255 }, { "epoch": 0.40999407498894275, "grad_norm": 0.5185898717649376, "learning_rate": 3.1237613183330514e-06, "loss": 0.0217, "step": 98260 }, { "epoch": 0.41001493770393305, "grad_norm": 0.6407979224215133, "learning_rate": 3.123681843625727e-06, "loss": 0.0266, "step": 98265 }, { "epoch": 0.4100358004189233, "grad_norm": 1.402958073430715, "learning_rate": 3.1236023749840786e-06, "loss": 0.0308, "step": 98270 }, { "epoch": 0.4100566631339136, "grad_norm": 0.5017763934858673, "learning_rate": 3.123522912407335e-06, "loss": 0.0353, "step": 98275 }, { "epoch": 0.4100775258489039, "grad_norm": 0.5005027140606259, "learning_rate": 3.1234434558947237e-06, "loss": 0.0286, "step": 98280 }, { "epoch": 0.41009838856389413, "grad_norm": 1.0766472750587912, "learning_rate": 3.123364005445474e-06, "loss": 0.0285, "step": 98285 }, { "epoch": 0.41011925127888443, "grad_norm": 1.1464456554191296, "learning_rate": 3.123284561058814e-06, "loss": 0.0271, "step": 98290 }, { "epoch": 0.41014011399387473, "grad_norm": 0.566932710202551, "learning_rate": 3.123205122733975e-06, "loss": 0.0216, "step": 98295 }, { "epoch": 0.410160976708865, "grad_norm": 0.4691614617942717, "learning_rate": 3.1231256904701834e-06, "loss": 0.0282, "step": 98300 }, { "epoch": 0.4101818394238553, "grad_norm": 0.7096124437717266, "learning_rate": 3.12304626426667e-06, "loss": 0.024, "step": 98305 }, { "epoch": 0.4102027021388455, "grad_norm": 0.8218200777120258, "learning_rate": 3.1229668441226635e-06, "loss": 0.0206, "step": 98310 }, { "epoch": 0.4102235648538358, "grad_norm": 0.7771180042437263, "learning_rate": 3.122887430037394e-06, "loss": 0.024, "step": 98315 }, { "epoch": 0.4102444275688261, "grad_norm": 0.5928647489106593, "learning_rate": 3.1228080220100916e-06, "loss": 0.0266, "step": 98320 }, { "epoch": 0.41026529028381636, "grad_norm": 0.5321892096476711, "learning_rate": 3.122728620039985e-06, "loss": 0.0251, "step": 98325 }, { "epoch": 0.41028615299880666, "grad_norm": 0.6318672120404689, "learning_rate": 3.1226492241263047e-06, "loss": 0.0274, "step": 98330 }, { "epoch": 0.4103070157137969, "grad_norm": 0.3880648047264829, "learning_rate": 3.1225698342682808e-06, "loss": 0.0234, "step": 98335 }, { "epoch": 0.4103278784287872, "grad_norm": 0.5310369686107189, "learning_rate": 3.122490450465144e-06, "loss": 0.0229, "step": 98340 }, { "epoch": 0.4103487411437775, "grad_norm": 0.6367862683243051, "learning_rate": 3.1224110727161243e-06, "loss": 0.025, "step": 98345 }, { "epoch": 0.41036960385876775, "grad_norm": 0.34744050570741375, "learning_rate": 3.122331701020452e-06, "loss": 0.0211, "step": 98350 }, { "epoch": 0.41039046657375805, "grad_norm": 0.2693634625017888, "learning_rate": 3.1222523353773576e-06, "loss": 0.0253, "step": 98355 }, { "epoch": 0.4104113292887483, "grad_norm": 1.1202473262780348, "learning_rate": 3.122172975786073e-06, "loss": 0.0356, "step": 98360 }, { "epoch": 0.4104321920037386, "grad_norm": 0.7055255662653763, "learning_rate": 3.122093622245828e-06, "loss": 0.0282, "step": 98365 }, { "epoch": 0.4104530547187289, "grad_norm": 0.9598971431684137, "learning_rate": 3.1220142747558548e-06, "loss": 0.0263, "step": 98370 }, { "epoch": 0.41047391743371914, "grad_norm": 0.9474637726898008, "learning_rate": 3.1219349333153827e-06, "loss": 0.0293, "step": 98375 }, { "epoch": 0.41049478014870944, "grad_norm": 0.656479510192983, "learning_rate": 3.121855597923645e-06, "loss": 0.0265, "step": 98380 }, { "epoch": 0.41051564286369974, "grad_norm": 1.037938241566218, "learning_rate": 3.1217762685798718e-06, "loss": 0.0328, "step": 98385 }, { "epoch": 0.41053650557869, "grad_norm": 0.8028977920421427, "learning_rate": 3.1216969452832956e-06, "loss": 0.0231, "step": 98390 }, { "epoch": 0.4105573682936803, "grad_norm": 0.7149123096484943, "learning_rate": 3.1216176280331485e-06, "loss": 0.025, "step": 98395 }, { "epoch": 0.4105782310086705, "grad_norm": 0.5259357352976668, "learning_rate": 3.1215383168286606e-06, "loss": 0.0259, "step": 98400 }, { "epoch": 0.4105990937236608, "grad_norm": 0.9997125996721479, "learning_rate": 3.1214590116690656e-06, "loss": 0.0275, "step": 98405 }, { "epoch": 0.4106199564386511, "grad_norm": 1.18909794616997, "learning_rate": 3.121379712553595e-06, "loss": 0.0251, "step": 98410 }, { "epoch": 0.41064081915364137, "grad_norm": 0.7917280880223223, "learning_rate": 3.1213004194814823e-06, "loss": 0.0242, "step": 98415 }, { "epoch": 0.41066168186863167, "grad_norm": 0.727549473361791, "learning_rate": 3.1212211324519576e-06, "loss": 0.0263, "step": 98420 }, { "epoch": 0.4106825445836219, "grad_norm": 0.8573326879213037, "learning_rate": 3.121141851464255e-06, "loss": 0.0244, "step": 98425 }, { "epoch": 0.4107034072986122, "grad_norm": 0.8122971095874372, "learning_rate": 3.1210625765176073e-06, "loss": 0.0302, "step": 98430 }, { "epoch": 0.4107242700136025, "grad_norm": 0.9537516727400328, "learning_rate": 3.120983307611247e-06, "loss": 0.0263, "step": 98435 }, { "epoch": 0.41074513272859275, "grad_norm": 0.5431898199365137, "learning_rate": 3.1209040447444068e-06, "loss": 0.0266, "step": 98440 }, { "epoch": 0.41076599544358305, "grad_norm": 1.4915065759271657, "learning_rate": 3.1208247879163207e-06, "loss": 0.028, "step": 98445 }, { "epoch": 0.4107868581585733, "grad_norm": 0.48319021110446253, "learning_rate": 3.12074553712622e-06, "loss": 0.021, "step": 98450 }, { "epoch": 0.4108077208735636, "grad_norm": 0.30234044379559055, "learning_rate": 3.1206662923733405e-06, "loss": 0.0232, "step": 98455 }, { "epoch": 0.4108285835885539, "grad_norm": 0.6818255028152664, "learning_rate": 3.120587053656915e-06, "loss": 0.0272, "step": 98460 }, { "epoch": 0.41084944630354414, "grad_norm": 0.7397800400375958, "learning_rate": 3.1205078209761763e-06, "loss": 0.0299, "step": 98465 }, { "epoch": 0.41087030901853444, "grad_norm": 0.6774872553590592, "learning_rate": 3.120428594330358e-06, "loss": 0.0276, "step": 98470 }, { "epoch": 0.41089117173352474, "grad_norm": 0.6524208442575712, "learning_rate": 3.1203493737186963e-06, "loss": 0.0254, "step": 98475 }, { "epoch": 0.410912034448515, "grad_norm": 0.6331739285834642, "learning_rate": 3.1202701591404227e-06, "loss": 0.0158, "step": 98480 }, { "epoch": 0.4109328971635053, "grad_norm": 0.7132852856203266, "learning_rate": 3.120190950594773e-06, "loss": 0.0276, "step": 98485 }, { "epoch": 0.41095375987849553, "grad_norm": 0.6383755057838415, "learning_rate": 3.1201117480809804e-06, "loss": 0.0311, "step": 98490 }, { "epoch": 0.4109746225934858, "grad_norm": 1.1969311118551011, "learning_rate": 3.1200325515982803e-06, "loss": 0.0263, "step": 98495 }, { "epoch": 0.4109954853084761, "grad_norm": 1.0425742432084781, "learning_rate": 3.1199533611459077e-06, "loss": 0.0288, "step": 98500 }, { "epoch": 0.41101634802346637, "grad_norm": 0.4789624554163349, "learning_rate": 3.1198741767230954e-06, "loss": 0.0222, "step": 98505 }, { "epoch": 0.41103721073845667, "grad_norm": 0.714271463480378, "learning_rate": 3.1197949983290804e-06, "loss": 0.0225, "step": 98510 }, { "epoch": 0.4110580734534469, "grad_norm": 0.9365665791282679, "learning_rate": 3.1197158259630965e-06, "loss": 0.0213, "step": 98515 }, { "epoch": 0.4110789361684372, "grad_norm": 0.6822936065099066, "learning_rate": 3.1196366596243787e-06, "loss": 0.0239, "step": 98520 }, { "epoch": 0.4110997988834275, "grad_norm": 0.8215992256603953, "learning_rate": 3.1195574993121637e-06, "loss": 0.0196, "step": 98525 }, { "epoch": 0.41112066159841776, "grad_norm": 0.44352261705366197, "learning_rate": 3.1194783450256855e-06, "loss": 0.0279, "step": 98530 }, { "epoch": 0.41114152431340806, "grad_norm": 0.5443988590539597, "learning_rate": 3.1193991967641806e-06, "loss": 0.025, "step": 98535 }, { "epoch": 0.4111623870283983, "grad_norm": 0.6560961963457579, "learning_rate": 3.119320054526884e-06, "loss": 0.0192, "step": 98540 }, { "epoch": 0.4111832497433886, "grad_norm": 0.8764019753655926, "learning_rate": 3.1192409183130317e-06, "loss": 0.0306, "step": 98545 }, { "epoch": 0.4112041124583789, "grad_norm": 0.8925260664749048, "learning_rate": 3.11916178812186e-06, "loss": 0.0366, "step": 98550 }, { "epoch": 0.41122497517336914, "grad_norm": 0.7807026595152639, "learning_rate": 3.1190826639526053e-06, "loss": 0.0295, "step": 98555 }, { "epoch": 0.41124583788835944, "grad_norm": 0.6382068651749333, "learning_rate": 3.1190035458045027e-06, "loss": 0.0242, "step": 98560 }, { "epoch": 0.41126670060334974, "grad_norm": 0.9005282561998068, "learning_rate": 3.11892443367679e-06, "loss": 0.0234, "step": 98565 }, { "epoch": 0.41128756331834, "grad_norm": 0.8013939196048989, "learning_rate": 3.118845327568702e-06, "loss": 0.0284, "step": 98570 }, { "epoch": 0.4113084260333303, "grad_norm": 0.9756616799737209, "learning_rate": 3.1187662274794767e-06, "loss": 0.0257, "step": 98575 }, { "epoch": 0.41132928874832053, "grad_norm": 0.5791934729320077, "learning_rate": 3.118687133408351e-06, "loss": 0.0231, "step": 98580 }, { "epoch": 0.41135015146331083, "grad_norm": 0.645778966183312, "learning_rate": 3.1186080453545607e-06, "loss": 0.0303, "step": 98585 }, { "epoch": 0.41137101417830113, "grad_norm": 0.9601922728807665, "learning_rate": 3.118528963317343e-06, "loss": 0.0354, "step": 98590 }, { "epoch": 0.4113918768932914, "grad_norm": 1.348333746276176, "learning_rate": 3.1184498872959367e-06, "loss": 0.0296, "step": 98595 }, { "epoch": 0.4114127396082817, "grad_norm": 0.9312066720138603, "learning_rate": 3.1183708172895777e-06, "loss": 0.0289, "step": 98600 }, { "epoch": 0.4114336023232719, "grad_norm": 0.6851479039834283, "learning_rate": 3.1182917532975037e-06, "loss": 0.0272, "step": 98605 }, { "epoch": 0.4114544650382622, "grad_norm": 0.5389299591885618, "learning_rate": 3.1182126953189524e-06, "loss": 0.031, "step": 98610 }, { "epoch": 0.4114753277532525, "grad_norm": 0.9481703041751194, "learning_rate": 3.118133643353161e-06, "loss": 0.0313, "step": 98615 }, { "epoch": 0.41149619046824276, "grad_norm": 0.44682158571915376, "learning_rate": 3.1180545973993686e-06, "loss": 0.0223, "step": 98620 }, { "epoch": 0.41151705318323306, "grad_norm": 0.9048167385107497, "learning_rate": 3.1179755574568123e-06, "loss": 0.0329, "step": 98625 }, { "epoch": 0.4115379158982233, "grad_norm": 0.7305259162541318, "learning_rate": 3.1178965235247306e-06, "loss": 0.025, "step": 98630 }, { "epoch": 0.4115587786132136, "grad_norm": 0.6123376656599995, "learning_rate": 3.1178174956023616e-06, "loss": 0.0327, "step": 98635 }, { "epoch": 0.4115796413282039, "grad_norm": 0.6998170416901462, "learning_rate": 3.117738473688943e-06, "loss": 0.0303, "step": 98640 }, { "epoch": 0.41160050404319415, "grad_norm": 0.6640142337731112, "learning_rate": 3.1176594577837143e-06, "loss": 0.0232, "step": 98645 }, { "epoch": 0.41162136675818445, "grad_norm": 0.6109851589425256, "learning_rate": 3.117580447885914e-06, "loss": 0.0258, "step": 98650 }, { "epoch": 0.41164222947317475, "grad_norm": 1.1133423923100778, "learning_rate": 3.1175014439947814e-06, "loss": 0.0367, "step": 98655 }, { "epoch": 0.411663092188165, "grad_norm": 0.5642050921579572, "learning_rate": 3.1174224461095547e-06, "loss": 0.0255, "step": 98660 }, { "epoch": 0.4116839549031553, "grad_norm": 1.019592657838943, "learning_rate": 3.1173434542294727e-06, "loss": 0.0235, "step": 98665 }, { "epoch": 0.41170481761814554, "grad_norm": 1.302904945359213, "learning_rate": 3.1172644683537757e-06, "loss": 0.0274, "step": 98670 }, { "epoch": 0.41172568033313584, "grad_norm": 0.585212513982647, "learning_rate": 3.117185488481702e-06, "loss": 0.0311, "step": 98675 }, { "epoch": 0.41174654304812613, "grad_norm": 0.5669245685519139, "learning_rate": 3.1171065146124924e-06, "loss": 0.0282, "step": 98680 }, { "epoch": 0.4117674057631164, "grad_norm": 0.49484760293995467, "learning_rate": 3.117027546745385e-06, "loss": 0.0241, "step": 98685 }, { "epoch": 0.4117882684781067, "grad_norm": 1.960748308279079, "learning_rate": 3.1169485848796207e-06, "loss": 0.0407, "step": 98690 }, { "epoch": 0.4118091311930969, "grad_norm": 0.6923292094547401, "learning_rate": 3.1168696290144385e-06, "loss": 0.0207, "step": 98695 }, { "epoch": 0.4118299939080872, "grad_norm": 0.7562093453706671, "learning_rate": 3.1167906791490788e-06, "loss": 0.0252, "step": 98700 }, { "epoch": 0.4118508566230775, "grad_norm": 0.403120466244423, "learning_rate": 3.116711735282782e-06, "loss": 0.022, "step": 98705 }, { "epoch": 0.41187171933806777, "grad_norm": 0.7320533831016756, "learning_rate": 3.116632797414788e-06, "loss": 0.0256, "step": 98710 }, { "epoch": 0.41189258205305807, "grad_norm": 0.6905059633754861, "learning_rate": 3.116553865544338e-06, "loss": 0.0301, "step": 98715 }, { "epoch": 0.4119134447680483, "grad_norm": 0.5738961599837755, "learning_rate": 3.1164749396706718e-06, "loss": 0.0256, "step": 98720 }, { "epoch": 0.4119343074830386, "grad_norm": 0.6464005599315238, "learning_rate": 3.1163960197930304e-06, "loss": 0.0182, "step": 98725 }, { "epoch": 0.4119551701980289, "grad_norm": 0.873424176446896, "learning_rate": 3.1163171059106547e-06, "loss": 0.0298, "step": 98730 }, { "epoch": 0.41197603291301915, "grad_norm": 0.280849273006706, "learning_rate": 3.116238198022785e-06, "loss": 0.018, "step": 98735 }, { "epoch": 0.41199689562800945, "grad_norm": 0.5864755218587404, "learning_rate": 3.1161592961286635e-06, "loss": 0.0342, "step": 98740 }, { "epoch": 0.41201775834299975, "grad_norm": 0.46154650450825635, "learning_rate": 3.1160804002275306e-06, "loss": 0.0183, "step": 98745 }, { "epoch": 0.41203862105799, "grad_norm": 0.48242700715233794, "learning_rate": 3.1160015103186285e-06, "loss": 0.021, "step": 98750 }, { "epoch": 0.4120594837729803, "grad_norm": 0.34909218444680634, "learning_rate": 3.1159226264011982e-06, "loss": 0.0251, "step": 98755 }, { "epoch": 0.41208034648797054, "grad_norm": 0.8280994226258741, "learning_rate": 3.115843748474481e-06, "loss": 0.024, "step": 98760 }, { "epoch": 0.41210120920296084, "grad_norm": 1.035561546380131, "learning_rate": 3.1157648765377185e-06, "loss": 0.0236, "step": 98765 }, { "epoch": 0.41212207191795114, "grad_norm": 1.0244215911418377, "learning_rate": 3.1156860105901545e-06, "loss": 0.0275, "step": 98770 }, { "epoch": 0.4121429346329414, "grad_norm": 0.9994981255096829, "learning_rate": 3.1156071506310287e-06, "loss": 0.0215, "step": 98775 }, { "epoch": 0.4121637973479317, "grad_norm": 0.8133498342438711, "learning_rate": 3.115528296659584e-06, "loss": 0.0682, "step": 98780 }, { "epoch": 0.4121846600629219, "grad_norm": 0.7928998086258546, "learning_rate": 3.1154494486750637e-06, "loss": 0.0193, "step": 98785 }, { "epoch": 0.4122055227779122, "grad_norm": 0.9974701135049381, "learning_rate": 3.1153706066767096e-06, "loss": 0.0348, "step": 98790 }, { "epoch": 0.4122263854929025, "grad_norm": 0.831334378728951, "learning_rate": 3.1152917706637635e-06, "loss": 0.0259, "step": 98795 }, { "epoch": 0.41224724820789277, "grad_norm": 0.31874120291725727, "learning_rate": 3.1152129406354697e-06, "loss": 0.0208, "step": 98800 }, { "epoch": 0.41226811092288307, "grad_norm": 0.7248646259966239, "learning_rate": 3.11513411659107e-06, "loss": 0.0259, "step": 98805 }, { "epoch": 0.4122889736378733, "grad_norm": 0.7402821884829289, "learning_rate": 3.1150552985298067e-06, "loss": 0.0151, "step": 98810 }, { "epoch": 0.4123098363528636, "grad_norm": 0.8429981208445217, "learning_rate": 3.1149764864509246e-06, "loss": 0.0192, "step": 98815 }, { "epoch": 0.4123306990678539, "grad_norm": 0.672642719585222, "learning_rate": 3.1148976803536656e-06, "loss": 0.0245, "step": 98820 }, { "epoch": 0.41235156178284416, "grad_norm": 1.4266102625410606, "learning_rate": 3.1148188802372744e-06, "loss": 0.028, "step": 98825 }, { "epoch": 0.41237242449783446, "grad_norm": 0.5417963247877038, "learning_rate": 3.1147400861009934e-06, "loss": 0.032, "step": 98830 }, { "epoch": 0.41239328721282476, "grad_norm": 0.7075759271354272, "learning_rate": 3.1146612979440666e-06, "loss": 0.0319, "step": 98835 }, { "epoch": 0.412414149927815, "grad_norm": 0.811726937880997, "learning_rate": 3.1145825157657385e-06, "loss": 0.0383, "step": 98840 }, { "epoch": 0.4124350126428053, "grad_norm": 0.764042497120039, "learning_rate": 3.114503739565251e-06, "loss": 0.0325, "step": 98845 }, { "epoch": 0.41245587535779554, "grad_norm": 0.7105947361767729, "learning_rate": 3.1144249693418503e-06, "loss": 0.0213, "step": 98850 }, { "epoch": 0.41247673807278584, "grad_norm": 1.002086380973631, "learning_rate": 3.1143462050947802e-06, "loss": 0.0239, "step": 98855 }, { "epoch": 0.41249760078777614, "grad_norm": 0.6498840290095477, "learning_rate": 3.114267446823284e-06, "loss": 0.0265, "step": 98860 }, { "epoch": 0.4125184635027664, "grad_norm": 1.620192330481613, "learning_rate": 3.1141886945266076e-06, "loss": 0.028, "step": 98865 }, { "epoch": 0.4125393262177567, "grad_norm": 0.5172923762287918, "learning_rate": 3.114109948203994e-06, "loss": 0.0218, "step": 98870 }, { "epoch": 0.41256018893274693, "grad_norm": 1.1925976559859106, "learning_rate": 3.1140312078546884e-06, "loss": 0.0313, "step": 98875 }, { "epoch": 0.41258105164773723, "grad_norm": 0.81528834563179, "learning_rate": 3.113952473477937e-06, "loss": 0.0316, "step": 98880 }, { "epoch": 0.41260191436272753, "grad_norm": 0.6819164181384691, "learning_rate": 3.113873745072983e-06, "loss": 0.0272, "step": 98885 }, { "epoch": 0.4126227770777178, "grad_norm": 0.7966241265478913, "learning_rate": 3.1137950226390725e-06, "loss": 0.019, "step": 98890 }, { "epoch": 0.4126436397927081, "grad_norm": 1.0850300545410194, "learning_rate": 3.113716306175451e-06, "loss": 0.0379, "step": 98895 }, { "epoch": 0.4126645025076983, "grad_norm": 0.7815346675311025, "learning_rate": 3.113637595681363e-06, "loss": 0.0267, "step": 98900 }, { "epoch": 0.4126853652226886, "grad_norm": 0.6853253061965736, "learning_rate": 3.113558891156054e-06, "loss": 0.0259, "step": 98905 }, { "epoch": 0.4127062279376789, "grad_norm": 0.645053428089031, "learning_rate": 3.113480192598771e-06, "loss": 0.0176, "step": 98910 }, { "epoch": 0.41272709065266916, "grad_norm": 0.5687809421673315, "learning_rate": 3.1134015000087592e-06, "loss": 0.0163, "step": 98915 }, { "epoch": 0.41274795336765946, "grad_norm": 1.2948909857541449, "learning_rate": 3.113322813385263e-06, "loss": 0.0323, "step": 98920 }, { "epoch": 0.41276881608264976, "grad_norm": 0.323474759599776, "learning_rate": 3.1132441327275315e-06, "loss": 0.0231, "step": 98925 }, { "epoch": 0.41278967879764, "grad_norm": 1.323883939415417, "learning_rate": 3.1131654580348075e-06, "loss": 0.0423, "step": 98930 }, { "epoch": 0.4128105415126303, "grad_norm": 0.9435099862481807, "learning_rate": 3.1130867893063398e-06, "loss": 0.0212, "step": 98935 }, { "epoch": 0.41283140422762055, "grad_norm": 0.41102778043805954, "learning_rate": 3.1130081265413736e-06, "loss": 0.0245, "step": 98940 }, { "epoch": 0.41285226694261085, "grad_norm": 0.8896799527025068, "learning_rate": 3.1129294697391565e-06, "loss": 0.0384, "step": 98945 }, { "epoch": 0.41287312965760115, "grad_norm": 0.573973626775802, "learning_rate": 3.1128508188989344e-06, "loss": 0.026, "step": 98950 }, { "epoch": 0.4128939923725914, "grad_norm": 0.3232166949926638, "learning_rate": 3.112772174019954e-06, "loss": 0.0264, "step": 98955 }, { "epoch": 0.4129148550875817, "grad_norm": 0.7995496863662314, "learning_rate": 3.1126935351014638e-06, "loss": 0.0272, "step": 98960 }, { "epoch": 0.41293571780257193, "grad_norm": 0.8061772178056386, "learning_rate": 3.1126149021427087e-06, "loss": 0.0257, "step": 98965 }, { "epoch": 0.41295658051756223, "grad_norm": 0.4529638741022003, "learning_rate": 3.112536275142938e-06, "loss": 0.0168, "step": 98970 }, { "epoch": 0.41297744323255253, "grad_norm": 0.6777103338080128, "learning_rate": 3.1124576541013972e-06, "loss": 0.0173, "step": 98975 }, { "epoch": 0.4129983059475428, "grad_norm": 0.7395725870355442, "learning_rate": 3.1123790390173357e-06, "loss": 0.0229, "step": 98980 }, { "epoch": 0.4130191686625331, "grad_norm": 0.7151507469351323, "learning_rate": 3.11230042989e-06, "loss": 0.0242, "step": 98985 }, { "epoch": 0.4130400313775233, "grad_norm": 1.047443369127295, "learning_rate": 3.1122218267186382e-06, "loss": 0.0243, "step": 98990 }, { "epoch": 0.4130608940925136, "grad_norm": 0.9355306259733783, "learning_rate": 3.1121432295024983e-06, "loss": 0.031, "step": 98995 }, { "epoch": 0.4130817568075039, "grad_norm": 0.7323442988972937, "learning_rate": 3.112064638240828e-06, "loss": 0.0357, "step": 99000 }, { "epoch": 0.41310261952249416, "grad_norm": 0.4184918240687359, "learning_rate": 3.1119860529328766e-06, "loss": 0.021, "step": 99005 }, { "epoch": 0.41312348223748446, "grad_norm": 1.1538364171268916, "learning_rate": 3.111907473577891e-06, "loss": 0.0353, "step": 99010 }, { "epoch": 0.41314434495247476, "grad_norm": 1.7254365770258175, "learning_rate": 3.11182890017512e-06, "loss": 0.0293, "step": 99015 }, { "epoch": 0.413165207667465, "grad_norm": 0.523398077095362, "learning_rate": 3.111750332723813e-06, "loss": 0.0191, "step": 99020 }, { "epoch": 0.4131860703824553, "grad_norm": 0.5274289255926895, "learning_rate": 3.1116717712232173e-06, "loss": 0.0224, "step": 99025 }, { "epoch": 0.41320693309744555, "grad_norm": 0.4139062902392731, "learning_rate": 3.1115932156725836e-06, "loss": 0.0236, "step": 99030 }, { "epoch": 0.41322779581243585, "grad_norm": 0.9460004347774789, "learning_rate": 3.11151466607116e-06, "loss": 0.029, "step": 99035 }, { "epoch": 0.41324865852742615, "grad_norm": 0.7823552547378356, "learning_rate": 3.111436122418195e-06, "loss": 0.0338, "step": 99040 }, { "epoch": 0.4132695212424164, "grad_norm": 0.774831510496876, "learning_rate": 3.1113575847129386e-06, "loss": 0.033, "step": 99045 }, { "epoch": 0.4132903839574067, "grad_norm": 0.8804281049536399, "learning_rate": 3.11127905295464e-06, "loss": 0.0273, "step": 99050 }, { "epoch": 0.41331124667239694, "grad_norm": 0.8504640054999584, "learning_rate": 3.1112005271425483e-06, "loss": 0.0281, "step": 99055 }, { "epoch": 0.41333210938738724, "grad_norm": 0.9050509701308935, "learning_rate": 3.1111220072759143e-06, "loss": 0.0194, "step": 99060 }, { "epoch": 0.41335297210237754, "grad_norm": 0.7009909135879052, "learning_rate": 3.111043493353987e-06, "loss": 0.0216, "step": 99065 }, { "epoch": 0.4133738348173678, "grad_norm": 0.7773612829147372, "learning_rate": 3.110964985376016e-06, "loss": 0.0298, "step": 99070 }, { "epoch": 0.4133946975323581, "grad_norm": 0.9893779362481545, "learning_rate": 3.1108864833412517e-06, "loss": 0.0236, "step": 99075 }, { "epoch": 0.4134155602473483, "grad_norm": 0.7221741758595119, "learning_rate": 3.110807987248945e-06, "loss": 0.0206, "step": 99080 }, { "epoch": 0.4134364229623386, "grad_norm": 1.0371218012527355, "learning_rate": 3.1107294970983442e-06, "loss": 0.028, "step": 99085 }, { "epoch": 0.4134572856773289, "grad_norm": 0.7097149605602607, "learning_rate": 3.1106510128887026e-06, "loss": 0.0219, "step": 99090 }, { "epoch": 0.41347814839231917, "grad_norm": 1.0966486422167068, "learning_rate": 3.110572534619268e-06, "loss": 0.0439, "step": 99095 }, { "epoch": 0.41349901110730947, "grad_norm": 0.4436681186787822, "learning_rate": 3.110494062289293e-06, "loss": 0.025, "step": 99100 }, { "epoch": 0.41351987382229977, "grad_norm": 0.6309250103835898, "learning_rate": 3.110415595898028e-06, "loss": 0.0331, "step": 99105 }, { "epoch": 0.41354073653729, "grad_norm": 0.7258044761170909, "learning_rate": 3.110337135444723e-06, "loss": 0.0253, "step": 99110 }, { "epoch": 0.4135615992522803, "grad_norm": 1.3554311419951623, "learning_rate": 3.110258680928631e-06, "loss": 0.0239, "step": 99115 }, { "epoch": 0.41358246196727055, "grad_norm": 2.1011136415910583, "learning_rate": 3.1101802323490015e-06, "loss": 0.0285, "step": 99120 }, { "epoch": 0.41360332468226085, "grad_norm": 0.322829336398082, "learning_rate": 3.1101017897050873e-06, "loss": 0.0248, "step": 99125 }, { "epoch": 0.41362418739725115, "grad_norm": 0.9641833276501816, "learning_rate": 3.110023352996138e-06, "loss": 0.0222, "step": 99130 }, { "epoch": 0.4136450501122414, "grad_norm": 0.9325973734068298, "learning_rate": 3.1099449222214066e-06, "loss": 0.0211, "step": 99135 }, { "epoch": 0.4136659128272317, "grad_norm": 0.536208309193331, "learning_rate": 3.109866497380145e-06, "loss": 0.0165, "step": 99140 }, { "epoch": 0.41368677554222194, "grad_norm": 0.7564477560474602, "learning_rate": 3.109788078471605e-06, "loss": 0.0177, "step": 99145 }, { "epoch": 0.41370763825721224, "grad_norm": 0.36099239754573575, "learning_rate": 3.1097096654950377e-06, "loss": 0.018, "step": 99150 }, { "epoch": 0.41372850097220254, "grad_norm": 0.4491545648526028, "learning_rate": 3.109631258449696e-06, "loss": 0.029, "step": 99155 }, { "epoch": 0.4137493636871928, "grad_norm": 0.9931913299723333, "learning_rate": 3.1095528573348332e-06, "loss": 0.033, "step": 99160 }, { "epoch": 0.4137702264021831, "grad_norm": 0.7616229204288603, "learning_rate": 3.1094744621497003e-06, "loss": 0.0305, "step": 99165 }, { "epoch": 0.4137910891171733, "grad_norm": 1.1523164636976544, "learning_rate": 3.109396072893549e-06, "loss": 0.0307, "step": 99170 }, { "epoch": 0.4138119518321636, "grad_norm": 0.5658047728989094, "learning_rate": 3.1093176895656348e-06, "loss": 0.0255, "step": 99175 }, { "epoch": 0.4138328145471539, "grad_norm": 0.5750536128355129, "learning_rate": 3.109239312165208e-06, "loss": 0.0306, "step": 99180 }, { "epoch": 0.41385367726214417, "grad_norm": 0.9231104892496331, "learning_rate": 3.1091609406915224e-06, "loss": 0.0381, "step": 99185 }, { "epoch": 0.41387453997713447, "grad_norm": 0.7421454708439413, "learning_rate": 3.1090825751438326e-06, "loss": 0.0257, "step": 99190 }, { "epoch": 0.41389540269212477, "grad_norm": 0.566123377740162, "learning_rate": 3.109004215521389e-06, "loss": 0.0293, "step": 99195 }, { "epoch": 0.413916265407115, "grad_norm": 0.5876608666597094, "learning_rate": 3.108925861823447e-06, "loss": 0.0279, "step": 99200 }, { "epoch": 0.4139371281221053, "grad_norm": 1.4137231833720016, "learning_rate": 3.108847514049259e-06, "loss": 0.0316, "step": 99205 }, { "epoch": 0.41395799083709556, "grad_norm": 0.4958404224251035, "learning_rate": 3.1087691721980797e-06, "loss": 0.0186, "step": 99210 }, { "epoch": 0.41397885355208586, "grad_norm": 0.6893120416453166, "learning_rate": 3.108690836269162e-06, "loss": 0.0278, "step": 99215 }, { "epoch": 0.41399971626707616, "grad_norm": 0.6283308422246149, "learning_rate": 3.10861250626176e-06, "loss": 0.0266, "step": 99220 }, { "epoch": 0.4140205789820664, "grad_norm": 0.9310199818233618, "learning_rate": 3.108534182175128e-06, "loss": 0.0225, "step": 99225 }, { "epoch": 0.4140414416970567, "grad_norm": 0.8173284787756493, "learning_rate": 3.1084558640085194e-06, "loss": 0.0261, "step": 99230 }, { "epoch": 0.41406230441204694, "grad_norm": 0.92840265096542, "learning_rate": 3.1083775517611893e-06, "loss": 0.0247, "step": 99235 }, { "epoch": 0.41408316712703724, "grad_norm": 0.5240055094943262, "learning_rate": 3.1082992454323918e-06, "loss": 0.0187, "step": 99240 }, { "epoch": 0.41410402984202754, "grad_norm": 0.7281949871224161, "learning_rate": 3.1082209450213814e-06, "loss": 0.0256, "step": 99245 }, { "epoch": 0.4141248925570178, "grad_norm": 0.9566363284703393, "learning_rate": 3.1081426505274125e-06, "loss": 0.0255, "step": 99250 }, { "epoch": 0.4141457552720081, "grad_norm": 1.0483745772977067, "learning_rate": 3.10806436194974e-06, "loss": 0.0222, "step": 99255 }, { "epoch": 0.41416661798699833, "grad_norm": 1.5303984236887014, "learning_rate": 3.10798607928762e-06, "loss": 0.0201, "step": 99260 }, { "epoch": 0.41418748070198863, "grad_norm": 0.4519703492589672, "learning_rate": 3.1079078025403054e-06, "loss": 0.0257, "step": 99265 }, { "epoch": 0.41420834341697893, "grad_norm": 0.6509498718332153, "learning_rate": 3.1078295317070533e-06, "loss": 0.036, "step": 99270 }, { "epoch": 0.4142292061319692, "grad_norm": 0.6420674744165439, "learning_rate": 3.107751266787118e-06, "loss": 0.022, "step": 99275 }, { "epoch": 0.4142500688469595, "grad_norm": 0.5401716482996762, "learning_rate": 3.1076730077797556e-06, "loss": 0.0257, "step": 99280 }, { "epoch": 0.4142709315619498, "grad_norm": 0.585138323068095, "learning_rate": 3.1075947546842204e-06, "loss": 0.024, "step": 99285 }, { "epoch": 0.41429179427694, "grad_norm": 0.6029266466432257, "learning_rate": 3.10751650749977e-06, "loss": 0.02, "step": 99290 }, { "epoch": 0.4143126569919303, "grad_norm": 0.6559208080931322, "learning_rate": 3.107438266225659e-06, "loss": 0.0286, "step": 99295 }, { "epoch": 0.41433351970692056, "grad_norm": 0.5489762180727442, "learning_rate": 3.107360030861144e-06, "loss": 0.0242, "step": 99300 }, { "epoch": 0.41435438242191086, "grad_norm": 0.3161066646725471, "learning_rate": 3.10728180140548e-06, "loss": 0.0285, "step": 99305 }, { "epoch": 0.41437524513690116, "grad_norm": 0.8426543152077577, "learning_rate": 3.107203577857925e-06, "loss": 0.0313, "step": 99310 }, { "epoch": 0.4143961078518914, "grad_norm": 3.137374161786206, "learning_rate": 3.1071253602177336e-06, "loss": 0.0278, "step": 99315 }, { "epoch": 0.4144169705668817, "grad_norm": 0.544495711563213, "learning_rate": 3.107047148484164e-06, "loss": 0.0234, "step": 99320 }, { "epoch": 0.41443783328187195, "grad_norm": 0.46455252423101423, "learning_rate": 3.1069689426564716e-06, "loss": 0.0222, "step": 99325 }, { "epoch": 0.41445869599686225, "grad_norm": 1.9103880261242459, "learning_rate": 3.1068907427339133e-06, "loss": 0.0299, "step": 99330 }, { "epoch": 0.41447955871185255, "grad_norm": 0.570688862815313, "learning_rate": 3.1068125487157465e-06, "loss": 0.0256, "step": 99335 }, { "epoch": 0.4145004214268428, "grad_norm": 1.1364194283254552, "learning_rate": 3.1067343606012278e-06, "loss": 0.0335, "step": 99340 }, { "epoch": 0.4145212841418331, "grad_norm": 0.7687553246061534, "learning_rate": 3.1066561783896147e-06, "loss": 0.0254, "step": 99345 }, { "epoch": 0.41454214685682333, "grad_norm": 0.6803559458175397, "learning_rate": 3.1065780020801645e-06, "loss": 0.0329, "step": 99350 }, { "epoch": 0.41456300957181363, "grad_norm": 1.8485604942343723, "learning_rate": 3.106499831672134e-06, "loss": 0.0234, "step": 99355 }, { "epoch": 0.41458387228680393, "grad_norm": 1.0156613319781773, "learning_rate": 3.1064216671647813e-06, "loss": 0.0496, "step": 99360 }, { "epoch": 0.4146047350017942, "grad_norm": 0.8708510458573326, "learning_rate": 3.1063435085573647e-06, "loss": 0.0321, "step": 99365 }, { "epoch": 0.4146255977167845, "grad_norm": 0.7295582082757084, "learning_rate": 3.106265355849141e-06, "loss": 0.0294, "step": 99370 }, { "epoch": 0.4146464604317748, "grad_norm": 0.47463495487519125, "learning_rate": 3.1061872090393676e-06, "loss": 0.0197, "step": 99375 }, { "epoch": 0.414667323146765, "grad_norm": 0.7965595432837272, "learning_rate": 3.1061090681273036e-06, "loss": 0.0228, "step": 99380 }, { "epoch": 0.4146881858617553, "grad_norm": 0.6917316395123497, "learning_rate": 3.1060309331122076e-06, "loss": 0.0297, "step": 99385 }, { "epoch": 0.41470904857674556, "grad_norm": 0.7543602529265333, "learning_rate": 3.105952803993337e-06, "loss": 0.0227, "step": 99390 }, { "epoch": 0.41472991129173586, "grad_norm": 0.9045620768944497, "learning_rate": 3.1058746807699515e-06, "loss": 0.0259, "step": 99395 }, { "epoch": 0.41475077400672616, "grad_norm": 0.6250541803852135, "learning_rate": 3.1057965634413083e-06, "loss": 0.033, "step": 99400 }, { "epoch": 0.4147716367217164, "grad_norm": 1.3189132030371908, "learning_rate": 3.1057184520066664e-06, "loss": 0.0287, "step": 99405 }, { "epoch": 0.4147924994367067, "grad_norm": 1.2548889471518372, "learning_rate": 3.105640346465285e-06, "loss": 0.0279, "step": 99410 }, { "epoch": 0.41481336215169695, "grad_norm": 0.47748382045618665, "learning_rate": 3.1055622468164233e-06, "loss": 0.0318, "step": 99415 }, { "epoch": 0.41483422486668725, "grad_norm": 0.6776748862971922, "learning_rate": 3.10548415305934e-06, "loss": 0.0277, "step": 99420 }, { "epoch": 0.41485508758167755, "grad_norm": 0.8032618006307559, "learning_rate": 3.1054060651932944e-06, "loss": 0.0273, "step": 99425 }, { "epoch": 0.4148759502966678, "grad_norm": 0.5046722064543533, "learning_rate": 3.1053279832175457e-06, "loss": 0.0261, "step": 99430 }, { "epoch": 0.4148968130116581, "grad_norm": 0.5541584470769108, "learning_rate": 3.1052499071313547e-06, "loss": 0.0243, "step": 99435 }, { "epoch": 0.41491767572664834, "grad_norm": 0.7689039454701209, "learning_rate": 3.105171836933979e-06, "loss": 0.0214, "step": 99440 }, { "epoch": 0.41493853844163864, "grad_norm": 0.6282690296376645, "learning_rate": 3.10509377262468e-06, "loss": 0.0257, "step": 99445 }, { "epoch": 0.41495940115662894, "grad_norm": 0.7496345396616363, "learning_rate": 3.105015714202717e-06, "loss": 0.0224, "step": 99450 }, { "epoch": 0.4149802638716192, "grad_norm": 1.032761552116762, "learning_rate": 3.10493766166735e-06, "loss": 0.0268, "step": 99455 }, { "epoch": 0.4150011265866095, "grad_norm": 0.9165755012707614, "learning_rate": 3.1048596150178384e-06, "loss": 0.031, "step": 99460 }, { "epoch": 0.4150219893015997, "grad_norm": 0.9286913450428731, "learning_rate": 3.1047815742534447e-06, "loss": 0.0279, "step": 99465 }, { "epoch": 0.41504285201659, "grad_norm": 0.5211761310986284, "learning_rate": 3.1047035393734267e-06, "loss": 0.0259, "step": 99470 }, { "epoch": 0.4150637147315803, "grad_norm": 0.509658785531594, "learning_rate": 3.1046255103770473e-06, "loss": 0.0203, "step": 99475 }, { "epoch": 0.41508457744657057, "grad_norm": 0.8059258231995114, "learning_rate": 3.104547487263565e-06, "loss": 0.0275, "step": 99480 }, { "epoch": 0.41510544016156087, "grad_norm": 7.751528903189392, "learning_rate": 3.104469470032242e-06, "loss": 0.1049, "step": 99485 }, { "epoch": 0.41512630287655117, "grad_norm": 0.7478855955348687, "learning_rate": 3.1043914586823397e-06, "loss": 0.0242, "step": 99490 }, { "epoch": 0.4151471655915414, "grad_norm": 0.6327073189984004, "learning_rate": 3.104313453213117e-06, "loss": 0.0173, "step": 99495 }, { "epoch": 0.4151680283065317, "grad_norm": 0.502178801701968, "learning_rate": 3.1042354536238377e-06, "loss": 0.0221, "step": 99500 }, { "epoch": 0.41518889102152196, "grad_norm": 1.2589254164371524, "learning_rate": 3.1041574599137616e-06, "loss": 0.0323, "step": 99505 }, { "epoch": 0.41520975373651225, "grad_norm": 1.1280158288682942, "learning_rate": 3.1040794720821505e-06, "loss": 0.0252, "step": 99510 }, { "epoch": 0.41523061645150255, "grad_norm": 0.3737614187808525, "learning_rate": 3.104001490128265e-06, "loss": 0.0198, "step": 99515 }, { "epoch": 0.4152514791664928, "grad_norm": 0.4800138648324602, "learning_rate": 3.1039235140513696e-06, "loss": 0.0225, "step": 99520 }, { "epoch": 0.4152723418814831, "grad_norm": 0.5556228077909936, "learning_rate": 3.103845543850724e-06, "loss": 0.0243, "step": 99525 }, { "epoch": 0.41529320459647334, "grad_norm": 0.7274797773173377, "learning_rate": 3.1037675795255895e-06, "loss": 0.0251, "step": 99530 }, { "epoch": 0.41531406731146364, "grad_norm": 1.3563749844749389, "learning_rate": 3.103689621075229e-06, "loss": 0.0264, "step": 99535 }, { "epoch": 0.41533493002645394, "grad_norm": 1.0843751459116968, "learning_rate": 3.103611668498906e-06, "loss": 0.0222, "step": 99540 }, { "epoch": 0.4153557927414442, "grad_norm": 0.4768709633245835, "learning_rate": 3.103533721795882e-06, "loss": 0.0197, "step": 99545 }, { "epoch": 0.4153766554564345, "grad_norm": 0.5144697841768732, "learning_rate": 3.103455780965419e-06, "loss": 0.0283, "step": 99550 }, { "epoch": 0.41539751817142473, "grad_norm": 0.43155339621854816, "learning_rate": 3.1033778460067798e-06, "loss": 0.0189, "step": 99555 }, { "epoch": 0.41541838088641503, "grad_norm": 1.4878952999885613, "learning_rate": 3.1032999169192287e-06, "loss": 0.0259, "step": 99560 }, { "epoch": 0.41543924360140533, "grad_norm": 0.778096380168416, "learning_rate": 3.103221993702026e-06, "loss": 0.0239, "step": 99565 }, { "epoch": 0.41546010631639557, "grad_norm": 0.4559186074138287, "learning_rate": 3.103144076354436e-06, "loss": 0.0243, "step": 99570 }, { "epoch": 0.41548096903138587, "grad_norm": 0.8278188427911153, "learning_rate": 3.1030661648757224e-06, "loss": 0.0408, "step": 99575 }, { "epoch": 0.41550183174637617, "grad_norm": 0.931104577483399, "learning_rate": 3.102988259265147e-06, "loss": 0.0244, "step": 99580 }, { "epoch": 0.4155226944613664, "grad_norm": 0.5936376909731882, "learning_rate": 3.102910359521975e-06, "loss": 0.0221, "step": 99585 }, { "epoch": 0.4155435571763567, "grad_norm": 0.6113058389485089, "learning_rate": 3.102832465645469e-06, "loss": 0.0269, "step": 99590 }, { "epoch": 0.41556441989134696, "grad_norm": 0.356353459319108, "learning_rate": 3.1027545776348923e-06, "loss": 0.0217, "step": 99595 }, { "epoch": 0.41558528260633726, "grad_norm": 1.1861336783045633, "learning_rate": 3.1026766954895095e-06, "loss": 0.0359, "step": 99600 }, { "epoch": 0.41560614532132756, "grad_norm": 0.4934977134286224, "learning_rate": 3.102598819208584e-06, "loss": 0.0185, "step": 99605 }, { "epoch": 0.4156270080363178, "grad_norm": 0.8515572050580611, "learning_rate": 3.1025209487913805e-06, "loss": 0.0277, "step": 99610 }, { "epoch": 0.4156478707513081, "grad_norm": 0.7985685602661057, "learning_rate": 3.102443084237162e-06, "loss": 0.0306, "step": 99615 }, { "epoch": 0.41566873346629835, "grad_norm": 0.8336684472475672, "learning_rate": 3.1023652255451937e-06, "loss": 0.0325, "step": 99620 }, { "epoch": 0.41568959618128865, "grad_norm": 0.890204855885889, "learning_rate": 3.1022873727147402e-06, "loss": 0.029, "step": 99625 }, { "epoch": 0.41571045889627894, "grad_norm": 0.46169187606500983, "learning_rate": 3.1022095257450653e-06, "loss": 0.0221, "step": 99630 }, { "epoch": 0.4157313216112692, "grad_norm": 0.8449387295243861, "learning_rate": 3.102131684635435e-06, "loss": 0.0258, "step": 99635 }, { "epoch": 0.4157521843262595, "grad_norm": 1.26646808658956, "learning_rate": 3.102053849385112e-06, "loss": 0.0276, "step": 99640 }, { "epoch": 0.41577304704124973, "grad_norm": 0.4894859611361825, "learning_rate": 3.1019760199933634e-06, "loss": 0.025, "step": 99645 }, { "epoch": 0.41579390975624003, "grad_norm": 0.7537402438268553, "learning_rate": 3.101898196459453e-06, "loss": 0.0282, "step": 99650 }, { "epoch": 0.41581477247123033, "grad_norm": 0.8744217266828034, "learning_rate": 3.1018203787826467e-06, "loss": 0.0231, "step": 99655 }, { "epoch": 0.4158356351862206, "grad_norm": 0.322382422062711, "learning_rate": 3.101742566962209e-06, "loss": 0.0234, "step": 99660 }, { "epoch": 0.4158564979012109, "grad_norm": 0.8638533509239711, "learning_rate": 3.1016647609974067e-06, "loss": 0.0245, "step": 99665 }, { "epoch": 0.4158773606162012, "grad_norm": 1.0770191583983535, "learning_rate": 3.101586960887505e-06, "loss": 0.0345, "step": 99670 }, { "epoch": 0.4158982233311914, "grad_norm": 0.631624934218276, "learning_rate": 3.1015091666317683e-06, "loss": 0.0216, "step": 99675 }, { "epoch": 0.4159190860461817, "grad_norm": 1.1376410583014072, "learning_rate": 3.1014313782294644e-06, "loss": 0.0223, "step": 99680 }, { "epoch": 0.41593994876117196, "grad_norm": 0.761775736631141, "learning_rate": 3.1013535956798574e-06, "loss": 0.0339, "step": 99685 }, { "epoch": 0.41596081147616226, "grad_norm": 1.6425492997128543, "learning_rate": 3.101275818982215e-06, "loss": 0.0549, "step": 99690 }, { "epoch": 0.41598167419115256, "grad_norm": 1.2052511285970957, "learning_rate": 3.101198048135803e-06, "loss": 0.0253, "step": 99695 }, { "epoch": 0.4160025369061428, "grad_norm": 0.7565660884231225, "learning_rate": 3.101120283139887e-06, "loss": 0.0252, "step": 99700 }, { "epoch": 0.4160233996211331, "grad_norm": 0.9965132674422815, "learning_rate": 3.101042523993734e-06, "loss": 0.0271, "step": 99705 }, { "epoch": 0.41604426233612335, "grad_norm": 0.7617354839883205, "learning_rate": 3.1009647706966113e-06, "loss": 0.0269, "step": 99710 }, { "epoch": 0.41606512505111365, "grad_norm": 0.804987283789272, "learning_rate": 3.100887023247785e-06, "loss": 0.0257, "step": 99715 }, { "epoch": 0.41608598776610395, "grad_norm": 0.556366480177788, "learning_rate": 3.1008092816465217e-06, "loss": 0.0211, "step": 99720 }, { "epoch": 0.4161068504810942, "grad_norm": 0.8567452334007307, "learning_rate": 3.1007315458920894e-06, "loss": 0.0236, "step": 99725 }, { "epoch": 0.4161277131960845, "grad_norm": 0.7885589338555955, "learning_rate": 3.1006538159837547e-06, "loss": 0.036, "step": 99730 }, { "epoch": 0.41614857591107474, "grad_norm": 1.267965527310779, "learning_rate": 3.1005760919207843e-06, "loss": 0.0262, "step": 99735 }, { "epoch": 0.41616943862606504, "grad_norm": 0.9089403502414453, "learning_rate": 3.1004983737024467e-06, "loss": 0.0233, "step": 99740 }, { "epoch": 0.41619030134105534, "grad_norm": 1.0107222658499682, "learning_rate": 3.1004206613280087e-06, "loss": 0.0328, "step": 99745 }, { "epoch": 0.4162111640560456, "grad_norm": 1.1437035273309264, "learning_rate": 3.1003429547967386e-06, "loss": 0.0271, "step": 99750 }, { "epoch": 0.4162320267710359, "grad_norm": 0.5502844497851012, "learning_rate": 3.1002652541079033e-06, "loss": 0.0262, "step": 99755 }, { "epoch": 0.4162528894860262, "grad_norm": 0.5830183672314819, "learning_rate": 3.1001875592607704e-06, "loss": 0.0208, "step": 99760 }, { "epoch": 0.4162737522010164, "grad_norm": 0.9320433396837536, "learning_rate": 3.1001098702546103e-06, "loss": 0.0289, "step": 99765 }, { "epoch": 0.4162946149160067, "grad_norm": 0.6033657263526233, "learning_rate": 3.1000321870886885e-06, "loss": 0.0353, "step": 99770 }, { "epoch": 0.41631547763099697, "grad_norm": 0.878032829342196, "learning_rate": 3.099954509762275e-06, "loss": 0.0299, "step": 99775 }, { "epoch": 0.41633634034598727, "grad_norm": 1.0155249876165988, "learning_rate": 3.0998768382746375e-06, "loss": 0.022, "step": 99780 }, { "epoch": 0.41635720306097757, "grad_norm": 0.6251989777594993, "learning_rate": 3.099799172625045e-06, "loss": 0.0235, "step": 99785 }, { "epoch": 0.4163780657759678, "grad_norm": 0.7088153390524968, "learning_rate": 3.0997215128127656e-06, "loss": 0.0257, "step": 99790 }, { "epoch": 0.4163989284909581, "grad_norm": 0.9789709055026244, "learning_rate": 3.099643858837068e-06, "loss": 0.0247, "step": 99795 }, { "epoch": 0.41641979120594835, "grad_norm": 0.8471437422223317, "learning_rate": 3.0995662106972224e-06, "loss": 0.0263, "step": 99800 }, { "epoch": 0.41644065392093865, "grad_norm": 0.7036036693401226, "learning_rate": 3.099488568392497e-06, "loss": 0.0263, "step": 99805 }, { "epoch": 0.41646151663592895, "grad_norm": 0.8183315334925558, "learning_rate": 3.0994109319221604e-06, "loss": 0.0227, "step": 99810 }, { "epoch": 0.4164823793509192, "grad_norm": 0.472060187134005, "learning_rate": 3.0993333012854837e-06, "loss": 0.0296, "step": 99815 }, { "epoch": 0.4165032420659095, "grad_norm": 1.9062649345538882, "learning_rate": 3.0992556764817337e-06, "loss": 0.0218, "step": 99820 }, { "epoch": 0.41652410478089974, "grad_norm": 1.6120909444693357, "learning_rate": 3.0991780575101826e-06, "loss": 0.0288, "step": 99825 }, { "epoch": 0.41654496749589004, "grad_norm": 0.6544091094327289, "learning_rate": 3.099100444370099e-06, "loss": 0.0293, "step": 99830 }, { "epoch": 0.41656583021088034, "grad_norm": 0.8109766665590518, "learning_rate": 3.0990228370607526e-06, "loss": 0.0328, "step": 99835 }, { "epoch": 0.4165866929258706, "grad_norm": 0.9250902200973282, "learning_rate": 3.0989452355814144e-06, "loss": 0.0228, "step": 99840 }, { "epoch": 0.4166075556408609, "grad_norm": 0.33162901178839216, "learning_rate": 3.0988676399313527e-06, "loss": 0.0312, "step": 99845 }, { "epoch": 0.4166284183558512, "grad_norm": 0.8145056973870158, "learning_rate": 3.098790050109839e-06, "loss": 0.0309, "step": 99850 }, { "epoch": 0.4166492810708414, "grad_norm": 0.9408256459681623, "learning_rate": 3.0987124661161432e-06, "loss": 0.0315, "step": 99855 }, { "epoch": 0.4166701437858317, "grad_norm": 0.5575597094898509, "learning_rate": 3.0986348879495365e-06, "loss": 0.0267, "step": 99860 }, { "epoch": 0.41669100650082197, "grad_norm": 0.5730517957651846, "learning_rate": 3.0985573156092887e-06, "loss": 0.0262, "step": 99865 }, { "epoch": 0.41671186921581227, "grad_norm": 0.5558388130418096, "learning_rate": 3.0984797490946705e-06, "loss": 0.0273, "step": 99870 }, { "epoch": 0.41673273193080257, "grad_norm": 0.8705780269624498, "learning_rate": 3.098402188404953e-06, "loss": 0.0296, "step": 99875 }, { "epoch": 0.4167535946457928, "grad_norm": 0.9542603245266473, "learning_rate": 3.098324633539408e-06, "loss": 0.03, "step": 99880 }, { "epoch": 0.4167744573607831, "grad_norm": 1.3419748664262947, "learning_rate": 3.098247084497305e-06, "loss": 0.0333, "step": 99885 }, { "epoch": 0.41679532007577336, "grad_norm": 0.9365769457605253, "learning_rate": 3.0981695412779168e-06, "loss": 0.0304, "step": 99890 }, { "epoch": 0.41681618279076366, "grad_norm": 1.9076074755384804, "learning_rate": 3.098092003880514e-06, "loss": 0.0329, "step": 99895 }, { "epoch": 0.41683704550575396, "grad_norm": 1.3881460673314692, "learning_rate": 3.098014472304369e-06, "loss": 0.0326, "step": 99900 }, { "epoch": 0.4168579082207442, "grad_norm": 0.7468186814055664, "learning_rate": 3.097936946548751e-06, "loss": 0.0244, "step": 99905 }, { "epoch": 0.4168787709357345, "grad_norm": 1.101690048841114, "learning_rate": 3.0978594266129346e-06, "loss": 0.0318, "step": 99910 }, { "epoch": 0.41689963365072474, "grad_norm": 0.6723384916321331, "learning_rate": 3.0977819124961905e-06, "loss": 0.02, "step": 99915 }, { "epoch": 0.41692049636571504, "grad_norm": 1.5612753592274942, "learning_rate": 3.0977044041977905e-06, "loss": 0.0209, "step": 99920 }, { "epoch": 0.41694135908070534, "grad_norm": 0.7553615376128637, "learning_rate": 3.097626901717007e-06, "loss": 0.0285, "step": 99925 }, { "epoch": 0.4169622217956956, "grad_norm": 0.9532041903047189, "learning_rate": 3.0975494050531123e-06, "loss": 0.027, "step": 99930 }, { "epoch": 0.4169830845106859, "grad_norm": 0.8046116353398538, "learning_rate": 3.0974719142053788e-06, "loss": 0.0251, "step": 99935 }, { "epoch": 0.4170039472256762, "grad_norm": 0.4447727228990262, "learning_rate": 3.097394429173079e-06, "loss": 0.0291, "step": 99940 }, { "epoch": 0.41702480994066643, "grad_norm": 0.618387138393367, "learning_rate": 3.0973169499554857e-06, "loss": 0.0178, "step": 99945 }, { "epoch": 0.41704567265565673, "grad_norm": 0.8029266841708618, "learning_rate": 3.097239476551871e-06, "loss": 0.0185, "step": 99950 }, { "epoch": 0.417066535370647, "grad_norm": 0.7447597601457536, "learning_rate": 3.0971620089615086e-06, "loss": 0.0211, "step": 99955 }, { "epoch": 0.4170873980856373, "grad_norm": 0.6405483637549305, "learning_rate": 3.0970845471836714e-06, "loss": 0.0307, "step": 99960 }, { "epoch": 0.4171082608006276, "grad_norm": 0.8258375457316101, "learning_rate": 3.097007091217632e-06, "loss": 0.0251, "step": 99965 }, { "epoch": 0.4171291235156178, "grad_norm": 0.9865260741308678, "learning_rate": 3.096929641062665e-06, "loss": 0.0293, "step": 99970 }, { "epoch": 0.4171499862306081, "grad_norm": 0.7219343812270875, "learning_rate": 3.0968521967180427e-06, "loss": 0.0187, "step": 99975 }, { "epoch": 0.41717084894559836, "grad_norm": 0.7966758996791728, "learning_rate": 3.096774758183038e-06, "loss": 0.021, "step": 99980 }, { "epoch": 0.41719171166058866, "grad_norm": 0.5452288203519179, "learning_rate": 3.0966973254569267e-06, "loss": 0.0226, "step": 99985 }, { "epoch": 0.41721257437557896, "grad_norm": 0.8139049432074823, "learning_rate": 3.0966198985389807e-06, "loss": 0.0217, "step": 99990 }, { "epoch": 0.4172334370905692, "grad_norm": 1.2398569487965765, "learning_rate": 3.0965424774284743e-06, "loss": 0.0487, "step": 99995 }, { "epoch": 0.4172542998055595, "grad_norm": 0.7030729061126053, "learning_rate": 3.0964650621246824e-06, "loss": 0.0195, "step": 100000 }, { "epoch": 0.41727516252054975, "grad_norm": 0.6215134807127946, "learning_rate": 3.0963876526268783e-06, "loss": 0.0226, "step": 100005 }, { "epoch": 0.41729602523554005, "grad_norm": 0.8925409357818077, "learning_rate": 3.0963102489343364e-06, "loss": 0.0261, "step": 100010 }, { "epoch": 0.41731688795053035, "grad_norm": 0.610513003317224, "learning_rate": 3.096232851046332e-06, "loss": 0.0222, "step": 100015 }, { "epoch": 0.4173377506655206, "grad_norm": 0.8804983565607404, "learning_rate": 3.0961554589621385e-06, "loss": 0.0373, "step": 100020 }, { "epoch": 0.4173586133805109, "grad_norm": 0.4500528870361514, "learning_rate": 3.0960780726810314e-06, "loss": 0.0194, "step": 100025 }, { "epoch": 0.4173794760955012, "grad_norm": 0.9524587303845441, "learning_rate": 3.096000692202285e-06, "loss": 0.0345, "step": 100030 }, { "epoch": 0.41740033881049143, "grad_norm": 0.7898733651213027, "learning_rate": 3.0959233175251745e-06, "loss": 0.0384, "step": 100035 }, { "epoch": 0.41742120152548173, "grad_norm": 1.3574973160426589, "learning_rate": 3.0958459486489758e-06, "loss": 0.0239, "step": 100040 }, { "epoch": 0.417442064240472, "grad_norm": 0.5573498892663435, "learning_rate": 3.0957685855729614e-06, "loss": 0.0215, "step": 100045 }, { "epoch": 0.4174629269554623, "grad_norm": 0.7760520845102902, "learning_rate": 3.0956912282964097e-06, "loss": 0.0284, "step": 100050 }, { "epoch": 0.4174837896704526, "grad_norm": 0.3001351833026591, "learning_rate": 3.095613876818595e-06, "loss": 0.0274, "step": 100055 }, { "epoch": 0.4175046523854428, "grad_norm": 0.5574841616800611, "learning_rate": 3.0955365311387926e-06, "loss": 0.0321, "step": 100060 }, { "epoch": 0.4175255151004331, "grad_norm": 0.5757261703930212, "learning_rate": 3.095459191256278e-06, "loss": 0.0266, "step": 100065 }, { "epoch": 0.41754637781542336, "grad_norm": 0.845863478267905, "learning_rate": 3.0953818571703275e-06, "loss": 0.0308, "step": 100070 }, { "epoch": 0.41756724053041366, "grad_norm": 0.6043919424648285, "learning_rate": 3.095304528880218e-06, "loss": 0.0278, "step": 100075 }, { "epoch": 0.41758810324540396, "grad_norm": 0.4798752531331858, "learning_rate": 3.0952272063852236e-06, "loss": 0.0277, "step": 100080 }, { "epoch": 0.4176089659603942, "grad_norm": 0.6367242691262308, "learning_rate": 3.0951498896846214e-06, "loss": 0.0269, "step": 100085 }, { "epoch": 0.4176298286753845, "grad_norm": 0.8122254661414776, "learning_rate": 3.095072578777688e-06, "loss": 0.0299, "step": 100090 }, { "epoch": 0.41765069139037475, "grad_norm": 0.5977965195403715, "learning_rate": 3.0949952736637e-06, "loss": 0.0213, "step": 100095 }, { "epoch": 0.41767155410536505, "grad_norm": 0.6333686867379944, "learning_rate": 3.094917974341933e-06, "loss": 0.0215, "step": 100100 }, { "epoch": 0.41769241682035535, "grad_norm": 0.9786707172562843, "learning_rate": 3.094840680811665e-06, "loss": 0.0312, "step": 100105 }, { "epoch": 0.4177132795353456, "grad_norm": 1.3408030377889997, "learning_rate": 3.094763393072172e-06, "loss": 0.0226, "step": 100110 }, { "epoch": 0.4177341422503359, "grad_norm": 0.7954001625043137, "learning_rate": 3.094686111122731e-06, "loss": 0.0226, "step": 100115 }, { "epoch": 0.4177550049653262, "grad_norm": 0.9354965104697511, "learning_rate": 3.0946088349626195e-06, "loss": 0.03, "step": 100120 }, { "epoch": 0.41777586768031644, "grad_norm": 0.6207504867735865, "learning_rate": 3.0945315645911146e-06, "loss": 0.029, "step": 100125 }, { "epoch": 0.41779673039530674, "grad_norm": 0.5719537717689811, "learning_rate": 3.0944543000074933e-06, "loss": 0.0222, "step": 100130 }, { "epoch": 0.417817593110297, "grad_norm": 0.8086076658690767, "learning_rate": 3.094377041211033e-06, "loss": 0.024, "step": 100135 }, { "epoch": 0.4178384558252873, "grad_norm": 1.1292043139813015, "learning_rate": 3.094299788201012e-06, "loss": 0.0253, "step": 100140 }, { "epoch": 0.4178593185402776, "grad_norm": 0.504879493093534, "learning_rate": 3.0942225409767077e-06, "loss": 0.0297, "step": 100145 }, { "epoch": 0.4178801812552678, "grad_norm": 0.615071287267211, "learning_rate": 3.0941452995373978e-06, "loss": 0.0231, "step": 100150 }, { "epoch": 0.4179010439702581, "grad_norm": 0.759472212792208, "learning_rate": 3.0940680638823606e-06, "loss": 0.0304, "step": 100155 }, { "epoch": 0.41792190668524837, "grad_norm": 0.6185836000639737, "learning_rate": 3.0939908340108737e-06, "loss": 0.0188, "step": 100160 }, { "epoch": 0.41794276940023867, "grad_norm": 0.45328120103774044, "learning_rate": 3.0939136099222154e-06, "loss": 0.0293, "step": 100165 }, { "epoch": 0.41796363211522897, "grad_norm": 0.6615063545787976, "learning_rate": 3.093836391615665e-06, "loss": 0.0257, "step": 100170 }, { "epoch": 0.4179844948302192, "grad_norm": 0.8044115280330217, "learning_rate": 3.0937591790904997e-06, "loss": 0.0188, "step": 100175 }, { "epoch": 0.4180053575452095, "grad_norm": 0.36880542402940164, "learning_rate": 3.093681972345999e-06, "loss": 0.027, "step": 100180 }, { "epoch": 0.41802622026019975, "grad_norm": 0.6217947622116772, "learning_rate": 3.0936047713814405e-06, "loss": 0.0317, "step": 100185 }, { "epoch": 0.41804708297519005, "grad_norm": 0.6899280769025634, "learning_rate": 3.093527576196105e-06, "loss": 0.0346, "step": 100190 }, { "epoch": 0.41806794569018035, "grad_norm": 0.6548232796663144, "learning_rate": 3.0934503867892695e-06, "loss": 0.0312, "step": 100195 }, { "epoch": 0.4180888084051706, "grad_norm": 1.4828779452941068, "learning_rate": 3.0933732031602135e-06, "loss": 0.0346, "step": 100200 }, { "epoch": 0.4181096711201609, "grad_norm": 1.0994882545281217, "learning_rate": 3.093296025308218e-06, "loss": 0.0328, "step": 100205 }, { "epoch": 0.4181305338351512, "grad_norm": 0.3921179209050959, "learning_rate": 3.09321885323256e-06, "loss": 0.0182, "step": 100210 }, { "epoch": 0.41815139655014144, "grad_norm": 0.8708276986016747, "learning_rate": 3.093141686932521e-06, "loss": 0.0231, "step": 100215 }, { "epoch": 0.41817225926513174, "grad_norm": 0.8281893038413366, "learning_rate": 3.0930645264073787e-06, "loss": 0.0248, "step": 100220 }, { "epoch": 0.418193121980122, "grad_norm": 1.0043275522174329, "learning_rate": 3.0929873716564144e-06, "loss": 0.0309, "step": 100225 }, { "epoch": 0.4182139846951123, "grad_norm": 1.1640931093507585, "learning_rate": 3.0929102226789076e-06, "loss": 0.0296, "step": 100230 }, { "epoch": 0.4182348474101026, "grad_norm": 1.238182633271305, "learning_rate": 3.0928330794741374e-06, "loss": 0.0302, "step": 100235 }, { "epoch": 0.41825571012509283, "grad_norm": 2.189258422701495, "learning_rate": 3.092755942041385e-06, "loss": 0.0292, "step": 100240 }, { "epoch": 0.4182765728400831, "grad_norm": 0.5099063252555356, "learning_rate": 3.0926788103799303e-06, "loss": 0.0217, "step": 100245 }, { "epoch": 0.41829743555507337, "grad_norm": 0.4928347335809307, "learning_rate": 3.092601684489053e-06, "loss": 0.023, "step": 100250 }, { "epoch": 0.41831829827006367, "grad_norm": 0.5488455550574751, "learning_rate": 3.092524564368035e-06, "loss": 0.0219, "step": 100255 }, { "epoch": 0.41833916098505397, "grad_norm": 0.6538253556809367, "learning_rate": 3.0924474500161557e-06, "loss": 0.0208, "step": 100260 }, { "epoch": 0.4183600237000442, "grad_norm": 1.1280098913470453, "learning_rate": 3.092370341432696e-06, "loss": 0.0287, "step": 100265 }, { "epoch": 0.4183808864150345, "grad_norm": 1.0505401603198583, "learning_rate": 3.092293238616938e-06, "loss": 0.0259, "step": 100270 }, { "epoch": 0.41840174913002476, "grad_norm": 0.6478065759298731, "learning_rate": 3.0922161415681607e-06, "loss": 0.0189, "step": 100275 }, { "epoch": 0.41842261184501506, "grad_norm": 0.5631502275628318, "learning_rate": 3.092139050285647e-06, "loss": 0.0264, "step": 100280 }, { "epoch": 0.41844347456000536, "grad_norm": 1.4235397503704248, "learning_rate": 3.092061964768676e-06, "loss": 0.0341, "step": 100285 }, { "epoch": 0.4184643372749956, "grad_norm": 0.8816802573394822, "learning_rate": 3.0919848850165323e-06, "loss": 0.025, "step": 100290 }, { "epoch": 0.4184851999899859, "grad_norm": 0.6122351419421079, "learning_rate": 3.091907811028495e-06, "loss": 0.0289, "step": 100295 }, { "epoch": 0.4185060627049762, "grad_norm": 0.31181962314083045, "learning_rate": 3.091830742803846e-06, "loss": 0.0247, "step": 100300 }, { "epoch": 0.41852692541996644, "grad_norm": 0.8281609884825748, "learning_rate": 3.0917536803418675e-06, "loss": 0.0299, "step": 100305 }, { "epoch": 0.41854778813495674, "grad_norm": 0.6470562676044407, "learning_rate": 3.091676623641841e-06, "loss": 0.0275, "step": 100310 }, { "epoch": 0.418568650849947, "grad_norm": 0.934273921528527, "learning_rate": 3.091599572703049e-06, "loss": 0.032, "step": 100315 }, { "epoch": 0.4185895135649373, "grad_norm": 1.0828292260718466, "learning_rate": 3.091522527524774e-06, "loss": 0.0224, "step": 100320 }, { "epoch": 0.4186103762799276, "grad_norm": 0.36236724210574844, "learning_rate": 3.0914454881062967e-06, "loss": 0.0274, "step": 100325 }, { "epoch": 0.41863123899491783, "grad_norm": 0.6194266156542199, "learning_rate": 3.091368454446901e-06, "loss": 0.0194, "step": 100330 }, { "epoch": 0.41865210170990813, "grad_norm": 3.5703085982359695, "learning_rate": 3.0912914265458682e-06, "loss": 0.0247, "step": 100335 }, { "epoch": 0.4186729644248984, "grad_norm": 0.43891524068880705, "learning_rate": 3.0912144044024824e-06, "loss": 0.0248, "step": 100340 }, { "epoch": 0.4186938271398887, "grad_norm": 0.7030632311842224, "learning_rate": 3.0911373880160246e-06, "loss": 0.0238, "step": 100345 }, { "epoch": 0.418714689854879, "grad_norm": 1.4122625135780063, "learning_rate": 3.0910603773857783e-06, "loss": 0.0343, "step": 100350 }, { "epoch": 0.4187355525698692, "grad_norm": 0.7906375766320763, "learning_rate": 3.0909833725110276e-06, "loss": 0.0224, "step": 100355 }, { "epoch": 0.4187564152848595, "grad_norm": 0.855379621220554, "learning_rate": 3.090906373391055e-06, "loss": 0.0236, "step": 100360 }, { "epoch": 0.41877727799984976, "grad_norm": 0.5867357324890444, "learning_rate": 3.0908293800251433e-06, "loss": 0.0268, "step": 100365 }, { "epoch": 0.41879814071484006, "grad_norm": 0.8708601043799405, "learning_rate": 3.0907523924125756e-06, "loss": 0.0251, "step": 100370 }, { "epoch": 0.41881900342983036, "grad_norm": 0.7179060367905656, "learning_rate": 3.090675410552636e-06, "loss": 0.0248, "step": 100375 }, { "epoch": 0.4188398661448206, "grad_norm": 0.9836569090729805, "learning_rate": 3.090598434444608e-06, "loss": 0.0286, "step": 100380 }, { "epoch": 0.4188607288598109, "grad_norm": 3.352378501209555, "learning_rate": 3.0905214640877757e-06, "loss": 0.0222, "step": 100385 }, { "epoch": 0.4188815915748012, "grad_norm": 0.286938775981085, "learning_rate": 3.0904444994814233e-06, "loss": 0.0266, "step": 100390 }, { "epoch": 0.41890245428979145, "grad_norm": 0.9527908102660793, "learning_rate": 3.0903675406248334e-06, "loss": 0.0288, "step": 100395 }, { "epoch": 0.41892331700478175, "grad_norm": 1.2929686421311146, "learning_rate": 3.090290587517291e-06, "loss": 0.0332, "step": 100400 }, { "epoch": 0.418944179719772, "grad_norm": 1.3307601323656908, "learning_rate": 3.09021364015808e-06, "loss": 0.0357, "step": 100405 }, { "epoch": 0.4189650424347623, "grad_norm": 0.6938133095435038, "learning_rate": 3.0901366985464855e-06, "loss": 0.0259, "step": 100410 }, { "epoch": 0.4189859051497526, "grad_norm": 1.089906025683557, "learning_rate": 3.090059762681792e-06, "loss": 0.0367, "step": 100415 }, { "epoch": 0.41900676786474284, "grad_norm": 0.5885117242842755, "learning_rate": 3.0899828325632825e-06, "loss": 0.028, "step": 100420 }, { "epoch": 0.41902763057973313, "grad_norm": 0.4166669145468355, "learning_rate": 3.0899059081902437e-06, "loss": 0.0292, "step": 100425 }, { "epoch": 0.4190484932947234, "grad_norm": 0.5218549228594899, "learning_rate": 3.0898289895619586e-06, "loss": 0.0394, "step": 100430 }, { "epoch": 0.4190693560097137, "grad_norm": 0.7899402122315855, "learning_rate": 3.0897520766777143e-06, "loss": 0.0293, "step": 100435 }, { "epoch": 0.419090218724704, "grad_norm": 1.2691286577747243, "learning_rate": 3.0896751695367956e-06, "loss": 0.0307, "step": 100440 }, { "epoch": 0.4191110814396942, "grad_norm": 0.72372355733088, "learning_rate": 3.0895982681384858e-06, "loss": 0.0226, "step": 100445 }, { "epoch": 0.4191319441546845, "grad_norm": 0.7510966457720086, "learning_rate": 3.089521372482072e-06, "loss": 0.0274, "step": 100450 }, { "epoch": 0.41915280686967477, "grad_norm": 0.7729898201621694, "learning_rate": 3.089444482566839e-06, "loss": 0.0254, "step": 100455 }, { "epoch": 0.41917366958466507, "grad_norm": 0.6447430273004273, "learning_rate": 3.0893675983920733e-06, "loss": 0.033, "step": 100460 }, { "epoch": 0.41919453229965536, "grad_norm": 0.5503509806109019, "learning_rate": 3.0892907199570593e-06, "loss": 0.018, "step": 100465 }, { "epoch": 0.4192153950146456, "grad_norm": 0.6393154726045447, "learning_rate": 3.089213847261084e-06, "loss": 0.025, "step": 100470 }, { "epoch": 0.4192362577296359, "grad_norm": 0.6642066291060018, "learning_rate": 3.0891369803034327e-06, "loss": 0.0294, "step": 100475 }, { "epoch": 0.4192571204446262, "grad_norm": 2.3341827557078783, "learning_rate": 3.0890601190833917e-06, "loss": 0.0307, "step": 100480 }, { "epoch": 0.41927798315961645, "grad_norm": 0.5759958329929835, "learning_rate": 3.088983263600248e-06, "loss": 0.0287, "step": 100485 }, { "epoch": 0.41929884587460675, "grad_norm": 1.0861674557307652, "learning_rate": 3.0889064138532866e-06, "loss": 0.0268, "step": 100490 }, { "epoch": 0.419319708589597, "grad_norm": 0.8328490808912302, "learning_rate": 3.088829569841795e-06, "loss": 0.0306, "step": 100495 }, { "epoch": 0.4193405713045873, "grad_norm": 0.6306349701485732, "learning_rate": 3.0887527315650596e-06, "loss": 0.0206, "step": 100500 }, { "epoch": 0.4193614340195776, "grad_norm": 1.04430388801023, "learning_rate": 3.088675899022367e-06, "loss": 0.0269, "step": 100505 }, { "epoch": 0.41938229673456784, "grad_norm": 0.7098888413768457, "learning_rate": 3.0885990722130038e-06, "loss": 0.0254, "step": 100510 }, { "epoch": 0.41940315944955814, "grad_norm": 0.9053053796381768, "learning_rate": 3.088522251136257e-06, "loss": 0.0206, "step": 100515 }, { "epoch": 0.4194240221645484, "grad_norm": 1.3137766784431526, "learning_rate": 3.088445435791415e-06, "loss": 0.0299, "step": 100520 }, { "epoch": 0.4194448848795387, "grad_norm": 1.1521372019976794, "learning_rate": 3.0883686261777633e-06, "loss": 0.0287, "step": 100525 }, { "epoch": 0.419465747594529, "grad_norm": 0.40797351519514896, "learning_rate": 3.0882918222945908e-06, "loss": 0.0311, "step": 100530 }, { "epoch": 0.4194866103095192, "grad_norm": 0.9300910444613, "learning_rate": 3.088215024141184e-06, "loss": 0.0334, "step": 100535 }, { "epoch": 0.4195074730245095, "grad_norm": 1.0171462447646518, "learning_rate": 3.08813823171683e-06, "loss": 0.0273, "step": 100540 }, { "epoch": 0.41952833573949977, "grad_norm": 2.151577604942526, "learning_rate": 3.088061445020818e-06, "loss": 0.0308, "step": 100545 }, { "epoch": 0.41954919845449007, "grad_norm": 0.6200896470483562, "learning_rate": 3.0879846640524345e-06, "loss": 0.0248, "step": 100550 }, { "epoch": 0.41957006116948037, "grad_norm": 0.6665255777110489, "learning_rate": 3.087907888810969e-06, "loss": 0.0272, "step": 100555 }, { "epoch": 0.4195909238844706, "grad_norm": 0.6182245031818945, "learning_rate": 3.087831119295708e-06, "loss": 0.0219, "step": 100560 }, { "epoch": 0.4196117865994609, "grad_norm": 0.879108979603021, "learning_rate": 3.087754355505941e-06, "loss": 0.0263, "step": 100565 }, { "epoch": 0.4196326493144512, "grad_norm": 0.7057774820999949, "learning_rate": 3.0876775974409552e-06, "loss": 0.0277, "step": 100570 }, { "epoch": 0.41965351202944146, "grad_norm": 0.7431453210409377, "learning_rate": 3.08760084510004e-06, "loss": 0.0192, "step": 100575 }, { "epoch": 0.41967437474443176, "grad_norm": 0.6600116334891655, "learning_rate": 3.0875240984824827e-06, "loss": 0.0206, "step": 100580 }, { "epoch": 0.419695237459422, "grad_norm": 0.648252430732217, "learning_rate": 3.087447357587574e-06, "loss": 0.0327, "step": 100585 }, { "epoch": 0.4197161001744123, "grad_norm": 0.5553410192831695, "learning_rate": 3.0873706224146017e-06, "loss": 0.0315, "step": 100590 }, { "epoch": 0.4197369628894026, "grad_norm": 1.7461465153872675, "learning_rate": 3.087293892962855e-06, "loss": 0.0331, "step": 100595 }, { "epoch": 0.41975782560439284, "grad_norm": 0.7306330617035834, "learning_rate": 3.087217169231622e-06, "loss": 0.0243, "step": 100600 }, { "epoch": 0.41977868831938314, "grad_norm": 0.894719519672927, "learning_rate": 3.0871404512201938e-06, "loss": 0.0313, "step": 100605 }, { "epoch": 0.4197995510343734, "grad_norm": 0.4969137199243913, "learning_rate": 3.0870637389278573e-06, "loss": 0.0277, "step": 100610 }, { "epoch": 0.4198204137493637, "grad_norm": 0.7512474679842371, "learning_rate": 3.086987032353904e-06, "loss": 0.0208, "step": 100615 }, { "epoch": 0.419841276464354, "grad_norm": 0.38212593035869824, "learning_rate": 3.0869103314976233e-06, "loss": 0.022, "step": 100620 }, { "epoch": 0.41986213917934423, "grad_norm": 0.4515222703666263, "learning_rate": 3.086833636358304e-06, "loss": 0.0249, "step": 100625 }, { "epoch": 0.41988300189433453, "grad_norm": 0.9312911673472247, "learning_rate": 3.0867569469352367e-06, "loss": 0.0352, "step": 100630 }, { "epoch": 0.4199038646093248, "grad_norm": 0.9478695470838143, "learning_rate": 3.0866802632277105e-06, "loss": 0.0304, "step": 100635 }, { "epoch": 0.4199247273243151, "grad_norm": 0.8102201767261303, "learning_rate": 3.086603585235016e-06, "loss": 0.0316, "step": 100640 }, { "epoch": 0.41994559003930537, "grad_norm": 0.9686318449623769, "learning_rate": 3.086526912956444e-06, "loss": 0.0346, "step": 100645 }, { "epoch": 0.4199664527542956, "grad_norm": 1.0336886598398447, "learning_rate": 3.0864502463912845e-06, "loss": 0.0289, "step": 100650 }, { "epoch": 0.4199873154692859, "grad_norm": 0.6939475749767541, "learning_rate": 3.0863735855388273e-06, "loss": 0.0216, "step": 100655 }, { "epoch": 0.4200081781842762, "grad_norm": 0.8736602532382635, "learning_rate": 3.0862969303983626e-06, "loss": 0.0257, "step": 100660 }, { "epoch": 0.42002904089926646, "grad_norm": 0.6813583651192032, "learning_rate": 3.086220280969183e-06, "loss": 0.0347, "step": 100665 }, { "epoch": 0.42004990361425676, "grad_norm": 0.7134480376212013, "learning_rate": 3.0861436372505775e-06, "loss": 0.0323, "step": 100670 }, { "epoch": 0.420070766329247, "grad_norm": 1.3826420780596282, "learning_rate": 3.0860669992418383e-06, "loss": 0.0192, "step": 100675 }, { "epoch": 0.4200916290442373, "grad_norm": 0.7586682097207076, "learning_rate": 3.0859903669422557e-06, "loss": 0.0242, "step": 100680 }, { "epoch": 0.4201124917592276, "grad_norm": 0.7690903070730377, "learning_rate": 3.0859137403511212e-06, "loss": 0.0287, "step": 100685 }, { "epoch": 0.42013335447421785, "grad_norm": 0.6063946649015165, "learning_rate": 3.0858371194677263e-06, "loss": 0.027, "step": 100690 }, { "epoch": 0.42015421718920815, "grad_norm": 0.7768860535104751, "learning_rate": 3.085760504291362e-06, "loss": 0.024, "step": 100695 }, { "epoch": 0.4201750799041984, "grad_norm": 0.9811692456896572, "learning_rate": 3.08568389482132e-06, "loss": 0.025, "step": 100700 }, { "epoch": 0.4201959426191887, "grad_norm": 0.6356008368932671, "learning_rate": 3.0856072910568923e-06, "loss": 0.021, "step": 100705 }, { "epoch": 0.420216805334179, "grad_norm": 0.3857557991173456, "learning_rate": 3.08553069299737e-06, "loss": 0.0352, "step": 100710 }, { "epoch": 0.42023766804916923, "grad_norm": 0.8546524499831752, "learning_rate": 3.0854541006420458e-06, "loss": 0.0265, "step": 100715 }, { "epoch": 0.42025853076415953, "grad_norm": 0.6252289968241816, "learning_rate": 3.0853775139902114e-06, "loss": 0.0265, "step": 100720 }, { "epoch": 0.4202793934791498, "grad_norm": 0.4119699784383522, "learning_rate": 3.0853009330411593e-06, "loss": 0.0207, "step": 100725 }, { "epoch": 0.4203002561941401, "grad_norm": 0.8945614377942518, "learning_rate": 3.085224357794181e-06, "loss": 0.028, "step": 100730 }, { "epoch": 0.4203211189091304, "grad_norm": 0.8270476267744868, "learning_rate": 3.08514778824857e-06, "loss": 0.0207, "step": 100735 }, { "epoch": 0.4203419816241206, "grad_norm": 1.1654444854822863, "learning_rate": 3.085071224403618e-06, "loss": 0.0271, "step": 100740 }, { "epoch": 0.4203628443391109, "grad_norm": 1.2294518922050903, "learning_rate": 3.084994666258618e-06, "loss": 0.0244, "step": 100745 }, { "epoch": 0.4203837070541012, "grad_norm": 0.7337457747168519, "learning_rate": 3.084918113812863e-06, "loss": 0.0173, "step": 100750 }, { "epoch": 0.42040456976909146, "grad_norm": 0.4854577618926311, "learning_rate": 3.0848415670656453e-06, "loss": 0.0272, "step": 100755 }, { "epoch": 0.42042543248408176, "grad_norm": 0.6913038626608429, "learning_rate": 3.084765026016259e-06, "loss": 0.0363, "step": 100760 }, { "epoch": 0.420446295199072, "grad_norm": 0.830514651138814, "learning_rate": 3.0846884906639957e-06, "loss": 0.026, "step": 100765 }, { "epoch": 0.4204671579140623, "grad_norm": 0.9095323219431976, "learning_rate": 3.08461196100815e-06, "loss": 0.0221, "step": 100770 }, { "epoch": 0.4204880206290526, "grad_norm": 0.3586239195888796, "learning_rate": 3.0845354370480145e-06, "loss": 0.0173, "step": 100775 }, { "epoch": 0.42050888334404285, "grad_norm": 0.936900439615342, "learning_rate": 3.0844589187828835e-06, "loss": 0.0301, "step": 100780 }, { "epoch": 0.42052974605903315, "grad_norm": 1.3167448078353112, "learning_rate": 3.0843824062120504e-06, "loss": 0.0283, "step": 100785 }, { "epoch": 0.4205506087740234, "grad_norm": 0.44423676163268444, "learning_rate": 3.0843058993348086e-06, "loss": 0.0295, "step": 100790 }, { "epoch": 0.4205714714890137, "grad_norm": 0.5479460356439616, "learning_rate": 3.0842293981504527e-06, "loss": 0.0272, "step": 100795 }, { "epoch": 0.420592334204004, "grad_norm": 1.0276447352936902, "learning_rate": 3.0841529026582756e-06, "loss": 0.0273, "step": 100800 }, { "epoch": 0.42061319691899424, "grad_norm": 0.9174667824968415, "learning_rate": 3.0840764128575724e-06, "loss": 0.0303, "step": 100805 }, { "epoch": 0.42063405963398454, "grad_norm": 0.5010617052742034, "learning_rate": 3.0839999287476376e-06, "loss": 0.0223, "step": 100810 }, { "epoch": 0.4206549223489748, "grad_norm": 0.7594376881768321, "learning_rate": 3.0839234503277644e-06, "loss": 0.0265, "step": 100815 }, { "epoch": 0.4206757850639651, "grad_norm": 0.77393004193502, "learning_rate": 3.083846977597248e-06, "loss": 0.0286, "step": 100820 }, { "epoch": 0.4206966477789554, "grad_norm": 1.331387512871363, "learning_rate": 3.0837705105553834e-06, "loss": 0.0317, "step": 100825 }, { "epoch": 0.4207175104939456, "grad_norm": 0.585165019998581, "learning_rate": 3.0836940492014644e-06, "loss": 0.0238, "step": 100830 }, { "epoch": 0.4207383732089359, "grad_norm": 0.40772995002786616, "learning_rate": 3.083617593534787e-06, "loss": 0.0266, "step": 100835 }, { "epoch": 0.4207592359239262, "grad_norm": 0.8525195883353339, "learning_rate": 3.0835411435546453e-06, "loss": 0.0235, "step": 100840 }, { "epoch": 0.42078009863891647, "grad_norm": 1.340000655039261, "learning_rate": 3.083464699260335e-06, "loss": 0.0231, "step": 100845 }, { "epoch": 0.42080096135390677, "grad_norm": 1.0683172114134387, "learning_rate": 3.0833882606511504e-06, "loss": 0.0321, "step": 100850 }, { "epoch": 0.420821824068897, "grad_norm": 0.6285337806096793, "learning_rate": 3.0833118277263884e-06, "loss": 0.0319, "step": 100855 }, { "epoch": 0.4208426867838873, "grad_norm": 0.7403506510794404, "learning_rate": 3.0832354004853436e-06, "loss": 0.0337, "step": 100860 }, { "epoch": 0.4208635494988776, "grad_norm": 0.40692183306554464, "learning_rate": 3.0831589789273114e-06, "loss": 0.0218, "step": 100865 }, { "epoch": 0.42088441221386785, "grad_norm": 0.8941366270300898, "learning_rate": 3.0830825630515883e-06, "loss": 0.0309, "step": 100870 }, { "epoch": 0.42090527492885815, "grad_norm": 0.9075699735237843, "learning_rate": 3.083006152857469e-06, "loss": 0.0376, "step": 100875 }, { "epoch": 0.4209261376438484, "grad_norm": 0.837980152353976, "learning_rate": 3.082929748344251e-06, "loss": 0.0393, "step": 100880 }, { "epoch": 0.4209470003588387, "grad_norm": 1.1076351480187516, "learning_rate": 3.082853349511229e-06, "loss": 0.0273, "step": 100885 }, { "epoch": 0.420967863073829, "grad_norm": 0.6444859634567967, "learning_rate": 3.0827769563576997e-06, "loss": 0.0221, "step": 100890 }, { "epoch": 0.42098872578881924, "grad_norm": 1.2413792147434681, "learning_rate": 3.08270056888296e-06, "loss": 0.0263, "step": 100895 }, { "epoch": 0.42100958850380954, "grad_norm": 0.42529632403763906, "learning_rate": 3.082624187086305e-06, "loss": 0.0259, "step": 100900 }, { "epoch": 0.4210304512187998, "grad_norm": 1.0260167162055414, "learning_rate": 3.082547810967033e-06, "loss": 0.0258, "step": 100905 }, { "epoch": 0.4210513139337901, "grad_norm": 0.720826957251634, "learning_rate": 3.0824714405244395e-06, "loss": 0.0231, "step": 100910 }, { "epoch": 0.4210721766487804, "grad_norm": 0.7987294017150478, "learning_rate": 3.082395075757822e-06, "loss": 0.0292, "step": 100915 }, { "epoch": 0.4210930393637706, "grad_norm": 1.6119153489721847, "learning_rate": 3.082318716666477e-06, "loss": 0.0335, "step": 100920 }, { "epoch": 0.4211139020787609, "grad_norm": 0.8101198887141517, "learning_rate": 3.082242363249702e-06, "loss": 0.0323, "step": 100925 }, { "epoch": 0.4211347647937512, "grad_norm": 0.5124096110956213, "learning_rate": 3.0821660155067938e-06, "loss": 0.0222, "step": 100930 }, { "epoch": 0.42115562750874147, "grad_norm": 0.5857842439703653, "learning_rate": 3.0820896734370497e-06, "loss": 0.0259, "step": 100935 }, { "epoch": 0.42117649022373177, "grad_norm": 1.0768614845804716, "learning_rate": 3.0820133370397677e-06, "loss": 0.0361, "step": 100940 }, { "epoch": 0.421197352938722, "grad_norm": 0.9047971811009351, "learning_rate": 3.0819370063142444e-06, "loss": 0.026, "step": 100945 }, { "epoch": 0.4212182156537123, "grad_norm": 0.8645814744865271, "learning_rate": 3.0818606812597783e-06, "loss": 0.0349, "step": 100950 }, { "epoch": 0.4212390783687026, "grad_norm": 0.558412784563018, "learning_rate": 3.081784361875667e-06, "loss": 0.0219, "step": 100955 }, { "epoch": 0.42125994108369286, "grad_norm": 0.8904515129449664, "learning_rate": 3.0817080481612086e-06, "loss": 0.0237, "step": 100960 }, { "epoch": 0.42128080379868316, "grad_norm": 1.2151960772802113, "learning_rate": 3.0816317401157007e-06, "loss": 0.0188, "step": 100965 }, { "epoch": 0.4213016665136734, "grad_norm": 1.2616618717966923, "learning_rate": 3.081555437738442e-06, "loss": 0.0298, "step": 100970 }, { "epoch": 0.4213225292286637, "grad_norm": 1.159579523470856, "learning_rate": 3.08147914102873e-06, "loss": 0.0258, "step": 100975 }, { "epoch": 0.421343391943654, "grad_norm": 0.5744735855810946, "learning_rate": 3.081402849985865e-06, "loss": 0.0244, "step": 100980 }, { "epoch": 0.42136425465864424, "grad_norm": 0.9568592562493807, "learning_rate": 3.0813265646091427e-06, "loss": 0.0295, "step": 100985 }, { "epoch": 0.42138511737363454, "grad_norm": 1.1556174929984318, "learning_rate": 3.0812502848978644e-06, "loss": 0.0365, "step": 100990 }, { "epoch": 0.4214059800886248, "grad_norm": 0.4511936133285564, "learning_rate": 3.081174010851327e-06, "loss": 0.0271, "step": 100995 }, { "epoch": 0.4214268428036151, "grad_norm": 0.5279252916891047, "learning_rate": 3.0810977424688303e-06, "loss": 0.03, "step": 101000 }, { "epoch": 0.4214477055186054, "grad_norm": 0.4492592382212531, "learning_rate": 3.0810214797496736e-06, "loss": 0.0256, "step": 101005 }, { "epoch": 0.42146856823359563, "grad_norm": 0.9207216823955526, "learning_rate": 3.0809452226931545e-06, "loss": 0.0318, "step": 101010 }, { "epoch": 0.42148943094858593, "grad_norm": 0.9685958987765682, "learning_rate": 3.080868971298574e-06, "loss": 0.0281, "step": 101015 }, { "epoch": 0.42151029366357623, "grad_norm": 0.6697111807332612, "learning_rate": 3.080792725565231e-06, "loss": 0.0293, "step": 101020 }, { "epoch": 0.4215311563785665, "grad_norm": 0.6654384690420284, "learning_rate": 3.0807164854924247e-06, "loss": 0.0233, "step": 101025 }, { "epoch": 0.4215520190935568, "grad_norm": 0.5669642462876653, "learning_rate": 3.080640251079455e-06, "loss": 0.0174, "step": 101030 }, { "epoch": 0.421572881808547, "grad_norm": 0.38586700655497375, "learning_rate": 3.080564022325622e-06, "loss": 0.0166, "step": 101035 }, { "epoch": 0.4215937445235373, "grad_norm": 0.751962751147103, "learning_rate": 3.080487799230224e-06, "loss": 0.0274, "step": 101040 }, { "epoch": 0.4216146072385276, "grad_norm": 1.2722160556871427, "learning_rate": 3.080411581792563e-06, "loss": 0.0317, "step": 101045 }, { "epoch": 0.42163546995351786, "grad_norm": 1.184363508125795, "learning_rate": 3.080335370011938e-06, "loss": 0.0362, "step": 101050 }, { "epoch": 0.42165633266850816, "grad_norm": 0.9428454781822191, "learning_rate": 3.0802591638876488e-06, "loss": 0.0293, "step": 101055 }, { "epoch": 0.4216771953834984, "grad_norm": 1.1316191990340032, "learning_rate": 3.0801829634189966e-06, "loss": 0.0269, "step": 101060 }, { "epoch": 0.4216980580984887, "grad_norm": 0.3815413529480524, "learning_rate": 3.0801067686052825e-06, "loss": 0.0203, "step": 101065 }, { "epoch": 0.421718920813479, "grad_norm": 0.5546131786604205, "learning_rate": 3.080030579445806e-06, "loss": 0.0252, "step": 101070 }, { "epoch": 0.42173978352846925, "grad_norm": 0.5569542404403303, "learning_rate": 3.079954395939868e-06, "loss": 0.0229, "step": 101075 }, { "epoch": 0.42176064624345955, "grad_norm": 0.5781989994074829, "learning_rate": 3.079878218086769e-06, "loss": 0.0328, "step": 101080 }, { "epoch": 0.4217815089584498, "grad_norm": 0.5998657159641921, "learning_rate": 3.0798020458858112e-06, "loss": 0.0182, "step": 101085 }, { "epoch": 0.4218023716734401, "grad_norm": 0.8004695227215063, "learning_rate": 3.0797258793362944e-06, "loss": 0.0299, "step": 101090 }, { "epoch": 0.4218232343884304, "grad_norm": 0.8376845917700779, "learning_rate": 3.079649718437521e-06, "loss": 0.0233, "step": 101095 }, { "epoch": 0.42184409710342063, "grad_norm": 0.9855694984537445, "learning_rate": 3.0795735631887914e-06, "loss": 0.0261, "step": 101100 }, { "epoch": 0.42186495981841093, "grad_norm": 1.1401021271242378, "learning_rate": 3.0794974135894068e-06, "loss": 0.0186, "step": 101105 }, { "epoch": 0.42188582253340123, "grad_norm": 0.642423141096252, "learning_rate": 3.07942126963867e-06, "loss": 0.025, "step": 101110 }, { "epoch": 0.4219066852483915, "grad_norm": 0.8452386799575293, "learning_rate": 3.0793451313358814e-06, "loss": 0.0331, "step": 101115 }, { "epoch": 0.4219275479633818, "grad_norm": 0.641538466795363, "learning_rate": 3.0792689986803435e-06, "loss": 0.0266, "step": 101120 }, { "epoch": 0.421948410678372, "grad_norm": 0.5239499820126159, "learning_rate": 3.079192871671358e-06, "loss": 0.0309, "step": 101125 }, { "epoch": 0.4219692733933623, "grad_norm": 0.7557505409813184, "learning_rate": 3.079116750308227e-06, "loss": 0.0259, "step": 101130 }, { "epoch": 0.4219901361083526, "grad_norm": 0.6953523300697761, "learning_rate": 3.0790406345902535e-06, "loss": 0.023, "step": 101135 }, { "epoch": 0.42201099882334286, "grad_norm": 0.7823968947240595, "learning_rate": 3.078964524516738e-06, "loss": 0.0277, "step": 101140 }, { "epoch": 0.42203186153833316, "grad_norm": 0.9603559734312186, "learning_rate": 3.0788884200869846e-06, "loss": 0.0228, "step": 101145 }, { "epoch": 0.4220527242533234, "grad_norm": 1.3303324783254629, "learning_rate": 3.0788123213002945e-06, "loss": 0.0322, "step": 101150 }, { "epoch": 0.4220735869683137, "grad_norm": 1.7126788418775842, "learning_rate": 3.0787362281559714e-06, "loss": 0.0358, "step": 101155 }, { "epoch": 0.422094449683304, "grad_norm": 0.6619698895290089, "learning_rate": 3.078660140653318e-06, "loss": 0.0285, "step": 101160 }, { "epoch": 0.42211531239829425, "grad_norm": 0.8461482578257192, "learning_rate": 3.078584058791636e-06, "loss": 0.0345, "step": 101165 }, { "epoch": 0.42213617511328455, "grad_norm": 0.9970000855632888, "learning_rate": 3.078507982570231e-06, "loss": 0.0294, "step": 101170 }, { "epoch": 0.4221570378282748, "grad_norm": 0.8921508143793669, "learning_rate": 3.0784319119884026e-06, "loss": 0.0228, "step": 101175 }, { "epoch": 0.4221779005432651, "grad_norm": 1.237174258797913, "learning_rate": 3.0783558470454565e-06, "loss": 0.0314, "step": 101180 }, { "epoch": 0.4221987632582554, "grad_norm": 0.6216703705787434, "learning_rate": 3.0782797877406963e-06, "loss": 0.0278, "step": 101185 }, { "epoch": 0.42221962597324564, "grad_norm": 0.5817971949476579, "learning_rate": 3.078203734073424e-06, "loss": 0.0229, "step": 101190 }, { "epoch": 0.42224048868823594, "grad_norm": 0.8647011950025875, "learning_rate": 3.078127686042944e-06, "loss": 0.0204, "step": 101195 }, { "epoch": 0.42226135140322624, "grad_norm": 0.6171112876921862, "learning_rate": 3.07805164364856e-06, "loss": 0.0341, "step": 101200 }, { "epoch": 0.4222822141182165, "grad_norm": 0.8878581659374255, "learning_rate": 3.0779756068895754e-06, "loss": 0.0182, "step": 101205 }, { "epoch": 0.4223030768332068, "grad_norm": 0.7701198439552831, "learning_rate": 3.077899575765295e-06, "loss": 0.0297, "step": 101210 }, { "epoch": 0.422323939548197, "grad_norm": 0.6652393457507848, "learning_rate": 3.0778235502750224e-06, "loss": 0.0294, "step": 101215 }, { "epoch": 0.4223448022631873, "grad_norm": 1.180270034114519, "learning_rate": 3.077747530418062e-06, "loss": 0.0389, "step": 101220 }, { "epoch": 0.4223656649781776, "grad_norm": 0.7443118361543593, "learning_rate": 3.0776715161937175e-06, "loss": 0.029, "step": 101225 }, { "epoch": 0.42238652769316787, "grad_norm": 1.2181208199034403, "learning_rate": 3.0775955076012946e-06, "loss": 0.0184, "step": 101230 }, { "epoch": 0.42240739040815817, "grad_norm": 1.9413078109459434, "learning_rate": 3.0775195046400966e-06, "loss": 0.0271, "step": 101235 }, { "epoch": 0.4224282531231484, "grad_norm": 0.5555953853081605, "learning_rate": 3.0774435073094295e-06, "loss": 0.0265, "step": 101240 }, { "epoch": 0.4224491158381387, "grad_norm": 0.5609522695961388, "learning_rate": 3.0773675156085974e-06, "loss": 0.02, "step": 101245 }, { "epoch": 0.422469978553129, "grad_norm": 0.7634987714286079, "learning_rate": 3.0772915295369048e-06, "loss": 0.0194, "step": 101250 }, { "epoch": 0.42249084126811925, "grad_norm": 1.1868949094093073, "learning_rate": 3.0772155490936576e-06, "loss": 0.0267, "step": 101255 }, { "epoch": 0.42251170398310955, "grad_norm": 0.39123827417785473, "learning_rate": 3.07713957427816e-06, "loss": 0.0199, "step": 101260 }, { "epoch": 0.4225325666980998, "grad_norm": 1.1174100206462203, "learning_rate": 3.077063605089719e-06, "loss": 0.0292, "step": 101265 }, { "epoch": 0.4225534294130901, "grad_norm": 1.1579780112559674, "learning_rate": 3.0769876415276383e-06, "loss": 0.0406, "step": 101270 }, { "epoch": 0.4225742921280804, "grad_norm": 1.3866475109836283, "learning_rate": 3.076911683591224e-06, "loss": 0.0228, "step": 101275 }, { "epoch": 0.42259515484307064, "grad_norm": 0.3868682877495288, "learning_rate": 3.076835731279782e-06, "loss": 0.0254, "step": 101280 }, { "epoch": 0.42261601755806094, "grad_norm": 0.7612941244522778, "learning_rate": 3.076759784592618e-06, "loss": 0.0255, "step": 101285 }, { "epoch": 0.42263688027305124, "grad_norm": 0.6927747142243071, "learning_rate": 3.0766838435290374e-06, "loss": 0.0206, "step": 101290 }, { "epoch": 0.4226577429880415, "grad_norm": 0.5776089703615841, "learning_rate": 3.076607908088347e-06, "loss": 0.0117, "step": 101295 }, { "epoch": 0.4226786057030318, "grad_norm": 1.1159336609039856, "learning_rate": 3.0765319782698527e-06, "loss": 0.0295, "step": 101300 }, { "epoch": 0.42269946841802203, "grad_norm": 0.8979556944717364, "learning_rate": 3.0764560540728606e-06, "loss": 0.0301, "step": 101305 }, { "epoch": 0.42272033113301233, "grad_norm": 0.8141694265287558, "learning_rate": 3.076380135496677e-06, "loss": 0.0263, "step": 101310 }, { "epoch": 0.4227411938480026, "grad_norm": 0.6095321136977869, "learning_rate": 3.0763042225406086e-06, "loss": 0.0243, "step": 101315 }, { "epoch": 0.42276205656299287, "grad_norm": 0.5557375935855134, "learning_rate": 3.076228315203962e-06, "loss": 0.0286, "step": 101320 }, { "epoch": 0.42278291927798317, "grad_norm": 0.5248482681844637, "learning_rate": 3.0761524134860444e-06, "loss": 0.024, "step": 101325 }, { "epoch": 0.4228037819929734, "grad_norm": 0.3781828792733262, "learning_rate": 3.076076517386162e-06, "loss": 0.0193, "step": 101330 }, { "epoch": 0.4228246447079637, "grad_norm": 1.1748747757142897, "learning_rate": 3.076000626903621e-06, "loss": 0.0316, "step": 101335 }, { "epoch": 0.422845507422954, "grad_norm": 0.6229738513198182, "learning_rate": 3.075924742037731e-06, "loss": 0.0273, "step": 101340 }, { "epoch": 0.42286637013794426, "grad_norm": 0.628247931271165, "learning_rate": 3.075848862787796e-06, "loss": 0.0208, "step": 101345 }, { "epoch": 0.42288723285293456, "grad_norm": 0.4812761272117581, "learning_rate": 3.075772989153126e-06, "loss": 0.0254, "step": 101350 }, { "epoch": 0.4229080955679248, "grad_norm": 0.7022328351136279, "learning_rate": 3.075697121133027e-06, "loss": 0.0211, "step": 101355 }, { "epoch": 0.4229289582829151, "grad_norm": 0.6993609393093608, "learning_rate": 3.075621258726808e-06, "loss": 0.0377, "step": 101360 }, { "epoch": 0.4229498209979054, "grad_norm": 0.9740113837731827, "learning_rate": 3.0755454019337753e-06, "loss": 0.0226, "step": 101365 }, { "epoch": 0.42297068371289565, "grad_norm": 1.341566900491652, "learning_rate": 3.0754695507532367e-06, "loss": 0.0392, "step": 101370 }, { "epoch": 0.42299154642788594, "grad_norm": 0.6635496075319653, "learning_rate": 3.0753937051845013e-06, "loss": 0.0289, "step": 101375 }, { "epoch": 0.42301240914287624, "grad_norm": 0.8827371006255408, "learning_rate": 3.075317865226876e-06, "loss": 0.0356, "step": 101380 }, { "epoch": 0.4230332718578665, "grad_norm": 0.9985729402287791, "learning_rate": 3.075242030879669e-06, "loss": 0.0284, "step": 101385 }, { "epoch": 0.4230541345728568, "grad_norm": 0.5551681248817255, "learning_rate": 3.0751662021421904e-06, "loss": 0.0235, "step": 101390 }, { "epoch": 0.42307499728784703, "grad_norm": 0.8990496727196005, "learning_rate": 3.0750903790137464e-06, "loss": 0.0272, "step": 101395 }, { "epoch": 0.42309586000283733, "grad_norm": 0.6410644995858955, "learning_rate": 3.0750145614936465e-06, "loss": 0.0177, "step": 101400 }, { "epoch": 0.42311672271782763, "grad_norm": 0.3063371268980109, "learning_rate": 3.0749387495811995e-06, "loss": 0.022, "step": 101405 }, { "epoch": 0.4231375854328179, "grad_norm": 0.6923737487725887, "learning_rate": 3.074862943275714e-06, "loss": 0.0245, "step": 101410 }, { "epoch": 0.4231584481478082, "grad_norm": 0.6524024447014236, "learning_rate": 3.074787142576498e-06, "loss": 0.024, "step": 101415 }, { "epoch": 0.4231793108627984, "grad_norm": 1.3351215219771309, "learning_rate": 3.0747113474828623e-06, "loss": 0.0259, "step": 101420 }, { "epoch": 0.4232001735777887, "grad_norm": 0.9602346285473125, "learning_rate": 3.074635557994115e-06, "loss": 0.0255, "step": 101425 }, { "epoch": 0.423221036292779, "grad_norm": 0.5404775322329834, "learning_rate": 3.0745597741095647e-06, "loss": 0.0282, "step": 101430 }, { "epoch": 0.42324189900776926, "grad_norm": 0.4018960314706051, "learning_rate": 3.0744839958285223e-06, "loss": 0.0279, "step": 101435 }, { "epoch": 0.42326276172275956, "grad_norm": 0.34818389375841713, "learning_rate": 3.074408223150296e-06, "loss": 0.0144, "step": 101440 }, { "epoch": 0.4232836244377498, "grad_norm": 0.43667983832340934, "learning_rate": 3.074332456074196e-06, "loss": 0.0218, "step": 101445 }, { "epoch": 0.4233044871527401, "grad_norm": 0.8599192028394507, "learning_rate": 3.074256694599532e-06, "loss": 0.0201, "step": 101450 }, { "epoch": 0.4233253498677304, "grad_norm": 0.8112830568327765, "learning_rate": 3.074180938725613e-06, "loss": 0.0302, "step": 101455 }, { "epoch": 0.42334621258272065, "grad_norm": 0.47217074676379284, "learning_rate": 3.0741051884517507e-06, "loss": 0.0172, "step": 101460 }, { "epoch": 0.42336707529771095, "grad_norm": 0.7341344773781614, "learning_rate": 3.074029443777254e-06, "loss": 0.0239, "step": 101465 }, { "epoch": 0.42338793801270125, "grad_norm": 0.9174832825348828, "learning_rate": 3.0739537047014334e-06, "loss": 0.0239, "step": 101470 }, { "epoch": 0.4234088007276915, "grad_norm": 0.9534950158637379, "learning_rate": 3.073877971223599e-06, "loss": 0.0323, "step": 101475 }, { "epoch": 0.4234296634426818, "grad_norm": 0.6401215043338281, "learning_rate": 3.0738022433430614e-06, "loss": 0.0265, "step": 101480 }, { "epoch": 0.42345052615767204, "grad_norm": 0.5907206620215909, "learning_rate": 3.0737265210591312e-06, "loss": 0.0293, "step": 101485 }, { "epoch": 0.42347138887266234, "grad_norm": 0.328982528301495, "learning_rate": 3.0736508043711188e-06, "loss": 0.0264, "step": 101490 }, { "epoch": 0.42349225158765263, "grad_norm": 0.8093306331377269, "learning_rate": 3.0735750932783353e-06, "loss": 0.0293, "step": 101495 }, { "epoch": 0.4235131143026429, "grad_norm": 0.6195939115198593, "learning_rate": 3.073499387780092e-06, "loss": 0.0267, "step": 101500 }, { "epoch": 0.4235339770176332, "grad_norm": 0.8238004974473091, "learning_rate": 3.073423687875699e-06, "loss": 0.0282, "step": 101505 }, { "epoch": 0.4235548397326234, "grad_norm": 0.749301545329864, "learning_rate": 3.0733479935644676e-06, "loss": 0.0256, "step": 101510 }, { "epoch": 0.4235757024476137, "grad_norm": 0.269441943955357, "learning_rate": 3.07327230484571e-06, "loss": 0.0282, "step": 101515 }, { "epoch": 0.423596565162604, "grad_norm": 0.27163963925434303, "learning_rate": 3.073196621718737e-06, "loss": 0.0208, "step": 101520 }, { "epoch": 0.42361742787759427, "grad_norm": 1.860167134852123, "learning_rate": 3.07312094418286e-06, "loss": 0.0247, "step": 101525 }, { "epoch": 0.42363829059258457, "grad_norm": 0.18145732840692705, "learning_rate": 3.073045272237391e-06, "loss": 0.0167, "step": 101530 }, { "epoch": 0.4236591533075748, "grad_norm": 0.6582938761361766, "learning_rate": 3.0729696058816412e-06, "loss": 0.0251, "step": 101535 }, { "epoch": 0.4236800160225651, "grad_norm": 0.45926552129827697, "learning_rate": 3.072893945114922e-06, "loss": 0.0235, "step": 101540 }, { "epoch": 0.4237008787375554, "grad_norm": 1.071561202227094, "learning_rate": 3.072818289936547e-06, "loss": 0.0255, "step": 101545 }, { "epoch": 0.42372174145254565, "grad_norm": 1.7843330493548941, "learning_rate": 3.072742640345828e-06, "loss": 0.0269, "step": 101550 }, { "epoch": 0.42374260416753595, "grad_norm": 0.3478105626565747, "learning_rate": 3.072666996342076e-06, "loss": 0.0241, "step": 101555 }, { "epoch": 0.42376346688252625, "grad_norm": 0.8243667959617799, "learning_rate": 3.0725913579246037e-06, "loss": 0.0197, "step": 101560 }, { "epoch": 0.4237843295975165, "grad_norm": 4.151006858688913, "learning_rate": 3.072515725092724e-06, "loss": 0.028, "step": 101565 }, { "epoch": 0.4238051923125068, "grad_norm": 0.7312310150347006, "learning_rate": 3.0724400978457498e-06, "loss": 0.0264, "step": 101570 }, { "epoch": 0.42382605502749704, "grad_norm": 1.019178874821627, "learning_rate": 3.072364476182993e-06, "loss": 0.0222, "step": 101575 }, { "epoch": 0.42384691774248734, "grad_norm": 1.5878571075582368, "learning_rate": 3.0722888601037667e-06, "loss": 0.0353, "step": 101580 }, { "epoch": 0.42386778045747764, "grad_norm": 1.1960324228104156, "learning_rate": 3.0722132496073837e-06, "loss": 0.0356, "step": 101585 }, { "epoch": 0.4238886431724679, "grad_norm": 1.7120596481517965, "learning_rate": 3.072137644693158e-06, "loss": 0.0349, "step": 101590 }, { "epoch": 0.4239095058874582, "grad_norm": 0.6290514294185198, "learning_rate": 3.072062045360401e-06, "loss": 0.0282, "step": 101595 }, { "epoch": 0.4239303686024484, "grad_norm": 0.5167739012984798, "learning_rate": 3.0719864516084276e-06, "loss": 0.0269, "step": 101600 }, { "epoch": 0.4239512313174387, "grad_norm": 0.447964225630243, "learning_rate": 3.071910863436551e-06, "loss": 0.0244, "step": 101605 }, { "epoch": 0.423972094032429, "grad_norm": 1.3049216837888686, "learning_rate": 3.071835280844083e-06, "loss": 0.0262, "step": 101610 }, { "epoch": 0.42399295674741927, "grad_norm": 1.0626157825562725, "learning_rate": 3.0717597038303396e-06, "loss": 0.0193, "step": 101615 }, { "epoch": 0.42401381946240957, "grad_norm": 0.707874208002707, "learning_rate": 3.0716841323946333e-06, "loss": 0.0225, "step": 101620 }, { "epoch": 0.4240346821773998, "grad_norm": 0.5172204812190364, "learning_rate": 3.071608566536278e-06, "loss": 0.0294, "step": 101625 }, { "epoch": 0.4240555448923901, "grad_norm": 0.9100229779765381, "learning_rate": 3.071533006254589e-06, "loss": 0.0246, "step": 101630 }, { "epoch": 0.4240764076073804, "grad_norm": 0.7035848308469029, "learning_rate": 3.0714574515488783e-06, "loss": 0.0223, "step": 101635 }, { "epoch": 0.42409727032237066, "grad_norm": 0.5150448688822988, "learning_rate": 3.0713819024184615e-06, "loss": 0.025, "step": 101640 }, { "epoch": 0.42411813303736096, "grad_norm": 0.5550939284361246, "learning_rate": 3.0713063588626524e-06, "loss": 0.0245, "step": 101645 }, { "epoch": 0.42413899575235126, "grad_norm": 1.2465385650037517, "learning_rate": 3.0712308208807655e-06, "loss": 0.0397, "step": 101650 }, { "epoch": 0.4241598584673415, "grad_norm": 0.6900982623194402, "learning_rate": 3.0711552884721158e-06, "loss": 0.0193, "step": 101655 }, { "epoch": 0.4241807211823318, "grad_norm": 0.6038607446950384, "learning_rate": 3.071079761636018e-06, "loss": 0.0236, "step": 101660 }, { "epoch": 0.42420158389732204, "grad_norm": 0.7538360236482196, "learning_rate": 3.0710042403717865e-06, "loss": 0.032, "step": 101665 }, { "epoch": 0.42422244661231234, "grad_norm": 0.8426077650070118, "learning_rate": 3.070928724678736e-06, "loss": 0.0313, "step": 101670 }, { "epoch": 0.42424330932730264, "grad_norm": 0.8758463420788577, "learning_rate": 3.070853214556183e-06, "loss": 0.0344, "step": 101675 }, { "epoch": 0.4242641720422929, "grad_norm": 1.0168752813230968, "learning_rate": 3.0707777100034408e-06, "loss": 0.0389, "step": 101680 }, { "epoch": 0.4242850347572832, "grad_norm": 1.091671942339978, "learning_rate": 3.0707022110198267e-06, "loss": 0.0239, "step": 101685 }, { "epoch": 0.42430589747227343, "grad_norm": 0.5941597442652328, "learning_rate": 3.0706267176046537e-06, "loss": 0.0345, "step": 101690 }, { "epoch": 0.42432676018726373, "grad_norm": 0.33858277490193056, "learning_rate": 3.0705512297572393e-06, "loss": 0.0183, "step": 101695 }, { "epoch": 0.42434762290225403, "grad_norm": 0.832685939697118, "learning_rate": 3.070475747476898e-06, "loss": 0.0315, "step": 101700 }, { "epoch": 0.4243684856172443, "grad_norm": 1.1017304416447236, "learning_rate": 3.0704002707629465e-06, "loss": 0.0349, "step": 101705 }, { "epoch": 0.4243893483322346, "grad_norm": 0.6599799415560068, "learning_rate": 3.0703247996146994e-06, "loss": 0.0269, "step": 101710 }, { "epoch": 0.4244102110472248, "grad_norm": 0.7101860955646285, "learning_rate": 3.0702493340314743e-06, "loss": 0.0281, "step": 101715 }, { "epoch": 0.4244310737622151, "grad_norm": 0.8118085123073351, "learning_rate": 3.0701738740125862e-06, "loss": 0.0285, "step": 101720 }, { "epoch": 0.4244519364772054, "grad_norm": 0.660930266493847, "learning_rate": 3.070098419557352e-06, "loss": 0.0343, "step": 101725 }, { "epoch": 0.42447279919219566, "grad_norm": 0.5146325830253946, "learning_rate": 3.070022970665088e-06, "loss": 0.0294, "step": 101730 }, { "epoch": 0.42449366190718596, "grad_norm": 0.9671089242796471, "learning_rate": 3.0699475273351097e-06, "loss": 0.0233, "step": 101735 }, { "epoch": 0.42451452462217626, "grad_norm": 0.5772742999660001, "learning_rate": 3.0698720895667344e-06, "loss": 0.0227, "step": 101740 }, { "epoch": 0.4245353873371665, "grad_norm": 0.47951626484192666, "learning_rate": 3.0697966573592795e-06, "loss": 0.02, "step": 101745 }, { "epoch": 0.4245562500521568, "grad_norm": 0.738791544838277, "learning_rate": 3.0697212307120605e-06, "loss": 0.0267, "step": 101750 }, { "epoch": 0.42457711276714705, "grad_norm": 0.4410726360540027, "learning_rate": 3.0696458096243953e-06, "loss": 0.0297, "step": 101755 }, { "epoch": 0.42459797548213735, "grad_norm": 0.43601653728138606, "learning_rate": 3.0695703940956006e-06, "loss": 0.025, "step": 101760 }, { "epoch": 0.42461883819712765, "grad_norm": 0.899279643673631, "learning_rate": 3.0694949841249933e-06, "loss": 0.0304, "step": 101765 }, { "epoch": 0.4246397009121179, "grad_norm": 0.7142902730127987, "learning_rate": 3.0694195797118916e-06, "loss": 0.0288, "step": 101770 }, { "epoch": 0.4246605636271082, "grad_norm": 0.5837239385251565, "learning_rate": 3.069344180855612e-06, "loss": 0.0267, "step": 101775 }, { "epoch": 0.42468142634209843, "grad_norm": 0.991673468375489, "learning_rate": 3.0692687875554714e-06, "loss": 0.0224, "step": 101780 }, { "epoch": 0.42470228905708873, "grad_norm": 0.44154425069154096, "learning_rate": 3.0691933998107897e-06, "loss": 0.0257, "step": 101785 }, { "epoch": 0.42472315177207903, "grad_norm": 0.6921832895451117, "learning_rate": 3.0691180176208828e-06, "loss": 0.0228, "step": 101790 }, { "epoch": 0.4247440144870693, "grad_norm": 1.3593777288088273, "learning_rate": 3.0690426409850695e-06, "loss": 0.0214, "step": 101795 }, { "epoch": 0.4247648772020596, "grad_norm": 0.714635351486251, "learning_rate": 3.068967269902667e-06, "loss": 0.0225, "step": 101800 }, { "epoch": 0.4247857399170498, "grad_norm": 0.8457280903457927, "learning_rate": 3.068891904372994e-06, "loss": 0.0297, "step": 101805 }, { "epoch": 0.4248066026320401, "grad_norm": 0.8589898001505504, "learning_rate": 3.0688165443953688e-06, "loss": 0.0296, "step": 101810 }, { "epoch": 0.4248274653470304, "grad_norm": 0.5790282877921507, "learning_rate": 3.068741189969109e-06, "loss": 0.0287, "step": 101815 }, { "epoch": 0.42484832806202066, "grad_norm": 0.7185228284794638, "learning_rate": 3.0686658410935345e-06, "loss": 0.0285, "step": 101820 }, { "epoch": 0.42486919077701096, "grad_norm": 0.7856286486938752, "learning_rate": 3.068590497767962e-06, "loss": 0.025, "step": 101825 }, { "epoch": 0.42489005349200126, "grad_norm": 0.9438149093585317, "learning_rate": 3.0685151599917116e-06, "loss": 0.0309, "step": 101830 }, { "epoch": 0.4249109162069915, "grad_norm": 0.5933998806552918, "learning_rate": 3.068439827764102e-06, "loss": 0.025, "step": 101835 }, { "epoch": 0.4249317789219818, "grad_norm": 0.8975340074476105, "learning_rate": 3.0683645010844513e-06, "loss": 0.0265, "step": 101840 }, { "epoch": 0.42495264163697205, "grad_norm": 0.4914624601785871, "learning_rate": 3.0682891799520796e-06, "loss": 0.0251, "step": 101845 }, { "epoch": 0.42497350435196235, "grad_norm": 1.0726997987812148, "learning_rate": 3.0682138643663058e-06, "loss": 0.0278, "step": 101850 }, { "epoch": 0.42499436706695265, "grad_norm": 0.7415089037585348, "learning_rate": 3.0681385543264484e-06, "loss": 0.0232, "step": 101855 }, { "epoch": 0.4250152297819429, "grad_norm": 0.5692027275375905, "learning_rate": 3.0680632498318273e-06, "loss": 0.0223, "step": 101860 }, { "epoch": 0.4250360924969332, "grad_norm": 0.9308687738365037, "learning_rate": 3.0679879508817624e-06, "loss": 0.0254, "step": 101865 }, { "epoch": 0.42505695521192344, "grad_norm": 0.5966514892713947, "learning_rate": 3.0679126574755734e-06, "loss": 0.0288, "step": 101870 }, { "epoch": 0.42507781792691374, "grad_norm": 1.1076586257169598, "learning_rate": 3.067837369612579e-06, "loss": 0.0214, "step": 101875 }, { "epoch": 0.42509868064190404, "grad_norm": 1.0053759538956704, "learning_rate": 3.0677620872921e-06, "loss": 0.0303, "step": 101880 }, { "epoch": 0.4251195433568943, "grad_norm": 0.8773109561278409, "learning_rate": 3.0676868105134554e-06, "loss": 0.0281, "step": 101885 }, { "epoch": 0.4251404060718846, "grad_norm": 0.9500075968417526, "learning_rate": 3.067611539275967e-06, "loss": 0.029, "step": 101890 }, { "epoch": 0.4251612687868748, "grad_norm": 0.8937100767569637, "learning_rate": 3.067536273578954e-06, "loss": 0.0376, "step": 101895 }, { "epoch": 0.4251821315018651, "grad_norm": 0.7141650446855068, "learning_rate": 3.067461013421736e-06, "loss": 0.0224, "step": 101900 }, { "epoch": 0.4252029942168554, "grad_norm": 1.0478067926151065, "learning_rate": 3.0673857588036355e-06, "loss": 0.0309, "step": 101905 }, { "epoch": 0.42522385693184567, "grad_norm": 0.9912288990889393, "learning_rate": 3.0673105097239707e-06, "loss": 0.0246, "step": 101910 }, { "epoch": 0.42524471964683597, "grad_norm": 0.83357230422821, "learning_rate": 3.067235266182064e-06, "loss": 0.024, "step": 101915 }, { "epoch": 0.4252655823618262, "grad_norm": 0.9004809835215937, "learning_rate": 3.0671600281772356e-06, "loss": 0.0409, "step": 101920 }, { "epoch": 0.4252864450768165, "grad_norm": 0.4604326720003292, "learning_rate": 3.0670847957088063e-06, "loss": 0.0301, "step": 101925 }, { "epoch": 0.4253073077918068, "grad_norm": 1.5425018714741614, "learning_rate": 3.0670095687760978e-06, "loss": 0.0342, "step": 101930 }, { "epoch": 0.42532817050679705, "grad_norm": 0.5988660497911457, "learning_rate": 3.06693434737843e-06, "loss": 0.0245, "step": 101935 }, { "epoch": 0.42534903322178735, "grad_norm": 1.3213633374517963, "learning_rate": 3.066859131515125e-06, "loss": 0.0269, "step": 101940 }, { "epoch": 0.42536989593677765, "grad_norm": 1.1361371523637633, "learning_rate": 3.0667839211855045e-06, "loss": 0.0204, "step": 101945 }, { "epoch": 0.4253907586517679, "grad_norm": 0.7920976816668197, "learning_rate": 3.0667087163888894e-06, "loss": 0.0235, "step": 101950 }, { "epoch": 0.4254116213667582, "grad_norm": 0.6078619404317762, "learning_rate": 3.0666335171246014e-06, "loss": 0.0202, "step": 101955 }, { "epoch": 0.42543248408174844, "grad_norm": 0.5671275354551103, "learning_rate": 3.0665583233919625e-06, "loss": 0.017, "step": 101960 }, { "epoch": 0.42545334679673874, "grad_norm": 0.35116608087376516, "learning_rate": 3.0664831351902945e-06, "loss": 0.0252, "step": 101965 }, { "epoch": 0.42547420951172904, "grad_norm": 0.9492546631881139, "learning_rate": 3.0664079525189187e-06, "loss": 0.0247, "step": 101970 }, { "epoch": 0.4254950722267193, "grad_norm": 0.5994289367613335, "learning_rate": 3.066332775377159e-06, "loss": 0.0409, "step": 101975 }, { "epoch": 0.4255159349417096, "grad_norm": 0.8006176704919703, "learning_rate": 3.0662576037643357e-06, "loss": 0.0296, "step": 101980 }, { "epoch": 0.42553679765669983, "grad_norm": 0.7244779826233029, "learning_rate": 3.0661824376797716e-06, "loss": 0.0245, "step": 101985 }, { "epoch": 0.4255576603716901, "grad_norm": 0.8316431343122764, "learning_rate": 3.0661072771227895e-06, "loss": 0.0238, "step": 101990 }, { "epoch": 0.4255785230866804, "grad_norm": 1.9955678377540604, "learning_rate": 3.0660321220927114e-06, "loss": 0.0504, "step": 101995 }, { "epoch": 0.42559938580167067, "grad_norm": 0.767458415962172, "learning_rate": 3.065956972588861e-06, "loss": 0.0253, "step": 102000 }, { "epoch": 0.42562024851666097, "grad_norm": 1.0930483759311294, "learning_rate": 3.0658818286105603e-06, "loss": 0.0231, "step": 102005 }, { "epoch": 0.4256411112316512, "grad_norm": 1.088340496395247, "learning_rate": 3.065806690157132e-06, "loss": 0.0286, "step": 102010 }, { "epoch": 0.4256619739466415, "grad_norm": 0.6275567826979598, "learning_rate": 3.0657315572279e-06, "loss": 0.0277, "step": 102015 }, { "epoch": 0.4256828366616318, "grad_norm": 3.173919514334929, "learning_rate": 3.0656564298221865e-06, "loss": 0.0272, "step": 102020 }, { "epoch": 0.42570369937662206, "grad_norm": 0.6851933998867745, "learning_rate": 3.0655813079393155e-06, "loss": 0.0275, "step": 102025 }, { "epoch": 0.42572456209161236, "grad_norm": 0.6572699742927399, "learning_rate": 3.0655061915786093e-06, "loss": 0.0301, "step": 102030 }, { "epoch": 0.42574542480660266, "grad_norm": 0.7263882235130389, "learning_rate": 3.065431080739393e-06, "loss": 0.0295, "step": 102035 }, { "epoch": 0.4257662875215929, "grad_norm": 0.5368505362638715, "learning_rate": 3.0653559754209885e-06, "loss": 0.0196, "step": 102040 }, { "epoch": 0.4257871502365832, "grad_norm": 0.5377629543028467, "learning_rate": 3.0652808756227203e-06, "loss": 0.0219, "step": 102045 }, { "epoch": 0.42580801295157344, "grad_norm": 0.7578220336453512, "learning_rate": 3.0652057813439127e-06, "loss": 0.0327, "step": 102050 }, { "epoch": 0.42582887566656374, "grad_norm": 1.0398635272617331, "learning_rate": 3.0651306925838887e-06, "loss": 0.0283, "step": 102055 }, { "epoch": 0.42584973838155404, "grad_norm": 0.3245139297421645, "learning_rate": 3.0650556093419738e-06, "loss": 0.0204, "step": 102060 }, { "epoch": 0.4258706010965443, "grad_norm": 0.7924499521589977, "learning_rate": 3.06498053161749e-06, "loss": 0.0306, "step": 102065 }, { "epoch": 0.4258914638115346, "grad_norm": 10.93295742283357, "learning_rate": 3.0649054594097633e-06, "loss": 0.0251, "step": 102070 }, { "epoch": 0.42591232652652483, "grad_norm": 1.085771401921743, "learning_rate": 3.064830392718118e-06, "loss": 0.0256, "step": 102075 }, { "epoch": 0.42593318924151513, "grad_norm": 1.3099177355619493, "learning_rate": 3.0647553315418776e-06, "loss": 0.0345, "step": 102080 }, { "epoch": 0.42595405195650543, "grad_norm": 0.45035650160075424, "learning_rate": 3.0646802758803677e-06, "loss": 0.0242, "step": 102085 }, { "epoch": 0.4259749146714957, "grad_norm": 1.0360578794523265, "learning_rate": 3.0646052257329127e-06, "loss": 0.0262, "step": 102090 }, { "epoch": 0.425995777386486, "grad_norm": 0.6288281318094889, "learning_rate": 3.0645301810988375e-06, "loss": 0.0249, "step": 102095 }, { "epoch": 0.4260166401014762, "grad_norm": 0.5823990333434284, "learning_rate": 3.064455141977467e-06, "loss": 0.0198, "step": 102100 }, { "epoch": 0.4260375028164665, "grad_norm": 0.5371850411379571, "learning_rate": 3.064380108368127e-06, "loss": 0.0251, "step": 102105 }, { "epoch": 0.4260583655314568, "grad_norm": 0.9402633980599542, "learning_rate": 3.064305080270141e-06, "loss": 0.0315, "step": 102110 }, { "epoch": 0.42607922824644706, "grad_norm": 0.8839209460224128, "learning_rate": 3.0642300576828364e-06, "loss": 0.0256, "step": 102115 }, { "epoch": 0.42610009096143736, "grad_norm": 0.7737661913666753, "learning_rate": 3.0641550406055377e-06, "loss": 0.0326, "step": 102120 }, { "epoch": 0.42612095367642766, "grad_norm": 0.5456698185282559, "learning_rate": 3.0640800290375696e-06, "loss": 0.0231, "step": 102125 }, { "epoch": 0.4261418163914179, "grad_norm": 0.7423601905599626, "learning_rate": 3.064005022978259e-06, "loss": 0.0327, "step": 102130 }, { "epoch": 0.4261626791064082, "grad_norm": 0.46659754940714354, "learning_rate": 3.063930022426932e-06, "loss": 0.0274, "step": 102135 }, { "epoch": 0.42618354182139845, "grad_norm": 0.6492806046741381, "learning_rate": 3.063855027382913e-06, "loss": 0.0294, "step": 102140 }, { "epoch": 0.42620440453638875, "grad_norm": 0.7586082009768438, "learning_rate": 3.063780037845529e-06, "loss": 0.0311, "step": 102145 }, { "epoch": 0.42622526725137905, "grad_norm": 0.5649913317600092, "learning_rate": 3.0637050538141067e-06, "loss": 0.0251, "step": 102150 }, { "epoch": 0.4262461299663693, "grad_norm": 0.7373649373558925, "learning_rate": 3.063630075287971e-06, "loss": 0.0239, "step": 102155 }, { "epoch": 0.4262669926813596, "grad_norm": 0.9172287359942393, "learning_rate": 3.0635551022664496e-06, "loss": 0.0267, "step": 102160 }, { "epoch": 0.42628785539634984, "grad_norm": 0.37746972694190556, "learning_rate": 3.063480134748868e-06, "loss": 0.0202, "step": 102165 }, { "epoch": 0.42630871811134013, "grad_norm": 0.697739873077829, "learning_rate": 3.063405172734553e-06, "loss": 0.0289, "step": 102170 }, { "epoch": 0.42632958082633043, "grad_norm": 0.8804814538330563, "learning_rate": 3.063330216222832e-06, "loss": 0.0274, "step": 102175 }, { "epoch": 0.4263504435413207, "grad_norm": 0.7653710444144531, "learning_rate": 3.0632552652130303e-06, "loss": 0.0306, "step": 102180 }, { "epoch": 0.426371306256311, "grad_norm": 0.7777204206029747, "learning_rate": 3.063180319704476e-06, "loss": 0.0263, "step": 102185 }, { "epoch": 0.4263921689713012, "grad_norm": 0.8000320714561138, "learning_rate": 3.063105379696497e-06, "loss": 0.0247, "step": 102190 }, { "epoch": 0.4264130316862915, "grad_norm": 0.6848552874066968, "learning_rate": 3.063030445188419e-06, "loss": 0.0285, "step": 102195 }, { "epoch": 0.4264338944012818, "grad_norm": 0.43689969459915506, "learning_rate": 3.06295551617957e-06, "loss": 0.0281, "step": 102200 }, { "epoch": 0.42645475711627207, "grad_norm": 0.9066701780543022, "learning_rate": 3.0628805926692774e-06, "loss": 0.0235, "step": 102205 }, { "epoch": 0.42647561983126236, "grad_norm": 0.5069863149711704, "learning_rate": 3.0628056746568675e-06, "loss": 0.0327, "step": 102210 }, { "epoch": 0.42649648254625266, "grad_norm": 0.9380069418176287, "learning_rate": 3.0627307621416704e-06, "loss": 0.0212, "step": 102215 }, { "epoch": 0.4265173452612429, "grad_norm": 0.5728071217720526, "learning_rate": 3.062655855123011e-06, "loss": 0.0205, "step": 102220 }, { "epoch": 0.4265382079762332, "grad_norm": 0.944283978880816, "learning_rate": 3.0625809536002194e-06, "loss": 0.0258, "step": 102225 }, { "epoch": 0.42655907069122345, "grad_norm": 0.9726365690678718, "learning_rate": 3.062506057572623e-06, "loss": 0.029, "step": 102230 }, { "epoch": 0.42657993340621375, "grad_norm": 0.8346967818342012, "learning_rate": 3.0624311670395494e-06, "loss": 0.0224, "step": 102235 }, { "epoch": 0.42660079612120405, "grad_norm": 0.762130090185163, "learning_rate": 3.062356282000327e-06, "loss": 0.0277, "step": 102240 }, { "epoch": 0.4266216588361943, "grad_norm": 0.8272159782079991, "learning_rate": 3.0622814024542845e-06, "loss": 0.0301, "step": 102245 }, { "epoch": 0.4266425215511846, "grad_norm": 0.6822621188822331, "learning_rate": 3.06220652840075e-06, "loss": 0.0271, "step": 102250 }, { "epoch": 0.42666338426617484, "grad_norm": 0.7837062809110638, "learning_rate": 3.0621316598390522e-06, "loss": 0.0285, "step": 102255 }, { "epoch": 0.42668424698116514, "grad_norm": 0.31098268195578765, "learning_rate": 3.06205679676852e-06, "loss": 0.0212, "step": 102260 }, { "epoch": 0.42670510969615544, "grad_norm": 0.9045324933887939, "learning_rate": 3.0619819391884812e-06, "loss": 0.032, "step": 102265 }, { "epoch": 0.4267259724111457, "grad_norm": 1.3453231368078116, "learning_rate": 3.061907087098266e-06, "loss": 0.0275, "step": 102270 }, { "epoch": 0.426746835126136, "grad_norm": 0.9354991430159991, "learning_rate": 3.061832240497203e-06, "loss": 0.022, "step": 102275 }, { "epoch": 0.4267676978411262, "grad_norm": 0.629228992813404, "learning_rate": 3.0617573993846205e-06, "loss": 0.0188, "step": 102280 }, { "epoch": 0.4267885605561165, "grad_norm": 0.6950185048782154, "learning_rate": 3.0616825637598498e-06, "loss": 0.0261, "step": 102285 }, { "epoch": 0.4268094232711068, "grad_norm": 0.5572796817171713, "learning_rate": 3.0616077336222177e-06, "loss": 0.0162, "step": 102290 }, { "epoch": 0.42683028598609707, "grad_norm": 0.6599165041998584, "learning_rate": 3.061532908971055e-06, "loss": 0.0171, "step": 102295 }, { "epoch": 0.42685114870108737, "grad_norm": 0.6168414595612093, "learning_rate": 3.061458089805691e-06, "loss": 0.0268, "step": 102300 }, { "epoch": 0.42687201141607767, "grad_norm": 1.0259188119996623, "learning_rate": 3.0613832761254565e-06, "loss": 0.0221, "step": 102305 }, { "epoch": 0.4268928741310679, "grad_norm": 0.3287295870153448, "learning_rate": 3.06130846792968e-06, "loss": 0.0228, "step": 102310 }, { "epoch": 0.4269137368460582, "grad_norm": 0.5441093781131341, "learning_rate": 3.061233665217692e-06, "loss": 0.0237, "step": 102315 }, { "epoch": 0.42693459956104846, "grad_norm": 0.9428888204174807, "learning_rate": 3.061158867988822e-06, "loss": 0.0254, "step": 102320 }, { "epoch": 0.42695546227603876, "grad_norm": 0.938911667230081, "learning_rate": 3.061084076242401e-06, "loss": 0.0346, "step": 102325 }, { "epoch": 0.42697632499102905, "grad_norm": 1.1822464667221633, "learning_rate": 3.0610092899777582e-06, "loss": 0.041, "step": 102330 }, { "epoch": 0.4269971877060193, "grad_norm": 1.2576908391993702, "learning_rate": 3.0609345091942254e-06, "loss": 0.0333, "step": 102335 }, { "epoch": 0.4270180504210096, "grad_norm": 0.7088962633221306, "learning_rate": 3.060859733891132e-06, "loss": 0.0319, "step": 102340 }, { "epoch": 0.42703891313599984, "grad_norm": 0.5579041298423704, "learning_rate": 3.0607849640678093e-06, "loss": 0.0305, "step": 102345 }, { "epoch": 0.42705977585099014, "grad_norm": 0.4906638390070514, "learning_rate": 3.060710199723588e-06, "loss": 0.0242, "step": 102350 }, { "epoch": 0.42708063856598044, "grad_norm": 0.6512248333341846, "learning_rate": 3.060635440857799e-06, "loss": 0.0224, "step": 102355 }, { "epoch": 0.4271015012809707, "grad_norm": 0.9455774700964901, "learning_rate": 3.060560687469772e-06, "loss": 0.0234, "step": 102360 }, { "epoch": 0.427122363995961, "grad_norm": 0.7004419630265583, "learning_rate": 3.0604859395588394e-06, "loss": 0.0239, "step": 102365 }, { "epoch": 0.42714322671095123, "grad_norm": 0.5463432971468818, "learning_rate": 3.0604111971243315e-06, "loss": 0.0276, "step": 102370 }, { "epoch": 0.42716408942594153, "grad_norm": 0.7590307051176142, "learning_rate": 3.0603364601655812e-06, "loss": 0.0278, "step": 102375 }, { "epoch": 0.42718495214093183, "grad_norm": 0.631347668848752, "learning_rate": 3.060261728681918e-06, "loss": 0.0224, "step": 102380 }, { "epoch": 0.4272058148559221, "grad_norm": 0.579986694672631, "learning_rate": 3.060187002672675e-06, "loss": 0.0271, "step": 102385 }, { "epoch": 0.42722667757091237, "grad_norm": 0.8037470459545379, "learning_rate": 3.0601122821371834e-06, "loss": 0.0256, "step": 102390 }, { "epoch": 0.42724754028590267, "grad_norm": 0.5459992652015659, "learning_rate": 3.0600375670747743e-06, "loss": 0.0158, "step": 102395 }, { "epoch": 0.4272684030008929, "grad_norm": 0.855506212404006, "learning_rate": 3.05996285748478e-06, "loss": 0.023, "step": 102400 }, { "epoch": 0.4272892657158832, "grad_norm": 0.8731432883817136, "learning_rate": 3.0598881533665326e-06, "loss": 0.0288, "step": 102405 }, { "epoch": 0.42731012843087346, "grad_norm": 0.5961496105257363, "learning_rate": 3.059813454719364e-06, "loss": 0.0289, "step": 102410 }, { "epoch": 0.42733099114586376, "grad_norm": 0.7454553464807664, "learning_rate": 3.059738761542607e-06, "loss": 0.0312, "step": 102415 }, { "epoch": 0.42735185386085406, "grad_norm": 1.6604610508328586, "learning_rate": 3.059664073835594e-06, "loss": 0.026, "step": 102420 }, { "epoch": 0.4273727165758443, "grad_norm": 0.6101215616274505, "learning_rate": 3.059589391597656e-06, "loss": 0.0215, "step": 102425 }, { "epoch": 0.4273935792908346, "grad_norm": 0.6463171883320858, "learning_rate": 3.0595147148281262e-06, "loss": 0.0245, "step": 102430 }, { "epoch": 0.42741444200582485, "grad_norm": 0.8404984563098994, "learning_rate": 3.0594400435263386e-06, "loss": 0.0318, "step": 102435 }, { "epoch": 0.42743530472081515, "grad_norm": 0.7081241008596327, "learning_rate": 3.059365377691625e-06, "loss": 0.0291, "step": 102440 }, { "epoch": 0.42745616743580545, "grad_norm": 0.4675426721831917, "learning_rate": 3.059290717323318e-06, "loss": 0.0213, "step": 102445 }, { "epoch": 0.4274770301507957, "grad_norm": 0.6064396428247977, "learning_rate": 3.0592160624207512e-06, "loss": 0.0222, "step": 102450 }, { "epoch": 0.427497892865786, "grad_norm": 0.9757572501459241, "learning_rate": 3.0591414129832574e-06, "loss": 0.0353, "step": 102455 }, { "epoch": 0.42751875558077623, "grad_norm": 0.6793148901353981, "learning_rate": 3.05906676901017e-06, "loss": 0.0184, "step": 102460 }, { "epoch": 0.42753961829576653, "grad_norm": 0.5434465367757053, "learning_rate": 3.0589921305008225e-06, "loss": 0.0274, "step": 102465 }, { "epoch": 0.42756048101075683, "grad_norm": 0.7796720364689623, "learning_rate": 3.0589174974545483e-06, "loss": 0.0316, "step": 102470 }, { "epoch": 0.4275813437257471, "grad_norm": 0.5385131034684713, "learning_rate": 3.0588428698706802e-06, "loss": 0.0276, "step": 102475 }, { "epoch": 0.4276022064407374, "grad_norm": 0.8374722368833692, "learning_rate": 3.058768247748553e-06, "loss": 0.0282, "step": 102480 }, { "epoch": 0.4276230691557277, "grad_norm": 0.9079140242824214, "learning_rate": 3.0586936310875005e-06, "loss": 0.0178, "step": 102485 }, { "epoch": 0.4276439318707179, "grad_norm": 0.7380491143686283, "learning_rate": 3.058619019886856e-06, "loss": 0.0268, "step": 102490 }, { "epoch": 0.4276647945857082, "grad_norm": 0.5415643015072283, "learning_rate": 3.0585444141459535e-06, "loss": 0.0299, "step": 102495 }, { "epoch": 0.42768565730069846, "grad_norm": 0.731002700674961, "learning_rate": 3.058469813864128e-06, "loss": 0.031, "step": 102500 }, { "epoch": 0.42770652001568876, "grad_norm": 0.9605978584551128, "learning_rate": 3.058395219040713e-06, "loss": 0.0232, "step": 102505 }, { "epoch": 0.42772738273067906, "grad_norm": 1.231799730095918, "learning_rate": 3.0583206296750427e-06, "loss": 0.0285, "step": 102510 }, { "epoch": 0.4277482454456693, "grad_norm": 0.6434702935811948, "learning_rate": 3.0582460457664533e-06, "loss": 0.0227, "step": 102515 }, { "epoch": 0.4277691081606596, "grad_norm": 0.8613204843777778, "learning_rate": 3.058171467314277e-06, "loss": 0.0245, "step": 102520 }, { "epoch": 0.42778997087564985, "grad_norm": 0.7569776200292286, "learning_rate": 3.0580968943178503e-06, "loss": 0.0257, "step": 102525 }, { "epoch": 0.42781083359064015, "grad_norm": 0.34941769690299956, "learning_rate": 3.0580223267765074e-06, "loss": 0.0183, "step": 102530 }, { "epoch": 0.42783169630563045, "grad_norm": 0.6431035898941705, "learning_rate": 3.0579477646895827e-06, "loss": 0.0259, "step": 102535 }, { "epoch": 0.4278525590206207, "grad_norm": 0.8529191626107488, "learning_rate": 3.0578732080564126e-06, "loss": 0.0243, "step": 102540 }, { "epoch": 0.427873421735611, "grad_norm": 0.5126116153509707, "learning_rate": 3.0577986568763314e-06, "loss": 0.023, "step": 102545 }, { "epoch": 0.42789428445060124, "grad_norm": 0.80175949797397, "learning_rate": 3.0577241111486745e-06, "loss": 0.0202, "step": 102550 }, { "epoch": 0.42791514716559154, "grad_norm": 0.8283262565863099, "learning_rate": 3.057649570872777e-06, "loss": 0.0214, "step": 102555 }, { "epoch": 0.42793600988058184, "grad_norm": 0.4574105885474755, "learning_rate": 3.0575750360479758e-06, "loss": 0.0246, "step": 102560 }, { "epoch": 0.4279568725955721, "grad_norm": 0.8480497174890075, "learning_rate": 3.0575005066736045e-06, "loss": 0.026, "step": 102565 }, { "epoch": 0.4279777353105624, "grad_norm": 1.3449210084614849, "learning_rate": 3.0574259827490005e-06, "loss": 0.0308, "step": 102570 }, { "epoch": 0.4279985980255527, "grad_norm": 0.49930215002285727, "learning_rate": 3.057351464273499e-06, "loss": 0.0192, "step": 102575 }, { "epoch": 0.4280194607405429, "grad_norm": 0.7412438035567425, "learning_rate": 3.057276951246436e-06, "loss": 0.0316, "step": 102580 }, { "epoch": 0.4280403234555332, "grad_norm": 0.7362634618168079, "learning_rate": 3.0572024436671475e-06, "loss": 0.0264, "step": 102585 }, { "epoch": 0.42806118617052347, "grad_norm": 0.4197688261892673, "learning_rate": 3.05712794153497e-06, "loss": 0.0288, "step": 102590 }, { "epoch": 0.42808204888551377, "grad_norm": 0.5578671983034472, "learning_rate": 3.0570534448492395e-06, "loss": 0.0204, "step": 102595 }, { "epoch": 0.42810291160050407, "grad_norm": 1.8048604315052905, "learning_rate": 3.0569789536092927e-06, "loss": 0.029, "step": 102600 }, { "epoch": 0.4281237743154943, "grad_norm": 0.550020858142248, "learning_rate": 3.056904467814466e-06, "loss": 0.0209, "step": 102605 }, { "epoch": 0.4281446370304846, "grad_norm": 0.6072605246322188, "learning_rate": 3.0568299874640963e-06, "loss": 0.0363, "step": 102610 }, { "epoch": 0.42816549974547485, "grad_norm": 1.075317567558359, "learning_rate": 3.0567555125575197e-06, "loss": 0.0212, "step": 102615 }, { "epoch": 0.42818636246046515, "grad_norm": 0.8455633261971683, "learning_rate": 3.0566810430940734e-06, "loss": 0.0252, "step": 102620 }, { "epoch": 0.42820722517545545, "grad_norm": 0.6462891656147932, "learning_rate": 3.0566065790730955e-06, "loss": 0.0228, "step": 102625 }, { "epoch": 0.4282280878904457, "grad_norm": 0.5894524461649068, "learning_rate": 3.0565321204939214e-06, "loss": 0.0304, "step": 102630 }, { "epoch": 0.428248950605436, "grad_norm": 0.6194772302218507, "learning_rate": 3.0564576673558895e-06, "loss": 0.0227, "step": 102635 }, { "epoch": 0.42826981332042624, "grad_norm": 0.9704638813183711, "learning_rate": 3.056383219658336e-06, "loss": 0.0258, "step": 102640 }, { "epoch": 0.42829067603541654, "grad_norm": 0.6540324328685925, "learning_rate": 3.0563087774005994e-06, "loss": 0.0239, "step": 102645 }, { "epoch": 0.42831153875040684, "grad_norm": 1.143273876505695, "learning_rate": 3.056234340582017e-06, "loss": 0.0258, "step": 102650 }, { "epoch": 0.4283324014653971, "grad_norm": 0.7312760191700414, "learning_rate": 3.056159909201927e-06, "loss": 0.0385, "step": 102655 }, { "epoch": 0.4283532641803874, "grad_norm": 0.5373249412100468, "learning_rate": 3.056085483259666e-06, "loss": 0.0284, "step": 102660 }, { "epoch": 0.4283741268953777, "grad_norm": 2.5450915380558126, "learning_rate": 3.056011062754572e-06, "loss": 0.029, "step": 102665 }, { "epoch": 0.4283949896103679, "grad_norm": 0.7978516792752504, "learning_rate": 3.0559366476859843e-06, "loss": 0.0205, "step": 102670 }, { "epoch": 0.4284158523253582, "grad_norm": 1.1278643355351585, "learning_rate": 3.0558622380532405e-06, "loss": 0.0285, "step": 102675 }, { "epoch": 0.42843671504034847, "grad_norm": 0.5445808379989052, "learning_rate": 3.055787833855677e-06, "loss": 0.0278, "step": 102680 }, { "epoch": 0.42845757775533877, "grad_norm": 0.5865053292298966, "learning_rate": 3.0557134350926353e-06, "loss": 0.023, "step": 102685 }, { "epoch": 0.42847844047032907, "grad_norm": 0.5738791029159986, "learning_rate": 3.0556390417634516e-06, "loss": 0.0264, "step": 102690 }, { "epoch": 0.4284993031853193, "grad_norm": 0.8078505055254832, "learning_rate": 3.055564653867466e-06, "loss": 0.027, "step": 102695 }, { "epoch": 0.4285201659003096, "grad_norm": 1.0568832668822221, "learning_rate": 3.055490271404016e-06, "loss": 0.0334, "step": 102700 }, { "epoch": 0.42854102861529986, "grad_norm": 0.6188927353780608, "learning_rate": 3.05541589437244e-06, "loss": 0.0228, "step": 102705 }, { "epoch": 0.42856189133029016, "grad_norm": 2.3916181013925635, "learning_rate": 3.0553415227720784e-06, "loss": 0.0305, "step": 102710 }, { "epoch": 0.42858275404528046, "grad_norm": 0.8896873042003189, "learning_rate": 3.055267156602269e-06, "loss": 0.0292, "step": 102715 }, { "epoch": 0.4286036167602707, "grad_norm": 0.7338511983640477, "learning_rate": 3.0551927958623518e-06, "loss": 0.0289, "step": 102720 }, { "epoch": 0.428624479475261, "grad_norm": 0.5552049243743269, "learning_rate": 3.0551184405516655e-06, "loss": 0.0298, "step": 102725 }, { "epoch": 0.42864534219025124, "grad_norm": 0.5824115210362371, "learning_rate": 3.05504409066955e-06, "loss": 0.023, "step": 102730 }, { "epoch": 0.42866620490524154, "grad_norm": 1.0173533228880989, "learning_rate": 3.0549697462153443e-06, "loss": 0.0291, "step": 102735 }, { "epoch": 0.42868706762023184, "grad_norm": 0.9412394252958122, "learning_rate": 3.054895407188388e-06, "loss": 0.0277, "step": 102740 }, { "epoch": 0.4287079303352221, "grad_norm": 0.7931685750815349, "learning_rate": 3.054821073588021e-06, "loss": 0.0283, "step": 102745 }, { "epoch": 0.4287287930502124, "grad_norm": 0.9860558936528661, "learning_rate": 3.054746745413583e-06, "loss": 0.0268, "step": 102750 }, { "epoch": 0.4287496557652027, "grad_norm": 0.4158150832416465, "learning_rate": 3.054672422664414e-06, "loss": 0.0241, "step": 102755 }, { "epoch": 0.42877051848019293, "grad_norm": 0.5884409499665603, "learning_rate": 3.0545981053398543e-06, "loss": 0.02, "step": 102760 }, { "epoch": 0.42879138119518323, "grad_norm": 0.6509615225006227, "learning_rate": 3.0545237934392437e-06, "loss": 0.0231, "step": 102765 }, { "epoch": 0.4288122439101735, "grad_norm": 0.4578536190086112, "learning_rate": 3.054449486961923e-06, "loss": 0.0218, "step": 102770 }, { "epoch": 0.4288331066251638, "grad_norm": 1.157805284602281, "learning_rate": 3.054375185907231e-06, "loss": 0.0301, "step": 102775 }, { "epoch": 0.4288539693401541, "grad_norm": 1.1130197614214674, "learning_rate": 3.0543008902745096e-06, "loss": 0.0264, "step": 102780 }, { "epoch": 0.4288748320551443, "grad_norm": 0.575456980622829, "learning_rate": 3.0542266000631e-06, "loss": 0.031, "step": 102785 }, { "epoch": 0.4288956947701346, "grad_norm": 4.963338192479054, "learning_rate": 3.0541523152723413e-06, "loss": 0.0351, "step": 102790 }, { "epoch": 0.42891655748512486, "grad_norm": 0.5060024531955952, "learning_rate": 3.0540780359015753e-06, "loss": 0.0239, "step": 102795 }, { "epoch": 0.42893742020011516, "grad_norm": 0.8033576047597435, "learning_rate": 3.054003761950143e-06, "loss": 0.0241, "step": 102800 }, { "epoch": 0.42895828291510546, "grad_norm": 0.7957794280140809, "learning_rate": 3.0539294934173846e-06, "loss": 0.0309, "step": 102805 }, { "epoch": 0.4289791456300957, "grad_norm": 2.3051410937805743, "learning_rate": 3.053855230302642e-06, "loss": 0.0218, "step": 102810 }, { "epoch": 0.429000008345086, "grad_norm": 0.570627987587172, "learning_rate": 3.0537809726052566e-06, "loss": 0.0257, "step": 102815 }, { "epoch": 0.42902087106007625, "grad_norm": 0.611138155797368, "learning_rate": 3.053706720324569e-06, "loss": 0.0261, "step": 102820 }, { "epoch": 0.42904173377506655, "grad_norm": 1.016933574444633, "learning_rate": 3.0536324734599213e-06, "loss": 0.037, "step": 102825 }, { "epoch": 0.42906259649005685, "grad_norm": 0.8567396985100211, "learning_rate": 3.0535582320106556e-06, "loss": 0.0311, "step": 102830 }, { "epoch": 0.4290834592050471, "grad_norm": 1.0208110675927375, "learning_rate": 3.053483995976112e-06, "loss": 0.0251, "step": 102835 }, { "epoch": 0.4291043219200374, "grad_norm": 1.034277443624882, "learning_rate": 3.0534097653556344e-06, "loss": 0.0291, "step": 102840 }, { "epoch": 0.4291251846350277, "grad_norm": 0.7197515328526297, "learning_rate": 3.053335540148563e-06, "loss": 0.025, "step": 102845 }, { "epoch": 0.42914604735001793, "grad_norm": 0.9701134408111044, "learning_rate": 3.0532613203542405e-06, "loss": 0.0313, "step": 102850 }, { "epoch": 0.42916691006500823, "grad_norm": 0.5853305347421581, "learning_rate": 3.05318710597201e-06, "loss": 0.0262, "step": 102855 }, { "epoch": 0.4291877727799985, "grad_norm": 0.737961023860104, "learning_rate": 3.0531128970012127e-06, "loss": 0.0439, "step": 102860 }, { "epoch": 0.4292086354949888, "grad_norm": 0.8233058119028723, "learning_rate": 3.0530386934411906e-06, "loss": 0.0262, "step": 102865 }, { "epoch": 0.4292294982099791, "grad_norm": 0.6379523595189475, "learning_rate": 3.052964495291287e-06, "loss": 0.0221, "step": 102870 }, { "epoch": 0.4292503609249693, "grad_norm": 0.8852894789549541, "learning_rate": 3.0528903025508444e-06, "loss": 0.0281, "step": 102875 }, { "epoch": 0.4292712236399596, "grad_norm": 0.6092455648055612, "learning_rate": 3.052816115219206e-06, "loss": 0.0256, "step": 102880 }, { "epoch": 0.42929208635494986, "grad_norm": 0.6998333919147562, "learning_rate": 3.052741933295713e-06, "loss": 0.0178, "step": 102885 }, { "epoch": 0.42931294906994016, "grad_norm": 1.3496808896356713, "learning_rate": 3.0526677567797107e-06, "loss": 0.0323, "step": 102890 }, { "epoch": 0.42933381178493046, "grad_norm": 0.8044760944816624, "learning_rate": 3.0525935856705402e-06, "loss": 0.0231, "step": 102895 }, { "epoch": 0.4293546744999207, "grad_norm": 0.6571001090206633, "learning_rate": 3.052519419967546e-06, "loss": 0.029, "step": 102900 }, { "epoch": 0.429375537214911, "grad_norm": 0.9779335447705552, "learning_rate": 3.0524452596700706e-06, "loss": 0.0231, "step": 102905 }, { "epoch": 0.42939639992990125, "grad_norm": 0.9700817547407268, "learning_rate": 3.0523711047774572e-06, "loss": 0.0274, "step": 102910 }, { "epoch": 0.42941726264489155, "grad_norm": 0.8849628512745075, "learning_rate": 3.05229695528905e-06, "loss": 0.0256, "step": 102915 }, { "epoch": 0.42943812535988185, "grad_norm": 0.44969789049765535, "learning_rate": 3.052222811204193e-06, "loss": 0.0203, "step": 102920 }, { "epoch": 0.4294589880748721, "grad_norm": 1.159381481525285, "learning_rate": 3.0521486725222288e-06, "loss": 0.0344, "step": 102925 }, { "epoch": 0.4294798507898624, "grad_norm": 0.85137596103112, "learning_rate": 3.0520745392425018e-06, "loss": 0.029, "step": 102930 }, { "epoch": 0.4295007135048527, "grad_norm": 0.7962191380217034, "learning_rate": 3.0520004113643555e-06, "loss": 0.0319, "step": 102935 }, { "epoch": 0.42952157621984294, "grad_norm": 1.361052894708022, "learning_rate": 3.0519262888871345e-06, "loss": 0.0305, "step": 102940 }, { "epoch": 0.42954243893483324, "grad_norm": 1.449405450181927, "learning_rate": 3.051852171810183e-06, "loss": 0.045, "step": 102945 }, { "epoch": 0.4295633016498235, "grad_norm": 0.7907594842125788, "learning_rate": 3.0517780601328454e-06, "loss": 0.0247, "step": 102950 }, { "epoch": 0.4295841643648138, "grad_norm": 0.8554329672245674, "learning_rate": 3.051703953854466e-06, "loss": 0.0302, "step": 102955 }, { "epoch": 0.4296050270798041, "grad_norm": 0.7147984635134903, "learning_rate": 3.0516298529743886e-06, "loss": 0.0236, "step": 102960 }, { "epoch": 0.4296258897947943, "grad_norm": 0.7379546073167538, "learning_rate": 3.0515557574919585e-06, "loss": 0.0223, "step": 102965 }, { "epoch": 0.4296467525097846, "grad_norm": 1.5342485940139774, "learning_rate": 3.0514816674065208e-06, "loss": 0.0348, "step": 102970 }, { "epoch": 0.42966761522477487, "grad_norm": 0.5481177983714912, "learning_rate": 3.0514075827174194e-06, "loss": 0.0194, "step": 102975 }, { "epoch": 0.42968847793976517, "grad_norm": 0.5968465802968064, "learning_rate": 3.051333503424e-06, "loss": 0.0296, "step": 102980 }, { "epoch": 0.42970934065475547, "grad_norm": 0.8487944473499057, "learning_rate": 3.0512594295256077e-06, "loss": 0.0325, "step": 102985 }, { "epoch": 0.4297302033697457, "grad_norm": 0.8067192910277342, "learning_rate": 3.051185361021587e-06, "loss": 0.0232, "step": 102990 }, { "epoch": 0.429751066084736, "grad_norm": 0.6029092975158891, "learning_rate": 3.051111297911284e-06, "loss": 0.0341, "step": 102995 }, { "epoch": 0.42977192879972625, "grad_norm": 0.6970638574481658, "learning_rate": 3.0510372401940435e-06, "loss": 0.024, "step": 103000 }, { "epoch": 0.42979279151471655, "grad_norm": 0.5823613794209369, "learning_rate": 3.0509631878692113e-06, "loss": 0.0217, "step": 103005 }, { "epoch": 0.42981365422970685, "grad_norm": 0.8995768630273798, "learning_rate": 3.050889140936133e-06, "loss": 0.0211, "step": 103010 }, { "epoch": 0.4298345169446971, "grad_norm": 1.2199595200240054, "learning_rate": 3.0508150993941544e-06, "loss": 0.0224, "step": 103015 }, { "epoch": 0.4298553796596874, "grad_norm": 1.186993496289967, "learning_rate": 3.050741063242621e-06, "loss": 0.0297, "step": 103020 }, { "epoch": 0.4298762423746777, "grad_norm": 0.7559862183805605, "learning_rate": 3.0506670324808798e-06, "loss": 0.0202, "step": 103025 }, { "epoch": 0.42989710508966794, "grad_norm": 0.7172433100602016, "learning_rate": 3.050593007108275e-06, "loss": 0.0325, "step": 103030 }, { "epoch": 0.42991796780465824, "grad_norm": 1.8186912600286238, "learning_rate": 3.050518987124155e-06, "loss": 0.037, "step": 103035 }, { "epoch": 0.4299388305196485, "grad_norm": 1.1291985243750506, "learning_rate": 3.050444972527864e-06, "loss": 0.0266, "step": 103040 }, { "epoch": 0.4299596932346388, "grad_norm": 0.6380610450032477, "learning_rate": 3.0503709633187502e-06, "loss": 0.0241, "step": 103045 }, { "epoch": 0.4299805559496291, "grad_norm": 0.6795289857547273, "learning_rate": 3.050296959496159e-06, "loss": 0.0259, "step": 103050 }, { "epoch": 0.43000141866461933, "grad_norm": 0.8181601619507722, "learning_rate": 3.0502229610594374e-06, "loss": 0.0263, "step": 103055 }, { "epoch": 0.4300222813796096, "grad_norm": 1.072763176712014, "learning_rate": 3.050148968007932e-06, "loss": 0.0262, "step": 103060 }, { "epoch": 0.43004314409459987, "grad_norm": 1.1960502967470046, "learning_rate": 3.05007498034099e-06, "loss": 0.0235, "step": 103065 }, { "epoch": 0.43006400680959017, "grad_norm": 0.462892305765331, "learning_rate": 3.0500009980579583e-06, "loss": 0.021, "step": 103070 }, { "epoch": 0.43008486952458047, "grad_norm": 0.5868608982827298, "learning_rate": 3.049927021158183e-06, "loss": 0.0215, "step": 103075 }, { "epoch": 0.4301057322395707, "grad_norm": 0.6721837639254884, "learning_rate": 3.049853049641012e-06, "loss": 0.0303, "step": 103080 }, { "epoch": 0.430126594954561, "grad_norm": 0.8093914180419789, "learning_rate": 3.049779083505794e-06, "loss": 0.023, "step": 103085 }, { "epoch": 0.43014745766955126, "grad_norm": 0.8045274351612747, "learning_rate": 3.0497051227518742e-06, "loss": 0.0206, "step": 103090 }, { "epoch": 0.43016832038454156, "grad_norm": 0.5956143843380675, "learning_rate": 3.049631167378601e-06, "loss": 0.0321, "step": 103095 }, { "epoch": 0.43018918309953186, "grad_norm": 0.5624697213310447, "learning_rate": 3.049557217385322e-06, "loss": 0.0243, "step": 103100 }, { "epoch": 0.4302100458145221, "grad_norm": 1.093943026548327, "learning_rate": 3.0494832727713854e-06, "loss": 0.0319, "step": 103105 }, { "epoch": 0.4302309085295124, "grad_norm": 0.7110094417962215, "learning_rate": 3.049409333536138e-06, "loss": 0.0246, "step": 103110 }, { "epoch": 0.4302517712445027, "grad_norm": 1.1145620008901194, "learning_rate": 3.0493353996789286e-06, "loss": 0.0277, "step": 103115 }, { "epoch": 0.43027263395949294, "grad_norm": 0.5246491669031339, "learning_rate": 3.049261471199105e-06, "loss": 0.0273, "step": 103120 }, { "epoch": 0.43029349667448324, "grad_norm": 1.510865482871199, "learning_rate": 3.0491875480960155e-06, "loss": 0.0299, "step": 103125 }, { "epoch": 0.4303143593894735, "grad_norm": 0.7256173662398459, "learning_rate": 3.049113630369009e-06, "loss": 0.0223, "step": 103130 }, { "epoch": 0.4303352221044638, "grad_norm": 1.2097799935444624, "learning_rate": 3.0490397180174318e-06, "loss": 0.0365, "step": 103135 }, { "epoch": 0.4303560848194541, "grad_norm": 1.1556017459499734, "learning_rate": 3.0489658110406353e-06, "loss": 0.0245, "step": 103140 }, { "epoch": 0.43037694753444433, "grad_norm": 0.35815212839996374, "learning_rate": 3.048891909437966e-06, "loss": 0.0216, "step": 103145 }, { "epoch": 0.43039781024943463, "grad_norm": 0.5781826578032083, "learning_rate": 3.048818013208773e-06, "loss": 0.0247, "step": 103150 }, { "epoch": 0.4304186729644249, "grad_norm": 1.23612685088607, "learning_rate": 3.0487441223524062e-06, "loss": 0.0346, "step": 103155 }, { "epoch": 0.4304395356794152, "grad_norm": 0.675274195742651, "learning_rate": 3.048670236868213e-06, "loss": 0.0219, "step": 103160 }, { "epoch": 0.4304603983944055, "grad_norm": 0.7458190325703192, "learning_rate": 3.048596356755544e-06, "loss": 0.0185, "step": 103165 }, { "epoch": 0.4304812611093957, "grad_norm": 0.795365838013963, "learning_rate": 3.0485224820137466e-06, "loss": 0.0285, "step": 103170 }, { "epoch": 0.430502123824386, "grad_norm": 0.5692290764093774, "learning_rate": 3.0484486126421713e-06, "loss": 0.0192, "step": 103175 }, { "epoch": 0.43052298653937626, "grad_norm": 0.5187201634269372, "learning_rate": 3.0483747486401682e-06, "loss": 0.0221, "step": 103180 }, { "epoch": 0.43054384925436656, "grad_norm": 1.9950383769820719, "learning_rate": 3.0483008900070856e-06, "loss": 0.0287, "step": 103185 }, { "epoch": 0.43056471196935686, "grad_norm": 1.9540319361127616, "learning_rate": 3.048227036742273e-06, "loss": 0.0628, "step": 103190 }, { "epoch": 0.4305855746843471, "grad_norm": 0.5754756130846778, "learning_rate": 3.048153188845081e-06, "loss": 0.0185, "step": 103195 }, { "epoch": 0.4306064373993374, "grad_norm": 1.0495787490473665, "learning_rate": 3.048079346314859e-06, "loss": 0.0252, "step": 103200 }, { "epoch": 0.4306273001143277, "grad_norm": 0.8851152948718991, "learning_rate": 3.0480055091509568e-06, "loss": 0.025, "step": 103205 }, { "epoch": 0.43064816282931795, "grad_norm": 0.5689391246561623, "learning_rate": 3.0479316773527245e-06, "loss": 0.0219, "step": 103210 }, { "epoch": 0.43066902554430825, "grad_norm": 0.7652463194792501, "learning_rate": 3.047857850919513e-06, "loss": 0.0626, "step": 103215 }, { "epoch": 0.4306898882592985, "grad_norm": 0.35843240778225455, "learning_rate": 3.0477840298506704e-06, "loss": 0.0161, "step": 103220 }, { "epoch": 0.4307107509742888, "grad_norm": 0.9056661003588665, "learning_rate": 3.0477102141455504e-06, "loss": 0.0242, "step": 103225 }, { "epoch": 0.4307316136892791, "grad_norm": 0.8981885318831233, "learning_rate": 3.0476364038035012e-06, "loss": 0.0245, "step": 103230 }, { "epoch": 0.43075247640426934, "grad_norm": 0.1854664637430947, "learning_rate": 3.0475625988238737e-06, "loss": 0.0209, "step": 103235 }, { "epoch": 0.43077333911925964, "grad_norm": 0.7503154739684648, "learning_rate": 3.0474887992060193e-06, "loss": 0.0176, "step": 103240 }, { "epoch": 0.4307942018342499, "grad_norm": 0.4801981859452637, "learning_rate": 3.047415004949288e-06, "loss": 0.0239, "step": 103245 }, { "epoch": 0.4308150645492402, "grad_norm": 0.9597663564079532, "learning_rate": 3.0473412160530307e-06, "loss": 0.0277, "step": 103250 }, { "epoch": 0.4308359272642305, "grad_norm": 0.7714774084062542, "learning_rate": 3.0472674325166004e-06, "loss": 0.0283, "step": 103255 }, { "epoch": 0.4308567899792207, "grad_norm": 0.3876997149955731, "learning_rate": 3.047193654339345e-06, "loss": 0.036, "step": 103260 }, { "epoch": 0.430877652694211, "grad_norm": 0.9999440079414504, "learning_rate": 3.0471198815206187e-06, "loss": 0.0219, "step": 103265 }, { "epoch": 0.43089851540920127, "grad_norm": 0.7254328009890596, "learning_rate": 3.0470461140597713e-06, "loss": 0.0315, "step": 103270 }, { "epoch": 0.43091937812419157, "grad_norm": 0.5830619095618133, "learning_rate": 3.0469723519561545e-06, "loss": 0.0252, "step": 103275 }, { "epoch": 0.43094024083918187, "grad_norm": 0.80242688799072, "learning_rate": 3.0468985952091207e-06, "loss": 0.0194, "step": 103280 }, { "epoch": 0.4309611035541721, "grad_norm": 0.5430453451671852, "learning_rate": 3.04682484381802e-06, "loss": 0.024, "step": 103285 }, { "epoch": 0.4309819662691624, "grad_norm": 1.0245530011358945, "learning_rate": 3.0467510977822063e-06, "loss": 0.0317, "step": 103290 }, { "epoch": 0.4310028289841527, "grad_norm": 0.4097393616302158, "learning_rate": 3.0466773571010298e-06, "loss": 0.0297, "step": 103295 }, { "epoch": 0.43102369169914295, "grad_norm": 1.0690888318167657, "learning_rate": 3.0466036217738443e-06, "loss": 0.0385, "step": 103300 }, { "epoch": 0.43104455441413325, "grad_norm": 1.1274968417653524, "learning_rate": 3.046529891799999e-06, "loss": 0.0238, "step": 103305 }, { "epoch": 0.4310654171291235, "grad_norm": 0.8154066435401671, "learning_rate": 3.0464561671788496e-06, "loss": 0.0175, "step": 103310 }, { "epoch": 0.4310862798441138, "grad_norm": 0.5870028096383623, "learning_rate": 3.0463824479097457e-06, "loss": 0.0235, "step": 103315 }, { "epoch": 0.4311071425591041, "grad_norm": 0.35241181558031315, "learning_rate": 3.0463087339920418e-06, "loss": 0.0203, "step": 103320 }, { "epoch": 0.43112800527409434, "grad_norm": 0.7914833371129908, "learning_rate": 3.0462350254250895e-06, "loss": 0.0367, "step": 103325 }, { "epoch": 0.43114886798908464, "grad_norm": 0.6379512995149613, "learning_rate": 3.0461613222082408e-06, "loss": 0.019, "step": 103330 }, { "epoch": 0.4311697307040749, "grad_norm": 0.8204071405018594, "learning_rate": 3.0460876243408507e-06, "loss": 0.0302, "step": 103335 }, { "epoch": 0.4311905934190652, "grad_norm": 0.7182292018355968, "learning_rate": 3.046013931822269e-06, "loss": 0.0212, "step": 103340 }, { "epoch": 0.4312114561340555, "grad_norm": 0.8192193566759962, "learning_rate": 3.0459402446518522e-06, "loss": 0.0352, "step": 103345 }, { "epoch": 0.4312323188490457, "grad_norm": 0.6280431686511837, "learning_rate": 3.045866562828951e-06, "loss": 0.0222, "step": 103350 }, { "epoch": 0.431253181564036, "grad_norm": 0.6441731362049254, "learning_rate": 3.0457928863529183e-06, "loss": 0.0204, "step": 103355 }, { "epoch": 0.43127404427902627, "grad_norm": 0.3767277324000581, "learning_rate": 3.04571921522311e-06, "loss": 0.0254, "step": 103360 }, { "epoch": 0.43129490699401657, "grad_norm": 0.9235131811926572, "learning_rate": 3.0456455494388775e-06, "loss": 0.0273, "step": 103365 }, { "epoch": 0.43131576970900687, "grad_norm": 0.9161563958207124, "learning_rate": 3.045571888999575e-06, "loss": 0.0257, "step": 103370 }, { "epoch": 0.4313366324239971, "grad_norm": 0.4364193746509759, "learning_rate": 3.0454982339045564e-06, "loss": 0.0224, "step": 103375 }, { "epoch": 0.4313574951389874, "grad_norm": 0.32221905138112567, "learning_rate": 3.045424584153175e-06, "loss": 0.027, "step": 103380 }, { "epoch": 0.4313783578539777, "grad_norm": 1.2320623703320308, "learning_rate": 3.045350939744785e-06, "loss": 0.0237, "step": 103385 }, { "epoch": 0.43139922056896796, "grad_norm": 0.8906706823477369, "learning_rate": 3.04527730067874e-06, "loss": 0.0304, "step": 103390 }, { "epoch": 0.43142008328395826, "grad_norm": 0.7457925562055706, "learning_rate": 3.0452036669543944e-06, "loss": 0.033, "step": 103395 }, { "epoch": 0.4314409459989485, "grad_norm": 2.010608244314296, "learning_rate": 3.0451300385711024e-06, "loss": 0.0298, "step": 103400 }, { "epoch": 0.4314618087139388, "grad_norm": 0.6745442117466466, "learning_rate": 3.0450564155282193e-06, "loss": 0.0221, "step": 103405 }, { "epoch": 0.4314826714289291, "grad_norm": 0.8786574712997668, "learning_rate": 3.044982797825098e-06, "loss": 0.0285, "step": 103410 }, { "epoch": 0.43150353414391934, "grad_norm": 0.7490980354630287, "learning_rate": 3.0449091854610934e-06, "loss": 0.0275, "step": 103415 }, { "epoch": 0.43152439685890964, "grad_norm": 0.776009942135137, "learning_rate": 3.044835578435561e-06, "loss": 0.0265, "step": 103420 }, { "epoch": 0.4315452595738999, "grad_norm": 0.7569890369299904, "learning_rate": 3.0447619767478553e-06, "loss": 0.0257, "step": 103425 }, { "epoch": 0.4315661222888902, "grad_norm": 0.48714578358639854, "learning_rate": 3.0446883803973305e-06, "loss": 0.0216, "step": 103430 }, { "epoch": 0.4315869850038805, "grad_norm": 0.8287619426371812, "learning_rate": 3.0446147893833423e-06, "loss": 0.0189, "step": 103435 }, { "epoch": 0.43160784771887073, "grad_norm": 0.5014476951426534, "learning_rate": 3.044541203705245e-06, "loss": 0.0298, "step": 103440 }, { "epoch": 0.43162871043386103, "grad_norm": 0.7338091177120034, "learning_rate": 3.044467623362395e-06, "loss": 0.0259, "step": 103445 }, { "epoch": 0.4316495731488513, "grad_norm": 1.0236048612593451, "learning_rate": 3.044394048354146e-06, "loss": 0.025, "step": 103450 }, { "epoch": 0.4316704358638416, "grad_norm": 0.5335964131956802, "learning_rate": 3.044320478679856e-06, "loss": 0.0244, "step": 103455 }, { "epoch": 0.4316912985788319, "grad_norm": 0.7402708983990041, "learning_rate": 3.044246914338878e-06, "loss": 0.0355, "step": 103460 }, { "epoch": 0.4317121612938221, "grad_norm": 0.967209631203384, "learning_rate": 3.0441733553305686e-06, "loss": 0.0261, "step": 103465 }, { "epoch": 0.4317330240088124, "grad_norm": 1.0164382363374873, "learning_rate": 3.044099801654283e-06, "loss": 0.0242, "step": 103470 }, { "epoch": 0.4317538867238027, "grad_norm": 0.7378392423735546, "learning_rate": 3.044026253309378e-06, "loss": 0.0226, "step": 103475 }, { "epoch": 0.43177474943879296, "grad_norm": 0.7694055329265952, "learning_rate": 3.04395271029521e-06, "loss": 0.0219, "step": 103480 }, { "epoch": 0.43179561215378326, "grad_norm": 0.6865917083226205, "learning_rate": 3.043879172611134e-06, "loss": 0.0204, "step": 103485 }, { "epoch": 0.4318164748687735, "grad_norm": 0.5428767639380409, "learning_rate": 3.043805640256506e-06, "loss": 0.0228, "step": 103490 }, { "epoch": 0.4318373375837638, "grad_norm": 0.9818146846017748, "learning_rate": 3.043732113230683e-06, "loss": 0.0228, "step": 103495 }, { "epoch": 0.4318582002987541, "grad_norm": 0.6673298905036616, "learning_rate": 3.043658591533021e-06, "loss": 0.0276, "step": 103500 }, { "epoch": 0.43187906301374435, "grad_norm": 1.220306902124763, "learning_rate": 3.0435850751628765e-06, "loss": 0.0309, "step": 103505 }, { "epoch": 0.43189992572873465, "grad_norm": 0.5394933131493848, "learning_rate": 3.0435115641196067e-06, "loss": 0.0239, "step": 103510 }, { "epoch": 0.4319207884437249, "grad_norm": 0.6095388953782388, "learning_rate": 3.043438058402568e-06, "loss": 0.0355, "step": 103515 }, { "epoch": 0.4319416511587152, "grad_norm": 0.6546967114005245, "learning_rate": 3.0433645580111172e-06, "loss": 0.0209, "step": 103520 }, { "epoch": 0.4319625138737055, "grad_norm": 0.6377482542888866, "learning_rate": 3.0432910629446113e-06, "loss": 0.0358, "step": 103525 }, { "epoch": 0.43198337658869573, "grad_norm": 0.5264758720572386, "learning_rate": 3.043217573202407e-06, "loss": 0.0139, "step": 103530 }, { "epoch": 0.43200423930368603, "grad_norm": 1.2685675502922433, "learning_rate": 3.043144088783862e-06, "loss": 0.0251, "step": 103535 }, { "epoch": 0.4320251020186763, "grad_norm": 0.6879931000852927, "learning_rate": 3.043070609688334e-06, "loss": 0.0232, "step": 103540 }, { "epoch": 0.4320459647336666, "grad_norm": 0.6959111749229193, "learning_rate": 3.0429971359151782e-06, "loss": 0.0209, "step": 103545 }, { "epoch": 0.4320668274486569, "grad_norm": 0.6856275370641584, "learning_rate": 3.0429236674637553e-06, "loss": 0.0322, "step": 103550 }, { "epoch": 0.4320876901636471, "grad_norm": 1.0048207058264076, "learning_rate": 3.04285020433342e-06, "loss": 0.0285, "step": 103555 }, { "epoch": 0.4321085528786374, "grad_norm": 1.4505792804318531, "learning_rate": 3.042776746523532e-06, "loss": 0.0314, "step": 103560 }, { "epoch": 0.4321294155936277, "grad_norm": 0.458960969033095, "learning_rate": 3.0427032940334485e-06, "loss": 0.0216, "step": 103565 }, { "epoch": 0.43215027830861796, "grad_norm": 0.6442475461745897, "learning_rate": 3.0426298468625272e-06, "loss": 0.026, "step": 103570 }, { "epoch": 0.43217114102360826, "grad_norm": 0.9801417368747475, "learning_rate": 3.0425564050101263e-06, "loss": 0.0282, "step": 103575 }, { "epoch": 0.4321920037385985, "grad_norm": 0.6368726903649943, "learning_rate": 3.0424829684756036e-06, "loss": 0.0289, "step": 103580 }, { "epoch": 0.4322128664535888, "grad_norm": 2.852151553300728, "learning_rate": 3.042409537258318e-06, "loss": 0.0301, "step": 103585 }, { "epoch": 0.4322337291685791, "grad_norm": 1.2632011518764146, "learning_rate": 3.0423361113576278e-06, "loss": 0.0196, "step": 103590 }, { "epoch": 0.43225459188356935, "grad_norm": 0.39893481466732955, "learning_rate": 3.042262690772891e-06, "loss": 0.0217, "step": 103595 }, { "epoch": 0.43227545459855965, "grad_norm": 2.0984468508427807, "learning_rate": 3.042189275503466e-06, "loss": 0.0349, "step": 103600 }, { "epoch": 0.4322963173135499, "grad_norm": 1.102843588194775, "learning_rate": 3.042115865548712e-06, "loss": 0.0232, "step": 103605 }, { "epoch": 0.4323171800285402, "grad_norm": 0.6628875393007346, "learning_rate": 3.042042460907988e-06, "loss": 0.0213, "step": 103610 }, { "epoch": 0.4323380427435305, "grad_norm": 0.6844111647179363, "learning_rate": 3.0419690615806523e-06, "loss": 0.0213, "step": 103615 }, { "epoch": 0.43235890545852074, "grad_norm": 0.5087346924962074, "learning_rate": 3.041895667566065e-06, "loss": 0.0237, "step": 103620 }, { "epoch": 0.43237976817351104, "grad_norm": 0.6722770615727381, "learning_rate": 3.041822278863584e-06, "loss": 0.0327, "step": 103625 }, { "epoch": 0.4324006308885013, "grad_norm": 0.7860205808997056, "learning_rate": 3.041748895472568e-06, "loss": 0.0211, "step": 103630 }, { "epoch": 0.4324214936034916, "grad_norm": 0.8253920527337587, "learning_rate": 3.041675517392379e-06, "loss": 0.0215, "step": 103635 }, { "epoch": 0.4324423563184819, "grad_norm": 0.8774216281471741, "learning_rate": 3.0416021446223735e-06, "loss": 0.0178, "step": 103640 }, { "epoch": 0.4324632190334721, "grad_norm": 0.6010093631638532, "learning_rate": 3.041528777161913e-06, "loss": 0.0297, "step": 103645 }, { "epoch": 0.4324840817484624, "grad_norm": 2.3154262547251796, "learning_rate": 3.0414554150103564e-06, "loss": 0.0297, "step": 103650 }, { "epoch": 0.4325049444634527, "grad_norm": 0.5339104209998372, "learning_rate": 3.0413820581670634e-06, "loss": 0.0214, "step": 103655 }, { "epoch": 0.43252580717844297, "grad_norm": 0.47644977384854753, "learning_rate": 3.041308706631394e-06, "loss": 0.0208, "step": 103660 }, { "epoch": 0.43254666989343327, "grad_norm": 0.8885439752976986, "learning_rate": 3.0412353604027084e-06, "loss": 0.0231, "step": 103665 }, { "epoch": 0.4325675326084235, "grad_norm": 1.9469838846479446, "learning_rate": 3.0411620194803665e-06, "loss": 0.023, "step": 103670 }, { "epoch": 0.4325883953234138, "grad_norm": 1.0456151368577526, "learning_rate": 3.041088683863728e-06, "loss": 0.0302, "step": 103675 }, { "epoch": 0.4326092580384041, "grad_norm": 0.9316148891631233, "learning_rate": 3.041015353552155e-06, "loss": 0.0189, "step": 103680 }, { "epoch": 0.43263012075339435, "grad_norm": 0.8971676441280659, "learning_rate": 3.040942028545006e-06, "loss": 0.0245, "step": 103685 }, { "epoch": 0.43265098346838465, "grad_norm": 0.8173989408345017, "learning_rate": 3.0408687088416415e-06, "loss": 0.0251, "step": 103690 }, { "epoch": 0.4326718461833749, "grad_norm": 0.574863558756532, "learning_rate": 3.0407953944414242e-06, "loss": 0.0268, "step": 103695 }, { "epoch": 0.4326927088983652, "grad_norm": 0.5315154994074763, "learning_rate": 3.040722085343712e-06, "loss": 0.0238, "step": 103700 }, { "epoch": 0.4327135716133555, "grad_norm": 0.9024443034559616, "learning_rate": 3.040648781547868e-06, "loss": 0.0261, "step": 103705 }, { "epoch": 0.43273443432834574, "grad_norm": 0.38327431980298077, "learning_rate": 3.040575483053253e-06, "loss": 0.0231, "step": 103710 }, { "epoch": 0.43275529704333604, "grad_norm": 1.006355774045418, "learning_rate": 3.040502189859226e-06, "loss": 0.0266, "step": 103715 }, { "epoch": 0.4327761597583263, "grad_norm": 0.9217040233283864, "learning_rate": 3.040428901965151e-06, "loss": 0.0289, "step": 103720 }, { "epoch": 0.4327970224733166, "grad_norm": 0.7400049942168655, "learning_rate": 3.040355619370387e-06, "loss": 0.018, "step": 103725 }, { "epoch": 0.4328178851883069, "grad_norm": 1.3223389781276342, "learning_rate": 3.040282342074297e-06, "loss": 0.0289, "step": 103730 }, { "epoch": 0.4328387479032971, "grad_norm": 0.4392102446901665, "learning_rate": 3.0402090700762416e-06, "loss": 0.0204, "step": 103735 }, { "epoch": 0.4328596106182874, "grad_norm": 0.7918448271248445, "learning_rate": 3.0401358033755828e-06, "loss": 0.0222, "step": 103740 }, { "epoch": 0.4328804733332777, "grad_norm": 0.5665144323212299, "learning_rate": 3.0400625419716818e-06, "loss": 0.0248, "step": 103745 }, { "epoch": 0.43290133604826797, "grad_norm": 0.6228508730018423, "learning_rate": 3.039989285863901e-06, "loss": 0.0281, "step": 103750 }, { "epoch": 0.43292219876325827, "grad_norm": 0.873896401122084, "learning_rate": 3.0399160350516014e-06, "loss": 0.0316, "step": 103755 }, { "epoch": 0.4329430614782485, "grad_norm": 1.0420185593838112, "learning_rate": 3.0398427895341464e-06, "loss": 0.0308, "step": 103760 }, { "epoch": 0.4329639241932388, "grad_norm": 0.46033442173786765, "learning_rate": 3.0397695493108976e-06, "loss": 0.0275, "step": 103765 }, { "epoch": 0.4329847869082291, "grad_norm": 0.49758509530294953, "learning_rate": 3.0396963143812168e-06, "loss": 0.0208, "step": 103770 }, { "epoch": 0.43300564962321936, "grad_norm": 0.647889913105593, "learning_rate": 3.039623084744467e-06, "loss": 0.0266, "step": 103775 }, { "epoch": 0.43302651233820966, "grad_norm": 2.147100135635151, "learning_rate": 3.0395498604000095e-06, "loss": 0.0223, "step": 103780 }, { "epoch": 0.4330473750531999, "grad_norm": 1.0769530309032542, "learning_rate": 3.0394766413472083e-06, "loss": 0.0318, "step": 103785 }, { "epoch": 0.4330682377681902, "grad_norm": 0.6168554593222264, "learning_rate": 3.0394034275854255e-06, "loss": 0.0311, "step": 103790 }, { "epoch": 0.4330891004831805, "grad_norm": 0.6900493682902737, "learning_rate": 3.0393302191140245e-06, "loss": 0.0304, "step": 103795 }, { "epoch": 0.43310996319817074, "grad_norm": 0.6981276923381077, "learning_rate": 3.0392570159323663e-06, "loss": 0.0275, "step": 103800 }, { "epoch": 0.43313082591316104, "grad_norm": 0.7856347389421499, "learning_rate": 3.0391838180398165e-06, "loss": 0.0288, "step": 103805 }, { "epoch": 0.4331516886281513, "grad_norm": 0.628627206782622, "learning_rate": 3.039110625435736e-06, "loss": 0.0225, "step": 103810 }, { "epoch": 0.4331725513431416, "grad_norm": 0.9012743457452687, "learning_rate": 3.0390374381194898e-06, "loss": 0.0242, "step": 103815 }, { "epoch": 0.4331934140581319, "grad_norm": 0.812624955439536, "learning_rate": 3.0389642560904394e-06, "loss": 0.0278, "step": 103820 }, { "epoch": 0.43321427677312213, "grad_norm": 0.5259625435971581, "learning_rate": 3.03889107934795e-06, "loss": 0.022, "step": 103825 }, { "epoch": 0.43323513948811243, "grad_norm": 0.35677064145896814, "learning_rate": 3.038817907891384e-06, "loss": 0.0196, "step": 103830 }, { "epoch": 0.43325600220310273, "grad_norm": 0.5913592065941624, "learning_rate": 3.038744741720105e-06, "loss": 0.021, "step": 103835 }, { "epoch": 0.433276864918093, "grad_norm": 0.8361527399985715, "learning_rate": 3.0386715808334775e-06, "loss": 0.0218, "step": 103840 }, { "epoch": 0.4332977276330833, "grad_norm": 1.1769832092792207, "learning_rate": 3.0385984252308653e-06, "loss": 0.024, "step": 103845 }, { "epoch": 0.4333185903480735, "grad_norm": 0.488066604224686, "learning_rate": 3.0385252749116317e-06, "loss": 0.0221, "step": 103850 }, { "epoch": 0.4333394530630638, "grad_norm": 0.6585448767542136, "learning_rate": 3.0384521298751414e-06, "loss": 0.0272, "step": 103855 }, { "epoch": 0.4333603157780541, "grad_norm": 1.1710342770129352, "learning_rate": 3.0383789901207583e-06, "loss": 0.0313, "step": 103860 }, { "epoch": 0.43338117849304436, "grad_norm": 0.5564241730468026, "learning_rate": 3.0383058556478467e-06, "loss": 0.0208, "step": 103865 }, { "epoch": 0.43340204120803466, "grad_norm": 1.128309084095993, "learning_rate": 3.038232726455771e-06, "loss": 0.0305, "step": 103870 }, { "epoch": 0.4334229039230249, "grad_norm": 0.7547104971682783, "learning_rate": 3.0381596025438953e-06, "loss": 0.0211, "step": 103875 }, { "epoch": 0.4334437666380152, "grad_norm": 0.7854383873656237, "learning_rate": 3.0380864839115853e-06, "loss": 0.0276, "step": 103880 }, { "epoch": 0.4334646293530055, "grad_norm": 0.8686159866683242, "learning_rate": 3.038013370558205e-06, "loss": 0.0275, "step": 103885 }, { "epoch": 0.43348549206799575, "grad_norm": 1.0725762927418834, "learning_rate": 3.0379402624831195e-06, "loss": 0.0296, "step": 103890 }, { "epoch": 0.43350635478298605, "grad_norm": 0.6714903879450859, "learning_rate": 3.037867159685693e-06, "loss": 0.0231, "step": 103895 }, { "epoch": 0.4335272174979763, "grad_norm": 0.7291374473486876, "learning_rate": 3.037794062165291e-06, "loss": 0.0231, "step": 103900 }, { "epoch": 0.4335480802129666, "grad_norm": 0.8463420895039747, "learning_rate": 3.0377209699212795e-06, "loss": 0.0251, "step": 103905 }, { "epoch": 0.4335689429279569, "grad_norm": 0.7948226846689694, "learning_rate": 3.037647882953022e-06, "loss": 0.023, "step": 103910 }, { "epoch": 0.43358980564294713, "grad_norm": 0.9495712463684026, "learning_rate": 3.0375748012598853e-06, "loss": 0.033, "step": 103915 }, { "epoch": 0.43361066835793743, "grad_norm": 0.838479581522396, "learning_rate": 3.0375017248412355e-06, "loss": 0.0214, "step": 103920 }, { "epoch": 0.43363153107292773, "grad_norm": 0.5919770399605664, "learning_rate": 3.037428653696436e-06, "loss": 0.0311, "step": 103925 }, { "epoch": 0.433652393787918, "grad_norm": 0.9606916346183876, "learning_rate": 3.0373555878248532e-06, "loss": 0.0315, "step": 103930 }, { "epoch": 0.4336732565029083, "grad_norm": 0.6328511272136919, "learning_rate": 3.0372825272258537e-06, "loss": 0.0351, "step": 103935 }, { "epoch": 0.4336941192178985, "grad_norm": 1.0345782888554842, "learning_rate": 3.0372094718988032e-06, "loss": 0.0272, "step": 103940 }, { "epoch": 0.4337149819328888, "grad_norm": 1.063844786416935, "learning_rate": 3.037136421843067e-06, "loss": 0.0255, "step": 103945 }, { "epoch": 0.4337358446478791, "grad_norm": 0.5749828385015494, "learning_rate": 3.037063377058012e-06, "loss": 0.0207, "step": 103950 }, { "epoch": 0.43375670736286936, "grad_norm": 1.363497515560348, "learning_rate": 3.036990337543004e-06, "loss": 0.0214, "step": 103955 }, { "epoch": 0.43377757007785966, "grad_norm": 0.25426105090062706, "learning_rate": 3.0369173032974096e-06, "loss": 0.0269, "step": 103960 }, { "epoch": 0.4337984327928499, "grad_norm": 0.5829961727963945, "learning_rate": 3.036844274320595e-06, "loss": 0.027, "step": 103965 }, { "epoch": 0.4338192955078402, "grad_norm": 0.6523711769567341, "learning_rate": 3.0367712506119267e-06, "loss": 0.0255, "step": 103970 }, { "epoch": 0.4338401582228305, "grad_norm": 0.5574359016950519, "learning_rate": 3.0366982321707716e-06, "loss": 0.0303, "step": 103975 }, { "epoch": 0.43386102093782075, "grad_norm": 0.6923593969045957, "learning_rate": 3.0366252189964962e-06, "loss": 0.0221, "step": 103980 }, { "epoch": 0.43388188365281105, "grad_norm": 0.6363049683619489, "learning_rate": 3.036552211088467e-06, "loss": 0.0234, "step": 103985 }, { "epoch": 0.4339027463678013, "grad_norm": 0.9243287575589857, "learning_rate": 3.0364792084460522e-06, "loss": 0.0254, "step": 103990 }, { "epoch": 0.4339236090827916, "grad_norm": 1.32287686599581, "learning_rate": 3.036406211068617e-06, "loss": 0.0228, "step": 103995 }, { "epoch": 0.4339444717977819, "grad_norm": 0.722584251090579, "learning_rate": 3.0363332189555307e-06, "loss": 0.0313, "step": 104000 }, { "epoch": 0.43396533451277214, "grad_norm": 0.8765425676530315, "learning_rate": 3.036260232106159e-06, "loss": 0.0212, "step": 104005 }, { "epoch": 0.43398619722776244, "grad_norm": 1.0250747908679976, "learning_rate": 3.03618725051987e-06, "loss": 0.036, "step": 104010 }, { "epoch": 0.43400705994275274, "grad_norm": 0.5949921569180137, "learning_rate": 3.0361142741960305e-06, "loss": 0.0258, "step": 104015 }, { "epoch": 0.434027922657743, "grad_norm": 0.5269152948555293, "learning_rate": 3.0360413031340093e-06, "loss": 0.0234, "step": 104020 }, { "epoch": 0.4340487853727333, "grad_norm": 0.43395058518520224, "learning_rate": 3.035968337333173e-06, "loss": 0.0251, "step": 104025 }, { "epoch": 0.4340696480877235, "grad_norm": 0.8257678411811862, "learning_rate": 3.03589537679289e-06, "loss": 0.0253, "step": 104030 }, { "epoch": 0.4340905108027138, "grad_norm": 0.7383922373996008, "learning_rate": 3.0358224215125275e-06, "loss": 0.0373, "step": 104035 }, { "epoch": 0.4341113735177041, "grad_norm": 0.5492955016608787, "learning_rate": 3.035749471491455e-06, "loss": 0.0211, "step": 104040 }, { "epoch": 0.43413223623269437, "grad_norm": 0.3554286988962617, "learning_rate": 3.0356765267290393e-06, "loss": 0.0191, "step": 104045 }, { "epoch": 0.43415309894768467, "grad_norm": 0.7102241029213522, "learning_rate": 3.0356035872246486e-06, "loss": 0.0287, "step": 104050 }, { "epoch": 0.4341739616626749, "grad_norm": 0.9453536190129863, "learning_rate": 3.0355306529776524e-06, "loss": 0.0211, "step": 104055 }, { "epoch": 0.4341948243776652, "grad_norm": 0.7065417168448577, "learning_rate": 3.0354577239874185e-06, "loss": 0.0203, "step": 104060 }, { "epoch": 0.4342156870926555, "grad_norm": 0.6750340977493947, "learning_rate": 3.0353848002533155e-06, "loss": 0.0205, "step": 104065 }, { "epoch": 0.43423654980764576, "grad_norm": 0.8224601207348861, "learning_rate": 3.0353118817747113e-06, "loss": 0.0264, "step": 104070 }, { "epoch": 0.43425741252263605, "grad_norm": 1.0452871943565276, "learning_rate": 3.0352389685509755e-06, "loss": 0.0285, "step": 104075 }, { "epoch": 0.4342782752376263, "grad_norm": 1.6079351281346943, "learning_rate": 3.035166060581477e-06, "loss": 0.0226, "step": 104080 }, { "epoch": 0.4342991379526166, "grad_norm": 0.6994316790788776, "learning_rate": 3.0350931578655847e-06, "loss": 0.019, "step": 104085 }, { "epoch": 0.4343200006676069, "grad_norm": 0.5024374689905688, "learning_rate": 3.0350202604026676e-06, "loss": 0.0229, "step": 104090 }, { "epoch": 0.43434086338259714, "grad_norm": 0.4347238835406561, "learning_rate": 3.034947368192094e-06, "loss": 0.0226, "step": 104095 }, { "epoch": 0.43436172609758744, "grad_norm": 0.6161944695102388, "learning_rate": 3.034874481233235e-06, "loss": 0.0262, "step": 104100 }, { "epoch": 0.43438258881257774, "grad_norm": 1.3948058192917532, "learning_rate": 3.03480159952546e-06, "loss": 0.0302, "step": 104105 }, { "epoch": 0.434403451527568, "grad_norm": 0.7845850626275135, "learning_rate": 3.0347287230681362e-06, "loss": 0.0272, "step": 104110 }, { "epoch": 0.4344243142425583, "grad_norm": 0.7308556750243871, "learning_rate": 3.034655851860635e-06, "loss": 0.0271, "step": 104115 }, { "epoch": 0.43444517695754853, "grad_norm": 0.731834104285269, "learning_rate": 3.0345829859023257e-06, "loss": 0.0233, "step": 104120 }, { "epoch": 0.43446603967253883, "grad_norm": 0.9074725102715552, "learning_rate": 3.0345101251925786e-06, "loss": 0.0311, "step": 104125 }, { "epoch": 0.43448690238752913, "grad_norm": 0.2743875337493077, "learning_rate": 3.0344372697307634e-06, "loss": 0.0163, "step": 104130 }, { "epoch": 0.43450776510251937, "grad_norm": 0.5878347384548219, "learning_rate": 3.0343644195162487e-06, "loss": 0.0241, "step": 104135 }, { "epoch": 0.43452862781750967, "grad_norm": 0.5975642521240946, "learning_rate": 3.034291574548407e-06, "loss": 0.0297, "step": 104140 }, { "epoch": 0.4345494905324999, "grad_norm": 0.8046013859711086, "learning_rate": 3.0342187348266073e-06, "loss": 0.0245, "step": 104145 }, { "epoch": 0.4345703532474902, "grad_norm": 1.0113935513616132, "learning_rate": 3.03414590035022e-06, "loss": 0.028, "step": 104150 }, { "epoch": 0.4345912159624805, "grad_norm": 0.9561703384164868, "learning_rate": 3.034073071118616e-06, "loss": 0.026, "step": 104155 }, { "epoch": 0.43461207867747076, "grad_norm": 0.5130539449287711, "learning_rate": 3.0340002471311654e-06, "loss": 0.0312, "step": 104160 }, { "epoch": 0.43463294139246106, "grad_norm": 0.7594267858390332, "learning_rate": 3.0339274283872387e-06, "loss": 0.0303, "step": 104165 }, { "epoch": 0.4346538041074513, "grad_norm": 0.6537194330064542, "learning_rate": 3.0338546148862076e-06, "loss": 0.0238, "step": 104170 }, { "epoch": 0.4346746668224416, "grad_norm": 0.7333286029209587, "learning_rate": 3.0337818066274417e-06, "loss": 0.0267, "step": 104175 }, { "epoch": 0.4346955295374319, "grad_norm": 0.3989461448361048, "learning_rate": 3.0337090036103135e-06, "loss": 0.0207, "step": 104180 }, { "epoch": 0.43471639225242215, "grad_norm": 0.9975160861045684, "learning_rate": 3.0336362058341928e-06, "loss": 0.0302, "step": 104185 }, { "epoch": 0.43473725496741245, "grad_norm": 0.6806014607876177, "learning_rate": 3.033563413298452e-06, "loss": 0.0187, "step": 104190 }, { "epoch": 0.43475811768240274, "grad_norm": 0.8870366717174708, "learning_rate": 3.033490626002461e-06, "loss": 0.0247, "step": 104195 }, { "epoch": 0.434778980397393, "grad_norm": 1.0755235172881765, "learning_rate": 3.033417843945592e-06, "loss": 0.0223, "step": 104200 }, { "epoch": 0.4347998431123833, "grad_norm": 0.45000125233536675, "learning_rate": 3.033345067127217e-06, "loss": 0.0221, "step": 104205 }, { "epoch": 0.43482070582737353, "grad_norm": 1.104763764564769, "learning_rate": 3.0332722955467066e-06, "loss": 0.0323, "step": 104210 }, { "epoch": 0.43484156854236383, "grad_norm": 0.7219328785793047, "learning_rate": 3.0331995292034335e-06, "loss": 0.0216, "step": 104215 }, { "epoch": 0.43486243125735413, "grad_norm": 0.7885494716780531, "learning_rate": 3.0331267680967684e-06, "loss": 0.0206, "step": 104220 }, { "epoch": 0.4348832939723444, "grad_norm": 0.8028706656514843, "learning_rate": 3.033054012226085e-06, "loss": 0.0213, "step": 104225 }, { "epoch": 0.4349041566873347, "grad_norm": 0.9853171511034914, "learning_rate": 3.0329812615907535e-06, "loss": 0.028, "step": 104230 }, { "epoch": 0.4349250194023249, "grad_norm": 0.9801125126359935, "learning_rate": 3.032908516190147e-06, "loss": 0.0277, "step": 104235 }, { "epoch": 0.4349458821173152, "grad_norm": 0.5119425263238964, "learning_rate": 3.0328357760236375e-06, "loss": 0.0201, "step": 104240 }, { "epoch": 0.4349667448323055, "grad_norm": 0.43378990486366303, "learning_rate": 3.032763041090598e-06, "loss": 0.0221, "step": 104245 }, { "epoch": 0.43498760754729576, "grad_norm": 0.6132898270961166, "learning_rate": 3.0326903113904e-06, "loss": 0.026, "step": 104250 }, { "epoch": 0.43500847026228606, "grad_norm": 0.7442034960190386, "learning_rate": 3.032617586922416e-06, "loss": 0.0376, "step": 104255 }, { "epoch": 0.4350293329772763, "grad_norm": 0.5314238936919607, "learning_rate": 3.0325448676860196e-06, "loss": 0.0234, "step": 104260 }, { "epoch": 0.4350501956922666, "grad_norm": 1.205106936767136, "learning_rate": 3.0324721536805836e-06, "loss": 0.0307, "step": 104265 }, { "epoch": 0.4350710584072569, "grad_norm": 0.726171220114039, "learning_rate": 3.03239944490548e-06, "loss": 0.0949, "step": 104270 }, { "epoch": 0.43509192112224715, "grad_norm": 0.8890975092561614, "learning_rate": 3.0323267413600827e-06, "loss": 0.0244, "step": 104275 }, { "epoch": 0.43511278383723745, "grad_norm": 0.9855450161858968, "learning_rate": 3.0322540430437637e-06, "loss": 0.0288, "step": 104280 }, { "epoch": 0.4351336465522277, "grad_norm": 0.5460807064117942, "learning_rate": 3.032181349955898e-06, "loss": 0.0268, "step": 104285 }, { "epoch": 0.435154509267218, "grad_norm": 0.9668357139589557, "learning_rate": 3.0321086620958566e-06, "loss": 0.0227, "step": 104290 }, { "epoch": 0.4351753719822083, "grad_norm": 4.120226635450833, "learning_rate": 3.0320359794630146e-06, "loss": 0.0318, "step": 104295 }, { "epoch": 0.43519623469719854, "grad_norm": 0.48132070649070463, "learning_rate": 3.0319633020567453e-06, "loss": 0.0248, "step": 104300 }, { "epoch": 0.43521709741218884, "grad_norm": 0.6225318702828179, "learning_rate": 3.0318906298764215e-06, "loss": 0.0291, "step": 104305 }, { "epoch": 0.43523796012717914, "grad_norm": 0.7667764464966461, "learning_rate": 3.0318179629214178e-06, "loss": 0.0305, "step": 104310 }, { "epoch": 0.4352588228421694, "grad_norm": 0.7618094140043361, "learning_rate": 3.031745301191108e-06, "loss": 0.0262, "step": 104315 }, { "epoch": 0.4352796855571597, "grad_norm": 1.2280759583178746, "learning_rate": 3.0316726446848655e-06, "loss": 0.0275, "step": 104320 }, { "epoch": 0.4353005482721499, "grad_norm": 0.808024998124405, "learning_rate": 3.031599993402065e-06, "loss": 0.0257, "step": 104325 }, { "epoch": 0.4353214109871402, "grad_norm": 1.113857508766001, "learning_rate": 3.0315273473420797e-06, "loss": 0.0292, "step": 104330 }, { "epoch": 0.4353422737021305, "grad_norm": 0.9907585808496182, "learning_rate": 3.0314547065042852e-06, "loss": 0.0202, "step": 104335 }, { "epoch": 0.43536313641712077, "grad_norm": 0.8650231558587396, "learning_rate": 3.031382070888054e-06, "loss": 0.0301, "step": 104340 }, { "epoch": 0.43538399913211107, "grad_norm": 0.4686531080481422, "learning_rate": 3.0313094404927636e-06, "loss": 0.0288, "step": 104345 }, { "epoch": 0.4354048618471013, "grad_norm": 1.1838644536144134, "learning_rate": 3.0312368153177853e-06, "loss": 0.0268, "step": 104350 }, { "epoch": 0.4354257245620916, "grad_norm": 1.0835244900167798, "learning_rate": 3.0311641953624955e-06, "loss": 0.023, "step": 104355 }, { "epoch": 0.4354465872770819, "grad_norm": 0.640853960339051, "learning_rate": 3.0310915806262682e-06, "loss": 0.0304, "step": 104360 }, { "epoch": 0.43546744999207215, "grad_norm": 0.5667310401436995, "learning_rate": 3.031018971108479e-06, "loss": 0.0168, "step": 104365 }, { "epoch": 0.43548831270706245, "grad_norm": 1.22237383158736, "learning_rate": 3.0309463668085026e-06, "loss": 0.0323, "step": 104370 }, { "epoch": 0.4355091754220527, "grad_norm": 0.8841269242125517, "learning_rate": 3.030873767725714e-06, "loss": 0.0298, "step": 104375 }, { "epoch": 0.435530038137043, "grad_norm": 0.8621776614534894, "learning_rate": 3.0308011738594888e-06, "loss": 0.0282, "step": 104380 }, { "epoch": 0.4355509008520333, "grad_norm": 1.2086314451287292, "learning_rate": 3.0307285852092018e-06, "loss": 0.0399, "step": 104385 }, { "epoch": 0.43557176356702354, "grad_norm": 0.8804728724814397, "learning_rate": 3.0306560017742288e-06, "loss": 0.0245, "step": 104390 }, { "epoch": 0.43559262628201384, "grad_norm": 0.6059224295338012, "learning_rate": 3.030583423553945e-06, "loss": 0.0234, "step": 104395 }, { "epoch": 0.43561348899700414, "grad_norm": 0.4367814283276802, "learning_rate": 3.0305108505477265e-06, "loss": 0.024, "step": 104400 }, { "epoch": 0.4356343517119944, "grad_norm": 0.7525276719411984, "learning_rate": 3.030438282754948e-06, "loss": 0.0329, "step": 104405 }, { "epoch": 0.4356552144269847, "grad_norm": 0.5430087632677448, "learning_rate": 3.0303657201749864e-06, "loss": 0.0333, "step": 104410 }, { "epoch": 0.4356760771419749, "grad_norm": 1.0638618150996741, "learning_rate": 3.030293162807218e-06, "loss": 0.0343, "step": 104415 }, { "epoch": 0.4356969398569652, "grad_norm": 0.8728451704243464, "learning_rate": 3.0302206106510175e-06, "loss": 0.0349, "step": 104420 }, { "epoch": 0.4357178025719555, "grad_norm": 2.5102449128374182, "learning_rate": 3.0301480637057614e-06, "loss": 0.0298, "step": 104425 }, { "epoch": 0.43573866528694577, "grad_norm": 0.7624971398940591, "learning_rate": 3.030075521970827e-06, "loss": 0.0234, "step": 104430 }, { "epoch": 0.43575952800193607, "grad_norm": 0.634066261125979, "learning_rate": 3.030002985445589e-06, "loss": 0.0322, "step": 104435 }, { "epoch": 0.4357803907169263, "grad_norm": 0.757135342141636, "learning_rate": 3.029930454129425e-06, "loss": 0.0295, "step": 104440 }, { "epoch": 0.4358012534319166, "grad_norm": 0.5977391985402277, "learning_rate": 3.029857928021711e-06, "loss": 0.0229, "step": 104445 }, { "epoch": 0.4358221161469069, "grad_norm": 1.4045243810999706, "learning_rate": 3.029785407121825e-06, "loss": 0.0306, "step": 104450 }, { "epoch": 0.43584297886189716, "grad_norm": 0.717343086714111, "learning_rate": 3.029712891429142e-06, "loss": 0.0272, "step": 104455 }, { "epoch": 0.43586384157688746, "grad_norm": 0.5812746243957193, "learning_rate": 3.029640380943039e-06, "loss": 0.0252, "step": 104460 }, { "epoch": 0.4358847042918777, "grad_norm": 0.46135299288680237, "learning_rate": 3.029567875662895e-06, "loss": 0.0189, "step": 104465 }, { "epoch": 0.435905567006868, "grad_norm": 0.6307595420644299, "learning_rate": 3.0294953755880846e-06, "loss": 0.0242, "step": 104470 }, { "epoch": 0.4359264297218583, "grad_norm": 0.471833930257932, "learning_rate": 3.0294228807179864e-06, "loss": 0.0261, "step": 104475 }, { "epoch": 0.43594729243684854, "grad_norm": 0.769666052286492, "learning_rate": 3.0293503910519772e-06, "loss": 0.022, "step": 104480 }, { "epoch": 0.43596815515183884, "grad_norm": 0.6663956792792876, "learning_rate": 3.0292779065894346e-06, "loss": 0.0265, "step": 104485 }, { "epoch": 0.43598901786682914, "grad_norm": 0.8238139962787926, "learning_rate": 3.029205427329736e-06, "loss": 0.0254, "step": 104490 }, { "epoch": 0.4360098805818194, "grad_norm": 0.4895185277010552, "learning_rate": 3.0291329532722596e-06, "loss": 0.0376, "step": 104495 }, { "epoch": 0.4360307432968097, "grad_norm": 0.9364606020919826, "learning_rate": 3.0290604844163825e-06, "loss": 0.0335, "step": 104500 }, { "epoch": 0.43605160601179993, "grad_norm": 0.7468713538241493, "learning_rate": 3.028988020761482e-06, "loss": 0.0302, "step": 104505 }, { "epoch": 0.43607246872679023, "grad_norm": 0.5947548103406197, "learning_rate": 3.0289155623069366e-06, "loss": 0.0211, "step": 104510 }, { "epoch": 0.43609333144178053, "grad_norm": 0.4066339415255294, "learning_rate": 3.028843109052125e-06, "loss": 0.0209, "step": 104515 }, { "epoch": 0.4361141941567708, "grad_norm": 0.814190631910989, "learning_rate": 3.028770660996424e-06, "loss": 0.0227, "step": 104520 }, { "epoch": 0.4361350568717611, "grad_norm": 0.6628925215009638, "learning_rate": 3.0286982181392133e-06, "loss": 0.0279, "step": 104525 }, { "epoch": 0.4361559195867513, "grad_norm": 0.7384232707386347, "learning_rate": 3.0286257804798696e-06, "loss": 0.0217, "step": 104530 }, { "epoch": 0.4361767823017416, "grad_norm": 0.8309734740604059, "learning_rate": 3.0285533480177727e-06, "loss": 0.0284, "step": 104535 }, { "epoch": 0.4361976450167319, "grad_norm": 1.238854165501758, "learning_rate": 3.0284809207523e-06, "loss": 0.0303, "step": 104540 }, { "epoch": 0.43621850773172216, "grad_norm": 0.6307528929321372, "learning_rate": 3.028408498682831e-06, "loss": 0.0257, "step": 104545 }, { "epoch": 0.43623937044671246, "grad_norm": 1.5268576852078921, "learning_rate": 3.0283360818087447e-06, "loss": 0.0389, "step": 104550 }, { "epoch": 0.4362602331617027, "grad_norm": 0.8960135706529152, "learning_rate": 3.0282636701294194e-06, "loss": 0.0256, "step": 104555 }, { "epoch": 0.436281095876693, "grad_norm": 0.6628044567888621, "learning_rate": 3.0281912636442336e-06, "loss": 0.0235, "step": 104560 }, { "epoch": 0.4363019585916833, "grad_norm": 0.7540569357122772, "learning_rate": 3.028118862352567e-06, "loss": 0.0219, "step": 104565 }, { "epoch": 0.43632282130667355, "grad_norm": 1.1917198483565536, "learning_rate": 3.028046466253799e-06, "loss": 0.0422, "step": 104570 }, { "epoch": 0.43634368402166385, "grad_norm": 0.4217944353234969, "learning_rate": 3.0279740753473086e-06, "loss": 0.0283, "step": 104575 }, { "epoch": 0.43636454673665415, "grad_norm": 0.6610901605706163, "learning_rate": 3.0279016896324748e-06, "loss": 0.0219, "step": 104580 }, { "epoch": 0.4363854094516444, "grad_norm": 0.5944423119097316, "learning_rate": 3.0278293091086772e-06, "loss": 0.0295, "step": 104585 }, { "epoch": 0.4364062721666347, "grad_norm": 0.7692900690318398, "learning_rate": 3.027756933775296e-06, "loss": 0.0241, "step": 104590 }, { "epoch": 0.43642713488162493, "grad_norm": 0.6294165471934376, "learning_rate": 3.0276845636317102e-06, "loss": 0.019, "step": 104595 }, { "epoch": 0.43644799759661523, "grad_norm": 0.46928837536888085, "learning_rate": 3.0276121986773e-06, "loss": 0.0176, "step": 104600 }, { "epoch": 0.43646886031160553, "grad_norm": 0.6755281297724456, "learning_rate": 3.0275398389114457e-06, "loss": 0.0285, "step": 104605 }, { "epoch": 0.4364897230265958, "grad_norm": 0.5991975263667343, "learning_rate": 3.027467484333526e-06, "loss": 0.0187, "step": 104610 }, { "epoch": 0.4365105857415861, "grad_norm": 1.042311401288606, "learning_rate": 3.027395134942922e-06, "loss": 0.0294, "step": 104615 }, { "epoch": 0.4365314484565763, "grad_norm": 0.5276829021646088, "learning_rate": 3.0273227907390135e-06, "loss": 0.0243, "step": 104620 }, { "epoch": 0.4365523111715666, "grad_norm": 0.634640707298824, "learning_rate": 3.027250451721181e-06, "loss": 0.0226, "step": 104625 }, { "epoch": 0.4365731738865569, "grad_norm": 0.8524930917476503, "learning_rate": 3.027178117888805e-06, "loss": 0.0225, "step": 104630 }, { "epoch": 0.43659403660154716, "grad_norm": 0.817514776855775, "learning_rate": 3.027105789241266e-06, "loss": 0.0252, "step": 104635 }, { "epoch": 0.43661489931653746, "grad_norm": 0.6290406993976885, "learning_rate": 3.0270334657779444e-06, "loss": 0.0173, "step": 104640 }, { "epoch": 0.4366357620315277, "grad_norm": 0.31044261213922747, "learning_rate": 3.0269611474982207e-06, "loss": 0.0165, "step": 104645 }, { "epoch": 0.436656624746518, "grad_norm": 0.39572957898757877, "learning_rate": 3.0268888344014764e-06, "loss": 0.0269, "step": 104650 }, { "epoch": 0.4366774874615083, "grad_norm": 1.1259552570927007, "learning_rate": 3.0268165264870926e-06, "loss": 0.0232, "step": 104655 }, { "epoch": 0.43669835017649855, "grad_norm": 0.49408701389460796, "learning_rate": 3.0267442237544486e-06, "loss": 0.016, "step": 104660 }, { "epoch": 0.43671921289148885, "grad_norm": 0.9155518787775322, "learning_rate": 3.0266719262029277e-06, "loss": 0.0339, "step": 104665 }, { "epoch": 0.43674007560647915, "grad_norm": 1.226425726369244, "learning_rate": 3.02659963383191e-06, "loss": 0.0304, "step": 104670 }, { "epoch": 0.4367609383214694, "grad_norm": 0.8478003447626687, "learning_rate": 3.0265273466407767e-06, "loss": 0.03, "step": 104675 }, { "epoch": 0.4367818010364597, "grad_norm": 0.5208410464309774, "learning_rate": 3.0264550646289103e-06, "loss": 0.0265, "step": 104680 }, { "epoch": 0.43680266375144994, "grad_norm": 0.8381108230995491, "learning_rate": 3.026382787795691e-06, "loss": 0.0236, "step": 104685 }, { "epoch": 0.43682352646644024, "grad_norm": 1.184556346939091, "learning_rate": 3.026310516140502e-06, "loss": 0.0327, "step": 104690 }, { "epoch": 0.43684438918143054, "grad_norm": 0.8563617000221777, "learning_rate": 3.026238249662724e-06, "loss": 0.0278, "step": 104695 }, { "epoch": 0.4368652518964208, "grad_norm": 0.7045853179894394, "learning_rate": 3.026165988361738e-06, "loss": 0.0307, "step": 104700 }, { "epoch": 0.4368861146114111, "grad_norm": 0.6048198188659177, "learning_rate": 3.026093732236928e-06, "loss": 0.0243, "step": 104705 }, { "epoch": 0.4369069773264013, "grad_norm": 1.0997594378004545, "learning_rate": 3.0260214812876746e-06, "loss": 0.023, "step": 104710 }, { "epoch": 0.4369278400413916, "grad_norm": 0.9204969291527392, "learning_rate": 3.025949235513361e-06, "loss": 0.0273, "step": 104715 }, { "epoch": 0.4369487027563819, "grad_norm": 0.7930456208721145, "learning_rate": 3.0258769949133686e-06, "loss": 0.0423, "step": 104720 }, { "epoch": 0.43696956547137217, "grad_norm": 0.292616290115276, "learning_rate": 3.02580475948708e-06, "loss": 0.0266, "step": 104725 }, { "epoch": 0.43699042818636247, "grad_norm": 0.7463508552385758, "learning_rate": 3.0257325292338775e-06, "loss": 0.0305, "step": 104730 }, { "epoch": 0.4370112909013527, "grad_norm": 0.8062314519779037, "learning_rate": 3.025660304153144e-06, "loss": 0.0223, "step": 104735 }, { "epoch": 0.437032153616343, "grad_norm": 0.44784127884329006, "learning_rate": 3.025588084244263e-06, "loss": 0.0207, "step": 104740 }, { "epoch": 0.4370530163313333, "grad_norm": 0.7570514977239519, "learning_rate": 3.0255158695066154e-06, "loss": 0.0249, "step": 104745 }, { "epoch": 0.43707387904632355, "grad_norm": 0.3579147733297512, "learning_rate": 3.025443659939586e-06, "loss": 0.0288, "step": 104750 }, { "epoch": 0.43709474176131385, "grad_norm": 0.4734821716084459, "learning_rate": 3.025371455542556e-06, "loss": 0.0278, "step": 104755 }, { "epoch": 0.43711560447630415, "grad_norm": 0.8211079380438292, "learning_rate": 3.0252992563149102e-06, "loss": 0.0198, "step": 104760 }, { "epoch": 0.4371364671912944, "grad_norm": 0.9652905016713517, "learning_rate": 3.025227062256031e-06, "loss": 0.0178, "step": 104765 }, { "epoch": 0.4371573299062847, "grad_norm": 0.8640275976756228, "learning_rate": 3.025154873365302e-06, "loss": 0.0271, "step": 104770 }, { "epoch": 0.43717819262127494, "grad_norm": 0.8109995014419904, "learning_rate": 3.0250826896421054e-06, "loss": 0.028, "step": 104775 }, { "epoch": 0.43719905533626524, "grad_norm": 0.4954511625877443, "learning_rate": 3.0250105110858264e-06, "loss": 0.0263, "step": 104780 }, { "epoch": 0.43721991805125554, "grad_norm": 0.9744012825124163, "learning_rate": 3.0249383376958474e-06, "loss": 0.0268, "step": 104785 }, { "epoch": 0.4372407807662458, "grad_norm": 0.40694866687693604, "learning_rate": 3.024866169471553e-06, "loss": 0.0205, "step": 104790 }, { "epoch": 0.4372616434812361, "grad_norm": 1.1983001908340154, "learning_rate": 3.0247940064123266e-06, "loss": 0.0311, "step": 104795 }, { "epoch": 0.43728250619622633, "grad_norm": 0.5824313195682955, "learning_rate": 3.024721848517552e-06, "loss": 0.0397, "step": 104800 }, { "epoch": 0.43730336891121663, "grad_norm": 0.4645404755526131, "learning_rate": 3.0246496957866134e-06, "loss": 0.0153, "step": 104805 }, { "epoch": 0.4373242316262069, "grad_norm": 0.6913792283016972, "learning_rate": 3.024577548218895e-06, "loss": 0.0251, "step": 104810 }, { "epoch": 0.43734509434119717, "grad_norm": 1.2833697528533532, "learning_rate": 3.0245054058137814e-06, "loss": 0.0282, "step": 104815 }, { "epoch": 0.43736595705618747, "grad_norm": 0.36317572722765257, "learning_rate": 3.0244332685706556e-06, "loss": 0.0278, "step": 104820 }, { "epoch": 0.4373868197711777, "grad_norm": 0.6344528288465605, "learning_rate": 3.0243611364889034e-06, "loss": 0.0216, "step": 104825 }, { "epoch": 0.437407682486168, "grad_norm": 0.9663692440380013, "learning_rate": 3.0242890095679083e-06, "loss": 0.0276, "step": 104830 }, { "epoch": 0.4374285452011583, "grad_norm": 0.8723479834107547, "learning_rate": 3.024216887807056e-06, "loss": 0.0264, "step": 104835 }, { "epoch": 0.43744940791614856, "grad_norm": 0.6969649573092674, "learning_rate": 3.024144771205731e-06, "loss": 0.0341, "step": 104840 }, { "epoch": 0.43747027063113886, "grad_norm": 0.6860206071122144, "learning_rate": 3.0240726597633173e-06, "loss": 0.0241, "step": 104845 }, { "epoch": 0.43749113334612916, "grad_norm": 0.5800866165253499, "learning_rate": 3.0240005534792012e-06, "loss": 0.0283, "step": 104850 }, { "epoch": 0.4375119960611194, "grad_norm": 0.8509383578642318, "learning_rate": 3.023928452352766e-06, "loss": 0.0297, "step": 104855 }, { "epoch": 0.4375328587761097, "grad_norm": 0.84805422412322, "learning_rate": 3.0238563563833985e-06, "loss": 0.0228, "step": 104860 }, { "epoch": 0.43755372149109995, "grad_norm": 0.6889727541589025, "learning_rate": 3.0237842655704828e-06, "loss": 0.0215, "step": 104865 }, { "epoch": 0.43757458420609024, "grad_norm": 0.6276488533441318, "learning_rate": 3.023712179913406e-06, "loss": 0.0216, "step": 104870 }, { "epoch": 0.43759544692108054, "grad_norm": 0.6235125630407314, "learning_rate": 3.0236400994115517e-06, "loss": 0.0239, "step": 104875 }, { "epoch": 0.4376163096360708, "grad_norm": 0.4258371233083195, "learning_rate": 3.0235680240643057e-06, "loss": 0.0251, "step": 104880 }, { "epoch": 0.4376371723510611, "grad_norm": 0.7322195827587975, "learning_rate": 3.0234959538710544e-06, "loss": 0.0299, "step": 104885 }, { "epoch": 0.43765803506605133, "grad_norm": 0.5444030005556174, "learning_rate": 3.023423888831183e-06, "loss": 0.0209, "step": 104890 }, { "epoch": 0.43767889778104163, "grad_norm": 0.8610759477406205, "learning_rate": 3.023351828944078e-06, "loss": 0.0241, "step": 104895 }, { "epoch": 0.43769976049603193, "grad_norm": 0.6944404607313401, "learning_rate": 3.0232797742091253e-06, "loss": 0.028, "step": 104900 }, { "epoch": 0.4377206232110222, "grad_norm": 0.6875041269687797, "learning_rate": 3.0232077246257098e-06, "loss": 0.0231, "step": 104905 }, { "epoch": 0.4377414859260125, "grad_norm": 0.8752230981288747, "learning_rate": 3.023135680193219e-06, "loss": 0.0271, "step": 104910 }, { "epoch": 0.4377623486410027, "grad_norm": 0.8258513910608266, "learning_rate": 3.0230636409110383e-06, "loss": 0.0262, "step": 104915 }, { "epoch": 0.437783211355993, "grad_norm": 0.9354267674748163, "learning_rate": 3.0229916067785554e-06, "loss": 0.0301, "step": 104920 }, { "epoch": 0.4378040740709833, "grad_norm": 0.6919121932138375, "learning_rate": 3.0229195777951552e-06, "loss": 0.0231, "step": 104925 }, { "epoch": 0.43782493678597356, "grad_norm": 0.5197774110390597, "learning_rate": 3.0228475539602255e-06, "loss": 0.0199, "step": 104930 }, { "epoch": 0.43784579950096386, "grad_norm": 1.3219476928400835, "learning_rate": 3.0227755352731524e-06, "loss": 0.0274, "step": 104935 }, { "epoch": 0.43786666221595416, "grad_norm": 0.9426051154871593, "learning_rate": 3.022703521733323e-06, "loss": 0.0368, "step": 104940 }, { "epoch": 0.4378875249309444, "grad_norm": 0.6866034131396113, "learning_rate": 3.0226315133401235e-06, "loss": 0.0295, "step": 104945 }, { "epoch": 0.4379083876459347, "grad_norm": 1.0108178400918881, "learning_rate": 3.0225595100929415e-06, "loss": 0.0217, "step": 104950 }, { "epoch": 0.43792925036092495, "grad_norm": 0.6443024427423616, "learning_rate": 3.0224875119911645e-06, "loss": 0.0196, "step": 104955 }, { "epoch": 0.43795011307591525, "grad_norm": 1.1092778419486886, "learning_rate": 3.022415519034179e-06, "loss": 0.0243, "step": 104960 }, { "epoch": 0.43797097579090555, "grad_norm": 0.4342252312263539, "learning_rate": 3.022343531221373e-06, "loss": 0.0257, "step": 104965 }, { "epoch": 0.4379918385058958, "grad_norm": 0.6136828250804852, "learning_rate": 3.0222715485521325e-06, "loss": 0.0358, "step": 104970 }, { "epoch": 0.4380127012208861, "grad_norm": 0.4234647012653861, "learning_rate": 3.0221995710258462e-06, "loss": 0.0207, "step": 104975 }, { "epoch": 0.43803356393587634, "grad_norm": 0.6856485625548006, "learning_rate": 3.0221275986419017e-06, "loss": 0.0249, "step": 104980 }, { "epoch": 0.43805442665086664, "grad_norm": 0.8457219284402167, "learning_rate": 3.0220556313996864e-06, "loss": 0.0346, "step": 104985 }, { "epoch": 0.43807528936585693, "grad_norm": 0.5467435791706203, "learning_rate": 3.0219836692985884e-06, "loss": 0.0244, "step": 104990 }, { "epoch": 0.4380961520808472, "grad_norm": 0.8042952006071754, "learning_rate": 3.0219117123379953e-06, "loss": 0.0243, "step": 104995 }, { "epoch": 0.4381170147958375, "grad_norm": 0.9248363825186926, "learning_rate": 3.021839760517295e-06, "loss": 0.0267, "step": 105000 }, { "epoch": 0.4381378775108277, "grad_norm": 0.5444264306335276, "learning_rate": 3.021767813835876e-06, "loss": 0.0165, "step": 105005 }, { "epoch": 0.438158740225818, "grad_norm": 0.7347517632240448, "learning_rate": 3.021695872293127e-06, "loss": 0.0206, "step": 105010 }, { "epoch": 0.4381796029408083, "grad_norm": 0.6470848220084577, "learning_rate": 3.0216239358884348e-06, "loss": 0.0293, "step": 105015 }, { "epoch": 0.43820046565579857, "grad_norm": 0.4044877728943359, "learning_rate": 3.021552004621189e-06, "loss": 0.0198, "step": 105020 }, { "epoch": 0.43822132837078887, "grad_norm": 0.8685153950108214, "learning_rate": 3.021480078490778e-06, "loss": 0.0315, "step": 105025 }, { "epoch": 0.43824219108577916, "grad_norm": 0.883272808771371, "learning_rate": 3.0214081574965903e-06, "loss": 0.0262, "step": 105030 }, { "epoch": 0.4382630538007694, "grad_norm": 1.9223524303714599, "learning_rate": 3.0213362416380143e-06, "loss": 0.0308, "step": 105035 }, { "epoch": 0.4382839165157597, "grad_norm": 0.4776152145403059, "learning_rate": 3.02126433091444e-06, "loss": 0.0222, "step": 105040 }, { "epoch": 0.43830477923074995, "grad_norm": 0.9836971622401829, "learning_rate": 3.0211924253252546e-06, "loss": 0.0195, "step": 105045 }, { "epoch": 0.43832564194574025, "grad_norm": 1.1921146918344119, "learning_rate": 3.0211205248698487e-06, "loss": 0.0284, "step": 105050 }, { "epoch": 0.43834650466073055, "grad_norm": 1.2898870309767139, "learning_rate": 3.021048629547611e-06, "loss": 0.0296, "step": 105055 }, { "epoch": 0.4383673673757208, "grad_norm": 1.86743485355053, "learning_rate": 3.0209767393579303e-06, "loss": 0.0278, "step": 105060 }, { "epoch": 0.4383882300907111, "grad_norm": 0.9789068146010472, "learning_rate": 3.0209048543001954e-06, "loss": 0.0281, "step": 105065 }, { "epoch": 0.43840909280570134, "grad_norm": 0.6132839471138937, "learning_rate": 3.020832974373798e-06, "loss": 0.0234, "step": 105070 }, { "epoch": 0.43842995552069164, "grad_norm": 0.4591892990997469, "learning_rate": 3.020761099578125e-06, "loss": 0.0216, "step": 105075 }, { "epoch": 0.43845081823568194, "grad_norm": 1.0724623170469647, "learning_rate": 3.020689229912568e-06, "loss": 0.0284, "step": 105080 }, { "epoch": 0.4384716809506722, "grad_norm": 1.1587437775996865, "learning_rate": 3.020617365376515e-06, "loss": 0.0339, "step": 105085 }, { "epoch": 0.4384925436656625, "grad_norm": 2.1852579242747723, "learning_rate": 3.0205455059693584e-06, "loss": 0.0249, "step": 105090 }, { "epoch": 0.4385134063806527, "grad_norm": 0.5082188076990111, "learning_rate": 3.0204736516904855e-06, "loss": 0.0208, "step": 105095 }, { "epoch": 0.438534269095643, "grad_norm": 0.46612433351588184, "learning_rate": 3.020401802539288e-06, "loss": 0.0214, "step": 105100 }, { "epoch": 0.4385551318106333, "grad_norm": 1.309987706660956, "learning_rate": 3.0203299585151546e-06, "loss": 0.0315, "step": 105105 }, { "epoch": 0.43857599452562357, "grad_norm": 0.664904758918308, "learning_rate": 3.020258119617477e-06, "loss": 0.0272, "step": 105110 }, { "epoch": 0.43859685724061387, "grad_norm": 0.6573114387803729, "learning_rate": 3.020186285845646e-06, "loss": 0.0277, "step": 105115 }, { "epoch": 0.43861771995560417, "grad_norm": 0.8009485544882727, "learning_rate": 3.0201144571990497e-06, "loss": 0.0296, "step": 105120 }, { "epoch": 0.4386385826705944, "grad_norm": 1.1954542168431397, "learning_rate": 3.020042633677081e-06, "loss": 0.0403, "step": 105125 }, { "epoch": 0.4386594453855847, "grad_norm": 0.7918097510173701, "learning_rate": 3.0199708152791295e-06, "loss": 0.0289, "step": 105130 }, { "epoch": 0.43868030810057496, "grad_norm": 1.0559494487216758, "learning_rate": 3.019899002004586e-06, "loss": 0.0266, "step": 105135 }, { "epoch": 0.43870117081556526, "grad_norm": 0.44305835820205963, "learning_rate": 3.019827193852841e-06, "loss": 0.0217, "step": 105140 }, { "epoch": 0.43872203353055556, "grad_norm": 0.8189109177546047, "learning_rate": 3.019755390823287e-06, "loss": 0.028, "step": 105145 }, { "epoch": 0.4387428962455458, "grad_norm": 0.768056097494882, "learning_rate": 3.0196835929153134e-06, "loss": 0.0238, "step": 105150 }, { "epoch": 0.4387637589605361, "grad_norm": 0.7903952722690991, "learning_rate": 3.0196118001283114e-06, "loss": 0.0297, "step": 105155 }, { "epoch": 0.43878462167552634, "grad_norm": 1.6782713238781415, "learning_rate": 3.019540012461674e-06, "loss": 0.0252, "step": 105160 }, { "epoch": 0.43880548439051664, "grad_norm": 0.6789007433304867, "learning_rate": 3.019468229914791e-06, "loss": 0.0263, "step": 105165 }, { "epoch": 0.43882634710550694, "grad_norm": 0.6383928314062257, "learning_rate": 3.0193964524870546e-06, "loss": 0.0233, "step": 105170 }, { "epoch": 0.4388472098204972, "grad_norm": 1.084350987620776, "learning_rate": 3.019324680177855e-06, "loss": 0.0291, "step": 105175 }, { "epoch": 0.4388680725354875, "grad_norm": 0.8886695581763083, "learning_rate": 3.0192529129865866e-06, "loss": 0.0295, "step": 105180 }, { "epoch": 0.43888893525047773, "grad_norm": 1.175787220586769, "learning_rate": 3.0191811509126385e-06, "loss": 0.03, "step": 105185 }, { "epoch": 0.43890979796546803, "grad_norm": 0.8295900579996283, "learning_rate": 3.0191093939554037e-06, "loss": 0.0264, "step": 105190 }, { "epoch": 0.43893066068045833, "grad_norm": 0.6487542482419077, "learning_rate": 3.0190376421142747e-06, "loss": 0.0203, "step": 105195 }, { "epoch": 0.4389515233954486, "grad_norm": 0.6645879414157723, "learning_rate": 3.018965895388643e-06, "loss": 0.0249, "step": 105200 }, { "epoch": 0.4389723861104389, "grad_norm": 0.4588073854771954, "learning_rate": 3.0188941537779005e-06, "loss": 0.0282, "step": 105205 }, { "epoch": 0.43899324882542917, "grad_norm": 0.4968735488519666, "learning_rate": 3.0188224172814395e-06, "loss": 0.0175, "step": 105210 }, { "epoch": 0.4390141115404194, "grad_norm": 0.2825753211628367, "learning_rate": 3.018750685898653e-06, "loss": 0.0226, "step": 105215 }, { "epoch": 0.4390349742554097, "grad_norm": 1.4493881412752327, "learning_rate": 3.0186789596289334e-06, "loss": 0.035, "step": 105220 }, { "epoch": 0.43905583697039996, "grad_norm": 0.6801496732631865, "learning_rate": 3.0186072384716724e-06, "loss": 0.0249, "step": 105225 }, { "epoch": 0.43907669968539026, "grad_norm": 0.5666554980926426, "learning_rate": 3.018535522426264e-06, "loss": 0.0241, "step": 105230 }, { "epoch": 0.43909756240038056, "grad_norm": 1.2385935147066987, "learning_rate": 3.0184638114921004e-06, "loss": 0.0211, "step": 105235 }, { "epoch": 0.4391184251153708, "grad_norm": 1.296659989159914, "learning_rate": 3.0183921056685744e-06, "loss": 0.0246, "step": 105240 }, { "epoch": 0.4391392878303611, "grad_norm": 0.8454215079573562, "learning_rate": 3.018320404955079e-06, "loss": 0.0192, "step": 105245 }, { "epoch": 0.43916015054535135, "grad_norm": 0.9045053161628497, "learning_rate": 3.0182487093510064e-06, "loss": 0.0254, "step": 105250 }, { "epoch": 0.43918101326034165, "grad_norm": 0.44288302171651506, "learning_rate": 3.018177018855752e-06, "loss": 0.0266, "step": 105255 }, { "epoch": 0.43920187597533195, "grad_norm": 0.5793101229620803, "learning_rate": 3.018105333468707e-06, "loss": 0.0236, "step": 105260 }, { "epoch": 0.4392227386903222, "grad_norm": 1.1213531978560207, "learning_rate": 3.0180336531892663e-06, "loss": 0.0311, "step": 105265 }, { "epoch": 0.4392436014053125, "grad_norm": 0.3308455136589961, "learning_rate": 3.017961978016822e-06, "loss": 0.0273, "step": 105270 }, { "epoch": 0.43926446412030273, "grad_norm": 0.6415210995342887, "learning_rate": 3.0178903079507687e-06, "loss": 0.0267, "step": 105275 }, { "epoch": 0.43928532683529303, "grad_norm": 0.9787700586972133, "learning_rate": 3.0178186429905002e-06, "loss": 0.0332, "step": 105280 }, { "epoch": 0.43930618955028333, "grad_norm": 0.532337513332499, "learning_rate": 3.0177469831354094e-06, "loss": 0.0226, "step": 105285 }, { "epoch": 0.4393270522652736, "grad_norm": 0.6892479291984558, "learning_rate": 3.0176753283848907e-06, "loss": 0.0278, "step": 105290 }, { "epoch": 0.4393479149802639, "grad_norm": 0.3779699057779065, "learning_rate": 3.0176036787383384e-06, "loss": 0.0258, "step": 105295 }, { "epoch": 0.4393687776952542, "grad_norm": 0.5766530184230378, "learning_rate": 3.0175320341951465e-06, "loss": 0.0166, "step": 105300 }, { "epoch": 0.4393896404102444, "grad_norm": 1.0517897008840653, "learning_rate": 3.0174603947547093e-06, "loss": 0.0178, "step": 105305 }, { "epoch": 0.4394105031252347, "grad_norm": 0.5556757323727949, "learning_rate": 3.0173887604164196e-06, "loss": 0.0267, "step": 105310 }, { "epoch": 0.43943136584022496, "grad_norm": 0.8125949785238668, "learning_rate": 3.0173171311796738e-06, "loss": 0.0328, "step": 105315 }, { "epoch": 0.43945222855521526, "grad_norm": 0.8069914559778908, "learning_rate": 3.0172455070438663e-06, "loss": 0.0316, "step": 105320 }, { "epoch": 0.43947309127020556, "grad_norm": 1.3369222007841877, "learning_rate": 3.01717388800839e-06, "loss": 0.035, "step": 105325 }, { "epoch": 0.4394939539851958, "grad_norm": 0.7024800252754468, "learning_rate": 3.0171022740726414e-06, "loss": 0.0237, "step": 105330 }, { "epoch": 0.4395148167001861, "grad_norm": 0.8837698245710413, "learning_rate": 3.0170306652360137e-06, "loss": 0.0247, "step": 105335 }, { "epoch": 0.43953567941517635, "grad_norm": 0.5627814124112739, "learning_rate": 3.0169590614979035e-06, "loss": 0.0315, "step": 105340 }, { "epoch": 0.43955654213016665, "grad_norm": 0.7811525747173137, "learning_rate": 3.016887462857704e-06, "loss": 0.0275, "step": 105345 }, { "epoch": 0.43957740484515695, "grad_norm": 1.1561904564955257, "learning_rate": 3.0168158693148124e-06, "loss": 0.0265, "step": 105350 }, { "epoch": 0.4395982675601472, "grad_norm": 0.4894787721149215, "learning_rate": 3.0167442808686225e-06, "loss": 0.0253, "step": 105355 }, { "epoch": 0.4396191302751375, "grad_norm": 1.0690897082362438, "learning_rate": 3.0166726975185297e-06, "loss": 0.0255, "step": 105360 }, { "epoch": 0.43963999299012774, "grad_norm": 0.8857889131789437, "learning_rate": 3.0166011192639295e-06, "loss": 0.0267, "step": 105365 }, { "epoch": 0.43966085570511804, "grad_norm": 0.9492326157971783, "learning_rate": 3.0165295461042177e-06, "loss": 0.0279, "step": 105370 }, { "epoch": 0.43968171842010834, "grad_norm": 0.751963945826942, "learning_rate": 3.01645797803879e-06, "loss": 0.0407, "step": 105375 }, { "epoch": 0.4397025811350986, "grad_norm": 0.9614214150163054, "learning_rate": 3.016386415067042e-06, "loss": 0.0312, "step": 105380 }, { "epoch": 0.4397234438500889, "grad_norm": 0.5017265893298473, "learning_rate": 3.016314857188368e-06, "loss": 0.0233, "step": 105385 }, { "epoch": 0.4397443065650792, "grad_norm": 0.5161500119052694, "learning_rate": 3.0162433044021666e-06, "loss": 0.0347, "step": 105390 }, { "epoch": 0.4397651692800694, "grad_norm": 1.3813785240875283, "learning_rate": 3.016171756707832e-06, "loss": 0.0238, "step": 105395 }, { "epoch": 0.4397860319950597, "grad_norm": 0.7095336589184107, "learning_rate": 3.0161002141047614e-06, "loss": 0.0277, "step": 105400 }, { "epoch": 0.43980689471004997, "grad_norm": 0.8175647594431322, "learning_rate": 3.0160286765923495e-06, "loss": 0.0226, "step": 105405 }, { "epoch": 0.43982775742504027, "grad_norm": 0.8812214817309345, "learning_rate": 3.015957144169994e-06, "loss": 0.0178, "step": 105410 }, { "epoch": 0.43984862014003057, "grad_norm": 0.8133544179676048, "learning_rate": 3.015885616837091e-06, "loss": 0.0293, "step": 105415 }, { "epoch": 0.4398694828550208, "grad_norm": 0.6345890283337323, "learning_rate": 3.015814094593037e-06, "loss": 0.0219, "step": 105420 }, { "epoch": 0.4398903455700111, "grad_norm": 1.2922963108308714, "learning_rate": 3.0157425774372286e-06, "loss": 0.0433, "step": 105425 }, { "epoch": 0.43991120828500135, "grad_norm": 0.9347920871975184, "learning_rate": 3.0156710653690613e-06, "loss": 0.0238, "step": 105430 }, { "epoch": 0.43993207099999165, "grad_norm": 1.134208855593872, "learning_rate": 3.0155995583879337e-06, "loss": 0.0249, "step": 105435 }, { "epoch": 0.43995293371498195, "grad_norm": 0.9760359831052906, "learning_rate": 3.015528056493242e-06, "loss": 0.0286, "step": 105440 }, { "epoch": 0.4399737964299722, "grad_norm": 0.5138066993306115, "learning_rate": 3.0154565596843834e-06, "loss": 0.0275, "step": 105445 }, { "epoch": 0.4399946591449625, "grad_norm": 0.3198660479872119, "learning_rate": 3.015385067960755e-06, "loss": 0.0189, "step": 105450 }, { "epoch": 0.44001552185995274, "grad_norm": 1.9312142322980232, "learning_rate": 3.0153135813217533e-06, "loss": 0.0265, "step": 105455 }, { "epoch": 0.44003638457494304, "grad_norm": 0.9818704623037118, "learning_rate": 3.0152420997667765e-06, "loss": 0.0266, "step": 105460 }, { "epoch": 0.44005724728993334, "grad_norm": 1.418573475577716, "learning_rate": 3.0151706232952217e-06, "loss": 0.0288, "step": 105465 }, { "epoch": 0.4400781100049236, "grad_norm": 0.8791947300953633, "learning_rate": 3.015099151906486e-06, "loss": 0.0304, "step": 105470 }, { "epoch": 0.4400989727199139, "grad_norm": 0.5322180535395699, "learning_rate": 3.0150276855999677e-06, "loss": 0.0239, "step": 105475 }, { "epoch": 0.4401198354349042, "grad_norm": 0.8596440640380797, "learning_rate": 3.014956224375064e-06, "loss": 0.0292, "step": 105480 }, { "epoch": 0.4401406981498944, "grad_norm": 0.7262585654590745, "learning_rate": 3.014884768231174e-06, "loss": 0.0201, "step": 105485 }, { "epoch": 0.4401615608648847, "grad_norm": 0.7732574356438482, "learning_rate": 3.0148133171676937e-06, "loss": 0.0375, "step": 105490 }, { "epoch": 0.44018242357987497, "grad_norm": 0.4261912509385408, "learning_rate": 3.0147418711840214e-06, "loss": 0.02, "step": 105495 }, { "epoch": 0.44020328629486527, "grad_norm": 0.5557991069675663, "learning_rate": 3.014670430279557e-06, "loss": 0.0219, "step": 105500 }, { "epoch": 0.44022414900985557, "grad_norm": 1.2867650618202957, "learning_rate": 3.014598994453696e-06, "loss": 0.0253, "step": 105505 }, { "epoch": 0.4402450117248458, "grad_norm": 0.8111924229513107, "learning_rate": 3.0145275637058396e-06, "loss": 0.0228, "step": 105510 }, { "epoch": 0.4402658744398361, "grad_norm": 0.8271189268355671, "learning_rate": 3.014456138035385e-06, "loss": 0.0246, "step": 105515 }, { "epoch": 0.44028673715482636, "grad_norm": 1.5282793454945502, "learning_rate": 3.0143847174417296e-06, "loss": 0.03, "step": 105520 }, { "epoch": 0.44030759986981666, "grad_norm": 0.49986446551333524, "learning_rate": 3.014313301924273e-06, "loss": 0.0237, "step": 105525 }, { "epoch": 0.44032846258480696, "grad_norm": 0.40983239941356536, "learning_rate": 3.0142418914824144e-06, "loss": 0.0198, "step": 105530 }, { "epoch": 0.4403493252997972, "grad_norm": 0.8009137789530333, "learning_rate": 3.014170486115552e-06, "loss": 0.0296, "step": 105535 }, { "epoch": 0.4403701880147875, "grad_norm": 0.7649888368860609, "learning_rate": 3.0140990858230844e-06, "loss": 0.0209, "step": 105540 }, { "epoch": 0.44039105072977774, "grad_norm": 0.8912811536052989, "learning_rate": 3.0140276906044115e-06, "loss": 0.0259, "step": 105545 }, { "epoch": 0.44041191344476804, "grad_norm": 0.8437349108567608, "learning_rate": 3.013956300458931e-06, "loss": 0.0218, "step": 105550 }, { "epoch": 0.44043277615975834, "grad_norm": 0.9290386861826847, "learning_rate": 3.0138849153860442e-06, "loss": 0.0467, "step": 105555 }, { "epoch": 0.4404536388747486, "grad_norm": 2.2215028489621056, "learning_rate": 3.0138135353851488e-06, "loss": 0.0211, "step": 105560 }, { "epoch": 0.4404745015897389, "grad_norm": 0.8607347813654038, "learning_rate": 3.0137421604556445e-06, "loss": 0.0183, "step": 105565 }, { "epoch": 0.4404953643047292, "grad_norm": 0.6062934094522576, "learning_rate": 3.0136707905969314e-06, "loss": 0.025, "step": 105570 }, { "epoch": 0.44051622701971943, "grad_norm": 0.8764154093322369, "learning_rate": 3.013599425808408e-06, "loss": 0.0229, "step": 105575 }, { "epoch": 0.44053708973470973, "grad_norm": 0.5810513293747054, "learning_rate": 3.013528066089475e-06, "loss": 0.0241, "step": 105580 }, { "epoch": 0.4405579524497, "grad_norm": 0.5857090781403916, "learning_rate": 3.0134567114395324e-06, "loss": 0.0292, "step": 105585 }, { "epoch": 0.4405788151646903, "grad_norm": 0.892671608274868, "learning_rate": 3.013385361857979e-06, "loss": 0.0268, "step": 105590 }, { "epoch": 0.4405996778796806, "grad_norm": 0.7682238161960852, "learning_rate": 3.0133140173442155e-06, "loss": 0.0228, "step": 105595 }, { "epoch": 0.4406205405946708, "grad_norm": 0.7457603012905393, "learning_rate": 3.0132426778976414e-06, "loss": 0.0245, "step": 105600 }, { "epoch": 0.4406414033096611, "grad_norm": 0.9547772276300143, "learning_rate": 3.0131713435176584e-06, "loss": 0.0255, "step": 105605 }, { "epoch": 0.44066226602465136, "grad_norm": 0.5822305243852784, "learning_rate": 3.0131000142036647e-06, "loss": 0.0234, "step": 105610 }, { "epoch": 0.44068312873964166, "grad_norm": 0.5587049008580875, "learning_rate": 3.0130286899550627e-06, "loss": 0.0295, "step": 105615 }, { "epoch": 0.44070399145463196, "grad_norm": 0.7947622155892498, "learning_rate": 3.012957370771252e-06, "loss": 0.0251, "step": 105620 }, { "epoch": 0.4407248541696222, "grad_norm": 0.9075676579147419, "learning_rate": 3.0128860566516333e-06, "loss": 0.0341, "step": 105625 }, { "epoch": 0.4407457168846125, "grad_norm": 0.6562631346108166, "learning_rate": 3.0128147475956065e-06, "loss": 0.0216, "step": 105630 }, { "epoch": 0.44076657959960275, "grad_norm": 0.6336292095854271, "learning_rate": 3.012743443602574e-06, "loss": 0.0197, "step": 105635 }, { "epoch": 0.44078744231459305, "grad_norm": 0.6856576002562055, "learning_rate": 3.0126721446719353e-06, "loss": 0.0284, "step": 105640 }, { "epoch": 0.44080830502958335, "grad_norm": 0.8742426833747499, "learning_rate": 3.0126008508030923e-06, "loss": 0.0418, "step": 105645 }, { "epoch": 0.4408291677445736, "grad_norm": 0.34646696003759814, "learning_rate": 3.0125295619954453e-06, "loss": 0.0258, "step": 105650 }, { "epoch": 0.4408500304595639, "grad_norm": 1.352703885844799, "learning_rate": 3.0124582782483962e-06, "loss": 0.0289, "step": 105655 }, { "epoch": 0.4408708931745542, "grad_norm": 0.4593688695421713, "learning_rate": 3.0123869995613462e-06, "loss": 0.0293, "step": 105660 }, { "epoch": 0.44089175588954443, "grad_norm": 1.1306749907592553, "learning_rate": 3.012315725933696e-06, "loss": 0.0204, "step": 105665 }, { "epoch": 0.44091261860453473, "grad_norm": 0.7726211398401287, "learning_rate": 3.012244457364848e-06, "loss": 0.0235, "step": 105670 }, { "epoch": 0.440933481319525, "grad_norm": 0.9549644445522963, "learning_rate": 3.0121731938542037e-06, "loss": 0.0209, "step": 105675 }, { "epoch": 0.4409543440345153, "grad_norm": 0.5864737344757713, "learning_rate": 3.0121019354011644e-06, "loss": 0.0262, "step": 105680 }, { "epoch": 0.4409752067495056, "grad_norm": 0.90811397995762, "learning_rate": 3.0120306820051316e-06, "loss": 0.0241, "step": 105685 }, { "epoch": 0.4409960694644958, "grad_norm": 0.43953652674555216, "learning_rate": 3.0119594336655085e-06, "loss": 0.0302, "step": 105690 }, { "epoch": 0.4410169321794861, "grad_norm": 0.5771857943547422, "learning_rate": 3.0118881903816947e-06, "loss": 0.029, "step": 105695 }, { "epoch": 0.44103779489447636, "grad_norm": 0.9670749767745439, "learning_rate": 3.011816952153095e-06, "loss": 0.0252, "step": 105700 }, { "epoch": 0.44105865760946666, "grad_norm": 0.7486199593738344, "learning_rate": 3.0117457189791103e-06, "loss": 0.0257, "step": 105705 }, { "epoch": 0.44107952032445696, "grad_norm": 1.0240385549865902, "learning_rate": 3.011674490859143e-06, "loss": 0.0275, "step": 105710 }, { "epoch": 0.4411003830394472, "grad_norm": 1.6782094820675086, "learning_rate": 3.0116032677925948e-06, "loss": 0.0438, "step": 105715 }, { "epoch": 0.4411212457544375, "grad_norm": 2.5107516215641303, "learning_rate": 3.0115320497788692e-06, "loss": 0.0253, "step": 105720 }, { "epoch": 0.44114210846942775, "grad_norm": 0.6903064739108299, "learning_rate": 3.0114608368173683e-06, "loss": 0.0228, "step": 105725 }, { "epoch": 0.44116297118441805, "grad_norm": 0.7120189658846351, "learning_rate": 3.011389628907495e-06, "loss": 0.0315, "step": 105730 }, { "epoch": 0.44118383389940835, "grad_norm": 0.5860852616626608, "learning_rate": 3.0113184260486523e-06, "loss": 0.0226, "step": 105735 }, { "epoch": 0.4412046966143986, "grad_norm": 0.4844033921117754, "learning_rate": 3.0112472282402425e-06, "loss": 0.0247, "step": 105740 }, { "epoch": 0.4412255593293889, "grad_norm": 0.6855203038336866, "learning_rate": 3.0111760354816687e-06, "loss": 0.0259, "step": 105745 }, { "epoch": 0.4412464220443792, "grad_norm": 0.6898561744500611, "learning_rate": 3.011104847772335e-06, "loss": 0.0307, "step": 105750 }, { "epoch": 0.44126728475936944, "grad_norm": 1.2598778843461935, "learning_rate": 3.011033665111643e-06, "loss": 0.0366, "step": 105755 }, { "epoch": 0.44128814747435974, "grad_norm": 0.6897985592580532, "learning_rate": 3.010962487498997e-06, "loss": 0.0228, "step": 105760 }, { "epoch": 0.44130901018935, "grad_norm": 0.7639297920744146, "learning_rate": 3.0108913149338e-06, "loss": 0.0261, "step": 105765 }, { "epoch": 0.4413298729043403, "grad_norm": 0.5270627721911668, "learning_rate": 3.0108201474154552e-06, "loss": 0.0257, "step": 105770 }, { "epoch": 0.4413507356193306, "grad_norm": 0.6801871801525781, "learning_rate": 3.010748984943367e-06, "loss": 0.0252, "step": 105775 }, { "epoch": 0.4413715983343208, "grad_norm": 0.8356809691401303, "learning_rate": 3.010677827516938e-06, "loss": 0.0331, "step": 105780 }, { "epoch": 0.4413924610493111, "grad_norm": 0.49103616180232224, "learning_rate": 3.010606675135573e-06, "loss": 0.0226, "step": 105785 }, { "epoch": 0.44141332376430137, "grad_norm": 0.8368782724904082, "learning_rate": 3.0105355277986754e-06, "loss": 0.0194, "step": 105790 }, { "epoch": 0.44143418647929167, "grad_norm": 0.6556028989347987, "learning_rate": 3.010464385505649e-06, "loss": 0.0328, "step": 105795 }, { "epoch": 0.44145504919428197, "grad_norm": 0.8821629231621092, "learning_rate": 3.010393248255899e-06, "loss": 0.0209, "step": 105800 }, { "epoch": 0.4414759119092722, "grad_norm": 0.7428464591734776, "learning_rate": 3.0103221160488276e-06, "loss": 0.0351, "step": 105805 }, { "epoch": 0.4414967746242625, "grad_norm": 0.8319495925085045, "learning_rate": 3.0102509888838406e-06, "loss": 0.0223, "step": 105810 }, { "epoch": 0.44151763733925276, "grad_norm": 1.2144947627312117, "learning_rate": 3.010179866760341e-06, "loss": 0.0343, "step": 105815 }, { "epoch": 0.44153850005424305, "grad_norm": 0.6769078086984267, "learning_rate": 3.0101087496777353e-06, "loss": 0.022, "step": 105820 }, { "epoch": 0.44155936276923335, "grad_norm": 0.5375089748917313, "learning_rate": 3.0100376376354268e-06, "loss": 0.0285, "step": 105825 }, { "epoch": 0.4415802254842236, "grad_norm": 0.6587822302286411, "learning_rate": 3.0099665306328195e-06, "loss": 0.0246, "step": 105830 }, { "epoch": 0.4416010881992139, "grad_norm": 0.7630978187798163, "learning_rate": 3.009895428669319e-06, "loss": 0.0264, "step": 105835 }, { "epoch": 0.4416219509142042, "grad_norm": 1.1995308506574245, "learning_rate": 3.0098243317443304e-06, "loss": 0.0257, "step": 105840 }, { "epoch": 0.44164281362919444, "grad_norm": 1.0801867020231997, "learning_rate": 3.009753239857258e-06, "loss": 0.0272, "step": 105845 }, { "epoch": 0.44166367634418474, "grad_norm": 0.5949696613185469, "learning_rate": 3.009682153007508e-06, "loss": 0.0245, "step": 105850 }, { "epoch": 0.441684539059175, "grad_norm": 1.1036855619890795, "learning_rate": 3.009611071194484e-06, "loss": 0.0198, "step": 105855 }, { "epoch": 0.4417054017741653, "grad_norm": 1.0563896699389506, "learning_rate": 3.0095399944175917e-06, "loss": 0.0291, "step": 105860 }, { "epoch": 0.4417262644891556, "grad_norm": 0.6877419490432969, "learning_rate": 3.009468922676237e-06, "loss": 0.0184, "step": 105865 }, { "epoch": 0.44174712720414583, "grad_norm": 0.5582815972472807, "learning_rate": 3.009397855969825e-06, "loss": 0.0235, "step": 105870 }, { "epoch": 0.44176798991913613, "grad_norm": 0.5658710395374978, "learning_rate": 3.0093267942977614e-06, "loss": 0.0237, "step": 105875 }, { "epoch": 0.44178885263412637, "grad_norm": 0.5045200522072487, "learning_rate": 3.0092557376594524e-06, "loss": 0.0181, "step": 105880 }, { "epoch": 0.44180971534911667, "grad_norm": 0.38968311157086577, "learning_rate": 3.0091846860543016e-06, "loss": 0.0185, "step": 105885 }, { "epoch": 0.44183057806410697, "grad_norm": 0.4586985198674458, "learning_rate": 3.009113639481717e-06, "loss": 0.0196, "step": 105890 }, { "epoch": 0.4418514407790972, "grad_norm": 0.4477816722138594, "learning_rate": 3.009042597941104e-06, "loss": 0.0152, "step": 105895 }, { "epoch": 0.4418723034940875, "grad_norm": 0.7536918235262223, "learning_rate": 3.0089715614318686e-06, "loss": 0.0219, "step": 105900 }, { "epoch": 0.44189316620907776, "grad_norm": 0.5438569137227423, "learning_rate": 3.0089005299534167e-06, "loss": 0.0209, "step": 105905 }, { "epoch": 0.44191402892406806, "grad_norm": 0.8410996579487988, "learning_rate": 3.0088295035051546e-06, "loss": 0.0238, "step": 105910 }, { "epoch": 0.44193489163905836, "grad_norm": 0.4996547482511022, "learning_rate": 3.0087584820864883e-06, "loss": 0.0206, "step": 105915 }, { "epoch": 0.4419557543540486, "grad_norm": 0.4267716398143113, "learning_rate": 3.008687465696826e-06, "loss": 0.0218, "step": 105920 }, { "epoch": 0.4419766170690389, "grad_norm": 0.9577040016724561, "learning_rate": 3.0086164543355712e-06, "loss": 0.0211, "step": 105925 }, { "epoch": 0.4419974797840292, "grad_norm": 0.7139787232950903, "learning_rate": 3.0085454480021326e-06, "loss": 0.0282, "step": 105930 }, { "epoch": 0.44201834249901945, "grad_norm": 1.4755439180076246, "learning_rate": 3.0084744466959167e-06, "loss": 0.0285, "step": 105935 }, { "epoch": 0.44203920521400974, "grad_norm": 0.8478632823245787, "learning_rate": 3.00840345041633e-06, "loss": 0.0242, "step": 105940 }, { "epoch": 0.442060067929, "grad_norm": 0.41118344311904254, "learning_rate": 3.0083324591627796e-06, "loss": 0.0244, "step": 105945 }, { "epoch": 0.4420809306439903, "grad_norm": 0.8894509995998566, "learning_rate": 3.008261472934672e-06, "loss": 0.0253, "step": 105950 }, { "epoch": 0.4421017933589806, "grad_norm": 0.6324543496935613, "learning_rate": 3.0081904917314153e-06, "loss": 0.0259, "step": 105955 }, { "epoch": 0.44212265607397083, "grad_norm": 0.5504436784195208, "learning_rate": 3.0081195155524156e-06, "loss": 0.0278, "step": 105960 }, { "epoch": 0.44214351878896113, "grad_norm": 0.4809826513884886, "learning_rate": 3.0080485443970807e-06, "loss": 0.0235, "step": 105965 }, { "epoch": 0.4421643815039514, "grad_norm": 0.8145318245258353, "learning_rate": 3.007977578264819e-06, "loss": 0.0271, "step": 105970 }, { "epoch": 0.4421852442189417, "grad_norm": 0.4389114002499995, "learning_rate": 3.0079066171550365e-06, "loss": 0.0193, "step": 105975 }, { "epoch": 0.442206106933932, "grad_norm": 0.6718893754540687, "learning_rate": 3.0078356610671412e-06, "loss": 0.0309, "step": 105980 }, { "epoch": 0.4422269696489222, "grad_norm": 0.816661377430947, "learning_rate": 3.0077647100005407e-06, "loss": 0.0279, "step": 105985 }, { "epoch": 0.4422478323639125, "grad_norm": 0.5786827017810283, "learning_rate": 3.0076937639546438e-06, "loss": 0.0285, "step": 105990 }, { "epoch": 0.44226869507890276, "grad_norm": 0.8582008943866574, "learning_rate": 3.0076228229288574e-06, "loss": 0.0258, "step": 105995 }, { "epoch": 0.44228955779389306, "grad_norm": 0.6027756645101433, "learning_rate": 3.0075518869225895e-06, "loss": 0.0253, "step": 106000 }, { "epoch": 0.44231042050888336, "grad_norm": 0.520685441735181, "learning_rate": 3.0074809559352485e-06, "loss": 0.0237, "step": 106005 }, { "epoch": 0.4423312832238736, "grad_norm": 0.44761742400265886, "learning_rate": 3.0074100299662423e-06, "loss": 0.0331, "step": 106010 }, { "epoch": 0.4423521459388639, "grad_norm": 0.9223170469317306, "learning_rate": 3.00733910901498e-06, "loss": 0.0217, "step": 106015 }, { "epoch": 0.4423730086538542, "grad_norm": 0.593252597621995, "learning_rate": 3.0072681930808693e-06, "loss": 0.0202, "step": 106020 }, { "epoch": 0.44239387136884445, "grad_norm": 0.8865642088863145, "learning_rate": 3.0071972821633184e-06, "loss": 0.0244, "step": 106025 }, { "epoch": 0.44241473408383475, "grad_norm": 0.6485178916811081, "learning_rate": 3.0071263762617365e-06, "loss": 0.0297, "step": 106030 }, { "epoch": 0.442435596798825, "grad_norm": 0.857032806875248, "learning_rate": 3.007055475375532e-06, "loss": 0.0295, "step": 106035 }, { "epoch": 0.4424564595138153, "grad_norm": 1.5068419661455354, "learning_rate": 3.006984579504114e-06, "loss": 0.0345, "step": 106040 }, { "epoch": 0.4424773222288056, "grad_norm": 0.5570185220781556, "learning_rate": 3.006913688646891e-06, "loss": 0.0261, "step": 106045 }, { "epoch": 0.44249818494379584, "grad_norm": 0.8524720513999573, "learning_rate": 3.006842802803272e-06, "loss": 0.0341, "step": 106050 }, { "epoch": 0.44251904765878614, "grad_norm": 0.8040756691958652, "learning_rate": 3.0067719219726656e-06, "loss": 0.0402, "step": 106055 }, { "epoch": 0.4425399103737764, "grad_norm": 0.6150575392189016, "learning_rate": 3.0067010461544816e-06, "loss": 0.0228, "step": 106060 }, { "epoch": 0.4425607730887667, "grad_norm": 0.3332494965713342, "learning_rate": 3.0066301753481297e-06, "loss": 0.0268, "step": 106065 }, { "epoch": 0.442581635803757, "grad_norm": 0.5591762264352217, "learning_rate": 3.0065593095530183e-06, "loss": 0.0231, "step": 106070 }, { "epoch": 0.4426024985187472, "grad_norm": 0.9531204011484528, "learning_rate": 3.0064884487685574e-06, "loss": 0.0205, "step": 106075 }, { "epoch": 0.4426233612337375, "grad_norm": 0.7041576826247252, "learning_rate": 3.0064175929941562e-06, "loss": 0.0356, "step": 106080 }, { "epoch": 0.44264422394872777, "grad_norm": 1.4008147523481558, "learning_rate": 3.0063467422292248e-06, "loss": 0.0268, "step": 106085 }, { "epoch": 0.44266508666371807, "grad_norm": 0.7601174906089523, "learning_rate": 3.006275896473173e-06, "loss": 0.0331, "step": 106090 }, { "epoch": 0.44268594937870837, "grad_norm": 0.5744511312225123, "learning_rate": 3.0062050557254104e-06, "loss": 0.0271, "step": 106095 }, { "epoch": 0.4427068120936986, "grad_norm": 0.6001699714962109, "learning_rate": 3.006134219985346e-06, "loss": 0.0278, "step": 106100 }, { "epoch": 0.4427276748086889, "grad_norm": 0.9753492454107731, "learning_rate": 3.0060633892523916e-06, "loss": 0.0255, "step": 106105 }, { "epoch": 0.4427485375236792, "grad_norm": 0.5299129239034795, "learning_rate": 3.005992563525956e-06, "loss": 0.029, "step": 106110 }, { "epoch": 0.44276940023866945, "grad_norm": 0.8232929355319929, "learning_rate": 3.0059217428054506e-06, "loss": 0.0243, "step": 106115 }, { "epoch": 0.44279026295365975, "grad_norm": 0.6260950939614727, "learning_rate": 3.0058509270902842e-06, "loss": 0.0278, "step": 106120 }, { "epoch": 0.44281112566865, "grad_norm": 0.7414814740149881, "learning_rate": 3.005780116379869e-06, "loss": 0.0272, "step": 106125 }, { "epoch": 0.4428319883836403, "grad_norm": 0.7109949411166235, "learning_rate": 3.0057093106736135e-06, "loss": 0.0247, "step": 106130 }, { "epoch": 0.4428528510986306, "grad_norm": 0.6644335041851349, "learning_rate": 3.00563850997093e-06, "loss": 0.0224, "step": 106135 }, { "epoch": 0.44287371381362084, "grad_norm": 0.48934011197407545, "learning_rate": 3.0055677142712285e-06, "loss": 0.0307, "step": 106140 }, { "epoch": 0.44289457652861114, "grad_norm": 0.6959543020618667, "learning_rate": 3.0054969235739205e-06, "loss": 0.0223, "step": 106145 }, { "epoch": 0.4429154392436014, "grad_norm": 0.6615166862167902, "learning_rate": 3.0054261378784167e-06, "loss": 0.0303, "step": 106150 }, { "epoch": 0.4429363019585917, "grad_norm": 0.5746087212854041, "learning_rate": 3.005355357184127e-06, "loss": 0.0323, "step": 106155 }, { "epoch": 0.442957164673582, "grad_norm": 0.4803942483586579, "learning_rate": 3.0052845814904635e-06, "loss": 0.024, "step": 106160 }, { "epoch": 0.4429780273885722, "grad_norm": 0.3817070552715898, "learning_rate": 3.005213810796837e-06, "loss": 0.0297, "step": 106165 }, { "epoch": 0.4429988901035625, "grad_norm": 0.42654408129468374, "learning_rate": 3.005143045102659e-06, "loss": 0.025, "step": 106170 }, { "epoch": 0.44301975281855277, "grad_norm": 0.6786682278820201, "learning_rate": 3.0050722844073416e-06, "loss": 0.0229, "step": 106175 }, { "epoch": 0.44304061553354307, "grad_norm": 1.2203741094955045, "learning_rate": 3.0050015287102952e-06, "loss": 0.0292, "step": 106180 }, { "epoch": 0.44306147824853337, "grad_norm": 0.5113331011962444, "learning_rate": 3.004930778010932e-06, "loss": 0.0262, "step": 106185 }, { "epoch": 0.4430823409635236, "grad_norm": 0.4475602614545162, "learning_rate": 3.0048600323086636e-06, "loss": 0.0199, "step": 106190 }, { "epoch": 0.4431032036785139, "grad_norm": 0.5422108290947983, "learning_rate": 3.0047892916029013e-06, "loss": 0.0217, "step": 106195 }, { "epoch": 0.4431240663935042, "grad_norm": 0.8341309001147733, "learning_rate": 3.004718555893058e-06, "loss": 0.0304, "step": 106200 }, { "epoch": 0.44314492910849446, "grad_norm": 0.8966348639965368, "learning_rate": 3.004647825178545e-06, "loss": 0.0245, "step": 106205 }, { "epoch": 0.44316579182348476, "grad_norm": 1.0757150879434223, "learning_rate": 3.0045770994587743e-06, "loss": 0.0272, "step": 106210 }, { "epoch": 0.443186654538475, "grad_norm": 1.0166953228705162, "learning_rate": 3.0045063787331572e-06, "loss": 0.0329, "step": 106215 }, { "epoch": 0.4432075172534653, "grad_norm": 0.8143419477698133, "learning_rate": 3.0044356630011086e-06, "loss": 0.0275, "step": 106220 }, { "epoch": 0.4432283799684556, "grad_norm": 0.3206789860628221, "learning_rate": 3.0043649522620384e-06, "loss": 0.024, "step": 106225 }, { "epoch": 0.44324924268344584, "grad_norm": 0.5845112211545006, "learning_rate": 3.0042942465153605e-06, "loss": 0.021, "step": 106230 }, { "epoch": 0.44327010539843614, "grad_norm": 25.166734514182085, "learning_rate": 3.0042235457604862e-06, "loss": 0.0326, "step": 106235 }, { "epoch": 0.4432909681134264, "grad_norm": 0.6878504517216504, "learning_rate": 3.0041528499968294e-06, "loss": 0.0235, "step": 106240 }, { "epoch": 0.4433118308284167, "grad_norm": 0.6850528502302093, "learning_rate": 3.004082159223802e-06, "loss": 0.0257, "step": 106245 }, { "epoch": 0.443332693543407, "grad_norm": 0.577145381654302, "learning_rate": 3.0040114734408175e-06, "loss": 0.0192, "step": 106250 }, { "epoch": 0.44335355625839723, "grad_norm": 0.750250620158264, "learning_rate": 3.0039407926472885e-06, "loss": 0.0268, "step": 106255 }, { "epoch": 0.44337441897338753, "grad_norm": 1.0193554920428107, "learning_rate": 3.003870116842628e-06, "loss": 0.024, "step": 106260 }, { "epoch": 0.4433952816883778, "grad_norm": 0.5873276635028971, "learning_rate": 3.00379944602625e-06, "loss": 0.025, "step": 106265 }, { "epoch": 0.4434161444033681, "grad_norm": 0.6797919927927883, "learning_rate": 3.003728780197566e-06, "loss": 0.0163, "step": 106270 }, { "epoch": 0.4434370071183584, "grad_norm": 0.7301239677645893, "learning_rate": 3.0036581193559905e-06, "loss": 0.0243, "step": 106275 }, { "epoch": 0.4434578698333486, "grad_norm": 0.7430348465217284, "learning_rate": 3.0035874635009364e-06, "loss": 0.0372, "step": 106280 }, { "epoch": 0.4434787325483389, "grad_norm": 0.5452819088400709, "learning_rate": 3.003516812631818e-06, "loss": 0.0235, "step": 106285 }, { "epoch": 0.4434995952633292, "grad_norm": 0.5406604040030453, "learning_rate": 3.003446166748049e-06, "loss": 0.0255, "step": 106290 }, { "epoch": 0.44352045797831946, "grad_norm": 0.423660954985973, "learning_rate": 3.003375525849042e-06, "loss": 0.0239, "step": 106295 }, { "epoch": 0.44354132069330976, "grad_norm": 1.0854439522439951, "learning_rate": 3.003304889934212e-06, "loss": 0.0271, "step": 106300 }, { "epoch": 0.4435621834083, "grad_norm": 0.8926672602527471, "learning_rate": 3.0032342590029716e-06, "loss": 0.0233, "step": 106305 }, { "epoch": 0.4435830461232903, "grad_norm": 0.9333384218445285, "learning_rate": 3.0031636330547357e-06, "loss": 0.0304, "step": 106310 }, { "epoch": 0.4436039088382806, "grad_norm": 0.8053165628776414, "learning_rate": 3.003093012088919e-06, "loss": 0.0253, "step": 106315 }, { "epoch": 0.44362477155327085, "grad_norm": 0.6334579857864467, "learning_rate": 3.0030223961049345e-06, "loss": 0.0242, "step": 106320 }, { "epoch": 0.44364563426826115, "grad_norm": 0.8780720564271203, "learning_rate": 3.0029517851021973e-06, "loss": 0.0385, "step": 106325 }, { "epoch": 0.4436664969832514, "grad_norm": 0.7777480572566992, "learning_rate": 3.002881179080121e-06, "loss": 0.0303, "step": 106330 }, { "epoch": 0.4436873596982417, "grad_norm": 0.688107883045124, "learning_rate": 3.0028105780381206e-06, "loss": 0.024, "step": 106335 }, { "epoch": 0.443708222413232, "grad_norm": 0.6952193339972069, "learning_rate": 3.0027399819756114e-06, "loss": 0.0229, "step": 106340 }, { "epoch": 0.44372908512822223, "grad_norm": 0.6346530338486364, "learning_rate": 3.0026693908920068e-06, "loss": 0.0223, "step": 106345 }, { "epoch": 0.44374994784321253, "grad_norm": 0.462916545828669, "learning_rate": 3.0025988047867227e-06, "loss": 0.0383, "step": 106350 }, { "epoch": 0.4437708105582028, "grad_norm": 0.7374067816845041, "learning_rate": 3.002528223659173e-06, "loss": 0.0293, "step": 106355 }, { "epoch": 0.4437916732731931, "grad_norm": 0.9983197796915074, "learning_rate": 3.002457647508773e-06, "loss": 0.0335, "step": 106360 }, { "epoch": 0.4438125359881834, "grad_norm": 0.20925078538014602, "learning_rate": 3.002387076334938e-06, "loss": 0.022, "step": 106365 }, { "epoch": 0.4438333987031736, "grad_norm": 0.3204856958292349, "learning_rate": 3.0023165101370834e-06, "loss": 0.0187, "step": 106370 }, { "epoch": 0.4438542614181639, "grad_norm": 1.2362844214070197, "learning_rate": 3.0022459489146237e-06, "loss": 0.0303, "step": 106375 }, { "epoch": 0.4438751241331542, "grad_norm": 0.5980073410687152, "learning_rate": 3.002175392666975e-06, "loss": 0.0172, "step": 106380 }, { "epoch": 0.44389598684814446, "grad_norm": 1.1903012411763096, "learning_rate": 3.0021048413935524e-06, "loss": 0.027, "step": 106385 }, { "epoch": 0.44391684956313476, "grad_norm": 0.5776334904037224, "learning_rate": 3.0020342950937714e-06, "loss": 0.0278, "step": 106390 }, { "epoch": 0.443937712278125, "grad_norm": 0.5756888972690738, "learning_rate": 3.0019637537670476e-06, "loss": 0.0203, "step": 106395 }, { "epoch": 0.4439585749931153, "grad_norm": 0.547864556548326, "learning_rate": 3.0018932174127976e-06, "loss": 0.0271, "step": 106400 }, { "epoch": 0.4439794377081056, "grad_norm": 0.24413465951083113, "learning_rate": 3.0018226860304357e-06, "loss": 0.0308, "step": 106405 }, { "epoch": 0.44400030042309585, "grad_norm": 0.4617490341683291, "learning_rate": 3.0017521596193793e-06, "loss": 0.0223, "step": 106410 }, { "epoch": 0.44402116313808615, "grad_norm": 0.4333523725744964, "learning_rate": 3.001681638179044e-06, "loss": 0.0226, "step": 106415 }, { "epoch": 0.4440420258530764, "grad_norm": 0.27635089342113817, "learning_rate": 3.0016111217088444e-06, "loss": 0.0178, "step": 106420 }, { "epoch": 0.4440628885680667, "grad_norm": 0.5066508521801716, "learning_rate": 3.0015406102081995e-06, "loss": 0.019, "step": 106425 }, { "epoch": 0.444083751283057, "grad_norm": 0.8570587437585934, "learning_rate": 3.0014701036765236e-06, "loss": 0.0273, "step": 106430 }, { "epoch": 0.44410461399804724, "grad_norm": 1.142639817526417, "learning_rate": 3.001399602113234e-06, "loss": 0.0304, "step": 106435 }, { "epoch": 0.44412547671303754, "grad_norm": 0.46222531880360246, "learning_rate": 3.0013291055177463e-06, "loss": 0.0205, "step": 106440 }, { "epoch": 0.4441463394280278, "grad_norm": 0.8973730092988174, "learning_rate": 3.0012586138894783e-06, "loss": 0.0288, "step": 106445 }, { "epoch": 0.4441672021430181, "grad_norm": 0.8865732443731643, "learning_rate": 3.0011881272278463e-06, "loss": 0.026, "step": 106450 }, { "epoch": 0.4441880648580084, "grad_norm": 0.6723838339529106, "learning_rate": 3.0011176455322665e-06, "loss": 0.0225, "step": 106455 }, { "epoch": 0.4442089275729986, "grad_norm": 1.7745319693289887, "learning_rate": 3.001047168802157e-06, "loss": 0.0264, "step": 106460 }, { "epoch": 0.4442297902879889, "grad_norm": 0.9186742062238463, "learning_rate": 3.000976697036934e-06, "loss": 0.0238, "step": 106465 }, { "epoch": 0.4442506530029792, "grad_norm": 0.39460974540606486, "learning_rate": 3.0009062302360137e-06, "loss": 0.0244, "step": 106470 }, { "epoch": 0.44427151571796947, "grad_norm": 0.8334471904768314, "learning_rate": 3.0008357683988147e-06, "loss": 0.0292, "step": 106475 }, { "epoch": 0.44429237843295977, "grad_norm": 0.5728893245891393, "learning_rate": 3.000765311524754e-06, "loss": 0.0212, "step": 106480 }, { "epoch": 0.44431324114795, "grad_norm": 0.6612885469979123, "learning_rate": 3.0006948596132485e-06, "loss": 0.026, "step": 106485 }, { "epoch": 0.4443341038629403, "grad_norm": 0.6261493691700809, "learning_rate": 3.000624412663716e-06, "loss": 0.0258, "step": 106490 }, { "epoch": 0.4443549665779306, "grad_norm": 1.1298761345600479, "learning_rate": 3.0005539706755742e-06, "loss": 0.0359, "step": 106495 }, { "epoch": 0.44437582929292085, "grad_norm": 0.8975276926149739, "learning_rate": 3.000483533648241e-06, "loss": 0.0281, "step": 106500 }, { "epoch": 0.44439669200791115, "grad_norm": 1.0057437724507956, "learning_rate": 3.0004131015811333e-06, "loss": 0.0345, "step": 106505 }, { "epoch": 0.4444175547229014, "grad_norm": 0.5294599289914534, "learning_rate": 3.000342674473669e-06, "loss": 0.0277, "step": 106510 }, { "epoch": 0.4444384174378917, "grad_norm": 0.6096059767188647, "learning_rate": 3.000272252325267e-06, "loss": 0.0213, "step": 106515 }, { "epoch": 0.444459280152882, "grad_norm": 0.6128651379333694, "learning_rate": 3.000201835135345e-06, "loss": 0.0198, "step": 106520 }, { "epoch": 0.44448014286787224, "grad_norm": 1.636037788453525, "learning_rate": 3.00013142290332e-06, "loss": 0.0263, "step": 106525 }, { "epoch": 0.44450100558286254, "grad_norm": 1.1051768095420336, "learning_rate": 3.000061015628612e-06, "loss": 0.0228, "step": 106530 }, { "epoch": 0.4445218682978528, "grad_norm": 0.7191797460801134, "learning_rate": 2.999990613310639e-06, "loss": 0.0254, "step": 106535 }, { "epoch": 0.4445427310128431, "grad_norm": 0.6936788939717835, "learning_rate": 2.999920215948818e-06, "loss": 0.0163, "step": 106540 }, { "epoch": 0.4445635937278334, "grad_norm": 0.9938870582366418, "learning_rate": 2.9998498235425687e-06, "loss": 0.0735, "step": 106545 }, { "epoch": 0.44458445644282363, "grad_norm": 0.5477580310064225, "learning_rate": 2.9997794360913097e-06, "loss": 0.0161, "step": 106550 }, { "epoch": 0.4446053191578139, "grad_norm": 0.5082724267421698, "learning_rate": 2.9997090535944595e-06, "loss": 0.0233, "step": 106555 }, { "epoch": 0.4446261818728042, "grad_norm": 0.4904266582984461, "learning_rate": 2.9996386760514367e-06, "loss": 0.0171, "step": 106560 }, { "epoch": 0.44464704458779447, "grad_norm": 0.5178055952345937, "learning_rate": 2.999568303461661e-06, "loss": 0.0281, "step": 106565 }, { "epoch": 0.44466790730278477, "grad_norm": 0.9678663351527637, "learning_rate": 2.9994979358245506e-06, "loss": 0.0279, "step": 106570 }, { "epoch": 0.444688770017775, "grad_norm": 0.6512884638148803, "learning_rate": 2.9994275731395245e-06, "loss": 0.0189, "step": 106575 }, { "epoch": 0.4447096327327653, "grad_norm": 0.7529175515006531, "learning_rate": 2.9993572154060023e-06, "loss": 0.0294, "step": 106580 }, { "epoch": 0.4447304954477556, "grad_norm": 0.32752966121485366, "learning_rate": 2.9992868626234033e-06, "loss": 0.0167, "step": 106585 }, { "epoch": 0.44475135816274586, "grad_norm": 0.5250757078572247, "learning_rate": 2.9992165147911466e-06, "loss": 0.0258, "step": 106590 }, { "epoch": 0.44477222087773616, "grad_norm": 1.1592016341359077, "learning_rate": 2.999146171908653e-06, "loss": 0.0318, "step": 106595 }, { "epoch": 0.4447930835927264, "grad_norm": 0.9109762490394777, "learning_rate": 2.9990758339753395e-06, "loss": 0.0227, "step": 106600 }, { "epoch": 0.4448139463077167, "grad_norm": 0.7963531745405413, "learning_rate": 2.9990055009906284e-06, "loss": 0.0285, "step": 106605 }, { "epoch": 0.444834809022707, "grad_norm": 1.2491551700422983, "learning_rate": 2.9989351729539373e-06, "loss": 0.0316, "step": 106610 }, { "epoch": 0.44485567173769724, "grad_norm": 1.1655501900158374, "learning_rate": 2.9988648498646877e-06, "loss": 0.0153, "step": 106615 }, { "epoch": 0.44487653445268754, "grad_norm": 0.6234233928592866, "learning_rate": 2.998794531722299e-06, "loss": 0.0205, "step": 106620 }, { "epoch": 0.4448973971676778, "grad_norm": 0.6776038824501115, "learning_rate": 2.9987242185261906e-06, "loss": 0.0232, "step": 106625 }, { "epoch": 0.4449182598826681, "grad_norm": 0.8407592143195479, "learning_rate": 2.9986539102757834e-06, "loss": 0.0253, "step": 106630 }, { "epoch": 0.4449391225976584, "grad_norm": 0.6847069090776863, "learning_rate": 2.9985836069704977e-06, "loss": 0.0276, "step": 106635 }, { "epoch": 0.44495998531264863, "grad_norm": 0.7021085261626651, "learning_rate": 2.9985133086097534e-06, "loss": 0.0226, "step": 106640 }, { "epoch": 0.44498084802763893, "grad_norm": 1.1793290698971621, "learning_rate": 2.9984430151929713e-06, "loss": 0.03, "step": 106645 }, { "epoch": 0.4450017107426292, "grad_norm": 0.7688437483486059, "learning_rate": 2.9983727267195713e-06, "loss": 0.0163, "step": 106650 }, { "epoch": 0.4450225734576195, "grad_norm": 1.090023838207999, "learning_rate": 2.998302443188975e-06, "loss": 0.0235, "step": 106655 }, { "epoch": 0.4450434361726098, "grad_norm": 0.7539905838646193, "learning_rate": 2.9982321646006024e-06, "loss": 0.0182, "step": 106660 }, { "epoch": 0.4450642988876, "grad_norm": 1.223104357514779, "learning_rate": 2.9981618909538747e-06, "loss": 0.0263, "step": 106665 }, { "epoch": 0.4450851616025903, "grad_norm": 0.9326073574822514, "learning_rate": 2.9980916222482116e-06, "loss": 0.045, "step": 106670 }, { "epoch": 0.4451060243175806, "grad_norm": 0.35880231233659443, "learning_rate": 2.998021358483036e-06, "loss": 0.0236, "step": 106675 }, { "epoch": 0.44512688703257086, "grad_norm": 0.6973499997408255, "learning_rate": 2.9979510996577678e-06, "loss": 0.0275, "step": 106680 }, { "epoch": 0.44514774974756116, "grad_norm": 1.751814667170893, "learning_rate": 2.997880845771829e-06, "loss": 0.0233, "step": 106685 }, { "epoch": 0.4451686124625514, "grad_norm": 1.2411807255711034, "learning_rate": 2.99781059682464e-06, "loss": 0.0336, "step": 106690 }, { "epoch": 0.4451894751775417, "grad_norm": 1.3689182506876019, "learning_rate": 2.9977403528156224e-06, "loss": 0.0325, "step": 106695 }, { "epoch": 0.445210337892532, "grad_norm": 1.5132557264629831, "learning_rate": 2.997670113744198e-06, "loss": 0.032, "step": 106700 }, { "epoch": 0.44523120060752225, "grad_norm": 0.9590321461257914, "learning_rate": 2.9975998796097883e-06, "loss": 0.0306, "step": 106705 }, { "epoch": 0.44525206332251255, "grad_norm": 0.4953163932447717, "learning_rate": 2.997529650411815e-06, "loss": 0.0249, "step": 106710 }, { "epoch": 0.4452729260375028, "grad_norm": 0.9492577256238994, "learning_rate": 2.9974594261496996e-06, "loss": 0.0242, "step": 106715 }, { "epoch": 0.4452937887524931, "grad_norm": 1.4603632533047834, "learning_rate": 2.9973892068228645e-06, "loss": 0.0337, "step": 106720 }, { "epoch": 0.4453146514674834, "grad_norm": 0.6471395413110779, "learning_rate": 2.9973189924307306e-06, "loss": 0.0231, "step": 106725 }, { "epoch": 0.44533551418247364, "grad_norm": 1.003045861903839, "learning_rate": 2.9972487829727204e-06, "loss": 0.0318, "step": 106730 }, { "epoch": 0.44535637689746393, "grad_norm": 0.7901082797849768, "learning_rate": 2.997178578448257e-06, "loss": 0.0328, "step": 106735 }, { "epoch": 0.4453772396124542, "grad_norm": 0.5015948217865936, "learning_rate": 2.9971083788567613e-06, "loss": 0.024, "step": 106740 }, { "epoch": 0.4453981023274445, "grad_norm": 1.3620004180640544, "learning_rate": 2.9970381841976563e-06, "loss": 0.0286, "step": 106745 }, { "epoch": 0.4454189650424348, "grad_norm": 0.783209875718503, "learning_rate": 2.996967994470365e-06, "loss": 0.0205, "step": 106750 }, { "epoch": 0.445439827757425, "grad_norm": 1.38028717762969, "learning_rate": 2.9968978096743077e-06, "loss": 0.0239, "step": 106755 }, { "epoch": 0.4454606904724153, "grad_norm": 0.5035920558639301, "learning_rate": 2.99682762980891e-06, "loss": 0.0239, "step": 106760 }, { "epoch": 0.4454815531874056, "grad_norm": 1.4768731159714086, "learning_rate": 2.996757454873593e-06, "loss": 0.0307, "step": 106765 }, { "epoch": 0.44550241590239587, "grad_norm": 0.6579277333415728, "learning_rate": 2.996687284867779e-06, "loss": 0.0278, "step": 106770 }, { "epoch": 0.44552327861738616, "grad_norm": 0.4920824686114617, "learning_rate": 2.9966171197908922e-06, "loss": 0.0242, "step": 106775 }, { "epoch": 0.4455441413323764, "grad_norm": 1.6196644865715368, "learning_rate": 2.996546959642354e-06, "loss": 0.0195, "step": 106780 }, { "epoch": 0.4455650040473667, "grad_norm": 0.6668704813747651, "learning_rate": 2.9964768044215896e-06, "loss": 0.0277, "step": 106785 }, { "epoch": 0.445585866762357, "grad_norm": 1.009789877627278, "learning_rate": 2.9964066541280206e-06, "loss": 0.0136, "step": 106790 }, { "epoch": 0.44560672947734725, "grad_norm": 0.5131589778290223, "learning_rate": 2.996336508761071e-06, "loss": 0.0198, "step": 106795 }, { "epoch": 0.44562759219233755, "grad_norm": 0.9317921988987984, "learning_rate": 2.9962663683201628e-06, "loss": 0.032, "step": 106800 }, { "epoch": 0.4456484549073278, "grad_norm": 1.042413048033496, "learning_rate": 2.996196232804721e-06, "loss": 0.0301, "step": 106805 }, { "epoch": 0.4456693176223181, "grad_norm": 0.6067541132460804, "learning_rate": 2.9961261022141698e-06, "loss": 0.0211, "step": 106810 }, { "epoch": 0.4456901803373084, "grad_norm": 1.1402237353774105, "learning_rate": 2.9960559765479303e-06, "loss": 0.0194, "step": 106815 }, { "epoch": 0.44571104305229864, "grad_norm": 1.37336936563328, "learning_rate": 2.995985855805429e-06, "loss": 0.0155, "step": 106820 }, { "epoch": 0.44573190576728894, "grad_norm": 0.5224422330740498, "learning_rate": 2.9959157399860874e-06, "loss": 0.025, "step": 106825 }, { "epoch": 0.4457527684822792, "grad_norm": 0.6122172658874702, "learning_rate": 2.9958456290893306e-06, "loss": 0.028, "step": 106830 }, { "epoch": 0.4457736311972695, "grad_norm": 0.9658500683224809, "learning_rate": 2.995775523114583e-06, "loss": 0.0255, "step": 106835 }, { "epoch": 0.4457944939122598, "grad_norm": 0.5002781019782188, "learning_rate": 2.9957054220612675e-06, "loss": 0.0176, "step": 106840 }, { "epoch": 0.44581535662725, "grad_norm": 1.4352434652627355, "learning_rate": 2.9956353259288096e-06, "loss": 0.0269, "step": 106845 }, { "epoch": 0.4458362193422403, "grad_norm": 0.49567789204835283, "learning_rate": 2.995565234716633e-06, "loss": 0.0277, "step": 106850 }, { "epoch": 0.4458570820572306, "grad_norm": 0.46164853753, "learning_rate": 2.9954951484241616e-06, "loss": 0.0286, "step": 106855 }, { "epoch": 0.44587794477222087, "grad_norm": 0.8114995552663901, "learning_rate": 2.9954250670508213e-06, "loss": 0.025, "step": 106860 }, { "epoch": 0.44589880748721117, "grad_norm": 0.42024983616896483, "learning_rate": 2.995354990596036e-06, "loss": 0.0288, "step": 106865 }, { "epoch": 0.4459196702022014, "grad_norm": 0.6049279510668386, "learning_rate": 2.9952849190592296e-06, "loss": 0.0181, "step": 106870 }, { "epoch": 0.4459405329171917, "grad_norm": 0.8835943090357049, "learning_rate": 2.9952148524398276e-06, "loss": 0.0225, "step": 106875 }, { "epoch": 0.445961395632182, "grad_norm": 1.02224840206056, "learning_rate": 2.995144790737255e-06, "loss": 0.0199, "step": 106880 }, { "epoch": 0.44598225834717226, "grad_norm": 0.5344266071399464, "learning_rate": 2.995074733950937e-06, "loss": 0.0225, "step": 106885 }, { "epoch": 0.44600312106216256, "grad_norm": 1.038084398969969, "learning_rate": 2.995004682080298e-06, "loss": 0.0219, "step": 106890 }, { "epoch": 0.4460239837771528, "grad_norm": 0.8716961976125795, "learning_rate": 2.994934635124763e-06, "loss": 0.0271, "step": 106895 }, { "epoch": 0.4460448464921431, "grad_norm": 0.8343635634938666, "learning_rate": 2.994864593083758e-06, "loss": 0.02, "step": 106900 }, { "epoch": 0.4460657092071334, "grad_norm": 0.5537657060216891, "learning_rate": 2.9947945559567084e-06, "loss": 0.0212, "step": 106905 }, { "epoch": 0.44608657192212364, "grad_norm": 0.6639153563499925, "learning_rate": 2.994724523743039e-06, "loss": 0.0243, "step": 106910 }, { "epoch": 0.44610743463711394, "grad_norm": 0.7482534584342645, "learning_rate": 2.9946544964421758e-06, "loss": 0.0173, "step": 106915 }, { "epoch": 0.4461282973521042, "grad_norm": 0.5074270341995788, "learning_rate": 2.9945844740535434e-06, "loss": 0.0272, "step": 106920 }, { "epoch": 0.4461491600670945, "grad_norm": 0.8898176352626967, "learning_rate": 2.99451445657657e-06, "loss": 0.0326, "step": 106925 }, { "epoch": 0.4461700227820848, "grad_norm": 1.1110678506263534, "learning_rate": 2.9944444440106785e-06, "loss": 0.0242, "step": 106930 }, { "epoch": 0.44619088549707503, "grad_norm": 0.549950091035259, "learning_rate": 2.994374436355296e-06, "loss": 0.0189, "step": 106935 }, { "epoch": 0.44621174821206533, "grad_norm": 0.49340428632239675, "learning_rate": 2.99430443360985e-06, "loss": 0.0228, "step": 106940 }, { "epoch": 0.44623261092705563, "grad_norm": 0.4593821425037378, "learning_rate": 2.9942344357737635e-06, "loss": 0.0219, "step": 106945 }, { "epoch": 0.4462534736420459, "grad_norm": 0.9139691049060576, "learning_rate": 2.9941644428464663e-06, "loss": 0.0278, "step": 106950 }, { "epoch": 0.44627433635703617, "grad_norm": 0.6463305088815436, "learning_rate": 2.9940944548273816e-06, "loss": 0.0259, "step": 106955 }, { "epoch": 0.4462951990720264, "grad_norm": 0.7829885656399918, "learning_rate": 2.994024471715937e-06, "loss": 0.0236, "step": 106960 }, { "epoch": 0.4463160617870167, "grad_norm": 0.5279941552084124, "learning_rate": 2.9939544935115592e-06, "loss": 0.0206, "step": 106965 }, { "epoch": 0.446336924502007, "grad_norm": 0.8575552396066015, "learning_rate": 2.9938845202136747e-06, "loss": 0.0199, "step": 106970 }, { "epoch": 0.44635778721699726, "grad_norm": 0.6150476549828031, "learning_rate": 2.99381455182171e-06, "loss": 0.0251, "step": 106975 }, { "epoch": 0.44637864993198756, "grad_norm": 1.237623128162196, "learning_rate": 2.993744588335091e-06, "loss": 0.0399, "step": 106980 }, { "epoch": 0.4463995126469778, "grad_norm": 0.8060391025092699, "learning_rate": 2.9936746297532465e-06, "loss": 0.0294, "step": 106985 }, { "epoch": 0.4464203753619681, "grad_norm": 1.2285703294462584, "learning_rate": 2.993604676075602e-06, "loss": 0.0395, "step": 106990 }, { "epoch": 0.4464412380769584, "grad_norm": 0.8746754243104489, "learning_rate": 2.9935347273015845e-06, "loss": 0.0458, "step": 106995 }, { "epoch": 0.44646210079194865, "grad_norm": 0.47915233998307166, "learning_rate": 2.9934647834306223e-06, "loss": 0.028, "step": 107000 }, { "epoch": 0.44648296350693895, "grad_norm": 0.7598811866867844, "learning_rate": 2.993394844462142e-06, "loss": 0.0199, "step": 107005 }, { "epoch": 0.4465038262219292, "grad_norm": 0.35959175300262314, "learning_rate": 2.9933249103955705e-06, "loss": 0.0337, "step": 107010 }, { "epoch": 0.4465246889369195, "grad_norm": 0.6367277126028993, "learning_rate": 2.9932549812303352e-06, "loss": 0.0257, "step": 107015 }, { "epoch": 0.4465455516519098, "grad_norm": 0.6613910090137403, "learning_rate": 2.9931850569658645e-06, "loss": 0.0284, "step": 107020 }, { "epoch": 0.44656641436690003, "grad_norm": 0.6290002081298184, "learning_rate": 2.9931151376015848e-06, "loss": 0.0405, "step": 107025 }, { "epoch": 0.44658727708189033, "grad_norm": 0.7035369765774644, "learning_rate": 2.9930452231369244e-06, "loss": 0.0255, "step": 107030 }, { "epoch": 0.44660813979688063, "grad_norm": 0.44062763605512933, "learning_rate": 2.992975313571311e-06, "loss": 0.0284, "step": 107035 }, { "epoch": 0.4466290025118709, "grad_norm": 0.49093399714176994, "learning_rate": 2.9929054089041737e-06, "loss": 0.0209, "step": 107040 }, { "epoch": 0.4466498652268612, "grad_norm": 0.6774796386455402, "learning_rate": 2.9928355091349386e-06, "loss": 0.0241, "step": 107045 }, { "epoch": 0.4466707279418514, "grad_norm": 0.8113927867568828, "learning_rate": 2.9927656142630346e-06, "loss": 0.029, "step": 107050 }, { "epoch": 0.4466915906568417, "grad_norm": 0.4276498438737302, "learning_rate": 2.9926957242878895e-06, "loss": 0.0174, "step": 107055 }, { "epoch": 0.446712453371832, "grad_norm": 0.414683509469813, "learning_rate": 2.9926258392089323e-06, "loss": 0.0425, "step": 107060 }, { "epoch": 0.44673331608682226, "grad_norm": 0.9434388594187203, "learning_rate": 2.992555959025591e-06, "loss": 0.0277, "step": 107065 }, { "epoch": 0.44675417880181256, "grad_norm": 1.2209729614648603, "learning_rate": 2.9924860837372936e-06, "loss": 0.0272, "step": 107070 }, { "epoch": 0.4467750415168028, "grad_norm": 0.6677766643720953, "learning_rate": 2.99241621334347e-06, "loss": 0.0311, "step": 107075 }, { "epoch": 0.4467959042317931, "grad_norm": 0.7956670849155123, "learning_rate": 2.992346347843546e-06, "loss": 0.0209, "step": 107080 }, { "epoch": 0.4468167669467834, "grad_norm": 0.4381288157538662, "learning_rate": 2.9922764872369537e-06, "loss": 0.025, "step": 107085 }, { "epoch": 0.44683762966177365, "grad_norm": 1.1659676524725242, "learning_rate": 2.9922066315231203e-06, "loss": 0.0242, "step": 107090 }, { "epoch": 0.44685849237676395, "grad_norm": 0.7186496345588135, "learning_rate": 2.992136780701474e-06, "loss": 0.028, "step": 107095 }, { "epoch": 0.4468793550917542, "grad_norm": 0.9582005214482177, "learning_rate": 2.992066934771445e-06, "loss": 0.0325, "step": 107100 }, { "epoch": 0.4469002178067445, "grad_norm": 0.47686743148483496, "learning_rate": 2.9919970937324614e-06, "loss": 0.0265, "step": 107105 }, { "epoch": 0.4469210805217348, "grad_norm": 0.4644121412479877, "learning_rate": 2.9919272575839536e-06, "loss": 0.0264, "step": 107110 }, { "epoch": 0.44694194323672504, "grad_norm": 1.5274021521224481, "learning_rate": 2.99185742632535e-06, "loss": 0.0281, "step": 107115 }, { "epoch": 0.44696280595171534, "grad_norm": 0.8753899993835139, "learning_rate": 2.9917875999560803e-06, "loss": 0.028, "step": 107120 }, { "epoch": 0.44698366866670564, "grad_norm": 0.6210290808641437, "learning_rate": 2.9917177784755743e-06, "loss": 0.0282, "step": 107125 }, { "epoch": 0.4470045313816959, "grad_norm": 0.5336146444025283, "learning_rate": 2.9916479618832606e-06, "loss": 0.0217, "step": 107130 }, { "epoch": 0.4470253940966862, "grad_norm": 0.7240844206232804, "learning_rate": 2.9915781501785702e-06, "loss": 0.0267, "step": 107135 }, { "epoch": 0.4470462568116764, "grad_norm": 0.7568625691188182, "learning_rate": 2.991508343360931e-06, "loss": 0.0203, "step": 107140 }, { "epoch": 0.4470671195266667, "grad_norm": 0.8999711088661914, "learning_rate": 2.9914385414297746e-06, "loss": 0.0224, "step": 107145 }, { "epoch": 0.447087982241657, "grad_norm": 0.5332111385635071, "learning_rate": 2.99136874438453e-06, "loss": 0.026, "step": 107150 }, { "epoch": 0.44710884495664727, "grad_norm": 0.3972827665874269, "learning_rate": 2.9912989522246277e-06, "loss": 0.0254, "step": 107155 }, { "epoch": 0.44712970767163757, "grad_norm": 0.7237558292116754, "learning_rate": 2.991229164949498e-06, "loss": 0.0254, "step": 107160 }, { "epoch": 0.4471505703866278, "grad_norm": 0.8108833062772115, "learning_rate": 2.99115938255857e-06, "loss": 0.0258, "step": 107165 }, { "epoch": 0.4471714331016181, "grad_norm": 1.0838978950235316, "learning_rate": 2.991089605051275e-06, "loss": 0.0226, "step": 107170 }, { "epoch": 0.4471922958166084, "grad_norm": 1.2306389818029533, "learning_rate": 2.991019832427043e-06, "loss": 0.026, "step": 107175 }, { "epoch": 0.44721315853159865, "grad_norm": 0.47699293110312585, "learning_rate": 2.9909500646853047e-06, "loss": 0.0272, "step": 107180 }, { "epoch": 0.44723402124658895, "grad_norm": 0.7855577843433205, "learning_rate": 2.9908803018254912e-06, "loss": 0.0282, "step": 107185 }, { "epoch": 0.4472548839615792, "grad_norm": 1.2075532771535145, "learning_rate": 2.990810543847032e-06, "loss": 0.0361, "step": 107190 }, { "epoch": 0.4472757466765695, "grad_norm": 0.5344852015384893, "learning_rate": 2.9907407907493586e-06, "loss": 0.0241, "step": 107195 }, { "epoch": 0.4472966093915598, "grad_norm": 0.6739049785007827, "learning_rate": 2.9906710425319013e-06, "loss": 0.0312, "step": 107200 }, { "epoch": 0.44731747210655004, "grad_norm": 0.46366659379704134, "learning_rate": 2.990601299194092e-06, "loss": 0.0188, "step": 107205 }, { "epoch": 0.44733833482154034, "grad_norm": 1.262446261240556, "learning_rate": 2.9905315607353615e-06, "loss": 0.0223, "step": 107210 }, { "epoch": 0.44735919753653064, "grad_norm": 0.49199458912175137, "learning_rate": 2.99046182715514e-06, "loss": 0.0301, "step": 107215 }, { "epoch": 0.4473800602515209, "grad_norm": 0.7009550019928815, "learning_rate": 2.99039209845286e-06, "loss": 0.0386, "step": 107220 }, { "epoch": 0.4474009229665112, "grad_norm": 0.762254850639862, "learning_rate": 2.990322374627952e-06, "loss": 0.0269, "step": 107225 }, { "epoch": 0.4474217856815014, "grad_norm": 1.097061217977, "learning_rate": 2.990252655679848e-06, "loss": 0.0286, "step": 107230 }, { "epoch": 0.4474426483964917, "grad_norm": 0.9303141889143555, "learning_rate": 2.9901829416079787e-06, "loss": 0.0247, "step": 107235 }, { "epoch": 0.447463511111482, "grad_norm": 0.7203482379024395, "learning_rate": 2.9901132324117767e-06, "loss": 0.0212, "step": 107240 }, { "epoch": 0.44748437382647227, "grad_norm": 1.2438363288263163, "learning_rate": 2.9900435280906724e-06, "loss": 0.0258, "step": 107245 }, { "epoch": 0.44750523654146257, "grad_norm": 0.6606170313388083, "learning_rate": 2.989973828644099e-06, "loss": 0.0151, "step": 107250 }, { "epoch": 0.4475260992564528, "grad_norm": 0.5588926511478736, "learning_rate": 2.9899041340714886e-06, "loss": 0.0242, "step": 107255 }, { "epoch": 0.4475469619714431, "grad_norm": 0.8151464127146189, "learning_rate": 2.989834444372271e-06, "loss": 0.0238, "step": 107260 }, { "epoch": 0.4475678246864334, "grad_norm": 0.46026858036782176, "learning_rate": 2.9897647595458805e-06, "loss": 0.0213, "step": 107265 }, { "epoch": 0.44758868740142366, "grad_norm": 0.650214398046848, "learning_rate": 2.9896950795917478e-06, "loss": 0.0257, "step": 107270 }, { "epoch": 0.44760955011641396, "grad_norm": 0.4777800270017784, "learning_rate": 2.9896254045093065e-06, "loss": 0.0223, "step": 107275 }, { "epoch": 0.4476304128314042, "grad_norm": 1.5235262782325316, "learning_rate": 2.9895557342979873e-06, "loss": 0.0373, "step": 107280 }, { "epoch": 0.4476512755463945, "grad_norm": 0.7308858541418728, "learning_rate": 2.9894860689572245e-06, "loss": 0.0298, "step": 107285 }, { "epoch": 0.4476721382613848, "grad_norm": 0.7337843571177743, "learning_rate": 2.9894164084864493e-06, "loss": 0.021, "step": 107290 }, { "epoch": 0.44769300097637504, "grad_norm": 0.4665914988497087, "learning_rate": 2.9893467528850945e-06, "loss": 0.0267, "step": 107295 }, { "epoch": 0.44771386369136534, "grad_norm": 0.8414608669974383, "learning_rate": 2.989277102152594e-06, "loss": 0.0213, "step": 107300 }, { "epoch": 0.44773472640635564, "grad_norm": 1.017747603353988, "learning_rate": 2.9892074562883788e-06, "loss": 0.0335, "step": 107305 }, { "epoch": 0.4477555891213459, "grad_norm": 0.9232096374669253, "learning_rate": 2.9891378152918824e-06, "loss": 0.0247, "step": 107310 }, { "epoch": 0.4477764518363362, "grad_norm": 1.5487881696352956, "learning_rate": 2.989068179162539e-06, "loss": 0.0313, "step": 107315 }, { "epoch": 0.44779731455132643, "grad_norm": 0.8694810308176706, "learning_rate": 2.9889985478997802e-06, "loss": 0.0337, "step": 107320 }, { "epoch": 0.44781817726631673, "grad_norm": 0.721739761432303, "learning_rate": 2.98892892150304e-06, "loss": 0.0306, "step": 107325 }, { "epoch": 0.44783903998130703, "grad_norm": 0.6887949179588398, "learning_rate": 2.9888592999717506e-06, "loss": 0.0278, "step": 107330 }, { "epoch": 0.4478599026962973, "grad_norm": 0.711957802047602, "learning_rate": 2.9887896833053478e-06, "loss": 0.0235, "step": 107335 }, { "epoch": 0.4478807654112876, "grad_norm": 0.8534855148929067, "learning_rate": 2.988720071503262e-06, "loss": 0.0399, "step": 107340 }, { "epoch": 0.4479016281262778, "grad_norm": 0.27294836215891566, "learning_rate": 2.988650464564929e-06, "loss": 0.025, "step": 107345 }, { "epoch": 0.4479224908412681, "grad_norm": 0.46202398411751355, "learning_rate": 2.9885808624897814e-06, "loss": 0.0321, "step": 107350 }, { "epoch": 0.4479433535562584, "grad_norm": 0.7242502692637145, "learning_rate": 2.988511265277253e-06, "loss": 0.0224, "step": 107355 }, { "epoch": 0.44796421627124866, "grad_norm": 0.8361926414993911, "learning_rate": 2.9884416729267777e-06, "loss": 0.0252, "step": 107360 }, { "epoch": 0.44798507898623896, "grad_norm": 1.5907212530270773, "learning_rate": 2.98837208543779e-06, "loss": 0.0252, "step": 107365 }, { "epoch": 0.4480059417012292, "grad_norm": 0.8084097874656379, "learning_rate": 2.988302502809724e-06, "loss": 0.0296, "step": 107370 }, { "epoch": 0.4480268044162195, "grad_norm": 1.22280806300455, "learning_rate": 2.9882329250420123e-06, "loss": 0.0287, "step": 107375 }, { "epoch": 0.4480476671312098, "grad_norm": 0.8568265456814346, "learning_rate": 2.98816335213409e-06, "loss": 0.0261, "step": 107380 }, { "epoch": 0.44806852984620005, "grad_norm": 0.7291764320433083, "learning_rate": 2.9880937840853916e-06, "loss": 0.0293, "step": 107385 }, { "epoch": 0.44808939256119035, "grad_norm": 0.4697729909614666, "learning_rate": 2.9880242208953517e-06, "loss": 0.0197, "step": 107390 }, { "epoch": 0.44811025527618065, "grad_norm": 0.8179758499665328, "learning_rate": 2.987954662563405e-06, "loss": 0.022, "step": 107395 }, { "epoch": 0.4481311179911709, "grad_norm": 0.7019946623523724, "learning_rate": 2.9878851090889845e-06, "loss": 0.0378, "step": 107400 }, { "epoch": 0.4481519807061612, "grad_norm": 0.5866538738125231, "learning_rate": 2.987815560471526e-06, "loss": 0.0221, "step": 107405 }, { "epoch": 0.44817284342115143, "grad_norm": 0.8152704200540257, "learning_rate": 2.9877460167104644e-06, "loss": 0.0298, "step": 107410 }, { "epoch": 0.44819370613614173, "grad_norm": 0.47601479003919256, "learning_rate": 2.9876764778052335e-06, "loss": 0.027, "step": 107415 }, { "epoch": 0.44821456885113203, "grad_norm": 0.8215163736711596, "learning_rate": 2.9876069437552695e-06, "loss": 0.0212, "step": 107420 }, { "epoch": 0.4482354315661223, "grad_norm": 0.8955167704318377, "learning_rate": 2.9875374145600067e-06, "loss": 0.0229, "step": 107425 }, { "epoch": 0.4482562942811126, "grad_norm": 0.9012589570331804, "learning_rate": 2.987467890218881e-06, "loss": 0.0278, "step": 107430 }, { "epoch": 0.4482771569961028, "grad_norm": 1.20523558068455, "learning_rate": 2.9873983707313263e-06, "loss": 0.0286, "step": 107435 }, { "epoch": 0.4482980197110931, "grad_norm": 0.8067093856159168, "learning_rate": 2.9873288560967793e-06, "loss": 0.0294, "step": 107440 }, { "epoch": 0.4483188824260834, "grad_norm": 0.7488432925703046, "learning_rate": 2.9872593463146743e-06, "loss": 0.0299, "step": 107445 }, { "epoch": 0.44833974514107366, "grad_norm": 0.6879272131003201, "learning_rate": 2.987189841384447e-06, "loss": 0.0259, "step": 107450 }, { "epoch": 0.44836060785606396, "grad_norm": 1.1038365439323212, "learning_rate": 2.987120341305534e-06, "loss": 0.0286, "step": 107455 }, { "epoch": 0.4483814705710542, "grad_norm": 0.6689514378964276, "learning_rate": 2.9870508460773696e-06, "loss": 0.023, "step": 107460 }, { "epoch": 0.4484023332860445, "grad_norm": 0.28666625100527987, "learning_rate": 2.9869813556993898e-06, "loss": 0.0256, "step": 107465 }, { "epoch": 0.4484231960010348, "grad_norm": 0.5403628441469761, "learning_rate": 2.9869118701710314e-06, "loss": 0.0248, "step": 107470 }, { "epoch": 0.44844405871602505, "grad_norm": 0.8215747235988852, "learning_rate": 2.9868423894917296e-06, "loss": 0.0251, "step": 107475 }, { "epoch": 0.44846492143101535, "grad_norm": 0.4483602721111634, "learning_rate": 2.986772913660921e-06, "loss": 0.0236, "step": 107480 }, { "epoch": 0.44848578414600565, "grad_norm": 0.42843035049228595, "learning_rate": 2.98670344267804e-06, "loss": 0.0204, "step": 107485 }, { "epoch": 0.4485066468609959, "grad_norm": 1.0213381913686963, "learning_rate": 2.986633976542525e-06, "loss": 0.0212, "step": 107490 }, { "epoch": 0.4485275095759862, "grad_norm": 1.1507442326817092, "learning_rate": 2.9865645152538118e-06, "loss": 0.0268, "step": 107495 }, { "epoch": 0.44854837229097644, "grad_norm": 1.112709912544435, "learning_rate": 2.986495058811336e-06, "loss": 0.0303, "step": 107500 }, { "epoch": 0.44856923500596674, "grad_norm": 0.4496919199344129, "learning_rate": 2.986425607214535e-06, "loss": 0.0269, "step": 107505 }, { "epoch": 0.44859009772095704, "grad_norm": 1.0484780592574516, "learning_rate": 2.9863561604628443e-06, "loss": 0.0312, "step": 107510 }, { "epoch": 0.4486109604359473, "grad_norm": 0.5126048597299744, "learning_rate": 2.9862867185557015e-06, "loss": 0.0256, "step": 107515 }, { "epoch": 0.4486318231509376, "grad_norm": 0.6034917744369691, "learning_rate": 2.9862172814925435e-06, "loss": 0.0346, "step": 107520 }, { "epoch": 0.4486526858659278, "grad_norm": 0.8583564995681456, "learning_rate": 2.986147849272806e-06, "loss": 0.0236, "step": 107525 }, { "epoch": 0.4486735485809181, "grad_norm": 0.6387675777537347, "learning_rate": 2.986078421895927e-06, "loss": 0.0242, "step": 107530 }, { "epoch": 0.4486944112959084, "grad_norm": 0.6341636446158108, "learning_rate": 2.9860089993613434e-06, "loss": 0.0305, "step": 107535 }, { "epoch": 0.44871527401089867, "grad_norm": 1.272343282227979, "learning_rate": 2.9859395816684924e-06, "loss": 0.0284, "step": 107540 }, { "epoch": 0.44873613672588897, "grad_norm": 0.9890198105173753, "learning_rate": 2.9858701688168107e-06, "loss": 0.0304, "step": 107545 }, { "epoch": 0.4487569994408792, "grad_norm": 0.6196944990098449, "learning_rate": 2.985800760805736e-06, "loss": 0.0255, "step": 107550 }, { "epoch": 0.4487778621558695, "grad_norm": 1.009167713122235, "learning_rate": 2.9857313576347057e-06, "loss": 0.0274, "step": 107555 }, { "epoch": 0.4487987248708598, "grad_norm": 0.6161290891408654, "learning_rate": 2.985661959303157e-06, "loss": 0.0198, "step": 107560 }, { "epoch": 0.44881958758585005, "grad_norm": 0.5377186107753748, "learning_rate": 2.9855925658105282e-06, "loss": 0.0251, "step": 107565 }, { "epoch": 0.44884045030084035, "grad_norm": 0.4667058175019813, "learning_rate": 2.9855231771562566e-06, "loss": 0.022, "step": 107570 }, { "epoch": 0.44886131301583065, "grad_norm": 0.948044957454175, "learning_rate": 2.9854537933397797e-06, "loss": 0.0265, "step": 107575 }, { "epoch": 0.4488821757308209, "grad_norm": 0.44452944424466195, "learning_rate": 2.9853844143605364e-06, "loss": 0.0234, "step": 107580 }, { "epoch": 0.4489030384458112, "grad_norm": 0.6251596366901935, "learning_rate": 2.985315040217963e-06, "loss": 0.0264, "step": 107585 }, { "epoch": 0.44892390116080144, "grad_norm": 0.37284427101534523, "learning_rate": 2.985245670911499e-06, "loss": 0.0261, "step": 107590 }, { "epoch": 0.44894476387579174, "grad_norm": 0.8532087502871004, "learning_rate": 2.9851763064405813e-06, "loss": 0.0201, "step": 107595 }, { "epoch": 0.44896562659078204, "grad_norm": 0.270246313104661, "learning_rate": 2.9851069468046495e-06, "loss": 0.0201, "step": 107600 }, { "epoch": 0.4489864893057723, "grad_norm": 0.40819434527028675, "learning_rate": 2.985037592003141e-06, "loss": 0.0243, "step": 107605 }, { "epoch": 0.4490073520207626, "grad_norm": 1.0535763863024463, "learning_rate": 2.984968242035494e-06, "loss": 0.0284, "step": 107610 }, { "epoch": 0.44902821473575283, "grad_norm": 0.7842954525989194, "learning_rate": 2.9848988969011484e-06, "loss": 0.0225, "step": 107615 }, { "epoch": 0.44904907745074313, "grad_norm": 0.46312940550873055, "learning_rate": 2.984829556599541e-06, "loss": 0.024, "step": 107620 }, { "epoch": 0.44906994016573343, "grad_norm": 0.9452219703051729, "learning_rate": 2.9847602211301125e-06, "loss": 0.0279, "step": 107625 }, { "epoch": 0.44909080288072367, "grad_norm": 0.8172973497523776, "learning_rate": 2.9846908904922995e-06, "loss": 0.0175, "step": 107630 }, { "epoch": 0.44911166559571397, "grad_norm": 0.6311788896309821, "learning_rate": 2.9846215646855424e-06, "loss": 0.0247, "step": 107635 }, { "epoch": 0.4491325283107042, "grad_norm": 0.8113865893129072, "learning_rate": 2.98455224370928e-06, "loss": 0.0288, "step": 107640 }, { "epoch": 0.4491533910256945, "grad_norm": 0.5134647313590456, "learning_rate": 2.9844829275629504e-06, "loss": 0.0382, "step": 107645 }, { "epoch": 0.4491742537406848, "grad_norm": 0.990804559459535, "learning_rate": 2.984413616245994e-06, "loss": 0.0249, "step": 107650 }, { "epoch": 0.44919511645567506, "grad_norm": 0.6987126777550144, "learning_rate": 2.984344309757849e-06, "loss": 0.0233, "step": 107655 }, { "epoch": 0.44921597917066536, "grad_norm": 1.0135973773707814, "learning_rate": 2.984275008097956e-06, "loss": 0.0318, "step": 107660 }, { "epoch": 0.44923684188565566, "grad_norm": 1.1200709373996176, "learning_rate": 2.984205711265753e-06, "loss": 0.0251, "step": 107665 }, { "epoch": 0.4492577046006459, "grad_norm": 0.4872915288876352, "learning_rate": 2.9841364192606803e-06, "loss": 0.025, "step": 107670 }, { "epoch": 0.4492785673156362, "grad_norm": 0.7044085436042696, "learning_rate": 2.984067132082177e-06, "loss": 0.0222, "step": 107675 }, { "epoch": 0.44929943003062645, "grad_norm": 0.924289585872728, "learning_rate": 2.9839978497296836e-06, "loss": 0.0249, "step": 107680 }, { "epoch": 0.44932029274561675, "grad_norm": 0.470884182763659, "learning_rate": 2.9839285722026397e-06, "loss": 0.0199, "step": 107685 }, { "epoch": 0.44934115546060704, "grad_norm": 0.3141174053546426, "learning_rate": 2.983859299500484e-06, "loss": 0.0203, "step": 107690 }, { "epoch": 0.4493620181755973, "grad_norm": 0.5402084557808693, "learning_rate": 2.9837900316226582e-06, "loss": 0.0226, "step": 107695 }, { "epoch": 0.4493828808905876, "grad_norm": 1.7618554025535438, "learning_rate": 2.9837207685686016e-06, "loss": 0.0381, "step": 107700 }, { "epoch": 0.44940374360557783, "grad_norm": 0.6718313417439467, "learning_rate": 2.9836515103377535e-06, "loss": 0.025, "step": 107705 }, { "epoch": 0.44942460632056813, "grad_norm": 0.49224417311974605, "learning_rate": 2.9835822569295553e-06, "loss": 0.0173, "step": 107710 }, { "epoch": 0.44944546903555843, "grad_norm": 0.789967521704928, "learning_rate": 2.983513008343447e-06, "loss": 0.0212, "step": 107715 }, { "epoch": 0.4494663317505487, "grad_norm": 1.5864995318846309, "learning_rate": 2.9834437645788696e-06, "loss": 0.0253, "step": 107720 }, { "epoch": 0.449487194465539, "grad_norm": 1.2614638053313332, "learning_rate": 2.9833745256352623e-06, "loss": 0.0288, "step": 107725 }, { "epoch": 0.4495080571805292, "grad_norm": 0.7304968965905926, "learning_rate": 2.9833052915120663e-06, "loss": 0.0264, "step": 107730 }, { "epoch": 0.4495289198955195, "grad_norm": 0.6911589673644022, "learning_rate": 2.983236062208723e-06, "loss": 0.021, "step": 107735 }, { "epoch": 0.4495497826105098, "grad_norm": 0.504869828571562, "learning_rate": 2.983166837724672e-06, "loss": 0.0174, "step": 107740 }, { "epoch": 0.44957064532550006, "grad_norm": 0.9449709394549414, "learning_rate": 2.9830976180593553e-06, "loss": 0.0338, "step": 107745 }, { "epoch": 0.44959150804049036, "grad_norm": 0.809136332689301, "learning_rate": 2.983028403212213e-06, "loss": 0.0253, "step": 107750 }, { "epoch": 0.44961237075548066, "grad_norm": 0.69013607030984, "learning_rate": 2.9829591931826874e-06, "loss": 0.035, "step": 107755 }, { "epoch": 0.4496332334704709, "grad_norm": 0.614844001436602, "learning_rate": 2.982889987970218e-06, "loss": 0.0234, "step": 107760 }, { "epoch": 0.4496540961854612, "grad_norm": 1.0118753704872387, "learning_rate": 2.9828207875742466e-06, "loss": 0.0363, "step": 107765 }, { "epoch": 0.44967495890045145, "grad_norm": 1.085285167381305, "learning_rate": 2.982751591994216e-06, "loss": 0.0292, "step": 107770 }, { "epoch": 0.44969582161544175, "grad_norm": 0.9862194389006237, "learning_rate": 2.982682401229565e-06, "loss": 0.0294, "step": 107775 }, { "epoch": 0.44971668433043205, "grad_norm": 0.3988567364171961, "learning_rate": 2.9826132152797367e-06, "loss": 0.0262, "step": 107780 }, { "epoch": 0.4497375470454223, "grad_norm": 0.7356093548332261, "learning_rate": 2.982544034144173e-06, "loss": 0.0212, "step": 107785 }, { "epoch": 0.4497584097604126, "grad_norm": 0.9043680138924737, "learning_rate": 2.9824748578223144e-06, "loss": 0.0192, "step": 107790 }, { "epoch": 0.44977927247540284, "grad_norm": 0.6165779697671531, "learning_rate": 2.9824056863136043e-06, "loss": 0.025, "step": 107795 }, { "epoch": 0.44980013519039314, "grad_norm": 0.4478450305525549, "learning_rate": 2.982336519617483e-06, "loss": 0.0273, "step": 107800 }, { "epoch": 0.44982099790538344, "grad_norm": 0.6796423906181072, "learning_rate": 2.9822673577333936e-06, "loss": 0.0178, "step": 107805 }, { "epoch": 0.4498418606203737, "grad_norm": 1.8850465517273935, "learning_rate": 2.982198200660777e-06, "loss": 0.0285, "step": 107810 }, { "epoch": 0.449862723335364, "grad_norm": 1.1481080196809539, "learning_rate": 2.9821290483990767e-06, "loss": 0.0277, "step": 107815 }, { "epoch": 0.4498835860503542, "grad_norm": 0.6526989665987473, "learning_rate": 2.9820599009477346e-06, "loss": 0.0267, "step": 107820 }, { "epoch": 0.4499044487653445, "grad_norm": 0.5156283235086582, "learning_rate": 2.981990758306192e-06, "loss": 0.0257, "step": 107825 }, { "epoch": 0.4499253114803348, "grad_norm": 0.6393045021988102, "learning_rate": 2.981921620473892e-06, "loss": 0.0249, "step": 107830 }, { "epoch": 0.44994617419532507, "grad_norm": 1.0660651931744805, "learning_rate": 2.9818524874502767e-06, "loss": 0.0271, "step": 107835 }, { "epoch": 0.44996703691031537, "grad_norm": 1.18306429832113, "learning_rate": 2.9817833592347906e-06, "loss": 0.0375, "step": 107840 }, { "epoch": 0.44998789962530567, "grad_norm": 0.591122986144263, "learning_rate": 2.9817142358268742e-06, "loss": 0.021, "step": 107845 }, { "epoch": 0.4500087623402959, "grad_norm": 0.8417920800849863, "learning_rate": 2.9816451172259702e-06, "loss": 0.0233, "step": 107850 }, { "epoch": 0.4500296250552862, "grad_norm": 1.927514949427277, "learning_rate": 2.9815760034315233e-06, "loss": 0.0423, "step": 107855 }, { "epoch": 0.45005048777027645, "grad_norm": 0.3798769484428466, "learning_rate": 2.981506894442975e-06, "loss": 0.02, "step": 107860 }, { "epoch": 0.45007135048526675, "grad_norm": 0.9372968095125163, "learning_rate": 2.981437790259769e-06, "loss": 0.0197, "step": 107865 }, { "epoch": 0.45009221320025705, "grad_norm": 1.0810829175226173, "learning_rate": 2.9813686908813485e-06, "loss": 0.0322, "step": 107870 }, { "epoch": 0.4501130759152473, "grad_norm": 0.51346991344186, "learning_rate": 2.9812995963071557e-06, "loss": 0.0203, "step": 107875 }, { "epoch": 0.4501339386302376, "grad_norm": 0.6079832642334873, "learning_rate": 2.9812305065366355e-06, "loss": 0.0228, "step": 107880 }, { "epoch": 0.45015480134522784, "grad_norm": 1.2831596021513907, "learning_rate": 2.9811614215692303e-06, "loss": 0.0205, "step": 107885 }, { "epoch": 0.45017566406021814, "grad_norm": 2.1323435151111867, "learning_rate": 2.9810923414043835e-06, "loss": 0.0209, "step": 107890 }, { "epoch": 0.45019652677520844, "grad_norm": 0.8934714382153494, "learning_rate": 2.9810232660415396e-06, "loss": 0.0296, "step": 107895 }, { "epoch": 0.4502173894901987, "grad_norm": 0.7152501492140894, "learning_rate": 2.9809541954801414e-06, "loss": 0.0209, "step": 107900 }, { "epoch": 0.450238252205189, "grad_norm": 0.7119352556949787, "learning_rate": 2.9808851297196327e-06, "loss": 0.0276, "step": 107905 }, { "epoch": 0.4502591149201792, "grad_norm": 0.9461756490084577, "learning_rate": 2.980816068759458e-06, "loss": 0.0168, "step": 107910 }, { "epoch": 0.4502799776351695, "grad_norm": 1.1084335042608555, "learning_rate": 2.9807470125990606e-06, "loss": 0.0331, "step": 107915 }, { "epoch": 0.4503008403501598, "grad_norm": 0.5491556085135378, "learning_rate": 2.9806779612378855e-06, "loss": 0.0401, "step": 107920 }, { "epoch": 0.45032170306515007, "grad_norm": 0.5975518611031423, "learning_rate": 2.980608914675376e-06, "loss": 0.0225, "step": 107925 }, { "epoch": 0.45034256578014037, "grad_norm": 0.7368890255940865, "learning_rate": 2.9805398729109758e-06, "loss": 0.0314, "step": 107930 }, { "epoch": 0.45036342849513067, "grad_norm": 0.6347394219891224, "learning_rate": 2.9804708359441303e-06, "loss": 0.0232, "step": 107935 }, { "epoch": 0.4503842912101209, "grad_norm": 1.1986544229021685, "learning_rate": 2.9804018037742844e-06, "loss": 0.0249, "step": 107940 }, { "epoch": 0.4504051539251112, "grad_norm": 0.7709127359926322, "learning_rate": 2.9803327764008803e-06, "loss": 0.017, "step": 107945 }, { "epoch": 0.45042601664010146, "grad_norm": 0.6965153853639577, "learning_rate": 2.980263753823365e-06, "loss": 0.0308, "step": 107950 }, { "epoch": 0.45044687935509176, "grad_norm": 0.8963820777275192, "learning_rate": 2.980194736041182e-06, "loss": 0.034, "step": 107955 }, { "epoch": 0.45046774207008206, "grad_norm": 0.8520785550672223, "learning_rate": 2.9801257230537763e-06, "loss": 0.0221, "step": 107960 }, { "epoch": 0.4504886047850723, "grad_norm": 0.6717776838299395, "learning_rate": 2.9800567148605926e-06, "loss": 0.0219, "step": 107965 }, { "epoch": 0.4505094675000626, "grad_norm": 0.3259543084351921, "learning_rate": 2.979987711461076e-06, "loss": 0.0218, "step": 107970 }, { "epoch": 0.45053033021505284, "grad_norm": 0.9334417774069785, "learning_rate": 2.9799187128546712e-06, "loss": 0.0248, "step": 107975 }, { "epoch": 0.45055119293004314, "grad_norm": 0.5279752044440265, "learning_rate": 2.9798497190408247e-06, "loss": 0.0252, "step": 107980 }, { "epoch": 0.45057205564503344, "grad_norm": 0.6997905919787526, "learning_rate": 2.9797807300189795e-06, "loss": 0.0251, "step": 107985 }, { "epoch": 0.4505929183600237, "grad_norm": 0.6204832148096118, "learning_rate": 2.9797117457885828e-06, "loss": 0.025, "step": 107990 }, { "epoch": 0.450613781075014, "grad_norm": 0.9789302584903968, "learning_rate": 2.979642766349079e-06, "loss": 0.03, "step": 107995 }, { "epoch": 0.45063464379000423, "grad_norm": 1.1243543224085062, "learning_rate": 2.979573791699914e-06, "loss": 0.0297, "step": 108000 }, { "epoch": 0.45065550650499453, "grad_norm": 0.4673861948562773, "learning_rate": 2.9795048218405333e-06, "loss": 0.0238, "step": 108005 }, { "epoch": 0.45067636921998483, "grad_norm": 0.7671315691672042, "learning_rate": 2.9794358567703822e-06, "loss": 0.0184, "step": 108010 }, { "epoch": 0.4506972319349751, "grad_norm": 0.7631615088852989, "learning_rate": 2.9793668964889074e-06, "loss": 0.0288, "step": 108015 }, { "epoch": 0.4507180946499654, "grad_norm": 0.6636042901350128, "learning_rate": 2.979297940995553e-06, "loss": 0.0182, "step": 108020 }, { "epoch": 0.4507389573649557, "grad_norm": 1.0963739182995003, "learning_rate": 2.979228990289767e-06, "loss": 0.03, "step": 108025 }, { "epoch": 0.4507598200799459, "grad_norm": 0.649392499988883, "learning_rate": 2.9791600443709938e-06, "loss": 0.0231, "step": 108030 }, { "epoch": 0.4507806827949362, "grad_norm": 0.6663601287056174, "learning_rate": 2.9790911032386803e-06, "loss": 0.0302, "step": 108035 }, { "epoch": 0.45080154550992646, "grad_norm": 0.6722420404514461, "learning_rate": 2.9790221668922725e-06, "loss": 0.0258, "step": 108040 }, { "epoch": 0.45082240822491676, "grad_norm": 1.8356271122361987, "learning_rate": 2.9789532353312175e-06, "loss": 0.0246, "step": 108045 }, { "epoch": 0.45084327093990706, "grad_norm": 0.9765445723321754, "learning_rate": 2.9788843085549605e-06, "loss": 0.0232, "step": 108050 }, { "epoch": 0.4508641336548973, "grad_norm": 0.6227598240818336, "learning_rate": 2.978815386562948e-06, "loss": 0.0297, "step": 108055 }, { "epoch": 0.4508849963698876, "grad_norm": 0.8354274196295045, "learning_rate": 2.9787464693546276e-06, "loss": 0.0236, "step": 108060 }, { "epoch": 0.45090585908487785, "grad_norm": 0.949541827276688, "learning_rate": 2.9786775569294453e-06, "loss": 0.0265, "step": 108065 }, { "epoch": 0.45092672179986815, "grad_norm": 0.7314891585308958, "learning_rate": 2.978608649286848e-06, "loss": 0.0259, "step": 108070 }, { "epoch": 0.45094758451485845, "grad_norm": 0.7370381787647206, "learning_rate": 2.9785397464262815e-06, "loss": 0.0309, "step": 108075 }, { "epoch": 0.4509684472298487, "grad_norm": 1.0091279787202367, "learning_rate": 2.9784708483471946e-06, "loss": 0.0241, "step": 108080 }, { "epoch": 0.450989309944839, "grad_norm": 0.6092691885685959, "learning_rate": 2.978401955049033e-06, "loss": 0.0212, "step": 108085 }, { "epoch": 0.45101017265982923, "grad_norm": 0.8881637474906914, "learning_rate": 2.9783330665312445e-06, "loss": 0.0284, "step": 108090 }, { "epoch": 0.45103103537481953, "grad_norm": 1.7242640225520232, "learning_rate": 2.9782641827932762e-06, "loss": 0.0216, "step": 108095 }, { "epoch": 0.45105189808980983, "grad_norm": 0.7430810522410058, "learning_rate": 2.9781953038345745e-06, "loss": 0.0272, "step": 108100 }, { "epoch": 0.4510727608048001, "grad_norm": 0.8364072543123968, "learning_rate": 2.9781264296545874e-06, "loss": 0.0241, "step": 108105 }, { "epoch": 0.4510936235197904, "grad_norm": 1.5044106507933184, "learning_rate": 2.978057560252763e-06, "loss": 0.0355, "step": 108110 }, { "epoch": 0.4511144862347807, "grad_norm": 1.328643280294635, "learning_rate": 2.9779886956285474e-06, "loss": 0.026, "step": 108115 }, { "epoch": 0.4511353489497709, "grad_norm": 0.8361281080766668, "learning_rate": 2.977919835781389e-06, "loss": 0.0239, "step": 108120 }, { "epoch": 0.4511562116647612, "grad_norm": 0.6345812345510337, "learning_rate": 2.9778509807107364e-06, "loss": 0.0277, "step": 108125 }, { "epoch": 0.45117707437975146, "grad_norm": 0.6630264068149542, "learning_rate": 2.9777821304160358e-06, "loss": 0.0319, "step": 108130 }, { "epoch": 0.45119793709474176, "grad_norm": 1.3780411336202076, "learning_rate": 2.977713284896736e-06, "loss": 0.0264, "step": 108135 }, { "epoch": 0.45121879980973206, "grad_norm": 0.8364067038509676, "learning_rate": 2.9776444441522855e-06, "loss": 0.0248, "step": 108140 }, { "epoch": 0.4512396625247223, "grad_norm": 0.5324325427157763, "learning_rate": 2.9775756081821317e-06, "loss": 0.0228, "step": 108145 }, { "epoch": 0.4512605252397126, "grad_norm": 0.5880766288115817, "learning_rate": 2.9775067769857226e-06, "loss": 0.0235, "step": 108150 }, { "epoch": 0.45128138795470285, "grad_norm": 0.971272716262531, "learning_rate": 2.977437950562507e-06, "loss": 0.0334, "step": 108155 }, { "epoch": 0.45130225066969315, "grad_norm": 0.7312344371315269, "learning_rate": 2.977369128911932e-06, "loss": 0.0225, "step": 108160 }, { "epoch": 0.45132311338468345, "grad_norm": 0.5532399316970643, "learning_rate": 2.977300312033448e-06, "loss": 0.0209, "step": 108165 }, { "epoch": 0.4513439760996737, "grad_norm": 0.7208638845797697, "learning_rate": 2.977231499926502e-06, "loss": 0.0214, "step": 108170 }, { "epoch": 0.451364838814664, "grad_norm": 0.7480996460921823, "learning_rate": 2.977162692590544e-06, "loss": 0.0257, "step": 108175 }, { "epoch": 0.45138570152965424, "grad_norm": 0.9790978053284484, "learning_rate": 2.9770938900250213e-06, "loss": 0.0305, "step": 108180 }, { "epoch": 0.45140656424464454, "grad_norm": 0.7464134028644254, "learning_rate": 2.977025092229383e-06, "loss": 0.0278, "step": 108185 }, { "epoch": 0.45142742695963484, "grad_norm": 0.8745640002239424, "learning_rate": 2.9769562992030787e-06, "loss": 0.0333, "step": 108190 }, { "epoch": 0.4514482896746251, "grad_norm": 0.6738793183817494, "learning_rate": 2.976887510945557e-06, "loss": 0.0211, "step": 108195 }, { "epoch": 0.4514691523896154, "grad_norm": 0.4075461824573052, "learning_rate": 2.9768187274562665e-06, "loss": 0.0264, "step": 108200 }, { "epoch": 0.4514900151046057, "grad_norm": 0.3841409556728453, "learning_rate": 2.976749948734657e-06, "loss": 0.0276, "step": 108205 }, { "epoch": 0.4515108778195959, "grad_norm": 0.35508984876161087, "learning_rate": 2.976681174780178e-06, "loss": 0.0196, "step": 108210 }, { "epoch": 0.4515317405345862, "grad_norm": 0.95248818942319, "learning_rate": 2.9766124055922772e-06, "loss": 0.0229, "step": 108215 }, { "epoch": 0.45155260324957647, "grad_norm": 0.5106261405666892, "learning_rate": 2.9765436411704057e-06, "loss": 0.0231, "step": 108220 }, { "epoch": 0.45157346596456677, "grad_norm": 0.8215186957086114, "learning_rate": 2.9764748815140127e-06, "loss": 0.0312, "step": 108225 }, { "epoch": 0.45159432867955707, "grad_norm": 0.564506228805313, "learning_rate": 2.976406126622548e-06, "loss": 0.0219, "step": 108230 }, { "epoch": 0.4516151913945473, "grad_norm": 0.6884816219821722, "learning_rate": 2.97633737649546e-06, "loss": 0.0222, "step": 108235 }, { "epoch": 0.4516360541095376, "grad_norm": 0.7943787984804194, "learning_rate": 2.9762686311322e-06, "loss": 0.0261, "step": 108240 }, { "epoch": 0.45165691682452785, "grad_norm": 0.4804736482169008, "learning_rate": 2.9761998905322164e-06, "loss": 0.0436, "step": 108245 }, { "epoch": 0.45167777953951815, "grad_norm": 0.6260435626966552, "learning_rate": 2.976131154694961e-06, "loss": 0.0295, "step": 108250 }, { "epoch": 0.45169864225450845, "grad_norm": 0.44775545523299637, "learning_rate": 2.9760624236198815e-06, "loss": 0.0239, "step": 108255 }, { "epoch": 0.4517195049694987, "grad_norm": 1.0968732763347657, "learning_rate": 2.9759936973064306e-06, "loss": 0.0242, "step": 108260 }, { "epoch": 0.451740367684489, "grad_norm": 1.0526141574687906, "learning_rate": 2.9759249757540565e-06, "loss": 0.0245, "step": 108265 }, { "epoch": 0.45176123039947924, "grad_norm": 0.9309293820599449, "learning_rate": 2.9758562589622105e-06, "loss": 0.0362, "step": 108270 }, { "epoch": 0.45178209311446954, "grad_norm": 0.6130412306915951, "learning_rate": 2.975787546930343e-06, "loss": 0.029, "step": 108275 }, { "epoch": 0.45180295582945984, "grad_norm": 0.5149337988701536, "learning_rate": 2.975718839657904e-06, "loss": 0.0195, "step": 108280 }, { "epoch": 0.4518238185444501, "grad_norm": 1.2040998214432155, "learning_rate": 2.9756501371443446e-06, "loss": 0.0291, "step": 108285 }, { "epoch": 0.4518446812594404, "grad_norm": 0.4835573763677135, "learning_rate": 2.975581439389115e-06, "loss": 0.0237, "step": 108290 }, { "epoch": 0.4518655439744307, "grad_norm": 0.5606163965875898, "learning_rate": 2.9755127463916668e-06, "loss": 0.0184, "step": 108295 }, { "epoch": 0.4518864066894209, "grad_norm": 0.8879466536771189, "learning_rate": 2.9754440581514498e-06, "loss": 0.023, "step": 108300 }, { "epoch": 0.4519072694044112, "grad_norm": 0.6566853491039695, "learning_rate": 2.975375374667915e-06, "loss": 0.026, "step": 108305 }, { "epoch": 0.45192813211940147, "grad_norm": 0.5330704294510193, "learning_rate": 2.9753066959405136e-06, "loss": 0.0247, "step": 108310 }, { "epoch": 0.45194899483439177, "grad_norm": 0.6532971142180224, "learning_rate": 2.975238021968698e-06, "loss": 0.0248, "step": 108315 }, { "epoch": 0.45196985754938207, "grad_norm": 1.029503323786715, "learning_rate": 2.975169352751917e-06, "loss": 0.0289, "step": 108320 }, { "epoch": 0.4519907202643723, "grad_norm": 1.4139829695000952, "learning_rate": 2.975100688289624e-06, "loss": 0.0179, "step": 108325 }, { "epoch": 0.4520115829793626, "grad_norm": 1.0895270860738895, "learning_rate": 2.9750320285812696e-06, "loss": 0.0308, "step": 108330 }, { "epoch": 0.45203244569435286, "grad_norm": 0.6361966749688404, "learning_rate": 2.974963373626305e-06, "loss": 0.0334, "step": 108335 }, { "epoch": 0.45205330840934316, "grad_norm": 0.29681367365823164, "learning_rate": 2.974894723424182e-06, "loss": 0.0304, "step": 108340 }, { "epoch": 0.45207417112433346, "grad_norm": 0.45424128554627996, "learning_rate": 2.974826077974352e-06, "loss": 0.0237, "step": 108345 }, { "epoch": 0.4520950338393237, "grad_norm": 0.327217440030149, "learning_rate": 2.9747574372762674e-06, "loss": 0.0293, "step": 108350 }, { "epoch": 0.452115896554314, "grad_norm": 0.6560200512143558, "learning_rate": 2.974688801329379e-06, "loss": 0.0233, "step": 108355 }, { "epoch": 0.45213675926930424, "grad_norm": 0.8785368584718585, "learning_rate": 2.9746201701331396e-06, "loss": 0.0222, "step": 108360 }, { "epoch": 0.45215762198429454, "grad_norm": 1.170493862128465, "learning_rate": 2.974551543687001e-06, "loss": 0.0259, "step": 108365 }, { "epoch": 0.45217848469928484, "grad_norm": 0.72181610992582, "learning_rate": 2.9744829219904147e-06, "loss": 0.0246, "step": 108370 }, { "epoch": 0.4521993474142751, "grad_norm": 0.6593420856795922, "learning_rate": 2.9744143050428336e-06, "loss": 0.0247, "step": 108375 }, { "epoch": 0.4522202101292654, "grad_norm": 0.7333304961920337, "learning_rate": 2.97434569284371e-06, "loss": 0.0252, "step": 108380 }, { "epoch": 0.4522410728442557, "grad_norm": 1.3198275922733587, "learning_rate": 2.974277085392496e-06, "loss": 0.035, "step": 108385 }, { "epoch": 0.45226193555924593, "grad_norm": 0.5059747405008758, "learning_rate": 2.974208482688643e-06, "loss": 0.0175, "step": 108390 }, { "epoch": 0.45228279827423623, "grad_norm": 1.014071167700664, "learning_rate": 2.9741398847316055e-06, "loss": 0.0357, "step": 108395 }, { "epoch": 0.4523036609892265, "grad_norm": 0.9518848214582049, "learning_rate": 2.974071291520835e-06, "loss": 0.0265, "step": 108400 }, { "epoch": 0.4523245237042168, "grad_norm": 0.7740054029008476, "learning_rate": 2.974002703055784e-06, "loss": 0.0248, "step": 108405 }, { "epoch": 0.4523453864192071, "grad_norm": 0.628534935976872, "learning_rate": 2.9739341193359055e-06, "loss": 0.0221, "step": 108410 }, { "epoch": 0.4523662491341973, "grad_norm": 0.9640601447044517, "learning_rate": 2.973865540360653e-06, "loss": 0.0224, "step": 108415 }, { "epoch": 0.4523871118491876, "grad_norm": 1.155207139844233, "learning_rate": 2.9737969661294787e-06, "loss": 0.0238, "step": 108420 }, { "epoch": 0.45240797456417786, "grad_norm": 0.903017941866464, "learning_rate": 2.9737283966418363e-06, "loss": 0.0231, "step": 108425 }, { "epoch": 0.45242883727916816, "grad_norm": 0.5643545532734519, "learning_rate": 2.973659831897178e-06, "loss": 0.0292, "step": 108430 }, { "epoch": 0.45244969999415846, "grad_norm": 1.1557288524934939, "learning_rate": 2.9735912718949576e-06, "loss": 0.0348, "step": 108435 }, { "epoch": 0.4524705627091487, "grad_norm": 0.7387849094032143, "learning_rate": 2.973522716634629e-06, "loss": 0.0218, "step": 108440 }, { "epoch": 0.452491425424139, "grad_norm": 0.792292702037309, "learning_rate": 2.9734541661156454e-06, "loss": 0.0294, "step": 108445 }, { "epoch": 0.45251228813912925, "grad_norm": 0.7845649768245914, "learning_rate": 2.973385620337459e-06, "loss": 0.0242, "step": 108450 }, { "epoch": 0.45253315085411955, "grad_norm": 0.7977194762863902, "learning_rate": 2.9733170792995246e-06, "loss": 0.028, "step": 108455 }, { "epoch": 0.45255401356910985, "grad_norm": 0.4856926295623778, "learning_rate": 2.9732485430012953e-06, "loss": 0.0276, "step": 108460 }, { "epoch": 0.4525748762841001, "grad_norm": 0.41773314403429573, "learning_rate": 2.9731800114422255e-06, "loss": 0.0235, "step": 108465 }, { "epoch": 0.4525957389990904, "grad_norm": 0.598067267357788, "learning_rate": 2.973111484621769e-06, "loss": 0.0285, "step": 108470 }, { "epoch": 0.4526166017140807, "grad_norm": 1.2323355477496847, "learning_rate": 2.973042962539379e-06, "loss": 0.0205, "step": 108475 }, { "epoch": 0.45263746442907093, "grad_norm": 1.0004773343867859, "learning_rate": 2.9729744451945106e-06, "loss": 0.0277, "step": 108480 }, { "epoch": 0.45265832714406123, "grad_norm": 1.2424520320925196, "learning_rate": 2.9729059325866168e-06, "loss": 0.0273, "step": 108485 }, { "epoch": 0.4526791898590515, "grad_norm": 0.8363317867488841, "learning_rate": 2.9728374247151524e-06, "loss": 0.0303, "step": 108490 }, { "epoch": 0.4527000525740418, "grad_norm": 1.7979524716140765, "learning_rate": 2.9727689215795717e-06, "loss": 0.0243, "step": 108495 }, { "epoch": 0.4527209152890321, "grad_norm": 0.414850932818795, "learning_rate": 2.9727004231793295e-06, "loss": 0.0198, "step": 108500 }, { "epoch": 0.4527417780040223, "grad_norm": 0.8924490055411035, "learning_rate": 2.9726319295138795e-06, "loss": 0.0264, "step": 108505 }, { "epoch": 0.4527626407190126, "grad_norm": 0.6567029695851221, "learning_rate": 2.9725634405826757e-06, "loss": 0.0288, "step": 108510 }, { "epoch": 0.45278350343400287, "grad_norm": 0.7836926298098522, "learning_rate": 2.972494956385174e-06, "loss": 0.0207, "step": 108515 }, { "epoch": 0.45280436614899316, "grad_norm": 1.096964204555716, "learning_rate": 2.972426476920829e-06, "loss": 0.0288, "step": 108520 }, { "epoch": 0.45282522886398346, "grad_norm": 0.8439048185852093, "learning_rate": 2.9723580021890946e-06, "loss": 0.019, "step": 108525 }, { "epoch": 0.4528460915789737, "grad_norm": 0.9258307371784887, "learning_rate": 2.9722895321894275e-06, "loss": 0.0234, "step": 108530 }, { "epoch": 0.452866954293964, "grad_norm": 0.6167237197904124, "learning_rate": 2.97222106692128e-06, "loss": 0.0224, "step": 108535 }, { "epoch": 0.45288781700895425, "grad_norm": 0.9724405952739479, "learning_rate": 2.97215260638411e-06, "loss": 0.0301, "step": 108540 }, { "epoch": 0.45290867972394455, "grad_norm": 1.1476407091751066, "learning_rate": 2.97208415057737e-06, "loss": 0.0275, "step": 108545 }, { "epoch": 0.45292954243893485, "grad_norm": 0.8569337268933692, "learning_rate": 2.9720156995005177e-06, "loss": 0.0366, "step": 108550 }, { "epoch": 0.4529504051539251, "grad_norm": 1.1729421310984907, "learning_rate": 2.971947253153007e-06, "loss": 0.0206, "step": 108555 }, { "epoch": 0.4529712678689154, "grad_norm": 0.6501554308769035, "learning_rate": 2.9718788115342938e-06, "loss": 0.035, "step": 108560 }, { "epoch": 0.4529921305839057, "grad_norm": 0.5373206745129067, "learning_rate": 2.9718103746438338e-06, "loss": 0.0327, "step": 108565 }, { "epoch": 0.45301299329889594, "grad_norm": 0.4276280939282651, "learning_rate": 2.971741942481082e-06, "loss": 0.0301, "step": 108570 }, { "epoch": 0.45303385601388624, "grad_norm": 0.7994310960476861, "learning_rate": 2.9716735150454946e-06, "loss": 0.0248, "step": 108575 }, { "epoch": 0.4530547187288765, "grad_norm": 0.9467465707926713, "learning_rate": 2.9716050923365265e-06, "loss": 0.025, "step": 108580 }, { "epoch": 0.4530755814438668, "grad_norm": 0.6987775599642253, "learning_rate": 2.971536674353635e-06, "loss": 0.0228, "step": 108585 }, { "epoch": 0.4530964441588571, "grad_norm": 1.3452392643592732, "learning_rate": 2.9714682610962753e-06, "loss": 0.0332, "step": 108590 }, { "epoch": 0.4531173068738473, "grad_norm": 0.6999408116407598, "learning_rate": 2.9713998525639032e-06, "loss": 0.0282, "step": 108595 }, { "epoch": 0.4531381695888376, "grad_norm": 0.844434723426806, "learning_rate": 2.9713314487559756e-06, "loss": 0.0237, "step": 108600 }, { "epoch": 0.45315903230382787, "grad_norm": 0.8525262786703297, "learning_rate": 2.971263049671948e-06, "loss": 0.0225, "step": 108605 }, { "epoch": 0.45317989501881817, "grad_norm": 0.729975447701292, "learning_rate": 2.971194655311277e-06, "loss": 0.0249, "step": 108610 }, { "epoch": 0.45320075773380847, "grad_norm": 0.9072138178118726, "learning_rate": 2.9711262656734193e-06, "loss": 0.0328, "step": 108615 }, { "epoch": 0.4532216204487987, "grad_norm": 0.4779518937927045, "learning_rate": 2.9710578807578305e-06, "loss": 0.026, "step": 108620 }, { "epoch": 0.453242483163789, "grad_norm": 0.7124269014081368, "learning_rate": 2.970989500563968e-06, "loss": 0.0185, "step": 108625 }, { "epoch": 0.45326334587877926, "grad_norm": 0.4449740252270873, "learning_rate": 2.970921125091288e-06, "loss": 0.0292, "step": 108630 }, { "epoch": 0.45328420859376956, "grad_norm": 0.674834654446557, "learning_rate": 2.970852754339248e-06, "loss": 0.0345, "step": 108635 }, { "epoch": 0.45330507130875985, "grad_norm": 0.6711570529392289, "learning_rate": 2.9707843883073032e-06, "loss": 0.0315, "step": 108640 }, { "epoch": 0.4533259340237501, "grad_norm": 0.4526813890933316, "learning_rate": 2.9707160269949127e-06, "loss": 0.0222, "step": 108645 }, { "epoch": 0.4533467967387404, "grad_norm": 0.6385126040232322, "learning_rate": 2.9706476704015325e-06, "loss": 0.0227, "step": 108650 }, { "epoch": 0.4533676594537307, "grad_norm": 0.7590184111572806, "learning_rate": 2.970579318526619e-06, "loss": 0.0239, "step": 108655 }, { "epoch": 0.45338852216872094, "grad_norm": 0.806422030282402, "learning_rate": 2.9705109713696302e-06, "loss": 0.0262, "step": 108660 }, { "epoch": 0.45340938488371124, "grad_norm": 0.6859137127736495, "learning_rate": 2.970442628930023e-06, "loss": 0.0193, "step": 108665 }, { "epoch": 0.4534302475987015, "grad_norm": 0.6309910680038835, "learning_rate": 2.9703742912072554e-06, "loss": 0.0228, "step": 108670 }, { "epoch": 0.4534511103136918, "grad_norm": 0.6087414813210211, "learning_rate": 2.9703059582007845e-06, "loss": 0.0265, "step": 108675 }, { "epoch": 0.4534719730286821, "grad_norm": 0.5109803348021379, "learning_rate": 2.9702376299100666e-06, "loss": 0.026, "step": 108680 }, { "epoch": 0.45349283574367233, "grad_norm": 0.5173097069813528, "learning_rate": 2.9701693063345617e-06, "loss": 0.0209, "step": 108685 }, { "epoch": 0.45351369845866263, "grad_norm": 0.6924701969355732, "learning_rate": 2.9701009874737257e-06, "loss": 0.025, "step": 108690 }, { "epoch": 0.4535345611736529, "grad_norm": 0.9203836855451603, "learning_rate": 2.9700326733270167e-06, "loss": 0.024, "step": 108695 }, { "epoch": 0.45355542388864317, "grad_norm": 0.821615891031406, "learning_rate": 2.9699643638938937e-06, "loss": 0.0249, "step": 108700 }, { "epoch": 0.45357628660363347, "grad_norm": 0.2603890340675393, "learning_rate": 2.9698960591738128e-06, "loss": 0.0202, "step": 108705 }, { "epoch": 0.4535971493186237, "grad_norm": 0.5731764334974305, "learning_rate": 2.969827759166234e-06, "loss": 0.0236, "step": 108710 }, { "epoch": 0.453618012033614, "grad_norm": 0.6462064760367894, "learning_rate": 2.969759463870614e-06, "loss": 0.0227, "step": 108715 }, { "epoch": 0.45363887474860426, "grad_norm": 0.9889216633891551, "learning_rate": 2.9696911732864113e-06, "loss": 0.0293, "step": 108720 }, { "epoch": 0.45365973746359456, "grad_norm": 0.5437498241382, "learning_rate": 2.9696228874130857e-06, "loss": 0.0282, "step": 108725 }, { "epoch": 0.45368060017858486, "grad_norm": 0.9510333963712287, "learning_rate": 2.9695546062500935e-06, "loss": 0.0298, "step": 108730 }, { "epoch": 0.4537014628935751, "grad_norm": 0.615733540579923, "learning_rate": 2.969486329796894e-06, "loss": 0.0311, "step": 108735 }, { "epoch": 0.4537223256085654, "grad_norm": 0.8041295573617223, "learning_rate": 2.969418058052946e-06, "loss": 0.0208, "step": 108740 }, { "epoch": 0.4537431883235557, "grad_norm": 1.0771617857111495, "learning_rate": 2.969349791017708e-06, "loss": 0.0333, "step": 108745 }, { "epoch": 0.45376405103854595, "grad_norm": 0.9441496301785857, "learning_rate": 2.969281528690639e-06, "loss": 0.0228, "step": 108750 }, { "epoch": 0.45378491375353625, "grad_norm": 0.7560612425168486, "learning_rate": 2.969213271071197e-06, "loss": 0.0287, "step": 108755 }, { "epoch": 0.4538057764685265, "grad_norm": 0.3935611356927186, "learning_rate": 2.9691450181588425e-06, "loss": 0.0181, "step": 108760 }, { "epoch": 0.4538266391835168, "grad_norm": 1.1584328202177243, "learning_rate": 2.9690767699530336e-06, "loss": 0.0269, "step": 108765 }, { "epoch": 0.4538475018985071, "grad_norm": 0.4096843619048807, "learning_rate": 2.969008526453229e-06, "loss": 0.0211, "step": 108770 }, { "epoch": 0.45386836461349733, "grad_norm": 0.8942451085411545, "learning_rate": 2.9689402876588885e-06, "loss": 0.026, "step": 108775 }, { "epoch": 0.45388922732848763, "grad_norm": 0.627124045386901, "learning_rate": 2.968872053569471e-06, "loss": 0.0261, "step": 108780 }, { "epoch": 0.4539100900434779, "grad_norm": 0.9756050985975058, "learning_rate": 2.9688038241844365e-06, "loss": 0.0299, "step": 108785 }, { "epoch": 0.4539309527584682, "grad_norm": 0.7345712402890507, "learning_rate": 2.968735599503243e-06, "loss": 0.0206, "step": 108790 }, { "epoch": 0.4539518154734585, "grad_norm": 0.4865476267081341, "learning_rate": 2.968667379525352e-06, "loss": 0.0206, "step": 108795 }, { "epoch": 0.4539726781884487, "grad_norm": 1.297378024796553, "learning_rate": 2.968599164250222e-06, "loss": 0.0277, "step": 108800 }, { "epoch": 0.453993540903439, "grad_norm": 0.9490031439447546, "learning_rate": 2.968530953677312e-06, "loss": 0.0275, "step": 108805 }, { "epoch": 0.45401440361842926, "grad_norm": 0.4663770953481775, "learning_rate": 2.968462747806084e-06, "loss": 0.0214, "step": 108810 }, { "epoch": 0.45403526633341956, "grad_norm": 0.6128092223854599, "learning_rate": 2.968394546635996e-06, "loss": 0.0222, "step": 108815 }, { "epoch": 0.45405612904840986, "grad_norm": 0.8270000887843348, "learning_rate": 2.968326350166508e-06, "loss": 0.0228, "step": 108820 }, { "epoch": 0.4540769917634001, "grad_norm": 0.26292142307402067, "learning_rate": 2.9682581583970815e-06, "loss": 0.0203, "step": 108825 }, { "epoch": 0.4540978544783904, "grad_norm": 0.8003752045182011, "learning_rate": 2.9681899713271757e-06, "loss": 0.0282, "step": 108830 }, { "epoch": 0.4541187171933807, "grad_norm": 0.49106085193073157, "learning_rate": 2.96812178895625e-06, "loss": 0.0187, "step": 108835 }, { "epoch": 0.45413957990837095, "grad_norm": 0.7398385025859726, "learning_rate": 2.9680536112837666e-06, "loss": 0.0199, "step": 108840 }, { "epoch": 0.45416044262336125, "grad_norm": 0.9501973188783015, "learning_rate": 2.9679854383091845e-06, "loss": 0.03, "step": 108845 }, { "epoch": 0.4541813053383515, "grad_norm": 0.8913088433548957, "learning_rate": 2.9679172700319647e-06, "loss": 0.0282, "step": 108850 }, { "epoch": 0.4542021680533418, "grad_norm": 0.4526503895320146, "learning_rate": 2.967849106451568e-06, "loss": 0.0254, "step": 108855 }, { "epoch": 0.4542230307683321, "grad_norm": 0.7128463208301237, "learning_rate": 2.967780947567454e-06, "loss": 0.0323, "step": 108860 }, { "epoch": 0.45424389348332234, "grad_norm": 0.7256821145068737, "learning_rate": 2.967712793379085e-06, "loss": 0.0234, "step": 108865 }, { "epoch": 0.45426475619831264, "grad_norm": 0.9416614807923601, "learning_rate": 2.9676446438859207e-06, "loss": 0.0371, "step": 108870 }, { "epoch": 0.4542856189133029, "grad_norm": 0.6683461491991931, "learning_rate": 2.9675764990874224e-06, "loss": 0.0252, "step": 108875 }, { "epoch": 0.4543064816282932, "grad_norm": 0.39469056919255874, "learning_rate": 2.9675083589830516e-06, "loss": 0.0204, "step": 108880 }, { "epoch": 0.4543273443432835, "grad_norm": 0.9734520682911395, "learning_rate": 2.9674402235722682e-06, "loss": 0.0276, "step": 108885 }, { "epoch": 0.4543482070582737, "grad_norm": 0.7527350713073279, "learning_rate": 2.967372092854535e-06, "loss": 0.0304, "step": 108890 }, { "epoch": 0.454369069773264, "grad_norm": 0.47068919843100454, "learning_rate": 2.967303966829312e-06, "loss": 0.0258, "step": 108895 }, { "epoch": 0.45438993248825427, "grad_norm": 0.5676000194200609, "learning_rate": 2.967235845496061e-06, "loss": 0.0242, "step": 108900 }, { "epoch": 0.45441079520324457, "grad_norm": 0.6933936062086896, "learning_rate": 2.967167728854243e-06, "loss": 0.0199, "step": 108905 }, { "epoch": 0.45443165791823487, "grad_norm": 0.5432115551155483, "learning_rate": 2.9670996169033202e-06, "loss": 0.0307, "step": 108910 }, { "epoch": 0.4544525206332251, "grad_norm": 0.3211452063019004, "learning_rate": 2.9670315096427536e-06, "loss": 0.0187, "step": 108915 }, { "epoch": 0.4544733833482154, "grad_norm": 0.7037844854543966, "learning_rate": 2.9669634070720055e-06, "loss": 0.0172, "step": 108920 }, { "epoch": 0.4544942460632057, "grad_norm": 0.9063790771649969, "learning_rate": 2.9668953091905385e-06, "loss": 0.0211, "step": 108925 }, { "epoch": 0.45451510877819595, "grad_norm": 0.6548307076303582, "learning_rate": 2.966827215997812e-06, "loss": 0.0269, "step": 108930 }, { "epoch": 0.45453597149318625, "grad_norm": 0.6850257756958898, "learning_rate": 2.96675912749329e-06, "loss": 0.0245, "step": 108935 }, { "epoch": 0.4545568342081765, "grad_norm": 0.5988311268757034, "learning_rate": 2.966691043676434e-06, "loss": 0.0299, "step": 108940 }, { "epoch": 0.4545776969231668, "grad_norm": 0.3145788051802209, "learning_rate": 2.966622964546706e-06, "loss": 0.0191, "step": 108945 }, { "epoch": 0.4545985596381571, "grad_norm": 0.6463835574078576, "learning_rate": 2.9665548901035686e-06, "loss": 0.0252, "step": 108950 }, { "epoch": 0.45461942235314734, "grad_norm": 0.9902534439160366, "learning_rate": 2.9664868203464834e-06, "loss": 0.0265, "step": 108955 }, { "epoch": 0.45464028506813764, "grad_norm": 0.6286755522100306, "learning_rate": 2.9664187552749135e-06, "loss": 0.0244, "step": 108960 }, { "epoch": 0.4546611477831279, "grad_norm": 0.5557307899046178, "learning_rate": 2.9663506948883213e-06, "loss": 0.0298, "step": 108965 }, { "epoch": 0.4546820104981182, "grad_norm": 1.0754983483729612, "learning_rate": 2.9662826391861695e-06, "loss": 0.0322, "step": 108970 }, { "epoch": 0.4547028732131085, "grad_norm": 1.2152486783087495, "learning_rate": 2.9662145881679193e-06, "loss": 0.0249, "step": 108975 }, { "epoch": 0.4547237359280987, "grad_norm": 0.6351655201927612, "learning_rate": 2.966146541833036e-06, "loss": 0.025, "step": 108980 }, { "epoch": 0.454744598643089, "grad_norm": 0.7352943425006108, "learning_rate": 2.96607850018098e-06, "loss": 0.0248, "step": 108985 }, { "epoch": 0.45476546135807927, "grad_norm": 0.925895288389199, "learning_rate": 2.9660104632112157e-06, "loss": 0.0236, "step": 108990 }, { "epoch": 0.45478632407306957, "grad_norm": 0.5677987623341292, "learning_rate": 2.965942430923206e-06, "loss": 0.0203, "step": 108995 }, { "epoch": 0.45480718678805987, "grad_norm": 0.5654857987669453, "learning_rate": 2.9658744033164132e-06, "loss": 0.0315, "step": 109000 }, { "epoch": 0.4548280495030501, "grad_norm": 0.651203635301512, "learning_rate": 2.9658063803903012e-06, "loss": 0.0315, "step": 109005 }, { "epoch": 0.4548489122180404, "grad_norm": 0.9144803478953822, "learning_rate": 2.965738362144333e-06, "loss": 0.0309, "step": 109010 }, { "epoch": 0.45486977493303066, "grad_norm": 0.6169889627470352, "learning_rate": 2.965670348577972e-06, "loss": 0.0194, "step": 109015 }, { "epoch": 0.45489063764802096, "grad_norm": 1.0522714088162037, "learning_rate": 2.9656023396906814e-06, "loss": 0.0287, "step": 109020 }, { "epoch": 0.45491150036301126, "grad_norm": 0.9935774605913438, "learning_rate": 2.965534335481925e-06, "loss": 0.029, "step": 109025 }, { "epoch": 0.4549323630780015, "grad_norm": 0.17429751078577135, "learning_rate": 2.965466335951166e-06, "loss": 0.0186, "step": 109030 }, { "epoch": 0.4549532257929918, "grad_norm": 0.46873506833122314, "learning_rate": 2.9653983410978686e-06, "loss": 0.0319, "step": 109035 }, { "epoch": 0.4549740885079821, "grad_norm": 0.4725244276004271, "learning_rate": 2.9653303509214965e-06, "loss": 0.0273, "step": 109040 }, { "epoch": 0.45499495122297234, "grad_norm": 0.714219682374929, "learning_rate": 2.9652623654215136e-06, "loss": 0.0198, "step": 109045 }, { "epoch": 0.45501581393796264, "grad_norm": 0.6289978563505204, "learning_rate": 2.9651943845973835e-06, "loss": 0.0246, "step": 109050 }, { "epoch": 0.4550366766529529, "grad_norm": 0.5635409572611728, "learning_rate": 2.9651264084485703e-06, "loss": 0.0319, "step": 109055 }, { "epoch": 0.4550575393679432, "grad_norm": 0.7642196458244912, "learning_rate": 2.9650584369745383e-06, "loss": 0.031, "step": 109060 }, { "epoch": 0.4550784020829335, "grad_norm": 0.5343963734954531, "learning_rate": 2.9649904701747517e-06, "loss": 0.041, "step": 109065 }, { "epoch": 0.45509926479792373, "grad_norm": 1.0143941177834819, "learning_rate": 2.9649225080486753e-06, "loss": 0.0239, "step": 109070 }, { "epoch": 0.45512012751291403, "grad_norm": 1.0154848954383573, "learning_rate": 2.9648545505957728e-06, "loss": 0.0278, "step": 109075 }, { "epoch": 0.4551409902279043, "grad_norm": 0.9393492936861972, "learning_rate": 2.9647865978155085e-06, "loss": 0.0273, "step": 109080 }, { "epoch": 0.4551618529428946, "grad_norm": 0.6745459887371286, "learning_rate": 2.964718649707347e-06, "loss": 0.0287, "step": 109085 }, { "epoch": 0.4551827156578849, "grad_norm": 0.9965857574021058, "learning_rate": 2.964650706270754e-06, "loss": 0.0349, "step": 109090 }, { "epoch": 0.4552035783728751, "grad_norm": 0.8058994193053912, "learning_rate": 2.964582767505193e-06, "loss": 0.0234, "step": 109095 }, { "epoch": 0.4552244410878654, "grad_norm": 3.7480213216063696, "learning_rate": 2.964514833410129e-06, "loss": 0.0267, "step": 109100 }, { "epoch": 0.45524530380285566, "grad_norm": 0.734841678345153, "learning_rate": 2.9644469039850277e-06, "loss": 0.0305, "step": 109105 }, { "epoch": 0.45526616651784596, "grad_norm": 0.29165117220672254, "learning_rate": 2.9643789792293533e-06, "loss": 0.0129, "step": 109110 }, { "epoch": 0.45528702923283626, "grad_norm": 0.5761122787603569, "learning_rate": 2.964311059142571e-06, "loss": 0.0216, "step": 109115 }, { "epoch": 0.4553078919478265, "grad_norm": 0.7520410319775902, "learning_rate": 2.964243143724146e-06, "loss": 0.0276, "step": 109120 }, { "epoch": 0.4553287546628168, "grad_norm": 0.5752292929027918, "learning_rate": 2.964175232973544e-06, "loss": 0.0244, "step": 109125 }, { "epoch": 0.4553496173778071, "grad_norm": 1.654253898403185, "learning_rate": 2.9641073268902297e-06, "loss": 0.0311, "step": 109130 }, { "epoch": 0.45537048009279735, "grad_norm": 0.9152811348182224, "learning_rate": 2.964039425473668e-06, "loss": 0.0304, "step": 109135 }, { "epoch": 0.45539134280778765, "grad_norm": 0.8542447199356823, "learning_rate": 2.9639715287233263e-06, "loss": 0.031, "step": 109140 }, { "epoch": 0.4554122055227779, "grad_norm": 0.6113190827897057, "learning_rate": 2.9639036366386685e-06, "loss": 0.0318, "step": 109145 }, { "epoch": 0.4554330682377682, "grad_norm": 0.982436792687076, "learning_rate": 2.963835749219161e-06, "loss": 0.0267, "step": 109150 }, { "epoch": 0.4554539309527585, "grad_norm": 0.7088170940872168, "learning_rate": 2.9637678664642694e-06, "loss": 0.0234, "step": 109155 }, { "epoch": 0.45547479366774873, "grad_norm": 0.6063847283210436, "learning_rate": 2.963699988373459e-06, "loss": 0.0226, "step": 109160 }, { "epoch": 0.45549565638273903, "grad_norm": 1.1215140792190144, "learning_rate": 2.9636321149461965e-06, "loss": 0.0257, "step": 109165 }, { "epoch": 0.4555165190977293, "grad_norm": 0.7936583367586721, "learning_rate": 2.9635642461819473e-06, "loss": 0.0247, "step": 109170 }, { "epoch": 0.4555373818127196, "grad_norm": 0.9284271103149487, "learning_rate": 2.9634963820801783e-06, "loss": 0.022, "step": 109175 }, { "epoch": 0.4555582445277099, "grad_norm": 0.6773472284099453, "learning_rate": 2.9634285226403546e-06, "loss": 0.0303, "step": 109180 }, { "epoch": 0.4555791072427001, "grad_norm": 0.42581590713311235, "learning_rate": 2.9633606678619437e-06, "loss": 0.0254, "step": 109185 }, { "epoch": 0.4555999699576904, "grad_norm": 0.7797459253466709, "learning_rate": 2.963292817744411e-06, "loss": 0.0279, "step": 109190 }, { "epoch": 0.45562083267268066, "grad_norm": 0.6372454320216064, "learning_rate": 2.963224972287223e-06, "loss": 0.0243, "step": 109195 }, { "epoch": 0.45564169538767096, "grad_norm": 0.5307966504754846, "learning_rate": 2.963157131489847e-06, "loss": 0.02, "step": 109200 }, { "epoch": 0.45566255810266126, "grad_norm": 0.8661438319495366, "learning_rate": 2.9630892953517485e-06, "loss": 0.0265, "step": 109205 }, { "epoch": 0.4556834208176515, "grad_norm": 1.4572032454045865, "learning_rate": 2.963021463872396e-06, "loss": 0.0342, "step": 109210 }, { "epoch": 0.4557042835326418, "grad_norm": 0.450312178624852, "learning_rate": 2.962953637051254e-06, "loss": 0.0225, "step": 109215 }, { "epoch": 0.4557251462476321, "grad_norm": 0.5378735846517431, "learning_rate": 2.9628858148877905e-06, "loss": 0.0224, "step": 109220 }, { "epoch": 0.45574600896262235, "grad_norm": 0.45891636198500585, "learning_rate": 2.9628179973814725e-06, "loss": 0.0225, "step": 109225 }, { "epoch": 0.45576687167761265, "grad_norm": 0.45966121244191904, "learning_rate": 2.962750184531767e-06, "loss": 0.0305, "step": 109230 }, { "epoch": 0.4557877343926029, "grad_norm": 0.6927811726873004, "learning_rate": 2.962682376338141e-06, "loss": 0.0249, "step": 109235 }, { "epoch": 0.4558085971075932, "grad_norm": 0.9571814369028803, "learning_rate": 2.9626145728000617e-06, "loss": 0.0217, "step": 109240 }, { "epoch": 0.4558294598225835, "grad_norm": 0.6271340022001737, "learning_rate": 2.9625467739169968e-06, "loss": 0.0229, "step": 109245 }, { "epoch": 0.45585032253757374, "grad_norm": 0.6839450274072589, "learning_rate": 2.962478979688413e-06, "loss": 0.0173, "step": 109250 }, { "epoch": 0.45587118525256404, "grad_norm": 0.8781568584577242, "learning_rate": 2.962411190113778e-06, "loss": 0.0207, "step": 109255 }, { "epoch": 0.4558920479675543, "grad_norm": 0.5088797770696355, "learning_rate": 2.9623434051925596e-06, "loss": 0.0323, "step": 109260 }, { "epoch": 0.4559129106825446, "grad_norm": 0.6382217850841426, "learning_rate": 2.9622756249242253e-06, "loss": 0.0281, "step": 109265 }, { "epoch": 0.4559337733975349, "grad_norm": 1.2638939713009516, "learning_rate": 2.9622078493082423e-06, "loss": 0.0339, "step": 109270 }, { "epoch": 0.4559546361125251, "grad_norm": 4.9215489528713485, "learning_rate": 2.96214007834408e-06, "loss": 0.0197, "step": 109275 }, { "epoch": 0.4559754988275154, "grad_norm": 1.2141862320282264, "learning_rate": 2.9620723120312038e-06, "loss": 0.0149, "step": 109280 }, { "epoch": 0.45599636154250567, "grad_norm": 0.8179796113244907, "learning_rate": 2.962004550369084e-06, "loss": 0.0173, "step": 109285 }, { "epoch": 0.45601722425749597, "grad_norm": 0.8311019427414587, "learning_rate": 2.9619367933571874e-06, "loss": 0.0192, "step": 109290 }, { "epoch": 0.45603808697248627, "grad_norm": 0.5055958840855136, "learning_rate": 2.961869040994983e-06, "loss": 0.0242, "step": 109295 }, { "epoch": 0.4560589496874765, "grad_norm": 0.5922411635190382, "learning_rate": 2.961801293281938e-06, "loss": 0.0215, "step": 109300 }, { "epoch": 0.4560798124024668, "grad_norm": 0.9069564858178288, "learning_rate": 2.961733550217521e-06, "loss": 0.0261, "step": 109305 }, { "epoch": 0.4561006751174571, "grad_norm": 0.8002664250749377, "learning_rate": 2.961665811801201e-06, "loss": 0.0273, "step": 109310 }, { "epoch": 0.45612153783244735, "grad_norm": 0.7318893161962859, "learning_rate": 2.961598078032446e-06, "loss": 0.0283, "step": 109315 }, { "epoch": 0.45614240054743765, "grad_norm": 1.4228347940916868, "learning_rate": 2.961530348910725e-06, "loss": 0.0337, "step": 109320 }, { "epoch": 0.4561632632624279, "grad_norm": 1.0271488463926386, "learning_rate": 2.961462624435506e-06, "loss": 0.0269, "step": 109325 }, { "epoch": 0.4561841259774182, "grad_norm": 0.9034201600419942, "learning_rate": 2.9613949046062578e-06, "loss": 0.0258, "step": 109330 }, { "epoch": 0.4562049886924085, "grad_norm": 0.4519096763218742, "learning_rate": 2.96132718942245e-06, "loss": 0.02, "step": 109335 }, { "epoch": 0.45622585140739874, "grad_norm": 0.6261297034216409, "learning_rate": 2.9612594788835508e-06, "loss": 0.0188, "step": 109340 }, { "epoch": 0.45624671412238904, "grad_norm": 1.1345953200092393, "learning_rate": 2.96119177298903e-06, "loss": 0.0274, "step": 109345 }, { "epoch": 0.4562675768373793, "grad_norm": 0.45776083028043363, "learning_rate": 2.961124071738355e-06, "loss": 0.0246, "step": 109350 }, { "epoch": 0.4562884395523696, "grad_norm": 0.6109028421174395, "learning_rate": 2.9610563751309968e-06, "loss": 0.0262, "step": 109355 }, { "epoch": 0.4563093022673599, "grad_norm": 1.0125911450152592, "learning_rate": 2.9609886831664236e-06, "loss": 0.037, "step": 109360 }, { "epoch": 0.45633016498235013, "grad_norm": 0.8328082250491554, "learning_rate": 2.9609209958441047e-06, "loss": 0.0256, "step": 109365 }, { "epoch": 0.45635102769734043, "grad_norm": 0.5256594666413025, "learning_rate": 2.960853313163511e-06, "loss": 0.02, "step": 109370 }, { "epoch": 0.45637189041233067, "grad_norm": 0.9174740371603631, "learning_rate": 2.96078563512411e-06, "loss": 0.0326, "step": 109375 }, { "epoch": 0.45639275312732097, "grad_norm": 0.8990836209877501, "learning_rate": 2.9607179617253716e-06, "loss": 0.0244, "step": 109380 }, { "epoch": 0.45641361584231127, "grad_norm": 0.6204141833100162, "learning_rate": 2.9606502929667674e-06, "loss": 0.023, "step": 109385 }, { "epoch": 0.4564344785573015, "grad_norm": 0.46320258607957615, "learning_rate": 2.960582628847765e-06, "loss": 0.0177, "step": 109390 }, { "epoch": 0.4564553412722918, "grad_norm": 0.8025343312756547, "learning_rate": 2.9605149693678353e-06, "loss": 0.0222, "step": 109395 }, { "epoch": 0.4564762039872821, "grad_norm": 0.7898632999310942, "learning_rate": 2.960447314526448e-06, "loss": 0.0235, "step": 109400 }, { "epoch": 0.45649706670227236, "grad_norm": 0.855402359276067, "learning_rate": 2.960379664323073e-06, "loss": 0.0261, "step": 109405 }, { "epoch": 0.45651792941726266, "grad_norm": 0.7097409590230416, "learning_rate": 2.9603120187571806e-06, "loss": 0.0273, "step": 109410 }, { "epoch": 0.4565387921322529, "grad_norm": 0.9121640635211732, "learning_rate": 2.9602443778282407e-06, "loss": 0.0346, "step": 109415 }, { "epoch": 0.4565596548472432, "grad_norm": 0.36341727703300913, "learning_rate": 2.960176741535724e-06, "loss": 0.0195, "step": 109420 }, { "epoch": 0.4565805175622335, "grad_norm": 0.6955774252595172, "learning_rate": 2.9601091098791e-06, "loss": 0.0196, "step": 109425 }, { "epoch": 0.45660138027722375, "grad_norm": 0.5673688983883143, "learning_rate": 2.9600414828578406e-06, "loss": 0.0209, "step": 109430 }, { "epoch": 0.45662224299221404, "grad_norm": 0.49196259680901383, "learning_rate": 2.959973860471415e-06, "loss": 0.0207, "step": 109435 }, { "epoch": 0.4566431057072043, "grad_norm": 0.9061522018908774, "learning_rate": 2.9599062427192943e-06, "loss": 0.0277, "step": 109440 }, { "epoch": 0.4566639684221946, "grad_norm": 0.4562411290134173, "learning_rate": 2.959838629600949e-06, "loss": 0.0269, "step": 109445 }, { "epoch": 0.4566848311371849, "grad_norm": 0.7731919891958688, "learning_rate": 2.9597710211158502e-06, "loss": 0.0265, "step": 109450 }, { "epoch": 0.45670569385217513, "grad_norm": 0.6283496841270511, "learning_rate": 2.9597034172634694e-06, "loss": 0.0305, "step": 109455 }, { "epoch": 0.45672655656716543, "grad_norm": 0.6434080179943424, "learning_rate": 2.9596358180432755e-06, "loss": 0.0271, "step": 109460 }, { "epoch": 0.4567474192821557, "grad_norm": 0.3812503874142317, "learning_rate": 2.959568223454741e-06, "loss": 0.0203, "step": 109465 }, { "epoch": 0.456768281997146, "grad_norm": 0.3751910666226886, "learning_rate": 2.9595006334973374e-06, "loss": 0.0166, "step": 109470 }, { "epoch": 0.4567891447121363, "grad_norm": 0.9657262859985105, "learning_rate": 2.959433048170535e-06, "loss": 0.0319, "step": 109475 }, { "epoch": 0.4568100074271265, "grad_norm": 0.6595420090521757, "learning_rate": 2.9593654674738054e-06, "loss": 0.0207, "step": 109480 }, { "epoch": 0.4568308701421168, "grad_norm": 0.6549981739531486, "learning_rate": 2.95929789140662e-06, "loss": 0.0215, "step": 109485 }, { "epoch": 0.4568517328571071, "grad_norm": 1.1555422502122574, "learning_rate": 2.9592303199684504e-06, "loss": 0.0233, "step": 109490 }, { "epoch": 0.45687259557209736, "grad_norm": 1.4230473584823566, "learning_rate": 2.959162753158767e-06, "loss": 0.0272, "step": 109495 }, { "epoch": 0.45689345828708766, "grad_norm": 0.8574495877446379, "learning_rate": 2.959095190977043e-06, "loss": 0.0214, "step": 109500 }, { "epoch": 0.4569143210020779, "grad_norm": 0.7679037002388232, "learning_rate": 2.9590276334227496e-06, "loss": 0.026, "step": 109505 }, { "epoch": 0.4569351837170682, "grad_norm": 0.7114505448731977, "learning_rate": 2.9589600804953584e-06, "loss": 0.0315, "step": 109510 }, { "epoch": 0.4569560464320585, "grad_norm": 0.5367580089075968, "learning_rate": 2.9588925321943417e-06, "loss": 0.0304, "step": 109515 }, { "epoch": 0.45697690914704875, "grad_norm": 0.39732008358917054, "learning_rate": 2.9588249885191706e-06, "loss": 0.0259, "step": 109520 }, { "epoch": 0.45699777186203905, "grad_norm": 0.3314773720458409, "learning_rate": 2.958757449469318e-06, "loss": 0.0266, "step": 109525 }, { "epoch": 0.4570186345770293, "grad_norm": 0.9042583918173679, "learning_rate": 2.9586899150442554e-06, "loss": 0.0205, "step": 109530 }, { "epoch": 0.4570394972920196, "grad_norm": 0.4884450019807349, "learning_rate": 2.958622385243456e-06, "loss": 0.0311, "step": 109535 }, { "epoch": 0.4570603600070099, "grad_norm": 0.7112080859784873, "learning_rate": 2.9585548600663905e-06, "loss": 0.0268, "step": 109540 }, { "epoch": 0.45708122272200014, "grad_norm": 0.8586172240466798, "learning_rate": 2.9584873395125325e-06, "loss": 0.0243, "step": 109545 }, { "epoch": 0.45710208543699044, "grad_norm": 0.40131207359425886, "learning_rate": 2.958419823581354e-06, "loss": 0.0246, "step": 109550 }, { "epoch": 0.4571229481519807, "grad_norm": 0.9970302894542682, "learning_rate": 2.958352312272328e-06, "loss": 0.0211, "step": 109555 }, { "epoch": 0.457143810866971, "grad_norm": 0.8685047337051248, "learning_rate": 2.9582848055849266e-06, "loss": 0.0293, "step": 109560 }, { "epoch": 0.4571646735819613, "grad_norm": 0.6029844313876186, "learning_rate": 2.9582173035186227e-06, "loss": 0.0219, "step": 109565 }, { "epoch": 0.4571855362969515, "grad_norm": 0.5687902930812551, "learning_rate": 2.95814980607289e-06, "loss": 0.0411, "step": 109570 }, { "epoch": 0.4572063990119418, "grad_norm": 1.2651579008675462, "learning_rate": 2.958082313247199e-06, "loss": 0.0286, "step": 109575 }, { "epoch": 0.4572272617269321, "grad_norm": 0.4029142535299093, "learning_rate": 2.958014825041025e-06, "loss": 0.023, "step": 109580 }, { "epoch": 0.45724812444192237, "grad_norm": 0.8651795079370417, "learning_rate": 2.957947341453841e-06, "loss": 0.0324, "step": 109585 }, { "epoch": 0.45726898715691267, "grad_norm": 0.3661422224612558, "learning_rate": 2.957879862485118e-06, "loss": 0.0223, "step": 109590 }, { "epoch": 0.4572898498719029, "grad_norm": 0.7906045789014808, "learning_rate": 2.9578123881343317e-06, "loss": 0.0219, "step": 109595 }, { "epoch": 0.4573107125868932, "grad_norm": 0.7027055443410869, "learning_rate": 2.957744918400954e-06, "loss": 0.0202, "step": 109600 }, { "epoch": 0.4573315753018835, "grad_norm": 0.8189756607265136, "learning_rate": 2.9576774532844586e-06, "loss": 0.0275, "step": 109605 }, { "epoch": 0.45735243801687375, "grad_norm": 0.5797270669878956, "learning_rate": 2.9576099927843187e-06, "loss": 0.02, "step": 109610 }, { "epoch": 0.45737330073186405, "grad_norm": 0.974138119595064, "learning_rate": 2.9575425369000087e-06, "loss": 0.0268, "step": 109615 }, { "epoch": 0.4573941634468543, "grad_norm": 0.6971679691151446, "learning_rate": 2.9574750856310012e-06, "loss": 0.0207, "step": 109620 }, { "epoch": 0.4574150261618446, "grad_norm": 0.6708970505412712, "learning_rate": 2.9574076389767707e-06, "loss": 0.0227, "step": 109625 }, { "epoch": 0.4574358888768349, "grad_norm": 0.7204526073141779, "learning_rate": 2.9573401969367915e-06, "loss": 0.0247, "step": 109630 }, { "epoch": 0.45745675159182514, "grad_norm": 0.9166136355376853, "learning_rate": 2.957272759510536e-06, "loss": 0.0272, "step": 109635 }, { "epoch": 0.45747761430681544, "grad_norm": 1.0433929302819722, "learning_rate": 2.9572053266974786e-06, "loss": 0.0293, "step": 109640 }, { "epoch": 0.4574984770218057, "grad_norm": 0.5232838050979168, "learning_rate": 2.9571378984970945e-06, "loss": 0.022, "step": 109645 }, { "epoch": 0.457519339736796, "grad_norm": 0.5947114100193782, "learning_rate": 2.9570704749088565e-06, "loss": 0.0214, "step": 109650 }, { "epoch": 0.4575402024517863, "grad_norm": 0.7416001784526774, "learning_rate": 2.9570030559322394e-06, "loss": 0.0242, "step": 109655 }, { "epoch": 0.4575610651667765, "grad_norm": 1.983089048273903, "learning_rate": 2.9569356415667184e-06, "loss": 0.0263, "step": 109660 }, { "epoch": 0.4575819278817668, "grad_norm": 0.7895374529880682, "learning_rate": 2.956868231811766e-06, "loss": 0.02, "step": 109665 }, { "epoch": 0.4576027905967571, "grad_norm": 0.5143576091151991, "learning_rate": 2.9568008266668572e-06, "loss": 0.0204, "step": 109670 }, { "epoch": 0.45762365331174737, "grad_norm": 0.9179446888166978, "learning_rate": 2.9567334261314683e-06, "loss": 0.0285, "step": 109675 }, { "epoch": 0.45764451602673767, "grad_norm": 0.42622386965304093, "learning_rate": 2.9566660302050724e-06, "loss": 0.0426, "step": 109680 }, { "epoch": 0.4576653787417279, "grad_norm": 0.958471980058836, "learning_rate": 2.9565986388871443e-06, "loss": 0.0244, "step": 109685 }, { "epoch": 0.4576862414567182, "grad_norm": 0.9291480532015999, "learning_rate": 2.956531252177159e-06, "loss": 0.0291, "step": 109690 }, { "epoch": 0.4577071041717085, "grad_norm": 0.666259184349344, "learning_rate": 2.9564638700745914e-06, "loss": 0.0281, "step": 109695 }, { "epoch": 0.45772796688669876, "grad_norm": 0.5800314414835533, "learning_rate": 2.9563964925789167e-06, "loss": 0.0282, "step": 109700 }, { "epoch": 0.45774882960168906, "grad_norm": 0.7759394904642587, "learning_rate": 2.95632911968961e-06, "loss": 0.0297, "step": 109705 }, { "epoch": 0.4577696923166793, "grad_norm": 0.3958766422627121, "learning_rate": 2.9562617514061458e-06, "loss": 0.0151, "step": 109710 }, { "epoch": 0.4577905550316696, "grad_norm": 0.8015449088940669, "learning_rate": 2.956194387728001e-06, "loss": 0.0221, "step": 109715 }, { "epoch": 0.4578114177466599, "grad_norm": 1.1330968251618463, "learning_rate": 2.9561270286546482e-06, "loss": 0.0266, "step": 109720 }, { "epoch": 0.45783228046165014, "grad_norm": 0.511725841759017, "learning_rate": 2.956059674185565e-06, "loss": 0.0205, "step": 109725 }, { "epoch": 0.45785314317664044, "grad_norm": 0.3358242756323474, "learning_rate": 2.9559923243202266e-06, "loss": 0.019, "step": 109730 }, { "epoch": 0.4578740058916307, "grad_norm": 1.063455547980551, "learning_rate": 2.9559249790581073e-06, "loss": 0.0234, "step": 109735 }, { "epoch": 0.457894868606621, "grad_norm": 0.4830035938160354, "learning_rate": 2.9558576383986847e-06, "loss": 0.0178, "step": 109740 }, { "epoch": 0.4579157313216113, "grad_norm": 1.1174986166502416, "learning_rate": 2.955790302341433e-06, "loss": 0.0275, "step": 109745 }, { "epoch": 0.45793659403660153, "grad_norm": 0.5473623517303008, "learning_rate": 2.9557229708858284e-06, "loss": 0.0264, "step": 109750 }, { "epoch": 0.45795745675159183, "grad_norm": 0.6000208472198887, "learning_rate": 2.9556556440313474e-06, "loss": 0.0389, "step": 109755 }, { "epoch": 0.45797831946658213, "grad_norm": 0.9136867731624638, "learning_rate": 2.955588321777465e-06, "loss": 0.0215, "step": 109760 }, { "epoch": 0.4579991821815724, "grad_norm": 1.0901434009783177, "learning_rate": 2.955521004123658e-06, "loss": 0.0221, "step": 109765 }, { "epoch": 0.4580200448965627, "grad_norm": 0.655037862650592, "learning_rate": 2.9554536910694027e-06, "loss": 0.0309, "step": 109770 }, { "epoch": 0.4580409076115529, "grad_norm": 1.4220577196932376, "learning_rate": 2.9553863826141743e-06, "loss": 0.0287, "step": 109775 }, { "epoch": 0.4580617703265432, "grad_norm": 0.7417883350651834, "learning_rate": 2.9553190787574506e-06, "loss": 0.0244, "step": 109780 }, { "epoch": 0.4580826330415335, "grad_norm": 0.7576158520179287, "learning_rate": 2.9552517794987067e-06, "loss": 0.0352, "step": 109785 }, { "epoch": 0.45810349575652376, "grad_norm": 0.6857806355211747, "learning_rate": 2.9551844848374197e-06, "loss": 0.0193, "step": 109790 }, { "epoch": 0.45812435847151406, "grad_norm": 0.4646097189531523, "learning_rate": 2.955117194773066e-06, "loss": 0.0196, "step": 109795 }, { "epoch": 0.4581452211865043, "grad_norm": 0.8324406325252458, "learning_rate": 2.9550499093051226e-06, "loss": 0.0217, "step": 109800 }, { "epoch": 0.4581660839014946, "grad_norm": 0.7804219510458192, "learning_rate": 2.954982628433066e-06, "loss": 0.0198, "step": 109805 }, { "epoch": 0.4581869466164849, "grad_norm": 0.6511066386121196, "learning_rate": 2.954915352156373e-06, "loss": 0.0382, "step": 109810 }, { "epoch": 0.45820780933147515, "grad_norm": 0.9330306314449744, "learning_rate": 2.9548480804745206e-06, "loss": 0.0208, "step": 109815 }, { "epoch": 0.45822867204646545, "grad_norm": 0.724395119492476, "learning_rate": 2.9547808133869855e-06, "loss": 0.0348, "step": 109820 }, { "epoch": 0.4582495347614557, "grad_norm": 1.280492347273195, "learning_rate": 2.954713550893245e-06, "loss": 0.0279, "step": 109825 }, { "epoch": 0.458270397476446, "grad_norm": 0.5684500643618847, "learning_rate": 2.9546462929927767e-06, "loss": 0.0202, "step": 109830 }, { "epoch": 0.4582912601914363, "grad_norm": 1.0641285879979179, "learning_rate": 2.9545790396850573e-06, "loss": 0.0231, "step": 109835 }, { "epoch": 0.45831212290642653, "grad_norm": 0.31474027335530375, "learning_rate": 2.9545117909695635e-06, "loss": 0.046, "step": 109840 }, { "epoch": 0.45833298562141683, "grad_norm": 0.4957026116071696, "learning_rate": 2.954444546845774e-06, "loss": 0.0222, "step": 109845 }, { "epoch": 0.45835384833640713, "grad_norm": 0.7432880793939922, "learning_rate": 2.954377307313166e-06, "loss": 0.0284, "step": 109850 }, { "epoch": 0.4583747110513974, "grad_norm": 0.7283588381664202, "learning_rate": 2.9543100723712164e-06, "loss": 0.0279, "step": 109855 }, { "epoch": 0.4583955737663877, "grad_norm": 0.8273148576376093, "learning_rate": 2.954242842019403e-06, "loss": 0.0234, "step": 109860 }, { "epoch": 0.4584164364813779, "grad_norm": 0.5922677709716448, "learning_rate": 2.954175616257205e-06, "loss": 0.0236, "step": 109865 }, { "epoch": 0.4584372991963682, "grad_norm": 0.5321307623000457, "learning_rate": 2.9541083950840975e-06, "loss": 0.022, "step": 109870 }, { "epoch": 0.4584581619113585, "grad_norm": 1.013872555460549, "learning_rate": 2.954041178499561e-06, "loss": 0.0247, "step": 109875 }, { "epoch": 0.45847902462634876, "grad_norm": 0.8220509849271523, "learning_rate": 2.953973966503072e-06, "loss": 0.0235, "step": 109880 }, { "epoch": 0.45849988734133906, "grad_norm": 0.7559312543281088, "learning_rate": 2.9539067590941094e-06, "loss": 0.0238, "step": 109885 }, { "epoch": 0.4585207500563293, "grad_norm": 0.4095264861348074, "learning_rate": 2.9538395562721507e-06, "loss": 0.0316, "step": 109890 }, { "epoch": 0.4585416127713196, "grad_norm": 0.4377508520135271, "learning_rate": 2.9537723580366743e-06, "loss": 0.0233, "step": 109895 }, { "epoch": 0.4585624754863099, "grad_norm": 0.3683118222647498, "learning_rate": 2.9537051643871587e-06, "loss": 0.0249, "step": 109900 }, { "epoch": 0.45858333820130015, "grad_norm": 0.7848605790303363, "learning_rate": 2.9536379753230827e-06, "loss": 0.0282, "step": 109905 }, { "epoch": 0.45860420091629045, "grad_norm": 0.743766635531635, "learning_rate": 2.9535707908439236e-06, "loss": 0.022, "step": 109910 }, { "epoch": 0.4586250636312807, "grad_norm": 0.9033278679335366, "learning_rate": 2.953503610949161e-06, "loss": 0.0269, "step": 109915 }, { "epoch": 0.458645926346271, "grad_norm": 0.5254635873104486, "learning_rate": 2.9534364356382733e-06, "loss": 0.0186, "step": 109920 }, { "epoch": 0.4586667890612613, "grad_norm": 2.299185947226444, "learning_rate": 2.9533692649107394e-06, "loss": 0.0226, "step": 109925 }, { "epoch": 0.45868765177625154, "grad_norm": 0.9242715482789886, "learning_rate": 2.9533020987660375e-06, "loss": 0.0213, "step": 109930 }, { "epoch": 0.45870851449124184, "grad_norm": 0.7929436588858232, "learning_rate": 2.9532349372036474e-06, "loss": 0.0277, "step": 109935 }, { "epoch": 0.45872937720623214, "grad_norm": 0.5083414768241199, "learning_rate": 2.953167780223047e-06, "loss": 0.021, "step": 109940 }, { "epoch": 0.4587502399212224, "grad_norm": 0.6744566759187457, "learning_rate": 2.9531006278237167e-06, "loss": 0.0286, "step": 109945 }, { "epoch": 0.4587711026362127, "grad_norm": 0.8730453169570456, "learning_rate": 2.953033480005134e-06, "loss": 0.0248, "step": 109950 }, { "epoch": 0.4587919653512029, "grad_norm": 0.341398397519088, "learning_rate": 2.9529663367667798e-06, "loss": 0.0255, "step": 109955 }, { "epoch": 0.4588128280661932, "grad_norm": 0.6103708350623909, "learning_rate": 2.952899198108133e-06, "loss": 0.0273, "step": 109960 }, { "epoch": 0.4588336907811835, "grad_norm": 1.0245959516367877, "learning_rate": 2.952832064028672e-06, "loss": 0.0278, "step": 109965 }, { "epoch": 0.45885455349617377, "grad_norm": 0.53969608802401, "learning_rate": 2.952764934527877e-06, "loss": 0.0231, "step": 109970 }, { "epoch": 0.45887541621116407, "grad_norm": 0.5005292033424495, "learning_rate": 2.9526978096052276e-06, "loss": 0.0272, "step": 109975 }, { "epoch": 0.4588962789261543, "grad_norm": 0.8868470866974208, "learning_rate": 2.952630689260204e-06, "loss": 0.0226, "step": 109980 }, { "epoch": 0.4589171416411446, "grad_norm": 0.22795726666471522, "learning_rate": 2.9525635734922848e-06, "loss": 0.0177, "step": 109985 }, { "epoch": 0.4589380043561349, "grad_norm": 0.7946147816493238, "learning_rate": 2.9524964623009506e-06, "loss": 0.022, "step": 109990 }, { "epoch": 0.45895886707112515, "grad_norm": 0.795933634484529, "learning_rate": 2.952429355685681e-06, "loss": 0.0334, "step": 109995 }, { "epoch": 0.45897972978611545, "grad_norm": 0.7855845144668039, "learning_rate": 2.9523622536459554e-06, "loss": 0.0332, "step": 110000 }, { "epoch": 0.4590005925011057, "grad_norm": 0.524139065107165, "learning_rate": 2.952295156181255e-06, "loss": 0.0218, "step": 110005 }, { "epoch": 0.459021455216096, "grad_norm": 0.6267949516759874, "learning_rate": 2.95222806329106e-06, "loss": 0.0232, "step": 110010 }, { "epoch": 0.4590423179310863, "grad_norm": 0.5205188454960141, "learning_rate": 2.9521609749748493e-06, "loss": 0.0238, "step": 110015 }, { "epoch": 0.45906318064607654, "grad_norm": 0.8246690775042301, "learning_rate": 2.9520938912321044e-06, "loss": 0.0271, "step": 110020 }, { "epoch": 0.45908404336106684, "grad_norm": 0.9636530465295898, "learning_rate": 2.9520268120623053e-06, "loss": 0.0289, "step": 110025 }, { "epoch": 0.45910490607605714, "grad_norm": 0.45665062203371726, "learning_rate": 2.9519597374649324e-06, "loss": 0.0291, "step": 110030 }, { "epoch": 0.4591257687910474, "grad_norm": 0.5569339207091495, "learning_rate": 2.951892667439466e-06, "loss": 0.0239, "step": 110035 }, { "epoch": 0.4591466315060377, "grad_norm": 0.6802375958203047, "learning_rate": 2.9518256019853876e-06, "loss": 0.0247, "step": 110040 }, { "epoch": 0.4591674942210279, "grad_norm": 0.7892383643029287, "learning_rate": 2.9517585411021773e-06, "loss": 0.0223, "step": 110045 }, { "epoch": 0.4591883569360182, "grad_norm": 0.7920555661715658, "learning_rate": 2.9516914847893153e-06, "loss": 0.0238, "step": 110050 }, { "epoch": 0.4592092196510085, "grad_norm": 2.6908323415061615, "learning_rate": 2.9516244330462834e-06, "loss": 0.0447, "step": 110055 }, { "epoch": 0.45923008236599877, "grad_norm": 0.8067134319014817, "learning_rate": 2.9515573858725627e-06, "loss": 0.0212, "step": 110060 }, { "epoch": 0.45925094508098907, "grad_norm": 0.6432042425168338, "learning_rate": 2.951490343267634e-06, "loss": 0.0297, "step": 110065 }, { "epoch": 0.4592718077959793, "grad_norm": 0.8614279364695075, "learning_rate": 2.951423305230978e-06, "loss": 0.0277, "step": 110070 }, { "epoch": 0.4592926705109696, "grad_norm": 0.9628064342514322, "learning_rate": 2.9513562717620765e-06, "loss": 0.025, "step": 110075 }, { "epoch": 0.4593135332259599, "grad_norm": 0.5611428834950496, "learning_rate": 2.9512892428604107e-06, "loss": 0.0214, "step": 110080 }, { "epoch": 0.45933439594095016, "grad_norm": 1.119255856191062, "learning_rate": 2.9512222185254617e-06, "loss": 0.0292, "step": 110085 }, { "epoch": 0.45935525865594046, "grad_norm": 0.8713931255626346, "learning_rate": 2.951155198756711e-06, "loss": 0.0335, "step": 110090 }, { "epoch": 0.4593761213709307, "grad_norm": 0.6174714925914626, "learning_rate": 2.9510881835536402e-06, "loss": 0.0224, "step": 110095 }, { "epoch": 0.459396984085921, "grad_norm": 0.8693488368280737, "learning_rate": 2.951021172915732e-06, "loss": 0.0339, "step": 110100 }, { "epoch": 0.4594178468009113, "grad_norm": 0.6389614252138327, "learning_rate": 2.9509541668424663e-06, "loss": 0.0299, "step": 110105 }, { "epoch": 0.45943870951590154, "grad_norm": 0.7771102858773529, "learning_rate": 2.950887165333326e-06, "loss": 0.026, "step": 110110 }, { "epoch": 0.45945957223089184, "grad_norm": 0.819791254388778, "learning_rate": 2.9508201683877925e-06, "loss": 0.032, "step": 110115 }, { "epoch": 0.45948043494588214, "grad_norm": 0.5647483185787106, "learning_rate": 2.9507531760053475e-06, "loss": 0.0249, "step": 110120 }, { "epoch": 0.4595012976608724, "grad_norm": 0.7029219405323225, "learning_rate": 2.9506861881854746e-06, "loss": 0.0201, "step": 110125 }, { "epoch": 0.4595221603758627, "grad_norm": 0.708945507899846, "learning_rate": 2.950619204927654e-06, "loss": 0.0192, "step": 110130 }, { "epoch": 0.45954302309085293, "grad_norm": 0.8523330688211715, "learning_rate": 2.950552226231369e-06, "loss": 0.0305, "step": 110135 }, { "epoch": 0.45956388580584323, "grad_norm": 0.49970787484448964, "learning_rate": 2.9504852520961025e-06, "loss": 0.0278, "step": 110140 }, { "epoch": 0.45958474852083353, "grad_norm": 0.4243493450069825, "learning_rate": 2.950418282521335e-06, "loss": 0.0266, "step": 110145 }, { "epoch": 0.4596056112358238, "grad_norm": 0.9930786422208141, "learning_rate": 2.9503513175065503e-06, "loss": 0.0346, "step": 110150 }, { "epoch": 0.4596264739508141, "grad_norm": 0.543036881165392, "learning_rate": 2.95028435705123e-06, "loss": 0.0215, "step": 110155 }, { "epoch": 0.4596473366658043, "grad_norm": 1.3656403278643914, "learning_rate": 2.9502174011548584e-06, "loss": 0.0381, "step": 110160 }, { "epoch": 0.4596681993807946, "grad_norm": 1.1213148728749034, "learning_rate": 2.9501504498169166e-06, "loss": 0.0343, "step": 110165 }, { "epoch": 0.4596890620957849, "grad_norm": 0.8898374856944062, "learning_rate": 2.9500835030368886e-06, "loss": 0.0289, "step": 110170 }, { "epoch": 0.45970992481077516, "grad_norm": 0.4743505821647132, "learning_rate": 2.9500165608142554e-06, "loss": 0.0255, "step": 110175 }, { "epoch": 0.45973078752576546, "grad_norm": 0.7484500087561492, "learning_rate": 2.949949623148502e-06, "loss": 0.0254, "step": 110180 }, { "epoch": 0.4597516502407557, "grad_norm": 0.7873237719633822, "learning_rate": 2.9498826900391105e-06, "loss": 0.0229, "step": 110185 }, { "epoch": 0.459772512955746, "grad_norm": 0.3682074571622376, "learning_rate": 2.9498157614855638e-06, "loss": 0.0191, "step": 110190 }, { "epoch": 0.4597933756707363, "grad_norm": 0.4351422032941349, "learning_rate": 2.949748837487346e-06, "loss": 0.0319, "step": 110195 }, { "epoch": 0.45981423838572655, "grad_norm": 0.7655794985275657, "learning_rate": 2.949681918043939e-06, "loss": 0.0263, "step": 110200 }, { "epoch": 0.45983510110071685, "grad_norm": 1.1091365733823642, "learning_rate": 2.949615003154827e-06, "loss": 0.0237, "step": 110205 }, { "epoch": 0.45985596381570715, "grad_norm": 0.7899296682133952, "learning_rate": 2.9495480928194936e-06, "loss": 0.0212, "step": 110210 }, { "epoch": 0.4598768265306974, "grad_norm": 0.9650021172456074, "learning_rate": 2.9494811870374223e-06, "loss": 0.0195, "step": 110215 }, { "epoch": 0.4598976892456877, "grad_norm": 1.3363661489973293, "learning_rate": 2.9494142858080967e-06, "loss": 0.0316, "step": 110220 }, { "epoch": 0.45991855196067793, "grad_norm": 1.0790614537753036, "learning_rate": 2.9493473891309993e-06, "loss": 0.0209, "step": 110225 }, { "epoch": 0.45993941467566823, "grad_norm": 0.7013980207867871, "learning_rate": 2.9492804970056156e-06, "loss": 0.0277, "step": 110230 }, { "epoch": 0.45996027739065853, "grad_norm": 0.9047343430515113, "learning_rate": 2.949213609431429e-06, "loss": 0.0224, "step": 110235 }, { "epoch": 0.4599811401056488, "grad_norm": 0.9980780108087862, "learning_rate": 2.9491467264079228e-06, "loss": 0.0332, "step": 110240 }, { "epoch": 0.4600020028206391, "grad_norm": 0.7587331309749888, "learning_rate": 2.949079847934581e-06, "loss": 0.0265, "step": 110245 }, { "epoch": 0.4600228655356293, "grad_norm": 0.31791997903696856, "learning_rate": 2.9490129740108885e-06, "loss": 0.0227, "step": 110250 }, { "epoch": 0.4600437282506196, "grad_norm": 0.5427793154745835, "learning_rate": 2.9489461046363286e-06, "loss": 0.0271, "step": 110255 }, { "epoch": 0.4600645909656099, "grad_norm": 0.4087817921463841, "learning_rate": 2.9488792398103865e-06, "loss": 0.0289, "step": 110260 }, { "epoch": 0.46008545368060016, "grad_norm": 0.749436885924713, "learning_rate": 2.948812379532546e-06, "loss": 0.0217, "step": 110265 }, { "epoch": 0.46010631639559046, "grad_norm": 0.5091007491616816, "learning_rate": 2.948745523802291e-06, "loss": 0.0205, "step": 110270 }, { "epoch": 0.4601271791105807, "grad_norm": 0.5155041457790511, "learning_rate": 2.948678672619107e-06, "loss": 0.023, "step": 110275 }, { "epoch": 0.460148041825571, "grad_norm": 0.5762353385728232, "learning_rate": 2.9486118259824786e-06, "loss": 0.0374, "step": 110280 }, { "epoch": 0.4601689045405613, "grad_norm": 0.94091654436494, "learning_rate": 2.9485449838918893e-06, "loss": 0.0311, "step": 110285 }, { "epoch": 0.46018976725555155, "grad_norm": 0.570785147605244, "learning_rate": 2.948478146346825e-06, "loss": 0.0222, "step": 110290 }, { "epoch": 0.46021062997054185, "grad_norm": 1.4588343054779214, "learning_rate": 2.94841131334677e-06, "loss": 0.0196, "step": 110295 }, { "epoch": 0.46023149268553215, "grad_norm": 0.36480201591230665, "learning_rate": 2.948344484891209e-06, "loss": 0.0224, "step": 110300 }, { "epoch": 0.4602523554005224, "grad_norm": 0.9077192768044495, "learning_rate": 2.9482776609796284e-06, "loss": 0.0295, "step": 110305 }, { "epoch": 0.4602732181155127, "grad_norm": 0.7289567983427406, "learning_rate": 2.9482108416115106e-06, "loss": 0.0247, "step": 110310 }, { "epoch": 0.46029408083050294, "grad_norm": 2.3459836878453757, "learning_rate": 2.948144026786344e-06, "loss": 0.041, "step": 110315 }, { "epoch": 0.46031494354549324, "grad_norm": 0.7608535274329937, "learning_rate": 2.9480772165036116e-06, "loss": 0.0189, "step": 110320 }, { "epoch": 0.46033580626048354, "grad_norm": 1.8212450864097167, "learning_rate": 2.9480104107627987e-06, "loss": 0.0224, "step": 110325 }, { "epoch": 0.4603566689754738, "grad_norm": 0.5787440735925229, "learning_rate": 2.9479436095633924e-06, "loss": 0.0254, "step": 110330 }, { "epoch": 0.4603775316904641, "grad_norm": 1.136127260678578, "learning_rate": 2.9478768129048768e-06, "loss": 0.0281, "step": 110335 }, { "epoch": 0.4603983944054543, "grad_norm": 1.2892381383480827, "learning_rate": 2.9478100207867372e-06, "loss": 0.0315, "step": 110340 }, { "epoch": 0.4604192571204446, "grad_norm": 0.644752712913048, "learning_rate": 2.9477432332084605e-06, "loss": 0.0266, "step": 110345 }, { "epoch": 0.4604401198354349, "grad_norm": 0.8182802691717055, "learning_rate": 2.9476764501695316e-06, "loss": 0.0295, "step": 110350 }, { "epoch": 0.46046098255042517, "grad_norm": 1.0876474776452798, "learning_rate": 2.9476096716694364e-06, "loss": 0.0295, "step": 110355 }, { "epoch": 0.46048184526541547, "grad_norm": 0.6829526137236627, "learning_rate": 2.9475428977076607e-06, "loss": 0.0283, "step": 110360 }, { "epoch": 0.4605027079804057, "grad_norm": 0.5598704171714358, "learning_rate": 2.9474761282836916e-06, "loss": 0.0229, "step": 110365 }, { "epoch": 0.460523570695396, "grad_norm": 0.7809726789673295, "learning_rate": 2.947409363397013e-06, "loss": 0.0256, "step": 110370 }, { "epoch": 0.4605444334103863, "grad_norm": 0.9155305601209865, "learning_rate": 2.9473426030471134e-06, "loss": 0.0261, "step": 110375 }, { "epoch": 0.46056529612537656, "grad_norm": 0.7433410776343152, "learning_rate": 2.9472758472334773e-06, "loss": 0.0218, "step": 110380 }, { "epoch": 0.46058615884036685, "grad_norm": 0.48747813261659845, "learning_rate": 2.9472090959555916e-06, "loss": 0.0252, "step": 110385 }, { "epoch": 0.46060702155535715, "grad_norm": 0.49224246059771537, "learning_rate": 2.947142349212943e-06, "loss": 0.025, "step": 110390 }, { "epoch": 0.4606278842703474, "grad_norm": 0.723418207103249, "learning_rate": 2.9470756070050172e-06, "loss": 0.0213, "step": 110395 }, { "epoch": 0.4606487469853377, "grad_norm": 1.494103093330469, "learning_rate": 2.9470088693313015e-06, "loss": 0.0296, "step": 110400 }, { "epoch": 0.46066960970032794, "grad_norm": 0.7290343423459551, "learning_rate": 2.946942136191282e-06, "loss": 0.0289, "step": 110405 }, { "epoch": 0.46069047241531824, "grad_norm": 0.33100398452455987, "learning_rate": 2.946875407584446e-06, "loss": 0.0193, "step": 110410 }, { "epoch": 0.46071133513030854, "grad_norm": 0.6126842258444405, "learning_rate": 2.9468086835102795e-06, "loss": 0.0292, "step": 110415 }, { "epoch": 0.4607321978452988, "grad_norm": 1.2361327537459423, "learning_rate": 2.9467419639682704e-06, "loss": 0.023, "step": 110420 }, { "epoch": 0.4607530605602891, "grad_norm": 0.5754606290733213, "learning_rate": 2.946675248957904e-06, "loss": 0.0391, "step": 110425 }, { "epoch": 0.46077392327527933, "grad_norm": 1.2570804472918975, "learning_rate": 2.9466085384786695e-06, "loss": 0.0355, "step": 110430 }, { "epoch": 0.46079478599026963, "grad_norm": 0.7196662735034027, "learning_rate": 2.9465418325300522e-06, "loss": 0.0209, "step": 110435 }, { "epoch": 0.46081564870525993, "grad_norm": 0.6763619830116037, "learning_rate": 2.9464751311115404e-06, "loss": 0.0218, "step": 110440 }, { "epoch": 0.46083651142025017, "grad_norm": 0.8723117547190258, "learning_rate": 2.9464084342226208e-06, "loss": 0.0296, "step": 110445 }, { "epoch": 0.46085737413524047, "grad_norm": 1.486763011422208, "learning_rate": 2.946341741862781e-06, "loss": 0.0227, "step": 110450 }, { "epoch": 0.4608782368502307, "grad_norm": 0.6331168772372492, "learning_rate": 2.9462750540315083e-06, "loss": 0.0207, "step": 110455 }, { "epoch": 0.460899099565221, "grad_norm": 0.5325538041074627, "learning_rate": 2.9462083707282903e-06, "loss": 0.0251, "step": 110460 }, { "epoch": 0.4609199622802113, "grad_norm": 1.0689534746519187, "learning_rate": 2.946141691952615e-06, "loss": 0.0239, "step": 110465 }, { "epoch": 0.46094082499520156, "grad_norm": 0.6304644138907717, "learning_rate": 2.9460750177039686e-06, "loss": 0.0256, "step": 110470 }, { "epoch": 0.46096168771019186, "grad_norm": 0.5527972203969798, "learning_rate": 2.9460083479818414e-06, "loss": 0.0225, "step": 110475 }, { "epoch": 0.46098255042518216, "grad_norm": 1.9434129993968157, "learning_rate": 2.9459416827857186e-06, "loss": 0.0263, "step": 110480 }, { "epoch": 0.4610034131401724, "grad_norm": 1.8774044831617551, "learning_rate": 2.94587502211509e-06, "loss": 0.0196, "step": 110485 }, { "epoch": 0.4610242758551627, "grad_norm": 0.5446682060445036, "learning_rate": 2.945808365969443e-06, "loss": 0.0285, "step": 110490 }, { "epoch": 0.46104513857015295, "grad_norm": 0.6957911702851155, "learning_rate": 2.945741714348265e-06, "loss": 0.0219, "step": 110495 }, { "epoch": 0.46106600128514325, "grad_norm": 0.6009501734461866, "learning_rate": 2.945675067251046e-06, "loss": 0.0205, "step": 110500 }, { "epoch": 0.46108686400013355, "grad_norm": 1.641121976629863, "learning_rate": 2.945608424677272e-06, "loss": 0.0275, "step": 110505 }, { "epoch": 0.4611077267151238, "grad_norm": 0.9527445129775476, "learning_rate": 2.9455417866264323e-06, "loss": 0.0204, "step": 110510 }, { "epoch": 0.4611285894301141, "grad_norm": 0.7293662976451278, "learning_rate": 2.9454751530980165e-06, "loss": 0.0182, "step": 110515 }, { "epoch": 0.46114945214510433, "grad_norm": 1.0020735027985486, "learning_rate": 2.945408524091511e-06, "loss": 0.0323, "step": 110520 }, { "epoch": 0.46117031486009463, "grad_norm": 0.5333083597421759, "learning_rate": 2.9453418996064058e-06, "loss": 0.0232, "step": 110525 }, { "epoch": 0.46119117757508493, "grad_norm": 0.6022828357953541, "learning_rate": 2.9452752796421892e-06, "loss": 0.02, "step": 110530 }, { "epoch": 0.4612120402900752, "grad_norm": 0.767016966109405, "learning_rate": 2.9452086641983496e-06, "loss": 0.0232, "step": 110535 }, { "epoch": 0.4612329030050655, "grad_norm": 0.6327237057180083, "learning_rate": 2.945142053274376e-06, "loss": 0.0208, "step": 110540 }, { "epoch": 0.4612537657200557, "grad_norm": 1.7509892199996822, "learning_rate": 2.945075446869758e-06, "loss": 0.0308, "step": 110545 }, { "epoch": 0.461274628435046, "grad_norm": 1.183090848437101, "learning_rate": 2.945008844983984e-06, "loss": 0.0427, "step": 110550 }, { "epoch": 0.4612954911500363, "grad_norm": 0.6647721702188659, "learning_rate": 2.944942247616542e-06, "loss": 0.0206, "step": 110555 }, { "epoch": 0.46131635386502656, "grad_norm": 0.4948712935813146, "learning_rate": 2.9448756547669235e-06, "loss": 0.0175, "step": 110560 }, { "epoch": 0.46133721658001686, "grad_norm": 1.6006708284333448, "learning_rate": 2.9448090664346154e-06, "loss": 0.03, "step": 110565 }, { "epoch": 0.46135807929500716, "grad_norm": 0.33628374917358755, "learning_rate": 2.944742482619108e-06, "loss": 0.0292, "step": 110570 }, { "epoch": 0.4613789420099974, "grad_norm": 1.109997260684034, "learning_rate": 2.944675903319891e-06, "loss": 0.0251, "step": 110575 }, { "epoch": 0.4613998047249877, "grad_norm": 0.7987310351043866, "learning_rate": 2.944609328536454e-06, "loss": 0.022, "step": 110580 }, { "epoch": 0.46142066743997795, "grad_norm": 0.9501827196926148, "learning_rate": 2.9445427582682856e-06, "loss": 0.0232, "step": 110585 }, { "epoch": 0.46144153015496825, "grad_norm": 0.9128363400348192, "learning_rate": 2.944476192514876e-06, "loss": 0.0311, "step": 110590 }, { "epoch": 0.46146239286995855, "grad_norm": 0.48936130240702946, "learning_rate": 2.944409631275715e-06, "loss": 0.027, "step": 110595 }, { "epoch": 0.4614832555849488, "grad_norm": 0.40543574883320227, "learning_rate": 2.944343074550292e-06, "loss": 0.0166, "step": 110600 }, { "epoch": 0.4615041182999391, "grad_norm": 0.9697083783646381, "learning_rate": 2.9442765223380975e-06, "loss": 0.0315, "step": 110605 }, { "epoch": 0.46152498101492934, "grad_norm": 1.0797272683573862, "learning_rate": 2.944209974638621e-06, "loss": 0.0379, "step": 110610 }, { "epoch": 0.46154584372991964, "grad_norm": 1.1140855974700608, "learning_rate": 2.944143431451352e-06, "loss": 0.0331, "step": 110615 }, { "epoch": 0.46156670644490994, "grad_norm": 0.49718465568059544, "learning_rate": 2.9440768927757817e-06, "loss": 0.0321, "step": 110620 }, { "epoch": 0.4615875691599002, "grad_norm": 0.6850846494673065, "learning_rate": 2.9440103586114e-06, "loss": 0.0279, "step": 110625 }, { "epoch": 0.4616084318748905, "grad_norm": 0.9617514979427442, "learning_rate": 2.943943828957697e-06, "loss": 0.0232, "step": 110630 }, { "epoch": 0.4616292945898807, "grad_norm": 0.5959167607660762, "learning_rate": 2.943877303814163e-06, "loss": 0.0233, "step": 110635 }, { "epoch": 0.461650157304871, "grad_norm": 0.9362496086745199, "learning_rate": 2.943810783180288e-06, "loss": 0.0353, "step": 110640 }, { "epoch": 0.4616710200198613, "grad_norm": 0.6431356268766436, "learning_rate": 2.9437442670555633e-06, "loss": 0.0438, "step": 110645 }, { "epoch": 0.46169188273485157, "grad_norm": 0.41315929866096396, "learning_rate": 2.943677755439479e-06, "loss": 0.021, "step": 110650 }, { "epoch": 0.46171274544984187, "grad_norm": 0.49152260872125453, "learning_rate": 2.943611248331526e-06, "loss": 0.0284, "step": 110655 }, { "epoch": 0.46173360816483217, "grad_norm": 0.9128147722165698, "learning_rate": 2.943544745731195e-06, "loss": 0.0321, "step": 110660 }, { "epoch": 0.4617544708798224, "grad_norm": 0.8132342315966222, "learning_rate": 2.9434782476379774e-06, "loss": 0.0312, "step": 110665 }, { "epoch": 0.4617753335948127, "grad_norm": 0.9071194693995858, "learning_rate": 2.943411754051363e-06, "loss": 0.0261, "step": 110670 }, { "epoch": 0.46179619630980295, "grad_norm": 0.624741358329592, "learning_rate": 2.9433452649708435e-06, "loss": 0.0228, "step": 110675 }, { "epoch": 0.46181705902479325, "grad_norm": 0.8949967995983251, "learning_rate": 2.9432787803959095e-06, "loss": 0.0212, "step": 110680 }, { "epoch": 0.46183792173978355, "grad_norm": 0.7062881551930574, "learning_rate": 2.943212300326053e-06, "loss": 0.0279, "step": 110685 }, { "epoch": 0.4618587844547738, "grad_norm": 0.33622367587086516, "learning_rate": 2.943145824760765e-06, "loss": 0.0258, "step": 110690 }, { "epoch": 0.4618796471697641, "grad_norm": 0.6504430629682795, "learning_rate": 2.9430793536995355e-06, "loss": 0.044, "step": 110695 }, { "epoch": 0.46190050988475434, "grad_norm": 0.5194058932653136, "learning_rate": 2.943012887141858e-06, "loss": 0.0215, "step": 110700 }, { "epoch": 0.46192137259974464, "grad_norm": 0.7957118793297749, "learning_rate": 2.9429464250872223e-06, "loss": 0.0223, "step": 110705 }, { "epoch": 0.46194223531473494, "grad_norm": 0.6097349925516848, "learning_rate": 2.942879967535121e-06, "loss": 0.0236, "step": 110710 }, { "epoch": 0.4619630980297252, "grad_norm": 0.9729306655987212, "learning_rate": 2.942813514485045e-06, "loss": 0.03, "step": 110715 }, { "epoch": 0.4619839607447155, "grad_norm": 0.791058205821568, "learning_rate": 2.942747065936487e-06, "loss": 0.0281, "step": 110720 }, { "epoch": 0.4620048234597057, "grad_norm": 0.6976264882475205, "learning_rate": 2.9426806218889375e-06, "loss": 0.0798, "step": 110725 }, { "epoch": 0.462025686174696, "grad_norm": 0.9925033048017388, "learning_rate": 2.9426141823418896e-06, "loss": 0.0216, "step": 110730 }, { "epoch": 0.4620465488896863, "grad_norm": 0.5747035188637086, "learning_rate": 2.9425477472948343e-06, "loss": 0.0237, "step": 110735 }, { "epoch": 0.46206741160467657, "grad_norm": 0.3509267659249891, "learning_rate": 2.9424813167472645e-06, "loss": 0.023, "step": 110740 }, { "epoch": 0.46208827431966687, "grad_norm": 0.35990753477501597, "learning_rate": 2.9424148906986717e-06, "loss": 0.0205, "step": 110745 }, { "epoch": 0.46210913703465717, "grad_norm": 0.5044253945758811, "learning_rate": 2.942348469148548e-06, "loss": 0.0269, "step": 110750 }, { "epoch": 0.4621299997496474, "grad_norm": 0.4981324172222268, "learning_rate": 2.9422820520963863e-06, "loss": 0.0176, "step": 110755 }, { "epoch": 0.4621508624646377, "grad_norm": 0.6636922154947313, "learning_rate": 2.9422156395416785e-06, "loss": 0.0248, "step": 110760 }, { "epoch": 0.46217172517962796, "grad_norm": 0.4516553717470487, "learning_rate": 2.9421492314839173e-06, "loss": 0.0285, "step": 110765 }, { "epoch": 0.46219258789461826, "grad_norm": 0.954557466619045, "learning_rate": 2.942082827922595e-06, "loss": 0.0321, "step": 110770 }, { "epoch": 0.46221345060960856, "grad_norm": 0.6595867930313171, "learning_rate": 2.9420164288572044e-06, "loss": 0.0278, "step": 110775 }, { "epoch": 0.4622343133245988, "grad_norm": 0.38534127624898884, "learning_rate": 2.941950034287238e-06, "loss": 0.0212, "step": 110780 }, { "epoch": 0.4622551760395891, "grad_norm": 0.8159643405326614, "learning_rate": 2.9418836442121895e-06, "loss": 0.0301, "step": 110785 }, { "epoch": 0.46227603875457934, "grad_norm": 0.9351137123311735, "learning_rate": 2.9418172586315504e-06, "loss": 0.0271, "step": 110790 }, { "epoch": 0.46229690146956964, "grad_norm": 0.738933627358971, "learning_rate": 2.9417508775448132e-06, "loss": 0.0321, "step": 110795 }, { "epoch": 0.46231776418455994, "grad_norm": 0.7981595483723292, "learning_rate": 2.9416845009514728e-06, "loss": 0.0261, "step": 110800 }, { "epoch": 0.4623386268995502, "grad_norm": 0.5857063534978395, "learning_rate": 2.941618128851021e-06, "loss": 0.0214, "step": 110805 }, { "epoch": 0.4623594896145405, "grad_norm": 0.705257194841417, "learning_rate": 2.9415517612429517e-06, "loss": 0.0254, "step": 110810 }, { "epoch": 0.46238035232953073, "grad_norm": 0.6513734974477685, "learning_rate": 2.9414853981267575e-06, "loss": 0.0235, "step": 110815 }, { "epoch": 0.46240121504452103, "grad_norm": 0.8577741569400007, "learning_rate": 2.9414190395019323e-06, "loss": 0.0514, "step": 110820 }, { "epoch": 0.46242207775951133, "grad_norm": 0.5369050572887618, "learning_rate": 2.9413526853679686e-06, "loss": 0.0333, "step": 110825 }, { "epoch": 0.4624429404745016, "grad_norm": 1.1148585222927094, "learning_rate": 2.9412863357243608e-06, "loss": 0.0187, "step": 110830 }, { "epoch": 0.4624638031894919, "grad_norm": 0.902596247055303, "learning_rate": 2.9412199905706022e-06, "loss": 0.0273, "step": 110835 }, { "epoch": 0.4624846659044822, "grad_norm": 0.8289269492481272, "learning_rate": 2.9411536499061865e-06, "loss": 0.0255, "step": 110840 }, { "epoch": 0.4625055286194724, "grad_norm": 0.7127359501522325, "learning_rate": 2.941087313730607e-06, "loss": 0.0213, "step": 110845 }, { "epoch": 0.4625263913344627, "grad_norm": 1.3559374552242498, "learning_rate": 2.9410209820433583e-06, "loss": 0.0259, "step": 110850 }, { "epoch": 0.46254725404945296, "grad_norm": 0.730347504684322, "learning_rate": 2.940954654843933e-06, "loss": 0.0273, "step": 110855 }, { "epoch": 0.46256811676444326, "grad_norm": 0.25570075168633855, "learning_rate": 2.9408883321318264e-06, "loss": 0.0196, "step": 110860 }, { "epoch": 0.46258897947943356, "grad_norm": 0.6411530960025484, "learning_rate": 2.940822013906532e-06, "loss": 0.0188, "step": 110865 }, { "epoch": 0.4626098421944238, "grad_norm": 1.0309349229508313, "learning_rate": 2.940755700167544e-06, "loss": 0.0348, "step": 110870 }, { "epoch": 0.4626307049094141, "grad_norm": 0.38326991806733296, "learning_rate": 2.940689390914356e-06, "loss": 0.0211, "step": 110875 }, { "epoch": 0.46265156762440435, "grad_norm": 0.4800322433976128, "learning_rate": 2.9406230861464634e-06, "loss": 0.0201, "step": 110880 }, { "epoch": 0.46267243033939465, "grad_norm": 0.981970504269608, "learning_rate": 2.94055678586336e-06, "loss": 0.0222, "step": 110885 }, { "epoch": 0.46269329305438495, "grad_norm": 0.7424280061623362, "learning_rate": 2.94049049006454e-06, "loss": 0.0256, "step": 110890 }, { "epoch": 0.4627141557693752, "grad_norm": 0.6407118428347748, "learning_rate": 2.9404241987494985e-06, "loss": 0.0263, "step": 110895 }, { "epoch": 0.4627350184843655, "grad_norm": 0.3609570957942157, "learning_rate": 2.9403579119177294e-06, "loss": 0.0267, "step": 110900 }, { "epoch": 0.46275588119935573, "grad_norm": 0.3977756020636972, "learning_rate": 2.9402916295687276e-06, "loss": 0.0252, "step": 110905 }, { "epoch": 0.46277674391434603, "grad_norm": 0.45117921812115164, "learning_rate": 2.9402253517019887e-06, "loss": 0.0192, "step": 110910 }, { "epoch": 0.46279760662933633, "grad_norm": 0.3852717000836678, "learning_rate": 2.9401590783170066e-06, "loss": 0.0201, "step": 110915 }, { "epoch": 0.4628184693443266, "grad_norm": 1.3038746026333181, "learning_rate": 2.940092809413276e-06, "loss": 0.0203, "step": 110920 }, { "epoch": 0.4628393320593169, "grad_norm": 0.6091279389944729, "learning_rate": 2.9400265449902924e-06, "loss": 0.0211, "step": 110925 }, { "epoch": 0.4628601947743072, "grad_norm": 0.5813780093650238, "learning_rate": 2.9399602850475516e-06, "loss": 0.0223, "step": 110930 }, { "epoch": 0.4628810574892974, "grad_norm": 0.7891201464947909, "learning_rate": 2.9398940295845474e-06, "loss": 0.0265, "step": 110935 }, { "epoch": 0.4629019202042877, "grad_norm": 1.2425336432829668, "learning_rate": 2.9398277786007763e-06, "loss": 0.0254, "step": 110940 }, { "epoch": 0.46292278291927796, "grad_norm": 1.0656371846963129, "learning_rate": 2.9397615320957325e-06, "loss": 0.028, "step": 110945 }, { "epoch": 0.46294364563426826, "grad_norm": 0.5794469322469218, "learning_rate": 2.9396952900689117e-06, "loss": 0.0224, "step": 110950 }, { "epoch": 0.46296450834925856, "grad_norm": 0.42564783215169044, "learning_rate": 2.9396290525198103e-06, "loss": 0.0214, "step": 110955 }, { "epoch": 0.4629853710642488, "grad_norm": 0.8369555659559963, "learning_rate": 2.9395628194479226e-06, "loss": 0.0296, "step": 110960 }, { "epoch": 0.4630062337792391, "grad_norm": 1.01823677190628, "learning_rate": 2.939496590852745e-06, "loss": 0.0266, "step": 110965 }, { "epoch": 0.46302709649422935, "grad_norm": 0.655475851087507, "learning_rate": 2.9394303667337732e-06, "loss": 0.0266, "step": 110970 }, { "epoch": 0.46304795920921965, "grad_norm": 0.6370688353992912, "learning_rate": 2.939364147090502e-06, "loss": 0.0189, "step": 110975 }, { "epoch": 0.46306882192420995, "grad_norm": 1.4065837547763183, "learning_rate": 2.9392979319224284e-06, "loss": 0.0457, "step": 110980 }, { "epoch": 0.4630896846392002, "grad_norm": 0.542783514101947, "learning_rate": 2.939231721229049e-06, "loss": 0.0186, "step": 110985 }, { "epoch": 0.4631105473541905, "grad_norm": 2.4250406918020224, "learning_rate": 2.939165515009858e-06, "loss": 0.0235, "step": 110990 }, { "epoch": 0.46313141006918074, "grad_norm": 0.7063697505651098, "learning_rate": 2.9390993132643525e-06, "loss": 0.0268, "step": 110995 }, { "epoch": 0.46315227278417104, "grad_norm": 0.38557565255962944, "learning_rate": 2.939033115992028e-06, "loss": 0.0269, "step": 111000 }, { "epoch": 0.46317313549916134, "grad_norm": 0.8567674671056918, "learning_rate": 2.9389669231923825e-06, "loss": 0.0241, "step": 111005 }, { "epoch": 0.4631939982141516, "grad_norm": 0.3127634234977477, "learning_rate": 2.9389007348649107e-06, "loss": 0.0305, "step": 111010 }, { "epoch": 0.4632148609291419, "grad_norm": 0.7060844515538419, "learning_rate": 2.938834551009109e-06, "loss": 0.0183, "step": 111015 }, { "epoch": 0.4632357236441322, "grad_norm": 1.8148032144363258, "learning_rate": 2.9387683716244753e-06, "loss": 0.0236, "step": 111020 }, { "epoch": 0.4632565863591224, "grad_norm": 0.8763150456022742, "learning_rate": 2.938702196710505e-06, "loss": 0.027, "step": 111025 }, { "epoch": 0.4632774490741127, "grad_norm": 0.45577647571335606, "learning_rate": 2.9386360262666953e-06, "loss": 0.0228, "step": 111030 }, { "epoch": 0.46329831178910297, "grad_norm": 1.1453491165867367, "learning_rate": 2.9385698602925424e-06, "loss": 0.0346, "step": 111035 }, { "epoch": 0.46331917450409327, "grad_norm": 0.6298860273788915, "learning_rate": 2.9385036987875437e-06, "loss": 0.0299, "step": 111040 }, { "epoch": 0.46334003721908357, "grad_norm": 0.6970539325505746, "learning_rate": 2.9384375417511956e-06, "loss": 0.0306, "step": 111045 }, { "epoch": 0.4633608999340738, "grad_norm": 0.6882992462923485, "learning_rate": 2.9383713891829964e-06, "loss": 0.0179, "step": 111050 }, { "epoch": 0.4633817626490641, "grad_norm": 0.4290365729211988, "learning_rate": 2.9383052410824415e-06, "loss": 0.0208, "step": 111055 }, { "epoch": 0.46340262536405435, "grad_norm": 0.4304127030558466, "learning_rate": 2.9382390974490286e-06, "loss": 0.0334, "step": 111060 }, { "epoch": 0.46342348807904465, "grad_norm": 0.6399682729683243, "learning_rate": 2.9381729582822556e-06, "loss": 0.023, "step": 111065 }, { "epoch": 0.46344435079403495, "grad_norm": 0.8824080579243806, "learning_rate": 2.9381068235816183e-06, "loss": 0.0276, "step": 111070 }, { "epoch": 0.4634652135090252, "grad_norm": 0.8254770959393058, "learning_rate": 2.938040693346616e-06, "loss": 0.0238, "step": 111075 }, { "epoch": 0.4634860762240155, "grad_norm": 0.3482199969409767, "learning_rate": 2.937974567576745e-06, "loss": 0.0275, "step": 111080 }, { "epoch": 0.46350693893900574, "grad_norm": 0.637547054791266, "learning_rate": 2.937908446271503e-06, "loss": 0.0216, "step": 111085 }, { "epoch": 0.46352780165399604, "grad_norm": 1.3252870612378516, "learning_rate": 2.9378423294303882e-06, "loss": 0.0262, "step": 111090 }, { "epoch": 0.46354866436898634, "grad_norm": 0.5799545849499625, "learning_rate": 2.937776217052897e-06, "loss": 0.0236, "step": 111095 }, { "epoch": 0.4635695270839766, "grad_norm": 0.6161881452924648, "learning_rate": 2.937710109138528e-06, "loss": 0.02, "step": 111100 }, { "epoch": 0.4635903897989669, "grad_norm": 0.6862312012795178, "learning_rate": 2.9376440056867793e-06, "loss": 0.021, "step": 111105 }, { "epoch": 0.4636112525139572, "grad_norm": 0.545063063956283, "learning_rate": 2.937577906697149e-06, "loss": 0.0227, "step": 111110 }, { "epoch": 0.46363211522894743, "grad_norm": 0.6932522790711553, "learning_rate": 2.937511812169134e-06, "loss": 0.0257, "step": 111115 }, { "epoch": 0.4636529779439377, "grad_norm": 1.0151039807801303, "learning_rate": 2.9374457221022336e-06, "loss": 0.0337, "step": 111120 }, { "epoch": 0.46367384065892797, "grad_norm": 0.6122804601672402, "learning_rate": 2.937379636495945e-06, "loss": 0.02, "step": 111125 }, { "epoch": 0.46369470337391827, "grad_norm": 0.5502115760107067, "learning_rate": 2.9373135553497674e-06, "loss": 0.0494, "step": 111130 }, { "epoch": 0.46371556608890857, "grad_norm": 0.6266418278661687, "learning_rate": 2.937247478663198e-06, "loss": 0.0259, "step": 111135 }, { "epoch": 0.4637364288038988, "grad_norm": 0.7344542135996615, "learning_rate": 2.9371814064357363e-06, "loss": 0.0227, "step": 111140 }, { "epoch": 0.4637572915188891, "grad_norm": 0.5362994886711385, "learning_rate": 2.93711533866688e-06, "loss": 0.0227, "step": 111145 }, { "epoch": 0.46377815423387936, "grad_norm": 0.6394798247962612, "learning_rate": 2.9370492753561288e-06, "loss": 0.0197, "step": 111150 }, { "epoch": 0.46379901694886966, "grad_norm": 1.9573086689105825, "learning_rate": 2.9369832165029797e-06, "loss": 0.0501, "step": 111155 }, { "epoch": 0.46381987966385996, "grad_norm": 0.42291943103474205, "learning_rate": 2.9369171621069324e-06, "loss": 0.0229, "step": 111160 }, { "epoch": 0.4638407423788502, "grad_norm": 1.1985217903602923, "learning_rate": 2.9368511121674863e-06, "loss": 0.0317, "step": 111165 }, { "epoch": 0.4638616050938405, "grad_norm": 0.36668498551478557, "learning_rate": 2.93678506668414e-06, "loss": 0.0244, "step": 111170 }, { "epoch": 0.46388246780883075, "grad_norm": 0.995989842205536, "learning_rate": 2.936719025656391e-06, "loss": 0.0298, "step": 111175 }, { "epoch": 0.46390333052382104, "grad_norm": 0.5878573532606248, "learning_rate": 2.9366529890837396e-06, "loss": 0.0245, "step": 111180 }, { "epoch": 0.46392419323881134, "grad_norm": 1.4579754031412255, "learning_rate": 2.936586956965685e-06, "loss": 0.0302, "step": 111185 }, { "epoch": 0.4639450559538016, "grad_norm": 0.5623173707826324, "learning_rate": 2.936520929301726e-06, "loss": 0.0261, "step": 111190 }, { "epoch": 0.4639659186687919, "grad_norm": 0.6264422308693784, "learning_rate": 2.936454906091363e-06, "loss": 0.0322, "step": 111195 }, { "epoch": 0.4639867813837822, "grad_norm": 0.8291670597781221, "learning_rate": 2.936388887334093e-06, "loss": 0.0271, "step": 111200 }, { "epoch": 0.46400764409877243, "grad_norm": 0.5633379453996917, "learning_rate": 2.9363228730294175e-06, "loss": 0.0359, "step": 111205 }, { "epoch": 0.46402850681376273, "grad_norm": 0.5021078767953979, "learning_rate": 2.936256863176835e-06, "loss": 0.0295, "step": 111210 }, { "epoch": 0.464049369528753, "grad_norm": 0.7387227342098078, "learning_rate": 2.9361908577758463e-06, "loss": 0.027, "step": 111215 }, { "epoch": 0.4640702322437433, "grad_norm": 0.5025109770810955, "learning_rate": 2.936124856825949e-06, "loss": 0.0204, "step": 111220 }, { "epoch": 0.4640910949587336, "grad_norm": 0.5212742844508793, "learning_rate": 2.9360588603266455e-06, "loss": 0.0279, "step": 111225 }, { "epoch": 0.4641119576737238, "grad_norm": 0.6530636466157546, "learning_rate": 2.935992868277433e-06, "loss": 0.02, "step": 111230 }, { "epoch": 0.4641328203887141, "grad_norm": 0.7279252735573088, "learning_rate": 2.9359268806778134e-06, "loss": 0.0224, "step": 111235 }, { "epoch": 0.46415368310370436, "grad_norm": 1.0458706054692626, "learning_rate": 2.9358608975272857e-06, "loss": 0.0205, "step": 111240 }, { "epoch": 0.46417454581869466, "grad_norm": 0.968923696548287, "learning_rate": 2.9357949188253497e-06, "loss": 0.025, "step": 111245 }, { "epoch": 0.46419540853368496, "grad_norm": 0.7172153530370255, "learning_rate": 2.9357289445715063e-06, "loss": 0.0254, "step": 111250 }, { "epoch": 0.4642162712486752, "grad_norm": 4.414574212870387, "learning_rate": 2.935662974765256e-06, "loss": 0.0248, "step": 111255 }, { "epoch": 0.4642371339636655, "grad_norm": 0.6809174615304984, "learning_rate": 2.935597009406098e-06, "loss": 0.0266, "step": 111260 }, { "epoch": 0.46425799667865575, "grad_norm": 0.929058230369874, "learning_rate": 2.9355310484935336e-06, "loss": 0.024, "step": 111265 }, { "epoch": 0.46427885939364605, "grad_norm": 0.717018748142861, "learning_rate": 2.935465092027063e-06, "loss": 0.0191, "step": 111270 }, { "epoch": 0.46429972210863635, "grad_norm": 0.5455012849747584, "learning_rate": 2.935399140006186e-06, "loss": 0.0286, "step": 111275 }, { "epoch": 0.4643205848236266, "grad_norm": 0.5830971984294415, "learning_rate": 2.9353331924304048e-06, "loss": 0.0198, "step": 111280 }, { "epoch": 0.4643414475386169, "grad_norm": 0.6701372455880678, "learning_rate": 2.9352672492992184e-06, "loss": 0.0283, "step": 111285 }, { "epoch": 0.4643623102536072, "grad_norm": 0.629218759016241, "learning_rate": 2.9352013106121284e-06, "loss": 0.0209, "step": 111290 }, { "epoch": 0.46438317296859744, "grad_norm": 0.41564938068241225, "learning_rate": 2.9351353763686364e-06, "loss": 0.0239, "step": 111295 }, { "epoch": 0.46440403568358773, "grad_norm": 0.7716731414107726, "learning_rate": 2.9350694465682418e-06, "loss": 0.0237, "step": 111300 }, { "epoch": 0.464424898398578, "grad_norm": 0.7618583955099624, "learning_rate": 2.935003521210446e-06, "loss": 0.0269, "step": 111305 }, { "epoch": 0.4644457611135683, "grad_norm": 0.5976731967780288, "learning_rate": 2.934937600294751e-06, "loss": 0.027, "step": 111310 }, { "epoch": 0.4644666238285586, "grad_norm": 0.7967543882271689, "learning_rate": 2.9348716838206575e-06, "loss": 0.026, "step": 111315 }, { "epoch": 0.4644874865435488, "grad_norm": 0.6755968327040424, "learning_rate": 2.9348057717876666e-06, "loss": 0.0198, "step": 111320 }, { "epoch": 0.4645083492585391, "grad_norm": 0.40209836868686094, "learning_rate": 2.9347398641952803e-06, "loss": 0.0254, "step": 111325 }, { "epoch": 0.46452921197352937, "grad_norm": 0.8466110611779297, "learning_rate": 2.934673961042998e-06, "loss": 0.0278, "step": 111330 }, { "epoch": 0.46455007468851967, "grad_norm": 0.8005338479929507, "learning_rate": 2.9346080623303234e-06, "loss": 0.0248, "step": 111335 }, { "epoch": 0.46457093740350996, "grad_norm": 0.7581384897494718, "learning_rate": 2.9345421680567572e-06, "loss": 0.021, "step": 111340 }, { "epoch": 0.4645918001185002, "grad_norm": 0.5887813051526167, "learning_rate": 2.9344762782218012e-06, "loss": 0.0272, "step": 111345 }, { "epoch": 0.4646126628334905, "grad_norm": 0.7585138964715284, "learning_rate": 2.934410392824957e-06, "loss": 0.016, "step": 111350 }, { "epoch": 0.46463352554848075, "grad_norm": 0.5963928736684905, "learning_rate": 2.934344511865726e-06, "loss": 0.0201, "step": 111355 }, { "epoch": 0.46465438826347105, "grad_norm": 0.5435700047931812, "learning_rate": 2.934278635343611e-06, "loss": 0.0253, "step": 111360 }, { "epoch": 0.46467525097846135, "grad_norm": 0.4089387404453061, "learning_rate": 2.934212763258113e-06, "loss": 0.0222, "step": 111365 }, { "epoch": 0.4646961136934516, "grad_norm": 0.7604891086921409, "learning_rate": 2.934146895608734e-06, "loss": 0.0246, "step": 111370 }, { "epoch": 0.4647169764084419, "grad_norm": 1.0211055491386647, "learning_rate": 2.9340810323949776e-06, "loss": 0.0236, "step": 111375 }, { "epoch": 0.46473783912343214, "grad_norm": 0.4102983048623651, "learning_rate": 2.934015173616344e-06, "loss": 0.0265, "step": 111380 }, { "epoch": 0.46475870183842244, "grad_norm": 0.46131184458924596, "learning_rate": 2.9339493192723375e-06, "loss": 0.0157, "step": 111385 }, { "epoch": 0.46477956455341274, "grad_norm": 0.4333628164711225, "learning_rate": 2.9338834693624586e-06, "loss": 0.0255, "step": 111390 }, { "epoch": 0.464800427268403, "grad_norm": 1.3668913519193504, "learning_rate": 2.93381762388621e-06, "loss": 0.0245, "step": 111395 }, { "epoch": 0.4648212899833933, "grad_norm": 1.05711596681147, "learning_rate": 2.933751782843095e-06, "loss": 0.0343, "step": 111400 }, { "epoch": 0.4648421526983836, "grad_norm": 1.248009549442791, "learning_rate": 2.933685946232616e-06, "loss": 0.029, "step": 111405 }, { "epoch": 0.4648630154133738, "grad_norm": 0.7424085157913378, "learning_rate": 2.9336201140542763e-06, "loss": 0.02, "step": 111410 }, { "epoch": 0.4648838781283641, "grad_norm": 0.6133173346795017, "learning_rate": 2.933554286307577e-06, "loss": 0.0281, "step": 111415 }, { "epoch": 0.46490474084335437, "grad_norm": 0.5828760200537172, "learning_rate": 2.933488462992022e-06, "loss": 0.033, "step": 111420 }, { "epoch": 0.46492560355834467, "grad_norm": 0.9146311645661733, "learning_rate": 2.9334226441071134e-06, "loss": 0.0276, "step": 111425 }, { "epoch": 0.46494646627333497, "grad_norm": 0.3576446980436807, "learning_rate": 2.9333568296523553e-06, "loss": 0.0315, "step": 111430 }, { "epoch": 0.4649673289883252, "grad_norm": 0.7700730490382568, "learning_rate": 2.93329101962725e-06, "loss": 0.0248, "step": 111435 }, { "epoch": 0.4649881917033155, "grad_norm": 0.9324620858472811, "learning_rate": 2.9332252140313007e-06, "loss": 0.0236, "step": 111440 }, { "epoch": 0.46500905441830576, "grad_norm": 0.8325638779072917, "learning_rate": 2.9331594128640107e-06, "loss": 0.021, "step": 111445 }, { "epoch": 0.46502991713329606, "grad_norm": 0.5721828444731252, "learning_rate": 2.9330936161248836e-06, "loss": 0.0237, "step": 111450 }, { "epoch": 0.46505077984828636, "grad_norm": 0.4716875417486658, "learning_rate": 2.9330278238134218e-06, "loss": 0.0251, "step": 111455 }, { "epoch": 0.4650716425632766, "grad_norm": 0.41375465656396077, "learning_rate": 2.93296203592913e-06, "loss": 0.0252, "step": 111460 }, { "epoch": 0.4650925052782669, "grad_norm": 0.7296524598051826, "learning_rate": 2.9328962524715105e-06, "loss": 0.0344, "step": 111465 }, { "epoch": 0.46511336799325714, "grad_norm": 1.114802592291093, "learning_rate": 2.932830473440068e-06, "loss": 0.0383, "step": 111470 }, { "epoch": 0.46513423070824744, "grad_norm": 0.8145179165212272, "learning_rate": 2.9327646988343056e-06, "loss": 0.0255, "step": 111475 }, { "epoch": 0.46515509342323774, "grad_norm": 0.8532373768505097, "learning_rate": 2.9326989286537267e-06, "loss": 0.0353, "step": 111480 }, { "epoch": 0.465175956138228, "grad_norm": 0.4167113742842291, "learning_rate": 2.9326331628978353e-06, "loss": 0.024, "step": 111485 }, { "epoch": 0.4651968188532183, "grad_norm": 0.8536096407914145, "learning_rate": 2.9325674015661364e-06, "loss": 0.023, "step": 111490 }, { "epoch": 0.4652176815682086, "grad_norm": 0.42892849053526994, "learning_rate": 2.9325016446581322e-06, "loss": 0.0222, "step": 111495 }, { "epoch": 0.46523854428319883, "grad_norm": 0.9097402868125922, "learning_rate": 2.932435892173328e-06, "loss": 0.0241, "step": 111500 }, { "epoch": 0.46525940699818913, "grad_norm": 0.7510328401504928, "learning_rate": 2.932370144111228e-06, "loss": 0.0245, "step": 111505 }, { "epoch": 0.4652802697131794, "grad_norm": 0.45791338656406366, "learning_rate": 2.932304400471336e-06, "loss": 0.0189, "step": 111510 }, { "epoch": 0.4653011324281697, "grad_norm": 0.5976400485989798, "learning_rate": 2.9322386612531557e-06, "loss": 0.022, "step": 111515 }, { "epoch": 0.46532199514315997, "grad_norm": 0.3951181706123301, "learning_rate": 2.9321729264561928e-06, "loss": 0.0337, "step": 111520 }, { "epoch": 0.4653428578581502, "grad_norm": 0.5331000792181732, "learning_rate": 2.9321071960799503e-06, "loss": 0.0201, "step": 111525 }, { "epoch": 0.4653637205731405, "grad_norm": 1.0707952798463232, "learning_rate": 2.9320414701239335e-06, "loss": 0.0234, "step": 111530 }, { "epoch": 0.46538458328813076, "grad_norm": 0.3004529938755773, "learning_rate": 2.9319757485876475e-06, "loss": 0.0151, "step": 111535 }, { "epoch": 0.46540544600312106, "grad_norm": 0.55793749424217, "learning_rate": 2.931910031470596e-06, "loss": 0.026, "step": 111540 }, { "epoch": 0.46542630871811136, "grad_norm": 0.4893324900332087, "learning_rate": 2.9318443187722845e-06, "loss": 0.0323, "step": 111545 }, { "epoch": 0.4654471714331016, "grad_norm": 0.8673169777617712, "learning_rate": 2.9317786104922174e-06, "loss": 0.033, "step": 111550 }, { "epoch": 0.4654680341480919, "grad_norm": 0.5341704900572735, "learning_rate": 2.9317129066299e-06, "loss": 0.0316, "step": 111555 }, { "epoch": 0.46548889686308215, "grad_norm": 1.6187560075062024, "learning_rate": 2.931647207184837e-06, "loss": 0.0399, "step": 111560 }, { "epoch": 0.46550975957807245, "grad_norm": 0.9439433277152912, "learning_rate": 2.9315815121565333e-06, "loss": 0.0252, "step": 111565 }, { "epoch": 0.46553062229306275, "grad_norm": 1.0913336752154976, "learning_rate": 2.9315158215444943e-06, "loss": 0.0247, "step": 111570 }, { "epoch": 0.465551485008053, "grad_norm": 1.1072908155985375, "learning_rate": 2.931450135348225e-06, "loss": 0.0289, "step": 111575 }, { "epoch": 0.4655723477230433, "grad_norm": 0.6519157088827164, "learning_rate": 2.9313844535672315e-06, "loss": 0.0265, "step": 111580 }, { "epoch": 0.4655932104380336, "grad_norm": 1.3021347122427525, "learning_rate": 2.9313187762010176e-06, "loss": 0.0283, "step": 111585 }, { "epoch": 0.46561407315302383, "grad_norm": 0.4428840683284109, "learning_rate": 2.931253103249091e-06, "loss": 0.0176, "step": 111590 }, { "epoch": 0.46563493586801413, "grad_norm": 0.5208275245710265, "learning_rate": 2.9311874347109553e-06, "loss": 0.0185, "step": 111595 }, { "epoch": 0.4656557985830044, "grad_norm": 0.5507598758605046, "learning_rate": 2.931121770586116e-06, "loss": 0.0244, "step": 111600 }, { "epoch": 0.4656766612979947, "grad_norm": 3.155401889373811, "learning_rate": 2.931056110874081e-06, "loss": 0.0454, "step": 111605 }, { "epoch": 0.465697524012985, "grad_norm": 0.8257172020702712, "learning_rate": 2.9309904555743533e-06, "loss": 0.0324, "step": 111610 }, { "epoch": 0.4657183867279752, "grad_norm": 0.6911089654942524, "learning_rate": 2.9309248046864414e-06, "loss": 0.0224, "step": 111615 }, { "epoch": 0.4657392494429655, "grad_norm": 0.4728147258628064, "learning_rate": 2.9308591582098495e-06, "loss": 0.0254, "step": 111620 }, { "epoch": 0.46576011215795576, "grad_norm": 1.2226676459839705, "learning_rate": 2.930793516144084e-06, "loss": 0.0258, "step": 111625 }, { "epoch": 0.46578097487294606, "grad_norm": 1.2109118996258752, "learning_rate": 2.9307278784886506e-06, "loss": 0.0221, "step": 111630 }, { "epoch": 0.46580183758793636, "grad_norm": 0.5435716123887411, "learning_rate": 2.930662245243056e-06, "loss": 0.0222, "step": 111635 }, { "epoch": 0.4658227003029266, "grad_norm": 0.42457154237447287, "learning_rate": 2.930596616406806e-06, "loss": 0.0234, "step": 111640 }, { "epoch": 0.4658435630179169, "grad_norm": 0.5910314468125795, "learning_rate": 2.9305309919794077e-06, "loss": 0.0228, "step": 111645 }, { "epoch": 0.46586442573290715, "grad_norm": 1.453545264185674, "learning_rate": 2.9304653719603672e-06, "loss": 0.0229, "step": 111650 }, { "epoch": 0.46588528844789745, "grad_norm": 0.8064663835211423, "learning_rate": 2.9303997563491905e-06, "loss": 0.0222, "step": 111655 }, { "epoch": 0.46590615116288775, "grad_norm": 0.7704652310477824, "learning_rate": 2.9303341451453837e-06, "loss": 0.0277, "step": 111660 }, { "epoch": 0.465927013877878, "grad_norm": 1.1980855534298496, "learning_rate": 2.930268538348455e-06, "loss": 0.032, "step": 111665 }, { "epoch": 0.4659478765928683, "grad_norm": 1.1579629706037973, "learning_rate": 2.9302029359579096e-06, "loss": 0.0277, "step": 111670 }, { "epoch": 0.4659687393078586, "grad_norm": 0.8130319475475675, "learning_rate": 2.9301373379732556e-06, "loss": 0.0374, "step": 111675 }, { "epoch": 0.46598960202284884, "grad_norm": 0.60628593116284, "learning_rate": 2.930071744393999e-06, "loss": 0.0239, "step": 111680 }, { "epoch": 0.46601046473783914, "grad_norm": 0.8204688323358207, "learning_rate": 2.930006155219646e-06, "loss": 0.0211, "step": 111685 }, { "epoch": 0.4660313274528294, "grad_norm": 1.0060800481896022, "learning_rate": 2.929940570449706e-06, "loss": 0.0358, "step": 111690 }, { "epoch": 0.4660521901678197, "grad_norm": 0.6210357705134555, "learning_rate": 2.9298749900836833e-06, "loss": 0.0264, "step": 111695 }, { "epoch": 0.46607305288281, "grad_norm": 0.7337016181374788, "learning_rate": 2.9298094141210863e-06, "loss": 0.0214, "step": 111700 }, { "epoch": 0.4660939155978002, "grad_norm": 0.8640444124203568, "learning_rate": 2.929743842561423e-06, "loss": 0.0328, "step": 111705 }, { "epoch": 0.4661147783127905, "grad_norm": 0.919492422700067, "learning_rate": 2.929678275404199e-06, "loss": 0.0332, "step": 111710 }, { "epoch": 0.46613564102778077, "grad_norm": 0.5518630036455465, "learning_rate": 2.9296127126489236e-06, "loss": 0.0317, "step": 111715 }, { "epoch": 0.46615650374277107, "grad_norm": 0.6578631375477366, "learning_rate": 2.929547154295103e-06, "loss": 0.031, "step": 111720 }, { "epoch": 0.46617736645776137, "grad_norm": 1.046226675613342, "learning_rate": 2.9294816003422448e-06, "loss": 0.0197, "step": 111725 }, { "epoch": 0.4661982291727516, "grad_norm": 1.0712573580920632, "learning_rate": 2.9294160507898576e-06, "loss": 0.0254, "step": 111730 }, { "epoch": 0.4662190918877419, "grad_norm": 1.46115309178476, "learning_rate": 2.9293505056374484e-06, "loss": 0.0255, "step": 111735 }, { "epoch": 0.46623995460273215, "grad_norm": 0.8038627017591748, "learning_rate": 2.9292849648845245e-06, "loss": 0.0312, "step": 111740 }, { "epoch": 0.46626081731772245, "grad_norm": 1.3053883209838828, "learning_rate": 2.929219428530594e-06, "loss": 0.0279, "step": 111745 }, { "epoch": 0.46628168003271275, "grad_norm": 0.6336726135099955, "learning_rate": 2.9291538965751653e-06, "loss": 0.0244, "step": 111750 }, { "epoch": 0.466302542747703, "grad_norm": 0.7003845641740566, "learning_rate": 2.9290883690177463e-06, "loss": 0.0313, "step": 111755 }, { "epoch": 0.4663234054626933, "grad_norm": 0.5108019207315256, "learning_rate": 2.9290228458578457e-06, "loss": 0.0228, "step": 111760 }, { "epoch": 0.4663442681776836, "grad_norm": 0.8224589325857474, "learning_rate": 2.9289573270949695e-06, "loss": 0.0315, "step": 111765 }, { "epoch": 0.46636513089267384, "grad_norm": 0.8485309871346607, "learning_rate": 2.9288918127286287e-06, "loss": 0.0304, "step": 111770 }, { "epoch": 0.46638599360766414, "grad_norm": 0.4949966025144094, "learning_rate": 2.9288263027583297e-06, "loss": 0.0232, "step": 111775 }, { "epoch": 0.4664068563226544, "grad_norm": 0.45686425658646485, "learning_rate": 2.9287607971835815e-06, "loss": 0.0265, "step": 111780 }, { "epoch": 0.4664277190376447, "grad_norm": 0.4945953659404009, "learning_rate": 2.928695296003893e-06, "loss": 0.0291, "step": 111785 }, { "epoch": 0.466448581752635, "grad_norm": 0.4645614911300168, "learning_rate": 2.9286297992187712e-06, "loss": 0.0254, "step": 111790 }, { "epoch": 0.4664694444676252, "grad_norm": 0.49055156979689424, "learning_rate": 2.9285643068277262e-06, "loss": 0.0249, "step": 111795 }, { "epoch": 0.4664903071826155, "grad_norm": 0.30947557344177706, "learning_rate": 2.928498818830267e-06, "loss": 0.0207, "step": 111800 }, { "epoch": 0.46651116989760577, "grad_norm": 1.1148556574913961, "learning_rate": 2.9284333352259013e-06, "loss": 0.0292, "step": 111805 }, { "epoch": 0.46653203261259607, "grad_norm": 0.6246217803492807, "learning_rate": 2.9283678560141387e-06, "loss": 0.0239, "step": 111810 }, { "epoch": 0.46655289532758637, "grad_norm": 0.46794652667747877, "learning_rate": 2.9283023811944873e-06, "loss": 0.0268, "step": 111815 }, { "epoch": 0.4665737580425766, "grad_norm": 0.9946628700403743, "learning_rate": 2.9282369107664564e-06, "loss": 0.0298, "step": 111820 }, { "epoch": 0.4665946207575669, "grad_norm": 0.9913618690615855, "learning_rate": 2.928171444729556e-06, "loss": 0.0298, "step": 111825 }, { "epoch": 0.46661548347255716, "grad_norm": 0.49117278814227106, "learning_rate": 2.928105983083295e-06, "loss": 0.0279, "step": 111830 }, { "epoch": 0.46663634618754746, "grad_norm": 0.29223950897855583, "learning_rate": 2.9280405258271816e-06, "loss": 0.0279, "step": 111835 }, { "epoch": 0.46665720890253776, "grad_norm": 0.9229096720628498, "learning_rate": 2.9279750729607246e-06, "loss": 0.0264, "step": 111840 }, { "epoch": 0.466678071617528, "grad_norm": 0.7060311247072043, "learning_rate": 2.927909624483436e-06, "loss": 0.0199, "step": 111845 }, { "epoch": 0.4666989343325183, "grad_norm": 0.9494821349054023, "learning_rate": 2.927844180394823e-06, "loss": 0.0364, "step": 111850 }, { "epoch": 0.4667197970475086, "grad_norm": 0.8930688760655086, "learning_rate": 2.927778740694397e-06, "loss": 0.0249, "step": 111855 }, { "epoch": 0.46674065976249884, "grad_norm": 1.0240057183854896, "learning_rate": 2.9277133053816657e-06, "loss": 0.0334, "step": 111860 }, { "epoch": 0.46676152247748914, "grad_norm": 0.7121881853147098, "learning_rate": 2.92764787445614e-06, "loss": 0.021, "step": 111865 }, { "epoch": 0.4667823851924794, "grad_norm": 0.6015300092191735, "learning_rate": 2.927582447917329e-06, "loss": 0.0211, "step": 111870 }, { "epoch": 0.4668032479074697, "grad_norm": 0.7938581726663073, "learning_rate": 2.927517025764743e-06, "loss": 0.0299, "step": 111875 }, { "epoch": 0.46682411062246, "grad_norm": 1.0952189584469387, "learning_rate": 2.927451607997892e-06, "loss": 0.0248, "step": 111880 }, { "epoch": 0.46684497333745023, "grad_norm": 0.6529883246523527, "learning_rate": 2.9273861946162857e-06, "loss": 0.0297, "step": 111885 }, { "epoch": 0.46686583605244053, "grad_norm": 0.4554073571140589, "learning_rate": 2.927320785619434e-06, "loss": 0.0246, "step": 111890 }, { "epoch": 0.4668866987674308, "grad_norm": 1.516297242011686, "learning_rate": 2.9272553810068484e-06, "loss": 0.0325, "step": 111895 }, { "epoch": 0.4669075614824211, "grad_norm": 0.8198983808908333, "learning_rate": 2.9271899807780372e-06, "loss": 0.0242, "step": 111900 }, { "epoch": 0.4669284241974114, "grad_norm": 1.1514559246547869, "learning_rate": 2.927124584932512e-06, "loss": 0.0273, "step": 111905 }, { "epoch": 0.4669492869124016, "grad_norm": 0.706234697100429, "learning_rate": 2.9270591934697823e-06, "loss": 0.0243, "step": 111910 }, { "epoch": 0.4669701496273919, "grad_norm": 0.5467397945042205, "learning_rate": 2.9269938063893593e-06, "loss": 0.0284, "step": 111915 }, { "epoch": 0.46699101234238216, "grad_norm": 0.2858765055478748, "learning_rate": 2.9269284236907543e-06, "loss": 0.0229, "step": 111920 }, { "epoch": 0.46701187505737246, "grad_norm": 0.9462694731186851, "learning_rate": 2.926863045373476e-06, "loss": 0.033, "step": 111925 }, { "epoch": 0.46703273777236276, "grad_norm": 0.5669231214145344, "learning_rate": 2.926797671437036e-06, "loss": 0.0248, "step": 111930 }, { "epoch": 0.467053600487353, "grad_norm": 0.6360334387489213, "learning_rate": 2.9267323018809452e-06, "loss": 0.0218, "step": 111935 }, { "epoch": 0.4670744632023433, "grad_norm": 0.4585375796113266, "learning_rate": 2.9266669367047147e-06, "loss": 0.0266, "step": 111940 }, { "epoch": 0.4670953259173336, "grad_norm": 0.8604334350062826, "learning_rate": 2.926601575907855e-06, "loss": 0.0194, "step": 111945 }, { "epoch": 0.46711618863232385, "grad_norm": 0.6529800279643012, "learning_rate": 2.9265362194898773e-06, "loss": 0.0272, "step": 111950 }, { "epoch": 0.46713705134731415, "grad_norm": 1.2007734305747702, "learning_rate": 2.926470867450292e-06, "loss": 0.031, "step": 111955 }, { "epoch": 0.4671579140623044, "grad_norm": 0.8118508511230496, "learning_rate": 2.9264055197886117e-06, "loss": 0.0309, "step": 111960 }, { "epoch": 0.4671787767772947, "grad_norm": 0.4512606483718806, "learning_rate": 2.9263401765043457e-06, "loss": 0.0198, "step": 111965 }, { "epoch": 0.467199639492285, "grad_norm": 1.0428221835381792, "learning_rate": 2.9262748375970074e-06, "loss": 0.0221, "step": 111970 }, { "epoch": 0.46722050220727523, "grad_norm": 0.32600393015418644, "learning_rate": 2.926209503066107e-06, "loss": 0.0853, "step": 111975 }, { "epoch": 0.46724136492226553, "grad_norm": 0.8650797166657377, "learning_rate": 2.9261441729111554e-06, "loss": 0.0396, "step": 111980 }, { "epoch": 0.4672622276372558, "grad_norm": 0.7589974685676307, "learning_rate": 2.926078847131665e-06, "loss": 0.0229, "step": 111985 }, { "epoch": 0.4672830903522461, "grad_norm": 0.9280759654863722, "learning_rate": 2.9260135257271484e-06, "loss": 0.0213, "step": 111990 }, { "epoch": 0.4673039530672364, "grad_norm": 0.7684736987581505, "learning_rate": 2.925948208697115e-06, "loss": 0.0265, "step": 111995 }, { "epoch": 0.4673248157822266, "grad_norm": 0.3704431916894166, "learning_rate": 2.9258828960410786e-06, "loss": 0.0281, "step": 112000 }, { "epoch": 0.4673456784972169, "grad_norm": 0.6542865191660241, "learning_rate": 2.9258175877585493e-06, "loss": 0.022, "step": 112005 }, { "epoch": 0.46736654121220716, "grad_norm": 0.5563808307963882, "learning_rate": 2.9257522838490402e-06, "loss": 0.0298, "step": 112010 }, { "epoch": 0.46738740392719746, "grad_norm": 0.7159682685440905, "learning_rate": 2.9256869843120635e-06, "loss": 0.0259, "step": 112015 }, { "epoch": 0.46740826664218776, "grad_norm": 0.2412240450627314, "learning_rate": 2.9256216891471303e-06, "loss": 0.0255, "step": 112020 }, { "epoch": 0.467429129357178, "grad_norm": 0.14336155561913072, "learning_rate": 2.925556398353753e-06, "loss": 0.0169, "step": 112025 }, { "epoch": 0.4674499920721683, "grad_norm": 0.7889604932392995, "learning_rate": 2.9254911119314443e-06, "loss": 0.0249, "step": 112030 }, { "epoch": 0.4674708547871586, "grad_norm": 0.6955952969673775, "learning_rate": 2.925425829879716e-06, "loss": 0.0282, "step": 112035 }, { "epoch": 0.46749171750214885, "grad_norm": 0.6020341695683142, "learning_rate": 2.9253605521980816e-06, "loss": 0.0273, "step": 112040 }, { "epoch": 0.46751258021713915, "grad_norm": 0.7635482434319711, "learning_rate": 2.9252952788860517e-06, "loss": 0.0246, "step": 112045 }, { "epoch": 0.4675334429321294, "grad_norm": 0.6799754152476671, "learning_rate": 2.92523000994314e-06, "loss": 0.0282, "step": 112050 }, { "epoch": 0.4675543056471197, "grad_norm": 0.8184388076829112, "learning_rate": 2.9251647453688586e-06, "loss": 0.0243, "step": 112055 }, { "epoch": 0.46757516836211, "grad_norm": 0.6726496798586795, "learning_rate": 2.9250994851627213e-06, "loss": 0.0318, "step": 112060 }, { "epoch": 0.46759603107710024, "grad_norm": 0.6595625863093851, "learning_rate": 2.9250342293242395e-06, "loss": 0.0302, "step": 112065 }, { "epoch": 0.46761689379209054, "grad_norm": 0.3578659970504125, "learning_rate": 2.9249689778529266e-06, "loss": 0.0206, "step": 112070 }, { "epoch": 0.4676377565070808, "grad_norm": 0.9895531098354122, "learning_rate": 2.924903730748296e-06, "loss": 0.0287, "step": 112075 }, { "epoch": 0.4676586192220711, "grad_norm": 0.28232193520547744, "learning_rate": 2.924838488009859e-06, "loss": 0.0322, "step": 112080 }, { "epoch": 0.4676794819370614, "grad_norm": 0.7239603336072895, "learning_rate": 2.9247732496371308e-06, "loss": 0.0225, "step": 112085 }, { "epoch": 0.4677003446520516, "grad_norm": 1.2354069142408985, "learning_rate": 2.924708015629623e-06, "loss": 0.0313, "step": 112090 }, { "epoch": 0.4677212073670419, "grad_norm": 0.42930404272607203, "learning_rate": 2.9246427859868497e-06, "loss": 0.0243, "step": 112095 }, { "epoch": 0.46774207008203217, "grad_norm": 1.086087503543159, "learning_rate": 2.9245775607083244e-06, "loss": 0.0241, "step": 112100 }, { "epoch": 0.46776293279702247, "grad_norm": 0.6016908787510707, "learning_rate": 2.9245123397935586e-06, "loss": 0.0193, "step": 112105 }, { "epoch": 0.46778379551201277, "grad_norm": 2.113844964639912, "learning_rate": 2.9244471232420683e-06, "loss": 0.0224, "step": 112110 }, { "epoch": 0.467804658227003, "grad_norm": 1.1562391400147214, "learning_rate": 2.924381911053365e-06, "loss": 0.0231, "step": 112115 }, { "epoch": 0.4678255209419933, "grad_norm": 0.881449114620484, "learning_rate": 2.9243167032269636e-06, "loss": 0.0336, "step": 112120 }, { "epoch": 0.4678463836569836, "grad_norm": 0.7080404911605417, "learning_rate": 2.924251499762377e-06, "loss": 0.019, "step": 112125 }, { "epoch": 0.46786724637197386, "grad_norm": 0.9962622271209756, "learning_rate": 2.9241863006591193e-06, "loss": 0.025, "step": 112130 }, { "epoch": 0.46788810908696415, "grad_norm": 0.9551008008556172, "learning_rate": 2.9241211059167043e-06, "loss": 0.0239, "step": 112135 }, { "epoch": 0.4679089718019544, "grad_norm": 0.7027787510726176, "learning_rate": 2.924055915534645e-06, "loss": 0.0262, "step": 112140 }, { "epoch": 0.4679298345169447, "grad_norm": 0.6597399915873974, "learning_rate": 2.923990729512457e-06, "loss": 0.0255, "step": 112145 }, { "epoch": 0.467950697231935, "grad_norm": 0.7942356820093041, "learning_rate": 2.9239255478496538e-06, "loss": 0.023, "step": 112150 }, { "epoch": 0.46797155994692524, "grad_norm": 0.8171812924489876, "learning_rate": 2.9238603705457485e-06, "loss": 0.028, "step": 112155 }, { "epoch": 0.46799242266191554, "grad_norm": 0.5318566621264086, "learning_rate": 2.923795197600256e-06, "loss": 0.0241, "step": 112160 }, { "epoch": 0.4680132853769058, "grad_norm": 0.6925344353015006, "learning_rate": 2.923730029012691e-06, "loss": 0.0216, "step": 112165 }, { "epoch": 0.4680341480918961, "grad_norm": 0.5904138441642581, "learning_rate": 2.923664864782568e-06, "loss": 0.0408, "step": 112170 }, { "epoch": 0.4680550108068864, "grad_norm": 0.5507306373225271, "learning_rate": 2.9235997049094e-06, "loss": 0.0179, "step": 112175 }, { "epoch": 0.46807587352187663, "grad_norm": 0.8358609865941671, "learning_rate": 2.923534549392703e-06, "loss": 0.0321, "step": 112180 }, { "epoch": 0.46809673623686693, "grad_norm": 0.6599560289851655, "learning_rate": 2.9234693982319907e-06, "loss": 0.0247, "step": 112185 }, { "epoch": 0.4681175989518572, "grad_norm": 0.4967989580778796, "learning_rate": 2.923404251426778e-06, "loss": 0.0315, "step": 112190 }, { "epoch": 0.46813846166684747, "grad_norm": 0.4784800747524165, "learning_rate": 2.92333910897658e-06, "loss": 0.0206, "step": 112195 }, { "epoch": 0.46815932438183777, "grad_norm": 0.345893018543194, "learning_rate": 2.9232739708809103e-06, "loss": 0.0192, "step": 112200 }, { "epoch": 0.468180187096828, "grad_norm": 0.7233710086726389, "learning_rate": 2.9232088371392857e-06, "loss": 0.0146, "step": 112205 }, { "epoch": 0.4682010498118183, "grad_norm": 0.2885200234030115, "learning_rate": 2.92314370775122e-06, "loss": 0.0192, "step": 112210 }, { "epoch": 0.4682219125268086, "grad_norm": 0.6989231922826838, "learning_rate": 2.923078582716228e-06, "loss": 0.0286, "step": 112215 }, { "epoch": 0.46824277524179886, "grad_norm": 0.4254769327467012, "learning_rate": 2.9230134620338256e-06, "loss": 0.0218, "step": 112220 }, { "epoch": 0.46826363795678916, "grad_norm": 1.1621432620748287, "learning_rate": 2.922948345703527e-06, "loss": 0.0342, "step": 112225 }, { "epoch": 0.4682845006717794, "grad_norm": 0.7311481910938136, "learning_rate": 2.9228832337248485e-06, "loss": 0.0271, "step": 112230 }, { "epoch": 0.4683053633867697, "grad_norm": 1.2347437444762985, "learning_rate": 2.9228181260973043e-06, "loss": 0.0382, "step": 112235 }, { "epoch": 0.46832622610176, "grad_norm": 0.7635553488450227, "learning_rate": 2.922753022820411e-06, "loss": 0.029, "step": 112240 }, { "epoch": 0.46834708881675025, "grad_norm": 0.7053702348565951, "learning_rate": 2.922687923893683e-06, "loss": 0.0358, "step": 112245 }, { "epoch": 0.46836795153174055, "grad_norm": 0.6349120747049832, "learning_rate": 2.922622829316637e-06, "loss": 0.0206, "step": 112250 }, { "epoch": 0.4683888142467308, "grad_norm": 1.317348374686017, "learning_rate": 2.9225577390887884e-06, "loss": 0.0218, "step": 112255 }, { "epoch": 0.4684096769617211, "grad_norm": 0.6774245657325405, "learning_rate": 2.9224926532096516e-06, "loss": 0.0213, "step": 112260 }, { "epoch": 0.4684305396767114, "grad_norm": 0.9928754177633377, "learning_rate": 2.9224275716787435e-06, "loss": 0.0288, "step": 112265 }, { "epoch": 0.46845140239170163, "grad_norm": 0.3779751327112712, "learning_rate": 2.92236249449558e-06, "loss": 0.035, "step": 112270 }, { "epoch": 0.46847226510669193, "grad_norm": 0.724719083785688, "learning_rate": 2.9222974216596767e-06, "loss": 0.0235, "step": 112275 }, { "epoch": 0.4684931278216822, "grad_norm": 0.6996082940597121, "learning_rate": 2.9222323531705506e-06, "loss": 0.0317, "step": 112280 }, { "epoch": 0.4685139905366725, "grad_norm": 0.6327714052016629, "learning_rate": 2.9221672890277157e-06, "loss": 0.0239, "step": 112285 }, { "epoch": 0.4685348532516628, "grad_norm": 0.5490376149527401, "learning_rate": 2.92210222923069e-06, "loss": 0.018, "step": 112290 }, { "epoch": 0.468555715966653, "grad_norm": 0.7600012221313204, "learning_rate": 2.9220371737789893e-06, "loss": 0.0197, "step": 112295 }, { "epoch": 0.4685765786816433, "grad_norm": 0.9008927746942634, "learning_rate": 2.92197212267213e-06, "loss": 0.0288, "step": 112300 }, { "epoch": 0.4685974413966336, "grad_norm": 0.8422978024290424, "learning_rate": 2.9219070759096273e-06, "loss": 0.0245, "step": 112305 }, { "epoch": 0.46861830411162386, "grad_norm": 0.8613859949707524, "learning_rate": 2.921842033490999e-06, "loss": 0.0204, "step": 112310 }, { "epoch": 0.46863916682661416, "grad_norm": 1.0224979356071007, "learning_rate": 2.921776995415762e-06, "loss": 0.0325, "step": 112315 }, { "epoch": 0.4686600295416044, "grad_norm": 0.8013670930446143, "learning_rate": 2.921711961683431e-06, "loss": 0.0422, "step": 112320 }, { "epoch": 0.4686808922565947, "grad_norm": 0.5880039382882705, "learning_rate": 2.9216469322935254e-06, "loss": 0.0203, "step": 112325 }, { "epoch": 0.468701754971585, "grad_norm": 1.0422874429365931, "learning_rate": 2.9215819072455593e-06, "loss": 0.0272, "step": 112330 }, { "epoch": 0.46872261768657525, "grad_norm": 0.8857669895815651, "learning_rate": 2.921516886539051e-06, "loss": 0.0267, "step": 112335 }, { "epoch": 0.46874348040156555, "grad_norm": 0.39798813239800596, "learning_rate": 2.9214518701735172e-06, "loss": 0.0222, "step": 112340 }, { "epoch": 0.4687643431165558, "grad_norm": 3.0695858396530276, "learning_rate": 2.921386858148475e-06, "loss": 0.0292, "step": 112345 }, { "epoch": 0.4687852058315461, "grad_norm": 0.8567236236163465, "learning_rate": 2.921321850463441e-06, "loss": 0.0244, "step": 112350 }, { "epoch": 0.4688060685465364, "grad_norm": 0.34410696321694517, "learning_rate": 2.9212568471179325e-06, "loss": 0.0235, "step": 112355 }, { "epoch": 0.46882693126152664, "grad_norm": 0.41219970433622866, "learning_rate": 2.921191848111467e-06, "loss": 0.0273, "step": 112360 }, { "epoch": 0.46884779397651694, "grad_norm": 0.7364192263793292, "learning_rate": 2.9211268534435617e-06, "loss": 0.0312, "step": 112365 }, { "epoch": 0.4688686566915072, "grad_norm": 0.4786518008110626, "learning_rate": 2.9210618631137338e-06, "loss": 0.0332, "step": 112370 }, { "epoch": 0.4688895194064975, "grad_norm": 0.4136480418836592, "learning_rate": 2.920996877121501e-06, "loss": 0.0279, "step": 112375 }, { "epoch": 0.4689103821214878, "grad_norm": 1.358045323882231, "learning_rate": 2.9209318954663806e-06, "loss": 0.038, "step": 112380 }, { "epoch": 0.468931244836478, "grad_norm": 0.5324394050286344, "learning_rate": 2.9208669181478897e-06, "loss": 0.0242, "step": 112385 }, { "epoch": 0.4689521075514683, "grad_norm": 0.7982403495246234, "learning_rate": 2.9208019451655477e-06, "loss": 0.025, "step": 112390 }, { "epoch": 0.4689729702664586, "grad_norm": 1.0371782144093058, "learning_rate": 2.9207369765188705e-06, "loss": 0.0229, "step": 112395 }, { "epoch": 0.46899383298144887, "grad_norm": 0.49693443845697793, "learning_rate": 2.920672012207377e-06, "loss": 0.0308, "step": 112400 }, { "epoch": 0.46901469569643917, "grad_norm": 0.9602439798626379, "learning_rate": 2.920607052230584e-06, "loss": 0.0289, "step": 112405 }, { "epoch": 0.4690355584114294, "grad_norm": 0.36528030268597966, "learning_rate": 2.9205420965880103e-06, "loss": 0.0202, "step": 112410 }, { "epoch": 0.4690564211264197, "grad_norm": 0.3730387695493126, "learning_rate": 2.9204771452791743e-06, "loss": 0.0193, "step": 112415 }, { "epoch": 0.46907728384141, "grad_norm": 1.197463339549856, "learning_rate": 2.920412198303594e-06, "loss": 0.0265, "step": 112420 }, { "epoch": 0.46909814655640025, "grad_norm": 0.5715348241254409, "learning_rate": 2.9203472556607863e-06, "loss": 0.0275, "step": 112425 }, { "epoch": 0.46911900927139055, "grad_norm": 0.7728537270267516, "learning_rate": 2.920282317350271e-06, "loss": 0.0252, "step": 112430 }, { "epoch": 0.4691398719863808, "grad_norm": 1.620450546333115, "learning_rate": 2.920217383371566e-06, "loss": 0.0328, "step": 112435 }, { "epoch": 0.4691607347013711, "grad_norm": 0.7081834754394376, "learning_rate": 2.920152453724189e-06, "loss": 0.0267, "step": 112440 }, { "epoch": 0.4691815974163614, "grad_norm": 0.9486681607754529, "learning_rate": 2.9200875284076595e-06, "loss": 0.028, "step": 112445 }, { "epoch": 0.46920246013135164, "grad_norm": 0.5550210832662713, "learning_rate": 2.9200226074214954e-06, "loss": 0.0276, "step": 112450 }, { "epoch": 0.46922332284634194, "grad_norm": 1.3822938731867034, "learning_rate": 2.9199576907652155e-06, "loss": 0.0209, "step": 112455 }, { "epoch": 0.4692441855613322, "grad_norm": 0.9904762054478141, "learning_rate": 2.919892778438339e-06, "loss": 0.0209, "step": 112460 }, { "epoch": 0.4692650482763225, "grad_norm": 0.5608222595585328, "learning_rate": 2.919827870440384e-06, "loss": 0.0231, "step": 112465 }, { "epoch": 0.4692859109913128, "grad_norm": 0.6902718613144525, "learning_rate": 2.91976296677087e-06, "loss": 0.0275, "step": 112470 }, { "epoch": 0.469306773706303, "grad_norm": 1.0570005937037292, "learning_rate": 2.9196980674293153e-06, "loss": 0.025, "step": 112475 }, { "epoch": 0.4693276364212933, "grad_norm": 1.1227458790261144, "learning_rate": 2.9196331724152393e-06, "loss": 0.0274, "step": 112480 }, { "epoch": 0.4693484991362836, "grad_norm": 0.9871736386595594, "learning_rate": 2.919568281728161e-06, "loss": 0.0301, "step": 112485 }, { "epoch": 0.46936936185127387, "grad_norm": 0.5608578017593518, "learning_rate": 2.9195033953676e-06, "loss": 0.0272, "step": 112490 }, { "epoch": 0.46939022456626417, "grad_norm": 1.0877376530450742, "learning_rate": 2.9194385133330747e-06, "loss": 0.034, "step": 112495 }, { "epoch": 0.4694110872812544, "grad_norm": 0.7703104777185268, "learning_rate": 2.9193736356241054e-06, "loss": 0.0186, "step": 112500 }, { "epoch": 0.4694319499962447, "grad_norm": 0.5444667641864522, "learning_rate": 2.9193087622402105e-06, "loss": 0.0258, "step": 112505 }, { "epoch": 0.469452812711235, "grad_norm": 1.0088897974040099, "learning_rate": 2.9192438931809104e-06, "loss": 0.0264, "step": 112510 }, { "epoch": 0.46947367542622526, "grad_norm": 0.4769107340797997, "learning_rate": 2.919179028445724e-06, "loss": 0.0198, "step": 112515 }, { "epoch": 0.46949453814121556, "grad_norm": 1.0958758414667555, "learning_rate": 2.9191141680341713e-06, "loss": 0.0242, "step": 112520 }, { "epoch": 0.4695154008562058, "grad_norm": 0.42781414413829044, "learning_rate": 2.919049311945771e-06, "loss": 0.0221, "step": 112525 }, { "epoch": 0.4695362635711961, "grad_norm": 0.6457989982754919, "learning_rate": 2.9189844601800444e-06, "loss": 0.026, "step": 112530 }, { "epoch": 0.4695571262861864, "grad_norm": 0.7695030629336151, "learning_rate": 2.91891961273651e-06, "loss": 0.0285, "step": 112535 }, { "epoch": 0.46957798900117664, "grad_norm": 0.6137635273395466, "learning_rate": 2.918854769614689e-06, "loss": 0.0247, "step": 112540 }, { "epoch": 0.46959885171616694, "grad_norm": 0.8596267664573671, "learning_rate": 2.918789930814101e-06, "loss": 0.0178, "step": 112545 }, { "epoch": 0.4696197144311572, "grad_norm": 1.1799890801528665, "learning_rate": 2.918725096334265e-06, "loss": 0.0275, "step": 112550 }, { "epoch": 0.4696405771461475, "grad_norm": 0.6747416941548291, "learning_rate": 2.918660266174702e-06, "loss": 0.0312, "step": 112555 }, { "epoch": 0.4696614398611378, "grad_norm": 0.6221322696237918, "learning_rate": 2.918595440334932e-06, "loss": 0.0266, "step": 112560 }, { "epoch": 0.46968230257612803, "grad_norm": 0.8892998591806933, "learning_rate": 2.918530618814476e-06, "loss": 0.0246, "step": 112565 }, { "epoch": 0.46970316529111833, "grad_norm": 0.30863443444547817, "learning_rate": 2.9184658016128538e-06, "loss": 0.0239, "step": 112570 }, { "epoch": 0.46972402800610863, "grad_norm": 0.6650810843253819, "learning_rate": 2.918400988729585e-06, "loss": 0.0257, "step": 112575 }, { "epoch": 0.4697448907210989, "grad_norm": 0.6699104739251257, "learning_rate": 2.918336180164192e-06, "loss": 0.0225, "step": 112580 }, { "epoch": 0.4697657534360892, "grad_norm": 0.5468116591316002, "learning_rate": 2.9182713759161933e-06, "loss": 0.0204, "step": 112585 }, { "epoch": 0.4697866161510794, "grad_norm": 1.113570183682937, "learning_rate": 2.918206575985112e-06, "loss": 0.0151, "step": 112590 }, { "epoch": 0.4698074788660697, "grad_norm": 3.977101664860417, "learning_rate": 2.918141780370466e-06, "loss": 0.0262, "step": 112595 }, { "epoch": 0.46982834158106, "grad_norm": 0.39283122375090535, "learning_rate": 2.9180769890717788e-06, "loss": 0.0178, "step": 112600 }, { "epoch": 0.46984920429605026, "grad_norm": 0.5856291239667337, "learning_rate": 2.9180122020885686e-06, "loss": 0.0308, "step": 112605 }, { "epoch": 0.46987006701104056, "grad_norm": 1.0969024823310614, "learning_rate": 2.9179474194203594e-06, "loss": 0.0227, "step": 112610 }, { "epoch": 0.4698909297260308, "grad_norm": 0.8372713312511229, "learning_rate": 2.9178826410666696e-06, "loss": 0.0277, "step": 112615 }, { "epoch": 0.4699117924410211, "grad_norm": 0.6271607853681618, "learning_rate": 2.917817867027021e-06, "loss": 0.0252, "step": 112620 }, { "epoch": 0.4699326551560114, "grad_norm": 0.6631369274246677, "learning_rate": 2.9177530973009362e-06, "loss": 0.0219, "step": 112625 }, { "epoch": 0.46995351787100165, "grad_norm": 0.42910753663875445, "learning_rate": 2.917688331887935e-06, "loss": 0.0213, "step": 112630 }, { "epoch": 0.46997438058599195, "grad_norm": 0.2384732945076455, "learning_rate": 2.9176235707875394e-06, "loss": 0.0382, "step": 112635 }, { "epoch": 0.4699952433009822, "grad_norm": 0.5606645450474718, "learning_rate": 2.91755881399927e-06, "loss": 0.0233, "step": 112640 }, { "epoch": 0.4700161060159725, "grad_norm": 0.9070489425872511, "learning_rate": 2.917494061522649e-06, "loss": 0.0198, "step": 112645 }, { "epoch": 0.4700369687309628, "grad_norm": 1.120483040023941, "learning_rate": 2.9174293133571978e-06, "loss": 0.0269, "step": 112650 }, { "epoch": 0.47005783144595303, "grad_norm": 0.919863661908915, "learning_rate": 2.917364569502438e-06, "loss": 0.0252, "step": 112655 }, { "epoch": 0.47007869416094333, "grad_norm": 1.9576572408100446, "learning_rate": 2.9172998299578915e-06, "loss": 0.0269, "step": 112660 }, { "epoch": 0.47009955687593363, "grad_norm": 0.6157387093766125, "learning_rate": 2.9172350947230792e-06, "loss": 0.0291, "step": 112665 }, { "epoch": 0.4701204195909239, "grad_norm": 0.5291069526914631, "learning_rate": 2.9171703637975247e-06, "loss": 0.0255, "step": 112670 }, { "epoch": 0.4701412823059142, "grad_norm": 0.8221591077635025, "learning_rate": 2.9171056371807484e-06, "loss": 0.0287, "step": 112675 }, { "epoch": 0.4701621450209044, "grad_norm": 0.5459733680947532, "learning_rate": 2.917040914872272e-06, "loss": 0.0267, "step": 112680 }, { "epoch": 0.4701830077358947, "grad_norm": 0.6513368831197446, "learning_rate": 2.916976196871619e-06, "loss": 0.0195, "step": 112685 }, { "epoch": 0.470203870450885, "grad_norm": 0.9002804148913452, "learning_rate": 2.9169114831783117e-06, "loss": 0.0238, "step": 112690 }, { "epoch": 0.47022473316587526, "grad_norm": 0.6184749641701861, "learning_rate": 2.9168467737918703e-06, "loss": 0.023, "step": 112695 }, { "epoch": 0.47024559588086556, "grad_norm": 0.7023134888890082, "learning_rate": 2.916782068711819e-06, "loss": 0.0283, "step": 112700 }, { "epoch": 0.4702664585958558, "grad_norm": 0.5268935906553257, "learning_rate": 2.9167173679376794e-06, "loss": 0.0221, "step": 112705 }, { "epoch": 0.4702873213108461, "grad_norm": 0.8984104101222453, "learning_rate": 2.916652671468974e-06, "loss": 0.027, "step": 112710 }, { "epoch": 0.4703081840258364, "grad_norm": 0.4940119305576545, "learning_rate": 2.916587979305225e-06, "loss": 0.0249, "step": 112715 }, { "epoch": 0.47032904674082665, "grad_norm": 0.9580525800356158, "learning_rate": 2.916523291445956e-06, "loss": 0.0209, "step": 112720 }, { "epoch": 0.47034990945581695, "grad_norm": 0.650678803090362, "learning_rate": 2.916458607890688e-06, "loss": 0.019, "step": 112725 }, { "epoch": 0.4703707721708072, "grad_norm": 1.1973892638472896, "learning_rate": 2.916393928638946e-06, "loss": 0.0203, "step": 112730 }, { "epoch": 0.4703916348857975, "grad_norm": 0.9105910753505762, "learning_rate": 2.916329253690251e-06, "loss": 0.018, "step": 112735 }, { "epoch": 0.4704124976007878, "grad_norm": 0.5808731671468624, "learning_rate": 2.916264583044126e-06, "loss": 0.0229, "step": 112740 }, { "epoch": 0.47043336031577804, "grad_norm": 0.745033352521879, "learning_rate": 2.916199916700095e-06, "loss": 0.0193, "step": 112745 }, { "epoch": 0.47045422303076834, "grad_norm": 0.43544219360972, "learning_rate": 2.9161352546576805e-06, "loss": 0.0262, "step": 112750 }, { "epoch": 0.47047508574575864, "grad_norm": 1.0184885773663945, "learning_rate": 2.9160705969164053e-06, "loss": 0.0265, "step": 112755 }, { "epoch": 0.4704959484607489, "grad_norm": 0.9108173789689187, "learning_rate": 2.916005943475794e-06, "loss": 0.0302, "step": 112760 }, { "epoch": 0.4705168111757392, "grad_norm": 0.40866940945297814, "learning_rate": 2.915941294335367e-06, "loss": 0.0288, "step": 112765 }, { "epoch": 0.4705376738907294, "grad_norm": 0.8314164172240627, "learning_rate": 2.9158766494946505e-06, "loss": 0.035, "step": 112770 }, { "epoch": 0.4705585366057197, "grad_norm": 1.2068342567269683, "learning_rate": 2.9158120089531666e-06, "loss": 0.027, "step": 112775 }, { "epoch": 0.47057939932071, "grad_norm": 0.4002471698127692, "learning_rate": 2.915747372710439e-06, "loss": 0.0247, "step": 112780 }, { "epoch": 0.47060026203570027, "grad_norm": 1.1267947732273462, "learning_rate": 2.9156827407659918e-06, "loss": 0.0272, "step": 112785 }, { "epoch": 0.47062112475069057, "grad_norm": 0.6706416551305361, "learning_rate": 2.9156181131193466e-06, "loss": 0.0256, "step": 112790 }, { "epoch": 0.4706419874656808, "grad_norm": 0.8380537270450692, "learning_rate": 2.9155534897700294e-06, "loss": 0.0243, "step": 112795 }, { "epoch": 0.4706628501806711, "grad_norm": 0.7581870407735983, "learning_rate": 2.915488870717563e-06, "loss": 0.0253, "step": 112800 }, { "epoch": 0.4706837128956614, "grad_norm": 0.49153385155233914, "learning_rate": 2.9154242559614723e-06, "loss": 0.0227, "step": 112805 }, { "epoch": 0.47070457561065165, "grad_norm": 0.9328024562319828, "learning_rate": 2.9153596455012793e-06, "loss": 0.0246, "step": 112810 }, { "epoch": 0.47072543832564195, "grad_norm": 0.8473979043793005, "learning_rate": 2.9152950393365094e-06, "loss": 0.03, "step": 112815 }, { "epoch": 0.4707463010406322, "grad_norm": 0.8945556663084194, "learning_rate": 2.915230437466686e-06, "loss": 0.0379, "step": 112820 }, { "epoch": 0.4707671637556225, "grad_norm": 0.8894095900891815, "learning_rate": 2.915165839891334e-06, "loss": 0.0259, "step": 112825 }, { "epoch": 0.4707880264706128, "grad_norm": 1.1334914300182923, "learning_rate": 2.9151012466099776e-06, "loss": 0.0291, "step": 112830 }, { "epoch": 0.47080888918560304, "grad_norm": 0.7329183015052969, "learning_rate": 2.91503665762214e-06, "loss": 0.0252, "step": 112835 }, { "epoch": 0.47082975190059334, "grad_norm": 0.39376572827806305, "learning_rate": 2.914972072927346e-06, "loss": 0.0211, "step": 112840 }, { "epoch": 0.47085061461558364, "grad_norm": 0.5460824334477182, "learning_rate": 2.914907492525121e-06, "loss": 0.0295, "step": 112845 }, { "epoch": 0.4708714773305739, "grad_norm": 0.5495645194754374, "learning_rate": 2.9148429164149884e-06, "loss": 0.0214, "step": 112850 }, { "epoch": 0.4708923400455642, "grad_norm": 0.7892274734430827, "learning_rate": 2.9147783445964732e-06, "loss": 0.0269, "step": 112855 }, { "epoch": 0.47091320276055443, "grad_norm": 0.7387289188323803, "learning_rate": 2.9147137770691e-06, "loss": 0.0237, "step": 112860 }, { "epoch": 0.4709340654755447, "grad_norm": 0.4701883979255168, "learning_rate": 2.914649213832394e-06, "loss": 0.0201, "step": 112865 }, { "epoch": 0.470954928190535, "grad_norm": 0.4921847538038474, "learning_rate": 2.914584654885879e-06, "loss": 0.0196, "step": 112870 }, { "epoch": 0.47097579090552527, "grad_norm": 0.7079765976136417, "learning_rate": 2.914520100229081e-06, "loss": 0.0333, "step": 112875 }, { "epoch": 0.47099665362051557, "grad_norm": 0.5127643174884189, "learning_rate": 2.9144555498615246e-06, "loss": 0.0224, "step": 112880 }, { "epoch": 0.4710175163355058, "grad_norm": 0.5965557293407494, "learning_rate": 2.9143910037827343e-06, "loss": 0.0187, "step": 112885 }, { "epoch": 0.4710383790504961, "grad_norm": 0.3734357092867418, "learning_rate": 2.9143264619922355e-06, "loss": 0.0209, "step": 112890 }, { "epoch": 0.4710592417654864, "grad_norm": 0.7909954129691742, "learning_rate": 2.9142619244895543e-06, "loss": 0.0223, "step": 112895 }, { "epoch": 0.47108010448047666, "grad_norm": 1.0528162537253607, "learning_rate": 2.9141973912742146e-06, "loss": 0.0279, "step": 112900 }, { "epoch": 0.47110096719546696, "grad_norm": 0.6739460003957316, "learning_rate": 2.914132862345742e-06, "loss": 0.0271, "step": 112905 }, { "epoch": 0.4711218299104572, "grad_norm": 0.5573819672435911, "learning_rate": 2.914068337703663e-06, "loss": 0.0257, "step": 112910 }, { "epoch": 0.4711426926254475, "grad_norm": 0.5936821000405886, "learning_rate": 2.9140038173475018e-06, "loss": 0.0259, "step": 112915 }, { "epoch": 0.4711635553404378, "grad_norm": 0.41753843279985126, "learning_rate": 2.9139393012767836e-06, "loss": 0.0241, "step": 112920 }, { "epoch": 0.47118441805542804, "grad_norm": 1.216888221686064, "learning_rate": 2.9138747894910357e-06, "loss": 0.0344, "step": 112925 }, { "epoch": 0.47120528077041834, "grad_norm": 0.46287767424008136, "learning_rate": 2.913810281989783e-06, "loss": 0.0284, "step": 112930 }, { "epoch": 0.47122614348540864, "grad_norm": 1.3014007754919674, "learning_rate": 2.9137457787725504e-06, "loss": 0.0237, "step": 112935 }, { "epoch": 0.4712470062003989, "grad_norm": 0.6084992792047756, "learning_rate": 2.913681279838865e-06, "loss": 0.0229, "step": 112940 }, { "epoch": 0.4712678689153892, "grad_norm": 1.0955995572919968, "learning_rate": 2.9136167851882524e-06, "loss": 0.0316, "step": 112945 }, { "epoch": 0.47128873163037943, "grad_norm": 1.3108615988085603, "learning_rate": 2.9135522948202382e-06, "loss": 0.0237, "step": 112950 }, { "epoch": 0.47130959434536973, "grad_norm": 0.6513301369351796, "learning_rate": 2.9134878087343487e-06, "loss": 0.0464, "step": 112955 }, { "epoch": 0.47133045706036003, "grad_norm": 0.5144513987493521, "learning_rate": 2.9134233269301104e-06, "loss": 0.035, "step": 112960 }, { "epoch": 0.4713513197753503, "grad_norm": 0.596248557338572, "learning_rate": 2.9133588494070493e-06, "loss": 0.0291, "step": 112965 }, { "epoch": 0.4713721824903406, "grad_norm": 0.3974590084284004, "learning_rate": 2.9132943761646908e-06, "loss": 0.0174, "step": 112970 }, { "epoch": 0.4713930452053308, "grad_norm": 1.0264110490607072, "learning_rate": 2.9132299072025625e-06, "loss": 0.0385, "step": 112975 }, { "epoch": 0.4714139079203211, "grad_norm": 1.3594855682754738, "learning_rate": 2.91316544252019e-06, "loss": 0.0332, "step": 112980 }, { "epoch": 0.4714347706353114, "grad_norm": 0.9968228386251895, "learning_rate": 2.9131009821171003e-06, "loss": 0.0232, "step": 112985 }, { "epoch": 0.47145563335030166, "grad_norm": 1.0102778143400866, "learning_rate": 2.9130365259928193e-06, "loss": 0.0303, "step": 112990 }, { "epoch": 0.47147649606529196, "grad_norm": 0.9970115588604158, "learning_rate": 2.9129720741468747e-06, "loss": 0.0211, "step": 112995 }, { "epoch": 0.4714973587802822, "grad_norm": 0.5006419656947214, "learning_rate": 2.9129076265787925e-06, "loss": 0.0265, "step": 113000 }, { "epoch": 0.4715182214952725, "grad_norm": 0.6674761950909667, "learning_rate": 2.9128431832880996e-06, "loss": 0.0258, "step": 113005 }, { "epoch": 0.4715390842102628, "grad_norm": 0.6997489639920247, "learning_rate": 2.9127787442743233e-06, "loss": 0.0269, "step": 113010 }, { "epoch": 0.47155994692525305, "grad_norm": 0.44177674823002333, "learning_rate": 2.91271430953699e-06, "loss": 0.0178, "step": 113015 }, { "epoch": 0.47158080964024335, "grad_norm": 0.392731290917671, "learning_rate": 2.9126498790756266e-06, "loss": 0.0222, "step": 113020 }, { "epoch": 0.47160167235523365, "grad_norm": 0.8989980986049645, "learning_rate": 2.9125854528897607e-06, "loss": 0.0267, "step": 113025 }, { "epoch": 0.4716225350702239, "grad_norm": 0.6401489470755646, "learning_rate": 2.9125210309789193e-06, "loss": 0.0209, "step": 113030 }, { "epoch": 0.4716433977852142, "grad_norm": 0.621110002828674, "learning_rate": 2.912456613342629e-06, "loss": 0.0255, "step": 113035 }, { "epoch": 0.47166426050020444, "grad_norm": 1.2338519586481187, "learning_rate": 2.9123921999804182e-06, "loss": 0.0307, "step": 113040 }, { "epoch": 0.47168512321519473, "grad_norm": 0.8204568310474084, "learning_rate": 2.912327790891814e-06, "loss": 0.0136, "step": 113045 }, { "epoch": 0.47170598593018503, "grad_norm": 0.5536706878369558, "learning_rate": 2.912263386076344e-06, "loss": 0.0224, "step": 113050 }, { "epoch": 0.4717268486451753, "grad_norm": 0.562721725299126, "learning_rate": 2.912198985533534e-06, "loss": 0.0313, "step": 113055 }, { "epoch": 0.4717477113601656, "grad_norm": 0.5071648849668963, "learning_rate": 2.912134589262914e-06, "loss": 0.0305, "step": 113060 }, { "epoch": 0.4717685740751558, "grad_norm": 0.6291184055274602, "learning_rate": 2.91207019726401e-06, "loss": 0.0245, "step": 113065 }, { "epoch": 0.4717894367901461, "grad_norm": 1.0636649118185755, "learning_rate": 2.9120058095363506e-06, "loss": 0.0259, "step": 113070 }, { "epoch": 0.4718102995051364, "grad_norm": 0.46476750748652607, "learning_rate": 2.9119414260794637e-06, "loss": 0.0268, "step": 113075 }, { "epoch": 0.47183116222012667, "grad_norm": 1.0242884307556435, "learning_rate": 2.911877046892877e-06, "loss": 0.0282, "step": 113080 }, { "epoch": 0.47185202493511696, "grad_norm": 0.6413022633839629, "learning_rate": 2.911812671976118e-06, "loss": 0.0218, "step": 113085 }, { "epoch": 0.4718728876501072, "grad_norm": 0.45723671359480683, "learning_rate": 2.911748301328715e-06, "loss": 0.023, "step": 113090 }, { "epoch": 0.4718937503650975, "grad_norm": 0.6149002515495617, "learning_rate": 2.9116839349501964e-06, "loss": 0.0232, "step": 113095 }, { "epoch": 0.4719146130800878, "grad_norm": 0.47413624988464215, "learning_rate": 2.91161957284009e-06, "loss": 0.0173, "step": 113100 }, { "epoch": 0.47193547579507805, "grad_norm": 0.5146915032080825, "learning_rate": 2.911555214997925e-06, "loss": 0.0217, "step": 113105 }, { "epoch": 0.47195633851006835, "grad_norm": 0.6021814731573167, "learning_rate": 2.911490861423228e-06, "loss": 0.0244, "step": 113110 }, { "epoch": 0.47197720122505865, "grad_norm": 0.7037679364891317, "learning_rate": 2.9114265121155287e-06, "loss": 0.0229, "step": 113115 }, { "epoch": 0.4719980639400489, "grad_norm": 0.6468562499456691, "learning_rate": 2.911362167074356e-06, "loss": 0.0237, "step": 113120 }, { "epoch": 0.4720189266550392, "grad_norm": 0.9273583913628861, "learning_rate": 2.9112978262992363e-06, "loss": 0.0221, "step": 113125 }, { "epoch": 0.47203978937002944, "grad_norm": 0.533929609973582, "learning_rate": 2.911233489789701e-06, "loss": 0.0215, "step": 113130 }, { "epoch": 0.47206065208501974, "grad_norm": 1.977013876579535, "learning_rate": 2.911169157545277e-06, "loss": 0.0361, "step": 113135 }, { "epoch": 0.47208151480001004, "grad_norm": 0.9315426057890484, "learning_rate": 2.9111048295654936e-06, "loss": 0.0312, "step": 113140 }, { "epoch": 0.4721023775150003, "grad_norm": 0.3670269206056473, "learning_rate": 2.91104050584988e-06, "loss": 0.0207, "step": 113145 }, { "epoch": 0.4721232402299906, "grad_norm": 0.6610838101774489, "learning_rate": 2.9109761863979636e-06, "loss": 0.0281, "step": 113150 }, { "epoch": 0.4721441029449808, "grad_norm": 0.8255533196292724, "learning_rate": 2.910911871209275e-06, "loss": 0.0239, "step": 113155 }, { "epoch": 0.4721649656599711, "grad_norm": 0.616045480363805, "learning_rate": 2.9108475602833426e-06, "loss": 0.0268, "step": 113160 }, { "epoch": 0.4721858283749614, "grad_norm": 0.9630346681113404, "learning_rate": 2.9107832536196964e-06, "loss": 0.0255, "step": 113165 }, { "epoch": 0.47220669108995167, "grad_norm": 1.0519056561887228, "learning_rate": 2.9107189512178642e-06, "loss": 0.0292, "step": 113170 }, { "epoch": 0.47222755380494197, "grad_norm": 0.9823024721873811, "learning_rate": 2.9106546530773757e-06, "loss": 0.0276, "step": 113175 }, { "epoch": 0.4722484165199322, "grad_norm": 1.9483729028187315, "learning_rate": 2.9105903591977606e-06, "loss": 0.0312, "step": 113180 }, { "epoch": 0.4722692792349225, "grad_norm": 0.4396408572607647, "learning_rate": 2.910526069578548e-06, "loss": 0.024, "step": 113185 }, { "epoch": 0.4722901419499128, "grad_norm": 0.8200452997029427, "learning_rate": 2.9104617842192682e-06, "loss": 0.0223, "step": 113190 }, { "epoch": 0.47231100466490306, "grad_norm": 0.4595105942795118, "learning_rate": 2.9103975031194503e-06, "loss": 0.0386, "step": 113195 }, { "epoch": 0.47233186737989336, "grad_norm": 0.6319538658973985, "learning_rate": 2.910333226278624e-06, "loss": 0.0285, "step": 113200 }, { "epoch": 0.47235273009488365, "grad_norm": 1.156605955424709, "learning_rate": 2.9102689536963185e-06, "loss": 0.0247, "step": 113205 }, { "epoch": 0.4723735928098739, "grad_norm": 0.6631626433348377, "learning_rate": 2.910204685372064e-06, "loss": 0.0258, "step": 113210 }, { "epoch": 0.4723944555248642, "grad_norm": 0.879178316114465, "learning_rate": 2.9101404213053904e-06, "loss": 0.0326, "step": 113215 }, { "epoch": 0.47241531823985444, "grad_norm": 0.30168168931117534, "learning_rate": 2.9100761614958265e-06, "loss": 0.0313, "step": 113220 }, { "epoch": 0.47243618095484474, "grad_norm": 1.0697719647337807, "learning_rate": 2.9100119059429048e-06, "loss": 0.0308, "step": 113225 }, { "epoch": 0.47245704366983504, "grad_norm": 0.6314886516651282, "learning_rate": 2.9099476546461537e-06, "loss": 0.0253, "step": 113230 }, { "epoch": 0.4724779063848253, "grad_norm": 1.2591376425190783, "learning_rate": 2.909883407605103e-06, "loss": 0.0226, "step": 113235 }, { "epoch": 0.4724987690998156, "grad_norm": 0.39399287636762625, "learning_rate": 2.9098191648192843e-06, "loss": 0.0304, "step": 113240 }, { "epoch": 0.47251963181480583, "grad_norm": 0.691457197261605, "learning_rate": 2.9097549262882257e-06, "loss": 0.0215, "step": 113245 }, { "epoch": 0.47254049452979613, "grad_norm": 0.9335403288096892, "learning_rate": 2.9096906920114603e-06, "loss": 0.0255, "step": 113250 }, { "epoch": 0.47256135724478643, "grad_norm": 1.4844723144017586, "learning_rate": 2.909626461988517e-06, "loss": 0.027, "step": 113255 }, { "epoch": 0.4725822199597767, "grad_norm": 0.7932377871604934, "learning_rate": 2.909562236218926e-06, "loss": 0.0212, "step": 113260 }, { "epoch": 0.47260308267476697, "grad_norm": 0.6645121912522168, "learning_rate": 2.9094980147022187e-06, "loss": 0.0309, "step": 113265 }, { "epoch": 0.4726239453897572, "grad_norm": 0.986615947077364, "learning_rate": 2.9094337974379255e-06, "loss": 0.0326, "step": 113270 }, { "epoch": 0.4726448081047475, "grad_norm": 1.0313260566491231, "learning_rate": 2.9093695844255776e-06, "loss": 0.0247, "step": 113275 }, { "epoch": 0.4726656708197378, "grad_norm": 0.908065205839873, "learning_rate": 2.909305375664705e-06, "loss": 0.0223, "step": 113280 }, { "epoch": 0.47268653353472806, "grad_norm": 0.7006185009067095, "learning_rate": 2.9092411711548385e-06, "loss": 0.0289, "step": 113285 }, { "epoch": 0.47270739624971836, "grad_norm": 0.6426262556858611, "learning_rate": 2.9091769708955096e-06, "loss": 0.0262, "step": 113290 }, { "epoch": 0.47272825896470866, "grad_norm": 0.4048484078463744, "learning_rate": 2.9091127748862495e-06, "loss": 0.019, "step": 113295 }, { "epoch": 0.4727491216796989, "grad_norm": 0.33940412455858476, "learning_rate": 2.9090485831265884e-06, "loss": 0.0247, "step": 113300 }, { "epoch": 0.4727699843946892, "grad_norm": 0.9556786948893486, "learning_rate": 2.9089843956160583e-06, "loss": 0.0259, "step": 113305 }, { "epoch": 0.47279084710967945, "grad_norm": 0.6052080762046613, "learning_rate": 2.9089202123541905e-06, "loss": 0.0253, "step": 113310 }, { "epoch": 0.47281170982466975, "grad_norm": 1.5393205249949757, "learning_rate": 2.9088560333405164e-06, "loss": 0.0254, "step": 113315 }, { "epoch": 0.47283257253966005, "grad_norm": 0.8641720125556966, "learning_rate": 2.9087918585745656e-06, "loss": 0.0221, "step": 113320 }, { "epoch": 0.4728534352546503, "grad_norm": 0.5805453231570766, "learning_rate": 2.9087276880558723e-06, "loss": 0.0156, "step": 113325 }, { "epoch": 0.4728742979696406, "grad_norm": 0.543510732562068, "learning_rate": 2.9086635217839653e-06, "loss": 0.0247, "step": 113330 }, { "epoch": 0.47289516068463083, "grad_norm": 1.114837773717075, "learning_rate": 2.9085993597583786e-06, "loss": 0.0289, "step": 113335 }, { "epoch": 0.47291602339962113, "grad_norm": 0.9906458683397937, "learning_rate": 2.9085352019786427e-06, "loss": 0.0238, "step": 113340 }, { "epoch": 0.47293688611461143, "grad_norm": 0.8130307230129923, "learning_rate": 2.9084710484442897e-06, "loss": 0.0314, "step": 113345 }, { "epoch": 0.4729577488296017, "grad_norm": 0.6494414681540677, "learning_rate": 2.908406899154851e-06, "loss": 0.0227, "step": 113350 }, { "epoch": 0.472978611544592, "grad_norm": 1.2185574563727999, "learning_rate": 2.908342754109858e-06, "loss": 0.023, "step": 113355 }, { "epoch": 0.4729994742595822, "grad_norm": 0.7343406598904989, "learning_rate": 2.9082786133088435e-06, "loss": 0.0332, "step": 113360 }, { "epoch": 0.4730203369745725, "grad_norm": 0.6361169580081197, "learning_rate": 2.9082144767513398e-06, "loss": 0.0227, "step": 113365 }, { "epoch": 0.4730411996895628, "grad_norm": 0.6908697966482066, "learning_rate": 2.908150344436879e-06, "loss": 0.0322, "step": 113370 }, { "epoch": 0.47306206240455306, "grad_norm": 0.4288952086904391, "learning_rate": 2.9080862163649926e-06, "loss": 0.0227, "step": 113375 }, { "epoch": 0.47308292511954336, "grad_norm": 0.7402364547317533, "learning_rate": 2.9080220925352126e-06, "loss": 0.0308, "step": 113380 }, { "epoch": 0.47310378783453366, "grad_norm": 0.7552105746631735, "learning_rate": 2.9079579729470726e-06, "loss": 0.0245, "step": 113385 }, { "epoch": 0.4731246505495239, "grad_norm": 1.052375254812796, "learning_rate": 2.9078938576001036e-06, "loss": 0.0254, "step": 113390 }, { "epoch": 0.4731455132645142, "grad_norm": 0.6495302753291959, "learning_rate": 2.9078297464938393e-06, "loss": 0.0286, "step": 113395 }, { "epoch": 0.47316637597950445, "grad_norm": 0.8132101223980039, "learning_rate": 2.907765639627811e-06, "loss": 0.0225, "step": 113400 }, { "epoch": 0.47318723869449475, "grad_norm": 0.5430324460493817, "learning_rate": 2.9077015370015527e-06, "loss": 0.021, "step": 113405 }, { "epoch": 0.47320810140948505, "grad_norm": 1.1800453063686924, "learning_rate": 2.907637438614596e-06, "loss": 0.0282, "step": 113410 }, { "epoch": 0.4732289641244753, "grad_norm": 0.5854833953107643, "learning_rate": 2.907573344466474e-06, "loss": 0.0203, "step": 113415 }, { "epoch": 0.4732498268394656, "grad_norm": 0.8861411047174952, "learning_rate": 2.9075092545567196e-06, "loss": 0.0244, "step": 113420 }, { "epoch": 0.47327068955445584, "grad_norm": 0.46422319267313866, "learning_rate": 2.9074451688848655e-06, "loss": 0.0271, "step": 113425 }, { "epoch": 0.47329155226944614, "grad_norm": 1.1618877970480634, "learning_rate": 2.907381087450445e-06, "loss": 0.032, "step": 113430 }, { "epoch": 0.47331241498443644, "grad_norm": 0.5769249741006915, "learning_rate": 2.9073170102529907e-06, "loss": 0.0194, "step": 113435 }, { "epoch": 0.4733332776994267, "grad_norm": 0.7728926742985274, "learning_rate": 2.9072529372920366e-06, "loss": 0.0215, "step": 113440 }, { "epoch": 0.473354140414417, "grad_norm": 1.0613115432422884, "learning_rate": 2.907188868567115e-06, "loss": 0.03, "step": 113445 }, { "epoch": 0.4733750031294072, "grad_norm": 0.8881192087797048, "learning_rate": 2.9071248040777587e-06, "loss": 0.0242, "step": 113450 }, { "epoch": 0.4733958658443975, "grad_norm": 0.8389972859088395, "learning_rate": 2.907060743823503e-06, "loss": 0.0313, "step": 113455 }, { "epoch": 0.4734167285593878, "grad_norm": 0.530695961280762, "learning_rate": 2.906996687803879e-06, "loss": 0.0192, "step": 113460 }, { "epoch": 0.47343759127437807, "grad_norm": 0.45833305563689947, "learning_rate": 2.906932636018422e-06, "loss": 0.0236, "step": 113465 }, { "epoch": 0.47345845398936837, "grad_norm": 0.4883213202386286, "learning_rate": 2.9068685884666644e-06, "loss": 0.0212, "step": 113470 }, { "epoch": 0.47347931670435867, "grad_norm": 0.5986236578850683, "learning_rate": 2.9068045451481403e-06, "loss": 0.0202, "step": 113475 }, { "epoch": 0.4735001794193489, "grad_norm": 0.9128562915940523, "learning_rate": 2.906740506062384e-06, "loss": 0.0401, "step": 113480 }, { "epoch": 0.4735210421343392, "grad_norm": 0.5322023142936622, "learning_rate": 2.9066764712089274e-06, "loss": 0.0208, "step": 113485 }, { "epoch": 0.47354190484932945, "grad_norm": 1.0676176017106518, "learning_rate": 2.906612440587306e-06, "loss": 0.0332, "step": 113490 }, { "epoch": 0.47356276756431975, "grad_norm": 0.5628678066536134, "learning_rate": 2.9065484141970534e-06, "loss": 0.0277, "step": 113495 }, { "epoch": 0.47358363027931005, "grad_norm": 0.6390007323152981, "learning_rate": 2.9064843920377033e-06, "loss": 0.0215, "step": 113500 }, { "epoch": 0.4736044929943003, "grad_norm": 0.8156889509895954, "learning_rate": 2.9064203741087898e-06, "loss": 0.0242, "step": 113505 }, { "epoch": 0.4736253557092906, "grad_norm": 0.7420112251506451, "learning_rate": 2.906356360409847e-06, "loss": 0.0477, "step": 113510 }, { "epoch": 0.47364621842428084, "grad_norm": 0.3819214347827844, "learning_rate": 2.9062923509404095e-06, "loss": 0.0265, "step": 113515 }, { "epoch": 0.47366708113927114, "grad_norm": 1.1206865350444972, "learning_rate": 2.9062283457000114e-06, "loss": 0.0228, "step": 113520 }, { "epoch": 0.47368794385426144, "grad_norm": 0.9234582731343537, "learning_rate": 2.9061643446881864e-06, "loss": 0.0283, "step": 113525 }, { "epoch": 0.4737088065692517, "grad_norm": 0.7431640947908998, "learning_rate": 2.9061003479044695e-06, "loss": 0.025, "step": 113530 }, { "epoch": 0.473729669284242, "grad_norm": 0.6695924125705969, "learning_rate": 2.9060363553483957e-06, "loss": 0.024, "step": 113535 }, { "epoch": 0.4737505319992322, "grad_norm": 1.046171157310417, "learning_rate": 2.9059723670194983e-06, "loss": 0.0247, "step": 113540 }, { "epoch": 0.4737713947142225, "grad_norm": 0.3679684738511219, "learning_rate": 2.905908382917313e-06, "loss": 0.0285, "step": 113545 }, { "epoch": 0.4737922574292128, "grad_norm": 1.0449904315511795, "learning_rate": 2.9058444030413733e-06, "loss": 0.0218, "step": 113550 }, { "epoch": 0.47381312014420307, "grad_norm": 0.7525808422868897, "learning_rate": 2.905780427391216e-06, "loss": 0.0218, "step": 113555 }, { "epoch": 0.47383398285919337, "grad_norm": 0.44019296776898537, "learning_rate": 2.9057164559663736e-06, "loss": 0.0215, "step": 113560 }, { "epoch": 0.47385484557418367, "grad_norm": 0.6069771241226254, "learning_rate": 2.9056524887663823e-06, "loss": 0.0217, "step": 113565 }, { "epoch": 0.4738757082891739, "grad_norm": 1.1157003762100863, "learning_rate": 2.905588525790777e-06, "loss": 0.0359, "step": 113570 }, { "epoch": 0.4738965710041642, "grad_norm": 0.9472357500405616, "learning_rate": 2.905524567039093e-06, "loss": 0.0279, "step": 113575 }, { "epoch": 0.47391743371915446, "grad_norm": 1.022352653597771, "learning_rate": 2.905460612510864e-06, "loss": 0.0255, "step": 113580 }, { "epoch": 0.47393829643414476, "grad_norm": 0.5704060830633613, "learning_rate": 2.905396662205627e-06, "loss": 0.0332, "step": 113585 }, { "epoch": 0.47395915914913506, "grad_norm": 0.3801947575251764, "learning_rate": 2.905332716122917e-06, "loss": 0.0276, "step": 113590 }, { "epoch": 0.4739800218641253, "grad_norm": 0.4879129735810844, "learning_rate": 2.905268774262268e-06, "loss": 0.0176, "step": 113595 }, { "epoch": 0.4740008845791156, "grad_norm": 0.6263848216194369, "learning_rate": 2.905204836623217e-06, "loss": 0.0255, "step": 113600 }, { "epoch": 0.47402174729410584, "grad_norm": 0.4540205462221688, "learning_rate": 2.9051409032052985e-06, "loss": 0.0204, "step": 113605 }, { "epoch": 0.47404261000909614, "grad_norm": 0.42065160647895766, "learning_rate": 2.9050769740080486e-06, "loss": 0.0205, "step": 113610 }, { "epoch": 0.47406347272408644, "grad_norm": 0.6173881885865441, "learning_rate": 2.9050130490310026e-06, "loss": 0.029, "step": 113615 }, { "epoch": 0.4740843354390767, "grad_norm": 0.620748291107181, "learning_rate": 2.9049491282736957e-06, "loss": 0.0255, "step": 113620 }, { "epoch": 0.474105198154067, "grad_norm": 0.6401712728058244, "learning_rate": 2.9048852117356652e-06, "loss": 0.03, "step": 113625 }, { "epoch": 0.47412606086905723, "grad_norm": 0.34848844645243965, "learning_rate": 2.904821299416445e-06, "loss": 0.0179, "step": 113630 }, { "epoch": 0.47414692358404753, "grad_norm": 0.6251738884643103, "learning_rate": 2.9047573913155725e-06, "loss": 0.026, "step": 113635 }, { "epoch": 0.47416778629903783, "grad_norm": 0.9446950510953709, "learning_rate": 2.9046934874325832e-06, "loss": 0.0242, "step": 113640 }, { "epoch": 0.4741886490140281, "grad_norm": 1.3914407495342986, "learning_rate": 2.9046295877670133e-06, "loss": 0.0257, "step": 113645 }, { "epoch": 0.4742095117290184, "grad_norm": 1.1909025157939341, "learning_rate": 2.904565692318399e-06, "loss": 0.0379, "step": 113650 }, { "epoch": 0.4742303744440087, "grad_norm": 0.6673454738886185, "learning_rate": 2.9045018010862758e-06, "loss": 0.0201, "step": 113655 }, { "epoch": 0.4742512371589989, "grad_norm": 0.5097929957096416, "learning_rate": 2.9044379140701807e-06, "loss": 0.0223, "step": 113660 }, { "epoch": 0.4742720998739892, "grad_norm": 0.5799105652791812, "learning_rate": 2.9043740312696496e-06, "loss": 0.0271, "step": 113665 }, { "epoch": 0.47429296258897946, "grad_norm": 0.7685584761062113, "learning_rate": 2.9043101526842195e-06, "loss": 0.0278, "step": 113670 }, { "epoch": 0.47431382530396976, "grad_norm": 0.6422315478009665, "learning_rate": 2.9042462783134264e-06, "loss": 0.0289, "step": 113675 }, { "epoch": 0.47433468801896006, "grad_norm": 0.8493137246357417, "learning_rate": 2.9041824081568063e-06, "loss": 0.0222, "step": 113680 }, { "epoch": 0.4743555507339503, "grad_norm": 0.728859145067179, "learning_rate": 2.9041185422138974e-06, "loss": 0.0247, "step": 113685 }, { "epoch": 0.4743764134489406, "grad_norm": 0.5417022184941109, "learning_rate": 2.9040546804842345e-06, "loss": 0.025, "step": 113690 }, { "epoch": 0.47439727616393085, "grad_norm": 0.6798072660922535, "learning_rate": 2.9039908229673565e-06, "loss": 0.017, "step": 113695 }, { "epoch": 0.47441813887892115, "grad_norm": 0.5351901801564944, "learning_rate": 2.9039269696627988e-06, "loss": 0.0217, "step": 113700 }, { "epoch": 0.47443900159391145, "grad_norm": 0.8000189001382455, "learning_rate": 2.903863120570098e-06, "loss": 0.0282, "step": 113705 }, { "epoch": 0.4744598643089017, "grad_norm": 1.0061737685811538, "learning_rate": 2.9037992756887924e-06, "loss": 0.0273, "step": 113710 }, { "epoch": 0.474480727023892, "grad_norm": 0.8137064288919171, "learning_rate": 2.9037354350184176e-06, "loss": 0.0294, "step": 113715 }, { "epoch": 0.47450158973888223, "grad_norm": 0.44256205497019746, "learning_rate": 2.9036715985585123e-06, "loss": 0.0265, "step": 113720 }, { "epoch": 0.47452245245387253, "grad_norm": 0.7333609143191155, "learning_rate": 2.9036077663086127e-06, "loss": 0.0206, "step": 113725 }, { "epoch": 0.47454331516886283, "grad_norm": 0.690437374794718, "learning_rate": 2.9035439382682568e-06, "loss": 0.0241, "step": 113730 }, { "epoch": 0.4745641778838531, "grad_norm": 0.28464002715010156, "learning_rate": 2.90348011443698e-06, "loss": 0.0263, "step": 113735 }, { "epoch": 0.4745850405988434, "grad_norm": 0.7934496950130635, "learning_rate": 2.9034162948143223e-06, "loss": 0.0311, "step": 113740 }, { "epoch": 0.4746059033138336, "grad_norm": 0.7088258179701206, "learning_rate": 2.9033524793998196e-06, "loss": 0.0198, "step": 113745 }, { "epoch": 0.4746267660288239, "grad_norm": 0.6476475315295493, "learning_rate": 2.90328866819301e-06, "loss": 0.0243, "step": 113750 }, { "epoch": 0.4746476287438142, "grad_norm": 0.8654605965762319, "learning_rate": 2.9032248611934317e-06, "loss": 0.028, "step": 113755 }, { "epoch": 0.47466849145880446, "grad_norm": 0.8279817652414032, "learning_rate": 2.9031610584006203e-06, "loss": 0.0202, "step": 113760 }, { "epoch": 0.47468935417379476, "grad_norm": 0.7849197304798284, "learning_rate": 2.903097259814116e-06, "loss": 0.0244, "step": 113765 }, { "epoch": 0.47471021688878506, "grad_norm": 1.0381416421218166, "learning_rate": 2.9030334654334553e-06, "loss": 0.0226, "step": 113770 }, { "epoch": 0.4747310796037753, "grad_norm": 0.7364935912860253, "learning_rate": 2.9029696752581767e-06, "loss": 0.0199, "step": 113775 }, { "epoch": 0.4747519423187656, "grad_norm": 0.739540490192123, "learning_rate": 2.9029058892878174e-06, "loss": 0.0209, "step": 113780 }, { "epoch": 0.47477280503375585, "grad_norm": 1.519947191225627, "learning_rate": 2.902842107521916e-06, "loss": 0.025, "step": 113785 }, { "epoch": 0.47479366774874615, "grad_norm": 0.6606973453647395, "learning_rate": 2.902778329960011e-06, "loss": 0.0198, "step": 113790 }, { "epoch": 0.47481453046373645, "grad_norm": 0.5589382280853719, "learning_rate": 2.9027145566016403e-06, "loss": 0.0234, "step": 113795 }, { "epoch": 0.4748353931787267, "grad_norm": 1.7204643732662326, "learning_rate": 2.902650787446341e-06, "loss": 0.03, "step": 113800 }, { "epoch": 0.474856255893717, "grad_norm": 0.7879412558030598, "learning_rate": 2.9025870224936534e-06, "loss": 0.0223, "step": 113805 }, { "epoch": 0.47487711860870724, "grad_norm": 0.6838550669280559, "learning_rate": 2.902523261743115e-06, "loss": 0.0268, "step": 113810 }, { "epoch": 0.47489798132369754, "grad_norm": 0.5511068139982624, "learning_rate": 2.9024595051942637e-06, "loss": 0.0249, "step": 113815 }, { "epoch": 0.47491884403868784, "grad_norm": 1.0515629827224091, "learning_rate": 2.9023957528466397e-06, "loss": 0.0434, "step": 113820 }, { "epoch": 0.4749397067536781, "grad_norm": 1.2960066483905284, "learning_rate": 2.9023320046997794e-06, "loss": 0.0374, "step": 113825 }, { "epoch": 0.4749605694686684, "grad_norm": 0.4646421896654827, "learning_rate": 2.9022682607532234e-06, "loss": 0.0251, "step": 113830 }, { "epoch": 0.4749814321836586, "grad_norm": 0.2539302067452195, "learning_rate": 2.902204521006509e-06, "loss": 0.0299, "step": 113835 }, { "epoch": 0.4750022948986489, "grad_norm": 0.6000861742824737, "learning_rate": 2.902140785459176e-06, "loss": 0.0235, "step": 113840 }, { "epoch": 0.4750231576136392, "grad_norm": 0.34507735523560723, "learning_rate": 2.902077054110763e-06, "loss": 0.0212, "step": 113845 }, { "epoch": 0.47504402032862947, "grad_norm": 0.7820622701508438, "learning_rate": 2.9020133269608096e-06, "loss": 0.0232, "step": 113850 }, { "epoch": 0.47506488304361977, "grad_norm": 0.5311448856657859, "learning_rate": 2.901949604008854e-06, "loss": 0.025, "step": 113855 }, { "epoch": 0.47508574575861007, "grad_norm": 0.7378247568466395, "learning_rate": 2.901885885254435e-06, "loss": 0.0295, "step": 113860 }, { "epoch": 0.4751066084736003, "grad_norm": 0.6475323191836778, "learning_rate": 2.9018221706970934e-06, "loss": 0.0213, "step": 113865 }, { "epoch": 0.4751274711885906, "grad_norm": 0.5887634006666977, "learning_rate": 2.901758460336367e-06, "loss": 0.03, "step": 113870 }, { "epoch": 0.47514833390358086, "grad_norm": 0.7042001816196348, "learning_rate": 2.9016947541717947e-06, "loss": 0.0252, "step": 113875 }, { "epoch": 0.47516919661857115, "grad_norm": 0.989209648742047, "learning_rate": 2.9016310522029185e-06, "loss": 0.0283, "step": 113880 }, { "epoch": 0.47519005933356145, "grad_norm": 0.9225757759336347, "learning_rate": 2.9015673544292744e-06, "loss": 0.0279, "step": 113885 }, { "epoch": 0.4752109220485517, "grad_norm": 0.8290868360597603, "learning_rate": 2.9015036608504044e-06, "loss": 0.0234, "step": 113890 }, { "epoch": 0.475231784763542, "grad_norm": 0.737395396887892, "learning_rate": 2.9014399714658475e-06, "loss": 0.0289, "step": 113895 }, { "epoch": 0.47525264747853224, "grad_norm": 0.7193389157227357, "learning_rate": 2.9013762862751434e-06, "loss": 0.0252, "step": 113900 }, { "epoch": 0.47527351019352254, "grad_norm": 0.8155029363169398, "learning_rate": 2.9013126052778314e-06, "loss": 0.0278, "step": 113905 }, { "epoch": 0.47529437290851284, "grad_norm": 0.7706869608615385, "learning_rate": 2.901248928473452e-06, "loss": 0.0193, "step": 113910 }, { "epoch": 0.4753152356235031, "grad_norm": 0.8625769904119532, "learning_rate": 2.9011852558615446e-06, "loss": 0.0279, "step": 113915 }, { "epoch": 0.4753360983384934, "grad_norm": 0.7469485460065148, "learning_rate": 2.9011215874416495e-06, "loss": 0.0214, "step": 113920 }, { "epoch": 0.47535696105348363, "grad_norm": 0.4415110004673291, "learning_rate": 2.901057923213306e-06, "loss": 0.0242, "step": 113925 }, { "epoch": 0.47537782376847393, "grad_norm": 0.8627509628769813, "learning_rate": 2.9009942631760553e-06, "loss": 0.0292, "step": 113930 }, { "epoch": 0.47539868648346423, "grad_norm": 0.6450103844020606, "learning_rate": 2.9009306073294367e-06, "loss": 0.0277, "step": 113935 }, { "epoch": 0.47541954919845447, "grad_norm": 0.5599880904158684, "learning_rate": 2.900866955672991e-06, "loss": 0.0216, "step": 113940 }, { "epoch": 0.47544041191344477, "grad_norm": 0.8293001825986852, "learning_rate": 2.900803308206258e-06, "loss": 0.0208, "step": 113945 }, { "epoch": 0.47546127462843507, "grad_norm": 0.6887604279465251, "learning_rate": 2.9007396649287796e-06, "loss": 0.0208, "step": 113950 }, { "epoch": 0.4754821373434253, "grad_norm": 0.6721741064797316, "learning_rate": 2.900676025840094e-06, "loss": 0.0218, "step": 113955 }, { "epoch": 0.4755030000584156, "grad_norm": 1.0412010761200097, "learning_rate": 2.9006123909397433e-06, "loss": 0.0326, "step": 113960 }, { "epoch": 0.47552386277340586, "grad_norm": 0.7397900704992698, "learning_rate": 2.9005487602272664e-06, "loss": 0.0245, "step": 113965 }, { "epoch": 0.47554472548839616, "grad_norm": 0.4302828304646363, "learning_rate": 2.9004851337022066e-06, "loss": 0.0176, "step": 113970 }, { "epoch": 0.47556558820338646, "grad_norm": 0.581590438382283, "learning_rate": 2.9004215113641026e-06, "loss": 0.0242, "step": 113975 }, { "epoch": 0.4755864509183767, "grad_norm": 0.740717740009464, "learning_rate": 2.900357893212496e-06, "loss": 0.0271, "step": 113980 }, { "epoch": 0.475607313633367, "grad_norm": 0.5951151125872711, "learning_rate": 2.9002942792469276e-06, "loss": 0.0197, "step": 113985 }, { "epoch": 0.47562817634835725, "grad_norm": 0.7904243143814572, "learning_rate": 2.900230669466938e-06, "loss": 0.019, "step": 113990 }, { "epoch": 0.47564903906334755, "grad_norm": 0.44333057512848656, "learning_rate": 2.9001670638720685e-06, "loss": 0.0188, "step": 113995 }, { "epoch": 0.47566990177833784, "grad_norm": 0.350562671065036, "learning_rate": 2.900103462461861e-06, "loss": 0.0244, "step": 114000 }, { "epoch": 0.4756907644933281, "grad_norm": 1.1480914835896063, "learning_rate": 2.9000398652358545e-06, "loss": 0.0282, "step": 114005 }, { "epoch": 0.4757116272083184, "grad_norm": 0.9144474524815952, "learning_rate": 2.8999762721935924e-06, "loss": 0.0223, "step": 114010 }, { "epoch": 0.47573248992330863, "grad_norm": 0.48710465706607303, "learning_rate": 2.899912683334615e-06, "loss": 0.0187, "step": 114015 }, { "epoch": 0.47575335263829893, "grad_norm": 0.8489678447728989, "learning_rate": 2.899849098658465e-06, "loss": 0.021, "step": 114020 }, { "epoch": 0.47577421535328923, "grad_norm": 0.38165267785600826, "learning_rate": 2.899785518164681e-06, "loss": 0.018, "step": 114025 }, { "epoch": 0.4757950780682795, "grad_norm": 0.49806814270049443, "learning_rate": 2.899721941852807e-06, "loss": 0.0366, "step": 114030 }, { "epoch": 0.4758159407832698, "grad_norm": 0.9842868295703727, "learning_rate": 2.8996583697223845e-06, "loss": 0.0225, "step": 114035 }, { "epoch": 0.4758368034982601, "grad_norm": 1.0459464965867222, "learning_rate": 2.8995948017729542e-06, "loss": 0.0331, "step": 114040 }, { "epoch": 0.4758576662132503, "grad_norm": 0.839077958432325, "learning_rate": 2.8995312380040572e-06, "loss": 0.0317, "step": 114045 }, { "epoch": 0.4758785289282406, "grad_norm": 0.6333592407469738, "learning_rate": 2.899467678415237e-06, "loss": 0.026, "step": 114050 }, { "epoch": 0.47589939164323086, "grad_norm": 0.7313836806302803, "learning_rate": 2.899404123006035e-06, "loss": 0.0169, "step": 114055 }, { "epoch": 0.47592025435822116, "grad_norm": 0.5514688899054683, "learning_rate": 2.8993405717759925e-06, "loss": 0.0225, "step": 114060 }, { "epoch": 0.47594111707321146, "grad_norm": 0.6615296612031375, "learning_rate": 2.8992770247246515e-06, "loss": 0.0263, "step": 114065 }, { "epoch": 0.4759619797882017, "grad_norm": 0.4660269468111427, "learning_rate": 2.899213481851555e-06, "loss": 0.0252, "step": 114070 }, { "epoch": 0.475982842503192, "grad_norm": 0.6671242283247142, "learning_rate": 2.899149943156244e-06, "loss": 0.0234, "step": 114075 }, { "epoch": 0.47600370521818225, "grad_norm": 0.5232555614896297, "learning_rate": 2.899086408638262e-06, "loss": 0.0181, "step": 114080 }, { "epoch": 0.47602456793317255, "grad_norm": 0.9082142263749207, "learning_rate": 2.89902287829715e-06, "loss": 0.0239, "step": 114085 }, { "epoch": 0.47604543064816285, "grad_norm": 1.1455615853480123, "learning_rate": 2.898959352132451e-06, "loss": 0.0276, "step": 114090 }, { "epoch": 0.4760662933631531, "grad_norm": 1.3494066942730085, "learning_rate": 2.8988958301437083e-06, "loss": 0.023, "step": 114095 }, { "epoch": 0.4760871560781434, "grad_norm": 1.1259424480839717, "learning_rate": 2.8988323123304623e-06, "loss": 0.0321, "step": 114100 }, { "epoch": 0.47610801879313364, "grad_norm": 0.7559451505984452, "learning_rate": 2.8987687986922574e-06, "loss": 0.0234, "step": 114105 }, { "epoch": 0.47612888150812394, "grad_norm": 0.6678822978347461, "learning_rate": 2.8987052892286353e-06, "loss": 0.0261, "step": 114110 }, { "epoch": 0.47614974422311424, "grad_norm": 1.1513935038077694, "learning_rate": 2.8986417839391394e-06, "loss": 0.0413, "step": 114115 }, { "epoch": 0.4761706069381045, "grad_norm": 0.8498158865639243, "learning_rate": 2.898578282823312e-06, "loss": 0.0268, "step": 114120 }, { "epoch": 0.4761914696530948, "grad_norm": 1.204909917738231, "learning_rate": 2.898514785880696e-06, "loss": 0.0318, "step": 114125 }, { "epoch": 0.4762123323680851, "grad_norm": 0.2031789571701838, "learning_rate": 2.8984512931108344e-06, "loss": 0.0149, "step": 114130 }, { "epoch": 0.4762331950830753, "grad_norm": 1.115837448313249, "learning_rate": 2.89838780451327e-06, "loss": 0.025, "step": 114135 }, { "epoch": 0.4762540577980656, "grad_norm": 0.9638531215805144, "learning_rate": 2.8983243200875468e-06, "loss": 0.0236, "step": 114140 }, { "epoch": 0.47627492051305587, "grad_norm": 0.7047428965621493, "learning_rate": 2.898260839833206e-06, "loss": 0.0274, "step": 114145 }, { "epoch": 0.47629578322804617, "grad_norm": 0.5325002814227809, "learning_rate": 2.898197363749793e-06, "loss": 0.0236, "step": 114150 }, { "epoch": 0.47631664594303647, "grad_norm": 0.7654483376056802, "learning_rate": 2.8981338918368496e-06, "loss": 0.0259, "step": 114155 }, { "epoch": 0.4763375086580267, "grad_norm": 0.655834611602601, "learning_rate": 2.8980704240939196e-06, "loss": 0.0283, "step": 114160 }, { "epoch": 0.476358371373017, "grad_norm": 0.8790833449207485, "learning_rate": 2.8980069605205465e-06, "loss": 0.0252, "step": 114165 }, { "epoch": 0.47637923408800725, "grad_norm": 0.5803053124756647, "learning_rate": 2.897943501116274e-06, "loss": 0.0228, "step": 114170 }, { "epoch": 0.47640009680299755, "grad_norm": 0.8693062691732428, "learning_rate": 2.8978800458806442e-06, "loss": 0.0203, "step": 114175 }, { "epoch": 0.47642095951798785, "grad_norm": 1.1207345360420586, "learning_rate": 2.897816594813203e-06, "loss": 0.0244, "step": 114180 }, { "epoch": 0.4764418222329781, "grad_norm": 0.4505780348653506, "learning_rate": 2.897753147913493e-06, "loss": 0.0199, "step": 114185 }, { "epoch": 0.4764626849479684, "grad_norm": 0.5929195964368319, "learning_rate": 2.8976897051810576e-06, "loss": 0.0214, "step": 114190 }, { "epoch": 0.47648354766295864, "grad_norm": 0.46014219046664695, "learning_rate": 2.897626266615441e-06, "loss": 0.025, "step": 114195 }, { "epoch": 0.47650441037794894, "grad_norm": 1.1126477198394926, "learning_rate": 2.897562832216187e-06, "loss": 0.0246, "step": 114200 }, { "epoch": 0.47652527309293924, "grad_norm": 0.5752227099885563, "learning_rate": 2.8974994019828395e-06, "loss": 0.0251, "step": 114205 }, { "epoch": 0.4765461358079295, "grad_norm": 1.291646500803961, "learning_rate": 2.8974359759149427e-06, "loss": 0.0286, "step": 114210 }, { "epoch": 0.4765669985229198, "grad_norm": 0.6627655451947243, "learning_rate": 2.897372554012041e-06, "loss": 0.0204, "step": 114215 }, { "epoch": 0.4765878612379101, "grad_norm": 0.7261193327285844, "learning_rate": 2.897309136273678e-06, "loss": 0.0245, "step": 114220 }, { "epoch": 0.4766087239529003, "grad_norm": 0.572783122094077, "learning_rate": 2.897245722699398e-06, "loss": 0.0239, "step": 114225 }, { "epoch": 0.4766295866678906, "grad_norm": 0.6819982134099586, "learning_rate": 2.897182313288746e-06, "loss": 0.0214, "step": 114230 }, { "epoch": 0.47665044938288087, "grad_norm": 0.8064965316574186, "learning_rate": 2.8971189080412663e-06, "loss": 0.0215, "step": 114235 }, { "epoch": 0.47667131209787117, "grad_norm": 0.5119572616138647, "learning_rate": 2.8970555069565026e-06, "loss": 0.0282, "step": 114240 }, { "epoch": 0.47669217481286147, "grad_norm": 0.582108660359934, "learning_rate": 2.8969921100339994e-06, "loss": 0.0188, "step": 114245 }, { "epoch": 0.4767130375278517, "grad_norm": 0.3792681344491257, "learning_rate": 2.8969287172733024e-06, "loss": 0.0204, "step": 114250 }, { "epoch": 0.476733900242842, "grad_norm": 0.41834371662931974, "learning_rate": 2.896865328673955e-06, "loss": 0.0223, "step": 114255 }, { "epoch": 0.47675476295783226, "grad_norm": 0.7787384307550842, "learning_rate": 2.8968019442355033e-06, "loss": 0.022, "step": 114260 }, { "epoch": 0.47677562567282256, "grad_norm": 0.8054678192200571, "learning_rate": 2.8967385639574907e-06, "loss": 0.0252, "step": 114265 }, { "epoch": 0.47679648838781286, "grad_norm": 1.2258629253403337, "learning_rate": 2.8966751878394635e-06, "loss": 0.0232, "step": 114270 }, { "epoch": 0.4768173511028031, "grad_norm": 0.6374574404854425, "learning_rate": 2.896611815880965e-06, "loss": 0.0222, "step": 114275 }, { "epoch": 0.4768382138177934, "grad_norm": 0.5154662511104087, "learning_rate": 2.8965484480815414e-06, "loss": 0.0197, "step": 114280 }, { "epoch": 0.47685907653278364, "grad_norm": 0.7946743137925215, "learning_rate": 2.8964850844407377e-06, "loss": 0.0226, "step": 114285 }, { "epoch": 0.47687993924777394, "grad_norm": 0.6706270015630079, "learning_rate": 2.896421724958099e-06, "loss": 0.0342, "step": 114290 }, { "epoch": 0.47690080196276424, "grad_norm": 0.4496283188634442, "learning_rate": 2.89635836963317e-06, "loss": 0.0345, "step": 114295 }, { "epoch": 0.4769216646777545, "grad_norm": 0.4430263944099951, "learning_rate": 2.8962950184654963e-06, "loss": 0.0212, "step": 114300 }, { "epoch": 0.4769425273927448, "grad_norm": 0.5752056770224934, "learning_rate": 2.8962316714546235e-06, "loss": 0.0242, "step": 114305 }, { "epoch": 0.4769633901077351, "grad_norm": 0.6962424660972085, "learning_rate": 2.8961683286000976e-06, "loss": 0.0248, "step": 114310 }, { "epoch": 0.47698425282272533, "grad_norm": 0.6194866631335605, "learning_rate": 2.8961049899014627e-06, "loss": 0.0204, "step": 114315 }, { "epoch": 0.47700511553771563, "grad_norm": 0.6742690200266463, "learning_rate": 2.8960416553582648e-06, "loss": 0.0261, "step": 114320 }, { "epoch": 0.4770259782527059, "grad_norm": 0.9143597152759225, "learning_rate": 2.89597832497005e-06, "loss": 0.038, "step": 114325 }, { "epoch": 0.4770468409676962, "grad_norm": 0.7136625174469636, "learning_rate": 2.8959149987363644e-06, "loss": 0.0222, "step": 114330 }, { "epoch": 0.4770677036826865, "grad_norm": 0.9067633264019627, "learning_rate": 2.895851676656753e-06, "loss": 0.0245, "step": 114335 }, { "epoch": 0.4770885663976767, "grad_norm": 0.6976278870256898, "learning_rate": 2.8957883587307618e-06, "loss": 0.0279, "step": 114340 }, { "epoch": 0.477109429112667, "grad_norm": 0.9229822764006452, "learning_rate": 2.8957250449579365e-06, "loss": 0.0232, "step": 114345 }, { "epoch": 0.47713029182765726, "grad_norm": 0.8966348451097554, "learning_rate": 2.8956617353378237e-06, "loss": 0.0278, "step": 114350 }, { "epoch": 0.47715115454264756, "grad_norm": 0.7088508081480356, "learning_rate": 2.895598429869969e-06, "loss": 0.0249, "step": 114355 }, { "epoch": 0.47717201725763786, "grad_norm": 0.5739642898657437, "learning_rate": 2.8955351285539185e-06, "loss": 0.0326, "step": 114360 }, { "epoch": 0.4771928799726281, "grad_norm": 0.6844733974344689, "learning_rate": 2.8954718313892193e-06, "loss": 0.026, "step": 114365 }, { "epoch": 0.4772137426876184, "grad_norm": 0.9201644602598151, "learning_rate": 2.8954085383754165e-06, "loss": 0.0253, "step": 114370 }, { "epoch": 0.47723460540260865, "grad_norm": 0.8309085210075989, "learning_rate": 2.895345249512057e-06, "loss": 0.0252, "step": 114375 }, { "epoch": 0.47725546811759895, "grad_norm": 0.8500056484970923, "learning_rate": 2.8952819647986867e-06, "loss": 0.0253, "step": 114380 }, { "epoch": 0.47727633083258925, "grad_norm": 0.8177011888433019, "learning_rate": 2.8952186842348533e-06, "loss": 0.0249, "step": 114385 }, { "epoch": 0.4772971935475795, "grad_norm": 0.5547763075199946, "learning_rate": 2.8951554078201023e-06, "loss": 0.0193, "step": 114390 }, { "epoch": 0.4773180562625698, "grad_norm": 0.5061970289697075, "learning_rate": 2.8950921355539803e-06, "loss": 0.0304, "step": 114395 }, { "epoch": 0.4773389189775601, "grad_norm": 0.647907761799655, "learning_rate": 2.8950288674360344e-06, "loss": 0.0236, "step": 114400 }, { "epoch": 0.47735978169255033, "grad_norm": 0.5752543614520986, "learning_rate": 2.8949656034658106e-06, "loss": 0.0233, "step": 114405 }, { "epoch": 0.47738064440754063, "grad_norm": 0.6660721376460297, "learning_rate": 2.8949023436428575e-06, "loss": 0.0277, "step": 114410 }, { "epoch": 0.4774015071225309, "grad_norm": 0.7157166572841578, "learning_rate": 2.8948390879667198e-06, "loss": 0.0268, "step": 114415 }, { "epoch": 0.4774223698375212, "grad_norm": 0.8457190995192108, "learning_rate": 2.894775836436946e-06, "loss": 0.025, "step": 114420 }, { "epoch": 0.4774432325525115, "grad_norm": 0.7755662127037058, "learning_rate": 2.894712589053083e-06, "loss": 0.038, "step": 114425 }, { "epoch": 0.4774640952675017, "grad_norm": 0.9056723297686219, "learning_rate": 2.8946493458146767e-06, "loss": 0.0286, "step": 114430 }, { "epoch": 0.477484957982492, "grad_norm": 0.42081029662438396, "learning_rate": 2.894586106721276e-06, "loss": 0.021, "step": 114435 }, { "epoch": 0.47750582069748226, "grad_norm": 0.41445366577120196, "learning_rate": 2.8945228717724268e-06, "loss": 0.0259, "step": 114440 }, { "epoch": 0.47752668341247256, "grad_norm": 0.464910706863125, "learning_rate": 2.894459640967677e-06, "loss": 0.0166, "step": 114445 }, { "epoch": 0.47754754612746286, "grad_norm": 0.494977838642995, "learning_rate": 2.8943964143065735e-06, "loss": 0.0318, "step": 114450 }, { "epoch": 0.4775684088424531, "grad_norm": 0.4168647279499173, "learning_rate": 2.894333191788664e-06, "loss": 0.0277, "step": 114455 }, { "epoch": 0.4775892715574434, "grad_norm": 0.3618923172298741, "learning_rate": 2.8942699734134967e-06, "loss": 0.0259, "step": 114460 }, { "epoch": 0.47761013427243365, "grad_norm": 0.7911667729342333, "learning_rate": 2.894206759180618e-06, "loss": 0.0287, "step": 114465 }, { "epoch": 0.47763099698742395, "grad_norm": 0.9808502497107682, "learning_rate": 2.894143549089577e-06, "loss": 0.0292, "step": 114470 }, { "epoch": 0.47765185970241425, "grad_norm": 0.36268646246224695, "learning_rate": 2.89408034313992e-06, "loss": 0.022, "step": 114475 }, { "epoch": 0.4776727224174045, "grad_norm": 0.6791697998106297, "learning_rate": 2.8940171413311958e-06, "loss": 0.0279, "step": 114480 }, { "epoch": 0.4776935851323948, "grad_norm": 0.508430491533653, "learning_rate": 2.893953943662951e-06, "loss": 0.029, "step": 114485 }, { "epoch": 0.4777144478473851, "grad_norm": 0.5457109865451513, "learning_rate": 2.893890750134735e-06, "loss": 0.0227, "step": 114490 }, { "epoch": 0.47773531056237534, "grad_norm": 0.9075270398657027, "learning_rate": 2.893827560746095e-06, "loss": 0.0265, "step": 114495 }, { "epoch": 0.47775617327736564, "grad_norm": 0.9306998265287592, "learning_rate": 2.8937643754965796e-06, "loss": 0.0236, "step": 114500 }, { "epoch": 0.4777770359923559, "grad_norm": 0.7001390341134337, "learning_rate": 2.893701194385737e-06, "loss": 0.0247, "step": 114505 }, { "epoch": 0.4777978987073462, "grad_norm": 0.7809039646084617, "learning_rate": 2.893638017413114e-06, "loss": 0.021, "step": 114510 }, { "epoch": 0.4778187614223365, "grad_norm": 0.905278608886249, "learning_rate": 2.89357484457826e-06, "loss": 0.0289, "step": 114515 }, { "epoch": 0.4778396241373267, "grad_norm": 0.56895642400658, "learning_rate": 2.8935116758807237e-06, "loss": 0.0239, "step": 114520 }, { "epoch": 0.477860486852317, "grad_norm": 0.6292826665780654, "learning_rate": 2.8934485113200527e-06, "loss": 0.0253, "step": 114525 }, { "epoch": 0.47788134956730727, "grad_norm": 1.913914115301188, "learning_rate": 2.893385350895796e-06, "loss": 0.022, "step": 114530 }, { "epoch": 0.47790221228229757, "grad_norm": 0.44052690961274843, "learning_rate": 2.8933221946075023e-06, "loss": 0.0192, "step": 114535 }, { "epoch": 0.47792307499728787, "grad_norm": 0.749058142016434, "learning_rate": 2.8932590424547197e-06, "loss": 0.0291, "step": 114540 }, { "epoch": 0.4779439377122781, "grad_norm": 1.2215821436175138, "learning_rate": 2.893195894436997e-06, "loss": 0.03, "step": 114545 }, { "epoch": 0.4779648004272684, "grad_norm": 0.8937445335567861, "learning_rate": 2.893132750553883e-06, "loss": 0.0164, "step": 114550 }, { "epoch": 0.47798566314225865, "grad_norm": 0.46366669676857347, "learning_rate": 2.8930696108049265e-06, "loss": 0.019, "step": 114555 }, { "epoch": 0.47800652585724895, "grad_norm": 0.5317845089893112, "learning_rate": 2.893006475189677e-06, "loss": 0.0273, "step": 114560 }, { "epoch": 0.47802738857223925, "grad_norm": 0.5330950408501094, "learning_rate": 2.892943343707683e-06, "loss": 0.0215, "step": 114565 }, { "epoch": 0.4780482512872295, "grad_norm": 0.7297016991876828, "learning_rate": 2.892880216358493e-06, "loss": 0.0221, "step": 114570 }, { "epoch": 0.4780691140022198, "grad_norm": 0.5027187839878094, "learning_rate": 2.892817093141657e-06, "loss": 0.0224, "step": 114575 }, { "epoch": 0.4780899767172101, "grad_norm": 0.47384566728961514, "learning_rate": 2.8927539740567234e-06, "loss": 0.0254, "step": 114580 }, { "epoch": 0.47811083943220034, "grad_norm": 0.49185892018179683, "learning_rate": 2.8926908591032426e-06, "loss": 0.0266, "step": 114585 }, { "epoch": 0.47813170214719064, "grad_norm": 0.4108280255569271, "learning_rate": 2.8926277482807625e-06, "loss": 0.0262, "step": 114590 }, { "epoch": 0.4781525648621809, "grad_norm": 0.47361626008802477, "learning_rate": 2.8925646415888336e-06, "loss": 0.0312, "step": 114595 }, { "epoch": 0.4781734275771712, "grad_norm": 1.365857899235485, "learning_rate": 2.8925015390270043e-06, "loss": 0.0336, "step": 114600 }, { "epoch": 0.4781942902921615, "grad_norm": 0.7601353612321136, "learning_rate": 2.8924384405948252e-06, "loss": 0.0206, "step": 114605 }, { "epoch": 0.4782151530071517, "grad_norm": 0.9824754248370318, "learning_rate": 2.8923753462918454e-06, "loss": 0.0282, "step": 114610 }, { "epoch": 0.478236015722142, "grad_norm": 1.1082151208942723, "learning_rate": 2.892312256117615e-06, "loss": 0.0261, "step": 114615 }, { "epoch": 0.47825687843713227, "grad_norm": 0.7784304783543388, "learning_rate": 2.892249170071682e-06, "loss": 0.0245, "step": 114620 }, { "epoch": 0.47827774115212257, "grad_norm": 0.5045865752983809, "learning_rate": 2.8921860881535984e-06, "loss": 0.0264, "step": 114625 }, { "epoch": 0.47829860386711287, "grad_norm": 0.7854863166206951, "learning_rate": 2.8921230103629134e-06, "loss": 0.0216, "step": 114630 }, { "epoch": 0.4783194665821031, "grad_norm": 0.5857993742930124, "learning_rate": 2.892059936699176e-06, "loss": 0.0223, "step": 114635 }, { "epoch": 0.4783403292970934, "grad_norm": 0.7877708943191188, "learning_rate": 2.891996867161937e-06, "loss": 0.0283, "step": 114640 }, { "epoch": 0.47836119201208366, "grad_norm": 0.5318740520322134, "learning_rate": 2.8919338017507464e-06, "loss": 0.0279, "step": 114645 }, { "epoch": 0.47838205472707396, "grad_norm": 0.4044080313296228, "learning_rate": 2.891870740465154e-06, "loss": 0.0172, "step": 114650 }, { "epoch": 0.47840291744206426, "grad_norm": 0.7455830743416079, "learning_rate": 2.891807683304711e-06, "loss": 0.0278, "step": 114655 }, { "epoch": 0.4784237801570545, "grad_norm": 0.6137273257825281, "learning_rate": 2.8917446302689665e-06, "loss": 0.0312, "step": 114660 }, { "epoch": 0.4784446428720448, "grad_norm": 0.7408796485103724, "learning_rate": 2.891681581357472e-06, "loss": 0.0273, "step": 114665 }, { "epoch": 0.4784655055870351, "grad_norm": 0.7071845589985377, "learning_rate": 2.8916185365697765e-06, "loss": 0.0252, "step": 114670 }, { "epoch": 0.47848636830202534, "grad_norm": 0.36441370093330305, "learning_rate": 2.891555495905431e-06, "loss": 0.0233, "step": 114675 }, { "epoch": 0.47850723101701564, "grad_norm": 0.6324083481677495, "learning_rate": 2.8914924593639866e-06, "loss": 0.0194, "step": 114680 }, { "epoch": 0.4785280937320059, "grad_norm": 0.6713491090173649, "learning_rate": 2.8914294269449938e-06, "loss": 0.0353, "step": 114685 }, { "epoch": 0.4785489564469962, "grad_norm": 0.2887315782462807, "learning_rate": 2.891366398648003e-06, "loss": 0.0203, "step": 114690 }, { "epoch": 0.4785698191619865, "grad_norm": 0.4854568798336269, "learning_rate": 2.8913033744725643e-06, "loss": 0.0213, "step": 114695 }, { "epoch": 0.47859068187697673, "grad_norm": 0.3833448975759293, "learning_rate": 2.8912403544182297e-06, "loss": 0.058, "step": 114700 }, { "epoch": 0.47861154459196703, "grad_norm": 0.6002327853841075, "learning_rate": 2.8911773384845504e-06, "loss": 0.0233, "step": 114705 }, { "epoch": 0.4786324073069573, "grad_norm": 0.5095397881105029, "learning_rate": 2.8911143266710756e-06, "loss": 0.0228, "step": 114710 }, { "epoch": 0.4786532700219476, "grad_norm": 0.8568492811691761, "learning_rate": 2.8910513189773582e-06, "loss": 0.0303, "step": 114715 }, { "epoch": 0.4786741327369379, "grad_norm": 0.3206870564522463, "learning_rate": 2.890988315402948e-06, "loss": 0.0211, "step": 114720 }, { "epoch": 0.4786949954519281, "grad_norm": 0.8960504794251961, "learning_rate": 2.8909253159473964e-06, "loss": 0.022, "step": 114725 }, { "epoch": 0.4787158581669184, "grad_norm": 0.5819275920728627, "learning_rate": 2.890862320610255e-06, "loss": 0.0223, "step": 114730 }, { "epoch": 0.47873672088190866, "grad_norm": 0.8397051977767253, "learning_rate": 2.8907993293910746e-06, "loss": 0.0252, "step": 114735 }, { "epoch": 0.47875758359689896, "grad_norm": 0.8035430586293646, "learning_rate": 2.8907363422894075e-06, "loss": 0.0334, "step": 114740 }, { "epoch": 0.47877844631188926, "grad_norm": 0.3627741805902965, "learning_rate": 2.890673359304804e-06, "loss": 0.0285, "step": 114745 }, { "epoch": 0.4787993090268795, "grad_norm": 1.040080515273686, "learning_rate": 2.8906103804368164e-06, "loss": 0.0278, "step": 114750 }, { "epoch": 0.4788201717418698, "grad_norm": 0.7984671530367172, "learning_rate": 2.8905474056849964e-06, "loss": 0.021, "step": 114755 }, { "epoch": 0.4788410344568601, "grad_norm": 0.7417987700737065, "learning_rate": 2.8904844350488946e-06, "loss": 0.0272, "step": 114760 }, { "epoch": 0.47886189717185035, "grad_norm": 1.046886100413948, "learning_rate": 2.890421468528064e-06, "loss": 0.0299, "step": 114765 }, { "epoch": 0.47888275988684065, "grad_norm": 1.0910174078444266, "learning_rate": 2.8903585061220557e-06, "loss": 0.0417, "step": 114770 }, { "epoch": 0.4789036226018309, "grad_norm": 0.5370532427666888, "learning_rate": 2.890295547830421e-06, "loss": 0.0303, "step": 114775 }, { "epoch": 0.4789244853168212, "grad_norm": 0.5564150916890186, "learning_rate": 2.8902325936527133e-06, "loss": 0.0299, "step": 114780 }, { "epoch": 0.4789453480318115, "grad_norm": 0.4247958155254989, "learning_rate": 2.890169643588483e-06, "loss": 0.022, "step": 114785 }, { "epoch": 0.47896621074680173, "grad_norm": 0.5577190917747034, "learning_rate": 2.890106697637284e-06, "loss": 0.0246, "step": 114790 }, { "epoch": 0.47898707346179203, "grad_norm": 0.3461005307326063, "learning_rate": 2.890043755798666e-06, "loss": 0.0224, "step": 114795 }, { "epoch": 0.4790079361767823, "grad_norm": 0.7009547191778246, "learning_rate": 2.8899808180721832e-06, "loss": 0.0198, "step": 114800 }, { "epoch": 0.4790287988917726, "grad_norm": 0.8185693882740426, "learning_rate": 2.8899178844573873e-06, "loss": 0.0242, "step": 114805 }, { "epoch": 0.4790496616067629, "grad_norm": 0.7111260019502831, "learning_rate": 2.8898549549538298e-06, "loss": 0.0296, "step": 114810 }, { "epoch": 0.4790705243217531, "grad_norm": 0.38326059024799725, "learning_rate": 2.889792029561065e-06, "loss": 0.0244, "step": 114815 }, { "epoch": 0.4790913870367434, "grad_norm": 0.3300300740831545, "learning_rate": 2.889729108278643e-06, "loss": 0.0164, "step": 114820 }, { "epoch": 0.47911224975173367, "grad_norm": 0.4679964331991168, "learning_rate": 2.889666191106118e-06, "loss": 0.0216, "step": 114825 }, { "epoch": 0.47913311246672396, "grad_norm": 0.6774000343724063, "learning_rate": 2.8896032780430416e-06, "loss": 0.0183, "step": 114830 }, { "epoch": 0.47915397518171426, "grad_norm": 0.6528493897654268, "learning_rate": 2.889540369088968e-06, "loss": 0.0303, "step": 114835 }, { "epoch": 0.4791748378967045, "grad_norm": 0.7994003650788667, "learning_rate": 2.8894774642434483e-06, "loss": 0.0216, "step": 114840 }, { "epoch": 0.4791957006116948, "grad_norm": 0.5313345961596097, "learning_rate": 2.8894145635060355e-06, "loss": 0.0186, "step": 114845 }, { "epoch": 0.4792165633266851, "grad_norm": 0.4774342137752232, "learning_rate": 2.889351666876283e-06, "loss": 0.0221, "step": 114850 }, { "epoch": 0.47923742604167535, "grad_norm": 0.5130506037893738, "learning_rate": 2.889288774353744e-06, "loss": 0.0215, "step": 114855 }, { "epoch": 0.47925828875666565, "grad_norm": 0.4585230278135079, "learning_rate": 2.8892258859379712e-06, "loss": 0.023, "step": 114860 }, { "epoch": 0.4792791514716559, "grad_norm": 0.42673912787288565, "learning_rate": 2.8891630016285177e-06, "loss": 0.0237, "step": 114865 }, { "epoch": 0.4793000141866462, "grad_norm": 0.62296633742977, "learning_rate": 2.8891001214249364e-06, "loss": 0.031, "step": 114870 }, { "epoch": 0.4793208769016365, "grad_norm": 0.7639352123031419, "learning_rate": 2.8890372453267808e-06, "loss": 0.0297, "step": 114875 }, { "epoch": 0.47934173961662674, "grad_norm": 0.38611254629873437, "learning_rate": 2.888974373333604e-06, "loss": 0.0273, "step": 114880 }, { "epoch": 0.47936260233161704, "grad_norm": 0.45932623621203744, "learning_rate": 2.8889115054449596e-06, "loss": 0.0278, "step": 114885 }, { "epoch": 0.4793834650466073, "grad_norm": 0.6396812726025162, "learning_rate": 2.8888486416604006e-06, "loss": 0.0241, "step": 114890 }, { "epoch": 0.4794043277615976, "grad_norm": 0.6402867245714743, "learning_rate": 2.8887857819794803e-06, "loss": 0.0201, "step": 114895 }, { "epoch": 0.4794251904765879, "grad_norm": 1.0861193917803995, "learning_rate": 2.8887229264017536e-06, "loss": 0.0353, "step": 114900 }, { "epoch": 0.4794460531915781, "grad_norm": 0.3374935751677631, "learning_rate": 2.8886600749267724e-06, "loss": 0.0182, "step": 114905 }, { "epoch": 0.4794669159065684, "grad_norm": 0.6178404506927218, "learning_rate": 2.888597227554092e-06, "loss": 0.0201, "step": 114910 }, { "epoch": 0.47948777862155867, "grad_norm": 0.44187568772455904, "learning_rate": 2.888534384283264e-06, "loss": 0.0272, "step": 114915 }, { "epoch": 0.47950864133654897, "grad_norm": 0.969899826220293, "learning_rate": 2.888471545113845e-06, "loss": 0.0236, "step": 114920 }, { "epoch": 0.47952950405153927, "grad_norm": 0.9177231996808082, "learning_rate": 2.8884087100453868e-06, "loss": 0.0267, "step": 114925 }, { "epoch": 0.4795503667665295, "grad_norm": 0.2982617563913945, "learning_rate": 2.8883458790774443e-06, "loss": 0.0183, "step": 114930 }, { "epoch": 0.4795712294815198, "grad_norm": 0.3935739061289539, "learning_rate": 2.888283052209571e-06, "loss": 0.0223, "step": 114935 }, { "epoch": 0.4795920921965101, "grad_norm": 0.36711777881763796, "learning_rate": 2.8882202294413213e-06, "loss": 0.0214, "step": 114940 }, { "epoch": 0.47961295491150036, "grad_norm": 0.4454735019856878, "learning_rate": 2.8881574107722497e-06, "loss": 0.0194, "step": 114945 }, { "epoch": 0.47963381762649065, "grad_norm": 0.7599107127974514, "learning_rate": 2.8880945962019104e-06, "loss": 0.0234, "step": 114950 }, { "epoch": 0.4796546803414809, "grad_norm": 0.9398360949219374, "learning_rate": 2.8880317857298562e-06, "loss": 0.0301, "step": 114955 }, { "epoch": 0.4796755430564712, "grad_norm": 0.2720709008771381, "learning_rate": 2.8879689793556432e-06, "loss": 0.0184, "step": 114960 }, { "epoch": 0.4796964057714615, "grad_norm": 0.8893657886325194, "learning_rate": 2.887906177078825e-06, "loss": 0.0291, "step": 114965 }, { "epoch": 0.47971726848645174, "grad_norm": 0.5858865238968564, "learning_rate": 2.8878433788989575e-06, "loss": 0.0166, "step": 114970 }, { "epoch": 0.47973813120144204, "grad_norm": 0.6598471224270801, "learning_rate": 2.887780584815593e-06, "loss": 0.0169, "step": 114975 }, { "epoch": 0.4797589939164323, "grad_norm": 0.4834477006168363, "learning_rate": 2.8877177948282875e-06, "loss": 0.0236, "step": 114980 }, { "epoch": 0.4797798566314226, "grad_norm": 0.6495236315804023, "learning_rate": 2.8876550089365963e-06, "loss": 0.0261, "step": 114985 }, { "epoch": 0.4798007193464129, "grad_norm": 1.1586607075338917, "learning_rate": 2.8875922271400725e-06, "loss": 0.0349, "step": 114990 }, { "epoch": 0.47982158206140313, "grad_norm": 0.8009548181492828, "learning_rate": 2.887529449438272e-06, "loss": 0.0247, "step": 114995 }, { "epoch": 0.47984244477639343, "grad_norm": 0.7371174770896859, "learning_rate": 2.8874666758307494e-06, "loss": 0.026, "step": 115000 }, { "epoch": 0.4798633074913837, "grad_norm": 0.5820696283310737, "learning_rate": 2.8874039063170606e-06, "loss": 0.0171, "step": 115005 }, { "epoch": 0.479884170206374, "grad_norm": 0.9534456574942431, "learning_rate": 2.8873411408967593e-06, "loss": 0.0233, "step": 115010 }, { "epoch": 0.47990503292136427, "grad_norm": 1.094866329689882, "learning_rate": 2.887278379569401e-06, "loss": 0.0303, "step": 115015 }, { "epoch": 0.4799258956363545, "grad_norm": 0.5930999773584783, "learning_rate": 2.8872156223345414e-06, "loss": 0.016, "step": 115020 }, { "epoch": 0.4799467583513448, "grad_norm": 0.6631015892648447, "learning_rate": 2.8871528691917357e-06, "loss": 0.025, "step": 115025 }, { "epoch": 0.4799676210663351, "grad_norm": 0.3368137320205393, "learning_rate": 2.8870901201405384e-06, "loss": 0.0216, "step": 115030 }, { "epoch": 0.47998848378132536, "grad_norm": 0.5951563600355596, "learning_rate": 2.887027375180506e-06, "loss": 0.0178, "step": 115035 }, { "epoch": 0.48000934649631566, "grad_norm": 0.8039223231639521, "learning_rate": 2.886964634311193e-06, "loss": 0.0296, "step": 115040 }, { "epoch": 0.4800302092113059, "grad_norm": 0.84079723810094, "learning_rate": 2.8869018975321555e-06, "loss": 0.0269, "step": 115045 }, { "epoch": 0.4800510719262962, "grad_norm": 1.146290224473551, "learning_rate": 2.8868391648429484e-06, "loss": 0.0162, "step": 115050 }, { "epoch": 0.4800719346412865, "grad_norm": 1.115794277251668, "learning_rate": 2.8867764362431282e-06, "loss": 0.0215, "step": 115055 }, { "epoch": 0.48009279735627675, "grad_norm": 0.7394964816988661, "learning_rate": 2.8867137117322504e-06, "loss": 0.0209, "step": 115060 }, { "epoch": 0.48011366007126705, "grad_norm": 1.467628289460654, "learning_rate": 2.8866509913098705e-06, "loss": 0.0268, "step": 115065 }, { "epoch": 0.4801345227862573, "grad_norm": 0.8625871299950597, "learning_rate": 2.8865882749755453e-06, "loss": 0.0253, "step": 115070 }, { "epoch": 0.4801553855012476, "grad_norm": 0.6969311332407427, "learning_rate": 2.8865255627288286e-06, "loss": 0.0176, "step": 115075 }, { "epoch": 0.4801762482162379, "grad_norm": 0.9189282191887127, "learning_rate": 2.8864628545692785e-06, "loss": 0.0227, "step": 115080 }, { "epoch": 0.48019711093122813, "grad_norm": 1.0122094060454792, "learning_rate": 2.8864001504964506e-06, "loss": 0.0179, "step": 115085 }, { "epoch": 0.48021797364621843, "grad_norm": 0.4714041317391443, "learning_rate": 2.8863374505099e-06, "loss": 0.0185, "step": 115090 }, { "epoch": 0.4802388363612087, "grad_norm": 0.8009963153422582, "learning_rate": 2.886274754609184e-06, "loss": 0.0303, "step": 115095 }, { "epoch": 0.480259699076199, "grad_norm": 0.9600914578829726, "learning_rate": 2.886212062793859e-06, "loss": 0.0267, "step": 115100 }, { "epoch": 0.4802805617911893, "grad_norm": 0.3934021951360027, "learning_rate": 2.88614937506348e-06, "loss": 0.0179, "step": 115105 }, { "epoch": 0.4803014245061795, "grad_norm": 0.5720461778703794, "learning_rate": 2.886086691417605e-06, "loss": 0.0202, "step": 115110 }, { "epoch": 0.4803222872211698, "grad_norm": 1.0399882985226157, "learning_rate": 2.886024011855789e-06, "loss": 0.0237, "step": 115115 }, { "epoch": 0.4803431499361601, "grad_norm": 0.5107786749136968, "learning_rate": 2.8859613363775894e-06, "loss": 0.0181, "step": 115120 }, { "epoch": 0.48036401265115036, "grad_norm": 0.7998469616861612, "learning_rate": 2.8858986649825633e-06, "loss": 0.0292, "step": 115125 }, { "epoch": 0.48038487536614066, "grad_norm": 0.7871484591281079, "learning_rate": 2.8858359976702665e-06, "loss": 0.0189, "step": 115130 }, { "epoch": 0.4804057380811309, "grad_norm": 0.7715360946987239, "learning_rate": 2.8857733344402555e-06, "loss": 0.018, "step": 115135 }, { "epoch": 0.4804266007961212, "grad_norm": 0.42975417591925413, "learning_rate": 2.885710675292088e-06, "loss": 0.0272, "step": 115140 }, { "epoch": 0.4804474635111115, "grad_norm": 0.7473671821244361, "learning_rate": 2.8856480202253197e-06, "loss": 0.0258, "step": 115145 }, { "epoch": 0.48046832622610175, "grad_norm": 1.5401475980274901, "learning_rate": 2.8855853692395093e-06, "loss": 0.0279, "step": 115150 }, { "epoch": 0.48048918894109205, "grad_norm": 0.42880184854957976, "learning_rate": 2.885522722334212e-06, "loss": 0.0195, "step": 115155 }, { "epoch": 0.4805100516560823, "grad_norm": 0.48257683861324013, "learning_rate": 2.8854600795089854e-06, "loss": 0.0176, "step": 115160 }, { "epoch": 0.4805309143710726, "grad_norm": 0.8762861253595685, "learning_rate": 2.885397440763388e-06, "loss": 0.0321, "step": 115165 }, { "epoch": 0.4805517770860629, "grad_norm": 0.9833471100928199, "learning_rate": 2.885334806096975e-06, "loss": 0.0218, "step": 115170 }, { "epoch": 0.48057263980105314, "grad_norm": 0.9520268609673485, "learning_rate": 2.8852721755093045e-06, "loss": 0.0286, "step": 115175 }, { "epoch": 0.48059350251604344, "grad_norm": 0.7635062955536045, "learning_rate": 2.8852095489999345e-06, "loss": 0.0246, "step": 115180 }, { "epoch": 0.4806143652310337, "grad_norm": 0.830216146295146, "learning_rate": 2.8851469265684217e-06, "loss": 0.0286, "step": 115185 }, { "epoch": 0.480635227946024, "grad_norm": 0.7707056153039592, "learning_rate": 2.885084308214323e-06, "loss": 0.0226, "step": 115190 }, { "epoch": 0.4806560906610143, "grad_norm": 0.6311655474043596, "learning_rate": 2.885021693937197e-06, "loss": 0.0267, "step": 115195 }, { "epoch": 0.4806769533760045, "grad_norm": 1.2692445471524634, "learning_rate": 2.8849590837366017e-06, "loss": 0.0293, "step": 115200 }, { "epoch": 0.4806978160909948, "grad_norm": 0.3883277542465055, "learning_rate": 2.884896477612093e-06, "loss": 0.0293, "step": 115205 }, { "epoch": 0.4807186788059851, "grad_norm": 0.7467005571985368, "learning_rate": 2.8848338755632304e-06, "loss": 0.0194, "step": 115210 }, { "epoch": 0.48073954152097537, "grad_norm": 0.5884551590550265, "learning_rate": 2.8847712775895707e-06, "loss": 0.0224, "step": 115215 }, { "epoch": 0.48076040423596567, "grad_norm": 0.6698586108709085, "learning_rate": 2.884708683690672e-06, "loss": 0.0237, "step": 115220 }, { "epoch": 0.4807812669509559, "grad_norm": 0.7900634461717543, "learning_rate": 2.884646093866093e-06, "loss": 0.0287, "step": 115225 }, { "epoch": 0.4808021296659462, "grad_norm": 0.6814694521851252, "learning_rate": 2.8845835081153902e-06, "loss": 0.0269, "step": 115230 }, { "epoch": 0.4808229923809365, "grad_norm": 1.0239006888067879, "learning_rate": 2.884520926438123e-06, "loss": 0.0276, "step": 115235 }, { "epoch": 0.48084385509592675, "grad_norm": 1.3648788987286358, "learning_rate": 2.8844583488338486e-06, "loss": 0.0245, "step": 115240 }, { "epoch": 0.48086471781091705, "grad_norm": 0.34823752095605937, "learning_rate": 2.8843957753021255e-06, "loss": 0.0269, "step": 115245 }, { "epoch": 0.4808855805259073, "grad_norm": 0.879261632442631, "learning_rate": 2.8843332058425133e-06, "loss": 0.0212, "step": 115250 }, { "epoch": 0.4809064432408976, "grad_norm": 0.5189542358062089, "learning_rate": 2.8842706404545686e-06, "loss": 0.0212, "step": 115255 }, { "epoch": 0.4809273059558879, "grad_norm": 0.875711851140009, "learning_rate": 2.8842080791378496e-06, "loss": 0.0288, "step": 115260 }, { "epoch": 0.48094816867087814, "grad_norm": 0.4939112411797858, "learning_rate": 2.8841455218919167e-06, "loss": 0.0293, "step": 115265 }, { "epoch": 0.48096903138586844, "grad_norm": 0.7112482247455989, "learning_rate": 2.884082968716327e-06, "loss": 0.0197, "step": 115270 }, { "epoch": 0.4809898941008587, "grad_norm": 0.7183617314822864, "learning_rate": 2.884020419610639e-06, "loss": 0.025, "step": 115275 }, { "epoch": 0.481010756815849, "grad_norm": 0.5014530846033829, "learning_rate": 2.883957874574413e-06, "loss": 0.0221, "step": 115280 }, { "epoch": 0.4810316195308393, "grad_norm": 1.3737110406224091, "learning_rate": 2.8838953336072057e-06, "loss": 0.026, "step": 115285 }, { "epoch": 0.4810524822458295, "grad_norm": 1.0566210917454881, "learning_rate": 2.883832796708577e-06, "loss": 0.0245, "step": 115290 }, { "epoch": 0.4810733449608198, "grad_norm": 0.7403141496221165, "learning_rate": 2.883770263878086e-06, "loss": 0.0237, "step": 115295 }, { "epoch": 0.4810942076758101, "grad_norm": 0.8400885632356009, "learning_rate": 2.8837077351152907e-06, "loss": 0.0228, "step": 115300 }, { "epoch": 0.48111507039080037, "grad_norm": 0.581449482093554, "learning_rate": 2.8836452104197515e-06, "loss": 0.0274, "step": 115305 }, { "epoch": 0.48113593310579067, "grad_norm": 0.6892408468812382, "learning_rate": 2.8835826897910263e-06, "loss": 0.0254, "step": 115310 }, { "epoch": 0.4811567958207809, "grad_norm": 0.6209574869695836, "learning_rate": 2.8835201732286746e-06, "loss": 0.0386, "step": 115315 }, { "epoch": 0.4811776585357712, "grad_norm": 0.7446596425297634, "learning_rate": 2.8834576607322555e-06, "loss": 0.0294, "step": 115320 }, { "epoch": 0.4811985212507615, "grad_norm": 0.727475840320053, "learning_rate": 2.883395152301328e-06, "loss": 0.0282, "step": 115325 }, { "epoch": 0.48121938396575176, "grad_norm": 0.6576234503883934, "learning_rate": 2.883332647935453e-06, "loss": 0.0236, "step": 115330 }, { "epoch": 0.48124024668074206, "grad_norm": 0.37228564745343845, "learning_rate": 2.8832701476341878e-06, "loss": 0.0245, "step": 115335 }, { "epoch": 0.4812611093957323, "grad_norm": 1.0595731846815268, "learning_rate": 2.8832076513970934e-06, "loss": 0.0226, "step": 115340 }, { "epoch": 0.4812819721107226, "grad_norm": 0.8888065206899067, "learning_rate": 2.8831451592237298e-06, "loss": 0.0266, "step": 115345 }, { "epoch": 0.4813028348257129, "grad_norm": 0.7235548672462201, "learning_rate": 2.8830826711136544e-06, "loss": 0.0277, "step": 115350 }, { "epoch": 0.48132369754070314, "grad_norm": 0.5706886434287366, "learning_rate": 2.883020187066429e-06, "loss": 0.0238, "step": 115355 }, { "epoch": 0.48134456025569344, "grad_norm": 0.5278827347123913, "learning_rate": 2.882957707081612e-06, "loss": 0.0209, "step": 115360 }, { "epoch": 0.4813654229706837, "grad_norm": 0.5998005974598091, "learning_rate": 2.8828952311587638e-06, "loss": 0.0273, "step": 115365 }, { "epoch": 0.481386285685674, "grad_norm": 0.8522279457327724, "learning_rate": 2.882832759297444e-06, "loss": 0.0261, "step": 115370 }, { "epoch": 0.4814071484006643, "grad_norm": 0.7458747411736018, "learning_rate": 2.8827702914972135e-06, "loss": 0.0229, "step": 115375 }, { "epoch": 0.48142801111565453, "grad_norm": 0.882943254008074, "learning_rate": 2.882707827757631e-06, "loss": 0.0342, "step": 115380 }, { "epoch": 0.48144887383064483, "grad_norm": 0.34740245962317706, "learning_rate": 2.8826453680782573e-06, "loss": 0.0189, "step": 115385 }, { "epoch": 0.48146973654563513, "grad_norm": 0.5487295151379121, "learning_rate": 2.8825829124586522e-06, "loss": 0.0306, "step": 115390 }, { "epoch": 0.4814905992606254, "grad_norm": 2.2233311978582306, "learning_rate": 2.882520460898377e-06, "loss": 0.0394, "step": 115395 }, { "epoch": 0.4815114619756157, "grad_norm": 1.0120940892622032, "learning_rate": 2.8824580133969903e-06, "loss": 0.0295, "step": 115400 }, { "epoch": 0.4815323246906059, "grad_norm": 0.5125147601670922, "learning_rate": 2.882395569954054e-06, "loss": 0.0198, "step": 115405 }, { "epoch": 0.4815531874055962, "grad_norm": 0.620719486463668, "learning_rate": 2.8823331305691282e-06, "loss": 0.0203, "step": 115410 }, { "epoch": 0.4815740501205865, "grad_norm": 0.6981490811260181, "learning_rate": 2.8822706952417724e-06, "loss": 0.0205, "step": 115415 }, { "epoch": 0.48159491283557676, "grad_norm": 1.2780432185082822, "learning_rate": 2.8822082639715478e-06, "loss": 0.0193, "step": 115420 }, { "epoch": 0.48161577555056706, "grad_norm": 0.6197273635666752, "learning_rate": 2.882145836758015e-06, "loss": 0.0199, "step": 115425 }, { "epoch": 0.4816366382655573, "grad_norm": 0.7532957511952001, "learning_rate": 2.8820834136007354e-06, "loss": 0.0195, "step": 115430 }, { "epoch": 0.4816575009805476, "grad_norm": 2.3926943931944034, "learning_rate": 2.882020994499268e-06, "loss": 0.031, "step": 115435 }, { "epoch": 0.4816783636955379, "grad_norm": 1.0781926479638364, "learning_rate": 2.8819585794531756e-06, "loss": 0.0248, "step": 115440 }, { "epoch": 0.48169922641052815, "grad_norm": 0.5997699751986979, "learning_rate": 2.881896168462018e-06, "loss": 0.0245, "step": 115445 }, { "epoch": 0.48172008912551845, "grad_norm": 0.20411361851509907, "learning_rate": 2.881833761525356e-06, "loss": 0.0155, "step": 115450 }, { "epoch": 0.4817409518405087, "grad_norm": 0.9319200768849977, "learning_rate": 2.8817713586427508e-06, "loss": 0.0223, "step": 115455 }, { "epoch": 0.481761814555499, "grad_norm": 0.7356492849527071, "learning_rate": 2.8817089598137643e-06, "loss": 0.0251, "step": 115460 }, { "epoch": 0.4817826772704893, "grad_norm": 0.7919476140755455, "learning_rate": 2.8816465650379567e-06, "loss": 0.0274, "step": 115465 }, { "epoch": 0.48180353998547953, "grad_norm": 0.6027361022607758, "learning_rate": 2.881584174314889e-06, "loss": 0.0215, "step": 115470 }, { "epoch": 0.48182440270046983, "grad_norm": 0.5586725714828602, "learning_rate": 2.881521787644124e-06, "loss": 0.0198, "step": 115475 }, { "epoch": 0.48184526541546013, "grad_norm": 0.6641644903369937, "learning_rate": 2.8814594050252216e-06, "loss": 0.0204, "step": 115480 }, { "epoch": 0.4818661281304504, "grad_norm": 0.6764043029798175, "learning_rate": 2.881397026457744e-06, "loss": 0.0198, "step": 115485 }, { "epoch": 0.4818869908454407, "grad_norm": 0.6651428741480819, "learning_rate": 2.881334651941252e-06, "loss": 0.0224, "step": 115490 }, { "epoch": 0.4819078535604309, "grad_norm": 0.7301195990120034, "learning_rate": 2.8812722814753074e-06, "loss": 0.0245, "step": 115495 }, { "epoch": 0.4819287162754212, "grad_norm": 0.9586405320621273, "learning_rate": 2.8812099150594724e-06, "loss": 0.0227, "step": 115500 }, { "epoch": 0.4819495789904115, "grad_norm": 0.6352190432870389, "learning_rate": 2.881147552693308e-06, "loss": 0.0268, "step": 115505 }, { "epoch": 0.48197044170540176, "grad_norm": 0.8759350588202113, "learning_rate": 2.8810851943763762e-06, "loss": 0.0299, "step": 115510 }, { "epoch": 0.48199130442039206, "grad_norm": 0.5771382272416776, "learning_rate": 2.8810228401082384e-06, "loss": 0.0273, "step": 115515 }, { "epoch": 0.4820121671353823, "grad_norm": 0.5415178083207335, "learning_rate": 2.880960489888458e-06, "loss": 0.0203, "step": 115520 }, { "epoch": 0.4820330298503726, "grad_norm": 0.5281442792424864, "learning_rate": 2.880898143716595e-06, "loss": 0.0173, "step": 115525 }, { "epoch": 0.4820538925653629, "grad_norm": 0.8475453062405781, "learning_rate": 2.880835801592212e-06, "loss": 0.0301, "step": 115530 }, { "epoch": 0.48207475528035315, "grad_norm": 0.8352107920785183, "learning_rate": 2.880773463514872e-06, "loss": 0.0255, "step": 115535 }, { "epoch": 0.48209561799534345, "grad_norm": 0.5974192561865393, "learning_rate": 2.8807111294841368e-06, "loss": 0.0195, "step": 115540 }, { "epoch": 0.4821164807103337, "grad_norm": 0.5301936984292406, "learning_rate": 2.8806487994995674e-06, "loss": 0.0224, "step": 115545 }, { "epoch": 0.482137343425324, "grad_norm": 0.5982365715876425, "learning_rate": 2.8805864735607277e-06, "loss": 0.0232, "step": 115550 }, { "epoch": 0.4821582061403143, "grad_norm": 0.8090681826170688, "learning_rate": 2.880524151667179e-06, "loss": 0.0281, "step": 115555 }, { "epoch": 0.48217906885530454, "grad_norm": 0.505765425297325, "learning_rate": 2.8804618338184843e-06, "loss": 0.0136, "step": 115560 }, { "epoch": 0.48219993157029484, "grad_norm": 0.7300326986272744, "learning_rate": 2.8803995200142055e-06, "loss": 0.0177, "step": 115565 }, { "epoch": 0.48222079428528514, "grad_norm": 0.9339911697924907, "learning_rate": 2.880337210253906e-06, "loss": 0.0213, "step": 115570 }, { "epoch": 0.4822416570002754, "grad_norm": 0.3244275082121993, "learning_rate": 2.8802749045371474e-06, "loss": 0.0221, "step": 115575 }, { "epoch": 0.4822625197152657, "grad_norm": 1.079145315995056, "learning_rate": 2.8802126028634935e-06, "loss": 0.0246, "step": 115580 }, { "epoch": 0.4822833824302559, "grad_norm": 0.6191711357946271, "learning_rate": 2.8801503052325063e-06, "loss": 0.0236, "step": 115585 }, { "epoch": 0.4823042451452462, "grad_norm": 1.2677462065178282, "learning_rate": 2.8800880116437484e-06, "loss": 0.0308, "step": 115590 }, { "epoch": 0.4823251078602365, "grad_norm": 0.7495642639146389, "learning_rate": 2.8800257220967836e-06, "loss": 0.0203, "step": 115595 }, { "epoch": 0.48234597057522677, "grad_norm": 0.9435540167567245, "learning_rate": 2.879963436591174e-06, "loss": 0.0258, "step": 115600 }, { "epoch": 0.48236683329021707, "grad_norm": 1.0231197038714135, "learning_rate": 2.8799011551264826e-06, "loss": 0.0271, "step": 115605 }, { "epoch": 0.4823876960052073, "grad_norm": 0.4293201553749353, "learning_rate": 2.8798388777022736e-06, "loss": 0.0241, "step": 115610 }, { "epoch": 0.4824085587201976, "grad_norm": 0.8706072134638386, "learning_rate": 2.8797766043181085e-06, "loss": 0.028, "step": 115615 }, { "epoch": 0.4824294214351879, "grad_norm": 0.7681202521933378, "learning_rate": 2.879714334973551e-06, "loss": 0.034, "step": 115620 }, { "epoch": 0.48245028415017815, "grad_norm": 0.6180003578636535, "learning_rate": 2.8796520696681657e-06, "loss": 0.0253, "step": 115625 }, { "epoch": 0.48247114686516845, "grad_norm": 0.8787359289373204, "learning_rate": 2.879589808401515e-06, "loss": 0.0268, "step": 115630 }, { "epoch": 0.4824920095801587, "grad_norm": 12.380035480529676, "learning_rate": 2.8795275511731617e-06, "loss": 0.026, "step": 115635 }, { "epoch": 0.482512872295149, "grad_norm": 0.3738662623573669, "learning_rate": 2.8794652979826704e-06, "loss": 0.0203, "step": 115640 }, { "epoch": 0.4825337350101393, "grad_norm": 0.5919237431229928, "learning_rate": 2.879403048829604e-06, "loss": 0.0222, "step": 115645 }, { "epoch": 0.48255459772512954, "grad_norm": 1.280158209916235, "learning_rate": 2.8793408037135257e-06, "loss": 0.0331, "step": 115650 }, { "epoch": 0.48257546044011984, "grad_norm": 0.27141084484817796, "learning_rate": 2.8792785626339996e-06, "loss": 0.0155, "step": 115655 }, { "epoch": 0.48259632315511014, "grad_norm": 0.9263870513237696, "learning_rate": 2.8792163255905896e-06, "loss": 0.0276, "step": 115660 }, { "epoch": 0.4826171858701004, "grad_norm": 0.48078669990846595, "learning_rate": 2.8791540925828597e-06, "loss": 0.0196, "step": 115665 }, { "epoch": 0.4826380485850907, "grad_norm": 0.48776248269405764, "learning_rate": 2.8790918636103735e-06, "loss": 0.0261, "step": 115670 }, { "epoch": 0.48265891130008093, "grad_norm": 0.6120409645787944, "learning_rate": 2.879029638672695e-06, "loss": 0.0277, "step": 115675 }, { "epoch": 0.48267977401507123, "grad_norm": 0.4529126704604049, "learning_rate": 2.878967417769388e-06, "loss": 0.0188, "step": 115680 }, { "epoch": 0.4827006367300615, "grad_norm": 0.426298322911354, "learning_rate": 2.878905200900017e-06, "loss": 0.0173, "step": 115685 }, { "epoch": 0.48272149944505177, "grad_norm": 0.4445217825945124, "learning_rate": 2.878842988064145e-06, "loss": 0.0254, "step": 115690 }, { "epoch": 0.48274236216004207, "grad_norm": 0.6900853128179977, "learning_rate": 2.878780779261338e-06, "loss": 0.0249, "step": 115695 }, { "epoch": 0.4827632248750323, "grad_norm": 0.4499747723922083, "learning_rate": 2.8787185744911587e-06, "loss": 0.0156, "step": 115700 }, { "epoch": 0.4827840875900226, "grad_norm": 0.8579756139871159, "learning_rate": 2.8786563737531725e-06, "loss": 0.0233, "step": 115705 }, { "epoch": 0.4828049503050129, "grad_norm": 0.9143419199617915, "learning_rate": 2.8785941770469428e-06, "loss": 0.0259, "step": 115710 }, { "epoch": 0.48282581302000316, "grad_norm": 1.096893853296913, "learning_rate": 2.8785319843720343e-06, "loss": 0.0344, "step": 115715 }, { "epoch": 0.48284667573499346, "grad_norm": 0.8312940714524859, "learning_rate": 2.878469795728013e-06, "loss": 0.0245, "step": 115720 }, { "epoch": 0.4828675384499837, "grad_norm": 0.6663866304190256, "learning_rate": 2.8784076111144414e-06, "loss": 0.0257, "step": 115725 }, { "epoch": 0.482888401164974, "grad_norm": 0.7187494498016717, "learning_rate": 2.8783454305308854e-06, "loss": 0.0259, "step": 115730 }, { "epoch": 0.4829092638799643, "grad_norm": 0.46894245730120504, "learning_rate": 2.8782832539769095e-06, "loss": 0.0247, "step": 115735 }, { "epoch": 0.48293012659495455, "grad_norm": 0.7165451022521258, "learning_rate": 2.8782210814520784e-06, "loss": 0.0351, "step": 115740 }, { "epoch": 0.48295098930994484, "grad_norm": 0.640742119223749, "learning_rate": 2.8781589129559566e-06, "loss": 0.0256, "step": 115745 }, { "epoch": 0.48297185202493514, "grad_norm": 0.6477459749633936, "learning_rate": 2.87809674848811e-06, "loss": 0.0229, "step": 115750 }, { "epoch": 0.4829927147399254, "grad_norm": 0.7288206237915587, "learning_rate": 2.8780345880481025e-06, "loss": 0.0277, "step": 115755 }, { "epoch": 0.4830135774549157, "grad_norm": 0.7590755962633949, "learning_rate": 2.8779724316355003e-06, "loss": 0.0235, "step": 115760 }, { "epoch": 0.48303444016990593, "grad_norm": 0.9724538787918819, "learning_rate": 2.877910279249867e-06, "loss": 0.0307, "step": 115765 }, { "epoch": 0.48305530288489623, "grad_norm": 0.7331688878181388, "learning_rate": 2.877848130890769e-06, "loss": 0.0376, "step": 115770 }, { "epoch": 0.48307616559988653, "grad_norm": 0.6107493251318966, "learning_rate": 2.877785986557771e-06, "loss": 0.0238, "step": 115775 }, { "epoch": 0.4830970283148768, "grad_norm": 0.5530380700039635, "learning_rate": 2.877723846250439e-06, "loss": 0.0213, "step": 115780 }, { "epoch": 0.4831178910298671, "grad_norm": 0.5376809205995708, "learning_rate": 2.8776617099683373e-06, "loss": 0.0196, "step": 115785 }, { "epoch": 0.4831387537448573, "grad_norm": 0.6102522829773158, "learning_rate": 2.8775995777110327e-06, "loss": 0.0268, "step": 115790 }, { "epoch": 0.4831596164598476, "grad_norm": 0.48249968893698425, "learning_rate": 2.8775374494780893e-06, "loss": 0.021, "step": 115795 }, { "epoch": 0.4831804791748379, "grad_norm": 0.5941116443236132, "learning_rate": 2.877475325269074e-06, "loss": 0.0341, "step": 115800 }, { "epoch": 0.48320134188982816, "grad_norm": 0.33992487265540483, "learning_rate": 2.877413205083552e-06, "loss": 0.0225, "step": 115805 }, { "epoch": 0.48322220460481846, "grad_norm": 1.9016793085351464, "learning_rate": 2.877351088921088e-06, "loss": 0.0235, "step": 115810 }, { "epoch": 0.4832430673198087, "grad_norm": 0.6887028762467398, "learning_rate": 2.8772889767812488e-06, "loss": 0.0191, "step": 115815 }, { "epoch": 0.483263930034799, "grad_norm": 0.23399842338409108, "learning_rate": 2.8772268686636e-06, "loss": 0.0194, "step": 115820 }, { "epoch": 0.4832847927497893, "grad_norm": 0.6366370019362961, "learning_rate": 2.877164764567708e-06, "loss": 0.023, "step": 115825 }, { "epoch": 0.48330565546477955, "grad_norm": 0.7137750872049633, "learning_rate": 2.8771026644931382e-06, "loss": 0.0201, "step": 115830 }, { "epoch": 0.48332651817976985, "grad_norm": 0.4420137773385261, "learning_rate": 2.877040568439457e-06, "loss": 0.0198, "step": 115835 }, { "epoch": 0.48334738089476015, "grad_norm": 0.8557955906757818, "learning_rate": 2.87697847640623e-06, "loss": 0.0285, "step": 115840 }, { "epoch": 0.4833682436097504, "grad_norm": 1.0288707898317206, "learning_rate": 2.8769163883930234e-06, "loss": 0.03, "step": 115845 }, { "epoch": 0.4833891063247407, "grad_norm": 0.6085090234842827, "learning_rate": 2.8768543043994046e-06, "loss": 0.03, "step": 115850 }, { "epoch": 0.48340996903973094, "grad_norm": 0.17163272527038284, "learning_rate": 2.876792224424938e-06, "loss": 0.0319, "step": 115855 }, { "epoch": 0.48343083175472124, "grad_norm": 0.7859684141716994, "learning_rate": 2.876730148469192e-06, "loss": 0.0324, "step": 115860 }, { "epoch": 0.48345169446971153, "grad_norm": 0.9112491960089301, "learning_rate": 2.8766680765317306e-06, "loss": 0.0243, "step": 115865 }, { "epoch": 0.4834725571847018, "grad_norm": 0.5731880473897192, "learning_rate": 2.8766060086121227e-06, "loss": 0.0235, "step": 115870 }, { "epoch": 0.4834934198996921, "grad_norm": 0.6947787753389443, "learning_rate": 2.876543944709934e-06, "loss": 0.0245, "step": 115875 }, { "epoch": 0.4835142826146823, "grad_norm": 0.7415015928798577, "learning_rate": 2.8764818848247304e-06, "loss": 0.0188, "step": 115880 }, { "epoch": 0.4835351453296726, "grad_norm": 0.6730840147088729, "learning_rate": 2.8764198289560796e-06, "loss": 0.0245, "step": 115885 }, { "epoch": 0.4835560080446629, "grad_norm": 1.1653282403088518, "learning_rate": 2.876357777103548e-06, "loss": 0.0275, "step": 115890 }, { "epoch": 0.48357687075965317, "grad_norm": 0.7114581908261784, "learning_rate": 2.8762957292667027e-06, "loss": 0.0294, "step": 115895 }, { "epoch": 0.48359773347464347, "grad_norm": 0.8346377874822968, "learning_rate": 2.8762336854451097e-06, "loss": 0.0304, "step": 115900 }, { "epoch": 0.4836185961896337, "grad_norm": 0.6423018428027369, "learning_rate": 2.8761716456383364e-06, "loss": 0.026, "step": 115905 }, { "epoch": 0.483639458904624, "grad_norm": 0.6670224679412675, "learning_rate": 2.8761096098459508e-06, "loss": 0.0212, "step": 115910 }, { "epoch": 0.4836603216196143, "grad_norm": 0.8420868465985535, "learning_rate": 2.8760475780675184e-06, "loss": 0.0168, "step": 115915 }, { "epoch": 0.48368118433460455, "grad_norm": 0.904526496501862, "learning_rate": 2.875985550302608e-06, "loss": 0.0281, "step": 115920 }, { "epoch": 0.48370204704959485, "grad_norm": 0.8836329536790403, "learning_rate": 2.875923526550785e-06, "loss": 0.0247, "step": 115925 }, { "epoch": 0.48372290976458515, "grad_norm": 0.4734411471826799, "learning_rate": 2.875861506811618e-06, "loss": 0.0241, "step": 115930 }, { "epoch": 0.4837437724795754, "grad_norm": 0.45313922019799713, "learning_rate": 2.875799491084674e-06, "loss": 0.0204, "step": 115935 }, { "epoch": 0.4837646351945657, "grad_norm": 1.4311916238995286, "learning_rate": 2.875737479369521e-06, "loss": 0.0306, "step": 115940 }, { "epoch": 0.48378549790955594, "grad_norm": 0.8894844678975923, "learning_rate": 2.8756754716657246e-06, "loss": 0.023, "step": 115945 }, { "epoch": 0.48380636062454624, "grad_norm": 0.6328356006606655, "learning_rate": 2.875613467972855e-06, "loss": 0.0153, "step": 115950 }, { "epoch": 0.48382722333953654, "grad_norm": 0.7591811996285748, "learning_rate": 2.8755514682904774e-06, "loss": 0.0274, "step": 115955 }, { "epoch": 0.4838480860545268, "grad_norm": 1.2291749777752268, "learning_rate": 2.875489472618161e-06, "loss": 0.0287, "step": 115960 }, { "epoch": 0.4838689487695171, "grad_norm": 0.38208034192473406, "learning_rate": 2.875427480955473e-06, "loss": 0.0193, "step": 115965 }, { "epoch": 0.4838898114845073, "grad_norm": 0.8493960347882481, "learning_rate": 2.8753654933019813e-06, "loss": 0.0234, "step": 115970 }, { "epoch": 0.4839106741994976, "grad_norm": 0.5502790839391559, "learning_rate": 2.8753035096572537e-06, "loss": 0.0315, "step": 115975 }, { "epoch": 0.4839315369144879, "grad_norm": 0.9405552872706119, "learning_rate": 2.8752415300208574e-06, "loss": 0.0264, "step": 115980 }, { "epoch": 0.48395239962947817, "grad_norm": 0.9607476648579312, "learning_rate": 2.8751795543923624e-06, "loss": 0.0169, "step": 115985 }, { "epoch": 0.48397326234446847, "grad_norm": 0.7622694817483094, "learning_rate": 2.875117582771335e-06, "loss": 0.0235, "step": 115990 }, { "epoch": 0.4839941250594587, "grad_norm": 0.35272163951746055, "learning_rate": 2.875055615157344e-06, "loss": 0.0306, "step": 115995 }, { "epoch": 0.484014987774449, "grad_norm": 1.3720864353321347, "learning_rate": 2.8749936515499573e-06, "loss": 0.039, "step": 116000 }, { "epoch": 0.4840358504894393, "grad_norm": 0.7680824168699707, "learning_rate": 2.8749316919487435e-06, "loss": 0.0237, "step": 116005 }, { "epoch": 0.48405671320442956, "grad_norm": 0.9728384910945315, "learning_rate": 2.87486973635327e-06, "loss": 0.0347, "step": 116010 }, { "epoch": 0.48407757591941986, "grad_norm": 1.1143211948039764, "learning_rate": 2.8748077847631075e-06, "loss": 0.0325, "step": 116015 }, { "epoch": 0.48409843863441016, "grad_norm": 0.8905357276700394, "learning_rate": 2.8747458371778218e-06, "loss": 0.0232, "step": 116020 }, { "epoch": 0.4841193013494004, "grad_norm": 0.7787485123499169, "learning_rate": 2.8746838935969834e-06, "loss": 0.0227, "step": 116025 }, { "epoch": 0.4841401640643907, "grad_norm": 0.5726125890501341, "learning_rate": 2.874621954020159e-06, "loss": 0.0247, "step": 116030 }, { "epoch": 0.48416102677938094, "grad_norm": 0.8376607940900866, "learning_rate": 2.874560018446919e-06, "loss": 0.0338, "step": 116035 }, { "epoch": 0.48418188949437124, "grad_norm": 0.675663515771589, "learning_rate": 2.8744980868768314e-06, "loss": 0.0287, "step": 116040 }, { "epoch": 0.48420275220936154, "grad_norm": 0.9528562278391285, "learning_rate": 2.8744361593094643e-06, "loss": 0.0241, "step": 116045 }, { "epoch": 0.4842236149243518, "grad_norm": 0.2715875482703629, "learning_rate": 2.8743742357443884e-06, "loss": 0.0222, "step": 116050 }, { "epoch": 0.4842444776393421, "grad_norm": 0.5020499215911888, "learning_rate": 2.874312316181171e-06, "loss": 0.0249, "step": 116055 }, { "epoch": 0.48426534035433233, "grad_norm": 0.6081788195798798, "learning_rate": 2.8742504006193812e-06, "loss": 0.0161, "step": 116060 }, { "epoch": 0.48428620306932263, "grad_norm": 0.6840794151953052, "learning_rate": 2.8741884890585884e-06, "loss": 0.0266, "step": 116065 }, { "epoch": 0.48430706578431293, "grad_norm": 0.4650801726087922, "learning_rate": 2.874126581498362e-06, "loss": 0.0257, "step": 116070 }, { "epoch": 0.4843279284993032, "grad_norm": 0.7072378395602821, "learning_rate": 2.874064677938271e-06, "loss": 0.0244, "step": 116075 }, { "epoch": 0.4843487912142935, "grad_norm": 1.9039743360753902, "learning_rate": 2.874002778377884e-06, "loss": 0.0241, "step": 116080 }, { "epoch": 0.4843696539292837, "grad_norm": 2.6649518837700406, "learning_rate": 2.873940882816771e-06, "loss": 0.0204, "step": 116085 }, { "epoch": 0.484390516644274, "grad_norm": 0.8568569254001938, "learning_rate": 2.873878991254501e-06, "loss": 0.0281, "step": 116090 }, { "epoch": 0.4844113793592643, "grad_norm": 1.2095856627494332, "learning_rate": 2.873817103690644e-06, "loss": 0.0198, "step": 116095 }, { "epoch": 0.48443224207425456, "grad_norm": 0.39590490609189743, "learning_rate": 2.8737552201247693e-06, "loss": 0.016, "step": 116100 }, { "epoch": 0.48445310478924486, "grad_norm": 0.6803575309358286, "learning_rate": 2.873693340556446e-06, "loss": 0.0203, "step": 116105 }, { "epoch": 0.4844739675042351, "grad_norm": 1.8167879661378217, "learning_rate": 2.8736314649852443e-06, "loss": 0.0326, "step": 116110 }, { "epoch": 0.4844948302192254, "grad_norm": 0.8777479929564377, "learning_rate": 2.873569593410733e-06, "loss": 0.0267, "step": 116115 }, { "epoch": 0.4845156929342157, "grad_norm": 0.7079020204063277, "learning_rate": 2.8735077258324827e-06, "loss": 0.0181, "step": 116120 }, { "epoch": 0.48453655564920595, "grad_norm": 0.45086642138997834, "learning_rate": 2.8734458622500636e-06, "loss": 0.0251, "step": 116125 }, { "epoch": 0.48455741836419625, "grad_norm": 1.2508924055893405, "learning_rate": 2.8733840026630443e-06, "loss": 0.0222, "step": 116130 }, { "epoch": 0.48457828107918655, "grad_norm": 0.945529123612185, "learning_rate": 2.8733221470709954e-06, "loss": 0.0261, "step": 116135 }, { "epoch": 0.4845991437941768, "grad_norm": 0.7002302934984242, "learning_rate": 2.8732602954734873e-06, "loss": 0.0291, "step": 116140 }, { "epoch": 0.4846200065091671, "grad_norm": 0.5959330880667085, "learning_rate": 2.87319844787009e-06, "loss": 0.0247, "step": 116145 }, { "epoch": 0.48464086922415733, "grad_norm": 0.7703808504909845, "learning_rate": 2.873136604260373e-06, "loss": 0.0244, "step": 116150 }, { "epoch": 0.48466173193914763, "grad_norm": 0.3847712499527884, "learning_rate": 2.873074764643907e-06, "loss": 0.0318, "step": 116155 }, { "epoch": 0.48468259465413793, "grad_norm": 0.7295873906834855, "learning_rate": 2.873012929020262e-06, "loss": 0.0288, "step": 116160 }, { "epoch": 0.4847034573691282, "grad_norm": 1.0658212361534753, "learning_rate": 2.8729510973890077e-06, "loss": 0.0238, "step": 116165 }, { "epoch": 0.4847243200841185, "grad_norm": 1.3026146108092953, "learning_rate": 2.872889269749717e-06, "loss": 0.0286, "step": 116170 }, { "epoch": 0.4847451827991087, "grad_norm": 0.6449083149416506, "learning_rate": 2.872827446101958e-06, "loss": 0.0274, "step": 116175 }, { "epoch": 0.484766045514099, "grad_norm": 0.6867694017306236, "learning_rate": 2.872765626445301e-06, "loss": 0.0313, "step": 116180 }, { "epoch": 0.4847869082290893, "grad_norm": 0.5176756720153131, "learning_rate": 2.8727038107793187e-06, "loss": 0.0255, "step": 116185 }, { "epoch": 0.48480777094407956, "grad_norm": 0.7329961058014544, "learning_rate": 2.8726419991035805e-06, "loss": 0.0245, "step": 116190 }, { "epoch": 0.48482863365906986, "grad_norm": 0.7138140182792071, "learning_rate": 2.872580191417657e-06, "loss": 0.0274, "step": 116195 }, { "epoch": 0.4848494963740601, "grad_norm": 0.4249070266169792, "learning_rate": 2.8725183877211193e-06, "loss": 0.025, "step": 116200 }, { "epoch": 0.4848703590890504, "grad_norm": 0.8379068230951245, "learning_rate": 2.872456588013538e-06, "loss": 0.0293, "step": 116205 }, { "epoch": 0.4848912218040407, "grad_norm": 0.5180780488143599, "learning_rate": 2.8723947922944845e-06, "loss": 0.0257, "step": 116210 }, { "epoch": 0.48491208451903095, "grad_norm": 1.0390705814446284, "learning_rate": 2.872333000563529e-06, "loss": 0.027, "step": 116215 }, { "epoch": 0.48493294723402125, "grad_norm": 0.43723312594994296, "learning_rate": 2.8722712128202436e-06, "loss": 0.028, "step": 116220 }, { "epoch": 0.48495380994901155, "grad_norm": 1.173805732997672, "learning_rate": 2.8722094290641983e-06, "loss": 0.0317, "step": 116225 }, { "epoch": 0.4849746726640018, "grad_norm": 0.8248679527031908, "learning_rate": 2.872147649294965e-06, "loss": 0.0196, "step": 116230 }, { "epoch": 0.4849955353789921, "grad_norm": 0.5451326418354099, "learning_rate": 2.8720858735121147e-06, "loss": 0.023, "step": 116235 }, { "epoch": 0.48501639809398234, "grad_norm": 0.37333833808453515, "learning_rate": 2.872024101715219e-06, "loss": 0.0241, "step": 116240 }, { "epoch": 0.48503726080897264, "grad_norm": 0.47854500246953713, "learning_rate": 2.871962333903849e-06, "loss": 0.0215, "step": 116245 }, { "epoch": 0.48505812352396294, "grad_norm": 0.5999037887088345, "learning_rate": 2.8719005700775765e-06, "loss": 0.019, "step": 116250 }, { "epoch": 0.4850789862389532, "grad_norm": 0.7087994607484115, "learning_rate": 2.871838810235973e-06, "loss": 0.0256, "step": 116255 }, { "epoch": 0.4850998489539435, "grad_norm": 0.5409459762101999, "learning_rate": 2.8717770543786095e-06, "loss": 0.0211, "step": 116260 }, { "epoch": 0.4851207116689337, "grad_norm": 0.396114706638484, "learning_rate": 2.8717153025050576e-06, "loss": 0.0225, "step": 116265 }, { "epoch": 0.485141574383924, "grad_norm": 0.7632780504274896, "learning_rate": 2.8716535546148894e-06, "loss": 0.0344, "step": 116270 }, { "epoch": 0.4851624370989143, "grad_norm": 0.6584074899627854, "learning_rate": 2.871591810707677e-06, "loss": 0.0293, "step": 116275 }, { "epoch": 0.48518329981390457, "grad_norm": 0.5072040966528727, "learning_rate": 2.871530070782991e-06, "loss": 0.0278, "step": 116280 }, { "epoch": 0.48520416252889487, "grad_norm": 0.48318649033769695, "learning_rate": 2.871468334840404e-06, "loss": 0.0154, "step": 116285 }, { "epoch": 0.4852250252438851, "grad_norm": 0.6831515196736977, "learning_rate": 2.8714066028794897e-06, "loss": 0.0263, "step": 116290 }, { "epoch": 0.4852458879588754, "grad_norm": 0.8418297223522001, "learning_rate": 2.8713448748998174e-06, "loss": 0.0231, "step": 116295 }, { "epoch": 0.4852667506738657, "grad_norm": 0.6661617147784648, "learning_rate": 2.8712831509009597e-06, "loss": 0.0237, "step": 116300 }, { "epoch": 0.48528761338885595, "grad_norm": 0.34631581489636665, "learning_rate": 2.8712214308824903e-06, "loss": 0.0233, "step": 116305 }, { "epoch": 0.48530847610384625, "grad_norm": 0.6827272451576053, "learning_rate": 2.87115971484398e-06, "loss": 0.0237, "step": 116310 }, { "epoch": 0.48532933881883655, "grad_norm": 1.239066202857158, "learning_rate": 2.8710980027850014e-06, "loss": 0.0344, "step": 116315 }, { "epoch": 0.4853502015338268, "grad_norm": 0.6389317428074541, "learning_rate": 2.8710362947051273e-06, "loss": 0.0247, "step": 116320 }, { "epoch": 0.4853710642488171, "grad_norm": 0.8772120197337107, "learning_rate": 2.87097459060393e-06, "loss": 0.0237, "step": 116325 }, { "epoch": 0.48539192696380734, "grad_norm": 0.5491926166131298, "learning_rate": 2.870912890480981e-06, "loss": 0.017, "step": 116330 }, { "epoch": 0.48541278967879764, "grad_norm": 0.8109146175960122, "learning_rate": 2.870851194335854e-06, "loss": 0.0324, "step": 116335 }, { "epoch": 0.48543365239378794, "grad_norm": 0.6141637606593422, "learning_rate": 2.8707895021681216e-06, "loss": 0.0235, "step": 116340 }, { "epoch": 0.4854545151087782, "grad_norm": 0.646042710388294, "learning_rate": 2.870727813977355e-06, "loss": 0.0173, "step": 116345 }, { "epoch": 0.4854753778237685, "grad_norm": 0.4906477777798746, "learning_rate": 2.870666129763129e-06, "loss": 0.0287, "step": 116350 }, { "epoch": 0.4854962405387587, "grad_norm": 0.5433132851542624, "learning_rate": 2.8706044495250148e-06, "loss": 0.0294, "step": 116355 }, { "epoch": 0.485517103253749, "grad_norm": 0.9162472688921244, "learning_rate": 2.8705427732625856e-06, "loss": 0.0335, "step": 116360 }, { "epoch": 0.4855379659687393, "grad_norm": 0.7787165110368525, "learning_rate": 2.8704811009754147e-06, "loss": 0.0246, "step": 116365 }, { "epoch": 0.48555882868372957, "grad_norm": 1.149545398558962, "learning_rate": 2.870419432663075e-06, "loss": 0.0363, "step": 116370 }, { "epoch": 0.48557969139871987, "grad_norm": 1.1261901929449036, "learning_rate": 2.8703577683251393e-06, "loss": 0.0235, "step": 116375 }, { "epoch": 0.4856005541137101, "grad_norm": 0.729648397464304, "learning_rate": 2.8702961079611815e-06, "loss": 0.0274, "step": 116380 }, { "epoch": 0.4856214168287004, "grad_norm": 0.7938094281319144, "learning_rate": 2.8702344515707738e-06, "loss": 0.0236, "step": 116385 }, { "epoch": 0.4856422795436907, "grad_norm": 0.7774686633405318, "learning_rate": 2.8701727991534893e-06, "loss": 0.0248, "step": 116390 }, { "epoch": 0.48566314225868096, "grad_norm": 0.9685212956793384, "learning_rate": 2.8701111507089025e-06, "loss": 0.0255, "step": 116395 }, { "epoch": 0.48568400497367126, "grad_norm": 0.7143678605894381, "learning_rate": 2.870049506236586e-06, "loss": 0.024, "step": 116400 }, { "epoch": 0.48570486768866156, "grad_norm": 0.9544034334370108, "learning_rate": 2.8699878657361123e-06, "loss": 0.0347, "step": 116405 }, { "epoch": 0.4857257304036518, "grad_norm": 0.5584119931970446, "learning_rate": 2.869926229207057e-06, "loss": 0.0215, "step": 116410 }, { "epoch": 0.4857465931186421, "grad_norm": 0.5151100232167471, "learning_rate": 2.8698645966489926e-06, "loss": 0.0234, "step": 116415 }, { "epoch": 0.48576745583363234, "grad_norm": 0.38209467212109205, "learning_rate": 2.8698029680614927e-06, "loss": 0.0289, "step": 116420 }, { "epoch": 0.48578831854862264, "grad_norm": 1.6560625968593767, "learning_rate": 2.8697413434441307e-06, "loss": 0.0288, "step": 116425 }, { "epoch": 0.48580918126361294, "grad_norm": 0.8241107747742434, "learning_rate": 2.86967972279648e-06, "loss": 0.0249, "step": 116430 }, { "epoch": 0.4858300439786032, "grad_norm": 0.5546736051276634, "learning_rate": 2.869618106118116e-06, "loss": 0.0223, "step": 116435 }, { "epoch": 0.4858509066935935, "grad_norm": 0.5733873509339512, "learning_rate": 2.8695564934086113e-06, "loss": 0.0252, "step": 116440 }, { "epoch": 0.48587176940858373, "grad_norm": 0.576686844068705, "learning_rate": 2.86949488466754e-06, "loss": 0.0249, "step": 116445 }, { "epoch": 0.48589263212357403, "grad_norm": 0.9035829903020237, "learning_rate": 2.8694332798944767e-06, "loss": 0.0241, "step": 116450 }, { "epoch": 0.48591349483856433, "grad_norm": 0.5691998954129892, "learning_rate": 2.869371679088995e-06, "loss": 0.025, "step": 116455 }, { "epoch": 0.4859343575535546, "grad_norm": 0.6467964072948553, "learning_rate": 2.8693100822506694e-06, "loss": 0.0333, "step": 116460 }, { "epoch": 0.4859552202685449, "grad_norm": 0.8569314990391295, "learning_rate": 2.8692484893790735e-06, "loss": 0.0269, "step": 116465 }, { "epoch": 0.4859760829835351, "grad_norm": 0.7544652185927418, "learning_rate": 2.8691869004737823e-06, "loss": 0.0192, "step": 116470 }, { "epoch": 0.4859969456985254, "grad_norm": 0.6192884858363031, "learning_rate": 2.869125315534369e-06, "loss": 0.0216, "step": 116475 }, { "epoch": 0.4860178084135157, "grad_norm": 0.7292628850146479, "learning_rate": 2.869063734560409e-06, "loss": 0.0366, "step": 116480 }, { "epoch": 0.48603867112850596, "grad_norm": 0.6758589965853896, "learning_rate": 2.8690021575514765e-06, "loss": 0.0194, "step": 116485 }, { "epoch": 0.48605953384349626, "grad_norm": 1.5568411330171295, "learning_rate": 2.868940584507146e-06, "loss": 0.0348, "step": 116490 }, { "epoch": 0.48608039655848656, "grad_norm": 0.7617817039637079, "learning_rate": 2.8688790154269922e-06, "loss": 0.0191, "step": 116495 }, { "epoch": 0.4861012592734768, "grad_norm": 0.5593236103287254, "learning_rate": 2.86881745031059e-06, "loss": 0.0179, "step": 116500 }, { "epoch": 0.4861221219884671, "grad_norm": 0.5337804842793964, "learning_rate": 2.868755889157513e-06, "loss": 0.0221, "step": 116505 }, { "epoch": 0.48614298470345735, "grad_norm": 0.5333689731576379, "learning_rate": 2.868694331967337e-06, "loss": 0.0282, "step": 116510 }, { "epoch": 0.48616384741844765, "grad_norm": 0.9907717740630708, "learning_rate": 2.868632778739637e-06, "loss": 0.0291, "step": 116515 }, { "epoch": 0.48618471013343795, "grad_norm": 1.3452791029341806, "learning_rate": 2.868571229473987e-06, "loss": 0.0304, "step": 116520 }, { "epoch": 0.4862055728484282, "grad_norm": 1.0387221091162397, "learning_rate": 2.8685096841699624e-06, "loss": 0.0257, "step": 116525 }, { "epoch": 0.4862264355634185, "grad_norm": 0.9045787881282644, "learning_rate": 2.868448142827139e-06, "loss": 0.0274, "step": 116530 }, { "epoch": 0.48624729827840873, "grad_norm": 0.8700631532433318, "learning_rate": 2.8683866054450903e-06, "loss": 0.0287, "step": 116535 }, { "epoch": 0.48626816099339903, "grad_norm": 1.129715869080986, "learning_rate": 2.8683250720233925e-06, "loss": 0.0264, "step": 116540 }, { "epoch": 0.48628902370838933, "grad_norm": 0.8427702039118765, "learning_rate": 2.868263542561621e-06, "loss": 0.0273, "step": 116545 }, { "epoch": 0.4863098864233796, "grad_norm": 0.5315310538359455, "learning_rate": 2.8682020170593507e-06, "loss": 0.0206, "step": 116550 }, { "epoch": 0.4863307491383699, "grad_norm": 0.5779573191469018, "learning_rate": 2.8681404955161566e-06, "loss": 0.0232, "step": 116555 }, { "epoch": 0.4863516118533601, "grad_norm": 0.9878155145009998, "learning_rate": 2.8680789779316155e-06, "loss": 0.0232, "step": 116560 }, { "epoch": 0.4863724745683504, "grad_norm": 1.1876686901144344, "learning_rate": 2.868017464305301e-06, "loss": 0.0257, "step": 116565 }, { "epoch": 0.4863933372833407, "grad_norm": 0.9620636444926655, "learning_rate": 2.8679559546367896e-06, "loss": 0.0178, "step": 116570 }, { "epoch": 0.48641419999833096, "grad_norm": 0.5822150293225241, "learning_rate": 2.867894448925657e-06, "loss": 0.0285, "step": 116575 }, { "epoch": 0.48643506271332126, "grad_norm": 0.7925229570067377, "learning_rate": 2.8678329471714794e-06, "loss": 0.0229, "step": 116580 }, { "epoch": 0.48645592542831156, "grad_norm": 0.732610679848344, "learning_rate": 2.867771449373831e-06, "loss": 0.0306, "step": 116585 }, { "epoch": 0.4864767881433018, "grad_norm": 0.6419586742161135, "learning_rate": 2.867709955532289e-06, "loss": 0.0247, "step": 116590 }, { "epoch": 0.4864976508582921, "grad_norm": 0.7908899820867613, "learning_rate": 2.8676484656464287e-06, "loss": 0.0221, "step": 116595 }, { "epoch": 0.48651851357328235, "grad_norm": 0.9899153116472635, "learning_rate": 2.867586979715826e-06, "loss": 0.0223, "step": 116600 }, { "epoch": 0.48653937628827265, "grad_norm": 0.5536581975198861, "learning_rate": 2.8675254977400566e-06, "loss": 0.0247, "step": 116605 }, { "epoch": 0.48656023900326295, "grad_norm": 0.7307940665732489, "learning_rate": 2.867464019718697e-06, "loss": 0.0238, "step": 116610 }, { "epoch": 0.4865811017182532, "grad_norm": 0.42736638245987185, "learning_rate": 2.867402545651324e-06, "loss": 0.0236, "step": 116615 }, { "epoch": 0.4866019644332435, "grad_norm": 0.6495969873552521, "learning_rate": 2.8673410755375124e-06, "loss": 0.0235, "step": 116620 }, { "epoch": 0.48662282714823374, "grad_norm": 0.7645752660515067, "learning_rate": 2.867279609376839e-06, "loss": 0.0264, "step": 116625 }, { "epoch": 0.48664368986322404, "grad_norm": 0.5756392921427312, "learning_rate": 2.8672181471688803e-06, "loss": 0.0298, "step": 116630 }, { "epoch": 0.48666455257821434, "grad_norm": 0.5913903105305266, "learning_rate": 2.867156688913212e-06, "loss": 0.0303, "step": 116635 }, { "epoch": 0.4866854152932046, "grad_norm": 0.7494359165717882, "learning_rate": 2.8670952346094117e-06, "loss": 0.0272, "step": 116640 }, { "epoch": 0.4867062780081949, "grad_norm": 0.20879075774888367, "learning_rate": 2.867033784257055e-06, "loss": 0.0141, "step": 116645 }, { "epoch": 0.4867271407231851, "grad_norm": 0.553763876022802, "learning_rate": 2.8669723378557186e-06, "loss": 0.0234, "step": 116650 }, { "epoch": 0.4867480034381754, "grad_norm": 0.5914178622951315, "learning_rate": 2.8669108954049794e-06, "loss": 0.0207, "step": 116655 }, { "epoch": 0.4867688661531657, "grad_norm": 0.7465690887162959, "learning_rate": 2.866849456904414e-06, "loss": 0.0285, "step": 116660 }, { "epoch": 0.48678972886815597, "grad_norm": 0.4209993807173057, "learning_rate": 2.866788022353599e-06, "loss": 0.0218, "step": 116665 }, { "epoch": 0.48681059158314627, "grad_norm": 0.9522345181122838, "learning_rate": 2.8667265917521115e-06, "loss": 0.0249, "step": 116670 }, { "epoch": 0.48683145429813657, "grad_norm": 0.6737087697437715, "learning_rate": 2.8666651650995282e-06, "loss": 0.0322, "step": 116675 }, { "epoch": 0.4868523170131268, "grad_norm": 0.7781412123074557, "learning_rate": 2.8666037423954263e-06, "loss": 0.026, "step": 116680 }, { "epoch": 0.4868731797281171, "grad_norm": 0.8037534455886766, "learning_rate": 2.866542323639382e-06, "loss": 0.0366, "step": 116685 }, { "epoch": 0.48689404244310736, "grad_norm": 0.7684915161469064, "learning_rate": 2.866480908830973e-06, "loss": 0.02, "step": 116690 }, { "epoch": 0.48691490515809766, "grad_norm": 0.31180402473812546, "learning_rate": 2.866419497969776e-06, "loss": 0.0213, "step": 116695 }, { "epoch": 0.48693576787308795, "grad_norm": 0.3758832861218825, "learning_rate": 2.866358091055369e-06, "loss": 0.0239, "step": 116700 }, { "epoch": 0.4869566305880782, "grad_norm": 0.6676789934915879, "learning_rate": 2.866296688087329e-06, "loss": 0.0291, "step": 116705 }, { "epoch": 0.4869774933030685, "grad_norm": 0.8012103641181426, "learning_rate": 2.8662352890652327e-06, "loss": 0.022, "step": 116710 }, { "epoch": 0.48699835601805874, "grad_norm": 1.1892912168716605, "learning_rate": 2.866173893988658e-06, "loss": 0.0291, "step": 116715 }, { "epoch": 0.48701921873304904, "grad_norm": 0.6144733690705622, "learning_rate": 2.8661125028571823e-06, "loss": 0.0255, "step": 116720 }, { "epoch": 0.48704008144803934, "grad_norm": 1.1557017891479784, "learning_rate": 2.866051115670383e-06, "loss": 0.0333, "step": 116725 }, { "epoch": 0.4870609441630296, "grad_norm": 0.3778271430680466, "learning_rate": 2.865989732427838e-06, "loss": 0.014, "step": 116730 }, { "epoch": 0.4870818068780199, "grad_norm": 0.900825294783975, "learning_rate": 2.865928353129125e-06, "loss": 0.0229, "step": 116735 }, { "epoch": 0.48710266959301013, "grad_norm": 0.6944247270859484, "learning_rate": 2.865866977773821e-06, "loss": 0.0206, "step": 116740 }, { "epoch": 0.48712353230800043, "grad_norm": 1.1910087998989662, "learning_rate": 2.8658056063615035e-06, "loss": 0.0324, "step": 116745 }, { "epoch": 0.48714439502299073, "grad_norm": 0.7918963963410863, "learning_rate": 2.865744238891752e-06, "loss": 0.0269, "step": 116750 }, { "epoch": 0.487165257737981, "grad_norm": 0.7004966913577162, "learning_rate": 2.865682875364143e-06, "loss": 0.0381, "step": 116755 }, { "epoch": 0.48718612045297127, "grad_norm": 0.8094872375883767, "learning_rate": 2.8656215157782547e-06, "loss": 0.0244, "step": 116760 }, { "epoch": 0.48720698316796157, "grad_norm": 1.168879640168255, "learning_rate": 2.8655601601336657e-06, "loss": 0.0229, "step": 116765 }, { "epoch": 0.4872278458829518, "grad_norm": 0.9156668798081303, "learning_rate": 2.865498808429953e-06, "loss": 0.0323, "step": 116770 }, { "epoch": 0.4872487085979421, "grad_norm": 0.894861642397744, "learning_rate": 2.8654374606666963e-06, "loss": 0.0275, "step": 116775 }, { "epoch": 0.48726957131293236, "grad_norm": 0.9032567177639912, "learning_rate": 2.8653761168434723e-06, "loss": 0.024, "step": 116780 }, { "epoch": 0.48729043402792266, "grad_norm": 0.4657348230809958, "learning_rate": 2.8653147769598596e-06, "loss": 0.028, "step": 116785 }, { "epoch": 0.48731129674291296, "grad_norm": 0.5605583085054249, "learning_rate": 2.8652534410154375e-06, "loss": 0.0291, "step": 116790 }, { "epoch": 0.4873321594579032, "grad_norm": 1.4213622260808372, "learning_rate": 2.865192109009783e-06, "loss": 0.0213, "step": 116795 }, { "epoch": 0.4873530221728935, "grad_norm": 0.7729273409400229, "learning_rate": 2.8651307809424755e-06, "loss": 0.0174, "step": 116800 }, { "epoch": 0.48737388488788375, "grad_norm": 0.807553991021115, "learning_rate": 2.8650694568130937e-06, "loss": 0.0221, "step": 116805 }, { "epoch": 0.48739474760287405, "grad_norm": 0.9040868378781395, "learning_rate": 2.8650081366212156e-06, "loss": 0.0312, "step": 116810 }, { "epoch": 0.48741561031786435, "grad_norm": 0.699398459371301, "learning_rate": 2.86494682036642e-06, "loss": 0.0328, "step": 116815 }, { "epoch": 0.4874364730328546, "grad_norm": 0.8424475163999956, "learning_rate": 2.8648855080482858e-06, "loss": 0.02, "step": 116820 }, { "epoch": 0.4874573357478449, "grad_norm": 1.0010756526306528, "learning_rate": 2.8648241996663916e-06, "loss": 0.0329, "step": 116825 }, { "epoch": 0.48747819846283513, "grad_norm": 0.9502807968168687, "learning_rate": 2.864762895220316e-06, "loss": 0.0212, "step": 116830 }, { "epoch": 0.48749906117782543, "grad_norm": 0.6664971980915723, "learning_rate": 2.8647015947096386e-06, "loss": 0.024, "step": 116835 }, { "epoch": 0.48751992389281573, "grad_norm": 1.2357592533808937, "learning_rate": 2.864640298133938e-06, "loss": 0.0271, "step": 116840 }, { "epoch": 0.487540786607806, "grad_norm": 0.5226420202656028, "learning_rate": 2.8645790054927924e-06, "loss": 0.0195, "step": 116845 }, { "epoch": 0.4875616493227963, "grad_norm": 1.0590054453573416, "learning_rate": 2.8645177167857825e-06, "loss": 0.0271, "step": 116850 }, { "epoch": 0.4875825120377866, "grad_norm": 1.5701714604191024, "learning_rate": 2.8644564320124863e-06, "loss": 0.0278, "step": 116855 }, { "epoch": 0.4876033747527768, "grad_norm": 0.6484743973999584, "learning_rate": 2.8643951511724834e-06, "loss": 0.0336, "step": 116860 }, { "epoch": 0.4876242374677671, "grad_norm": 0.6600542185292853, "learning_rate": 2.8643338742653527e-06, "loss": 0.0266, "step": 116865 }, { "epoch": 0.48764510018275736, "grad_norm": 0.7512030103592658, "learning_rate": 2.864272601290674e-06, "loss": 0.0331, "step": 116870 }, { "epoch": 0.48766596289774766, "grad_norm": 0.9387808980301457, "learning_rate": 2.864211332248027e-06, "loss": 0.0294, "step": 116875 }, { "epoch": 0.48768682561273796, "grad_norm": 0.4475213551803728, "learning_rate": 2.864150067136991e-06, "loss": 0.0236, "step": 116880 }, { "epoch": 0.4877076883277282, "grad_norm": 0.46293617897344097, "learning_rate": 2.864088805957145e-06, "loss": 0.0239, "step": 116885 }, { "epoch": 0.4877285510427185, "grad_norm": 0.6913489707351181, "learning_rate": 2.8640275487080683e-06, "loss": 0.0242, "step": 116890 }, { "epoch": 0.48774941375770875, "grad_norm": 0.991611449916313, "learning_rate": 2.8639662953893415e-06, "loss": 0.0216, "step": 116895 }, { "epoch": 0.48777027647269905, "grad_norm": 1.0563341653647846, "learning_rate": 2.863905046000544e-06, "loss": 0.0243, "step": 116900 }, { "epoch": 0.48779113918768935, "grad_norm": 0.41181608529078995, "learning_rate": 2.8638438005412562e-06, "loss": 0.0231, "step": 116905 }, { "epoch": 0.4878120019026796, "grad_norm": 1.0828616601310048, "learning_rate": 2.8637825590110563e-06, "loss": 0.0293, "step": 116910 }, { "epoch": 0.4878328646176699, "grad_norm": 0.6412552420187412, "learning_rate": 2.8637213214095256e-06, "loss": 0.0272, "step": 116915 }, { "epoch": 0.48785372733266014, "grad_norm": 1.0508481802690595, "learning_rate": 2.863660087736244e-06, "loss": 0.0307, "step": 116920 }, { "epoch": 0.48787459004765044, "grad_norm": 0.53407373782592, "learning_rate": 2.8635988579907914e-06, "loss": 0.0245, "step": 116925 }, { "epoch": 0.48789545276264074, "grad_norm": 0.6323240395555265, "learning_rate": 2.863537632172747e-06, "loss": 0.034, "step": 116930 }, { "epoch": 0.487916315477631, "grad_norm": 0.4396769063168943, "learning_rate": 2.8634764102816918e-06, "loss": 0.0218, "step": 116935 }, { "epoch": 0.4879371781926213, "grad_norm": 0.6289729911078943, "learning_rate": 2.8634151923172067e-06, "loss": 0.0216, "step": 116940 }, { "epoch": 0.4879580409076116, "grad_norm": 0.7284083090701667, "learning_rate": 2.8633539782788705e-06, "loss": 0.0235, "step": 116945 }, { "epoch": 0.4879789036226018, "grad_norm": 0.3624853139672587, "learning_rate": 2.8632927681662647e-06, "loss": 0.0213, "step": 116950 }, { "epoch": 0.4879997663375921, "grad_norm": 0.5346291354340976, "learning_rate": 2.863231561978969e-06, "loss": 0.018, "step": 116955 }, { "epoch": 0.48802062905258237, "grad_norm": 0.603570882984121, "learning_rate": 2.8631703597165646e-06, "loss": 0.0179, "step": 116960 }, { "epoch": 0.48804149176757267, "grad_norm": 0.626392548679281, "learning_rate": 2.863109161378631e-06, "loss": 0.0335, "step": 116965 }, { "epoch": 0.48806235448256297, "grad_norm": 0.529481834405528, "learning_rate": 2.86304796696475e-06, "loss": 0.0189, "step": 116970 }, { "epoch": 0.4880832171975532, "grad_norm": 0.844780660397828, "learning_rate": 2.8629867764745013e-06, "loss": 0.0262, "step": 116975 }, { "epoch": 0.4881040799125435, "grad_norm": 0.5130495346614173, "learning_rate": 2.862925589907466e-06, "loss": 0.0209, "step": 116980 }, { "epoch": 0.48812494262753375, "grad_norm": 0.43828304628822395, "learning_rate": 2.862864407263225e-06, "loss": 0.0312, "step": 116985 }, { "epoch": 0.48814580534252405, "grad_norm": 1.380644623660713, "learning_rate": 2.8628032285413593e-06, "loss": 0.0341, "step": 116990 }, { "epoch": 0.48816666805751435, "grad_norm": 1.0884990430081694, "learning_rate": 2.862742053741449e-06, "loss": 0.0247, "step": 116995 }, { "epoch": 0.4881875307725046, "grad_norm": 0.37705378112764515, "learning_rate": 2.8626808828630758e-06, "loss": 0.017, "step": 117000 }, { "epoch": 0.4882083934874949, "grad_norm": 0.8261814132718301, "learning_rate": 2.8626197159058212e-06, "loss": 0.0242, "step": 117005 }, { "epoch": 0.48822925620248514, "grad_norm": 1.035876151035411, "learning_rate": 2.862558552869265e-06, "loss": 0.0317, "step": 117010 }, { "epoch": 0.48825011891747544, "grad_norm": 0.9293523155744102, "learning_rate": 2.8624973937529893e-06, "loss": 0.0238, "step": 117015 }, { "epoch": 0.48827098163246574, "grad_norm": 0.8290229406694981, "learning_rate": 2.8624362385565746e-06, "loss": 0.0254, "step": 117020 }, { "epoch": 0.488291844347456, "grad_norm": 0.6852555350251338, "learning_rate": 2.8623750872796037e-06, "loss": 0.025, "step": 117025 }, { "epoch": 0.4883127070624463, "grad_norm": 0.894194068890995, "learning_rate": 2.862313939921656e-06, "loss": 0.0285, "step": 117030 }, { "epoch": 0.4883335697774366, "grad_norm": 0.913075042581039, "learning_rate": 2.8622527964823145e-06, "loss": 0.0242, "step": 117035 }, { "epoch": 0.4883544324924268, "grad_norm": 0.6205870265860984, "learning_rate": 2.86219165696116e-06, "loss": 0.0289, "step": 117040 }, { "epoch": 0.4883752952074171, "grad_norm": 0.7023125090484019, "learning_rate": 2.8621305213577738e-06, "loss": 0.0333, "step": 117045 }, { "epoch": 0.48839615792240737, "grad_norm": 0.7782077098733564, "learning_rate": 2.8620693896717377e-06, "loss": 0.0206, "step": 117050 }, { "epoch": 0.48841702063739767, "grad_norm": 0.876255323886991, "learning_rate": 2.862008261902634e-06, "loss": 0.0257, "step": 117055 }, { "epoch": 0.48843788335238797, "grad_norm": 0.6127463684213987, "learning_rate": 2.8619471380500437e-06, "loss": 0.0213, "step": 117060 }, { "epoch": 0.4884587460673782, "grad_norm": 0.7050323460015034, "learning_rate": 2.861886018113548e-06, "loss": 0.0245, "step": 117065 }, { "epoch": 0.4884796087823685, "grad_norm": 0.5847459099141137, "learning_rate": 2.8618249020927304e-06, "loss": 0.0251, "step": 117070 }, { "epoch": 0.48850047149735876, "grad_norm": 0.5463957256396077, "learning_rate": 2.861763789987172e-06, "loss": 0.0355, "step": 117075 }, { "epoch": 0.48852133421234906, "grad_norm": 0.7992909081278625, "learning_rate": 2.8617026817964544e-06, "loss": 0.0248, "step": 117080 }, { "epoch": 0.48854219692733936, "grad_norm": 0.7506932926147803, "learning_rate": 2.86164157752016e-06, "loss": 0.0304, "step": 117085 }, { "epoch": 0.4885630596423296, "grad_norm": 0.8953515421595041, "learning_rate": 2.8615804771578705e-06, "loss": 0.0296, "step": 117090 }, { "epoch": 0.4885839223573199, "grad_norm": 0.6921080261768021, "learning_rate": 2.861519380709169e-06, "loss": 0.0271, "step": 117095 }, { "epoch": 0.48860478507231014, "grad_norm": 1.3032486986842986, "learning_rate": 2.861458288173637e-06, "loss": 0.027, "step": 117100 }, { "epoch": 0.48862564778730044, "grad_norm": 1.5821744259809325, "learning_rate": 2.8613971995508572e-06, "loss": 0.0391, "step": 117105 }, { "epoch": 0.48864651050229074, "grad_norm": 0.6001991104111335, "learning_rate": 2.8613361148404116e-06, "loss": 0.0232, "step": 117110 }, { "epoch": 0.488667373217281, "grad_norm": 0.7450817685090699, "learning_rate": 2.8612750340418827e-06, "loss": 0.0284, "step": 117115 }, { "epoch": 0.4886882359322713, "grad_norm": 0.6259641545916925, "learning_rate": 2.8612139571548532e-06, "loss": 0.0249, "step": 117120 }, { "epoch": 0.4887090986472616, "grad_norm": 0.6709462997356688, "learning_rate": 2.861152884178905e-06, "loss": 0.0279, "step": 117125 }, { "epoch": 0.48872996136225183, "grad_norm": 0.5721030009981832, "learning_rate": 2.8610918151136215e-06, "loss": 0.0225, "step": 117130 }, { "epoch": 0.48875082407724213, "grad_norm": 0.6700154168284883, "learning_rate": 2.861030749958585e-06, "loss": 0.0287, "step": 117135 }, { "epoch": 0.4887716867922324, "grad_norm": 0.8456133801732085, "learning_rate": 2.8609696887133783e-06, "loss": 0.022, "step": 117140 }, { "epoch": 0.4887925495072227, "grad_norm": 2.047018490596138, "learning_rate": 2.860908631377584e-06, "loss": 0.038, "step": 117145 }, { "epoch": 0.488813412222213, "grad_norm": 0.7448764681882284, "learning_rate": 2.8608475779507855e-06, "loss": 0.0193, "step": 117150 }, { "epoch": 0.4888342749372032, "grad_norm": 0.7337723490541418, "learning_rate": 2.860786528432565e-06, "loss": 0.0221, "step": 117155 }, { "epoch": 0.4888551376521935, "grad_norm": 0.9296996713680415, "learning_rate": 2.860725482822506e-06, "loss": 0.0297, "step": 117160 }, { "epoch": 0.48887600036718376, "grad_norm": 0.8510923383064382, "learning_rate": 2.860664441120191e-06, "loss": 0.0232, "step": 117165 }, { "epoch": 0.48889686308217406, "grad_norm": 0.542632019840054, "learning_rate": 2.860603403325204e-06, "loss": 0.0216, "step": 117170 }, { "epoch": 0.48891772579716436, "grad_norm": 0.6189330454560286, "learning_rate": 2.8605423694371277e-06, "loss": 0.0189, "step": 117175 }, { "epoch": 0.4889385885121546, "grad_norm": 1.164746062415834, "learning_rate": 2.8604813394555446e-06, "loss": 0.0317, "step": 117180 }, { "epoch": 0.4889594512271449, "grad_norm": 1.3133835456890426, "learning_rate": 2.8604203133800396e-06, "loss": 0.0248, "step": 117185 }, { "epoch": 0.48898031394213515, "grad_norm": 0.5078775705759754, "learning_rate": 2.860359291210194e-06, "loss": 0.0204, "step": 117190 }, { "epoch": 0.48900117665712545, "grad_norm": 1.045206769489647, "learning_rate": 2.860298272945593e-06, "loss": 0.0244, "step": 117195 }, { "epoch": 0.48902203937211575, "grad_norm": 1.081003985634466, "learning_rate": 2.860237258585819e-06, "loss": 0.0232, "step": 117200 }, { "epoch": 0.489042902087106, "grad_norm": 0.6669224050182225, "learning_rate": 2.860176248130456e-06, "loss": 0.0257, "step": 117205 }, { "epoch": 0.4890637648020963, "grad_norm": 0.5955865289559864, "learning_rate": 2.8601152415790877e-06, "loss": 0.0241, "step": 117210 }, { "epoch": 0.4890846275170866, "grad_norm": 2.1219439388003547, "learning_rate": 2.860054238931298e-06, "loss": 0.0321, "step": 117215 }, { "epoch": 0.48910549023207683, "grad_norm": 0.6444138828231493, "learning_rate": 2.8599932401866697e-06, "loss": 0.0262, "step": 117220 }, { "epoch": 0.48912635294706713, "grad_norm": 0.7843438007664297, "learning_rate": 2.8599322453447868e-06, "loss": 0.017, "step": 117225 }, { "epoch": 0.4891472156620574, "grad_norm": 0.7435123745080896, "learning_rate": 2.859871254405234e-06, "loss": 0.0288, "step": 117230 }, { "epoch": 0.4891680783770477, "grad_norm": 0.5285040711127782, "learning_rate": 2.8598102673675947e-06, "loss": 0.018, "step": 117235 }, { "epoch": 0.489188941092038, "grad_norm": 0.957043599084329, "learning_rate": 2.859749284231453e-06, "loss": 0.0205, "step": 117240 }, { "epoch": 0.4892098038070282, "grad_norm": 0.791735199387804, "learning_rate": 2.8596883049963926e-06, "loss": 0.0299, "step": 117245 }, { "epoch": 0.4892306665220185, "grad_norm": 1.2954116048452227, "learning_rate": 2.8596273296619974e-06, "loss": 0.0261, "step": 117250 }, { "epoch": 0.48925152923700876, "grad_norm": 0.950570962820837, "learning_rate": 2.859566358227852e-06, "loss": 0.0218, "step": 117255 }, { "epoch": 0.48927239195199906, "grad_norm": 0.904022730592698, "learning_rate": 2.859505390693541e-06, "loss": 0.0195, "step": 117260 }, { "epoch": 0.48929325466698936, "grad_norm": 0.935662043991305, "learning_rate": 2.859444427058648e-06, "loss": 0.0219, "step": 117265 }, { "epoch": 0.4893141173819796, "grad_norm": 0.9934355551769252, "learning_rate": 2.8593834673227576e-06, "loss": 0.0213, "step": 117270 }, { "epoch": 0.4893349800969699, "grad_norm": 0.6741146412586488, "learning_rate": 2.8593225114854544e-06, "loss": 0.0206, "step": 117275 }, { "epoch": 0.48935584281196015, "grad_norm": 1.1486113170284433, "learning_rate": 2.8592615595463226e-06, "loss": 0.0361, "step": 117280 }, { "epoch": 0.48937670552695045, "grad_norm": 0.6896781090331637, "learning_rate": 2.8592006115049463e-06, "loss": 0.0199, "step": 117285 }, { "epoch": 0.48939756824194075, "grad_norm": 0.574526236645728, "learning_rate": 2.8591396673609113e-06, "loss": 0.0233, "step": 117290 }, { "epoch": 0.489418430956931, "grad_norm": 0.7350550150685392, "learning_rate": 2.859078727113801e-06, "loss": 0.0318, "step": 117295 }, { "epoch": 0.4894392936719213, "grad_norm": 0.568497683096639, "learning_rate": 2.8590177907632013e-06, "loss": 0.0241, "step": 117300 }, { "epoch": 0.4894601563869116, "grad_norm": 0.9082150752771107, "learning_rate": 2.8589568583086957e-06, "loss": 0.0347, "step": 117305 }, { "epoch": 0.48948101910190184, "grad_norm": 0.5331453663175832, "learning_rate": 2.8588959297498704e-06, "loss": 0.0208, "step": 117310 }, { "epoch": 0.48950188181689214, "grad_norm": 1.1334048619506154, "learning_rate": 2.858835005086309e-06, "loss": 0.0273, "step": 117315 }, { "epoch": 0.4895227445318824, "grad_norm": 0.7025469608307636, "learning_rate": 2.858774084317597e-06, "loss": 0.0271, "step": 117320 }, { "epoch": 0.4895436072468727, "grad_norm": 0.5049574207521211, "learning_rate": 2.85871316744332e-06, "loss": 0.0224, "step": 117325 }, { "epoch": 0.489564469961863, "grad_norm": 6.1598294058048735, "learning_rate": 2.8586522544630624e-06, "loss": 0.0339, "step": 117330 }, { "epoch": 0.4895853326768532, "grad_norm": 0.6256363686598744, "learning_rate": 2.85859134537641e-06, "loss": 0.0194, "step": 117335 }, { "epoch": 0.4896061953918435, "grad_norm": 0.6939853601082071, "learning_rate": 2.8585304401829465e-06, "loss": 0.0226, "step": 117340 }, { "epoch": 0.48962705810683377, "grad_norm": 0.961257564365022, "learning_rate": 2.858469538882259e-06, "loss": 0.0288, "step": 117345 }, { "epoch": 0.48964792082182407, "grad_norm": 0.6384167560048726, "learning_rate": 2.8584086414739322e-06, "loss": 0.0201, "step": 117350 }, { "epoch": 0.48966878353681437, "grad_norm": 0.6258012435636109, "learning_rate": 2.8583477479575507e-06, "loss": 0.0269, "step": 117355 }, { "epoch": 0.4896896462518046, "grad_norm": 0.4471605797526618, "learning_rate": 2.858286858332701e-06, "loss": 0.0158, "step": 117360 }, { "epoch": 0.4897105089667949, "grad_norm": 1.1093284188606534, "learning_rate": 2.8582259725989685e-06, "loss": 0.0167, "step": 117365 }, { "epoch": 0.48973137168178515, "grad_norm": 0.9286466073630105, "learning_rate": 2.858165090755938e-06, "loss": 0.0273, "step": 117370 }, { "epoch": 0.48975223439677545, "grad_norm": 0.8616549404179618, "learning_rate": 2.858104212803196e-06, "loss": 0.0216, "step": 117375 }, { "epoch": 0.48977309711176575, "grad_norm": 0.8614004315043675, "learning_rate": 2.858043338740328e-06, "loss": 0.0274, "step": 117380 }, { "epoch": 0.489793959826756, "grad_norm": 0.6504756484106597, "learning_rate": 2.8579824685669195e-06, "loss": 0.0218, "step": 117385 }, { "epoch": 0.4898148225417463, "grad_norm": 0.7387830269424379, "learning_rate": 2.8579216022825566e-06, "loss": 0.0242, "step": 117390 }, { "epoch": 0.4898356852567366, "grad_norm": 0.9010546391637706, "learning_rate": 2.857860739886825e-06, "loss": 0.0267, "step": 117395 }, { "epoch": 0.48985654797172684, "grad_norm": 0.5609919331274674, "learning_rate": 2.8577998813793105e-06, "loss": 0.026, "step": 117400 }, { "epoch": 0.48987741068671714, "grad_norm": 0.6471658524336024, "learning_rate": 2.8577390267596e-06, "loss": 0.0311, "step": 117405 }, { "epoch": 0.4898982734017074, "grad_norm": 0.33149377080188924, "learning_rate": 2.857678176027279e-06, "loss": 0.026, "step": 117410 }, { "epoch": 0.4899191361166977, "grad_norm": 0.629592859421682, "learning_rate": 2.8576173291819326e-06, "loss": 0.0305, "step": 117415 }, { "epoch": 0.489939998831688, "grad_norm": 0.8153177364119897, "learning_rate": 2.8575564862231486e-06, "loss": 0.023, "step": 117420 }, { "epoch": 0.48996086154667823, "grad_norm": 0.9481086803859858, "learning_rate": 2.857495647150513e-06, "loss": 0.0246, "step": 117425 }, { "epoch": 0.4899817242616685, "grad_norm": 1.1870833297659629, "learning_rate": 2.8574348119636106e-06, "loss": 0.026, "step": 117430 }, { "epoch": 0.49000258697665877, "grad_norm": 0.5337141367324922, "learning_rate": 2.8573739806620304e-06, "loss": 0.0234, "step": 117435 }, { "epoch": 0.49002344969164907, "grad_norm": 0.824142084394464, "learning_rate": 2.857313153245356e-06, "loss": 0.0277, "step": 117440 }, { "epoch": 0.49004431240663937, "grad_norm": 1.5620662404110677, "learning_rate": 2.8572523297131767e-06, "loss": 0.0324, "step": 117445 }, { "epoch": 0.4900651751216296, "grad_norm": 0.5745702589223505, "learning_rate": 2.857191510065077e-06, "loss": 0.0239, "step": 117450 }, { "epoch": 0.4900860378366199, "grad_norm": 0.7137550887033287, "learning_rate": 2.8571306943006445e-06, "loss": 0.0236, "step": 117455 }, { "epoch": 0.49010690055161016, "grad_norm": 1.0668887917659344, "learning_rate": 2.8570698824194655e-06, "loss": 0.0328, "step": 117460 }, { "epoch": 0.49012776326660046, "grad_norm": 1.0827858522054925, "learning_rate": 2.8570090744211266e-06, "loss": 0.0259, "step": 117465 }, { "epoch": 0.49014862598159076, "grad_norm": 0.7867718108668231, "learning_rate": 2.856948270305215e-06, "loss": 0.0224, "step": 117470 }, { "epoch": 0.490169488696581, "grad_norm": 0.7407219216811158, "learning_rate": 2.856887470071318e-06, "loss": 0.0253, "step": 117475 }, { "epoch": 0.4901903514115713, "grad_norm": 0.909863048864253, "learning_rate": 2.8568266737190213e-06, "loss": 0.0262, "step": 117480 }, { "epoch": 0.4902112141265616, "grad_norm": 0.7038713340500458, "learning_rate": 2.856765881247913e-06, "loss": 0.0261, "step": 117485 }, { "epoch": 0.49023207684155184, "grad_norm": 0.736294828367263, "learning_rate": 2.8567050926575796e-06, "loss": 0.0329, "step": 117490 }, { "epoch": 0.49025293955654214, "grad_norm": 0.43356127187167837, "learning_rate": 2.8566443079476086e-06, "loss": 0.028, "step": 117495 }, { "epoch": 0.4902738022715324, "grad_norm": 0.6711324740096379, "learning_rate": 2.8565835271175868e-06, "loss": 0.0221, "step": 117500 }, { "epoch": 0.4902946649865227, "grad_norm": 1.6624843693381823, "learning_rate": 2.8565227501671016e-06, "loss": 0.0399, "step": 117505 }, { "epoch": 0.490315527701513, "grad_norm": 0.7497246856382199, "learning_rate": 2.8564619770957407e-06, "loss": 0.0278, "step": 117510 }, { "epoch": 0.49033639041650323, "grad_norm": 0.7405737895224765, "learning_rate": 2.856401207903091e-06, "loss": 0.0254, "step": 117515 }, { "epoch": 0.49035725313149353, "grad_norm": 0.53155893404675, "learning_rate": 2.8563404425887404e-06, "loss": 0.0264, "step": 117520 }, { "epoch": 0.4903781158464838, "grad_norm": 0.6918794121122043, "learning_rate": 2.8562796811522753e-06, "loss": 0.0214, "step": 117525 }, { "epoch": 0.4903989785614741, "grad_norm": 1.2923856558122198, "learning_rate": 2.8562189235932847e-06, "loss": 0.0255, "step": 117530 }, { "epoch": 0.4904198412764644, "grad_norm": 0.4307700217765697, "learning_rate": 2.8561581699113554e-06, "loss": 0.0254, "step": 117535 }, { "epoch": 0.4904407039914546, "grad_norm": 0.9470118334638215, "learning_rate": 2.856097420106075e-06, "loss": 0.0255, "step": 117540 }, { "epoch": 0.4904615667064449, "grad_norm": 0.7483965598560925, "learning_rate": 2.8560366741770313e-06, "loss": 0.0249, "step": 117545 }, { "epoch": 0.49048242942143516, "grad_norm": 0.7784497164246127, "learning_rate": 2.8559759321238127e-06, "loss": 0.0308, "step": 117550 }, { "epoch": 0.49050329213642546, "grad_norm": 0.4888406237196273, "learning_rate": 2.8559151939460066e-06, "loss": 0.0251, "step": 117555 }, { "epoch": 0.49052415485141576, "grad_norm": 0.7697729450263254, "learning_rate": 2.8558544596432e-06, "loss": 0.0301, "step": 117560 }, { "epoch": 0.490545017566406, "grad_norm": 0.8434873695444822, "learning_rate": 2.8557937292149827e-06, "loss": 0.0265, "step": 117565 }, { "epoch": 0.4905658802813963, "grad_norm": 1.0328844722585175, "learning_rate": 2.8557330026609425e-06, "loss": 0.0307, "step": 117570 }, { "epoch": 0.4905867429963866, "grad_norm": 0.5083308311166906, "learning_rate": 2.8556722799806663e-06, "loss": 0.0238, "step": 117575 }, { "epoch": 0.49060760571137685, "grad_norm": 0.4973157702599617, "learning_rate": 2.8556115611737423e-06, "loss": 0.0154, "step": 117580 }, { "epoch": 0.49062846842636715, "grad_norm": 0.926257696112302, "learning_rate": 2.8555508462397595e-06, "loss": 0.0241, "step": 117585 }, { "epoch": 0.4906493311413574, "grad_norm": 0.5762715823405499, "learning_rate": 2.8554901351783067e-06, "loss": 0.0257, "step": 117590 }, { "epoch": 0.4906701938563477, "grad_norm": 0.49536556269613014, "learning_rate": 2.8554294279889706e-06, "loss": 0.0218, "step": 117595 }, { "epoch": 0.490691056571338, "grad_norm": 0.43414320357854236, "learning_rate": 2.8553687246713412e-06, "loss": 0.0246, "step": 117600 }, { "epoch": 0.49071191928632824, "grad_norm": 0.4894173388454335, "learning_rate": 2.855308025225006e-06, "loss": 0.0198, "step": 117605 }, { "epoch": 0.49073278200131853, "grad_norm": 0.5929164820944965, "learning_rate": 2.8552473296495543e-06, "loss": 0.0259, "step": 117610 }, { "epoch": 0.4907536447163088, "grad_norm": 1.2035132592545843, "learning_rate": 2.8551866379445738e-06, "loss": 0.0275, "step": 117615 }, { "epoch": 0.4907745074312991, "grad_norm": 0.631973014548958, "learning_rate": 2.8551259501096534e-06, "loss": 0.0244, "step": 117620 }, { "epoch": 0.4907953701462894, "grad_norm": 1.4645527427728071, "learning_rate": 2.8550652661443827e-06, "loss": 0.0242, "step": 117625 }, { "epoch": 0.4908162328612796, "grad_norm": 1.266940348094615, "learning_rate": 2.8550045860483496e-06, "loss": 0.0279, "step": 117630 }, { "epoch": 0.4908370955762699, "grad_norm": 0.6844212054815831, "learning_rate": 2.854943909821143e-06, "loss": 0.0221, "step": 117635 }, { "epoch": 0.49085795829126017, "grad_norm": 0.665493960990222, "learning_rate": 2.854883237462352e-06, "loss": 0.0212, "step": 117640 }, { "epoch": 0.49087882100625047, "grad_norm": 0.35744586561636593, "learning_rate": 2.8548225689715653e-06, "loss": 0.0215, "step": 117645 }, { "epoch": 0.49089968372124076, "grad_norm": 0.7371345096924352, "learning_rate": 2.854761904348373e-06, "loss": 0.0212, "step": 117650 }, { "epoch": 0.490920546436231, "grad_norm": 0.915117726400395, "learning_rate": 2.8547012435923627e-06, "loss": 0.0278, "step": 117655 }, { "epoch": 0.4909414091512213, "grad_norm": 0.33857568040782254, "learning_rate": 2.854640586703124e-06, "loss": 0.0214, "step": 117660 }, { "epoch": 0.4909622718662116, "grad_norm": 0.8269602998753484, "learning_rate": 2.854579933680247e-06, "loss": 0.0218, "step": 117665 }, { "epoch": 0.49098313458120185, "grad_norm": 1.2112457667617866, "learning_rate": 2.85451928452332e-06, "loss": 0.0269, "step": 117670 }, { "epoch": 0.49100399729619215, "grad_norm": 0.6512011140868587, "learning_rate": 2.8544586392319324e-06, "loss": 0.0182, "step": 117675 }, { "epoch": 0.4910248600111824, "grad_norm": 0.6122823388735572, "learning_rate": 2.8543979978056736e-06, "loss": 0.0265, "step": 117680 }, { "epoch": 0.4910457227261727, "grad_norm": 0.7691762664573703, "learning_rate": 2.854337360244134e-06, "loss": 0.0225, "step": 117685 }, { "epoch": 0.491066585441163, "grad_norm": 1.02032370603254, "learning_rate": 2.8542767265469016e-06, "loss": 0.0349, "step": 117690 }, { "epoch": 0.49108744815615324, "grad_norm": 0.8598672700166928, "learning_rate": 2.8542160967135673e-06, "loss": 0.0217, "step": 117695 }, { "epoch": 0.49110831087114354, "grad_norm": 0.5493061628333834, "learning_rate": 2.85415547074372e-06, "loss": 0.0311, "step": 117700 }, { "epoch": 0.4911291735861338, "grad_norm": 0.5217481139435356, "learning_rate": 2.8540948486369496e-06, "loss": 0.0221, "step": 117705 }, { "epoch": 0.4911500363011241, "grad_norm": 0.8619912370979427, "learning_rate": 2.854034230392846e-06, "loss": 0.0261, "step": 117710 }, { "epoch": 0.4911708990161144, "grad_norm": 0.8070305325803162, "learning_rate": 2.853973616010999e-06, "loss": 0.0287, "step": 117715 }, { "epoch": 0.4911917617311046, "grad_norm": 0.5586279564174125, "learning_rate": 2.853913005490998e-06, "loss": 0.0179, "step": 117720 }, { "epoch": 0.4912126244460949, "grad_norm": 0.5591653330022208, "learning_rate": 2.8538523988324333e-06, "loss": 0.0209, "step": 117725 }, { "epoch": 0.49123348716108517, "grad_norm": 0.6701810104732272, "learning_rate": 2.8537917960348956e-06, "loss": 0.0181, "step": 117730 }, { "epoch": 0.49125434987607547, "grad_norm": 1.1858385704134458, "learning_rate": 2.8537311970979736e-06, "loss": 0.0295, "step": 117735 }, { "epoch": 0.49127521259106577, "grad_norm": 0.5764602092206733, "learning_rate": 2.8536706020212577e-06, "loss": 0.0251, "step": 117740 }, { "epoch": 0.491296075306056, "grad_norm": 0.6551455509925047, "learning_rate": 2.8536100108043393e-06, "loss": 0.03, "step": 117745 }, { "epoch": 0.4913169380210463, "grad_norm": 0.8422903216084386, "learning_rate": 2.8535494234468074e-06, "loss": 0.0204, "step": 117750 }, { "epoch": 0.4913378007360366, "grad_norm": 0.5712142498142057, "learning_rate": 2.853488839948253e-06, "loss": 0.0225, "step": 117755 }, { "epoch": 0.49135866345102686, "grad_norm": 0.740095980892199, "learning_rate": 2.853428260308266e-06, "loss": 0.0224, "step": 117760 }, { "epoch": 0.49137952616601716, "grad_norm": 0.6687006874934627, "learning_rate": 2.853367684526437e-06, "loss": 0.0232, "step": 117765 }, { "epoch": 0.4914003888810074, "grad_norm": 0.31974359207201203, "learning_rate": 2.8533071126023563e-06, "loss": 0.0288, "step": 117770 }, { "epoch": 0.4914212515959977, "grad_norm": 1.5351576248916539, "learning_rate": 2.853246544535615e-06, "loss": 0.0275, "step": 117775 }, { "epoch": 0.491442114310988, "grad_norm": 0.7700657477992227, "learning_rate": 2.853185980325803e-06, "loss": 0.025, "step": 117780 }, { "epoch": 0.49146297702597824, "grad_norm": 1.0852095252145946, "learning_rate": 2.8531254199725122e-06, "loss": 0.0257, "step": 117785 }, { "epoch": 0.49148383974096854, "grad_norm": 0.9940563110854552, "learning_rate": 2.853064863475332e-06, "loss": 0.0292, "step": 117790 }, { "epoch": 0.4915047024559588, "grad_norm": 1.8236218550288499, "learning_rate": 2.853004310833854e-06, "loss": 0.0273, "step": 117795 }, { "epoch": 0.4915255651709491, "grad_norm": 0.49000188019047697, "learning_rate": 2.8529437620476684e-06, "loss": 0.0236, "step": 117800 }, { "epoch": 0.4915464278859394, "grad_norm": 0.7822311834184538, "learning_rate": 2.852883217116366e-06, "loss": 0.0306, "step": 117805 }, { "epoch": 0.49156729060092963, "grad_norm": 0.33142116638703223, "learning_rate": 2.8528226760395396e-06, "loss": 0.0233, "step": 117810 }, { "epoch": 0.49158815331591993, "grad_norm": 0.9379210332265575, "learning_rate": 2.8527621388167775e-06, "loss": 0.0228, "step": 117815 }, { "epoch": 0.4916090160309102, "grad_norm": 0.8081564167863948, "learning_rate": 2.852701605447673e-06, "loss": 0.0326, "step": 117820 }, { "epoch": 0.4916298787459005, "grad_norm": 0.642217604758719, "learning_rate": 2.8526410759318163e-06, "loss": 0.0231, "step": 117825 }, { "epoch": 0.49165074146089077, "grad_norm": 0.4635174503686571, "learning_rate": 2.8525805502687983e-06, "loss": 0.0236, "step": 117830 }, { "epoch": 0.491671604175881, "grad_norm": 1.1036692908007257, "learning_rate": 2.852520028458211e-06, "loss": 0.0265, "step": 117835 }, { "epoch": 0.4916924668908713, "grad_norm": 0.6165151658552527, "learning_rate": 2.8524595104996453e-06, "loss": 0.0261, "step": 117840 }, { "epoch": 0.4917133296058616, "grad_norm": 0.9911637835071027, "learning_rate": 2.8523989963926935e-06, "loss": 0.0214, "step": 117845 }, { "epoch": 0.49173419232085186, "grad_norm": 0.5844453461221921, "learning_rate": 2.8523384861369456e-06, "loss": 0.0305, "step": 117850 }, { "epoch": 0.49175505503584216, "grad_norm": 0.9579485275824211, "learning_rate": 2.8522779797319945e-06, "loss": 0.0256, "step": 117855 }, { "epoch": 0.4917759177508324, "grad_norm": 0.5735011021961758, "learning_rate": 2.8522174771774303e-06, "loss": 0.0227, "step": 117860 }, { "epoch": 0.4917967804658227, "grad_norm": 0.6235738259783704, "learning_rate": 2.8521569784728464e-06, "loss": 0.021, "step": 117865 }, { "epoch": 0.491817643180813, "grad_norm": 0.8445891846709078, "learning_rate": 2.852096483617833e-06, "loss": 0.0213, "step": 117870 }, { "epoch": 0.49183850589580325, "grad_norm": 0.7941708044805055, "learning_rate": 2.8520359926119827e-06, "loss": 0.0255, "step": 117875 }, { "epoch": 0.49185936861079355, "grad_norm": 0.9875884119520152, "learning_rate": 2.851975505454887e-06, "loss": 0.0235, "step": 117880 }, { "epoch": 0.4918802313257838, "grad_norm": 0.62184330566526, "learning_rate": 2.851915022146138e-06, "loss": 0.0319, "step": 117885 }, { "epoch": 0.4919010940407741, "grad_norm": 1.1973673639202689, "learning_rate": 2.8518545426853274e-06, "loss": 0.0287, "step": 117890 }, { "epoch": 0.4919219567557644, "grad_norm": 0.6290291952958316, "learning_rate": 2.8517940670720475e-06, "loss": 0.029, "step": 117895 }, { "epoch": 0.49194281947075463, "grad_norm": 0.4766702550446438, "learning_rate": 2.85173359530589e-06, "loss": 0.0178, "step": 117900 }, { "epoch": 0.49196368218574493, "grad_norm": 1.3639952404943472, "learning_rate": 2.851673127386447e-06, "loss": 0.0411, "step": 117905 }, { "epoch": 0.4919845449007352, "grad_norm": 0.5357074963902045, "learning_rate": 2.8516126633133114e-06, "loss": 0.023, "step": 117910 }, { "epoch": 0.4920054076157255, "grad_norm": 0.5855186441165353, "learning_rate": 2.851552203086075e-06, "loss": 0.0281, "step": 117915 }, { "epoch": 0.4920262703307158, "grad_norm": 0.7494996718096478, "learning_rate": 2.85149174670433e-06, "loss": 0.0343, "step": 117920 }, { "epoch": 0.492047133045706, "grad_norm": 0.9772245743282417, "learning_rate": 2.8514312941676686e-06, "loss": 0.0318, "step": 117925 }, { "epoch": 0.4920679957606963, "grad_norm": 0.5117027240600585, "learning_rate": 2.851370845475684e-06, "loss": 0.0193, "step": 117930 }, { "epoch": 0.4920888584756866, "grad_norm": 0.43291293081732096, "learning_rate": 2.851310400627968e-06, "loss": 0.0261, "step": 117935 }, { "epoch": 0.49210972119067686, "grad_norm": 0.6676611953629781, "learning_rate": 2.8512499596241133e-06, "loss": 0.0238, "step": 117940 }, { "epoch": 0.49213058390566716, "grad_norm": 1.3648570645417595, "learning_rate": 2.8511895224637126e-06, "loss": 0.0234, "step": 117945 }, { "epoch": 0.4921514466206574, "grad_norm": 3.112251538623758, "learning_rate": 2.8511290891463585e-06, "loss": 0.0232, "step": 117950 }, { "epoch": 0.4921723093356477, "grad_norm": 0.9174416908364497, "learning_rate": 2.851068659671644e-06, "loss": 0.0312, "step": 117955 }, { "epoch": 0.492193172050638, "grad_norm": 0.6571546778060766, "learning_rate": 2.8510082340391604e-06, "loss": 0.0222, "step": 117960 }, { "epoch": 0.49221403476562825, "grad_norm": 0.5883791354834669, "learning_rate": 2.850947812248503e-06, "loss": 0.0286, "step": 117965 }, { "epoch": 0.49223489748061855, "grad_norm": 1.0369848959389658, "learning_rate": 2.850887394299263e-06, "loss": 0.0256, "step": 117970 }, { "epoch": 0.4922557601956088, "grad_norm": 0.49436589017770155, "learning_rate": 2.850826980191035e-06, "loss": 0.0186, "step": 117975 }, { "epoch": 0.4922766229105991, "grad_norm": 0.36319674553682263, "learning_rate": 2.8507665699234095e-06, "loss": 0.0191, "step": 117980 }, { "epoch": 0.4922974856255894, "grad_norm": 0.720306696470896, "learning_rate": 2.850706163495982e-06, "loss": 0.0226, "step": 117985 }, { "epoch": 0.49231834834057964, "grad_norm": 1.093764483079593, "learning_rate": 2.8506457609083444e-06, "loss": 0.0209, "step": 117990 }, { "epoch": 0.49233921105556994, "grad_norm": 0.19785524505403496, "learning_rate": 2.85058536216009e-06, "loss": 0.0228, "step": 117995 }, { "epoch": 0.4923600737705602, "grad_norm": 0.4232744926325931, "learning_rate": 2.8505249672508125e-06, "loss": 0.0233, "step": 118000 }, { "epoch": 0.4923809364855505, "grad_norm": 0.9129533912436532, "learning_rate": 2.8504645761801054e-06, "loss": 0.0275, "step": 118005 }, { "epoch": 0.4924017992005408, "grad_norm": 0.8808909439075203, "learning_rate": 2.8504041889475616e-06, "loss": 0.0224, "step": 118010 }, { "epoch": 0.492422661915531, "grad_norm": 0.8723878725863778, "learning_rate": 2.8503438055527737e-06, "loss": 0.0291, "step": 118015 }, { "epoch": 0.4924435246305213, "grad_norm": 0.8588281991986539, "learning_rate": 2.850283425995337e-06, "loss": 0.0276, "step": 118020 }, { "epoch": 0.4924643873455116, "grad_norm": 0.5583401293398201, "learning_rate": 2.8502230502748446e-06, "loss": 0.0253, "step": 118025 }, { "epoch": 0.49248525006050187, "grad_norm": 0.6378707949095413, "learning_rate": 2.8501626783908894e-06, "loss": 0.0227, "step": 118030 }, { "epoch": 0.49250611277549217, "grad_norm": 0.5489665658939958, "learning_rate": 2.850102310343066e-06, "loss": 0.0213, "step": 118035 }, { "epoch": 0.4925269754904824, "grad_norm": 1.2980752135074543, "learning_rate": 2.850041946130967e-06, "loss": 0.0322, "step": 118040 }, { "epoch": 0.4925478382054727, "grad_norm": 0.5228611229206517, "learning_rate": 2.849981585754187e-06, "loss": 0.0301, "step": 118045 }, { "epoch": 0.492568700920463, "grad_norm": 0.6702627875074506, "learning_rate": 2.84992122921232e-06, "loss": 0.019, "step": 118050 }, { "epoch": 0.49258956363545325, "grad_norm": 1.022435131491787, "learning_rate": 2.8498608765049595e-06, "loss": 0.023, "step": 118055 }, { "epoch": 0.49261042635044355, "grad_norm": 0.5897258492763618, "learning_rate": 2.8498005276317004e-06, "loss": 0.024, "step": 118060 }, { "epoch": 0.4926312890654338, "grad_norm": 0.84369362895124, "learning_rate": 2.849740182592135e-06, "loss": 0.0271, "step": 118065 }, { "epoch": 0.4926521517804241, "grad_norm": 0.5781955337215048, "learning_rate": 2.849679841385859e-06, "loss": 0.0255, "step": 118070 }, { "epoch": 0.4926730144954144, "grad_norm": 0.6211902006135752, "learning_rate": 2.849619504012467e-06, "loss": 0.0293, "step": 118075 }, { "epoch": 0.49269387721040464, "grad_norm": 0.5985386735581669, "learning_rate": 2.8495591704715514e-06, "loss": 0.0201, "step": 118080 }, { "epoch": 0.49271473992539494, "grad_norm": 0.6107834951211669, "learning_rate": 2.849498840762707e-06, "loss": 0.02, "step": 118085 }, { "epoch": 0.4927356026403852, "grad_norm": 0.7893133273568166, "learning_rate": 2.8494385148855293e-06, "loss": 0.0254, "step": 118090 }, { "epoch": 0.4927564653553755, "grad_norm": 0.703891910356927, "learning_rate": 2.849378192839612e-06, "loss": 0.0219, "step": 118095 }, { "epoch": 0.4927773280703658, "grad_norm": 0.8412858060468354, "learning_rate": 2.84931787462455e-06, "loss": 0.0251, "step": 118100 }, { "epoch": 0.492798190785356, "grad_norm": 0.7845409147054365, "learning_rate": 2.8492575602399362e-06, "loss": 0.0248, "step": 118105 }, { "epoch": 0.4928190535003463, "grad_norm": 0.9189006595810568, "learning_rate": 2.8491972496853677e-06, "loss": 0.0217, "step": 118110 }, { "epoch": 0.4928399162153366, "grad_norm": 1.1189511518837858, "learning_rate": 2.849136942960437e-06, "loss": 0.0236, "step": 118115 }, { "epoch": 0.49286077893032687, "grad_norm": 0.5246909248034702, "learning_rate": 2.8490766400647403e-06, "loss": 0.0187, "step": 118120 }, { "epoch": 0.49288164164531717, "grad_norm": 0.5964696895133474, "learning_rate": 2.8490163409978715e-06, "loss": 0.0269, "step": 118125 }, { "epoch": 0.4929025043603074, "grad_norm": 0.44704024394699604, "learning_rate": 2.8489560457594263e-06, "loss": 0.0185, "step": 118130 }, { "epoch": 0.4929233670752977, "grad_norm": 0.7399783270749916, "learning_rate": 2.8488957543489988e-06, "loss": 0.0239, "step": 118135 }, { "epoch": 0.492944229790288, "grad_norm": 0.5627706888392564, "learning_rate": 2.848835466766184e-06, "loss": 0.0255, "step": 118140 }, { "epoch": 0.49296509250527826, "grad_norm": 0.544527616097317, "learning_rate": 2.848775183010577e-06, "loss": 0.0181, "step": 118145 }, { "epoch": 0.49298595522026856, "grad_norm": 0.8300762501121719, "learning_rate": 2.848714903081773e-06, "loss": 0.029, "step": 118150 }, { "epoch": 0.4930068179352588, "grad_norm": 0.587416605327156, "learning_rate": 2.8486546269793673e-06, "loss": 0.0255, "step": 118155 }, { "epoch": 0.4930276806502491, "grad_norm": 0.6331954497469191, "learning_rate": 2.848594354702955e-06, "loss": 0.0258, "step": 118160 }, { "epoch": 0.4930485433652394, "grad_norm": 1.3196088439038136, "learning_rate": 2.8485340862521317e-06, "loss": 0.0211, "step": 118165 }, { "epoch": 0.49306940608022964, "grad_norm": 0.7567849850798288, "learning_rate": 2.8484738216264924e-06, "loss": 0.0217, "step": 118170 }, { "epoch": 0.49309026879521994, "grad_norm": 0.6907025578348496, "learning_rate": 2.848413560825632e-06, "loss": 0.0201, "step": 118175 }, { "epoch": 0.4931111315102102, "grad_norm": 1.0578182745026932, "learning_rate": 2.848353303849146e-06, "loss": 0.0316, "step": 118180 }, { "epoch": 0.4931319942252005, "grad_norm": 0.9979237835575542, "learning_rate": 2.848293050696631e-06, "loss": 0.0239, "step": 118185 }, { "epoch": 0.4931528569401908, "grad_norm": 0.4867006694941596, "learning_rate": 2.8482328013676817e-06, "loss": 0.0326, "step": 118190 }, { "epoch": 0.49317371965518103, "grad_norm": 0.43889044426362395, "learning_rate": 2.848172555861894e-06, "loss": 0.0164, "step": 118195 }, { "epoch": 0.49319458237017133, "grad_norm": 1.5007512477902523, "learning_rate": 2.848112314178863e-06, "loss": 0.0312, "step": 118200 }, { "epoch": 0.49321544508516163, "grad_norm": 0.4629178464239357, "learning_rate": 2.848052076318185e-06, "loss": 0.0284, "step": 118205 }, { "epoch": 0.4932363078001519, "grad_norm": 0.5969116329332278, "learning_rate": 2.847991842279456e-06, "loss": 0.0209, "step": 118210 }, { "epoch": 0.4932571705151422, "grad_norm": 0.9364541115695769, "learning_rate": 2.8479316120622715e-06, "loss": 0.0259, "step": 118215 }, { "epoch": 0.4932780332301324, "grad_norm": 0.6835869804910036, "learning_rate": 2.847871385666227e-06, "loss": 0.022, "step": 118220 }, { "epoch": 0.4932988959451227, "grad_norm": 0.11287225951883374, "learning_rate": 2.8478111630909193e-06, "loss": 0.0182, "step": 118225 }, { "epoch": 0.493319758660113, "grad_norm": 0.7967589184448444, "learning_rate": 2.8477509443359443e-06, "loss": 0.0258, "step": 118230 }, { "epoch": 0.49334062137510326, "grad_norm": 0.6130746892982107, "learning_rate": 2.8476907294008977e-06, "loss": 0.0263, "step": 118235 }, { "epoch": 0.49336148409009356, "grad_norm": 0.9477454268793887, "learning_rate": 2.847630518285376e-06, "loss": 0.0306, "step": 118240 }, { "epoch": 0.4933823468050838, "grad_norm": 0.7514746519499146, "learning_rate": 2.8475703109889746e-06, "loss": 0.0266, "step": 118245 }, { "epoch": 0.4934032095200741, "grad_norm": 0.6391751765209438, "learning_rate": 2.847510107511291e-06, "loss": 0.0174, "step": 118250 }, { "epoch": 0.4934240722350644, "grad_norm": 0.9015479211915196, "learning_rate": 2.8474499078519215e-06, "loss": 0.0288, "step": 118255 }, { "epoch": 0.49344493495005465, "grad_norm": 0.41890321988177803, "learning_rate": 2.847389712010461e-06, "loss": 0.0161, "step": 118260 }, { "epoch": 0.49346579766504495, "grad_norm": 0.8457509628274119, "learning_rate": 2.8473295199865074e-06, "loss": 0.0197, "step": 118265 }, { "epoch": 0.4934866603800352, "grad_norm": 0.49416482731669953, "learning_rate": 2.8472693317796566e-06, "loss": 0.0229, "step": 118270 }, { "epoch": 0.4935075230950255, "grad_norm": 0.5212804188915272, "learning_rate": 2.847209147389506e-06, "loss": 0.0214, "step": 118275 }, { "epoch": 0.4935283858100158, "grad_norm": 0.9487105631835002, "learning_rate": 2.8471489668156506e-06, "loss": 0.0207, "step": 118280 }, { "epoch": 0.49354924852500603, "grad_norm": 0.34732659333368837, "learning_rate": 2.847088790057689e-06, "loss": 0.0188, "step": 118285 }, { "epoch": 0.49357011123999633, "grad_norm": 0.4443850023570316, "learning_rate": 2.8470286171152163e-06, "loss": 0.0324, "step": 118290 }, { "epoch": 0.49359097395498663, "grad_norm": 0.7045090736560891, "learning_rate": 2.84696844798783e-06, "loss": 0.0275, "step": 118295 }, { "epoch": 0.4936118366699769, "grad_norm": 0.9056330457872123, "learning_rate": 2.8469082826751276e-06, "loss": 0.0288, "step": 118300 }, { "epoch": 0.4936326993849672, "grad_norm": 0.5269666942222514, "learning_rate": 2.846848121176705e-06, "loss": 0.021, "step": 118305 }, { "epoch": 0.4936535620999574, "grad_norm": 0.420217178492446, "learning_rate": 2.84678796349216e-06, "loss": 0.0213, "step": 118310 }, { "epoch": 0.4936744248149477, "grad_norm": 0.5828175303863575, "learning_rate": 2.846727809621089e-06, "loss": 0.0193, "step": 118315 }, { "epoch": 0.493695287529938, "grad_norm": 0.7571220093809949, "learning_rate": 2.84666765956309e-06, "loss": 0.0317, "step": 118320 }, { "epoch": 0.49371615024492826, "grad_norm": 0.5384674282914632, "learning_rate": 2.8466075133177594e-06, "loss": 0.0202, "step": 118325 }, { "epoch": 0.49373701295991856, "grad_norm": 0.9221611630420338, "learning_rate": 2.846547370884695e-06, "loss": 0.0408, "step": 118330 }, { "epoch": 0.4937578756749088, "grad_norm": 0.6196381170841321, "learning_rate": 2.846487232263493e-06, "loss": 0.0267, "step": 118335 }, { "epoch": 0.4937787383898991, "grad_norm": 0.5850809363852931, "learning_rate": 2.8464270974537516e-06, "loss": 0.0222, "step": 118340 }, { "epoch": 0.4937996011048894, "grad_norm": 0.6153513194093462, "learning_rate": 2.8463669664550687e-06, "loss": 0.0217, "step": 118345 }, { "epoch": 0.49382046381987965, "grad_norm": 0.8061199563392805, "learning_rate": 2.846306839267041e-06, "loss": 0.0263, "step": 118350 }, { "epoch": 0.49384132653486995, "grad_norm": 1.4008337998893, "learning_rate": 2.8462467158892653e-06, "loss": 0.0303, "step": 118355 }, { "epoch": 0.4938621892498602, "grad_norm": 1.1383315386866673, "learning_rate": 2.846186596321341e-06, "loss": 0.0207, "step": 118360 }, { "epoch": 0.4938830519648505, "grad_norm": 1.0581981401480398, "learning_rate": 2.8461264805628645e-06, "loss": 0.0274, "step": 118365 }, { "epoch": 0.4939039146798408, "grad_norm": 1.6840513082980548, "learning_rate": 2.8460663686134345e-06, "loss": 0.029, "step": 118370 }, { "epoch": 0.49392477739483104, "grad_norm": 0.7668939442127795, "learning_rate": 2.8460062604726476e-06, "loss": 0.0222, "step": 118375 }, { "epoch": 0.49394564010982134, "grad_norm": 0.6878995637302231, "learning_rate": 2.845946156140102e-06, "loss": 0.0201, "step": 118380 }, { "epoch": 0.49396650282481164, "grad_norm": 0.7580090947954298, "learning_rate": 2.845886055615396e-06, "loss": 0.0247, "step": 118385 }, { "epoch": 0.4939873655398019, "grad_norm": 0.3290466454283344, "learning_rate": 2.8458259588981267e-06, "loss": 0.0151, "step": 118390 }, { "epoch": 0.4940082282547922, "grad_norm": 0.503273310793542, "learning_rate": 2.8457658659878938e-06, "loss": 0.0283, "step": 118395 }, { "epoch": 0.4940290909697824, "grad_norm": 1.3779800386794228, "learning_rate": 2.845705776884294e-06, "loss": 0.0265, "step": 118400 }, { "epoch": 0.4940499536847727, "grad_norm": 0.6117338854289112, "learning_rate": 2.845645691586925e-06, "loss": 0.0247, "step": 118405 }, { "epoch": 0.494070816399763, "grad_norm": 0.7292401356297376, "learning_rate": 2.8455856100953865e-06, "loss": 0.0303, "step": 118410 }, { "epoch": 0.49409167911475327, "grad_norm": 0.9252388433409111, "learning_rate": 2.8455255324092753e-06, "loss": 0.029, "step": 118415 }, { "epoch": 0.49411254182974357, "grad_norm": 1.1100191061407525, "learning_rate": 2.845465458528191e-06, "loss": 0.0351, "step": 118420 }, { "epoch": 0.4941334045447338, "grad_norm": 0.3745831701461865, "learning_rate": 2.8454053884517303e-06, "loss": 0.0209, "step": 118425 }, { "epoch": 0.4941542672597241, "grad_norm": 0.8630010327294367, "learning_rate": 2.8453453221794935e-06, "loss": 0.0244, "step": 118430 }, { "epoch": 0.4941751299747144, "grad_norm": 1.028609077898012, "learning_rate": 2.845285259711078e-06, "loss": 0.0275, "step": 118435 }, { "epoch": 0.49419599268970466, "grad_norm": 0.6285430028559024, "learning_rate": 2.8452252010460825e-06, "loss": 0.0232, "step": 118440 }, { "epoch": 0.49421685540469495, "grad_norm": 0.7377160677898282, "learning_rate": 2.845165146184106e-06, "loss": 0.0183, "step": 118445 }, { "epoch": 0.4942377181196852, "grad_norm": 1.0125659737846975, "learning_rate": 2.845105095124746e-06, "loss": 0.0197, "step": 118450 }, { "epoch": 0.4942585808346755, "grad_norm": 0.9781242599132597, "learning_rate": 2.8450450478676026e-06, "loss": 0.0227, "step": 118455 }, { "epoch": 0.4942794435496658, "grad_norm": 0.6660807320277783, "learning_rate": 2.844985004412274e-06, "loss": 0.0256, "step": 118460 }, { "epoch": 0.49430030626465604, "grad_norm": 0.7407192788680378, "learning_rate": 2.8449249647583588e-06, "loss": 0.0277, "step": 118465 }, { "epoch": 0.49432116897964634, "grad_norm": 0.7361325299146529, "learning_rate": 2.8448649289054565e-06, "loss": 0.0245, "step": 118470 }, { "epoch": 0.4943420316946366, "grad_norm": 0.6332528188959847, "learning_rate": 2.8448048968531657e-06, "loss": 0.0266, "step": 118475 }, { "epoch": 0.4943628944096269, "grad_norm": 0.527236333544012, "learning_rate": 2.8447448686010853e-06, "loss": 0.0208, "step": 118480 }, { "epoch": 0.4943837571246172, "grad_norm": 0.915390328128067, "learning_rate": 2.8446848441488145e-06, "loss": 0.0198, "step": 118485 }, { "epoch": 0.49440461983960743, "grad_norm": 1.450502231955079, "learning_rate": 2.8446248234959524e-06, "loss": 0.0302, "step": 118490 }, { "epoch": 0.49442548255459773, "grad_norm": 0.8926143373209708, "learning_rate": 2.844564806642098e-06, "loss": 0.0297, "step": 118495 }, { "epoch": 0.49444634526958803, "grad_norm": 0.7347366738425325, "learning_rate": 2.8445047935868506e-06, "loss": 0.0178, "step": 118500 }, { "epoch": 0.49446720798457827, "grad_norm": 0.6430112930656826, "learning_rate": 2.84444478432981e-06, "loss": 0.0237, "step": 118505 }, { "epoch": 0.49448807069956857, "grad_norm": 0.7428279499348389, "learning_rate": 2.8443847788705753e-06, "loss": 0.0233, "step": 118510 }, { "epoch": 0.4945089334145588, "grad_norm": 0.9524892592185691, "learning_rate": 2.844324777208746e-06, "loss": 0.0253, "step": 118515 }, { "epoch": 0.4945297961295491, "grad_norm": 0.47760657967730924, "learning_rate": 2.8442647793439215e-06, "loss": 0.0264, "step": 118520 }, { "epoch": 0.4945506588445394, "grad_norm": 0.5640378384230106, "learning_rate": 2.8442047852757017e-06, "loss": 0.0485, "step": 118525 }, { "epoch": 0.49457152155952966, "grad_norm": 0.661185664312212, "learning_rate": 2.8441447950036854e-06, "loss": 0.0265, "step": 118530 }, { "epoch": 0.49459238427451996, "grad_norm": 1.1112520210882528, "learning_rate": 2.8440848085274724e-06, "loss": 0.0305, "step": 118535 }, { "epoch": 0.4946132469895102, "grad_norm": 0.8006324741037015, "learning_rate": 2.8440248258466636e-06, "loss": 0.0381, "step": 118540 }, { "epoch": 0.4946341097045005, "grad_norm": 1.3151255948533573, "learning_rate": 2.8439648469608568e-06, "loss": 0.0321, "step": 118545 }, { "epoch": 0.4946549724194908, "grad_norm": 0.5696990135221537, "learning_rate": 2.843904871869654e-06, "loss": 0.0263, "step": 118550 }, { "epoch": 0.49467583513448105, "grad_norm": 0.5486065536922863, "learning_rate": 2.843844900572653e-06, "loss": 0.0245, "step": 118555 }, { "epoch": 0.49469669784947135, "grad_norm": 0.6197546865463164, "learning_rate": 2.8437849330694555e-06, "loss": 0.0229, "step": 118560 }, { "epoch": 0.4947175605644616, "grad_norm": 0.5508803925526401, "learning_rate": 2.8437249693596606e-06, "loss": 0.0179, "step": 118565 }, { "epoch": 0.4947384232794519, "grad_norm": 0.8521952227812227, "learning_rate": 2.843665009442869e-06, "loss": 0.0245, "step": 118570 }, { "epoch": 0.4947592859944422, "grad_norm": 1.2117805509690194, "learning_rate": 2.8436050533186802e-06, "loss": 0.0346, "step": 118575 }, { "epoch": 0.49478014870943243, "grad_norm": 0.912375073043446, "learning_rate": 2.8435451009866956e-06, "loss": 0.0285, "step": 118580 }, { "epoch": 0.49480101142442273, "grad_norm": 0.5581076962419256, "learning_rate": 2.843485152446513e-06, "loss": 0.0288, "step": 118585 }, { "epoch": 0.49482187413941303, "grad_norm": 0.5353246291149576, "learning_rate": 2.843425207697735e-06, "loss": 0.0215, "step": 118590 }, { "epoch": 0.4948427368544033, "grad_norm": 0.9758475700963235, "learning_rate": 2.843365266739961e-06, "loss": 0.0283, "step": 118595 }, { "epoch": 0.4948635995693936, "grad_norm": 1.2360565946819542, "learning_rate": 2.8433053295727917e-06, "loss": 0.0303, "step": 118600 }, { "epoch": 0.4948844622843838, "grad_norm": 0.8289943415356954, "learning_rate": 2.8432453961958277e-06, "loss": 0.0262, "step": 118605 }, { "epoch": 0.4949053249993741, "grad_norm": 1.1568229596214568, "learning_rate": 2.8431854666086696e-06, "loss": 0.0239, "step": 118610 }, { "epoch": 0.4949261877143644, "grad_norm": 0.6570384390675744, "learning_rate": 2.843125540810918e-06, "loss": 0.0273, "step": 118615 }, { "epoch": 0.49494705042935466, "grad_norm": 0.7910380490408787, "learning_rate": 2.8430656188021728e-06, "loss": 0.0287, "step": 118620 }, { "epoch": 0.49496791314434496, "grad_norm": 0.7597086091984603, "learning_rate": 2.8430057005820354e-06, "loss": 0.0226, "step": 118625 }, { "epoch": 0.4949887758593352, "grad_norm": 1.1099413807303364, "learning_rate": 2.8429457861501063e-06, "loss": 0.0193, "step": 118630 }, { "epoch": 0.4950096385743255, "grad_norm": 0.5222563098113896, "learning_rate": 2.842885875505987e-06, "loss": 0.021, "step": 118635 }, { "epoch": 0.4950305012893158, "grad_norm": 0.4199565796782905, "learning_rate": 2.8428259686492777e-06, "loss": 0.0304, "step": 118640 }, { "epoch": 0.49505136400430605, "grad_norm": 0.37104210304844515, "learning_rate": 2.8427660655795795e-06, "loss": 0.0209, "step": 118645 }, { "epoch": 0.49507222671929635, "grad_norm": 0.69800732371863, "learning_rate": 2.8427061662964938e-06, "loss": 0.019, "step": 118650 }, { "epoch": 0.4950930894342866, "grad_norm": 0.37613744023269463, "learning_rate": 2.842646270799621e-06, "loss": 0.0235, "step": 118655 }, { "epoch": 0.4951139521492769, "grad_norm": 0.9142307067226025, "learning_rate": 2.842586379088564e-06, "loss": 0.0302, "step": 118660 }, { "epoch": 0.4951348148642672, "grad_norm": 0.6077483031130987, "learning_rate": 2.8425264911629212e-06, "loss": 0.0205, "step": 118665 }, { "epoch": 0.49515567757925744, "grad_norm": 0.8572762903827941, "learning_rate": 2.842466607022296e-06, "loss": 0.0249, "step": 118670 }, { "epoch": 0.49517654029424774, "grad_norm": 1.3274872232847121, "learning_rate": 2.842406726666289e-06, "loss": 0.029, "step": 118675 }, { "epoch": 0.49519740300923804, "grad_norm": 1.2608733893530641, "learning_rate": 2.842346850094501e-06, "loss": 0.024, "step": 118680 }, { "epoch": 0.4952182657242283, "grad_norm": 0.4017472595910682, "learning_rate": 2.842286977306535e-06, "loss": 0.02, "step": 118685 }, { "epoch": 0.4952391284392186, "grad_norm": 0.3020876256559228, "learning_rate": 2.8422271083019907e-06, "loss": 0.0242, "step": 118690 }, { "epoch": 0.4952599911542088, "grad_norm": 0.7538146557823572, "learning_rate": 2.8421672430804715e-06, "loss": 0.0215, "step": 118695 }, { "epoch": 0.4952808538691991, "grad_norm": 0.6752539469084412, "learning_rate": 2.8421073816415773e-06, "loss": 0.025, "step": 118700 }, { "epoch": 0.4953017165841894, "grad_norm": 0.5953097849636968, "learning_rate": 2.8420475239849106e-06, "loss": 0.0264, "step": 118705 }, { "epoch": 0.49532257929917967, "grad_norm": 0.5844090555728697, "learning_rate": 2.841987670110073e-06, "loss": 0.0258, "step": 118710 }, { "epoch": 0.49534344201416997, "grad_norm": 0.3732126798499187, "learning_rate": 2.841927820016666e-06, "loss": 0.0265, "step": 118715 }, { "epoch": 0.4953643047291602, "grad_norm": 0.5656177010608989, "learning_rate": 2.8418679737042927e-06, "loss": 0.0183, "step": 118720 }, { "epoch": 0.4953851674441505, "grad_norm": 0.6035390380548048, "learning_rate": 2.8418081311725525e-06, "loss": 0.0213, "step": 118725 }, { "epoch": 0.4954060301591408, "grad_norm": 0.8589250073305813, "learning_rate": 2.8417482924210504e-06, "loss": 0.0392, "step": 118730 }, { "epoch": 0.49542689287413105, "grad_norm": 0.9107974007216734, "learning_rate": 2.8416884574493863e-06, "loss": 0.0346, "step": 118735 }, { "epoch": 0.49544775558912135, "grad_norm": 0.7865819549905229, "learning_rate": 2.841628626257163e-06, "loss": 0.0203, "step": 118740 }, { "epoch": 0.4954686183041116, "grad_norm": 1.0982317923639249, "learning_rate": 2.841568798843983e-06, "loss": 0.0304, "step": 118745 }, { "epoch": 0.4954894810191019, "grad_norm": 0.7903053275077109, "learning_rate": 2.841508975209447e-06, "loss": 0.0226, "step": 118750 }, { "epoch": 0.4955103437340922, "grad_norm": 0.8987247428121561, "learning_rate": 2.841449155353159e-06, "loss": 0.0229, "step": 118755 }, { "epoch": 0.49553120644908244, "grad_norm": 0.8126148451304331, "learning_rate": 2.841389339274721e-06, "loss": 0.0216, "step": 118760 }, { "epoch": 0.49555206916407274, "grad_norm": 0.830535019788587, "learning_rate": 2.8413295269737346e-06, "loss": 0.0242, "step": 118765 }, { "epoch": 0.49557293187906304, "grad_norm": 0.8366509439273684, "learning_rate": 2.8412697184498027e-06, "loss": 0.0225, "step": 118770 }, { "epoch": 0.4955937945940533, "grad_norm": 0.8549328077296737, "learning_rate": 2.8412099137025273e-06, "loss": 0.0323, "step": 118775 }, { "epoch": 0.4956146573090436, "grad_norm": 0.6582902016247852, "learning_rate": 2.8411501127315115e-06, "loss": 0.0243, "step": 118780 }, { "epoch": 0.4956355200240338, "grad_norm": 0.5072108037312487, "learning_rate": 2.8410903155363585e-06, "loss": 0.0193, "step": 118785 }, { "epoch": 0.4956563827390241, "grad_norm": 0.578851166490578, "learning_rate": 2.8410305221166694e-06, "loss": 0.0224, "step": 118790 }, { "epoch": 0.4956772454540144, "grad_norm": 0.7448420345266134, "learning_rate": 2.8409707324720486e-06, "loss": 0.031, "step": 118795 }, { "epoch": 0.49569810816900467, "grad_norm": 1.0056307244757696, "learning_rate": 2.840910946602097e-06, "loss": 0.0252, "step": 118800 }, { "epoch": 0.49571897088399497, "grad_norm": 0.5732525406284453, "learning_rate": 2.8408511645064195e-06, "loss": 0.0244, "step": 118805 }, { "epoch": 0.4957398335989852, "grad_norm": 0.5004952991659538, "learning_rate": 2.8407913861846175e-06, "loss": 0.0249, "step": 118810 }, { "epoch": 0.4957606963139755, "grad_norm": 0.34656889969692595, "learning_rate": 2.8407316116362947e-06, "loss": 0.0175, "step": 118815 }, { "epoch": 0.4957815590289658, "grad_norm": 0.7429932025266056, "learning_rate": 2.840671840861054e-06, "loss": 0.0261, "step": 118820 }, { "epoch": 0.49580242174395606, "grad_norm": 0.8638126842945023, "learning_rate": 2.840612073858498e-06, "loss": 0.0173, "step": 118825 }, { "epoch": 0.49582328445894636, "grad_norm": 0.5554091480635084, "learning_rate": 2.8405523106282308e-06, "loss": 0.0315, "step": 118830 }, { "epoch": 0.4958441471739366, "grad_norm": 1.3051241765769972, "learning_rate": 2.8404925511698546e-06, "loss": 0.0311, "step": 118835 }, { "epoch": 0.4958650098889269, "grad_norm": 0.5536320828171268, "learning_rate": 2.840432795482973e-06, "loss": 0.0306, "step": 118840 }, { "epoch": 0.4958858726039172, "grad_norm": 1.1890310913603361, "learning_rate": 2.8403730435671895e-06, "loss": 0.0244, "step": 118845 }, { "epoch": 0.49590673531890744, "grad_norm": 0.5463101289874894, "learning_rate": 2.8403132954221068e-06, "loss": 0.0179, "step": 118850 }, { "epoch": 0.49592759803389774, "grad_norm": 0.8578451300952897, "learning_rate": 2.8402535510473296e-06, "loss": 0.0269, "step": 118855 }, { "epoch": 0.49594846074888804, "grad_norm": 0.5960974207755225, "learning_rate": 2.8401938104424597e-06, "loss": 0.0198, "step": 118860 }, { "epoch": 0.4959693234638783, "grad_norm": 0.5900322071306765, "learning_rate": 2.8401340736071028e-06, "loss": 0.0252, "step": 118865 }, { "epoch": 0.4959901861788686, "grad_norm": 0.7039527283947989, "learning_rate": 2.8400743405408604e-06, "loss": 0.028, "step": 118870 }, { "epoch": 0.49601104889385883, "grad_norm": 0.7791062788761666, "learning_rate": 2.8400146112433374e-06, "loss": 0.028, "step": 118875 }, { "epoch": 0.49603191160884913, "grad_norm": 0.5589057295341793, "learning_rate": 2.8399548857141373e-06, "loss": 0.0315, "step": 118880 }, { "epoch": 0.49605277432383943, "grad_norm": 0.46865354375604495, "learning_rate": 2.8398951639528634e-06, "loss": 0.0255, "step": 118885 }, { "epoch": 0.4960736370388297, "grad_norm": 1.1628308516942527, "learning_rate": 2.83983544595912e-06, "loss": 0.0236, "step": 118890 }, { "epoch": 0.49609449975382, "grad_norm": 1.0024194791121814, "learning_rate": 2.8397757317325108e-06, "loss": 0.0351, "step": 118895 }, { "epoch": 0.4961153624688102, "grad_norm": 0.58976501213939, "learning_rate": 2.83971602127264e-06, "loss": 0.0209, "step": 118900 }, { "epoch": 0.4961362251838005, "grad_norm": 1.2391111416734215, "learning_rate": 2.8396563145791107e-06, "loss": 0.0245, "step": 118905 }, { "epoch": 0.4961570878987908, "grad_norm": 1.1334399570923148, "learning_rate": 2.839596611651529e-06, "loss": 0.0312, "step": 118910 }, { "epoch": 0.49617795061378106, "grad_norm": 0.7839793834675487, "learning_rate": 2.839536912489497e-06, "loss": 0.0265, "step": 118915 }, { "epoch": 0.49619881332877136, "grad_norm": 0.618288146240017, "learning_rate": 2.839477217092619e-06, "loss": 0.0222, "step": 118920 }, { "epoch": 0.4962196760437616, "grad_norm": 0.8891043985731673, "learning_rate": 2.839417525460501e-06, "loss": 0.0277, "step": 118925 }, { "epoch": 0.4962405387587519, "grad_norm": 0.6336847967160119, "learning_rate": 2.8393578375927453e-06, "loss": 0.0204, "step": 118930 }, { "epoch": 0.4962614014737422, "grad_norm": 0.5124591145141115, "learning_rate": 2.839298153488958e-06, "loss": 0.021, "step": 118935 }, { "epoch": 0.49628226418873245, "grad_norm": 0.6496920298659696, "learning_rate": 2.839238473148742e-06, "loss": 0.0267, "step": 118940 }, { "epoch": 0.49630312690372275, "grad_norm": 1.2943945470556877, "learning_rate": 2.8391787965717028e-06, "loss": 0.0287, "step": 118945 }, { "epoch": 0.49632398961871305, "grad_norm": 0.5832267071402456, "learning_rate": 2.8391191237574445e-06, "loss": 0.0314, "step": 118950 }, { "epoch": 0.4963448523337033, "grad_norm": 1.1963679148501496, "learning_rate": 2.839059454705571e-06, "loss": 0.0318, "step": 118955 }, { "epoch": 0.4963657150486936, "grad_norm": 0.49430917211661596, "learning_rate": 2.8389997894156884e-06, "loss": 0.0298, "step": 118960 }, { "epoch": 0.49638657776368383, "grad_norm": 0.7855761535039707, "learning_rate": 2.838940127887401e-06, "loss": 0.0208, "step": 118965 }, { "epoch": 0.49640744047867413, "grad_norm": 0.7634962553064188, "learning_rate": 2.8388804701203127e-06, "loss": 0.0248, "step": 118970 }, { "epoch": 0.49642830319366443, "grad_norm": 0.8799573783346198, "learning_rate": 2.8388208161140292e-06, "loss": 0.0224, "step": 118975 }, { "epoch": 0.4964491659086547, "grad_norm": 0.6450282788792641, "learning_rate": 2.8387611658681544e-06, "loss": 0.0281, "step": 118980 }, { "epoch": 0.496470028623645, "grad_norm": 0.9357249733320778, "learning_rate": 2.838701519382295e-06, "loss": 0.0244, "step": 118985 }, { "epoch": 0.4964908913386352, "grad_norm": 1.1715442553338433, "learning_rate": 2.8386418766560542e-06, "loss": 0.0244, "step": 118990 }, { "epoch": 0.4965117540536255, "grad_norm": 0.7961289274351719, "learning_rate": 2.838582237689038e-06, "loss": 0.026, "step": 118995 }, { "epoch": 0.4965326167686158, "grad_norm": 0.5227478153642642, "learning_rate": 2.838522602480851e-06, "loss": 0.0302, "step": 119000 }, { "epoch": 0.49655347948360606, "grad_norm": 0.847171917460004, "learning_rate": 2.838462971031099e-06, "loss": 0.0247, "step": 119005 }, { "epoch": 0.49657434219859636, "grad_norm": 0.8608973838195672, "learning_rate": 2.838403343339387e-06, "loss": 0.024, "step": 119010 }, { "epoch": 0.4965952049135866, "grad_norm": 0.987248161597012, "learning_rate": 2.83834371940532e-06, "loss": 0.0221, "step": 119015 }, { "epoch": 0.4966160676285769, "grad_norm": 0.7845181263989427, "learning_rate": 2.8382840992285033e-06, "loss": 0.0204, "step": 119020 }, { "epoch": 0.4966369303435672, "grad_norm": 0.4934345999565881, "learning_rate": 2.838224482808543e-06, "loss": 0.0259, "step": 119025 }, { "epoch": 0.49665779305855745, "grad_norm": 0.4516535179220817, "learning_rate": 2.838164870145044e-06, "loss": 0.028, "step": 119030 }, { "epoch": 0.49667865577354775, "grad_norm": 0.549770511055339, "learning_rate": 2.838105261237612e-06, "loss": 0.0232, "step": 119035 }, { "epoch": 0.49669951848853805, "grad_norm": 0.7212354496008391, "learning_rate": 2.838045656085852e-06, "loss": 0.0279, "step": 119040 }, { "epoch": 0.4967203812035283, "grad_norm": 0.4065506409354332, "learning_rate": 2.8379860546893707e-06, "loss": 0.0269, "step": 119045 }, { "epoch": 0.4967412439185186, "grad_norm": 0.7658791370882521, "learning_rate": 2.8379264570477728e-06, "loss": 0.0273, "step": 119050 }, { "epoch": 0.49676210663350884, "grad_norm": 0.6514550745541672, "learning_rate": 2.837866863160665e-06, "loss": 0.0236, "step": 119055 }, { "epoch": 0.49678296934849914, "grad_norm": 0.9686418888002849, "learning_rate": 2.8378072730276524e-06, "loss": 0.0246, "step": 119060 }, { "epoch": 0.49680383206348944, "grad_norm": 0.8842190077087864, "learning_rate": 2.8377476866483405e-06, "loss": 0.0295, "step": 119065 }, { "epoch": 0.4968246947784797, "grad_norm": 0.718762031080483, "learning_rate": 2.8376881040223363e-06, "loss": 0.0198, "step": 119070 }, { "epoch": 0.49684555749347, "grad_norm": 0.6497761775317169, "learning_rate": 2.8376285251492457e-06, "loss": 0.0186, "step": 119075 }, { "epoch": 0.4968664202084602, "grad_norm": 0.5804196947982642, "learning_rate": 2.837568950028674e-06, "loss": 0.0172, "step": 119080 }, { "epoch": 0.4968872829234505, "grad_norm": 0.9320244715007849, "learning_rate": 2.837509378660228e-06, "loss": 0.0269, "step": 119085 }, { "epoch": 0.4969081456384408, "grad_norm": 0.9389783167920838, "learning_rate": 2.8374498110435126e-06, "loss": 0.0291, "step": 119090 }, { "epoch": 0.49692900835343107, "grad_norm": 0.36467686229825824, "learning_rate": 2.837390247178136e-06, "loss": 0.0242, "step": 119095 }, { "epoch": 0.49694987106842137, "grad_norm": 0.7309210183931315, "learning_rate": 2.8373306870637023e-06, "loss": 0.022, "step": 119100 }, { "epoch": 0.4969707337834116, "grad_norm": 0.9899373721464765, "learning_rate": 2.8372711306998195e-06, "loss": 0.0205, "step": 119105 }, { "epoch": 0.4969915964984019, "grad_norm": 1.2696240445376588, "learning_rate": 2.8372115780860937e-06, "loss": 0.0218, "step": 119110 }, { "epoch": 0.4970124592133922, "grad_norm": 0.7320709734104185, "learning_rate": 2.837152029222131e-06, "loss": 0.0207, "step": 119115 }, { "epoch": 0.49703332192838245, "grad_norm": 0.5288452057630021, "learning_rate": 2.8370924841075378e-06, "loss": 0.0231, "step": 119120 }, { "epoch": 0.49705418464337275, "grad_norm": 0.7299980088102518, "learning_rate": 2.8370329427419208e-06, "loss": 0.0311, "step": 119125 }, { "epoch": 0.49707504735836305, "grad_norm": 0.9560350673810348, "learning_rate": 2.8369734051248866e-06, "loss": 0.0295, "step": 119130 }, { "epoch": 0.4970959100733533, "grad_norm": 0.7119991561914335, "learning_rate": 2.836913871256042e-06, "loss": 0.0273, "step": 119135 }, { "epoch": 0.4971167727883436, "grad_norm": 0.7669008392376261, "learning_rate": 2.8368543411349935e-06, "loss": 0.0236, "step": 119140 }, { "epoch": 0.49713763550333384, "grad_norm": 0.9138979846368396, "learning_rate": 2.8367948147613485e-06, "loss": 0.0294, "step": 119145 }, { "epoch": 0.49715849821832414, "grad_norm": 0.8698407168392956, "learning_rate": 2.8367352921347134e-06, "loss": 0.0258, "step": 119150 }, { "epoch": 0.49717936093331444, "grad_norm": 0.4548521090489738, "learning_rate": 2.836675773254695e-06, "loss": 0.0298, "step": 119155 }, { "epoch": 0.4972002236483047, "grad_norm": 0.9857059774964455, "learning_rate": 2.8366162581209e-06, "loss": 0.0172, "step": 119160 }, { "epoch": 0.497221086363295, "grad_norm": 0.5156596583040525, "learning_rate": 2.8365567467329364e-06, "loss": 0.0234, "step": 119165 }, { "epoch": 0.49724194907828523, "grad_norm": 0.864756170606634, "learning_rate": 2.8364972390904107e-06, "loss": 0.0344, "step": 119170 }, { "epoch": 0.4972628117932755, "grad_norm": 0.9815137684884437, "learning_rate": 2.8364377351929294e-06, "loss": 0.035, "step": 119175 }, { "epoch": 0.4972836745082658, "grad_norm": 0.29583893618384544, "learning_rate": 2.8363782350401015e-06, "loss": 0.0235, "step": 119180 }, { "epoch": 0.49730453722325607, "grad_norm": 0.7509441022104402, "learning_rate": 2.8363187386315324e-06, "loss": 0.0247, "step": 119185 }, { "epoch": 0.49732539993824637, "grad_norm": 0.6717698513502718, "learning_rate": 2.8362592459668302e-06, "loss": 0.0203, "step": 119190 }, { "epoch": 0.4973462626532366, "grad_norm": 0.7638969225627043, "learning_rate": 2.8361997570456016e-06, "loss": 0.0221, "step": 119195 }, { "epoch": 0.4973671253682269, "grad_norm": 0.8682439864778445, "learning_rate": 2.8361402718674552e-06, "loss": 0.0216, "step": 119200 }, { "epoch": 0.4973879880832172, "grad_norm": 0.7875635294247005, "learning_rate": 2.8360807904319985e-06, "loss": 0.0234, "step": 119205 }, { "epoch": 0.49740885079820746, "grad_norm": 0.25600253027204767, "learning_rate": 2.836021312738838e-06, "loss": 0.0218, "step": 119210 }, { "epoch": 0.49742971351319776, "grad_norm": 1.005986684609769, "learning_rate": 2.8359618387875813e-06, "loss": 0.0463, "step": 119215 }, { "epoch": 0.49745057622818806, "grad_norm": 0.5119813294032772, "learning_rate": 2.8359023685778373e-06, "loss": 0.0263, "step": 119220 }, { "epoch": 0.4974714389431783, "grad_norm": 0.9572264200384695, "learning_rate": 2.8358429021092126e-06, "loss": 0.0329, "step": 119225 }, { "epoch": 0.4974923016581686, "grad_norm": 0.9878336723746122, "learning_rate": 2.8357834393813154e-06, "loss": 0.0247, "step": 119230 }, { "epoch": 0.49751316437315884, "grad_norm": 0.5941229538302087, "learning_rate": 2.835723980393753e-06, "loss": 0.0191, "step": 119235 }, { "epoch": 0.49753402708814914, "grad_norm": 1.0539165529422345, "learning_rate": 2.8356645251461345e-06, "loss": 0.0267, "step": 119240 }, { "epoch": 0.49755488980313944, "grad_norm": 3.04294319129794, "learning_rate": 2.835605073638066e-06, "loss": 0.0161, "step": 119245 }, { "epoch": 0.4975757525181297, "grad_norm": 0.6593630213954268, "learning_rate": 2.8355456258691583e-06, "loss": 0.0205, "step": 119250 }, { "epoch": 0.49759661523312, "grad_norm": 0.4576018790819993, "learning_rate": 2.8354861818390166e-06, "loss": 0.0216, "step": 119255 }, { "epoch": 0.49761747794811023, "grad_norm": 3.9900945180115905, "learning_rate": 2.83542674154725e-06, "loss": 0.0293, "step": 119260 }, { "epoch": 0.49763834066310053, "grad_norm": 0.6993562792493833, "learning_rate": 2.8353673049934676e-06, "loss": 0.0302, "step": 119265 }, { "epoch": 0.49765920337809083, "grad_norm": 0.7954462187613492, "learning_rate": 2.835307872177277e-06, "loss": 0.0355, "step": 119270 }, { "epoch": 0.4976800660930811, "grad_norm": 0.6254708330270854, "learning_rate": 2.835248443098286e-06, "loss": 0.0273, "step": 119275 }, { "epoch": 0.4977009288080714, "grad_norm": 0.7765929066316252, "learning_rate": 2.835189017756103e-06, "loss": 0.0246, "step": 119280 }, { "epoch": 0.4977217915230616, "grad_norm": 0.6877130109011016, "learning_rate": 2.8351295961503373e-06, "loss": 0.0173, "step": 119285 }, { "epoch": 0.4977426542380519, "grad_norm": 0.7045410339218887, "learning_rate": 2.835070178280597e-06, "loss": 0.0347, "step": 119290 }, { "epoch": 0.4977635169530422, "grad_norm": 0.7681878134537273, "learning_rate": 2.8350107641464897e-06, "loss": 0.0219, "step": 119295 }, { "epoch": 0.49778437966803246, "grad_norm": 0.7210664137817675, "learning_rate": 2.834951353747625e-06, "loss": 0.0196, "step": 119300 }, { "epoch": 0.49780524238302276, "grad_norm": 0.6609943952402418, "learning_rate": 2.8348919470836118e-06, "loss": 0.0271, "step": 119305 }, { "epoch": 0.49782610509801306, "grad_norm": 0.5092990639783985, "learning_rate": 2.8348325441540573e-06, "loss": 0.025, "step": 119310 }, { "epoch": 0.4978469678130033, "grad_norm": 0.9917883063472682, "learning_rate": 2.834773144958572e-06, "loss": 0.0291, "step": 119315 }, { "epoch": 0.4978678305279936, "grad_norm": 0.7306365750265827, "learning_rate": 2.8347137494967636e-06, "loss": 0.0244, "step": 119320 }, { "epoch": 0.49788869324298385, "grad_norm": 0.9471622461033243, "learning_rate": 2.8346543577682416e-06, "loss": 0.0206, "step": 119325 }, { "epoch": 0.49790955595797415, "grad_norm": 0.5419243939040298, "learning_rate": 2.834594969772615e-06, "loss": 0.0191, "step": 119330 }, { "epoch": 0.49793041867296445, "grad_norm": 0.8004831507677541, "learning_rate": 2.8345355855094915e-06, "loss": 0.0193, "step": 119335 }, { "epoch": 0.4979512813879547, "grad_norm": 0.4823944352643839, "learning_rate": 2.8344762049784812e-06, "loss": 0.0216, "step": 119340 }, { "epoch": 0.497972144102945, "grad_norm": 1.06908801507684, "learning_rate": 2.834416828179193e-06, "loss": 0.0214, "step": 119345 }, { "epoch": 0.49799300681793524, "grad_norm": 0.6378443651490279, "learning_rate": 2.834357455111237e-06, "loss": 0.029, "step": 119350 }, { "epoch": 0.49801386953292553, "grad_norm": 0.9611631406195067, "learning_rate": 2.83429808577422e-06, "loss": 0.0214, "step": 119355 }, { "epoch": 0.49803473224791583, "grad_norm": 0.40951039141387074, "learning_rate": 2.8342387201677546e-06, "loss": 0.0198, "step": 119360 }, { "epoch": 0.4980555949629061, "grad_norm": 0.9548255998220697, "learning_rate": 2.834179358291447e-06, "loss": 0.0265, "step": 119365 }, { "epoch": 0.4980764576778964, "grad_norm": 1.7326027410910634, "learning_rate": 2.834120000144908e-06, "loss": 0.0173, "step": 119370 }, { "epoch": 0.4980973203928866, "grad_norm": 0.49932181205704446, "learning_rate": 2.834060645727747e-06, "loss": 0.0376, "step": 119375 }, { "epoch": 0.4981181831078769, "grad_norm": 0.8637950416990549, "learning_rate": 2.834001295039574e-06, "loss": 0.0196, "step": 119380 }, { "epoch": 0.4981390458228672, "grad_norm": 0.990520624128329, "learning_rate": 2.8339419480799974e-06, "loss": 0.0192, "step": 119385 }, { "epoch": 0.49815990853785747, "grad_norm": 0.7310414599668014, "learning_rate": 2.833882604848627e-06, "loss": 0.0236, "step": 119390 }, { "epoch": 0.49818077125284776, "grad_norm": 0.5697218707313835, "learning_rate": 2.8338232653450743e-06, "loss": 0.0216, "step": 119395 }, { "epoch": 0.49820163396783806, "grad_norm": 2.9261449069289904, "learning_rate": 2.8337639295689463e-06, "loss": 0.0231, "step": 119400 }, { "epoch": 0.4982224966828283, "grad_norm": 0.7258841173372, "learning_rate": 2.8337045975198547e-06, "loss": 0.0278, "step": 119405 }, { "epoch": 0.4982433593978186, "grad_norm": 1.2781191866016242, "learning_rate": 2.8336452691974083e-06, "loss": 0.0221, "step": 119410 }, { "epoch": 0.49826422211280885, "grad_norm": 0.8802427519843855, "learning_rate": 2.8335859446012177e-06, "loss": 0.0244, "step": 119415 }, { "epoch": 0.49828508482779915, "grad_norm": 0.4289237677387162, "learning_rate": 2.833526623730892e-06, "loss": 0.0205, "step": 119420 }, { "epoch": 0.49830594754278945, "grad_norm": 0.8877789573900068, "learning_rate": 2.833467306586043e-06, "loss": 0.0219, "step": 119425 }, { "epoch": 0.4983268102577797, "grad_norm": 0.40906181539342285, "learning_rate": 2.8334079931662784e-06, "loss": 0.0255, "step": 119430 }, { "epoch": 0.49834767297277, "grad_norm": 0.8287064660714277, "learning_rate": 2.8333486834712097e-06, "loss": 0.0379, "step": 119435 }, { "epoch": 0.49836853568776024, "grad_norm": 1.2596730223910964, "learning_rate": 2.8332893775004473e-06, "loss": 0.021, "step": 119440 }, { "epoch": 0.49838939840275054, "grad_norm": 0.7979596870961227, "learning_rate": 2.8332300752536006e-06, "loss": 0.0177, "step": 119445 }, { "epoch": 0.49841026111774084, "grad_norm": 1.1856279651442663, "learning_rate": 2.8331707767302806e-06, "loss": 0.0187, "step": 119450 }, { "epoch": 0.4984311238327311, "grad_norm": 0.6617815388198072, "learning_rate": 2.8331114819300977e-06, "loss": 0.0231, "step": 119455 }, { "epoch": 0.4984519865477214, "grad_norm": 0.7770411590409579, "learning_rate": 2.833052190852661e-06, "loss": 0.0199, "step": 119460 }, { "epoch": 0.4984728492627116, "grad_norm": 0.7944334021987925, "learning_rate": 2.8329929034975827e-06, "loss": 0.0261, "step": 119465 }, { "epoch": 0.4984937119777019, "grad_norm": 0.5348263927803311, "learning_rate": 2.8329336198644726e-06, "loss": 0.0201, "step": 119470 }, { "epoch": 0.4985145746926922, "grad_norm": 0.708193805505082, "learning_rate": 2.8328743399529407e-06, "loss": 0.0322, "step": 119475 }, { "epoch": 0.49853543740768247, "grad_norm": 0.8188467737464119, "learning_rate": 2.8328150637625985e-06, "loss": 0.034, "step": 119480 }, { "epoch": 0.49855630012267277, "grad_norm": 0.7014303547687717, "learning_rate": 2.8327557912930565e-06, "loss": 0.0241, "step": 119485 }, { "epoch": 0.49857716283766307, "grad_norm": 0.5244253038564209, "learning_rate": 2.832696522543925e-06, "loss": 0.0296, "step": 119490 }, { "epoch": 0.4985980255526533, "grad_norm": 0.7978880713451573, "learning_rate": 2.8326372575148153e-06, "loss": 0.028, "step": 119495 }, { "epoch": 0.4986188882676436, "grad_norm": 0.8414839089821415, "learning_rate": 2.8325779962053386e-06, "loss": 0.027, "step": 119500 }, { "epoch": 0.49863975098263386, "grad_norm": 1.0603418494158152, "learning_rate": 2.8325187386151047e-06, "loss": 0.0231, "step": 119505 }, { "epoch": 0.49866061369762416, "grad_norm": 0.5703057235455041, "learning_rate": 2.832459484743726e-06, "loss": 0.0262, "step": 119510 }, { "epoch": 0.49868147641261446, "grad_norm": 0.7026194434007904, "learning_rate": 2.832400234590812e-06, "loss": 0.0265, "step": 119515 }, { "epoch": 0.4987023391276047, "grad_norm": 0.8338466971417443, "learning_rate": 2.8323409881559748e-06, "loss": 0.0222, "step": 119520 }, { "epoch": 0.498723201842595, "grad_norm": 0.3322234327061644, "learning_rate": 2.832281745438826e-06, "loss": 0.0221, "step": 119525 }, { "epoch": 0.49874406455758524, "grad_norm": 0.6785984501803696, "learning_rate": 2.832222506438975e-06, "loss": 0.0206, "step": 119530 }, { "epoch": 0.49876492727257554, "grad_norm": 0.5241400791256332, "learning_rate": 2.832163271156035e-06, "loss": 0.0237, "step": 119535 }, { "epoch": 0.49878578998756584, "grad_norm": 0.5668889176213314, "learning_rate": 2.8321040395896166e-06, "loss": 0.0317, "step": 119540 }, { "epoch": 0.4988066527025561, "grad_norm": 0.5600109780404295, "learning_rate": 2.8320448117393308e-06, "loss": 0.024, "step": 119545 }, { "epoch": 0.4988275154175464, "grad_norm": 0.6276653346982073, "learning_rate": 2.8319855876047892e-06, "loss": 0.0229, "step": 119550 }, { "epoch": 0.49884837813253663, "grad_norm": 0.46808784049732394, "learning_rate": 2.8319263671856035e-06, "loss": 0.0254, "step": 119555 }, { "epoch": 0.49886924084752693, "grad_norm": 0.3296799233839398, "learning_rate": 2.8318671504813853e-06, "loss": 0.0221, "step": 119560 }, { "epoch": 0.49889010356251723, "grad_norm": 0.8761420253389433, "learning_rate": 2.8318079374917463e-06, "loss": 0.0271, "step": 119565 }, { "epoch": 0.4989109662775075, "grad_norm": 2.522017020000306, "learning_rate": 2.8317487282162976e-06, "loss": 0.0247, "step": 119570 }, { "epoch": 0.4989318289924978, "grad_norm": 1.3088174354870832, "learning_rate": 2.8316895226546513e-06, "loss": 0.0345, "step": 119575 }, { "epoch": 0.49895269170748807, "grad_norm": 0.5663063897526155, "learning_rate": 2.83163032080642e-06, "loss": 0.0218, "step": 119580 }, { "epoch": 0.4989735544224783, "grad_norm": 0.5632856021522676, "learning_rate": 2.831571122671214e-06, "loss": 0.0197, "step": 119585 }, { "epoch": 0.4989944171374686, "grad_norm": 1.0281842570556634, "learning_rate": 2.8315119282486457e-06, "loss": 0.0256, "step": 119590 }, { "epoch": 0.49901527985245886, "grad_norm": 0.7480052218351935, "learning_rate": 2.831452737538328e-06, "loss": 0.0251, "step": 119595 }, { "epoch": 0.49903614256744916, "grad_norm": 0.5625136958840954, "learning_rate": 2.831393550539872e-06, "loss": 0.0253, "step": 119600 }, { "epoch": 0.49905700528243946, "grad_norm": 1.0135156631648596, "learning_rate": 2.831334367252889e-06, "loss": 0.0228, "step": 119605 }, { "epoch": 0.4990778679974297, "grad_norm": 1.01043319035322, "learning_rate": 2.8312751876769934e-06, "loss": 0.0214, "step": 119610 }, { "epoch": 0.49909873071242, "grad_norm": 0.6199182866375921, "learning_rate": 2.8312160118117954e-06, "loss": 0.0185, "step": 119615 }, { "epoch": 0.49911959342741025, "grad_norm": 0.7684892385783706, "learning_rate": 2.831156839656908e-06, "loss": 0.0193, "step": 119620 }, { "epoch": 0.49914045614240055, "grad_norm": 1.1680007820576765, "learning_rate": 2.8310976712119425e-06, "loss": 0.0275, "step": 119625 }, { "epoch": 0.49916131885739085, "grad_norm": 1.3166608466069172, "learning_rate": 2.831038506476513e-06, "loss": 0.0275, "step": 119630 }, { "epoch": 0.4991821815723811, "grad_norm": 0.7083761963077513, "learning_rate": 2.8309793454502306e-06, "loss": 0.0214, "step": 119635 }, { "epoch": 0.4992030442873714, "grad_norm": 0.5594342842953103, "learning_rate": 2.8309201881327087e-06, "loss": 0.0261, "step": 119640 }, { "epoch": 0.49922390700236163, "grad_norm": 0.5960532291410688, "learning_rate": 2.830861034523559e-06, "loss": 0.0266, "step": 119645 }, { "epoch": 0.49924476971735193, "grad_norm": 0.5844839210591041, "learning_rate": 2.830801884622394e-06, "loss": 0.0323, "step": 119650 }, { "epoch": 0.49926563243234223, "grad_norm": 4.715152933779234, "learning_rate": 2.830742738428827e-06, "loss": 0.0394, "step": 119655 }, { "epoch": 0.4992864951473325, "grad_norm": 0.724301213500572, "learning_rate": 2.8306835959424704e-06, "loss": 0.0276, "step": 119660 }, { "epoch": 0.4993073578623228, "grad_norm": 0.4965455755928804, "learning_rate": 2.8306244571629367e-06, "loss": 0.0197, "step": 119665 }, { "epoch": 0.4993282205773131, "grad_norm": 1.5284430934847484, "learning_rate": 2.8305653220898387e-06, "loss": 0.0287, "step": 119670 }, { "epoch": 0.4993490832923033, "grad_norm": 0.850498388294159, "learning_rate": 2.8305061907227906e-06, "loss": 0.0244, "step": 119675 }, { "epoch": 0.4993699460072936, "grad_norm": 0.4234133053535612, "learning_rate": 2.830447063061403e-06, "loss": 0.0231, "step": 119680 }, { "epoch": 0.49939080872228386, "grad_norm": 0.47659222439646776, "learning_rate": 2.830387939105291e-06, "loss": 0.0189, "step": 119685 }, { "epoch": 0.49941167143727416, "grad_norm": 0.7273911624480434, "learning_rate": 2.830328818854066e-06, "loss": 0.0186, "step": 119690 }, { "epoch": 0.49943253415226446, "grad_norm": 0.453273291623302, "learning_rate": 2.8302697023073415e-06, "loss": 0.0275, "step": 119695 }, { "epoch": 0.4994533968672547, "grad_norm": 0.4487276004085843, "learning_rate": 2.830210589464731e-06, "loss": 0.0169, "step": 119700 }, { "epoch": 0.499474259582245, "grad_norm": 0.855374185705919, "learning_rate": 2.8301514803258483e-06, "loss": 0.022, "step": 119705 }, { "epoch": 0.49949512229723525, "grad_norm": 0.6136303785666835, "learning_rate": 2.830092374890306e-06, "loss": 0.025, "step": 119710 }, { "epoch": 0.49951598501222555, "grad_norm": 0.4365973446111138, "learning_rate": 2.830033273157717e-06, "loss": 0.0237, "step": 119715 }, { "epoch": 0.49953684772721585, "grad_norm": 8.128197236677966, "learning_rate": 2.829974175127695e-06, "loss": 0.0266, "step": 119720 }, { "epoch": 0.4995577104422061, "grad_norm": 0.8786645442729186, "learning_rate": 2.829915080799853e-06, "loss": 0.0341, "step": 119725 }, { "epoch": 0.4995785731571964, "grad_norm": 0.6409950569221424, "learning_rate": 2.829855990173806e-06, "loss": 0.0206, "step": 119730 }, { "epoch": 0.49959943587218664, "grad_norm": 0.5527947026900337, "learning_rate": 2.8297969032491656e-06, "loss": 0.0202, "step": 119735 }, { "epoch": 0.49962029858717694, "grad_norm": 0.5554295900709632, "learning_rate": 2.8297378200255467e-06, "loss": 0.0275, "step": 119740 }, { "epoch": 0.49964116130216724, "grad_norm": 0.691552537475149, "learning_rate": 2.829678740502562e-06, "loss": 0.0244, "step": 119745 }, { "epoch": 0.4996620240171575, "grad_norm": 0.8226040389647558, "learning_rate": 2.829619664679826e-06, "loss": 0.0289, "step": 119750 }, { "epoch": 0.4996828867321478, "grad_norm": 0.6453564973874331, "learning_rate": 2.8295605925569523e-06, "loss": 0.0195, "step": 119755 }, { "epoch": 0.4997037494471381, "grad_norm": 0.5504408443264955, "learning_rate": 2.829501524133555e-06, "loss": 0.0344, "step": 119760 }, { "epoch": 0.4997246121621283, "grad_norm": 0.5808031381513284, "learning_rate": 2.829442459409247e-06, "loss": 0.0197, "step": 119765 }, { "epoch": 0.4997454748771186, "grad_norm": 0.9627620565727005, "learning_rate": 2.829383398383643e-06, "loss": 0.0301, "step": 119770 }, { "epoch": 0.49976633759210887, "grad_norm": 1.2303405112642216, "learning_rate": 2.8293243410563563e-06, "loss": 0.0232, "step": 119775 }, { "epoch": 0.49978720030709917, "grad_norm": 0.5393514095357058, "learning_rate": 2.829265287427002e-06, "loss": 0.0178, "step": 119780 }, { "epoch": 0.49980806302208947, "grad_norm": 0.9028754155145614, "learning_rate": 2.8292062374951933e-06, "loss": 0.0296, "step": 119785 }, { "epoch": 0.4998289257370797, "grad_norm": 0.8977788339923704, "learning_rate": 2.829147191260545e-06, "loss": 0.0304, "step": 119790 }, { "epoch": 0.49984978845207, "grad_norm": 0.8595553296957871, "learning_rate": 2.8290881487226707e-06, "loss": 0.0268, "step": 119795 }, { "epoch": 0.49987065116706025, "grad_norm": 0.29743121348530677, "learning_rate": 2.8290291098811857e-06, "loss": 0.022, "step": 119800 }, { "epoch": 0.49989151388205055, "grad_norm": 0.5448701187345296, "learning_rate": 2.8289700747357023e-06, "loss": 0.0233, "step": 119805 }, { "epoch": 0.49991237659704085, "grad_norm": 0.8283551783614903, "learning_rate": 2.8289110432858373e-06, "loss": 0.0358, "step": 119810 }, { "epoch": 0.4999332393120311, "grad_norm": 0.8731753645780574, "learning_rate": 2.828852015531204e-06, "loss": 0.0331, "step": 119815 }, { "epoch": 0.4999541020270214, "grad_norm": 0.5821858178156886, "learning_rate": 2.828792991471416e-06, "loss": 0.0333, "step": 119820 }, { "epoch": 0.49997496474201164, "grad_norm": 0.7945240614266124, "learning_rate": 2.8287339711060894e-06, "loss": 0.0242, "step": 119825 }, { "epoch": 0.49999582745700194, "grad_norm": 0.5681676925224506, "learning_rate": 2.8286749544348385e-06, "loss": 0.0219, "step": 119830 }, { "epoch": 0.5000166901719922, "grad_norm": 0.6940039534372625, "learning_rate": 2.8286159414572767e-06, "loss": 0.017, "step": 119835 }, { "epoch": 0.5000375528869825, "grad_norm": 0.7650122225868402, "learning_rate": 2.8285569321730204e-06, "loss": 0.0203, "step": 119840 }, { "epoch": 0.5000584156019727, "grad_norm": 0.983876158107217, "learning_rate": 2.828497926581683e-06, "loss": 0.0248, "step": 119845 }, { "epoch": 0.5000792783169631, "grad_norm": 0.5964520120928938, "learning_rate": 2.828438924682881e-06, "loss": 0.0287, "step": 119850 }, { "epoch": 0.5001001410319533, "grad_norm": 0.41999691158747926, "learning_rate": 2.828379926476227e-06, "loss": 0.0145, "step": 119855 }, { "epoch": 0.5001210037469436, "grad_norm": 0.5894355050428574, "learning_rate": 2.8283209319613382e-06, "loss": 0.0202, "step": 119860 }, { "epoch": 0.5001418664619339, "grad_norm": 0.7597714777033366, "learning_rate": 2.8282619411378283e-06, "loss": 0.0315, "step": 119865 }, { "epoch": 0.5001627291769242, "grad_norm": 1.2897019736914, "learning_rate": 2.8282029540053124e-06, "loss": 0.0282, "step": 119870 }, { "epoch": 0.5001835918919144, "grad_norm": 0.5453668063103585, "learning_rate": 2.828143970563406e-06, "loss": 0.0258, "step": 119875 }, { "epoch": 0.5002044546069048, "grad_norm": 0.9803694413921235, "learning_rate": 2.8280849908117236e-06, "loss": 0.0267, "step": 119880 }, { "epoch": 0.500225317321895, "grad_norm": 0.8779191469222066, "learning_rate": 2.8280260147498816e-06, "loss": 0.0216, "step": 119885 }, { "epoch": 0.5002461800368853, "grad_norm": 0.4680470457943813, "learning_rate": 2.827967042377495e-06, "loss": 0.0174, "step": 119890 }, { "epoch": 0.5002670427518756, "grad_norm": 0.7632053561669189, "learning_rate": 2.8279080736941782e-06, "loss": 0.0234, "step": 119895 }, { "epoch": 0.5002879054668659, "grad_norm": 0.8244487024477722, "learning_rate": 2.8278491086995473e-06, "loss": 0.0268, "step": 119900 }, { "epoch": 0.5003087681818561, "grad_norm": 0.6145652444299272, "learning_rate": 2.827790147393218e-06, "loss": 0.0209, "step": 119905 }, { "epoch": 0.5003296308968463, "grad_norm": 0.7620166232065236, "learning_rate": 2.827731189774805e-06, "loss": 0.0248, "step": 119910 }, { "epoch": 0.5003504936118367, "grad_norm": 0.7227998258751596, "learning_rate": 2.8276722358439246e-06, "loss": 0.021, "step": 119915 }, { "epoch": 0.5003713563268269, "grad_norm": 0.6473117143420283, "learning_rate": 2.8276132856001915e-06, "loss": 0.021, "step": 119920 }, { "epoch": 0.5003922190418172, "grad_norm": 0.6271221376053963, "learning_rate": 2.8275543390432227e-06, "loss": 0.0268, "step": 119925 }, { "epoch": 0.5004130817568075, "grad_norm": 1.4238965317467378, "learning_rate": 2.8274953961726328e-06, "loss": 0.0179, "step": 119930 }, { "epoch": 0.5004339444717978, "grad_norm": 0.7504244469662654, "learning_rate": 2.827436456988038e-06, "loss": 0.0226, "step": 119935 }, { "epoch": 0.500454807186788, "grad_norm": 0.7028108417334035, "learning_rate": 2.8273775214890543e-06, "loss": 0.0231, "step": 119940 }, { "epoch": 0.5004756699017784, "grad_norm": 0.8181127802081534, "learning_rate": 2.827318589675297e-06, "loss": 0.0279, "step": 119945 }, { "epoch": 0.5004965326167686, "grad_norm": 0.33267350583200284, "learning_rate": 2.8272596615463834e-06, "loss": 0.0258, "step": 119950 }, { "epoch": 0.5005173953317589, "grad_norm": 0.890255280753875, "learning_rate": 2.827200737101928e-06, "loss": 0.0262, "step": 119955 }, { "epoch": 0.5005382580467491, "grad_norm": 0.7565500489943824, "learning_rate": 2.8271418163415475e-06, "loss": 0.0207, "step": 119960 }, { "epoch": 0.5005591207617395, "grad_norm": 0.9817060579898771, "learning_rate": 2.8270828992648582e-06, "loss": 0.0309, "step": 119965 }, { "epoch": 0.5005799834767297, "grad_norm": 0.535235982189263, "learning_rate": 2.827023985871476e-06, "loss": 0.016, "step": 119970 }, { "epoch": 0.50060084619172, "grad_norm": 0.4998588208321236, "learning_rate": 2.826965076161017e-06, "loss": 0.0259, "step": 119975 }, { "epoch": 0.5006217089067103, "grad_norm": 0.6172338227783289, "learning_rate": 2.826906170133098e-06, "loss": 0.0285, "step": 119980 }, { "epoch": 0.5006425716217006, "grad_norm": 1.2306293264172048, "learning_rate": 2.8268472677873356e-06, "loss": 0.0277, "step": 119985 }, { "epoch": 0.5006634343366908, "grad_norm": 1.649340176683767, "learning_rate": 2.8267883691233443e-06, "loss": 0.0388, "step": 119990 }, { "epoch": 0.5006842970516812, "grad_norm": 0.7044385254210598, "learning_rate": 2.826729474140743e-06, "loss": 0.0313, "step": 119995 }, { "epoch": 0.5007051597666714, "grad_norm": 0.586075067623122, "learning_rate": 2.826670582839147e-06, "loss": 0.0241, "step": 120000 }, { "epoch": 0.5007260224816616, "grad_norm": 0.9577019818915881, "learning_rate": 2.8266116952181723e-06, "loss": 0.022, "step": 120005 }, { "epoch": 0.500746885196652, "grad_norm": 0.6303802628277518, "learning_rate": 2.8265528112774365e-06, "loss": 0.0173, "step": 120010 }, { "epoch": 0.5007677479116422, "grad_norm": 0.6711710277098297, "learning_rate": 2.826493931016556e-06, "loss": 0.0251, "step": 120015 }, { "epoch": 0.5007886106266325, "grad_norm": 1.3629562033604596, "learning_rate": 2.8264350544351484e-06, "loss": 0.0298, "step": 120020 }, { "epoch": 0.5008094733416227, "grad_norm": 0.4644598180649928, "learning_rate": 2.826376181532829e-06, "loss": 0.0181, "step": 120025 }, { "epoch": 0.5008303360566131, "grad_norm": 0.702402601654459, "learning_rate": 2.8263173123092154e-06, "loss": 0.0219, "step": 120030 }, { "epoch": 0.5008511987716033, "grad_norm": 0.5219826451203521, "learning_rate": 2.8262584467639235e-06, "loss": 0.0227, "step": 120035 }, { "epoch": 0.5008720614865936, "grad_norm": 2.213964870260603, "learning_rate": 2.8261995848965724e-06, "loss": 0.0256, "step": 120040 }, { "epoch": 0.5008929242015839, "grad_norm": 1.0840843918264942, "learning_rate": 2.8261407267067775e-06, "loss": 0.0203, "step": 120045 }, { "epoch": 0.5009137869165742, "grad_norm": 0.7332864564326422, "learning_rate": 2.8260818721941563e-06, "loss": 0.0219, "step": 120050 }, { "epoch": 0.5009346496315644, "grad_norm": 1.0655754404513338, "learning_rate": 2.8260230213583264e-06, "loss": 0.0199, "step": 120055 }, { "epoch": 0.5009555123465548, "grad_norm": 0.7899185710342634, "learning_rate": 2.825964174198903e-06, "loss": 0.0238, "step": 120060 }, { "epoch": 0.500976375061545, "grad_norm": 0.9452233558704135, "learning_rate": 2.825905330715506e-06, "loss": 0.0294, "step": 120065 }, { "epoch": 0.5009972377765353, "grad_norm": 0.9545619035358379, "learning_rate": 2.8258464909077516e-06, "loss": 0.0284, "step": 120070 }, { "epoch": 0.5010181004915256, "grad_norm": 1.085732682032896, "learning_rate": 2.8257876547752562e-06, "loss": 0.0277, "step": 120075 }, { "epoch": 0.5010389632065159, "grad_norm": 0.8391818716126204, "learning_rate": 2.8257288223176387e-06, "loss": 0.0247, "step": 120080 }, { "epoch": 0.5010598259215061, "grad_norm": 0.8432116479460416, "learning_rate": 2.8256699935345157e-06, "loss": 0.03, "step": 120085 }, { "epoch": 0.5010806886364964, "grad_norm": 0.7619637985092477, "learning_rate": 2.8256111684255044e-06, "loss": 0.0189, "step": 120090 }, { "epoch": 0.5011015513514867, "grad_norm": 0.9233450241243701, "learning_rate": 2.8255523469902237e-06, "loss": 0.0201, "step": 120095 }, { "epoch": 0.501122414066477, "grad_norm": 0.5531974351640077, "learning_rate": 2.8254935292282902e-06, "loss": 0.0184, "step": 120100 }, { "epoch": 0.5011432767814672, "grad_norm": 0.7730532276805254, "learning_rate": 2.8254347151393224e-06, "loss": 0.0269, "step": 120105 }, { "epoch": 0.5011641394964576, "grad_norm": 0.6029289342839331, "learning_rate": 2.825375904722936e-06, "loss": 0.028, "step": 120110 }, { "epoch": 0.5011850022114478, "grad_norm": 0.6729327048260813, "learning_rate": 2.825317097978752e-06, "loss": 0.0307, "step": 120115 }, { "epoch": 0.501205864926438, "grad_norm": 0.37451490754170663, "learning_rate": 2.8252582949063854e-06, "loss": 0.0243, "step": 120120 }, { "epoch": 0.5012267276414284, "grad_norm": 0.7422274179816368, "learning_rate": 2.8251994955054564e-06, "loss": 0.0324, "step": 120125 }, { "epoch": 0.5012475903564186, "grad_norm": 0.7277724523263059, "learning_rate": 2.825140699775581e-06, "loss": 0.0244, "step": 120130 }, { "epoch": 0.5012684530714089, "grad_norm": 1.0842889586007367, "learning_rate": 2.8250819077163777e-06, "loss": 0.022, "step": 120135 }, { "epoch": 0.5012893157863991, "grad_norm": 1.1276869506685916, "learning_rate": 2.8250231193274656e-06, "loss": 0.027, "step": 120140 }, { "epoch": 0.5013101785013895, "grad_norm": 0.584940206030164, "learning_rate": 2.8249643346084615e-06, "loss": 0.0293, "step": 120145 }, { "epoch": 0.5013310412163797, "grad_norm": 0.6121999588129773, "learning_rate": 2.824905553558985e-06, "loss": 0.0219, "step": 120150 }, { "epoch": 0.50135190393137, "grad_norm": 0.6226796168917529, "learning_rate": 2.824846776178653e-06, "loss": 0.0279, "step": 120155 }, { "epoch": 0.5013727666463603, "grad_norm": 0.5740075082146351, "learning_rate": 2.824788002467085e-06, "loss": 0.0243, "step": 120160 }, { "epoch": 0.5013936293613506, "grad_norm": 1.786046737898354, "learning_rate": 2.8247292324238985e-06, "loss": 0.0315, "step": 120165 }, { "epoch": 0.5014144920763408, "grad_norm": 0.45195729740541674, "learning_rate": 2.8246704660487116e-06, "loss": 0.0242, "step": 120170 }, { "epoch": 0.5014353547913312, "grad_norm": 0.5490697490361798, "learning_rate": 2.8246117033411436e-06, "loss": 0.0183, "step": 120175 }, { "epoch": 0.5014562175063214, "grad_norm": 0.5946155960954822, "learning_rate": 2.8245529443008127e-06, "loss": 0.0277, "step": 120180 }, { "epoch": 0.5014770802213117, "grad_norm": 1.2135706161990099, "learning_rate": 2.8244941889273375e-06, "loss": 0.0264, "step": 120185 }, { "epoch": 0.501497942936302, "grad_norm": 1.4029076462515992, "learning_rate": 2.824435437220337e-06, "loss": 0.0275, "step": 120190 }, { "epoch": 0.5015188056512923, "grad_norm": 0.4939650339777376, "learning_rate": 2.8243766891794287e-06, "loss": 0.0312, "step": 120195 }, { "epoch": 0.5015396683662825, "grad_norm": 0.471348037652145, "learning_rate": 2.824317944804233e-06, "loss": 0.0198, "step": 120200 }, { "epoch": 0.5015605310812727, "grad_norm": 0.614822718857404, "learning_rate": 2.824259204094368e-06, "loss": 0.025, "step": 120205 }, { "epoch": 0.5015813937962631, "grad_norm": 0.6152803472298379, "learning_rate": 2.8242004670494516e-06, "loss": 0.0256, "step": 120210 }, { "epoch": 0.5016022565112533, "grad_norm": 3.386502998921006, "learning_rate": 2.8241417336691042e-06, "loss": 0.0243, "step": 120215 }, { "epoch": 0.5016231192262436, "grad_norm": 0.5278085932126702, "learning_rate": 2.824083003952944e-06, "loss": 0.0184, "step": 120220 }, { "epoch": 0.5016439819412339, "grad_norm": 0.7891623355595909, "learning_rate": 2.82402427790059e-06, "loss": 0.0233, "step": 120225 }, { "epoch": 0.5016648446562242, "grad_norm": 0.7315621928206683, "learning_rate": 2.823965555511661e-06, "loss": 0.0214, "step": 120230 }, { "epoch": 0.5016857073712144, "grad_norm": 0.9715246896096348, "learning_rate": 2.823906836785777e-06, "loss": 0.0241, "step": 120235 }, { "epoch": 0.5017065700862048, "grad_norm": 1.01750609189521, "learning_rate": 2.8238481217225567e-06, "loss": 0.0424, "step": 120240 }, { "epoch": 0.501727432801195, "grad_norm": 3.1129641604490677, "learning_rate": 2.823789410321619e-06, "loss": 0.0323, "step": 120245 }, { "epoch": 0.5017482955161853, "grad_norm": 0.838458214378513, "learning_rate": 2.8237307025825837e-06, "loss": 0.024, "step": 120250 }, { "epoch": 0.5017691582311756, "grad_norm": 0.6998855328370444, "learning_rate": 2.8236719985050705e-06, "loss": 0.0203, "step": 120255 }, { "epoch": 0.5017900209461659, "grad_norm": 0.9652696884885585, "learning_rate": 2.823613298088698e-06, "loss": 0.0227, "step": 120260 }, { "epoch": 0.5018108836611561, "grad_norm": 0.3370125246035069, "learning_rate": 2.8235546013330855e-06, "loss": 0.0186, "step": 120265 }, { "epoch": 0.5018317463761464, "grad_norm": 0.4593993677643646, "learning_rate": 2.8234959082378538e-06, "loss": 0.02, "step": 120270 }, { "epoch": 0.5018526090911367, "grad_norm": 1.1070561287659464, "learning_rate": 2.8234372188026214e-06, "loss": 0.0403, "step": 120275 }, { "epoch": 0.501873471806127, "grad_norm": 0.6380165636218148, "learning_rate": 2.823378533027008e-06, "loss": 0.0239, "step": 120280 }, { "epoch": 0.5018943345211172, "grad_norm": 0.8921625919678059, "learning_rate": 2.8233198509106336e-06, "loss": 0.0201, "step": 120285 }, { "epoch": 0.5019151972361076, "grad_norm": 0.6573131202221199, "learning_rate": 2.8232611724531176e-06, "loss": 0.0277, "step": 120290 }, { "epoch": 0.5019360599510978, "grad_norm": 0.328043737010488, "learning_rate": 2.8232024976540805e-06, "loss": 0.024, "step": 120295 }, { "epoch": 0.501956922666088, "grad_norm": 0.6224281540080209, "learning_rate": 2.823143826513141e-06, "loss": 0.0273, "step": 120300 }, { "epoch": 0.5019777853810784, "grad_norm": 0.8392155908751712, "learning_rate": 2.8230851590299208e-06, "loss": 0.0284, "step": 120305 }, { "epoch": 0.5019986480960686, "grad_norm": 0.9495513381757796, "learning_rate": 2.8230264952040384e-06, "loss": 0.03, "step": 120310 }, { "epoch": 0.5020195108110589, "grad_norm": 0.40019053470766475, "learning_rate": 2.8229678350351136e-06, "loss": 0.0229, "step": 120315 }, { "epoch": 0.5020403735260491, "grad_norm": 0.7992390977945574, "learning_rate": 2.8229091785227673e-06, "loss": 0.029, "step": 120320 }, { "epoch": 0.5020612362410395, "grad_norm": 0.7313170104664987, "learning_rate": 2.8228505256666193e-06, "loss": 0.0322, "step": 120325 }, { "epoch": 0.5020820989560297, "grad_norm": 0.7141324380345667, "learning_rate": 2.82279187646629e-06, "loss": 0.0269, "step": 120330 }, { "epoch": 0.50210296167102, "grad_norm": 0.6573506206660673, "learning_rate": 2.8227332309213996e-06, "loss": 0.0221, "step": 120335 }, { "epoch": 0.5021238243860103, "grad_norm": 0.8980122874808968, "learning_rate": 2.822674589031568e-06, "loss": 0.0289, "step": 120340 }, { "epoch": 0.5021446871010006, "grad_norm": 0.6730106833731079, "learning_rate": 2.822615950796416e-06, "loss": 0.0403, "step": 120345 }, { "epoch": 0.5021655498159908, "grad_norm": 0.5621049289584888, "learning_rate": 2.822557316215564e-06, "loss": 0.0218, "step": 120350 }, { "epoch": 0.5021864125309812, "grad_norm": 0.42375823978487476, "learning_rate": 2.8224986852886323e-06, "loss": 0.0236, "step": 120355 }, { "epoch": 0.5022072752459714, "grad_norm": 0.6725904683231352, "learning_rate": 2.8224400580152404e-06, "loss": 0.0195, "step": 120360 }, { "epoch": 0.5022281379609617, "grad_norm": 1.2673700097548841, "learning_rate": 2.8223814343950113e-06, "loss": 0.0849, "step": 120365 }, { "epoch": 0.502249000675952, "grad_norm": 0.6234193205166072, "learning_rate": 2.822322814427563e-06, "loss": 0.0276, "step": 120370 }, { "epoch": 0.5022698633909423, "grad_norm": 0.48739615700664984, "learning_rate": 2.822264198112518e-06, "loss": 0.0219, "step": 120375 }, { "epoch": 0.5022907261059325, "grad_norm": 0.6763318029973718, "learning_rate": 2.822205585449497e-06, "loss": 0.0221, "step": 120380 }, { "epoch": 0.5023115888209227, "grad_norm": 0.5726469193345431, "learning_rate": 2.82214697643812e-06, "loss": 0.0194, "step": 120385 }, { "epoch": 0.5023324515359131, "grad_norm": 0.5401193984938073, "learning_rate": 2.822088371078008e-06, "loss": 0.0316, "step": 120390 }, { "epoch": 0.5023533142509033, "grad_norm": 1.0498458685995131, "learning_rate": 2.822029769368781e-06, "loss": 0.0325, "step": 120395 }, { "epoch": 0.5023741769658936, "grad_norm": 0.4116435563775881, "learning_rate": 2.821971171310062e-06, "loss": 0.0183, "step": 120400 }, { "epoch": 0.502395039680884, "grad_norm": 0.8589376282951892, "learning_rate": 2.8219125769014704e-06, "loss": 0.0242, "step": 120405 }, { "epoch": 0.5024159023958742, "grad_norm": 0.7248017823964075, "learning_rate": 2.8218539861426274e-06, "loss": 0.0221, "step": 120410 }, { "epoch": 0.5024367651108644, "grad_norm": 1.754865629676435, "learning_rate": 2.821795399033156e-06, "loss": 0.0212, "step": 120415 }, { "epoch": 0.5024576278258548, "grad_norm": 0.5973641897849674, "learning_rate": 2.821736815572674e-06, "loss": 0.024, "step": 120420 }, { "epoch": 0.502478490540845, "grad_norm": 0.2873138744324614, "learning_rate": 2.821678235760806e-06, "loss": 0.0182, "step": 120425 }, { "epoch": 0.5024993532558353, "grad_norm": 0.41598712054167786, "learning_rate": 2.821619659597171e-06, "loss": 0.0247, "step": 120430 }, { "epoch": 0.5025202159708256, "grad_norm": 0.480356036756968, "learning_rate": 2.8215610870813916e-06, "loss": 0.0217, "step": 120435 }, { "epoch": 0.5025410786858159, "grad_norm": 0.7226985880375956, "learning_rate": 2.821502518213089e-06, "loss": 0.0385, "step": 120440 }, { "epoch": 0.5025619414008061, "grad_norm": 0.4865323292524761, "learning_rate": 2.8214439529918836e-06, "loss": 0.0232, "step": 120445 }, { "epoch": 0.5025828041157964, "grad_norm": 1.071461571449798, "learning_rate": 2.8213853914173978e-06, "loss": 0.0292, "step": 120450 }, { "epoch": 0.5026036668307867, "grad_norm": 0.5485150994668666, "learning_rate": 2.8213268334892537e-06, "loss": 0.0246, "step": 120455 }, { "epoch": 0.502624529545777, "grad_norm": 0.4758546858984609, "learning_rate": 2.821268279207072e-06, "loss": 0.0206, "step": 120460 }, { "epoch": 0.5026453922607672, "grad_norm": 0.9470168928198481, "learning_rate": 2.821209728570474e-06, "loss": 0.0204, "step": 120465 }, { "epoch": 0.5026662549757576, "grad_norm": 0.7332356490252732, "learning_rate": 2.8211511815790826e-06, "loss": 0.0222, "step": 120470 }, { "epoch": 0.5026871176907478, "grad_norm": 0.6823634023273406, "learning_rate": 2.82109263823252e-06, "loss": 0.0218, "step": 120475 }, { "epoch": 0.502707980405738, "grad_norm": 0.49815757023088164, "learning_rate": 2.8210340985304052e-06, "loss": 0.0211, "step": 120480 }, { "epoch": 0.5027288431207284, "grad_norm": 0.5045467594847353, "learning_rate": 2.8209755624723634e-06, "loss": 0.015, "step": 120485 }, { "epoch": 0.5027497058357187, "grad_norm": 0.5900529745180331, "learning_rate": 2.8209170300580147e-06, "loss": 0.0223, "step": 120490 }, { "epoch": 0.5027705685507089, "grad_norm": 0.5177033109665885, "learning_rate": 2.8208585012869816e-06, "loss": 0.0194, "step": 120495 }, { "epoch": 0.5027914312656991, "grad_norm": 1.2532133477326834, "learning_rate": 2.820799976158886e-06, "loss": 0.0288, "step": 120500 }, { "epoch": 0.5028122939806895, "grad_norm": 0.7406437080387066, "learning_rate": 2.8207414546733503e-06, "loss": 0.0158, "step": 120505 }, { "epoch": 0.5028331566956797, "grad_norm": 0.8841568778513558, "learning_rate": 2.820682936829996e-06, "loss": 0.0255, "step": 120510 }, { "epoch": 0.50285401941067, "grad_norm": 0.5211365162943491, "learning_rate": 2.8206244226284464e-06, "loss": 0.0168, "step": 120515 }, { "epoch": 0.5028748821256603, "grad_norm": 0.40747681123839846, "learning_rate": 2.8205659120683227e-06, "loss": 0.0216, "step": 120520 }, { "epoch": 0.5028957448406506, "grad_norm": 0.9548964069836102, "learning_rate": 2.820507405149248e-06, "loss": 0.0183, "step": 120525 }, { "epoch": 0.5029166075556408, "grad_norm": 0.6653580939984377, "learning_rate": 2.8204489018708443e-06, "loss": 0.0261, "step": 120530 }, { "epoch": 0.5029374702706312, "grad_norm": 0.7330244631738485, "learning_rate": 2.820390402232734e-06, "loss": 0.0243, "step": 120535 }, { "epoch": 0.5029583329856214, "grad_norm": 0.5292142288232646, "learning_rate": 2.82033190623454e-06, "loss": 0.0283, "step": 120540 }, { "epoch": 0.5029791957006117, "grad_norm": 0.7927434931466604, "learning_rate": 2.8202734138758837e-06, "loss": 0.0239, "step": 120545 }, { "epoch": 0.503000058415602, "grad_norm": 0.4627781055345934, "learning_rate": 2.8202149251563893e-06, "loss": 0.024, "step": 120550 }, { "epoch": 0.5030209211305923, "grad_norm": 0.39690077304622545, "learning_rate": 2.8201564400756787e-06, "loss": 0.0271, "step": 120555 }, { "epoch": 0.5030417838455825, "grad_norm": 0.5795141397161931, "learning_rate": 2.8200979586333743e-06, "loss": 0.0216, "step": 120560 }, { "epoch": 0.5030626465605728, "grad_norm": 0.7039860328663649, "learning_rate": 2.8200394808290997e-06, "loss": 0.0207, "step": 120565 }, { "epoch": 0.5030835092755631, "grad_norm": 0.5458447437531813, "learning_rate": 2.8199810066624768e-06, "loss": 0.0256, "step": 120570 }, { "epoch": 0.5031043719905534, "grad_norm": 0.6419517448043782, "learning_rate": 2.8199225361331293e-06, "loss": 0.024, "step": 120575 }, { "epoch": 0.5031252347055436, "grad_norm": 0.6445865083880034, "learning_rate": 2.8198640692406792e-06, "loss": 0.0254, "step": 120580 }, { "epoch": 0.503146097420534, "grad_norm": 0.5850293605171362, "learning_rate": 2.8198056059847508e-06, "loss": 0.021, "step": 120585 }, { "epoch": 0.5031669601355242, "grad_norm": 0.451210547554036, "learning_rate": 2.819747146364965e-06, "loss": 0.0241, "step": 120590 }, { "epoch": 0.5031878228505144, "grad_norm": 0.6488533981079055, "learning_rate": 2.8196886903809473e-06, "loss": 0.0294, "step": 120595 }, { "epoch": 0.5032086855655048, "grad_norm": 0.7017228293219008, "learning_rate": 2.8196302380323197e-06, "loss": 0.0236, "step": 120600 }, { "epoch": 0.503229548280495, "grad_norm": 0.6375183196521184, "learning_rate": 2.8195717893187056e-06, "loss": 0.0219, "step": 120605 }, { "epoch": 0.5032504109954853, "grad_norm": 0.7912692199760734, "learning_rate": 2.819513344239728e-06, "loss": 0.0253, "step": 120610 }, { "epoch": 0.5032712737104756, "grad_norm": 0.6115292039673912, "learning_rate": 2.8194549027950107e-06, "loss": 0.0249, "step": 120615 }, { "epoch": 0.5032921364254659, "grad_norm": 1.3661303493916328, "learning_rate": 2.8193964649841764e-06, "loss": 0.0249, "step": 120620 }, { "epoch": 0.5033129991404561, "grad_norm": 0.688243503208737, "learning_rate": 2.8193380308068492e-06, "loss": 0.0258, "step": 120625 }, { "epoch": 0.5033338618554464, "grad_norm": 0.6195644747613611, "learning_rate": 2.819279600262652e-06, "loss": 0.0221, "step": 120630 }, { "epoch": 0.5033547245704367, "grad_norm": 0.7256943857732547, "learning_rate": 2.8192211733512087e-06, "loss": 0.0279, "step": 120635 }, { "epoch": 0.503375587285427, "grad_norm": 1.2562779159209185, "learning_rate": 2.8191627500721425e-06, "loss": 0.0349, "step": 120640 }, { "epoch": 0.5033964500004172, "grad_norm": 1.2422806935735542, "learning_rate": 2.8191043304250776e-06, "loss": 0.034, "step": 120645 }, { "epoch": 0.5034173127154076, "grad_norm": 0.7125738809336606, "learning_rate": 2.8190459144096378e-06, "loss": 0.0157, "step": 120650 }, { "epoch": 0.5034381754303978, "grad_norm": 0.40883999082644534, "learning_rate": 2.818987502025446e-06, "loss": 0.0241, "step": 120655 }, { "epoch": 0.5034590381453881, "grad_norm": 1.0156486002514922, "learning_rate": 2.818929093272127e-06, "loss": 0.0292, "step": 120660 }, { "epoch": 0.5034799008603784, "grad_norm": 0.7152152466002784, "learning_rate": 2.8188706881493033e-06, "loss": 0.0256, "step": 120665 }, { "epoch": 0.5035007635753687, "grad_norm": 2.3739424012386947, "learning_rate": 2.8188122866566004e-06, "loss": 0.0281, "step": 120670 }, { "epoch": 0.5035216262903589, "grad_norm": 0.7939660473433978, "learning_rate": 2.818753888793641e-06, "loss": 0.0211, "step": 120675 }, { "epoch": 0.5035424890053491, "grad_norm": 0.8487958960283386, "learning_rate": 2.8186954945600503e-06, "loss": 0.0252, "step": 120680 }, { "epoch": 0.5035633517203395, "grad_norm": 0.433799960567018, "learning_rate": 2.818637103955451e-06, "loss": 0.0195, "step": 120685 }, { "epoch": 0.5035842144353297, "grad_norm": 0.4245785036602776, "learning_rate": 2.8185787169794685e-06, "loss": 0.022, "step": 120690 }, { "epoch": 0.50360507715032, "grad_norm": 0.21508503502873216, "learning_rate": 2.8185203336317267e-06, "loss": 0.0228, "step": 120695 }, { "epoch": 0.5036259398653103, "grad_norm": 0.6524348701916654, "learning_rate": 2.818461953911849e-06, "loss": 0.0261, "step": 120700 }, { "epoch": 0.5036468025803006, "grad_norm": 0.5452434727422057, "learning_rate": 2.818403577819461e-06, "loss": 0.0249, "step": 120705 }, { "epoch": 0.5036676652952908, "grad_norm": 0.5826698173577651, "learning_rate": 2.8183452053541853e-06, "loss": 0.0194, "step": 120710 }, { "epoch": 0.5036885280102812, "grad_norm": 1.2273031980302183, "learning_rate": 2.8182868365156485e-06, "loss": 0.0238, "step": 120715 }, { "epoch": 0.5037093907252714, "grad_norm": 0.529770663074177, "learning_rate": 2.8182284713034737e-06, "loss": 0.018, "step": 120720 }, { "epoch": 0.5037302534402617, "grad_norm": 0.7235868308661062, "learning_rate": 2.8181701097172847e-06, "loss": 0.0214, "step": 120725 }, { "epoch": 0.503751116155252, "grad_norm": 1.1470448180838095, "learning_rate": 2.818111751756708e-06, "loss": 0.0243, "step": 120730 }, { "epoch": 0.5037719788702423, "grad_norm": 1.0260994326899002, "learning_rate": 2.818053397421367e-06, "loss": 0.0132, "step": 120735 }, { "epoch": 0.5037928415852325, "grad_norm": 0.6734088124023774, "learning_rate": 2.8179950467108868e-06, "loss": 0.023, "step": 120740 }, { "epoch": 0.5038137043002228, "grad_norm": 0.45954192496284924, "learning_rate": 2.8179366996248913e-06, "loss": 0.0263, "step": 120745 }, { "epoch": 0.5038345670152131, "grad_norm": 0.42095431361324426, "learning_rate": 2.8178783561630064e-06, "loss": 0.025, "step": 120750 }, { "epoch": 0.5038554297302034, "grad_norm": 0.7927313051766679, "learning_rate": 2.8178200163248563e-06, "loss": 0.0304, "step": 120755 }, { "epoch": 0.5038762924451936, "grad_norm": 1.0474477241193814, "learning_rate": 2.8177616801100667e-06, "loss": 0.0283, "step": 120760 }, { "epoch": 0.503897155160184, "grad_norm": 0.8030548372167868, "learning_rate": 2.8177033475182615e-06, "loss": 0.0245, "step": 120765 }, { "epoch": 0.5039180178751742, "grad_norm": 0.4513299838941431, "learning_rate": 2.817645018549066e-06, "loss": 0.0159, "step": 120770 }, { "epoch": 0.5039388805901645, "grad_norm": 0.7807145595794144, "learning_rate": 2.8175866932021057e-06, "loss": 0.0164, "step": 120775 }, { "epoch": 0.5039597433051548, "grad_norm": 1.0624420899624643, "learning_rate": 2.817528371477005e-06, "loss": 0.0304, "step": 120780 }, { "epoch": 0.503980606020145, "grad_norm": 0.8233137866255718, "learning_rate": 2.8174700533733897e-06, "loss": 0.0275, "step": 120785 }, { "epoch": 0.5040014687351353, "grad_norm": 0.5271859589102179, "learning_rate": 2.8174117388908846e-06, "loss": 0.0207, "step": 120790 }, { "epoch": 0.5040223314501255, "grad_norm": 0.6215660616232812, "learning_rate": 2.8173534280291154e-06, "loss": 0.0246, "step": 120795 }, { "epoch": 0.5040431941651159, "grad_norm": 0.9753104500803086, "learning_rate": 2.817295120787707e-06, "loss": 0.0303, "step": 120800 }, { "epoch": 0.5040640568801061, "grad_norm": 0.690292297046676, "learning_rate": 2.8172368171662844e-06, "loss": 0.0226, "step": 120805 }, { "epoch": 0.5040849195950964, "grad_norm": 0.5932147524450265, "learning_rate": 2.817178517164475e-06, "loss": 0.0223, "step": 120810 }, { "epoch": 0.5041057823100867, "grad_norm": 0.6820995369773001, "learning_rate": 2.8171202207819026e-06, "loss": 0.0281, "step": 120815 }, { "epoch": 0.504126645025077, "grad_norm": 0.7408221640791346, "learning_rate": 2.817061928018192e-06, "loss": 0.0239, "step": 120820 }, { "epoch": 0.5041475077400672, "grad_norm": 0.7090191607953664, "learning_rate": 2.817003638872971e-06, "loss": 0.0241, "step": 120825 }, { "epoch": 0.5041683704550576, "grad_norm": 0.5505355152402166, "learning_rate": 2.8169453533458636e-06, "loss": 0.0312, "step": 120830 }, { "epoch": 0.5041892331700478, "grad_norm": 0.8733456293446475, "learning_rate": 2.816887071436496e-06, "loss": 0.0248, "step": 120835 }, { "epoch": 0.5042100958850381, "grad_norm": 0.7072772367168148, "learning_rate": 2.816828793144494e-06, "loss": 0.018, "step": 120840 }, { "epoch": 0.5042309586000284, "grad_norm": 0.7319736263027649, "learning_rate": 2.816770518469484e-06, "loss": 0.0219, "step": 120845 }, { "epoch": 0.5042518213150187, "grad_norm": 0.7315760440443805, "learning_rate": 2.8167122474110906e-06, "loss": 0.0233, "step": 120850 }, { "epoch": 0.5042726840300089, "grad_norm": 0.4264357963334753, "learning_rate": 2.8166539799689406e-06, "loss": 0.0249, "step": 120855 }, { "epoch": 0.5042935467449992, "grad_norm": 0.8550958017909371, "learning_rate": 2.8165957161426607e-06, "loss": 0.0147, "step": 120860 }, { "epoch": 0.5043144094599895, "grad_norm": 0.6835826424386985, "learning_rate": 2.816537455931875e-06, "loss": 0.0255, "step": 120865 }, { "epoch": 0.5043352721749798, "grad_norm": 1.4256421684656548, "learning_rate": 2.8164791993362108e-06, "loss": 0.0293, "step": 120870 }, { "epoch": 0.50435613488997, "grad_norm": 0.757723956937593, "learning_rate": 2.8164209463552945e-06, "loss": 0.037, "step": 120875 }, { "epoch": 0.5043769976049604, "grad_norm": 1.4105038322320045, "learning_rate": 2.816362696988751e-06, "loss": 0.0252, "step": 120880 }, { "epoch": 0.5043978603199506, "grad_norm": 0.9942086858701223, "learning_rate": 2.816304451236208e-06, "loss": 0.0304, "step": 120885 }, { "epoch": 0.5044187230349408, "grad_norm": 0.7042030888668159, "learning_rate": 2.8162462090972915e-06, "loss": 0.019, "step": 120890 }, { "epoch": 0.5044395857499312, "grad_norm": 0.9730744511172457, "learning_rate": 2.816187970571627e-06, "loss": 0.0292, "step": 120895 }, { "epoch": 0.5044604484649214, "grad_norm": 0.6196067426106681, "learning_rate": 2.8161297356588417e-06, "loss": 0.0223, "step": 120900 }, { "epoch": 0.5044813111799117, "grad_norm": 0.5001099191679184, "learning_rate": 2.8160715043585617e-06, "loss": 0.0225, "step": 120905 }, { "epoch": 0.504502173894902, "grad_norm": 1.500158661923749, "learning_rate": 2.816013276670414e-06, "loss": 0.0222, "step": 120910 }, { "epoch": 0.5045230366098923, "grad_norm": 0.7082847444851287, "learning_rate": 2.815955052594025e-06, "loss": 0.0283, "step": 120915 }, { "epoch": 0.5045438993248825, "grad_norm": 1.0965259717237243, "learning_rate": 2.815896832129021e-06, "loss": 0.0202, "step": 120920 }, { "epoch": 0.5045647620398728, "grad_norm": 0.526632102453327, "learning_rate": 2.8158386152750284e-06, "loss": 0.0182, "step": 120925 }, { "epoch": 0.5045856247548631, "grad_norm": 0.6580077564975311, "learning_rate": 2.8157804020316753e-06, "loss": 0.0198, "step": 120930 }, { "epoch": 0.5046064874698534, "grad_norm": 1.116870115668021, "learning_rate": 2.815722192398587e-06, "loss": 0.0213, "step": 120935 }, { "epoch": 0.5046273501848436, "grad_norm": 0.9315396850657597, "learning_rate": 2.815663986375391e-06, "loss": 0.0226, "step": 120940 }, { "epoch": 0.504648212899834, "grad_norm": 0.7084327164455687, "learning_rate": 2.815605783961715e-06, "loss": 0.0296, "step": 120945 }, { "epoch": 0.5046690756148242, "grad_norm": 0.39244104914590544, "learning_rate": 2.8155475851571844e-06, "loss": 0.0206, "step": 120950 }, { "epoch": 0.5046899383298145, "grad_norm": 0.8825613725456792, "learning_rate": 2.815489389961427e-06, "loss": 0.0238, "step": 120955 }, { "epoch": 0.5047108010448048, "grad_norm": 0.9395387261256068, "learning_rate": 2.8154311983740697e-06, "loss": 0.0184, "step": 120960 }, { "epoch": 0.5047316637597951, "grad_norm": 1.1810700861587449, "learning_rate": 2.8153730103947397e-06, "loss": 0.0276, "step": 120965 }, { "epoch": 0.5047525264747853, "grad_norm": 0.7313628230254744, "learning_rate": 2.8153148260230645e-06, "loss": 0.0261, "step": 120970 }, { "epoch": 0.5047733891897755, "grad_norm": 0.6120504044567134, "learning_rate": 2.8152566452586704e-06, "loss": 0.0291, "step": 120975 }, { "epoch": 0.5047942519047659, "grad_norm": 0.4707987180703529, "learning_rate": 2.815198468101186e-06, "loss": 0.0215, "step": 120980 }, { "epoch": 0.5048151146197561, "grad_norm": 0.4212120435187725, "learning_rate": 2.8151402945502375e-06, "loss": 0.0237, "step": 120985 }, { "epoch": 0.5048359773347464, "grad_norm": 0.4420952434709212, "learning_rate": 2.8150821246054527e-06, "loss": 0.0222, "step": 120990 }, { "epoch": 0.5048568400497367, "grad_norm": 0.7230351224991127, "learning_rate": 2.8150239582664592e-06, "loss": 0.0225, "step": 120995 }, { "epoch": 0.504877702764727, "grad_norm": 0.8110285520681894, "learning_rate": 2.814965795532884e-06, "loss": 0.024, "step": 121000 }, { "epoch": 0.5048985654797172, "grad_norm": 0.8910127553426884, "learning_rate": 2.8149076364043554e-06, "loss": 0.0275, "step": 121005 }, { "epoch": 0.5049194281947076, "grad_norm": 0.5968673072620224, "learning_rate": 2.8148494808805005e-06, "loss": 0.0162, "step": 121010 }, { "epoch": 0.5049402909096978, "grad_norm": 0.49278298362965967, "learning_rate": 2.814791328960947e-06, "loss": 0.0174, "step": 121015 }, { "epoch": 0.5049611536246881, "grad_norm": 0.6216321281802138, "learning_rate": 2.8147331806453226e-06, "loss": 0.0263, "step": 121020 }, { "epoch": 0.5049820163396784, "grad_norm": 0.7770674919398528, "learning_rate": 2.8146750359332555e-06, "loss": 0.0254, "step": 121025 }, { "epoch": 0.5050028790546687, "grad_norm": 0.7459328453170778, "learning_rate": 2.8146168948243725e-06, "loss": 0.0221, "step": 121030 }, { "epoch": 0.5050237417696589, "grad_norm": 0.4585845869510043, "learning_rate": 2.8145587573183026e-06, "loss": 0.0172, "step": 121035 }, { "epoch": 0.5050446044846492, "grad_norm": 0.5033224804440026, "learning_rate": 2.8145006234146728e-06, "loss": 0.0217, "step": 121040 }, { "epoch": 0.5050654671996395, "grad_norm": 1.3707207815267233, "learning_rate": 2.8144424931131115e-06, "loss": 0.0237, "step": 121045 }, { "epoch": 0.5050863299146298, "grad_norm": 0.7599574755495035, "learning_rate": 2.814384366413247e-06, "loss": 0.0289, "step": 121050 }, { "epoch": 0.50510719262962, "grad_norm": 1.2373603286622463, "learning_rate": 2.8143262433147065e-06, "loss": 0.0245, "step": 121055 }, { "epoch": 0.5051280553446104, "grad_norm": 0.6084593147749177, "learning_rate": 2.81426812381712e-06, "loss": 0.0191, "step": 121060 }, { "epoch": 0.5051489180596006, "grad_norm": 0.8931616609562464, "learning_rate": 2.8142100079201133e-06, "loss": 0.0305, "step": 121065 }, { "epoch": 0.5051697807745908, "grad_norm": 0.4755716346763479, "learning_rate": 2.8141518956233165e-06, "loss": 0.0215, "step": 121070 }, { "epoch": 0.5051906434895812, "grad_norm": 0.9623406056657874, "learning_rate": 2.8140937869263567e-06, "loss": 0.0289, "step": 121075 }, { "epoch": 0.5052115062045714, "grad_norm": 0.6138635166986738, "learning_rate": 2.8140356818288635e-06, "loss": 0.0327, "step": 121080 }, { "epoch": 0.5052323689195617, "grad_norm": 0.6247008751631068, "learning_rate": 2.813977580330464e-06, "loss": 0.0217, "step": 121085 }, { "epoch": 0.505253231634552, "grad_norm": 0.5256589455050649, "learning_rate": 2.813919482430787e-06, "loss": 0.0243, "step": 121090 }, { "epoch": 0.5052740943495423, "grad_norm": 0.8458341802578997, "learning_rate": 2.8138613881294618e-06, "loss": 0.0205, "step": 121095 }, { "epoch": 0.5052949570645325, "grad_norm": 0.7238964823379549, "learning_rate": 2.8138032974261165e-06, "loss": 0.024, "step": 121100 }, { "epoch": 0.5053158197795228, "grad_norm": 0.5005435433872711, "learning_rate": 2.813745210320379e-06, "loss": 0.0161, "step": 121105 }, { "epoch": 0.5053366824945131, "grad_norm": 0.8933889676781122, "learning_rate": 2.813687126811879e-06, "loss": 0.0423, "step": 121110 }, { "epoch": 0.5053575452095034, "grad_norm": 0.7068679761788604, "learning_rate": 2.8136290469002446e-06, "loss": 0.0238, "step": 121115 }, { "epoch": 0.5053784079244936, "grad_norm": 0.7813872169268878, "learning_rate": 2.813570970585105e-06, "loss": 0.0258, "step": 121120 }, { "epoch": 0.505399270639484, "grad_norm": 0.750654312710283, "learning_rate": 2.8135128978660886e-06, "loss": 0.0291, "step": 121125 }, { "epoch": 0.5054201333544742, "grad_norm": 0.8713128746455873, "learning_rate": 2.813454828742825e-06, "loss": 0.0279, "step": 121130 }, { "epoch": 0.5054409960694645, "grad_norm": 0.6821991572230364, "learning_rate": 2.813396763214943e-06, "loss": 0.0205, "step": 121135 }, { "epoch": 0.5054618587844548, "grad_norm": 0.8308603936638924, "learning_rate": 2.8133387012820708e-06, "loss": 0.0273, "step": 121140 }, { "epoch": 0.5054827214994451, "grad_norm": 0.7027671063639661, "learning_rate": 2.8132806429438375e-06, "loss": 0.0219, "step": 121145 }, { "epoch": 0.5055035842144353, "grad_norm": 0.828845314825747, "learning_rate": 2.8132225881998733e-06, "loss": 0.0203, "step": 121150 }, { "epoch": 0.5055244469294256, "grad_norm": 0.6607800098432205, "learning_rate": 2.813164537049806e-06, "loss": 0.0287, "step": 121155 }, { "epoch": 0.5055453096444159, "grad_norm": 0.8100230583122261, "learning_rate": 2.813106489493267e-06, "loss": 0.0262, "step": 121160 }, { "epoch": 0.5055661723594062, "grad_norm": 0.6485770241566221, "learning_rate": 2.813048445529883e-06, "loss": 0.0215, "step": 121165 }, { "epoch": 0.5055870350743964, "grad_norm": 0.3254305370293793, "learning_rate": 2.8129904051592844e-06, "loss": 0.0201, "step": 121170 }, { "epoch": 0.5056078977893868, "grad_norm": 0.6234272579537748, "learning_rate": 2.8129323683811003e-06, "loss": 0.0249, "step": 121175 }, { "epoch": 0.505628760504377, "grad_norm": 0.40703004361379075, "learning_rate": 2.812874335194961e-06, "loss": 0.0251, "step": 121180 }, { "epoch": 0.5056496232193672, "grad_norm": 0.8368355263025223, "learning_rate": 2.8128163056004957e-06, "loss": 0.0266, "step": 121185 }, { "epoch": 0.5056704859343576, "grad_norm": 0.6334402501030734, "learning_rate": 2.8127582795973335e-06, "loss": 0.019, "step": 121190 }, { "epoch": 0.5056913486493478, "grad_norm": 0.40463595450955697, "learning_rate": 2.8127002571851036e-06, "loss": 0.0266, "step": 121195 }, { "epoch": 0.5057122113643381, "grad_norm": 0.6271661649622698, "learning_rate": 2.8126422383634365e-06, "loss": 0.0241, "step": 121200 }, { "epoch": 0.5057330740793284, "grad_norm": 0.6436805905378176, "learning_rate": 2.812584223131961e-06, "loss": 0.0214, "step": 121205 }, { "epoch": 0.5057539367943187, "grad_norm": 1.3009880740536832, "learning_rate": 2.8125262114903085e-06, "loss": 0.0357, "step": 121210 }, { "epoch": 0.5057747995093089, "grad_norm": 0.7830896891712253, "learning_rate": 2.8124682034381068e-06, "loss": 0.0251, "step": 121215 }, { "epoch": 0.5057956622242992, "grad_norm": 0.7416635876631962, "learning_rate": 2.8124101989749873e-06, "loss": 0.0176, "step": 121220 }, { "epoch": 0.5058165249392895, "grad_norm": 1.023928576193004, "learning_rate": 2.8123521981005792e-06, "loss": 0.0216, "step": 121225 }, { "epoch": 0.5058373876542798, "grad_norm": 0.9493833531024946, "learning_rate": 2.8122942008145114e-06, "loss": 0.0342, "step": 121230 }, { "epoch": 0.50585825036927, "grad_norm": 0.8577099003004385, "learning_rate": 2.8122362071164167e-06, "loss": 0.0318, "step": 121235 }, { "epoch": 0.5058791130842604, "grad_norm": 1.030833702119932, "learning_rate": 2.8121782170059225e-06, "loss": 0.018, "step": 121240 }, { "epoch": 0.5058999757992506, "grad_norm": 0.5834269678629463, "learning_rate": 2.81212023048266e-06, "loss": 0.0221, "step": 121245 }, { "epoch": 0.5059208385142409, "grad_norm": 0.7932705944429234, "learning_rate": 2.8120622475462593e-06, "loss": 0.0233, "step": 121250 }, { "epoch": 0.5059417012292312, "grad_norm": 1.028157788694815, "learning_rate": 2.8120042681963513e-06, "loss": 0.0333, "step": 121255 }, { "epoch": 0.5059625639442215, "grad_norm": 0.9023571370942489, "learning_rate": 2.8119462924325652e-06, "loss": 0.0228, "step": 121260 }, { "epoch": 0.5059834266592117, "grad_norm": 0.4036779329315104, "learning_rate": 2.811888320254532e-06, "loss": 0.038, "step": 121265 }, { "epoch": 0.506004289374202, "grad_norm": 0.9853430077764485, "learning_rate": 2.8118303516618813e-06, "loss": 0.0239, "step": 121270 }, { "epoch": 0.5060251520891923, "grad_norm": 0.925525465916127, "learning_rate": 2.811772386654244e-06, "loss": 0.0247, "step": 121275 }, { "epoch": 0.5060460148041825, "grad_norm": 1.2327272689657487, "learning_rate": 2.8117144252312513e-06, "loss": 0.0226, "step": 121280 }, { "epoch": 0.5060668775191728, "grad_norm": 1.0508096296859506, "learning_rate": 2.8116564673925336e-06, "loss": 0.0268, "step": 121285 }, { "epoch": 0.5060877402341631, "grad_norm": 0.6521788945206834, "learning_rate": 2.8115985131377194e-06, "loss": 0.023, "step": 121290 }, { "epoch": 0.5061086029491534, "grad_norm": 0.6286330306713426, "learning_rate": 2.811540562466442e-06, "loss": 0.0278, "step": 121295 }, { "epoch": 0.5061294656641436, "grad_norm": 0.6739441677477475, "learning_rate": 2.8114826153783313e-06, "loss": 0.0222, "step": 121300 }, { "epoch": 0.506150328379134, "grad_norm": 1.0623649955234888, "learning_rate": 2.8114246718730175e-06, "loss": 0.026, "step": 121305 }, { "epoch": 0.5061711910941242, "grad_norm": 0.7358150044955409, "learning_rate": 2.811366731950132e-06, "loss": 0.03, "step": 121310 }, { "epoch": 0.5061920538091145, "grad_norm": 0.751494609778391, "learning_rate": 2.811308795609305e-06, "loss": 0.0227, "step": 121315 }, { "epoch": 0.5062129165241048, "grad_norm": 1.0709391988549966, "learning_rate": 2.811250862850168e-06, "loss": 0.0263, "step": 121320 }, { "epoch": 0.5062337792390951, "grad_norm": 0.32159715132133865, "learning_rate": 2.8111929336723527e-06, "loss": 0.0205, "step": 121325 }, { "epoch": 0.5062546419540853, "grad_norm": 0.703689237422724, "learning_rate": 2.8111350080754884e-06, "loss": 0.0252, "step": 121330 }, { "epoch": 0.5062755046690756, "grad_norm": 0.35782396331680005, "learning_rate": 2.8110770860592074e-06, "loss": 0.0281, "step": 121335 }, { "epoch": 0.5062963673840659, "grad_norm": 0.5280332268789387, "learning_rate": 2.81101916762314e-06, "loss": 0.0239, "step": 121340 }, { "epoch": 0.5063172300990562, "grad_norm": 0.9085353633760784, "learning_rate": 2.8109612527669184e-06, "loss": 0.0248, "step": 121345 }, { "epoch": 0.5063380928140464, "grad_norm": 0.6148943810187067, "learning_rate": 2.810903341490173e-06, "loss": 0.0267, "step": 121350 }, { "epoch": 0.5063589555290368, "grad_norm": 0.990809701103266, "learning_rate": 2.8108454337925355e-06, "loss": 0.0242, "step": 121355 }, { "epoch": 0.506379818244027, "grad_norm": 0.42360683386024245, "learning_rate": 2.8107875296736375e-06, "loss": 0.0431, "step": 121360 }, { "epoch": 0.5064006809590172, "grad_norm": 0.4758655022216242, "learning_rate": 2.81072962913311e-06, "loss": 0.0182, "step": 121365 }, { "epoch": 0.5064215436740076, "grad_norm": 0.5355705546689239, "learning_rate": 2.810671732170584e-06, "loss": 0.0233, "step": 121370 }, { "epoch": 0.5064424063889978, "grad_norm": 0.7456494819682895, "learning_rate": 2.810613838785692e-06, "loss": 0.0282, "step": 121375 }, { "epoch": 0.5064632691039881, "grad_norm": 2.24152769771457, "learning_rate": 2.810555948978065e-06, "loss": 0.031, "step": 121380 }, { "epoch": 0.5064841318189784, "grad_norm": 0.3777051685807171, "learning_rate": 2.810498062747334e-06, "loss": 0.0233, "step": 121385 }, { "epoch": 0.5065049945339687, "grad_norm": 0.48253925453239904, "learning_rate": 2.8104401800931324e-06, "loss": 0.0237, "step": 121390 }, { "epoch": 0.5065258572489589, "grad_norm": 1.1336618841362458, "learning_rate": 2.8103823010150906e-06, "loss": 0.0223, "step": 121395 }, { "epoch": 0.5065467199639492, "grad_norm": 0.8025683203253385, "learning_rate": 2.8103244255128403e-06, "loss": 0.0255, "step": 121400 }, { "epoch": 0.5065675826789395, "grad_norm": 1.937530131570433, "learning_rate": 2.810266553586014e-06, "loss": 0.0266, "step": 121405 }, { "epoch": 0.5065884453939298, "grad_norm": 0.6860852345206464, "learning_rate": 2.810208685234243e-06, "loss": 0.0316, "step": 121410 }, { "epoch": 0.50660930810892, "grad_norm": 0.7963712133052507, "learning_rate": 2.81015082045716e-06, "loss": 0.0238, "step": 121415 }, { "epoch": 0.5066301708239104, "grad_norm": 0.6879688352808266, "learning_rate": 2.8100929592543957e-06, "loss": 0.0203, "step": 121420 }, { "epoch": 0.5066510335389006, "grad_norm": 0.8918256651714209, "learning_rate": 2.8100351016255833e-06, "loss": 0.0212, "step": 121425 }, { "epoch": 0.5066718962538909, "grad_norm": 0.8511894964279482, "learning_rate": 2.8099772475703542e-06, "loss": 0.0209, "step": 121430 }, { "epoch": 0.5066927589688812, "grad_norm": 0.2844244510070752, "learning_rate": 2.8099193970883417e-06, "loss": 0.0183, "step": 121435 }, { "epoch": 0.5067136216838715, "grad_norm": 0.6264601960154009, "learning_rate": 2.8098615501791763e-06, "loss": 0.0341, "step": 121440 }, { "epoch": 0.5067344843988617, "grad_norm": 0.4635586494238538, "learning_rate": 2.8098037068424914e-06, "loss": 0.0248, "step": 121445 }, { "epoch": 0.5067553471138521, "grad_norm": 1.139314521322273, "learning_rate": 2.8097458670779186e-06, "loss": 0.0333, "step": 121450 }, { "epoch": 0.5067762098288423, "grad_norm": 0.676988796143076, "learning_rate": 2.809688030885091e-06, "loss": 0.0319, "step": 121455 }, { "epoch": 0.5067970725438325, "grad_norm": 0.5030941099156675, "learning_rate": 2.8096301982636414e-06, "loss": 0.0218, "step": 121460 }, { "epoch": 0.5068179352588228, "grad_norm": 0.5577094681364905, "learning_rate": 2.8095723692132005e-06, "loss": 0.0168, "step": 121465 }, { "epoch": 0.5068387979738131, "grad_norm": 0.5842929781315758, "learning_rate": 2.809514543733402e-06, "loss": 0.0236, "step": 121470 }, { "epoch": 0.5068596606888034, "grad_norm": 0.5657756117576357, "learning_rate": 2.809456721823879e-06, "loss": 0.0268, "step": 121475 }, { "epoch": 0.5068805234037936, "grad_norm": 0.8683366900326054, "learning_rate": 2.8093989034842624e-06, "loss": 0.0232, "step": 121480 }, { "epoch": 0.506901386118784, "grad_norm": 0.8844070103024879, "learning_rate": 2.8093410887141863e-06, "loss": 0.032, "step": 121485 }, { "epoch": 0.5069222488337742, "grad_norm": 0.5325582294009857, "learning_rate": 2.8092832775132835e-06, "loss": 0.0318, "step": 121490 }, { "epoch": 0.5069431115487645, "grad_norm": 0.6162923647975885, "learning_rate": 2.8092254698811854e-06, "loss": 0.0265, "step": 121495 }, { "epoch": 0.5069639742637548, "grad_norm": 0.929294303526708, "learning_rate": 2.8091676658175266e-06, "loss": 0.0304, "step": 121500 }, { "epoch": 0.5069848369787451, "grad_norm": 0.6667563926886058, "learning_rate": 2.8091098653219386e-06, "loss": 0.0279, "step": 121505 }, { "epoch": 0.5070056996937353, "grad_norm": 0.5795848800852483, "learning_rate": 2.8090520683940555e-06, "loss": 0.0214, "step": 121510 }, { "epoch": 0.5070265624087256, "grad_norm": 0.7576221512014959, "learning_rate": 2.808994275033509e-06, "loss": 0.0311, "step": 121515 }, { "epoch": 0.5070474251237159, "grad_norm": 0.817332188234155, "learning_rate": 2.8089364852399327e-06, "loss": 0.0238, "step": 121520 }, { "epoch": 0.5070682878387062, "grad_norm": 1.2004522992292284, "learning_rate": 2.8088786990129594e-06, "loss": 0.0275, "step": 121525 }, { "epoch": 0.5070891505536964, "grad_norm": 0.5226015703096469, "learning_rate": 2.8088209163522236e-06, "loss": 0.0235, "step": 121530 }, { "epoch": 0.5071100132686868, "grad_norm": 0.5344591380794906, "learning_rate": 2.8087631372573575e-06, "loss": 0.03, "step": 121535 }, { "epoch": 0.507130875983677, "grad_norm": 0.9392366964071837, "learning_rate": 2.808705361727994e-06, "loss": 0.0318, "step": 121540 }, { "epoch": 0.5071517386986673, "grad_norm": 0.7691956474308425, "learning_rate": 2.8086475897637673e-06, "loss": 0.018, "step": 121545 }, { "epoch": 0.5071726014136576, "grad_norm": 1.2400503550951463, "learning_rate": 2.8085898213643096e-06, "loss": 0.0254, "step": 121550 }, { "epoch": 0.5071934641286479, "grad_norm": 0.5897892260080171, "learning_rate": 2.808532056529255e-06, "loss": 0.0269, "step": 121555 }, { "epoch": 0.5072143268436381, "grad_norm": 0.5575342247899784, "learning_rate": 2.8084742952582374e-06, "loss": 0.0146, "step": 121560 }, { "epoch": 0.5072351895586285, "grad_norm": 0.32656135784138446, "learning_rate": 2.8084165375508897e-06, "loss": 0.0216, "step": 121565 }, { "epoch": 0.5072560522736187, "grad_norm": 2.5081437883797992, "learning_rate": 2.8083587834068453e-06, "loss": 0.0257, "step": 121570 }, { "epoch": 0.5072769149886089, "grad_norm": 0.7714110471109668, "learning_rate": 2.8083010328257388e-06, "loss": 0.0209, "step": 121575 }, { "epoch": 0.5072977777035992, "grad_norm": 0.7188023140073638, "learning_rate": 2.8082432858072028e-06, "loss": 0.0195, "step": 121580 }, { "epoch": 0.5073186404185895, "grad_norm": 1.3079322733276766, "learning_rate": 2.8081855423508715e-06, "loss": 0.0259, "step": 121585 }, { "epoch": 0.5073395031335798, "grad_norm": 0.5659569268594608, "learning_rate": 2.8081278024563785e-06, "loss": 0.026, "step": 121590 }, { "epoch": 0.50736036584857, "grad_norm": 0.6185905692057301, "learning_rate": 2.8080700661233584e-06, "loss": 0.0182, "step": 121595 }, { "epoch": 0.5073812285635604, "grad_norm": 0.6406604405132635, "learning_rate": 2.8080123333514435e-06, "loss": 0.0325, "step": 121600 }, { "epoch": 0.5074020912785506, "grad_norm": 0.5721789824149471, "learning_rate": 2.8079546041402696e-06, "loss": 0.0243, "step": 121605 }, { "epoch": 0.5074229539935409, "grad_norm": 1.2800393457357984, "learning_rate": 2.8078968784894696e-06, "loss": 0.032, "step": 121610 }, { "epoch": 0.5074438167085312, "grad_norm": 0.8434542920564579, "learning_rate": 2.8078391563986774e-06, "loss": 0.0218, "step": 121615 }, { "epoch": 0.5074646794235215, "grad_norm": 0.5188345130259548, "learning_rate": 2.8077814378675277e-06, "loss": 0.0199, "step": 121620 }, { "epoch": 0.5074855421385117, "grad_norm": 0.6072768206237186, "learning_rate": 2.807723722895654e-06, "loss": 0.0255, "step": 121625 }, { "epoch": 0.5075064048535021, "grad_norm": 0.6119374542687441, "learning_rate": 2.8076660114826913e-06, "loss": 0.0246, "step": 121630 }, { "epoch": 0.5075272675684923, "grad_norm": 0.9329631837168325, "learning_rate": 2.807608303628273e-06, "loss": 0.0267, "step": 121635 }, { "epoch": 0.5075481302834826, "grad_norm": 1.0782598634717926, "learning_rate": 2.8075505993320353e-06, "loss": 0.0285, "step": 121640 }, { "epoch": 0.5075689929984728, "grad_norm": 0.8046164727853895, "learning_rate": 2.8074928985936097e-06, "loss": 0.0206, "step": 121645 }, { "epoch": 0.5075898557134632, "grad_norm": 0.8487034986594182, "learning_rate": 2.8074352014126326e-06, "loss": 0.0261, "step": 121650 }, { "epoch": 0.5076107184284534, "grad_norm": 0.8029442743064333, "learning_rate": 2.807377507788737e-06, "loss": 0.041, "step": 121655 }, { "epoch": 0.5076315811434436, "grad_norm": 0.9862628158244323, "learning_rate": 2.80731981772156e-06, "loss": 0.0378, "step": 121660 }, { "epoch": 0.507652443858434, "grad_norm": 0.9081361708988078, "learning_rate": 2.807262131210733e-06, "loss": 0.0278, "step": 121665 }, { "epoch": 0.5076733065734242, "grad_norm": 0.6890737183535313, "learning_rate": 2.8072044482558925e-06, "loss": 0.0246, "step": 121670 }, { "epoch": 0.5076941692884145, "grad_norm": 0.9080649255217778, "learning_rate": 2.8071467688566735e-06, "loss": 0.0221, "step": 121675 }, { "epoch": 0.5077150320034048, "grad_norm": 0.5985563357673382, "learning_rate": 2.8070890930127092e-06, "loss": 0.0165, "step": 121680 }, { "epoch": 0.5077358947183951, "grad_norm": 0.5774399054027849, "learning_rate": 2.807031420723636e-06, "loss": 0.0232, "step": 121685 }, { "epoch": 0.5077567574333853, "grad_norm": 1.3695136553642486, "learning_rate": 2.8069737519890877e-06, "loss": 0.0282, "step": 121690 }, { "epoch": 0.5077776201483756, "grad_norm": 0.668761282945551, "learning_rate": 2.806916086808699e-06, "loss": 0.0238, "step": 121695 }, { "epoch": 0.5077984828633659, "grad_norm": 0.8094931275789309, "learning_rate": 2.8068584251821058e-06, "loss": 0.0269, "step": 121700 }, { "epoch": 0.5078193455783562, "grad_norm": 0.6463515080781332, "learning_rate": 2.806800767108942e-06, "loss": 0.029, "step": 121705 }, { "epoch": 0.5078402082933464, "grad_norm": 0.44305448057776714, "learning_rate": 2.806743112588843e-06, "loss": 0.0232, "step": 121710 }, { "epoch": 0.5078610710083368, "grad_norm": 0.7923751528970873, "learning_rate": 2.8066854616214455e-06, "loss": 0.0267, "step": 121715 }, { "epoch": 0.507881933723327, "grad_norm": 0.5242414491254938, "learning_rate": 2.8066278142063824e-06, "loss": 0.0357, "step": 121720 }, { "epoch": 0.5079027964383173, "grad_norm": 1.9940351116356003, "learning_rate": 2.8065701703432897e-06, "loss": 0.0305, "step": 121725 }, { "epoch": 0.5079236591533076, "grad_norm": 0.26722280957482886, "learning_rate": 2.8065125300318023e-06, "loss": 0.018, "step": 121730 }, { "epoch": 0.5079445218682979, "grad_norm": 0.7331267113404519, "learning_rate": 2.806454893271557e-06, "loss": 0.0271, "step": 121735 }, { "epoch": 0.5079653845832881, "grad_norm": 0.6505668477987854, "learning_rate": 2.8063972600621876e-06, "loss": 0.0268, "step": 121740 }, { "epoch": 0.5079862472982785, "grad_norm": 0.4257737481212697, "learning_rate": 2.8063396304033297e-06, "loss": 0.0189, "step": 121745 }, { "epoch": 0.5080071100132687, "grad_norm": 0.6282474296863102, "learning_rate": 2.8062820042946193e-06, "loss": 0.0288, "step": 121750 }, { "epoch": 0.508027972728259, "grad_norm": 0.7110903717451732, "learning_rate": 2.8062243817356915e-06, "loss": 0.0202, "step": 121755 }, { "epoch": 0.5080488354432492, "grad_norm": 0.8722814239389649, "learning_rate": 2.8061667627261824e-06, "loss": 0.0241, "step": 121760 }, { "epoch": 0.5080696981582395, "grad_norm": 0.5271268294354632, "learning_rate": 2.806109147265727e-06, "loss": 0.0348, "step": 121765 }, { "epoch": 0.5080905608732298, "grad_norm": 0.4878349902992547, "learning_rate": 2.8060515353539613e-06, "loss": 0.0177, "step": 121770 }, { "epoch": 0.50811142358822, "grad_norm": 0.889642973368219, "learning_rate": 2.8059939269905213e-06, "loss": 0.0313, "step": 121775 }, { "epoch": 0.5081322863032104, "grad_norm": 0.9698513932122669, "learning_rate": 2.805936322175042e-06, "loss": 0.0231, "step": 121780 }, { "epoch": 0.5081531490182006, "grad_norm": 0.6777172987425537, "learning_rate": 2.8058787209071596e-06, "loss": 0.0211, "step": 121785 }, { "epoch": 0.5081740117331909, "grad_norm": 0.6770637117926054, "learning_rate": 2.8058211231865107e-06, "loss": 0.0256, "step": 121790 }, { "epoch": 0.5081948744481812, "grad_norm": 0.850512783809435, "learning_rate": 2.80576352901273e-06, "loss": 0.0235, "step": 121795 }, { "epoch": 0.5082157371631715, "grad_norm": 0.9727743639211825, "learning_rate": 2.805705938385454e-06, "loss": 0.0331, "step": 121800 }, { "epoch": 0.5082365998781617, "grad_norm": 0.7294620735133781, "learning_rate": 2.805648351304319e-06, "loss": 0.0208, "step": 121805 }, { "epoch": 0.5082574625931521, "grad_norm": 0.7591773667945725, "learning_rate": 2.8055907677689614e-06, "loss": 0.0255, "step": 121810 }, { "epoch": 0.5082783253081423, "grad_norm": 0.5548119273729606, "learning_rate": 2.8055331877790165e-06, "loss": 0.02, "step": 121815 }, { "epoch": 0.5082991880231326, "grad_norm": 0.42925516468788244, "learning_rate": 2.805475611334121e-06, "loss": 0.0227, "step": 121820 }, { "epoch": 0.5083200507381228, "grad_norm": 0.6627680376716341, "learning_rate": 2.8054180384339107e-06, "loss": 0.0224, "step": 121825 }, { "epoch": 0.5083409134531132, "grad_norm": 0.7608743328221395, "learning_rate": 2.8053604690780222e-06, "loss": 0.0251, "step": 121830 }, { "epoch": 0.5083617761681034, "grad_norm": 0.904059721238683, "learning_rate": 2.805302903266092e-06, "loss": 0.0245, "step": 121835 }, { "epoch": 0.5083826388830937, "grad_norm": 0.45522351323669025, "learning_rate": 2.8052453409977565e-06, "loss": 0.023, "step": 121840 }, { "epoch": 0.508403501598084, "grad_norm": 1.1564833262869374, "learning_rate": 2.805187782272652e-06, "loss": 0.0289, "step": 121845 }, { "epoch": 0.5084243643130743, "grad_norm": 0.89970465811837, "learning_rate": 2.8051302270904154e-06, "loss": 0.0279, "step": 121850 }, { "epoch": 0.5084452270280645, "grad_norm": 0.47788978293795553, "learning_rate": 2.8050726754506822e-06, "loss": 0.0237, "step": 121855 }, { "epoch": 0.5084660897430548, "grad_norm": 0.48103098387761684, "learning_rate": 2.80501512735309e-06, "loss": 0.0273, "step": 121860 }, { "epoch": 0.5084869524580451, "grad_norm": 0.6404299854747472, "learning_rate": 2.804957582797275e-06, "loss": 0.0236, "step": 121865 }, { "epoch": 0.5085078151730353, "grad_norm": 0.6104254156962664, "learning_rate": 2.804900041782875e-06, "loss": 0.0252, "step": 121870 }, { "epoch": 0.5085286778880256, "grad_norm": 0.6057350703953952, "learning_rate": 2.8048425043095247e-06, "loss": 0.0226, "step": 121875 }, { "epoch": 0.5085495406030159, "grad_norm": 1.4513809618038775, "learning_rate": 2.8047849703768625e-06, "loss": 0.0342, "step": 121880 }, { "epoch": 0.5085704033180062, "grad_norm": 0.4180773684527384, "learning_rate": 2.8047274399845247e-06, "loss": 0.0269, "step": 121885 }, { "epoch": 0.5085912660329964, "grad_norm": 0.3384787414789105, "learning_rate": 2.8046699131321486e-06, "loss": 0.0284, "step": 121890 }, { "epoch": 0.5086121287479868, "grad_norm": 0.778194725720919, "learning_rate": 2.8046123898193716e-06, "loss": 0.0274, "step": 121895 }, { "epoch": 0.508632991462977, "grad_norm": 0.5714338398465209, "learning_rate": 2.8045548700458293e-06, "loss": 0.0197, "step": 121900 }, { "epoch": 0.5086538541779673, "grad_norm": 0.538142856696804, "learning_rate": 2.80449735381116e-06, "loss": 0.0216, "step": 121905 }, { "epoch": 0.5086747168929576, "grad_norm": 0.4418957350793191, "learning_rate": 2.804439841115e-06, "loss": 0.0314, "step": 121910 }, { "epoch": 0.5086955796079479, "grad_norm": 0.5566404917781761, "learning_rate": 2.8043823319569873e-06, "loss": 0.0255, "step": 121915 }, { "epoch": 0.5087164423229381, "grad_norm": 0.47002120108825973, "learning_rate": 2.8043248263367587e-06, "loss": 0.019, "step": 121920 }, { "epoch": 0.5087373050379285, "grad_norm": 0.6689740519650816, "learning_rate": 2.8042673242539515e-06, "loss": 0.0178, "step": 121925 }, { "epoch": 0.5087581677529187, "grad_norm": 0.6238479613661115, "learning_rate": 2.8042098257082035e-06, "loss": 0.026, "step": 121930 }, { "epoch": 0.508779030467909, "grad_norm": 1.2849427233358997, "learning_rate": 2.804152330699151e-06, "loss": 0.0243, "step": 121935 }, { "epoch": 0.5087998931828992, "grad_norm": 0.6177349345166452, "learning_rate": 2.8040948392264326e-06, "loss": 0.0269, "step": 121940 }, { "epoch": 0.5088207558978896, "grad_norm": 0.4566967590291808, "learning_rate": 2.804037351289685e-06, "loss": 0.0252, "step": 121945 }, { "epoch": 0.5088416186128798, "grad_norm": 0.5195442718562894, "learning_rate": 2.8039798668885466e-06, "loss": 0.0244, "step": 121950 }, { "epoch": 0.50886248132787, "grad_norm": 0.5264609697503411, "learning_rate": 2.803922386022654e-06, "loss": 0.0219, "step": 121955 }, { "epoch": 0.5088833440428604, "grad_norm": 0.5483852116983357, "learning_rate": 2.8038649086916457e-06, "loss": 0.0254, "step": 121960 }, { "epoch": 0.5089042067578506, "grad_norm": 0.6921732075178654, "learning_rate": 2.8038074348951586e-06, "loss": 0.0289, "step": 121965 }, { "epoch": 0.5089250694728409, "grad_norm": 0.29561917370176566, "learning_rate": 2.8037499646328314e-06, "loss": 0.0273, "step": 121970 }, { "epoch": 0.5089459321878312, "grad_norm": 0.6819857903961906, "learning_rate": 2.8036924979043014e-06, "loss": 0.0324, "step": 121975 }, { "epoch": 0.5089667949028215, "grad_norm": 0.9063069645377199, "learning_rate": 2.8036350347092067e-06, "loss": 0.0246, "step": 121980 }, { "epoch": 0.5089876576178117, "grad_norm": 0.8487508735593423, "learning_rate": 2.8035775750471846e-06, "loss": 0.0303, "step": 121985 }, { "epoch": 0.5090085203328021, "grad_norm": 0.6455443750413193, "learning_rate": 2.8035201189178735e-06, "loss": 0.0292, "step": 121990 }, { "epoch": 0.5090293830477923, "grad_norm": 0.6765025839145512, "learning_rate": 2.8034626663209114e-06, "loss": 0.0183, "step": 121995 }, { "epoch": 0.5090502457627826, "grad_norm": 0.38651213313104005, "learning_rate": 2.803405217255937e-06, "loss": 0.0198, "step": 122000 }, { "epoch": 0.5090711084777728, "grad_norm": 0.6581827479759171, "learning_rate": 2.803347771722587e-06, "loss": 0.0263, "step": 122005 }, { "epoch": 0.5090919711927632, "grad_norm": 0.9271425432115251, "learning_rate": 2.8032903297205005e-06, "loss": 0.0316, "step": 122010 }, { "epoch": 0.5091128339077534, "grad_norm": 0.25264415973892224, "learning_rate": 2.803232891249316e-06, "loss": 0.0233, "step": 122015 }, { "epoch": 0.5091336966227437, "grad_norm": 1.1178057936328174, "learning_rate": 2.803175456308671e-06, "loss": 0.0287, "step": 122020 }, { "epoch": 0.509154559337734, "grad_norm": 0.35028547626978723, "learning_rate": 2.803118024898205e-06, "loss": 0.0171, "step": 122025 }, { "epoch": 0.5091754220527243, "grad_norm": 0.571822356937, "learning_rate": 2.803060597017555e-06, "loss": 0.028, "step": 122030 }, { "epoch": 0.5091962847677145, "grad_norm": 0.6312872140345467, "learning_rate": 2.8030031726663597e-06, "loss": 0.0234, "step": 122035 }, { "epoch": 0.5092171474827049, "grad_norm": 0.6885747669870976, "learning_rate": 2.802945751844259e-06, "loss": 0.0246, "step": 122040 }, { "epoch": 0.5092380101976951, "grad_norm": 0.8115229390567624, "learning_rate": 2.8028883345508896e-06, "loss": 0.03, "step": 122045 }, { "epoch": 0.5092588729126853, "grad_norm": 0.5588811689942196, "learning_rate": 2.8028309207858907e-06, "loss": 0.0222, "step": 122050 }, { "epoch": 0.5092797356276756, "grad_norm": 1.0004032297462606, "learning_rate": 2.8027735105489014e-06, "loss": 0.0215, "step": 122055 }, { "epoch": 0.5093005983426659, "grad_norm": 0.5402974470322526, "learning_rate": 2.80271610383956e-06, "loss": 0.0184, "step": 122060 }, { "epoch": 0.5093214610576562, "grad_norm": 0.5291955813901376, "learning_rate": 2.8026587006575055e-06, "loss": 0.0183, "step": 122065 }, { "epoch": 0.5093423237726464, "grad_norm": 0.7489754800724518, "learning_rate": 2.802601301002377e-06, "loss": 0.0242, "step": 122070 }, { "epoch": 0.5093631864876368, "grad_norm": 0.5408567843563903, "learning_rate": 2.8025439048738118e-06, "loss": 0.0307, "step": 122075 }, { "epoch": 0.509384049202627, "grad_norm": 0.5973687584651151, "learning_rate": 2.802486512271451e-06, "loss": 0.0215, "step": 122080 }, { "epoch": 0.5094049119176173, "grad_norm": 0.5957061865660149, "learning_rate": 2.8024291231949314e-06, "loss": 0.031, "step": 122085 }, { "epoch": 0.5094257746326076, "grad_norm": 0.7995283030170115, "learning_rate": 2.8023717376438936e-06, "loss": 0.0227, "step": 122090 }, { "epoch": 0.5094466373475979, "grad_norm": 1.4993936466628013, "learning_rate": 2.802314355617976e-06, "loss": 0.0284, "step": 122095 }, { "epoch": 0.5094675000625881, "grad_norm": 0.4927620696383747, "learning_rate": 2.802256977116818e-06, "loss": 0.0219, "step": 122100 }, { "epoch": 0.5094883627775785, "grad_norm": 0.9598537458707179, "learning_rate": 2.8021996021400582e-06, "loss": 0.0231, "step": 122105 }, { "epoch": 0.5095092254925687, "grad_norm": 0.9742498713469623, "learning_rate": 2.8021422306873362e-06, "loss": 0.0276, "step": 122110 }, { "epoch": 0.509530088207559, "grad_norm": 0.46336294553527085, "learning_rate": 2.802084862758292e-06, "loss": 0.0245, "step": 122115 }, { "epoch": 0.5095509509225492, "grad_norm": 0.2989834991083524, "learning_rate": 2.802027498352563e-06, "loss": 0.0223, "step": 122120 }, { "epoch": 0.5095718136375396, "grad_norm": 0.6325996708817692, "learning_rate": 2.801970137469791e-06, "loss": 0.0195, "step": 122125 }, { "epoch": 0.5095926763525298, "grad_norm": 0.9151167443391598, "learning_rate": 2.8019127801096137e-06, "loss": 0.0197, "step": 122130 }, { "epoch": 0.50961353906752, "grad_norm": 0.3566221722355801, "learning_rate": 2.8018554262716707e-06, "loss": 0.0201, "step": 122135 }, { "epoch": 0.5096344017825104, "grad_norm": 0.7151424728145326, "learning_rate": 2.8017980759556017e-06, "loss": 0.0271, "step": 122140 }, { "epoch": 0.5096552644975006, "grad_norm": 0.624208960103086, "learning_rate": 2.8017407291610473e-06, "loss": 0.0187, "step": 122145 }, { "epoch": 0.5096761272124909, "grad_norm": 0.3489886554000397, "learning_rate": 2.8016833858876456e-06, "loss": 0.0236, "step": 122150 }, { "epoch": 0.5096969899274812, "grad_norm": 0.580538299070867, "learning_rate": 2.8016260461350368e-06, "loss": 0.0287, "step": 122155 }, { "epoch": 0.5097178526424715, "grad_norm": 0.6253534296385367, "learning_rate": 2.801568709902861e-06, "loss": 0.0277, "step": 122160 }, { "epoch": 0.5097387153574617, "grad_norm": 0.4471965542657839, "learning_rate": 2.801511377190758e-06, "loss": 0.0228, "step": 122165 }, { "epoch": 0.5097595780724521, "grad_norm": 0.5510221796208636, "learning_rate": 2.801454047998367e-06, "loss": 0.0186, "step": 122170 }, { "epoch": 0.5097804407874423, "grad_norm": 0.5163718455096615, "learning_rate": 2.8013967223253286e-06, "loss": 0.0361, "step": 122175 }, { "epoch": 0.5098013035024326, "grad_norm": 0.47710144736929383, "learning_rate": 2.801339400171282e-06, "loss": 0.0166, "step": 122180 }, { "epoch": 0.5098221662174228, "grad_norm": 1.240491666265262, "learning_rate": 2.801282081535868e-06, "loss": 0.0219, "step": 122185 }, { "epoch": 0.5098430289324132, "grad_norm": 0.717111733803219, "learning_rate": 2.8012247664187255e-06, "loss": 0.0187, "step": 122190 }, { "epoch": 0.5098638916474034, "grad_norm": 1.1246705535426877, "learning_rate": 2.801167454819496e-06, "loss": 0.0197, "step": 122195 }, { "epoch": 0.5098847543623937, "grad_norm": 1.800036878905826, "learning_rate": 2.801110146737818e-06, "loss": 0.0356, "step": 122200 }, { "epoch": 0.509905617077384, "grad_norm": 0.917674181238289, "learning_rate": 2.8010528421733343e-06, "loss": 0.0242, "step": 122205 }, { "epoch": 0.5099264797923743, "grad_norm": 0.49402887754651975, "learning_rate": 2.800995541125682e-06, "loss": 0.0239, "step": 122210 }, { "epoch": 0.5099473425073645, "grad_norm": 0.5722892832764814, "learning_rate": 2.8009382435945037e-06, "loss": 0.0206, "step": 122215 }, { "epoch": 0.5099682052223549, "grad_norm": 0.4594372054840582, "learning_rate": 2.8008809495794387e-06, "loss": 0.0173, "step": 122220 }, { "epoch": 0.5099890679373451, "grad_norm": 0.8797941492860357, "learning_rate": 2.8008236590801275e-06, "loss": 0.0292, "step": 122225 }, { "epoch": 0.5100099306523354, "grad_norm": 0.6116866238654384, "learning_rate": 2.800766372096211e-06, "loss": 0.0199, "step": 122230 }, { "epoch": 0.5100307933673256, "grad_norm": 0.8373036305683002, "learning_rate": 2.8007090886273288e-06, "loss": 0.0342, "step": 122235 }, { "epoch": 0.510051656082316, "grad_norm": 0.5077760354724521, "learning_rate": 2.800651808673122e-06, "loss": 0.019, "step": 122240 }, { "epoch": 0.5100725187973062, "grad_norm": 0.45567520269420925, "learning_rate": 2.800594532233232e-06, "loss": 0.0245, "step": 122245 }, { "epoch": 0.5100933815122964, "grad_norm": 0.4883558524922146, "learning_rate": 2.8005372593072983e-06, "loss": 0.0223, "step": 122250 }, { "epoch": 0.5101142442272868, "grad_norm": 0.5112751533368346, "learning_rate": 2.800479989894962e-06, "loss": 0.0197, "step": 122255 }, { "epoch": 0.510135106942277, "grad_norm": 0.3709211516684981, "learning_rate": 2.8004227239958637e-06, "loss": 0.0173, "step": 122260 }, { "epoch": 0.5101559696572673, "grad_norm": 1.123871037910711, "learning_rate": 2.8003654616096446e-06, "loss": 0.0198, "step": 122265 }, { "epoch": 0.5101768323722576, "grad_norm": 0.7789500633224189, "learning_rate": 2.800308202735945e-06, "loss": 0.0258, "step": 122270 }, { "epoch": 0.5101976950872479, "grad_norm": 1.2951796622532943, "learning_rate": 2.800250947374406e-06, "loss": 0.0299, "step": 122275 }, { "epoch": 0.5102185578022381, "grad_norm": 0.5826425953470826, "learning_rate": 2.80019369552467e-06, "loss": 0.0259, "step": 122280 }, { "epoch": 0.5102394205172285, "grad_norm": 0.7298486526910065, "learning_rate": 2.800136447186375e-06, "loss": 0.0409, "step": 122285 }, { "epoch": 0.5102602832322187, "grad_norm": 0.591855566009977, "learning_rate": 2.800079202359165e-06, "loss": 0.0283, "step": 122290 }, { "epoch": 0.510281145947209, "grad_norm": 0.8662834610024042, "learning_rate": 2.8000219610426793e-06, "loss": 0.0305, "step": 122295 }, { "epoch": 0.5103020086621992, "grad_norm": 0.7762905383521495, "learning_rate": 2.79996472323656e-06, "loss": 0.0319, "step": 122300 }, { "epoch": 0.5103228713771896, "grad_norm": 0.5359684904419683, "learning_rate": 2.799907488940448e-06, "loss": 0.0289, "step": 122305 }, { "epoch": 0.5103437340921798, "grad_norm": 0.7084473286042818, "learning_rate": 2.7998502581539845e-06, "loss": 0.024, "step": 122310 }, { "epoch": 0.5103645968071701, "grad_norm": 1.017412754271035, "learning_rate": 2.7997930308768106e-06, "loss": 0.0302, "step": 122315 }, { "epoch": 0.5103854595221604, "grad_norm": 1.6917700358302987, "learning_rate": 2.7997358071085685e-06, "loss": 0.0331, "step": 122320 }, { "epoch": 0.5104063222371507, "grad_norm": 0.7995783117214763, "learning_rate": 2.799678586848899e-06, "loss": 0.0253, "step": 122325 }, { "epoch": 0.5104271849521409, "grad_norm": 1.094848220668294, "learning_rate": 2.7996213700974436e-06, "loss": 0.0269, "step": 122330 }, { "epoch": 0.5104480476671313, "grad_norm": 0.8798912352466067, "learning_rate": 2.7995641568538434e-06, "loss": 0.0203, "step": 122335 }, { "epoch": 0.5104689103821215, "grad_norm": 0.9251482562813623, "learning_rate": 2.7995069471177418e-06, "loss": 0.027, "step": 122340 }, { "epoch": 0.5104897730971117, "grad_norm": 0.4766395268273469, "learning_rate": 2.7994497408887775e-06, "loss": 0.02, "step": 122345 }, { "epoch": 0.5105106358121021, "grad_norm": 0.9580231068715596, "learning_rate": 2.799392538166595e-06, "loss": 0.0324, "step": 122350 }, { "epoch": 0.5105314985270923, "grad_norm": 0.7983368700775249, "learning_rate": 2.7993353389508344e-06, "loss": 0.0305, "step": 122355 }, { "epoch": 0.5105523612420826, "grad_norm": 1.385009898589106, "learning_rate": 2.799278143241138e-06, "loss": 0.0235, "step": 122360 }, { "epoch": 0.5105732239570728, "grad_norm": 0.7535460282282279, "learning_rate": 2.7992209510371476e-06, "loss": 0.0269, "step": 122365 }, { "epoch": 0.5105940866720632, "grad_norm": 0.5066136496447505, "learning_rate": 2.7991637623385047e-06, "loss": 0.0247, "step": 122370 }, { "epoch": 0.5106149493870534, "grad_norm": 0.532096120381045, "learning_rate": 2.7991065771448524e-06, "loss": 0.0262, "step": 122375 }, { "epoch": 0.5106358121020437, "grad_norm": 0.7113562591250446, "learning_rate": 2.799049395455831e-06, "loss": 0.0182, "step": 122380 }, { "epoch": 0.510656674817034, "grad_norm": 0.5108162152126138, "learning_rate": 2.798992217271084e-06, "loss": 0.0219, "step": 122385 }, { "epoch": 0.5106775375320243, "grad_norm": 0.8072378849556595, "learning_rate": 2.7989350425902526e-06, "loss": 0.0259, "step": 122390 }, { "epoch": 0.5106984002470145, "grad_norm": 0.5824972391470497, "learning_rate": 2.7988778714129793e-06, "loss": 0.0185, "step": 122395 }, { "epoch": 0.5107192629620049, "grad_norm": 1.3401463610312172, "learning_rate": 2.7988207037389063e-06, "loss": 0.0305, "step": 122400 }, { "epoch": 0.5107401256769951, "grad_norm": 0.437617706939729, "learning_rate": 2.798763539567676e-06, "loss": 0.0303, "step": 122405 }, { "epoch": 0.5107609883919854, "grad_norm": 0.8494179870081726, "learning_rate": 2.7987063788989307e-06, "loss": 0.022, "step": 122410 }, { "epoch": 0.5107818511069756, "grad_norm": 0.5698204798375713, "learning_rate": 2.798649221732312e-06, "loss": 0.0224, "step": 122415 }, { "epoch": 0.510802713821966, "grad_norm": 0.7384241093118619, "learning_rate": 2.7985920680674634e-06, "loss": 0.0242, "step": 122420 }, { "epoch": 0.5108235765369562, "grad_norm": 0.6268487693262945, "learning_rate": 2.7985349179040263e-06, "loss": 0.0252, "step": 122425 }, { "epoch": 0.5108444392519464, "grad_norm": 0.47353508959114177, "learning_rate": 2.798477771241644e-06, "loss": 0.0213, "step": 122430 }, { "epoch": 0.5108653019669368, "grad_norm": 0.2876464647138192, "learning_rate": 2.7984206280799583e-06, "loss": 0.0183, "step": 122435 }, { "epoch": 0.510886164681927, "grad_norm": 0.26504174554497845, "learning_rate": 2.7983634884186124e-06, "loss": 0.0209, "step": 122440 }, { "epoch": 0.5109070273969173, "grad_norm": 1.3056745753278975, "learning_rate": 2.798306352257249e-06, "loss": 0.0313, "step": 122445 }, { "epoch": 0.5109278901119076, "grad_norm": 0.3898128193798127, "learning_rate": 2.7982492195955113e-06, "loss": 0.022, "step": 122450 }, { "epoch": 0.5109487528268979, "grad_norm": 0.5271465548125907, "learning_rate": 2.79819209043304e-06, "loss": 0.0252, "step": 122455 }, { "epoch": 0.5109696155418881, "grad_norm": 0.5257460094908157, "learning_rate": 2.7981349647694804e-06, "loss": 0.0163, "step": 122460 }, { "epoch": 0.5109904782568785, "grad_norm": 0.7166565616786014, "learning_rate": 2.7980778426044734e-06, "loss": 0.0193, "step": 122465 }, { "epoch": 0.5110113409718687, "grad_norm": 0.9377371149489513, "learning_rate": 2.798020723937664e-06, "loss": 0.0205, "step": 122470 }, { "epoch": 0.511032203686859, "grad_norm": 0.8539684956223528, "learning_rate": 2.797963608768693e-06, "loss": 0.025, "step": 122475 }, { "epoch": 0.5110530664018492, "grad_norm": 1.0570768770167598, "learning_rate": 2.7979064970972043e-06, "loss": 0.0245, "step": 122480 }, { "epoch": 0.5110739291168396, "grad_norm": 1.2319337073257617, "learning_rate": 2.7978493889228413e-06, "loss": 0.0263, "step": 122485 }, { "epoch": 0.5110947918318298, "grad_norm": 0.9869199905801798, "learning_rate": 2.7977922842452466e-06, "loss": 0.0232, "step": 122490 }, { "epoch": 0.5111156545468201, "grad_norm": 0.42326912133450867, "learning_rate": 2.797735183064064e-06, "loss": 0.0193, "step": 122495 }, { "epoch": 0.5111365172618104, "grad_norm": 1.5661796503277312, "learning_rate": 2.7976780853789354e-06, "loss": 0.0268, "step": 122500 }, { "epoch": 0.5111573799768007, "grad_norm": 0.5334034349311139, "learning_rate": 2.797620991189506e-06, "loss": 0.02, "step": 122505 }, { "epoch": 0.5111782426917909, "grad_norm": 0.7391844265045353, "learning_rate": 2.7975639004954173e-06, "loss": 0.0214, "step": 122510 }, { "epoch": 0.5111991054067813, "grad_norm": 0.5100386688438489, "learning_rate": 2.797506813296314e-06, "loss": 0.0194, "step": 122515 }, { "epoch": 0.5112199681217715, "grad_norm": 0.6498714756203242, "learning_rate": 2.797449729591839e-06, "loss": 0.0308, "step": 122520 }, { "epoch": 0.5112408308367618, "grad_norm": 1.3473619086732438, "learning_rate": 2.7973926493816354e-06, "loss": 0.0324, "step": 122525 }, { "epoch": 0.5112616935517521, "grad_norm": 0.7760572141372376, "learning_rate": 2.797335572665347e-06, "loss": 0.0277, "step": 122530 }, { "epoch": 0.5112825562667423, "grad_norm": 0.7739547867746738, "learning_rate": 2.797278499442618e-06, "loss": 0.0226, "step": 122535 }, { "epoch": 0.5113034189817326, "grad_norm": 0.7435654449208098, "learning_rate": 2.7972214297130906e-06, "loss": 0.026, "step": 122540 }, { "epoch": 0.5113242816967228, "grad_norm": 0.7348089371217276, "learning_rate": 2.797164363476411e-06, "loss": 0.0205, "step": 122545 }, { "epoch": 0.5113451444117132, "grad_norm": 0.44980344504075637, "learning_rate": 2.797107300732219e-06, "loss": 0.0236, "step": 122550 }, { "epoch": 0.5113660071267034, "grad_norm": 1.6123678716498164, "learning_rate": 2.797050241480162e-06, "loss": 0.0314, "step": 122555 }, { "epoch": 0.5113868698416937, "grad_norm": 1.1704280520608725, "learning_rate": 2.7969931857198825e-06, "loss": 0.0261, "step": 122560 }, { "epoch": 0.511407732556684, "grad_norm": 0.418524199300622, "learning_rate": 2.796936133451024e-06, "loss": 0.0176, "step": 122565 }, { "epoch": 0.5114285952716743, "grad_norm": 1.2165936094428866, "learning_rate": 2.796879084673231e-06, "loss": 0.0361, "step": 122570 }, { "epoch": 0.5114494579866645, "grad_norm": 0.897084824193026, "learning_rate": 2.7968220393861467e-06, "loss": 0.0252, "step": 122575 }, { "epoch": 0.5114703207016549, "grad_norm": 0.4407169803749717, "learning_rate": 2.7967649975894164e-06, "loss": 0.0237, "step": 122580 }, { "epoch": 0.5114911834166451, "grad_norm": 0.4952381558534126, "learning_rate": 2.796707959282683e-06, "loss": 0.0294, "step": 122585 }, { "epoch": 0.5115120461316354, "grad_norm": 0.8033196287523605, "learning_rate": 2.7966509244655917e-06, "loss": 0.0283, "step": 122590 }, { "epoch": 0.5115329088466256, "grad_norm": 0.7529310426596365, "learning_rate": 2.796593893137785e-06, "loss": 0.0289, "step": 122595 }, { "epoch": 0.511553771561616, "grad_norm": 0.7647731547062301, "learning_rate": 2.796536865298909e-06, "loss": 0.0212, "step": 122600 }, { "epoch": 0.5115746342766062, "grad_norm": 0.5449702995982642, "learning_rate": 2.796479840948608e-06, "loss": 0.0197, "step": 122605 }, { "epoch": 0.5115954969915965, "grad_norm": 0.8290527480029163, "learning_rate": 2.796422820086524e-06, "loss": 0.0268, "step": 122610 }, { "epoch": 0.5116163597065868, "grad_norm": 1.5427174139752122, "learning_rate": 2.7963658027123037e-06, "loss": 0.0282, "step": 122615 }, { "epoch": 0.511637222421577, "grad_norm": 0.8887394240580404, "learning_rate": 2.7963087888255902e-06, "loss": 0.0239, "step": 122620 }, { "epoch": 0.5116580851365673, "grad_norm": 0.6045777537770426, "learning_rate": 2.796251778426029e-06, "loss": 0.0321, "step": 122625 }, { "epoch": 0.5116789478515577, "grad_norm": 0.3008678223269452, "learning_rate": 2.796194771513265e-06, "loss": 0.0197, "step": 122630 }, { "epoch": 0.5116998105665479, "grad_norm": 0.5981162848611892, "learning_rate": 2.7961377680869407e-06, "loss": 0.0192, "step": 122635 }, { "epoch": 0.5117206732815381, "grad_norm": 0.4567029770786442, "learning_rate": 2.7960807681467026e-06, "loss": 0.0203, "step": 122640 }, { "epoch": 0.5117415359965285, "grad_norm": 1.517250143734054, "learning_rate": 2.7960237716921953e-06, "loss": 0.0247, "step": 122645 }, { "epoch": 0.5117623987115187, "grad_norm": 0.883255128645761, "learning_rate": 2.795966778723062e-06, "loss": 0.035, "step": 122650 }, { "epoch": 0.511783261426509, "grad_norm": 1.0562633803843846, "learning_rate": 2.7959097892389493e-06, "loss": 0.0245, "step": 122655 }, { "epoch": 0.5118041241414992, "grad_norm": 0.6040059775137753, "learning_rate": 2.795852803239501e-06, "loss": 0.0198, "step": 122660 }, { "epoch": 0.5118249868564896, "grad_norm": 0.8907160562219566, "learning_rate": 2.795795820724362e-06, "loss": 0.0255, "step": 122665 }, { "epoch": 0.5118458495714798, "grad_norm": 0.8017884514493032, "learning_rate": 2.795738841693178e-06, "loss": 0.0262, "step": 122670 }, { "epoch": 0.5118667122864701, "grad_norm": 0.40206390382458634, "learning_rate": 2.795681866145594e-06, "loss": 0.0201, "step": 122675 }, { "epoch": 0.5118875750014604, "grad_norm": 0.5602907739042378, "learning_rate": 2.795624894081253e-06, "loss": 0.0252, "step": 122680 }, { "epoch": 0.5119084377164507, "grad_norm": 0.5323885215657544, "learning_rate": 2.795567925499803e-06, "loss": 0.0341, "step": 122685 }, { "epoch": 0.5119293004314409, "grad_norm": 0.41071357752258053, "learning_rate": 2.7955109604008873e-06, "loss": 0.0274, "step": 122690 }, { "epoch": 0.5119501631464313, "grad_norm": 0.5452010307947208, "learning_rate": 2.7954539987841517e-06, "loss": 0.0218, "step": 122695 }, { "epoch": 0.5119710258614215, "grad_norm": 0.8195775605751027, "learning_rate": 2.7953970406492412e-06, "loss": 0.0244, "step": 122700 }, { "epoch": 0.5119918885764118, "grad_norm": 1.6746592149775494, "learning_rate": 2.7953400859958018e-06, "loss": 0.0269, "step": 122705 }, { "epoch": 0.5120127512914021, "grad_norm": 0.40983627544968637, "learning_rate": 2.7952831348234784e-06, "loss": 0.0189, "step": 122710 }, { "epoch": 0.5120336140063924, "grad_norm": 0.6108378195848055, "learning_rate": 2.795226187131916e-06, "loss": 0.022, "step": 122715 }, { "epoch": 0.5120544767213826, "grad_norm": 0.6181484086804361, "learning_rate": 2.79516924292076e-06, "loss": 0.0221, "step": 122720 }, { "epoch": 0.5120753394363728, "grad_norm": 0.7704745639858962, "learning_rate": 2.7951123021896563e-06, "loss": 0.0237, "step": 122725 }, { "epoch": 0.5120962021513632, "grad_norm": 0.9456887019685408, "learning_rate": 2.795055364938251e-06, "loss": 0.0228, "step": 122730 }, { "epoch": 0.5121170648663534, "grad_norm": 0.7956139585811388, "learning_rate": 2.7949984311661886e-06, "loss": 0.0279, "step": 122735 }, { "epoch": 0.5121379275813437, "grad_norm": 0.5397606493352879, "learning_rate": 2.794941500873115e-06, "loss": 0.023, "step": 122740 }, { "epoch": 0.512158790296334, "grad_norm": 0.6615554758372566, "learning_rate": 2.7948845740586772e-06, "loss": 0.0175, "step": 122745 }, { "epoch": 0.5121796530113243, "grad_norm": 0.6223861847621249, "learning_rate": 2.794827650722519e-06, "loss": 0.0285, "step": 122750 }, { "epoch": 0.5122005157263145, "grad_norm": 1.2093762240697965, "learning_rate": 2.7947707308642873e-06, "loss": 0.0246, "step": 122755 }, { "epoch": 0.5122213784413049, "grad_norm": 0.8097278392642366, "learning_rate": 2.7947138144836283e-06, "loss": 0.0266, "step": 122760 }, { "epoch": 0.5122422411562951, "grad_norm": 0.7974373670621452, "learning_rate": 2.794656901580187e-06, "loss": 0.0193, "step": 122765 }, { "epoch": 0.5122631038712854, "grad_norm": 0.7075806895175805, "learning_rate": 2.7945999921536097e-06, "loss": 0.0282, "step": 122770 }, { "epoch": 0.5122839665862756, "grad_norm": 0.6742756968332457, "learning_rate": 2.7945430862035428e-06, "loss": 0.0295, "step": 122775 }, { "epoch": 0.512304829301266, "grad_norm": 1.409835846338337, "learning_rate": 2.7944861837296317e-06, "loss": 0.0319, "step": 122780 }, { "epoch": 0.5123256920162562, "grad_norm": 0.5222339640610372, "learning_rate": 2.794429284731523e-06, "loss": 0.0207, "step": 122785 }, { "epoch": 0.5123465547312465, "grad_norm": 0.9746157485841492, "learning_rate": 2.7943723892088626e-06, "loss": 0.0257, "step": 122790 }, { "epoch": 0.5123674174462368, "grad_norm": 0.7640482299587171, "learning_rate": 2.794315497161297e-06, "loss": 0.0331, "step": 122795 }, { "epoch": 0.5123882801612271, "grad_norm": 0.3689674568600815, "learning_rate": 2.7942586085884713e-06, "loss": 0.0281, "step": 122800 }, { "epoch": 0.5124091428762173, "grad_norm": 0.7817542010016559, "learning_rate": 2.794201723490034e-06, "loss": 0.0307, "step": 122805 }, { "epoch": 0.5124300055912077, "grad_norm": 0.8632039231867342, "learning_rate": 2.794144841865629e-06, "loss": 0.0261, "step": 122810 }, { "epoch": 0.5124508683061979, "grad_norm": 0.6852393194602284, "learning_rate": 2.794087963714905e-06, "loss": 0.0303, "step": 122815 }, { "epoch": 0.5124717310211881, "grad_norm": 0.7086943360020197, "learning_rate": 2.7940310890375065e-06, "loss": 0.0247, "step": 122820 }, { "epoch": 0.5124925937361785, "grad_norm": 0.5147415421011245, "learning_rate": 2.7939742178330813e-06, "loss": 0.0264, "step": 122825 }, { "epoch": 0.5125134564511687, "grad_norm": 0.7127719051174372, "learning_rate": 2.7939173501012756e-06, "loss": 0.0213, "step": 122830 }, { "epoch": 0.512534319166159, "grad_norm": 0.5467182040290165, "learning_rate": 2.793860485841736e-06, "loss": 0.0209, "step": 122835 }, { "epoch": 0.5125551818811492, "grad_norm": 0.4807427373455481, "learning_rate": 2.793803625054109e-06, "loss": 0.022, "step": 122840 }, { "epoch": 0.5125760445961396, "grad_norm": 0.6643198186158468, "learning_rate": 2.7937467677380414e-06, "loss": 0.0367, "step": 122845 }, { "epoch": 0.5125969073111298, "grad_norm": 0.8779538570342156, "learning_rate": 2.79368991389318e-06, "loss": 0.0217, "step": 122850 }, { "epoch": 0.5126177700261201, "grad_norm": 0.38320494662323246, "learning_rate": 2.7936330635191715e-06, "loss": 0.0246, "step": 122855 }, { "epoch": 0.5126386327411104, "grad_norm": 1.2168623863975767, "learning_rate": 2.793576216615663e-06, "loss": 0.0271, "step": 122860 }, { "epoch": 0.5126594954561007, "grad_norm": 0.573339193763772, "learning_rate": 2.7935193731823013e-06, "loss": 0.0199, "step": 122865 }, { "epoch": 0.5126803581710909, "grad_norm": 1.5894073332072483, "learning_rate": 2.793462533218733e-06, "loss": 0.0246, "step": 122870 }, { "epoch": 0.5127012208860813, "grad_norm": 0.45859039487600206, "learning_rate": 2.7934056967246063e-06, "loss": 0.0136, "step": 122875 }, { "epoch": 0.5127220836010715, "grad_norm": 1.1120988808541497, "learning_rate": 2.7933488636995665e-06, "loss": 0.0258, "step": 122880 }, { "epoch": 0.5127429463160618, "grad_norm": 0.9115404409546831, "learning_rate": 2.793292034143262e-06, "loss": 0.029, "step": 122885 }, { "epoch": 0.5127638090310521, "grad_norm": 0.9159353983907675, "learning_rate": 2.79323520805534e-06, "loss": 0.0333, "step": 122890 }, { "epoch": 0.5127846717460424, "grad_norm": 1.269023769149686, "learning_rate": 2.7931783854354466e-06, "loss": 0.0333, "step": 122895 }, { "epoch": 0.5128055344610326, "grad_norm": 0.7503094154992129, "learning_rate": 2.79312156628323e-06, "loss": 0.0194, "step": 122900 }, { "epoch": 0.5128263971760229, "grad_norm": 1.1393391741682628, "learning_rate": 2.7930647505983375e-06, "loss": 0.0205, "step": 122905 }, { "epoch": 0.5128472598910132, "grad_norm": 1.080848397097859, "learning_rate": 2.7930079383804165e-06, "loss": 0.024, "step": 122910 }, { "epoch": 0.5128681226060035, "grad_norm": 0.8337240579090738, "learning_rate": 2.792951129629114e-06, "loss": 0.0296, "step": 122915 }, { "epoch": 0.5128889853209937, "grad_norm": 0.7977788768396407, "learning_rate": 2.7928943243440776e-06, "loss": 0.0292, "step": 122920 }, { "epoch": 0.512909848035984, "grad_norm": 0.4558419235981754, "learning_rate": 2.7928375225249543e-06, "loss": 0.0269, "step": 122925 }, { "epoch": 0.5129307107509743, "grad_norm": 1.3582652195126537, "learning_rate": 2.7927807241713926e-06, "loss": 0.0327, "step": 122930 }, { "epoch": 0.5129515734659645, "grad_norm": 0.6084308147397776, "learning_rate": 2.79272392928304e-06, "loss": 0.0308, "step": 122935 }, { "epoch": 0.5129724361809549, "grad_norm": 0.6653401449411557, "learning_rate": 2.7926671378595437e-06, "loss": 0.0277, "step": 122940 }, { "epoch": 0.5129932988959451, "grad_norm": 0.61587306104334, "learning_rate": 2.7926103499005518e-06, "loss": 0.0253, "step": 122945 }, { "epoch": 0.5130141616109354, "grad_norm": 0.6305814472605962, "learning_rate": 2.7925535654057116e-06, "loss": 0.0219, "step": 122950 }, { "epoch": 0.5130350243259256, "grad_norm": 0.6773284509087083, "learning_rate": 2.7924967843746716e-06, "loss": 0.0282, "step": 122955 }, { "epoch": 0.513055887040916, "grad_norm": 0.4829335604519141, "learning_rate": 2.792440006807079e-06, "loss": 0.0178, "step": 122960 }, { "epoch": 0.5130767497559062, "grad_norm": 0.8415430770950986, "learning_rate": 2.792383232702582e-06, "loss": 0.021, "step": 122965 }, { "epoch": 0.5130976124708965, "grad_norm": 0.5155983767847037, "learning_rate": 2.792326462060829e-06, "loss": 0.0315, "step": 122970 }, { "epoch": 0.5131184751858868, "grad_norm": 0.645894319014365, "learning_rate": 2.7922696948814664e-06, "loss": 0.0177, "step": 122975 }, { "epoch": 0.5131393379008771, "grad_norm": 0.8006201538488832, "learning_rate": 2.7922129311641445e-06, "loss": 0.0207, "step": 122980 }, { "epoch": 0.5131602006158673, "grad_norm": 0.6726350105753166, "learning_rate": 2.79215617090851e-06, "loss": 0.0167, "step": 122985 }, { "epoch": 0.5131810633308577, "grad_norm": 0.38700820153873045, "learning_rate": 2.7920994141142115e-06, "loss": 0.024, "step": 122990 }, { "epoch": 0.5132019260458479, "grad_norm": 0.4441708306405165, "learning_rate": 2.792042660780897e-06, "loss": 0.0215, "step": 122995 }, { "epoch": 0.5132227887608382, "grad_norm": 0.3100539238381629, "learning_rate": 2.7919859109082146e-06, "loss": 0.0189, "step": 123000 }, { "epoch": 0.5132436514758285, "grad_norm": 0.9724519455950524, "learning_rate": 2.7919291644958135e-06, "loss": 0.0207, "step": 123005 }, { "epoch": 0.5132645141908188, "grad_norm": 0.5644039249728685, "learning_rate": 2.7918724215433413e-06, "loss": 0.0253, "step": 123010 }, { "epoch": 0.513285376905809, "grad_norm": 0.758537764099906, "learning_rate": 2.7918156820504468e-06, "loss": 0.0283, "step": 123015 }, { "epoch": 0.5133062396207992, "grad_norm": 0.47975500879949856, "learning_rate": 2.791758946016778e-06, "loss": 0.0237, "step": 123020 }, { "epoch": 0.5133271023357896, "grad_norm": 1.0607134206465179, "learning_rate": 2.7917022134419837e-06, "loss": 0.0213, "step": 123025 }, { "epoch": 0.5133479650507798, "grad_norm": 0.6378818506430297, "learning_rate": 2.7916454843257125e-06, "loss": 0.0215, "step": 123030 }, { "epoch": 0.5133688277657701, "grad_norm": 0.728131614921769, "learning_rate": 2.7915887586676133e-06, "loss": 0.0249, "step": 123035 }, { "epoch": 0.5133896904807604, "grad_norm": 0.9382530418279547, "learning_rate": 2.7915320364673344e-06, "loss": 0.021, "step": 123040 }, { "epoch": 0.5134105531957507, "grad_norm": 0.9345872087468503, "learning_rate": 2.791475317724524e-06, "loss": 0.0229, "step": 123045 }, { "epoch": 0.5134314159107409, "grad_norm": 0.48151880661366137, "learning_rate": 2.7914186024388317e-06, "loss": 0.0228, "step": 123050 }, { "epoch": 0.5134522786257313, "grad_norm": 0.7319490664826912, "learning_rate": 2.791361890609906e-06, "loss": 0.0336, "step": 123055 }, { "epoch": 0.5134731413407215, "grad_norm": 1.3829658176846031, "learning_rate": 2.7913051822373965e-06, "loss": 0.0255, "step": 123060 }, { "epoch": 0.5134940040557118, "grad_norm": 0.49067429200860907, "learning_rate": 2.791248477320951e-06, "loss": 0.0262, "step": 123065 }, { "epoch": 0.5135148667707021, "grad_norm": 0.8122645969948852, "learning_rate": 2.7911917758602187e-06, "loss": 0.0244, "step": 123070 }, { "epoch": 0.5135357294856924, "grad_norm": 0.7790105459312919, "learning_rate": 2.791135077854849e-06, "loss": 0.0232, "step": 123075 }, { "epoch": 0.5135565922006826, "grad_norm": 0.4284212848469869, "learning_rate": 2.791078383304491e-06, "loss": 0.0272, "step": 123080 }, { "epoch": 0.5135774549156729, "grad_norm": 0.6721463194702828, "learning_rate": 2.7910216922087927e-06, "loss": 0.0227, "step": 123085 }, { "epoch": 0.5135983176306632, "grad_norm": 0.5149017287458018, "learning_rate": 2.7909650045674047e-06, "loss": 0.0266, "step": 123090 }, { "epoch": 0.5136191803456535, "grad_norm": 0.29489160428052336, "learning_rate": 2.7909083203799764e-06, "loss": 0.0175, "step": 123095 }, { "epoch": 0.5136400430606437, "grad_norm": 0.7189312707468213, "learning_rate": 2.7908516396461555e-06, "loss": 0.0248, "step": 123100 }, { "epoch": 0.5136609057756341, "grad_norm": 0.9886690612431368, "learning_rate": 2.7907949623655927e-06, "loss": 0.0242, "step": 123105 }, { "epoch": 0.5136817684906243, "grad_norm": 0.6930075240093245, "learning_rate": 2.7907382885379364e-06, "loss": 0.0276, "step": 123110 }, { "epoch": 0.5137026312056145, "grad_norm": 0.5674344206734439, "learning_rate": 2.7906816181628364e-06, "loss": 0.0174, "step": 123115 }, { "epoch": 0.5137234939206049, "grad_norm": 0.7734107252007568, "learning_rate": 2.7906249512399424e-06, "loss": 0.0404, "step": 123120 }, { "epoch": 0.5137443566355951, "grad_norm": 0.7347832807339023, "learning_rate": 2.7905682877689038e-06, "loss": 0.0187, "step": 123125 }, { "epoch": 0.5137652193505854, "grad_norm": 0.49767975030145023, "learning_rate": 2.7905116277493706e-06, "loss": 0.0268, "step": 123130 }, { "epoch": 0.5137860820655756, "grad_norm": 0.9351018975888565, "learning_rate": 2.7904549711809906e-06, "loss": 0.031, "step": 123135 }, { "epoch": 0.513806944780566, "grad_norm": 0.6553404112407042, "learning_rate": 2.7903983180634157e-06, "loss": 0.0294, "step": 123140 }, { "epoch": 0.5138278074955562, "grad_norm": 0.7430853089855611, "learning_rate": 2.790341668396295e-06, "loss": 0.0196, "step": 123145 }, { "epoch": 0.5138486702105465, "grad_norm": 0.5227094053695468, "learning_rate": 2.7902850221792778e-06, "loss": 0.0216, "step": 123150 }, { "epoch": 0.5138695329255368, "grad_norm": 0.8717363175647399, "learning_rate": 2.790228379412014e-06, "loss": 0.0211, "step": 123155 }, { "epoch": 0.5138903956405271, "grad_norm": 1.0591616074787402, "learning_rate": 2.790171740094153e-06, "loss": 0.0242, "step": 123160 }, { "epoch": 0.5139112583555173, "grad_norm": 0.4310886916582449, "learning_rate": 2.7901151042253454e-06, "loss": 0.0287, "step": 123165 }, { "epoch": 0.5139321210705077, "grad_norm": 1.3840305779357764, "learning_rate": 2.7900584718052416e-06, "loss": 0.0206, "step": 123170 }, { "epoch": 0.5139529837854979, "grad_norm": 0.4488083841623687, "learning_rate": 2.7900018428334907e-06, "loss": 0.0165, "step": 123175 }, { "epoch": 0.5139738465004882, "grad_norm": 0.6133829486015306, "learning_rate": 2.789945217309743e-06, "loss": 0.0159, "step": 123180 }, { "epoch": 0.5139947092154785, "grad_norm": 1.042937182210418, "learning_rate": 2.7898885952336485e-06, "loss": 0.0234, "step": 123185 }, { "epoch": 0.5140155719304688, "grad_norm": 1.0273396074554764, "learning_rate": 2.789831976604858e-06, "loss": 0.0279, "step": 123190 }, { "epoch": 0.514036434645459, "grad_norm": 0.6953983140507336, "learning_rate": 2.7897753614230216e-06, "loss": 0.0203, "step": 123195 }, { "epoch": 0.5140572973604493, "grad_norm": 0.9878559595262979, "learning_rate": 2.7897187496877885e-06, "loss": 0.032, "step": 123200 }, { "epoch": 0.5140781600754396, "grad_norm": 0.8441516988442205, "learning_rate": 2.78966214139881e-06, "loss": 0.026, "step": 123205 }, { "epoch": 0.5140990227904298, "grad_norm": 0.8252282008564293, "learning_rate": 2.7896055365557363e-06, "loss": 0.029, "step": 123210 }, { "epoch": 0.5141198855054201, "grad_norm": 0.9856955356742189, "learning_rate": 2.7895489351582176e-06, "loss": 0.0262, "step": 123215 }, { "epoch": 0.5141407482204104, "grad_norm": 0.569869981950065, "learning_rate": 2.7894923372059046e-06, "loss": 0.0254, "step": 123220 }, { "epoch": 0.5141616109354007, "grad_norm": 0.6346863699816793, "learning_rate": 2.789435742698448e-06, "loss": 0.0195, "step": 123225 }, { "epoch": 0.5141824736503909, "grad_norm": 1.0140882637315976, "learning_rate": 2.7893791516354977e-06, "loss": 0.0302, "step": 123230 }, { "epoch": 0.5142033363653813, "grad_norm": 1.1378159743975296, "learning_rate": 2.789322564016705e-06, "loss": 0.0357, "step": 123235 }, { "epoch": 0.5142241990803715, "grad_norm": 0.8684964923183031, "learning_rate": 2.78926597984172e-06, "loss": 0.0249, "step": 123240 }, { "epoch": 0.5142450617953618, "grad_norm": 0.4327434872066316, "learning_rate": 2.7892093991101933e-06, "loss": 0.0231, "step": 123245 }, { "epoch": 0.514265924510352, "grad_norm": 0.575598825101131, "learning_rate": 2.7891528218217763e-06, "loss": 0.0193, "step": 123250 }, { "epoch": 0.5142867872253424, "grad_norm": 0.5159205721338813, "learning_rate": 2.7890962479761197e-06, "loss": 0.0268, "step": 123255 }, { "epoch": 0.5143076499403326, "grad_norm": 0.37929552867041116, "learning_rate": 2.789039677572874e-06, "loss": 0.0211, "step": 123260 }, { "epoch": 0.5143285126553229, "grad_norm": 0.9772025478929037, "learning_rate": 2.7889831106116906e-06, "loss": 0.028, "step": 123265 }, { "epoch": 0.5143493753703132, "grad_norm": 1.0945903932883403, "learning_rate": 2.7889265470922194e-06, "loss": 0.0413, "step": 123270 }, { "epoch": 0.5143702380853035, "grad_norm": 0.5111452194967024, "learning_rate": 2.788869987014113e-06, "loss": 0.0193, "step": 123275 }, { "epoch": 0.5143911008002937, "grad_norm": 0.4553900978391443, "learning_rate": 2.788813430377021e-06, "loss": 0.0204, "step": 123280 }, { "epoch": 0.5144119635152841, "grad_norm": 0.5885637731689028, "learning_rate": 2.788756877180595e-06, "loss": 0.0216, "step": 123285 }, { "epoch": 0.5144328262302743, "grad_norm": 0.643869344285203, "learning_rate": 2.788700327424487e-06, "loss": 0.0221, "step": 123290 }, { "epoch": 0.5144536889452646, "grad_norm": 0.6792219829254872, "learning_rate": 2.7886437811083463e-06, "loss": 0.021, "step": 123295 }, { "epoch": 0.5144745516602549, "grad_norm": 0.6887193511878511, "learning_rate": 2.7885872382318265e-06, "loss": 0.0296, "step": 123300 }, { "epoch": 0.5144954143752452, "grad_norm": 1.2112272132221888, "learning_rate": 2.7885306987945764e-06, "loss": 0.026, "step": 123305 }, { "epoch": 0.5145162770902354, "grad_norm": 0.35461102096448816, "learning_rate": 2.7884741627962497e-06, "loss": 0.0229, "step": 123310 }, { "epoch": 0.5145371398052256, "grad_norm": 0.6344699454439406, "learning_rate": 2.7884176302364963e-06, "loss": 0.0222, "step": 123315 }, { "epoch": 0.514558002520216, "grad_norm": 0.5891899314402572, "learning_rate": 2.7883611011149687e-06, "loss": 0.0224, "step": 123320 }, { "epoch": 0.5145788652352062, "grad_norm": 0.6993223742907727, "learning_rate": 2.788304575431317e-06, "loss": 0.0199, "step": 123325 }, { "epoch": 0.5145997279501965, "grad_norm": 0.9080020998082535, "learning_rate": 2.788248053185194e-06, "loss": 0.0216, "step": 123330 }, { "epoch": 0.5146205906651868, "grad_norm": 0.5025598056538153, "learning_rate": 2.788191534376251e-06, "loss": 0.0232, "step": 123335 }, { "epoch": 0.5146414533801771, "grad_norm": 0.5826457150696995, "learning_rate": 2.7881350190041396e-06, "loss": 0.02, "step": 123340 }, { "epoch": 0.5146623160951673, "grad_norm": 0.4973266527806102, "learning_rate": 2.788078507068511e-06, "loss": 0.0289, "step": 123345 }, { "epoch": 0.5146831788101577, "grad_norm": 4.079679690995475, "learning_rate": 2.7880219985690183e-06, "loss": 0.0287, "step": 123350 }, { "epoch": 0.5147040415251479, "grad_norm": 0.3735115196191075, "learning_rate": 2.7879654935053114e-06, "loss": 0.0276, "step": 123355 }, { "epoch": 0.5147249042401382, "grad_norm": 0.8066906206367602, "learning_rate": 2.787908991877043e-06, "loss": 0.0245, "step": 123360 }, { "epoch": 0.5147457669551285, "grad_norm": 0.6410618167990614, "learning_rate": 2.787852493683866e-06, "loss": 0.0199, "step": 123365 }, { "epoch": 0.5147666296701188, "grad_norm": 0.765413203792303, "learning_rate": 2.7877959989254304e-06, "loss": 0.0225, "step": 123370 }, { "epoch": 0.514787492385109, "grad_norm": 0.9060487174678532, "learning_rate": 2.7877395076013903e-06, "loss": 0.0309, "step": 123375 }, { "epoch": 0.5148083551000993, "grad_norm": 1.1190832793464547, "learning_rate": 2.787683019711396e-06, "loss": 0.0243, "step": 123380 }, { "epoch": 0.5148292178150896, "grad_norm": 0.7851793315136429, "learning_rate": 2.787626535255101e-06, "loss": 0.0276, "step": 123385 }, { "epoch": 0.5148500805300799, "grad_norm": 1.2894451630714985, "learning_rate": 2.7875700542321554e-06, "loss": 0.027, "step": 123390 }, { "epoch": 0.5148709432450701, "grad_norm": 1.0291908019453204, "learning_rate": 2.787513576642214e-06, "loss": 0.0209, "step": 123395 }, { "epoch": 0.5148918059600605, "grad_norm": 0.6915796855945625, "learning_rate": 2.7874571024849272e-06, "loss": 0.0293, "step": 123400 }, { "epoch": 0.5149126686750507, "grad_norm": 1.4077890477086399, "learning_rate": 2.787400631759948e-06, "loss": 0.0321, "step": 123405 }, { "epoch": 0.5149335313900409, "grad_norm": 0.6423900781180484, "learning_rate": 2.7873441644669287e-06, "loss": 0.0272, "step": 123410 }, { "epoch": 0.5149543941050313, "grad_norm": 0.5199661631534523, "learning_rate": 2.7872877006055217e-06, "loss": 0.0243, "step": 123415 }, { "epoch": 0.5149752568200215, "grad_norm": 0.8740596879246608, "learning_rate": 2.7872312401753786e-06, "loss": 0.0234, "step": 123420 }, { "epoch": 0.5149961195350118, "grad_norm": 0.7686140139029862, "learning_rate": 2.787174783176153e-06, "loss": 0.0226, "step": 123425 }, { "epoch": 0.515016982250002, "grad_norm": 0.507698542192303, "learning_rate": 2.787118329607498e-06, "loss": 0.031, "step": 123430 }, { "epoch": 0.5150378449649924, "grad_norm": 0.5927000820452094, "learning_rate": 2.787061879469064e-06, "loss": 0.0199, "step": 123435 }, { "epoch": 0.5150587076799826, "grad_norm": 0.5165602886855637, "learning_rate": 2.787005432760505e-06, "loss": 0.026, "step": 123440 }, { "epoch": 0.5150795703949729, "grad_norm": 0.7035112963384131, "learning_rate": 2.7869489894814744e-06, "loss": 0.0322, "step": 123445 }, { "epoch": 0.5151004331099632, "grad_norm": 0.45283081229723876, "learning_rate": 2.7868925496316235e-06, "loss": 0.0191, "step": 123450 }, { "epoch": 0.5151212958249535, "grad_norm": 0.8648181926666205, "learning_rate": 2.786836113210606e-06, "loss": 0.027, "step": 123455 }, { "epoch": 0.5151421585399437, "grad_norm": 0.8994777530182002, "learning_rate": 2.7867796802180743e-06, "loss": 0.0219, "step": 123460 }, { "epoch": 0.5151630212549341, "grad_norm": 0.671247454926097, "learning_rate": 2.7867232506536814e-06, "loss": 0.0209, "step": 123465 }, { "epoch": 0.5151838839699243, "grad_norm": 0.7009410550383212, "learning_rate": 2.7866668245170796e-06, "loss": 0.0281, "step": 123470 }, { "epoch": 0.5152047466849146, "grad_norm": 0.5595685805720306, "learning_rate": 2.7866104018079234e-06, "loss": 0.0262, "step": 123475 }, { "epoch": 0.5152256093999049, "grad_norm": 0.7882950789950789, "learning_rate": 2.786553982525864e-06, "loss": 0.0247, "step": 123480 }, { "epoch": 0.5152464721148952, "grad_norm": 0.8954136491758794, "learning_rate": 2.7864975666705564e-06, "loss": 0.0264, "step": 123485 }, { "epoch": 0.5152673348298854, "grad_norm": 0.5938917167932979, "learning_rate": 2.7864411542416527e-06, "loss": 0.0266, "step": 123490 }, { "epoch": 0.5152881975448756, "grad_norm": 0.45839933724639653, "learning_rate": 2.7863847452388054e-06, "loss": 0.0276, "step": 123495 }, { "epoch": 0.515309060259866, "grad_norm": 0.6398968909667738, "learning_rate": 2.786328339661669e-06, "loss": 0.0217, "step": 123500 }, { "epoch": 0.5153299229748562, "grad_norm": 0.7026961524210538, "learning_rate": 2.786271937509896e-06, "loss": 0.0282, "step": 123505 }, { "epoch": 0.5153507856898465, "grad_norm": 1.1887410358121495, "learning_rate": 2.7862155387831403e-06, "loss": 0.0356, "step": 123510 }, { "epoch": 0.5153716484048368, "grad_norm": 0.6181931912718226, "learning_rate": 2.786159143481055e-06, "loss": 0.0238, "step": 123515 }, { "epoch": 0.5153925111198271, "grad_norm": 0.7697051178799698, "learning_rate": 2.786102751603293e-06, "loss": 0.0224, "step": 123520 }, { "epoch": 0.5154133738348173, "grad_norm": 0.5054989388822243, "learning_rate": 2.7860463631495083e-06, "loss": 0.0206, "step": 123525 }, { "epoch": 0.5154342365498077, "grad_norm": 0.5119729812573615, "learning_rate": 2.7859899781193545e-06, "loss": 0.0293, "step": 123530 }, { "epoch": 0.5154550992647979, "grad_norm": 0.41626824631190973, "learning_rate": 2.7859335965124843e-06, "loss": 0.0195, "step": 123535 }, { "epoch": 0.5154759619797882, "grad_norm": 0.42985967719939594, "learning_rate": 2.785877218328554e-06, "loss": 0.0269, "step": 123540 }, { "epoch": 0.5154968246947785, "grad_norm": 0.725451788707168, "learning_rate": 2.7858208435672135e-06, "loss": 0.0286, "step": 123545 }, { "epoch": 0.5155176874097688, "grad_norm": 0.728225478812833, "learning_rate": 2.7857644722281186e-06, "loss": 0.0172, "step": 123550 }, { "epoch": 0.515538550124759, "grad_norm": 0.7938248805878615, "learning_rate": 2.7857081043109234e-06, "loss": 0.0283, "step": 123555 }, { "epoch": 0.5155594128397493, "grad_norm": 1.1584426527558935, "learning_rate": 2.7856517398152805e-06, "loss": 0.0329, "step": 123560 }, { "epoch": 0.5155802755547396, "grad_norm": 0.5906181462535856, "learning_rate": 2.7855953787408447e-06, "loss": 0.0208, "step": 123565 }, { "epoch": 0.5156011382697299, "grad_norm": 1.207110114110836, "learning_rate": 2.7855390210872696e-06, "loss": 0.0243, "step": 123570 }, { "epoch": 0.5156220009847201, "grad_norm": 0.564922201397256, "learning_rate": 2.785482666854209e-06, "loss": 0.0261, "step": 123575 }, { "epoch": 0.5156428636997105, "grad_norm": 0.46083130306946735, "learning_rate": 2.785426316041317e-06, "loss": 0.0233, "step": 123580 }, { "epoch": 0.5156637264147007, "grad_norm": 0.6620629944950874, "learning_rate": 2.785369968648248e-06, "loss": 0.0269, "step": 123585 }, { "epoch": 0.515684589129691, "grad_norm": 0.7670272993345244, "learning_rate": 2.7853136246746555e-06, "loss": 0.0273, "step": 123590 }, { "epoch": 0.5157054518446813, "grad_norm": 0.37415876205516346, "learning_rate": 2.7852572841201943e-06, "loss": 0.0187, "step": 123595 }, { "epoch": 0.5157263145596716, "grad_norm": 0.5732394596429141, "learning_rate": 2.785200946984518e-06, "loss": 0.0222, "step": 123600 }, { "epoch": 0.5157471772746618, "grad_norm": 0.8199076398846497, "learning_rate": 2.7851446132672814e-06, "loss": 0.0224, "step": 123605 }, { "epoch": 0.515768039989652, "grad_norm": 0.5712769970346284, "learning_rate": 2.7850882829681386e-06, "loss": 0.0175, "step": 123610 }, { "epoch": 0.5157889027046424, "grad_norm": 0.8651606659809251, "learning_rate": 2.785031956086744e-06, "loss": 0.0284, "step": 123615 }, { "epoch": 0.5158097654196326, "grad_norm": 0.7473101253873773, "learning_rate": 2.784975632622752e-06, "loss": 0.0238, "step": 123620 }, { "epoch": 0.5158306281346229, "grad_norm": 0.8459701576556178, "learning_rate": 2.7849193125758164e-06, "loss": 0.0241, "step": 123625 }, { "epoch": 0.5158514908496132, "grad_norm": 1.0393526949112575, "learning_rate": 2.7848629959455924e-06, "loss": 0.033, "step": 123630 }, { "epoch": 0.5158723535646035, "grad_norm": 1.3207038105878843, "learning_rate": 2.7848066827317343e-06, "loss": 0.0325, "step": 123635 }, { "epoch": 0.5158932162795937, "grad_norm": 1.3780994932404254, "learning_rate": 2.7847503729338974e-06, "loss": 0.0341, "step": 123640 }, { "epoch": 0.5159140789945841, "grad_norm": 0.7188568730817876, "learning_rate": 2.7846940665517357e-06, "loss": 0.0248, "step": 123645 }, { "epoch": 0.5159349417095743, "grad_norm": 0.521969762469253, "learning_rate": 2.7846377635849036e-06, "loss": 0.0312, "step": 123650 }, { "epoch": 0.5159558044245646, "grad_norm": 0.7493944137940217, "learning_rate": 2.7845814640330564e-06, "loss": 0.0178, "step": 123655 }, { "epoch": 0.5159766671395549, "grad_norm": 2.5654475576524938, "learning_rate": 2.7845251678958484e-06, "loss": 0.0301, "step": 123660 }, { "epoch": 0.5159975298545452, "grad_norm": 0.7651599756038555, "learning_rate": 2.7844688751729354e-06, "loss": 0.0235, "step": 123665 }, { "epoch": 0.5160183925695354, "grad_norm": 0.6137426189525985, "learning_rate": 2.7844125858639713e-06, "loss": 0.0248, "step": 123670 }, { "epoch": 0.5160392552845257, "grad_norm": 0.5642124388918892, "learning_rate": 2.7843562999686118e-06, "loss": 0.0213, "step": 123675 }, { "epoch": 0.516060117999516, "grad_norm": 0.687008783707613, "learning_rate": 2.784300017486511e-06, "loss": 0.0201, "step": 123680 }, { "epoch": 0.5160809807145063, "grad_norm": 0.6522649910431723, "learning_rate": 2.7842437384173242e-06, "loss": 0.0253, "step": 123685 }, { "epoch": 0.5161018434294965, "grad_norm": 0.5762349063613041, "learning_rate": 2.7841874627607067e-06, "loss": 0.0235, "step": 123690 }, { "epoch": 0.5161227061444869, "grad_norm": 0.9411521439584903, "learning_rate": 2.7841311905163145e-06, "loss": 0.0244, "step": 123695 }, { "epoch": 0.5161435688594771, "grad_norm": 0.6965966276623532, "learning_rate": 2.7840749216838014e-06, "loss": 0.0186, "step": 123700 }, { "epoch": 0.5161644315744673, "grad_norm": 0.6351818341913983, "learning_rate": 2.784018656262823e-06, "loss": 0.0235, "step": 123705 }, { "epoch": 0.5161852942894577, "grad_norm": 0.8233667060293223, "learning_rate": 2.7839623942530347e-06, "loss": 0.0305, "step": 123710 }, { "epoch": 0.5162061570044479, "grad_norm": 0.7022486165741383, "learning_rate": 2.783906135654092e-06, "loss": 0.0267, "step": 123715 }, { "epoch": 0.5162270197194382, "grad_norm": 0.7639114249552613, "learning_rate": 2.7838498804656505e-06, "loss": 0.0183, "step": 123720 }, { "epoch": 0.5162478824344285, "grad_norm": 0.7065765997069774, "learning_rate": 2.7837936286873644e-06, "loss": 0.0222, "step": 123725 }, { "epoch": 0.5162687451494188, "grad_norm": 0.4682850761410603, "learning_rate": 2.783737380318891e-06, "loss": 0.026, "step": 123730 }, { "epoch": 0.516289607864409, "grad_norm": 0.7412806500940794, "learning_rate": 2.783681135359885e-06, "loss": 0.0285, "step": 123735 }, { "epoch": 0.5163104705793993, "grad_norm": 0.7375079444895688, "learning_rate": 2.783624893810001e-06, "loss": 0.0229, "step": 123740 }, { "epoch": 0.5163313332943896, "grad_norm": 0.45561112894999184, "learning_rate": 2.783568655668896e-06, "loss": 0.0205, "step": 123745 }, { "epoch": 0.5163521960093799, "grad_norm": 0.3510634539011733, "learning_rate": 2.7835124209362245e-06, "loss": 0.0216, "step": 123750 }, { "epoch": 0.5163730587243701, "grad_norm": 0.6704793955600277, "learning_rate": 2.783456189611644e-06, "loss": 0.0209, "step": 123755 }, { "epoch": 0.5163939214393605, "grad_norm": 0.6024543851853214, "learning_rate": 2.783399961694809e-06, "loss": 0.0288, "step": 123760 }, { "epoch": 0.5164147841543507, "grad_norm": 0.5461964959771085, "learning_rate": 2.7833437371853743e-06, "loss": 0.0236, "step": 123765 }, { "epoch": 0.516435646869341, "grad_norm": 0.6142142379239519, "learning_rate": 2.783287516082998e-06, "loss": 0.0287, "step": 123770 }, { "epoch": 0.5164565095843313, "grad_norm": 0.5392773987611785, "learning_rate": 2.7832312983873343e-06, "loss": 0.0241, "step": 123775 }, { "epoch": 0.5164773722993216, "grad_norm": 1.021016496050694, "learning_rate": 2.7831750840980404e-06, "loss": 0.0255, "step": 123780 }, { "epoch": 0.5164982350143118, "grad_norm": 0.6836594117685381, "learning_rate": 2.783118873214772e-06, "loss": 0.0213, "step": 123785 }, { "epoch": 0.516519097729302, "grad_norm": 0.5016404150731867, "learning_rate": 2.7830626657371834e-06, "loss": 0.0278, "step": 123790 }, { "epoch": 0.5165399604442924, "grad_norm": 0.8900377318317013, "learning_rate": 2.783006461664934e-06, "loss": 0.0305, "step": 123795 }, { "epoch": 0.5165608231592826, "grad_norm": 0.5774113466562071, "learning_rate": 2.782950260997677e-06, "loss": 0.0253, "step": 123800 }, { "epoch": 0.5165816858742729, "grad_norm": 0.4291041005492477, "learning_rate": 2.78289406373507e-06, "loss": 0.0243, "step": 123805 }, { "epoch": 0.5166025485892632, "grad_norm": 0.4058176751038915, "learning_rate": 2.7828378698767688e-06, "loss": 0.0243, "step": 123810 }, { "epoch": 0.5166234113042535, "grad_norm": 0.48662399889511465, "learning_rate": 2.7827816794224306e-06, "loss": 0.02, "step": 123815 }, { "epoch": 0.5166442740192437, "grad_norm": 0.5350374461306362, "learning_rate": 2.7827254923717104e-06, "loss": 0.0286, "step": 123820 }, { "epoch": 0.5166651367342341, "grad_norm": 0.563630914884422, "learning_rate": 2.7826693087242662e-06, "loss": 0.0187, "step": 123825 }, { "epoch": 0.5166859994492243, "grad_norm": 0.7514548199286237, "learning_rate": 2.782613128479753e-06, "loss": 0.0264, "step": 123830 }, { "epoch": 0.5167068621642146, "grad_norm": 0.8173024171251152, "learning_rate": 2.7825569516378277e-06, "loss": 0.0276, "step": 123835 }, { "epoch": 0.5167277248792049, "grad_norm": 0.9686297206198897, "learning_rate": 2.7825007781981474e-06, "loss": 0.0234, "step": 123840 }, { "epoch": 0.5167485875941952, "grad_norm": 0.7340553753336789, "learning_rate": 2.7824446081603678e-06, "loss": 0.0262, "step": 123845 }, { "epoch": 0.5167694503091854, "grad_norm": 0.8169710309593158, "learning_rate": 2.7823884415241462e-06, "loss": 0.0291, "step": 123850 }, { "epoch": 0.5167903130241757, "grad_norm": 0.832428183742778, "learning_rate": 2.7823322782891394e-06, "loss": 0.0265, "step": 123855 }, { "epoch": 0.516811175739166, "grad_norm": 0.2151193712928273, "learning_rate": 2.7822761184550037e-06, "loss": 0.0245, "step": 123860 }, { "epoch": 0.5168320384541563, "grad_norm": 0.7228687565218702, "learning_rate": 2.7822199620213956e-06, "loss": 0.0292, "step": 123865 }, { "epoch": 0.5168529011691465, "grad_norm": 0.6496335183764821, "learning_rate": 2.7821638089879727e-06, "loss": 0.0251, "step": 123870 }, { "epoch": 0.5168737638841369, "grad_norm": 0.5620069702272603, "learning_rate": 2.782107659354392e-06, "loss": 0.0182, "step": 123875 }, { "epoch": 0.5168946265991271, "grad_norm": 0.9047383590446808, "learning_rate": 2.78205151312031e-06, "loss": 0.0246, "step": 123880 }, { "epoch": 0.5169154893141173, "grad_norm": 0.6793534117860033, "learning_rate": 2.7819953702853825e-06, "loss": 0.0228, "step": 123885 }, { "epoch": 0.5169363520291077, "grad_norm": 0.732736625870706, "learning_rate": 2.781939230849269e-06, "loss": 0.0198, "step": 123890 }, { "epoch": 0.516957214744098, "grad_norm": 1.248953990886252, "learning_rate": 2.7818830948116247e-06, "loss": 0.0283, "step": 123895 }, { "epoch": 0.5169780774590882, "grad_norm": 0.7883446274160881, "learning_rate": 2.7818269621721077e-06, "loss": 0.0167, "step": 123900 }, { "epoch": 0.5169989401740785, "grad_norm": 1.1145802138677825, "learning_rate": 2.781770832930375e-06, "loss": 0.0207, "step": 123905 }, { "epoch": 0.5170198028890688, "grad_norm": 0.617997456571579, "learning_rate": 2.7817147070860828e-06, "loss": 0.0217, "step": 123910 }, { "epoch": 0.517040665604059, "grad_norm": 0.8722879657814087, "learning_rate": 2.78165858463889e-06, "loss": 0.0265, "step": 123915 }, { "epoch": 0.5170615283190493, "grad_norm": 0.9142490955619134, "learning_rate": 2.7816024655884526e-06, "loss": 0.0261, "step": 123920 }, { "epoch": 0.5170823910340396, "grad_norm": 0.43473477913963504, "learning_rate": 2.781546349934429e-06, "loss": 0.0171, "step": 123925 }, { "epoch": 0.5171032537490299, "grad_norm": 0.6869155785948863, "learning_rate": 2.7814902376764757e-06, "loss": 0.0219, "step": 123930 }, { "epoch": 0.5171241164640201, "grad_norm": 0.6071916770074559, "learning_rate": 2.781434128814251e-06, "loss": 0.0254, "step": 123935 }, { "epoch": 0.5171449791790105, "grad_norm": 0.8600949833622233, "learning_rate": 2.781378023347412e-06, "loss": 0.0269, "step": 123940 }, { "epoch": 0.5171658418940007, "grad_norm": 0.6227630993994787, "learning_rate": 2.781321921275616e-06, "loss": 0.0315, "step": 123945 }, { "epoch": 0.517186704608991, "grad_norm": 1.4594861953941687, "learning_rate": 2.7812658225985214e-06, "loss": 0.0243, "step": 123950 }, { "epoch": 0.5172075673239813, "grad_norm": 0.9010563234385435, "learning_rate": 2.7812097273157844e-06, "loss": 0.0203, "step": 123955 }, { "epoch": 0.5172284300389716, "grad_norm": 0.6781274787533518, "learning_rate": 2.7811536354270645e-06, "loss": 0.0269, "step": 123960 }, { "epoch": 0.5172492927539618, "grad_norm": 0.9822092111768385, "learning_rate": 2.7810975469320184e-06, "loss": 0.0315, "step": 123965 }, { "epoch": 0.517270155468952, "grad_norm": 0.47821311283316326, "learning_rate": 2.7810414618303037e-06, "loss": 0.0198, "step": 123970 }, { "epoch": 0.5172910181839424, "grad_norm": 0.6600890827653052, "learning_rate": 2.7809853801215798e-06, "loss": 0.0273, "step": 123975 }, { "epoch": 0.5173118808989327, "grad_norm": 0.6750505814952203, "learning_rate": 2.7809293018055027e-06, "loss": 0.031, "step": 123980 }, { "epoch": 0.5173327436139229, "grad_norm": 1.1072136576922536, "learning_rate": 2.7808732268817312e-06, "loss": 0.0185, "step": 123985 }, { "epoch": 0.5173536063289133, "grad_norm": 0.8178595849024176, "learning_rate": 2.7808171553499235e-06, "loss": 0.0314, "step": 123990 }, { "epoch": 0.5173744690439035, "grad_norm": 0.5263245979335286, "learning_rate": 2.780761087209737e-06, "loss": 0.0201, "step": 123995 }, { "epoch": 0.5173953317588937, "grad_norm": 0.30782766129256256, "learning_rate": 2.7807050224608305e-06, "loss": 0.0264, "step": 124000 }, { "epoch": 0.5174161944738841, "grad_norm": 0.718492346093819, "learning_rate": 2.7806489611028613e-06, "loss": 0.0322, "step": 124005 }, { "epoch": 0.5174370571888743, "grad_norm": 0.5690255623881172, "learning_rate": 2.7805929031354887e-06, "loss": 0.0254, "step": 124010 }, { "epoch": 0.5174579199038646, "grad_norm": 0.7736528580976421, "learning_rate": 2.7805368485583706e-06, "loss": 0.0205, "step": 124015 }, { "epoch": 0.5174787826188549, "grad_norm": 0.8944164808117777, "learning_rate": 2.7804807973711643e-06, "loss": 0.0329, "step": 124020 }, { "epoch": 0.5174996453338452, "grad_norm": 0.5613687600684683, "learning_rate": 2.78042474957353e-06, "loss": 0.0253, "step": 124025 }, { "epoch": 0.5175205080488354, "grad_norm": 0.8931831919042862, "learning_rate": 2.780368705165124e-06, "loss": 0.0273, "step": 124030 }, { "epoch": 0.5175413707638257, "grad_norm": 1.2018634115021964, "learning_rate": 2.7803126641456063e-06, "loss": 0.0266, "step": 124035 }, { "epoch": 0.517562233478816, "grad_norm": 1.189801579408847, "learning_rate": 2.7802566265146342e-06, "loss": 0.027, "step": 124040 }, { "epoch": 0.5175830961938063, "grad_norm": 0.7836345901115241, "learning_rate": 2.780200592271867e-06, "loss": 0.0215, "step": 124045 }, { "epoch": 0.5176039589087965, "grad_norm": 0.9313180087261657, "learning_rate": 2.780144561416963e-06, "loss": 0.0331, "step": 124050 }, { "epoch": 0.5176248216237869, "grad_norm": 0.5953652919607884, "learning_rate": 2.7800885339495816e-06, "loss": 0.0253, "step": 124055 }, { "epoch": 0.5176456843387771, "grad_norm": 0.6607398418010916, "learning_rate": 2.7800325098693804e-06, "loss": 0.0263, "step": 124060 }, { "epoch": 0.5176665470537674, "grad_norm": 0.5143177716918159, "learning_rate": 2.7799764891760183e-06, "loss": 0.0237, "step": 124065 }, { "epoch": 0.5176874097687577, "grad_norm": 0.7661966928064002, "learning_rate": 2.7799204718691546e-06, "loss": 0.0241, "step": 124070 }, { "epoch": 0.517708272483748, "grad_norm": 1.0730403566652473, "learning_rate": 2.7798644579484478e-06, "loss": 0.0191, "step": 124075 }, { "epoch": 0.5177291351987382, "grad_norm": 0.8932244330960369, "learning_rate": 2.7798084474135565e-06, "loss": 0.0213, "step": 124080 }, { "epoch": 0.5177499979137286, "grad_norm": 0.7314660108779398, "learning_rate": 2.7797524402641408e-06, "loss": 0.0275, "step": 124085 }, { "epoch": 0.5177708606287188, "grad_norm": 0.8717134943407339, "learning_rate": 2.779696436499858e-06, "loss": 0.0181, "step": 124090 }, { "epoch": 0.517791723343709, "grad_norm": 0.7232341287011249, "learning_rate": 2.7796404361203682e-06, "loss": 0.0211, "step": 124095 }, { "epoch": 0.5178125860586993, "grad_norm": 0.9470133898600465, "learning_rate": 2.7795844391253296e-06, "loss": 0.0289, "step": 124100 }, { "epoch": 0.5178334487736896, "grad_norm": 0.43643592181341206, "learning_rate": 2.7795284455144022e-06, "loss": 0.024, "step": 124105 }, { "epoch": 0.5178543114886799, "grad_norm": 0.544304973701251, "learning_rate": 2.7794724552872453e-06, "loss": 0.0309, "step": 124110 }, { "epoch": 0.5178751742036701, "grad_norm": 0.43138083685625406, "learning_rate": 2.7794164684435167e-06, "loss": 0.0227, "step": 124115 }, { "epoch": 0.5178960369186605, "grad_norm": 0.45040709750381525, "learning_rate": 2.7793604849828774e-06, "loss": 0.0287, "step": 124120 }, { "epoch": 0.5179168996336507, "grad_norm": 0.6014259739200377, "learning_rate": 2.779304504904985e-06, "loss": 0.0178, "step": 124125 }, { "epoch": 0.517937762348641, "grad_norm": 0.6013627079017437, "learning_rate": 2.7792485282095003e-06, "loss": 0.0273, "step": 124130 }, { "epoch": 0.5179586250636313, "grad_norm": 0.8781179935409407, "learning_rate": 2.779192554896082e-06, "loss": 0.0286, "step": 124135 }, { "epoch": 0.5179794877786216, "grad_norm": 1.8111402616140029, "learning_rate": 2.7791365849643897e-06, "loss": 0.0262, "step": 124140 }, { "epoch": 0.5180003504936118, "grad_norm": 3.5324535660014047, "learning_rate": 2.7790806184140826e-06, "loss": 0.0316, "step": 124145 }, { "epoch": 0.5180212132086021, "grad_norm": 1.204432628092801, "learning_rate": 2.7790246552448206e-06, "loss": 0.0279, "step": 124150 }, { "epoch": 0.5180420759235924, "grad_norm": 0.6862246265746318, "learning_rate": 2.7789686954562632e-06, "loss": 0.0287, "step": 124155 }, { "epoch": 0.5180629386385827, "grad_norm": 0.8632202170166613, "learning_rate": 2.7789127390480703e-06, "loss": 0.0253, "step": 124160 }, { "epoch": 0.5180838013535729, "grad_norm": 0.8140320869969363, "learning_rate": 2.778856786019901e-06, "loss": 0.0253, "step": 124165 }, { "epoch": 0.5181046640685633, "grad_norm": 0.5576256686356671, "learning_rate": 2.778800836371415e-06, "loss": 0.0215, "step": 124170 }, { "epoch": 0.5181255267835535, "grad_norm": 0.48312503366083126, "learning_rate": 2.778744890102273e-06, "loss": 0.028, "step": 124175 }, { "epoch": 0.5181463894985437, "grad_norm": 0.9779610768715942, "learning_rate": 2.7786889472121343e-06, "loss": 0.0248, "step": 124180 }, { "epoch": 0.5181672522135341, "grad_norm": 0.637361967143879, "learning_rate": 2.7786330077006584e-06, "loss": 0.03, "step": 124185 }, { "epoch": 0.5181881149285243, "grad_norm": 0.7064756659386443, "learning_rate": 2.778577071567505e-06, "loss": 0.0266, "step": 124190 }, { "epoch": 0.5182089776435146, "grad_norm": 0.7612622708339387, "learning_rate": 2.778521138812335e-06, "loss": 0.0288, "step": 124195 }, { "epoch": 0.518229840358505, "grad_norm": 0.39696720180821, "learning_rate": 2.7784652094348088e-06, "loss": 0.0243, "step": 124200 }, { "epoch": 0.5182507030734952, "grad_norm": 0.6830941181116557, "learning_rate": 2.778409283434585e-06, "loss": 0.0264, "step": 124205 }, { "epoch": 0.5182715657884854, "grad_norm": 1.0148549721321205, "learning_rate": 2.7783533608113243e-06, "loss": 0.0206, "step": 124210 }, { "epoch": 0.5182924285034757, "grad_norm": 0.5759343711483463, "learning_rate": 2.7782974415646873e-06, "loss": 0.0288, "step": 124215 }, { "epoch": 0.518313291218466, "grad_norm": 0.5761085750979348, "learning_rate": 2.7782415256943333e-06, "loss": 0.0242, "step": 124220 }, { "epoch": 0.5183341539334563, "grad_norm": 0.8459127614119365, "learning_rate": 2.778185613199924e-06, "loss": 0.026, "step": 124225 }, { "epoch": 0.5183550166484465, "grad_norm": 1.0855763463628614, "learning_rate": 2.7781297040811185e-06, "loss": 0.0324, "step": 124230 }, { "epoch": 0.5183758793634369, "grad_norm": 0.41960925082375755, "learning_rate": 2.778073798337577e-06, "loss": 0.0158, "step": 124235 }, { "epoch": 0.5183967420784271, "grad_norm": 0.7563095337625859, "learning_rate": 2.778017895968961e-06, "loss": 0.0162, "step": 124240 }, { "epoch": 0.5184176047934174, "grad_norm": 0.48746158427864217, "learning_rate": 2.77796199697493e-06, "loss": 0.0204, "step": 124245 }, { "epoch": 0.5184384675084077, "grad_norm": 0.7715674232397614, "learning_rate": 2.777906101355145e-06, "loss": 0.0201, "step": 124250 }, { "epoch": 0.518459330223398, "grad_norm": 0.39683645602915424, "learning_rate": 2.7778502091092662e-06, "loss": 0.0201, "step": 124255 }, { "epoch": 0.5184801929383882, "grad_norm": 0.6489385138769396, "learning_rate": 2.7777943202369546e-06, "loss": 0.0239, "step": 124260 }, { "epoch": 0.5185010556533786, "grad_norm": 0.5945427593425864, "learning_rate": 2.777738434737871e-06, "loss": 0.0141, "step": 124265 }, { "epoch": 0.5185219183683688, "grad_norm": 0.5826369843756982, "learning_rate": 2.777682552611675e-06, "loss": 0.0221, "step": 124270 }, { "epoch": 0.518542781083359, "grad_norm": 0.6699348630035974, "learning_rate": 2.7776266738580287e-06, "loss": 0.0221, "step": 124275 }, { "epoch": 0.5185636437983493, "grad_norm": 0.5543929165958109, "learning_rate": 2.777570798476592e-06, "loss": 0.0252, "step": 124280 }, { "epoch": 0.5185845065133396, "grad_norm": 0.41686035689592715, "learning_rate": 2.777514926467026e-06, "loss": 0.0238, "step": 124285 }, { "epoch": 0.5186053692283299, "grad_norm": 0.6507499819836889, "learning_rate": 2.7774590578289917e-06, "loss": 0.0288, "step": 124290 }, { "epoch": 0.5186262319433201, "grad_norm": 0.6935225071778228, "learning_rate": 2.777403192562149e-06, "loss": 0.0265, "step": 124295 }, { "epoch": 0.5186470946583105, "grad_norm": 0.6074633050301249, "learning_rate": 2.777347330666161e-06, "loss": 0.0197, "step": 124300 }, { "epoch": 0.5186679573733007, "grad_norm": 0.24695939953269302, "learning_rate": 2.7772914721406867e-06, "loss": 0.0218, "step": 124305 }, { "epoch": 0.518688820088291, "grad_norm": 0.39693797529477365, "learning_rate": 2.777235616985389e-06, "loss": 0.0305, "step": 124310 }, { "epoch": 0.5187096828032813, "grad_norm": 0.4003695508875733, "learning_rate": 2.7771797651999265e-06, "loss": 0.0177, "step": 124315 }, { "epoch": 0.5187305455182716, "grad_norm": 0.5888110723963532, "learning_rate": 2.7771239167839633e-06, "loss": 0.0238, "step": 124320 }, { "epoch": 0.5187514082332618, "grad_norm": 0.718847180627832, "learning_rate": 2.777068071737158e-06, "loss": 0.0269, "step": 124325 }, { "epoch": 0.5187722709482521, "grad_norm": 0.662833696436495, "learning_rate": 2.7770122300591732e-06, "loss": 0.0246, "step": 124330 }, { "epoch": 0.5187931336632424, "grad_norm": 0.7302447369552999, "learning_rate": 2.77695639174967e-06, "loss": 0.0306, "step": 124335 }, { "epoch": 0.5188139963782327, "grad_norm": 0.5296497070462424, "learning_rate": 2.7769005568083104e-06, "loss": 0.0165, "step": 124340 }, { "epoch": 0.5188348590932229, "grad_norm": 0.6302428937650438, "learning_rate": 2.7768447252347554e-06, "loss": 0.0286, "step": 124345 }, { "epoch": 0.5188557218082133, "grad_norm": 0.4140251378096152, "learning_rate": 2.7767888970286654e-06, "loss": 0.0198, "step": 124350 }, { "epoch": 0.5188765845232035, "grad_norm": 0.9501638293866802, "learning_rate": 2.776733072189703e-06, "loss": 0.0364, "step": 124355 }, { "epoch": 0.5188974472381938, "grad_norm": 0.4558419632721073, "learning_rate": 2.7766772507175295e-06, "loss": 0.0173, "step": 124360 }, { "epoch": 0.5189183099531841, "grad_norm": 0.881603370831332, "learning_rate": 2.776621432611807e-06, "loss": 0.0275, "step": 124365 }, { "epoch": 0.5189391726681744, "grad_norm": 0.6054893122372077, "learning_rate": 2.776565617872196e-06, "loss": 0.0206, "step": 124370 }, { "epoch": 0.5189600353831646, "grad_norm": 0.64365946102254, "learning_rate": 2.776509806498359e-06, "loss": 0.0191, "step": 124375 }, { "epoch": 0.518980898098155, "grad_norm": 0.5955827142433471, "learning_rate": 2.7764539984899575e-06, "loss": 0.0269, "step": 124380 }, { "epoch": 0.5190017608131452, "grad_norm": 0.9903360950483759, "learning_rate": 2.7763981938466534e-06, "loss": 0.0265, "step": 124385 }, { "epoch": 0.5190226235281354, "grad_norm": 0.5413858222455207, "learning_rate": 2.776342392568109e-06, "loss": 0.0195, "step": 124390 }, { "epoch": 0.5190434862431257, "grad_norm": 1.3389054382522856, "learning_rate": 2.7762865946539847e-06, "loss": 0.0272, "step": 124395 }, { "epoch": 0.519064348958116, "grad_norm": 0.7509410755162607, "learning_rate": 2.7762308001039438e-06, "loss": 0.0405, "step": 124400 }, { "epoch": 0.5190852116731063, "grad_norm": 0.3423646922177199, "learning_rate": 2.776175008917648e-06, "loss": 0.0168, "step": 124405 }, { "epoch": 0.5191060743880965, "grad_norm": 0.7813448160632831, "learning_rate": 2.7761192210947594e-06, "loss": 0.0237, "step": 124410 }, { "epoch": 0.5191269371030869, "grad_norm": 0.7644451892649066, "learning_rate": 2.7760634366349396e-06, "loss": 0.0284, "step": 124415 }, { "epoch": 0.5191477998180771, "grad_norm": 0.6955110539051063, "learning_rate": 2.776007655537851e-06, "loss": 0.019, "step": 124420 }, { "epoch": 0.5191686625330674, "grad_norm": 0.7324438731364394, "learning_rate": 2.775951877803156e-06, "loss": 0.0226, "step": 124425 }, { "epoch": 0.5191895252480577, "grad_norm": 0.41360010891800986, "learning_rate": 2.7758961034305167e-06, "loss": 0.0183, "step": 124430 }, { "epoch": 0.519210387963048, "grad_norm": 0.8756117523215241, "learning_rate": 2.7758403324195942e-06, "loss": 0.0244, "step": 124435 }, { "epoch": 0.5192312506780382, "grad_norm": 0.5382887716975234, "learning_rate": 2.7757845647700526e-06, "loss": 0.0186, "step": 124440 }, { "epoch": 0.5192521133930286, "grad_norm": 0.5986697199361783, "learning_rate": 2.7757288004815537e-06, "loss": 0.0159, "step": 124445 }, { "epoch": 0.5192729761080188, "grad_norm": 0.8572393610118605, "learning_rate": 2.7756730395537597e-06, "loss": 0.0246, "step": 124450 }, { "epoch": 0.5192938388230091, "grad_norm": 0.7008415232755478, "learning_rate": 2.7756172819863327e-06, "loss": 0.0235, "step": 124455 }, { "epoch": 0.5193147015379993, "grad_norm": 0.8452878537090633, "learning_rate": 2.7755615277789353e-06, "loss": 0.0301, "step": 124460 }, { "epoch": 0.5193355642529897, "grad_norm": 0.5943829116378536, "learning_rate": 2.775505776931231e-06, "loss": 0.0239, "step": 124465 }, { "epoch": 0.5193564269679799, "grad_norm": 1.0424331026458422, "learning_rate": 2.7754500294428816e-06, "loss": 0.0301, "step": 124470 }, { "epoch": 0.5193772896829701, "grad_norm": 0.610281132661527, "learning_rate": 2.7753942853135496e-06, "loss": 0.0299, "step": 124475 }, { "epoch": 0.5193981523979605, "grad_norm": 0.7333769994962741, "learning_rate": 2.7753385445428977e-06, "loss": 0.0227, "step": 124480 }, { "epoch": 0.5194190151129507, "grad_norm": 1.2080913439918906, "learning_rate": 2.7752828071305887e-06, "loss": 0.0276, "step": 124485 }, { "epoch": 0.519439877827941, "grad_norm": 0.7248340102816587, "learning_rate": 2.7752270730762863e-06, "loss": 0.0286, "step": 124490 }, { "epoch": 0.5194607405429313, "grad_norm": 1.0677579740189032, "learning_rate": 2.775171342379652e-06, "loss": 0.0328, "step": 124495 }, { "epoch": 0.5194816032579216, "grad_norm": 0.6244823285257536, "learning_rate": 2.77511561504035e-06, "loss": 0.0227, "step": 124500 }, { "epoch": 0.5195024659729118, "grad_norm": 0.684047231446212, "learning_rate": 2.775059891058042e-06, "loss": 0.0192, "step": 124505 }, { "epoch": 0.5195233286879021, "grad_norm": 0.39996193093922955, "learning_rate": 2.7750041704323915e-06, "loss": 0.0176, "step": 124510 }, { "epoch": 0.5195441914028924, "grad_norm": 0.7734937540143905, "learning_rate": 2.7749484531630613e-06, "loss": 0.0245, "step": 124515 }, { "epoch": 0.5195650541178827, "grad_norm": 0.8300775775707152, "learning_rate": 2.7748927392497145e-06, "loss": 0.0286, "step": 124520 }, { "epoch": 0.5195859168328729, "grad_norm": 0.7833888114801695, "learning_rate": 2.7748370286920146e-06, "loss": 0.0254, "step": 124525 }, { "epoch": 0.5196067795478633, "grad_norm": 0.754195929447482, "learning_rate": 2.774781321489625e-06, "loss": 0.0239, "step": 124530 }, { "epoch": 0.5196276422628535, "grad_norm": 0.7824231431180199, "learning_rate": 2.7747256176422076e-06, "loss": 0.0201, "step": 124535 }, { "epoch": 0.5196485049778438, "grad_norm": 0.6957636460495136, "learning_rate": 2.7746699171494267e-06, "loss": 0.0287, "step": 124540 }, { "epoch": 0.5196693676928341, "grad_norm": 0.4989915129041045, "learning_rate": 2.7746142200109454e-06, "loss": 0.025, "step": 124545 }, { "epoch": 0.5196902304078244, "grad_norm": 0.5639133509180887, "learning_rate": 2.7745585262264273e-06, "loss": 0.0363, "step": 124550 }, { "epoch": 0.5197110931228146, "grad_norm": 0.4471886742912987, "learning_rate": 2.7745028357955356e-06, "loss": 0.0213, "step": 124555 }, { "epoch": 0.519731955837805, "grad_norm": 1.1129811418761288, "learning_rate": 2.7744471487179337e-06, "loss": 0.0202, "step": 124560 }, { "epoch": 0.5197528185527952, "grad_norm": 1.1449759770137906, "learning_rate": 2.7743914649932846e-06, "loss": 0.022, "step": 124565 }, { "epoch": 0.5197736812677854, "grad_norm": 0.7708691543102487, "learning_rate": 2.7743357846212526e-06, "loss": 0.0294, "step": 124570 }, { "epoch": 0.5197945439827757, "grad_norm": 0.8591246556750802, "learning_rate": 2.774280107601501e-06, "loss": 0.0285, "step": 124575 }, { "epoch": 0.519815406697766, "grad_norm": 0.8787494231326698, "learning_rate": 2.774224433933694e-06, "loss": 0.0244, "step": 124580 }, { "epoch": 0.5198362694127563, "grad_norm": 0.8288312234217546, "learning_rate": 2.774168763617494e-06, "loss": 0.0224, "step": 124585 }, { "epoch": 0.5198571321277465, "grad_norm": 0.7805700270399938, "learning_rate": 2.774113096652566e-06, "loss": 0.0325, "step": 124590 }, { "epoch": 0.5198779948427369, "grad_norm": 0.4645084764371507, "learning_rate": 2.774057433038573e-06, "loss": 0.0222, "step": 124595 }, { "epoch": 0.5198988575577271, "grad_norm": 0.3454143617904362, "learning_rate": 2.774001772775179e-06, "loss": 0.016, "step": 124600 }, { "epoch": 0.5199197202727174, "grad_norm": 0.6162268662458161, "learning_rate": 2.773946115862048e-06, "loss": 0.0231, "step": 124605 }, { "epoch": 0.5199405829877077, "grad_norm": 1.1137881424322216, "learning_rate": 2.773890462298844e-06, "loss": 0.0275, "step": 124610 }, { "epoch": 0.519961445702698, "grad_norm": 0.8392353100246442, "learning_rate": 2.7738348120852302e-06, "loss": 0.0301, "step": 124615 }, { "epoch": 0.5199823084176882, "grad_norm": 0.648119383386709, "learning_rate": 2.7737791652208725e-06, "loss": 0.019, "step": 124620 }, { "epoch": 0.5200031711326786, "grad_norm": 0.653133530409135, "learning_rate": 2.7737235217054326e-06, "loss": 0.0283, "step": 124625 }, { "epoch": 0.5200240338476688, "grad_norm": 0.2965046227553291, "learning_rate": 2.773667881538576e-06, "loss": 0.0225, "step": 124630 }, { "epoch": 0.5200448965626591, "grad_norm": 0.9729369103484781, "learning_rate": 2.7736122447199665e-06, "loss": 0.0268, "step": 124635 }, { "epoch": 0.5200657592776493, "grad_norm": 0.6894674777848173, "learning_rate": 2.7735566112492685e-06, "loss": 0.0224, "step": 124640 }, { "epoch": 0.5200866219926397, "grad_norm": 0.5425659412497248, "learning_rate": 2.773500981126146e-06, "loss": 0.0231, "step": 124645 }, { "epoch": 0.5201074847076299, "grad_norm": 0.8148755804913927, "learning_rate": 2.7734453543502638e-06, "loss": 0.0289, "step": 124650 }, { "epoch": 0.5201283474226202, "grad_norm": 0.40467413073430486, "learning_rate": 2.773389730921285e-06, "loss": 0.0264, "step": 124655 }, { "epoch": 0.5201492101376105, "grad_norm": 0.8848130486088488, "learning_rate": 2.7733341108388756e-06, "loss": 0.0251, "step": 124660 }, { "epoch": 0.5201700728526008, "grad_norm": 0.71274010982554, "learning_rate": 2.7732784941026993e-06, "loss": 0.0264, "step": 124665 }, { "epoch": 0.520190935567591, "grad_norm": 0.6419186189861501, "learning_rate": 2.77322288071242e-06, "loss": 0.0232, "step": 124670 }, { "epoch": 0.5202117982825814, "grad_norm": 0.4522545230740225, "learning_rate": 2.773167270667703e-06, "loss": 0.02, "step": 124675 }, { "epoch": 0.5202326609975716, "grad_norm": 0.7212780378763695, "learning_rate": 2.7731116639682127e-06, "loss": 0.0314, "step": 124680 }, { "epoch": 0.5202535237125618, "grad_norm": 0.699171924755081, "learning_rate": 2.7730560606136136e-06, "loss": 0.0261, "step": 124685 }, { "epoch": 0.5202743864275521, "grad_norm": 0.5598122742014279, "learning_rate": 2.7730004606035706e-06, "loss": 0.0232, "step": 124690 }, { "epoch": 0.5202952491425424, "grad_norm": 0.6686800143140262, "learning_rate": 2.772944863937749e-06, "loss": 0.02, "step": 124695 }, { "epoch": 0.5203161118575327, "grad_norm": 0.7143460282713266, "learning_rate": 2.7728892706158116e-06, "loss": 0.0236, "step": 124700 }, { "epoch": 0.5203369745725229, "grad_norm": 0.8628130209534585, "learning_rate": 2.772833680637425e-06, "loss": 0.0286, "step": 124705 }, { "epoch": 0.5203578372875133, "grad_norm": 0.9256956390281244, "learning_rate": 2.7727780940022543e-06, "loss": 0.0245, "step": 124710 }, { "epoch": 0.5203787000025035, "grad_norm": 0.6928914999306488, "learning_rate": 2.7727225107099625e-06, "loss": 0.0176, "step": 124715 }, { "epoch": 0.5203995627174938, "grad_norm": 0.8640206930609781, "learning_rate": 2.772666930760216e-06, "loss": 0.0213, "step": 124720 }, { "epoch": 0.5204204254324841, "grad_norm": 0.7132913854250793, "learning_rate": 2.77261135415268e-06, "loss": 0.0249, "step": 124725 }, { "epoch": 0.5204412881474744, "grad_norm": 0.5132693899302831, "learning_rate": 2.772555780887019e-06, "loss": 0.0206, "step": 124730 }, { "epoch": 0.5204621508624646, "grad_norm": 1.106381172322393, "learning_rate": 2.772500210962898e-06, "loss": 0.0287, "step": 124735 }, { "epoch": 0.520483013577455, "grad_norm": 0.7445564522557861, "learning_rate": 2.7724446443799824e-06, "loss": 0.0234, "step": 124740 }, { "epoch": 0.5205038762924452, "grad_norm": 0.9098350412010107, "learning_rate": 2.7723890811379368e-06, "loss": 0.017, "step": 124745 }, { "epoch": 0.5205247390074355, "grad_norm": 0.6978933237205679, "learning_rate": 2.7723335212364273e-06, "loss": 0.0201, "step": 124750 }, { "epoch": 0.5205456017224257, "grad_norm": 1.125586795970285, "learning_rate": 2.7722779646751185e-06, "loss": 0.0298, "step": 124755 }, { "epoch": 0.520566464437416, "grad_norm": 0.670836565241999, "learning_rate": 2.772222411453676e-06, "loss": 0.0186, "step": 124760 }, { "epoch": 0.5205873271524063, "grad_norm": 0.7359207787658634, "learning_rate": 2.772166861571766e-06, "loss": 0.0186, "step": 124765 }, { "epoch": 0.5206081898673965, "grad_norm": 0.5459179905588121, "learning_rate": 2.772111315029053e-06, "loss": 0.0242, "step": 124770 }, { "epoch": 0.5206290525823869, "grad_norm": 1.2479395805668207, "learning_rate": 2.772055771825202e-06, "loss": 0.0288, "step": 124775 }, { "epoch": 0.5206499152973771, "grad_norm": 0.5381959257798747, "learning_rate": 2.7720002319598793e-06, "loss": 0.0202, "step": 124780 }, { "epoch": 0.5206707780123674, "grad_norm": 0.5213509653077572, "learning_rate": 2.77194469543275e-06, "loss": 0.0262, "step": 124785 }, { "epoch": 0.5206916407273577, "grad_norm": 0.7748668754970274, "learning_rate": 2.771889162243481e-06, "loss": 0.0367, "step": 124790 }, { "epoch": 0.520712503442348, "grad_norm": 0.5276467190789325, "learning_rate": 2.771833632391736e-06, "loss": 0.0202, "step": 124795 }, { "epoch": 0.5207333661573382, "grad_norm": 0.7674316412324583, "learning_rate": 2.771778105877182e-06, "loss": 0.022, "step": 124800 }, { "epoch": 0.5207542288723286, "grad_norm": 0.7654425336358675, "learning_rate": 2.771722582699485e-06, "loss": 0.0332, "step": 124805 }, { "epoch": 0.5207750915873188, "grad_norm": 0.7992989693906372, "learning_rate": 2.7716670628583093e-06, "loss": 0.0201, "step": 124810 }, { "epoch": 0.5207959543023091, "grad_norm": 0.4543127596504437, "learning_rate": 2.771611546353322e-06, "loss": 0.0212, "step": 124815 }, { "epoch": 0.5208168170172993, "grad_norm": 0.6937257623629338, "learning_rate": 2.771556033184189e-06, "loss": 0.0217, "step": 124820 }, { "epoch": 0.5208376797322897, "grad_norm": 0.6292958434166769, "learning_rate": 2.7715005233505754e-06, "loss": 0.0251, "step": 124825 }, { "epoch": 0.5208585424472799, "grad_norm": 0.5695426609063132, "learning_rate": 2.7714450168521483e-06, "loss": 0.0265, "step": 124830 }, { "epoch": 0.5208794051622702, "grad_norm": 0.8723689742760177, "learning_rate": 2.771389513688573e-06, "loss": 0.0199, "step": 124835 }, { "epoch": 0.5209002678772605, "grad_norm": 0.530584551933629, "learning_rate": 2.7713340138595153e-06, "loss": 0.0211, "step": 124840 }, { "epoch": 0.5209211305922508, "grad_norm": 0.5092200664103873, "learning_rate": 2.7712785173646417e-06, "loss": 0.0229, "step": 124845 }, { "epoch": 0.520941993307241, "grad_norm": 0.7190966421489334, "learning_rate": 2.7712230242036186e-06, "loss": 0.0189, "step": 124850 }, { "epoch": 0.5209628560222314, "grad_norm": 0.8904492038348502, "learning_rate": 2.771167534376113e-06, "loss": 0.0283, "step": 124855 }, { "epoch": 0.5209837187372216, "grad_norm": 0.7966147872150322, "learning_rate": 2.7711120478817886e-06, "loss": 0.0234, "step": 124860 }, { "epoch": 0.5210045814522118, "grad_norm": 0.4522562726886109, "learning_rate": 2.7710565647203147e-06, "loss": 0.0227, "step": 124865 }, { "epoch": 0.5210254441672021, "grad_norm": 0.4514890571308642, "learning_rate": 2.7710010848913556e-06, "loss": 0.0237, "step": 124870 }, { "epoch": 0.5210463068821924, "grad_norm": 0.4939966645148889, "learning_rate": 2.7709456083945783e-06, "loss": 0.0208, "step": 124875 }, { "epoch": 0.5210671695971827, "grad_norm": 0.650553692950171, "learning_rate": 2.7708901352296496e-06, "loss": 0.0178, "step": 124880 }, { "epoch": 0.5210880323121729, "grad_norm": 0.5390225304228838, "learning_rate": 2.7708346653962357e-06, "loss": 0.0219, "step": 124885 }, { "epoch": 0.5211088950271633, "grad_norm": 0.4384799988364743, "learning_rate": 2.770779198894003e-06, "loss": 0.0184, "step": 124890 }, { "epoch": 0.5211297577421535, "grad_norm": 0.34731133383852536, "learning_rate": 2.7707237357226184e-06, "loss": 0.0217, "step": 124895 }, { "epoch": 0.5211506204571438, "grad_norm": 1.1613547878601802, "learning_rate": 2.770668275881749e-06, "loss": 0.0244, "step": 124900 }, { "epoch": 0.5211714831721341, "grad_norm": 0.4230577117231992, "learning_rate": 2.7706128193710604e-06, "loss": 0.0211, "step": 124905 }, { "epoch": 0.5211923458871244, "grad_norm": 0.45895717547054876, "learning_rate": 2.77055736619022e-06, "loss": 0.0217, "step": 124910 }, { "epoch": 0.5212132086021146, "grad_norm": 0.7653283949035843, "learning_rate": 2.7705019163388947e-06, "loss": 0.0347, "step": 124915 }, { "epoch": 0.521234071317105, "grad_norm": 0.7656554231458804, "learning_rate": 2.7704464698167503e-06, "loss": 0.0243, "step": 124920 }, { "epoch": 0.5212549340320952, "grad_norm": 0.5787290447600106, "learning_rate": 2.7703910266234552e-06, "loss": 0.0217, "step": 124925 }, { "epoch": 0.5212757967470855, "grad_norm": 0.6435672503856319, "learning_rate": 2.7703355867586752e-06, "loss": 0.0216, "step": 124930 }, { "epoch": 0.5212966594620757, "grad_norm": 1.0520223493551226, "learning_rate": 2.7702801502220782e-06, "loss": 0.0282, "step": 124935 }, { "epoch": 0.5213175221770661, "grad_norm": 0.7931131365667946, "learning_rate": 2.77022471701333e-06, "loss": 0.032, "step": 124940 }, { "epoch": 0.5213383848920563, "grad_norm": 0.906224483210409, "learning_rate": 2.7701692871320984e-06, "loss": 0.03, "step": 124945 }, { "epoch": 0.5213592476070465, "grad_norm": 0.9494685623744206, "learning_rate": 2.7701138605780507e-06, "loss": 0.0286, "step": 124950 }, { "epoch": 0.5213801103220369, "grad_norm": 1.2461679670239267, "learning_rate": 2.7700584373508536e-06, "loss": 0.0271, "step": 124955 }, { "epoch": 0.5214009730370271, "grad_norm": 0.9049884912662178, "learning_rate": 2.7700030174501747e-06, "loss": 0.025, "step": 124960 }, { "epoch": 0.5214218357520174, "grad_norm": 0.7931596564871032, "learning_rate": 2.769947600875681e-06, "loss": 0.0294, "step": 124965 }, { "epoch": 0.5214426984670077, "grad_norm": 0.6356693487906573, "learning_rate": 2.7698921876270395e-06, "loss": 0.0184, "step": 124970 }, { "epoch": 0.521463561181998, "grad_norm": 0.7297297529963762, "learning_rate": 2.7698367777039187e-06, "loss": 0.0299, "step": 124975 }, { "epoch": 0.5214844238969882, "grad_norm": 0.6094178682938729, "learning_rate": 2.7697813711059846e-06, "loss": 0.0347, "step": 124980 }, { "epoch": 0.5215052866119786, "grad_norm": 0.6358217818509686, "learning_rate": 2.769725967832905e-06, "loss": 0.0255, "step": 124985 }, { "epoch": 0.5215261493269688, "grad_norm": 0.4326521791236748, "learning_rate": 2.7696705678843477e-06, "loss": 0.0213, "step": 124990 }, { "epoch": 0.5215470120419591, "grad_norm": 0.6129730462377457, "learning_rate": 2.7696151712599805e-06, "loss": 0.0275, "step": 124995 }, { "epoch": 0.5215678747569493, "grad_norm": 0.3114567390415099, "learning_rate": 2.76955977795947e-06, "loss": 0.0146, "step": 125000 }, { "epoch": 0.5215887374719397, "grad_norm": 0.7972966140346347, "learning_rate": 2.769504387982485e-06, "loss": 0.0333, "step": 125005 }, { "epoch": 0.5216096001869299, "grad_norm": 1.2190260248697093, "learning_rate": 2.7694490013286923e-06, "loss": 0.0298, "step": 125010 }, { "epoch": 0.5216304629019202, "grad_norm": 0.5801466205846713, "learning_rate": 2.7693936179977596e-06, "loss": 0.0227, "step": 125015 }, { "epoch": 0.5216513256169105, "grad_norm": 1.1611636518371977, "learning_rate": 2.7693382379893557e-06, "loss": 0.0359, "step": 125020 }, { "epoch": 0.5216721883319008, "grad_norm": 0.47133462797006653, "learning_rate": 2.769282861303147e-06, "loss": 0.0265, "step": 125025 }, { "epoch": 0.521693051046891, "grad_norm": 0.6369881764541886, "learning_rate": 2.7692274879388026e-06, "loss": 0.0223, "step": 125030 }, { "epoch": 0.5217139137618814, "grad_norm": 1.1927229587412786, "learning_rate": 2.7691721178959894e-06, "loss": 0.0244, "step": 125035 }, { "epoch": 0.5217347764768716, "grad_norm": 0.7316083983568884, "learning_rate": 2.769116751174376e-06, "loss": 0.0257, "step": 125040 }, { "epoch": 0.5217556391918619, "grad_norm": 0.8903655486603426, "learning_rate": 2.76906138777363e-06, "loss": 0.0272, "step": 125045 }, { "epoch": 0.5217765019068521, "grad_norm": 0.3601872830949434, "learning_rate": 2.76900602769342e-06, "loss": 0.019, "step": 125050 }, { "epoch": 0.5217973646218425, "grad_norm": 0.6282727404330382, "learning_rate": 2.768950670933413e-06, "loss": 0.0174, "step": 125055 }, { "epoch": 0.5218182273368327, "grad_norm": 0.6975609808964233, "learning_rate": 2.7688953174932782e-06, "loss": 0.0224, "step": 125060 }, { "epoch": 0.5218390900518229, "grad_norm": 0.45177951484121553, "learning_rate": 2.7688399673726834e-06, "loss": 0.0292, "step": 125065 }, { "epoch": 0.5218599527668133, "grad_norm": 0.9265285888966468, "learning_rate": 2.7687846205712964e-06, "loss": 0.0295, "step": 125070 }, { "epoch": 0.5218808154818035, "grad_norm": 0.7166501312282009, "learning_rate": 2.7687292770887865e-06, "loss": 0.025, "step": 125075 }, { "epoch": 0.5219016781967938, "grad_norm": 0.5440185539199216, "learning_rate": 2.7686739369248212e-06, "loss": 0.022, "step": 125080 }, { "epoch": 0.5219225409117841, "grad_norm": 0.5894153334840527, "learning_rate": 2.7686186000790694e-06, "loss": 0.0228, "step": 125085 }, { "epoch": 0.5219434036267744, "grad_norm": 0.6338772302654919, "learning_rate": 2.7685632665511986e-06, "loss": 0.0258, "step": 125090 }, { "epoch": 0.5219642663417646, "grad_norm": 0.785450610677384, "learning_rate": 2.768507936340878e-06, "loss": 0.019, "step": 125095 }, { "epoch": 0.521985129056755, "grad_norm": 0.42569464722807393, "learning_rate": 2.7684526094477765e-06, "loss": 0.0212, "step": 125100 }, { "epoch": 0.5220059917717452, "grad_norm": 0.6632645497468969, "learning_rate": 2.768397285871561e-06, "loss": 0.0262, "step": 125105 }, { "epoch": 0.5220268544867355, "grad_norm": 0.9062761327449803, "learning_rate": 2.768341965611902e-06, "loss": 0.0279, "step": 125110 }, { "epoch": 0.5220477172017257, "grad_norm": 0.45248871930861834, "learning_rate": 2.7682866486684675e-06, "loss": 0.0212, "step": 125115 }, { "epoch": 0.5220685799167161, "grad_norm": 0.5517770696525721, "learning_rate": 2.7682313350409255e-06, "loss": 0.0266, "step": 125120 }, { "epoch": 0.5220894426317063, "grad_norm": 0.8806125760293746, "learning_rate": 2.768176024728946e-06, "loss": 0.0202, "step": 125125 }, { "epoch": 0.5221103053466966, "grad_norm": 1.0003059271717112, "learning_rate": 2.7681207177321963e-06, "loss": 0.0187, "step": 125130 }, { "epoch": 0.5221311680616869, "grad_norm": 0.7845492883250448, "learning_rate": 2.768065414050346e-06, "loss": 0.0275, "step": 125135 }, { "epoch": 0.5221520307766772, "grad_norm": 0.8211748509852316, "learning_rate": 2.7680101136830645e-06, "loss": 0.0198, "step": 125140 }, { "epoch": 0.5221728934916674, "grad_norm": 0.8120817411091672, "learning_rate": 2.7679548166300197e-06, "loss": 0.0249, "step": 125145 }, { "epoch": 0.5221937562066578, "grad_norm": 1.0586514806354927, "learning_rate": 2.7678995228908813e-06, "loss": 0.0274, "step": 125150 }, { "epoch": 0.522214618921648, "grad_norm": 0.9239601117487081, "learning_rate": 2.7678442324653183e-06, "loss": 0.0188, "step": 125155 }, { "epoch": 0.5222354816366382, "grad_norm": 0.5205574009740248, "learning_rate": 2.767788945352999e-06, "loss": 0.0221, "step": 125160 }, { "epoch": 0.5222563443516286, "grad_norm": 3.299850209130491, "learning_rate": 2.7677336615535934e-06, "loss": 0.03, "step": 125165 }, { "epoch": 0.5222772070666188, "grad_norm": 0.803785725008458, "learning_rate": 2.76767838106677e-06, "loss": 0.0294, "step": 125170 }, { "epoch": 0.5222980697816091, "grad_norm": 0.8593427721831225, "learning_rate": 2.767623103892198e-06, "loss": 0.0324, "step": 125175 }, { "epoch": 0.5223189324965993, "grad_norm": 0.5578836793254711, "learning_rate": 2.767567830029547e-06, "loss": 0.0211, "step": 125180 }, { "epoch": 0.5223397952115897, "grad_norm": 3.069434775399338, "learning_rate": 2.767512559478487e-06, "loss": 0.0376, "step": 125185 }, { "epoch": 0.5223606579265799, "grad_norm": 0.7467136418788511, "learning_rate": 2.7674572922386865e-06, "loss": 0.0265, "step": 125190 }, { "epoch": 0.5223815206415702, "grad_norm": 1.0288080430586701, "learning_rate": 2.7674020283098143e-06, "loss": 0.0284, "step": 125195 }, { "epoch": 0.5224023833565605, "grad_norm": 0.9610658123317946, "learning_rate": 2.7673467676915407e-06, "loss": 0.0211, "step": 125200 }, { "epoch": 0.5224232460715508, "grad_norm": 0.8343818120486544, "learning_rate": 2.7672915103835353e-06, "loss": 0.0241, "step": 125205 }, { "epoch": 0.522444108786541, "grad_norm": 0.5262477396051015, "learning_rate": 2.7672362563854667e-06, "loss": 0.0263, "step": 125210 }, { "epoch": 0.5224649715015314, "grad_norm": 0.359310284803592, "learning_rate": 2.7671810056970056e-06, "loss": 0.0201, "step": 125215 }, { "epoch": 0.5224858342165216, "grad_norm": 0.9274379559091815, "learning_rate": 2.76712575831782e-06, "loss": 0.0275, "step": 125220 }, { "epoch": 0.5225066969315119, "grad_norm": 0.5749843438290568, "learning_rate": 2.767070514247582e-06, "loss": 0.0276, "step": 125225 }, { "epoch": 0.5225275596465021, "grad_norm": 0.5545069645082407, "learning_rate": 2.7670152734859586e-06, "loss": 0.0248, "step": 125230 }, { "epoch": 0.5225484223614925, "grad_norm": 1.2121978087962355, "learning_rate": 2.7669600360326216e-06, "loss": 0.0228, "step": 125235 }, { "epoch": 0.5225692850764827, "grad_norm": 0.6548099580407223, "learning_rate": 2.7669048018872396e-06, "loss": 0.0295, "step": 125240 }, { "epoch": 0.522590147791473, "grad_norm": 0.838032799738539, "learning_rate": 2.7668495710494835e-06, "loss": 0.0258, "step": 125245 }, { "epoch": 0.5226110105064633, "grad_norm": 1.0532754592343434, "learning_rate": 2.7667943435190227e-06, "loss": 0.0191, "step": 125250 }, { "epoch": 0.5226318732214535, "grad_norm": 0.4037138334961872, "learning_rate": 2.7667391192955262e-06, "loss": 0.0186, "step": 125255 }, { "epoch": 0.5226527359364438, "grad_norm": 0.6850063303921219, "learning_rate": 2.766683898378665e-06, "loss": 0.0237, "step": 125260 }, { "epoch": 0.5226735986514341, "grad_norm": 0.27795509706885996, "learning_rate": 2.7666286807681096e-06, "loss": 0.0213, "step": 125265 }, { "epoch": 0.5226944613664244, "grad_norm": 0.8494205367532102, "learning_rate": 2.7665734664635295e-06, "loss": 0.0253, "step": 125270 }, { "epoch": 0.5227153240814146, "grad_norm": 0.5442921067222829, "learning_rate": 2.766518255464594e-06, "loss": 0.0241, "step": 125275 }, { "epoch": 0.522736186796405, "grad_norm": 0.39347845308316365, "learning_rate": 2.766463047770975e-06, "loss": 0.0184, "step": 125280 }, { "epoch": 0.5227570495113952, "grad_norm": 0.9683417330052064, "learning_rate": 2.766407843382341e-06, "loss": 0.0215, "step": 125285 }, { "epoch": 0.5227779122263855, "grad_norm": 0.6374916748315314, "learning_rate": 2.766352642298363e-06, "loss": 0.0267, "step": 125290 }, { "epoch": 0.5227987749413757, "grad_norm": 0.7520150305514278, "learning_rate": 2.7662974445187117e-06, "loss": 0.037, "step": 125295 }, { "epoch": 0.5228196376563661, "grad_norm": 0.4608794314907078, "learning_rate": 2.766242250043057e-06, "loss": 0.0186, "step": 125300 }, { "epoch": 0.5228405003713563, "grad_norm": 0.9487834730238283, "learning_rate": 2.766187058871069e-06, "loss": 0.0259, "step": 125305 }, { "epoch": 0.5228613630863466, "grad_norm": 1.192417969264242, "learning_rate": 2.766131871002419e-06, "loss": 0.0189, "step": 125310 }, { "epoch": 0.5228822258013369, "grad_norm": 0.5749408666764426, "learning_rate": 2.766076686436776e-06, "loss": 0.0213, "step": 125315 }, { "epoch": 0.5229030885163272, "grad_norm": 0.9718977912730755, "learning_rate": 2.7660215051738127e-06, "loss": 0.028, "step": 125320 }, { "epoch": 0.5229239512313174, "grad_norm": 0.3150579393627723, "learning_rate": 2.7659663272131976e-06, "loss": 0.0258, "step": 125325 }, { "epoch": 0.5229448139463078, "grad_norm": 0.6041474773432425, "learning_rate": 2.7659111525546027e-06, "loss": 0.0318, "step": 125330 }, { "epoch": 0.522965676661298, "grad_norm": 0.9438255843824317, "learning_rate": 2.7658559811976983e-06, "loss": 0.0315, "step": 125335 }, { "epoch": 0.5229865393762883, "grad_norm": 0.2681511984100552, "learning_rate": 2.7658008131421547e-06, "loss": 0.0203, "step": 125340 }, { "epoch": 0.5230074020912786, "grad_norm": 0.6734355007079718, "learning_rate": 2.7657456483876434e-06, "loss": 0.0185, "step": 125345 }, { "epoch": 0.5230282648062688, "grad_norm": 0.6560020679736731, "learning_rate": 2.765690486933835e-06, "loss": 0.0215, "step": 125350 }, { "epoch": 0.5230491275212591, "grad_norm": 0.5450626054464163, "learning_rate": 2.7656353287803995e-06, "loss": 0.0181, "step": 125355 }, { "epoch": 0.5230699902362493, "grad_norm": 0.7775098000615894, "learning_rate": 2.7655801739270088e-06, "loss": 0.0191, "step": 125360 }, { "epoch": 0.5230908529512397, "grad_norm": 0.35830960915862914, "learning_rate": 2.765525022373334e-06, "loss": 0.0159, "step": 125365 }, { "epoch": 0.5231117156662299, "grad_norm": 0.9199515882592179, "learning_rate": 2.765469874119045e-06, "loss": 0.0224, "step": 125370 }, { "epoch": 0.5231325783812202, "grad_norm": 0.659499838701224, "learning_rate": 2.7654147291638134e-06, "loss": 0.028, "step": 125375 }, { "epoch": 0.5231534410962105, "grad_norm": 1.1247894983302453, "learning_rate": 2.7653595875073103e-06, "loss": 0.033, "step": 125380 }, { "epoch": 0.5231743038112008, "grad_norm": 0.8054141103098081, "learning_rate": 2.7653044491492078e-06, "loss": 0.0267, "step": 125385 }, { "epoch": 0.523195166526191, "grad_norm": 1.1277321060429917, "learning_rate": 2.765249314089175e-06, "loss": 0.0291, "step": 125390 }, { "epoch": 0.5232160292411814, "grad_norm": 3.097313711731182, "learning_rate": 2.7651941823268853e-06, "loss": 0.0182, "step": 125395 }, { "epoch": 0.5232368919561716, "grad_norm": 0.4971346436582648, "learning_rate": 2.765139053862008e-06, "loss": 0.0278, "step": 125400 }, { "epoch": 0.5232577546711619, "grad_norm": 0.4271697392146593, "learning_rate": 2.765083928694216e-06, "loss": 0.0245, "step": 125405 }, { "epoch": 0.5232786173861521, "grad_norm": 0.7808669674418991, "learning_rate": 2.76502880682318e-06, "loss": 0.0251, "step": 125410 }, { "epoch": 0.5232994801011425, "grad_norm": 0.45960375890557364, "learning_rate": 2.7649736882485716e-06, "loss": 0.0201, "step": 125415 }, { "epoch": 0.5233203428161327, "grad_norm": 0.5477361866326335, "learning_rate": 2.7649185729700615e-06, "loss": 0.0272, "step": 125420 }, { "epoch": 0.523341205531123, "grad_norm": 0.8443839291562533, "learning_rate": 2.7648634609873228e-06, "loss": 0.0323, "step": 125425 }, { "epoch": 0.5233620682461133, "grad_norm": 0.4555756654961589, "learning_rate": 2.764808352300025e-06, "loss": 0.0157, "step": 125430 }, { "epoch": 0.5233829309611036, "grad_norm": 0.5530602846207603, "learning_rate": 2.7647532469078412e-06, "loss": 0.0215, "step": 125435 }, { "epoch": 0.5234037936760938, "grad_norm": 0.7298879318031091, "learning_rate": 2.764698144810443e-06, "loss": 0.0222, "step": 125440 }, { "epoch": 0.5234246563910842, "grad_norm": 0.4672967143158373, "learning_rate": 2.7646430460075007e-06, "loss": 0.0276, "step": 125445 }, { "epoch": 0.5234455191060744, "grad_norm": 0.5228699214404192, "learning_rate": 2.764587950498688e-06, "loss": 0.0234, "step": 125450 }, { "epoch": 0.5234663818210646, "grad_norm": 0.6684611492910412, "learning_rate": 2.7645328582836755e-06, "loss": 0.0266, "step": 125455 }, { "epoch": 0.523487244536055, "grad_norm": 0.7902296230711129, "learning_rate": 2.764477769362135e-06, "loss": 0.0203, "step": 125460 }, { "epoch": 0.5235081072510452, "grad_norm": 0.5791115826641856, "learning_rate": 2.7644226837337384e-06, "loss": 0.0276, "step": 125465 }, { "epoch": 0.5235289699660355, "grad_norm": 0.8152429389016801, "learning_rate": 2.764367601398158e-06, "loss": 0.0268, "step": 125470 }, { "epoch": 0.5235498326810257, "grad_norm": 0.4679215259590774, "learning_rate": 2.7643125223550655e-06, "loss": 0.0223, "step": 125475 }, { "epoch": 0.5235706953960161, "grad_norm": 0.6168098454956429, "learning_rate": 2.7642574466041335e-06, "loss": 0.0213, "step": 125480 }, { "epoch": 0.5235915581110063, "grad_norm": 1.0147947807419062, "learning_rate": 2.7642023741450324e-06, "loss": 0.0157, "step": 125485 }, { "epoch": 0.5236124208259966, "grad_norm": 0.7220643801927731, "learning_rate": 2.7641473049774366e-06, "loss": 0.0248, "step": 125490 }, { "epoch": 0.5236332835409869, "grad_norm": 0.670570311482318, "learning_rate": 2.7640922391010157e-06, "loss": 0.0315, "step": 125495 }, { "epoch": 0.5236541462559772, "grad_norm": 0.37547971642019934, "learning_rate": 2.7640371765154444e-06, "loss": 0.0182, "step": 125500 }, { "epoch": 0.5236750089709674, "grad_norm": 0.6207476297219431, "learning_rate": 2.763982117220393e-06, "loss": 0.0322, "step": 125505 }, { "epoch": 0.5236958716859578, "grad_norm": 0.709522773587765, "learning_rate": 2.763927061215535e-06, "loss": 0.0294, "step": 125510 }, { "epoch": 0.523716734400948, "grad_norm": 0.6034934995845702, "learning_rate": 2.7638720085005423e-06, "loss": 0.0256, "step": 125515 }, { "epoch": 0.5237375971159383, "grad_norm": 0.7019894101711998, "learning_rate": 2.763816959075087e-06, "loss": 0.0206, "step": 125520 }, { "epoch": 0.5237584598309285, "grad_norm": 0.5481173449786452, "learning_rate": 2.7637619129388416e-06, "loss": 0.0259, "step": 125525 }, { "epoch": 0.5237793225459189, "grad_norm": 0.7645913198116975, "learning_rate": 2.7637068700914788e-06, "loss": 0.0264, "step": 125530 }, { "epoch": 0.5238001852609091, "grad_norm": 0.6155320892155982, "learning_rate": 2.7636518305326714e-06, "loss": 0.0247, "step": 125535 }, { "epoch": 0.5238210479758993, "grad_norm": 0.6760784972461651, "learning_rate": 2.763596794262091e-06, "loss": 0.0177, "step": 125540 }, { "epoch": 0.5238419106908897, "grad_norm": 0.6426431344578515, "learning_rate": 2.7635417612794113e-06, "loss": 0.0267, "step": 125545 }, { "epoch": 0.52386277340588, "grad_norm": 0.6016547298798705, "learning_rate": 2.763486731584304e-06, "loss": 0.0202, "step": 125550 }, { "epoch": 0.5238836361208702, "grad_norm": 0.5989283818080086, "learning_rate": 2.7634317051764427e-06, "loss": 0.021, "step": 125555 }, { "epoch": 0.5239044988358605, "grad_norm": 0.4415508600482378, "learning_rate": 2.7633766820554993e-06, "loss": 0.017, "step": 125560 }, { "epoch": 0.5239253615508508, "grad_norm": 0.49708929361256304, "learning_rate": 2.7633216622211466e-06, "loss": 0.0228, "step": 125565 }, { "epoch": 0.523946224265841, "grad_norm": 1.036987929589003, "learning_rate": 2.763266645673058e-06, "loss": 0.0216, "step": 125570 }, { "epoch": 0.5239670869808314, "grad_norm": 0.5752456290095144, "learning_rate": 2.7632116324109066e-06, "loss": 0.0338, "step": 125575 }, { "epoch": 0.5239879496958216, "grad_norm": 0.5775886660204405, "learning_rate": 2.763156622434364e-06, "loss": 0.0314, "step": 125580 }, { "epoch": 0.5240088124108119, "grad_norm": 0.7590612067296433, "learning_rate": 2.7631016157431044e-06, "loss": 0.0227, "step": 125585 }, { "epoch": 0.5240296751258021, "grad_norm": 0.34398268668382126, "learning_rate": 2.7630466123368012e-06, "loss": 0.0268, "step": 125590 }, { "epoch": 0.5240505378407925, "grad_norm": 0.778730280237309, "learning_rate": 2.7629916122151257e-06, "loss": 0.0294, "step": 125595 }, { "epoch": 0.5240714005557827, "grad_norm": 0.7173493739315229, "learning_rate": 2.7629366153777528e-06, "loss": 0.0231, "step": 125600 }, { "epoch": 0.524092263270773, "grad_norm": 0.5640400133207719, "learning_rate": 2.7628816218243536e-06, "loss": 0.0162, "step": 125605 }, { "epoch": 0.5241131259857633, "grad_norm": 0.9222666052449561, "learning_rate": 2.762826631554603e-06, "loss": 0.0327, "step": 125610 }, { "epoch": 0.5241339887007536, "grad_norm": 0.6204710293520364, "learning_rate": 2.762771644568174e-06, "loss": 0.023, "step": 125615 }, { "epoch": 0.5241548514157438, "grad_norm": 0.7102336129350352, "learning_rate": 2.76271666086474e-06, "loss": 0.0181, "step": 125620 }, { "epoch": 0.5241757141307342, "grad_norm": 0.9891769688117471, "learning_rate": 2.7626616804439733e-06, "loss": 0.0257, "step": 125625 }, { "epoch": 0.5241965768457244, "grad_norm": 0.6905538306692381, "learning_rate": 2.762606703305548e-06, "loss": 0.022, "step": 125630 }, { "epoch": 0.5242174395607146, "grad_norm": 0.8594655077658071, "learning_rate": 2.7625517294491377e-06, "loss": 0.03, "step": 125635 }, { "epoch": 0.524238302275705, "grad_norm": 0.5373666276794782, "learning_rate": 2.7624967588744155e-06, "loss": 0.0321, "step": 125640 }, { "epoch": 0.5242591649906952, "grad_norm": 1.1998649124826626, "learning_rate": 2.7624417915810554e-06, "loss": 0.0249, "step": 125645 }, { "epoch": 0.5242800277056855, "grad_norm": 0.7923327715413889, "learning_rate": 2.7623868275687295e-06, "loss": 0.0313, "step": 125650 }, { "epoch": 0.5243008904206757, "grad_norm": 0.5441687494109675, "learning_rate": 2.7623318668371133e-06, "loss": 0.0239, "step": 125655 }, { "epoch": 0.5243217531356661, "grad_norm": 0.8644995813838775, "learning_rate": 2.7622769093858796e-06, "loss": 0.0277, "step": 125660 }, { "epoch": 0.5243426158506563, "grad_norm": 0.6609970334886982, "learning_rate": 2.7622219552147022e-06, "loss": 0.0195, "step": 125665 }, { "epoch": 0.5243634785656466, "grad_norm": 0.5685093116871487, "learning_rate": 2.7621670043232546e-06, "loss": 0.0191, "step": 125670 }, { "epoch": 0.5243843412806369, "grad_norm": 0.6175820329611655, "learning_rate": 2.7621120567112103e-06, "loss": 0.0332, "step": 125675 }, { "epoch": 0.5244052039956272, "grad_norm": 1.193562907424745, "learning_rate": 2.7620571123782443e-06, "loss": 0.0271, "step": 125680 }, { "epoch": 0.5244260667106174, "grad_norm": 0.964011620425682, "learning_rate": 2.762002171324029e-06, "loss": 0.03, "step": 125685 }, { "epoch": 0.5244469294256078, "grad_norm": 0.9255910187551551, "learning_rate": 2.7619472335482394e-06, "loss": 0.0292, "step": 125690 }, { "epoch": 0.524467792140598, "grad_norm": 0.7207615484511605, "learning_rate": 2.761892299050549e-06, "loss": 0.0183, "step": 125695 }, { "epoch": 0.5244886548555883, "grad_norm": 0.8762975505705061, "learning_rate": 2.761837367830633e-06, "loss": 0.023, "step": 125700 }, { "epoch": 0.5245095175705785, "grad_norm": 0.7028650250048517, "learning_rate": 2.761782439888163e-06, "loss": 0.0223, "step": 125705 }, { "epoch": 0.5245303802855689, "grad_norm": 0.4437842043042465, "learning_rate": 2.761727515222815e-06, "loss": 0.0201, "step": 125710 }, { "epoch": 0.5245512430005591, "grad_norm": 1.0340073386381974, "learning_rate": 2.7616725938342624e-06, "loss": 0.0214, "step": 125715 }, { "epoch": 0.5245721057155494, "grad_norm": 0.482225945525852, "learning_rate": 2.76161767572218e-06, "loss": 0.0203, "step": 125720 }, { "epoch": 0.5245929684305397, "grad_norm": 0.8297649482176584, "learning_rate": 2.7615627608862412e-06, "loss": 0.0296, "step": 125725 }, { "epoch": 0.52461383114553, "grad_norm": 1.2566953009767643, "learning_rate": 2.7615078493261214e-06, "loss": 0.0286, "step": 125730 }, { "epoch": 0.5246346938605202, "grad_norm": 0.7903303213881044, "learning_rate": 2.7614529410414936e-06, "loss": 0.0239, "step": 125735 }, { "epoch": 0.5246555565755106, "grad_norm": 0.8692189376671856, "learning_rate": 2.7613980360320335e-06, "loss": 0.036, "step": 125740 }, { "epoch": 0.5246764192905008, "grad_norm": 0.6043225675002385, "learning_rate": 2.761343134297414e-06, "loss": 0.0227, "step": 125745 }, { "epoch": 0.524697282005491, "grad_norm": 1.196849308005004, "learning_rate": 2.761288235837311e-06, "loss": 0.032, "step": 125750 }, { "epoch": 0.5247181447204814, "grad_norm": 0.505898616834441, "learning_rate": 2.7612333406513986e-06, "loss": 0.0233, "step": 125755 }, { "epoch": 0.5247390074354716, "grad_norm": 0.9779326979721629, "learning_rate": 2.7611784487393506e-06, "loss": 0.0335, "step": 125760 }, { "epoch": 0.5247598701504619, "grad_norm": 0.5709974343452611, "learning_rate": 2.761123560100843e-06, "loss": 0.0231, "step": 125765 }, { "epoch": 0.5247807328654521, "grad_norm": 0.7768753215308485, "learning_rate": 2.7610686747355492e-06, "loss": 0.0292, "step": 125770 }, { "epoch": 0.5248015955804425, "grad_norm": 0.46255667483985563, "learning_rate": 2.7610137926431445e-06, "loss": 0.0146, "step": 125775 }, { "epoch": 0.5248224582954327, "grad_norm": 0.5325613051711078, "learning_rate": 2.7609589138233027e-06, "loss": 0.0165, "step": 125780 }, { "epoch": 0.524843321010423, "grad_norm": 1.0192909322007035, "learning_rate": 2.7609040382757e-06, "loss": 0.033, "step": 125785 }, { "epoch": 0.5248641837254133, "grad_norm": 0.2051168156233188, "learning_rate": 2.7608491660000107e-06, "loss": 0.0168, "step": 125790 }, { "epoch": 0.5248850464404036, "grad_norm": 0.7374349039785476, "learning_rate": 2.760794296995909e-06, "loss": 0.0268, "step": 125795 }, { "epoch": 0.5249059091553938, "grad_norm": 0.3581495484179143, "learning_rate": 2.7607394312630703e-06, "loss": 0.0216, "step": 125800 }, { "epoch": 0.5249267718703842, "grad_norm": 0.7176571833995667, "learning_rate": 2.7606845688011704e-06, "loss": 0.0313, "step": 125805 }, { "epoch": 0.5249476345853744, "grad_norm": 0.8158646327793885, "learning_rate": 2.7606297096098827e-06, "loss": 0.0266, "step": 125810 }, { "epoch": 0.5249684973003647, "grad_norm": 0.659568976900112, "learning_rate": 2.760574853688884e-06, "loss": 0.0197, "step": 125815 }, { "epoch": 0.524989360015355, "grad_norm": 1.2035598364117397, "learning_rate": 2.7605200010378477e-06, "loss": 0.0252, "step": 125820 }, { "epoch": 0.5250102227303453, "grad_norm": 0.8467592416628733, "learning_rate": 2.7604651516564497e-06, "loss": 0.0226, "step": 125825 }, { "epoch": 0.5250310854453355, "grad_norm": 0.8887068909766223, "learning_rate": 2.7604103055443655e-06, "loss": 0.0246, "step": 125830 }, { "epoch": 0.5250519481603257, "grad_norm": 0.8423659987097983, "learning_rate": 2.7603554627012695e-06, "loss": 0.0429, "step": 125835 }, { "epoch": 0.5250728108753161, "grad_norm": 0.484470140537827, "learning_rate": 2.760300623126838e-06, "loss": 0.0195, "step": 125840 }, { "epoch": 0.5250936735903063, "grad_norm": 0.7895890101724753, "learning_rate": 2.7602457868207463e-06, "loss": 0.0196, "step": 125845 }, { "epoch": 0.5251145363052966, "grad_norm": 1.110348947283008, "learning_rate": 2.760190953782668e-06, "loss": 0.0262, "step": 125850 }, { "epoch": 0.5251353990202869, "grad_norm": 0.34026200675799084, "learning_rate": 2.7601361240122805e-06, "loss": 0.0181, "step": 125855 }, { "epoch": 0.5251562617352772, "grad_norm": 0.9623537540285825, "learning_rate": 2.7600812975092585e-06, "loss": 0.0216, "step": 125860 }, { "epoch": 0.5251771244502674, "grad_norm": 1.3268980383044413, "learning_rate": 2.7600264742732778e-06, "loss": 0.0303, "step": 125865 }, { "epoch": 0.5251979871652578, "grad_norm": 0.6261562025968541, "learning_rate": 2.7599716543040134e-06, "loss": 0.0179, "step": 125870 }, { "epoch": 0.525218849880248, "grad_norm": 0.5271416501726679, "learning_rate": 2.7599168376011414e-06, "loss": 0.0252, "step": 125875 }, { "epoch": 0.5252397125952383, "grad_norm": 0.8998891940873702, "learning_rate": 2.759862024164337e-06, "loss": 0.0206, "step": 125880 }, { "epoch": 0.5252605753102285, "grad_norm": 0.6686812576612935, "learning_rate": 2.759807213993276e-06, "loss": 0.0265, "step": 125885 }, { "epoch": 0.5252814380252189, "grad_norm": 0.5814577905686961, "learning_rate": 2.7597524070876345e-06, "loss": 0.0167, "step": 125890 }, { "epoch": 0.5253023007402091, "grad_norm": 0.5421094763192515, "learning_rate": 2.7596976034470885e-06, "loss": 0.0178, "step": 125895 }, { "epoch": 0.5253231634551994, "grad_norm": 0.5131120673266407, "learning_rate": 2.759642803071313e-06, "loss": 0.0179, "step": 125900 }, { "epoch": 0.5253440261701897, "grad_norm": 1.1998401180359284, "learning_rate": 2.7595880059599838e-06, "loss": 0.0268, "step": 125905 }, { "epoch": 0.52536488888518, "grad_norm": 0.6731290846086, "learning_rate": 2.7595332121127784e-06, "loss": 0.0234, "step": 125910 }, { "epoch": 0.5253857516001702, "grad_norm": 0.8888450507840876, "learning_rate": 2.75947842152937e-06, "loss": 0.0208, "step": 125915 }, { "epoch": 0.5254066143151606, "grad_norm": 0.9262868959746804, "learning_rate": 2.759423634209437e-06, "loss": 0.025, "step": 125920 }, { "epoch": 0.5254274770301508, "grad_norm": 0.9922392875632732, "learning_rate": 2.759368850152655e-06, "loss": 0.0288, "step": 125925 }, { "epoch": 0.525448339745141, "grad_norm": 0.5880868402199461, "learning_rate": 2.759314069358699e-06, "loss": 0.0281, "step": 125930 }, { "epoch": 0.5254692024601314, "grad_norm": 0.5519570546618888, "learning_rate": 2.759259291827246e-06, "loss": 0.0238, "step": 125935 }, { "epoch": 0.5254900651751216, "grad_norm": 0.4918228343409471, "learning_rate": 2.759204517557972e-06, "loss": 0.0206, "step": 125940 }, { "epoch": 0.5255109278901119, "grad_norm": 0.4976497384076822, "learning_rate": 2.7591497465505534e-06, "loss": 0.0205, "step": 125945 }, { "epoch": 0.5255317906051021, "grad_norm": 0.6751521192574166, "learning_rate": 2.7590949788046662e-06, "loss": 0.0285, "step": 125950 }, { "epoch": 0.5255526533200925, "grad_norm": 0.7238169194501606, "learning_rate": 2.7590402143199868e-06, "loss": 0.0168, "step": 125955 }, { "epoch": 0.5255735160350827, "grad_norm": 0.7665565219231103, "learning_rate": 2.758985453096192e-06, "loss": 0.0214, "step": 125960 }, { "epoch": 0.525594378750073, "grad_norm": 0.8284458890748132, "learning_rate": 2.7589306951329575e-06, "loss": 0.0293, "step": 125965 }, { "epoch": 0.5256152414650633, "grad_norm": 0.9516245784542589, "learning_rate": 2.75887594042996e-06, "loss": 0.0271, "step": 125970 }, { "epoch": 0.5256361041800536, "grad_norm": 1.1579362514684126, "learning_rate": 2.758821188986876e-06, "loss": 0.0195, "step": 125975 }, { "epoch": 0.5256569668950438, "grad_norm": 0.47111620392377984, "learning_rate": 2.758766440803382e-06, "loss": 0.0247, "step": 125980 }, { "epoch": 0.5256778296100342, "grad_norm": 0.50012143242262, "learning_rate": 2.7587116958791547e-06, "loss": 0.0206, "step": 125985 }, { "epoch": 0.5256986923250244, "grad_norm": 1.4077943392571415, "learning_rate": 2.7586569542138706e-06, "loss": 0.0338, "step": 125990 }, { "epoch": 0.5257195550400147, "grad_norm": 0.6023956531445783, "learning_rate": 2.758602215807207e-06, "loss": 0.0233, "step": 125995 }, { "epoch": 0.525740417755005, "grad_norm": 0.5817895172581334, "learning_rate": 2.75854748065884e-06, "loss": 0.0239, "step": 126000 }, { "epoch": 0.5257612804699953, "grad_norm": 1.029421293568088, "learning_rate": 2.758492748768446e-06, "loss": 0.0266, "step": 126005 }, { "epoch": 0.5257821431849855, "grad_norm": 0.3851250214799661, "learning_rate": 2.7584380201357023e-06, "loss": 0.0244, "step": 126010 }, { "epoch": 0.5258030058999758, "grad_norm": 0.7317080712171887, "learning_rate": 2.7583832947602856e-06, "loss": 0.0203, "step": 126015 }, { "epoch": 0.5258238686149661, "grad_norm": 0.5322284608406253, "learning_rate": 2.7583285726418733e-06, "loss": 0.0242, "step": 126020 }, { "epoch": 0.5258447313299563, "grad_norm": 0.9881208946681433, "learning_rate": 2.7582738537801417e-06, "loss": 0.0268, "step": 126025 }, { "epoch": 0.5258655940449466, "grad_norm": 1.1249976249722333, "learning_rate": 2.758219138174768e-06, "loss": 0.0188, "step": 126030 }, { "epoch": 0.525886456759937, "grad_norm": 0.7724026891689024, "learning_rate": 2.758164425825429e-06, "loss": 0.0235, "step": 126035 }, { "epoch": 0.5259073194749272, "grad_norm": 0.5893448051044594, "learning_rate": 2.7581097167318026e-06, "loss": 0.0221, "step": 126040 }, { "epoch": 0.5259281821899174, "grad_norm": 1.0710681267160687, "learning_rate": 2.7580550108935655e-06, "loss": 0.0225, "step": 126045 }, { "epoch": 0.5259490449049078, "grad_norm": 0.7166833875789147, "learning_rate": 2.758000308310394e-06, "loss": 0.0217, "step": 126050 }, { "epoch": 0.525969907619898, "grad_norm": 0.5376779485540021, "learning_rate": 2.7579456089819667e-06, "loss": 0.0156, "step": 126055 }, { "epoch": 0.5259907703348883, "grad_norm": 1.0824654494110653, "learning_rate": 2.7578909129079596e-06, "loss": 0.0262, "step": 126060 }, { "epoch": 0.5260116330498785, "grad_norm": 0.679211588902368, "learning_rate": 2.757836220088051e-06, "loss": 0.0318, "step": 126065 }, { "epoch": 0.5260324957648689, "grad_norm": 0.22543466662107622, "learning_rate": 2.7577815305219174e-06, "loss": 0.025, "step": 126070 }, { "epoch": 0.5260533584798591, "grad_norm": 0.6906547274385493, "learning_rate": 2.757726844209237e-06, "loss": 0.029, "step": 126075 }, { "epoch": 0.5260742211948494, "grad_norm": 0.8110240530291865, "learning_rate": 2.7576721611496872e-06, "loss": 0.0223, "step": 126080 }, { "epoch": 0.5260950839098397, "grad_norm": 0.7325337304873797, "learning_rate": 2.7576174813429444e-06, "loss": 0.0332, "step": 126085 }, { "epoch": 0.52611594662483, "grad_norm": 0.6589731106866202, "learning_rate": 2.7575628047886874e-06, "loss": 0.0254, "step": 126090 }, { "epoch": 0.5261368093398202, "grad_norm": 0.851016860966305, "learning_rate": 2.757508131486593e-06, "loss": 0.0282, "step": 126095 }, { "epoch": 0.5261576720548106, "grad_norm": 0.9023193663852727, "learning_rate": 2.757453461436339e-06, "loss": 0.0263, "step": 126100 }, { "epoch": 0.5261785347698008, "grad_norm": 0.22847683682628608, "learning_rate": 2.7573987946376033e-06, "loss": 0.0169, "step": 126105 }, { "epoch": 0.526199397484791, "grad_norm": 0.8429173682037742, "learning_rate": 2.757344131090064e-06, "loss": 0.0235, "step": 126110 }, { "epoch": 0.5262202601997814, "grad_norm": 0.525508677092622, "learning_rate": 2.757289470793397e-06, "loss": 0.0204, "step": 126115 }, { "epoch": 0.5262411229147717, "grad_norm": 0.6557740066042224, "learning_rate": 2.7572348137472826e-06, "loss": 0.0297, "step": 126120 }, { "epoch": 0.5262619856297619, "grad_norm": 0.45665538883561707, "learning_rate": 2.7571801599513965e-06, "loss": 0.0231, "step": 126125 }, { "epoch": 0.5262828483447521, "grad_norm": 0.6425115033720034, "learning_rate": 2.757125509405418e-06, "loss": 0.0256, "step": 126130 }, { "epoch": 0.5263037110597425, "grad_norm": 0.5765765530943102, "learning_rate": 2.7570708621090244e-06, "loss": 0.0305, "step": 126135 }, { "epoch": 0.5263245737747327, "grad_norm": 0.8586617402858351, "learning_rate": 2.757016218061894e-06, "loss": 0.0288, "step": 126140 }, { "epoch": 0.526345436489723, "grad_norm": 0.5778020710291046, "learning_rate": 2.756961577263704e-06, "loss": 0.0249, "step": 126145 }, { "epoch": 0.5263662992047133, "grad_norm": 2.320058103913857, "learning_rate": 2.756906939714134e-06, "loss": 0.0352, "step": 126150 }, { "epoch": 0.5263871619197036, "grad_norm": 0.559340264922227, "learning_rate": 2.7568523054128603e-06, "loss": 0.0314, "step": 126155 }, { "epoch": 0.5264080246346938, "grad_norm": 0.8801594461906274, "learning_rate": 2.7567976743595628e-06, "loss": 0.0251, "step": 126160 }, { "epoch": 0.5264288873496842, "grad_norm": 0.8364958896600287, "learning_rate": 2.756743046553918e-06, "loss": 0.0289, "step": 126165 }, { "epoch": 0.5264497500646744, "grad_norm": 0.33092485038526337, "learning_rate": 2.756688421995605e-06, "loss": 0.0208, "step": 126170 }, { "epoch": 0.5264706127796647, "grad_norm": 0.8098279789387526, "learning_rate": 2.756633800684303e-06, "loss": 0.0226, "step": 126175 }, { "epoch": 0.526491475494655, "grad_norm": 1.0389375499719034, "learning_rate": 2.756579182619688e-06, "loss": 0.0253, "step": 126180 }, { "epoch": 0.5265123382096453, "grad_norm": 0.434386238766175, "learning_rate": 2.7565245678014406e-06, "loss": 0.037, "step": 126185 }, { "epoch": 0.5265332009246355, "grad_norm": 1.019484025914621, "learning_rate": 2.7564699562292385e-06, "loss": 0.0282, "step": 126190 }, { "epoch": 0.5265540636396258, "grad_norm": 0.4192615120469993, "learning_rate": 2.7564153479027595e-06, "loss": 0.0306, "step": 126195 }, { "epoch": 0.5265749263546161, "grad_norm": 0.6650880196582601, "learning_rate": 2.756360742821683e-06, "loss": 0.0173, "step": 126200 }, { "epoch": 0.5265957890696064, "grad_norm": 0.7676881222044772, "learning_rate": 2.7563061409856868e-06, "loss": 0.0243, "step": 126205 }, { "epoch": 0.5266166517845966, "grad_norm": 1.0280635805322147, "learning_rate": 2.75625154239445e-06, "loss": 0.0192, "step": 126210 }, { "epoch": 0.526637514499587, "grad_norm": 0.8020633763920791, "learning_rate": 2.756196947047652e-06, "loss": 0.0235, "step": 126215 }, { "epoch": 0.5266583772145772, "grad_norm": 0.4701713308012733, "learning_rate": 2.7561423549449693e-06, "loss": 0.0258, "step": 126220 }, { "epoch": 0.5266792399295674, "grad_norm": 0.5025044017901295, "learning_rate": 2.756087766086083e-06, "loss": 0.0298, "step": 126225 }, { "epoch": 0.5267001026445578, "grad_norm": 1.9652315386639607, "learning_rate": 2.7560331804706704e-06, "loss": 0.0199, "step": 126230 }, { "epoch": 0.526720965359548, "grad_norm": 0.6373213621628477, "learning_rate": 2.755978598098411e-06, "loss": 0.0387, "step": 126235 }, { "epoch": 0.5267418280745383, "grad_norm": 0.31081328149512466, "learning_rate": 2.755924018968983e-06, "loss": 0.0235, "step": 126240 }, { "epoch": 0.5267626907895285, "grad_norm": 0.5197521882337593, "learning_rate": 2.7558694430820664e-06, "loss": 0.0216, "step": 126245 }, { "epoch": 0.5267835535045189, "grad_norm": 0.8070982652193588, "learning_rate": 2.7558148704373387e-06, "loss": 0.0296, "step": 126250 }, { "epoch": 0.5268044162195091, "grad_norm": 0.4997823853299136, "learning_rate": 2.7557603010344804e-06, "loss": 0.02, "step": 126255 }, { "epoch": 0.5268252789344994, "grad_norm": 1.2743874190894366, "learning_rate": 2.755705734873169e-06, "loss": 0.0305, "step": 126260 }, { "epoch": 0.5268461416494897, "grad_norm": 0.4894817987015197, "learning_rate": 2.755651171953085e-06, "loss": 0.0257, "step": 126265 }, { "epoch": 0.52686700436448, "grad_norm": 0.4307600784990841, "learning_rate": 2.7555966122739062e-06, "loss": 0.0216, "step": 126270 }, { "epoch": 0.5268878670794702, "grad_norm": 1.0593286612274566, "learning_rate": 2.7555420558353137e-06, "loss": 0.0237, "step": 126275 }, { "epoch": 0.5269087297944606, "grad_norm": 0.5346406566097133, "learning_rate": 2.7554875026369848e-06, "loss": 0.026, "step": 126280 }, { "epoch": 0.5269295925094508, "grad_norm": 0.9317898708180217, "learning_rate": 2.7554329526785996e-06, "loss": 0.0261, "step": 126285 }, { "epoch": 0.5269504552244411, "grad_norm": 0.526312823207292, "learning_rate": 2.7553784059598372e-06, "loss": 0.0331, "step": 126290 }, { "epoch": 0.5269713179394314, "grad_norm": 0.6177316830875332, "learning_rate": 2.7553238624803774e-06, "loss": 0.0204, "step": 126295 }, { "epoch": 0.5269921806544217, "grad_norm": 2.1130109837080204, "learning_rate": 2.7552693222398992e-06, "loss": 0.0291, "step": 126300 }, { "epoch": 0.5270130433694119, "grad_norm": 1.5523363951992402, "learning_rate": 2.7552147852380815e-06, "loss": 0.0216, "step": 126305 }, { "epoch": 0.5270339060844021, "grad_norm": 1.6760028807826044, "learning_rate": 2.755160251474605e-06, "loss": 0.0272, "step": 126310 }, { "epoch": 0.5270547687993925, "grad_norm": 0.712353665598104, "learning_rate": 2.7551057209491484e-06, "loss": 0.0207, "step": 126315 }, { "epoch": 0.5270756315143827, "grad_norm": 0.6662560206110847, "learning_rate": 2.7550511936613917e-06, "loss": 0.0238, "step": 126320 }, { "epoch": 0.527096494229373, "grad_norm": 0.9590156870318866, "learning_rate": 2.7549966696110144e-06, "loss": 0.0263, "step": 126325 }, { "epoch": 0.5271173569443633, "grad_norm": 0.6025661191643302, "learning_rate": 2.754942148797696e-06, "loss": 0.0228, "step": 126330 }, { "epoch": 0.5271382196593536, "grad_norm": 1.157719089825917, "learning_rate": 2.754887631221116e-06, "loss": 0.0243, "step": 126335 }, { "epoch": 0.5271590823743438, "grad_norm": 0.26676834766791513, "learning_rate": 2.7548331168809546e-06, "loss": 0.0231, "step": 126340 }, { "epoch": 0.5271799450893342, "grad_norm": 0.6078240218875639, "learning_rate": 2.754778605776891e-06, "loss": 0.0251, "step": 126345 }, { "epoch": 0.5272008078043244, "grad_norm": 0.41844523257606986, "learning_rate": 2.754724097908606e-06, "loss": 0.0228, "step": 126350 }, { "epoch": 0.5272216705193147, "grad_norm": 0.8736826277077552, "learning_rate": 2.754669593275779e-06, "loss": 0.0249, "step": 126355 }, { "epoch": 0.527242533234305, "grad_norm": 0.4462878278633186, "learning_rate": 2.75461509187809e-06, "loss": 0.0211, "step": 126360 }, { "epoch": 0.5272633959492953, "grad_norm": 0.6156912803959876, "learning_rate": 2.7545605937152185e-06, "loss": 0.0209, "step": 126365 }, { "epoch": 0.5272842586642855, "grad_norm": 1.3412399500545855, "learning_rate": 2.754506098786845e-06, "loss": 0.0299, "step": 126370 }, { "epoch": 0.5273051213792758, "grad_norm": 0.8043564832216097, "learning_rate": 2.7544516070926497e-06, "loss": 0.0203, "step": 126375 }, { "epoch": 0.5273259840942661, "grad_norm": 0.5340487590604595, "learning_rate": 2.754397118632312e-06, "loss": 0.027, "step": 126380 }, { "epoch": 0.5273468468092564, "grad_norm": 0.9348479224518483, "learning_rate": 2.754342633405513e-06, "loss": 0.0207, "step": 126385 }, { "epoch": 0.5273677095242466, "grad_norm": 0.7177505765148997, "learning_rate": 2.7542881514119324e-06, "loss": 0.0271, "step": 126390 }, { "epoch": 0.527388572239237, "grad_norm": 0.6233399454010714, "learning_rate": 2.75423367265125e-06, "loss": 0.0312, "step": 126395 }, { "epoch": 0.5274094349542272, "grad_norm": 0.6745475381673769, "learning_rate": 2.754179197123147e-06, "loss": 0.0233, "step": 126400 }, { "epoch": 0.5274302976692175, "grad_norm": 0.38243503944176405, "learning_rate": 2.7541247248273034e-06, "loss": 0.0217, "step": 126405 }, { "epoch": 0.5274511603842078, "grad_norm": 0.7238792493341547, "learning_rate": 2.7540702557633984e-06, "loss": 0.0333, "step": 126410 }, { "epoch": 0.527472023099198, "grad_norm": 0.4696615351221558, "learning_rate": 2.7540157899311147e-06, "loss": 0.0243, "step": 126415 }, { "epoch": 0.5274928858141883, "grad_norm": 0.9302568035539547, "learning_rate": 2.753961327330131e-06, "loss": 0.0326, "step": 126420 }, { "epoch": 0.5275137485291785, "grad_norm": 0.8844990077383803, "learning_rate": 2.7539068679601284e-06, "loss": 0.0318, "step": 126425 }, { "epoch": 0.5275346112441689, "grad_norm": 0.8726987977449385, "learning_rate": 2.753852411820787e-06, "loss": 0.0205, "step": 126430 }, { "epoch": 0.5275554739591591, "grad_norm": 0.8578604415388551, "learning_rate": 2.7537979589117884e-06, "loss": 0.028, "step": 126435 }, { "epoch": 0.5275763366741494, "grad_norm": 0.9729144710786332, "learning_rate": 2.7537435092328123e-06, "loss": 0.0165, "step": 126440 }, { "epoch": 0.5275971993891397, "grad_norm": 0.9805193751711551, "learning_rate": 2.75368906278354e-06, "loss": 0.0234, "step": 126445 }, { "epoch": 0.52761806210413, "grad_norm": 0.6137267481401405, "learning_rate": 2.7536346195636514e-06, "loss": 0.0198, "step": 126450 }, { "epoch": 0.5276389248191202, "grad_norm": 0.6495312819140294, "learning_rate": 2.753580179572828e-06, "loss": 0.0329, "step": 126455 }, { "epoch": 0.5276597875341106, "grad_norm": 0.5005363291396616, "learning_rate": 2.753525742810751e-06, "loss": 0.0279, "step": 126460 }, { "epoch": 0.5276806502491008, "grad_norm": 1.210953288607915, "learning_rate": 2.7534713092771e-06, "loss": 0.0238, "step": 126465 }, { "epoch": 0.5277015129640911, "grad_norm": 1.3174505912935583, "learning_rate": 2.7534168789715563e-06, "loss": 0.0232, "step": 126470 }, { "epoch": 0.5277223756790814, "grad_norm": 0.7456013257931273, "learning_rate": 2.753362451893802e-06, "loss": 0.023, "step": 126475 }, { "epoch": 0.5277432383940717, "grad_norm": 0.8154635135175656, "learning_rate": 2.7533080280435166e-06, "loss": 0.0268, "step": 126480 }, { "epoch": 0.5277641011090619, "grad_norm": 0.8025996494144036, "learning_rate": 2.7532536074203824e-06, "loss": 0.0222, "step": 126485 }, { "epoch": 0.5277849638240522, "grad_norm": 0.7113761617254571, "learning_rate": 2.7531991900240794e-06, "loss": 0.0212, "step": 126490 }, { "epoch": 0.5278058265390425, "grad_norm": 0.7581078719100165, "learning_rate": 2.753144775854289e-06, "loss": 0.023, "step": 126495 }, { "epoch": 0.5278266892540328, "grad_norm": 0.9889234340652497, "learning_rate": 2.7530903649106927e-06, "loss": 0.0221, "step": 126500 }, { "epoch": 0.527847551969023, "grad_norm": 0.6361345416102216, "learning_rate": 2.7530359571929715e-06, "loss": 0.0156, "step": 126505 }, { "epoch": 0.5278684146840134, "grad_norm": 0.9556559184729825, "learning_rate": 2.752981552700807e-06, "loss": 0.0266, "step": 126510 }, { "epoch": 0.5278892773990036, "grad_norm": 0.41777502630711333, "learning_rate": 2.7529271514338798e-06, "loss": 0.0194, "step": 126515 }, { "epoch": 0.5279101401139938, "grad_norm": 0.6010999383992971, "learning_rate": 2.7528727533918714e-06, "loss": 0.0179, "step": 126520 }, { "epoch": 0.5279310028289842, "grad_norm": 0.4560333973727261, "learning_rate": 2.7528183585744645e-06, "loss": 0.0271, "step": 126525 }, { "epoch": 0.5279518655439744, "grad_norm": 0.6873564755876711, "learning_rate": 2.752763966981339e-06, "loss": 0.0284, "step": 126530 }, { "epoch": 0.5279727282589647, "grad_norm": 0.3607484766300134, "learning_rate": 2.752709578612176e-06, "loss": 0.0261, "step": 126535 }, { "epoch": 0.527993590973955, "grad_norm": 0.7317007719342861, "learning_rate": 2.7526551934666583e-06, "loss": 0.0266, "step": 126540 }, { "epoch": 0.5280144536889453, "grad_norm": 1.1847906864162312, "learning_rate": 2.7526008115444675e-06, "loss": 0.0387, "step": 126545 }, { "epoch": 0.5280353164039355, "grad_norm": 2.1876044013652534, "learning_rate": 2.7525464328452844e-06, "loss": 0.0295, "step": 126550 }, { "epoch": 0.5280561791189258, "grad_norm": 0.5132105215074397, "learning_rate": 2.752492057368791e-06, "loss": 0.0212, "step": 126555 }, { "epoch": 0.5280770418339161, "grad_norm": 0.31093196767924797, "learning_rate": 2.7524376851146695e-06, "loss": 0.0336, "step": 126560 }, { "epoch": 0.5280979045489064, "grad_norm": 0.8967812386460197, "learning_rate": 2.7523833160826007e-06, "loss": 0.0267, "step": 126565 }, { "epoch": 0.5281187672638966, "grad_norm": 0.4453307897754116, "learning_rate": 2.7523289502722674e-06, "loss": 0.0232, "step": 126570 }, { "epoch": 0.528139629978887, "grad_norm": 0.27690692925024657, "learning_rate": 2.75227458768335e-06, "loss": 0.0221, "step": 126575 }, { "epoch": 0.5281604926938772, "grad_norm": 0.9235322788027691, "learning_rate": 2.7522202283155314e-06, "loss": 0.0289, "step": 126580 }, { "epoch": 0.5281813554088675, "grad_norm": 0.5721704256196259, "learning_rate": 2.7521658721684934e-06, "loss": 0.0256, "step": 126585 }, { "epoch": 0.5282022181238578, "grad_norm": 0.6039049684251068, "learning_rate": 2.752111519241918e-06, "loss": 0.0237, "step": 126590 }, { "epoch": 0.5282230808388481, "grad_norm": 1.0947530238737841, "learning_rate": 2.7520571695354877e-06, "loss": 0.0257, "step": 126595 }, { "epoch": 0.5282439435538383, "grad_norm": 0.5224255138841952, "learning_rate": 2.7520028230488834e-06, "loss": 0.0285, "step": 126600 }, { "epoch": 0.5282648062688285, "grad_norm": 0.7885009013573635, "learning_rate": 2.7519484797817873e-06, "loss": 0.0152, "step": 126605 }, { "epoch": 0.5282856689838189, "grad_norm": 0.9116563446629463, "learning_rate": 2.7518941397338828e-06, "loss": 0.0228, "step": 126610 }, { "epoch": 0.5283065316988091, "grad_norm": 0.7910181896183619, "learning_rate": 2.751839802904851e-06, "loss": 0.0187, "step": 126615 }, { "epoch": 0.5283273944137994, "grad_norm": 0.35109309771571756, "learning_rate": 2.751785469294374e-06, "loss": 0.0196, "step": 126620 }, { "epoch": 0.5283482571287897, "grad_norm": 0.47589587471218964, "learning_rate": 2.751731138902135e-06, "loss": 0.019, "step": 126625 }, { "epoch": 0.52836911984378, "grad_norm": 0.445471852840619, "learning_rate": 2.7516768117278157e-06, "loss": 0.0208, "step": 126630 }, { "epoch": 0.5283899825587702, "grad_norm": 1.3928259345904328, "learning_rate": 2.7516224877710977e-06, "loss": 0.0253, "step": 126635 }, { "epoch": 0.5284108452737606, "grad_norm": 1.277549173174916, "learning_rate": 2.7515681670316654e-06, "loss": 0.0222, "step": 126640 }, { "epoch": 0.5284317079887508, "grad_norm": 0.8778608387546349, "learning_rate": 2.7515138495091998e-06, "loss": 0.0287, "step": 126645 }, { "epoch": 0.5284525707037411, "grad_norm": 0.728057196972778, "learning_rate": 2.7514595352033833e-06, "loss": 0.0245, "step": 126650 }, { "epoch": 0.5284734334187314, "grad_norm": 0.7849791742974653, "learning_rate": 2.7514052241138996e-06, "loss": 0.0347, "step": 126655 }, { "epoch": 0.5284942961337217, "grad_norm": 0.343118001500242, "learning_rate": 2.7513509162404293e-06, "loss": 0.0208, "step": 126660 }, { "epoch": 0.5285151588487119, "grad_norm": 0.5978196946901522, "learning_rate": 2.7512966115826565e-06, "loss": 0.0218, "step": 126665 }, { "epoch": 0.5285360215637022, "grad_norm": 0.4892944787118525, "learning_rate": 2.751242310140264e-06, "loss": 0.0204, "step": 126670 }, { "epoch": 0.5285568842786925, "grad_norm": 0.8079084717319936, "learning_rate": 2.751188011912934e-06, "loss": 0.0286, "step": 126675 }, { "epoch": 0.5285777469936828, "grad_norm": 0.7115808969850179, "learning_rate": 2.75113371690035e-06, "loss": 0.0252, "step": 126680 }, { "epoch": 0.528598609708673, "grad_norm": 0.8708347441848465, "learning_rate": 2.7510794251021933e-06, "loss": 0.0289, "step": 126685 }, { "epoch": 0.5286194724236634, "grad_norm": 0.4845213179098802, "learning_rate": 2.751025136518147e-06, "loss": 0.0253, "step": 126690 }, { "epoch": 0.5286403351386536, "grad_norm": 0.4503689537508514, "learning_rate": 2.7509708511478956e-06, "loss": 0.017, "step": 126695 }, { "epoch": 0.5286611978536438, "grad_norm": 1.231315635982899, "learning_rate": 2.7509165689911206e-06, "loss": 0.0196, "step": 126700 }, { "epoch": 0.5286820605686342, "grad_norm": 1.0943619447285555, "learning_rate": 2.750862290047506e-06, "loss": 0.0222, "step": 126705 }, { "epoch": 0.5287029232836244, "grad_norm": 1.0224236404793028, "learning_rate": 2.7508080143167328e-06, "loss": 0.0279, "step": 126710 }, { "epoch": 0.5287237859986147, "grad_norm": 0.3690171724371088, "learning_rate": 2.7507537417984863e-06, "loss": 0.02, "step": 126715 }, { "epoch": 0.528744648713605, "grad_norm": 0.6182220684420637, "learning_rate": 2.7506994724924484e-06, "loss": 0.0259, "step": 126720 }, { "epoch": 0.5287655114285953, "grad_norm": 0.5145894787520714, "learning_rate": 2.750645206398303e-06, "loss": 0.0244, "step": 126725 }, { "epoch": 0.5287863741435855, "grad_norm": 0.48714709975632287, "learning_rate": 2.7505909435157324e-06, "loss": 0.0263, "step": 126730 }, { "epoch": 0.5288072368585758, "grad_norm": 0.5085365321506465, "learning_rate": 2.7505366838444202e-06, "loss": 0.0257, "step": 126735 }, { "epoch": 0.5288280995735661, "grad_norm": 0.6589748265138685, "learning_rate": 2.7504824273840503e-06, "loss": 0.0227, "step": 126740 }, { "epoch": 0.5288489622885564, "grad_norm": 0.49323256563288137, "learning_rate": 2.7504281741343054e-06, "loss": 0.0259, "step": 126745 }, { "epoch": 0.5288698250035466, "grad_norm": 0.5643015532116104, "learning_rate": 2.7503739240948683e-06, "loss": 0.0247, "step": 126750 }, { "epoch": 0.528890687718537, "grad_norm": 1.1730438593618004, "learning_rate": 2.750319677265424e-06, "loss": 0.0211, "step": 126755 }, { "epoch": 0.5289115504335272, "grad_norm": 0.6619170948077668, "learning_rate": 2.7502654336456547e-06, "loss": 0.0234, "step": 126760 }, { "epoch": 0.5289324131485175, "grad_norm": 0.39802413922993896, "learning_rate": 2.7502111932352437e-06, "loss": 0.023, "step": 126765 }, { "epoch": 0.5289532758635078, "grad_norm": 1.0090522048445092, "learning_rate": 2.7501569560338755e-06, "loss": 0.0247, "step": 126770 }, { "epoch": 0.5289741385784981, "grad_norm": 1.1206622733511924, "learning_rate": 2.7501027220412328e-06, "loss": 0.025, "step": 126775 }, { "epoch": 0.5289950012934883, "grad_norm": 1.4658754853717302, "learning_rate": 2.750048491257e-06, "loss": 0.0244, "step": 126780 }, { "epoch": 0.5290158640084786, "grad_norm": 0.6948255703589098, "learning_rate": 2.7499942636808607e-06, "loss": 0.0236, "step": 126785 }, { "epoch": 0.5290367267234689, "grad_norm": 0.7019331020023005, "learning_rate": 2.749940039312498e-06, "loss": 0.0188, "step": 126790 }, { "epoch": 0.5290575894384592, "grad_norm": 1.0312567602269924, "learning_rate": 2.7498858181515957e-06, "loss": 0.0257, "step": 126795 }, { "epoch": 0.5290784521534494, "grad_norm": 0.6302925361331271, "learning_rate": 2.7498316001978387e-06, "loss": 0.0219, "step": 126800 }, { "epoch": 0.5290993148684398, "grad_norm": 0.4417406080008031, "learning_rate": 2.749777385450909e-06, "loss": 0.016, "step": 126805 }, { "epoch": 0.52912017758343, "grad_norm": 0.9997615146766995, "learning_rate": 2.749723173910493e-06, "loss": 0.0317, "step": 126810 }, { "epoch": 0.5291410402984202, "grad_norm": 0.7075712294006982, "learning_rate": 2.749668965576272e-06, "loss": 0.0262, "step": 126815 }, { "epoch": 0.5291619030134106, "grad_norm": 0.4578510713613631, "learning_rate": 2.749614760447931e-06, "loss": 0.0192, "step": 126820 }, { "epoch": 0.5291827657284008, "grad_norm": 0.4917441953174433, "learning_rate": 2.749560558525155e-06, "loss": 0.0248, "step": 126825 }, { "epoch": 0.5292036284433911, "grad_norm": 0.6382860833477895, "learning_rate": 2.7495063598076273e-06, "loss": 0.023, "step": 126830 }, { "epoch": 0.5292244911583814, "grad_norm": 1.0827203342466616, "learning_rate": 2.749452164295031e-06, "loss": 0.0343, "step": 126835 }, { "epoch": 0.5292453538733717, "grad_norm": 0.8134660073018477, "learning_rate": 2.7493979719870516e-06, "loss": 0.0261, "step": 126840 }, { "epoch": 0.5292662165883619, "grad_norm": 0.7093564410031239, "learning_rate": 2.749343782883373e-06, "loss": 0.0167, "step": 126845 }, { "epoch": 0.5292870793033522, "grad_norm": 1.0240673158919826, "learning_rate": 2.7492895969836797e-06, "loss": 0.0285, "step": 126850 }, { "epoch": 0.5293079420183425, "grad_norm": 0.449188732367003, "learning_rate": 2.7492354142876547e-06, "loss": 0.0276, "step": 126855 }, { "epoch": 0.5293288047333328, "grad_norm": 0.7107208252784105, "learning_rate": 2.749181234794984e-06, "loss": 0.0218, "step": 126860 }, { "epoch": 0.529349667448323, "grad_norm": 0.539767758889349, "learning_rate": 2.74912705850535e-06, "loss": 0.0209, "step": 126865 }, { "epoch": 0.5293705301633134, "grad_norm": 0.7434585781413693, "learning_rate": 2.74907288541844e-06, "loss": 0.0191, "step": 126870 }, { "epoch": 0.5293913928783036, "grad_norm": 0.7099969322426631, "learning_rate": 2.7490187155339358e-06, "loss": 0.0227, "step": 126875 }, { "epoch": 0.5294122555932939, "grad_norm": 0.8088365620887824, "learning_rate": 2.748964548851523e-06, "loss": 0.0195, "step": 126880 }, { "epoch": 0.5294331183082842, "grad_norm": 0.7310647880919529, "learning_rate": 2.748910385370886e-06, "loss": 0.0294, "step": 126885 }, { "epoch": 0.5294539810232745, "grad_norm": 0.9752916455444236, "learning_rate": 2.7488562250917095e-06, "loss": 0.0273, "step": 126890 }, { "epoch": 0.5294748437382647, "grad_norm": 0.3062290019304916, "learning_rate": 2.748802068013678e-06, "loss": 0.026, "step": 126895 }, { "epoch": 0.529495706453255, "grad_norm": 0.7784453477809589, "learning_rate": 2.748747914136476e-06, "loss": 0.0219, "step": 126900 }, { "epoch": 0.5295165691682453, "grad_norm": 0.7252745604757398, "learning_rate": 2.748693763459788e-06, "loss": 0.0219, "step": 126905 }, { "epoch": 0.5295374318832355, "grad_norm": 0.487345988058253, "learning_rate": 2.7486396159833003e-06, "loss": 0.0243, "step": 126910 }, { "epoch": 0.5295582945982258, "grad_norm": 0.5554106892044942, "learning_rate": 2.748585471706696e-06, "loss": 0.0219, "step": 126915 }, { "epoch": 0.5295791573132161, "grad_norm": 0.5772410299231195, "learning_rate": 2.74853133062966e-06, "loss": 0.0197, "step": 126920 }, { "epoch": 0.5296000200282064, "grad_norm": 0.5490020388744091, "learning_rate": 2.7484771927518784e-06, "loss": 0.0225, "step": 126925 }, { "epoch": 0.5296208827431966, "grad_norm": 1.2035035867089947, "learning_rate": 2.748423058073035e-06, "loss": 0.0191, "step": 126930 }, { "epoch": 0.529641745458187, "grad_norm": 0.7947145414690308, "learning_rate": 2.7483689265928154e-06, "loss": 0.0295, "step": 126935 }, { "epoch": 0.5296626081731772, "grad_norm": 0.38150177628204957, "learning_rate": 2.7483147983109044e-06, "loss": 0.0212, "step": 126940 }, { "epoch": 0.5296834708881675, "grad_norm": 0.8660125555320587, "learning_rate": 2.7482606732269866e-06, "loss": 0.0253, "step": 126945 }, { "epoch": 0.5297043336031578, "grad_norm": 0.45148573977807277, "learning_rate": 2.7482065513407486e-06, "loss": 0.0245, "step": 126950 }, { "epoch": 0.5297251963181481, "grad_norm": 2.016224980039896, "learning_rate": 2.7481524326518736e-06, "loss": 0.0327, "step": 126955 }, { "epoch": 0.5297460590331383, "grad_norm": 0.7841744966616758, "learning_rate": 2.7480983171600483e-06, "loss": 0.024, "step": 126960 }, { "epoch": 0.5297669217481286, "grad_norm": 0.9065656085215811, "learning_rate": 2.7480442048649575e-06, "loss": 0.03, "step": 126965 }, { "epoch": 0.5297877844631189, "grad_norm": 0.3373959755682142, "learning_rate": 2.7479900957662857e-06, "loss": 0.0175, "step": 126970 }, { "epoch": 0.5298086471781092, "grad_norm": 0.9172418170617773, "learning_rate": 2.74793598986372e-06, "loss": 0.0199, "step": 126975 }, { "epoch": 0.5298295098930994, "grad_norm": 0.39371588694245324, "learning_rate": 2.7478818871569442e-06, "loss": 0.0231, "step": 126980 }, { "epoch": 0.5298503726080898, "grad_norm": 0.8623395733287145, "learning_rate": 2.747827787645643e-06, "loss": 0.0238, "step": 126985 }, { "epoch": 0.52987123532308, "grad_norm": 0.6400514343677022, "learning_rate": 2.7477736913295044e-06, "loss": 0.0258, "step": 126990 }, { "epoch": 0.5298920980380702, "grad_norm": 0.6896628235540319, "learning_rate": 2.7477195982082123e-06, "loss": 0.0171, "step": 126995 }, { "epoch": 0.5299129607530606, "grad_norm": 0.5012700707720023, "learning_rate": 2.747665508281452e-06, "loss": 0.0281, "step": 127000 }, { "epoch": 0.5299338234680508, "grad_norm": 0.7332796775323315, "learning_rate": 2.7476114215489104e-06, "loss": 0.0246, "step": 127005 }, { "epoch": 0.5299546861830411, "grad_norm": 0.802688854397893, "learning_rate": 2.747557338010271e-06, "loss": 0.0203, "step": 127010 }, { "epoch": 0.5299755488980314, "grad_norm": 0.46558547405134143, "learning_rate": 2.747503257665222e-06, "loss": 0.0172, "step": 127015 }, { "epoch": 0.5299964116130217, "grad_norm": 0.6169769449640146, "learning_rate": 2.7474491805134475e-06, "loss": 0.0269, "step": 127020 }, { "epoch": 0.5300172743280119, "grad_norm": 0.4391095965028616, "learning_rate": 2.7473951065546333e-06, "loss": 0.0273, "step": 127025 }, { "epoch": 0.5300381370430022, "grad_norm": 0.7309468541720636, "learning_rate": 2.7473410357884655e-06, "loss": 0.0238, "step": 127030 }, { "epoch": 0.5300589997579925, "grad_norm": 0.4097515732132095, "learning_rate": 2.74728696821463e-06, "loss": 0.0201, "step": 127035 }, { "epoch": 0.5300798624729828, "grad_norm": 0.1779329124407793, "learning_rate": 2.7472329038328127e-06, "loss": 0.0251, "step": 127040 }, { "epoch": 0.530100725187973, "grad_norm": 0.40896552324476926, "learning_rate": 2.7471788426427e-06, "loss": 0.0281, "step": 127045 }, { "epoch": 0.5301215879029634, "grad_norm": 0.5274364444499066, "learning_rate": 2.7471247846439767e-06, "loss": 0.026, "step": 127050 }, { "epoch": 0.5301424506179536, "grad_norm": 0.7060553018261775, "learning_rate": 2.7470707298363298e-06, "loss": 0.0214, "step": 127055 }, { "epoch": 0.5301633133329439, "grad_norm": 1.2320268661754574, "learning_rate": 2.7470166782194445e-06, "loss": 0.0396, "step": 127060 }, { "epoch": 0.5301841760479342, "grad_norm": 1.1189865047101704, "learning_rate": 2.746962629793008e-06, "loss": 0.0254, "step": 127065 }, { "epoch": 0.5302050387629245, "grad_norm": 0.9265869915435879, "learning_rate": 2.7469085845567058e-06, "loss": 0.0282, "step": 127070 }, { "epoch": 0.5302259014779147, "grad_norm": 0.8444613874493825, "learning_rate": 2.7468545425102242e-06, "loss": 0.0232, "step": 127075 }, { "epoch": 0.5302467641929051, "grad_norm": 0.4756117476783004, "learning_rate": 2.746800503653249e-06, "loss": 0.0209, "step": 127080 }, { "epoch": 0.5302676269078953, "grad_norm": 0.6289644415889136, "learning_rate": 2.7467464679854674e-06, "loss": 0.0173, "step": 127085 }, { "epoch": 0.5302884896228856, "grad_norm": 1.2050391222821037, "learning_rate": 2.7466924355065648e-06, "loss": 0.0235, "step": 127090 }, { "epoch": 0.5303093523378758, "grad_norm": 0.6618452663974618, "learning_rate": 2.7466384062162283e-06, "loss": 0.0273, "step": 127095 }, { "epoch": 0.5303302150528661, "grad_norm": 0.4878900267552618, "learning_rate": 2.746584380114144e-06, "loss": 0.0156, "step": 127100 }, { "epoch": 0.5303510777678564, "grad_norm": 0.4441321319475865, "learning_rate": 2.746530357199998e-06, "loss": 0.0449, "step": 127105 }, { "epoch": 0.5303719404828466, "grad_norm": 0.6135040691593702, "learning_rate": 2.7464763374734778e-06, "loss": 0.0153, "step": 127110 }, { "epoch": 0.530392803197837, "grad_norm": 0.705213380060245, "learning_rate": 2.746422320934268e-06, "loss": 0.0217, "step": 127115 }, { "epoch": 0.5304136659128272, "grad_norm": 0.38557535564355694, "learning_rate": 2.7463683075820573e-06, "loss": 0.0174, "step": 127120 }, { "epoch": 0.5304345286278175, "grad_norm": 0.5118384687571041, "learning_rate": 2.7463142974165314e-06, "loss": 0.0235, "step": 127125 }, { "epoch": 0.5304553913428078, "grad_norm": 1.3184106990014015, "learning_rate": 2.746260290437377e-06, "loss": 0.0275, "step": 127130 }, { "epoch": 0.5304762540577981, "grad_norm": 0.6394271478228324, "learning_rate": 2.7462062866442806e-06, "loss": 0.0211, "step": 127135 }, { "epoch": 0.5304971167727883, "grad_norm": 0.8563893794041828, "learning_rate": 2.7461522860369294e-06, "loss": 0.0253, "step": 127140 }, { "epoch": 0.5305179794877786, "grad_norm": 0.5362412744530695, "learning_rate": 2.74609828861501e-06, "loss": 0.0215, "step": 127145 }, { "epoch": 0.5305388422027689, "grad_norm": 0.6238567485767574, "learning_rate": 2.7460442943782095e-06, "loss": 0.0257, "step": 127150 }, { "epoch": 0.5305597049177592, "grad_norm": 1.0108481144916341, "learning_rate": 2.7459903033262137e-06, "loss": 0.0343, "step": 127155 }, { "epoch": 0.5305805676327494, "grad_norm": 1.6585349564827252, "learning_rate": 2.7459363154587102e-06, "loss": 0.0246, "step": 127160 }, { "epoch": 0.5306014303477398, "grad_norm": 0.6188665849645307, "learning_rate": 2.745882330775387e-06, "loss": 0.02, "step": 127165 }, { "epoch": 0.53062229306273, "grad_norm": 0.6658372297106027, "learning_rate": 2.74582834927593e-06, "loss": 0.0276, "step": 127170 }, { "epoch": 0.5306431557777203, "grad_norm": 0.9560320006201792, "learning_rate": 2.745774370960026e-06, "loss": 0.0231, "step": 127175 }, { "epoch": 0.5306640184927106, "grad_norm": 0.8362828552151443, "learning_rate": 2.7457203958273625e-06, "loss": 0.0229, "step": 127180 }, { "epoch": 0.5306848812077009, "grad_norm": 0.8929345861362852, "learning_rate": 2.745666423877627e-06, "loss": 0.0309, "step": 127185 }, { "epoch": 0.5307057439226911, "grad_norm": 0.6323041927953915, "learning_rate": 2.745612455110507e-06, "loss": 0.0209, "step": 127190 }, { "epoch": 0.5307266066376815, "grad_norm": 0.8887198134120512, "learning_rate": 2.7455584895256882e-06, "loss": 0.0236, "step": 127195 }, { "epoch": 0.5307474693526717, "grad_norm": 0.719001732912442, "learning_rate": 2.7455045271228586e-06, "loss": 0.0258, "step": 127200 }, { "epoch": 0.5307683320676619, "grad_norm": 0.4241875843335451, "learning_rate": 2.7454505679017057e-06, "loss": 0.0201, "step": 127205 }, { "epoch": 0.5307891947826522, "grad_norm": 0.6870697980426075, "learning_rate": 2.7453966118619175e-06, "loss": 0.0283, "step": 127210 }, { "epoch": 0.5308100574976425, "grad_norm": 1.0949020429574383, "learning_rate": 2.7453426590031802e-06, "loss": 0.0287, "step": 127215 }, { "epoch": 0.5308309202126328, "grad_norm": 0.482686035319794, "learning_rate": 2.7452887093251817e-06, "loss": 0.0295, "step": 127220 }, { "epoch": 0.530851782927623, "grad_norm": 1.2857213700662653, "learning_rate": 2.7452347628276097e-06, "loss": 0.0193, "step": 127225 }, { "epoch": 0.5308726456426134, "grad_norm": 0.6336286424938632, "learning_rate": 2.745180819510152e-06, "loss": 0.0242, "step": 127230 }, { "epoch": 0.5308935083576036, "grad_norm": 0.5179405260495149, "learning_rate": 2.7451268793724954e-06, "loss": 0.0203, "step": 127235 }, { "epoch": 0.5309143710725939, "grad_norm": 0.4903993172147199, "learning_rate": 2.7450729424143275e-06, "loss": 0.0336, "step": 127240 }, { "epoch": 0.5309352337875842, "grad_norm": 0.5190906398472404, "learning_rate": 2.745019008635337e-06, "loss": 0.0259, "step": 127245 }, { "epoch": 0.5309560965025745, "grad_norm": 0.6755832219876229, "learning_rate": 2.74496507803521e-06, "loss": 0.0249, "step": 127250 }, { "epoch": 0.5309769592175647, "grad_norm": 0.9398922440972515, "learning_rate": 2.744911150613636e-06, "loss": 0.0313, "step": 127255 }, { "epoch": 0.5309978219325551, "grad_norm": 1.8431770406713297, "learning_rate": 2.744857226370301e-06, "loss": 0.0191, "step": 127260 }, { "epoch": 0.5310186846475453, "grad_norm": 0.8123423374116164, "learning_rate": 2.7448033053048946e-06, "loss": 0.021, "step": 127265 }, { "epoch": 0.5310395473625356, "grad_norm": 0.8454729533216943, "learning_rate": 2.7447493874171034e-06, "loss": 0.0363, "step": 127270 }, { "epoch": 0.5310604100775258, "grad_norm": 0.6381912008595813, "learning_rate": 2.744695472706616e-06, "loss": 0.0256, "step": 127275 }, { "epoch": 0.5310812727925162, "grad_norm": 0.5208994681854412, "learning_rate": 2.74464156117312e-06, "loss": 0.0242, "step": 127280 }, { "epoch": 0.5311021355075064, "grad_norm": 0.5974599735561178, "learning_rate": 2.744587652816303e-06, "loss": 0.0208, "step": 127285 }, { "epoch": 0.5311229982224966, "grad_norm": 1.0801707661021362, "learning_rate": 2.744533747635854e-06, "loss": 0.0211, "step": 127290 }, { "epoch": 0.531143860937487, "grad_norm": 0.6181311161920714, "learning_rate": 2.7444798456314607e-06, "loss": 0.0251, "step": 127295 }, { "epoch": 0.5311647236524772, "grad_norm": 0.8621304419453052, "learning_rate": 2.7444259468028113e-06, "loss": 0.0208, "step": 127300 }, { "epoch": 0.5311855863674675, "grad_norm": 0.5768312679140821, "learning_rate": 2.7443720511495935e-06, "loss": 0.0269, "step": 127305 }, { "epoch": 0.5312064490824578, "grad_norm": 0.8260982019773506, "learning_rate": 2.7443181586714957e-06, "loss": 0.0265, "step": 127310 }, { "epoch": 0.5312273117974481, "grad_norm": 0.6765578044552356, "learning_rate": 2.7442642693682065e-06, "loss": 0.0337, "step": 127315 }, { "epoch": 0.5312481745124383, "grad_norm": 0.5460854828502869, "learning_rate": 2.744210383239414e-06, "loss": 0.0238, "step": 127320 }, { "epoch": 0.5312690372274286, "grad_norm": 0.5507192614604137, "learning_rate": 2.7441565002848065e-06, "loss": 0.0226, "step": 127325 }, { "epoch": 0.5312898999424189, "grad_norm": 0.8509654388133191, "learning_rate": 2.744102620504072e-06, "loss": 0.0261, "step": 127330 }, { "epoch": 0.5313107626574092, "grad_norm": 1.4768839299227732, "learning_rate": 2.7440487438969e-06, "loss": 0.0268, "step": 127335 }, { "epoch": 0.5313316253723994, "grad_norm": 0.748586737998723, "learning_rate": 2.7439948704629778e-06, "loss": 0.021, "step": 127340 }, { "epoch": 0.5313524880873898, "grad_norm": 1.114355279708344, "learning_rate": 2.7439410002019947e-06, "loss": 0.0238, "step": 127345 }, { "epoch": 0.53137335080238, "grad_norm": 0.7362407467852051, "learning_rate": 2.7438871331136387e-06, "loss": 0.0278, "step": 127350 }, { "epoch": 0.5313942135173703, "grad_norm": 0.44214934503124675, "learning_rate": 2.743833269197599e-06, "loss": 0.0215, "step": 127355 }, { "epoch": 0.5314150762323606, "grad_norm": 0.8749645637025, "learning_rate": 2.7437794084535634e-06, "loss": 0.0212, "step": 127360 }, { "epoch": 0.5314359389473509, "grad_norm": 0.7643743962776989, "learning_rate": 2.7437255508812215e-06, "loss": 0.0264, "step": 127365 }, { "epoch": 0.5314568016623411, "grad_norm": 0.7033261331657493, "learning_rate": 2.7436716964802617e-06, "loss": 0.0277, "step": 127370 }, { "epoch": 0.5314776643773315, "grad_norm": 0.6100489051179926, "learning_rate": 2.7436178452503725e-06, "loss": 0.0159, "step": 127375 }, { "epoch": 0.5314985270923217, "grad_norm": 0.9980041528161617, "learning_rate": 2.743563997191243e-06, "loss": 0.0246, "step": 127380 }, { "epoch": 0.531519389807312, "grad_norm": 0.8514894363106391, "learning_rate": 2.743510152302562e-06, "loss": 0.0359, "step": 127385 }, { "epoch": 0.5315402525223022, "grad_norm": 0.5059786427501092, "learning_rate": 2.7434563105840184e-06, "loss": 0.0179, "step": 127390 }, { "epoch": 0.5315611152372925, "grad_norm": 0.9821923469666495, "learning_rate": 2.743402472035301e-06, "loss": 0.0234, "step": 127395 }, { "epoch": 0.5315819779522828, "grad_norm": 0.5138993050610856, "learning_rate": 2.7433486366560985e-06, "loss": 0.0162, "step": 127400 }, { "epoch": 0.531602840667273, "grad_norm": 0.6238016579134367, "learning_rate": 2.743294804446101e-06, "loss": 0.0219, "step": 127405 }, { "epoch": 0.5316237033822634, "grad_norm": 0.5463724829445891, "learning_rate": 2.7432409754049965e-06, "loss": 0.0196, "step": 127410 }, { "epoch": 0.5316445660972536, "grad_norm": 0.5527238884753578, "learning_rate": 2.7431871495324747e-06, "loss": 0.0202, "step": 127415 }, { "epoch": 0.5316654288122439, "grad_norm": 0.561289691390879, "learning_rate": 2.743133326828224e-06, "loss": 0.0244, "step": 127420 }, { "epoch": 0.5316862915272342, "grad_norm": 0.8883924771099471, "learning_rate": 2.743079507291935e-06, "loss": 0.0256, "step": 127425 }, { "epoch": 0.5317071542422245, "grad_norm": 0.9675748426552081, "learning_rate": 2.7430256909232956e-06, "loss": 0.0291, "step": 127430 }, { "epoch": 0.5317280169572147, "grad_norm": 0.5720319443447736, "learning_rate": 2.7429718777219954e-06, "loss": 0.0206, "step": 127435 }, { "epoch": 0.5317488796722051, "grad_norm": 0.41138624426550924, "learning_rate": 2.742918067687724e-06, "loss": 0.0155, "step": 127440 }, { "epoch": 0.5317697423871953, "grad_norm": 0.8878102076566312, "learning_rate": 2.7428642608201705e-06, "loss": 0.0173, "step": 127445 }, { "epoch": 0.5317906051021856, "grad_norm": 0.467041343057184, "learning_rate": 2.7428104571190246e-06, "loss": 0.0221, "step": 127450 }, { "epoch": 0.5318114678171758, "grad_norm": 0.5116274162263401, "learning_rate": 2.742756656583976e-06, "loss": 0.021, "step": 127455 }, { "epoch": 0.5318323305321662, "grad_norm": 0.6900480215385865, "learning_rate": 2.742702859214713e-06, "loss": 0.0247, "step": 127460 }, { "epoch": 0.5318531932471564, "grad_norm": 0.541394171817081, "learning_rate": 2.7426490650109265e-06, "loss": 0.0184, "step": 127465 }, { "epoch": 0.5318740559621467, "grad_norm": 0.3498428255014916, "learning_rate": 2.742595273972306e-06, "loss": 0.0193, "step": 127470 }, { "epoch": 0.531894918677137, "grad_norm": 0.4663806398091839, "learning_rate": 2.74254148609854e-06, "loss": 0.0188, "step": 127475 }, { "epoch": 0.5319157813921273, "grad_norm": 0.4815757807501714, "learning_rate": 2.7424877013893185e-06, "loss": 0.0198, "step": 127480 }, { "epoch": 0.5319366441071175, "grad_norm": 0.5333773673644463, "learning_rate": 2.7424339198443323e-06, "loss": 0.0435, "step": 127485 }, { "epoch": 0.5319575068221079, "grad_norm": 0.6607477806655876, "learning_rate": 2.74238014146327e-06, "loss": 0.0282, "step": 127490 }, { "epoch": 0.5319783695370981, "grad_norm": 0.6207285774097501, "learning_rate": 2.742326366245822e-06, "loss": 0.0328, "step": 127495 }, { "epoch": 0.5319992322520883, "grad_norm": 0.7941147942649762, "learning_rate": 2.742272594191677e-06, "loss": 0.024, "step": 127500 }, { "epoch": 0.5320200949670786, "grad_norm": 0.4184538743795214, "learning_rate": 2.742218825300527e-06, "loss": 0.0206, "step": 127505 }, { "epoch": 0.5320409576820689, "grad_norm": 0.4988810503280004, "learning_rate": 2.7421650595720604e-06, "loss": 0.0241, "step": 127510 }, { "epoch": 0.5320618203970592, "grad_norm": 0.4084552992915288, "learning_rate": 2.742111297005967e-06, "loss": 0.0148, "step": 127515 }, { "epoch": 0.5320826831120494, "grad_norm": 0.6262689015675595, "learning_rate": 2.7420575376019376e-06, "loss": 0.0283, "step": 127520 }, { "epoch": 0.5321035458270398, "grad_norm": 0.6499081959515584, "learning_rate": 2.742003781359662e-06, "loss": 0.0209, "step": 127525 }, { "epoch": 0.53212440854203, "grad_norm": 0.6857366862758234, "learning_rate": 2.7419500282788303e-06, "loss": 0.02, "step": 127530 }, { "epoch": 0.5321452712570203, "grad_norm": 0.7429528546701325, "learning_rate": 2.7418962783591324e-06, "loss": 0.0227, "step": 127535 }, { "epoch": 0.5321661339720106, "grad_norm": 1.0793464009404565, "learning_rate": 2.7418425316002583e-06, "loss": 0.0323, "step": 127540 }, { "epoch": 0.5321869966870009, "grad_norm": 0.36640365537619346, "learning_rate": 2.7417887880018988e-06, "loss": 0.022, "step": 127545 }, { "epoch": 0.5322078594019911, "grad_norm": 0.7998675915280076, "learning_rate": 2.7417350475637437e-06, "loss": 0.0286, "step": 127550 }, { "epoch": 0.5322287221169815, "grad_norm": 0.6307535790067489, "learning_rate": 2.7416813102854845e-06, "loss": 0.0191, "step": 127555 }, { "epoch": 0.5322495848319717, "grad_norm": 0.6231333647935587, "learning_rate": 2.74162757616681e-06, "loss": 0.0228, "step": 127560 }, { "epoch": 0.532270447546962, "grad_norm": 0.9753420268024956, "learning_rate": 2.7415738452074104e-06, "loss": 0.0248, "step": 127565 }, { "epoch": 0.5322913102619522, "grad_norm": 0.4633331589685854, "learning_rate": 2.7415201174069777e-06, "loss": 0.0175, "step": 127570 }, { "epoch": 0.5323121729769426, "grad_norm": 0.8450468101165067, "learning_rate": 2.7414663927652015e-06, "loss": 0.0299, "step": 127575 }, { "epoch": 0.5323330356919328, "grad_norm": 0.6068203612124712, "learning_rate": 2.741412671281772e-06, "loss": 0.02, "step": 127580 }, { "epoch": 0.532353898406923, "grad_norm": 0.7050673247788831, "learning_rate": 2.7413589529563805e-06, "loss": 0.0179, "step": 127585 }, { "epoch": 0.5323747611219134, "grad_norm": 0.6882991004931822, "learning_rate": 2.7413052377887166e-06, "loss": 0.026, "step": 127590 }, { "epoch": 0.5323956238369036, "grad_norm": 0.9564586350380676, "learning_rate": 2.741251525778472e-06, "loss": 0.0213, "step": 127595 }, { "epoch": 0.5324164865518939, "grad_norm": 1.2739675476707424, "learning_rate": 2.741197816925337e-06, "loss": 0.0292, "step": 127600 }, { "epoch": 0.5324373492668842, "grad_norm": 0.8248878783973799, "learning_rate": 2.741144111229003e-06, "loss": 0.0265, "step": 127605 }, { "epoch": 0.5324582119818745, "grad_norm": 0.5268694088313317, "learning_rate": 2.7410904086891594e-06, "loss": 0.0214, "step": 127610 }, { "epoch": 0.5324790746968647, "grad_norm": 0.8653854491800009, "learning_rate": 2.7410367093054974e-06, "loss": 0.0268, "step": 127615 }, { "epoch": 0.5324999374118551, "grad_norm": 1.1616384448321122, "learning_rate": 2.7409830130777083e-06, "loss": 0.0215, "step": 127620 }, { "epoch": 0.5325208001268453, "grad_norm": 0.9774676809128063, "learning_rate": 2.740929320005483e-06, "loss": 0.0239, "step": 127625 }, { "epoch": 0.5325416628418356, "grad_norm": 3.0415599413072454, "learning_rate": 2.740875630088512e-06, "loss": 0.0364, "step": 127630 }, { "epoch": 0.5325625255568258, "grad_norm": 0.4584525644501911, "learning_rate": 2.740821943326487e-06, "loss": 0.0207, "step": 127635 }, { "epoch": 0.5325833882718162, "grad_norm": 0.7848008832339565, "learning_rate": 2.740768259719098e-06, "loss": 0.0183, "step": 127640 }, { "epoch": 0.5326042509868064, "grad_norm": 0.9801359749186274, "learning_rate": 2.7407145792660373e-06, "loss": 0.0189, "step": 127645 }, { "epoch": 0.5326251137017967, "grad_norm": 0.32869971516691776, "learning_rate": 2.740660901966995e-06, "loss": 0.0161, "step": 127650 }, { "epoch": 0.532645976416787, "grad_norm": 0.8590977252091979, "learning_rate": 2.7406072278216627e-06, "loss": 0.0256, "step": 127655 }, { "epoch": 0.5326668391317773, "grad_norm": 0.7167717885676426, "learning_rate": 2.7405535568297316e-06, "loss": 0.0208, "step": 127660 }, { "epoch": 0.5326877018467675, "grad_norm": 1.2508431314817097, "learning_rate": 2.7404998889908925e-06, "loss": 0.039, "step": 127665 }, { "epoch": 0.5327085645617579, "grad_norm": 1.2248436386286272, "learning_rate": 2.740446224304837e-06, "loss": 0.0241, "step": 127670 }, { "epoch": 0.5327294272767481, "grad_norm": 0.4838534293393417, "learning_rate": 2.740392562771257e-06, "loss": 0.0234, "step": 127675 }, { "epoch": 0.5327502899917383, "grad_norm": 1.0777785576536913, "learning_rate": 2.7403389043898426e-06, "loss": 0.0275, "step": 127680 }, { "epoch": 0.5327711527067286, "grad_norm": 0.3795983125128836, "learning_rate": 2.7402852491602866e-06, "loss": 0.0148, "step": 127685 }, { "epoch": 0.532792015421719, "grad_norm": 0.8074812447298452, "learning_rate": 2.7402315970822797e-06, "loss": 0.0249, "step": 127690 }, { "epoch": 0.5328128781367092, "grad_norm": 0.44473017837109236, "learning_rate": 2.7401779481555124e-06, "loss": 0.0192, "step": 127695 }, { "epoch": 0.5328337408516994, "grad_norm": 0.4570049551387461, "learning_rate": 2.740124302379679e-06, "loss": 0.0197, "step": 127700 }, { "epoch": 0.5328546035666898, "grad_norm": 0.521534880266272, "learning_rate": 2.740070659754468e-06, "loss": 0.03, "step": 127705 }, { "epoch": 0.53287546628168, "grad_norm": 0.567607326290956, "learning_rate": 2.740017020279573e-06, "loss": 0.0243, "step": 127710 }, { "epoch": 0.5328963289966703, "grad_norm": 0.6075880081812305, "learning_rate": 2.739963383954685e-06, "loss": 0.021, "step": 127715 }, { "epoch": 0.5329171917116606, "grad_norm": 0.7530267715892595, "learning_rate": 2.7399097507794954e-06, "loss": 0.0215, "step": 127720 }, { "epoch": 0.5329380544266509, "grad_norm": 0.6883771503974886, "learning_rate": 2.739856120753696e-06, "loss": 0.022, "step": 127725 }, { "epoch": 0.5329589171416411, "grad_norm": 1.1486100530761643, "learning_rate": 2.7398024938769794e-06, "loss": 0.0275, "step": 127730 }, { "epoch": 0.5329797798566315, "grad_norm": 0.4871376288268782, "learning_rate": 2.739748870149038e-06, "loss": 0.0203, "step": 127735 }, { "epoch": 0.5330006425716217, "grad_norm": 1.1759958657223635, "learning_rate": 2.7396952495695604e-06, "loss": 0.0281, "step": 127740 }, { "epoch": 0.533021505286612, "grad_norm": 1.3011139750700424, "learning_rate": 2.739641632138242e-06, "loss": 0.0208, "step": 127745 }, { "epoch": 0.5330423680016022, "grad_norm": 1.428039647714904, "learning_rate": 2.7395880178547736e-06, "loss": 0.0233, "step": 127750 }, { "epoch": 0.5330632307165926, "grad_norm": 0.5271532341269628, "learning_rate": 2.7395344067188462e-06, "loss": 0.0216, "step": 127755 }, { "epoch": 0.5330840934315828, "grad_norm": 0.8762680650812192, "learning_rate": 2.7394807987301536e-06, "loss": 0.0325, "step": 127760 }, { "epoch": 0.533104956146573, "grad_norm": 1.0932612158892794, "learning_rate": 2.7394271938883858e-06, "loss": 0.0243, "step": 127765 }, { "epoch": 0.5331258188615634, "grad_norm": 0.601661226566036, "learning_rate": 2.739373592193237e-06, "loss": 0.0186, "step": 127770 }, { "epoch": 0.5331466815765536, "grad_norm": 0.7031774948403754, "learning_rate": 2.7393199936443987e-06, "loss": 0.0243, "step": 127775 }, { "epoch": 0.5331675442915439, "grad_norm": 0.6617257784549508, "learning_rate": 2.739266398241562e-06, "loss": 0.0179, "step": 127780 }, { "epoch": 0.5331884070065342, "grad_norm": 0.5995722384003425, "learning_rate": 2.739212805984421e-06, "loss": 0.0213, "step": 127785 }, { "epoch": 0.5332092697215245, "grad_norm": 0.7744599381563639, "learning_rate": 2.739159216872666e-06, "loss": 0.0245, "step": 127790 }, { "epoch": 0.5332301324365147, "grad_norm": 0.8704299374006521, "learning_rate": 2.739105630905991e-06, "loss": 0.0201, "step": 127795 }, { "epoch": 0.5332509951515051, "grad_norm": 0.8026960400906961, "learning_rate": 2.739052048084088e-06, "loss": 0.0245, "step": 127800 }, { "epoch": 0.5332718578664953, "grad_norm": 0.6359561901368046, "learning_rate": 2.7389984684066477e-06, "loss": 0.0245, "step": 127805 }, { "epoch": 0.5332927205814856, "grad_norm": 0.6222114119191287, "learning_rate": 2.738944891873365e-06, "loss": 0.0225, "step": 127810 }, { "epoch": 0.5333135832964758, "grad_norm": 0.3333812467811508, "learning_rate": 2.7388913184839317e-06, "loss": 0.0172, "step": 127815 }, { "epoch": 0.5333344460114662, "grad_norm": 0.7328977530915264, "learning_rate": 2.7388377482380396e-06, "loss": 0.0227, "step": 127820 }, { "epoch": 0.5333553087264564, "grad_norm": 1.2742266314001234, "learning_rate": 2.7387841811353817e-06, "loss": 0.0207, "step": 127825 }, { "epoch": 0.5333761714414467, "grad_norm": 1.0752539057801014, "learning_rate": 2.7387306171756506e-06, "loss": 0.0205, "step": 127830 }, { "epoch": 0.533397034156437, "grad_norm": 0.4720435989871583, "learning_rate": 2.7386770563585396e-06, "loss": 0.0208, "step": 127835 }, { "epoch": 0.5334178968714273, "grad_norm": 0.5711523504030338, "learning_rate": 2.73862349868374e-06, "loss": 0.027, "step": 127840 }, { "epoch": 0.5334387595864175, "grad_norm": 1.179254199246392, "learning_rate": 2.738569944150946e-06, "loss": 0.0293, "step": 127845 }, { "epoch": 0.5334596223014079, "grad_norm": 0.7261195104974878, "learning_rate": 2.7385163927598494e-06, "loss": 0.0287, "step": 127850 }, { "epoch": 0.5334804850163981, "grad_norm": 0.7625783590075756, "learning_rate": 2.7384628445101442e-06, "loss": 0.0253, "step": 127855 }, { "epoch": 0.5335013477313884, "grad_norm": 0.8992652709461657, "learning_rate": 2.7384092994015223e-06, "loss": 0.0205, "step": 127860 }, { "epoch": 0.5335222104463786, "grad_norm": 0.5687868724458105, "learning_rate": 2.7383557574336765e-06, "loss": 0.0255, "step": 127865 }, { "epoch": 0.533543073161369, "grad_norm": 0.5312716355059461, "learning_rate": 2.7383022186063e-06, "loss": 0.019, "step": 127870 }, { "epoch": 0.5335639358763592, "grad_norm": 0.9647736051069088, "learning_rate": 2.7382486829190863e-06, "loss": 0.0325, "step": 127875 }, { "epoch": 0.5335847985913494, "grad_norm": 0.7178785468600245, "learning_rate": 2.738195150371728e-06, "loss": 0.0223, "step": 127880 }, { "epoch": 0.5336056613063398, "grad_norm": 0.7776729609621997, "learning_rate": 2.7381416209639183e-06, "loss": 0.0275, "step": 127885 }, { "epoch": 0.53362652402133, "grad_norm": 0.7102646220617258, "learning_rate": 2.7380880946953504e-06, "loss": 0.0254, "step": 127890 }, { "epoch": 0.5336473867363203, "grad_norm": 0.46587037540368226, "learning_rate": 2.738034571565717e-06, "loss": 0.021, "step": 127895 }, { "epoch": 0.5336682494513106, "grad_norm": 0.7873274473493419, "learning_rate": 2.7379810515747117e-06, "loss": 0.0269, "step": 127900 }, { "epoch": 0.5336891121663009, "grad_norm": 0.3492571222722277, "learning_rate": 2.7379275347220285e-06, "loss": 0.0231, "step": 127905 }, { "epoch": 0.5337099748812911, "grad_norm": 0.7924893412748628, "learning_rate": 2.737874021007359e-06, "loss": 0.0214, "step": 127910 }, { "epoch": 0.5337308375962815, "grad_norm": 0.5081537748595986, "learning_rate": 2.7378205104303977e-06, "loss": 0.0212, "step": 127915 }, { "epoch": 0.5337517003112717, "grad_norm": 0.6312827357170081, "learning_rate": 2.7377670029908386e-06, "loss": 0.0271, "step": 127920 }, { "epoch": 0.533772563026262, "grad_norm": 0.7461741619916322, "learning_rate": 2.7377134986883736e-06, "loss": 0.0206, "step": 127925 }, { "epoch": 0.5337934257412522, "grad_norm": 0.8756662694242549, "learning_rate": 2.737659997522697e-06, "loss": 0.0329, "step": 127930 }, { "epoch": 0.5338142884562426, "grad_norm": 1.6913621799193397, "learning_rate": 2.737606499493502e-06, "loss": 0.0362, "step": 127935 }, { "epoch": 0.5338351511712328, "grad_norm": 0.42671038075374274, "learning_rate": 2.7375530046004833e-06, "loss": 0.0196, "step": 127940 }, { "epoch": 0.5338560138862231, "grad_norm": 0.6875893577157224, "learning_rate": 2.737499512843333e-06, "loss": 0.0373, "step": 127945 }, { "epoch": 0.5338768766012134, "grad_norm": 0.4353370665646076, "learning_rate": 2.7374460242217447e-06, "loss": 0.0243, "step": 127950 }, { "epoch": 0.5338977393162037, "grad_norm": 0.7457795069252731, "learning_rate": 2.737392538735413e-06, "loss": 0.0251, "step": 127955 }, { "epoch": 0.5339186020311939, "grad_norm": 0.428385088673708, "learning_rate": 2.737339056384032e-06, "loss": 0.0194, "step": 127960 }, { "epoch": 0.5339394647461843, "grad_norm": 1.07659056678027, "learning_rate": 2.7372855771672942e-06, "loss": 0.0213, "step": 127965 }, { "epoch": 0.5339603274611745, "grad_norm": 0.6670747643257183, "learning_rate": 2.7372321010848936e-06, "loss": 0.0205, "step": 127970 }, { "epoch": 0.5339811901761647, "grad_norm": 1.4989058491431755, "learning_rate": 2.737178628136525e-06, "loss": 0.0333, "step": 127975 }, { "epoch": 0.534002052891155, "grad_norm": 1.0210428159924418, "learning_rate": 2.737125158321882e-06, "loss": 0.0217, "step": 127980 }, { "epoch": 0.5340229156061453, "grad_norm": 0.5539556370963489, "learning_rate": 2.737071691640657e-06, "loss": 0.0221, "step": 127985 }, { "epoch": 0.5340437783211356, "grad_norm": 0.7028931800274214, "learning_rate": 2.7370182280925462e-06, "loss": 0.0288, "step": 127990 }, { "epoch": 0.5340646410361258, "grad_norm": 0.6041656566319108, "learning_rate": 2.7369647676772422e-06, "loss": 0.0235, "step": 127995 }, { "epoch": 0.5340855037511162, "grad_norm": 1.1722929917693061, "learning_rate": 2.7369113103944393e-06, "loss": 0.0239, "step": 128000 }, { "epoch": 0.5341063664661064, "grad_norm": 1.1467012235843146, "learning_rate": 2.736857856243832e-06, "loss": 0.0288, "step": 128005 }, { "epoch": 0.5341272291810967, "grad_norm": 0.5132712124836671, "learning_rate": 2.7368044052251147e-06, "loss": 0.0199, "step": 128010 }, { "epoch": 0.534148091896087, "grad_norm": 0.4613836845757208, "learning_rate": 2.7367509573379806e-06, "loss": 0.0181, "step": 128015 }, { "epoch": 0.5341689546110773, "grad_norm": 0.4704016291347154, "learning_rate": 2.736697512582125e-06, "loss": 0.0182, "step": 128020 }, { "epoch": 0.5341898173260675, "grad_norm": 1.0772490530418268, "learning_rate": 2.7366440709572406e-06, "loss": 0.0264, "step": 128025 }, { "epoch": 0.5342106800410579, "grad_norm": 1.0153516387625585, "learning_rate": 2.736590632463023e-06, "loss": 0.0348, "step": 128030 }, { "epoch": 0.5342315427560481, "grad_norm": 0.6594895213527792, "learning_rate": 2.7365371970991665e-06, "loss": 0.0217, "step": 128035 }, { "epoch": 0.5342524054710384, "grad_norm": 0.6680215716336663, "learning_rate": 2.7364837648653652e-06, "loss": 0.0272, "step": 128040 }, { "epoch": 0.5342732681860286, "grad_norm": 0.7747357062683682, "learning_rate": 2.736430335761314e-06, "loss": 0.0196, "step": 128045 }, { "epoch": 0.534294130901019, "grad_norm": 0.7541375365916382, "learning_rate": 2.736376909786706e-06, "loss": 0.0301, "step": 128050 }, { "epoch": 0.5343149936160092, "grad_norm": 0.7515888858940746, "learning_rate": 2.736323486941237e-06, "loss": 0.0275, "step": 128055 }, { "epoch": 0.5343358563309994, "grad_norm": 0.5682249970821285, "learning_rate": 2.7362700672246013e-06, "loss": 0.0193, "step": 128060 }, { "epoch": 0.5343567190459898, "grad_norm": 0.4753884702022281, "learning_rate": 2.7362166506364934e-06, "loss": 0.0259, "step": 128065 }, { "epoch": 0.53437758176098, "grad_norm": 0.394022039818684, "learning_rate": 2.736163237176608e-06, "loss": 0.0188, "step": 128070 }, { "epoch": 0.5343984444759703, "grad_norm": 0.9375640906576591, "learning_rate": 2.7361098268446403e-06, "loss": 0.0251, "step": 128075 }, { "epoch": 0.5344193071909606, "grad_norm": 0.8201590700631343, "learning_rate": 2.736056419640283e-06, "loss": 0.0252, "step": 128080 }, { "epoch": 0.5344401699059509, "grad_norm": 0.8378242032743892, "learning_rate": 2.736003015563234e-06, "loss": 0.0203, "step": 128085 }, { "epoch": 0.5344610326209411, "grad_norm": 0.4705715218392802, "learning_rate": 2.7359496146131846e-06, "loss": 0.0286, "step": 128090 }, { "epoch": 0.5344818953359315, "grad_norm": 0.7144033053637535, "learning_rate": 2.735896216789833e-06, "loss": 0.0191, "step": 128095 }, { "epoch": 0.5345027580509217, "grad_norm": 1.456506850477467, "learning_rate": 2.7358428220928723e-06, "loss": 0.0293, "step": 128100 }, { "epoch": 0.534523620765912, "grad_norm": 0.3997482130068655, "learning_rate": 2.7357894305219973e-06, "loss": 0.0212, "step": 128105 }, { "epoch": 0.5345444834809022, "grad_norm": 0.5816607677982197, "learning_rate": 2.7357360420769034e-06, "loss": 0.0241, "step": 128110 }, { "epoch": 0.5345653461958926, "grad_norm": 0.5409950504842479, "learning_rate": 2.7356826567572857e-06, "loss": 0.0339, "step": 128115 }, { "epoch": 0.5345862089108828, "grad_norm": 0.6613565198030175, "learning_rate": 2.73562927456284e-06, "loss": 0.0236, "step": 128120 }, { "epoch": 0.5346070716258731, "grad_norm": 0.9106671199608816, "learning_rate": 2.7355758954932595e-06, "loss": 0.0226, "step": 128125 }, { "epoch": 0.5346279343408634, "grad_norm": 5.3513315108214545, "learning_rate": 2.7355225195482403e-06, "loss": 0.0266, "step": 128130 }, { "epoch": 0.5346487970558537, "grad_norm": 0.6652931544488904, "learning_rate": 2.7354691467274785e-06, "loss": 0.0261, "step": 128135 }, { "epoch": 0.5346696597708439, "grad_norm": 1.010884792581557, "learning_rate": 2.7354157770306677e-06, "loss": 0.0255, "step": 128140 }, { "epoch": 0.5346905224858343, "grad_norm": 0.4400872393821099, "learning_rate": 2.7353624104575044e-06, "loss": 0.0235, "step": 128145 }, { "epoch": 0.5347113852008245, "grad_norm": 0.45137433824587403, "learning_rate": 2.7353090470076836e-06, "loss": 0.0224, "step": 128150 }, { "epoch": 0.5347322479158148, "grad_norm": 0.8396532120330333, "learning_rate": 2.7352556866809004e-06, "loss": 0.0198, "step": 128155 }, { "epoch": 0.534753110630805, "grad_norm": 0.843288156352974, "learning_rate": 2.7352023294768503e-06, "loss": 0.0219, "step": 128160 }, { "epoch": 0.5347739733457954, "grad_norm": 0.7553924436073791, "learning_rate": 2.7351489753952287e-06, "loss": 0.0244, "step": 128165 }, { "epoch": 0.5347948360607856, "grad_norm": 0.6598087631667954, "learning_rate": 2.735095624435731e-06, "loss": 0.0231, "step": 128170 }, { "epoch": 0.5348156987757758, "grad_norm": 0.7798833371471685, "learning_rate": 2.735042276598053e-06, "loss": 0.0436, "step": 128175 }, { "epoch": 0.5348365614907662, "grad_norm": 1.0091737429066903, "learning_rate": 2.7349889318818907e-06, "loss": 0.0281, "step": 128180 }, { "epoch": 0.5348574242057564, "grad_norm": 1.0548237534797622, "learning_rate": 2.734935590286938e-06, "loss": 0.0343, "step": 128185 }, { "epoch": 0.5348782869207467, "grad_norm": 0.9563716730907444, "learning_rate": 2.734882251812892e-06, "loss": 0.0281, "step": 128190 }, { "epoch": 0.534899149635737, "grad_norm": 0.4929069601786586, "learning_rate": 2.734828916459449e-06, "loss": 0.0273, "step": 128195 }, { "epoch": 0.5349200123507273, "grad_norm": 0.43546490484911343, "learning_rate": 2.7347755842263022e-06, "loss": 0.0192, "step": 128200 }, { "epoch": 0.5349408750657175, "grad_norm": 0.5311240153141136, "learning_rate": 2.7347222551131503e-06, "loss": 0.0213, "step": 128205 }, { "epoch": 0.5349617377807079, "grad_norm": 0.6782605983994201, "learning_rate": 2.7346689291196867e-06, "loss": 0.0278, "step": 128210 }, { "epoch": 0.5349826004956981, "grad_norm": 0.5975286341586901, "learning_rate": 2.7346156062456088e-06, "loss": 0.0205, "step": 128215 }, { "epoch": 0.5350034632106884, "grad_norm": 0.48395912753920206, "learning_rate": 2.734562286490612e-06, "loss": 0.0333, "step": 128220 }, { "epoch": 0.5350243259256786, "grad_norm": 0.8679878675081233, "learning_rate": 2.7345089698543916e-06, "loss": 0.0308, "step": 128225 }, { "epoch": 0.535045188640669, "grad_norm": 0.731271976324746, "learning_rate": 2.7344556563366447e-06, "loss": 0.0162, "step": 128230 }, { "epoch": 0.5350660513556592, "grad_norm": 0.881030619390817, "learning_rate": 2.7344023459370666e-06, "loss": 0.0181, "step": 128235 }, { "epoch": 0.5350869140706495, "grad_norm": 1.0835946837625725, "learning_rate": 2.7343490386553532e-06, "loss": 0.0222, "step": 128240 }, { "epoch": 0.5351077767856398, "grad_norm": 0.8452359507888122, "learning_rate": 2.734295734491202e-06, "loss": 0.0254, "step": 128245 }, { "epoch": 0.53512863950063, "grad_norm": 0.6244294816366289, "learning_rate": 2.7342424334443074e-06, "loss": 0.0284, "step": 128250 }, { "epoch": 0.5351495022156203, "grad_norm": 0.47910543816986173, "learning_rate": 2.7341891355143655e-06, "loss": 0.0198, "step": 128255 }, { "epoch": 0.5351703649306107, "grad_norm": 0.2660245445192322, "learning_rate": 2.734135840701075e-06, "loss": 0.0172, "step": 128260 }, { "epoch": 0.5351912276456009, "grad_norm": 1.153606125223036, "learning_rate": 2.7340825490041288e-06, "loss": 0.0284, "step": 128265 }, { "epoch": 0.5352120903605911, "grad_norm": 0.6676624219479522, "learning_rate": 2.7340292604232254e-06, "loss": 0.02, "step": 128270 }, { "epoch": 0.5352329530755815, "grad_norm": 0.783143174145259, "learning_rate": 2.7339759749580606e-06, "loss": 0.0295, "step": 128275 }, { "epoch": 0.5352538157905717, "grad_norm": 0.29112834171105556, "learning_rate": 2.7339226926083314e-06, "loss": 0.0225, "step": 128280 }, { "epoch": 0.535274678505562, "grad_norm": 1.0201076035572065, "learning_rate": 2.7338694133737326e-06, "loss": 0.0203, "step": 128285 }, { "epoch": 0.5352955412205522, "grad_norm": 0.8329483352872483, "learning_rate": 2.7338161372539625e-06, "loss": 0.0215, "step": 128290 }, { "epoch": 0.5353164039355426, "grad_norm": 0.857706265507265, "learning_rate": 2.7337628642487164e-06, "loss": 0.0187, "step": 128295 }, { "epoch": 0.5353372666505328, "grad_norm": 0.7369384942841688, "learning_rate": 2.7337095943576915e-06, "loss": 0.0201, "step": 128300 }, { "epoch": 0.5353581293655231, "grad_norm": 0.5018299099103801, "learning_rate": 2.7336563275805838e-06, "loss": 0.0341, "step": 128305 }, { "epoch": 0.5353789920805134, "grad_norm": 0.43441326740948855, "learning_rate": 2.7336030639170904e-06, "loss": 0.0253, "step": 128310 }, { "epoch": 0.5353998547955037, "grad_norm": 0.5094346839167617, "learning_rate": 2.7335498033669077e-06, "loss": 0.0141, "step": 128315 }, { "epoch": 0.5354207175104939, "grad_norm": 1.2961485607243208, "learning_rate": 2.7334965459297324e-06, "loss": 0.0231, "step": 128320 }, { "epoch": 0.5354415802254843, "grad_norm": 0.32862405561616115, "learning_rate": 2.7334432916052618e-06, "loss": 0.0209, "step": 128325 }, { "epoch": 0.5354624429404745, "grad_norm": 1.1729574038018127, "learning_rate": 2.7333900403931925e-06, "loss": 0.0294, "step": 128330 }, { "epoch": 0.5354833056554648, "grad_norm": 0.4921689307828613, "learning_rate": 2.733336792293221e-06, "loss": 0.0223, "step": 128335 }, { "epoch": 0.535504168370455, "grad_norm": 1.0254359046263484, "learning_rate": 2.733283547305044e-06, "loss": 0.0285, "step": 128340 }, { "epoch": 0.5355250310854454, "grad_norm": 0.8226614276075451, "learning_rate": 2.7332303054283586e-06, "loss": 0.0287, "step": 128345 }, { "epoch": 0.5355458938004356, "grad_norm": 0.7134210758088084, "learning_rate": 2.7331770666628625e-06, "loss": 0.0256, "step": 128350 }, { "epoch": 0.5355667565154258, "grad_norm": 0.40937196311246377, "learning_rate": 2.733123831008252e-06, "loss": 0.0226, "step": 128355 }, { "epoch": 0.5355876192304162, "grad_norm": 0.5861972993576063, "learning_rate": 2.733070598464225e-06, "loss": 0.0171, "step": 128360 }, { "epoch": 0.5356084819454064, "grad_norm": 0.7090878962150323, "learning_rate": 2.733017369030477e-06, "loss": 0.0295, "step": 128365 }, { "epoch": 0.5356293446603967, "grad_norm": 0.4730675708716661, "learning_rate": 2.732964142706706e-06, "loss": 0.0274, "step": 128370 }, { "epoch": 0.535650207375387, "grad_norm": 0.7263790436331613, "learning_rate": 2.7329109194926094e-06, "loss": 0.0214, "step": 128375 }, { "epoch": 0.5356710700903773, "grad_norm": 0.4405529906427862, "learning_rate": 2.732857699387884e-06, "loss": 0.0303, "step": 128380 }, { "epoch": 0.5356919328053675, "grad_norm": 0.6820855217999843, "learning_rate": 2.7328044823922278e-06, "loss": 0.0186, "step": 128385 }, { "epoch": 0.5357127955203579, "grad_norm": 0.6989705925025127, "learning_rate": 2.732751268505337e-06, "loss": 0.0224, "step": 128390 }, { "epoch": 0.5357336582353481, "grad_norm": 0.509553558839891, "learning_rate": 2.7326980577269103e-06, "loss": 0.0302, "step": 128395 }, { "epoch": 0.5357545209503384, "grad_norm": 1.0519107409132107, "learning_rate": 2.7326448500566434e-06, "loss": 0.0305, "step": 128400 }, { "epoch": 0.5357753836653286, "grad_norm": 1.7701366396144882, "learning_rate": 2.7325916454942352e-06, "loss": 0.0456, "step": 128405 }, { "epoch": 0.535796246380319, "grad_norm": 0.5097601193480918, "learning_rate": 2.7325384440393826e-06, "loss": 0.0232, "step": 128410 }, { "epoch": 0.5358171090953092, "grad_norm": 0.4895573903943892, "learning_rate": 2.7324852456917822e-06, "loss": 0.0206, "step": 128415 }, { "epoch": 0.5358379718102995, "grad_norm": 0.9759116633353435, "learning_rate": 2.732432050451133e-06, "loss": 0.0249, "step": 128420 }, { "epoch": 0.5358588345252898, "grad_norm": 2.050725839683767, "learning_rate": 2.732378858317132e-06, "loss": 0.0263, "step": 128425 }, { "epoch": 0.5358796972402801, "grad_norm": 0.5382443269539253, "learning_rate": 2.7323256692894772e-06, "loss": 0.0185, "step": 128430 }, { "epoch": 0.5359005599552703, "grad_norm": 0.6504346514027126, "learning_rate": 2.7322724833678655e-06, "loss": 0.0272, "step": 128435 }, { "epoch": 0.5359214226702607, "grad_norm": 2.038040404777222, "learning_rate": 2.732219300551995e-06, "loss": 0.0203, "step": 128440 }, { "epoch": 0.5359422853852509, "grad_norm": 1.2378180449093368, "learning_rate": 2.7321661208415634e-06, "loss": 0.025, "step": 128445 }, { "epoch": 0.5359631481002411, "grad_norm": 0.44395540464335875, "learning_rate": 2.732112944236269e-06, "loss": 0.0198, "step": 128450 }, { "epoch": 0.5359840108152315, "grad_norm": 0.43198851066438704, "learning_rate": 2.7320597707358087e-06, "loss": 0.023, "step": 128455 }, { "epoch": 0.5360048735302217, "grad_norm": 0.810614038852759, "learning_rate": 2.7320066003398805e-06, "loss": 0.03, "step": 128460 }, { "epoch": 0.536025736245212, "grad_norm": 2.356022565525444, "learning_rate": 2.7319534330481833e-06, "loss": 0.0325, "step": 128465 }, { "epoch": 0.5360465989602022, "grad_norm": 0.8342641753071083, "learning_rate": 2.731900268860414e-06, "loss": 0.0297, "step": 128470 }, { "epoch": 0.5360674616751926, "grad_norm": 0.4622744455214261, "learning_rate": 2.7318471077762715e-06, "loss": 0.027, "step": 128475 }, { "epoch": 0.5360883243901828, "grad_norm": 0.27443749513663557, "learning_rate": 2.731793949795453e-06, "loss": 0.0274, "step": 128480 }, { "epoch": 0.5361091871051731, "grad_norm": 0.5196571717032269, "learning_rate": 2.7317407949176573e-06, "loss": 0.0192, "step": 128485 }, { "epoch": 0.5361300498201634, "grad_norm": 1.0599839256204573, "learning_rate": 2.731687643142582e-06, "loss": 0.0317, "step": 128490 }, { "epoch": 0.5361509125351537, "grad_norm": 0.8324814086404297, "learning_rate": 2.731634494469925e-06, "loss": 0.023, "step": 128495 }, { "epoch": 0.5361717752501439, "grad_norm": 0.7163367427232574, "learning_rate": 2.7315813488993854e-06, "loss": 0.0201, "step": 128500 }, { "epoch": 0.5361926379651343, "grad_norm": 0.5135605113293955, "learning_rate": 2.7315282064306613e-06, "loss": 0.0268, "step": 128505 }, { "epoch": 0.5362135006801245, "grad_norm": 0.5317170636466484, "learning_rate": 2.73147506706345e-06, "loss": 0.0193, "step": 128510 }, { "epoch": 0.5362343633951148, "grad_norm": 1.2708409995499446, "learning_rate": 2.7314219307974505e-06, "loss": 0.0212, "step": 128515 }, { "epoch": 0.536255226110105, "grad_norm": 0.47306778963559526, "learning_rate": 2.7313687976323618e-06, "loss": 0.0247, "step": 128520 }, { "epoch": 0.5362760888250954, "grad_norm": 0.6718107680524055, "learning_rate": 2.7313156675678807e-06, "loss": 0.022, "step": 128525 }, { "epoch": 0.5362969515400856, "grad_norm": 0.35571879067675544, "learning_rate": 2.7312625406037067e-06, "loss": 0.0303, "step": 128530 }, { "epoch": 0.5363178142550759, "grad_norm": 0.9668930320111891, "learning_rate": 2.7312094167395386e-06, "loss": 0.0219, "step": 128535 }, { "epoch": 0.5363386769700662, "grad_norm": 0.8316394099724361, "learning_rate": 2.731156295975075e-06, "loss": 0.0252, "step": 128540 }, { "epoch": 0.5363595396850565, "grad_norm": 0.4778692378679006, "learning_rate": 2.7311031783100135e-06, "loss": 0.0214, "step": 128545 }, { "epoch": 0.5363804024000467, "grad_norm": 0.43768495025173065, "learning_rate": 2.731050063744053e-06, "loss": 0.0153, "step": 128550 }, { "epoch": 0.536401265115037, "grad_norm": 0.9232097006674322, "learning_rate": 2.730996952276893e-06, "loss": 0.0211, "step": 128555 }, { "epoch": 0.5364221278300273, "grad_norm": 0.79642931820807, "learning_rate": 2.730943843908231e-06, "loss": 0.0251, "step": 128560 }, { "epoch": 0.5364429905450175, "grad_norm": 0.487178130738801, "learning_rate": 2.7308907386377663e-06, "loss": 0.0195, "step": 128565 }, { "epoch": 0.5364638532600079, "grad_norm": 0.44728470938272386, "learning_rate": 2.730837636465198e-06, "loss": 0.0249, "step": 128570 }, { "epoch": 0.5364847159749981, "grad_norm": 0.7125984675308045, "learning_rate": 2.730784537390224e-06, "loss": 0.019, "step": 128575 }, { "epoch": 0.5365055786899884, "grad_norm": 0.42557777176910117, "learning_rate": 2.730731441412545e-06, "loss": 0.0157, "step": 128580 }, { "epoch": 0.5365264414049786, "grad_norm": 0.6650508212800785, "learning_rate": 2.7306783485318573e-06, "loss": 0.0288, "step": 128585 }, { "epoch": 0.536547304119969, "grad_norm": 0.37189242174124304, "learning_rate": 2.730625258747862e-06, "loss": 0.0184, "step": 128590 }, { "epoch": 0.5365681668349592, "grad_norm": 0.6737014860759836, "learning_rate": 2.7305721720602567e-06, "loss": 0.0278, "step": 128595 }, { "epoch": 0.5365890295499495, "grad_norm": 1.4190853907317822, "learning_rate": 2.7305190884687414e-06, "loss": 0.0326, "step": 128600 }, { "epoch": 0.5366098922649398, "grad_norm": 0.8996141942312538, "learning_rate": 2.730466007973015e-06, "loss": 0.0165, "step": 128605 }, { "epoch": 0.5366307549799301, "grad_norm": 0.6514084909028018, "learning_rate": 2.7304129305727762e-06, "loss": 0.0235, "step": 128610 }, { "epoch": 0.5366516176949203, "grad_norm": 0.39943032500124437, "learning_rate": 2.730359856267724e-06, "loss": 0.0218, "step": 128615 }, { "epoch": 0.5366724804099107, "grad_norm": 0.72307419712045, "learning_rate": 2.7303067850575575e-06, "loss": 0.0243, "step": 128620 }, { "epoch": 0.5366933431249009, "grad_norm": 0.4205892407841625, "learning_rate": 2.7302537169419775e-06, "loss": 0.0233, "step": 128625 }, { "epoch": 0.5367142058398912, "grad_norm": 1.0000688425796715, "learning_rate": 2.7302006519206813e-06, "loss": 0.0226, "step": 128630 }, { "epoch": 0.5367350685548815, "grad_norm": 1.0202441762314005, "learning_rate": 2.7301475899933694e-06, "loss": 0.0161, "step": 128635 }, { "epoch": 0.5367559312698718, "grad_norm": 0.5325282039093918, "learning_rate": 2.730094531159741e-06, "loss": 0.0235, "step": 128640 }, { "epoch": 0.536776793984862, "grad_norm": 0.699624464275178, "learning_rate": 2.7300414754194942e-06, "loss": 0.0235, "step": 128645 }, { "epoch": 0.5367976566998522, "grad_norm": 0.37807656529065964, "learning_rate": 2.7299884227723304e-06, "loss": 0.0216, "step": 128650 }, { "epoch": 0.5368185194148426, "grad_norm": 0.5000989725978544, "learning_rate": 2.7299353732179477e-06, "loss": 0.023, "step": 128655 }, { "epoch": 0.5368393821298328, "grad_norm": 0.5636600009981806, "learning_rate": 2.729882326756047e-06, "loss": 0.0256, "step": 128660 }, { "epoch": 0.5368602448448231, "grad_norm": 1.2951520311234128, "learning_rate": 2.729829283386326e-06, "loss": 0.0304, "step": 128665 }, { "epoch": 0.5368811075598134, "grad_norm": 0.7495696517018363, "learning_rate": 2.729776243108486e-06, "loss": 0.0197, "step": 128670 }, { "epoch": 0.5369019702748037, "grad_norm": 1.642564352088374, "learning_rate": 2.729723205922225e-06, "loss": 0.0342, "step": 128675 }, { "epoch": 0.5369228329897939, "grad_norm": 0.7590978184416404, "learning_rate": 2.729670171827244e-06, "loss": 0.0303, "step": 128680 }, { "epoch": 0.5369436957047843, "grad_norm": 1.1749191816295954, "learning_rate": 2.729617140823242e-06, "loss": 0.018, "step": 128685 }, { "epoch": 0.5369645584197745, "grad_norm": 0.8143505432682755, "learning_rate": 2.729564112909919e-06, "loss": 0.0333, "step": 128690 }, { "epoch": 0.5369854211347648, "grad_norm": 0.9186220581846984, "learning_rate": 2.7295110880869753e-06, "loss": 0.0245, "step": 128695 }, { "epoch": 0.537006283849755, "grad_norm": 0.8298447490432873, "learning_rate": 2.7294580663541098e-06, "loss": 0.022, "step": 128700 }, { "epoch": 0.5370271465647454, "grad_norm": 0.9390035719255221, "learning_rate": 2.7294050477110235e-06, "loss": 0.0181, "step": 128705 }, { "epoch": 0.5370480092797356, "grad_norm": 1.2828356698565913, "learning_rate": 2.7293520321574153e-06, "loss": 0.0249, "step": 128710 }, { "epoch": 0.5370688719947259, "grad_norm": 0.3673024874232265, "learning_rate": 2.7292990196929854e-06, "loss": 0.0179, "step": 128715 }, { "epoch": 0.5370897347097162, "grad_norm": 0.5577683164513692, "learning_rate": 2.729246010317434e-06, "loss": 0.0251, "step": 128720 }, { "epoch": 0.5371105974247065, "grad_norm": 0.6447933124322964, "learning_rate": 2.729193004030461e-06, "loss": 0.0195, "step": 128725 }, { "epoch": 0.5371314601396967, "grad_norm": 0.7051541301710047, "learning_rate": 2.729140000831767e-06, "loss": 0.0194, "step": 128730 }, { "epoch": 0.5371523228546871, "grad_norm": 0.6926397546571331, "learning_rate": 2.7290870007210513e-06, "loss": 0.0231, "step": 128735 }, { "epoch": 0.5371731855696773, "grad_norm": 0.4488360219674317, "learning_rate": 2.7290340036980147e-06, "loss": 0.0192, "step": 128740 }, { "epoch": 0.5371940482846675, "grad_norm": 0.5542269908694392, "learning_rate": 2.728981009762357e-06, "loss": 0.0259, "step": 128745 }, { "epoch": 0.5372149109996579, "grad_norm": 0.4360837334626581, "learning_rate": 2.7289280189137783e-06, "loss": 0.0203, "step": 128750 }, { "epoch": 0.5372357737146481, "grad_norm": 1.2236504404487223, "learning_rate": 2.7288750311519797e-06, "loss": 0.0299, "step": 128755 }, { "epoch": 0.5372566364296384, "grad_norm": 0.8059658650824432, "learning_rate": 2.728822046476661e-06, "loss": 0.0259, "step": 128760 }, { "epoch": 0.5372774991446286, "grad_norm": 0.6568546397736991, "learning_rate": 2.7287690648875224e-06, "loss": 0.0166, "step": 128765 }, { "epoch": 0.537298361859619, "grad_norm": 0.5746680321198704, "learning_rate": 2.728716086384265e-06, "loss": 0.0305, "step": 128770 }, { "epoch": 0.5373192245746092, "grad_norm": 0.6919586962901841, "learning_rate": 2.7286631109665883e-06, "loss": 0.0198, "step": 128775 }, { "epoch": 0.5373400872895995, "grad_norm": 0.8396828924712437, "learning_rate": 2.7286101386341936e-06, "loss": 0.0231, "step": 128780 }, { "epoch": 0.5373609500045898, "grad_norm": 0.8150195836228452, "learning_rate": 2.7285571693867815e-06, "loss": 0.0189, "step": 128785 }, { "epoch": 0.5373818127195801, "grad_norm": 0.28633880319446614, "learning_rate": 2.728504203224051e-06, "loss": 0.0163, "step": 128790 }, { "epoch": 0.5374026754345703, "grad_norm": 0.3829584377712369, "learning_rate": 2.728451240145705e-06, "loss": 0.0212, "step": 128795 }, { "epoch": 0.5374235381495607, "grad_norm": 1.2597400984257754, "learning_rate": 2.7283982801514424e-06, "loss": 0.0217, "step": 128800 }, { "epoch": 0.5374444008645509, "grad_norm": 0.5688280512299595, "learning_rate": 2.7283453232409648e-06, "loss": 0.0293, "step": 128805 }, { "epoch": 0.5374652635795412, "grad_norm": 0.6903264798227233, "learning_rate": 2.728292369413973e-06, "loss": 0.0267, "step": 128810 }, { "epoch": 0.5374861262945315, "grad_norm": 1.0192139497150148, "learning_rate": 2.7282394186701666e-06, "loss": 0.0269, "step": 128815 }, { "epoch": 0.5375069890095218, "grad_norm": 1.2741761967177963, "learning_rate": 2.728186471009248e-06, "loss": 0.0236, "step": 128820 }, { "epoch": 0.537527851724512, "grad_norm": 0.38346698712951716, "learning_rate": 2.728133526430917e-06, "loss": 0.0224, "step": 128825 }, { "epoch": 0.5375487144395023, "grad_norm": 0.309243415241798, "learning_rate": 2.728080584934875e-06, "loss": 0.0191, "step": 128830 }, { "epoch": 0.5375695771544926, "grad_norm": 0.6442974816759202, "learning_rate": 2.728027646520823e-06, "loss": 0.0244, "step": 128835 }, { "epoch": 0.5375904398694829, "grad_norm": 0.6681513048276335, "learning_rate": 2.727974711188462e-06, "loss": 0.0233, "step": 128840 }, { "epoch": 0.5376113025844731, "grad_norm": 0.5323612999767563, "learning_rate": 2.7279217789374924e-06, "loss": 0.0279, "step": 128845 }, { "epoch": 0.5376321652994634, "grad_norm": 0.4892932928299576, "learning_rate": 2.7278688497676153e-06, "loss": 0.0193, "step": 128850 }, { "epoch": 0.5376530280144537, "grad_norm": 0.5486295620697725, "learning_rate": 2.7278159236785325e-06, "loss": 0.0256, "step": 128855 }, { "epoch": 0.5376738907294439, "grad_norm": 0.4234717024930099, "learning_rate": 2.727763000669945e-06, "loss": 0.0237, "step": 128860 }, { "epoch": 0.5376947534444343, "grad_norm": 0.49790600881721464, "learning_rate": 2.7277100807415535e-06, "loss": 0.0216, "step": 128865 }, { "epoch": 0.5377156161594245, "grad_norm": 1.444084107629249, "learning_rate": 2.7276571638930598e-06, "loss": 0.0259, "step": 128870 }, { "epoch": 0.5377364788744148, "grad_norm": 1.0927119266075602, "learning_rate": 2.7276042501241643e-06, "loss": 0.0299, "step": 128875 }, { "epoch": 0.537757341589405, "grad_norm": 1.6346997179136835, "learning_rate": 2.72755133943457e-06, "loss": 0.0292, "step": 128880 }, { "epoch": 0.5377782043043954, "grad_norm": 0.6179718279083714, "learning_rate": 2.7274984318239755e-06, "loss": 0.0281, "step": 128885 }, { "epoch": 0.5377990670193856, "grad_norm": 7.1966583804843225, "learning_rate": 2.7274455272920853e-06, "loss": 0.0361, "step": 128890 }, { "epoch": 0.5378199297343759, "grad_norm": 0.8841224567905771, "learning_rate": 2.7273926258385984e-06, "loss": 0.0248, "step": 128895 }, { "epoch": 0.5378407924493662, "grad_norm": 0.8625792869257193, "learning_rate": 2.727339727463218e-06, "loss": 0.0245, "step": 128900 }, { "epoch": 0.5378616551643565, "grad_norm": 0.29291495473252604, "learning_rate": 2.7272868321656443e-06, "loss": 0.0236, "step": 128905 }, { "epoch": 0.5378825178793467, "grad_norm": 0.8757589700572556, "learning_rate": 2.72723393994558e-06, "loss": 0.0276, "step": 128910 }, { "epoch": 0.5379033805943371, "grad_norm": 0.5712077860743873, "learning_rate": 2.727181050802725e-06, "loss": 0.0202, "step": 128915 }, { "epoch": 0.5379242433093273, "grad_norm": 1.1436928387278926, "learning_rate": 2.7271281647367825e-06, "loss": 0.0248, "step": 128920 }, { "epoch": 0.5379451060243176, "grad_norm": 0.4661373653856487, "learning_rate": 2.7270752817474538e-06, "loss": 0.0305, "step": 128925 }, { "epoch": 0.5379659687393079, "grad_norm": 0.6209524368066459, "learning_rate": 2.727022401834441e-06, "loss": 0.0261, "step": 128930 }, { "epoch": 0.5379868314542982, "grad_norm": 0.6991753663742091, "learning_rate": 2.7269695249974443e-06, "loss": 0.0264, "step": 128935 }, { "epoch": 0.5380076941692884, "grad_norm": 0.9728292236737648, "learning_rate": 2.7269166512361672e-06, "loss": 0.0209, "step": 128940 }, { "epoch": 0.5380285568842786, "grad_norm": 0.9795585593001616, "learning_rate": 2.7268637805503105e-06, "loss": 0.0254, "step": 128945 }, { "epoch": 0.538049419599269, "grad_norm": 0.9928543836115, "learning_rate": 2.7268109129395765e-06, "loss": 0.0237, "step": 128950 }, { "epoch": 0.5380702823142592, "grad_norm": 0.620702919020377, "learning_rate": 2.7267580484036673e-06, "loss": 0.0217, "step": 128955 }, { "epoch": 0.5380911450292495, "grad_norm": 0.5154336347231396, "learning_rate": 2.7267051869422844e-06, "loss": 0.028, "step": 128960 }, { "epoch": 0.5381120077442398, "grad_norm": 0.940419241356202, "learning_rate": 2.72665232855513e-06, "loss": 0.0235, "step": 128965 }, { "epoch": 0.5381328704592301, "grad_norm": 0.9730007014879164, "learning_rate": 2.726599473241906e-06, "loss": 0.0213, "step": 128970 }, { "epoch": 0.5381537331742203, "grad_norm": 0.6427218083965287, "learning_rate": 2.726546621002315e-06, "loss": 0.0231, "step": 128975 }, { "epoch": 0.5381745958892107, "grad_norm": 0.4487304295826895, "learning_rate": 2.726493771836058e-06, "loss": 0.0196, "step": 128980 }, { "epoch": 0.5381954586042009, "grad_norm": 0.7627787848647389, "learning_rate": 2.7264409257428385e-06, "loss": 0.019, "step": 128985 }, { "epoch": 0.5382163213191912, "grad_norm": 1.611355893356208, "learning_rate": 2.726388082722358e-06, "loss": 0.0239, "step": 128990 }, { "epoch": 0.5382371840341815, "grad_norm": 1.0146916440304967, "learning_rate": 2.7263352427743188e-06, "loss": 0.0257, "step": 128995 }, { "epoch": 0.5382580467491718, "grad_norm": 0.7771026572556539, "learning_rate": 2.726282405898423e-06, "loss": 0.0267, "step": 129000 }, { "epoch": 0.538278909464162, "grad_norm": 0.8251040320187419, "learning_rate": 2.7262295720943734e-06, "loss": 0.0234, "step": 129005 }, { "epoch": 0.5382997721791523, "grad_norm": 0.49858133972942364, "learning_rate": 2.7261767413618716e-06, "loss": 0.0186, "step": 129010 }, { "epoch": 0.5383206348941426, "grad_norm": 1.3626659552508227, "learning_rate": 2.7261239137006206e-06, "loss": 0.0317, "step": 129015 }, { "epoch": 0.5383414976091329, "grad_norm": 0.574579402273329, "learning_rate": 2.726071089110323e-06, "loss": 0.0209, "step": 129020 }, { "epoch": 0.5383623603241231, "grad_norm": 0.6754889647032348, "learning_rate": 2.726018267590681e-06, "loss": 0.0219, "step": 129025 }, { "epoch": 0.5383832230391135, "grad_norm": 0.6774438114621114, "learning_rate": 2.7259654491413966e-06, "loss": 0.0201, "step": 129030 }, { "epoch": 0.5384040857541037, "grad_norm": 0.8113413130768837, "learning_rate": 2.7259126337621734e-06, "loss": 0.0193, "step": 129035 }, { "epoch": 0.538424948469094, "grad_norm": 0.5972121782321911, "learning_rate": 2.7258598214527127e-06, "loss": 0.0194, "step": 129040 }, { "epoch": 0.5384458111840843, "grad_norm": 0.9650398628902098, "learning_rate": 2.725807012212719e-06, "loss": 0.0264, "step": 129045 }, { "epoch": 0.5384666738990745, "grad_norm": 0.6635424711549878, "learning_rate": 2.725754206041893e-06, "loss": 0.0417, "step": 129050 }, { "epoch": 0.5384875366140648, "grad_norm": 0.8906629720147822, "learning_rate": 2.7257014029399387e-06, "loss": 0.0264, "step": 129055 }, { "epoch": 0.538508399329055, "grad_norm": 0.6822172410679509, "learning_rate": 2.7256486029065587e-06, "loss": 0.0276, "step": 129060 }, { "epoch": 0.5385292620440454, "grad_norm": 1.4652884671161357, "learning_rate": 2.725595805941455e-06, "loss": 0.0211, "step": 129065 }, { "epoch": 0.5385501247590356, "grad_norm": 0.7880626079368293, "learning_rate": 2.725543012044331e-06, "loss": 0.0205, "step": 129070 }, { "epoch": 0.5385709874740259, "grad_norm": 0.6657303925272489, "learning_rate": 2.7254902212148897e-06, "loss": 0.0279, "step": 129075 }, { "epoch": 0.5385918501890162, "grad_norm": 0.45088457741237, "learning_rate": 2.7254374334528343e-06, "loss": 0.0221, "step": 129080 }, { "epoch": 0.5386127129040065, "grad_norm": 0.4152636344234287, "learning_rate": 2.725384648757867e-06, "loss": 0.0216, "step": 129085 }, { "epoch": 0.5386335756189967, "grad_norm": 0.6285292275768596, "learning_rate": 2.7253318671296914e-06, "loss": 0.0216, "step": 129090 }, { "epoch": 0.5386544383339871, "grad_norm": 0.7363369211500201, "learning_rate": 2.72527908856801e-06, "loss": 0.0345, "step": 129095 }, { "epoch": 0.5386753010489773, "grad_norm": 0.8434362565500344, "learning_rate": 2.7252263130725266e-06, "loss": 0.0261, "step": 129100 }, { "epoch": 0.5386961637639676, "grad_norm": 0.7686904011487302, "learning_rate": 2.725173540642944e-06, "loss": 0.0234, "step": 129105 }, { "epoch": 0.5387170264789579, "grad_norm": 0.7209525566855403, "learning_rate": 2.7251207712789652e-06, "loss": 0.0263, "step": 129110 }, { "epoch": 0.5387378891939482, "grad_norm": 5.708985201736765, "learning_rate": 2.7250680049802936e-06, "loss": 0.0333, "step": 129115 }, { "epoch": 0.5387587519089384, "grad_norm": 0.36487998050989173, "learning_rate": 2.725015241746632e-06, "loss": 0.0218, "step": 129120 }, { "epoch": 0.5387796146239286, "grad_norm": 0.7733276592086706, "learning_rate": 2.7249624815776844e-06, "loss": 0.0341, "step": 129125 }, { "epoch": 0.538800477338919, "grad_norm": 0.6282682776354672, "learning_rate": 2.724909724473153e-06, "loss": 0.0244, "step": 129130 }, { "epoch": 0.5388213400539092, "grad_norm": 0.5702122902416022, "learning_rate": 2.7248569704327427e-06, "loss": 0.0283, "step": 129135 }, { "epoch": 0.5388422027688995, "grad_norm": 0.557907357912276, "learning_rate": 2.724804219456156e-06, "loss": 0.0225, "step": 129140 }, { "epoch": 0.5388630654838898, "grad_norm": 0.5797842460334899, "learning_rate": 2.7247514715430968e-06, "loss": 0.0285, "step": 129145 }, { "epoch": 0.5388839281988801, "grad_norm": 0.8295575932478838, "learning_rate": 2.7246987266932674e-06, "loss": 0.0217, "step": 129150 }, { "epoch": 0.5389047909138703, "grad_norm": 0.6353143813815475, "learning_rate": 2.7246459849063728e-06, "loss": 0.0199, "step": 129155 }, { "epoch": 0.5389256536288607, "grad_norm": 0.9060845399314224, "learning_rate": 2.7245932461821155e-06, "loss": 0.0242, "step": 129160 }, { "epoch": 0.5389465163438509, "grad_norm": 0.8591985445147957, "learning_rate": 2.7245405105202e-06, "loss": 0.0233, "step": 129165 }, { "epoch": 0.5389673790588412, "grad_norm": 0.6979426096782649, "learning_rate": 2.724487777920329e-06, "loss": 0.028, "step": 129170 }, { "epoch": 0.5389882417738315, "grad_norm": 0.8461855363964845, "learning_rate": 2.724435048382208e-06, "loss": 0.0299, "step": 129175 }, { "epoch": 0.5390091044888218, "grad_norm": 0.781036815517438, "learning_rate": 2.724382321905538e-06, "loss": 0.0215, "step": 129180 }, { "epoch": 0.539029967203812, "grad_norm": 1.18853928235315, "learning_rate": 2.7243295984900246e-06, "loss": 0.0259, "step": 129185 }, { "epoch": 0.5390508299188023, "grad_norm": 0.6324243627677977, "learning_rate": 2.724276878135371e-06, "loss": 0.023, "step": 129190 }, { "epoch": 0.5390716926337926, "grad_norm": 0.5471759265365337, "learning_rate": 2.7242241608412817e-06, "loss": 0.0214, "step": 129195 }, { "epoch": 0.5390925553487829, "grad_norm": 1.1185338935521905, "learning_rate": 2.7241714466074603e-06, "loss": 0.0263, "step": 129200 }, { "epoch": 0.5391134180637731, "grad_norm": 0.8949084654856103, "learning_rate": 2.7241187354336103e-06, "loss": 0.025, "step": 129205 }, { "epoch": 0.5391342807787635, "grad_norm": 2.245099757161641, "learning_rate": 2.7240660273194356e-06, "loss": 0.02, "step": 129210 }, { "epoch": 0.5391551434937537, "grad_norm": 0.31362102578984974, "learning_rate": 2.724013322264641e-06, "loss": 0.0163, "step": 129215 }, { "epoch": 0.539176006208744, "grad_norm": 0.7424946246257998, "learning_rate": 2.72396062026893e-06, "loss": 0.0263, "step": 129220 }, { "epoch": 0.5391968689237343, "grad_norm": 0.5747181959391826, "learning_rate": 2.723907921332007e-06, "loss": 0.0199, "step": 129225 }, { "epoch": 0.5392177316387246, "grad_norm": 0.8023668256819786, "learning_rate": 2.7238552254535755e-06, "loss": 0.0266, "step": 129230 }, { "epoch": 0.5392385943537148, "grad_norm": 0.548299623597049, "learning_rate": 2.7238025326333396e-06, "loss": 0.0278, "step": 129235 }, { "epoch": 0.539259457068705, "grad_norm": 0.5571853747501068, "learning_rate": 2.7237498428710044e-06, "loss": 0.0248, "step": 129240 }, { "epoch": 0.5392803197836954, "grad_norm": 0.28792766479369697, "learning_rate": 2.723697156166274e-06, "loss": 0.0194, "step": 129245 }, { "epoch": 0.5393011824986856, "grad_norm": 0.5375920170803963, "learning_rate": 2.7236444725188526e-06, "loss": 0.0319, "step": 129250 }, { "epoch": 0.5393220452136759, "grad_norm": 0.4975728821974953, "learning_rate": 2.723591791928444e-06, "loss": 0.023, "step": 129255 }, { "epoch": 0.5393429079286662, "grad_norm": 0.716899243410671, "learning_rate": 2.7235391143947526e-06, "loss": 0.0231, "step": 129260 }, { "epoch": 0.5393637706436565, "grad_norm": 0.6397169190835632, "learning_rate": 2.7234864399174835e-06, "loss": 0.0259, "step": 129265 }, { "epoch": 0.5393846333586467, "grad_norm": 0.5418308374498935, "learning_rate": 2.7234337684963407e-06, "loss": 0.0261, "step": 129270 }, { "epoch": 0.5394054960736371, "grad_norm": 0.34000026737001765, "learning_rate": 2.723381100131029e-06, "loss": 0.0262, "step": 129275 }, { "epoch": 0.5394263587886273, "grad_norm": 0.9469324006481644, "learning_rate": 2.7233284348212525e-06, "loss": 0.0272, "step": 129280 }, { "epoch": 0.5394472215036176, "grad_norm": 0.7455109580407246, "learning_rate": 2.723275772566716e-06, "loss": 0.0207, "step": 129285 }, { "epoch": 0.5394680842186079, "grad_norm": 0.6608622160547917, "learning_rate": 2.723223113367124e-06, "loss": 0.0248, "step": 129290 }, { "epoch": 0.5394889469335982, "grad_norm": 0.9568160640084783, "learning_rate": 2.7231704572221813e-06, "loss": 0.0319, "step": 129295 }, { "epoch": 0.5395098096485884, "grad_norm": 0.6418955952900012, "learning_rate": 2.723117804131592e-06, "loss": 0.0228, "step": 129300 }, { "epoch": 0.5395306723635787, "grad_norm": 0.8840115732699684, "learning_rate": 2.7230651540950615e-06, "loss": 0.0271, "step": 129305 }, { "epoch": 0.539551535078569, "grad_norm": 0.8950276467557986, "learning_rate": 2.7230125071122947e-06, "loss": 0.0283, "step": 129310 }, { "epoch": 0.5395723977935593, "grad_norm": 0.6392219141635368, "learning_rate": 2.722959863182996e-06, "loss": 0.0174, "step": 129315 }, { "epoch": 0.5395932605085495, "grad_norm": 0.5786590232355594, "learning_rate": 2.7229072223068707e-06, "loss": 0.025, "step": 129320 }, { "epoch": 0.5396141232235399, "grad_norm": 0.7377842279151879, "learning_rate": 2.722854584483623e-06, "loss": 0.0183, "step": 129325 }, { "epoch": 0.5396349859385301, "grad_norm": 0.4701818673570632, "learning_rate": 2.722801949712958e-06, "loss": 0.0203, "step": 129330 }, { "epoch": 0.5396558486535203, "grad_norm": 1.1578572235015818, "learning_rate": 2.7227493179945813e-06, "loss": 0.0263, "step": 129335 }, { "epoch": 0.5396767113685107, "grad_norm": 0.45031478920985596, "learning_rate": 2.722696689328197e-06, "loss": 0.0278, "step": 129340 }, { "epoch": 0.5396975740835009, "grad_norm": 0.7683599842921373, "learning_rate": 2.722644063713511e-06, "loss": 0.0247, "step": 129345 }, { "epoch": 0.5397184367984912, "grad_norm": 0.65589572889479, "learning_rate": 2.7225914411502274e-06, "loss": 0.0202, "step": 129350 }, { "epoch": 0.5397392995134815, "grad_norm": 0.5883363092824313, "learning_rate": 2.722538821638052e-06, "loss": 0.0233, "step": 129355 }, { "epoch": 0.5397601622284718, "grad_norm": 0.999597539255348, "learning_rate": 2.7224862051766902e-06, "loss": 0.0191, "step": 129360 }, { "epoch": 0.539781024943462, "grad_norm": 0.5803166486762531, "learning_rate": 2.7224335917658473e-06, "loss": 0.0188, "step": 129365 }, { "epoch": 0.5398018876584523, "grad_norm": 0.680453155388529, "learning_rate": 2.7223809814052277e-06, "loss": 0.0231, "step": 129370 }, { "epoch": 0.5398227503734426, "grad_norm": 0.6725384156526002, "learning_rate": 2.7223283740945373e-06, "loss": 0.0133, "step": 129375 }, { "epoch": 0.5398436130884329, "grad_norm": 0.6109113164412391, "learning_rate": 2.7222757698334807e-06, "loss": 0.0204, "step": 129380 }, { "epoch": 0.5398644758034231, "grad_norm": 0.8787788655268679, "learning_rate": 2.7222231686217643e-06, "loss": 0.0243, "step": 129385 }, { "epoch": 0.5398853385184135, "grad_norm": 0.543003495419993, "learning_rate": 2.7221705704590924e-06, "loss": 0.021, "step": 129390 }, { "epoch": 0.5399062012334037, "grad_norm": 0.4327982280390025, "learning_rate": 2.7221179753451717e-06, "loss": 0.024, "step": 129395 }, { "epoch": 0.539927063948394, "grad_norm": 0.3868578271695227, "learning_rate": 2.7220653832797073e-06, "loss": 0.0231, "step": 129400 }, { "epoch": 0.5399479266633843, "grad_norm": 0.9179339856783485, "learning_rate": 2.7220127942624036e-06, "loss": 0.0279, "step": 129405 }, { "epoch": 0.5399687893783746, "grad_norm": 1.0205091775262902, "learning_rate": 2.721960208292968e-06, "loss": 0.0296, "step": 129410 }, { "epoch": 0.5399896520933648, "grad_norm": 0.6712809071282166, "learning_rate": 2.7219076253711048e-06, "loss": 0.038, "step": 129415 }, { "epoch": 0.540010514808355, "grad_norm": 0.7419431723864707, "learning_rate": 2.7218550454965197e-06, "loss": 0.0255, "step": 129420 }, { "epoch": 0.5400313775233454, "grad_norm": 0.7654928809876534, "learning_rate": 2.721802468668919e-06, "loss": 0.0308, "step": 129425 }, { "epoch": 0.5400522402383356, "grad_norm": 0.5592307601664965, "learning_rate": 2.721749894888008e-06, "loss": 0.0209, "step": 129430 }, { "epoch": 0.5400731029533259, "grad_norm": 0.664347513995053, "learning_rate": 2.7216973241534923e-06, "loss": 0.0236, "step": 129435 }, { "epoch": 0.5400939656683162, "grad_norm": 0.6578283933147108, "learning_rate": 2.7216447564650783e-06, "loss": 0.0213, "step": 129440 }, { "epoch": 0.5401148283833065, "grad_norm": 0.4607242993146167, "learning_rate": 2.721592191822471e-06, "loss": 0.0395, "step": 129445 }, { "epoch": 0.5401356910982967, "grad_norm": 0.6059493973532386, "learning_rate": 2.7215396302253778e-06, "loss": 0.0217, "step": 129450 }, { "epoch": 0.5401565538132871, "grad_norm": 0.7883442748452052, "learning_rate": 2.721487071673503e-06, "loss": 0.025, "step": 129455 }, { "epoch": 0.5401774165282773, "grad_norm": 0.2974042728528887, "learning_rate": 2.7214345161665535e-06, "loss": 0.0292, "step": 129460 }, { "epoch": 0.5401982792432676, "grad_norm": 0.7743988069511696, "learning_rate": 2.7213819637042344e-06, "loss": 0.0347, "step": 129465 }, { "epoch": 0.5402191419582579, "grad_norm": 0.5063545461607682, "learning_rate": 2.721329414286253e-06, "loss": 0.0215, "step": 129470 }, { "epoch": 0.5402400046732482, "grad_norm": 0.7343959938174998, "learning_rate": 2.7212768679123143e-06, "loss": 0.021, "step": 129475 }, { "epoch": 0.5402608673882384, "grad_norm": 0.9625554252085938, "learning_rate": 2.721224324582125e-06, "loss": 0.0208, "step": 129480 }, { "epoch": 0.5402817301032287, "grad_norm": 0.6192952852589316, "learning_rate": 2.721171784295391e-06, "loss": 0.031, "step": 129485 }, { "epoch": 0.540302592818219, "grad_norm": 0.34820195272749493, "learning_rate": 2.7211192470518186e-06, "loss": 0.0192, "step": 129490 }, { "epoch": 0.5403234555332093, "grad_norm": 0.4747799691557691, "learning_rate": 2.721066712851115e-06, "loss": 0.0253, "step": 129495 }, { "epoch": 0.5403443182481995, "grad_norm": 0.5915684440385848, "learning_rate": 2.7210141816929843e-06, "loss": 0.0246, "step": 129500 }, { "epoch": 0.5403651809631899, "grad_norm": 0.7352880393905904, "learning_rate": 2.7209616535771344e-06, "loss": 0.0227, "step": 129505 }, { "epoch": 0.5403860436781801, "grad_norm": 0.8327370895069207, "learning_rate": 2.720909128503271e-06, "loss": 0.0221, "step": 129510 }, { "epoch": 0.5404069063931703, "grad_norm": 0.6173274059216799, "learning_rate": 2.720856606471102e-06, "loss": 0.0273, "step": 129515 }, { "epoch": 0.5404277691081607, "grad_norm": 0.9906677575482636, "learning_rate": 2.7208040874803316e-06, "loss": 0.0332, "step": 129520 }, { "epoch": 0.540448631823151, "grad_norm": 0.43758709896459025, "learning_rate": 2.7207515715306675e-06, "loss": 0.0237, "step": 129525 }, { "epoch": 0.5404694945381412, "grad_norm": 0.7348311926540626, "learning_rate": 2.720699058621817e-06, "loss": 0.024, "step": 129530 }, { "epoch": 0.5404903572531315, "grad_norm": 0.9157915838884033, "learning_rate": 2.7206465487534846e-06, "loss": 0.0288, "step": 129535 }, { "epoch": 0.5405112199681218, "grad_norm": 0.49428622290634877, "learning_rate": 2.7205940419253777e-06, "loss": 0.0334, "step": 129540 }, { "epoch": 0.540532082683112, "grad_norm": 0.6664167676856306, "learning_rate": 2.7205415381372043e-06, "loss": 0.0252, "step": 129545 }, { "epoch": 0.5405529453981023, "grad_norm": 0.32502523342247586, "learning_rate": 2.72048903738867e-06, "loss": 0.028, "step": 129550 }, { "epoch": 0.5405738081130926, "grad_norm": 1.2841975605234532, "learning_rate": 2.7204365396794813e-06, "loss": 0.0247, "step": 129555 }, { "epoch": 0.5405946708280829, "grad_norm": 0.644275975290917, "learning_rate": 2.7203840450093448e-06, "loss": 0.0265, "step": 129560 }, { "epoch": 0.5406155335430731, "grad_norm": 0.5397656666796437, "learning_rate": 2.7203315533779686e-06, "loss": 0.0254, "step": 129565 }, { "epoch": 0.5406363962580635, "grad_norm": 0.5520361049810492, "learning_rate": 2.7202790647850576e-06, "loss": 0.0308, "step": 129570 }, { "epoch": 0.5406572589730537, "grad_norm": 0.5855273017806797, "learning_rate": 2.7202265792303207e-06, "loss": 0.0208, "step": 129575 }, { "epoch": 0.540678121688044, "grad_norm": 0.8288009863945791, "learning_rate": 2.7201740967134625e-06, "loss": 0.0222, "step": 129580 }, { "epoch": 0.5406989844030343, "grad_norm": 0.6479815100333902, "learning_rate": 2.7201216172341927e-06, "loss": 0.0249, "step": 129585 }, { "epoch": 0.5407198471180246, "grad_norm": 1.1502592399165896, "learning_rate": 2.7200691407922162e-06, "loss": 0.0305, "step": 129590 }, { "epoch": 0.5407407098330148, "grad_norm": 0.9465246268596021, "learning_rate": 2.7200166673872412e-06, "loss": 0.0227, "step": 129595 }, { "epoch": 0.540761572548005, "grad_norm": 0.6199727449542571, "learning_rate": 2.719964197018974e-06, "loss": 0.0272, "step": 129600 }, { "epoch": 0.5407824352629954, "grad_norm": 0.6152567179233439, "learning_rate": 2.719911729687122e-06, "loss": 0.0342, "step": 129605 }, { "epoch": 0.5408032979779857, "grad_norm": 0.8024826339465414, "learning_rate": 2.719859265391392e-06, "loss": 0.0211, "step": 129610 }, { "epoch": 0.5408241606929759, "grad_norm": 0.6621881917431657, "learning_rate": 2.7198068041314924e-06, "loss": 0.0223, "step": 129615 }, { "epoch": 0.5408450234079663, "grad_norm": 0.45140329360989123, "learning_rate": 2.719754345907129e-06, "loss": 0.02, "step": 129620 }, { "epoch": 0.5408658861229565, "grad_norm": 0.6377635329851037, "learning_rate": 2.71970189071801e-06, "loss": 0.0211, "step": 129625 }, { "epoch": 0.5408867488379467, "grad_norm": 1.153776993332654, "learning_rate": 2.7196494385638416e-06, "loss": 0.0279, "step": 129630 }, { "epoch": 0.5409076115529371, "grad_norm": 1.5017154483496415, "learning_rate": 2.719596989444333e-06, "loss": 0.0259, "step": 129635 }, { "epoch": 0.5409284742679273, "grad_norm": 0.3840939244153423, "learning_rate": 2.71954454335919e-06, "loss": 0.0244, "step": 129640 }, { "epoch": 0.5409493369829176, "grad_norm": 0.6255723319698505, "learning_rate": 2.71949210030812e-06, "loss": 0.0177, "step": 129645 }, { "epoch": 0.5409701996979079, "grad_norm": 0.8488410580602495, "learning_rate": 2.7194396602908318e-06, "loss": 0.027, "step": 129650 }, { "epoch": 0.5409910624128982, "grad_norm": 0.7198903150345536, "learning_rate": 2.719387223307032e-06, "loss": 0.0174, "step": 129655 }, { "epoch": 0.5410119251278884, "grad_norm": 0.5039540917181233, "learning_rate": 2.7193347893564283e-06, "loss": 0.0258, "step": 129660 }, { "epoch": 0.5410327878428787, "grad_norm": 0.9776800917860494, "learning_rate": 2.7192823584387285e-06, "loss": 0.0334, "step": 129665 }, { "epoch": 0.541053650557869, "grad_norm": 0.48250831498465824, "learning_rate": 2.7192299305536395e-06, "loss": 0.023, "step": 129670 }, { "epoch": 0.5410745132728593, "grad_norm": 0.4058205802026868, "learning_rate": 2.7191775057008696e-06, "loss": 0.0179, "step": 129675 }, { "epoch": 0.5410953759878495, "grad_norm": 1.148102230733188, "learning_rate": 2.7191250838801265e-06, "loss": 0.0288, "step": 129680 }, { "epoch": 0.5411162387028399, "grad_norm": 0.8600839361252336, "learning_rate": 2.7190726650911177e-06, "loss": 0.0371, "step": 129685 }, { "epoch": 0.5411371014178301, "grad_norm": 0.6569086911190946, "learning_rate": 2.7190202493335504e-06, "loss": 0.0194, "step": 129690 }, { "epoch": 0.5411579641328204, "grad_norm": 0.5525401103909905, "learning_rate": 2.718967836607134e-06, "loss": 0.0243, "step": 129695 }, { "epoch": 0.5411788268478107, "grad_norm": 0.515432325197345, "learning_rate": 2.7189154269115754e-06, "loss": 0.0223, "step": 129700 }, { "epoch": 0.541199689562801, "grad_norm": 0.3867703655778783, "learning_rate": 2.7188630202465828e-06, "loss": 0.0333, "step": 129705 }, { "epoch": 0.5412205522777912, "grad_norm": 0.6083302869678554, "learning_rate": 2.7188106166118632e-06, "loss": 0.0252, "step": 129710 }, { "epoch": 0.5412414149927816, "grad_norm": 0.7894090616120263, "learning_rate": 2.718758216007126e-06, "loss": 0.0215, "step": 129715 }, { "epoch": 0.5412622777077718, "grad_norm": 0.94016073506501, "learning_rate": 2.7187058184320788e-06, "loss": 0.029, "step": 129720 }, { "epoch": 0.541283140422762, "grad_norm": 0.9367327055198931, "learning_rate": 2.718653423886428e-06, "loss": 0.0233, "step": 129725 }, { "epoch": 0.5413040031377523, "grad_norm": 0.43719694126644393, "learning_rate": 2.718601032369884e-06, "loss": 0.0209, "step": 129730 }, { "epoch": 0.5413248658527426, "grad_norm": 0.9338652186707821, "learning_rate": 2.718548643882154e-06, "loss": 0.0278, "step": 129735 }, { "epoch": 0.5413457285677329, "grad_norm": 0.2954539688110111, "learning_rate": 2.718496258422946e-06, "loss": 0.0239, "step": 129740 }, { "epoch": 0.5413665912827231, "grad_norm": 0.4232252328155652, "learning_rate": 2.718443875991969e-06, "loss": 0.0146, "step": 129745 }, { "epoch": 0.5413874539977135, "grad_norm": 0.9179161548136576, "learning_rate": 2.7183914965889303e-06, "loss": 0.0267, "step": 129750 }, { "epoch": 0.5414083167127037, "grad_norm": 1.2261973130463784, "learning_rate": 2.718339120213539e-06, "loss": 0.0243, "step": 129755 }, { "epoch": 0.541429179427694, "grad_norm": 0.6794180161797148, "learning_rate": 2.7182867468655024e-06, "loss": 0.0279, "step": 129760 }, { "epoch": 0.5414500421426843, "grad_norm": 0.3299202080526994, "learning_rate": 2.71823437654453e-06, "loss": 0.0259, "step": 129765 }, { "epoch": 0.5414709048576746, "grad_norm": 0.47001813946308973, "learning_rate": 2.718182009250329e-06, "loss": 0.0203, "step": 129770 }, { "epoch": 0.5414917675726648, "grad_norm": 0.5943782955637213, "learning_rate": 2.718129644982609e-06, "loss": 0.0342, "step": 129775 }, { "epoch": 0.5415126302876551, "grad_norm": 1.0751542036356323, "learning_rate": 2.7180772837410786e-06, "loss": 0.0221, "step": 129780 }, { "epoch": 0.5415334930026454, "grad_norm": 0.6612924771135005, "learning_rate": 2.7180249255254448e-06, "loss": 0.0179, "step": 129785 }, { "epoch": 0.5415543557176357, "grad_norm": 0.4384758032912005, "learning_rate": 2.717972570335418e-06, "loss": 0.0231, "step": 129790 }, { "epoch": 0.5415752184326259, "grad_norm": 1.0926153172160749, "learning_rate": 2.717920218170706e-06, "loss": 0.0238, "step": 129795 }, { "epoch": 0.5415960811476163, "grad_norm": 0.9315328113822646, "learning_rate": 2.7178678690310167e-06, "loss": 0.022, "step": 129800 }, { "epoch": 0.5416169438626065, "grad_norm": 1.0829345709202969, "learning_rate": 2.7178155229160596e-06, "loss": 0.0281, "step": 129805 }, { "epoch": 0.5416378065775967, "grad_norm": 0.7355025057273359, "learning_rate": 2.7177631798255437e-06, "loss": 0.0283, "step": 129810 }, { "epoch": 0.5416586692925871, "grad_norm": 0.626571356659113, "learning_rate": 2.7177108397591777e-06, "loss": 0.0228, "step": 129815 }, { "epoch": 0.5416795320075773, "grad_norm": 0.3762320920204362, "learning_rate": 2.7176585027166698e-06, "loss": 0.0226, "step": 129820 }, { "epoch": 0.5417003947225676, "grad_norm": 0.5190535403656169, "learning_rate": 2.717606168697729e-06, "loss": 0.022, "step": 129825 }, { "epoch": 0.541721257437558, "grad_norm": 0.7585504655356345, "learning_rate": 2.7175538377020643e-06, "loss": 0.0345, "step": 129830 }, { "epoch": 0.5417421201525482, "grad_norm": 0.6270084568049268, "learning_rate": 2.7175015097293857e-06, "loss": 0.0301, "step": 129835 }, { "epoch": 0.5417629828675384, "grad_norm": 0.7199606063083507, "learning_rate": 2.7174491847794e-06, "loss": 0.026, "step": 129840 }, { "epoch": 0.5417838455825287, "grad_norm": 0.4088083103321547, "learning_rate": 2.7173968628518177e-06, "loss": 0.0164, "step": 129845 }, { "epoch": 0.541804708297519, "grad_norm": 0.849356914371596, "learning_rate": 2.7173445439463477e-06, "loss": 0.0293, "step": 129850 }, { "epoch": 0.5418255710125093, "grad_norm": 0.662344874392484, "learning_rate": 2.717292228062699e-06, "loss": 0.0246, "step": 129855 }, { "epoch": 0.5418464337274995, "grad_norm": 0.4483202712953222, "learning_rate": 2.71723991520058e-06, "loss": 0.021, "step": 129860 }, { "epoch": 0.5418672964424899, "grad_norm": 0.8421885731913913, "learning_rate": 2.7171876053597013e-06, "loss": 0.0274, "step": 129865 }, { "epoch": 0.5418881591574801, "grad_norm": 0.973505201028655, "learning_rate": 2.71713529853977e-06, "loss": 0.0216, "step": 129870 }, { "epoch": 0.5419090218724704, "grad_norm": 0.6927494162299705, "learning_rate": 2.717082994740498e-06, "loss": 0.0292, "step": 129875 }, { "epoch": 0.5419298845874607, "grad_norm": 0.503949447734673, "learning_rate": 2.7170306939615927e-06, "loss": 0.0208, "step": 129880 }, { "epoch": 0.541950747302451, "grad_norm": 1.3473956723157847, "learning_rate": 2.7169783962027636e-06, "loss": 0.0302, "step": 129885 }, { "epoch": 0.5419716100174412, "grad_norm": 0.49710249928614564, "learning_rate": 2.7169261014637204e-06, "loss": 0.0263, "step": 129890 }, { "epoch": 0.5419924727324316, "grad_norm": 0.5851894417100577, "learning_rate": 2.7168738097441726e-06, "loss": 0.0238, "step": 129895 }, { "epoch": 0.5420133354474218, "grad_norm": 0.6149680961677273, "learning_rate": 2.7168215210438297e-06, "loss": 0.0264, "step": 129900 }, { "epoch": 0.542034198162412, "grad_norm": 0.5081526020350319, "learning_rate": 2.716769235362401e-06, "loss": 0.0246, "step": 129905 }, { "epoch": 0.5420550608774023, "grad_norm": 0.6115156035268081, "learning_rate": 2.7167169526995957e-06, "loss": 0.0166, "step": 129910 }, { "epoch": 0.5420759235923927, "grad_norm": 1.1266186405543053, "learning_rate": 2.716664673055124e-06, "loss": 0.0221, "step": 129915 }, { "epoch": 0.5420967863073829, "grad_norm": 0.9383779134382664, "learning_rate": 2.7166123964286944e-06, "loss": 0.0324, "step": 129920 }, { "epoch": 0.5421176490223731, "grad_norm": 0.28868249191144546, "learning_rate": 2.7165601228200176e-06, "loss": 0.0258, "step": 129925 }, { "epoch": 0.5421385117373635, "grad_norm": 0.3496883020659917, "learning_rate": 2.716507852228803e-06, "loss": 0.0317, "step": 129930 }, { "epoch": 0.5421593744523537, "grad_norm": 0.5104207421787679, "learning_rate": 2.7164555846547607e-06, "loss": 0.0219, "step": 129935 }, { "epoch": 0.542180237167344, "grad_norm": 0.4098565932248859, "learning_rate": 2.716403320097599e-06, "loss": 0.0147, "step": 129940 }, { "epoch": 0.5422010998823343, "grad_norm": 0.6448406272475992, "learning_rate": 2.7163510585570297e-06, "loss": 0.0291, "step": 129945 }, { "epoch": 0.5422219625973246, "grad_norm": 1.1213605210110622, "learning_rate": 2.716298800032761e-06, "loss": 0.034, "step": 129950 }, { "epoch": 0.5422428253123148, "grad_norm": 1.0290232847954013, "learning_rate": 2.7162465445245033e-06, "loss": 0.0272, "step": 129955 }, { "epoch": 0.5422636880273051, "grad_norm": 0.5871046567943099, "learning_rate": 2.7161942920319666e-06, "loss": 0.023, "step": 129960 }, { "epoch": 0.5422845507422954, "grad_norm": 0.66473458888988, "learning_rate": 2.716142042554861e-06, "loss": 0.0234, "step": 129965 }, { "epoch": 0.5423054134572857, "grad_norm": 0.536287408571341, "learning_rate": 2.7160897960928966e-06, "loss": 0.021, "step": 129970 }, { "epoch": 0.5423262761722759, "grad_norm": 0.9278034173437488, "learning_rate": 2.7160375526457828e-06, "loss": 0.0213, "step": 129975 }, { "epoch": 0.5423471388872663, "grad_norm": 1.1641853707442926, "learning_rate": 2.7159853122132295e-06, "loss": 0.0319, "step": 129980 }, { "epoch": 0.5423680016022565, "grad_norm": 0.548455258310819, "learning_rate": 2.715933074794948e-06, "loss": 0.0192, "step": 129985 }, { "epoch": 0.5423888643172468, "grad_norm": 0.4901872995615877, "learning_rate": 2.715880840390648e-06, "loss": 0.0236, "step": 129990 }, { "epoch": 0.5424097270322371, "grad_norm": 0.7014185491956645, "learning_rate": 2.7158286090000384e-06, "loss": 0.0213, "step": 129995 }, { "epoch": 0.5424305897472274, "grad_norm": 0.923550426499346, "learning_rate": 2.715776380622831e-06, "loss": 0.0273, "step": 130000 }, { "epoch": 0.5424514524622176, "grad_norm": 1.4048126902984883, "learning_rate": 2.7157241552587353e-06, "loss": 0.0203, "step": 130005 }, { "epoch": 0.542472315177208, "grad_norm": 0.5655545077903573, "learning_rate": 2.7156719329074623e-06, "loss": 0.0246, "step": 130010 }, { "epoch": 0.5424931778921982, "grad_norm": 0.9603691937321809, "learning_rate": 2.7156197135687205e-06, "loss": 0.0315, "step": 130015 }, { "epoch": 0.5425140406071884, "grad_norm": 0.8462120877600928, "learning_rate": 2.715567497242223e-06, "loss": 0.0259, "step": 130020 }, { "epoch": 0.5425349033221787, "grad_norm": 0.4199411291910493, "learning_rate": 2.715515283927678e-06, "loss": 0.0281, "step": 130025 }, { "epoch": 0.542555766037169, "grad_norm": 1.1440830687631092, "learning_rate": 2.7154630736247968e-06, "loss": 0.0286, "step": 130030 }, { "epoch": 0.5425766287521593, "grad_norm": 0.4271859272953574, "learning_rate": 2.7154108663332902e-06, "loss": 0.0334, "step": 130035 }, { "epoch": 0.5425974914671495, "grad_norm": 0.6744753453384926, "learning_rate": 2.7153586620528676e-06, "loss": 0.03, "step": 130040 }, { "epoch": 0.5426183541821399, "grad_norm": 0.7396539902046046, "learning_rate": 2.7153064607832414e-06, "loss": 0.0528, "step": 130045 }, { "epoch": 0.5426392168971301, "grad_norm": 0.9764587701095014, "learning_rate": 2.7152542625241202e-06, "loss": 0.0202, "step": 130050 }, { "epoch": 0.5426600796121204, "grad_norm": 0.6662087518637864, "learning_rate": 2.715202067275216e-06, "loss": 0.0244, "step": 130055 }, { "epoch": 0.5426809423271107, "grad_norm": 0.8473524136781785, "learning_rate": 2.715149875036239e-06, "loss": 0.0228, "step": 130060 }, { "epoch": 0.542701805042101, "grad_norm": 0.82530597759527, "learning_rate": 2.7150976858069e-06, "loss": 0.0202, "step": 130065 }, { "epoch": 0.5427226677570912, "grad_norm": 0.9366629019695075, "learning_rate": 2.71504549958691e-06, "loss": 0.0279, "step": 130070 }, { "epoch": 0.5427435304720816, "grad_norm": 0.6528238669816923, "learning_rate": 2.7149933163759786e-06, "loss": 0.0298, "step": 130075 }, { "epoch": 0.5427643931870718, "grad_norm": 0.6308597810918534, "learning_rate": 2.7149411361738185e-06, "loss": 0.0223, "step": 130080 }, { "epoch": 0.5427852559020621, "grad_norm": 0.9362579488668686, "learning_rate": 2.714888958980139e-06, "loss": 0.0217, "step": 130085 }, { "epoch": 0.5428061186170523, "grad_norm": 0.4770570145813266, "learning_rate": 2.7148367847946527e-06, "loss": 0.0256, "step": 130090 }, { "epoch": 0.5428269813320427, "grad_norm": 0.40493500395825316, "learning_rate": 2.7147846136170684e-06, "loss": 0.0242, "step": 130095 }, { "epoch": 0.5428478440470329, "grad_norm": 0.33913026772816507, "learning_rate": 2.7147324454470986e-06, "loss": 0.0211, "step": 130100 }, { "epoch": 0.5428687067620231, "grad_norm": 0.5247028742940995, "learning_rate": 2.7146802802844545e-06, "loss": 0.0231, "step": 130105 }, { "epoch": 0.5428895694770135, "grad_norm": 0.3777271596521356, "learning_rate": 2.714628118128846e-06, "loss": 0.0166, "step": 130110 }, { "epoch": 0.5429104321920037, "grad_norm": 0.26122957467250696, "learning_rate": 2.7145759589799854e-06, "loss": 0.0142, "step": 130115 }, { "epoch": 0.542931294906994, "grad_norm": 1.357334504529761, "learning_rate": 2.714523802837583e-06, "loss": 0.0267, "step": 130120 }, { "epoch": 0.5429521576219843, "grad_norm": 0.612932529059931, "learning_rate": 2.71447164970135e-06, "loss": 0.0238, "step": 130125 }, { "epoch": 0.5429730203369746, "grad_norm": 1.642593770630401, "learning_rate": 2.714419499570999e-06, "loss": 0.0175, "step": 130130 }, { "epoch": 0.5429938830519648, "grad_norm": 1.2289093259578576, "learning_rate": 2.714367352446239e-06, "loss": 0.0177, "step": 130135 }, { "epoch": 0.5430147457669551, "grad_norm": 0.4450325619651021, "learning_rate": 2.7143152083267825e-06, "loss": 0.0264, "step": 130140 }, { "epoch": 0.5430356084819454, "grad_norm": 1.2559794943996208, "learning_rate": 2.7142630672123414e-06, "loss": 0.0346, "step": 130145 }, { "epoch": 0.5430564711969357, "grad_norm": 0.6092955320343653, "learning_rate": 2.714210929102627e-06, "loss": 0.0314, "step": 130150 }, { "epoch": 0.5430773339119259, "grad_norm": 0.5016645221493007, "learning_rate": 2.7141587939973497e-06, "loss": 0.0254, "step": 130155 }, { "epoch": 0.5430981966269163, "grad_norm": 0.695440093129939, "learning_rate": 2.7141066618962213e-06, "loss": 0.0267, "step": 130160 }, { "epoch": 0.5431190593419065, "grad_norm": 0.3100182686343367, "learning_rate": 2.714054532798954e-06, "loss": 0.0235, "step": 130165 }, { "epoch": 0.5431399220568968, "grad_norm": 0.7929143902670509, "learning_rate": 2.7140024067052577e-06, "loss": 0.0195, "step": 130170 }, { "epoch": 0.5431607847718871, "grad_norm": 1.3170034407345983, "learning_rate": 2.7139502836148468e-06, "loss": 0.0224, "step": 130175 }, { "epoch": 0.5431816474868774, "grad_norm": 0.854491129105736, "learning_rate": 2.7138981635274302e-06, "loss": 0.0247, "step": 130180 }, { "epoch": 0.5432025102018676, "grad_norm": 0.8610043126937366, "learning_rate": 2.713846046442721e-06, "loss": 0.0252, "step": 130185 }, { "epoch": 0.543223372916858, "grad_norm": 1.1083320616921795, "learning_rate": 2.71379393236043e-06, "loss": 0.0232, "step": 130190 }, { "epoch": 0.5432442356318482, "grad_norm": 0.6142095885201929, "learning_rate": 2.71374182128027e-06, "loss": 0.0238, "step": 130195 }, { "epoch": 0.5432650983468384, "grad_norm": 0.6917774319087797, "learning_rate": 2.713689713201952e-06, "loss": 0.0243, "step": 130200 }, { "epoch": 0.5432859610618287, "grad_norm": 0.7263778983248232, "learning_rate": 2.713637608125188e-06, "loss": 0.0249, "step": 130205 }, { "epoch": 0.543306823776819, "grad_norm": 0.8385795266667448, "learning_rate": 2.7135855060496902e-06, "loss": 0.0266, "step": 130210 }, { "epoch": 0.5433276864918093, "grad_norm": 0.6713103204657231, "learning_rate": 2.7135334069751693e-06, "loss": 0.0257, "step": 130215 }, { "epoch": 0.5433485492067995, "grad_norm": 1.0169520993413634, "learning_rate": 2.7134813109013393e-06, "loss": 0.0247, "step": 130220 }, { "epoch": 0.5433694119217899, "grad_norm": 0.89006215874706, "learning_rate": 2.7134292178279096e-06, "loss": 0.0277, "step": 130225 }, { "epoch": 0.5433902746367801, "grad_norm": 0.6973318108265941, "learning_rate": 2.7133771277545944e-06, "loss": 0.0176, "step": 130230 }, { "epoch": 0.5434111373517704, "grad_norm": 0.6841571189302691, "learning_rate": 2.7133250406811052e-06, "loss": 0.0233, "step": 130235 }, { "epoch": 0.5434320000667607, "grad_norm": 0.754395434323, "learning_rate": 2.713272956607153e-06, "loss": 0.0185, "step": 130240 }, { "epoch": 0.543452862781751, "grad_norm": 0.869410937578428, "learning_rate": 2.7132208755324506e-06, "loss": 0.0241, "step": 130245 }, { "epoch": 0.5434737254967412, "grad_norm": 0.9270718511011548, "learning_rate": 2.713168797456711e-06, "loss": 0.0256, "step": 130250 }, { "epoch": 0.5434945882117315, "grad_norm": 0.4559578860294885, "learning_rate": 2.713116722379645e-06, "loss": 0.0184, "step": 130255 }, { "epoch": 0.5435154509267218, "grad_norm": 0.6761889638758684, "learning_rate": 2.7130646503009663e-06, "loss": 0.0267, "step": 130260 }, { "epoch": 0.5435363136417121, "grad_norm": 0.6945776521819417, "learning_rate": 2.7130125812203856e-06, "loss": 0.0158, "step": 130265 }, { "epoch": 0.5435571763567023, "grad_norm": 0.5638154814046494, "learning_rate": 2.7129605151376163e-06, "loss": 0.0221, "step": 130270 }, { "epoch": 0.5435780390716927, "grad_norm": 0.8335899816731319, "learning_rate": 2.712908452052371e-06, "loss": 0.0268, "step": 130275 }, { "epoch": 0.5435989017866829, "grad_norm": 1.1442337702935637, "learning_rate": 2.7128563919643603e-06, "loss": 0.0308, "step": 130280 }, { "epoch": 0.5436197645016732, "grad_norm": 0.6991429942570562, "learning_rate": 2.7128043348732985e-06, "loss": 0.0227, "step": 130285 }, { "epoch": 0.5436406272166635, "grad_norm": 0.6428409906929818, "learning_rate": 2.7127522807788974e-06, "loss": 0.0283, "step": 130290 }, { "epoch": 0.5436614899316538, "grad_norm": 0.5562010258646183, "learning_rate": 2.71270022968087e-06, "loss": 0.0221, "step": 130295 }, { "epoch": 0.543682352646644, "grad_norm": 2.5216989274747674, "learning_rate": 2.712648181578928e-06, "loss": 0.0211, "step": 130300 }, { "epoch": 0.5437032153616344, "grad_norm": 0.49170490483192086, "learning_rate": 2.7125961364727838e-06, "loss": 0.0209, "step": 130305 }, { "epoch": 0.5437240780766246, "grad_norm": 0.5954140261052434, "learning_rate": 2.712544094362151e-06, "loss": 0.02, "step": 130310 }, { "epoch": 0.5437449407916148, "grad_norm": 0.6328568493153841, "learning_rate": 2.7124920552467415e-06, "loss": 0.0384, "step": 130315 }, { "epoch": 0.5437658035066051, "grad_norm": 0.5197916626071402, "learning_rate": 2.7124400191262694e-06, "loss": 0.0275, "step": 130320 }, { "epoch": 0.5437866662215954, "grad_norm": 0.6750851795259246, "learning_rate": 2.7123879860004454e-06, "loss": 0.0273, "step": 130325 }, { "epoch": 0.5438075289365857, "grad_norm": 1.1099005746947963, "learning_rate": 2.7123359558689837e-06, "loss": 0.03, "step": 130330 }, { "epoch": 0.5438283916515759, "grad_norm": 0.9317160398925058, "learning_rate": 2.7122839287315967e-06, "loss": 0.0274, "step": 130335 }, { "epoch": 0.5438492543665663, "grad_norm": 0.7585544134647889, "learning_rate": 2.712231904587997e-06, "loss": 0.0308, "step": 130340 }, { "epoch": 0.5438701170815565, "grad_norm": 0.6622083821208876, "learning_rate": 2.7121798834378974e-06, "loss": 0.0265, "step": 130345 }, { "epoch": 0.5438909797965468, "grad_norm": 0.7304296729611466, "learning_rate": 2.7121278652810117e-06, "loss": 0.0268, "step": 130350 }, { "epoch": 0.5439118425115371, "grad_norm": 1.1236412304488659, "learning_rate": 2.7120758501170524e-06, "loss": 0.03, "step": 130355 }, { "epoch": 0.5439327052265274, "grad_norm": 0.3988278434236869, "learning_rate": 2.712023837945732e-06, "loss": 0.0184, "step": 130360 }, { "epoch": 0.5439535679415176, "grad_norm": 1.2170001086362294, "learning_rate": 2.711971828766764e-06, "loss": 0.0298, "step": 130365 }, { "epoch": 0.543974430656508, "grad_norm": 0.3573332073055386, "learning_rate": 2.711919822579862e-06, "loss": 0.0223, "step": 130370 }, { "epoch": 0.5439952933714982, "grad_norm": 2.022897240013937, "learning_rate": 2.711867819384738e-06, "loss": 0.0269, "step": 130375 }, { "epoch": 0.5440161560864885, "grad_norm": 0.8258351104938921, "learning_rate": 2.711815819181106e-06, "loss": 0.0182, "step": 130380 }, { "epoch": 0.5440370188014787, "grad_norm": 0.8974110858553351, "learning_rate": 2.711763821968679e-06, "loss": 0.0222, "step": 130385 }, { "epoch": 0.544057881516469, "grad_norm": 0.46456585943946876, "learning_rate": 2.71171182774717e-06, "loss": 0.0164, "step": 130390 }, { "epoch": 0.5440787442314593, "grad_norm": 0.8796176639176373, "learning_rate": 2.7116598365162926e-06, "loss": 0.0254, "step": 130395 }, { "epoch": 0.5440996069464495, "grad_norm": 0.5455704149930457, "learning_rate": 2.71160784827576e-06, "loss": 0.0197, "step": 130400 }, { "epoch": 0.5441204696614399, "grad_norm": 0.5669802724617665, "learning_rate": 2.7115558630252857e-06, "loss": 0.0175, "step": 130405 }, { "epoch": 0.5441413323764301, "grad_norm": 1.0096623025398552, "learning_rate": 2.7115038807645826e-06, "loss": 0.0206, "step": 130410 }, { "epoch": 0.5441621950914204, "grad_norm": 0.8854199002299107, "learning_rate": 2.711451901493365e-06, "loss": 0.0216, "step": 130415 }, { "epoch": 0.5441830578064107, "grad_norm": 0.9049033898093731, "learning_rate": 2.7113999252113453e-06, "loss": 0.0255, "step": 130420 }, { "epoch": 0.544203920521401, "grad_norm": 0.8318115241784372, "learning_rate": 2.7113479519182377e-06, "loss": 0.0235, "step": 130425 }, { "epoch": 0.5442247832363912, "grad_norm": 1.054182144897273, "learning_rate": 2.7112959816137553e-06, "loss": 0.0315, "step": 130430 }, { "epoch": 0.5442456459513815, "grad_norm": 0.7000832112764794, "learning_rate": 2.711244014297612e-06, "loss": 0.0282, "step": 130435 }, { "epoch": 0.5442665086663718, "grad_norm": 0.5481541315081521, "learning_rate": 2.711192049969522e-06, "loss": 0.0226, "step": 130440 }, { "epoch": 0.5442873713813621, "grad_norm": 2.874922424694197, "learning_rate": 2.7111400886291984e-06, "loss": 0.0258, "step": 130445 }, { "epoch": 0.5443082340963523, "grad_norm": 0.5228355604579896, "learning_rate": 2.7110881302763535e-06, "loss": 0.018, "step": 130450 }, { "epoch": 0.5443290968113427, "grad_norm": 0.8903418751124739, "learning_rate": 2.711036174910704e-06, "loss": 0.0265, "step": 130455 }, { "epoch": 0.5443499595263329, "grad_norm": 1.2176843964652981, "learning_rate": 2.710984222531962e-06, "loss": 0.0319, "step": 130460 }, { "epoch": 0.5443708222413232, "grad_norm": 0.5183263631974057, "learning_rate": 2.7109322731398403e-06, "loss": 0.0268, "step": 130465 }, { "epoch": 0.5443916849563135, "grad_norm": 0.6467792749318751, "learning_rate": 2.7108803267340546e-06, "loss": 0.0319, "step": 130470 }, { "epoch": 0.5444125476713038, "grad_norm": 0.6029884112649702, "learning_rate": 2.7108283833143175e-06, "loss": 0.0234, "step": 130475 }, { "epoch": 0.544433410386294, "grad_norm": 0.47771903360558815, "learning_rate": 2.710776442880344e-06, "loss": 0.0183, "step": 130480 }, { "epoch": 0.5444542731012844, "grad_norm": 0.5976343481142374, "learning_rate": 2.710724505431847e-06, "loss": 0.0213, "step": 130485 }, { "epoch": 0.5444751358162746, "grad_norm": 1.5019885810115934, "learning_rate": 2.7106725709685415e-06, "loss": 0.0267, "step": 130490 }, { "epoch": 0.5444959985312648, "grad_norm": 1.4221780137905704, "learning_rate": 2.7106206394901415e-06, "loss": 0.0229, "step": 130495 }, { "epoch": 0.5445168612462551, "grad_norm": 0.6552641660090596, "learning_rate": 2.7105687109963603e-06, "loss": 0.0276, "step": 130500 }, { "epoch": 0.5445377239612454, "grad_norm": 0.8511193799882546, "learning_rate": 2.7105167854869117e-06, "loss": 0.0257, "step": 130505 }, { "epoch": 0.5445585866762357, "grad_norm": 0.3967696771839281, "learning_rate": 2.7104648629615115e-06, "loss": 0.0202, "step": 130510 }, { "epoch": 0.5445794493912259, "grad_norm": 0.5240042570218927, "learning_rate": 2.710412943419873e-06, "loss": 0.0207, "step": 130515 }, { "epoch": 0.5446003121062163, "grad_norm": 0.5398747297340878, "learning_rate": 2.7103610268617097e-06, "loss": 0.0188, "step": 130520 }, { "epoch": 0.5446211748212065, "grad_norm": 0.4954774295398941, "learning_rate": 2.710309113286737e-06, "loss": 0.0348, "step": 130525 }, { "epoch": 0.5446420375361968, "grad_norm": 0.336715508495041, "learning_rate": 2.7102572026946688e-06, "loss": 0.0269, "step": 130530 }, { "epoch": 0.5446629002511871, "grad_norm": 0.4825795504003973, "learning_rate": 2.7102052950852193e-06, "loss": 0.0246, "step": 130535 }, { "epoch": 0.5446837629661774, "grad_norm": 0.951293697164309, "learning_rate": 2.7101533904581034e-06, "loss": 0.024, "step": 130540 }, { "epoch": 0.5447046256811676, "grad_norm": 0.6954281317656847, "learning_rate": 2.7101014888130343e-06, "loss": 0.0247, "step": 130545 }, { "epoch": 0.544725488396158, "grad_norm": 0.5866326289642032, "learning_rate": 2.7100495901497283e-06, "loss": 0.0219, "step": 130550 }, { "epoch": 0.5447463511111482, "grad_norm": 0.749510947049149, "learning_rate": 2.7099976944678987e-06, "loss": 0.0295, "step": 130555 }, { "epoch": 0.5447672138261385, "grad_norm": 0.5485114356698266, "learning_rate": 2.70994580176726e-06, "loss": 0.0219, "step": 130560 }, { "epoch": 0.5447880765411287, "grad_norm": 0.2667576847867846, "learning_rate": 2.7098939120475275e-06, "loss": 0.023, "step": 130565 }, { "epoch": 0.5448089392561191, "grad_norm": 0.44669035949527125, "learning_rate": 2.7098420253084146e-06, "loss": 0.0157, "step": 130570 }, { "epoch": 0.5448298019711093, "grad_norm": 0.7989076680108738, "learning_rate": 2.7097901415496376e-06, "loss": 0.0281, "step": 130575 }, { "epoch": 0.5448506646860996, "grad_norm": 0.7634133041883913, "learning_rate": 2.70973826077091e-06, "loss": 0.028, "step": 130580 }, { "epoch": 0.5448715274010899, "grad_norm": 0.39202661508408254, "learning_rate": 2.7096863829719466e-06, "loss": 0.0191, "step": 130585 }, { "epoch": 0.5448923901160801, "grad_norm": 0.8598320442049145, "learning_rate": 2.7096345081524626e-06, "loss": 0.027, "step": 130590 }, { "epoch": 0.5449132528310704, "grad_norm": 0.6272752219626644, "learning_rate": 2.709582636312173e-06, "loss": 0.0186, "step": 130595 }, { "epoch": 0.5449341155460607, "grad_norm": 0.9132352803099413, "learning_rate": 2.7095307674507926e-06, "loss": 0.0167, "step": 130600 }, { "epoch": 0.544954978261051, "grad_norm": 0.5069893508099469, "learning_rate": 2.7094789015680356e-06, "loss": 0.0235, "step": 130605 }, { "epoch": 0.5449758409760412, "grad_norm": 0.7517695391012263, "learning_rate": 2.709427038663617e-06, "loss": 0.023, "step": 130610 }, { "epoch": 0.5449967036910315, "grad_norm": 0.7695042030415273, "learning_rate": 2.7093751787372526e-06, "loss": 0.0198, "step": 130615 }, { "epoch": 0.5450175664060218, "grad_norm": 0.6009229550996361, "learning_rate": 2.7093233217886565e-06, "loss": 0.023, "step": 130620 }, { "epoch": 0.5450384291210121, "grad_norm": 0.6771667231885542, "learning_rate": 2.7092714678175443e-06, "loss": 0.0224, "step": 130625 }, { "epoch": 0.5450592918360023, "grad_norm": 0.581467192419173, "learning_rate": 2.7092196168236314e-06, "loss": 0.02, "step": 130630 }, { "epoch": 0.5450801545509927, "grad_norm": 0.7849318128759976, "learning_rate": 2.7091677688066314e-06, "loss": 0.0282, "step": 130635 }, { "epoch": 0.5451010172659829, "grad_norm": 1.7393431646995265, "learning_rate": 2.709115923766261e-06, "loss": 0.0269, "step": 130640 }, { "epoch": 0.5451218799809732, "grad_norm": 0.8621191570138528, "learning_rate": 2.7090640817022353e-06, "loss": 0.0202, "step": 130645 }, { "epoch": 0.5451427426959635, "grad_norm": 0.6314039831925025, "learning_rate": 2.7090122426142683e-06, "loss": 0.0254, "step": 130650 }, { "epoch": 0.5451636054109538, "grad_norm": 0.4683464649867525, "learning_rate": 2.7089604065020775e-06, "loss": 0.0184, "step": 130655 }, { "epoch": 0.545184468125944, "grad_norm": 0.5671915126417977, "learning_rate": 2.708908573365376e-06, "loss": 0.0196, "step": 130660 }, { "epoch": 0.5452053308409344, "grad_norm": 0.36024595288655287, "learning_rate": 2.7088567432038795e-06, "loss": 0.0255, "step": 130665 }, { "epoch": 0.5452261935559246, "grad_norm": 0.7183797508108957, "learning_rate": 2.7088049160173043e-06, "loss": 0.0247, "step": 130670 }, { "epoch": 0.5452470562709149, "grad_norm": 0.6805324953975328, "learning_rate": 2.708753091805365e-06, "loss": 0.0253, "step": 130675 }, { "epoch": 0.5452679189859051, "grad_norm": 0.5944809996948718, "learning_rate": 2.7087012705677778e-06, "loss": 0.0226, "step": 130680 }, { "epoch": 0.5452887817008955, "grad_norm": 0.5176813270155993, "learning_rate": 2.7086494523042572e-06, "loss": 0.016, "step": 130685 }, { "epoch": 0.5453096444158857, "grad_norm": 0.7030866787428205, "learning_rate": 2.7085976370145203e-06, "loss": 0.0235, "step": 130690 }, { "epoch": 0.5453305071308759, "grad_norm": 0.4625338506371995, "learning_rate": 2.708545824698281e-06, "loss": 0.018, "step": 130695 }, { "epoch": 0.5453513698458663, "grad_norm": 0.715257584573418, "learning_rate": 2.7084940153552558e-06, "loss": 0.0238, "step": 130700 }, { "epoch": 0.5453722325608565, "grad_norm": 0.6628517164337655, "learning_rate": 2.7084422089851603e-06, "loss": 0.025, "step": 130705 }, { "epoch": 0.5453930952758468, "grad_norm": 0.2907323270196886, "learning_rate": 2.7083904055877096e-06, "loss": 0.0321, "step": 130710 }, { "epoch": 0.5454139579908371, "grad_norm": 0.8309706562792584, "learning_rate": 2.7083386051626203e-06, "loss": 0.0218, "step": 130715 }, { "epoch": 0.5454348207058274, "grad_norm": 1.1324237235189187, "learning_rate": 2.708286807709608e-06, "loss": 0.022, "step": 130720 }, { "epoch": 0.5454556834208176, "grad_norm": 0.5964376552103124, "learning_rate": 2.708235013228388e-06, "loss": 0.0279, "step": 130725 }, { "epoch": 0.545476546135808, "grad_norm": 0.7744343123889719, "learning_rate": 2.7081832217186767e-06, "loss": 0.0238, "step": 130730 }, { "epoch": 0.5454974088507982, "grad_norm": 0.6025574031924253, "learning_rate": 2.7081314331801893e-06, "loss": 0.0277, "step": 130735 }, { "epoch": 0.5455182715657885, "grad_norm": 0.48334469725182083, "learning_rate": 2.7080796476126423e-06, "loss": 0.0207, "step": 130740 }, { "epoch": 0.5455391342807787, "grad_norm": 0.3553752415908411, "learning_rate": 2.7080278650157513e-06, "loss": 0.0256, "step": 130745 }, { "epoch": 0.5455599969957691, "grad_norm": 0.5867758211994651, "learning_rate": 2.707976085389233e-06, "loss": 0.0226, "step": 130750 }, { "epoch": 0.5455808597107593, "grad_norm": 1.0292545379478424, "learning_rate": 2.707924308732802e-06, "loss": 0.0212, "step": 130755 }, { "epoch": 0.5456017224257496, "grad_norm": 0.8531631925302402, "learning_rate": 2.707872535046176e-06, "loss": 0.0183, "step": 130760 }, { "epoch": 0.5456225851407399, "grad_norm": 0.8303400437185992, "learning_rate": 2.7078207643290707e-06, "loss": 0.0203, "step": 130765 }, { "epoch": 0.5456434478557302, "grad_norm": 0.4516427691727662, "learning_rate": 2.707768996581201e-06, "loss": 0.0239, "step": 130770 }, { "epoch": 0.5456643105707204, "grad_norm": 0.5598964923380317, "learning_rate": 2.7077172318022845e-06, "loss": 0.0187, "step": 130775 }, { "epoch": 0.5456851732857108, "grad_norm": 0.7834407726864925, "learning_rate": 2.707665469992037e-06, "loss": 0.0215, "step": 130780 }, { "epoch": 0.545706036000701, "grad_norm": 0.8427378817374428, "learning_rate": 2.707613711150174e-06, "loss": 0.0251, "step": 130785 }, { "epoch": 0.5457268987156912, "grad_norm": 0.6904848377925872, "learning_rate": 2.707561955276413e-06, "loss": 0.0243, "step": 130790 }, { "epoch": 0.5457477614306815, "grad_norm": 0.8560229235981058, "learning_rate": 2.7075102023704696e-06, "loss": 0.0256, "step": 130795 }, { "epoch": 0.5457686241456718, "grad_norm": 1.1024546021407002, "learning_rate": 2.707458452432061e-06, "loss": 0.0246, "step": 130800 }, { "epoch": 0.5457894868606621, "grad_norm": 0.8693935345485292, "learning_rate": 2.7074067054609026e-06, "loss": 0.026, "step": 130805 }, { "epoch": 0.5458103495756523, "grad_norm": 0.8109982578487522, "learning_rate": 2.7073549614567105e-06, "loss": 0.0266, "step": 130810 }, { "epoch": 0.5458312122906427, "grad_norm": 0.7488662485955861, "learning_rate": 2.707303220419203e-06, "loss": 0.0267, "step": 130815 }, { "epoch": 0.5458520750056329, "grad_norm": 0.8361928159811227, "learning_rate": 2.707251482348095e-06, "loss": 0.0242, "step": 130820 }, { "epoch": 0.5458729377206232, "grad_norm": 0.7775125938144093, "learning_rate": 2.7071997472431037e-06, "loss": 0.025, "step": 130825 }, { "epoch": 0.5458938004356135, "grad_norm": 0.9511511417260808, "learning_rate": 2.707148015103946e-06, "loss": 0.0133, "step": 130830 }, { "epoch": 0.5459146631506038, "grad_norm": 1.5447119502554152, "learning_rate": 2.7070962859303372e-06, "loss": 0.0266, "step": 130835 }, { "epoch": 0.545935525865594, "grad_norm": 1.2135734517131784, "learning_rate": 2.707044559721996e-06, "loss": 0.0236, "step": 130840 }, { "epoch": 0.5459563885805844, "grad_norm": 0.954817402709071, "learning_rate": 2.706992836478638e-06, "loss": 0.021, "step": 130845 }, { "epoch": 0.5459772512955746, "grad_norm": 0.3504383016478287, "learning_rate": 2.7069411161999793e-06, "loss": 0.0238, "step": 130850 }, { "epoch": 0.5459981140105649, "grad_norm": 0.6506634028734489, "learning_rate": 2.7068893988857383e-06, "loss": 0.0274, "step": 130855 }, { "epoch": 0.5460189767255551, "grad_norm": 0.6432345603742095, "learning_rate": 2.7068376845356297e-06, "loss": 0.0201, "step": 130860 }, { "epoch": 0.5460398394405455, "grad_norm": 0.5463471666196936, "learning_rate": 2.706785973149373e-06, "loss": 0.0205, "step": 130865 }, { "epoch": 0.5460607021555357, "grad_norm": 0.5972758435213005, "learning_rate": 2.7067342647266827e-06, "loss": 0.031, "step": 130870 }, { "epoch": 0.546081564870526, "grad_norm": 1.0336974739676867, "learning_rate": 2.7066825592672772e-06, "loss": 0.0308, "step": 130875 }, { "epoch": 0.5461024275855163, "grad_norm": 0.640929731550375, "learning_rate": 2.7066308567708726e-06, "loss": 0.029, "step": 130880 }, { "epoch": 0.5461232903005065, "grad_norm": 0.5051348943941922, "learning_rate": 2.7065791572371867e-06, "loss": 0.0251, "step": 130885 }, { "epoch": 0.5461441530154968, "grad_norm": 0.4614633337795421, "learning_rate": 2.706527460665936e-06, "loss": 0.0239, "step": 130890 }, { "epoch": 0.5461650157304871, "grad_norm": 1.1322670548116958, "learning_rate": 2.7064757670568377e-06, "loss": 0.0258, "step": 130895 }, { "epoch": 0.5461858784454774, "grad_norm": 0.35419259572294204, "learning_rate": 2.7064240764096096e-06, "loss": 0.0214, "step": 130900 }, { "epoch": 0.5462067411604676, "grad_norm": 0.5081036159976572, "learning_rate": 2.7063723887239674e-06, "loss": 0.0286, "step": 130905 }, { "epoch": 0.546227603875458, "grad_norm": 1.1571304765617323, "learning_rate": 2.70632070399963e-06, "loss": 0.0297, "step": 130910 }, { "epoch": 0.5462484665904482, "grad_norm": 0.7602745256118869, "learning_rate": 2.706269022236313e-06, "loss": 0.0263, "step": 130915 }, { "epoch": 0.5462693293054385, "grad_norm": 0.2835132596911444, "learning_rate": 2.706217343433735e-06, "loss": 0.0232, "step": 130920 }, { "epoch": 0.5462901920204287, "grad_norm": 0.7231564845818381, "learning_rate": 2.7061656675916132e-06, "loss": 0.0326, "step": 130925 }, { "epoch": 0.5463110547354191, "grad_norm": 0.6076839321016491, "learning_rate": 2.7061139947096645e-06, "loss": 0.0209, "step": 130930 }, { "epoch": 0.5463319174504093, "grad_norm": 0.5651670321865421, "learning_rate": 2.7060623247876055e-06, "loss": 0.0239, "step": 130935 }, { "epoch": 0.5463527801653996, "grad_norm": 0.5330985868924769, "learning_rate": 2.7060106578251554e-06, "loss": 0.0228, "step": 130940 }, { "epoch": 0.5463736428803899, "grad_norm": 0.46586078905398615, "learning_rate": 2.7059589938220305e-06, "loss": 0.0193, "step": 130945 }, { "epoch": 0.5463945055953802, "grad_norm": 0.7364094590641365, "learning_rate": 2.705907332777949e-06, "loss": 0.0266, "step": 130950 }, { "epoch": 0.5464153683103704, "grad_norm": 0.5445339448622696, "learning_rate": 2.7058556746926275e-06, "loss": 0.0227, "step": 130955 }, { "epoch": 0.5464362310253608, "grad_norm": 0.6699197959405031, "learning_rate": 2.7058040195657844e-06, "loss": 0.0194, "step": 130960 }, { "epoch": 0.546457093740351, "grad_norm": 1.2641824393252574, "learning_rate": 2.7057523673971366e-06, "loss": 0.0283, "step": 130965 }, { "epoch": 0.5464779564553413, "grad_norm": 1.1498038810056466, "learning_rate": 2.7057007181864024e-06, "loss": 0.0299, "step": 130970 }, { "epoch": 0.5464988191703315, "grad_norm": 0.5272825159907485, "learning_rate": 2.7056490719332997e-06, "loss": 0.0219, "step": 130975 }, { "epoch": 0.5465196818853219, "grad_norm": 0.8975444598877326, "learning_rate": 2.705597428637546e-06, "loss": 0.0217, "step": 130980 }, { "epoch": 0.5465405446003121, "grad_norm": 0.8265568724345764, "learning_rate": 2.7055457882988585e-06, "loss": 0.0193, "step": 130985 }, { "epoch": 0.5465614073153023, "grad_norm": 1.6507532073549747, "learning_rate": 2.705494150916955e-06, "loss": 0.0278, "step": 130990 }, { "epoch": 0.5465822700302927, "grad_norm": 0.9081987339749358, "learning_rate": 2.705442516491555e-06, "loss": 0.0232, "step": 130995 }, { "epoch": 0.5466031327452829, "grad_norm": 2.554311589713109, "learning_rate": 2.705390885022374e-06, "loss": 0.0495, "step": 131000 }, { "epoch": 0.5466239954602732, "grad_norm": 0.2694271237649101, "learning_rate": 2.705339256509132e-06, "loss": 0.0138, "step": 131005 }, { "epoch": 0.5466448581752635, "grad_norm": 0.6073903317953843, "learning_rate": 2.7052876309515454e-06, "loss": 0.0343, "step": 131010 }, { "epoch": 0.5466657208902538, "grad_norm": 0.443944919167624, "learning_rate": 2.705236008349333e-06, "loss": 0.0219, "step": 131015 }, { "epoch": 0.546686583605244, "grad_norm": 1.0068052336650373, "learning_rate": 2.705184388702213e-06, "loss": 0.0248, "step": 131020 }, { "epoch": 0.5467074463202344, "grad_norm": 0.6435585120796228, "learning_rate": 2.7051327720099023e-06, "loss": 0.0233, "step": 131025 }, { "epoch": 0.5467283090352246, "grad_norm": 0.7835419240643412, "learning_rate": 2.705081158272121e-06, "loss": 0.0305, "step": 131030 }, { "epoch": 0.5467491717502149, "grad_norm": 1.3821977667611338, "learning_rate": 2.705029547488585e-06, "loss": 0.0273, "step": 131035 }, { "epoch": 0.5467700344652051, "grad_norm": 0.6069543462646594, "learning_rate": 2.704977939659014e-06, "loss": 0.0234, "step": 131040 }, { "epoch": 0.5467908971801955, "grad_norm": 0.5489163217589218, "learning_rate": 2.7049263347831266e-06, "loss": 0.021, "step": 131045 }, { "epoch": 0.5468117598951857, "grad_norm": 0.6345995838075795, "learning_rate": 2.70487473286064e-06, "loss": 0.0205, "step": 131050 }, { "epoch": 0.546832622610176, "grad_norm": 0.9901265208590442, "learning_rate": 2.704823133891272e-06, "loss": 0.0195, "step": 131055 }, { "epoch": 0.5468534853251663, "grad_norm": 0.6599388019917283, "learning_rate": 2.7047715378747423e-06, "loss": 0.024, "step": 131060 }, { "epoch": 0.5468743480401566, "grad_norm": 0.5040196335378362, "learning_rate": 2.704719944810769e-06, "loss": 0.0251, "step": 131065 }, { "epoch": 0.5468952107551468, "grad_norm": 0.41424871295131765, "learning_rate": 2.7046683546990706e-06, "loss": 0.0259, "step": 131070 }, { "epoch": 0.5469160734701372, "grad_norm": 1.17335026005358, "learning_rate": 2.704616767539364e-06, "loss": 0.0224, "step": 131075 }, { "epoch": 0.5469369361851274, "grad_norm": 0.7643765570990976, "learning_rate": 2.70456518333137e-06, "loss": 0.0233, "step": 131080 }, { "epoch": 0.5469577989001176, "grad_norm": 0.5761028372653396, "learning_rate": 2.7045136020748046e-06, "loss": 0.0224, "step": 131085 }, { "epoch": 0.546978661615108, "grad_norm": 0.45927378043929684, "learning_rate": 2.7044620237693885e-06, "loss": 0.0219, "step": 131090 }, { "epoch": 0.5469995243300982, "grad_norm": 0.8811198702476704, "learning_rate": 2.70441044841484e-06, "loss": 0.0228, "step": 131095 }, { "epoch": 0.5470203870450885, "grad_norm": 0.6535298782646624, "learning_rate": 2.704358876010877e-06, "loss": 0.0303, "step": 131100 }, { "epoch": 0.5470412497600787, "grad_norm": 0.6866672564786959, "learning_rate": 2.704307306557218e-06, "loss": 0.0191, "step": 131105 }, { "epoch": 0.5470621124750691, "grad_norm": 0.5055583260562146, "learning_rate": 2.704255740053583e-06, "loss": 0.0209, "step": 131110 }, { "epoch": 0.5470829751900593, "grad_norm": 2.9423807436587146, "learning_rate": 2.704204176499689e-06, "loss": 0.0293, "step": 131115 }, { "epoch": 0.5471038379050496, "grad_norm": 0.9818960210958758, "learning_rate": 2.7041526158952563e-06, "loss": 0.0245, "step": 131120 }, { "epoch": 0.5471247006200399, "grad_norm": 0.5067797423883525, "learning_rate": 2.7041010582400033e-06, "loss": 0.0187, "step": 131125 }, { "epoch": 0.5471455633350302, "grad_norm": 0.37381356879003924, "learning_rate": 2.704049503533649e-06, "loss": 0.0208, "step": 131130 }, { "epoch": 0.5471664260500204, "grad_norm": 0.8702144005658115, "learning_rate": 2.703997951775911e-06, "loss": 0.02, "step": 131135 }, { "epoch": 0.5471872887650108, "grad_norm": 0.7331357830094156, "learning_rate": 2.7039464029665095e-06, "loss": 0.0235, "step": 131140 }, { "epoch": 0.547208151480001, "grad_norm": 0.8913603585354648, "learning_rate": 2.7038948571051637e-06, "loss": 0.0248, "step": 131145 }, { "epoch": 0.5472290141949913, "grad_norm": 0.566188752884295, "learning_rate": 2.703843314191592e-06, "loss": 0.0271, "step": 131150 }, { "epoch": 0.5472498769099815, "grad_norm": 0.6587511360514889, "learning_rate": 2.703791774225514e-06, "loss": 0.0193, "step": 131155 }, { "epoch": 0.5472707396249719, "grad_norm": 0.7811959839059145, "learning_rate": 2.7037402372066475e-06, "loss": 0.0304, "step": 131160 }, { "epoch": 0.5472916023399621, "grad_norm": 0.5991103009918205, "learning_rate": 2.7036887031347132e-06, "loss": 0.0227, "step": 131165 }, { "epoch": 0.5473124650549523, "grad_norm": 1.007348281844372, "learning_rate": 2.7036371720094296e-06, "loss": 0.0265, "step": 131170 }, { "epoch": 0.5473333277699427, "grad_norm": 0.9181454144175487, "learning_rate": 2.7035856438305153e-06, "loss": 0.0283, "step": 131175 }, { "epoch": 0.547354190484933, "grad_norm": 0.8304238844953847, "learning_rate": 2.7035341185976905e-06, "loss": 0.0249, "step": 131180 }, { "epoch": 0.5473750531999232, "grad_norm": 1.1481478165004098, "learning_rate": 2.7034825963106735e-06, "loss": 0.0295, "step": 131185 }, { "epoch": 0.5473959159149135, "grad_norm": 0.736254283455099, "learning_rate": 2.703431076969185e-06, "loss": 0.0189, "step": 131190 }, { "epoch": 0.5474167786299038, "grad_norm": 0.925356321446928, "learning_rate": 2.7033795605729435e-06, "loss": 0.0242, "step": 131195 }, { "epoch": 0.547437641344894, "grad_norm": 0.66110565063872, "learning_rate": 2.7033280471216685e-06, "loss": 0.0253, "step": 131200 }, { "epoch": 0.5474585040598844, "grad_norm": 1.3336447861536982, "learning_rate": 2.7032765366150785e-06, "loss": 0.0281, "step": 131205 }, { "epoch": 0.5474793667748746, "grad_norm": 0.7096582670198023, "learning_rate": 2.703225029052895e-06, "loss": 0.0301, "step": 131210 }, { "epoch": 0.5475002294898649, "grad_norm": 0.3787720354598247, "learning_rate": 2.7031735244348357e-06, "loss": 0.0261, "step": 131215 }, { "epoch": 0.5475210922048551, "grad_norm": 0.5878897319870936, "learning_rate": 2.703122022760621e-06, "loss": 0.0256, "step": 131220 }, { "epoch": 0.5475419549198455, "grad_norm": 0.9830544007913253, "learning_rate": 2.7030705240299704e-06, "loss": 0.0245, "step": 131225 }, { "epoch": 0.5475628176348357, "grad_norm": 0.5909250095350398, "learning_rate": 2.7030190282426027e-06, "loss": 0.024, "step": 131230 }, { "epoch": 0.547583680349826, "grad_norm": 0.5908612822119793, "learning_rate": 2.7029675353982388e-06, "loss": 0.0259, "step": 131235 }, { "epoch": 0.5476045430648163, "grad_norm": 0.15528768367825643, "learning_rate": 2.7029160454965973e-06, "loss": 0.0228, "step": 131240 }, { "epoch": 0.5476254057798066, "grad_norm": 0.6479140729456867, "learning_rate": 2.702864558537399e-06, "loss": 0.0208, "step": 131245 }, { "epoch": 0.5476462684947968, "grad_norm": 0.7238087464635954, "learning_rate": 2.702813074520363e-06, "loss": 0.0193, "step": 131250 }, { "epoch": 0.5476671312097872, "grad_norm": 1.1778475760219627, "learning_rate": 2.702761593445209e-06, "loss": 0.0256, "step": 131255 }, { "epoch": 0.5476879939247774, "grad_norm": 0.7418726130862178, "learning_rate": 2.702710115311657e-06, "loss": 0.0231, "step": 131260 }, { "epoch": 0.5477088566397676, "grad_norm": 0.8602645155222421, "learning_rate": 2.7026586401194273e-06, "loss": 0.0209, "step": 131265 }, { "epoch": 0.547729719354758, "grad_norm": 0.8623255634575261, "learning_rate": 2.7026071678682384e-06, "loss": 0.0246, "step": 131270 }, { "epoch": 0.5477505820697482, "grad_norm": 0.6292190912414758, "learning_rate": 2.702555698557812e-06, "loss": 0.0236, "step": 131275 }, { "epoch": 0.5477714447847385, "grad_norm": 0.5699952263027526, "learning_rate": 2.7025042321878673e-06, "loss": 0.025, "step": 131280 }, { "epoch": 0.5477923074997287, "grad_norm": 1.0058101136996236, "learning_rate": 2.7024527687581247e-06, "loss": 0.0255, "step": 131285 }, { "epoch": 0.5478131702147191, "grad_norm": 0.5709483248255811, "learning_rate": 2.702401308268304e-06, "loss": 0.0256, "step": 131290 }, { "epoch": 0.5478340329297093, "grad_norm": 0.4826520535083478, "learning_rate": 2.7023498507181244e-06, "loss": 0.0474, "step": 131295 }, { "epoch": 0.5478548956446996, "grad_norm": 0.6575720705082986, "learning_rate": 2.7022983961073073e-06, "loss": 0.0205, "step": 131300 }, { "epoch": 0.5478757583596899, "grad_norm": 0.31886778344006145, "learning_rate": 2.7022469444355725e-06, "loss": 0.0143, "step": 131305 }, { "epoch": 0.5478966210746802, "grad_norm": 0.9231691333200207, "learning_rate": 2.70219549570264e-06, "loss": 0.0178, "step": 131310 }, { "epoch": 0.5479174837896704, "grad_norm": 0.923329012988229, "learning_rate": 2.7021440499082306e-06, "loss": 0.0251, "step": 131315 }, { "epoch": 0.5479383465046608, "grad_norm": 0.9502548358379957, "learning_rate": 2.702092607052064e-06, "loss": 0.0224, "step": 131320 }, { "epoch": 0.547959209219651, "grad_norm": 0.595484970605401, "learning_rate": 2.7020411671338605e-06, "loss": 0.0268, "step": 131325 }, { "epoch": 0.5479800719346413, "grad_norm": 0.7783630281290876, "learning_rate": 2.701989730153341e-06, "loss": 0.024, "step": 131330 }, { "epoch": 0.5480009346496315, "grad_norm": 0.5712712003766207, "learning_rate": 2.701938296110225e-06, "loss": 0.0216, "step": 131335 }, { "epoch": 0.5480217973646219, "grad_norm": 0.38387782347095556, "learning_rate": 2.701886865004234e-06, "loss": 0.0224, "step": 131340 }, { "epoch": 0.5480426600796121, "grad_norm": 0.7547280491163169, "learning_rate": 2.701835436835088e-06, "loss": 0.0241, "step": 131345 }, { "epoch": 0.5480635227946024, "grad_norm": 0.581766579568459, "learning_rate": 2.7017840116025066e-06, "loss": 0.0156, "step": 131350 }, { "epoch": 0.5480843855095927, "grad_norm": 0.7575382035745438, "learning_rate": 2.7017325893062124e-06, "loss": 0.0176, "step": 131355 }, { "epoch": 0.548105248224583, "grad_norm": 0.7536088775071728, "learning_rate": 2.7016811699459246e-06, "loss": 0.0276, "step": 131360 }, { "epoch": 0.5481261109395732, "grad_norm": 0.3989421971368738, "learning_rate": 2.7016297535213635e-06, "loss": 0.0186, "step": 131365 }, { "epoch": 0.5481469736545636, "grad_norm": 0.30201102821154946, "learning_rate": 2.701578340032251e-06, "loss": 0.0164, "step": 131370 }, { "epoch": 0.5481678363695538, "grad_norm": 0.4733786028430175, "learning_rate": 2.7015269294783064e-06, "loss": 0.0248, "step": 131375 }, { "epoch": 0.548188699084544, "grad_norm": 0.7839697876439867, "learning_rate": 2.7014755218592513e-06, "loss": 0.021, "step": 131380 }, { "epoch": 0.5482095617995344, "grad_norm": 0.8494341524483643, "learning_rate": 2.7014241171748062e-06, "loss": 0.0217, "step": 131385 }, { "epoch": 0.5482304245145246, "grad_norm": 0.7363907295234121, "learning_rate": 2.701372715424692e-06, "loss": 0.0311, "step": 131390 }, { "epoch": 0.5482512872295149, "grad_norm": 1.5565299738603309, "learning_rate": 2.70132131660863e-06, "loss": 0.032, "step": 131395 }, { "epoch": 0.5482721499445051, "grad_norm": 0.4092247121708817, "learning_rate": 2.7012699207263405e-06, "loss": 0.0259, "step": 131400 }, { "epoch": 0.5482930126594955, "grad_norm": 0.7424944101215054, "learning_rate": 2.7012185277775445e-06, "loss": 0.0251, "step": 131405 }, { "epoch": 0.5483138753744857, "grad_norm": 0.8894311857151229, "learning_rate": 2.7011671377619627e-06, "loss": 0.0267, "step": 131410 }, { "epoch": 0.548334738089476, "grad_norm": 0.691238409688412, "learning_rate": 2.7011157506793162e-06, "loss": 0.0198, "step": 131415 }, { "epoch": 0.5483556008044663, "grad_norm": 0.7551700483675928, "learning_rate": 2.701064366529326e-06, "loss": 0.0272, "step": 131420 }, { "epoch": 0.5483764635194566, "grad_norm": 0.495244627087819, "learning_rate": 2.701012985311714e-06, "loss": 0.024, "step": 131425 }, { "epoch": 0.5483973262344468, "grad_norm": 0.6115263660831944, "learning_rate": 2.700961607026201e-06, "loss": 0.031, "step": 131430 }, { "epoch": 0.5484181889494372, "grad_norm": 0.795795262482586, "learning_rate": 2.700910231672507e-06, "loss": 0.022, "step": 131435 }, { "epoch": 0.5484390516644274, "grad_norm": 0.8715810127081711, "learning_rate": 2.7008588592503548e-06, "loss": 0.022, "step": 131440 }, { "epoch": 0.5484599143794177, "grad_norm": 0.3461746128304506, "learning_rate": 2.7008074897594646e-06, "loss": 0.0181, "step": 131445 }, { "epoch": 0.548480777094408, "grad_norm": 1.3682543531085465, "learning_rate": 2.700756123199557e-06, "loss": 0.0378, "step": 131450 }, { "epoch": 0.5485016398093983, "grad_norm": 1.0778771378891199, "learning_rate": 2.700704759570355e-06, "loss": 0.0204, "step": 131455 }, { "epoch": 0.5485225025243885, "grad_norm": 0.6813771576986468, "learning_rate": 2.7006533988715787e-06, "loss": 0.0282, "step": 131460 }, { "epoch": 0.5485433652393787, "grad_norm": 0.40366007565899165, "learning_rate": 2.7006020411029506e-06, "loss": 0.0153, "step": 131465 }, { "epoch": 0.5485642279543691, "grad_norm": 0.7172597138414577, "learning_rate": 2.7005506862641905e-06, "loss": 0.0224, "step": 131470 }, { "epoch": 0.5485850906693593, "grad_norm": 0.3989245806951001, "learning_rate": 2.7004993343550213e-06, "loss": 0.0189, "step": 131475 }, { "epoch": 0.5486059533843496, "grad_norm": 0.8535815616807916, "learning_rate": 2.700447985375163e-06, "loss": 0.0239, "step": 131480 }, { "epoch": 0.5486268160993399, "grad_norm": 0.8874490251045632, "learning_rate": 2.7003966393243384e-06, "loss": 0.0304, "step": 131485 }, { "epoch": 0.5486476788143302, "grad_norm": 0.9115617764994649, "learning_rate": 2.700345296202269e-06, "loss": 0.0273, "step": 131490 }, { "epoch": 0.5486685415293204, "grad_norm": 1.1168271455111627, "learning_rate": 2.7002939560086756e-06, "loss": 0.022, "step": 131495 }, { "epoch": 0.5486894042443108, "grad_norm": 0.8665084480615577, "learning_rate": 2.7002426187432805e-06, "loss": 0.024, "step": 131500 }, { "epoch": 0.548710266959301, "grad_norm": 0.3769259062912009, "learning_rate": 2.7001912844058047e-06, "loss": 0.0232, "step": 131505 }, { "epoch": 0.5487311296742913, "grad_norm": 0.5955347075199194, "learning_rate": 2.7001399529959704e-06, "loss": 0.0187, "step": 131510 }, { "epoch": 0.5487519923892815, "grad_norm": 0.6764485391022754, "learning_rate": 2.7000886245134993e-06, "loss": 0.0201, "step": 131515 }, { "epoch": 0.5487728551042719, "grad_norm": 0.6944350509486459, "learning_rate": 2.7000372989581132e-06, "loss": 0.0252, "step": 131520 }, { "epoch": 0.5487937178192621, "grad_norm": 0.39097850034474313, "learning_rate": 2.699985976329534e-06, "loss": 0.0171, "step": 131525 }, { "epoch": 0.5488145805342524, "grad_norm": 0.48877308551358184, "learning_rate": 2.6999346566274825e-06, "loss": 0.0191, "step": 131530 }, { "epoch": 0.5488354432492427, "grad_norm": 1.408852756882049, "learning_rate": 2.6998833398516823e-06, "loss": 0.0241, "step": 131535 }, { "epoch": 0.548856305964233, "grad_norm": 0.7605714896053963, "learning_rate": 2.699832026001854e-06, "loss": 0.0183, "step": 131540 }, { "epoch": 0.5488771686792232, "grad_norm": 0.6843930769962575, "learning_rate": 2.69978071507772e-06, "loss": 0.0223, "step": 131545 }, { "epoch": 0.5488980313942136, "grad_norm": 0.838142290836055, "learning_rate": 2.6997294070790026e-06, "loss": 0.0308, "step": 131550 }, { "epoch": 0.5489188941092038, "grad_norm": 0.7686195299052282, "learning_rate": 2.699678102005423e-06, "loss": 0.0187, "step": 131555 }, { "epoch": 0.548939756824194, "grad_norm": 0.740472319219913, "learning_rate": 2.6996267998567042e-06, "loss": 0.0193, "step": 131560 }, { "epoch": 0.5489606195391844, "grad_norm": 0.8748803549355341, "learning_rate": 2.699575500632568e-06, "loss": 0.035, "step": 131565 }, { "epoch": 0.5489814822541746, "grad_norm": 1.1614321510988193, "learning_rate": 2.6995242043327364e-06, "loss": 0.0286, "step": 131570 }, { "epoch": 0.5490023449691649, "grad_norm": 0.5746581556790986, "learning_rate": 2.699472910956931e-06, "loss": 0.0218, "step": 131575 }, { "epoch": 0.5490232076841551, "grad_norm": 0.6666874445206795, "learning_rate": 2.6994216205048753e-06, "loss": 0.0228, "step": 131580 }, { "epoch": 0.5490440703991455, "grad_norm": 2.098539347046711, "learning_rate": 2.6993703329762905e-06, "loss": 0.0273, "step": 131585 }, { "epoch": 0.5490649331141357, "grad_norm": 0.4000551770944804, "learning_rate": 2.6993190483709e-06, "loss": 0.0232, "step": 131590 }, { "epoch": 0.549085795829126, "grad_norm": 0.6252102966508615, "learning_rate": 2.6992677666884242e-06, "loss": 0.0277, "step": 131595 }, { "epoch": 0.5491066585441163, "grad_norm": 0.9811035913121173, "learning_rate": 2.699216487928587e-06, "loss": 0.027, "step": 131600 }, { "epoch": 0.5491275212591066, "grad_norm": 1.308145914535696, "learning_rate": 2.699165212091111e-06, "loss": 0.0244, "step": 131605 }, { "epoch": 0.5491483839740968, "grad_norm": 0.7905101510822548, "learning_rate": 2.6991139391757182e-06, "loss": 0.0231, "step": 131610 }, { "epoch": 0.5491692466890872, "grad_norm": 0.548069896878035, "learning_rate": 2.6990626691821304e-06, "loss": 0.0218, "step": 131615 }, { "epoch": 0.5491901094040774, "grad_norm": 0.6393663070110778, "learning_rate": 2.699011402110071e-06, "loss": 0.0282, "step": 131620 }, { "epoch": 0.5492109721190677, "grad_norm": 0.3553079671174972, "learning_rate": 2.6989601379592623e-06, "loss": 0.0261, "step": 131625 }, { "epoch": 0.549231834834058, "grad_norm": 0.7507875420439267, "learning_rate": 2.6989088767294264e-06, "loss": 0.02, "step": 131630 }, { "epoch": 0.5492526975490483, "grad_norm": 0.5454312641532737, "learning_rate": 2.698857618420286e-06, "loss": 0.0209, "step": 131635 }, { "epoch": 0.5492735602640385, "grad_norm": 0.6653561506920507, "learning_rate": 2.6988063630315653e-06, "loss": 0.0212, "step": 131640 }, { "epoch": 0.5492944229790288, "grad_norm": 1.183444097375648, "learning_rate": 2.698755110562985e-06, "loss": 0.0269, "step": 131645 }, { "epoch": 0.5493152856940191, "grad_norm": 0.6822633969077146, "learning_rate": 2.6987038610142687e-06, "loss": 0.0202, "step": 131650 }, { "epoch": 0.5493361484090094, "grad_norm": 1.092791093896163, "learning_rate": 2.6986526143851386e-06, "loss": 0.0236, "step": 131655 }, { "epoch": 0.5493570111239996, "grad_norm": 0.6878493545697157, "learning_rate": 2.698601370675319e-06, "loss": 0.0261, "step": 131660 }, { "epoch": 0.54937787383899, "grad_norm": 0.364713775819815, "learning_rate": 2.698550129884531e-06, "loss": 0.0252, "step": 131665 }, { "epoch": 0.5493987365539802, "grad_norm": 0.61835978209958, "learning_rate": 2.6984988920124983e-06, "loss": 0.0241, "step": 131670 }, { "epoch": 0.5494195992689704, "grad_norm": 0.3950918061370362, "learning_rate": 2.698447657058944e-06, "loss": 0.0189, "step": 131675 }, { "epoch": 0.5494404619839608, "grad_norm": 0.41417754946486635, "learning_rate": 2.6983964250235906e-06, "loss": 0.0227, "step": 131680 }, { "epoch": 0.549461324698951, "grad_norm": 0.8591241736696095, "learning_rate": 2.6983451959061607e-06, "loss": 0.0475, "step": 131685 }, { "epoch": 0.5494821874139413, "grad_norm": 0.3749243539194248, "learning_rate": 2.6982939697063794e-06, "loss": 0.0234, "step": 131690 }, { "epoch": 0.5495030501289315, "grad_norm": 1.1218554186059002, "learning_rate": 2.6982427464239668e-06, "loss": 0.0256, "step": 131695 }, { "epoch": 0.5495239128439219, "grad_norm": 0.6363934255761302, "learning_rate": 2.698191526058648e-06, "loss": 0.0251, "step": 131700 }, { "epoch": 0.5495447755589121, "grad_norm": 0.41122902321568555, "learning_rate": 2.6981403086101455e-06, "loss": 0.0251, "step": 131705 }, { "epoch": 0.5495656382739024, "grad_norm": 0.6180966528896917, "learning_rate": 2.698089094078183e-06, "loss": 0.0261, "step": 131710 }, { "epoch": 0.5495865009888927, "grad_norm": 0.561553941641637, "learning_rate": 2.6980378824624823e-06, "loss": 0.0258, "step": 131715 }, { "epoch": 0.549607363703883, "grad_norm": 0.5451134515567656, "learning_rate": 2.6979866737627685e-06, "loss": 0.02, "step": 131720 }, { "epoch": 0.5496282264188732, "grad_norm": 0.9683384524888896, "learning_rate": 2.697935467978764e-06, "loss": 0.0202, "step": 131725 }, { "epoch": 0.5496490891338636, "grad_norm": 0.44539472983063866, "learning_rate": 2.697884265110192e-06, "loss": 0.0178, "step": 131730 }, { "epoch": 0.5496699518488538, "grad_norm": 0.5611009337334534, "learning_rate": 2.6978330651567754e-06, "loss": 0.0204, "step": 131735 }, { "epoch": 0.549690814563844, "grad_norm": 0.6627452948281705, "learning_rate": 2.6977818681182387e-06, "loss": 0.0193, "step": 131740 }, { "epoch": 0.5497116772788344, "grad_norm": 0.7876409897132779, "learning_rate": 2.697730673994305e-06, "loss": 0.0243, "step": 131745 }, { "epoch": 0.5497325399938247, "grad_norm": 0.6480885645987665, "learning_rate": 2.6976794827846967e-06, "loss": 0.0212, "step": 131750 }, { "epoch": 0.5497534027088149, "grad_norm": 1.1104375059488707, "learning_rate": 2.6976282944891392e-06, "loss": 0.0251, "step": 131755 }, { "epoch": 0.5497742654238051, "grad_norm": 1.064002784463026, "learning_rate": 2.6975771091073543e-06, "loss": 0.0195, "step": 131760 }, { "epoch": 0.5497951281387955, "grad_norm": 0.7106923544801346, "learning_rate": 2.6975259266390667e-06, "loss": 0.0259, "step": 131765 }, { "epoch": 0.5498159908537857, "grad_norm": 0.6962949274504316, "learning_rate": 2.6974747470839994e-06, "loss": 0.0255, "step": 131770 }, { "epoch": 0.549836853568776, "grad_norm": 0.5462540811793204, "learning_rate": 2.6974235704418767e-06, "loss": 0.0266, "step": 131775 }, { "epoch": 0.5498577162837663, "grad_norm": 0.4162517239854322, "learning_rate": 2.6973723967124217e-06, "loss": 0.0204, "step": 131780 }, { "epoch": 0.5498785789987566, "grad_norm": 0.86748447392121, "learning_rate": 2.697321225895358e-06, "loss": 0.0167, "step": 131785 }, { "epoch": 0.5498994417137468, "grad_norm": 1.5163759570065942, "learning_rate": 2.69727005799041e-06, "loss": 0.0317, "step": 131790 }, { "epoch": 0.5499203044287372, "grad_norm": 0.6137008970571327, "learning_rate": 2.6972188929973005e-06, "loss": 0.0168, "step": 131795 }, { "epoch": 0.5499411671437274, "grad_norm": 0.37477951824998385, "learning_rate": 2.697167730915754e-06, "loss": 0.0273, "step": 131800 }, { "epoch": 0.5499620298587177, "grad_norm": 0.790885172562953, "learning_rate": 2.6971165717454945e-06, "loss": 0.0206, "step": 131805 }, { "epoch": 0.549982892573708, "grad_norm": 0.8101453250680393, "learning_rate": 2.6970654154862465e-06, "loss": 0.0197, "step": 131810 }, { "epoch": 0.5500037552886983, "grad_norm": 1.0484393434762214, "learning_rate": 2.6970142621377327e-06, "loss": 0.0252, "step": 131815 }, { "epoch": 0.5500246180036885, "grad_norm": 0.8736768167847608, "learning_rate": 2.696963111699677e-06, "loss": 0.0251, "step": 131820 }, { "epoch": 0.5500454807186788, "grad_norm": 0.4838102332545343, "learning_rate": 2.696911964171805e-06, "loss": 0.02, "step": 131825 }, { "epoch": 0.5500663434336691, "grad_norm": 0.52525043230347, "learning_rate": 2.696860819553839e-06, "loss": 0.0264, "step": 131830 }, { "epoch": 0.5500872061486594, "grad_norm": 0.5601512325431731, "learning_rate": 2.6968096778455045e-06, "loss": 0.0195, "step": 131835 }, { "epoch": 0.5501080688636496, "grad_norm": 0.45067626158718277, "learning_rate": 2.696758539046524e-06, "loss": 0.0177, "step": 131840 }, { "epoch": 0.55012893157864, "grad_norm": 1.2040434140231813, "learning_rate": 2.696707403156623e-06, "loss": 0.0252, "step": 131845 }, { "epoch": 0.5501497942936302, "grad_norm": 0.49989214532174386, "learning_rate": 2.696656270175526e-06, "loss": 0.0207, "step": 131850 }, { "epoch": 0.5501706570086204, "grad_norm": 0.5585286414033025, "learning_rate": 2.6966051401029557e-06, "loss": 0.0229, "step": 131855 }, { "epoch": 0.5501915197236108, "grad_norm": 0.854327218799377, "learning_rate": 2.696554012938638e-06, "loss": 0.0204, "step": 131860 }, { "epoch": 0.550212382438601, "grad_norm": 0.7594350542460534, "learning_rate": 2.696502888682296e-06, "loss": 0.0222, "step": 131865 }, { "epoch": 0.5502332451535913, "grad_norm": 1.1005336926230165, "learning_rate": 2.6964517673336543e-06, "loss": 0.0209, "step": 131870 }, { "epoch": 0.5502541078685815, "grad_norm": 0.4954153308948625, "learning_rate": 2.6964006488924377e-06, "loss": 0.0156, "step": 131875 }, { "epoch": 0.5502749705835719, "grad_norm": 0.8668462483442955, "learning_rate": 2.6963495333583707e-06, "loss": 0.0207, "step": 131880 }, { "epoch": 0.5502958332985621, "grad_norm": 0.9799010089385316, "learning_rate": 2.696298420731177e-06, "loss": 0.029, "step": 131885 }, { "epoch": 0.5503166960135524, "grad_norm": 0.744880703776521, "learning_rate": 2.696247311010582e-06, "loss": 0.0228, "step": 131890 }, { "epoch": 0.5503375587285427, "grad_norm": 0.9494850656213701, "learning_rate": 2.6961962041963095e-06, "loss": 0.0343, "step": 131895 }, { "epoch": 0.550358421443533, "grad_norm": 0.5143021418694439, "learning_rate": 2.696145100288084e-06, "loss": 0.0203, "step": 131900 }, { "epoch": 0.5503792841585232, "grad_norm": 0.5265672411953999, "learning_rate": 2.696093999285631e-06, "loss": 0.0233, "step": 131905 }, { "epoch": 0.5504001468735136, "grad_norm": 0.5325854773288223, "learning_rate": 2.6960429011886747e-06, "loss": 0.0183, "step": 131910 }, { "epoch": 0.5504210095885038, "grad_norm": 1.0594733223014754, "learning_rate": 2.695991805996939e-06, "loss": 0.0282, "step": 131915 }, { "epoch": 0.5504418723034941, "grad_norm": 0.7239537584123147, "learning_rate": 2.69594071371015e-06, "loss": 0.0277, "step": 131920 }, { "epoch": 0.5504627350184844, "grad_norm": 0.9575898614597594, "learning_rate": 2.6958896243280312e-06, "loss": 0.0249, "step": 131925 }, { "epoch": 0.5504835977334747, "grad_norm": 0.4022007686074467, "learning_rate": 2.6958385378503084e-06, "loss": 0.0258, "step": 131930 }, { "epoch": 0.5505044604484649, "grad_norm": 0.5609713577246683, "learning_rate": 2.695787454276706e-06, "loss": 0.0218, "step": 131935 }, { "epoch": 0.5505253231634551, "grad_norm": 0.7916667438155326, "learning_rate": 2.6957363736069482e-06, "loss": 0.0267, "step": 131940 }, { "epoch": 0.5505461858784455, "grad_norm": 0.7226243244989943, "learning_rate": 2.695685295840761e-06, "loss": 0.0292, "step": 131945 }, { "epoch": 0.5505670485934357, "grad_norm": 0.6724316917244351, "learning_rate": 2.6956342209778687e-06, "loss": 0.022, "step": 131950 }, { "epoch": 0.550587911308426, "grad_norm": 0.5020516450388133, "learning_rate": 2.6955831490179963e-06, "loss": 0.0225, "step": 131955 }, { "epoch": 0.5506087740234163, "grad_norm": 1.1224559572151749, "learning_rate": 2.6955320799608685e-06, "loss": 0.0293, "step": 131960 }, { "epoch": 0.5506296367384066, "grad_norm": 0.7354555619090295, "learning_rate": 2.6954810138062114e-06, "loss": 0.0426, "step": 131965 }, { "epoch": 0.5506504994533968, "grad_norm": 2.032913429798476, "learning_rate": 2.6954299505537496e-06, "loss": 0.024, "step": 131970 }, { "epoch": 0.5506713621683872, "grad_norm": 0.5509792186914526, "learning_rate": 2.6953788902032074e-06, "loss": 0.022, "step": 131975 }, { "epoch": 0.5506922248833774, "grad_norm": 0.4495224955301801, "learning_rate": 2.695327832754311e-06, "loss": 0.0244, "step": 131980 }, { "epoch": 0.5507130875983677, "grad_norm": 0.4705504233422838, "learning_rate": 2.6952767782067846e-06, "loss": 0.0189, "step": 131985 }, { "epoch": 0.550733950313358, "grad_norm": 0.4439324419441056, "learning_rate": 2.695225726560354e-06, "loss": 0.0187, "step": 131990 }, { "epoch": 0.5507548130283483, "grad_norm": 0.46639227462439403, "learning_rate": 2.6951746778147453e-06, "loss": 0.0249, "step": 131995 }, { "epoch": 0.5507756757433385, "grad_norm": 0.3962263634399468, "learning_rate": 2.6951236319696826e-06, "loss": 0.0194, "step": 132000 }, { "epoch": 0.5507965384583288, "grad_norm": 0.6363074819170996, "learning_rate": 2.695072589024891e-06, "loss": 0.0262, "step": 132005 }, { "epoch": 0.5508174011733191, "grad_norm": 1.3963534136794367, "learning_rate": 2.6950215489800974e-06, "loss": 0.0282, "step": 132010 }, { "epoch": 0.5508382638883094, "grad_norm": 1.9955508208982224, "learning_rate": 2.6949705118350257e-06, "loss": 0.0239, "step": 132015 }, { "epoch": 0.5508591266032996, "grad_norm": 0.6874663259056648, "learning_rate": 2.694919477589402e-06, "loss": 0.016, "step": 132020 }, { "epoch": 0.55087998931829, "grad_norm": 0.5538267352224294, "learning_rate": 2.6948684462429514e-06, "loss": 0.0305, "step": 132025 }, { "epoch": 0.5509008520332802, "grad_norm": 0.925959019241768, "learning_rate": 2.6948174177954e-06, "loss": 0.0193, "step": 132030 }, { "epoch": 0.5509217147482705, "grad_norm": 0.4722975733260342, "learning_rate": 2.6947663922464735e-06, "loss": 0.0278, "step": 132035 }, { "epoch": 0.5509425774632608, "grad_norm": 0.9587002655519434, "learning_rate": 2.694715369595896e-06, "loss": 0.0309, "step": 132040 }, { "epoch": 0.550963440178251, "grad_norm": 1.7144594653637943, "learning_rate": 2.694664349843395e-06, "loss": 0.0395, "step": 132045 }, { "epoch": 0.5509843028932413, "grad_norm": 0.760598351847983, "learning_rate": 2.6946133329886946e-06, "loss": 0.0362, "step": 132050 }, { "epoch": 0.5510051656082315, "grad_norm": 0.543423009879184, "learning_rate": 2.6945623190315217e-06, "loss": 0.0235, "step": 132055 }, { "epoch": 0.5510260283232219, "grad_norm": 0.4550497460930717, "learning_rate": 2.6945113079716017e-06, "loss": 0.0276, "step": 132060 }, { "epoch": 0.5510468910382121, "grad_norm": 0.4516904394293725, "learning_rate": 2.69446029980866e-06, "loss": 0.0181, "step": 132065 }, { "epoch": 0.5510677537532024, "grad_norm": 0.5985017500641282, "learning_rate": 2.694409294542422e-06, "loss": 0.0286, "step": 132070 }, { "epoch": 0.5510886164681927, "grad_norm": 1.183705862302997, "learning_rate": 2.694358292172615e-06, "loss": 0.0294, "step": 132075 }, { "epoch": 0.551109479183183, "grad_norm": 0.842760377876412, "learning_rate": 2.6943072926989643e-06, "loss": 0.0219, "step": 132080 }, { "epoch": 0.5511303418981732, "grad_norm": 0.570503214107843, "learning_rate": 2.694256296121195e-06, "loss": 0.019, "step": 132085 }, { "epoch": 0.5511512046131636, "grad_norm": 0.6138229982406259, "learning_rate": 2.6942053024390336e-06, "loss": 0.0225, "step": 132090 }, { "epoch": 0.5511720673281538, "grad_norm": 0.7564281845113612, "learning_rate": 2.6941543116522064e-06, "loss": 0.0312, "step": 132095 }, { "epoch": 0.5511929300431441, "grad_norm": 0.7888898468119401, "learning_rate": 2.6941033237604387e-06, "loss": 0.0232, "step": 132100 }, { "epoch": 0.5512137927581344, "grad_norm": 0.617335810876818, "learning_rate": 2.694052338763457e-06, "loss": 0.0228, "step": 132105 }, { "epoch": 0.5512346554731247, "grad_norm": 0.543373353842002, "learning_rate": 2.6940013566609875e-06, "loss": 0.0251, "step": 132110 }, { "epoch": 0.5512555181881149, "grad_norm": 0.5046448834184728, "learning_rate": 2.6939503774527563e-06, "loss": 0.0389, "step": 132115 }, { "epoch": 0.5512763809031052, "grad_norm": 0.8888793460877793, "learning_rate": 2.6938994011384895e-06, "loss": 0.0315, "step": 132120 }, { "epoch": 0.5512972436180955, "grad_norm": 3.403128718712763, "learning_rate": 2.693848427717913e-06, "loss": 0.0254, "step": 132125 }, { "epoch": 0.5513181063330858, "grad_norm": 0.9445771844504923, "learning_rate": 2.6937974571907534e-06, "loss": 0.027, "step": 132130 }, { "epoch": 0.551338969048076, "grad_norm": 0.6106298354915196, "learning_rate": 2.693746489556737e-06, "loss": 0.0198, "step": 132135 }, { "epoch": 0.5513598317630664, "grad_norm": 0.4635681144293047, "learning_rate": 2.69369552481559e-06, "loss": 0.0203, "step": 132140 }, { "epoch": 0.5513806944780566, "grad_norm": 0.24874391387041567, "learning_rate": 2.693644562967039e-06, "loss": 0.0239, "step": 132145 }, { "epoch": 0.5514015571930468, "grad_norm": 0.5989259174384225, "learning_rate": 2.6935936040108103e-06, "loss": 0.0263, "step": 132150 }, { "epoch": 0.5514224199080372, "grad_norm": 0.5260938255073253, "learning_rate": 2.693542647946629e-06, "loss": 0.018, "step": 132155 }, { "epoch": 0.5514432826230274, "grad_norm": 1.2164682035736176, "learning_rate": 2.693491694774224e-06, "loss": 0.0305, "step": 132160 }, { "epoch": 0.5514641453380177, "grad_norm": 0.6395470818458778, "learning_rate": 2.6934407444933195e-06, "loss": 0.0352, "step": 132165 }, { "epoch": 0.551485008053008, "grad_norm": 0.7365062307223497, "learning_rate": 2.6933897971036443e-06, "loss": 0.0266, "step": 132170 }, { "epoch": 0.5515058707679983, "grad_norm": 0.5377977028446548, "learning_rate": 2.693338852604923e-06, "loss": 0.0182, "step": 132175 }, { "epoch": 0.5515267334829885, "grad_norm": 0.5422714839091978, "learning_rate": 2.693287910996883e-06, "loss": 0.0252, "step": 132180 }, { "epoch": 0.5515475961979788, "grad_norm": 0.7463195764476058, "learning_rate": 2.6932369722792504e-06, "loss": 0.0189, "step": 132185 }, { "epoch": 0.5515684589129691, "grad_norm": 0.7887882068688803, "learning_rate": 2.6931860364517532e-06, "loss": 0.0251, "step": 132190 }, { "epoch": 0.5515893216279594, "grad_norm": 1.460628810674608, "learning_rate": 2.693135103514116e-06, "loss": 0.0438, "step": 132195 }, { "epoch": 0.5516101843429496, "grad_norm": 1.1696327890552785, "learning_rate": 2.6930841734660686e-06, "loss": 0.023, "step": 132200 }, { "epoch": 0.55163104705794, "grad_norm": 0.4171730884158627, "learning_rate": 2.6930332463073345e-06, "loss": 0.0222, "step": 132205 }, { "epoch": 0.5516519097729302, "grad_norm": 0.5392802613050155, "learning_rate": 2.692982322037643e-06, "loss": 0.0296, "step": 132210 }, { "epoch": 0.5516727724879205, "grad_norm": 0.5251079660868723, "learning_rate": 2.692931400656719e-06, "loss": 0.0227, "step": 132215 }, { "epoch": 0.5516936352029108, "grad_norm": 0.3628671893006698, "learning_rate": 2.692880482164291e-06, "loss": 0.0196, "step": 132220 }, { "epoch": 0.5517144979179011, "grad_norm": 1.2857115333733196, "learning_rate": 2.692829566560085e-06, "loss": 0.0272, "step": 132225 }, { "epoch": 0.5517353606328913, "grad_norm": 0.7529400185965481, "learning_rate": 2.6927786538438284e-06, "loss": 0.0269, "step": 132230 }, { "epoch": 0.5517562233478815, "grad_norm": 0.5815381972822818, "learning_rate": 2.6927277440152478e-06, "loss": 0.0185, "step": 132235 }, { "epoch": 0.5517770860628719, "grad_norm": 0.6752972025125582, "learning_rate": 2.6926768370740707e-06, "loss": 0.0245, "step": 132240 }, { "epoch": 0.5517979487778621, "grad_norm": 0.4774474663383722, "learning_rate": 2.6926259330200245e-06, "loss": 0.028, "step": 132245 }, { "epoch": 0.5518188114928524, "grad_norm": 0.624520713389807, "learning_rate": 2.6925750318528347e-06, "loss": 0.027, "step": 132250 }, { "epoch": 0.5518396742078427, "grad_norm": 0.8687729020639948, "learning_rate": 2.6925241335722303e-06, "loss": 0.0273, "step": 132255 }, { "epoch": 0.551860536922833, "grad_norm": 0.5539934857800216, "learning_rate": 2.6924732381779377e-06, "loss": 0.0214, "step": 132260 }, { "epoch": 0.5518813996378232, "grad_norm": 0.6832428942084082, "learning_rate": 2.6924223456696836e-06, "loss": 0.0216, "step": 132265 }, { "epoch": 0.5519022623528136, "grad_norm": 0.5336232920803649, "learning_rate": 2.6923714560471963e-06, "loss": 0.0229, "step": 132270 }, { "epoch": 0.5519231250678038, "grad_norm": 0.7838052644137445, "learning_rate": 2.692320569310202e-06, "loss": 0.0267, "step": 132275 }, { "epoch": 0.5519439877827941, "grad_norm": 0.4518542483644692, "learning_rate": 2.692269685458429e-06, "loss": 0.0194, "step": 132280 }, { "epoch": 0.5519648504977844, "grad_norm": 1.0870641668508993, "learning_rate": 2.6922188044916042e-06, "loss": 0.031, "step": 132285 }, { "epoch": 0.5519857132127747, "grad_norm": 0.5320674039730104, "learning_rate": 2.692167926409455e-06, "loss": 0.0254, "step": 132290 }, { "epoch": 0.5520065759277649, "grad_norm": 0.9466579878011707, "learning_rate": 2.692117051211709e-06, "loss": 0.0357, "step": 132295 }, { "epoch": 0.5520274386427552, "grad_norm": 0.8535131533096181, "learning_rate": 2.6920661788980933e-06, "loss": 0.0264, "step": 132300 }, { "epoch": 0.5520483013577455, "grad_norm": 0.9411449156830476, "learning_rate": 2.692015309468336e-06, "loss": 0.0255, "step": 132305 }, { "epoch": 0.5520691640727358, "grad_norm": 0.38581428321369715, "learning_rate": 2.691964442922164e-06, "loss": 0.0262, "step": 132310 }, { "epoch": 0.552090026787726, "grad_norm": 1.6052327766454755, "learning_rate": 2.6919135792593054e-06, "loss": 0.0266, "step": 132315 }, { "epoch": 0.5521108895027164, "grad_norm": 0.5825216019278191, "learning_rate": 2.6918627184794876e-06, "loss": 0.0257, "step": 132320 }, { "epoch": 0.5521317522177066, "grad_norm": 1.691007099357808, "learning_rate": 2.6918118605824377e-06, "loss": 0.0341, "step": 132325 }, { "epoch": 0.5521526149326969, "grad_norm": 0.906936273325795, "learning_rate": 2.691761005567885e-06, "loss": 0.0231, "step": 132330 }, { "epoch": 0.5521734776476872, "grad_norm": 0.7259958835774329, "learning_rate": 2.6917101534355556e-06, "loss": 0.0179, "step": 132335 }, { "epoch": 0.5521943403626774, "grad_norm": 0.6729803871773752, "learning_rate": 2.691659304185178e-06, "loss": 0.0236, "step": 132340 }, { "epoch": 0.5522152030776677, "grad_norm": 0.4572340552650665, "learning_rate": 2.691608457816479e-06, "loss": 0.0235, "step": 132345 }, { "epoch": 0.552236065792658, "grad_norm": 0.6871421673844353, "learning_rate": 2.691557614329188e-06, "loss": 0.0204, "step": 132350 }, { "epoch": 0.5522569285076483, "grad_norm": 1.2182363896969186, "learning_rate": 2.6915067737230314e-06, "loss": 0.0241, "step": 132355 }, { "epoch": 0.5522777912226385, "grad_norm": 1.0150932929099976, "learning_rate": 2.6914559359977386e-06, "loss": 0.0208, "step": 132360 }, { "epoch": 0.5522986539376288, "grad_norm": 0.915093848156944, "learning_rate": 2.6914051011530368e-06, "loss": 0.0293, "step": 132365 }, { "epoch": 0.5523195166526191, "grad_norm": 0.7445678405529252, "learning_rate": 2.6913542691886535e-06, "loss": 0.0209, "step": 132370 }, { "epoch": 0.5523403793676094, "grad_norm": 0.5018887723326189, "learning_rate": 2.691303440104317e-06, "loss": 0.0227, "step": 132375 }, { "epoch": 0.5523612420825996, "grad_norm": 0.5947973499582078, "learning_rate": 2.6912526138997564e-06, "loss": 0.024, "step": 132380 }, { "epoch": 0.55238210479759, "grad_norm": 0.6114428571343171, "learning_rate": 2.691201790574698e-06, "loss": 0.024, "step": 132385 }, { "epoch": 0.5524029675125802, "grad_norm": 0.5898543943617954, "learning_rate": 2.6911509701288705e-06, "loss": 0.0234, "step": 132390 }, { "epoch": 0.5524238302275705, "grad_norm": 0.8049277162979014, "learning_rate": 2.6911001525620035e-06, "loss": 0.0252, "step": 132395 }, { "epoch": 0.5524446929425608, "grad_norm": 0.4846772773842808, "learning_rate": 2.6910493378738228e-06, "loss": 0.021, "step": 132400 }, { "epoch": 0.5524655556575511, "grad_norm": 0.7938517325906554, "learning_rate": 2.6909985260640587e-06, "loss": 0.0234, "step": 132405 }, { "epoch": 0.5524864183725413, "grad_norm": 1.1321583929624928, "learning_rate": 2.6909477171324384e-06, "loss": 0.0223, "step": 132410 }, { "epoch": 0.5525072810875316, "grad_norm": 1.0156902453989511, "learning_rate": 2.6908969110786905e-06, "loss": 0.0288, "step": 132415 }, { "epoch": 0.5525281438025219, "grad_norm": 0.9805973699656046, "learning_rate": 2.6908461079025426e-06, "loss": 0.0314, "step": 132420 }, { "epoch": 0.5525490065175122, "grad_norm": 0.567450316778218, "learning_rate": 2.690795307603724e-06, "loss": 0.0348, "step": 132425 }, { "epoch": 0.5525698692325024, "grad_norm": 3.254774398957632, "learning_rate": 2.690744510181963e-06, "loss": 0.0269, "step": 132430 }, { "epoch": 0.5525907319474928, "grad_norm": 0.2862626571513689, "learning_rate": 2.690693715636988e-06, "loss": 0.0277, "step": 132435 }, { "epoch": 0.552611594662483, "grad_norm": 0.7155456216406535, "learning_rate": 2.690642923968527e-06, "loss": 0.0256, "step": 132440 }, { "epoch": 0.5526324573774732, "grad_norm": 0.33309528512416775, "learning_rate": 2.690592135176309e-06, "loss": 0.0182, "step": 132445 }, { "epoch": 0.5526533200924636, "grad_norm": 0.8766029034742853, "learning_rate": 2.690541349260063e-06, "loss": 0.0235, "step": 132450 }, { "epoch": 0.5526741828074538, "grad_norm": 0.702853110957112, "learning_rate": 2.6904905662195164e-06, "loss": 0.0196, "step": 132455 }, { "epoch": 0.5526950455224441, "grad_norm": 0.9998127677931067, "learning_rate": 2.6904397860543984e-06, "loss": 0.026, "step": 132460 }, { "epoch": 0.5527159082374344, "grad_norm": 0.6187118396738258, "learning_rate": 2.6903890087644375e-06, "loss": 0.0161, "step": 132465 }, { "epoch": 0.5527367709524247, "grad_norm": 0.6901047889047837, "learning_rate": 2.690338234349363e-06, "loss": 0.0213, "step": 132470 }, { "epoch": 0.5527576336674149, "grad_norm": 0.3931410144396694, "learning_rate": 2.6902874628089033e-06, "loss": 0.0229, "step": 132475 }, { "epoch": 0.5527784963824052, "grad_norm": 0.9100697978416842, "learning_rate": 2.6902366941427865e-06, "loss": 0.0225, "step": 132480 }, { "epoch": 0.5527993590973955, "grad_norm": 0.262512530669985, "learning_rate": 2.690185928350742e-06, "loss": 0.0284, "step": 132485 }, { "epoch": 0.5528202218123858, "grad_norm": 0.5711402436687275, "learning_rate": 2.690135165432499e-06, "loss": 0.0207, "step": 132490 }, { "epoch": 0.552841084527376, "grad_norm": 0.5810172736360544, "learning_rate": 2.6900844053877856e-06, "loss": 0.0315, "step": 132495 }, { "epoch": 0.5528619472423664, "grad_norm": 0.4927017170665504, "learning_rate": 2.6900336482163315e-06, "loss": 0.0209, "step": 132500 }, { "epoch": 0.5528828099573566, "grad_norm": 0.475631893025279, "learning_rate": 2.689982893917865e-06, "loss": 0.0207, "step": 132505 }, { "epoch": 0.5529036726723469, "grad_norm": 1.5579828165081917, "learning_rate": 2.6899321424921153e-06, "loss": 0.0438, "step": 132510 }, { "epoch": 0.5529245353873372, "grad_norm": 0.7038821765544184, "learning_rate": 2.689881393938811e-06, "loss": 0.0243, "step": 132515 }, { "epoch": 0.5529453981023275, "grad_norm": 0.5515550923601902, "learning_rate": 2.6898306482576825e-06, "loss": 0.0192, "step": 132520 }, { "epoch": 0.5529662608173177, "grad_norm": 0.9067367930450859, "learning_rate": 2.689779905448457e-06, "loss": 0.0258, "step": 132525 }, { "epoch": 0.5529871235323081, "grad_norm": 0.7112663934898569, "learning_rate": 2.6897291655108647e-06, "loss": 0.0243, "step": 132530 }, { "epoch": 0.5530079862472983, "grad_norm": 0.5881882750668425, "learning_rate": 2.6896784284446355e-06, "loss": 0.0144, "step": 132535 }, { "epoch": 0.5530288489622885, "grad_norm": 0.7674774216250481, "learning_rate": 2.6896276942494964e-06, "loss": 0.0224, "step": 132540 }, { "epoch": 0.5530497116772788, "grad_norm": 0.7207239428664306, "learning_rate": 2.689576962925179e-06, "loss": 0.0262, "step": 132545 }, { "epoch": 0.5530705743922691, "grad_norm": 0.6277515091826175, "learning_rate": 2.6895262344714106e-06, "loss": 0.028, "step": 132550 }, { "epoch": 0.5530914371072594, "grad_norm": 0.8245381001712857, "learning_rate": 2.689475508887922e-06, "loss": 0.019, "step": 132555 }, { "epoch": 0.5531122998222496, "grad_norm": 0.9348316246495318, "learning_rate": 2.689424786174442e-06, "loss": 0.0252, "step": 132560 }, { "epoch": 0.55313316253724, "grad_norm": 0.8633221817604706, "learning_rate": 2.6893740663307e-06, "loss": 0.0254, "step": 132565 }, { "epoch": 0.5531540252522302, "grad_norm": 0.8193445639368666, "learning_rate": 2.6893233493564252e-06, "loss": 0.0253, "step": 132570 }, { "epoch": 0.5531748879672205, "grad_norm": 0.9961332020360952, "learning_rate": 2.689272635251347e-06, "loss": 0.0249, "step": 132575 }, { "epoch": 0.5531957506822108, "grad_norm": 0.6157942845650645, "learning_rate": 2.6892219240151953e-06, "loss": 0.0219, "step": 132580 }, { "epoch": 0.5532166133972011, "grad_norm": 0.40114150500880447, "learning_rate": 2.689171215647699e-06, "loss": 0.023, "step": 132585 }, { "epoch": 0.5532374761121913, "grad_norm": 0.779662674299321, "learning_rate": 2.6891205101485885e-06, "loss": 0.0252, "step": 132590 }, { "epoch": 0.5532583388271816, "grad_norm": 0.7763707105814834, "learning_rate": 2.689069807517593e-06, "loss": 0.0341, "step": 132595 }, { "epoch": 0.5532792015421719, "grad_norm": 0.9040695883702755, "learning_rate": 2.6890191077544416e-06, "loss": 0.0327, "step": 132600 }, { "epoch": 0.5533000642571622, "grad_norm": 0.4665605016374191, "learning_rate": 2.6889684108588643e-06, "loss": 0.0186, "step": 132605 }, { "epoch": 0.5533209269721524, "grad_norm": 1.2765714169135973, "learning_rate": 2.6889177168305913e-06, "loss": 0.0214, "step": 132610 }, { "epoch": 0.5533417896871428, "grad_norm": 0.3345606024404359, "learning_rate": 2.6888670256693516e-06, "loss": 0.0212, "step": 132615 }, { "epoch": 0.553362652402133, "grad_norm": 0.9368739042351167, "learning_rate": 2.6888163373748755e-06, "loss": 0.0226, "step": 132620 }, { "epoch": 0.5533835151171232, "grad_norm": 0.6103563886630529, "learning_rate": 2.6887656519468926e-06, "loss": 0.0268, "step": 132625 }, { "epoch": 0.5534043778321136, "grad_norm": 1.030403626901465, "learning_rate": 2.688714969385132e-06, "loss": 0.0267, "step": 132630 }, { "epoch": 0.5534252405471038, "grad_norm": 0.601966797393054, "learning_rate": 2.688664289689325e-06, "loss": 0.0218, "step": 132635 }, { "epoch": 0.5534461032620941, "grad_norm": 0.8374811849822036, "learning_rate": 2.6886136128592012e-06, "loss": 0.0244, "step": 132640 }, { "epoch": 0.5534669659770844, "grad_norm": 0.8766601051328147, "learning_rate": 2.6885629388944897e-06, "loss": 0.0285, "step": 132645 }, { "epoch": 0.5534878286920747, "grad_norm": 0.5236055569491406, "learning_rate": 2.6885122677949204e-06, "loss": 0.0178, "step": 132650 }, { "epoch": 0.5535086914070649, "grad_norm": 0.9826524047988024, "learning_rate": 2.688461599560224e-06, "loss": 0.0371, "step": 132655 }, { "epoch": 0.5535295541220552, "grad_norm": 0.9787178592200587, "learning_rate": 2.688410934190131e-06, "loss": 0.0308, "step": 132660 }, { "epoch": 0.5535504168370455, "grad_norm": 0.6770653853009517, "learning_rate": 2.688360271684371e-06, "loss": 0.0249, "step": 132665 }, { "epoch": 0.5535712795520358, "grad_norm": 0.6159935853719447, "learning_rate": 2.6883096120426734e-06, "loss": 0.0313, "step": 132670 }, { "epoch": 0.553592142267026, "grad_norm": 0.7653846307683774, "learning_rate": 2.6882589552647686e-06, "loss": 0.0308, "step": 132675 }, { "epoch": 0.5536130049820164, "grad_norm": 1.5784627360067505, "learning_rate": 2.6882083013503878e-06, "loss": 0.0283, "step": 132680 }, { "epoch": 0.5536338676970066, "grad_norm": 0.4531425620061747, "learning_rate": 2.68815765029926e-06, "loss": 0.0238, "step": 132685 }, { "epoch": 0.5536547304119969, "grad_norm": 0.4632286844286275, "learning_rate": 2.688107002111117e-06, "loss": 0.0236, "step": 132690 }, { "epoch": 0.5536755931269872, "grad_norm": 0.6091317333578976, "learning_rate": 2.6880563567856877e-06, "loss": 0.0268, "step": 132695 }, { "epoch": 0.5536964558419775, "grad_norm": 0.5804416090192085, "learning_rate": 2.6880057143227023e-06, "loss": 0.0287, "step": 132700 }, { "epoch": 0.5537173185569677, "grad_norm": 0.5182331374449723, "learning_rate": 2.6879550747218925e-06, "loss": 0.0236, "step": 132705 }, { "epoch": 0.553738181271958, "grad_norm": 0.844431864187311, "learning_rate": 2.6879044379829877e-06, "loss": 0.021, "step": 132710 }, { "epoch": 0.5537590439869483, "grad_norm": 0.9856233606277299, "learning_rate": 2.6878538041057185e-06, "loss": 0.0231, "step": 132715 }, { "epoch": 0.5537799067019386, "grad_norm": 1.0221024187769492, "learning_rate": 2.687803173089815e-06, "loss": 0.0284, "step": 132720 }, { "epoch": 0.5538007694169288, "grad_norm": 0.49950620605903023, "learning_rate": 2.687752544935009e-06, "loss": 0.0264, "step": 132725 }, { "epoch": 0.5538216321319192, "grad_norm": 1.0255608645213796, "learning_rate": 2.68770191964103e-06, "loss": 0.0267, "step": 132730 }, { "epoch": 0.5538424948469094, "grad_norm": 0.8329508252620516, "learning_rate": 2.6876512972076086e-06, "loss": 0.0239, "step": 132735 }, { "epoch": 0.5538633575618996, "grad_norm": 0.5489134881892386, "learning_rate": 2.687600677634476e-06, "loss": 0.0189, "step": 132740 }, { "epoch": 0.55388422027689, "grad_norm": 0.4268629939873442, "learning_rate": 2.6875500609213622e-06, "loss": 0.024, "step": 132745 }, { "epoch": 0.5539050829918802, "grad_norm": 0.611221870821385, "learning_rate": 2.6874994470679984e-06, "loss": 0.0213, "step": 132750 }, { "epoch": 0.5539259457068705, "grad_norm": 0.35573640091043407, "learning_rate": 2.6874488360741145e-06, "loss": 0.0189, "step": 132755 }, { "epoch": 0.5539468084218608, "grad_norm": 1.1457441612649215, "learning_rate": 2.6873982279394425e-06, "loss": 0.0283, "step": 132760 }, { "epoch": 0.5539676711368511, "grad_norm": 0.5452440140793203, "learning_rate": 2.6873476226637124e-06, "loss": 0.0229, "step": 132765 }, { "epoch": 0.5539885338518413, "grad_norm": 0.43044777017328506, "learning_rate": 2.6872970202466546e-06, "loss": 0.019, "step": 132770 }, { "epoch": 0.5540093965668316, "grad_norm": 0.7933319879108196, "learning_rate": 2.6872464206880018e-06, "loss": 0.0272, "step": 132775 }, { "epoch": 0.5540302592818219, "grad_norm": 0.6028035206877457, "learning_rate": 2.687195823987483e-06, "loss": 0.0241, "step": 132780 }, { "epoch": 0.5540511219968122, "grad_norm": 0.9053488257798514, "learning_rate": 2.68714523014483e-06, "loss": 0.0159, "step": 132785 }, { "epoch": 0.5540719847118024, "grad_norm": 0.4624915414042719, "learning_rate": 2.687094639159773e-06, "loss": 0.0231, "step": 132790 }, { "epoch": 0.5540928474267928, "grad_norm": 0.968024303877108, "learning_rate": 2.6870440510320433e-06, "loss": 0.0272, "step": 132795 }, { "epoch": 0.554113710141783, "grad_norm": 0.7473417640185219, "learning_rate": 2.686993465761373e-06, "loss": 0.0196, "step": 132800 }, { "epoch": 0.5541345728567733, "grad_norm": 0.7484895010353292, "learning_rate": 2.6869428833474926e-06, "loss": 0.0252, "step": 132805 }, { "epoch": 0.5541554355717636, "grad_norm": 0.5178208935556088, "learning_rate": 2.686892303790133e-06, "loss": 0.0277, "step": 132810 }, { "epoch": 0.5541762982867539, "grad_norm": 0.7546588214853635, "learning_rate": 2.6868417270890245e-06, "loss": 0.0215, "step": 132815 }, { "epoch": 0.5541971610017441, "grad_norm": 0.49298256116983735, "learning_rate": 2.6867911532439e-06, "loss": 0.018, "step": 132820 }, { "epoch": 0.5542180237167345, "grad_norm": 1.0969730520137597, "learning_rate": 2.6867405822544894e-06, "loss": 0.0263, "step": 132825 }, { "epoch": 0.5542388864317247, "grad_norm": 1.0251289754336685, "learning_rate": 2.6866900141205248e-06, "loss": 0.0224, "step": 132830 }, { "epoch": 0.5542597491467149, "grad_norm": 0.536783608319916, "learning_rate": 2.6866394488417375e-06, "loss": 0.0221, "step": 132835 }, { "epoch": 0.5542806118617052, "grad_norm": 0.5685334272414035, "learning_rate": 2.686588886417858e-06, "loss": 0.0204, "step": 132840 }, { "epoch": 0.5543014745766955, "grad_norm": 0.6763780312770193, "learning_rate": 2.6865383268486177e-06, "loss": 0.0284, "step": 132845 }, { "epoch": 0.5543223372916858, "grad_norm": 2.523293309202449, "learning_rate": 2.686487770133749e-06, "loss": 0.0205, "step": 132850 }, { "epoch": 0.554343200006676, "grad_norm": 0.7341810897650393, "learning_rate": 2.6864372162729828e-06, "loss": 0.0238, "step": 132855 }, { "epoch": 0.5543640627216664, "grad_norm": 0.8135176268263244, "learning_rate": 2.68638666526605e-06, "loss": 0.0187, "step": 132860 }, { "epoch": 0.5543849254366566, "grad_norm": 0.675195013757545, "learning_rate": 2.6863361171126834e-06, "loss": 0.0206, "step": 132865 }, { "epoch": 0.5544057881516469, "grad_norm": 0.5201941854001317, "learning_rate": 2.6862855718126128e-06, "loss": 0.0308, "step": 132870 }, { "epoch": 0.5544266508666372, "grad_norm": 0.6871424496988454, "learning_rate": 2.6862350293655715e-06, "loss": 0.0269, "step": 132875 }, { "epoch": 0.5544475135816275, "grad_norm": 1.6966327760215063, "learning_rate": 2.68618448977129e-06, "loss": 0.0247, "step": 132880 }, { "epoch": 0.5544683762966177, "grad_norm": 0.8993565151343915, "learning_rate": 2.686133953029501e-06, "loss": 0.0254, "step": 132885 }, { "epoch": 0.554489239011608, "grad_norm": 0.7677618430481602, "learning_rate": 2.686083419139935e-06, "loss": 0.0178, "step": 132890 }, { "epoch": 0.5545101017265983, "grad_norm": 0.585688315181346, "learning_rate": 2.6860328881023246e-06, "loss": 0.0222, "step": 132895 }, { "epoch": 0.5545309644415886, "grad_norm": 1.5946360936133879, "learning_rate": 2.6859823599164e-06, "loss": 0.0248, "step": 132900 }, { "epoch": 0.5545518271565788, "grad_norm": 0.4707593130683703, "learning_rate": 2.6859318345818956e-06, "loss": 0.0292, "step": 132905 }, { "epoch": 0.5545726898715692, "grad_norm": 0.6795925458133975, "learning_rate": 2.6858813120985412e-06, "loss": 0.0203, "step": 132910 }, { "epoch": 0.5545935525865594, "grad_norm": 0.8884066312073182, "learning_rate": 2.6858307924660694e-06, "loss": 0.0275, "step": 132915 }, { "epoch": 0.5546144153015496, "grad_norm": 0.6043320919988419, "learning_rate": 2.6857802756842115e-06, "loss": 0.0214, "step": 132920 }, { "epoch": 0.55463527801654, "grad_norm": 0.844565617234202, "learning_rate": 2.6857297617527e-06, "loss": 0.0203, "step": 132925 }, { "epoch": 0.5546561407315302, "grad_norm": 0.6714379154848891, "learning_rate": 2.685679250671267e-06, "loss": 0.0254, "step": 132930 }, { "epoch": 0.5546770034465205, "grad_norm": 0.5170823831686217, "learning_rate": 2.6856287424396444e-06, "loss": 0.0301, "step": 132935 }, { "epoch": 0.5546978661615108, "grad_norm": 0.3411553028782234, "learning_rate": 2.6855782370575634e-06, "loss": 0.0199, "step": 132940 }, { "epoch": 0.5547187288765011, "grad_norm": 1.6404088976429372, "learning_rate": 2.6855277345247572e-06, "loss": 0.0353, "step": 132945 }, { "epoch": 0.5547395915914913, "grad_norm": 0.4789736291649886, "learning_rate": 2.6854772348409573e-06, "loss": 0.0198, "step": 132950 }, { "epoch": 0.5547604543064816, "grad_norm": 1.0188873134106795, "learning_rate": 2.685426738005896e-06, "loss": 0.0292, "step": 132955 }, { "epoch": 0.5547813170214719, "grad_norm": 0.7989728401028902, "learning_rate": 2.6853762440193055e-06, "loss": 0.0238, "step": 132960 }, { "epoch": 0.5548021797364622, "grad_norm": 0.4368628523636243, "learning_rate": 2.685325752880918e-06, "loss": 0.0162, "step": 132965 }, { "epoch": 0.5548230424514524, "grad_norm": 1.284045384717267, "learning_rate": 2.685275264590466e-06, "loss": 0.0176, "step": 132970 }, { "epoch": 0.5548439051664428, "grad_norm": 1.8547936993538419, "learning_rate": 2.6852247791476805e-06, "loss": 0.0279, "step": 132975 }, { "epoch": 0.554864767881433, "grad_norm": 0.8488991533503722, "learning_rate": 2.685174296552295e-06, "loss": 0.0256, "step": 132980 }, { "epoch": 0.5548856305964233, "grad_norm": 0.8059358778504602, "learning_rate": 2.685123816804042e-06, "loss": 0.0236, "step": 132985 }, { "epoch": 0.5549064933114136, "grad_norm": 0.47340672365895065, "learning_rate": 2.685073339902654e-06, "loss": 0.0224, "step": 132990 }, { "epoch": 0.5549273560264039, "grad_norm": 0.6443518447274901, "learning_rate": 2.6850228658478634e-06, "loss": 0.0296, "step": 132995 }, { "epoch": 0.5549482187413941, "grad_norm": 1.1595589532267792, "learning_rate": 2.684972394639401e-06, "loss": 0.0219, "step": 133000 }, { "epoch": 0.5549690814563845, "grad_norm": 0.613610033592264, "learning_rate": 2.684921926277001e-06, "loss": 0.0241, "step": 133005 }, { "epoch": 0.5549899441713747, "grad_norm": 0.821670219674514, "learning_rate": 2.6848714607603956e-06, "loss": 0.0224, "step": 133010 }, { "epoch": 0.555010806886365, "grad_norm": 0.6125831828792961, "learning_rate": 2.6848209980893173e-06, "loss": 0.0239, "step": 133015 }, { "epoch": 0.5550316696013552, "grad_norm": 0.39876957274549263, "learning_rate": 2.6847705382634986e-06, "loss": 0.023, "step": 133020 }, { "epoch": 0.5550525323163455, "grad_norm": 4.350192136019483, "learning_rate": 2.6847200812826716e-06, "loss": 0.0219, "step": 133025 }, { "epoch": 0.5550733950313358, "grad_norm": 1.2696324234519982, "learning_rate": 2.68466962714657e-06, "loss": 0.0281, "step": 133030 }, { "epoch": 0.555094257746326, "grad_norm": 0.44064462217337536, "learning_rate": 2.6846191758549263e-06, "loss": 0.0218, "step": 133035 }, { "epoch": 0.5551151204613164, "grad_norm": 0.5863569414776671, "learning_rate": 2.6845687274074722e-06, "loss": 0.0249, "step": 133040 }, { "epoch": 0.5551359831763066, "grad_norm": 0.7219651023691214, "learning_rate": 2.684518281803942e-06, "loss": 0.0305, "step": 133045 }, { "epoch": 0.5551568458912969, "grad_norm": 0.24717500093129505, "learning_rate": 2.6844678390440675e-06, "loss": 0.0198, "step": 133050 }, { "epoch": 0.5551777086062872, "grad_norm": 0.5502157638181429, "learning_rate": 2.684417399127581e-06, "loss": 0.0282, "step": 133055 }, { "epoch": 0.5551985713212775, "grad_norm": 0.4681223614532887, "learning_rate": 2.684366962054217e-06, "loss": 0.0178, "step": 133060 }, { "epoch": 0.5552194340362677, "grad_norm": 0.8570380499714018, "learning_rate": 2.6843165278237084e-06, "loss": 0.0193, "step": 133065 }, { "epoch": 0.555240296751258, "grad_norm": 0.8393416780960276, "learning_rate": 2.684266096435786e-06, "loss": 0.0256, "step": 133070 }, { "epoch": 0.5552611594662483, "grad_norm": 0.52022391044702, "learning_rate": 2.6842156678901844e-06, "loss": 0.0379, "step": 133075 }, { "epoch": 0.5552820221812386, "grad_norm": 0.8416128153026833, "learning_rate": 2.6841652421866365e-06, "loss": 0.0252, "step": 133080 }, { "epoch": 0.5553028848962288, "grad_norm": 0.3926095708131634, "learning_rate": 2.6841148193248756e-06, "loss": 0.0179, "step": 133085 }, { "epoch": 0.5553237476112192, "grad_norm": 0.40496262152811496, "learning_rate": 2.684064399304634e-06, "loss": 0.0284, "step": 133090 }, { "epoch": 0.5553446103262094, "grad_norm": 0.7444020504318322, "learning_rate": 2.6840139821256452e-06, "loss": 0.0237, "step": 133095 }, { "epoch": 0.5553654730411997, "grad_norm": 0.7747526756826477, "learning_rate": 2.6839635677876425e-06, "loss": 0.0242, "step": 133100 }, { "epoch": 0.55538633575619, "grad_norm": 0.8784148013800179, "learning_rate": 2.6839131562903588e-06, "loss": 0.0222, "step": 133105 }, { "epoch": 0.5554071984711803, "grad_norm": 0.6664923976586928, "learning_rate": 2.6838627476335276e-06, "loss": 0.0207, "step": 133110 }, { "epoch": 0.5554280611861705, "grad_norm": 0.6319095550888559, "learning_rate": 2.683812341816882e-06, "loss": 0.0212, "step": 133115 }, { "epoch": 0.5554489239011609, "grad_norm": 0.7010595406610279, "learning_rate": 2.6837619388401553e-06, "loss": 0.0223, "step": 133120 }, { "epoch": 0.5554697866161511, "grad_norm": 0.5278245370931844, "learning_rate": 2.6837115387030813e-06, "loss": 0.0189, "step": 133125 }, { "epoch": 0.5554906493311413, "grad_norm": 0.5567258287266161, "learning_rate": 2.683661141405393e-06, "loss": 0.0228, "step": 133130 }, { "epoch": 0.5555115120461316, "grad_norm": 0.5268945605579745, "learning_rate": 2.683610746946824e-06, "loss": 0.0231, "step": 133135 }, { "epoch": 0.5555323747611219, "grad_norm": 0.8119260937716142, "learning_rate": 2.6835603553271066e-06, "loss": 0.0268, "step": 133140 }, { "epoch": 0.5555532374761122, "grad_norm": 0.5947264017686869, "learning_rate": 2.6835099665459756e-06, "loss": 0.0209, "step": 133145 }, { "epoch": 0.5555741001911024, "grad_norm": 0.9545732261794031, "learning_rate": 2.6834595806031643e-06, "loss": 0.0281, "step": 133150 }, { "epoch": 0.5555949629060928, "grad_norm": 0.595149243234256, "learning_rate": 2.6834091974984063e-06, "loss": 0.0194, "step": 133155 }, { "epoch": 0.555615825621083, "grad_norm": 0.6764559012821053, "learning_rate": 2.6833588172314344e-06, "loss": 0.0169, "step": 133160 }, { "epoch": 0.5556366883360733, "grad_norm": 0.6864566210582961, "learning_rate": 2.683308439801983e-06, "loss": 0.0236, "step": 133165 }, { "epoch": 0.5556575510510636, "grad_norm": 0.44321148990622833, "learning_rate": 2.6832580652097855e-06, "loss": 0.0282, "step": 133170 }, { "epoch": 0.5556784137660539, "grad_norm": 0.8481037975163602, "learning_rate": 2.6832076934545757e-06, "loss": 0.0308, "step": 133175 }, { "epoch": 0.5556992764810441, "grad_norm": 1.0602903302752538, "learning_rate": 2.6831573245360878e-06, "loss": 0.0236, "step": 133180 }, { "epoch": 0.5557201391960345, "grad_norm": 0.551890897277675, "learning_rate": 2.6831069584540544e-06, "loss": 0.0184, "step": 133185 }, { "epoch": 0.5557410019110247, "grad_norm": 0.3747431583344314, "learning_rate": 2.68305659520821e-06, "loss": 0.0198, "step": 133190 }, { "epoch": 0.555761864626015, "grad_norm": 0.8396155327208109, "learning_rate": 2.6830062347982883e-06, "loss": 0.0297, "step": 133195 }, { "epoch": 0.5557827273410052, "grad_norm": 0.736571835598064, "learning_rate": 2.6829558772240228e-06, "loss": 0.02, "step": 133200 }, { "epoch": 0.5558035900559956, "grad_norm": 0.7398351158518072, "learning_rate": 2.682905522485148e-06, "loss": 0.0186, "step": 133205 }, { "epoch": 0.5558244527709858, "grad_norm": 0.8033450678911899, "learning_rate": 2.6828551705813975e-06, "loss": 0.0224, "step": 133210 }, { "epoch": 0.555845315485976, "grad_norm": 0.8530764521845488, "learning_rate": 2.6828048215125057e-06, "loss": 0.028, "step": 133215 }, { "epoch": 0.5558661782009664, "grad_norm": 0.6334765439785165, "learning_rate": 2.682754475278206e-06, "loss": 0.0252, "step": 133220 }, { "epoch": 0.5558870409159566, "grad_norm": 0.7488549674671945, "learning_rate": 2.6827041318782326e-06, "loss": 0.0197, "step": 133225 }, { "epoch": 0.5559079036309469, "grad_norm": 2.520134485931986, "learning_rate": 2.6826537913123196e-06, "loss": 0.0332, "step": 133230 }, { "epoch": 0.5559287663459372, "grad_norm": 0.8498468860173081, "learning_rate": 2.6826034535802016e-06, "loss": 0.0258, "step": 133235 }, { "epoch": 0.5559496290609275, "grad_norm": 1.1621118463203857, "learning_rate": 2.6825531186816124e-06, "loss": 0.031, "step": 133240 }, { "epoch": 0.5559704917759177, "grad_norm": 0.7344185397943941, "learning_rate": 2.6825027866162862e-06, "loss": 0.0315, "step": 133245 }, { "epoch": 0.555991354490908, "grad_norm": 0.5633722859990797, "learning_rate": 2.682452457383956e-06, "loss": 0.0182, "step": 133250 }, { "epoch": 0.5560122172058983, "grad_norm": 1.1686953660162929, "learning_rate": 2.6824021309843584e-06, "loss": 0.0295, "step": 133255 }, { "epoch": 0.5560330799208886, "grad_norm": 0.589762694942057, "learning_rate": 2.6823518074172257e-06, "loss": 0.0214, "step": 133260 }, { "epoch": 0.5560539426358788, "grad_norm": 0.7889358409690005, "learning_rate": 2.6823014866822933e-06, "loss": 0.0268, "step": 133265 }, { "epoch": 0.5560748053508692, "grad_norm": 1.6658528739616283, "learning_rate": 2.6822511687792953e-06, "loss": 0.0241, "step": 133270 }, { "epoch": 0.5560956680658594, "grad_norm": 0.6148345017344968, "learning_rate": 2.6822008537079653e-06, "loss": 0.0191, "step": 133275 }, { "epoch": 0.5561165307808497, "grad_norm": 0.6751788537270328, "learning_rate": 2.6821505414680386e-06, "loss": 0.0224, "step": 133280 }, { "epoch": 0.55613739349584, "grad_norm": 0.33747750429407664, "learning_rate": 2.6821002320592496e-06, "loss": 0.0261, "step": 133285 }, { "epoch": 0.5561582562108303, "grad_norm": 0.7410073266011014, "learning_rate": 2.6820499254813333e-06, "loss": 0.0273, "step": 133290 }, { "epoch": 0.5561791189258205, "grad_norm": 0.4527319784644908, "learning_rate": 2.6819996217340228e-06, "loss": 0.0174, "step": 133295 }, { "epoch": 0.5561999816408109, "grad_norm": 0.49307362517403885, "learning_rate": 2.6819493208170538e-06, "loss": 0.029, "step": 133300 }, { "epoch": 0.5562208443558011, "grad_norm": 0.5516649621276353, "learning_rate": 2.68189902273016e-06, "loss": 0.0341, "step": 133305 }, { "epoch": 0.5562417070707913, "grad_norm": 0.3729688555171163, "learning_rate": 2.681848727473077e-06, "loss": 0.0176, "step": 133310 }, { "epoch": 0.5562625697857816, "grad_norm": 1.17919407386754, "learning_rate": 2.681798435045539e-06, "loss": 0.0187, "step": 133315 }, { "epoch": 0.556283432500772, "grad_norm": 0.9149211923806613, "learning_rate": 2.6817481454472805e-06, "loss": 0.0186, "step": 133320 }, { "epoch": 0.5563042952157622, "grad_norm": 0.5317152731854092, "learning_rate": 2.681697858678037e-06, "loss": 0.0238, "step": 133325 }, { "epoch": 0.5563251579307524, "grad_norm": 0.6548866077652993, "learning_rate": 2.6816475747375424e-06, "loss": 0.0245, "step": 133330 }, { "epoch": 0.5563460206457428, "grad_norm": 0.783617049082044, "learning_rate": 2.6815972936255314e-06, "loss": 0.0233, "step": 133335 }, { "epoch": 0.556366883360733, "grad_norm": 1.0847020327419665, "learning_rate": 2.68154701534174e-06, "loss": 0.0213, "step": 133340 }, { "epoch": 0.5563877460757233, "grad_norm": 0.4605604065605803, "learning_rate": 2.6814967398859014e-06, "loss": 0.0217, "step": 133345 }, { "epoch": 0.5564086087907136, "grad_norm": 1.1896574485786042, "learning_rate": 2.681446467257752e-06, "loss": 0.018, "step": 133350 }, { "epoch": 0.5564294715057039, "grad_norm": 0.37855579984172694, "learning_rate": 2.6813961974570267e-06, "loss": 0.0294, "step": 133355 }, { "epoch": 0.5564503342206941, "grad_norm": 0.5493301173486203, "learning_rate": 2.6813459304834593e-06, "loss": 0.0193, "step": 133360 }, { "epoch": 0.5564711969356845, "grad_norm": 1.0128052852654899, "learning_rate": 2.6812956663367856e-06, "loss": 0.0223, "step": 133365 }, { "epoch": 0.5564920596506747, "grad_norm": 0.6732960703252228, "learning_rate": 2.681245405016741e-06, "loss": 0.0228, "step": 133370 }, { "epoch": 0.556512922365665, "grad_norm": 0.2738495964146186, "learning_rate": 2.6811951465230596e-06, "loss": 0.0184, "step": 133375 }, { "epoch": 0.5565337850806552, "grad_norm": 0.7008332669651762, "learning_rate": 2.681144890855477e-06, "loss": 0.0199, "step": 133380 }, { "epoch": 0.5565546477956456, "grad_norm": 0.4048483393437594, "learning_rate": 2.6810946380137286e-06, "loss": 0.0321, "step": 133385 }, { "epoch": 0.5565755105106358, "grad_norm": 0.9651838008588923, "learning_rate": 2.6810443879975494e-06, "loss": 0.021, "step": 133390 }, { "epoch": 0.556596373225626, "grad_norm": 0.49091666315773813, "learning_rate": 2.680994140806675e-06, "loss": 0.0182, "step": 133395 }, { "epoch": 0.5566172359406164, "grad_norm": 0.37815607382706495, "learning_rate": 2.680943896440839e-06, "loss": 0.0244, "step": 133400 }, { "epoch": 0.5566380986556067, "grad_norm": 0.41886895553471126, "learning_rate": 2.6808936548997786e-06, "loss": 0.0253, "step": 133405 }, { "epoch": 0.5566589613705969, "grad_norm": 0.5998600520207923, "learning_rate": 2.6808434161832287e-06, "loss": 0.023, "step": 133410 }, { "epoch": 0.5566798240855872, "grad_norm": 0.4064121948050303, "learning_rate": 2.680793180290924e-06, "loss": 0.0195, "step": 133415 }, { "epoch": 0.5567006868005775, "grad_norm": 0.629274760169232, "learning_rate": 2.680742947222601e-06, "loss": 0.0246, "step": 133420 }, { "epoch": 0.5567215495155677, "grad_norm": 1.710974074641397, "learning_rate": 2.6806927169779934e-06, "loss": 0.0353, "step": 133425 }, { "epoch": 0.556742412230558, "grad_norm": 0.7992944195892772, "learning_rate": 2.6806424895568383e-06, "loss": 0.0356, "step": 133430 }, { "epoch": 0.5567632749455483, "grad_norm": 0.5512540028518368, "learning_rate": 2.6805922649588706e-06, "loss": 0.0209, "step": 133435 }, { "epoch": 0.5567841376605386, "grad_norm": 0.8026774333804515, "learning_rate": 2.6805420431838258e-06, "loss": 0.0197, "step": 133440 }, { "epoch": 0.5568050003755288, "grad_norm": 0.4662101357389591, "learning_rate": 2.6804918242314395e-06, "loss": 0.0245, "step": 133445 }, { "epoch": 0.5568258630905192, "grad_norm": 0.6961662464310477, "learning_rate": 2.6804416081014474e-06, "loss": 0.0231, "step": 133450 }, { "epoch": 0.5568467258055094, "grad_norm": 0.8249769895689754, "learning_rate": 2.680391394793585e-06, "loss": 0.0237, "step": 133455 }, { "epoch": 0.5568675885204997, "grad_norm": 0.9854873727214901, "learning_rate": 2.6803411843075885e-06, "loss": 0.0207, "step": 133460 }, { "epoch": 0.55688845123549, "grad_norm": 0.3985002745034882, "learning_rate": 2.680290976643192e-06, "loss": 0.0235, "step": 133465 }, { "epoch": 0.5569093139504803, "grad_norm": 0.22741528078182102, "learning_rate": 2.6802407718001327e-06, "loss": 0.0182, "step": 133470 }, { "epoch": 0.5569301766654705, "grad_norm": 0.5250043934443918, "learning_rate": 2.6801905697781465e-06, "loss": 0.0259, "step": 133475 }, { "epoch": 0.5569510393804609, "grad_norm": 0.7572686373173101, "learning_rate": 2.6801403705769684e-06, "loss": 0.024, "step": 133480 }, { "epoch": 0.5569719020954511, "grad_norm": 1.6795474661498797, "learning_rate": 2.6800901741963347e-06, "loss": 0.0243, "step": 133485 }, { "epoch": 0.5569927648104414, "grad_norm": 0.7625473383438296, "learning_rate": 2.680039980635981e-06, "loss": 0.0272, "step": 133490 }, { "epoch": 0.5570136275254316, "grad_norm": 1.2884140017718466, "learning_rate": 2.679989789895644e-06, "loss": 0.0321, "step": 133495 }, { "epoch": 0.557034490240422, "grad_norm": 1.2622893893805887, "learning_rate": 2.6799396019750585e-06, "loss": 0.0264, "step": 133500 }, { "epoch": 0.5570553529554122, "grad_norm": 0.4545176956285347, "learning_rate": 2.6798894168739615e-06, "loss": 0.0257, "step": 133505 }, { "epoch": 0.5570762156704024, "grad_norm": 0.559535137843223, "learning_rate": 2.6798392345920883e-06, "loss": 0.0175, "step": 133510 }, { "epoch": 0.5570970783853928, "grad_norm": 0.7935422581006614, "learning_rate": 2.6797890551291746e-06, "loss": 0.0169, "step": 133515 }, { "epoch": 0.557117941100383, "grad_norm": 1.105165051786566, "learning_rate": 2.6797388784849574e-06, "loss": 0.0276, "step": 133520 }, { "epoch": 0.5571388038153733, "grad_norm": 0.704747388659686, "learning_rate": 2.679688704659173e-06, "loss": 0.0205, "step": 133525 }, { "epoch": 0.5571596665303636, "grad_norm": 0.8961069045319989, "learning_rate": 2.6796385336515564e-06, "loss": 0.0211, "step": 133530 }, { "epoch": 0.5571805292453539, "grad_norm": 0.8149877597799805, "learning_rate": 2.679588365461845e-06, "loss": 0.0238, "step": 133535 }, { "epoch": 0.5572013919603441, "grad_norm": 0.5945208542052283, "learning_rate": 2.679538200089774e-06, "loss": 0.0234, "step": 133540 }, { "epoch": 0.5572222546753345, "grad_norm": 0.7606120164805001, "learning_rate": 2.6794880375350805e-06, "loss": 0.0192, "step": 133545 }, { "epoch": 0.5572431173903247, "grad_norm": 0.9561942561459362, "learning_rate": 2.6794378777975e-06, "loss": 0.0304, "step": 133550 }, { "epoch": 0.557263980105315, "grad_norm": 0.8735095207177238, "learning_rate": 2.6793877208767698e-06, "loss": 0.0252, "step": 133555 }, { "epoch": 0.5572848428203052, "grad_norm": 0.5287984711521677, "learning_rate": 2.6793375667726258e-06, "loss": 0.0267, "step": 133560 }, { "epoch": 0.5573057055352956, "grad_norm": 0.6642729100244078, "learning_rate": 2.679287415484804e-06, "loss": 0.0173, "step": 133565 }, { "epoch": 0.5573265682502858, "grad_norm": 0.9221661703264408, "learning_rate": 2.6792372670130412e-06, "loss": 0.0214, "step": 133570 }, { "epoch": 0.5573474309652761, "grad_norm": 0.8222937387519353, "learning_rate": 2.6791871213570734e-06, "loss": 0.0282, "step": 133575 }, { "epoch": 0.5573682936802664, "grad_norm": 0.8474511056449923, "learning_rate": 2.6791369785166384e-06, "loss": 0.0214, "step": 133580 }, { "epoch": 0.5573891563952567, "grad_norm": 0.4212247817688196, "learning_rate": 2.6790868384914708e-06, "loss": 0.0201, "step": 133585 }, { "epoch": 0.5574100191102469, "grad_norm": 0.7295115224465872, "learning_rate": 2.679036701281309e-06, "loss": 0.0245, "step": 133590 }, { "epoch": 0.5574308818252373, "grad_norm": 0.853726920370597, "learning_rate": 2.6789865668858888e-06, "loss": 0.0253, "step": 133595 }, { "epoch": 0.5574517445402275, "grad_norm": 0.8327757589958604, "learning_rate": 2.6789364353049467e-06, "loss": 0.0245, "step": 133600 }, { "epoch": 0.5574726072552177, "grad_norm": 0.5106721627034397, "learning_rate": 2.67888630653822e-06, "loss": 0.0249, "step": 133605 }, { "epoch": 0.557493469970208, "grad_norm": 0.7426721838844339, "learning_rate": 2.6788361805854442e-06, "loss": 0.0234, "step": 133610 }, { "epoch": 0.5575143326851983, "grad_norm": 0.9040972012250806, "learning_rate": 2.678786057446357e-06, "loss": 0.0267, "step": 133615 }, { "epoch": 0.5575351954001886, "grad_norm": 0.45864948868862204, "learning_rate": 2.6787359371206955e-06, "loss": 0.0167, "step": 133620 }, { "epoch": 0.5575560581151788, "grad_norm": 0.5098823357495575, "learning_rate": 2.6786858196081957e-06, "loss": 0.0194, "step": 133625 }, { "epoch": 0.5575769208301692, "grad_norm": 0.8391031691330195, "learning_rate": 2.678635704908595e-06, "loss": 0.0143, "step": 133630 }, { "epoch": 0.5575977835451594, "grad_norm": 0.7622374515934597, "learning_rate": 2.678585593021629e-06, "loss": 0.0248, "step": 133635 }, { "epoch": 0.5576186462601497, "grad_norm": 0.6881016939976766, "learning_rate": 2.678535483947037e-06, "loss": 0.0203, "step": 133640 }, { "epoch": 0.55763950897514, "grad_norm": 0.562162018315669, "learning_rate": 2.678485377684554e-06, "loss": 0.0208, "step": 133645 }, { "epoch": 0.5576603716901303, "grad_norm": 0.607405432669395, "learning_rate": 2.678435274233917e-06, "loss": 0.0203, "step": 133650 }, { "epoch": 0.5576812344051205, "grad_norm": 0.4970207834206602, "learning_rate": 2.678385173594864e-06, "loss": 0.0218, "step": 133655 }, { "epoch": 0.5577020971201109, "grad_norm": 0.901085842801952, "learning_rate": 2.6783350757671325e-06, "loss": 0.0235, "step": 133660 }, { "epoch": 0.5577229598351011, "grad_norm": 1.1185639929799975, "learning_rate": 2.678284980750457e-06, "loss": 0.0263, "step": 133665 }, { "epoch": 0.5577438225500914, "grad_norm": 0.6486375841213807, "learning_rate": 2.678234888544578e-06, "loss": 0.0317, "step": 133670 }, { "epoch": 0.5577646852650816, "grad_norm": 0.41679252566002006, "learning_rate": 2.6781847991492303e-06, "loss": 0.0201, "step": 133675 }, { "epoch": 0.557785547980072, "grad_norm": 0.5415473112482161, "learning_rate": 2.6781347125641523e-06, "loss": 0.0259, "step": 133680 }, { "epoch": 0.5578064106950622, "grad_norm": 0.8235558064341847, "learning_rate": 2.6780846287890798e-06, "loss": 0.0163, "step": 133685 }, { "epoch": 0.5578272734100524, "grad_norm": 0.5177783452857272, "learning_rate": 2.6780345478237523e-06, "loss": 0.019, "step": 133690 }, { "epoch": 0.5578481361250428, "grad_norm": 0.36197159286308467, "learning_rate": 2.677984469667905e-06, "loss": 0.017, "step": 133695 }, { "epoch": 0.557868998840033, "grad_norm": 0.6852455228754097, "learning_rate": 2.677934394321276e-06, "loss": 0.0189, "step": 133700 }, { "epoch": 0.5578898615550233, "grad_norm": 0.9548339620791516, "learning_rate": 2.6778843217836025e-06, "loss": 0.0279, "step": 133705 }, { "epoch": 0.5579107242700136, "grad_norm": 0.669117092847111, "learning_rate": 2.6778342520546224e-06, "loss": 0.0258, "step": 133710 }, { "epoch": 0.5579315869850039, "grad_norm": 0.29227672473050786, "learning_rate": 2.6777841851340723e-06, "loss": 0.0207, "step": 133715 }, { "epoch": 0.5579524496999941, "grad_norm": 0.4668622752476338, "learning_rate": 2.677734121021691e-06, "loss": 0.0217, "step": 133720 }, { "epoch": 0.5579733124149845, "grad_norm": 0.2723318616142447, "learning_rate": 2.6776840597172155e-06, "loss": 0.0245, "step": 133725 }, { "epoch": 0.5579941751299747, "grad_norm": 0.7097380770091877, "learning_rate": 2.6776340012203823e-06, "loss": 0.0207, "step": 133730 }, { "epoch": 0.558015037844965, "grad_norm": 0.5483705956631358, "learning_rate": 2.6775839455309292e-06, "loss": 0.0202, "step": 133735 }, { "epoch": 0.5580359005599552, "grad_norm": 1.3952505940517361, "learning_rate": 2.677533892648595e-06, "loss": 0.0298, "step": 133740 }, { "epoch": 0.5580567632749456, "grad_norm": 0.7811224877344509, "learning_rate": 2.6774838425731163e-06, "loss": 0.0276, "step": 133745 }, { "epoch": 0.5580776259899358, "grad_norm": 0.48725306748062786, "learning_rate": 2.6774337953042316e-06, "loss": 0.0262, "step": 133750 }, { "epoch": 0.5580984887049261, "grad_norm": 1.0791553634729063, "learning_rate": 2.6773837508416777e-06, "loss": 0.0276, "step": 133755 }, { "epoch": 0.5581193514199164, "grad_norm": 0.5075303188678263, "learning_rate": 2.677333709185193e-06, "loss": 0.0214, "step": 133760 }, { "epoch": 0.5581402141349067, "grad_norm": 0.40223171523391366, "learning_rate": 2.6772836703345145e-06, "loss": 0.0201, "step": 133765 }, { "epoch": 0.5581610768498969, "grad_norm": 1.4984410521633178, "learning_rate": 2.6772336342893814e-06, "loss": 0.0318, "step": 133770 }, { "epoch": 0.5581819395648873, "grad_norm": 0.36099665911005646, "learning_rate": 2.67718360104953e-06, "loss": 0.0164, "step": 133775 }, { "epoch": 0.5582028022798775, "grad_norm": 0.6187603890075462, "learning_rate": 2.6771335706146988e-06, "loss": 0.0198, "step": 133780 }, { "epoch": 0.5582236649948678, "grad_norm": 0.7239958281077153, "learning_rate": 2.677083542984626e-06, "loss": 0.0182, "step": 133785 }, { "epoch": 0.558244527709858, "grad_norm": 0.5828237208260487, "learning_rate": 2.6770335181590497e-06, "loss": 0.0295, "step": 133790 }, { "epoch": 0.5582653904248484, "grad_norm": 0.5628422805921804, "learning_rate": 2.6769834961377074e-06, "loss": 0.0264, "step": 133795 }, { "epoch": 0.5582862531398386, "grad_norm": 0.8698310970045123, "learning_rate": 2.6769334769203365e-06, "loss": 0.0217, "step": 133800 }, { "epoch": 0.5583071158548288, "grad_norm": 0.5490159164474756, "learning_rate": 2.6768834605066763e-06, "loss": 0.0255, "step": 133805 }, { "epoch": 0.5583279785698192, "grad_norm": 0.9540547945817562, "learning_rate": 2.676833446896464e-06, "loss": 0.0244, "step": 133810 }, { "epoch": 0.5583488412848094, "grad_norm": 0.68303068535002, "learning_rate": 2.6767834360894386e-06, "loss": 0.0289, "step": 133815 }, { "epoch": 0.5583697039997997, "grad_norm": 0.9175254840395948, "learning_rate": 2.6767334280853376e-06, "loss": 0.0215, "step": 133820 }, { "epoch": 0.55839056671479, "grad_norm": 0.8132931292414302, "learning_rate": 2.676683422883899e-06, "loss": 0.0196, "step": 133825 }, { "epoch": 0.5584114294297803, "grad_norm": 1.0963455541083809, "learning_rate": 2.676633420484861e-06, "loss": 0.0233, "step": 133830 }, { "epoch": 0.5584322921447705, "grad_norm": 0.9988185898909153, "learning_rate": 2.676583420887963e-06, "loss": 0.0254, "step": 133835 }, { "epoch": 0.5584531548597609, "grad_norm": 1.1164454535561343, "learning_rate": 2.6765334240929418e-06, "loss": 0.0251, "step": 133840 }, { "epoch": 0.5584740175747511, "grad_norm": 1.1542373815158018, "learning_rate": 2.6764834300995373e-06, "loss": 0.0273, "step": 133845 }, { "epoch": 0.5584948802897414, "grad_norm": 0.7941635894471032, "learning_rate": 2.676433438907486e-06, "loss": 0.0241, "step": 133850 }, { "epoch": 0.5585157430047316, "grad_norm": 0.3014162037422123, "learning_rate": 2.6763834505165277e-06, "loss": 0.0198, "step": 133855 }, { "epoch": 0.558536605719722, "grad_norm": 0.7451569955589344, "learning_rate": 2.6763334649264e-06, "loss": 0.0201, "step": 133860 }, { "epoch": 0.5585574684347122, "grad_norm": 0.8426732985001177, "learning_rate": 2.676283482136842e-06, "loss": 0.0396, "step": 133865 }, { "epoch": 0.5585783311497025, "grad_norm": 1.2336565629480136, "learning_rate": 2.676233502147592e-06, "loss": 0.0298, "step": 133870 }, { "epoch": 0.5585991938646928, "grad_norm": 0.3943437134369086, "learning_rate": 2.676183524958389e-06, "loss": 0.0304, "step": 133875 }, { "epoch": 0.558620056579683, "grad_norm": 1.6165153283579983, "learning_rate": 2.67613355056897e-06, "loss": 0.0274, "step": 133880 }, { "epoch": 0.5586409192946733, "grad_norm": 0.9279877571816991, "learning_rate": 2.6760835789790752e-06, "loss": 0.0246, "step": 133885 }, { "epoch": 0.5586617820096637, "grad_norm": 0.580576733519777, "learning_rate": 2.676033610188443e-06, "loss": 0.0288, "step": 133890 }, { "epoch": 0.5586826447246539, "grad_norm": 0.49077468662826235, "learning_rate": 2.6759836441968113e-06, "loss": 0.0203, "step": 133895 }, { "epoch": 0.5587035074396441, "grad_norm": 0.7919987433768053, "learning_rate": 2.675933681003919e-06, "loss": 0.0209, "step": 133900 }, { "epoch": 0.5587243701546345, "grad_norm": 0.4322611575068695, "learning_rate": 2.675883720609505e-06, "loss": 0.0188, "step": 133905 }, { "epoch": 0.5587452328696247, "grad_norm": 0.42720165611964317, "learning_rate": 2.675833763013308e-06, "loss": 0.0244, "step": 133910 }, { "epoch": 0.558766095584615, "grad_norm": 0.6115920754087115, "learning_rate": 2.6757838082150676e-06, "loss": 0.0278, "step": 133915 }, { "epoch": 0.5587869582996052, "grad_norm": 0.7959308940614066, "learning_rate": 2.6757338562145213e-06, "loss": 0.0317, "step": 133920 }, { "epoch": 0.5588078210145956, "grad_norm": 0.502176602006233, "learning_rate": 2.675683907011409e-06, "loss": 0.0257, "step": 133925 }, { "epoch": 0.5588286837295858, "grad_norm": 0.6266920721154235, "learning_rate": 2.675633960605469e-06, "loss": 0.0277, "step": 133930 }, { "epoch": 0.5588495464445761, "grad_norm": 0.7390513262065784, "learning_rate": 2.675584016996441e-06, "loss": 0.0219, "step": 133935 }, { "epoch": 0.5588704091595664, "grad_norm": 0.5363204156039232, "learning_rate": 2.6755340761840632e-06, "loss": 0.0244, "step": 133940 }, { "epoch": 0.5588912718745567, "grad_norm": 0.579350796903105, "learning_rate": 2.675484138168074e-06, "loss": 0.0166, "step": 133945 }, { "epoch": 0.5589121345895469, "grad_norm": 0.4288091997653889, "learning_rate": 2.6754342029482144e-06, "loss": 0.0269, "step": 133950 }, { "epoch": 0.5589329973045373, "grad_norm": 0.4526399253017003, "learning_rate": 2.675384270524222e-06, "loss": 0.0203, "step": 133955 }, { "epoch": 0.5589538600195275, "grad_norm": 0.6962436619632956, "learning_rate": 2.675334340895836e-06, "loss": 0.0256, "step": 133960 }, { "epoch": 0.5589747227345178, "grad_norm": 0.3895032462824837, "learning_rate": 2.675284414062796e-06, "loss": 0.0244, "step": 133965 }, { "epoch": 0.558995585449508, "grad_norm": 0.8380286035338428, "learning_rate": 2.675234490024841e-06, "loss": 0.0262, "step": 133970 }, { "epoch": 0.5590164481644984, "grad_norm": 0.41718614031137086, "learning_rate": 2.6751845687817108e-06, "loss": 0.0234, "step": 133975 }, { "epoch": 0.5590373108794886, "grad_norm": 0.5700200097388962, "learning_rate": 2.6751346503331433e-06, "loss": 0.0188, "step": 133980 }, { "epoch": 0.5590581735944788, "grad_norm": 0.7113749769542309, "learning_rate": 2.675084734678879e-06, "loss": 0.0172, "step": 133985 }, { "epoch": 0.5590790363094692, "grad_norm": 0.7955106215287447, "learning_rate": 2.6750348218186563e-06, "loss": 0.0213, "step": 133990 }, { "epoch": 0.5590998990244594, "grad_norm": 0.45737360039177394, "learning_rate": 2.6749849117522146e-06, "loss": 0.0254, "step": 133995 }, { "epoch": 0.5591207617394497, "grad_norm": 0.6578023888281718, "learning_rate": 2.674935004479295e-06, "loss": 0.0181, "step": 134000 }, { "epoch": 0.55914162445444, "grad_norm": 0.7702611058307223, "learning_rate": 2.6748850999996347e-06, "loss": 0.0201, "step": 134005 }, { "epoch": 0.5591624871694303, "grad_norm": 0.4592944812717393, "learning_rate": 2.674835198312974e-06, "loss": 0.0214, "step": 134010 }, { "epoch": 0.5591833498844205, "grad_norm": 0.6515157513689308, "learning_rate": 2.6747852994190525e-06, "loss": 0.0278, "step": 134015 }, { "epoch": 0.5592042125994109, "grad_norm": 0.6454969414846164, "learning_rate": 2.67473540331761e-06, "loss": 0.0283, "step": 134020 }, { "epoch": 0.5592250753144011, "grad_norm": 1.2324439415640476, "learning_rate": 2.6746855100083854e-06, "loss": 0.0199, "step": 134025 }, { "epoch": 0.5592459380293914, "grad_norm": 0.3371651671688997, "learning_rate": 2.674635619491118e-06, "loss": 0.0228, "step": 134030 }, { "epoch": 0.5592668007443816, "grad_norm": 0.5222887863618152, "learning_rate": 2.674585731765549e-06, "loss": 0.0221, "step": 134035 }, { "epoch": 0.559287663459372, "grad_norm": 0.7896913784034606, "learning_rate": 2.674535846831417e-06, "loss": 0.0247, "step": 134040 }, { "epoch": 0.5593085261743622, "grad_norm": 0.6596046473283641, "learning_rate": 2.6744859646884615e-06, "loss": 0.0297, "step": 134045 }, { "epoch": 0.5593293888893525, "grad_norm": 0.615478399868277, "learning_rate": 2.674436085336422e-06, "loss": 0.0177, "step": 134050 }, { "epoch": 0.5593502516043428, "grad_norm": 0.5469271785657176, "learning_rate": 2.6743862087750395e-06, "loss": 0.0224, "step": 134055 }, { "epoch": 0.5593711143193331, "grad_norm": 0.6055757555444349, "learning_rate": 2.6743363350040524e-06, "loss": 0.0246, "step": 134060 }, { "epoch": 0.5593919770343233, "grad_norm": 0.786365713280279, "learning_rate": 2.6742864640232006e-06, "loss": 0.0248, "step": 134065 }, { "epoch": 0.5594128397493137, "grad_norm": 0.7292009124878349, "learning_rate": 2.674236595832226e-06, "loss": 0.0233, "step": 134070 }, { "epoch": 0.5594337024643039, "grad_norm": 0.6716240941059924, "learning_rate": 2.6741867304308663e-06, "loss": 0.0242, "step": 134075 }, { "epoch": 0.5594545651792941, "grad_norm": 0.5541817432491792, "learning_rate": 2.674136867818862e-06, "loss": 0.0193, "step": 134080 }, { "epoch": 0.5594754278942845, "grad_norm": 0.5665674461736419, "learning_rate": 2.674087007995953e-06, "loss": 0.0282, "step": 134085 }, { "epoch": 0.5594962906092747, "grad_norm": 0.5313965968288643, "learning_rate": 2.6740371509618796e-06, "loss": 0.0236, "step": 134090 }, { "epoch": 0.559517153324265, "grad_norm": 0.5585192694702311, "learning_rate": 2.6739872967163817e-06, "loss": 0.0202, "step": 134095 }, { "epoch": 0.5595380160392552, "grad_norm": 0.579497410068123, "learning_rate": 2.6739374452591994e-06, "loss": 0.0183, "step": 134100 }, { "epoch": 0.5595588787542456, "grad_norm": 0.3857278580687911, "learning_rate": 2.673887596590073e-06, "loss": 0.0263, "step": 134105 }, { "epoch": 0.5595797414692358, "grad_norm": 0.6555718187123478, "learning_rate": 2.673837750708742e-06, "loss": 0.0234, "step": 134110 }, { "epoch": 0.5596006041842261, "grad_norm": 0.7657418830438633, "learning_rate": 2.673787907614947e-06, "loss": 0.0211, "step": 134115 }, { "epoch": 0.5596214668992164, "grad_norm": 0.546844807833343, "learning_rate": 2.673738067308428e-06, "loss": 0.0225, "step": 134120 }, { "epoch": 0.5596423296142067, "grad_norm": 0.835360103760002, "learning_rate": 2.6736882297889255e-06, "loss": 0.0208, "step": 134125 }, { "epoch": 0.5596631923291969, "grad_norm": 0.4478288979353672, "learning_rate": 2.6736383950561796e-06, "loss": 0.0257, "step": 134130 }, { "epoch": 0.5596840550441873, "grad_norm": 0.7195638227981671, "learning_rate": 2.6735885631099307e-06, "loss": 0.0188, "step": 134135 }, { "epoch": 0.5597049177591775, "grad_norm": 0.6908870772379849, "learning_rate": 2.673538733949919e-06, "loss": 0.022, "step": 134140 }, { "epoch": 0.5597257804741678, "grad_norm": 0.8332547696103586, "learning_rate": 2.673488907575885e-06, "loss": 0.0216, "step": 134145 }, { "epoch": 0.559746643189158, "grad_norm": 0.5427353122244408, "learning_rate": 2.673439083987569e-06, "loss": 0.0184, "step": 134150 }, { "epoch": 0.5597675059041484, "grad_norm": 1.074942475733133, "learning_rate": 2.6733892631847114e-06, "loss": 0.0267, "step": 134155 }, { "epoch": 0.5597883686191386, "grad_norm": 0.8458481738729174, "learning_rate": 2.6733394451670524e-06, "loss": 0.0222, "step": 134160 }, { "epoch": 0.5598092313341289, "grad_norm": 0.5693441618189768, "learning_rate": 2.6732896299343335e-06, "loss": 0.0242, "step": 134165 }, { "epoch": 0.5598300940491192, "grad_norm": 0.621410439198313, "learning_rate": 2.6732398174862943e-06, "loss": 0.02, "step": 134170 }, { "epoch": 0.5598509567641095, "grad_norm": 0.35369588435450355, "learning_rate": 2.673190007822675e-06, "loss": 0.0192, "step": 134175 }, { "epoch": 0.5598718194790997, "grad_norm": 0.5015841871132961, "learning_rate": 2.673140200943218e-06, "loss": 0.0201, "step": 134180 }, { "epoch": 0.55989268219409, "grad_norm": 0.671645060776287, "learning_rate": 2.6730903968476625e-06, "loss": 0.0185, "step": 134185 }, { "epoch": 0.5599135449090803, "grad_norm": 0.6969764433752098, "learning_rate": 2.6730405955357492e-06, "loss": 0.0248, "step": 134190 }, { "epoch": 0.5599344076240705, "grad_norm": 0.5016075073100037, "learning_rate": 2.672990797007219e-06, "loss": 0.0198, "step": 134195 }, { "epoch": 0.5599552703390609, "grad_norm": 1.3280816707199898, "learning_rate": 2.672941001261812e-06, "loss": 0.0334, "step": 134200 }, { "epoch": 0.5599761330540511, "grad_norm": 0.43045464186115984, "learning_rate": 2.672891208299271e-06, "loss": 0.0242, "step": 134205 }, { "epoch": 0.5599969957690414, "grad_norm": 0.7280917996817818, "learning_rate": 2.672841418119335e-06, "loss": 0.0258, "step": 134210 }, { "epoch": 0.5600178584840316, "grad_norm": 0.2814759111897548, "learning_rate": 2.672791630721745e-06, "loss": 0.0211, "step": 134215 }, { "epoch": 0.560038721199022, "grad_norm": 1.1287975841122453, "learning_rate": 2.6727418461062423e-06, "loss": 0.0253, "step": 134220 }, { "epoch": 0.5600595839140122, "grad_norm": 0.829263768437065, "learning_rate": 2.6726920642725674e-06, "loss": 0.0289, "step": 134225 }, { "epoch": 0.5600804466290025, "grad_norm": 0.45706387557542066, "learning_rate": 2.672642285220462e-06, "loss": 0.0259, "step": 134230 }, { "epoch": 0.5601013093439928, "grad_norm": 0.7024104707799151, "learning_rate": 2.672592508949666e-06, "loss": 0.0253, "step": 134235 }, { "epoch": 0.5601221720589831, "grad_norm": 0.6489330588367427, "learning_rate": 2.672542735459922e-06, "loss": 0.0234, "step": 134240 }, { "epoch": 0.5601430347739733, "grad_norm": 0.511027210078855, "learning_rate": 2.6724929647509694e-06, "loss": 0.0187, "step": 134245 }, { "epoch": 0.5601638974889637, "grad_norm": 0.6434251202969866, "learning_rate": 2.67244319682255e-06, "loss": 0.0217, "step": 134250 }, { "epoch": 0.5601847602039539, "grad_norm": 0.6785030453345539, "learning_rate": 2.672393431674405e-06, "loss": 0.0261, "step": 134255 }, { "epoch": 0.5602056229189442, "grad_norm": 0.768420190126764, "learning_rate": 2.6723436693062754e-06, "loss": 0.0155, "step": 134260 }, { "epoch": 0.5602264856339345, "grad_norm": 0.9501270759066623, "learning_rate": 2.6722939097179024e-06, "loss": 0.026, "step": 134265 }, { "epoch": 0.5602473483489248, "grad_norm": 0.889730144743822, "learning_rate": 2.672244152909027e-06, "loss": 0.0295, "step": 134270 }, { "epoch": 0.560268211063915, "grad_norm": 0.8977864051132826, "learning_rate": 2.6721943988793903e-06, "loss": 0.029, "step": 134275 }, { "epoch": 0.5602890737789052, "grad_norm": 0.746541027110094, "learning_rate": 2.6721446476287344e-06, "loss": 0.0274, "step": 134280 }, { "epoch": 0.5603099364938956, "grad_norm": 0.2691028531851368, "learning_rate": 2.6720948991568002e-06, "loss": 0.015, "step": 134285 }, { "epoch": 0.5603307992088858, "grad_norm": 0.5002024386614267, "learning_rate": 2.6720451534633285e-06, "loss": 0.0175, "step": 134290 }, { "epoch": 0.5603516619238761, "grad_norm": 1.2294730879252562, "learning_rate": 2.6719954105480613e-06, "loss": 0.0307, "step": 134295 }, { "epoch": 0.5603725246388664, "grad_norm": 0.7227021425172685, "learning_rate": 2.6719456704107403e-06, "loss": 0.0209, "step": 134300 }, { "epoch": 0.5603933873538567, "grad_norm": 0.7017655144502215, "learning_rate": 2.6718959330511063e-06, "loss": 0.0253, "step": 134305 }, { "epoch": 0.5604142500688469, "grad_norm": 1.662787287107837, "learning_rate": 2.6718461984689006e-06, "loss": 0.0261, "step": 134310 }, { "epoch": 0.5604351127838373, "grad_norm": 0.5836605284342674, "learning_rate": 2.671796466663865e-06, "loss": 0.018, "step": 134315 }, { "epoch": 0.5604559754988275, "grad_norm": 0.49181719543297403, "learning_rate": 2.671746737635742e-06, "loss": 0.0202, "step": 134320 }, { "epoch": 0.5604768382138178, "grad_norm": 1.1758293431549551, "learning_rate": 2.6716970113842718e-06, "loss": 0.0292, "step": 134325 }, { "epoch": 0.560497700928808, "grad_norm": 0.8896392782085244, "learning_rate": 2.6716472879091964e-06, "loss": 0.0276, "step": 134330 }, { "epoch": 0.5605185636437984, "grad_norm": 0.6695953912181795, "learning_rate": 2.671597567210258e-06, "loss": 0.0222, "step": 134335 }, { "epoch": 0.5605394263587886, "grad_norm": 0.5268387707163509, "learning_rate": 2.671547849287197e-06, "loss": 0.0248, "step": 134340 }, { "epoch": 0.5605602890737789, "grad_norm": 0.5548563052073452, "learning_rate": 2.6714981341397566e-06, "loss": 0.0194, "step": 134345 }, { "epoch": 0.5605811517887692, "grad_norm": 0.5324079547892829, "learning_rate": 2.6714484217676784e-06, "loss": 0.0211, "step": 134350 }, { "epoch": 0.5606020145037595, "grad_norm": 0.8584023781118221, "learning_rate": 2.671398712170703e-06, "loss": 0.0214, "step": 134355 }, { "epoch": 0.5606228772187497, "grad_norm": 1.104793979320699, "learning_rate": 2.6713490053485735e-06, "loss": 0.0276, "step": 134360 }, { "epoch": 0.5606437399337401, "grad_norm": 1.0376182755642849, "learning_rate": 2.671299301301031e-06, "loss": 0.0265, "step": 134365 }, { "epoch": 0.5606646026487303, "grad_norm": 0.7953961226735897, "learning_rate": 2.6712496000278177e-06, "loss": 0.0202, "step": 134370 }, { "epoch": 0.5606854653637205, "grad_norm": 0.9193220774644864, "learning_rate": 2.671199901528675e-06, "loss": 0.0213, "step": 134375 }, { "epoch": 0.5607063280787109, "grad_norm": 0.576781432402112, "learning_rate": 2.671150205803345e-06, "loss": 0.026, "step": 134380 }, { "epoch": 0.5607271907937011, "grad_norm": 1.6019614574737107, "learning_rate": 2.671100512851571e-06, "loss": 0.0292, "step": 134385 }, { "epoch": 0.5607480535086914, "grad_norm": 0.7026744357437688, "learning_rate": 2.671050822673093e-06, "loss": 0.0255, "step": 134390 }, { "epoch": 0.5607689162236816, "grad_norm": 0.41640266572289913, "learning_rate": 2.6710011352676547e-06, "loss": 0.0301, "step": 134395 }, { "epoch": 0.560789778938672, "grad_norm": 0.6121014499857147, "learning_rate": 2.670951450634997e-06, "loss": 0.0222, "step": 134400 }, { "epoch": 0.5608106416536622, "grad_norm": 0.5114030749831106, "learning_rate": 2.670901768774863e-06, "loss": 0.0213, "step": 134405 }, { "epoch": 0.5608315043686525, "grad_norm": 0.7457699116950625, "learning_rate": 2.670852089686994e-06, "loss": 0.0262, "step": 134410 }, { "epoch": 0.5608523670836428, "grad_norm": 0.8065682081772497, "learning_rate": 2.6708024133711327e-06, "loss": 0.0209, "step": 134415 }, { "epoch": 0.5608732297986331, "grad_norm": 0.6265104977350826, "learning_rate": 2.670752739827021e-06, "loss": 0.0219, "step": 134420 }, { "epoch": 0.5608940925136233, "grad_norm": 0.9345750957773687, "learning_rate": 2.6707030690544012e-06, "loss": 0.0202, "step": 134425 }, { "epoch": 0.5609149552286137, "grad_norm": 0.6263514334037444, "learning_rate": 2.670653401053016e-06, "loss": 0.0264, "step": 134430 }, { "epoch": 0.5609358179436039, "grad_norm": 0.6604907604746099, "learning_rate": 2.670603735822608e-06, "loss": 0.027, "step": 134435 }, { "epoch": 0.5609566806585942, "grad_norm": 0.6443368248119267, "learning_rate": 2.670554073362918e-06, "loss": 0.0258, "step": 134440 }, { "epoch": 0.5609775433735845, "grad_norm": 0.5257717276448773, "learning_rate": 2.6705044136736897e-06, "loss": 0.0226, "step": 134445 }, { "epoch": 0.5609984060885748, "grad_norm": 0.7832019832820559, "learning_rate": 2.670454756754665e-06, "loss": 0.0244, "step": 134450 }, { "epoch": 0.561019268803565, "grad_norm": 0.43769291288904316, "learning_rate": 2.670405102605587e-06, "loss": 0.0209, "step": 134455 }, { "epoch": 0.5610401315185553, "grad_norm": 0.5674707614331022, "learning_rate": 2.670355451226198e-06, "loss": 0.0231, "step": 134460 }, { "epoch": 0.5610609942335456, "grad_norm": 0.5867199450553477, "learning_rate": 2.67030580261624e-06, "loss": 0.0184, "step": 134465 }, { "epoch": 0.5610818569485359, "grad_norm": 0.7825089103188204, "learning_rate": 2.6702561567754556e-06, "loss": 0.0246, "step": 134470 }, { "epoch": 0.5611027196635261, "grad_norm": 0.29374660062538804, "learning_rate": 2.6702065137035876e-06, "loss": 0.0311, "step": 134475 }, { "epoch": 0.5611235823785165, "grad_norm": 0.9244062056703216, "learning_rate": 2.6701568734003785e-06, "loss": 0.0231, "step": 134480 }, { "epoch": 0.5611444450935067, "grad_norm": 0.6946910625660285, "learning_rate": 2.6701072358655717e-06, "loss": 0.0236, "step": 134485 }, { "epoch": 0.5611653078084969, "grad_norm": 0.9841264700982396, "learning_rate": 2.670057601098909e-06, "loss": 0.0229, "step": 134490 }, { "epoch": 0.5611861705234873, "grad_norm": 0.604649612269179, "learning_rate": 2.6700079691001335e-06, "loss": 0.0198, "step": 134495 }, { "epoch": 0.5612070332384775, "grad_norm": 1.3035574227107716, "learning_rate": 2.6699583398689876e-06, "loss": 0.0205, "step": 134500 }, { "epoch": 0.5612278959534678, "grad_norm": 0.4900116562374555, "learning_rate": 2.669908713405215e-06, "loss": 0.0166, "step": 134505 }, { "epoch": 0.561248758668458, "grad_norm": 0.7084572665478471, "learning_rate": 2.669859089708557e-06, "loss": 0.0264, "step": 134510 }, { "epoch": 0.5612696213834484, "grad_norm": 0.4870021718831338, "learning_rate": 2.6698094687787577e-06, "loss": 0.0283, "step": 134515 }, { "epoch": 0.5612904840984386, "grad_norm": 1.2813011053120156, "learning_rate": 2.66975985061556e-06, "loss": 0.0321, "step": 134520 }, { "epoch": 0.5613113468134289, "grad_norm": 0.42983097538710857, "learning_rate": 2.669710235218706e-06, "loss": 0.0265, "step": 134525 }, { "epoch": 0.5613322095284192, "grad_norm": 1.0322640826521896, "learning_rate": 2.6696606225879397e-06, "loss": 0.0216, "step": 134530 }, { "epoch": 0.5613530722434095, "grad_norm": 0.3792522834158378, "learning_rate": 2.669611012723003e-06, "loss": 0.0172, "step": 134535 }, { "epoch": 0.5613739349583997, "grad_norm": 0.44787559666037385, "learning_rate": 2.6695614056236398e-06, "loss": 0.0257, "step": 134540 }, { "epoch": 0.5613947976733901, "grad_norm": 0.8497280004127409, "learning_rate": 2.669511801289592e-06, "loss": 0.0313, "step": 134545 }, { "epoch": 0.5614156603883803, "grad_norm": 0.39027325828175774, "learning_rate": 2.669462199720604e-06, "loss": 0.0184, "step": 134550 }, { "epoch": 0.5614365231033706, "grad_norm": 0.8167439227450394, "learning_rate": 2.669412600916419e-06, "loss": 0.026, "step": 134555 }, { "epoch": 0.5614573858183609, "grad_norm": 0.5106424555376299, "learning_rate": 2.669363004876779e-06, "loss": 0.0204, "step": 134560 }, { "epoch": 0.5614782485333512, "grad_norm": 0.8486684104824377, "learning_rate": 2.6693134116014273e-06, "loss": 0.0258, "step": 134565 }, { "epoch": 0.5614991112483414, "grad_norm": 0.6377933125833956, "learning_rate": 2.6692638210901086e-06, "loss": 0.0211, "step": 134570 }, { "epoch": 0.5615199739633316, "grad_norm": 1.183278156775183, "learning_rate": 2.6692142333425643e-06, "loss": 0.0276, "step": 134575 }, { "epoch": 0.561540836678322, "grad_norm": 1.194538028669918, "learning_rate": 2.6691646483585388e-06, "loss": 0.0208, "step": 134580 }, { "epoch": 0.5615616993933122, "grad_norm": 0.4745165175612257, "learning_rate": 2.6691150661377752e-06, "loss": 0.024, "step": 134585 }, { "epoch": 0.5615825621083025, "grad_norm": 0.6478382738034888, "learning_rate": 2.669065486680017e-06, "loss": 0.0229, "step": 134590 }, { "epoch": 0.5616034248232928, "grad_norm": 0.7709257500299693, "learning_rate": 2.669015909985007e-06, "loss": 0.0283, "step": 134595 }, { "epoch": 0.5616242875382831, "grad_norm": 0.7820686102485854, "learning_rate": 2.6689663360524894e-06, "loss": 0.0224, "step": 134600 }, { "epoch": 0.5616451502532733, "grad_norm": 1.3402288946936418, "learning_rate": 2.6689167648822075e-06, "loss": 0.0273, "step": 134605 }, { "epoch": 0.5616660129682637, "grad_norm": 0.7499782501848046, "learning_rate": 2.668867196473904e-06, "loss": 0.0213, "step": 134610 }, { "epoch": 0.5616868756832539, "grad_norm": 1.2860446145268174, "learning_rate": 2.6688176308273233e-06, "loss": 0.0277, "step": 134615 }, { "epoch": 0.5617077383982442, "grad_norm": 0.5454232027358134, "learning_rate": 2.6687680679422086e-06, "loss": 0.0236, "step": 134620 }, { "epoch": 0.5617286011132345, "grad_norm": 0.49291524174273615, "learning_rate": 2.6687185078183036e-06, "loss": 0.0206, "step": 134625 }, { "epoch": 0.5617494638282248, "grad_norm": 0.7217763842771031, "learning_rate": 2.668668950455352e-06, "loss": 0.0191, "step": 134630 }, { "epoch": 0.561770326543215, "grad_norm": 0.3931308936204659, "learning_rate": 2.668619395853097e-06, "loss": 0.0163, "step": 134635 }, { "epoch": 0.5617911892582053, "grad_norm": 0.6371768046807958, "learning_rate": 2.6685698440112834e-06, "loss": 0.0326, "step": 134640 }, { "epoch": 0.5618120519731956, "grad_norm": 0.656332587237471, "learning_rate": 2.6685202949296535e-06, "loss": 0.0256, "step": 134645 }, { "epoch": 0.5618329146881859, "grad_norm": 0.7675810389055294, "learning_rate": 2.668470748607952e-06, "loss": 0.022, "step": 134650 }, { "epoch": 0.5618537774031761, "grad_norm": 0.690523207837359, "learning_rate": 2.6684212050459218e-06, "loss": 0.0234, "step": 134655 }, { "epoch": 0.5618746401181665, "grad_norm": 0.510305292432068, "learning_rate": 2.6683716642433075e-06, "loss": 0.023, "step": 134660 }, { "epoch": 0.5618955028331567, "grad_norm": 1.101536004658624, "learning_rate": 2.6683221261998535e-06, "loss": 0.0181, "step": 134665 }, { "epoch": 0.561916365548147, "grad_norm": 0.389440367777262, "learning_rate": 2.6682725909153028e-06, "loss": 0.0249, "step": 134670 }, { "epoch": 0.5619372282631373, "grad_norm": 0.7620602994179271, "learning_rate": 2.6682230583893994e-06, "loss": 0.0279, "step": 134675 }, { "epoch": 0.5619580909781275, "grad_norm": 0.34344195702234076, "learning_rate": 2.668173528621887e-06, "loss": 0.0209, "step": 134680 }, { "epoch": 0.5619789536931178, "grad_norm": 0.7439599516479446, "learning_rate": 2.6681240016125103e-06, "loss": 0.0185, "step": 134685 }, { "epoch": 0.561999816408108, "grad_norm": 1.437849630633412, "learning_rate": 2.6680744773610135e-06, "loss": 0.0263, "step": 134690 }, { "epoch": 0.5620206791230984, "grad_norm": 0.7314251399880745, "learning_rate": 2.6680249558671395e-06, "loss": 0.0247, "step": 134695 }, { "epoch": 0.5620415418380886, "grad_norm": 0.7208298546194354, "learning_rate": 2.667975437130633e-06, "loss": 0.0203, "step": 134700 }, { "epoch": 0.5620624045530789, "grad_norm": 0.5240029254647113, "learning_rate": 2.667925921151239e-06, "loss": 0.0276, "step": 134705 }, { "epoch": 0.5620832672680692, "grad_norm": 0.39470157230018815, "learning_rate": 2.6678764079286996e-06, "loss": 0.0187, "step": 134710 }, { "epoch": 0.5621041299830595, "grad_norm": 0.6128501250816083, "learning_rate": 2.667826897462761e-06, "loss": 0.0246, "step": 134715 }, { "epoch": 0.5621249926980497, "grad_norm": 0.46269977393645917, "learning_rate": 2.667777389753166e-06, "loss": 0.0276, "step": 134720 }, { "epoch": 0.5621458554130401, "grad_norm": 0.9373733412364708, "learning_rate": 2.6677278847996603e-06, "loss": 0.029, "step": 134725 }, { "epoch": 0.5621667181280303, "grad_norm": 0.6818278217211193, "learning_rate": 2.6676783826019868e-06, "loss": 0.021, "step": 134730 }, { "epoch": 0.5621875808430206, "grad_norm": 0.9976687758245892, "learning_rate": 2.6676288831598905e-06, "loss": 0.033, "step": 134735 }, { "epoch": 0.5622084435580109, "grad_norm": 0.6107056734144961, "learning_rate": 2.6675793864731162e-06, "loss": 0.0187, "step": 134740 }, { "epoch": 0.5622293062730012, "grad_norm": 0.90045903944331, "learning_rate": 2.6675298925414073e-06, "loss": 0.0261, "step": 134745 }, { "epoch": 0.5622501689879914, "grad_norm": 0.7781773604841067, "learning_rate": 2.667480401364509e-06, "loss": 0.0183, "step": 134750 }, { "epoch": 0.5622710317029816, "grad_norm": 0.9359590020498102, "learning_rate": 2.667430912942165e-06, "loss": 0.0211, "step": 134755 }, { "epoch": 0.562291894417972, "grad_norm": 0.49226757550384337, "learning_rate": 2.6673814272741203e-06, "loss": 0.0325, "step": 134760 }, { "epoch": 0.5623127571329622, "grad_norm": 1.1728116624029539, "learning_rate": 2.66733194436012e-06, "loss": 0.0232, "step": 134765 }, { "epoch": 0.5623336198479525, "grad_norm": 1.3576405726908545, "learning_rate": 2.667282464199907e-06, "loss": 0.0255, "step": 134770 }, { "epoch": 0.5623544825629428, "grad_norm": 0.5077108546777543, "learning_rate": 2.667232986793228e-06, "loss": 0.0271, "step": 134775 }, { "epoch": 0.5623753452779331, "grad_norm": 0.9088511009149323, "learning_rate": 2.667183512139825e-06, "loss": 0.0256, "step": 134780 }, { "epoch": 0.5623962079929233, "grad_norm": 0.7880121008493505, "learning_rate": 2.6671340402394457e-06, "loss": 0.0263, "step": 134785 }, { "epoch": 0.5624170707079137, "grad_norm": 0.9909794363849715, "learning_rate": 2.6670845710918327e-06, "loss": 0.0259, "step": 134790 }, { "epoch": 0.5624379334229039, "grad_norm": 0.648827502543235, "learning_rate": 2.667035104696732e-06, "loss": 0.0301, "step": 134795 }, { "epoch": 0.5624587961378942, "grad_norm": 0.5125936185167222, "learning_rate": 2.6669856410538865e-06, "loss": 0.0216, "step": 134800 }, { "epoch": 0.5624796588528845, "grad_norm": 0.46749610209195797, "learning_rate": 2.666936180163042e-06, "loss": 0.0224, "step": 134805 }, { "epoch": 0.5625005215678748, "grad_norm": 0.45640164145865125, "learning_rate": 2.666886722023944e-06, "loss": 0.0282, "step": 134810 }, { "epoch": 0.562521384282865, "grad_norm": 0.9098202524777584, "learning_rate": 2.6668372666363372e-06, "loss": 0.0243, "step": 134815 }, { "epoch": 0.5625422469978553, "grad_norm": 0.9927405513406823, "learning_rate": 2.6667878139999658e-06, "loss": 0.0298, "step": 134820 }, { "epoch": 0.5625631097128456, "grad_norm": 0.5174093874766688, "learning_rate": 2.666738364114575e-06, "loss": 0.0162, "step": 134825 }, { "epoch": 0.5625839724278359, "grad_norm": 0.8469214981751253, "learning_rate": 2.66668891697991e-06, "loss": 0.0204, "step": 134830 }, { "epoch": 0.5626048351428261, "grad_norm": 0.31273930526608046, "learning_rate": 2.666639472595715e-06, "loss": 0.0224, "step": 134835 }, { "epoch": 0.5626256978578165, "grad_norm": 0.8980447627087955, "learning_rate": 2.6665900309617365e-06, "loss": 0.0381, "step": 134840 }, { "epoch": 0.5626465605728067, "grad_norm": 0.8595148937036874, "learning_rate": 2.6665405920777175e-06, "loss": 0.0231, "step": 134845 }, { "epoch": 0.562667423287797, "grad_norm": 0.6002116189354445, "learning_rate": 2.6664911559434054e-06, "loss": 0.0257, "step": 134850 }, { "epoch": 0.5626882860027873, "grad_norm": 0.7929544315472024, "learning_rate": 2.666441722558543e-06, "loss": 0.0138, "step": 134855 }, { "epoch": 0.5627091487177776, "grad_norm": 0.5797563188962568, "learning_rate": 2.6663922919228772e-06, "loss": 0.0252, "step": 134860 }, { "epoch": 0.5627300114327678, "grad_norm": 0.6949740987629449, "learning_rate": 2.666342864036153e-06, "loss": 0.0235, "step": 134865 }, { "epoch": 0.562750874147758, "grad_norm": 0.5375345535554549, "learning_rate": 2.6662934388981144e-06, "loss": 0.0224, "step": 134870 }, { "epoch": 0.5627717368627484, "grad_norm": 0.7193503481068878, "learning_rate": 2.6662440165085077e-06, "loss": 0.0168, "step": 134875 }, { "epoch": 0.5627925995777386, "grad_norm": 0.7090764223443138, "learning_rate": 2.6661945968670783e-06, "loss": 0.0282, "step": 134880 }, { "epoch": 0.5628134622927289, "grad_norm": 0.9935043880966142, "learning_rate": 2.6661451799735706e-06, "loss": 0.0311, "step": 134885 }, { "epoch": 0.5628343250077192, "grad_norm": 0.816005841770896, "learning_rate": 2.6660957658277307e-06, "loss": 0.0288, "step": 134890 }, { "epoch": 0.5628551877227095, "grad_norm": 1.1428046870481874, "learning_rate": 2.666046354429304e-06, "loss": 0.0238, "step": 134895 }, { "epoch": 0.5628760504376997, "grad_norm": 0.7967267681978338, "learning_rate": 2.6659969457780356e-06, "loss": 0.023, "step": 134900 }, { "epoch": 0.5628969131526901, "grad_norm": 0.6880630217182552, "learning_rate": 2.665947539873671e-06, "loss": 0.0211, "step": 134905 }, { "epoch": 0.5629177758676803, "grad_norm": 0.9814804537844652, "learning_rate": 2.665898136715955e-06, "loss": 0.0303, "step": 134910 }, { "epoch": 0.5629386385826706, "grad_norm": 0.41557632557312785, "learning_rate": 2.6658487363046355e-06, "loss": 0.0232, "step": 134915 }, { "epoch": 0.5629595012976609, "grad_norm": 0.41737427306874114, "learning_rate": 2.6657993386394555e-06, "loss": 0.0212, "step": 134920 }, { "epoch": 0.5629803640126512, "grad_norm": 1.0278729770212496, "learning_rate": 2.665749943720161e-06, "loss": 0.0252, "step": 134925 }, { "epoch": 0.5630012267276414, "grad_norm": 0.33476040222175346, "learning_rate": 2.665700551546499e-06, "loss": 0.0209, "step": 134930 }, { "epoch": 0.5630220894426317, "grad_norm": 1.2935977196054336, "learning_rate": 2.665651162118214e-06, "loss": 0.0271, "step": 134935 }, { "epoch": 0.563042952157622, "grad_norm": 1.0849494110784128, "learning_rate": 2.6656017754350523e-06, "loss": 0.0202, "step": 134940 }, { "epoch": 0.5630638148726123, "grad_norm": 0.5473772718082908, "learning_rate": 2.6655523914967583e-06, "loss": 0.026, "step": 134945 }, { "epoch": 0.5630846775876025, "grad_norm": 0.6106925743755832, "learning_rate": 2.6655030103030792e-06, "loss": 0.0288, "step": 134950 }, { "epoch": 0.5631055403025929, "grad_norm": 0.7012260259512341, "learning_rate": 2.66545363185376e-06, "loss": 0.0287, "step": 134955 }, { "epoch": 0.5631264030175831, "grad_norm": 0.5255241918784577, "learning_rate": 2.6654042561485472e-06, "loss": 0.0217, "step": 134960 }, { "epoch": 0.5631472657325733, "grad_norm": 0.9670080226056414, "learning_rate": 2.665354883187186e-06, "loss": 0.0236, "step": 134965 }, { "epoch": 0.5631681284475637, "grad_norm": 1.15532350317012, "learning_rate": 2.6653055129694227e-06, "loss": 0.0202, "step": 134970 }, { "epoch": 0.5631889911625539, "grad_norm": 0.7106695007010395, "learning_rate": 2.665256145495003e-06, "loss": 0.0233, "step": 134975 }, { "epoch": 0.5632098538775442, "grad_norm": 0.7361250098374875, "learning_rate": 2.665206780763673e-06, "loss": 0.0251, "step": 134980 }, { "epoch": 0.5632307165925345, "grad_norm": 1.3597821866104767, "learning_rate": 2.6651574187751777e-06, "loss": 0.039, "step": 134985 }, { "epoch": 0.5632515793075248, "grad_norm": 0.8177133302935382, "learning_rate": 2.6651080595292645e-06, "loss": 0.0171, "step": 134990 }, { "epoch": 0.563272442022515, "grad_norm": 0.4673357955247871, "learning_rate": 2.665058703025679e-06, "loss": 0.0285, "step": 134995 }, { "epoch": 0.5632933047375053, "grad_norm": 0.41813349048357507, "learning_rate": 2.665009349264167e-06, "loss": 0.0188, "step": 135000 }, { "epoch": 0.5633141674524956, "grad_norm": 1.0219296254921175, "learning_rate": 2.6649599982444756e-06, "loss": 0.033, "step": 135005 }, { "epoch": 0.5633350301674859, "grad_norm": 0.38790853220187216, "learning_rate": 2.664910649966349e-06, "loss": 0.0223, "step": 135010 }, { "epoch": 0.5633558928824761, "grad_norm": 0.31786289413319363, "learning_rate": 2.664861304429535e-06, "loss": 0.0163, "step": 135015 }, { "epoch": 0.5633767555974665, "grad_norm": 0.6731712418994804, "learning_rate": 2.6648119616337797e-06, "loss": 0.0192, "step": 135020 }, { "epoch": 0.5633976183124567, "grad_norm": 0.38009201206889826, "learning_rate": 2.6647626215788286e-06, "loss": 0.0277, "step": 135025 }, { "epoch": 0.563418481027447, "grad_norm": 0.7395117143449503, "learning_rate": 2.664713284264428e-06, "loss": 0.0283, "step": 135030 }, { "epoch": 0.5634393437424373, "grad_norm": 0.7670566987861, "learning_rate": 2.6646639496903247e-06, "loss": 0.022, "step": 135035 }, { "epoch": 0.5634602064574276, "grad_norm": 0.8010094040431514, "learning_rate": 2.664614617856265e-06, "loss": 0.0159, "step": 135040 }, { "epoch": 0.5634810691724178, "grad_norm": 0.5271007761644525, "learning_rate": 2.6645652887619957e-06, "loss": 0.0189, "step": 135045 }, { "epoch": 0.563501931887408, "grad_norm": 0.7517920610295354, "learning_rate": 2.6645159624072612e-06, "loss": 0.0306, "step": 135050 }, { "epoch": 0.5635227946023984, "grad_norm": 0.6768997636800275, "learning_rate": 2.6644666387918106e-06, "loss": 0.0268, "step": 135055 }, { "epoch": 0.5635436573173886, "grad_norm": 0.6031390712175037, "learning_rate": 2.6644173179153892e-06, "loss": 0.0273, "step": 135060 }, { "epoch": 0.5635645200323789, "grad_norm": 0.8845789041643494, "learning_rate": 2.6643679997777426e-06, "loss": 0.0294, "step": 135065 }, { "epoch": 0.5635853827473692, "grad_norm": 0.6983077980236232, "learning_rate": 2.664318684378619e-06, "loss": 0.0399, "step": 135070 }, { "epoch": 0.5636062454623595, "grad_norm": 0.6489037195132581, "learning_rate": 2.664269371717764e-06, "loss": 0.029, "step": 135075 }, { "epoch": 0.5636271081773497, "grad_norm": 0.6473789890925132, "learning_rate": 2.6642200617949244e-06, "loss": 0.0191, "step": 135080 }, { "epoch": 0.5636479708923401, "grad_norm": 0.8109754253126886, "learning_rate": 2.6641707546098466e-06, "loss": 0.0247, "step": 135085 }, { "epoch": 0.5636688336073303, "grad_norm": 0.7141144998764178, "learning_rate": 2.6641214501622777e-06, "loss": 0.0233, "step": 135090 }, { "epoch": 0.5636896963223206, "grad_norm": 0.5160650469942768, "learning_rate": 2.664072148451964e-06, "loss": 0.0269, "step": 135095 }, { "epoch": 0.5637105590373109, "grad_norm": 0.4495080944261872, "learning_rate": 2.664022849478652e-06, "loss": 0.0283, "step": 135100 }, { "epoch": 0.5637314217523012, "grad_norm": 0.6932853650462676, "learning_rate": 2.6639735532420896e-06, "loss": 0.0214, "step": 135105 }, { "epoch": 0.5637522844672914, "grad_norm": 0.6555445632807525, "learning_rate": 2.6639242597420225e-06, "loss": 0.0241, "step": 135110 }, { "epoch": 0.5637731471822817, "grad_norm": 0.603710293301525, "learning_rate": 2.663874968978198e-06, "loss": 0.0195, "step": 135115 }, { "epoch": 0.563794009897272, "grad_norm": 0.45836446444565726, "learning_rate": 2.663825680950363e-06, "loss": 0.022, "step": 135120 }, { "epoch": 0.5638148726122623, "grad_norm": 0.7777935825618122, "learning_rate": 2.663776395658264e-06, "loss": 0.0183, "step": 135125 }, { "epoch": 0.5638357353272525, "grad_norm": 1.4030330299157796, "learning_rate": 2.6637271131016486e-06, "loss": 0.0185, "step": 135130 }, { "epoch": 0.5638565980422429, "grad_norm": 0.755527861415406, "learning_rate": 2.6636778332802633e-06, "loss": 0.0228, "step": 135135 }, { "epoch": 0.5638774607572331, "grad_norm": 0.8275099172043026, "learning_rate": 2.6636285561938545e-06, "loss": 0.0264, "step": 135140 }, { "epoch": 0.5638983234722234, "grad_norm": 0.48831768885643023, "learning_rate": 2.663579281842171e-06, "loss": 0.0188, "step": 135145 }, { "epoch": 0.5639191861872137, "grad_norm": 0.4861758849277426, "learning_rate": 2.6635300102249578e-06, "loss": 0.0241, "step": 135150 }, { "epoch": 0.563940048902204, "grad_norm": 0.860696684295454, "learning_rate": 2.663480741341963e-06, "loss": 0.0201, "step": 135155 }, { "epoch": 0.5639609116171942, "grad_norm": 0.7227997701099558, "learning_rate": 2.663431475192934e-06, "loss": 0.0235, "step": 135160 }, { "epoch": 0.5639817743321844, "grad_norm": 0.683986633175323, "learning_rate": 2.6633822117776183e-06, "loss": 0.0231, "step": 135165 }, { "epoch": 0.5640026370471748, "grad_norm": 1.0158668499661874, "learning_rate": 2.663332951095761e-06, "loss": 0.0275, "step": 135170 }, { "epoch": 0.564023499762165, "grad_norm": 0.9165252186063517, "learning_rate": 2.663283693147112e-06, "loss": 0.0318, "step": 135175 }, { "epoch": 0.5640443624771553, "grad_norm": 0.9468745229629142, "learning_rate": 2.6632344379314174e-06, "loss": 0.0309, "step": 135180 }, { "epoch": 0.5640652251921456, "grad_norm": 0.5799096639666026, "learning_rate": 2.663185185448423e-06, "loss": 0.0195, "step": 135185 }, { "epoch": 0.5640860879071359, "grad_norm": 0.5407350750487422, "learning_rate": 2.6631359356978785e-06, "loss": 0.0301, "step": 135190 }, { "epoch": 0.5641069506221261, "grad_norm": 0.5445224119136398, "learning_rate": 2.6630866886795304e-06, "loss": 0.0171, "step": 135195 }, { "epoch": 0.5641278133371165, "grad_norm": 0.8184232182721505, "learning_rate": 2.663037444393126e-06, "loss": 0.0213, "step": 135200 }, { "epoch": 0.5641486760521067, "grad_norm": 0.48064501561359857, "learning_rate": 2.6629882028384118e-06, "loss": 0.0203, "step": 135205 }, { "epoch": 0.564169538767097, "grad_norm": 0.586316183755496, "learning_rate": 2.662938964015137e-06, "loss": 0.0237, "step": 135210 }, { "epoch": 0.5641904014820873, "grad_norm": 0.48975912633510454, "learning_rate": 2.6628897279230483e-06, "loss": 0.0255, "step": 135215 }, { "epoch": 0.5642112641970776, "grad_norm": 0.7457327484178792, "learning_rate": 2.6628404945618923e-06, "loss": 0.0229, "step": 135220 }, { "epoch": 0.5642321269120678, "grad_norm": 0.5276930778150195, "learning_rate": 2.6627912639314185e-06, "loss": 0.0236, "step": 135225 }, { "epoch": 0.564252989627058, "grad_norm": 0.5973004179797035, "learning_rate": 2.6627420360313728e-06, "loss": 0.0273, "step": 135230 }, { "epoch": 0.5642738523420484, "grad_norm": 0.8010290401882549, "learning_rate": 2.6626928108615036e-06, "loss": 0.0211, "step": 135235 }, { "epoch": 0.5642947150570387, "grad_norm": 0.5751159155732102, "learning_rate": 2.662643588421558e-06, "loss": 0.019, "step": 135240 }, { "epoch": 0.5643155777720289, "grad_norm": 0.47180431726500116, "learning_rate": 2.662594368711284e-06, "loss": 0.0214, "step": 135245 }, { "epoch": 0.5643364404870193, "grad_norm": 0.2774431544817736, "learning_rate": 2.6625451517304303e-06, "loss": 0.0228, "step": 135250 }, { "epoch": 0.5643573032020095, "grad_norm": 0.8218664517270409, "learning_rate": 2.6624959374787425e-06, "loss": 0.023, "step": 135255 }, { "epoch": 0.5643781659169997, "grad_norm": 0.42148022141987684, "learning_rate": 2.66244672595597e-06, "loss": 0.0183, "step": 135260 }, { "epoch": 0.5643990286319901, "grad_norm": 0.9526375141473047, "learning_rate": 2.6623975171618603e-06, "loss": 0.0318, "step": 135265 }, { "epoch": 0.5644198913469803, "grad_norm": 0.28776958773012, "learning_rate": 2.662348311096161e-06, "loss": 0.0163, "step": 135270 }, { "epoch": 0.5644407540619706, "grad_norm": 0.9716648591857231, "learning_rate": 2.6622991077586207e-06, "loss": 0.0255, "step": 135275 }, { "epoch": 0.5644616167769609, "grad_norm": 1.101491783722942, "learning_rate": 2.662249907148986e-06, "loss": 0.024, "step": 135280 }, { "epoch": 0.5644824794919512, "grad_norm": 1.1357994297390264, "learning_rate": 2.662200709267006e-06, "loss": 0.0284, "step": 135285 }, { "epoch": 0.5645033422069414, "grad_norm": 0.6643281565313797, "learning_rate": 2.662151514112428e-06, "loss": 0.0178, "step": 135290 }, { "epoch": 0.5645242049219317, "grad_norm": 0.8792373401328948, "learning_rate": 2.6621023216850008e-06, "loss": 0.0183, "step": 135295 }, { "epoch": 0.564545067636922, "grad_norm": 1.0200882934071878, "learning_rate": 2.6620531319844715e-06, "loss": 0.0345, "step": 135300 }, { "epoch": 0.5645659303519123, "grad_norm": 0.9768020764420204, "learning_rate": 2.6620039450105882e-06, "loss": 0.0325, "step": 135305 }, { "epoch": 0.5645867930669025, "grad_norm": 0.818888554917411, "learning_rate": 2.6619547607631003e-06, "loss": 0.0225, "step": 135310 }, { "epoch": 0.5646076557818929, "grad_norm": 0.7961786458910083, "learning_rate": 2.661905579241755e-06, "loss": 0.0266, "step": 135315 }, { "epoch": 0.5646285184968831, "grad_norm": 1.1406168132020704, "learning_rate": 2.6618564004463e-06, "loss": 0.0268, "step": 135320 }, { "epoch": 0.5646493812118734, "grad_norm": 0.755362339919871, "learning_rate": 2.6618072243764837e-06, "loss": 0.0199, "step": 135325 }, { "epoch": 0.5646702439268637, "grad_norm": 0.4596971644289796, "learning_rate": 2.661758051032055e-06, "loss": 0.0215, "step": 135330 }, { "epoch": 0.564691106641854, "grad_norm": 0.6987251581079694, "learning_rate": 2.661708880412762e-06, "loss": 0.03, "step": 135335 }, { "epoch": 0.5647119693568442, "grad_norm": 0.730826013771198, "learning_rate": 2.6616597125183522e-06, "loss": 0.03, "step": 135340 }, { "epoch": 0.5647328320718344, "grad_norm": 1.2389893804133434, "learning_rate": 2.6616105473485755e-06, "loss": 0.0295, "step": 135345 }, { "epoch": 0.5647536947868248, "grad_norm": 0.4467068337142327, "learning_rate": 2.6615613849031787e-06, "loss": 0.0183, "step": 135350 }, { "epoch": 0.564774557501815, "grad_norm": 0.4792126864214623, "learning_rate": 2.661512225181911e-06, "loss": 0.0208, "step": 135355 }, { "epoch": 0.5647954202168053, "grad_norm": 0.4489644602883503, "learning_rate": 2.6614630681845203e-06, "loss": 0.0185, "step": 135360 }, { "epoch": 0.5648162829317956, "grad_norm": 0.6077551663162185, "learning_rate": 2.6614139139107557e-06, "loss": 0.0209, "step": 135365 }, { "epoch": 0.5648371456467859, "grad_norm": 1.2520331012322745, "learning_rate": 2.6613647623603654e-06, "loss": 0.0168, "step": 135370 }, { "epoch": 0.5648580083617761, "grad_norm": 0.45846003006463887, "learning_rate": 2.661315613533098e-06, "loss": 0.0178, "step": 135375 }, { "epoch": 0.5648788710767665, "grad_norm": 0.8560448009733171, "learning_rate": 2.661266467428702e-06, "loss": 0.0227, "step": 135380 }, { "epoch": 0.5648997337917567, "grad_norm": 0.574029423814162, "learning_rate": 2.6612173240469262e-06, "loss": 0.0203, "step": 135385 }, { "epoch": 0.564920596506747, "grad_norm": 0.6641079883312959, "learning_rate": 2.6611681833875188e-06, "loss": 0.0237, "step": 135390 }, { "epoch": 0.5649414592217373, "grad_norm": 0.5801927389499898, "learning_rate": 2.661119045450229e-06, "loss": 0.0294, "step": 135395 }, { "epoch": 0.5649623219367276, "grad_norm": 0.7530011922332213, "learning_rate": 2.661069910234804e-06, "loss": 0.0258, "step": 135400 }, { "epoch": 0.5649831846517178, "grad_norm": 0.6857668947454313, "learning_rate": 2.6610207777409953e-06, "loss": 0.0154, "step": 135405 }, { "epoch": 0.5650040473667081, "grad_norm": 0.6406995704244008, "learning_rate": 2.660971647968549e-06, "loss": 0.0185, "step": 135410 }, { "epoch": 0.5650249100816984, "grad_norm": 0.293796248630286, "learning_rate": 2.6609225209172153e-06, "loss": 0.0145, "step": 135415 }, { "epoch": 0.5650457727966887, "grad_norm": 0.9687274069844883, "learning_rate": 2.660873396586743e-06, "loss": 0.0217, "step": 135420 }, { "epoch": 0.5650666355116789, "grad_norm": 0.9639511133171121, "learning_rate": 2.66082427497688e-06, "loss": 0.0287, "step": 135425 }, { "epoch": 0.5650874982266693, "grad_norm": 1.4198105423606973, "learning_rate": 2.660775156087376e-06, "loss": 0.0225, "step": 135430 }, { "epoch": 0.5651083609416595, "grad_norm": 0.6563461244037533, "learning_rate": 2.6607260399179795e-06, "loss": 0.0385, "step": 135435 }, { "epoch": 0.5651292236566497, "grad_norm": 0.6071917002570105, "learning_rate": 2.6606769264684407e-06, "loss": 0.0167, "step": 135440 }, { "epoch": 0.5651500863716401, "grad_norm": 1.624765885134934, "learning_rate": 2.6606278157385066e-06, "loss": 0.0358, "step": 135445 }, { "epoch": 0.5651709490866303, "grad_norm": 1.5136859543818393, "learning_rate": 2.6605787077279278e-06, "loss": 0.0279, "step": 135450 }, { "epoch": 0.5651918118016206, "grad_norm": 0.9181873637380312, "learning_rate": 2.6605296024364526e-06, "loss": 0.0199, "step": 135455 }, { "epoch": 0.565212674516611, "grad_norm": 0.3305586908013401, "learning_rate": 2.66048049986383e-06, "loss": 0.0222, "step": 135460 }, { "epoch": 0.5652335372316012, "grad_norm": 0.7077322082581369, "learning_rate": 2.660431400009809e-06, "loss": 0.0215, "step": 135465 }, { "epoch": 0.5652543999465914, "grad_norm": 0.42091351980729924, "learning_rate": 2.660382302874139e-06, "loss": 0.015, "step": 135470 }, { "epoch": 0.5652752626615817, "grad_norm": 0.4255633227114162, "learning_rate": 2.66033320845657e-06, "loss": 0.0191, "step": 135475 }, { "epoch": 0.565296125376572, "grad_norm": 0.8957591885218503, "learning_rate": 2.660284116756851e-06, "loss": 0.0213, "step": 135480 }, { "epoch": 0.5653169880915623, "grad_norm": 0.6521668272680572, "learning_rate": 2.6602350277747294e-06, "loss": 0.0219, "step": 135485 }, { "epoch": 0.5653378508065525, "grad_norm": 0.5076815688980477, "learning_rate": 2.6601859415099567e-06, "loss": 0.0217, "step": 135490 }, { "epoch": 0.5653587135215429, "grad_norm": 0.7108045322130517, "learning_rate": 2.660136857962281e-06, "loss": 0.0259, "step": 135495 }, { "epoch": 0.5653795762365331, "grad_norm": 1.0673262009321915, "learning_rate": 2.660087777131452e-06, "loss": 0.023, "step": 135500 }, { "epoch": 0.5654004389515234, "grad_norm": 0.4160238422734667, "learning_rate": 2.6600386990172187e-06, "loss": 0.0201, "step": 135505 }, { "epoch": 0.5654213016665137, "grad_norm": 1.0386425110817283, "learning_rate": 2.659989623619331e-06, "loss": 0.025, "step": 135510 }, { "epoch": 0.565442164381504, "grad_norm": 1.3759592601341228, "learning_rate": 2.6599405509375385e-06, "loss": 0.0223, "step": 135515 }, { "epoch": 0.5654630270964942, "grad_norm": 0.8057218983940515, "learning_rate": 2.6598914809715897e-06, "loss": 0.0276, "step": 135520 }, { "epoch": 0.5654838898114845, "grad_norm": 0.7513157853368421, "learning_rate": 2.6598424137212354e-06, "loss": 0.0277, "step": 135525 }, { "epoch": 0.5655047525264748, "grad_norm": 0.6739583007852809, "learning_rate": 2.6597933491862243e-06, "loss": 0.0216, "step": 135530 }, { "epoch": 0.565525615241465, "grad_norm": 1.4232319012537045, "learning_rate": 2.659744287366306e-06, "loss": 0.029, "step": 135535 }, { "epoch": 0.5655464779564553, "grad_norm": 0.7649988632215738, "learning_rate": 2.6596952282612302e-06, "loss": 0.0243, "step": 135540 }, { "epoch": 0.5655673406714457, "grad_norm": 1.1047464366956903, "learning_rate": 2.6596461718707463e-06, "loss": 0.0376, "step": 135545 }, { "epoch": 0.5655882033864359, "grad_norm": 0.7474617795958266, "learning_rate": 2.6595971181946044e-06, "loss": 0.0203, "step": 135550 }, { "epoch": 0.5656090661014261, "grad_norm": 0.7594972941738869, "learning_rate": 2.6595480672325547e-06, "loss": 0.0252, "step": 135555 }, { "epoch": 0.5656299288164165, "grad_norm": 0.33417579341967324, "learning_rate": 2.6594990189843456e-06, "loss": 0.0164, "step": 135560 }, { "epoch": 0.5656507915314067, "grad_norm": 0.7794533964563066, "learning_rate": 2.6594499734497277e-06, "loss": 0.0307, "step": 135565 }, { "epoch": 0.565671654246397, "grad_norm": 0.593002993436981, "learning_rate": 2.6594009306284507e-06, "loss": 0.0251, "step": 135570 }, { "epoch": 0.5656925169613873, "grad_norm": 0.5715130869750933, "learning_rate": 2.6593518905202642e-06, "loss": 0.025, "step": 135575 }, { "epoch": 0.5657133796763776, "grad_norm": 0.736774863880462, "learning_rate": 2.659302853124918e-06, "loss": 0.0293, "step": 135580 }, { "epoch": 0.5657342423913678, "grad_norm": 0.49064848465307664, "learning_rate": 2.6592538184421623e-06, "loss": 0.0284, "step": 135585 }, { "epoch": 0.5657551051063581, "grad_norm": 0.7575971198566022, "learning_rate": 2.6592047864717475e-06, "loss": 0.0262, "step": 135590 }, { "epoch": 0.5657759678213484, "grad_norm": 0.7034925032549171, "learning_rate": 2.6591557572134225e-06, "loss": 0.0247, "step": 135595 }, { "epoch": 0.5657968305363387, "grad_norm": 0.6030456812472634, "learning_rate": 2.659106730666938e-06, "loss": 0.0236, "step": 135600 }, { "epoch": 0.5658176932513289, "grad_norm": 0.6707102043221957, "learning_rate": 2.659057706832043e-06, "loss": 0.0296, "step": 135605 }, { "epoch": 0.5658385559663193, "grad_norm": 0.5619390370363821, "learning_rate": 2.659008685708489e-06, "loss": 0.0267, "step": 135610 }, { "epoch": 0.5658594186813095, "grad_norm": 1.2832581824961815, "learning_rate": 2.6589596672960255e-06, "loss": 0.027, "step": 135615 }, { "epoch": 0.5658802813962998, "grad_norm": 1.013896219165143, "learning_rate": 2.658910651594402e-06, "loss": 0.0302, "step": 135620 }, { "epoch": 0.5659011441112901, "grad_norm": 0.714591156475009, "learning_rate": 2.65886163860337e-06, "loss": 0.0193, "step": 135625 }, { "epoch": 0.5659220068262804, "grad_norm": 0.45954127940325123, "learning_rate": 2.658812628322678e-06, "loss": 0.026, "step": 135630 }, { "epoch": 0.5659428695412706, "grad_norm": 0.5248912195584937, "learning_rate": 2.658763620752078e-06, "loss": 0.0277, "step": 135635 }, { "epoch": 0.565963732256261, "grad_norm": 0.30953807290269914, "learning_rate": 2.6587146158913186e-06, "loss": 0.0238, "step": 135640 }, { "epoch": 0.5659845949712512, "grad_norm": 1.0056566372206452, "learning_rate": 2.6586656137401507e-06, "loss": 0.0383, "step": 135645 }, { "epoch": 0.5660054576862414, "grad_norm": 0.943733232194926, "learning_rate": 2.6586166142983248e-06, "loss": 0.0311, "step": 135650 }, { "epoch": 0.5660263204012317, "grad_norm": 1.4009068911215783, "learning_rate": 2.658567617565592e-06, "loss": 0.0277, "step": 135655 }, { "epoch": 0.566047183116222, "grad_norm": 0.46800303366638063, "learning_rate": 2.658518623541701e-06, "loss": 0.0198, "step": 135660 }, { "epoch": 0.5660680458312123, "grad_norm": 0.7324542750976458, "learning_rate": 2.6584696322264026e-06, "loss": 0.026, "step": 135665 }, { "epoch": 0.5660889085462025, "grad_norm": 0.5193452429908323, "learning_rate": 2.6584206436194486e-06, "loss": 0.0204, "step": 135670 }, { "epoch": 0.5661097712611929, "grad_norm": 0.5410077765973746, "learning_rate": 2.658371657720588e-06, "loss": 0.0209, "step": 135675 }, { "epoch": 0.5661306339761831, "grad_norm": 0.25722347613463753, "learning_rate": 2.658322674529572e-06, "loss": 0.0257, "step": 135680 }, { "epoch": 0.5661514966911734, "grad_norm": 0.636895169218094, "learning_rate": 2.658273694046151e-06, "loss": 0.0273, "step": 135685 }, { "epoch": 0.5661723594061637, "grad_norm": 0.3354862357784935, "learning_rate": 2.6582247162700757e-06, "loss": 0.0156, "step": 135690 }, { "epoch": 0.566193222121154, "grad_norm": 0.7657857841495628, "learning_rate": 2.658175741201097e-06, "loss": 0.0266, "step": 135695 }, { "epoch": 0.5662140848361442, "grad_norm": 0.7785737775509294, "learning_rate": 2.658126768838964e-06, "loss": 0.0253, "step": 135700 }, { "epoch": 0.5662349475511345, "grad_norm": 0.448389525740158, "learning_rate": 2.658077799183429e-06, "loss": 0.0204, "step": 135705 }, { "epoch": 0.5662558102661248, "grad_norm": 0.5696304785285448, "learning_rate": 2.6580288322342425e-06, "loss": 0.0252, "step": 135710 }, { "epoch": 0.5662766729811151, "grad_norm": 0.696395646962671, "learning_rate": 2.657979867991154e-06, "loss": 0.0225, "step": 135715 }, { "epoch": 0.5662975356961053, "grad_norm": 0.8093922943418915, "learning_rate": 2.6579309064539153e-06, "loss": 0.0236, "step": 135720 }, { "epoch": 0.5663183984110957, "grad_norm": 0.6093892713832709, "learning_rate": 2.6578819476222773e-06, "loss": 0.0231, "step": 135725 }, { "epoch": 0.5663392611260859, "grad_norm": 0.5203292901146986, "learning_rate": 2.6578329914959904e-06, "loss": 0.0261, "step": 135730 }, { "epoch": 0.5663601238410761, "grad_norm": 0.7289815493797261, "learning_rate": 2.657784038074806e-06, "loss": 0.023, "step": 135735 }, { "epoch": 0.5663809865560665, "grad_norm": 0.8312509821122566, "learning_rate": 2.6577350873584744e-06, "loss": 0.026, "step": 135740 }, { "epoch": 0.5664018492710567, "grad_norm": 0.5802918363938678, "learning_rate": 2.6576861393467467e-06, "loss": 0.0291, "step": 135745 }, { "epoch": 0.566422711986047, "grad_norm": 0.7573041863958297, "learning_rate": 2.6576371940393737e-06, "loss": 0.0272, "step": 135750 }, { "epoch": 0.5664435747010373, "grad_norm": 0.6911720337468445, "learning_rate": 2.657588251436107e-06, "loss": 0.0187, "step": 135755 }, { "epoch": 0.5664644374160276, "grad_norm": 0.5789252301635534, "learning_rate": 2.6575393115366965e-06, "loss": 0.016, "step": 135760 }, { "epoch": 0.5664853001310178, "grad_norm": 0.6694914807454909, "learning_rate": 2.6574903743408943e-06, "loss": 0.024, "step": 135765 }, { "epoch": 0.5665061628460081, "grad_norm": 0.8600344947508277, "learning_rate": 2.657441439848451e-06, "loss": 0.0249, "step": 135770 }, { "epoch": 0.5665270255609984, "grad_norm": 0.8698653287366455, "learning_rate": 2.657392508059118e-06, "loss": 0.0265, "step": 135775 }, { "epoch": 0.5665478882759887, "grad_norm": 0.6676874239216164, "learning_rate": 2.657343578972646e-06, "loss": 0.0242, "step": 135780 }, { "epoch": 0.5665687509909789, "grad_norm": 0.8191715599087708, "learning_rate": 2.657294652588786e-06, "loss": 0.0253, "step": 135785 }, { "epoch": 0.5665896137059693, "grad_norm": 0.7157325420839734, "learning_rate": 2.6572457289072905e-06, "loss": 0.0218, "step": 135790 }, { "epoch": 0.5666104764209595, "grad_norm": 1.0660737244235603, "learning_rate": 2.65719680792791e-06, "loss": 0.0243, "step": 135795 }, { "epoch": 0.5666313391359498, "grad_norm": 0.4681998332592934, "learning_rate": 2.6571478896503946e-06, "loss": 0.0228, "step": 135800 }, { "epoch": 0.5666522018509401, "grad_norm": 0.3889187427724479, "learning_rate": 2.657098974074497e-06, "loss": 0.0259, "step": 135805 }, { "epoch": 0.5666730645659304, "grad_norm": 1.0074492186352535, "learning_rate": 2.6570500611999688e-06, "loss": 0.025, "step": 135810 }, { "epoch": 0.5666939272809206, "grad_norm": 1.1871810925364286, "learning_rate": 2.65700115102656e-06, "loss": 0.0261, "step": 135815 }, { "epoch": 0.566714789995911, "grad_norm": 0.9934469214842897, "learning_rate": 2.656952243554024e-06, "loss": 0.0218, "step": 135820 }, { "epoch": 0.5667356527109012, "grad_norm": 0.8017480259142349, "learning_rate": 2.6569033387821104e-06, "loss": 0.0204, "step": 135825 }, { "epoch": 0.5667565154258914, "grad_norm": 0.9081492743172951, "learning_rate": 2.6568544367105707e-06, "loss": 0.0272, "step": 135830 }, { "epoch": 0.5667773781408817, "grad_norm": 0.7212501245673336, "learning_rate": 2.6568055373391573e-06, "loss": 0.0156, "step": 135835 }, { "epoch": 0.566798240855872, "grad_norm": 0.9986571627213352, "learning_rate": 2.656756640667622e-06, "loss": 0.0247, "step": 135840 }, { "epoch": 0.5668191035708623, "grad_norm": 1.029272463275282, "learning_rate": 2.6567077466957146e-06, "loss": 0.0235, "step": 135845 }, { "epoch": 0.5668399662858525, "grad_norm": 0.42156227232913457, "learning_rate": 2.6566588554231894e-06, "loss": 0.0202, "step": 135850 }, { "epoch": 0.5668608290008429, "grad_norm": 1.3127801420046796, "learning_rate": 2.656609966849795e-06, "loss": 0.0244, "step": 135855 }, { "epoch": 0.5668816917158331, "grad_norm": 0.5765086867915551, "learning_rate": 2.656561080975285e-06, "loss": 0.0222, "step": 135860 }, { "epoch": 0.5669025544308234, "grad_norm": 0.5352809453153888, "learning_rate": 2.6565121977994115e-06, "loss": 0.0224, "step": 135865 }, { "epoch": 0.5669234171458137, "grad_norm": 1.3277932600820965, "learning_rate": 2.6564633173219247e-06, "loss": 0.0221, "step": 135870 }, { "epoch": 0.566944279860804, "grad_norm": 0.7792808611077041, "learning_rate": 2.6564144395425768e-06, "loss": 0.022, "step": 135875 }, { "epoch": 0.5669651425757942, "grad_norm": 0.47952272800243456, "learning_rate": 2.6563655644611198e-06, "loss": 0.0214, "step": 135880 }, { "epoch": 0.5669860052907845, "grad_norm": 0.8175861668191524, "learning_rate": 2.656316692077306e-06, "loss": 0.0231, "step": 135885 }, { "epoch": 0.5670068680057748, "grad_norm": 0.8985390570616509, "learning_rate": 2.656267822390886e-06, "loss": 0.0272, "step": 135890 }, { "epoch": 0.5670277307207651, "grad_norm": 0.9768597344715351, "learning_rate": 2.6562189554016134e-06, "loss": 0.0223, "step": 135895 }, { "epoch": 0.5670485934357553, "grad_norm": 0.7608764794577038, "learning_rate": 2.6561700911092386e-06, "loss": 0.0315, "step": 135900 }, { "epoch": 0.5670694561507457, "grad_norm": 0.9497717006940526, "learning_rate": 2.6561212295135143e-06, "loss": 0.0204, "step": 135905 }, { "epoch": 0.5670903188657359, "grad_norm": 0.7097787494740116, "learning_rate": 2.656072370614192e-06, "loss": 0.0248, "step": 135910 }, { "epoch": 0.5671111815807262, "grad_norm": 0.6306881864889283, "learning_rate": 2.656023514411024e-06, "loss": 0.018, "step": 135915 }, { "epoch": 0.5671320442957165, "grad_norm": 1.0214049923691482, "learning_rate": 2.655974660903762e-06, "loss": 0.0221, "step": 135920 }, { "epoch": 0.5671529070107068, "grad_norm": 0.8657362248606266, "learning_rate": 2.65592581009216e-06, "loss": 0.0258, "step": 135925 }, { "epoch": 0.567173769725697, "grad_norm": 0.44767953819657963, "learning_rate": 2.6558769619759666e-06, "loss": 0.0292, "step": 135930 }, { "epoch": 0.5671946324406874, "grad_norm": 0.5087681115022137, "learning_rate": 2.655828116554937e-06, "loss": 0.0205, "step": 135935 }, { "epoch": 0.5672154951556776, "grad_norm": 0.5935251562810832, "learning_rate": 2.655779273828822e-06, "loss": 0.0243, "step": 135940 }, { "epoch": 0.5672363578706678, "grad_norm": 1.082162836015377, "learning_rate": 2.655730433797374e-06, "loss": 0.0249, "step": 135945 }, { "epoch": 0.5672572205856581, "grad_norm": 0.6875495188041627, "learning_rate": 2.655681596460345e-06, "loss": 0.0285, "step": 135950 }, { "epoch": 0.5672780833006484, "grad_norm": 0.8791683796611899, "learning_rate": 2.6556327618174878e-06, "loss": 0.0279, "step": 135955 }, { "epoch": 0.5672989460156387, "grad_norm": 0.7421845675622913, "learning_rate": 2.655583929868555e-06, "loss": 0.028, "step": 135960 }, { "epoch": 0.5673198087306289, "grad_norm": 1.0505730135602458, "learning_rate": 2.6555351006132973e-06, "loss": 0.0227, "step": 135965 }, { "epoch": 0.5673406714456193, "grad_norm": 0.7308890591015689, "learning_rate": 2.6554862740514685e-06, "loss": 0.0178, "step": 135970 }, { "epoch": 0.5673615341606095, "grad_norm": 0.3767611577812626, "learning_rate": 2.6554374501828208e-06, "loss": 0.027, "step": 135975 }, { "epoch": 0.5673823968755998, "grad_norm": 0.7559006956466032, "learning_rate": 2.6553886290071064e-06, "loss": 0.0234, "step": 135980 }, { "epoch": 0.5674032595905901, "grad_norm": 0.4037767359123705, "learning_rate": 2.6553398105240775e-06, "loss": 0.0269, "step": 135985 }, { "epoch": 0.5674241223055804, "grad_norm": 0.26242035287102816, "learning_rate": 2.6552909947334864e-06, "loss": 0.0213, "step": 135990 }, { "epoch": 0.5674449850205706, "grad_norm": 0.6191072918540833, "learning_rate": 2.655242181635087e-06, "loss": 0.0253, "step": 135995 }, { "epoch": 0.567465847735561, "grad_norm": 0.8941210691197782, "learning_rate": 2.655193371228631e-06, "loss": 0.0326, "step": 136000 }, { "epoch": 0.5674867104505512, "grad_norm": 0.7370956314622987, "learning_rate": 2.65514456351387e-06, "loss": 0.0216, "step": 136005 }, { "epoch": 0.5675075731655415, "grad_norm": 0.40679890730068513, "learning_rate": 2.6550957584905586e-06, "loss": 0.0155, "step": 136010 }, { "epoch": 0.5675284358805317, "grad_norm": 0.4356161040810532, "learning_rate": 2.655046956158448e-06, "loss": 0.0175, "step": 136015 }, { "epoch": 0.5675492985955221, "grad_norm": 0.5228730895834642, "learning_rate": 2.654998156517291e-06, "loss": 0.0233, "step": 136020 }, { "epoch": 0.5675701613105123, "grad_norm": 0.645963231736999, "learning_rate": 2.6549493595668406e-06, "loss": 0.022, "step": 136025 }, { "epoch": 0.5675910240255025, "grad_norm": 0.8086927683110868, "learning_rate": 2.65490056530685e-06, "loss": 0.0224, "step": 136030 }, { "epoch": 0.5676118867404929, "grad_norm": 0.40638905322629015, "learning_rate": 2.654851773737071e-06, "loss": 0.0204, "step": 136035 }, { "epoch": 0.5676327494554831, "grad_norm": 0.6375624954938294, "learning_rate": 2.6548029848572573e-06, "loss": 0.0325, "step": 136040 }, { "epoch": 0.5676536121704734, "grad_norm": 1.23029317787928, "learning_rate": 2.654754198667161e-06, "loss": 0.0295, "step": 136045 }, { "epoch": 0.5676744748854637, "grad_norm": 0.7098216652282852, "learning_rate": 2.654705415166535e-06, "loss": 0.0229, "step": 136050 }, { "epoch": 0.567695337600454, "grad_norm": 0.8908558406852746, "learning_rate": 2.654656634355133e-06, "loss": 0.0241, "step": 136055 }, { "epoch": 0.5677162003154442, "grad_norm": 0.5779854773317421, "learning_rate": 2.6546078562327076e-06, "loss": 0.0165, "step": 136060 }, { "epoch": 0.5677370630304345, "grad_norm": 0.7122649145165908, "learning_rate": 2.654559080799012e-06, "loss": 0.0315, "step": 136065 }, { "epoch": 0.5677579257454248, "grad_norm": 0.8067535288927503, "learning_rate": 2.6545103080537983e-06, "loss": 0.0228, "step": 136070 }, { "epoch": 0.5677787884604151, "grad_norm": 0.8764530467704463, "learning_rate": 2.6544615379968196e-06, "loss": 0.0235, "step": 136075 }, { "epoch": 0.5677996511754053, "grad_norm": 0.9988249323553717, "learning_rate": 2.65441277062783e-06, "loss": 0.0189, "step": 136080 }, { "epoch": 0.5678205138903957, "grad_norm": 1.219572059825461, "learning_rate": 2.654364005946582e-06, "loss": 0.0247, "step": 136085 }, { "epoch": 0.5678413766053859, "grad_norm": 1.0982417878619544, "learning_rate": 2.6543152439528287e-06, "loss": 0.0283, "step": 136090 }, { "epoch": 0.5678622393203762, "grad_norm": 0.7466955331886617, "learning_rate": 2.6542664846463227e-06, "loss": 0.0251, "step": 136095 }, { "epoch": 0.5678831020353665, "grad_norm": 0.4378034864141301, "learning_rate": 2.6542177280268178e-06, "loss": 0.0237, "step": 136100 }, { "epoch": 0.5679039647503568, "grad_norm": 0.5275839485682863, "learning_rate": 2.654168974094068e-06, "loss": 0.0289, "step": 136105 }, { "epoch": 0.567924827465347, "grad_norm": 0.8552416090838215, "learning_rate": 2.6541202228478245e-06, "loss": 0.0306, "step": 136110 }, { "epoch": 0.5679456901803374, "grad_norm": 0.9801212538172326, "learning_rate": 2.654071474287843e-06, "loss": 0.0233, "step": 136115 }, { "epoch": 0.5679665528953276, "grad_norm": 0.6940017059057375, "learning_rate": 2.6540227284138753e-06, "loss": 0.0214, "step": 136120 }, { "epoch": 0.5679874156103178, "grad_norm": 0.62747303067163, "learning_rate": 2.6539739852256753e-06, "loss": 0.0301, "step": 136125 }, { "epoch": 0.5680082783253081, "grad_norm": 0.8942266808699246, "learning_rate": 2.6539252447229958e-06, "loss": 0.0282, "step": 136130 }, { "epoch": 0.5680291410402984, "grad_norm": 0.8474055347290966, "learning_rate": 2.6538765069055907e-06, "loss": 0.0309, "step": 136135 }, { "epoch": 0.5680500037552887, "grad_norm": 0.7066483709155892, "learning_rate": 2.653827771773213e-06, "loss": 0.0297, "step": 136140 }, { "epoch": 0.5680708664702789, "grad_norm": 1.095079182526263, "learning_rate": 2.653779039325617e-06, "loss": 0.0159, "step": 136145 }, { "epoch": 0.5680917291852693, "grad_norm": 0.8288894345634477, "learning_rate": 2.653730309562555e-06, "loss": 0.0211, "step": 136150 }, { "epoch": 0.5681125919002595, "grad_norm": 0.7150040636459646, "learning_rate": 2.653681582483781e-06, "loss": 0.0333, "step": 136155 }, { "epoch": 0.5681334546152498, "grad_norm": 0.63040617702573, "learning_rate": 2.6536328580890497e-06, "loss": 0.0225, "step": 136160 }, { "epoch": 0.5681543173302401, "grad_norm": 0.2980550605244092, "learning_rate": 2.6535841363781133e-06, "loss": 0.0244, "step": 136165 }, { "epoch": 0.5681751800452304, "grad_norm": 0.7163527027815487, "learning_rate": 2.6535354173507264e-06, "loss": 0.0281, "step": 136170 }, { "epoch": 0.5681960427602206, "grad_norm": 0.5578887192280154, "learning_rate": 2.6534867010066416e-06, "loss": 0.0266, "step": 136175 }, { "epoch": 0.568216905475211, "grad_norm": 0.6186453063560603, "learning_rate": 2.653437987345614e-06, "loss": 0.0249, "step": 136180 }, { "epoch": 0.5682377681902012, "grad_norm": 0.7951733394903515, "learning_rate": 2.653389276367396e-06, "loss": 0.0239, "step": 136185 }, { "epoch": 0.5682586309051915, "grad_norm": 0.47118888687008387, "learning_rate": 2.653340568071742e-06, "loss": 0.0206, "step": 136190 }, { "epoch": 0.5682794936201817, "grad_norm": 0.6028945519817319, "learning_rate": 2.653291862458405e-06, "loss": 0.0186, "step": 136195 }, { "epoch": 0.5683003563351721, "grad_norm": 0.604865581240698, "learning_rate": 2.65324315952714e-06, "loss": 0.0177, "step": 136200 }, { "epoch": 0.5683212190501623, "grad_norm": 1.3342349308094554, "learning_rate": 2.6531944592777004e-06, "loss": 0.0253, "step": 136205 }, { "epoch": 0.5683420817651526, "grad_norm": 0.4609094597106009, "learning_rate": 2.6531457617098394e-06, "loss": 0.0188, "step": 136210 }, { "epoch": 0.5683629444801429, "grad_norm": 0.460270347364287, "learning_rate": 2.6530970668233124e-06, "loss": 0.0141, "step": 136215 }, { "epoch": 0.5683838071951332, "grad_norm": 0.4979279310927441, "learning_rate": 2.653048374617872e-06, "loss": 0.0276, "step": 136220 }, { "epoch": 0.5684046699101234, "grad_norm": 0.491255729152227, "learning_rate": 2.652999685093273e-06, "loss": 0.0352, "step": 136225 }, { "epoch": 0.5684255326251137, "grad_norm": 0.643732886679513, "learning_rate": 2.6529509982492684e-06, "loss": 0.0186, "step": 136230 }, { "epoch": 0.568446395340104, "grad_norm": 0.49326228231783714, "learning_rate": 2.6529023140856136e-06, "loss": 0.0174, "step": 136235 }, { "epoch": 0.5684672580550942, "grad_norm": 0.8562939454303246, "learning_rate": 2.6528536326020617e-06, "loss": 0.0337, "step": 136240 }, { "epoch": 0.5684881207700845, "grad_norm": 0.49009849345459255, "learning_rate": 2.652804953798367e-06, "loss": 0.0253, "step": 136245 }, { "epoch": 0.5685089834850748, "grad_norm": 0.6433382140288986, "learning_rate": 2.652756277674284e-06, "loss": 0.0224, "step": 136250 }, { "epoch": 0.5685298462000651, "grad_norm": 0.7374209531856791, "learning_rate": 2.6527076042295663e-06, "loss": 0.0187, "step": 136255 }, { "epoch": 0.5685507089150553, "grad_norm": 0.8210319206031518, "learning_rate": 2.6526589334639687e-06, "loss": 0.0283, "step": 136260 }, { "epoch": 0.5685715716300457, "grad_norm": 0.6960861223537703, "learning_rate": 2.6526102653772444e-06, "loss": 0.0697, "step": 136265 }, { "epoch": 0.5685924343450359, "grad_norm": 0.5088291157368539, "learning_rate": 2.652561599969149e-06, "loss": 0.0266, "step": 136270 }, { "epoch": 0.5686132970600262, "grad_norm": 0.5755646702949306, "learning_rate": 2.6525129372394366e-06, "loss": 0.0227, "step": 136275 }, { "epoch": 0.5686341597750165, "grad_norm": 0.35901658881687065, "learning_rate": 2.65246427718786e-06, "loss": 0.0199, "step": 136280 }, { "epoch": 0.5686550224900068, "grad_norm": 0.5845918792119121, "learning_rate": 2.652415619814176e-06, "loss": 0.0187, "step": 136285 }, { "epoch": 0.568675885204997, "grad_norm": 0.9024010964136464, "learning_rate": 2.6523669651181367e-06, "loss": 0.0302, "step": 136290 }, { "epoch": 0.5686967479199874, "grad_norm": 0.6473651676780343, "learning_rate": 2.652318313099498e-06, "loss": 0.0206, "step": 136295 }, { "epoch": 0.5687176106349776, "grad_norm": 0.8805597674295429, "learning_rate": 2.652269663758013e-06, "loss": 0.0335, "step": 136300 }, { "epoch": 0.5687384733499679, "grad_norm": 1.079105789869282, "learning_rate": 2.652221017093438e-06, "loss": 0.0252, "step": 136305 }, { "epoch": 0.5687593360649581, "grad_norm": 0.545084937621417, "learning_rate": 2.652172373105526e-06, "loss": 0.0164, "step": 136310 }, { "epoch": 0.5687801987799485, "grad_norm": 0.8613475717614005, "learning_rate": 2.652123731794032e-06, "loss": 0.0219, "step": 136315 }, { "epoch": 0.5688010614949387, "grad_norm": 1.002516654653832, "learning_rate": 2.6520750931587113e-06, "loss": 0.0296, "step": 136320 }, { "epoch": 0.5688219242099289, "grad_norm": 0.5663529167531476, "learning_rate": 2.6520264571993178e-06, "loss": 0.0185, "step": 136325 }, { "epoch": 0.5688427869249193, "grad_norm": 0.4883120689509902, "learning_rate": 2.6519778239156057e-06, "loss": 0.0306, "step": 136330 }, { "epoch": 0.5688636496399095, "grad_norm": 0.8732764475140781, "learning_rate": 2.6519291933073297e-06, "loss": 0.0232, "step": 136335 }, { "epoch": 0.5688845123548998, "grad_norm": 1.6090292324692685, "learning_rate": 2.6518805653742464e-06, "loss": 0.024, "step": 136340 }, { "epoch": 0.5689053750698901, "grad_norm": 0.5306085474897589, "learning_rate": 2.651831940116108e-06, "loss": 0.0228, "step": 136345 }, { "epoch": 0.5689262377848804, "grad_norm": 0.7994723927228753, "learning_rate": 2.651783317532671e-06, "loss": 0.0272, "step": 136350 }, { "epoch": 0.5689471004998706, "grad_norm": 0.6342579489502812, "learning_rate": 2.6517346976236898e-06, "loss": 0.0149, "step": 136355 }, { "epoch": 0.568967963214861, "grad_norm": 0.6426418636354096, "learning_rate": 2.6516860803889184e-06, "loss": 0.0194, "step": 136360 }, { "epoch": 0.5689888259298512, "grad_norm": 0.466707793970861, "learning_rate": 2.6516374658281124e-06, "loss": 0.0265, "step": 136365 }, { "epoch": 0.5690096886448415, "grad_norm": 0.39815777966462634, "learning_rate": 2.6515888539410268e-06, "loss": 0.0268, "step": 136370 }, { "epoch": 0.5690305513598317, "grad_norm": 0.9342496652313119, "learning_rate": 2.6515402447274165e-06, "loss": 0.0262, "step": 136375 }, { "epoch": 0.5690514140748221, "grad_norm": 0.7085774352646774, "learning_rate": 2.6514916381870354e-06, "loss": 0.0293, "step": 136380 }, { "epoch": 0.5690722767898123, "grad_norm": 0.7027200482840426, "learning_rate": 2.65144303431964e-06, "loss": 0.0404, "step": 136385 }, { "epoch": 0.5690931395048026, "grad_norm": 0.8094162838506049, "learning_rate": 2.6513944331249854e-06, "loss": 0.0308, "step": 136390 }, { "epoch": 0.5691140022197929, "grad_norm": 0.9559811814169257, "learning_rate": 2.651345834602825e-06, "loss": 0.0223, "step": 136395 }, { "epoch": 0.5691348649347832, "grad_norm": 0.4834360733837716, "learning_rate": 2.6512972387529147e-06, "loss": 0.0225, "step": 136400 }, { "epoch": 0.5691557276497734, "grad_norm": 0.7694769546091323, "learning_rate": 2.6512486455750103e-06, "loss": 0.027, "step": 136405 }, { "epoch": 0.5691765903647638, "grad_norm": 0.6528279667258248, "learning_rate": 2.651200055068866e-06, "loss": 0.0219, "step": 136410 }, { "epoch": 0.569197453079754, "grad_norm": 0.3624271270765134, "learning_rate": 2.6511514672342376e-06, "loss": 0.0174, "step": 136415 }, { "epoch": 0.5692183157947442, "grad_norm": 0.6566807345664016, "learning_rate": 2.6511028820708797e-06, "loss": 0.0268, "step": 136420 }, { "epoch": 0.5692391785097345, "grad_norm": 0.7343123341436368, "learning_rate": 2.6510542995785475e-06, "loss": 0.0214, "step": 136425 }, { "epoch": 0.5692600412247248, "grad_norm": 1.127247580461877, "learning_rate": 2.651005719756998e-06, "loss": 0.0293, "step": 136430 }, { "epoch": 0.5692809039397151, "grad_norm": 0.7385867165645357, "learning_rate": 2.6509571426059838e-06, "loss": 0.0204, "step": 136435 }, { "epoch": 0.5693017666547053, "grad_norm": 0.8006360027755425, "learning_rate": 2.6509085681252623e-06, "loss": 0.0248, "step": 136440 }, { "epoch": 0.5693226293696957, "grad_norm": 0.378245979939753, "learning_rate": 2.650859996314588e-06, "loss": 0.0217, "step": 136445 }, { "epoch": 0.5693434920846859, "grad_norm": 0.7187163604543676, "learning_rate": 2.650811427173717e-06, "loss": 0.0213, "step": 136450 }, { "epoch": 0.5693643547996762, "grad_norm": 0.8411938815300725, "learning_rate": 2.6507628607024034e-06, "loss": 0.0203, "step": 136455 }, { "epoch": 0.5693852175146665, "grad_norm": 3.029194272615995, "learning_rate": 2.6507142969004033e-06, "loss": 0.0244, "step": 136460 }, { "epoch": 0.5694060802296568, "grad_norm": 0.803444963637484, "learning_rate": 2.6506657357674735e-06, "loss": 0.0208, "step": 136465 }, { "epoch": 0.569426942944647, "grad_norm": 0.5109732276035638, "learning_rate": 2.650617177303368e-06, "loss": 0.0209, "step": 136470 }, { "epoch": 0.5694478056596374, "grad_norm": 1.1074946113035893, "learning_rate": 2.650568621507842e-06, "loss": 0.0291, "step": 136475 }, { "epoch": 0.5694686683746276, "grad_norm": 0.6169433060978511, "learning_rate": 2.650520068380652e-06, "loss": 0.028, "step": 136480 }, { "epoch": 0.5694895310896179, "grad_norm": 0.8016315462966608, "learning_rate": 2.650471517921554e-06, "loss": 0.0304, "step": 136485 }, { "epoch": 0.5695103938046081, "grad_norm": 0.6756186360676831, "learning_rate": 2.6504229701303026e-06, "loss": 0.0225, "step": 136490 }, { "epoch": 0.5695312565195985, "grad_norm": 1.0168622500350506, "learning_rate": 2.6503744250066542e-06, "loss": 0.0315, "step": 136495 }, { "epoch": 0.5695521192345887, "grad_norm": 0.4861194563204561, "learning_rate": 2.6503258825503634e-06, "loss": 0.0185, "step": 136500 }, { "epoch": 0.569572981949579, "grad_norm": 0.5524630785851506, "learning_rate": 2.650277342761188e-06, "loss": 0.026, "step": 136505 }, { "epoch": 0.5695938446645693, "grad_norm": 0.39086569827655204, "learning_rate": 2.650228805638882e-06, "loss": 0.0158, "step": 136510 }, { "epoch": 0.5696147073795595, "grad_norm": 0.8418883474359407, "learning_rate": 2.650180271183202e-06, "loss": 0.0226, "step": 136515 }, { "epoch": 0.5696355700945498, "grad_norm": 0.4755065926307262, "learning_rate": 2.6501317393939037e-06, "loss": 0.0194, "step": 136520 }, { "epoch": 0.5696564328095401, "grad_norm": 0.5783626130725634, "learning_rate": 2.6500832102707424e-06, "loss": 0.0254, "step": 136525 }, { "epoch": 0.5696772955245304, "grad_norm": 0.6184517523447713, "learning_rate": 2.6500346838134745e-06, "loss": 0.0388, "step": 136530 }, { "epoch": 0.5696981582395206, "grad_norm": 0.40540813017969407, "learning_rate": 2.649986160021856e-06, "loss": 0.0225, "step": 136535 }, { "epoch": 0.569719020954511, "grad_norm": 0.8545215974468652, "learning_rate": 2.649937638895643e-06, "loss": 0.0188, "step": 136540 }, { "epoch": 0.5697398836695012, "grad_norm": 2.3680712136972617, "learning_rate": 2.649889120434591e-06, "loss": 0.0345, "step": 136545 }, { "epoch": 0.5697607463844915, "grad_norm": 1.0661954594751673, "learning_rate": 2.6498406046384565e-06, "loss": 0.0369, "step": 136550 }, { "epoch": 0.5697816090994817, "grad_norm": 0.8391010029015158, "learning_rate": 2.649792091506995e-06, "loss": 0.0262, "step": 136555 }, { "epoch": 0.5698024718144721, "grad_norm": 0.5171150182266537, "learning_rate": 2.6497435810399637e-06, "loss": 0.0148, "step": 136560 }, { "epoch": 0.5698233345294623, "grad_norm": 0.5898227064744704, "learning_rate": 2.6496950732371167e-06, "loss": 0.0233, "step": 136565 }, { "epoch": 0.5698441972444526, "grad_norm": 0.9457062879328488, "learning_rate": 2.6496465680982126e-06, "loss": 0.0233, "step": 136570 }, { "epoch": 0.5698650599594429, "grad_norm": 0.6037875525627605, "learning_rate": 2.649598065623005e-06, "loss": 0.0304, "step": 136575 }, { "epoch": 0.5698859226744332, "grad_norm": 0.8113371430273797, "learning_rate": 2.6495495658112525e-06, "loss": 0.0293, "step": 136580 }, { "epoch": 0.5699067853894234, "grad_norm": 0.7879387803090268, "learning_rate": 2.6495010686627104e-06, "loss": 0.0244, "step": 136585 }, { "epoch": 0.5699276481044138, "grad_norm": 0.5865134689905309, "learning_rate": 2.6494525741771344e-06, "loss": 0.0229, "step": 136590 }, { "epoch": 0.569948510819404, "grad_norm": 0.7017861732276504, "learning_rate": 2.6494040823542815e-06, "loss": 0.0193, "step": 136595 }, { "epoch": 0.5699693735343943, "grad_norm": 0.5151611303383248, "learning_rate": 2.6493555931939076e-06, "loss": 0.0189, "step": 136600 }, { "epoch": 0.5699902362493845, "grad_norm": 0.6931004477365932, "learning_rate": 2.6493071066957693e-06, "loss": 0.0207, "step": 136605 }, { "epoch": 0.5700110989643749, "grad_norm": 1.010988705534844, "learning_rate": 2.6492586228596235e-06, "loss": 0.0329, "step": 136610 }, { "epoch": 0.5700319616793651, "grad_norm": 0.6039175664461574, "learning_rate": 2.6492101416852258e-06, "loss": 0.0217, "step": 136615 }, { "epoch": 0.5700528243943553, "grad_norm": 0.6318129316447023, "learning_rate": 2.6491616631723326e-06, "loss": 0.023, "step": 136620 }, { "epoch": 0.5700736871093457, "grad_norm": 0.9832644448934879, "learning_rate": 2.6491131873207013e-06, "loss": 0.0248, "step": 136625 }, { "epoch": 0.5700945498243359, "grad_norm": 0.8875270476356436, "learning_rate": 2.649064714130088e-06, "loss": 0.0253, "step": 136630 }, { "epoch": 0.5701154125393262, "grad_norm": 0.8497810642132866, "learning_rate": 2.6490162436002487e-06, "loss": 0.0267, "step": 136635 }, { "epoch": 0.5701362752543165, "grad_norm": 0.9519654657008613, "learning_rate": 2.648967775730941e-06, "loss": 0.021, "step": 136640 }, { "epoch": 0.5701571379693068, "grad_norm": 0.22068796444079383, "learning_rate": 2.6489193105219202e-06, "loss": 0.0201, "step": 136645 }, { "epoch": 0.570178000684297, "grad_norm": 0.4961281343576853, "learning_rate": 2.6488708479729435e-06, "loss": 0.023, "step": 136650 }, { "epoch": 0.5701988633992874, "grad_norm": 0.4114332004503985, "learning_rate": 2.648822388083769e-06, "loss": 0.023, "step": 136655 }, { "epoch": 0.5702197261142776, "grad_norm": 0.9664704847731713, "learning_rate": 2.648773930854151e-06, "loss": 0.0231, "step": 136660 }, { "epoch": 0.5702405888292679, "grad_norm": 0.8284301231449115, "learning_rate": 2.648725476283848e-06, "loss": 0.0201, "step": 136665 }, { "epoch": 0.5702614515442581, "grad_norm": 0.8554707909244196, "learning_rate": 2.6486770243726164e-06, "loss": 0.0178, "step": 136670 }, { "epoch": 0.5702823142592485, "grad_norm": 0.6319106629460653, "learning_rate": 2.648628575120212e-06, "loss": 0.0185, "step": 136675 }, { "epoch": 0.5703031769742387, "grad_norm": 0.5596282255345438, "learning_rate": 2.6485801285263936e-06, "loss": 0.0301, "step": 136680 }, { "epoch": 0.570324039689229, "grad_norm": 0.64306284177766, "learning_rate": 2.648531684590916e-06, "loss": 0.0234, "step": 136685 }, { "epoch": 0.5703449024042193, "grad_norm": 1.1758781850748752, "learning_rate": 2.6484832433135373e-06, "loss": 0.0203, "step": 136690 }, { "epoch": 0.5703657651192096, "grad_norm": 0.45147166629533947, "learning_rate": 2.648434804694014e-06, "loss": 0.0285, "step": 136695 }, { "epoch": 0.5703866278341998, "grad_norm": 0.3485998969363646, "learning_rate": 2.6483863687321033e-06, "loss": 0.0205, "step": 136700 }, { "epoch": 0.5704074905491902, "grad_norm": 0.5193809553768675, "learning_rate": 2.6483379354275617e-06, "loss": 0.0221, "step": 136705 }, { "epoch": 0.5704283532641804, "grad_norm": 0.4458892606041873, "learning_rate": 2.6482895047801467e-06, "loss": 0.0258, "step": 136710 }, { "epoch": 0.5704492159791706, "grad_norm": 0.8151112707592284, "learning_rate": 2.6482410767896154e-06, "loss": 0.0213, "step": 136715 }, { "epoch": 0.570470078694161, "grad_norm": 1.4155464564358002, "learning_rate": 2.648192651455725e-06, "loss": 0.0267, "step": 136720 }, { "epoch": 0.5704909414091512, "grad_norm": 1.135990731153379, "learning_rate": 2.6481442287782323e-06, "loss": 0.0336, "step": 136725 }, { "epoch": 0.5705118041241415, "grad_norm": 0.712377390470575, "learning_rate": 2.6480958087568944e-06, "loss": 0.0173, "step": 136730 }, { "epoch": 0.5705326668391317, "grad_norm": 0.8646394927913643, "learning_rate": 2.6480473913914685e-06, "loss": 0.0223, "step": 136735 }, { "epoch": 0.5705535295541221, "grad_norm": 0.5505245284996687, "learning_rate": 2.6479989766817116e-06, "loss": 0.031, "step": 136740 }, { "epoch": 0.5705743922691123, "grad_norm": 0.5404204283696518, "learning_rate": 2.6479505646273813e-06, "loss": 0.0219, "step": 136745 }, { "epoch": 0.5705952549841026, "grad_norm": 0.6974811908782451, "learning_rate": 2.647902155228235e-06, "loss": 0.0232, "step": 136750 }, { "epoch": 0.5706161176990929, "grad_norm": 0.8756176680254992, "learning_rate": 2.6478537484840294e-06, "loss": 0.0238, "step": 136755 }, { "epoch": 0.5706369804140832, "grad_norm": 0.5589391879766663, "learning_rate": 2.6478053443945223e-06, "loss": 0.0189, "step": 136760 }, { "epoch": 0.5706578431290734, "grad_norm": 1.323364589235454, "learning_rate": 2.6477569429594712e-06, "loss": 0.0263, "step": 136765 }, { "epoch": 0.5706787058440638, "grad_norm": 1.3295440755842531, "learning_rate": 2.647708544178634e-06, "loss": 0.0323, "step": 136770 }, { "epoch": 0.570699568559054, "grad_norm": 0.6928455163487043, "learning_rate": 2.647660148051766e-06, "loss": 0.0262, "step": 136775 }, { "epoch": 0.5707204312740443, "grad_norm": 1.0535456210436613, "learning_rate": 2.6476117545786268e-06, "loss": 0.0262, "step": 136780 }, { "epoch": 0.5707412939890345, "grad_norm": 0.2191587062848437, "learning_rate": 2.6475633637589733e-06, "loss": 0.0166, "step": 136785 }, { "epoch": 0.5707621567040249, "grad_norm": 0.9551540228252556, "learning_rate": 2.647514975592562e-06, "loss": 0.0228, "step": 136790 }, { "epoch": 0.5707830194190151, "grad_norm": 0.5693026811782697, "learning_rate": 2.6474665900791525e-06, "loss": 0.0233, "step": 136795 }, { "epoch": 0.5708038821340053, "grad_norm": 0.9363209236357224, "learning_rate": 2.6474182072185005e-06, "loss": 0.0162, "step": 136800 }, { "epoch": 0.5708247448489957, "grad_norm": 0.5824084846054369, "learning_rate": 2.6473698270103636e-06, "loss": 0.0254, "step": 136805 }, { "epoch": 0.570845607563986, "grad_norm": 0.4198618555312041, "learning_rate": 2.647321449454501e-06, "loss": 0.0146, "step": 136810 }, { "epoch": 0.5708664702789762, "grad_norm": 1.0331988631381253, "learning_rate": 2.6472730745506697e-06, "loss": 0.021, "step": 136815 }, { "epoch": 0.5708873329939665, "grad_norm": 0.6996869904206072, "learning_rate": 2.647224702298627e-06, "loss": 0.019, "step": 136820 }, { "epoch": 0.5709081957089568, "grad_norm": 1.1566006233533142, "learning_rate": 2.647176332698131e-06, "loss": 0.0355, "step": 136825 }, { "epoch": 0.570929058423947, "grad_norm": 0.6059616089053363, "learning_rate": 2.647127965748939e-06, "loss": 0.0213, "step": 136830 }, { "epoch": 0.5709499211389374, "grad_norm": 0.5440554940312133, "learning_rate": 2.6470796014508088e-06, "loss": 0.0247, "step": 136835 }, { "epoch": 0.5709707838539276, "grad_norm": 0.4542801660462633, "learning_rate": 2.647031239803499e-06, "loss": 0.0162, "step": 136840 }, { "epoch": 0.5709916465689179, "grad_norm": 0.8218306160042941, "learning_rate": 2.646982880806767e-06, "loss": 0.0234, "step": 136845 }, { "epoch": 0.5710125092839081, "grad_norm": 1.0328927808083097, "learning_rate": 2.6469345244603708e-06, "loss": 0.0223, "step": 136850 }, { "epoch": 0.5710333719988985, "grad_norm": 0.6666203535401021, "learning_rate": 2.6468861707640674e-06, "loss": 0.0236, "step": 136855 }, { "epoch": 0.5710542347138887, "grad_norm": 0.8822211558802308, "learning_rate": 2.6468378197176164e-06, "loss": 0.0222, "step": 136860 }, { "epoch": 0.571075097428879, "grad_norm": 0.41545935076714546, "learning_rate": 2.6467894713207746e-06, "loss": 0.0155, "step": 136865 }, { "epoch": 0.5710959601438693, "grad_norm": 0.3572995821470385, "learning_rate": 2.6467411255733007e-06, "loss": 0.021, "step": 136870 }, { "epoch": 0.5711168228588596, "grad_norm": 0.7026341163246402, "learning_rate": 2.6466927824749524e-06, "loss": 0.0234, "step": 136875 }, { "epoch": 0.5711376855738498, "grad_norm": 0.6112899065866126, "learning_rate": 2.646644442025487e-06, "loss": 0.0183, "step": 136880 }, { "epoch": 0.5711585482888402, "grad_norm": 0.5972617738848163, "learning_rate": 2.6465961042246645e-06, "loss": 0.0344, "step": 136885 }, { "epoch": 0.5711794110038304, "grad_norm": 0.24785521037093514, "learning_rate": 2.646547769072241e-06, "loss": 0.0172, "step": 136890 }, { "epoch": 0.5712002737188207, "grad_norm": 0.5459133068973789, "learning_rate": 2.646499436567976e-06, "loss": 0.0203, "step": 136895 }, { "epoch": 0.571221136433811, "grad_norm": 0.5989847751067894, "learning_rate": 2.6464511067116274e-06, "loss": 0.0211, "step": 136900 }, { "epoch": 0.5712419991488012, "grad_norm": 0.5661519579529349, "learning_rate": 2.6464027795029527e-06, "loss": 0.0276, "step": 136905 }, { "epoch": 0.5712628618637915, "grad_norm": 0.7446108126778681, "learning_rate": 2.6463544549417113e-06, "loss": 0.0305, "step": 136910 }, { "epoch": 0.5712837245787817, "grad_norm": 0.27980303325492567, "learning_rate": 2.646306133027661e-06, "loss": 0.0176, "step": 136915 }, { "epoch": 0.5713045872937721, "grad_norm": 0.6952891511675254, "learning_rate": 2.64625781376056e-06, "loss": 0.0272, "step": 136920 }, { "epoch": 0.5713254500087623, "grad_norm": 0.8626352062301399, "learning_rate": 2.6462094971401664e-06, "loss": 0.0278, "step": 136925 }, { "epoch": 0.5713463127237526, "grad_norm": 0.6925316657442284, "learning_rate": 2.6461611831662396e-06, "loss": 0.0243, "step": 136930 }, { "epoch": 0.5713671754387429, "grad_norm": 0.775557090730731, "learning_rate": 2.646112871838537e-06, "loss": 0.0252, "step": 136935 }, { "epoch": 0.5713880381537332, "grad_norm": 0.7232609486629313, "learning_rate": 2.646064563156818e-06, "loss": 0.016, "step": 136940 }, { "epoch": 0.5714089008687234, "grad_norm": 0.5133404520885928, "learning_rate": 2.646016257120839e-06, "loss": 0.0266, "step": 136945 }, { "epoch": 0.5714297635837138, "grad_norm": 0.7324375663923872, "learning_rate": 2.645967953730361e-06, "loss": 0.0285, "step": 136950 }, { "epoch": 0.571450626298704, "grad_norm": 0.4300209482606804, "learning_rate": 2.6459196529851417e-06, "loss": 0.0313, "step": 136955 }, { "epoch": 0.5714714890136943, "grad_norm": 0.687090987064304, "learning_rate": 2.645871354884939e-06, "loss": 0.024, "step": 136960 }, { "epoch": 0.5714923517286845, "grad_norm": 1.4587161382438658, "learning_rate": 2.645823059429512e-06, "loss": 0.0262, "step": 136965 }, { "epoch": 0.5715132144436749, "grad_norm": 0.7992357915269346, "learning_rate": 2.6457747666186194e-06, "loss": 0.0278, "step": 136970 }, { "epoch": 0.5715340771586651, "grad_norm": 0.7364847136484454, "learning_rate": 2.6457264764520202e-06, "loss": 0.0278, "step": 136975 }, { "epoch": 0.5715549398736554, "grad_norm": 0.7244502624688854, "learning_rate": 2.645678188929472e-06, "loss": 0.0251, "step": 136980 }, { "epoch": 0.5715758025886457, "grad_norm": 0.6310416435274925, "learning_rate": 2.6456299040507345e-06, "loss": 0.0233, "step": 136985 }, { "epoch": 0.571596665303636, "grad_norm": 0.7940038026081018, "learning_rate": 2.645581621815567e-06, "loss": 0.0203, "step": 136990 }, { "epoch": 0.5716175280186262, "grad_norm": 0.6530927979839446, "learning_rate": 2.6455333422237263e-06, "loss": 0.0225, "step": 136995 }, { "epoch": 0.5716383907336166, "grad_norm": 0.7686486154647214, "learning_rate": 2.6454850652749726e-06, "loss": 0.0202, "step": 137000 }, { "epoch": 0.5716592534486068, "grad_norm": 0.6064714070367905, "learning_rate": 2.645436790969064e-06, "loss": 0.0259, "step": 137005 }, { "epoch": 0.571680116163597, "grad_norm": 0.882372384386287, "learning_rate": 2.645388519305761e-06, "loss": 0.019, "step": 137010 }, { "epoch": 0.5717009788785874, "grad_norm": 0.849508783395881, "learning_rate": 2.6453402502848208e-06, "loss": 0.0194, "step": 137015 }, { "epoch": 0.5717218415935776, "grad_norm": 0.8863905705671886, "learning_rate": 2.645291983906003e-06, "loss": 0.0268, "step": 137020 }, { "epoch": 0.5717427043085679, "grad_norm": 0.620071040619274, "learning_rate": 2.645243720169066e-06, "loss": 0.0256, "step": 137025 }, { "epoch": 0.5717635670235581, "grad_norm": 0.4596552058238633, "learning_rate": 2.6451954590737705e-06, "loss": 0.0229, "step": 137030 }, { "epoch": 0.5717844297385485, "grad_norm": 0.7894850692312508, "learning_rate": 2.6451472006198735e-06, "loss": 0.0212, "step": 137035 }, { "epoch": 0.5718052924535387, "grad_norm": 0.7924201303779638, "learning_rate": 2.645098944807135e-06, "loss": 0.0217, "step": 137040 }, { "epoch": 0.571826155168529, "grad_norm": 0.624342092689203, "learning_rate": 2.645050691635314e-06, "loss": 0.0197, "step": 137045 }, { "epoch": 0.5718470178835193, "grad_norm": 0.5286419133474619, "learning_rate": 2.6450024411041697e-06, "loss": 0.0207, "step": 137050 }, { "epoch": 0.5718678805985096, "grad_norm": 0.5778848277907923, "learning_rate": 2.644954193213461e-06, "loss": 0.023, "step": 137055 }, { "epoch": 0.5718887433134998, "grad_norm": 0.4377267091542892, "learning_rate": 2.6449059479629472e-06, "loss": 0.0234, "step": 137060 }, { "epoch": 0.5719096060284902, "grad_norm": 0.7180257298596618, "learning_rate": 2.6448577053523882e-06, "loss": 0.0232, "step": 137065 }, { "epoch": 0.5719304687434804, "grad_norm": 1.2050182455232585, "learning_rate": 2.644809465381542e-06, "loss": 0.0207, "step": 137070 }, { "epoch": 0.5719513314584707, "grad_norm": 0.6266241824064754, "learning_rate": 2.644761228050169e-06, "loss": 0.0181, "step": 137075 }, { "epoch": 0.571972194173461, "grad_norm": 0.9780587555852248, "learning_rate": 2.644712993358027e-06, "loss": 0.0188, "step": 137080 }, { "epoch": 0.5719930568884513, "grad_norm": 0.8399937455635907, "learning_rate": 2.6446647613048775e-06, "loss": 0.0269, "step": 137085 }, { "epoch": 0.5720139196034415, "grad_norm": 0.4758285580594092, "learning_rate": 2.644616531890478e-06, "loss": 0.0145, "step": 137090 }, { "epoch": 0.5720347823184317, "grad_norm": 0.6733099483098823, "learning_rate": 2.644568305114589e-06, "loss": 0.0232, "step": 137095 }, { "epoch": 0.5720556450334221, "grad_norm": 0.8626253618282177, "learning_rate": 2.6445200809769693e-06, "loss": 0.0302, "step": 137100 }, { "epoch": 0.5720765077484123, "grad_norm": 0.7258655037000097, "learning_rate": 2.644471859477379e-06, "loss": 0.0193, "step": 137105 }, { "epoch": 0.5720973704634026, "grad_norm": 0.5966289133334888, "learning_rate": 2.6444236406155764e-06, "loss": 0.0199, "step": 137110 }, { "epoch": 0.5721182331783929, "grad_norm": 0.5451780640832254, "learning_rate": 2.6443754243913227e-06, "loss": 0.0207, "step": 137115 }, { "epoch": 0.5721390958933832, "grad_norm": 0.5278743138085696, "learning_rate": 2.644327210804376e-06, "loss": 0.0214, "step": 137120 }, { "epoch": 0.5721599586083734, "grad_norm": 0.9550120304657321, "learning_rate": 2.644278999854497e-06, "loss": 0.0278, "step": 137125 }, { "epoch": 0.5721808213233638, "grad_norm": 0.8246160351051585, "learning_rate": 2.6442307915414447e-06, "loss": 0.0223, "step": 137130 }, { "epoch": 0.572201684038354, "grad_norm": 0.5827933457017772, "learning_rate": 2.6441825858649785e-06, "loss": 0.0223, "step": 137135 }, { "epoch": 0.5722225467533443, "grad_norm": 0.5021072843014402, "learning_rate": 2.6441343828248583e-06, "loss": 0.0224, "step": 137140 }, { "epoch": 0.5722434094683345, "grad_norm": 0.6458641109706938, "learning_rate": 2.644086182420844e-06, "loss": 0.027, "step": 137145 }, { "epoch": 0.5722642721833249, "grad_norm": 1.015389152809209, "learning_rate": 2.6440379846526954e-06, "loss": 0.0308, "step": 137150 }, { "epoch": 0.5722851348983151, "grad_norm": 1.5833827668522267, "learning_rate": 2.643989789520172e-06, "loss": 0.0312, "step": 137155 }, { "epoch": 0.5723059976133054, "grad_norm": 0.8805812420859316, "learning_rate": 2.643941597023034e-06, "loss": 0.0229, "step": 137160 }, { "epoch": 0.5723268603282957, "grad_norm": 0.7701279338619708, "learning_rate": 2.643893407161041e-06, "loss": 0.0245, "step": 137165 }, { "epoch": 0.572347723043286, "grad_norm": 0.5005972733962052, "learning_rate": 2.6438452199339527e-06, "loss": 0.0205, "step": 137170 }, { "epoch": 0.5723685857582762, "grad_norm": 0.7837528149289753, "learning_rate": 2.643797035341529e-06, "loss": 0.0269, "step": 137175 }, { "epoch": 0.5723894484732666, "grad_norm": 0.632202960468384, "learning_rate": 2.64374885338353e-06, "loss": 0.0241, "step": 137180 }, { "epoch": 0.5724103111882568, "grad_norm": 1.3396491842772542, "learning_rate": 2.6437006740597153e-06, "loss": 0.0295, "step": 137185 }, { "epoch": 0.572431173903247, "grad_norm": 1.67051890363831, "learning_rate": 2.6436524973698457e-06, "loss": 0.0284, "step": 137190 }, { "epoch": 0.5724520366182374, "grad_norm": 0.8755274876561656, "learning_rate": 2.6436043233136805e-06, "loss": 0.0272, "step": 137195 }, { "epoch": 0.5724728993332276, "grad_norm": 0.46005428049134794, "learning_rate": 2.64355615189098e-06, "loss": 0.0192, "step": 137200 }, { "epoch": 0.5724937620482179, "grad_norm": 0.47260470980401786, "learning_rate": 2.643507983101504e-06, "loss": 0.0239, "step": 137205 }, { "epoch": 0.5725146247632081, "grad_norm": 0.3989656903101024, "learning_rate": 2.643459816945012e-06, "loss": 0.0218, "step": 137210 }, { "epoch": 0.5725354874781985, "grad_norm": 0.7967510617560265, "learning_rate": 2.643411653421266e-06, "loss": 0.0239, "step": 137215 }, { "epoch": 0.5725563501931887, "grad_norm": 0.8732599485481595, "learning_rate": 2.643363492530025e-06, "loss": 0.0363, "step": 137220 }, { "epoch": 0.572577212908179, "grad_norm": 0.5755479979120574, "learning_rate": 2.6433153342710494e-06, "loss": 0.0241, "step": 137225 }, { "epoch": 0.5725980756231693, "grad_norm": 0.5075144831508708, "learning_rate": 2.643267178644099e-06, "loss": 0.018, "step": 137230 }, { "epoch": 0.5726189383381596, "grad_norm": 0.84109312126648, "learning_rate": 2.6432190256489342e-06, "loss": 0.0244, "step": 137235 }, { "epoch": 0.5726398010531498, "grad_norm": 0.5993305159769174, "learning_rate": 2.6431708752853157e-06, "loss": 0.0242, "step": 137240 }, { "epoch": 0.5726606637681402, "grad_norm": 0.43740372307024106, "learning_rate": 2.6431227275530042e-06, "loss": 0.0255, "step": 137245 }, { "epoch": 0.5726815264831304, "grad_norm": 0.6273020449090501, "learning_rate": 2.643074582451759e-06, "loss": 0.0251, "step": 137250 }, { "epoch": 0.5727023891981207, "grad_norm": 0.47790393214325416, "learning_rate": 2.6430264399813404e-06, "loss": 0.0208, "step": 137255 }, { "epoch": 0.572723251913111, "grad_norm": 0.7160259314043298, "learning_rate": 2.6429783001415104e-06, "loss": 0.0193, "step": 137260 }, { "epoch": 0.5727441146281013, "grad_norm": 0.5523297117794104, "learning_rate": 2.6429301629320277e-06, "loss": 0.027, "step": 137265 }, { "epoch": 0.5727649773430915, "grad_norm": 0.4612313569395115, "learning_rate": 2.6428820283526534e-06, "loss": 0.0224, "step": 137270 }, { "epoch": 0.5727858400580818, "grad_norm": 0.8523867803056093, "learning_rate": 2.6428338964031488e-06, "loss": 0.0224, "step": 137275 }, { "epoch": 0.5728067027730721, "grad_norm": 0.3321332382321748, "learning_rate": 2.642785767083273e-06, "loss": 0.0269, "step": 137280 }, { "epoch": 0.5728275654880624, "grad_norm": 0.9253264469369321, "learning_rate": 2.6427376403927873e-06, "loss": 0.0181, "step": 137285 }, { "epoch": 0.5728484282030526, "grad_norm": 0.983341463337409, "learning_rate": 2.642689516331452e-06, "loss": 0.0226, "step": 137290 }, { "epoch": 0.572869290918043, "grad_norm": 0.4578373749312353, "learning_rate": 2.642641394899029e-06, "loss": 0.0191, "step": 137295 }, { "epoch": 0.5728901536330332, "grad_norm": 0.3684882053446968, "learning_rate": 2.6425932760952773e-06, "loss": 0.0176, "step": 137300 }, { "epoch": 0.5729110163480234, "grad_norm": 0.6187680910911048, "learning_rate": 2.642545159919958e-06, "loss": 0.0235, "step": 137305 }, { "epoch": 0.5729318790630138, "grad_norm": 0.9932832438316549, "learning_rate": 2.6424970463728323e-06, "loss": 0.0181, "step": 137310 }, { "epoch": 0.572952741778004, "grad_norm": 1.2023807748310098, "learning_rate": 2.6424489354536607e-06, "loss": 0.0276, "step": 137315 }, { "epoch": 0.5729736044929943, "grad_norm": 0.9382138069537056, "learning_rate": 2.6424008271622043e-06, "loss": 0.0237, "step": 137320 }, { "epoch": 0.5729944672079845, "grad_norm": 0.625446302814966, "learning_rate": 2.642352721498223e-06, "loss": 0.0224, "step": 137325 }, { "epoch": 0.5730153299229749, "grad_norm": 0.615862217320448, "learning_rate": 2.6423046184614785e-06, "loss": 0.0207, "step": 137330 }, { "epoch": 0.5730361926379651, "grad_norm": 1.086715238259518, "learning_rate": 2.6422565180517314e-06, "loss": 0.027, "step": 137335 }, { "epoch": 0.5730570553529554, "grad_norm": 0.6393250369395214, "learning_rate": 2.6422084202687427e-06, "loss": 0.0291, "step": 137340 }, { "epoch": 0.5730779180679457, "grad_norm": 0.707129251440931, "learning_rate": 2.642160325112273e-06, "loss": 0.03, "step": 137345 }, { "epoch": 0.573098780782936, "grad_norm": 0.9200478338295797, "learning_rate": 2.6421122325820836e-06, "loss": 0.0269, "step": 137350 }, { "epoch": 0.5731196434979262, "grad_norm": 1.0321303952825118, "learning_rate": 2.6420641426779353e-06, "loss": 0.0289, "step": 137355 }, { "epoch": 0.5731405062129166, "grad_norm": 0.6930710584878231, "learning_rate": 2.642016055399589e-06, "loss": 0.0275, "step": 137360 }, { "epoch": 0.5731613689279068, "grad_norm": 0.8389139453877866, "learning_rate": 2.6419679707468064e-06, "loss": 0.0296, "step": 137365 }, { "epoch": 0.573182231642897, "grad_norm": 1.2183662701444682, "learning_rate": 2.6419198887193476e-06, "loss": 0.0439, "step": 137370 }, { "epoch": 0.5732030943578874, "grad_norm": 0.38690416461666144, "learning_rate": 2.6418718093169747e-06, "loss": 0.0168, "step": 137375 }, { "epoch": 0.5732239570728777, "grad_norm": 0.5903309257054946, "learning_rate": 2.641823732539448e-06, "loss": 0.0262, "step": 137380 }, { "epoch": 0.5732448197878679, "grad_norm": 0.7984588914914906, "learning_rate": 2.6417756583865294e-06, "loss": 0.0253, "step": 137385 }, { "epoch": 0.5732656825028581, "grad_norm": 0.47300516434715584, "learning_rate": 2.64172758685798e-06, "loss": 0.019, "step": 137390 }, { "epoch": 0.5732865452178485, "grad_norm": 0.5653963266925458, "learning_rate": 2.64167951795356e-06, "loss": 0.0254, "step": 137395 }, { "epoch": 0.5733074079328387, "grad_norm": 0.5177800468885755, "learning_rate": 2.6416314516730317e-06, "loss": 0.0296, "step": 137400 }, { "epoch": 0.573328270647829, "grad_norm": 0.5427424699486985, "learning_rate": 2.641583388016157e-06, "loss": 0.0222, "step": 137405 }, { "epoch": 0.5733491333628193, "grad_norm": 1.0022957278937388, "learning_rate": 2.6415353269826955e-06, "loss": 0.0234, "step": 137410 }, { "epoch": 0.5733699960778096, "grad_norm": 0.3738455682242493, "learning_rate": 2.6414872685724096e-06, "loss": 0.0216, "step": 137415 }, { "epoch": 0.5733908587927998, "grad_norm": 0.8320638373833464, "learning_rate": 2.641439212785061e-06, "loss": 0.0203, "step": 137420 }, { "epoch": 0.5734117215077902, "grad_norm": 0.5527321331389433, "learning_rate": 2.64139115962041e-06, "loss": 0.0229, "step": 137425 }, { "epoch": 0.5734325842227804, "grad_norm": 0.7720210237408902, "learning_rate": 2.641343109078219e-06, "loss": 0.0324, "step": 137430 }, { "epoch": 0.5734534469377707, "grad_norm": 0.6650313274980505, "learning_rate": 2.641295061158249e-06, "loss": 0.0187, "step": 137435 }, { "epoch": 0.5734743096527609, "grad_norm": 0.48399273278597, "learning_rate": 2.6412470158602617e-06, "loss": 0.0264, "step": 137440 }, { "epoch": 0.5734951723677513, "grad_norm": 0.9515909706483267, "learning_rate": 2.6411989731840185e-06, "loss": 0.0273, "step": 137445 }, { "epoch": 0.5735160350827415, "grad_norm": 0.5550258636641996, "learning_rate": 2.6411509331292814e-06, "loss": 0.0214, "step": 137450 }, { "epoch": 0.5735368977977318, "grad_norm": 0.9103343300737498, "learning_rate": 2.6411028956958118e-06, "loss": 0.0151, "step": 137455 }, { "epoch": 0.5735577605127221, "grad_norm": 0.9268336169516806, "learning_rate": 2.641054860883371e-06, "loss": 0.0277, "step": 137460 }, { "epoch": 0.5735786232277124, "grad_norm": 0.9209063465200962, "learning_rate": 2.6410068286917206e-06, "loss": 0.0208, "step": 137465 }, { "epoch": 0.5735994859427026, "grad_norm": 1.4580338486699818, "learning_rate": 2.640958799120623e-06, "loss": 0.0275, "step": 137470 }, { "epoch": 0.573620348657693, "grad_norm": 0.850357924547802, "learning_rate": 2.640910772169839e-06, "loss": 0.0184, "step": 137475 }, { "epoch": 0.5736412113726832, "grad_norm": 0.8658173817230441, "learning_rate": 2.6408627478391315e-06, "loss": 0.0272, "step": 137480 }, { "epoch": 0.5736620740876734, "grad_norm": 0.6620930366042888, "learning_rate": 2.640814726128261e-06, "loss": 0.0246, "step": 137485 }, { "epoch": 0.5736829368026638, "grad_norm": 0.5493719432647837, "learning_rate": 2.64076670703699e-06, "loss": 0.021, "step": 137490 }, { "epoch": 0.573703799517654, "grad_norm": 1.1649832017086157, "learning_rate": 2.6407186905650805e-06, "loss": 0.023, "step": 137495 }, { "epoch": 0.5737246622326443, "grad_norm": 0.6411592824470652, "learning_rate": 2.640670676712295e-06, "loss": 0.0241, "step": 137500 }, { "epoch": 0.5737455249476345, "grad_norm": 0.7132362602526087, "learning_rate": 2.6406226654783933e-06, "loss": 0.0168, "step": 137505 }, { "epoch": 0.5737663876626249, "grad_norm": 0.5330332072914942, "learning_rate": 2.640574656863139e-06, "loss": 0.0228, "step": 137510 }, { "epoch": 0.5737872503776151, "grad_norm": 0.4695913034423428, "learning_rate": 2.6405266508662935e-06, "loss": 0.0211, "step": 137515 }, { "epoch": 0.5738081130926054, "grad_norm": 0.855726008824548, "learning_rate": 2.6404786474876188e-06, "loss": 0.0231, "step": 137520 }, { "epoch": 0.5738289758075957, "grad_norm": 0.3707053275413935, "learning_rate": 2.6404306467268777e-06, "loss": 0.0188, "step": 137525 }, { "epoch": 0.573849838522586, "grad_norm": 0.6189604046638344, "learning_rate": 2.640382648583831e-06, "loss": 0.0241, "step": 137530 }, { "epoch": 0.5738707012375762, "grad_norm": 0.739236717788507, "learning_rate": 2.6403346530582413e-06, "loss": 0.0174, "step": 137535 }, { "epoch": 0.5738915639525666, "grad_norm": 0.90987431813678, "learning_rate": 2.6402866601498713e-06, "loss": 0.0232, "step": 137540 }, { "epoch": 0.5739124266675568, "grad_norm": 0.4116256695360922, "learning_rate": 2.6402386698584824e-06, "loss": 0.0275, "step": 137545 }, { "epoch": 0.5739332893825471, "grad_norm": 1.1640069928463557, "learning_rate": 2.640190682183837e-06, "loss": 0.0236, "step": 137550 }, { "epoch": 0.5739541520975374, "grad_norm": 0.7959682740246476, "learning_rate": 2.6401426971256977e-06, "loss": 0.0249, "step": 137555 }, { "epoch": 0.5739750148125277, "grad_norm": 0.4568734883217593, "learning_rate": 2.6400947146838262e-06, "loss": 0.0175, "step": 137560 }, { "epoch": 0.5739958775275179, "grad_norm": 0.6209202996749859, "learning_rate": 2.6400467348579843e-06, "loss": 0.0221, "step": 137565 }, { "epoch": 0.5740167402425081, "grad_norm": 0.5946119466724319, "learning_rate": 2.6399987576479363e-06, "loss": 0.0201, "step": 137570 }, { "epoch": 0.5740376029574985, "grad_norm": 1.1735624230920878, "learning_rate": 2.6399507830534423e-06, "loss": 0.0246, "step": 137575 }, { "epoch": 0.5740584656724887, "grad_norm": 1.72293630057322, "learning_rate": 2.6399028110742654e-06, "loss": 0.0267, "step": 137580 }, { "epoch": 0.574079328387479, "grad_norm": 1.0662571696681609, "learning_rate": 2.6398548417101684e-06, "loss": 0.0273, "step": 137585 }, { "epoch": 0.5741001911024693, "grad_norm": 0.5330444431919106, "learning_rate": 2.6398068749609134e-06, "loss": 0.0259, "step": 137590 }, { "epoch": 0.5741210538174596, "grad_norm": 1.5440738438972517, "learning_rate": 2.6397589108262628e-06, "loss": 0.0246, "step": 137595 }, { "epoch": 0.5741419165324498, "grad_norm": 1.1428269343584017, "learning_rate": 2.6397109493059793e-06, "loss": 0.0263, "step": 137600 }, { "epoch": 0.5741627792474402, "grad_norm": 0.9922942625318859, "learning_rate": 2.6396629903998242e-06, "loss": 0.0226, "step": 137605 }, { "epoch": 0.5741836419624304, "grad_norm": 0.3073751124624279, "learning_rate": 2.639615034107562e-06, "loss": 0.0187, "step": 137610 }, { "epoch": 0.5742045046774207, "grad_norm": 2.1027205759856384, "learning_rate": 2.6395670804289545e-06, "loss": 0.0245, "step": 137615 }, { "epoch": 0.5742253673924109, "grad_norm": 0.7664894159149082, "learning_rate": 2.6395191293637644e-06, "loss": 0.0226, "step": 137620 }, { "epoch": 0.5742462301074013, "grad_norm": 0.6163190364307869, "learning_rate": 2.6394711809117534e-06, "loss": 0.0237, "step": 137625 }, { "epoch": 0.5742670928223915, "grad_norm": 1.1289316248407109, "learning_rate": 2.639423235072686e-06, "loss": 0.0295, "step": 137630 }, { "epoch": 0.5742879555373818, "grad_norm": 0.5286144711490914, "learning_rate": 2.6393752918463222e-06, "loss": 0.0246, "step": 137635 }, { "epoch": 0.5743088182523721, "grad_norm": 0.4244412659934971, "learning_rate": 2.6393273512324276e-06, "loss": 0.0167, "step": 137640 }, { "epoch": 0.5743296809673624, "grad_norm": 0.8650107829748103, "learning_rate": 2.6392794132307627e-06, "loss": 0.0168, "step": 137645 }, { "epoch": 0.5743505436823526, "grad_norm": 1.2933818596662476, "learning_rate": 2.6392314778410917e-06, "loss": 0.0251, "step": 137650 }, { "epoch": 0.574371406397343, "grad_norm": 0.6297198787454423, "learning_rate": 2.639183545063176e-06, "loss": 0.0189, "step": 137655 }, { "epoch": 0.5743922691123332, "grad_norm": 0.9627159567264102, "learning_rate": 2.63913561489678e-06, "loss": 0.0252, "step": 137660 }, { "epoch": 0.5744131318273235, "grad_norm": 0.9179005636577394, "learning_rate": 2.639087687341666e-06, "loss": 0.0393, "step": 137665 }, { "epoch": 0.5744339945423138, "grad_norm": 1.0083645912812715, "learning_rate": 2.6390397623975965e-06, "loss": 0.0294, "step": 137670 }, { "epoch": 0.574454857257304, "grad_norm": 0.6418461427320573, "learning_rate": 2.638991840064335e-06, "loss": 0.0173, "step": 137675 }, { "epoch": 0.5744757199722943, "grad_norm": 0.4477273105409842, "learning_rate": 2.6389439203416443e-06, "loss": 0.0245, "step": 137680 }, { "epoch": 0.5744965826872845, "grad_norm": 1.0832694465452484, "learning_rate": 2.638896003229287e-06, "loss": 0.0338, "step": 137685 }, { "epoch": 0.5745174454022749, "grad_norm": 0.498132541673939, "learning_rate": 2.638848088727027e-06, "loss": 0.0213, "step": 137690 }, { "epoch": 0.5745383081172651, "grad_norm": 0.5439355808607375, "learning_rate": 2.6388001768346263e-06, "loss": 0.019, "step": 137695 }, { "epoch": 0.5745591708322554, "grad_norm": 0.9441926296873743, "learning_rate": 2.6387522675518477e-06, "loss": 0.0274, "step": 137700 }, { "epoch": 0.5745800335472457, "grad_norm": 0.8475759460473795, "learning_rate": 2.6387043608784557e-06, "loss": 0.021, "step": 137705 }, { "epoch": 0.574600896262236, "grad_norm": 0.4884294875701189, "learning_rate": 2.6386564568142132e-06, "loss": 0.018, "step": 137710 }, { "epoch": 0.5746217589772262, "grad_norm": 0.8392352509065288, "learning_rate": 2.6386085553588826e-06, "loss": 0.0193, "step": 137715 }, { "epoch": 0.5746426216922166, "grad_norm": 0.7451413860110496, "learning_rate": 2.6385606565122275e-06, "loss": 0.0256, "step": 137720 }, { "epoch": 0.5746634844072068, "grad_norm": 0.8640245379327852, "learning_rate": 2.638512760274011e-06, "loss": 0.0202, "step": 137725 }, { "epoch": 0.5746843471221971, "grad_norm": 1.0430871393022934, "learning_rate": 2.6384648666439967e-06, "loss": 0.0234, "step": 137730 }, { "epoch": 0.5747052098371874, "grad_norm": 0.667926680898449, "learning_rate": 2.6384169756219474e-06, "loss": 0.0246, "step": 137735 }, { "epoch": 0.5747260725521777, "grad_norm": 0.5013447691684558, "learning_rate": 2.6383690872076266e-06, "loss": 0.0206, "step": 137740 }, { "epoch": 0.5747469352671679, "grad_norm": 0.3430078139347331, "learning_rate": 2.638321201400798e-06, "loss": 0.0213, "step": 137745 }, { "epoch": 0.5747677979821582, "grad_norm": 0.4599902049461251, "learning_rate": 2.6382733182012243e-06, "loss": 0.0162, "step": 137750 }, { "epoch": 0.5747886606971485, "grad_norm": 0.6624019771294973, "learning_rate": 2.6382254376086697e-06, "loss": 0.0245, "step": 137755 }, { "epoch": 0.5748095234121388, "grad_norm": 0.39328677783707333, "learning_rate": 2.638177559622897e-06, "loss": 0.0177, "step": 137760 }, { "epoch": 0.574830386127129, "grad_norm": 0.5779588365409066, "learning_rate": 2.63812968424367e-06, "loss": 0.0211, "step": 137765 }, { "epoch": 0.5748512488421194, "grad_norm": 0.6617790419666711, "learning_rate": 2.6380818114707517e-06, "loss": 0.0168, "step": 137770 }, { "epoch": 0.5748721115571096, "grad_norm": 0.7339118973725891, "learning_rate": 2.6380339413039067e-06, "loss": 0.0255, "step": 137775 }, { "epoch": 0.5748929742720998, "grad_norm": 0.5153349305270329, "learning_rate": 2.637986073742898e-06, "loss": 0.0237, "step": 137780 }, { "epoch": 0.5749138369870902, "grad_norm": 1.0896413803630485, "learning_rate": 2.637938208787489e-06, "loss": 0.0279, "step": 137785 }, { "epoch": 0.5749346997020804, "grad_norm": 0.6448890542344636, "learning_rate": 2.6378903464374435e-06, "loss": 0.0172, "step": 137790 }, { "epoch": 0.5749555624170707, "grad_norm": 0.9237588658691015, "learning_rate": 2.6378424866925245e-06, "loss": 0.0303, "step": 137795 }, { "epoch": 0.5749764251320609, "grad_norm": 2.4589270960608594, "learning_rate": 2.6377946295524966e-06, "loss": 0.0363, "step": 137800 }, { "epoch": 0.5749972878470513, "grad_norm": 0.8042368196478189, "learning_rate": 2.6377467750171233e-06, "loss": 0.0246, "step": 137805 }, { "epoch": 0.5750181505620415, "grad_norm": 0.8914655769637659, "learning_rate": 2.6376989230861684e-06, "loss": 0.0225, "step": 137810 }, { "epoch": 0.5750390132770318, "grad_norm": 1.0086535857244365, "learning_rate": 2.6376510737593947e-06, "loss": 0.0256, "step": 137815 }, { "epoch": 0.5750598759920221, "grad_norm": 0.4236956729230135, "learning_rate": 2.6376032270365676e-06, "loss": 0.0281, "step": 137820 }, { "epoch": 0.5750807387070124, "grad_norm": 0.5531065891689564, "learning_rate": 2.63755538291745e-06, "loss": 0.023, "step": 137825 }, { "epoch": 0.5751016014220026, "grad_norm": 1.008221016738715, "learning_rate": 2.637507541401805e-06, "loss": 0.0247, "step": 137830 }, { "epoch": 0.575122464136993, "grad_norm": 0.9701381892188268, "learning_rate": 2.637459702489399e-06, "loss": 0.0249, "step": 137835 }, { "epoch": 0.5751433268519832, "grad_norm": 0.7167318054647945, "learning_rate": 2.637411866179993e-06, "loss": 0.0212, "step": 137840 }, { "epoch": 0.5751641895669735, "grad_norm": 1.3365204368674062, "learning_rate": 2.637364032473353e-06, "loss": 0.0228, "step": 137845 }, { "epoch": 0.5751850522819638, "grad_norm": 0.8248575432641245, "learning_rate": 2.6373162013692424e-06, "loss": 0.0284, "step": 137850 }, { "epoch": 0.5752059149969541, "grad_norm": 0.4610859038647131, "learning_rate": 2.637268372867424e-06, "loss": 0.0217, "step": 137855 }, { "epoch": 0.5752267777119443, "grad_norm": 0.6245693744383373, "learning_rate": 2.6372205469676637e-06, "loss": 0.0152, "step": 137860 }, { "epoch": 0.5752476404269345, "grad_norm": 0.7617773471078108, "learning_rate": 2.637172723669724e-06, "loss": 0.0205, "step": 137865 }, { "epoch": 0.5752685031419249, "grad_norm": 0.702993770639128, "learning_rate": 2.637124902973371e-06, "loss": 0.0245, "step": 137870 }, { "epoch": 0.5752893658569151, "grad_norm": 0.48299324025666124, "learning_rate": 2.637077084878367e-06, "loss": 0.021, "step": 137875 }, { "epoch": 0.5753102285719054, "grad_norm": 0.645103410935266, "learning_rate": 2.6370292693844764e-06, "loss": 0.0204, "step": 137880 }, { "epoch": 0.5753310912868957, "grad_norm": 0.4808545218700346, "learning_rate": 2.6369814564914637e-06, "loss": 0.0205, "step": 137885 }, { "epoch": 0.575351954001886, "grad_norm": 0.5082103091055205, "learning_rate": 2.6369336461990934e-06, "loss": 0.0217, "step": 137890 }, { "epoch": 0.5753728167168762, "grad_norm": 0.40004634142861334, "learning_rate": 2.63688583850713e-06, "loss": 0.0231, "step": 137895 }, { "epoch": 0.5753936794318666, "grad_norm": 0.5711114733939905, "learning_rate": 2.6368380334153364e-06, "loss": 0.0222, "step": 137900 }, { "epoch": 0.5754145421468568, "grad_norm": 0.7165627820639252, "learning_rate": 2.6367902309234788e-06, "loss": 0.0247, "step": 137905 }, { "epoch": 0.5754354048618471, "grad_norm": 0.5097344767033559, "learning_rate": 2.63674243103132e-06, "loss": 0.0248, "step": 137910 }, { "epoch": 0.5754562675768374, "grad_norm": 0.42102718188592275, "learning_rate": 2.636694633738625e-06, "loss": 0.025, "step": 137915 }, { "epoch": 0.5754771302918277, "grad_norm": 0.8309626343819809, "learning_rate": 2.6366468390451584e-06, "loss": 0.0182, "step": 137920 }, { "epoch": 0.5754979930068179, "grad_norm": 0.7849825011843384, "learning_rate": 2.6365990469506835e-06, "loss": 0.0208, "step": 137925 }, { "epoch": 0.5755188557218082, "grad_norm": 1.0774597030924162, "learning_rate": 2.636551257454967e-06, "loss": 0.025, "step": 137930 }, { "epoch": 0.5755397184367985, "grad_norm": 0.9713745852647844, "learning_rate": 2.636503470557771e-06, "loss": 0.0219, "step": 137935 }, { "epoch": 0.5755605811517888, "grad_norm": 0.5036738902143502, "learning_rate": 2.6364556862588614e-06, "loss": 0.0215, "step": 137940 }, { "epoch": 0.575581443866779, "grad_norm": 1.0471306617426852, "learning_rate": 2.636407904558002e-06, "loss": 0.025, "step": 137945 }, { "epoch": 0.5756023065817694, "grad_norm": 0.5197837028110667, "learning_rate": 2.6363601254549583e-06, "loss": 0.0238, "step": 137950 }, { "epoch": 0.5756231692967596, "grad_norm": 0.45986152027124483, "learning_rate": 2.6363123489494943e-06, "loss": 0.0209, "step": 137955 }, { "epoch": 0.5756440320117499, "grad_norm": 1.0877602317174069, "learning_rate": 2.636264575041374e-06, "loss": 0.0196, "step": 137960 }, { "epoch": 0.5756648947267402, "grad_norm": 0.4787581702928097, "learning_rate": 2.636216803730363e-06, "loss": 0.0271, "step": 137965 }, { "epoch": 0.5756857574417305, "grad_norm": 1.031814613048329, "learning_rate": 2.6361690350162265e-06, "loss": 0.0319, "step": 137970 }, { "epoch": 0.5757066201567207, "grad_norm": 0.4219452427255423, "learning_rate": 2.6361212688987275e-06, "loss": 0.0179, "step": 137975 }, { "epoch": 0.5757274828717109, "grad_norm": 0.5050605438708271, "learning_rate": 2.6360735053776327e-06, "loss": 0.0186, "step": 137980 }, { "epoch": 0.5757483455867013, "grad_norm": 0.8277999427287461, "learning_rate": 2.6360257444527053e-06, "loss": 0.0255, "step": 137985 }, { "epoch": 0.5757692083016915, "grad_norm": 0.5825400303501943, "learning_rate": 2.635977986123711e-06, "loss": 0.0258, "step": 137990 }, { "epoch": 0.5757900710166818, "grad_norm": 2.922342539046189, "learning_rate": 2.635930230390415e-06, "loss": 0.0321, "step": 137995 }, { "epoch": 0.5758109337316721, "grad_norm": 0.8964103949014524, "learning_rate": 2.635882477252581e-06, "loss": 0.0319, "step": 138000 }, { "epoch": 0.5758317964466624, "grad_norm": 0.9090220352723052, "learning_rate": 2.6358347267099748e-06, "loss": 0.0179, "step": 138005 }, { "epoch": 0.5758526591616526, "grad_norm": 0.7082475314572851, "learning_rate": 2.635786978762361e-06, "loss": 0.0659, "step": 138010 }, { "epoch": 0.575873521876643, "grad_norm": 0.779227430386823, "learning_rate": 2.635739233409504e-06, "loss": 0.0215, "step": 138015 }, { "epoch": 0.5758943845916332, "grad_norm": 1.0116457424729417, "learning_rate": 2.63569149065117e-06, "loss": 0.0282, "step": 138020 }, { "epoch": 0.5759152473066235, "grad_norm": 0.35992427499149304, "learning_rate": 2.6356437504871235e-06, "loss": 0.0264, "step": 138025 }, { "epoch": 0.5759361100216138, "grad_norm": 1.040308231639451, "learning_rate": 2.6355960129171293e-06, "loss": 0.0238, "step": 138030 }, { "epoch": 0.5759569727366041, "grad_norm": 0.4118769549931215, "learning_rate": 2.635548277940953e-06, "loss": 0.0177, "step": 138035 }, { "epoch": 0.5759778354515943, "grad_norm": 1.221550322071465, "learning_rate": 2.635500545558359e-06, "loss": 0.0288, "step": 138040 }, { "epoch": 0.5759986981665846, "grad_norm": 0.8361309662198566, "learning_rate": 2.6354528157691127e-06, "loss": 0.0243, "step": 138045 }, { "epoch": 0.5760195608815749, "grad_norm": 0.9717712751878221, "learning_rate": 2.6354050885729797e-06, "loss": 0.036, "step": 138050 }, { "epoch": 0.5760404235965652, "grad_norm": 0.7634318864464282, "learning_rate": 2.635357363969725e-06, "loss": 0.0183, "step": 138055 }, { "epoch": 0.5760612863115554, "grad_norm": 0.6398276343969593, "learning_rate": 2.635309641959114e-06, "loss": 0.0218, "step": 138060 }, { "epoch": 0.5760821490265458, "grad_norm": 0.47334332653239036, "learning_rate": 2.6352619225409116e-06, "loss": 0.0186, "step": 138065 }, { "epoch": 0.576103011741536, "grad_norm": 1.0140584153575953, "learning_rate": 2.6352142057148835e-06, "loss": 0.0199, "step": 138070 }, { "epoch": 0.5761238744565262, "grad_norm": 0.2744694608398922, "learning_rate": 2.635166491480794e-06, "loss": 0.022, "step": 138075 }, { "epoch": 0.5761447371715166, "grad_norm": 0.6984256841566473, "learning_rate": 2.6351187798384097e-06, "loss": 0.0245, "step": 138080 }, { "epoch": 0.5761655998865068, "grad_norm": 0.7276817615524928, "learning_rate": 2.6350710707874953e-06, "loss": 0.0241, "step": 138085 }, { "epoch": 0.5761864626014971, "grad_norm": 0.9934884292859343, "learning_rate": 2.635023364327817e-06, "loss": 0.0304, "step": 138090 }, { "epoch": 0.5762073253164874, "grad_norm": 0.3178369659494151, "learning_rate": 2.634975660459139e-06, "loss": 0.0184, "step": 138095 }, { "epoch": 0.5762281880314777, "grad_norm": 0.514348568773003, "learning_rate": 2.634927959181227e-06, "loss": 0.0212, "step": 138100 }, { "epoch": 0.5762490507464679, "grad_norm": 0.5612246218898721, "learning_rate": 2.6348802604938477e-06, "loss": 0.0159, "step": 138105 }, { "epoch": 0.5762699134614582, "grad_norm": 0.9503048026600485, "learning_rate": 2.634832564396766e-06, "loss": 0.0216, "step": 138110 }, { "epoch": 0.5762907761764485, "grad_norm": 0.8449493177623856, "learning_rate": 2.634784870889747e-06, "loss": 0.0203, "step": 138115 }, { "epoch": 0.5763116388914388, "grad_norm": 0.840710302532579, "learning_rate": 2.6347371799725567e-06, "loss": 0.0134, "step": 138120 }, { "epoch": 0.576332501606429, "grad_norm": 1.373634722774537, "learning_rate": 2.6346894916449606e-06, "loss": 0.0205, "step": 138125 }, { "epoch": 0.5763533643214194, "grad_norm": 0.7933920101744605, "learning_rate": 2.6346418059067243e-06, "loss": 0.022, "step": 138130 }, { "epoch": 0.5763742270364096, "grad_norm": 0.8446227768823894, "learning_rate": 2.634594122757614e-06, "loss": 0.0203, "step": 138135 }, { "epoch": 0.5763950897513999, "grad_norm": 1.1051601282748407, "learning_rate": 2.6345464421973943e-06, "loss": 0.0334, "step": 138140 }, { "epoch": 0.5764159524663902, "grad_norm": 0.7377376159595181, "learning_rate": 2.6344987642258322e-06, "loss": 0.0242, "step": 138145 }, { "epoch": 0.5764368151813805, "grad_norm": 0.6974461564965628, "learning_rate": 2.6344510888426927e-06, "loss": 0.0233, "step": 138150 }, { "epoch": 0.5764576778963707, "grad_norm": 0.623097011732755, "learning_rate": 2.6344034160477416e-06, "loss": 0.0248, "step": 138155 }, { "epoch": 0.576478540611361, "grad_norm": 0.7868529931520928, "learning_rate": 2.6343557458407454e-06, "loss": 0.0203, "step": 138160 }, { "epoch": 0.5764994033263513, "grad_norm": 0.4019181309247986, "learning_rate": 2.6343080782214693e-06, "loss": 0.0269, "step": 138165 }, { "epoch": 0.5765202660413415, "grad_norm": 0.6123329317611251, "learning_rate": 2.6342604131896793e-06, "loss": 0.0164, "step": 138170 }, { "epoch": 0.5765411287563318, "grad_norm": 0.7866384441695948, "learning_rate": 2.6342127507451417e-06, "loss": 0.0251, "step": 138175 }, { "epoch": 0.5765619914713221, "grad_norm": 0.7245260286878193, "learning_rate": 2.6341650908876214e-06, "loss": 0.0221, "step": 138180 }, { "epoch": 0.5765828541863124, "grad_norm": 0.812333267655357, "learning_rate": 2.6341174336168864e-06, "loss": 0.0246, "step": 138185 }, { "epoch": 0.5766037169013026, "grad_norm": 0.808495661855682, "learning_rate": 2.6340697789327003e-06, "loss": 0.0213, "step": 138190 }, { "epoch": 0.576624579616293, "grad_norm": 0.3004961429108886, "learning_rate": 2.634022126834831e-06, "loss": 0.0203, "step": 138195 }, { "epoch": 0.5766454423312832, "grad_norm": 0.42506086285612316, "learning_rate": 2.6339744773230437e-06, "loss": 0.0237, "step": 138200 }, { "epoch": 0.5766663050462735, "grad_norm": 0.30609515989662417, "learning_rate": 2.6339268303971043e-06, "loss": 0.0188, "step": 138205 }, { "epoch": 0.5766871677612638, "grad_norm": 0.5125155750299175, "learning_rate": 2.633879186056779e-06, "loss": 0.0291, "step": 138210 }, { "epoch": 0.5767080304762541, "grad_norm": 1.5811261295550232, "learning_rate": 2.6338315443018353e-06, "loss": 0.0292, "step": 138215 }, { "epoch": 0.5767288931912443, "grad_norm": 0.756253212252117, "learning_rate": 2.6337839051320372e-06, "loss": 0.0246, "step": 138220 }, { "epoch": 0.5767497559062346, "grad_norm": 1.400768382082598, "learning_rate": 2.6337362685471525e-06, "loss": 0.0251, "step": 138225 }, { "epoch": 0.5767706186212249, "grad_norm": 1.0082983084504609, "learning_rate": 2.633688634546947e-06, "loss": 0.0223, "step": 138230 }, { "epoch": 0.5767914813362152, "grad_norm": 0.9021115451219026, "learning_rate": 2.633641003131187e-06, "loss": 0.0333, "step": 138235 }, { "epoch": 0.5768123440512054, "grad_norm": 1.537566377239725, "learning_rate": 2.6335933742996387e-06, "loss": 0.0212, "step": 138240 }, { "epoch": 0.5768332067661958, "grad_norm": 0.5962218533989324, "learning_rate": 2.6335457480520686e-06, "loss": 0.0199, "step": 138245 }, { "epoch": 0.576854069481186, "grad_norm": 0.45704439442349076, "learning_rate": 2.633498124388243e-06, "loss": 0.0287, "step": 138250 }, { "epoch": 0.5768749321961762, "grad_norm": 0.7035005711916565, "learning_rate": 2.633450503307928e-06, "loss": 0.0212, "step": 138255 }, { "epoch": 0.5768957949111666, "grad_norm": 0.5636560223153354, "learning_rate": 2.63340288481089e-06, "loss": 0.0191, "step": 138260 }, { "epoch": 0.5769166576261568, "grad_norm": 0.6940125540252863, "learning_rate": 2.6333552688968965e-06, "loss": 0.0207, "step": 138265 }, { "epoch": 0.5769375203411471, "grad_norm": 0.6334527149177346, "learning_rate": 2.6333076555657127e-06, "loss": 0.0188, "step": 138270 }, { "epoch": 0.5769583830561374, "grad_norm": 0.7667927525242241, "learning_rate": 2.6332600448171057e-06, "loss": 0.0228, "step": 138275 }, { "epoch": 0.5769792457711277, "grad_norm": 0.5900161535983224, "learning_rate": 2.6332124366508417e-06, "loss": 0.0278, "step": 138280 }, { "epoch": 0.5770001084861179, "grad_norm": 0.6259272405317197, "learning_rate": 2.6331648310666874e-06, "loss": 0.025, "step": 138285 }, { "epoch": 0.5770209712011082, "grad_norm": 0.5149862111634911, "learning_rate": 2.63311722806441e-06, "loss": 0.0156, "step": 138290 }, { "epoch": 0.5770418339160985, "grad_norm": 0.8123954736529593, "learning_rate": 2.6330696276437754e-06, "loss": 0.0216, "step": 138295 }, { "epoch": 0.5770626966310888, "grad_norm": 0.8184212501848769, "learning_rate": 2.633022029804551e-06, "loss": 0.0187, "step": 138300 }, { "epoch": 0.577083559346079, "grad_norm": 1.254748677788235, "learning_rate": 2.6329744345465025e-06, "loss": 0.0248, "step": 138305 }, { "epoch": 0.5771044220610694, "grad_norm": 0.4635426537635943, "learning_rate": 2.6329268418693972e-06, "loss": 0.0221, "step": 138310 }, { "epoch": 0.5771252847760596, "grad_norm": 0.894818046751027, "learning_rate": 2.632879251773002e-06, "loss": 0.0297, "step": 138315 }, { "epoch": 0.5771461474910499, "grad_norm": 0.6040825172683965, "learning_rate": 2.6328316642570835e-06, "loss": 0.0163, "step": 138320 }, { "epoch": 0.5771670102060402, "grad_norm": 0.4562541463044235, "learning_rate": 2.6327840793214083e-06, "loss": 0.0177, "step": 138325 }, { "epoch": 0.5771878729210305, "grad_norm": 0.743911286385022, "learning_rate": 2.6327364969657434e-06, "loss": 0.0266, "step": 138330 }, { "epoch": 0.5772087356360207, "grad_norm": 0.5262089834621977, "learning_rate": 2.632688917189855e-06, "loss": 0.0332, "step": 138335 }, { "epoch": 0.577229598351011, "grad_norm": 0.7358172973325393, "learning_rate": 2.6326413399935113e-06, "loss": 0.0249, "step": 138340 }, { "epoch": 0.5772504610660013, "grad_norm": 0.8464336169750383, "learning_rate": 2.6325937653764783e-06, "loss": 0.0239, "step": 138345 }, { "epoch": 0.5772713237809916, "grad_norm": 0.41586140843470887, "learning_rate": 2.6325461933385234e-06, "loss": 0.0213, "step": 138350 }, { "epoch": 0.5772921864959818, "grad_norm": 0.7296808919237135, "learning_rate": 2.6324986238794137e-06, "loss": 0.0192, "step": 138355 }, { "epoch": 0.5773130492109722, "grad_norm": 0.8852858310691052, "learning_rate": 2.6324510569989154e-06, "loss": 0.0192, "step": 138360 }, { "epoch": 0.5773339119259624, "grad_norm": 1.438177829226331, "learning_rate": 2.6324034926967964e-06, "loss": 0.028, "step": 138365 }, { "epoch": 0.5773547746409526, "grad_norm": 1.0321404708387827, "learning_rate": 2.632355930972823e-06, "loss": 0.0264, "step": 138370 }, { "epoch": 0.577375637355943, "grad_norm": 0.9756980553846821, "learning_rate": 2.6323083718267628e-06, "loss": 0.0237, "step": 138375 }, { "epoch": 0.5773965000709332, "grad_norm": 1.1981530529309974, "learning_rate": 2.632260815258383e-06, "loss": 0.0285, "step": 138380 }, { "epoch": 0.5774173627859235, "grad_norm": 0.5817982500479791, "learning_rate": 2.632213261267451e-06, "loss": 0.026, "step": 138385 }, { "epoch": 0.5774382255009138, "grad_norm": 0.7410696761701779, "learning_rate": 2.6321657098537334e-06, "loss": 0.0242, "step": 138390 }, { "epoch": 0.5774590882159041, "grad_norm": 0.3332550536842306, "learning_rate": 2.6321181610169977e-06, "loss": 0.0176, "step": 138395 }, { "epoch": 0.5774799509308943, "grad_norm": 0.5070080223853464, "learning_rate": 2.632070614757011e-06, "loss": 0.023, "step": 138400 }, { "epoch": 0.5775008136458846, "grad_norm": 0.9434517439432828, "learning_rate": 2.6320230710735406e-06, "loss": 0.0248, "step": 138405 }, { "epoch": 0.5775216763608749, "grad_norm": 0.4028217549502984, "learning_rate": 2.631975529966354e-06, "loss": 0.0163, "step": 138410 }, { "epoch": 0.5775425390758652, "grad_norm": 0.7684003129135311, "learning_rate": 2.6319279914352184e-06, "loss": 0.0238, "step": 138415 }, { "epoch": 0.5775634017908554, "grad_norm": 0.6938955976865013, "learning_rate": 2.631880455479901e-06, "loss": 0.0212, "step": 138420 }, { "epoch": 0.5775842645058458, "grad_norm": 0.5540037074294002, "learning_rate": 2.6318329221001694e-06, "loss": 0.0191, "step": 138425 }, { "epoch": 0.577605127220836, "grad_norm": 0.5025622257594455, "learning_rate": 2.6317853912957915e-06, "loss": 0.0294, "step": 138430 }, { "epoch": 0.5776259899358263, "grad_norm": 0.6852450057121253, "learning_rate": 2.6317378630665335e-06, "loss": 0.0165, "step": 138435 }, { "epoch": 0.5776468526508166, "grad_norm": 0.762055719283001, "learning_rate": 2.631690337412164e-06, "loss": 0.0257, "step": 138440 }, { "epoch": 0.5776677153658069, "grad_norm": 0.6064803705299243, "learning_rate": 2.6316428143324494e-06, "loss": 0.0235, "step": 138445 }, { "epoch": 0.5776885780807971, "grad_norm": 0.6289887875588572, "learning_rate": 2.6315952938271595e-06, "loss": 0.0189, "step": 138450 }, { "epoch": 0.5777094407957875, "grad_norm": 1.008969397417806, "learning_rate": 2.631547775896059e-06, "loss": 0.0266, "step": 138455 }, { "epoch": 0.5777303035107777, "grad_norm": 0.6642473710051325, "learning_rate": 2.6315002605389173e-06, "loss": 0.0247, "step": 138460 }, { "epoch": 0.5777511662257679, "grad_norm": 0.2508766758795419, "learning_rate": 2.6314527477555016e-06, "loss": 0.0158, "step": 138465 }, { "epoch": 0.5777720289407582, "grad_norm": 0.7328483929054807, "learning_rate": 2.63140523754558e-06, "loss": 0.0226, "step": 138470 }, { "epoch": 0.5777928916557485, "grad_norm": 0.5155762910899265, "learning_rate": 2.6313577299089195e-06, "loss": 0.0241, "step": 138475 }, { "epoch": 0.5778137543707388, "grad_norm": 0.9842401409814309, "learning_rate": 2.6313102248452876e-06, "loss": 0.0215, "step": 138480 }, { "epoch": 0.577834617085729, "grad_norm": 0.3261236352979012, "learning_rate": 2.631262722354453e-06, "loss": 0.0159, "step": 138485 }, { "epoch": 0.5778554798007194, "grad_norm": 0.9594807477055645, "learning_rate": 2.6312152224361824e-06, "loss": 0.0218, "step": 138490 }, { "epoch": 0.5778763425157096, "grad_norm": 0.8059943599861001, "learning_rate": 2.6311677250902446e-06, "loss": 0.0232, "step": 138495 }, { "epoch": 0.5778972052306999, "grad_norm": 0.9835724894407487, "learning_rate": 2.6311202303164075e-06, "loss": 0.0264, "step": 138500 }, { "epoch": 0.5779180679456902, "grad_norm": 0.49309659641310366, "learning_rate": 2.631072738114438e-06, "loss": 0.0214, "step": 138505 }, { "epoch": 0.5779389306606805, "grad_norm": 0.5109932025782303, "learning_rate": 2.6310252484841043e-06, "loss": 0.0299, "step": 138510 }, { "epoch": 0.5779597933756707, "grad_norm": 0.4156484078621282, "learning_rate": 2.6309777614251745e-06, "loss": 0.0201, "step": 138515 }, { "epoch": 0.577980656090661, "grad_norm": 0.5212040188126219, "learning_rate": 2.6309302769374175e-06, "loss": 0.0247, "step": 138520 }, { "epoch": 0.5780015188056513, "grad_norm": 0.4381715600358539, "learning_rate": 2.630882795020599e-06, "loss": 0.0182, "step": 138525 }, { "epoch": 0.5780223815206416, "grad_norm": 0.546870316040154, "learning_rate": 2.630835315674489e-06, "loss": 0.02, "step": 138530 }, { "epoch": 0.5780432442356318, "grad_norm": 1.2297900236521209, "learning_rate": 2.630787838898855e-06, "loss": 0.0223, "step": 138535 }, { "epoch": 0.5780641069506222, "grad_norm": 0.5144597264685202, "learning_rate": 2.630740364693465e-06, "loss": 0.0195, "step": 138540 }, { "epoch": 0.5780849696656124, "grad_norm": 0.8123251415377427, "learning_rate": 2.630692893058087e-06, "loss": 0.0233, "step": 138545 }, { "epoch": 0.5781058323806026, "grad_norm": 0.7620723737004299, "learning_rate": 2.6306454239924897e-06, "loss": 0.0264, "step": 138550 }, { "epoch": 0.578126695095593, "grad_norm": 0.7930567773221878, "learning_rate": 2.6305979574964396e-06, "loss": 0.0194, "step": 138555 }, { "epoch": 0.5781475578105832, "grad_norm": 0.4684245510681217, "learning_rate": 2.630550493569707e-06, "loss": 0.0254, "step": 138560 }, { "epoch": 0.5781684205255735, "grad_norm": 0.6810844793443447, "learning_rate": 2.630503032212059e-06, "loss": 0.0227, "step": 138565 }, { "epoch": 0.5781892832405638, "grad_norm": 0.5189467496024384, "learning_rate": 2.6304555734232634e-06, "loss": 0.0319, "step": 138570 }, { "epoch": 0.5782101459555541, "grad_norm": 0.7089502025259206, "learning_rate": 2.6304081172030897e-06, "loss": 0.018, "step": 138575 }, { "epoch": 0.5782310086705443, "grad_norm": 0.5802924283487598, "learning_rate": 2.6303606635513053e-06, "loss": 0.0195, "step": 138580 }, { "epoch": 0.5782518713855346, "grad_norm": 0.6439797291747034, "learning_rate": 2.6303132124676788e-06, "loss": 0.0249, "step": 138585 }, { "epoch": 0.5782727341005249, "grad_norm": 0.7494888350056107, "learning_rate": 2.630265763951979e-06, "loss": 0.0236, "step": 138590 }, { "epoch": 0.5782935968155152, "grad_norm": 0.9076065562880323, "learning_rate": 2.630218318003974e-06, "loss": 0.0213, "step": 138595 }, { "epoch": 0.5783144595305054, "grad_norm": 1.0114940671500374, "learning_rate": 2.6301708746234318e-06, "loss": 0.023, "step": 138600 }, { "epoch": 0.5783353222454958, "grad_norm": 0.31368016232493856, "learning_rate": 2.630123433810121e-06, "loss": 0.0266, "step": 138605 }, { "epoch": 0.578356184960486, "grad_norm": 0.8334112400887956, "learning_rate": 2.6300759955638104e-06, "loss": 0.0255, "step": 138610 }, { "epoch": 0.5783770476754763, "grad_norm": 0.8531746575607955, "learning_rate": 2.6300285598842683e-06, "loss": 0.0273, "step": 138615 }, { "epoch": 0.5783979103904666, "grad_norm": 0.30100757343678697, "learning_rate": 2.6299811267712635e-06, "loss": 0.0183, "step": 138620 }, { "epoch": 0.5784187731054569, "grad_norm": 0.2935808217794845, "learning_rate": 2.6299336962245635e-06, "loss": 0.0223, "step": 138625 }, { "epoch": 0.5784396358204471, "grad_norm": 0.7549784221658581, "learning_rate": 2.6298862682439393e-06, "loss": 0.0247, "step": 138630 }, { "epoch": 0.5784604985354375, "grad_norm": 0.7436904034680833, "learning_rate": 2.6298388428291566e-06, "loss": 0.0188, "step": 138635 }, { "epoch": 0.5784813612504277, "grad_norm": 0.9067957652844106, "learning_rate": 2.6297914199799863e-06, "loss": 0.0266, "step": 138640 }, { "epoch": 0.578502223965418, "grad_norm": 0.7567111503419958, "learning_rate": 2.6297439996961955e-06, "loss": 0.0255, "step": 138645 }, { "epoch": 0.5785230866804082, "grad_norm": 0.9839026159413977, "learning_rate": 2.6296965819775537e-06, "loss": 0.0225, "step": 138650 }, { "epoch": 0.5785439493953985, "grad_norm": 0.7629630737737769, "learning_rate": 2.6296491668238306e-06, "loss": 0.0251, "step": 138655 }, { "epoch": 0.5785648121103888, "grad_norm": 0.3615382738155905, "learning_rate": 2.629601754234793e-06, "loss": 0.0305, "step": 138660 }, { "epoch": 0.578585674825379, "grad_norm": 0.4965976102520798, "learning_rate": 2.6295543442102113e-06, "loss": 0.0258, "step": 138665 }, { "epoch": 0.5786065375403694, "grad_norm": 0.1947308429875085, "learning_rate": 2.6295069367498526e-06, "loss": 0.0281, "step": 138670 }, { "epoch": 0.5786274002553596, "grad_norm": 0.632110009965958, "learning_rate": 2.6294595318534884e-06, "loss": 0.0257, "step": 138675 }, { "epoch": 0.5786482629703499, "grad_norm": 0.6242096418939984, "learning_rate": 2.6294121295208846e-06, "loss": 0.0188, "step": 138680 }, { "epoch": 0.5786691256853402, "grad_norm": 0.5050139292910573, "learning_rate": 2.629364729751812e-06, "loss": 0.0244, "step": 138685 }, { "epoch": 0.5786899884003305, "grad_norm": 0.47327594262966954, "learning_rate": 2.629317332546039e-06, "loss": 0.0195, "step": 138690 }, { "epoch": 0.5787108511153207, "grad_norm": 0.37056324663634316, "learning_rate": 2.6292699379033353e-06, "loss": 0.0195, "step": 138695 }, { "epoch": 0.578731713830311, "grad_norm": 0.7199277227862576, "learning_rate": 2.6292225458234686e-06, "loss": 0.023, "step": 138700 }, { "epoch": 0.5787525765453013, "grad_norm": 0.44558808702392233, "learning_rate": 2.629175156306209e-06, "loss": 0.0192, "step": 138705 }, { "epoch": 0.5787734392602916, "grad_norm": 0.7656772083807141, "learning_rate": 2.629127769351325e-06, "loss": 0.0182, "step": 138710 }, { "epoch": 0.5787943019752818, "grad_norm": 0.7100653950718813, "learning_rate": 2.6290803849585863e-06, "loss": 0.0276, "step": 138715 }, { "epoch": 0.5788151646902722, "grad_norm": 0.9041800342608425, "learning_rate": 2.629033003127761e-06, "loss": 0.0237, "step": 138720 }, { "epoch": 0.5788360274052624, "grad_norm": 0.5564593137671606, "learning_rate": 2.6289856238586193e-06, "loss": 0.0182, "step": 138725 }, { "epoch": 0.5788568901202527, "grad_norm": 1.0572333408420482, "learning_rate": 2.6289382471509293e-06, "loss": 0.0294, "step": 138730 }, { "epoch": 0.578877752835243, "grad_norm": 0.47918429355451414, "learning_rate": 2.6288908730044615e-06, "loss": 0.021, "step": 138735 }, { "epoch": 0.5788986155502333, "grad_norm": 0.9900086139604555, "learning_rate": 2.6288435014189843e-06, "loss": 0.0353, "step": 138740 }, { "epoch": 0.5789194782652235, "grad_norm": 0.6066364298297185, "learning_rate": 2.6287961323942672e-06, "loss": 0.0166, "step": 138745 }, { "epoch": 0.5789403409802139, "grad_norm": 0.45405006581407137, "learning_rate": 2.62874876593008e-06, "loss": 0.0203, "step": 138750 }, { "epoch": 0.5789612036952041, "grad_norm": 0.5393542500253473, "learning_rate": 2.6287014020261904e-06, "loss": 0.024, "step": 138755 }, { "epoch": 0.5789820664101943, "grad_norm": 1.0735733867189292, "learning_rate": 2.628654040682369e-06, "loss": 0.0296, "step": 138760 }, { "epoch": 0.5790029291251846, "grad_norm": 0.5471034051201135, "learning_rate": 2.6286066818983853e-06, "loss": 0.02, "step": 138765 }, { "epoch": 0.5790237918401749, "grad_norm": 1.0285123944476062, "learning_rate": 2.628559325674009e-06, "loss": 0.0296, "step": 138770 }, { "epoch": 0.5790446545551652, "grad_norm": 1.4379057899448489, "learning_rate": 2.628511972009008e-06, "loss": 0.0284, "step": 138775 }, { "epoch": 0.5790655172701554, "grad_norm": 0.9231634415899452, "learning_rate": 2.628464620903153e-06, "loss": 0.0287, "step": 138780 }, { "epoch": 0.5790863799851458, "grad_norm": 1.1024305103907024, "learning_rate": 2.6284172723562134e-06, "loss": 0.0234, "step": 138785 }, { "epoch": 0.579107242700136, "grad_norm": 0.5669060452099753, "learning_rate": 2.6283699263679584e-06, "loss": 0.0288, "step": 138790 }, { "epoch": 0.5791281054151263, "grad_norm": 0.3812568208173258, "learning_rate": 2.6283225829381583e-06, "loss": 0.0207, "step": 138795 }, { "epoch": 0.5791489681301166, "grad_norm": 0.9936020240330092, "learning_rate": 2.6282752420665815e-06, "loss": 0.0262, "step": 138800 }, { "epoch": 0.5791698308451069, "grad_norm": 0.5922635731372632, "learning_rate": 2.6282279037529977e-06, "loss": 0.0194, "step": 138805 }, { "epoch": 0.5791906935600971, "grad_norm": 0.4315529825913056, "learning_rate": 2.6281805679971784e-06, "loss": 0.0212, "step": 138810 }, { "epoch": 0.5792115562750875, "grad_norm": 2.1983298263753515, "learning_rate": 2.628133234798891e-06, "loss": 0.0321, "step": 138815 }, { "epoch": 0.5792324189900777, "grad_norm": 0.4474600183315363, "learning_rate": 2.6280859041579064e-06, "loss": 0.0262, "step": 138820 }, { "epoch": 0.579253281705068, "grad_norm": 0.3186749823582148, "learning_rate": 2.628038576073994e-06, "loss": 0.0281, "step": 138825 }, { "epoch": 0.5792741444200582, "grad_norm": 0.595292649274524, "learning_rate": 2.627991250546924e-06, "loss": 0.0246, "step": 138830 }, { "epoch": 0.5792950071350486, "grad_norm": 0.5428900736422355, "learning_rate": 2.6279439275764657e-06, "loss": 0.024, "step": 138835 }, { "epoch": 0.5793158698500388, "grad_norm": 0.8149603989276654, "learning_rate": 2.627896607162389e-06, "loss": 0.0233, "step": 138840 }, { "epoch": 0.579336732565029, "grad_norm": 0.6385219535189394, "learning_rate": 2.627849289304464e-06, "loss": 0.0272, "step": 138845 }, { "epoch": 0.5793575952800194, "grad_norm": 0.7556582333825521, "learning_rate": 2.62780197400246e-06, "loss": 0.0242, "step": 138850 }, { "epoch": 0.5793784579950096, "grad_norm": 0.5084401338058422, "learning_rate": 2.6277546612561477e-06, "loss": 0.0241, "step": 138855 }, { "epoch": 0.5793993207099999, "grad_norm": 1.0704735994666859, "learning_rate": 2.627707351065296e-06, "loss": 0.0282, "step": 138860 }, { "epoch": 0.5794201834249902, "grad_norm": 0.4300627962059089, "learning_rate": 2.6276600434296764e-06, "loss": 0.0234, "step": 138865 }, { "epoch": 0.5794410461399805, "grad_norm": 0.8863915596867784, "learning_rate": 2.6276127383490575e-06, "loss": 0.0261, "step": 138870 }, { "epoch": 0.5794619088549707, "grad_norm": 0.4621135319425654, "learning_rate": 2.62756543582321e-06, "loss": 0.0208, "step": 138875 }, { "epoch": 0.579482771569961, "grad_norm": 0.6283383371381545, "learning_rate": 2.6275181358519035e-06, "loss": 0.0265, "step": 138880 }, { "epoch": 0.5795036342849513, "grad_norm": 0.56555142892937, "learning_rate": 2.6274708384349084e-06, "loss": 0.0243, "step": 138885 }, { "epoch": 0.5795244969999416, "grad_norm": 0.5379619768296054, "learning_rate": 2.6274235435719946e-06, "loss": 0.0238, "step": 138890 }, { "epoch": 0.5795453597149318, "grad_norm": 0.45827762234662206, "learning_rate": 2.627376251262933e-06, "loss": 0.0197, "step": 138895 }, { "epoch": 0.5795662224299222, "grad_norm": 0.4281170490037387, "learning_rate": 2.6273289615074927e-06, "loss": 0.0197, "step": 138900 }, { "epoch": 0.5795870851449124, "grad_norm": 0.42911729009634597, "learning_rate": 2.6272816743054448e-06, "loss": 0.0323, "step": 138905 }, { "epoch": 0.5796079478599027, "grad_norm": 0.5118846325311179, "learning_rate": 2.6272343896565587e-06, "loss": 0.0183, "step": 138910 }, { "epoch": 0.579628810574893, "grad_norm": 0.6465645359901877, "learning_rate": 2.6271871075606053e-06, "loss": 0.0234, "step": 138915 }, { "epoch": 0.5796496732898833, "grad_norm": 0.539723621808654, "learning_rate": 2.6271398280173543e-06, "loss": 0.019, "step": 138920 }, { "epoch": 0.5796705360048735, "grad_norm": 0.6622970606106487, "learning_rate": 2.6270925510265765e-06, "loss": 0.018, "step": 138925 }, { "epoch": 0.5796913987198639, "grad_norm": 0.7301942962726884, "learning_rate": 2.6270452765880424e-06, "loss": 0.019, "step": 138930 }, { "epoch": 0.5797122614348541, "grad_norm": 0.6827130287839748, "learning_rate": 2.6269980047015215e-06, "loss": 0.021, "step": 138935 }, { "epoch": 0.5797331241498443, "grad_norm": 0.7132557578086207, "learning_rate": 2.626950735366785e-06, "loss": 0.0153, "step": 138940 }, { "epoch": 0.5797539868648346, "grad_norm": 0.5563327233573725, "learning_rate": 2.6269034685836032e-06, "loss": 0.023, "step": 138945 }, { "epoch": 0.579774849579825, "grad_norm": 0.7503189286923719, "learning_rate": 2.6268562043517467e-06, "loss": 0.0254, "step": 138950 }, { "epoch": 0.5797957122948152, "grad_norm": 0.5298213552099593, "learning_rate": 2.6268089426709852e-06, "loss": 0.0197, "step": 138955 }, { "epoch": 0.5798165750098054, "grad_norm": 1.353560956351358, "learning_rate": 2.6267616835410897e-06, "loss": 0.0191, "step": 138960 }, { "epoch": 0.5798374377247958, "grad_norm": 0.33396824920551094, "learning_rate": 2.626714426961831e-06, "loss": 0.016, "step": 138965 }, { "epoch": 0.579858300439786, "grad_norm": 0.6122140536717626, "learning_rate": 2.6266671729329794e-06, "loss": 0.0264, "step": 138970 }, { "epoch": 0.5798791631547763, "grad_norm": 0.9538276434573371, "learning_rate": 2.6266199214543054e-06, "loss": 0.0379, "step": 138975 }, { "epoch": 0.5799000258697666, "grad_norm": 0.5533012070490309, "learning_rate": 2.62657267252558e-06, "loss": 0.029, "step": 138980 }, { "epoch": 0.5799208885847569, "grad_norm": 0.8932922236803147, "learning_rate": 2.6265254261465734e-06, "loss": 0.0297, "step": 138985 }, { "epoch": 0.5799417512997471, "grad_norm": 0.5983485982272074, "learning_rate": 2.626478182317057e-06, "loss": 0.0224, "step": 138990 }, { "epoch": 0.5799626140147375, "grad_norm": 0.38006374965310397, "learning_rate": 2.6264309410368005e-06, "loss": 0.0219, "step": 138995 }, { "epoch": 0.5799834767297277, "grad_norm": 0.8916736384537722, "learning_rate": 2.626383702305576e-06, "loss": 0.029, "step": 139000 }, { "epoch": 0.580004339444718, "grad_norm": 0.6563900672657316, "learning_rate": 2.626336466123153e-06, "loss": 0.0206, "step": 139005 }, { "epoch": 0.5800252021597082, "grad_norm": 0.6330698546727798, "learning_rate": 2.626289232489303e-06, "loss": 0.0203, "step": 139010 }, { "epoch": 0.5800460648746986, "grad_norm": 1.155801976181992, "learning_rate": 2.6262420014037964e-06, "loss": 0.0351, "step": 139015 }, { "epoch": 0.5800669275896888, "grad_norm": 0.41003235365113266, "learning_rate": 2.6261947728664038e-06, "loss": 0.0235, "step": 139020 }, { "epoch": 0.580087790304679, "grad_norm": 0.45256727915457023, "learning_rate": 2.6261475468768977e-06, "loss": 0.0204, "step": 139025 }, { "epoch": 0.5801086530196694, "grad_norm": 0.660862508735728, "learning_rate": 2.626100323435047e-06, "loss": 0.0178, "step": 139030 }, { "epoch": 0.5801295157346597, "grad_norm": 0.3148222335430729, "learning_rate": 2.626053102540624e-06, "loss": 0.0224, "step": 139035 }, { "epoch": 0.5801503784496499, "grad_norm": 0.6332736165294948, "learning_rate": 2.626005884193399e-06, "loss": 0.0263, "step": 139040 }, { "epoch": 0.5801712411646403, "grad_norm": 0.5116629680316955, "learning_rate": 2.625958668393143e-06, "loss": 0.0245, "step": 139045 }, { "epoch": 0.5801921038796305, "grad_norm": 0.3897105643112478, "learning_rate": 2.6259114551396277e-06, "loss": 0.0223, "step": 139050 }, { "epoch": 0.5802129665946207, "grad_norm": 0.9188376926679607, "learning_rate": 2.6258642444326234e-06, "loss": 0.0308, "step": 139055 }, { "epoch": 0.580233829309611, "grad_norm": 0.7200904173889195, "learning_rate": 2.6258170362719017e-06, "loss": 0.0296, "step": 139060 }, { "epoch": 0.5802546920246013, "grad_norm": 0.2209228934221852, "learning_rate": 2.625769830657233e-06, "loss": 0.0168, "step": 139065 }, { "epoch": 0.5802755547395916, "grad_norm": 0.34897206147381876, "learning_rate": 2.6257226275883895e-06, "loss": 0.0239, "step": 139070 }, { "epoch": 0.5802964174545818, "grad_norm": 0.5321646806423108, "learning_rate": 2.625675427065142e-06, "loss": 0.0215, "step": 139075 }, { "epoch": 0.5803172801695722, "grad_norm": 0.8226986987797069, "learning_rate": 2.625628229087261e-06, "loss": 0.0253, "step": 139080 }, { "epoch": 0.5803381428845624, "grad_norm": 0.8916208816984117, "learning_rate": 2.625581033654519e-06, "loss": 0.0264, "step": 139085 }, { "epoch": 0.5803590055995527, "grad_norm": 0.9204502448432017, "learning_rate": 2.6255338407666864e-06, "loss": 0.0217, "step": 139090 }, { "epoch": 0.580379868314543, "grad_norm": 1.355133529900472, "learning_rate": 2.6254866504235344e-06, "loss": 0.027, "step": 139095 }, { "epoch": 0.5804007310295333, "grad_norm": 0.5145856227475346, "learning_rate": 2.6254394626248347e-06, "loss": 0.0262, "step": 139100 }, { "epoch": 0.5804215937445235, "grad_norm": 0.4285420249515473, "learning_rate": 2.625392277370358e-06, "loss": 0.0198, "step": 139105 }, { "epoch": 0.5804424564595139, "grad_norm": 0.8798304411969509, "learning_rate": 2.6253450946598768e-06, "loss": 0.0197, "step": 139110 }, { "epoch": 0.5804633191745041, "grad_norm": 0.630861553429231, "learning_rate": 2.6252979144931616e-06, "loss": 0.0244, "step": 139115 }, { "epoch": 0.5804841818894944, "grad_norm": 0.7023519153713156, "learning_rate": 2.6252507368699847e-06, "loss": 0.0315, "step": 139120 }, { "epoch": 0.5805050446044846, "grad_norm": 0.444287022297448, "learning_rate": 2.6252035617901165e-06, "loss": 0.0317, "step": 139125 }, { "epoch": 0.580525907319475, "grad_norm": 0.4022103536474505, "learning_rate": 2.6251563892533293e-06, "loss": 0.0272, "step": 139130 }, { "epoch": 0.5805467700344652, "grad_norm": 1.2883124417323164, "learning_rate": 2.6251092192593943e-06, "loss": 0.024, "step": 139135 }, { "epoch": 0.5805676327494554, "grad_norm": 0.9496109657024201, "learning_rate": 2.625062051808083e-06, "loss": 0.019, "step": 139140 }, { "epoch": 0.5805884954644458, "grad_norm": 0.59622254447073, "learning_rate": 2.6250148868991677e-06, "loss": 0.0247, "step": 139145 }, { "epoch": 0.580609358179436, "grad_norm": 0.3387814213270641, "learning_rate": 2.6249677245324183e-06, "loss": 0.0149, "step": 139150 }, { "epoch": 0.5806302208944263, "grad_norm": 0.5376629380561262, "learning_rate": 2.624920564707608e-06, "loss": 0.0186, "step": 139155 }, { "epoch": 0.5806510836094166, "grad_norm": 0.5108552481653299, "learning_rate": 2.624873407424508e-06, "loss": 0.0199, "step": 139160 }, { "epoch": 0.5806719463244069, "grad_norm": 0.7304153168041835, "learning_rate": 2.6248262526828904e-06, "loss": 0.0277, "step": 139165 }, { "epoch": 0.5806928090393971, "grad_norm": 0.2637972612761588, "learning_rate": 2.624779100482526e-06, "loss": 0.0171, "step": 139170 }, { "epoch": 0.5807136717543875, "grad_norm": 0.6466284131209504, "learning_rate": 2.6247319508231874e-06, "loss": 0.0255, "step": 139175 }, { "epoch": 0.5807345344693777, "grad_norm": 1.2495353748715854, "learning_rate": 2.624684803704646e-06, "loss": 0.0242, "step": 139180 }, { "epoch": 0.580755397184368, "grad_norm": 0.4292566761642876, "learning_rate": 2.624637659126673e-06, "loss": 0.0211, "step": 139185 }, { "epoch": 0.5807762598993582, "grad_norm": 0.3194895037350194, "learning_rate": 2.624590517089042e-06, "loss": 0.0308, "step": 139190 }, { "epoch": 0.5807971226143486, "grad_norm": 1.0374232562966086, "learning_rate": 2.624543377591523e-06, "loss": 0.0168, "step": 139195 }, { "epoch": 0.5808179853293388, "grad_norm": 0.6078663364090378, "learning_rate": 2.6244962406338893e-06, "loss": 0.0364, "step": 139200 }, { "epoch": 0.5808388480443291, "grad_norm": 0.5892092061455899, "learning_rate": 2.6244491062159115e-06, "loss": 0.0244, "step": 139205 }, { "epoch": 0.5808597107593194, "grad_norm": 0.7065872513898763, "learning_rate": 2.6244019743373626e-06, "loss": 0.0198, "step": 139210 }, { "epoch": 0.5808805734743097, "grad_norm": 1.1283676271161491, "learning_rate": 2.6243548449980144e-06, "loss": 0.0295, "step": 139215 }, { "epoch": 0.5809014361892999, "grad_norm": 2.2848392225832144, "learning_rate": 2.624307718197639e-06, "loss": 0.0384, "step": 139220 }, { "epoch": 0.5809222989042903, "grad_norm": 1.2441571261467785, "learning_rate": 2.624260593936007e-06, "loss": 0.0307, "step": 139225 }, { "epoch": 0.5809431616192805, "grad_norm": 0.9472000340683776, "learning_rate": 2.6242134722128932e-06, "loss": 0.0193, "step": 139230 }, { "epoch": 0.5809640243342707, "grad_norm": 1.1109990230750497, "learning_rate": 2.624166353028067e-06, "loss": 0.0249, "step": 139235 }, { "epoch": 0.580984887049261, "grad_norm": 0.7366234221375005, "learning_rate": 2.6241192363813015e-06, "loss": 0.0235, "step": 139240 }, { "epoch": 0.5810057497642513, "grad_norm": 2.4624604160215116, "learning_rate": 2.6240721222723697e-06, "loss": 0.0247, "step": 139245 }, { "epoch": 0.5810266124792416, "grad_norm": 0.6432107929594865, "learning_rate": 2.6240250107010427e-06, "loss": 0.0281, "step": 139250 }, { "epoch": 0.5810474751942318, "grad_norm": 0.56536375031117, "learning_rate": 2.623977901667094e-06, "loss": 0.0185, "step": 139255 }, { "epoch": 0.5810683379092222, "grad_norm": 0.8545959822223878, "learning_rate": 2.6239307951702943e-06, "loss": 0.0219, "step": 139260 }, { "epoch": 0.5810892006242124, "grad_norm": 0.5681979815470873, "learning_rate": 2.6238836912104163e-06, "loss": 0.03, "step": 139265 }, { "epoch": 0.5811100633392027, "grad_norm": 0.6162046401481927, "learning_rate": 2.623836589787233e-06, "loss": 0.0239, "step": 139270 }, { "epoch": 0.581130926054193, "grad_norm": 0.4053330811046207, "learning_rate": 2.6237894909005163e-06, "loss": 0.0213, "step": 139275 }, { "epoch": 0.5811517887691833, "grad_norm": 1.6154758915160918, "learning_rate": 2.6237423945500384e-06, "loss": 0.0245, "step": 139280 }, { "epoch": 0.5811726514841735, "grad_norm": 0.6359619943704821, "learning_rate": 2.623695300735572e-06, "loss": 0.0181, "step": 139285 }, { "epoch": 0.5811935141991639, "grad_norm": 0.7742279770877771, "learning_rate": 2.6236482094568888e-06, "loss": 0.0172, "step": 139290 }, { "epoch": 0.5812143769141541, "grad_norm": 0.4413689062982124, "learning_rate": 2.6236011207137625e-06, "loss": 0.0232, "step": 139295 }, { "epoch": 0.5812352396291444, "grad_norm": 0.5902426508198407, "learning_rate": 2.623554034505964e-06, "loss": 0.0182, "step": 139300 }, { "epoch": 0.5812561023441346, "grad_norm": 0.6901937020848832, "learning_rate": 2.6235069508332665e-06, "loss": 0.0166, "step": 139305 }, { "epoch": 0.581276965059125, "grad_norm": 0.7354937055738776, "learning_rate": 2.6234598696954432e-06, "loss": 0.0239, "step": 139310 }, { "epoch": 0.5812978277741152, "grad_norm": 0.8038403324794925, "learning_rate": 2.6234127910922665e-06, "loss": 0.0249, "step": 139315 }, { "epoch": 0.5813186904891054, "grad_norm": 1.1282159606193771, "learning_rate": 2.623365715023508e-06, "loss": 0.0167, "step": 139320 }, { "epoch": 0.5813395532040958, "grad_norm": 0.45385871933023475, "learning_rate": 2.623318641488941e-06, "loss": 0.0158, "step": 139325 }, { "epoch": 0.581360415919086, "grad_norm": 1.1364324488046758, "learning_rate": 2.623271570488338e-06, "loss": 0.0229, "step": 139330 }, { "epoch": 0.5813812786340763, "grad_norm": 0.542064467314881, "learning_rate": 2.6232245020214717e-06, "loss": 0.0211, "step": 139335 }, { "epoch": 0.5814021413490666, "grad_norm": 0.7369729979414269, "learning_rate": 2.6231774360881156e-06, "loss": 0.0215, "step": 139340 }, { "epoch": 0.5814230040640569, "grad_norm": 0.6497396646091187, "learning_rate": 2.623130372688041e-06, "loss": 0.0175, "step": 139345 }, { "epoch": 0.5814438667790471, "grad_norm": 0.5480970484469105, "learning_rate": 2.6230833118210206e-06, "loss": 0.0213, "step": 139350 }, { "epoch": 0.5814647294940375, "grad_norm": 0.8976504756422344, "learning_rate": 2.6230362534868286e-06, "loss": 0.0399, "step": 139355 }, { "epoch": 0.5814855922090277, "grad_norm": 0.7165725739305568, "learning_rate": 2.622989197685237e-06, "loss": 0.0202, "step": 139360 }, { "epoch": 0.581506454924018, "grad_norm": 0.4857671658943946, "learning_rate": 2.6229421444160193e-06, "loss": 0.0224, "step": 139365 }, { "epoch": 0.5815273176390082, "grad_norm": 0.7668883425024512, "learning_rate": 2.622895093678947e-06, "loss": 0.0255, "step": 139370 }, { "epoch": 0.5815481803539986, "grad_norm": 0.5410923471646399, "learning_rate": 2.622848045473794e-06, "loss": 0.0293, "step": 139375 }, { "epoch": 0.5815690430689888, "grad_norm": 2.1504737867321766, "learning_rate": 2.6228009998003335e-06, "loss": 0.0231, "step": 139380 }, { "epoch": 0.5815899057839791, "grad_norm": 0.6715371543386289, "learning_rate": 2.6227539566583377e-06, "loss": 0.0279, "step": 139385 }, { "epoch": 0.5816107684989694, "grad_norm": 0.5047870685065438, "learning_rate": 2.6227069160475795e-06, "loss": 0.0218, "step": 139390 }, { "epoch": 0.5816316312139597, "grad_norm": 1.0957619857386283, "learning_rate": 2.6226598779678325e-06, "loss": 0.0221, "step": 139395 }, { "epoch": 0.5816524939289499, "grad_norm": 0.8929555076731189, "learning_rate": 2.6226128424188695e-06, "loss": 0.0217, "step": 139400 }, { "epoch": 0.5816733566439403, "grad_norm": 1.0646385333748765, "learning_rate": 2.6225658094004637e-06, "loss": 0.0191, "step": 139405 }, { "epoch": 0.5816942193589305, "grad_norm": 0.9809750813533455, "learning_rate": 2.6225187789123883e-06, "loss": 0.0231, "step": 139410 }, { "epoch": 0.5817150820739208, "grad_norm": 1.0263644888476648, "learning_rate": 2.6224717509544157e-06, "loss": 0.0297, "step": 139415 }, { "epoch": 0.581735944788911, "grad_norm": 0.5972105477988635, "learning_rate": 2.6224247255263203e-06, "loss": 0.0306, "step": 139420 }, { "epoch": 0.5817568075039014, "grad_norm": 0.840841595121767, "learning_rate": 2.6223777026278736e-06, "loss": 0.0208, "step": 139425 }, { "epoch": 0.5817776702188916, "grad_norm": 0.5558179583617903, "learning_rate": 2.6223306822588503e-06, "loss": 0.018, "step": 139430 }, { "epoch": 0.5817985329338818, "grad_norm": 0.520439504945039, "learning_rate": 2.622283664419023e-06, "loss": 0.0253, "step": 139435 }, { "epoch": 0.5818193956488722, "grad_norm": 0.8907278857455987, "learning_rate": 2.622236649108165e-06, "loss": 0.0279, "step": 139440 }, { "epoch": 0.5818402583638624, "grad_norm": 0.3776962551567648, "learning_rate": 2.6221896363260503e-06, "loss": 0.0199, "step": 139445 }, { "epoch": 0.5818611210788527, "grad_norm": 0.4451155208070799, "learning_rate": 2.6221426260724506e-06, "loss": 0.021, "step": 139450 }, { "epoch": 0.581881983793843, "grad_norm": 0.7162657313704933, "learning_rate": 2.6220956183471406e-06, "loss": 0.0275, "step": 139455 }, { "epoch": 0.5819028465088333, "grad_norm": 0.6971244335804204, "learning_rate": 2.622048613149894e-06, "loss": 0.0159, "step": 139460 }, { "epoch": 0.5819237092238235, "grad_norm": 0.4529368812976673, "learning_rate": 2.622001610480483e-06, "loss": 0.023, "step": 139465 }, { "epoch": 0.5819445719388139, "grad_norm": 1.1113107674194342, "learning_rate": 2.621954610338682e-06, "loss": 0.0282, "step": 139470 }, { "epoch": 0.5819654346538041, "grad_norm": 0.9089307671161246, "learning_rate": 2.6219076127242626e-06, "loss": 0.0233, "step": 139475 }, { "epoch": 0.5819862973687944, "grad_norm": 0.7459678383466065, "learning_rate": 2.621860617637001e-06, "loss": 0.0191, "step": 139480 }, { "epoch": 0.5820071600837846, "grad_norm": 0.8074558330733979, "learning_rate": 2.6218136250766694e-06, "loss": 0.0183, "step": 139485 }, { "epoch": 0.582028022798775, "grad_norm": 0.6620660708023169, "learning_rate": 2.621766635043041e-06, "loss": 0.0299, "step": 139490 }, { "epoch": 0.5820488855137652, "grad_norm": 0.44388130718438085, "learning_rate": 2.6217196475358903e-06, "loss": 0.0187, "step": 139495 }, { "epoch": 0.5820697482287555, "grad_norm": 1.387290147605024, "learning_rate": 2.62167266255499e-06, "loss": 0.018, "step": 139500 }, { "epoch": 0.5820906109437458, "grad_norm": 0.5365251101998153, "learning_rate": 2.6216256801001142e-06, "loss": 0.0158, "step": 139505 }, { "epoch": 0.5821114736587361, "grad_norm": 0.37571231969130464, "learning_rate": 2.621578700171036e-06, "loss": 0.0189, "step": 139510 }, { "epoch": 0.5821323363737263, "grad_norm": 0.49993287427421024, "learning_rate": 2.6215317227675307e-06, "loss": 0.0171, "step": 139515 }, { "epoch": 0.5821531990887167, "grad_norm": 0.6329230784031012, "learning_rate": 2.6214847478893707e-06, "loss": 0.0228, "step": 139520 }, { "epoch": 0.5821740618037069, "grad_norm": 0.9038141177595697, "learning_rate": 2.6214377755363298e-06, "loss": 0.0245, "step": 139525 }, { "epoch": 0.5821949245186971, "grad_norm": 0.5694938149605163, "learning_rate": 2.621390805708182e-06, "loss": 0.028, "step": 139530 }, { "epoch": 0.5822157872336875, "grad_norm": 0.7258278785287995, "learning_rate": 2.6213438384047003e-06, "loss": 0.0256, "step": 139535 }, { "epoch": 0.5822366499486777, "grad_norm": 0.7312417734657449, "learning_rate": 2.6212968736256607e-06, "loss": 0.0148, "step": 139540 }, { "epoch": 0.582257512663668, "grad_norm": 0.8245362503958947, "learning_rate": 2.6212499113708347e-06, "loss": 0.0208, "step": 139545 }, { "epoch": 0.5822783753786582, "grad_norm": 0.3898776997075219, "learning_rate": 2.621202951639997e-06, "loss": 0.0265, "step": 139550 }, { "epoch": 0.5822992380936486, "grad_norm": 0.959683616448728, "learning_rate": 2.621155994432922e-06, "loss": 0.0276, "step": 139555 }, { "epoch": 0.5823201008086388, "grad_norm": 0.744269792442523, "learning_rate": 2.6211090397493837e-06, "loss": 0.0309, "step": 139560 }, { "epoch": 0.5823409635236291, "grad_norm": 0.4668860648095246, "learning_rate": 2.621062087589155e-06, "loss": 0.016, "step": 139565 }, { "epoch": 0.5823618262386194, "grad_norm": 0.989552311591139, "learning_rate": 2.621015137952011e-06, "loss": 0.0216, "step": 139570 }, { "epoch": 0.5823826889536097, "grad_norm": 1.0460556894462396, "learning_rate": 2.620968190837726e-06, "loss": 0.0266, "step": 139575 }, { "epoch": 0.5824035516685999, "grad_norm": 1.1494576545515949, "learning_rate": 2.6209212462460728e-06, "loss": 0.0216, "step": 139580 }, { "epoch": 0.5824244143835903, "grad_norm": 0.6425910038439537, "learning_rate": 2.620874304176826e-06, "loss": 0.0228, "step": 139585 }, { "epoch": 0.5824452770985805, "grad_norm": 0.8961052948193029, "learning_rate": 2.62082736462976e-06, "loss": 0.0231, "step": 139590 }, { "epoch": 0.5824661398135708, "grad_norm": 0.6481283709307432, "learning_rate": 2.6207804276046483e-06, "loss": 0.0168, "step": 139595 }, { "epoch": 0.582487002528561, "grad_norm": 0.8786926646754396, "learning_rate": 2.620733493101266e-06, "loss": 0.028, "step": 139600 }, { "epoch": 0.5825078652435514, "grad_norm": 0.6585468741323517, "learning_rate": 2.620686561119387e-06, "loss": 0.0199, "step": 139605 }, { "epoch": 0.5825287279585416, "grad_norm": 0.8445021826322509, "learning_rate": 2.6206396316587856e-06, "loss": 0.0259, "step": 139610 }, { "epoch": 0.5825495906735318, "grad_norm": 0.47242765635143336, "learning_rate": 2.6205927047192353e-06, "loss": 0.033, "step": 139615 }, { "epoch": 0.5825704533885222, "grad_norm": 1.0108862667202168, "learning_rate": 2.6205457803005114e-06, "loss": 0.0252, "step": 139620 }, { "epoch": 0.5825913161035124, "grad_norm": 0.5582081050247826, "learning_rate": 2.6204988584023873e-06, "loss": 0.0219, "step": 139625 }, { "epoch": 0.5826121788185027, "grad_norm": 1.0603064686430597, "learning_rate": 2.620451939024638e-06, "loss": 0.056, "step": 139630 }, { "epoch": 0.582633041533493, "grad_norm": 0.6148850615613186, "learning_rate": 2.6204050221670376e-06, "loss": 0.019, "step": 139635 }, { "epoch": 0.5826539042484833, "grad_norm": 1.0221294485784973, "learning_rate": 2.620358107829361e-06, "loss": 0.0316, "step": 139640 }, { "epoch": 0.5826747669634735, "grad_norm": 1.368006223847698, "learning_rate": 2.620311196011382e-06, "loss": 0.0196, "step": 139645 }, { "epoch": 0.5826956296784639, "grad_norm": 0.6344359919652952, "learning_rate": 2.6202642867128748e-06, "loss": 0.0268, "step": 139650 }, { "epoch": 0.5827164923934541, "grad_norm": 1.7810197473850244, "learning_rate": 2.620217379933615e-06, "loss": 0.0311, "step": 139655 }, { "epoch": 0.5827373551084444, "grad_norm": 0.6521796751921528, "learning_rate": 2.6201704756733764e-06, "loss": 0.0209, "step": 139660 }, { "epoch": 0.5827582178234346, "grad_norm": 0.36236217964861966, "learning_rate": 2.6201235739319336e-06, "loss": 0.02, "step": 139665 }, { "epoch": 0.582779080538425, "grad_norm": 0.9997760444638463, "learning_rate": 2.6200766747090606e-06, "loss": 0.023, "step": 139670 }, { "epoch": 0.5827999432534152, "grad_norm": 0.6963292218081083, "learning_rate": 2.620029778004533e-06, "loss": 0.0267, "step": 139675 }, { "epoch": 0.5828208059684055, "grad_norm": 2.0997783513804156, "learning_rate": 2.619982883818125e-06, "loss": 0.0235, "step": 139680 }, { "epoch": 0.5828416686833958, "grad_norm": 0.38375151938924085, "learning_rate": 2.6199359921496117e-06, "loss": 0.0259, "step": 139685 }, { "epoch": 0.5828625313983861, "grad_norm": 2.002681147117528, "learning_rate": 2.6198891029987672e-06, "loss": 0.0294, "step": 139690 }, { "epoch": 0.5828833941133763, "grad_norm": 0.5776160849996886, "learning_rate": 2.6198422163653657e-06, "loss": 0.029, "step": 139695 }, { "epoch": 0.5829042568283667, "grad_norm": 0.8123113986715245, "learning_rate": 2.619795332249184e-06, "loss": 0.049, "step": 139700 }, { "epoch": 0.5829251195433569, "grad_norm": 0.584554335702472, "learning_rate": 2.6197484506499943e-06, "loss": 0.023, "step": 139705 }, { "epoch": 0.5829459822583472, "grad_norm": 1.2491311029549808, "learning_rate": 2.6197015715675726e-06, "loss": 0.028, "step": 139710 }, { "epoch": 0.5829668449733375, "grad_norm": 0.8095184785598579, "learning_rate": 2.6196546950016942e-06, "loss": 0.0203, "step": 139715 }, { "epoch": 0.5829877076883277, "grad_norm": 0.585545041899193, "learning_rate": 2.6196078209521335e-06, "loss": 0.0246, "step": 139720 }, { "epoch": 0.583008570403318, "grad_norm": 0.448868092583276, "learning_rate": 2.619560949418665e-06, "loss": 0.0228, "step": 139725 }, { "epoch": 0.5830294331183082, "grad_norm": 0.8116343161871755, "learning_rate": 2.6195140804010645e-06, "loss": 0.0236, "step": 139730 }, { "epoch": 0.5830502958332986, "grad_norm": 0.7260144230671455, "learning_rate": 2.619467213899106e-06, "loss": 0.0229, "step": 139735 }, { "epoch": 0.5830711585482888, "grad_norm": 0.5599185018329916, "learning_rate": 2.6194203499125652e-06, "loss": 0.0185, "step": 139740 }, { "epoch": 0.5830920212632791, "grad_norm": 0.6618592282566466, "learning_rate": 2.6193734884412163e-06, "loss": 0.0242, "step": 139745 }, { "epoch": 0.5831128839782694, "grad_norm": 0.7737420887488361, "learning_rate": 2.6193266294848353e-06, "loss": 0.021, "step": 139750 }, { "epoch": 0.5831337466932597, "grad_norm": 0.7278747282834223, "learning_rate": 2.6192797730431964e-06, "loss": 0.0307, "step": 139755 }, { "epoch": 0.5831546094082499, "grad_norm": 0.687509274843473, "learning_rate": 2.619232919116075e-06, "loss": 0.0241, "step": 139760 }, { "epoch": 0.5831754721232403, "grad_norm": 0.7285646781568783, "learning_rate": 2.6191860677032464e-06, "loss": 0.019, "step": 139765 }, { "epoch": 0.5831963348382305, "grad_norm": 0.5435643361427152, "learning_rate": 2.6191392188044854e-06, "loss": 0.0237, "step": 139770 }, { "epoch": 0.5832171975532208, "grad_norm": 0.7398331763579145, "learning_rate": 2.6190923724195675e-06, "loss": 0.0333, "step": 139775 }, { "epoch": 0.583238060268211, "grad_norm": 1.0882683790679586, "learning_rate": 2.619045528548268e-06, "loss": 0.0247, "step": 139780 }, { "epoch": 0.5832589229832014, "grad_norm": 0.9358283700625735, "learning_rate": 2.6189986871903616e-06, "loss": 0.0194, "step": 139785 }, { "epoch": 0.5832797856981916, "grad_norm": 0.6420884615440949, "learning_rate": 2.6189518483456234e-06, "loss": 0.0214, "step": 139790 }, { "epoch": 0.5833006484131819, "grad_norm": 0.49347441126593267, "learning_rate": 2.61890501201383e-06, "loss": 0.0191, "step": 139795 }, { "epoch": 0.5833215111281722, "grad_norm": 0.725938233688462, "learning_rate": 2.6188581781947546e-06, "loss": 0.0241, "step": 139800 }, { "epoch": 0.5833423738431625, "grad_norm": 1.0145221687599217, "learning_rate": 2.6188113468881747e-06, "loss": 0.0198, "step": 139805 }, { "epoch": 0.5833632365581527, "grad_norm": 0.6948981876300219, "learning_rate": 2.618764518093864e-06, "loss": 0.0246, "step": 139810 }, { "epoch": 0.583384099273143, "grad_norm": 0.5772849269267174, "learning_rate": 2.6187176918115994e-06, "loss": 0.0164, "step": 139815 }, { "epoch": 0.5834049619881333, "grad_norm": 0.3059760799842687, "learning_rate": 2.618670868041155e-06, "loss": 0.0215, "step": 139820 }, { "epoch": 0.5834258247031235, "grad_norm": 0.5514582128714511, "learning_rate": 2.6186240467823065e-06, "loss": 0.0219, "step": 139825 }, { "epoch": 0.5834466874181139, "grad_norm": 0.5229619890467664, "learning_rate": 2.6185772280348298e-06, "loss": 0.0166, "step": 139830 }, { "epoch": 0.5834675501331041, "grad_norm": 0.5526227520200382, "learning_rate": 2.6185304117985004e-06, "loss": 0.0214, "step": 139835 }, { "epoch": 0.5834884128480944, "grad_norm": 0.8691050590505842, "learning_rate": 2.618483598073094e-06, "loss": 0.0273, "step": 139840 }, { "epoch": 0.5835092755630846, "grad_norm": 0.6499877504430568, "learning_rate": 2.618436786858385e-06, "loss": 0.0217, "step": 139845 }, { "epoch": 0.583530138278075, "grad_norm": 0.7159294327513104, "learning_rate": 2.61838997815415e-06, "loss": 0.0181, "step": 139850 }, { "epoch": 0.5835510009930652, "grad_norm": 1.2953080025403236, "learning_rate": 2.6183431719601644e-06, "loss": 0.0428, "step": 139855 }, { "epoch": 0.5835718637080555, "grad_norm": 0.8975307800375507, "learning_rate": 2.6182963682762034e-06, "loss": 0.0211, "step": 139860 }, { "epoch": 0.5835927264230458, "grad_norm": 0.9054460109945365, "learning_rate": 2.6182495671020443e-06, "loss": 0.0265, "step": 139865 }, { "epoch": 0.5836135891380361, "grad_norm": 0.7192343850386124, "learning_rate": 2.6182027684374604e-06, "loss": 0.0239, "step": 139870 }, { "epoch": 0.5836344518530263, "grad_norm": 0.42018909661573284, "learning_rate": 2.61815597228223e-06, "loss": 0.0275, "step": 139875 }, { "epoch": 0.5836553145680167, "grad_norm": 0.7672616447632652, "learning_rate": 2.6181091786361264e-06, "loss": 0.0223, "step": 139880 }, { "epoch": 0.5836761772830069, "grad_norm": 0.9455336090957999, "learning_rate": 2.6180623874989266e-06, "loss": 0.023, "step": 139885 }, { "epoch": 0.5836970399979972, "grad_norm": 0.7518459189364493, "learning_rate": 2.6180155988704066e-06, "loss": 0.0293, "step": 139890 }, { "epoch": 0.5837179027129874, "grad_norm": 0.31336437674271606, "learning_rate": 2.6179688127503417e-06, "loss": 0.0141, "step": 139895 }, { "epoch": 0.5837387654279778, "grad_norm": 0.7236929388793514, "learning_rate": 2.6179220291385082e-06, "loss": 0.0246, "step": 139900 }, { "epoch": 0.583759628142968, "grad_norm": 0.5089714204957853, "learning_rate": 2.6178752480346813e-06, "loss": 0.0199, "step": 139905 }, { "epoch": 0.5837804908579582, "grad_norm": 0.6874789533504087, "learning_rate": 2.617828469438638e-06, "loss": 0.0192, "step": 139910 }, { "epoch": 0.5838013535729486, "grad_norm": 0.533313794664506, "learning_rate": 2.617781693350154e-06, "loss": 0.0212, "step": 139915 }, { "epoch": 0.5838222162879388, "grad_norm": 0.9055466612361994, "learning_rate": 2.617734919769004e-06, "loss": 0.0275, "step": 139920 }, { "epoch": 0.5838430790029291, "grad_norm": 2.0416633673163416, "learning_rate": 2.6176881486949653e-06, "loss": 0.023, "step": 139925 }, { "epoch": 0.5838639417179194, "grad_norm": 0.7013338657307437, "learning_rate": 2.6176413801278134e-06, "loss": 0.0204, "step": 139930 }, { "epoch": 0.5838848044329097, "grad_norm": 0.8711463914662486, "learning_rate": 2.6175946140673246e-06, "loss": 0.0253, "step": 139935 }, { "epoch": 0.5839056671478999, "grad_norm": 0.8388137282859885, "learning_rate": 2.6175478505132758e-06, "loss": 0.0285, "step": 139940 }, { "epoch": 0.5839265298628903, "grad_norm": 0.7839552344180643, "learning_rate": 2.617501089465441e-06, "loss": 0.0279, "step": 139945 }, { "epoch": 0.5839473925778805, "grad_norm": 0.598531805035783, "learning_rate": 2.6174543309235984e-06, "loss": 0.0237, "step": 139950 }, { "epoch": 0.5839682552928708, "grad_norm": 1.051536594357511, "learning_rate": 2.6174075748875227e-06, "loss": 0.0259, "step": 139955 }, { "epoch": 0.583989118007861, "grad_norm": 0.4069162525909137, "learning_rate": 2.617360821356991e-06, "loss": 0.0205, "step": 139960 }, { "epoch": 0.5840099807228514, "grad_norm": 0.5987786714598425, "learning_rate": 2.61731407033178e-06, "loss": 0.0248, "step": 139965 }, { "epoch": 0.5840308434378416, "grad_norm": 0.6613622534553651, "learning_rate": 2.617267321811665e-06, "loss": 0.0201, "step": 139970 }, { "epoch": 0.5840517061528319, "grad_norm": 0.7185186904721037, "learning_rate": 2.6172205757964218e-06, "loss": 0.0222, "step": 139975 }, { "epoch": 0.5840725688678222, "grad_norm": 0.6904017626552081, "learning_rate": 2.617173832285828e-06, "loss": 0.0255, "step": 139980 }, { "epoch": 0.5840934315828125, "grad_norm": 0.45747421439770214, "learning_rate": 2.6171270912796602e-06, "loss": 0.0302, "step": 139985 }, { "epoch": 0.5841142942978027, "grad_norm": 0.7689388409881426, "learning_rate": 2.6170803527776933e-06, "loss": 0.0182, "step": 139990 }, { "epoch": 0.5841351570127931, "grad_norm": 0.6045363075886094, "learning_rate": 2.6170336167797045e-06, "loss": 0.0164, "step": 139995 }, { "epoch": 0.5841560197277833, "grad_norm": 0.6721702518503831, "learning_rate": 2.6169868832854696e-06, "loss": 0.0319, "step": 140000 }, { "epoch": 0.5841768824427735, "grad_norm": 0.9234652182905039, "learning_rate": 2.6169401522947656e-06, "loss": 0.0312, "step": 140005 }, { "epoch": 0.5841977451577639, "grad_norm": 0.7901852269228524, "learning_rate": 2.6168934238073696e-06, "loss": 0.0215, "step": 140010 }, { "epoch": 0.5842186078727541, "grad_norm": 0.5306478108592461, "learning_rate": 2.6168466978230572e-06, "loss": 0.0264, "step": 140015 }, { "epoch": 0.5842394705877444, "grad_norm": 0.8146645876650224, "learning_rate": 2.616799974341605e-06, "loss": 0.0284, "step": 140020 }, { "epoch": 0.5842603333027346, "grad_norm": 1.0284498307835015, "learning_rate": 2.61675325336279e-06, "loss": 0.0306, "step": 140025 }, { "epoch": 0.584281196017725, "grad_norm": 0.39230502846209003, "learning_rate": 2.6167065348863883e-06, "loss": 0.0276, "step": 140030 }, { "epoch": 0.5843020587327152, "grad_norm": 0.5587164867492606, "learning_rate": 2.6166598189121774e-06, "loss": 0.0177, "step": 140035 }, { "epoch": 0.5843229214477055, "grad_norm": 0.8203992703991344, "learning_rate": 2.616613105439933e-06, "loss": 0.0201, "step": 140040 }, { "epoch": 0.5843437841626958, "grad_norm": 0.6896223208967786, "learning_rate": 2.616566394469432e-06, "loss": 0.0271, "step": 140045 }, { "epoch": 0.5843646468776861, "grad_norm": 0.7161507536316015, "learning_rate": 2.616519686000451e-06, "loss": 0.0246, "step": 140050 }, { "epoch": 0.5843855095926763, "grad_norm": 0.8803883063875398, "learning_rate": 2.6164729800327676e-06, "loss": 0.0231, "step": 140055 }, { "epoch": 0.5844063723076667, "grad_norm": 0.896392295145756, "learning_rate": 2.6164262765661573e-06, "loss": 0.02, "step": 140060 }, { "epoch": 0.5844272350226569, "grad_norm": 0.5700920462239065, "learning_rate": 2.616379575600398e-06, "loss": 0.0156, "step": 140065 }, { "epoch": 0.5844480977376472, "grad_norm": 0.8594420654182767, "learning_rate": 2.6163328771352656e-06, "loss": 0.02, "step": 140070 }, { "epoch": 0.5844689604526374, "grad_norm": 0.4508286018285944, "learning_rate": 2.6162861811705374e-06, "loss": 0.0198, "step": 140075 }, { "epoch": 0.5844898231676278, "grad_norm": 0.9639832965239196, "learning_rate": 2.616239487705991e-06, "loss": 0.0257, "step": 140080 }, { "epoch": 0.584510685882618, "grad_norm": 0.5309647774613432, "learning_rate": 2.6161927967414013e-06, "loss": 0.0224, "step": 140085 }, { "epoch": 0.5845315485976083, "grad_norm": 0.49788050500523795, "learning_rate": 2.6161461082765476e-06, "loss": 0.0367, "step": 140090 }, { "epoch": 0.5845524113125986, "grad_norm": 0.3348734448266753, "learning_rate": 2.616099422311205e-06, "loss": 0.0159, "step": 140095 }, { "epoch": 0.5845732740275889, "grad_norm": 0.5314117470176613, "learning_rate": 2.616052738845152e-06, "loss": 0.0183, "step": 140100 }, { "epoch": 0.5845941367425791, "grad_norm": 0.9486395410660352, "learning_rate": 2.616006057878164e-06, "loss": 0.0252, "step": 140105 }, { "epoch": 0.5846149994575695, "grad_norm": 0.9867725546187205, "learning_rate": 2.615959379410019e-06, "loss": 0.0228, "step": 140110 }, { "epoch": 0.5846358621725597, "grad_norm": 0.45489526549650505, "learning_rate": 2.6159127034404942e-06, "loss": 0.0226, "step": 140115 }, { "epoch": 0.5846567248875499, "grad_norm": 0.47784412107505836, "learning_rate": 2.6158660299693667e-06, "loss": 0.0223, "step": 140120 }, { "epoch": 0.5846775876025403, "grad_norm": 0.5727517216679289, "learning_rate": 2.6158193589964124e-06, "loss": 0.0244, "step": 140125 }, { "epoch": 0.5846984503175305, "grad_norm": 0.6522706125642665, "learning_rate": 2.6157726905214104e-06, "loss": 0.0231, "step": 140130 }, { "epoch": 0.5847193130325208, "grad_norm": 0.7653772922066298, "learning_rate": 2.6157260245441363e-06, "loss": 0.0195, "step": 140135 }, { "epoch": 0.584740175747511, "grad_norm": 0.6674292002510701, "learning_rate": 2.6156793610643684e-06, "loss": 0.0265, "step": 140140 }, { "epoch": 0.5847610384625014, "grad_norm": 0.9495358747922911, "learning_rate": 2.615632700081883e-06, "loss": 0.0259, "step": 140145 }, { "epoch": 0.5847819011774916, "grad_norm": 0.6120945045451478, "learning_rate": 2.615586041596458e-06, "loss": 0.0192, "step": 140150 }, { "epoch": 0.5848027638924819, "grad_norm": 0.5674005716234458, "learning_rate": 2.615539385607871e-06, "loss": 0.0261, "step": 140155 }, { "epoch": 0.5848236266074722, "grad_norm": 0.9240861012245627, "learning_rate": 2.615492732115898e-06, "loss": 0.0273, "step": 140160 }, { "epoch": 0.5848444893224625, "grad_norm": 0.49885971534734036, "learning_rate": 2.615446081120317e-06, "loss": 0.0238, "step": 140165 }, { "epoch": 0.5848653520374527, "grad_norm": 0.49473927727479494, "learning_rate": 2.6153994326209065e-06, "loss": 0.018, "step": 140170 }, { "epoch": 0.5848862147524431, "grad_norm": 0.42944023347563637, "learning_rate": 2.615352786617442e-06, "loss": 0.0252, "step": 140175 }, { "epoch": 0.5849070774674333, "grad_norm": 0.6980792294891071, "learning_rate": 2.6153061431097028e-06, "loss": 0.0199, "step": 140180 }, { "epoch": 0.5849279401824236, "grad_norm": 0.5279602504679863, "learning_rate": 2.6152595020974647e-06, "loss": 0.0179, "step": 140185 }, { "epoch": 0.5849488028974139, "grad_norm": 0.7209325258600104, "learning_rate": 2.6152128635805057e-06, "loss": 0.0237, "step": 140190 }, { "epoch": 0.5849696656124042, "grad_norm": 0.8233155609501444, "learning_rate": 2.615166227558604e-06, "loss": 0.031, "step": 140195 }, { "epoch": 0.5849905283273944, "grad_norm": 0.4848153293972507, "learning_rate": 2.6151195940315366e-06, "loss": 0.018, "step": 140200 }, { "epoch": 0.5850113910423846, "grad_norm": 1.1839979747449603, "learning_rate": 2.615072962999081e-06, "loss": 0.0201, "step": 140205 }, { "epoch": 0.585032253757375, "grad_norm": 0.6396332998208016, "learning_rate": 2.6150263344610154e-06, "loss": 0.0231, "step": 140210 }, { "epoch": 0.5850531164723652, "grad_norm": 0.6663412753897408, "learning_rate": 2.6149797084171163e-06, "loss": 0.0394, "step": 140215 }, { "epoch": 0.5850739791873555, "grad_norm": 0.6669703633748082, "learning_rate": 2.614933084867162e-06, "loss": 0.0169, "step": 140220 }, { "epoch": 0.5850948419023458, "grad_norm": 0.5128853065485508, "learning_rate": 2.6148864638109306e-06, "loss": 0.0174, "step": 140225 }, { "epoch": 0.5851157046173361, "grad_norm": 0.8181540843710554, "learning_rate": 2.6148398452481992e-06, "loss": 0.0367, "step": 140230 }, { "epoch": 0.5851365673323263, "grad_norm": 0.8890333734960194, "learning_rate": 2.6147932291787467e-06, "loss": 0.0344, "step": 140235 }, { "epoch": 0.5851574300473167, "grad_norm": 0.7702577417316135, "learning_rate": 2.6147466156023488e-06, "loss": 0.0211, "step": 140240 }, { "epoch": 0.5851782927623069, "grad_norm": 0.6577426757918634, "learning_rate": 2.614700004518784e-06, "loss": 0.0282, "step": 140245 }, { "epoch": 0.5851991554772972, "grad_norm": 0.6505154908706353, "learning_rate": 2.6146533959278316e-06, "loss": 0.0222, "step": 140250 }, { "epoch": 0.5852200181922874, "grad_norm": 0.43841261712947266, "learning_rate": 2.6146067898292676e-06, "loss": 0.0215, "step": 140255 }, { "epoch": 0.5852408809072778, "grad_norm": 0.7197365755086064, "learning_rate": 2.614560186222871e-06, "loss": 0.0216, "step": 140260 }, { "epoch": 0.585261743622268, "grad_norm": 1.2028996277214636, "learning_rate": 2.614513585108419e-06, "loss": 0.027, "step": 140265 }, { "epoch": 0.5852826063372583, "grad_norm": 0.37667845700411323, "learning_rate": 2.6144669864856904e-06, "loss": 0.0226, "step": 140270 }, { "epoch": 0.5853034690522486, "grad_norm": 0.3832993615376894, "learning_rate": 2.6144203903544617e-06, "loss": 0.0184, "step": 140275 }, { "epoch": 0.5853243317672389, "grad_norm": 0.732516806537816, "learning_rate": 2.6143737967145127e-06, "loss": 0.0211, "step": 140280 }, { "epoch": 0.5853451944822291, "grad_norm": 0.8311861705135427, "learning_rate": 2.6143272055656207e-06, "loss": 0.0187, "step": 140285 }, { "epoch": 0.5853660571972195, "grad_norm": 0.6226655874939921, "learning_rate": 2.6142806169075624e-06, "loss": 0.0235, "step": 140290 }, { "epoch": 0.5853869199122097, "grad_norm": 1.883900587660665, "learning_rate": 2.6142340307401178e-06, "loss": 0.0367, "step": 140295 }, { "epoch": 0.5854077826272, "grad_norm": 1.095540638355177, "learning_rate": 2.614187447063064e-06, "loss": 0.0227, "step": 140300 }, { "epoch": 0.5854286453421903, "grad_norm": 1.4528968189469254, "learning_rate": 2.614140865876179e-06, "loss": 0.0344, "step": 140305 }, { "epoch": 0.5854495080571805, "grad_norm": 0.8774709919677856, "learning_rate": 2.6140942871792417e-06, "loss": 0.0227, "step": 140310 }, { "epoch": 0.5854703707721708, "grad_norm": 0.7419125630443109, "learning_rate": 2.6140477109720304e-06, "loss": 0.0268, "step": 140315 }, { "epoch": 0.585491233487161, "grad_norm": 0.6980234024047125, "learning_rate": 2.6140011372543217e-06, "loss": 0.0232, "step": 140320 }, { "epoch": 0.5855120962021514, "grad_norm": 0.6407231231233453, "learning_rate": 2.613954566025896e-06, "loss": 0.0223, "step": 140325 }, { "epoch": 0.5855329589171416, "grad_norm": 0.9480080192776893, "learning_rate": 2.6139079972865294e-06, "loss": 0.0304, "step": 140330 }, { "epoch": 0.5855538216321319, "grad_norm": 1.190975561848195, "learning_rate": 2.6138614310360023e-06, "loss": 0.0211, "step": 140335 }, { "epoch": 0.5855746843471222, "grad_norm": 0.4156234061956637, "learning_rate": 2.613814867274091e-06, "loss": 0.0204, "step": 140340 }, { "epoch": 0.5855955470621125, "grad_norm": 0.9475232422375515, "learning_rate": 2.6137683060005754e-06, "loss": 0.0233, "step": 140345 }, { "epoch": 0.5856164097771027, "grad_norm": 0.7955404689842948, "learning_rate": 2.6137217472152326e-06, "loss": 0.0248, "step": 140350 }, { "epoch": 0.5856372724920931, "grad_norm": 0.6300812537221141, "learning_rate": 2.6136751909178424e-06, "loss": 0.0237, "step": 140355 }, { "epoch": 0.5856581352070833, "grad_norm": 0.6120287630782838, "learning_rate": 2.6136286371081825e-06, "loss": 0.0268, "step": 140360 }, { "epoch": 0.5856789979220736, "grad_norm": 0.4087337610780584, "learning_rate": 2.6135820857860313e-06, "loss": 0.023, "step": 140365 }, { "epoch": 0.5856998606370639, "grad_norm": 1.0594267185142712, "learning_rate": 2.6135355369511674e-06, "loss": 0.0283, "step": 140370 }, { "epoch": 0.5857207233520542, "grad_norm": 0.5957797337260664, "learning_rate": 2.613488990603369e-06, "loss": 0.0253, "step": 140375 }, { "epoch": 0.5857415860670444, "grad_norm": 0.2733848244014938, "learning_rate": 2.6134424467424153e-06, "loss": 0.0197, "step": 140380 }, { "epoch": 0.5857624487820347, "grad_norm": 0.9182561049610098, "learning_rate": 2.6133959053680842e-06, "loss": 0.0222, "step": 140385 }, { "epoch": 0.585783311497025, "grad_norm": 0.4610451901172897, "learning_rate": 2.613349366480154e-06, "loss": 0.0236, "step": 140390 }, { "epoch": 0.5858041742120152, "grad_norm": 1.0518076671560346, "learning_rate": 2.6133028300784047e-06, "loss": 0.0185, "step": 140395 }, { "epoch": 0.5858250369270055, "grad_norm": 0.6322993719604879, "learning_rate": 2.613256296162614e-06, "loss": 0.0188, "step": 140400 }, { "epoch": 0.5858458996419958, "grad_norm": 0.6154836777597625, "learning_rate": 2.6132097647325603e-06, "loss": 0.0169, "step": 140405 }, { "epoch": 0.5858667623569861, "grad_norm": 0.6721253485386284, "learning_rate": 2.613163235788024e-06, "loss": 0.0166, "step": 140410 }, { "epoch": 0.5858876250719763, "grad_norm": 0.5916218133837172, "learning_rate": 2.6131167093287813e-06, "loss": 0.0243, "step": 140415 }, { "epoch": 0.5859084877869667, "grad_norm": 0.6369237728667937, "learning_rate": 2.6130701853546126e-06, "loss": 0.0186, "step": 140420 }, { "epoch": 0.5859293505019569, "grad_norm": 0.7122296704310209, "learning_rate": 2.613023663865296e-06, "loss": 0.0249, "step": 140425 }, { "epoch": 0.5859502132169472, "grad_norm": 0.7604876747102968, "learning_rate": 2.6129771448606108e-06, "loss": 0.0197, "step": 140430 }, { "epoch": 0.5859710759319374, "grad_norm": 0.7056359415413541, "learning_rate": 2.6129306283403354e-06, "loss": 0.0284, "step": 140435 }, { "epoch": 0.5859919386469278, "grad_norm": 0.7719716827786189, "learning_rate": 2.6128841143042493e-06, "loss": 0.0205, "step": 140440 }, { "epoch": 0.586012801361918, "grad_norm": 1.0011978570957134, "learning_rate": 2.6128376027521306e-06, "loss": 0.0294, "step": 140445 }, { "epoch": 0.5860336640769083, "grad_norm": 0.9267469296438156, "learning_rate": 2.6127910936837593e-06, "loss": 0.0374, "step": 140450 }, { "epoch": 0.5860545267918986, "grad_norm": 1.0019496037578446, "learning_rate": 2.612744587098913e-06, "loss": 0.0197, "step": 140455 }, { "epoch": 0.5860753895068889, "grad_norm": 0.9927528817932741, "learning_rate": 2.612698082997372e-06, "loss": 0.0226, "step": 140460 }, { "epoch": 0.5860962522218791, "grad_norm": 1.048753056800483, "learning_rate": 2.6126515813789143e-06, "loss": 0.0292, "step": 140465 }, { "epoch": 0.5861171149368695, "grad_norm": 0.49747732602517936, "learning_rate": 2.6126050822433197e-06, "loss": 0.0232, "step": 140470 }, { "epoch": 0.5861379776518597, "grad_norm": 0.37270423041611406, "learning_rate": 2.612558585590366e-06, "loss": 0.0295, "step": 140475 }, { "epoch": 0.58615884036685, "grad_norm": 0.6627568540777684, "learning_rate": 2.612512091419834e-06, "loss": 0.0263, "step": 140480 }, { "epoch": 0.5861797030818403, "grad_norm": 0.874150674076138, "learning_rate": 2.6124655997315017e-06, "loss": 0.036, "step": 140485 }, { "epoch": 0.5862005657968306, "grad_norm": 0.7609194280620425, "learning_rate": 2.6124191105251484e-06, "loss": 0.0245, "step": 140490 }, { "epoch": 0.5862214285118208, "grad_norm": 0.8406290130863371, "learning_rate": 2.6123726238005538e-06, "loss": 0.0198, "step": 140495 }, { "epoch": 0.586242291226811, "grad_norm": 0.46299192350473534, "learning_rate": 2.6123261395574964e-06, "loss": 0.0213, "step": 140500 }, { "epoch": 0.5862631539418014, "grad_norm": 0.7392799455590238, "learning_rate": 2.6122796577957554e-06, "loss": 0.0243, "step": 140505 }, { "epoch": 0.5862840166567916, "grad_norm": 0.45868232379495416, "learning_rate": 2.6122331785151104e-06, "loss": 0.0268, "step": 140510 }, { "epoch": 0.5863048793717819, "grad_norm": 0.46150529040228655, "learning_rate": 2.6121867017153414e-06, "loss": 0.0189, "step": 140515 }, { "epoch": 0.5863257420867722, "grad_norm": 0.9244087499619762, "learning_rate": 2.6121402273962266e-06, "loss": 0.0242, "step": 140520 }, { "epoch": 0.5863466048017625, "grad_norm": 0.5515269211886437, "learning_rate": 2.6120937555575453e-06, "loss": 0.0213, "step": 140525 }, { "epoch": 0.5863674675167527, "grad_norm": 0.7421143603633727, "learning_rate": 2.6120472861990777e-06, "loss": 0.0189, "step": 140530 }, { "epoch": 0.5863883302317431, "grad_norm": 0.7465535567650444, "learning_rate": 2.6120008193206027e-06, "loss": 0.0177, "step": 140535 }, { "epoch": 0.5864091929467333, "grad_norm": 0.7160251663324689, "learning_rate": 2.6119543549219e-06, "loss": 0.0179, "step": 140540 }, { "epoch": 0.5864300556617236, "grad_norm": 0.41439799632037694, "learning_rate": 2.611907893002748e-06, "loss": 0.0201, "step": 140545 }, { "epoch": 0.5864509183767139, "grad_norm": 0.5567271245629115, "learning_rate": 2.6118614335629276e-06, "loss": 0.028, "step": 140550 }, { "epoch": 0.5864717810917042, "grad_norm": 0.5915456158820485, "learning_rate": 2.611814976602218e-06, "loss": 0.022, "step": 140555 }, { "epoch": 0.5864926438066944, "grad_norm": 0.24121364051606412, "learning_rate": 2.611768522120398e-06, "loss": 0.0164, "step": 140560 }, { "epoch": 0.5865135065216847, "grad_norm": 0.8556463875965102, "learning_rate": 2.6117220701172473e-06, "loss": 0.0227, "step": 140565 }, { "epoch": 0.586534369236675, "grad_norm": 1.487457706242542, "learning_rate": 2.6116756205925465e-06, "loss": 0.0377, "step": 140570 }, { "epoch": 0.5865552319516653, "grad_norm": 0.6274814116150534, "learning_rate": 2.611629173546074e-06, "loss": 0.0253, "step": 140575 }, { "epoch": 0.5865760946666555, "grad_norm": 0.7174237804839112, "learning_rate": 2.6115827289776104e-06, "loss": 0.0204, "step": 140580 }, { "epoch": 0.5865969573816459, "grad_norm": 0.4233631037911645, "learning_rate": 2.611536286886934e-06, "loss": 0.021, "step": 140585 }, { "epoch": 0.5866178200966361, "grad_norm": 0.5721116611135675, "learning_rate": 2.6114898472738263e-06, "loss": 0.0201, "step": 140590 }, { "epoch": 0.5866386828116263, "grad_norm": 0.7903638068872829, "learning_rate": 2.6114434101380658e-06, "loss": 0.0226, "step": 140595 }, { "epoch": 0.5866595455266167, "grad_norm": 2.4339157160059, "learning_rate": 2.6113969754794328e-06, "loss": 0.0149, "step": 140600 }, { "epoch": 0.5866804082416069, "grad_norm": 0.8072353764907791, "learning_rate": 2.6113505432977058e-06, "loss": 0.0225, "step": 140605 }, { "epoch": 0.5867012709565972, "grad_norm": 0.7185773890177647, "learning_rate": 2.6113041135926663e-06, "loss": 0.0196, "step": 140610 }, { "epoch": 0.5867221336715874, "grad_norm": 1.2782431184642373, "learning_rate": 2.6112576863640936e-06, "loss": 0.019, "step": 140615 }, { "epoch": 0.5867429963865778, "grad_norm": 0.7102602149161121, "learning_rate": 2.6112112616117676e-06, "loss": 0.022, "step": 140620 }, { "epoch": 0.586763859101568, "grad_norm": 0.7020858283100002, "learning_rate": 2.6111648393354675e-06, "loss": 0.0236, "step": 140625 }, { "epoch": 0.5867847218165583, "grad_norm": 0.8579989978944557, "learning_rate": 2.6111184195349744e-06, "loss": 0.0285, "step": 140630 }, { "epoch": 0.5868055845315486, "grad_norm": 0.7374510797613975, "learning_rate": 2.6110720022100673e-06, "loss": 0.0267, "step": 140635 }, { "epoch": 0.5868264472465389, "grad_norm": 0.69176244668025, "learning_rate": 2.6110255873605254e-06, "loss": 0.0244, "step": 140640 }, { "epoch": 0.5868473099615291, "grad_norm": 0.4407186071732726, "learning_rate": 2.610979174986131e-06, "loss": 0.018, "step": 140645 }, { "epoch": 0.5868681726765195, "grad_norm": 1.7782004161308325, "learning_rate": 2.610932765086663e-06, "loss": 0.0251, "step": 140650 }, { "epoch": 0.5868890353915097, "grad_norm": 0.99843915499307, "learning_rate": 2.6108863576619e-06, "loss": 0.0185, "step": 140655 }, { "epoch": 0.5869098981065, "grad_norm": 0.9153938102597271, "learning_rate": 2.6108399527116247e-06, "loss": 0.0259, "step": 140660 }, { "epoch": 0.5869307608214903, "grad_norm": 0.5635190088241603, "learning_rate": 2.610793550235615e-06, "loss": 0.0234, "step": 140665 }, { "epoch": 0.5869516235364806, "grad_norm": 0.7068730531773462, "learning_rate": 2.6107471502336523e-06, "loss": 0.0243, "step": 140670 }, { "epoch": 0.5869724862514708, "grad_norm": 0.6141519326413115, "learning_rate": 2.6107007527055162e-06, "loss": 0.0222, "step": 140675 }, { "epoch": 0.586993348966461, "grad_norm": 0.5765095763934212, "learning_rate": 2.610654357650987e-06, "loss": 0.0222, "step": 140680 }, { "epoch": 0.5870142116814514, "grad_norm": 0.7088173498888628, "learning_rate": 2.610607965069845e-06, "loss": 0.0257, "step": 140685 }, { "epoch": 0.5870350743964416, "grad_norm": 0.511437161913276, "learning_rate": 2.6105615749618705e-06, "loss": 0.0284, "step": 140690 }, { "epoch": 0.5870559371114319, "grad_norm": 0.618612369325149, "learning_rate": 2.6105151873268437e-06, "loss": 0.0158, "step": 140695 }, { "epoch": 0.5870767998264222, "grad_norm": 0.7196005968652452, "learning_rate": 2.6104688021645446e-06, "loss": 0.0157, "step": 140700 }, { "epoch": 0.5870976625414125, "grad_norm": 3.333641217731305, "learning_rate": 2.6104224194747545e-06, "loss": 0.0207, "step": 140705 }, { "epoch": 0.5871185252564027, "grad_norm": 0.9066996683269717, "learning_rate": 2.6103760392572524e-06, "loss": 0.0458, "step": 140710 }, { "epoch": 0.5871393879713931, "grad_norm": 1.1091050536469882, "learning_rate": 2.6103296615118194e-06, "loss": 0.0228, "step": 140715 }, { "epoch": 0.5871602506863833, "grad_norm": 0.8097301686512361, "learning_rate": 2.610283286238236e-06, "loss": 0.0211, "step": 140720 }, { "epoch": 0.5871811134013736, "grad_norm": 0.5782562782336012, "learning_rate": 2.6102369134362825e-06, "loss": 0.0277, "step": 140725 }, { "epoch": 0.5872019761163639, "grad_norm": 0.9622981181028647, "learning_rate": 2.6101905431057395e-06, "loss": 0.0222, "step": 140730 }, { "epoch": 0.5872228388313542, "grad_norm": 0.5318727601362525, "learning_rate": 2.6101441752463875e-06, "loss": 0.0198, "step": 140735 }, { "epoch": 0.5872437015463444, "grad_norm": 0.6445187597556834, "learning_rate": 2.610097809858006e-06, "loss": 0.0284, "step": 140740 }, { "epoch": 0.5872645642613347, "grad_norm": 0.6243437864487006, "learning_rate": 2.6100514469403765e-06, "loss": 0.0208, "step": 140745 }, { "epoch": 0.587285426976325, "grad_norm": 0.6320667899898861, "learning_rate": 2.61000508649328e-06, "loss": 0.0254, "step": 140750 }, { "epoch": 0.5873062896913153, "grad_norm": 0.6512248583718896, "learning_rate": 2.6099587285164963e-06, "loss": 0.0251, "step": 140755 }, { "epoch": 0.5873271524063055, "grad_norm": 0.4600244436622072, "learning_rate": 2.6099123730098058e-06, "loss": 0.018, "step": 140760 }, { "epoch": 0.5873480151212959, "grad_norm": 0.6709575699808062, "learning_rate": 2.6098660199729904e-06, "loss": 0.0282, "step": 140765 }, { "epoch": 0.5873688778362861, "grad_norm": 1.3402042381300578, "learning_rate": 2.6098196694058297e-06, "loss": 0.0234, "step": 140770 }, { "epoch": 0.5873897405512764, "grad_norm": 0.5540282358163657, "learning_rate": 2.6097733213081045e-06, "loss": 0.0317, "step": 140775 }, { "epoch": 0.5874106032662667, "grad_norm": 0.4509869201741972, "learning_rate": 2.6097269756795956e-06, "loss": 0.0141, "step": 140780 }, { "epoch": 0.587431465981257, "grad_norm": 0.6856462795990458, "learning_rate": 2.609680632520084e-06, "loss": 0.0272, "step": 140785 }, { "epoch": 0.5874523286962472, "grad_norm": 0.7927396711680942, "learning_rate": 2.6096342918293507e-06, "loss": 0.0202, "step": 140790 }, { "epoch": 0.5874731914112374, "grad_norm": 0.8076721404192219, "learning_rate": 2.6095879536071763e-06, "loss": 0.0178, "step": 140795 }, { "epoch": 0.5874940541262278, "grad_norm": 0.9367667636621438, "learning_rate": 2.609541617853341e-06, "loss": 0.022, "step": 140800 }, { "epoch": 0.587514916841218, "grad_norm": 0.7244686413485178, "learning_rate": 2.6094952845676264e-06, "loss": 0.0273, "step": 140805 }, { "epoch": 0.5875357795562083, "grad_norm": 0.4572413331491026, "learning_rate": 2.6094489537498134e-06, "loss": 0.0284, "step": 140810 }, { "epoch": 0.5875566422711986, "grad_norm": 0.6235934019899702, "learning_rate": 2.6094026253996823e-06, "loss": 0.0264, "step": 140815 }, { "epoch": 0.5875775049861889, "grad_norm": 0.3904950193064726, "learning_rate": 2.6093562995170144e-06, "loss": 0.0288, "step": 140820 }, { "epoch": 0.5875983677011791, "grad_norm": 0.49132898057199786, "learning_rate": 2.6093099761015918e-06, "loss": 0.0277, "step": 140825 }, { "epoch": 0.5876192304161695, "grad_norm": 0.61745877899436, "learning_rate": 2.6092636551531937e-06, "loss": 0.0238, "step": 140830 }, { "epoch": 0.5876400931311597, "grad_norm": 0.8685424308851666, "learning_rate": 2.6092173366716016e-06, "loss": 0.0196, "step": 140835 }, { "epoch": 0.58766095584615, "grad_norm": 0.44898450402012946, "learning_rate": 2.6091710206565973e-06, "loss": 0.0306, "step": 140840 }, { "epoch": 0.5876818185611403, "grad_norm": 0.7552007099841785, "learning_rate": 2.6091247071079612e-06, "loss": 0.0233, "step": 140845 }, { "epoch": 0.5877026812761306, "grad_norm": 0.7886913414212413, "learning_rate": 2.6090783960254746e-06, "loss": 0.0353, "step": 140850 }, { "epoch": 0.5877235439911208, "grad_norm": 0.543248845782368, "learning_rate": 2.609032087408919e-06, "loss": 0.0318, "step": 140855 }, { "epoch": 0.5877444067061111, "grad_norm": 0.8816425792639941, "learning_rate": 2.6089857812580748e-06, "loss": 0.0268, "step": 140860 }, { "epoch": 0.5877652694211014, "grad_norm": 0.4439548494431956, "learning_rate": 2.608939477572724e-06, "loss": 0.0289, "step": 140865 }, { "epoch": 0.5877861321360917, "grad_norm": 0.5925754763042816, "learning_rate": 2.6088931763526477e-06, "loss": 0.0193, "step": 140870 }, { "epoch": 0.5878069948510819, "grad_norm": 0.6692974556021869, "learning_rate": 2.6088468775976262e-06, "loss": 0.0223, "step": 140875 }, { "epoch": 0.5878278575660723, "grad_norm": 0.4803630378515735, "learning_rate": 2.608800581307442e-06, "loss": 0.0179, "step": 140880 }, { "epoch": 0.5878487202810625, "grad_norm": 0.5578924328777094, "learning_rate": 2.608754287481876e-06, "loss": 0.0201, "step": 140885 }, { "epoch": 0.5878695829960527, "grad_norm": 1.264373361712563, "learning_rate": 2.608707996120709e-06, "loss": 0.0193, "step": 140890 }, { "epoch": 0.5878904457110431, "grad_norm": 0.6439628330347344, "learning_rate": 2.6086617072237236e-06, "loss": 0.024, "step": 140895 }, { "epoch": 0.5879113084260333, "grad_norm": 0.5394890187492621, "learning_rate": 2.6086154207906993e-06, "loss": 0.0249, "step": 140900 }, { "epoch": 0.5879321711410236, "grad_norm": 1.1425425080317875, "learning_rate": 2.6085691368214193e-06, "loss": 0.0286, "step": 140905 }, { "epoch": 0.5879530338560139, "grad_norm": 0.6315089218029154, "learning_rate": 2.6085228553156643e-06, "loss": 0.0225, "step": 140910 }, { "epoch": 0.5879738965710042, "grad_norm": 0.7930358307955523, "learning_rate": 2.6084765762732162e-06, "loss": 0.027, "step": 140915 }, { "epoch": 0.5879947592859944, "grad_norm": 0.466587562346072, "learning_rate": 2.608430299693855e-06, "loss": 0.0212, "step": 140920 }, { "epoch": 0.5880156220009847, "grad_norm": 0.4574616761424541, "learning_rate": 2.608384025577364e-06, "loss": 0.0178, "step": 140925 }, { "epoch": 0.588036484715975, "grad_norm": 0.6065430406132617, "learning_rate": 2.608337753923524e-06, "loss": 0.0228, "step": 140930 }, { "epoch": 0.5880573474309653, "grad_norm": 1.1040445548290994, "learning_rate": 2.6082914847321165e-06, "loss": 0.0305, "step": 140935 }, { "epoch": 0.5880782101459555, "grad_norm": 0.7504329343035123, "learning_rate": 2.6082452180029233e-06, "loss": 0.0192, "step": 140940 }, { "epoch": 0.5880990728609459, "grad_norm": 0.41209049349923693, "learning_rate": 2.6081989537357262e-06, "loss": 0.0209, "step": 140945 }, { "epoch": 0.5881199355759361, "grad_norm": 1.0446391858594029, "learning_rate": 2.6081526919303063e-06, "loss": 0.0267, "step": 140950 }, { "epoch": 0.5881407982909264, "grad_norm": 0.701143883039776, "learning_rate": 2.608106432586446e-06, "loss": 0.0311, "step": 140955 }, { "epoch": 0.5881616610059167, "grad_norm": 1.4100738247238038, "learning_rate": 2.608060175703926e-06, "loss": 0.0301, "step": 140960 }, { "epoch": 0.588182523720907, "grad_norm": 0.8876859749336861, "learning_rate": 2.608013921282529e-06, "loss": 0.025, "step": 140965 }, { "epoch": 0.5882033864358972, "grad_norm": 0.6397679685238541, "learning_rate": 2.6079676693220366e-06, "loss": 0.0185, "step": 140970 }, { "epoch": 0.5882242491508874, "grad_norm": 0.5106053002172424, "learning_rate": 2.60792141982223e-06, "loss": 0.0219, "step": 140975 }, { "epoch": 0.5882451118658778, "grad_norm": 0.8615694693867375, "learning_rate": 2.607875172782892e-06, "loss": 0.0335, "step": 140980 }, { "epoch": 0.588265974580868, "grad_norm": 0.45005122032755174, "learning_rate": 2.607828928203804e-06, "loss": 0.0247, "step": 140985 }, { "epoch": 0.5882868372958583, "grad_norm": 1.0081052839435813, "learning_rate": 2.6077826860847473e-06, "loss": 0.0193, "step": 140990 }, { "epoch": 0.5883077000108486, "grad_norm": 0.5258287771128469, "learning_rate": 2.607736446425504e-06, "loss": 0.0239, "step": 140995 }, { "epoch": 0.5883285627258389, "grad_norm": 0.6027316847256583, "learning_rate": 2.6076902092258567e-06, "loss": 0.0249, "step": 141000 }, { "epoch": 0.5883494254408291, "grad_norm": 0.8551722262569952, "learning_rate": 2.607643974485587e-06, "loss": 0.0239, "step": 141005 }, { "epoch": 0.5883702881558195, "grad_norm": 0.6904233552901788, "learning_rate": 2.6075977422044766e-06, "loss": 0.0189, "step": 141010 }, { "epoch": 0.5883911508708097, "grad_norm": 0.633924268170848, "learning_rate": 2.607551512382308e-06, "loss": 0.0216, "step": 141015 }, { "epoch": 0.5884120135858, "grad_norm": 0.5957983202750627, "learning_rate": 2.607505285018863e-06, "loss": 0.0278, "step": 141020 }, { "epoch": 0.5884328763007903, "grad_norm": 0.37005620762644686, "learning_rate": 2.607459060113923e-06, "loss": 0.0252, "step": 141025 }, { "epoch": 0.5884537390157806, "grad_norm": 1.05051051974204, "learning_rate": 2.607412837667272e-06, "loss": 0.0253, "step": 141030 }, { "epoch": 0.5884746017307708, "grad_norm": 0.5197958038136657, "learning_rate": 2.60736661767869e-06, "loss": 0.026, "step": 141035 }, { "epoch": 0.5884954644457611, "grad_norm": 0.9628922421820321, "learning_rate": 2.6073204001479596e-06, "loss": 0.0206, "step": 141040 }, { "epoch": 0.5885163271607514, "grad_norm": 0.6892532849867299, "learning_rate": 2.607274185074864e-06, "loss": 0.0215, "step": 141045 }, { "epoch": 0.5885371898757417, "grad_norm": 0.5487471591533457, "learning_rate": 2.6072279724591842e-06, "loss": 0.0236, "step": 141050 }, { "epoch": 0.5885580525907319, "grad_norm": 1.4696065479639928, "learning_rate": 2.6071817623007034e-06, "loss": 0.026, "step": 141055 }, { "epoch": 0.5885789153057223, "grad_norm": 0.3897475690513643, "learning_rate": 2.607135554599204e-06, "loss": 0.0218, "step": 141060 }, { "epoch": 0.5885997780207125, "grad_norm": 0.5634739145627972, "learning_rate": 2.607089349354466e-06, "loss": 0.0205, "step": 141065 }, { "epoch": 0.5886206407357027, "grad_norm": 0.5753988289292447, "learning_rate": 2.6070431465662756e-06, "loss": 0.0212, "step": 141070 }, { "epoch": 0.5886415034506931, "grad_norm": 0.9108456854692569, "learning_rate": 2.606996946234411e-06, "loss": 0.0265, "step": 141075 }, { "epoch": 0.5886623661656833, "grad_norm": 0.40695786007920737, "learning_rate": 2.6069507483586577e-06, "loss": 0.023, "step": 141080 }, { "epoch": 0.5886832288806736, "grad_norm": 0.8651164749283609, "learning_rate": 2.6069045529387967e-06, "loss": 0.0257, "step": 141085 }, { "epoch": 0.588704091595664, "grad_norm": 0.7516908805059984, "learning_rate": 2.6068583599746107e-06, "loss": 0.0239, "step": 141090 }, { "epoch": 0.5887249543106542, "grad_norm": 0.6891943164412532, "learning_rate": 2.6068121694658814e-06, "loss": 0.02, "step": 141095 }, { "epoch": 0.5887458170256444, "grad_norm": 0.2427378285499367, "learning_rate": 2.606765981412393e-06, "loss": 0.0225, "step": 141100 }, { "epoch": 0.5887666797406347, "grad_norm": 0.8569416942115703, "learning_rate": 2.606719795813926e-06, "loss": 0.0192, "step": 141105 }, { "epoch": 0.588787542455625, "grad_norm": 0.7697189375400387, "learning_rate": 2.606673612670264e-06, "loss": 0.0229, "step": 141110 }, { "epoch": 0.5888084051706153, "grad_norm": 0.5986627208283979, "learning_rate": 2.6066274319811897e-06, "loss": 0.023, "step": 141115 }, { "epoch": 0.5888292678856055, "grad_norm": 0.8511259992226673, "learning_rate": 2.6065812537464857e-06, "loss": 0.0285, "step": 141120 }, { "epoch": 0.5888501306005959, "grad_norm": 1.1367877441309924, "learning_rate": 2.606535077965934e-06, "loss": 0.0726, "step": 141125 }, { "epoch": 0.5888709933155861, "grad_norm": 0.5249491651747853, "learning_rate": 2.606488904639317e-06, "loss": 0.0259, "step": 141130 }, { "epoch": 0.5888918560305764, "grad_norm": 0.808051645404236, "learning_rate": 2.6064427337664183e-06, "loss": 0.0207, "step": 141135 }, { "epoch": 0.5889127187455667, "grad_norm": 0.41997869594854853, "learning_rate": 2.6063965653470204e-06, "loss": 0.0291, "step": 141140 }, { "epoch": 0.588933581460557, "grad_norm": 0.7512598725172297, "learning_rate": 2.6063503993809047e-06, "loss": 0.0222, "step": 141145 }, { "epoch": 0.5889544441755472, "grad_norm": 1.0387421992244485, "learning_rate": 2.6063042358678557e-06, "loss": 0.0245, "step": 141150 }, { "epoch": 0.5889753068905375, "grad_norm": 0.6540920729184383, "learning_rate": 2.6062580748076553e-06, "loss": 0.0251, "step": 141155 }, { "epoch": 0.5889961696055278, "grad_norm": 0.6323668970125023, "learning_rate": 2.606211916200087e-06, "loss": 0.028, "step": 141160 }, { "epoch": 0.589017032320518, "grad_norm": 1.1918195064096817, "learning_rate": 2.6061657600449326e-06, "loss": 0.0205, "step": 141165 }, { "epoch": 0.5890378950355083, "grad_norm": 0.5197363773940709, "learning_rate": 2.6061196063419753e-06, "loss": 0.0195, "step": 141170 }, { "epoch": 0.5890587577504987, "grad_norm": 0.8432132597580433, "learning_rate": 2.6060734550909983e-06, "loss": 0.0303, "step": 141175 }, { "epoch": 0.5890796204654889, "grad_norm": 0.5321268748960545, "learning_rate": 2.606027306291784e-06, "loss": 0.022, "step": 141180 }, { "epoch": 0.5891004831804791, "grad_norm": 0.9942601904211553, "learning_rate": 2.6059811599441165e-06, "loss": 0.0262, "step": 141185 }, { "epoch": 0.5891213458954695, "grad_norm": 0.6793651710867199, "learning_rate": 2.605935016047777e-06, "loss": 0.0233, "step": 141190 }, { "epoch": 0.5891422086104597, "grad_norm": 0.6554427518311954, "learning_rate": 2.6058888746025496e-06, "loss": 0.0191, "step": 141195 }, { "epoch": 0.58916307132545, "grad_norm": 0.6274278562834193, "learning_rate": 2.6058427356082173e-06, "loss": 0.0294, "step": 141200 }, { "epoch": 0.5891839340404403, "grad_norm": 0.794678635814573, "learning_rate": 2.6057965990645623e-06, "loss": 0.0224, "step": 141205 }, { "epoch": 0.5892047967554306, "grad_norm": 0.5945545253788334, "learning_rate": 2.605750464971369e-06, "loss": 0.0271, "step": 141210 }, { "epoch": 0.5892256594704208, "grad_norm": 0.4808922285342431, "learning_rate": 2.6057043333284195e-06, "loss": 0.021, "step": 141215 }, { "epoch": 0.5892465221854111, "grad_norm": 0.6629051398741667, "learning_rate": 2.6056582041354968e-06, "loss": 0.0269, "step": 141220 }, { "epoch": 0.5892673849004014, "grad_norm": 0.8472936477158518, "learning_rate": 2.6056120773923844e-06, "loss": 0.0226, "step": 141225 }, { "epoch": 0.5892882476153917, "grad_norm": 0.8306666632083662, "learning_rate": 2.605565953098866e-06, "loss": 0.0298, "step": 141230 }, { "epoch": 0.5893091103303819, "grad_norm": 0.8180202471867273, "learning_rate": 2.6055198312547235e-06, "loss": 0.0159, "step": 141235 }, { "epoch": 0.5893299730453723, "grad_norm": 0.8215350578198785, "learning_rate": 2.6054737118597417e-06, "loss": 0.0237, "step": 141240 }, { "epoch": 0.5893508357603625, "grad_norm": 1.0752937797805764, "learning_rate": 2.6054275949137025e-06, "loss": 0.0221, "step": 141245 }, { "epoch": 0.5893716984753528, "grad_norm": 0.35025851282293347, "learning_rate": 2.6053814804163898e-06, "loss": 0.0178, "step": 141250 }, { "epoch": 0.5893925611903431, "grad_norm": 0.8266672869988019, "learning_rate": 2.6053353683675863e-06, "loss": 0.0229, "step": 141255 }, { "epoch": 0.5894134239053334, "grad_norm": 0.35080531688582983, "learning_rate": 2.605289258767077e-06, "loss": 0.0226, "step": 141260 }, { "epoch": 0.5894342866203236, "grad_norm": 0.43303982469329677, "learning_rate": 2.605243151614643e-06, "loss": 0.0223, "step": 141265 }, { "epoch": 0.589455149335314, "grad_norm": 0.7401323136253486, "learning_rate": 2.6051970469100697e-06, "loss": 0.0263, "step": 141270 }, { "epoch": 0.5894760120503042, "grad_norm": 0.6760569112558278, "learning_rate": 2.605150944653139e-06, "loss": 0.0233, "step": 141275 }, { "epoch": 0.5894968747652944, "grad_norm": 0.5825611377800625, "learning_rate": 2.605104844843635e-06, "loss": 0.0187, "step": 141280 }, { "epoch": 0.5895177374802847, "grad_norm": 0.7162873700677498, "learning_rate": 2.6050587474813415e-06, "loss": 0.0172, "step": 141285 }, { "epoch": 0.589538600195275, "grad_norm": 0.5189242992801908, "learning_rate": 2.6050126525660414e-06, "loss": 0.0208, "step": 141290 }, { "epoch": 0.5895594629102653, "grad_norm": 0.6364079779793049, "learning_rate": 2.6049665600975175e-06, "loss": 0.0209, "step": 141295 }, { "epoch": 0.5895803256252555, "grad_norm": 0.42420454717498085, "learning_rate": 2.6049204700755555e-06, "loss": 0.0214, "step": 141300 }, { "epoch": 0.5896011883402459, "grad_norm": 0.31332181488209015, "learning_rate": 2.6048743824999373e-06, "loss": 0.0209, "step": 141305 }, { "epoch": 0.5896220510552361, "grad_norm": 1.0128416792279358, "learning_rate": 2.6048282973704466e-06, "loss": 0.0243, "step": 141310 }, { "epoch": 0.5896429137702264, "grad_norm": 0.7038571811166525, "learning_rate": 2.604782214686868e-06, "loss": 0.0299, "step": 141315 }, { "epoch": 0.5896637764852167, "grad_norm": 0.547330491438424, "learning_rate": 2.6047361344489836e-06, "loss": 0.0267, "step": 141320 }, { "epoch": 0.589684639200207, "grad_norm": 0.4430915999026467, "learning_rate": 2.604690056656578e-06, "loss": 0.0178, "step": 141325 }, { "epoch": 0.5897055019151972, "grad_norm": 0.6106611749582408, "learning_rate": 2.604643981309436e-06, "loss": 0.0204, "step": 141330 }, { "epoch": 0.5897263646301875, "grad_norm": 1.1646194841096214, "learning_rate": 2.604597908407339e-06, "loss": 0.0196, "step": 141335 }, { "epoch": 0.5897472273451778, "grad_norm": 0.3261646660731506, "learning_rate": 2.604551837950073e-06, "loss": 0.0204, "step": 141340 }, { "epoch": 0.5897680900601681, "grad_norm": 0.556591130136698, "learning_rate": 2.6045057699374197e-06, "loss": 0.0219, "step": 141345 }, { "epoch": 0.5897889527751583, "grad_norm": 0.5647922147415789, "learning_rate": 2.6044597043691642e-06, "loss": 0.0254, "step": 141350 }, { "epoch": 0.5898098154901487, "grad_norm": 1.3802539998585934, "learning_rate": 2.6044136412450905e-06, "loss": 0.0314, "step": 141355 }, { "epoch": 0.5898306782051389, "grad_norm": 0.2932915642302901, "learning_rate": 2.604367580564982e-06, "loss": 0.0219, "step": 141360 }, { "epoch": 0.5898515409201291, "grad_norm": 0.5957214986415436, "learning_rate": 2.6043215223286227e-06, "loss": 0.029, "step": 141365 }, { "epoch": 0.5898724036351195, "grad_norm": 0.3927181064998553, "learning_rate": 2.604275466535796e-06, "loss": 0.0234, "step": 141370 }, { "epoch": 0.5898932663501097, "grad_norm": 0.6052285733805188, "learning_rate": 2.6042294131862865e-06, "loss": 0.0239, "step": 141375 }, { "epoch": 0.5899141290651, "grad_norm": 0.6735529741785171, "learning_rate": 2.6041833622798783e-06, "loss": 0.0203, "step": 141380 }, { "epoch": 0.5899349917800903, "grad_norm": 0.8626766653045863, "learning_rate": 2.604137313816355e-06, "loss": 0.0227, "step": 141385 }, { "epoch": 0.5899558544950806, "grad_norm": 0.8513457802040146, "learning_rate": 2.604091267795501e-06, "loss": 0.0233, "step": 141390 }, { "epoch": 0.5899767172100708, "grad_norm": 0.6749866379006971, "learning_rate": 2.6040452242170993e-06, "loss": 0.0238, "step": 141395 }, { "epoch": 0.5899975799250611, "grad_norm": 0.8215851077125398, "learning_rate": 2.6039991830809353e-06, "loss": 0.0278, "step": 141400 }, { "epoch": 0.5900184426400514, "grad_norm": 0.49934272921122586, "learning_rate": 2.6039531443867928e-06, "loss": 0.0284, "step": 141405 }, { "epoch": 0.5900393053550417, "grad_norm": 0.41139655145102577, "learning_rate": 2.603907108134455e-06, "loss": 0.0269, "step": 141410 }, { "epoch": 0.5900601680700319, "grad_norm": 0.49490188811891783, "learning_rate": 2.603861074323707e-06, "loss": 0.0228, "step": 141415 }, { "epoch": 0.5900810307850223, "grad_norm": 0.33076911810430787, "learning_rate": 2.603815042954333e-06, "loss": 0.0317, "step": 141420 }, { "epoch": 0.5901018935000125, "grad_norm": 0.5857747396841312, "learning_rate": 2.603769014026116e-06, "loss": 0.0261, "step": 141425 }, { "epoch": 0.5901227562150028, "grad_norm": 0.9460685851368933, "learning_rate": 2.6037229875388428e-06, "loss": 0.0332, "step": 141430 }, { "epoch": 0.5901436189299931, "grad_norm": 0.6094259091666019, "learning_rate": 2.603676963492295e-06, "loss": 0.0254, "step": 141435 }, { "epoch": 0.5901644816449834, "grad_norm": 0.8198462385373232, "learning_rate": 2.603630941886258e-06, "loss": 0.0201, "step": 141440 }, { "epoch": 0.5901853443599736, "grad_norm": 0.8525515868367629, "learning_rate": 2.6035849227205164e-06, "loss": 0.0183, "step": 141445 }, { "epoch": 0.590206207074964, "grad_norm": 0.6615326733246488, "learning_rate": 2.603538905994854e-06, "loss": 0.0218, "step": 141450 }, { "epoch": 0.5902270697899542, "grad_norm": 1.3400674557404342, "learning_rate": 2.603492891709055e-06, "loss": 0.0249, "step": 141455 }, { "epoch": 0.5902479325049445, "grad_norm": 0.40593938698305637, "learning_rate": 2.603446879862905e-06, "loss": 0.0192, "step": 141460 }, { "epoch": 0.5902687952199347, "grad_norm": 0.8554868621314472, "learning_rate": 2.603400870456187e-06, "loss": 0.0179, "step": 141465 }, { "epoch": 0.590289657934925, "grad_norm": 0.6563925079399966, "learning_rate": 2.603354863488686e-06, "loss": 0.0253, "step": 141470 }, { "epoch": 0.5903105206499153, "grad_norm": 0.6874395630843789, "learning_rate": 2.6033088589601875e-06, "loss": 0.0279, "step": 141475 }, { "epoch": 0.5903313833649055, "grad_norm": 0.9492016102830165, "learning_rate": 2.6032628568704742e-06, "loss": 0.0189, "step": 141480 }, { "epoch": 0.5903522460798959, "grad_norm": 0.8641687328008927, "learning_rate": 2.6032168572193315e-06, "loss": 0.0205, "step": 141485 }, { "epoch": 0.5903731087948861, "grad_norm": 0.6255230870056532, "learning_rate": 2.6031708600065443e-06, "loss": 0.0177, "step": 141490 }, { "epoch": 0.5903939715098764, "grad_norm": 0.598736038426032, "learning_rate": 2.6031248652318964e-06, "loss": 0.029, "step": 141495 }, { "epoch": 0.5904148342248667, "grad_norm": 0.4703776325220118, "learning_rate": 2.6030788728951733e-06, "loss": 0.0233, "step": 141500 }, { "epoch": 0.590435696939857, "grad_norm": 1.536658056972644, "learning_rate": 2.6030328829961594e-06, "loss": 0.0331, "step": 141505 }, { "epoch": 0.5904565596548472, "grad_norm": 1.1670314563272681, "learning_rate": 2.602986895534638e-06, "loss": 0.0292, "step": 141510 }, { "epoch": 0.5904774223698375, "grad_norm": 0.7643153707134824, "learning_rate": 2.6029409105103963e-06, "loss": 0.0163, "step": 141515 }, { "epoch": 0.5904982850848278, "grad_norm": 0.6087193221877836, "learning_rate": 2.602894927923217e-06, "loss": 0.0221, "step": 141520 }, { "epoch": 0.5905191477998181, "grad_norm": 0.47378581521042024, "learning_rate": 2.602848947772885e-06, "loss": 0.0215, "step": 141525 }, { "epoch": 0.5905400105148083, "grad_norm": 0.6669951161427303, "learning_rate": 2.6028029700591867e-06, "loss": 0.0245, "step": 141530 }, { "epoch": 0.5905608732297987, "grad_norm": 0.7884652707325257, "learning_rate": 2.6027569947819054e-06, "loss": 0.023, "step": 141535 }, { "epoch": 0.5905817359447889, "grad_norm": 0.5608870274855998, "learning_rate": 2.6027110219408263e-06, "loss": 0.0237, "step": 141540 }, { "epoch": 0.5906025986597792, "grad_norm": 0.5808671501845308, "learning_rate": 2.602665051535734e-06, "loss": 0.0191, "step": 141545 }, { "epoch": 0.5906234613747695, "grad_norm": 0.8840001984174864, "learning_rate": 2.602619083566414e-06, "loss": 0.0204, "step": 141550 }, { "epoch": 0.5906443240897598, "grad_norm": 1.1717520207770318, "learning_rate": 2.60257311803265e-06, "loss": 0.0221, "step": 141555 }, { "epoch": 0.59066518680475, "grad_norm": 0.5390203241532973, "learning_rate": 2.6025271549342283e-06, "loss": 0.0225, "step": 141560 }, { "epoch": 0.5906860495197404, "grad_norm": 0.5575594710359543, "learning_rate": 2.602481194270934e-06, "loss": 0.0215, "step": 141565 }, { "epoch": 0.5907069122347306, "grad_norm": 0.5710295351546403, "learning_rate": 2.602435236042551e-06, "loss": 0.0319, "step": 141570 }, { "epoch": 0.5907277749497208, "grad_norm": 0.7779915039845672, "learning_rate": 2.602389280248864e-06, "loss": 0.0246, "step": 141575 }, { "epoch": 0.5907486376647111, "grad_norm": 0.865384882193892, "learning_rate": 2.6023433268896598e-06, "loss": 0.0205, "step": 141580 }, { "epoch": 0.5907695003797014, "grad_norm": 0.7999680213678791, "learning_rate": 2.6022973759647218e-06, "loss": 0.0244, "step": 141585 }, { "epoch": 0.5907903630946917, "grad_norm": 0.26022067685475775, "learning_rate": 2.602251427473836e-06, "loss": 0.0207, "step": 141590 }, { "epoch": 0.5908112258096819, "grad_norm": 0.8312351935186625, "learning_rate": 2.6022054814167874e-06, "loss": 0.0276, "step": 141595 }, { "epoch": 0.5908320885246723, "grad_norm": 0.7450779300614204, "learning_rate": 2.6021595377933606e-06, "loss": 0.0254, "step": 141600 }, { "epoch": 0.5908529512396625, "grad_norm": 0.666468462375898, "learning_rate": 2.602113596603342e-06, "loss": 0.0207, "step": 141605 }, { "epoch": 0.5908738139546528, "grad_norm": 0.640674158833164, "learning_rate": 2.602067657846515e-06, "loss": 0.0146, "step": 141610 }, { "epoch": 0.5908946766696431, "grad_norm": 0.5780309777854579, "learning_rate": 2.602021721522666e-06, "loss": 0.0256, "step": 141615 }, { "epoch": 0.5909155393846334, "grad_norm": 0.48958781883838115, "learning_rate": 2.6019757876315806e-06, "loss": 0.0219, "step": 141620 }, { "epoch": 0.5909364020996236, "grad_norm": 0.7853490727417196, "learning_rate": 2.601929856173043e-06, "loss": 0.0214, "step": 141625 }, { "epoch": 0.590957264814614, "grad_norm": 0.6708826504326336, "learning_rate": 2.601883927146839e-06, "loss": 0.0198, "step": 141630 }, { "epoch": 0.5909781275296042, "grad_norm": 0.3104791942617174, "learning_rate": 2.6018380005527542e-06, "loss": 0.0143, "step": 141635 }, { "epoch": 0.5909989902445945, "grad_norm": 0.5215975605544045, "learning_rate": 2.6017920763905735e-06, "loss": 0.0204, "step": 141640 }, { "epoch": 0.5910198529595847, "grad_norm": 0.4671417431118745, "learning_rate": 2.6017461546600825e-06, "loss": 0.035, "step": 141645 }, { "epoch": 0.5910407156745751, "grad_norm": 0.696995235162488, "learning_rate": 2.601700235361067e-06, "loss": 0.0337, "step": 141650 }, { "epoch": 0.5910615783895653, "grad_norm": 0.6237985862455798, "learning_rate": 2.6016543184933117e-06, "loss": 0.0241, "step": 141655 }, { "epoch": 0.5910824411045555, "grad_norm": 0.7341189586555275, "learning_rate": 2.6016084040566023e-06, "loss": 0.0205, "step": 141660 }, { "epoch": 0.5911033038195459, "grad_norm": 1.018238421936464, "learning_rate": 2.6015624920507247e-06, "loss": 0.0273, "step": 141665 }, { "epoch": 0.5911241665345361, "grad_norm": 0.4456122870142444, "learning_rate": 2.6015165824754644e-06, "loss": 0.0267, "step": 141670 }, { "epoch": 0.5911450292495264, "grad_norm": 0.9392974232048741, "learning_rate": 2.6014706753306063e-06, "loss": 0.0189, "step": 141675 }, { "epoch": 0.5911658919645167, "grad_norm": 0.9285450381525088, "learning_rate": 2.601424770615936e-06, "loss": 0.019, "step": 141680 }, { "epoch": 0.591186754679507, "grad_norm": 0.57693496257926, "learning_rate": 2.6013788683312396e-06, "loss": 0.022, "step": 141685 }, { "epoch": 0.5912076173944972, "grad_norm": 0.9233887630818454, "learning_rate": 2.6013329684763033e-06, "loss": 0.021, "step": 141690 }, { "epoch": 0.5912284801094875, "grad_norm": 0.9297921838485106, "learning_rate": 2.6012870710509115e-06, "loss": 0.0298, "step": 141695 }, { "epoch": 0.5912493428244778, "grad_norm": 0.5285780212049944, "learning_rate": 2.60124117605485e-06, "loss": 0.0189, "step": 141700 }, { "epoch": 0.5912702055394681, "grad_norm": 0.4103525392244064, "learning_rate": 2.601195283487905e-06, "loss": 0.0224, "step": 141705 }, { "epoch": 0.5912910682544583, "grad_norm": 0.8125221589524666, "learning_rate": 2.601149393349862e-06, "loss": 0.0268, "step": 141710 }, { "epoch": 0.5913119309694487, "grad_norm": 0.28501003308658956, "learning_rate": 2.6011035056405075e-06, "loss": 0.0159, "step": 141715 }, { "epoch": 0.5913327936844389, "grad_norm": 0.4020532238302681, "learning_rate": 2.601057620359626e-06, "loss": 0.0203, "step": 141720 }, { "epoch": 0.5913536563994292, "grad_norm": 0.7318586624712541, "learning_rate": 2.601011737507005e-06, "loss": 0.0238, "step": 141725 }, { "epoch": 0.5913745191144195, "grad_norm": 0.4547339345047792, "learning_rate": 2.600965857082428e-06, "loss": 0.0248, "step": 141730 }, { "epoch": 0.5913953818294098, "grad_norm": 0.8564992219065712, "learning_rate": 2.6009199790856826e-06, "loss": 0.0214, "step": 141735 }, { "epoch": 0.5914162445444, "grad_norm": 0.4477652755633275, "learning_rate": 2.6008741035165547e-06, "loss": 0.0262, "step": 141740 }, { "epoch": 0.5914371072593904, "grad_norm": 0.7360164499875935, "learning_rate": 2.600828230374829e-06, "loss": 0.0246, "step": 141745 }, { "epoch": 0.5914579699743806, "grad_norm": 1.05515760113035, "learning_rate": 2.600782359660292e-06, "loss": 0.0193, "step": 141750 }, { "epoch": 0.5914788326893708, "grad_norm": 0.5457834419322779, "learning_rate": 2.6007364913727308e-06, "loss": 0.0224, "step": 141755 }, { "epoch": 0.5914996954043611, "grad_norm": 0.8156807905654572, "learning_rate": 2.6006906255119295e-06, "loss": 0.0234, "step": 141760 }, { "epoch": 0.5915205581193514, "grad_norm": 0.8626944354964944, "learning_rate": 2.600644762077676e-06, "loss": 0.023, "step": 141765 }, { "epoch": 0.5915414208343417, "grad_norm": 0.4432659430264712, "learning_rate": 2.6005989010697547e-06, "loss": 0.0243, "step": 141770 }, { "epoch": 0.5915622835493319, "grad_norm": 0.33252345505989794, "learning_rate": 2.6005530424879527e-06, "loss": 0.0205, "step": 141775 }, { "epoch": 0.5915831462643223, "grad_norm": 0.26769985942412466, "learning_rate": 2.600507186332056e-06, "loss": 0.0205, "step": 141780 }, { "epoch": 0.5916040089793125, "grad_norm": 0.3405438810067326, "learning_rate": 2.60046133260185e-06, "loss": 0.0231, "step": 141785 }, { "epoch": 0.5916248716943028, "grad_norm": 0.6186468149783352, "learning_rate": 2.6004154812971216e-06, "loss": 0.0319, "step": 141790 }, { "epoch": 0.5916457344092931, "grad_norm": 0.7446543510964436, "learning_rate": 2.6003696324176564e-06, "loss": 0.0208, "step": 141795 }, { "epoch": 0.5916665971242834, "grad_norm": 0.5660638025637834, "learning_rate": 2.6003237859632413e-06, "loss": 0.0193, "step": 141800 }, { "epoch": 0.5916874598392736, "grad_norm": 1.048178777511909, "learning_rate": 2.6002779419336626e-06, "loss": 0.0213, "step": 141805 }, { "epoch": 0.591708322554264, "grad_norm": 0.4226313815685875, "learning_rate": 2.600232100328705e-06, "loss": 0.0186, "step": 141810 }, { "epoch": 0.5917291852692542, "grad_norm": 0.4628704877891146, "learning_rate": 2.6001862611481572e-06, "loss": 0.0155, "step": 141815 }, { "epoch": 0.5917500479842445, "grad_norm": 0.5547710821981221, "learning_rate": 2.600140424391803e-06, "loss": 0.0264, "step": 141820 }, { "epoch": 0.5917709106992347, "grad_norm": 0.3321207080907317, "learning_rate": 2.600094590059431e-06, "loss": 0.0253, "step": 141825 }, { "epoch": 0.5917917734142251, "grad_norm": 0.5943304086389811, "learning_rate": 2.6000487581508254e-06, "loss": 0.0178, "step": 141830 }, { "epoch": 0.5918126361292153, "grad_norm": 1.6034228973160578, "learning_rate": 2.600002928665775e-06, "loss": 0.0245, "step": 141835 }, { "epoch": 0.5918334988442056, "grad_norm": 1.4443968909645277, "learning_rate": 2.5999571016040635e-06, "loss": 0.0188, "step": 141840 }, { "epoch": 0.5918543615591959, "grad_norm": 0.6357292036384213, "learning_rate": 2.59991127696548e-06, "loss": 0.0204, "step": 141845 }, { "epoch": 0.5918752242741862, "grad_norm": 1.0419644175812484, "learning_rate": 2.5998654547498085e-06, "loss": 0.0257, "step": 141850 }, { "epoch": 0.5918960869891764, "grad_norm": 0.4278186411331014, "learning_rate": 2.5998196349568376e-06, "loss": 0.031, "step": 141855 }, { "epoch": 0.5919169497041668, "grad_norm": 0.7966265711138909, "learning_rate": 2.599773817586353e-06, "loss": 0.0224, "step": 141860 }, { "epoch": 0.591937812419157, "grad_norm": 0.3539988780332248, "learning_rate": 2.599728002638141e-06, "loss": 0.02, "step": 141865 }, { "epoch": 0.5919586751341472, "grad_norm": 0.737008030414599, "learning_rate": 2.5996821901119886e-06, "loss": 0.0212, "step": 141870 }, { "epoch": 0.5919795378491375, "grad_norm": 0.39980450621176095, "learning_rate": 2.5996363800076812e-06, "loss": 0.0199, "step": 141875 }, { "epoch": 0.5920004005641278, "grad_norm": 1.195869264342004, "learning_rate": 2.5995905723250076e-06, "loss": 0.0378, "step": 141880 }, { "epoch": 0.5920212632791181, "grad_norm": 0.48406706948660594, "learning_rate": 2.5995447670637525e-06, "loss": 0.0315, "step": 141885 }, { "epoch": 0.5920421259941083, "grad_norm": 0.5604267074488136, "learning_rate": 2.5994989642237034e-06, "loss": 0.0205, "step": 141890 }, { "epoch": 0.5920629887090987, "grad_norm": 0.5333182621165417, "learning_rate": 2.5994531638046473e-06, "loss": 0.0251, "step": 141895 }, { "epoch": 0.5920838514240889, "grad_norm": 0.6621391865597387, "learning_rate": 2.5994073658063703e-06, "loss": 0.0296, "step": 141900 }, { "epoch": 0.5921047141390792, "grad_norm": 0.7191043870208937, "learning_rate": 2.599361570228659e-06, "loss": 0.0224, "step": 141905 }, { "epoch": 0.5921255768540695, "grad_norm": 1.1324997066925002, "learning_rate": 2.5993157770713005e-06, "loss": 0.0264, "step": 141910 }, { "epoch": 0.5921464395690598, "grad_norm": 0.7427881878380107, "learning_rate": 2.5992699863340825e-06, "loss": 0.0266, "step": 141915 }, { "epoch": 0.59216730228405, "grad_norm": 0.890540116121131, "learning_rate": 2.599224198016791e-06, "loss": 0.0223, "step": 141920 }, { "epoch": 0.5921881649990404, "grad_norm": 0.8964058943224061, "learning_rate": 2.5991784121192128e-06, "loss": 0.0269, "step": 141925 }, { "epoch": 0.5922090277140306, "grad_norm": 0.5151264197819124, "learning_rate": 2.5991326286411345e-06, "loss": 0.0217, "step": 141930 }, { "epoch": 0.5922298904290209, "grad_norm": 0.6039225742666677, "learning_rate": 2.5990868475823437e-06, "loss": 0.02, "step": 141935 }, { "epoch": 0.5922507531440111, "grad_norm": 0.49427271164563846, "learning_rate": 2.5990410689426265e-06, "loss": 0.0229, "step": 141940 }, { "epoch": 0.5922716158590015, "grad_norm": 0.5420600474081138, "learning_rate": 2.598995292721771e-06, "loss": 0.0197, "step": 141945 }, { "epoch": 0.5922924785739917, "grad_norm": 0.4424346368123728, "learning_rate": 2.5989495189195628e-06, "loss": 0.0245, "step": 141950 }, { "epoch": 0.5923133412889819, "grad_norm": 0.7430226811505769, "learning_rate": 2.598903747535791e-06, "loss": 0.0231, "step": 141955 }, { "epoch": 0.5923342040039723, "grad_norm": 0.49613039463884423, "learning_rate": 2.5988579785702413e-06, "loss": 0.0208, "step": 141960 }, { "epoch": 0.5923550667189625, "grad_norm": 0.9753634152072933, "learning_rate": 2.5988122120226995e-06, "loss": 0.0245, "step": 141965 }, { "epoch": 0.5923759294339528, "grad_norm": 1.28205404342979, "learning_rate": 2.598766447892955e-06, "loss": 0.0266, "step": 141970 }, { "epoch": 0.5923967921489431, "grad_norm": 0.8948331859757884, "learning_rate": 2.598720686180794e-06, "loss": 0.0434, "step": 141975 }, { "epoch": 0.5924176548639334, "grad_norm": 0.4262187352097434, "learning_rate": 2.5986749268860033e-06, "loss": 0.0147, "step": 141980 }, { "epoch": 0.5924385175789236, "grad_norm": 0.9109570340901993, "learning_rate": 2.5986291700083705e-06, "loss": 0.0187, "step": 141985 }, { "epoch": 0.592459380293914, "grad_norm": 0.6338893306590151, "learning_rate": 2.5985834155476823e-06, "loss": 0.0247, "step": 141990 }, { "epoch": 0.5924802430089042, "grad_norm": 0.6803852013583808, "learning_rate": 2.598537663503727e-06, "loss": 0.0221, "step": 141995 }, { "epoch": 0.5925011057238945, "grad_norm": 0.9224301465061555, "learning_rate": 2.598491913876291e-06, "loss": 0.0213, "step": 142000 }, { "epoch": 0.5925219684388847, "grad_norm": 0.8757714764260597, "learning_rate": 2.5984461666651618e-06, "loss": 0.0246, "step": 142005 }, { "epoch": 0.5925428311538751, "grad_norm": 1.3278407868934963, "learning_rate": 2.5984004218701266e-06, "loss": 0.0247, "step": 142010 }, { "epoch": 0.5925636938688653, "grad_norm": 0.42719564011530925, "learning_rate": 2.5983546794909725e-06, "loss": 0.0129, "step": 142015 }, { "epoch": 0.5925845565838556, "grad_norm": 0.740454880029363, "learning_rate": 2.5983089395274873e-06, "loss": 0.0229, "step": 142020 }, { "epoch": 0.5926054192988459, "grad_norm": 0.6952503691624585, "learning_rate": 2.5982632019794585e-06, "loss": 0.0236, "step": 142025 }, { "epoch": 0.5926262820138362, "grad_norm": 0.4774503112691173, "learning_rate": 2.598217466846673e-06, "loss": 0.0216, "step": 142030 }, { "epoch": 0.5926471447288264, "grad_norm": 0.6379669353682321, "learning_rate": 2.5981717341289185e-06, "loss": 0.0242, "step": 142035 }, { "epoch": 0.5926680074438168, "grad_norm": 0.5544262399845694, "learning_rate": 2.598126003825983e-06, "loss": 0.0249, "step": 142040 }, { "epoch": 0.592688870158807, "grad_norm": 0.9960887761979211, "learning_rate": 2.5980802759376535e-06, "loss": 0.0227, "step": 142045 }, { "epoch": 0.5927097328737972, "grad_norm": 1.22070843485255, "learning_rate": 2.5980345504637168e-06, "loss": 0.0225, "step": 142050 }, { "epoch": 0.5927305955887875, "grad_norm": 0.6334906403419277, "learning_rate": 2.5979888274039616e-06, "loss": 0.0259, "step": 142055 }, { "epoch": 0.5927514583037778, "grad_norm": 0.46242970769304, "learning_rate": 2.5979431067581746e-06, "loss": 0.0282, "step": 142060 }, { "epoch": 0.5927723210187681, "grad_norm": 0.5693172024274731, "learning_rate": 2.5978973885261442e-06, "loss": 0.0184, "step": 142065 }, { "epoch": 0.5927931837337583, "grad_norm": 0.8083802701384801, "learning_rate": 2.5978516727076577e-06, "loss": 0.0224, "step": 142070 }, { "epoch": 0.5928140464487487, "grad_norm": 1.355055726328201, "learning_rate": 2.597805959302502e-06, "loss": 0.0267, "step": 142075 }, { "epoch": 0.5928349091637389, "grad_norm": 0.48792474003742864, "learning_rate": 2.5977602483104657e-06, "loss": 0.0238, "step": 142080 }, { "epoch": 0.5928557718787292, "grad_norm": 0.828211905840726, "learning_rate": 2.5977145397313368e-06, "loss": 0.0178, "step": 142085 }, { "epoch": 0.5928766345937195, "grad_norm": 0.38243456683014837, "learning_rate": 2.597668833564902e-06, "loss": 0.0283, "step": 142090 }, { "epoch": 0.5928974973087098, "grad_norm": 0.6360310105188248, "learning_rate": 2.5976231298109496e-06, "loss": 0.0168, "step": 142095 }, { "epoch": 0.5929183600237, "grad_norm": 0.5531744834351038, "learning_rate": 2.5975774284692675e-06, "loss": 0.02, "step": 142100 }, { "epoch": 0.5929392227386904, "grad_norm": 1.1327991102126198, "learning_rate": 2.597531729539643e-06, "loss": 0.0274, "step": 142105 }, { "epoch": 0.5929600854536806, "grad_norm": 0.36025061345907144, "learning_rate": 2.597486033021864e-06, "loss": 0.0239, "step": 142110 }, { "epoch": 0.5929809481686709, "grad_norm": 1.3353102943159005, "learning_rate": 2.597440338915719e-06, "loss": 0.0252, "step": 142115 }, { "epoch": 0.5930018108836611, "grad_norm": 0.39528007715063695, "learning_rate": 2.597394647220996e-06, "loss": 0.0271, "step": 142120 }, { "epoch": 0.5930226735986515, "grad_norm": 0.6448433100956834, "learning_rate": 2.5973489579374822e-06, "loss": 0.0251, "step": 142125 }, { "epoch": 0.5930435363136417, "grad_norm": 0.5958475496728478, "learning_rate": 2.5973032710649647e-06, "loss": 0.0329, "step": 142130 }, { "epoch": 0.593064399028632, "grad_norm": 0.609475302868462, "learning_rate": 2.5972575866032333e-06, "loss": 0.0246, "step": 142135 }, { "epoch": 0.5930852617436223, "grad_norm": 0.48579400904604825, "learning_rate": 2.5972119045520756e-06, "loss": 0.0173, "step": 142140 }, { "epoch": 0.5931061244586125, "grad_norm": 0.47645695048494774, "learning_rate": 2.5971662249112785e-06, "loss": 0.0232, "step": 142145 }, { "epoch": 0.5931269871736028, "grad_norm": 0.6002273696901693, "learning_rate": 2.597120547680631e-06, "loss": 0.0239, "step": 142150 }, { "epoch": 0.5931478498885931, "grad_norm": 1.3275171656204372, "learning_rate": 2.597074872859921e-06, "loss": 0.0226, "step": 142155 }, { "epoch": 0.5931687126035834, "grad_norm": 0.9725101275973893, "learning_rate": 2.5970292004489363e-06, "loss": 0.0317, "step": 142160 }, { "epoch": 0.5931895753185736, "grad_norm": 0.45908255158048283, "learning_rate": 2.5969835304474655e-06, "loss": 0.0223, "step": 142165 }, { "epoch": 0.5932104380335639, "grad_norm": 0.8867209733459439, "learning_rate": 2.596937862855296e-06, "loss": 0.0235, "step": 142170 }, { "epoch": 0.5932313007485542, "grad_norm": 0.6442597848960587, "learning_rate": 2.596892197672217e-06, "loss": 0.0294, "step": 142175 }, { "epoch": 0.5932521634635445, "grad_norm": 0.8866219113078053, "learning_rate": 2.5968465348980153e-06, "loss": 0.0224, "step": 142180 }, { "epoch": 0.5932730261785347, "grad_norm": 0.3868663179523534, "learning_rate": 2.5968008745324808e-06, "loss": 0.021, "step": 142185 }, { "epoch": 0.5932938888935251, "grad_norm": 1.276620711919222, "learning_rate": 2.5967552165754e-06, "loss": 0.0293, "step": 142190 }, { "epoch": 0.5933147516085153, "grad_norm": 0.770081982595758, "learning_rate": 2.596709561026563e-06, "loss": 0.0215, "step": 142195 }, { "epoch": 0.5933356143235056, "grad_norm": 0.2783352585470914, "learning_rate": 2.596663907885757e-06, "loss": 0.0332, "step": 142200 }, { "epoch": 0.5933564770384959, "grad_norm": 1.317262466521743, "learning_rate": 2.5966182571527703e-06, "loss": 0.0317, "step": 142205 }, { "epoch": 0.5933773397534862, "grad_norm": 0.5853525392083978, "learning_rate": 2.596572608827392e-06, "loss": 0.0246, "step": 142210 }, { "epoch": 0.5933982024684764, "grad_norm": 0.5415605527548327, "learning_rate": 2.596526962909409e-06, "loss": 0.0227, "step": 142215 }, { "epoch": 0.5934190651834668, "grad_norm": 0.4821013884367864, "learning_rate": 2.596481319398611e-06, "loss": 0.0181, "step": 142220 }, { "epoch": 0.593439927898457, "grad_norm": 0.6303927426358633, "learning_rate": 2.5964356782947864e-06, "loss": 0.0256, "step": 142225 }, { "epoch": 0.5934607906134473, "grad_norm": 0.6986140108072805, "learning_rate": 2.596390039597723e-06, "loss": 0.0251, "step": 142230 }, { "epoch": 0.5934816533284375, "grad_norm": 0.82942618745706, "learning_rate": 2.59634440330721e-06, "loss": 0.0213, "step": 142235 }, { "epoch": 0.5935025160434279, "grad_norm": 1.0603600339637886, "learning_rate": 2.596298769423035e-06, "loss": 0.0272, "step": 142240 }, { "epoch": 0.5935233787584181, "grad_norm": 0.6839211066832463, "learning_rate": 2.5962531379449874e-06, "loss": 0.0163, "step": 142245 }, { "epoch": 0.5935442414734083, "grad_norm": 0.8213079428860828, "learning_rate": 2.5962075088728555e-06, "loss": 0.0304, "step": 142250 }, { "epoch": 0.5935651041883987, "grad_norm": 0.5576314041985169, "learning_rate": 2.5961618822064277e-06, "loss": 0.0228, "step": 142255 }, { "epoch": 0.5935859669033889, "grad_norm": 0.5895800549369616, "learning_rate": 2.596116257945493e-06, "loss": 0.022, "step": 142260 }, { "epoch": 0.5936068296183792, "grad_norm": 0.5805940866845015, "learning_rate": 2.596070636089839e-06, "loss": 0.0233, "step": 142265 }, { "epoch": 0.5936276923333695, "grad_norm": 0.6843443770989245, "learning_rate": 2.5960250166392557e-06, "loss": 0.023, "step": 142270 }, { "epoch": 0.5936485550483598, "grad_norm": 0.5315906025566505, "learning_rate": 2.5959793995935314e-06, "loss": 0.0246, "step": 142275 }, { "epoch": 0.59366941776335, "grad_norm": 0.7638695690577815, "learning_rate": 2.595933784952455e-06, "loss": 0.0236, "step": 142280 }, { "epoch": 0.5936902804783404, "grad_norm": 0.7864539901465173, "learning_rate": 2.5958881727158143e-06, "loss": 0.0283, "step": 142285 }, { "epoch": 0.5937111431933306, "grad_norm": 0.9582447425828146, "learning_rate": 2.5958425628833984e-06, "loss": 0.0181, "step": 142290 }, { "epoch": 0.5937320059083209, "grad_norm": 0.47277856435508153, "learning_rate": 2.5957969554549967e-06, "loss": 0.0223, "step": 142295 }, { "epoch": 0.5937528686233111, "grad_norm": 0.7099651228481081, "learning_rate": 2.595751350430398e-06, "loss": 0.0203, "step": 142300 }, { "epoch": 0.5937737313383015, "grad_norm": 0.6733809144948152, "learning_rate": 2.59570574780939e-06, "loss": 0.0229, "step": 142305 }, { "epoch": 0.5937945940532917, "grad_norm": 0.4344001376573586, "learning_rate": 2.595660147591763e-06, "loss": 0.0226, "step": 142310 }, { "epoch": 0.593815456768282, "grad_norm": 1.887083856296635, "learning_rate": 2.595614549777305e-06, "loss": 0.0258, "step": 142315 }, { "epoch": 0.5938363194832723, "grad_norm": 0.8025646386816467, "learning_rate": 2.595568954365806e-06, "loss": 0.0253, "step": 142320 }, { "epoch": 0.5938571821982626, "grad_norm": 0.6243962176017142, "learning_rate": 2.595523361357053e-06, "loss": 0.0187, "step": 142325 }, { "epoch": 0.5938780449132528, "grad_norm": 0.6929851892746339, "learning_rate": 2.5954777707508367e-06, "loss": 0.027, "step": 142330 }, { "epoch": 0.5938989076282432, "grad_norm": 0.7965448264850549, "learning_rate": 2.595432182546946e-06, "loss": 0.0241, "step": 142335 }, { "epoch": 0.5939197703432334, "grad_norm": 0.7492367176957911, "learning_rate": 2.595386596745169e-06, "loss": 0.025, "step": 142340 }, { "epoch": 0.5939406330582236, "grad_norm": 0.4618838384217594, "learning_rate": 2.5953410133452955e-06, "loss": 0.0179, "step": 142345 }, { "epoch": 0.5939614957732139, "grad_norm": 0.33695713610227784, "learning_rate": 2.595295432347114e-06, "loss": 0.0247, "step": 142350 }, { "epoch": 0.5939823584882042, "grad_norm": 0.9227544943151579, "learning_rate": 2.5952498537504145e-06, "loss": 0.0191, "step": 142355 }, { "epoch": 0.5940032212031945, "grad_norm": 0.7220797045924198, "learning_rate": 2.5952042775549848e-06, "loss": 0.0287, "step": 142360 }, { "epoch": 0.5940240839181847, "grad_norm": 0.4063400872081973, "learning_rate": 2.5951587037606152e-06, "loss": 0.0338, "step": 142365 }, { "epoch": 0.5940449466331751, "grad_norm": 0.6638618619374573, "learning_rate": 2.595113132367095e-06, "loss": 0.0246, "step": 142370 }, { "epoch": 0.5940658093481653, "grad_norm": 0.383253504222711, "learning_rate": 2.5950675633742116e-06, "loss": 0.0217, "step": 142375 }, { "epoch": 0.5940866720631556, "grad_norm": 1.2256428836782163, "learning_rate": 2.595021996781757e-06, "loss": 0.0213, "step": 142380 }, { "epoch": 0.5941075347781459, "grad_norm": 0.5900902946930698, "learning_rate": 2.594976432589518e-06, "loss": 0.0257, "step": 142385 }, { "epoch": 0.5941283974931362, "grad_norm": 0.7702479637647158, "learning_rate": 2.5949308707972857e-06, "loss": 0.0287, "step": 142390 }, { "epoch": 0.5941492602081264, "grad_norm": 0.6310043340330077, "learning_rate": 2.5948853114048484e-06, "loss": 0.0197, "step": 142395 }, { "epoch": 0.5941701229231168, "grad_norm": 1.928395901389284, "learning_rate": 2.5948397544119953e-06, "loss": 0.0252, "step": 142400 }, { "epoch": 0.594190985638107, "grad_norm": 0.9682718233204964, "learning_rate": 2.5947941998185167e-06, "loss": 0.023, "step": 142405 }, { "epoch": 0.5942118483530973, "grad_norm": 0.9233393417390969, "learning_rate": 2.5947486476242005e-06, "loss": 0.0214, "step": 142410 }, { "epoch": 0.5942327110680875, "grad_norm": 0.3995320864317607, "learning_rate": 2.5947030978288383e-06, "loss": 0.0156, "step": 142415 }, { "epoch": 0.5942535737830779, "grad_norm": 0.6651742171738724, "learning_rate": 2.5946575504322175e-06, "loss": 0.0264, "step": 142420 }, { "epoch": 0.5942744364980681, "grad_norm": 0.35117846305775136, "learning_rate": 2.5946120054341285e-06, "loss": 0.0176, "step": 142425 }, { "epoch": 0.5942952992130583, "grad_norm": 0.9296824510468643, "learning_rate": 2.5945664628343607e-06, "loss": 0.0253, "step": 142430 }, { "epoch": 0.5943161619280487, "grad_norm": 0.697532737430233, "learning_rate": 2.5945209226327033e-06, "loss": 0.0218, "step": 142435 }, { "epoch": 0.594337024643039, "grad_norm": 0.8397355678429582, "learning_rate": 2.5944753848289466e-06, "loss": 0.0246, "step": 142440 }, { "epoch": 0.5943578873580292, "grad_norm": 0.4342656252911774, "learning_rate": 2.5944298494228794e-06, "loss": 0.0172, "step": 142445 }, { "epoch": 0.5943787500730195, "grad_norm": 0.5301083204421266, "learning_rate": 2.5943843164142914e-06, "loss": 0.0278, "step": 142450 }, { "epoch": 0.5943996127880098, "grad_norm": 0.6889543699219551, "learning_rate": 2.5943387858029727e-06, "loss": 0.0229, "step": 142455 }, { "epoch": 0.594420475503, "grad_norm": 0.4096782667015276, "learning_rate": 2.5942932575887123e-06, "loss": 0.0233, "step": 142460 }, { "epoch": 0.5944413382179904, "grad_norm": 0.5577255693608105, "learning_rate": 2.594247731771301e-06, "loss": 0.0257, "step": 142465 }, { "epoch": 0.5944622009329806, "grad_norm": 0.42125805964241747, "learning_rate": 2.594202208350527e-06, "loss": 0.0207, "step": 142470 }, { "epoch": 0.5944830636479709, "grad_norm": 0.5563515454868131, "learning_rate": 2.5941566873261816e-06, "loss": 0.0229, "step": 142475 }, { "epoch": 0.5945039263629611, "grad_norm": 0.7769178628829612, "learning_rate": 2.594111168698053e-06, "loss": 0.024, "step": 142480 }, { "epoch": 0.5945247890779515, "grad_norm": 0.4785898495487745, "learning_rate": 2.594065652465932e-06, "loss": 0.0152, "step": 142485 }, { "epoch": 0.5945456517929417, "grad_norm": 0.8665388438619331, "learning_rate": 2.594020138629608e-06, "loss": 0.0238, "step": 142490 }, { "epoch": 0.594566514507932, "grad_norm": 0.9624743632771356, "learning_rate": 2.593974627188871e-06, "loss": 0.0215, "step": 142495 }, { "epoch": 0.5945873772229223, "grad_norm": 1.0230487694625405, "learning_rate": 2.5939291181435113e-06, "loss": 0.0254, "step": 142500 }, { "epoch": 0.5946082399379126, "grad_norm": 0.5227774752838765, "learning_rate": 2.5938836114933173e-06, "loss": 0.0201, "step": 142505 }, { "epoch": 0.5946291026529028, "grad_norm": 1.230466344690329, "learning_rate": 2.593838107238081e-06, "loss": 0.0215, "step": 142510 }, { "epoch": 0.5946499653678932, "grad_norm": 1.024233937278449, "learning_rate": 2.59379260537759e-06, "loss": 0.0201, "step": 142515 }, { "epoch": 0.5946708280828834, "grad_norm": 0.9800752185561857, "learning_rate": 2.5937471059116365e-06, "loss": 0.022, "step": 142520 }, { "epoch": 0.5946916907978737, "grad_norm": 0.6476879331833658, "learning_rate": 2.593701608840009e-06, "loss": 0.0195, "step": 142525 }, { "epoch": 0.5947125535128639, "grad_norm": 0.4667563113249684, "learning_rate": 2.5936561141624983e-06, "loss": 0.0231, "step": 142530 }, { "epoch": 0.5947334162278543, "grad_norm": 0.23850743340272604, "learning_rate": 2.5936106218788944e-06, "loss": 0.0211, "step": 142535 }, { "epoch": 0.5947542789428445, "grad_norm": 0.6767303184228256, "learning_rate": 2.593565131988986e-06, "loss": 0.0255, "step": 142540 }, { "epoch": 0.5947751416578347, "grad_norm": 0.7656881173255636, "learning_rate": 2.5935196444925653e-06, "loss": 0.021, "step": 142545 }, { "epoch": 0.5947960043728251, "grad_norm": 0.8375623226477698, "learning_rate": 2.5934741593894206e-06, "loss": 0.0228, "step": 142550 }, { "epoch": 0.5948168670878153, "grad_norm": 1.2303089085036196, "learning_rate": 2.5934286766793436e-06, "loss": 0.0295, "step": 142555 }, { "epoch": 0.5948377298028056, "grad_norm": 0.7828045014381098, "learning_rate": 2.593383196362123e-06, "loss": 0.0178, "step": 142560 }, { "epoch": 0.5948585925177959, "grad_norm": 0.6751037220234373, "learning_rate": 2.59333771843755e-06, "loss": 0.0198, "step": 142565 }, { "epoch": 0.5948794552327862, "grad_norm": 0.9082856480284518, "learning_rate": 2.5932922429054154e-06, "loss": 0.0254, "step": 142570 }, { "epoch": 0.5949003179477764, "grad_norm": 0.5399683198240328, "learning_rate": 2.5932467697655074e-06, "loss": 0.0212, "step": 142575 }, { "epoch": 0.5949211806627668, "grad_norm": 0.736816651800258, "learning_rate": 2.593201299017618e-06, "loss": 0.022, "step": 142580 }, { "epoch": 0.594942043377757, "grad_norm": 0.610645786948513, "learning_rate": 2.5931558306615366e-06, "loss": 0.0205, "step": 142585 }, { "epoch": 0.5949629060927473, "grad_norm": 0.5958035118160113, "learning_rate": 2.593110364697054e-06, "loss": 0.022, "step": 142590 }, { "epoch": 0.5949837688077375, "grad_norm": 0.760108509078719, "learning_rate": 2.5930649011239606e-06, "loss": 0.0254, "step": 142595 }, { "epoch": 0.5950046315227279, "grad_norm": 0.9727210357552905, "learning_rate": 2.5930194399420462e-06, "loss": 0.02, "step": 142600 }, { "epoch": 0.5950254942377181, "grad_norm": 0.8053123850451932, "learning_rate": 2.5929739811511017e-06, "loss": 0.0253, "step": 142605 }, { "epoch": 0.5950463569527084, "grad_norm": 0.34117902835077346, "learning_rate": 2.592928524750918e-06, "loss": 0.0215, "step": 142610 }, { "epoch": 0.5950672196676987, "grad_norm": 0.8250349718051319, "learning_rate": 2.592883070741284e-06, "loss": 0.0263, "step": 142615 }, { "epoch": 0.595088082382689, "grad_norm": 0.33457519732134994, "learning_rate": 2.5928376191219916e-06, "loss": 0.019, "step": 142620 }, { "epoch": 0.5951089450976792, "grad_norm": 0.5879994472249488, "learning_rate": 2.5927921698928306e-06, "loss": 0.0192, "step": 142625 }, { "epoch": 0.5951298078126696, "grad_norm": 0.6602447543418983, "learning_rate": 2.5927467230535917e-06, "loss": 0.0335, "step": 142630 }, { "epoch": 0.5951506705276598, "grad_norm": 0.5377169889597312, "learning_rate": 2.5927012786040657e-06, "loss": 0.0184, "step": 142635 }, { "epoch": 0.59517153324265, "grad_norm": 0.5755860487058442, "learning_rate": 2.592655836544043e-06, "loss": 0.0218, "step": 142640 }, { "epoch": 0.5951923959576404, "grad_norm": 0.602077554472515, "learning_rate": 2.5926103968733147e-06, "loss": 0.0252, "step": 142645 }, { "epoch": 0.5952132586726306, "grad_norm": 1.0290148889651745, "learning_rate": 2.5925649595916695e-06, "loss": 0.023, "step": 142650 }, { "epoch": 0.5952341213876209, "grad_norm": 1.0340741211473845, "learning_rate": 2.592519524698901e-06, "loss": 0.0205, "step": 142655 }, { "epoch": 0.5952549841026111, "grad_norm": 1.143482305261026, "learning_rate": 2.592474092194797e-06, "loss": 0.0281, "step": 142660 }, { "epoch": 0.5952758468176015, "grad_norm": 0.8895013070060448, "learning_rate": 2.592428662079151e-06, "loss": 0.0321, "step": 142665 }, { "epoch": 0.5952967095325917, "grad_norm": 0.4913653850205741, "learning_rate": 2.5923832343517515e-06, "loss": 0.0226, "step": 142670 }, { "epoch": 0.595317572247582, "grad_norm": 0.6836221748985876, "learning_rate": 2.5923378090123895e-06, "loss": 0.0194, "step": 142675 }, { "epoch": 0.5953384349625723, "grad_norm": 0.6977341125380467, "learning_rate": 2.5922923860608575e-06, "loss": 0.0295, "step": 142680 }, { "epoch": 0.5953592976775626, "grad_norm": 0.6557690812173032, "learning_rate": 2.5922469654969444e-06, "loss": 0.0179, "step": 142685 }, { "epoch": 0.5953801603925528, "grad_norm": 0.5659089489841159, "learning_rate": 2.592201547320442e-06, "loss": 0.0321, "step": 142690 }, { "epoch": 0.5954010231075432, "grad_norm": 0.5711110760321465, "learning_rate": 2.592156131531141e-06, "loss": 0.0235, "step": 142695 }, { "epoch": 0.5954218858225334, "grad_norm": 0.4586540494391121, "learning_rate": 2.592110718128832e-06, "loss": 0.024, "step": 142700 }, { "epoch": 0.5954427485375237, "grad_norm": 0.9472918803201268, "learning_rate": 2.592065307113306e-06, "loss": 0.0262, "step": 142705 }, { "epoch": 0.5954636112525139, "grad_norm": 1.0375119817928276, "learning_rate": 2.592019898484354e-06, "loss": 0.0235, "step": 142710 }, { "epoch": 0.5954844739675043, "grad_norm": 0.6643316148794186, "learning_rate": 2.591974492241768e-06, "loss": 0.0259, "step": 142715 }, { "epoch": 0.5955053366824945, "grad_norm": 0.28864227512188867, "learning_rate": 2.591929088385337e-06, "loss": 0.0172, "step": 142720 }, { "epoch": 0.5955261993974847, "grad_norm": 0.7160497454444126, "learning_rate": 2.591883686914853e-06, "loss": 0.0227, "step": 142725 }, { "epoch": 0.5955470621124751, "grad_norm": 0.5856272021826662, "learning_rate": 2.591838287830108e-06, "loss": 0.0184, "step": 142730 }, { "epoch": 0.5955679248274653, "grad_norm": 0.5377389682554172, "learning_rate": 2.5917928911308916e-06, "loss": 0.017, "step": 142735 }, { "epoch": 0.5955887875424556, "grad_norm": 0.8531336150047836, "learning_rate": 2.5917474968169955e-06, "loss": 0.0219, "step": 142740 }, { "epoch": 0.5956096502574459, "grad_norm": 0.5163537619175441, "learning_rate": 2.5917021048882107e-06, "loss": 0.0195, "step": 142745 }, { "epoch": 0.5956305129724362, "grad_norm": 0.6328916983219007, "learning_rate": 2.591656715344329e-06, "loss": 0.0197, "step": 142750 }, { "epoch": 0.5956513756874264, "grad_norm": 0.7482509807146681, "learning_rate": 2.59161132818514e-06, "loss": 0.0214, "step": 142755 }, { "epoch": 0.5956722384024168, "grad_norm": 0.6895141219582495, "learning_rate": 2.5915659434104363e-06, "loss": 0.0213, "step": 142760 }, { "epoch": 0.595693101117407, "grad_norm": 0.6166790693039936, "learning_rate": 2.591520561020009e-06, "loss": 0.023, "step": 142765 }, { "epoch": 0.5957139638323973, "grad_norm": 0.7484733021593364, "learning_rate": 2.5914751810136484e-06, "loss": 0.0219, "step": 142770 }, { "epoch": 0.5957348265473875, "grad_norm": 0.5820454844872905, "learning_rate": 2.5914298033911468e-06, "loss": 0.0153, "step": 142775 }, { "epoch": 0.5957556892623779, "grad_norm": 0.4334393464779923, "learning_rate": 2.591384428152295e-06, "loss": 0.02, "step": 142780 }, { "epoch": 0.5957765519773681, "grad_norm": 0.4281641298546612, "learning_rate": 2.591339055296884e-06, "loss": 0.019, "step": 142785 }, { "epoch": 0.5957974146923584, "grad_norm": 0.6028778129749635, "learning_rate": 2.5912936848247065e-06, "loss": 0.0213, "step": 142790 }, { "epoch": 0.5958182774073487, "grad_norm": 0.7030477609602199, "learning_rate": 2.591248316735552e-06, "loss": 0.022, "step": 142795 }, { "epoch": 0.595839140122339, "grad_norm": 0.7758133968042538, "learning_rate": 2.5912029510292134e-06, "loss": 0.0225, "step": 142800 }, { "epoch": 0.5958600028373292, "grad_norm": 0.7508496337605233, "learning_rate": 2.5911575877054813e-06, "loss": 0.0244, "step": 142805 }, { "epoch": 0.5958808655523196, "grad_norm": 0.761838247000836, "learning_rate": 2.5911122267641475e-06, "loss": 0.0227, "step": 142810 }, { "epoch": 0.5959017282673098, "grad_norm": 0.8566429784301026, "learning_rate": 2.5910668682050033e-06, "loss": 0.0218, "step": 142815 }, { "epoch": 0.5959225909823, "grad_norm": 0.3010761779478714, "learning_rate": 2.591021512027841e-06, "loss": 0.0271, "step": 142820 }, { "epoch": 0.5959434536972904, "grad_norm": 0.7177108031242639, "learning_rate": 2.5909761582324504e-06, "loss": 0.0192, "step": 142825 }, { "epoch": 0.5959643164122806, "grad_norm": 0.6998627782956157, "learning_rate": 2.5909308068186245e-06, "loss": 0.0255, "step": 142830 }, { "epoch": 0.5959851791272709, "grad_norm": 0.9805825965478977, "learning_rate": 2.5908854577861543e-06, "loss": 0.0217, "step": 142835 }, { "epoch": 0.5960060418422611, "grad_norm": 0.6563864235776794, "learning_rate": 2.5908401111348313e-06, "loss": 0.0203, "step": 142840 }, { "epoch": 0.5960269045572515, "grad_norm": 0.6257775242391637, "learning_rate": 2.5907947668644485e-06, "loss": 0.0296, "step": 142845 }, { "epoch": 0.5960477672722417, "grad_norm": 0.9772302032682173, "learning_rate": 2.5907494249747953e-06, "loss": 0.0264, "step": 142850 }, { "epoch": 0.596068629987232, "grad_norm": 0.731253922836882, "learning_rate": 2.5907040854656648e-06, "loss": 0.0206, "step": 142855 }, { "epoch": 0.5960894927022223, "grad_norm": 0.9657619603393296, "learning_rate": 2.5906587483368484e-06, "loss": 0.0265, "step": 142860 }, { "epoch": 0.5961103554172126, "grad_norm": 1.4095843637822014, "learning_rate": 2.5906134135881383e-06, "loss": 0.0243, "step": 142865 }, { "epoch": 0.5961312181322028, "grad_norm": 0.6463668180768588, "learning_rate": 2.590568081219325e-06, "loss": 0.0204, "step": 142870 }, { "epoch": 0.5961520808471932, "grad_norm": 0.482879712881352, "learning_rate": 2.5905227512302016e-06, "loss": 0.0223, "step": 142875 }, { "epoch": 0.5961729435621834, "grad_norm": 0.5891578235871126, "learning_rate": 2.590477423620559e-06, "loss": 0.0236, "step": 142880 }, { "epoch": 0.5961938062771737, "grad_norm": 0.9644500256705696, "learning_rate": 2.5904320983901904e-06, "loss": 0.0313, "step": 142885 }, { "epoch": 0.5962146689921639, "grad_norm": 0.5009697163967688, "learning_rate": 2.5903867755388855e-06, "loss": 0.023, "step": 142890 }, { "epoch": 0.5962355317071543, "grad_norm": 0.6534963906022527, "learning_rate": 2.590341455066438e-06, "loss": 0.0293, "step": 142895 }, { "epoch": 0.5962563944221445, "grad_norm": 0.8396598705603898, "learning_rate": 2.5902961369726394e-06, "loss": 0.0255, "step": 142900 }, { "epoch": 0.5962772571371348, "grad_norm": 0.32524099882136626, "learning_rate": 2.590250821257281e-06, "loss": 0.0234, "step": 142905 }, { "epoch": 0.5962981198521251, "grad_norm": 0.6766170265383337, "learning_rate": 2.5902055079201547e-06, "loss": 0.0274, "step": 142910 }, { "epoch": 0.5963189825671154, "grad_norm": 0.7416163341129808, "learning_rate": 2.5901601969610536e-06, "loss": 0.0213, "step": 142915 }, { "epoch": 0.5963398452821056, "grad_norm": 0.6859921386012208, "learning_rate": 2.5901148883797687e-06, "loss": 0.0283, "step": 142920 }, { "epoch": 0.596360707997096, "grad_norm": 0.7722307786035286, "learning_rate": 2.590069582176093e-06, "loss": 0.0224, "step": 142925 }, { "epoch": 0.5963815707120862, "grad_norm": 0.64281766153659, "learning_rate": 2.5900242783498174e-06, "loss": 0.0198, "step": 142930 }, { "epoch": 0.5964024334270764, "grad_norm": 0.6502238930387699, "learning_rate": 2.589978976900735e-06, "loss": 0.0314, "step": 142935 }, { "epoch": 0.5964232961420668, "grad_norm": 0.43697726922635677, "learning_rate": 2.589933677828637e-06, "loss": 0.0199, "step": 142940 }, { "epoch": 0.596444158857057, "grad_norm": 0.6996087083852466, "learning_rate": 2.5898883811333163e-06, "loss": 0.0246, "step": 142945 }, { "epoch": 0.5964650215720473, "grad_norm": 0.542732432358359, "learning_rate": 2.5898430868145645e-06, "loss": 0.023, "step": 142950 }, { "epoch": 0.5964858842870375, "grad_norm": 0.7112670176784994, "learning_rate": 2.589797794872174e-06, "loss": 0.0249, "step": 142955 }, { "epoch": 0.5965067470020279, "grad_norm": 0.8621167395426972, "learning_rate": 2.5897525053059374e-06, "loss": 0.0217, "step": 142960 }, { "epoch": 0.5965276097170181, "grad_norm": 0.6405513028397749, "learning_rate": 2.5897072181156468e-06, "loss": 0.0269, "step": 142965 }, { "epoch": 0.5965484724320084, "grad_norm": 0.3866349591309202, "learning_rate": 2.589661933301094e-06, "loss": 0.0173, "step": 142970 }, { "epoch": 0.5965693351469987, "grad_norm": 1.4666760591071417, "learning_rate": 2.5896166508620714e-06, "loss": 0.0367, "step": 142975 }, { "epoch": 0.596590197861989, "grad_norm": 0.84951013176129, "learning_rate": 2.5895713707983716e-06, "loss": 0.02, "step": 142980 }, { "epoch": 0.5966110605769792, "grad_norm": 0.6318094816286397, "learning_rate": 2.589526093109787e-06, "loss": 0.0265, "step": 142985 }, { "epoch": 0.5966319232919696, "grad_norm": 0.9550761931982938, "learning_rate": 2.5894808177961095e-06, "loss": 0.0217, "step": 142990 }, { "epoch": 0.5966527860069598, "grad_norm": 0.6758377019494297, "learning_rate": 2.5894355448571325e-06, "loss": 0.023, "step": 142995 }, { "epoch": 0.5966736487219501, "grad_norm": 0.635917568157394, "learning_rate": 2.5893902742926467e-06, "loss": 0.0215, "step": 143000 }, { "epoch": 0.5966945114369404, "grad_norm": 0.7080642895563021, "learning_rate": 2.589345006102446e-06, "loss": 0.0246, "step": 143005 }, { "epoch": 0.5967153741519307, "grad_norm": 0.5144155479619993, "learning_rate": 2.5892997402863224e-06, "loss": 0.0189, "step": 143010 }, { "epoch": 0.5967362368669209, "grad_norm": 0.5294038016189434, "learning_rate": 2.589254476844068e-06, "loss": 0.0252, "step": 143015 }, { "epoch": 0.5967570995819111, "grad_norm": 0.9669014673677808, "learning_rate": 2.5892092157754767e-06, "loss": 0.0258, "step": 143020 }, { "epoch": 0.5967779622969015, "grad_norm": 0.5979921176658619, "learning_rate": 2.5891639570803395e-06, "loss": 0.0209, "step": 143025 }, { "epoch": 0.5967988250118917, "grad_norm": 0.691868572874893, "learning_rate": 2.5891187007584494e-06, "loss": 0.0307, "step": 143030 }, { "epoch": 0.596819687726882, "grad_norm": 0.4511705680106997, "learning_rate": 2.589073446809599e-06, "loss": 0.0283, "step": 143035 }, { "epoch": 0.5968405504418723, "grad_norm": 0.9666793656687656, "learning_rate": 2.5890281952335816e-06, "loss": 0.0238, "step": 143040 }, { "epoch": 0.5968614131568626, "grad_norm": 0.9970061438341734, "learning_rate": 2.588982946030189e-06, "loss": 0.0229, "step": 143045 }, { "epoch": 0.5968822758718528, "grad_norm": 1.1160833813065907, "learning_rate": 2.5889376991992145e-06, "loss": 0.0263, "step": 143050 }, { "epoch": 0.5969031385868432, "grad_norm": 0.3273426877873574, "learning_rate": 2.58889245474045e-06, "loss": 0.0194, "step": 143055 }, { "epoch": 0.5969240013018334, "grad_norm": 0.9209903123463357, "learning_rate": 2.588847212653689e-06, "loss": 0.0228, "step": 143060 }, { "epoch": 0.5969448640168237, "grad_norm": 0.767310383644998, "learning_rate": 2.588801972938724e-06, "loss": 0.0246, "step": 143065 }, { "epoch": 0.5969657267318139, "grad_norm": 0.4134497327701842, "learning_rate": 2.5887567355953477e-06, "loss": 0.0267, "step": 143070 }, { "epoch": 0.5969865894468043, "grad_norm": 0.937343004162859, "learning_rate": 2.588711500623353e-06, "loss": 0.0201, "step": 143075 }, { "epoch": 0.5970074521617945, "grad_norm": 0.7083032577946183, "learning_rate": 2.5886662680225314e-06, "loss": 0.0195, "step": 143080 }, { "epoch": 0.5970283148767848, "grad_norm": 0.4918444051039823, "learning_rate": 2.5886210377926785e-06, "loss": 0.0278, "step": 143085 }, { "epoch": 0.5970491775917751, "grad_norm": 0.9678028762804572, "learning_rate": 2.588575809933585e-06, "loss": 0.0294, "step": 143090 }, { "epoch": 0.5970700403067654, "grad_norm": 1.0700818736093034, "learning_rate": 2.5885305844450454e-06, "loss": 0.0272, "step": 143095 }, { "epoch": 0.5970909030217556, "grad_norm": 0.6755737587514586, "learning_rate": 2.5884853613268506e-06, "loss": 0.0206, "step": 143100 }, { "epoch": 0.597111765736746, "grad_norm": 0.6379556189843787, "learning_rate": 2.588440140578795e-06, "loss": 0.0248, "step": 143105 }, { "epoch": 0.5971326284517362, "grad_norm": 0.9239739287273485, "learning_rate": 2.588394922200671e-06, "loss": 0.0223, "step": 143110 }, { "epoch": 0.5971534911667264, "grad_norm": 0.46709031492855346, "learning_rate": 2.5883497061922726e-06, "loss": 0.0204, "step": 143115 }, { "epoch": 0.5971743538817168, "grad_norm": 0.5974834096432541, "learning_rate": 2.5883044925533912e-06, "loss": 0.0199, "step": 143120 }, { "epoch": 0.597195216596707, "grad_norm": 0.8087254061504057, "learning_rate": 2.5882592812838207e-06, "loss": 0.0226, "step": 143125 }, { "epoch": 0.5972160793116973, "grad_norm": 0.7137976595355072, "learning_rate": 2.5882140723833547e-06, "loss": 0.0302, "step": 143130 }, { "epoch": 0.5972369420266875, "grad_norm": 0.8772804082758537, "learning_rate": 2.5881688658517857e-06, "loss": 0.0222, "step": 143135 }, { "epoch": 0.5972578047416779, "grad_norm": 0.5795528620254562, "learning_rate": 2.5881236616889067e-06, "loss": 0.0282, "step": 143140 }, { "epoch": 0.5972786674566681, "grad_norm": 0.6226730941823526, "learning_rate": 2.5880784598945114e-06, "loss": 0.0269, "step": 143145 }, { "epoch": 0.5972995301716584, "grad_norm": 0.600258459828543, "learning_rate": 2.5880332604683923e-06, "loss": 0.028, "step": 143150 }, { "epoch": 0.5973203928866487, "grad_norm": 1.2873714280113555, "learning_rate": 2.5879880634103426e-06, "loss": 0.034, "step": 143155 }, { "epoch": 0.597341255601639, "grad_norm": 1.0886844068097052, "learning_rate": 2.587942868720156e-06, "loss": 0.0218, "step": 143160 }, { "epoch": 0.5973621183166292, "grad_norm": 0.985384397159042, "learning_rate": 2.587897676397626e-06, "loss": 0.0213, "step": 143165 }, { "epoch": 0.5973829810316196, "grad_norm": 1.1710461773697924, "learning_rate": 2.587852486442546e-06, "loss": 0.0299, "step": 143170 }, { "epoch": 0.5974038437466098, "grad_norm": 0.41111890967613757, "learning_rate": 2.5878072988547084e-06, "loss": 0.0238, "step": 143175 }, { "epoch": 0.5974247064616001, "grad_norm": 0.43486396620509155, "learning_rate": 2.5877621136339065e-06, "loss": 0.0245, "step": 143180 }, { "epoch": 0.5974455691765904, "grad_norm": 1.0726062270532926, "learning_rate": 2.5877169307799344e-06, "loss": 0.0247, "step": 143185 }, { "epoch": 0.5974664318915807, "grad_norm": 0.7567431909030168, "learning_rate": 2.5876717502925847e-06, "loss": 0.0212, "step": 143190 }, { "epoch": 0.5974872946065709, "grad_norm": 0.4769018002835159, "learning_rate": 2.587626572171652e-06, "loss": 0.0189, "step": 143195 }, { "epoch": 0.5975081573215612, "grad_norm": 0.9904071311621448, "learning_rate": 2.587581396416929e-06, "loss": 0.0182, "step": 143200 }, { "epoch": 0.5975290200365515, "grad_norm": 2.2786451069614615, "learning_rate": 2.5875362230282087e-06, "loss": 0.0185, "step": 143205 }, { "epoch": 0.5975498827515417, "grad_norm": 0.6407455017449153, "learning_rate": 2.5874910520052854e-06, "loss": 0.0307, "step": 143210 }, { "epoch": 0.597570745466532, "grad_norm": 0.7103197786203759, "learning_rate": 2.587445883347952e-06, "loss": 0.0232, "step": 143215 }, { "epoch": 0.5975916081815223, "grad_norm": 1.2777537893686974, "learning_rate": 2.587400717056003e-06, "loss": 0.0212, "step": 143220 }, { "epoch": 0.5976124708965126, "grad_norm": 0.6833409857862863, "learning_rate": 2.5873555531292297e-06, "loss": 0.0209, "step": 143225 }, { "epoch": 0.5976333336115028, "grad_norm": 0.47264474633828424, "learning_rate": 2.5873103915674285e-06, "loss": 0.0239, "step": 143230 }, { "epoch": 0.5976541963264932, "grad_norm": 0.9599966240620094, "learning_rate": 2.5872652323703917e-06, "loss": 0.024, "step": 143235 }, { "epoch": 0.5976750590414834, "grad_norm": 0.4510783643875936, "learning_rate": 2.587220075537913e-06, "loss": 0.0184, "step": 143240 }, { "epoch": 0.5976959217564737, "grad_norm": 0.4366381165479873, "learning_rate": 2.5871749210697853e-06, "loss": 0.0224, "step": 143245 }, { "epoch": 0.5977167844714639, "grad_norm": 1.0230479089082336, "learning_rate": 2.5871297689658037e-06, "loss": 0.0227, "step": 143250 }, { "epoch": 0.5977376471864543, "grad_norm": 0.9872778419831355, "learning_rate": 2.5870846192257605e-06, "loss": 0.0259, "step": 143255 }, { "epoch": 0.5977585099014445, "grad_norm": 0.8064400776164662, "learning_rate": 2.587039471849451e-06, "loss": 0.0216, "step": 143260 }, { "epoch": 0.5977793726164348, "grad_norm": 0.8203460935049696, "learning_rate": 2.5869943268366677e-06, "loss": 0.023, "step": 143265 }, { "epoch": 0.5978002353314251, "grad_norm": 0.9468220670718965, "learning_rate": 2.5869491841872047e-06, "loss": 0.0213, "step": 143270 }, { "epoch": 0.5978210980464154, "grad_norm": 1.112644577841679, "learning_rate": 2.5869040439008557e-06, "loss": 0.0231, "step": 143275 }, { "epoch": 0.5978419607614056, "grad_norm": 1.8807251323144645, "learning_rate": 2.586858905977415e-06, "loss": 0.0278, "step": 143280 }, { "epoch": 0.597862823476396, "grad_norm": 0.7300753764920193, "learning_rate": 2.5868137704166764e-06, "loss": 0.0246, "step": 143285 }, { "epoch": 0.5978836861913862, "grad_norm": 0.7064168438248136, "learning_rate": 2.5867686372184337e-06, "loss": 0.0258, "step": 143290 }, { "epoch": 0.5979045489063765, "grad_norm": 0.8868859047392436, "learning_rate": 2.5867235063824804e-06, "loss": 0.0203, "step": 143295 }, { "epoch": 0.5979254116213668, "grad_norm": 1.1700718072400855, "learning_rate": 2.586678377908611e-06, "loss": 0.0303, "step": 143300 }, { "epoch": 0.597946274336357, "grad_norm": 0.7122075814537656, "learning_rate": 2.586633251796619e-06, "loss": 0.0224, "step": 143305 }, { "epoch": 0.5979671370513473, "grad_norm": 0.43666326421161966, "learning_rate": 2.5865881280462985e-06, "loss": 0.0185, "step": 143310 }, { "epoch": 0.5979879997663375, "grad_norm": 0.5745876026963094, "learning_rate": 2.5865430066574438e-06, "loss": 0.0186, "step": 143315 }, { "epoch": 0.5980088624813279, "grad_norm": 0.3358478201859087, "learning_rate": 2.586497887629848e-06, "loss": 0.0245, "step": 143320 }, { "epoch": 0.5980297251963181, "grad_norm": 0.7997118102295749, "learning_rate": 2.586452770963307e-06, "loss": 0.0248, "step": 143325 }, { "epoch": 0.5980505879113084, "grad_norm": 0.6799338319835034, "learning_rate": 2.586407656657613e-06, "loss": 0.0273, "step": 143330 }, { "epoch": 0.5980714506262987, "grad_norm": 0.49665060402476224, "learning_rate": 2.5863625447125614e-06, "loss": 0.0227, "step": 143335 }, { "epoch": 0.598092313341289, "grad_norm": 0.6542598959625192, "learning_rate": 2.586317435127945e-06, "loss": 0.0235, "step": 143340 }, { "epoch": 0.5981131760562792, "grad_norm": 0.9210839998722958, "learning_rate": 2.5862723279035595e-06, "loss": 0.0237, "step": 143345 }, { "epoch": 0.5981340387712696, "grad_norm": 0.3821530836795155, "learning_rate": 2.5862272230391982e-06, "loss": 0.0231, "step": 143350 }, { "epoch": 0.5981549014862598, "grad_norm": 0.9828502010179385, "learning_rate": 2.5861821205346555e-06, "loss": 0.0226, "step": 143355 }, { "epoch": 0.5981757642012501, "grad_norm": 0.32866428331572084, "learning_rate": 2.5861370203897255e-06, "loss": 0.0184, "step": 143360 }, { "epoch": 0.5981966269162404, "grad_norm": 0.5381877082778466, "learning_rate": 2.5860919226042032e-06, "loss": 0.014, "step": 143365 }, { "epoch": 0.5982174896312307, "grad_norm": 0.738805888616054, "learning_rate": 2.586046827177881e-06, "loss": 0.0192, "step": 143370 }, { "epoch": 0.5982383523462209, "grad_norm": 0.6816577509723424, "learning_rate": 2.586001734110555e-06, "loss": 0.0214, "step": 143375 }, { "epoch": 0.5982592150612112, "grad_norm": 1.0736174089332813, "learning_rate": 2.5859566434020195e-06, "loss": 0.0218, "step": 143380 }, { "epoch": 0.5982800777762015, "grad_norm": 1.256835069098245, "learning_rate": 2.5859115550520686e-06, "loss": 0.0246, "step": 143385 }, { "epoch": 0.5983009404911918, "grad_norm": 0.6641907157310661, "learning_rate": 2.585866469060496e-06, "loss": 0.0254, "step": 143390 }, { "epoch": 0.598321803206182, "grad_norm": 0.9557726936958284, "learning_rate": 2.585821385427096e-06, "loss": 0.0182, "step": 143395 }, { "epoch": 0.5983426659211724, "grad_norm": 0.5003375501673403, "learning_rate": 2.585776304151664e-06, "loss": 0.0274, "step": 143400 }, { "epoch": 0.5983635286361626, "grad_norm": 0.9257017614318586, "learning_rate": 2.5857312252339943e-06, "loss": 0.0201, "step": 143405 }, { "epoch": 0.5983843913511528, "grad_norm": 0.5186407048116366, "learning_rate": 2.585686148673881e-06, "loss": 0.0192, "step": 143410 }, { "epoch": 0.5984052540661432, "grad_norm": 0.26865480128508473, "learning_rate": 2.5856410744711193e-06, "loss": 0.0181, "step": 143415 }, { "epoch": 0.5984261167811334, "grad_norm": 0.6196387838722228, "learning_rate": 2.5855960026255025e-06, "loss": 0.0211, "step": 143420 }, { "epoch": 0.5984469794961237, "grad_norm": 0.6657954110775725, "learning_rate": 2.585550933136827e-06, "loss": 0.0284, "step": 143425 }, { "epoch": 0.5984678422111139, "grad_norm": 0.3257606493981696, "learning_rate": 2.585505866004885e-06, "loss": 0.0167, "step": 143430 }, { "epoch": 0.5984887049261043, "grad_norm": 0.5297888042522112, "learning_rate": 2.5854608012294723e-06, "loss": 0.0217, "step": 143435 }, { "epoch": 0.5985095676410945, "grad_norm": 0.5016212883566105, "learning_rate": 2.5854157388103843e-06, "loss": 0.0184, "step": 143440 }, { "epoch": 0.5985304303560848, "grad_norm": 0.9226763911209818, "learning_rate": 2.5853706787474147e-06, "loss": 0.0203, "step": 143445 }, { "epoch": 0.5985512930710751, "grad_norm": 0.8420404287611376, "learning_rate": 2.5853256210403587e-06, "loss": 0.0194, "step": 143450 }, { "epoch": 0.5985721557860654, "grad_norm": 1.0226760484227941, "learning_rate": 2.5852805656890107e-06, "loss": 0.0256, "step": 143455 }, { "epoch": 0.5985930185010556, "grad_norm": 0.447763443111231, "learning_rate": 2.5852355126931655e-06, "loss": 0.0235, "step": 143460 }, { "epoch": 0.598613881216046, "grad_norm": 0.4797506718656011, "learning_rate": 2.585190462052618e-06, "loss": 0.0226, "step": 143465 }, { "epoch": 0.5986347439310362, "grad_norm": 0.6896499204716743, "learning_rate": 2.5851454137671627e-06, "loss": 0.02, "step": 143470 }, { "epoch": 0.5986556066460265, "grad_norm": 0.45568172972433835, "learning_rate": 2.5851003678365943e-06, "loss": 0.0263, "step": 143475 }, { "epoch": 0.5986764693610168, "grad_norm": 0.7373453027869366, "learning_rate": 2.5850553242607084e-06, "loss": 0.0297, "step": 143480 }, { "epoch": 0.5986973320760071, "grad_norm": 0.547100545070574, "learning_rate": 2.5850102830392993e-06, "loss": 0.019, "step": 143485 }, { "epoch": 0.5987181947909973, "grad_norm": 0.831794032799319, "learning_rate": 2.584965244172162e-06, "loss": 0.024, "step": 143490 }, { "epoch": 0.5987390575059875, "grad_norm": 0.7693327994311616, "learning_rate": 2.5849202076590913e-06, "loss": 0.0259, "step": 143495 }, { "epoch": 0.5987599202209779, "grad_norm": 0.6061025411946739, "learning_rate": 2.584875173499882e-06, "loss": 0.0242, "step": 143500 }, { "epoch": 0.5987807829359681, "grad_norm": 0.6933873361587781, "learning_rate": 2.58483014169433e-06, "loss": 0.02, "step": 143505 }, { "epoch": 0.5988016456509584, "grad_norm": 0.7373384106350555, "learning_rate": 2.584785112242229e-06, "loss": 0.0304, "step": 143510 }, { "epoch": 0.5988225083659487, "grad_norm": 0.7950652799908774, "learning_rate": 2.584740085143375e-06, "loss": 0.0259, "step": 143515 }, { "epoch": 0.598843371080939, "grad_norm": 0.8819361218962586, "learning_rate": 2.5846950603975624e-06, "loss": 0.0225, "step": 143520 }, { "epoch": 0.5988642337959292, "grad_norm": 0.8121602753108473, "learning_rate": 2.5846500380045865e-06, "loss": 0.0317, "step": 143525 }, { "epoch": 0.5988850965109196, "grad_norm": 1.3634276458528378, "learning_rate": 2.5846050179642425e-06, "loss": 0.0223, "step": 143530 }, { "epoch": 0.5989059592259098, "grad_norm": 1.188585654562514, "learning_rate": 2.584560000276325e-06, "loss": 0.0309, "step": 143535 }, { "epoch": 0.5989268219409001, "grad_norm": 0.5984376686572522, "learning_rate": 2.58451498494063e-06, "loss": 0.0236, "step": 143540 }, { "epoch": 0.5989476846558904, "grad_norm": 0.8441484067123393, "learning_rate": 2.5844699719569516e-06, "loss": 0.022, "step": 143545 }, { "epoch": 0.5989685473708807, "grad_norm": 1.0548799233804178, "learning_rate": 2.584424961325086e-06, "loss": 0.0292, "step": 143550 }, { "epoch": 0.5989894100858709, "grad_norm": 0.8521936746596566, "learning_rate": 2.584379953044828e-06, "loss": 0.0184, "step": 143555 }, { "epoch": 0.5990102728008612, "grad_norm": 0.6013152373522279, "learning_rate": 2.5843349471159734e-06, "loss": 0.0287, "step": 143560 }, { "epoch": 0.5990311355158515, "grad_norm": 2.6558516509784913, "learning_rate": 2.584289943538316e-06, "loss": 0.0279, "step": 143565 }, { "epoch": 0.5990519982308418, "grad_norm": 0.5088082975142231, "learning_rate": 2.5842449423116526e-06, "loss": 0.0232, "step": 143570 }, { "epoch": 0.599072860945832, "grad_norm": 0.87695503245698, "learning_rate": 2.5841999434357778e-06, "loss": 0.0238, "step": 143575 }, { "epoch": 0.5990937236608224, "grad_norm": 0.6356200840738976, "learning_rate": 2.5841549469104863e-06, "loss": 0.0242, "step": 143580 }, { "epoch": 0.5991145863758126, "grad_norm": 4.199559400800533, "learning_rate": 2.584109952735575e-06, "loss": 0.0286, "step": 143585 }, { "epoch": 0.5991354490908029, "grad_norm": 0.6247125996443976, "learning_rate": 2.5840649609108386e-06, "loss": 0.0245, "step": 143590 }, { "epoch": 0.5991563118057932, "grad_norm": 0.5577601324146765, "learning_rate": 2.5840199714360723e-06, "loss": 0.0183, "step": 143595 }, { "epoch": 0.5991771745207835, "grad_norm": 0.8527298530593915, "learning_rate": 2.583974984311071e-06, "loss": 0.0263, "step": 143600 }, { "epoch": 0.5991980372357737, "grad_norm": 0.2761391236644471, "learning_rate": 2.583929999535632e-06, "loss": 0.0212, "step": 143605 }, { "epoch": 0.5992188999507639, "grad_norm": 0.8022779094507058, "learning_rate": 2.5838850171095486e-06, "loss": 0.0246, "step": 143610 }, { "epoch": 0.5992397626657543, "grad_norm": 1.1016354269588051, "learning_rate": 2.5838400370326176e-06, "loss": 0.0272, "step": 143615 }, { "epoch": 0.5992606253807445, "grad_norm": 0.36825086181020245, "learning_rate": 2.5837950593046347e-06, "loss": 0.0226, "step": 143620 }, { "epoch": 0.5992814880957348, "grad_norm": 0.6793206942755023, "learning_rate": 2.583750083925394e-06, "loss": 0.0193, "step": 143625 }, { "epoch": 0.5993023508107251, "grad_norm": 0.3627481352460832, "learning_rate": 2.583705110894693e-06, "loss": 0.0232, "step": 143630 }, { "epoch": 0.5993232135257154, "grad_norm": 0.4147164498028618, "learning_rate": 2.5836601402123264e-06, "loss": 0.0241, "step": 143635 }, { "epoch": 0.5993440762407056, "grad_norm": 0.3262264456055107, "learning_rate": 2.583615171878089e-06, "loss": 0.0192, "step": 143640 }, { "epoch": 0.599364938955696, "grad_norm": 0.6606587836349939, "learning_rate": 2.5835702058917783e-06, "loss": 0.0277, "step": 143645 }, { "epoch": 0.5993858016706862, "grad_norm": 0.4913089466411537, "learning_rate": 2.5835252422531885e-06, "loss": 0.0363, "step": 143650 }, { "epoch": 0.5994066643856765, "grad_norm": 1.0160442393053988, "learning_rate": 2.5834802809621163e-06, "loss": 0.0239, "step": 143655 }, { "epoch": 0.5994275271006668, "grad_norm": 0.9128965464215355, "learning_rate": 2.5834353220183566e-06, "loss": 0.0221, "step": 143660 }, { "epoch": 0.5994483898156571, "grad_norm": 0.8067140411449316, "learning_rate": 2.5833903654217057e-06, "loss": 0.0236, "step": 143665 }, { "epoch": 0.5994692525306473, "grad_norm": 0.7741996427687191, "learning_rate": 2.5833454111719587e-06, "loss": 0.023, "step": 143670 }, { "epoch": 0.5994901152456376, "grad_norm": 0.8470973937826335, "learning_rate": 2.5833004592689126e-06, "loss": 0.0239, "step": 143675 }, { "epoch": 0.5995109779606279, "grad_norm": 1.5110819646270357, "learning_rate": 2.5832555097123617e-06, "loss": 0.0255, "step": 143680 }, { "epoch": 0.5995318406756182, "grad_norm": 0.8877989958074105, "learning_rate": 2.583210562502103e-06, "loss": 0.0257, "step": 143685 }, { "epoch": 0.5995527033906084, "grad_norm": 0.7004552774403373, "learning_rate": 2.583165617637933e-06, "loss": 0.0222, "step": 143690 }, { "epoch": 0.5995735661055988, "grad_norm": 0.7633006382265927, "learning_rate": 2.583120675119646e-06, "loss": 0.0283, "step": 143695 }, { "epoch": 0.599594428820589, "grad_norm": 0.42267135629561975, "learning_rate": 2.5830757349470385e-06, "loss": 0.0277, "step": 143700 }, { "epoch": 0.5996152915355792, "grad_norm": 0.23848259492684395, "learning_rate": 2.5830307971199063e-06, "loss": 0.017, "step": 143705 }, { "epoch": 0.5996361542505696, "grad_norm": 0.4828951658788516, "learning_rate": 2.5829858616380466e-06, "loss": 0.0194, "step": 143710 }, { "epoch": 0.5996570169655598, "grad_norm": 0.7528592793123853, "learning_rate": 2.5829409285012543e-06, "loss": 0.0235, "step": 143715 }, { "epoch": 0.5996778796805501, "grad_norm": 0.5649248671975302, "learning_rate": 2.582895997709325e-06, "loss": 0.0167, "step": 143720 }, { "epoch": 0.5996987423955404, "grad_norm": 0.45414038586694844, "learning_rate": 2.5828510692620556e-06, "loss": 0.019, "step": 143725 }, { "epoch": 0.5997196051105307, "grad_norm": 0.670469941920178, "learning_rate": 2.582806143159242e-06, "loss": 0.0218, "step": 143730 }, { "epoch": 0.5997404678255209, "grad_norm": 0.5053801615581346, "learning_rate": 2.5827612194006806e-06, "loss": 0.0222, "step": 143735 }, { "epoch": 0.5997613305405112, "grad_norm": 0.8802196980202512, "learning_rate": 2.5827162979861674e-06, "loss": 0.0224, "step": 143740 }, { "epoch": 0.5997821932555015, "grad_norm": 0.36094365024054675, "learning_rate": 2.5826713789154978e-06, "loss": 0.0349, "step": 143745 }, { "epoch": 0.5998030559704918, "grad_norm": 0.6671354030150196, "learning_rate": 2.5826264621884688e-06, "loss": 0.0166, "step": 143750 }, { "epoch": 0.599823918685482, "grad_norm": 0.42234169383520676, "learning_rate": 2.5825815478048767e-06, "loss": 0.0231, "step": 143755 }, { "epoch": 0.5998447814004724, "grad_norm": 0.3214954842613728, "learning_rate": 2.582536635764517e-06, "loss": 0.0217, "step": 143760 }, { "epoch": 0.5998656441154626, "grad_norm": 0.6975545851384647, "learning_rate": 2.5824917260671868e-06, "loss": 0.018, "step": 143765 }, { "epoch": 0.5998865068304529, "grad_norm": 0.45650178564424226, "learning_rate": 2.5824468187126815e-06, "loss": 0.0198, "step": 143770 }, { "epoch": 0.5999073695454432, "grad_norm": 0.23478777184327365, "learning_rate": 2.582401913700798e-06, "loss": 0.0202, "step": 143775 }, { "epoch": 0.5999282322604335, "grad_norm": 0.7256373961196739, "learning_rate": 2.5823570110313322e-06, "loss": 0.0204, "step": 143780 }, { "epoch": 0.5999490949754237, "grad_norm": 0.3416525296233091, "learning_rate": 2.5823121107040817e-06, "loss": 0.0226, "step": 143785 }, { "epoch": 0.599969957690414, "grad_norm": 0.4092345184406275, "learning_rate": 2.5822672127188408e-06, "loss": 0.0213, "step": 143790 }, { "epoch": 0.5999908204054043, "grad_norm": 0.5777003012722398, "learning_rate": 2.5822223170754075e-06, "loss": 0.0193, "step": 143795 }, { "epoch": 0.6000116831203945, "grad_norm": 0.7722461379889267, "learning_rate": 2.5821774237735782e-06, "loss": 0.0299, "step": 143800 }, { "epoch": 0.6000325458353848, "grad_norm": 0.9680010601835558, "learning_rate": 2.5821325328131487e-06, "loss": 0.0277, "step": 143805 }, { "epoch": 0.6000534085503751, "grad_norm": 0.48068077643615403, "learning_rate": 2.582087644193916e-06, "loss": 0.0197, "step": 143810 }, { "epoch": 0.6000742712653654, "grad_norm": 0.6850657766987588, "learning_rate": 2.5820427579156752e-06, "loss": 0.0209, "step": 143815 }, { "epoch": 0.6000951339803556, "grad_norm": 1.0123197460608213, "learning_rate": 2.5819978739782246e-06, "loss": 0.0266, "step": 143820 }, { "epoch": 0.600115996695346, "grad_norm": 0.7777209618689744, "learning_rate": 2.5819529923813602e-06, "loss": 0.0233, "step": 143825 }, { "epoch": 0.6001368594103362, "grad_norm": 1.3433750422706574, "learning_rate": 2.5819081131248784e-06, "loss": 0.0282, "step": 143830 }, { "epoch": 0.6001577221253265, "grad_norm": 0.8992884268110971, "learning_rate": 2.581863236208576e-06, "loss": 0.0151, "step": 143835 }, { "epoch": 0.6001785848403168, "grad_norm": 0.3980831236761925, "learning_rate": 2.5818183616322495e-06, "loss": 0.0232, "step": 143840 }, { "epoch": 0.6001994475553071, "grad_norm": 0.6736102003224453, "learning_rate": 2.581773489395696e-06, "loss": 0.0186, "step": 143845 }, { "epoch": 0.6002203102702973, "grad_norm": 0.5019464270246081, "learning_rate": 2.5817286194987105e-06, "loss": 0.0157, "step": 143850 }, { "epoch": 0.6002411729852876, "grad_norm": 0.6206840497021914, "learning_rate": 2.5816837519410924e-06, "loss": 0.023, "step": 143855 }, { "epoch": 0.6002620357002779, "grad_norm": 0.7477194199298772, "learning_rate": 2.5816388867226362e-06, "loss": 0.0229, "step": 143860 }, { "epoch": 0.6002828984152682, "grad_norm": 0.6306339096143575, "learning_rate": 2.5815940238431396e-06, "loss": 0.0204, "step": 143865 }, { "epoch": 0.6003037611302584, "grad_norm": 0.5933156102697364, "learning_rate": 2.5815491633024e-06, "loss": 0.0233, "step": 143870 }, { "epoch": 0.6003246238452488, "grad_norm": 0.37769237261396676, "learning_rate": 2.581504305100212e-06, "loss": 0.0253, "step": 143875 }, { "epoch": 0.600345486560239, "grad_norm": 0.5802575373237951, "learning_rate": 2.5814594492363747e-06, "loss": 0.0253, "step": 143880 }, { "epoch": 0.6003663492752292, "grad_norm": 0.663794656109669, "learning_rate": 2.581414595710684e-06, "loss": 0.015, "step": 143885 }, { "epoch": 0.6003872119902196, "grad_norm": 0.42540450702933347, "learning_rate": 2.581369744522937e-06, "loss": 0.019, "step": 143890 }, { "epoch": 0.6004080747052098, "grad_norm": 0.4093023755204315, "learning_rate": 2.5813248956729304e-06, "loss": 0.0174, "step": 143895 }, { "epoch": 0.6004289374202001, "grad_norm": 1.4057606601259032, "learning_rate": 2.5812800491604608e-06, "loss": 0.0296, "step": 143900 }, { "epoch": 0.6004498001351904, "grad_norm": 1.2388946964610403, "learning_rate": 2.581235204985326e-06, "loss": 0.0295, "step": 143905 }, { "epoch": 0.6004706628501807, "grad_norm": 0.4066440523837341, "learning_rate": 2.581190363147323e-06, "loss": 0.0184, "step": 143910 }, { "epoch": 0.6004915255651709, "grad_norm": 2.5907116395055083, "learning_rate": 2.5811455236462474e-06, "loss": 0.0271, "step": 143915 }, { "epoch": 0.6005123882801612, "grad_norm": 0.6079222856287614, "learning_rate": 2.5811006864818977e-06, "loss": 0.024, "step": 143920 }, { "epoch": 0.6005332509951515, "grad_norm": 0.6939271991150192, "learning_rate": 2.58105585165407e-06, "loss": 0.023, "step": 143925 }, { "epoch": 0.6005541137101418, "grad_norm": 0.48194348478392535, "learning_rate": 2.5810110191625628e-06, "loss": 0.0192, "step": 143930 }, { "epoch": 0.600574976425132, "grad_norm": 0.6554774514284802, "learning_rate": 2.580966189007171e-06, "loss": 0.0242, "step": 143935 }, { "epoch": 0.6005958391401224, "grad_norm": 0.8679783273707481, "learning_rate": 2.5809213611876936e-06, "loss": 0.0258, "step": 143940 }, { "epoch": 0.6006167018551126, "grad_norm": 0.6946806174252065, "learning_rate": 2.5808765357039266e-06, "loss": 0.0258, "step": 143945 }, { "epoch": 0.6006375645701029, "grad_norm": 0.26732399524975087, "learning_rate": 2.580831712555668e-06, "loss": 0.0237, "step": 143950 }, { "epoch": 0.6006584272850932, "grad_norm": 0.3878779618311763, "learning_rate": 2.580786891742715e-06, "loss": 0.0199, "step": 143955 }, { "epoch": 0.6006792900000835, "grad_norm": 0.9189648076003168, "learning_rate": 2.580742073264864e-06, "loss": 0.0252, "step": 143960 }, { "epoch": 0.6007001527150737, "grad_norm": 0.8502126941074533, "learning_rate": 2.5806972571219124e-06, "loss": 0.0205, "step": 143965 }, { "epoch": 0.600721015430064, "grad_norm": 0.6021582423541992, "learning_rate": 2.580652443313658e-06, "loss": 0.0259, "step": 143970 }, { "epoch": 0.6007418781450543, "grad_norm": 0.5592339518128493, "learning_rate": 2.5806076318398986e-06, "loss": 0.0222, "step": 143975 }, { "epoch": 0.6007627408600446, "grad_norm": 0.7017088162773212, "learning_rate": 2.5805628227004305e-06, "loss": 0.0245, "step": 143980 }, { "epoch": 0.6007836035750348, "grad_norm": 0.780151424287735, "learning_rate": 2.580518015895051e-06, "loss": 0.0167, "step": 143985 }, { "epoch": 0.6008044662900252, "grad_norm": 0.9136423789215412, "learning_rate": 2.5804732114235576e-06, "loss": 0.0285, "step": 143990 }, { "epoch": 0.6008253290050154, "grad_norm": 0.6726273651553584, "learning_rate": 2.5804284092857486e-06, "loss": 0.0201, "step": 143995 }, { "epoch": 0.6008461917200056, "grad_norm": 0.32899884178831196, "learning_rate": 2.5803836094814205e-06, "loss": 0.0197, "step": 144000 }, { "epoch": 0.600867054434996, "grad_norm": 0.7354398327005163, "learning_rate": 2.5803388120103705e-06, "loss": 0.0237, "step": 144005 }, { "epoch": 0.6008879171499862, "grad_norm": 0.4990925515512126, "learning_rate": 2.5802940168723973e-06, "loss": 0.0157, "step": 144010 }, { "epoch": 0.6009087798649765, "grad_norm": 0.3053180917819137, "learning_rate": 2.5802492240672973e-06, "loss": 0.021, "step": 144015 }, { "epoch": 0.6009296425799668, "grad_norm": 0.6148404064595077, "learning_rate": 2.580204433594868e-06, "loss": 0.0277, "step": 144020 }, { "epoch": 0.6009505052949571, "grad_norm": 0.9322559911503605, "learning_rate": 2.580159645454908e-06, "loss": 0.0239, "step": 144025 }, { "epoch": 0.6009713680099473, "grad_norm": 1.2652735770977517, "learning_rate": 2.5801148596472135e-06, "loss": 0.03, "step": 144030 }, { "epoch": 0.6009922307249376, "grad_norm": 9.627229589589177, "learning_rate": 2.580070076171583e-06, "loss": 0.0165, "step": 144035 }, { "epoch": 0.6010130934399279, "grad_norm": 0.46525791100250147, "learning_rate": 2.5800252950278143e-06, "loss": 0.0236, "step": 144040 }, { "epoch": 0.6010339561549182, "grad_norm": 0.6478385194388634, "learning_rate": 2.5799805162157045e-06, "loss": 0.0357, "step": 144045 }, { "epoch": 0.6010548188699084, "grad_norm": 0.3308458262075474, "learning_rate": 2.579935739735051e-06, "loss": 0.0208, "step": 144050 }, { "epoch": 0.6010756815848988, "grad_norm": 0.5820962331876378, "learning_rate": 2.579890965585652e-06, "loss": 0.0244, "step": 144055 }, { "epoch": 0.601096544299889, "grad_norm": 0.6328999687964307, "learning_rate": 2.579846193767305e-06, "loss": 0.0256, "step": 144060 }, { "epoch": 0.6011174070148793, "grad_norm": 0.4165466870190645, "learning_rate": 2.579801424279809e-06, "loss": 0.026, "step": 144065 }, { "epoch": 0.6011382697298696, "grad_norm": 0.34844565973449954, "learning_rate": 2.5797566571229588e-06, "loss": 0.0265, "step": 144070 }, { "epoch": 0.6011591324448599, "grad_norm": 0.33033298213352746, "learning_rate": 2.5797118922965547e-06, "loss": 0.0191, "step": 144075 }, { "epoch": 0.6011799951598501, "grad_norm": 0.6173643878668521, "learning_rate": 2.5796671298003938e-06, "loss": 0.0192, "step": 144080 }, { "epoch": 0.6012008578748405, "grad_norm": 0.7351508251820816, "learning_rate": 2.5796223696342743e-06, "loss": 0.02, "step": 144085 }, { "epoch": 0.6012217205898307, "grad_norm": 0.2945436071943518, "learning_rate": 2.5795776117979936e-06, "loss": 0.0125, "step": 144090 }, { "epoch": 0.6012425833048209, "grad_norm": 0.6369164579982216, "learning_rate": 2.5795328562913495e-06, "loss": 0.02, "step": 144095 }, { "epoch": 0.6012634460198112, "grad_norm": 0.7105724440880141, "learning_rate": 2.5794881031141404e-06, "loss": 0.0229, "step": 144100 }, { "epoch": 0.6012843087348015, "grad_norm": 1.0884488515570565, "learning_rate": 2.579443352266163e-06, "loss": 0.0264, "step": 144105 }, { "epoch": 0.6013051714497918, "grad_norm": 1.1024020650734627, "learning_rate": 2.579398603747218e-06, "loss": 0.0258, "step": 144110 }, { "epoch": 0.601326034164782, "grad_norm": 0.5199007790018993, "learning_rate": 2.5793538575571e-06, "loss": 0.0418, "step": 144115 }, { "epoch": 0.6013468968797724, "grad_norm": 0.6351041370338593, "learning_rate": 2.579309113695609e-06, "loss": 0.0244, "step": 144120 }, { "epoch": 0.6013677595947626, "grad_norm": 0.6454177609292285, "learning_rate": 2.579264372162543e-06, "loss": 0.0199, "step": 144125 }, { "epoch": 0.6013886223097529, "grad_norm": 0.5761333296758381, "learning_rate": 2.5792196329576998e-06, "loss": 0.0183, "step": 144130 }, { "epoch": 0.6014094850247432, "grad_norm": 0.6916687714925162, "learning_rate": 2.5791748960808766e-06, "loss": 0.0226, "step": 144135 }, { "epoch": 0.6014303477397335, "grad_norm": 0.6176074665082951, "learning_rate": 2.5791301615318724e-06, "loss": 0.0268, "step": 144140 }, { "epoch": 0.6014512104547237, "grad_norm": 0.49433152328164764, "learning_rate": 2.5790854293104863e-06, "loss": 0.0197, "step": 144145 }, { "epoch": 0.601472073169714, "grad_norm": 0.7440282858388713, "learning_rate": 2.579040699416514e-06, "loss": 0.0176, "step": 144150 }, { "epoch": 0.6014929358847043, "grad_norm": 0.6758469639285428, "learning_rate": 2.578995971849756e-06, "loss": 0.0195, "step": 144155 }, { "epoch": 0.6015137985996946, "grad_norm": 1.1520537971800808, "learning_rate": 2.5789512466100087e-06, "loss": 0.0281, "step": 144160 }, { "epoch": 0.6015346613146848, "grad_norm": 0.7615385337725751, "learning_rate": 2.5789065236970717e-06, "loss": 0.0211, "step": 144165 }, { "epoch": 0.6015555240296752, "grad_norm": 0.6090633132561766, "learning_rate": 2.5788618031107432e-06, "loss": 0.0261, "step": 144170 }, { "epoch": 0.6015763867446654, "grad_norm": 0.830913039753397, "learning_rate": 2.5788170848508204e-06, "loss": 0.0196, "step": 144175 }, { "epoch": 0.6015972494596556, "grad_norm": 0.8916887331599062, "learning_rate": 2.5787723689171024e-06, "loss": 0.0152, "step": 144180 }, { "epoch": 0.601618112174646, "grad_norm": 0.7360089657136936, "learning_rate": 2.578727655309387e-06, "loss": 0.0239, "step": 144185 }, { "epoch": 0.6016389748896362, "grad_norm": 0.8155031735801767, "learning_rate": 2.5786829440274735e-06, "loss": 0.0247, "step": 144190 }, { "epoch": 0.6016598376046265, "grad_norm": 0.4948138776966284, "learning_rate": 2.5786382350711597e-06, "loss": 0.0158, "step": 144195 }, { "epoch": 0.6016807003196168, "grad_norm": 0.48368773607050025, "learning_rate": 2.5785935284402437e-06, "loss": 0.0192, "step": 144200 }, { "epoch": 0.6017015630346071, "grad_norm": 0.922968397475755, "learning_rate": 2.578548824134524e-06, "loss": 0.0271, "step": 144205 }, { "epoch": 0.6017224257495973, "grad_norm": 0.5123789650271706, "learning_rate": 2.5785041221538e-06, "loss": 0.0292, "step": 144210 }, { "epoch": 0.6017432884645876, "grad_norm": 0.43505358020252216, "learning_rate": 2.578459422497868e-06, "loss": 0.0236, "step": 144215 }, { "epoch": 0.6017641511795779, "grad_norm": 0.597960328116421, "learning_rate": 2.5784147251665298e-06, "loss": 0.0234, "step": 144220 }, { "epoch": 0.6017850138945682, "grad_norm": 0.9032235085836038, "learning_rate": 2.5783700301595802e-06, "loss": 0.0267, "step": 144225 }, { "epoch": 0.6018058766095584, "grad_norm": 1.6355521213603852, "learning_rate": 2.5783253374768214e-06, "loss": 0.0302, "step": 144230 }, { "epoch": 0.6018267393245488, "grad_norm": 0.7600574900019774, "learning_rate": 2.578280647118049e-06, "loss": 0.0222, "step": 144235 }, { "epoch": 0.601847602039539, "grad_norm": 0.4873935801888493, "learning_rate": 2.5782359590830637e-06, "loss": 0.0185, "step": 144240 }, { "epoch": 0.6018684647545293, "grad_norm": 0.8805565979317744, "learning_rate": 2.5781912733716625e-06, "loss": 0.0203, "step": 144245 }, { "epoch": 0.6018893274695196, "grad_norm": 0.6280280800727279, "learning_rate": 2.5781465899836444e-06, "loss": 0.0247, "step": 144250 }, { "epoch": 0.6019101901845099, "grad_norm": 0.46089470347317635, "learning_rate": 2.5781019089188093e-06, "loss": 0.0242, "step": 144255 }, { "epoch": 0.6019310528995001, "grad_norm": 1.1171241585078178, "learning_rate": 2.5780572301769545e-06, "loss": 0.0273, "step": 144260 }, { "epoch": 0.6019519156144905, "grad_norm": 0.9594234597045124, "learning_rate": 2.578012553757879e-06, "loss": 0.0285, "step": 144265 }, { "epoch": 0.6019727783294807, "grad_norm": 0.5418491818911264, "learning_rate": 2.5779678796613826e-06, "loss": 0.0289, "step": 144270 }, { "epoch": 0.601993641044471, "grad_norm": 2.1948378244219735, "learning_rate": 2.5779232078872627e-06, "loss": 0.0205, "step": 144275 }, { "epoch": 0.6020145037594612, "grad_norm": 1.1711928400136105, "learning_rate": 2.577878538435319e-06, "loss": 0.0202, "step": 144280 }, { "epoch": 0.6020353664744515, "grad_norm": 1.4179284479732455, "learning_rate": 2.5778338713053497e-06, "loss": 0.0317, "step": 144285 }, { "epoch": 0.6020562291894418, "grad_norm": 0.7944274230408858, "learning_rate": 2.577789206497154e-06, "loss": 0.0242, "step": 144290 }, { "epoch": 0.602077091904432, "grad_norm": 0.6955347384524898, "learning_rate": 2.5777445440105304e-06, "loss": 0.0209, "step": 144295 }, { "epoch": 0.6020979546194224, "grad_norm": 0.4139406391566726, "learning_rate": 2.5776998838452783e-06, "loss": 0.0192, "step": 144300 }, { "epoch": 0.6021188173344126, "grad_norm": 0.5913186624080649, "learning_rate": 2.5776552260011968e-06, "loss": 0.02, "step": 144305 }, { "epoch": 0.6021396800494029, "grad_norm": 1.1003689069205498, "learning_rate": 2.5776105704780833e-06, "loss": 0.0236, "step": 144310 }, { "epoch": 0.6021605427643932, "grad_norm": 1.3475736959418816, "learning_rate": 2.577565917275739e-06, "loss": 0.0262, "step": 144315 }, { "epoch": 0.6021814054793835, "grad_norm": 1.0228437921645193, "learning_rate": 2.5775212663939615e-06, "loss": 0.0156, "step": 144320 }, { "epoch": 0.6022022681943737, "grad_norm": 0.7838627310920699, "learning_rate": 2.5774766178325496e-06, "loss": 0.0257, "step": 144325 }, { "epoch": 0.602223130909364, "grad_norm": 0.6589522604923749, "learning_rate": 2.5774319715913032e-06, "loss": 0.0244, "step": 144330 }, { "epoch": 0.6022439936243543, "grad_norm": 0.786668614446784, "learning_rate": 2.577387327670021e-06, "loss": 0.0231, "step": 144335 }, { "epoch": 0.6022648563393446, "grad_norm": 0.7292572216582588, "learning_rate": 2.577342686068502e-06, "loss": 0.0191, "step": 144340 }, { "epoch": 0.6022857190543348, "grad_norm": 0.768574492722935, "learning_rate": 2.577298046786545e-06, "loss": 0.026, "step": 144345 }, { "epoch": 0.6023065817693252, "grad_norm": 1.0546742187360012, "learning_rate": 2.5772534098239498e-06, "loss": 0.023, "step": 144350 }, { "epoch": 0.6023274444843154, "grad_norm": 0.4440098047192853, "learning_rate": 2.5772087751805153e-06, "loss": 0.0267, "step": 144355 }, { "epoch": 0.6023483071993057, "grad_norm": 1.2697215097130752, "learning_rate": 2.5771641428560405e-06, "loss": 0.025, "step": 144360 }, { "epoch": 0.602369169914296, "grad_norm": 1.3323855713220292, "learning_rate": 2.5771195128503253e-06, "loss": 0.0206, "step": 144365 }, { "epoch": 0.6023900326292863, "grad_norm": 0.6145409850478712, "learning_rate": 2.577074885163168e-06, "loss": 0.0252, "step": 144370 }, { "epoch": 0.6024108953442765, "grad_norm": 0.8065759941306032, "learning_rate": 2.577030259794368e-06, "loss": 0.0171, "step": 144375 }, { "epoch": 0.6024317580592669, "grad_norm": 0.3591916026052377, "learning_rate": 2.576985636743725e-06, "loss": 0.0192, "step": 144380 }, { "epoch": 0.6024526207742571, "grad_norm": 1.0276488023979111, "learning_rate": 2.576941016011038e-06, "loss": 0.0169, "step": 144385 }, { "epoch": 0.6024734834892473, "grad_norm": 0.38547637952110037, "learning_rate": 2.5768963975961064e-06, "loss": 0.0212, "step": 144390 }, { "epoch": 0.6024943462042376, "grad_norm": 0.852965953914126, "learning_rate": 2.57685178149873e-06, "loss": 0.0335, "step": 144395 }, { "epoch": 0.6025152089192279, "grad_norm": 0.6936076045076763, "learning_rate": 2.5768071677187078e-06, "loss": 0.0211, "step": 144400 }, { "epoch": 0.6025360716342182, "grad_norm": 0.4590136362541559, "learning_rate": 2.5767625562558384e-06, "loss": 0.0242, "step": 144405 }, { "epoch": 0.6025569343492084, "grad_norm": 0.3927324811641376, "learning_rate": 2.576717947109923e-06, "loss": 0.0233, "step": 144410 }, { "epoch": 0.6025777970641988, "grad_norm": 0.9689981314238288, "learning_rate": 2.5766733402807598e-06, "loss": 0.0229, "step": 144415 }, { "epoch": 0.602598659779189, "grad_norm": 0.5786152702658921, "learning_rate": 2.5766287357681485e-06, "loss": 0.0227, "step": 144420 }, { "epoch": 0.6026195224941793, "grad_norm": 0.4535404100318391, "learning_rate": 2.5765841335718882e-06, "loss": 0.0189, "step": 144425 }, { "epoch": 0.6026403852091696, "grad_norm": 0.5685705963179255, "learning_rate": 2.5765395336917794e-06, "loss": 0.018, "step": 144430 }, { "epoch": 0.6026612479241599, "grad_norm": 0.7514635244444903, "learning_rate": 2.576494936127621e-06, "loss": 0.0269, "step": 144435 }, { "epoch": 0.6026821106391501, "grad_norm": 0.5878280856805457, "learning_rate": 2.5764503408792122e-06, "loss": 0.0237, "step": 144440 }, { "epoch": 0.6027029733541405, "grad_norm": 0.35552731851239644, "learning_rate": 2.5764057479463537e-06, "loss": 0.0175, "step": 144445 }, { "epoch": 0.6027238360691307, "grad_norm": 0.6212927320626626, "learning_rate": 2.5763611573288445e-06, "loss": 0.0264, "step": 144450 }, { "epoch": 0.602744698784121, "grad_norm": 0.8756498867310856, "learning_rate": 2.5763165690264843e-06, "loss": 0.0216, "step": 144455 }, { "epoch": 0.6027655614991112, "grad_norm": 0.6854662056837953, "learning_rate": 2.576271983039072e-06, "loss": 0.022, "step": 144460 }, { "epoch": 0.6027864242141016, "grad_norm": 1.1580255751518764, "learning_rate": 2.576227399366408e-06, "loss": 0.0271, "step": 144465 }, { "epoch": 0.6028072869290918, "grad_norm": 0.5566829551626227, "learning_rate": 2.5761828180082926e-06, "loss": 0.0154, "step": 144470 }, { "epoch": 0.602828149644082, "grad_norm": 0.6352655505220641, "learning_rate": 2.576138238964525e-06, "loss": 0.0236, "step": 144475 }, { "epoch": 0.6028490123590724, "grad_norm": 0.4779975374810927, "learning_rate": 2.5760936622349046e-06, "loss": 0.0155, "step": 144480 }, { "epoch": 0.6028698750740626, "grad_norm": 0.3903664509604763, "learning_rate": 2.576049087819232e-06, "loss": 0.0212, "step": 144485 }, { "epoch": 0.6028907377890529, "grad_norm": 0.553492860380341, "learning_rate": 2.5760045157173064e-06, "loss": 0.0211, "step": 144490 }, { "epoch": 0.6029116005040432, "grad_norm": 0.9435070951862652, "learning_rate": 2.5759599459289273e-06, "loss": 0.0232, "step": 144495 }, { "epoch": 0.6029324632190335, "grad_norm": 0.6826960060040229, "learning_rate": 2.5759153784538947e-06, "loss": 0.0263, "step": 144500 }, { "epoch": 0.6029533259340237, "grad_norm": 0.7207350014095988, "learning_rate": 2.57587081329201e-06, "loss": 0.0194, "step": 144505 }, { "epoch": 0.602974188649014, "grad_norm": 0.9856432370331742, "learning_rate": 2.5758262504430716e-06, "loss": 0.0188, "step": 144510 }, { "epoch": 0.6029950513640043, "grad_norm": 0.6683529523192767, "learning_rate": 2.5757816899068796e-06, "loss": 0.0199, "step": 144515 }, { "epoch": 0.6030159140789946, "grad_norm": 0.5651087409379258, "learning_rate": 2.5757371316832336e-06, "loss": 0.0227, "step": 144520 }, { "epoch": 0.6030367767939848, "grad_norm": 0.5232321270854448, "learning_rate": 2.5756925757719346e-06, "loss": 0.0186, "step": 144525 }, { "epoch": 0.6030576395089752, "grad_norm": 0.6131861132080394, "learning_rate": 2.575648022172782e-06, "loss": 0.0161, "step": 144530 }, { "epoch": 0.6030785022239654, "grad_norm": 1.4375436108627866, "learning_rate": 2.575603470885576e-06, "loss": 0.0236, "step": 144535 }, { "epoch": 0.6030993649389557, "grad_norm": 0.6899330619823495, "learning_rate": 2.575558921910117e-06, "loss": 0.0223, "step": 144540 }, { "epoch": 0.603120227653946, "grad_norm": 0.42712095597666383, "learning_rate": 2.575514375246204e-06, "loss": 0.0201, "step": 144545 }, { "epoch": 0.6031410903689363, "grad_norm": 0.5536175067937842, "learning_rate": 2.5754698308936386e-06, "loss": 0.027, "step": 144550 }, { "epoch": 0.6031619530839265, "grad_norm": 0.964316380386254, "learning_rate": 2.5754252888522195e-06, "loss": 0.0308, "step": 144555 }, { "epoch": 0.6031828157989169, "grad_norm": 0.628757982472466, "learning_rate": 2.5753807491217473e-06, "loss": 0.0344, "step": 144560 }, { "epoch": 0.6032036785139071, "grad_norm": 0.3747003511387352, "learning_rate": 2.575336211702023e-06, "loss": 0.0189, "step": 144565 }, { "epoch": 0.6032245412288973, "grad_norm": 0.7010830036830209, "learning_rate": 2.575291676592846e-06, "loss": 0.0187, "step": 144570 }, { "epoch": 0.6032454039438876, "grad_norm": 0.25125618926998305, "learning_rate": 2.5752471437940167e-06, "loss": 0.0288, "step": 144575 }, { "epoch": 0.603266266658878, "grad_norm": 0.5650709868832553, "learning_rate": 2.5752026133053355e-06, "loss": 0.0204, "step": 144580 }, { "epoch": 0.6032871293738682, "grad_norm": 0.34942982759099894, "learning_rate": 2.5751580851266024e-06, "loss": 0.0226, "step": 144585 }, { "epoch": 0.6033079920888584, "grad_norm": 0.6744998244660123, "learning_rate": 2.5751135592576177e-06, "loss": 0.0271, "step": 144590 }, { "epoch": 0.6033288548038488, "grad_norm": 0.7852919514013933, "learning_rate": 2.575069035698181e-06, "loss": 0.0256, "step": 144595 }, { "epoch": 0.603349717518839, "grad_norm": 1.0471244316397692, "learning_rate": 2.5750245144480945e-06, "loss": 0.0361, "step": 144600 }, { "epoch": 0.6033705802338293, "grad_norm": 1.1603466845725008, "learning_rate": 2.5749799955071575e-06, "loss": 0.0293, "step": 144605 }, { "epoch": 0.6033914429488196, "grad_norm": 0.658269748523469, "learning_rate": 2.57493547887517e-06, "loss": 0.0243, "step": 144610 }, { "epoch": 0.6034123056638099, "grad_norm": 0.6710594731940491, "learning_rate": 2.574890964551934e-06, "loss": 0.0172, "step": 144615 }, { "epoch": 0.6034331683788001, "grad_norm": 0.6179835960097629, "learning_rate": 2.5748464525372474e-06, "loss": 0.0343, "step": 144620 }, { "epoch": 0.6034540310937904, "grad_norm": 0.7737190957116186, "learning_rate": 2.574801942830913e-06, "loss": 0.0222, "step": 144625 }, { "epoch": 0.6034748938087807, "grad_norm": 0.40954090682288036, "learning_rate": 2.57475743543273e-06, "loss": 0.0151, "step": 144630 }, { "epoch": 0.603495756523771, "grad_norm": 0.618407448116296, "learning_rate": 2.574712930342499e-06, "loss": 0.0188, "step": 144635 }, { "epoch": 0.6035166192387612, "grad_norm": 0.5247612255463565, "learning_rate": 2.5746684275600214e-06, "loss": 0.0186, "step": 144640 }, { "epoch": 0.6035374819537516, "grad_norm": 0.501865343645574, "learning_rate": 2.5746239270850963e-06, "loss": 0.0224, "step": 144645 }, { "epoch": 0.6035583446687418, "grad_norm": 0.3708595594836663, "learning_rate": 2.574579428917526e-06, "loss": 0.0223, "step": 144650 }, { "epoch": 0.603579207383732, "grad_norm": 0.4683086753776056, "learning_rate": 2.57453493305711e-06, "loss": 0.0249, "step": 144655 }, { "epoch": 0.6036000700987224, "grad_norm": 1.5233629896403984, "learning_rate": 2.57449043950365e-06, "loss": 0.0253, "step": 144660 }, { "epoch": 0.6036209328137127, "grad_norm": 0.397992178233072, "learning_rate": 2.574445948256945e-06, "loss": 0.0231, "step": 144665 }, { "epoch": 0.6036417955287029, "grad_norm": 0.5137564723374816, "learning_rate": 2.5744014593167964e-06, "loss": 0.0181, "step": 144670 }, { "epoch": 0.6036626582436933, "grad_norm": 0.8110327648143542, "learning_rate": 2.574356972683006e-06, "loss": 0.0236, "step": 144675 }, { "epoch": 0.6036835209586835, "grad_norm": 0.666472292366511, "learning_rate": 2.5743124883553727e-06, "loss": 0.0292, "step": 144680 }, { "epoch": 0.6037043836736737, "grad_norm": 0.46502779066883054, "learning_rate": 2.574268006333699e-06, "loss": 0.0237, "step": 144685 }, { "epoch": 0.603725246388664, "grad_norm": 0.9448590363506955, "learning_rate": 2.5742235266177846e-06, "loss": 0.0258, "step": 144690 }, { "epoch": 0.6037461091036543, "grad_norm": 0.6260698226147532, "learning_rate": 2.5741790492074303e-06, "loss": 0.0168, "step": 144695 }, { "epoch": 0.6037669718186446, "grad_norm": 1.022348999526441, "learning_rate": 2.574134574102437e-06, "loss": 0.0283, "step": 144700 }, { "epoch": 0.6037878345336348, "grad_norm": 0.6320753397872273, "learning_rate": 2.574090101302606e-06, "loss": 0.0295, "step": 144705 }, { "epoch": 0.6038086972486252, "grad_norm": 0.6650316522692069, "learning_rate": 2.5740456308077382e-06, "loss": 0.0126, "step": 144710 }, { "epoch": 0.6038295599636154, "grad_norm": 0.39541005371580723, "learning_rate": 2.5740011626176336e-06, "loss": 0.0177, "step": 144715 }, { "epoch": 0.6038504226786057, "grad_norm": 0.8395644220464005, "learning_rate": 2.5739566967320943e-06, "loss": 0.0331, "step": 144720 }, { "epoch": 0.603871285393596, "grad_norm": 1.0194075697729117, "learning_rate": 2.5739122331509208e-06, "loss": 0.0257, "step": 144725 }, { "epoch": 0.6038921481085863, "grad_norm": 0.7171907469383763, "learning_rate": 2.5738677718739137e-06, "loss": 0.018, "step": 144730 }, { "epoch": 0.6039130108235765, "grad_norm": 0.3677886595419037, "learning_rate": 2.5738233129008743e-06, "loss": 0.0242, "step": 144735 }, { "epoch": 0.6039338735385669, "grad_norm": 0.6299194933533451, "learning_rate": 2.573778856231604e-06, "loss": 0.019, "step": 144740 }, { "epoch": 0.6039547362535571, "grad_norm": 0.776604520734979, "learning_rate": 2.573734401865903e-06, "loss": 0.0184, "step": 144745 }, { "epoch": 0.6039755989685474, "grad_norm": 0.7116341774687404, "learning_rate": 2.573689949803573e-06, "loss": 0.0283, "step": 144750 }, { "epoch": 0.6039964616835376, "grad_norm": 0.5480680107202536, "learning_rate": 2.5736455000444143e-06, "loss": 0.0199, "step": 144755 }, { "epoch": 0.604017324398528, "grad_norm": 0.7985877563413932, "learning_rate": 2.5736010525882295e-06, "loss": 0.0209, "step": 144760 }, { "epoch": 0.6040381871135182, "grad_norm": 0.7798035871134199, "learning_rate": 2.5735566074348185e-06, "loss": 0.0243, "step": 144765 }, { "epoch": 0.6040590498285084, "grad_norm": 0.47419273271669626, "learning_rate": 2.5735121645839827e-06, "loss": 0.0189, "step": 144770 }, { "epoch": 0.6040799125434988, "grad_norm": 0.8233030386501992, "learning_rate": 2.573467724035523e-06, "loss": 0.0266, "step": 144775 }, { "epoch": 0.604100775258489, "grad_norm": 0.5037723813587245, "learning_rate": 2.5734232857892423e-06, "loss": 0.0288, "step": 144780 }, { "epoch": 0.6041216379734793, "grad_norm": 0.7079149487888003, "learning_rate": 2.5733788498449395e-06, "loss": 0.025, "step": 144785 }, { "epoch": 0.6041425006884696, "grad_norm": 0.48850853348208184, "learning_rate": 2.573334416202417e-06, "loss": 0.0187, "step": 144790 }, { "epoch": 0.6041633634034599, "grad_norm": 0.45791383499541227, "learning_rate": 2.5732899848614766e-06, "loss": 0.0186, "step": 144795 }, { "epoch": 0.6041842261184501, "grad_norm": 0.6735641229154383, "learning_rate": 2.573245555821919e-06, "loss": 0.0292, "step": 144800 }, { "epoch": 0.6042050888334404, "grad_norm": 0.6688747149573431, "learning_rate": 2.573201129083545e-06, "loss": 0.0169, "step": 144805 }, { "epoch": 0.6042259515484307, "grad_norm": 0.5089367417015711, "learning_rate": 2.5731567046461577e-06, "loss": 0.0252, "step": 144810 }, { "epoch": 0.604246814263421, "grad_norm": 2.127158484757035, "learning_rate": 2.5731122825095562e-06, "loss": 0.032, "step": 144815 }, { "epoch": 0.6042676769784112, "grad_norm": 0.40993173830221863, "learning_rate": 2.5730678626735433e-06, "loss": 0.0287, "step": 144820 }, { "epoch": 0.6042885396934016, "grad_norm": 1.7119494730764357, "learning_rate": 2.57302344513792e-06, "loss": 0.0211, "step": 144825 }, { "epoch": 0.6043094024083918, "grad_norm": 0.9473529520996616, "learning_rate": 2.572979029902488e-06, "loss": 0.0164, "step": 144830 }, { "epoch": 0.6043302651233821, "grad_norm": 1.0144079334127176, "learning_rate": 2.572934616967049e-06, "loss": 0.0285, "step": 144835 }, { "epoch": 0.6043511278383724, "grad_norm": 0.8892906383031864, "learning_rate": 2.5728902063314036e-06, "loss": 0.0182, "step": 144840 }, { "epoch": 0.6043719905533627, "grad_norm": 0.5045614394513742, "learning_rate": 2.572845797995354e-06, "loss": 0.0188, "step": 144845 }, { "epoch": 0.6043928532683529, "grad_norm": 0.32352644591778146, "learning_rate": 2.5728013919587026e-06, "loss": 0.0222, "step": 144850 }, { "epoch": 0.6044137159833433, "grad_norm": 0.9075652093889556, "learning_rate": 2.5727569882212492e-06, "loss": 0.0231, "step": 144855 }, { "epoch": 0.6044345786983335, "grad_norm": 0.3369800188258187, "learning_rate": 2.572712586782796e-06, "loss": 0.0198, "step": 144860 }, { "epoch": 0.6044554414133237, "grad_norm": 0.649405329203885, "learning_rate": 2.572668187643146e-06, "loss": 0.0249, "step": 144865 }, { "epoch": 0.604476304128314, "grad_norm": 0.7287230811622893, "learning_rate": 2.572623790802099e-06, "loss": 0.0145, "step": 144870 }, { "epoch": 0.6044971668433043, "grad_norm": 0.7020225924459893, "learning_rate": 2.572579396259457e-06, "loss": 0.024, "step": 144875 }, { "epoch": 0.6045180295582946, "grad_norm": 0.9307143108164031, "learning_rate": 2.572535004015023e-06, "loss": 0.0284, "step": 144880 }, { "epoch": 0.6045388922732848, "grad_norm": 0.8024664346632612, "learning_rate": 2.572490614068597e-06, "loss": 0.0263, "step": 144885 }, { "epoch": 0.6045597549882752, "grad_norm": 0.7488914792123913, "learning_rate": 2.5724462264199817e-06, "loss": 0.0168, "step": 144890 }, { "epoch": 0.6045806177032654, "grad_norm": 0.7149205888613273, "learning_rate": 2.5724018410689785e-06, "loss": 0.0219, "step": 144895 }, { "epoch": 0.6046014804182557, "grad_norm": 1.1300596057493073, "learning_rate": 2.57235745801539e-06, "loss": 0.0302, "step": 144900 }, { "epoch": 0.604622343133246, "grad_norm": 0.6082185790743022, "learning_rate": 2.572313077259017e-06, "loss": 0.0188, "step": 144905 }, { "epoch": 0.6046432058482363, "grad_norm": 0.6293969226749586, "learning_rate": 2.5722686987996614e-06, "loss": 0.0233, "step": 144910 }, { "epoch": 0.6046640685632265, "grad_norm": 1.1635121874008405, "learning_rate": 2.5722243226371264e-06, "loss": 0.0203, "step": 144915 }, { "epoch": 0.6046849312782169, "grad_norm": 0.6231111975414854, "learning_rate": 2.5721799487712123e-06, "loss": 0.0224, "step": 144920 }, { "epoch": 0.6047057939932071, "grad_norm": 1.472731880544703, "learning_rate": 2.5721355772017214e-06, "loss": 0.0217, "step": 144925 }, { "epoch": 0.6047266567081974, "grad_norm": 2.0209702449916387, "learning_rate": 2.572091207928456e-06, "loss": 0.0199, "step": 144930 }, { "epoch": 0.6047475194231876, "grad_norm": 0.418742937866444, "learning_rate": 2.5720468409512178e-06, "loss": 0.0212, "step": 144935 }, { "epoch": 0.604768382138178, "grad_norm": 0.8495434180051709, "learning_rate": 2.5720024762698094e-06, "loss": 0.0219, "step": 144940 }, { "epoch": 0.6047892448531682, "grad_norm": 0.6617022604175985, "learning_rate": 2.5719581138840314e-06, "loss": 0.0232, "step": 144945 }, { "epoch": 0.6048101075681585, "grad_norm": 0.7284510831060885, "learning_rate": 2.5719137537936874e-06, "loss": 0.0247, "step": 144950 }, { "epoch": 0.6048309702831488, "grad_norm": 0.5058662094908999, "learning_rate": 2.5718693959985785e-06, "loss": 0.0214, "step": 144955 }, { "epoch": 0.604851832998139, "grad_norm": 0.7580406153953323, "learning_rate": 2.571825040498507e-06, "loss": 0.0164, "step": 144960 }, { "epoch": 0.6048726957131293, "grad_norm": 0.5891120524463748, "learning_rate": 2.5717806872932754e-06, "loss": 0.0287, "step": 144965 }, { "epoch": 0.6048935584281196, "grad_norm": 0.7574837365068835, "learning_rate": 2.5717363363826852e-06, "loss": 0.023, "step": 144970 }, { "epoch": 0.6049144211431099, "grad_norm": 0.5617679852684389, "learning_rate": 2.5716919877665387e-06, "loss": 0.0173, "step": 144975 }, { "epoch": 0.6049352838581001, "grad_norm": 1.383228841018768, "learning_rate": 2.5716476414446385e-06, "loss": 0.0281, "step": 144980 }, { "epoch": 0.6049561465730904, "grad_norm": 0.6628707827956202, "learning_rate": 2.571603297416786e-06, "loss": 0.0213, "step": 144985 }, { "epoch": 0.6049770092880807, "grad_norm": 1.5921146880870958, "learning_rate": 2.5715589556827843e-06, "loss": 0.0404, "step": 144990 }, { "epoch": 0.604997872003071, "grad_norm": 0.21181232036809214, "learning_rate": 2.571514616242435e-06, "loss": 0.0191, "step": 144995 }, { "epoch": 0.6050187347180612, "grad_norm": 0.6894611197891657, "learning_rate": 2.571470279095541e-06, "loss": 0.0195, "step": 145000 }, { "epoch": 0.6050395974330516, "grad_norm": 0.7647879757103831, "learning_rate": 2.571425944241904e-06, "loss": 0.0172, "step": 145005 }, { "epoch": 0.6050604601480418, "grad_norm": 0.9753098946893608, "learning_rate": 2.5713816116813266e-06, "loss": 0.0298, "step": 145010 }, { "epoch": 0.6050813228630321, "grad_norm": 0.6404791702158065, "learning_rate": 2.571337281413611e-06, "loss": 0.025, "step": 145015 }, { "epoch": 0.6051021855780224, "grad_norm": 0.6660581022010085, "learning_rate": 2.5712929534385594e-06, "loss": 0.022, "step": 145020 }, { "epoch": 0.6051230482930127, "grad_norm": 0.6733643505009906, "learning_rate": 2.571248627755975e-06, "loss": 0.0314, "step": 145025 }, { "epoch": 0.6051439110080029, "grad_norm": 0.5276383883079541, "learning_rate": 2.5712043043656586e-06, "loss": 0.0225, "step": 145030 }, { "epoch": 0.6051647737229933, "grad_norm": 0.34870775712538626, "learning_rate": 2.5711599832674145e-06, "loss": 0.0256, "step": 145035 }, { "epoch": 0.6051856364379835, "grad_norm": 0.7263757389051575, "learning_rate": 2.571115664461044e-06, "loss": 0.02, "step": 145040 }, { "epoch": 0.6052064991529738, "grad_norm": 0.7710407706056999, "learning_rate": 2.57107134794635e-06, "loss": 0.029, "step": 145045 }, { "epoch": 0.605227361867964, "grad_norm": 0.5999993554426425, "learning_rate": 2.571027033723135e-06, "loss": 0.0197, "step": 145050 }, { "epoch": 0.6052482245829544, "grad_norm": 0.8043602719047175, "learning_rate": 2.5709827217912015e-06, "loss": 0.0246, "step": 145055 }, { "epoch": 0.6052690872979446, "grad_norm": 0.6292085753505775, "learning_rate": 2.5709384121503516e-06, "loss": 0.0226, "step": 145060 }, { "epoch": 0.6052899500129348, "grad_norm": 0.6400126691701493, "learning_rate": 2.570894104800389e-06, "loss": 0.0247, "step": 145065 }, { "epoch": 0.6053108127279252, "grad_norm": 1.5394834794501149, "learning_rate": 2.570849799741115e-06, "loss": 0.0247, "step": 145070 }, { "epoch": 0.6053316754429154, "grad_norm": 0.7208000274214766, "learning_rate": 2.570805496972333e-06, "loss": 0.0319, "step": 145075 }, { "epoch": 0.6053525381579057, "grad_norm": 0.510361588526431, "learning_rate": 2.570761196493845e-06, "loss": 0.0256, "step": 145080 }, { "epoch": 0.605373400872896, "grad_norm": 0.40337340034398705, "learning_rate": 2.570716898305455e-06, "loss": 0.0227, "step": 145085 }, { "epoch": 0.6053942635878863, "grad_norm": 1.1250316975824828, "learning_rate": 2.570672602406964e-06, "loss": 0.0249, "step": 145090 }, { "epoch": 0.6054151263028765, "grad_norm": 0.5168208800808481, "learning_rate": 2.570628308798176e-06, "loss": 0.0227, "step": 145095 }, { "epoch": 0.6054359890178669, "grad_norm": 0.6094445903885416, "learning_rate": 2.5705840174788933e-06, "loss": 0.0214, "step": 145100 }, { "epoch": 0.6054568517328571, "grad_norm": 0.8496301953263659, "learning_rate": 2.570539728448918e-06, "loss": 0.0243, "step": 145105 }, { "epoch": 0.6054777144478474, "grad_norm": 0.6606521542821728, "learning_rate": 2.570495441708055e-06, "loss": 0.0268, "step": 145110 }, { "epoch": 0.6054985771628376, "grad_norm": 0.9003617639781621, "learning_rate": 2.570451157256104e-06, "loss": 0.0241, "step": 145115 }, { "epoch": 0.605519439877828, "grad_norm": 0.34809266644452147, "learning_rate": 2.57040687509287e-06, "loss": 0.0184, "step": 145120 }, { "epoch": 0.6055403025928182, "grad_norm": 0.8713433048925755, "learning_rate": 2.570362595218156e-06, "loss": 0.0267, "step": 145125 }, { "epoch": 0.6055611653078085, "grad_norm": 0.6588324473168065, "learning_rate": 2.570318317631764e-06, "loss": 0.0244, "step": 145130 }, { "epoch": 0.6055820280227988, "grad_norm": 0.9220229078189396, "learning_rate": 2.570274042333497e-06, "loss": 0.0212, "step": 145135 }, { "epoch": 0.6056028907377891, "grad_norm": 0.8312491473751413, "learning_rate": 2.570229769323158e-06, "loss": 0.0262, "step": 145140 }, { "epoch": 0.6056237534527793, "grad_norm": 0.4441733003563252, "learning_rate": 2.5701854986005503e-06, "loss": 0.0208, "step": 145145 }, { "epoch": 0.6056446161677697, "grad_norm": 0.7403157518349341, "learning_rate": 2.570141230165476e-06, "loss": 0.0262, "step": 145150 }, { "epoch": 0.6056654788827599, "grad_norm": 0.6191119838687804, "learning_rate": 2.5700969640177396e-06, "loss": 0.0199, "step": 145155 }, { "epoch": 0.6056863415977501, "grad_norm": 0.8384519077474555, "learning_rate": 2.570052700157143e-06, "loss": 0.0187, "step": 145160 }, { "epoch": 0.6057072043127404, "grad_norm": 0.6916588943073345, "learning_rate": 2.5700084385834895e-06, "loss": 0.0232, "step": 145165 }, { "epoch": 0.6057280670277307, "grad_norm": 0.6934873431791249, "learning_rate": 2.5699641792965824e-06, "loss": 0.0163, "step": 145170 }, { "epoch": 0.605748929742721, "grad_norm": 0.46416822106965844, "learning_rate": 2.569919922296224e-06, "loss": 0.0184, "step": 145175 }, { "epoch": 0.6057697924577112, "grad_norm": 1.2972373195920255, "learning_rate": 2.5698756675822188e-06, "loss": 0.0234, "step": 145180 }, { "epoch": 0.6057906551727016, "grad_norm": 0.4044667636070256, "learning_rate": 2.5698314151543684e-06, "loss": 0.0219, "step": 145185 }, { "epoch": 0.6058115178876918, "grad_norm": 0.5370874922976245, "learning_rate": 2.569787165012478e-06, "loss": 0.0215, "step": 145190 }, { "epoch": 0.6058323806026821, "grad_norm": 0.5117749612791075, "learning_rate": 2.5697429171563486e-06, "loss": 0.0173, "step": 145195 }, { "epoch": 0.6058532433176724, "grad_norm": 0.5727190833748605, "learning_rate": 2.569698671585784e-06, "loss": 0.0248, "step": 145200 }, { "epoch": 0.6058741060326627, "grad_norm": 0.35172927055253117, "learning_rate": 2.5696544283005887e-06, "loss": 0.0179, "step": 145205 }, { "epoch": 0.6058949687476529, "grad_norm": 0.9016624094587133, "learning_rate": 2.5696101873005647e-06, "loss": 0.0262, "step": 145210 }, { "epoch": 0.6059158314626433, "grad_norm": 0.7423925722653938, "learning_rate": 2.569565948585516e-06, "loss": 0.0254, "step": 145215 }, { "epoch": 0.6059366941776335, "grad_norm": 1.2031193500068238, "learning_rate": 2.569521712155245e-06, "loss": 0.028, "step": 145220 }, { "epoch": 0.6059575568926238, "grad_norm": 0.8744051522922756, "learning_rate": 2.569477478009556e-06, "loss": 0.0281, "step": 145225 }, { "epoch": 0.605978419607614, "grad_norm": 0.6639242762876457, "learning_rate": 2.569433246148252e-06, "loss": 0.0257, "step": 145230 }, { "epoch": 0.6059992823226044, "grad_norm": 0.6413494026397357, "learning_rate": 2.569389016571136e-06, "loss": 0.0153, "step": 145235 }, { "epoch": 0.6060201450375946, "grad_norm": 0.7283922592297568, "learning_rate": 2.5693447892780126e-06, "loss": 0.0228, "step": 145240 }, { "epoch": 0.6060410077525848, "grad_norm": 0.6979862769219891, "learning_rate": 2.5693005642686836e-06, "loss": 0.0221, "step": 145245 }, { "epoch": 0.6060618704675752, "grad_norm": 0.6922032575140403, "learning_rate": 2.569256341542954e-06, "loss": 0.0326, "step": 145250 }, { "epoch": 0.6060827331825654, "grad_norm": 0.5485542236445189, "learning_rate": 2.569212121100626e-06, "loss": 0.0214, "step": 145255 }, { "epoch": 0.6061035958975557, "grad_norm": 1.1574892827298533, "learning_rate": 2.569167902941504e-06, "loss": 0.0237, "step": 145260 }, { "epoch": 0.606124458612546, "grad_norm": 0.967212144116789, "learning_rate": 2.5691236870653907e-06, "loss": 0.0253, "step": 145265 }, { "epoch": 0.6061453213275363, "grad_norm": 0.45956562974152754, "learning_rate": 2.5690794734720905e-06, "loss": 0.0254, "step": 145270 }, { "epoch": 0.6061661840425265, "grad_norm": 0.5424841540697651, "learning_rate": 2.569035262161407e-06, "loss": 0.024, "step": 145275 }, { "epoch": 0.6061870467575169, "grad_norm": 1.707190468684403, "learning_rate": 2.568991053133143e-06, "loss": 0.0514, "step": 145280 }, { "epoch": 0.6062079094725071, "grad_norm": 0.8106909923160311, "learning_rate": 2.5689468463871025e-06, "loss": 0.0228, "step": 145285 }, { "epoch": 0.6062287721874974, "grad_norm": 0.5168310351930593, "learning_rate": 2.5689026419230887e-06, "loss": 0.021, "step": 145290 }, { "epoch": 0.6062496349024876, "grad_norm": 1.0605490176882357, "learning_rate": 2.5688584397409067e-06, "loss": 0.0308, "step": 145295 }, { "epoch": 0.606270497617478, "grad_norm": 0.16814076711421175, "learning_rate": 2.5688142398403587e-06, "loss": 0.0131, "step": 145300 }, { "epoch": 0.6062913603324682, "grad_norm": 0.6430171443908207, "learning_rate": 2.568770042221249e-06, "loss": 0.0171, "step": 145305 }, { "epoch": 0.6063122230474585, "grad_norm": 0.635278874399353, "learning_rate": 2.5687258468833813e-06, "loss": 0.0261, "step": 145310 }, { "epoch": 0.6063330857624488, "grad_norm": 0.7432573692702515, "learning_rate": 2.5686816538265596e-06, "loss": 0.023, "step": 145315 }, { "epoch": 0.6063539484774391, "grad_norm": 0.771748707776055, "learning_rate": 2.5686374630505873e-06, "loss": 0.0268, "step": 145320 }, { "epoch": 0.6063748111924293, "grad_norm": 0.4142840534570269, "learning_rate": 2.568593274555269e-06, "loss": 0.0171, "step": 145325 }, { "epoch": 0.6063956739074197, "grad_norm": 0.6007301963917884, "learning_rate": 2.5685490883404067e-06, "loss": 0.017, "step": 145330 }, { "epoch": 0.6064165366224099, "grad_norm": 0.5818861489752025, "learning_rate": 2.5685049044058063e-06, "loss": 0.0164, "step": 145335 }, { "epoch": 0.6064373993374002, "grad_norm": 1.1956962382096707, "learning_rate": 2.5684607227512705e-06, "loss": 0.0218, "step": 145340 }, { "epoch": 0.6064582620523904, "grad_norm": 0.500225657730133, "learning_rate": 2.568416543376604e-06, "loss": 0.0197, "step": 145345 }, { "epoch": 0.6064791247673808, "grad_norm": 0.5665654262535113, "learning_rate": 2.5683723662816103e-06, "loss": 0.023, "step": 145350 }, { "epoch": 0.606499987482371, "grad_norm": 0.6340936037769201, "learning_rate": 2.5683281914660925e-06, "loss": 0.027, "step": 145355 }, { "epoch": 0.6065208501973612, "grad_norm": 0.6154877265484384, "learning_rate": 2.5682840189298557e-06, "loss": 0.0186, "step": 145360 }, { "epoch": 0.6065417129123516, "grad_norm": 0.7543334391551143, "learning_rate": 2.5682398486727038e-06, "loss": 0.0204, "step": 145365 }, { "epoch": 0.6065625756273418, "grad_norm": 0.7498681321099008, "learning_rate": 2.5681956806944415e-06, "loss": 0.0237, "step": 145370 }, { "epoch": 0.6065834383423321, "grad_norm": 0.5681868305063601, "learning_rate": 2.568151514994871e-06, "loss": 0.0288, "step": 145375 }, { "epoch": 0.6066043010573224, "grad_norm": 0.5631632398668756, "learning_rate": 2.568107351573797e-06, "loss": 0.0192, "step": 145380 }, { "epoch": 0.6066251637723127, "grad_norm": 1.1625604514551655, "learning_rate": 2.568063190431025e-06, "loss": 0.0295, "step": 145385 }, { "epoch": 0.6066460264873029, "grad_norm": 0.5854090561951599, "learning_rate": 2.568019031566357e-06, "loss": 0.0177, "step": 145390 }, { "epoch": 0.6066668892022933, "grad_norm": 0.9678862296530105, "learning_rate": 2.567974874979599e-06, "loss": 0.0203, "step": 145395 }, { "epoch": 0.6066877519172835, "grad_norm": 1.0593941129351079, "learning_rate": 2.5679307206705545e-06, "loss": 0.0261, "step": 145400 }, { "epoch": 0.6067086146322738, "grad_norm": 0.8129364115477523, "learning_rate": 2.567886568639027e-06, "loss": 0.0195, "step": 145405 }, { "epoch": 0.606729477347264, "grad_norm": 0.523314165486368, "learning_rate": 2.5678424188848218e-06, "loss": 0.0221, "step": 145410 }, { "epoch": 0.6067503400622544, "grad_norm": 0.5034229951632844, "learning_rate": 2.567798271407742e-06, "loss": 0.0265, "step": 145415 }, { "epoch": 0.6067712027772446, "grad_norm": 0.5528434518756448, "learning_rate": 2.567754126207593e-06, "loss": 0.0192, "step": 145420 }, { "epoch": 0.6067920654922349, "grad_norm": 0.55494063706239, "learning_rate": 2.5677099832841786e-06, "loss": 0.02, "step": 145425 }, { "epoch": 0.6068129282072252, "grad_norm": 0.5358140589340871, "learning_rate": 2.5676658426373025e-06, "loss": 0.0211, "step": 145430 }, { "epoch": 0.6068337909222155, "grad_norm": 0.6621251138760482, "learning_rate": 2.5676217042667702e-06, "loss": 0.02, "step": 145435 }, { "epoch": 0.6068546536372057, "grad_norm": 0.5374680951341345, "learning_rate": 2.5675775681723853e-06, "loss": 0.0276, "step": 145440 }, { "epoch": 0.606875516352196, "grad_norm": 0.8221950580853742, "learning_rate": 2.567533434353952e-06, "loss": 0.023, "step": 145445 }, { "epoch": 0.6068963790671863, "grad_norm": 0.9912500443528249, "learning_rate": 2.5674893028112754e-06, "loss": 0.0209, "step": 145450 }, { "epoch": 0.6069172417821765, "grad_norm": 0.7139595459630956, "learning_rate": 2.567445173544159e-06, "loss": 0.0262, "step": 145455 }, { "epoch": 0.6069381044971669, "grad_norm": 1.182277412807569, "learning_rate": 2.5674010465524086e-06, "loss": 0.023, "step": 145460 }, { "epoch": 0.6069589672121571, "grad_norm": 0.9490293974332872, "learning_rate": 2.5673569218358272e-06, "loss": 0.0197, "step": 145465 }, { "epoch": 0.6069798299271474, "grad_norm": 1.3805368287787791, "learning_rate": 2.5673127993942205e-06, "loss": 0.0329, "step": 145470 }, { "epoch": 0.6070006926421376, "grad_norm": 0.5722737043757881, "learning_rate": 2.567268679227392e-06, "loss": 0.0176, "step": 145475 }, { "epoch": 0.607021555357128, "grad_norm": 0.7323125113172403, "learning_rate": 2.567224561335147e-06, "loss": 0.0231, "step": 145480 }, { "epoch": 0.6070424180721182, "grad_norm": 0.24720022810430892, "learning_rate": 2.56718044571729e-06, "loss": 0.0215, "step": 145485 }, { "epoch": 0.6070632807871085, "grad_norm": 0.5900336461069622, "learning_rate": 2.5671363323736254e-06, "loss": 0.0184, "step": 145490 }, { "epoch": 0.6070841435020988, "grad_norm": 1.1845541610769212, "learning_rate": 2.5670922213039574e-06, "loss": 0.0256, "step": 145495 }, { "epoch": 0.6071050062170891, "grad_norm": 0.6095778531225948, "learning_rate": 2.5670481125080916e-06, "loss": 0.0332, "step": 145500 }, { "epoch": 0.6071258689320793, "grad_norm": 0.5122891611446547, "learning_rate": 2.5670040059858313e-06, "loss": 0.0219, "step": 145505 }, { "epoch": 0.6071467316470697, "grad_norm": 0.8131857103479632, "learning_rate": 2.5669599017369825e-06, "loss": 0.0214, "step": 145510 }, { "epoch": 0.6071675943620599, "grad_norm": 0.4974080776819165, "learning_rate": 2.5669157997613494e-06, "loss": 0.0288, "step": 145515 }, { "epoch": 0.6071884570770502, "grad_norm": 0.5074068433540445, "learning_rate": 2.566871700058737e-06, "loss": 0.0201, "step": 145520 }, { "epoch": 0.6072093197920404, "grad_norm": 0.8711077814987345, "learning_rate": 2.566827602628949e-06, "loss": 0.0261, "step": 145525 }, { "epoch": 0.6072301825070308, "grad_norm": 0.42559950250088285, "learning_rate": 2.5667835074717916e-06, "loss": 0.0341, "step": 145530 }, { "epoch": 0.607251045222021, "grad_norm": 0.9758644686123532, "learning_rate": 2.5667394145870683e-06, "loss": 0.0181, "step": 145535 }, { "epoch": 0.6072719079370112, "grad_norm": 0.5048112623132611, "learning_rate": 2.5666953239745855e-06, "loss": 0.0307, "step": 145540 }, { "epoch": 0.6072927706520016, "grad_norm": 0.49708911699813374, "learning_rate": 2.566651235634146e-06, "loss": 0.0195, "step": 145545 }, { "epoch": 0.6073136333669918, "grad_norm": 1.1457339939455022, "learning_rate": 2.566607149565557e-06, "loss": 0.0231, "step": 145550 }, { "epoch": 0.6073344960819821, "grad_norm": 0.5973096672926661, "learning_rate": 2.5665630657686217e-06, "loss": 0.023, "step": 145555 }, { "epoch": 0.6073553587969724, "grad_norm": 0.8485363278305647, "learning_rate": 2.566518984243145e-06, "loss": 0.0213, "step": 145560 }, { "epoch": 0.6073762215119627, "grad_norm": 0.8446657615190555, "learning_rate": 2.5664749049889333e-06, "loss": 0.0271, "step": 145565 }, { "epoch": 0.6073970842269529, "grad_norm": 0.5887551448147204, "learning_rate": 2.5664308280057896e-06, "loss": 0.0325, "step": 145570 }, { "epoch": 0.6074179469419433, "grad_norm": 0.5530614247751915, "learning_rate": 2.5663867532935204e-06, "loss": 0.0149, "step": 145575 }, { "epoch": 0.6074388096569335, "grad_norm": 0.5480608287114329, "learning_rate": 2.56634268085193e-06, "loss": 0.0231, "step": 145580 }, { "epoch": 0.6074596723719238, "grad_norm": 0.6505158354449208, "learning_rate": 2.566298610680824e-06, "loss": 0.0241, "step": 145585 }, { "epoch": 0.607480535086914, "grad_norm": 0.8019824055701584, "learning_rate": 2.566254542780007e-06, "loss": 0.0254, "step": 145590 }, { "epoch": 0.6075013978019044, "grad_norm": 0.6428119441614316, "learning_rate": 2.5662104771492848e-06, "loss": 0.0302, "step": 145595 }, { "epoch": 0.6075222605168946, "grad_norm": 0.4374970621304173, "learning_rate": 2.5661664137884607e-06, "loss": 0.0274, "step": 145600 }, { "epoch": 0.6075431232318849, "grad_norm": 0.6205182825650815, "learning_rate": 2.5661223526973415e-06, "loss": 0.0265, "step": 145605 }, { "epoch": 0.6075639859468752, "grad_norm": 0.5626290032377459, "learning_rate": 2.5660782938757323e-06, "loss": 0.0226, "step": 145610 }, { "epoch": 0.6075848486618655, "grad_norm": 0.812692411686045, "learning_rate": 2.5660342373234375e-06, "loss": 0.0209, "step": 145615 }, { "epoch": 0.6076057113768557, "grad_norm": 0.7730541158143791, "learning_rate": 2.565990183040263e-06, "loss": 0.0196, "step": 145620 }, { "epoch": 0.6076265740918461, "grad_norm": 0.7767172583822385, "learning_rate": 2.5659461310260132e-06, "loss": 0.021, "step": 145625 }, { "epoch": 0.6076474368068363, "grad_norm": 1.1265670984925922, "learning_rate": 2.5659020812804934e-06, "loss": 0.0159, "step": 145630 }, { "epoch": 0.6076682995218265, "grad_norm": 0.4974875717673099, "learning_rate": 2.56585803380351e-06, "loss": 0.0173, "step": 145635 }, { "epoch": 0.6076891622368169, "grad_norm": 0.6545434149780777, "learning_rate": 2.5658139885948674e-06, "loss": 0.0216, "step": 145640 }, { "epoch": 0.6077100249518071, "grad_norm": 0.4972245617245356, "learning_rate": 2.5657699456543717e-06, "loss": 0.026, "step": 145645 }, { "epoch": 0.6077308876667974, "grad_norm": 1.4650531128472584, "learning_rate": 2.5657259049818266e-06, "loss": 0.0263, "step": 145650 }, { "epoch": 0.6077517503817876, "grad_norm": 1.3401970722905086, "learning_rate": 2.565681866577039e-06, "loss": 0.0272, "step": 145655 }, { "epoch": 0.607772613096778, "grad_norm": 1.1862114655500866, "learning_rate": 2.5656378304398137e-06, "loss": 0.0199, "step": 145660 }, { "epoch": 0.6077934758117682, "grad_norm": 0.5566427210426638, "learning_rate": 2.5655937965699566e-06, "loss": 0.0212, "step": 145665 }, { "epoch": 0.6078143385267585, "grad_norm": 0.6140939313404677, "learning_rate": 2.565549764967272e-06, "loss": 0.0263, "step": 145670 }, { "epoch": 0.6078352012417488, "grad_norm": 0.8860678067193574, "learning_rate": 2.5655057356315672e-06, "loss": 0.022, "step": 145675 }, { "epoch": 0.6078560639567391, "grad_norm": 0.8921971888007871, "learning_rate": 2.5654617085626453e-06, "loss": 0.0144, "step": 145680 }, { "epoch": 0.6078769266717293, "grad_norm": 0.6970290083058186, "learning_rate": 2.565417683760314e-06, "loss": 0.0242, "step": 145685 }, { "epoch": 0.6078977893867197, "grad_norm": 0.6234102365336625, "learning_rate": 2.5653736612243774e-06, "loss": 0.0272, "step": 145690 }, { "epoch": 0.6079186521017099, "grad_norm": 0.9267648209336781, "learning_rate": 2.565329640954642e-06, "loss": 0.0192, "step": 145695 }, { "epoch": 0.6079395148167002, "grad_norm": 1.0567538540945765, "learning_rate": 2.565285622950912e-06, "loss": 0.0215, "step": 145700 }, { "epoch": 0.6079603775316904, "grad_norm": 0.46263248499131937, "learning_rate": 2.5652416072129956e-06, "loss": 0.0201, "step": 145705 }, { "epoch": 0.6079812402466808, "grad_norm": 0.3900228736126985, "learning_rate": 2.5651975937406952e-06, "loss": 0.02, "step": 145710 }, { "epoch": 0.608002102961671, "grad_norm": 0.44053564004860524, "learning_rate": 2.5651535825338188e-06, "loss": 0.0279, "step": 145715 }, { "epoch": 0.6080229656766613, "grad_norm": 0.8028605634753818, "learning_rate": 2.5651095735921706e-06, "loss": 0.0181, "step": 145720 }, { "epoch": 0.6080438283916516, "grad_norm": 0.7704304612502284, "learning_rate": 2.565065566915557e-06, "loss": 0.018, "step": 145725 }, { "epoch": 0.6080646911066419, "grad_norm": 0.7860741115644249, "learning_rate": 2.5650215625037844e-06, "loss": 0.0199, "step": 145730 }, { "epoch": 0.6080855538216321, "grad_norm": 0.7694568490180361, "learning_rate": 2.5649775603566575e-06, "loss": 0.0252, "step": 145735 }, { "epoch": 0.6081064165366225, "grad_norm": 0.7697771793827372, "learning_rate": 2.564933560473982e-06, "loss": 0.0294, "step": 145740 }, { "epoch": 0.6081272792516127, "grad_norm": 0.5504704314787818, "learning_rate": 2.564889562855564e-06, "loss": 0.0185, "step": 145745 }, { "epoch": 0.6081481419666029, "grad_norm": 1.0629323504384003, "learning_rate": 2.5648455675012095e-06, "loss": 0.0253, "step": 145750 }, { "epoch": 0.6081690046815933, "grad_norm": 0.4835446868708475, "learning_rate": 2.5648015744107246e-06, "loss": 0.0266, "step": 145755 }, { "epoch": 0.6081898673965835, "grad_norm": 0.9800620425270912, "learning_rate": 2.564757583583914e-06, "loss": 0.0214, "step": 145760 }, { "epoch": 0.6082107301115738, "grad_norm": 0.8031552592946455, "learning_rate": 2.5647135950205847e-06, "loss": 0.0235, "step": 145765 }, { "epoch": 0.608231592826564, "grad_norm": 1.1064030760094117, "learning_rate": 2.5646696087205426e-06, "loss": 0.0252, "step": 145770 }, { "epoch": 0.6082524555415544, "grad_norm": 0.8803438537689394, "learning_rate": 2.5646256246835926e-06, "loss": 0.025, "step": 145775 }, { "epoch": 0.6082733182565446, "grad_norm": 0.28560679183771615, "learning_rate": 2.5645816429095415e-06, "loss": 0.0236, "step": 145780 }, { "epoch": 0.6082941809715349, "grad_norm": 0.9668570600418865, "learning_rate": 2.564537663398195e-06, "loss": 0.025, "step": 145785 }, { "epoch": 0.6083150436865252, "grad_norm": 0.7839793999882575, "learning_rate": 2.564493686149359e-06, "loss": 0.0216, "step": 145790 }, { "epoch": 0.6083359064015155, "grad_norm": 0.8253789556441042, "learning_rate": 2.5644497111628404e-06, "loss": 0.0207, "step": 145795 }, { "epoch": 0.6083567691165057, "grad_norm": 1.251239419144486, "learning_rate": 2.5644057384384435e-06, "loss": 0.0196, "step": 145800 }, { "epoch": 0.6083776318314961, "grad_norm": 0.8280876742488744, "learning_rate": 2.564361767975976e-06, "loss": 0.0281, "step": 145805 }, { "epoch": 0.6083984945464863, "grad_norm": 0.6052890046283498, "learning_rate": 2.5643177997752427e-06, "loss": 0.0193, "step": 145810 }, { "epoch": 0.6084193572614766, "grad_norm": 0.6122983436470375, "learning_rate": 2.564273833836051e-06, "loss": 0.0251, "step": 145815 }, { "epoch": 0.6084402199764669, "grad_norm": 1.0174644320974242, "learning_rate": 2.5642298701582064e-06, "loss": 0.0245, "step": 145820 }, { "epoch": 0.6084610826914572, "grad_norm": 1.110192804447548, "learning_rate": 2.5641859087415143e-06, "loss": 0.0321, "step": 145825 }, { "epoch": 0.6084819454064474, "grad_norm": 0.7816669005876116, "learning_rate": 2.5641419495857823e-06, "loss": 0.0224, "step": 145830 }, { "epoch": 0.6085028081214376, "grad_norm": 0.35497524952536974, "learning_rate": 2.5640979926908156e-06, "loss": 0.0222, "step": 145835 }, { "epoch": 0.608523670836428, "grad_norm": 0.7313646681742029, "learning_rate": 2.564054038056421e-06, "loss": 0.0205, "step": 145840 }, { "epoch": 0.6085445335514182, "grad_norm": 0.6768904045425582, "learning_rate": 2.564010085682404e-06, "loss": 0.0215, "step": 145845 }, { "epoch": 0.6085653962664085, "grad_norm": 0.5784921655986351, "learning_rate": 2.5639661355685717e-06, "loss": 0.0272, "step": 145850 }, { "epoch": 0.6085862589813988, "grad_norm": 0.7391080788678673, "learning_rate": 2.563922187714731e-06, "loss": 0.0195, "step": 145855 }, { "epoch": 0.6086071216963891, "grad_norm": 0.6517458888021059, "learning_rate": 2.563878242120686e-06, "loss": 0.0267, "step": 145860 }, { "epoch": 0.6086279844113793, "grad_norm": 0.367344797712192, "learning_rate": 2.563834298786245e-06, "loss": 0.0291, "step": 145865 }, { "epoch": 0.6086488471263697, "grad_norm": 0.5376448772371739, "learning_rate": 2.5637903577112133e-06, "loss": 0.0253, "step": 145870 }, { "epoch": 0.6086697098413599, "grad_norm": 1.2431881731161538, "learning_rate": 2.5637464188953974e-06, "loss": 0.0247, "step": 145875 }, { "epoch": 0.6086905725563502, "grad_norm": 1.5939725293916323, "learning_rate": 2.563702482338605e-06, "loss": 0.0269, "step": 145880 }, { "epoch": 0.6087114352713404, "grad_norm": 1.0079908905115056, "learning_rate": 2.5636585480406407e-06, "loss": 0.0244, "step": 145885 }, { "epoch": 0.6087322979863308, "grad_norm": 0.47803727518471006, "learning_rate": 2.563614616001312e-06, "loss": 0.0212, "step": 145890 }, { "epoch": 0.608753160701321, "grad_norm": 0.8712830577387355, "learning_rate": 2.563570686220425e-06, "loss": 0.018, "step": 145895 }, { "epoch": 0.6087740234163113, "grad_norm": 0.7480119911388365, "learning_rate": 2.563526758697787e-06, "loss": 0.019, "step": 145900 }, { "epoch": 0.6087948861313016, "grad_norm": 0.4465245002342109, "learning_rate": 2.563482833433203e-06, "loss": 0.0278, "step": 145905 }, { "epoch": 0.6088157488462919, "grad_norm": 0.5906293253034759, "learning_rate": 2.5634389104264813e-06, "loss": 0.0253, "step": 145910 }, { "epoch": 0.6088366115612821, "grad_norm": 0.6254910296861789, "learning_rate": 2.5633949896774275e-06, "loss": 0.0235, "step": 145915 }, { "epoch": 0.6088574742762725, "grad_norm": 1.7766059228931206, "learning_rate": 2.5633510711858475e-06, "loss": 0.0293, "step": 145920 }, { "epoch": 0.6088783369912627, "grad_norm": 0.8794079596499385, "learning_rate": 2.563307154951549e-06, "loss": 0.0245, "step": 145925 }, { "epoch": 0.608899199706253, "grad_norm": 0.7215006837207869, "learning_rate": 2.563263240974339e-06, "loss": 0.0265, "step": 145930 }, { "epoch": 0.6089200624212433, "grad_norm": 1.1228672600777798, "learning_rate": 2.563219329254023e-06, "loss": 0.0199, "step": 145935 }, { "epoch": 0.6089409251362335, "grad_norm": 0.717657339102416, "learning_rate": 2.563175419790409e-06, "loss": 0.0198, "step": 145940 }, { "epoch": 0.6089617878512238, "grad_norm": 0.7081818424004602, "learning_rate": 2.563131512583302e-06, "loss": 0.0318, "step": 145945 }, { "epoch": 0.608982650566214, "grad_norm": 0.539531421130461, "learning_rate": 2.5630876076325105e-06, "loss": 0.0232, "step": 145950 }, { "epoch": 0.6090035132812044, "grad_norm": 0.4458411711609251, "learning_rate": 2.5630437049378404e-06, "loss": 0.0279, "step": 145955 }, { "epoch": 0.6090243759961946, "grad_norm": 0.7736771432201315, "learning_rate": 2.5629998044990985e-06, "loss": 0.0301, "step": 145960 }, { "epoch": 0.6090452387111849, "grad_norm": 0.623300791227697, "learning_rate": 2.562955906316091e-06, "loss": 0.0229, "step": 145965 }, { "epoch": 0.6090661014261752, "grad_norm": 1.385977764407229, "learning_rate": 2.5629120103886257e-06, "loss": 0.0301, "step": 145970 }, { "epoch": 0.6090869641411655, "grad_norm": 0.28696396391363144, "learning_rate": 2.5628681167165094e-06, "loss": 0.0191, "step": 145975 }, { "epoch": 0.6091078268561557, "grad_norm": 0.847308650299419, "learning_rate": 2.5628242252995485e-06, "loss": 0.0286, "step": 145980 }, { "epoch": 0.6091286895711461, "grad_norm": 0.5521977817963257, "learning_rate": 2.56278033613755e-06, "loss": 0.0289, "step": 145985 }, { "epoch": 0.6091495522861363, "grad_norm": 0.3265553126826078, "learning_rate": 2.562736449230321e-06, "loss": 0.0245, "step": 145990 }, { "epoch": 0.6091704150011266, "grad_norm": 1.0383927110813636, "learning_rate": 2.5626925645776686e-06, "loss": 0.0218, "step": 145995 }, { "epoch": 0.6091912777161169, "grad_norm": 0.92580030312544, "learning_rate": 2.5626486821793995e-06, "loss": 0.0242, "step": 146000 }, { "epoch": 0.6092121404311072, "grad_norm": 0.4687660922523626, "learning_rate": 2.5626048020353205e-06, "loss": 0.0213, "step": 146005 }, { "epoch": 0.6092330031460974, "grad_norm": 0.4959037410357742, "learning_rate": 2.5625609241452386e-06, "loss": 0.025, "step": 146010 }, { "epoch": 0.6092538658610877, "grad_norm": 0.5974531321015567, "learning_rate": 2.5625170485089614e-06, "loss": 0.0257, "step": 146015 }, { "epoch": 0.609274728576078, "grad_norm": 0.5806785682450246, "learning_rate": 2.562473175126296e-06, "loss": 0.0211, "step": 146020 }, { "epoch": 0.6092955912910683, "grad_norm": 0.37077009541542033, "learning_rate": 2.5624293039970483e-06, "loss": 0.0235, "step": 146025 }, { "epoch": 0.6093164540060585, "grad_norm": 0.8363831168142929, "learning_rate": 2.562385435121027e-06, "loss": 0.0191, "step": 146030 }, { "epoch": 0.6093373167210488, "grad_norm": 0.5318901508356962, "learning_rate": 2.562341568498038e-06, "loss": 0.0233, "step": 146035 }, { "epoch": 0.6093581794360391, "grad_norm": 0.5152315099823952, "learning_rate": 2.562297704127889e-06, "loss": 0.0226, "step": 146040 }, { "epoch": 0.6093790421510293, "grad_norm": 0.4601815975449573, "learning_rate": 2.5622538420103877e-06, "loss": 0.0184, "step": 146045 }, { "epoch": 0.6093999048660197, "grad_norm": 0.8907457903444306, "learning_rate": 2.5622099821453396e-06, "loss": 0.0254, "step": 146050 }, { "epoch": 0.6094207675810099, "grad_norm": 0.6975733836677037, "learning_rate": 2.5621661245325535e-06, "loss": 0.0329, "step": 146055 }, { "epoch": 0.6094416302960002, "grad_norm": 0.9826594308837718, "learning_rate": 2.5621222691718365e-06, "loss": 0.0172, "step": 146060 }, { "epoch": 0.6094624930109904, "grad_norm": 0.6918438619665991, "learning_rate": 2.562078416062995e-06, "loss": 0.0272, "step": 146065 }, { "epoch": 0.6094833557259808, "grad_norm": 0.4588034549833192, "learning_rate": 2.5620345652058376e-06, "loss": 0.014, "step": 146070 }, { "epoch": 0.609504218440971, "grad_norm": 0.6614348781391506, "learning_rate": 2.5619907166001703e-06, "loss": 0.0214, "step": 146075 }, { "epoch": 0.6095250811559613, "grad_norm": 0.7905223031312866, "learning_rate": 2.5619468702458005e-06, "loss": 0.0226, "step": 146080 }, { "epoch": 0.6095459438709516, "grad_norm": 0.6527081776225988, "learning_rate": 2.561903026142537e-06, "loss": 0.0222, "step": 146085 }, { "epoch": 0.6095668065859419, "grad_norm": 0.6630378908964393, "learning_rate": 2.5618591842901856e-06, "loss": 0.0227, "step": 146090 }, { "epoch": 0.6095876693009321, "grad_norm": 0.7085135309056481, "learning_rate": 2.5618153446885547e-06, "loss": 0.0263, "step": 146095 }, { "epoch": 0.6096085320159225, "grad_norm": 0.5366675137857181, "learning_rate": 2.561771507337451e-06, "loss": 0.0306, "step": 146100 }, { "epoch": 0.6096293947309127, "grad_norm": 0.6483208465839935, "learning_rate": 2.5617276722366826e-06, "loss": 0.0251, "step": 146105 }, { "epoch": 0.609650257445903, "grad_norm": 0.9065182505604458, "learning_rate": 2.561683839386056e-06, "loss": 0.0216, "step": 146110 }, { "epoch": 0.6096711201608933, "grad_norm": 0.491046593884699, "learning_rate": 2.5616400087853804e-06, "loss": 0.0188, "step": 146115 }, { "epoch": 0.6096919828758836, "grad_norm": 0.9658831420396291, "learning_rate": 2.5615961804344617e-06, "loss": 0.0282, "step": 146120 }, { "epoch": 0.6097128455908738, "grad_norm": 1.0505899324673136, "learning_rate": 2.5615523543331083e-06, "loss": 0.0246, "step": 146125 }, { "epoch": 0.609733708305864, "grad_norm": 0.6066375938429567, "learning_rate": 2.561508530481127e-06, "loss": 0.0237, "step": 146130 }, { "epoch": 0.6097545710208544, "grad_norm": 1.0399398284026111, "learning_rate": 2.561464708878327e-06, "loss": 0.024, "step": 146135 }, { "epoch": 0.6097754337358446, "grad_norm": 0.5174851563930711, "learning_rate": 2.561420889524514e-06, "loss": 0.0235, "step": 146140 }, { "epoch": 0.6097962964508349, "grad_norm": 0.6090313173030167, "learning_rate": 2.5613770724194965e-06, "loss": 0.0294, "step": 146145 }, { "epoch": 0.6098171591658252, "grad_norm": 0.7300065531305105, "learning_rate": 2.5613332575630823e-06, "loss": 0.0252, "step": 146150 }, { "epoch": 0.6098380218808155, "grad_norm": 0.40342588794144907, "learning_rate": 2.561289444955079e-06, "loss": 0.0222, "step": 146155 }, { "epoch": 0.6098588845958057, "grad_norm": 0.6007615072657583, "learning_rate": 2.561245634595294e-06, "loss": 0.0237, "step": 146160 }, { "epoch": 0.6098797473107961, "grad_norm": 0.7930313281162907, "learning_rate": 2.5612018264835353e-06, "loss": 0.0246, "step": 146165 }, { "epoch": 0.6099006100257863, "grad_norm": 0.8824160155466793, "learning_rate": 2.56115802061961e-06, "loss": 0.0206, "step": 146170 }, { "epoch": 0.6099214727407766, "grad_norm": 0.5237467913282493, "learning_rate": 2.5611142170033277e-06, "loss": 0.02, "step": 146175 }, { "epoch": 0.6099423354557669, "grad_norm": 0.5218507650980738, "learning_rate": 2.5610704156344945e-06, "loss": 0.0142, "step": 146180 }, { "epoch": 0.6099631981707572, "grad_norm": 1.0678730864090658, "learning_rate": 2.5610266165129184e-06, "loss": 0.0323, "step": 146185 }, { "epoch": 0.6099840608857474, "grad_norm": 0.6928348747840052, "learning_rate": 2.560982819638407e-06, "loss": 0.0208, "step": 146190 }, { "epoch": 0.6100049236007377, "grad_norm": 0.5437171071794303, "learning_rate": 2.5609390250107695e-06, "loss": 0.0241, "step": 146195 }, { "epoch": 0.610025786315728, "grad_norm": 0.6637313481143092, "learning_rate": 2.5608952326298134e-06, "loss": 0.0262, "step": 146200 }, { "epoch": 0.6100466490307183, "grad_norm": 0.6506599425030816, "learning_rate": 2.560851442495346e-06, "loss": 0.0246, "step": 146205 }, { "epoch": 0.6100675117457085, "grad_norm": 0.9241664752822298, "learning_rate": 2.5608076546071745e-06, "loss": 0.029, "step": 146210 }, { "epoch": 0.6100883744606989, "grad_norm": 0.5664974368949363, "learning_rate": 2.560763868965109e-06, "loss": 0.0347, "step": 146215 }, { "epoch": 0.6101092371756891, "grad_norm": 0.44770557330929484, "learning_rate": 2.5607200855689556e-06, "loss": 0.0201, "step": 146220 }, { "epoch": 0.6101300998906793, "grad_norm": 0.5594870131907911, "learning_rate": 2.5606763044185235e-06, "loss": 0.022, "step": 146225 }, { "epoch": 0.6101509626056697, "grad_norm": 0.5894798946680712, "learning_rate": 2.5606325255136194e-06, "loss": 0.019, "step": 146230 }, { "epoch": 0.6101718253206599, "grad_norm": 0.6702518372856667, "learning_rate": 2.560588748854053e-06, "loss": 0.0182, "step": 146235 }, { "epoch": 0.6101926880356502, "grad_norm": 0.36987220262977444, "learning_rate": 2.5605449744396315e-06, "loss": 0.0207, "step": 146240 }, { "epoch": 0.6102135507506404, "grad_norm": 0.6463129401530529, "learning_rate": 2.5605012022701626e-06, "loss": 0.0265, "step": 146245 }, { "epoch": 0.6102344134656308, "grad_norm": 0.6833084106266523, "learning_rate": 2.5604574323454552e-06, "loss": 0.0224, "step": 146250 }, { "epoch": 0.610255276180621, "grad_norm": 0.34222888369234195, "learning_rate": 2.560413664665317e-06, "loss": 0.0199, "step": 146255 }, { "epoch": 0.6102761388956113, "grad_norm": 0.9949208988907368, "learning_rate": 2.560369899229556e-06, "loss": 0.0243, "step": 146260 }, { "epoch": 0.6102970016106016, "grad_norm": 0.6812171656577448, "learning_rate": 2.560326136037981e-06, "loss": 0.0229, "step": 146265 }, { "epoch": 0.6103178643255919, "grad_norm": 0.43393847978895445, "learning_rate": 2.5602823750904e-06, "loss": 0.0209, "step": 146270 }, { "epoch": 0.6103387270405821, "grad_norm": 0.757013044916685, "learning_rate": 2.560238616386621e-06, "loss": 0.0265, "step": 146275 }, { "epoch": 0.6103595897555725, "grad_norm": 0.6956849898183857, "learning_rate": 2.5601948599264526e-06, "loss": 0.0209, "step": 146280 }, { "epoch": 0.6103804524705627, "grad_norm": 0.7304566211170939, "learning_rate": 2.5601511057097023e-06, "loss": 0.0249, "step": 146285 }, { "epoch": 0.610401315185553, "grad_norm": 0.29473311045835254, "learning_rate": 2.5601073537361794e-06, "loss": 0.0151, "step": 146290 }, { "epoch": 0.6104221779005433, "grad_norm": 0.48743546375297725, "learning_rate": 2.5600636040056916e-06, "loss": 0.0243, "step": 146295 }, { "epoch": 0.6104430406155336, "grad_norm": 0.9559947277986015, "learning_rate": 2.560019856518047e-06, "loss": 0.0228, "step": 146300 }, { "epoch": 0.6104639033305238, "grad_norm": 0.3974806204128956, "learning_rate": 2.5599761112730554e-06, "loss": 0.0221, "step": 146305 }, { "epoch": 0.610484766045514, "grad_norm": 1.0007164557705344, "learning_rate": 2.5599323682705236e-06, "loss": 0.0233, "step": 146310 }, { "epoch": 0.6105056287605044, "grad_norm": 0.4185722834838712, "learning_rate": 2.559888627510261e-06, "loss": 0.0209, "step": 146315 }, { "epoch": 0.6105264914754946, "grad_norm": 0.5406501070883477, "learning_rate": 2.5598448889920757e-06, "loss": 0.0161, "step": 146320 }, { "epoch": 0.6105473541904849, "grad_norm": 0.693455001255928, "learning_rate": 2.559801152715776e-06, "loss": 0.0194, "step": 146325 }, { "epoch": 0.6105682169054752, "grad_norm": 0.7236617599361999, "learning_rate": 2.5597574186811703e-06, "loss": 0.0209, "step": 146330 }, { "epoch": 0.6105890796204655, "grad_norm": 0.5852738090285173, "learning_rate": 2.559713686888067e-06, "loss": 0.0234, "step": 146335 }, { "epoch": 0.6106099423354557, "grad_norm": 0.9200143270940365, "learning_rate": 2.5596699573362755e-06, "loss": 0.0205, "step": 146340 }, { "epoch": 0.6106308050504461, "grad_norm": 0.5601616762689179, "learning_rate": 2.559626230025604e-06, "loss": 0.0233, "step": 146345 }, { "epoch": 0.6106516677654363, "grad_norm": 0.4568531470147559, "learning_rate": 2.5595825049558602e-06, "loss": 0.0164, "step": 146350 }, { "epoch": 0.6106725304804266, "grad_norm": 0.6760026907801656, "learning_rate": 2.5595387821268543e-06, "loss": 0.0252, "step": 146355 }, { "epoch": 0.6106933931954169, "grad_norm": 0.5269321803503275, "learning_rate": 2.5594950615383936e-06, "loss": 0.0285, "step": 146360 }, { "epoch": 0.6107142559104072, "grad_norm": 1.9695037707108374, "learning_rate": 2.5594513431902872e-06, "loss": 0.0609, "step": 146365 }, { "epoch": 0.6107351186253974, "grad_norm": 0.9663232369683321, "learning_rate": 2.5594076270823437e-06, "loss": 0.0272, "step": 146370 }, { "epoch": 0.6107559813403877, "grad_norm": 0.7810171483837475, "learning_rate": 2.5593639132143716e-06, "loss": 0.0305, "step": 146375 }, { "epoch": 0.610776844055378, "grad_norm": 0.8393344220920117, "learning_rate": 2.55932020158618e-06, "loss": 0.026, "step": 146380 }, { "epoch": 0.6107977067703683, "grad_norm": 0.3572717297578666, "learning_rate": 2.559276492197578e-06, "loss": 0.016, "step": 146385 }, { "epoch": 0.6108185694853585, "grad_norm": 0.5813225264567008, "learning_rate": 2.5592327850483734e-06, "loss": 0.021, "step": 146390 }, { "epoch": 0.6108394322003489, "grad_norm": 0.7526163914865301, "learning_rate": 2.5591890801383757e-06, "loss": 0.0245, "step": 146395 }, { "epoch": 0.6108602949153391, "grad_norm": 0.694221533286652, "learning_rate": 2.559145377467393e-06, "loss": 0.0223, "step": 146400 }, { "epoch": 0.6108811576303294, "grad_norm": 0.4558698059414301, "learning_rate": 2.559101677035235e-06, "loss": 0.0258, "step": 146405 }, { "epoch": 0.6109020203453197, "grad_norm": 0.7251801716282904, "learning_rate": 2.55905797884171e-06, "loss": 0.0201, "step": 146410 }, { "epoch": 0.61092288306031, "grad_norm": 2.293404341073388, "learning_rate": 2.559014282886627e-06, "loss": 0.0331, "step": 146415 }, { "epoch": 0.6109437457753002, "grad_norm": 0.7646034269548686, "learning_rate": 2.5589705891697953e-06, "loss": 0.0263, "step": 146420 }, { "epoch": 0.6109646084902904, "grad_norm": 0.6365531153761019, "learning_rate": 2.5589268976910233e-06, "loss": 0.0218, "step": 146425 }, { "epoch": 0.6109854712052808, "grad_norm": 0.3736517587186285, "learning_rate": 2.55888320845012e-06, "loss": 0.0193, "step": 146430 }, { "epoch": 0.611006333920271, "grad_norm": 0.5567156996363826, "learning_rate": 2.5588395214468942e-06, "loss": 0.0163, "step": 146435 }, { "epoch": 0.6110271966352613, "grad_norm": 0.8397754678147202, "learning_rate": 2.5587958366811548e-06, "loss": 0.0283, "step": 146440 }, { "epoch": 0.6110480593502516, "grad_norm": 2.0468661124996133, "learning_rate": 2.5587521541527122e-06, "loss": 0.0222, "step": 146445 }, { "epoch": 0.6110689220652419, "grad_norm": 1.0728611126932168, "learning_rate": 2.558708473861374e-06, "loss": 0.0205, "step": 146450 }, { "epoch": 0.6110897847802321, "grad_norm": 0.7166079088474308, "learning_rate": 2.558664795806949e-06, "loss": 0.0189, "step": 146455 }, { "epoch": 0.6111106474952225, "grad_norm": 0.7602636134134899, "learning_rate": 2.5586211199892472e-06, "loss": 0.0225, "step": 146460 }, { "epoch": 0.6111315102102127, "grad_norm": 0.38987266523708997, "learning_rate": 2.558577446408078e-06, "loss": 0.0194, "step": 146465 }, { "epoch": 0.611152372925203, "grad_norm": 0.4931006187205306, "learning_rate": 2.5585337750632496e-06, "loss": 0.0153, "step": 146470 }, { "epoch": 0.6111732356401933, "grad_norm": 0.5217327021163154, "learning_rate": 2.5584901059545716e-06, "loss": 0.0244, "step": 146475 }, { "epoch": 0.6111940983551836, "grad_norm": 0.9449674142840797, "learning_rate": 2.5584464390818525e-06, "loss": 0.0292, "step": 146480 }, { "epoch": 0.6112149610701738, "grad_norm": 0.5009732647127028, "learning_rate": 2.558402774444903e-06, "loss": 0.0216, "step": 146485 }, { "epoch": 0.6112358237851641, "grad_norm": 0.7381564227362234, "learning_rate": 2.558359112043531e-06, "loss": 0.0209, "step": 146490 }, { "epoch": 0.6112566865001544, "grad_norm": 0.7439744666660506, "learning_rate": 2.558315451877546e-06, "loss": 0.0214, "step": 146495 }, { "epoch": 0.6112775492151447, "grad_norm": 0.5435526671969964, "learning_rate": 2.5582717939467577e-06, "loss": 0.02, "step": 146500 }, { "epoch": 0.6112984119301349, "grad_norm": 1.1251021965723662, "learning_rate": 2.5582281382509747e-06, "loss": 0.0254, "step": 146505 }, { "epoch": 0.6113192746451253, "grad_norm": 0.6492423770289708, "learning_rate": 2.5581844847900073e-06, "loss": 0.0209, "step": 146510 }, { "epoch": 0.6113401373601155, "grad_norm": 0.7858186916092135, "learning_rate": 2.5581408335636643e-06, "loss": 0.0181, "step": 146515 }, { "epoch": 0.6113610000751057, "grad_norm": 0.354839356475663, "learning_rate": 2.558097184571754e-06, "loss": 0.0222, "step": 146520 }, { "epoch": 0.6113818627900961, "grad_norm": 0.681640088491781, "learning_rate": 2.5580535378140876e-06, "loss": 0.0321, "step": 146525 }, { "epoch": 0.6114027255050863, "grad_norm": 0.48182670304328495, "learning_rate": 2.5580098932904735e-06, "loss": 0.0247, "step": 146530 }, { "epoch": 0.6114235882200766, "grad_norm": 0.716387226496993, "learning_rate": 2.5579662510007212e-06, "loss": 0.025, "step": 146535 }, { "epoch": 0.6114444509350669, "grad_norm": 0.3741897238175018, "learning_rate": 2.5579226109446405e-06, "loss": 0.0256, "step": 146540 }, { "epoch": 0.6114653136500572, "grad_norm": 0.7115046315513296, "learning_rate": 2.5578789731220405e-06, "loss": 0.019, "step": 146545 }, { "epoch": 0.6114861763650474, "grad_norm": 0.6431354002372696, "learning_rate": 2.557835337532731e-06, "loss": 0.0212, "step": 146550 }, { "epoch": 0.6115070390800377, "grad_norm": 1.1099216168290673, "learning_rate": 2.5577917041765203e-06, "loss": 0.0186, "step": 146555 }, { "epoch": 0.611527901795028, "grad_norm": 0.5328879960625558, "learning_rate": 2.5577480730532202e-06, "loss": 0.0198, "step": 146560 }, { "epoch": 0.6115487645100183, "grad_norm": 1.5202557000758143, "learning_rate": 2.5577044441626385e-06, "loss": 0.0199, "step": 146565 }, { "epoch": 0.6115696272250085, "grad_norm": 0.4571493941923822, "learning_rate": 2.5576608175045853e-06, "loss": 0.0164, "step": 146570 }, { "epoch": 0.6115904899399989, "grad_norm": 0.5016460765553995, "learning_rate": 2.55761719307887e-06, "loss": 0.0172, "step": 146575 }, { "epoch": 0.6116113526549891, "grad_norm": 0.6230327268927609, "learning_rate": 2.5575735708853027e-06, "loss": 0.0188, "step": 146580 }, { "epoch": 0.6116322153699794, "grad_norm": 1.4936171208613302, "learning_rate": 2.557529950923693e-06, "loss": 0.0267, "step": 146585 }, { "epoch": 0.6116530780849697, "grad_norm": 0.6252537191671753, "learning_rate": 2.55748633319385e-06, "loss": 0.0176, "step": 146590 }, { "epoch": 0.61167394079996, "grad_norm": 0.900330192738454, "learning_rate": 2.557442717695584e-06, "loss": 0.0274, "step": 146595 }, { "epoch": 0.6116948035149502, "grad_norm": 1.08970639852037, "learning_rate": 2.5573991044287045e-06, "loss": 0.0236, "step": 146600 }, { "epoch": 0.6117156662299404, "grad_norm": 0.813016813586262, "learning_rate": 2.5573554933930204e-06, "loss": 0.0169, "step": 146605 }, { "epoch": 0.6117365289449308, "grad_norm": 0.7240892992128874, "learning_rate": 2.557311884588343e-06, "loss": 0.0217, "step": 146610 }, { "epoch": 0.611757391659921, "grad_norm": 0.8164483303062938, "learning_rate": 2.557268278014482e-06, "loss": 0.0169, "step": 146615 }, { "epoch": 0.6117782543749113, "grad_norm": 1.036812476475081, "learning_rate": 2.557224673671246e-06, "loss": 0.0313, "step": 146620 }, { "epoch": 0.6117991170899016, "grad_norm": 0.7187004394471337, "learning_rate": 2.557181071558445e-06, "loss": 0.0222, "step": 146625 }, { "epoch": 0.6118199798048919, "grad_norm": 0.9821723056020382, "learning_rate": 2.5571374716758896e-06, "loss": 0.0288, "step": 146630 }, { "epoch": 0.6118408425198821, "grad_norm": 0.596371362799076, "learning_rate": 2.5570938740233896e-06, "loss": 0.0243, "step": 146635 }, { "epoch": 0.6118617052348725, "grad_norm": 0.46260245648606674, "learning_rate": 2.5570502786007538e-06, "loss": 0.0156, "step": 146640 }, { "epoch": 0.6118825679498627, "grad_norm": 0.5984798693430193, "learning_rate": 2.557006685407794e-06, "loss": 0.0224, "step": 146645 }, { "epoch": 0.611903430664853, "grad_norm": 0.8321790804793282, "learning_rate": 2.5569630944443188e-06, "loss": 0.0195, "step": 146650 }, { "epoch": 0.6119242933798433, "grad_norm": 1.0544877091698655, "learning_rate": 2.556919505710138e-06, "loss": 0.0214, "step": 146655 }, { "epoch": 0.6119451560948336, "grad_norm": 0.59013804260349, "learning_rate": 2.556875919205063e-06, "loss": 0.0261, "step": 146660 }, { "epoch": 0.6119660188098238, "grad_norm": 0.352954532085731, "learning_rate": 2.556832334928902e-06, "loss": 0.0205, "step": 146665 }, { "epoch": 0.6119868815248141, "grad_norm": 0.5082230469241319, "learning_rate": 2.5567887528814666e-06, "loss": 0.0178, "step": 146670 }, { "epoch": 0.6120077442398044, "grad_norm": 1.4466945185423972, "learning_rate": 2.556745173062566e-06, "loss": 0.0287, "step": 146675 }, { "epoch": 0.6120286069547947, "grad_norm": 1.428258440941572, "learning_rate": 2.5567015954720104e-06, "loss": 0.0401, "step": 146680 }, { "epoch": 0.6120494696697849, "grad_norm": 0.7855097396523899, "learning_rate": 2.5566580201096097e-06, "loss": 0.025, "step": 146685 }, { "epoch": 0.6120703323847753, "grad_norm": 0.9376637259913506, "learning_rate": 2.5566144469751745e-06, "loss": 0.0171, "step": 146690 }, { "epoch": 0.6120911950997655, "grad_norm": 0.7405835495222974, "learning_rate": 2.556570876068515e-06, "loss": 0.0295, "step": 146695 }, { "epoch": 0.6121120578147558, "grad_norm": 0.40506535175561664, "learning_rate": 2.556527307389441e-06, "loss": 0.0268, "step": 146700 }, { "epoch": 0.6121329205297461, "grad_norm": 0.7643860843749766, "learning_rate": 2.5564837409377623e-06, "loss": 0.0303, "step": 146705 }, { "epoch": 0.6121537832447363, "grad_norm": 0.2653525620003486, "learning_rate": 2.55644017671329e-06, "loss": 0.0175, "step": 146710 }, { "epoch": 0.6121746459597266, "grad_norm": 0.5938468461263269, "learning_rate": 2.5563966147158343e-06, "loss": 0.0242, "step": 146715 }, { "epoch": 0.612195508674717, "grad_norm": 0.36186463549383013, "learning_rate": 2.5563530549452053e-06, "loss": 0.0188, "step": 146720 }, { "epoch": 0.6122163713897072, "grad_norm": 0.7528522669424417, "learning_rate": 2.5563094974012124e-06, "loss": 0.0275, "step": 146725 }, { "epoch": 0.6122372341046974, "grad_norm": 0.865128584285616, "learning_rate": 2.5562659420836673e-06, "loss": 0.0245, "step": 146730 }, { "epoch": 0.6122580968196877, "grad_norm": 1.6080945949306897, "learning_rate": 2.5562223889923792e-06, "loss": 0.0225, "step": 146735 }, { "epoch": 0.612278959534678, "grad_norm": 0.6452685383580319, "learning_rate": 2.5561788381271594e-06, "loss": 0.0256, "step": 146740 }, { "epoch": 0.6122998222496683, "grad_norm": 0.39381345951774693, "learning_rate": 2.5561352894878173e-06, "loss": 0.0191, "step": 146745 }, { "epoch": 0.6123206849646585, "grad_norm": 0.4881887449178627, "learning_rate": 2.5560917430741643e-06, "loss": 0.0204, "step": 146750 }, { "epoch": 0.6123415476796489, "grad_norm": 0.699270476842884, "learning_rate": 2.5560481988860103e-06, "loss": 0.0216, "step": 146755 }, { "epoch": 0.6123624103946391, "grad_norm": 0.42233834530968034, "learning_rate": 2.556004656923165e-06, "loss": 0.0207, "step": 146760 }, { "epoch": 0.6123832731096294, "grad_norm": 0.9339194688714879, "learning_rate": 2.5559611171854406e-06, "loss": 0.028, "step": 146765 }, { "epoch": 0.6124041358246197, "grad_norm": 0.6907189336277679, "learning_rate": 2.555917579672646e-06, "loss": 0.0188, "step": 146770 }, { "epoch": 0.61242499853961, "grad_norm": 0.6352213131184272, "learning_rate": 2.5558740443845926e-06, "loss": 0.019, "step": 146775 }, { "epoch": 0.6124458612546002, "grad_norm": 1.1065547578987582, "learning_rate": 2.5558305113210907e-06, "loss": 0.0271, "step": 146780 }, { "epoch": 0.6124667239695905, "grad_norm": 0.5756017243619557, "learning_rate": 2.5557869804819507e-06, "loss": 0.028, "step": 146785 }, { "epoch": 0.6124875866845808, "grad_norm": 0.24997741146780839, "learning_rate": 2.5557434518669832e-06, "loss": 0.0243, "step": 146790 }, { "epoch": 0.612508449399571, "grad_norm": 0.6652193415452935, "learning_rate": 2.5556999254759995e-06, "loss": 0.0313, "step": 146795 }, { "epoch": 0.6125293121145613, "grad_norm": 0.5066665951251558, "learning_rate": 2.5556564013088092e-06, "loss": 0.0168, "step": 146800 }, { "epoch": 0.6125501748295517, "grad_norm": 0.6576863778276526, "learning_rate": 2.5556128793652236e-06, "loss": 0.0277, "step": 146805 }, { "epoch": 0.6125710375445419, "grad_norm": 0.5190752347884796, "learning_rate": 2.5555693596450525e-06, "loss": 0.0247, "step": 146810 }, { "epoch": 0.6125919002595321, "grad_norm": 0.5880547120242235, "learning_rate": 2.5555258421481082e-06, "loss": 0.0319, "step": 146815 }, { "epoch": 0.6126127629745225, "grad_norm": 0.3865974868456188, "learning_rate": 2.5554823268742e-06, "loss": 0.0131, "step": 146820 }, { "epoch": 0.6126336256895127, "grad_norm": 0.8429480067288284, "learning_rate": 2.5554388138231388e-06, "loss": 0.0282, "step": 146825 }, { "epoch": 0.612654488404503, "grad_norm": 0.778885864936508, "learning_rate": 2.555395302994736e-06, "loss": 0.0213, "step": 146830 }, { "epoch": 0.6126753511194933, "grad_norm": 1.0553761771556096, "learning_rate": 2.5553517943888025e-06, "loss": 0.0309, "step": 146835 }, { "epoch": 0.6126962138344836, "grad_norm": 0.8141423567693958, "learning_rate": 2.5553082880051477e-06, "loss": 0.0282, "step": 146840 }, { "epoch": 0.6127170765494738, "grad_norm": 0.6123289368140544, "learning_rate": 2.555264783843584e-06, "loss": 0.0228, "step": 146845 }, { "epoch": 0.6127379392644641, "grad_norm": 0.9080931983972482, "learning_rate": 2.555221281903921e-06, "loss": 0.0239, "step": 146850 }, { "epoch": 0.6127588019794544, "grad_norm": 0.6294704469597578, "learning_rate": 2.5551777821859714e-06, "loss": 0.0198, "step": 146855 }, { "epoch": 0.6127796646944447, "grad_norm": 0.9040375587904574, "learning_rate": 2.555134284689544e-06, "loss": 0.0234, "step": 146860 }, { "epoch": 0.6128005274094349, "grad_norm": 0.7748501229962763, "learning_rate": 2.555090789414451e-06, "loss": 0.0227, "step": 146865 }, { "epoch": 0.6128213901244253, "grad_norm": 0.8151291832789282, "learning_rate": 2.555047296360503e-06, "loss": 0.026, "step": 146870 }, { "epoch": 0.6128422528394155, "grad_norm": 1.3727884912231672, "learning_rate": 2.5550038055275108e-06, "loss": 0.0251, "step": 146875 }, { "epoch": 0.6128631155544058, "grad_norm": 0.5968785905037715, "learning_rate": 2.554960316915285e-06, "loss": 0.0216, "step": 146880 }, { "epoch": 0.6128839782693961, "grad_norm": 0.5113813201005816, "learning_rate": 2.5549168305236376e-06, "loss": 0.0182, "step": 146885 }, { "epoch": 0.6129048409843864, "grad_norm": 0.9567251013119727, "learning_rate": 2.5548733463523797e-06, "loss": 0.0237, "step": 146890 }, { "epoch": 0.6129257036993766, "grad_norm": 0.6315659874922824, "learning_rate": 2.554829864401321e-06, "loss": 0.0252, "step": 146895 }, { "epoch": 0.6129465664143668, "grad_norm": 0.5815571063464013, "learning_rate": 2.554786384670273e-06, "loss": 0.02, "step": 146900 }, { "epoch": 0.6129674291293572, "grad_norm": 1.0208691008711201, "learning_rate": 2.5547429071590484e-06, "loss": 0.0197, "step": 146905 }, { "epoch": 0.6129882918443474, "grad_norm": 1.0658368756881387, "learning_rate": 2.554699431867456e-06, "loss": 0.0303, "step": 146910 }, { "epoch": 0.6130091545593377, "grad_norm": 0.9198941364164053, "learning_rate": 2.554655958795309e-06, "loss": 0.0278, "step": 146915 }, { "epoch": 0.613030017274328, "grad_norm": 0.49194192316585, "learning_rate": 2.5546124879424166e-06, "loss": 0.0144, "step": 146920 }, { "epoch": 0.6130508799893183, "grad_norm": 0.9041707888819757, "learning_rate": 2.554569019308592e-06, "loss": 0.0233, "step": 146925 }, { "epoch": 0.6130717427043085, "grad_norm": 0.678353556567284, "learning_rate": 2.5545255528936445e-06, "loss": 0.0208, "step": 146930 }, { "epoch": 0.6130926054192989, "grad_norm": 0.80010437826064, "learning_rate": 2.554482088697387e-06, "loss": 0.0232, "step": 146935 }, { "epoch": 0.6131134681342891, "grad_norm": 0.5491736125982827, "learning_rate": 2.5544386267196297e-06, "loss": 0.0196, "step": 146940 }, { "epoch": 0.6131343308492794, "grad_norm": 15.068076026450948, "learning_rate": 2.5543951669601848e-06, "loss": 0.024, "step": 146945 }, { "epoch": 0.6131551935642697, "grad_norm": 0.9588500369907001, "learning_rate": 2.5543517094188617e-06, "loss": 0.0226, "step": 146950 }, { "epoch": 0.61317605627926, "grad_norm": 0.816388323560686, "learning_rate": 2.554308254095474e-06, "loss": 0.0291, "step": 146955 }, { "epoch": 0.6131969189942502, "grad_norm": 1.3617675072342799, "learning_rate": 2.5542648009898313e-06, "loss": 0.0318, "step": 146960 }, { "epoch": 0.6132177817092405, "grad_norm": 0.6851670576223555, "learning_rate": 2.5542213501017466e-06, "loss": 0.0255, "step": 146965 }, { "epoch": 0.6132386444242308, "grad_norm": 0.32294618537871794, "learning_rate": 2.55417790143103e-06, "loss": 0.028, "step": 146970 }, { "epoch": 0.6132595071392211, "grad_norm": 0.7959152458325102, "learning_rate": 2.5541344549774926e-06, "loss": 0.0213, "step": 146975 }, { "epoch": 0.6132803698542113, "grad_norm": 0.9480838308731919, "learning_rate": 2.5540910107409476e-06, "loss": 0.023, "step": 146980 }, { "epoch": 0.6133012325692017, "grad_norm": 0.6725226623549693, "learning_rate": 2.554047568721205e-06, "loss": 0.0186, "step": 146985 }, { "epoch": 0.6133220952841919, "grad_norm": 0.618653787318722, "learning_rate": 2.5540041289180767e-06, "loss": 0.0234, "step": 146990 }, { "epoch": 0.6133429579991821, "grad_norm": 0.6329746189008925, "learning_rate": 2.553960691331374e-06, "loss": 0.021, "step": 146995 }, { "epoch": 0.6133638207141725, "grad_norm": 0.47504937300463584, "learning_rate": 2.553917255960909e-06, "loss": 0.0183, "step": 147000 }, { "epoch": 0.6133846834291627, "grad_norm": 0.429926923221758, "learning_rate": 2.553873822806493e-06, "loss": 0.0273, "step": 147005 }, { "epoch": 0.613405546144153, "grad_norm": 0.3375709311930796, "learning_rate": 2.5538303918679375e-06, "loss": 0.0288, "step": 147010 }, { "epoch": 0.6134264088591433, "grad_norm": 0.7363201545226369, "learning_rate": 2.5537869631450534e-06, "loss": 0.0211, "step": 147015 }, { "epoch": 0.6134472715741336, "grad_norm": 0.7754883366568862, "learning_rate": 2.5537435366376533e-06, "loss": 0.0218, "step": 147020 }, { "epoch": 0.6134681342891238, "grad_norm": 0.3014032895954698, "learning_rate": 2.5537001123455486e-06, "loss": 0.0177, "step": 147025 }, { "epoch": 0.6134889970041141, "grad_norm": 0.892857184223285, "learning_rate": 2.5536566902685505e-06, "loss": 0.0196, "step": 147030 }, { "epoch": 0.6135098597191044, "grad_norm": 0.8323029070299892, "learning_rate": 2.5536132704064713e-06, "loss": 0.0202, "step": 147035 }, { "epoch": 0.6135307224340947, "grad_norm": 0.9234575745445339, "learning_rate": 2.5535698527591223e-06, "loss": 0.0243, "step": 147040 }, { "epoch": 0.6135515851490849, "grad_norm": 0.2979946776998316, "learning_rate": 2.553526437326316e-06, "loss": 0.0136, "step": 147045 }, { "epoch": 0.6135724478640753, "grad_norm": 0.8011996121335749, "learning_rate": 2.5534830241078625e-06, "loss": 0.0284, "step": 147050 }, { "epoch": 0.6135933105790655, "grad_norm": 0.7097231664081121, "learning_rate": 2.5534396131035756e-06, "loss": 0.0203, "step": 147055 }, { "epoch": 0.6136141732940558, "grad_norm": 1.2356335936122387, "learning_rate": 2.5533962043132656e-06, "loss": 0.0211, "step": 147060 }, { "epoch": 0.6136350360090461, "grad_norm": 0.578374708553448, "learning_rate": 2.5533527977367443e-06, "loss": 0.0224, "step": 147065 }, { "epoch": 0.6136558987240364, "grad_norm": 0.4229758180397092, "learning_rate": 2.5533093933738247e-06, "loss": 0.0274, "step": 147070 }, { "epoch": 0.6136767614390266, "grad_norm": 1.9501745894217304, "learning_rate": 2.5532659912243174e-06, "loss": 0.0339, "step": 147075 }, { "epoch": 0.6136976241540169, "grad_norm": 0.8507513710852872, "learning_rate": 2.5532225912880355e-06, "loss": 0.0307, "step": 147080 }, { "epoch": 0.6137184868690072, "grad_norm": 0.8421476617871914, "learning_rate": 2.55317919356479e-06, "loss": 0.0239, "step": 147085 }, { "epoch": 0.6137393495839975, "grad_norm": 0.7033673296787669, "learning_rate": 2.553135798054393e-06, "loss": 0.0267, "step": 147090 }, { "epoch": 0.6137602122989877, "grad_norm": 0.7798615835954834, "learning_rate": 2.553092404756657e-06, "loss": 0.0194, "step": 147095 }, { "epoch": 0.613781075013978, "grad_norm": 0.7846581863279598, "learning_rate": 2.5530490136713932e-06, "loss": 0.0273, "step": 147100 }, { "epoch": 0.6138019377289683, "grad_norm": 0.8259257981727788, "learning_rate": 2.5530056247984137e-06, "loss": 0.0159, "step": 147105 }, { "epoch": 0.6138228004439585, "grad_norm": 0.9926237972047949, "learning_rate": 2.5529622381375314e-06, "loss": 0.0233, "step": 147110 }, { "epoch": 0.6138436631589489, "grad_norm": 0.9434046473639498, "learning_rate": 2.5529188536885576e-06, "loss": 0.0209, "step": 147115 }, { "epoch": 0.6138645258739391, "grad_norm": 0.4476163135751404, "learning_rate": 2.5528754714513036e-06, "loss": 0.0186, "step": 147120 }, { "epoch": 0.6138853885889294, "grad_norm": 1.4903080817163754, "learning_rate": 2.5528320914255832e-06, "loss": 0.0314, "step": 147125 }, { "epoch": 0.6139062513039197, "grad_norm": 0.5785692317944393, "learning_rate": 2.552788713611207e-06, "loss": 0.0251, "step": 147130 }, { "epoch": 0.61392711401891, "grad_norm": 0.7941996964740661, "learning_rate": 2.5527453380079884e-06, "loss": 0.027, "step": 147135 }, { "epoch": 0.6139479767339002, "grad_norm": 0.7344530942880039, "learning_rate": 2.5527019646157392e-06, "loss": 0.029, "step": 147140 }, { "epoch": 0.6139688394488905, "grad_norm": 0.4897493195319167, "learning_rate": 2.5526585934342706e-06, "loss": 0.0201, "step": 147145 }, { "epoch": 0.6139897021638808, "grad_norm": 0.67143938892978, "learning_rate": 2.552615224463396e-06, "loss": 0.0232, "step": 147150 }, { "epoch": 0.6140105648788711, "grad_norm": 0.8105939142865435, "learning_rate": 2.5525718577029266e-06, "loss": 0.035, "step": 147155 }, { "epoch": 0.6140314275938613, "grad_norm": 0.42992295449590373, "learning_rate": 2.5525284931526756e-06, "loss": 0.0185, "step": 147160 }, { "epoch": 0.6140522903088517, "grad_norm": 0.7099569484767103, "learning_rate": 2.5524851308124547e-06, "loss": 0.0245, "step": 147165 }, { "epoch": 0.6140731530238419, "grad_norm": 0.8422374964420972, "learning_rate": 2.5524417706820763e-06, "loss": 0.0229, "step": 147170 }, { "epoch": 0.6140940157388322, "grad_norm": 0.9774552709113556, "learning_rate": 2.5523984127613527e-06, "loss": 0.024, "step": 147175 }, { "epoch": 0.6141148784538225, "grad_norm": 1.1663744092412685, "learning_rate": 2.5523550570500967e-06, "loss": 0.0264, "step": 147180 }, { "epoch": 0.6141357411688128, "grad_norm": 0.5193584697173861, "learning_rate": 2.552311703548119e-06, "loss": 0.0222, "step": 147185 }, { "epoch": 0.614156603883803, "grad_norm": 0.45220686682330624, "learning_rate": 2.5522683522552344e-06, "loss": 0.0206, "step": 147190 }, { "epoch": 0.6141774665987934, "grad_norm": 0.5914585013340575, "learning_rate": 2.5522250031712536e-06, "loss": 0.0162, "step": 147195 }, { "epoch": 0.6141983293137836, "grad_norm": 0.40757096924206737, "learning_rate": 2.55218165629599e-06, "loss": 0.0193, "step": 147200 }, { "epoch": 0.6142191920287738, "grad_norm": 0.8235996503764237, "learning_rate": 2.552138311629255e-06, "loss": 0.0266, "step": 147205 }, { "epoch": 0.6142400547437641, "grad_norm": 0.4860435143920354, "learning_rate": 2.5520949691708617e-06, "loss": 0.0146, "step": 147210 }, { "epoch": 0.6142609174587544, "grad_norm": 1.0848333895996207, "learning_rate": 2.5520516289206227e-06, "loss": 0.0261, "step": 147215 }, { "epoch": 0.6142817801737447, "grad_norm": 0.5300306731996902, "learning_rate": 2.55200829087835e-06, "loss": 0.0151, "step": 147220 }, { "epoch": 0.6143026428887349, "grad_norm": 0.42272823727570474, "learning_rate": 2.5519649550438567e-06, "loss": 0.0207, "step": 147225 }, { "epoch": 0.6143235056037253, "grad_norm": 0.6480958693186917, "learning_rate": 2.551921621416955e-06, "loss": 0.0198, "step": 147230 }, { "epoch": 0.6143443683187155, "grad_norm": 1.1669381234825296, "learning_rate": 2.551878289997458e-06, "loss": 0.0296, "step": 147235 }, { "epoch": 0.6143652310337058, "grad_norm": 0.5902065707906055, "learning_rate": 2.551834960785177e-06, "loss": 0.019, "step": 147240 }, { "epoch": 0.6143860937486961, "grad_norm": 0.5769114899884821, "learning_rate": 2.5517916337799262e-06, "loss": 0.0218, "step": 147245 }, { "epoch": 0.6144069564636864, "grad_norm": 0.5046632478263223, "learning_rate": 2.5517483089815176e-06, "loss": 0.0231, "step": 147250 }, { "epoch": 0.6144278191786766, "grad_norm": 0.5627514879798987, "learning_rate": 2.5517049863897636e-06, "loss": 0.02, "step": 147255 }, { "epoch": 0.6144486818936669, "grad_norm": 1.3425648097310992, "learning_rate": 2.5516616660044767e-06, "loss": 0.0251, "step": 147260 }, { "epoch": 0.6144695446086572, "grad_norm": 1.1821625046062227, "learning_rate": 2.55161834782547e-06, "loss": 0.0236, "step": 147265 }, { "epoch": 0.6144904073236475, "grad_norm": 0.6464276287956694, "learning_rate": 2.5515750318525573e-06, "loss": 0.0199, "step": 147270 }, { "epoch": 0.6145112700386377, "grad_norm": 0.9573765287217337, "learning_rate": 2.551531718085549e-06, "loss": 0.0218, "step": 147275 }, { "epoch": 0.6145321327536281, "grad_norm": 0.3813629684731585, "learning_rate": 2.55148840652426e-06, "loss": 0.0238, "step": 147280 }, { "epoch": 0.6145529954686183, "grad_norm": 0.442893073641584, "learning_rate": 2.551445097168502e-06, "loss": 0.0248, "step": 147285 }, { "epoch": 0.6145738581836085, "grad_norm": 0.6066536013681592, "learning_rate": 2.551401790018088e-06, "loss": 0.0328, "step": 147290 }, { "epoch": 0.6145947208985989, "grad_norm": 0.8476258093347492, "learning_rate": 2.551358485072831e-06, "loss": 0.0227, "step": 147295 }, { "epoch": 0.6146155836135891, "grad_norm": 1.202339218716502, "learning_rate": 2.5513151823325437e-06, "loss": 0.0304, "step": 147300 }, { "epoch": 0.6146364463285794, "grad_norm": 0.6814234738603339, "learning_rate": 2.5512718817970394e-06, "loss": 0.0263, "step": 147305 }, { "epoch": 0.6146573090435697, "grad_norm": 0.9557087118368788, "learning_rate": 2.5512285834661305e-06, "loss": 0.0252, "step": 147310 }, { "epoch": 0.61467817175856, "grad_norm": 0.6962165994650146, "learning_rate": 2.5511852873396296e-06, "loss": 0.0209, "step": 147315 }, { "epoch": 0.6146990344735502, "grad_norm": 0.3864202886338878, "learning_rate": 2.551141993417351e-06, "loss": 0.0284, "step": 147320 }, { "epoch": 0.6147198971885405, "grad_norm": 0.9344193520362531, "learning_rate": 2.551098701699107e-06, "loss": 0.0375, "step": 147325 }, { "epoch": 0.6147407599035308, "grad_norm": 0.8634162136223598, "learning_rate": 2.5510554121847096e-06, "loss": 0.0266, "step": 147330 }, { "epoch": 0.6147616226185211, "grad_norm": 0.7491617006239435, "learning_rate": 2.5510121248739726e-06, "loss": 0.0218, "step": 147335 }, { "epoch": 0.6147824853335113, "grad_norm": 0.6675949526688674, "learning_rate": 2.55096883976671e-06, "loss": 0.0227, "step": 147340 }, { "epoch": 0.6148033480485017, "grad_norm": 1.1699057001182416, "learning_rate": 2.5509255568627335e-06, "loss": 0.0307, "step": 147345 }, { "epoch": 0.6148242107634919, "grad_norm": 0.40398926160950566, "learning_rate": 2.5508822761618566e-06, "loss": 0.0181, "step": 147350 }, { "epoch": 0.6148450734784822, "grad_norm": 0.3663627019167127, "learning_rate": 2.5508389976638927e-06, "loss": 0.0158, "step": 147355 }, { "epoch": 0.6148659361934725, "grad_norm": 1.0992246894156352, "learning_rate": 2.5507957213686544e-06, "loss": 0.036, "step": 147360 }, { "epoch": 0.6148867989084628, "grad_norm": 0.4104015145503954, "learning_rate": 2.5507524472759555e-06, "loss": 0.0145, "step": 147365 }, { "epoch": 0.614907661623453, "grad_norm": 0.7481025682720641, "learning_rate": 2.5507091753856087e-06, "loss": 0.0209, "step": 147370 }, { "epoch": 0.6149285243384434, "grad_norm": 0.5826599957456898, "learning_rate": 2.5506659056974277e-06, "loss": 0.0193, "step": 147375 }, { "epoch": 0.6149493870534336, "grad_norm": 0.5622193911659665, "learning_rate": 2.550622638211225e-06, "loss": 0.0181, "step": 147380 }, { "epoch": 0.6149702497684238, "grad_norm": 0.7100348920861445, "learning_rate": 2.550579372926814e-06, "loss": 0.0199, "step": 147385 }, { "epoch": 0.6149911124834141, "grad_norm": 0.4784887887766373, "learning_rate": 2.5505361098440086e-06, "loss": 0.0191, "step": 147390 }, { "epoch": 0.6150119751984044, "grad_norm": 0.6637873444051253, "learning_rate": 2.5504928489626215e-06, "loss": 0.0351, "step": 147395 }, { "epoch": 0.6150328379133947, "grad_norm": 1.1943050550232137, "learning_rate": 2.5504495902824663e-06, "loss": 0.0183, "step": 147400 }, { "epoch": 0.6150537006283849, "grad_norm": 0.653804728961459, "learning_rate": 2.5504063338033563e-06, "loss": 0.0279, "step": 147405 }, { "epoch": 0.6150745633433753, "grad_norm": 1.0714734491574545, "learning_rate": 2.550363079525104e-06, "loss": 0.0225, "step": 147410 }, { "epoch": 0.6150954260583655, "grad_norm": 0.5462426865695811, "learning_rate": 2.5503198274475243e-06, "loss": 0.0211, "step": 147415 }, { "epoch": 0.6151162887733558, "grad_norm": 0.7345056457002482, "learning_rate": 2.5502765775704297e-06, "loss": 0.0314, "step": 147420 }, { "epoch": 0.6151371514883461, "grad_norm": 0.6465100317325656, "learning_rate": 2.550233329893633e-06, "loss": 0.0203, "step": 147425 }, { "epoch": 0.6151580142033364, "grad_norm": 1.2680994946187036, "learning_rate": 2.5501900844169496e-06, "loss": 0.034, "step": 147430 }, { "epoch": 0.6151788769183266, "grad_norm": 0.7945618760977001, "learning_rate": 2.5501468411401907e-06, "loss": 0.0208, "step": 147435 }, { "epoch": 0.6151997396333169, "grad_norm": 0.4815763065951372, "learning_rate": 2.550103600063172e-06, "loss": 0.0209, "step": 147440 }, { "epoch": 0.6152206023483072, "grad_norm": 0.9089946860681105, "learning_rate": 2.550060361185705e-06, "loss": 0.018, "step": 147445 }, { "epoch": 0.6152414650632975, "grad_norm": 0.9282909226986746, "learning_rate": 2.5500171245076044e-06, "loss": 0.0198, "step": 147450 }, { "epoch": 0.6152623277782877, "grad_norm": 0.6576308400395106, "learning_rate": 2.5499738900286836e-06, "loss": 0.0167, "step": 147455 }, { "epoch": 0.6152831904932781, "grad_norm": 0.8706788329861387, "learning_rate": 2.549930657748755e-06, "loss": 0.0286, "step": 147460 }, { "epoch": 0.6153040532082683, "grad_norm": 0.6883060161798796, "learning_rate": 2.5498874276676345e-06, "loss": 0.025, "step": 147465 }, { "epoch": 0.6153249159232586, "grad_norm": 0.866644650742167, "learning_rate": 2.5498441997851335e-06, "loss": 0.0234, "step": 147470 }, { "epoch": 0.6153457786382489, "grad_norm": 0.9729970816885124, "learning_rate": 2.5498009741010676e-06, "loss": 0.0261, "step": 147475 }, { "epoch": 0.6153666413532392, "grad_norm": 0.4590974422845247, "learning_rate": 2.5497577506152483e-06, "loss": 0.0216, "step": 147480 }, { "epoch": 0.6153875040682294, "grad_norm": 0.5230177578220331, "learning_rate": 2.549714529327491e-06, "loss": 0.0192, "step": 147485 }, { "epoch": 0.6154083667832198, "grad_norm": 0.4331950900423631, "learning_rate": 2.5496713102376087e-06, "loss": 0.0207, "step": 147490 }, { "epoch": 0.61542922949821, "grad_norm": 0.5835971418119039, "learning_rate": 2.5496280933454152e-06, "loss": 0.0199, "step": 147495 }, { "epoch": 0.6154500922132002, "grad_norm": 0.9244005127542785, "learning_rate": 2.5495848786507247e-06, "loss": 0.0217, "step": 147500 }, { "epoch": 0.6154709549281905, "grad_norm": 2.272251252115666, "learning_rate": 2.5495416661533507e-06, "loss": 0.023, "step": 147505 }, { "epoch": 0.6154918176431808, "grad_norm": 0.5708827153656768, "learning_rate": 2.549498455853106e-06, "loss": 0.0231, "step": 147510 }, { "epoch": 0.6155126803581711, "grad_norm": 0.5525662855341693, "learning_rate": 2.549455247749806e-06, "loss": 0.0205, "step": 147515 }, { "epoch": 0.6155335430731613, "grad_norm": 0.4576280670899325, "learning_rate": 2.5494120418432637e-06, "loss": 0.0188, "step": 147520 }, { "epoch": 0.6155544057881517, "grad_norm": 0.7433057770156446, "learning_rate": 2.5493688381332933e-06, "loss": 0.0187, "step": 147525 }, { "epoch": 0.6155752685031419, "grad_norm": 0.41965931583754823, "learning_rate": 2.5493256366197085e-06, "loss": 0.0173, "step": 147530 }, { "epoch": 0.6155961312181322, "grad_norm": 0.5633849937444151, "learning_rate": 2.549282437302323e-06, "loss": 0.0226, "step": 147535 }, { "epoch": 0.6156169939331225, "grad_norm": 0.9270857199695319, "learning_rate": 2.5492392401809505e-06, "loss": 0.0197, "step": 147540 }, { "epoch": 0.6156378566481128, "grad_norm": 0.5984148596277555, "learning_rate": 2.549196045255406e-06, "loss": 0.0318, "step": 147545 }, { "epoch": 0.615658719363103, "grad_norm": 0.4676371660200876, "learning_rate": 2.549152852525503e-06, "loss": 0.0189, "step": 147550 }, { "epoch": 0.6156795820780934, "grad_norm": 0.8749687383123772, "learning_rate": 2.549109661991055e-06, "loss": 0.0244, "step": 147555 }, { "epoch": 0.6157004447930836, "grad_norm": 0.4230687698632888, "learning_rate": 2.5490664736518763e-06, "loss": 0.0196, "step": 147560 }, { "epoch": 0.6157213075080739, "grad_norm": 0.5067108226367155, "learning_rate": 2.549023287507781e-06, "loss": 0.018, "step": 147565 }, { "epoch": 0.6157421702230641, "grad_norm": 0.8604463650407912, "learning_rate": 2.5489801035585827e-06, "loss": 0.0248, "step": 147570 }, { "epoch": 0.6157630329380545, "grad_norm": 1.2834480877644343, "learning_rate": 2.5489369218040967e-06, "loss": 0.0235, "step": 147575 }, { "epoch": 0.6157838956530447, "grad_norm": 0.6090253022993989, "learning_rate": 2.5488937422441363e-06, "loss": 0.0264, "step": 147580 }, { "epoch": 0.6158047583680349, "grad_norm": 0.8401646594349584, "learning_rate": 2.5488505648785156e-06, "loss": 0.0314, "step": 147585 }, { "epoch": 0.6158256210830253, "grad_norm": 0.388647833798333, "learning_rate": 2.5488073897070485e-06, "loss": 0.015, "step": 147590 }, { "epoch": 0.6158464837980155, "grad_norm": 0.5114481229566409, "learning_rate": 2.54876421672955e-06, "loss": 0.0207, "step": 147595 }, { "epoch": 0.6158673465130058, "grad_norm": 0.8096866249052755, "learning_rate": 2.5487210459458333e-06, "loss": 0.0189, "step": 147600 }, { "epoch": 0.6158882092279961, "grad_norm": 0.586358197674762, "learning_rate": 2.5486778773557136e-06, "loss": 0.0199, "step": 147605 }, { "epoch": 0.6159090719429864, "grad_norm": 0.9653453639965921, "learning_rate": 2.5486347109590043e-06, "loss": 0.0243, "step": 147610 }, { "epoch": 0.6159299346579766, "grad_norm": 0.384122758311866, "learning_rate": 2.54859154675552e-06, "loss": 0.0195, "step": 147615 }, { "epoch": 0.6159507973729669, "grad_norm": 0.70745140299932, "learning_rate": 2.548548384745075e-06, "loss": 0.0224, "step": 147620 }, { "epoch": 0.6159716600879572, "grad_norm": 1.6191892354232755, "learning_rate": 2.5485052249274843e-06, "loss": 0.025, "step": 147625 }, { "epoch": 0.6159925228029475, "grad_norm": 0.4885907146351756, "learning_rate": 2.548462067302561e-06, "loss": 0.0235, "step": 147630 }, { "epoch": 0.6160133855179377, "grad_norm": 0.5421734415750402, "learning_rate": 2.54841891187012e-06, "loss": 0.0226, "step": 147635 }, { "epoch": 0.6160342482329281, "grad_norm": 0.5231397958146401, "learning_rate": 2.5483757586299752e-06, "loss": 0.0197, "step": 147640 }, { "epoch": 0.6160551109479183, "grad_norm": 0.7107178585970003, "learning_rate": 2.548332607581942e-06, "loss": 0.0249, "step": 147645 }, { "epoch": 0.6160759736629086, "grad_norm": 0.4709811730363153, "learning_rate": 2.548289458725834e-06, "loss": 0.0257, "step": 147650 }, { "epoch": 0.6160968363778989, "grad_norm": 0.4658266737530905, "learning_rate": 2.548246312061466e-06, "loss": 0.026, "step": 147655 }, { "epoch": 0.6161176990928892, "grad_norm": 0.5740252095307262, "learning_rate": 2.548203167588652e-06, "loss": 0.0213, "step": 147660 }, { "epoch": 0.6161385618078794, "grad_norm": 0.8148918907132262, "learning_rate": 2.5481600253072076e-06, "loss": 0.0219, "step": 147665 }, { "epoch": 0.6161594245228698, "grad_norm": 0.4388350292950517, "learning_rate": 2.548116885216946e-06, "loss": 0.0197, "step": 147670 }, { "epoch": 0.61618028723786, "grad_norm": 0.5950591946875702, "learning_rate": 2.5480737473176818e-06, "loss": 0.0189, "step": 147675 }, { "epoch": 0.6162011499528502, "grad_norm": 1.361093507107135, "learning_rate": 2.5480306116092307e-06, "loss": 0.0333, "step": 147680 }, { "epoch": 0.6162220126678405, "grad_norm": 0.2552307360579415, "learning_rate": 2.547987478091406e-06, "loss": 0.0174, "step": 147685 }, { "epoch": 0.6162428753828308, "grad_norm": 0.9936993009375806, "learning_rate": 2.5479443467640236e-06, "loss": 0.0222, "step": 147690 }, { "epoch": 0.6162637380978211, "grad_norm": 0.7549433157387385, "learning_rate": 2.547901217626897e-06, "loss": 0.0233, "step": 147695 }, { "epoch": 0.6162846008128113, "grad_norm": 0.39264274410221467, "learning_rate": 2.547858090679841e-06, "loss": 0.0257, "step": 147700 }, { "epoch": 0.6163054635278017, "grad_norm": 1.3060122655802484, "learning_rate": 2.5478149659226704e-06, "loss": 0.0237, "step": 147705 }, { "epoch": 0.6163263262427919, "grad_norm": 0.7292523016659318, "learning_rate": 2.5477718433552003e-06, "loss": 0.0224, "step": 147710 }, { "epoch": 0.6163471889577822, "grad_norm": 0.9509641048734868, "learning_rate": 2.5477287229772448e-06, "loss": 0.0235, "step": 147715 }, { "epoch": 0.6163680516727725, "grad_norm": 1.4392179719072054, "learning_rate": 2.547685604788619e-06, "loss": 0.0274, "step": 147720 }, { "epoch": 0.6163889143877628, "grad_norm": 1.0798166611462343, "learning_rate": 2.547642488789137e-06, "loss": 0.0238, "step": 147725 }, { "epoch": 0.616409777102753, "grad_norm": 1.0466292539620639, "learning_rate": 2.547599374978615e-06, "loss": 0.0239, "step": 147730 }, { "epoch": 0.6164306398177434, "grad_norm": 0.7369127956007797, "learning_rate": 2.547556263356866e-06, "loss": 0.0247, "step": 147735 }, { "epoch": 0.6164515025327336, "grad_norm": 0.41080872346931024, "learning_rate": 2.547513153923706e-06, "loss": 0.0302, "step": 147740 }, { "epoch": 0.6164723652477239, "grad_norm": 0.7645213472221807, "learning_rate": 2.547470046678949e-06, "loss": 0.0211, "step": 147745 }, { "epoch": 0.6164932279627141, "grad_norm": 0.3888808036635955, "learning_rate": 2.5474269416224106e-06, "loss": 0.0174, "step": 147750 }, { "epoch": 0.6165140906777045, "grad_norm": 1.2395080750462655, "learning_rate": 2.547383838753906e-06, "loss": 0.0236, "step": 147755 }, { "epoch": 0.6165349533926947, "grad_norm": 0.5525900536766628, "learning_rate": 2.547340738073249e-06, "loss": 0.0193, "step": 147760 }, { "epoch": 0.616555816107685, "grad_norm": 1.097023697185492, "learning_rate": 2.547297639580255e-06, "loss": 0.0235, "step": 147765 }, { "epoch": 0.6165766788226753, "grad_norm": 0.81593546926545, "learning_rate": 2.5472545432747386e-06, "loss": 0.0191, "step": 147770 }, { "epoch": 0.6165975415376655, "grad_norm": 0.5996727209496855, "learning_rate": 2.547211449156516e-06, "loss": 0.0216, "step": 147775 }, { "epoch": 0.6166184042526558, "grad_norm": 0.880708392830232, "learning_rate": 2.547168357225401e-06, "loss": 0.0225, "step": 147780 }, { "epoch": 0.6166392669676461, "grad_norm": 0.3844951667828925, "learning_rate": 2.5471252674812087e-06, "loss": 0.0225, "step": 147785 }, { "epoch": 0.6166601296826364, "grad_norm": 0.724046532162915, "learning_rate": 2.547082179923754e-06, "loss": 0.0179, "step": 147790 }, { "epoch": 0.6166809923976266, "grad_norm": 0.9721209189749846, "learning_rate": 2.547039094552853e-06, "loss": 0.0314, "step": 147795 }, { "epoch": 0.6167018551126169, "grad_norm": 0.38910857570748253, "learning_rate": 2.5469960113683195e-06, "loss": 0.0201, "step": 147800 }, { "epoch": 0.6167227178276072, "grad_norm": 1.0223217158873883, "learning_rate": 2.5469529303699697e-06, "loss": 0.0188, "step": 147805 }, { "epoch": 0.6167435805425975, "grad_norm": 0.5387308775708407, "learning_rate": 2.546909851557618e-06, "loss": 0.0213, "step": 147810 }, { "epoch": 0.6167644432575877, "grad_norm": 0.4608968135298593, "learning_rate": 2.54686677493108e-06, "loss": 0.0167, "step": 147815 }, { "epoch": 0.6167853059725781, "grad_norm": 0.3932405521833889, "learning_rate": 2.5468237004901702e-06, "loss": 0.0282, "step": 147820 }, { "epoch": 0.6168061686875683, "grad_norm": 0.46566445605535733, "learning_rate": 2.546780628234704e-06, "loss": 0.0244, "step": 147825 }, { "epoch": 0.6168270314025586, "grad_norm": 0.39131594088511445, "learning_rate": 2.546737558164497e-06, "loss": 0.0132, "step": 147830 }, { "epoch": 0.6168478941175489, "grad_norm": 0.304877805949642, "learning_rate": 2.5466944902793644e-06, "loss": 0.0139, "step": 147835 }, { "epoch": 0.6168687568325392, "grad_norm": 0.6774589772837949, "learning_rate": 2.5466514245791207e-06, "loss": 0.0172, "step": 147840 }, { "epoch": 0.6168896195475294, "grad_norm": 0.7612061379898402, "learning_rate": 2.546608361063582e-06, "loss": 0.0256, "step": 147845 }, { "epoch": 0.6169104822625198, "grad_norm": 0.936200998835581, "learning_rate": 2.5465652997325636e-06, "loss": 0.0234, "step": 147850 }, { "epoch": 0.61693134497751, "grad_norm": 0.6688271624337868, "learning_rate": 2.5465222405858803e-06, "loss": 0.0192, "step": 147855 }, { "epoch": 0.6169522076925003, "grad_norm": 0.831526916793401, "learning_rate": 2.5464791836233477e-06, "loss": 0.0261, "step": 147860 }, { "epoch": 0.6169730704074905, "grad_norm": 0.6807182952138275, "learning_rate": 2.5464361288447808e-06, "loss": 0.0213, "step": 147865 }, { "epoch": 0.6169939331224809, "grad_norm": 0.7532319439578862, "learning_rate": 2.5463930762499956e-06, "loss": 0.0225, "step": 147870 }, { "epoch": 0.6170147958374711, "grad_norm": 2.3429823439615607, "learning_rate": 2.5463500258388076e-06, "loss": 0.0292, "step": 147875 }, { "epoch": 0.6170356585524613, "grad_norm": 1.0117543157862006, "learning_rate": 2.5463069776110304e-06, "loss": 0.0277, "step": 147880 }, { "epoch": 0.6170565212674517, "grad_norm": 2.2682854101872025, "learning_rate": 2.5462639315664823e-06, "loss": 0.0285, "step": 147885 }, { "epoch": 0.6170773839824419, "grad_norm": 0.6430912784851591, "learning_rate": 2.546220887704976e-06, "loss": 0.0185, "step": 147890 }, { "epoch": 0.6170982466974322, "grad_norm": 0.5183785824213777, "learning_rate": 2.5461778460263294e-06, "loss": 0.0228, "step": 147895 }, { "epoch": 0.6171191094124225, "grad_norm": 0.4321393771081625, "learning_rate": 2.5461348065303565e-06, "loss": 0.0209, "step": 147900 }, { "epoch": 0.6171399721274128, "grad_norm": 0.8203733907417402, "learning_rate": 2.5460917692168736e-06, "loss": 0.0237, "step": 147905 }, { "epoch": 0.617160834842403, "grad_norm": 0.7387593611588792, "learning_rate": 2.5460487340856954e-06, "loss": 0.0225, "step": 147910 }, { "epoch": 0.6171816975573934, "grad_norm": 0.5702626151689293, "learning_rate": 2.546005701136638e-06, "loss": 0.0181, "step": 147915 }, { "epoch": 0.6172025602723836, "grad_norm": 0.6037916292581151, "learning_rate": 2.5459626703695174e-06, "loss": 0.027, "step": 147920 }, { "epoch": 0.6172234229873739, "grad_norm": 0.37351208653532814, "learning_rate": 2.545919641784148e-06, "loss": 0.016, "step": 147925 }, { "epoch": 0.6172442857023641, "grad_norm": 0.7793423552167688, "learning_rate": 2.545876615380347e-06, "loss": 0.0282, "step": 147930 }, { "epoch": 0.6172651484173545, "grad_norm": 0.6886328660758769, "learning_rate": 2.545833591157929e-06, "loss": 0.0162, "step": 147935 }, { "epoch": 0.6172860111323447, "grad_norm": 0.8631611585546122, "learning_rate": 2.54579056911671e-06, "loss": 0.0293, "step": 147940 }, { "epoch": 0.617306873847335, "grad_norm": 0.8440900528517937, "learning_rate": 2.5457475492565058e-06, "loss": 0.0292, "step": 147945 }, { "epoch": 0.6173277365623253, "grad_norm": 0.6118353413932857, "learning_rate": 2.545704531577132e-06, "loss": 0.024, "step": 147950 }, { "epoch": 0.6173485992773156, "grad_norm": 0.6847005175720557, "learning_rate": 2.545661516078404e-06, "loss": 0.0173, "step": 147955 }, { "epoch": 0.6173694619923058, "grad_norm": 0.7226084638608165, "learning_rate": 2.5456185027601378e-06, "loss": 0.0229, "step": 147960 }, { "epoch": 0.6173903247072962, "grad_norm": 0.6429846318518178, "learning_rate": 2.54557549162215e-06, "loss": 0.0273, "step": 147965 }, { "epoch": 0.6174111874222864, "grad_norm": 0.3083306510050061, "learning_rate": 2.5455324826642554e-06, "loss": 0.0187, "step": 147970 }, { "epoch": 0.6174320501372766, "grad_norm": 0.7408553237941441, "learning_rate": 2.5454894758862697e-06, "loss": 0.0215, "step": 147975 }, { "epoch": 0.6174529128522669, "grad_norm": 0.21297313390116732, "learning_rate": 2.54544647128801e-06, "loss": 0.0234, "step": 147980 }, { "epoch": 0.6174737755672572, "grad_norm": 0.6310136414883291, "learning_rate": 2.5454034688692914e-06, "loss": 0.0261, "step": 147985 }, { "epoch": 0.6174946382822475, "grad_norm": 0.6401736962625821, "learning_rate": 2.5453604686299295e-06, "loss": 0.0226, "step": 147990 }, { "epoch": 0.6175155009972377, "grad_norm": 0.5379971355149198, "learning_rate": 2.5453174705697403e-06, "loss": 0.0243, "step": 147995 }, { "epoch": 0.6175363637122281, "grad_norm": 0.5895050442422466, "learning_rate": 2.5452744746885403e-06, "loss": 0.0238, "step": 148000 }, { "epoch": 0.6175572264272183, "grad_norm": 0.9389766613763117, "learning_rate": 2.545231480986145e-06, "loss": 0.0255, "step": 148005 }, { "epoch": 0.6175780891422086, "grad_norm": 0.8399749454320584, "learning_rate": 2.5451884894623706e-06, "loss": 0.0309, "step": 148010 }, { "epoch": 0.6175989518571989, "grad_norm": 0.5607010170493643, "learning_rate": 2.545145500117033e-06, "loss": 0.0267, "step": 148015 }, { "epoch": 0.6176198145721892, "grad_norm": 0.6579624292065123, "learning_rate": 2.5451025129499485e-06, "loss": 0.0203, "step": 148020 }, { "epoch": 0.6176406772871794, "grad_norm": 0.5503862657894398, "learning_rate": 2.5450595279609324e-06, "loss": 0.0231, "step": 148025 }, { "epoch": 0.6176615400021698, "grad_norm": 0.5909021655602772, "learning_rate": 2.545016545149802e-06, "loss": 0.0251, "step": 148030 }, { "epoch": 0.61768240271716, "grad_norm": 0.398101790789811, "learning_rate": 2.5449735645163725e-06, "loss": 0.0274, "step": 148035 }, { "epoch": 0.6177032654321503, "grad_norm": 0.5379283131219923, "learning_rate": 2.54493058606046e-06, "loss": 0.0222, "step": 148040 }, { "epoch": 0.6177241281471405, "grad_norm": 2.35587284974582, "learning_rate": 2.5448876097818807e-06, "loss": 0.0302, "step": 148045 }, { "epoch": 0.6177449908621309, "grad_norm": 1.2146198407688844, "learning_rate": 2.5448446356804516e-06, "loss": 0.0227, "step": 148050 }, { "epoch": 0.6177658535771211, "grad_norm": 0.5746535303641314, "learning_rate": 2.544801663755988e-06, "loss": 0.0268, "step": 148055 }, { "epoch": 0.6177867162921113, "grad_norm": 0.43266920660988684, "learning_rate": 2.5447586940083056e-06, "loss": 0.0227, "step": 148060 }, { "epoch": 0.6178075790071017, "grad_norm": 0.7263801598726104, "learning_rate": 2.5447157264372224e-06, "loss": 0.0297, "step": 148065 }, { "epoch": 0.617828441722092, "grad_norm": 0.6289481866511161, "learning_rate": 2.544672761042553e-06, "loss": 0.0192, "step": 148070 }, { "epoch": 0.6178493044370822, "grad_norm": 0.5170961402872225, "learning_rate": 2.544629797824114e-06, "loss": 0.0179, "step": 148075 }, { "epoch": 0.6178701671520725, "grad_norm": 1.0052402611664513, "learning_rate": 2.5445868367817227e-06, "loss": 0.0185, "step": 148080 }, { "epoch": 0.6178910298670628, "grad_norm": 0.845209933421428, "learning_rate": 2.5445438779151943e-06, "loss": 0.0223, "step": 148085 }, { "epoch": 0.617911892582053, "grad_norm": 0.9198951159741378, "learning_rate": 2.5445009212243454e-06, "loss": 0.0251, "step": 148090 }, { "epoch": 0.6179327552970434, "grad_norm": 1.5269500132808065, "learning_rate": 2.5444579667089926e-06, "loss": 0.024, "step": 148095 }, { "epoch": 0.6179536180120336, "grad_norm": 1.1236796526884207, "learning_rate": 2.544415014368952e-06, "loss": 0.0245, "step": 148100 }, { "epoch": 0.6179744807270239, "grad_norm": 0.7916914739244472, "learning_rate": 2.5443720642040405e-06, "loss": 0.0287, "step": 148105 }, { "epoch": 0.6179953434420141, "grad_norm": 1.1765649019068316, "learning_rate": 2.5443291162140737e-06, "loss": 0.031, "step": 148110 }, { "epoch": 0.6180162061570045, "grad_norm": 0.4983836592268465, "learning_rate": 2.544286170398869e-06, "loss": 0.0251, "step": 148115 }, { "epoch": 0.6180370688719947, "grad_norm": 0.8928050526705521, "learning_rate": 2.5442432267582423e-06, "loss": 0.0248, "step": 148120 }, { "epoch": 0.618057931586985, "grad_norm": 0.5474713429966439, "learning_rate": 2.54420028529201e-06, "loss": 0.0215, "step": 148125 }, { "epoch": 0.6180787943019753, "grad_norm": 0.43564287946539904, "learning_rate": 2.5441573459999883e-06, "loss": 0.0183, "step": 148130 }, { "epoch": 0.6180996570169656, "grad_norm": 0.3004226085077923, "learning_rate": 2.544114408881995e-06, "loss": 0.018, "step": 148135 }, { "epoch": 0.6181205197319558, "grad_norm": 0.5435643453424285, "learning_rate": 2.544071473937846e-06, "loss": 0.0197, "step": 148140 }, { "epoch": 0.6181413824469462, "grad_norm": 0.6228669276466142, "learning_rate": 2.544028541167358e-06, "loss": 0.0267, "step": 148145 }, { "epoch": 0.6181622451619364, "grad_norm": 0.4032023618931803, "learning_rate": 2.5439856105703465e-06, "loss": 0.0211, "step": 148150 }, { "epoch": 0.6181831078769267, "grad_norm": 0.7371396370520136, "learning_rate": 2.543942682146629e-06, "loss": 0.028, "step": 148155 }, { "epoch": 0.6182039705919169, "grad_norm": 0.8469906591646721, "learning_rate": 2.543899755896022e-06, "loss": 0.0309, "step": 148160 }, { "epoch": 0.6182248333069073, "grad_norm": 0.9735705808187749, "learning_rate": 2.5438568318183428e-06, "loss": 0.0251, "step": 148165 }, { "epoch": 0.6182456960218975, "grad_norm": 0.6967122612919231, "learning_rate": 2.5438139099134074e-06, "loss": 0.0205, "step": 148170 }, { "epoch": 0.6182665587368877, "grad_norm": 0.9243108043436845, "learning_rate": 2.5437709901810327e-06, "loss": 0.021, "step": 148175 }, { "epoch": 0.6182874214518781, "grad_norm": 1.1668749922919746, "learning_rate": 2.543728072621035e-06, "loss": 0.0224, "step": 148180 }, { "epoch": 0.6183082841668683, "grad_norm": 0.5169246442600155, "learning_rate": 2.5436851572332324e-06, "loss": 0.0165, "step": 148185 }, { "epoch": 0.6183291468818586, "grad_norm": 1.2455206715120213, "learning_rate": 2.5436422440174396e-06, "loss": 0.026, "step": 148190 }, { "epoch": 0.6183500095968489, "grad_norm": 0.8622328947295393, "learning_rate": 2.543599332973475e-06, "loss": 0.0228, "step": 148195 }, { "epoch": 0.6183708723118392, "grad_norm": 0.4764378691041359, "learning_rate": 2.5435564241011544e-06, "loss": 0.0184, "step": 148200 }, { "epoch": 0.6183917350268294, "grad_norm": 0.7279100620695078, "learning_rate": 2.5435135174002956e-06, "loss": 0.049, "step": 148205 }, { "epoch": 0.6184125977418198, "grad_norm": 0.41416673982167884, "learning_rate": 2.5434706128707156e-06, "loss": 0.0179, "step": 148210 }, { "epoch": 0.61843346045681, "grad_norm": 0.8858379519067483, "learning_rate": 2.5434277105122296e-06, "loss": 0.0203, "step": 148215 }, { "epoch": 0.6184543231718003, "grad_norm": 0.8881985236848975, "learning_rate": 2.543384810324656e-06, "loss": 0.0269, "step": 148220 }, { "epoch": 0.6184751858867905, "grad_norm": 1.6911340027780128, "learning_rate": 2.543341912307811e-06, "loss": 0.0228, "step": 148225 }, { "epoch": 0.6184960486017809, "grad_norm": 0.7015651945396534, "learning_rate": 2.5432990164615128e-06, "loss": 0.0189, "step": 148230 }, { "epoch": 0.6185169113167711, "grad_norm": 1.018140393507626, "learning_rate": 2.5432561227855766e-06, "loss": 0.026, "step": 148235 }, { "epoch": 0.6185377740317614, "grad_norm": 1.0705189511966848, "learning_rate": 2.54321323127982e-06, "loss": 0.0232, "step": 148240 }, { "epoch": 0.6185586367467517, "grad_norm": 0.6563608757357237, "learning_rate": 2.5431703419440614e-06, "loss": 0.0262, "step": 148245 }, { "epoch": 0.618579499461742, "grad_norm": 0.4379073855511812, "learning_rate": 2.5431274547781155e-06, "loss": 0.0226, "step": 148250 }, { "epoch": 0.6186003621767322, "grad_norm": 0.43969884479395716, "learning_rate": 2.543084569781801e-06, "loss": 0.0203, "step": 148255 }, { "epoch": 0.6186212248917226, "grad_norm": 0.6250796645544926, "learning_rate": 2.5430416869549342e-06, "loss": 0.0215, "step": 148260 }, { "epoch": 0.6186420876067128, "grad_norm": 2.5745863766320483, "learning_rate": 2.542998806297332e-06, "loss": 0.0285, "step": 148265 }, { "epoch": 0.618662950321703, "grad_norm": 0.8824406456569233, "learning_rate": 2.5429559278088126e-06, "loss": 0.0177, "step": 148270 }, { "epoch": 0.6186838130366934, "grad_norm": 0.5697624156718092, "learning_rate": 2.542913051489192e-06, "loss": 0.0179, "step": 148275 }, { "epoch": 0.6187046757516836, "grad_norm": 0.47507405240118006, "learning_rate": 2.5428701773382884e-06, "loss": 0.0176, "step": 148280 }, { "epoch": 0.6187255384666739, "grad_norm": 0.5584648836908138, "learning_rate": 2.5428273053559185e-06, "loss": 0.0185, "step": 148285 }, { "epoch": 0.6187464011816641, "grad_norm": 0.6497529825799481, "learning_rate": 2.542784435541899e-06, "loss": 0.0239, "step": 148290 }, { "epoch": 0.6187672638966545, "grad_norm": 0.8755282273938564, "learning_rate": 2.5427415678960477e-06, "loss": 0.0201, "step": 148295 }, { "epoch": 0.6187881266116447, "grad_norm": 0.7359201033812609, "learning_rate": 2.5426987024181823e-06, "loss": 0.0221, "step": 148300 }, { "epoch": 0.618808989326635, "grad_norm": 0.48684834986127906, "learning_rate": 2.5426558391081185e-06, "loss": 0.029, "step": 148305 }, { "epoch": 0.6188298520416253, "grad_norm": 0.6494252916451028, "learning_rate": 2.542612977965675e-06, "loss": 0.0198, "step": 148310 }, { "epoch": 0.6188507147566156, "grad_norm": 0.6017548840031253, "learning_rate": 2.542570118990669e-06, "loss": 0.0315, "step": 148315 }, { "epoch": 0.6188715774716058, "grad_norm": 0.9984855518930303, "learning_rate": 2.5425272621829174e-06, "loss": 0.0293, "step": 148320 }, { "epoch": 0.6188924401865962, "grad_norm": 0.6158264214635515, "learning_rate": 2.542484407542237e-06, "loss": 0.022, "step": 148325 }, { "epoch": 0.6189133029015864, "grad_norm": 0.5282082468127408, "learning_rate": 2.5424415550684467e-06, "loss": 0.0273, "step": 148330 }, { "epoch": 0.6189341656165767, "grad_norm": 0.5200933415086342, "learning_rate": 2.5423987047613623e-06, "loss": 0.0181, "step": 148335 }, { "epoch": 0.6189550283315669, "grad_norm": 0.5665800849799868, "learning_rate": 2.5423558566208023e-06, "loss": 0.0174, "step": 148340 }, { "epoch": 0.6189758910465573, "grad_norm": 0.7997938808689371, "learning_rate": 2.5423130106465836e-06, "loss": 0.0208, "step": 148345 }, { "epoch": 0.6189967537615475, "grad_norm": 0.914877344447842, "learning_rate": 2.5422701668385243e-06, "loss": 0.0247, "step": 148350 }, { "epoch": 0.6190176164765377, "grad_norm": 0.6652669830214315, "learning_rate": 2.5422273251964412e-06, "loss": 0.0206, "step": 148355 }, { "epoch": 0.6190384791915281, "grad_norm": 0.6505137250559311, "learning_rate": 2.5421844857201516e-06, "loss": 0.0217, "step": 148360 }, { "epoch": 0.6190593419065183, "grad_norm": 0.6525297617875183, "learning_rate": 2.5421416484094744e-06, "loss": 0.0198, "step": 148365 }, { "epoch": 0.6190802046215086, "grad_norm": 0.7784181111414796, "learning_rate": 2.5420988132642256e-06, "loss": 0.0213, "step": 148370 }, { "epoch": 0.619101067336499, "grad_norm": 0.5304114751998646, "learning_rate": 2.5420559802842234e-06, "loss": 0.0197, "step": 148375 }, { "epoch": 0.6191219300514892, "grad_norm": 0.6941230046467739, "learning_rate": 2.5420131494692856e-06, "loss": 0.0228, "step": 148380 }, { "epoch": 0.6191427927664794, "grad_norm": 0.8422558014267719, "learning_rate": 2.5419703208192294e-06, "loss": 0.0223, "step": 148385 }, { "epoch": 0.6191636554814698, "grad_norm": 0.5646225254512915, "learning_rate": 2.541927494333872e-06, "loss": 0.0244, "step": 148390 }, { "epoch": 0.61918451819646, "grad_norm": 0.5797741969182787, "learning_rate": 2.5418846700130327e-06, "loss": 0.0231, "step": 148395 }, { "epoch": 0.6192053809114503, "grad_norm": 1.1472347433012, "learning_rate": 2.5418418478565276e-06, "loss": 0.0178, "step": 148400 }, { "epoch": 0.6192262436264405, "grad_norm": 0.7135911125414836, "learning_rate": 2.541799027864175e-06, "loss": 0.025, "step": 148405 }, { "epoch": 0.6192471063414309, "grad_norm": 0.5223076316772794, "learning_rate": 2.5417562100357928e-06, "loss": 0.017, "step": 148410 }, { "epoch": 0.6192679690564211, "grad_norm": 1.0174223569955219, "learning_rate": 2.5417133943711985e-06, "loss": 0.03, "step": 148415 }, { "epoch": 0.6192888317714114, "grad_norm": 0.7511796391823661, "learning_rate": 2.5416705808702096e-06, "loss": 0.0216, "step": 148420 }, { "epoch": 0.6193096944864017, "grad_norm": 0.6825208169745192, "learning_rate": 2.5416277695326446e-06, "loss": 0.0315, "step": 148425 }, { "epoch": 0.619330557201392, "grad_norm": 1.1190187039950996, "learning_rate": 2.5415849603583204e-06, "loss": 0.024, "step": 148430 }, { "epoch": 0.6193514199163822, "grad_norm": 0.7062119493659024, "learning_rate": 2.5415421533470553e-06, "loss": 0.0221, "step": 148435 }, { "epoch": 0.6193722826313726, "grad_norm": 0.7558593625171521, "learning_rate": 2.5414993484986673e-06, "loss": 0.0216, "step": 148440 }, { "epoch": 0.6193931453463628, "grad_norm": 0.5579339394031947, "learning_rate": 2.5414565458129737e-06, "loss": 0.0249, "step": 148445 }, { "epoch": 0.619414008061353, "grad_norm": 0.747224142784843, "learning_rate": 2.5414137452897934e-06, "loss": 0.0178, "step": 148450 }, { "epoch": 0.6194348707763434, "grad_norm": 1.122521765963765, "learning_rate": 2.5413709469289434e-06, "loss": 0.0216, "step": 148455 }, { "epoch": 0.6194557334913336, "grad_norm": 0.435973598696776, "learning_rate": 2.541328150730242e-06, "loss": 0.0211, "step": 148460 }, { "epoch": 0.6194765962063239, "grad_norm": 0.8330042446236255, "learning_rate": 2.541285356693507e-06, "loss": 0.0235, "step": 148465 }, { "epoch": 0.6194974589213141, "grad_norm": 0.5216708079043997, "learning_rate": 2.5412425648185563e-06, "loss": 0.017, "step": 148470 }, { "epoch": 0.6195183216363045, "grad_norm": 0.5407641418363376, "learning_rate": 2.5411997751052087e-06, "loss": 0.0234, "step": 148475 }, { "epoch": 0.6195391843512947, "grad_norm": 0.6272323981318607, "learning_rate": 2.541156987553281e-06, "loss": 0.0195, "step": 148480 }, { "epoch": 0.619560047066285, "grad_norm": 0.6119661085788158, "learning_rate": 2.5411142021625915e-06, "loss": 0.0184, "step": 148485 }, { "epoch": 0.6195809097812753, "grad_norm": 0.7766720758500802, "learning_rate": 2.5410714189329593e-06, "loss": 0.0207, "step": 148490 }, { "epoch": 0.6196017724962656, "grad_norm": 0.7876545524754187, "learning_rate": 2.5410286378642014e-06, "loss": 0.0244, "step": 148495 }, { "epoch": 0.6196226352112558, "grad_norm": 0.9530267150872423, "learning_rate": 2.5409858589561364e-06, "loss": 0.0339, "step": 148500 }, { "epoch": 0.6196434979262462, "grad_norm": 1.0733774648561893, "learning_rate": 2.540943082208582e-06, "loss": 0.0313, "step": 148505 }, { "epoch": 0.6196643606412364, "grad_norm": 0.7159173670568458, "learning_rate": 2.540900307621357e-06, "loss": 0.0263, "step": 148510 }, { "epoch": 0.6196852233562267, "grad_norm": 1.1054668257159657, "learning_rate": 2.540857535194279e-06, "loss": 0.0261, "step": 148515 }, { "epoch": 0.6197060860712169, "grad_norm": 0.5007751432640323, "learning_rate": 2.540814764927167e-06, "loss": 0.0217, "step": 148520 }, { "epoch": 0.6197269487862073, "grad_norm": 0.5081977934979951, "learning_rate": 2.540771996819838e-06, "loss": 0.0212, "step": 148525 }, { "epoch": 0.6197478115011975, "grad_norm": 0.46426920024608065, "learning_rate": 2.5407292308721115e-06, "loss": 0.0266, "step": 148530 }, { "epoch": 0.6197686742161878, "grad_norm": 0.36610790366595236, "learning_rate": 2.5406864670838044e-06, "loss": 0.0219, "step": 148535 }, { "epoch": 0.6197895369311781, "grad_norm": 0.5851851546923862, "learning_rate": 2.5406437054547363e-06, "loss": 0.017, "step": 148540 }, { "epoch": 0.6198103996461684, "grad_norm": 1.1411003945227824, "learning_rate": 2.5406009459847243e-06, "loss": 0.0306, "step": 148545 }, { "epoch": 0.6198312623611586, "grad_norm": 0.9302972628901611, "learning_rate": 2.5405581886735875e-06, "loss": 0.0197, "step": 148550 }, { "epoch": 0.619852125076149, "grad_norm": 0.49837283814950323, "learning_rate": 2.540515433521144e-06, "loss": 0.0173, "step": 148555 }, { "epoch": 0.6198729877911392, "grad_norm": 0.6033917299265861, "learning_rate": 2.5404726805272124e-06, "loss": 0.0197, "step": 148560 }, { "epoch": 0.6198938505061294, "grad_norm": 0.5777848858967447, "learning_rate": 2.540429929691611e-06, "loss": 0.0205, "step": 148565 }, { "epoch": 0.6199147132211198, "grad_norm": 0.6772552808950297, "learning_rate": 2.540387181014158e-06, "loss": 0.021, "step": 148570 }, { "epoch": 0.61993557593611, "grad_norm": 0.35337523484947925, "learning_rate": 2.5403444344946716e-06, "loss": 0.0208, "step": 148575 }, { "epoch": 0.6199564386511003, "grad_norm": 0.8076443602789636, "learning_rate": 2.5403016901329707e-06, "loss": 0.0239, "step": 148580 }, { "epoch": 0.6199773013660905, "grad_norm": 0.9122145208355972, "learning_rate": 2.5402589479288736e-06, "loss": 0.0243, "step": 148585 }, { "epoch": 0.6199981640810809, "grad_norm": 0.6101471519580767, "learning_rate": 2.5402162078821995e-06, "loss": 0.025, "step": 148590 }, { "epoch": 0.6200190267960711, "grad_norm": 0.7404539903866078, "learning_rate": 2.5401734699927654e-06, "loss": 0.0194, "step": 148595 }, { "epoch": 0.6200398895110614, "grad_norm": 1.5058538134811492, "learning_rate": 2.5401307342603914e-06, "loss": 0.0266, "step": 148600 }, { "epoch": 0.6200607522260517, "grad_norm": 0.3952971274770335, "learning_rate": 2.540088000684895e-06, "loss": 0.0207, "step": 148605 }, { "epoch": 0.620081614941042, "grad_norm": 1.5013687664299704, "learning_rate": 2.540045269266095e-06, "loss": 0.03, "step": 148610 }, { "epoch": 0.6201024776560322, "grad_norm": 0.41838266881543085, "learning_rate": 2.54000254000381e-06, "loss": 0.0223, "step": 148615 }, { "epoch": 0.6201233403710226, "grad_norm": 0.29732683196478915, "learning_rate": 2.5399598128978593e-06, "loss": 0.0193, "step": 148620 }, { "epoch": 0.6201442030860128, "grad_norm": 0.564091675274521, "learning_rate": 2.5399170879480605e-06, "loss": 0.0241, "step": 148625 }, { "epoch": 0.6201650658010031, "grad_norm": 0.529244390944883, "learning_rate": 2.539874365154233e-06, "loss": 0.0236, "step": 148630 }, { "epoch": 0.6201859285159934, "grad_norm": 0.46725614328568493, "learning_rate": 2.539831644516195e-06, "loss": 0.0227, "step": 148635 }, { "epoch": 0.6202067912309837, "grad_norm": 0.9876140997047608, "learning_rate": 2.5397889260337654e-06, "loss": 0.023, "step": 148640 }, { "epoch": 0.6202276539459739, "grad_norm": 0.38104834993463976, "learning_rate": 2.5397462097067633e-06, "loss": 0.0192, "step": 148645 }, { "epoch": 0.6202485166609641, "grad_norm": 0.7582842496816863, "learning_rate": 2.5397034955350064e-06, "loss": 0.034, "step": 148650 }, { "epoch": 0.6202693793759545, "grad_norm": 0.9090612036438263, "learning_rate": 2.5396607835183144e-06, "loss": 0.0207, "step": 148655 }, { "epoch": 0.6202902420909447, "grad_norm": 0.5415920574937735, "learning_rate": 2.5396180736565065e-06, "loss": 0.0224, "step": 148660 }, { "epoch": 0.620311104805935, "grad_norm": 0.5757806625512675, "learning_rate": 2.5395753659494004e-06, "loss": 0.0188, "step": 148665 }, { "epoch": 0.6203319675209253, "grad_norm": 0.6957812166645824, "learning_rate": 2.539532660396815e-06, "loss": 0.0228, "step": 148670 }, { "epoch": 0.6203528302359156, "grad_norm": 0.5736289731695435, "learning_rate": 2.5394899569985697e-06, "loss": 0.0244, "step": 148675 }, { "epoch": 0.6203736929509058, "grad_norm": 0.6009516397044797, "learning_rate": 2.5394472557544838e-06, "loss": 0.0379, "step": 148680 }, { "epoch": 0.6203945556658962, "grad_norm": 0.7051300557876213, "learning_rate": 2.5394045566643747e-06, "loss": 0.0233, "step": 148685 }, { "epoch": 0.6204154183808864, "grad_norm": 0.595470612646288, "learning_rate": 2.5393618597280634e-06, "loss": 0.0152, "step": 148690 }, { "epoch": 0.6204362810958767, "grad_norm": 0.5893996951774058, "learning_rate": 2.5393191649453667e-06, "loss": 0.0294, "step": 148695 }, { "epoch": 0.6204571438108669, "grad_norm": 0.7308532834144373, "learning_rate": 2.5392764723161053e-06, "loss": 0.019, "step": 148700 }, { "epoch": 0.6204780065258573, "grad_norm": 0.39828220025513605, "learning_rate": 2.539233781840097e-06, "loss": 0.0182, "step": 148705 }, { "epoch": 0.6204988692408475, "grad_norm": 1.1400129358711073, "learning_rate": 2.5391910935171614e-06, "loss": 0.0243, "step": 148710 }, { "epoch": 0.6205197319558378, "grad_norm": 0.244709557760912, "learning_rate": 2.5391484073471174e-06, "loss": 0.0175, "step": 148715 }, { "epoch": 0.6205405946708281, "grad_norm": 1.561746863579508, "learning_rate": 2.5391057233297835e-06, "loss": 0.0215, "step": 148720 }, { "epoch": 0.6205614573858184, "grad_norm": 0.5832879317815851, "learning_rate": 2.539063041464979e-06, "loss": 0.0231, "step": 148725 }, { "epoch": 0.6205823201008086, "grad_norm": 0.42858970218915765, "learning_rate": 2.5390203617525238e-06, "loss": 0.0199, "step": 148730 }, { "epoch": 0.620603182815799, "grad_norm": 0.501805470364355, "learning_rate": 2.538977684192237e-06, "loss": 0.0255, "step": 148735 }, { "epoch": 0.6206240455307892, "grad_norm": 0.5200135670405313, "learning_rate": 2.5389350087839365e-06, "loss": 0.0304, "step": 148740 }, { "epoch": 0.6206449082457794, "grad_norm": 0.8878534026000784, "learning_rate": 2.538892335527442e-06, "loss": 0.0213, "step": 148745 }, { "epoch": 0.6206657709607698, "grad_norm": 0.5508113704342958, "learning_rate": 2.5388496644225736e-06, "loss": 0.0222, "step": 148750 }, { "epoch": 0.62068663367576, "grad_norm": 0.7126762910226745, "learning_rate": 2.538806995469149e-06, "loss": 0.017, "step": 148755 }, { "epoch": 0.6207074963907503, "grad_norm": 0.6152620217183422, "learning_rate": 2.5387643286669884e-06, "loss": 0.0209, "step": 148760 }, { "epoch": 0.6207283591057405, "grad_norm": 0.998115237200288, "learning_rate": 2.53872166401591e-06, "loss": 0.0315, "step": 148765 }, { "epoch": 0.6207492218207309, "grad_norm": 0.7101041910626759, "learning_rate": 2.538679001515735e-06, "loss": 0.0237, "step": 148770 }, { "epoch": 0.6207700845357211, "grad_norm": 0.6234397358290412, "learning_rate": 2.5386363411662808e-06, "loss": 0.0238, "step": 148775 }, { "epoch": 0.6207909472507114, "grad_norm": 0.6095534929685754, "learning_rate": 2.5385936829673674e-06, "loss": 0.0223, "step": 148780 }, { "epoch": 0.6208118099657017, "grad_norm": 0.44020921023318954, "learning_rate": 2.5385510269188145e-06, "loss": 0.0202, "step": 148785 }, { "epoch": 0.620832672680692, "grad_norm": 0.5324110428461674, "learning_rate": 2.5385083730204407e-06, "loss": 0.0332, "step": 148790 }, { "epoch": 0.6208535353956822, "grad_norm": 0.8051326519484548, "learning_rate": 2.538465721272066e-06, "loss": 0.0214, "step": 148795 }, { "epoch": 0.6208743981106726, "grad_norm": 0.613324906967473, "learning_rate": 2.5384230716735094e-06, "loss": 0.0319, "step": 148800 }, { "epoch": 0.6208952608256628, "grad_norm": 1.1191197079513142, "learning_rate": 2.53838042422459e-06, "loss": 0.028, "step": 148805 }, { "epoch": 0.6209161235406531, "grad_norm": 0.5790072696975697, "learning_rate": 2.5383377789251285e-06, "loss": 0.0174, "step": 148810 }, { "epoch": 0.6209369862556434, "grad_norm": 1.014064290947374, "learning_rate": 2.538295135774943e-06, "loss": 0.0306, "step": 148815 }, { "epoch": 0.6209578489706337, "grad_norm": 0.8331815284144628, "learning_rate": 2.5382524947738537e-06, "loss": 0.0244, "step": 148820 }, { "epoch": 0.6209787116856239, "grad_norm": 0.5503798164901494, "learning_rate": 2.5382098559216794e-06, "loss": 0.0186, "step": 148825 }, { "epoch": 0.6209995744006142, "grad_norm": 0.5565737169444906, "learning_rate": 2.5381672192182405e-06, "loss": 0.0211, "step": 148830 }, { "epoch": 0.6210204371156045, "grad_norm": 1.2110761120169413, "learning_rate": 2.5381245846633557e-06, "loss": 0.0256, "step": 148835 }, { "epoch": 0.6210412998305948, "grad_norm": 0.4508987986932347, "learning_rate": 2.5380819522568454e-06, "loss": 0.026, "step": 148840 }, { "epoch": 0.621062162545585, "grad_norm": 0.5217329790246394, "learning_rate": 2.538039321998529e-06, "loss": 0.0193, "step": 148845 }, { "epoch": 0.6210830252605753, "grad_norm": 0.5788904468241847, "learning_rate": 2.537996693888225e-06, "loss": 0.0254, "step": 148850 }, { "epoch": 0.6211038879755656, "grad_norm": 1.0931744254562865, "learning_rate": 2.537954067925754e-06, "loss": 0.0188, "step": 148855 }, { "epoch": 0.6211247506905558, "grad_norm": 0.6536870588760346, "learning_rate": 2.537911444110936e-06, "loss": 0.0424, "step": 148860 }, { "epoch": 0.6211456134055462, "grad_norm": 0.353171099755622, "learning_rate": 2.53786882244359e-06, "loss": 0.0217, "step": 148865 }, { "epoch": 0.6211664761205364, "grad_norm": 0.9523915762101605, "learning_rate": 2.5378262029235357e-06, "loss": 0.0203, "step": 148870 }, { "epoch": 0.6211873388355267, "grad_norm": 0.697523077048366, "learning_rate": 2.5377835855505927e-06, "loss": 0.0245, "step": 148875 }, { "epoch": 0.6212082015505169, "grad_norm": 0.4387836979023474, "learning_rate": 2.5377409703245814e-06, "loss": 0.0235, "step": 148880 }, { "epoch": 0.6212290642655073, "grad_norm": 1.3314541566331586, "learning_rate": 2.5376983572453212e-06, "loss": 0.0196, "step": 148885 }, { "epoch": 0.6212499269804975, "grad_norm": 1.0054864556054435, "learning_rate": 2.537655746312632e-06, "loss": 0.0247, "step": 148890 }, { "epoch": 0.6212707896954878, "grad_norm": 0.3728304781872495, "learning_rate": 2.5376131375263325e-06, "loss": 0.0295, "step": 148895 }, { "epoch": 0.6212916524104781, "grad_norm": 0.2814613313329609, "learning_rate": 2.5375705308862435e-06, "loss": 0.0201, "step": 148900 }, { "epoch": 0.6213125151254684, "grad_norm": 1.002422787947838, "learning_rate": 2.5375279263921853e-06, "loss": 0.0295, "step": 148905 }, { "epoch": 0.6213333778404586, "grad_norm": 0.5088276743077499, "learning_rate": 2.5374853240439767e-06, "loss": 0.0264, "step": 148910 }, { "epoch": 0.621354240555449, "grad_norm": 0.9328061447563452, "learning_rate": 2.5374427238414384e-06, "loss": 0.0282, "step": 148915 }, { "epoch": 0.6213751032704392, "grad_norm": 0.7977932476530314, "learning_rate": 2.53740012578439e-06, "loss": 0.027, "step": 148920 }, { "epoch": 0.6213959659854295, "grad_norm": 0.4657532310305937, "learning_rate": 2.537357529872651e-06, "loss": 0.0192, "step": 148925 }, { "epoch": 0.6214168287004198, "grad_norm": 0.5318698139481354, "learning_rate": 2.5373149361060415e-06, "loss": 0.0276, "step": 148930 }, { "epoch": 0.62143769141541, "grad_norm": 0.5190909039288593, "learning_rate": 2.537272344484382e-06, "loss": 0.0281, "step": 148935 }, { "epoch": 0.6214585541304003, "grad_norm": 1.1605790453829818, "learning_rate": 2.5372297550074925e-06, "loss": 0.0256, "step": 148940 }, { "epoch": 0.6214794168453905, "grad_norm": 1.0314665684957496, "learning_rate": 2.5371871676751917e-06, "loss": 0.0293, "step": 148945 }, { "epoch": 0.6215002795603809, "grad_norm": 0.7677172984764637, "learning_rate": 2.5371445824873014e-06, "loss": 0.0218, "step": 148950 }, { "epoch": 0.6215211422753711, "grad_norm": 0.5335689721902696, "learning_rate": 2.5371019994436407e-06, "loss": 0.0244, "step": 148955 }, { "epoch": 0.6215420049903614, "grad_norm": 0.8338184482658767, "learning_rate": 2.5370594185440296e-06, "loss": 0.0225, "step": 148960 }, { "epoch": 0.6215628677053517, "grad_norm": 0.9706955476916674, "learning_rate": 2.5370168397882885e-06, "loss": 0.0228, "step": 148965 }, { "epoch": 0.621583730420342, "grad_norm": 0.6443273267115643, "learning_rate": 2.5369742631762366e-06, "loss": 0.0273, "step": 148970 }, { "epoch": 0.6216045931353322, "grad_norm": 0.5028840253907283, "learning_rate": 2.5369316887076956e-06, "loss": 0.0238, "step": 148975 }, { "epoch": 0.6216254558503226, "grad_norm": 0.7599576410920749, "learning_rate": 2.536889116382484e-06, "loss": 0.0186, "step": 148980 }, { "epoch": 0.6216463185653128, "grad_norm": 0.7255440070486242, "learning_rate": 2.5368465462004237e-06, "loss": 0.0247, "step": 148985 }, { "epoch": 0.6216671812803031, "grad_norm": 0.8372588386377041, "learning_rate": 2.5368039781613333e-06, "loss": 0.022, "step": 148990 }, { "epoch": 0.6216880439952934, "grad_norm": 0.8356384585268969, "learning_rate": 2.5367614122650343e-06, "loss": 0.0254, "step": 148995 }, { "epoch": 0.6217089067102837, "grad_norm": 0.6952956801820692, "learning_rate": 2.5367188485113457e-06, "loss": 0.0261, "step": 149000 }, { "epoch": 0.6217297694252739, "grad_norm": 0.6187245489756662, "learning_rate": 2.536676286900089e-06, "loss": 0.0232, "step": 149005 }, { "epoch": 0.6217506321402642, "grad_norm": 0.5977361160725166, "learning_rate": 2.536633727431083e-06, "loss": 0.0256, "step": 149010 }, { "epoch": 0.6217714948552545, "grad_norm": 0.5665181451749844, "learning_rate": 2.5365911701041494e-06, "loss": 0.0238, "step": 149015 }, { "epoch": 0.6217923575702448, "grad_norm": 0.5774477019527179, "learning_rate": 2.5365486149191084e-06, "loss": 0.0318, "step": 149020 }, { "epoch": 0.621813220285235, "grad_norm": 0.38661934055947705, "learning_rate": 2.5365060618757796e-06, "loss": 0.0243, "step": 149025 }, { "epoch": 0.6218340830002254, "grad_norm": 0.5012903582308997, "learning_rate": 2.5364635109739835e-06, "loss": 0.0266, "step": 149030 }, { "epoch": 0.6218549457152156, "grad_norm": 0.7103565098730112, "learning_rate": 2.5364209622135404e-06, "loss": 0.0207, "step": 149035 }, { "epoch": 0.6218758084302058, "grad_norm": 0.4205109158390367, "learning_rate": 2.5363784155942717e-06, "loss": 0.0247, "step": 149040 }, { "epoch": 0.6218966711451962, "grad_norm": 0.7764232696419593, "learning_rate": 2.5363358711159965e-06, "loss": 0.0245, "step": 149045 }, { "epoch": 0.6219175338601864, "grad_norm": 0.8742785098906424, "learning_rate": 2.5362933287785365e-06, "loss": 0.025, "step": 149050 }, { "epoch": 0.6219383965751767, "grad_norm": 0.6169927376858669, "learning_rate": 2.53625078858171e-06, "loss": 0.0261, "step": 149055 }, { "epoch": 0.6219592592901669, "grad_norm": 0.807935008314541, "learning_rate": 2.53620825052534e-06, "loss": 0.0224, "step": 149060 }, { "epoch": 0.6219801220051573, "grad_norm": 0.6041110516467135, "learning_rate": 2.536165714609246e-06, "loss": 0.02, "step": 149065 }, { "epoch": 0.6220009847201475, "grad_norm": 0.6547929225544706, "learning_rate": 2.5361231808332487e-06, "loss": 0.0224, "step": 149070 }, { "epoch": 0.6220218474351378, "grad_norm": 0.7137981710270204, "learning_rate": 2.536080649197168e-06, "loss": 0.0227, "step": 149075 }, { "epoch": 0.6220427101501281, "grad_norm": 0.49582792360629513, "learning_rate": 2.5360381197008247e-06, "loss": 0.0272, "step": 149080 }, { "epoch": 0.6220635728651184, "grad_norm": 0.4429702829827755, "learning_rate": 2.5359955923440403e-06, "loss": 0.0194, "step": 149085 }, { "epoch": 0.6220844355801086, "grad_norm": 0.6788111710269411, "learning_rate": 2.535953067126634e-06, "loss": 0.015, "step": 149090 }, { "epoch": 0.622105298295099, "grad_norm": 0.7782026199176448, "learning_rate": 2.535910544048428e-06, "loss": 0.0295, "step": 149095 }, { "epoch": 0.6221261610100892, "grad_norm": 0.6008741757763123, "learning_rate": 2.5358680231092413e-06, "loss": 0.022, "step": 149100 }, { "epoch": 0.6221470237250795, "grad_norm": 0.7608253500343476, "learning_rate": 2.5358255043088957e-06, "loss": 0.0241, "step": 149105 }, { "epoch": 0.6221678864400698, "grad_norm": 0.5700236256928499, "learning_rate": 2.5357829876472117e-06, "loss": 0.0166, "step": 149110 }, { "epoch": 0.6221887491550601, "grad_norm": 0.8474126365036155, "learning_rate": 2.5357404731240106e-06, "loss": 0.0213, "step": 149115 }, { "epoch": 0.6222096118700503, "grad_norm": 0.8194276973443985, "learning_rate": 2.535697960739111e-06, "loss": 0.0193, "step": 149120 }, { "epoch": 0.6222304745850405, "grad_norm": 0.32666984706422, "learning_rate": 2.535655450492336e-06, "loss": 0.023, "step": 149125 }, { "epoch": 0.6222513373000309, "grad_norm": 0.5521627615372441, "learning_rate": 2.5356129423835052e-06, "loss": 0.0291, "step": 149130 }, { "epoch": 0.6222722000150211, "grad_norm": 1.5856129052357182, "learning_rate": 2.53557043641244e-06, "loss": 0.0302, "step": 149135 }, { "epoch": 0.6222930627300114, "grad_norm": 0.4398085318907065, "learning_rate": 2.5355279325789604e-06, "loss": 0.0185, "step": 149140 }, { "epoch": 0.6223139254450017, "grad_norm": 0.6962600141130132, "learning_rate": 2.5354854308828887e-06, "loss": 0.0228, "step": 149145 }, { "epoch": 0.622334788159992, "grad_norm": 0.8220969561204702, "learning_rate": 2.5354429313240438e-06, "loss": 0.0204, "step": 149150 }, { "epoch": 0.6223556508749822, "grad_norm": 0.27746100301404725, "learning_rate": 2.535400433902248e-06, "loss": 0.0208, "step": 149155 }, { "epoch": 0.6223765135899726, "grad_norm": 0.5637370497542221, "learning_rate": 2.5353579386173223e-06, "loss": 0.0205, "step": 149160 }, { "epoch": 0.6223973763049628, "grad_norm": 0.7116377138201835, "learning_rate": 2.535315445469086e-06, "loss": 0.0224, "step": 149165 }, { "epoch": 0.6224182390199531, "grad_norm": 0.43858461821093137, "learning_rate": 2.5352729544573623e-06, "loss": 0.0223, "step": 149170 }, { "epoch": 0.6224391017349434, "grad_norm": 0.39185668327053214, "learning_rate": 2.5352304655819703e-06, "loss": 0.0183, "step": 149175 }, { "epoch": 0.6224599644499337, "grad_norm": 0.9962220985994615, "learning_rate": 2.535187978842732e-06, "loss": 0.021, "step": 149180 }, { "epoch": 0.6224808271649239, "grad_norm": 1.2917383144119188, "learning_rate": 2.5351454942394682e-06, "loss": 0.0286, "step": 149185 }, { "epoch": 0.6225016898799142, "grad_norm": 0.5270994603661624, "learning_rate": 2.535103011772e-06, "loss": 0.028, "step": 149190 }, { "epoch": 0.6225225525949045, "grad_norm": 0.6902007808771721, "learning_rate": 2.535060531440148e-06, "loss": 0.0188, "step": 149195 }, { "epoch": 0.6225434153098948, "grad_norm": 0.3985667262183086, "learning_rate": 2.535018053243734e-06, "loss": 0.0254, "step": 149200 }, { "epoch": 0.622564278024885, "grad_norm": 0.5792254207819849, "learning_rate": 2.534975577182579e-06, "loss": 0.0232, "step": 149205 }, { "epoch": 0.6225851407398754, "grad_norm": 0.4927977820603691, "learning_rate": 2.534933103256503e-06, "loss": 0.0222, "step": 149210 }, { "epoch": 0.6226060034548656, "grad_norm": 0.5231082150234335, "learning_rate": 2.5348906314653286e-06, "loss": 0.0167, "step": 149215 }, { "epoch": 0.6226268661698559, "grad_norm": 1.2367011076970265, "learning_rate": 2.534848161808876e-06, "loss": 0.024, "step": 149220 }, { "epoch": 0.6226477288848462, "grad_norm": 0.7637513519151409, "learning_rate": 2.534805694286967e-06, "loss": 0.0195, "step": 149225 }, { "epoch": 0.6226685915998365, "grad_norm": 0.6479193041450779, "learning_rate": 2.5347632288994222e-06, "loss": 0.023, "step": 149230 }, { "epoch": 0.6226894543148267, "grad_norm": 1.2326380129085273, "learning_rate": 2.5347207656460637e-06, "loss": 0.0297, "step": 149235 }, { "epoch": 0.6227103170298169, "grad_norm": 0.6639019741073316, "learning_rate": 2.5346783045267115e-06, "loss": 0.0277, "step": 149240 }, { "epoch": 0.6227311797448073, "grad_norm": 0.5877154769813417, "learning_rate": 2.5346358455411877e-06, "loss": 0.0234, "step": 149245 }, { "epoch": 0.6227520424597975, "grad_norm": 0.7685312683742187, "learning_rate": 2.5345933886893137e-06, "loss": 0.0236, "step": 149250 }, { "epoch": 0.6227729051747878, "grad_norm": 0.6777491754426633, "learning_rate": 2.5345509339709107e-06, "loss": 0.0224, "step": 149255 }, { "epoch": 0.6227937678897781, "grad_norm": 0.4150713069056229, "learning_rate": 2.5345084813857996e-06, "loss": 0.0218, "step": 149260 }, { "epoch": 0.6228146306047684, "grad_norm": 0.6457693005463445, "learning_rate": 2.5344660309338016e-06, "loss": 0.0184, "step": 149265 }, { "epoch": 0.6228354933197586, "grad_norm": 2.5225977875399823, "learning_rate": 2.534423582614739e-06, "loss": 0.0254, "step": 149270 }, { "epoch": 0.622856356034749, "grad_norm": 0.34203753270306775, "learning_rate": 2.5343811364284327e-06, "loss": 0.0273, "step": 149275 }, { "epoch": 0.6228772187497392, "grad_norm": 1.2050906154966012, "learning_rate": 2.534338692374704e-06, "loss": 0.025, "step": 149280 }, { "epoch": 0.6228980814647295, "grad_norm": 1.0059387209425592, "learning_rate": 2.5342962504533737e-06, "loss": 0.0223, "step": 149285 }, { "epoch": 0.6229189441797198, "grad_norm": 0.6746378956581838, "learning_rate": 2.534253810664265e-06, "loss": 0.021, "step": 149290 }, { "epoch": 0.6229398068947101, "grad_norm": 0.5272473651444721, "learning_rate": 2.5342113730071975e-06, "loss": 0.0225, "step": 149295 }, { "epoch": 0.6229606696097003, "grad_norm": 0.8222054272858481, "learning_rate": 2.5341689374819945e-06, "loss": 0.0178, "step": 149300 }, { "epoch": 0.6229815323246906, "grad_norm": 0.8068694808297343, "learning_rate": 2.5341265040884756e-06, "loss": 0.0231, "step": 149305 }, { "epoch": 0.6230023950396809, "grad_norm": 0.6938845722989763, "learning_rate": 2.534084072826464e-06, "loss": 0.0259, "step": 149310 }, { "epoch": 0.6230232577546712, "grad_norm": 0.8530935499876108, "learning_rate": 2.5340416436957797e-06, "loss": 0.0272, "step": 149315 }, { "epoch": 0.6230441204696614, "grad_norm": 0.6267791448212929, "learning_rate": 2.5339992166962458e-06, "loss": 0.023, "step": 149320 }, { "epoch": 0.6230649831846518, "grad_norm": 0.8798645791437663, "learning_rate": 2.533956791827683e-06, "loss": 0.0241, "step": 149325 }, { "epoch": 0.623085845899642, "grad_norm": 0.5133070693735978, "learning_rate": 2.5339143690899133e-06, "loss": 0.0159, "step": 149330 }, { "epoch": 0.6231067086146322, "grad_norm": 0.42914639193083637, "learning_rate": 2.533871948482758e-06, "loss": 0.0201, "step": 149335 }, { "epoch": 0.6231275713296226, "grad_norm": 1.0920270796935025, "learning_rate": 2.533829530006039e-06, "loss": 0.0335, "step": 149340 }, { "epoch": 0.6231484340446128, "grad_norm": 0.6634245595124747, "learning_rate": 2.5337871136595777e-06, "loss": 0.0242, "step": 149345 }, { "epoch": 0.6231692967596031, "grad_norm": 0.8290982230829647, "learning_rate": 2.5337446994431965e-06, "loss": 0.0278, "step": 149350 }, { "epoch": 0.6231901594745933, "grad_norm": 0.6782414901858598, "learning_rate": 2.5337022873567156e-06, "loss": 0.0246, "step": 149355 }, { "epoch": 0.6232110221895837, "grad_norm": 0.7700167282186541, "learning_rate": 2.533659877399959e-06, "loss": 0.0232, "step": 149360 }, { "epoch": 0.6232318849045739, "grad_norm": 0.7733462875496014, "learning_rate": 2.5336174695727467e-06, "loss": 0.0217, "step": 149365 }, { "epoch": 0.6232527476195642, "grad_norm": 0.7493430395913465, "learning_rate": 2.5335750638749012e-06, "loss": 0.0206, "step": 149370 }, { "epoch": 0.6232736103345545, "grad_norm": 0.5126331300136608, "learning_rate": 2.5335326603062438e-06, "loss": 0.0201, "step": 149375 }, { "epoch": 0.6232944730495448, "grad_norm": 0.6402281764026587, "learning_rate": 2.5334902588665973e-06, "loss": 0.0192, "step": 149380 }, { "epoch": 0.623315335764535, "grad_norm": 0.9086707464066389, "learning_rate": 2.5334478595557826e-06, "loss": 0.0269, "step": 149385 }, { "epoch": 0.6233361984795254, "grad_norm": 0.7677468874786045, "learning_rate": 2.5334054623736214e-06, "loss": 0.0191, "step": 149390 }, { "epoch": 0.6233570611945156, "grad_norm": 0.6913685965281685, "learning_rate": 2.5333630673199368e-06, "loss": 0.0195, "step": 149395 }, { "epoch": 0.6233779239095059, "grad_norm": 0.8186433484946622, "learning_rate": 2.53332067439455e-06, "loss": 0.0267, "step": 149400 }, { "epoch": 0.6233987866244962, "grad_norm": 0.6727128099462941, "learning_rate": 2.5332782835972824e-06, "loss": 0.0251, "step": 149405 }, { "epoch": 0.6234196493394865, "grad_norm": 0.8394684018464459, "learning_rate": 2.533235894927957e-06, "loss": 0.0262, "step": 149410 }, { "epoch": 0.6234405120544767, "grad_norm": 0.7521679036285177, "learning_rate": 2.5331935083863947e-06, "loss": 0.0222, "step": 149415 }, { "epoch": 0.623461374769467, "grad_norm": 0.5859306017906066, "learning_rate": 2.5331511239724184e-06, "loss": 0.0173, "step": 149420 }, { "epoch": 0.6234822374844573, "grad_norm": 0.4413874508032147, "learning_rate": 2.5331087416858496e-06, "loss": 0.0182, "step": 149425 }, { "epoch": 0.6235031001994475, "grad_norm": 0.5454768920564327, "learning_rate": 2.5330663615265112e-06, "loss": 0.0264, "step": 149430 }, { "epoch": 0.6235239629144378, "grad_norm": 0.5798036360759113, "learning_rate": 2.533023983494224e-06, "loss": 0.0245, "step": 149435 }, { "epoch": 0.6235448256294281, "grad_norm": 0.31615758261331744, "learning_rate": 2.5329816075888103e-06, "loss": 0.0164, "step": 149440 }, { "epoch": 0.6235656883444184, "grad_norm": 1.0378879397149754, "learning_rate": 2.5329392338100933e-06, "loss": 0.0297, "step": 149445 }, { "epoch": 0.6235865510594086, "grad_norm": 0.9163822317983191, "learning_rate": 2.5328968621578935e-06, "loss": 0.0202, "step": 149450 }, { "epoch": 0.623607413774399, "grad_norm": 1.0672476491622123, "learning_rate": 2.532854492632035e-06, "loss": 0.0303, "step": 149455 }, { "epoch": 0.6236282764893892, "grad_norm": 0.5790416115761242, "learning_rate": 2.5328121252323377e-06, "loss": 0.0169, "step": 149460 }, { "epoch": 0.6236491392043795, "grad_norm": 1.071255519285932, "learning_rate": 2.5327697599586255e-06, "loss": 0.0328, "step": 149465 }, { "epoch": 0.6236700019193698, "grad_norm": 0.7647208690536073, "learning_rate": 2.5327273968107204e-06, "loss": 0.0232, "step": 149470 }, { "epoch": 0.6236908646343601, "grad_norm": 0.6915417059916533, "learning_rate": 2.5326850357884436e-06, "loss": 0.0222, "step": 149475 }, { "epoch": 0.6237117273493503, "grad_norm": 0.4171721892963287, "learning_rate": 2.5326426768916186e-06, "loss": 0.0131, "step": 149480 }, { "epoch": 0.6237325900643406, "grad_norm": 0.4930070137124028, "learning_rate": 2.5326003201200673e-06, "loss": 0.0254, "step": 149485 }, { "epoch": 0.6237534527793309, "grad_norm": 0.876226156231495, "learning_rate": 2.532557965473611e-06, "loss": 0.0174, "step": 149490 }, { "epoch": 0.6237743154943212, "grad_norm": 0.6492780036876326, "learning_rate": 2.5325156129520733e-06, "loss": 0.0184, "step": 149495 }, { "epoch": 0.6237951782093114, "grad_norm": 0.5760947297027365, "learning_rate": 2.5324732625552755e-06, "loss": 0.0208, "step": 149500 }, { "epoch": 0.6238160409243018, "grad_norm": 0.8633864936416618, "learning_rate": 2.532430914283041e-06, "loss": 0.0257, "step": 149505 }, { "epoch": 0.623836903639292, "grad_norm": 0.8029438181560975, "learning_rate": 2.5323885681351917e-06, "loss": 0.0233, "step": 149510 }, { "epoch": 0.6238577663542823, "grad_norm": 0.587454278006111, "learning_rate": 2.5323462241115494e-06, "loss": 0.0216, "step": 149515 }, { "epoch": 0.6238786290692726, "grad_norm": 0.6193203493673347, "learning_rate": 2.5323038822119376e-06, "loss": 0.0233, "step": 149520 }, { "epoch": 0.6238994917842628, "grad_norm": 0.8543170258783251, "learning_rate": 2.532261542436178e-06, "loss": 0.0293, "step": 149525 }, { "epoch": 0.6239203544992531, "grad_norm": 0.8089950120902292, "learning_rate": 2.5322192047840928e-06, "loss": 0.0311, "step": 149530 }, { "epoch": 0.6239412172142433, "grad_norm": 0.7349833577450137, "learning_rate": 2.5321768692555053e-06, "loss": 0.0289, "step": 149535 }, { "epoch": 0.6239620799292337, "grad_norm": 0.2854263305570546, "learning_rate": 2.5321345358502376e-06, "loss": 0.0249, "step": 149540 }, { "epoch": 0.6239829426442239, "grad_norm": 0.730148772201767, "learning_rate": 2.532092204568112e-06, "loss": 0.0237, "step": 149545 }, { "epoch": 0.6240038053592142, "grad_norm": 1.0395514608045702, "learning_rate": 2.532049875408951e-06, "loss": 0.0262, "step": 149550 }, { "epoch": 0.6240246680742045, "grad_norm": 0.8513665746587666, "learning_rate": 2.532007548372579e-06, "loss": 0.0231, "step": 149555 }, { "epoch": 0.6240455307891948, "grad_norm": 0.5584027639662623, "learning_rate": 2.531965223458815e-06, "loss": 0.0237, "step": 149560 }, { "epoch": 0.624066393504185, "grad_norm": 0.7719853804332104, "learning_rate": 2.5319229006674843e-06, "loss": 0.0184, "step": 149565 }, { "epoch": 0.6240872562191754, "grad_norm": 0.6565576993644391, "learning_rate": 2.5318805799984094e-06, "loss": 0.0232, "step": 149570 }, { "epoch": 0.6241081189341656, "grad_norm": 1.1688667590871635, "learning_rate": 2.5318382614514116e-06, "loss": 0.0275, "step": 149575 }, { "epoch": 0.6241289816491559, "grad_norm": 0.904417967328885, "learning_rate": 2.531795945026314e-06, "loss": 0.0244, "step": 149580 }, { "epoch": 0.6241498443641462, "grad_norm": 0.4946770149957946, "learning_rate": 2.5317536307229403e-06, "loss": 0.0261, "step": 149585 }, { "epoch": 0.6241707070791365, "grad_norm": 0.569037816042347, "learning_rate": 2.531711318541112e-06, "loss": 0.0204, "step": 149590 }, { "epoch": 0.6241915697941267, "grad_norm": 1.147385676447527, "learning_rate": 2.5316690084806527e-06, "loss": 0.0269, "step": 149595 }, { "epoch": 0.624212432509117, "grad_norm": 0.28880398624557524, "learning_rate": 2.531626700541385e-06, "loss": 0.0281, "step": 149600 }, { "epoch": 0.6242332952241073, "grad_norm": 0.7217439989914736, "learning_rate": 2.531584394723131e-06, "loss": 0.0166, "step": 149605 }, { "epoch": 0.6242541579390976, "grad_norm": 0.42853130359400005, "learning_rate": 2.531542091025714e-06, "loss": 0.0233, "step": 149610 }, { "epoch": 0.6242750206540878, "grad_norm": 1.0157577392786479, "learning_rate": 2.531499789448956e-06, "loss": 0.0312, "step": 149615 }, { "epoch": 0.6242958833690782, "grad_norm": 1.031127729149408, "learning_rate": 2.531457489992681e-06, "loss": 0.0218, "step": 149620 }, { "epoch": 0.6243167460840684, "grad_norm": 0.9250883161540157, "learning_rate": 2.531415192656712e-06, "loss": 0.0226, "step": 149625 }, { "epoch": 0.6243376087990586, "grad_norm": 0.993149143225724, "learning_rate": 2.5313728974408708e-06, "loss": 0.0253, "step": 149630 }, { "epoch": 0.624358471514049, "grad_norm": 0.677612776954122, "learning_rate": 2.5313306043449807e-06, "loss": 0.0279, "step": 149635 }, { "epoch": 0.6243793342290392, "grad_norm": 0.6067629818011236, "learning_rate": 2.5312883133688647e-06, "loss": 0.0214, "step": 149640 }, { "epoch": 0.6244001969440295, "grad_norm": 0.626080809189555, "learning_rate": 2.5312460245123453e-06, "loss": 0.0198, "step": 149645 }, { "epoch": 0.6244210596590198, "grad_norm": 0.7843059890166508, "learning_rate": 2.5312037377752464e-06, "loss": 0.0272, "step": 149650 }, { "epoch": 0.6244419223740101, "grad_norm": 0.41262681658212685, "learning_rate": 2.5311614531573904e-06, "loss": 0.0166, "step": 149655 }, { "epoch": 0.6244627850890003, "grad_norm": 0.9386186592929688, "learning_rate": 2.5311191706586004e-06, "loss": 0.0243, "step": 149660 }, { "epoch": 0.6244836478039906, "grad_norm": 0.7507543148857113, "learning_rate": 2.5310768902786984e-06, "loss": 0.0253, "step": 149665 }, { "epoch": 0.6245045105189809, "grad_norm": 0.7643022341215688, "learning_rate": 2.5310346120175096e-06, "loss": 0.0237, "step": 149670 }, { "epoch": 0.6245253732339712, "grad_norm": 0.760544794470462, "learning_rate": 2.530992335874855e-06, "loss": 0.0235, "step": 149675 }, { "epoch": 0.6245462359489614, "grad_norm": 0.404616115064011, "learning_rate": 2.5309500618505583e-06, "loss": 0.0215, "step": 149680 }, { "epoch": 0.6245670986639518, "grad_norm": 0.5969868758129432, "learning_rate": 2.5309077899444434e-06, "loss": 0.028, "step": 149685 }, { "epoch": 0.624587961378942, "grad_norm": 1.4007775959839333, "learning_rate": 2.530865520156332e-06, "loss": 0.0289, "step": 149690 }, { "epoch": 0.6246088240939323, "grad_norm": 1.5017001099716374, "learning_rate": 2.5308232524860487e-06, "loss": 0.0289, "step": 149695 }, { "epoch": 0.6246296868089226, "grad_norm": 0.6561957309582789, "learning_rate": 2.5307809869334153e-06, "loss": 0.0242, "step": 149700 }, { "epoch": 0.6246505495239129, "grad_norm": 0.9275363845883545, "learning_rate": 2.5307387234982567e-06, "loss": 0.0278, "step": 149705 }, { "epoch": 0.6246714122389031, "grad_norm": 0.6076796970987232, "learning_rate": 2.530696462180394e-06, "loss": 0.0182, "step": 149710 }, { "epoch": 0.6246922749538933, "grad_norm": 0.43347600401916725, "learning_rate": 2.5306542029796523e-06, "loss": 0.0199, "step": 149715 }, { "epoch": 0.6247131376688837, "grad_norm": 0.48734939903114627, "learning_rate": 2.530611945895854e-06, "loss": 0.026, "step": 149720 }, { "epoch": 0.6247340003838739, "grad_norm": 0.5379859823840119, "learning_rate": 2.530569690928822e-06, "loss": 0.0202, "step": 149725 }, { "epoch": 0.6247548630988642, "grad_norm": 0.5794278904307915, "learning_rate": 2.53052743807838e-06, "loss": 0.0148, "step": 149730 }, { "epoch": 0.6247757258138545, "grad_norm": 0.5468189693416523, "learning_rate": 2.5304851873443514e-06, "loss": 0.0245, "step": 149735 }, { "epoch": 0.6247965885288448, "grad_norm": 0.7556822380052804, "learning_rate": 2.530442938726559e-06, "loss": 0.0223, "step": 149740 }, { "epoch": 0.624817451243835, "grad_norm": 0.5749515100004843, "learning_rate": 2.530400692224827e-06, "loss": 0.026, "step": 149745 }, { "epoch": 0.6248383139588254, "grad_norm": 0.4401873232131652, "learning_rate": 2.530358447838978e-06, "loss": 0.0177, "step": 149750 }, { "epoch": 0.6248591766738156, "grad_norm": 0.688494172915577, "learning_rate": 2.5303162055688355e-06, "loss": 0.0233, "step": 149755 }, { "epoch": 0.6248800393888059, "grad_norm": 0.4031699614163772, "learning_rate": 2.5302739654142237e-06, "loss": 0.0171, "step": 149760 }, { "epoch": 0.6249009021037962, "grad_norm": 0.5044520805705259, "learning_rate": 2.530231727374965e-06, "loss": 0.0174, "step": 149765 }, { "epoch": 0.6249217648187865, "grad_norm": 0.9703649282987962, "learning_rate": 2.5301894914508833e-06, "loss": 0.0386, "step": 149770 }, { "epoch": 0.6249426275337767, "grad_norm": 0.8092075688176406, "learning_rate": 2.5301472576418023e-06, "loss": 0.02, "step": 149775 }, { "epoch": 0.624963490248767, "grad_norm": 0.9494693592055888, "learning_rate": 2.5301050259475445e-06, "loss": 0.0258, "step": 149780 }, { "epoch": 0.6249843529637573, "grad_norm": 0.4914135549629349, "learning_rate": 2.530062796367935e-06, "loss": 0.0192, "step": 149785 }, { "epoch": 0.6250052156787476, "grad_norm": 0.6870655476009907, "learning_rate": 2.530020568902796e-06, "loss": 0.0219, "step": 149790 }, { "epoch": 0.6250260783937378, "grad_norm": 1.4198693446119974, "learning_rate": 2.5299783435519515e-06, "loss": 0.0207, "step": 149795 }, { "epoch": 0.6250469411087282, "grad_norm": 0.7937143923797514, "learning_rate": 2.529936120315225e-06, "loss": 0.0214, "step": 149800 }, { "epoch": 0.6250678038237184, "grad_norm": 0.4060306771049437, "learning_rate": 2.5298938991924405e-06, "loss": 0.0235, "step": 149805 }, { "epoch": 0.6250886665387086, "grad_norm": 1.3283234348660322, "learning_rate": 2.529851680183421e-06, "loss": 0.0316, "step": 149810 }, { "epoch": 0.625109529253699, "grad_norm": 0.992945061368871, "learning_rate": 2.5298094632879912e-06, "loss": 0.0264, "step": 149815 }, { "epoch": 0.6251303919686892, "grad_norm": 0.7212067243881894, "learning_rate": 2.5297672485059727e-06, "loss": 0.0268, "step": 149820 }, { "epoch": 0.6251512546836795, "grad_norm": 0.4022626071349889, "learning_rate": 2.5297250358371917e-06, "loss": 0.0637, "step": 149825 }, { "epoch": 0.6251721173986698, "grad_norm": 0.36319264116117606, "learning_rate": 2.52968282528147e-06, "loss": 0.0186, "step": 149830 }, { "epoch": 0.6251929801136601, "grad_norm": 0.7387965374021936, "learning_rate": 2.529640616838632e-06, "loss": 0.0231, "step": 149835 }, { "epoch": 0.6252138428286503, "grad_norm": 0.5577821189186195, "learning_rate": 2.529598410508501e-06, "loss": 0.018, "step": 149840 }, { "epoch": 0.6252347055436406, "grad_norm": 0.6905773095205514, "learning_rate": 2.5295562062909017e-06, "loss": 0.0282, "step": 149845 }, { "epoch": 0.6252555682586309, "grad_norm": 0.435151408939743, "learning_rate": 2.529514004185657e-06, "loss": 0.0195, "step": 149850 }, { "epoch": 0.6252764309736212, "grad_norm": 1.5720751316470094, "learning_rate": 2.529471804192591e-06, "loss": 0.0215, "step": 149855 }, { "epoch": 0.6252972936886114, "grad_norm": 0.9140814155561561, "learning_rate": 2.5294296063115276e-06, "loss": 0.0245, "step": 149860 }, { "epoch": 0.6253181564036018, "grad_norm": 0.44249969684645457, "learning_rate": 2.5293874105422906e-06, "loss": 0.0233, "step": 149865 }, { "epoch": 0.625339019118592, "grad_norm": 0.7683680498084234, "learning_rate": 2.5293452168847037e-06, "loss": 0.0148, "step": 149870 }, { "epoch": 0.6253598818335823, "grad_norm": 0.812139555606573, "learning_rate": 2.5293030253385908e-06, "loss": 0.0261, "step": 149875 }, { "epoch": 0.6253807445485726, "grad_norm": 0.5239941376034574, "learning_rate": 2.529260835903777e-06, "loss": 0.0213, "step": 149880 }, { "epoch": 0.6254016072635629, "grad_norm": 1.8580037233955617, "learning_rate": 2.529218648580084e-06, "loss": 0.0237, "step": 149885 }, { "epoch": 0.6254224699785531, "grad_norm": 0.5681398064703918, "learning_rate": 2.529176463367337e-06, "loss": 0.0246, "step": 149890 }, { "epoch": 0.6254433326935434, "grad_norm": 1.3287147661600416, "learning_rate": 2.52913428026536e-06, "loss": 0.0245, "step": 149895 }, { "epoch": 0.6254641954085337, "grad_norm": 0.7159721825358083, "learning_rate": 2.529092099273977e-06, "loss": 0.0316, "step": 149900 }, { "epoch": 0.625485058123524, "grad_norm": 0.7030362318871928, "learning_rate": 2.529049920393012e-06, "loss": 0.0317, "step": 149905 }, { "epoch": 0.6255059208385142, "grad_norm": 0.6586884731410377, "learning_rate": 2.529007743622288e-06, "loss": 0.0254, "step": 149910 }, { "epoch": 0.6255267835535046, "grad_norm": 0.6245199767765377, "learning_rate": 2.5289655689616305e-06, "loss": 0.0224, "step": 149915 }, { "epoch": 0.6255476462684948, "grad_norm": 1.2050873430588152, "learning_rate": 2.528923396410863e-06, "loss": 0.0178, "step": 149920 }, { "epoch": 0.625568508983485, "grad_norm": 0.7178642532897502, "learning_rate": 2.528881225969809e-06, "loss": 0.0225, "step": 149925 }, { "epoch": 0.6255893716984754, "grad_norm": 0.6131091866124058, "learning_rate": 2.5288390576382943e-06, "loss": 0.0247, "step": 149930 }, { "epoch": 0.6256102344134656, "grad_norm": 0.4168561076900699, "learning_rate": 2.528796891416141e-06, "loss": 0.0239, "step": 149935 }, { "epoch": 0.6256310971284559, "grad_norm": 0.457808080230006, "learning_rate": 2.528754727303174e-06, "loss": 0.0158, "step": 149940 }, { "epoch": 0.6256519598434462, "grad_norm": 0.5484915277177396, "learning_rate": 2.528712565299218e-06, "loss": 0.0209, "step": 149945 }, { "epoch": 0.6256728225584365, "grad_norm": 0.2631141633920251, "learning_rate": 2.5286704054040963e-06, "loss": 0.0193, "step": 149950 }, { "epoch": 0.6256936852734267, "grad_norm": 0.684508972319312, "learning_rate": 2.5286282476176343e-06, "loss": 0.018, "step": 149955 }, { "epoch": 0.625714547988417, "grad_norm": 2.0910032133032797, "learning_rate": 2.528586091939655e-06, "loss": 0.0255, "step": 149960 }, { "epoch": 0.6257354107034073, "grad_norm": 4.064449350023077, "learning_rate": 2.5285439383699835e-06, "loss": 0.0268, "step": 149965 }, { "epoch": 0.6257562734183976, "grad_norm": 0.773463230424506, "learning_rate": 2.528501786908443e-06, "loss": 0.0326, "step": 149970 }, { "epoch": 0.6257771361333878, "grad_norm": 0.7154107981244059, "learning_rate": 2.528459637554859e-06, "loss": 0.0211, "step": 149975 }, { "epoch": 0.6257979988483782, "grad_norm": 0.8487293949015893, "learning_rate": 2.528417490309055e-06, "loss": 0.0281, "step": 149980 }, { "epoch": 0.6258188615633684, "grad_norm": 0.477167852730226, "learning_rate": 2.5283753451708563e-06, "loss": 0.0181, "step": 149985 }, { "epoch": 0.6258397242783587, "grad_norm": 1.015244924885219, "learning_rate": 2.528333202140087e-06, "loss": 0.0297, "step": 149990 }, { "epoch": 0.625860586993349, "grad_norm": 1.7060160119170207, "learning_rate": 2.52829106121657e-06, "loss": 0.0249, "step": 149995 }, { "epoch": 0.6258814497083393, "grad_norm": 0.3531748911064582, "learning_rate": 2.5282489224001316e-06, "loss": 0.0201, "step": 150000 }, { "epoch": 0.6259023124233295, "grad_norm": 0.6782759868014678, "learning_rate": 2.5282067856905946e-06, "loss": 0.0251, "step": 150005 }, { "epoch": 0.6259231751383199, "grad_norm": 0.8730111084519849, "learning_rate": 2.5281646510877848e-06, "loss": 0.0204, "step": 150010 }, { "epoch": 0.6259440378533101, "grad_norm": 0.6849168608960031, "learning_rate": 2.528122518591526e-06, "loss": 0.0176, "step": 150015 }, { "epoch": 0.6259649005683003, "grad_norm": 1.0130034016080618, "learning_rate": 2.5280803882016425e-06, "loss": 0.0215, "step": 150020 }, { "epoch": 0.6259857632832906, "grad_norm": 0.7742408903798594, "learning_rate": 2.5280382599179594e-06, "loss": 0.0227, "step": 150025 }, { "epoch": 0.6260066259982809, "grad_norm": 1.2439644229854445, "learning_rate": 2.527996133740301e-06, "loss": 0.0315, "step": 150030 }, { "epoch": 0.6260274887132712, "grad_norm": 0.7642959850546688, "learning_rate": 2.527954009668491e-06, "loss": 0.0249, "step": 150035 }, { "epoch": 0.6260483514282614, "grad_norm": 0.48066565230256836, "learning_rate": 2.527911887702355e-06, "loss": 0.0215, "step": 150040 }, { "epoch": 0.6260692141432518, "grad_norm": 1.2497060988689914, "learning_rate": 2.5278697678417175e-06, "loss": 0.0307, "step": 150045 }, { "epoch": 0.626090076858242, "grad_norm": 0.7421338698765074, "learning_rate": 2.5278276500864028e-06, "loss": 0.0267, "step": 150050 }, { "epoch": 0.6261109395732323, "grad_norm": 0.93070201249471, "learning_rate": 2.527785534436235e-06, "loss": 0.0263, "step": 150055 }, { "epoch": 0.6261318022882226, "grad_norm": 0.6667023594422764, "learning_rate": 2.52774342089104e-06, "loss": 0.0209, "step": 150060 }, { "epoch": 0.6261526650032129, "grad_norm": 0.576829791091228, "learning_rate": 2.5277013094506414e-06, "loss": 0.0148, "step": 150065 }, { "epoch": 0.6261735277182031, "grad_norm": 0.6001273036471968, "learning_rate": 2.527659200114864e-06, "loss": 0.0212, "step": 150070 }, { "epoch": 0.6261943904331934, "grad_norm": 0.6239987506613026, "learning_rate": 2.527617092883533e-06, "loss": 0.024, "step": 150075 }, { "epoch": 0.6262152531481837, "grad_norm": 0.6193217911682551, "learning_rate": 2.527574987756473e-06, "loss": 0.0219, "step": 150080 }, { "epoch": 0.626236115863174, "grad_norm": 0.3958801398356713, "learning_rate": 2.5275328847335083e-06, "loss": 0.0162, "step": 150085 }, { "epoch": 0.6262569785781642, "grad_norm": 0.613318348145091, "learning_rate": 2.527490783814464e-06, "loss": 0.0205, "step": 150090 }, { "epoch": 0.6262778412931546, "grad_norm": 0.4080908280184131, "learning_rate": 2.5274486849991653e-06, "loss": 0.0163, "step": 150095 }, { "epoch": 0.6262987040081448, "grad_norm": 0.3997670209320662, "learning_rate": 2.527406588287436e-06, "loss": 0.0146, "step": 150100 }, { "epoch": 0.626319566723135, "grad_norm": 0.4405743734952237, "learning_rate": 2.527364493679102e-06, "loss": 0.0185, "step": 150105 }, { "epoch": 0.6263404294381254, "grad_norm": 0.6503528231079551, "learning_rate": 2.527322401173987e-06, "loss": 0.0217, "step": 150110 }, { "epoch": 0.6263612921531156, "grad_norm": 1.0070050223325346, "learning_rate": 2.5272803107719167e-06, "loss": 0.0193, "step": 150115 }, { "epoch": 0.6263821548681059, "grad_norm": 0.739114369908498, "learning_rate": 2.527238222472716e-06, "loss": 0.0197, "step": 150120 }, { "epoch": 0.6264030175830962, "grad_norm": 0.4303505129547347, "learning_rate": 2.5271961362762095e-06, "loss": 0.0215, "step": 150125 }, { "epoch": 0.6264238802980865, "grad_norm": 0.7941680076612418, "learning_rate": 2.5271540521822217e-06, "loss": 0.0218, "step": 150130 }, { "epoch": 0.6264447430130767, "grad_norm": 0.7699738606013784, "learning_rate": 2.527111970190579e-06, "loss": 0.0224, "step": 150135 }, { "epoch": 0.626465605728067, "grad_norm": 0.5922975434972956, "learning_rate": 2.5270698903011043e-06, "loss": 0.0174, "step": 150140 }, { "epoch": 0.6264864684430573, "grad_norm": 0.6523830592788893, "learning_rate": 2.5270278125136245e-06, "loss": 0.0183, "step": 150145 }, { "epoch": 0.6265073311580476, "grad_norm": 0.5794232425471463, "learning_rate": 2.5269857368279625e-06, "loss": 0.0173, "step": 150150 }, { "epoch": 0.6265281938730378, "grad_norm": 0.36999493455902277, "learning_rate": 2.526943663243946e-06, "loss": 0.0195, "step": 150155 }, { "epoch": 0.6265490565880282, "grad_norm": 0.7057409183523359, "learning_rate": 2.526901591761398e-06, "loss": 0.0186, "step": 150160 }, { "epoch": 0.6265699193030184, "grad_norm": 0.8494309998476491, "learning_rate": 2.5268595223801444e-06, "loss": 0.0319, "step": 150165 }, { "epoch": 0.6265907820180087, "grad_norm": 0.7472615912959457, "learning_rate": 2.52681745510001e-06, "loss": 0.024, "step": 150170 }, { "epoch": 0.626611644732999, "grad_norm": 0.7838201058958407, "learning_rate": 2.52677538992082e-06, "loss": 0.0196, "step": 150175 }, { "epoch": 0.6266325074479893, "grad_norm": 0.5341545626450384, "learning_rate": 2.5267333268424e-06, "loss": 0.0186, "step": 150180 }, { "epoch": 0.6266533701629795, "grad_norm": 0.7718251158847009, "learning_rate": 2.5266912658645744e-06, "loss": 0.0185, "step": 150185 }, { "epoch": 0.6266742328779699, "grad_norm": 0.6210889472975559, "learning_rate": 2.5266492069871683e-06, "loss": 0.024, "step": 150190 }, { "epoch": 0.6266950955929601, "grad_norm": 0.3785372053841093, "learning_rate": 2.5266071502100082e-06, "loss": 0.0248, "step": 150195 }, { "epoch": 0.6267159583079503, "grad_norm": 0.9912390119440063, "learning_rate": 2.5265650955329173e-06, "loss": 0.0244, "step": 150200 }, { "epoch": 0.6267368210229406, "grad_norm": 0.49059059168937, "learning_rate": 2.5265230429557223e-06, "loss": 0.0258, "step": 150205 }, { "epoch": 0.626757683737931, "grad_norm": 0.8809457957895491, "learning_rate": 2.526480992478248e-06, "loss": 0.0276, "step": 150210 }, { "epoch": 0.6267785464529212, "grad_norm": 0.6789656617993278, "learning_rate": 2.52643894410032e-06, "loss": 0.0306, "step": 150215 }, { "epoch": 0.6267994091679114, "grad_norm": 0.7809188320570629, "learning_rate": 2.5263968978217627e-06, "loss": 0.0211, "step": 150220 }, { "epoch": 0.6268202718829018, "grad_norm": 1.2282913983860415, "learning_rate": 2.5263548536424026e-06, "loss": 0.0278, "step": 150225 }, { "epoch": 0.626841134597892, "grad_norm": 2.69938441639111, "learning_rate": 2.5263128115620642e-06, "loss": 0.0257, "step": 150230 }, { "epoch": 0.6268619973128823, "grad_norm": 0.42748835796375473, "learning_rate": 2.5262707715805736e-06, "loss": 0.0182, "step": 150235 }, { "epoch": 0.6268828600278726, "grad_norm": 0.5108688752441306, "learning_rate": 2.5262287336977552e-06, "loss": 0.019, "step": 150240 }, { "epoch": 0.6269037227428629, "grad_norm": 0.5392083633255172, "learning_rate": 2.5261866979134352e-06, "loss": 0.0254, "step": 150245 }, { "epoch": 0.6269245854578531, "grad_norm": 0.6717040956280883, "learning_rate": 2.5261446642274386e-06, "loss": 0.0176, "step": 150250 }, { "epoch": 0.6269454481728434, "grad_norm": 0.41003737763077563, "learning_rate": 2.5261026326395904e-06, "loss": 0.023, "step": 150255 }, { "epoch": 0.6269663108878337, "grad_norm": 0.6308175546838273, "learning_rate": 2.526060603149717e-06, "loss": 0.0202, "step": 150260 }, { "epoch": 0.626987173602824, "grad_norm": 0.8312888085220541, "learning_rate": 2.526018575757643e-06, "loss": 0.0243, "step": 150265 }, { "epoch": 0.6270080363178142, "grad_norm": 0.870705492455135, "learning_rate": 2.525976550463195e-06, "loss": 0.028, "step": 150270 }, { "epoch": 0.6270288990328046, "grad_norm": 0.74998124248348, "learning_rate": 2.5259345272661977e-06, "loss": 0.0209, "step": 150275 }, { "epoch": 0.6270497617477948, "grad_norm": 0.43406566938840846, "learning_rate": 2.5258925061664767e-06, "loss": 0.0188, "step": 150280 }, { "epoch": 0.627070624462785, "grad_norm": 0.5640772230982192, "learning_rate": 2.525850487163858e-06, "loss": 0.0144, "step": 150285 }, { "epoch": 0.6270914871777754, "grad_norm": 0.9717676132015833, "learning_rate": 2.525808470258166e-06, "loss": 0.0333, "step": 150290 }, { "epoch": 0.6271123498927657, "grad_norm": 0.43171294621238876, "learning_rate": 2.5257664554492277e-06, "loss": 0.0293, "step": 150295 }, { "epoch": 0.6271332126077559, "grad_norm": 0.3031244110100691, "learning_rate": 2.5257244427368684e-06, "loss": 0.0232, "step": 150300 }, { "epoch": 0.6271540753227463, "grad_norm": 0.4077040186693633, "learning_rate": 2.5256824321209123e-06, "loss": 0.0209, "step": 150305 }, { "epoch": 0.6271749380377365, "grad_norm": 0.4254030878215296, "learning_rate": 2.5256404236011874e-06, "loss": 0.0234, "step": 150310 }, { "epoch": 0.6271958007527267, "grad_norm": 0.35217980771086027, "learning_rate": 2.525598417177518e-06, "loss": 0.0212, "step": 150315 }, { "epoch": 0.627216663467717, "grad_norm": 0.6647072333514855, "learning_rate": 2.5255564128497293e-06, "loss": 0.0179, "step": 150320 }, { "epoch": 0.6272375261827073, "grad_norm": 0.6822345334900407, "learning_rate": 2.525514410617648e-06, "loss": 0.0176, "step": 150325 }, { "epoch": 0.6272583888976976, "grad_norm": 0.5509671666442058, "learning_rate": 2.5254724104811e-06, "loss": 0.0207, "step": 150330 }, { "epoch": 0.6272792516126878, "grad_norm": 0.49563043107164123, "learning_rate": 2.5254304124399106e-06, "loss": 0.0208, "step": 150335 }, { "epoch": 0.6273001143276782, "grad_norm": 0.6406525391062856, "learning_rate": 2.5253884164939057e-06, "loss": 0.0182, "step": 150340 }, { "epoch": 0.6273209770426684, "grad_norm": 1.0899394652518009, "learning_rate": 2.525346422642911e-06, "loss": 0.0305, "step": 150345 }, { "epoch": 0.6273418397576587, "grad_norm": 0.29985229903978244, "learning_rate": 2.525304430886752e-06, "loss": 0.0227, "step": 150350 }, { "epoch": 0.627362702472649, "grad_norm": 0.5024717276143161, "learning_rate": 2.5252624412252547e-06, "loss": 0.0281, "step": 150355 }, { "epoch": 0.6273835651876393, "grad_norm": 0.7892326022230016, "learning_rate": 2.5252204536582453e-06, "loss": 0.0221, "step": 150360 }, { "epoch": 0.6274044279026295, "grad_norm": 0.8170557738966953, "learning_rate": 2.5251784681855496e-06, "loss": 0.0229, "step": 150365 }, { "epoch": 0.6274252906176199, "grad_norm": 0.5235212273632882, "learning_rate": 2.5251364848069932e-06, "loss": 0.0292, "step": 150370 }, { "epoch": 0.6274461533326101, "grad_norm": 0.46976569742923846, "learning_rate": 2.5250945035224023e-06, "loss": 0.0204, "step": 150375 }, { "epoch": 0.6274670160476004, "grad_norm": 0.5492662942708074, "learning_rate": 2.5250525243316027e-06, "loss": 0.021, "step": 150380 }, { "epoch": 0.6274878787625906, "grad_norm": 0.8061478089989691, "learning_rate": 2.5250105472344212e-06, "loss": 0.0265, "step": 150385 }, { "epoch": 0.627508741477581, "grad_norm": 0.5556062263232094, "learning_rate": 2.524968572230682e-06, "loss": 0.0217, "step": 150390 }, { "epoch": 0.6275296041925712, "grad_norm": 0.6609545243700198, "learning_rate": 2.5249265993202125e-06, "loss": 0.023, "step": 150395 }, { "epoch": 0.6275504669075614, "grad_norm": 0.2525310313635344, "learning_rate": 2.524884628502839e-06, "loss": 0.0182, "step": 150400 }, { "epoch": 0.6275713296225518, "grad_norm": 0.663875968708128, "learning_rate": 2.5248426597783854e-06, "loss": 0.0316, "step": 150405 }, { "epoch": 0.627592192337542, "grad_norm": 0.8755904875531292, "learning_rate": 2.52480069314668e-06, "loss": 0.0257, "step": 150410 }, { "epoch": 0.6276130550525323, "grad_norm": 0.7415416038571232, "learning_rate": 2.524758728607548e-06, "loss": 0.0203, "step": 150415 }, { "epoch": 0.6276339177675226, "grad_norm": 0.6353780916551494, "learning_rate": 2.524716766160816e-06, "loss": 0.0227, "step": 150420 }, { "epoch": 0.6276547804825129, "grad_norm": 0.5505809475039977, "learning_rate": 2.52467480580631e-06, "loss": 0.0208, "step": 150425 }, { "epoch": 0.6276756431975031, "grad_norm": 0.7333642593054127, "learning_rate": 2.5246328475438555e-06, "loss": 0.0188, "step": 150430 }, { "epoch": 0.6276965059124934, "grad_norm": 0.6603575825331284, "learning_rate": 2.5245908913732786e-06, "loss": 0.0197, "step": 150435 }, { "epoch": 0.6277173686274837, "grad_norm": 0.7816400039111451, "learning_rate": 2.5245489372944067e-06, "loss": 0.0226, "step": 150440 }, { "epoch": 0.627738231342474, "grad_norm": 0.5868239024815917, "learning_rate": 2.5245069853070647e-06, "loss": 0.0212, "step": 150445 }, { "epoch": 0.6277590940574642, "grad_norm": 0.5105868357097513, "learning_rate": 2.5244650354110795e-06, "loss": 0.0276, "step": 150450 }, { "epoch": 0.6277799567724546, "grad_norm": 0.9072260992063214, "learning_rate": 2.524423087606277e-06, "loss": 0.0245, "step": 150455 }, { "epoch": 0.6278008194874448, "grad_norm": 0.49025718990343986, "learning_rate": 2.524381141892484e-06, "loss": 0.0137, "step": 150460 }, { "epoch": 0.6278216822024351, "grad_norm": 0.5807792199853657, "learning_rate": 2.5243391982695265e-06, "loss": 0.0224, "step": 150465 }, { "epoch": 0.6278425449174254, "grad_norm": 0.8017867479584913, "learning_rate": 2.5242972567372303e-06, "loss": 0.0278, "step": 150470 }, { "epoch": 0.6278634076324157, "grad_norm": 0.6042524720202771, "learning_rate": 2.5242553172954228e-06, "loss": 0.0212, "step": 150475 }, { "epoch": 0.6278842703474059, "grad_norm": 0.4625807177815739, "learning_rate": 2.5242133799439295e-06, "loss": 0.0252, "step": 150480 }, { "epoch": 0.6279051330623963, "grad_norm": 0.6582740218389469, "learning_rate": 2.5241714446825765e-06, "loss": 0.0214, "step": 150485 }, { "epoch": 0.6279259957773865, "grad_norm": 0.48988518831616906, "learning_rate": 2.5241295115111913e-06, "loss": 0.0178, "step": 150490 }, { "epoch": 0.6279468584923767, "grad_norm": 0.8504364535406007, "learning_rate": 2.5240875804296e-06, "loss": 0.0186, "step": 150495 }, { "epoch": 0.627967721207367, "grad_norm": 0.5457332469293318, "learning_rate": 2.524045651437628e-06, "loss": 0.0244, "step": 150500 }, { "epoch": 0.6279885839223573, "grad_norm": 0.7650895235224213, "learning_rate": 2.5240037245351027e-06, "loss": 0.0262, "step": 150505 }, { "epoch": 0.6280094466373476, "grad_norm": 0.49258236151481666, "learning_rate": 2.5239617997218507e-06, "loss": 0.0185, "step": 150510 }, { "epoch": 0.6280303093523378, "grad_norm": 0.7358460352075273, "learning_rate": 2.523919876997698e-06, "loss": 0.0217, "step": 150515 }, { "epoch": 0.6280511720673282, "grad_norm": 0.6237078806099227, "learning_rate": 2.523877956362471e-06, "loss": 0.0172, "step": 150520 }, { "epoch": 0.6280720347823184, "grad_norm": 0.5827184840838785, "learning_rate": 2.5238360378159964e-06, "loss": 0.0224, "step": 150525 }, { "epoch": 0.6280928974973087, "grad_norm": 0.5516340398277729, "learning_rate": 2.5237941213581006e-06, "loss": 0.0234, "step": 150530 }, { "epoch": 0.628113760212299, "grad_norm": 0.47954090904214847, "learning_rate": 2.523752206988611e-06, "loss": 0.0165, "step": 150535 }, { "epoch": 0.6281346229272893, "grad_norm": 0.37350391886975753, "learning_rate": 2.5237102947073533e-06, "loss": 0.0191, "step": 150540 }, { "epoch": 0.6281554856422795, "grad_norm": 0.6480029676012804, "learning_rate": 2.5236683845141542e-06, "loss": 0.0218, "step": 150545 }, { "epoch": 0.6281763483572699, "grad_norm": 0.6615817921566748, "learning_rate": 2.523626476408841e-06, "loss": 0.0214, "step": 150550 }, { "epoch": 0.6281972110722601, "grad_norm": 0.8740119694852808, "learning_rate": 2.5235845703912394e-06, "loss": 0.0197, "step": 150555 }, { "epoch": 0.6282180737872504, "grad_norm": 1.0093567696092798, "learning_rate": 2.523542666461176e-06, "loss": 0.0312, "step": 150560 }, { "epoch": 0.6282389365022406, "grad_norm": 0.8609429037096926, "learning_rate": 2.5235007646184788e-06, "loss": 0.0316, "step": 150565 }, { "epoch": 0.628259799217231, "grad_norm": 0.6970107486847751, "learning_rate": 2.5234588648629736e-06, "loss": 0.0228, "step": 150570 }, { "epoch": 0.6282806619322212, "grad_norm": 0.20046436004492335, "learning_rate": 2.5234169671944873e-06, "loss": 0.017, "step": 150575 }, { "epoch": 0.6283015246472115, "grad_norm": 0.3741624579194202, "learning_rate": 2.5233750716128463e-06, "loss": 0.0183, "step": 150580 }, { "epoch": 0.6283223873622018, "grad_norm": 0.6654211631702557, "learning_rate": 2.5233331781178777e-06, "loss": 0.0277, "step": 150585 }, { "epoch": 0.628343250077192, "grad_norm": 0.8267740257772329, "learning_rate": 2.5232912867094087e-06, "loss": 0.0287, "step": 150590 }, { "epoch": 0.6283641127921823, "grad_norm": 0.7116676737605335, "learning_rate": 2.5232493973872653e-06, "loss": 0.0285, "step": 150595 }, { "epoch": 0.6283849755071726, "grad_norm": 0.9101763260316981, "learning_rate": 2.5232075101512748e-06, "loss": 0.0276, "step": 150600 }, { "epoch": 0.6284058382221629, "grad_norm": 0.7512995865664844, "learning_rate": 2.5231656250012637e-06, "loss": 0.0231, "step": 150605 }, { "epoch": 0.6284267009371531, "grad_norm": 0.822510061898177, "learning_rate": 2.523123741937059e-06, "loss": 0.0201, "step": 150610 }, { "epoch": 0.6284475636521434, "grad_norm": 0.9272461725144762, "learning_rate": 2.523081860958488e-06, "loss": 0.0306, "step": 150615 }, { "epoch": 0.6284684263671337, "grad_norm": 0.5582011372750553, "learning_rate": 2.523039982065377e-06, "loss": 0.0201, "step": 150620 }, { "epoch": 0.628489289082124, "grad_norm": 0.42212359115740117, "learning_rate": 2.5229981052575535e-06, "loss": 0.0217, "step": 150625 }, { "epoch": 0.6285101517971142, "grad_norm": 0.40813596257039114, "learning_rate": 2.522956230534845e-06, "loss": 0.0245, "step": 150630 }, { "epoch": 0.6285310145121046, "grad_norm": 0.42632950412895226, "learning_rate": 2.522914357897076e-06, "loss": 0.0246, "step": 150635 }, { "epoch": 0.6285518772270948, "grad_norm": 0.7276414062014855, "learning_rate": 2.5228724873440765e-06, "loss": 0.0174, "step": 150640 }, { "epoch": 0.6285727399420851, "grad_norm": 0.540525033154977, "learning_rate": 2.5228306188756712e-06, "loss": 0.0223, "step": 150645 }, { "epoch": 0.6285936026570754, "grad_norm": 0.7137287350327561, "learning_rate": 2.5227887524916887e-06, "loss": 0.0358, "step": 150650 }, { "epoch": 0.6286144653720657, "grad_norm": 0.7104187604018939, "learning_rate": 2.5227468881919553e-06, "loss": 0.0228, "step": 150655 }, { "epoch": 0.6286353280870559, "grad_norm": 0.6566811218613575, "learning_rate": 2.5227050259762986e-06, "loss": 0.0235, "step": 150660 }, { "epoch": 0.6286561908020463, "grad_norm": 0.6979239104885261, "learning_rate": 2.5226631658445445e-06, "loss": 0.0263, "step": 150665 }, { "epoch": 0.6286770535170365, "grad_norm": 0.5646897912411702, "learning_rate": 2.522621307796521e-06, "loss": 0.0182, "step": 150670 }, { "epoch": 0.6286979162320268, "grad_norm": 0.6374501555002339, "learning_rate": 2.522579451832056e-06, "loss": 0.0257, "step": 150675 }, { "epoch": 0.628718778947017, "grad_norm": 0.8516691963337303, "learning_rate": 2.5225375979509752e-06, "loss": 0.02, "step": 150680 }, { "epoch": 0.6287396416620074, "grad_norm": 0.45277679818805977, "learning_rate": 2.5224957461531063e-06, "loss": 0.0271, "step": 150685 }, { "epoch": 0.6287605043769976, "grad_norm": 0.47031928517334354, "learning_rate": 2.5224538964382765e-06, "loss": 0.018, "step": 150690 }, { "epoch": 0.6287813670919878, "grad_norm": 0.49494619700154907, "learning_rate": 2.522412048806314e-06, "loss": 0.0177, "step": 150695 }, { "epoch": 0.6288022298069782, "grad_norm": 0.5694460968954489, "learning_rate": 2.5223702032570442e-06, "loss": 0.0156, "step": 150700 }, { "epoch": 0.6288230925219684, "grad_norm": 0.7416491013440178, "learning_rate": 2.522328359790295e-06, "loss": 0.022, "step": 150705 }, { "epoch": 0.6288439552369587, "grad_norm": 0.8256127163480118, "learning_rate": 2.5222865184058944e-06, "loss": 0.0189, "step": 150710 }, { "epoch": 0.628864817951949, "grad_norm": 0.5785763205862507, "learning_rate": 2.5222446791036694e-06, "loss": 0.0197, "step": 150715 }, { "epoch": 0.6288856806669393, "grad_norm": 0.6368824650416148, "learning_rate": 2.5222028418834467e-06, "loss": 0.0279, "step": 150720 }, { "epoch": 0.6289065433819295, "grad_norm": 0.7960144702709759, "learning_rate": 2.522161006745054e-06, "loss": 0.0238, "step": 150725 }, { "epoch": 0.6289274060969199, "grad_norm": 0.6200301837035886, "learning_rate": 2.522119173688319e-06, "loss": 0.0206, "step": 150730 }, { "epoch": 0.6289482688119101, "grad_norm": 0.4704917367168568, "learning_rate": 2.522077342713069e-06, "loss": 0.0173, "step": 150735 }, { "epoch": 0.6289691315269004, "grad_norm": 0.36109067187564076, "learning_rate": 2.522035513819131e-06, "loss": 0.021, "step": 150740 }, { "epoch": 0.6289899942418906, "grad_norm": 0.6344436822705843, "learning_rate": 2.5219936870063322e-06, "loss": 0.0222, "step": 150745 }, { "epoch": 0.629010856956881, "grad_norm": 0.42096382915333225, "learning_rate": 2.5219518622745008e-06, "loss": 0.026, "step": 150750 }, { "epoch": 0.6290317196718712, "grad_norm": 0.2706396637963089, "learning_rate": 2.521910039623464e-06, "loss": 0.0222, "step": 150755 }, { "epoch": 0.6290525823868615, "grad_norm": 0.8272386665588379, "learning_rate": 2.5218682190530484e-06, "loss": 0.0245, "step": 150760 }, { "epoch": 0.6290734451018518, "grad_norm": 0.4908982656979357, "learning_rate": 2.521826400563083e-06, "loss": 0.0195, "step": 150765 }, { "epoch": 0.6290943078168421, "grad_norm": 0.3537291100270262, "learning_rate": 2.5217845841533944e-06, "loss": 0.0321, "step": 150770 }, { "epoch": 0.6291151705318323, "grad_norm": 0.41697383136065036, "learning_rate": 2.5217427698238102e-06, "loss": 0.015, "step": 150775 }, { "epoch": 0.6291360332468227, "grad_norm": 0.6340888764205685, "learning_rate": 2.521700957574158e-06, "loss": 0.0221, "step": 150780 }, { "epoch": 0.6291568959618129, "grad_norm": 0.7911502622117416, "learning_rate": 2.5216591474042656e-06, "loss": 0.0417, "step": 150785 }, { "epoch": 0.6291777586768031, "grad_norm": 0.5028564906474468, "learning_rate": 2.5216173393139604e-06, "loss": 0.0152, "step": 150790 }, { "epoch": 0.6291986213917934, "grad_norm": 0.5362682719068389, "learning_rate": 2.5215755333030694e-06, "loss": 0.0246, "step": 150795 }, { "epoch": 0.6292194841067837, "grad_norm": 0.7701135246687937, "learning_rate": 2.5215337293714216e-06, "loss": 0.0233, "step": 150800 }, { "epoch": 0.629240346821774, "grad_norm": 0.5160527986866442, "learning_rate": 2.5214919275188436e-06, "loss": 0.0192, "step": 150805 }, { "epoch": 0.6292612095367642, "grad_norm": 0.48204640717614866, "learning_rate": 2.521450127745163e-06, "loss": 0.0224, "step": 150810 }, { "epoch": 0.6292820722517546, "grad_norm": 0.421653332127389, "learning_rate": 2.5214083300502084e-06, "loss": 0.0304, "step": 150815 }, { "epoch": 0.6293029349667448, "grad_norm": 0.3766046280535397, "learning_rate": 2.5213665344338067e-06, "loss": 0.0199, "step": 150820 }, { "epoch": 0.6293237976817351, "grad_norm": 0.38420174186207123, "learning_rate": 2.521324740895786e-06, "loss": 0.0203, "step": 150825 }, { "epoch": 0.6293446603967254, "grad_norm": 0.830645303535401, "learning_rate": 2.5212829494359736e-06, "loss": 0.03, "step": 150830 }, { "epoch": 0.6293655231117157, "grad_norm": 0.5119187301861853, "learning_rate": 2.521241160054198e-06, "loss": 0.0261, "step": 150835 }, { "epoch": 0.6293863858267059, "grad_norm": 0.4734295513027, "learning_rate": 2.5211993727502865e-06, "loss": 0.0167, "step": 150840 }, { "epoch": 0.6294072485416963, "grad_norm": 0.5682917579870054, "learning_rate": 2.5211575875240674e-06, "loss": 0.0204, "step": 150845 }, { "epoch": 0.6294281112566865, "grad_norm": 0.42949712994686157, "learning_rate": 2.5211158043753673e-06, "loss": 0.0241, "step": 150850 }, { "epoch": 0.6294489739716768, "grad_norm": 1.0042867253353678, "learning_rate": 2.521074023304016e-06, "loss": 0.0214, "step": 150855 }, { "epoch": 0.629469836686667, "grad_norm": 0.7368526629315482, "learning_rate": 2.5210322443098393e-06, "loss": 0.026, "step": 150860 }, { "epoch": 0.6294906994016574, "grad_norm": 0.5107850930222109, "learning_rate": 2.520990467392667e-06, "loss": 0.0171, "step": 150865 }, { "epoch": 0.6295115621166476, "grad_norm": 0.7795364807376255, "learning_rate": 2.520948692552325e-06, "loss": 0.0188, "step": 150870 }, { "epoch": 0.6295324248316378, "grad_norm": 0.5748661858586342, "learning_rate": 2.5209069197886432e-06, "loss": 0.0194, "step": 150875 }, { "epoch": 0.6295532875466282, "grad_norm": 0.9609244654558552, "learning_rate": 2.5208651491014487e-06, "loss": 0.0268, "step": 150880 }, { "epoch": 0.6295741502616184, "grad_norm": 1.0158427531355123, "learning_rate": 2.520823380490569e-06, "loss": 0.0236, "step": 150885 }, { "epoch": 0.6295950129766087, "grad_norm": 0.6197643591162514, "learning_rate": 2.520781613955833e-06, "loss": 0.0177, "step": 150890 }, { "epoch": 0.629615875691599, "grad_norm": 0.3546401992520027, "learning_rate": 2.5207398494970676e-06, "loss": 0.0202, "step": 150895 }, { "epoch": 0.6296367384065893, "grad_norm": 0.7585275214059767, "learning_rate": 2.520698087114102e-06, "loss": 0.0277, "step": 150900 }, { "epoch": 0.6296576011215795, "grad_norm": 0.6145552289092272, "learning_rate": 2.5206563268067633e-06, "loss": 0.0175, "step": 150905 }, { "epoch": 0.6296784638365699, "grad_norm": 0.5892974361093398, "learning_rate": 2.520614568574881e-06, "loss": 0.0182, "step": 150910 }, { "epoch": 0.6296993265515601, "grad_norm": 0.23120141910565375, "learning_rate": 2.520572812418281e-06, "loss": 0.0207, "step": 150915 }, { "epoch": 0.6297201892665504, "grad_norm": 1.027608665818053, "learning_rate": 2.520531058336793e-06, "loss": 0.0289, "step": 150920 }, { "epoch": 0.6297410519815406, "grad_norm": 0.38085876389152645, "learning_rate": 2.520489306330245e-06, "loss": 0.0246, "step": 150925 }, { "epoch": 0.629761914696531, "grad_norm": 0.4506657063951254, "learning_rate": 2.5204475563984644e-06, "loss": 0.0184, "step": 150930 }, { "epoch": 0.6297827774115212, "grad_norm": 0.7952581965034239, "learning_rate": 2.5204058085412802e-06, "loss": 0.0224, "step": 150935 }, { "epoch": 0.6298036401265115, "grad_norm": 0.8348876170168287, "learning_rate": 2.52036406275852e-06, "loss": 0.0216, "step": 150940 }, { "epoch": 0.6298245028415018, "grad_norm": 1.2255772446675417, "learning_rate": 2.5203223190500126e-06, "loss": 0.0254, "step": 150945 }, { "epoch": 0.6298453655564921, "grad_norm": 0.5796310205520618, "learning_rate": 2.5202805774155857e-06, "loss": 0.0279, "step": 150950 }, { "epoch": 0.6298662282714823, "grad_norm": 0.36842471049350695, "learning_rate": 2.5202388378550673e-06, "loss": 0.019, "step": 150955 }, { "epoch": 0.6298870909864727, "grad_norm": 1.0033444642489924, "learning_rate": 2.5201971003682875e-06, "loss": 0.0254, "step": 150960 }, { "epoch": 0.6299079537014629, "grad_norm": 0.48109794784001114, "learning_rate": 2.520155364955072e-06, "loss": 0.0214, "step": 150965 }, { "epoch": 0.6299288164164532, "grad_norm": 0.6633190589888739, "learning_rate": 2.5201136316152502e-06, "loss": 0.0265, "step": 150970 }, { "epoch": 0.6299496791314434, "grad_norm": 0.21285998649481552, "learning_rate": 2.5200719003486514e-06, "loss": 0.0171, "step": 150975 }, { "epoch": 0.6299705418464338, "grad_norm": 0.5537415888712808, "learning_rate": 2.520030171155102e-06, "loss": 0.0204, "step": 150980 }, { "epoch": 0.629991404561424, "grad_norm": 0.47190819332828016, "learning_rate": 2.519988444034433e-06, "loss": 0.0355, "step": 150985 }, { "epoch": 0.6300122672764142, "grad_norm": 0.558629913102869, "learning_rate": 2.5199467189864703e-06, "loss": 0.0214, "step": 150990 }, { "epoch": 0.6300331299914046, "grad_norm": 0.48508805474524347, "learning_rate": 2.5199049960110433e-06, "loss": 0.0248, "step": 150995 }, { "epoch": 0.6300539927063948, "grad_norm": 0.5464167397844972, "learning_rate": 2.5198632751079803e-06, "loss": 0.0267, "step": 151000 }, { "epoch": 0.6300748554213851, "grad_norm": 0.4917480139383282, "learning_rate": 2.51982155627711e-06, "loss": 0.0236, "step": 151005 }, { "epoch": 0.6300957181363754, "grad_norm": 0.41302226156380517, "learning_rate": 2.5197798395182603e-06, "loss": 0.0208, "step": 151010 }, { "epoch": 0.6301165808513657, "grad_norm": 0.6766232887650375, "learning_rate": 2.5197381248312608e-06, "loss": 0.025, "step": 151015 }, { "epoch": 0.6301374435663559, "grad_norm": 1.3840080941003525, "learning_rate": 2.519696412215939e-06, "loss": 0.0192, "step": 151020 }, { "epoch": 0.6301583062813463, "grad_norm": 0.7494949667210259, "learning_rate": 2.519654701672124e-06, "loss": 0.0206, "step": 151025 }, { "epoch": 0.6301791689963365, "grad_norm": 0.9343393949390607, "learning_rate": 2.519612993199644e-06, "loss": 0.0253, "step": 151030 }, { "epoch": 0.6302000317113268, "grad_norm": 0.3709514233396649, "learning_rate": 2.5195712867983275e-06, "loss": 0.0168, "step": 151035 }, { "epoch": 0.630220894426317, "grad_norm": 0.7790380796224071, "learning_rate": 2.519529582468003e-06, "loss": 0.0237, "step": 151040 }, { "epoch": 0.6302417571413074, "grad_norm": 0.683892345839012, "learning_rate": 2.5194878802085e-06, "loss": 0.0173, "step": 151045 }, { "epoch": 0.6302626198562976, "grad_norm": 1.123610888295113, "learning_rate": 2.519446180019646e-06, "loss": 0.0162, "step": 151050 }, { "epoch": 0.6302834825712879, "grad_norm": 0.645518044666994, "learning_rate": 2.51940448190127e-06, "loss": 0.0193, "step": 151055 }, { "epoch": 0.6303043452862782, "grad_norm": 0.670036359118588, "learning_rate": 2.519362785853201e-06, "loss": 0.0349, "step": 151060 }, { "epoch": 0.6303252080012685, "grad_norm": 0.47233905412453675, "learning_rate": 2.5193210918752676e-06, "loss": 0.0187, "step": 151065 }, { "epoch": 0.6303460707162587, "grad_norm": 0.9160071135481505, "learning_rate": 2.519279399967298e-06, "loss": 0.0227, "step": 151070 }, { "epoch": 0.630366933431249, "grad_norm": 0.9785886976224243, "learning_rate": 2.5192377101291223e-06, "loss": 0.0228, "step": 151075 }, { "epoch": 0.6303877961462393, "grad_norm": 0.9067709934865344, "learning_rate": 2.5191960223605677e-06, "loss": 0.0295, "step": 151080 }, { "epoch": 0.6304086588612295, "grad_norm": 0.6334603935735547, "learning_rate": 2.5191543366614636e-06, "loss": 0.0169, "step": 151085 }, { "epoch": 0.6304295215762199, "grad_norm": 0.49720230820835515, "learning_rate": 2.5191126530316386e-06, "loss": 0.0188, "step": 151090 }, { "epoch": 0.6304503842912101, "grad_norm": 0.73674044633438, "learning_rate": 2.5190709714709215e-06, "loss": 0.0222, "step": 151095 }, { "epoch": 0.6304712470062004, "grad_norm": 0.6372046896239392, "learning_rate": 2.5190292919791413e-06, "loss": 0.0258, "step": 151100 }, { "epoch": 0.6304921097211906, "grad_norm": 0.7764310802683597, "learning_rate": 2.5189876145561272e-06, "loss": 0.0204, "step": 151105 }, { "epoch": 0.630512972436181, "grad_norm": 0.8603876716021711, "learning_rate": 2.5189459392017074e-06, "loss": 0.0176, "step": 151110 }, { "epoch": 0.6305338351511712, "grad_norm": 0.46630083286409074, "learning_rate": 2.518904265915711e-06, "loss": 0.0198, "step": 151115 }, { "epoch": 0.6305546978661615, "grad_norm": 0.6371884553059125, "learning_rate": 2.518862594697967e-06, "loss": 0.0272, "step": 151120 }, { "epoch": 0.6305755605811518, "grad_norm": 0.6422893055440952, "learning_rate": 2.5188209255483047e-06, "loss": 0.024, "step": 151125 }, { "epoch": 0.6305964232961421, "grad_norm": 0.52573687215602, "learning_rate": 2.518779258466552e-06, "loss": 0.0269, "step": 151130 }, { "epoch": 0.6306172860111323, "grad_norm": 0.7791292063832467, "learning_rate": 2.518737593452539e-06, "loss": 0.0236, "step": 151135 }, { "epoch": 0.6306381487261227, "grad_norm": 0.6567532921915019, "learning_rate": 2.518695930506094e-06, "loss": 0.0236, "step": 151140 }, { "epoch": 0.6306590114411129, "grad_norm": 0.277084141308489, "learning_rate": 2.5186542696270468e-06, "loss": 0.0191, "step": 151145 }, { "epoch": 0.6306798741561032, "grad_norm": 1.3238923798529774, "learning_rate": 2.518612610815225e-06, "loss": 0.0254, "step": 151150 }, { "epoch": 0.6307007368710934, "grad_norm": 1.1739264576159203, "learning_rate": 2.5185709540704585e-06, "loss": 0.0262, "step": 151155 }, { "epoch": 0.6307215995860838, "grad_norm": 0.448104706104387, "learning_rate": 2.5185292993925763e-06, "loss": 0.02, "step": 151160 }, { "epoch": 0.630742462301074, "grad_norm": 0.639704484289846, "learning_rate": 2.5184876467814078e-06, "loss": 0.023, "step": 151165 }, { "epoch": 0.6307633250160642, "grad_norm": 0.4393784551109069, "learning_rate": 2.518445996236782e-06, "loss": 0.0174, "step": 151170 }, { "epoch": 0.6307841877310546, "grad_norm": 0.6414565583188959, "learning_rate": 2.518404347758528e-06, "loss": 0.0207, "step": 151175 }, { "epoch": 0.6308050504460448, "grad_norm": 0.8194836266267079, "learning_rate": 2.518362701346474e-06, "loss": 0.0312, "step": 151180 }, { "epoch": 0.6308259131610351, "grad_norm": 0.4429621893372902, "learning_rate": 2.5183210570004508e-06, "loss": 0.0217, "step": 151185 }, { "epoch": 0.6308467758760254, "grad_norm": 0.5843793555453558, "learning_rate": 2.518279414720286e-06, "loss": 0.0296, "step": 151190 }, { "epoch": 0.6308676385910157, "grad_norm": 0.8497695986143968, "learning_rate": 2.5182377745058103e-06, "loss": 0.023, "step": 151195 }, { "epoch": 0.6308885013060059, "grad_norm": 1.3601930795210617, "learning_rate": 2.5181961363568514e-06, "loss": 0.032, "step": 151200 }, { "epoch": 0.6309093640209963, "grad_norm": 0.8622176799479123, "learning_rate": 2.51815450027324e-06, "loss": 0.0197, "step": 151205 }, { "epoch": 0.6309302267359865, "grad_norm": 0.39155774586435266, "learning_rate": 2.5181128662548044e-06, "loss": 0.0219, "step": 151210 }, { "epoch": 0.6309510894509768, "grad_norm": 0.45420176134016343, "learning_rate": 2.518071234301374e-06, "loss": 0.0178, "step": 151215 }, { "epoch": 0.630971952165967, "grad_norm": 0.4170777784650574, "learning_rate": 2.5180296044127783e-06, "loss": 0.0256, "step": 151220 }, { "epoch": 0.6309928148809574, "grad_norm": 0.8296380582320992, "learning_rate": 2.5179879765888466e-06, "loss": 0.0279, "step": 151225 }, { "epoch": 0.6310136775959476, "grad_norm": 0.7902393393271276, "learning_rate": 2.517946350829408e-06, "loss": 0.0194, "step": 151230 }, { "epoch": 0.6310345403109379, "grad_norm": 0.41780088371976126, "learning_rate": 2.517904727134293e-06, "loss": 0.0162, "step": 151235 }, { "epoch": 0.6310554030259282, "grad_norm": 0.7761138565442963, "learning_rate": 2.5178631055033294e-06, "loss": 0.0154, "step": 151240 }, { "epoch": 0.6310762657409185, "grad_norm": 0.8638475657329175, "learning_rate": 2.5178214859363464e-06, "loss": 0.0227, "step": 151245 }, { "epoch": 0.6310971284559087, "grad_norm": 0.9581769226921364, "learning_rate": 2.5177798684331757e-06, "loss": 0.0242, "step": 151250 }, { "epoch": 0.6311179911708991, "grad_norm": 0.30362061789767786, "learning_rate": 2.5177382529936445e-06, "loss": 0.0269, "step": 151255 }, { "epoch": 0.6311388538858893, "grad_norm": 0.4629867635798367, "learning_rate": 2.5176966396175834e-06, "loss": 0.0273, "step": 151260 }, { "epoch": 0.6311597166008796, "grad_norm": 0.37501357864064266, "learning_rate": 2.5176550283048217e-06, "loss": 0.0244, "step": 151265 }, { "epoch": 0.6311805793158699, "grad_norm": 0.4586897789180065, "learning_rate": 2.5176134190551888e-06, "loss": 0.0208, "step": 151270 }, { "epoch": 0.6312014420308601, "grad_norm": 0.718205351374086, "learning_rate": 2.5175718118685135e-06, "loss": 0.022, "step": 151275 }, { "epoch": 0.6312223047458504, "grad_norm": 0.7547372814134562, "learning_rate": 2.517530206744627e-06, "loss": 0.022, "step": 151280 }, { "epoch": 0.6312431674608406, "grad_norm": 1.2991711960279326, "learning_rate": 2.517488603683357e-06, "loss": 0.026, "step": 151285 }, { "epoch": 0.631264030175831, "grad_norm": 0.6777471479735517, "learning_rate": 2.5174470026845338e-06, "loss": 0.0225, "step": 151290 }, { "epoch": 0.6312848928908212, "grad_norm": 0.3715107692733392, "learning_rate": 2.517405403747988e-06, "loss": 0.022, "step": 151295 }, { "epoch": 0.6313057556058115, "grad_norm": 0.7127988001797424, "learning_rate": 2.5173638068735485e-06, "loss": 0.0174, "step": 151300 }, { "epoch": 0.6313266183208018, "grad_norm": 0.15768305991175682, "learning_rate": 2.517322212061044e-06, "loss": 0.0225, "step": 151305 }, { "epoch": 0.6313474810357921, "grad_norm": 0.5762807089135814, "learning_rate": 2.517280619310305e-06, "loss": 0.0291, "step": 151310 }, { "epoch": 0.6313683437507823, "grad_norm": 1.075821781380883, "learning_rate": 2.517239028621162e-06, "loss": 0.0225, "step": 151315 }, { "epoch": 0.6313892064657727, "grad_norm": 0.38955998315449714, "learning_rate": 2.517197439993443e-06, "loss": 0.0192, "step": 151320 }, { "epoch": 0.6314100691807629, "grad_norm": 0.5529631837877289, "learning_rate": 2.517155853426979e-06, "loss": 0.0191, "step": 151325 }, { "epoch": 0.6314309318957532, "grad_norm": 0.7512480256069627, "learning_rate": 2.5171142689215995e-06, "loss": 0.0275, "step": 151330 }, { "epoch": 0.6314517946107434, "grad_norm": 0.41306656175976786, "learning_rate": 2.5170726864771335e-06, "loss": 0.02, "step": 151335 }, { "epoch": 0.6314726573257338, "grad_norm": 0.1934898680643831, "learning_rate": 2.517031106093411e-06, "loss": 0.0176, "step": 151340 }, { "epoch": 0.631493520040724, "grad_norm": 1.3026816205410625, "learning_rate": 2.5169895277702626e-06, "loss": 0.0257, "step": 151345 }, { "epoch": 0.6315143827557143, "grad_norm": 0.6338103293915454, "learning_rate": 2.516947951507518e-06, "loss": 0.0278, "step": 151350 }, { "epoch": 0.6315352454707046, "grad_norm": 0.26637605500676786, "learning_rate": 2.516906377305006e-06, "loss": 0.0269, "step": 151355 }, { "epoch": 0.6315561081856949, "grad_norm": 0.5486325146727375, "learning_rate": 2.5168648051625576e-06, "loss": 0.0251, "step": 151360 }, { "epoch": 0.6315769709006851, "grad_norm": 0.7438330629704116, "learning_rate": 2.516823235080002e-06, "loss": 0.0267, "step": 151365 }, { "epoch": 0.6315978336156755, "grad_norm": 1.6021912600932673, "learning_rate": 2.5167816670571695e-06, "loss": 0.0238, "step": 151370 }, { "epoch": 0.6316186963306657, "grad_norm": 0.6158001225310986, "learning_rate": 2.51674010109389e-06, "loss": 0.0184, "step": 151375 }, { "epoch": 0.6316395590456559, "grad_norm": 0.6365791795968484, "learning_rate": 2.5166985371899925e-06, "loss": 0.0248, "step": 151380 }, { "epoch": 0.6316604217606463, "grad_norm": 0.5307416198271981, "learning_rate": 2.516656975345308e-06, "loss": 0.0265, "step": 151385 }, { "epoch": 0.6316812844756365, "grad_norm": 0.4744650511128426, "learning_rate": 2.5166154155596663e-06, "loss": 0.0246, "step": 151390 }, { "epoch": 0.6317021471906268, "grad_norm": 0.6397185861721929, "learning_rate": 2.5165738578328973e-06, "loss": 0.0275, "step": 151395 }, { "epoch": 0.631723009905617, "grad_norm": 0.5362829846432852, "learning_rate": 2.516532302164831e-06, "loss": 0.0183, "step": 151400 }, { "epoch": 0.6317438726206074, "grad_norm": 0.442377254552686, "learning_rate": 2.516490748555297e-06, "loss": 0.0224, "step": 151405 }, { "epoch": 0.6317647353355976, "grad_norm": 0.5887275967685386, "learning_rate": 2.5164491970041257e-06, "loss": 0.0263, "step": 151410 }, { "epoch": 0.6317855980505879, "grad_norm": 0.4256784700706048, "learning_rate": 2.5164076475111476e-06, "loss": 0.0244, "step": 151415 }, { "epoch": 0.6318064607655782, "grad_norm": 1.1011330487579147, "learning_rate": 2.5163661000761923e-06, "loss": 0.0248, "step": 151420 }, { "epoch": 0.6318273234805685, "grad_norm": 0.34433662637038076, "learning_rate": 2.51632455469909e-06, "loss": 0.0223, "step": 151425 }, { "epoch": 0.6318481861955587, "grad_norm": 1.3342966991971867, "learning_rate": 2.516283011379671e-06, "loss": 0.0184, "step": 151430 }, { "epoch": 0.6318690489105491, "grad_norm": 0.6698958819420315, "learning_rate": 2.516241470117765e-06, "loss": 0.0209, "step": 151435 }, { "epoch": 0.6318899116255393, "grad_norm": 0.6935441009051462, "learning_rate": 2.5161999309132024e-06, "loss": 0.023, "step": 151440 }, { "epoch": 0.6319107743405296, "grad_norm": 0.8544780310942478, "learning_rate": 2.5161583937658134e-06, "loss": 0.0293, "step": 151445 }, { "epoch": 0.6319316370555199, "grad_norm": 0.5371842889599496, "learning_rate": 2.5161168586754286e-06, "loss": 0.0201, "step": 151450 }, { "epoch": 0.6319524997705102, "grad_norm": 0.9457773103726608, "learning_rate": 2.516075325641878e-06, "loss": 0.0252, "step": 151455 }, { "epoch": 0.6319733624855004, "grad_norm": 0.5118153145432538, "learning_rate": 2.516033794664991e-06, "loss": 0.0295, "step": 151460 }, { "epoch": 0.6319942252004906, "grad_norm": 0.6046153423966266, "learning_rate": 2.5159922657445995e-06, "loss": 0.0211, "step": 151465 }, { "epoch": 0.632015087915481, "grad_norm": 0.284563372708904, "learning_rate": 2.5159507388805326e-06, "loss": 0.0185, "step": 151470 }, { "epoch": 0.6320359506304712, "grad_norm": 8.111409256986594, "learning_rate": 2.5159092140726205e-06, "loss": 0.0279, "step": 151475 }, { "epoch": 0.6320568133454615, "grad_norm": 0.7835990577433877, "learning_rate": 2.515867691320694e-06, "loss": 0.0215, "step": 151480 }, { "epoch": 0.6320776760604518, "grad_norm": 0.9651423428322164, "learning_rate": 2.515826170624584e-06, "loss": 0.0238, "step": 151485 }, { "epoch": 0.6320985387754421, "grad_norm": 0.4325990247048515, "learning_rate": 2.5157846519841193e-06, "loss": 0.0193, "step": 151490 }, { "epoch": 0.6321194014904323, "grad_norm": 0.9579638112951011, "learning_rate": 2.515743135399132e-06, "loss": 0.0217, "step": 151495 }, { "epoch": 0.6321402642054227, "grad_norm": 0.475457145810023, "learning_rate": 2.5157016208694512e-06, "loss": 0.0242, "step": 151500 }, { "epoch": 0.6321611269204129, "grad_norm": 0.3015484967805504, "learning_rate": 2.5156601083949084e-06, "loss": 0.0226, "step": 151505 }, { "epoch": 0.6321819896354032, "grad_norm": 1.1344619621305356, "learning_rate": 2.5156185979753327e-06, "loss": 0.024, "step": 151510 }, { "epoch": 0.6322028523503934, "grad_norm": 0.734787123299111, "learning_rate": 2.5155770896105556e-06, "loss": 0.0242, "step": 151515 }, { "epoch": 0.6322237150653838, "grad_norm": 0.5189425581905228, "learning_rate": 2.5155355833004073e-06, "loss": 0.0261, "step": 151520 }, { "epoch": 0.632244577780374, "grad_norm": 0.49111074366519236, "learning_rate": 2.5154940790447184e-06, "loss": 0.0256, "step": 151525 }, { "epoch": 0.6322654404953643, "grad_norm": 0.8754244956107138, "learning_rate": 2.5154525768433198e-06, "loss": 0.0319, "step": 151530 }, { "epoch": 0.6322863032103546, "grad_norm": 0.9500315071042158, "learning_rate": 2.515411076696041e-06, "loss": 0.0247, "step": 151535 }, { "epoch": 0.6323071659253449, "grad_norm": 1.007452868645263, "learning_rate": 2.515369578602713e-06, "loss": 0.0246, "step": 151540 }, { "epoch": 0.6323280286403351, "grad_norm": 0.27775310147768145, "learning_rate": 2.515328082563167e-06, "loss": 0.0302, "step": 151545 }, { "epoch": 0.6323488913553255, "grad_norm": 0.9620198641515548, "learning_rate": 2.515286588577233e-06, "loss": 0.028, "step": 151550 }, { "epoch": 0.6323697540703157, "grad_norm": 0.6862869470034381, "learning_rate": 2.5152450966447418e-06, "loss": 0.0203, "step": 151555 }, { "epoch": 0.632390616785306, "grad_norm": 0.5981197378522858, "learning_rate": 2.515203606765524e-06, "loss": 0.0174, "step": 151560 }, { "epoch": 0.6324114795002963, "grad_norm": 0.892348813372594, "learning_rate": 2.51516211893941e-06, "loss": 0.0229, "step": 151565 }, { "epoch": 0.6324323422152865, "grad_norm": 0.7171052814570043, "learning_rate": 2.5151206331662307e-06, "loss": 0.0188, "step": 151570 }, { "epoch": 0.6324532049302768, "grad_norm": 0.37963375783643066, "learning_rate": 2.515079149445817e-06, "loss": 0.0237, "step": 151575 }, { "epoch": 0.632474067645267, "grad_norm": 1.3430903363251814, "learning_rate": 2.5150376677779998e-06, "loss": 0.0215, "step": 151580 }, { "epoch": 0.6324949303602574, "grad_norm": 0.5810334305983106, "learning_rate": 2.5149961881626085e-06, "loss": 0.0252, "step": 151585 }, { "epoch": 0.6325157930752476, "grad_norm": 0.5632883370432135, "learning_rate": 2.514954710599475e-06, "loss": 0.0209, "step": 151590 }, { "epoch": 0.6325366557902379, "grad_norm": 0.6580057461362085, "learning_rate": 2.5149132350884308e-06, "loss": 0.0195, "step": 151595 }, { "epoch": 0.6325575185052282, "grad_norm": 0.6363032867017393, "learning_rate": 2.5148717616293055e-06, "loss": 0.0258, "step": 151600 }, { "epoch": 0.6325783812202185, "grad_norm": 0.7222682541006503, "learning_rate": 2.5148302902219294e-06, "loss": 0.0224, "step": 151605 }, { "epoch": 0.6325992439352087, "grad_norm": 0.5318209958691348, "learning_rate": 2.5147888208661345e-06, "loss": 0.0237, "step": 151610 }, { "epoch": 0.6326201066501991, "grad_norm": 1.0506715263791906, "learning_rate": 2.5147473535617522e-06, "loss": 0.0218, "step": 151615 }, { "epoch": 0.6326409693651893, "grad_norm": 0.3580506865615092, "learning_rate": 2.514705888308612e-06, "loss": 0.0198, "step": 151620 }, { "epoch": 0.6326618320801796, "grad_norm": 0.4557923725711132, "learning_rate": 2.514664425106545e-06, "loss": 0.0267, "step": 151625 }, { "epoch": 0.6326826947951698, "grad_norm": 0.7668907007924035, "learning_rate": 2.514622963955382e-06, "loss": 0.0241, "step": 151630 }, { "epoch": 0.6327035575101602, "grad_norm": 0.7660913092398303, "learning_rate": 2.514581504854955e-06, "loss": 0.0258, "step": 151635 }, { "epoch": 0.6327244202251504, "grad_norm": 0.7653285296454614, "learning_rate": 2.514540047805094e-06, "loss": 0.021, "step": 151640 }, { "epoch": 0.6327452829401407, "grad_norm": 0.8210103801137374, "learning_rate": 2.5144985928056298e-06, "loss": 0.0321, "step": 151645 }, { "epoch": 0.632766145655131, "grad_norm": 0.853709474394095, "learning_rate": 2.5144571398563942e-06, "loss": 0.0233, "step": 151650 }, { "epoch": 0.6327870083701213, "grad_norm": 0.7997720181435302, "learning_rate": 2.514415688957218e-06, "loss": 0.0237, "step": 151655 }, { "epoch": 0.6328078710851115, "grad_norm": 0.9847561660342009, "learning_rate": 2.5143742401079316e-06, "loss": 0.0199, "step": 151660 }, { "epoch": 0.6328287338001019, "grad_norm": 0.873664949838921, "learning_rate": 2.514332793308367e-06, "loss": 0.0267, "step": 151665 }, { "epoch": 0.6328495965150921, "grad_norm": 0.6703980554760471, "learning_rate": 2.514291348558355e-06, "loss": 0.0211, "step": 151670 }, { "epoch": 0.6328704592300823, "grad_norm": 0.38637122745305014, "learning_rate": 2.514249905857726e-06, "loss": 0.0248, "step": 151675 }, { "epoch": 0.6328913219450727, "grad_norm": 0.7749914230793292, "learning_rate": 2.5142084652063115e-06, "loss": 0.0213, "step": 151680 }, { "epoch": 0.6329121846600629, "grad_norm": 0.7912480834275989, "learning_rate": 2.5141670266039426e-06, "loss": 0.0209, "step": 151685 }, { "epoch": 0.6329330473750532, "grad_norm": 0.5772097009234768, "learning_rate": 2.5141255900504506e-06, "loss": 0.0187, "step": 151690 }, { "epoch": 0.6329539100900434, "grad_norm": 0.6348891567487748, "learning_rate": 2.5140841555456667e-06, "loss": 0.0308, "step": 151695 }, { "epoch": 0.6329747728050338, "grad_norm": 0.5446658003617103, "learning_rate": 2.514042723089422e-06, "loss": 0.026, "step": 151700 }, { "epoch": 0.632995635520024, "grad_norm": 0.4838765277324799, "learning_rate": 2.514001292681548e-06, "loss": 0.0272, "step": 151705 }, { "epoch": 0.6330164982350143, "grad_norm": 0.7861382276639777, "learning_rate": 2.5139598643218756e-06, "loss": 0.0199, "step": 151710 }, { "epoch": 0.6330373609500046, "grad_norm": 0.6076403670946852, "learning_rate": 2.5139184380102355e-06, "loss": 0.0217, "step": 151715 }, { "epoch": 0.6330582236649949, "grad_norm": 0.7641819392714093, "learning_rate": 2.5138770137464598e-06, "loss": 0.0219, "step": 151720 }, { "epoch": 0.6330790863799851, "grad_norm": 0.8515561689322139, "learning_rate": 2.51383559153038e-06, "loss": 0.0258, "step": 151725 }, { "epoch": 0.6330999490949755, "grad_norm": 1.229353366967252, "learning_rate": 2.513794171361826e-06, "loss": 0.0214, "step": 151730 }, { "epoch": 0.6331208118099657, "grad_norm": 0.6998097951910781, "learning_rate": 2.51375275324063e-06, "loss": 0.0198, "step": 151735 }, { "epoch": 0.633141674524956, "grad_norm": 0.953811789530097, "learning_rate": 2.513711337166624e-06, "loss": 0.0242, "step": 151740 }, { "epoch": 0.6331625372399463, "grad_norm": 0.9644365816223004, "learning_rate": 2.513669923139639e-06, "loss": 0.0233, "step": 151745 }, { "epoch": 0.6331833999549366, "grad_norm": 0.7507675490831903, "learning_rate": 2.5136285111595053e-06, "loss": 0.0222, "step": 151750 }, { "epoch": 0.6332042626699268, "grad_norm": 0.7739767284087975, "learning_rate": 2.513587101226056e-06, "loss": 0.0235, "step": 151755 }, { "epoch": 0.633225125384917, "grad_norm": 1.96449947002999, "learning_rate": 2.5135456933391204e-06, "loss": 0.0245, "step": 151760 }, { "epoch": 0.6332459880999074, "grad_norm": 0.549107004121723, "learning_rate": 2.513504287498532e-06, "loss": 0.018, "step": 151765 }, { "epoch": 0.6332668508148976, "grad_norm": 0.8016298496359138, "learning_rate": 2.5134628837041214e-06, "loss": 0.0253, "step": 151770 }, { "epoch": 0.6332877135298879, "grad_norm": 0.7342805846115492, "learning_rate": 2.5134214819557202e-06, "loss": 0.0188, "step": 151775 }, { "epoch": 0.6333085762448782, "grad_norm": 0.4194521216687892, "learning_rate": 2.5133800822531594e-06, "loss": 0.0203, "step": 151780 }, { "epoch": 0.6333294389598685, "grad_norm": 0.6315181156711046, "learning_rate": 2.5133386845962717e-06, "loss": 0.0209, "step": 151785 }, { "epoch": 0.6333503016748587, "grad_norm": 0.6395403030034394, "learning_rate": 2.513297288984887e-06, "loss": 0.022, "step": 151790 }, { "epoch": 0.6333711643898491, "grad_norm": 0.9430878020914286, "learning_rate": 2.513255895418838e-06, "loss": 0.0251, "step": 151795 }, { "epoch": 0.6333920271048393, "grad_norm": 0.6079356687952346, "learning_rate": 2.5132145038979563e-06, "loss": 0.0181, "step": 151800 }, { "epoch": 0.6334128898198296, "grad_norm": 0.6916561748570317, "learning_rate": 2.5131731144220727e-06, "loss": 0.023, "step": 151805 }, { "epoch": 0.6334337525348198, "grad_norm": 0.6638456998670748, "learning_rate": 2.5131317269910195e-06, "loss": 0.0195, "step": 151810 }, { "epoch": 0.6334546152498102, "grad_norm": 0.54692453548565, "learning_rate": 2.5130903416046275e-06, "loss": 0.0252, "step": 151815 }, { "epoch": 0.6334754779648004, "grad_norm": 0.39598439984759565, "learning_rate": 2.51304895826273e-06, "loss": 0.0164, "step": 151820 }, { "epoch": 0.6334963406797907, "grad_norm": 0.20037579521817944, "learning_rate": 2.5130075769651566e-06, "loss": 0.0165, "step": 151825 }, { "epoch": 0.633517203394781, "grad_norm": 0.45170118806446585, "learning_rate": 2.512966197711741e-06, "loss": 0.024, "step": 151830 }, { "epoch": 0.6335380661097713, "grad_norm": 0.482825911580727, "learning_rate": 2.5129248205023133e-06, "loss": 0.0184, "step": 151835 }, { "epoch": 0.6335589288247615, "grad_norm": 0.8032349311331464, "learning_rate": 2.5128834453367063e-06, "loss": 0.0163, "step": 151840 }, { "epoch": 0.6335797915397519, "grad_norm": 0.5584614550134706, "learning_rate": 2.5128420722147504e-06, "loss": 0.0201, "step": 151845 }, { "epoch": 0.6336006542547421, "grad_norm": 0.3430045852445045, "learning_rate": 2.5128007011362793e-06, "loss": 0.0237, "step": 151850 }, { "epoch": 0.6336215169697323, "grad_norm": 1.376672868074312, "learning_rate": 2.5127593321011236e-06, "loss": 0.0278, "step": 151855 }, { "epoch": 0.6336423796847227, "grad_norm": 0.25890741768325115, "learning_rate": 2.5127179651091147e-06, "loss": 0.0212, "step": 151860 }, { "epoch": 0.633663242399713, "grad_norm": 0.4022657782970588, "learning_rate": 2.5126766001600853e-06, "loss": 0.0206, "step": 151865 }, { "epoch": 0.6336841051147032, "grad_norm": 0.586892908534372, "learning_rate": 2.512635237253867e-06, "loss": 0.0231, "step": 151870 }, { "epoch": 0.6337049678296934, "grad_norm": 0.9493157103450756, "learning_rate": 2.5125938763902926e-06, "loss": 0.0285, "step": 151875 }, { "epoch": 0.6337258305446838, "grad_norm": 0.4886346559277138, "learning_rate": 2.5125525175691916e-06, "loss": 0.0167, "step": 151880 }, { "epoch": 0.633746693259674, "grad_norm": 0.7476092049854444, "learning_rate": 2.512511160790398e-06, "loss": 0.0246, "step": 151885 }, { "epoch": 0.6337675559746643, "grad_norm": 0.9874420486009899, "learning_rate": 2.512469806053743e-06, "loss": 0.0213, "step": 151890 }, { "epoch": 0.6337884186896546, "grad_norm": 0.5486719482019166, "learning_rate": 2.5124284533590577e-06, "loss": 0.026, "step": 151895 }, { "epoch": 0.6338092814046449, "grad_norm": 1.3198911712654813, "learning_rate": 2.512387102706176e-06, "loss": 0.0247, "step": 151900 }, { "epoch": 0.6338301441196351, "grad_norm": 3.8192766763650536, "learning_rate": 2.512345754094928e-06, "loss": 0.0261, "step": 151905 }, { "epoch": 0.6338510068346255, "grad_norm": 0.637857756816452, "learning_rate": 2.512304407525147e-06, "loss": 0.0261, "step": 151910 }, { "epoch": 0.6338718695496157, "grad_norm": 0.5858052558766623, "learning_rate": 2.5122630629966644e-06, "loss": 0.0218, "step": 151915 }, { "epoch": 0.633892732264606, "grad_norm": 0.5871567906149764, "learning_rate": 2.5122217205093123e-06, "loss": 0.0312, "step": 151920 }, { "epoch": 0.6339135949795963, "grad_norm": 0.7273192806913716, "learning_rate": 2.512180380062923e-06, "loss": 0.0332, "step": 151925 }, { "epoch": 0.6339344576945866, "grad_norm": 0.43321328592432734, "learning_rate": 2.512139041657328e-06, "loss": 0.0191, "step": 151930 }, { "epoch": 0.6339553204095768, "grad_norm": 0.9026386992595284, "learning_rate": 2.5120977052923607e-06, "loss": 0.0247, "step": 151935 }, { "epoch": 0.633976183124567, "grad_norm": 0.9653837126168291, "learning_rate": 2.512056370967852e-06, "loss": 0.0235, "step": 151940 }, { "epoch": 0.6339970458395574, "grad_norm": 0.9384854734923526, "learning_rate": 2.512015038683634e-06, "loss": 0.0216, "step": 151945 }, { "epoch": 0.6340179085545476, "grad_norm": 0.7959475241273414, "learning_rate": 2.511973708439539e-06, "loss": 0.0258, "step": 151950 }, { "epoch": 0.6340387712695379, "grad_norm": 0.9708079212846257, "learning_rate": 2.5119323802353995e-06, "loss": 0.0319, "step": 151955 }, { "epoch": 0.6340596339845282, "grad_norm": 0.6483884826093442, "learning_rate": 2.5118910540710483e-06, "loss": 0.0236, "step": 151960 }, { "epoch": 0.6340804966995185, "grad_norm": 0.5826435856427397, "learning_rate": 2.5118497299463163e-06, "loss": 0.016, "step": 151965 }, { "epoch": 0.6341013594145087, "grad_norm": 1.0800149992568364, "learning_rate": 2.511808407861036e-06, "loss": 0.0166, "step": 151970 }, { "epoch": 0.6341222221294991, "grad_norm": 1.0831667675089556, "learning_rate": 2.511767087815041e-06, "loss": 0.0238, "step": 151975 }, { "epoch": 0.6341430848444893, "grad_norm": 1.2785831051196415, "learning_rate": 2.5117257698081616e-06, "loss": 0.0244, "step": 151980 }, { "epoch": 0.6341639475594796, "grad_norm": 0.5871903777912012, "learning_rate": 2.511684453840232e-06, "loss": 0.0201, "step": 151985 }, { "epoch": 0.6341848102744698, "grad_norm": 0.9414869518917168, "learning_rate": 2.5116431399110823e-06, "loss": 0.0217, "step": 151990 }, { "epoch": 0.6342056729894602, "grad_norm": 0.41812135421460805, "learning_rate": 2.5116018280205466e-06, "loss": 0.0213, "step": 151995 }, { "epoch": 0.6342265357044504, "grad_norm": 0.6086894681326983, "learning_rate": 2.511560518168457e-06, "loss": 0.0304, "step": 152000 }, { "epoch": 0.6342473984194407, "grad_norm": 0.8232556816957529, "learning_rate": 2.5115192103546455e-06, "loss": 0.0225, "step": 152005 }, { "epoch": 0.634268261134431, "grad_norm": 0.6410526899018657, "learning_rate": 2.5114779045789444e-06, "loss": 0.0227, "step": 152010 }, { "epoch": 0.6342891238494213, "grad_norm": 0.8268272151728853, "learning_rate": 2.5114366008411867e-06, "loss": 0.021, "step": 152015 }, { "epoch": 0.6343099865644115, "grad_norm": 0.8165454703144193, "learning_rate": 2.511395299141204e-06, "loss": 0.028, "step": 152020 }, { "epoch": 0.6343308492794019, "grad_norm": 0.6235674015744331, "learning_rate": 2.5113539994788293e-06, "loss": 0.0244, "step": 152025 }, { "epoch": 0.6343517119943921, "grad_norm": 0.6774806836238927, "learning_rate": 2.5113127018538954e-06, "loss": 0.0198, "step": 152030 }, { "epoch": 0.6343725747093824, "grad_norm": 3.1244199703069637, "learning_rate": 2.5112714062662337e-06, "loss": 0.0292, "step": 152035 }, { "epoch": 0.6343934374243727, "grad_norm": 0.7651900473108575, "learning_rate": 2.511230112715677e-06, "loss": 0.0252, "step": 152040 }, { "epoch": 0.634414300139363, "grad_norm": 0.5357033645031037, "learning_rate": 2.5111888212020586e-06, "loss": 0.0239, "step": 152045 }, { "epoch": 0.6344351628543532, "grad_norm": 0.8827893904387953, "learning_rate": 2.5111475317252103e-06, "loss": 0.0277, "step": 152050 }, { "epoch": 0.6344560255693434, "grad_norm": 0.8710235420124361, "learning_rate": 2.5111062442849654e-06, "loss": 0.0216, "step": 152055 }, { "epoch": 0.6344768882843338, "grad_norm": 1.0013344885520248, "learning_rate": 2.5110649588811554e-06, "loss": 0.031, "step": 152060 }, { "epoch": 0.634497750999324, "grad_norm": 0.6293126147526414, "learning_rate": 2.5110236755136143e-06, "loss": 0.0166, "step": 152065 }, { "epoch": 0.6345186137143143, "grad_norm": 0.25838238253917123, "learning_rate": 2.510982394182173e-06, "loss": 0.0222, "step": 152070 }, { "epoch": 0.6345394764293046, "grad_norm": 0.4326525338253793, "learning_rate": 2.510941114886666e-06, "loss": 0.0247, "step": 152075 }, { "epoch": 0.6345603391442949, "grad_norm": 0.855759777137364, "learning_rate": 2.5108998376269244e-06, "loss": 0.0297, "step": 152080 }, { "epoch": 0.6345812018592851, "grad_norm": 1.901072151584727, "learning_rate": 2.5108585624027813e-06, "loss": 0.0329, "step": 152085 }, { "epoch": 0.6346020645742755, "grad_norm": 0.5921480965018413, "learning_rate": 2.5108172892140703e-06, "loss": 0.0217, "step": 152090 }, { "epoch": 0.6346229272892657, "grad_norm": 0.7024660009765311, "learning_rate": 2.510776018060623e-06, "loss": 0.0169, "step": 152095 }, { "epoch": 0.634643790004256, "grad_norm": 0.8605050011973628, "learning_rate": 2.5107347489422727e-06, "loss": 0.017, "step": 152100 }, { "epoch": 0.6346646527192463, "grad_norm": 0.711865083168005, "learning_rate": 2.5106934818588524e-06, "loss": 0.0218, "step": 152105 }, { "epoch": 0.6346855154342366, "grad_norm": 0.41042866294972896, "learning_rate": 2.5106522168101936e-06, "loss": 0.0231, "step": 152110 }, { "epoch": 0.6347063781492268, "grad_norm": 0.9575184991587671, "learning_rate": 2.5106109537961305e-06, "loss": 0.0261, "step": 152115 }, { "epoch": 0.6347272408642171, "grad_norm": 0.8377708520507893, "learning_rate": 2.510569692816495e-06, "loss": 0.0219, "step": 152120 }, { "epoch": 0.6347481035792074, "grad_norm": 0.5733587733586597, "learning_rate": 2.5105284338711205e-06, "loss": 0.0174, "step": 152125 }, { "epoch": 0.6347689662941977, "grad_norm": 0.8474288847059991, "learning_rate": 2.51048717695984e-06, "loss": 0.027, "step": 152130 }, { "epoch": 0.6347898290091879, "grad_norm": 0.7880915730898969, "learning_rate": 2.5104459220824854e-06, "loss": 0.0261, "step": 152135 }, { "epoch": 0.6348106917241783, "grad_norm": 0.4897376246820719, "learning_rate": 2.5104046692388904e-06, "loss": 0.0189, "step": 152140 }, { "epoch": 0.6348315544391685, "grad_norm": 0.593102771186395, "learning_rate": 2.510363418428888e-06, "loss": 0.0152, "step": 152145 }, { "epoch": 0.6348524171541587, "grad_norm": 0.709886243698039, "learning_rate": 2.5103221696523104e-06, "loss": 0.0241, "step": 152150 }, { "epoch": 0.6348732798691491, "grad_norm": 0.6408777084044571, "learning_rate": 2.5102809229089912e-06, "loss": 0.0209, "step": 152155 }, { "epoch": 0.6348941425841393, "grad_norm": 0.5708678994042085, "learning_rate": 2.5102396781987638e-06, "loss": 0.0273, "step": 152160 }, { "epoch": 0.6349150052991296, "grad_norm": 0.5566586979832843, "learning_rate": 2.51019843552146e-06, "loss": 0.0214, "step": 152165 }, { "epoch": 0.6349358680141198, "grad_norm": 0.857103921341615, "learning_rate": 2.5101571948769125e-06, "loss": 0.0189, "step": 152170 }, { "epoch": 0.6349567307291102, "grad_norm": 0.6747854767043223, "learning_rate": 2.5101159562649563e-06, "loss": 0.0314, "step": 152175 }, { "epoch": 0.6349775934441004, "grad_norm": 0.6832981005232527, "learning_rate": 2.510074719685423e-06, "loss": 0.018, "step": 152180 }, { "epoch": 0.6349984561590907, "grad_norm": 1.156181317048664, "learning_rate": 2.510033485138146e-06, "loss": 0.0249, "step": 152185 }, { "epoch": 0.635019318874081, "grad_norm": 1.4418153070537656, "learning_rate": 2.509992252622958e-06, "loss": 0.0279, "step": 152190 }, { "epoch": 0.6350401815890713, "grad_norm": 0.8548399781578707, "learning_rate": 2.5099510221396928e-06, "loss": 0.022, "step": 152195 }, { "epoch": 0.6350610443040615, "grad_norm": 0.4471605445693541, "learning_rate": 2.5099097936881827e-06, "loss": 0.0214, "step": 152200 }, { "epoch": 0.6350819070190519, "grad_norm": 0.6533088622032689, "learning_rate": 2.5098685672682616e-06, "loss": 0.0259, "step": 152205 }, { "epoch": 0.6351027697340421, "grad_norm": 0.6288989493895293, "learning_rate": 2.5098273428797627e-06, "loss": 0.0263, "step": 152210 }, { "epoch": 0.6351236324490324, "grad_norm": 4.385270956490786, "learning_rate": 2.5097861205225183e-06, "loss": 0.0244, "step": 152215 }, { "epoch": 0.6351444951640227, "grad_norm": 0.46689665909331984, "learning_rate": 2.5097449001963627e-06, "loss": 0.0232, "step": 152220 }, { "epoch": 0.635165357879013, "grad_norm": 0.7485726734104399, "learning_rate": 2.509703681901128e-06, "loss": 0.0235, "step": 152225 }, { "epoch": 0.6351862205940032, "grad_norm": 1.1167720858046377, "learning_rate": 2.509662465636648e-06, "loss": 0.0319, "step": 152230 }, { "epoch": 0.6352070833089934, "grad_norm": 0.5126079366315401, "learning_rate": 2.5096212514027563e-06, "loss": 0.0214, "step": 152235 }, { "epoch": 0.6352279460239838, "grad_norm": 0.5632536411339221, "learning_rate": 2.5095800391992855e-06, "loss": 0.0238, "step": 152240 }, { "epoch": 0.635248808738974, "grad_norm": 0.5177274004658291, "learning_rate": 2.509538829026069e-06, "loss": 0.0228, "step": 152245 }, { "epoch": 0.6352696714539643, "grad_norm": 0.4057558160291418, "learning_rate": 2.509497620882941e-06, "loss": 0.0288, "step": 152250 }, { "epoch": 0.6352905341689546, "grad_norm": 1.0421163553173014, "learning_rate": 2.509456414769733e-06, "loss": 0.0415, "step": 152255 }, { "epoch": 0.6353113968839449, "grad_norm": 0.494635582680676, "learning_rate": 2.50941521068628e-06, "loss": 0.0198, "step": 152260 }, { "epoch": 0.6353322595989351, "grad_norm": 0.7527506327853047, "learning_rate": 2.5093740086324153e-06, "loss": 0.0173, "step": 152265 }, { "epoch": 0.6353531223139255, "grad_norm": 0.6912413606522393, "learning_rate": 2.5093328086079717e-06, "loss": 0.0213, "step": 152270 }, { "epoch": 0.6353739850289157, "grad_norm": 1.6465786617771363, "learning_rate": 2.5092916106127823e-06, "loss": 0.0251, "step": 152275 }, { "epoch": 0.635394847743906, "grad_norm": 0.5997332680460403, "learning_rate": 2.509250414646681e-06, "loss": 0.0174, "step": 152280 }, { "epoch": 0.6354157104588963, "grad_norm": 0.49411348665698396, "learning_rate": 2.5092092207095015e-06, "loss": 0.0265, "step": 152285 }, { "epoch": 0.6354365731738866, "grad_norm": 0.6948030068370411, "learning_rate": 2.5091680288010773e-06, "loss": 0.0234, "step": 152290 }, { "epoch": 0.6354574358888768, "grad_norm": 0.9967925963072801, "learning_rate": 2.5091268389212402e-06, "loss": 0.0266, "step": 152295 }, { "epoch": 0.6354782986038671, "grad_norm": 0.6886882306432565, "learning_rate": 2.5090856510698264e-06, "loss": 0.0269, "step": 152300 }, { "epoch": 0.6354991613188574, "grad_norm": 0.5752922760535567, "learning_rate": 2.5090444652466677e-06, "loss": 0.0255, "step": 152305 }, { "epoch": 0.6355200240338477, "grad_norm": 0.7134756115072187, "learning_rate": 2.509003281451598e-06, "loss": 0.0214, "step": 152310 }, { "epoch": 0.6355408867488379, "grad_norm": 0.6410031327772214, "learning_rate": 2.5089620996844504e-06, "loss": 0.0247, "step": 152315 }, { "epoch": 0.6355617494638283, "grad_norm": 0.6363232189858472, "learning_rate": 2.5089209199450596e-06, "loss": 0.0223, "step": 152320 }, { "epoch": 0.6355826121788185, "grad_norm": 0.8119231417477074, "learning_rate": 2.5088797422332583e-06, "loss": 0.0257, "step": 152325 }, { "epoch": 0.6356034748938088, "grad_norm": 0.35584133762088654, "learning_rate": 2.5088385665488803e-06, "loss": 0.0245, "step": 152330 }, { "epoch": 0.6356243376087991, "grad_norm": 0.46969545718603034, "learning_rate": 2.508797392891759e-06, "loss": 0.0213, "step": 152335 }, { "epoch": 0.6356452003237893, "grad_norm": 0.5244828139173335, "learning_rate": 2.508756221261729e-06, "loss": 0.028, "step": 152340 }, { "epoch": 0.6356660630387796, "grad_norm": 0.9224170933431269, "learning_rate": 2.508715051658623e-06, "loss": 0.0254, "step": 152345 }, { "epoch": 0.6356869257537698, "grad_norm": 0.3558574737088363, "learning_rate": 2.5086738840822748e-06, "loss": 0.0217, "step": 152350 }, { "epoch": 0.6357077884687602, "grad_norm": 0.8361023324901244, "learning_rate": 2.508632718532519e-06, "loss": 0.0276, "step": 152355 }, { "epoch": 0.6357286511837504, "grad_norm": 0.8580139441918911, "learning_rate": 2.5085915550091877e-06, "loss": 0.0209, "step": 152360 }, { "epoch": 0.6357495138987407, "grad_norm": 0.8261094019966476, "learning_rate": 2.5085503935121165e-06, "loss": 0.0185, "step": 152365 }, { "epoch": 0.635770376613731, "grad_norm": 0.4507177963002036, "learning_rate": 2.5085092340411375e-06, "loss": 0.0164, "step": 152370 }, { "epoch": 0.6357912393287213, "grad_norm": 0.386889187739728, "learning_rate": 2.5084680765960855e-06, "loss": 0.0249, "step": 152375 }, { "epoch": 0.6358121020437115, "grad_norm": 0.44684631090680443, "learning_rate": 2.508426921176795e-06, "loss": 0.0215, "step": 152380 }, { "epoch": 0.6358329647587019, "grad_norm": 0.6501297840072591, "learning_rate": 2.5083857677830973e-06, "loss": 0.0161, "step": 152385 }, { "epoch": 0.6358538274736921, "grad_norm": 0.45434860001087357, "learning_rate": 2.508344616414829e-06, "loss": 0.0159, "step": 152390 }, { "epoch": 0.6358746901886824, "grad_norm": 1.4801771018365695, "learning_rate": 2.5083034670718226e-06, "loss": 0.0284, "step": 152395 }, { "epoch": 0.6358955529036727, "grad_norm": 0.8116293286023908, "learning_rate": 2.508262319753912e-06, "loss": 0.0207, "step": 152400 }, { "epoch": 0.635916415618663, "grad_norm": 0.6871581686558755, "learning_rate": 2.5082211744609315e-06, "loss": 0.0305, "step": 152405 }, { "epoch": 0.6359372783336532, "grad_norm": 0.4509097199362535, "learning_rate": 2.5081800311927145e-06, "loss": 0.0184, "step": 152410 }, { "epoch": 0.6359581410486435, "grad_norm": 0.44887749182006353, "learning_rate": 2.5081388899490954e-06, "loss": 0.0246, "step": 152415 }, { "epoch": 0.6359790037636338, "grad_norm": 0.4240786896192628, "learning_rate": 2.508097750729908e-06, "loss": 0.0239, "step": 152420 }, { "epoch": 0.635999866478624, "grad_norm": 1.3593932368962525, "learning_rate": 2.5080566135349867e-06, "loss": 0.0299, "step": 152425 }, { "epoch": 0.6360207291936143, "grad_norm": 1.8247563261127533, "learning_rate": 2.508015478364165e-06, "loss": 0.0413, "step": 152430 }, { "epoch": 0.6360415919086047, "grad_norm": 0.6629628782267146, "learning_rate": 2.507974345217276e-06, "loss": 0.0218, "step": 152435 }, { "epoch": 0.6360624546235949, "grad_norm": 0.7365140103087845, "learning_rate": 2.507933214094156e-06, "loss": 0.0258, "step": 152440 }, { "epoch": 0.6360833173385851, "grad_norm": 0.49286234633752746, "learning_rate": 2.507892084994637e-06, "loss": 0.0156, "step": 152445 }, { "epoch": 0.6361041800535755, "grad_norm": 0.7167537605434574, "learning_rate": 2.507850957918554e-06, "loss": 0.0195, "step": 152450 }, { "epoch": 0.6361250427685657, "grad_norm": 0.5373856612994103, "learning_rate": 2.5078098328657414e-06, "loss": 0.0227, "step": 152455 }, { "epoch": 0.636145905483556, "grad_norm": 1.205371019650278, "learning_rate": 2.5077687098360328e-06, "loss": 0.0238, "step": 152460 }, { "epoch": 0.6361667681985463, "grad_norm": 0.6556357865495264, "learning_rate": 2.5077275888292616e-06, "loss": 0.0197, "step": 152465 }, { "epoch": 0.6361876309135366, "grad_norm": 0.479907260282957, "learning_rate": 2.5076864698452636e-06, "loss": 0.019, "step": 152470 }, { "epoch": 0.6362084936285268, "grad_norm": 0.31455907106719616, "learning_rate": 2.507645352883872e-06, "loss": 0.0256, "step": 152475 }, { "epoch": 0.6362293563435171, "grad_norm": 0.8927090834076995, "learning_rate": 2.507604237944921e-06, "loss": 0.0273, "step": 152480 }, { "epoch": 0.6362502190585074, "grad_norm": 3.010685007015158, "learning_rate": 2.507563125028245e-06, "loss": 0.0189, "step": 152485 }, { "epoch": 0.6362710817734977, "grad_norm": 0.6700608733684719, "learning_rate": 2.507522014133678e-06, "loss": 0.0305, "step": 152490 }, { "epoch": 0.6362919444884879, "grad_norm": 0.35018805909891904, "learning_rate": 2.5074809052610545e-06, "loss": 0.0187, "step": 152495 }, { "epoch": 0.6363128072034783, "grad_norm": 1.9684661455765518, "learning_rate": 2.507439798410209e-06, "loss": 0.0283, "step": 152500 }, { "epoch": 0.6363336699184685, "grad_norm": 0.9948941099200926, "learning_rate": 2.5073986935809745e-06, "loss": 0.0217, "step": 152505 }, { "epoch": 0.6363545326334588, "grad_norm": 0.973815552167499, "learning_rate": 2.5073575907731874e-06, "loss": 0.0313, "step": 152510 }, { "epoch": 0.6363753953484491, "grad_norm": 0.5725481118738647, "learning_rate": 2.5073164899866804e-06, "loss": 0.0262, "step": 152515 }, { "epoch": 0.6363962580634394, "grad_norm": 0.617350515473933, "learning_rate": 2.5072753912212877e-06, "loss": 0.0147, "step": 152520 }, { "epoch": 0.6364171207784296, "grad_norm": 0.43838775685438797, "learning_rate": 2.5072342944768452e-06, "loss": 0.023, "step": 152525 }, { "epoch": 0.6364379834934198, "grad_norm": 0.9816071284434618, "learning_rate": 2.507193199753186e-06, "loss": 0.0236, "step": 152530 }, { "epoch": 0.6364588462084102, "grad_norm": 0.9991339742792074, "learning_rate": 2.5071521070501447e-06, "loss": 0.025, "step": 152535 }, { "epoch": 0.6364797089234004, "grad_norm": 0.5512744419748836, "learning_rate": 2.5071110163675562e-06, "loss": 0.0235, "step": 152540 }, { "epoch": 0.6365005716383907, "grad_norm": 0.5036212053029069, "learning_rate": 2.507069927705254e-06, "loss": 0.0232, "step": 152545 }, { "epoch": 0.636521434353381, "grad_norm": 0.4218777659774569, "learning_rate": 2.5070288410630737e-06, "loss": 0.0175, "step": 152550 }, { "epoch": 0.6365422970683713, "grad_norm": 0.5641180682906319, "learning_rate": 2.506987756440849e-06, "loss": 0.0256, "step": 152555 }, { "epoch": 0.6365631597833615, "grad_norm": 0.5723253280502008, "learning_rate": 2.506946673838415e-06, "loss": 0.0203, "step": 152560 }, { "epoch": 0.6365840224983519, "grad_norm": 0.5760728295008959, "learning_rate": 2.5069055932556053e-06, "loss": 0.0247, "step": 152565 }, { "epoch": 0.6366048852133421, "grad_norm": 0.3252511806497369, "learning_rate": 2.5068645146922554e-06, "loss": 0.0171, "step": 152570 }, { "epoch": 0.6366257479283324, "grad_norm": 0.6921151187084384, "learning_rate": 2.5068234381481994e-06, "loss": 0.0233, "step": 152575 }, { "epoch": 0.6366466106433227, "grad_norm": 0.9725916952147673, "learning_rate": 2.5067823636232714e-06, "loss": 0.0221, "step": 152580 }, { "epoch": 0.636667473358313, "grad_norm": 0.713645145438804, "learning_rate": 2.5067412911173065e-06, "loss": 0.018, "step": 152585 }, { "epoch": 0.6366883360733032, "grad_norm": 0.5752597159529605, "learning_rate": 2.5067002206301396e-06, "loss": 0.0183, "step": 152590 }, { "epoch": 0.6367091987882935, "grad_norm": 0.5521780406029237, "learning_rate": 2.506659152161605e-06, "loss": 0.025, "step": 152595 }, { "epoch": 0.6367300615032838, "grad_norm": 0.5206801495228509, "learning_rate": 2.5066180857115375e-06, "loss": 0.0189, "step": 152600 }, { "epoch": 0.6367509242182741, "grad_norm": 0.28373471714298254, "learning_rate": 2.5065770212797706e-06, "loss": 0.0187, "step": 152605 }, { "epoch": 0.6367717869332643, "grad_norm": 0.4598119957208428, "learning_rate": 2.5065359588661414e-06, "loss": 0.0374, "step": 152610 }, { "epoch": 0.6367926496482547, "grad_norm": 1.1006477750461692, "learning_rate": 2.5064948984704823e-06, "loss": 0.0229, "step": 152615 }, { "epoch": 0.6368135123632449, "grad_norm": 0.503943223629448, "learning_rate": 2.5064538400926293e-06, "loss": 0.0242, "step": 152620 }, { "epoch": 0.6368343750782351, "grad_norm": 0.6007449003329909, "learning_rate": 2.5064127837324164e-06, "loss": 0.0253, "step": 152625 }, { "epoch": 0.6368552377932255, "grad_norm": 0.945180714010583, "learning_rate": 2.506371729389679e-06, "loss": 0.0252, "step": 152630 }, { "epoch": 0.6368761005082157, "grad_norm": 0.48338732282435964, "learning_rate": 2.5063306770642515e-06, "loss": 0.0213, "step": 152635 }, { "epoch": 0.636896963223206, "grad_norm": 0.5306314993881636, "learning_rate": 2.5062896267559688e-06, "loss": 0.0155, "step": 152640 }, { "epoch": 0.6369178259381963, "grad_norm": 0.45348652429886205, "learning_rate": 2.506248578464666e-06, "loss": 0.0229, "step": 152645 }, { "epoch": 0.6369386886531866, "grad_norm": 0.7167409952451174, "learning_rate": 2.5062075321901767e-06, "loss": 0.0264, "step": 152650 }, { "epoch": 0.6369595513681768, "grad_norm": 0.6714047683299285, "learning_rate": 2.5061664879323375e-06, "loss": 0.0275, "step": 152655 }, { "epoch": 0.6369804140831671, "grad_norm": 0.5564642825497905, "learning_rate": 2.5061254456909827e-06, "loss": 0.0172, "step": 152660 }, { "epoch": 0.6370012767981574, "grad_norm": 0.9866310817482609, "learning_rate": 2.506084405465946e-06, "loss": 0.0266, "step": 152665 }, { "epoch": 0.6370221395131477, "grad_norm": 1.082322977992903, "learning_rate": 2.506043367257064e-06, "loss": 0.0192, "step": 152670 }, { "epoch": 0.6370430022281379, "grad_norm": 0.7074157136803948, "learning_rate": 2.5060023310641707e-06, "loss": 0.023, "step": 152675 }, { "epoch": 0.6370638649431283, "grad_norm": 0.9846162182826967, "learning_rate": 2.505961296887101e-06, "loss": 0.0238, "step": 152680 }, { "epoch": 0.6370847276581185, "grad_norm": 0.7660410034451789, "learning_rate": 2.5059202647256902e-06, "loss": 0.0272, "step": 152685 }, { "epoch": 0.6371055903731088, "grad_norm": 0.75042753012992, "learning_rate": 2.505879234579773e-06, "loss": 0.0204, "step": 152690 }, { "epoch": 0.6371264530880991, "grad_norm": 0.6865268491993939, "learning_rate": 2.5058382064491846e-06, "loss": 0.0207, "step": 152695 }, { "epoch": 0.6371473158030894, "grad_norm": 0.8241444943967583, "learning_rate": 2.505797180333761e-06, "loss": 0.0258, "step": 152700 }, { "epoch": 0.6371681785180796, "grad_norm": 0.344739190171848, "learning_rate": 2.5057561562333347e-06, "loss": 0.019, "step": 152705 }, { "epoch": 0.6371890412330699, "grad_norm": 0.6728815367926881, "learning_rate": 2.5057151341477438e-06, "loss": 0.0218, "step": 152710 }, { "epoch": 0.6372099039480602, "grad_norm": 0.6096499367109396, "learning_rate": 2.505674114076821e-06, "loss": 0.0244, "step": 152715 }, { "epoch": 0.6372307666630505, "grad_norm": 0.9275232077867741, "learning_rate": 2.5056330960204025e-06, "loss": 0.0191, "step": 152720 }, { "epoch": 0.6372516293780407, "grad_norm": 0.7139666072266735, "learning_rate": 2.505592079978323e-06, "loss": 0.0312, "step": 152725 }, { "epoch": 0.637272492093031, "grad_norm": 0.38595529953690566, "learning_rate": 2.505551065950418e-06, "loss": 0.0242, "step": 152730 }, { "epoch": 0.6372933548080213, "grad_norm": 0.49623076283094697, "learning_rate": 2.5055100539365223e-06, "loss": 0.0213, "step": 152735 }, { "epoch": 0.6373142175230115, "grad_norm": 0.23434774655830562, "learning_rate": 2.5054690439364716e-06, "loss": 0.0193, "step": 152740 }, { "epoch": 0.6373350802380019, "grad_norm": 0.6204638289220071, "learning_rate": 2.505428035950101e-06, "loss": 0.0245, "step": 152745 }, { "epoch": 0.6373559429529921, "grad_norm": 0.9157057389632416, "learning_rate": 2.505387029977245e-06, "loss": 0.0298, "step": 152750 }, { "epoch": 0.6373768056679824, "grad_norm": 0.6117622006110948, "learning_rate": 2.5053460260177393e-06, "loss": 0.0287, "step": 152755 }, { "epoch": 0.6373976683829727, "grad_norm": 1.00199200992723, "learning_rate": 2.5053050240714193e-06, "loss": 0.0286, "step": 152760 }, { "epoch": 0.637418531097963, "grad_norm": 0.2862503706612103, "learning_rate": 2.5052640241381205e-06, "loss": 0.0173, "step": 152765 }, { "epoch": 0.6374393938129532, "grad_norm": 0.8730315704657335, "learning_rate": 2.505223026217677e-06, "loss": 0.0318, "step": 152770 }, { "epoch": 0.6374602565279435, "grad_norm": 0.43969241604639486, "learning_rate": 2.5051820303099255e-06, "loss": 0.0193, "step": 152775 }, { "epoch": 0.6374811192429338, "grad_norm": 0.5804993836340523, "learning_rate": 2.5051410364147007e-06, "loss": 0.0268, "step": 152780 }, { "epoch": 0.6375019819579241, "grad_norm": 0.7629915703031744, "learning_rate": 2.505100044531838e-06, "loss": 0.0205, "step": 152785 }, { "epoch": 0.6375228446729143, "grad_norm": 0.957687616563085, "learning_rate": 2.5050590546611726e-06, "loss": 0.0251, "step": 152790 }, { "epoch": 0.6375437073879047, "grad_norm": 1.0427778246417703, "learning_rate": 2.50501806680254e-06, "loss": 0.0285, "step": 152795 }, { "epoch": 0.6375645701028949, "grad_norm": 0.542622278678717, "learning_rate": 2.5049770809557755e-06, "loss": 0.0234, "step": 152800 }, { "epoch": 0.6375854328178852, "grad_norm": 0.3741483455549559, "learning_rate": 2.5049360971207152e-06, "loss": 0.0252, "step": 152805 }, { "epoch": 0.6376062955328755, "grad_norm": 0.788126695698254, "learning_rate": 2.5048951152971935e-06, "loss": 0.0214, "step": 152810 }, { "epoch": 0.6376271582478658, "grad_norm": 0.40969354390152274, "learning_rate": 2.5048541354850463e-06, "loss": 0.0227, "step": 152815 }, { "epoch": 0.637648020962856, "grad_norm": 0.6321092056136569, "learning_rate": 2.5048131576841095e-06, "loss": 0.0205, "step": 152820 }, { "epoch": 0.6376688836778464, "grad_norm": 0.7898464309900065, "learning_rate": 2.5047721818942185e-06, "loss": 0.0162, "step": 152825 }, { "epoch": 0.6376897463928366, "grad_norm": 0.4912652790586589, "learning_rate": 2.5047312081152075e-06, "loss": 0.0178, "step": 152830 }, { "epoch": 0.6377106091078268, "grad_norm": 0.7082790691124585, "learning_rate": 2.504690236346914e-06, "loss": 0.0198, "step": 152835 }, { "epoch": 0.6377314718228171, "grad_norm": 0.5229055945224167, "learning_rate": 2.5046492665891723e-06, "loss": 0.0269, "step": 152840 }, { "epoch": 0.6377523345378074, "grad_norm": 0.4358547556356864, "learning_rate": 2.5046082988418186e-06, "loss": 0.0196, "step": 152845 }, { "epoch": 0.6377731972527977, "grad_norm": 0.7786246434102163, "learning_rate": 2.504567333104688e-06, "loss": 0.0218, "step": 152850 }, { "epoch": 0.6377940599677879, "grad_norm": 0.561153444397433, "learning_rate": 2.5045263693776155e-06, "loss": 0.0245, "step": 152855 }, { "epoch": 0.6378149226827783, "grad_norm": 1.8643154670794333, "learning_rate": 2.5044854076604384e-06, "loss": 0.0641, "step": 152860 }, { "epoch": 0.6378357853977685, "grad_norm": 0.7866040702272976, "learning_rate": 2.504444447952991e-06, "loss": 0.021, "step": 152865 }, { "epoch": 0.6378566481127588, "grad_norm": 1.9024840147301982, "learning_rate": 2.50440349025511e-06, "loss": 0.0214, "step": 152870 }, { "epoch": 0.6378775108277491, "grad_norm": 0.5458749381598662, "learning_rate": 2.50436253456663e-06, "loss": 0.0248, "step": 152875 }, { "epoch": 0.6378983735427394, "grad_norm": 0.4329773051007185, "learning_rate": 2.5043215808873875e-06, "loss": 0.0185, "step": 152880 }, { "epoch": 0.6379192362577296, "grad_norm": 1.1127515678400328, "learning_rate": 2.5042806292172176e-06, "loss": 0.0224, "step": 152885 }, { "epoch": 0.6379400989727199, "grad_norm": 0.5108586952020622, "learning_rate": 2.504239679555957e-06, "loss": 0.0247, "step": 152890 }, { "epoch": 0.6379609616877102, "grad_norm": 0.5524557325529146, "learning_rate": 2.50419873190344e-06, "loss": 0.0143, "step": 152895 }, { "epoch": 0.6379818244027005, "grad_norm": 0.5356443196366053, "learning_rate": 2.504157786259504e-06, "loss": 0.0159, "step": 152900 }, { "epoch": 0.6380026871176907, "grad_norm": 0.5216037884031413, "learning_rate": 2.504116842623983e-06, "loss": 0.0168, "step": 152905 }, { "epoch": 0.6380235498326811, "grad_norm": 0.9468377502485129, "learning_rate": 2.504075900996715e-06, "loss": 0.0265, "step": 152910 }, { "epoch": 0.6380444125476713, "grad_norm": 0.8461251266199235, "learning_rate": 2.5040349613775338e-06, "loss": 0.0245, "step": 152915 }, { "epoch": 0.6380652752626615, "grad_norm": 0.8069592372767499, "learning_rate": 2.5039940237662765e-06, "loss": 0.0276, "step": 152920 }, { "epoch": 0.6380861379776519, "grad_norm": 0.8314986946769933, "learning_rate": 2.5039530881627783e-06, "loss": 0.019, "step": 152925 }, { "epoch": 0.6381070006926421, "grad_norm": 0.41689411632336704, "learning_rate": 2.5039121545668752e-06, "loss": 0.0188, "step": 152930 }, { "epoch": 0.6381278634076324, "grad_norm": 0.7049468093748558, "learning_rate": 2.503871222978404e-06, "loss": 0.0203, "step": 152935 }, { "epoch": 0.6381487261226227, "grad_norm": 0.5590513287446428, "learning_rate": 2.5038302933971993e-06, "loss": 0.0199, "step": 152940 }, { "epoch": 0.638169588837613, "grad_norm": 0.6100693988402837, "learning_rate": 2.503789365823098e-06, "loss": 0.0228, "step": 152945 }, { "epoch": 0.6381904515526032, "grad_norm": 1.0946623724346676, "learning_rate": 2.5037484402559355e-06, "loss": 0.0381, "step": 152950 }, { "epoch": 0.6382113142675935, "grad_norm": 0.41396902654583695, "learning_rate": 2.503707516695548e-06, "loss": 0.0205, "step": 152955 }, { "epoch": 0.6382321769825838, "grad_norm": 0.7814272144853167, "learning_rate": 2.5036665951417716e-06, "loss": 0.0228, "step": 152960 }, { "epoch": 0.6382530396975741, "grad_norm": 0.4597231923752161, "learning_rate": 2.5036256755944415e-06, "loss": 0.0265, "step": 152965 }, { "epoch": 0.6382739024125643, "grad_norm": 0.6157769561598685, "learning_rate": 2.5035847580533957e-06, "loss": 0.0205, "step": 152970 }, { "epoch": 0.6382947651275547, "grad_norm": 0.5351110415914094, "learning_rate": 2.5035438425184683e-06, "loss": 0.0209, "step": 152975 }, { "epoch": 0.6383156278425449, "grad_norm": 0.8982741206935186, "learning_rate": 2.503502928989496e-06, "loss": 0.0219, "step": 152980 }, { "epoch": 0.6383364905575352, "grad_norm": 1.0111776452129622, "learning_rate": 2.503462017466315e-06, "loss": 0.0191, "step": 152985 }, { "epoch": 0.6383573532725255, "grad_norm": 1.19720918473781, "learning_rate": 2.503421107948762e-06, "loss": 0.0292, "step": 152990 }, { "epoch": 0.6383782159875158, "grad_norm": 0.4454007043533706, "learning_rate": 2.503380200436672e-06, "loss": 0.0332, "step": 152995 }, { "epoch": 0.638399078702506, "grad_norm": 0.4877255944912077, "learning_rate": 2.503339294929882e-06, "loss": 0.021, "step": 153000 }, { "epoch": 0.6384199414174964, "grad_norm": 0.43389024540871896, "learning_rate": 2.5032983914282273e-06, "loss": 0.0249, "step": 153005 }, { "epoch": 0.6384408041324866, "grad_norm": 0.8360054475184396, "learning_rate": 2.503257489931545e-06, "loss": 0.0247, "step": 153010 }, { "epoch": 0.6384616668474768, "grad_norm": 1.2119588216237753, "learning_rate": 2.503216590439671e-06, "loss": 0.0229, "step": 153015 }, { "epoch": 0.6384825295624671, "grad_norm": 1.0736029599301768, "learning_rate": 2.5031756929524415e-06, "loss": 0.0289, "step": 153020 }, { "epoch": 0.6385033922774574, "grad_norm": 0.9027355901392973, "learning_rate": 2.503134797469693e-06, "loss": 0.0284, "step": 153025 }, { "epoch": 0.6385242549924477, "grad_norm": 0.7289505914821944, "learning_rate": 2.503093903991261e-06, "loss": 0.0316, "step": 153030 }, { "epoch": 0.6385451177074379, "grad_norm": 0.7194377352848087, "learning_rate": 2.5030530125169824e-06, "loss": 0.0252, "step": 153035 }, { "epoch": 0.6385659804224283, "grad_norm": 1.1271572923357895, "learning_rate": 2.5030121230466936e-06, "loss": 0.0231, "step": 153040 }, { "epoch": 0.6385868431374185, "grad_norm": 0.4010325450257833, "learning_rate": 2.5029712355802307e-06, "loss": 0.0237, "step": 153045 }, { "epoch": 0.6386077058524088, "grad_norm": 0.3504763469718241, "learning_rate": 2.5029303501174297e-06, "loss": 0.0191, "step": 153050 }, { "epoch": 0.6386285685673991, "grad_norm": 0.6489893686831737, "learning_rate": 2.5028894666581276e-06, "loss": 0.024, "step": 153055 }, { "epoch": 0.6386494312823894, "grad_norm": 0.534923563661409, "learning_rate": 2.5028485852021605e-06, "loss": 0.0179, "step": 153060 }, { "epoch": 0.6386702939973796, "grad_norm": 1.1851460651175443, "learning_rate": 2.5028077057493644e-06, "loss": 0.0286, "step": 153065 }, { "epoch": 0.6386911567123699, "grad_norm": 0.5826551595269275, "learning_rate": 2.5027668282995764e-06, "loss": 0.0217, "step": 153070 }, { "epoch": 0.6387120194273602, "grad_norm": 1.1133843308901998, "learning_rate": 2.5027259528526326e-06, "loss": 0.0281, "step": 153075 }, { "epoch": 0.6387328821423505, "grad_norm": 0.9202848060250419, "learning_rate": 2.5026850794083694e-06, "loss": 0.0236, "step": 153080 }, { "epoch": 0.6387537448573407, "grad_norm": 0.5510881561669474, "learning_rate": 2.5026442079666233e-06, "loss": 0.0211, "step": 153085 }, { "epoch": 0.6387746075723311, "grad_norm": 0.5694119070076629, "learning_rate": 2.5026033385272314e-06, "loss": 0.0252, "step": 153090 }, { "epoch": 0.6387954702873213, "grad_norm": 0.42570345032664914, "learning_rate": 2.5025624710900293e-06, "loss": 0.0202, "step": 153095 }, { "epoch": 0.6388163330023116, "grad_norm": 0.6826284798220247, "learning_rate": 2.5025216056548535e-06, "loss": 0.0249, "step": 153100 }, { "epoch": 0.6388371957173019, "grad_norm": 0.6944235321514742, "learning_rate": 2.5024807422215415e-06, "loss": 0.0299, "step": 153105 }, { "epoch": 0.6388580584322922, "grad_norm": 0.661672033368433, "learning_rate": 2.5024398807899287e-06, "loss": 0.0252, "step": 153110 }, { "epoch": 0.6388789211472824, "grad_norm": 0.5569529657842379, "learning_rate": 2.5023990213598527e-06, "loss": 0.0165, "step": 153115 }, { "epoch": 0.6388997838622728, "grad_norm": 0.668320798063458, "learning_rate": 2.5023581639311496e-06, "loss": 0.0256, "step": 153120 }, { "epoch": 0.638920646577263, "grad_norm": 0.27843216257184783, "learning_rate": 2.502317308503656e-06, "loss": 0.024, "step": 153125 }, { "epoch": 0.6389415092922532, "grad_norm": 0.33604542213521077, "learning_rate": 2.5022764550772083e-06, "loss": 0.0148, "step": 153130 }, { "epoch": 0.6389623720072435, "grad_norm": 0.5466571166376227, "learning_rate": 2.5022356036516437e-06, "loss": 0.026, "step": 153135 }, { "epoch": 0.6389832347222338, "grad_norm": 0.9688701064299065, "learning_rate": 2.5021947542267988e-06, "loss": 0.0264, "step": 153140 }, { "epoch": 0.6390040974372241, "grad_norm": 1.0068605604654248, "learning_rate": 2.50215390680251e-06, "loss": 0.0206, "step": 153145 }, { "epoch": 0.6390249601522143, "grad_norm": 1.286415841715558, "learning_rate": 2.502113061378615e-06, "loss": 0.0272, "step": 153150 }, { "epoch": 0.6390458228672047, "grad_norm": 2.451242482858729, "learning_rate": 2.5020722179549484e-06, "loss": 0.0243, "step": 153155 }, { "epoch": 0.6390666855821949, "grad_norm": 0.8224931702132675, "learning_rate": 2.502031376531349e-06, "loss": 0.0221, "step": 153160 }, { "epoch": 0.6390875482971852, "grad_norm": 0.5858539543714547, "learning_rate": 2.501990537107653e-06, "loss": 0.0179, "step": 153165 }, { "epoch": 0.6391084110121755, "grad_norm": 0.2971309132856923, "learning_rate": 2.5019496996836963e-06, "loss": 0.0213, "step": 153170 }, { "epoch": 0.6391292737271658, "grad_norm": 0.4490420224437779, "learning_rate": 2.501908864259317e-06, "loss": 0.0252, "step": 153175 }, { "epoch": 0.639150136442156, "grad_norm": 0.5210006425922163, "learning_rate": 2.501868030834351e-06, "loss": 0.0214, "step": 153180 }, { "epoch": 0.6391709991571464, "grad_norm": 0.7893059438317815, "learning_rate": 2.501827199408636e-06, "loss": 0.0261, "step": 153185 }, { "epoch": 0.6391918618721366, "grad_norm": 0.4831201030493894, "learning_rate": 2.5017863699820076e-06, "loss": 0.0236, "step": 153190 }, { "epoch": 0.6392127245871269, "grad_norm": 2.966969167230093, "learning_rate": 2.5017455425543042e-06, "loss": 0.0226, "step": 153195 }, { "epoch": 0.6392335873021171, "grad_norm": 0.749441362826534, "learning_rate": 2.501704717125361e-06, "loss": 0.0205, "step": 153200 }, { "epoch": 0.6392544500171075, "grad_norm": 0.7299477026407453, "learning_rate": 2.5016638936950167e-06, "loss": 0.0151, "step": 153205 }, { "epoch": 0.6392753127320977, "grad_norm": 1.219940575434615, "learning_rate": 2.501623072263107e-06, "loss": 0.0178, "step": 153210 }, { "epoch": 0.6392961754470879, "grad_norm": 0.38746594740436774, "learning_rate": 2.501582252829469e-06, "loss": 0.0148, "step": 153215 }, { "epoch": 0.6393170381620783, "grad_norm": 0.7231631377196864, "learning_rate": 2.5015414353939404e-06, "loss": 0.0258, "step": 153220 }, { "epoch": 0.6393379008770685, "grad_norm": 1.1882096566562899, "learning_rate": 2.5015006199563573e-06, "loss": 0.0243, "step": 153225 }, { "epoch": 0.6393587635920588, "grad_norm": 1.504327562592607, "learning_rate": 2.5014598065165573e-06, "loss": 0.0304, "step": 153230 }, { "epoch": 0.6393796263070491, "grad_norm": 0.24745404556401585, "learning_rate": 2.501418995074378e-06, "loss": 0.016, "step": 153235 }, { "epoch": 0.6394004890220394, "grad_norm": 0.7005505792799914, "learning_rate": 2.5013781856296545e-06, "loss": 0.0283, "step": 153240 }, { "epoch": 0.6394213517370296, "grad_norm": 0.9607067503858276, "learning_rate": 2.501337378182226e-06, "loss": 0.0206, "step": 153245 }, { "epoch": 0.6394422144520199, "grad_norm": 0.762340791543604, "learning_rate": 2.501296572731928e-06, "loss": 0.0274, "step": 153250 }, { "epoch": 0.6394630771670102, "grad_norm": 0.7503401149118895, "learning_rate": 2.501255769278598e-06, "loss": 0.0204, "step": 153255 }, { "epoch": 0.6394839398820005, "grad_norm": 0.32858845865887515, "learning_rate": 2.5012149678220743e-06, "loss": 0.0217, "step": 153260 }, { "epoch": 0.6395048025969907, "grad_norm": 0.69452625885512, "learning_rate": 2.5011741683621927e-06, "loss": 0.0169, "step": 153265 }, { "epoch": 0.6395256653119811, "grad_norm": 0.5384126430114753, "learning_rate": 2.5011333708987907e-06, "loss": 0.021, "step": 153270 }, { "epoch": 0.6395465280269713, "grad_norm": 1.0000000423775526, "learning_rate": 2.5010925754317055e-06, "loss": 0.0325, "step": 153275 }, { "epoch": 0.6395673907419616, "grad_norm": 0.40313463144161626, "learning_rate": 2.501051781960774e-06, "loss": 0.0246, "step": 153280 }, { "epoch": 0.6395882534569519, "grad_norm": 1.2312762888083477, "learning_rate": 2.501010990485835e-06, "loss": 0.0273, "step": 153285 }, { "epoch": 0.6396091161719422, "grad_norm": 0.3986116687457, "learning_rate": 2.500970201006724e-06, "loss": 0.0203, "step": 153290 }, { "epoch": 0.6396299788869324, "grad_norm": 0.5227652133983296, "learning_rate": 2.5009294135232782e-06, "loss": 0.0133, "step": 153295 }, { "epoch": 0.6396508416019228, "grad_norm": 1.4903007618393058, "learning_rate": 2.5008886280353356e-06, "loss": 0.0266, "step": 153300 }, { "epoch": 0.639671704316913, "grad_norm": 0.4569833529150215, "learning_rate": 2.5008478445427336e-06, "loss": 0.0206, "step": 153305 }, { "epoch": 0.6396925670319032, "grad_norm": 0.4802848975753658, "learning_rate": 2.5008070630453092e-06, "loss": 0.0223, "step": 153310 }, { "epoch": 0.6397134297468935, "grad_norm": 0.663455207189296, "learning_rate": 2.5007662835429e-06, "loss": 0.026, "step": 153315 }, { "epoch": 0.6397342924618838, "grad_norm": 0.8666768778701063, "learning_rate": 2.500725506035343e-06, "loss": 0.0267, "step": 153320 }, { "epoch": 0.6397551551768741, "grad_norm": 1.0101000096541906, "learning_rate": 2.5006847305224753e-06, "loss": 0.0299, "step": 153325 }, { "epoch": 0.6397760178918643, "grad_norm": 0.6741526048660489, "learning_rate": 2.5006439570041347e-06, "loss": 0.0236, "step": 153330 }, { "epoch": 0.6397968806068547, "grad_norm": 0.7724630412129666, "learning_rate": 2.5006031854801587e-06, "loss": 0.0308, "step": 153335 }, { "epoch": 0.6398177433218449, "grad_norm": 0.5506285902925063, "learning_rate": 2.500562415950385e-06, "loss": 0.0163, "step": 153340 }, { "epoch": 0.6398386060368352, "grad_norm": 0.784074851960008, "learning_rate": 2.50052164841465e-06, "loss": 0.0165, "step": 153345 }, { "epoch": 0.6398594687518255, "grad_norm": 0.9259335124284959, "learning_rate": 2.5004808828727924e-06, "loss": 0.0244, "step": 153350 }, { "epoch": 0.6398803314668158, "grad_norm": 0.6422075400302422, "learning_rate": 2.5004401193246484e-06, "loss": 0.0229, "step": 153355 }, { "epoch": 0.639901194181806, "grad_norm": 0.6658122467552976, "learning_rate": 2.500399357770056e-06, "loss": 0.0275, "step": 153360 }, { "epoch": 0.6399220568967964, "grad_norm": 0.9996185132083397, "learning_rate": 2.5003585982088535e-06, "loss": 0.0194, "step": 153365 }, { "epoch": 0.6399429196117866, "grad_norm": 0.7017259532363304, "learning_rate": 2.5003178406408773e-06, "loss": 0.0262, "step": 153370 }, { "epoch": 0.6399637823267769, "grad_norm": 0.8759526657042183, "learning_rate": 2.500277085065966e-06, "loss": 0.029, "step": 153375 }, { "epoch": 0.6399846450417671, "grad_norm": 2.787202948366416, "learning_rate": 2.500236331483957e-06, "loss": 0.0194, "step": 153380 }, { "epoch": 0.6400055077567575, "grad_norm": 0.5918477744952894, "learning_rate": 2.5001955798946866e-06, "loss": 0.0151, "step": 153385 }, { "epoch": 0.6400263704717477, "grad_norm": 0.7274293355004074, "learning_rate": 2.5001548302979934e-06, "loss": 0.014, "step": 153390 }, { "epoch": 0.640047233186738, "grad_norm": 0.6290559638252533, "learning_rate": 2.500114082693715e-06, "loss": 0.0242, "step": 153395 }, { "epoch": 0.6400680959017283, "grad_norm": 0.7191813968776628, "learning_rate": 2.5000733370816893e-06, "loss": 0.0281, "step": 153400 }, { "epoch": 0.6400889586167186, "grad_norm": 0.8760435593824942, "learning_rate": 2.500032593461753e-06, "loss": 0.0201, "step": 153405 }, { "epoch": 0.6401098213317088, "grad_norm": 0.8312958346313585, "learning_rate": 2.499991851833745e-06, "loss": 0.0197, "step": 153410 }, { "epoch": 0.6401306840466991, "grad_norm": 0.6410486676795394, "learning_rate": 2.499951112197502e-06, "loss": 0.0242, "step": 153415 }, { "epoch": 0.6401515467616894, "grad_norm": 0.7157111196501275, "learning_rate": 2.499910374552863e-06, "loss": 0.0191, "step": 153420 }, { "epoch": 0.6401724094766796, "grad_norm": 0.6862101085904694, "learning_rate": 2.499869638899664e-06, "loss": 0.0185, "step": 153425 }, { "epoch": 0.6401932721916699, "grad_norm": 0.6950120336037167, "learning_rate": 2.4998289052377434e-06, "loss": 0.0161, "step": 153430 }, { "epoch": 0.6402141349066602, "grad_norm": 0.32405391220111696, "learning_rate": 2.4997881735669403e-06, "loss": 0.015, "step": 153435 }, { "epoch": 0.6402349976216505, "grad_norm": 0.5385672581002863, "learning_rate": 2.49974744388709e-06, "loss": 0.0233, "step": 153440 }, { "epoch": 0.6402558603366407, "grad_norm": 0.533541964739941, "learning_rate": 2.499706716198033e-06, "loss": 0.0155, "step": 153445 }, { "epoch": 0.6402767230516311, "grad_norm": 0.6112818322928525, "learning_rate": 2.4996659904996055e-06, "loss": 0.0226, "step": 153450 }, { "epoch": 0.6402975857666213, "grad_norm": 0.6344954062735151, "learning_rate": 2.499625266791645e-06, "loss": 0.0199, "step": 153455 }, { "epoch": 0.6403184484816116, "grad_norm": 0.3259185368394175, "learning_rate": 2.4995845450739908e-06, "loss": 0.0214, "step": 153460 }, { "epoch": 0.6403393111966019, "grad_norm": 0.8968787375712403, "learning_rate": 2.4995438253464797e-06, "loss": 0.0229, "step": 153465 }, { "epoch": 0.6403601739115922, "grad_norm": 0.6607504451553827, "learning_rate": 2.4995031076089505e-06, "loss": 0.0307, "step": 153470 }, { "epoch": 0.6403810366265824, "grad_norm": 0.5115932557384856, "learning_rate": 2.4994623918612394e-06, "loss": 0.0169, "step": 153475 }, { "epoch": 0.6404018993415728, "grad_norm": 0.5368804390312556, "learning_rate": 2.499421678103186e-06, "loss": 0.0195, "step": 153480 }, { "epoch": 0.640422762056563, "grad_norm": 0.7432661902083572, "learning_rate": 2.499380966334628e-06, "loss": 0.0214, "step": 153485 }, { "epoch": 0.6404436247715533, "grad_norm": 0.7442317672463565, "learning_rate": 2.499340256555403e-06, "loss": 0.02, "step": 153490 }, { "epoch": 0.6404644874865435, "grad_norm": 1.0295716755407625, "learning_rate": 2.499299548765349e-06, "loss": 0.0336, "step": 153495 }, { "epoch": 0.6404853502015339, "grad_norm": 0.5162805858613659, "learning_rate": 2.499258842964304e-06, "loss": 0.0285, "step": 153500 }, { "epoch": 0.6405062129165241, "grad_norm": 1.1023583055889516, "learning_rate": 2.4992181391521068e-06, "loss": 0.0252, "step": 153505 }, { "epoch": 0.6405270756315143, "grad_norm": 0.8654199614694121, "learning_rate": 2.4991774373285945e-06, "loss": 0.0242, "step": 153510 }, { "epoch": 0.6405479383465047, "grad_norm": 3.9305251467026325, "learning_rate": 2.4991367374936047e-06, "loss": 0.0253, "step": 153515 }, { "epoch": 0.6405688010614949, "grad_norm": 0.21266685370290406, "learning_rate": 2.499096039646977e-06, "loss": 0.027, "step": 153520 }, { "epoch": 0.6405896637764852, "grad_norm": 1.1166593033989605, "learning_rate": 2.4990553437885485e-06, "loss": 0.0308, "step": 153525 }, { "epoch": 0.6406105264914755, "grad_norm": 0.6325437650426132, "learning_rate": 2.499014649918158e-06, "loss": 0.0209, "step": 153530 }, { "epoch": 0.6406313892064658, "grad_norm": 0.8890792289577225, "learning_rate": 2.498973958035643e-06, "loss": 0.0221, "step": 153535 }, { "epoch": 0.640652251921456, "grad_norm": 0.9063466161839862, "learning_rate": 2.498933268140842e-06, "loss": 0.0276, "step": 153540 }, { "epoch": 0.6406731146364464, "grad_norm": 0.4065236750185185, "learning_rate": 2.4988925802335927e-06, "loss": 0.0408, "step": 153545 }, { "epoch": 0.6406939773514366, "grad_norm": 0.628211773410693, "learning_rate": 2.4988518943137335e-06, "loss": 0.0227, "step": 153550 }, { "epoch": 0.6407148400664269, "grad_norm": 0.6629615112670239, "learning_rate": 2.4988112103811037e-06, "loss": 0.0299, "step": 153555 }, { "epoch": 0.6407357027814171, "grad_norm": 0.12845772821771562, "learning_rate": 2.49877052843554e-06, "loss": 0.0262, "step": 153560 }, { "epoch": 0.6407565654964075, "grad_norm": 0.8054577374924543, "learning_rate": 2.498729848476881e-06, "loss": 0.0208, "step": 153565 }, { "epoch": 0.6407774282113977, "grad_norm": 1.2280643236101783, "learning_rate": 2.4986891705049657e-06, "loss": 0.0268, "step": 153570 }, { "epoch": 0.640798290926388, "grad_norm": 0.834337033304166, "learning_rate": 2.4986484945196323e-06, "loss": 0.0244, "step": 153575 }, { "epoch": 0.6408191536413783, "grad_norm": 0.5602876266865389, "learning_rate": 2.4986078205207183e-06, "loss": 0.0209, "step": 153580 }, { "epoch": 0.6408400163563686, "grad_norm": 0.5292517339165955, "learning_rate": 2.498567148508062e-06, "loss": 0.0239, "step": 153585 }, { "epoch": 0.6408608790713588, "grad_norm": 0.5825970665609843, "learning_rate": 2.4985264784815035e-06, "loss": 0.0254, "step": 153590 }, { "epoch": 0.6408817417863492, "grad_norm": 0.5379529970226828, "learning_rate": 2.4984858104408784e-06, "loss": 0.0182, "step": 153595 }, { "epoch": 0.6409026045013394, "grad_norm": 0.6919584580701147, "learning_rate": 2.4984451443860276e-06, "loss": 0.0246, "step": 153600 }, { "epoch": 0.6409234672163296, "grad_norm": 1.0105215474662728, "learning_rate": 2.498404480316788e-06, "loss": 0.0242, "step": 153605 }, { "epoch": 0.6409443299313199, "grad_norm": 0.5973900563676763, "learning_rate": 2.4983638182329983e-06, "loss": 0.0208, "step": 153610 }, { "epoch": 0.6409651926463102, "grad_norm": 0.8234051606330391, "learning_rate": 2.4983231581344976e-06, "loss": 0.0242, "step": 153615 }, { "epoch": 0.6409860553613005, "grad_norm": 2.663331500087138, "learning_rate": 2.498282500021124e-06, "loss": 0.0289, "step": 153620 }, { "epoch": 0.6410069180762907, "grad_norm": 0.8344211345028171, "learning_rate": 2.4982418438927155e-06, "loss": 0.0176, "step": 153625 }, { "epoch": 0.6410277807912811, "grad_norm": 0.5440603667565842, "learning_rate": 2.498201189749111e-06, "loss": 0.0288, "step": 153630 }, { "epoch": 0.6410486435062713, "grad_norm": 0.4193380403855286, "learning_rate": 2.498160537590148e-06, "loss": 0.0314, "step": 153635 }, { "epoch": 0.6410695062212616, "grad_norm": 0.49822807772128747, "learning_rate": 2.498119887415667e-06, "loss": 0.0203, "step": 153640 }, { "epoch": 0.6410903689362519, "grad_norm": 0.9942826540621379, "learning_rate": 2.4980792392255056e-06, "loss": 0.0343, "step": 153645 }, { "epoch": 0.6411112316512422, "grad_norm": 0.35763312039408707, "learning_rate": 2.4980385930195023e-06, "loss": 0.0192, "step": 153650 }, { "epoch": 0.6411320943662324, "grad_norm": 0.6276353313743906, "learning_rate": 2.497997948797495e-06, "loss": 0.0226, "step": 153655 }, { "epoch": 0.6411529570812228, "grad_norm": 1.125926839786188, "learning_rate": 2.497957306559324e-06, "loss": 0.0234, "step": 153660 }, { "epoch": 0.641173819796213, "grad_norm": 0.3958644923450438, "learning_rate": 2.4979166663048263e-06, "loss": 0.0186, "step": 153665 }, { "epoch": 0.6411946825112033, "grad_norm": 0.4182462482806785, "learning_rate": 2.4978760280338408e-06, "loss": 0.0159, "step": 153670 }, { "epoch": 0.6412155452261935, "grad_norm": 0.665652038826774, "learning_rate": 2.4978353917462074e-06, "loss": 0.0202, "step": 153675 }, { "epoch": 0.6412364079411839, "grad_norm": 0.8520192362518764, "learning_rate": 2.4977947574417632e-06, "loss": 0.0258, "step": 153680 }, { "epoch": 0.6412572706561741, "grad_norm": 0.9212044062792625, "learning_rate": 2.4977541251203473e-06, "loss": 0.0184, "step": 153685 }, { "epoch": 0.6412781333711643, "grad_norm": 0.7448566569765078, "learning_rate": 2.497713494781799e-06, "loss": 0.0269, "step": 153690 }, { "epoch": 0.6412989960861547, "grad_norm": 1.2173383783025356, "learning_rate": 2.497672866425957e-06, "loss": 0.0186, "step": 153695 }, { "epoch": 0.641319858801145, "grad_norm": 0.9234475819351943, "learning_rate": 2.4976322400526597e-06, "loss": 0.0347, "step": 153700 }, { "epoch": 0.6413407215161352, "grad_norm": 0.5781111231753813, "learning_rate": 2.497591615661746e-06, "loss": 0.0316, "step": 153705 }, { "epoch": 0.6413615842311255, "grad_norm": 0.687274031419563, "learning_rate": 2.497550993253054e-06, "loss": 0.0301, "step": 153710 }, { "epoch": 0.6413824469461158, "grad_norm": 0.36175380137614294, "learning_rate": 2.497510372826424e-06, "loss": 0.0263, "step": 153715 }, { "epoch": 0.641403309661106, "grad_norm": 0.2892466742607062, "learning_rate": 2.497469754381693e-06, "loss": 0.0244, "step": 153720 }, { "epoch": 0.6414241723760964, "grad_norm": 0.3405963543569985, "learning_rate": 2.4974291379187014e-06, "loss": 0.0188, "step": 153725 }, { "epoch": 0.6414450350910866, "grad_norm": 0.21264997204063624, "learning_rate": 2.497388523437287e-06, "loss": 0.0183, "step": 153730 }, { "epoch": 0.6414658978060769, "grad_norm": 0.4826418422591296, "learning_rate": 2.4973479109372895e-06, "loss": 0.0293, "step": 153735 }, { "epoch": 0.6414867605210671, "grad_norm": 0.820333348236308, "learning_rate": 2.4973073004185474e-06, "loss": 0.0311, "step": 153740 }, { "epoch": 0.6415076232360575, "grad_norm": 0.48418544126176416, "learning_rate": 2.4972666918808996e-06, "loss": 0.019, "step": 153745 }, { "epoch": 0.6415284859510477, "grad_norm": 1.064905922376175, "learning_rate": 2.4972260853241846e-06, "loss": 0.0391, "step": 153750 }, { "epoch": 0.641549348666038, "grad_norm": 0.8922684985112903, "learning_rate": 2.4971854807482425e-06, "loss": 0.0282, "step": 153755 }, { "epoch": 0.6415702113810283, "grad_norm": 0.4710473778223751, "learning_rate": 2.4971448781529116e-06, "loss": 0.0175, "step": 153760 }, { "epoch": 0.6415910740960186, "grad_norm": 0.7233666956693676, "learning_rate": 2.4971042775380306e-06, "loss": 0.0176, "step": 153765 }, { "epoch": 0.6416119368110088, "grad_norm": 0.5758369177222902, "learning_rate": 2.4970636789034385e-06, "loss": 0.0146, "step": 153770 }, { "epoch": 0.6416327995259992, "grad_norm": 0.8139484156071523, "learning_rate": 2.497023082248975e-06, "loss": 0.022, "step": 153775 }, { "epoch": 0.6416536622409894, "grad_norm": 0.7107924072361498, "learning_rate": 2.4969824875744786e-06, "loss": 0.0207, "step": 153780 }, { "epoch": 0.6416745249559797, "grad_norm": 0.6279367108576653, "learning_rate": 2.4969418948797885e-06, "loss": 0.0205, "step": 153785 }, { "epoch": 0.6416953876709699, "grad_norm": 0.6023349267184956, "learning_rate": 2.4969013041647435e-06, "loss": 0.0199, "step": 153790 }, { "epoch": 0.6417162503859603, "grad_norm": 0.41021618499927825, "learning_rate": 2.4968607154291836e-06, "loss": 0.0185, "step": 153795 }, { "epoch": 0.6417371131009505, "grad_norm": 0.6882943744825432, "learning_rate": 2.4968201286729475e-06, "loss": 0.022, "step": 153800 }, { "epoch": 0.6417579758159407, "grad_norm": 0.5517201858652082, "learning_rate": 2.4967795438958733e-06, "loss": 0.0195, "step": 153805 }, { "epoch": 0.6417788385309311, "grad_norm": 0.6472875700317883, "learning_rate": 2.4967389610978013e-06, "loss": 0.0246, "step": 153810 }, { "epoch": 0.6417997012459213, "grad_norm": 0.7913306073981947, "learning_rate": 2.4966983802785703e-06, "loss": 0.0372, "step": 153815 }, { "epoch": 0.6418205639609116, "grad_norm": 1.6194044295068035, "learning_rate": 2.4966578014380197e-06, "loss": 0.036, "step": 153820 }, { "epoch": 0.6418414266759019, "grad_norm": 0.4521680194937987, "learning_rate": 2.4966172245759885e-06, "loss": 0.0312, "step": 153825 }, { "epoch": 0.6418622893908922, "grad_norm": 1.13284298653471, "learning_rate": 2.496576649692316e-06, "loss": 0.0279, "step": 153830 }, { "epoch": 0.6418831521058824, "grad_norm": 0.30716270240674576, "learning_rate": 2.4965360767868417e-06, "loss": 0.0215, "step": 153835 }, { "epoch": 0.6419040148208728, "grad_norm": 0.5473187848690513, "learning_rate": 2.496495505859404e-06, "loss": 0.02, "step": 153840 }, { "epoch": 0.641924877535863, "grad_norm": 0.7306648624563565, "learning_rate": 2.4964549369098427e-06, "loss": 0.0236, "step": 153845 }, { "epoch": 0.6419457402508533, "grad_norm": 1.2102674937092792, "learning_rate": 2.4964143699379982e-06, "loss": 0.0209, "step": 153850 }, { "epoch": 0.6419666029658435, "grad_norm": 0.8773891406801662, "learning_rate": 2.496373804943708e-06, "loss": 0.0247, "step": 153855 }, { "epoch": 0.6419874656808339, "grad_norm": 0.6094122407602002, "learning_rate": 2.4963332419268124e-06, "loss": 0.0212, "step": 153860 }, { "epoch": 0.6420083283958241, "grad_norm": 0.512733748171479, "learning_rate": 2.4962926808871504e-06, "loss": 0.0259, "step": 153865 }, { "epoch": 0.6420291911108144, "grad_norm": 0.45006165239910295, "learning_rate": 2.496252121824562e-06, "loss": 0.0189, "step": 153870 }, { "epoch": 0.6420500538258047, "grad_norm": 0.5344850752430784, "learning_rate": 2.496211564738886e-06, "loss": 0.0187, "step": 153875 }, { "epoch": 0.642070916540795, "grad_norm": 0.850475655215381, "learning_rate": 2.4961710096299614e-06, "loss": 0.0235, "step": 153880 }, { "epoch": 0.6420917792557852, "grad_norm": 0.927039045041175, "learning_rate": 2.496130456497629e-06, "loss": 0.0211, "step": 153885 }, { "epoch": 0.6421126419707756, "grad_norm": 0.7630465645356483, "learning_rate": 2.496089905341727e-06, "loss": 0.027, "step": 153890 }, { "epoch": 0.6421335046857658, "grad_norm": 0.6374045687087829, "learning_rate": 2.4960493561620952e-06, "loss": 0.0282, "step": 153895 }, { "epoch": 0.642154367400756, "grad_norm": 0.9567636464662539, "learning_rate": 2.4960088089585733e-06, "loss": 0.0233, "step": 153900 }, { "epoch": 0.6421752301157464, "grad_norm": 0.5514624037189263, "learning_rate": 2.4959682637310005e-06, "loss": 0.0215, "step": 153905 }, { "epoch": 0.6421960928307366, "grad_norm": 0.7771416217790004, "learning_rate": 2.495927720479217e-06, "loss": 0.0248, "step": 153910 }, { "epoch": 0.6422169555457269, "grad_norm": 0.42411545190877936, "learning_rate": 2.4958871792030616e-06, "loss": 0.0251, "step": 153915 }, { "epoch": 0.6422378182607171, "grad_norm": 0.5553332757969088, "learning_rate": 2.4958466399023736e-06, "loss": 0.0171, "step": 153920 }, { "epoch": 0.6422586809757075, "grad_norm": 0.8565378043353077, "learning_rate": 2.4958061025769936e-06, "loss": 0.0286, "step": 153925 }, { "epoch": 0.6422795436906977, "grad_norm": 0.41114994587506065, "learning_rate": 2.4957655672267596e-06, "loss": 0.0184, "step": 153930 }, { "epoch": 0.642300406405688, "grad_norm": 0.862115308572567, "learning_rate": 2.495725033851514e-06, "loss": 0.0238, "step": 153935 }, { "epoch": 0.6423212691206783, "grad_norm": 0.61294744100637, "learning_rate": 2.4956845024510933e-06, "loss": 0.0224, "step": 153940 }, { "epoch": 0.6423421318356686, "grad_norm": 0.5678159996117992, "learning_rate": 2.495643973025339e-06, "loss": 0.0179, "step": 153945 }, { "epoch": 0.6423629945506588, "grad_norm": 1.2220406088355884, "learning_rate": 2.4956034455740905e-06, "loss": 0.0215, "step": 153950 }, { "epoch": 0.6423838572656492, "grad_norm": 0.6666030319567359, "learning_rate": 2.495562920097187e-06, "loss": 0.0192, "step": 153955 }, { "epoch": 0.6424047199806394, "grad_norm": 0.693723290362149, "learning_rate": 2.495522396594468e-06, "loss": 0.0204, "step": 153960 }, { "epoch": 0.6424255826956297, "grad_norm": 0.5531167906316179, "learning_rate": 2.4954818750657743e-06, "loss": 0.0251, "step": 153965 }, { "epoch": 0.6424464454106199, "grad_norm": 0.8924987622114903, "learning_rate": 2.495441355510945e-06, "loss": 0.0166, "step": 153970 }, { "epoch": 0.6424673081256103, "grad_norm": 0.4290592560053463, "learning_rate": 2.4954008379298205e-06, "loss": 0.0191, "step": 153975 }, { "epoch": 0.6424881708406005, "grad_norm": 0.7934546611707208, "learning_rate": 2.495360322322239e-06, "loss": 0.0182, "step": 153980 }, { "epoch": 0.6425090335555907, "grad_norm": 0.7655517968208548, "learning_rate": 2.495319808688042e-06, "loss": 0.0241, "step": 153985 }, { "epoch": 0.6425298962705811, "grad_norm": 0.5030299463250506, "learning_rate": 2.4952792970270676e-06, "loss": 0.0287, "step": 153990 }, { "epoch": 0.6425507589855713, "grad_norm": 0.7887236847945367, "learning_rate": 2.4952387873391567e-06, "loss": 0.0209, "step": 153995 }, { "epoch": 0.6425716217005616, "grad_norm": 0.7351361394674037, "learning_rate": 2.49519827962415e-06, "loss": 0.0165, "step": 154000 }, { "epoch": 0.642592484415552, "grad_norm": 1.607993741726773, "learning_rate": 2.4951577738818857e-06, "loss": 0.0269, "step": 154005 }, { "epoch": 0.6426133471305422, "grad_norm": 0.548262485894859, "learning_rate": 2.4951172701122046e-06, "loss": 0.0254, "step": 154010 }, { "epoch": 0.6426342098455324, "grad_norm": 0.6071562696987185, "learning_rate": 2.4950767683149464e-06, "loss": 0.0172, "step": 154015 }, { "epoch": 0.6426550725605228, "grad_norm": 0.7120876982556749, "learning_rate": 2.495036268489951e-06, "loss": 0.0201, "step": 154020 }, { "epoch": 0.642675935275513, "grad_norm": 0.49213466125551364, "learning_rate": 2.494995770637058e-06, "loss": 0.0281, "step": 154025 }, { "epoch": 0.6426967979905033, "grad_norm": 0.5783332877987233, "learning_rate": 2.494955274756108e-06, "loss": 0.0163, "step": 154030 }, { "epoch": 0.6427176607054935, "grad_norm": 0.6839455855586217, "learning_rate": 2.4949147808469405e-06, "loss": 0.0166, "step": 154035 }, { "epoch": 0.6427385234204839, "grad_norm": 0.7516816530122026, "learning_rate": 2.494874288909396e-06, "loss": 0.0236, "step": 154040 }, { "epoch": 0.6427593861354741, "grad_norm": 0.5553144312755804, "learning_rate": 2.4948337989433137e-06, "loss": 0.0186, "step": 154045 }, { "epoch": 0.6427802488504644, "grad_norm": 0.5959185601536648, "learning_rate": 2.494793310948534e-06, "loss": 0.0203, "step": 154050 }, { "epoch": 0.6428011115654547, "grad_norm": 0.5814583215690649, "learning_rate": 2.4947528249248975e-06, "loss": 0.0248, "step": 154055 }, { "epoch": 0.642821974280445, "grad_norm": 0.7545886977807984, "learning_rate": 2.4947123408722443e-06, "loss": 0.0284, "step": 154060 }, { "epoch": 0.6428428369954352, "grad_norm": 1.0091338315642755, "learning_rate": 2.4946718587904132e-06, "loss": 0.0339, "step": 154065 }, { "epoch": 0.6428636997104256, "grad_norm": 0.33236803792843106, "learning_rate": 2.4946313786792454e-06, "loss": 0.0179, "step": 154070 }, { "epoch": 0.6428845624254158, "grad_norm": 0.5814004880774983, "learning_rate": 2.4945909005385806e-06, "loss": 0.024, "step": 154075 }, { "epoch": 0.642905425140406, "grad_norm": 1.5608686602212347, "learning_rate": 2.494550424368259e-06, "loss": 0.0334, "step": 154080 }, { "epoch": 0.6429262878553963, "grad_norm": 0.7632372181336802, "learning_rate": 2.4945099501681207e-06, "loss": 0.0215, "step": 154085 }, { "epoch": 0.6429471505703866, "grad_norm": 0.7018260713415059, "learning_rate": 2.4944694779380065e-06, "loss": 0.021, "step": 154090 }, { "epoch": 0.6429680132853769, "grad_norm": 0.6579874248199867, "learning_rate": 2.4944290076777556e-06, "loss": 0.0264, "step": 154095 }, { "epoch": 0.6429888760003671, "grad_norm": 1.5899143610022757, "learning_rate": 2.4943885393872086e-06, "loss": 0.0288, "step": 154100 }, { "epoch": 0.6430097387153575, "grad_norm": 0.7644340952614701, "learning_rate": 2.494348073066206e-06, "loss": 0.0195, "step": 154105 }, { "epoch": 0.6430306014303477, "grad_norm": 1.3489376916618598, "learning_rate": 2.494307608714588e-06, "loss": 0.0245, "step": 154110 }, { "epoch": 0.643051464145338, "grad_norm": 1.2609418064126399, "learning_rate": 2.4942671463321945e-06, "loss": 0.0293, "step": 154115 }, { "epoch": 0.6430723268603283, "grad_norm": 0.7520606463535972, "learning_rate": 2.4942266859188663e-06, "loss": 0.0367, "step": 154120 }, { "epoch": 0.6430931895753186, "grad_norm": 0.5614058713922742, "learning_rate": 2.494186227474443e-06, "loss": 0.027, "step": 154125 }, { "epoch": 0.6431140522903088, "grad_norm": 1.1842966342619017, "learning_rate": 2.494145770998766e-06, "loss": 0.0317, "step": 154130 }, { "epoch": 0.6431349150052992, "grad_norm": 0.7870773331542035, "learning_rate": 2.4941053164916744e-06, "loss": 0.026, "step": 154135 }, { "epoch": 0.6431557777202894, "grad_norm": 0.41172426137786106, "learning_rate": 2.494064863953009e-06, "loss": 0.0265, "step": 154140 }, { "epoch": 0.6431766404352797, "grad_norm": 0.6141764741529933, "learning_rate": 2.4940244133826104e-06, "loss": 0.0216, "step": 154145 }, { "epoch": 0.6431975031502699, "grad_norm": 0.7749542661682686, "learning_rate": 2.493983964780319e-06, "loss": 0.0218, "step": 154150 }, { "epoch": 0.6432183658652603, "grad_norm": 0.6556297624041098, "learning_rate": 2.493943518145975e-06, "loss": 0.0176, "step": 154155 }, { "epoch": 0.6432392285802505, "grad_norm": 1.0432484259107675, "learning_rate": 2.4939030734794194e-06, "loss": 0.0223, "step": 154160 }, { "epoch": 0.6432600912952408, "grad_norm": 0.5483444404293008, "learning_rate": 2.493862630780492e-06, "loss": 0.0256, "step": 154165 }, { "epoch": 0.6432809540102311, "grad_norm": 0.4522385045625992, "learning_rate": 2.4938221900490326e-06, "loss": 0.018, "step": 154170 }, { "epoch": 0.6433018167252214, "grad_norm": 0.6478991024313362, "learning_rate": 2.493781751284883e-06, "loss": 0.0296, "step": 154175 }, { "epoch": 0.6433226794402116, "grad_norm": 0.8939872485658926, "learning_rate": 2.4937413144878835e-06, "loss": 0.03, "step": 154180 }, { "epoch": 0.643343542155202, "grad_norm": 0.7211261778207343, "learning_rate": 2.4937008796578742e-06, "loss": 0.0155, "step": 154185 }, { "epoch": 0.6433644048701922, "grad_norm": 0.9957174868411581, "learning_rate": 2.4936604467946954e-06, "loss": 0.0216, "step": 154190 }, { "epoch": 0.6433852675851824, "grad_norm": 1.5050376518432642, "learning_rate": 2.4936200158981887e-06, "loss": 0.0338, "step": 154195 }, { "epoch": 0.6434061303001728, "grad_norm": 0.5308472423839434, "learning_rate": 2.4935795869681934e-06, "loss": 0.0228, "step": 154200 }, { "epoch": 0.643426993015163, "grad_norm": 0.4994921255732828, "learning_rate": 2.4935391600045504e-06, "loss": 0.0253, "step": 154205 }, { "epoch": 0.6434478557301533, "grad_norm": 0.9772731480077633, "learning_rate": 2.4934987350071016e-06, "loss": 0.0291, "step": 154210 }, { "epoch": 0.6434687184451435, "grad_norm": 1.7125270136919821, "learning_rate": 2.4934583119756862e-06, "loss": 0.0267, "step": 154215 }, { "epoch": 0.6434895811601339, "grad_norm": 0.2792053678269205, "learning_rate": 2.4934178909101445e-06, "loss": 0.016, "step": 154220 }, { "epoch": 0.6435104438751241, "grad_norm": 0.6730022291014066, "learning_rate": 2.4933774718103194e-06, "loss": 0.0274, "step": 154225 }, { "epoch": 0.6435313065901144, "grad_norm": 0.688423578111701, "learning_rate": 2.4933370546760486e-06, "loss": 0.0188, "step": 154230 }, { "epoch": 0.6435521693051047, "grad_norm": 0.40485404381876355, "learning_rate": 2.4932966395071746e-06, "loss": 0.0232, "step": 154235 }, { "epoch": 0.643573032020095, "grad_norm": 0.9068095939029366, "learning_rate": 2.493256226303539e-06, "loss": 0.0274, "step": 154240 }, { "epoch": 0.6435938947350852, "grad_norm": 0.5328202393155448, "learning_rate": 2.49321581506498e-06, "loss": 0.0254, "step": 154245 }, { "epoch": 0.6436147574500756, "grad_norm": 0.4629545978553187, "learning_rate": 2.4931754057913407e-06, "loss": 0.0209, "step": 154250 }, { "epoch": 0.6436356201650658, "grad_norm": 0.7153443411176059, "learning_rate": 2.4931349984824602e-06, "loss": 0.0228, "step": 154255 }, { "epoch": 0.6436564828800561, "grad_norm": 0.7918935408627076, "learning_rate": 2.4930945931381804e-06, "loss": 0.0234, "step": 154260 }, { "epoch": 0.6436773455950463, "grad_norm": 1.0049755497995296, "learning_rate": 2.4930541897583415e-06, "loss": 0.0235, "step": 154265 }, { "epoch": 0.6436982083100367, "grad_norm": 0.9854008187900692, "learning_rate": 2.493013788342784e-06, "loss": 0.0247, "step": 154270 }, { "epoch": 0.6437190710250269, "grad_norm": 0.862769204680976, "learning_rate": 2.49297338889135e-06, "loss": 0.0219, "step": 154275 }, { "epoch": 0.6437399337400171, "grad_norm": 0.6454332884742834, "learning_rate": 2.4929329914038787e-06, "loss": 0.0226, "step": 154280 }, { "epoch": 0.6437607964550075, "grad_norm": 0.5547843199830275, "learning_rate": 2.4928925958802127e-06, "loss": 0.0257, "step": 154285 }, { "epoch": 0.6437816591699977, "grad_norm": 1.1115534005321603, "learning_rate": 2.4928522023201915e-06, "loss": 0.0238, "step": 154290 }, { "epoch": 0.643802521884988, "grad_norm": 0.6140901605009572, "learning_rate": 2.492811810723657e-06, "loss": 0.0236, "step": 154295 }, { "epoch": 0.6438233845999783, "grad_norm": 0.660870182438504, "learning_rate": 2.49277142109045e-06, "loss": 0.02, "step": 154300 }, { "epoch": 0.6438442473149686, "grad_norm": 0.34728727922419944, "learning_rate": 2.492731033420411e-06, "loss": 0.0165, "step": 154305 }, { "epoch": 0.6438651100299588, "grad_norm": 1.0246217702912004, "learning_rate": 2.492690647713381e-06, "loss": 0.0259, "step": 154310 }, { "epoch": 0.6438859727449492, "grad_norm": 0.513092606499738, "learning_rate": 2.492650263969201e-06, "loss": 0.0245, "step": 154315 }, { "epoch": 0.6439068354599394, "grad_norm": 0.3283833347587243, "learning_rate": 2.492609882187712e-06, "loss": 0.0215, "step": 154320 }, { "epoch": 0.6439276981749297, "grad_norm": 0.5028715915412104, "learning_rate": 2.4925695023687556e-06, "loss": 0.0271, "step": 154325 }, { "epoch": 0.6439485608899199, "grad_norm": 0.7649212444820036, "learning_rate": 2.492529124512172e-06, "loss": 0.0245, "step": 154330 }, { "epoch": 0.6439694236049103, "grad_norm": 0.5912152367150947, "learning_rate": 2.492488748617803e-06, "loss": 0.02, "step": 154335 }, { "epoch": 0.6439902863199005, "grad_norm": 0.6755419206643231, "learning_rate": 2.4924483746854894e-06, "loss": 0.0275, "step": 154340 }, { "epoch": 0.6440111490348908, "grad_norm": 0.6683047468677639, "learning_rate": 2.4924080027150716e-06, "loss": 0.0218, "step": 154345 }, { "epoch": 0.6440320117498811, "grad_norm": 0.4890445009981332, "learning_rate": 2.4923676327063924e-06, "loss": 0.0148, "step": 154350 }, { "epoch": 0.6440528744648714, "grad_norm": 0.753219017868536, "learning_rate": 2.492327264659291e-06, "loss": 0.0259, "step": 154355 }, { "epoch": 0.6440737371798616, "grad_norm": 1.3659839049601776, "learning_rate": 2.4922868985736102e-06, "loss": 0.0298, "step": 154360 }, { "epoch": 0.644094599894852, "grad_norm": 0.6195990844899447, "learning_rate": 2.4922465344491897e-06, "loss": 0.0307, "step": 154365 }, { "epoch": 0.6441154626098422, "grad_norm": 0.45568746717597214, "learning_rate": 2.4922061722858723e-06, "loss": 0.02, "step": 154370 }, { "epoch": 0.6441363253248324, "grad_norm": 0.9862023742705128, "learning_rate": 2.4921658120834972e-06, "loss": 0.021, "step": 154375 }, { "epoch": 0.6441571880398228, "grad_norm": 0.42013431812603247, "learning_rate": 2.4921254538419073e-06, "loss": 0.0211, "step": 154380 }, { "epoch": 0.644178050754813, "grad_norm": 0.5339693343652929, "learning_rate": 2.492085097560943e-06, "loss": 0.0172, "step": 154385 }, { "epoch": 0.6441989134698033, "grad_norm": 0.5510157718911826, "learning_rate": 2.4920447432404458e-06, "loss": 0.0224, "step": 154390 }, { "epoch": 0.6442197761847935, "grad_norm": 0.8055521706237831, "learning_rate": 2.492004390880258e-06, "loss": 0.0255, "step": 154395 }, { "epoch": 0.6442406388997839, "grad_norm": 0.46677436339692024, "learning_rate": 2.491964040480219e-06, "loss": 0.0263, "step": 154400 }, { "epoch": 0.6442615016147741, "grad_norm": 0.8377396978882081, "learning_rate": 2.491923692040171e-06, "loss": 0.0205, "step": 154405 }, { "epoch": 0.6442823643297644, "grad_norm": 0.6928531443835514, "learning_rate": 2.491883345559955e-06, "loss": 0.0254, "step": 154410 }, { "epoch": 0.6443032270447547, "grad_norm": 1.1843881046751392, "learning_rate": 2.4918430010394138e-06, "loss": 0.0236, "step": 154415 }, { "epoch": 0.644324089759745, "grad_norm": 0.7452308203622313, "learning_rate": 2.4918026584783865e-06, "loss": 0.0275, "step": 154420 }, { "epoch": 0.6443449524747352, "grad_norm": 0.5076229943924573, "learning_rate": 2.491762317876716e-06, "loss": 0.0219, "step": 154425 }, { "epoch": 0.6443658151897256, "grad_norm": 0.5854847733208408, "learning_rate": 2.4917219792342436e-06, "loss": 0.0239, "step": 154430 }, { "epoch": 0.6443866779047158, "grad_norm": 0.6579711441983089, "learning_rate": 2.49168164255081e-06, "loss": 0.0216, "step": 154435 }, { "epoch": 0.6444075406197061, "grad_norm": 0.8222420511724676, "learning_rate": 2.4916413078262574e-06, "loss": 0.0296, "step": 154440 }, { "epoch": 0.6444284033346963, "grad_norm": 0.4980993702048146, "learning_rate": 2.491600975060427e-06, "loss": 0.016, "step": 154445 }, { "epoch": 0.6444492660496867, "grad_norm": 0.8377386890567691, "learning_rate": 2.49156064425316e-06, "loss": 0.0224, "step": 154450 }, { "epoch": 0.6444701287646769, "grad_norm": 1.3916538613151825, "learning_rate": 2.491520315404298e-06, "loss": 0.0269, "step": 154455 }, { "epoch": 0.6444909914796672, "grad_norm": 0.5674625154102201, "learning_rate": 2.4914799885136827e-06, "loss": 0.0249, "step": 154460 }, { "epoch": 0.6445118541946575, "grad_norm": 0.28839894534421356, "learning_rate": 2.491439663581156e-06, "loss": 0.0153, "step": 154465 }, { "epoch": 0.6445327169096478, "grad_norm": 0.47334109799838275, "learning_rate": 2.4913993406065583e-06, "loss": 0.0244, "step": 154470 }, { "epoch": 0.644553579624638, "grad_norm": 0.9423281139658906, "learning_rate": 2.4913590195897324e-06, "loss": 0.0275, "step": 154475 }, { "epoch": 0.6445744423396284, "grad_norm": 0.5168945725812694, "learning_rate": 2.4913187005305194e-06, "loss": 0.0253, "step": 154480 }, { "epoch": 0.6445953050546186, "grad_norm": 0.8642065755953372, "learning_rate": 2.4912783834287607e-06, "loss": 0.0228, "step": 154485 }, { "epoch": 0.6446161677696088, "grad_norm": 0.791749272958953, "learning_rate": 2.4912380682842973e-06, "loss": 0.0414, "step": 154490 }, { "epoch": 0.6446370304845992, "grad_norm": 0.3632604572192341, "learning_rate": 2.4911977550969725e-06, "loss": 0.0266, "step": 154495 }, { "epoch": 0.6446578931995894, "grad_norm": 0.6629366294955426, "learning_rate": 2.491157443866627e-06, "loss": 0.0262, "step": 154500 }, { "epoch": 0.6446787559145797, "grad_norm": 1.0348243740324454, "learning_rate": 2.4911171345931016e-06, "loss": 0.0299, "step": 154505 }, { "epoch": 0.6446996186295699, "grad_norm": 0.45578137313164646, "learning_rate": 2.49107682727624e-06, "loss": 0.0246, "step": 154510 }, { "epoch": 0.6447204813445603, "grad_norm": 0.8646720324576448, "learning_rate": 2.4910365219158825e-06, "loss": 0.026, "step": 154515 }, { "epoch": 0.6447413440595505, "grad_norm": 0.7667531969474232, "learning_rate": 2.490996218511871e-06, "loss": 0.018, "step": 154520 }, { "epoch": 0.6447622067745408, "grad_norm": 0.7754585447047048, "learning_rate": 2.490955917064047e-06, "loss": 0.0261, "step": 154525 }, { "epoch": 0.6447830694895311, "grad_norm": 0.9807463371284212, "learning_rate": 2.490915617572253e-06, "loss": 0.0244, "step": 154530 }, { "epoch": 0.6448039322045214, "grad_norm": 0.6106289591858577, "learning_rate": 2.4908753200363305e-06, "loss": 0.0268, "step": 154535 }, { "epoch": 0.6448247949195116, "grad_norm": 0.46073466044752087, "learning_rate": 2.4908350244561217e-06, "loss": 0.0203, "step": 154540 }, { "epoch": 0.644845657634502, "grad_norm": 0.5787369520235404, "learning_rate": 2.4907947308314667e-06, "loss": 0.0259, "step": 154545 }, { "epoch": 0.6448665203494922, "grad_norm": 0.6968043349283333, "learning_rate": 2.4907544391622093e-06, "loss": 0.0221, "step": 154550 }, { "epoch": 0.6448873830644825, "grad_norm": 0.6942875288091898, "learning_rate": 2.4907141494481907e-06, "loss": 0.0314, "step": 154555 }, { "epoch": 0.6449082457794728, "grad_norm": 0.7085945272295222, "learning_rate": 2.4906738616892524e-06, "loss": 0.0173, "step": 154560 }, { "epoch": 0.644929108494463, "grad_norm": 0.38457814681204716, "learning_rate": 2.490633575885237e-06, "loss": 0.0162, "step": 154565 }, { "epoch": 0.6449499712094533, "grad_norm": 1.0843680169795151, "learning_rate": 2.4905932920359855e-06, "loss": 0.0221, "step": 154570 }, { "epoch": 0.6449708339244435, "grad_norm": 0.697384245650371, "learning_rate": 2.4905530101413405e-06, "loss": 0.0233, "step": 154575 }, { "epoch": 0.6449916966394339, "grad_norm": 0.42155283304861657, "learning_rate": 2.4905127302011436e-06, "loss": 0.0209, "step": 154580 }, { "epoch": 0.6450125593544241, "grad_norm": 0.576186542955285, "learning_rate": 2.490472452215237e-06, "loss": 0.0168, "step": 154585 }, { "epoch": 0.6450334220694144, "grad_norm": 0.38627581008373074, "learning_rate": 2.4904321761834624e-06, "loss": 0.0283, "step": 154590 }, { "epoch": 0.6450542847844047, "grad_norm": 1.0343138122711122, "learning_rate": 2.4903919021056622e-06, "loss": 0.0274, "step": 154595 }, { "epoch": 0.645075147499395, "grad_norm": 0.7528343608438612, "learning_rate": 2.490351629981678e-06, "loss": 0.0188, "step": 154600 }, { "epoch": 0.6450960102143852, "grad_norm": 0.42525546147057786, "learning_rate": 2.4903113598113524e-06, "loss": 0.018, "step": 154605 }, { "epoch": 0.6451168729293756, "grad_norm": 0.8092754320998621, "learning_rate": 2.4902710915945265e-06, "loss": 0.0292, "step": 154610 }, { "epoch": 0.6451377356443658, "grad_norm": 0.4431498471015678, "learning_rate": 2.490230825331043e-06, "loss": 0.0248, "step": 154615 }, { "epoch": 0.6451585983593561, "grad_norm": 1.403597246561352, "learning_rate": 2.4901905610207437e-06, "loss": 0.0308, "step": 154620 }, { "epoch": 0.6451794610743463, "grad_norm": 0.3874715767172037, "learning_rate": 2.4901502986634714e-06, "loss": 0.0118, "step": 154625 }, { "epoch": 0.6452003237893367, "grad_norm": 0.5831939099030111, "learning_rate": 2.4901100382590674e-06, "loss": 0.0188, "step": 154630 }, { "epoch": 0.6452211865043269, "grad_norm": 0.5150334485052559, "learning_rate": 2.4900697798073743e-06, "loss": 0.0281, "step": 154635 }, { "epoch": 0.6452420492193172, "grad_norm": 0.6938556353299831, "learning_rate": 2.4900295233082344e-06, "loss": 0.0244, "step": 154640 }, { "epoch": 0.6452629119343075, "grad_norm": 0.6499523636767747, "learning_rate": 2.4899892687614885e-06, "loss": 0.0237, "step": 154645 }, { "epoch": 0.6452837746492978, "grad_norm": 1.2336492243561203, "learning_rate": 2.4899490161669807e-06, "loss": 0.0381, "step": 154650 }, { "epoch": 0.645304637364288, "grad_norm": 0.7922237230449692, "learning_rate": 2.4899087655245527e-06, "loss": 0.0241, "step": 154655 }, { "epoch": 0.6453255000792784, "grad_norm": 0.7493292801291035, "learning_rate": 2.489868516834046e-06, "loss": 0.0225, "step": 154660 }, { "epoch": 0.6453463627942686, "grad_norm": 0.993024988757564, "learning_rate": 2.489828270095303e-06, "loss": 0.0226, "step": 154665 }, { "epoch": 0.6453672255092588, "grad_norm": 0.8556236889658569, "learning_rate": 2.489788025308166e-06, "loss": 0.0226, "step": 154670 }, { "epoch": 0.6453880882242492, "grad_norm": 0.7230756832646305, "learning_rate": 2.4897477824724788e-06, "loss": 0.0208, "step": 154675 }, { "epoch": 0.6454089509392394, "grad_norm": 0.7424566738188139, "learning_rate": 2.4897075415880808e-06, "loss": 0.0246, "step": 154680 }, { "epoch": 0.6454298136542297, "grad_norm": 0.3973906882533179, "learning_rate": 2.489667302654817e-06, "loss": 0.0232, "step": 154685 }, { "epoch": 0.6454506763692199, "grad_norm": 0.8537112335219574, "learning_rate": 2.489627065672528e-06, "loss": 0.0223, "step": 154690 }, { "epoch": 0.6454715390842103, "grad_norm": 0.3818516110240512, "learning_rate": 2.489586830641057e-06, "loss": 0.0141, "step": 154695 }, { "epoch": 0.6454924017992005, "grad_norm": 1.984624866510194, "learning_rate": 2.4895465975602462e-06, "loss": 0.0255, "step": 154700 }, { "epoch": 0.6455132645141908, "grad_norm": 0.5254116805799457, "learning_rate": 2.4895063664299383e-06, "loss": 0.0215, "step": 154705 }, { "epoch": 0.6455341272291811, "grad_norm": 0.7703138406663788, "learning_rate": 2.4894661372499746e-06, "loss": 0.017, "step": 154710 }, { "epoch": 0.6455549899441714, "grad_norm": 0.6267522655440082, "learning_rate": 2.489425910020198e-06, "loss": 0.0241, "step": 154715 }, { "epoch": 0.6455758526591616, "grad_norm": 0.5542613958904097, "learning_rate": 2.4893856847404525e-06, "loss": 0.018, "step": 154720 }, { "epoch": 0.645596715374152, "grad_norm": 1.1048059240904837, "learning_rate": 2.489345461410578e-06, "loss": 0.0255, "step": 154725 }, { "epoch": 0.6456175780891422, "grad_norm": 0.621799406293186, "learning_rate": 2.489305240030419e-06, "loss": 0.0279, "step": 154730 }, { "epoch": 0.6456384408041325, "grad_norm": 0.7572945241858577, "learning_rate": 2.489265020599817e-06, "loss": 0.0199, "step": 154735 }, { "epoch": 0.6456593035191228, "grad_norm": 0.9858739006366513, "learning_rate": 2.4892248031186144e-06, "loss": 0.0267, "step": 154740 }, { "epoch": 0.6456801662341131, "grad_norm": 0.45368479435192943, "learning_rate": 2.4891845875866545e-06, "loss": 0.0164, "step": 154745 }, { "epoch": 0.6457010289491033, "grad_norm": 0.5552829526337716, "learning_rate": 2.4891443740037797e-06, "loss": 0.0256, "step": 154750 }, { "epoch": 0.6457218916640936, "grad_norm": 0.6738194674454077, "learning_rate": 2.489104162369832e-06, "loss": 0.0235, "step": 154755 }, { "epoch": 0.6457427543790839, "grad_norm": 0.8459575179186818, "learning_rate": 2.4890639526846543e-06, "loss": 0.025, "step": 154760 }, { "epoch": 0.6457636170940741, "grad_norm": 0.8182717711569766, "learning_rate": 2.489023744948088e-06, "loss": 0.0243, "step": 154765 }, { "epoch": 0.6457844798090644, "grad_norm": 0.4845896300670948, "learning_rate": 2.4889835391599786e-06, "loss": 0.0202, "step": 154770 }, { "epoch": 0.6458053425240547, "grad_norm": 1.293340978209884, "learning_rate": 2.4889433353201664e-06, "loss": 0.029, "step": 154775 }, { "epoch": 0.645826205239045, "grad_norm": 0.44729043032519233, "learning_rate": 2.488903133428494e-06, "loss": 0.0192, "step": 154780 }, { "epoch": 0.6458470679540352, "grad_norm": 0.5216934655268326, "learning_rate": 2.4888629334848054e-06, "loss": 0.0213, "step": 154785 }, { "epoch": 0.6458679306690256, "grad_norm": 0.42587711568040015, "learning_rate": 2.488822735488943e-06, "loss": 0.0248, "step": 154790 }, { "epoch": 0.6458887933840158, "grad_norm": 0.57057273894069, "learning_rate": 2.488782539440749e-06, "loss": 0.0199, "step": 154795 }, { "epoch": 0.6459096560990061, "grad_norm": 0.6528841318230532, "learning_rate": 2.4887423453400653e-06, "loss": 0.0334, "step": 154800 }, { "epoch": 0.6459305188139963, "grad_norm": 0.5254547094983256, "learning_rate": 2.488702153186737e-06, "loss": 0.0164, "step": 154805 }, { "epoch": 0.6459513815289867, "grad_norm": 0.6475576958793053, "learning_rate": 2.4886619629806044e-06, "loss": 0.016, "step": 154810 }, { "epoch": 0.6459722442439769, "grad_norm": 0.4990925053115995, "learning_rate": 2.4886217747215118e-06, "loss": 0.0207, "step": 154815 }, { "epoch": 0.6459931069589672, "grad_norm": 0.27060350436195285, "learning_rate": 2.488581588409301e-06, "loss": 0.0209, "step": 154820 }, { "epoch": 0.6460139696739575, "grad_norm": 0.7252034033067981, "learning_rate": 2.488541404043816e-06, "loss": 0.0277, "step": 154825 }, { "epoch": 0.6460348323889478, "grad_norm": 0.765038966240631, "learning_rate": 2.4885012216248986e-06, "loss": 0.0212, "step": 154830 }, { "epoch": 0.646055695103938, "grad_norm": 0.9314338160900123, "learning_rate": 2.4884610411523922e-06, "loss": 0.0234, "step": 154835 }, { "epoch": 0.6460765578189284, "grad_norm": 0.44472342578942803, "learning_rate": 2.48842086262614e-06, "loss": 0.026, "step": 154840 }, { "epoch": 0.6460974205339186, "grad_norm": 0.6560779086114072, "learning_rate": 2.4883806860459835e-06, "loss": 0.0178, "step": 154845 }, { "epoch": 0.6461182832489089, "grad_norm": 0.40707318500604833, "learning_rate": 2.4883405114117665e-06, "loss": 0.0188, "step": 154850 }, { "epoch": 0.6461391459638992, "grad_norm": 1.006304586388694, "learning_rate": 2.4883003387233324e-06, "loss": 0.0269, "step": 154855 }, { "epoch": 0.6461600086788895, "grad_norm": 0.7431883065872056, "learning_rate": 2.4882601679805236e-06, "loss": 0.02, "step": 154860 }, { "epoch": 0.6461808713938797, "grad_norm": 0.5747559482333399, "learning_rate": 2.488219999183183e-06, "loss": 0.0217, "step": 154865 }, { "epoch": 0.6462017341088699, "grad_norm": 0.9200987279907121, "learning_rate": 2.4881798323311533e-06, "loss": 0.0333, "step": 154870 }, { "epoch": 0.6462225968238603, "grad_norm": 0.722996012080108, "learning_rate": 2.4881396674242788e-06, "loss": 0.0205, "step": 154875 }, { "epoch": 0.6462434595388505, "grad_norm": 0.7378414759134869, "learning_rate": 2.488099504462401e-06, "loss": 0.0191, "step": 154880 }, { "epoch": 0.6462643222538408, "grad_norm": 0.6109411576068807, "learning_rate": 2.4880593434453635e-06, "loss": 0.0192, "step": 154885 }, { "epoch": 0.6462851849688311, "grad_norm": 0.3854465430466292, "learning_rate": 2.488019184373009e-06, "loss": 0.0166, "step": 154890 }, { "epoch": 0.6463060476838214, "grad_norm": 0.7999567718094535, "learning_rate": 2.4879790272451817e-06, "loss": 0.0209, "step": 154895 }, { "epoch": 0.6463269103988116, "grad_norm": 1.0195569272059108, "learning_rate": 2.487938872061723e-06, "loss": 0.0175, "step": 154900 }, { "epoch": 0.646347773113802, "grad_norm": 0.6343260491590881, "learning_rate": 2.487898718822477e-06, "loss": 0.0277, "step": 154905 }, { "epoch": 0.6463686358287922, "grad_norm": 0.7294222469409691, "learning_rate": 2.4878585675272883e-06, "loss": 0.0221, "step": 154910 }, { "epoch": 0.6463894985437825, "grad_norm": 0.5597981328536352, "learning_rate": 2.4878184181759967e-06, "loss": 0.0205, "step": 154915 }, { "epoch": 0.6464103612587728, "grad_norm": 0.9881995768825474, "learning_rate": 2.4877782707684472e-06, "loss": 0.0222, "step": 154920 }, { "epoch": 0.6464312239737631, "grad_norm": 0.7092444373183036, "learning_rate": 2.4877381253044834e-06, "loss": 0.0241, "step": 154925 }, { "epoch": 0.6464520866887533, "grad_norm": 0.7840437757560779, "learning_rate": 2.487697981783948e-06, "loss": 0.0226, "step": 154930 }, { "epoch": 0.6464729494037436, "grad_norm": 0.5044558384062879, "learning_rate": 2.4876578402066835e-06, "loss": 0.0227, "step": 154935 }, { "epoch": 0.6464938121187339, "grad_norm": 0.7278427527082685, "learning_rate": 2.487617700572534e-06, "loss": 0.0233, "step": 154940 }, { "epoch": 0.6465146748337242, "grad_norm": 0.3803091096370517, "learning_rate": 2.487577562881343e-06, "loss": 0.0174, "step": 154945 }, { "epoch": 0.6465355375487144, "grad_norm": 0.3324033870075728, "learning_rate": 2.487537427132953e-06, "loss": 0.0211, "step": 154950 }, { "epoch": 0.6465564002637048, "grad_norm": 1.101306202852032, "learning_rate": 2.4874972933272073e-06, "loss": 0.0208, "step": 154955 }, { "epoch": 0.646577262978695, "grad_norm": 0.32381131941094865, "learning_rate": 2.48745716146395e-06, "loss": 0.0148, "step": 154960 }, { "epoch": 0.6465981256936852, "grad_norm": 0.5188843495682867, "learning_rate": 2.4874170315430237e-06, "loss": 0.0189, "step": 154965 }, { "epoch": 0.6466189884086756, "grad_norm": 0.5562810144210096, "learning_rate": 2.487376903564271e-06, "loss": 0.0181, "step": 154970 }, { "epoch": 0.6466398511236658, "grad_norm": 0.5474344033695222, "learning_rate": 2.487336777527537e-06, "loss": 0.0257, "step": 154975 }, { "epoch": 0.6466607138386561, "grad_norm": 0.580987679147409, "learning_rate": 2.487296653432664e-06, "loss": 0.016, "step": 154980 }, { "epoch": 0.6466815765536463, "grad_norm": 0.8232068716625704, "learning_rate": 2.487256531279495e-06, "loss": 0.0253, "step": 154985 }, { "epoch": 0.6467024392686367, "grad_norm": 1.0210271147389698, "learning_rate": 2.4872164110678745e-06, "loss": 0.0222, "step": 154990 }, { "epoch": 0.6467233019836269, "grad_norm": 0.9496623459481045, "learning_rate": 2.487176292797645e-06, "loss": 0.0177, "step": 154995 }, { "epoch": 0.6467441646986172, "grad_norm": 0.9846354583980118, "learning_rate": 2.487136176468651e-06, "loss": 0.0209, "step": 155000 }, { "epoch": 0.6467650274136075, "grad_norm": 1.5290962708581493, "learning_rate": 2.4870960620807347e-06, "loss": 0.0213, "step": 155005 }, { "epoch": 0.6467858901285978, "grad_norm": 0.6598770924425068, "learning_rate": 2.48705594963374e-06, "loss": 0.0199, "step": 155010 }, { "epoch": 0.646806752843588, "grad_norm": 0.8461794231271192, "learning_rate": 2.487015839127511e-06, "loss": 0.0259, "step": 155015 }, { "epoch": 0.6468276155585784, "grad_norm": 0.7352662281091635, "learning_rate": 2.4869757305618902e-06, "loss": 0.0241, "step": 155020 }, { "epoch": 0.6468484782735686, "grad_norm": 1.0947295390008656, "learning_rate": 2.486935623936722e-06, "loss": 0.0215, "step": 155025 }, { "epoch": 0.6468693409885589, "grad_norm": 0.6857913763868896, "learning_rate": 2.4868955192518494e-06, "loss": 0.0243, "step": 155030 }, { "epoch": 0.6468902037035492, "grad_norm": 0.631855615071766, "learning_rate": 2.4868554165071164e-06, "loss": 0.0186, "step": 155035 }, { "epoch": 0.6469110664185395, "grad_norm": 0.6553664489011284, "learning_rate": 2.4868153157023663e-06, "loss": 0.0153, "step": 155040 }, { "epoch": 0.6469319291335297, "grad_norm": 0.7733827416717418, "learning_rate": 2.4867752168374428e-06, "loss": 0.0194, "step": 155045 }, { "epoch": 0.64695279184852, "grad_norm": 0.5954176389503112, "learning_rate": 2.4867351199121883e-06, "loss": 0.0185, "step": 155050 }, { "epoch": 0.6469736545635103, "grad_norm": 1.040067477123483, "learning_rate": 2.4866950249264486e-06, "loss": 0.0256, "step": 155055 }, { "epoch": 0.6469945172785005, "grad_norm": 0.6701910710558343, "learning_rate": 2.486654931880066e-06, "loss": 0.0265, "step": 155060 }, { "epoch": 0.6470153799934908, "grad_norm": 0.4854284029106877, "learning_rate": 2.4866148407728846e-06, "loss": 0.0285, "step": 155065 }, { "epoch": 0.6470362427084811, "grad_norm": 0.8908437288990438, "learning_rate": 2.4865747516047477e-06, "loss": 0.0264, "step": 155070 }, { "epoch": 0.6470571054234714, "grad_norm": 0.2897118323034487, "learning_rate": 2.4865346643754997e-06, "loss": 0.0216, "step": 155075 }, { "epoch": 0.6470779681384616, "grad_norm": 1.0313621154096173, "learning_rate": 2.4864945790849834e-06, "loss": 0.0298, "step": 155080 }, { "epoch": 0.647098830853452, "grad_norm": 0.7895713206060444, "learning_rate": 2.4864544957330435e-06, "loss": 0.0203, "step": 155085 }, { "epoch": 0.6471196935684422, "grad_norm": 0.9824079991306705, "learning_rate": 2.4864144143195224e-06, "loss": 0.0224, "step": 155090 }, { "epoch": 0.6471405562834325, "grad_norm": 0.6248818417983858, "learning_rate": 2.4863743348442656e-06, "loss": 0.0208, "step": 155095 }, { "epoch": 0.6471614189984228, "grad_norm": 0.5207151350655638, "learning_rate": 2.486334257307115e-06, "loss": 0.0229, "step": 155100 }, { "epoch": 0.6471822817134131, "grad_norm": 0.6462033118644963, "learning_rate": 2.4862941817079165e-06, "loss": 0.0202, "step": 155105 }, { "epoch": 0.6472031444284033, "grad_norm": 0.42291395735430126, "learning_rate": 2.486254108046512e-06, "loss": 0.023, "step": 155110 }, { "epoch": 0.6472240071433936, "grad_norm": 0.5387331054132198, "learning_rate": 2.4862140363227463e-06, "loss": 0.0257, "step": 155115 }, { "epoch": 0.6472448698583839, "grad_norm": 0.9751363691794145, "learning_rate": 2.4861739665364635e-06, "loss": 0.0201, "step": 155120 }, { "epoch": 0.6472657325733742, "grad_norm": 0.9860379246030347, "learning_rate": 2.4861338986875063e-06, "loss": 0.0221, "step": 155125 }, { "epoch": 0.6472865952883644, "grad_norm": 0.336298507673586, "learning_rate": 2.4860938327757203e-06, "loss": 0.014, "step": 155130 }, { "epoch": 0.6473074580033548, "grad_norm": 0.5630085660573799, "learning_rate": 2.486053768800948e-06, "loss": 0.0246, "step": 155135 }, { "epoch": 0.647328320718345, "grad_norm": 0.8199933844762953, "learning_rate": 2.486013706763034e-06, "loss": 0.0244, "step": 155140 }, { "epoch": 0.6473491834333353, "grad_norm": 1.8265819871129731, "learning_rate": 2.485973646661822e-06, "loss": 0.0325, "step": 155145 }, { "epoch": 0.6473700461483256, "grad_norm": 0.40922778057075576, "learning_rate": 2.4859335884971563e-06, "loss": 0.0236, "step": 155150 }, { "epoch": 0.6473909088633159, "grad_norm": 0.6414891247632342, "learning_rate": 2.48589353226888e-06, "loss": 0.0235, "step": 155155 }, { "epoch": 0.6474117715783061, "grad_norm": 0.5064764643234279, "learning_rate": 2.485853477976838e-06, "loss": 0.0209, "step": 155160 }, { "epoch": 0.6474326342932963, "grad_norm": 0.6075361764044561, "learning_rate": 2.485813425620874e-06, "loss": 0.0283, "step": 155165 }, { "epoch": 0.6474534970082867, "grad_norm": 0.6598089664039564, "learning_rate": 2.4857733752008324e-06, "loss": 0.0174, "step": 155170 }, { "epoch": 0.6474743597232769, "grad_norm": 0.625228479406975, "learning_rate": 2.4857333267165566e-06, "loss": 0.0249, "step": 155175 }, { "epoch": 0.6474952224382672, "grad_norm": 0.6190398009907218, "learning_rate": 2.4856932801678908e-06, "loss": 0.0212, "step": 155180 }, { "epoch": 0.6475160851532575, "grad_norm": 1.1121098169688663, "learning_rate": 2.4856532355546796e-06, "loss": 0.0248, "step": 155185 }, { "epoch": 0.6475369478682478, "grad_norm": 0.34962875024349294, "learning_rate": 2.4856131928767667e-06, "loss": 0.0214, "step": 155190 }, { "epoch": 0.647557810583238, "grad_norm": 0.4352047864426935, "learning_rate": 2.4855731521339964e-06, "loss": 0.0286, "step": 155195 }, { "epoch": 0.6475786732982284, "grad_norm": 0.7254507686692793, "learning_rate": 2.485533113326212e-06, "loss": 0.0157, "step": 155200 }, { "epoch": 0.6475995360132186, "grad_norm": 0.6185521566290553, "learning_rate": 2.485493076453259e-06, "loss": 0.023, "step": 155205 }, { "epoch": 0.6476203987282089, "grad_norm": 0.9171618214052426, "learning_rate": 2.485453041514981e-06, "loss": 0.0221, "step": 155210 }, { "epoch": 0.6476412614431992, "grad_norm": 0.5241890546381267, "learning_rate": 2.4854130085112218e-06, "loss": 0.0217, "step": 155215 }, { "epoch": 0.6476621241581895, "grad_norm": 1.0448258308553977, "learning_rate": 2.4853729774418264e-06, "loss": 0.0258, "step": 155220 }, { "epoch": 0.6476829868731797, "grad_norm": 1.0340510261353106, "learning_rate": 2.4853329483066385e-06, "loss": 0.0222, "step": 155225 }, { "epoch": 0.64770384958817, "grad_norm": 0.9114506755816608, "learning_rate": 2.485292921105502e-06, "loss": 0.0259, "step": 155230 }, { "epoch": 0.6477247123031603, "grad_norm": 0.7262092466933869, "learning_rate": 2.4852528958382624e-06, "loss": 0.0224, "step": 155235 }, { "epoch": 0.6477455750181506, "grad_norm": 0.35541816846807667, "learning_rate": 2.4852128725047622e-06, "loss": 0.0216, "step": 155240 }, { "epoch": 0.6477664377331408, "grad_norm": 0.3241415286860635, "learning_rate": 2.4851728511048473e-06, "loss": 0.0301, "step": 155245 }, { "epoch": 0.6477873004481312, "grad_norm": 0.5498560972861322, "learning_rate": 2.485132831638361e-06, "loss": 0.019, "step": 155250 }, { "epoch": 0.6478081631631214, "grad_norm": 0.46270827529892056, "learning_rate": 2.4850928141051487e-06, "loss": 0.022, "step": 155255 }, { "epoch": 0.6478290258781116, "grad_norm": 0.581455316530794, "learning_rate": 2.485052798505053e-06, "loss": 0.0179, "step": 155260 }, { "epoch": 0.647849888593102, "grad_norm": 0.42022039713768916, "learning_rate": 2.4850127848379203e-06, "loss": 0.0298, "step": 155265 }, { "epoch": 0.6478707513080922, "grad_norm": 0.4423002979391096, "learning_rate": 2.484972773103594e-06, "loss": 0.0205, "step": 155270 }, { "epoch": 0.6478916140230825, "grad_norm": 0.4878489904134098, "learning_rate": 2.4849327633019178e-06, "loss": 0.0159, "step": 155275 }, { "epoch": 0.6479124767380728, "grad_norm": 0.6281628215336388, "learning_rate": 2.484892755432737e-06, "loss": 0.031, "step": 155280 }, { "epoch": 0.6479333394530631, "grad_norm": 0.6205988524747673, "learning_rate": 2.4848527494958964e-06, "loss": 0.0267, "step": 155285 }, { "epoch": 0.6479542021680533, "grad_norm": 1.0179146471113307, "learning_rate": 2.4848127454912396e-06, "loss": 0.0272, "step": 155290 }, { "epoch": 0.6479750648830436, "grad_norm": 0.9150356092367582, "learning_rate": 2.4847727434186115e-06, "loss": 0.0221, "step": 155295 }, { "epoch": 0.6479959275980339, "grad_norm": 1.0560728036371745, "learning_rate": 2.4847327432778556e-06, "loss": 0.0247, "step": 155300 }, { "epoch": 0.6480167903130242, "grad_norm": 0.46383224734863454, "learning_rate": 2.484692745068819e-06, "loss": 0.0172, "step": 155305 }, { "epoch": 0.6480376530280144, "grad_norm": 0.5004004845352662, "learning_rate": 2.4846527487913433e-06, "loss": 0.0252, "step": 155310 }, { "epoch": 0.6480585157430048, "grad_norm": 0.7081865956738137, "learning_rate": 2.484612754445275e-06, "loss": 0.0266, "step": 155315 }, { "epoch": 0.648079378457995, "grad_norm": 0.32990015136619016, "learning_rate": 2.484572762030457e-06, "loss": 0.0304, "step": 155320 }, { "epoch": 0.6481002411729853, "grad_norm": 0.6359047492939316, "learning_rate": 2.4845327715467353e-06, "loss": 0.02, "step": 155325 }, { "epoch": 0.6481211038879756, "grad_norm": 0.3861957829357092, "learning_rate": 2.4844927829939537e-06, "loss": 0.0182, "step": 155330 }, { "epoch": 0.6481419666029659, "grad_norm": 0.8235305006173836, "learning_rate": 2.484452796371958e-06, "loss": 0.0213, "step": 155335 }, { "epoch": 0.6481628293179561, "grad_norm": 1.0334802276312667, "learning_rate": 2.4844128116805917e-06, "loss": 0.0305, "step": 155340 }, { "epoch": 0.6481836920329463, "grad_norm": 0.657161334979704, "learning_rate": 2.484372828919699e-06, "loss": 0.0207, "step": 155345 }, { "epoch": 0.6482045547479367, "grad_norm": 0.48204668315621635, "learning_rate": 2.4843328480891255e-06, "loss": 0.0242, "step": 155350 }, { "epoch": 0.648225417462927, "grad_norm": 0.903116498643025, "learning_rate": 2.484292869188716e-06, "loss": 0.0156, "step": 155355 }, { "epoch": 0.6482462801779172, "grad_norm": 0.7074277429266148, "learning_rate": 2.484252892218314e-06, "loss": 0.0174, "step": 155360 }, { "epoch": 0.6482671428929075, "grad_norm": 1.1510589006262302, "learning_rate": 2.4842129171777664e-06, "loss": 0.0231, "step": 155365 }, { "epoch": 0.6482880056078978, "grad_norm": 1.0417700557356218, "learning_rate": 2.484172944066916e-06, "loss": 0.0267, "step": 155370 }, { "epoch": 0.648308868322888, "grad_norm": 0.3592056391595545, "learning_rate": 2.484132972885608e-06, "loss": 0.0204, "step": 155375 }, { "epoch": 0.6483297310378784, "grad_norm": 1.5894892466550412, "learning_rate": 2.484093003633687e-06, "loss": 0.0255, "step": 155380 }, { "epoch": 0.6483505937528686, "grad_norm": 0.5874011387509812, "learning_rate": 2.4840530363109987e-06, "loss": 0.0287, "step": 155385 }, { "epoch": 0.6483714564678589, "grad_norm": 0.6262694181303634, "learning_rate": 2.4840130709173867e-06, "loss": 0.0203, "step": 155390 }, { "epoch": 0.6483923191828492, "grad_norm": 0.6041441380919135, "learning_rate": 2.483973107452697e-06, "loss": 0.0208, "step": 155395 }, { "epoch": 0.6484131818978395, "grad_norm": 0.49476433381670915, "learning_rate": 2.4839331459167733e-06, "loss": 0.0162, "step": 155400 }, { "epoch": 0.6484340446128297, "grad_norm": 0.8815247791248143, "learning_rate": 2.483893186309462e-06, "loss": 0.0313, "step": 155405 }, { "epoch": 0.64845490732782, "grad_norm": 0.6547737729400518, "learning_rate": 2.483853228630606e-06, "loss": 0.0188, "step": 155410 }, { "epoch": 0.6484757700428103, "grad_norm": 0.603728669067028, "learning_rate": 2.4838132728800517e-06, "loss": 0.0202, "step": 155415 }, { "epoch": 0.6484966327578006, "grad_norm": 0.7973187827120317, "learning_rate": 2.483773319057643e-06, "loss": 0.0232, "step": 155420 }, { "epoch": 0.6485174954727908, "grad_norm": 0.5079685535924062, "learning_rate": 2.483733367163226e-06, "loss": 0.0187, "step": 155425 }, { "epoch": 0.6485383581877812, "grad_norm": 0.5201172691189331, "learning_rate": 2.4836934171966444e-06, "loss": 0.0225, "step": 155430 }, { "epoch": 0.6485592209027714, "grad_norm": 0.5968404846052309, "learning_rate": 2.483653469157744e-06, "loss": 0.0213, "step": 155435 }, { "epoch": 0.6485800836177616, "grad_norm": 0.5834898416563619, "learning_rate": 2.4836135230463702e-06, "loss": 0.0217, "step": 155440 }, { "epoch": 0.648600946332752, "grad_norm": 0.2652623311182596, "learning_rate": 2.4835735788623663e-06, "loss": 0.021, "step": 155445 }, { "epoch": 0.6486218090477422, "grad_norm": 1.0382263466283985, "learning_rate": 2.4835336366055786e-06, "loss": 0.0202, "step": 155450 }, { "epoch": 0.6486426717627325, "grad_norm": 0.6906192774244249, "learning_rate": 2.483493696275852e-06, "loss": 0.0373, "step": 155455 }, { "epoch": 0.6486635344777228, "grad_norm": 0.5079806700516285, "learning_rate": 2.4834537578730312e-06, "loss": 0.0249, "step": 155460 }, { "epoch": 0.6486843971927131, "grad_norm": 1.5977677214840535, "learning_rate": 2.483413821396962e-06, "loss": 0.0383, "step": 155465 }, { "epoch": 0.6487052599077033, "grad_norm": 0.4996292704062718, "learning_rate": 2.4833738868474884e-06, "loss": 0.0219, "step": 155470 }, { "epoch": 0.6487261226226936, "grad_norm": 0.9220055321634075, "learning_rate": 2.4833339542244563e-06, "loss": 0.0188, "step": 155475 }, { "epoch": 0.6487469853376839, "grad_norm": 0.2921145249093982, "learning_rate": 2.4832940235277105e-06, "loss": 0.0192, "step": 155480 }, { "epoch": 0.6487678480526742, "grad_norm": 0.44220245252089335, "learning_rate": 2.4832540947570964e-06, "loss": 0.0177, "step": 155485 }, { "epoch": 0.6487887107676644, "grad_norm": 0.5951136346457107, "learning_rate": 2.4832141679124587e-06, "loss": 0.0212, "step": 155490 }, { "epoch": 0.6488095734826548, "grad_norm": 0.765869045472245, "learning_rate": 2.4831742429936427e-06, "loss": 0.0215, "step": 155495 }, { "epoch": 0.648830436197645, "grad_norm": 1.0784023495028012, "learning_rate": 2.4831343200004946e-06, "loss": 0.0278, "step": 155500 }, { "epoch": 0.6488512989126353, "grad_norm": 1.195558286195592, "learning_rate": 2.4830943989328575e-06, "loss": 0.0182, "step": 155505 }, { "epoch": 0.6488721616276256, "grad_norm": 0.49759332609496326, "learning_rate": 2.4830544797905788e-06, "loss": 0.0234, "step": 155510 }, { "epoch": 0.6488930243426159, "grad_norm": 0.9631451209475873, "learning_rate": 2.4830145625735026e-06, "loss": 0.0257, "step": 155515 }, { "epoch": 0.6489138870576061, "grad_norm": 0.8257638677063809, "learning_rate": 2.4829746472814744e-06, "loss": 0.0217, "step": 155520 }, { "epoch": 0.6489347497725964, "grad_norm": 1.225336670027323, "learning_rate": 2.4829347339143395e-06, "loss": 0.026, "step": 155525 }, { "epoch": 0.6489556124875867, "grad_norm": 0.7417440196680146, "learning_rate": 2.4828948224719427e-06, "loss": 0.0216, "step": 155530 }, { "epoch": 0.648976475202577, "grad_norm": 0.503385285272905, "learning_rate": 2.48285491295413e-06, "loss": 0.0228, "step": 155535 }, { "epoch": 0.6489973379175672, "grad_norm": 0.40245325180455105, "learning_rate": 2.4828150053607465e-06, "loss": 0.0215, "step": 155540 }, { "epoch": 0.6490182006325576, "grad_norm": 1.0644401465081965, "learning_rate": 2.4827750996916377e-06, "loss": 0.0253, "step": 155545 }, { "epoch": 0.6490390633475478, "grad_norm": 0.5125759888735705, "learning_rate": 2.4827351959466485e-06, "loss": 0.0191, "step": 155550 }, { "epoch": 0.649059926062538, "grad_norm": 0.5447925497981562, "learning_rate": 2.4826952941256248e-06, "loss": 0.0174, "step": 155555 }, { "epoch": 0.6490807887775284, "grad_norm": 0.46160407668089887, "learning_rate": 2.482655394228412e-06, "loss": 0.0187, "step": 155560 }, { "epoch": 0.6491016514925186, "grad_norm": 0.8194240505589014, "learning_rate": 2.482615496254854e-06, "loss": 0.0157, "step": 155565 }, { "epoch": 0.6491225142075089, "grad_norm": 0.39951561068009656, "learning_rate": 2.4825756002047988e-06, "loss": 0.019, "step": 155570 }, { "epoch": 0.6491433769224992, "grad_norm": 0.8572073463057642, "learning_rate": 2.48253570607809e-06, "loss": 0.0294, "step": 155575 }, { "epoch": 0.6491642396374895, "grad_norm": 0.836500038274754, "learning_rate": 2.4824958138745737e-06, "loss": 0.0163, "step": 155580 }, { "epoch": 0.6491851023524797, "grad_norm": 0.5906264222845603, "learning_rate": 2.482455923594095e-06, "loss": 0.0278, "step": 155585 }, { "epoch": 0.64920596506747, "grad_norm": 0.6925352218350985, "learning_rate": 2.4824160352365e-06, "loss": 0.0186, "step": 155590 }, { "epoch": 0.6492268277824603, "grad_norm": 0.6982385174974407, "learning_rate": 2.482376148801635e-06, "loss": 0.0199, "step": 155595 }, { "epoch": 0.6492476904974506, "grad_norm": 0.6729831279223629, "learning_rate": 2.4823362642893433e-06, "loss": 0.0161, "step": 155600 }, { "epoch": 0.6492685532124408, "grad_norm": 0.5828568775627764, "learning_rate": 2.4822963816994716e-06, "loss": 0.0159, "step": 155605 }, { "epoch": 0.6492894159274312, "grad_norm": 0.4988734469346338, "learning_rate": 2.4822565010318654e-06, "loss": 0.0169, "step": 155610 }, { "epoch": 0.6493102786424214, "grad_norm": 0.7168689017657122, "learning_rate": 2.482216622286371e-06, "loss": 0.0175, "step": 155615 }, { "epoch": 0.6493311413574117, "grad_norm": 0.7562276696984374, "learning_rate": 2.482176745462833e-06, "loss": 0.021, "step": 155620 }, { "epoch": 0.649352004072402, "grad_norm": 1.1177486062886532, "learning_rate": 2.4821368705610975e-06, "loss": 0.0271, "step": 155625 }, { "epoch": 0.6493728667873923, "grad_norm": 0.5305254599356045, "learning_rate": 2.4820969975810103e-06, "loss": 0.0217, "step": 155630 }, { "epoch": 0.6493937295023825, "grad_norm": 1.1145124802371191, "learning_rate": 2.4820571265224165e-06, "loss": 0.0208, "step": 155635 }, { "epoch": 0.6494145922173729, "grad_norm": 0.7967901793076799, "learning_rate": 2.482017257385162e-06, "loss": 0.0271, "step": 155640 }, { "epoch": 0.6494354549323631, "grad_norm": 1.170200600312094, "learning_rate": 2.481977390169093e-06, "loss": 0.0278, "step": 155645 }, { "epoch": 0.6494563176473533, "grad_norm": 0.5543493413411986, "learning_rate": 2.4819375248740546e-06, "loss": 0.0218, "step": 155650 }, { "epoch": 0.6494771803623436, "grad_norm": 0.7768109569232227, "learning_rate": 2.4818976614998924e-06, "loss": 0.0232, "step": 155655 }, { "epoch": 0.6494980430773339, "grad_norm": 0.4249311413722451, "learning_rate": 2.4818578000464527e-06, "loss": 0.0175, "step": 155660 }, { "epoch": 0.6495189057923242, "grad_norm": 2.2969722574500384, "learning_rate": 2.481817940513581e-06, "loss": 0.017, "step": 155665 }, { "epoch": 0.6495397685073144, "grad_norm": 1.7778430636663627, "learning_rate": 2.4817780829011236e-06, "loss": 0.0198, "step": 155670 }, { "epoch": 0.6495606312223048, "grad_norm": 0.48638947636063945, "learning_rate": 2.481738227208925e-06, "loss": 0.0236, "step": 155675 }, { "epoch": 0.649581493937295, "grad_norm": 0.8009080066928045, "learning_rate": 2.481698373436832e-06, "loss": 0.019, "step": 155680 }, { "epoch": 0.6496023566522853, "grad_norm": 0.4313827617443013, "learning_rate": 2.4816585215846913e-06, "loss": 0.0224, "step": 155685 }, { "epoch": 0.6496232193672756, "grad_norm": 0.7135428113084208, "learning_rate": 2.4816186716523464e-06, "loss": 0.0247, "step": 155690 }, { "epoch": 0.6496440820822659, "grad_norm": 0.8930912425104589, "learning_rate": 2.481578823639645e-06, "loss": 0.0236, "step": 155695 }, { "epoch": 0.6496649447972561, "grad_norm": 0.5996572930652792, "learning_rate": 2.4815389775464317e-06, "loss": 0.0253, "step": 155700 }, { "epoch": 0.6496858075122464, "grad_norm": 0.47566899954112063, "learning_rate": 2.481499133372554e-06, "loss": 0.0212, "step": 155705 }, { "epoch": 0.6497066702272367, "grad_norm": 0.2825764649310342, "learning_rate": 2.4814592911178567e-06, "loss": 0.0202, "step": 155710 }, { "epoch": 0.649727532942227, "grad_norm": 0.8514934434998568, "learning_rate": 2.4814194507821863e-06, "loss": 0.0232, "step": 155715 }, { "epoch": 0.6497483956572172, "grad_norm": 0.9496283076289158, "learning_rate": 2.4813796123653886e-06, "loss": 0.0297, "step": 155720 }, { "epoch": 0.6497692583722076, "grad_norm": 0.7021525154773752, "learning_rate": 2.4813397758673086e-06, "loss": 0.0187, "step": 155725 }, { "epoch": 0.6497901210871978, "grad_norm": 0.5044879000891868, "learning_rate": 2.4812999412877942e-06, "loss": 0.0175, "step": 155730 }, { "epoch": 0.649810983802188, "grad_norm": 0.5418911571888554, "learning_rate": 2.481260108626689e-06, "loss": 0.0211, "step": 155735 }, { "epoch": 0.6498318465171784, "grad_norm": 0.6904781126485453, "learning_rate": 2.4812202778838414e-06, "loss": 0.0203, "step": 155740 }, { "epoch": 0.6498527092321686, "grad_norm": 0.5994925410967493, "learning_rate": 2.4811804490590964e-06, "loss": 0.0225, "step": 155745 }, { "epoch": 0.6498735719471589, "grad_norm": 0.5284346118072724, "learning_rate": 2.4811406221522995e-06, "loss": 0.0205, "step": 155750 }, { "epoch": 0.6498944346621492, "grad_norm": 0.4712534741370997, "learning_rate": 2.481100797163297e-06, "loss": 0.0257, "step": 155755 }, { "epoch": 0.6499152973771395, "grad_norm": 0.7946695758724628, "learning_rate": 2.481060974091936e-06, "loss": 0.0221, "step": 155760 }, { "epoch": 0.6499361600921297, "grad_norm": 0.46940984332961316, "learning_rate": 2.481021152938062e-06, "loss": 0.0238, "step": 155765 }, { "epoch": 0.64995702280712, "grad_norm": 0.6665709127080708, "learning_rate": 2.480981333701521e-06, "loss": 0.0189, "step": 155770 }, { "epoch": 0.6499778855221103, "grad_norm": 0.65862379474432, "learning_rate": 2.4809415163821585e-06, "loss": 0.0196, "step": 155775 }, { "epoch": 0.6499987482371006, "grad_norm": 0.6315315820676543, "learning_rate": 2.4809017009798215e-06, "loss": 0.0228, "step": 155780 }, { "epoch": 0.6500196109520908, "grad_norm": 0.8968227673785051, "learning_rate": 2.4808618874943565e-06, "loss": 0.0211, "step": 155785 }, { "epoch": 0.6500404736670812, "grad_norm": 0.6227446480248091, "learning_rate": 2.480822075925609e-06, "loss": 0.0173, "step": 155790 }, { "epoch": 0.6500613363820714, "grad_norm": 0.4054010411407016, "learning_rate": 2.4807822662734255e-06, "loss": 0.0255, "step": 155795 }, { "epoch": 0.6500821990970617, "grad_norm": 0.6629041683244998, "learning_rate": 2.4807424585376524e-06, "loss": 0.0245, "step": 155800 }, { "epoch": 0.650103061812052, "grad_norm": 0.7106436526030556, "learning_rate": 2.4807026527181354e-06, "loss": 0.019, "step": 155805 }, { "epoch": 0.6501239245270423, "grad_norm": 0.8029789935531073, "learning_rate": 2.480662848814721e-06, "loss": 0.0199, "step": 155810 }, { "epoch": 0.6501447872420325, "grad_norm": 0.6924149991805615, "learning_rate": 2.480623046827256e-06, "loss": 0.0315, "step": 155815 }, { "epoch": 0.6501656499570229, "grad_norm": 0.5239632381426202, "learning_rate": 2.480583246755586e-06, "loss": 0.0191, "step": 155820 }, { "epoch": 0.6501865126720131, "grad_norm": 0.8362100396009366, "learning_rate": 2.4805434485995573e-06, "loss": 0.0188, "step": 155825 }, { "epoch": 0.6502073753870034, "grad_norm": 0.3133715420891452, "learning_rate": 2.480503652359017e-06, "loss": 0.0168, "step": 155830 }, { "epoch": 0.6502282381019936, "grad_norm": 0.7708385064714454, "learning_rate": 2.4804638580338104e-06, "loss": 0.0217, "step": 155835 }, { "epoch": 0.650249100816984, "grad_norm": 0.7833822724145165, "learning_rate": 2.4804240656237847e-06, "loss": 0.0266, "step": 155840 }, { "epoch": 0.6502699635319742, "grad_norm": 0.3157051533433306, "learning_rate": 2.480384275128786e-06, "loss": 0.0157, "step": 155845 }, { "epoch": 0.6502908262469644, "grad_norm": 0.6913175064264323, "learning_rate": 2.4803444865486605e-06, "loss": 0.0145, "step": 155850 }, { "epoch": 0.6503116889619548, "grad_norm": 1.193119085752462, "learning_rate": 2.4803046998832555e-06, "loss": 0.027, "step": 155855 }, { "epoch": 0.650332551676945, "grad_norm": 0.9897439580763645, "learning_rate": 2.480264915132416e-06, "loss": 0.025, "step": 155860 }, { "epoch": 0.6503534143919353, "grad_norm": 0.6534981513788327, "learning_rate": 2.48022513229599e-06, "loss": 0.0227, "step": 155865 }, { "epoch": 0.6503742771069256, "grad_norm": 0.5556549984111384, "learning_rate": 2.4801853513738226e-06, "loss": 0.0116, "step": 155870 }, { "epoch": 0.6503951398219159, "grad_norm": 1.3716450633341213, "learning_rate": 2.480145572365761e-06, "loss": 0.0375, "step": 155875 }, { "epoch": 0.6504160025369061, "grad_norm": 0.8854427528663892, "learning_rate": 2.480105795271651e-06, "loss": 0.0219, "step": 155880 }, { "epoch": 0.6504368652518964, "grad_norm": 0.5723575724032544, "learning_rate": 2.480066020091341e-06, "loss": 0.0215, "step": 155885 }, { "epoch": 0.6504577279668867, "grad_norm": 0.699612137024361, "learning_rate": 2.480026246824675e-06, "loss": 0.0194, "step": 155890 }, { "epoch": 0.650478590681877, "grad_norm": 0.5865058832696838, "learning_rate": 2.4799864754715015e-06, "loss": 0.0239, "step": 155895 }, { "epoch": 0.6504994533968672, "grad_norm": 1.468769115142708, "learning_rate": 2.4799467060316663e-06, "loss": 0.0385, "step": 155900 }, { "epoch": 0.6505203161118576, "grad_norm": 0.6286549724674582, "learning_rate": 2.479906938505016e-06, "loss": 0.0212, "step": 155905 }, { "epoch": 0.6505411788268478, "grad_norm": 0.65827750700285, "learning_rate": 2.4798671728913972e-06, "loss": 0.0264, "step": 155910 }, { "epoch": 0.650562041541838, "grad_norm": 0.3329187231471215, "learning_rate": 2.4798274091906567e-06, "loss": 0.018, "step": 155915 }, { "epoch": 0.6505829042568284, "grad_norm": 0.5498434078495136, "learning_rate": 2.4797876474026415e-06, "loss": 0.0172, "step": 155920 }, { "epoch": 0.6506037669718187, "grad_norm": 0.47762355388629435, "learning_rate": 2.479747887527197e-06, "loss": 0.0254, "step": 155925 }, { "epoch": 0.6506246296868089, "grad_norm": 0.5533767175555296, "learning_rate": 2.479708129564171e-06, "loss": 0.0212, "step": 155930 }, { "epoch": 0.6506454924017993, "grad_norm": 0.750637332552673, "learning_rate": 2.47966837351341e-06, "loss": 0.0183, "step": 155935 }, { "epoch": 0.6506663551167895, "grad_norm": 0.4194169087942779, "learning_rate": 2.4796286193747605e-06, "loss": 0.0215, "step": 155940 }, { "epoch": 0.6506872178317797, "grad_norm": 0.8041427310120992, "learning_rate": 2.4795888671480693e-06, "loss": 0.0256, "step": 155945 }, { "epoch": 0.65070808054677, "grad_norm": 0.6770281311446529, "learning_rate": 2.4795491168331835e-06, "loss": 0.0142, "step": 155950 }, { "epoch": 0.6507289432617603, "grad_norm": 0.5122310875918197, "learning_rate": 2.479509368429949e-06, "loss": 0.0163, "step": 155955 }, { "epoch": 0.6507498059767506, "grad_norm": 0.6646924253713212, "learning_rate": 2.4794696219382135e-06, "loss": 0.0236, "step": 155960 }, { "epoch": 0.6507706686917408, "grad_norm": 0.5916919277669689, "learning_rate": 2.4794298773578233e-06, "loss": 0.0355, "step": 155965 }, { "epoch": 0.6507915314067312, "grad_norm": 0.6070360639135116, "learning_rate": 2.479390134688626e-06, "loss": 0.0151, "step": 155970 }, { "epoch": 0.6508123941217214, "grad_norm": 0.9998027796173554, "learning_rate": 2.4793503939304673e-06, "loss": 0.0252, "step": 155975 }, { "epoch": 0.6508332568367117, "grad_norm": 0.8608061394791411, "learning_rate": 2.479310655083194e-06, "loss": 0.0263, "step": 155980 }, { "epoch": 0.650854119551702, "grad_norm": 0.6921984602263808, "learning_rate": 2.4792709181466535e-06, "loss": 0.0243, "step": 155985 }, { "epoch": 0.6508749822666923, "grad_norm": 0.680261677738196, "learning_rate": 2.479231183120693e-06, "loss": 0.0271, "step": 155990 }, { "epoch": 0.6508958449816825, "grad_norm": 0.5037240165831286, "learning_rate": 2.4791914500051593e-06, "loss": 0.0237, "step": 155995 }, { "epoch": 0.6509167076966729, "grad_norm": 0.5864510084932243, "learning_rate": 2.4791517187998993e-06, "loss": 0.0246, "step": 156000 }, { "epoch": 0.6509375704116631, "grad_norm": 0.8431299996439955, "learning_rate": 2.4791119895047595e-06, "loss": 0.0213, "step": 156005 }, { "epoch": 0.6509584331266534, "grad_norm": 0.3420254629346064, "learning_rate": 2.4790722621195863e-06, "loss": 0.0115, "step": 156010 }, { "epoch": 0.6509792958416436, "grad_norm": 0.24217631789766036, "learning_rate": 2.4790325366442285e-06, "loss": 0.0255, "step": 156015 }, { "epoch": 0.651000158556634, "grad_norm": 0.9499894029608866, "learning_rate": 2.478992813078531e-06, "loss": 0.0344, "step": 156020 }, { "epoch": 0.6510210212716242, "grad_norm": 0.5304127826576398, "learning_rate": 2.478953091422343e-06, "loss": 0.0194, "step": 156025 }, { "epoch": 0.6510418839866144, "grad_norm": 1.0053277146327348, "learning_rate": 2.4789133716755096e-06, "loss": 0.0189, "step": 156030 }, { "epoch": 0.6510627467016048, "grad_norm": 0.8319663462572361, "learning_rate": 2.4788736538378784e-06, "loss": 0.0224, "step": 156035 }, { "epoch": 0.651083609416595, "grad_norm": 0.45679898115936224, "learning_rate": 2.4788339379092976e-06, "loss": 0.0227, "step": 156040 }, { "epoch": 0.6511044721315853, "grad_norm": 0.5680563994365317, "learning_rate": 2.4787942238896127e-06, "loss": 0.0236, "step": 156045 }, { "epoch": 0.6511253348465756, "grad_norm": 0.7992324629302644, "learning_rate": 2.478754511778671e-06, "loss": 0.0163, "step": 156050 }, { "epoch": 0.6511461975615659, "grad_norm": 0.7766460945305194, "learning_rate": 2.478714801576321e-06, "loss": 0.022, "step": 156055 }, { "epoch": 0.6511670602765561, "grad_norm": 0.5750081726149713, "learning_rate": 2.4786750932824082e-06, "loss": 0.0338, "step": 156060 }, { "epoch": 0.6511879229915464, "grad_norm": 0.3736465703120719, "learning_rate": 2.478635386896781e-06, "loss": 0.0155, "step": 156065 }, { "epoch": 0.6512087857065367, "grad_norm": 0.6199573867794069, "learning_rate": 2.478595682419285e-06, "loss": 0.0241, "step": 156070 }, { "epoch": 0.651229648421527, "grad_norm": 0.7568702770306094, "learning_rate": 2.4785559798497692e-06, "loss": 0.0257, "step": 156075 }, { "epoch": 0.6512505111365172, "grad_norm": 0.2900778209263136, "learning_rate": 2.4785162791880794e-06, "loss": 0.018, "step": 156080 }, { "epoch": 0.6512713738515076, "grad_norm": 0.6995039065386526, "learning_rate": 2.478476580434064e-06, "loss": 0.0216, "step": 156085 }, { "epoch": 0.6512922365664978, "grad_norm": 1.0635321726072078, "learning_rate": 2.4784368835875693e-06, "loss": 0.0296, "step": 156090 }, { "epoch": 0.6513130992814881, "grad_norm": 0.6848312623177942, "learning_rate": 2.4783971886484425e-06, "loss": 0.0198, "step": 156095 }, { "epoch": 0.6513339619964784, "grad_norm": 0.46728207715099734, "learning_rate": 2.478357495616531e-06, "loss": 0.0154, "step": 156100 }, { "epoch": 0.6513548247114687, "grad_norm": 0.6342827871855239, "learning_rate": 2.4783178044916826e-06, "loss": 0.0257, "step": 156105 }, { "epoch": 0.6513756874264589, "grad_norm": 0.464926456669194, "learning_rate": 2.478278115273744e-06, "loss": 0.0213, "step": 156110 }, { "epoch": 0.6513965501414493, "grad_norm": 0.4572015569889116, "learning_rate": 2.4782384279625627e-06, "loss": 0.0208, "step": 156115 }, { "epoch": 0.6514174128564395, "grad_norm": 0.4958618832242135, "learning_rate": 2.478198742557987e-06, "loss": 0.0284, "step": 156120 }, { "epoch": 0.6514382755714297, "grad_norm": 0.37730626064973644, "learning_rate": 2.4781590590598624e-06, "loss": 0.0197, "step": 156125 }, { "epoch": 0.65145913828642, "grad_norm": 0.8282871885780342, "learning_rate": 2.4781193774680373e-06, "loss": 0.0197, "step": 156130 }, { "epoch": 0.6514800010014103, "grad_norm": 1.0003017253402844, "learning_rate": 2.4780796977823594e-06, "loss": 0.0254, "step": 156135 }, { "epoch": 0.6515008637164006, "grad_norm": 1.6247472623281558, "learning_rate": 2.478040020002675e-06, "loss": 0.0255, "step": 156140 }, { "epoch": 0.6515217264313908, "grad_norm": 0.866598326956587, "learning_rate": 2.4780003441288327e-06, "loss": 0.0249, "step": 156145 }, { "epoch": 0.6515425891463812, "grad_norm": 0.5278997470680296, "learning_rate": 2.4779606701606797e-06, "loss": 0.0176, "step": 156150 }, { "epoch": 0.6515634518613714, "grad_norm": 0.5179823011392041, "learning_rate": 2.4779209980980625e-06, "loss": 0.0222, "step": 156155 }, { "epoch": 0.6515843145763617, "grad_norm": 0.9185847680576646, "learning_rate": 2.4778813279408297e-06, "loss": 0.0237, "step": 156160 }, { "epoch": 0.651605177291352, "grad_norm": 0.5582573906339588, "learning_rate": 2.477841659688828e-06, "loss": 0.0163, "step": 156165 }, { "epoch": 0.6516260400063423, "grad_norm": 0.8967336837362011, "learning_rate": 2.4778019933419054e-06, "loss": 0.0267, "step": 156170 }, { "epoch": 0.6516469027213325, "grad_norm": 1.5096133904077405, "learning_rate": 2.4777623288999094e-06, "loss": 0.0206, "step": 156175 }, { "epoch": 0.6516677654363229, "grad_norm": 3.922406752556936, "learning_rate": 2.4777226663626875e-06, "loss": 0.0218, "step": 156180 }, { "epoch": 0.6516886281513131, "grad_norm": 0.29070411712751326, "learning_rate": 2.477683005730087e-06, "loss": 0.0201, "step": 156185 }, { "epoch": 0.6517094908663034, "grad_norm": 0.3065745935772021, "learning_rate": 2.477643347001955e-06, "loss": 0.0188, "step": 156190 }, { "epoch": 0.6517303535812936, "grad_norm": 0.7007641989455828, "learning_rate": 2.4776036901781406e-06, "loss": 0.0226, "step": 156195 }, { "epoch": 0.651751216296284, "grad_norm": 0.928938138123067, "learning_rate": 2.4775640352584893e-06, "loss": 0.0263, "step": 156200 }, { "epoch": 0.6517720790112742, "grad_norm": 1.072827854395429, "learning_rate": 2.477524382242851e-06, "loss": 0.0363, "step": 156205 }, { "epoch": 0.6517929417262645, "grad_norm": 0.3760986922550289, "learning_rate": 2.477484731131072e-06, "loss": 0.0175, "step": 156210 }, { "epoch": 0.6518138044412548, "grad_norm": 0.663248159800593, "learning_rate": 2.477445081923e-06, "loss": 0.024, "step": 156215 }, { "epoch": 0.651834667156245, "grad_norm": 0.5018848135212661, "learning_rate": 2.4774054346184833e-06, "loss": 0.0231, "step": 156220 }, { "epoch": 0.6518555298712353, "grad_norm": 1.407754413263203, "learning_rate": 2.477365789217369e-06, "loss": 0.0228, "step": 156225 }, { "epoch": 0.6518763925862257, "grad_norm": 0.47885072978246357, "learning_rate": 2.4773261457195048e-06, "loss": 0.0177, "step": 156230 }, { "epoch": 0.6518972553012159, "grad_norm": 0.908848254933487, "learning_rate": 2.477286504124739e-06, "loss": 0.0274, "step": 156235 }, { "epoch": 0.6519181180162061, "grad_norm": 1.0239881097167098, "learning_rate": 2.477246864432918e-06, "loss": 0.0235, "step": 156240 }, { "epoch": 0.6519389807311964, "grad_norm": 0.8065028745623221, "learning_rate": 2.477207226643892e-06, "loss": 0.0244, "step": 156245 }, { "epoch": 0.6519598434461867, "grad_norm": 0.7925637035008096, "learning_rate": 2.4771675907575064e-06, "loss": 0.0228, "step": 156250 }, { "epoch": 0.651980706161177, "grad_norm": 0.6340435520288559, "learning_rate": 2.477127956773609e-06, "loss": 0.0177, "step": 156255 }, { "epoch": 0.6520015688761672, "grad_norm": 0.8602913413996018, "learning_rate": 2.47708832469205e-06, "loss": 0.0208, "step": 156260 }, { "epoch": 0.6520224315911576, "grad_norm": 0.5157344016614267, "learning_rate": 2.477048694512675e-06, "loss": 0.0224, "step": 156265 }, { "epoch": 0.6520432943061478, "grad_norm": 0.4156708597152871, "learning_rate": 2.4770090662353322e-06, "loss": 0.0228, "step": 156270 }, { "epoch": 0.6520641570211381, "grad_norm": 0.9038867930725477, "learning_rate": 2.47696943985987e-06, "loss": 0.0219, "step": 156275 }, { "epoch": 0.6520850197361284, "grad_norm": 0.7004932590575282, "learning_rate": 2.4769298153861367e-06, "loss": 0.0233, "step": 156280 }, { "epoch": 0.6521058824511187, "grad_norm": 1.2503420785173254, "learning_rate": 2.476890192813979e-06, "loss": 0.0249, "step": 156285 }, { "epoch": 0.6521267451661089, "grad_norm": 0.6356454326600943, "learning_rate": 2.476850572143245e-06, "loss": 0.0208, "step": 156290 }, { "epoch": 0.6521476078810993, "grad_norm": 0.4125374205300398, "learning_rate": 2.476810953373784e-06, "loss": 0.0209, "step": 156295 }, { "epoch": 0.6521684705960895, "grad_norm": 0.3779670514668132, "learning_rate": 2.4767713365054423e-06, "loss": 0.022, "step": 156300 }, { "epoch": 0.6521893333110798, "grad_norm": 0.5064018824629418, "learning_rate": 2.476731721538069e-06, "loss": 0.0195, "step": 156305 }, { "epoch": 0.65221019602607, "grad_norm": 0.5876394522878743, "learning_rate": 2.4766921084715107e-06, "loss": 0.0229, "step": 156310 }, { "epoch": 0.6522310587410604, "grad_norm": 0.4626932140122691, "learning_rate": 2.476652497305617e-06, "loss": 0.0184, "step": 156315 }, { "epoch": 0.6522519214560506, "grad_norm": 0.6871248928881216, "learning_rate": 2.4766128880402352e-06, "loss": 0.0193, "step": 156320 }, { "epoch": 0.6522727841710408, "grad_norm": 0.4585065623249777, "learning_rate": 2.476573280675213e-06, "loss": 0.0219, "step": 156325 }, { "epoch": 0.6522936468860312, "grad_norm": 0.9429677579180671, "learning_rate": 2.476533675210399e-06, "loss": 0.0284, "step": 156330 }, { "epoch": 0.6523145096010214, "grad_norm": 0.7035064309002996, "learning_rate": 2.4764940716456412e-06, "loss": 0.0224, "step": 156335 }, { "epoch": 0.6523353723160117, "grad_norm": 0.5680145278323169, "learning_rate": 2.476454469980787e-06, "loss": 0.029, "step": 156340 }, { "epoch": 0.652356235031002, "grad_norm": 0.6869579970657986, "learning_rate": 2.4764148702156856e-06, "loss": 0.0241, "step": 156345 }, { "epoch": 0.6523770977459923, "grad_norm": 0.6906342190024043, "learning_rate": 2.476375272350184e-06, "loss": 0.0202, "step": 156350 }, { "epoch": 0.6523979604609825, "grad_norm": 0.9158247573624477, "learning_rate": 2.4763356763841313e-06, "loss": 0.03, "step": 156355 }, { "epoch": 0.6524188231759728, "grad_norm": 0.8418748967707242, "learning_rate": 2.476296082317375e-06, "loss": 0.0146, "step": 156360 }, { "epoch": 0.6524396858909631, "grad_norm": 0.3409051417458916, "learning_rate": 2.4762564901497633e-06, "loss": 0.0209, "step": 156365 }, { "epoch": 0.6524605486059534, "grad_norm": 0.5703370301187012, "learning_rate": 2.476216899881145e-06, "loss": 0.021, "step": 156370 }, { "epoch": 0.6524814113209436, "grad_norm": 0.8366154420325667, "learning_rate": 2.4761773115113674e-06, "loss": 0.0226, "step": 156375 }, { "epoch": 0.652502274035934, "grad_norm": 0.5728606617205415, "learning_rate": 2.47613772504028e-06, "loss": 0.0225, "step": 156380 }, { "epoch": 0.6525231367509242, "grad_norm": 0.3499697489796865, "learning_rate": 2.47609814046773e-06, "loss": 0.0234, "step": 156385 }, { "epoch": 0.6525439994659145, "grad_norm": 0.7006646175848819, "learning_rate": 2.476058557793565e-06, "loss": 0.0283, "step": 156390 }, { "epoch": 0.6525648621809048, "grad_norm": 0.47371876828908094, "learning_rate": 2.4760189770176346e-06, "loss": 0.0226, "step": 156395 }, { "epoch": 0.6525857248958951, "grad_norm": 1.1163930285384873, "learning_rate": 2.475979398139787e-06, "loss": 0.0218, "step": 156400 }, { "epoch": 0.6526065876108853, "grad_norm": 1.353048030503449, "learning_rate": 2.4759398211598696e-06, "loss": 0.019, "step": 156405 }, { "epoch": 0.6526274503258757, "grad_norm": 0.8245974060617453, "learning_rate": 2.4759002460777315e-06, "loss": 0.0141, "step": 156410 }, { "epoch": 0.6526483130408659, "grad_norm": 1.0365480295699916, "learning_rate": 2.475860672893221e-06, "loss": 0.0281, "step": 156415 }, { "epoch": 0.6526691757558561, "grad_norm": 0.4066631776256235, "learning_rate": 2.4758211016061858e-06, "loss": 0.0227, "step": 156420 }, { "epoch": 0.6526900384708464, "grad_norm": 0.6005137072538743, "learning_rate": 2.4757815322164745e-06, "loss": 0.0121, "step": 156425 }, { "epoch": 0.6527109011858367, "grad_norm": 0.8917722799772317, "learning_rate": 2.4757419647239362e-06, "loss": 0.0265, "step": 156430 }, { "epoch": 0.652731763900827, "grad_norm": 0.6794933838339536, "learning_rate": 2.475702399128419e-06, "loss": 0.0214, "step": 156435 }, { "epoch": 0.6527526266158172, "grad_norm": 0.4624631189889923, "learning_rate": 2.4756628354297704e-06, "loss": 0.0169, "step": 156440 }, { "epoch": 0.6527734893308076, "grad_norm": 0.7341045890405942, "learning_rate": 2.4756232736278396e-06, "loss": 0.0166, "step": 156445 }, { "epoch": 0.6527943520457978, "grad_norm": 0.3701606366016997, "learning_rate": 2.4755837137224757e-06, "loss": 0.0294, "step": 156450 }, { "epoch": 0.6528152147607881, "grad_norm": 0.3543050769687742, "learning_rate": 2.4755441557135258e-06, "loss": 0.0131, "step": 156455 }, { "epoch": 0.6528360774757784, "grad_norm": 0.8812224475461473, "learning_rate": 2.475504599600839e-06, "loss": 0.0284, "step": 156460 }, { "epoch": 0.6528569401907687, "grad_norm": 0.6514386862942838, "learning_rate": 2.475465045384264e-06, "loss": 0.0251, "step": 156465 }, { "epoch": 0.6528778029057589, "grad_norm": 1.2035922373051333, "learning_rate": 2.4754254930636495e-06, "loss": 0.0317, "step": 156470 }, { "epoch": 0.6528986656207493, "grad_norm": 0.423533897479812, "learning_rate": 2.4753859426388438e-06, "loss": 0.015, "step": 156475 }, { "epoch": 0.6529195283357395, "grad_norm": 0.7077929534853767, "learning_rate": 2.4753463941096948e-06, "loss": 0.0285, "step": 156480 }, { "epoch": 0.6529403910507298, "grad_norm": 0.6430532467162189, "learning_rate": 2.475306847476052e-06, "loss": 0.0266, "step": 156485 }, { "epoch": 0.65296125376572, "grad_norm": 0.2991174121478011, "learning_rate": 2.475267302737764e-06, "loss": 0.0289, "step": 156490 }, { "epoch": 0.6529821164807104, "grad_norm": 1.3288256384433745, "learning_rate": 2.4752277598946786e-06, "loss": 0.0269, "step": 156495 }, { "epoch": 0.6530029791957006, "grad_norm": 0.6221886558791988, "learning_rate": 2.475188218946645e-06, "loss": 0.0182, "step": 156500 }, { "epoch": 0.6530238419106908, "grad_norm": 1.224781294186694, "learning_rate": 2.4751486798935117e-06, "loss": 0.0365, "step": 156505 }, { "epoch": 0.6530447046256812, "grad_norm": 0.659691800631008, "learning_rate": 2.475109142735127e-06, "loss": 0.0215, "step": 156510 }, { "epoch": 0.6530655673406714, "grad_norm": 0.60643717946186, "learning_rate": 2.4750696074713406e-06, "loss": 0.0298, "step": 156515 }, { "epoch": 0.6530864300556617, "grad_norm": 0.6919528739354047, "learning_rate": 2.475030074102e-06, "loss": 0.0239, "step": 156520 }, { "epoch": 0.653107292770652, "grad_norm": 1.0252877320294311, "learning_rate": 2.4749905426269545e-06, "loss": 0.0214, "step": 156525 }, { "epoch": 0.6531281554856423, "grad_norm": 0.7201987659189685, "learning_rate": 2.474951013046053e-06, "loss": 0.0159, "step": 156530 }, { "epoch": 0.6531490182006325, "grad_norm": 1.1816387572893305, "learning_rate": 2.474911485359144e-06, "loss": 0.0409, "step": 156535 }, { "epoch": 0.6531698809156228, "grad_norm": 0.653441329510225, "learning_rate": 2.4748719595660766e-06, "loss": 0.0175, "step": 156540 }, { "epoch": 0.6531907436306131, "grad_norm": 0.3725766879486404, "learning_rate": 2.474832435666699e-06, "loss": 0.0185, "step": 156545 }, { "epoch": 0.6532116063456034, "grad_norm": 1.239214858107476, "learning_rate": 2.47479291366086e-06, "loss": 0.0225, "step": 156550 }, { "epoch": 0.6532324690605936, "grad_norm": 0.3935314791965183, "learning_rate": 2.4747533935484088e-06, "loss": 0.0337, "step": 156555 }, { "epoch": 0.653253331775584, "grad_norm": 0.5466769078023195, "learning_rate": 2.4747138753291937e-06, "loss": 0.0159, "step": 156560 }, { "epoch": 0.6532741944905742, "grad_norm": 0.5033469673972839, "learning_rate": 2.4746743590030646e-06, "loss": 0.0225, "step": 156565 }, { "epoch": 0.6532950572055645, "grad_norm": 0.2958474660743007, "learning_rate": 2.474634844569869e-06, "loss": 0.013, "step": 156570 }, { "epoch": 0.6533159199205548, "grad_norm": 1.1814001174144317, "learning_rate": 2.474595332029457e-06, "loss": 0.0282, "step": 156575 }, { "epoch": 0.6533367826355451, "grad_norm": 0.6469473792657694, "learning_rate": 2.474555821381677e-06, "loss": 0.0148, "step": 156580 }, { "epoch": 0.6533576453505353, "grad_norm": 0.4998784383617289, "learning_rate": 2.4745163126263776e-06, "loss": 0.0181, "step": 156585 }, { "epoch": 0.6533785080655257, "grad_norm": 0.5472913976100079, "learning_rate": 2.474476805763408e-06, "loss": 0.0263, "step": 156590 }, { "epoch": 0.6533993707805159, "grad_norm": 0.7872585918301479, "learning_rate": 2.474437300792617e-06, "loss": 0.0285, "step": 156595 }, { "epoch": 0.6534202334955062, "grad_norm": 0.9256838135792925, "learning_rate": 2.474397797713854e-06, "loss": 0.0233, "step": 156600 }, { "epoch": 0.6534410962104964, "grad_norm": 0.6227036692852095, "learning_rate": 2.474358296526968e-06, "loss": 0.0273, "step": 156605 }, { "epoch": 0.6534619589254868, "grad_norm": 0.6605939915681894, "learning_rate": 2.474318797231807e-06, "loss": 0.0187, "step": 156610 }, { "epoch": 0.653482821640477, "grad_norm": 1.1396976708328852, "learning_rate": 2.4742792998282205e-06, "loss": 0.0202, "step": 156615 }, { "epoch": 0.6535036843554672, "grad_norm": 0.364308540840469, "learning_rate": 2.4742398043160584e-06, "loss": 0.0188, "step": 156620 }, { "epoch": 0.6535245470704576, "grad_norm": 0.6640247561430966, "learning_rate": 2.474200310695169e-06, "loss": 0.023, "step": 156625 }, { "epoch": 0.6535454097854478, "grad_norm": 1.0344009335590676, "learning_rate": 2.474160818965401e-06, "loss": 0.0289, "step": 156630 }, { "epoch": 0.6535662725004381, "grad_norm": 1.0974218523599015, "learning_rate": 2.4741213291266042e-06, "loss": 0.0248, "step": 156635 }, { "epoch": 0.6535871352154284, "grad_norm": 0.6067197131070846, "learning_rate": 2.4740818411786276e-06, "loss": 0.0244, "step": 156640 }, { "epoch": 0.6536079979304187, "grad_norm": 0.40088339574004567, "learning_rate": 2.4740423551213196e-06, "loss": 0.0177, "step": 156645 }, { "epoch": 0.6536288606454089, "grad_norm": 0.49624669320997755, "learning_rate": 2.47400287095453e-06, "loss": 0.0203, "step": 156650 }, { "epoch": 0.6536497233603993, "grad_norm": 0.7354159911427309, "learning_rate": 2.473963388678108e-06, "loss": 0.0382, "step": 156655 }, { "epoch": 0.6536705860753895, "grad_norm": 0.5168005082736744, "learning_rate": 2.4739239082919024e-06, "loss": 0.0267, "step": 156660 }, { "epoch": 0.6536914487903798, "grad_norm": 0.3299697808018044, "learning_rate": 2.473884429795763e-06, "loss": 0.0245, "step": 156665 }, { "epoch": 0.65371231150537, "grad_norm": 0.7500664400202409, "learning_rate": 2.4738449531895383e-06, "loss": 0.0178, "step": 156670 }, { "epoch": 0.6537331742203604, "grad_norm": 0.6474465011876752, "learning_rate": 2.4738054784730773e-06, "loss": 0.0217, "step": 156675 }, { "epoch": 0.6537540369353506, "grad_norm": 0.2694753193062513, "learning_rate": 2.47376600564623e-06, "loss": 0.0195, "step": 156680 }, { "epoch": 0.6537748996503409, "grad_norm": 0.41652504322653866, "learning_rate": 2.473726534708845e-06, "loss": 0.0206, "step": 156685 }, { "epoch": 0.6537957623653312, "grad_norm": 0.4845770494279048, "learning_rate": 2.4736870656607726e-06, "loss": 0.0211, "step": 156690 }, { "epoch": 0.6538166250803215, "grad_norm": 0.6881419560994274, "learning_rate": 2.4736475985018607e-06, "loss": 0.0253, "step": 156695 }, { "epoch": 0.6538374877953117, "grad_norm": 0.8555747354034522, "learning_rate": 2.47360813323196e-06, "loss": 0.0176, "step": 156700 }, { "epoch": 0.653858350510302, "grad_norm": 0.737754160200266, "learning_rate": 2.473568669850919e-06, "loss": 0.0284, "step": 156705 }, { "epoch": 0.6538792132252923, "grad_norm": 0.7379761840540242, "learning_rate": 2.4735292083585865e-06, "loss": 0.0299, "step": 156710 }, { "epoch": 0.6539000759402825, "grad_norm": 0.6789502218890558, "learning_rate": 2.4734897487548124e-06, "loss": 0.0223, "step": 156715 }, { "epoch": 0.6539209386552728, "grad_norm": 0.9241484334288217, "learning_rate": 2.4734502910394466e-06, "loss": 0.0251, "step": 156720 }, { "epoch": 0.6539418013702631, "grad_norm": 0.6230911293722974, "learning_rate": 2.473410835212338e-06, "loss": 0.0176, "step": 156725 }, { "epoch": 0.6539626640852534, "grad_norm": 0.7403928223000121, "learning_rate": 2.473371381273336e-06, "loss": 0.0229, "step": 156730 }, { "epoch": 0.6539835268002436, "grad_norm": 0.6182212470923283, "learning_rate": 2.47333192922229e-06, "loss": 0.0251, "step": 156735 }, { "epoch": 0.654004389515234, "grad_norm": 0.7890046694037137, "learning_rate": 2.4732924790590497e-06, "loss": 0.0249, "step": 156740 }, { "epoch": 0.6540252522302242, "grad_norm": 0.5623292566624439, "learning_rate": 2.473253030783464e-06, "loss": 0.0213, "step": 156745 }, { "epoch": 0.6540461149452145, "grad_norm": 0.5250221577994324, "learning_rate": 2.4732135843953827e-06, "loss": 0.024, "step": 156750 }, { "epoch": 0.6540669776602048, "grad_norm": 0.9650526006171268, "learning_rate": 2.473174139894655e-06, "loss": 0.0286, "step": 156755 }, { "epoch": 0.6540878403751951, "grad_norm": 0.434914471805562, "learning_rate": 2.473134697281132e-06, "loss": 0.015, "step": 156760 }, { "epoch": 0.6541087030901853, "grad_norm": 0.6293727551091883, "learning_rate": 2.473095256554661e-06, "loss": 0.0297, "step": 156765 }, { "epoch": 0.6541295658051757, "grad_norm": 0.9046894830702531, "learning_rate": 2.473055817715092e-06, "loss": 0.025, "step": 156770 }, { "epoch": 0.6541504285201659, "grad_norm": 0.7099788093089585, "learning_rate": 2.4730163807622754e-06, "loss": 0.0294, "step": 156775 }, { "epoch": 0.6541712912351562, "grad_norm": 0.6840445105915607, "learning_rate": 2.4729769456960607e-06, "loss": 0.0176, "step": 156780 }, { "epoch": 0.6541921539501464, "grad_norm": 0.5330023549002503, "learning_rate": 2.4729375125162964e-06, "loss": 0.0295, "step": 156785 }, { "epoch": 0.6542130166651368, "grad_norm": 0.8981097647226018, "learning_rate": 2.472898081222833e-06, "loss": 0.0191, "step": 156790 }, { "epoch": 0.654233879380127, "grad_norm": 0.8550714476555649, "learning_rate": 2.4728586518155205e-06, "loss": 0.0247, "step": 156795 }, { "epoch": 0.6542547420951172, "grad_norm": 0.6604161114065781, "learning_rate": 2.4728192242942077e-06, "loss": 0.0202, "step": 156800 }, { "epoch": 0.6542756048101076, "grad_norm": 0.7828959603028269, "learning_rate": 2.4727797986587448e-06, "loss": 0.024, "step": 156805 }, { "epoch": 0.6542964675250978, "grad_norm": 0.816411235793333, "learning_rate": 2.4727403749089805e-06, "loss": 0.0194, "step": 156810 }, { "epoch": 0.6543173302400881, "grad_norm": 0.5108749379706146, "learning_rate": 2.4727009530447653e-06, "loss": 0.0204, "step": 156815 }, { "epoch": 0.6543381929550784, "grad_norm": 1.775420006904822, "learning_rate": 2.4726615330659497e-06, "loss": 0.0227, "step": 156820 }, { "epoch": 0.6543590556700687, "grad_norm": 0.5522386806153986, "learning_rate": 2.4726221149723818e-06, "loss": 0.031, "step": 156825 }, { "epoch": 0.6543799183850589, "grad_norm": 1.094397812090222, "learning_rate": 2.472582698763912e-06, "loss": 0.0161, "step": 156830 }, { "epoch": 0.6544007811000493, "grad_norm": 0.7905228029171538, "learning_rate": 2.47254328444039e-06, "loss": 0.0274, "step": 156835 }, { "epoch": 0.6544216438150395, "grad_norm": 0.30424024009603484, "learning_rate": 2.4725038720016662e-06, "loss": 0.0192, "step": 156840 }, { "epoch": 0.6544425065300298, "grad_norm": 0.3466394351118992, "learning_rate": 2.4724644614475898e-06, "loss": 0.022, "step": 156845 }, { "epoch": 0.65446336924502, "grad_norm": 0.4190791898491016, "learning_rate": 2.4724250527780104e-06, "loss": 0.0204, "step": 156850 }, { "epoch": 0.6544842319600104, "grad_norm": 1.4314105066622025, "learning_rate": 2.472385645992778e-06, "loss": 0.0353, "step": 156855 }, { "epoch": 0.6545050946750006, "grad_norm": 0.6002735598181141, "learning_rate": 2.472346241091743e-06, "loss": 0.0204, "step": 156860 }, { "epoch": 0.6545259573899909, "grad_norm": 0.8811478545762557, "learning_rate": 2.472306838074754e-06, "loss": 0.0217, "step": 156865 }, { "epoch": 0.6545468201049812, "grad_norm": 0.45377457468685695, "learning_rate": 2.472267436941662e-06, "loss": 0.0236, "step": 156870 }, { "epoch": 0.6545676828199715, "grad_norm": 0.42391811090793785, "learning_rate": 2.4722280376923166e-06, "loss": 0.0215, "step": 156875 }, { "epoch": 0.6545885455349617, "grad_norm": 0.8460993132408409, "learning_rate": 2.472188640326568e-06, "loss": 0.0283, "step": 156880 }, { "epoch": 0.6546094082499521, "grad_norm": 0.5469130736541474, "learning_rate": 2.4721492448442648e-06, "loss": 0.0236, "step": 156885 }, { "epoch": 0.6546302709649423, "grad_norm": 0.8117728282237041, "learning_rate": 2.472109851245259e-06, "loss": 0.0217, "step": 156890 }, { "epoch": 0.6546511336799326, "grad_norm": 0.4855325725374559, "learning_rate": 2.472070459529399e-06, "loss": 0.0188, "step": 156895 }, { "epoch": 0.6546719963949228, "grad_norm": 0.7034650190217344, "learning_rate": 2.4720310696965353e-06, "loss": 0.0208, "step": 156900 }, { "epoch": 0.6546928591099132, "grad_norm": 0.760950319568177, "learning_rate": 2.471991681746517e-06, "loss": 0.0235, "step": 156905 }, { "epoch": 0.6547137218249034, "grad_norm": 1.0747619221920863, "learning_rate": 2.471952295679196e-06, "loss": 0.0199, "step": 156910 }, { "epoch": 0.6547345845398936, "grad_norm": 0.4335495693416191, "learning_rate": 2.4719129114944207e-06, "loss": 0.0221, "step": 156915 }, { "epoch": 0.654755447254884, "grad_norm": 1.0354910385460454, "learning_rate": 2.4718735291920414e-06, "loss": 0.0201, "step": 156920 }, { "epoch": 0.6547763099698742, "grad_norm": 0.24312206825998478, "learning_rate": 2.4718341487719087e-06, "loss": 0.0194, "step": 156925 }, { "epoch": 0.6547971726848645, "grad_norm": 0.2826321876616485, "learning_rate": 2.471794770233873e-06, "loss": 0.0134, "step": 156930 }, { "epoch": 0.6548180353998548, "grad_norm": 0.7110943594206485, "learning_rate": 2.4717553935777833e-06, "loss": 0.0234, "step": 156935 }, { "epoch": 0.6548388981148451, "grad_norm": 0.4994520231369302, "learning_rate": 2.47171601880349e-06, "loss": 0.0374, "step": 156940 }, { "epoch": 0.6548597608298353, "grad_norm": 0.9422712873105121, "learning_rate": 2.4716766459108436e-06, "loss": 0.0344, "step": 156945 }, { "epoch": 0.6548806235448257, "grad_norm": 0.6720950536478195, "learning_rate": 2.4716372748996935e-06, "loss": 0.023, "step": 156950 }, { "epoch": 0.6549014862598159, "grad_norm": 0.7015457626850614, "learning_rate": 2.471597905769891e-06, "loss": 0.0283, "step": 156955 }, { "epoch": 0.6549223489748062, "grad_norm": 0.6775974371118058, "learning_rate": 2.471558538521285e-06, "loss": 0.0247, "step": 156960 }, { "epoch": 0.6549432116897964, "grad_norm": 0.4966693127937226, "learning_rate": 2.471519173153727e-06, "loss": 0.0243, "step": 156965 }, { "epoch": 0.6549640744047868, "grad_norm": 0.5546952843234821, "learning_rate": 2.4714798096670663e-06, "loss": 0.0248, "step": 156970 }, { "epoch": 0.654984937119777, "grad_norm": 0.6760491741651474, "learning_rate": 2.4714404480611536e-06, "loss": 0.0417, "step": 156975 }, { "epoch": 0.6550057998347673, "grad_norm": 0.4202202480736916, "learning_rate": 2.4714010883358387e-06, "loss": 0.0183, "step": 156980 }, { "epoch": 0.6550266625497576, "grad_norm": 0.41602484630231057, "learning_rate": 2.4713617304909717e-06, "loss": 0.0188, "step": 156985 }, { "epoch": 0.6550475252647479, "grad_norm": 0.4369897904775262, "learning_rate": 2.471322374526404e-06, "loss": 0.0281, "step": 156990 }, { "epoch": 0.6550683879797381, "grad_norm": 0.4793724019453882, "learning_rate": 2.4712830204419845e-06, "loss": 0.0229, "step": 156995 }, { "epoch": 0.6550892506947285, "grad_norm": 0.7295602824373596, "learning_rate": 2.4712436682375645e-06, "loss": 0.0205, "step": 157000 }, { "epoch": 0.6551101134097187, "grad_norm": 0.557540515533241, "learning_rate": 2.471204317912994e-06, "loss": 0.0246, "step": 157005 }, { "epoch": 0.6551309761247089, "grad_norm": 0.9747884158515383, "learning_rate": 2.471164969468123e-06, "loss": 0.0244, "step": 157010 }, { "epoch": 0.6551518388396993, "grad_norm": 0.811489688618862, "learning_rate": 2.4711256229028024e-06, "loss": 0.0231, "step": 157015 }, { "epoch": 0.6551727015546895, "grad_norm": 0.5679386596770295, "learning_rate": 2.471086278216882e-06, "loss": 0.0228, "step": 157020 }, { "epoch": 0.6551935642696798, "grad_norm": 0.7165308431213846, "learning_rate": 2.471046935410213e-06, "loss": 0.0201, "step": 157025 }, { "epoch": 0.65521442698467, "grad_norm": 0.6842508130414856, "learning_rate": 2.471007594482645e-06, "loss": 0.0184, "step": 157030 }, { "epoch": 0.6552352896996604, "grad_norm": 0.607771728464991, "learning_rate": 2.4709682554340284e-06, "loss": 0.0206, "step": 157035 }, { "epoch": 0.6552561524146506, "grad_norm": 0.5176001969272129, "learning_rate": 2.470928918264214e-06, "loss": 0.0205, "step": 157040 }, { "epoch": 0.6552770151296409, "grad_norm": 0.323009879436654, "learning_rate": 2.4708895829730533e-06, "loss": 0.0199, "step": 157045 }, { "epoch": 0.6552978778446312, "grad_norm": 0.840111238404386, "learning_rate": 2.4708502495603946e-06, "loss": 0.0277, "step": 157050 }, { "epoch": 0.6553187405596215, "grad_norm": 0.30597906964072813, "learning_rate": 2.4708109180260896e-06, "loss": 0.0294, "step": 157055 }, { "epoch": 0.6553396032746117, "grad_norm": 0.6978601853870915, "learning_rate": 2.470771588369989e-06, "loss": 0.0175, "step": 157060 }, { "epoch": 0.6553604659896021, "grad_norm": 0.8754680478192345, "learning_rate": 2.470732260591943e-06, "loss": 0.019, "step": 157065 }, { "epoch": 0.6553813287045923, "grad_norm": 0.8027504006028094, "learning_rate": 2.4706929346918018e-06, "loss": 0.0223, "step": 157070 }, { "epoch": 0.6554021914195826, "grad_norm": 0.8903153544063254, "learning_rate": 2.4706536106694165e-06, "loss": 0.028, "step": 157075 }, { "epoch": 0.6554230541345728, "grad_norm": 0.6729832449172045, "learning_rate": 2.4706142885246372e-06, "loss": 0.0209, "step": 157080 }, { "epoch": 0.6554439168495632, "grad_norm": 0.5210786067522462, "learning_rate": 2.4705749682573154e-06, "loss": 0.0225, "step": 157085 }, { "epoch": 0.6554647795645534, "grad_norm": 0.657207079629341, "learning_rate": 2.4705356498673007e-06, "loss": 0.0275, "step": 157090 }, { "epoch": 0.6554856422795436, "grad_norm": 0.6698115796049738, "learning_rate": 2.4704963333544444e-06, "loss": 0.0238, "step": 157095 }, { "epoch": 0.655506504994534, "grad_norm": 0.6111732390724184, "learning_rate": 2.470457018718596e-06, "loss": 0.0178, "step": 157100 }, { "epoch": 0.6555273677095242, "grad_norm": 0.7250416772713711, "learning_rate": 2.4704177059596077e-06, "loss": 0.0189, "step": 157105 }, { "epoch": 0.6555482304245145, "grad_norm": 0.7604183158892659, "learning_rate": 2.470378395077329e-06, "loss": 0.0302, "step": 157110 }, { "epoch": 0.6555690931395048, "grad_norm": 0.5144212101603198, "learning_rate": 2.470339086071611e-06, "loss": 0.0312, "step": 157115 }, { "epoch": 0.6555899558544951, "grad_norm": 2.553382529589316, "learning_rate": 2.470299778942305e-06, "loss": 0.0251, "step": 157120 }, { "epoch": 0.6556108185694853, "grad_norm": 0.49438752107564415, "learning_rate": 2.4702604736892603e-06, "loss": 0.0244, "step": 157125 }, { "epoch": 0.6556316812844757, "grad_norm": 0.9606917210426009, "learning_rate": 2.4702211703123294e-06, "loss": 0.0222, "step": 157130 }, { "epoch": 0.6556525439994659, "grad_norm": 0.82600243861579, "learning_rate": 2.4701818688113622e-06, "loss": 0.0316, "step": 157135 }, { "epoch": 0.6556734067144562, "grad_norm": 0.6364212302802161, "learning_rate": 2.4701425691862084e-06, "loss": 0.0235, "step": 157140 }, { "epoch": 0.6556942694294464, "grad_norm": 0.6956600313045033, "learning_rate": 2.4701032714367204e-06, "loss": 0.0198, "step": 157145 }, { "epoch": 0.6557151321444368, "grad_norm": 0.4205042093998502, "learning_rate": 2.4700639755627485e-06, "loss": 0.0241, "step": 157150 }, { "epoch": 0.655735994859427, "grad_norm": 0.8095393616389145, "learning_rate": 2.470024681564144e-06, "loss": 0.0243, "step": 157155 }, { "epoch": 0.6557568575744173, "grad_norm": 0.34338656982899096, "learning_rate": 2.469985389440756e-06, "loss": 0.0183, "step": 157160 }, { "epoch": 0.6557777202894076, "grad_norm": 0.7276082191409058, "learning_rate": 2.469946099192437e-06, "loss": 0.0193, "step": 157165 }, { "epoch": 0.6557985830043979, "grad_norm": 0.6912502028750855, "learning_rate": 2.469906810819038e-06, "loss": 0.0244, "step": 157170 }, { "epoch": 0.6558194457193881, "grad_norm": 0.9885481918635274, "learning_rate": 2.4698675243204078e-06, "loss": 0.0198, "step": 157175 }, { "epoch": 0.6558403084343785, "grad_norm": 0.5074107452892699, "learning_rate": 2.4698282396964e-06, "loss": 0.0175, "step": 157180 }, { "epoch": 0.6558611711493687, "grad_norm": 0.7310812420292743, "learning_rate": 2.469788956946864e-06, "loss": 0.0187, "step": 157185 }, { "epoch": 0.655882033864359, "grad_norm": 0.9576218293809196, "learning_rate": 2.469749676071651e-06, "loss": 0.0204, "step": 157190 }, { "epoch": 0.6559028965793493, "grad_norm": 0.36560439829397146, "learning_rate": 2.4697103970706122e-06, "loss": 0.0176, "step": 157195 }, { "epoch": 0.6559237592943395, "grad_norm": 0.7465164460840971, "learning_rate": 2.469671119943598e-06, "loss": 0.0251, "step": 157200 }, { "epoch": 0.6559446220093298, "grad_norm": 1.2323715645923607, "learning_rate": 2.46963184469046e-06, "loss": 0.0303, "step": 157205 }, { "epoch": 0.65596548472432, "grad_norm": 0.5869576768103462, "learning_rate": 2.469592571311049e-06, "loss": 0.0293, "step": 157210 }, { "epoch": 0.6559863474393104, "grad_norm": 0.5356005306773421, "learning_rate": 2.469553299805216e-06, "loss": 0.0233, "step": 157215 }, { "epoch": 0.6560072101543006, "grad_norm": 0.8402201706103423, "learning_rate": 2.4695140301728118e-06, "loss": 0.0231, "step": 157220 }, { "epoch": 0.6560280728692909, "grad_norm": 0.8240174735068588, "learning_rate": 2.4694747624136875e-06, "loss": 0.0223, "step": 157225 }, { "epoch": 0.6560489355842812, "grad_norm": 0.732657860247672, "learning_rate": 2.4694354965276948e-06, "loss": 0.0269, "step": 157230 }, { "epoch": 0.6560697982992715, "grad_norm": 0.26607458190817057, "learning_rate": 2.469396232514684e-06, "loss": 0.0201, "step": 157235 }, { "epoch": 0.6560906610142617, "grad_norm": 0.5185474698665347, "learning_rate": 2.4693569703745063e-06, "loss": 0.0313, "step": 157240 }, { "epoch": 0.6561115237292521, "grad_norm": 0.8501875622464362, "learning_rate": 2.4693177101070136e-06, "loss": 0.028, "step": 157245 }, { "epoch": 0.6561323864442423, "grad_norm": 1.0425556133124416, "learning_rate": 2.4692784517120562e-06, "loss": 0.0263, "step": 157250 }, { "epoch": 0.6561532491592326, "grad_norm": 0.5874641581283637, "learning_rate": 2.4692391951894855e-06, "loss": 0.0164, "step": 157255 }, { "epoch": 0.6561741118742228, "grad_norm": 0.7698725636915966, "learning_rate": 2.469199940539153e-06, "loss": 0.0233, "step": 157260 }, { "epoch": 0.6561949745892132, "grad_norm": 0.8055648488719964, "learning_rate": 2.4691606877609094e-06, "loss": 0.0211, "step": 157265 }, { "epoch": 0.6562158373042034, "grad_norm": 0.7163071599744831, "learning_rate": 2.469121436854606e-06, "loss": 0.0245, "step": 157270 }, { "epoch": 0.6562367000191937, "grad_norm": 0.7144997194353987, "learning_rate": 2.469082187820094e-06, "loss": 0.0214, "step": 157275 }, { "epoch": 0.656257562734184, "grad_norm": 0.38171166264058753, "learning_rate": 2.469042940657225e-06, "loss": 0.0218, "step": 157280 }, { "epoch": 0.6562784254491743, "grad_norm": 0.5354693380373416, "learning_rate": 2.4690036953658496e-06, "loss": 0.018, "step": 157285 }, { "epoch": 0.6562992881641645, "grad_norm": 0.5258423783039341, "learning_rate": 2.46896445194582e-06, "loss": 0.0237, "step": 157290 }, { "epoch": 0.6563201508791549, "grad_norm": 0.5425665035524182, "learning_rate": 2.468925210396986e-06, "loss": 0.0267, "step": 157295 }, { "epoch": 0.6563410135941451, "grad_norm": 0.38006899678089473, "learning_rate": 2.4688859707192008e-06, "loss": 0.019, "step": 157300 }, { "epoch": 0.6563618763091353, "grad_norm": 0.30095193273745846, "learning_rate": 2.468846732912315e-06, "loss": 0.0173, "step": 157305 }, { "epoch": 0.6563827390241257, "grad_norm": 0.5799513947044389, "learning_rate": 2.468807496976179e-06, "loss": 0.0277, "step": 157310 }, { "epoch": 0.6564036017391159, "grad_norm": 0.9351052408950169, "learning_rate": 2.468768262910645e-06, "loss": 0.0332, "step": 157315 }, { "epoch": 0.6564244644541062, "grad_norm": 1.009186700068842, "learning_rate": 2.468729030715564e-06, "loss": 0.0251, "step": 157320 }, { "epoch": 0.6564453271690964, "grad_norm": 1.2141674564026566, "learning_rate": 2.468689800390788e-06, "loss": 0.0264, "step": 157325 }, { "epoch": 0.6564661898840868, "grad_norm": 0.9564402793405737, "learning_rate": 2.468650571936168e-06, "loss": 0.0209, "step": 157330 }, { "epoch": 0.656487052599077, "grad_norm": 1.026225615201625, "learning_rate": 2.468611345351555e-06, "loss": 0.0261, "step": 157335 }, { "epoch": 0.6565079153140673, "grad_norm": 0.5463356175994087, "learning_rate": 2.468572120636801e-06, "loss": 0.025, "step": 157340 }, { "epoch": 0.6565287780290576, "grad_norm": 0.9179951402156253, "learning_rate": 2.4685328977917577e-06, "loss": 0.022, "step": 157345 }, { "epoch": 0.6565496407440479, "grad_norm": 1.1740503419459933, "learning_rate": 2.468493676816276e-06, "loss": 0.0218, "step": 157350 }, { "epoch": 0.6565705034590381, "grad_norm": 0.5031376253406413, "learning_rate": 2.468454457710207e-06, "loss": 0.0175, "step": 157355 }, { "epoch": 0.6565913661740285, "grad_norm": 0.44112147609483204, "learning_rate": 2.468415240473403e-06, "loss": 0.0199, "step": 157360 }, { "epoch": 0.6566122288890187, "grad_norm": 0.746528934997708, "learning_rate": 2.4683760251057158e-06, "loss": 0.0246, "step": 157365 }, { "epoch": 0.656633091604009, "grad_norm": 0.7025119138570478, "learning_rate": 2.468336811606996e-06, "loss": 0.0285, "step": 157370 }, { "epoch": 0.6566539543189993, "grad_norm": 0.7721759238729637, "learning_rate": 2.468297599977096e-06, "loss": 0.0207, "step": 157375 }, { "epoch": 0.6566748170339896, "grad_norm": 1.3728532860831493, "learning_rate": 2.468258390215866e-06, "loss": 0.0234, "step": 157380 }, { "epoch": 0.6566956797489798, "grad_norm": 0.7932110084262306, "learning_rate": 2.468219182323159e-06, "loss": 0.0287, "step": 157385 }, { "epoch": 0.65671654246397, "grad_norm": 0.5221153896669634, "learning_rate": 2.4681799762988266e-06, "loss": 0.0223, "step": 157390 }, { "epoch": 0.6567374051789604, "grad_norm": 0.7363329109909383, "learning_rate": 2.4681407721427194e-06, "loss": 0.0256, "step": 157395 }, { "epoch": 0.6567582678939506, "grad_norm": 0.5756005111224283, "learning_rate": 2.468101569854689e-06, "loss": 0.0171, "step": 157400 }, { "epoch": 0.6567791306089409, "grad_norm": 0.6775664740058521, "learning_rate": 2.4680623694345884e-06, "loss": 0.0223, "step": 157405 }, { "epoch": 0.6567999933239312, "grad_norm": 0.6911763587033269, "learning_rate": 2.4680231708822686e-06, "loss": 0.0268, "step": 157410 }, { "epoch": 0.6568208560389215, "grad_norm": 0.3800026699891746, "learning_rate": 2.4679839741975806e-06, "loss": 0.0237, "step": 157415 }, { "epoch": 0.6568417187539117, "grad_norm": 1.8406413400276338, "learning_rate": 2.4679447793803767e-06, "loss": 0.0249, "step": 157420 }, { "epoch": 0.6568625814689021, "grad_norm": 0.35781191147115354, "learning_rate": 2.467905586430509e-06, "loss": 0.022, "step": 157425 }, { "epoch": 0.6568834441838923, "grad_norm": 0.46667748183839597, "learning_rate": 2.467866395347828e-06, "loss": 0.0203, "step": 157430 }, { "epoch": 0.6569043068988826, "grad_norm": 0.8617904998308227, "learning_rate": 2.467827206132187e-06, "loss": 0.0263, "step": 157435 }, { "epoch": 0.6569251696138728, "grad_norm": 1.174277866703321, "learning_rate": 2.4677880187834367e-06, "loss": 0.0226, "step": 157440 }, { "epoch": 0.6569460323288632, "grad_norm": 0.4823585353092567, "learning_rate": 2.4677488333014286e-06, "loss": 0.0237, "step": 157445 }, { "epoch": 0.6569668950438534, "grad_norm": 0.6895573351880709, "learning_rate": 2.467709649686016e-06, "loss": 0.024, "step": 157450 }, { "epoch": 0.6569877577588437, "grad_norm": 1.7133789698554127, "learning_rate": 2.467670467937049e-06, "loss": 0.0248, "step": 157455 }, { "epoch": 0.657008620473834, "grad_norm": 0.9125596565986686, "learning_rate": 2.467631288054381e-06, "loss": 0.0243, "step": 157460 }, { "epoch": 0.6570294831888243, "grad_norm": 1.2928732059912402, "learning_rate": 2.4675921100378627e-06, "loss": 0.0321, "step": 157465 }, { "epoch": 0.6570503459038145, "grad_norm": 0.8732061503402344, "learning_rate": 2.467552933887346e-06, "loss": 0.0249, "step": 157470 }, { "epoch": 0.6570712086188049, "grad_norm": 0.5443825168846836, "learning_rate": 2.467513759602683e-06, "loss": 0.0253, "step": 157475 }, { "epoch": 0.6570920713337951, "grad_norm": 0.7771516150461786, "learning_rate": 2.467474587183727e-06, "loss": 0.0222, "step": 157480 }, { "epoch": 0.6571129340487853, "grad_norm": 1.8721412212708999, "learning_rate": 2.467435416630327e-06, "loss": 0.0269, "step": 157485 }, { "epoch": 0.6571337967637757, "grad_norm": 0.6599222989108964, "learning_rate": 2.4673962479423376e-06, "loss": 0.0188, "step": 157490 }, { "epoch": 0.657154659478766, "grad_norm": 1.209306616631796, "learning_rate": 2.4673570811196095e-06, "loss": 0.023, "step": 157495 }, { "epoch": 0.6571755221937562, "grad_norm": 0.8424135875682229, "learning_rate": 2.4673179161619945e-06, "loss": 0.0183, "step": 157500 }, { "epoch": 0.6571963849087464, "grad_norm": 0.5535269894674231, "learning_rate": 2.467278753069345e-06, "loss": 0.0243, "step": 157505 }, { "epoch": 0.6572172476237368, "grad_norm": 0.5196775864496771, "learning_rate": 2.4672395918415124e-06, "loss": 0.0165, "step": 157510 }, { "epoch": 0.657238110338727, "grad_norm": 0.6474721455507787, "learning_rate": 2.4672004324783506e-06, "loss": 0.023, "step": 157515 }, { "epoch": 0.6572589730537173, "grad_norm": 0.8643337040852127, "learning_rate": 2.467161274979709e-06, "loss": 0.0209, "step": 157520 }, { "epoch": 0.6572798357687076, "grad_norm": 0.7506052013601325, "learning_rate": 2.467122119345441e-06, "loss": 0.0235, "step": 157525 }, { "epoch": 0.6573006984836979, "grad_norm": 0.8595111372295396, "learning_rate": 2.467082965575399e-06, "loss": 0.0219, "step": 157530 }, { "epoch": 0.6573215611986881, "grad_norm": 0.5337050070576901, "learning_rate": 2.4670438136694346e-06, "loss": 0.0139, "step": 157535 }, { "epoch": 0.6573424239136785, "grad_norm": 0.3908019280809955, "learning_rate": 2.4670046636274e-06, "loss": 0.0212, "step": 157540 }, { "epoch": 0.6573632866286687, "grad_norm": 0.5508202544735973, "learning_rate": 2.466965515449147e-06, "loss": 0.0197, "step": 157545 }, { "epoch": 0.657384149343659, "grad_norm": 0.8375813663822044, "learning_rate": 2.466926369134528e-06, "loss": 0.0244, "step": 157550 }, { "epoch": 0.6574050120586493, "grad_norm": 0.7890682455900303, "learning_rate": 2.4668872246833954e-06, "loss": 0.0311, "step": 157555 }, { "epoch": 0.6574258747736396, "grad_norm": 0.570773318555066, "learning_rate": 2.4668480820956003e-06, "loss": 0.0278, "step": 157560 }, { "epoch": 0.6574467374886298, "grad_norm": 0.8150868087519485, "learning_rate": 2.466808941370996e-06, "loss": 0.0205, "step": 157565 }, { "epoch": 0.65746760020362, "grad_norm": 1.031477810916248, "learning_rate": 2.4667698025094346e-06, "loss": 0.0178, "step": 157570 }, { "epoch": 0.6574884629186104, "grad_norm": 0.7338246374627436, "learning_rate": 2.466730665510768e-06, "loss": 0.0234, "step": 157575 }, { "epoch": 0.6575093256336006, "grad_norm": 0.957709742334154, "learning_rate": 2.4666915303748485e-06, "loss": 0.0339, "step": 157580 }, { "epoch": 0.6575301883485909, "grad_norm": 0.8164047515967583, "learning_rate": 2.466652397101528e-06, "loss": 0.0265, "step": 157585 }, { "epoch": 0.6575510510635812, "grad_norm": 0.8629056307797632, "learning_rate": 2.466613265690659e-06, "loss": 0.0289, "step": 157590 }, { "epoch": 0.6575719137785715, "grad_norm": 0.9002578136236522, "learning_rate": 2.466574136142094e-06, "loss": 0.022, "step": 157595 }, { "epoch": 0.6575927764935617, "grad_norm": 1.0962954332403714, "learning_rate": 2.4665350084556855e-06, "loss": 0.0269, "step": 157600 }, { "epoch": 0.6576136392085521, "grad_norm": 0.6998692741098707, "learning_rate": 2.4664958826312853e-06, "loss": 0.0221, "step": 157605 }, { "epoch": 0.6576345019235423, "grad_norm": 0.9820610967628752, "learning_rate": 2.4664567586687453e-06, "loss": 0.0194, "step": 157610 }, { "epoch": 0.6576553646385326, "grad_norm": 0.4495236090818195, "learning_rate": 2.4664176365679185e-06, "loss": 0.0225, "step": 157615 }, { "epoch": 0.6576762273535228, "grad_norm": 0.5618046427894816, "learning_rate": 2.4663785163286575e-06, "loss": 0.0282, "step": 157620 }, { "epoch": 0.6576970900685132, "grad_norm": 0.5732986654526447, "learning_rate": 2.466339397950814e-06, "loss": 0.0194, "step": 157625 }, { "epoch": 0.6577179527835034, "grad_norm": 0.5117343986208852, "learning_rate": 2.466300281434241e-06, "loss": 0.0207, "step": 157630 }, { "epoch": 0.6577388154984937, "grad_norm": 1.4583817875432445, "learning_rate": 2.4662611667787904e-06, "loss": 0.0177, "step": 157635 }, { "epoch": 0.657759678213484, "grad_norm": 0.6855499607565011, "learning_rate": 2.4662220539843146e-06, "loss": 0.0228, "step": 157640 }, { "epoch": 0.6577805409284743, "grad_norm": 0.8201924616979657, "learning_rate": 2.466182943050667e-06, "loss": 0.0236, "step": 157645 }, { "epoch": 0.6578014036434645, "grad_norm": 0.608450422007329, "learning_rate": 2.466143833977699e-06, "loss": 0.0255, "step": 157650 }, { "epoch": 0.6578222663584549, "grad_norm": 0.43353038179252085, "learning_rate": 2.466104726765263e-06, "loss": 0.0176, "step": 157655 }, { "epoch": 0.6578431290734451, "grad_norm": 0.7128569296940078, "learning_rate": 2.4660656214132116e-06, "loss": 0.019, "step": 157660 }, { "epoch": 0.6578639917884354, "grad_norm": 0.6809226515568642, "learning_rate": 2.4660265179213983e-06, "loss": 0.0179, "step": 157665 }, { "epoch": 0.6578848545034257, "grad_norm": 0.7022553189576266, "learning_rate": 2.4659874162896754e-06, "loss": 0.0269, "step": 157670 }, { "epoch": 0.657905717218416, "grad_norm": 0.8194036983638922, "learning_rate": 2.4659483165178936e-06, "loss": 0.0236, "step": 157675 }, { "epoch": 0.6579265799334062, "grad_norm": 0.43033592115251446, "learning_rate": 2.4659092186059074e-06, "loss": 0.023, "step": 157680 }, { "epoch": 0.6579474426483964, "grad_norm": 0.9445089814796997, "learning_rate": 2.4658701225535684e-06, "loss": 0.0263, "step": 157685 }, { "epoch": 0.6579683053633868, "grad_norm": 0.7456069986148192, "learning_rate": 2.4658310283607307e-06, "loss": 0.0223, "step": 157690 }, { "epoch": 0.657989168078377, "grad_norm": 0.6022419467040512, "learning_rate": 2.4657919360272445e-06, "loss": 0.0224, "step": 157695 }, { "epoch": 0.6580100307933673, "grad_norm": 0.4055240653984411, "learning_rate": 2.465752845552964e-06, "loss": 0.0207, "step": 157700 }, { "epoch": 0.6580308935083576, "grad_norm": 0.9885830118263169, "learning_rate": 2.4657137569377416e-06, "loss": 0.0265, "step": 157705 }, { "epoch": 0.6580517562233479, "grad_norm": 1.0990938506588321, "learning_rate": 2.4656746701814296e-06, "loss": 0.0285, "step": 157710 }, { "epoch": 0.6580726189383381, "grad_norm": 0.6928131634079752, "learning_rate": 2.465635585283881e-06, "loss": 0.0179, "step": 157715 }, { "epoch": 0.6580934816533285, "grad_norm": 0.46898723832983946, "learning_rate": 2.4655965022449488e-06, "loss": 0.0255, "step": 157720 }, { "epoch": 0.6581143443683187, "grad_norm": 0.7757711895308893, "learning_rate": 2.4655574210644852e-06, "loss": 0.0184, "step": 157725 }, { "epoch": 0.658135207083309, "grad_norm": 0.7277245015143565, "learning_rate": 2.4655183417423425e-06, "loss": 0.0213, "step": 157730 }, { "epoch": 0.6581560697982993, "grad_norm": 0.6288966390806315, "learning_rate": 2.465479264278375e-06, "loss": 0.0187, "step": 157735 }, { "epoch": 0.6581769325132896, "grad_norm": 1.1033043210770266, "learning_rate": 2.4654401886724334e-06, "loss": 0.0181, "step": 157740 }, { "epoch": 0.6581977952282798, "grad_norm": 0.6807663523159255, "learning_rate": 2.465401114924371e-06, "loss": 0.0216, "step": 157745 }, { "epoch": 0.6582186579432701, "grad_norm": 0.5727412978700289, "learning_rate": 2.4653620430340416e-06, "loss": 0.0252, "step": 157750 }, { "epoch": 0.6582395206582604, "grad_norm": 0.6057317425386401, "learning_rate": 2.4653229730012982e-06, "loss": 0.0193, "step": 157755 }, { "epoch": 0.6582603833732507, "grad_norm": 0.47253485705622406, "learning_rate": 2.4652839048259918e-06, "loss": 0.0279, "step": 157760 }, { "epoch": 0.6582812460882409, "grad_norm": 0.8345624246726057, "learning_rate": 2.465244838507976e-06, "loss": 0.0206, "step": 157765 }, { "epoch": 0.6583021088032313, "grad_norm": 1.6083361058076944, "learning_rate": 2.4652057740471053e-06, "loss": 0.0278, "step": 157770 }, { "epoch": 0.6583229715182215, "grad_norm": 0.4220025667287694, "learning_rate": 2.4651667114432305e-06, "loss": 0.0231, "step": 157775 }, { "epoch": 0.6583438342332117, "grad_norm": 0.6412415480815441, "learning_rate": 2.4651276506962044e-06, "loss": 0.023, "step": 157780 }, { "epoch": 0.6583646969482021, "grad_norm": 1.1086561212038937, "learning_rate": 2.465088591805882e-06, "loss": 0.0321, "step": 157785 }, { "epoch": 0.6583855596631923, "grad_norm": 1.2537111951075066, "learning_rate": 2.4650495347721133e-06, "loss": 0.038, "step": 157790 }, { "epoch": 0.6584064223781826, "grad_norm": 0.5172941743670447, "learning_rate": 2.4650104795947536e-06, "loss": 0.0125, "step": 157795 }, { "epoch": 0.6584272850931728, "grad_norm": 1.4037762338642168, "learning_rate": 2.464971426273655e-06, "loss": 0.022, "step": 157800 }, { "epoch": 0.6584481478081632, "grad_norm": 0.7109960025429811, "learning_rate": 2.464932374808671e-06, "loss": 0.0183, "step": 157805 }, { "epoch": 0.6584690105231534, "grad_norm": 0.748047200967129, "learning_rate": 2.464893325199654e-06, "loss": 0.016, "step": 157810 }, { "epoch": 0.6584898732381437, "grad_norm": 0.6831001095510784, "learning_rate": 2.4648542774464566e-06, "loss": 0.024, "step": 157815 }, { "epoch": 0.658510735953134, "grad_norm": 1.0791767512485702, "learning_rate": 2.464815231548933e-06, "loss": 0.0239, "step": 157820 }, { "epoch": 0.6585315986681243, "grad_norm": 1.293510462994428, "learning_rate": 2.4647761875069346e-06, "loss": 0.0239, "step": 157825 }, { "epoch": 0.6585524613831145, "grad_norm": 0.6660738498534329, "learning_rate": 2.464737145320315e-06, "loss": 0.0198, "step": 157830 }, { "epoch": 0.6585733240981049, "grad_norm": 0.6366577031280727, "learning_rate": 2.4646981049889285e-06, "loss": 0.0228, "step": 157835 }, { "epoch": 0.6585941868130951, "grad_norm": 0.6508856307388422, "learning_rate": 2.4646590665126267e-06, "loss": 0.0311, "step": 157840 }, { "epoch": 0.6586150495280854, "grad_norm": 0.5847876834596567, "learning_rate": 2.4646200298912635e-06, "loss": 0.0199, "step": 157845 }, { "epoch": 0.6586359122430757, "grad_norm": 0.547006339039342, "learning_rate": 2.464580995124692e-06, "loss": 0.0161, "step": 157850 }, { "epoch": 0.658656774958066, "grad_norm": 0.5556632840567951, "learning_rate": 2.4645419622127648e-06, "loss": 0.0305, "step": 157855 }, { "epoch": 0.6586776376730562, "grad_norm": 0.3435988729809214, "learning_rate": 2.4645029311553353e-06, "loss": 0.0176, "step": 157860 }, { "epoch": 0.6586985003880464, "grad_norm": 0.6635835499735224, "learning_rate": 2.464463901952257e-06, "loss": 0.0164, "step": 157865 }, { "epoch": 0.6587193631030368, "grad_norm": 0.24703321715412063, "learning_rate": 2.464424874603383e-06, "loss": 0.0207, "step": 157870 }, { "epoch": 0.658740225818027, "grad_norm": 0.7491993441011152, "learning_rate": 2.4643858491085654e-06, "loss": 0.0181, "step": 157875 }, { "epoch": 0.6587610885330173, "grad_norm": 0.34300234864588797, "learning_rate": 2.464346825467659e-06, "loss": 0.0139, "step": 157880 }, { "epoch": 0.6587819512480076, "grad_norm": 0.9335969146137774, "learning_rate": 2.464307803680516e-06, "loss": 0.0322, "step": 157885 }, { "epoch": 0.6588028139629979, "grad_norm": 0.6426876813926986, "learning_rate": 2.46426878374699e-06, "loss": 0.0256, "step": 157890 }, { "epoch": 0.6588236766779881, "grad_norm": 0.7172426690789416, "learning_rate": 2.4642297656669338e-06, "loss": 0.0219, "step": 157895 }, { "epoch": 0.6588445393929785, "grad_norm": 0.7781021249917635, "learning_rate": 2.4641907494402008e-06, "loss": 0.0232, "step": 157900 }, { "epoch": 0.6588654021079687, "grad_norm": 0.6348014270298095, "learning_rate": 2.4641517350666454e-06, "loss": 0.0215, "step": 157905 }, { "epoch": 0.658886264822959, "grad_norm": 0.5254508606240311, "learning_rate": 2.46411272254612e-06, "loss": 0.02, "step": 157910 }, { "epoch": 0.6589071275379493, "grad_norm": 0.7807879601987555, "learning_rate": 2.4640737118784772e-06, "loss": 0.0143, "step": 157915 }, { "epoch": 0.6589279902529396, "grad_norm": 0.6893636279776173, "learning_rate": 2.4640347030635718e-06, "loss": 0.0281, "step": 157920 }, { "epoch": 0.6589488529679298, "grad_norm": 0.8724589984881247, "learning_rate": 2.4639956961012557e-06, "loss": 0.0273, "step": 157925 }, { "epoch": 0.6589697156829201, "grad_norm": 0.6460545321751728, "learning_rate": 2.4639566909913833e-06, "loss": 0.0185, "step": 157930 }, { "epoch": 0.6589905783979104, "grad_norm": 0.5626713535610111, "learning_rate": 2.463917687733808e-06, "loss": 0.0179, "step": 157935 }, { "epoch": 0.6590114411129007, "grad_norm": 0.7891032248305199, "learning_rate": 2.463878686328383e-06, "loss": 0.0227, "step": 157940 }, { "epoch": 0.6590323038278909, "grad_norm": 0.5827244187209626, "learning_rate": 2.4638396867749612e-06, "loss": 0.023, "step": 157945 }, { "epoch": 0.6590531665428813, "grad_norm": 0.7328100227974887, "learning_rate": 2.463800689073396e-06, "loss": 0.024, "step": 157950 }, { "epoch": 0.6590740292578715, "grad_norm": 0.6831958485257518, "learning_rate": 2.463761693223542e-06, "loss": 0.0176, "step": 157955 }, { "epoch": 0.6590948919728618, "grad_norm": 0.6190944981652267, "learning_rate": 2.4637226992252516e-06, "loss": 0.0213, "step": 157960 }, { "epoch": 0.6591157546878521, "grad_norm": 0.38881219831264846, "learning_rate": 2.4636837070783786e-06, "loss": 0.0146, "step": 157965 }, { "epoch": 0.6591366174028424, "grad_norm": 0.8861761077438375, "learning_rate": 2.4636447167827766e-06, "loss": 0.0216, "step": 157970 }, { "epoch": 0.6591574801178326, "grad_norm": 0.7173837055994884, "learning_rate": 2.4636057283382995e-06, "loss": 0.0165, "step": 157975 }, { "epoch": 0.6591783428328228, "grad_norm": 0.7660267760527985, "learning_rate": 2.4635667417448e-06, "loss": 0.0235, "step": 157980 }, { "epoch": 0.6591992055478132, "grad_norm": 0.7953661273670085, "learning_rate": 2.463527757002131e-06, "loss": 0.0178, "step": 157985 }, { "epoch": 0.6592200682628034, "grad_norm": 0.6213063254927277, "learning_rate": 2.463488774110148e-06, "loss": 0.0259, "step": 157990 }, { "epoch": 0.6592409309777937, "grad_norm": 0.661671779727345, "learning_rate": 2.463449793068704e-06, "loss": 0.0258, "step": 157995 }, { "epoch": 0.659261793692784, "grad_norm": 0.6831663272546772, "learning_rate": 2.463410813877652e-06, "loss": 0.0205, "step": 158000 }, { "epoch": 0.6592826564077743, "grad_norm": 1.0674366688523536, "learning_rate": 2.463371836536845e-06, "loss": 0.0257, "step": 158005 }, { "epoch": 0.6593035191227645, "grad_norm": 1.062028652147505, "learning_rate": 2.463332861046139e-06, "loss": 0.0209, "step": 158010 }, { "epoch": 0.6593243818377549, "grad_norm": 1.1045333852375412, "learning_rate": 2.463293887405385e-06, "loss": 0.0224, "step": 158015 }, { "epoch": 0.6593452445527451, "grad_norm": 0.7457090077117539, "learning_rate": 2.463254915614438e-06, "loss": 0.0227, "step": 158020 }, { "epoch": 0.6593661072677354, "grad_norm": 0.7892215773478835, "learning_rate": 2.463215945673152e-06, "loss": 0.0245, "step": 158025 }, { "epoch": 0.6593869699827257, "grad_norm": 1.4261256515086733, "learning_rate": 2.4631769775813795e-06, "loss": 0.0178, "step": 158030 }, { "epoch": 0.659407832697716, "grad_norm": 0.8270414034259738, "learning_rate": 2.463138011338975e-06, "loss": 0.0229, "step": 158035 }, { "epoch": 0.6594286954127062, "grad_norm": 0.6867409565367163, "learning_rate": 2.463099046945792e-06, "loss": 0.0273, "step": 158040 }, { "epoch": 0.6594495581276965, "grad_norm": 0.8811726370893409, "learning_rate": 2.463060084401685e-06, "loss": 0.0248, "step": 158045 }, { "epoch": 0.6594704208426868, "grad_norm": 1.5356785237690955, "learning_rate": 2.4630211237065064e-06, "loss": 0.028, "step": 158050 }, { "epoch": 0.659491283557677, "grad_norm": 1.4241630501186058, "learning_rate": 2.4629821648601104e-06, "loss": 0.0281, "step": 158055 }, { "epoch": 0.6595121462726673, "grad_norm": 0.6869373071295354, "learning_rate": 2.4629432078623514e-06, "loss": 0.0246, "step": 158060 }, { "epoch": 0.6595330089876577, "grad_norm": 0.7206322609291257, "learning_rate": 2.4629042527130835e-06, "loss": 0.0206, "step": 158065 }, { "epoch": 0.6595538717026479, "grad_norm": 0.5376431755918565, "learning_rate": 2.4628652994121584e-06, "loss": 0.023, "step": 158070 }, { "epoch": 0.6595747344176381, "grad_norm": 1.0624409202527667, "learning_rate": 2.462826347959433e-06, "loss": 0.0302, "step": 158075 }, { "epoch": 0.6595955971326285, "grad_norm": 0.6472724467431225, "learning_rate": 2.4627873983547583e-06, "loss": 0.0298, "step": 158080 }, { "epoch": 0.6596164598476187, "grad_norm": 0.6954631094218828, "learning_rate": 2.4627484505979904e-06, "loss": 0.0138, "step": 158085 }, { "epoch": 0.659637322562609, "grad_norm": 0.6853132562417555, "learning_rate": 2.4627095046889816e-06, "loss": 0.0219, "step": 158090 }, { "epoch": 0.6596581852775993, "grad_norm": 0.5949041461493056, "learning_rate": 2.4626705606275867e-06, "loss": 0.0159, "step": 158095 }, { "epoch": 0.6596790479925896, "grad_norm": 1.0712796025201612, "learning_rate": 2.4626316184136588e-06, "loss": 0.0211, "step": 158100 }, { "epoch": 0.6596999107075798, "grad_norm": 0.4715026932495961, "learning_rate": 2.462592678047053e-06, "loss": 0.0195, "step": 158105 }, { "epoch": 0.6597207734225701, "grad_norm": 0.8687224823650678, "learning_rate": 2.4625537395276223e-06, "loss": 0.024, "step": 158110 }, { "epoch": 0.6597416361375604, "grad_norm": 0.32779430077068494, "learning_rate": 2.4625148028552215e-06, "loss": 0.0166, "step": 158115 }, { "epoch": 0.6597624988525507, "grad_norm": 0.7015233047450726, "learning_rate": 2.4624758680297036e-06, "loss": 0.0206, "step": 158120 }, { "epoch": 0.6597833615675409, "grad_norm": 0.2613751684532231, "learning_rate": 2.4624369350509225e-06, "loss": 0.0238, "step": 158125 }, { "epoch": 0.6598042242825313, "grad_norm": 0.7719399657599654, "learning_rate": 2.462398003918734e-06, "loss": 0.0236, "step": 158130 }, { "epoch": 0.6598250869975215, "grad_norm": 1.452242569194895, "learning_rate": 2.46235907463299e-06, "loss": 0.02, "step": 158135 }, { "epoch": 0.6598459497125118, "grad_norm": 0.5139274357194117, "learning_rate": 2.462320147193546e-06, "loss": 0.0241, "step": 158140 }, { "epoch": 0.6598668124275021, "grad_norm": 0.5792899249326403, "learning_rate": 2.4622812216002555e-06, "loss": 0.0231, "step": 158145 }, { "epoch": 0.6598876751424924, "grad_norm": 0.45212874870756303, "learning_rate": 2.462242297852972e-06, "loss": 0.0207, "step": 158150 }, { "epoch": 0.6599085378574826, "grad_norm": 0.9861109575289552, "learning_rate": 2.462203375951551e-06, "loss": 0.0289, "step": 158155 }, { "epoch": 0.6599294005724728, "grad_norm": 0.7235331553038417, "learning_rate": 2.4621644558958447e-06, "loss": 0.0271, "step": 158160 }, { "epoch": 0.6599502632874632, "grad_norm": 0.3399845038066505, "learning_rate": 2.46212553768571e-06, "loss": 0.0129, "step": 158165 }, { "epoch": 0.6599711260024534, "grad_norm": 0.7127002134989676, "learning_rate": 2.4620866213209986e-06, "loss": 0.0193, "step": 158170 }, { "epoch": 0.6599919887174437, "grad_norm": 0.7432857346378283, "learning_rate": 2.4620477068015646e-06, "loss": 0.0146, "step": 158175 }, { "epoch": 0.660012851432434, "grad_norm": 0.24109842263297002, "learning_rate": 2.4620087941272644e-06, "loss": 0.012, "step": 158180 }, { "epoch": 0.6600337141474243, "grad_norm": 0.42806898705044166, "learning_rate": 2.4619698832979498e-06, "loss": 0.0181, "step": 158185 }, { "epoch": 0.6600545768624145, "grad_norm": 0.8117154421799732, "learning_rate": 2.4619309743134763e-06, "loss": 0.0208, "step": 158190 }, { "epoch": 0.6600754395774049, "grad_norm": 0.5150956581019506, "learning_rate": 2.4618920671736983e-06, "loss": 0.0278, "step": 158195 }, { "epoch": 0.6600963022923951, "grad_norm": 0.46136605970200395, "learning_rate": 2.4618531618784692e-06, "loss": 0.0161, "step": 158200 }, { "epoch": 0.6601171650073854, "grad_norm": 0.5482862891039096, "learning_rate": 2.4618142584276438e-06, "loss": 0.0181, "step": 158205 }, { "epoch": 0.6601380277223757, "grad_norm": 0.8652925881082401, "learning_rate": 2.4617753568210763e-06, "loss": 0.0174, "step": 158210 }, { "epoch": 0.660158890437366, "grad_norm": 0.3414954582595008, "learning_rate": 2.4617364570586207e-06, "loss": 0.0221, "step": 158215 }, { "epoch": 0.6601797531523562, "grad_norm": 0.6487022534025567, "learning_rate": 2.4616975591401317e-06, "loss": 0.0237, "step": 158220 }, { "epoch": 0.6602006158673465, "grad_norm": 0.4527028387510467, "learning_rate": 2.461658663065463e-06, "loss": 0.0155, "step": 158225 }, { "epoch": 0.6602214785823368, "grad_norm": 0.29046707411229505, "learning_rate": 2.46161976883447e-06, "loss": 0.0221, "step": 158230 }, { "epoch": 0.6602423412973271, "grad_norm": 1.0749613903813022, "learning_rate": 2.4615808764470063e-06, "loss": 0.024, "step": 158235 }, { "epoch": 0.6602632040123173, "grad_norm": 0.46525531384978625, "learning_rate": 2.461541985902926e-06, "loss": 0.0193, "step": 158240 }, { "epoch": 0.6602840667273077, "grad_norm": 1.4332037848102273, "learning_rate": 2.4615030972020836e-06, "loss": 0.0218, "step": 158245 }, { "epoch": 0.6603049294422979, "grad_norm": 1.276251846302483, "learning_rate": 2.4614642103443346e-06, "loss": 0.0214, "step": 158250 }, { "epoch": 0.6603257921572881, "grad_norm": 0.8309325513315172, "learning_rate": 2.461425325329533e-06, "loss": 0.0275, "step": 158255 }, { "epoch": 0.6603466548722785, "grad_norm": 1.763236526423814, "learning_rate": 2.4613864421575316e-06, "loss": 0.0322, "step": 158260 }, { "epoch": 0.6603675175872687, "grad_norm": 0.6341158845545957, "learning_rate": 2.461347560828187e-06, "loss": 0.0247, "step": 158265 }, { "epoch": 0.660388380302259, "grad_norm": 0.4985336688032097, "learning_rate": 2.4613086813413527e-06, "loss": 0.0293, "step": 158270 }, { "epoch": 0.6604092430172493, "grad_norm": 0.5881340800175012, "learning_rate": 2.4612698036968827e-06, "loss": 0.0169, "step": 158275 }, { "epoch": 0.6604301057322396, "grad_norm": 1.2723612737857986, "learning_rate": 2.4612309278946323e-06, "loss": 0.0247, "step": 158280 }, { "epoch": 0.6604509684472298, "grad_norm": 0.4621851783609011, "learning_rate": 2.461192053934456e-06, "loss": 0.0186, "step": 158285 }, { "epoch": 0.6604718311622201, "grad_norm": 1.3832441565288323, "learning_rate": 2.461153181816208e-06, "loss": 0.0225, "step": 158290 }, { "epoch": 0.6604926938772104, "grad_norm": 0.8864016129981757, "learning_rate": 2.461114311539743e-06, "loss": 0.0201, "step": 158295 }, { "epoch": 0.6605135565922007, "grad_norm": 0.927792237869334, "learning_rate": 2.4610754431049148e-06, "loss": 0.0243, "step": 158300 }, { "epoch": 0.6605344193071909, "grad_norm": 0.7364481647863157, "learning_rate": 2.46103657651158e-06, "loss": 0.0277, "step": 158305 }, { "epoch": 0.6605552820221813, "grad_norm": 0.490911954419617, "learning_rate": 2.460997711759591e-06, "loss": 0.0185, "step": 158310 }, { "epoch": 0.6605761447371715, "grad_norm": 1.1051257460424788, "learning_rate": 2.4609588488488033e-06, "loss": 0.0228, "step": 158315 }, { "epoch": 0.6605970074521618, "grad_norm": 0.6736350542601788, "learning_rate": 2.4609199877790715e-06, "loss": 0.0198, "step": 158320 }, { "epoch": 0.6606178701671521, "grad_norm": 0.6696917359039839, "learning_rate": 2.4608811285502506e-06, "loss": 0.0167, "step": 158325 }, { "epoch": 0.6606387328821424, "grad_norm": 0.8610671320152772, "learning_rate": 2.4608422711621945e-06, "loss": 0.0208, "step": 158330 }, { "epoch": 0.6606595955971326, "grad_norm": 0.738717509806592, "learning_rate": 2.4608034156147587e-06, "loss": 0.0226, "step": 158335 }, { "epoch": 0.6606804583121229, "grad_norm": 0.5582341343359275, "learning_rate": 2.4607645619077973e-06, "loss": 0.0236, "step": 158340 }, { "epoch": 0.6607013210271132, "grad_norm": 1.2233238110925322, "learning_rate": 2.4607257100411657e-06, "loss": 0.0321, "step": 158345 }, { "epoch": 0.6607221837421035, "grad_norm": 0.44009340881023873, "learning_rate": 2.4606868600147174e-06, "loss": 0.0256, "step": 158350 }, { "epoch": 0.6607430464570937, "grad_norm": 0.7912410309321922, "learning_rate": 2.460648011828308e-06, "loss": 0.0254, "step": 158355 }, { "epoch": 0.660763909172084, "grad_norm": 0.6039230470341302, "learning_rate": 2.4606091654817922e-06, "loss": 0.022, "step": 158360 }, { "epoch": 0.6607847718870743, "grad_norm": 0.2999262615454761, "learning_rate": 2.460570320975025e-06, "loss": 0.0211, "step": 158365 }, { "epoch": 0.6608056346020645, "grad_norm": 0.5191659179025709, "learning_rate": 2.4605314783078605e-06, "loss": 0.0176, "step": 158370 }, { "epoch": 0.6608264973170549, "grad_norm": 0.6581099132112415, "learning_rate": 2.460492637480154e-06, "loss": 0.0208, "step": 158375 }, { "epoch": 0.6608473600320451, "grad_norm": 0.6967008192418004, "learning_rate": 2.46045379849176e-06, "loss": 0.0165, "step": 158380 }, { "epoch": 0.6608682227470354, "grad_norm": 0.5678045224913254, "learning_rate": 2.460414961342534e-06, "loss": 0.0209, "step": 158385 }, { "epoch": 0.6608890854620257, "grad_norm": 0.5873090810222346, "learning_rate": 2.46037612603233e-06, "loss": 0.0175, "step": 158390 }, { "epoch": 0.660909948177016, "grad_norm": 0.6385842273707764, "learning_rate": 2.460337292561003e-06, "loss": 0.0221, "step": 158395 }, { "epoch": 0.6609308108920062, "grad_norm": 0.2581663845514874, "learning_rate": 2.460298460928409e-06, "loss": 0.0204, "step": 158400 }, { "epoch": 0.6609516736069965, "grad_norm": 0.5383200922533852, "learning_rate": 2.460259631134402e-06, "loss": 0.0197, "step": 158405 }, { "epoch": 0.6609725363219868, "grad_norm": 0.6706045208395632, "learning_rate": 2.460220803178836e-06, "loss": 0.0225, "step": 158410 }, { "epoch": 0.6609933990369771, "grad_norm": 0.5874349655126052, "learning_rate": 2.4601819770615678e-06, "loss": 0.0252, "step": 158415 }, { "epoch": 0.6610142617519673, "grad_norm": 0.42557014789644515, "learning_rate": 2.460143152782451e-06, "loss": 0.0201, "step": 158420 }, { "epoch": 0.6610351244669577, "grad_norm": 0.4038400989183402, "learning_rate": 2.460104330341341e-06, "loss": 0.0189, "step": 158425 }, { "epoch": 0.6610559871819479, "grad_norm": 1.9459896043306972, "learning_rate": 2.4600655097380928e-06, "loss": 0.0246, "step": 158430 }, { "epoch": 0.6610768498969382, "grad_norm": 0.8794083641217786, "learning_rate": 2.460026690972562e-06, "loss": 0.0276, "step": 158435 }, { "epoch": 0.6610977126119285, "grad_norm": 1.2158069119204382, "learning_rate": 2.4599878740446022e-06, "loss": 0.0209, "step": 158440 }, { "epoch": 0.6611185753269188, "grad_norm": 0.6961101883682278, "learning_rate": 2.4599490589540693e-06, "loss": 0.0265, "step": 158445 }, { "epoch": 0.661139438041909, "grad_norm": 0.8135087012760184, "learning_rate": 2.459910245700819e-06, "loss": 0.0309, "step": 158450 }, { "epoch": 0.6611603007568994, "grad_norm": 0.7291948698667998, "learning_rate": 2.459871434284705e-06, "loss": 0.0206, "step": 158455 }, { "epoch": 0.6611811634718896, "grad_norm": 0.564887644695683, "learning_rate": 2.459832624705583e-06, "loss": 0.0285, "step": 158460 }, { "epoch": 0.6612020261868798, "grad_norm": 0.5282906212964664, "learning_rate": 2.4597938169633083e-06, "loss": 0.0286, "step": 158465 }, { "epoch": 0.6612228889018701, "grad_norm": 3.094833954560974, "learning_rate": 2.4597550110577356e-06, "loss": 0.0335, "step": 158470 }, { "epoch": 0.6612437516168604, "grad_norm": 0.468702188796207, "learning_rate": 2.459716206988721e-06, "loss": 0.0198, "step": 158475 }, { "epoch": 0.6612646143318507, "grad_norm": 0.9342534431323071, "learning_rate": 2.4596774047561176e-06, "loss": 0.0204, "step": 158480 }, { "epoch": 0.6612854770468409, "grad_norm": 0.9176924693721016, "learning_rate": 2.459638604359783e-06, "loss": 0.0195, "step": 158485 }, { "epoch": 0.6613063397618313, "grad_norm": 0.788457685096083, "learning_rate": 2.459599805799571e-06, "loss": 0.0216, "step": 158490 }, { "epoch": 0.6613272024768215, "grad_norm": 0.7063696595046483, "learning_rate": 2.4595610090753364e-06, "loss": 0.0221, "step": 158495 }, { "epoch": 0.6613480651918118, "grad_norm": 0.5298862085411963, "learning_rate": 2.4595222141869353e-06, "loss": 0.0193, "step": 158500 }, { "epoch": 0.6613689279068021, "grad_norm": 0.7896597175550747, "learning_rate": 2.4594834211342223e-06, "loss": 0.0264, "step": 158505 }, { "epoch": 0.6613897906217924, "grad_norm": 0.5991693978552172, "learning_rate": 2.4594446299170535e-06, "loss": 0.0295, "step": 158510 }, { "epoch": 0.6614106533367826, "grad_norm": 0.6260013822112342, "learning_rate": 2.4594058405352834e-06, "loss": 0.018, "step": 158515 }, { "epoch": 0.6614315160517729, "grad_norm": 0.9033907977368725, "learning_rate": 2.4593670529887683e-06, "loss": 0.0318, "step": 158520 }, { "epoch": 0.6614523787667632, "grad_norm": 0.6644775706324095, "learning_rate": 2.4593282672773614e-06, "loss": 0.0256, "step": 158525 }, { "epoch": 0.6614732414817535, "grad_norm": 1.0969680058861921, "learning_rate": 2.4592894834009197e-06, "loss": 0.0218, "step": 158530 }, { "epoch": 0.6614941041967437, "grad_norm": 0.2763068412194331, "learning_rate": 2.459250701359298e-06, "loss": 0.017, "step": 158535 }, { "epoch": 0.6615149669117341, "grad_norm": 0.7888550674496343, "learning_rate": 2.459211921152352e-06, "loss": 0.0219, "step": 158540 }, { "epoch": 0.6615358296267243, "grad_norm": 0.802956856486247, "learning_rate": 2.4591731427799364e-06, "loss": 0.023, "step": 158545 }, { "epoch": 0.6615566923417145, "grad_norm": 0.6527155160834744, "learning_rate": 2.4591343662419073e-06, "loss": 0.0223, "step": 158550 }, { "epoch": 0.6615775550567049, "grad_norm": 0.5148076274421395, "learning_rate": 2.4590955915381193e-06, "loss": 0.0221, "step": 158555 }, { "epoch": 0.6615984177716951, "grad_norm": 0.8474160123918593, "learning_rate": 2.4590568186684286e-06, "loss": 0.0249, "step": 158560 }, { "epoch": 0.6616192804866854, "grad_norm": 0.6332029637635774, "learning_rate": 2.4590180476326904e-06, "loss": 0.0226, "step": 158565 }, { "epoch": 0.6616401432016757, "grad_norm": 0.6555312007830447, "learning_rate": 2.458979278430759e-06, "loss": 0.0247, "step": 158570 }, { "epoch": 0.661661005916666, "grad_norm": 0.41791962453438153, "learning_rate": 2.4589405110624915e-06, "loss": 0.0176, "step": 158575 }, { "epoch": 0.6616818686316562, "grad_norm": 0.38687646405940296, "learning_rate": 2.4589017455277423e-06, "loss": 0.0172, "step": 158580 }, { "epoch": 0.6617027313466465, "grad_norm": 0.5411364747236453, "learning_rate": 2.458862981826367e-06, "loss": 0.0253, "step": 158585 }, { "epoch": 0.6617235940616368, "grad_norm": 0.3902419976849721, "learning_rate": 2.4588242199582225e-06, "loss": 0.0182, "step": 158590 }, { "epoch": 0.6617444567766271, "grad_norm": 1.0028536941106705, "learning_rate": 2.4587854599231625e-06, "loss": 0.0134, "step": 158595 }, { "epoch": 0.6617653194916173, "grad_norm": 0.3309265888012067, "learning_rate": 2.4587467017210433e-06, "loss": 0.0162, "step": 158600 }, { "epoch": 0.6617861822066077, "grad_norm": 0.6438428026648679, "learning_rate": 2.458707945351719e-06, "loss": 0.029, "step": 158605 }, { "epoch": 0.6618070449215979, "grad_norm": 0.8240917153062811, "learning_rate": 2.4586691908150482e-06, "loss": 0.0243, "step": 158610 }, { "epoch": 0.6618279076365882, "grad_norm": 0.801260635961654, "learning_rate": 2.458630438110884e-06, "loss": 0.025, "step": 158615 }, { "epoch": 0.6618487703515785, "grad_norm": 0.5414442295238177, "learning_rate": 2.4585916872390823e-06, "loss": 0.0228, "step": 158620 }, { "epoch": 0.6618696330665688, "grad_norm": 0.42633010150473866, "learning_rate": 2.4585529381994998e-06, "loss": 0.0149, "step": 158625 }, { "epoch": 0.661890495781559, "grad_norm": 0.3052279673086764, "learning_rate": 2.458514190991991e-06, "loss": 0.0267, "step": 158630 }, { "epoch": 0.6619113584965494, "grad_norm": 0.8450473091344425, "learning_rate": 2.458475445616412e-06, "loss": 0.0221, "step": 158635 }, { "epoch": 0.6619322212115396, "grad_norm": 0.46873779311703356, "learning_rate": 2.458436702072619e-06, "loss": 0.0997, "step": 158640 }, { "epoch": 0.6619530839265299, "grad_norm": 0.7056284942536778, "learning_rate": 2.458397960360466e-06, "loss": 0.0256, "step": 158645 }, { "epoch": 0.6619739466415201, "grad_norm": 0.6172571926495374, "learning_rate": 2.4583592204798106e-06, "loss": 0.0185, "step": 158650 }, { "epoch": 0.6619948093565104, "grad_norm": 0.9645057781266245, "learning_rate": 2.4583204824305073e-06, "loss": 0.0163, "step": 158655 }, { "epoch": 0.6620156720715007, "grad_norm": 0.5455273120322891, "learning_rate": 2.4582817462124125e-06, "loss": 0.0225, "step": 158660 }, { "epoch": 0.6620365347864909, "grad_norm": 0.7083384988814806, "learning_rate": 2.458243011825382e-06, "loss": 0.026, "step": 158665 }, { "epoch": 0.6620573975014813, "grad_norm": 0.31541072991241176, "learning_rate": 2.4582042792692705e-06, "loss": 0.0139, "step": 158670 }, { "epoch": 0.6620782602164715, "grad_norm": 0.740388162212028, "learning_rate": 2.4581655485439345e-06, "loss": 0.0235, "step": 158675 }, { "epoch": 0.6620991229314618, "grad_norm": 0.6418276682818583, "learning_rate": 2.4581268196492294e-06, "loss": 0.018, "step": 158680 }, { "epoch": 0.6621199856464521, "grad_norm": 0.5312664386055292, "learning_rate": 2.458088092585012e-06, "loss": 0.0241, "step": 158685 }, { "epoch": 0.6621408483614424, "grad_norm": 0.49650791805430183, "learning_rate": 2.4580493673511367e-06, "loss": 0.0156, "step": 158690 }, { "epoch": 0.6621617110764326, "grad_norm": 0.5931712814694765, "learning_rate": 2.4580106439474603e-06, "loss": 0.0243, "step": 158695 }, { "epoch": 0.6621825737914229, "grad_norm": 0.7267241761940646, "learning_rate": 2.4579719223738384e-06, "loss": 0.0229, "step": 158700 }, { "epoch": 0.6622034365064132, "grad_norm": 0.590059353560302, "learning_rate": 2.457933202630127e-06, "loss": 0.0168, "step": 158705 }, { "epoch": 0.6622242992214035, "grad_norm": 0.649285695631496, "learning_rate": 2.457894484716182e-06, "loss": 0.0333, "step": 158710 }, { "epoch": 0.6622451619363937, "grad_norm": 0.4065925246861599, "learning_rate": 2.4578557686318585e-06, "loss": 0.0206, "step": 158715 }, { "epoch": 0.6622660246513841, "grad_norm": 0.6357409576310332, "learning_rate": 2.4578170543770133e-06, "loss": 0.0232, "step": 158720 }, { "epoch": 0.6622868873663743, "grad_norm": 0.7157423452370117, "learning_rate": 2.457778341951502e-06, "loss": 0.0215, "step": 158725 }, { "epoch": 0.6623077500813646, "grad_norm": 0.6485752153868533, "learning_rate": 2.457739631355181e-06, "loss": 0.0183, "step": 158730 }, { "epoch": 0.6623286127963549, "grad_norm": 0.7959096774781094, "learning_rate": 2.4577009225879048e-06, "loss": 0.025, "step": 158735 }, { "epoch": 0.6623494755113452, "grad_norm": 0.3322429526991486, "learning_rate": 2.457662215649531e-06, "loss": 0.0154, "step": 158740 }, { "epoch": 0.6623703382263354, "grad_norm": 0.4213622825631016, "learning_rate": 2.457623510539915e-06, "loss": 0.0221, "step": 158745 }, { "epoch": 0.6623912009413258, "grad_norm": 1.0408934847768732, "learning_rate": 2.4575848072589126e-06, "loss": 0.0248, "step": 158750 }, { "epoch": 0.662412063656316, "grad_norm": 0.6451242552865114, "learning_rate": 2.45754610580638e-06, "loss": 0.0237, "step": 158755 }, { "epoch": 0.6624329263713062, "grad_norm": 1.1231068477485748, "learning_rate": 2.4575074061821737e-06, "loss": 0.0176, "step": 158760 }, { "epoch": 0.6624537890862965, "grad_norm": 0.6148223471868948, "learning_rate": 2.4574687083861485e-06, "loss": 0.0215, "step": 158765 }, { "epoch": 0.6624746518012868, "grad_norm": 0.8101505763559299, "learning_rate": 2.457430012418162e-06, "loss": 0.0229, "step": 158770 }, { "epoch": 0.6624955145162771, "grad_norm": 0.34268707111651636, "learning_rate": 2.4573913182780687e-06, "loss": 0.0215, "step": 158775 }, { "epoch": 0.6625163772312673, "grad_norm": 1.1213902765320007, "learning_rate": 2.4573526259657264e-06, "loss": 0.0165, "step": 158780 }, { "epoch": 0.6625372399462577, "grad_norm": 0.5809126446887445, "learning_rate": 2.4573139354809898e-06, "loss": 0.0218, "step": 158785 }, { "epoch": 0.6625581026612479, "grad_norm": 0.23872300998580284, "learning_rate": 2.4572752468237154e-06, "loss": 0.0165, "step": 158790 }, { "epoch": 0.6625789653762382, "grad_norm": 0.6431829245154695, "learning_rate": 2.4572365599937603e-06, "loss": 0.0162, "step": 158795 }, { "epoch": 0.6625998280912285, "grad_norm": 0.5790248186665442, "learning_rate": 2.457197874990979e-06, "loss": 0.0218, "step": 158800 }, { "epoch": 0.6626206908062188, "grad_norm": 0.5266126015362912, "learning_rate": 2.4571591918152292e-06, "loss": 0.0165, "step": 158805 }, { "epoch": 0.662641553521209, "grad_norm": 0.5606581395729201, "learning_rate": 2.4571205104663656e-06, "loss": 0.0218, "step": 158810 }, { "epoch": 0.6626624162361993, "grad_norm": 0.5286640122344924, "learning_rate": 2.4570818309442458e-06, "loss": 0.0225, "step": 158815 }, { "epoch": 0.6626832789511896, "grad_norm": 0.8987042291518555, "learning_rate": 2.457043153248726e-06, "loss": 0.0217, "step": 158820 }, { "epoch": 0.6627041416661799, "grad_norm": 0.5797214812099712, "learning_rate": 2.4570044773796605e-06, "loss": 0.0218, "step": 158825 }, { "epoch": 0.6627250043811701, "grad_norm": 0.594981265671985, "learning_rate": 2.4569658033369083e-06, "loss": 0.0179, "step": 158830 }, { "epoch": 0.6627458670961605, "grad_norm": 0.5682363691251565, "learning_rate": 2.456927131120323e-06, "loss": 0.0187, "step": 158835 }, { "epoch": 0.6627667298111507, "grad_norm": 0.49479246391424786, "learning_rate": 2.456888460729763e-06, "loss": 0.0211, "step": 158840 }, { "epoch": 0.662787592526141, "grad_norm": 0.49853656437956134, "learning_rate": 2.4568497921650835e-06, "loss": 0.0272, "step": 158845 }, { "epoch": 0.6628084552411313, "grad_norm": 0.5593889800928766, "learning_rate": 2.4568111254261422e-06, "loss": 0.0176, "step": 158850 }, { "epoch": 0.6628293179561215, "grad_norm": 1.234007873743866, "learning_rate": 2.4567724605127933e-06, "loss": 0.0293, "step": 158855 }, { "epoch": 0.6628501806711118, "grad_norm": 0.4666444674011772, "learning_rate": 2.4567337974248944e-06, "loss": 0.024, "step": 158860 }, { "epoch": 0.6628710433861021, "grad_norm": 0.6727693359171706, "learning_rate": 2.456695136162302e-06, "loss": 0.0149, "step": 158865 }, { "epoch": 0.6628919061010924, "grad_norm": 0.33045362862572186, "learning_rate": 2.4566564767248715e-06, "loss": 0.0153, "step": 158870 }, { "epoch": 0.6629127688160826, "grad_norm": 0.4982163343237749, "learning_rate": 2.4566178191124604e-06, "loss": 0.0143, "step": 158875 }, { "epoch": 0.6629336315310729, "grad_norm": 0.3983382459559025, "learning_rate": 2.4565791633249246e-06, "loss": 0.0211, "step": 158880 }, { "epoch": 0.6629544942460632, "grad_norm": 0.38565925374532506, "learning_rate": 2.456540509362121e-06, "loss": 0.0276, "step": 158885 }, { "epoch": 0.6629753569610535, "grad_norm": 0.6686369888932041, "learning_rate": 2.456501857223905e-06, "loss": 0.02, "step": 158890 }, { "epoch": 0.6629962196760437, "grad_norm": 0.26223310651496134, "learning_rate": 2.456463206910134e-06, "loss": 0.0202, "step": 158895 }, { "epoch": 0.6630170823910341, "grad_norm": 0.5383095282311267, "learning_rate": 2.4564245584206643e-06, "loss": 0.022, "step": 158900 }, { "epoch": 0.6630379451060243, "grad_norm": 0.3982559897442456, "learning_rate": 2.456385911755352e-06, "loss": 0.0166, "step": 158905 }, { "epoch": 0.6630588078210146, "grad_norm": 1.0419000734067145, "learning_rate": 2.456347266914054e-06, "loss": 0.023, "step": 158910 }, { "epoch": 0.6630796705360049, "grad_norm": 0.5998159827409958, "learning_rate": 2.456308623896627e-06, "loss": 0.0229, "step": 158915 }, { "epoch": 0.6631005332509952, "grad_norm": 1.2688921200545413, "learning_rate": 2.4562699827029266e-06, "loss": 0.027, "step": 158920 }, { "epoch": 0.6631213959659854, "grad_norm": 0.5062178990600998, "learning_rate": 2.4562313433328104e-06, "loss": 0.0203, "step": 158925 }, { "epoch": 0.6631422586809758, "grad_norm": 0.9092017304946292, "learning_rate": 2.4561927057861342e-06, "loss": 0.0249, "step": 158930 }, { "epoch": 0.663163121395966, "grad_norm": 0.8824263831201081, "learning_rate": 2.4561540700627557e-06, "loss": 0.0204, "step": 158935 }, { "epoch": 0.6631839841109562, "grad_norm": 0.47727074898565275, "learning_rate": 2.45611543616253e-06, "loss": 0.0233, "step": 158940 }, { "epoch": 0.6632048468259465, "grad_norm": 0.47304090625401907, "learning_rate": 2.456076804085315e-06, "loss": 0.019, "step": 158945 }, { "epoch": 0.6632257095409368, "grad_norm": 0.7705437935373606, "learning_rate": 2.456038173830967e-06, "loss": 0.0235, "step": 158950 }, { "epoch": 0.6632465722559271, "grad_norm": 0.6019986138835552, "learning_rate": 2.4559995453993424e-06, "loss": 0.0236, "step": 158955 }, { "epoch": 0.6632674349709173, "grad_norm": 0.7455539088874684, "learning_rate": 2.455960918790297e-06, "loss": 0.0302, "step": 158960 }, { "epoch": 0.6632882976859077, "grad_norm": 0.5939487653410714, "learning_rate": 2.4559222940036895e-06, "loss": 0.0189, "step": 158965 }, { "epoch": 0.6633091604008979, "grad_norm": 0.7759940668018813, "learning_rate": 2.4558836710393754e-06, "loss": 0.0199, "step": 158970 }, { "epoch": 0.6633300231158882, "grad_norm": 1.0460122193346293, "learning_rate": 2.4558450498972113e-06, "loss": 0.021, "step": 158975 }, { "epoch": 0.6633508858308785, "grad_norm": 0.5039666987870769, "learning_rate": 2.4558064305770536e-06, "loss": 0.0203, "step": 158980 }, { "epoch": 0.6633717485458688, "grad_norm": 0.3507375199271852, "learning_rate": 2.4557678130787604e-06, "loss": 0.0157, "step": 158985 }, { "epoch": 0.663392611260859, "grad_norm": 0.2913541429736258, "learning_rate": 2.4557291974021873e-06, "loss": 0.0235, "step": 158990 }, { "epoch": 0.6634134739758493, "grad_norm": 0.4962511996501226, "learning_rate": 2.455690583547191e-06, "loss": 0.0218, "step": 158995 }, { "epoch": 0.6634343366908396, "grad_norm": 0.3977942568023999, "learning_rate": 2.4556519715136298e-06, "loss": 0.0279, "step": 159000 }, { "epoch": 0.6634551994058299, "grad_norm": 0.5673625264684595, "learning_rate": 2.4556133613013586e-06, "loss": 0.0438, "step": 159005 }, { "epoch": 0.6634760621208201, "grad_norm": 0.8812003124499527, "learning_rate": 2.4555747529102357e-06, "loss": 0.0183, "step": 159010 }, { "epoch": 0.6634969248358105, "grad_norm": 0.448528165829268, "learning_rate": 2.455536146340117e-06, "loss": 0.0189, "step": 159015 }, { "epoch": 0.6635177875508007, "grad_norm": 0.9256973284234656, "learning_rate": 2.455497541590859e-06, "loss": 0.0296, "step": 159020 }, { "epoch": 0.663538650265791, "grad_norm": 0.5115007070440065, "learning_rate": 2.45545893866232e-06, "loss": 0.0205, "step": 159025 }, { "epoch": 0.6635595129807813, "grad_norm": 0.6780886984436559, "learning_rate": 2.4554203375543558e-06, "loss": 0.0163, "step": 159030 }, { "epoch": 0.6635803756957716, "grad_norm": 0.8827594030499536, "learning_rate": 2.4553817382668237e-06, "loss": 0.0292, "step": 159035 }, { "epoch": 0.6636012384107618, "grad_norm": 0.6927060083469149, "learning_rate": 2.455343140799581e-06, "loss": 0.0186, "step": 159040 }, { "epoch": 0.6636221011257522, "grad_norm": 0.7446434333608307, "learning_rate": 2.4553045451524836e-06, "loss": 0.0239, "step": 159045 }, { "epoch": 0.6636429638407424, "grad_norm": 0.6698063252745448, "learning_rate": 2.455265951325389e-06, "loss": 0.0188, "step": 159050 }, { "epoch": 0.6636638265557326, "grad_norm": 0.9378192565862509, "learning_rate": 2.4552273593181545e-06, "loss": 0.0193, "step": 159055 }, { "epoch": 0.6636846892707229, "grad_norm": 0.21361376802944226, "learning_rate": 2.4551887691306367e-06, "loss": 0.0205, "step": 159060 }, { "epoch": 0.6637055519857132, "grad_norm": 0.8330042669666907, "learning_rate": 2.4551501807626925e-06, "loss": 0.024, "step": 159065 }, { "epoch": 0.6637264147007035, "grad_norm": 0.7542698938550074, "learning_rate": 2.455111594214179e-06, "loss": 0.0206, "step": 159070 }, { "epoch": 0.6637472774156937, "grad_norm": 0.5066072114663508, "learning_rate": 2.4550730094849533e-06, "loss": 0.0182, "step": 159075 }, { "epoch": 0.6637681401306841, "grad_norm": 0.6050360760143632, "learning_rate": 2.4550344265748723e-06, "loss": 0.0167, "step": 159080 }, { "epoch": 0.6637890028456743, "grad_norm": 0.6063488972930524, "learning_rate": 2.454995845483794e-06, "loss": 0.0263, "step": 159085 }, { "epoch": 0.6638098655606646, "grad_norm": 0.42817653607482015, "learning_rate": 2.454957266211574e-06, "loss": 0.0162, "step": 159090 }, { "epoch": 0.6638307282756549, "grad_norm": 0.7873750137856577, "learning_rate": 2.4549186887580704e-06, "loss": 0.0217, "step": 159095 }, { "epoch": 0.6638515909906452, "grad_norm": 0.8245841712156792, "learning_rate": 2.4548801131231397e-06, "loss": 0.0228, "step": 159100 }, { "epoch": 0.6638724537056354, "grad_norm": 0.846250963899692, "learning_rate": 2.4548415393066393e-06, "loss": 0.0267, "step": 159105 }, { "epoch": 0.6638933164206258, "grad_norm": 0.4613098500126416, "learning_rate": 2.4548029673084263e-06, "loss": 0.0192, "step": 159110 }, { "epoch": 0.663914179135616, "grad_norm": 0.728249629969414, "learning_rate": 2.4547643971283582e-06, "loss": 0.0212, "step": 159115 }, { "epoch": 0.6639350418506063, "grad_norm": 1.0416700349765486, "learning_rate": 2.454725828766292e-06, "loss": 0.0259, "step": 159120 }, { "epoch": 0.6639559045655965, "grad_norm": 0.7192826617818386, "learning_rate": 2.4546872622220846e-06, "loss": 0.0237, "step": 159125 }, { "epoch": 0.6639767672805869, "grad_norm": 0.6728954067053398, "learning_rate": 2.454648697495593e-06, "loss": 0.0244, "step": 159130 }, { "epoch": 0.6639976299955771, "grad_norm": 0.6770297357948539, "learning_rate": 2.4546101345866752e-06, "loss": 0.0203, "step": 159135 }, { "epoch": 0.6640184927105673, "grad_norm": 0.8720207655512687, "learning_rate": 2.4545715734951876e-06, "loss": 0.0174, "step": 159140 }, { "epoch": 0.6640393554255577, "grad_norm": 0.8265135357116112, "learning_rate": 2.4545330142209884e-06, "loss": 0.0219, "step": 159145 }, { "epoch": 0.6640602181405479, "grad_norm": 0.8108770340419228, "learning_rate": 2.4544944567639337e-06, "loss": 0.0272, "step": 159150 }, { "epoch": 0.6640810808555382, "grad_norm": 0.6767099331456402, "learning_rate": 2.454455901123882e-06, "loss": 0.0271, "step": 159155 }, { "epoch": 0.6641019435705285, "grad_norm": 0.5124013718900424, "learning_rate": 2.454417347300689e-06, "loss": 0.0164, "step": 159160 }, { "epoch": 0.6641228062855188, "grad_norm": 0.5929220491290249, "learning_rate": 2.454378795294214e-06, "loss": 0.0186, "step": 159165 }, { "epoch": 0.664143669000509, "grad_norm": 1.1945006559770734, "learning_rate": 2.454340245104313e-06, "loss": 0.0261, "step": 159170 }, { "epoch": 0.6641645317154993, "grad_norm": 0.7699018228162123, "learning_rate": 2.454301696730844e-06, "loss": 0.0206, "step": 159175 }, { "epoch": 0.6641853944304896, "grad_norm": 0.8471185995780681, "learning_rate": 2.4542631501736637e-06, "loss": 0.0242, "step": 159180 }, { "epoch": 0.6642062571454799, "grad_norm": 0.8715910272500392, "learning_rate": 2.4542246054326292e-06, "loss": 0.0385, "step": 159185 }, { "epoch": 0.6642271198604701, "grad_norm": 0.93795146621209, "learning_rate": 2.4541860625075995e-06, "loss": 0.0175, "step": 159190 }, { "epoch": 0.6642479825754605, "grad_norm": 0.6801139702246107, "learning_rate": 2.4541475213984306e-06, "loss": 0.0236, "step": 159195 }, { "epoch": 0.6642688452904507, "grad_norm": 0.46594136566580985, "learning_rate": 2.45410898210498e-06, "loss": 0.0161, "step": 159200 }, { "epoch": 0.664289708005441, "grad_norm": 0.8047386462794692, "learning_rate": 2.4540704446271066e-06, "loss": 0.0249, "step": 159205 }, { "epoch": 0.6643105707204313, "grad_norm": 0.910927641503231, "learning_rate": 2.4540319089646653e-06, "loss": 0.023, "step": 159210 }, { "epoch": 0.6643314334354216, "grad_norm": 0.4779513545154168, "learning_rate": 2.4539933751175156e-06, "loss": 0.0186, "step": 159215 }, { "epoch": 0.6643522961504118, "grad_norm": 0.6271198892274018, "learning_rate": 2.4539548430855142e-06, "loss": 0.0206, "step": 159220 }, { "epoch": 0.6643731588654022, "grad_norm": 0.46445911424066094, "learning_rate": 2.4539163128685194e-06, "loss": 0.0164, "step": 159225 }, { "epoch": 0.6643940215803924, "grad_norm": 0.8503433287019821, "learning_rate": 2.4538777844663876e-06, "loss": 0.0226, "step": 159230 }, { "epoch": 0.6644148842953826, "grad_norm": 1.0179857580089606, "learning_rate": 2.4538392578789764e-06, "loss": 0.0268, "step": 159235 }, { "epoch": 0.6644357470103729, "grad_norm": 0.3247707404443638, "learning_rate": 2.4538007331061444e-06, "loss": 0.0182, "step": 159240 }, { "epoch": 0.6644566097253632, "grad_norm": 0.42683692347284585, "learning_rate": 2.453762210147748e-06, "loss": 0.0226, "step": 159245 }, { "epoch": 0.6644774724403535, "grad_norm": 1.33346745763455, "learning_rate": 2.4537236890036454e-06, "loss": 0.0318, "step": 159250 }, { "epoch": 0.6644983351553437, "grad_norm": 1.1960063409574928, "learning_rate": 2.4536851696736942e-06, "loss": 0.0241, "step": 159255 }, { "epoch": 0.6645191978703341, "grad_norm": 0.8179447378487926, "learning_rate": 2.453646652157752e-06, "loss": 0.0276, "step": 159260 }, { "epoch": 0.6645400605853243, "grad_norm": 0.6257525271980559, "learning_rate": 2.453608136455676e-06, "loss": 0.0228, "step": 159265 }, { "epoch": 0.6645609233003146, "grad_norm": 0.7828677437311689, "learning_rate": 2.4535696225673243e-06, "loss": 0.0262, "step": 159270 }, { "epoch": 0.6645817860153049, "grad_norm": 0.8517212972527257, "learning_rate": 2.453531110492555e-06, "loss": 0.0255, "step": 159275 }, { "epoch": 0.6646026487302952, "grad_norm": 0.6343297791688749, "learning_rate": 2.4534926002312245e-06, "loss": 0.0211, "step": 159280 }, { "epoch": 0.6646235114452854, "grad_norm": 0.7063780602508949, "learning_rate": 2.453454091783191e-06, "loss": 0.0215, "step": 159285 }, { "epoch": 0.6646443741602758, "grad_norm": 0.6639311302339004, "learning_rate": 2.453415585148313e-06, "loss": 0.0261, "step": 159290 }, { "epoch": 0.664665236875266, "grad_norm": 0.9380615857156721, "learning_rate": 2.453377080326447e-06, "loss": 0.0234, "step": 159295 }, { "epoch": 0.6646860995902563, "grad_norm": 1.304639778559274, "learning_rate": 2.4533385773174514e-06, "loss": 0.0253, "step": 159300 }, { "epoch": 0.6647069623052465, "grad_norm": 0.36669130321602084, "learning_rate": 2.4533000761211843e-06, "loss": 0.0217, "step": 159305 }, { "epoch": 0.6647278250202369, "grad_norm": 0.8887068751060369, "learning_rate": 2.453261576737503e-06, "loss": 0.0233, "step": 159310 }, { "epoch": 0.6647486877352271, "grad_norm": 5.419633821638768, "learning_rate": 2.453223079166265e-06, "loss": 0.0302, "step": 159315 }, { "epoch": 0.6647695504502174, "grad_norm": 0.6203473407776193, "learning_rate": 2.453184583407329e-06, "loss": 0.0228, "step": 159320 }, { "epoch": 0.6647904131652077, "grad_norm": 0.564111814870158, "learning_rate": 2.4531460894605514e-06, "loss": 0.0185, "step": 159325 }, { "epoch": 0.664811275880198, "grad_norm": 0.862439101525809, "learning_rate": 2.4531075973257915e-06, "loss": 0.0216, "step": 159330 }, { "epoch": 0.6648321385951882, "grad_norm": 0.9749234481264019, "learning_rate": 2.4530691070029063e-06, "loss": 0.0317, "step": 159335 }, { "epoch": 0.6648530013101785, "grad_norm": 0.3887086980326553, "learning_rate": 2.4530306184917534e-06, "loss": 0.0183, "step": 159340 }, { "epoch": 0.6648738640251688, "grad_norm": 1.079628740786258, "learning_rate": 2.4529921317921922e-06, "loss": 0.0237, "step": 159345 }, { "epoch": 0.664894726740159, "grad_norm": 0.6965381551535323, "learning_rate": 2.4529536469040783e-06, "loss": 0.0197, "step": 159350 }, { "epoch": 0.6649155894551493, "grad_norm": 1.1666345199549633, "learning_rate": 2.452915163827271e-06, "loss": 0.0228, "step": 159355 }, { "epoch": 0.6649364521701396, "grad_norm": 1.1954034391553563, "learning_rate": 2.452876682561629e-06, "loss": 0.027, "step": 159360 }, { "epoch": 0.6649573148851299, "grad_norm": 0.67539985721805, "learning_rate": 2.452838203107009e-06, "loss": 0.0198, "step": 159365 }, { "epoch": 0.6649781776001201, "grad_norm": 0.7598110147958849, "learning_rate": 2.4527997254632692e-06, "loss": 0.0215, "step": 159370 }, { "epoch": 0.6649990403151105, "grad_norm": 0.437076805413674, "learning_rate": 2.452761249630267e-06, "loss": 0.0281, "step": 159375 }, { "epoch": 0.6650199030301007, "grad_norm": 0.4969061219012626, "learning_rate": 2.4527227756078617e-06, "loss": 0.0267, "step": 159380 }, { "epoch": 0.665040765745091, "grad_norm": 0.6772382280140926, "learning_rate": 2.45268430339591e-06, "loss": 0.0181, "step": 159385 }, { "epoch": 0.6650616284600813, "grad_norm": 0.6418056394400793, "learning_rate": 2.452645832994271e-06, "loss": 0.0223, "step": 159390 }, { "epoch": 0.6650824911750716, "grad_norm": 0.8582025562326419, "learning_rate": 2.4526073644028023e-06, "loss": 0.0197, "step": 159395 }, { "epoch": 0.6651033538900618, "grad_norm": 0.8652841727793847, "learning_rate": 2.452568897621361e-06, "loss": 0.0219, "step": 159400 }, { "epoch": 0.6651242166050522, "grad_norm": 0.9310963961426703, "learning_rate": 2.452530432649807e-06, "loss": 0.0165, "step": 159405 }, { "epoch": 0.6651450793200424, "grad_norm": 0.43625826744294555, "learning_rate": 2.452491969487997e-06, "loss": 0.0186, "step": 159410 }, { "epoch": 0.6651659420350327, "grad_norm": 0.6872292783749611, "learning_rate": 2.45245350813579e-06, "loss": 0.0208, "step": 159415 }, { "epoch": 0.6651868047500229, "grad_norm": 0.6952721001683814, "learning_rate": 2.452415048593043e-06, "loss": 0.0233, "step": 159420 }, { "epoch": 0.6652076674650133, "grad_norm": 0.5531851592243773, "learning_rate": 2.4523765908596148e-06, "loss": 0.0197, "step": 159425 }, { "epoch": 0.6652285301800035, "grad_norm": 0.445883944114224, "learning_rate": 2.4523381349353635e-06, "loss": 0.0261, "step": 159430 }, { "epoch": 0.6652493928949937, "grad_norm": 0.5789211619419188, "learning_rate": 2.4522996808201476e-06, "loss": 0.0215, "step": 159435 }, { "epoch": 0.6652702556099841, "grad_norm": 0.9524990295196428, "learning_rate": 2.4522612285138248e-06, "loss": 0.0269, "step": 159440 }, { "epoch": 0.6652911183249743, "grad_norm": 0.8771676962403384, "learning_rate": 2.452222778016254e-06, "loss": 0.0187, "step": 159445 }, { "epoch": 0.6653119810399646, "grad_norm": 0.7695930146788086, "learning_rate": 2.4521843293272916e-06, "loss": 0.0212, "step": 159450 }, { "epoch": 0.6653328437549549, "grad_norm": 3.0163342883944932, "learning_rate": 2.4521458824467976e-06, "loss": 0.021, "step": 159455 }, { "epoch": 0.6653537064699452, "grad_norm": 0.41020745389454316, "learning_rate": 2.4521074373746295e-06, "loss": 0.0169, "step": 159460 }, { "epoch": 0.6653745691849354, "grad_norm": 1.535505746864038, "learning_rate": 2.4520689941106464e-06, "loss": 0.0314, "step": 159465 }, { "epoch": 0.6653954318999258, "grad_norm": 1.344579470702778, "learning_rate": 2.4520305526547047e-06, "loss": 0.0262, "step": 159470 }, { "epoch": 0.665416294614916, "grad_norm": 0.5754162352515259, "learning_rate": 2.4519921130066645e-06, "loss": 0.0187, "step": 159475 }, { "epoch": 0.6654371573299063, "grad_norm": 0.6307007061358008, "learning_rate": 2.4519536751663833e-06, "loss": 0.0223, "step": 159480 }, { "epoch": 0.6654580200448965, "grad_norm": 0.7433944356849197, "learning_rate": 2.4519152391337195e-06, "loss": 0.0166, "step": 159485 }, { "epoch": 0.6654788827598869, "grad_norm": 0.9137866682969529, "learning_rate": 2.4518768049085317e-06, "loss": 0.0212, "step": 159490 }, { "epoch": 0.6654997454748771, "grad_norm": 0.6852231788981392, "learning_rate": 2.451838372490678e-06, "loss": 0.0222, "step": 159495 }, { "epoch": 0.6655206081898674, "grad_norm": 1.1385130696246342, "learning_rate": 2.451799941880017e-06, "loss": 0.0236, "step": 159500 }, { "epoch": 0.6655414709048577, "grad_norm": 0.7034170753230745, "learning_rate": 2.451761513076406e-06, "loss": 0.0244, "step": 159505 }, { "epoch": 0.665562333619848, "grad_norm": 0.6072392662658228, "learning_rate": 2.4517230860797052e-06, "loss": 0.0252, "step": 159510 }, { "epoch": 0.6655831963348382, "grad_norm": 0.27558966459964396, "learning_rate": 2.4516846608897717e-06, "loss": 0.0195, "step": 159515 }, { "epoch": 0.6656040590498286, "grad_norm": 0.42546895648048166, "learning_rate": 2.451646237506464e-06, "loss": 0.0157, "step": 159520 }, { "epoch": 0.6656249217648188, "grad_norm": 0.5301626453267875, "learning_rate": 2.4516078159296407e-06, "loss": 0.019, "step": 159525 }, { "epoch": 0.665645784479809, "grad_norm": 0.8838961465061433, "learning_rate": 2.451569396159161e-06, "loss": 0.0191, "step": 159530 }, { "epoch": 0.6656666471947993, "grad_norm": 0.2787939187449067, "learning_rate": 2.4515309781948825e-06, "loss": 0.0203, "step": 159535 }, { "epoch": 0.6656875099097896, "grad_norm": 0.854686367189885, "learning_rate": 2.4514925620366635e-06, "loss": 0.0298, "step": 159540 }, { "epoch": 0.6657083726247799, "grad_norm": 0.8913144012011396, "learning_rate": 2.4514541476843628e-06, "loss": 0.0349, "step": 159545 }, { "epoch": 0.6657292353397701, "grad_norm": 0.7940585530500708, "learning_rate": 2.4514157351378397e-06, "loss": 0.0219, "step": 159550 }, { "epoch": 0.6657500980547605, "grad_norm": 1.1446329049003596, "learning_rate": 2.4513773243969517e-06, "loss": 0.0283, "step": 159555 }, { "epoch": 0.6657709607697507, "grad_norm": 0.9012566282905973, "learning_rate": 2.4513389154615575e-06, "loss": 0.0266, "step": 159560 }, { "epoch": 0.665791823484741, "grad_norm": 0.959532796048384, "learning_rate": 2.4513005083315163e-06, "loss": 0.0206, "step": 159565 }, { "epoch": 0.6658126861997313, "grad_norm": 1.1900247613716846, "learning_rate": 2.451262103006686e-06, "loss": 0.0263, "step": 159570 }, { "epoch": 0.6658335489147216, "grad_norm": 0.7153791031576632, "learning_rate": 2.451223699486925e-06, "loss": 0.0232, "step": 159575 }, { "epoch": 0.6658544116297118, "grad_norm": 1.436103351545811, "learning_rate": 2.451185297772093e-06, "loss": 0.0362, "step": 159580 }, { "epoch": 0.6658752743447022, "grad_norm": 1.0275269033775487, "learning_rate": 2.4511468978620478e-06, "loss": 0.0255, "step": 159585 }, { "epoch": 0.6658961370596924, "grad_norm": 0.7508067089532108, "learning_rate": 2.4511084997566475e-06, "loss": 0.0208, "step": 159590 }, { "epoch": 0.6659169997746827, "grad_norm": 0.4775335642145794, "learning_rate": 2.451070103455752e-06, "loss": 0.0247, "step": 159595 }, { "epoch": 0.6659378624896729, "grad_norm": 0.489729606376604, "learning_rate": 2.4510317089592192e-06, "loss": 0.0194, "step": 159600 }, { "epoch": 0.6659587252046633, "grad_norm": 1.009819323207174, "learning_rate": 2.450993316266908e-06, "loss": 0.0259, "step": 159605 }, { "epoch": 0.6659795879196535, "grad_norm": 1.322491149722251, "learning_rate": 2.4509549253786764e-06, "loss": 0.0244, "step": 159610 }, { "epoch": 0.6660004506346437, "grad_norm": 0.7674992400215176, "learning_rate": 2.4509165362943846e-06, "loss": 0.0233, "step": 159615 }, { "epoch": 0.6660213133496341, "grad_norm": 0.5852282803132279, "learning_rate": 2.450878149013891e-06, "loss": 0.0212, "step": 159620 }, { "epoch": 0.6660421760646243, "grad_norm": 0.9144050345991895, "learning_rate": 2.450839763537053e-06, "loss": 0.0356, "step": 159625 }, { "epoch": 0.6660630387796146, "grad_norm": 0.9841423666502819, "learning_rate": 2.4508013798637296e-06, "loss": 0.0136, "step": 159630 }, { "epoch": 0.666083901494605, "grad_norm": 0.6337885554333755, "learning_rate": 2.450762997993782e-06, "loss": 0.0213, "step": 159635 }, { "epoch": 0.6661047642095952, "grad_norm": 1.0473319637472491, "learning_rate": 2.450724617927066e-06, "loss": 0.0226, "step": 159640 }, { "epoch": 0.6661256269245854, "grad_norm": 0.7018109083427657, "learning_rate": 2.4506862396634414e-06, "loss": 0.0227, "step": 159645 }, { "epoch": 0.6661464896395758, "grad_norm": 0.6719696051777357, "learning_rate": 2.450647863202768e-06, "loss": 0.0186, "step": 159650 }, { "epoch": 0.666167352354566, "grad_norm": 0.9661451336387462, "learning_rate": 2.4506094885449035e-06, "loss": 0.0239, "step": 159655 }, { "epoch": 0.6661882150695563, "grad_norm": 0.8054502831290917, "learning_rate": 2.450571115689707e-06, "loss": 0.0205, "step": 159660 }, { "epoch": 0.6662090777845465, "grad_norm": 0.9443399790694298, "learning_rate": 2.450532744637037e-06, "loss": 0.0272, "step": 159665 }, { "epoch": 0.6662299404995369, "grad_norm": 0.5586404035563742, "learning_rate": 2.4504943753867537e-06, "loss": 0.0198, "step": 159670 }, { "epoch": 0.6662508032145271, "grad_norm": 0.7660232450414033, "learning_rate": 2.4504560079387152e-06, "loss": 0.0241, "step": 159675 }, { "epoch": 0.6662716659295174, "grad_norm": 0.5419435472851029, "learning_rate": 2.4504176422927796e-06, "loss": 0.0247, "step": 159680 }, { "epoch": 0.6662925286445077, "grad_norm": 0.41112651200409434, "learning_rate": 2.4503792784488075e-06, "loss": 0.0182, "step": 159685 }, { "epoch": 0.666313391359498, "grad_norm": 0.4249773933030574, "learning_rate": 2.4503409164066565e-06, "loss": 0.026, "step": 159690 }, { "epoch": 0.6663342540744882, "grad_norm": 0.9839031578173802, "learning_rate": 2.4503025561661858e-06, "loss": 0.0264, "step": 159695 }, { "epoch": 0.6663551167894786, "grad_norm": 0.4302322686440934, "learning_rate": 2.450264197727255e-06, "loss": 0.0194, "step": 159700 }, { "epoch": 0.6663759795044688, "grad_norm": 0.63346871197542, "learning_rate": 2.4502258410897227e-06, "loss": 0.0203, "step": 159705 }, { "epoch": 0.666396842219459, "grad_norm": 0.638825628847907, "learning_rate": 2.450187486253447e-06, "loss": 0.018, "step": 159710 }, { "epoch": 0.6664177049344493, "grad_norm": 0.6576124881384987, "learning_rate": 2.4501491332182887e-06, "loss": 0.0198, "step": 159715 }, { "epoch": 0.6664385676494397, "grad_norm": 0.8843254427429086, "learning_rate": 2.450110781984106e-06, "loss": 0.0233, "step": 159720 }, { "epoch": 0.6664594303644299, "grad_norm": 0.452263787333012, "learning_rate": 2.4500724325507573e-06, "loss": 0.0296, "step": 159725 }, { "epoch": 0.6664802930794201, "grad_norm": 0.9718956051924984, "learning_rate": 2.450034084918103e-06, "loss": 0.0207, "step": 159730 }, { "epoch": 0.6665011557944105, "grad_norm": 1.1189721865718152, "learning_rate": 2.449995739086001e-06, "loss": 0.0234, "step": 159735 }, { "epoch": 0.6665220185094007, "grad_norm": 0.39812248539678025, "learning_rate": 2.4499573950543113e-06, "loss": 0.0171, "step": 159740 }, { "epoch": 0.666542881224391, "grad_norm": 0.477354532246092, "learning_rate": 2.4499190528228918e-06, "loss": 0.0198, "step": 159745 }, { "epoch": 0.6665637439393813, "grad_norm": 0.7065845154026434, "learning_rate": 2.4498807123916028e-06, "loss": 0.0243, "step": 159750 }, { "epoch": 0.6665846066543716, "grad_norm": 0.8844014226418677, "learning_rate": 2.4498423737603037e-06, "loss": 0.0235, "step": 159755 }, { "epoch": 0.6666054693693618, "grad_norm": 2.962559534605086, "learning_rate": 2.449804036928852e-06, "loss": 0.0254, "step": 159760 }, { "epoch": 0.6666263320843522, "grad_norm": 1.3414922927006778, "learning_rate": 2.4497657018971085e-06, "loss": 0.0273, "step": 159765 }, { "epoch": 0.6666471947993424, "grad_norm": 0.6006022410625588, "learning_rate": 2.449727368664932e-06, "loss": 0.0217, "step": 159770 }, { "epoch": 0.6666680575143327, "grad_norm": 0.8278744094767632, "learning_rate": 2.4496890372321814e-06, "loss": 0.0242, "step": 159775 }, { "epoch": 0.6666889202293229, "grad_norm": 1.1532481940142278, "learning_rate": 2.449650707598715e-06, "loss": 0.0223, "step": 159780 }, { "epoch": 0.6667097829443133, "grad_norm": 0.5307980054869956, "learning_rate": 2.4496123797643942e-06, "loss": 0.0182, "step": 159785 }, { "epoch": 0.6667306456593035, "grad_norm": 0.48296981776456127, "learning_rate": 2.4495740537290773e-06, "loss": 0.0179, "step": 159790 }, { "epoch": 0.6667515083742938, "grad_norm": 0.9112685492722026, "learning_rate": 2.449535729492623e-06, "loss": 0.0211, "step": 159795 }, { "epoch": 0.6667723710892841, "grad_norm": 0.7443454091149904, "learning_rate": 2.4494974070548903e-06, "loss": 0.0191, "step": 159800 }, { "epoch": 0.6667932338042744, "grad_norm": 0.8520206333405472, "learning_rate": 2.44945908641574e-06, "loss": 0.0252, "step": 159805 }, { "epoch": 0.6668140965192646, "grad_norm": 0.7416273784021915, "learning_rate": 2.44942076757503e-06, "loss": 0.0274, "step": 159810 }, { "epoch": 0.666834959234255, "grad_norm": 0.5857270425457828, "learning_rate": 2.449382450532621e-06, "loss": 0.0213, "step": 159815 }, { "epoch": 0.6668558219492452, "grad_norm": 0.7049282976724285, "learning_rate": 2.4493441352883708e-06, "loss": 0.0262, "step": 159820 }, { "epoch": 0.6668766846642354, "grad_norm": 0.4972510851217223, "learning_rate": 2.4493058218421403e-06, "loss": 0.0239, "step": 159825 }, { "epoch": 0.6668975473792258, "grad_norm": 0.6948380031273778, "learning_rate": 2.4492675101937877e-06, "loss": 0.023, "step": 159830 }, { "epoch": 0.666918410094216, "grad_norm": 0.7840628295725035, "learning_rate": 2.4492292003431727e-06, "loss": 0.0235, "step": 159835 }, { "epoch": 0.6669392728092063, "grad_norm": 0.5269821035943455, "learning_rate": 2.449190892290155e-06, "loss": 0.0216, "step": 159840 }, { "epoch": 0.6669601355241965, "grad_norm": 0.5736060198180504, "learning_rate": 2.449152586034594e-06, "loss": 0.0195, "step": 159845 }, { "epoch": 0.6669809982391869, "grad_norm": 0.7395629237885832, "learning_rate": 2.4491142815763488e-06, "loss": 0.022, "step": 159850 }, { "epoch": 0.6670018609541771, "grad_norm": 0.6317555417398028, "learning_rate": 2.4490759789152787e-06, "loss": 0.0347, "step": 159855 }, { "epoch": 0.6670227236691674, "grad_norm": 0.48314591738181784, "learning_rate": 2.449037678051244e-06, "loss": 0.0211, "step": 159860 }, { "epoch": 0.6670435863841577, "grad_norm": 1.0282620775715143, "learning_rate": 2.4489993789841035e-06, "loss": 0.0191, "step": 159865 }, { "epoch": 0.667064449099148, "grad_norm": 0.552900138540396, "learning_rate": 2.448961081713717e-06, "loss": 0.0204, "step": 159870 }, { "epoch": 0.6670853118141382, "grad_norm": 0.5608223995015729, "learning_rate": 2.4489227862399444e-06, "loss": 0.0178, "step": 159875 }, { "epoch": 0.6671061745291286, "grad_norm": 0.5910644071038471, "learning_rate": 2.448884492562644e-06, "loss": 0.0211, "step": 159880 }, { "epoch": 0.6671270372441188, "grad_norm": 1.2239538247717878, "learning_rate": 2.448846200681676e-06, "loss": 0.0346, "step": 159885 }, { "epoch": 0.6671478999591091, "grad_norm": 0.7627430656360144, "learning_rate": 2.448807910596901e-06, "loss": 0.0249, "step": 159890 }, { "epoch": 0.6671687626740993, "grad_norm": 1.0600067433211633, "learning_rate": 2.448769622308177e-06, "loss": 0.029, "step": 159895 }, { "epoch": 0.6671896253890897, "grad_norm": 0.6450448453093238, "learning_rate": 2.4487313358153643e-06, "loss": 0.0229, "step": 159900 }, { "epoch": 0.6672104881040799, "grad_norm": 0.724255469968386, "learning_rate": 2.448693051118322e-06, "loss": 0.0208, "step": 159905 }, { "epoch": 0.6672313508190701, "grad_norm": 0.47717109640618155, "learning_rate": 2.448654768216911e-06, "loss": 0.0229, "step": 159910 }, { "epoch": 0.6672522135340605, "grad_norm": 1.0544206357494108, "learning_rate": 2.448616487110989e-06, "loss": 0.0234, "step": 159915 }, { "epoch": 0.6672730762490507, "grad_norm": 0.4659976357407452, "learning_rate": 2.448578207800418e-06, "loss": 0.0198, "step": 159920 }, { "epoch": 0.667293938964041, "grad_norm": 0.6863197168683121, "learning_rate": 2.4485399302850564e-06, "loss": 0.0144, "step": 159925 }, { "epoch": 0.6673148016790313, "grad_norm": 1.0116828042090311, "learning_rate": 2.448501654564763e-06, "loss": 0.0317, "step": 159930 }, { "epoch": 0.6673356643940216, "grad_norm": 0.6248004472350925, "learning_rate": 2.4484633806393986e-06, "loss": 0.0212, "step": 159935 }, { "epoch": 0.6673565271090118, "grad_norm": 1.0623529652056853, "learning_rate": 2.448425108508823e-06, "loss": 0.0317, "step": 159940 }, { "epoch": 0.6673773898240022, "grad_norm": 1.1214268644337368, "learning_rate": 2.4483868381728956e-06, "loss": 0.0182, "step": 159945 }, { "epoch": 0.6673982525389924, "grad_norm": 0.27738165060579345, "learning_rate": 2.4483485696314764e-06, "loss": 0.0228, "step": 159950 }, { "epoch": 0.6674191152539827, "grad_norm": 0.5108622566864366, "learning_rate": 2.4483103028844246e-06, "loss": 0.0221, "step": 159955 }, { "epoch": 0.6674399779689729, "grad_norm": 0.5529610318257645, "learning_rate": 2.4482720379316007e-06, "loss": 0.0238, "step": 159960 }, { "epoch": 0.6674608406839633, "grad_norm": 0.8080966781670157, "learning_rate": 2.4482337747728642e-06, "loss": 0.0149, "step": 159965 }, { "epoch": 0.6674817033989535, "grad_norm": 0.4516918798012471, "learning_rate": 2.4481955134080745e-06, "loss": 0.0164, "step": 159970 }, { "epoch": 0.6675025661139438, "grad_norm": 0.49845515409141283, "learning_rate": 2.448157253837092e-06, "loss": 0.0193, "step": 159975 }, { "epoch": 0.6675234288289341, "grad_norm": 0.43747614920936057, "learning_rate": 2.4481189960597766e-06, "loss": 0.0203, "step": 159980 }, { "epoch": 0.6675442915439244, "grad_norm": 0.7165629318481086, "learning_rate": 2.4480807400759872e-06, "loss": 0.0247, "step": 159985 }, { "epoch": 0.6675651542589146, "grad_norm": 0.3954805778167268, "learning_rate": 2.4480424858855848e-06, "loss": 0.0206, "step": 159990 }, { "epoch": 0.667586016973905, "grad_norm": 0.592676643925988, "learning_rate": 2.448004233488429e-06, "loss": 0.0182, "step": 159995 }, { "epoch": 0.6676068796888952, "grad_norm": 0.859124599426169, "learning_rate": 2.4479659828843792e-06, "loss": 0.0224, "step": 160000 }, { "epoch": 0.6676277424038854, "grad_norm": 0.5249445385580276, "learning_rate": 2.447927734073296e-06, "loss": 0.0213, "step": 160005 }, { "epoch": 0.6676486051188758, "grad_norm": 0.6110434075571867, "learning_rate": 2.4478894870550385e-06, "loss": 0.018, "step": 160010 }, { "epoch": 0.667669467833866, "grad_norm": 0.6176727399174288, "learning_rate": 2.4478512418294675e-06, "loss": 0.0238, "step": 160015 }, { "epoch": 0.6676903305488563, "grad_norm": 0.7447482582683654, "learning_rate": 2.447812998396442e-06, "loss": 0.026, "step": 160020 }, { "epoch": 0.6677111932638465, "grad_norm": 0.4949354928436639, "learning_rate": 2.447774756755823e-06, "loss": 0.0202, "step": 160025 }, { "epoch": 0.6677320559788369, "grad_norm": 0.6790021517945136, "learning_rate": 2.4477365169074706e-06, "loss": 0.0212, "step": 160030 }, { "epoch": 0.6677529186938271, "grad_norm": 0.3959281849815433, "learning_rate": 2.4476982788512433e-06, "loss": 0.0258, "step": 160035 }, { "epoch": 0.6677737814088174, "grad_norm": 0.2787676813820676, "learning_rate": 2.4476600425870027e-06, "loss": 0.0162, "step": 160040 }, { "epoch": 0.6677946441238077, "grad_norm": 0.4927091940210396, "learning_rate": 2.447621808114608e-06, "loss": 0.0203, "step": 160045 }, { "epoch": 0.667815506838798, "grad_norm": 1.110848308106551, "learning_rate": 2.447583575433919e-06, "loss": 0.0263, "step": 160050 }, { "epoch": 0.6678363695537882, "grad_norm": 0.37461927969702324, "learning_rate": 2.4475453445447972e-06, "loss": 0.0273, "step": 160055 }, { "epoch": 0.6678572322687786, "grad_norm": 1.0426098476827452, "learning_rate": 2.447507115447101e-06, "loss": 0.024, "step": 160060 }, { "epoch": 0.6678780949837688, "grad_norm": 0.4151644251750149, "learning_rate": 2.4474688881406914e-06, "loss": 0.0204, "step": 160065 }, { "epoch": 0.6678989576987591, "grad_norm": 0.668253148215216, "learning_rate": 2.447430662625429e-06, "loss": 0.028, "step": 160070 }, { "epoch": 0.6679198204137493, "grad_norm": 0.6961787786864907, "learning_rate": 2.4473924389011723e-06, "loss": 0.0186, "step": 160075 }, { "epoch": 0.6679406831287397, "grad_norm": 0.517450173381563, "learning_rate": 2.4473542169677834e-06, "loss": 0.0233, "step": 160080 }, { "epoch": 0.6679615458437299, "grad_norm": 0.834655007286157, "learning_rate": 2.447315996825121e-06, "loss": 0.0237, "step": 160085 }, { "epoch": 0.6679824085587202, "grad_norm": 0.7062778053452029, "learning_rate": 2.4472777784730454e-06, "loss": 0.0238, "step": 160090 }, { "epoch": 0.6680032712737105, "grad_norm": 0.522711603818075, "learning_rate": 2.447239561911418e-06, "loss": 0.0224, "step": 160095 }, { "epoch": 0.6680241339887008, "grad_norm": 0.33732240877980424, "learning_rate": 2.4472013471400975e-06, "loss": 0.0225, "step": 160100 }, { "epoch": 0.668044996703691, "grad_norm": 0.38603969768700624, "learning_rate": 2.447163134158945e-06, "loss": 0.0259, "step": 160105 }, { "epoch": 0.6680658594186814, "grad_norm": 0.7576975841128054, "learning_rate": 2.44712492296782e-06, "loss": 0.0364, "step": 160110 }, { "epoch": 0.6680867221336716, "grad_norm": 0.4318391695398007, "learning_rate": 2.447086713566584e-06, "loss": 0.0194, "step": 160115 }, { "epoch": 0.6681075848486618, "grad_norm": 0.6575449525977523, "learning_rate": 2.4470485059550967e-06, "loss": 0.0196, "step": 160120 }, { "epoch": 0.6681284475636522, "grad_norm": 1.0601549628888127, "learning_rate": 2.4470103001332172e-06, "loss": 0.029, "step": 160125 }, { "epoch": 0.6681493102786424, "grad_norm": 0.6226605328425345, "learning_rate": 2.446972096100808e-06, "loss": 0.0269, "step": 160130 }, { "epoch": 0.6681701729936327, "grad_norm": 0.3953052899773282, "learning_rate": 2.4469338938577274e-06, "loss": 0.0266, "step": 160135 }, { "epoch": 0.6681910357086229, "grad_norm": 0.6522526388108116, "learning_rate": 2.4468956934038367e-06, "loss": 0.0212, "step": 160140 }, { "epoch": 0.6682118984236133, "grad_norm": 0.8895722604329505, "learning_rate": 2.446857494738996e-06, "loss": 0.02, "step": 160145 }, { "epoch": 0.6682327611386035, "grad_norm": 0.4172251772362308, "learning_rate": 2.446819297863066e-06, "loss": 0.0167, "step": 160150 }, { "epoch": 0.6682536238535938, "grad_norm": 0.3586875334968996, "learning_rate": 2.4467811027759066e-06, "loss": 0.0241, "step": 160155 }, { "epoch": 0.6682744865685841, "grad_norm": 0.4035267867614915, "learning_rate": 2.446742909477379e-06, "loss": 0.0131, "step": 160160 }, { "epoch": 0.6682953492835744, "grad_norm": 0.9795148000439217, "learning_rate": 2.446704717967343e-06, "loss": 0.029, "step": 160165 }, { "epoch": 0.6683162119985646, "grad_norm": 0.9099027106864206, "learning_rate": 2.4466665282456585e-06, "loss": 0.0274, "step": 160170 }, { "epoch": 0.668337074713555, "grad_norm": 0.762808289568082, "learning_rate": 2.4466283403121866e-06, "loss": 0.0186, "step": 160175 }, { "epoch": 0.6683579374285452, "grad_norm": 0.82359664611813, "learning_rate": 2.446590154166788e-06, "loss": 0.0327, "step": 160180 }, { "epoch": 0.6683788001435355, "grad_norm": 0.41944796669381945, "learning_rate": 2.4465519698093228e-06, "loss": 0.0179, "step": 160185 }, { "epoch": 0.6683996628585258, "grad_norm": 0.41598756677435117, "learning_rate": 2.446513787239651e-06, "loss": 0.02, "step": 160190 }, { "epoch": 0.6684205255735161, "grad_norm": 1.0346658799274746, "learning_rate": 2.446475606457634e-06, "loss": 0.0177, "step": 160195 }, { "epoch": 0.6684413882885063, "grad_norm": 0.9032933122347914, "learning_rate": 2.4464374274631317e-06, "loss": 0.0226, "step": 160200 }, { "epoch": 0.6684622510034965, "grad_norm": 0.9733592307885163, "learning_rate": 2.446399250256005e-06, "loss": 0.0205, "step": 160205 }, { "epoch": 0.6684831137184869, "grad_norm": 0.8868366150420168, "learning_rate": 2.4463610748361143e-06, "loss": 0.0171, "step": 160210 }, { "epoch": 0.6685039764334771, "grad_norm": 0.28352911541671183, "learning_rate": 2.44632290120332e-06, "loss": 0.0218, "step": 160215 }, { "epoch": 0.6685248391484674, "grad_norm": 0.7587092456677665, "learning_rate": 2.446284729357483e-06, "loss": 0.02, "step": 160220 }, { "epoch": 0.6685457018634577, "grad_norm": 0.6485145583913087, "learning_rate": 2.4462465592984635e-06, "loss": 0.0164, "step": 160225 }, { "epoch": 0.668566564578448, "grad_norm": 0.6964890989292626, "learning_rate": 2.4462083910261217e-06, "loss": 0.0278, "step": 160230 }, { "epoch": 0.6685874272934382, "grad_norm": 0.319741221500338, "learning_rate": 2.4461702245403197e-06, "loss": 0.018, "step": 160235 }, { "epoch": 0.6686082900084286, "grad_norm": 1.078098952596537, "learning_rate": 2.446132059840917e-06, "loss": 0.024, "step": 160240 }, { "epoch": 0.6686291527234188, "grad_norm": 0.9935683263110199, "learning_rate": 2.4460938969277743e-06, "loss": 0.028, "step": 160245 }, { "epoch": 0.6686500154384091, "grad_norm": 0.9449795309194757, "learning_rate": 2.446055735800753e-06, "loss": 0.0303, "step": 160250 }, { "epoch": 0.6686708781533993, "grad_norm": 0.8052020452259144, "learning_rate": 2.446017576459713e-06, "loss": 0.0207, "step": 160255 }, { "epoch": 0.6686917408683897, "grad_norm": 0.6250319623847406, "learning_rate": 2.4459794189045148e-06, "loss": 0.0202, "step": 160260 }, { "epoch": 0.6687126035833799, "grad_norm": 0.7406187534503804, "learning_rate": 2.4459412631350196e-06, "loss": 0.0189, "step": 160265 }, { "epoch": 0.6687334662983702, "grad_norm": 1.1055023127921135, "learning_rate": 2.4459031091510886e-06, "loss": 0.0212, "step": 160270 }, { "epoch": 0.6687543290133605, "grad_norm": 0.6576599336892455, "learning_rate": 2.445864956952581e-06, "loss": 0.0291, "step": 160275 }, { "epoch": 0.6687751917283508, "grad_norm": 0.477544222386468, "learning_rate": 2.4458268065393596e-06, "loss": 0.0193, "step": 160280 }, { "epoch": 0.668796054443341, "grad_norm": 0.8405663831921474, "learning_rate": 2.445788657911284e-06, "loss": 0.0261, "step": 160285 }, { "epoch": 0.6688169171583314, "grad_norm": 0.6576470686203906, "learning_rate": 2.4457505110682146e-06, "loss": 0.0175, "step": 160290 }, { "epoch": 0.6688377798733216, "grad_norm": 0.26201937347413107, "learning_rate": 2.445712366010013e-06, "loss": 0.0179, "step": 160295 }, { "epoch": 0.6688586425883118, "grad_norm": 1.0696924600101276, "learning_rate": 2.4456742227365396e-06, "loss": 0.0158, "step": 160300 }, { "epoch": 0.6688795053033022, "grad_norm": 1.2960330220926641, "learning_rate": 2.4456360812476558e-06, "loss": 0.0243, "step": 160305 }, { "epoch": 0.6689003680182924, "grad_norm": 0.5737930720171146, "learning_rate": 2.4455979415432217e-06, "loss": 0.026, "step": 160310 }, { "epoch": 0.6689212307332827, "grad_norm": 0.36579882117980483, "learning_rate": 2.4455598036230985e-06, "loss": 0.0162, "step": 160315 }, { "epoch": 0.6689420934482729, "grad_norm": 0.5355130010510956, "learning_rate": 2.445521667487147e-06, "loss": 0.0254, "step": 160320 }, { "epoch": 0.6689629561632633, "grad_norm": 0.42828204779871487, "learning_rate": 2.4454835331352284e-06, "loss": 0.0243, "step": 160325 }, { "epoch": 0.6689838188782535, "grad_norm": 0.6058746194817222, "learning_rate": 2.4454454005672028e-06, "loss": 0.0177, "step": 160330 }, { "epoch": 0.6690046815932438, "grad_norm": 0.7489156230829533, "learning_rate": 2.4454072697829324e-06, "loss": 0.0207, "step": 160335 }, { "epoch": 0.6690255443082341, "grad_norm": 0.7892208722356262, "learning_rate": 2.445369140782277e-06, "loss": 0.0209, "step": 160340 }, { "epoch": 0.6690464070232244, "grad_norm": 0.6077861134666226, "learning_rate": 2.4453310135650975e-06, "loss": 0.02, "step": 160345 }, { "epoch": 0.6690672697382146, "grad_norm": 0.8869783265751949, "learning_rate": 2.445292888131256e-06, "loss": 0.0189, "step": 160350 }, { "epoch": 0.669088132453205, "grad_norm": 1.086152768583764, "learning_rate": 2.445254764480613e-06, "loss": 0.0247, "step": 160355 }, { "epoch": 0.6691089951681952, "grad_norm": 0.6257759954536563, "learning_rate": 2.445216642613029e-06, "loss": 0.0248, "step": 160360 }, { "epoch": 0.6691298578831855, "grad_norm": 1.0873076750506845, "learning_rate": 2.4451785225283653e-06, "loss": 0.0231, "step": 160365 }, { "epoch": 0.6691507205981758, "grad_norm": 0.7872466765670221, "learning_rate": 2.4451404042264828e-06, "loss": 0.03, "step": 160370 }, { "epoch": 0.6691715833131661, "grad_norm": 0.9801260909535922, "learning_rate": 2.445102287707243e-06, "loss": 0.0214, "step": 160375 }, { "epoch": 0.6691924460281563, "grad_norm": 0.5597260753221581, "learning_rate": 2.4450641729705064e-06, "loss": 0.0194, "step": 160380 }, { "epoch": 0.6692133087431466, "grad_norm": 0.8492064050475389, "learning_rate": 2.4450260600161343e-06, "loss": 0.0276, "step": 160385 }, { "epoch": 0.6692341714581369, "grad_norm": 0.5991833346753994, "learning_rate": 2.444987948843988e-06, "loss": 0.0135, "step": 160390 }, { "epoch": 0.6692550341731272, "grad_norm": 0.5177338545594937, "learning_rate": 2.4449498394539285e-06, "loss": 0.0205, "step": 160395 }, { "epoch": 0.6692758968881174, "grad_norm": 0.7143733496960952, "learning_rate": 2.4449117318458166e-06, "loss": 0.012, "step": 160400 }, { "epoch": 0.6692967596031077, "grad_norm": 0.5700892112641399, "learning_rate": 2.444873626019514e-06, "loss": 0.016, "step": 160405 }, { "epoch": 0.669317622318098, "grad_norm": 0.5908931871884001, "learning_rate": 2.4448355219748814e-06, "loss": 0.0219, "step": 160410 }, { "epoch": 0.6693384850330882, "grad_norm": 0.7487904658329946, "learning_rate": 2.4447974197117795e-06, "loss": 0.0271, "step": 160415 }, { "epoch": 0.6693593477480786, "grad_norm": 0.23752922592222617, "learning_rate": 2.4447593192300713e-06, "loss": 0.0183, "step": 160420 }, { "epoch": 0.6693802104630688, "grad_norm": 0.5783383277142744, "learning_rate": 2.4447212205296162e-06, "loss": 0.0199, "step": 160425 }, { "epoch": 0.6694010731780591, "grad_norm": 0.4424073111132938, "learning_rate": 2.4446831236102757e-06, "loss": 0.0202, "step": 160430 }, { "epoch": 0.6694219358930493, "grad_norm": 1.865077408460611, "learning_rate": 2.4446450284719114e-06, "loss": 0.024, "step": 160435 }, { "epoch": 0.6694427986080397, "grad_norm": 0.9538554658455579, "learning_rate": 2.4446069351143847e-06, "loss": 0.0243, "step": 160440 }, { "epoch": 0.6694636613230299, "grad_norm": 0.4835104325795023, "learning_rate": 2.444568843537556e-06, "loss": 0.0169, "step": 160445 }, { "epoch": 0.6694845240380202, "grad_norm": 0.47567492209374584, "learning_rate": 2.4445307537412878e-06, "loss": 0.0172, "step": 160450 }, { "epoch": 0.6695053867530105, "grad_norm": 0.4638145458977206, "learning_rate": 2.444492665725441e-06, "loss": 0.0148, "step": 160455 }, { "epoch": 0.6695262494680008, "grad_norm": 0.919734407111777, "learning_rate": 2.444454579489876e-06, "loss": 0.0257, "step": 160460 }, { "epoch": 0.669547112182991, "grad_norm": 0.5289627502633484, "learning_rate": 2.4444164950344552e-06, "loss": 0.0203, "step": 160465 }, { "epoch": 0.6695679748979814, "grad_norm": 0.5904514969675773, "learning_rate": 2.4443784123590394e-06, "loss": 0.0277, "step": 160470 }, { "epoch": 0.6695888376129716, "grad_norm": 0.6346953709625436, "learning_rate": 2.4443403314634904e-06, "loss": 0.0199, "step": 160475 }, { "epoch": 0.6696097003279619, "grad_norm": 0.774115263211988, "learning_rate": 2.4443022523476682e-06, "loss": 0.0233, "step": 160480 }, { "epoch": 0.6696305630429522, "grad_norm": 0.4001692911418799, "learning_rate": 2.444264175011436e-06, "loss": 0.0212, "step": 160485 }, { "epoch": 0.6696514257579425, "grad_norm": 1.2784345156039354, "learning_rate": 2.444226099454655e-06, "loss": 0.024, "step": 160490 }, { "epoch": 0.6696722884729327, "grad_norm": 0.40912503669339045, "learning_rate": 2.4441880256771845e-06, "loss": 0.0177, "step": 160495 }, { "epoch": 0.6696931511879229, "grad_norm": 0.3751627355365374, "learning_rate": 2.4441499536788883e-06, "loss": 0.0191, "step": 160500 }, { "epoch": 0.6697140139029133, "grad_norm": 0.9257283775474731, "learning_rate": 2.444111883459627e-06, "loss": 0.0188, "step": 160505 }, { "epoch": 0.6697348766179035, "grad_norm": 0.7111538336837269, "learning_rate": 2.444073815019262e-06, "loss": 0.0182, "step": 160510 }, { "epoch": 0.6697557393328938, "grad_norm": 0.9495119713835575, "learning_rate": 2.4440357483576545e-06, "loss": 0.0218, "step": 160515 }, { "epoch": 0.6697766020478841, "grad_norm": 0.7416777226398603, "learning_rate": 2.4439976834746662e-06, "loss": 0.0183, "step": 160520 }, { "epoch": 0.6697974647628744, "grad_norm": 0.9431974232690162, "learning_rate": 2.443959620370159e-06, "loss": 0.0244, "step": 160525 }, { "epoch": 0.6698183274778646, "grad_norm": 0.822240078347945, "learning_rate": 2.443921559043994e-06, "loss": 0.0204, "step": 160530 }, { "epoch": 0.669839190192855, "grad_norm": 0.7833782165893755, "learning_rate": 2.4438834994960326e-06, "loss": 0.0237, "step": 160535 }, { "epoch": 0.6698600529078452, "grad_norm": 0.9303325432631772, "learning_rate": 2.4438454417261366e-06, "loss": 0.0327, "step": 160540 }, { "epoch": 0.6698809156228355, "grad_norm": 1.071596167779208, "learning_rate": 2.4438073857341677e-06, "loss": 0.0248, "step": 160545 }, { "epoch": 0.6699017783378258, "grad_norm": 0.5737107919302751, "learning_rate": 2.443769331519987e-06, "loss": 0.0189, "step": 160550 }, { "epoch": 0.6699226410528161, "grad_norm": 0.889694046471483, "learning_rate": 2.4437312790834565e-06, "loss": 0.0204, "step": 160555 }, { "epoch": 0.6699435037678063, "grad_norm": 0.6825738521267496, "learning_rate": 2.443693228424438e-06, "loss": 0.0287, "step": 160560 }, { "epoch": 0.6699643664827966, "grad_norm": 0.6177588532063302, "learning_rate": 2.443655179542792e-06, "loss": 0.0238, "step": 160565 }, { "epoch": 0.6699852291977869, "grad_norm": 0.38734644507032784, "learning_rate": 2.4436171324383816e-06, "loss": 0.0164, "step": 160570 }, { "epoch": 0.6700060919127772, "grad_norm": 0.3474892340055525, "learning_rate": 2.4435790871110675e-06, "loss": 0.0306, "step": 160575 }, { "epoch": 0.6700269546277674, "grad_norm": 1.4110734917990948, "learning_rate": 2.443541043560712e-06, "loss": 0.0293, "step": 160580 }, { "epoch": 0.6700478173427578, "grad_norm": 0.9848485813814225, "learning_rate": 2.443503001787176e-06, "loss": 0.0174, "step": 160585 }, { "epoch": 0.670068680057748, "grad_norm": 0.4182782786737613, "learning_rate": 2.4434649617903214e-06, "loss": 0.0207, "step": 160590 }, { "epoch": 0.6700895427727382, "grad_norm": 0.4240629857132061, "learning_rate": 2.443426923570011e-06, "loss": 0.0152, "step": 160595 }, { "epoch": 0.6701104054877286, "grad_norm": 0.8280145063178708, "learning_rate": 2.4433888871261048e-06, "loss": 0.0231, "step": 160600 }, { "epoch": 0.6701312682027188, "grad_norm": 1.0842030639236035, "learning_rate": 2.4433508524584657e-06, "loss": 0.0253, "step": 160605 }, { "epoch": 0.6701521309177091, "grad_norm": 0.4678106771616561, "learning_rate": 2.4433128195669547e-06, "loss": 0.0198, "step": 160610 }, { "epoch": 0.6701729936326993, "grad_norm": 1.2110536924842128, "learning_rate": 2.443274788451435e-06, "loss": 0.0212, "step": 160615 }, { "epoch": 0.6701938563476897, "grad_norm": 1.145564693430474, "learning_rate": 2.443236759111766e-06, "loss": 0.0228, "step": 160620 }, { "epoch": 0.6702147190626799, "grad_norm": 0.6957088075406279, "learning_rate": 2.443198731547812e-06, "loss": 0.0328, "step": 160625 }, { "epoch": 0.6702355817776702, "grad_norm": 0.5538976407244559, "learning_rate": 2.4431607057594335e-06, "loss": 0.0224, "step": 160630 }, { "epoch": 0.6702564444926605, "grad_norm": 0.5840219637729338, "learning_rate": 2.4431226817464922e-06, "loss": 0.0226, "step": 160635 }, { "epoch": 0.6702773072076508, "grad_norm": 0.45897283746708206, "learning_rate": 2.4430846595088507e-06, "loss": 0.0302, "step": 160640 }, { "epoch": 0.670298169922641, "grad_norm": 0.5596603529372733, "learning_rate": 2.44304663904637e-06, "loss": 0.033, "step": 160645 }, { "epoch": 0.6703190326376314, "grad_norm": 0.8377169836719167, "learning_rate": 2.4430086203589125e-06, "loss": 0.0208, "step": 160650 }, { "epoch": 0.6703398953526216, "grad_norm": 0.5003565536184915, "learning_rate": 2.44297060344634e-06, "loss": 0.0242, "step": 160655 }, { "epoch": 0.6703607580676119, "grad_norm": 1.1712759623654678, "learning_rate": 2.4429325883085152e-06, "loss": 0.0208, "step": 160660 }, { "epoch": 0.6703816207826022, "grad_norm": 0.5139270235125013, "learning_rate": 2.442894574945298e-06, "loss": 0.0229, "step": 160665 }, { "epoch": 0.6704024834975925, "grad_norm": 0.842357675484315, "learning_rate": 2.442856563356552e-06, "loss": 0.0238, "step": 160670 }, { "epoch": 0.6704233462125827, "grad_norm": 0.950612373242348, "learning_rate": 2.4428185535421388e-06, "loss": 0.019, "step": 160675 }, { "epoch": 0.670444208927573, "grad_norm": 0.46956581956529286, "learning_rate": 2.4427805455019206e-06, "loss": 0.0229, "step": 160680 }, { "epoch": 0.6704650716425633, "grad_norm": 0.5670205307807779, "learning_rate": 2.4427425392357582e-06, "loss": 0.024, "step": 160685 }, { "epoch": 0.6704859343575535, "grad_norm": 0.8982439460150179, "learning_rate": 2.4427045347435154e-06, "loss": 0.0182, "step": 160690 }, { "epoch": 0.6705067970725438, "grad_norm": 0.9434846605204638, "learning_rate": 2.4426665320250528e-06, "loss": 0.0256, "step": 160695 }, { "epoch": 0.6705276597875341, "grad_norm": 0.8678648781190262, "learning_rate": 2.4426285310802326e-06, "loss": 0.021, "step": 160700 }, { "epoch": 0.6705485225025244, "grad_norm": 0.4649042478753342, "learning_rate": 2.442590531908917e-06, "loss": 0.0212, "step": 160705 }, { "epoch": 0.6705693852175146, "grad_norm": 0.9132219783479508, "learning_rate": 2.442552534510969e-06, "loss": 0.0228, "step": 160710 }, { "epoch": 0.670590247932505, "grad_norm": 0.8040584311420491, "learning_rate": 2.4425145388862496e-06, "loss": 0.0213, "step": 160715 }, { "epoch": 0.6706111106474952, "grad_norm": 0.5696586413656428, "learning_rate": 2.442476545034621e-06, "loss": 0.0208, "step": 160720 }, { "epoch": 0.6706319733624855, "grad_norm": 0.31457331032414326, "learning_rate": 2.4424385529559455e-06, "loss": 0.0178, "step": 160725 }, { "epoch": 0.6706528360774758, "grad_norm": 0.84548192590538, "learning_rate": 2.4424005626500853e-06, "loss": 0.0267, "step": 160730 }, { "epoch": 0.6706736987924661, "grad_norm": 0.5176417480520425, "learning_rate": 2.442362574116902e-06, "loss": 0.0172, "step": 160735 }, { "epoch": 0.6706945615074563, "grad_norm": 0.6226896807819021, "learning_rate": 2.4423245873562576e-06, "loss": 0.0191, "step": 160740 }, { "epoch": 0.6707154242224466, "grad_norm": 0.740111229137167, "learning_rate": 2.4422866023680157e-06, "loss": 0.0172, "step": 160745 }, { "epoch": 0.6707362869374369, "grad_norm": 0.5382466260371793, "learning_rate": 2.4422486191520377e-06, "loss": 0.0188, "step": 160750 }, { "epoch": 0.6707571496524272, "grad_norm": 0.7131913661176569, "learning_rate": 2.442210637708185e-06, "loss": 0.0269, "step": 160755 }, { "epoch": 0.6707780123674174, "grad_norm": 1.1509950549994026, "learning_rate": 2.4421726580363206e-06, "loss": 0.0244, "step": 160760 }, { "epoch": 0.6707988750824078, "grad_norm": 0.596094492756889, "learning_rate": 2.442134680136307e-06, "loss": 0.018, "step": 160765 }, { "epoch": 0.670819737797398, "grad_norm": 0.6628631227979519, "learning_rate": 2.4420967040080055e-06, "loss": 0.0204, "step": 160770 }, { "epoch": 0.6708406005123883, "grad_norm": 0.5935598382589252, "learning_rate": 2.4420587296512794e-06, "loss": 0.0239, "step": 160775 }, { "epoch": 0.6708614632273786, "grad_norm": 0.5715106524220026, "learning_rate": 2.44202075706599e-06, "loss": 0.0256, "step": 160780 }, { "epoch": 0.6708823259423689, "grad_norm": 0.4377155744145787, "learning_rate": 2.441982786252e-06, "loss": 0.0179, "step": 160785 }, { "epoch": 0.6709031886573591, "grad_norm": 0.30503290236542113, "learning_rate": 2.4419448172091715e-06, "loss": 0.0231, "step": 160790 }, { "epoch": 0.6709240513723493, "grad_norm": 0.743754190528419, "learning_rate": 2.4419068499373674e-06, "loss": 0.0163, "step": 160795 }, { "epoch": 0.6709449140873397, "grad_norm": 0.6512491689235271, "learning_rate": 2.44186888443645e-06, "loss": 0.028, "step": 160800 }, { "epoch": 0.6709657768023299, "grad_norm": 1.1360388667446184, "learning_rate": 2.4418309207062806e-06, "loss": 0.0247, "step": 160805 }, { "epoch": 0.6709866395173202, "grad_norm": 0.37473105415417685, "learning_rate": 2.4417929587467224e-06, "loss": 0.0207, "step": 160810 }, { "epoch": 0.6710075022323105, "grad_norm": 0.677281539222629, "learning_rate": 2.441754998557638e-06, "loss": 0.0238, "step": 160815 }, { "epoch": 0.6710283649473008, "grad_norm": 0.49124084754258474, "learning_rate": 2.4417170401388887e-06, "loss": 0.0182, "step": 160820 }, { "epoch": 0.671049227662291, "grad_norm": 0.6510834377869028, "learning_rate": 2.441679083490338e-06, "loss": 0.0237, "step": 160825 }, { "epoch": 0.6710700903772814, "grad_norm": 0.561144073062954, "learning_rate": 2.4416411286118476e-06, "loss": 0.0177, "step": 160830 }, { "epoch": 0.6710909530922716, "grad_norm": 0.7428899057436447, "learning_rate": 2.4416031755032805e-06, "loss": 0.0267, "step": 160835 }, { "epoch": 0.6711118158072619, "grad_norm": 0.5406738380595805, "learning_rate": 2.4415652241644994e-06, "loss": 0.0231, "step": 160840 }, { "epoch": 0.6711326785222522, "grad_norm": 0.4244721010733465, "learning_rate": 2.4415272745953654e-06, "loss": 0.0201, "step": 160845 }, { "epoch": 0.6711535412372425, "grad_norm": 0.6757067756447039, "learning_rate": 2.4414893267957424e-06, "loss": 0.0358, "step": 160850 }, { "epoch": 0.6711744039522327, "grad_norm": 0.4878641673581681, "learning_rate": 2.4414513807654917e-06, "loss": 0.0222, "step": 160855 }, { "epoch": 0.671195266667223, "grad_norm": 0.4707090025207698, "learning_rate": 2.441413436504477e-06, "loss": 0.0242, "step": 160860 }, { "epoch": 0.6712161293822133, "grad_norm": 0.6080696462373094, "learning_rate": 2.4413754940125592e-06, "loss": 0.021, "step": 160865 }, { "epoch": 0.6712369920972036, "grad_norm": 0.6721165641806951, "learning_rate": 2.4413375532896035e-06, "loss": 0.0234, "step": 160870 }, { "epoch": 0.6712578548121938, "grad_norm": 0.845126580735641, "learning_rate": 2.44129961433547e-06, "loss": 0.0218, "step": 160875 }, { "epoch": 0.6712787175271842, "grad_norm": 0.7856071434301365, "learning_rate": 2.441261677150022e-06, "loss": 0.025, "step": 160880 }, { "epoch": 0.6712995802421744, "grad_norm": 1.1810060349063871, "learning_rate": 2.441223741733122e-06, "loss": 0.027, "step": 160885 }, { "epoch": 0.6713204429571646, "grad_norm": 0.571125370210119, "learning_rate": 2.4411858080846324e-06, "loss": 0.0188, "step": 160890 }, { "epoch": 0.671341305672155, "grad_norm": 0.853487132737083, "learning_rate": 2.441147876204417e-06, "loss": 0.0196, "step": 160895 }, { "epoch": 0.6713621683871452, "grad_norm": 0.7449134015854664, "learning_rate": 2.4411099460923373e-06, "loss": 0.0224, "step": 160900 }, { "epoch": 0.6713830311021355, "grad_norm": 0.4332540065765878, "learning_rate": 2.4410720177482556e-06, "loss": 0.0249, "step": 160905 }, { "epoch": 0.6714038938171258, "grad_norm": 0.48128700989896456, "learning_rate": 2.441034091172036e-06, "loss": 0.025, "step": 160910 }, { "epoch": 0.6714247565321161, "grad_norm": 0.5229799922633258, "learning_rate": 2.4409961663635405e-06, "loss": 0.0222, "step": 160915 }, { "epoch": 0.6714456192471063, "grad_norm": 0.7124492216082046, "learning_rate": 2.4409582433226313e-06, "loss": 0.0355, "step": 160920 }, { "epoch": 0.6714664819620966, "grad_norm": 0.46202857412147424, "learning_rate": 2.4409203220491707e-06, "loss": 0.0178, "step": 160925 }, { "epoch": 0.6714873446770869, "grad_norm": 0.5749417752498823, "learning_rate": 2.440882402543023e-06, "loss": 0.0194, "step": 160930 }, { "epoch": 0.6715082073920772, "grad_norm": 0.3759659371399354, "learning_rate": 2.44084448480405e-06, "loss": 0.0232, "step": 160935 }, { "epoch": 0.6715290701070674, "grad_norm": 0.5515151935933251, "learning_rate": 2.4408065688321143e-06, "loss": 0.0153, "step": 160940 }, { "epoch": 0.6715499328220578, "grad_norm": 0.5786848164801615, "learning_rate": 2.440768654627079e-06, "loss": 0.0171, "step": 160945 }, { "epoch": 0.671570795537048, "grad_norm": 0.5595907286791366, "learning_rate": 2.4407307421888067e-06, "loss": 0.0262, "step": 160950 }, { "epoch": 0.6715916582520383, "grad_norm": 0.8661290695104393, "learning_rate": 2.4406928315171603e-06, "loss": 0.0217, "step": 160955 }, { "epoch": 0.6716125209670286, "grad_norm": 0.2771257641122052, "learning_rate": 2.440654922612003e-06, "loss": 0.0208, "step": 160960 }, { "epoch": 0.6716333836820189, "grad_norm": 0.6623407754607157, "learning_rate": 2.440617015473196e-06, "loss": 0.0207, "step": 160965 }, { "epoch": 0.6716542463970091, "grad_norm": 0.679245204687068, "learning_rate": 2.4405791101006044e-06, "loss": 0.0145, "step": 160970 }, { "epoch": 0.6716751091119993, "grad_norm": 0.5950648370715869, "learning_rate": 2.440541206494089e-06, "loss": 0.0151, "step": 160975 }, { "epoch": 0.6716959718269897, "grad_norm": 0.9961460504717675, "learning_rate": 2.440503304653514e-06, "loss": 0.0191, "step": 160980 }, { "epoch": 0.67171683454198, "grad_norm": 0.5059868612458277, "learning_rate": 2.4404654045787425e-06, "loss": 0.0214, "step": 160985 }, { "epoch": 0.6717376972569702, "grad_norm": 0.8461142642073817, "learning_rate": 2.4404275062696364e-06, "loss": 0.0247, "step": 160990 }, { "epoch": 0.6717585599719605, "grad_norm": 0.28635212097209306, "learning_rate": 2.4403896097260583e-06, "loss": 0.0125, "step": 160995 }, { "epoch": 0.6717794226869508, "grad_norm": 0.46371310809998717, "learning_rate": 2.4403517149478726e-06, "loss": 0.0217, "step": 161000 }, { "epoch": 0.671800285401941, "grad_norm": 0.7587862729819835, "learning_rate": 2.4403138219349416e-06, "loss": 0.0198, "step": 161005 }, { "epoch": 0.6718211481169314, "grad_norm": 0.6650477602447997, "learning_rate": 2.4402759306871274e-06, "loss": 0.0179, "step": 161010 }, { "epoch": 0.6718420108319216, "grad_norm": 0.6979707069058002, "learning_rate": 2.440238041204294e-06, "loss": 0.0212, "step": 161015 }, { "epoch": 0.6718628735469119, "grad_norm": 0.7217603999209975, "learning_rate": 2.4402001534863045e-06, "loss": 0.0235, "step": 161020 }, { "epoch": 0.6718837362619022, "grad_norm": 0.4826786799219257, "learning_rate": 2.4401622675330204e-06, "loss": 0.0164, "step": 161025 }, { "epoch": 0.6719045989768925, "grad_norm": 0.9912522736776668, "learning_rate": 2.4401243833443064e-06, "loss": 0.0224, "step": 161030 }, { "epoch": 0.6719254616918827, "grad_norm": 0.5789172573777743, "learning_rate": 2.4400865009200246e-06, "loss": 0.0228, "step": 161035 }, { "epoch": 0.671946324406873, "grad_norm": 1.073057262536154, "learning_rate": 2.440048620260039e-06, "loss": 0.0305, "step": 161040 }, { "epoch": 0.6719671871218633, "grad_norm": 0.31282685889100453, "learning_rate": 2.4400107413642117e-06, "loss": 0.0206, "step": 161045 }, { "epoch": 0.6719880498368536, "grad_norm": 0.32847636574891137, "learning_rate": 2.4399728642324057e-06, "loss": 0.017, "step": 161050 }, { "epoch": 0.6720089125518438, "grad_norm": 0.8652468460791365, "learning_rate": 2.439934988864485e-06, "loss": 0.0223, "step": 161055 }, { "epoch": 0.6720297752668342, "grad_norm": 0.35967038830337766, "learning_rate": 2.4398971152603114e-06, "loss": 0.0228, "step": 161060 }, { "epoch": 0.6720506379818244, "grad_norm": 0.6810419595766076, "learning_rate": 2.439859243419749e-06, "loss": 0.0185, "step": 161065 }, { "epoch": 0.6720715006968146, "grad_norm": 0.5866133842990161, "learning_rate": 2.4398213733426607e-06, "loss": 0.0284, "step": 161070 }, { "epoch": 0.672092363411805, "grad_norm": 0.4827901170367182, "learning_rate": 2.43978350502891e-06, "loss": 0.0216, "step": 161075 }, { "epoch": 0.6721132261267952, "grad_norm": 1.108344762312236, "learning_rate": 2.4397456384783596e-06, "loss": 0.0187, "step": 161080 }, { "epoch": 0.6721340888417855, "grad_norm": 1.0616959555588048, "learning_rate": 2.439707773690872e-06, "loss": 0.021, "step": 161085 }, { "epoch": 0.6721549515567757, "grad_norm": 0.6397488193349083, "learning_rate": 2.4396699106663126e-06, "loss": 0.0209, "step": 161090 }, { "epoch": 0.6721758142717661, "grad_norm": 1.4794285367975797, "learning_rate": 2.4396320494045425e-06, "loss": 0.0277, "step": 161095 }, { "epoch": 0.6721966769867563, "grad_norm": 0.9393070455023842, "learning_rate": 2.4395941899054256e-06, "loss": 0.0251, "step": 161100 }, { "epoch": 0.6722175397017466, "grad_norm": 0.4742339995362431, "learning_rate": 2.439556332168824e-06, "loss": 0.022, "step": 161105 }, { "epoch": 0.6722384024167369, "grad_norm": 0.5997125564589199, "learning_rate": 2.4395184761946035e-06, "loss": 0.0228, "step": 161110 }, { "epoch": 0.6722592651317272, "grad_norm": 0.41290943930774615, "learning_rate": 2.4394806219826256e-06, "loss": 0.0271, "step": 161115 }, { "epoch": 0.6722801278467174, "grad_norm": 0.6855134589616079, "learning_rate": 2.4394427695327536e-06, "loss": 0.0255, "step": 161120 }, { "epoch": 0.6723009905617078, "grad_norm": 0.31851442471312563, "learning_rate": 2.439404918844852e-06, "loss": 0.0169, "step": 161125 }, { "epoch": 0.672321853276698, "grad_norm": 0.9613848165841045, "learning_rate": 2.4393670699187823e-06, "loss": 0.0212, "step": 161130 }, { "epoch": 0.6723427159916883, "grad_norm": 0.41328039009827006, "learning_rate": 2.4393292227544086e-06, "loss": 0.0259, "step": 161135 }, { "epoch": 0.6723635787066786, "grad_norm": 0.8418310965851796, "learning_rate": 2.4392913773515954e-06, "loss": 0.0199, "step": 161140 }, { "epoch": 0.6723844414216689, "grad_norm": 0.419802890803234, "learning_rate": 2.4392535337102045e-06, "loss": 0.0185, "step": 161145 }, { "epoch": 0.6724053041366591, "grad_norm": 0.6932642422190981, "learning_rate": 2.4392156918301e-06, "loss": 0.0247, "step": 161150 }, { "epoch": 0.6724261668516494, "grad_norm": 0.6033499873377743, "learning_rate": 2.4391778517111444e-06, "loss": 0.0157, "step": 161155 }, { "epoch": 0.6724470295666397, "grad_norm": 0.6021595826783471, "learning_rate": 2.439140013353203e-06, "loss": 0.0248, "step": 161160 }, { "epoch": 0.67246789228163, "grad_norm": 0.5728471083959661, "learning_rate": 2.439102176756137e-06, "loss": 0.0208, "step": 161165 }, { "epoch": 0.6724887549966202, "grad_norm": 0.789119751851926, "learning_rate": 2.439064341919811e-06, "loss": 0.0262, "step": 161170 }, { "epoch": 0.6725096177116106, "grad_norm": 0.5570921380836785, "learning_rate": 2.4390265088440885e-06, "loss": 0.0241, "step": 161175 }, { "epoch": 0.6725304804266008, "grad_norm": 0.4584302342486348, "learning_rate": 2.4389886775288325e-06, "loss": 0.0191, "step": 161180 }, { "epoch": 0.672551343141591, "grad_norm": 0.481801538736265, "learning_rate": 2.4389508479739066e-06, "loss": 0.0248, "step": 161185 }, { "epoch": 0.6725722058565814, "grad_norm": 0.9366519021998623, "learning_rate": 2.438913020179175e-06, "loss": 0.0196, "step": 161190 }, { "epoch": 0.6725930685715716, "grad_norm": 1.0497318641518183, "learning_rate": 2.4388751941445005e-06, "loss": 0.0196, "step": 161195 }, { "epoch": 0.6726139312865619, "grad_norm": 0.6099080393903302, "learning_rate": 2.4388373698697463e-06, "loss": 0.0231, "step": 161200 }, { "epoch": 0.6726347940015522, "grad_norm": 0.4077685344557093, "learning_rate": 2.4387995473547764e-06, "loss": 0.0186, "step": 161205 }, { "epoch": 0.6726556567165425, "grad_norm": 1.0421050055234922, "learning_rate": 2.4387617265994547e-06, "loss": 0.016, "step": 161210 }, { "epoch": 0.6726765194315327, "grad_norm": 0.4853879519832992, "learning_rate": 2.4387239076036444e-06, "loss": 0.0186, "step": 161215 }, { "epoch": 0.672697382146523, "grad_norm": 2.905916652012245, "learning_rate": 2.4386860903672085e-06, "loss": 0.0227, "step": 161220 }, { "epoch": 0.6727182448615133, "grad_norm": 1.2630182514157917, "learning_rate": 2.4386482748900116e-06, "loss": 0.0303, "step": 161225 }, { "epoch": 0.6727391075765036, "grad_norm": 0.6574230860674091, "learning_rate": 2.438610461171916e-06, "loss": 0.0272, "step": 161230 }, { "epoch": 0.6727599702914938, "grad_norm": 0.5715917881257011, "learning_rate": 2.438572649212787e-06, "loss": 0.022, "step": 161235 }, { "epoch": 0.6727808330064842, "grad_norm": 0.4460058293393094, "learning_rate": 2.438534839012487e-06, "loss": 0.024, "step": 161240 }, { "epoch": 0.6728016957214744, "grad_norm": 0.7844367588896213, "learning_rate": 2.43849703057088e-06, "loss": 0.0232, "step": 161245 }, { "epoch": 0.6728225584364647, "grad_norm": 0.5037783931321569, "learning_rate": 2.43845922388783e-06, "loss": 0.0211, "step": 161250 }, { "epoch": 0.672843421151455, "grad_norm": 0.6089786875319824, "learning_rate": 2.4384214189632003e-06, "loss": 0.0189, "step": 161255 }, { "epoch": 0.6728642838664453, "grad_norm": 0.6848055730960864, "learning_rate": 2.4383836157968545e-06, "loss": 0.0213, "step": 161260 }, { "epoch": 0.6728851465814355, "grad_norm": 1.0915768313330405, "learning_rate": 2.438345814388657e-06, "loss": 0.0259, "step": 161265 }, { "epoch": 0.6729060092964257, "grad_norm": 0.6701249228520659, "learning_rate": 2.4383080147384705e-06, "loss": 0.0214, "step": 161270 }, { "epoch": 0.6729268720114161, "grad_norm": 0.8328809557930743, "learning_rate": 2.4382702168461596e-06, "loss": 0.028, "step": 161275 }, { "epoch": 0.6729477347264063, "grad_norm": 0.4340074912841656, "learning_rate": 2.4382324207115874e-06, "loss": 0.0204, "step": 161280 }, { "epoch": 0.6729685974413966, "grad_norm": 0.6978420882994432, "learning_rate": 2.438194626334618e-06, "loss": 0.0203, "step": 161285 }, { "epoch": 0.6729894601563869, "grad_norm": 0.808167546421438, "learning_rate": 2.4381568337151154e-06, "loss": 0.0219, "step": 161290 }, { "epoch": 0.6730103228713772, "grad_norm": 0.7843138161275319, "learning_rate": 2.438119042852943e-06, "loss": 0.0233, "step": 161295 }, { "epoch": 0.6730311855863674, "grad_norm": 0.3661706765724983, "learning_rate": 2.438081253747965e-06, "loss": 0.0318, "step": 161300 }, { "epoch": 0.6730520483013578, "grad_norm": 0.5464286902846158, "learning_rate": 2.4380434664000445e-06, "loss": 0.0161, "step": 161305 }, { "epoch": 0.673072911016348, "grad_norm": 0.4814526286645287, "learning_rate": 2.438005680809046e-06, "loss": 0.0207, "step": 161310 }, { "epoch": 0.6730937737313383, "grad_norm": 0.7144075020041055, "learning_rate": 2.4379678969748335e-06, "loss": 0.0202, "step": 161315 }, { "epoch": 0.6731146364463286, "grad_norm": 1.0701037620691771, "learning_rate": 2.4379301148972702e-06, "loss": 0.0195, "step": 161320 }, { "epoch": 0.6731354991613189, "grad_norm": 0.5274385066439146, "learning_rate": 2.4378923345762208e-06, "loss": 0.0267, "step": 161325 }, { "epoch": 0.6731563618763091, "grad_norm": 0.8339288625955337, "learning_rate": 2.4378545560115483e-06, "loss": 0.0275, "step": 161330 }, { "epoch": 0.6731772245912994, "grad_norm": 0.7608275632042608, "learning_rate": 2.4378167792031174e-06, "loss": 0.0187, "step": 161335 }, { "epoch": 0.6731980873062897, "grad_norm": 0.5934823218264926, "learning_rate": 2.4377790041507916e-06, "loss": 0.0255, "step": 161340 }, { "epoch": 0.67321895002128, "grad_norm": 0.6801717076762109, "learning_rate": 2.437741230854435e-06, "loss": 0.0196, "step": 161345 }, { "epoch": 0.6732398127362702, "grad_norm": 0.7409973209719055, "learning_rate": 2.4377034593139116e-06, "loss": 0.0189, "step": 161350 }, { "epoch": 0.6732606754512606, "grad_norm": 0.6667100747003959, "learning_rate": 2.4376656895290855e-06, "loss": 0.0236, "step": 161355 }, { "epoch": 0.6732815381662508, "grad_norm": 0.775972652106655, "learning_rate": 2.43762792149982e-06, "loss": 0.0164, "step": 161360 }, { "epoch": 0.673302400881241, "grad_norm": 0.6234966146798608, "learning_rate": 2.4375901552259796e-06, "loss": 0.0243, "step": 161365 }, { "epoch": 0.6733232635962314, "grad_norm": 0.7255701601135668, "learning_rate": 2.4375523907074284e-06, "loss": 0.0195, "step": 161370 }, { "epoch": 0.6733441263112216, "grad_norm": 0.6340781569814276, "learning_rate": 2.4375146279440303e-06, "loss": 0.0149, "step": 161375 }, { "epoch": 0.6733649890262119, "grad_norm": 0.5693592447774016, "learning_rate": 2.4374768669356496e-06, "loss": 0.0206, "step": 161380 }, { "epoch": 0.6733858517412022, "grad_norm": 0.9485236847744745, "learning_rate": 2.4374391076821498e-06, "loss": 0.0215, "step": 161385 }, { "epoch": 0.6734067144561925, "grad_norm": 0.9854864200260746, "learning_rate": 2.437401350183396e-06, "loss": 0.0188, "step": 161390 }, { "epoch": 0.6734275771711827, "grad_norm": 0.2690792391115014, "learning_rate": 2.437363594439251e-06, "loss": 0.0183, "step": 161395 }, { "epoch": 0.673448439886173, "grad_norm": 0.4364090449343359, "learning_rate": 2.43732584044958e-06, "loss": 0.0189, "step": 161400 }, { "epoch": 0.6734693026011633, "grad_norm": 0.5164793962970328, "learning_rate": 2.4372880882142463e-06, "loss": 0.019, "step": 161405 }, { "epoch": 0.6734901653161536, "grad_norm": 0.5850730847925523, "learning_rate": 2.4372503377331143e-06, "loss": 0.0231, "step": 161410 }, { "epoch": 0.6735110280311438, "grad_norm": 1.3911254911855189, "learning_rate": 2.4372125890060486e-06, "loss": 0.0215, "step": 161415 }, { "epoch": 0.6735318907461342, "grad_norm": 0.7745947682983383, "learning_rate": 2.437174842032913e-06, "loss": 0.0381, "step": 161420 }, { "epoch": 0.6735527534611244, "grad_norm": 0.5066266093245698, "learning_rate": 2.4371370968135716e-06, "loss": 0.0183, "step": 161425 }, { "epoch": 0.6735736161761147, "grad_norm": 0.6567903610199562, "learning_rate": 2.4370993533478883e-06, "loss": 0.0224, "step": 161430 }, { "epoch": 0.673594478891105, "grad_norm": 0.5668891736379936, "learning_rate": 2.437061611635728e-06, "loss": 0.0206, "step": 161435 }, { "epoch": 0.6736153416060953, "grad_norm": 0.6397653034733728, "learning_rate": 2.4370238716769547e-06, "loss": 0.0219, "step": 161440 }, { "epoch": 0.6736362043210855, "grad_norm": 0.7529902520796338, "learning_rate": 2.436986133471433e-06, "loss": 0.0222, "step": 161445 }, { "epoch": 0.6736570670360758, "grad_norm": 1.7756018446553756, "learning_rate": 2.4369483970190265e-06, "loss": 0.0209, "step": 161450 }, { "epoch": 0.6736779297510661, "grad_norm": 0.7039061520639224, "learning_rate": 2.436910662319599e-06, "loss": 0.0224, "step": 161455 }, { "epoch": 0.6736987924660564, "grad_norm": 0.5083440884172381, "learning_rate": 2.436872929373016e-06, "loss": 0.0221, "step": 161460 }, { "epoch": 0.6737196551810466, "grad_norm": 0.5220324624555311, "learning_rate": 2.4368351981791416e-06, "loss": 0.029, "step": 161465 }, { "epoch": 0.673740517896037, "grad_norm": 0.2949362038118347, "learning_rate": 2.4367974687378396e-06, "loss": 0.0263, "step": 161470 }, { "epoch": 0.6737613806110272, "grad_norm": 0.4264878976333499, "learning_rate": 2.436759741048974e-06, "loss": 0.0191, "step": 161475 }, { "epoch": 0.6737822433260174, "grad_norm": 2.3184718395359547, "learning_rate": 2.43672201511241e-06, "loss": 0.0179, "step": 161480 }, { "epoch": 0.6738031060410078, "grad_norm": 0.7526154093876375, "learning_rate": 2.436684290928012e-06, "loss": 0.024, "step": 161485 }, { "epoch": 0.673823968755998, "grad_norm": 0.5839174426247931, "learning_rate": 2.4366465684956435e-06, "loss": 0.0242, "step": 161490 }, { "epoch": 0.6738448314709883, "grad_norm": 0.40811397579752706, "learning_rate": 2.4366088478151694e-06, "loss": 0.0218, "step": 161495 }, { "epoch": 0.6738656941859786, "grad_norm": 0.6412167163848996, "learning_rate": 2.4365711288864547e-06, "loss": 0.0185, "step": 161500 }, { "epoch": 0.6738865569009689, "grad_norm": 0.5923892044065046, "learning_rate": 2.4365334117093626e-06, "loss": 0.0195, "step": 161505 }, { "epoch": 0.6739074196159591, "grad_norm": 0.8596582712731446, "learning_rate": 2.436495696283758e-06, "loss": 0.0227, "step": 161510 }, { "epoch": 0.6739282823309494, "grad_norm": 0.49146304281278663, "learning_rate": 2.436457982609506e-06, "loss": 0.0215, "step": 161515 }, { "epoch": 0.6739491450459397, "grad_norm": 0.7307592651800452, "learning_rate": 2.4364202706864704e-06, "loss": 0.0239, "step": 161520 }, { "epoch": 0.67397000776093, "grad_norm": 0.7401715310415565, "learning_rate": 2.4363825605145157e-06, "loss": 0.0185, "step": 161525 }, { "epoch": 0.6739908704759202, "grad_norm": 0.6190356453198308, "learning_rate": 2.436344852093507e-06, "loss": 0.0285, "step": 161530 }, { "epoch": 0.6740117331909106, "grad_norm": 0.5932174273408815, "learning_rate": 2.436307145423308e-06, "loss": 0.0271, "step": 161535 }, { "epoch": 0.6740325959059008, "grad_norm": 0.6317475275757038, "learning_rate": 2.436269440503783e-06, "loss": 0.0195, "step": 161540 }, { "epoch": 0.674053458620891, "grad_norm": 0.23408381246526946, "learning_rate": 2.4362317373347973e-06, "loss": 0.0236, "step": 161545 }, { "epoch": 0.6740743213358814, "grad_norm": 0.5689859436606848, "learning_rate": 2.4361940359162158e-06, "loss": 0.0239, "step": 161550 }, { "epoch": 0.6740951840508717, "grad_norm": 0.38753647447409717, "learning_rate": 2.4361563362479025e-06, "loss": 0.0239, "step": 161555 }, { "epoch": 0.6741160467658619, "grad_norm": 0.7482922675620715, "learning_rate": 2.436118638329721e-06, "loss": 0.0273, "step": 161560 }, { "epoch": 0.6741369094808523, "grad_norm": 0.6876004862081988, "learning_rate": 2.4360809421615376e-06, "loss": 0.0231, "step": 161565 }, { "epoch": 0.6741577721958425, "grad_norm": 1.0457907939762405, "learning_rate": 2.4360432477432163e-06, "loss": 0.0194, "step": 161570 }, { "epoch": 0.6741786349108327, "grad_norm": 0.5757083696326125, "learning_rate": 2.4360055550746215e-06, "loss": 0.0297, "step": 161575 }, { "epoch": 0.674199497625823, "grad_norm": 0.8562910420527071, "learning_rate": 2.4359678641556177e-06, "loss": 0.0257, "step": 161580 }, { "epoch": 0.6742203603408133, "grad_norm": 0.6228958600519728, "learning_rate": 2.43593017498607e-06, "loss": 0.0226, "step": 161585 }, { "epoch": 0.6742412230558036, "grad_norm": 0.6216528472845422, "learning_rate": 2.4358924875658424e-06, "loss": 0.0215, "step": 161590 }, { "epoch": 0.6742620857707938, "grad_norm": 0.5048618204677281, "learning_rate": 2.4358548018948e-06, "loss": 0.0199, "step": 161595 }, { "epoch": 0.6742829484857842, "grad_norm": 0.5268507852352128, "learning_rate": 2.4358171179728082e-06, "loss": 0.0287, "step": 161600 }, { "epoch": 0.6743038112007744, "grad_norm": 0.4920635345757898, "learning_rate": 2.435779435799731e-06, "loss": 0.0185, "step": 161605 }, { "epoch": 0.6743246739157647, "grad_norm": 2.4824328871689416, "learning_rate": 2.4357417553754327e-06, "loss": 0.0295, "step": 161610 }, { "epoch": 0.674345536630755, "grad_norm": 0.7409431956292757, "learning_rate": 2.4357040766997786e-06, "loss": 0.021, "step": 161615 }, { "epoch": 0.6743663993457453, "grad_norm": 0.47799101719260845, "learning_rate": 2.4356663997726334e-06, "loss": 0.0232, "step": 161620 }, { "epoch": 0.6743872620607355, "grad_norm": 0.561577511834768, "learning_rate": 2.435628724593862e-06, "loss": 0.0253, "step": 161625 }, { "epoch": 0.6744081247757258, "grad_norm": 0.5912723563485922, "learning_rate": 2.435591051163329e-06, "loss": 0.0224, "step": 161630 }, { "epoch": 0.6744289874907161, "grad_norm": 0.5486488527708836, "learning_rate": 2.435553379480899e-06, "loss": 0.0247, "step": 161635 }, { "epoch": 0.6744498502057064, "grad_norm": 1.0185875989131352, "learning_rate": 2.435515709546437e-06, "loss": 0.0208, "step": 161640 }, { "epoch": 0.6744707129206966, "grad_norm": 0.968202947160405, "learning_rate": 2.4354780413598082e-06, "loss": 0.0223, "step": 161645 }, { "epoch": 0.674491575635687, "grad_norm": 0.8498563238286505, "learning_rate": 2.435440374920877e-06, "loss": 0.0181, "step": 161650 }, { "epoch": 0.6745124383506772, "grad_norm": 0.9434497730498987, "learning_rate": 2.435402710229508e-06, "loss": 0.0231, "step": 161655 }, { "epoch": 0.6745333010656674, "grad_norm": 1.0010088750324557, "learning_rate": 2.4353650472855665e-06, "loss": 0.0312, "step": 161660 }, { "epoch": 0.6745541637806578, "grad_norm": 0.5723516670717884, "learning_rate": 2.4353273860889175e-06, "loss": 0.0229, "step": 161665 }, { "epoch": 0.674575026495648, "grad_norm": 0.3862052135140388, "learning_rate": 2.4352897266394256e-06, "loss": 0.0156, "step": 161670 }, { "epoch": 0.6745958892106383, "grad_norm": 0.7767373922528625, "learning_rate": 2.435252068936956e-06, "loss": 0.0254, "step": 161675 }, { "epoch": 0.6746167519256286, "grad_norm": 1.2367968099215865, "learning_rate": 2.4352144129813734e-06, "loss": 0.0242, "step": 161680 }, { "epoch": 0.6746376146406189, "grad_norm": 0.7302670278390346, "learning_rate": 2.435176758772543e-06, "loss": 0.0265, "step": 161685 }, { "epoch": 0.6746584773556091, "grad_norm": 0.6466915728579339, "learning_rate": 2.4351391063103295e-06, "loss": 0.0266, "step": 161690 }, { "epoch": 0.6746793400705994, "grad_norm": 1.23237413853969, "learning_rate": 2.4351014555945973e-06, "loss": 0.0177, "step": 161695 }, { "epoch": 0.6747002027855897, "grad_norm": 0.5508345078582454, "learning_rate": 2.4350638066252124e-06, "loss": 0.0197, "step": 161700 }, { "epoch": 0.67472106550058, "grad_norm": 0.6036836058138361, "learning_rate": 2.4350261594020392e-06, "loss": 0.0248, "step": 161705 }, { "epoch": 0.6747419282155702, "grad_norm": 0.6365956867439609, "learning_rate": 2.4349885139249436e-06, "loss": 0.0285, "step": 161710 }, { "epoch": 0.6747627909305606, "grad_norm": 0.6963553038048436, "learning_rate": 2.434950870193789e-06, "loss": 0.0268, "step": 161715 }, { "epoch": 0.6747836536455508, "grad_norm": 0.33938468660290383, "learning_rate": 2.4349132282084424e-06, "loss": 0.0181, "step": 161720 }, { "epoch": 0.6748045163605411, "grad_norm": 0.6366077958477161, "learning_rate": 2.434875587968768e-06, "loss": 0.0277, "step": 161725 }, { "epoch": 0.6748253790755314, "grad_norm": 1.250404179671232, "learning_rate": 2.4348379494746297e-06, "loss": 0.0225, "step": 161730 }, { "epoch": 0.6748462417905217, "grad_norm": 0.5276429028451822, "learning_rate": 2.434800312725894e-06, "loss": 0.0196, "step": 161735 }, { "epoch": 0.6748671045055119, "grad_norm": 0.3457209807976389, "learning_rate": 2.4347626777224263e-06, "loss": 0.0171, "step": 161740 }, { "epoch": 0.6748879672205023, "grad_norm": 1.1345756986989677, "learning_rate": 2.4347250444640906e-06, "loss": 0.0408, "step": 161745 }, { "epoch": 0.6749088299354925, "grad_norm": 0.8345792694093681, "learning_rate": 2.4346874129507524e-06, "loss": 0.025, "step": 161750 }, { "epoch": 0.6749296926504827, "grad_norm": 0.6241403654729227, "learning_rate": 2.434649783182277e-06, "loss": 0.0236, "step": 161755 }, { "epoch": 0.674950555365473, "grad_norm": 0.6179754228664878, "learning_rate": 2.43461215515853e-06, "loss": 0.02, "step": 161760 }, { "epoch": 0.6749714180804633, "grad_norm": 0.45611704871352016, "learning_rate": 2.4345745288793755e-06, "loss": 0.0241, "step": 161765 }, { "epoch": 0.6749922807954536, "grad_norm": 1.0814004774699792, "learning_rate": 2.4345369043446795e-06, "loss": 0.0226, "step": 161770 }, { "epoch": 0.6750131435104438, "grad_norm": 0.6584605566519082, "learning_rate": 2.434499281554307e-06, "loss": 0.03, "step": 161775 }, { "epoch": 0.6750340062254342, "grad_norm": 0.951603058268316, "learning_rate": 2.4344616605081236e-06, "loss": 0.0199, "step": 161780 }, { "epoch": 0.6750548689404244, "grad_norm": 0.32097731158928566, "learning_rate": 2.4344240412059935e-06, "loss": 0.0283, "step": 161785 }, { "epoch": 0.6750757316554147, "grad_norm": 0.31484400844028493, "learning_rate": 2.434386423647783e-06, "loss": 0.0187, "step": 161790 }, { "epoch": 0.675096594370405, "grad_norm": 0.21273591344389495, "learning_rate": 2.4343488078333574e-06, "loss": 0.0212, "step": 161795 }, { "epoch": 0.6751174570853953, "grad_norm": 0.74432138726597, "learning_rate": 2.4343111937625808e-06, "loss": 0.0217, "step": 161800 }, { "epoch": 0.6751383198003855, "grad_norm": 0.9099195609238848, "learning_rate": 2.43427358143532e-06, "loss": 0.0175, "step": 161805 }, { "epoch": 0.6751591825153758, "grad_norm": 0.7141344917426636, "learning_rate": 2.434235970851439e-06, "loss": 0.0247, "step": 161810 }, { "epoch": 0.6751800452303661, "grad_norm": 1.3308370953531095, "learning_rate": 2.4341983620108036e-06, "loss": 0.0245, "step": 161815 }, { "epoch": 0.6752009079453564, "grad_norm": 0.3153051109316922, "learning_rate": 2.434160754913279e-06, "loss": 0.0277, "step": 161820 }, { "epoch": 0.6752217706603466, "grad_norm": 0.6080095040705682, "learning_rate": 2.434123149558732e-06, "loss": 0.0212, "step": 161825 }, { "epoch": 0.675242633375337, "grad_norm": 0.4484424867144793, "learning_rate": 2.4340855459470253e-06, "loss": 0.0155, "step": 161830 }, { "epoch": 0.6752634960903272, "grad_norm": 0.4849047570421155, "learning_rate": 2.4340479440780267e-06, "loss": 0.0297, "step": 161835 }, { "epoch": 0.6752843588053175, "grad_norm": 0.262870975105879, "learning_rate": 2.4340103439516003e-06, "loss": 0.0192, "step": 161840 }, { "epoch": 0.6753052215203078, "grad_norm": 0.28089481350481926, "learning_rate": 2.4339727455676116e-06, "loss": 0.0189, "step": 161845 }, { "epoch": 0.675326084235298, "grad_norm": 0.9296716640994055, "learning_rate": 2.4339351489259266e-06, "loss": 0.0233, "step": 161850 }, { "epoch": 0.6753469469502883, "grad_norm": 2.6579559731272013, "learning_rate": 2.433897554026411e-06, "loss": 0.0302, "step": 161855 }, { "epoch": 0.6753678096652787, "grad_norm": 0.42630999922700563, "learning_rate": 2.433859960868929e-06, "loss": 0.0181, "step": 161860 }, { "epoch": 0.6753886723802689, "grad_norm": 0.5471253811715837, "learning_rate": 2.433822369453346e-06, "loss": 0.0271, "step": 161865 }, { "epoch": 0.6754095350952591, "grad_norm": 0.6852457320734893, "learning_rate": 2.4337847797795293e-06, "loss": 0.026, "step": 161870 }, { "epoch": 0.6754303978102494, "grad_norm": 1.5061828666652672, "learning_rate": 2.433747191847343e-06, "loss": 0.027, "step": 161875 }, { "epoch": 0.6754512605252397, "grad_norm": 0.622087418522361, "learning_rate": 2.433709605656653e-06, "loss": 0.0238, "step": 161880 }, { "epoch": 0.67547212324023, "grad_norm": 1.5161940258933253, "learning_rate": 2.4336720212073246e-06, "loss": 0.0173, "step": 161885 }, { "epoch": 0.6754929859552202, "grad_norm": 0.4660170051349689, "learning_rate": 2.4336344384992235e-06, "loss": 0.0152, "step": 161890 }, { "epoch": 0.6755138486702106, "grad_norm": 0.8357444715541091, "learning_rate": 2.4335968575322156e-06, "loss": 0.0221, "step": 161895 }, { "epoch": 0.6755347113852008, "grad_norm": 0.7945330628677154, "learning_rate": 2.4335592783061657e-06, "loss": 0.0234, "step": 161900 }, { "epoch": 0.6755555741001911, "grad_norm": 0.29912921553708205, "learning_rate": 2.43352170082094e-06, "loss": 0.0144, "step": 161905 }, { "epoch": 0.6755764368151814, "grad_norm": 0.7980782264712005, "learning_rate": 2.4334841250764036e-06, "loss": 0.0181, "step": 161910 }, { "epoch": 0.6755972995301717, "grad_norm": 0.46801782681145654, "learning_rate": 2.4334465510724233e-06, "loss": 0.0242, "step": 161915 }, { "epoch": 0.6756181622451619, "grad_norm": 0.8493345261239603, "learning_rate": 2.4334089788088627e-06, "loss": 0.0267, "step": 161920 }, { "epoch": 0.6756390249601523, "grad_norm": 0.33685570722611, "learning_rate": 2.4333714082855898e-06, "loss": 0.0255, "step": 161925 }, { "epoch": 0.6756598876751425, "grad_norm": 0.8658594450946753, "learning_rate": 2.4333338395024685e-06, "loss": 0.0233, "step": 161930 }, { "epoch": 0.6756807503901328, "grad_norm": 0.4666232866963313, "learning_rate": 2.4332962724593647e-06, "loss": 0.0192, "step": 161935 }, { "epoch": 0.675701613105123, "grad_norm": 0.7741338498397616, "learning_rate": 2.4332587071561445e-06, "loss": 0.0168, "step": 161940 }, { "epoch": 0.6757224758201134, "grad_norm": 0.7198052771839878, "learning_rate": 2.433221143592674e-06, "loss": 0.0219, "step": 161945 }, { "epoch": 0.6757433385351036, "grad_norm": 0.6582795573205479, "learning_rate": 2.433183581768818e-06, "loss": 0.0278, "step": 161950 }, { "epoch": 0.6757642012500938, "grad_norm": 0.988869555913856, "learning_rate": 2.4331460216844433e-06, "loss": 0.0262, "step": 161955 }, { "epoch": 0.6757850639650842, "grad_norm": 0.7413242382492801, "learning_rate": 2.433108463339414e-06, "loss": 0.0237, "step": 161960 }, { "epoch": 0.6758059266800744, "grad_norm": 0.5925829374772755, "learning_rate": 2.433070906733598e-06, "loss": 0.0197, "step": 161965 }, { "epoch": 0.6758267893950647, "grad_norm": 0.840082251179462, "learning_rate": 2.4330333518668592e-06, "loss": 0.0257, "step": 161970 }, { "epoch": 0.675847652110055, "grad_norm": 0.546118275789037, "learning_rate": 2.4329957987390642e-06, "loss": 0.0268, "step": 161975 }, { "epoch": 0.6758685148250453, "grad_norm": 0.5959732311255, "learning_rate": 2.4329582473500792e-06, "loss": 0.0221, "step": 161980 }, { "epoch": 0.6758893775400355, "grad_norm": 0.29588409893758744, "learning_rate": 2.4329206976997695e-06, "loss": 0.0187, "step": 161985 }, { "epoch": 0.6759102402550258, "grad_norm": 0.8549145012774629, "learning_rate": 2.4328831497880005e-06, "loss": 0.023, "step": 161990 }, { "epoch": 0.6759311029700161, "grad_norm": 0.5556256725463111, "learning_rate": 2.4328456036146386e-06, "loss": 0.0281, "step": 161995 }, { "epoch": 0.6759519656850064, "grad_norm": 0.9664888744536502, "learning_rate": 2.43280805917955e-06, "loss": 0.0217, "step": 162000 }, { "epoch": 0.6759728283999966, "grad_norm": 0.7387482166442203, "learning_rate": 2.4327705164826e-06, "loss": 0.0152, "step": 162005 }, { "epoch": 0.675993691114987, "grad_norm": 0.48284738109026026, "learning_rate": 2.4327329755236547e-06, "loss": 0.0217, "step": 162010 }, { "epoch": 0.6760145538299772, "grad_norm": 1.0271776727505344, "learning_rate": 2.43269543630258e-06, "loss": 0.0323, "step": 162015 }, { "epoch": 0.6760354165449675, "grad_norm": 0.7257712233932692, "learning_rate": 2.4326578988192418e-06, "loss": 0.0223, "step": 162020 }, { "epoch": 0.6760562792599578, "grad_norm": 0.8713099077758681, "learning_rate": 2.4326203630735064e-06, "loss": 0.026, "step": 162025 }, { "epoch": 0.6760771419749481, "grad_norm": 0.8786238177844241, "learning_rate": 2.4325828290652386e-06, "loss": 0.0221, "step": 162030 }, { "epoch": 0.6760980046899383, "grad_norm": 0.38555695556394953, "learning_rate": 2.4325452967943056e-06, "loss": 0.0215, "step": 162035 }, { "epoch": 0.6761188674049287, "grad_norm": 0.6079700838751375, "learning_rate": 2.432507766260573e-06, "loss": 0.0151, "step": 162040 }, { "epoch": 0.6761397301199189, "grad_norm": 0.5166612566811188, "learning_rate": 2.4324702374639063e-06, "loss": 0.0225, "step": 162045 }, { "epoch": 0.6761605928349091, "grad_norm": 0.6242064966290795, "learning_rate": 2.4324327104041724e-06, "loss": 0.0164, "step": 162050 }, { "epoch": 0.6761814555498994, "grad_norm": 0.9523086806702679, "learning_rate": 2.432395185081237e-06, "loss": 0.0176, "step": 162055 }, { "epoch": 0.6762023182648897, "grad_norm": 0.6807782989335234, "learning_rate": 2.4323576614949652e-06, "loss": 0.0217, "step": 162060 }, { "epoch": 0.67622318097988, "grad_norm": 0.6687566456048895, "learning_rate": 2.4323201396452247e-06, "loss": 0.0288, "step": 162065 }, { "epoch": 0.6762440436948702, "grad_norm": 0.8000374487656853, "learning_rate": 2.43228261953188e-06, "loss": 0.0182, "step": 162070 }, { "epoch": 0.6762649064098606, "grad_norm": 1.1651297783427228, "learning_rate": 2.4322451011547984e-06, "loss": 0.0208, "step": 162075 }, { "epoch": 0.6762857691248508, "grad_norm": 1.0893981526477203, "learning_rate": 2.432207584513845e-06, "loss": 0.0334, "step": 162080 }, { "epoch": 0.6763066318398411, "grad_norm": 0.6865329325224833, "learning_rate": 2.4321700696088866e-06, "loss": 0.0169, "step": 162085 }, { "epoch": 0.6763274945548314, "grad_norm": 0.5598840221595168, "learning_rate": 2.432132556439789e-06, "loss": 0.0194, "step": 162090 }, { "epoch": 0.6763483572698217, "grad_norm": 1.1297402671151464, "learning_rate": 2.4320950450064185e-06, "loss": 0.0276, "step": 162095 }, { "epoch": 0.6763692199848119, "grad_norm": 0.5290533612739208, "learning_rate": 2.4320575353086416e-06, "loss": 0.0253, "step": 162100 }, { "epoch": 0.6763900826998023, "grad_norm": 0.6657263250055053, "learning_rate": 2.4320200273463235e-06, "loss": 0.0245, "step": 162105 }, { "epoch": 0.6764109454147925, "grad_norm": 0.5497227278386543, "learning_rate": 2.431982521119331e-06, "loss": 0.0186, "step": 162110 }, { "epoch": 0.6764318081297828, "grad_norm": 0.6483803987803396, "learning_rate": 2.4319450166275305e-06, "loss": 0.0186, "step": 162115 }, { "epoch": 0.676452670844773, "grad_norm": 0.7846000211993094, "learning_rate": 2.4319075138707876e-06, "loss": 0.0281, "step": 162120 }, { "epoch": 0.6764735335597634, "grad_norm": 0.6548692137445049, "learning_rate": 2.4318700128489693e-06, "loss": 0.0211, "step": 162125 }, { "epoch": 0.6764943962747536, "grad_norm": 0.6884467157396014, "learning_rate": 2.431832513561941e-06, "loss": 0.0193, "step": 162130 }, { "epoch": 0.6765152589897439, "grad_norm": 0.4759646176168161, "learning_rate": 2.4317950160095692e-06, "loss": 0.0194, "step": 162135 }, { "epoch": 0.6765361217047342, "grad_norm": 0.5645600227047142, "learning_rate": 2.4317575201917206e-06, "loss": 0.0198, "step": 162140 }, { "epoch": 0.6765569844197244, "grad_norm": 0.3045132700888398, "learning_rate": 2.431720026108261e-06, "loss": 0.0182, "step": 162145 }, { "epoch": 0.6765778471347147, "grad_norm": 0.7329669309743263, "learning_rate": 2.431682533759057e-06, "loss": 0.0227, "step": 162150 }, { "epoch": 0.676598709849705, "grad_norm": 1.0317658622398478, "learning_rate": 2.4316450431439754e-06, "loss": 0.0252, "step": 162155 }, { "epoch": 0.6766195725646953, "grad_norm": 0.9033184430660266, "learning_rate": 2.431607554262881e-06, "loss": 0.0217, "step": 162160 }, { "epoch": 0.6766404352796855, "grad_norm": 0.6460725085013361, "learning_rate": 2.4315700671156417e-06, "loss": 0.0235, "step": 162165 }, { "epoch": 0.6766612979946758, "grad_norm": 0.7267284028851886, "learning_rate": 2.431532581702123e-06, "loss": 0.0212, "step": 162170 }, { "epoch": 0.6766821607096661, "grad_norm": 0.44005150561379025, "learning_rate": 2.431495098022191e-06, "loss": 0.0175, "step": 162175 }, { "epoch": 0.6767030234246564, "grad_norm": 0.7362640603608529, "learning_rate": 2.4314576160757137e-06, "loss": 0.0287, "step": 162180 }, { "epoch": 0.6767238861396466, "grad_norm": 0.6834640398641204, "learning_rate": 2.4314201358625557e-06, "loss": 0.0217, "step": 162185 }, { "epoch": 0.676744748854637, "grad_norm": 0.9913654611290903, "learning_rate": 2.4313826573825842e-06, "loss": 0.0227, "step": 162190 }, { "epoch": 0.6767656115696272, "grad_norm": 0.5762379337604678, "learning_rate": 2.431345180635665e-06, "loss": 0.0216, "step": 162195 }, { "epoch": 0.6767864742846175, "grad_norm": 0.6793982732285515, "learning_rate": 2.4313077056216652e-06, "loss": 0.0185, "step": 162200 }, { "epoch": 0.6768073369996078, "grad_norm": 0.8276900624138204, "learning_rate": 2.4312702323404517e-06, "loss": 0.0232, "step": 162205 }, { "epoch": 0.6768281997145981, "grad_norm": 0.46492233384231024, "learning_rate": 2.43123276079189e-06, "loss": 0.036, "step": 162210 }, { "epoch": 0.6768490624295883, "grad_norm": 0.5228308257607845, "learning_rate": 2.431195290975847e-06, "loss": 0.0273, "step": 162215 }, { "epoch": 0.6768699251445787, "grad_norm": 0.4289562190091513, "learning_rate": 2.431157822892189e-06, "loss": 0.0152, "step": 162220 }, { "epoch": 0.6768907878595689, "grad_norm": 0.7127492325714594, "learning_rate": 2.4311203565407827e-06, "loss": 0.0237, "step": 162225 }, { "epoch": 0.6769116505745592, "grad_norm": 1.0275338181060019, "learning_rate": 2.4310828919214945e-06, "loss": 0.0183, "step": 162230 }, { "epoch": 0.6769325132895494, "grad_norm": 0.46202330475468695, "learning_rate": 2.431045429034191e-06, "loss": 0.0138, "step": 162235 }, { "epoch": 0.6769533760045398, "grad_norm": 0.6030063797674755, "learning_rate": 2.431007967878739e-06, "loss": 0.0228, "step": 162240 }, { "epoch": 0.67697423871953, "grad_norm": 0.4290519847321893, "learning_rate": 2.4309705084550048e-06, "loss": 0.0149, "step": 162245 }, { "epoch": 0.6769951014345202, "grad_norm": 0.3945032603391022, "learning_rate": 2.430933050762855e-06, "loss": 0.0267, "step": 162250 }, { "epoch": 0.6770159641495106, "grad_norm": 0.4601835746409557, "learning_rate": 2.430895594802156e-06, "loss": 0.0163, "step": 162255 }, { "epoch": 0.6770368268645008, "grad_norm": 1.0653713613011095, "learning_rate": 2.4308581405727748e-06, "loss": 0.0217, "step": 162260 }, { "epoch": 0.6770576895794911, "grad_norm": 0.8938684445443407, "learning_rate": 2.430820688074578e-06, "loss": 0.0292, "step": 162265 }, { "epoch": 0.6770785522944814, "grad_norm": 0.9819609172374245, "learning_rate": 2.430783237307432e-06, "loss": 0.0223, "step": 162270 }, { "epoch": 0.6770994150094717, "grad_norm": 0.7753715871084437, "learning_rate": 2.4307457882712042e-06, "loss": 0.0188, "step": 162275 }, { "epoch": 0.6771202777244619, "grad_norm": 0.7907459490744423, "learning_rate": 2.43070834096576e-06, "loss": 0.0249, "step": 162280 }, { "epoch": 0.6771411404394523, "grad_norm": 0.8546036777385008, "learning_rate": 2.4306708953909665e-06, "loss": 0.0224, "step": 162285 }, { "epoch": 0.6771620031544425, "grad_norm": 0.8040178262367417, "learning_rate": 2.4306334515466907e-06, "loss": 0.0295, "step": 162290 }, { "epoch": 0.6771828658694328, "grad_norm": 0.35996875554617824, "learning_rate": 2.4305960094327994e-06, "loss": 0.0168, "step": 162295 }, { "epoch": 0.677203728584423, "grad_norm": 0.4487238447134227, "learning_rate": 2.4305585690491597e-06, "loss": 0.0357, "step": 162300 }, { "epoch": 0.6772245912994134, "grad_norm": 1.2455014271477356, "learning_rate": 2.4305211303956373e-06, "loss": 0.0213, "step": 162305 }, { "epoch": 0.6772454540144036, "grad_norm": 0.4654551691266886, "learning_rate": 2.430483693472099e-06, "loss": 0.0212, "step": 162310 }, { "epoch": 0.6772663167293939, "grad_norm": 0.7208862412569038, "learning_rate": 2.430446258278413e-06, "loss": 0.0204, "step": 162315 }, { "epoch": 0.6772871794443842, "grad_norm": 0.6787003040838925, "learning_rate": 2.430408824814444e-06, "loss": 0.0234, "step": 162320 }, { "epoch": 0.6773080421593745, "grad_norm": 0.47613284396074873, "learning_rate": 2.4303713930800606e-06, "loss": 0.021, "step": 162325 }, { "epoch": 0.6773289048743647, "grad_norm": 1.481962636474997, "learning_rate": 2.4303339630751288e-06, "loss": 0.0335, "step": 162330 }, { "epoch": 0.6773497675893551, "grad_norm": 0.45920003898826006, "learning_rate": 2.430296534799516e-06, "loss": 0.0235, "step": 162335 }, { "epoch": 0.6773706303043453, "grad_norm": 0.6338052812298863, "learning_rate": 2.430259108253088e-06, "loss": 0.0208, "step": 162340 }, { "epoch": 0.6773914930193355, "grad_norm": 0.7355324536666155, "learning_rate": 2.430221683435713e-06, "loss": 0.023, "step": 162345 }, { "epoch": 0.6774123557343258, "grad_norm": 0.6496171462424012, "learning_rate": 2.430184260347256e-06, "loss": 0.0289, "step": 162350 }, { "epoch": 0.6774332184493161, "grad_norm": 0.5027485884819897, "learning_rate": 2.430146838987586e-06, "loss": 0.0148, "step": 162355 }, { "epoch": 0.6774540811643064, "grad_norm": 0.2061001647805112, "learning_rate": 2.4301094193565685e-06, "loss": 0.0231, "step": 162360 }, { "epoch": 0.6774749438792966, "grad_norm": 0.6850219684768643, "learning_rate": 2.4300720014540704e-06, "loss": 0.0197, "step": 162365 }, { "epoch": 0.677495806594287, "grad_norm": 0.7291569949138531, "learning_rate": 2.4300345852799597e-06, "loss": 0.0226, "step": 162370 }, { "epoch": 0.6775166693092772, "grad_norm": 0.589936917671219, "learning_rate": 2.4299971708341027e-06, "loss": 0.0298, "step": 162375 }, { "epoch": 0.6775375320242675, "grad_norm": 0.7425249829377754, "learning_rate": 2.429959758116366e-06, "loss": 0.0223, "step": 162380 }, { "epoch": 0.6775583947392578, "grad_norm": 0.8993717734786653, "learning_rate": 2.429922347126617e-06, "loss": 0.0332, "step": 162385 }, { "epoch": 0.6775792574542481, "grad_norm": 0.914342982309551, "learning_rate": 2.429884937864723e-06, "loss": 0.0218, "step": 162390 }, { "epoch": 0.6776001201692383, "grad_norm": 0.7433353801176519, "learning_rate": 2.4298475303305502e-06, "loss": 0.0302, "step": 162395 }, { "epoch": 0.6776209828842287, "grad_norm": 1.314342645181176, "learning_rate": 2.429810124523966e-06, "loss": 0.0229, "step": 162400 }, { "epoch": 0.6776418455992189, "grad_norm": 0.7462719981223761, "learning_rate": 2.4297727204448373e-06, "loss": 0.0231, "step": 162405 }, { "epoch": 0.6776627083142092, "grad_norm": 1.0829092600033525, "learning_rate": 2.429735318093032e-06, "loss": 0.0306, "step": 162410 }, { "epoch": 0.6776835710291994, "grad_norm": 0.7781784284890582, "learning_rate": 2.429697917468416e-06, "loss": 0.0198, "step": 162415 }, { "epoch": 0.6777044337441898, "grad_norm": 0.8587220270992962, "learning_rate": 2.4296605185708565e-06, "loss": 0.0199, "step": 162420 }, { "epoch": 0.67772529645918, "grad_norm": 0.8298729402118112, "learning_rate": 2.4296231214002217e-06, "loss": 0.0235, "step": 162425 }, { "epoch": 0.6777461591741702, "grad_norm": 0.9424198309605334, "learning_rate": 2.4295857259563775e-06, "loss": 0.0295, "step": 162430 }, { "epoch": 0.6777670218891606, "grad_norm": 0.7861177947237019, "learning_rate": 2.4295483322391913e-06, "loss": 0.0187, "step": 162435 }, { "epoch": 0.6777878846041508, "grad_norm": 0.9086751022949757, "learning_rate": 2.4295109402485306e-06, "loss": 0.0236, "step": 162440 }, { "epoch": 0.6778087473191411, "grad_norm": 0.6556550646301843, "learning_rate": 2.4294735499842627e-06, "loss": 0.029, "step": 162445 }, { "epoch": 0.6778296100341314, "grad_norm": 0.38202199862403824, "learning_rate": 2.429436161446253e-06, "loss": 0.0239, "step": 162450 }, { "epoch": 0.6778504727491217, "grad_norm": 0.9567141438079602, "learning_rate": 2.4293987746343713e-06, "loss": 0.0209, "step": 162455 }, { "epoch": 0.6778713354641119, "grad_norm": 0.42693461515406445, "learning_rate": 2.4293613895484835e-06, "loss": 0.0207, "step": 162460 }, { "epoch": 0.6778921981791023, "grad_norm": 0.7480172503466599, "learning_rate": 2.429324006188456e-06, "loss": 0.0227, "step": 162465 }, { "epoch": 0.6779130608940925, "grad_norm": 0.7161628592070056, "learning_rate": 2.4292866245541573e-06, "loss": 0.0201, "step": 162470 }, { "epoch": 0.6779339236090828, "grad_norm": 0.5799991185615526, "learning_rate": 2.429249244645455e-06, "loss": 0.0177, "step": 162475 }, { "epoch": 0.677954786324073, "grad_norm": 1.0341098861586326, "learning_rate": 2.429211866462214e-06, "loss": 0.0158, "step": 162480 }, { "epoch": 0.6779756490390634, "grad_norm": 0.32916845552474977, "learning_rate": 2.429174490004304e-06, "loss": 0.0217, "step": 162485 }, { "epoch": 0.6779965117540536, "grad_norm": 0.7630565529186061, "learning_rate": 2.4291371152715908e-06, "loss": 0.0199, "step": 162490 }, { "epoch": 0.6780173744690439, "grad_norm": 1.2078335557250026, "learning_rate": 2.429099742263943e-06, "loss": 0.0236, "step": 162495 }, { "epoch": 0.6780382371840342, "grad_norm": 0.9121249367894891, "learning_rate": 2.4290623709812265e-06, "loss": 0.021, "step": 162500 }, { "epoch": 0.6780590998990245, "grad_norm": 0.3622008719167267, "learning_rate": 2.4290250014233093e-06, "loss": 0.0179, "step": 162505 }, { "epoch": 0.6780799626140147, "grad_norm": 0.9453321702433106, "learning_rate": 2.4289876335900587e-06, "loss": 0.0256, "step": 162510 }, { "epoch": 0.6781008253290051, "grad_norm": 0.8560552708238743, "learning_rate": 2.4289502674813426e-06, "loss": 0.0199, "step": 162515 }, { "epoch": 0.6781216880439953, "grad_norm": 0.7662504568771199, "learning_rate": 2.4289129030970264e-06, "loss": 0.0256, "step": 162520 }, { "epoch": 0.6781425507589856, "grad_norm": 0.5438795745409394, "learning_rate": 2.4288755404369804e-06, "loss": 0.0207, "step": 162525 }, { "epoch": 0.6781634134739758, "grad_norm": 0.6097259186880459, "learning_rate": 2.42883817950107e-06, "loss": 0.018, "step": 162530 }, { "epoch": 0.6781842761889662, "grad_norm": 0.5809416183184242, "learning_rate": 2.428800820289162e-06, "loss": 0.0259, "step": 162535 }, { "epoch": 0.6782051389039564, "grad_norm": 0.3377497113400207, "learning_rate": 2.428763462801126e-06, "loss": 0.0234, "step": 162540 }, { "epoch": 0.6782260016189466, "grad_norm": 0.8049965515269798, "learning_rate": 2.428726107036828e-06, "loss": 0.0273, "step": 162545 }, { "epoch": 0.678246864333937, "grad_norm": 0.6254109294976395, "learning_rate": 2.4286887529961357e-06, "loss": 0.0319, "step": 162550 }, { "epoch": 0.6782677270489272, "grad_norm": 0.4211995106841448, "learning_rate": 2.4286514006789164e-06, "loss": 0.019, "step": 162555 }, { "epoch": 0.6782885897639175, "grad_norm": 0.3106471793320711, "learning_rate": 2.4286140500850377e-06, "loss": 0.0299, "step": 162560 }, { "epoch": 0.6783094524789078, "grad_norm": 0.6117957474711441, "learning_rate": 2.4285767012143675e-06, "loss": 0.024, "step": 162565 }, { "epoch": 0.6783303151938981, "grad_norm": 0.6756904404152458, "learning_rate": 2.4285393540667724e-06, "loss": 0.0237, "step": 162570 }, { "epoch": 0.6783511779088883, "grad_norm": 0.7641890283568957, "learning_rate": 2.428502008642121e-06, "loss": 0.0255, "step": 162575 }, { "epoch": 0.6783720406238787, "grad_norm": 1.4994494635254654, "learning_rate": 2.4284646649402805e-06, "loss": 0.0241, "step": 162580 }, { "epoch": 0.6783929033388689, "grad_norm": 0.6950419189915383, "learning_rate": 2.4284273229611174e-06, "loss": 0.0206, "step": 162585 }, { "epoch": 0.6784137660538592, "grad_norm": 0.6391362953692569, "learning_rate": 2.428389982704501e-06, "loss": 0.0277, "step": 162590 }, { "epoch": 0.6784346287688494, "grad_norm": 0.5181367523514835, "learning_rate": 2.428352644170297e-06, "loss": 0.0175, "step": 162595 }, { "epoch": 0.6784554914838398, "grad_norm": 0.8340214711296713, "learning_rate": 2.4283153073583744e-06, "loss": 0.0299, "step": 162600 }, { "epoch": 0.67847635419883, "grad_norm": 1.0561249758264208, "learning_rate": 2.4282779722686003e-06, "loss": 0.0242, "step": 162605 }, { "epoch": 0.6784972169138203, "grad_norm": 1.0957602250620078, "learning_rate": 2.428240638900843e-06, "loss": 0.0232, "step": 162610 }, { "epoch": 0.6785180796288106, "grad_norm": 0.3457052939771221, "learning_rate": 2.4282033072549685e-06, "loss": 0.017, "step": 162615 }, { "epoch": 0.6785389423438009, "grad_norm": 0.5489598198694742, "learning_rate": 2.4281659773308456e-06, "loss": 0.0215, "step": 162620 }, { "epoch": 0.6785598050587911, "grad_norm": 0.3951042808095311, "learning_rate": 2.428128649128342e-06, "loss": 0.0197, "step": 162625 }, { "epoch": 0.6785806677737815, "grad_norm": 0.41426740568747583, "learning_rate": 2.4280913226473256e-06, "loss": 0.0253, "step": 162630 }, { "epoch": 0.6786015304887717, "grad_norm": 0.499107876919646, "learning_rate": 2.428053997887663e-06, "loss": 0.0204, "step": 162635 }, { "epoch": 0.6786223932037619, "grad_norm": 0.5265806972161735, "learning_rate": 2.4280166748492225e-06, "loss": 0.0168, "step": 162640 }, { "epoch": 0.6786432559187523, "grad_norm": 0.622598598065981, "learning_rate": 2.4279793535318726e-06, "loss": 0.021, "step": 162645 }, { "epoch": 0.6786641186337425, "grad_norm": 0.746877155035256, "learning_rate": 2.42794203393548e-06, "loss": 0.0217, "step": 162650 }, { "epoch": 0.6786849813487328, "grad_norm": 0.3522542912595131, "learning_rate": 2.4279047160599126e-06, "loss": 0.0142, "step": 162655 }, { "epoch": 0.678705844063723, "grad_norm": 0.6797109239050133, "learning_rate": 2.427867399905038e-06, "loss": 0.0203, "step": 162660 }, { "epoch": 0.6787267067787134, "grad_norm": 0.7686387587838664, "learning_rate": 2.427830085470725e-06, "loss": 0.0298, "step": 162665 }, { "epoch": 0.6787475694937036, "grad_norm": 0.6260127958957646, "learning_rate": 2.42779277275684e-06, "loss": 0.0226, "step": 162670 }, { "epoch": 0.6787684322086939, "grad_norm": 0.5028742405016875, "learning_rate": 2.4277554617632516e-06, "loss": 0.0193, "step": 162675 }, { "epoch": 0.6787892949236842, "grad_norm": 0.5190528570205621, "learning_rate": 2.4277181524898276e-06, "loss": 0.021, "step": 162680 }, { "epoch": 0.6788101576386745, "grad_norm": 0.477087705227445, "learning_rate": 2.427680844936435e-06, "loss": 0.0182, "step": 162685 }, { "epoch": 0.6788310203536647, "grad_norm": 0.7691660410789747, "learning_rate": 2.4276435391029436e-06, "loss": 0.0249, "step": 162690 }, { "epoch": 0.6788518830686551, "grad_norm": 0.6447486761416726, "learning_rate": 2.427606234989219e-06, "loss": 0.0242, "step": 162695 }, { "epoch": 0.6788727457836453, "grad_norm": 0.6843452783706571, "learning_rate": 2.4275689325951307e-06, "loss": 0.025, "step": 162700 }, { "epoch": 0.6788936084986356, "grad_norm": 0.9367131928903855, "learning_rate": 2.4275316319205454e-06, "loss": 0.0272, "step": 162705 }, { "epoch": 0.6789144712136258, "grad_norm": 0.9305120039379348, "learning_rate": 2.4274943329653315e-06, "loss": 0.0277, "step": 162710 }, { "epoch": 0.6789353339286162, "grad_norm": 0.9420558690135233, "learning_rate": 2.4274570357293578e-06, "loss": 0.0241, "step": 162715 }, { "epoch": 0.6789561966436064, "grad_norm": 0.3607013615948616, "learning_rate": 2.4274197402124908e-06, "loss": 0.0184, "step": 162720 }, { "epoch": 0.6789770593585966, "grad_norm": 0.5659960612310644, "learning_rate": 2.427382446414599e-06, "loss": 0.0255, "step": 162725 }, { "epoch": 0.678997922073587, "grad_norm": 1.5798142715400274, "learning_rate": 2.42734515433555e-06, "loss": 0.0213, "step": 162730 }, { "epoch": 0.6790187847885772, "grad_norm": 1.1580016466131817, "learning_rate": 2.4273078639752126e-06, "loss": 0.0153, "step": 162735 }, { "epoch": 0.6790396475035675, "grad_norm": 2.3871470582691843, "learning_rate": 2.4272705753334546e-06, "loss": 0.0212, "step": 162740 }, { "epoch": 0.6790605102185578, "grad_norm": 0.8109131499914706, "learning_rate": 2.4272332884101434e-06, "loss": 0.016, "step": 162745 }, { "epoch": 0.6790813729335481, "grad_norm": 0.4597296916867019, "learning_rate": 2.4271960032051474e-06, "loss": 0.022, "step": 162750 }, { "epoch": 0.6791022356485383, "grad_norm": 0.5549470816765664, "learning_rate": 2.427158719718335e-06, "loss": 0.0229, "step": 162755 }, { "epoch": 0.6791230983635287, "grad_norm": 0.6526230960543952, "learning_rate": 2.4271214379495732e-06, "loss": 0.0203, "step": 162760 }, { "epoch": 0.6791439610785189, "grad_norm": 0.6313279780631499, "learning_rate": 2.4270841578987304e-06, "loss": 0.0199, "step": 162765 }, { "epoch": 0.6791648237935092, "grad_norm": 1.1306019590502758, "learning_rate": 2.427046879565676e-06, "loss": 0.0358, "step": 162770 }, { "epoch": 0.6791856865084994, "grad_norm": 0.8179580086128178, "learning_rate": 2.4270096029502762e-06, "loss": 0.0241, "step": 162775 }, { "epoch": 0.6792065492234898, "grad_norm": 0.3238682584567645, "learning_rate": 2.4269723280524002e-06, "loss": 0.0338, "step": 162780 }, { "epoch": 0.67922741193848, "grad_norm": 1.414791104391686, "learning_rate": 2.4269350548719157e-06, "loss": 0.0245, "step": 162785 }, { "epoch": 0.6792482746534703, "grad_norm": 0.6410408285884539, "learning_rate": 2.426897783408692e-06, "loss": 0.0254, "step": 162790 }, { "epoch": 0.6792691373684606, "grad_norm": 0.9930096883135433, "learning_rate": 2.4268605136625947e-06, "loss": 0.0253, "step": 162795 }, { "epoch": 0.6792900000834509, "grad_norm": 0.8780570801611585, "learning_rate": 2.4268232456334944e-06, "loss": 0.0216, "step": 162800 }, { "epoch": 0.6793108627984411, "grad_norm": 0.9022181052252101, "learning_rate": 2.4267859793212574e-06, "loss": 0.0252, "step": 162805 }, { "epoch": 0.6793317255134315, "grad_norm": 0.3682432657323903, "learning_rate": 2.426748714725754e-06, "loss": 0.026, "step": 162810 }, { "epoch": 0.6793525882284217, "grad_norm": 0.841784398887634, "learning_rate": 2.4267114518468505e-06, "loss": 0.0215, "step": 162815 }, { "epoch": 0.679373450943412, "grad_norm": 0.7934035635576717, "learning_rate": 2.426674190684416e-06, "loss": 0.0197, "step": 162820 }, { "epoch": 0.6793943136584023, "grad_norm": 0.760488196466251, "learning_rate": 2.4266369312383182e-06, "loss": 0.025, "step": 162825 }, { "epoch": 0.6794151763733925, "grad_norm": 0.7641720753742657, "learning_rate": 2.426599673508426e-06, "loss": 0.0273, "step": 162830 }, { "epoch": 0.6794360390883828, "grad_norm": 0.5952214546197879, "learning_rate": 2.4265624174946077e-06, "loss": 0.0314, "step": 162835 }, { "epoch": 0.679456901803373, "grad_norm": 0.48218060619130665, "learning_rate": 2.426525163196731e-06, "loss": 0.0141, "step": 162840 }, { "epoch": 0.6794777645183634, "grad_norm": 0.8770384200336294, "learning_rate": 2.4264879106146642e-06, "loss": 0.0179, "step": 162845 }, { "epoch": 0.6794986272333536, "grad_norm": 0.9694007203994616, "learning_rate": 2.4264506597482755e-06, "loss": 0.0208, "step": 162850 }, { "epoch": 0.6795194899483439, "grad_norm": 0.5992527936258096, "learning_rate": 2.4264134105974343e-06, "loss": 0.0301, "step": 162855 }, { "epoch": 0.6795403526633342, "grad_norm": 0.7518853931626653, "learning_rate": 2.426376163162007e-06, "loss": 0.0253, "step": 162860 }, { "epoch": 0.6795612153783245, "grad_norm": 0.5726051032597791, "learning_rate": 2.426338917441864e-06, "loss": 0.0162, "step": 162865 }, { "epoch": 0.6795820780933147, "grad_norm": 0.4993943199802571, "learning_rate": 2.4263016734368725e-06, "loss": 0.0155, "step": 162870 }, { "epoch": 0.6796029408083051, "grad_norm": 0.4024697616791388, "learning_rate": 2.426264431146901e-06, "loss": 0.0183, "step": 162875 }, { "epoch": 0.6796238035232953, "grad_norm": 0.3452530299696871, "learning_rate": 2.426227190571818e-06, "loss": 0.0198, "step": 162880 }, { "epoch": 0.6796446662382856, "grad_norm": 0.5035320609283706, "learning_rate": 2.426189951711492e-06, "loss": 0.0257, "step": 162885 }, { "epoch": 0.6796655289532758, "grad_norm": 0.7279846908163873, "learning_rate": 2.4261527145657917e-06, "loss": 0.0184, "step": 162890 }, { "epoch": 0.6796863916682662, "grad_norm": 0.9505143160790902, "learning_rate": 2.4261154791345837e-06, "loss": 0.0322, "step": 162895 }, { "epoch": 0.6797072543832564, "grad_norm": 0.5515560755231889, "learning_rate": 2.426078245417739e-06, "loss": 0.025, "step": 162900 }, { "epoch": 0.6797281170982467, "grad_norm": 0.8002260007825476, "learning_rate": 2.4260410134151247e-06, "loss": 0.0226, "step": 162905 }, { "epoch": 0.679748979813237, "grad_norm": 0.7678990521140034, "learning_rate": 2.426003783126609e-06, "loss": 0.0228, "step": 162910 }, { "epoch": 0.6797698425282273, "grad_norm": 0.7059906253725619, "learning_rate": 2.4259665545520615e-06, "loss": 0.0281, "step": 162915 }, { "epoch": 0.6797907052432175, "grad_norm": 0.30132145277830785, "learning_rate": 2.4259293276913497e-06, "loss": 0.021, "step": 162920 }, { "epoch": 0.6798115679582079, "grad_norm": 0.8215568021304559, "learning_rate": 2.4258921025443418e-06, "loss": 0.0201, "step": 162925 }, { "epoch": 0.6798324306731981, "grad_norm": 1.0265426730711156, "learning_rate": 2.425854879110908e-06, "loss": 0.0217, "step": 162930 }, { "epoch": 0.6798532933881883, "grad_norm": 0.6551825694353923, "learning_rate": 2.4258176573909147e-06, "loss": 0.0279, "step": 162935 }, { "epoch": 0.6798741561031787, "grad_norm": 0.6168116659556375, "learning_rate": 2.425780437384233e-06, "loss": 0.0198, "step": 162940 }, { "epoch": 0.6798950188181689, "grad_norm": 0.8333148487738818, "learning_rate": 2.4257432190907283e-06, "loss": 0.0254, "step": 162945 }, { "epoch": 0.6799158815331592, "grad_norm": 0.6323274540292457, "learning_rate": 2.4257060025102724e-06, "loss": 0.023, "step": 162950 }, { "epoch": 0.6799367442481494, "grad_norm": 1.0276509529662776, "learning_rate": 2.425668787642732e-06, "loss": 0.0341, "step": 162955 }, { "epoch": 0.6799576069631398, "grad_norm": 0.6103895033719398, "learning_rate": 2.4256315744879754e-06, "loss": 0.0181, "step": 162960 }, { "epoch": 0.67997846967813, "grad_norm": 0.8260669346652589, "learning_rate": 2.4255943630458724e-06, "loss": 0.027, "step": 162965 }, { "epoch": 0.6799993323931203, "grad_norm": 0.5534422139564166, "learning_rate": 2.425557153316291e-06, "loss": 0.0339, "step": 162970 }, { "epoch": 0.6800201951081106, "grad_norm": 0.9827334650124141, "learning_rate": 2.4255199452991006e-06, "loss": 0.025, "step": 162975 }, { "epoch": 0.6800410578231009, "grad_norm": 0.2822869972009488, "learning_rate": 2.4254827389941683e-06, "loss": 0.0185, "step": 162980 }, { "epoch": 0.6800619205380911, "grad_norm": 0.7306221863953506, "learning_rate": 2.4254455344013643e-06, "loss": 0.0383, "step": 162985 }, { "epoch": 0.6800827832530815, "grad_norm": 0.6444787012611463, "learning_rate": 2.4254083315205563e-06, "loss": 0.0188, "step": 162990 }, { "epoch": 0.6801036459680717, "grad_norm": 0.8026896938483715, "learning_rate": 2.425371130351614e-06, "loss": 0.0209, "step": 162995 }, { "epoch": 0.680124508683062, "grad_norm": 0.7349908937594496, "learning_rate": 2.4253339308944053e-06, "loss": 0.0264, "step": 163000 }, { "epoch": 0.6801453713980523, "grad_norm": 0.4469595076086297, "learning_rate": 2.4252967331487993e-06, "loss": 0.0212, "step": 163005 }, { "epoch": 0.6801662341130426, "grad_norm": 0.6198681608534303, "learning_rate": 2.425259537114665e-06, "loss": 0.0209, "step": 163010 }, { "epoch": 0.6801870968280328, "grad_norm": 0.7641846715334298, "learning_rate": 2.4252223427918695e-06, "loss": 0.0305, "step": 163015 }, { "epoch": 0.680207959543023, "grad_norm": 0.7031142137596313, "learning_rate": 2.425185150180284e-06, "loss": 0.0224, "step": 163020 }, { "epoch": 0.6802288222580134, "grad_norm": 0.4882711879831915, "learning_rate": 2.4251479592797766e-06, "loss": 0.0254, "step": 163025 }, { "epoch": 0.6802496849730036, "grad_norm": 0.7891349212283782, "learning_rate": 2.4251107700902147e-06, "loss": 0.0279, "step": 163030 }, { "epoch": 0.6802705476879939, "grad_norm": 1.1962820558554172, "learning_rate": 2.425073582611468e-06, "loss": 0.0188, "step": 163035 }, { "epoch": 0.6802914104029842, "grad_norm": 0.6703267250829009, "learning_rate": 2.425036396843406e-06, "loss": 0.0268, "step": 163040 }, { "epoch": 0.6803122731179745, "grad_norm": 0.18220925835475602, "learning_rate": 2.4249992127858964e-06, "loss": 0.0177, "step": 163045 }, { "epoch": 0.6803331358329647, "grad_norm": 1.2337638689621406, "learning_rate": 2.4249620304388094e-06, "loss": 0.0247, "step": 163050 }, { "epoch": 0.6803539985479551, "grad_norm": 0.7482532754077978, "learning_rate": 2.424924849802013e-06, "loss": 0.0244, "step": 163055 }, { "epoch": 0.6803748612629453, "grad_norm": 0.44538083974223147, "learning_rate": 2.4248876708753753e-06, "loss": 0.0293, "step": 163060 }, { "epoch": 0.6803957239779356, "grad_norm": 0.958912041897605, "learning_rate": 2.424850493658767e-06, "loss": 0.0233, "step": 163065 }, { "epoch": 0.6804165866929258, "grad_norm": 2.046906445612113, "learning_rate": 2.4248133181520554e-06, "loss": 0.0251, "step": 163070 }, { "epoch": 0.6804374494079162, "grad_norm": 2.2869697322479934, "learning_rate": 2.424776144355111e-06, "loss": 0.0147, "step": 163075 }, { "epoch": 0.6804583121229064, "grad_norm": 0.99941577306687, "learning_rate": 2.4247389722678015e-06, "loss": 0.0267, "step": 163080 }, { "epoch": 0.6804791748378967, "grad_norm": 0.5547470094650965, "learning_rate": 2.4247018018899956e-06, "loss": 0.0323, "step": 163085 }, { "epoch": 0.680500037552887, "grad_norm": 0.5666698989895291, "learning_rate": 2.424664633221564e-06, "loss": 0.0229, "step": 163090 }, { "epoch": 0.6805209002678773, "grad_norm": 0.5371903482825064, "learning_rate": 2.424627466262374e-06, "loss": 0.0178, "step": 163095 }, { "epoch": 0.6805417629828675, "grad_norm": 0.9647889224313186, "learning_rate": 2.4245903010122954e-06, "loss": 0.0288, "step": 163100 }, { "epoch": 0.6805626256978579, "grad_norm": 0.7549943702861359, "learning_rate": 2.424553137471197e-06, "loss": 0.021, "step": 163105 }, { "epoch": 0.6805834884128481, "grad_norm": 0.9173175318490887, "learning_rate": 2.4245159756389477e-06, "loss": 0.0229, "step": 163110 }, { "epoch": 0.6806043511278383, "grad_norm": 0.7585481204304301, "learning_rate": 2.424478815515417e-06, "loss": 0.0176, "step": 163115 }, { "epoch": 0.6806252138428287, "grad_norm": 0.5871597952435355, "learning_rate": 2.4244416571004736e-06, "loss": 0.017, "step": 163120 }, { "epoch": 0.680646076557819, "grad_norm": 0.6617282294695422, "learning_rate": 2.4244045003939866e-06, "loss": 0.0204, "step": 163125 }, { "epoch": 0.6806669392728092, "grad_norm": 1.0121169672111516, "learning_rate": 2.4243673453958253e-06, "loss": 0.0208, "step": 163130 }, { "epoch": 0.6806878019877994, "grad_norm": 0.7519367563130397, "learning_rate": 2.424330192105858e-06, "loss": 0.0245, "step": 163135 }, { "epoch": 0.6807086647027898, "grad_norm": 1.0823803877180993, "learning_rate": 2.4242930405239547e-06, "loss": 0.0197, "step": 163140 }, { "epoch": 0.68072952741778, "grad_norm": 0.8259212167833252, "learning_rate": 2.424255890649985e-06, "loss": 0.0266, "step": 163145 }, { "epoch": 0.6807503901327703, "grad_norm": 0.7161758160893977, "learning_rate": 2.4242187424838168e-06, "loss": 0.028, "step": 163150 }, { "epoch": 0.6807712528477606, "grad_norm": 0.820980040378057, "learning_rate": 2.4241815960253193e-06, "loss": 0.0292, "step": 163155 }, { "epoch": 0.6807921155627509, "grad_norm": 0.6137713234800084, "learning_rate": 2.4241444512743627e-06, "loss": 0.0202, "step": 163160 }, { "epoch": 0.6808129782777411, "grad_norm": 0.6445931328011011, "learning_rate": 2.4241073082308153e-06, "loss": 0.0237, "step": 163165 }, { "epoch": 0.6808338409927315, "grad_norm": 0.7439305931295229, "learning_rate": 2.4240701668945466e-06, "loss": 0.0197, "step": 163170 }, { "epoch": 0.6808547037077217, "grad_norm": 0.4831145584681265, "learning_rate": 2.4240330272654263e-06, "loss": 0.0215, "step": 163175 }, { "epoch": 0.680875566422712, "grad_norm": 1.1565267422930965, "learning_rate": 2.4239958893433226e-06, "loss": 0.0179, "step": 163180 }, { "epoch": 0.6808964291377023, "grad_norm": 1.0648958270102926, "learning_rate": 2.4239587531281055e-06, "loss": 0.0237, "step": 163185 }, { "epoch": 0.6809172918526926, "grad_norm": 1.2957043981145038, "learning_rate": 2.4239216186196445e-06, "loss": 0.0294, "step": 163190 }, { "epoch": 0.6809381545676828, "grad_norm": 0.5004036477356483, "learning_rate": 2.423884485817808e-06, "loss": 0.0168, "step": 163195 }, { "epoch": 0.680959017282673, "grad_norm": 0.5250548270662012, "learning_rate": 2.4238473547224653e-06, "loss": 0.0209, "step": 163200 }, { "epoch": 0.6809798799976634, "grad_norm": 0.8056038621260333, "learning_rate": 2.4238102253334867e-06, "loss": 0.0376, "step": 163205 }, { "epoch": 0.6810007427126537, "grad_norm": 0.5470599786738914, "learning_rate": 2.4237730976507404e-06, "loss": 0.0184, "step": 163210 }, { "epoch": 0.6810216054276439, "grad_norm": 0.7063285945393785, "learning_rate": 2.4237359716740967e-06, "loss": 0.0162, "step": 163215 }, { "epoch": 0.6810424681426342, "grad_norm": 0.5842810526131152, "learning_rate": 2.4236988474034244e-06, "loss": 0.0158, "step": 163220 }, { "epoch": 0.6810633308576245, "grad_norm": 0.8348467169194844, "learning_rate": 2.4236617248385925e-06, "loss": 0.0305, "step": 163225 }, { "epoch": 0.6810841935726147, "grad_norm": 0.49024326549008995, "learning_rate": 2.4236246039794716e-06, "loss": 0.0234, "step": 163230 }, { "epoch": 0.6811050562876051, "grad_norm": 0.43479023051506643, "learning_rate": 2.4235874848259294e-06, "loss": 0.0211, "step": 163235 }, { "epoch": 0.6811259190025953, "grad_norm": 0.4657641653244807, "learning_rate": 2.4235503673778364e-06, "loss": 0.0161, "step": 163240 }, { "epoch": 0.6811467817175856, "grad_norm": 0.28409161152568035, "learning_rate": 2.423513251635062e-06, "loss": 0.0186, "step": 163245 }, { "epoch": 0.6811676444325758, "grad_norm": 2.7454006311952526, "learning_rate": 2.4234761375974753e-06, "loss": 0.0201, "step": 163250 }, { "epoch": 0.6811885071475662, "grad_norm": 0.25351966746689975, "learning_rate": 2.4234390252649464e-06, "loss": 0.021, "step": 163255 }, { "epoch": 0.6812093698625564, "grad_norm": 1.02851893258138, "learning_rate": 2.423401914637343e-06, "loss": 0.0235, "step": 163260 }, { "epoch": 0.6812302325775467, "grad_norm": 0.8773268080898887, "learning_rate": 2.4233648057145363e-06, "loss": 0.0224, "step": 163265 }, { "epoch": 0.681251095292537, "grad_norm": 0.5658382095677679, "learning_rate": 2.4233276984963957e-06, "loss": 0.0182, "step": 163270 }, { "epoch": 0.6812719580075273, "grad_norm": 0.5765344639348141, "learning_rate": 2.42329059298279e-06, "loss": 0.0208, "step": 163275 }, { "epoch": 0.6812928207225175, "grad_norm": 0.7385106892210106, "learning_rate": 2.4232534891735887e-06, "loss": 0.0147, "step": 163280 }, { "epoch": 0.6813136834375079, "grad_norm": 1.2724377055031428, "learning_rate": 2.4232163870686615e-06, "loss": 0.0177, "step": 163285 }, { "epoch": 0.6813345461524981, "grad_norm": 0.33256147080859927, "learning_rate": 2.423179286667878e-06, "loss": 0.0237, "step": 163290 }, { "epoch": 0.6813554088674884, "grad_norm": 0.9857530961780867, "learning_rate": 2.423142187971107e-06, "loss": 0.0267, "step": 163295 }, { "epoch": 0.6813762715824787, "grad_norm": 0.46839583840308363, "learning_rate": 2.4231050909782203e-06, "loss": 0.017, "step": 163300 }, { "epoch": 0.681397134297469, "grad_norm": 0.722649951653093, "learning_rate": 2.4230679956890852e-06, "loss": 0.0226, "step": 163305 }, { "epoch": 0.6814179970124592, "grad_norm": 0.5095513171075087, "learning_rate": 2.423030902103572e-06, "loss": 0.0162, "step": 163310 }, { "epoch": 0.6814388597274494, "grad_norm": 1.283628306550688, "learning_rate": 2.422993810221551e-06, "loss": 0.0229, "step": 163315 }, { "epoch": 0.6814597224424398, "grad_norm": 1.0623669672619058, "learning_rate": 2.4229567200428904e-06, "loss": 0.0281, "step": 163320 }, { "epoch": 0.68148058515743, "grad_norm": 0.4900645293475043, "learning_rate": 2.4229196315674607e-06, "loss": 0.0216, "step": 163325 }, { "epoch": 0.6815014478724203, "grad_norm": 0.8641069653583264, "learning_rate": 2.4228825447951313e-06, "loss": 0.0203, "step": 163330 }, { "epoch": 0.6815223105874106, "grad_norm": 0.8432681508675641, "learning_rate": 2.4228454597257723e-06, "loss": 0.0196, "step": 163335 }, { "epoch": 0.6815431733024009, "grad_norm": 0.8072859891978987, "learning_rate": 2.422808376359253e-06, "loss": 0.0165, "step": 163340 }, { "epoch": 0.6815640360173911, "grad_norm": 0.4365083045139652, "learning_rate": 2.422771294695444e-06, "loss": 0.0219, "step": 163345 }, { "epoch": 0.6815848987323815, "grad_norm": 0.4830517260268241, "learning_rate": 2.4227342147342127e-06, "loss": 0.0196, "step": 163350 }, { "epoch": 0.6816057614473717, "grad_norm": 0.3458757784889226, "learning_rate": 2.4226971364754314e-06, "loss": 0.0197, "step": 163355 }, { "epoch": 0.681626624162362, "grad_norm": 1.0757542047858812, "learning_rate": 2.4226600599189687e-06, "loss": 0.0239, "step": 163360 }, { "epoch": 0.6816474868773523, "grad_norm": 0.4968873170560489, "learning_rate": 2.4226229850646936e-06, "loss": 0.0189, "step": 163365 }, { "epoch": 0.6816683495923426, "grad_norm": 0.45773044269656366, "learning_rate": 2.4225859119124774e-06, "loss": 0.0285, "step": 163370 }, { "epoch": 0.6816892123073328, "grad_norm": 0.6824266493312949, "learning_rate": 2.4225488404621884e-06, "loss": 0.0199, "step": 163375 }, { "epoch": 0.6817100750223231, "grad_norm": 0.9454475971562092, "learning_rate": 2.422511770713698e-06, "loss": 0.0238, "step": 163380 }, { "epoch": 0.6817309377373134, "grad_norm": 0.6326856828490789, "learning_rate": 2.4224747026668746e-06, "loss": 0.028, "step": 163385 }, { "epoch": 0.6817518004523037, "grad_norm": 0.6976836712788992, "learning_rate": 2.4224376363215884e-06, "loss": 0.028, "step": 163390 }, { "epoch": 0.6817726631672939, "grad_norm": 0.8671065155197005, "learning_rate": 2.422400571677709e-06, "loss": 0.0213, "step": 163395 }, { "epoch": 0.6817935258822843, "grad_norm": 0.599439374518286, "learning_rate": 2.4223635087351074e-06, "loss": 0.0247, "step": 163400 }, { "epoch": 0.6818143885972745, "grad_norm": 0.7388514955574675, "learning_rate": 2.422326447493652e-06, "loss": 0.0214, "step": 163405 }, { "epoch": 0.6818352513122647, "grad_norm": 0.5758573937077991, "learning_rate": 2.4222893879532134e-06, "loss": 0.0155, "step": 163410 }, { "epoch": 0.6818561140272551, "grad_norm": 0.499795238150459, "learning_rate": 2.4222523301136614e-06, "loss": 0.0278, "step": 163415 }, { "epoch": 0.6818769767422453, "grad_norm": 0.5160276524988905, "learning_rate": 2.4222152739748664e-06, "loss": 0.0167, "step": 163420 }, { "epoch": 0.6818978394572356, "grad_norm": 1.7298300753003042, "learning_rate": 2.422178219536697e-06, "loss": 0.0194, "step": 163425 }, { "epoch": 0.6819187021722258, "grad_norm": 0.7276050062784317, "learning_rate": 2.4221411667990245e-06, "loss": 0.0359, "step": 163430 }, { "epoch": 0.6819395648872162, "grad_norm": 0.35350243291234984, "learning_rate": 2.4221041157617184e-06, "loss": 0.0156, "step": 163435 }, { "epoch": 0.6819604276022064, "grad_norm": 0.7692047380044249, "learning_rate": 2.4220670664246475e-06, "loss": 0.0271, "step": 163440 }, { "epoch": 0.6819812903171967, "grad_norm": 0.6739130223043808, "learning_rate": 2.4220300187876836e-06, "loss": 0.0165, "step": 163445 }, { "epoch": 0.682002153032187, "grad_norm": 1.0794607891534924, "learning_rate": 2.4219929728506957e-06, "loss": 0.0295, "step": 163450 }, { "epoch": 0.6820230157471773, "grad_norm": 1.4635281822619701, "learning_rate": 2.4219559286135543e-06, "loss": 0.0287, "step": 163455 }, { "epoch": 0.6820438784621675, "grad_norm": 0.6811026042564527, "learning_rate": 2.4219188860761285e-06, "loss": 0.0223, "step": 163460 }, { "epoch": 0.6820647411771579, "grad_norm": 0.5727059046921075, "learning_rate": 2.4218818452382887e-06, "loss": 0.0197, "step": 163465 }, { "epoch": 0.6820856038921481, "grad_norm": 0.6024434697210871, "learning_rate": 2.421844806099906e-06, "loss": 0.0176, "step": 163470 }, { "epoch": 0.6821064666071384, "grad_norm": 0.8496456348436185, "learning_rate": 2.4218077686608484e-06, "loss": 0.02, "step": 163475 }, { "epoch": 0.6821273293221287, "grad_norm": 0.35222499550655023, "learning_rate": 2.4217707329209878e-06, "loss": 0.0195, "step": 163480 }, { "epoch": 0.682148192037119, "grad_norm": 0.39902069352615105, "learning_rate": 2.4217336988801935e-06, "loss": 0.0191, "step": 163485 }, { "epoch": 0.6821690547521092, "grad_norm": 0.5307257490444606, "learning_rate": 2.421696666538336e-06, "loss": 0.0297, "step": 163490 }, { "epoch": 0.6821899174670994, "grad_norm": 0.7471581506439637, "learning_rate": 2.421659635895285e-06, "loss": 0.0247, "step": 163495 }, { "epoch": 0.6822107801820898, "grad_norm": 0.7121314154637427, "learning_rate": 2.42162260695091e-06, "loss": 0.0196, "step": 163500 }, { "epoch": 0.68223164289708, "grad_norm": 0.7661571334939391, "learning_rate": 2.421585579705083e-06, "loss": 0.0196, "step": 163505 }, { "epoch": 0.6822525056120703, "grad_norm": 0.39719214298010463, "learning_rate": 2.4215485541576724e-06, "loss": 0.0201, "step": 163510 }, { "epoch": 0.6822733683270606, "grad_norm": 0.7620674471016389, "learning_rate": 2.421511530308549e-06, "loss": 0.024, "step": 163515 }, { "epoch": 0.6822942310420509, "grad_norm": 0.3686098067745029, "learning_rate": 2.421474508157583e-06, "loss": 0.0164, "step": 163520 }, { "epoch": 0.6823150937570411, "grad_norm": 0.5726726136234327, "learning_rate": 2.421437487704644e-06, "loss": 0.0128, "step": 163525 }, { "epoch": 0.6823359564720315, "grad_norm": 0.7460150579536774, "learning_rate": 2.4214004689496034e-06, "loss": 0.0184, "step": 163530 }, { "epoch": 0.6823568191870217, "grad_norm": 0.7762505919443787, "learning_rate": 2.421363451892331e-06, "loss": 0.0202, "step": 163535 }, { "epoch": 0.682377681902012, "grad_norm": 0.6220285817006905, "learning_rate": 2.4213264365326962e-06, "loss": 0.0236, "step": 163540 }, { "epoch": 0.6823985446170022, "grad_norm": 0.8615724560834835, "learning_rate": 2.4212894228705703e-06, "loss": 0.025, "step": 163545 }, { "epoch": 0.6824194073319926, "grad_norm": 0.5313410558329937, "learning_rate": 2.421252410905822e-06, "loss": 0.0204, "step": 163550 }, { "epoch": 0.6824402700469828, "grad_norm": 0.4380685761737355, "learning_rate": 2.421215400638324e-06, "loss": 0.0212, "step": 163555 }, { "epoch": 0.6824611327619731, "grad_norm": 0.5308867432950394, "learning_rate": 2.421178392067945e-06, "loss": 0.0204, "step": 163560 }, { "epoch": 0.6824819954769634, "grad_norm": 0.6041604196833461, "learning_rate": 2.421141385194555e-06, "loss": 0.0197, "step": 163565 }, { "epoch": 0.6825028581919537, "grad_norm": 0.6055997626585803, "learning_rate": 2.4211043800180247e-06, "loss": 0.0294, "step": 163570 }, { "epoch": 0.6825237209069439, "grad_norm": 0.9501329367804003, "learning_rate": 2.4210673765382253e-06, "loss": 0.022, "step": 163575 }, { "epoch": 0.6825445836219343, "grad_norm": 0.7523289034695881, "learning_rate": 2.4210303747550266e-06, "loss": 0.025, "step": 163580 }, { "epoch": 0.6825654463369245, "grad_norm": 0.9652306908665625, "learning_rate": 2.420993374668298e-06, "loss": 0.0234, "step": 163585 }, { "epoch": 0.6825863090519148, "grad_norm": 0.48210331105472, "learning_rate": 2.420956376277911e-06, "loss": 0.0194, "step": 163590 }, { "epoch": 0.6826071717669051, "grad_norm": 0.6189277952425797, "learning_rate": 2.420919379583736e-06, "loss": 0.0223, "step": 163595 }, { "epoch": 0.6826280344818954, "grad_norm": 0.4698414378593477, "learning_rate": 2.4208823845856424e-06, "loss": 0.0213, "step": 163600 }, { "epoch": 0.6826488971968856, "grad_norm": 1.595404298415767, "learning_rate": 2.420845391283502e-06, "loss": 0.0247, "step": 163605 }, { "epoch": 0.6826697599118758, "grad_norm": 0.7330986085511085, "learning_rate": 2.4208083996771836e-06, "loss": 0.0228, "step": 163610 }, { "epoch": 0.6826906226268662, "grad_norm": 1.0858913386406202, "learning_rate": 2.420771409766559e-06, "loss": 0.022, "step": 163615 }, { "epoch": 0.6827114853418564, "grad_norm": 0.3760839642062877, "learning_rate": 2.4207344215514983e-06, "loss": 0.0227, "step": 163620 }, { "epoch": 0.6827323480568467, "grad_norm": 0.6386147683450457, "learning_rate": 2.420697435031872e-06, "loss": 0.0179, "step": 163625 }, { "epoch": 0.682753210771837, "grad_norm": 0.36883031412963874, "learning_rate": 2.42066045020755e-06, "loss": 0.015, "step": 163630 }, { "epoch": 0.6827740734868273, "grad_norm": 0.7306042615149829, "learning_rate": 2.4206234670784033e-06, "loss": 0.026, "step": 163635 }, { "epoch": 0.6827949362018175, "grad_norm": 0.7804692296262119, "learning_rate": 2.420586485644302e-06, "loss": 0.0194, "step": 163640 }, { "epoch": 0.6828157989168079, "grad_norm": 0.5025189559294818, "learning_rate": 2.4205495059051177e-06, "loss": 0.028, "step": 163645 }, { "epoch": 0.6828366616317981, "grad_norm": 0.990406203358153, "learning_rate": 2.42051252786072e-06, "loss": 0.0307, "step": 163650 }, { "epoch": 0.6828575243467884, "grad_norm": 0.6092812770603555, "learning_rate": 2.420475551510979e-06, "loss": 0.0241, "step": 163655 }, { "epoch": 0.6828783870617787, "grad_norm": 0.9021490923308244, "learning_rate": 2.4204385768557666e-06, "loss": 0.0231, "step": 163660 }, { "epoch": 0.682899249776769, "grad_norm": 0.5752341128625368, "learning_rate": 2.4204016038949523e-06, "loss": 0.0263, "step": 163665 }, { "epoch": 0.6829201124917592, "grad_norm": 0.6337212104188151, "learning_rate": 2.420364632628407e-06, "loss": 0.0201, "step": 163670 }, { "epoch": 0.6829409752067495, "grad_norm": 0.7387011843151928, "learning_rate": 2.420327663056002e-06, "loss": 0.0279, "step": 163675 }, { "epoch": 0.6829618379217398, "grad_norm": 0.515734215658239, "learning_rate": 2.420290695177607e-06, "loss": 0.0147, "step": 163680 }, { "epoch": 0.6829827006367301, "grad_norm": 0.6569162441001357, "learning_rate": 2.4202537289930926e-06, "loss": 0.0162, "step": 163685 }, { "epoch": 0.6830035633517203, "grad_norm": 0.3281109484507609, "learning_rate": 2.4202167645023295e-06, "loss": 0.0171, "step": 163690 }, { "epoch": 0.6830244260667107, "grad_norm": 0.3329200911399197, "learning_rate": 2.4201798017051893e-06, "loss": 0.0136, "step": 163695 }, { "epoch": 0.6830452887817009, "grad_norm": 0.2780229179048815, "learning_rate": 2.420142840601542e-06, "loss": 0.0243, "step": 163700 }, { "epoch": 0.6830661514966911, "grad_norm": 0.5128487103123789, "learning_rate": 2.420105881191258e-06, "loss": 0.0219, "step": 163705 }, { "epoch": 0.6830870142116815, "grad_norm": 0.6184978753648905, "learning_rate": 2.4200689234742084e-06, "loss": 0.0236, "step": 163710 }, { "epoch": 0.6831078769266717, "grad_norm": 0.6063245782175828, "learning_rate": 2.420031967450264e-06, "loss": 0.0252, "step": 163715 }, { "epoch": 0.683128739641662, "grad_norm": 0.44536003252731604, "learning_rate": 2.4199950131192946e-06, "loss": 0.0236, "step": 163720 }, { "epoch": 0.6831496023566522, "grad_norm": 0.6983505480259795, "learning_rate": 2.4199580604811728e-06, "loss": 0.0193, "step": 163725 }, { "epoch": 0.6831704650716426, "grad_norm": 1.1093249692311178, "learning_rate": 2.419921109535767e-06, "loss": 0.0209, "step": 163730 }, { "epoch": 0.6831913277866328, "grad_norm": 0.972804626902249, "learning_rate": 2.41988416028295e-06, "loss": 0.0227, "step": 163735 }, { "epoch": 0.6832121905016231, "grad_norm": 0.5367723125347214, "learning_rate": 2.419847212722592e-06, "loss": 0.0222, "step": 163740 }, { "epoch": 0.6832330532166134, "grad_norm": 1.3803610929405536, "learning_rate": 2.419810266854563e-06, "loss": 0.0219, "step": 163745 }, { "epoch": 0.6832539159316037, "grad_norm": 1.085474728529374, "learning_rate": 2.4197733226787344e-06, "loss": 0.0212, "step": 163750 }, { "epoch": 0.6832747786465939, "grad_norm": 0.7332196643185462, "learning_rate": 2.419736380194978e-06, "loss": 0.0196, "step": 163755 }, { "epoch": 0.6832956413615843, "grad_norm": 0.7040385104508946, "learning_rate": 2.4196994394031628e-06, "loss": 0.0227, "step": 163760 }, { "epoch": 0.6833165040765745, "grad_norm": 0.6060506201832341, "learning_rate": 2.419662500303161e-06, "loss": 0.0291, "step": 163765 }, { "epoch": 0.6833373667915648, "grad_norm": 0.5242052727149319, "learning_rate": 2.4196255628948423e-06, "loss": 0.0173, "step": 163770 }, { "epoch": 0.6833582295065551, "grad_norm": 0.37155369123154214, "learning_rate": 2.4195886271780785e-06, "loss": 0.0183, "step": 163775 }, { "epoch": 0.6833790922215454, "grad_norm": 0.5342056911230904, "learning_rate": 2.419551693152741e-06, "loss": 0.0262, "step": 163780 }, { "epoch": 0.6833999549365356, "grad_norm": 0.25565768345693923, "learning_rate": 2.4195147608186993e-06, "loss": 0.0237, "step": 163785 }, { "epoch": 0.6834208176515258, "grad_norm": 1.2121090164746853, "learning_rate": 2.419477830175825e-06, "loss": 0.026, "step": 163790 }, { "epoch": 0.6834416803665162, "grad_norm": 0.8145961242648143, "learning_rate": 2.41944090122399e-06, "loss": 0.0311, "step": 163795 }, { "epoch": 0.6834625430815064, "grad_norm": 0.28772705941552107, "learning_rate": 2.4194039739630633e-06, "loss": 0.0182, "step": 163800 }, { "epoch": 0.6834834057964967, "grad_norm": 0.4331150750784943, "learning_rate": 2.419367048392917e-06, "loss": 0.0238, "step": 163805 }, { "epoch": 0.683504268511487, "grad_norm": 1.0287202024694915, "learning_rate": 2.4193301245134217e-06, "loss": 0.0237, "step": 163810 }, { "epoch": 0.6835251312264773, "grad_norm": 0.7815100207672673, "learning_rate": 2.4192932023244496e-06, "loss": 0.0198, "step": 163815 }, { "epoch": 0.6835459939414675, "grad_norm": 0.6671136469834651, "learning_rate": 2.41925628182587e-06, "loss": 0.0347, "step": 163820 }, { "epoch": 0.6835668566564579, "grad_norm": 0.7378493388546943, "learning_rate": 2.419219363017555e-06, "loss": 0.0266, "step": 163825 }, { "epoch": 0.6835877193714481, "grad_norm": 0.23404797631117913, "learning_rate": 2.419182445899375e-06, "loss": 0.0164, "step": 163830 }, { "epoch": 0.6836085820864384, "grad_norm": 0.5607349958168042, "learning_rate": 2.419145530471201e-06, "loss": 0.0145, "step": 163835 }, { "epoch": 0.6836294448014287, "grad_norm": 0.771568762769304, "learning_rate": 2.419108616732905e-06, "loss": 0.0262, "step": 163840 }, { "epoch": 0.683650307516419, "grad_norm": 1.0954953584610263, "learning_rate": 2.4190717046843574e-06, "loss": 0.0264, "step": 163845 }, { "epoch": 0.6836711702314092, "grad_norm": 1.1266274863734462, "learning_rate": 2.4190347943254295e-06, "loss": 0.0247, "step": 163850 }, { "epoch": 0.6836920329463995, "grad_norm": 0.5511635291195883, "learning_rate": 2.418997885655992e-06, "loss": 0.017, "step": 163855 }, { "epoch": 0.6837128956613898, "grad_norm": 0.6492316598531831, "learning_rate": 2.4189609786759164e-06, "loss": 0.0158, "step": 163860 }, { "epoch": 0.6837337583763801, "grad_norm": 0.550727691549649, "learning_rate": 2.4189240733850737e-06, "loss": 0.0246, "step": 163865 }, { "epoch": 0.6837546210913703, "grad_norm": 0.45609664167866, "learning_rate": 2.418887169783335e-06, "loss": 0.0239, "step": 163870 }, { "epoch": 0.6837754838063607, "grad_norm": 0.6449065959095486, "learning_rate": 2.4188502678705715e-06, "loss": 0.0184, "step": 163875 }, { "epoch": 0.6837963465213509, "grad_norm": 1.1527567475127878, "learning_rate": 2.4188133676466545e-06, "loss": 0.0339, "step": 163880 }, { "epoch": 0.6838172092363412, "grad_norm": 0.9776873493487734, "learning_rate": 2.418776469111455e-06, "loss": 0.0199, "step": 163885 }, { "epoch": 0.6838380719513315, "grad_norm": 0.6054876957870525, "learning_rate": 2.4187395722648437e-06, "loss": 0.0296, "step": 163890 }, { "epoch": 0.6838589346663217, "grad_norm": 0.3117609041689864, "learning_rate": 2.4187026771066932e-06, "loss": 0.0221, "step": 163895 }, { "epoch": 0.683879797381312, "grad_norm": 0.6367819419726795, "learning_rate": 2.418665783636874e-06, "loss": 0.0286, "step": 163900 }, { "epoch": 0.6839006600963022, "grad_norm": 0.7581195590206359, "learning_rate": 2.4186288918552563e-06, "loss": 0.0224, "step": 163905 }, { "epoch": 0.6839215228112926, "grad_norm": 0.5633024070157012, "learning_rate": 2.418592001761713e-06, "loss": 0.0193, "step": 163910 }, { "epoch": 0.6839423855262828, "grad_norm": 0.6891138045672557, "learning_rate": 2.418555113356115e-06, "loss": 0.0178, "step": 163915 }, { "epoch": 0.6839632482412731, "grad_norm": 1.0035565807175784, "learning_rate": 2.418518226638333e-06, "loss": 0.0226, "step": 163920 }, { "epoch": 0.6839841109562634, "grad_norm": 0.6941732419310047, "learning_rate": 2.418481341608238e-06, "loss": 0.0269, "step": 163925 }, { "epoch": 0.6840049736712537, "grad_norm": 0.7401744107731062, "learning_rate": 2.4184444582657017e-06, "loss": 0.0336, "step": 163930 }, { "epoch": 0.6840258363862439, "grad_norm": 0.42779815603919197, "learning_rate": 2.4184075766105964e-06, "loss": 0.0232, "step": 163935 }, { "epoch": 0.6840466991012343, "grad_norm": 0.5358432914471174, "learning_rate": 2.418370696642792e-06, "loss": 0.0149, "step": 163940 }, { "epoch": 0.6840675618162245, "grad_norm": 0.30536120249610793, "learning_rate": 2.418333818362161e-06, "loss": 0.025, "step": 163945 }, { "epoch": 0.6840884245312148, "grad_norm": 0.4642687380029874, "learning_rate": 2.4182969417685744e-06, "loss": 0.0221, "step": 163950 }, { "epoch": 0.6841092872462051, "grad_norm": 0.8966565829727525, "learning_rate": 2.418260066861903e-06, "loss": 0.0247, "step": 163955 }, { "epoch": 0.6841301499611954, "grad_norm": 1.0071274746194292, "learning_rate": 2.418223193642018e-06, "loss": 0.0249, "step": 163960 }, { "epoch": 0.6841510126761856, "grad_norm": 0.60112329941048, "learning_rate": 2.4181863221087926e-06, "loss": 0.022, "step": 163965 }, { "epoch": 0.6841718753911759, "grad_norm": 0.5275804659746305, "learning_rate": 2.418149452262096e-06, "loss": 0.0183, "step": 163970 }, { "epoch": 0.6841927381061662, "grad_norm": 0.9104769893779822, "learning_rate": 2.418112584101801e-06, "loss": 0.0203, "step": 163975 }, { "epoch": 0.6842136008211565, "grad_norm": 0.5491281585624935, "learning_rate": 2.418075717627778e-06, "loss": 0.0258, "step": 163980 }, { "epoch": 0.6842344635361467, "grad_norm": 0.6182081549802235, "learning_rate": 2.4180388528399004e-06, "loss": 0.0213, "step": 163985 }, { "epoch": 0.684255326251137, "grad_norm": 0.6798464458817438, "learning_rate": 2.4180019897380383e-06, "loss": 0.0213, "step": 163990 }, { "epoch": 0.6842761889661273, "grad_norm": 0.3931599666672532, "learning_rate": 2.4179651283220628e-06, "loss": 0.0282, "step": 163995 }, { "epoch": 0.6842970516811175, "grad_norm": 0.7258679679930808, "learning_rate": 2.417928268591846e-06, "loss": 0.0264, "step": 164000 }, { "epoch": 0.6843179143961079, "grad_norm": 0.8841328634700103, "learning_rate": 2.4178914105472597e-06, "loss": 0.0201, "step": 164005 }, { "epoch": 0.6843387771110981, "grad_norm": 0.22863841026827636, "learning_rate": 2.4178545541881746e-06, "loss": 0.0173, "step": 164010 }, { "epoch": 0.6843596398260884, "grad_norm": 0.45231752397346825, "learning_rate": 2.4178176995144623e-06, "loss": 0.0226, "step": 164015 }, { "epoch": 0.6843805025410787, "grad_norm": 0.6005645533301255, "learning_rate": 2.4177808465259957e-06, "loss": 0.0195, "step": 164020 }, { "epoch": 0.684401365256069, "grad_norm": 1.2392868690668304, "learning_rate": 2.417743995222645e-06, "loss": 0.0262, "step": 164025 }, { "epoch": 0.6844222279710592, "grad_norm": 0.46101990158804584, "learning_rate": 2.417707145604282e-06, "loss": 0.0284, "step": 164030 }, { "epoch": 0.6844430906860495, "grad_norm": 0.8295483672695444, "learning_rate": 2.4176702976707792e-06, "loss": 0.0253, "step": 164035 }, { "epoch": 0.6844639534010398, "grad_norm": 0.878100182873125, "learning_rate": 2.4176334514220065e-06, "loss": 0.025, "step": 164040 }, { "epoch": 0.6844848161160301, "grad_norm": 0.4068001663843622, "learning_rate": 2.417596606857837e-06, "loss": 0.0224, "step": 164045 }, { "epoch": 0.6845056788310203, "grad_norm": 0.810530986658687, "learning_rate": 2.4175597639781424e-06, "loss": 0.0142, "step": 164050 }, { "epoch": 0.6845265415460107, "grad_norm": 0.658317626685849, "learning_rate": 2.4175229227827928e-06, "loss": 0.0206, "step": 164055 }, { "epoch": 0.6845474042610009, "grad_norm": 1.688221085299257, "learning_rate": 2.4174860832716614e-06, "loss": 0.0246, "step": 164060 }, { "epoch": 0.6845682669759912, "grad_norm": 0.8450821878193477, "learning_rate": 2.41744924544462e-06, "loss": 0.0201, "step": 164065 }, { "epoch": 0.6845891296909815, "grad_norm": 0.443309047579172, "learning_rate": 2.417412409301539e-06, "loss": 0.0187, "step": 164070 }, { "epoch": 0.6846099924059718, "grad_norm": 0.7563946247123265, "learning_rate": 2.417375574842291e-06, "loss": 0.0181, "step": 164075 }, { "epoch": 0.684630855120962, "grad_norm": 0.9860115802352593, "learning_rate": 2.417338742066747e-06, "loss": 0.0242, "step": 164080 }, { "epoch": 0.6846517178359522, "grad_norm": 0.6957948053772512, "learning_rate": 2.41730191097478e-06, "loss": 0.0293, "step": 164085 }, { "epoch": 0.6846725805509426, "grad_norm": 0.37339692031716104, "learning_rate": 2.4172650815662606e-06, "loss": 0.0227, "step": 164090 }, { "epoch": 0.6846934432659328, "grad_norm": 0.8986748965113541, "learning_rate": 2.4172282538410603e-06, "loss": 0.0253, "step": 164095 }, { "epoch": 0.6847143059809231, "grad_norm": 1.1263066634435153, "learning_rate": 2.4171914277990518e-06, "loss": 0.0214, "step": 164100 }, { "epoch": 0.6847351686959134, "grad_norm": 0.5168625262427576, "learning_rate": 2.4171546034401074e-06, "loss": 0.0179, "step": 164105 }, { "epoch": 0.6847560314109037, "grad_norm": 0.5182424317502794, "learning_rate": 2.4171177807640976e-06, "loss": 0.0168, "step": 164110 }, { "epoch": 0.6847768941258939, "grad_norm": 0.749701037873791, "learning_rate": 2.4170809597708946e-06, "loss": 0.0257, "step": 164115 }, { "epoch": 0.6847977568408843, "grad_norm": 0.983011000620405, "learning_rate": 2.4170441404603704e-06, "loss": 0.0202, "step": 164120 }, { "epoch": 0.6848186195558745, "grad_norm": 0.572843889122359, "learning_rate": 2.417007322832397e-06, "loss": 0.021, "step": 164125 }, { "epoch": 0.6848394822708648, "grad_norm": 0.6130342354045429, "learning_rate": 2.416970506886846e-06, "loss": 0.0194, "step": 164130 }, { "epoch": 0.6848603449858551, "grad_norm": 0.7573484267704925, "learning_rate": 2.416933692623589e-06, "loss": 0.0224, "step": 164135 }, { "epoch": 0.6848812077008454, "grad_norm": 0.3931945102465378, "learning_rate": 2.4168968800424984e-06, "loss": 0.0177, "step": 164140 }, { "epoch": 0.6849020704158356, "grad_norm": 1.05880414604781, "learning_rate": 2.416860069143446e-06, "loss": 0.0242, "step": 164145 }, { "epoch": 0.6849229331308259, "grad_norm": 0.5779819410200918, "learning_rate": 2.416823259926303e-06, "loss": 0.0163, "step": 164150 }, { "epoch": 0.6849437958458162, "grad_norm": 1.3362487565141812, "learning_rate": 2.4167864523909424e-06, "loss": 0.0224, "step": 164155 }, { "epoch": 0.6849646585608065, "grad_norm": 0.7248825539850997, "learning_rate": 2.4167496465372357e-06, "loss": 0.0247, "step": 164160 }, { "epoch": 0.6849855212757967, "grad_norm": 0.33497029733898515, "learning_rate": 2.4167128423650546e-06, "loss": 0.0233, "step": 164165 }, { "epoch": 0.6850063839907871, "grad_norm": 0.8954840899004227, "learning_rate": 2.4166760398742715e-06, "loss": 0.0215, "step": 164170 }, { "epoch": 0.6850272467057773, "grad_norm": 0.6864570708945106, "learning_rate": 2.4166392390647582e-06, "loss": 0.0171, "step": 164175 }, { "epoch": 0.6850481094207675, "grad_norm": 0.6235216166391695, "learning_rate": 2.416602439936387e-06, "loss": 0.0324, "step": 164180 }, { "epoch": 0.6850689721357579, "grad_norm": 0.5730115079812023, "learning_rate": 2.416565642489029e-06, "loss": 0.0151, "step": 164185 }, { "epoch": 0.6850898348507481, "grad_norm": 0.8996746288630962, "learning_rate": 2.416528846722557e-06, "loss": 0.0209, "step": 164190 }, { "epoch": 0.6851106975657384, "grad_norm": 0.6604983940695291, "learning_rate": 2.4164920526368426e-06, "loss": 0.0209, "step": 164195 }, { "epoch": 0.6851315602807287, "grad_norm": 0.7074457359145734, "learning_rate": 2.416455260231758e-06, "loss": 0.0193, "step": 164200 }, { "epoch": 0.685152422995719, "grad_norm": 0.895808985466251, "learning_rate": 2.416418469507176e-06, "loss": 0.025, "step": 164205 }, { "epoch": 0.6851732857107092, "grad_norm": 0.777829430698436, "learning_rate": 2.4163816804629673e-06, "loss": 0.0201, "step": 164210 }, { "epoch": 0.6851941484256995, "grad_norm": 0.6424396096034455, "learning_rate": 2.416344893099005e-06, "loss": 0.0258, "step": 164215 }, { "epoch": 0.6852150111406898, "grad_norm": 1.0409518556943873, "learning_rate": 2.416308107415161e-06, "loss": 0.0214, "step": 164220 }, { "epoch": 0.6852358738556801, "grad_norm": 0.3068231292407737, "learning_rate": 2.4162713234113072e-06, "loss": 0.0234, "step": 164225 }, { "epoch": 0.6852567365706703, "grad_norm": 0.6920083235369944, "learning_rate": 2.4162345410873156e-06, "loss": 0.017, "step": 164230 }, { "epoch": 0.6852775992856607, "grad_norm": 0.596055619824613, "learning_rate": 2.416197760443059e-06, "loss": 0.0297, "step": 164235 }, { "epoch": 0.6852984620006509, "grad_norm": 0.40583944256500465, "learning_rate": 2.416160981478409e-06, "loss": 0.0201, "step": 164240 }, { "epoch": 0.6853193247156412, "grad_norm": 0.9227912780147205, "learning_rate": 2.4161242041932376e-06, "loss": 0.0243, "step": 164245 }, { "epoch": 0.6853401874306315, "grad_norm": 0.7484248399686964, "learning_rate": 2.4160874285874175e-06, "loss": 0.0246, "step": 164250 }, { "epoch": 0.6853610501456218, "grad_norm": 0.9444926152669557, "learning_rate": 2.4160506546608208e-06, "loss": 0.0192, "step": 164255 }, { "epoch": 0.685381912860612, "grad_norm": 0.41100384985061, "learning_rate": 2.4160138824133195e-06, "loss": 0.0189, "step": 164260 }, { "epoch": 0.6854027755756023, "grad_norm": 0.5550918124412616, "learning_rate": 2.4159771118447857e-06, "loss": 0.0202, "step": 164265 }, { "epoch": 0.6854236382905926, "grad_norm": 0.3970077683589361, "learning_rate": 2.4159403429550924e-06, "loss": 0.015, "step": 164270 }, { "epoch": 0.6854445010055829, "grad_norm": 0.5144128164447431, "learning_rate": 2.415903575744111e-06, "loss": 0.0228, "step": 164275 }, { "epoch": 0.6854653637205731, "grad_norm": 0.47773146574635966, "learning_rate": 2.4158668102117143e-06, "loss": 0.0183, "step": 164280 }, { "epoch": 0.6854862264355635, "grad_norm": 0.37285593324279476, "learning_rate": 2.415830046357774e-06, "loss": 0.0194, "step": 164285 }, { "epoch": 0.6855070891505537, "grad_norm": 1.1091223888613555, "learning_rate": 2.4157932841821633e-06, "loss": 0.0182, "step": 164290 }, { "epoch": 0.6855279518655439, "grad_norm": 0.7254018145540102, "learning_rate": 2.4157565236847535e-06, "loss": 0.0206, "step": 164295 }, { "epoch": 0.6855488145805343, "grad_norm": 0.686584958478827, "learning_rate": 2.4157197648654175e-06, "loss": 0.0177, "step": 164300 }, { "epoch": 0.6855696772955245, "grad_norm": 0.6675332272068667, "learning_rate": 2.415683007724027e-06, "loss": 0.0287, "step": 164305 }, { "epoch": 0.6855905400105148, "grad_norm": 0.7348991579099253, "learning_rate": 2.4156462522604557e-06, "loss": 0.0206, "step": 164310 }, { "epoch": 0.6856114027255051, "grad_norm": 0.6798932630741409, "learning_rate": 2.415609498474575e-06, "loss": 0.025, "step": 164315 }, { "epoch": 0.6856322654404954, "grad_norm": 0.913166877582259, "learning_rate": 2.415572746366257e-06, "loss": 0.0249, "step": 164320 }, { "epoch": 0.6856531281554856, "grad_norm": 0.7065565325410259, "learning_rate": 2.4155359959353745e-06, "loss": 0.0207, "step": 164325 }, { "epoch": 0.6856739908704759, "grad_norm": 0.5492557843605763, "learning_rate": 2.4154992471818e-06, "loss": 0.0173, "step": 164330 }, { "epoch": 0.6856948535854662, "grad_norm": 1.1162659236337755, "learning_rate": 2.415462500105406e-06, "loss": 0.0191, "step": 164335 }, { "epoch": 0.6857157163004565, "grad_norm": 0.4635210080585631, "learning_rate": 2.415425754706065e-06, "loss": 0.0211, "step": 164340 }, { "epoch": 0.6857365790154467, "grad_norm": 0.39224422580812873, "learning_rate": 2.4153890109836487e-06, "loss": 0.0178, "step": 164345 }, { "epoch": 0.6857574417304371, "grad_norm": 0.8176797188055771, "learning_rate": 2.41535226893803e-06, "loss": 0.018, "step": 164350 }, { "epoch": 0.6857783044454273, "grad_norm": 0.9542392221062742, "learning_rate": 2.415315528569082e-06, "loss": 0.0268, "step": 164355 }, { "epoch": 0.6857991671604176, "grad_norm": 1.5723231747258315, "learning_rate": 2.4152787898766757e-06, "loss": 0.0191, "step": 164360 }, { "epoch": 0.6858200298754079, "grad_norm": 0.2953153012300607, "learning_rate": 2.4152420528606853e-06, "loss": 0.0169, "step": 164365 }, { "epoch": 0.6858408925903982, "grad_norm": 0.9888461518473407, "learning_rate": 2.4152053175209815e-06, "loss": 0.0275, "step": 164370 }, { "epoch": 0.6858617553053884, "grad_norm": 0.3499275038393403, "learning_rate": 2.4151685838574386e-06, "loss": 0.0207, "step": 164375 }, { "epoch": 0.6858826180203788, "grad_norm": 1.1327228723971288, "learning_rate": 2.4151318518699282e-06, "loss": 0.0259, "step": 164380 }, { "epoch": 0.685903480735369, "grad_norm": 0.7973356982930929, "learning_rate": 2.415095121558323e-06, "loss": 0.0234, "step": 164385 }, { "epoch": 0.6859243434503592, "grad_norm": 1.4251555356779395, "learning_rate": 2.4150583929224953e-06, "loss": 0.0291, "step": 164390 }, { "epoch": 0.6859452061653495, "grad_norm": 0.568503501144872, "learning_rate": 2.4150216659623187e-06, "loss": 0.0203, "step": 164395 }, { "epoch": 0.6859660688803398, "grad_norm": 0.30842773930202827, "learning_rate": 2.4149849406776642e-06, "loss": 0.0161, "step": 164400 }, { "epoch": 0.6859869315953301, "grad_norm": 0.6436404311873476, "learning_rate": 2.4149482170684054e-06, "loss": 0.0254, "step": 164405 }, { "epoch": 0.6860077943103203, "grad_norm": 0.5265690137824353, "learning_rate": 2.414911495134415e-06, "loss": 0.0228, "step": 164410 }, { "epoch": 0.6860286570253107, "grad_norm": 0.5207162068522796, "learning_rate": 2.414874774875566e-06, "loss": 0.0183, "step": 164415 }, { "epoch": 0.6860495197403009, "grad_norm": 0.41912871830129483, "learning_rate": 2.4148380562917297e-06, "loss": 0.0179, "step": 164420 }, { "epoch": 0.6860703824552912, "grad_norm": 0.6954370530521125, "learning_rate": 2.4148013393827793e-06, "loss": 0.0194, "step": 164425 }, { "epoch": 0.6860912451702815, "grad_norm": 0.7338712752318735, "learning_rate": 2.414764624148588e-06, "loss": 0.025, "step": 164430 }, { "epoch": 0.6861121078852718, "grad_norm": 0.5596147378311651, "learning_rate": 2.414727910589028e-06, "loss": 0.0292, "step": 164435 }, { "epoch": 0.686132970600262, "grad_norm": 1.0838407925576454, "learning_rate": 2.414691198703972e-06, "loss": 0.0302, "step": 164440 }, { "epoch": 0.6861538333152523, "grad_norm": 0.40508099154355803, "learning_rate": 2.414654488493294e-06, "loss": 0.0215, "step": 164445 }, { "epoch": 0.6861746960302426, "grad_norm": 0.394191351246706, "learning_rate": 2.4146177799568643e-06, "loss": 0.0213, "step": 164450 }, { "epoch": 0.6861955587452329, "grad_norm": 0.5450443808643275, "learning_rate": 2.4145810730945572e-06, "loss": 0.0319, "step": 164455 }, { "epoch": 0.6862164214602231, "grad_norm": 0.6692244050890425, "learning_rate": 2.4145443679062457e-06, "loss": 0.0241, "step": 164460 }, { "epoch": 0.6862372841752135, "grad_norm": 1.5667234626726503, "learning_rate": 2.4145076643918012e-06, "loss": 0.0163, "step": 164465 }, { "epoch": 0.6862581468902037, "grad_norm": 0.7100852856087386, "learning_rate": 2.414470962551098e-06, "loss": 0.0209, "step": 164470 }, { "epoch": 0.686279009605194, "grad_norm": 0.7689582114554793, "learning_rate": 2.4144342623840084e-06, "loss": 0.0231, "step": 164475 }, { "epoch": 0.6862998723201843, "grad_norm": 0.5085641769877237, "learning_rate": 2.4143975638904047e-06, "loss": 0.0194, "step": 164480 }, { "epoch": 0.6863207350351745, "grad_norm": 0.854156937194271, "learning_rate": 2.4143608670701603e-06, "loss": 0.0204, "step": 164485 }, { "epoch": 0.6863415977501648, "grad_norm": 0.4085265195869939, "learning_rate": 2.4143241719231474e-06, "loss": 0.0235, "step": 164490 }, { "epoch": 0.6863624604651551, "grad_norm": 0.35458143745402765, "learning_rate": 2.4142874784492396e-06, "loss": 0.0142, "step": 164495 }, { "epoch": 0.6863833231801454, "grad_norm": 0.6931928193821286, "learning_rate": 2.4142507866483096e-06, "loss": 0.0185, "step": 164500 }, { "epoch": 0.6864041858951356, "grad_norm": 0.725536545289219, "learning_rate": 2.41421409652023e-06, "loss": 0.0231, "step": 164505 }, { "epoch": 0.6864250486101259, "grad_norm": 0.5639592841320293, "learning_rate": 2.4141774080648733e-06, "loss": 0.025, "step": 164510 }, { "epoch": 0.6864459113251162, "grad_norm": 0.760259322125967, "learning_rate": 2.4141407212821138e-06, "loss": 0.0158, "step": 164515 }, { "epoch": 0.6864667740401065, "grad_norm": 0.5132055050640267, "learning_rate": 2.4141040361718225e-06, "loss": 0.024, "step": 164520 }, { "epoch": 0.6864876367550967, "grad_norm": 0.8506661795990619, "learning_rate": 2.4140673527338736e-06, "loss": 0.0258, "step": 164525 }, { "epoch": 0.6865084994700871, "grad_norm": 0.655544624734817, "learning_rate": 2.41403067096814e-06, "loss": 0.0219, "step": 164530 }, { "epoch": 0.6865293621850773, "grad_norm": 0.804618173206748, "learning_rate": 2.4139939908744947e-06, "loss": 0.0183, "step": 164535 }, { "epoch": 0.6865502249000676, "grad_norm": 0.8244701886597574, "learning_rate": 2.41395731245281e-06, "loss": 0.0272, "step": 164540 }, { "epoch": 0.6865710876150579, "grad_norm": 0.355300337804781, "learning_rate": 2.4139206357029593e-06, "loss": 0.0183, "step": 164545 }, { "epoch": 0.6865919503300482, "grad_norm": 0.3413770744433716, "learning_rate": 2.4138839606248157e-06, "loss": 0.0184, "step": 164550 }, { "epoch": 0.6866128130450384, "grad_norm": 0.727318372472367, "learning_rate": 2.4138472872182518e-06, "loss": 0.0155, "step": 164555 }, { "epoch": 0.6866336757600288, "grad_norm": 0.5120575976326001, "learning_rate": 2.4138106154831414e-06, "loss": 0.0239, "step": 164560 }, { "epoch": 0.686654538475019, "grad_norm": 0.6888403493564494, "learning_rate": 2.4137739454193574e-06, "loss": 0.0311, "step": 164565 }, { "epoch": 0.6866754011900092, "grad_norm": 0.3535385742166776, "learning_rate": 2.4137372770267716e-06, "loss": 0.0176, "step": 164570 }, { "epoch": 0.6866962639049995, "grad_norm": 0.8515767200232779, "learning_rate": 2.413700610305258e-06, "loss": 0.0246, "step": 164575 }, { "epoch": 0.6867171266199898, "grad_norm": 1.0168708606079429, "learning_rate": 2.4136639452546904e-06, "loss": 0.0203, "step": 164580 }, { "epoch": 0.6867379893349801, "grad_norm": 0.8713838475483762, "learning_rate": 2.413627281874941e-06, "loss": 0.0239, "step": 164585 }, { "epoch": 0.6867588520499703, "grad_norm": 0.4600269429066216, "learning_rate": 2.4135906201658824e-06, "loss": 0.0175, "step": 164590 }, { "epoch": 0.6867797147649607, "grad_norm": 0.9050795940480293, "learning_rate": 2.413553960127389e-06, "loss": 0.0129, "step": 164595 }, { "epoch": 0.6868005774799509, "grad_norm": 0.37743319265112396, "learning_rate": 2.413517301759333e-06, "loss": 0.0215, "step": 164600 }, { "epoch": 0.6868214401949412, "grad_norm": 0.7879856240254849, "learning_rate": 2.4134806450615884e-06, "loss": 0.0215, "step": 164605 }, { "epoch": 0.6868423029099315, "grad_norm": 0.932515471876961, "learning_rate": 2.413443990034027e-06, "loss": 0.0245, "step": 164610 }, { "epoch": 0.6868631656249218, "grad_norm": 0.7869227135717396, "learning_rate": 2.413407336676524e-06, "loss": 0.0241, "step": 164615 }, { "epoch": 0.686884028339912, "grad_norm": 0.8737895964856386, "learning_rate": 2.41337068498895e-06, "loss": 0.0179, "step": 164620 }, { "epoch": 0.6869048910549023, "grad_norm": 0.5906018134953002, "learning_rate": 2.4133340349711803e-06, "loss": 0.0218, "step": 164625 }, { "epoch": 0.6869257537698926, "grad_norm": 0.39617787597473025, "learning_rate": 2.413297386623087e-06, "loss": 0.0207, "step": 164630 }, { "epoch": 0.6869466164848829, "grad_norm": 0.6517582302232088, "learning_rate": 2.413260739944544e-06, "loss": 0.0225, "step": 164635 }, { "epoch": 0.6869674791998731, "grad_norm": 0.5936126843911461, "learning_rate": 2.413224094935424e-06, "loss": 0.0244, "step": 164640 }, { "epoch": 0.6869883419148635, "grad_norm": 0.4921892219255294, "learning_rate": 2.413187451595601e-06, "loss": 0.0212, "step": 164645 }, { "epoch": 0.6870092046298537, "grad_norm": 1.6611712656469766, "learning_rate": 2.4131508099249475e-06, "loss": 0.0211, "step": 164650 }, { "epoch": 0.687030067344844, "grad_norm": 0.40734414809554487, "learning_rate": 2.4131141699233374e-06, "loss": 0.0208, "step": 164655 }, { "epoch": 0.6870509300598343, "grad_norm": 0.8121938575082885, "learning_rate": 2.4130775315906434e-06, "loss": 0.0267, "step": 164660 }, { "epoch": 0.6870717927748246, "grad_norm": 0.46271239108788415, "learning_rate": 2.4130408949267382e-06, "loss": 0.0179, "step": 164665 }, { "epoch": 0.6870926554898148, "grad_norm": 0.40547115774336445, "learning_rate": 2.4130042599314973e-06, "loss": 0.0215, "step": 164670 }, { "epoch": 0.6871135182048052, "grad_norm": 0.9412296641331717, "learning_rate": 2.4129676266047926e-06, "loss": 0.0268, "step": 164675 }, { "epoch": 0.6871343809197954, "grad_norm": 0.5959298438531261, "learning_rate": 2.412930994946497e-06, "loss": 0.0184, "step": 164680 }, { "epoch": 0.6871552436347856, "grad_norm": 0.7611739051012053, "learning_rate": 2.4128943649564845e-06, "loss": 0.026, "step": 164685 }, { "epoch": 0.6871761063497759, "grad_norm": 0.7182124001033914, "learning_rate": 2.4128577366346287e-06, "loss": 0.0229, "step": 164690 }, { "epoch": 0.6871969690647662, "grad_norm": 0.36642525075576876, "learning_rate": 2.4128211099808026e-06, "loss": 0.0202, "step": 164695 }, { "epoch": 0.6872178317797565, "grad_norm": 0.3285765491385608, "learning_rate": 2.412784484994879e-06, "loss": 0.0181, "step": 164700 }, { "epoch": 0.6872386944947467, "grad_norm": 0.5727123944169066, "learning_rate": 2.412747861676733e-06, "loss": 0.0165, "step": 164705 }, { "epoch": 0.6872595572097371, "grad_norm": 0.6152188762936522, "learning_rate": 2.4127112400262367e-06, "loss": 0.0216, "step": 164710 }, { "epoch": 0.6872804199247273, "grad_norm": 3.740456757138068, "learning_rate": 2.412674620043264e-06, "loss": 0.0279, "step": 164715 }, { "epoch": 0.6873012826397176, "grad_norm": 0.4905753025131703, "learning_rate": 2.4126380017276882e-06, "loss": 0.0188, "step": 164720 }, { "epoch": 0.6873221453547079, "grad_norm": 0.769640703694514, "learning_rate": 2.4126013850793825e-06, "loss": 0.0259, "step": 164725 }, { "epoch": 0.6873430080696982, "grad_norm": 0.7946214570425955, "learning_rate": 2.4125647700982206e-06, "loss": 0.0245, "step": 164730 }, { "epoch": 0.6873638707846884, "grad_norm": 0.6080132450993971, "learning_rate": 2.4125281567840766e-06, "loss": 0.0199, "step": 164735 }, { "epoch": 0.6873847334996788, "grad_norm": 0.5039732533156799, "learning_rate": 2.412491545136823e-06, "loss": 0.0211, "step": 164740 }, { "epoch": 0.687405596214669, "grad_norm": 1.0753665205683876, "learning_rate": 2.412454935156334e-06, "loss": 0.0248, "step": 164745 }, { "epoch": 0.6874264589296593, "grad_norm": 0.48726199374875406, "learning_rate": 2.4124183268424833e-06, "loss": 0.0236, "step": 164750 }, { "epoch": 0.6874473216446495, "grad_norm": 0.6652031861917473, "learning_rate": 2.4123817201951434e-06, "loss": 0.0227, "step": 164755 }, { "epoch": 0.6874681843596399, "grad_norm": 0.4673850736827524, "learning_rate": 2.4123451152141893e-06, "loss": 0.0186, "step": 164760 }, { "epoch": 0.6874890470746301, "grad_norm": 0.4553829384966666, "learning_rate": 2.4123085118994933e-06, "loss": 0.0236, "step": 164765 }, { "epoch": 0.6875099097896203, "grad_norm": 0.5435461484517681, "learning_rate": 2.4122719102509303e-06, "loss": 0.0166, "step": 164770 }, { "epoch": 0.6875307725046107, "grad_norm": 0.75279833813568, "learning_rate": 2.4122353102683726e-06, "loss": 0.0282, "step": 164775 }, { "epoch": 0.6875516352196009, "grad_norm": 1.7494597577622422, "learning_rate": 2.4121987119516945e-06, "loss": 0.0242, "step": 164780 }, { "epoch": 0.6875724979345912, "grad_norm": 0.8131567921870713, "learning_rate": 2.412162115300769e-06, "loss": 0.0175, "step": 164785 }, { "epoch": 0.6875933606495815, "grad_norm": 0.8925850674673758, "learning_rate": 2.412125520315471e-06, "loss": 0.0252, "step": 164790 }, { "epoch": 0.6876142233645718, "grad_norm": 0.7668663473226602, "learning_rate": 2.412088926995673e-06, "loss": 0.026, "step": 164795 }, { "epoch": 0.687635086079562, "grad_norm": 0.3781980510684311, "learning_rate": 2.412052335341249e-06, "loss": 0.0234, "step": 164800 }, { "epoch": 0.6876559487945523, "grad_norm": 0.6351875054482711, "learning_rate": 2.412015745352073e-06, "loss": 0.0238, "step": 164805 }, { "epoch": 0.6876768115095426, "grad_norm": 0.9101658555882669, "learning_rate": 2.411979157028018e-06, "loss": 0.0207, "step": 164810 }, { "epoch": 0.6876976742245329, "grad_norm": 0.8394755300819614, "learning_rate": 2.4119425703689584e-06, "loss": 0.0239, "step": 164815 }, { "epoch": 0.6877185369395231, "grad_norm": 0.45075534218133195, "learning_rate": 2.4119059853747678e-06, "loss": 0.0183, "step": 164820 }, { "epoch": 0.6877393996545135, "grad_norm": 0.6462736486669141, "learning_rate": 2.41186940204532e-06, "loss": 0.025, "step": 164825 }, { "epoch": 0.6877602623695037, "grad_norm": 0.9570132016684592, "learning_rate": 2.4118328203804884e-06, "loss": 0.0252, "step": 164830 }, { "epoch": 0.687781125084494, "grad_norm": 0.32224520790832034, "learning_rate": 2.411796240380147e-06, "loss": 0.0188, "step": 164835 }, { "epoch": 0.6878019877994843, "grad_norm": 0.38195436107139463, "learning_rate": 2.4117596620441693e-06, "loss": 0.0193, "step": 164840 }, { "epoch": 0.6878228505144746, "grad_norm": 1.1671161091444857, "learning_rate": 2.4117230853724293e-06, "loss": 0.0286, "step": 164845 }, { "epoch": 0.6878437132294648, "grad_norm": 0.7190238252801919, "learning_rate": 2.4116865103648003e-06, "loss": 0.0174, "step": 164850 }, { "epoch": 0.6878645759444552, "grad_norm": 0.7318710945047782, "learning_rate": 2.4116499370211577e-06, "loss": 0.021, "step": 164855 }, { "epoch": 0.6878854386594454, "grad_norm": 0.911996438542885, "learning_rate": 2.4116133653413738e-06, "loss": 0.0229, "step": 164860 }, { "epoch": 0.6879063013744356, "grad_norm": 0.8713597684969129, "learning_rate": 2.411576795325323e-06, "loss": 0.0282, "step": 164865 }, { "epoch": 0.6879271640894259, "grad_norm": 0.3675346617606718, "learning_rate": 2.4115402269728785e-06, "loss": 0.0206, "step": 164870 }, { "epoch": 0.6879480268044162, "grad_norm": 1.408467159718486, "learning_rate": 2.4115036602839155e-06, "loss": 0.0239, "step": 164875 }, { "epoch": 0.6879688895194065, "grad_norm": 0.804644717119209, "learning_rate": 2.411467095258307e-06, "loss": 0.0172, "step": 164880 }, { "epoch": 0.6879897522343967, "grad_norm": 0.8424555870508803, "learning_rate": 2.4114305318959264e-06, "loss": 0.0202, "step": 164885 }, { "epoch": 0.6880106149493871, "grad_norm": 1.0658677900784828, "learning_rate": 2.4113939701966487e-06, "loss": 0.0278, "step": 164890 }, { "epoch": 0.6880314776643773, "grad_norm": 0.47527237029001246, "learning_rate": 2.4113574101603473e-06, "loss": 0.02, "step": 164895 }, { "epoch": 0.6880523403793676, "grad_norm": 0.49756172718775954, "learning_rate": 2.411320851786896e-06, "loss": 0.0228, "step": 164900 }, { "epoch": 0.6880732030943579, "grad_norm": 0.7269850924768293, "learning_rate": 2.4112842950761694e-06, "loss": 0.0274, "step": 164905 }, { "epoch": 0.6880940658093482, "grad_norm": 0.7519843467035141, "learning_rate": 2.411247740028041e-06, "loss": 0.0209, "step": 164910 }, { "epoch": 0.6881149285243384, "grad_norm": 0.7432483687328391, "learning_rate": 2.4112111866423843e-06, "loss": 0.0207, "step": 164915 }, { "epoch": 0.6881357912393288, "grad_norm": 0.674651277607461, "learning_rate": 2.4111746349190735e-06, "loss": 0.0169, "step": 164920 }, { "epoch": 0.688156653954319, "grad_norm": 0.26461920613561324, "learning_rate": 2.411138084857984e-06, "loss": 0.0187, "step": 164925 }, { "epoch": 0.6881775166693093, "grad_norm": 0.505068808319223, "learning_rate": 2.4111015364589883e-06, "loss": 0.0211, "step": 164930 }, { "epoch": 0.6881983793842995, "grad_norm": 0.6109676195820042, "learning_rate": 2.41106498972196e-06, "loss": 0.013, "step": 164935 }, { "epoch": 0.6882192420992899, "grad_norm": 0.6166346448812928, "learning_rate": 2.411028444646775e-06, "loss": 0.0236, "step": 164940 }, { "epoch": 0.6882401048142801, "grad_norm": 1.0453788432097788, "learning_rate": 2.410991901233306e-06, "loss": 0.0267, "step": 164945 }, { "epoch": 0.6882609675292704, "grad_norm": 0.7090300244808354, "learning_rate": 2.410955359481428e-06, "loss": 0.0168, "step": 164950 }, { "epoch": 0.6882818302442607, "grad_norm": 0.7185331315409869, "learning_rate": 2.410918819391013e-06, "loss": 0.0224, "step": 164955 }, { "epoch": 0.688302692959251, "grad_norm": 0.6381497936150698, "learning_rate": 2.410882280961938e-06, "loss": 0.0213, "step": 164960 }, { "epoch": 0.6883235556742412, "grad_norm": 0.5265372447537653, "learning_rate": 2.410845744194075e-06, "loss": 0.022, "step": 164965 }, { "epoch": 0.6883444183892315, "grad_norm": 0.6672914503479662, "learning_rate": 2.4108092090872987e-06, "loss": 0.0208, "step": 164970 }, { "epoch": 0.6883652811042218, "grad_norm": 0.43284704932893064, "learning_rate": 2.4107726756414847e-06, "loss": 0.0251, "step": 164975 }, { "epoch": 0.688386143819212, "grad_norm": 0.35071699809957596, "learning_rate": 2.410736143856505e-06, "loss": 0.0249, "step": 164980 }, { "epoch": 0.6884070065342023, "grad_norm": 0.5849491518657124, "learning_rate": 2.410699613732234e-06, "loss": 0.0164, "step": 164985 }, { "epoch": 0.6884278692491926, "grad_norm": 0.7003741339614383, "learning_rate": 2.410663085268547e-06, "loss": 0.0257, "step": 164990 }, { "epoch": 0.6884487319641829, "grad_norm": 0.5219052442908205, "learning_rate": 2.410626558465318e-06, "loss": 0.0319, "step": 164995 }, { "epoch": 0.6884695946791731, "grad_norm": 0.44428182889295953, "learning_rate": 2.41059003332242e-06, "loss": 0.0218, "step": 165000 }, { "epoch": 0.6884904573941635, "grad_norm": 0.3737661883875846, "learning_rate": 2.4105535098397286e-06, "loss": 0.0179, "step": 165005 }, { "epoch": 0.6885113201091537, "grad_norm": 0.49554637260252754, "learning_rate": 2.4105169880171184e-06, "loss": 0.0219, "step": 165010 }, { "epoch": 0.688532182824144, "grad_norm": 0.8740236929733696, "learning_rate": 2.4104804678544614e-06, "loss": 0.0236, "step": 165015 }, { "epoch": 0.6885530455391343, "grad_norm": 1.2173570485273744, "learning_rate": 2.4104439493516333e-06, "loss": 0.029, "step": 165020 }, { "epoch": 0.6885739082541246, "grad_norm": 0.9089918989783667, "learning_rate": 2.410407432508509e-06, "loss": 0.0208, "step": 165025 }, { "epoch": 0.6885947709691148, "grad_norm": 0.5083268688665102, "learning_rate": 2.410370917324962e-06, "loss": 0.0237, "step": 165030 }, { "epoch": 0.6886156336841052, "grad_norm": 0.3575014497206037, "learning_rate": 2.4103344038008667e-06, "loss": 0.0318, "step": 165035 }, { "epoch": 0.6886364963990954, "grad_norm": 0.34074230031500924, "learning_rate": 2.410297891936097e-06, "loss": 0.0152, "step": 165040 }, { "epoch": 0.6886573591140857, "grad_norm": 0.7126817584800246, "learning_rate": 2.4102613817305287e-06, "loss": 0.0168, "step": 165045 }, { "epoch": 0.6886782218290759, "grad_norm": 0.8926725364122599, "learning_rate": 2.4102248731840337e-06, "loss": 0.0198, "step": 165050 }, { "epoch": 0.6886990845440663, "grad_norm": 0.7296749340856473, "learning_rate": 2.4101883662964884e-06, "loss": 0.0243, "step": 165055 }, { "epoch": 0.6887199472590565, "grad_norm": 0.6689520436829984, "learning_rate": 2.4101518610677664e-06, "loss": 0.0224, "step": 165060 }, { "epoch": 0.6887408099740467, "grad_norm": 0.37171107321104846, "learning_rate": 2.410115357497742e-06, "loss": 0.0251, "step": 165065 }, { "epoch": 0.6887616726890371, "grad_norm": 0.3581870445877292, "learning_rate": 2.4100788555862904e-06, "loss": 0.0288, "step": 165070 }, { "epoch": 0.6887825354040273, "grad_norm": 1.0997388511332296, "learning_rate": 2.410042355333285e-06, "loss": 0.0268, "step": 165075 }, { "epoch": 0.6888033981190176, "grad_norm": 0.996694009830547, "learning_rate": 2.4100058567386e-06, "loss": 0.0242, "step": 165080 }, { "epoch": 0.6888242608340079, "grad_norm": 0.9104605655404585, "learning_rate": 2.409969359802111e-06, "loss": 0.0242, "step": 165085 }, { "epoch": 0.6888451235489982, "grad_norm": 0.5413483953647168, "learning_rate": 2.409932864523692e-06, "loss": 0.0203, "step": 165090 }, { "epoch": 0.6888659862639884, "grad_norm": 0.5878042151386483, "learning_rate": 2.4098963709032177e-06, "loss": 0.0239, "step": 165095 }, { "epoch": 0.6888868489789788, "grad_norm": 0.5591215259159995, "learning_rate": 2.409859878940561e-06, "loss": 0.0242, "step": 165100 }, { "epoch": 0.688907711693969, "grad_norm": 0.5595399561788323, "learning_rate": 2.409823388635598e-06, "loss": 0.0231, "step": 165105 }, { "epoch": 0.6889285744089593, "grad_norm": 0.25738342575003664, "learning_rate": 2.4097868999882035e-06, "loss": 0.0148, "step": 165110 }, { "epoch": 0.6889494371239495, "grad_norm": 0.2681577753033531, "learning_rate": 2.4097504129982507e-06, "loss": 0.0223, "step": 165115 }, { "epoch": 0.6889702998389399, "grad_norm": 0.508794629490394, "learning_rate": 2.4097139276656144e-06, "loss": 0.0224, "step": 165120 }, { "epoch": 0.6889911625539301, "grad_norm": 0.28530474845982934, "learning_rate": 2.40967744399017e-06, "loss": 0.0233, "step": 165125 }, { "epoch": 0.6890120252689204, "grad_norm": 0.7213630361510335, "learning_rate": 2.4096409619717916e-06, "loss": 0.0161, "step": 165130 }, { "epoch": 0.6890328879839107, "grad_norm": 0.4965607861451389, "learning_rate": 2.409604481610353e-06, "loss": 0.0254, "step": 165135 }, { "epoch": 0.689053750698901, "grad_norm": 0.7770703613751638, "learning_rate": 2.40956800290573e-06, "loss": 0.0221, "step": 165140 }, { "epoch": 0.6890746134138912, "grad_norm": 0.35291475940674516, "learning_rate": 2.409531525857797e-06, "loss": 0.0225, "step": 165145 }, { "epoch": 0.6890954761288816, "grad_norm": 0.682620948080337, "learning_rate": 2.4094950504664276e-06, "loss": 0.0212, "step": 165150 }, { "epoch": 0.6891163388438718, "grad_norm": 0.9164011530819561, "learning_rate": 2.4094585767314973e-06, "loss": 0.026, "step": 165155 }, { "epoch": 0.689137201558862, "grad_norm": 0.8700997990231222, "learning_rate": 2.4094221046528806e-06, "loss": 0.0502, "step": 165160 }, { "epoch": 0.6891580642738523, "grad_norm": 0.8950499439203208, "learning_rate": 2.4093856342304518e-06, "loss": 0.025, "step": 165165 }, { "epoch": 0.6891789269888426, "grad_norm": 0.995789771343726, "learning_rate": 2.409349165464086e-06, "loss": 0.0185, "step": 165170 }, { "epoch": 0.6891997897038329, "grad_norm": 0.6511276143031209, "learning_rate": 2.409312698353658e-06, "loss": 0.0264, "step": 165175 }, { "epoch": 0.6892206524188231, "grad_norm": 0.48081176690556315, "learning_rate": 2.4092762328990418e-06, "loss": 0.0199, "step": 165180 }, { "epoch": 0.6892415151338135, "grad_norm": 0.9378048725419882, "learning_rate": 2.409239769100112e-06, "loss": 0.0214, "step": 165185 }, { "epoch": 0.6892623778488037, "grad_norm": 0.6583073180499398, "learning_rate": 2.409203306956744e-06, "loss": 0.0244, "step": 165190 }, { "epoch": 0.689283240563794, "grad_norm": 0.41311977268024697, "learning_rate": 2.409166846468813e-06, "loss": 0.0186, "step": 165195 }, { "epoch": 0.6893041032787843, "grad_norm": 0.5433442132539178, "learning_rate": 2.409130387636192e-06, "loss": 0.0242, "step": 165200 }, { "epoch": 0.6893249659937746, "grad_norm": 0.20717118182221358, "learning_rate": 2.4090939304587583e-06, "loss": 0.0176, "step": 165205 }, { "epoch": 0.6893458287087648, "grad_norm": 0.42840331182444397, "learning_rate": 2.409057474936384e-06, "loss": 0.0256, "step": 165210 }, { "epoch": 0.6893666914237552, "grad_norm": 0.3272905429266106, "learning_rate": 2.409021021068945e-06, "loss": 0.0186, "step": 165215 }, { "epoch": 0.6893875541387454, "grad_norm": 1.0091798339887446, "learning_rate": 2.408984568856317e-06, "loss": 0.0219, "step": 165220 }, { "epoch": 0.6894084168537357, "grad_norm": 0.7381802736642217, "learning_rate": 2.408948118298373e-06, "loss": 0.021, "step": 165225 }, { "epoch": 0.6894292795687259, "grad_norm": 0.6858905261001651, "learning_rate": 2.408911669394989e-06, "loss": 0.0213, "step": 165230 }, { "epoch": 0.6894501422837163, "grad_norm": 0.4743928029310886, "learning_rate": 2.40887522214604e-06, "loss": 0.0219, "step": 165235 }, { "epoch": 0.6894710049987065, "grad_norm": 0.657788657833574, "learning_rate": 2.4088387765513996e-06, "loss": 0.0305, "step": 165240 }, { "epoch": 0.6894918677136967, "grad_norm": 0.7481830350787148, "learning_rate": 2.4088023326109442e-06, "loss": 0.0219, "step": 165245 }, { "epoch": 0.6895127304286871, "grad_norm": 0.8644629800065579, "learning_rate": 2.4087658903245477e-06, "loss": 0.0278, "step": 165250 }, { "epoch": 0.6895335931436773, "grad_norm": 1.6575237821247066, "learning_rate": 2.4087294496920855e-06, "loss": 0.0161, "step": 165255 }, { "epoch": 0.6895544558586676, "grad_norm": 0.879961371204897, "learning_rate": 2.408693010713432e-06, "loss": 0.0203, "step": 165260 }, { "epoch": 0.689575318573658, "grad_norm": 0.6372035509708373, "learning_rate": 2.408656573388462e-06, "loss": 0.0207, "step": 165265 }, { "epoch": 0.6895961812886482, "grad_norm": 1.0231877737741106, "learning_rate": 2.4086201377170513e-06, "loss": 0.0258, "step": 165270 }, { "epoch": 0.6896170440036384, "grad_norm": 1.0321389354122752, "learning_rate": 2.408583703699074e-06, "loss": 0.0198, "step": 165275 }, { "epoch": 0.6896379067186288, "grad_norm": 0.44213087731852135, "learning_rate": 2.4085472713344056e-06, "loss": 0.0273, "step": 165280 }, { "epoch": 0.689658769433619, "grad_norm": 0.42893082421513046, "learning_rate": 2.4085108406229206e-06, "loss": 0.0163, "step": 165285 }, { "epoch": 0.6896796321486093, "grad_norm": 0.6510577539732717, "learning_rate": 2.4084744115644944e-06, "loss": 0.0212, "step": 165290 }, { "epoch": 0.6897004948635995, "grad_norm": 0.6011048197252351, "learning_rate": 2.4084379841590015e-06, "loss": 0.0261, "step": 165295 }, { "epoch": 0.6897213575785899, "grad_norm": 0.6346029653889533, "learning_rate": 2.4084015584063175e-06, "loss": 0.02, "step": 165300 }, { "epoch": 0.6897422202935801, "grad_norm": 0.4950571665018251, "learning_rate": 2.408365134306317e-06, "loss": 0.0219, "step": 165305 }, { "epoch": 0.6897630830085704, "grad_norm": 0.3080006360105939, "learning_rate": 2.4083287118588753e-06, "loss": 0.0198, "step": 165310 }, { "epoch": 0.6897839457235607, "grad_norm": 0.26384381203456886, "learning_rate": 2.4082922910638667e-06, "loss": 0.0226, "step": 165315 }, { "epoch": 0.689804808438551, "grad_norm": 0.7474694362763904, "learning_rate": 2.408255871921168e-06, "loss": 0.0244, "step": 165320 }, { "epoch": 0.6898256711535412, "grad_norm": 0.801765084239275, "learning_rate": 2.4082194544306516e-06, "loss": 0.0269, "step": 165325 }, { "epoch": 0.6898465338685316, "grad_norm": 0.46133263827991555, "learning_rate": 2.4081830385921952e-06, "loss": 0.0286, "step": 165330 }, { "epoch": 0.6898673965835218, "grad_norm": 0.9520681360340827, "learning_rate": 2.4081466244056725e-06, "loss": 0.0226, "step": 165335 }, { "epoch": 0.689888259298512, "grad_norm": 0.9597163226591322, "learning_rate": 2.408110211870959e-06, "loss": 0.0246, "step": 165340 }, { "epoch": 0.6899091220135023, "grad_norm": 0.5159940190637451, "learning_rate": 2.408073800987929e-06, "loss": 0.0194, "step": 165345 }, { "epoch": 0.6899299847284927, "grad_norm": 0.7759496101784318, "learning_rate": 2.408037391756459e-06, "loss": 0.0184, "step": 165350 }, { "epoch": 0.6899508474434829, "grad_norm": 0.6204944103424832, "learning_rate": 2.4080009841764232e-06, "loss": 0.0246, "step": 165355 }, { "epoch": 0.6899717101584731, "grad_norm": 0.6216405407032549, "learning_rate": 2.4079645782476975e-06, "loss": 0.0207, "step": 165360 }, { "epoch": 0.6899925728734635, "grad_norm": 0.5913868870980679, "learning_rate": 2.4079281739701567e-06, "loss": 0.0224, "step": 165365 }, { "epoch": 0.6900134355884537, "grad_norm": 1.5288123686297468, "learning_rate": 2.4078917713436753e-06, "loss": 0.0263, "step": 165370 }, { "epoch": 0.690034298303444, "grad_norm": 0.6616430327583757, "learning_rate": 2.4078553703681294e-06, "loss": 0.0226, "step": 165375 }, { "epoch": 0.6900551610184343, "grad_norm": 0.7012434403048561, "learning_rate": 2.407818971043394e-06, "loss": 0.0239, "step": 165380 }, { "epoch": 0.6900760237334246, "grad_norm": 0.711621546536827, "learning_rate": 2.407782573369344e-06, "loss": 0.0185, "step": 165385 }, { "epoch": 0.6900968864484148, "grad_norm": 0.6153336152584676, "learning_rate": 2.4077461773458554e-06, "loss": 0.0153, "step": 165390 }, { "epoch": 0.6901177491634052, "grad_norm": 0.7026026775989788, "learning_rate": 2.407709782972803e-06, "loss": 0.0268, "step": 165395 }, { "epoch": 0.6901386118783954, "grad_norm": 0.8433973702319596, "learning_rate": 2.407673390250061e-06, "loss": 0.0211, "step": 165400 }, { "epoch": 0.6901594745933857, "grad_norm": 1.008965420794169, "learning_rate": 2.407636999177507e-06, "loss": 0.0196, "step": 165405 }, { "epoch": 0.6901803373083759, "grad_norm": 1.0675509908271734, "learning_rate": 2.4076006097550143e-06, "loss": 0.0257, "step": 165410 }, { "epoch": 0.6902012000233663, "grad_norm": 0.2888919902193393, "learning_rate": 2.407564221982459e-06, "loss": 0.0188, "step": 165415 }, { "epoch": 0.6902220627383565, "grad_norm": 0.2853809401820302, "learning_rate": 2.4075278358597165e-06, "loss": 0.0227, "step": 165420 }, { "epoch": 0.6902429254533468, "grad_norm": 0.982032386381375, "learning_rate": 2.407491451386662e-06, "loss": 0.0307, "step": 165425 }, { "epoch": 0.6902637881683371, "grad_norm": 0.8279781446900623, "learning_rate": 2.4074550685631705e-06, "loss": 0.0232, "step": 165430 }, { "epoch": 0.6902846508833274, "grad_norm": 0.9196436961884801, "learning_rate": 2.4074186873891178e-06, "loss": 0.0237, "step": 165435 }, { "epoch": 0.6903055135983176, "grad_norm": 1.0120924791478616, "learning_rate": 2.4073823078643792e-06, "loss": 0.0172, "step": 165440 }, { "epoch": 0.690326376313308, "grad_norm": 0.5887515364546522, "learning_rate": 2.4073459299888304e-06, "loss": 0.0198, "step": 165445 }, { "epoch": 0.6903472390282982, "grad_norm": 0.38320807135664237, "learning_rate": 2.407309553762346e-06, "loss": 0.0326, "step": 165450 }, { "epoch": 0.6903681017432884, "grad_norm": 0.2759080723261129, "learning_rate": 2.407273179184802e-06, "loss": 0.0214, "step": 165455 }, { "epoch": 0.6903889644582788, "grad_norm": 0.5307151905490078, "learning_rate": 2.4072368062560736e-06, "loss": 0.0228, "step": 165460 }, { "epoch": 0.690409827173269, "grad_norm": 0.6878090916361596, "learning_rate": 2.4072004349760366e-06, "loss": 0.0154, "step": 165465 }, { "epoch": 0.6904306898882593, "grad_norm": 0.4625515416980329, "learning_rate": 2.4071640653445654e-06, "loss": 0.0127, "step": 165470 }, { "epoch": 0.6904515526032495, "grad_norm": 0.7481122167541696, "learning_rate": 2.407127697361537e-06, "loss": 0.0255, "step": 165475 }, { "epoch": 0.6904724153182399, "grad_norm": 0.4363742704709741, "learning_rate": 2.407091331026826e-06, "loss": 0.0221, "step": 165480 }, { "epoch": 0.6904932780332301, "grad_norm": 1.166642494092677, "learning_rate": 2.4070549663403075e-06, "loss": 0.0255, "step": 165485 }, { "epoch": 0.6905141407482204, "grad_norm": 0.6300005428404365, "learning_rate": 2.4070186033018582e-06, "loss": 0.018, "step": 165490 }, { "epoch": 0.6905350034632107, "grad_norm": 0.8554163995648685, "learning_rate": 2.4069822419113527e-06, "loss": 0.0249, "step": 165495 }, { "epoch": 0.690555866178201, "grad_norm": 0.6466742672290199, "learning_rate": 2.4069458821686667e-06, "loss": 0.0292, "step": 165500 }, { "epoch": 0.6905767288931912, "grad_norm": 0.6744580521385639, "learning_rate": 2.406909524073675e-06, "loss": 0.0214, "step": 165505 }, { "epoch": 0.6905975916081816, "grad_norm": 0.5265972373458857, "learning_rate": 2.406873167626255e-06, "loss": 0.0167, "step": 165510 }, { "epoch": 0.6906184543231718, "grad_norm": 0.48988955895084485, "learning_rate": 2.406836812826281e-06, "loss": 0.0173, "step": 165515 }, { "epoch": 0.6906393170381621, "grad_norm": 0.6709008235261938, "learning_rate": 2.406800459673629e-06, "loss": 0.0179, "step": 165520 }, { "epoch": 0.6906601797531523, "grad_norm": 0.4839199255338314, "learning_rate": 2.406764108168174e-06, "loss": 0.0202, "step": 165525 }, { "epoch": 0.6906810424681427, "grad_norm": 0.34104034539936734, "learning_rate": 2.4067277583097924e-06, "loss": 0.0168, "step": 165530 }, { "epoch": 0.6907019051831329, "grad_norm": 0.8208637046862928, "learning_rate": 2.406691410098359e-06, "loss": 0.0226, "step": 165535 }, { "epoch": 0.6907227678981231, "grad_norm": 0.6365185491476654, "learning_rate": 2.40665506353375e-06, "loss": 0.0313, "step": 165540 }, { "epoch": 0.6907436306131135, "grad_norm": 1.1392914625009922, "learning_rate": 2.406618718615841e-06, "loss": 0.0291, "step": 165545 }, { "epoch": 0.6907644933281037, "grad_norm": 0.6226577563288929, "learning_rate": 2.406582375344508e-06, "loss": 0.0159, "step": 165550 }, { "epoch": 0.690785356043094, "grad_norm": 1.2209656485010776, "learning_rate": 2.4065460337196254e-06, "loss": 0.0219, "step": 165555 }, { "epoch": 0.6908062187580843, "grad_norm": 0.35036180025705616, "learning_rate": 2.4065096937410705e-06, "loss": 0.0226, "step": 165560 }, { "epoch": 0.6908270814730746, "grad_norm": 0.5958508332574598, "learning_rate": 2.4064733554087177e-06, "loss": 0.0286, "step": 165565 }, { "epoch": 0.6908479441880648, "grad_norm": 0.6869600674302077, "learning_rate": 2.4064370187224435e-06, "loss": 0.0225, "step": 165570 }, { "epoch": 0.6908688069030552, "grad_norm": 0.3380678352354565, "learning_rate": 2.4064006836821237e-06, "loss": 0.0249, "step": 165575 }, { "epoch": 0.6908896696180454, "grad_norm": 0.3538603939117593, "learning_rate": 2.4063643502876333e-06, "loss": 0.0145, "step": 165580 }, { "epoch": 0.6909105323330357, "grad_norm": 0.5511910412617828, "learning_rate": 2.406328018538849e-06, "loss": 0.0229, "step": 165585 }, { "epoch": 0.6909313950480259, "grad_norm": 0.6225496334884306, "learning_rate": 2.4062916884356456e-06, "loss": 0.0216, "step": 165590 }, { "epoch": 0.6909522577630163, "grad_norm": 0.3608840801977825, "learning_rate": 2.4062553599778996e-06, "loss": 0.022, "step": 165595 }, { "epoch": 0.6909731204780065, "grad_norm": 0.5598430853875749, "learning_rate": 2.4062190331654864e-06, "loss": 0.0228, "step": 165600 }, { "epoch": 0.6909939831929968, "grad_norm": 0.6672255498217485, "learning_rate": 2.406182707998282e-06, "loss": 0.028, "step": 165605 }, { "epoch": 0.6910148459079871, "grad_norm": 0.5919955318313056, "learning_rate": 2.406146384476163e-06, "loss": 0.0266, "step": 165610 }, { "epoch": 0.6910357086229774, "grad_norm": 0.8220088777838118, "learning_rate": 2.406110062599003e-06, "loss": 0.0268, "step": 165615 }, { "epoch": 0.6910565713379676, "grad_norm": 0.8445359800802391, "learning_rate": 2.4060737423666797e-06, "loss": 0.0206, "step": 165620 }, { "epoch": 0.691077434052958, "grad_norm": 0.5499315033973953, "learning_rate": 2.4060374237790685e-06, "loss": 0.0205, "step": 165625 }, { "epoch": 0.6910982967679482, "grad_norm": 0.5189220831242946, "learning_rate": 2.406001106836046e-06, "loss": 0.022, "step": 165630 }, { "epoch": 0.6911191594829384, "grad_norm": 0.7070414118462119, "learning_rate": 2.4059647915374867e-06, "loss": 0.019, "step": 165635 }, { "epoch": 0.6911400221979288, "grad_norm": 0.5700854153596111, "learning_rate": 2.405928477883267e-06, "loss": 0.0226, "step": 165640 }, { "epoch": 0.691160884912919, "grad_norm": 1.2588340064650154, "learning_rate": 2.4058921658732634e-06, "loss": 0.022, "step": 165645 }, { "epoch": 0.6911817476279093, "grad_norm": 0.7039214566578413, "learning_rate": 2.405855855507351e-06, "loss": 0.0218, "step": 165650 }, { "epoch": 0.6912026103428995, "grad_norm": 0.19430150401370155, "learning_rate": 2.4058195467854065e-06, "loss": 0.0199, "step": 165655 }, { "epoch": 0.6912234730578899, "grad_norm": 0.378586532742084, "learning_rate": 2.4057832397073053e-06, "loss": 0.0176, "step": 165660 }, { "epoch": 0.6912443357728801, "grad_norm": 0.40113624353079547, "learning_rate": 2.4057469342729237e-06, "loss": 0.0206, "step": 165665 }, { "epoch": 0.6912651984878704, "grad_norm": 0.5820746876701128, "learning_rate": 2.4057106304821374e-06, "loss": 0.0202, "step": 165670 }, { "epoch": 0.6912860612028607, "grad_norm": 1.3381073383307514, "learning_rate": 2.4056743283348225e-06, "loss": 0.0244, "step": 165675 }, { "epoch": 0.691306923917851, "grad_norm": 0.6714603447348617, "learning_rate": 2.4056380278308553e-06, "loss": 0.0217, "step": 165680 }, { "epoch": 0.6913277866328412, "grad_norm": 0.40119295728835624, "learning_rate": 2.4056017289701113e-06, "loss": 0.021, "step": 165685 }, { "epoch": 0.6913486493478316, "grad_norm": 1.2858162033409772, "learning_rate": 2.405565431752467e-06, "loss": 0.0323, "step": 165690 }, { "epoch": 0.6913695120628218, "grad_norm": 0.9909590111172573, "learning_rate": 2.4055291361777985e-06, "loss": 0.0316, "step": 165695 }, { "epoch": 0.6913903747778121, "grad_norm": 1.7020307596667856, "learning_rate": 2.4054928422459807e-06, "loss": 0.0418, "step": 165700 }, { "epoch": 0.6914112374928023, "grad_norm": 0.29671307740370084, "learning_rate": 2.405456549956891e-06, "loss": 0.0248, "step": 165705 }, { "epoch": 0.6914321002077927, "grad_norm": 0.7642193817955111, "learning_rate": 2.4054202593104047e-06, "loss": 0.0165, "step": 165710 }, { "epoch": 0.6914529629227829, "grad_norm": 0.6211350560644538, "learning_rate": 2.405383970306399e-06, "loss": 0.0175, "step": 165715 }, { "epoch": 0.6914738256377732, "grad_norm": 0.5748901410693626, "learning_rate": 2.405347682944749e-06, "loss": 0.0247, "step": 165720 }, { "epoch": 0.6914946883527635, "grad_norm": 0.7988479632331468, "learning_rate": 2.4053113972253305e-06, "loss": 0.0183, "step": 165725 }, { "epoch": 0.6915155510677538, "grad_norm": 1.2326031514091755, "learning_rate": 2.4052751131480206e-06, "loss": 0.0249, "step": 165730 }, { "epoch": 0.691536413782744, "grad_norm": 1.8065589940563929, "learning_rate": 2.405238830712695e-06, "loss": 0.0206, "step": 165735 }, { "epoch": 0.6915572764977344, "grad_norm": 0.5969427427677385, "learning_rate": 2.4052025499192295e-06, "loss": 0.029, "step": 165740 }, { "epoch": 0.6915781392127246, "grad_norm": 0.48693658532520206, "learning_rate": 2.4051662707675017e-06, "loss": 0.0226, "step": 165745 }, { "epoch": 0.6915990019277148, "grad_norm": 0.46087376799532837, "learning_rate": 2.4051299932573857e-06, "loss": 0.0191, "step": 165750 }, { "epoch": 0.6916198646427052, "grad_norm": 0.5456084706702886, "learning_rate": 2.4050937173887594e-06, "loss": 0.0199, "step": 165755 }, { "epoch": 0.6916407273576954, "grad_norm": 0.704400503267762, "learning_rate": 2.405057443161498e-06, "loss": 0.0302, "step": 165760 }, { "epoch": 0.6916615900726857, "grad_norm": 0.5621278428049616, "learning_rate": 2.405021170575478e-06, "loss": 0.0176, "step": 165765 }, { "epoch": 0.6916824527876759, "grad_norm": 0.70612911945604, "learning_rate": 2.404984899630576e-06, "loss": 0.0224, "step": 165770 }, { "epoch": 0.6917033155026663, "grad_norm": 0.6335050491178982, "learning_rate": 2.4049486303266676e-06, "loss": 0.0179, "step": 165775 }, { "epoch": 0.6917241782176565, "grad_norm": 0.9900830920050306, "learning_rate": 2.40491236266363e-06, "loss": 0.025, "step": 165780 }, { "epoch": 0.6917450409326468, "grad_norm": 1.0176608888172898, "learning_rate": 2.4048760966413388e-06, "loss": 0.0248, "step": 165785 }, { "epoch": 0.6917659036476371, "grad_norm": 0.626508536911705, "learning_rate": 2.404839832259671e-06, "loss": 0.0186, "step": 165790 }, { "epoch": 0.6917867663626274, "grad_norm": 0.6155588385238576, "learning_rate": 2.4048035695185014e-06, "loss": 0.0207, "step": 165795 }, { "epoch": 0.6918076290776176, "grad_norm": 0.5795997133512022, "learning_rate": 2.4047673084177078e-06, "loss": 0.0236, "step": 165800 }, { "epoch": 0.691828491792608, "grad_norm": 1.0259450304877948, "learning_rate": 2.4047310489571654e-06, "loss": 0.0191, "step": 165805 }, { "epoch": 0.6918493545075982, "grad_norm": 0.6622198798303678, "learning_rate": 2.4046947911367517e-06, "loss": 0.0267, "step": 165810 }, { "epoch": 0.6918702172225885, "grad_norm": 0.7819477407864175, "learning_rate": 2.4046585349563427e-06, "loss": 0.0207, "step": 165815 }, { "epoch": 0.6918910799375787, "grad_norm": 1.961892442119175, "learning_rate": 2.4046222804158137e-06, "loss": 0.0267, "step": 165820 }, { "epoch": 0.6919119426525691, "grad_norm": 0.29884159386134124, "learning_rate": 2.4045860275150422e-06, "loss": 0.0242, "step": 165825 }, { "epoch": 0.6919328053675593, "grad_norm": 0.3703126718201107, "learning_rate": 2.4045497762539046e-06, "loss": 0.0259, "step": 165830 }, { "epoch": 0.6919536680825495, "grad_norm": 0.6836523073251835, "learning_rate": 2.4045135266322772e-06, "loss": 0.0172, "step": 165835 }, { "epoch": 0.6919745307975399, "grad_norm": 0.6657330108774342, "learning_rate": 2.404477278650036e-06, "loss": 0.0173, "step": 165840 }, { "epoch": 0.6919953935125301, "grad_norm": 0.5285310474128957, "learning_rate": 2.404441032307058e-06, "loss": 0.0187, "step": 165845 }, { "epoch": 0.6920162562275204, "grad_norm": 1.2003569871616244, "learning_rate": 2.4044047876032187e-06, "loss": 0.0297, "step": 165850 }, { "epoch": 0.6920371189425107, "grad_norm": 0.7050625359988737, "learning_rate": 2.4043685445383954e-06, "loss": 0.0241, "step": 165855 }, { "epoch": 0.692057981657501, "grad_norm": 0.660740777318994, "learning_rate": 2.4043323031124648e-06, "loss": 0.022, "step": 165860 }, { "epoch": 0.6920788443724912, "grad_norm": 0.4415041127629798, "learning_rate": 2.4042960633253024e-06, "loss": 0.0245, "step": 165865 }, { "epoch": 0.6920997070874816, "grad_norm": 0.5283622208575537, "learning_rate": 2.4042598251767854e-06, "loss": 0.0164, "step": 165870 }, { "epoch": 0.6921205698024718, "grad_norm": 0.763296992930119, "learning_rate": 2.4042235886667905e-06, "loss": 0.0196, "step": 165875 }, { "epoch": 0.6921414325174621, "grad_norm": 1.2647337155250198, "learning_rate": 2.4041873537951936e-06, "loss": 0.0231, "step": 165880 }, { "epoch": 0.6921622952324523, "grad_norm": 1.0773270989518746, "learning_rate": 2.4041511205618716e-06, "loss": 0.0184, "step": 165885 }, { "epoch": 0.6921831579474427, "grad_norm": 0.5640414901182833, "learning_rate": 2.4041148889667015e-06, "loss": 0.0213, "step": 165890 }, { "epoch": 0.6922040206624329, "grad_norm": 0.41653615057176263, "learning_rate": 2.404078659009559e-06, "loss": 0.0182, "step": 165895 }, { "epoch": 0.6922248833774232, "grad_norm": 0.4950354576062843, "learning_rate": 2.404042430690321e-06, "loss": 0.0138, "step": 165900 }, { "epoch": 0.6922457460924135, "grad_norm": 0.7209055582082848, "learning_rate": 2.4040062040088636e-06, "loss": 0.0227, "step": 165905 }, { "epoch": 0.6922666088074038, "grad_norm": 0.48160687879260694, "learning_rate": 2.403969978965065e-06, "loss": 0.0223, "step": 165910 }, { "epoch": 0.692287471522394, "grad_norm": 1.1303207936272368, "learning_rate": 2.4039337555588e-06, "loss": 0.0243, "step": 165915 }, { "epoch": 0.6923083342373844, "grad_norm": 0.764929381468606, "learning_rate": 2.403897533789946e-06, "loss": 0.0218, "step": 165920 }, { "epoch": 0.6923291969523746, "grad_norm": 0.42237296078150177, "learning_rate": 2.4038613136583802e-06, "loss": 0.0154, "step": 165925 }, { "epoch": 0.6923500596673648, "grad_norm": 0.6088600049703905, "learning_rate": 2.4038250951639782e-06, "loss": 0.0194, "step": 165930 }, { "epoch": 0.6923709223823552, "grad_norm": 0.8960033506794028, "learning_rate": 2.4037888783066177e-06, "loss": 0.0489, "step": 165935 }, { "epoch": 0.6923917850973454, "grad_norm": 0.3340805982156615, "learning_rate": 2.4037526630861743e-06, "loss": 0.0227, "step": 165940 }, { "epoch": 0.6924126478123357, "grad_norm": 0.6563057027443148, "learning_rate": 2.403716449502525e-06, "loss": 0.0178, "step": 165945 }, { "epoch": 0.6924335105273259, "grad_norm": 0.781528601699276, "learning_rate": 2.4036802375555475e-06, "loss": 0.0177, "step": 165950 }, { "epoch": 0.6924543732423163, "grad_norm": 0.7773422396252595, "learning_rate": 2.403644027245117e-06, "loss": 0.02, "step": 165955 }, { "epoch": 0.6924752359573065, "grad_norm": 0.6790366471758021, "learning_rate": 2.403607818571112e-06, "loss": 0.0181, "step": 165960 }, { "epoch": 0.6924960986722968, "grad_norm": 0.37380990476355036, "learning_rate": 2.4035716115334073e-06, "loss": 0.0214, "step": 165965 }, { "epoch": 0.6925169613872871, "grad_norm": 0.4619674341346997, "learning_rate": 2.403535406131881e-06, "loss": 0.0154, "step": 165970 }, { "epoch": 0.6925378241022774, "grad_norm": 1.0262510844974337, "learning_rate": 2.4034992023664093e-06, "loss": 0.0199, "step": 165975 }, { "epoch": 0.6925586868172676, "grad_norm": 0.6541094411670405, "learning_rate": 2.4034630002368694e-06, "loss": 0.0172, "step": 165980 }, { "epoch": 0.692579549532258, "grad_norm": 0.41010193838812825, "learning_rate": 2.4034267997431376e-06, "loss": 0.0179, "step": 165985 }, { "epoch": 0.6926004122472482, "grad_norm": 0.7010718463206945, "learning_rate": 2.4033906008850918e-06, "loss": 0.0223, "step": 165990 }, { "epoch": 0.6926212749622385, "grad_norm": 0.5268470373593299, "learning_rate": 2.403354403662607e-06, "loss": 0.0197, "step": 165995 }, { "epoch": 0.6926421376772287, "grad_norm": 0.707728885472978, "learning_rate": 2.403318208075561e-06, "loss": 0.0223, "step": 166000 }, { "epoch": 0.6926630003922191, "grad_norm": 0.5595669752280196, "learning_rate": 2.4032820141238313e-06, "loss": 0.02, "step": 166005 }, { "epoch": 0.6926838631072093, "grad_norm": 0.6756288222397271, "learning_rate": 2.403245821807294e-06, "loss": 0.0155, "step": 166010 }, { "epoch": 0.6927047258221996, "grad_norm": 0.8171478480014643, "learning_rate": 2.403209631125826e-06, "loss": 0.0227, "step": 166015 }, { "epoch": 0.6927255885371899, "grad_norm": 0.31906892870009684, "learning_rate": 2.4031734420793044e-06, "loss": 0.0183, "step": 166020 }, { "epoch": 0.6927464512521802, "grad_norm": 0.7636236539133149, "learning_rate": 2.403137254667606e-06, "loss": 0.022, "step": 166025 }, { "epoch": 0.6927673139671704, "grad_norm": 0.7468354587094539, "learning_rate": 2.4031010688906077e-06, "loss": 0.019, "step": 166030 }, { "epoch": 0.6927881766821608, "grad_norm": 0.6077373765164301, "learning_rate": 2.403064884748187e-06, "loss": 0.0225, "step": 166035 }, { "epoch": 0.692809039397151, "grad_norm": 0.6388957544789312, "learning_rate": 2.4030287022402197e-06, "loss": 0.0227, "step": 166040 }, { "epoch": 0.6928299021121412, "grad_norm": 0.5608729620277128, "learning_rate": 2.402992521366584e-06, "loss": 0.0169, "step": 166045 }, { "epoch": 0.6928507648271316, "grad_norm": 0.4935610395418824, "learning_rate": 2.4029563421271552e-06, "loss": 0.0176, "step": 166050 }, { "epoch": 0.6928716275421218, "grad_norm": 0.6779054789160494, "learning_rate": 2.4029201645218127e-06, "loss": 0.0218, "step": 166055 }, { "epoch": 0.6928924902571121, "grad_norm": 0.5955843051970103, "learning_rate": 2.402883988550431e-06, "loss": 0.0239, "step": 166060 }, { "epoch": 0.6929133529721023, "grad_norm": 0.6746573689044507, "learning_rate": 2.4028478142128887e-06, "loss": 0.0169, "step": 166065 }, { "epoch": 0.6929342156870927, "grad_norm": 0.3108900609172738, "learning_rate": 2.402811641509062e-06, "loss": 0.0209, "step": 166070 }, { "epoch": 0.6929550784020829, "grad_norm": 0.5914587975275214, "learning_rate": 2.4027754704388286e-06, "loss": 0.0249, "step": 166075 }, { "epoch": 0.6929759411170732, "grad_norm": 0.6575051965050623, "learning_rate": 2.4027393010020655e-06, "loss": 0.0182, "step": 166080 }, { "epoch": 0.6929968038320635, "grad_norm": 0.2772658021386076, "learning_rate": 2.402703133198649e-06, "loss": 0.0202, "step": 166085 }, { "epoch": 0.6930176665470538, "grad_norm": 0.6770366369175248, "learning_rate": 2.4026669670284574e-06, "loss": 0.0214, "step": 166090 }, { "epoch": 0.693038529262044, "grad_norm": 0.5708192779429974, "learning_rate": 2.4026308024913657e-06, "loss": 0.024, "step": 166095 }, { "epoch": 0.6930593919770344, "grad_norm": 0.5697974158651495, "learning_rate": 2.4025946395872533e-06, "loss": 0.0228, "step": 166100 }, { "epoch": 0.6930802546920246, "grad_norm": 0.6673681902225491, "learning_rate": 2.4025584783159967e-06, "loss": 0.0175, "step": 166105 }, { "epoch": 0.6931011174070149, "grad_norm": 0.8213120863358682, "learning_rate": 2.4025223186774715e-06, "loss": 0.0244, "step": 166110 }, { "epoch": 0.6931219801220052, "grad_norm": 0.793130649009851, "learning_rate": 2.4024861606715566e-06, "loss": 0.0186, "step": 166115 }, { "epoch": 0.6931428428369955, "grad_norm": 0.5970559999142059, "learning_rate": 2.4024500042981287e-06, "loss": 0.0199, "step": 166120 }, { "epoch": 0.6931637055519857, "grad_norm": 1.0828718167049853, "learning_rate": 2.402413849557065e-06, "loss": 0.0369, "step": 166125 }, { "epoch": 0.6931845682669759, "grad_norm": 0.5927950228284965, "learning_rate": 2.402377696448242e-06, "loss": 0.0257, "step": 166130 }, { "epoch": 0.6932054309819663, "grad_norm": 1.0764041087749674, "learning_rate": 2.4023415449715377e-06, "loss": 0.0163, "step": 166135 }, { "epoch": 0.6932262936969565, "grad_norm": 1.007869933691813, "learning_rate": 2.4023053951268284e-06, "loss": 0.0228, "step": 166140 }, { "epoch": 0.6932471564119468, "grad_norm": 0.698831623628365, "learning_rate": 2.402269246913993e-06, "loss": 0.0209, "step": 166145 }, { "epoch": 0.6932680191269371, "grad_norm": 0.5664974872448848, "learning_rate": 2.402233100332907e-06, "loss": 0.0194, "step": 166150 }, { "epoch": 0.6932888818419274, "grad_norm": 0.4901122759178357, "learning_rate": 2.4021969553834477e-06, "loss": 0.0221, "step": 166155 }, { "epoch": 0.6933097445569176, "grad_norm": 0.941868442431763, "learning_rate": 2.4021608120654945e-06, "loss": 0.0323, "step": 166160 }, { "epoch": 0.693330607271908, "grad_norm": 0.9401016040057916, "learning_rate": 2.4021246703789216e-06, "loss": 0.0202, "step": 166165 }, { "epoch": 0.6933514699868982, "grad_norm": 0.7437473192462175, "learning_rate": 2.402088530323608e-06, "loss": 0.035, "step": 166170 }, { "epoch": 0.6933723327018885, "grad_norm": 0.8214672715722169, "learning_rate": 2.4020523918994313e-06, "loss": 0.0222, "step": 166175 }, { "epoch": 0.6933931954168787, "grad_norm": 1.0154231648085046, "learning_rate": 2.4020162551062677e-06, "loss": 0.0225, "step": 166180 }, { "epoch": 0.6934140581318691, "grad_norm": 0.35229066072378057, "learning_rate": 2.401980119943996e-06, "loss": 0.0142, "step": 166185 }, { "epoch": 0.6934349208468593, "grad_norm": 0.7733492699377498, "learning_rate": 2.4019439864124914e-06, "loss": 0.0233, "step": 166190 }, { "epoch": 0.6934557835618496, "grad_norm": 0.20236510765915247, "learning_rate": 2.4019078545116335e-06, "loss": 0.0192, "step": 166195 }, { "epoch": 0.6934766462768399, "grad_norm": 0.22253506899913048, "learning_rate": 2.4018717242412983e-06, "loss": 0.017, "step": 166200 }, { "epoch": 0.6934975089918302, "grad_norm": 0.5771314585129268, "learning_rate": 2.401835595601363e-06, "loss": 0.0269, "step": 166205 }, { "epoch": 0.6935183717068204, "grad_norm": 0.364449663952543, "learning_rate": 2.401799468591706e-06, "loss": 0.0171, "step": 166210 }, { "epoch": 0.6935392344218108, "grad_norm": 0.29406436988846596, "learning_rate": 2.4017633432122043e-06, "loss": 0.0157, "step": 166215 }, { "epoch": 0.693560097136801, "grad_norm": 0.49262268260488185, "learning_rate": 2.401727219462735e-06, "loss": 0.0253, "step": 166220 }, { "epoch": 0.6935809598517912, "grad_norm": 0.5388504907687461, "learning_rate": 2.4016910973431754e-06, "loss": 0.0234, "step": 166225 }, { "epoch": 0.6936018225667816, "grad_norm": 1.1236076728999478, "learning_rate": 2.4016549768534034e-06, "loss": 0.0225, "step": 166230 }, { "epoch": 0.6936226852817718, "grad_norm": 0.5390394491979402, "learning_rate": 2.401618857993296e-06, "loss": 0.0177, "step": 166235 }, { "epoch": 0.6936435479967621, "grad_norm": 0.7354170831126208, "learning_rate": 2.401582740762731e-06, "loss": 0.0215, "step": 166240 }, { "epoch": 0.6936644107117523, "grad_norm": 0.7472906016622607, "learning_rate": 2.4015466251615866e-06, "loss": 0.0175, "step": 166245 }, { "epoch": 0.6936852734267427, "grad_norm": 1.4028913032266699, "learning_rate": 2.4015105111897387e-06, "loss": 0.0246, "step": 166250 }, { "epoch": 0.6937061361417329, "grad_norm": 1.1078443757261696, "learning_rate": 2.401474398847066e-06, "loss": 0.021, "step": 166255 }, { "epoch": 0.6937269988567232, "grad_norm": 0.4296386456536634, "learning_rate": 2.401438288133445e-06, "loss": 0.0225, "step": 166260 }, { "epoch": 0.6937478615717135, "grad_norm": 1.1191872799954934, "learning_rate": 2.4014021790487543e-06, "loss": 0.0203, "step": 166265 }, { "epoch": 0.6937687242867038, "grad_norm": 0.597225098557658, "learning_rate": 2.401366071592871e-06, "loss": 0.0233, "step": 166270 }, { "epoch": 0.693789587001694, "grad_norm": 0.4773187828359062, "learning_rate": 2.4013299657656724e-06, "loss": 0.0202, "step": 166275 }, { "epoch": 0.6938104497166844, "grad_norm": 0.3627908233966966, "learning_rate": 2.4012938615670367e-06, "loss": 0.0222, "step": 166280 }, { "epoch": 0.6938313124316746, "grad_norm": 0.6012553382050585, "learning_rate": 2.40125775899684e-06, "loss": 0.0208, "step": 166285 }, { "epoch": 0.6938521751466649, "grad_norm": 0.7323110440183185, "learning_rate": 2.401221658054962e-06, "loss": 0.0228, "step": 166290 }, { "epoch": 0.6938730378616552, "grad_norm": 0.6978797690579788, "learning_rate": 2.401185558741279e-06, "loss": 0.0184, "step": 166295 }, { "epoch": 0.6938939005766455, "grad_norm": 1.8538700040123064, "learning_rate": 2.4011494610556686e-06, "loss": 0.0265, "step": 166300 }, { "epoch": 0.6939147632916357, "grad_norm": 0.5131764490222399, "learning_rate": 2.4011133649980085e-06, "loss": 0.02, "step": 166305 }, { "epoch": 0.693935626006626, "grad_norm": 0.9308574795426787, "learning_rate": 2.4010772705681775e-06, "loss": 0.0294, "step": 166310 }, { "epoch": 0.6939564887216163, "grad_norm": 0.5084868833208022, "learning_rate": 2.4010411777660512e-06, "loss": 0.0279, "step": 166315 }, { "epoch": 0.6939773514366065, "grad_norm": 0.7391196614905126, "learning_rate": 2.401005086591509e-06, "loss": 0.0213, "step": 166320 }, { "epoch": 0.6939982141515968, "grad_norm": 0.6266112492605694, "learning_rate": 2.4009689970444274e-06, "loss": 0.0209, "step": 166325 }, { "epoch": 0.6940190768665871, "grad_norm": 0.655508340127039, "learning_rate": 2.4009329091246854e-06, "loss": 0.0235, "step": 166330 }, { "epoch": 0.6940399395815774, "grad_norm": 1.0955112305255212, "learning_rate": 2.4008968228321593e-06, "loss": 0.0237, "step": 166335 }, { "epoch": 0.6940608022965676, "grad_norm": 0.8190068356219737, "learning_rate": 2.4008607381667273e-06, "loss": 0.0231, "step": 166340 }, { "epoch": 0.694081665011558, "grad_norm": 0.2700301510048578, "learning_rate": 2.4008246551282676e-06, "loss": 0.0265, "step": 166345 }, { "epoch": 0.6941025277265482, "grad_norm": 0.8928042406971685, "learning_rate": 2.400788573716658e-06, "loss": 0.0225, "step": 166350 }, { "epoch": 0.6941233904415385, "grad_norm": 0.8407551638998667, "learning_rate": 2.4007524939317747e-06, "loss": 0.0287, "step": 166355 }, { "epoch": 0.6941442531565287, "grad_norm": 0.8714152822588297, "learning_rate": 2.400716415773498e-06, "loss": 0.0264, "step": 166360 }, { "epoch": 0.6941651158715191, "grad_norm": 0.7713653294031481, "learning_rate": 2.4006803392417035e-06, "loss": 0.0226, "step": 166365 }, { "epoch": 0.6941859785865093, "grad_norm": 0.8165166387060758, "learning_rate": 2.40064426433627e-06, "loss": 0.027, "step": 166370 }, { "epoch": 0.6942068413014996, "grad_norm": 0.9889665664396254, "learning_rate": 2.400608191057075e-06, "loss": 0.0231, "step": 166375 }, { "epoch": 0.6942277040164899, "grad_norm": 0.22295844691054384, "learning_rate": 2.400572119403996e-06, "loss": 0.0142, "step": 166380 }, { "epoch": 0.6942485667314802, "grad_norm": 0.3539990407345003, "learning_rate": 2.4005360493769122e-06, "loss": 0.0152, "step": 166385 }, { "epoch": 0.6942694294464704, "grad_norm": 0.5512155885867532, "learning_rate": 2.4004999809757e-06, "loss": 0.0202, "step": 166390 }, { "epoch": 0.6942902921614608, "grad_norm": 0.6525919240197741, "learning_rate": 2.4004639142002376e-06, "loss": 0.0249, "step": 166395 }, { "epoch": 0.694311154876451, "grad_norm": 9.706151524201676, "learning_rate": 2.4004278490504036e-06, "loss": 0.0202, "step": 166400 }, { "epoch": 0.6943320175914413, "grad_norm": 1.6744248352647808, "learning_rate": 2.400391785526075e-06, "loss": 0.0266, "step": 166405 }, { "epoch": 0.6943528803064316, "grad_norm": 0.7528656943027736, "learning_rate": 2.40035572362713e-06, "loss": 0.0209, "step": 166410 }, { "epoch": 0.6943737430214219, "grad_norm": 0.7734187738048731, "learning_rate": 2.400319663353447e-06, "loss": 0.0156, "step": 166415 }, { "epoch": 0.6943946057364121, "grad_norm": 0.4872396699870609, "learning_rate": 2.400283604704903e-06, "loss": 0.0282, "step": 166420 }, { "epoch": 0.6944154684514023, "grad_norm": 0.38877458497227174, "learning_rate": 2.400247547681376e-06, "loss": 0.0228, "step": 166425 }, { "epoch": 0.6944363311663927, "grad_norm": 0.7969096760025841, "learning_rate": 2.4002114922827448e-06, "loss": 0.0244, "step": 166430 }, { "epoch": 0.6944571938813829, "grad_norm": 1.44395578219699, "learning_rate": 2.4001754385088868e-06, "loss": 0.028, "step": 166435 }, { "epoch": 0.6944780565963732, "grad_norm": 0.6685001858710441, "learning_rate": 2.4001393863596807e-06, "loss": 0.0217, "step": 166440 }, { "epoch": 0.6944989193113635, "grad_norm": 0.3821592833223701, "learning_rate": 2.400103335835003e-06, "loss": 0.0244, "step": 166445 }, { "epoch": 0.6945197820263538, "grad_norm": 0.7311955229646462, "learning_rate": 2.400067286934733e-06, "loss": 0.0193, "step": 166450 }, { "epoch": 0.694540644741344, "grad_norm": 0.5970745487461351, "learning_rate": 2.400031239658748e-06, "loss": 0.0286, "step": 166455 }, { "epoch": 0.6945615074563344, "grad_norm": 0.5995625120913204, "learning_rate": 2.399995194006927e-06, "loss": 0.0175, "step": 166460 }, { "epoch": 0.6945823701713246, "grad_norm": 0.4509483876000716, "learning_rate": 2.399959149979147e-06, "loss": 0.0215, "step": 166465 }, { "epoch": 0.6946032328863149, "grad_norm": 0.7679421686548279, "learning_rate": 2.3999231075752863e-06, "loss": 0.0177, "step": 166470 }, { "epoch": 0.6946240956013052, "grad_norm": 0.34908967131362073, "learning_rate": 2.399887066795223e-06, "loss": 0.0253, "step": 166475 }, { "epoch": 0.6946449583162955, "grad_norm": 1.0840393984508745, "learning_rate": 2.3998510276388352e-06, "loss": 0.0285, "step": 166480 }, { "epoch": 0.6946658210312857, "grad_norm": 0.4477219902940132, "learning_rate": 2.399814990106001e-06, "loss": 0.0434, "step": 166485 }, { "epoch": 0.694686683746276, "grad_norm": 0.7391441694842696, "learning_rate": 2.3997789541965984e-06, "loss": 0.023, "step": 166490 }, { "epoch": 0.6947075464612663, "grad_norm": 0.7143339821812137, "learning_rate": 2.3997429199105062e-06, "loss": 0.0186, "step": 166495 }, { "epoch": 0.6947284091762566, "grad_norm": 1.0045926193286412, "learning_rate": 2.399706887247602e-06, "loss": 0.0185, "step": 166500 }, { "epoch": 0.6947492718912468, "grad_norm": 0.3329658974335243, "learning_rate": 2.3996708562077626e-06, "loss": 0.0257, "step": 166505 }, { "epoch": 0.6947701346062372, "grad_norm": 0.8305797658326571, "learning_rate": 2.399634826790869e-06, "loss": 0.0295, "step": 166510 }, { "epoch": 0.6947909973212274, "grad_norm": 0.5472030171849238, "learning_rate": 2.399598798996797e-06, "loss": 0.0195, "step": 166515 }, { "epoch": 0.6948118600362176, "grad_norm": 0.44019877807715396, "learning_rate": 2.3995627728254263e-06, "loss": 0.0211, "step": 166520 }, { "epoch": 0.694832722751208, "grad_norm": 0.7590604764628083, "learning_rate": 2.399526748276634e-06, "loss": 0.019, "step": 166525 }, { "epoch": 0.6948535854661982, "grad_norm": 0.6489071306467313, "learning_rate": 2.3994907253502987e-06, "loss": 0.0239, "step": 166530 }, { "epoch": 0.6948744481811885, "grad_norm": 0.5708299389087148, "learning_rate": 2.3994547040462985e-06, "loss": 0.0247, "step": 166535 }, { "epoch": 0.6948953108961787, "grad_norm": 0.5768146847143242, "learning_rate": 2.3994186843645116e-06, "loss": 0.0249, "step": 166540 }, { "epoch": 0.6949161736111691, "grad_norm": 0.3766711961336477, "learning_rate": 2.3993826663048174e-06, "loss": 0.0153, "step": 166545 }, { "epoch": 0.6949370363261593, "grad_norm": 0.6009181452509456, "learning_rate": 2.399346649867092e-06, "loss": 0.0224, "step": 166550 }, { "epoch": 0.6949578990411496, "grad_norm": 0.906818237901082, "learning_rate": 2.3993106350512156e-06, "loss": 0.0239, "step": 166555 }, { "epoch": 0.6949787617561399, "grad_norm": 0.6541581429832706, "learning_rate": 2.3992746218570654e-06, "loss": 0.0239, "step": 166560 }, { "epoch": 0.6949996244711302, "grad_norm": 0.5933143472520728, "learning_rate": 2.39923861028452e-06, "loss": 0.0237, "step": 166565 }, { "epoch": 0.6950204871861204, "grad_norm": 0.403384917857973, "learning_rate": 2.3992026003334578e-06, "loss": 0.0182, "step": 166570 }, { "epoch": 0.6950413499011108, "grad_norm": 0.6187286788185029, "learning_rate": 2.3991665920037568e-06, "loss": 0.0186, "step": 166575 }, { "epoch": 0.695062212616101, "grad_norm": 0.3580553024291252, "learning_rate": 2.3991305852952955e-06, "loss": 0.0256, "step": 166580 }, { "epoch": 0.6950830753310913, "grad_norm": 0.7560296601236106, "learning_rate": 2.3990945802079532e-06, "loss": 0.0202, "step": 166585 }, { "epoch": 0.6951039380460816, "grad_norm": 0.3317721372265326, "learning_rate": 2.3990585767416063e-06, "loss": 0.0157, "step": 166590 }, { "epoch": 0.6951248007610719, "grad_norm": 0.5627132732325931, "learning_rate": 2.3990225748961348e-06, "loss": 0.018, "step": 166595 }, { "epoch": 0.6951456634760621, "grad_norm": 0.5463572599677624, "learning_rate": 2.3989865746714164e-06, "loss": 0.0218, "step": 166600 }, { "epoch": 0.6951665261910523, "grad_norm": 0.702953601025577, "learning_rate": 2.3989505760673295e-06, "loss": 0.02, "step": 166605 }, { "epoch": 0.6951873889060427, "grad_norm": 0.5351764516429177, "learning_rate": 2.398914579083753e-06, "loss": 0.0195, "step": 166610 }, { "epoch": 0.695208251621033, "grad_norm": 0.5573889267586724, "learning_rate": 2.3988785837205645e-06, "loss": 0.0192, "step": 166615 }, { "epoch": 0.6952291143360232, "grad_norm": 0.37533102932765255, "learning_rate": 2.3988425899776433e-06, "loss": 0.0192, "step": 166620 }, { "epoch": 0.6952499770510135, "grad_norm": 0.8077542630170957, "learning_rate": 2.3988065978548675e-06, "loss": 0.0289, "step": 166625 }, { "epoch": 0.6952708397660038, "grad_norm": 0.40152272994483973, "learning_rate": 2.398770607352115e-06, "loss": 0.02, "step": 166630 }, { "epoch": 0.695291702480994, "grad_norm": 0.5264174474987101, "learning_rate": 2.398734618469265e-06, "loss": 0.0218, "step": 166635 }, { "epoch": 0.6953125651959844, "grad_norm": 1.1624812163644684, "learning_rate": 2.3986986312061956e-06, "loss": 0.0283, "step": 166640 }, { "epoch": 0.6953334279109746, "grad_norm": 0.5316188875621081, "learning_rate": 2.398662645562786e-06, "loss": 0.0251, "step": 166645 }, { "epoch": 0.6953542906259649, "grad_norm": 0.6898668459315237, "learning_rate": 2.3986266615389135e-06, "loss": 0.0146, "step": 166650 }, { "epoch": 0.6953751533409552, "grad_norm": 0.37107842485603704, "learning_rate": 2.3985906791344575e-06, "loss": 0.0163, "step": 166655 }, { "epoch": 0.6953960160559455, "grad_norm": 0.8704022705517587, "learning_rate": 2.3985546983492965e-06, "loss": 0.0323, "step": 166660 }, { "epoch": 0.6954168787709357, "grad_norm": 0.33116554921606417, "learning_rate": 2.3985187191833088e-06, "loss": 0.0265, "step": 166665 }, { "epoch": 0.695437741485926, "grad_norm": 0.7276432450569225, "learning_rate": 2.3984827416363733e-06, "loss": 0.0201, "step": 166670 }, { "epoch": 0.6954586042009163, "grad_norm": 0.6914283885077804, "learning_rate": 2.398446765708368e-06, "loss": 0.022, "step": 166675 }, { "epoch": 0.6954794669159066, "grad_norm": 0.5398489670684609, "learning_rate": 2.398410791399172e-06, "loss": 0.0266, "step": 166680 }, { "epoch": 0.6955003296308968, "grad_norm": 0.5485596376882564, "learning_rate": 2.3983748187086635e-06, "loss": 0.0205, "step": 166685 }, { "epoch": 0.6955211923458872, "grad_norm": 0.5277677868977696, "learning_rate": 2.3983388476367214e-06, "loss": 0.0175, "step": 166690 }, { "epoch": 0.6955420550608774, "grad_norm": 0.38436019103537933, "learning_rate": 2.3983028781832247e-06, "loss": 0.0199, "step": 166695 }, { "epoch": 0.6955629177758677, "grad_norm": 0.4857679373069623, "learning_rate": 2.398266910348051e-06, "loss": 0.0151, "step": 166700 }, { "epoch": 0.695583780490858, "grad_norm": 1.6775552492751764, "learning_rate": 2.39823094413108e-06, "loss": 0.0226, "step": 166705 }, { "epoch": 0.6956046432058482, "grad_norm": 1.0411698427383593, "learning_rate": 2.3981949795321894e-06, "loss": 0.018, "step": 166710 }, { "epoch": 0.6956255059208385, "grad_norm": 0.5259501013775446, "learning_rate": 2.398159016551259e-06, "loss": 0.0172, "step": 166715 }, { "epoch": 0.6956463686358287, "grad_norm": 0.7618257045268692, "learning_rate": 2.398123055188166e-06, "loss": 0.0242, "step": 166720 }, { "epoch": 0.6956672313508191, "grad_norm": 0.8586990393604385, "learning_rate": 2.3980870954427905e-06, "loss": 0.0202, "step": 166725 }, { "epoch": 0.6956880940658093, "grad_norm": 0.9295081678810799, "learning_rate": 2.3980511373150108e-06, "loss": 0.0216, "step": 166730 }, { "epoch": 0.6957089567807996, "grad_norm": 0.32231968380669884, "learning_rate": 2.398015180804705e-06, "loss": 0.0252, "step": 166735 }, { "epoch": 0.6957298194957899, "grad_norm": 0.4884150225428114, "learning_rate": 2.397979225911753e-06, "loss": 0.0251, "step": 166740 }, { "epoch": 0.6957506822107802, "grad_norm": 0.7275894177996975, "learning_rate": 2.397943272636033e-06, "loss": 0.0253, "step": 166745 }, { "epoch": 0.6957715449257704, "grad_norm": 0.6596757057904425, "learning_rate": 2.397907320977423e-06, "loss": 0.0165, "step": 166750 }, { "epoch": 0.6957924076407608, "grad_norm": 0.38194324595772156, "learning_rate": 2.397871370935803e-06, "loss": 0.0259, "step": 166755 }, { "epoch": 0.695813270355751, "grad_norm": 0.8441528979575647, "learning_rate": 2.397835422511051e-06, "loss": 0.029, "step": 166760 }, { "epoch": 0.6958341330707413, "grad_norm": 0.2656588695358132, "learning_rate": 2.3977994757030463e-06, "loss": 0.0222, "step": 166765 }, { "epoch": 0.6958549957857316, "grad_norm": 0.4123007963939699, "learning_rate": 2.3977635305116665e-06, "loss": 0.0183, "step": 166770 }, { "epoch": 0.6958758585007219, "grad_norm": 0.5871900779058844, "learning_rate": 2.397727586936793e-06, "loss": 0.0195, "step": 166775 }, { "epoch": 0.6958967212157121, "grad_norm": 0.470053500702427, "learning_rate": 2.397691644978302e-06, "loss": 0.0206, "step": 166780 }, { "epoch": 0.6959175839307024, "grad_norm": 0.4187639588560922, "learning_rate": 2.3976557046360736e-06, "loss": 0.0196, "step": 166785 }, { "epoch": 0.6959384466456927, "grad_norm": 0.7075181760100152, "learning_rate": 2.397619765909987e-06, "loss": 0.0301, "step": 166790 }, { "epoch": 0.695959309360683, "grad_norm": 0.5610791505354955, "learning_rate": 2.3975838287999196e-06, "loss": 0.0185, "step": 166795 }, { "epoch": 0.6959801720756732, "grad_norm": 0.3970501988270373, "learning_rate": 2.397547893305752e-06, "loss": 0.0202, "step": 166800 }, { "epoch": 0.6960010347906636, "grad_norm": 0.4506857562139946, "learning_rate": 2.3975119594273618e-06, "loss": 0.0196, "step": 166805 }, { "epoch": 0.6960218975056538, "grad_norm": 0.7124213410816621, "learning_rate": 2.3974760271646288e-06, "loss": 0.0243, "step": 166810 }, { "epoch": 0.696042760220644, "grad_norm": 0.7669691906260891, "learning_rate": 2.3974400965174316e-06, "loss": 0.0214, "step": 166815 }, { "epoch": 0.6960636229356344, "grad_norm": 0.37544467979457047, "learning_rate": 2.397404167485649e-06, "loss": 0.0184, "step": 166820 }, { "epoch": 0.6960844856506246, "grad_norm": 0.5745260456387655, "learning_rate": 2.3973682400691608e-06, "loss": 0.0172, "step": 166825 }, { "epoch": 0.6961053483656149, "grad_norm": 0.4010947682187848, "learning_rate": 2.397332314267844e-06, "loss": 0.018, "step": 166830 }, { "epoch": 0.6961262110806052, "grad_norm": 0.3823419157362566, "learning_rate": 2.39729639008158e-06, "loss": 0.0167, "step": 166835 }, { "epoch": 0.6961470737955955, "grad_norm": 0.9690967739916932, "learning_rate": 2.397260467510246e-06, "loss": 0.0235, "step": 166840 }, { "epoch": 0.6961679365105857, "grad_norm": 0.23023871569476614, "learning_rate": 2.3972245465537224e-06, "loss": 0.0211, "step": 166845 }, { "epoch": 0.696188799225576, "grad_norm": 0.8317527874786316, "learning_rate": 2.397188627211887e-06, "loss": 0.0291, "step": 166850 }, { "epoch": 0.6962096619405663, "grad_norm": 0.5698205475393774, "learning_rate": 2.397152709484619e-06, "loss": 0.0203, "step": 166855 }, { "epoch": 0.6962305246555566, "grad_norm": 0.7127330357549366, "learning_rate": 2.397116793371798e-06, "loss": 0.0254, "step": 166860 }, { "epoch": 0.6962513873705468, "grad_norm": 0.7408044489642633, "learning_rate": 2.3970808788733034e-06, "loss": 0.0284, "step": 166865 }, { "epoch": 0.6962722500855372, "grad_norm": 3.3505310466468683, "learning_rate": 2.397044965989013e-06, "loss": 0.0225, "step": 166870 }, { "epoch": 0.6962931128005274, "grad_norm": 0.5673949407313349, "learning_rate": 2.3970090547188067e-06, "loss": 0.0173, "step": 166875 }, { "epoch": 0.6963139755155177, "grad_norm": 1.2217872984720264, "learning_rate": 2.3969731450625635e-06, "loss": 0.0301, "step": 166880 }, { "epoch": 0.696334838230508, "grad_norm": 0.9119249791127158, "learning_rate": 2.3969372370201625e-06, "loss": 0.0172, "step": 166885 }, { "epoch": 0.6963557009454983, "grad_norm": 0.7295323267250985, "learning_rate": 2.3969013305914825e-06, "loss": 0.0182, "step": 166890 }, { "epoch": 0.6963765636604885, "grad_norm": 0.5668510882837899, "learning_rate": 2.3968654257764037e-06, "loss": 0.0172, "step": 166895 }, { "epoch": 0.6963974263754787, "grad_norm": 0.44294926136996315, "learning_rate": 2.396829522574804e-06, "loss": 0.0238, "step": 166900 }, { "epoch": 0.6964182890904691, "grad_norm": 0.487112367253806, "learning_rate": 2.396793620986563e-06, "loss": 0.0197, "step": 166905 }, { "epoch": 0.6964391518054593, "grad_norm": 0.7034693948974146, "learning_rate": 2.3967577210115602e-06, "loss": 0.0232, "step": 166910 }, { "epoch": 0.6964600145204496, "grad_norm": 0.7605423504985369, "learning_rate": 2.3967218226496743e-06, "loss": 0.0212, "step": 166915 }, { "epoch": 0.6964808772354399, "grad_norm": 0.6005748136728393, "learning_rate": 2.3966859259007847e-06, "loss": 0.0132, "step": 166920 }, { "epoch": 0.6965017399504302, "grad_norm": 0.4227789290689515, "learning_rate": 2.3966500307647704e-06, "loss": 0.017, "step": 166925 }, { "epoch": 0.6965226026654204, "grad_norm": 0.38509002567964706, "learning_rate": 2.3966141372415115e-06, "loss": 0.0169, "step": 166930 }, { "epoch": 0.6965434653804108, "grad_norm": 0.5319072908433564, "learning_rate": 2.396578245330886e-06, "loss": 0.0243, "step": 166935 }, { "epoch": 0.696564328095401, "grad_norm": 0.4193711414020825, "learning_rate": 2.3965423550327737e-06, "loss": 0.0219, "step": 166940 }, { "epoch": 0.6965851908103913, "grad_norm": 0.6433668354945274, "learning_rate": 2.3965064663470546e-06, "loss": 0.0168, "step": 166945 }, { "epoch": 0.6966060535253816, "grad_norm": 0.6836667410232353, "learning_rate": 2.3964705792736064e-06, "loss": 0.0187, "step": 166950 }, { "epoch": 0.6966269162403719, "grad_norm": 0.4318942992750313, "learning_rate": 2.3964346938123093e-06, "loss": 0.0191, "step": 166955 }, { "epoch": 0.6966477789553621, "grad_norm": 1.2092827850446175, "learning_rate": 2.396398809963043e-06, "loss": 0.0259, "step": 166960 }, { "epoch": 0.6966686416703524, "grad_norm": 0.5508314332310036, "learning_rate": 2.3963629277256865e-06, "loss": 0.015, "step": 166965 }, { "epoch": 0.6966895043853427, "grad_norm": 0.6127602174336942, "learning_rate": 2.3963270471001187e-06, "loss": 0.0228, "step": 166970 }, { "epoch": 0.696710367100333, "grad_norm": 0.5182353269772706, "learning_rate": 2.396291168086219e-06, "loss": 0.0277, "step": 166975 }, { "epoch": 0.6967312298153232, "grad_norm": 1.0085822912795905, "learning_rate": 2.3962552906838674e-06, "loss": 0.0279, "step": 166980 }, { "epoch": 0.6967520925303136, "grad_norm": 0.525945415964635, "learning_rate": 2.3962194148929425e-06, "loss": 0.0213, "step": 166985 }, { "epoch": 0.6967729552453038, "grad_norm": 0.34792768817263553, "learning_rate": 2.3961835407133238e-06, "loss": 0.0182, "step": 166990 }, { "epoch": 0.696793817960294, "grad_norm": 0.4730886063942639, "learning_rate": 2.3961476681448913e-06, "loss": 0.0202, "step": 166995 }, { "epoch": 0.6968146806752844, "grad_norm": 0.5111182004824663, "learning_rate": 2.3961117971875243e-06, "loss": 0.0176, "step": 167000 }, { "epoch": 0.6968355433902746, "grad_norm": 1.517855834154041, "learning_rate": 2.3960759278411014e-06, "loss": 0.028, "step": 167005 }, { "epoch": 0.6968564061052649, "grad_norm": 0.6340033618340849, "learning_rate": 2.3960400601055027e-06, "loss": 0.022, "step": 167010 }, { "epoch": 0.6968772688202552, "grad_norm": 1.2288084219976798, "learning_rate": 2.3960041939806074e-06, "loss": 0.0278, "step": 167015 }, { "epoch": 0.6968981315352455, "grad_norm": 0.4066000684059575, "learning_rate": 2.395968329466295e-06, "loss": 0.0245, "step": 167020 }, { "epoch": 0.6969189942502357, "grad_norm": 0.7143047361866528, "learning_rate": 2.395932466562445e-06, "loss": 0.0226, "step": 167025 }, { "epoch": 0.696939856965226, "grad_norm": 0.8311371149795375, "learning_rate": 2.395896605268937e-06, "loss": 0.0178, "step": 167030 }, { "epoch": 0.6969607196802163, "grad_norm": 0.8998947290377582, "learning_rate": 2.3958607455856505e-06, "loss": 0.0255, "step": 167035 }, { "epoch": 0.6969815823952066, "grad_norm": 1.008202990632964, "learning_rate": 2.3958248875124644e-06, "loss": 0.0207, "step": 167040 }, { "epoch": 0.6970024451101968, "grad_norm": 0.27766498825219404, "learning_rate": 2.395789031049259e-06, "loss": 0.0147, "step": 167045 }, { "epoch": 0.6970233078251872, "grad_norm": 0.5182866353118376, "learning_rate": 2.395753176195913e-06, "loss": 0.0211, "step": 167050 }, { "epoch": 0.6970441705401774, "grad_norm": 0.6070077360681797, "learning_rate": 2.3957173229523074e-06, "loss": 0.0238, "step": 167055 }, { "epoch": 0.6970650332551677, "grad_norm": 0.8280705824153224, "learning_rate": 2.39568147131832e-06, "loss": 0.0139, "step": 167060 }, { "epoch": 0.697085895970158, "grad_norm": 0.7193154210366817, "learning_rate": 2.3956456212938317e-06, "loss": 0.021, "step": 167065 }, { "epoch": 0.6971067586851483, "grad_norm": 1.1388492971200246, "learning_rate": 2.395609772878721e-06, "loss": 0.0193, "step": 167070 }, { "epoch": 0.6971276214001385, "grad_norm": 0.3616299208236673, "learning_rate": 2.395573926072868e-06, "loss": 0.0128, "step": 167075 }, { "epoch": 0.6971484841151288, "grad_norm": 0.6705978095463767, "learning_rate": 2.395538080876153e-06, "loss": 0.0202, "step": 167080 }, { "epoch": 0.6971693468301191, "grad_norm": 0.5025844713174266, "learning_rate": 2.3955022372884547e-06, "loss": 0.0149, "step": 167085 }, { "epoch": 0.6971902095451094, "grad_norm": 0.5386587192652956, "learning_rate": 2.3954663953096523e-06, "loss": 0.0159, "step": 167090 }, { "epoch": 0.6972110722600996, "grad_norm": 0.9636220899275396, "learning_rate": 2.395430554939627e-06, "loss": 0.0193, "step": 167095 }, { "epoch": 0.69723193497509, "grad_norm": 0.5488319452558511, "learning_rate": 2.3953947161782567e-06, "loss": 0.024, "step": 167100 }, { "epoch": 0.6972527976900802, "grad_norm": 0.6881610047282871, "learning_rate": 2.395358879025423e-06, "loss": 0.0145, "step": 167105 }, { "epoch": 0.6972736604050704, "grad_norm": 0.6724799348207292, "learning_rate": 2.3953230434810038e-06, "loss": 0.0188, "step": 167110 }, { "epoch": 0.6972945231200608, "grad_norm": 0.5507588063376696, "learning_rate": 2.3952872095448796e-06, "loss": 0.021, "step": 167115 }, { "epoch": 0.697315385835051, "grad_norm": 0.7282858549371952, "learning_rate": 2.3952513772169297e-06, "loss": 0.017, "step": 167120 }, { "epoch": 0.6973362485500413, "grad_norm": 0.6308655532804718, "learning_rate": 2.3952155464970345e-06, "loss": 0.0212, "step": 167125 }, { "epoch": 0.6973571112650316, "grad_norm": 0.3816951554921035, "learning_rate": 2.3951797173850734e-06, "loss": 0.0163, "step": 167130 }, { "epoch": 0.6973779739800219, "grad_norm": 0.8063801109011577, "learning_rate": 2.395143889880926e-06, "loss": 0.0216, "step": 167135 }, { "epoch": 0.6973988366950121, "grad_norm": 0.9547356944397505, "learning_rate": 2.395108063984472e-06, "loss": 0.0223, "step": 167140 }, { "epoch": 0.6974196994100024, "grad_norm": 0.9544450221945997, "learning_rate": 2.395072239695591e-06, "loss": 0.0236, "step": 167145 }, { "epoch": 0.6974405621249927, "grad_norm": 0.542127206492076, "learning_rate": 2.395036417014164e-06, "loss": 0.0261, "step": 167150 }, { "epoch": 0.697461424839983, "grad_norm": 1.2850755826362745, "learning_rate": 2.3950005959400694e-06, "loss": 0.0369, "step": 167155 }, { "epoch": 0.6974822875549732, "grad_norm": 1.1889604984377442, "learning_rate": 2.3949647764731874e-06, "loss": 0.0281, "step": 167160 }, { "epoch": 0.6975031502699636, "grad_norm": 1.235384248275853, "learning_rate": 2.394928958613398e-06, "loss": 0.0329, "step": 167165 }, { "epoch": 0.6975240129849538, "grad_norm": 0.8824072812024184, "learning_rate": 2.3948931423605807e-06, "loss": 0.0229, "step": 167170 }, { "epoch": 0.6975448756999441, "grad_norm": 1.0886868601910125, "learning_rate": 2.394857327714616e-06, "loss": 0.0257, "step": 167175 }, { "epoch": 0.6975657384149344, "grad_norm": 0.2289425216531875, "learning_rate": 2.3948215146753832e-06, "loss": 0.0157, "step": 167180 }, { "epoch": 0.6975866011299247, "grad_norm": 0.7414052265182615, "learning_rate": 2.394785703242762e-06, "loss": 0.0199, "step": 167185 }, { "epoch": 0.6976074638449149, "grad_norm": 1.1814765157280096, "learning_rate": 2.394749893416633e-06, "loss": 0.0228, "step": 167190 }, { "epoch": 0.6976283265599053, "grad_norm": 0.8335151891046377, "learning_rate": 2.394714085196875e-06, "loss": 0.0422, "step": 167195 }, { "epoch": 0.6976491892748955, "grad_norm": 15.08445164069442, "learning_rate": 2.394678278583369e-06, "loss": 0.0184, "step": 167200 }, { "epoch": 0.6976700519898857, "grad_norm": 0.48769175860187086, "learning_rate": 2.3946424735759944e-06, "loss": 0.0202, "step": 167205 }, { "epoch": 0.697690914704876, "grad_norm": 0.5515028434871811, "learning_rate": 2.3946066701746313e-06, "loss": 0.0289, "step": 167210 }, { "epoch": 0.6977117774198663, "grad_norm": 1.6465607828819429, "learning_rate": 2.394570868379159e-06, "loss": 0.0237, "step": 167215 }, { "epoch": 0.6977326401348566, "grad_norm": 0.3584453921107264, "learning_rate": 2.394535068189459e-06, "loss": 0.0239, "step": 167220 }, { "epoch": 0.6977535028498468, "grad_norm": 0.5140703150707089, "learning_rate": 2.39449926960541e-06, "loss": 0.0177, "step": 167225 }, { "epoch": 0.6977743655648372, "grad_norm": 0.6800532138589576, "learning_rate": 2.394463472626892e-06, "loss": 0.0221, "step": 167230 }, { "epoch": 0.6977952282798274, "grad_norm": 0.7711526613844115, "learning_rate": 2.3944276772537853e-06, "loss": 0.0239, "step": 167235 }, { "epoch": 0.6978160909948177, "grad_norm": 0.3691987762645839, "learning_rate": 2.3943918834859697e-06, "loss": 0.0181, "step": 167240 }, { "epoch": 0.697836953709808, "grad_norm": 0.5524602761865006, "learning_rate": 2.394356091323326e-06, "loss": 0.0262, "step": 167245 }, { "epoch": 0.6978578164247983, "grad_norm": 0.6580181552204053, "learning_rate": 2.394320300765733e-06, "loss": 0.0302, "step": 167250 }, { "epoch": 0.6978786791397885, "grad_norm": 0.4872631637308635, "learning_rate": 2.3942845118130715e-06, "loss": 0.017, "step": 167255 }, { "epoch": 0.6978995418547788, "grad_norm": 0.39234401807478325, "learning_rate": 2.394248724465222e-06, "loss": 0.0245, "step": 167260 }, { "epoch": 0.6979204045697691, "grad_norm": 0.5968004101405587, "learning_rate": 2.394212938722063e-06, "loss": 0.0185, "step": 167265 }, { "epoch": 0.6979412672847594, "grad_norm": 0.5785508693811063, "learning_rate": 2.394177154583476e-06, "loss": 0.0231, "step": 167270 }, { "epoch": 0.6979621299997496, "grad_norm": 0.9566732676375941, "learning_rate": 2.3941413720493408e-06, "loss": 0.0226, "step": 167275 }, { "epoch": 0.69798299271474, "grad_norm": 0.6350056238148386, "learning_rate": 2.394105591119537e-06, "loss": 0.0188, "step": 167280 }, { "epoch": 0.6980038554297302, "grad_norm": 0.7016065605931918, "learning_rate": 2.394069811793945e-06, "loss": 0.0225, "step": 167285 }, { "epoch": 0.6980247181447204, "grad_norm": 1.2702326090770624, "learning_rate": 2.3940340340724453e-06, "loss": 0.0286, "step": 167290 }, { "epoch": 0.6980455808597108, "grad_norm": 0.9057268830262233, "learning_rate": 2.3939982579549176e-06, "loss": 0.0244, "step": 167295 }, { "epoch": 0.698066443574701, "grad_norm": 0.9091524332073447, "learning_rate": 2.3939624834412426e-06, "loss": 0.021, "step": 167300 }, { "epoch": 0.6980873062896913, "grad_norm": 0.6190112505107178, "learning_rate": 2.3939267105312995e-06, "loss": 0.0205, "step": 167305 }, { "epoch": 0.6981081690046816, "grad_norm": 1.6472206277831487, "learning_rate": 2.3938909392249697e-06, "loss": 0.0286, "step": 167310 }, { "epoch": 0.6981290317196719, "grad_norm": 0.6070710172582885, "learning_rate": 2.3938551695221323e-06, "loss": 0.0173, "step": 167315 }, { "epoch": 0.6981498944346621, "grad_norm": 0.9951486870105103, "learning_rate": 2.3938194014226676e-06, "loss": 0.0192, "step": 167320 }, { "epoch": 0.6981707571496524, "grad_norm": 0.5067095129054359, "learning_rate": 2.393783634926457e-06, "loss": 0.0234, "step": 167325 }, { "epoch": 0.6981916198646427, "grad_norm": 0.38159859237535537, "learning_rate": 2.3937478700333793e-06, "loss": 0.0196, "step": 167330 }, { "epoch": 0.698212482579633, "grad_norm": 1.3136680327072507, "learning_rate": 2.393712106743316e-06, "loss": 0.0195, "step": 167335 }, { "epoch": 0.6982333452946232, "grad_norm": 0.9436441151831426, "learning_rate": 2.393676345056146e-06, "loss": 0.0281, "step": 167340 }, { "epoch": 0.6982542080096136, "grad_norm": 0.4791051749022469, "learning_rate": 2.3936405849717506e-06, "loss": 0.0224, "step": 167345 }, { "epoch": 0.6982750707246038, "grad_norm": 0.6506379495700132, "learning_rate": 2.3936048264900096e-06, "loss": 0.0253, "step": 167350 }, { "epoch": 0.6982959334395941, "grad_norm": 0.49070326552110743, "learning_rate": 2.3935690696108037e-06, "loss": 0.0209, "step": 167355 }, { "epoch": 0.6983167961545844, "grad_norm": 0.8490234797367744, "learning_rate": 2.393533314334012e-06, "loss": 0.0266, "step": 167360 }, { "epoch": 0.6983376588695747, "grad_norm": 0.46521327292163583, "learning_rate": 2.393497560659517e-06, "loss": 0.027, "step": 167365 }, { "epoch": 0.6983585215845649, "grad_norm": 0.7293049722360033, "learning_rate": 2.393461808587197e-06, "loss": 0.0142, "step": 167370 }, { "epoch": 0.6983793842995553, "grad_norm": 0.8924796243947098, "learning_rate": 2.3934260581169343e-06, "loss": 0.0164, "step": 167375 }, { "epoch": 0.6984002470145455, "grad_norm": 0.8273491951303501, "learning_rate": 2.3933903092486064e-06, "loss": 0.0211, "step": 167380 }, { "epoch": 0.6984211097295357, "grad_norm": 0.5915114909909758, "learning_rate": 2.3933545619820966e-06, "loss": 0.0239, "step": 167385 }, { "epoch": 0.698441972444526, "grad_norm": 0.6254940607833637, "learning_rate": 2.3933188163172834e-06, "loss": 0.0184, "step": 167390 }, { "epoch": 0.6984628351595163, "grad_norm": 0.7713172066637872, "learning_rate": 2.393283072254048e-06, "loss": 0.0253, "step": 167395 }, { "epoch": 0.6984836978745066, "grad_norm": 0.6275724599899791, "learning_rate": 2.3932473297922713e-06, "loss": 0.023, "step": 167400 }, { "epoch": 0.6985045605894968, "grad_norm": 0.9068789315869431, "learning_rate": 2.3932115889318323e-06, "loss": 0.0243, "step": 167405 }, { "epoch": 0.6985254233044872, "grad_norm": 1.0327724730638466, "learning_rate": 2.3931758496726125e-06, "loss": 0.0247, "step": 167410 }, { "epoch": 0.6985462860194774, "grad_norm": 0.9945594574498376, "learning_rate": 2.393140112014492e-06, "loss": 0.0321, "step": 167415 }, { "epoch": 0.6985671487344677, "grad_norm": 0.44640982423180625, "learning_rate": 2.3931043759573513e-06, "loss": 0.0205, "step": 167420 }, { "epoch": 0.698588011449458, "grad_norm": 0.373589474402315, "learning_rate": 2.3930686415010713e-06, "loss": 0.0204, "step": 167425 }, { "epoch": 0.6986088741644483, "grad_norm": 0.4172116368407739, "learning_rate": 2.393032908645531e-06, "loss": 0.0188, "step": 167430 }, { "epoch": 0.6986297368794385, "grad_norm": 0.4727907179693695, "learning_rate": 2.3929971773906122e-06, "loss": 0.0183, "step": 167435 }, { "epoch": 0.6986505995944288, "grad_norm": 0.5069464255777355, "learning_rate": 2.392961447736196e-06, "loss": 0.0184, "step": 167440 }, { "epoch": 0.6986714623094191, "grad_norm": 0.613933135081386, "learning_rate": 2.3929257196821616e-06, "loss": 0.0289, "step": 167445 }, { "epoch": 0.6986923250244094, "grad_norm": 0.5758071516963295, "learning_rate": 2.39288999322839e-06, "loss": 0.023, "step": 167450 }, { "epoch": 0.6987131877393996, "grad_norm": 0.702908523664819, "learning_rate": 2.392854268374762e-06, "loss": 0.0249, "step": 167455 }, { "epoch": 0.69873405045439, "grad_norm": 0.819106824023671, "learning_rate": 2.3928185451211575e-06, "loss": 0.0194, "step": 167460 }, { "epoch": 0.6987549131693802, "grad_norm": 0.6470070650376697, "learning_rate": 2.3927828234674574e-06, "loss": 0.0278, "step": 167465 }, { "epoch": 0.6987757758843705, "grad_norm": 0.31375228638370206, "learning_rate": 2.3927471034135425e-06, "loss": 0.0159, "step": 167470 }, { "epoch": 0.6987966385993608, "grad_norm": 0.5811572537370256, "learning_rate": 2.3927113849592934e-06, "loss": 0.0201, "step": 167475 }, { "epoch": 0.698817501314351, "grad_norm": 0.6441963466580283, "learning_rate": 2.3926756681045906e-06, "loss": 0.0261, "step": 167480 }, { "epoch": 0.6988383640293413, "grad_norm": 0.5768651774394968, "learning_rate": 2.392639952849314e-06, "loss": 0.0197, "step": 167485 }, { "epoch": 0.6988592267443317, "grad_norm": 0.8015817300044696, "learning_rate": 2.392604239193345e-06, "loss": 0.0283, "step": 167490 }, { "epoch": 0.6988800894593219, "grad_norm": 0.4271970281322788, "learning_rate": 2.392568527136565e-06, "loss": 0.0186, "step": 167495 }, { "epoch": 0.6989009521743121, "grad_norm": 1.0636556252836182, "learning_rate": 2.3925328166788533e-06, "loss": 0.0201, "step": 167500 }, { "epoch": 0.6989218148893024, "grad_norm": 1.079915925782814, "learning_rate": 2.392497107820091e-06, "loss": 0.0319, "step": 167505 }, { "epoch": 0.6989426776042927, "grad_norm": 0.6820687053989676, "learning_rate": 2.392461400560159e-06, "loss": 0.0179, "step": 167510 }, { "epoch": 0.698963540319283, "grad_norm": 1.2562226021820324, "learning_rate": 2.392425694898938e-06, "loss": 0.0344, "step": 167515 }, { "epoch": 0.6989844030342732, "grad_norm": 0.3649285543876675, "learning_rate": 2.392389990836308e-06, "loss": 0.0165, "step": 167520 }, { "epoch": 0.6990052657492636, "grad_norm": 0.4332036760415449, "learning_rate": 2.392354288372151e-06, "loss": 0.0188, "step": 167525 }, { "epoch": 0.6990261284642538, "grad_norm": 0.666388533936375, "learning_rate": 2.3923185875063467e-06, "loss": 0.0142, "step": 167530 }, { "epoch": 0.6990469911792441, "grad_norm": 0.7184023405295302, "learning_rate": 2.3922828882387758e-06, "loss": 0.0209, "step": 167535 }, { "epoch": 0.6990678538942344, "grad_norm": 0.6643793474154351, "learning_rate": 2.3922471905693192e-06, "loss": 0.0139, "step": 167540 }, { "epoch": 0.6990887166092247, "grad_norm": 0.3136317272081032, "learning_rate": 2.392211494497859e-06, "loss": 0.0223, "step": 167545 }, { "epoch": 0.6991095793242149, "grad_norm": 0.819882656645491, "learning_rate": 2.392175800024274e-06, "loss": 0.0294, "step": 167550 }, { "epoch": 0.6991304420392053, "grad_norm": 0.4547129184651234, "learning_rate": 2.392140107148446e-06, "loss": 0.0244, "step": 167555 }, { "epoch": 0.6991513047541955, "grad_norm": 0.8371920045222794, "learning_rate": 2.3921044158702556e-06, "loss": 0.0219, "step": 167560 }, { "epoch": 0.6991721674691858, "grad_norm": 0.8685513672331019, "learning_rate": 2.392068726189584e-06, "loss": 0.0254, "step": 167565 }, { "epoch": 0.699193030184176, "grad_norm": 0.5785929807657663, "learning_rate": 2.392033038106311e-06, "loss": 0.0221, "step": 167570 }, { "epoch": 0.6992138928991664, "grad_norm": 0.558897367322263, "learning_rate": 2.3919973516203186e-06, "loss": 0.0181, "step": 167575 }, { "epoch": 0.6992347556141566, "grad_norm": 0.4489590320230275, "learning_rate": 2.391961666731487e-06, "loss": 0.0193, "step": 167580 }, { "epoch": 0.6992556183291468, "grad_norm": 0.9066989652036606, "learning_rate": 2.391925983439697e-06, "loss": 0.0166, "step": 167585 }, { "epoch": 0.6992764810441372, "grad_norm": 0.9714785007660027, "learning_rate": 2.3918903017448306e-06, "loss": 0.0188, "step": 167590 }, { "epoch": 0.6992973437591274, "grad_norm": 0.6182497631421038, "learning_rate": 2.391854621646767e-06, "loss": 0.0191, "step": 167595 }, { "epoch": 0.6993182064741177, "grad_norm": 0.7133539995177519, "learning_rate": 2.3918189431453885e-06, "loss": 0.0206, "step": 167600 }, { "epoch": 0.699339069189108, "grad_norm": 0.32638254692214225, "learning_rate": 2.391783266240575e-06, "loss": 0.0211, "step": 167605 }, { "epoch": 0.6993599319040983, "grad_norm": 0.6405992600257825, "learning_rate": 2.3917475909322084e-06, "loss": 0.0183, "step": 167610 }, { "epoch": 0.6993807946190885, "grad_norm": 0.7662708308469149, "learning_rate": 2.3917119172201687e-06, "loss": 0.0189, "step": 167615 }, { "epoch": 0.6994016573340788, "grad_norm": 0.49575289842054915, "learning_rate": 2.391676245104337e-06, "loss": 0.0253, "step": 167620 }, { "epoch": 0.6994225200490691, "grad_norm": 0.44517862131127756, "learning_rate": 2.391640574584595e-06, "loss": 0.0179, "step": 167625 }, { "epoch": 0.6994433827640594, "grad_norm": 0.3076155769221154, "learning_rate": 2.3916049056608233e-06, "loss": 0.0148, "step": 167630 }, { "epoch": 0.6994642454790496, "grad_norm": 1.0081347814363437, "learning_rate": 2.3915692383329027e-06, "loss": 0.0212, "step": 167635 }, { "epoch": 0.69948510819404, "grad_norm": 0.8168682984955431, "learning_rate": 2.391533572600714e-06, "loss": 0.0227, "step": 167640 }, { "epoch": 0.6995059709090302, "grad_norm": 1.03142410251636, "learning_rate": 2.3914979084641392e-06, "loss": 0.0263, "step": 167645 }, { "epoch": 0.6995268336240205, "grad_norm": 0.7185915366283744, "learning_rate": 2.3914622459230584e-06, "loss": 0.0216, "step": 167650 }, { "epoch": 0.6995476963390108, "grad_norm": 0.6622198481177285, "learning_rate": 2.3914265849773523e-06, "loss": 0.0286, "step": 167655 }, { "epoch": 0.6995685590540011, "grad_norm": 0.567258149755321, "learning_rate": 2.391390925626903e-06, "loss": 0.0239, "step": 167660 }, { "epoch": 0.6995894217689913, "grad_norm": 0.49367162414756915, "learning_rate": 2.391355267871592e-06, "loss": 0.0214, "step": 167665 }, { "epoch": 0.6996102844839817, "grad_norm": 0.8733516581448083, "learning_rate": 2.3913196117112984e-06, "loss": 0.0212, "step": 167670 }, { "epoch": 0.6996311471989719, "grad_norm": 0.6050778861941283, "learning_rate": 2.391283957145905e-06, "loss": 0.0187, "step": 167675 }, { "epoch": 0.6996520099139621, "grad_norm": 0.4640508357007407, "learning_rate": 2.3912483041752917e-06, "loss": 0.0264, "step": 167680 }, { "epoch": 0.6996728726289524, "grad_norm": 0.696529160820701, "learning_rate": 2.391212652799341e-06, "loss": 0.0281, "step": 167685 }, { "epoch": 0.6996937353439427, "grad_norm": 0.6572038891243118, "learning_rate": 2.3911770030179327e-06, "loss": 0.0388, "step": 167690 }, { "epoch": 0.699714598058933, "grad_norm": 0.7037843736471174, "learning_rate": 2.3911413548309484e-06, "loss": 0.0253, "step": 167695 }, { "epoch": 0.6997354607739232, "grad_norm": 0.5544887209627042, "learning_rate": 2.3911057082382698e-06, "loss": 0.0193, "step": 167700 }, { "epoch": 0.6997563234889136, "grad_norm": 0.3934981568734967, "learning_rate": 2.3910700632397775e-06, "loss": 0.0199, "step": 167705 }, { "epoch": 0.6997771862039038, "grad_norm": 0.8653248369702753, "learning_rate": 2.3910344198353524e-06, "loss": 0.0209, "step": 167710 }, { "epoch": 0.6997980489188941, "grad_norm": 0.9641070591311254, "learning_rate": 2.390998778024876e-06, "loss": 0.0275, "step": 167715 }, { "epoch": 0.6998189116338844, "grad_norm": 0.41172426046279265, "learning_rate": 2.39096313780823e-06, "loss": 0.0202, "step": 167720 }, { "epoch": 0.6998397743488747, "grad_norm": 0.9501847178459369, "learning_rate": 2.390927499185295e-06, "loss": 0.0169, "step": 167725 }, { "epoch": 0.6998606370638649, "grad_norm": 0.9363053138884981, "learning_rate": 2.3908918621559525e-06, "loss": 0.0277, "step": 167730 }, { "epoch": 0.6998814997788553, "grad_norm": 0.7785591428211568, "learning_rate": 2.390856226720084e-06, "loss": 0.0196, "step": 167735 }, { "epoch": 0.6999023624938455, "grad_norm": 0.9848022827029115, "learning_rate": 2.3908205928775704e-06, "loss": 0.0156, "step": 167740 }, { "epoch": 0.6999232252088358, "grad_norm": 0.5868190594190204, "learning_rate": 2.3907849606282923e-06, "loss": 0.0155, "step": 167745 }, { "epoch": 0.699944087923826, "grad_norm": 0.5199431032870698, "learning_rate": 2.3907493299721325e-06, "loss": 0.0218, "step": 167750 }, { "epoch": 0.6999649506388164, "grad_norm": 0.7382260660897044, "learning_rate": 2.390713700908971e-06, "loss": 0.0232, "step": 167755 }, { "epoch": 0.6999858133538066, "grad_norm": 0.7377142984272232, "learning_rate": 2.3906780734386896e-06, "loss": 0.0226, "step": 167760 }, { "epoch": 0.7000066760687969, "grad_norm": 1.6765200034392018, "learning_rate": 2.3906424475611694e-06, "loss": 0.0328, "step": 167765 }, { "epoch": 0.7000275387837872, "grad_norm": 1.4403342699388766, "learning_rate": 2.3906068232762915e-06, "loss": 0.026, "step": 167770 }, { "epoch": 0.7000484014987775, "grad_norm": 0.47272356894489836, "learning_rate": 2.3905712005839383e-06, "loss": 0.0248, "step": 167775 }, { "epoch": 0.7000692642137677, "grad_norm": 0.9481393009415456, "learning_rate": 2.3905355794839906e-06, "loss": 0.0198, "step": 167780 }, { "epoch": 0.700090126928758, "grad_norm": 0.8668960878988454, "learning_rate": 2.3904999599763293e-06, "loss": 0.0194, "step": 167785 }, { "epoch": 0.7001109896437483, "grad_norm": 0.6652807941428952, "learning_rate": 2.3904643420608363e-06, "loss": 0.0245, "step": 167790 }, { "epoch": 0.7001318523587385, "grad_norm": 0.6353238842991219, "learning_rate": 2.3904287257373925e-06, "loss": 0.0183, "step": 167795 }, { "epoch": 0.7001527150737288, "grad_norm": 0.4965426141728973, "learning_rate": 2.3903931110058802e-06, "loss": 0.0172, "step": 167800 }, { "epoch": 0.7001735777887191, "grad_norm": 0.5736982249406286, "learning_rate": 2.3903574978661796e-06, "loss": 0.0159, "step": 167805 }, { "epoch": 0.7001944405037094, "grad_norm": 0.5221929931399797, "learning_rate": 2.3903218863181733e-06, "loss": 0.0242, "step": 167810 }, { "epoch": 0.7002153032186996, "grad_norm": 0.666889656546545, "learning_rate": 2.390286276361742e-06, "loss": 0.0179, "step": 167815 }, { "epoch": 0.70023616593369, "grad_norm": 0.850219459501776, "learning_rate": 2.3902506679967673e-06, "loss": 0.0251, "step": 167820 }, { "epoch": 0.7002570286486802, "grad_norm": 0.2688939869807913, "learning_rate": 2.390215061223131e-06, "loss": 0.0212, "step": 167825 }, { "epoch": 0.7002778913636705, "grad_norm": 0.8438145222423198, "learning_rate": 2.3901794560407135e-06, "loss": 0.0149, "step": 167830 }, { "epoch": 0.7002987540786608, "grad_norm": 0.7894875895047557, "learning_rate": 2.3901438524493976e-06, "loss": 0.0187, "step": 167835 }, { "epoch": 0.7003196167936511, "grad_norm": 0.29629830533833845, "learning_rate": 2.390108250449064e-06, "loss": 0.0234, "step": 167840 }, { "epoch": 0.7003404795086413, "grad_norm": 1.381712107340001, "learning_rate": 2.3900726500395944e-06, "loss": 0.0425, "step": 167845 }, { "epoch": 0.7003613422236317, "grad_norm": 0.678947619903759, "learning_rate": 2.3900370512208713e-06, "loss": 0.0197, "step": 167850 }, { "epoch": 0.7003822049386219, "grad_norm": 0.5206689973175608, "learning_rate": 2.3900014539927748e-06, "loss": 0.0237, "step": 167855 }, { "epoch": 0.7004030676536122, "grad_norm": 0.5499999595052268, "learning_rate": 2.3899658583551868e-06, "loss": 0.0208, "step": 167860 }, { "epoch": 0.7004239303686024, "grad_norm": 1.3106424370126517, "learning_rate": 2.389930264307989e-06, "loss": 0.0268, "step": 167865 }, { "epoch": 0.7004447930835928, "grad_norm": 0.4954813369669288, "learning_rate": 2.3898946718510636e-06, "loss": 0.0204, "step": 167870 }, { "epoch": 0.700465655798583, "grad_norm": 0.7734902731398023, "learning_rate": 2.389859080984291e-06, "loss": 0.0239, "step": 167875 }, { "epoch": 0.7004865185135732, "grad_norm": 0.7829525015150552, "learning_rate": 2.3898234917075535e-06, "loss": 0.0191, "step": 167880 }, { "epoch": 0.7005073812285636, "grad_norm": 0.2798786951494721, "learning_rate": 2.389787904020733e-06, "loss": 0.0175, "step": 167885 }, { "epoch": 0.7005282439435538, "grad_norm": 0.7174161352112666, "learning_rate": 2.3897523179237103e-06, "loss": 0.0204, "step": 167890 }, { "epoch": 0.7005491066585441, "grad_norm": 0.5446185139042808, "learning_rate": 2.3897167334163677e-06, "loss": 0.0244, "step": 167895 }, { "epoch": 0.7005699693735344, "grad_norm": 1.004555160882602, "learning_rate": 2.389681150498587e-06, "loss": 0.0251, "step": 167900 }, { "epoch": 0.7005908320885247, "grad_norm": 0.48941400864855944, "learning_rate": 2.389645569170249e-06, "loss": 0.0173, "step": 167905 }, { "epoch": 0.7006116948035149, "grad_norm": 0.35804238820188716, "learning_rate": 2.3896099894312364e-06, "loss": 0.0274, "step": 167910 }, { "epoch": 0.7006325575185053, "grad_norm": 0.5240155546779504, "learning_rate": 2.3895744112814297e-06, "loss": 0.03, "step": 167915 }, { "epoch": 0.7006534202334955, "grad_norm": 1.372538149665002, "learning_rate": 2.3895388347207117e-06, "loss": 0.0314, "step": 167920 }, { "epoch": 0.7006742829484858, "grad_norm": 0.7182975998085408, "learning_rate": 2.3895032597489633e-06, "loss": 0.026, "step": 167925 }, { "epoch": 0.700695145663476, "grad_norm": 0.30813292331454967, "learning_rate": 2.389467686366067e-06, "loss": 0.022, "step": 167930 }, { "epoch": 0.7007160083784664, "grad_norm": 0.46368711713947225, "learning_rate": 2.3894321145719037e-06, "loss": 0.018, "step": 167935 }, { "epoch": 0.7007368710934566, "grad_norm": 0.3892301804061105, "learning_rate": 2.3893965443663554e-06, "loss": 0.0221, "step": 167940 }, { "epoch": 0.7007577338084469, "grad_norm": 0.5830896682109952, "learning_rate": 2.389360975749305e-06, "loss": 0.0201, "step": 167945 }, { "epoch": 0.7007785965234372, "grad_norm": 0.7722065525061005, "learning_rate": 2.3893254087206323e-06, "loss": 0.0307, "step": 167950 }, { "epoch": 0.7007994592384275, "grad_norm": 0.3801650756170205, "learning_rate": 2.3892898432802208e-06, "loss": 0.0175, "step": 167955 }, { "epoch": 0.7008203219534177, "grad_norm": 0.839767707543931, "learning_rate": 2.3892542794279507e-06, "loss": 0.018, "step": 167960 }, { "epoch": 0.7008411846684081, "grad_norm": 0.9200828022477315, "learning_rate": 2.389218717163705e-06, "loss": 0.0205, "step": 167965 }, { "epoch": 0.7008620473833983, "grad_norm": 0.6547973973065218, "learning_rate": 2.3891831564873656e-06, "loss": 0.0201, "step": 167970 }, { "epoch": 0.7008829100983885, "grad_norm": 0.6014705357723447, "learning_rate": 2.389147597398814e-06, "loss": 0.0197, "step": 167975 }, { "epoch": 0.7009037728133788, "grad_norm": 0.6886471785923755, "learning_rate": 2.3891120398979314e-06, "loss": 0.0187, "step": 167980 }, { "epoch": 0.7009246355283691, "grad_norm": 1.2776244089925617, "learning_rate": 2.389076483984601e-06, "loss": 0.0244, "step": 167985 }, { "epoch": 0.7009454982433594, "grad_norm": 0.5440583821247124, "learning_rate": 2.3890409296587033e-06, "loss": 0.0222, "step": 167990 }, { "epoch": 0.7009663609583496, "grad_norm": 0.5938085814904159, "learning_rate": 2.389005376920121e-06, "loss": 0.017, "step": 167995 }, { "epoch": 0.70098722367334, "grad_norm": 0.9963581557368704, "learning_rate": 2.388969825768736e-06, "loss": 0.0284, "step": 168000 }, { "epoch": 0.7010080863883302, "grad_norm": 0.6075808501881625, "learning_rate": 2.3889342762044292e-06, "loss": 0.0164, "step": 168005 }, { "epoch": 0.7010289491033205, "grad_norm": 0.9686748750150698, "learning_rate": 2.3888987282270838e-06, "loss": 0.0273, "step": 168010 }, { "epoch": 0.7010498118183108, "grad_norm": 0.5469419575052317, "learning_rate": 2.3888631818365817e-06, "loss": 0.0186, "step": 168015 }, { "epoch": 0.7010706745333011, "grad_norm": 0.6577643720443271, "learning_rate": 2.388827637032804e-06, "loss": 0.0269, "step": 168020 }, { "epoch": 0.7010915372482913, "grad_norm": 0.5335998634729869, "learning_rate": 2.388792093815633e-06, "loss": 0.0179, "step": 168025 }, { "epoch": 0.7011123999632817, "grad_norm": 0.5332050038195317, "learning_rate": 2.3887565521849508e-06, "loss": 0.0215, "step": 168030 }, { "epoch": 0.7011332626782719, "grad_norm": 0.8504796410123416, "learning_rate": 2.388721012140639e-06, "loss": 0.0275, "step": 168035 }, { "epoch": 0.7011541253932622, "grad_norm": 0.8471026256886836, "learning_rate": 2.38868547368258e-06, "loss": 0.033, "step": 168040 }, { "epoch": 0.7011749881082524, "grad_norm": 0.5580248217419888, "learning_rate": 2.388649936810656e-06, "loss": 0.0206, "step": 168045 }, { "epoch": 0.7011958508232428, "grad_norm": 0.7466201028385059, "learning_rate": 2.3886144015247487e-06, "loss": 0.0155, "step": 168050 }, { "epoch": 0.701216713538233, "grad_norm": 0.4997234632300529, "learning_rate": 2.38857886782474e-06, "loss": 0.0261, "step": 168055 }, { "epoch": 0.7012375762532232, "grad_norm": 0.6284448201665818, "learning_rate": 2.3885433357105123e-06, "loss": 0.0249, "step": 168060 }, { "epoch": 0.7012584389682136, "grad_norm": 0.9115358380514651, "learning_rate": 2.3885078051819473e-06, "loss": 0.0154, "step": 168065 }, { "epoch": 0.7012793016832038, "grad_norm": 0.4851897041249473, "learning_rate": 2.3884722762389263e-06, "loss": 0.0194, "step": 168070 }, { "epoch": 0.7013001643981941, "grad_norm": 0.5831015985409852, "learning_rate": 2.3884367488813333e-06, "loss": 0.0203, "step": 168075 }, { "epoch": 0.7013210271131844, "grad_norm": 0.6490503282885294, "learning_rate": 2.388401223109049e-06, "loss": 0.0166, "step": 168080 }, { "epoch": 0.7013418898281747, "grad_norm": 0.6561086140696776, "learning_rate": 2.388365698921956e-06, "loss": 0.0237, "step": 168085 }, { "epoch": 0.7013627525431649, "grad_norm": 0.902723800581094, "learning_rate": 2.388330176319936e-06, "loss": 0.0189, "step": 168090 }, { "epoch": 0.7013836152581553, "grad_norm": 0.8059758160131216, "learning_rate": 2.3882946553028726e-06, "loss": 0.0265, "step": 168095 }, { "epoch": 0.7014044779731455, "grad_norm": 0.7756905900968187, "learning_rate": 2.3882591358706454e-06, "loss": 0.0245, "step": 168100 }, { "epoch": 0.7014253406881358, "grad_norm": 0.9388223498848058, "learning_rate": 2.3882236180231386e-06, "loss": 0.0237, "step": 168105 }, { "epoch": 0.701446203403126, "grad_norm": 0.43763062901344885, "learning_rate": 2.3881881017602335e-06, "loss": 0.0281, "step": 168110 }, { "epoch": 0.7014670661181164, "grad_norm": 1.1637658540708027, "learning_rate": 2.3881525870818123e-06, "loss": 0.0223, "step": 168115 }, { "epoch": 0.7014879288331066, "grad_norm": 1.0718506221675854, "learning_rate": 2.3881170739877574e-06, "loss": 0.0215, "step": 168120 }, { "epoch": 0.7015087915480969, "grad_norm": 1.2727697710373427, "learning_rate": 2.3880815624779513e-06, "loss": 0.0248, "step": 168125 }, { "epoch": 0.7015296542630872, "grad_norm": 0.7857713058234487, "learning_rate": 2.3880460525522757e-06, "loss": 0.023, "step": 168130 }, { "epoch": 0.7015505169780775, "grad_norm": 0.6402475420741067, "learning_rate": 2.388010544210613e-06, "loss": 0.0249, "step": 168135 }, { "epoch": 0.7015713796930677, "grad_norm": 0.6392614787133325, "learning_rate": 2.387975037452845e-06, "loss": 0.0244, "step": 168140 }, { "epoch": 0.7015922424080581, "grad_norm": 0.9488418351825465, "learning_rate": 2.3879395322788546e-06, "loss": 0.0224, "step": 168145 }, { "epoch": 0.7016131051230483, "grad_norm": 0.2970394527526897, "learning_rate": 2.3879040286885237e-06, "loss": 0.0232, "step": 168150 }, { "epoch": 0.7016339678380386, "grad_norm": 0.716621793278709, "learning_rate": 2.3878685266817353e-06, "loss": 0.0313, "step": 168155 }, { "epoch": 0.7016548305530288, "grad_norm": 0.38786573709867844, "learning_rate": 2.387833026258371e-06, "loss": 0.0202, "step": 168160 }, { "epoch": 0.7016756932680192, "grad_norm": 0.49490591300960707, "learning_rate": 2.3877975274183126e-06, "loss": 0.0162, "step": 168165 }, { "epoch": 0.7016965559830094, "grad_norm": 0.636436402356962, "learning_rate": 2.387762030161443e-06, "loss": 0.0199, "step": 168170 }, { "epoch": 0.7017174186979996, "grad_norm": 0.9185033759318945, "learning_rate": 2.3877265344876453e-06, "loss": 0.0281, "step": 168175 }, { "epoch": 0.70173828141299, "grad_norm": 0.37117391731608396, "learning_rate": 2.3876910403968008e-06, "loss": 0.0185, "step": 168180 }, { "epoch": 0.7017591441279802, "grad_norm": 0.45900962933532097, "learning_rate": 2.3876555478887915e-06, "loss": 0.0139, "step": 168185 }, { "epoch": 0.7017800068429705, "grad_norm": 0.5475820009376671, "learning_rate": 2.387620056963501e-06, "loss": 0.0232, "step": 168190 }, { "epoch": 0.7018008695579608, "grad_norm": 0.5929724247896302, "learning_rate": 2.387584567620811e-06, "loss": 0.0258, "step": 168195 }, { "epoch": 0.7018217322729511, "grad_norm": 0.5455037544557237, "learning_rate": 2.387549079860604e-06, "loss": 0.0202, "step": 168200 }, { "epoch": 0.7018425949879413, "grad_norm": 0.5572292420893757, "learning_rate": 2.3875135936827616e-06, "loss": 0.022, "step": 168205 }, { "epoch": 0.7018634577029317, "grad_norm": 0.577633293980445, "learning_rate": 2.387478109087168e-06, "loss": 0.024, "step": 168210 }, { "epoch": 0.7018843204179219, "grad_norm": 0.9925743068594213, "learning_rate": 2.3874426260737038e-06, "loss": 0.0228, "step": 168215 }, { "epoch": 0.7019051831329122, "grad_norm": 1.4181109621780836, "learning_rate": 2.3874071446422533e-06, "loss": 0.0241, "step": 168220 }, { "epoch": 0.7019260458479024, "grad_norm": 0.7092599757598929, "learning_rate": 2.3873716647926965e-06, "loss": 0.0183, "step": 168225 }, { "epoch": 0.7019469085628928, "grad_norm": 1.2598815946342192, "learning_rate": 2.387336186524918e-06, "loss": 0.0354, "step": 168230 }, { "epoch": 0.701967771277883, "grad_norm": 0.705444709266162, "learning_rate": 2.387300709838799e-06, "loss": 0.018, "step": 168235 }, { "epoch": 0.7019886339928733, "grad_norm": 0.6622692758995532, "learning_rate": 2.3872652347342224e-06, "loss": 0.0197, "step": 168240 }, { "epoch": 0.7020094967078636, "grad_norm": 0.5321807090859174, "learning_rate": 2.3872297612110718e-06, "loss": 0.0209, "step": 168245 }, { "epoch": 0.7020303594228539, "grad_norm": 0.6630472466487155, "learning_rate": 2.3871942892692274e-06, "loss": 0.0241, "step": 168250 }, { "epoch": 0.7020512221378441, "grad_norm": 1.2136388211203484, "learning_rate": 2.387158818908574e-06, "loss": 0.0303, "step": 168255 }, { "epoch": 0.7020720848528345, "grad_norm": 0.6992080006338339, "learning_rate": 2.3871233501289923e-06, "loss": 0.0235, "step": 168260 }, { "epoch": 0.7020929475678247, "grad_norm": 0.6919075189007661, "learning_rate": 2.3870878829303664e-06, "loss": 0.0241, "step": 168265 }, { "epoch": 0.7021138102828149, "grad_norm": 0.9600496693804839, "learning_rate": 2.3870524173125777e-06, "loss": 0.0193, "step": 168270 }, { "epoch": 0.7021346729978052, "grad_norm": 0.13446700179223003, "learning_rate": 2.387016953275509e-06, "loss": 0.0189, "step": 168275 }, { "epoch": 0.7021555357127955, "grad_norm": 0.8674012049054428, "learning_rate": 2.3869814908190438e-06, "loss": 0.0223, "step": 168280 }, { "epoch": 0.7021763984277858, "grad_norm": 0.6536155341436154, "learning_rate": 2.386946029943063e-06, "loss": 0.018, "step": 168285 }, { "epoch": 0.702197261142776, "grad_norm": 0.5245949929061081, "learning_rate": 2.386910570647451e-06, "loss": 0.0126, "step": 168290 }, { "epoch": 0.7022181238577664, "grad_norm": 0.48765616342095036, "learning_rate": 2.386875112932089e-06, "loss": 0.0235, "step": 168295 }, { "epoch": 0.7022389865727566, "grad_norm": 0.41118279909516386, "learning_rate": 2.3868396567968607e-06, "loss": 0.0197, "step": 168300 }, { "epoch": 0.7022598492877469, "grad_norm": 0.5677995480555472, "learning_rate": 2.386804202241648e-06, "loss": 0.022, "step": 168305 }, { "epoch": 0.7022807120027372, "grad_norm": 0.4800233976839032, "learning_rate": 2.386768749266334e-06, "loss": 0.0241, "step": 168310 }, { "epoch": 0.7023015747177275, "grad_norm": 0.47463222464914334, "learning_rate": 2.3867332978708006e-06, "loss": 0.021, "step": 168315 }, { "epoch": 0.7023224374327177, "grad_norm": 0.8264147539965628, "learning_rate": 2.3866978480549315e-06, "loss": 0.0373, "step": 168320 }, { "epoch": 0.7023433001477081, "grad_norm": 1.210459108049064, "learning_rate": 2.386662399818609e-06, "loss": 0.0206, "step": 168325 }, { "epoch": 0.7023641628626983, "grad_norm": 0.6715428490808654, "learning_rate": 2.3866269531617155e-06, "loss": 0.0208, "step": 168330 }, { "epoch": 0.7023850255776886, "grad_norm": 0.7356106127871024, "learning_rate": 2.3865915080841344e-06, "loss": 0.019, "step": 168335 }, { "epoch": 0.7024058882926788, "grad_norm": 0.7277223649557745, "learning_rate": 2.3865560645857474e-06, "loss": 0.0176, "step": 168340 }, { "epoch": 0.7024267510076692, "grad_norm": 1.0413053525759066, "learning_rate": 2.3865206226664383e-06, "loss": 0.0144, "step": 168345 }, { "epoch": 0.7024476137226594, "grad_norm": 0.8291719991571268, "learning_rate": 2.3864851823260892e-06, "loss": 0.0186, "step": 168350 }, { "epoch": 0.7024684764376496, "grad_norm": 1.2213468863305106, "learning_rate": 2.386449743564583e-06, "loss": 0.0239, "step": 168355 }, { "epoch": 0.70248933915264, "grad_norm": 0.513248247111979, "learning_rate": 2.386414306381803e-06, "loss": 0.0237, "step": 168360 }, { "epoch": 0.7025102018676302, "grad_norm": 1.0340152285158486, "learning_rate": 2.3863788707776312e-06, "loss": 0.0262, "step": 168365 }, { "epoch": 0.7025310645826205, "grad_norm": 0.7040764574932453, "learning_rate": 2.3863434367519505e-06, "loss": 0.0229, "step": 168370 }, { "epoch": 0.7025519272976108, "grad_norm": 0.41846957108558364, "learning_rate": 2.386308004304644e-06, "loss": 0.0175, "step": 168375 }, { "epoch": 0.7025727900126011, "grad_norm": 1.0210792889070226, "learning_rate": 2.3862725734355946e-06, "loss": 0.0249, "step": 168380 }, { "epoch": 0.7025936527275913, "grad_norm": 0.5717866766983827, "learning_rate": 2.3862371441446853e-06, "loss": 0.0174, "step": 168385 }, { "epoch": 0.7026145154425817, "grad_norm": 0.47548635133112677, "learning_rate": 2.386201716431798e-06, "loss": 0.0202, "step": 168390 }, { "epoch": 0.7026353781575719, "grad_norm": 0.26816960138918483, "learning_rate": 2.3861662902968165e-06, "loss": 0.0094, "step": 168395 }, { "epoch": 0.7026562408725622, "grad_norm": 0.3372385938691254, "learning_rate": 2.386130865739623e-06, "loss": 0.0175, "step": 168400 }, { "epoch": 0.7026771035875524, "grad_norm": 1.1925079511225942, "learning_rate": 2.3860954427601016e-06, "loss": 0.0242, "step": 168405 }, { "epoch": 0.7026979663025428, "grad_norm": 0.836348698684365, "learning_rate": 2.3860600213581336e-06, "loss": 0.0207, "step": 168410 }, { "epoch": 0.702718829017533, "grad_norm": 0.8382651947687482, "learning_rate": 2.3860246015336027e-06, "loss": 0.0204, "step": 168415 }, { "epoch": 0.7027396917325233, "grad_norm": 0.4656044864790834, "learning_rate": 2.3859891832863924e-06, "loss": 0.02, "step": 168420 }, { "epoch": 0.7027605544475136, "grad_norm": 0.42535690412758953, "learning_rate": 2.385953766616385e-06, "loss": 0.0185, "step": 168425 }, { "epoch": 0.7027814171625039, "grad_norm": 0.7875727886823243, "learning_rate": 2.3859183515234626e-06, "loss": 0.0213, "step": 168430 }, { "epoch": 0.7028022798774941, "grad_norm": 0.5166311700593238, "learning_rate": 2.38588293800751e-06, "loss": 0.0191, "step": 168435 }, { "epoch": 0.7028231425924845, "grad_norm": 0.4862326344993422, "learning_rate": 2.3858475260684084e-06, "loss": 0.0218, "step": 168440 }, { "epoch": 0.7028440053074747, "grad_norm": 0.8133253394489032, "learning_rate": 2.3858121157060423e-06, "loss": 0.0251, "step": 168445 }, { "epoch": 0.702864868022465, "grad_norm": 1.014704291465898, "learning_rate": 2.3857767069202933e-06, "loss": 0.0173, "step": 168450 }, { "epoch": 0.7028857307374552, "grad_norm": 0.6590145215692987, "learning_rate": 2.3857412997110455e-06, "loss": 0.017, "step": 168455 }, { "epoch": 0.7029065934524455, "grad_norm": 0.771397852273806, "learning_rate": 2.3857058940781814e-06, "loss": 0.0162, "step": 168460 }, { "epoch": 0.7029274561674358, "grad_norm": 0.7423805052295663, "learning_rate": 2.3856704900215843e-06, "loss": 0.0278, "step": 168465 }, { "epoch": 0.702948318882426, "grad_norm": 0.4147156527690329, "learning_rate": 2.385635087541137e-06, "loss": 0.0168, "step": 168470 }, { "epoch": 0.7029691815974164, "grad_norm": 0.8159279251058165, "learning_rate": 2.3855996866367227e-06, "loss": 0.0278, "step": 168475 }, { "epoch": 0.7029900443124066, "grad_norm": 1.020958480676525, "learning_rate": 2.3855642873082245e-06, "loss": 0.0173, "step": 168480 }, { "epoch": 0.7030109070273969, "grad_norm": 0.7481871372731594, "learning_rate": 2.385528889555525e-06, "loss": 0.0242, "step": 168485 }, { "epoch": 0.7030317697423872, "grad_norm": 0.578411787560693, "learning_rate": 2.3854934933785076e-06, "loss": 0.0228, "step": 168490 }, { "epoch": 0.7030526324573775, "grad_norm": 0.6967888626360436, "learning_rate": 2.385458098777056e-06, "loss": 0.0259, "step": 168495 }, { "epoch": 0.7030734951723677, "grad_norm": 0.35638291804206346, "learning_rate": 2.3854227057510524e-06, "loss": 0.0221, "step": 168500 }, { "epoch": 0.7030943578873581, "grad_norm": 0.6152316055014446, "learning_rate": 2.3853873143003808e-06, "loss": 0.0281, "step": 168505 }, { "epoch": 0.7031152206023483, "grad_norm": 0.5897978146187629, "learning_rate": 2.385351924424923e-06, "loss": 0.0131, "step": 168510 }, { "epoch": 0.7031360833173386, "grad_norm": 1.0397185593307352, "learning_rate": 2.385316536124564e-06, "loss": 0.017, "step": 168515 }, { "epoch": 0.7031569460323288, "grad_norm": 0.4766747106820595, "learning_rate": 2.385281149399186e-06, "loss": 0.0151, "step": 168520 }, { "epoch": 0.7031778087473192, "grad_norm": 0.7090937083925271, "learning_rate": 2.385245764248671e-06, "loss": 0.0222, "step": 168525 }, { "epoch": 0.7031986714623094, "grad_norm": 0.7559905844313006, "learning_rate": 2.385210380672904e-06, "loss": 0.0238, "step": 168530 }, { "epoch": 0.7032195341772997, "grad_norm": 0.32157377872683746, "learning_rate": 2.385174998671768e-06, "loss": 0.0161, "step": 168535 }, { "epoch": 0.70324039689229, "grad_norm": 0.6674724235646228, "learning_rate": 2.3851396182451455e-06, "loss": 0.0183, "step": 168540 }, { "epoch": 0.7032612596072803, "grad_norm": 1.364010700612973, "learning_rate": 2.38510423939292e-06, "loss": 0.0243, "step": 168545 }, { "epoch": 0.7032821223222705, "grad_norm": 1.0585565083684754, "learning_rate": 2.3850688621149747e-06, "loss": 0.025, "step": 168550 }, { "epoch": 0.7033029850372609, "grad_norm": 0.915074644166482, "learning_rate": 2.3850334864111927e-06, "loss": 0.0232, "step": 168555 }, { "epoch": 0.7033238477522511, "grad_norm": 0.2933660101119533, "learning_rate": 2.3849981122814576e-06, "loss": 0.0128, "step": 168560 }, { "epoch": 0.7033447104672413, "grad_norm": 1.221755886125557, "learning_rate": 2.3849627397256524e-06, "loss": 0.0242, "step": 168565 }, { "epoch": 0.7033655731822317, "grad_norm": 0.42032402962053234, "learning_rate": 2.3849273687436606e-06, "loss": 0.0161, "step": 168570 }, { "epoch": 0.7033864358972219, "grad_norm": 0.5512101778547329, "learning_rate": 2.3848919993353655e-06, "loss": 0.0197, "step": 168575 }, { "epoch": 0.7034072986122122, "grad_norm": 0.6223955360781697, "learning_rate": 2.3848566315006504e-06, "loss": 0.0183, "step": 168580 }, { "epoch": 0.7034281613272024, "grad_norm": 0.6859776982478236, "learning_rate": 2.384821265239398e-06, "loss": 0.0244, "step": 168585 }, { "epoch": 0.7034490240421928, "grad_norm": 0.643954350625543, "learning_rate": 2.384785900551493e-06, "loss": 0.0214, "step": 168590 }, { "epoch": 0.703469886757183, "grad_norm": 1.2296564810328692, "learning_rate": 2.3847505374368175e-06, "loss": 0.0311, "step": 168595 }, { "epoch": 0.7034907494721733, "grad_norm": 0.8016035531443435, "learning_rate": 2.3847151758952553e-06, "loss": 0.0237, "step": 168600 }, { "epoch": 0.7035116121871636, "grad_norm": 0.5890295649178674, "learning_rate": 2.38467981592669e-06, "loss": 0.0259, "step": 168605 }, { "epoch": 0.7035324749021539, "grad_norm": 0.5789488179057757, "learning_rate": 2.384644457531004e-06, "loss": 0.02, "step": 168610 }, { "epoch": 0.7035533376171441, "grad_norm": 0.4864571424149432, "learning_rate": 2.3846091007080823e-06, "loss": 0.0244, "step": 168615 }, { "epoch": 0.7035742003321345, "grad_norm": 1.4672239300831336, "learning_rate": 2.3845737454578064e-06, "loss": 0.0244, "step": 168620 }, { "epoch": 0.7035950630471247, "grad_norm": 1.057990167086911, "learning_rate": 2.3845383917800617e-06, "loss": 0.029, "step": 168625 }, { "epoch": 0.703615925762115, "grad_norm": 0.41362451184496757, "learning_rate": 2.3845030396747303e-06, "loss": 0.0235, "step": 168630 }, { "epoch": 0.7036367884771052, "grad_norm": 0.6444679540753773, "learning_rate": 2.3844676891416965e-06, "loss": 0.0199, "step": 168635 }, { "epoch": 0.7036576511920956, "grad_norm": 0.6223100679425043, "learning_rate": 2.384432340180843e-06, "loss": 0.0236, "step": 168640 }, { "epoch": 0.7036785139070858, "grad_norm": 1.9599422890759612, "learning_rate": 2.3843969927920534e-06, "loss": 0.0222, "step": 168645 }, { "epoch": 0.703699376622076, "grad_norm": 0.5228760445192117, "learning_rate": 2.384361646975211e-06, "loss": 0.0255, "step": 168650 }, { "epoch": 0.7037202393370664, "grad_norm": 0.7255097977296566, "learning_rate": 2.384326302730201e-06, "loss": 0.0246, "step": 168655 }, { "epoch": 0.7037411020520566, "grad_norm": 0.9415242230177999, "learning_rate": 2.3842909600569045e-06, "loss": 0.0233, "step": 168660 }, { "epoch": 0.7037619647670469, "grad_norm": 0.46374024842542344, "learning_rate": 2.3842556189552058e-06, "loss": 0.0212, "step": 168665 }, { "epoch": 0.7037828274820372, "grad_norm": 0.29405362139214875, "learning_rate": 2.384220279424989e-06, "loss": 0.0184, "step": 168670 }, { "epoch": 0.7038036901970275, "grad_norm": 0.6358546397025907, "learning_rate": 2.3841849414661374e-06, "loss": 0.018, "step": 168675 }, { "epoch": 0.7038245529120177, "grad_norm": 0.9019617807107252, "learning_rate": 2.3841496050785344e-06, "loss": 0.0307, "step": 168680 }, { "epoch": 0.7038454156270081, "grad_norm": 1.4080938030248253, "learning_rate": 2.384114270262064e-06, "loss": 0.0288, "step": 168685 }, { "epoch": 0.7038662783419983, "grad_norm": 0.7126987400791152, "learning_rate": 2.3840789370166094e-06, "loss": 0.0195, "step": 168690 }, { "epoch": 0.7038871410569886, "grad_norm": 0.6703836251605195, "learning_rate": 2.3840436053420536e-06, "loss": 0.0201, "step": 168695 }, { "epoch": 0.7039080037719788, "grad_norm": 1.1064642344273905, "learning_rate": 2.384008275238281e-06, "loss": 0.0345, "step": 168700 }, { "epoch": 0.7039288664869692, "grad_norm": 0.48341539539020306, "learning_rate": 2.383972946705175e-06, "loss": 0.0154, "step": 168705 }, { "epoch": 0.7039497292019594, "grad_norm": 0.7436105349750279, "learning_rate": 2.3839376197426196e-06, "loss": 0.0183, "step": 168710 }, { "epoch": 0.7039705919169497, "grad_norm": 0.4326594118563921, "learning_rate": 2.383902294350498e-06, "loss": 0.0216, "step": 168715 }, { "epoch": 0.70399145463194, "grad_norm": 0.6159648854404406, "learning_rate": 2.383866970528694e-06, "loss": 0.0228, "step": 168720 }, { "epoch": 0.7040123173469303, "grad_norm": 0.5316460300756148, "learning_rate": 2.3838316482770903e-06, "loss": 0.0218, "step": 168725 }, { "epoch": 0.7040331800619205, "grad_norm": 2.408100634430555, "learning_rate": 2.3837963275955724e-06, "loss": 0.0265, "step": 168730 }, { "epoch": 0.7040540427769109, "grad_norm": 0.8119110301893785, "learning_rate": 2.3837610084840225e-06, "loss": 0.0234, "step": 168735 }, { "epoch": 0.7040749054919011, "grad_norm": 0.964184288498926, "learning_rate": 2.383725690942326e-06, "loss": 0.0212, "step": 168740 }, { "epoch": 0.7040957682068913, "grad_norm": 0.3830707489565954, "learning_rate": 2.3836903749703645e-06, "loss": 0.018, "step": 168745 }, { "epoch": 0.7041166309218817, "grad_norm": 0.7048637892188728, "learning_rate": 2.3836550605680226e-06, "loss": 0.0252, "step": 168750 }, { "epoch": 0.704137493636872, "grad_norm": 0.7972858610379034, "learning_rate": 2.3836197477351845e-06, "loss": 0.0246, "step": 168755 }, { "epoch": 0.7041583563518622, "grad_norm": 0.5960767778752227, "learning_rate": 2.383584436471733e-06, "loss": 0.0179, "step": 168760 }, { "epoch": 0.7041792190668524, "grad_norm": 0.6197986499768071, "learning_rate": 2.3835491267775528e-06, "loss": 0.0185, "step": 168765 }, { "epoch": 0.7042000817818428, "grad_norm": 0.3959224779334154, "learning_rate": 2.3835138186525276e-06, "loss": 0.0151, "step": 168770 }, { "epoch": 0.704220944496833, "grad_norm": 0.5078532118497028, "learning_rate": 2.3834785120965407e-06, "loss": 0.0206, "step": 168775 }, { "epoch": 0.7042418072118233, "grad_norm": 0.9886127850646059, "learning_rate": 2.3834432071094757e-06, "loss": 0.0223, "step": 168780 }, { "epoch": 0.7042626699268136, "grad_norm": 0.2680651048174681, "learning_rate": 2.3834079036912173e-06, "loss": 0.0224, "step": 168785 }, { "epoch": 0.7042835326418039, "grad_norm": 0.5124275676920081, "learning_rate": 2.383372601841648e-06, "loss": 0.017, "step": 168790 }, { "epoch": 0.7043043953567941, "grad_norm": 0.5105838295016087, "learning_rate": 2.383337301560653e-06, "loss": 0.0168, "step": 168795 }, { "epoch": 0.7043252580717845, "grad_norm": 0.5856272186178815, "learning_rate": 2.383302002848116e-06, "loss": 0.0334, "step": 168800 }, { "epoch": 0.7043461207867747, "grad_norm": 0.6412786863739014, "learning_rate": 2.3832667057039195e-06, "loss": 0.0367, "step": 168805 }, { "epoch": 0.704366983501765, "grad_norm": 0.9871456241537975, "learning_rate": 2.383231410127949e-06, "loss": 0.0236, "step": 168810 }, { "epoch": 0.7043878462167552, "grad_norm": 0.6357864460201829, "learning_rate": 2.3831961161200875e-06, "loss": 0.0162, "step": 168815 }, { "epoch": 0.7044087089317456, "grad_norm": 0.34341484885424817, "learning_rate": 2.3831608236802185e-06, "loss": 0.0186, "step": 168820 }, { "epoch": 0.7044295716467358, "grad_norm": 0.3211368214011734, "learning_rate": 2.383125532808227e-06, "loss": 0.0226, "step": 168825 }, { "epoch": 0.704450434361726, "grad_norm": 0.6515218273704191, "learning_rate": 2.3830902435039964e-06, "loss": 0.0215, "step": 168830 }, { "epoch": 0.7044712970767164, "grad_norm": 0.5010997365453944, "learning_rate": 2.38305495576741e-06, "loss": 0.0252, "step": 168835 }, { "epoch": 0.7044921597917067, "grad_norm": 0.5048100145102193, "learning_rate": 2.3830196695983525e-06, "loss": 0.0212, "step": 168840 }, { "epoch": 0.7045130225066969, "grad_norm": 0.9721883304853643, "learning_rate": 2.3829843849967083e-06, "loss": 0.0223, "step": 168845 }, { "epoch": 0.7045338852216873, "grad_norm": 0.9381676872602156, "learning_rate": 2.3829491019623605e-06, "loss": 0.0232, "step": 168850 }, { "epoch": 0.7045547479366775, "grad_norm": 0.6680406535779934, "learning_rate": 2.382913820495193e-06, "loss": 0.0205, "step": 168855 }, { "epoch": 0.7045756106516677, "grad_norm": 0.9161275668461886, "learning_rate": 2.3828785405950904e-06, "loss": 0.0222, "step": 168860 }, { "epoch": 0.7045964733666581, "grad_norm": 0.2907048073598829, "learning_rate": 2.3828432622619364e-06, "loss": 0.0174, "step": 168865 }, { "epoch": 0.7046173360816483, "grad_norm": 0.534925305332208, "learning_rate": 2.382807985495615e-06, "loss": 0.0321, "step": 168870 }, { "epoch": 0.7046381987966386, "grad_norm": 0.647859327621103, "learning_rate": 2.3827727102960103e-06, "loss": 0.0178, "step": 168875 }, { "epoch": 0.7046590615116288, "grad_norm": 0.969153582345477, "learning_rate": 2.3827374366630056e-06, "loss": 0.0208, "step": 168880 }, { "epoch": 0.7046799242266192, "grad_norm": 0.9817167731412425, "learning_rate": 2.3827021645964864e-06, "loss": 0.0267, "step": 168885 }, { "epoch": 0.7047007869416094, "grad_norm": 0.3348982680279646, "learning_rate": 2.3826668940963356e-06, "loss": 0.0185, "step": 168890 }, { "epoch": 0.7047216496565997, "grad_norm": 0.45792009474378953, "learning_rate": 2.3826316251624377e-06, "loss": 0.0176, "step": 168895 }, { "epoch": 0.70474251237159, "grad_norm": 1.3862168921589282, "learning_rate": 2.3825963577946775e-06, "loss": 0.0225, "step": 168900 }, { "epoch": 0.7047633750865803, "grad_norm": 0.5362999244179573, "learning_rate": 2.382561091992937e-06, "loss": 0.0163, "step": 168905 }, { "epoch": 0.7047842378015705, "grad_norm": 0.8144110143597609, "learning_rate": 2.3825258277571024e-06, "loss": 0.0336, "step": 168910 }, { "epoch": 0.7048051005165609, "grad_norm": 0.8558868104391688, "learning_rate": 2.3824905650870577e-06, "loss": 0.0212, "step": 168915 }, { "epoch": 0.7048259632315511, "grad_norm": 0.552839994657662, "learning_rate": 2.3824553039826855e-06, "loss": 0.0271, "step": 168920 }, { "epoch": 0.7048468259465414, "grad_norm": 0.5532218549353889, "learning_rate": 2.3824200444438704e-06, "loss": 0.0278, "step": 168925 }, { "epoch": 0.7048676886615317, "grad_norm": 1.3126661488876834, "learning_rate": 2.3823847864704975e-06, "loss": 0.0257, "step": 168930 }, { "epoch": 0.704888551376522, "grad_norm": 0.5081125138004988, "learning_rate": 2.3823495300624512e-06, "loss": 0.0179, "step": 168935 }, { "epoch": 0.7049094140915122, "grad_norm": 0.4872358465724406, "learning_rate": 2.382314275219614e-06, "loss": 0.0208, "step": 168940 }, { "epoch": 0.7049302768065024, "grad_norm": 0.5117289510622997, "learning_rate": 2.382279021941871e-06, "loss": 0.0152, "step": 168945 }, { "epoch": 0.7049511395214928, "grad_norm": 1.0461022101497528, "learning_rate": 2.3822437702291063e-06, "loss": 0.0322, "step": 168950 }, { "epoch": 0.704972002236483, "grad_norm": 0.6494892705520721, "learning_rate": 2.382208520081205e-06, "loss": 0.0193, "step": 168955 }, { "epoch": 0.7049928649514733, "grad_norm": 0.4459203499995047, "learning_rate": 2.3821732714980502e-06, "loss": 0.0184, "step": 168960 }, { "epoch": 0.7050137276664636, "grad_norm": 0.7897670249109591, "learning_rate": 2.3821380244795264e-06, "loss": 0.0282, "step": 168965 }, { "epoch": 0.7050345903814539, "grad_norm": 0.6977281728989008, "learning_rate": 2.382102779025518e-06, "loss": 0.0267, "step": 168970 }, { "epoch": 0.7050554530964441, "grad_norm": 0.8115683365042278, "learning_rate": 2.382067535135909e-06, "loss": 0.0194, "step": 168975 }, { "epoch": 0.7050763158114345, "grad_norm": 0.4823646285980482, "learning_rate": 2.3820322928105848e-06, "loss": 0.0207, "step": 168980 }, { "epoch": 0.7050971785264247, "grad_norm": 0.5482859524777324, "learning_rate": 2.3819970520494275e-06, "loss": 0.0227, "step": 168985 }, { "epoch": 0.705118041241415, "grad_norm": 1.3587079582296124, "learning_rate": 2.3819618128523233e-06, "loss": 0.0254, "step": 168990 }, { "epoch": 0.7051389039564052, "grad_norm": 0.3811084881166138, "learning_rate": 2.3819265752191558e-06, "loss": 0.0158, "step": 168995 }, { "epoch": 0.7051597666713956, "grad_norm": 0.7269387427626735, "learning_rate": 2.3818913391498097e-06, "loss": 0.0172, "step": 169000 }, { "epoch": 0.7051806293863858, "grad_norm": 0.9387843450425671, "learning_rate": 2.3818561046441686e-06, "loss": 0.0188, "step": 169005 }, { "epoch": 0.7052014921013761, "grad_norm": 0.6282474322127856, "learning_rate": 2.3818208717021177e-06, "loss": 0.0189, "step": 169010 }, { "epoch": 0.7052223548163664, "grad_norm": 1.1081544530272276, "learning_rate": 2.3817856403235405e-06, "loss": 0.0307, "step": 169015 }, { "epoch": 0.7052432175313567, "grad_norm": 0.6958785146850364, "learning_rate": 2.3817504105083224e-06, "loss": 0.0373, "step": 169020 }, { "epoch": 0.7052640802463469, "grad_norm": 0.5227707235850725, "learning_rate": 2.3817151822563467e-06, "loss": 0.0209, "step": 169025 }, { "epoch": 0.7052849429613373, "grad_norm": 0.3368744272022308, "learning_rate": 2.3816799555674983e-06, "loss": 0.0194, "step": 169030 }, { "epoch": 0.7053058056763275, "grad_norm": 0.6522631088001184, "learning_rate": 2.3816447304416625e-06, "loss": 0.0229, "step": 169035 }, { "epoch": 0.7053266683913177, "grad_norm": 0.8629594960470001, "learning_rate": 2.381609506878722e-06, "loss": 0.0223, "step": 169040 }, { "epoch": 0.7053475311063081, "grad_norm": 0.7255688283208828, "learning_rate": 2.3815742848785617e-06, "loss": 0.0195, "step": 169045 }, { "epoch": 0.7053683938212983, "grad_norm": 1.0400625964682497, "learning_rate": 2.381539064441067e-06, "loss": 0.0167, "step": 169050 }, { "epoch": 0.7053892565362886, "grad_norm": 0.9703084532192541, "learning_rate": 2.3815038455661215e-06, "loss": 0.0172, "step": 169055 }, { "epoch": 0.7054101192512788, "grad_norm": 0.5980799016348257, "learning_rate": 2.3814686282536102e-06, "loss": 0.021, "step": 169060 }, { "epoch": 0.7054309819662692, "grad_norm": 0.5645906403249706, "learning_rate": 2.3814334125034165e-06, "loss": 0.0292, "step": 169065 }, { "epoch": 0.7054518446812594, "grad_norm": 0.39710127923582605, "learning_rate": 2.381398198315427e-06, "loss": 0.0168, "step": 169070 }, { "epoch": 0.7054727073962497, "grad_norm": 0.5387165564833801, "learning_rate": 2.381362985689524e-06, "loss": 0.0495, "step": 169075 }, { "epoch": 0.70549357011124, "grad_norm": 0.8684146668930516, "learning_rate": 2.381327774625593e-06, "loss": 0.0184, "step": 169080 }, { "epoch": 0.7055144328262303, "grad_norm": 0.6005324317623836, "learning_rate": 2.381292565123518e-06, "loss": 0.0215, "step": 169085 }, { "epoch": 0.7055352955412205, "grad_norm": 1.245978240946068, "learning_rate": 2.381257357183185e-06, "loss": 0.0266, "step": 169090 }, { "epoch": 0.7055561582562109, "grad_norm": 0.28248423084957, "learning_rate": 2.3812221508044766e-06, "loss": 0.0166, "step": 169095 }, { "epoch": 0.7055770209712011, "grad_norm": 0.7233699678585358, "learning_rate": 2.381186945987279e-06, "loss": 0.0224, "step": 169100 }, { "epoch": 0.7055978836861914, "grad_norm": 0.4559240237538093, "learning_rate": 2.3811517427314754e-06, "loss": 0.0237, "step": 169105 }, { "epoch": 0.7056187464011817, "grad_norm": 0.32573048801875243, "learning_rate": 2.3811165410369515e-06, "loss": 0.02, "step": 169110 }, { "epoch": 0.705639609116172, "grad_norm": 0.9942729898702699, "learning_rate": 2.381081340903591e-06, "loss": 0.0259, "step": 169115 }, { "epoch": 0.7056604718311622, "grad_norm": 0.6203197370909007, "learning_rate": 2.3810461423312797e-06, "loss": 0.0133, "step": 169120 }, { "epoch": 0.7056813345461525, "grad_norm": 0.9521048103254417, "learning_rate": 2.3810109453199006e-06, "loss": 0.0231, "step": 169125 }, { "epoch": 0.7057021972611428, "grad_norm": 0.9501676673787938, "learning_rate": 2.3809757498693397e-06, "loss": 0.0239, "step": 169130 }, { "epoch": 0.705723059976133, "grad_norm": 0.5371474024863433, "learning_rate": 2.3809405559794807e-06, "loss": 0.0286, "step": 169135 }, { "epoch": 0.7057439226911233, "grad_norm": 0.6563060528942494, "learning_rate": 2.3809053636502096e-06, "loss": 0.0232, "step": 169140 }, { "epoch": 0.7057647854061136, "grad_norm": 0.29025331938298254, "learning_rate": 2.380870172881409e-06, "loss": 0.02, "step": 169145 }, { "epoch": 0.7057856481211039, "grad_norm": 1.0745207284599994, "learning_rate": 2.3808349836729657e-06, "loss": 0.0272, "step": 169150 }, { "epoch": 0.7058065108360941, "grad_norm": 0.4525714224756635, "learning_rate": 2.3807997960247633e-06, "loss": 0.0185, "step": 169155 }, { "epoch": 0.7058273735510845, "grad_norm": 0.5333307196226311, "learning_rate": 2.380764609936686e-06, "loss": 0.0309, "step": 169160 }, { "epoch": 0.7058482362660747, "grad_norm": 0.608903856500514, "learning_rate": 2.3807294254086195e-06, "loss": 0.0211, "step": 169165 }, { "epoch": 0.705869098981065, "grad_norm": 0.6149684479052109, "learning_rate": 2.3806942424404487e-06, "loss": 0.019, "step": 169170 }, { "epoch": 0.7058899616960552, "grad_norm": 0.45325771444335267, "learning_rate": 2.380659061032057e-06, "loss": 0.0138, "step": 169175 }, { "epoch": 0.7059108244110456, "grad_norm": 0.2544514945353157, "learning_rate": 2.380623881183331e-06, "loss": 0.0136, "step": 169180 }, { "epoch": 0.7059316871260358, "grad_norm": 0.39781433620467677, "learning_rate": 2.3805887028941534e-06, "loss": 0.0196, "step": 169185 }, { "epoch": 0.7059525498410261, "grad_norm": 0.5888065586671254, "learning_rate": 2.3805535261644103e-06, "loss": 0.0194, "step": 169190 }, { "epoch": 0.7059734125560164, "grad_norm": 0.6690985931906083, "learning_rate": 2.3805183509939866e-06, "loss": 0.0156, "step": 169195 }, { "epoch": 0.7059942752710067, "grad_norm": 0.5964121889156597, "learning_rate": 2.3804831773827663e-06, "loss": 0.0238, "step": 169200 }, { "epoch": 0.7060151379859969, "grad_norm": 0.8946775028644698, "learning_rate": 2.380448005330635e-06, "loss": 0.0242, "step": 169205 }, { "epoch": 0.7060360007009873, "grad_norm": 0.27927973325543826, "learning_rate": 2.380412834837477e-06, "loss": 0.0179, "step": 169210 }, { "epoch": 0.7060568634159775, "grad_norm": 1.2692288980602155, "learning_rate": 2.380377665903177e-06, "loss": 0.0302, "step": 169215 }, { "epoch": 0.7060777261309678, "grad_norm": 0.17509272783923654, "learning_rate": 2.3803424985276207e-06, "loss": 0.023, "step": 169220 }, { "epoch": 0.7060985888459581, "grad_norm": 0.6756984534875153, "learning_rate": 2.3803073327106922e-06, "loss": 0.0251, "step": 169225 }, { "epoch": 0.7061194515609484, "grad_norm": 1.4258916775616841, "learning_rate": 2.3802721684522764e-06, "loss": 0.0307, "step": 169230 }, { "epoch": 0.7061403142759386, "grad_norm": 0.7754303748971496, "learning_rate": 2.380237005752259e-06, "loss": 0.0282, "step": 169235 }, { "epoch": 0.7061611769909288, "grad_norm": 0.9507106697293183, "learning_rate": 2.3802018446105236e-06, "loss": 0.0205, "step": 169240 }, { "epoch": 0.7061820397059192, "grad_norm": 0.2882533517398253, "learning_rate": 2.3801666850269562e-06, "loss": 0.0233, "step": 169245 }, { "epoch": 0.7062029024209094, "grad_norm": 0.4958753442267557, "learning_rate": 2.3801315270014404e-06, "loss": 0.0325, "step": 169250 }, { "epoch": 0.7062237651358997, "grad_norm": 0.6872260788692731, "learning_rate": 2.3800963705338633e-06, "loss": 0.0221, "step": 169255 }, { "epoch": 0.70624462785089, "grad_norm": 0.5674468738055803, "learning_rate": 2.3800612156241073e-06, "loss": 0.0215, "step": 169260 }, { "epoch": 0.7062654905658803, "grad_norm": 0.40781618940510966, "learning_rate": 2.3800260622720593e-06, "loss": 0.012, "step": 169265 }, { "epoch": 0.7062863532808705, "grad_norm": 0.7999894738236415, "learning_rate": 2.3799909104776033e-06, "loss": 0.0195, "step": 169270 }, { "epoch": 0.7063072159958609, "grad_norm": 0.8471389373772702, "learning_rate": 2.379955760240625e-06, "loss": 0.0289, "step": 169275 }, { "epoch": 0.7063280787108511, "grad_norm": 0.8283441457696143, "learning_rate": 2.3799206115610083e-06, "loss": 0.0188, "step": 169280 }, { "epoch": 0.7063489414258414, "grad_norm": 0.7607408471513258, "learning_rate": 2.3798854644386395e-06, "loss": 0.0266, "step": 169285 }, { "epoch": 0.7063698041408317, "grad_norm": 0.5101995497301838, "learning_rate": 2.3798503188734026e-06, "loss": 0.0151, "step": 169290 }, { "epoch": 0.706390666855822, "grad_norm": 0.963353982669568, "learning_rate": 2.379815174865183e-06, "loss": 0.033, "step": 169295 }, { "epoch": 0.7064115295708122, "grad_norm": 0.5924177510875613, "learning_rate": 2.379780032413866e-06, "loss": 0.0261, "step": 169300 }, { "epoch": 0.7064323922858025, "grad_norm": 0.8740380614594107, "learning_rate": 2.379744891519336e-06, "loss": 0.0209, "step": 169305 }, { "epoch": 0.7064532550007928, "grad_norm": 0.6510056994433333, "learning_rate": 2.3797097521814786e-06, "loss": 0.0293, "step": 169310 }, { "epoch": 0.7064741177157831, "grad_norm": 0.7112131340219572, "learning_rate": 2.3796746144001785e-06, "loss": 0.0212, "step": 169315 }, { "epoch": 0.7064949804307733, "grad_norm": 0.5420004618493967, "learning_rate": 2.3796394781753213e-06, "loss": 0.0263, "step": 169320 }, { "epoch": 0.7065158431457637, "grad_norm": 0.5454617711309467, "learning_rate": 2.379604343506792e-06, "loss": 0.0175, "step": 169325 }, { "epoch": 0.7065367058607539, "grad_norm": 0.6024184747664343, "learning_rate": 2.379569210394475e-06, "loss": 0.0139, "step": 169330 }, { "epoch": 0.7065575685757441, "grad_norm": 0.5965720607797615, "learning_rate": 2.3795340788382564e-06, "loss": 0.0275, "step": 169335 }, { "epoch": 0.7065784312907345, "grad_norm": 0.7179337714734876, "learning_rate": 2.3794989488380208e-06, "loss": 0.0144, "step": 169340 }, { "epoch": 0.7065992940057247, "grad_norm": 1.2727314909066587, "learning_rate": 2.3794638203936534e-06, "loss": 0.0265, "step": 169345 }, { "epoch": 0.706620156720715, "grad_norm": 0.5270327961922555, "learning_rate": 2.379428693505039e-06, "loss": 0.0168, "step": 169350 }, { "epoch": 0.7066410194357052, "grad_norm": 0.6779647185829627, "learning_rate": 2.3793935681720633e-06, "loss": 0.0163, "step": 169355 }, { "epoch": 0.7066618821506956, "grad_norm": 0.4155127205221016, "learning_rate": 2.3793584443946115e-06, "loss": 0.0169, "step": 169360 }, { "epoch": 0.7066827448656858, "grad_norm": 0.4498864597381262, "learning_rate": 2.3793233221725685e-06, "loss": 0.0228, "step": 169365 }, { "epoch": 0.7067036075806761, "grad_norm": 0.8335545594930276, "learning_rate": 2.37928820150582e-06, "loss": 0.0268, "step": 169370 }, { "epoch": 0.7067244702956664, "grad_norm": 0.9362380096114002, "learning_rate": 2.3792530823942503e-06, "loss": 0.03, "step": 169375 }, { "epoch": 0.7067453330106567, "grad_norm": 0.32365671810098295, "learning_rate": 2.379217964837745e-06, "loss": 0.0177, "step": 169380 }, { "epoch": 0.7067661957256469, "grad_norm": 0.39294490010886846, "learning_rate": 2.37918284883619e-06, "loss": 0.0196, "step": 169385 }, { "epoch": 0.7067870584406373, "grad_norm": 1.1099061113023734, "learning_rate": 2.37914773438947e-06, "loss": 0.0224, "step": 169390 }, { "epoch": 0.7068079211556275, "grad_norm": 0.5583236270057325, "learning_rate": 2.3791126214974702e-06, "loss": 0.0197, "step": 169395 }, { "epoch": 0.7068287838706178, "grad_norm": 0.6229359308852752, "learning_rate": 2.379077510160076e-06, "loss": 0.0189, "step": 169400 }, { "epoch": 0.7068496465856081, "grad_norm": 0.8410113134051217, "learning_rate": 2.3790424003771724e-06, "loss": 0.0207, "step": 169405 }, { "epoch": 0.7068705093005984, "grad_norm": 0.7354473993403954, "learning_rate": 2.3790072921486456e-06, "loss": 0.0188, "step": 169410 }, { "epoch": 0.7068913720155886, "grad_norm": 0.5091452355896702, "learning_rate": 2.3789721854743798e-06, "loss": 0.0213, "step": 169415 }, { "epoch": 0.7069122347305788, "grad_norm": 0.6296186244334807, "learning_rate": 2.378937080354261e-06, "loss": 0.0188, "step": 169420 }, { "epoch": 0.7069330974455692, "grad_norm": 0.79600951770007, "learning_rate": 2.3789019767881745e-06, "loss": 0.0321, "step": 169425 }, { "epoch": 0.7069539601605594, "grad_norm": 0.6170153664430439, "learning_rate": 2.378866874776005e-06, "loss": 0.0165, "step": 169430 }, { "epoch": 0.7069748228755497, "grad_norm": 0.6144226506839274, "learning_rate": 2.3788317743176385e-06, "loss": 0.0188, "step": 169435 }, { "epoch": 0.70699568559054, "grad_norm": 0.5055797020607464, "learning_rate": 2.3787966754129606e-06, "loss": 0.0244, "step": 169440 }, { "epoch": 0.7070165483055303, "grad_norm": 0.45758386672058593, "learning_rate": 2.378761578061856e-06, "loss": 0.0265, "step": 169445 }, { "epoch": 0.7070374110205205, "grad_norm": 0.6427154895880789, "learning_rate": 2.3787264822642107e-06, "loss": 0.0235, "step": 169450 }, { "epoch": 0.7070582737355109, "grad_norm": 1.1101816994651919, "learning_rate": 2.37869138801991e-06, "loss": 0.0264, "step": 169455 }, { "epoch": 0.7070791364505011, "grad_norm": 0.4286279351269616, "learning_rate": 2.3786562953288383e-06, "loss": 0.0238, "step": 169460 }, { "epoch": 0.7070999991654914, "grad_norm": 0.49605647126125463, "learning_rate": 2.3786212041908825e-06, "loss": 0.021, "step": 169465 }, { "epoch": 0.7071208618804817, "grad_norm": 0.7910355849881018, "learning_rate": 2.3785861146059274e-06, "loss": 0.0259, "step": 169470 }, { "epoch": 0.707141724595472, "grad_norm": 0.8490822749347934, "learning_rate": 2.3785510265738585e-06, "loss": 0.0184, "step": 169475 }, { "epoch": 0.7071625873104622, "grad_norm": 0.3496771652399938, "learning_rate": 2.378515940094561e-06, "loss": 0.03, "step": 169480 }, { "epoch": 0.7071834500254525, "grad_norm": 0.9077845180491337, "learning_rate": 2.3784808551679208e-06, "loss": 0.0187, "step": 169485 }, { "epoch": 0.7072043127404428, "grad_norm": 0.4224312054700116, "learning_rate": 2.3784457717938224e-06, "loss": 0.026, "step": 169490 }, { "epoch": 0.7072251754554331, "grad_norm": 0.5109490157054599, "learning_rate": 2.378410689972153e-06, "loss": 0.0216, "step": 169495 }, { "epoch": 0.7072460381704233, "grad_norm": 0.5539749510080731, "learning_rate": 2.3783756097027967e-06, "loss": 0.015, "step": 169500 }, { "epoch": 0.7072669008854137, "grad_norm": 0.5524141667239967, "learning_rate": 2.3783405309856397e-06, "loss": 0.0252, "step": 169505 }, { "epoch": 0.7072877636004039, "grad_norm": 1.1801379717753695, "learning_rate": 2.3783054538205673e-06, "loss": 0.0263, "step": 169510 }, { "epoch": 0.7073086263153942, "grad_norm": 0.6061227720645979, "learning_rate": 2.3782703782074655e-06, "loss": 0.0195, "step": 169515 }, { "epoch": 0.7073294890303845, "grad_norm": 0.39176586747466474, "learning_rate": 2.3782353041462187e-06, "loss": 0.019, "step": 169520 }, { "epoch": 0.7073503517453748, "grad_norm": 1.0663521920945471, "learning_rate": 2.378200231636714e-06, "loss": 0.0219, "step": 169525 }, { "epoch": 0.707371214460365, "grad_norm": 0.6511968924729816, "learning_rate": 2.378165160678836e-06, "loss": 0.0219, "step": 169530 }, { "epoch": 0.7073920771753552, "grad_norm": 0.5842164079649856, "learning_rate": 2.3781300912724703e-06, "loss": 0.0241, "step": 169535 }, { "epoch": 0.7074129398903456, "grad_norm": 1.1815189554086367, "learning_rate": 2.378095023417503e-06, "loss": 0.0253, "step": 169540 }, { "epoch": 0.7074338026053358, "grad_norm": 0.45292429959171093, "learning_rate": 2.3780599571138193e-06, "loss": 0.0236, "step": 169545 }, { "epoch": 0.7074546653203261, "grad_norm": 0.6845965624672624, "learning_rate": 2.378024892361305e-06, "loss": 0.024, "step": 169550 }, { "epoch": 0.7074755280353164, "grad_norm": 0.6960725209469161, "learning_rate": 2.377989829159846e-06, "loss": 0.0215, "step": 169555 }, { "epoch": 0.7074963907503067, "grad_norm": 0.3000161362580032, "learning_rate": 2.377954767509327e-06, "loss": 0.0216, "step": 169560 }, { "epoch": 0.7075172534652969, "grad_norm": 0.8987215904911834, "learning_rate": 2.3779197074096346e-06, "loss": 0.0219, "step": 169565 }, { "epoch": 0.7075381161802873, "grad_norm": 1.1014346158019643, "learning_rate": 2.377884648860655e-06, "loss": 0.0188, "step": 169570 }, { "epoch": 0.7075589788952775, "grad_norm": 0.566587123759313, "learning_rate": 2.377849591862272e-06, "loss": 0.0242, "step": 169575 }, { "epoch": 0.7075798416102678, "grad_norm": 0.2967264295017911, "learning_rate": 2.377814536414373e-06, "loss": 0.0169, "step": 169580 }, { "epoch": 0.7076007043252581, "grad_norm": 0.32749964252794855, "learning_rate": 2.377779482516843e-06, "loss": 0.0172, "step": 169585 }, { "epoch": 0.7076215670402484, "grad_norm": 0.351626634561308, "learning_rate": 2.3777444301695672e-06, "loss": 0.0191, "step": 169590 }, { "epoch": 0.7076424297552386, "grad_norm": 0.3094751529899767, "learning_rate": 2.3777093793724327e-06, "loss": 0.0216, "step": 169595 }, { "epoch": 0.7076632924702289, "grad_norm": 0.8674168324373022, "learning_rate": 2.377674330125325e-06, "loss": 0.0258, "step": 169600 }, { "epoch": 0.7076841551852192, "grad_norm": 0.6089885773195292, "learning_rate": 2.3776392824281285e-06, "loss": 0.0235, "step": 169605 }, { "epoch": 0.7077050179002095, "grad_norm": 0.33224038152599406, "learning_rate": 2.3776042362807305e-06, "loss": 0.0122, "step": 169610 }, { "epoch": 0.7077258806151997, "grad_norm": 0.6240243050633767, "learning_rate": 2.377569191683016e-06, "loss": 0.0205, "step": 169615 }, { "epoch": 0.70774674333019, "grad_norm": 0.7720055963370859, "learning_rate": 2.3775341486348706e-06, "loss": 0.0206, "step": 169620 }, { "epoch": 0.7077676060451803, "grad_norm": 0.45946895781705754, "learning_rate": 2.377499107136181e-06, "loss": 0.0195, "step": 169625 }, { "epoch": 0.7077884687601705, "grad_norm": 1.0818455174968398, "learning_rate": 2.3774640671868316e-06, "loss": 0.016, "step": 169630 }, { "epoch": 0.7078093314751609, "grad_norm": 0.5081291222877015, "learning_rate": 2.37742902878671e-06, "loss": 0.0229, "step": 169635 }, { "epoch": 0.7078301941901511, "grad_norm": 0.6137927601167986, "learning_rate": 2.3773939919357e-06, "loss": 0.0156, "step": 169640 }, { "epoch": 0.7078510569051414, "grad_norm": 0.7436869667694679, "learning_rate": 2.37735895663369e-06, "loss": 0.0208, "step": 169645 }, { "epoch": 0.7078719196201317, "grad_norm": 0.35634063595880416, "learning_rate": 2.377323922880564e-06, "loss": 0.0202, "step": 169650 }, { "epoch": 0.707892782335122, "grad_norm": 0.7312737156444692, "learning_rate": 2.3772888906762082e-06, "loss": 0.0214, "step": 169655 }, { "epoch": 0.7079136450501122, "grad_norm": 0.6337407882723367, "learning_rate": 2.377253860020509e-06, "loss": 0.0196, "step": 169660 }, { "epoch": 0.7079345077651025, "grad_norm": 0.4728661340159496, "learning_rate": 2.3772188309133507e-06, "loss": 0.0217, "step": 169665 }, { "epoch": 0.7079553704800928, "grad_norm": 0.4854763343049164, "learning_rate": 2.377183803354622e-06, "loss": 0.0164, "step": 169670 }, { "epoch": 0.7079762331950831, "grad_norm": 0.9137227124002957, "learning_rate": 2.3771487773442057e-06, "loss": 0.0226, "step": 169675 }, { "epoch": 0.7079970959100733, "grad_norm": 0.6815774677622046, "learning_rate": 2.3771137528819904e-06, "loss": 0.0181, "step": 169680 }, { "epoch": 0.7080179586250637, "grad_norm": 0.7160022877287952, "learning_rate": 2.3770787299678612e-06, "loss": 0.0307, "step": 169685 }, { "epoch": 0.7080388213400539, "grad_norm": 0.5966001557078794, "learning_rate": 2.3770437086017034e-06, "loss": 0.0209, "step": 169690 }, { "epoch": 0.7080596840550442, "grad_norm": 0.6131589894067278, "learning_rate": 2.377008688783403e-06, "loss": 0.018, "step": 169695 }, { "epoch": 0.7080805467700345, "grad_norm": 0.7977653731167151, "learning_rate": 2.376973670512847e-06, "loss": 0.034, "step": 169700 }, { "epoch": 0.7081014094850248, "grad_norm": 0.3002049682155869, "learning_rate": 2.3769386537899205e-06, "loss": 0.0171, "step": 169705 }, { "epoch": 0.708122272200015, "grad_norm": 0.8632840648395121, "learning_rate": 2.3769036386145096e-06, "loss": 0.0206, "step": 169710 }, { "epoch": 0.7081431349150052, "grad_norm": 1.3338355479468815, "learning_rate": 2.376868624986501e-06, "loss": 0.0255, "step": 169715 }, { "epoch": 0.7081639976299956, "grad_norm": 0.7807749337021601, "learning_rate": 2.37683361290578e-06, "loss": 0.0187, "step": 169720 }, { "epoch": 0.7081848603449858, "grad_norm": 0.6346493043350706, "learning_rate": 2.376798602372233e-06, "loss": 0.0193, "step": 169725 }, { "epoch": 0.7082057230599761, "grad_norm": 0.8790984049711461, "learning_rate": 2.376763593385746e-06, "loss": 0.0234, "step": 169730 }, { "epoch": 0.7082265857749664, "grad_norm": 0.3324493435511388, "learning_rate": 2.3767285859462043e-06, "loss": 0.022, "step": 169735 }, { "epoch": 0.7082474484899567, "grad_norm": 0.6947994973299991, "learning_rate": 2.3766935800534953e-06, "loss": 0.0193, "step": 169740 }, { "epoch": 0.7082683112049469, "grad_norm": 1.033483421674654, "learning_rate": 2.376658575707504e-06, "loss": 0.0243, "step": 169745 }, { "epoch": 0.7082891739199373, "grad_norm": 0.5415994043402566, "learning_rate": 2.3766235729081178e-06, "loss": 0.0199, "step": 169750 }, { "epoch": 0.7083100366349275, "grad_norm": 0.6386344568639574, "learning_rate": 2.3765885716552215e-06, "loss": 0.0197, "step": 169755 }, { "epoch": 0.7083308993499178, "grad_norm": 0.7232323548854371, "learning_rate": 2.376553571948702e-06, "loss": 0.0259, "step": 169760 }, { "epoch": 0.7083517620649081, "grad_norm": 0.4226906187158998, "learning_rate": 2.3765185737884445e-06, "loss": 0.0259, "step": 169765 }, { "epoch": 0.7083726247798984, "grad_norm": 0.7985678673619167, "learning_rate": 2.376483577174337e-06, "loss": 0.018, "step": 169770 }, { "epoch": 0.7083934874948886, "grad_norm": 0.9391374839840573, "learning_rate": 2.376448582106263e-06, "loss": 0.0246, "step": 169775 }, { "epoch": 0.7084143502098789, "grad_norm": 0.7000725448508001, "learning_rate": 2.37641358858411e-06, "loss": 0.0402, "step": 169780 }, { "epoch": 0.7084352129248692, "grad_norm": 0.8822197011907261, "learning_rate": 2.3763785966077653e-06, "loss": 0.0231, "step": 169785 }, { "epoch": 0.7084560756398595, "grad_norm": 0.5085026723998503, "learning_rate": 2.376343606177114e-06, "loss": 0.0137, "step": 169790 }, { "epoch": 0.7084769383548497, "grad_norm": 0.5730123670999634, "learning_rate": 2.3763086172920424e-06, "loss": 0.0224, "step": 169795 }, { "epoch": 0.7084978010698401, "grad_norm": 0.9135537598387445, "learning_rate": 2.376273629952437e-06, "loss": 0.0221, "step": 169800 }, { "epoch": 0.7085186637848303, "grad_norm": 0.9088892925868083, "learning_rate": 2.376238644158183e-06, "loss": 0.0251, "step": 169805 }, { "epoch": 0.7085395264998205, "grad_norm": 0.8600861108978668, "learning_rate": 2.3762036599091675e-06, "loss": 0.0181, "step": 169810 }, { "epoch": 0.7085603892148109, "grad_norm": 0.6224982120496337, "learning_rate": 2.376168677205277e-06, "loss": 0.0222, "step": 169815 }, { "epoch": 0.7085812519298011, "grad_norm": 0.7687205177631916, "learning_rate": 2.3761336960463973e-06, "loss": 0.0191, "step": 169820 }, { "epoch": 0.7086021146447914, "grad_norm": 0.4912963466487813, "learning_rate": 2.376098716432415e-06, "loss": 0.0178, "step": 169825 }, { "epoch": 0.7086229773597817, "grad_norm": 0.5373676826636171, "learning_rate": 2.376063738363216e-06, "loss": 0.0165, "step": 169830 }, { "epoch": 0.708643840074772, "grad_norm": 0.710972398347605, "learning_rate": 2.376028761838687e-06, "loss": 0.0219, "step": 169835 }, { "epoch": 0.7086647027897622, "grad_norm": 0.7054076648628498, "learning_rate": 2.375993786858714e-06, "loss": 0.0223, "step": 169840 }, { "epoch": 0.7086855655047525, "grad_norm": 0.30374894379786277, "learning_rate": 2.375958813423183e-06, "loss": 0.017, "step": 169845 }, { "epoch": 0.7087064282197428, "grad_norm": 0.7665759203955379, "learning_rate": 2.375923841531981e-06, "loss": 0.0205, "step": 169850 }, { "epoch": 0.7087272909347331, "grad_norm": 0.9835888595586966, "learning_rate": 2.375888871184995e-06, "loss": 0.0297, "step": 169855 }, { "epoch": 0.7087481536497233, "grad_norm": 0.7054699802501962, "learning_rate": 2.375853902382109e-06, "loss": 0.0162, "step": 169860 }, { "epoch": 0.7087690163647137, "grad_norm": 0.302775687118512, "learning_rate": 2.375818935123212e-06, "loss": 0.0157, "step": 169865 }, { "epoch": 0.7087898790797039, "grad_norm": 0.8822252325527019, "learning_rate": 2.375783969408189e-06, "loss": 0.0231, "step": 169870 }, { "epoch": 0.7088107417946942, "grad_norm": 0.25443123175611887, "learning_rate": 2.3757490052369263e-06, "loss": 0.0105, "step": 169875 }, { "epoch": 0.7088316045096845, "grad_norm": 1.1438449193423341, "learning_rate": 2.3757140426093105e-06, "loss": 0.0376, "step": 169880 }, { "epoch": 0.7088524672246748, "grad_norm": 0.5988185335860482, "learning_rate": 2.375679081525229e-06, "loss": 0.026, "step": 169885 }, { "epoch": 0.708873329939665, "grad_norm": 0.8838782334452275, "learning_rate": 2.375644121984566e-06, "loss": 0.0261, "step": 169890 }, { "epoch": 0.7088941926546553, "grad_norm": 0.5312235487389533, "learning_rate": 2.375609163987211e-06, "loss": 0.0306, "step": 169895 }, { "epoch": 0.7089150553696456, "grad_norm": 1.0061655365690196, "learning_rate": 2.3755742075330475e-06, "loss": 0.0224, "step": 169900 }, { "epoch": 0.7089359180846359, "grad_norm": 1.1804276455104288, "learning_rate": 2.375539252621963e-06, "loss": 0.0261, "step": 169905 }, { "epoch": 0.7089567807996261, "grad_norm": 0.3867043614022348, "learning_rate": 2.3755042992538456e-06, "loss": 0.0235, "step": 169910 }, { "epoch": 0.7089776435146165, "grad_norm": 0.8287579167955784, "learning_rate": 2.37546934742858e-06, "loss": 0.0238, "step": 169915 }, { "epoch": 0.7089985062296067, "grad_norm": 0.4776699463386418, "learning_rate": 2.375434397146052e-06, "loss": 0.0214, "step": 169920 }, { "epoch": 0.7090193689445969, "grad_norm": 0.7219001403619469, "learning_rate": 2.3753994484061505e-06, "loss": 0.0189, "step": 169925 }, { "epoch": 0.7090402316595873, "grad_norm": 0.5261465478658005, "learning_rate": 2.37536450120876e-06, "loss": 0.0209, "step": 169930 }, { "epoch": 0.7090610943745775, "grad_norm": 0.4743562661994226, "learning_rate": 2.375329555553768e-06, "loss": 0.0226, "step": 169935 }, { "epoch": 0.7090819570895678, "grad_norm": 0.8842917256923565, "learning_rate": 2.3752946114410606e-06, "loss": 0.0222, "step": 169940 }, { "epoch": 0.7091028198045581, "grad_norm": 1.3336602434079672, "learning_rate": 2.3752596688705247e-06, "loss": 0.0638, "step": 169945 }, { "epoch": 0.7091236825195484, "grad_norm": 0.634451353398845, "learning_rate": 2.375224727842047e-06, "loss": 0.0246, "step": 169950 }, { "epoch": 0.7091445452345386, "grad_norm": 0.6335983307989821, "learning_rate": 2.3751897883555137e-06, "loss": 0.0242, "step": 169955 }, { "epoch": 0.7091654079495289, "grad_norm": 0.3889778290036638, "learning_rate": 2.3751548504108115e-06, "loss": 0.0252, "step": 169960 }, { "epoch": 0.7091862706645192, "grad_norm": 0.764723255308128, "learning_rate": 2.375119914007827e-06, "loss": 0.0307, "step": 169965 }, { "epoch": 0.7092071333795095, "grad_norm": 0.6620449055054531, "learning_rate": 2.375084979146447e-06, "loss": 0.0211, "step": 169970 }, { "epoch": 0.7092279960944997, "grad_norm": 0.7057539491494861, "learning_rate": 2.375050045826558e-06, "loss": 0.0169, "step": 169975 }, { "epoch": 0.7092488588094901, "grad_norm": 0.6249045282810712, "learning_rate": 2.3750151140480464e-06, "loss": 0.0264, "step": 169980 }, { "epoch": 0.7092697215244803, "grad_norm": 0.3355508900768627, "learning_rate": 2.3749801838107993e-06, "loss": 0.0214, "step": 169985 }, { "epoch": 0.7092905842394706, "grad_norm": 0.5197179619688236, "learning_rate": 2.374945255114703e-06, "loss": 0.0153, "step": 169990 }, { "epoch": 0.7093114469544609, "grad_norm": 0.825730242262384, "learning_rate": 2.374910327959644e-06, "loss": 0.0279, "step": 169995 }, { "epoch": 0.7093323096694512, "grad_norm": 0.9868039081496622, "learning_rate": 2.3748754023455092e-06, "loss": 0.0215, "step": 170000 }, { "epoch": 0.7093531723844414, "grad_norm": 0.478925633627703, "learning_rate": 2.3748404782721858e-06, "loss": 0.0232, "step": 170005 }, { "epoch": 0.7093740350994318, "grad_norm": 0.3791166023237446, "learning_rate": 2.3748055557395606e-06, "loss": 0.0206, "step": 170010 }, { "epoch": 0.709394897814422, "grad_norm": 0.6956418035844949, "learning_rate": 2.374770634747519e-06, "loss": 0.0265, "step": 170015 }, { "epoch": 0.7094157605294122, "grad_norm": 0.49866197173585436, "learning_rate": 2.374735715295949e-06, "loss": 0.0224, "step": 170020 }, { "epoch": 0.7094366232444025, "grad_norm": 0.5578043198176292, "learning_rate": 2.374700797384737e-06, "loss": 0.0185, "step": 170025 }, { "epoch": 0.7094574859593928, "grad_norm": 0.5126321492412904, "learning_rate": 2.374665881013769e-06, "loss": 0.023, "step": 170030 }, { "epoch": 0.7094783486743831, "grad_norm": 0.47566650928505605, "learning_rate": 2.374630966182933e-06, "loss": 0.0193, "step": 170035 }, { "epoch": 0.7094992113893733, "grad_norm": 0.8391385125147188, "learning_rate": 2.374596052892115e-06, "loss": 0.0227, "step": 170040 }, { "epoch": 0.7095200741043637, "grad_norm": 0.8320293708568438, "learning_rate": 2.374561141141202e-06, "loss": 0.021, "step": 170045 }, { "epoch": 0.7095409368193539, "grad_norm": 0.75060539934098, "learning_rate": 2.374526230930081e-06, "loss": 0.017, "step": 170050 }, { "epoch": 0.7095617995343442, "grad_norm": 0.3428473653616856, "learning_rate": 2.374491322258638e-06, "loss": 0.0229, "step": 170055 }, { "epoch": 0.7095826622493345, "grad_norm": 1.1318212561278262, "learning_rate": 2.3744564151267613e-06, "loss": 0.027, "step": 170060 }, { "epoch": 0.7096035249643248, "grad_norm": 0.5440886228265557, "learning_rate": 2.374421509534337e-06, "loss": 0.0228, "step": 170065 }, { "epoch": 0.709624387679315, "grad_norm": 0.3344419510050207, "learning_rate": 2.374386605481251e-06, "loss": 0.0177, "step": 170070 }, { "epoch": 0.7096452503943053, "grad_norm": 0.47629210340587647, "learning_rate": 2.374351702967391e-06, "loss": 0.0312, "step": 170075 }, { "epoch": 0.7096661131092956, "grad_norm": 0.5079319227895632, "learning_rate": 2.3743168019926444e-06, "loss": 0.0215, "step": 170080 }, { "epoch": 0.7096869758242859, "grad_norm": 0.6784197201823412, "learning_rate": 2.374281902556897e-06, "loss": 0.0319, "step": 170085 }, { "epoch": 0.7097078385392761, "grad_norm": 0.43413267100665026, "learning_rate": 2.374247004660037e-06, "loss": 0.0247, "step": 170090 }, { "epoch": 0.7097287012542665, "grad_norm": 0.8278172615193823, "learning_rate": 2.3742121083019507e-06, "loss": 0.0223, "step": 170095 }, { "epoch": 0.7097495639692567, "grad_norm": 1.058738057263108, "learning_rate": 2.374177213482524e-06, "loss": 0.0294, "step": 170100 }, { "epoch": 0.709770426684247, "grad_norm": 0.5829809003080063, "learning_rate": 2.374142320201645e-06, "loss": 0.0189, "step": 170105 }, { "epoch": 0.7097912893992373, "grad_norm": 0.655354000381408, "learning_rate": 2.3741074284592004e-06, "loss": 0.0193, "step": 170110 }, { "epoch": 0.7098121521142275, "grad_norm": 0.42608931025701513, "learning_rate": 2.3740725382550776e-06, "loss": 0.0206, "step": 170115 }, { "epoch": 0.7098330148292178, "grad_norm": 0.9841611942028998, "learning_rate": 2.3740376495891624e-06, "loss": 0.0269, "step": 170120 }, { "epoch": 0.7098538775442081, "grad_norm": 0.8786431147965347, "learning_rate": 2.3740027624613425e-06, "loss": 0.0251, "step": 170125 }, { "epoch": 0.7098747402591984, "grad_norm": 0.6381336560266709, "learning_rate": 2.3739678768715054e-06, "loss": 0.0291, "step": 170130 }, { "epoch": 0.7098956029741886, "grad_norm": 1.1143670597107371, "learning_rate": 2.373932992819537e-06, "loss": 0.0297, "step": 170135 }, { "epoch": 0.7099164656891789, "grad_norm": 0.4830645245958055, "learning_rate": 2.373898110305325e-06, "loss": 0.0254, "step": 170140 }, { "epoch": 0.7099373284041692, "grad_norm": 0.5735096495588952, "learning_rate": 2.3738632293287557e-06, "loss": 0.0186, "step": 170145 }, { "epoch": 0.7099581911191595, "grad_norm": 0.45520425797600295, "learning_rate": 2.373828349889718e-06, "loss": 0.0192, "step": 170150 }, { "epoch": 0.7099790538341497, "grad_norm": 0.38969562073653186, "learning_rate": 2.3737934719880967e-06, "loss": 0.0152, "step": 170155 }, { "epoch": 0.7099999165491401, "grad_norm": 0.4926490757608867, "learning_rate": 2.3737585956237803e-06, "loss": 0.0193, "step": 170160 }, { "epoch": 0.7100207792641303, "grad_norm": 0.9229232105531129, "learning_rate": 2.373723720796655e-06, "loss": 0.0366, "step": 170165 }, { "epoch": 0.7100416419791206, "grad_norm": 1.230194054349334, "learning_rate": 2.3736888475066084e-06, "loss": 0.0214, "step": 170170 }, { "epoch": 0.7100625046941109, "grad_norm": 0.5808963385122035, "learning_rate": 2.3736539757535275e-06, "loss": 0.0254, "step": 170175 }, { "epoch": 0.7100833674091012, "grad_norm": 0.619719319865172, "learning_rate": 2.3736191055373e-06, "loss": 0.0213, "step": 170180 }, { "epoch": 0.7101042301240914, "grad_norm": 0.6802359871663846, "learning_rate": 2.3735842368578115e-06, "loss": 0.0152, "step": 170185 }, { "epoch": 0.7101250928390818, "grad_norm": 0.8499898624765837, "learning_rate": 2.3735493697149505e-06, "loss": 0.0217, "step": 170190 }, { "epoch": 0.710145955554072, "grad_norm": 0.5866863261657724, "learning_rate": 2.373514504108603e-06, "loss": 0.0237, "step": 170195 }, { "epoch": 0.7101668182690622, "grad_norm": 0.47346121544817105, "learning_rate": 2.3734796400386582e-06, "loss": 0.0157, "step": 170200 }, { "epoch": 0.7101876809840525, "grad_norm": 0.7000212183991171, "learning_rate": 2.3734447775050007e-06, "loss": 0.0205, "step": 170205 }, { "epoch": 0.7102085436990428, "grad_norm": 0.6921856706943802, "learning_rate": 2.3734099165075193e-06, "loss": 0.0204, "step": 170210 }, { "epoch": 0.7102294064140331, "grad_norm": 0.6164011788798074, "learning_rate": 2.3733750570461002e-06, "loss": 0.0189, "step": 170215 }, { "epoch": 0.7102502691290233, "grad_norm": 0.7165713866309661, "learning_rate": 2.373340199120632e-06, "loss": 0.026, "step": 170220 }, { "epoch": 0.7102711318440137, "grad_norm": 0.5536177683398171, "learning_rate": 2.3733053427310003e-06, "loss": 0.0219, "step": 170225 }, { "epoch": 0.7102919945590039, "grad_norm": 0.3107575429601485, "learning_rate": 2.3732704878770937e-06, "loss": 0.0208, "step": 170230 }, { "epoch": 0.7103128572739942, "grad_norm": 0.4503559591828789, "learning_rate": 2.3732356345587985e-06, "loss": 0.014, "step": 170235 }, { "epoch": 0.7103337199889845, "grad_norm": 0.4696154632336027, "learning_rate": 2.373200782776002e-06, "loss": 0.0196, "step": 170240 }, { "epoch": 0.7103545827039748, "grad_norm": 0.6844954704964354, "learning_rate": 2.3731659325285923e-06, "loss": 0.0197, "step": 170245 }, { "epoch": 0.710375445418965, "grad_norm": 1.2559384399682798, "learning_rate": 2.3731310838164555e-06, "loss": 0.0211, "step": 170250 }, { "epoch": 0.7103963081339553, "grad_norm": 0.7035601762601531, "learning_rate": 2.37309623663948e-06, "loss": 0.0197, "step": 170255 }, { "epoch": 0.7104171708489456, "grad_norm": 0.9594914699070625, "learning_rate": 2.373061390997552e-06, "loss": 0.026, "step": 170260 }, { "epoch": 0.7104380335639359, "grad_norm": 0.9222846862784543, "learning_rate": 2.37302654689056e-06, "loss": 0.0315, "step": 170265 }, { "epoch": 0.7104588962789261, "grad_norm": 1.0010443365548725, "learning_rate": 2.3729917043183903e-06, "loss": 0.0192, "step": 170270 }, { "epoch": 0.7104797589939165, "grad_norm": 0.6952594639720614, "learning_rate": 2.3729568632809302e-06, "loss": 0.0184, "step": 170275 }, { "epoch": 0.7105006217089067, "grad_norm": 0.6308548416175201, "learning_rate": 2.372922023778068e-06, "loss": 0.0218, "step": 170280 }, { "epoch": 0.710521484423897, "grad_norm": 0.8509825820335353, "learning_rate": 2.3728871858096904e-06, "loss": 0.0216, "step": 170285 }, { "epoch": 0.7105423471388873, "grad_norm": 1.171897214173704, "learning_rate": 2.3728523493756845e-06, "loss": 0.0159, "step": 170290 }, { "epoch": 0.7105632098538776, "grad_norm": 0.5654162884582233, "learning_rate": 2.3728175144759387e-06, "loss": 0.0225, "step": 170295 }, { "epoch": 0.7105840725688678, "grad_norm": 0.5889982154725816, "learning_rate": 2.3727826811103393e-06, "loss": 0.0172, "step": 170300 }, { "epoch": 0.7106049352838582, "grad_norm": 0.921040132199491, "learning_rate": 2.372747849278774e-06, "loss": 0.0293, "step": 170305 }, { "epoch": 0.7106257979988484, "grad_norm": 0.6103498254561568, "learning_rate": 2.3727130189811303e-06, "loss": 0.0312, "step": 170310 }, { "epoch": 0.7106466607138386, "grad_norm": 0.5425054404483269, "learning_rate": 2.3726781902172953e-06, "loss": 0.0205, "step": 170315 }, { "epoch": 0.7106675234288289, "grad_norm": 0.5929648970946378, "learning_rate": 2.3726433629871575e-06, "loss": 0.0227, "step": 170320 }, { "epoch": 0.7106883861438192, "grad_norm": 1.3793902606639459, "learning_rate": 2.3726085372906033e-06, "loss": 0.0219, "step": 170325 }, { "epoch": 0.7107092488588095, "grad_norm": 0.40208575741103414, "learning_rate": 2.37257371312752e-06, "loss": 0.0234, "step": 170330 }, { "epoch": 0.7107301115737997, "grad_norm": 0.5315021125295827, "learning_rate": 2.3725388904977966e-06, "loss": 0.0206, "step": 170335 }, { "epoch": 0.7107509742887901, "grad_norm": 0.6990253056242937, "learning_rate": 2.3725040694013187e-06, "loss": 0.0216, "step": 170340 }, { "epoch": 0.7107718370037803, "grad_norm": 0.5973866888860455, "learning_rate": 2.3724692498379742e-06, "loss": 0.019, "step": 170345 }, { "epoch": 0.7107926997187706, "grad_norm": 0.8173161157531057, "learning_rate": 2.372434431807652e-06, "loss": 0.0213, "step": 170350 }, { "epoch": 0.7108135624337609, "grad_norm": 0.38037899299315575, "learning_rate": 2.3723996153102376e-06, "loss": 0.0169, "step": 170355 }, { "epoch": 0.7108344251487512, "grad_norm": 0.860940434783132, "learning_rate": 2.37236480034562e-06, "loss": 0.0263, "step": 170360 }, { "epoch": 0.7108552878637414, "grad_norm": 0.8521297847531172, "learning_rate": 2.372329986913686e-06, "loss": 0.0265, "step": 170365 }, { "epoch": 0.7108761505787318, "grad_norm": 0.483424010976649, "learning_rate": 2.3722951750143234e-06, "loss": 0.0211, "step": 170370 }, { "epoch": 0.710897013293722, "grad_norm": 0.6312698651787815, "learning_rate": 2.3722603646474202e-06, "loss": 0.0235, "step": 170375 }, { "epoch": 0.7109178760087123, "grad_norm": 0.44734383769597225, "learning_rate": 2.372225555812863e-06, "loss": 0.0242, "step": 170380 }, { "epoch": 0.7109387387237025, "grad_norm": 0.5287550200472023, "learning_rate": 2.372190748510541e-06, "loss": 0.0141, "step": 170385 }, { "epoch": 0.7109596014386929, "grad_norm": 1.1845837181119925, "learning_rate": 2.3721559427403394e-06, "loss": 0.025, "step": 170390 }, { "epoch": 0.7109804641536831, "grad_norm": 0.6681389246329529, "learning_rate": 2.3721211385021477e-06, "loss": 0.0232, "step": 170395 }, { "epoch": 0.7110013268686733, "grad_norm": 0.695484682956928, "learning_rate": 2.3720863357958525e-06, "loss": 0.0214, "step": 170400 }, { "epoch": 0.7110221895836637, "grad_norm": 0.43166161968410516, "learning_rate": 2.3720515346213417e-06, "loss": 0.0202, "step": 170405 }, { "epoch": 0.7110430522986539, "grad_norm": 0.7383726392294457, "learning_rate": 2.3720167349785037e-06, "loss": 0.0327, "step": 170410 }, { "epoch": 0.7110639150136442, "grad_norm": 0.9477941609984489, "learning_rate": 2.371981936867225e-06, "loss": 0.0272, "step": 170415 }, { "epoch": 0.7110847777286345, "grad_norm": 0.5416311988285633, "learning_rate": 2.371947140287394e-06, "loss": 0.0152, "step": 170420 }, { "epoch": 0.7111056404436248, "grad_norm": 0.5652863570521696, "learning_rate": 2.3719123452388988e-06, "loss": 0.0266, "step": 170425 }, { "epoch": 0.711126503158615, "grad_norm": 0.5503032153529281, "learning_rate": 2.3718775517216256e-06, "loss": 0.0238, "step": 170430 }, { "epoch": 0.7111473658736053, "grad_norm": 1.037484710745465, "learning_rate": 2.3718427597354627e-06, "loss": 0.0332, "step": 170435 }, { "epoch": 0.7111682285885956, "grad_norm": 0.8958382329707175, "learning_rate": 2.3718079692802988e-06, "loss": 0.022, "step": 170440 }, { "epoch": 0.7111890913035859, "grad_norm": 1.5675525581568908, "learning_rate": 2.3717731803560207e-06, "loss": 0.0246, "step": 170445 }, { "epoch": 0.7112099540185761, "grad_norm": 1.1841119052602975, "learning_rate": 2.371738392962516e-06, "loss": 0.0233, "step": 170450 }, { "epoch": 0.7112308167335665, "grad_norm": 0.5085869066758465, "learning_rate": 2.371703607099673e-06, "loss": 0.0165, "step": 170455 }, { "epoch": 0.7112516794485567, "grad_norm": 0.3565189674459939, "learning_rate": 2.371668822767379e-06, "loss": 0.0204, "step": 170460 }, { "epoch": 0.711272542163547, "grad_norm": 0.6746037532518964, "learning_rate": 2.3716340399655225e-06, "loss": 0.021, "step": 170465 }, { "epoch": 0.7112934048785373, "grad_norm": 1.4429765053351926, "learning_rate": 2.37159925869399e-06, "loss": 0.0161, "step": 170470 }, { "epoch": 0.7113142675935276, "grad_norm": 1.5472564710745829, "learning_rate": 2.3715644789526702e-06, "loss": 0.0314, "step": 170475 }, { "epoch": 0.7113351303085178, "grad_norm": 0.8018186338962056, "learning_rate": 2.371529700741451e-06, "loss": 0.0301, "step": 170480 }, { "epoch": 0.7113559930235082, "grad_norm": 0.6748610411015314, "learning_rate": 2.3714949240602197e-06, "loss": 0.0263, "step": 170485 }, { "epoch": 0.7113768557384984, "grad_norm": 0.6443750545529962, "learning_rate": 2.3714601489088646e-06, "loss": 0.016, "step": 170490 }, { "epoch": 0.7113977184534886, "grad_norm": 0.39358191807818993, "learning_rate": 2.371425375287273e-06, "loss": 0.0167, "step": 170495 }, { "epoch": 0.7114185811684789, "grad_norm": 0.5445136574562119, "learning_rate": 2.371390603195333e-06, "loss": 0.0216, "step": 170500 }, { "epoch": 0.7114394438834692, "grad_norm": 0.601659412942714, "learning_rate": 2.371355832632933e-06, "loss": 0.0192, "step": 170505 }, { "epoch": 0.7114603065984595, "grad_norm": 0.9739548781282823, "learning_rate": 2.37132106359996e-06, "loss": 0.0318, "step": 170510 }, { "epoch": 0.7114811693134497, "grad_norm": 0.680241179007319, "learning_rate": 2.3712862960963025e-06, "loss": 0.0207, "step": 170515 }, { "epoch": 0.7115020320284401, "grad_norm": 0.7957759470840928, "learning_rate": 2.3712515301218476e-06, "loss": 0.0238, "step": 170520 }, { "epoch": 0.7115228947434303, "grad_norm": 0.9873827501849948, "learning_rate": 2.371216765676484e-06, "loss": 0.0199, "step": 170525 }, { "epoch": 0.7115437574584206, "grad_norm": 0.8119559536817792, "learning_rate": 2.3711820027600994e-06, "loss": 0.0264, "step": 170530 }, { "epoch": 0.7115646201734109, "grad_norm": 1.0544916436399137, "learning_rate": 2.3711472413725816e-06, "loss": 0.0168, "step": 170535 }, { "epoch": 0.7115854828884012, "grad_norm": 1.1874458008154007, "learning_rate": 2.3711124815138186e-06, "loss": 0.0217, "step": 170540 }, { "epoch": 0.7116063456033914, "grad_norm": 0.8931497503317567, "learning_rate": 2.371077723183699e-06, "loss": 0.0221, "step": 170545 }, { "epoch": 0.7116272083183817, "grad_norm": 0.5143331200005471, "learning_rate": 2.371042966382109e-06, "loss": 0.0234, "step": 170550 }, { "epoch": 0.711648071033372, "grad_norm": 0.5739583204808221, "learning_rate": 2.3710082111089383e-06, "loss": 0.0157, "step": 170555 }, { "epoch": 0.7116689337483623, "grad_norm": 0.7804125922332127, "learning_rate": 2.3709734573640742e-06, "loss": 0.0212, "step": 170560 }, { "epoch": 0.7116897964633525, "grad_norm": 0.35934804401526294, "learning_rate": 2.370938705147405e-06, "loss": 0.0163, "step": 170565 }, { "epoch": 0.7117106591783429, "grad_norm": 0.570791553593049, "learning_rate": 2.370903954458818e-06, "loss": 0.018, "step": 170570 }, { "epoch": 0.7117315218933331, "grad_norm": 0.7775650857520199, "learning_rate": 2.3708692052982022e-06, "loss": 0.0209, "step": 170575 }, { "epoch": 0.7117523846083234, "grad_norm": 0.5379257676525209, "learning_rate": 2.370834457665445e-06, "loss": 0.0253, "step": 170580 }, { "epoch": 0.7117732473233137, "grad_norm": 0.6999173856971412, "learning_rate": 2.370799711560434e-06, "loss": 0.0221, "step": 170585 }, { "epoch": 0.711794110038304, "grad_norm": 0.5445822019366505, "learning_rate": 2.3707649669830584e-06, "loss": 0.0208, "step": 170590 }, { "epoch": 0.7118149727532942, "grad_norm": 0.3798240470744648, "learning_rate": 2.370730223933206e-06, "loss": 0.0148, "step": 170595 }, { "epoch": 0.7118358354682846, "grad_norm": 0.8690035335308856, "learning_rate": 2.370695482410764e-06, "loss": 0.0202, "step": 170600 }, { "epoch": 0.7118566981832748, "grad_norm": 0.33197767229943465, "learning_rate": 2.370660742415621e-06, "loss": 0.0173, "step": 170605 }, { "epoch": 0.711877560898265, "grad_norm": 0.49051752754285666, "learning_rate": 2.3706260039476658e-06, "loss": 0.017, "step": 170610 }, { "epoch": 0.7118984236132553, "grad_norm": 0.8561137004190186, "learning_rate": 2.3705912670067847e-06, "loss": 0.0177, "step": 170615 }, { "epoch": 0.7119192863282456, "grad_norm": 0.38221153296786653, "learning_rate": 2.370556531592868e-06, "loss": 0.0167, "step": 170620 }, { "epoch": 0.7119401490432359, "grad_norm": 0.5255431323770199, "learning_rate": 2.3705217977058027e-06, "loss": 0.0215, "step": 170625 }, { "epoch": 0.7119610117582261, "grad_norm": 0.6664972860008049, "learning_rate": 2.370487065345477e-06, "loss": 0.0291, "step": 170630 }, { "epoch": 0.7119818744732165, "grad_norm": 0.9487155785541099, "learning_rate": 2.3704523345117787e-06, "loss": 0.0168, "step": 170635 }, { "epoch": 0.7120027371882067, "grad_norm": 0.8309964267690402, "learning_rate": 2.370417605204597e-06, "loss": 0.0313, "step": 170640 }, { "epoch": 0.712023599903197, "grad_norm": 1.2004622795581505, "learning_rate": 2.3703828774238187e-06, "loss": 0.0293, "step": 170645 }, { "epoch": 0.7120444626181873, "grad_norm": 0.17579842149168545, "learning_rate": 2.3703481511693336e-06, "loss": 0.0165, "step": 170650 }, { "epoch": 0.7120653253331776, "grad_norm": 0.37868298367489994, "learning_rate": 2.3703134264410287e-06, "loss": 0.034, "step": 170655 }, { "epoch": 0.7120861880481678, "grad_norm": 1.0794468348969373, "learning_rate": 2.370278703238793e-06, "loss": 0.0195, "step": 170660 }, { "epoch": 0.7121070507631582, "grad_norm": 0.5772493134510297, "learning_rate": 2.3702439815625133e-06, "loss": 0.0206, "step": 170665 }, { "epoch": 0.7121279134781484, "grad_norm": 0.2674329515810797, "learning_rate": 2.370209261412079e-06, "loss": 0.028, "step": 170670 }, { "epoch": 0.7121487761931387, "grad_norm": 0.792539875445976, "learning_rate": 2.3701745427873785e-06, "loss": 0.0204, "step": 170675 }, { "epoch": 0.7121696389081289, "grad_norm": 0.6220478152425301, "learning_rate": 2.3701398256883e-06, "loss": 0.0126, "step": 170680 }, { "epoch": 0.7121905016231193, "grad_norm": 0.6267280799160395, "learning_rate": 2.370105110114731e-06, "loss": 0.0242, "step": 170685 }, { "epoch": 0.7122113643381095, "grad_norm": 0.7779406282687904, "learning_rate": 2.3700703960665613e-06, "loss": 0.0173, "step": 170690 }, { "epoch": 0.7122322270530997, "grad_norm": 1.000082893114375, "learning_rate": 2.370035683543677e-06, "loss": 0.0173, "step": 170695 }, { "epoch": 0.7122530897680901, "grad_norm": 0.5419641539182027, "learning_rate": 2.370000972545968e-06, "loss": 0.0172, "step": 170700 }, { "epoch": 0.7122739524830803, "grad_norm": 0.4592507356264512, "learning_rate": 2.369966263073322e-06, "loss": 0.016, "step": 170705 }, { "epoch": 0.7122948151980706, "grad_norm": 0.6373437968529921, "learning_rate": 2.369931555125628e-06, "loss": 0.0207, "step": 170710 }, { "epoch": 0.7123156779130609, "grad_norm": 0.7671972653115815, "learning_rate": 2.369896848702774e-06, "loss": 0.0156, "step": 170715 }, { "epoch": 0.7123365406280512, "grad_norm": 1.3683763295124916, "learning_rate": 2.369862143804648e-06, "loss": 0.0265, "step": 170720 }, { "epoch": 0.7123574033430414, "grad_norm": 0.4597072537592687, "learning_rate": 2.369827440431138e-06, "loss": 0.0176, "step": 170725 }, { "epoch": 0.7123782660580317, "grad_norm": 0.8877495230387273, "learning_rate": 2.3697927385821334e-06, "loss": 0.0245, "step": 170730 }, { "epoch": 0.712399128773022, "grad_norm": 0.6568622172948974, "learning_rate": 2.3697580382575224e-06, "loss": 0.0182, "step": 170735 }, { "epoch": 0.7124199914880123, "grad_norm": 0.6419338385924308, "learning_rate": 2.369723339457193e-06, "loss": 0.0252, "step": 170740 }, { "epoch": 0.7124408542030025, "grad_norm": 0.4850359271602023, "learning_rate": 2.369688642181033e-06, "loss": 0.0435, "step": 170745 }, { "epoch": 0.7124617169179929, "grad_norm": 0.7681732507304168, "learning_rate": 2.3696539464289324e-06, "loss": 0.017, "step": 170750 }, { "epoch": 0.7124825796329831, "grad_norm": 0.7928517397474991, "learning_rate": 2.3696192522007783e-06, "loss": 0.0199, "step": 170755 }, { "epoch": 0.7125034423479734, "grad_norm": 0.5072402902227285, "learning_rate": 2.36958455949646e-06, "loss": 0.0249, "step": 170760 }, { "epoch": 0.7125243050629637, "grad_norm": 0.5622809404751946, "learning_rate": 2.3695498683158658e-06, "loss": 0.016, "step": 170765 }, { "epoch": 0.712545167777954, "grad_norm": 1.0016835499576857, "learning_rate": 2.3695151786588834e-06, "loss": 0.0283, "step": 170770 }, { "epoch": 0.7125660304929442, "grad_norm": 0.4801337667323081, "learning_rate": 2.3694804905254024e-06, "loss": 0.021, "step": 170775 }, { "epoch": 0.7125868932079346, "grad_norm": 0.7079601148469168, "learning_rate": 2.3694458039153105e-06, "loss": 0.0345, "step": 170780 }, { "epoch": 0.7126077559229248, "grad_norm": 0.5129511508299134, "learning_rate": 2.369411118828496e-06, "loss": 0.0176, "step": 170785 }, { "epoch": 0.712628618637915, "grad_norm": 0.7073326093935711, "learning_rate": 2.3693764352648485e-06, "loss": 0.0203, "step": 170790 }, { "epoch": 0.7126494813529053, "grad_norm": 0.6347878903419836, "learning_rate": 2.3693417532242554e-06, "loss": 0.018, "step": 170795 }, { "epoch": 0.7126703440678956, "grad_norm": 0.7435206683094798, "learning_rate": 2.369307072706606e-06, "loss": 0.0236, "step": 170800 }, { "epoch": 0.7126912067828859, "grad_norm": 1.024302951369596, "learning_rate": 2.3692723937117885e-06, "loss": 0.0294, "step": 170805 }, { "epoch": 0.7127120694978761, "grad_norm": 0.8165199552212309, "learning_rate": 2.3692377162396914e-06, "loss": 0.0262, "step": 170810 }, { "epoch": 0.7127329322128665, "grad_norm": 0.570965069273583, "learning_rate": 2.3692030402902034e-06, "loss": 0.0234, "step": 170815 }, { "epoch": 0.7127537949278567, "grad_norm": 0.6893600189263742, "learning_rate": 2.369168365863213e-06, "loss": 0.02, "step": 170820 }, { "epoch": 0.712774657642847, "grad_norm": 0.39924754504533644, "learning_rate": 2.3691336929586085e-06, "loss": 0.0206, "step": 170825 }, { "epoch": 0.7127955203578373, "grad_norm": 0.33924508257342595, "learning_rate": 2.369099021576279e-06, "loss": 0.0237, "step": 170830 }, { "epoch": 0.7128163830728276, "grad_norm": 1.2361699867697373, "learning_rate": 2.3690643517161133e-06, "loss": 0.0354, "step": 170835 }, { "epoch": 0.7128372457878178, "grad_norm": 0.48046055166849816, "learning_rate": 2.3690296833779994e-06, "loss": 0.0222, "step": 170840 }, { "epoch": 0.7128581085028082, "grad_norm": 0.5930114898463, "learning_rate": 2.368995016561826e-06, "loss": 0.0178, "step": 170845 }, { "epoch": 0.7128789712177984, "grad_norm": 0.4836108764071213, "learning_rate": 2.3689603512674824e-06, "loss": 0.0178, "step": 170850 }, { "epoch": 0.7128998339327887, "grad_norm": 1.0958003862407641, "learning_rate": 2.368925687494857e-06, "loss": 0.0249, "step": 170855 }, { "epoch": 0.7129206966477789, "grad_norm": 1.2175434633470432, "learning_rate": 2.368891025243838e-06, "loss": 0.0162, "step": 170860 }, { "epoch": 0.7129415593627693, "grad_norm": 0.7194133470746168, "learning_rate": 2.3688563645143146e-06, "loss": 0.0242, "step": 170865 }, { "epoch": 0.7129624220777595, "grad_norm": 0.9801440196190755, "learning_rate": 2.3688217053061744e-06, "loss": 0.0223, "step": 170870 }, { "epoch": 0.7129832847927497, "grad_norm": 0.6105886040608904, "learning_rate": 2.368787047619308e-06, "loss": 0.0266, "step": 170875 }, { "epoch": 0.7130041475077401, "grad_norm": 0.7764572267737838, "learning_rate": 2.368752391453602e-06, "loss": 0.0201, "step": 170880 }, { "epoch": 0.7130250102227303, "grad_norm": 0.4461916793424341, "learning_rate": 2.3687177368089472e-06, "loss": 0.0277, "step": 170885 }, { "epoch": 0.7130458729377206, "grad_norm": 0.7432691800897251, "learning_rate": 2.3686830836852306e-06, "loss": 0.0147, "step": 170890 }, { "epoch": 0.713066735652711, "grad_norm": 0.31552949743463604, "learning_rate": 2.368648432082342e-06, "loss": 0.0182, "step": 170895 }, { "epoch": 0.7130875983677012, "grad_norm": 0.37498202874874326, "learning_rate": 2.36861378200017e-06, "loss": 0.0189, "step": 170900 }, { "epoch": 0.7131084610826914, "grad_norm": 0.6245732680876648, "learning_rate": 2.368579133438603e-06, "loss": 0.0167, "step": 170905 }, { "epoch": 0.7131293237976817, "grad_norm": 0.6849361017554401, "learning_rate": 2.36854448639753e-06, "loss": 0.0226, "step": 170910 }, { "epoch": 0.713150186512672, "grad_norm": 0.7161491175549225, "learning_rate": 2.3685098408768404e-06, "loss": 0.018, "step": 170915 }, { "epoch": 0.7131710492276623, "grad_norm": 0.3052071394269067, "learning_rate": 2.368475196876422e-06, "loss": 0.0183, "step": 170920 }, { "epoch": 0.7131919119426525, "grad_norm": 0.7803244756129691, "learning_rate": 2.3684405543961645e-06, "loss": 0.0261, "step": 170925 }, { "epoch": 0.7132127746576429, "grad_norm": 0.5675461438516882, "learning_rate": 2.3684059134359554e-06, "loss": 0.0311, "step": 170930 }, { "epoch": 0.7132336373726331, "grad_norm": 0.8395042064964682, "learning_rate": 2.368371273995685e-06, "loss": 0.0259, "step": 170935 }, { "epoch": 0.7132545000876234, "grad_norm": 0.13791098410328473, "learning_rate": 2.3683366360752417e-06, "loss": 0.023, "step": 170940 }, { "epoch": 0.7132753628026137, "grad_norm": 1.2203913084600473, "learning_rate": 2.3683019996745135e-06, "loss": 0.0259, "step": 170945 }, { "epoch": 0.713296225517604, "grad_norm": 0.7315902758127694, "learning_rate": 2.368267364793391e-06, "loss": 0.0215, "step": 170950 }, { "epoch": 0.7133170882325942, "grad_norm": 0.894847942224179, "learning_rate": 2.3682327314317614e-06, "loss": 0.0199, "step": 170955 }, { "epoch": 0.7133379509475846, "grad_norm": 0.5196315997301153, "learning_rate": 2.368198099589514e-06, "loss": 0.0207, "step": 170960 }, { "epoch": 0.7133588136625748, "grad_norm": 0.5532945741852309, "learning_rate": 2.368163469266539e-06, "loss": 0.0228, "step": 170965 }, { "epoch": 0.713379676377565, "grad_norm": 0.6390530248892852, "learning_rate": 2.368128840462723e-06, "loss": 0.0239, "step": 170970 }, { "epoch": 0.7134005390925553, "grad_norm": 1.5186148191767672, "learning_rate": 2.3680942131779573e-06, "loss": 0.0254, "step": 170975 }, { "epoch": 0.7134214018075457, "grad_norm": 1.2854872414056473, "learning_rate": 2.3680595874121293e-06, "loss": 0.0203, "step": 170980 }, { "epoch": 0.7134422645225359, "grad_norm": 0.7532276960017866, "learning_rate": 2.3680249631651287e-06, "loss": 0.02, "step": 170985 }, { "epoch": 0.7134631272375261, "grad_norm": 0.27827756520313507, "learning_rate": 2.3679903404368443e-06, "loss": 0.0154, "step": 170990 }, { "epoch": 0.7134839899525165, "grad_norm": 0.7655637878422951, "learning_rate": 2.367955719227165e-06, "loss": 0.0205, "step": 170995 }, { "epoch": 0.7135048526675067, "grad_norm": 0.536888011277446, "learning_rate": 2.36792109953598e-06, "loss": 0.0195, "step": 171000 }, { "epoch": 0.713525715382497, "grad_norm": 0.4981035161406543, "learning_rate": 2.3678864813631774e-06, "loss": 0.0186, "step": 171005 }, { "epoch": 0.7135465780974873, "grad_norm": 0.4553829885825166, "learning_rate": 2.3678518647086474e-06, "loss": 0.0248, "step": 171010 }, { "epoch": 0.7135674408124776, "grad_norm": 0.9975903305094798, "learning_rate": 2.367817249572278e-06, "loss": 0.02, "step": 171015 }, { "epoch": 0.7135883035274678, "grad_norm": 0.9092531958021084, "learning_rate": 2.3677826359539597e-06, "loss": 0.0171, "step": 171020 }, { "epoch": 0.7136091662424582, "grad_norm": 0.8017913664220424, "learning_rate": 2.36774802385358e-06, "loss": 0.0216, "step": 171025 }, { "epoch": 0.7136300289574484, "grad_norm": 0.8195677864825279, "learning_rate": 2.367713413271028e-06, "loss": 0.0217, "step": 171030 }, { "epoch": 0.7136508916724387, "grad_norm": 2.825946831789708, "learning_rate": 2.367678804206194e-06, "loss": 0.0194, "step": 171035 }, { "epoch": 0.7136717543874289, "grad_norm": 0.6887367632039738, "learning_rate": 2.367644196658967e-06, "loss": 0.0175, "step": 171040 }, { "epoch": 0.7136926171024193, "grad_norm": 0.6573767865323604, "learning_rate": 2.3676095906292348e-06, "loss": 0.0186, "step": 171045 }, { "epoch": 0.7137134798174095, "grad_norm": 1.2155090788037035, "learning_rate": 2.367574986116887e-06, "loss": 0.0356, "step": 171050 }, { "epoch": 0.7137343425323998, "grad_norm": 0.5831529233491156, "learning_rate": 2.3675403831218133e-06, "loss": 0.0216, "step": 171055 }, { "epoch": 0.7137552052473901, "grad_norm": 0.41332635059865036, "learning_rate": 2.3675057816439025e-06, "loss": 0.0174, "step": 171060 }, { "epoch": 0.7137760679623804, "grad_norm": 1.501949014857237, "learning_rate": 2.3674711816830433e-06, "loss": 0.021, "step": 171065 }, { "epoch": 0.7137969306773706, "grad_norm": 0.48816627041657146, "learning_rate": 2.3674365832391257e-06, "loss": 0.0203, "step": 171070 }, { "epoch": 0.713817793392361, "grad_norm": 0.6384942936557445, "learning_rate": 2.3674019863120382e-06, "loss": 0.0219, "step": 171075 }, { "epoch": 0.7138386561073512, "grad_norm": 0.8236314125452868, "learning_rate": 2.36736739090167e-06, "loss": 0.0136, "step": 171080 }, { "epoch": 0.7138595188223414, "grad_norm": 0.7358134157087902, "learning_rate": 2.367332797007911e-06, "loss": 0.0217, "step": 171085 }, { "epoch": 0.7138803815373317, "grad_norm": 0.9006112982811153, "learning_rate": 2.367298204630649e-06, "loss": 0.0271, "step": 171090 }, { "epoch": 0.713901244252322, "grad_norm": 0.48840593014255407, "learning_rate": 2.3672636137697745e-06, "loss": 0.0164, "step": 171095 }, { "epoch": 0.7139221069673123, "grad_norm": 0.538863238465856, "learning_rate": 2.3672290244251765e-06, "loss": 0.021, "step": 171100 }, { "epoch": 0.7139429696823025, "grad_norm": 0.7031716882485014, "learning_rate": 2.3671944365967437e-06, "loss": 0.0219, "step": 171105 }, { "epoch": 0.7139638323972929, "grad_norm": 0.6460143165926381, "learning_rate": 2.3671598502843653e-06, "loss": 0.0279, "step": 171110 }, { "epoch": 0.7139846951122831, "grad_norm": 0.6304132532134131, "learning_rate": 2.367125265487931e-06, "loss": 0.0154, "step": 171115 }, { "epoch": 0.7140055578272734, "grad_norm": 0.33586707439335217, "learning_rate": 2.3670906822073304e-06, "loss": 0.0194, "step": 171120 }, { "epoch": 0.7140264205422637, "grad_norm": 0.8806328601592449, "learning_rate": 2.3670561004424525e-06, "loss": 0.0211, "step": 171125 }, { "epoch": 0.714047283257254, "grad_norm": 0.5462661978180282, "learning_rate": 2.367021520193186e-06, "loss": 0.0221, "step": 171130 }, { "epoch": 0.7140681459722442, "grad_norm": 1.1651622305450289, "learning_rate": 2.36698694145942e-06, "loss": 0.0185, "step": 171135 }, { "epoch": 0.7140890086872346, "grad_norm": 0.3152933442907175, "learning_rate": 2.366952364241045e-06, "loss": 0.0182, "step": 171140 }, { "epoch": 0.7141098714022248, "grad_norm": 0.8959939455521398, "learning_rate": 2.36691778853795e-06, "loss": 0.0234, "step": 171145 }, { "epoch": 0.7141307341172151, "grad_norm": 1.331412224477467, "learning_rate": 2.3668832143500233e-06, "loss": 0.0249, "step": 171150 }, { "epoch": 0.7141515968322053, "grad_norm": 0.4383055128948013, "learning_rate": 2.366848641677155e-06, "loss": 0.0183, "step": 171155 }, { "epoch": 0.7141724595471957, "grad_norm": 0.7049892605781154, "learning_rate": 2.3668140705192354e-06, "loss": 0.019, "step": 171160 }, { "epoch": 0.7141933222621859, "grad_norm": 0.4920070005061327, "learning_rate": 2.3667795008761524e-06, "loss": 0.0227, "step": 171165 }, { "epoch": 0.7142141849771761, "grad_norm": 0.8274990212015234, "learning_rate": 2.366744932747796e-06, "loss": 0.0187, "step": 171170 }, { "epoch": 0.7142350476921665, "grad_norm": 0.705291014416148, "learning_rate": 2.366710366134055e-06, "loss": 0.0181, "step": 171175 }, { "epoch": 0.7142559104071567, "grad_norm": 0.7600689053237316, "learning_rate": 2.3666758010348193e-06, "loss": 0.0274, "step": 171180 }, { "epoch": 0.714276773122147, "grad_norm": 0.6763698377338225, "learning_rate": 2.366641237449979e-06, "loss": 0.0274, "step": 171185 }, { "epoch": 0.7142976358371373, "grad_norm": 0.7101782721522925, "learning_rate": 2.366606675379422e-06, "loss": 0.0173, "step": 171190 }, { "epoch": 0.7143184985521276, "grad_norm": 0.5376407195994872, "learning_rate": 2.366572114823039e-06, "loss": 0.0194, "step": 171195 }, { "epoch": 0.7143393612671178, "grad_norm": 0.514004121763031, "learning_rate": 2.366537555780719e-06, "loss": 0.0189, "step": 171200 }, { "epoch": 0.7143602239821082, "grad_norm": 0.7020776920074803, "learning_rate": 2.3665029982523512e-06, "loss": 0.0193, "step": 171205 }, { "epoch": 0.7143810866970984, "grad_norm": 0.2148801990677088, "learning_rate": 2.3664684422378255e-06, "loss": 0.0203, "step": 171210 }, { "epoch": 0.7144019494120887, "grad_norm": 0.5535766740204477, "learning_rate": 2.3664338877370306e-06, "loss": 0.0188, "step": 171215 }, { "epoch": 0.7144228121270789, "grad_norm": 1.0456729919289534, "learning_rate": 2.366399334749857e-06, "loss": 0.0236, "step": 171220 }, { "epoch": 0.7144436748420693, "grad_norm": 0.5647946310842706, "learning_rate": 2.366364783276194e-06, "loss": 0.0256, "step": 171225 }, { "epoch": 0.7144645375570595, "grad_norm": 0.6181590471014203, "learning_rate": 2.36633023331593e-06, "loss": 0.0215, "step": 171230 }, { "epoch": 0.7144854002720498, "grad_norm": 1.1926766778500235, "learning_rate": 2.366295684868956e-06, "loss": 0.0266, "step": 171235 }, { "epoch": 0.7145062629870401, "grad_norm": 1.7516659714068632, "learning_rate": 2.366261137935161e-06, "loss": 0.0341, "step": 171240 }, { "epoch": 0.7145271257020304, "grad_norm": 1.0580984375661535, "learning_rate": 2.366226592514434e-06, "loss": 0.019, "step": 171245 }, { "epoch": 0.7145479884170206, "grad_norm": 0.8064207809846704, "learning_rate": 2.3661920486066656e-06, "loss": 0.0404, "step": 171250 }, { "epoch": 0.714568851132011, "grad_norm": 1.8307827861443444, "learning_rate": 2.366157506211744e-06, "loss": 0.0219, "step": 171255 }, { "epoch": 0.7145897138470012, "grad_norm": 1.336306049923168, "learning_rate": 2.3661229653295605e-06, "loss": 0.026, "step": 171260 }, { "epoch": 0.7146105765619915, "grad_norm": 0.5261066912668543, "learning_rate": 2.366088425960003e-06, "loss": 0.0305, "step": 171265 }, { "epoch": 0.7146314392769817, "grad_norm": 0.5843362138490479, "learning_rate": 2.366053888102962e-06, "loss": 0.0274, "step": 171270 }, { "epoch": 0.714652301991972, "grad_norm": 0.5107623416167123, "learning_rate": 2.366019351758327e-06, "loss": 0.0185, "step": 171275 }, { "epoch": 0.7146731647069623, "grad_norm": 0.7850827373268697, "learning_rate": 2.3659848169259877e-06, "loss": 0.0182, "step": 171280 }, { "epoch": 0.7146940274219525, "grad_norm": 0.6093056553960663, "learning_rate": 2.3659502836058336e-06, "loss": 0.0197, "step": 171285 }, { "epoch": 0.7147148901369429, "grad_norm": 0.46208848319328233, "learning_rate": 2.365915751797754e-06, "loss": 0.0221, "step": 171290 }, { "epoch": 0.7147357528519331, "grad_norm": 0.7233740262954652, "learning_rate": 2.3658812215016396e-06, "loss": 0.0236, "step": 171295 }, { "epoch": 0.7147566155669234, "grad_norm": 0.6833214703392931, "learning_rate": 2.365846692717379e-06, "loss": 0.0268, "step": 171300 }, { "epoch": 0.7147774782819137, "grad_norm": 0.930979959800367, "learning_rate": 2.3658121654448624e-06, "loss": 0.0188, "step": 171305 }, { "epoch": 0.714798340996904, "grad_norm": 0.6807352028839113, "learning_rate": 2.3657776396839793e-06, "loss": 0.0248, "step": 171310 }, { "epoch": 0.7148192037118942, "grad_norm": 1.2047162135739258, "learning_rate": 2.3657431154346193e-06, "loss": 0.0231, "step": 171315 }, { "epoch": 0.7148400664268846, "grad_norm": 0.3191277407591124, "learning_rate": 2.3657085926966726e-06, "loss": 0.0214, "step": 171320 }, { "epoch": 0.7148609291418748, "grad_norm": 0.3591102592732549, "learning_rate": 2.365674071470028e-06, "loss": 0.0157, "step": 171325 }, { "epoch": 0.7148817918568651, "grad_norm": 1.178159341224755, "learning_rate": 2.365639551754577e-06, "loss": 0.0235, "step": 171330 }, { "epoch": 0.7149026545718553, "grad_norm": 0.25516703458408874, "learning_rate": 2.365605033550207e-06, "loss": 0.0207, "step": 171335 }, { "epoch": 0.7149235172868457, "grad_norm": 0.4930190243990321, "learning_rate": 2.3655705168568094e-06, "loss": 0.0219, "step": 171340 }, { "epoch": 0.7149443800018359, "grad_norm": 0.871619972473909, "learning_rate": 2.3655360016742735e-06, "loss": 0.0295, "step": 171345 }, { "epoch": 0.7149652427168262, "grad_norm": 0.4746851281467987, "learning_rate": 2.365501488002489e-06, "loss": 0.0203, "step": 171350 }, { "epoch": 0.7149861054318165, "grad_norm": 0.6575140973015282, "learning_rate": 2.3654669758413458e-06, "loss": 0.0312, "step": 171355 }, { "epoch": 0.7150069681468068, "grad_norm": 1.0360408680065363, "learning_rate": 2.3654324651907338e-06, "loss": 0.027, "step": 171360 }, { "epoch": 0.715027830861797, "grad_norm": 1.2349762592532951, "learning_rate": 2.365397956050543e-06, "loss": 0.0181, "step": 171365 }, { "epoch": 0.7150486935767874, "grad_norm": 0.5203354175627626, "learning_rate": 2.3653634484206618e-06, "loss": 0.0233, "step": 171370 }, { "epoch": 0.7150695562917776, "grad_norm": 0.7688080576730554, "learning_rate": 2.365328942300982e-06, "loss": 0.0231, "step": 171375 }, { "epoch": 0.7150904190067678, "grad_norm": 0.7145510170650108, "learning_rate": 2.365294437691393e-06, "loss": 0.0185, "step": 171380 }, { "epoch": 0.7151112817217582, "grad_norm": 1.876891372074851, "learning_rate": 2.3652599345917836e-06, "loss": 0.0251, "step": 171385 }, { "epoch": 0.7151321444367484, "grad_norm": 0.549198832320491, "learning_rate": 2.365225433002045e-06, "loss": 0.0276, "step": 171390 }, { "epoch": 0.7151530071517387, "grad_norm": 0.4953866739198314, "learning_rate": 2.3651909329220656e-06, "loss": 0.0208, "step": 171395 }, { "epoch": 0.7151738698667289, "grad_norm": 0.5704569556283084, "learning_rate": 2.3651564343517363e-06, "loss": 0.0219, "step": 171400 }, { "epoch": 0.7151947325817193, "grad_norm": 0.794619318040647, "learning_rate": 2.3651219372909472e-06, "loss": 0.0157, "step": 171405 }, { "epoch": 0.7152155952967095, "grad_norm": 0.8664580956898507, "learning_rate": 2.365087441739588e-06, "loss": 0.0158, "step": 171410 }, { "epoch": 0.7152364580116998, "grad_norm": 0.6444690899205229, "learning_rate": 2.3650529476975483e-06, "loss": 0.0207, "step": 171415 }, { "epoch": 0.7152573207266901, "grad_norm": 0.5887507301697416, "learning_rate": 2.3650184551647178e-06, "loss": 0.0211, "step": 171420 }, { "epoch": 0.7152781834416804, "grad_norm": 0.29023096070007254, "learning_rate": 2.364983964140987e-06, "loss": 0.0152, "step": 171425 }, { "epoch": 0.7152990461566706, "grad_norm": 0.5396378427127221, "learning_rate": 2.364949474626246e-06, "loss": 0.0207, "step": 171430 }, { "epoch": 0.715319908871661, "grad_norm": 0.4745874270767489, "learning_rate": 2.3649149866203842e-06, "loss": 0.022, "step": 171435 }, { "epoch": 0.7153407715866512, "grad_norm": 1.026658742870582, "learning_rate": 2.364880500123292e-06, "loss": 0.0196, "step": 171440 }, { "epoch": 0.7153616343016415, "grad_norm": 0.571889185457439, "learning_rate": 2.3648460151348595e-06, "loss": 0.0234, "step": 171445 }, { "epoch": 0.7153824970166317, "grad_norm": 0.47267875792072833, "learning_rate": 2.364811531654976e-06, "loss": 0.0211, "step": 171450 }, { "epoch": 0.7154033597316221, "grad_norm": 0.7632898317168458, "learning_rate": 2.364777049683532e-06, "loss": 0.0216, "step": 171455 }, { "epoch": 0.7154242224466123, "grad_norm": 0.6405730156581804, "learning_rate": 2.364742569220418e-06, "loss": 0.0215, "step": 171460 }, { "epoch": 0.7154450851616025, "grad_norm": 0.7193642000145449, "learning_rate": 2.3647080902655236e-06, "loss": 0.0194, "step": 171465 }, { "epoch": 0.7154659478765929, "grad_norm": 0.732730358276414, "learning_rate": 2.3646736128187382e-06, "loss": 0.0172, "step": 171470 }, { "epoch": 0.7154868105915831, "grad_norm": 0.5276060211352889, "learning_rate": 2.3646391368799527e-06, "loss": 0.024, "step": 171475 }, { "epoch": 0.7155076733065734, "grad_norm": 0.9112015568706089, "learning_rate": 2.3646046624490574e-06, "loss": 0.0287, "step": 171480 }, { "epoch": 0.7155285360215637, "grad_norm": 0.5853099026747568, "learning_rate": 2.3645701895259418e-06, "loss": 0.0185, "step": 171485 }, { "epoch": 0.715549398736554, "grad_norm": 0.4630456107118087, "learning_rate": 2.3645357181104956e-06, "loss": 0.0118, "step": 171490 }, { "epoch": 0.7155702614515442, "grad_norm": 0.32549990468249007, "learning_rate": 2.3645012482026097e-06, "loss": 0.0214, "step": 171495 }, { "epoch": 0.7155911241665346, "grad_norm": 0.2442379947030111, "learning_rate": 2.3644667798021744e-06, "loss": 0.0253, "step": 171500 }, { "epoch": 0.7156119868815248, "grad_norm": 0.6592660756488762, "learning_rate": 2.3644323129090786e-06, "loss": 0.0302, "step": 171505 }, { "epoch": 0.7156328495965151, "grad_norm": 0.4729519101312238, "learning_rate": 2.364397847523214e-06, "loss": 0.0264, "step": 171510 }, { "epoch": 0.7156537123115053, "grad_norm": 0.5417098114731512, "learning_rate": 2.3643633836444697e-06, "loss": 0.031, "step": 171515 }, { "epoch": 0.7156745750264957, "grad_norm": 0.8900300369384884, "learning_rate": 2.364328921272736e-06, "loss": 0.0303, "step": 171520 }, { "epoch": 0.7156954377414859, "grad_norm": 2.172408289609441, "learning_rate": 2.3642944604079033e-06, "loss": 0.0267, "step": 171525 }, { "epoch": 0.7157163004564762, "grad_norm": 0.8924093926494387, "learning_rate": 2.364260001049861e-06, "loss": 0.0223, "step": 171530 }, { "epoch": 0.7157371631714665, "grad_norm": 1.4410750931174425, "learning_rate": 2.364225543198501e-06, "loss": 0.0253, "step": 171535 }, { "epoch": 0.7157580258864568, "grad_norm": 0.9864249795701384, "learning_rate": 2.364191086853712e-06, "loss": 0.023, "step": 171540 }, { "epoch": 0.715778888601447, "grad_norm": 0.5771383437225043, "learning_rate": 2.3641566320153846e-06, "loss": 0.0203, "step": 171545 }, { "epoch": 0.7157997513164374, "grad_norm": 0.5730276507217226, "learning_rate": 2.3641221786834096e-06, "loss": 0.0168, "step": 171550 }, { "epoch": 0.7158206140314276, "grad_norm": 0.6721485141387529, "learning_rate": 2.364087726857677e-06, "loss": 0.0271, "step": 171555 }, { "epoch": 0.7158414767464178, "grad_norm": 0.9921556384645512, "learning_rate": 2.3640532765380763e-06, "loss": 0.0294, "step": 171560 }, { "epoch": 0.7158623394614082, "grad_norm": 0.3312201396716861, "learning_rate": 2.364018827724498e-06, "loss": 0.0184, "step": 171565 }, { "epoch": 0.7158832021763984, "grad_norm": 0.600217504258347, "learning_rate": 2.363984380416833e-06, "loss": 0.0181, "step": 171570 }, { "epoch": 0.7159040648913887, "grad_norm": 0.8285125316311756, "learning_rate": 2.3639499346149718e-06, "loss": 0.0249, "step": 171575 }, { "epoch": 0.7159249276063789, "grad_norm": 0.9150310260714761, "learning_rate": 2.3639154903188037e-06, "loss": 0.0195, "step": 171580 }, { "epoch": 0.7159457903213693, "grad_norm": 0.7606885377550168, "learning_rate": 2.3638810475282194e-06, "loss": 0.0188, "step": 171585 }, { "epoch": 0.7159666530363595, "grad_norm": 0.41475319924914184, "learning_rate": 2.3638466062431095e-06, "loss": 0.0204, "step": 171590 }, { "epoch": 0.7159875157513498, "grad_norm": 0.8177914790390571, "learning_rate": 2.363812166463364e-06, "loss": 0.021, "step": 171595 }, { "epoch": 0.7160083784663401, "grad_norm": 0.6382831248135944, "learning_rate": 2.3637777281888733e-06, "loss": 0.0277, "step": 171600 }, { "epoch": 0.7160292411813304, "grad_norm": 0.7598574582041415, "learning_rate": 2.3637432914195277e-06, "loss": 0.0315, "step": 171605 }, { "epoch": 0.7160501038963206, "grad_norm": 0.47637721335500116, "learning_rate": 2.3637088561552182e-06, "loss": 0.0224, "step": 171610 }, { "epoch": 0.716070966611311, "grad_norm": 0.7780296498157208, "learning_rate": 2.3636744223958343e-06, "loss": 0.0198, "step": 171615 }, { "epoch": 0.7160918293263012, "grad_norm": 0.32314289235783816, "learning_rate": 2.3636399901412666e-06, "loss": 0.019, "step": 171620 }, { "epoch": 0.7161126920412915, "grad_norm": 1.2870715548654814, "learning_rate": 2.3636055593914064e-06, "loss": 0.022, "step": 171625 }, { "epoch": 0.7161335547562817, "grad_norm": 0.49175288222615365, "learning_rate": 2.363571130146142e-06, "loss": 0.0264, "step": 171630 }, { "epoch": 0.7161544174712721, "grad_norm": 0.8223344294355541, "learning_rate": 2.3635367024053665e-06, "loss": 0.0333, "step": 171635 }, { "epoch": 0.7161752801862623, "grad_norm": 0.9121106882900037, "learning_rate": 2.3635022761689684e-06, "loss": 0.0225, "step": 171640 }, { "epoch": 0.7161961429012526, "grad_norm": 0.313106992152815, "learning_rate": 2.3634678514368385e-06, "loss": 0.0208, "step": 171645 }, { "epoch": 0.7162170056162429, "grad_norm": 0.6033203234221193, "learning_rate": 2.363433428208868e-06, "loss": 0.0307, "step": 171650 }, { "epoch": 0.7162378683312332, "grad_norm": 0.4851420484839221, "learning_rate": 2.3633990064849464e-06, "loss": 0.0239, "step": 171655 }, { "epoch": 0.7162587310462234, "grad_norm": 1.0219157787941444, "learning_rate": 2.3633645862649652e-06, "loss": 0.0212, "step": 171660 }, { "epoch": 0.7162795937612138, "grad_norm": 1.0315422866245383, "learning_rate": 2.363330167548814e-06, "loss": 0.0277, "step": 171665 }, { "epoch": 0.716300456476204, "grad_norm": 0.2942974587769704, "learning_rate": 2.3632957503363842e-06, "loss": 0.0159, "step": 171670 }, { "epoch": 0.7163213191911942, "grad_norm": 0.6864461468221934, "learning_rate": 2.3632613346275646e-06, "loss": 0.0212, "step": 171675 }, { "epoch": 0.7163421819061846, "grad_norm": 0.540374195510115, "learning_rate": 2.3632269204222475e-06, "loss": 0.0172, "step": 171680 }, { "epoch": 0.7163630446211748, "grad_norm": 0.756577104963257, "learning_rate": 2.363192507720323e-06, "loss": 0.0258, "step": 171685 }, { "epoch": 0.7163839073361651, "grad_norm": 0.5721161135539973, "learning_rate": 2.363158096521681e-06, "loss": 0.0276, "step": 171690 }, { "epoch": 0.7164047700511553, "grad_norm": 0.4217849967150743, "learning_rate": 2.363123686826213e-06, "loss": 0.0235, "step": 171695 }, { "epoch": 0.7164256327661457, "grad_norm": 0.3440226327461248, "learning_rate": 2.3630892786338086e-06, "loss": 0.0207, "step": 171700 }, { "epoch": 0.7164464954811359, "grad_norm": 0.815895990761401, "learning_rate": 2.3630548719443593e-06, "loss": 0.0179, "step": 171705 }, { "epoch": 0.7164673581961262, "grad_norm": 0.6979395324993183, "learning_rate": 2.363020466757754e-06, "loss": 0.0318, "step": 171710 }, { "epoch": 0.7164882209111165, "grad_norm": 0.622029339408556, "learning_rate": 2.3629860630738854e-06, "loss": 0.0181, "step": 171715 }, { "epoch": 0.7165090836261068, "grad_norm": 0.4825533361654081, "learning_rate": 2.3629516608926436e-06, "loss": 0.0242, "step": 171720 }, { "epoch": 0.716529946341097, "grad_norm": 0.30990755605565395, "learning_rate": 2.362917260213919e-06, "loss": 0.0228, "step": 171725 }, { "epoch": 0.7165508090560874, "grad_norm": 0.7453316955625495, "learning_rate": 2.362882861037601e-06, "loss": 0.0212, "step": 171730 }, { "epoch": 0.7165716717710776, "grad_norm": 0.6030727187562928, "learning_rate": 2.362848463363582e-06, "loss": 0.0298, "step": 171735 }, { "epoch": 0.7165925344860679, "grad_norm": 1.0427240294615039, "learning_rate": 2.362814067191752e-06, "loss": 0.0313, "step": 171740 }, { "epoch": 0.7166133972010582, "grad_norm": 0.6708649946977565, "learning_rate": 2.3627796725220014e-06, "loss": 0.0312, "step": 171745 }, { "epoch": 0.7166342599160485, "grad_norm": 1.0250030390137685, "learning_rate": 2.3627452793542206e-06, "loss": 0.0243, "step": 171750 }, { "epoch": 0.7166551226310387, "grad_norm": 1.034427257415783, "learning_rate": 2.362710887688302e-06, "loss": 0.0225, "step": 171755 }, { "epoch": 0.7166759853460289, "grad_norm": 0.6120209105237303, "learning_rate": 2.3626764975241347e-06, "loss": 0.0205, "step": 171760 }, { "epoch": 0.7166968480610193, "grad_norm": 0.9438776036232298, "learning_rate": 2.36264210886161e-06, "loss": 0.0241, "step": 171765 }, { "epoch": 0.7167177107760095, "grad_norm": 0.544442245778296, "learning_rate": 2.3626077217006174e-06, "loss": 0.0203, "step": 171770 }, { "epoch": 0.7167385734909998, "grad_norm": 0.3216305354516092, "learning_rate": 2.3625733360410492e-06, "loss": 0.0244, "step": 171775 }, { "epoch": 0.7167594362059901, "grad_norm": 0.41382171378150634, "learning_rate": 2.362538951882796e-06, "loss": 0.0194, "step": 171780 }, { "epoch": 0.7167802989209804, "grad_norm": 1.323823285551843, "learning_rate": 2.362504569225748e-06, "loss": 0.0252, "step": 171785 }, { "epoch": 0.7168011616359706, "grad_norm": 0.5535660970124052, "learning_rate": 2.3624701880697965e-06, "loss": 0.0167, "step": 171790 }, { "epoch": 0.716822024350961, "grad_norm": 0.406658080468376, "learning_rate": 2.3624358084148313e-06, "loss": 0.0185, "step": 171795 }, { "epoch": 0.7168428870659512, "grad_norm": 1.2710971138548692, "learning_rate": 2.3624014302607438e-06, "loss": 0.0223, "step": 171800 }, { "epoch": 0.7168637497809415, "grad_norm": 0.7769631927686431, "learning_rate": 2.3623670536074245e-06, "loss": 0.0214, "step": 171805 }, { "epoch": 0.7168846124959317, "grad_norm": 0.7406227471510366, "learning_rate": 2.3623326784547656e-06, "loss": 0.0208, "step": 171810 }, { "epoch": 0.7169054752109221, "grad_norm": 0.5739143723631805, "learning_rate": 2.3622983048026565e-06, "loss": 0.0214, "step": 171815 }, { "epoch": 0.7169263379259123, "grad_norm": 0.33813255477370524, "learning_rate": 2.362263932650988e-06, "loss": 0.0176, "step": 171820 }, { "epoch": 0.7169472006409026, "grad_norm": 0.3699547144212135, "learning_rate": 2.3622295619996513e-06, "loss": 0.0257, "step": 171825 }, { "epoch": 0.7169680633558929, "grad_norm": 0.5792339038736851, "learning_rate": 2.3621951928485372e-06, "loss": 0.0194, "step": 171830 }, { "epoch": 0.7169889260708832, "grad_norm": 0.5665735016350691, "learning_rate": 2.362160825197537e-06, "loss": 0.0254, "step": 171835 }, { "epoch": 0.7170097887858734, "grad_norm": 0.475712981634604, "learning_rate": 2.3621264590465412e-06, "loss": 0.0272, "step": 171840 }, { "epoch": 0.7170306515008638, "grad_norm": 1.2136624755150112, "learning_rate": 2.3620920943954404e-06, "loss": 0.0296, "step": 171845 }, { "epoch": 0.717051514215854, "grad_norm": 0.6919241949211371, "learning_rate": 2.362057731244126e-06, "loss": 0.0267, "step": 171850 }, { "epoch": 0.7170723769308442, "grad_norm": 0.7407692815290516, "learning_rate": 2.3620233695924885e-06, "loss": 0.0314, "step": 171855 }, { "epoch": 0.7170932396458346, "grad_norm": 0.7338308978042943, "learning_rate": 2.3619890094404193e-06, "loss": 0.0195, "step": 171860 }, { "epoch": 0.7171141023608248, "grad_norm": 0.7829361565969908, "learning_rate": 2.361954650787809e-06, "loss": 0.0218, "step": 171865 }, { "epoch": 0.7171349650758151, "grad_norm": 0.6401203096339698, "learning_rate": 2.361920293634549e-06, "loss": 0.0186, "step": 171870 }, { "epoch": 0.7171558277908053, "grad_norm": 1.1440879677102989, "learning_rate": 2.3618859379805286e-06, "loss": 0.0232, "step": 171875 }, { "epoch": 0.7171766905057957, "grad_norm": 0.6991805255473484, "learning_rate": 2.361851583825641e-06, "loss": 0.0237, "step": 171880 }, { "epoch": 0.7171975532207859, "grad_norm": 0.46299694935938973, "learning_rate": 2.361817231169776e-06, "loss": 0.0283, "step": 171885 }, { "epoch": 0.7172184159357762, "grad_norm": 0.4386834277967379, "learning_rate": 2.3617828800128246e-06, "loss": 0.0182, "step": 171890 }, { "epoch": 0.7172392786507665, "grad_norm": 0.706650379700715, "learning_rate": 2.361748530354679e-06, "loss": 0.0274, "step": 171895 }, { "epoch": 0.7172601413657568, "grad_norm": 0.5254100953824233, "learning_rate": 2.361714182195228e-06, "loss": 0.0135, "step": 171900 }, { "epoch": 0.717281004080747, "grad_norm": 0.377363797942789, "learning_rate": 2.3616798355343643e-06, "loss": 0.0197, "step": 171905 }, { "epoch": 0.7173018667957374, "grad_norm": 1.1341188423519348, "learning_rate": 2.361645490371978e-06, "loss": 0.0338, "step": 171910 }, { "epoch": 0.7173227295107276, "grad_norm": 1.0617267765717782, "learning_rate": 2.361611146707961e-06, "loss": 0.0344, "step": 171915 }, { "epoch": 0.7173435922257179, "grad_norm": 0.5493759704339402, "learning_rate": 2.3615768045422045e-06, "loss": 0.0233, "step": 171920 }, { "epoch": 0.7173644549407082, "grad_norm": 1.125780238952937, "learning_rate": 2.361542463874598e-06, "loss": 0.0242, "step": 171925 }, { "epoch": 0.7173853176556985, "grad_norm": 0.3674172215105339, "learning_rate": 2.3615081247050342e-06, "loss": 0.0164, "step": 171930 }, { "epoch": 0.7174061803706887, "grad_norm": 1.4582421326369648, "learning_rate": 2.3614737870334032e-06, "loss": 0.0331, "step": 171935 }, { "epoch": 0.717427043085679, "grad_norm": 0.35362638370444843, "learning_rate": 2.361439450859597e-06, "loss": 0.018, "step": 171940 }, { "epoch": 0.7174479058006693, "grad_norm": 0.6318473033603544, "learning_rate": 2.3614051161835058e-06, "loss": 0.0184, "step": 171945 }, { "epoch": 0.7174687685156595, "grad_norm": 0.6491224283504934, "learning_rate": 2.361370783005021e-06, "loss": 0.0254, "step": 171950 }, { "epoch": 0.7174896312306498, "grad_norm": 0.519616362321138, "learning_rate": 2.361336451324034e-06, "loss": 0.0183, "step": 171955 }, { "epoch": 0.7175104939456401, "grad_norm": 1.2586435465165697, "learning_rate": 2.3613021211404356e-06, "loss": 0.0167, "step": 171960 }, { "epoch": 0.7175313566606304, "grad_norm": 1.0326831593472732, "learning_rate": 2.361267792454118e-06, "loss": 0.0183, "step": 171965 }, { "epoch": 0.7175522193756206, "grad_norm": 0.9310230525468965, "learning_rate": 2.361233465264971e-06, "loss": 0.0198, "step": 171970 }, { "epoch": 0.717573082090611, "grad_norm": 0.5745430937373959, "learning_rate": 2.361199139572886e-06, "loss": 0.0169, "step": 171975 }, { "epoch": 0.7175939448056012, "grad_norm": 0.6828900488137206, "learning_rate": 2.361164815377755e-06, "loss": 0.0206, "step": 171980 }, { "epoch": 0.7176148075205915, "grad_norm": 0.7306038534309456, "learning_rate": 2.361130492679468e-06, "loss": 0.0162, "step": 171985 }, { "epoch": 0.7176356702355817, "grad_norm": 0.8854392112308357, "learning_rate": 2.361096171477917e-06, "loss": 0.0211, "step": 171990 }, { "epoch": 0.7176565329505721, "grad_norm": 0.4599353422442467, "learning_rate": 2.3610618517729937e-06, "loss": 0.0154, "step": 171995 }, { "epoch": 0.7176773956655623, "grad_norm": 0.4588183251370302, "learning_rate": 2.3610275335645887e-06, "loss": 0.0161, "step": 172000 }, { "epoch": 0.7176982583805526, "grad_norm": 0.3050505590337144, "learning_rate": 2.3609932168525928e-06, "loss": 0.0257, "step": 172005 }, { "epoch": 0.7177191210955429, "grad_norm": 0.5409900383941078, "learning_rate": 2.3609589016368974e-06, "loss": 0.0178, "step": 172010 }, { "epoch": 0.7177399838105332, "grad_norm": 0.7244334481174951, "learning_rate": 2.3609245879173948e-06, "loss": 0.0274, "step": 172015 }, { "epoch": 0.7177608465255234, "grad_norm": 0.8171642951897388, "learning_rate": 2.3608902756939755e-06, "loss": 0.0175, "step": 172020 }, { "epoch": 0.7177817092405138, "grad_norm": 0.4321495659783802, "learning_rate": 2.3608559649665307e-06, "loss": 0.014, "step": 172025 }, { "epoch": 0.717802571955504, "grad_norm": 0.9782274168308095, "learning_rate": 2.360821655734952e-06, "loss": 0.0228, "step": 172030 }, { "epoch": 0.7178234346704943, "grad_norm": 0.7023055868123812, "learning_rate": 2.3607873479991307e-06, "loss": 0.0206, "step": 172035 }, { "epoch": 0.7178442973854846, "grad_norm": 0.9516614658005816, "learning_rate": 2.3607530417589573e-06, "loss": 0.0172, "step": 172040 }, { "epoch": 0.7178651601004749, "grad_norm": 0.7603767038835596, "learning_rate": 2.3607187370143244e-06, "loss": 0.0296, "step": 172045 }, { "epoch": 0.7178860228154651, "grad_norm": 0.7149081319916442, "learning_rate": 2.360684433765123e-06, "loss": 0.0249, "step": 172050 }, { "epoch": 0.7179068855304553, "grad_norm": 0.9575706900601577, "learning_rate": 2.3606501320112436e-06, "loss": 0.0276, "step": 172055 }, { "epoch": 0.7179277482454457, "grad_norm": 0.5964953083457868, "learning_rate": 2.3606158317525784e-06, "loss": 0.022, "step": 172060 }, { "epoch": 0.7179486109604359, "grad_norm": 0.8987457869214339, "learning_rate": 2.3605815329890188e-06, "loss": 0.0197, "step": 172065 }, { "epoch": 0.7179694736754262, "grad_norm": 1.0746994852808165, "learning_rate": 2.360547235720456e-06, "loss": 0.0322, "step": 172070 }, { "epoch": 0.7179903363904165, "grad_norm": 0.47677347322335234, "learning_rate": 2.360512939946781e-06, "loss": 0.0204, "step": 172075 }, { "epoch": 0.7180111991054068, "grad_norm": 1.02718749263853, "learning_rate": 2.360478645667886e-06, "loss": 0.0232, "step": 172080 }, { "epoch": 0.718032061820397, "grad_norm": 0.6880851337720887, "learning_rate": 2.3604443528836614e-06, "loss": 0.0216, "step": 172085 }, { "epoch": 0.7180529245353874, "grad_norm": 0.5499292638009334, "learning_rate": 2.360410061593999e-06, "loss": 0.0351, "step": 172090 }, { "epoch": 0.7180737872503776, "grad_norm": 0.7204295761328013, "learning_rate": 2.3603757717987916e-06, "loss": 0.0234, "step": 172095 }, { "epoch": 0.7180946499653679, "grad_norm": 0.3328921544589619, "learning_rate": 2.3603414834979286e-06, "loss": 0.0173, "step": 172100 }, { "epoch": 0.7181155126803582, "grad_norm": 0.7727462352991163, "learning_rate": 2.360307196691303e-06, "loss": 0.0245, "step": 172105 }, { "epoch": 0.7181363753953485, "grad_norm": 0.9834482234173516, "learning_rate": 2.3602729113788048e-06, "loss": 0.0225, "step": 172110 }, { "epoch": 0.7181572381103387, "grad_norm": 0.752048183275835, "learning_rate": 2.360238627560327e-06, "loss": 0.0171, "step": 172115 }, { "epoch": 0.718178100825329, "grad_norm": 0.7669731659703938, "learning_rate": 2.3602043452357596e-06, "loss": 0.0202, "step": 172120 }, { "epoch": 0.7181989635403193, "grad_norm": 0.8332678657301981, "learning_rate": 2.360170064404996e-06, "loss": 0.026, "step": 172125 }, { "epoch": 0.7182198262553096, "grad_norm": 0.2460589314356064, "learning_rate": 2.360135785067926e-06, "loss": 0.0195, "step": 172130 }, { "epoch": 0.7182406889702998, "grad_norm": 1.8469740882712584, "learning_rate": 2.3601015072244423e-06, "loss": 0.0195, "step": 172135 }, { "epoch": 0.7182615516852902, "grad_norm": 0.5685066284699389, "learning_rate": 2.360067230874435e-06, "loss": 0.0163, "step": 172140 }, { "epoch": 0.7182824144002804, "grad_norm": 0.6213003149590858, "learning_rate": 2.3600329560177974e-06, "loss": 0.0229, "step": 172145 }, { "epoch": 0.7183032771152706, "grad_norm": 1.7752197769961753, "learning_rate": 2.35999868265442e-06, "loss": 0.0304, "step": 172150 }, { "epoch": 0.718324139830261, "grad_norm": 0.8387594429368086, "learning_rate": 2.3599644107841942e-06, "loss": 0.0224, "step": 172155 }, { "epoch": 0.7183450025452512, "grad_norm": 0.6315596485114754, "learning_rate": 2.3599301404070126e-06, "loss": 0.0217, "step": 172160 }, { "epoch": 0.7183658652602415, "grad_norm": 0.4281568604663015, "learning_rate": 2.359895871522766e-06, "loss": 0.0155, "step": 172165 }, { "epoch": 0.7183867279752317, "grad_norm": 0.8250339540950284, "learning_rate": 2.3598616041313465e-06, "loss": 0.0286, "step": 172170 }, { "epoch": 0.7184075906902221, "grad_norm": 0.3297814027946294, "learning_rate": 2.359827338232645e-06, "loss": 0.0191, "step": 172175 }, { "epoch": 0.7184284534052123, "grad_norm": 0.7619852076114649, "learning_rate": 2.3597930738265534e-06, "loss": 0.0163, "step": 172180 }, { "epoch": 0.7184493161202026, "grad_norm": 0.7432407362594361, "learning_rate": 2.359758810912964e-06, "loss": 0.0262, "step": 172185 }, { "epoch": 0.7184701788351929, "grad_norm": 0.6131163032356116, "learning_rate": 2.3597245494917675e-06, "loss": 0.02, "step": 172190 }, { "epoch": 0.7184910415501832, "grad_norm": 0.6677570506229749, "learning_rate": 2.3596902895628562e-06, "loss": 0.0194, "step": 172195 }, { "epoch": 0.7185119042651734, "grad_norm": 0.38183473566861037, "learning_rate": 2.3596560311261214e-06, "loss": 0.0245, "step": 172200 }, { "epoch": 0.7185327669801638, "grad_norm": 1.1958463933544101, "learning_rate": 2.3596217741814555e-06, "loss": 0.0214, "step": 172205 }, { "epoch": 0.718553629695154, "grad_norm": 0.7339745626380052, "learning_rate": 2.359587518728749e-06, "loss": 0.0234, "step": 172210 }, { "epoch": 0.7185744924101443, "grad_norm": 0.5325136576670872, "learning_rate": 2.3595532647678948e-06, "loss": 0.0204, "step": 172215 }, { "epoch": 0.7185953551251346, "grad_norm": 0.35848460151354333, "learning_rate": 2.3595190122987836e-06, "loss": 0.0164, "step": 172220 }, { "epoch": 0.7186162178401249, "grad_norm": 0.5803334440095925, "learning_rate": 2.359484761321308e-06, "loss": 0.0171, "step": 172225 }, { "epoch": 0.7186370805551151, "grad_norm": 0.8804087993842212, "learning_rate": 2.3594505118353593e-06, "loss": 0.0198, "step": 172230 }, { "epoch": 0.7186579432701053, "grad_norm": 0.8575069622387853, "learning_rate": 2.3594162638408293e-06, "loss": 0.0208, "step": 172235 }, { "epoch": 0.7186788059850957, "grad_norm": 0.5045971685880493, "learning_rate": 2.3593820173376097e-06, "loss": 0.0221, "step": 172240 }, { "epoch": 0.718699668700086, "grad_norm": 0.6088974140690343, "learning_rate": 2.359347772325592e-06, "loss": 0.017, "step": 172245 }, { "epoch": 0.7187205314150762, "grad_norm": 0.9382881796891751, "learning_rate": 2.3593135288046685e-06, "loss": 0.0244, "step": 172250 }, { "epoch": 0.7187413941300665, "grad_norm": 0.8380751513458021, "learning_rate": 2.3592792867747315e-06, "loss": 0.03, "step": 172255 }, { "epoch": 0.7187622568450568, "grad_norm": 0.3770339401440943, "learning_rate": 2.359245046235671e-06, "loss": 0.0215, "step": 172260 }, { "epoch": 0.718783119560047, "grad_norm": 0.6503061068446877, "learning_rate": 2.3592108071873808e-06, "loss": 0.0203, "step": 172265 }, { "epoch": 0.7188039822750374, "grad_norm": 0.7346149886592187, "learning_rate": 2.359176569629752e-06, "loss": 0.0251, "step": 172270 }, { "epoch": 0.7188248449900276, "grad_norm": 0.36728856179581343, "learning_rate": 2.3591423335626757e-06, "loss": 0.0203, "step": 172275 }, { "epoch": 0.7188457077050179, "grad_norm": 0.49657771267927336, "learning_rate": 2.3591080989860445e-06, "loss": 0.0235, "step": 172280 }, { "epoch": 0.7188665704200082, "grad_norm": 0.6541160664229738, "learning_rate": 2.35907386589975e-06, "loss": 0.0167, "step": 172285 }, { "epoch": 0.7188874331349985, "grad_norm": 0.47595798152300883, "learning_rate": 2.359039634303685e-06, "loss": 0.0228, "step": 172290 }, { "epoch": 0.7189082958499887, "grad_norm": 0.7150425994171323, "learning_rate": 2.3590054041977393e-06, "loss": 0.0226, "step": 172295 }, { "epoch": 0.718929158564979, "grad_norm": 0.34555245165711646, "learning_rate": 2.358971175581807e-06, "loss": 0.016, "step": 172300 }, { "epoch": 0.7189500212799693, "grad_norm": 0.5356981814077006, "learning_rate": 2.3589369484557784e-06, "loss": 0.0203, "step": 172305 }, { "epoch": 0.7189708839949596, "grad_norm": 0.7837747623195791, "learning_rate": 2.3589027228195466e-06, "loss": 0.0251, "step": 172310 }, { "epoch": 0.7189917467099498, "grad_norm": 0.4630905153150848, "learning_rate": 2.3588684986730027e-06, "loss": 0.0189, "step": 172315 }, { "epoch": 0.7190126094249402, "grad_norm": 1.0463630023376118, "learning_rate": 2.358834276016039e-06, "loss": 0.0197, "step": 172320 }, { "epoch": 0.7190334721399304, "grad_norm": 1.2511033326156367, "learning_rate": 2.3588000548485476e-06, "loss": 0.023, "step": 172325 }, { "epoch": 0.7190543348549207, "grad_norm": 0.7962351029928829, "learning_rate": 2.3587658351704205e-06, "loss": 0.0242, "step": 172330 }, { "epoch": 0.719075197569911, "grad_norm": 0.4680402932091877, "learning_rate": 2.3587316169815484e-06, "loss": 0.0194, "step": 172335 }, { "epoch": 0.7190960602849013, "grad_norm": 0.41406125388512266, "learning_rate": 2.3586974002818255e-06, "loss": 0.0217, "step": 172340 }, { "epoch": 0.7191169229998915, "grad_norm": 0.9886237980011567, "learning_rate": 2.3586631850711416e-06, "loss": 0.0224, "step": 172345 }, { "epoch": 0.7191377857148817, "grad_norm": 0.5830737469546285, "learning_rate": 2.3586289713493904e-06, "loss": 0.0194, "step": 172350 }, { "epoch": 0.7191586484298721, "grad_norm": 0.5646418211380505, "learning_rate": 2.358594759116463e-06, "loss": 0.014, "step": 172355 }, { "epoch": 0.7191795111448623, "grad_norm": 0.4226265466603868, "learning_rate": 2.3585605483722517e-06, "loss": 0.0195, "step": 172360 }, { "epoch": 0.7192003738598526, "grad_norm": 0.8265207146940365, "learning_rate": 2.358526339116648e-06, "loss": 0.0239, "step": 172365 }, { "epoch": 0.7192212365748429, "grad_norm": 0.6231837812945134, "learning_rate": 2.358492131349545e-06, "loss": 0.0194, "step": 172370 }, { "epoch": 0.7192420992898332, "grad_norm": 0.9450064591334172, "learning_rate": 2.358457925070834e-06, "loss": 0.0228, "step": 172375 }, { "epoch": 0.7192629620048234, "grad_norm": 0.2733856106054882, "learning_rate": 2.358423720280407e-06, "loss": 0.0187, "step": 172380 }, { "epoch": 0.7192838247198138, "grad_norm": 0.6137819095741893, "learning_rate": 2.358389516978157e-06, "loss": 0.0191, "step": 172385 }, { "epoch": 0.719304687434804, "grad_norm": 0.6673986106810974, "learning_rate": 2.358355315163975e-06, "loss": 0.0177, "step": 172390 }, { "epoch": 0.7193255501497943, "grad_norm": 1.3477079647118069, "learning_rate": 2.3583211148377534e-06, "loss": 0.0333, "step": 172395 }, { "epoch": 0.7193464128647846, "grad_norm": 0.5610235681155802, "learning_rate": 2.358286915999384e-06, "loss": 0.0187, "step": 172400 }, { "epoch": 0.7193672755797749, "grad_norm": 0.4834434715115013, "learning_rate": 2.35825271864876e-06, "loss": 0.0168, "step": 172405 }, { "epoch": 0.7193881382947651, "grad_norm": 0.7008279342159417, "learning_rate": 2.3582185227857736e-06, "loss": 0.0228, "step": 172410 }, { "epoch": 0.7194090010097554, "grad_norm": 0.6204826727155118, "learning_rate": 2.358184328410315e-06, "loss": 0.0236, "step": 172415 }, { "epoch": 0.7194298637247457, "grad_norm": 0.5440971578591453, "learning_rate": 2.358150135522278e-06, "loss": 0.0156, "step": 172420 }, { "epoch": 0.719450726439736, "grad_norm": 0.6590376341224453, "learning_rate": 2.3581159441215545e-06, "loss": 0.0263, "step": 172425 }, { "epoch": 0.7194715891547262, "grad_norm": 0.727548165966972, "learning_rate": 2.358081754208036e-06, "loss": 0.0184, "step": 172430 }, { "epoch": 0.7194924518697166, "grad_norm": 0.5979258464267333, "learning_rate": 2.358047565781616e-06, "loss": 0.023, "step": 172435 }, { "epoch": 0.7195133145847068, "grad_norm": 0.5723518758233773, "learning_rate": 2.358013378842186e-06, "loss": 0.0185, "step": 172440 }, { "epoch": 0.719534177299697, "grad_norm": 0.821154425395339, "learning_rate": 2.3579791933896376e-06, "loss": 0.0233, "step": 172445 }, { "epoch": 0.7195550400146874, "grad_norm": 0.5979024740750934, "learning_rate": 2.3579450094238633e-06, "loss": 0.0249, "step": 172450 }, { "epoch": 0.7195759027296776, "grad_norm": 1.0034479115927986, "learning_rate": 2.3579108269447564e-06, "loss": 0.028, "step": 172455 }, { "epoch": 0.7195967654446679, "grad_norm": 2.6797072926938106, "learning_rate": 2.357876645952208e-06, "loss": 0.0214, "step": 172460 }, { "epoch": 0.7196176281596582, "grad_norm": 0.5857345258786196, "learning_rate": 2.3578424664461104e-06, "loss": 0.0224, "step": 172465 }, { "epoch": 0.7196384908746485, "grad_norm": 0.42096489118108477, "learning_rate": 2.3578082884263563e-06, "loss": 0.0209, "step": 172470 }, { "epoch": 0.7196593535896387, "grad_norm": 1.3726017104887493, "learning_rate": 2.357774111892838e-06, "loss": 0.0274, "step": 172475 }, { "epoch": 0.719680216304629, "grad_norm": 1.181073618176814, "learning_rate": 2.3577399368454476e-06, "loss": 0.0226, "step": 172480 }, { "epoch": 0.7197010790196193, "grad_norm": 1.1643640009134792, "learning_rate": 2.3577057632840776e-06, "loss": 0.0235, "step": 172485 }, { "epoch": 0.7197219417346096, "grad_norm": 0.6129852879887115, "learning_rate": 2.3576715912086194e-06, "loss": 0.0281, "step": 172490 }, { "epoch": 0.7197428044495998, "grad_norm": 0.708282668422619, "learning_rate": 2.357637420618967e-06, "loss": 0.0197, "step": 172495 }, { "epoch": 0.7197636671645902, "grad_norm": 0.36564892819356787, "learning_rate": 2.3576032515150114e-06, "loss": 0.0205, "step": 172500 }, { "epoch": 0.7197845298795804, "grad_norm": 0.5054474137460534, "learning_rate": 2.3575690838966452e-06, "loss": 0.026, "step": 172505 }, { "epoch": 0.7198053925945707, "grad_norm": 0.5259778929102288, "learning_rate": 2.3575349177637618e-06, "loss": 0.0158, "step": 172510 }, { "epoch": 0.719826255309561, "grad_norm": 0.5264323151440752, "learning_rate": 2.357500753116252e-06, "loss": 0.0234, "step": 172515 }, { "epoch": 0.7198471180245513, "grad_norm": 1.153096576698446, "learning_rate": 2.357466589954008e-06, "loss": 0.0236, "step": 172520 }, { "epoch": 0.7198679807395415, "grad_norm": 0.5979783241327038, "learning_rate": 2.3574324282769234e-06, "loss": 0.02, "step": 172525 }, { "epoch": 0.7198888434545317, "grad_norm": 2.0987599296341313, "learning_rate": 2.357398268084891e-06, "loss": 0.0197, "step": 172530 }, { "epoch": 0.7199097061695221, "grad_norm": 0.4125150317375601, "learning_rate": 2.357364109377802e-06, "loss": 0.016, "step": 172535 }, { "epoch": 0.7199305688845123, "grad_norm": 0.7836652121857797, "learning_rate": 2.357329952155549e-06, "loss": 0.0204, "step": 172540 }, { "epoch": 0.7199514315995026, "grad_norm": 0.6310804764973392, "learning_rate": 2.357295796418025e-06, "loss": 0.0231, "step": 172545 }, { "epoch": 0.7199722943144929, "grad_norm": 1.152812758387341, "learning_rate": 2.3572616421651217e-06, "loss": 0.0221, "step": 172550 }, { "epoch": 0.7199931570294832, "grad_norm": 0.9667318159754362, "learning_rate": 2.357227489396732e-06, "loss": 0.0282, "step": 172555 }, { "epoch": 0.7200140197444734, "grad_norm": 1.2166898943646536, "learning_rate": 2.3571933381127492e-06, "loss": 0.0211, "step": 172560 }, { "epoch": 0.7200348824594638, "grad_norm": 0.5136088013191139, "learning_rate": 2.3571591883130636e-06, "loss": 0.0195, "step": 172565 }, { "epoch": 0.720055745174454, "grad_norm": 0.8816533005458836, "learning_rate": 2.3571250399975695e-06, "loss": 0.0199, "step": 172570 }, { "epoch": 0.7200766078894443, "grad_norm": 0.5294265640303882, "learning_rate": 2.3570908931661592e-06, "loss": 0.0324, "step": 172575 }, { "epoch": 0.7200974706044346, "grad_norm": 0.7116613752350623, "learning_rate": 2.357056747818725e-06, "loss": 0.0178, "step": 172580 }, { "epoch": 0.7201183333194249, "grad_norm": 0.9279208834297845, "learning_rate": 2.357022603955159e-06, "loss": 0.0266, "step": 172585 }, { "epoch": 0.7201391960344151, "grad_norm": 0.9025081291349916, "learning_rate": 2.356988461575353e-06, "loss": 0.0187, "step": 172590 }, { "epoch": 0.7201600587494054, "grad_norm": 0.7680761903334732, "learning_rate": 2.356954320679202e-06, "loss": 0.0235, "step": 172595 }, { "epoch": 0.7201809214643957, "grad_norm": 0.6037544212475574, "learning_rate": 2.3569201812665965e-06, "loss": 0.018, "step": 172600 }, { "epoch": 0.720201784179386, "grad_norm": 0.5807912378970144, "learning_rate": 2.35688604333743e-06, "loss": 0.0231, "step": 172605 }, { "epoch": 0.7202226468943762, "grad_norm": 1.1339142855551139, "learning_rate": 2.3568519068915946e-06, "loss": 0.0208, "step": 172610 }, { "epoch": 0.7202435096093666, "grad_norm": 0.5221609057916772, "learning_rate": 2.356817771928983e-06, "loss": 0.0229, "step": 172615 }, { "epoch": 0.7202643723243568, "grad_norm": 0.43491308928657046, "learning_rate": 2.3567836384494875e-06, "loss": 0.0189, "step": 172620 }, { "epoch": 0.720285235039347, "grad_norm": 0.8219629363998807, "learning_rate": 2.3567495064530017e-06, "loss": 0.0226, "step": 172625 }, { "epoch": 0.7203060977543374, "grad_norm": 0.6253311736786399, "learning_rate": 2.356715375939417e-06, "loss": 0.0212, "step": 172630 }, { "epoch": 0.7203269604693276, "grad_norm": 0.9111499514595714, "learning_rate": 2.356681246908627e-06, "loss": 0.0255, "step": 172635 }, { "epoch": 0.7203478231843179, "grad_norm": 0.574805751491867, "learning_rate": 2.356647119360524e-06, "loss": 0.0264, "step": 172640 }, { "epoch": 0.7203686858993082, "grad_norm": 0.6146519033009034, "learning_rate": 2.356612993295e-06, "loss": 0.0175, "step": 172645 }, { "epoch": 0.7203895486142985, "grad_norm": 0.8140069169548316, "learning_rate": 2.356578868711948e-06, "loss": 0.0235, "step": 172650 }, { "epoch": 0.7204104113292887, "grad_norm": 0.9271023738204621, "learning_rate": 2.3565447456112615e-06, "loss": 0.0192, "step": 172655 }, { "epoch": 0.720431274044279, "grad_norm": 0.7799389445897856, "learning_rate": 2.356510623992832e-06, "loss": 0.0287, "step": 172660 }, { "epoch": 0.7204521367592693, "grad_norm": 0.7308204630566738, "learning_rate": 2.3564765038565534e-06, "loss": 0.0142, "step": 172665 }, { "epoch": 0.7204729994742596, "grad_norm": 0.7961084746512758, "learning_rate": 2.3564423852023175e-06, "loss": 0.0245, "step": 172670 }, { "epoch": 0.7204938621892498, "grad_norm": 0.6739131018192835, "learning_rate": 2.356408268030017e-06, "loss": 0.0203, "step": 172675 }, { "epoch": 0.7205147249042402, "grad_norm": 0.5771468702832694, "learning_rate": 2.3563741523395453e-06, "loss": 0.0249, "step": 172680 }, { "epoch": 0.7205355876192304, "grad_norm": 2.3117912607799567, "learning_rate": 2.3563400381307946e-06, "loss": 0.0253, "step": 172685 }, { "epoch": 0.7205564503342207, "grad_norm": 0.3965244536921582, "learning_rate": 2.3563059254036574e-06, "loss": 0.02, "step": 172690 }, { "epoch": 0.720577313049211, "grad_norm": 0.7721078938795883, "learning_rate": 2.356271814158027e-06, "loss": 0.0136, "step": 172695 }, { "epoch": 0.7205981757642013, "grad_norm": 0.4394354620429613, "learning_rate": 2.3562377043937966e-06, "loss": 0.02, "step": 172700 }, { "epoch": 0.7206190384791915, "grad_norm": 0.5650836326293177, "learning_rate": 2.3562035961108575e-06, "loss": 0.015, "step": 172705 }, { "epoch": 0.7206399011941818, "grad_norm": 0.3838258190863885, "learning_rate": 2.356169489309104e-06, "loss": 0.0217, "step": 172710 }, { "epoch": 0.7206607639091721, "grad_norm": 1.17850432904629, "learning_rate": 2.356135383988428e-06, "loss": 0.0283, "step": 172715 }, { "epoch": 0.7206816266241624, "grad_norm": 0.7319770323996412, "learning_rate": 2.3561012801487222e-06, "loss": 0.0284, "step": 172720 }, { "epoch": 0.7207024893391526, "grad_norm": 0.4289383044965818, "learning_rate": 2.3560671777898807e-06, "loss": 0.0211, "step": 172725 }, { "epoch": 0.720723352054143, "grad_norm": 0.48419113770679817, "learning_rate": 2.3560330769117946e-06, "loss": 0.0228, "step": 172730 }, { "epoch": 0.7207442147691332, "grad_norm": 3.1210298126530507, "learning_rate": 2.3559989775143578e-06, "loss": 0.0212, "step": 172735 }, { "epoch": 0.7207650774841234, "grad_norm": 0.7084675831647873, "learning_rate": 2.3559648795974633e-06, "loss": 0.023, "step": 172740 }, { "epoch": 0.7207859401991138, "grad_norm": 0.7289282570884715, "learning_rate": 2.3559307831610033e-06, "loss": 0.0246, "step": 172745 }, { "epoch": 0.720806802914104, "grad_norm": 0.376243105744508, "learning_rate": 2.355896688204871e-06, "loss": 0.0192, "step": 172750 }, { "epoch": 0.7208276656290943, "grad_norm": 1.281342450083273, "learning_rate": 2.355862594728959e-06, "loss": 0.0258, "step": 172755 }, { "epoch": 0.7208485283440846, "grad_norm": 0.6525357512490081, "learning_rate": 2.3558285027331606e-06, "loss": 0.019, "step": 172760 }, { "epoch": 0.7208693910590749, "grad_norm": 0.32596916051442865, "learning_rate": 2.355794412217369e-06, "loss": 0.0219, "step": 172765 }, { "epoch": 0.7208902537740651, "grad_norm": 1.422681072420973, "learning_rate": 2.355760323181476e-06, "loss": 0.0322, "step": 172770 }, { "epoch": 0.7209111164890554, "grad_norm": 0.677295876547138, "learning_rate": 2.3557262356253756e-06, "loss": 0.0234, "step": 172775 }, { "epoch": 0.7209319792040457, "grad_norm": 0.5761368450812864, "learning_rate": 2.35569214954896e-06, "loss": 0.0251, "step": 172780 }, { "epoch": 0.720952841919036, "grad_norm": 0.5843532174417929, "learning_rate": 2.355658064952123e-06, "loss": 0.0161, "step": 172785 }, { "epoch": 0.7209737046340262, "grad_norm": 0.356984379622813, "learning_rate": 2.3556239818347565e-06, "loss": 0.0148, "step": 172790 }, { "epoch": 0.7209945673490166, "grad_norm": 0.5704520531211181, "learning_rate": 2.3555899001967545e-06, "loss": 0.0302, "step": 172795 }, { "epoch": 0.7210154300640068, "grad_norm": 0.6333625552034242, "learning_rate": 2.3555558200380095e-06, "loss": 0.021, "step": 172800 }, { "epoch": 0.7210362927789971, "grad_norm": 0.715995655317117, "learning_rate": 2.355521741358415e-06, "loss": 0.0281, "step": 172805 }, { "epoch": 0.7210571554939874, "grad_norm": 0.8897220102186697, "learning_rate": 2.3554876641578622e-06, "loss": 0.0235, "step": 172810 }, { "epoch": 0.7210780182089777, "grad_norm": 0.4408815308896483, "learning_rate": 2.355453588436246e-06, "loss": 0.0215, "step": 172815 }, { "epoch": 0.7210988809239679, "grad_norm": 0.5426636638570927, "learning_rate": 2.355419514193459e-06, "loss": 0.0166, "step": 172820 }, { "epoch": 0.7211197436389583, "grad_norm": 0.5170517870345375, "learning_rate": 2.355385441429394e-06, "loss": 0.0187, "step": 172825 }, { "epoch": 0.7211406063539485, "grad_norm": 0.45062035062342376, "learning_rate": 2.3553513701439447e-06, "loss": 0.0176, "step": 172830 }, { "epoch": 0.7211614690689387, "grad_norm": 1.0140474631756615, "learning_rate": 2.3553173003370033e-06, "loss": 0.0252, "step": 172835 }, { "epoch": 0.721182331783929, "grad_norm": 0.5578085099023026, "learning_rate": 2.355283232008463e-06, "loss": 0.0192, "step": 172840 }, { "epoch": 0.7212031944989193, "grad_norm": 2.1785600897856345, "learning_rate": 2.355249165158217e-06, "loss": 0.0241, "step": 172845 }, { "epoch": 0.7212240572139096, "grad_norm": 0.4357868978452215, "learning_rate": 2.3552150997861585e-06, "loss": 0.0174, "step": 172850 }, { "epoch": 0.7212449199288998, "grad_norm": 0.5246255816343114, "learning_rate": 2.355181035892181e-06, "loss": 0.0243, "step": 172855 }, { "epoch": 0.7212657826438902, "grad_norm": 0.5411808507780351, "learning_rate": 2.355146973476177e-06, "loss": 0.0145, "step": 172860 }, { "epoch": 0.7212866453588804, "grad_norm": 0.4509626825662459, "learning_rate": 2.3551129125380395e-06, "loss": 0.0218, "step": 172865 }, { "epoch": 0.7213075080738707, "grad_norm": 0.656275518968936, "learning_rate": 2.3550788530776623e-06, "loss": 0.0283, "step": 172870 }, { "epoch": 0.721328370788861, "grad_norm": 0.606209191474249, "learning_rate": 2.3550447950949374e-06, "loss": 0.0241, "step": 172875 }, { "epoch": 0.7213492335038513, "grad_norm": 0.8964000853253888, "learning_rate": 2.3550107385897595e-06, "loss": 0.0227, "step": 172880 }, { "epoch": 0.7213700962188415, "grad_norm": 0.9503319642616389, "learning_rate": 2.354976683562021e-06, "loss": 0.0149, "step": 172885 }, { "epoch": 0.7213909589338318, "grad_norm": 0.7065621940489056, "learning_rate": 2.3549426300116153e-06, "loss": 0.0196, "step": 172890 }, { "epoch": 0.7214118216488221, "grad_norm": 0.7326696843257613, "learning_rate": 2.3549085779384348e-06, "loss": 0.0243, "step": 172895 }, { "epoch": 0.7214326843638124, "grad_norm": 0.5466045984689737, "learning_rate": 2.354874527342374e-06, "loss": 0.022, "step": 172900 }, { "epoch": 0.7214535470788026, "grad_norm": 0.7069175593599543, "learning_rate": 2.354840478223325e-06, "loss": 0.0234, "step": 172905 }, { "epoch": 0.721474409793793, "grad_norm": 0.38414664364703527, "learning_rate": 2.3548064305811803e-06, "loss": 0.0261, "step": 172910 }, { "epoch": 0.7214952725087832, "grad_norm": 1.1304363829893758, "learning_rate": 2.3547723844158357e-06, "loss": 0.018, "step": 172915 }, { "epoch": 0.7215161352237734, "grad_norm": 0.5339118848429693, "learning_rate": 2.354738339727183e-06, "loss": 0.0357, "step": 172920 }, { "epoch": 0.7215369979387638, "grad_norm": 0.8835608503280051, "learning_rate": 2.354704296515115e-06, "loss": 0.0234, "step": 172925 }, { "epoch": 0.721557860653754, "grad_norm": 0.7531072073568321, "learning_rate": 2.354670254779525e-06, "loss": 0.014, "step": 172930 }, { "epoch": 0.7215787233687443, "grad_norm": 0.9410754812345209, "learning_rate": 2.354636214520307e-06, "loss": 0.0168, "step": 172935 }, { "epoch": 0.7215995860837346, "grad_norm": 0.7892604598381636, "learning_rate": 2.3546021757373547e-06, "loss": 0.0272, "step": 172940 }, { "epoch": 0.7216204487987249, "grad_norm": 0.3249792775762555, "learning_rate": 2.3545681384305597e-06, "loss": 0.0191, "step": 172945 }, { "epoch": 0.7216413115137151, "grad_norm": 0.7371677294062036, "learning_rate": 2.3545341025998165e-06, "loss": 0.0233, "step": 172950 }, { "epoch": 0.7216621742287054, "grad_norm": 0.4497675419913357, "learning_rate": 2.354500068245019e-06, "loss": 0.023, "step": 172955 }, { "epoch": 0.7216830369436957, "grad_norm": 0.4657365740697263, "learning_rate": 2.354466035366059e-06, "loss": 0.0247, "step": 172960 }, { "epoch": 0.721703899658686, "grad_norm": 0.7283559508149965, "learning_rate": 2.354432003962831e-06, "loss": 0.0245, "step": 172965 }, { "epoch": 0.7217247623736762, "grad_norm": 0.8500076737178516, "learning_rate": 2.3543979740352267e-06, "loss": 0.023, "step": 172970 }, { "epoch": 0.7217456250886666, "grad_norm": 0.7927175072610172, "learning_rate": 2.354363945583142e-06, "loss": 0.0278, "step": 172975 }, { "epoch": 0.7217664878036568, "grad_norm": 0.2811287702092186, "learning_rate": 2.3543299186064688e-06, "loss": 0.0137, "step": 172980 }, { "epoch": 0.7217873505186471, "grad_norm": 0.30060590266740184, "learning_rate": 2.3542958931051e-06, "loss": 0.0186, "step": 172985 }, { "epoch": 0.7218082132336374, "grad_norm": 0.49935945755308897, "learning_rate": 2.3542618690789305e-06, "loss": 0.0193, "step": 172990 }, { "epoch": 0.7218290759486277, "grad_norm": 0.8854136953077107, "learning_rate": 2.3542278465278526e-06, "loss": 0.0265, "step": 172995 }, { "epoch": 0.7218499386636179, "grad_norm": 0.4939580057422087, "learning_rate": 2.3541938254517597e-06, "loss": 0.0198, "step": 173000 }, { "epoch": 0.7218708013786082, "grad_norm": 0.9802418759446676, "learning_rate": 2.3541598058505456e-06, "loss": 0.0298, "step": 173005 }, { "epoch": 0.7218916640935985, "grad_norm": 1.0762943631419275, "learning_rate": 2.354125787724104e-06, "loss": 0.0213, "step": 173010 }, { "epoch": 0.7219125268085888, "grad_norm": 0.5136290641695838, "learning_rate": 2.3540917710723272e-06, "loss": 0.0317, "step": 173015 }, { "epoch": 0.721933389523579, "grad_norm": 0.7906695209034076, "learning_rate": 2.35405775589511e-06, "loss": 0.0204, "step": 173020 }, { "epoch": 0.7219542522385693, "grad_norm": 0.679795377419401, "learning_rate": 2.3540237421923453e-06, "loss": 0.0208, "step": 173025 }, { "epoch": 0.7219751149535596, "grad_norm": 0.43144194812034237, "learning_rate": 2.353989729963926e-06, "loss": 0.0227, "step": 173030 }, { "epoch": 0.7219959776685498, "grad_norm": 0.4488019338840519, "learning_rate": 2.353955719209747e-06, "loss": 0.0216, "step": 173035 }, { "epoch": 0.7220168403835402, "grad_norm": 0.6661947767696019, "learning_rate": 2.353921709929701e-06, "loss": 0.0286, "step": 173040 }, { "epoch": 0.7220377030985304, "grad_norm": 0.64202967222225, "learning_rate": 2.3538877021236813e-06, "loss": 0.0247, "step": 173045 }, { "epoch": 0.7220585658135207, "grad_norm": 0.46827301113532627, "learning_rate": 2.353853695791581e-06, "loss": 0.0117, "step": 173050 }, { "epoch": 0.722079428528511, "grad_norm": 0.5990622370419156, "learning_rate": 2.3538196909332947e-06, "loss": 0.0185, "step": 173055 }, { "epoch": 0.7221002912435013, "grad_norm": 0.844336008136408, "learning_rate": 2.353785687548716e-06, "loss": 0.0253, "step": 173060 }, { "epoch": 0.7221211539584915, "grad_norm": 1.2288057480299117, "learning_rate": 2.3537516856377372e-06, "loss": 0.018, "step": 173065 }, { "epoch": 0.7221420166734818, "grad_norm": 0.7662168078026174, "learning_rate": 2.353717685200253e-06, "loss": 0.0201, "step": 173070 }, { "epoch": 0.7221628793884721, "grad_norm": 1.0619833001414254, "learning_rate": 2.3536836862361574e-06, "loss": 0.0249, "step": 173075 }, { "epoch": 0.7221837421034624, "grad_norm": 0.5870808687087774, "learning_rate": 2.3536496887453424e-06, "loss": 0.0189, "step": 173080 }, { "epoch": 0.7222046048184526, "grad_norm": 0.34300547921926167, "learning_rate": 2.353615692727702e-06, "loss": 0.0173, "step": 173085 }, { "epoch": 0.722225467533443, "grad_norm": 0.5725295551768498, "learning_rate": 2.3535816981831307e-06, "loss": 0.027, "step": 173090 }, { "epoch": 0.7222463302484332, "grad_norm": 0.21298994784938763, "learning_rate": 2.3535477051115218e-06, "loss": 0.0244, "step": 173095 }, { "epoch": 0.7222671929634235, "grad_norm": 0.7341686159295329, "learning_rate": 2.353513713512769e-06, "loss": 0.0295, "step": 173100 }, { "epoch": 0.7222880556784138, "grad_norm": 0.6988615150925922, "learning_rate": 2.353479723386765e-06, "loss": 0.0287, "step": 173105 }, { "epoch": 0.722308918393404, "grad_norm": 0.9768353277128424, "learning_rate": 2.3534457347334046e-06, "loss": 0.0195, "step": 173110 }, { "epoch": 0.7223297811083943, "grad_norm": 0.6361853690965504, "learning_rate": 2.3534117475525807e-06, "loss": 0.025, "step": 173115 }, { "epoch": 0.7223506438233847, "grad_norm": 0.9783517849737647, "learning_rate": 2.3533777618441877e-06, "loss": 0.0164, "step": 173120 }, { "epoch": 0.7223715065383749, "grad_norm": 0.39316996158772366, "learning_rate": 2.3533437776081187e-06, "loss": 0.0168, "step": 173125 }, { "epoch": 0.7223923692533651, "grad_norm": 0.7831423889782462, "learning_rate": 2.3533097948442675e-06, "loss": 0.0194, "step": 173130 }, { "epoch": 0.7224132319683554, "grad_norm": 0.7011643930711151, "learning_rate": 2.3532758135525274e-06, "loss": 0.0246, "step": 173135 }, { "epoch": 0.7224340946833457, "grad_norm": 0.5149902840016944, "learning_rate": 2.353241833732794e-06, "loss": 0.0174, "step": 173140 }, { "epoch": 0.722454957398336, "grad_norm": 1.05008134985793, "learning_rate": 2.3532078553849583e-06, "loss": 0.0201, "step": 173145 }, { "epoch": 0.7224758201133262, "grad_norm": 0.7997106559676707, "learning_rate": 2.353173878508916e-06, "loss": 0.0209, "step": 173150 }, { "epoch": 0.7224966828283166, "grad_norm": 0.7143685392860348, "learning_rate": 2.3531399031045603e-06, "loss": 0.0266, "step": 173155 }, { "epoch": 0.7225175455433068, "grad_norm": 1.232294549435017, "learning_rate": 2.353105929171785e-06, "loss": 0.0254, "step": 173160 }, { "epoch": 0.7225384082582971, "grad_norm": 0.7059440307566887, "learning_rate": 2.353071956710483e-06, "loss": 0.02, "step": 173165 }, { "epoch": 0.7225592709732874, "grad_norm": 0.5993010338015701, "learning_rate": 2.3530379857205495e-06, "loss": 0.022, "step": 173170 }, { "epoch": 0.7225801336882777, "grad_norm": 0.452832038074531, "learning_rate": 2.3530040162018775e-06, "loss": 0.0236, "step": 173175 }, { "epoch": 0.7226009964032679, "grad_norm": 0.8531291345235095, "learning_rate": 2.352970048154361e-06, "loss": 0.0189, "step": 173180 }, { "epoch": 0.7226218591182582, "grad_norm": 1.350372831875078, "learning_rate": 2.3529360815778936e-06, "loss": 0.0226, "step": 173185 }, { "epoch": 0.7226427218332485, "grad_norm": 0.5444188034130759, "learning_rate": 2.352902116472369e-06, "loss": 0.0159, "step": 173190 }, { "epoch": 0.7226635845482388, "grad_norm": 0.4767676061343403, "learning_rate": 2.352868152837682e-06, "loss": 0.0238, "step": 173195 }, { "epoch": 0.722684447263229, "grad_norm": 0.352979162695552, "learning_rate": 2.3528341906737256e-06, "loss": 0.0156, "step": 173200 }, { "epoch": 0.7227053099782194, "grad_norm": 0.7753275437627875, "learning_rate": 2.3528002299803937e-06, "loss": 0.0317, "step": 173205 }, { "epoch": 0.7227261726932096, "grad_norm": 0.3732969500798367, "learning_rate": 2.3527662707575803e-06, "loss": 0.0213, "step": 173210 }, { "epoch": 0.7227470354081998, "grad_norm": 0.49362358228943687, "learning_rate": 2.352732313005179e-06, "loss": 0.0185, "step": 173215 }, { "epoch": 0.7227678981231902, "grad_norm": 1.2685240499253037, "learning_rate": 2.3526983567230844e-06, "loss": 0.0318, "step": 173220 }, { "epoch": 0.7227887608381804, "grad_norm": 0.5811150619380736, "learning_rate": 2.35266440191119e-06, "loss": 0.0186, "step": 173225 }, { "epoch": 0.7228096235531707, "grad_norm": 0.5690538999543346, "learning_rate": 2.352630448569389e-06, "loss": 0.0264, "step": 173230 }, { "epoch": 0.722830486268161, "grad_norm": 0.3721445969434872, "learning_rate": 2.352596496697577e-06, "loss": 0.0214, "step": 173235 }, { "epoch": 0.7228513489831513, "grad_norm": 0.7515595412569155, "learning_rate": 2.3525625462956463e-06, "loss": 0.0281, "step": 173240 }, { "epoch": 0.7228722116981415, "grad_norm": 0.32120571136330583, "learning_rate": 2.3525285973634917e-06, "loss": 0.0173, "step": 173245 }, { "epoch": 0.7228930744131318, "grad_norm": 0.4319497218196271, "learning_rate": 2.3524946499010066e-06, "loss": 0.0271, "step": 173250 }, { "epoch": 0.7229139371281221, "grad_norm": 0.9118864623652129, "learning_rate": 2.3524607039080856e-06, "loss": 0.0272, "step": 173255 }, { "epoch": 0.7229347998431124, "grad_norm": 1.0336898922788347, "learning_rate": 2.352426759384622e-06, "loss": 0.0311, "step": 173260 }, { "epoch": 0.7229556625581026, "grad_norm": 0.9885382424299977, "learning_rate": 2.352392816330511e-06, "loss": 0.0441, "step": 173265 }, { "epoch": 0.722976525273093, "grad_norm": 0.48580883076055925, "learning_rate": 2.3523588747456446e-06, "loss": 0.0193, "step": 173270 }, { "epoch": 0.7229973879880832, "grad_norm": 1.6302237539574402, "learning_rate": 2.352324934629919e-06, "loss": 0.0256, "step": 173275 }, { "epoch": 0.7230182507030735, "grad_norm": 0.9463398215515294, "learning_rate": 2.352290995983227e-06, "loss": 0.0283, "step": 173280 }, { "epoch": 0.7230391134180638, "grad_norm": 0.432812244172344, "learning_rate": 2.3522570588054625e-06, "loss": 0.0184, "step": 173285 }, { "epoch": 0.7230599761330541, "grad_norm": 0.6851418294346215, "learning_rate": 2.3522231230965196e-06, "loss": 0.0208, "step": 173290 }, { "epoch": 0.7230808388480443, "grad_norm": 0.5109825011795098, "learning_rate": 2.3521891888562926e-06, "loss": 0.0216, "step": 173295 }, { "epoch": 0.7231017015630347, "grad_norm": 0.9580161546178891, "learning_rate": 2.352155256084676e-06, "loss": 0.0188, "step": 173300 }, { "epoch": 0.7231225642780249, "grad_norm": 0.5036879524411483, "learning_rate": 2.3521213247815634e-06, "loss": 0.0245, "step": 173305 }, { "epoch": 0.7231434269930151, "grad_norm": 0.6327498327327366, "learning_rate": 2.3520873949468483e-06, "loss": 0.028, "step": 173310 }, { "epoch": 0.7231642897080054, "grad_norm": 0.5100374273710754, "learning_rate": 2.3520534665804262e-06, "loss": 0.0193, "step": 173315 }, { "epoch": 0.7231851524229957, "grad_norm": 0.7581427399862561, "learning_rate": 2.35201953968219e-06, "loss": 0.0202, "step": 173320 }, { "epoch": 0.723206015137986, "grad_norm": 1.340667830518563, "learning_rate": 2.3519856142520344e-06, "loss": 0.0295, "step": 173325 }, { "epoch": 0.7232268778529762, "grad_norm": 0.8137231037207532, "learning_rate": 2.351951690289854e-06, "loss": 0.0201, "step": 173330 }, { "epoch": 0.7232477405679666, "grad_norm": 0.40086344291689324, "learning_rate": 2.351917767795541e-06, "loss": 0.0188, "step": 173335 }, { "epoch": 0.7232686032829568, "grad_norm": 0.7874004461572778, "learning_rate": 2.3518838467689915e-06, "loss": 0.0228, "step": 173340 }, { "epoch": 0.7232894659979471, "grad_norm": 0.8063256419693431, "learning_rate": 2.3518499272100985e-06, "loss": 0.0256, "step": 173345 }, { "epoch": 0.7233103287129374, "grad_norm": 0.5601484542705014, "learning_rate": 2.3518160091187574e-06, "loss": 0.0212, "step": 173350 }, { "epoch": 0.7233311914279277, "grad_norm": 0.6765373124241908, "learning_rate": 2.3517820924948616e-06, "loss": 0.021, "step": 173355 }, { "epoch": 0.7233520541429179, "grad_norm": 0.5492121155054731, "learning_rate": 2.3517481773383046e-06, "loss": 0.0269, "step": 173360 }, { "epoch": 0.7233729168579082, "grad_norm": 1.0214209452083287, "learning_rate": 2.351714263648982e-06, "loss": 0.0183, "step": 173365 }, { "epoch": 0.7233937795728985, "grad_norm": 0.7751865103162083, "learning_rate": 2.3516803514267867e-06, "loss": 0.0207, "step": 173370 }, { "epoch": 0.7234146422878888, "grad_norm": 0.5060636258982678, "learning_rate": 2.351646440671614e-06, "loss": 0.0217, "step": 173375 }, { "epoch": 0.723435505002879, "grad_norm": 0.6974657105916265, "learning_rate": 2.351612531383358e-06, "loss": 0.0212, "step": 173380 }, { "epoch": 0.7234563677178694, "grad_norm": 0.3012653595565154, "learning_rate": 2.3515786235619125e-06, "loss": 0.0135, "step": 173385 }, { "epoch": 0.7234772304328596, "grad_norm": 1.0495425985954003, "learning_rate": 2.3515447172071713e-06, "loss": 0.0202, "step": 173390 }, { "epoch": 0.7234980931478499, "grad_norm": 0.4740117910791772, "learning_rate": 2.35151081231903e-06, "loss": 0.023, "step": 173395 }, { "epoch": 0.7235189558628402, "grad_norm": 0.7557897906900185, "learning_rate": 2.351476908897382e-06, "loss": 0.0188, "step": 173400 }, { "epoch": 0.7235398185778305, "grad_norm": 0.7784214313898528, "learning_rate": 2.3514430069421213e-06, "loss": 0.0206, "step": 173405 }, { "epoch": 0.7235606812928207, "grad_norm": 0.477237605916954, "learning_rate": 2.3514091064531426e-06, "loss": 0.0237, "step": 173410 }, { "epoch": 0.723581544007811, "grad_norm": 0.9415112635768846, "learning_rate": 2.3513752074303406e-06, "loss": 0.0243, "step": 173415 }, { "epoch": 0.7236024067228013, "grad_norm": 0.5271735017166213, "learning_rate": 2.351341309873609e-06, "loss": 0.0249, "step": 173420 }, { "epoch": 0.7236232694377915, "grad_norm": 0.8668438543509538, "learning_rate": 2.3513074137828426e-06, "loss": 0.0222, "step": 173425 }, { "epoch": 0.7236441321527818, "grad_norm": 0.518625466897872, "learning_rate": 2.3512735191579352e-06, "loss": 0.0183, "step": 173430 }, { "epoch": 0.7236649948677721, "grad_norm": 0.9795207976530531, "learning_rate": 2.351239625998781e-06, "loss": 0.0226, "step": 173435 }, { "epoch": 0.7236858575827624, "grad_norm": 0.6710432499152722, "learning_rate": 2.351205734305276e-06, "loss": 0.0126, "step": 173440 }, { "epoch": 0.7237067202977526, "grad_norm": 0.596658434221084, "learning_rate": 2.3511718440773125e-06, "loss": 0.0174, "step": 173445 }, { "epoch": 0.723727583012743, "grad_norm": 0.6717267970185489, "learning_rate": 2.351137955314786e-06, "loss": 0.0351, "step": 173450 }, { "epoch": 0.7237484457277332, "grad_norm": 0.436131706362486, "learning_rate": 2.351104068017591e-06, "loss": 0.0245, "step": 173455 }, { "epoch": 0.7237693084427235, "grad_norm": 1.205878731356674, "learning_rate": 2.351070182185621e-06, "loss": 0.0255, "step": 173460 }, { "epoch": 0.7237901711577138, "grad_norm": 0.31314015204745366, "learning_rate": 2.351036297818771e-06, "loss": 0.0267, "step": 173465 }, { "epoch": 0.7238110338727041, "grad_norm": 0.9147857911800159, "learning_rate": 2.3510024149169366e-06, "loss": 0.0314, "step": 173470 }, { "epoch": 0.7238318965876943, "grad_norm": 0.6857468768573941, "learning_rate": 2.3509685334800095e-06, "loss": 0.0258, "step": 173475 }, { "epoch": 0.7238527593026847, "grad_norm": 0.3292226803199906, "learning_rate": 2.350934653507886e-06, "loss": 0.0174, "step": 173480 }, { "epoch": 0.7238736220176749, "grad_norm": 0.6509075851422218, "learning_rate": 2.350900775000461e-06, "loss": 0.02, "step": 173485 }, { "epoch": 0.7238944847326652, "grad_norm": 0.9104211613881122, "learning_rate": 2.3508668979576274e-06, "loss": 0.0168, "step": 173490 }, { "epoch": 0.7239153474476554, "grad_norm": 1.3137567364903309, "learning_rate": 2.3508330223792803e-06, "loss": 0.0262, "step": 173495 }, { "epoch": 0.7239362101626458, "grad_norm": 0.6144529600696146, "learning_rate": 2.350799148265315e-06, "loss": 0.0168, "step": 173500 }, { "epoch": 0.723957072877636, "grad_norm": 0.6329514901850798, "learning_rate": 2.3507652756156256e-06, "loss": 0.0277, "step": 173505 }, { "epoch": 0.7239779355926262, "grad_norm": 0.5486313412658665, "learning_rate": 2.3507314044301057e-06, "loss": 0.0194, "step": 173510 }, { "epoch": 0.7239987983076166, "grad_norm": 0.5271474639598202, "learning_rate": 2.350697534708651e-06, "loss": 0.0331, "step": 173515 }, { "epoch": 0.7240196610226068, "grad_norm": 0.5422297041157373, "learning_rate": 2.350663666451155e-06, "loss": 0.0206, "step": 173520 }, { "epoch": 0.7240405237375971, "grad_norm": 0.5168362728978679, "learning_rate": 2.350629799657513e-06, "loss": 0.0198, "step": 173525 }, { "epoch": 0.7240613864525874, "grad_norm": 0.9603889550867981, "learning_rate": 2.3505959343276195e-06, "loss": 0.0249, "step": 173530 }, { "epoch": 0.7240822491675777, "grad_norm": 0.5534621183279199, "learning_rate": 2.350562070461369e-06, "loss": 0.025, "step": 173535 }, { "epoch": 0.7241031118825679, "grad_norm": 0.5921368450687737, "learning_rate": 2.3505282080586554e-06, "loss": 0.0189, "step": 173540 }, { "epoch": 0.7241239745975582, "grad_norm": 0.9255799503643736, "learning_rate": 2.3504943471193746e-06, "loss": 0.02, "step": 173545 }, { "epoch": 0.7241448373125485, "grad_norm": 0.9972260181705516, "learning_rate": 2.3504604876434195e-06, "loss": 0.0284, "step": 173550 }, { "epoch": 0.7241657000275388, "grad_norm": 0.2536120560265896, "learning_rate": 2.3504266296306862e-06, "loss": 0.0202, "step": 173555 }, { "epoch": 0.724186562742529, "grad_norm": 0.7407860797684428, "learning_rate": 2.3503927730810686e-06, "loss": 0.0213, "step": 173560 }, { "epoch": 0.7242074254575194, "grad_norm": 0.7452564346421294, "learning_rate": 2.3503589179944615e-06, "loss": 0.02, "step": 173565 }, { "epoch": 0.7242282881725096, "grad_norm": 0.5251613740214269, "learning_rate": 2.35032506437076e-06, "loss": 0.0175, "step": 173570 }, { "epoch": 0.7242491508874999, "grad_norm": 0.725581138378967, "learning_rate": 2.3502912122098576e-06, "loss": 0.0248, "step": 173575 }, { "epoch": 0.7242700136024902, "grad_norm": 0.4941521547714992, "learning_rate": 2.35025736151165e-06, "loss": 0.0318, "step": 173580 }, { "epoch": 0.7242908763174805, "grad_norm": 0.588110984217809, "learning_rate": 2.350223512276031e-06, "loss": 0.0222, "step": 173585 }, { "epoch": 0.7243117390324707, "grad_norm": 0.21522159119138037, "learning_rate": 2.3501896645028968e-06, "loss": 0.0209, "step": 173590 }, { "epoch": 0.7243326017474611, "grad_norm": 0.4846847109909058, "learning_rate": 2.35015581819214e-06, "loss": 0.0187, "step": 173595 }, { "epoch": 0.7243534644624513, "grad_norm": 0.9263882140954343, "learning_rate": 2.3501219733436567e-06, "loss": 0.0235, "step": 173600 }, { "epoch": 0.7243743271774415, "grad_norm": 0.8574718390574811, "learning_rate": 2.350088129957341e-06, "loss": 0.0215, "step": 173605 }, { "epoch": 0.7243951898924318, "grad_norm": 0.6791993147335662, "learning_rate": 2.350054288033089e-06, "loss": 0.0145, "step": 173610 }, { "epoch": 0.7244160526074221, "grad_norm": 0.3391885208571393, "learning_rate": 2.3500204475707927e-06, "loss": 0.0185, "step": 173615 }, { "epoch": 0.7244369153224124, "grad_norm": 0.5494290306662932, "learning_rate": 2.3499866085703493e-06, "loss": 0.0264, "step": 173620 }, { "epoch": 0.7244577780374026, "grad_norm": 0.5700716490306708, "learning_rate": 2.3499527710316535e-06, "loss": 0.0253, "step": 173625 }, { "epoch": 0.724478640752393, "grad_norm": 0.5644324758801701, "learning_rate": 2.3499189349545977e-06, "loss": 0.028, "step": 173630 }, { "epoch": 0.7244995034673832, "grad_norm": 0.7411271411573351, "learning_rate": 2.3498851003390795e-06, "loss": 0.0259, "step": 173635 }, { "epoch": 0.7245203661823735, "grad_norm": 0.608232522881978, "learning_rate": 2.3498512671849917e-06, "loss": 0.0265, "step": 173640 }, { "epoch": 0.7245412288973638, "grad_norm": 0.6330424849669712, "learning_rate": 2.3498174354922305e-06, "loss": 0.0217, "step": 173645 }, { "epoch": 0.7245620916123541, "grad_norm": 0.8441754107633054, "learning_rate": 2.3497836052606897e-06, "loss": 0.0234, "step": 173650 }, { "epoch": 0.7245829543273443, "grad_norm": 0.7303472708174252, "learning_rate": 2.3497497764902642e-06, "loss": 0.0235, "step": 173655 }, { "epoch": 0.7246038170423347, "grad_norm": 0.43661537089800284, "learning_rate": 2.3497159491808494e-06, "loss": 0.0211, "step": 173660 }, { "epoch": 0.7246246797573249, "grad_norm": 0.530684753091249, "learning_rate": 2.34968212333234e-06, "loss": 0.0209, "step": 173665 }, { "epoch": 0.7246455424723152, "grad_norm": 0.748850578222221, "learning_rate": 2.34964829894463e-06, "loss": 0.0221, "step": 173670 }, { "epoch": 0.7246664051873054, "grad_norm": 1.744930537077669, "learning_rate": 2.349614476017616e-06, "loss": 0.03, "step": 173675 }, { "epoch": 0.7246872679022958, "grad_norm": 0.7944721082751319, "learning_rate": 2.3495806545511913e-06, "loss": 0.0309, "step": 173680 }, { "epoch": 0.724708130617286, "grad_norm": 1.2817806607185191, "learning_rate": 2.349546834545251e-06, "loss": 0.0273, "step": 173685 }, { "epoch": 0.7247289933322763, "grad_norm": 0.7391008466823973, "learning_rate": 2.349513015999691e-06, "loss": 0.0233, "step": 173690 }, { "epoch": 0.7247498560472666, "grad_norm": 0.5784759199452143, "learning_rate": 2.349479198914405e-06, "loss": 0.0165, "step": 173695 }, { "epoch": 0.7247707187622568, "grad_norm": 0.554921129149903, "learning_rate": 2.3494453832892883e-06, "loss": 0.0307, "step": 173700 }, { "epoch": 0.7247915814772471, "grad_norm": 0.8812904868106864, "learning_rate": 2.349411569124236e-06, "loss": 0.0222, "step": 173705 }, { "epoch": 0.7248124441922374, "grad_norm": 0.3417884517573101, "learning_rate": 2.3493777564191433e-06, "loss": 0.014, "step": 173710 }, { "epoch": 0.7248333069072277, "grad_norm": 0.9145679026188701, "learning_rate": 2.349343945173904e-06, "loss": 0.0263, "step": 173715 }, { "epoch": 0.7248541696222179, "grad_norm": 1.044200127255554, "learning_rate": 2.3493101353884143e-06, "loss": 0.0301, "step": 173720 }, { "epoch": 0.7248750323372082, "grad_norm": 0.4702175745147335, "learning_rate": 2.34927632706257e-06, "loss": 0.0173, "step": 173725 }, { "epoch": 0.7248958950521985, "grad_norm": 0.3253549612226853, "learning_rate": 2.3492425201962626e-06, "loss": 0.0165, "step": 173730 }, { "epoch": 0.7249167577671888, "grad_norm": 0.44720867571718687, "learning_rate": 2.3492087147893906e-06, "loss": 0.0204, "step": 173735 }, { "epoch": 0.724937620482179, "grad_norm": 0.2852754771277832, "learning_rate": 2.3491749108418467e-06, "loss": 0.0178, "step": 173740 }, { "epoch": 0.7249584831971694, "grad_norm": 0.8066029064513301, "learning_rate": 2.3491411083535284e-06, "loss": 0.0151, "step": 173745 }, { "epoch": 0.7249793459121596, "grad_norm": 0.8918872785322588, "learning_rate": 2.349107307324328e-06, "loss": 0.021, "step": 173750 }, { "epoch": 0.7250002086271499, "grad_norm": 0.3870306617175204, "learning_rate": 2.349073507754142e-06, "loss": 0.0207, "step": 173755 }, { "epoch": 0.7250210713421402, "grad_norm": 0.795290964452808, "learning_rate": 2.3490397096428656e-06, "loss": 0.0277, "step": 173760 }, { "epoch": 0.7250419340571305, "grad_norm": 0.7116760850959486, "learning_rate": 2.349005912990393e-06, "loss": 0.022, "step": 173765 }, { "epoch": 0.7250627967721207, "grad_norm": 0.47439607732184985, "learning_rate": 2.34897211779662e-06, "loss": 0.0223, "step": 173770 }, { "epoch": 0.7250836594871111, "grad_norm": 0.4515192271797248, "learning_rate": 2.3489383240614414e-06, "loss": 0.0194, "step": 173775 }, { "epoch": 0.7251045222021013, "grad_norm": 0.5357600569334786, "learning_rate": 2.3489045317847516e-06, "loss": 0.023, "step": 173780 }, { "epoch": 0.7251253849170916, "grad_norm": 0.8615001201844578, "learning_rate": 2.348870740966447e-06, "loss": 0.02, "step": 173785 }, { "epoch": 0.7251462476320818, "grad_norm": 3.131471054083521, "learning_rate": 2.3488369516064216e-06, "loss": 0.0325, "step": 173790 }, { "epoch": 0.7251671103470722, "grad_norm": 0.36016252627269346, "learning_rate": 2.3488031637045713e-06, "loss": 0.0183, "step": 173795 }, { "epoch": 0.7251879730620624, "grad_norm": 0.7329016293726587, "learning_rate": 2.3487693772607905e-06, "loss": 0.0188, "step": 173800 }, { "epoch": 0.7252088357770526, "grad_norm": 0.7422787198036992, "learning_rate": 2.348735592274975e-06, "loss": 0.0344, "step": 173805 }, { "epoch": 0.725229698492043, "grad_norm": 0.678811551989116, "learning_rate": 2.34870180874702e-06, "loss": 0.0198, "step": 173810 }, { "epoch": 0.7252505612070332, "grad_norm": 0.6417642114588681, "learning_rate": 2.3486680266768195e-06, "loss": 0.0174, "step": 173815 }, { "epoch": 0.7252714239220235, "grad_norm": 1.1248647998045829, "learning_rate": 2.34863424606427e-06, "loss": 0.0272, "step": 173820 }, { "epoch": 0.7252922866370138, "grad_norm": 0.5098171567974382, "learning_rate": 2.348600466909266e-06, "loss": 0.0225, "step": 173825 }, { "epoch": 0.7253131493520041, "grad_norm": 0.4369167682379187, "learning_rate": 2.3485666892117028e-06, "loss": 0.0157, "step": 173830 }, { "epoch": 0.7253340120669943, "grad_norm": 0.7034767032477957, "learning_rate": 2.3485329129714756e-06, "loss": 0.0272, "step": 173835 }, { "epoch": 0.7253548747819847, "grad_norm": 0.9954350797871121, "learning_rate": 2.3484991381884797e-06, "loss": 0.0212, "step": 173840 }, { "epoch": 0.7253757374969749, "grad_norm": 0.7299670845330589, "learning_rate": 2.3484653648626105e-06, "loss": 0.0258, "step": 173845 }, { "epoch": 0.7253966002119652, "grad_norm": 0.9329459894095375, "learning_rate": 2.3484315929937633e-06, "loss": 0.027, "step": 173850 }, { "epoch": 0.7254174629269554, "grad_norm": 0.9632830597224739, "learning_rate": 2.348397822581832e-06, "loss": 0.0205, "step": 173855 }, { "epoch": 0.7254383256419458, "grad_norm": 0.4121927779027359, "learning_rate": 2.3483640536267135e-06, "loss": 0.0155, "step": 173860 }, { "epoch": 0.725459188356936, "grad_norm": 0.7282059871817611, "learning_rate": 2.348330286128303e-06, "loss": 0.0225, "step": 173865 }, { "epoch": 0.7254800510719263, "grad_norm": 0.5136852091940938, "learning_rate": 2.3482965200864945e-06, "loss": 0.022, "step": 173870 }, { "epoch": 0.7255009137869166, "grad_norm": 0.6934270236715843, "learning_rate": 2.348262755501184e-06, "loss": 0.0231, "step": 173875 }, { "epoch": 0.7255217765019069, "grad_norm": 0.6057077567544598, "learning_rate": 2.348228992372267e-06, "loss": 0.0247, "step": 173880 }, { "epoch": 0.7255426392168971, "grad_norm": 0.3407055039239141, "learning_rate": 2.3481952306996383e-06, "loss": 0.0229, "step": 173885 }, { "epoch": 0.7255635019318875, "grad_norm": 0.6170589523677774, "learning_rate": 2.3481614704831938e-06, "loss": 0.0211, "step": 173890 }, { "epoch": 0.7255843646468777, "grad_norm": 0.565441980762768, "learning_rate": 2.3481277117228285e-06, "loss": 0.0209, "step": 173895 }, { "epoch": 0.7256052273618679, "grad_norm": 1.069962658048887, "learning_rate": 2.348093954418438e-06, "loss": 0.0241, "step": 173900 }, { "epoch": 0.7256260900768582, "grad_norm": 2.070657829480948, "learning_rate": 2.348060198569917e-06, "loss": 0.0239, "step": 173905 }, { "epoch": 0.7256469527918485, "grad_norm": 1.4997488344996102, "learning_rate": 2.348026444177161e-06, "loss": 0.0298, "step": 173910 }, { "epoch": 0.7256678155068388, "grad_norm": 0.4490884512478053, "learning_rate": 2.347992691240067e-06, "loss": 0.0172, "step": 173915 }, { "epoch": 0.725688678221829, "grad_norm": 0.7164595220508194, "learning_rate": 2.3479589397585278e-06, "loss": 0.0152, "step": 173920 }, { "epoch": 0.7257095409368194, "grad_norm": 1.0868792866228847, "learning_rate": 2.34792518973244e-06, "loss": 0.0258, "step": 173925 }, { "epoch": 0.7257304036518096, "grad_norm": 0.74185393329318, "learning_rate": 2.3478914411617e-06, "loss": 0.0218, "step": 173930 }, { "epoch": 0.7257512663667999, "grad_norm": 0.4216710095649575, "learning_rate": 2.3478576940462015e-06, "loss": 0.0372, "step": 173935 }, { "epoch": 0.7257721290817902, "grad_norm": 0.4196658515460151, "learning_rate": 2.3478239483858403e-06, "loss": 0.0151, "step": 173940 }, { "epoch": 0.7257929917967805, "grad_norm": 0.25733614413135364, "learning_rate": 2.3477902041805127e-06, "loss": 0.0222, "step": 173945 }, { "epoch": 0.7258138545117707, "grad_norm": 0.5201194563225255, "learning_rate": 2.3477564614301137e-06, "loss": 0.0223, "step": 173950 }, { "epoch": 0.7258347172267611, "grad_norm": 0.262046921127117, "learning_rate": 2.3477227201345383e-06, "loss": 0.0195, "step": 173955 }, { "epoch": 0.7258555799417513, "grad_norm": 0.7688240986499796, "learning_rate": 2.3476889802936822e-06, "loss": 0.0228, "step": 173960 }, { "epoch": 0.7258764426567416, "grad_norm": 0.7697483641148841, "learning_rate": 2.347655241907442e-06, "loss": 0.0222, "step": 173965 }, { "epoch": 0.7258973053717318, "grad_norm": 0.4881896947734754, "learning_rate": 2.347621504975711e-06, "loss": 0.021, "step": 173970 }, { "epoch": 0.7259181680867222, "grad_norm": 0.6913705607483903, "learning_rate": 2.3475877694983863e-06, "loss": 0.0332, "step": 173975 }, { "epoch": 0.7259390308017124, "grad_norm": 0.3872635617842861, "learning_rate": 2.347554035475363e-06, "loss": 0.0141, "step": 173980 }, { "epoch": 0.7259598935167026, "grad_norm": 0.6079743497489377, "learning_rate": 2.3475203029065367e-06, "loss": 0.021, "step": 173985 }, { "epoch": 0.725980756231693, "grad_norm": 0.5949527125750956, "learning_rate": 2.3474865717918022e-06, "loss": 0.0212, "step": 173990 }, { "epoch": 0.7260016189466832, "grad_norm": 0.7532374150003504, "learning_rate": 2.3474528421310565e-06, "loss": 0.0305, "step": 173995 }, { "epoch": 0.7260224816616735, "grad_norm": 0.5671871679635799, "learning_rate": 2.3474191139241938e-06, "loss": 0.0233, "step": 174000 }, { "epoch": 0.7260433443766638, "grad_norm": 1.065234581311321, "learning_rate": 2.34738538717111e-06, "loss": 0.0336, "step": 174005 }, { "epoch": 0.7260642070916541, "grad_norm": 0.6189349250127936, "learning_rate": 2.347351661871701e-06, "loss": 0.0174, "step": 174010 }, { "epoch": 0.7260850698066443, "grad_norm": 0.6887095083041059, "learning_rate": 2.3473179380258625e-06, "loss": 0.028, "step": 174015 }, { "epoch": 0.7261059325216347, "grad_norm": 1.7043946698065033, "learning_rate": 2.347284215633489e-06, "loss": 0.0188, "step": 174020 }, { "epoch": 0.7261267952366249, "grad_norm": 0.6748514512348741, "learning_rate": 2.347250494694478e-06, "loss": 0.0237, "step": 174025 }, { "epoch": 0.7261476579516152, "grad_norm": 1.8727142175391165, "learning_rate": 2.347216775208723e-06, "loss": 0.0253, "step": 174030 }, { "epoch": 0.7261685206666054, "grad_norm": 1.0234158064835874, "learning_rate": 2.347183057176121e-06, "loss": 0.0215, "step": 174035 }, { "epoch": 0.7261893833815958, "grad_norm": 1.2305132077155119, "learning_rate": 2.3471493405965676e-06, "loss": 0.0208, "step": 174040 }, { "epoch": 0.726210246096586, "grad_norm": 0.4820921524841988, "learning_rate": 2.3471156254699578e-06, "loss": 0.022, "step": 174045 }, { "epoch": 0.7262311088115763, "grad_norm": 0.9077316581871274, "learning_rate": 2.347081911796187e-06, "loss": 0.0209, "step": 174050 }, { "epoch": 0.7262519715265666, "grad_norm": 0.8311834554877084, "learning_rate": 2.3470481995751516e-06, "loss": 0.0227, "step": 174055 }, { "epoch": 0.7262728342415569, "grad_norm": 0.701304214538293, "learning_rate": 2.3470144888067474e-06, "loss": 0.027, "step": 174060 }, { "epoch": 0.7262936969565471, "grad_norm": 0.8799623095221125, "learning_rate": 2.3469807794908696e-06, "loss": 0.024, "step": 174065 }, { "epoch": 0.7263145596715375, "grad_norm": 0.30131148374205174, "learning_rate": 2.3469470716274135e-06, "loss": 0.0223, "step": 174070 }, { "epoch": 0.7263354223865277, "grad_norm": 0.7242516858356614, "learning_rate": 2.3469133652162763e-06, "loss": 0.0226, "step": 174075 }, { "epoch": 0.726356285101518, "grad_norm": 0.638395879838877, "learning_rate": 2.3468796602573517e-06, "loss": 0.0153, "step": 174080 }, { "epoch": 0.7263771478165082, "grad_norm": 1.0900896308894263, "learning_rate": 2.3468459567505377e-06, "loss": 0.0227, "step": 174085 }, { "epoch": 0.7263980105314986, "grad_norm": 0.5160979002507602, "learning_rate": 2.3468122546957274e-06, "loss": 0.0169, "step": 174090 }, { "epoch": 0.7264188732464888, "grad_norm": 0.7296946693258531, "learning_rate": 2.3467785540928188e-06, "loss": 0.0222, "step": 174095 }, { "epoch": 0.726439735961479, "grad_norm": 0.8699542473840951, "learning_rate": 2.346744854941707e-06, "loss": 0.0245, "step": 174100 }, { "epoch": 0.7264605986764694, "grad_norm": 0.737479154042638, "learning_rate": 2.346711157242287e-06, "loss": 0.0229, "step": 174105 }, { "epoch": 0.7264814613914596, "grad_norm": 0.79612379346866, "learning_rate": 2.3466774609944555e-06, "loss": 0.0182, "step": 174110 }, { "epoch": 0.7265023241064499, "grad_norm": 0.8263508544149515, "learning_rate": 2.3466437661981077e-06, "loss": 0.0258, "step": 174115 }, { "epoch": 0.7265231868214402, "grad_norm": 1.1131873473753788, "learning_rate": 2.34661007285314e-06, "loss": 0.0312, "step": 174120 }, { "epoch": 0.7265440495364305, "grad_norm": 0.48906039965967435, "learning_rate": 2.3465763809594473e-06, "loss": 0.0179, "step": 174125 }, { "epoch": 0.7265649122514207, "grad_norm": 1.3911561733456037, "learning_rate": 2.3465426905169264e-06, "loss": 0.021, "step": 174130 }, { "epoch": 0.7265857749664111, "grad_norm": 0.4337993333790456, "learning_rate": 2.3465090015254724e-06, "loss": 0.0162, "step": 174135 }, { "epoch": 0.7266066376814013, "grad_norm": 0.6444724889988431, "learning_rate": 2.346475313984981e-06, "loss": 0.0237, "step": 174140 }, { "epoch": 0.7266275003963916, "grad_norm": 0.6056398052416232, "learning_rate": 2.3464416278953494e-06, "loss": 0.0192, "step": 174145 }, { "epoch": 0.7266483631113818, "grad_norm": 0.9212704227696559, "learning_rate": 2.3464079432564716e-06, "loss": 0.0185, "step": 174150 }, { "epoch": 0.7266692258263722, "grad_norm": 1.018779529259311, "learning_rate": 2.3463742600682458e-06, "loss": 0.0293, "step": 174155 }, { "epoch": 0.7266900885413624, "grad_norm": 0.3785760273704872, "learning_rate": 2.3463405783305655e-06, "loss": 0.0205, "step": 174160 }, { "epoch": 0.7267109512563527, "grad_norm": 0.5336661476546718, "learning_rate": 2.346306898043327e-06, "loss": 0.0124, "step": 174165 }, { "epoch": 0.726731813971343, "grad_norm": 0.9159146926913709, "learning_rate": 2.3462732192064277e-06, "loss": 0.026, "step": 174170 }, { "epoch": 0.7267526766863333, "grad_norm": 0.5773620951300428, "learning_rate": 2.3462395418197626e-06, "loss": 0.0216, "step": 174175 }, { "epoch": 0.7267735394013235, "grad_norm": 0.35720021236943095, "learning_rate": 2.346205865883227e-06, "loss": 0.0198, "step": 174180 }, { "epoch": 0.7267944021163139, "grad_norm": 0.7277853551761677, "learning_rate": 2.346172191396718e-06, "loss": 0.0197, "step": 174185 }, { "epoch": 0.7268152648313041, "grad_norm": 0.922783298860549, "learning_rate": 2.346138518360131e-06, "loss": 0.024, "step": 174190 }, { "epoch": 0.7268361275462943, "grad_norm": 0.8380543526843011, "learning_rate": 2.3461048467733617e-06, "loss": 0.0226, "step": 174195 }, { "epoch": 0.7268569902612847, "grad_norm": 0.4970527643235849, "learning_rate": 2.3460711766363063e-06, "loss": 0.0227, "step": 174200 }, { "epoch": 0.7268778529762749, "grad_norm": 0.580628487490378, "learning_rate": 2.3460375079488606e-06, "loss": 0.0226, "step": 174205 }, { "epoch": 0.7268987156912652, "grad_norm": 0.6599581298547544, "learning_rate": 2.3460038407109207e-06, "loss": 0.0504, "step": 174210 }, { "epoch": 0.7269195784062554, "grad_norm": 0.4295877813853966, "learning_rate": 2.3459701749223834e-06, "loss": 0.0213, "step": 174215 }, { "epoch": 0.7269404411212458, "grad_norm": 0.8849611270395878, "learning_rate": 2.345936510583143e-06, "loss": 0.0287, "step": 174220 }, { "epoch": 0.726961303836236, "grad_norm": 0.8999412609181903, "learning_rate": 2.345902847693098e-06, "loss": 0.0189, "step": 174225 }, { "epoch": 0.7269821665512263, "grad_norm": 0.4627997259812277, "learning_rate": 2.3458691862521415e-06, "loss": 0.0235, "step": 174230 }, { "epoch": 0.7270030292662166, "grad_norm": 0.5391830344416176, "learning_rate": 2.3458355262601716e-06, "loss": 0.0164, "step": 174235 }, { "epoch": 0.7270238919812069, "grad_norm": 0.5733603854185564, "learning_rate": 2.3458018677170834e-06, "loss": 0.0209, "step": 174240 }, { "epoch": 0.7270447546961971, "grad_norm": 0.8349086813844503, "learning_rate": 2.3457682106227735e-06, "loss": 0.0228, "step": 174245 }, { "epoch": 0.7270656174111875, "grad_norm": 1.226473919039007, "learning_rate": 2.3457345549771372e-06, "loss": 0.024, "step": 174250 }, { "epoch": 0.7270864801261777, "grad_norm": 0.8571075261847279, "learning_rate": 2.3457009007800717e-06, "loss": 0.021, "step": 174255 }, { "epoch": 0.727107342841168, "grad_norm": 0.3249487727614033, "learning_rate": 2.345667248031472e-06, "loss": 0.0112, "step": 174260 }, { "epoch": 0.7271282055561582, "grad_norm": 0.40517081994261545, "learning_rate": 2.345633596731235e-06, "loss": 0.0138, "step": 174265 }, { "epoch": 0.7271490682711486, "grad_norm": 0.8938306777745049, "learning_rate": 2.3455999468792563e-06, "loss": 0.0283, "step": 174270 }, { "epoch": 0.7271699309861388, "grad_norm": 0.9686845225844773, "learning_rate": 2.3455662984754328e-06, "loss": 0.0348, "step": 174275 }, { "epoch": 0.727190793701129, "grad_norm": 0.6883892908937485, "learning_rate": 2.3455326515196596e-06, "loss": 0.0216, "step": 174280 }, { "epoch": 0.7272116564161194, "grad_norm": 0.8322633259458971, "learning_rate": 2.345499006011833e-06, "loss": 0.0291, "step": 174285 }, { "epoch": 0.7272325191311096, "grad_norm": 0.6543504160338497, "learning_rate": 2.3454653619518506e-06, "loss": 0.0265, "step": 174290 }, { "epoch": 0.7272533818460999, "grad_norm": 0.9150204831407246, "learning_rate": 2.345431719339606e-06, "loss": 0.0245, "step": 174295 }, { "epoch": 0.7272742445610902, "grad_norm": 0.8167360964272016, "learning_rate": 2.345398078174998e-06, "loss": 0.0275, "step": 174300 }, { "epoch": 0.7272951072760805, "grad_norm": 0.22974236603065254, "learning_rate": 2.345364438457921e-06, "loss": 0.0202, "step": 174305 }, { "epoch": 0.7273159699910707, "grad_norm": 0.527415649452176, "learning_rate": 2.3453308001882714e-06, "loss": 0.0212, "step": 174310 }, { "epoch": 0.7273368327060611, "grad_norm": 0.44380242430013395, "learning_rate": 2.3452971633659464e-06, "loss": 0.0185, "step": 174315 }, { "epoch": 0.7273576954210513, "grad_norm": 0.4354399993458499, "learning_rate": 2.345263527990841e-06, "loss": 0.0155, "step": 174320 }, { "epoch": 0.7273785581360416, "grad_norm": 0.6318480512813651, "learning_rate": 2.3452298940628526e-06, "loss": 0.0185, "step": 174325 }, { "epoch": 0.7273994208510318, "grad_norm": 3.380120461114753, "learning_rate": 2.3451962615818764e-06, "loss": 0.0268, "step": 174330 }, { "epoch": 0.7274202835660222, "grad_norm": 1.3361128715266135, "learning_rate": 2.3451626305478094e-06, "loss": 0.0253, "step": 174335 }, { "epoch": 0.7274411462810124, "grad_norm": 0.8529027884434992, "learning_rate": 2.3451290009605476e-06, "loss": 0.023, "step": 174340 }, { "epoch": 0.7274620089960027, "grad_norm": 0.6883593525838538, "learning_rate": 2.345095372819987e-06, "loss": 0.0224, "step": 174345 }, { "epoch": 0.727482871710993, "grad_norm": 0.9741275608170209, "learning_rate": 2.3450617461260243e-06, "loss": 0.0178, "step": 174350 }, { "epoch": 0.7275037344259833, "grad_norm": 0.5880021194222492, "learning_rate": 2.3450281208785556e-06, "loss": 0.0212, "step": 174355 }, { "epoch": 0.7275245971409735, "grad_norm": 0.5853773287182714, "learning_rate": 2.3449944970774775e-06, "loss": 0.0195, "step": 174360 }, { "epoch": 0.7275454598559639, "grad_norm": 0.47419767010652775, "learning_rate": 2.3449608747226853e-06, "loss": 0.0172, "step": 174365 }, { "epoch": 0.7275663225709541, "grad_norm": 0.30553002202365415, "learning_rate": 2.3449272538140756e-06, "loss": 0.0203, "step": 174370 }, { "epoch": 0.7275871852859443, "grad_norm": 0.5472727688083788, "learning_rate": 2.344893634351546e-06, "loss": 0.0194, "step": 174375 }, { "epoch": 0.7276080480009347, "grad_norm": 1.0565196405655504, "learning_rate": 2.3448600163349926e-06, "loss": 0.0218, "step": 174380 }, { "epoch": 0.727628910715925, "grad_norm": 0.7313530192323815, "learning_rate": 2.3448263997643097e-06, "loss": 0.0197, "step": 174385 }, { "epoch": 0.7276497734309152, "grad_norm": 0.7133443287351234, "learning_rate": 2.344792784639396e-06, "loss": 0.0197, "step": 174390 }, { "epoch": 0.7276706361459054, "grad_norm": 1.0869052578674836, "learning_rate": 2.3447591709601465e-06, "loss": 0.0226, "step": 174395 }, { "epoch": 0.7276914988608958, "grad_norm": 0.18337643525644282, "learning_rate": 2.344725558726458e-06, "loss": 0.0215, "step": 174400 }, { "epoch": 0.727712361575886, "grad_norm": 0.680887604363383, "learning_rate": 2.3446919479382278e-06, "loss": 0.0146, "step": 174405 }, { "epoch": 0.7277332242908763, "grad_norm": 0.9289607173490301, "learning_rate": 2.3446583385953507e-06, "loss": 0.0293, "step": 174410 }, { "epoch": 0.7277540870058666, "grad_norm": 0.7104621090706911, "learning_rate": 2.344624730697724e-06, "loss": 0.0274, "step": 174415 }, { "epoch": 0.7277749497208569, "grad_norm": 0.8161931949725542, "learning_rate": 2.344591124245244e-06, "loss": 0.0237, "step": 174420 }, { "epoch": 0.7277958124358471, "grad_norm": 0.42724022453996274, "learning_rate": 2.344557519237807e-06, "loss": 0.0172, "step": 174425 }, { "epoch": 0.7278166751508375, "grad_norm": 0.7129817934055008, "learning_rate": 2.34452391567531e-06, "loss": 0.0221, "step": 174430 }, { "epoch": 0.7278375378658277, "grad_norm": 0.39707397774694037, "learning_rate": 2.3444903135576485e-06, "loss": 0.0159, "step": 174435 }, { "epoch": 0.727858400580818, "grad_norm": 0.5734030681982895, "learning_rate": 2.3444567128847194e-06, "loss": 0.017, "step": 174440 }, { "epoch": 0.7278792632958082, "grad_norm": 0.5654904020351978, "learning_rate": 2.34442311365642e-06, "loss": 0.0216, "step": 174445 }, { "epoch": 0.7279001260107986, "grad_norm": 0.5804108241207085, "learning_rate": 2.3443895158726452e-06, "loss": 0.0229, "step": 174450 }, { "epoch": 0.7279209887257888, "grad_norm": 0.5132333345056119, "learning_rate": 2.3443559195332926e-06, "loss": 0.0209, "step": 174455 }, { "epoch": 0.727941851440779, "grad_norm": 0.8776543882335801, "learning_rate": 2.3443223246382586e-06, "loss": 0.0196, "step": 174460 }, { "epoch": 0.7279627141557694, "grad_norm": 0.633695458423119, "learning_rate": 2.3442887311874395e-06, "loss": 0.0213, "step": 174465 }, { "epoch": 0.7279835768707597, "grad_norm": 0.20532081171725447, "learning_rate": 2.3442551391807316e-06, "loss": 0.0132, "step": 174470 }, { "epoch": 0.7280044395857499, "grad_norm": 0.6821832628769471, "learning_rate": 2.344221548618032e-06, "loss": 0.0265, "step": 174475 }, { "epoch": 0.7280253023007403, "grad_norm": 0.36196519901019214, "learning_rate": 2.3441879594992374e-06, "loss": 0.0246, "step": 174480 }, { "epoch": 0.7280461650157305, "grad_norm": 0.45902808416611474, "learning_rate": 2.3441543718242436e-06, "loss": 0.0152, "step": 174485 }, { "epoch": 0.7280670277307207, "grad_norm": 0.7640988798210788, "learning_rate": 2.344120785592947e-06, "loss": 0.0196, "step": 174490 }, { "epoch": 0.7280878904457111, "grad_norm": 0.8400209885938931, "learning_rate": 2.3440872008052453e-06, "loss": 0.0182, "step": 174495 }, { "epoch": 0.7281087531607013, "grad_norm": 0.7872856247849968, "learning_rate": 2.344053617461034e-06, "loss": 0.0242, "step": 174500 }, { "epoch": 0.7281296158756916, "grad_norm": 0.7741216398227337, "learning_rate": 2.34402003556021e-06, "loss": 0.0245, "step": 174505 }, { "epoch": 0.7281504785906818, "grad_norm": 0.41507389929870403, "learning_rate": 2.3439864551026706e-06, "loss": 0.0345, "step": 174510 }, { "epoch": 0.7281713413056722, "grad_norm": 0.4300209193600968, "learning_rate": 2.3439528760883117e-06, "loss": 0.0184, "step": 174515 }, { "epoch": 0.7281922040206624, "grad_norm": 0.657535599131725, "learning_rate": 2.3439192985170302e-06, "loss": 0.0196, "step": 174520 }, { "epoch": 0.7282130667356527, "grad_norm": 0.5730647787694998, "learning_rate": 2.3438857223887223e-06, "loss": 0.0235, "step": 174525 }, { "epoch": 0.728233929450643, "grad_norm": 0.5238646016486367, "learning_rate": 2.343852147703286e-06, "loss": 0.0255, "step": 174530 }, { "epoch": 0.7282547921656333, "grad_norm": 0.621721799153751, "learning_rate": 2.343818574460616e-06, "loss": 0.0196, "step": 174535 }, { "epoch": 0.7282756548806235, "grad_norm": 0.613336976841375, "learning_rate": 2.3437850026606102e-06, "loss": 0.0223, "step": 174540 }, { "epoch": 0.7282965175956139, "grad_norm": 0.6393549533955103, "learning_rate": 2.343751432303165e-06, "loss": 0.0225, "step": 174545 }, { "epoch": 0.7283173803106041, "grad_norm": 0.626618470440212, "learning_rate": 2.3437178633881776e-06, "loss": 0.0198, "step": 174550 }, { "epoch": 0.7283382430255944, "grad_norm": 0.7152242619245549, "learning_rate": 2.3436842959155435e-06, "loss": 0.0189, "step": 174555 }, { "epoch": 0.7283591057405847, "grad_norm": 0.7889338974042824, "learning_rate": 2.343650729885161e-06, "loss": 0.0206, "step": 174560 }, { "epoch": 0.728379968455575, "grad_norm": 0.7518782673078719, "learning_rate": 2.343617165296925e-06, "loss": 0.0237, "step": 174565 }, { "epoch": 0.7284008311705652, "grad_norm": 0.8340240845621505, "learning_rate": 2.3435836021507337e-06, "loss": 0.0203, "step": 174570 }, { "epoch": 0.7284216938855554, "grad_norm": 1.2002477694904776, "learning_rate": 2.3435500404464833e-06, "loss": 0.0283, "step": 174575 }, { "epoch": 0.7284425566005458, "grad_norm": 0.7012618097809475, "learning_rate": 2.3435164801840706e-06, "loss": 0.0289, "step": 174580 }, { "epoch": 0.728463419315536, "grad_norm": 0.3928286067670304, "learning_rate": 2.3434829213633927e-06, "loss": 0.0199, "step": 174585 }, { "epoch": 0.7284842820305263, "grad_norm": 0.8603485623976432, "learning_rate": 2.3434493639843454e-06, "loss": 0.0227, "step": 174590 }, { "epoch": 0.7285051447455166, "grad_norm": 0.7894559689173415, "learning_rate": 2.343415808046826e-06, "loss": 0.0213, "step": 174595 }, { "epoch": 0.7285260074605069, "grad_norm": 0.9847639100607604, "learning_rate": 2.3433822535507325e-06, "loss": 0.0282, "step": 174600 }, { "epoch": 0.7285468701754971, "grad_norm": 0.6255750022049322, "learning_rate": 2.3433487004959595e-06, "loss": 0.0175, "step": 174605 }, { "epoch": 0.7285677328904875, "grad_norm": 0.19952789875062837, "learning_rate": 2.343315148882406e-06, "loss": 0.0231, "step": 174610 }, { "epoch": 0.7285885956054777, "grad_norm": 0.4646454314544577, "learning_rate": 2.343281598709967e-06, "loss": 0.0173, "step": 174615 }, { "epoch": 0.728609458320468, "grad_norm": 0.7035210613299203, "learning_rate": 2.3432480499785403e-06, "loss": 0.0274, "step": 174620 }, { "epoch": 0.7286303210354582, "grad_norm": 0.619532389793662, "learning_rate": 2.3432145026880226e-06, "loss": 0.0209, "step": 174625 }, { "epoch": 0.7286511837504486, "grad_norm": 0.8687053780925655, "learning_rate": 2.343180956838311e-06, "loss": 0.0365, "step": 174630 }, { "epoch": 0.7286720464654388, "grad_norm": 0.828451285792177, "learning_rate": 2.343147412429302e-06, "loss": 0.0174, "step": 174635 }, { "epoch": 0.7286929091804291, "grad_norm": 0.27916840253838887, "learning_rate": 2.343113869460892e-06, "loss": 0.0196, "step": 174640 }, { "epoch": 0.7287137718954194, "grad_norm": 0.7412174161887045, "learning_rate": 2.343080327932979e-06, "loss": 0.0203, "step": 174645 }, { "epoch": 0.7287346346104097, "grad_norm": 0.8371866714553438, "learning_rate": 2.343046787845459e-06, "loss": 0.031, "step": 174650 }, { "epoch": 0.7287554973253999, "grad_norm": 0.6238341905239537, "learning_rate": 2.34301324919823e-06, "loss": 0.0208, "step": 174655 }, { "epoch": 0.7287763600403903, "grad_norm": 0.5336972698852644, "learning_rate": 2.3429797119911874e-06, "loss": 0.0192, "step": 174660 }, { "epoch": 0.7287972227553805, "grad_norm": 0.4291574777307166, "learning_rate": 2.3429461762242294e-06, "loss": 0.0174, "step": 174665 }, { "epoch": 0.7288180854703707, "grad_norm": 0.22458943289760003, "learning_rate": 2.342912641897252e-06, "loss": 0.0152, "step": 174670 }, { "epoch": 0.7288389481853611, "grad_norm": 0.56527873995297, "learning_rate": 2.3428791090101525e-06, "loss": 0.0147, "step": 174675 }, { "epoch": 0.7288598109003513, "grad_norm": 1.075679794700808, "learning_rate": 2.3428455775628287e-06, "loss": 0.0249, "step": 174680 }, { "epoch": 0.7288806736153416, "grad_norm": 0.36587240174387603, "learning_rate": 2.3428120475551767e-06, "loss": 0.0173, "step": 174685 }, { "epoch": 0.7289015363303318, "grad_norm": 0.5464259471480877, "learning_rate": 2.3427785189870937e-06, "loss": 0.0232, "step": 174690 }, { "epoch": 0.7289223990453222, "grad_norm": 0.7125498752697802, "learning_rate": 2.3427449918584762e-06, "loss": 0.0252, "step": 174695 }, { "epoch": 0.7289432617603124, "grad_norm": 0.6002727981515302, "learning_rate": 2.3427114661692214e-06, "loss": 0.0177, "step": 174700 }, { "epoch": 0.7289641244753027, "grad_norm": 1.1747385102020307, "learning_rate": 2.342677941919227e-06, "loss": 0.0245, "step": 174705 }, { "epoch": 0.728984987190293, "grad_norm": 1.0888397345840597, "learning_rate": 2.3426444191083894e-06, "loss": 0.0266, "step": 174710 }, { "epoch": 0.7290058499052833, "grad_norm": 1.185028544703412, "learning_rate": 2.342610897736606e-06, "loss": 0.023, "step": 174715 }, { "epoch": 0.7290267126202735, "grad_norm": 1.5634929403384092, "learning_rate": 2.3425773778037735e-06, "loss": 0.0236, "step": 174720 }, { "epoch": 0.7290475753352639, "grad_norm": 0.9049130235028382, "learning_rate": 2.342543859309789e-06, "loss": 0.0181, "step": 174725 }, { "epoch": 0.7290684380502541, "grad_norm": 0.5161185988193973, "learning_rate": 2.34251034225455e-06, "loss": 0.0175, "step": 174730 }, { "epoch": 0.7290893007652444, "grad_norm": 0.8744613959965427, "learning_rate": 2.342476826637953e-06, "loss": 0.0223, "step": 174735 }, { "epoch": 0.7291101634802347, "grad_norm": 0.8472997441498087, "learning_rate": 2.342443312459895e-06, "loss": 0.0247, "step": 174740 }, { "epoch": 0.729131026195225, "grad_norm": 0.9167254267028292, "learning_rate": 2.342409799720274e-06, "loss": 0.0198, "step": 174745 }, { "epoch": 0.7291518889102152, "grad_norm": 0.7968768658953875, "learning_rate": 2.3423762884189863e-06, "loss": 0.0197, "step": 174750 }, { "epoch": 0.7291727516252055, "grad_norm": 0.4817748707326163, "learning_rate": 2.3423427785559293e-06, "loss": 0.026, "step": 174755 }, { "epoch": 0.7291936143401958, "grad_norm": 1.139195775707545, "learning_rate": 2.342309270131e-06, "loss": 0.0238, "step": 174760 }, { "epoch": 0.729214477055186, "grad_norm": 0.6772734205609228, "learning_rate": 2.3422757631440954e-06, "loss": 0.0158, "step": 174765 }, { "epoch": 0.7292353397701763, "grad_norm": 0.653394126940224, "learning_rate": 2.342242257595113e-06, "loss": 0.0285, "step": 174770 }, { "epoch": 0.7292562024851666, "grad_norm": 0.884892075757385, "learning_rate": 2.3422087534839502e-06, "loss": 0.0278, "step": 174775 }, { "epoch": 0.7292770652001569, "grad_norm": 0.45975699823779254, "learning_rate": 2.3421752508105034e-06, "loss": 0.0184, "step": 174780 }, { "epoch": 0.7292979279151471, "grad_norm": 0.5977950352161951, "learning_rate": 2.3421417495746696e-06, "loss": 0.0202, "step": 174785 }, { "epoch": 0.7293187906301375, "grad_norm": 0.4754255585860914, "learning_rate": 2.3421082497763478e-06, "loss": 0.02, "step": 174790 }, { "epoch": 0.7293396533451277, "grad_norm": 1.1667291441986622, "learning_rate": 2.3420747514154332e-06, "loss": 0.0265, "step": 174795 }, { "epoch": 0.729360516060118, "grad_norm": 1.053724307153417, "learning_rate": 2.3420412544918235e-06, "loss": 0.0191, "step": 174800 }, { "epoch": 0.7293813787751082, "grad_norm": 1.0909245155645355, "learning_rate": 2.3420077590054173e-06, "loss": 0.0175, "step": 174805 }, { "epoch": 0.7294022414900986, "grad_norm": 3.214867786856025, "learning_rate": 2.3419742649561096e-06, "loss": 0.0226, "step": 174810 }, { "epoch": 0.7294231042050888, "grad_norm": 0.5144241499325622, "learning_rate": 2.341940772343799e-06, "loss": 0.0192, "step": 174815 }, { "epoch": 0.7294439669200791, "grad_norm": 1.0646111910654725, "learning_rate": 2.3419072811683825e-06, "loss": 0.0265, "step": 174820 }, { "epoch": 0.7294648296350694, "grad_norm": 0.45002486825066285, "learning_rate": 2.3418737914297573e-06, "loss": 0.0172, "step": 174825 }, { "epoch": 0.7294856923500597, "grad_norm": 1.0102848029113587, "learning_rate": 2.341840303127821e-06, "loss": 0.0237, "step": 174830 }, { "epoch": 0.7295065550650499, "grad_norm": 0.840077373566252, "learning_rate": 2.34180681626247e-06, "loss": 0.0241, "step": 174835 }, { "epoch": 0.7295274177800403, "grad_norm": 0.4021057502931748, "learning_rate": 2.341773330833603e-06, "loss": 0.0236, "step": 174840 }, { "epoch": 0.7295482804950305, "grad_norm": 1.271075083573633, "learning_rate": 2.3417398468411163e-06, "loss": 0.0224, "step": 174845 }, { "epoch": 0.7295691432100208, "grad_norm": 0.7017322733650287, "learning_rate": 2.3417063642849065e-06, "loss": 0.0204, "step": 174850 }, { "epoch": 0.7295900059250111, "grad_norm": 0.6593613212964362, "learning_rate": 2.3416728831648732e-06, "loss": 0.0168, "step": 174855 }, { "epoch": 0.7296108686400014, "grad_norm": 0.604312750831791, "learning_rate": 2.341639403480912e-06, "loss": 0.0173, "step": 174860 }, { "epoch": 0.7296317313549916, "grad_norm": 0.7734398494198825, "learning_rate": 2.34160592523292e-06, "loss": 0.0198, "step": 174865 }, { "epoch": 0.7296525940699818, "grad_norm": 0.09698971072384568, "learning_rate": 2.341572448420795e-06, "loss": 0.0217, "step": 174870 }, { "epoch": 0.7296734567849722, "grad_norm": 0.7409234369607473, "learning_rate": 2.3415389730444355e-06, "loss": 0.0181, "step": 174875 }, { "epoch": 0.7296943194999624, "grad_norm": 0.49271242456969594, "learning_rate": 2.341505499103737e-06, "loss": 0.0366, "step": 174880 }, { "epoch": 0.7297151822149527, "grad_norm": 0.3692272199851105, "learning_rate": 2.341472026598599e-06, "loss": 0.0246, "step": 174885 }, { "epoch": 0.729736044929943, "grad_norm": 0.5950116993997031, "learning_rate": 2.341438555528917e-06, "loss": 0.0256, "step": 174890 }, { "epoch": 0.7297569076449333, "grad_norm": 1.0859952336796268, "learning_rate": 2.341405085894589e-06, "loss": 0.0174, "step": 174895 }, { "epoch": 0.7297777703599235, "grad_norm": 0.5193996783942033, "learning_rate": 2.341371617695512e-06, "loss": 0.0223, "step": 174900 }, { "epoch": 0.7297986330749139, "grad_norm": 0.6705444754182226, "learning_rate": 2.3413381509315847e-06, "loss": 0.0195, "step": 174905 }, { "epoch": 0.7298194957899041, "grad_norm": 0.7969576571975814, "learning_rate": 2.341304685602703e-06, "loss": 0.0239, "step": 174910 }, { "epoch": 0.7298403585048944, "grad_norm": 0.9958458887580828, "learning_rate": 2.341271221708766e-06, "loss": 0.031, "step": 174915 }, { "epoch": 0.7298612212198847, "grad_norm": 0.6093496195403946, "learning_rate": 2.3412377592496693e-06, "loss": 0.0263, "step": 174920 }, { "epoch": 0.729882083934875, "grad_norm": 0.5405218667478628, "learning_rate": 2.3412042982253123e-06, "loss": 0.0203, "step": 174925 }, { "epoch": 0.7299029466498652, "grad_norm": 0.42476221071184456, "learning_rate": 2.3411708386355907e-06, "loss": 0.0214, "step": 174930 }, { "epoch": 0.7299238093648555, "grad_norm": 0.3776059403143517, "learning_rate": 2.341137380480403e-06, "loss": 0.0185, "step": 174935 }, { "epoch": 0.7299446720798458, "grad_norm": 0.8000781107585786, "learning_rate": 2.3411039237596474e-06, "loss": 0.0192, "step": 174940 }, { "epoch": 0.7299655347948361, "grad_norm": 0.47042781362344455, "learning_rate": 2.3410704684732195e-06, "loss": 0.0203, "step": 174945 }, { "epoch": 0.7299863975098263, "grad_norm": 0.7819158647620497, "learning_rate": 2.341037014621018e-06, "loss": 0.0163, "step": 174950 }, { "epoch": 0.7300072602248167, "grad_norm": 0.5338284029840263, "learning_rate": 2.34100356220294e-06, "loss": 0.0205, "step": 174955 }, { "epoch": 0.7300281229398069, "grad_norm": 0.9814057138400692, "learning_rate": 2.3409701112188833e-06, "loss": 0.0227, "step": 174960 }, { "epoch": 0.7300489856547971, "grad_norm": 0.8071732487591754, "learning_rate": 2.340936661668746e-06, "loss": 0.0211, "step": 174965 }, { "epoch": 0.7300698483697875, "grad_norm": 1.0666910967305947, "learning_rate": 2.3409032135524247e-06, "loss": 0.0247, "step": 174970 }, { "epoch": 0.7300907110847777, "grad_norm": 0.9454349104181499, "learning_rate": 2.340869766869818e-06, "loss": 0.0193, "step": 174975 }, { "epoch": 0.730111573799768, "grad_norm": 0.7841129973662682, "learning_rate": 2.3408363216208214e-06, "loss": 0.0233, "step": 174980 }, { "epoch": 0.7301324365147582, "grad_norm": 0.9871137565021708, "learning_rate": 2.340802877805335e-06, "loss": 0.0232, "step": 174985 }, { "epoch": 0.7301532992297486, "grad_norm": 0.6698816303249459, "learning_rate": 2.3407694354232546e-06, "loss": 0.0265, "step": 174990 }, { "epoch": 0.7301741619447388, "grad_norm": 0.7994277186934923, "learning_rate": 2.3407359944744793e-06, "loss": 0.0186, "step": 174995 }, { "epoch": 0.7301950246597291, "grad_norm": 0.7049849749672662, "learning_rate": 2.3407025549589053e-06, "loss": 0.0263, "step": 175000 }, { "epoch": 0.7302158873747194, "grad_norm": 0.7498756180827996, "learning_rate": 2.340669116876432e-06, "loss": 0.0199, "step": 175005 }, { "epoch": 0.7302367500897097, "grad_norm": 1.319456216024864, "learning_rate": 2.3406356802269544e-06, "loss": 0.0262, "step": 175010 }, { "epoch": 0.7302576128046999, "grad_norm": 1.3135956055348825, "learning_rate": 2.3406022450103724e-06, "loss": 0.0311, "step": 175015 }, { "epoch": 0.7302784755196903, "grad_norm": 0.6012235390930778, "learning_rate": 2.3405688112265827e-06, "loss": 0.0172, "step": 175020 }, { "epoch": 0.7302993382346805, "grad_norm": 0.7619105011783508, "learning_rate": 2.3405353788754836e-06, "loss": 0.0236, "step": 175025 }, { "epoch": 0.7303202009496708, "grad_norm": 0.7107935591874184, "learning_rate": 2.3405019479569726e-06, "loss": 0.0244, "step": 175030 }, { "epoch": 0.7303410636646611, "grad_norm": 0.5743494501849934, "learning_rate": 2.3404685184709463e-06, "loss": 0.0252, "step": 175035 }, { "epoch": 0.7303619263796514, "grad_norm": 0.49655896582253733, "learning_rate": 2.340435090417304e-06, "loss": 0.0219, "step": 175040 }, { "epoch": 0.7303827890946416, "grad_norm": 0.5959360713098143, "learning_rate": 2.3404016637959423e-06, "loss": 0.0227, "step": 175045 }, { "epoch": 0.7304036518096318, "grad_norm": 0.5944894201695918, "learning_rate": 2.3403682386067596e-06, "loss": 0.0224, "step": 175050 }, { "epoch": 0.7304245145246222, "grad_norm": 0.40276829885016024, "learning_rate": 2.3403348148496525e-06, "loss": 0.0244, "step": 175055 }, { "epoch": 0.7304453772396124, "grad_norm": 0.5950552475810057, "learning_rate": 2.340301392524521e-06, "loss": 0.0133, "step": 175060 }, { "epoch": 0.7304662399546027, "grad_norm": 0.34892068228648304, "learning_rate": 2.340267971631261e-06, "loss": 0.014, "step": 175065 }, { "epoch": 0.730487102669593, "grad_norm": 1.357763095013166, "learning_rate": 2.3402345521697696e-06, "loss": 0.0305, "step": 175070 }, { "epoch": 0.7305079653845833, "grad_norm": 0.6220866033239469, "learning_rate": 2.3402011341399465e-06, "loss": 0.0275, "step": 175075 }, { "epoch": 0.7305288280995735, "grad_norm": 0.35381030545137354, "learning_rate": 2.3401677175416887e-06, "loss": 0.025, "step": 175080 }, { "epoch": 0.7305496908145639, "grad_norm": 0.45335646887931025, "learning_rate": 2.340134302374894e-06, "loss": 0.0221, "step": 175085 }, { "epoch": 0.7305705535295541, "grad_norm": 0.6741526077645618, "learning_rate": 2.3401008886394596e-06, "loss": 0.0181, "step": 175090 }, { "epoch": 0.7305914162445444, "grad_norm": 0.9106146025272795, "learning_rate": 2.3400674763352845e-06, "loss": 0.0256, "step": 175095 }, { "epoch": 0.7306122789595347, "grad_norm": 0.744109533546557, "learning_rate": 2.3400340654622655e-06, "loss": 0.0222, "step": 175100 }, { "epoch": 0.730633141674525, "grad_norm": 0.9019438660258323, "learning_rate": 2.340000656020301e-06, "loss": 0.0221, "step": 175105 }, { "epoch": 0.7306540043895152, "grad_norm": 1.7157802077417006, "learning_rate": 2.339967248009289e-06, "loss": 0.0175, "step": 175110 }, { "epoch": 0.7306748671045055, "grad_norm": 0.6470553575676178, "learning_rate": 2.3399338414291265e-06, "loss": 0.0182, "step": 175115 }, { "epoch": 0.7306957298194958, "grad_norm": 0.5689514353048754, "learning_rate": 2.339900436279712e-06, "loss": 0.0166, "step": 175120 }, { "epoch": 0.7307165925344861, "grad_norm": 0.8841806295720198, "learning_rate": 2.3398670325609434e-06, "loss": 0.0206, "step": 175125 }, { "epoch": 0.7307374552494763, "grad_norm": 0.5307335231576017, "learning_rate": 2.3398336302727186e-06, "loss": 0.0171, "step": 175130 }, { "epoch": 0.7307583179644667, "grad_norm": 0.7317253205301978, "learning_rate": 2.339800229414935e-06, "loss": 0.0249, "step": 175135 }, { "epoch": 0.7307791806794569, "grad_norm": 0.6743075295046284, "learning_rate": 2.3397668299874904e-06, "loss": 0.0165, "step": 175140 }, { "epoch": 0.7308000433944472, "grad_norm": 0.7452291565649527, "learning_rate": 2.339733431990284e-06, "loss": 0.0211, "step": 175145 }, { "epoch": 0.7308209061094375, "grad_norm": 1.0005605979276475, "learning_rate": 2.339700035423213e-06, "loss": 0.0197, "step": 175150 }, { "epoch": 0.7308417688244278, "grad_norm": 0.3792395447351065, "learning_rate": 2.3396666402861747e-06, "loss": 0.0213, "step": 175155 }, { "epoch": 0.730862631539418, "grad_norm": 0.9143983314370725, "learning_rate": 2.3396332465790674e-06, "loss": 0.0263, "step": 175160 }, { "epoch": 0.7308834942544082, "grad_norm": 0.7165543656160747, "learning_rate": 2.33959985430179e-06, "loss": 0.0245, "step": 175165 }, { "epoch": 0.7309043569693986, "grad_norm": 0.6159931357197023, "learning_rate": 2.3395664634542392e-06, "loss": 0.0208, "step": 175170 }, { "epoch": 0.7309252196843888, "grad_norm": 1.3024008668640175, "learning_rate": 2.3395330740363133e-06, "loss": 0.0256, "step": 175175 }, { "epoch": 0.7309460823993791, "grad_norm": 0.7707310868880083, "learning_rate": 2.339499686047911e-06, "loss": 0.0314, "step": 175180 }, { "epoch": 0.7309669451143694, "grad_norm": 0.5587994774558971, "learning_rate": 2.33946629948893e-06, "loss": 0.0226, "step": 175185 }, { "epoch": 0.7309878078293597, "grad_norm": 1.1654715516425205, "learning_rate": 2.3394329143592667e-06, "loss": 0.0207, "step": 175190 }, { "epoch": 0.7310086705443499, "grad_norm": 0.783833165767598, "learning_rate": 2.339399530658821e-06, "loss": 0.0196, "step": 175195 }, { "epoch": 0.7310295332593403, "grad_norm": 0.9334310636734386, "learning_rate": 2.3393661483874913e-06, "loss": 0.0207, "step": 175200 }, { "epoch": 0.7310503959743305, "grad_norm": 0.8731512247750466, "learning_rate": 2.3393327675451743e-06, "loss": 0.0192, "step": 175205 }, { "epoch": 0.7310712586893208, "grad_norm": 0.6001579356162843, "learning_rate": 2.3392993881317684e-06, "loss": 0.0149, "step": 175210 }, { "epoch": 0.7310921214043111, "grad_norm": 0.5564185665825038, "learning_rate": 2.339266010147172e-06, "loss": 0.0157, "step": 175215 }, { "epoch": 0.7311129841193014, "grad_norm": 0.7251612129835462, "learning_rate": 2.339232633591283e-06, "loss": 0.0232, "step": 175220 }, { "epoch": 0.7311338468342916, "grad_norm": 0.29937890609198764, "learning_rate": 2.3391992584639988e-06, "loss": 0.0192, "step": 175225 }, { "epoch": 0.7311547095492819, "grad_norm": 1.227406947790333, "learning_rate": 2.3391658847652184e-06, "loss": 0.0255, "step": 175230 }, { "epoch": 0.7311755722642722, "grad_norm": 0.7810143489353873, "learning_rate": 2.3391325124948396e-06, "loss": 0.0233, "step": 175235 }, { "epoch": 0.7311964349792625, "grad_norm": 0.4506220446176009, "learning_rate": 2.3390991416527608e-06, "loss": 0.017, "step": 175240 }, { "epoch": 0.7312172976942527, "grad_norm": 0.4725580917303457, "learning_rate": 2.3390657722388795e-06, "loss": 0.0254, "step": 175245 }, { "epoch": 0.731238160409243, "grad_norm": 0.42791401560631365, "learning_rate": 2.3390324042530945e-06, "loss": 0.03, "step": 175250 }, { "epoch": 0.7312590231242333, "grad_norm": 0.2793694396753034, "learning_rate": 2.338999037695303e-06, "loss": 0.016, "step": 175255 }, { "epoch": 0.7312798858392235, "grad_norm": 1.063050513449106, "learning_rate": 2.338965672565404e-06, "loss": 0.0231, "step": 175260 }, { "epoch": 0.7313007485542139, "grad_norm": 0.28871318534439483, "learning_rate": 2.3389323088632956e-06, "loss": 0.0284, "step": 175265 }, { "epoch": 0.7313216112692041, "grad_norm": 0.929001572101998, "learning_rate": 2.3388989465888755e-06, "loss": 0.0234, "step": 175270 }, { "epoch": 0.7313424739841944, "grad_norm": 0.5786121028658132, "learning_rate": 2.3388655857420425e-06, "loss": 0.0153, "step": 175275 }, { "epoch": 0.7313633366991846, "grad_norm": 0.6132834924792548, "learning_rate": 2.3388322263226943e-06, "loss": 0.0135, "step": 175280 }, { "epoch": 0.731384199414175, "grad_norm": 1.0382334093328045, "learning_rate": 2.3387988683307296e-06, "loss": 0.0174, "step": 175285 }, { "epoch": 0.7314050621291652, "grad_norm": 0.7944374297845122, "learning_rate": 2.338765511766046e-06, "loss": 0.0172, "step": 175290 }, { "epoch": 0.7314259248441555, "grad_norm": 0.7287550752238899, "learning_rate": 2.338732156628542e-06, "loss": 0.0178, "step": 175295 }, { "epoch": 0.7314467875591458, "grad_norm": 0.7606017509965762, "learning_rate": 2.3386988029181158e-06, "loss": 0.0174, "step": 175300 }, { "epoch": 0.7314676502741361, "grad_norm": 0.4638116087045419, "learning_rate": 2.3386654506346652e-06, "loss": 0.0226, "step": 175305 }, { "epoch": 0.7314885129891263, "grad_norm": 0.6281426243378048, "learning_rate": 2.33863209977809e-06, "loss": 0.0251, "step": 175310 }, { "epoch": 0.7315093757041167, "grad_norm": 0.7309309160557323, "learning_rate": 2.3385987503482867e-06, "loss": 0.0205, "step": 175315 }, { "epoch": 0.7315302384191069, "grad_norm": 0.7116701559861823, "learning_rate": 2.338565402345154e-06, "loss": 0.0204, "step": 175320 }, { "epoch": 0.7315511011340972, "grad_norm": 0.5893307742417114, "learning_rate": 2.3385320557685906e-06, "loss": 0.0183, "step": 175325 }, { "epoch": 0.7315719638490875, "grad_norm": 1.393812485653795, "learning_rate": 2.338498710618495e-06, "loss": 0.0224, "step": 175330 }, { "epoch": 0.7315928265640778, "grad_norm": 0.6411451960910997, "learning_rate": 2.338465366894765e-06, "loss": 0.0281, "step": 175335 }, { "epoch": 0.731613689279068, "grad_norm": 0.8937325613259601, "learning_rate": 2.3384320245972985e-06, "loss": 0.0235, "step": 175340 }, { "epoch": 0.7316345519940582, "grad_norm": 0.43782091779971116, "learning_rate": 2.3383986837259952e-06, "loss": 0.017, "step": 175345 }, { "epoch": 0.7316554147090486, "grad_norm": 0.6884723882399304, "learning_rate": 2.3383653442807516e-06, "loss": 0.0227, "step": 175350 }, { "epoch": 0.7316762774240388, "grad_norm": 0.6378097919498714, "learning_rate": 2.338332006261468e-06, "loss": 0.0223, "step": 175355 }, { "epoch": 0.7316971401390291, "grad_norm": 0.9708912527716059, "learning_rate": 2.3382986696680416e-06, "loss": 0.0234, "step": 175360 }, { "epoch": 0.7317180028540194, "grad_norm": 1.2933737899394167, "learning_rate": 2.3382653345003703e-06, "loss": 0.0262, "step": 175365 }, { "epoch": 0.7317388655690097, "grad_norm": 0.22267272514404604, "learning_rate": 2.3382320007583536e-06, "loss": 0.0205, "step": 175370 }, { "epoch": 0.7317597282839999, "grad_norm": 0.3993663125999183, "learning_rate": 2.3381986684418896e-06, "loss": 0.026, "step": 175375 }, { "epoch": 0.7317805909989903, "grad_norm": 0.6646221407360097, "learning_rate": 2.3381653375508766e-06, "loss": 0.0252, "step": 175380 }, { "epoch": 0.7318014537139805, "grad_norm": 0.6013214458878178, "learning_rate": 2.3381320080852125e-06, "loss": 0.0175, "step": 175385 }, { "epoch": 0.7318223164289708, "grad_norm": 0.711873960408223, "learning_rate": 2.338098680044796e-06, "loss": 0.0259, "step": 175390 }, { "epoch": 0.7318431791439611, "grad_norm": 0.5538893057880142, "learning_rate": 2.3380653534295263e-06, "loss": 0.017, "step": 175395 }, { "epoch": 0.7318640418589514, "grad_norm": 0.8067410549683346, "learning_rate": 2.338032028239301e-06, "loss": 0.0222, "step": 175400 }, { "epoch": 0.7318849045739416, "grad_norm": 0.9865582707281626, "learning_rate": 2.337998704474018e-06, "loss": 0.0197, "step": 175405 }, { "epoch": 0.7319057672889319, "grad_norm": 0.3311473364941205, "learning_rate": 2.3379653821335774e-06, "loss": 0.0178, "step": 175410 }, { "epoch": 0.7319266300039222, "grad_norm": 0.531161614106854, "learning_rate": 2.337932061217876e-06, "loss": 0.02, "step": 175415 }, { "epoch": 0.7319474927189125, "grad_norm": 0.8226865578720464, "learning_rate": 2.3378987417268136e-06, "loss": 0.025, "step": 175420 }, { "epoch": 0.7319683554339027, "grad_norm": 1.0575540564284602, "learning_rate": 2.337865423660288e-06, "loss": 0.0271, "step": 175425 }, { "epoch": 0.7319892181488931, "grad_norm": 0.7678287837743751, "learning_rate": 2.3378321070181976e-06, "loss": 0.0252, "step": 175430 }, { "epoch": 0.7320100808638833, "grad_norm": 0.34617031460243364, "learning_rate": 2.3377987918004413e-06, "loss": 0.0216, "step": 175435 }, { "epoch": 0.7320309435788735, "grad_norm": 0.7908068396541802, "learning_rate": 2.337765478006918e-06, "loss": 0.0237, "step": 175440 }, { "epoch": 0.7320518062938639, "grad_norm": 0.3354401947319236, "learning_rate": 2.3377321656375246e-06, "loss": 0.0211, "step": 175445 }, { "epoch": 0.7320726690088541, "grad_norm": 0.7727010718170151, "learning_rate": 2.337698854692161e-06, "loss": 0.0279, "step": 175450 }, { "epoch": 0.7320935317238444, "grad_norm": 0.9115740826509738, "learning_rate": 2.3376655451707258e-06, "loss": 0.0214, "step": 175455 }, { "epoch": 0.7321143944388346, "grad_norm": 0.4614819949212399, "learning_rate": 2.3376322370731174e-06, "loss": 0.0211, "step": 175460 }, { "epoch": 0.732135257153825, "grad_norm": 0.6783259172817492, "learning_rate": 2.337598930399233e-06, "loss": 0.0201, "step": 175465 }, { "epoch": 0.7321561198688152, "grad_norm": 0.4961518954440579, "learning_rate": 2.337565625148973e-06, "loss": 0.0212, "step": 175470 }, { "epoch": 0.7321769825838055, "grad_norm": 0.8294308386161882, "learning_rate": 2.3375323213222358e-06, "loss": 0.0257, "step": 175475 }, { "epoch": 0.7321978452987958, "grad_norm": 4.924589452116365, "learning_rate": 2.337499018918919e-06, "loss": 0.0225, "step": 175480 }, { "epoch": 0.7322187080137861, "grad_norm": 0.8305161636928904, "learning_rate": 2.3374657179389217e-06, "loss": 0.0215, "step": 175485 }, { "epoch": 0.7322395707287763, "grad_norm": 1.0692720282156667, "learning_rate": 2.337432418382143e-06, "loss": 0.0164, "step": 175490 }, { "epoch": 0.7322604334437667, "grad_norm": 0.9181543501902596, "learning_rate": 2.337399120248481e-06, "loss": 0.0224, "step": 175495 }, { "epoch": 0.7322812961587569, "grad_norm": 0.8327569252111504, "learning_rate": 2.3373658235378334e-06, "loss": 0.0275, "step": 175500 }, { "epoch": 0.7323021588737472, "grad_norm": 0.28448211364689685, "learning_rate": 2.337332528250101e-06, "loss": 0.0208, "step": 175505 }, { "epoch": 0.7323230215887375, "grad_norm": 0.6229103118877015, "learning_rate": 2.3372992343851806e-06, "loss": 0.0228, "step": 175510 }, { "epoch": 0.7323438843037278, "grad_norm": 0.5542439841688107, "learning_rate": 2.337265941942972e-06, "loss": 0.0183, "step": 175515 }, { "epoch": 0.732364747018718, "grad_norm": 0.7252221564314179, "learning_rate": 2.337232650923374e-06, "loss": 0.021, "step": 175520 }, { "epoch": 0.7323856097337083, "grad_norm": 1.0693937111114538, "learning_rate": 2.337199361326284e-06, "loss": 0.0261, "step": 175525 }, { "epoch": 0.7324064724486986, "grad_norm": 0.23066192222251355, "learning_rate": 2.3371660731516014e-06, "loss": 0.0211, "step": 175530 }, { "epoch": 0.7324273351636889, "grad_norm": 0.72599856035133, "learning_rate": 2.337132786399225e-06, "loss": 0.0222, "step": 175535 }, { "epoch": 0.7324481978786791, "grad_norm": 0.5115770118836331, "learning_rate": 2.3370995010690538e-06, "loss": 0.0183, "step": 175540 }, { "epoch": 0.7324690605936695, "grad_norm": 0.7274818822685645, "learning_rate": 2.337066217160986e-06, "loss": 0.0239, "step": 175545 }, { "epoch": 0.7324899233086597, "grad_norm": 1.2480797267276496, "learning_rate": 2.3370329346749206e-06, "loss": 0.0265, "step": 175550 }, { "epoch": 0.7325107860236499, "grad_norm": 0.3238651390781984, "learning_rate": 2.336999653610756e-06, "loss": 0.0316, "step": 175555 }, { "epoch": 0.7325316487386403, "grad_norm": 0.7236515470994306, "learning_rate": 2.3369663739683917e-06, "loss": 0.023, "step": 175560 }, { "epoch": 0.7325525114536305, "grad_norm": 0.8358863180188262, "learning_rate": 2.3369330957477254e-06, "loss": 0.0277, "step": 175565 }, { "epoch": 0.7325733741686208, "grad_norm": 0.4564347720798194, "learning_rate": 2.336899818948657e-06, "loss": 0.0167, "step": 175570 }, { "epoch": 0.7325942368836111, "grad_norm": 0.43697751767900755, "learning_rate": 2.3368665435710847e-06, "loss": 0.0161, "step": 175575 }, { "epoch": 0.7326150995986014, "grad_norm": 0.596588606230171, "learning_rate": 2.3368332696149075e-06, "loss": 0.0198, "step": 175580 }, { "epoch": 0.7326359623135916, "grad_norm": 0.4386455202419001, "learning_rate": 2.3367999970800235e-06, "loss": 0.0194, "step": 175585 }, { "epoch": 0.7326568250285819, "grad_norm": 0.8393328270750194, "learning_rate": 2.336766725966333e-06, "loss": 0.0283, "step": 175590 }, { "epoch": 0.7326776877435722, "grad_norm": 0.6458623689212174, "learning_rate": 2.3367334562737333e-06, "loss": 0.0262, "step": 175595 }, { "epoch": 0.7326985504585625, "grad_norm": 0.8262116935556582, "learning_rate": 2.3367001880021238e-06, "loss": 0.0215, "step": 175600 }, { "epoch": 0.7327194131735527, "grad_norm": 0.337587282746664, "learning_rate": 2.3366669211514035e-06, "loss": 0.0233, "step": 175605 }, { "epoch": 0.7327402758885431, "grad_norm": 0.7609288509104281, "learning_rate": 2.3366336557214718e-06, "loss": 0.0179, "step": 175610 }, { "epoch": 0.7327611386035333, "grad_norm": 0.458031732732628, "learning_rate": 2.336600391712226e-06, "loss": 0.0173, "step": 175615 }, { "epoch": 0.7327820013185236, "grad_norm": 0.8550770791286485, "learning_rate": 2.3365671291235667e-06, "loss": 0.0206, "step": 175620 }, { "epoch": 0.7328028640335139, "grad_norm": 0.6903342816972496, "learning_rate": 2.336533867955392e-06, "loss": 0.0171, "step": 175625 }, { "epoch": 0.7328237267485042, "grad_norm": 0.721576116039893, "learning_rate": 2.3365006082076002e-06, "loss": 0.0215, "step": 175630 }, { "epoch": 0.7328445894634944, "grad_norm": 0.5832779065537395, "learning_rate": 2.3364673498800914e-06, "loss": 0.0193, "step": 175635 }, { "epoch": 0.7328654521784846, "grad_norm": 0.8217000119738569, "learning_rate": 2.3364340929727633e-06, "loss": 0.0254, "step": 175640 }, { "epoch": 0.732886314893475, "grad_norm": 0.21823826146102582, "learning_rate": 2.3364008374855165e-06, "loss": 0.018, "step": 175645 }, { "epoch": 0.7329071776084652, "grad_norm": 0.5279406122142172, "learning_rate": 2.336367583418248e-06, "loss": 0.028, "step": 175650 }, { "epoch": 0.7329280403234555, "grad_norm": 0.955439231316834, "learning_rate": 2.3363343307708574e-06, "loss": 0.0234, "step": 175655 }, { "epoch": 0.7329489030384458, "grad_norm": 0.7988389724851223, "learning_rate": 2.3363010795432448e-06, "loss": 0.0171, "step": 175660 }, { "epoch": 0.7329697657534361, "grad_norm": 0.3683661059755102, "learning_rate": 2.336267829735308e-06, "loss": 0.0234, "step": 175665 }, { "epoch": 0.7329906284684263, "grad_norm": 0.5867161981447151, "learning_rate": 2.336234581346946e-06, "loss": 0.02, "step": 175670 }, { "epoch": 0.7330114911834167, "grad_norm": 1.026730351463648, "learning_rate": 2.336201334378059e-06, "loss": 0.0205, "step": 175675 }, { "epoch": 0.7330323538984069, "grad_norm": 1.0678467272579637, "learning_rate": 2.336168088828544e-06, "loss": 0.0208, "step": 175680 }, { "epoch": 0.7330532166133972, "grad_norm": 0.40352288932608094, "learning_rate": 2.3361348446983013e-06, "loss": 0.0224, "step": 175685 }, { "epoch": 0.7330740793283875, "grad_norm": 0.9085609009549281, "learning_rate": 2.3361016019872298e-06, "loss": 0.0232, "step": 175690 }, { "epoch": 0.7330949420433778, "grad_norm": 0.6534801580319974, "learning_rate": 2.336068360695229e-06, "loss": 0.0163, "step": 175695 }, { "epoch": 0.733115804758368, "grad_norm": 0.5398894885713766, "learning_rate": 2.3360351208221968e-06, "loss": 0.0167, "step": 175700 }, { "epoch": 0.7331366674733583, "grad_norm": 1.3688442272979315, "learning_rate": 2.3360018823680324e-06, "loss": 0.0176, "step": 175705 }, { "epoch": 0.7331575301883486, "grad_norm": 1.0214930462196758, "learning_rate": 2.335968645332636e-06, "loss": 0.0186, "step": 175710 }, { "epoch": 0.7331783929033389, "grad_norm": 0.5199431897725197, "learning_rate": 2.335935409715906e-06, "loss": 0.0232, "step": 175715 }, { "epoch": 0.7331992556183291, "grad_norm": 0.42894328391933156, "learning_rate": 2.335902175517741e-06, "loss": 0.0153, "step": 175720 }, { "epoch": 0.7332201183333195, "grad_norm": 0.27796131618853237, "learning_rate": 2.3358689427380403e-06, "loss": 0.0164, "step": 175725 }, { "epoch": 0.7332409810483097, "grad_norm": 0.8817938787826595, "learning_rate": 2.3358357113767037e-06, "loss": 0.0327, "step": 175730 }, { "epoch": 0.7332618437633, "grad_norm": 0.9268225628985719, "learning_rate": 2.3358024814336298e-06, "loss": 0.021, "step": 175735 }, { "epoch": 0.7332827064782903, "grad_norm": 0.5355423828726442, "learning_rate": 2.3357692529087173e-06, "loss": 0.0157, "step": 175740 }, { "epoch": 0.7333035691932805, "grad_norm": 0.7877470782097045, "learning_rate": 2.3357360258018667e-06, "loss": 0.018, "step": 175745 }, { "epoch": 0.7333244319082708, "grad_norm": 0.5780246284899782, "learning_rate": 2.3357028001129755e-06, "loss": 0.0179, "step": 175750 }, { "epoch": 0.7333452946232611, "grad_norm": 1.4919792500167044, "learning_rate": 2.3356695758419438e-06, "loss": 0.0294, "step": 175755 }, { "epoch": 0.7333661573382514, "grad_norm": 0.7178189146710372, "learning_rate": 2.3356363529886707e-06, "loss": 0.0242, "step": 175760 }, { "epoch": 0.7333870200532416, "grad_norm": 0.9087410334030468, "learning_rate": 2.3356031315530547e-06, "loss": 0.0215, "step": 175765 }, { "epoch": 0.7334078827682319, "grad_norm": 0.5299129763784646, "learning_rate": 2.3355699115349957e-06, "loss": 0.0193, "step": 175770 }, { "epoch": 0.7334287454832222, "grad_norm": 0.4044074700808307, "learning_rate": 2.3355366929343926e-06, "loss": 0.0211, "step": 175775 }, { "epoch": 0.7334496081982125, "grad_norm": 0.7527077262999371, "learning_rate": 2.3355034757511445e-06, "loss": 0.031, "step": 175780 }, { "epoch": 0.7334704709132027, "grad_norm": 0.3933449548206696, "learning_rate": 2.3354702599851515e-06, "loss": 0.0226, "step": 175785 }, { "epoch": 0.7334913336281931, "grad_norm": 0.5868111425801286, "learning_rate": 2.3354370456363116e-06, "loss": 0.0149, "step": 175790 }, { "epoch": 0.7335121963431833, "grad_norm": 0.9011270455396367, "learning_rate": 2.3354038327045244e-06, "loss": 0.0254, "step": 175795 }, { "epoch": 0.7335330590581736, "grad_norm": 0.7351116498910186, "learning_rate": 2.3353706211896902e-06, "loss": 0.0251, "step": 175800 }, { "epoch": 0.7335539217731639, "grad_norm": 0.589063018384202, "learning_rate": 2.3353374110917063e-06, "loss": 0.0217, "step": 175805 }, { "epoch": 0.7335747844881542, "grad_norm": 0.858696540634665, "learning_rate": 2.3353042024104735e-06, "loss": 0.0215, "step": 175810 }, { "epoch": 0.7335956472031444, "grad_norm": 0.6736256550469525, "learning_rate": 2.3352709951458907e-06, "loss": 0.0203, "step": 175815 }, { "epoch": 0.7336165099181347, "grad_norm": 0.9163838011963612, "learning_rate": 2.3352377892978565e-06, "loss": 0.0239, "step": 175820 }, { "epoch": 0.733637372633125, "grad_norm": 0.609437893061906, "learning_rate": 2.335204584866271e-06, "loss": 0.0218, "step": 175825 }, { "epoch": 0.7336582353481153, "grad_norm": 1.0054177736406515, "learning_rate": 2.3351713818510334e-06, "loss": 0.0251, "step": 175830 }, { "epoch": 0.7336790980631055, "grad_norm": 0.3728471284897759, "learning_rate": 2.335138180252043e-06, "loss": 0.0234, "step": 175835 }, { "epoch": 0.7336999607780958, "grad_norm": 0.9029443114390271, "learning_rate": 2.3351049800691985e-06, "loss": 0.0177, "step": 175840 }, { "epoch": 0.7337208234930861, "grad_norm": 0.7274834137525159, "learning_rate": 2.3350717813024e-06, "loss": 0.0248, "step": 175845 }, { "epoch": 0.7337416862080763, "grad_norm": 0.74374380727547, "learning_rate": 2.335038583951547e-06, "loss": 0.0187, "step": 175850 }, { "epoch": 0.7337625489230667, "grad_norm": 0.8966948577438953, "learning_rate": 2.3350053880165376e-06, "loss": 0.028, "step": 175855 }, { "epoch": 0.7337834116380569, "grad_norm": 0.4865718965061059, "learning_rate": 2.3349721934972718e-06, "loss": 0.015, "step": 175860 }, { "epoch": 0.7338042743530472, "grad_norm": 0.6437803783931727, "learning_rate": 2.33493900039365e-06, "loss": 0.014, "step": 175865 }, { "epoch": 0.7338251370680375, "grad_norm": 0.4922673218576629, "learning_rate": 2.33490580870557e-06, "loss": 0.0187, "step": 175870 }, { "epoch": 0.7338459997830278, "grad_norm": 0.9259284835056037, "learning_rate": 2.334872618432932e-06, "loss": 0.0178, "step": 175875 }, { "epoch": 0.733866862498018, "grad_norm": 0.5679719669628687, "learning_rate": 2.334839429575636e-06, "loss": 0.0227, "step": 175880 }, { "epoch": 0.7338877252130083, "grad_norm": 1.0217609773214285, "learning_rate": 2.3348062421335797e-06, "loss": 0.024, "step": 175885 }, { "epoch": 0.7339085879279986, "grad_norm": 1.1755840001130422, "learning_rate": 2.3347730561066645e-06, "loss": 0.0276, "step": 175890 }, { "epoch": 0.7339294506429889, "grad_norm": 1.122092938205946, "learning_rate": 2.334739871494788e-06, "loss": 0.023, "step": 175895 }, { "epoch": 0.7339503133579791, "grad_norm": 0.5420425754759786, "learning_rate": 2.3347066882978515e-06, "loss": 0.0172, "step": 175900 }, { "epoch": 0.7339711760729695, "grad_norm": 0.7477943571569661, "learning_rate": 2.334673506515753e-06, "loss": 0.0169, "step": 175905 }, { "epoch": 0.7339920387879597, "grad_norm": 0.4555982633634708, "learning_rate": 2.3346403261483926e-06, "loss": 0.0146, "step": 175910 }, { "epoch": 0.73401290150295, "grad_norm": 0.4279431249920554, "learning_rate": 2.3346071471956687e-06, "loss": 0.0243, "step": 175915 }, { "epoch": 0.7340337642179403, "grad_norm": 0.32849216712445745, "learning_rate": 2.334573969657482e-06, "loss": 0.0212, "step": 175920 }, { "epoch": 0.7340546269329306, "grad_norm": 0.8478830906122615, "learning_rate": 2.3345407935337327e-06, "loss": 0.0264, "step": 175925 }, { "epoch": 0.7340754896479208, "grad_norm": 0.7469205579502542, "learning_rate": 2.3345076188243182e-06, "loss": 0.022, "step": 175930 }, { "epoch": 0.7340963523629112, "grad_norm": 0.29625927071252595, "learning_rate": 2.33447444552914e-06, "loss": 0.0168, "step": 175935 }, { "epoch": 0.7341172150779014, "grad_norm": 0.6463940983610369, "learning_rate": 2.3344412736480956e-06, "loss": 0.0311, "step": 175940 }, { "epoch": 0.7341380777928916, "grad_norm": 0.7063366856453568, "learning_rate": 2.3344081031810862e-06, "loss": 0.0219, "step": 175945 }, { "epoch": 0.7341589405078819, "grad_norm": 1.000278312270475, "learning_rate": 2.334374934128011e-06, "loss": 0.0252, "step": 175950 }, { "epoch": 0.7341798032228722, "grad_norm": 1.0430161619059257, "learning_rate": 2.3343417664887693e-06, "loss": 0.0301, "step": 175955 }, { "epoch": 0.7342006659378625, "grad_norm": 0.604701083869136, "learning_rate": 2.3343086002632605e-06, "loss": 0.0198, "step": 175960 }, { "epoch": 0.7342215286528527, "grad_norm": 0.7863871369442808, "learning_rate": 2.3342754354513845e-06, "loss": 0.0262, "step": 175965 }, { "epoch": 0.7342423913678431, "grad_norm": 0.9346416225402486, "learning_rate": 2.334242272053041e-06, "loss": 0.0195, "step": 175970 }, { "epoch": 0.7342632540828333, "grad_norm": 1.375479944528514, "learning_rate": 2.3342091100681284e-06, "loss": 0.0314, "step": 175975 }, { "epoch": 0.7342841167978236, "grad_norm": 0.9562089027540819, "learning_rate": 2.334175949496548e-06, "loss": 0.0199, "step": 175980 }, { "epoch": 0.7343049795128139, "grad_norm": 0.9886065017847274, "learning_rate": 2.3341427903381985e-06, "loss": 0.0264, "step": 175985 }, { "epoch": 0.7343258422278042, "grad_norm": 0.6986817546629926, "learning_rate": 2.3341096325929794e-06, "loss": 0.0154, "step": 175990 }, { "epoch": 0.7343467049427944, "grad_norm": 0.5701205994452334, "learning_rate": 2.334076476260791e-06, "loss": 0.0264, "step": 175995 }, { "epoch": 0.7343675676577847, "grad_norm": 0.4620155952100877, "learning_rate": 2.334043321341532e-06, "loss": 0.0167, "step": 176000 }, { "epoch": 0.734388430372775, "grad_norm": 0.8450858938693807, "learning_rate": 2.334010167835103e-06, "loss": 0.0273, "step": 176005 }, { "epoch": 0.7344092930877653, "grad_norm": 0.6854375013049407, "learning_rate": 2.333977015741404e-06, "loss": 0.0188, "step": 176010 }, { "epoch": 0.7344301558027555, "grad_norm": 0.770478163165852, "learning_rate": 2.333943865060333e-06, "loss": 0.0222, "step": 176015 }, { "epoch": 0.7344510185177459, "grad_norm": 0.9620866777359678, "learning_rate": 2.3339107157917905e-06, "loss": 0.0251, "step": 176020 }, { "epoch": 0.7344718812327361, "grad_norm": 0.26310597697613236, "learning_rate": 2.333877567935677e-06, "loss": 0.0185, "step": 176025 }, { "epoch": 0.7344927439477263, "grad_norm": 0.5420537639741755, "learning_rate": 2.3338444214918908e-06, "loss": 0.024, "step": 176030 }, { "epoch": 0.7345136066627167, "grad_norm": 0.5402124183244219, "learning_rate": 2.3338112764603326e-06, "loss": 0.0181, "step": 176035 }, { "epoch": 0.7345344693777069, "grad_norm": 0.47096268932678065, "learning_rate": 2.3337781328409016e-06, "loss": 0.0225, "step": 176040 }, { "epoch": 0.7345553320926972, "grad_norm": 0.5722017975049019, "learning_rate": 2.3337449906334986e-06, "loss": 0.0283, "step": 176045 }, { "epoch": 0.7345761948076875, "grad_norm": 0.6049219556328297, "learning_rate": 2.3337118498380217e-06, "loss": 0.0257, "step": 176050 }, { "epoch": 0.7345970575226778, "grad_norm": 0.7511551140174176, "learning_rate": 2.3336787104543716e-06, "loss": 0.0218, "step": 176055 }, { "epoch": 0.734617920237668, "grad_norm": 0.8188725895010005, "learning_rate": 2.3336455724824476e-06, "loss": 0.0207, "step": 176060 }, { "epoch": 0.7346387829526583, "grad_norm": 0.580705573931393, "learning_rate": 2.3336124359221507e-06, "loss": 0.018, "step": 176065 }, { "epoch": 0.7346596456676486, "grad_norm": 0.9062700359359566, "learning_rate": 2.333579300773379e-06, "loss": 0.0238, "step": 176070 }, { "epoch": 0.7346805083826389, "grad_norm": 1.146478582437084, "learning_rate": 2.3335461670360333e-06, "loss": 0.0224, "step": 176075 }, { "epoch": 0.7347013710976291, "grad_norm": 0.939331015715426, "learning_rate": 2.3335130347100133e-06, "loss": 0.0255, "step": 176080 }, { "epoch": 0.7347222338126195, "grad_norm": 0.769320172374577, "learning_rate": 2.3334799037952185e-06, "loss": 0.0214, "step": 176085 }, { "epoch": 0.7347430965276097, "grad_norm": 0.7616450456770734, "learning_rate": 2.333446774291549e-06, "loss": 0.0171, "step": 176090 }, { "epoch": 0.7347639592426, "grad_norm": 0.4719963005850736, "learning_rate": 2.3334136461989046e-06, "loss": 0.0235, "step": 176095 }, { "epoch": 0.7347848219575903, "grad_norm": 0.6509971194098065, "learning_rate": 2.333380519517185e-06, "loss": 0.0219, "step": 176100 }, { "epoch": 0.7348056846725806, "grad_norm": 0.34905537702548173, "learning_rate": 2.3333473942462904e-06, "loss": 0.0197, "step": 176105 }, { "epoch": 0.7348265473875708, "grad_norm": 0.36366319325619456, "learning_rate": 2.33331427038612e-06, "loss": 0.0174, "step": 176110 }, { "epoch": 0.7348474101025612, "grad_norm": 0.4510081206079213, "learning_rate": 2.333281147936574e-06, "loss": 0.021, "step": 176115 }, { "epoch": 0.7348682728175514, "grad_norm": 1.1227961465888032, "learning_rate": 2.3332480268975524e-06, "loss": 0.0222, "step": 176120 }, { "epoch": 0.7348891355325416, "grad_norm": 0.7246497026530164, "learning_rate": 2.3332149072689553e-06, "loss": 0.0186, "step": 176125 }, { "epoch": 0.7349099982475319, "grad_norm": 0.44705747865111484, "learning_rate": 2.333181789050682e-06, "loss": 0.0234, "step": 176130 }, { "epoch": 0.7349308609625222, "grad_norm": 0.6708346551261044, "learning_rate": 2.3331486722426323e-06, "loss": 0.0273, "step": 176135 }, { "epoch": 0.7349517236775125, "grad_norm": 0.9081102743330637, "learning_rate": 2.3331155568447077e-06, "loss": 0.0262, "step": 176140 }, { "epoch": 0.7349725863925027, "grad_norm": 0.5910518376133128, "learning_rate": 2.3330824428568062e-06, "loss": 0.0215, "step": 176145 }, { "epoch": 0.7349934491074931, "grad_norm": 0.7869398275306124, "learning_rate": 2.3330493302788284e-06, "loss": 0.0299, "step": 176150 }, { "epoch": 0.7350143118224833, "grad_norm": 0.6251804055942769, "learning_rate": 2.3330162191106755e-06, "loss": 0.0221, "step": 176155 }, { "epoch": 0.7350351745374736, "grad_norm": 0.395505983772786, "learning_rate": 2.332983109352245e-06, "loss": 0.0172, "step": 176160 }, { "epoch": 0.7350560372524639, "grad_norm": 1.4716764055358718, "learning_rate": 2.332950001003439e-06, "loss": 0.0246, "step": 176165 }, { "epoch": 0.7350768999674542, "grad_norm": 0.4945521047049202, "learning_rate": 2.332916894064156e-06, "loss": 0.0206, "step": 176170 }, { "epoch": 0.7350977626824444, "grad_norm": 0.6179452318356136, "learning_rate": 2.332883788534297e-06, "loss": 0.0202, "step": 176175 }, { "epoch": 0.7351186253974347, "grad_norm": 1.2267560338768388, "learning_rate": 2.332850684413762e-06, "loss": 0.0232, "step": 176180 }, { "epoch": 0.735139488112425, "grad_norm": 0.6407395815040656, "learning_rate": 2.33281758170245e-06, "loss": 0.0171, "step": 176185 }, { "epoch": 0.7351603508274153, "grad_norm": 1.1744477340472153, "learning_rate": 2.3327844804002624e-06, "loss": 0.0277, "step": 176190 }, { "epoch": 0.7351812135424055, "grad_norm": 0.45916493916924667, "learning_rate": 2.3327513805070985e-06, "loss": 0.0191, "step": 176195 }, { "epoch": 0.7352020762573959, "grad_norm": 0.4729497967050445, "learning_rate": 2.332718282022858e-06, "loss": 0.019, "step": 176200 }, { "epoch": 0.7352229389723861, "grad_norm": 0.43985868723653254, "learning_rate": 2.3326851849474413e-06, "loss": 0.0139, "step": 176205 }, { "epoch": 0.7352438016873764, "grad_norm": 0.5405371493181762, "learning_rate": 2.332652089280749e-06, "loss": 0.0198, "step": 176210 }, { "epoch": 0.7352646644023667, "grad_norm": 0.5370324840999109, "learning_rate": 2.33261899502268e-06, "loss": 0.0169, "step": 176215 }, { "epoch": 0.735285527117357, "grad_norm": 0.25442538965531286, "learning_rate": 2.3325859021731355e-06, "loss": 0.0195, "step": 176220 }, { "epoch": 0.7353063898323472, "grad_norm": 0.1951397830817397, "learning_rate": 2.332552810732015e-06, "loss": 0.017, "step": 176225 }, { "epoch": 0.7353272525473376, "grad_norm": 0.28492962059030746, "learning_rate": 2.3325197206992186e-06, "loss": 0.0223, "step": 176230 }, { "epoch": 0.7353481152623278, "grad_norm": 0.8545008163462654, "learning_rate": 2.332486632074647e-06, "loss": 0.0205, "step": 176235 }, { "epoch": 0.735368977977318, "grad_norm": 0.9785133743399272, "learning_rate": 2.332453544858199e-06, "loss": 0.0238, "step": 176240 }, { "epoch": 0.7353898406923083, "grad_norm": 0.4889025500711935, "learning_rate": 2.3324204590497764e-06, "loss": 0.0199, "step": 176245 }, { "epoch": 0.7354107034072986, "grad_norm": 2.2404741685222205, "learning_rate": 2.3323873746492775e-06, "loss": 0.0273, "step": 176250 }, { "epoch": 0.7354315661222889, "grad_norm": 0.4041435286920329, "learning_rate": 2.332354291656604e-06, "loss": 0.0167, "step": 176255 }, { "epoch": 0.7354524288372791, "grad_norm": 0.4880194546354613, "learning_rate": 2.332321210071656e-06, "loss": 0.0234, "step": 176260 }, { "epoch": 0.7354732915522695, "grad_norm": 1.3057693328531195, "learning_rate": 2.332288129894333e-06, "loss": 0.0271, "step": 176265 }, { "epoch": 0.7354941542672597, "grad_norm": 1.0704368346205997, "learning_rate": 2.332255051124535e-06, "loss": 0.0219, "step": 176270 }, { "epoch": 0.73551501698225, "grad_norm": 1.1692198366489226, "learning_rate": 2.332221973762163e-06, "loss": 0.0205, "step": 176275 }, { "epoch": 0.7355358796972403, "grad_norm": 0.7279787229261395, "learning_rate": 2.3321888978071163e-06, "loss": 0.0213, "step": 176280 }, { "epoch": 0.7355567424122306, "grad_norm": 3.042817268872182, "learning_rate": 2.3321558232592957e-06, "loss": 0.0186, "step": 176285 }, { "epoch": 0.7355776051272208, "grad_norm": 0.8839557608300226, "learning_rate": 2.332122750118601e-06, "loss": 0.0193, "step": 176290 }, { "epoch": 0.7355984678422112, "grad_norm": 0.6404408471772137, "learning_rate": 2.332089678384934e-06, "loss": 0.0246, "step": 176295 }, { "epoch": 0.7356193305572014, "grad_norm": 0.27296698909661243, "learning_rate": 2.3320566080581922e-06, "loss": 0.0182, "step": 176300 }, { "epoch": 0.7356401932721917, "grad_norm": 0.5074721317845886, "learning_rate": 2.3320235391382775e-06, "loss": 0.0214, "step": 176305 }, { "epoch": 0.7356610559871819, "grad_norm": 0.6836546467913559, "learning_rate": 2.3319904716250905e-06, "loss": 0.0252, "step": 176310 }, { "epoch": 0.7356819187021723, "grad_norm": 0.34064556450873656, "learning_rate": 2.3319574055185305e-06, "loss": 0.0189, "step": 176315 }, { "epoch": 0.7357027814171625, "grad_norm": 0.8888336154113723, "learning_rate": 2.3319243408184984e-06, "loss": 0.0268, "step": 176320 }, { "epoch": 0.7357236441321527, "grad_norm": 0.4391377598102861, "learning_rate": 2.331891277524894e-06, "loss": 0.0264, "step": 176325 }, { "epoch": 0.7357445068471431, "grad_norm": 0.7048969767879628, "learning_rate": 2.3318582156376187e-06, "loss": 0.0206, "step": 176330 }, { "epoch": 0.7357653695621333, "grad_norm": 0.31584754531350234, "learning_rate": 2.3318251551565716e-06, "loss": 0.0197, "step": 176335 }, { "epoch": 0.7357862322771236, "grad_norm": 0.3816990397391117, "learning_rate": 2.331792096081653e-06, "loss": 0.0176, "step": 176340 }, { "epoch": 0.7358070949921139, "grad_norm": 0.604644329287863, "learning_rate": 2.331759038412764e-06, "loss": 0.0173, "step": 176345 }, { "epoch": 0.7358279577071042, "grad_norm": 0.71093191343212, "learning_rate": 2.3317259821498046e-06, "loss": 0.0234, "step": 176350 }, { "epoch": 0.7358488204220944, "grad_norm": 0.8497206807652455, "learning_rate": 2.3316929272926744e-06, "loss": 0.0257, "step": 176355 }, { "epoch": 0.7358696831370847, "grad_norm": 0.921589281243351, "learning_rate": 2.3316598738412753e-06, "loss": 0.0182, "step": 176360 }, { "epoch": 0.735890545852075, "grad_norm": 0.7689881043739487, "learning_rate": 2.3316268217955067e-06, "loss": 0.0222, "step": 176365 }, { "epoch": 0.7359114085670653, "grad_norm": 0.6598965472831497, "learning_rate": 2.331593771155269e-06, "loss": 0.0209, "step": 176370 }, { "epoch": 0.7359322712820555, "grad_norm": 1.8926034890782204, "learning_rate": 2.3315607219204626e-06, "loss": 0.0435, "step": 176375 }, { "epoch": 0.7359531339970459, "grad_norm": 0.5927529517768592, "learning_rate": 2.331527674090988e-06, "loss": 0.0153, "step": 176380 }, { "epoch": 0.7359739967120361, "grad_norm": 0.44724156374211493, "learning_rate": 2.331494627666746e-06, "loss": 0.022, "step": 176385 }, { "epoch": 0.7359948594270264, "grad_norm": 0.7338938252300402, "learning_rate": 2.3314615826476365e-06, "loss": 0.0174, "step": 176390 }, { "epoch": 0.7360157221420167, "grad_norm": 0.597076724282327, "learning_rate": 2.3314285390335596e-06, "loss": 0.02, "step": 176395 }, { "epoch": 0.736036584857007, "grad_norm": 0.5120777871558527, "learning_rate": 2.3313954968244163e-06, "loss": 0.0158, "step": 176400 }, { "epoch": 0.7360574475719972, "grad_norm": 1.2707277915924071, "learning_rate": 2.3313624560201073e-06, "loss": 0.0344, "step": 176405 }, { "epoch": 0.7360783102869876, "grad_norm": 0.7687503810575372, "learning_rate": 2.331329416620532e-06, "loss": 0.0216, "step": 176410 }, { "epoch": 0.7360991730019778, "grad_norm": 0.3594727129079204, "learning_rate": 2.3312963786255925e-06, "loss": 0.0212, "step": 176415 }, { "epoch": 0.736120035716968, "grad_norm": 0.445853423974099, "learning_rate": 2.331263342035188e-06, "loss": 0.0256, "step": 176420 }, { "epoch": 0.7361408984319583, "grad_norm": 0.6064225221445929, "learning_rate": 2.331230306849219e-06, "loss": 0.0187, "step": 176425 }, { "epoch": 0.7361617611469486, "grad_norm": 0.7792474013777152, "learning_rate": 2.331197273067587e-06, "loss": 0.0157, "step": 176430 }, { "epoch": 0.7361826238619389, "grad_norm": 0.6564309737436704, "learning_rate": 2.331164240690191e-06, "loss": 0.0277, "step": 176435 }, { "epoch": 0.7362034865769291, "grad_norm": 0.618080553093713, "learning_rate": 2.331131209716932e-06, "loss": 0.0206, "step": 176440 }, { "epoch": 0.7362243492919195, "grad_norm": 0.9425638594418692, "learning_rate": 2.331098180147712e-06, "loss": 0.0195, "step": 176445 }, { "epoch": 0.7362452120069097, "grad_norm": 1.123249188741396, "learning_rate": 2.3310651519824302e-06, "loss": 0.0233, "step": 176450 }, { "epoch": 0.7362660747219, "grad_norm": 0.3923741923442035, "learning_rate": 2.331032125220987e-06, "loss": 0.0164, "step": 176455 }, { "epoch": 0.7362869374368903, "grad_norm": 0.5136794622425525, "learning_rate": 2.3309990998632833e-06, "loss": 0.0176, "step": 176460 }, { "epoch": 0.7363078001518806, "grad_norm": 0.4885905956468183, "learning_rate": 2.3309660759092192e-06, "loss": 0.024, "step": 176465 }, { "epoch": 0.7363286628668708, "grad_norm": 0.5892201654938442, "learning_rate": 2.330933053358696e-06, "loss": 0.0145, "step": 176470 }, { "epoch": 0.7363495255818612, "grad_norm": 0.968662080745785, "learning_rate": 2.3309000322116144e-06, "loss": 0.0222, "step": 176475 }, { "epoch": 0.7363703882968514, "grad_norm": 0.6120204830688099, "learning_rate": 2.330867012467874e-06, "loss": 0.0211, "step": 176480 }, { "epoch": 0.7363912510118417, "grad_norm": 1.295756277919035, "learning_rate": 2.3308339941273764e-06, "loss": 0.0237, "step": 176485 }, { "epoch": 0.7364121137268319, "grad_norm": 0.8128955062885456, "learning_rate": 2.3308009771900215e-06, "loss": 0.0233, "step": 176490 }, { "epoch": 0.7364329764418223, "grad_norm": 0.6097578840434968, "learning_rate": 2.3307679616557106e-06, "loss": 0.0226, "step": 176495 }, { "epoch": 0.7364538391568125, "grad_norm": 0.7355859501210125, "learning_rate": 2.3307349475243445e-06, "loss": 0.0301, "step": 176500 }, { "epoch": 0.7364747018718028, "grad_norm": 0.8160135828634341, "learning_rate": 2.3307019347958226e-06, "loss": 0.024, "step": 176505 }, { "epoch": 0.7364955645867931, "grad_norm": 0.7883863840100513, "learning_rate": 2.330668923470046e-06, "loss": 0.0192, "step": 176510 }, { "epoch": 0.7365164273017833, "grad_norm": 0.3083050405987058, "learning_rate": 2.330635913546916e-06, "loss": 0.019, "step": 176515 }, { "epoch": 0.7365372900167736, "grad_norm": 0.37622676416613116, "learning_rate": 2.330602905026333e-06, "loss": 0.0171, "step": 176520 }, { "epoch": 0.736558152731764, "grad_norm": 0.6972242535918314, "learning_rate": 2.3305698979081973e-06, "loss": 0.0189, "step": 176525 }, { "epoch": 0.7365790154467542, "grad_norm": 0.42030574307051094, "learning_rate": 2.3305368921924103e-06, "loss": 0.0173, "step": 176530 }, { "epoch": 0.7365998781617444, "grad_norm": 0.8394805701438928, "learning_rate": 2.3305038878788723e-06, "loss": 0.0269, "step": 176535 }, { "epoch": 0.7366207408767347, "grad_norm": 0.3610689414653724, "learning_rate": 2.330470884967483e-06, "loss": 0.0149, "step": 176540 }, { "epoch": 0.736641603591725, "grad_norm": 0.6213769974242614, "learning_rate": 2.330437883458145e-06, "loss": 0.0211, "step": 176545 }, { "epoch": 0.7366624663067153, "grad_norm": 0.4288687078723957, "learning_rate": 2.3304048833507582e-06, "loss": 0.0187, "step": 176550 }, { "epoch": 0.7366833290217055, "grad_norm": 1.5684920642119562, "learning_rate": 2.330371884645223e-06, "loss": 0.0283, "step": 176555 }, { "epoch": 0.7367041917366959, "grad_norm": 0.3956350860974797, "learning_rate": 2.3303388873414403e-06, "loss": 0.0193, "step": 176560 }, { "epoch": 0.7367250544516861, "grad_norm": 0.5320538477842838, "learning_rate": 2.3303058914393113e-06, "loss": 0.0151, "step": 176565 }, { "epoch": 0.7367459171666764, "grad_norm": 0.7913657450802074, "learning_rate": 2.3302728969387364e-06, "loss": 0.0208, "step": 176570 }, { "epoch": 0.7367667798816667, "grad_norm": 0.6347556365447455, "learning_rate": 2.3302399038396165e-06, "loss": 0.0227, "step": 176575 }, { "epoch": 0.736787642596657, "grad_norm": 0.2830417758503685, "learning_rate": 2.330206912141852e-06, "loss": 0.018, "step": 176580 }, { "epoch": 0.7368085053116472, "grad_norm": 0.8759369985535197, "learning_rate": 2.3301739218453447e-06, "loss": 0.0217, "step": 176585 }, { "epoch": 0.7368293680266376, "grad_norm": 0.42872517429671164, "learning_rate": 2.3301409329499942e-06, "loss": 0.0222, "step": 176590 }, { "epoch": 0.7368502307416278, "grad_norm": 0.45276021212750234, "learning_rate": 2.3301079454557023e-06, "loss": 0.0163, "step": 176595 }, { "epoch": 0.736871093456618, "grad_norm": 0.49535088617294276, "learning_rate": 2.3300749593623686e-06, "loss": 0.0215, "step": 176600 }, { "epoch": 0.7368919561716083, "grad_norm": 0.7523364147316579, "learning_rate": 2.3300419746698956e-06, "loss": 0.0242, "step": 176605 }, { "epoch": 0.7369128188865987, "grad_norm": 0.3700649427393384, "learning_rate": 2.3300089913781827e-06, "loss": 0.0208, "step": 176610 }, { "epoch": 0.7369336816015889, "grad_norm": 0.6269991430802064, "learning_rate": 2.3299760094871314e-06, "loss": 0.027, "step": 176615 }, { "epoch": 0.7369545443165791, "grad_norm": 0.3250576309904534, "learning_rate": 2.329943028996643e-06, "loss": 0.0214, "step": 176620 }, { "epoch": 0.7369754070315695, "grad_norm": 0.6888620051424205, "learning_rate": 2.3299100499066177e-06, "loss": 0.0199, "step": 176625 }, { "epoch": 0.7369962697465597, "grad_norm": 0.5307173074172145, "learning_rate": 2.3298770722169562e-06, "loss": 0.0205, "step": 176630 }, { "epoch": 0.73701713246155, "grad_norm": 0.4619868773822667, "learning_rate": 2.3298440959275605e-06, "loss": 0.0156, "step": 176635 }, { "epoch": 0.7370379951765403, "grad_norm": 0.8164477182497452, "learning_rate": 2.3298111210383305e-06, "loss": 0.0175, "step": 176640 }, { "epoch": 0.7370588578915306, "grad_norm": 1.044578425684979, "learning_rate": 2.329778147549167e-06, "loss": 0.024, "step": 176645 }, { "epoch": 0.7370797206065208, "grad_norm": 0.6166209124433686, "learning_rate": 2.329745175459971e-06, "loss": 0.0228, "step": 176650 }, { "epoch": 0.7371005833215112, "grad_norm": 0.7615248091521609, "learning_rate": 2.329712204770645e-06, "loss": 0.0227, "step": 176655 }, { "epoch": 0.7371214460365014, "grad_norm": 0.9291628100220575, "learning_rate": 2.329679235481088e-06, "loss": 0.0268, "step": 176660 }, { "epoch": 0.7371423087514917, "grad_norm": 0.7788793031719226, "learning_rate": 2.329646267591202e-06, "loss": 0.0228, "step": 176665 }, { "epoch": 0.7371631714664819, "grad_norm": 0.8453096705113049, "learning_rate": 2.329613301100887e-06, "loss": 0.019, "step": 176670 }, { "epoch": 0.7371840341814723, "grad_norm": 0.573433623730333, "learning_rate": 2.3295803360100453e-06, "loss": 0.0177, "step": 176675 }, { "epoch": 0.7372048968964625, "grad_norm": 0.9561961878006643, "learning_rate": 2.329547372318577e-06, "loss": 0.0203, "step": 176680 }, { "epoch": 0.7372257596114528, "grad_norm": 0.3074875759288548, "learning_rate": 2.3295144100263832e-06, "loss": 0.0143, "step": 176685 }, { "epoch": 0.7372466223264431, "grad_norm": 0.6875922596193974, "learning_rate": 2.329481449133365e-06, "loss": 0.0213, "step": 176690 }, { "epoch": 0.7372674850414334, "grad_norm": 0.27294184281834716, "learning_rate": 2.3294484896394236e-06, "loss": 0.0187, "step": 176695 }, { "epoch": 0.7372883477564236, "grad_norm": 0.7954042082083896, "learning_rate": 2.32941553154446e-06, "loss": 0.0209, "step": 176700 }, { "epoch": 0.737309210471414, "grad_norm": 0.4541066226950567, "learning_rate": 2.329382574848375e-06, "loss": 0.0192, "step": 176705 }, { "epoch": 0.7373300731864042, "grad_norm": 0.5710206251325183, "learning_rate": 2.3293496195510694e-06, "loss": 0.0317, "step": 176710 }, { "epoch": 0.7373509359013944, "grad_norm": 0.8223011192138969, "learning_rate": 2.329316665652445e-06, "loss": 0.0254, "step": 176715 }, { "epoch": 0.7373717986163847, "grad_norm": 0.7420142012308529, "learning_rate": 2.329283713152403e-06, "loss": 0.0229, "step": 176720 }, { "epoch": 0.737392661331375, "grad_norm": 0.7146277340586864, "learning_rate": 2.3292507620508425e-06, "loss": 0.0259, "step": 176725 }, { "epoch": 0.7374135240463653, "grad_norm": 0.8612406225525144, "learning_rate": 2.3292178123476667e-06, "loss": 0.0207, "step": 176730 }, { "epoch": 0.7374343867613555, "grad_norm": 0.9903406819154629, "learning_rate": 2.3291848640427762e-06, "loss": 0.0241, "step": 176735 }, { "epoch": 0.7374552494763459, "grad_norm": 0.8484738157182434, "learning_rate": 2.329151917136072e-06, "loss": 0.0201, "step": 176740 }, { "epoch": 0.7374761121913361, "grad_norm": 0.8444406787467216, "learning_rate": 2.3291189716274542e-06, "loss": 0.0194, "step": 176745 }, { "epoch": 0.7374969749063264, "grad_norm": 0.4507234237646674, "learning_rate": 2.3290860275168257e-06, "loss": 0.0238, "step": 176750 }, { "epoch": 0.7375178376213167, "grad_norm": 0.6022661255975162, "learning_rate": 2.329053084804087e-06, "loss": 0.0159, "step": 176755 }, { "epoch": 0.737538700336307, "grad_norm": 0.5276847692867248, "learning_rate": 2.3290201434891386e-06, "loss": 0.0183, "step": 176760 }, { "epoch": 0.7375595630512972, "grad_norm": 0.38580395166704545, "learning_rate": 2.3289872035718823e-06, "loss": 0.0188, "step": 176765 }, { "epoch": 0.7375804257662876, "grad_norm": 0.5653539281950554, "learning_rate": 2.328954265052219e-06, "loss": 0.0251, "step": 176770 }, { "epoch": 0.7376012884812778, "grad_norm": 0.7645372554125563, "learning_rate": 2.3289213279300496e-06, "loss": 0.0212, "step": 176775 }, { "epoch": 0.7376221511962681, "grad_norm": 0.3355522865397406, "learning_rate": 2.328888392205276e-06, "loss": 0.0226, "step": 176780 }, { "epoch": 0.7376430139112583, "grad_norm": 0.30761151867753295, "learning_rate": 2.328855457877799e-06, "loss": 0.0203, "step": 176785 }, { "epoch": 0.7376638766262487, "grad_norm": 3.169347948233191, "learning_rate": 2.32882252494752e-06, "loss": 0.0214, "step": 176790 }, { "epoch": 0.7376847393412389, "grad_norm": 1.0295852374541865, "learning_rate": 2.3287895934143396e-06, "loss": 0.0278, "step": 176795 }, { "epoch": 0.7377056020562291, "grad_norm": 1.0278672285984496, "learning_rate": 2.3287566632781593e-06, "loss": 0.0174, "step": 176800 }, { "epoch": 0.7377264647712195, "grad_norm": 0.6600543973512372, "learning_rate": 2.328723734538881e-06, "loss": 0.0203, "step": 176805 }, { "epoch": 0.7377473274862097, "grad_norm": 1.154052904149793, "learning_rate": 2.3286908071964045e-06, "loss": 0.0192, "step": 176810 }, { "epoch": 0.7377681902012, "grad_norm": 0.904717904098487, "learning_rate": 2.3286578812506326e-06, "loss": 0.026, "step": 176815 }, { "epoch": 0.7377890529161903, "grad_norm": 0.8498261467635233, "learning_rate": 2.3286249567014665e-06, "loss": 0.019, "step": 176820 }, { "epoch": 0.7378099156311806, "grad_norm": 0.6410074486214534, "learning_rate": 2.3285920335488064e-06, "loss": 0.0203, "step": 176825 }, { "epoch": 0.7378307783461708, "grad_norm": 0.8638130021621863, "learning_rate": 2.328559111792554e-06, "loss": 0.0225, "step": 176830 }, { "epoch": 0.7378516410611612, "grad_norm": 0.5163628601147645, "learning_rate": 2.3285261914326103e-06, "loss": 0.0181, "step": 176835 }, { "epoch": 0.7378725037761514, "grad_norm": 0.41979045378664565, "learning_rate": 2.328493272468877e-06, "loss": 0.0201, "step": 176840 }, { "epoch": 0.7378933664911417, "grad_norm": 0.6915841124280742, "learning_rate": 2.328460354901256e-06, "loss": 0.0236, "step": 176845 }, { "epoch": 0.7379142292061319, "grad_norm": 0.7369237210572689, "learning_rate": 2.3284274387296475e-06, "loss": 0.03, "step": 176850 }, { "epoch": 0.7379350919211223, "grad_norm": 0.49050714063636286, "learning_rate": 2.3283945239539537e-06, "loss": 0.0175, "step": 176855 }, { "epoch": 0.7379559546361125, "grad_norm": 1.0500270251483779, "learning_rate": 2.3283616105740756e-06, "loss": 0.0253, "step": 176860 }, { "epoch": 0.7379768173511028, "grad_norm": 0.6463136607826078, "learning_rate": 2.328328698589914e-06, "loss": 0.0195, "step": 176865 }, { "epoch": 0.7379976800660931, "grad_norm": 0.6295012052715144, "learning_rate": 2.328295788001371e-06, "loss": 0.0209, "step": 176870 }, { "epoch": 0.7380185427810834, "grad_norm": 0.7701855533470784, "learning_rate": 2.328262878808348e-06, "loss": 0.0204, "step": 176875 }, { "epoch": 0.7380394054960736, "grad_norm": 0.5127110012568635, "learning_rate": 2.3282299710107463e-06, "loss": 0.0249, "step": 176880 }, { "epoch": 0.738060268211064, "grad_norm": 0.4738684287913602, "learning_rate": 2.328197064608466e-06, "loss": 0.0168, "step": 176885 }, { "epoch": 0.7380811309260542, "grad_norm": 0.5604262126343389, "learning_rate": 2.3281641596014104e-06, "loss": 0.0231, "step": 176890 }, { "epoch": 0.7381019936410445, "grad_norm": 0.7669001864643256, "learning_rate": 2.32813125598948e-06, "loss": 0.018, "step": 176895 }, { "epoch": 0.7381228563560347, "grad_norm": 1.0521668800135775, "learning_rate": 2.3280983537725763e-06, "loss": 0.018, "step": 176900 }, { "epoch": 0.738143719071025, "grad_norm": 1.042468849189672, "learning_rate": 2.3280654529506008e-06, "loss": 0.0184, "step": 176905 }, { "epoch": 0.7381645817860153, "grad_norm": 0.7296320494729757, "learning_rate": 2.328032553523455e-06, "loss": 0.0224, "step": 176910 }, { "epoch": 0.7381854445010055, "grad_norm": 1.7646272354939758, "learning_rate": 2.3279996554910404e-06, "loss": 0.0283, "step": 176915 }, { "epoch": 0.7382063072159959, "grad_norm": 0.8365582887869232, "learning_rate": 2.3279667588532577e-06, "loss": 0.0269, "step": 176920 }, { "epoch": 0.7382271699309861, "grad_norm": 0.30443990812544974, "learning_rate": 2.3279338636100097e-06, "loss": 0.0207, "step": 176925 }, { "epoch": 0.7382480326459764, "grad_norm": 0.8205677359464062, "learning_rate": 2.327900969761197e-06, "loss": 0.0174, "step": 176930 }, { "epoch": 0.7382688953609667, "grad_norm": 1.1312694285800977, "learning_rate": 2.3278680773067207e-06, "loss": 0.0175, "step": 176935 }, { "epoch": 0.738289758075957, "grad_norm": 0.3802712250928142, "learning_rate": 2.327835186246483e-06, "loss": 0.0158, "step": 176940 }, { "epoch": 0.7383106207909472, "grad_norm": 1.0539789544910423, "learning_rate": 2.3278022965803855e-06, "loss": 0.016, "step": 176945 }, { "epoch": 0.7383314835059376, "grad_norm": 0.4844498957539998, "learning_rate": 2.3277694083083293e-06, "loss": 0.0247, "step": 176950 }, { "epoch": 0.7383523462209278, "grad_norm": 0.2631056040527564, "learning_rate": 2.3277365214302156e-06, "loss": 0.0229, "step": 176955 }, { "epoch": 0.7383732089359181, "grad_norm": 0.29043439165518914, "learning_rate": 2.327703635945947e-06, "loss": 0.0183, "step": 176960 }, { "epoch": 0.7383940716509083, "grad_norm": 0.6279924890079197, "learning_rate": 2.3276707518554246e-06, "loss": 0.017, "step": 176965 }, { "epoch": 0.7384149343658987, "grad_norm": 0.343213752373895, "learning_rate": 2.3276378691585493e-06, "loss": 0.016, "step": 176970 }, { "epoch": 0.7384357970808889, "grad_norm": 0.73926197485924, "learning_rate": 2.3276049878552233e-06, "loss": 0.0268, "step": 176975 }, { "epoch": 0.7384566597958792, "grad_norm": 2.585746196660497, "learning_rate": 2.327572107945348e-06, "loss": 0.0282, "step": 176980 }, { "epoch": 0.7384775225108695, "grad_norm": 0.4249384100315733, "learning_rate": 2.3275392294288253e-06, "loss": 0.0145, "step": 176985 }, { "epoch": 0.7384983852258598, "grad_norm": 0.4633005272134255, "learning_rate": 2.327506352305556e-06, "loss": 0.0172, "step": 176990 }, { "epoch": 0.73851924794085, "grad_norm": 0.4549927907285284, "learning_rate": 2.3274734765754425e-06, "loss": 0.0173, "step": 176995 }, { "epoch": 0.7385401106558404, "grad_norm": 0.8200635097217941, "learning_rate": 2.327440602238386e-06, "loss": 0.019, "step": 177000 }, { "epoch": 0.7385609733708306, "grad_norm": 0.35358364239473183, "learning_rate": 2.327407729294288e-06, "loss": 0.019, "step": 177005 }, { "epoch": 0.7385818360858208, "grad_norm": 0.40775801530192346, "learning_rate": 2.327374857743051e-06, "loss": 0.0208, "step": 177010 }, { "epoch": 0.7386026988008112, "grad_norm": 0.9168303374101575, "learning_rate": 2.3273419875845756e-06, "loss": 0.0232, "step": 177015 }, { "epoch": 0.7386235615158014, "grad_norm": 0.8885668826772021, "learning_rate": 2.3273091188187636e-06, "loss": 0.0246, "step": 177020 }, { "epoch": 0.7386444242307917, "grad_norm": 0.7595290155426208, "learning_rate": 2.327276251445518e-06, "loss": 0.0258, "step": 177025 }, { "epoch": 0.7386652869457819, "grad_norm": 0.8068809655952763, "learning_rate": 2.3272433854647383e-06, "loss": 0.0264, "step": 177030 }, { "epoch": 0.7386861496607723, "grad_norm": 0.8170073129906062, "learning_rate": 2.3272105208763276e-06, "loss": 0.0175, "step": 177035 }, { "epoch": 0.7387070123757625, "grad_norm": 0.7127612934137822, "learning_rate": 2.3271776576801874e-06, "loss": 0.0152, "step": 177040 }, { "epoch": 0.7387278750907528, "grad_norm": 0.6642156736187002, "learning_rate": 2.327144795876219e-06, "loss": 0.018, "step": 177045 }, { "epoch": 0.7387487378057431, "grad_norm": 1.2687912395825636, "learning_rate": 2.327111935464325e-06, "loss": 0.0273, "step": 177050 }, { "epoch": 0.7387696005207334, "grad_norm": 0.3576588984484581, "learning_rate": 2.327079076444406e-06, "loss": 0.016, "step": 177055 }, { "epoch": 0.7387904632357236, "grad_norm": 0.6083230265872991, "learning_rate": 2.327046218816364e-06, "loss": 0.0214, "step": 177060 }, { "epoch": 0.738811325950714, "grad_norm": 0.8659999392476416, "learning_rate": 2.3270133625801014e-06, "loss": 0.0267, "step": 177065 }, { "epoch": 0.7388321886657042, "grad_norm": 0.539783577257166, "learning_rate": 2.3269805077355194e-06, "loss": 0.0195, "step": 177070 }, { "epoch": 0.7388530513806945, "grad_norm": 1.0348155261530971, "learning_rate": 2.32694765428252e-06, "loss": 0.0302, "step": 177075 }, { "epoch": 0.7388739140956847, "grad_norm": 1.0833517359527893, "learning_rate": 2.3269148022210047e-06, "loss": 0.0357, "step": 177080 }, { "epoch": 0.7388947768106751, "grad_norm": 0.7206618554601641, "learning_rate": 2.3268819515508755e-06, "loss": 0.0217, "step": 177085 }, { "epoch": 0.7389156395256653, "grad_norm": 0.5398500484679349, "learning_rate": 2.326849102272034e-06, "loss": 0.0223, "step": 177090 }, { "epoch": 0.7389365022406555, "grad_norm": 0.6310148157709338, "learning_rate": 2.3268162543843816e-06, "loss": 0.0217, "step": 177095 }, { "epoch": 0.7389573649556459, "grad_norm": 0.8083600635083461, "learning_rate": 2.3267834078878213e-06, "loss": 0.0209, "step": 177100 }, { "epoch": 0.7389782276706361, "grad_norm": 0.8299674265659485, "learning_rate": 2.326750562782254e-06, "loss": 0.0168, "step": 177105 }, { "epoch": 0.7389990903856264, "grad_norm": 0.9783972022720661, "learning_rate": 2.3267177190675815e-06, "loss": 0.0221, "step": 177110 }, { "epoch": 0.7390199531006167, "grad_norm": 0.5251486596154055, "learning_rate": 2.3266848767437062e-06, "loss": 0.0199, "step": 177115 }, { "epoch": 0.739040815815607, "grad_norm": 0.8150698664496068, "learning_rate": 2.32665203581053e-06, "loss": 0.0284, "step": 177120 }, { "epoch": 0.7390616785305972, "grad_norm": 1.2492751922776137, "learning_rate": 2.3266191962679534e-06, "loss": 0.0288, "step": 177125 }, { "epoch": 0.7390825412455876, "grad_norm": 0.3896305243056763, "learning_rate": 2.3265863581158798e-06, "loss": 0.0217, "step": 177130 }, { "epoch": 0.7391034039605778, "grad_norm": 0.5984899052301714, "learning_rate": 2.3265535213542103e-06, "loss": 0.0156, "step": 177135 }, { "epoch": 0.7391242666755681, "grad_norm": 0.4942386311655123, "learning_rate": 2.3265206859828475e-06, "loss": 0.0161, "step": 177140 }, { "epoch": 0.7391451293905583, "grad_norm": 0.3872669987024212, "learning_rate": 2.3264878520016924e-06, "loss": 0.0171, "step": 177145 }, { "epoch": 0.7391659921055487, "grad_norm": 0.8615515398346835, "learning_rate": 2.3264550194106476e-06, "loss": 0.0208, "step": 177150 }, { "epoch": 0.7391868548205389, "grad_norm": 0.8982940254317741, "learning_rate": 2.3264221882096143e-06, "loss": 0.0239, "step": 177155 }, { "epoch": 0.7392077175355292, "grad_norm": 0.9648332691995567, "learning_rate": 2.326389358398495e-06, "loss": 0.0262, "step": 177160 }, { "epoch": 0.7392285802505195, "grad_norm": 0.7259740137959675, "learning_rate": 2.3263565299771915e-06, "loss": 0.0212, "step": 177165 }, { "epoch": 0.7392494429655098, "grad_norm": 0.49793443329558046, "learning_rate": 2.326323702945606e-06, "loss": 0.0163, "step": 177170 }, { "epoch": 0.7392703056805, "grad_norm": 0.7730653473880105, "learning_rate": 2.32629087730364e-06, "loss": 0.0206, "step": 177175 }, { "epoch": 0.7392911683954904, "grad_norm": 0.8280559665269059, "learning_rate": 2.3262580530511954e-06, "loss": 0.0269, "step": 177180 }, { "epoch": 0.7393120311104806, "grad_norm": 0.8063868449825727, "learning_rate": 2.326225230188175e-06, "loss": 0.0177, "step": 177185 }, { "epoch": 0.7393328938254708, "grad_norm": 0.5662603438171562, "learning_rate": 2.326192408714479e-06, "loss": 0.0153, "step": 177190 }, { "epoch": 0.7393537565404612, "grad_norm": 0.4960217385862757, "learning_rate": 2.326159588630011e-06, "loss": 0.0177, "step": 177195 }, { "epoch": 0.7393746192554514, "grad_norm": 0.31457046256336324, "learning_rate": 2.3261267699346734e-06, "loss": 0.0246, "step": 177200 }, { "epoch": 0.7393954819704417, "grad_norm": 0.5017592725749371, "learning_rate": 2.3260939526283665e-06, "loss": 0.0175, "step": 177205 }, { "epoch": 0.7394163446854319, "grad_norm": 0.4461249289428034, "learning_rate": 2.3260611367109932e-06, "loss": 0.0238, "step": 177210 }, { "epoch": 0.7394372074004223, "grad_norm": 1.2424342085152584, "learning_rate": 2.326028322182456e-06, "loss": 0.0274, "step": 177215 }, { "epoch": 0.7394580701154125, "grad_norm": 0.6813476586398097, "learning_rate": 2.325995509042656e-06, "loss": 0.0212, "step": 177220 }, { "epoch": 0.7394789328304028, "grad_norm": 0.7361751747488798, "learning_rate": 2.3259626972914966e-06, "loss": 0.0233, "step": 177225 }, { "epoch": 0.7394997955453931, "grad_norm": 0.860256037883558, "learning_rate": 2.3259298869288778e-06, "loss": 0.026, "step": 177230 }, { "epoch": 0.7395206582603834, "grad_norm": 1.2748092995171403, "learning_rate": 2.3258970779547035e-06, "loss": 0.0218, "step": 177235 }, { "epoch": 0.7395415209753736, "grad_norm": 0.6991424332825527, "learning_rate": 2.325864270368875e-06, "loss": 0.0153, "step": 177240 }, { "epoch": 0.739562383690364, "grad_norm": 0.744969466019983, "learning_rate": 2.3258314641712947e-06, "loss": 0.0348, "step": 177245 }, { "epoch": 0.7395832464053542, "grad_norm": 0.6861805495579457, "learning_rate": 2.325798659361864e-06, "loss": 0.0189, "step": 177250 }, { "epoch": 0.7396041091203445, "grad_norm": 1.067805541888784, "learning_rate": 2.325765855940486e-06, "loss": 0.0235, "step": 177255 }, { "epoch": 0.7396249718353347, "grad_norm": 0.8498273867289903, "learning_rate": 2.325733053907062e-06, "loss": 0.0166, "step": 177260 }, { "epoch": 0.7396458345503251, "grad_norm": 1.0708204824678063, "learning_rate": 2.3257002532614947e-06, "loss": 0.0212, "step": 177265 }, { "epoch": 0.7396666972653153, "grad_norm": 0.5800557918541562, "learning_rate": 2.3256674540036858e-06, "loss": 0.0172, "step": 177270 }, { "epoch": 0.7396875599803056, "grad_norm": 0.5266486010490529, "learning_rate": 2.325634656133538e-06, "loss": 0.0203, "step": 177275 }, { "epoch": 0.7397084226952959, "grad_norm": 0.8475319757753548, "learning_rate": 2.3256018596509527e-06, "loss": 0.0172, "step": 177280 }, { "epoch": 0.7397292854102862, "grad_norm": 0.617964945547639, "learning_rate": 2.3255690645558326e-06, "loss": 0.0201, "step": 177285 }, { "epoch": 0.7397501481252764, "grad_norm": 0.8138777492868381, "learning_rate": 2.3255362708480795e-06, "loss": 0.0205, "step": 177290 }, { "epoch": 0.7397710108402668, "grad_norm": 0.4269061888799507, "learning_rate": 2.3255034785275958e-06, "loss": 0.0149, "step": 177295 }, { "epoch": 0.739791873555257, "grad_norm": 0.6353187083631596, "learning_rate": 2.3254706875942835e-06, "loss": 0.0211, "step": 177300 }, { "epoch": 0.7398127362702472, "grad_norm": 0.7061411139232542, "learning_rate": 2.3254378980480457e-06, "loss": 0.0232, "step": 177305 }, { "epoch": 0.7398335989852376, "grad_norm": 0.5749543759865644, "learning_rate": 2.325405109888784e-06, "loss": 0.0184, "step": 177310 }, { "epoch": 0.7398544617002278, "grad_norm": 1.2433241052203536, "learning_rate": 2.3253723231163996e-06, "loss": 0.0284, "step": 177315 }, { "epoch": 0.7398753244152181, "grad_norm": 0.34005077739659445, "learning_rate": 2.3253395377307965e-06, "loss": 0.0233, "step": 177320 }, { "epoch": 0.7398961871302083, "grad_norm": 0.18504194495041662, "learning_rate": 2.3253067537318757e-06, "loss": 0.0234, "step": 177325 }, { "epoch": 0.7399170498451987, "grad_norm": 0.3570520221983708, "learning_rate": 2.3252739711195398e-06, "loss": 0.0173, "step": 177330 }, { "epoch": 0.7399379125601889, "grad_norm": 0.4761766977241784, "learning_rate": 2.3252411898936913e-06, "loss": 0.019, "step": 177335 }, { "epoch": 0.7399587752751792, "grad_norm": 0.7414531478339984, "learning_rate": 2.325208410054232e-06, "loss": 0.0259, "step": 177340 }, { "epoch": 0.7399796379901695, "grad_norm": 0.6655357181720531, "learning_rate": 2.325175631601065e-06, "loss": 0.0246, "step": 177345 }, { "epoch": 0.7400005007051598, "grad_norm": 0.4903043729614637, "learning_rate": 2.3251428545340918e-06, "loss": 0.0241, "step": 177350 }, { "epoch": 0.74002136342015, "grad_norm": 0.46907418396801187, "learning_rate": 2.325110078853215e-06, "loss": 0.0262, "step": 177355 }, { "epoch": 0.7400422261351404, "grad_norm": 0.8575330962778772, "learning_rate": 2.3250773045583373e-06, "loss": 0.0182, "step": 177360 }, { "epoch": 0.7400630888501306, "grad_norm": 0.5047239375718665, "learning_rate": 2.3250445316493596e-06, "loss": 0.0257, "step": 177365 }, { "epoch": 0.7400839515651209, "grad_norm": 0.4619945005462228, "learning_rate": 2.3250117601261864e-06, "loss": 0.0367, "step": 177370 }, { "epoch": 0.7401048142801112, "grad_norm": 0.3969716001692683, "learning_rate": 2.324978989988718e-06, "loss": 0.0162, "step": 177375 }, { "epoch": 0.7401256769951015, "grad_norm": 0.8600486334911446, "learning_rate": 2.3249462212368578e-06, "loss": 0.0218, "step": 177380 }, { "epoch": 0.7401465397100917, "grad_norm": 0.7326194115249457, "learning_rate": 2.324913453870508e-06, "loss": 0.0222, "step": 177385 }, { "epoch": 0.7401674024250819, "grad_norm": 0.4796305160433908, "learning_rate": 2.324880687889571e-06, "loss": 0.0127, "step": 177390 }, { "epoch": 0.7401882651400723, "grad_norm": 0.8865097013198632, "learning_rate": 2.3248479232939493e-06, "loss": 0.0231, "step": 177395 }, { "epoch": 0.7402091278550625, "grad_norm": 0.31913290220636065, "learning_rate": 2.3248151600835447e-06, "loss": 0.0171, "step": 177400 }, { "epoch": 0.7402299905700528, "grad_norm": 0.344061887655595, "learning_rate": 2.32478239825826e-06, "loss": 0.027, "step": 177405 }, { "epoch": 0.7402508532850431, "grad_norm": 0.8751140527778455, "learning_rate": 2.3247496378179974e-06, "loss": 0.0225, "step": 177410 }, { "epoch": 0.7402717160000334, "grad_norm": 0.8330531788338563, "learning_rate": 2.3247168787626604e-06, "loss": 0.0221, "step": 177415 }, { "epoch": 0.7402925787150236, "grad_norm": 0.944723874390084, "learning_rate": 2.32468412109215e-06, "loss": 0.0186, "step": 177420 }, { "epoch": 0.740313441430014, "grad_norm": 0.32610178589492256, "learning_rate": 2.324651364806369e-06, "loss": 0.0166, "step": 177425 }, { "epoch": 0.7403343041450042, "grad_norm": 0.5332074369918148, "learning_rate": 2.32461860990522e-06, "loss": 0.0212, "step": 177430 }, { "epoch": 0.7403551668599945, "grad_norm": 0.44195725378172335, "learning_rate": 2.324585856388606e-06, "loss": 0.0215, "step": 177435 }, { "epoch": 0.7403760295749847, "grad_norm": 0.5392246704819562, "learning_rate": 2.3245531042564285e-06, "loss": 0.0269, "step": 177440 }, { "epoch": 0.7403968922899751, "grad_norm": 0.6870388766926737, "learning_rate": 2.3245203535085907e-06, "loss": 0.0196, "step": 177445 }, { "epoch": 0.7404177550049653, "grad_norm": 0.4619482526913235, "learning_rate": 2.324487604144994e-06, "loss": 0.023, "step": 177450 }, { "epoch": 0.7404386177199556, "grad_norm": 0.38794864096720777, "learning_rate": 2.324454856165542e-06, "loss": 0.019, "step": 177455 }, { "epoch": 0.7404594804349459, "grad_norm": 0.4015834698169414, "learning_rate": 2.3244221095701376e-06, "loss": 0.0162, "step": 177460 }, { "epoch": 0.7404803431499362, "grad_norm": 0.7024065294172702, "learning_rate": 2.3243893643586826e-06, "loss": 0.0225, "step": 177465 }, { "epoch": 0.7405012058649264, "grad_norm": 0.42427870531253126, "learning_rate": 2.324356620531079e-06, "loss": 0.0236, "step": 177470 }, { "epoch": 0.7405220685799168, "grad_norm": 0.6381484875639082, "learning_rate": 2.32432387808723e-06, "loss": 0.0226, "step": 177475 }, { "epoch": 0.740542931294907, "grad_norm": 1.012832277466643, "learning_rate": 2.324291137027038e-06, "loss": 0.0338, "step": 177480 }, { "epoch": 0.7405637940098972, "grad_norm": 0.6935859187167404, "learning_rate": 2.3242583973504053e-06, "loss": 0.0174, "step": 177485 }, { "epoch": 0.7405846567248876, "grad_norm": 0.8506317628050504, "learning_rate": 2.324225659057235e-06, "loss": 0.0209, "step": 177490 }, { "epoch": 0.7406055194398778, "grad_norm": 0.8900679219169323, "learning_rate": 2.3241929221474287e-06, "loss": 0.0179, "step": 177495 }, { "epoch": 0.7406263821548681, "grad_norm": 0.13866496763807026, "learning_rate": 2.3241601866208903e-06, "loss": 0.0204, "step": 177500 }, { "epoch": 0.7406472448698583, "grad_norm": 0.766020639583782, "learning_rate": 2.3241274524775216e-06, "loss": 0.0218, "step": 177505 }, { "epoch": 0.7406681075848487, "grad_norm": 0.8623433215042016, "learning_rate": 2.3240947197172255e-06, "loss": 0.0214, "step": 177510 }, { "epoch": 0.7406889702998389, "grad_norm": 0.5081511837477499, "learning_rate": 2.3240619883399042e-06, "loss": 0.0198, "step": 177515 }, { "epoch": 0.7407098330148292, "grad_norm": 0.6433246988242746, "learning_rate": 2.32402925834546e-06, "loss": 0.0202, "step": 177520 }, { "epoch": 0.7407306957298195, "grad_norm": 0.3227884496058504, "learning_rate": 2.323996529733797e-06, "loss": 0.0179, "step": 177525 }, { "epoch": 0.7407515584448098, "grad_norm": 0.6558183123514545, "learning_rate": 2.323963802504817e-06, "loss": 0.0255, "step": 177530 }, { "epoch": 0.7407724211598, "grad_norm": 0.7364436616947734, "learning_rate": 2.3239310766584224e-06, "loss": 0.0216, "step": 177535 }, { "epoch": 0.7407932838747904, "grad_norm": 0.8538723153699564, "learning_rate": 2.3238983521945155e-06, "loss": 0.0201, "step": 177540 }, { "epoch": 0.7408141465897806, "grad_norm": 0.4705479860613657, "learning_rate": 2.3238656291129997e-06, "loss": 0.0145, "step": 177545 }, { "epoch": 0.7408350093047709, "grad_norm": 0.7385078197937435, "learning_rate": 2.323832907413778e-06, "loss": 0.0197, "step": 177550 }, { "epoch": 0.7408558720197612, "grad_norm": 0.7530992437678864, "learning_rate": 2.3238001870967515e-06, "loss": 0.0162, "step": 177555 }, { "epoch": 0.7408767347347515, "grad_norm": 0.496724564354071, "learning_rate": 2.323767468161825e-06, "loss": 0.0258, "step": 177560 }, { "epoch": 0.7408975974497417, "grad_norm": 0.535891111583104, "learning_rate": 2.3237347506088993e-06, "loss": 0.025, "step": 177565 }, { "epoch": 0.740918460164732, "grad_norm": 0.5485359702953961, "learning_rate": 2.323702034437878e-06, "loss": 0.0192, "step": 177570 }, { "epoch": 0.7409393228797223, "grad_norm": 0.5324430914703114, "learning_rate": 2.3236693196486643e-06, "loss": 0.0172, "step": 177575 }, { "epoch": 0.7409601855947126, "grad_norm": 0.5518256431220361, "learning_rate": 2.32363660624116e-06, "loss": 0.0248, "step": 177580 }, { "epoch": 0.7409810483097028, "grad_norm": 0.6435272923724341, "learning_rate": 2.323603894215269e-06, "loss": 0.0295, "step": 177585 }, { "epoch": 0.7410019110246931, "grad_norm": 0.36414295967983, "learning_rate": 2.323571183570893e-06, "loss": 0.0208, "step": 177590 }, { "epoch": 0.7410227737396834, "grad_norm": 0.6209226493665827, "learning_rate": 2.323538474307935e-06, "loss": 0.0164, "step": 177595 }, { "epoch": 0.7410436364546736, "grad_norm": 0.44759779662469007, "learning_rate": 2.323505766426297e-06, "loss": 0.0192, "step": 177600 }, { "epoch": 0.741064499169664, "grad_norm": 1.0559804776825716, "learning_rate": 2.3234730599258833e-06, "loss": 0.0284, "step": 177605 }, { "epoch": 0.7410853618846542, "grad_norm": 0.502111164275978, "learning_rate": 2.3234403548065957e-06, "loss": 0.0179, "step": 177610 }, { "epoch": 0.7411062245996445, "grad_norm": 1.4221255202399568, "learning_rate": 2.3234076510683377e-06, "loss": 0.0233, "step": 177615 }, { "epoch": 0.7411270873146347, "grad_norm": 0.4462227152422045, "learning_rate": 2.323374948711012e-06, "loss": 0.024, "step": 177620 }, { "epoch": 0.7411479500296251, "grad_norm": 0.9225329481871669, "learning_rate": 2.3233422477345206e-06, "loss": 0.0252, "step": 177625 }, { "epoch": 0.7411688127446153, "grad_norm": 0.4279642584006326, "learning_rate": 2.323309548138767e-06, "loss": 0.0145, "step": 177630 }, { "epoch": 0.7411896754596056, "grad_norm": 0.4282091494828917, "learning_rate": 2.323276849923654e-06, "loss": 0.0181, "step": 177635 }, { "epoch": 0.7412105381745959, "grad_norm": 0.6678987851434494, "learning_rate": 2.323244153089084e-06, "loss": 0.0272, "step": 177640 }, { "epoch": 0.7412314008895862, "grad_norm": 0.4293965034093136, "learning_rate": 2.323211457634961e-06, "loss": 0.0219, "step": 177645 }, { "epoch": 0.7412522636045764, "grad_norm": 0.32113029711467533, "learning_rate": 2.323178763561186e-06, "loss": 0.0174, "step": 177650 }, { "epoch": 0.7412731263195668, "grad_norm": 0.5249751799059461, "learning_rate": 2.3231460708676636e-06, "loss": 0.0175, "step": 177655 }, { "epoch": 0.741293989034557, "grad_norm": 1.192186186497554, "learning_rate": 2.3231133795542956e-06, "loss": 0.019, "step": 177660 }, { "epoch": 0.7413148517495473, "grad_norm": 0.741578425663338, "learning_rate": 2.323080689620986e-06, "loss": 0.0303, "step": 177665 }, { "epoch": 0.7413357144645376, "grad_norm": 0.34753552468283366, "learning_rate": 2.3230480010676364e-06, "loss": 0.0193, "step": 177670 }, { "epoch": 0.7413565771795279, "grad_norm": 1.3298147034591508, "learning_rate": 2.3230153138941504e-06, "loss": 0.0203, "step": 177675 }, { "epoch": 0.7413774398945181, "grad_norm": 1.1439957492819735, "learning_rate": 2.3229826281004315e-06, "loss": 0.0211, "step": 177680 }, { "epoch": 0.7413983026095083, "grad_norm": 0.879128646404059, "learning_rate": 2.322949943686381e-06, "loss": 0.0236, "step": 177685 }, { "epoch": 0.7414191653244987, "grad_norm": 0.5366156798572784, "learning_rate": 2.322917260651904e-06, "loss": 0.0176, "step": 177690 }, { "epoch": 0.7414400280394889, "grad_norm": 0.6401216166018969, "learning_rate": 2.322884578996901e-06, "loss": 0.0174, "step": 177695 }, { "epoch": 0.7414608907544792, "grad_norm": 0.6969477582283922, "learning_rate": 2.322851898721277e-06, "loss": 0.0174, "step": 177700 }, { "epoch": 0.7414817534694695, "grad_norm": 1.1829333327013187, "learning_rate": 2.3228192198249342e-06, "loss": 0.0321, "step": 177705 }, { "epoch": 0.7415026161844598, "grad_norm": 0.6939532686880485, "learning_rate": 2.3227865423077755e-06, "loss": 0.0194, "step": 177710 }, { "epoch": 0.74152347889945, "grad_norm": 0.5125612710798937, "learning_rate": 2.322753866169704e-06, "loss": 0.0181, "step": 177715 }, { "epoch": 0.7415443416144404, "grad_norm": 0.6420998373299256, "learning_rate": 2.322721191410623e-06, "loss": 0.0212, "step": 177720 }, { "epoch": 0.7415652043294306, "grad_norm": 2.6172639590532127, "learning_rate": 2.322688518030435e-06, "loss": 0.0188, "step": 177725 }, { "epoch": 0.7415860670444209, "grad_norm": 0.8320197910830445, "learning_rate": 2.3226558460290434e-06, "loss": 0.0255, "step": 177730 }, { "epoch": 0.7416069297594111, "grad_norm": 0.8488813104532027, "learning_rate": 2.3226231754063504e-06, "loss": 0.0244, "step": 177735 }, { "epoch": 0.7416277924744015, "grad_norm": 0.7570618737015542, "learning_rate": 2.3225905061622603e-06, "loss": 0.0277, "step": 177740 }, { "epoch": 0.7416486551893917, "grad_norm": 0.3549941919052594, "learning_rate": 2.3225578382966756e-06, "loss": 0.0202, "step": 177745 }, { "epoch": 0.741669517904382, "grad_norm": 1.5569153335930237, "learning_rate": 2.322525171809499e-06, "loss": 0.0221, "step": 177750 }, { "epoch": 0.7416903806193723, "grad_norm": 0.5124843738308678, "learning_rate": 2.3224925067006342e-06, "loss": 0.0208, "step": 177755 }, { "epoch": 0.7417112433343626, "grad_norm": 0.3365213359036742, "learning_rate": 2.3224598429699828e-06, "loss": 0.0178, "step": 177760 }, { "epoch": 0.7417321060493528, "grad_norm": 0.5416984624966096, "learning_rate": 2.3224271806174505e-06, "loss": 0.0209, "step": 177765 }, { "epoch": 0.7417529687643432, "grad_norm": 0.35213487298090773, "learning_rate": 2.3223945196429384e-06, "loss": 0.0253, "step": 177770 }, { "epoch": 0.7417738314793334, "grad_norm": 0.4115056094750417, "learning_rate": 2.3223618600463498e-06, "loss": 0.0231, "step": 177775 }, { "epoch": 0.7417946941943236, "grad_norm": 1.174947778766427, "learning_rate": 2.3223292018275886e-06, "loss": 0.0355, "step": 177780 }, { "epoch": 0.741815556909314, "grad_norm": 1.029652401885912, "learning_rate": 2.3222965449865574e-06, "loss": 0.0231, "step": 177785 }, { "epoch": 0.7418364196243042, "grad_norm": 0.3336197027318038, "learning_rate": 2.3222638895231593e-06, "loss": 0.0202, "step": 177790 }, { "epoch": 0.7418572823392945, "grad_norm": 0.4529242276228281, "learning_rate": 2.3222312354372976e-06, "loss": 0.0177, "step": 177795 }, { "epoch": 0.7418781450542847, "grad_norm": 0.7469181076539686, "learning_rate": 2.3221985827288754e-06, "loss": 0.0177, "step": 177800 }, { "epoch": 0.7418990077692751, "grad_norm": 0.7454013519746941, "learning_rate": 2.322165931397795e-06, "loss": 0.018, "step": 177805 }, { "epoch": 0.7419198704842653, "grad_norm": 1.2392114598743247, "learning_rate": 2.322133281443962e-06, "loss": 0.0231, "step": 177810 }, { "epoch": 0.7419407331992556, "grad_norm": 0.4401742653131813, "learning_rate": 2.322100632867277e-06, "loss": 0.0191, "step": 177815 }, { "epoch": 0.7419615959142459, "grad_norm": 1.4237649472010963, "learning_rate": 2.322067985667645e-06, "loss": 0.0258, "step": 177820 }, { "epoch": 0.7419824586292362, "grad_norm": 1.0195931465517767, "learning_rate": 2.3220353398449674e-06, "loss": 0.0266, "step": 177825 }, { "epoch": 0.7420033213442264, "grad_norm": 0.5184960303762726, "learning_rate": 2.3220026953991487e-06, "loss": 0.026, "step": 177830 }, { "epoch": 0.7420241840592168, "grad_norm": 0.933468300915022, "learning_rate": 2.3219700523300927e-06, "loss": 0.0224, "step": 177835 }, { "epoch": 0.742045046774207, "grad_norm": 0.4641970063195525, "learning_rate": 2.3219374106377003e-06, "loss": 0.0172, "step": 177840 }, { "epoch": 0.7420659094891973, "grad_norm": 0.6736666286814306, "learning_rate": 2.321904770321877e-06, "loss": 0.0251, "step": 177845 }, { "epoch": 0.7420867722041876, "grad_norm": 0.593526617557128, "learning_rate": 2.3218721313825256e-06, "loss": 0.0175, "step": 177850 }, { "epoch": 0.7421076349191779, "grad_norm": 0.8835540661499162, "learning_rate": 2.321839493819549e-06, "loss": 0.0199, "step": 177855 }, { "epoch": 0.7421284976341681, "grad_norm": 0.869609151471609, "learning_rate": 2.3218068576328502e-06, "loss": 0.0248, "step": 177860 }, { "epoch": 0.7421493603491583, "grad_norm": 1.0946382742727798, "learning_rate": 2.321774222822332e-06, "loss": 0.0285, "step": 177865 }, { "epoch": 0.7421702230641487, "grad_norm": 0.727902905872882, "learning_rate": 2.3217415893878997e-06, "loss": 0.0183, "step": 177870 }, { "epoch": 0.742191085779139, "grad_norm": 0.5868471522194253, "learning_rate": 2.3217089573294547e-06, "loss": 0.0177, "step": 177875 }, { "epoch": 0.7422119484941292, "grad_norm": 0.3912108417620006, "learning_rate": 2.321676326646901e-06, "loss": 0.025, "step": 177880 }, { "epoch": 0.7422328112091195, "grad_norm": 0.5876102138927438, "learning_rate": 2.321643697340142e-06, "loss": 0.033, "step": 177885 }, { "epoch": 0.7422536739241098, "grad_norm": 0.677364766864098, "learning_rate": 2.3216110694090807e-06, "loss": 0.0202, "step": 177890 }, { "epoch": 0.7422745366391, "grad_norm": 0.4877655497318843, "learning_rate": 2.3215784428536206e-06, "loss": 0.0254, "step": 177895 }, { "epoch": 0.7422953993540904, "grad_norm": 0.5231567480165102, "learning_rate": 2.321545817673665e-06, "loss": 0.0197, "step": 177900 }, { "epoch": 0.7423162620690806, "grad_norm": 0.46703745569001714, "learning_rate": 2.3215131938691177e-06, "loss": 0.0193, "step": 177905 }, { "epoch": 0.7423371247840709, "grad_norm": 0.548057446969728, "learning_rate": 2.3214805714398816e-06, "loss": 0.0206, "step": 177910 }, { "epoch": 0.7423579874990611, "grad_norm": 0.32265665915413183, "learning_rate": 2.32144795038586e-06, "loss": 0.0186, "step": 177915 }, { "epoch": 0.7423788502140515, "grad_norm": 0.5210509052312078, "learning_rate": 2.321415330706956e-06, "loss": 0.0197, "step": 177920 }, { "epoch": 0.7423997129290417, "grad_norm": 0.5949990372930218, "learning_rate": 2.3213827124030745e-06, "loss": 0.0253, "step": 177925 }, { "epoch": 0.742420575644032, "grad_norm": 0.46343268219923184, "learning_rate": 2.3213500954741168e-06, "loss": 0.0249, "step": 177930 }, { "epoch": 0.7424414383590223, "grad_norm": 1.859553772847088, "learning_rate": 2.3213174799199876e-06, "loss": 0.0295, "step": 177935 }, { "epoch": 0.7424623010740126, "grad_norm": 0.19008172738727463, "learning_rate": 2.32128486574059e-06, "loss": 0.0218, "step": 177940 }, { "epoch": 0.7424831637890028, "grad_norm": 0.37529114508053785, "learning_rate": 2.3212522529358275e-06, "loss": 0.0193, "step": 177945 }, { "epoch": 0.7425040265039932, "grad_norm": 0.5931535399741157, "learning_rate": 2.3212196415056036e-06, "loss": 0.0187, "step": 177950 }, { "epoch": 0.7425248892189834, "grad_norm": 1.0929678996944323, "learning_rate": 2.321187031449822e-06, "loss": 0.0231, "step": 177955 }, { "epoch": 0.7425457519339737, "grad_norm": 0.5884113816767774, "learning_rate": 2.3211544227683855e-06, "loss": 0.0206, "step": 177960 }, { "epoch": 0.742566614648964, "grad_norm": 0.7456894193283103, "learning_rate": 2.3211218154611977e-06, "loss": 0.0179, "step": 177965 }, { "epoch": 0.7425874773639543, "grad_norm": 0.8149478407529466, "learning_rate": 2.321089209528162e-06, "loss": 0.0163, "step": 177970 }, { "epoch": 0.7426083400789445, "grad_norm": 0.8063785129245126, "learning_rate": 2.321056604969183e-06, "loss": 0.0201, "step": 177975 }, { "epoch": 0.7426292027939347, "grad_norm": 0.7272928889814873, "learning_rate": 2.321024001784163e-06, "loss": 0.0218, "step": 177980 }, { "epoch": 0.7426500655089251, "grad_norm": 0.5262610125943422, "learning_rate": 2.320991399973006e-06, "loss": 0.0178, "step": 177985 }, { "epoch": 0.7426709282239153, "grad_norm": 0.6501665628211379, "learning_rate": 2.320958799535615e-06, "loss": 0.0226, "step": 177990 }, { "epoch": 0.7426917909389056, "grad_norm": 0.7173391378769105, "learning_rate": 2.3209262004718938e-06, "loss": 0.0177, "step": 177995 }, { "epoch": 0.7427126536538959, "grad_norm": 0.6241354014050116, "learning_rate": 2.3208936027817457e-06, "loss": 0.0184, "step": 178000 }, { "epoch": 0.7427335163688862, "grad_norm": 0.715906408660394, "learning_rate": 2.3208610064650756e-06, "loss": 0.0197, "step": 178005 }, { "epoch": 0.7427543790838764, "grad_norm": 0.5239189106200478, "learning_rate": 2.320828411521785e-06, "loss": 0.0218, "step": 178010 }, { "epoch": 0.7427752417988668, "grad_norm": 1.143871798108273, "learning_rate": 2.3207958179517794e-06, "loss": 0.0223, "step": 178015 }, { "epoch": 0.742796104513857, "grad_norm": 1.1961802250329348, "learning_rate": 2.320763225754961e-06, "loss": 0.0227, "step": 178020 }, { "epoch": 0.7428169672288473, "grad_norm": 0.9118669195132665, "learning_rate": 2.320730634931234e-06, "loss": 0.0207, "step": 178025 }, { "epoch": 0.7428378299438376, "grad_norm": 0.7893770342570433, "learning_rate": 2.3206980454805015e-06, "loss": 0.0268, "step": 178030 }, { "epoch": 0.7428586926588279, "grad_norm": 0.69692733759688, "learning_rate": 2.3206654574026676e-06, "loss": 0.0224, "step": 178035 }, { "epoch": 0.7428795553738181, "grad_norm": 0.58708841366967, "learning_rate": 2.3206328706976357e-06, "loss": 0.0146, "step": 178040 }, { "epoch": 0.7429004180888084, "grad_norm": 0.3271449917558733, "learning_rate": 2.320600285365309e-06, "loss": 0.0159, "step": 178045 }, { "epoch": 0.7429212808037987, "grad_norm": 0.4900287840468272, "learning_rate": 2.3205677014055923e-06, "loss": 0.0257, "step": 178050 }, { "epoch": 0.742942143518789, "grad_norm": 0.7877170796756051, "learning_rate": 2.320535118818388e-06, "loss": 0.0109, "step": 178055 }, { "epoch": 0.7429630062337792, "grad_norm": 0.3225990618350069, "learning_rate": 2.320502537603601e-06, "loss": 0.0201, "step": 178060 }, { "epoch": 0.7429838689487696, "grad_norm": 0.5334788086209262, "learning_rate": 2.3204699577611335e-06, "loss": 0.0217, "step": 178065 }, { "epoch": 0.7430047316637598, "grad_norm": 0.32106491524608116, "learning_rate": 2.32043737929089e-06, "loss": 0.0221, "step": 178070 }, { "epoch": 0.74302559437875, "grad_norm": 0.7546000906684013, "learning_rate": 2.3204048021927746e-06, "loss": 0.0226, "step": 178075 }, { "epoch": 0.7430464570937404, "grad_norm": 0.626296703559664, "learning_rate": 2.32037222646669e-06, "loss": 0.0258, "step": 178080 }, { "epoch": 0.7430673198087306, "grad_norm": 1.2608429635162088, "learning_rate": 2.32033965211254e-06, "loss": 0.0234, "step": 178085 }, { "epoch": 0.7430881825237209, "grad_norm": 1.0866447081067991, "learning_rate": 2.320307079130229e-06, "loss": 0.0224, "step": 178090 }, { "epoch": 0.7431090452387111, "grad_norm": 0.6492586037061294, "learning_rate": 2.3202745075196602e-06, "loss": 0.0256, "step": 178095 }, { "epoch": 0.7431299079537015, "grad_norm": 0.5011354581741141, "learning_rate": 2.3202419372807374e-06, "loss": 0.02, "step": 178100 }, { "epoch": 0.7431507706686917, "grad_norm": 1.0473889038370778, "learning_rate": 2.3202093684133643e-06, "loss": 0.0179, "step": 178105 }, { "epoch": 0.743171633383682, "grad_norm": 0.5965959111853841, "learning_rate": 2.3201768009174455e-06, "loss": 0.0245, "step": 178110 }, { "epoch": 0.7431924960986723, "grad_norm": 1.1399455974207193, "learning_rate": 2.3201442347928836e-06, "loss": 0.0188, "step": 178115 }, { "epoch": 0.7432133588136626, "grad_norm": 0.4055045777065074, "learning_rate": 2.3201116700395825e-06, "loss": 0.0175, "step": 178120 }, { "epoch": 0.7432342215286528, "grad_norm": 1.14419791173606, "learning_rate": 2.3200791066574465e-06, "loss": 0.0157, "step": 178125 }, { "epoch": 0.7432550842436432, "grad_norm": 0.5724539044669364, "learning_rate": 2.320046544646379e-06, "loss": 0.0155, "step": 178130 }, { "epoch": 0.7432759469586334, "grad_norm": 0.6831054580978482, "learning_rate": 2.3200139840062834e-06, "loss": 0.0246, "step": 178135 }, { "epoch": 0.7432968096736237, "grad_norm": 0.3890400666827508, "learning_rate": 2.3199814247370645e-06, "loss": 0.0174, "step": 178140 }, { "epoch": 0.743317672388614, "grad_norm": 0.7583175744185131, "learning_rate": 2.319948866838626e-06, "loss": 0.0183, "step": 178145 }, { "epoch": 0.7433385351036043, "grad_norm": 0.46849261746620996, "learning_rate": 2.319916310310871e-06, "loss": 0.0191, "step": 178150 }, { "epoch": 0.7433593978185945, "grad_norm": 0.46639779547301147, "learning_rate": 2.3198837551537035e-06, "loss": 0.0217, "step": 178155 }, { "epoch": 0.7433802605335847, "grad_norm": 0.8989606163327427, "learning_rate": 2.3198512013670278e-06, "loss": 0.0197, "step": 178160 }, { "epoch": 0.7434011232485751, "grad_norm": 0.7810594528819435, "learning_rate": 2.3198186489507467e-06, "loss": 0.0229, "step": 178165 }, { "epoch": 0.7434219859635653, "grad_norm": 0.6859541941883877, "learning_rate": 2.3197860979047655e-06, "loss": 0.0242, "step": 178170 }, { "epoch": 0.7434428486785556, "grad_norm": 0.5178832884767198, "learning_rate": 2.3197535482289863e-06, "loss": 0.0345, "step": 178175 }, { "epoch": 0.743463711393546, "grad_norm": 0.5310724948361087, "learning_rate": 2.3197209999233155e-06, "loss": 0.0247, "step": 178180 }, { "epoch": 0.7434845741085362, "grad_norm": 0.8511163640899779, "learning_rate": 2.319688452987655e-06, "loss": 0.0231, "step": 178185 }, { "epoch": 0.7435054368235264, "grad_norm": 0.552147940973166, "learning_rate": 2.3196559074219086e-06, "loss": 0.0262, "step": 178190 }, { "epoch": 0.7435262995385168, "grad_norm": 0.35612599014991847, "learning_rate": 2.3196233632259815e-06, "loss": 0.0206, "step": 178195 }, { "epoch": 0.743547162253507, "grad_norm": 0.7379021744375135, "learning_rate": 2.3195908203997766e-06, "loss": 0.0224, "step": 178200 }, { "epoch": 0.7435680249684973, "grad_norm": 0.4187831329829997, "learning_rate": 2.319558278943198e-06, "loss": 0.0216, "step": 178205 }, { "epoch": 0.7435888876834876, "grad_norm": 0.5388328681445986, "learning_rate": 2.31952573885615e-06, "loss": 0.0148, "step": 178210 }, { "epoch": 0.7436097503984779, "grad_norm": 0.48349583306254523, "learning_rate": 2.3194932001385364e-06, "loss": 0.0205, "step": 178215 }, { "epoch": 0.7436306131134681, "grad_norm": 0.3864846048880509, "learning_rate": 2.3194606627902608e-06, "loss": 0.0131, "step": 178220 }, { "epoch": 0.7436514758284584, "grad_norm": 0.7339401876113836, "learning_rate": 2.319428126811227e-06, "loss": 0.0264, "step": 178225 }, { "epoch": 0.7436723385434487, "grad_norm": 0.6089613088674676, "learning_rate": 2.3193955922013404e-06, "loss": 0.0191, "step": 178230 }, { "epoch": 0.743693201258439, "grad_norm": 0.9021233199404322, "learning_rate": 2.319363058960503e-06, "loss": 0.0169, "step": 178235 }, { "epoch": 0.7437140639734292, "grad_norm": 0.7775265617667264, "learning_rate": 2.3193305270886202e-06, "loss": 0.019, "step": 178240 }, { "epoch": 0.7437349266884196, "grad_norm": 0.41461789226333556, "learning_rate": 2.3192979965855957e-06, "loss": 0.0178, "step": 178245 }, { "epoch": 0.7437557894034098, "grad_norm": 0.6121386675011334, "learning_rate": 2.3192654674513328e-06, "loss": 0.0165, "step": 178250 }, { "epoch": 0.7437766521184, "grad_norm": 0.5312862710717716, "learning_rate": 2.3192329396857366e-06, "loss": 0.0205, "step": 178255 }, { "epoch": 0.7437975148333904, "grad_norm": 1.338533648577794, "learning_rate": 2.31920041328871e-06, "loss": 0.0265, "step": 178260 }, { "epoch": 0.7438183775483806, "grad_norm": 0.5272111040859935, "learning_rate": 2.3191678882601587e-06, "loss": 0.0256, "step": 178265 }, { "epoch": 0.7438392402633709, "grad_norm": 0.6294886907191061, "learning_rate": 2.3191353645999847e-06, "loss": 0.0199, "step": 178270 }, { "epoch": 0.7438601029783611, "grad_norm": 0.34776320668228156, "learning_rate": 2.3191028423080933e-06, "loss": 0.0208, "step": 178275 }, { "epoch": 0.7438809656933515, "grad_norm": 0.37349945004632873, "learning_rate": 2.3190703213843884e-06, "loss": 0.0175, "step": 178280 }, { "epoch": 0.7439018284083417, "grad_norm": 0.4841856140360121, "learning_rate": 2.3190378018287734e-06, "loss": 0.026, "step": 178285 }, { "epoch": 0.743922691123332, "grad_norm": 0.5051667931710704, "learning_rate": 2.319005283641154e-06, "loss": 0.0249, "step": 178290 }, { "epoch": 0.7439435538383223, "grad_norm": 0.4623714703352843, "learning_rate": 2.3189727668214325e-06, "loss": 0.0174, "step": 178295 }, { "epoch": 0.7439644165533126, "grad_norm": 0.41090905206519035, "learning_rate": 2.3189402513695134e-06, "loss": 0.0214, "step": 178300 }, { "epoch": 0.7439852792683028, "grad_norm": 0.6572082946583269, "learning_rate": 2.318907737285302e-06, "loss": 0.0179, "step": 178305 }, { "epoch": 0.7440061419832932, "grad_norm": 0.8572117179760238, "learning_rate": 2.3188752245687007e-06, "loss": 0.0228, "step": 178310 }, { "epoch": 0.7440270046982834, "grad_norm": 0.4534482175863282, "learning_rate": 2.318842713219615e-06, "loss": 0.0169, "step": 178315 }, { "epoch": 0.7440478674132737, "grad_norm": 0.6607023127753939, "learning_rate": 2.318810203237948e-06, "loss": 0.022, "step": 178320 }, { "epoch": 0.744068730128264, "grad_norm": 0.5494097210726722, "learning_rate": 2.318777694623605e-06, "loss": 0.0229, "step": 178325 }, { "epoch": 0.7440895928432543, "grad_norm": 0.7042599403113237, "learning_rate": 2.3187451873764892e-06, "loss": 0.0256, "step": 178330 }, { "epoch": 0.7441104555582445, "grad_norm": 0.775018846559141, "learning_rate": 2.3187126814965054e-06, "loss": 0.0248, "step": 178335 }, { "epoch": 0.7441313182732348, "grad_norm": 0.7395761768632666, "learning_rate": 2.318680176983557e-06, "loss": 0.0191, "step": 178340 }, { "epoch": 0.7441521809882251, "grad_norm": 0.634639011988604, "learning_rate": 2.318647673837549e-06, "loss": 0.0161, "step": 178345 }, { "epoch": 0.7441730437032154, "grad_norm": 0.6084161103409077, "learning_rate": 2.318615172058385e-06, "loss": 0.0278, "step": 178350 }, { "epoch": 0.7441939064182056, "grad_norm": 15.642800844697145, "learning_rate": 2.31858267164597e-06, "loss": 0.0238, "step": 178355 }, { "epoch": 0.744214769133196, "grad_norm": 0.5912744153998916, "learning_rate": 2.318550172600207e-06, "loss": 0.0186, "step": 178360 }, { "epoch": 0.7442356318481862, "grad_norm": 1.4544305427844475, "learning_rate": 2.3185176749210012e-06, "loss": 0.0276, "step": 178365 }, { "epoch": 0.7442564945631764, "grad_norm": 0.5289874000934993, "learning_rate": 2.318485178608256e-06, "loss": 0.0224, "step": 178370 }, { "epoch": 0.7442773572781668, "grad_norm": 0.5348492896247328, "learning_rate": 2.3184526836618765e-06, "loss": 0.0184, "step": 178375 }, { "epoch": 0.744298219993157, "grad_norm": 0.8793574729344338, "learning_rate": 2.318420190081767e-06, "loss": 0.0256, "step": 178380 }, { "epoch": 0.7443190827081473, "grad_norm": 0.7392913090147119, "learning_rate": 2.318387697867831e-06, "loss": 0.0184, "step": 178385 }, { "epoch": 0.7443399454231376, "grad_norm": 0.5562138791121597, "learning_rate": 2.3183552070199726e-06, "loss": 0.0221, "step": 178390 }, { "epoch": 0.7443608081381279, "grad_norm": 0.6592285273628063, "learning_rate": 2.318322717538097e-06, "loss": 0.0146, "step": 178395 }, { "epoch": 0.7443816708531181, "grad_norm": 0.5120941958608501, "learning_rate": 2.318290229422109e-06, "loss": 0.0249, "step": 178400 }, { "epoch": 0.7444025335681084, "grad_norm": 0.3715245230615907, "learning_rate": 2.3182577426719107e-06, "loss": 0.023, "step": 178405 }, { "epoch": 0.7444233962830987, "grad_norm": 0.4134102876714631, "learning_rate": 2.3182252572874086e-06, "loss": 0.0206, "step": 178410 }, { "epoch": 0.744444258998089, "grad_norm": 0.720931842689448, "learning_rate": 2.3181927732685057e-06, "loss": 0.0244, "step": 178415 }, { "epoch": 0.7444651217130792, "grad_norm": 0.4407157065616835, "learning_rate": 2.3181602906151067e-06, "loss": 0.0187, "step": 178420 }, { "epoch": 0.7444859844280696, "grad_norm": 0.2906978539804974, "learning_rate": 2.3181278093271164e-06, "loss": 0.0187, "step": 178425 }, { "epoch": 0.7445068471430598, "grad_norm": 0.6269389230358696, "learning_rate": 2.318095329404438e-06, "loss": 0.0221, "step": 178430 }, { "epoch": 0.7445277098580501, "grad_norm": 0.9143149400031592, "learning_rate": 2.318062850846977e-06, "loss": 0.0325, "step": 178435 }, { "epoch": 0.7445485725730404, "grad_norm": 0.4985287352748824, "learning_rate": 2.318030373654638e-06, "loss": 0.0252, "step": 178440 }, { "epoch": 0.7445694352880307, "grad_norm": 0.41670211296444387, "learning_rate": 2.3179978978273237e-06, "loss": 0.0223, "step": 178445 }, { "epoch": 0.7445902980030209, "grad_norm": 0.39845477043596483, "learning_rate": 2.31796542336494e-06, "loss": 0.0228, "step": 178450 }, { "epoch": 0.7446111607180111, "grad_norm": 0.4981019726566882, "learning_rate": 2.3179329502673907e-06, "loss": 0.0209, "step": 178455 }, { "epoch": 0.7446320234330015, "grad_norm": 0.7604707690002908, "learning_rate": 2.31790047853458e-06, "loss": 0.0194, "step": 178460 }, { "epoch": 0.7446528861479917, "grad_norm": 0.5066737086896747, "learning_rate": 2.317868008166413e-06, "loss": 0.0142, "step": 178465 }, { "epoch": 0.744673748862982, "grad_norm": 0.8459697102054934, "learning_rate": 2.3178355391627937e-06, "loss": 0.0207, "step": 178470 }, { "epoch": 0.7446946115779723, "grad_norm": 0.552399173008407, "learning_rate": 2.317803071523626e-06, "loss": 0.0157, "step": 178475 }, { "epoch": 0.7447154742929626, "grad_norm": 0.3693726857130617, "learning_rate": 2.3177706052488156e-06, "loss": 0.0249, "step": 178480 }, { "epoch": 0.7447363370079528, "grad_norm": 0.5525577707214417, "learning_rate": 2.317738140338266e-06, "loss": 0.0253, "step": 178485 }, { "epoch": 0.7447571997229432, "grad_norm": 0.6151924029654657, "learning_rate": 2.317705676791882e-06, "loss": 0.0172, "step": 178490 }, { "epoch": 0.7447780624379334, "grad_norm": 0.8926742007761691, "learning_rate": 2.3176732146095676e-06, "loss": 0.0262, "step": 178495 }, { "epoch": 0.7447989251529237, "grad_norm": 0.8334769730438013, "learning_rate": 2.317640753791228e-06, "loss": 0.0228, "step": 178500 }, { "epoch": 0.744819787867914, "grad_norm": 0.4826281878327286, "learning_rate": 2.317608294336767e-06, "loss": 0.0194, "step": 178505 }, { "epoch": 0.7448406505829043, "grad_norm": 0.8302245592822484, "learning_rate": 2.3175758362460894e-06, "loss": 0.021, "step": 178510 }, { "epoch": 0.7448615132978945, "grad_norm": 0.5514558062615743, "learning_rate": 2.3175433795191e-06, "loss": 0.0225, "step": 178515 }, { "epoch": 0.7448823760128848, "grad_norm": 0.5129308746852889, "learning_rate": 2.317510924155703e-06, "loss": 0.0198, "step": 178520 }, { "epoch": 0.7449032387278751, "grad_norm": 0.30571332055897027, "learning_rate": 2.3174784701558027e-06, "loss": 0.0171, "step": 178525 }, { "epoch": 0.7449241014428654, "grad_norm": 0.9517150033300923, "learning_rate": 2.317446017519304e-06, "loss": 0.0257, "step": 178530 }, { "epoch": 0.7449449641578556, "grad_norm": 0.7342963733060945, "learning_rate": 2.3174135662461113e-06, "loss": 0.0165, "step": 178535 }, { "epoch": 0.744965826872846, "grad_norm": 0.7647612890046953, "learning_rate": 2.31738111633613e-06, "loss": 0.0289, "step": 178540 }, { "epoch": 0.7449866895878362, "grad_norm": 0.4779708706382793, "learning_rate": 2.3173486677892626e-06, "loss": 0.0357, "step": 178545 }, { "epoch": 0.7450075523028264, "grad_norm": 0.7008561664897719, "learning_rate": 2.3173162206054153e-06, "loss": 0.0328, "step": 178550 }, { "epoch": 0.7450284150178168, "grad_norm": 0.45376650598135, "learning_rate": 2.3172837747844923e-06, "loss": 0.0182, "step": 178555 }, { "epoch": 0.745049277732807, "grad_norm": 0.551266080048343, "learning_rate": 2.3172513303263983e-06, "loss": 0.0189, "step": 178560 }, { "epoch": 0.7450701404477973, "grad_norm": 0.4170239483520078, "learning_rate": 2.3172188872310376e-06, "loss": 0.0141, "step": 178565 }, { "epoch": 0.7450910031627876, "grad_norm": 0.44490441952796445, "learning_rate": 2.3171864454983153e-06, "loss": 0.0207, "step": 178570 }, { "epoch": 0.7451118658777779, "grad_norm": 0.9364167570794818, "learning_rate": 2.3171540051281357e-06, "loss": 0.0251, "step": 178575 }, { "epoch": 0.7451327285927681, "grad_norm": 0.9177601298910217, "learning_rate": 2.3171215661204027e-06, "loss": 0.0226, "step": 178580 }, { "epoch": 0.7451535913077584, "grad_norm": 0.8727056464709432, "learning_rate": 2.317089128475022e-06, "loss": 0.0206, "step": 178585 }, { "epoch": 0.7451744540227487, "grad_norm": 1.41618228091036, "learning_rate": 2.317056692191898e-06, "loss": 0.0245, "step": 178590 }, { "epoch": 0.745195316737739, "grad_norm": 0.6395412701271034, "learning_rate": 2.317024257270936e-06, "loss": 0.017, "step": 178595 }, { "epoch": 0.7452161794527292, "grad_norm": 0.405671090527774, "learning_rate": 2.316991823712039e-06, "loss": 0.0227, "step": 178600 }, { "epoch": 0.7452370421677196, "grad_norm": 0.49759927453968866, "learning_rate": 2.3169593915151127e-06, "loss": 0.0134, "step": 178605 }, { "epoch": 0.7452579048827098, "grad_norm": 1.1751424723665014, "learning_rate": 2.3169269606800618e-06, "loss": 0.0212, "step": 178610 }, { "epoch": 0.7452787675977001, "grad_norm": 0.574242828495429, "learning_rate": 2.3168945312067908e-06, "loss": 0.0183, "step": 178615 }, { "epoch": 0.7452996303126904, "grad_norm": 0.448059862753745, "learning_rate": 2.3168621030952045e-06, "loss": 0.0349, "step": 178620 }, { "epoch": 0.7453204930276807, "grad_norm": 1.3979315568902209, "learning_rate": 2.3168296763452075e-06, "loss": 0.0176, "step": 178625 }, { "epoch": 0.7453413557426709, "grad_norm": 0.6770988806065397, "learning_rate": 2.3167972509567046e-06, "loss": 0.0187, "step": 178630 }, { "epoch": 0.7453622184576612, "grad_norm": 0.6977309139162229, "learning_rate": 2.3167648269296e-06, "loss": 0.0208, "step": 178635 }, { "epoch": 0.7453830811726515, "grad_norm": 0.5692003568173373, "learning_rate": 2.3167324042638e-06, "loss": 0.0284, "step": 178640 }, { "epoch": 0.7454039438876418, "grad_norm": 0.7222224420681107, "learning_rate": 2.3166999829592076e-06, "loss": 0.023, "step": 178645 }, { "epoch": 0.745424806602632, "grad_norm": 0.5813952774417207, "learning_rate": 2.3166675630157285e-06, "loss": 0.0144, "step": 178650 }, { "epoch": 0.7454456693176224, "grad_norm": 0.6151007338785683, "learning_rate": 2.316635144433267e-06, "loss": 0.0231, "step": 178655 }, { "epoch": 0.7454665320326126, "grad_norm": 0.5133905904171561, "learning_rate": 2.316602727211729e-06, "loss": 0.021, "step": 178660 }, { "epoch": 0.7454873947476028, "grad_norm": 0.7687724607355361, "learning_rate": 2.316570311351017e-06, "loss": 0.0197, "step": 178665 }, { "epoch": 0.7455082574625932, "grad_norm": 0.2933100764513415, "learning_rate": 2.316537896851038e-06, "loss": 0.0232, "step": 178670 }, { "epoch": 0.7455291201775834, "grad_norm": 0.45890285779635587, "learning_rate": 2.3165054837116956e-06, "loss": 0.0161, "step": 178675 }, { "epoch": 0.7455499828925737, "grad_norm": 1.0615732896564845, "learning_rate": 2.316473071932895e-06, "loss": 0.0256, "step": 178680 }, { "epoch": 0.745570845607564, "grad_norm": 1.6527629979740477, "learning_rate": 2.316440661514541e-06, "loss": 0.0232, "step": 178685 }, { "epoch": 0.7455917083225543, "grad_norm": 0.32211196628918387, "learning_rate": 2.3164082524565387e-06, "loss": 0.0158, "step": 178690 }, { "epoch": 0.7456125710375445, "grad_norm": 0.5115569596778621, "learning_rate": 2.3163758447587924e-06, "loss": 0.0232, "step": 178695 }, { "epoch": 0.7456334337525348, "grad_norm": 1.0696244813338718, "learning_rate": 2.3163434384212074e-06, "loss": 0.037, "step": 178700 }, { "epoch": 0.7456542964675251, "grad_norm": 2.0932018778909955, "learning_rate": 2.3163110334436884e-06, "loss": 0.0181, "step": 178705 }, { "epoch": 0.7456751591825154, "grad_norm": 0.797187763005341, "learning_rate": 2.3162786298261405e-06, "loss": 0.0196, "step": 178710 }, { "epoch": 0.7456960218975056, "grad_norm": 0.7246387294609905, "learning_rate": 2.3162462275684684e-06, "loss": 0.0162, "step": 178715 }, { "epoch": 0.745716884612496, "grad_norm": 0.917283731029455, "learning_rate": 2.316213826670576e-06, "loss": 0.0217, "step": 178720 }, { "epoch": 0.7457377473274862, "grad_norm": 0.5731518332327137, "learning_rate": 2.3161814271323697e-06, "loss": 0.0165, "step": 178725 }, { "epoch": 0.7457586100424765, "grad_norm": 0.820913968847407, "learning_rate": 2.316149028953754e-06, "loss": 0.0345, "step": 178730 }, { "epoch": 0.7457794727574668, "grad_norm": 0.4510869670839127, "learning_rate": 2.3161166321346333e-06, "loss": 0.0216, "step": 178735 }, { "epoch": 0.745800335472457, "grad_norm": 0.8565310065526424, "learning_rate": 2.316084236674913e-06, "loss": 0.0271, "step": 178740 }, { "epoch": 0.7458211981874473, "grad_norm": 0.816223031731792, "learning_rate": 2.316051842574498e-06, "loss": 0.0228, "step": 178745 }, { "epoch": 0.7458420609024377, "grad_norm": 0.47138846516283534, "learning_rate": 2.3160194498332933e-06, "loss": 0.0188, "step": 178750 }, { "epoch": 0.7458629236174279, "grad_norm": 0.4190123136130016, "learning_rate": 2.3159870584512032e-06, "loss": 0.0229, "step": 178755 }, { "epoch": 0.7458837863324181, "grad_norm": 1.070146250520091, "learning_rate": 2.3159546684281335e-06, "loss": 0.0277, "step": 178760 }, { "epoch": 0.7459046490474084, "grad_norm": 0.7103331404352989, "learning_rate": 2.3159222797639887e-06, "loss": 0.0157, "step": 178765 }, { "epoch": 0.7459255117623987, "grad_norm": 0.6516168314744789, "learning_rate": 2.3158898924586737e-06, "loss": 0.0154, "step": 178770 }, { "epoch": 0.745946374477389, "grad_norm": 1.1959883575936738, "learning_rate": 2.315857506512094e-06, "loss": 0.0261, "step": 178775 }, { "epoch": 0.7459672371923792, "grad_norm": 0.5824666305292516, "learning_rate": 2.3158251219241543e-06, "loss": 0.0198, "step": 178780 }, { "epoch": 0.7459880999073696, "grad_norm": 0.6406993680155962, "learning_rate": 2.3157927386947598e-06, "loss": 0.0159, "step": 178785 }, { "epoch": 0.7460089626223598, "grad_norm": 1.0054364604176598, "learning_rate": 2.315760356823814e-06, "loss": 0.0229, "step": 178790 }, { "epoch": 0.7460298253373501, "grad_norm": 0.680486731914456, "learning_rate": 2.3157279763112244e-06, "loss": 0.0289, "step": 178795 }, { "epoch": 0.7460506880523404, "grad_norm": 0.5066240682984224, "learning_rate": 2.3156955971568948e-06, "loss": 0.0169, "step": 178800 }, { "epoch": 0.7460715507673307, "grad_norm": 0.9270165979772478, "learning_rate": 2.3156632193607304e-06, "loss": 0.0205, "step": 178805 }, { "epoch": 0.7460924134823209, "grad_norm": 0.373393714137715, "learning_rate": 2.3156308429226363e-06, "loss": 0.0211, "step": 178810 }, { "epoch": 0.7461132761973112, "grad_norm": 0.5404171325185099, "learning_rate": 2.3155984678425165e-06, "loss": 0.0215, "step": 178815 }, { "epoch": 0.7461341389123015, "grad_norm": 0.607461015054352, "learning_rate": 2.3155660941202778e-06, "loss": 0.0234, "step": 178820 }, { "epoch": 0.7461550016272918, "grad_norm": 0.8926131764472122, "learning_rate": 2.3155337217558244e-06, "loss": 0.0267, "step": 178825 }, { "epoch": 0.746175864342282, "grad_norm": 0.7630379297106122, "learning_rate": 2.315501350749061e-06, "loss": 0.0209, "step": 178830 }, { "epoch": 0.7461967270572724, "grad_norm": 0.7230347469741429, "learning_rate": 2.3154689810998933e-06, "loss": 0.0223, "step": 178835 }, { "epoch": 0.7462175897722626, "grad_norm": 0.6836588339706664, "learning_rate": 2.3154366128082267e-06, "loss": 0.0156, "step": 178840 }, { "epoch": 0.7462384524872528, "grad_norm": 0.6169893389137564, "learning_rate": 2.3154042458739656e-06, "loss": 0.0213, "step": 178845 }, { "epoch": 0.7462593152022432, "grad_norm": 1.533311145514133, "learning_rate": 2.315371880297016e-06, "loss": 0.0347, "step": 178850 }, { "epoch": 0.7462801779172334, "grad_norm": 0.4004028469503269, "learning_rate": 2.3153395160772815e-06, "loss": 0.0195, "step": 178855 }, { "epoch": 0.7463010406322237, "grad_norm": 0.42940969531239653, "learning_rate": 2.315307153214669e-06, "loss": 0.0197, "step": 178860 }, { "epoch": 0.746321903347214, "grad_norm": 0.7409063811706534, "learning_rate": 2.315274791709083e-06, "loss": 0.0226, "step": 178865 }, { "epoch": 0.7463427660622043, "grad_norm": 0.6585488329076424, "learning_rate": 2.3152424315604277e-06, "loss": 0.0255, "step": 178870 }, { "epoch": 0.7463636287771945, "grad_norm": 0.8334014674131855, "learning_rate": 2.3152100727686095e-06, "loss": 0.0233, "step": 178875 }, { "epoch": 0.7463844914921848, "grad_norm": 0.5124822469625373, "learning_rate": 2.3151777153335337e-06, "loss": 0.023, "step": 178880 }, { "epoch": 0.7464053542071751, "grad_norm": 1.4015760210902164, "learning_rate": 2.3151453592551043e-06, "loss": 0.0205, "step": 178885 }, { "epoch": 0.7464262169221654, "grad_norm": 0.44901529573293364, "learning_rate": 2.3151130045332274e-06, "loss": 0.0182, "step": 178890 }, { "epoch": 0.7464470796371556, "grad_norm": 0.7021611594452326, "learning_rate": 2.3150806511678085e-06, "loss": 0.0193, "step": 178895 }, { "epoch": 0.746467942352146, "grad_norm": 0.4319275149533908, "learning_rate": 2.315048299158752e-06, "loss": 0.022, "step": 178900 }, { "epoch": 0.7464888050671362, "grad_norm": 0.6914781221241524, "learning_rate": 2.315015948505963e-06, "loss": 0.021, "step": 178905 }, { "epoch": 0.7465096677821265, "grad_norm": 1.3505127292117876, "learning_rate": 2.3149835992093473e-06, "loss": 0.0185, "step": 178910 }, { "epoch": 0.7465305304971168, "grad_norm": 0.6455511300309735, "learning_rate": 2.3149512512688107e-06, "loss": 0.02, "step": 178915 }, { "epoch": 0.7465513932121071, "grad_norm": 0.6076776086222656, "learning_rate": 2.3149189046842574e-06, "loss": 0.0181, "step": 178920 }, { "epoch": 0.7465722559270973, "grad_norm": 0.8357493481389073, "learning_rate": 2.314886559455593e-06, "loss": 0.0261, "step": 178925 }, { "epoch": 0.7465931186420877, "grad_norm": 0.5004332041066171, "learning_rate": 2.314854215582723e-06, "loss": 0.0246, "step": 178930 }, { "epoch": 0.7466139813570779, "grad_norm": 0.5165083215396572, "learning_rate": 2.3148218730655526e-06, "loss": 0.0155, "step": 178935 }, { "epoch": 0.7466348440720681, "grad_norm": 1.0168644796855846, "learning_rate": 2.3147895319039867e-06, "loss": 0.0191, "step": 178940 }, { "epoch": 0.7466557067870584, "grad_norm": 0.9592452802061581, "learning_rate": 2.314757192097931e-06, "loss": 0.0182, "step": 178945 }, { "epoch": 0.7466765695020487, "grad_norm": 0.8033867865226509, "learning_rate": 2.3147248536472915e-06, "loss": 0.0215, "step": 178950 }, { "epoch": 0.746697432217039, "grad_norm": 0.27319283070355427, "learning_rate": 2.314692516551972e-06, "loss": 0.0188, "step": 178955 }, { "epoch": 0.7467182949320292, "grad_norm": 0.5643302990968723, "learning_rate": 2.3146601808118786e-06, "loss": 0.0218, "step": 178960 }, { "epoch": 0.7467391576470196, "grad_norm": 0.4390780368511975, "learning_rate": 2.314627846426917e-06, "loss": 0.0173, "step": 178965 }, { "epoch": 0.7467600203620098, "grad_norm": 0.7470430026066369, "learning_rate": 2.314595513396992e-06, "loss": 0.0349, "step": 178970 }, { "epoch": 0.7467808830770001, "grad_norm": 2.9638848167951313, "learning_rate": 2.314563181722009e-06, "loss": 0.0194, "step": 178975 }, { "epoch": 0.7468017457919904, "grad_norm": 0.7834310539889618, "learning_rate": 2.314530851401874e-06, "loss": 0.0246, "step": 178980 }, { "epoch": 0.7468226085069807, "grad_norm": 1.1900904917890285, "learning_rate": 2.3144985224364917e-06, "loss": 0.0265, "step": 178985 }, { "epoch": 0.7468434712219709, "grad_norm": 0.3754374750864672, "learning_rate": 2.3144661948257678e-06, "loss": 0.0214, "step": 178990 }, { "epoch": 0.7468643339369612, "grad_norm": 0.804817726588059, "learning_rate": 2.314433868569607e-06, "loss": 0.0237, "step": 178995 }, { "epoch": 0.7468851966519515, "grad_norm": 0.615187156009786, "learning_rate": 2.3144015436679164e-06, "loss": 0.0224, "step": 179000 }, { "epoch": 0.7469060593669418, "grad_norm": 0.38169583742632723, "learning_rate": 2.3143692201205992e-06, "loss": 0.0226, "step": 179005 }, { "epoch": 0.746926922081932, "grad_norm": 0.7279007753428283, "learning_rate": 2.3143368979275624e-06, "loss": 0.031, "step": 179010 }, { "epoch": 0.7469477847969224, "grad_norm": 0.5387114686949589, "learning_rate": 2.314304577088711e-06, "loss": 0.0191, "step": 179015 }, { "epoch": 0.7469686475119126, "grad_norm": 0.6347619811293448, "learning_rate": 2.3142722576039507e-06, "loss": 0.0272, "step": 179020 }, { "epoch": 0.7469895102269029, "grad_norm": 1.3068282715285522, "learning_rate": 2.314239939473186e-06, "loss": 0.0321, "step": 179025 }, { "epoch": 0.7470103729418932, "grad_norm": 0.7903898487053327, "learning_rate": 2.3142076226963235e-06, "loss": 0.0256, "step": 179030 }, { "epoch": 0.7470312356568835, "grad_norm": 0.8716060122134446, "learning_rate": 2.314175307273268e-06, "loss": 0.0298, "step": 179035 }, { "epoch": 0.7470520983718737, "grad_norm": 0.9484803627719747, "learning_rate": 2.3141429932039257e-06, "loss": 0.0207, "step": 179040 }, { "epoch": 0.747072961086864, "grad_norm": 0.572890537366887, "learning_rate": 2.3141106804882008e-06, "loss": 0.0172, "step": 179045 }, { "epoch": 0.7470938238018543, "grad_norm": 0.6867500607575487, "learning_rate": 2.314078369126e-06, "loss": 0.0267, "step": 179050 }, { "epoch": 0.7471146865168445, "grad_norm": 0.5045850541063474, "learning_rate": 2.3140460591172286e-06, "loss": 0.0227, "step": 179055 }, { "epoch": 0.7471355492318348, "grad_norm": 0.7892806848066732, "learning_rate": 2.3140137504617912e-06, "loss": 0.0201, "step": 179060 }, { "epoch": 0.7471564119468251, "grad_norm": 0.5025591654752205, "learning_rate": 2.3139814431595945e-06, "loss": 0.0227, "step": 179065 }, { "epoch": 0.7471772746618154, "grad_norm": 0.8818717908854101, "learning_rate": 2.3139491372105434e-06, "loss": 0.0224, "step": 179070 }, { "epoch": 0.7471981373768056, "grad_norm": 0.4406086310685306, "learning_rate": 2.3139168326145436e-06, "loss": 0.0229, "step": 179075 }, { "epoch": 0.747219000091796, "grad_norm": 0.7377471004064465, "learning_rate": 2.3138845293715007e-06, "loss": 0.0188, "step": 179080 }, { "epoch": 0.7472398628067862, "grad_norm": 0.9204953732803689, "learning_rate": 2.3138522274813206e-06, "loss": 0.0251, "step": 179085 }, { "epoch": 0.7472607255217765, "grad_norm": 0.5836492972248399, "learning_rate": 2.313819926943908e-06, "loss": 0.0236, "step": 179090 }, { "epoch": 0.7472815882367668, "grad_norm": 1.0988748252658833, "learning_rate": 2.3137876277591693e-06, "loss": 0.0249, "step": 179095 }, { "epoch": 0.7473024509517571, "grad_norm": 0.7227383483328795, "learning_rate": 2.31375532992701e-06, "loss": 0.0264, "step": 179100 }, { "epoch": 0.7473233136667473, "grad_norm": 0.9341494292795152, "learning_rate": 2.313723033447335e-06, "loss": 0.0202, "step": 179105 }, { "epoch": 0.7473441763817377, "grad_norm": 0.727296802934297, "learning_rate": 2.31369073832005e-06, "loss": 0.0211, "step": 179110 }, { "epoch": 0.7473650390967279, "grad_norm": 0.4470720011292189, "learning_rate": 2.3136584445450615e-06, "loss": 0.0167, "step": 179115 }, { "epoch": 0.7473859018117182, "grad_norm": 0.4504533596510014, "learning_rate": 2.313626152122275e-06, "loss": 0.0218, "step": 179120 }, { "epoch": 0.7474067645267084, "grad_norm": 0.9018939893199163, "learning_rate": 2.3135938610515947e-06, "loss": 0.0234, "step": 179125 }, { "epoch": 0.7474276272416988, "grad_norm": 0.8489786105460051, "learning_rate": 2.313561571332928e-06, "loss": 0.0177, "step": 179130 }, { "epoch": 0.747448489956689, "grad_norm": 0.8486485418574972, "learning_rate": 2.31352928296618e-06, "loss": 0.0206, "step": 179135 }, { "epoch": 0.7474693526716792, "grad_norm": 0.5886155601762476, "learning_rate": 2.313496995951256e-06, "loss": 0.0238, "step": 179140 }, { "epoch": 0.7474902153866696, "grad_norm": 1.210771169914228, "learning_rate": 2.3134647102880616e-06, "loss": 0.0192, "step": 179145 }, { "epoch": 0.7475110781016598, "grad_norm": 0.572531223327173, "learning_rate": 2.313432425976503e-06, "loss": 0.0205, "step": 179150 }, { "epoch": 0.7475319408166501, "grad_norm": 0.9430993227592872, "learning_rate": 2.313400143016486e-06, "loss": 0.0228, "step": 179155 }, { "epoch": 0.7475528035316404, "grad_norm": 0.7330370741222652, "learning_rate": 2.3133678614079158e-06, "loss": 0.0239, "step": 179160 }, { "epoch": 0.7475736662466307, "grad_norm": 0.8948755546218499, "learning_rate": 2.3133355811506983e-06, "loss": 0.0192, "step": 179165 }, { "epoch": 0.7475945289616209, "grad_norm": 0.5773825564108733, "learning_rate": 2.3133033022447394e-06, "loss": 0.0162, "step": 179170 }, { "epoch": 0.7476153916766112, "grad_norm": 0.9114329765210097, "learning_rate": 2.3132710246899438e-06, "loss": 0.0208, "step": 179175 }, { "epoch": 0.7476362543916015, "grad_norm": 0.5514289629366105, "learning_rate": 2.3132387484862195e-06, "loss": 0.0288, "step": 179180 }, { "epoch": 0.7476571171065918, "grad_norm": 0.34349258196036925, "learning_rate": 2.31320647363347e-06, "loss": 0.021, "step": 179185 }, { "epoch": 0.747677979821582, "grad_norm": 0.949303800771856, "learning_rate": 2.313174200131602e-06, "loss": 0.0309, "step": 179190 }, { "epoch": 0.7476988425365724, "grad_norm": 0.8430832450207314, "learning_rate": 2.3131419279805203e-06, "loss": 0.0256, "step": 179195 }, { "epoch": 0.7477197052515626, "grad_norm": 0.7339287485717441, "learning_rate": 2.3131096571801323e-06, "loss": 0.0254, "step": 179200 }, { "epoch": 0.7477405679665529, "grad_norm": 0.7294737014752385, "learning_rate": 2.3130773877303433e-06, "loss": 0.0185, "step": 179205 }, { "epoch": 0.7477614306815432, "grad_norm": 0.49264147408553655, "learning_rate": 2.313045119631058e-06, "loss": 0.0212, "step": 179210 }, { "epoch": 0.7477822933965335, "grad_norm": 0.8193464686465013, "learning_rate": 2.3130128528821837e-06, "loss": 0.0227, "step": 179215 }, { "epoch": 0.7478031561115237, "grad_norm": 0.6965377742038932, "learning_rate": 2.312980587483626e-06, "loss": 0.019, "step": 179220 }, { "epoch": 0.7478240188265141, "grad_norm": 1.035246891218567, "learning_rate": 2.312948323435289e-06, "loss": 0.0269, "step": 179225 }, { "epoch": 0.7478448815415043, "grad_norm": 0.5155736507758645, "learning_rate": 2.3129160607370806e-06, "loss": 0.0213, "step": 179230 }, { "epoch": 0.7478657442564945, "grad_norm": 0.705993225034719, "learning_rate": 2.312883799388905e-06, "loss": 0.0199, "step": 179235 }, { "epoch": 0.7478866069714848, "grad_norm": 0.3403811499992637, "learning_rate": 2.3128515393906702e-06, "loss": 0.0187, "step": 179240 }, { "epoch": 0.7479074696864751, "grad_norm": 0.3239453458753977, "learning_rate": 2.3128192807422797e-06, "loss": 0.0244, "step": 179245 }, { "epoch": 0.7479283324014654, "grad_norm": 1.146291026815849, "learning_rate": 2.3127870234436404e-06, "loss": 0.0298, "step": 179250 }, { "epoch": 0.7479491951164556, "grad_norm": 0.6418498731758803, "learning_rate": 2.3127547674946584e-06, "loss": 0.0218, "step": 179255 }, { "epoch": 0.747970057831446, "grad_norm": 0.5944309847302462, "learning_rate": 2.312722512895239e-06, "loss": 0.0264, "step": 179260 }, { "epoch": 0.7479909205464362, "grad_norm": 0.6465184042674685, "learning_rate": 2.312690259645289e-06, "loss": 0.0331, "step": 179265 }, { "epoch": 0.7480117832614265, "grad_norm": 1.080871520552437, "learning_rate": 2.312658007744713e-06, "loss": 0.0265, "step": 179270 }, { "epoch": 0.7480326459764168, "grad_norm": 0.8163408592505095, "learning_rate": 2.312625757193419e-06, "loss": 0.0212, "step": 179275 }, { "epoch": 0.7480535086914071, "grad_norm": 0.4473217852578527, "learning_rate": 2.3125935079913106e-06, "loss": 0.0174, "step": 179280 }, { "epoch": 0.7480743714063973, "grad_norm": 0.587566731072488, "learning_rate": 2.312561260138295e-06, "loss": 0.0207, "step": 179285 }, { "epoch": 0.7480952341213877, "grad_norm": 0.5784960887528741, "learning_rate": 2.3125290136342777e-06, "loss": 0.0219, "step": 179290 }, { "epoch": 0.7481160968363779, "grad_norm": 0.6997503553610303, "learning_rate": 2.312496768479165e-06, "loss": 0.0203, "step": 179295 }, { "epoch": 0.7481369595513682, "grad_norm": 0.6217754952522163, "learning_rate": 2.312464524672862e-06, "loss": 0.026, "step": 179300 }, { "epoch": 0.7481578222663584, "grad_norm": 1.0775322089025023, "learning_rate": 2.312432282215276e-06, "loss": 0.0291, "step": 179305 }, { "epoch": 0.7481786849813488, "grad_norm": 0.8925455383780752, "learning_rate": 2.312400041106312e-06, "loss": 0.0292, "step": 179310 }, { "epoch": 0.748199547696339, "grad_norm": 0.4108163817135167, "learning_rate": 2.3123678013458762e-06, "loss": 0.0171, "step": 179315 }, { "epoch": 0.7482204104113293, "grad_norm": 0.7220166115307257, "learning_rate": 2.312335562933875e-06, "loss": 0.0235, "step": 179320 }, { "epoch": 0.7482412731263196, "grad_norm": 0.7349444918869794, "learning_rate": 2.312303325870214e-06, "loss": 0.0237, "step": 179325 }, { "epoch": 0.7482621358413099, "grad_norm": 0.834776825089273, "learning_rate": 2.3122710901547993e-06, "loss": 0.0185, "step": 179330 }, { "epoch": 0.7482829985563001, "grad_norm": 0.4716956914197329, "learning_rate": 2.312238855787537e-06, "loss": 0.0199, "step": 179335 }, { "epoch": 0.7483038612712904, "grad_norm": 0.5756527061513691, "learning_rate": 2.312206622768333e-06, "loss": 0.0217, "step": 179340 }, { "epoch": 0.7483247239862807, "grad_norm": 0.8052169261845264, "learning_rate": 2.3121743910970933e-06, "loss": 0.0275, "step": 179345 }, { "epoch": 0.7483455867012709, "grad_norm": 1.2093845880922052, "learning_rate": 2.3121421607737243e-06, "loss": 0.0231, "step": 179350 }, { "epoch": 0.7483664494162612, "grad_norm": 0.8080337391555831, "learning_rate": 2.3121099317981316e-06, "loss": 0.0188, "step": 179355 }, { "epoch": 0.7483873121312515, "grad_norm": 1.418317001229635, "learning_rate": 2.3120777041702217e-06, "loss": 0.0232, "step": 179360 }, { "epoch": 0.7484081748462418, "grad_norm": 0.8936008494741257, "learning_rate": 2.3120454778899006e-06, "loss": 0.0217, "step": 179365 }, { "epoch": 0.748429037561232, "grad_norm": 0.8148989185680946, "learning_rate": 2.3120132529570734e-06, "loss": 0.0268, "step": 179370 }, { "epoch": 0.7484499002762224, "grad_norm": 0.5965753921880491, "learning_rate": 2.311981029371648e-06, "loss": 0.0165, "step": 179375 }, { "epoch": 0.7484707629912126, "grad_norm": 0.415141295824568, "learning_rate": 2.311948807133529e-06, "loss": 0.0161, "step": 179380 }, { "epoch": 0.7484916257062029, "grad_norm": 0.8171658797015176, "learning_rate": 2.3119165862426233e-06, "loss": 0.0218, "step": 179385 }, { "epoch": 0.7485124884211932, "grad_norm": 0.5578099872812287, "learning_rate": 2.3118843666988366e-06, "loss": 0.0207, "step": 179390 }, { "epoch": 0.7485333511361835, "grad_norm": 0.6555717182736297, "learning_rate": 2.3118521485020757e-06, "loss": 0.0167, "step": 179395 }, { "epoch": 0.7485542138511737, "grad_norm": 0.6379154707880755, "learning_rate": 2.311819931652246e-06, "loss": 0.0192, "step": 179400 }, { "epoch": 0.7485750765661641, "grad_norm": 0.5317144197209507, "learning_rate": 2.311787716149254e-06, "loss": 0.02, "step": 179405 }, { "epoch": 0.7485959392811543, "grad_norm": 0.5025788989605287, "learning_rate": 2.311755501993006e-06, "loss": 0.0222, "step": 179410 }, { "epoch": 0.7486168019961446, "grad_norm": 0.6630156556304568, "learning_rate": 2.3117232891834075e-06, "loss": 0.0173, "step": 179415 }, { "epoch": 0.7486376647111348, "grad_norm": 0.5942006188388149, "learning_rate": 2.311691077720365e-06, "loss": 0.0213, "step": 179420 }, { "epoch": 0.7486585274261252, "grad_norm": 1.117942377972517, "learning_rate": 2.311658867603785e-06, "loss": 0.0245, "step": 179425 }, { "epoch": 0.7486793901411154, "grad_norm": 0.5286322557648868, "learning_rate": 2.3116266588335744e-06, "loss": 0.0237, "step": 179430 }, { "epoch": 0.7487002528561056, "grad_norm": 0.5863708435596905, "learning_rate": 2.3115944514096373e-06, "loss": 0.0239, "step": 179435 }, { "epoch": 0.748721115571096, "grad_norm": 0.6155818978387745, "learning_rate": 2.3115622453318817e-06, "loss": 0.0189, "step": 179440 }, { "epoch": 0.7487419782860862, "grad_norm": 1.1398069899153722, "learning_rate": 2.3115300406002134e-06, "loss": 0.0198, "step": 179445 }, { "epoch": 0.7487628410010765, "grad_norm": 0.9675573823988833, "learning_rate": 2.311497837214538e-06, "loss": 0.0161, "step": 179450 }, { "epoch": 0.7487837037160668, "grad_norm": 0.8828139678477003, "learning_rate": 2.3114656351747626e-06, "loss": 0.0211, "step": 179455 }, { "epoch": 0.7488045664310571, "grad_norm": 0.5257507873489388, "learning_rate": 2.3114334344807932e-06, "loss": 0.0189, "step": 179460 }, { "epoch": 0.7488254291460473, "grad_norm": 0.6780391873181624, "learning_rate": 2.3114012351325356e-06, "loss": 0.018, "step": 179465 }, { "epoch": 0.7488462918610377, "grad_norm": 0.6537480251174379, "learning_rate": 2.3113690371298966e-06, "loss": 0.02, "step": 179470 }, { "epoch": 0.7488671545760279, "grad_norm": 0.5883330426132607, "learning_rate": 2.311336840472782e-06, "loss": 0.0204, "step": 179475 }, { "epoch": 0.7488880172910182, "grad_norm": 0.6195942884403205, "learning_rate": 2.3113046451610993e-06, "loss": 0.0258, "step": 179480 }, { "epoch": 0.7489088800060084, "grad_norm": 0.436584821439427, "learning_rate": 2.311272451194753e-06, "loss": 0.0228, "step": 179485 }, { "epoch": 0.7489297427209988, "grad_norm": 0.5148322937404227, "learning_rate": 2.3112402585736507e-06, "loss": 0.0153, "step": 179490 }, { "epoch": 0.748950605435989, "grad_norm": 0.9335864035749022, "learning_rate": 2.3112080672976983e-06, "loss": 0.0211, "step": 179495 }, { "epoch": 0.7489714681509793, "grad_norm": 0.4144658269344026, "learning_rate": 2.311175877366802e-06, "loss": 0.0195, "step": 179500 }, { "epoch": 0.7489923308659696, "grad_norm": 1.1218496866728407, "learning_rate": 2.3111436887808685e-06, "loss": 0.0228, "step": 179505 }, { "epoch": 0.7490131935809599, "grad_norm": 0.45501696522423585, "learning_rate": 2.311111501539804e-06, "loss": 0.0203, "step": 179510 }, { "epoch": 0.7490340562959501, "grad_norm": 0.8856541907327443, "learning_rate": 2.3110793156435145e-06, "loss": 0.0232, "step": 179515 }, { "epoch": 0.7490549190109405, "grad_norm": 1.5506498478544546, "learning_rate": 2.311047131091907e-06, "loss": 0.0225, "step": 179520 }, { "epoch": 0.7490757817259307, "grad_norm": 2.2957354650950816, "learning_rate": 2.3110149478848865e-06, "loss": 0.0192, "step": 179525 }, { "epoch": 0.749096644440921, "grad_norm": 0.7261865599898558, "learning_rate": 2.310982766022361e-06, "loss": 0.0202, "step": 179530 }, { "epoch": 0.7491175071559112, "grad_norm": 0.6830463346654485, "learning_rate": 2.310950585504237e-06, "loss": 0.0181, "step": 179535 }, { "epoch": 0.7491383698709015, "grad_norm": 0.4181932151087689, "learning_rate": 2.3109184063304193e-06, "loss": 0.0244, "step": 179540 }, { "epoch": 0.7491592325858918, "grad_norm": 0.7588144278830299, "learning_rate": 2.3108862285008156e-06, "loss": 0.0355, "step": 179545 }, { "epoch": 0.749180095300882, "grad_norm": 1.0847648416532545, "learning_rate": 2.3108540520153317e-06, "loss": 0.0264, "step": 179550 }, { "epoch": 0.7492009580158724, "grad_norm": 1.197981759274872, "learning_rate": 2.3108218768738743e-06, "loss": 0.0174, "step": 179555 }, { "epoch": 0.7492218207308626, "grad_norm": 0.7388676225739823, "learning_rate": 2.31078970307635e-06, "loss": 0.0222, "step": 179560 }, { "epoch": 0.7492426834458529, "grad_norm": 1.75068719578371, "learning_rate": 2.3107575306226644e-06, "loss": 0.016, "step": 179565 }, { "epoch": 0.7492635461608432, "grad_norm": 0.3437509025719967, "learning_rate": 2.310725359512725e-06, "loss": 0.0188, "step": 179570 }, { "epoch": 0.7492844088758335, "grad_norm": 1.4228772447126767, "learning_rate": 2.3106931897464375e-06, "loss": 0.0276, "step": 179575 }, { "epoch": 0.7493052715908237, "grad_norm": 0.927420289904611, "learning_rate": 2.3106610213237094e-06, "loss": 0.0239, "step": 179580 }, { "epoch": 0.7493261343058141, "grad_norm": 1.1258206330630607, "learning_rate": 2.3106288542444455e-06, "loss": 0.0213, "step": 179585 }, { "epoch": 0.7493469970208043, "grad_norm": 0.3968570172106386, "learning_rate": 2.310596688508554e-06, "loss": 0.0221, "step": 179590 }, { "epoch": 0.7493678597357946, "grad_norm": 0.727264496121745, "learning_rate": 2.3105645241159406e-06, "loss": 0.0207, "step": 179595 }, { "epoch": 0.7493887224507848, "grad_norm": 0.4822177399452071, "learning_rate": 2.3105323610665117e-06, "loss": 0.0146, "step": 179600 }, { "epoch": 0.7494095851657752, "grad_norm": 1.0471195457334492, "learning_rate": 2.310500199360174e-06, "loss": 0.0263, "step": 179605 }, { "epoch": 0.7494304478807654, "grad_norm": 0.7332919334269926, "learning_rate": 2.3104680389968337e-06, "loss": 0.0207, "step": 179610 }, { "epoch": 0.7494513105957556, "grad_norm": 0.44126678256860674, "learning_rate": 2.3104358799763983e-06, "loss": 0.0166, "step": 179615 }, { "epoch": 0.749472173310746, "grad_norm": 0.5753936706250861, "learning_rate": 2.3104037222987728e-06, "loss": 0.0171, "step": 179620 }, { "epoch": 0.7494930360257362, "grad_norm": 0.24998341965795476, "learning_rate": 2.3103715659638655e-06, "loss": 0.0156, "step": 179625 }, { "epoch": 0.7495138987407265, "grad_norm": 0.6610777248907692, "learning_rate": 2.3103394109715817e-06, "loss": 0.0199, "step": 179630 }, { "epoch": 0.7495347614557168, "grad_norm": 0.5817866159818147, "learning_rate": 2.3103072573218286e-06, "loss": 0.0228, "step": 179635 }, { "epoch": 0.7495556241707071, "grad_norm": 0.37607569092126086, "learning_rate": 2.3102751050145126e-06, "loss": 0.021, "step": 179640 }, { "epoch": 0.7495764868856973, "grad_norm": 1.4658397886381662, "learning_rate": 2.31024295404954e-06, "loss": 0.0207, "step": 179645 }, { "epoch": 0.7495973496006877, "grad_norm": 0.5237697966961177, "learning_rate": 2.3102108044268175e-06, "loss": 0.0164, "step": 179650 }, { "epoch": 0.7496182123156779, "grad_norm": 0.3658860012717907, "learning_rate": 2.310178656146252e-06, "loss": 0.0163, "step": 179655 }, { "epoch": 0.7496390750306682, "grad_norm": 0.42434194778067824, "learning_rate": 2.3101465092077502e-06, "loss": 0.0196, "step": 179660 }, { "epoch": 0.7496599377456584, "grad_norm": 0.693796120749348, "learning_rate": 2.3101143636112184e-06, "loss": 0.0283, "step": 179665 }, { "epoch": 0.7496808004606488, "grad_norm": 0.8236721753362666, "learning_rate": 2.3100822193565633e-06, "loss": 0.0179, "step": 179670 }, { "epoch": 0.749701663175639, "grad_norm": 0.4961698048646101, "learning_rate": 2.310050076443692e-06, "loss": 0.0222, "step": 179675 }, { "epoch": 0.7497225258906293, "grad_norm": 0.5803185745358728, "learning_rate": 2.31001793487251e-06, "loss": 0.0205, "step": 179680 }, { "epoch": 0.7497433886056196, "grad_norm": 0.2757562223440398, "learning_rate": 2.309985794642925e-06, "loss": 0.0182, "step": 179685 }, { "epoch": 0.7497642513206099, "grad_norm": 0.6684202444041155, "learning_rate": 2.309953655754843e-06, "loss": 0.02, "step": 179690 }, { "epoch": 0.7497851140356001, "grad_norm": 0.4903293779725172, "learning_rate": 2.309921518208172e-06, "loss": 0.0197, "step": 179695 }, { "epoch": 0.7498059767505905, "grad_norm": 0.4508273151791693, "learning_rate": 2.309889382002817e-06, "loss": 0.0218, "step": 179700 }, { "epoch": 0.7498268394655807, "grad_norm": 0.7929653311583182, "learning_rate": 2.3098572471386857e-06, "loss": 0.0251, "step": 179705 }, { "epoch": 0.749847702180571, "grad_norm": 0.5393806681307141, "learning_rate": 2.3098251136156844e-06, "loss": 0.0182, "step": 179710 }, { "epoch": 0.7498685648955612, "grad_norm": 0.5747010890797515, "learning_rate": 2.3097929814337196e-06, "loss": 0.0242, "step": 179715 }, { "epoch": 0.7498894276105516, "grad_norm": 0.6203635839211042, "learning_rate": 2.309760850592699e-06, "loss": 0.0283, "step": 179720 }, { "epoch": 0.7499102903255418, "grad_norm": 0.3254490349305844, "learning_rate": 2.3097287210925286e-06, "loss": 0.0206, "step": 179725 }, { "epoch": 0.749931153040532, "grad_norm": 0.5160144452357561, "learning_rate": 2.309696592933115e-06, "loss": 0.017, "step": 179730 }, { "epoch": 0.7499520157555224, "grad_norm": 0.2854500479199812, "learning_rate": 2.3096644661143657e-06, "loss": 0.0163, "step": 179735 }, { "epoch": 0.7499728784705126, "grad_norm": 0.5551764441350413, "learning_rate": 2.309632340636187e-06, "loss": 0.0175, "step": 179740 }, { "epoch": 0.7499937411855029, "grad_norm": 0.31248215315249106, "learning_rate": 2.3096002164984855e-06, "loss": 0.0202, "step": 179745 }, { "epoch": 0.7500146039004932, "grad_norm": 1.3721980281960906, "learning_rate": 2.3095680937011676e-06, "loss": 0.0216, "step": 179750 }, { "epoch": 0.7500354666154835, "grad_norm": 0.4103560310840045, "learning_rate": 2.3095359722441413e-06, "loss": 0.0262, "step": 179755 }, { "epoch": 0.7500563293304737, "grad_norm": 0.6800056087721884, "learning_rate": 2.309503852127312e-06, "loss": 0.0221, "step": 179760 }, { "epoch": 0.7500771920454641, "grad_norm": 0.5603710828175754, "learning_rate": 2.3094717333505877e-06, "loss": 0.0153, "step": 179765 }, { "epoch": 0.7500980547604543, "grad_norm": 0.46429418442090525, "learning_rate": 2.309439615913875e-06, "loss": 0.0208, "step": 179770 }, { "epoch": 0.7501189174754446, "grad_norm": 0.9743630902968847, "learning_rate": 2.3094074998170802e-06, "loss": 0.0191, "step": 179775 }, { "epoch": 0.7501397801904348, "grad_norm": 0.6790443222074289, "learning_rate": 2.3093753850601104e-06, "loss": 0.0254, "step": 179780 }, { "epoch": 0.7501606429054252, "grad_norm": 0.34229120273455715, "learning_rate": 2.309343271642872e-06, "loss": 0.0147, "step": 179785 }, { "epoch": 0.7501815056204154, "grad_norm": 0.7762997059073766, "learning_rate": 2.309311159565273e-06, "loss": 0.0171, "step": 179790 }, { "epoch": 0.7502023683354057, "grad_norm": 0.3965981713893988, "learning_rate": 2.3092790488272194e-06, "loss": 0.0279, "step": 179795 }, { "epoch": 0.750223231050396, "grad_norm": 0.6509522476413897, "learning_rate": 2.3092469394286185e-06, "loss": 0.0166, "step": 179800 }, { "epoch": 0.7502440937653863, "grad_norm": 0.9426165150788248, "learning_rate": 2.309214831369377e-06, "loss": 0.0281, "step": 179805 }, { "epoch": 0.7502649564803765, "grad_norm": 0.43959995363911236, "learning_rate": 2.309182724649401e-06, "loss": 0.0208, "step": 179810 }, { "epoch": 0.7502858191953669, "grad_norm": 0.7788659105454605, "learning_rate": 2.3091506192685984e-06, "loss": 0.0177, "step": 179815 }, { "epoch": 0.7503066819103571, "grad_norm": 0.482617480220886, "learning_rate": 2.309118515226876e-06, "loss": 0.0175, "step": 179820 }, { "epoch": 0.7503275446253473, "grad_norm": 0.6204919442062672, "learning_rate": 2.3090864125241405e-06, "loss": 0.0182, "step": 179825 }, { "epoch": 0.7503484073403377, "grad_norm": 0.7911593211437453, "learning_rate": 2.3090543111602985e-06, "loss": 0.0249, "step": 179830 }, { "epoch": 0.7503692700553279, "grad_norm": 0.9911266814116995, "learning_rate": 2.3090222111352572e-06, "loss": 0.0182, "step": 179835 }, { "epoch": 0.7503901327703182, "grad_norm": 0.8383646025674423, "learning_rate": 2.308990112448925e-06, "loss": 0.0215, "step": 179840 }, { "epoch": 0.7504109954853084, "grad_norm": 0.7737009767319566, "learning_rate": 2.3089580151012057e-06, "loss": 0.0144, "step": 179845 }, { "epoch": 0.7504318582002988, "grad_norm": 0.8972936829399518, "learning_rate": 2.3089259190920087e-06, "loss": 0.0288, "step": 179850 }, { "epoch": 0.750452720915289, "grad_norm": 0.4556040248984258, "learning_rate": 2.308893824421241e-06, "loss": 0.0204, "step": 179855 }, { "epoch": 0.7504735836302793, "grad_norm": 0.8795200972703942, "learning_rate": 2.3088617310888083e-06, "loss": 0.0272, "step": 179860 }, { "epoch": 0.7504944463452696, "grad_norm": 0.46188950533100187, "learning_rate": 2.308829639094618e-06, "loss": 0.0222, "step": 179865 }, { "epoch": 0.7505153090602599, "grad_norm": 1.1831249987789454, "learning_rate": 2.308797548438578e-06, "loss": 0.0197, "step": 179870 }, { "epoch": 0.7505361717752501, "grad_norm": 0.8202371380744753, "learning_rate": 2.308765459120594e-06, "loss": 0.0217, "step": 179875 }, { "epoch": 0.7505570344902405, "grad_norm": 0.726883327486981, "learning_rate": 2.3087333711405736e-06, "loss": 0.0202, "step": 179880 }, { "epoch": 0.7505778972052307, "grad_norm": 0.5050961487774237, "learning_rate": 2.3087012844984243e-06, "loss": 0.0176, "step": 179885 }, { "epoch": 0.750598759920221, "grad_norm": 0.7718771442938555, "learning_rate": 2.3086691991940525e-06, "loss": 0.0281, "step": 179890 }, { "epoch": 0.7506196226352112, "grad_norm": 0.8793391316962936, "learning_rate": 2.3086371152273655e-06, "loss": 0.026, "step": 179895 }, { "epoch": 0.7506404853502016, "grad_norm": 0.6863480701754743, "learning_rate": 2.30860503259827e-06, "loss": 0.0197, "step": 179900 }, { "epoch": 0.7506613480651918, "grad_norm": 3.084988252511734, "learning_rate": 2.3085729513066734e-06, "loss": 0.0206, "step": 179905 }, { "epoch": 0.750682210780182, "grad_norm": 0.7637292768213827, "learning_rate": 2.3085408713524827e-06, "loss": 0.0244, "step": 179910 }, { "epoch": 0.7507030734951724, "grad_norm": 0.5275473816920196, "learning_rate": 2.3085087927356046e-06, "loss": 0.0195, "step": 179915 }, { "epoch": 0.7507239362101626, "grad_norm": 1.0330462402960294, "learning_rate": 2.308476715455947e-06, "loss": 0.0219, "step": 179920 }, { "epoch": 0.7507447989251529, "grad_norm": 0.49030832739972846, "learning_rate": 2.308444639513417e-06, "loss": 0.0214, "step": 179925 }, { "epoch": 0.7507656616401432, "grad_norm": 0.31597711369127807, "learning_rate": 2.3084125649079207e-06, "loss": 0.0215, "step": 179930 }, { "epoch": 0.7507865243551335, "grad_norm": 0.3043433215152657, "learning_rate": 2.308380491639366e-06, "loss": 0.0233, "step": 179935 }, { "epoch": 0.7508073870701237, "grad_norm": 0.3922634499023656, "learning_rate": 2.30834841970766e-06, "loss": 0.0236, "step": 179940 }, { "epoch": 0.7508282497851141, "grad_norm": 0.3988031103114515, "learning_rate": 2.308316349112709e-06, "loss": 0.0219, "step": 179945 }, { "epoch": 0.7508491125001043, "grad_norm": 0.7729004680987017, "learning_rate": 2.308284279854421e-06, "loss": 0.0277, "step": 179950 }, { "epoch": 0.7508699752150946, "grad_norm": 0.6019658632778448, "learning_rate": 2.3082522119327035e-06, "loss": 0.0247, "step": 179955 }, { "epoch": 0.7508908379300848, "grad_norm": 0.3975950990953601, "learning_rate": 2.308220145347463e-06, "loss": 0.0241, "step": 179960 }, { "epoch": 0.7509117006450752, "grad_norm": 0.9031444883465198, "learning_rate": 2.3081880800986062e-06, "loss": 0.0246, "step": 179965 }, { "epoch": 0.7509325633600654, "grad_norm": 0.4348986478189012, "learning_rate": 2.308156016186041e-06, "loss": 0.0173, "step": 179970 }, { "epoch": 0.7509534260750557, "grad_norm": 0.47162680832874937, "learning_rate": 2.308123953609675e-06, "loss": 0.019, "step": 179975 }, { "epoch": 0.750974288790046, "grad_norm": 0.5393028308841594, "learning_rate": 2.308091892369414e-06, "loss": 0.02, "step": 179980 }, { "epoch": 0.7509951515050363, "grad_norm": 1.3728429806063798, "learning_rate": 2.3080598324651663e-06, "loss": 0.022, "step": 179985 }, { "epoch": 0.7510160142200265, "grad_norm": 0.25525952404387514, "learning_rate": 2.3080277738968394e-06, "loss": 0.0235, "step": 179990 }, { "epoch": 0.7510368769350169, "grad_norm": 0.3556943744847567, "learning_rate": 2.3079957166643397e-06, "loss": 0.0207, "step": 179995 }, { "epoch": 0.7510577396500071, "grad_norm": 0.456730684103053, "learning_rate": 2.307963660767575e-06, "loss": 0.0219, "step": 180000 }, { "epoch": 0.7510786023649973, "grad_norm": 0.39120091700446863, "learning_rate": 2.3079316062064516e-06, "loss": 0.0173, "step": 180005 }, { "epoch": 0.7510994650799876, "grad_norm": 0.8664489605758293, "learning_rate": 2.3078995529808777e-06, "loss": 0.0262, "step": 180010 }, { "epoch": 0.751120327794978, "grad_norm": 0.41831347608927694, "learning_rate": 2.30786750109076e-06, "loss": 0.0163, "step": 180015 }, { "epoch": 0.7511411905099682, "grad_norm": 0.6702324375916109, "learning_rate": 2.307835450536007e-06, "loss": 0.0226, "step": 180020 }, { "epoch": 0.7511620532249584, "grad_norm": 0.9089708131720138, "learning_rate": 2.3078034013165236e-06, "loss": 0.0193, "step": 180025 }, { "epoch": 0.7511829159399488, "grad_norm": 0.5528836736416142, "learning_rate": 2.3077713534322196e-06, "loss": 0.0235, "step": 180030 }, { "epoch": 0.751203778654939, "grad_norm": 0.7314031737895267, "learning_rate": 2.3077393068830005e-06, "loss": 0.0175, "step": 180035 }, { "epoch": 0.7512246413699293, "grad_norm": 0.651459161946963, "learning_rate": 2.307707261668775e-06, "loss": 0.0216, "step": 180040 }, { "epoch": 0.7512455040849196, "grad_norm": 0.4079742505824438, "learning_rate": 2.3076752177894493e-06, "loss": 0.0186, "step": 180045 }, { "epoch": 0.7512663667999099, "grad_norm": 0.4987077242552992, "learning_rate": 2.307643175244931e-06, "loss": 0.0162, "step": 180050 }, { "epoch": 0.7512872295149001, "grad_norm": 0.4116835757684578, "learning_rate": 2.3076111340351282e-06, "loss": 0.0215, "step": 180055 }, { "epoch": 0.7513080922298905, "grad_norm": 1.0903819440998297, "learning_rate": 2.3075790941599473e-06, "loss": 0.0214, "step": 180060 }, { "epoch": 0.7513289549448807, "grad_norm": 0.8076041434509214, "learning_rate": 2.307547055619296e-06, "loss": 0.024, "step": 180065 }, { "epoch": 0.751349817659871, "grad_norm": 0.4960813764468609, "learning_rate": 2.3075150184130814e-06, "loss": 0.0211, "step": 180070 }, { "epoch": 0.7513706803748612, "grad_norm": 0.39580707092155354, "learning_rate": 2.307482982541211e-06, "loss": 0.0173, "step": 180075 }, { "epoch": 0.7513915430898516, "grad_norm": 0.9453523127800836, "learning_rate": 2.3074509480035926e-06, "loss": 0.0241, "step": 180080 }, { "epoch": 0.7514124058048418, "grad_norm": 0.434822092190958, "learning_rate": 2.3074189148001335e-06, "loss": 0.0181, "step": 180085 }, { "epoch": 0.751433268519832, "grad_norm": 0.6038556733039881, "learning_rate": 2.3073868829307403e-06, "loss": 0.0229, "step": 180090 }, { "epoch": 0.7514541312348224, "grad_norm": 0.45851473598858017, "learning_rate": 2.307354852395321e-06, "loss": 0.0195, "step": 180095 }, { "epoch": 0.7514749939498127, "grad_norm": 0.45578996393317545, "learning_rate": 2.307322823193783e-06, "loss": 0.0165, "step": 180100 }, { "epoch": 0.7514958566648029, "grad_norm": 1.0219233075872938, "learning_rate": 2.307290795326034e-06, "loss": 0.0176, "step": 180105 }, { "epoch": 0.7515167193797933, "grad_norm": 1.254972166734762, "learning_rate": 2.3072587687919807e-06, "loss": 0.0185, "step": 180110 }, { "epoch": 0.7515375820947835, "grad_norm": 0.40688718738365304, "learning_rate": 2.307226743591532e-06, "loss": 0.0165, "step": 180115 }, { "epoch": 0.7515584448097737, "grad_norm": 0.9898672654829701, "learning_rate": 2.3071947197245928e-06, "loss": 0.028, "step": 180120 }, { "epoch": 0.7515793075247641, "grad_norm": 0.5609027320290151, "learning_rate": 2.307162697191073e-06, "loss": 0.021, "step": 180125 }, { "epoch": 0.7516001702397543, "grad_norm": 0.8176638893101533, "learning_rate": 2.307130675990879e-06, "loss": 0.0399, "step": 180130 }, { "epoch": 0.7516210329547446, "grad_norm": 1.0130255660924132, "learning_rate": 2.3070986561239185e-06, "loss": 0.0158, "step": 180135 }, { "epoch": 0.7516418956697348, "grad_norm": 0.49911247762566063, "learning_rate": 2.3070666375900985e-06, "loss": 0.022, "step": 180140 }, { "epoch": 0.7516627583847252, "grad_norm": 0.8981679835695218, "learning_rate": 2.307034620389327e-06, "loss": 0.0246, "step": 180145 }, { "epoch": 0.7516836210997154, "grad_norm": 0.6843533189997252, "learning_rate": 2.307002604521512e-06, "loss": 0.0153, "step": 180150 }, { "epoch": 0.7517044838147057, "grad_norm": 0.620386658510176, "learning_rate": 2.30697058998656e-06, "loss": 0.0225, "step": 180155 }, { "epoch": 0.751725346529696, "grad_norm": 0.589893544668705, "learning_rate": 2.306938576784379e-06, "loss": 0.0172, "step": 180160 }, { "epoch": 0.7517462092446863, "grad_norm": 0.5249994839383681, "learning_rate": 2.3069065649148765e-06, "loss": 0.0185, "step": 180165 }, { "epoch": 0.7517670719596765, "grad_norm": 0.5367644854092667, "learning_rate": 2.3068745543779593e-06, "loss": 0.0225, "step": 180170 }, { "epoch": 0.7517879346746669, "grad_norm": 0.7117263058816744, "learning_rate": 2.3068425451735365e-06, "loss": 0.0278, "step": 180175 }, { "epoch": 0.7518087973896571, "grad_norm": 1.0181695187832034, "learning_rate": 2.306810537301514e-06, "loss": 0.0237, "step": 180180 }, { "epoch": 0.7518296601046474, "grad_norm": 0.8181108024700792, "learning_rate": 2.3067785307618006e-06, "loss": 0.0297, "step": 180185 }, { "epoch": 0.7518505228196376, "grad_norm": 0.7768163839087886, "learning_rate": 2.306746525554304e-06, "loss": 0.0186, "step": 180190 }, { "epoch": 0.751871385534628, "grad_norm": 0.6329488660056352, "learning_rate": 2.3067145216789307e-06, "loss": 0.0251, "step": 180195 }, { "epoch": 0.7518922482496182, "grad_norm": 0.5402185113305266, "learning_rate": 2.306682519135589e-06, "loss": 0.0139, "step": 180200 }, { "epoch": 0.7519131109646084, "grad_norm": 0.5935901480806022, "learning_rate": 2.3066505179241856e-06, "loss": 0.0246, "step": 180205 }, { "epoch": 0.7519339736795988, "grad_norm": 0.37118951009851714, "learning_rate": 2.30661851804463e-06, "loss": 0.016, "step": 180210 }, { "epoch": 0.751954836394589, "grad_norm": 0.602748090003819, "learning_rate": 2.306586519496828e-06, "loss": 0.0124, "step": 180215 }, { "epoch": 0.7519756991095793, "grad_norm": 0.5312130758079251, "learning_rate": 2.306554522280688e-06, "loss": 0.0237, "step": 180220 }, { "epoch": 0.7519965618245696, "grad_norm": 0.24996753928226675, "learning_rate": 2.3065225263961175e-06, "loss": 0.0183, "step": 180225 }, { "epoch": 0.7520174245395599, "grad_norm": 0.9972663632249471, "learning_rate": 2.3064905318430243e-06, "loss": 0.0171, "step": 180230 }, { "epoch": 0.7520382872545501, "grad_norm": 0.6627400980564907, "learning_rate": 2.306458538621315e-06, "loss": 0.0257, "step": 180235 }, { "epoch": 0.7520591499695405, "grad_norm": 0.7281231314747395, "learning_rate": 2.3064265467308997e-06, "loss": 0.0189, "step": 180240 }, { "epoch": 0.7520800126845307, "grad_norm": 0.6883741335862488, "learning_rate": 2.3063945561716836e-06, "loss": 0.0222, "step": 180245 }, { "epoch": 0.752100875399521, "grad_norm": 0.920737451333234, "learning_rate": 2.3063625669435756e-06, "loss": 0.0276, "step": 180250 }, { "epoch": 0.7521217381145112, "grad_norm": 0.7053370535137897, "learning_rate": 2.3063305790464834e-06, "loss": 0.0212, "step": 180255 }, { "epoch": 0.7521426008295016, "grad_norm": 1.0708543745678278, "learning_rate": 2.306298592480314e-06, "loss": 0.0267, "step": 180260 }, { "epoch": 0.7521634635444918, "grad_norm": 0.6038874435878803, "learning_rate": 2.306266607244976e-06, "loss": 0.019, "step": 180265 }, { "epoch": 0.7521843262594821, "grad_norm": 0.5974648399425102, "learning_rate": 2.3062346233403763e-06, "loss": 0.0291, "step": 180270 }, { "epoch": 0.7522051889744724, "grad_norm": 0.3710377296286532, "learning_rate": 2.306202640766423e-06, "loss": 0.02, "step": 180275 }, { "epoch": 0.7522260516894627, "grad_norm": 0.5718891295730746, "learning_rate": 2.306170659523024e-06, "loss": 0.0268, "step": 180280 }, { "epoch": 0.7522469144044529, "grad_norm": 0.8269187093208543, "learning_rate": 2.306138679610087e-06, "loss": 0.0158, "step": 180285 }, { "epoch": 0.7522677771194433, "grad_norm": 0.7019408653272837, "learning_rate": 2.306106701027519e-06, "loss": 0.0182, "step": 180290 }, { "epoch": 0.7522886398344335, "grad_norm": 0.5122696515414603, "learning_rate": 2.306074723775229e-06, "loss": 0.016, "step": 180295 }, { "epoch": 0.7523095025494237, "grad_norm": 1.0445961423860732, "learning_rate": 2.3060427478531237e-06, "loss": 0.0292, "step": 180300 }, { "epoch": 0.7523303652644141, "grad_norm": 0.45278388944269493, "learning_rate": 2.3060107732611115e-06, "loss": 0.0169, "step": 180305 }, { "epoch": 0.7523512279794043, "grad_norm": 0.8296669105922121, "learning_rate": 2.3059787999991002e-06, "loss": 0.0241, "step": 180310 }, { "epoch": 0.7523720906943946, "grad_norm": 1.3056839967264957, "learning_rate": 2.305946828066997e-06, "loss": 0.0251, "step": 180315 }, { "epoch": 0.7523929534093848, "grad_norm": 0.5227187340037005, "learning_rate": 2.3059148574647113e-06, "loss": 0.0154, "step": 180320 }, { "epoch": 0.7524138161243752, "grad_norm": 1.1922007692451013, "learning_rate": 2.305882888192148e-06, "loss": 0.0243, "step": 180325 }, { "epoch": 0.7524346788393654, "grad_norm": 0.46586102016152736, "learning_rate": 2.3058509202492183e-06, "loss": 0.02, "step": 180330 }, { "epoch": 0.7524555415543557, "grad_norm": 0.8216678979852329, "learning_rate": 2.305818953635828e-06, "loss": 0.0174, "step": 180335 }, { "epoch": 0.752476404269346, "grad_norm": 0.4461219116229311, "learning_rate": 2.305786988351885e-06, "loss": 0.0197, "step": 180340 }, { "epoch": 0.7524972669843363, "grad_norm": 0.4095606536465423, "learning_rate": 2.3057550243972973e-06, "loss": 0.0216, "step": 180345 }, { "epoch": 0.7525181296993265, "grad_norm": 0.7041613049529637, "learning_rate": 2.3057230617719735e-06, "loss": 0.0184, "step": 180350 }, { "epoch": 0.7525389924143169, "grad_norm": 0.344136675591172, "learning_rate": 2.3056911004758205e-06, "loss": 0.0163, "step": 180355 }, { "epoch": 0.7525598551293071, "grad_norm": 0.7833965131442838, "learning_rate": 2.3056591405087473e-06, "loss": 0.0182, "step": 180360 }, { "epoch": 0.7525807178442974, "grad_norm": 0.7504205714744009, "learning_rate": 2.3056271818706607e-06, "loss": 0.0252, "step": 180365 }, { "epoch": 0.7526015805592876, "grad_norm": 0.4849045400785636, "learning_rate": 2.305595224561469e-06, "loss": 0.0162, "step": 180370 }, { "epoch": 0.752622443274278, "grad_norm": 1.099887705598407, "learning_rate": 2.30556326858108e-06, "loss": 0.0235, "step": 180375 }, { "epoch": 0.7526433059892682, "grad_norm": 0.3648872399157759, "learning_rate": 2.305531313929402e-06, "loss": 0.0135, "step": 180380 }, { "epoch": 0.7526641687042585, "grad_norm": 0.6867281629272287, "learning_rate": 2.3054993606063426e-06, "loss": 0.018, "step": 180385 }, { "epoch": 0.7526850314192488, "grad_norm": 0.8734875493818846, "learning_rate": 2.30546740861181e-06, "loss": 0.0181, "step": 180390 }, { "epoch": 0.752705894134239, "grad_norm": 0.5511690703194355, "learning_rate": 2.305435457945711e-06, "loss": 0.0229, "step": 180395 }, { "epoch": 0.7527267568492293, "grad_norm": 0.6885049495986303, "learning_rate": 2.3054035086079554e-06, "loss": 0.0233, "step": 180400 }, { "epoch": 0.7527476195642196, "grad_norm": 0.4593310785814942, "learning_rate": 2.30537156059845e-06, "loss": 0.0175, "step": 180405 }, { "epoch": 0.7527684822792099, "grad_norm": 0.5195041797816194, "learning_rate": 2.305339613917103e-06, "loss": 0.0242, "step": 180410 }, { "epoch": 0.7527893449942001, "grad_norm": 0.7111897836039448, "learning_rate": 2.305307668563823e-06, "loss": 0.0366, "step": 180415 }, { "epoch": 0.7528102077091905, "grad_norm": 0.8594825739590173, "learning_rate": 2.3052757245385167e-06, "loss": 0.0177, "step": 180420 }, { "epoch": 0.7528310704241807, "grad_norm": 0.7115701491029832, "learning_rate": 2.3052437818410926e-06, "loss": 0.0187, "step": 180425 }, { "epoch": 0.752851933139171, "grad_norm": 0.9250429724974525, "learning_rate": 2.305211840471459e-06, "loss": 0.0263, "step": 180430 }, { "epoch": 0.7528727958541612, "grad_norm": 0.945009601930239, "learning_rate": 2.305179900429524e-06, "loss": 0.0204, "step": 180435 }, { "epoch": 0.7528936585691516, "grad_norm": 0.60558066470898, "learning_rate": 2.3051479617151956e-06, "loss": 0.0201, "step": 180440 }, { "epoch": 0.7529145212841418, "grad_norm": 0.7685623911649654, "learning_rate": 2.3051160243283814e-06, "loss": 0.0327, "step": 180445 }, { "epoch": 0.7529353839991321, "grad_norm": 0.5183857926933451, "learning_rate": 2.3050840882689893e-06, "loss": 0.0308, "step": 180450 }, { "epoch": 0.7529562467141224, "grad_norm": 1.3181511643725268, "learning_rate": 2.3050521535369284e-06, "loss": 0.0275, "step": 180455 }, { "epoch": 0.7529771094291127, "grad_norm": 0.2882115365281916, "learning_rate": 2.305020220132106e-06, "loss": 0.015, "step": 180460 }, { "epoch": 0.7529979721441029, "grad_norm": 0.6164232835413729, "learning_rate": 2.3049882880544297e-06, "loss": 0.0264, "step": 180465 }, { "epoch": 0.7530188348590933, "grad_norm": 0.4448577162020252, "learning_rate": 2.304956357303809e-06, "loss": 0.0199, "step": 180470 }, { "epoch": 0.7530396975740835, "grad_norm": 0.6604209346947342, "learning_rate": 2.3049244278801506e-06, "loss": 0.0189, "step": 180475 }, { "epoch": 0.7530605602890738, "grad_norm": 1.0405168312734248, "learning_rate": 2.304892499783363e-06, "loss": 0.0274, "step": 180480 }, { "epoch": 0.7530814230040641, "grad_norm": 1.2429145861897222, "learning_rate": 2.3048605730133545e-06, "loss": 0.0217, "step": 180485 }, { "epoch": 0.7531022857190544, "grad_norm": 0.5792303308427833, "learning_rate": 2.3048286475700335e-06, "loss": 0.0365, "step": 180490 }, { "epoch": 0.7531231484340446, "grad_norm": 0.7305439004744056, "learning_rate": 2.304796723453307e-06, "loss": 0.0213, "step": 180495 }, { "epoch": 0.7531440111490348, "grad_norm": 0.6718586411640921, "learning_rate": 2.3047648006630846e-06, "loss": 0.0195, "step": 180500 }, { "epoch": 0.7531648738640252, "grad_norm": 0.5439781865453392, "learning_rate": 2.3047328791992735e-06, "loss": 0.0177, "step": 180505 }, { "epoch": 0.7531857365790154, "grad_norm": 0.41574678104174667, "learning_rate": 2.304700959061782e-06, "loss": 0.0189, "step": 180510 }, { "epoch": 0.7532065992940057, "grad_norm": 0.8457903696841489, "learning_rate": 2.3046690402505185e-06, "loss": 0.0259, "step": 180515 }, { "epoch": 0.753227462008996, "grad_norm": 1.168022204993672, "learning_rate": 2.304637122765391e-06, "loss": 0.0215, "step": 180520 }, { "epoch": 0.7532483247239863, "grad_norm": 0.4312560829928545, "learning_rate": 2.304605206606308e-06, "loss": 0.0161, "step": 180525 }, { "epoch": 0.7532691874389765, "grad_norm": 0.6452313410008147, "learning_rate": 2.304573291773176e-06, "loss": 0.0223, "step": 180530 }, { "epoch": 0.7532900501539669, "grad_norm": 0.7103241832646334, "learning_rate": 2.3045413782659053e-06, "loss": 0.0176, "step": 180535 }, { "epoch": 0.7533109128689571, "grad_norm": 0.38044575572991146, "learning_rate": 2.304509466084404e-06, "loss": 0.0174, "step": 180540 }, { "epoch": 0.7533317755839474, "grad_norm": 0.6179133724452508, "learning_rate": 2.3044775552285787e-06, "loss": 0.0202, "step": 180545 }, { "epoch": 0.7533526382989376, "grad_norm": 0.6895141903311707, "learning_rate": 2.304445645698339e-06, "loss": 0.0186, "step": 180550 }, { "epoch": 0.753373501013928, "grad_norm": 0.6500381749003629, "learning_rate": 2.304413737493593e-06, "loss": 0.0186, "step": 180555 }, { "epoch": 0.7533943637289182, "grad_norm": 0.8322100276455365, "learning_rate": 2.304381830614248e-06, "loss": 0.0275, "step": 180560 }, { "epoch": 0.7534152264439085, "grad_norm": 1.444218356863986, "learning_rate": 2.304349925060213e-06, "loss": 0.0238, "step": 180565 }, { "epoch": 0.7534360891588988, "grad_norm": 0.26863963273357183, "learning_rate": 2.3043180208313966e-06, "loss": 0.0197, "step": 180570 }, { "epoch": 0.7534569518738891, "grad_norm": 0.21522202039232152, "learning_rate": 2.304286117927706e-06, "loss": 0.027, "step": 180575 }, { "epoch": 0.7534778145888793, "grad_norm": 0.657985343855906, "learning_rate": 2.3042542163490505e-06, "loss": 0.0256, "step": 180580 }, { "epoch": 0.7534986773038697, "grad_norm": 0.43394599723094357, "learning_rate": 2.3042223160953373e-06, "loss": 0.0196, "step": 180585 }, { "epoch": 0.7535195400188599, "grad_norm": 0.5082157536840377, "learning_rate": 2.304190417166476e-06, "loss": 0.0235, "step": 180590 }, { "epoch": 0.7535404027338501, "grad_norm": 0.4505565753789222, "learning_rate": 2.3041585195623743e-06, "loss": 0.015, "step": 180595 }, { "epoch": 0.7535612654488405, "grad_norm": 0.5590475075953228, "learning_rate": 2.3041266232829397e-06, "loss": 0.0393, "step": 180600 }, { "epoch": 0.7535821281638307, "grad_norm": 0.35550012711273377, "learning_rate": 2.3040947283280817e-06, "loss": 0.0157, "step": 180605 }, { "epoch": 0.753602990878821, "grad_norm": 1.0623159490065557, "learning_rate": 2.304062834697708e-06, "loss": 0.0273, "step": 180610 }, { "epoch": 0.7536238535938112, "grad_norm": 1.101178625839863, "learning_rate": 2.3040309423917274e-06, "loss": 0.0226, "step": 180615 }, { "epoch": 0.7536447163088016, "grad_norm": 0.46797263667196587, "learning_rate": 2.3039990514100477e-06, "loss": 0.0221, "step": 180620 }, { "epoch": 0.7536655790237918, "grad_norm": 0.5640518511262937, "learning_rate": 2.303967161752578e-06, "loss": 0.0203, "step": 180625 }, { "epoch": 0.7536864417387821, "grad_norm": 1.3468421498326002, "learning_rate": 2.3039352734192254e-06, "loss": 0.0233, "step": 180630 }, { "epoch": 0.7537073044537724, "grad_norm": 0.6347651205376051, "learning_rate": 2.3039033864099e-06, "loss": 0.0227, "step": 180635 }, { "epoch": 0.7537281671687627, "grad_norm": 0.6765082816747574, "learning_rate": 2.3038715007245084e-06, "loss": 0.0202, "step": 180640 }, { "epoch": 0.7537490298837529, "grad_norm": 0.3526069178672902, "learning_rate": 2.30383961636296e-06, "loss": 0.0177, "step": 180645 }, { "epoch": 0.7537698925987433, "grad_norm": 0.9175147017915963, "learning_rate": 2.303807733325163e-06, "loss": 0.0201, "step": 180650 }, { "epoch": 0.7537907553137335, "grad_norm": 0.8983333068126834, "learning_rate": 2.303775851611026e-06, "loss": 0.0227, "step": 180655 }, { "epoch": 0.7538116180287238, "grad_norm": 0.47771925364557727, "learning_rate": 2.3037439712204573e-06, "loss": 0.022, "step": 180660 }, { "epoch": 0.7538324807437141, "grad_norm": 0.7307387634967498, "learning_rate": 2.3037120921533645e-06, "loss": 0.0258, "step": 180665 }, { "epoch": 0.7538533434587044, "grad_norm": 0.5833200884611227, "learning_rate": 2.3036802144096574e-06, "loss": 0.0248, "step": 180670 }, { "epoch": 0.7538742061736946, "grad_norm": 0.5290878395844425, "learning_rate": 2.3036483379892436e-06, "loss": 0.019, "step": 180675 }, { "epoch": 0.7538950688886848, "grad_norm": 0.7415175265391305, "learning_rate": 2.303616462892032e-06, "loss": 0.0264, "step": 180680 }, { "epoch": 0.7539159316036752, "grad_norm": 0.5298307744393566, "learning_rate": 2.3035845891179304e-06, "loss": 0.0208, "step": 180685 }, { "epoch": 0.7539367943186654, "grad_norm": 0.4685340750340324, "learning_rate": 2.303552716666848e-06, "loss": 0.0211, "step": 180690 }, { "epoch": 0.7539576570336557, "grad_norm": 0.6885108258894894, "learning_rate": 2.3035208455386932e-06, "loss": 0.0178, "step": 180695 }, { "epoch": 0.753978519748646, "grad_norm": 0.39984981719967877, "learning_rate": 2.303488975733374e-06, "loss": 0.0176, "step": 180700 }, { "epoch": 0.7539993824636363, "grad_norm": 0.3802753827946876, "learning_rate": 2.303457107250799e-06, "loss": 0.0209, "step": 180705 }, { "epoch": 0.7540202451786265, "grad_norm": 0.4655107022807075, "learning_rate": 2.303425240090877e-06, "loss": 0.0253, "step": 180710 }, { "epoch": 0.7540411078936169, "grad_norm": 0.38241047672675665, "learning_rate": 2.3033933742535163e-06, "loss": 0.0161, "step": 180715 }, { "epoch": 0.7540619706086071, "grad_norm": 0.8928478896528631, "learning_rate": 2.3033615097386253e-06, "loss": 0.0188, "step": 180720 }, { "epoch": 0.7540828333235974, "grad_norm": 0.8387140016270638, "learning_rate": 2.303329646546113e-06, "loss": 0.0234, "step": 180725 }, { "epoch": 0.7541036960385876, "grad_norm": 0.9116575776850095, "learning_rate": 2.303297784675888e-06, "loss": 0.0226, "step": 180730 }, { "epoch": 0.754124558753578, "grad_norm": 0.6138545053505619, "learning_rate": 2.303265924127857e-06, "loss": 0.0185, "step": 180735 }, { "epoch": 0.7541454214685682, "grad_norm": 0.8170361515531812, "learning_rate": 2.3032340649019314e-06, "loss": 0.0213, "step": 180740 }, { "epoch": 0.7541662841835585, "grad_norm": 0.49683287778439744, "learning_rate": 2.3032022069980183e-06, "loss": 0.0279, "step": 180745 }, { "epoch": 0.7541871468985488, "grad_norm": 6.13780008037006, "learning_rate": 2.303170350416026e-06, "loss": 0.0222, "step": 180750 }, { "epoch": 0.7542080096135391, "grad_norm": 0.5462959960659188, "learning_rate": 2.3031384951558634e-06, "loss": 0.0158, "step": 180755 }, { "epoch": 0.7542288723285293, "grad_norm": 0.7077875925657352, "learning_rate": 2.3031066412174396e-06, "loss": 0.0307, "step": 180760 }, { "epoch": 0.7542497350435197, "grad_norm": 0.5473834457882208, "learning_rate": 2.303074788600663e-06, "loss": 0.0213, "step": 180765 }, { "epoch": 0.7542705977585099, "grad_norm": 0.43896350538834455, "learning_rate": 2.303042937305441e-06, "loss": 0.0215, "step": 180770 }, { "epoch": 0.7542914604735002, "grad_norm": 0.5752803167051875, "learning_rate": 2.303011087331684e-06, "loss": 0.0194, "step": 180775 }, { "epoch": 0.7543123231884905, "grad_norm": 1.057988801050018, "learning_rate": 2.3029792386792998e-06, "loss": 0.0233, "step": 180780 }, { "epoch": 0.7543331859034808, "grad_norm": 1.0536023037494726, "learning_rate": 2.3029473913481963e-06, "loss": 0.0165, "step": 180785 }, { "epoch": 0.754354048618471, "grad_norm": 0.7878370229896079, "learning_rate": 2.3029155453382833e-06, "loss": 0.0232, "step": 180790 }, { "epoch": 0.7543749113334612, "grad_norm": 0.8092572279851155, "learning_rate": 2.302883700649469e-06, "loss": 0.0209, "step": 180795 }, { "epoch": 0.7543957740484516, "grad_norm": 0.7110814123621538, "learning_rate": 2.3028518572816623e-06, "loss": 0.0186, "step": 180800 }, { "epoch": 0.7544166367634418, "grad_norm": 0.8398291582328403, "learning_rate": 2.3028200152347714e-06, "loss": 0.0229, "step": 180805 }, { "epoch": 0.7544374994784321, "grad_norm": 0.9456035065487374, "learning_rate": 2.3027881745087054e-06, "loss": 0.0243, "step": 180810 }, { "epoch": 0.7544583621934224, "grad_norm": 0.5629052398407931, "learning_rate": 2.302756335103373e-06, "loss": 0.0198, "step": 180815 }, { "epoch": 0.7544792249084127, "grad_norm": 0.9116574154906802, "learning_rate": 2.302724497018683e-06, "loss": 0.0211, "step": 180820 }, { "epoch": 0.7545000876234029, "grad_norm": 0.39763150663106306, "learning_rate": 2.3026926602545428e-06, "loss": 0.0257, "step": 180825 }, { "epoch": 0.7545209503383933, "grad_norm": 0.7339671273464485, "learning_rate": 2.302660824810863e-06, "loss": 0.0188, "step": 180830 }, { "epoch": 0.7545418130533835, "grad_norm": 1.0137925613604395, "learning_rate": 2.302628990687551e-06, "loss": 0.0217, "step": 180835 }, { "epoch": 0.7545626757683738, "grad_norm": 0.5157722942252445, "learning_rate": 2.3025971578845162e-06, "loss": 0.0206, "step": 180840 }, { "epoch": 0.7545835384833641, "grad_norm": 0.696293884843538, "learning_rate": 2.3025653264016672e-06, "loss": 0.018, "step": 180845 }, { "epoch": 0.7546044011983544, "grad_norm": 0.5837973290844394, "learning_rate": 2.302533496238913e-06, "loss": 0.02, "step": 180850 }, { "epoch": 0.7546252639133446, "grad_norm": 0.29454337757847876, "learning_rate": 2.3025016673961617e-06, "loss": 0.0161, "step": 180855 }, { "epoch": 0.7546461266283349, "grad_norm": 0.6782351954858094, "learning_rate": 2.3024698398733227e-06, "loss": 0.0169, "step": 180860 }, { "epoch": 0.7546669893433252, "grad_norm": 0.8933489068818568, "learning_rate": 2.3024380136703045e-06, "loss": 0.0395, "step": 180865 }, { "epoch": 0.7546878520583155, "grad_norm": 0.6727834728801463, "learning_rate": 2.3024061887870153e-06, "loss": 0.019, "step": 180870 }, { "epoch": 0.7547087147733057, "grad_norm": 0.8061454611587324, "learning_rate": 2.302374365223365e-06, "loss": 0.0214, "step": 180875 }, { "epoch": 0.754729577488296, "grad_norm": 0.9415199233789968, "learning_rate": 2.302342542979262e-06, "loss": 0.0278, "step": 180880 }, { "epoch": 0.7547504402032863, "grad_norm": 0.4465504307501102, "learning_rate": 2.3023107220546147e-06, "loss": 0.0157, "step": 180885 }, { "epoch": 0.7547713029182765, "grad_norm": 0.5294966189579938, "learning_rate": 2.3022789024493326e-06, "loss": 0.0196, "step": 180890 }, { "epoch": 0.7547921656332669, "grad_norm": 0.5541074413621903, "learning_rate": 2.3022470841633235e-06, "loss": 0.0216, "step": 180895 }, { "epoch": 0.7548130283482571, "grad_norm": 0.5602500612553063, "learning_rate": 2.3022152671964974e-06, "loss": 0.0125, "step": 180900 }, { "epoch": 0.7548338910632474, "grad_norm": 0.7326884677895987, "learning_rate": 2.302183451548763e-06, "loss": 0.0218, "step": 180905 }, { "epoch": 0.7548547537782376, "grad_norm": 0.550500629684181, "learning_rate": 2.302151637220028e-06, "loss": 0.0147, "step": 180910 }, { "epoch": 0.754875616493228, "grad_norm": 0.4253568062604855, "learning_rate": 2.3021198242102025e-06, "loss": 0.0156, "step": 180915 }, { "epoch": 0.7548964792082182, "grad_norm": 0.908710614920331, "learning_rate": 2.302088012519195e-06, "loss": 0.0228, "step": 180920 }, { "epoch": 0.7549173419232085, "grad_norm": 0.5205212100719121, "learning_rate": 2.3020562021469137e-06, "loss": 0.0237, "step": 180925 }, { "epoch": 0.7549382046381988, "grad_norm": 0.24862522082959246, "learning_rate": 2.3020243930932685e-06, "loss": 0.0186, "step": 180930 }, { "epoch": 0.7549590673531891, "grad_norm": 0.3039410172539892, "learning_rate": 2.3019925853581686e-06, "loss": 0.021, "step": 180935 }, { "epoch": 0.7549799300681793, "grad_norm": 0.45883841361417615, "learning_rate": 2.301960778941521e-06, "loss": 0.0199, "step": 180940 }, { "epoch": 0.7550007927831697, "grad_norm": 0.5180055631725752, "learning_rate": 2.3019289738432364e-06, "loss": 0.0225, "step": 180945 }, { "epoch": 0.7550216554981599, "grad_norm": 1.2523284215180377, "learning_rate": 2.301897170063224e-06, "loss": 0.0234, "step": 180950 }, { "epoch": 0.7550425182131502, "grad_norm": 0.6925762778524228, "learning_rate": 2.3018653676013906e-06, "loss": 0.0195, "step": 180955 }, { "epoch": 0.7550633809281405, "grad_norm": 0.30703384052813626, "learning_rate": 2.301833566457647e-06, "loss": 0.0162, "step": 180960 }, { "epoch": 0.7550842436431308, "grad_norm": 0.3523243200613994, "learning_rate": 2.3018017666319014e-06, "loss": 0.0132, "step": 180965 }, { "epoch": 0.755105106358121, "grad_norm": 0.5862449134210558, "learning_rate": 2.3017699681240633e-06, "loss": 0.0236, "step": 180970 }, { "epoch": 0.7551259690731112, "grad_norm": 0.3907321647287934, "learning_rate": 2.301738170934041e-06, "loss": 0.0159, "step": 180975 }, { "epoch": 0.7551468317881016, "grad_norm": 0.35487970024332066, "learning_rate": 2.301706375061744e-06, "loss": 0.0161, "step": 180980 }, { "epoch": 0.7551676945030918, "grad_norm": 0.9680533039047927, "learning_rate": 2.301674580507081e-06, "loss": 0.0166, "step": 180985 }, { "epoch": 0.7551885572180821, "grad_norm": 0.6241588698318522, "learning_rate": 2.301642787269961e-06, "loss": 0.0203, "step": 180990 }, { "epoch": 0.7552094199330724, "grad_norm": 0.7157959121242787, "learning_rate": 2.3016109953502933e-06, "loss": 0.018, "step": 180995 }, { "epoch": 0.7552302826480627, "grad_norm": 1.0557689327616016, "learning_rate": 2.301579204747987e-06, "loss": 0.0244, "step": 181000 }, { "epoch": 0.7552511453630529, "grad_norm": 0.8650343789982994, "learning_rate": 2.30154741546295e-06, "loss": 0.0216, "step": 181005 }, { "epoch": 0.7552720080780433, "grad_norm": 0.5281453314009547, "learning_rate": 2.301515627495093e-06, "loss": 0.0264, "step": 181010 }, { "epoch": 0.7552928707930335, "grad_norm": 0.40606731713589783, "learning_rate": 2.3014838408443234e-06, "loss": 0.0207, "step": 181015 }, { "epoch": 0.7553137335080238, "grad_norm": 1.4329917891261064, "learning_rate": 2.301452055510552e-06, "loss": 0.0221, "step": 181020 }, { "epoch": 0.7553345962230141, "grad_norm": 0.4708378595985651, "learning_rate": 2.301420271493686e-06, "loss": 0.0206, "step": 181025 }, { "epoch": 0.7553554589380044, "grad_norm": 0.8951248176944514, "learning_rate": 2.3013884887936363e-06, "loss": 0.0225, "step": 181030 }, { "epoch": 0.7553763216529946, "grad_norm": 0.7337575047468327, "learning_rate": 2.3013567074103107e-06, "loss": 0.0278, "step": 181035 }, { "epoch": 0.7553971843679849, "grad_norm": 0.6359812006749189, "learning_rate": 2.301324927343618e-06, "loss": 0.0207, "step": 181040 }, { "epoch": 0.7554180470829752, "grad_norm": 0.3275619789225748, "learning_rate": 2.3012931485934686e-06, "loss": 0.02, "step": 181045 }, { "epoch": 0.7554389097979655, "grad_norm": 0.9704299795991328, "learning_rate": 2.301261371159771e-06, "loss": 0.0205, "step": 181050 }, { "epoch": 0.7554597725129557, "grad_norm": 0.407554440042198, "learning_rate": 2.3012295950424342e-06, "loss": 0.0161, "step": 181055 }, { "epoch": 0.7554806352279461, "grad_norm": 0.5316072815631784, "learning_rate": 2.3011978202413674e-06, "loss": 0.0127, "step": 181060 }, { "epoch": 0.7555014979429363, "grad_norm": 1.0534378413796661, "learning_rate": 2.301166046756479e-06, "loss": 0.0356, "step": 181065 }, { "epoch": 0.7555223606579266, "grad_norm": 0.8832790340472254, "learning_rate": 2.30113427458768e-06, "loss": 0.0216, "step": 181070 }, { "epoch": 0.7555432233729169, "grad_norm": 0.9641757728010228, "learning_rate": 2.3011025037348775e-06, "loss": 0.0169, "step": 181075 }, { "epoch": 0.7555640860879071, "grad_norm": 0.7666676585494541, "learning_rate": 2.301070734197982e-06, "loss": 0.0183, "step": 181080 }, { "epoch": 0.7555849488028974, "grad_norm": 0.5235443419677491, "learning_rate": 2.3010389659769016e-06, "loss": 0.0203, "step": 181085 }, { "epoch": 0.7556058115178876, "grad_norm": 0.3520164626260633, "learning_rate": 2.3010071990715464e-06, "loss": 0.0202, "step": 181090 }, { "epoch": 0.755626674232878, "grad_norm": 0.8481787841360756, "learning_rate": 2.3009754334818256e-06, "loss": 0.0202, "step": 181095 }, { "epoch": 0.7556475369478682, "grad_norm": 0.46082405218216194, "learning_rate": 2.3009436692076478e-06, "loss": 0.0281, "step": 181100 }, { "epoch": 0.7556683996628585, "grad_norm": 0.7994441183803195, "learning_rate": 2.3009119062489223e-06, "loss": 0.0174, "step": 181105 }, { "epoch": 0.7556892623778488, "grad_norm": 0.3379024889909214, "learning_rate": 2.300880144605559e-06, "loss": 0.0194, "step": 181110 }, { "epoch": 0.7557101250928391, "grad_norm": 0.6388195036848328, "learning_rate": 2.3008483842774658e-06, "loss": 0.0133, "step": 181115 }, { "epoch": 0.7557309878078293, "grad_norm": 0.48807212888852336, "learning_rate": 2.3008166252645535e-06, "loss": 0.0197, "step": 181120 }, { "epoch": 0.7557518505228197, "grad_norm": 0.4447659416419651, "learning_rate": 2.3007848675667297e-06, "loss": 0.0159, "step": 181125 }, { "epoch": 0.7557727132378099, "grad_norm": 0.6251878640340612, "learning_rate": 2.300753111183905e-06, "loss": 0.0241, "step": 181130 }, { "epoch": 0.7557935759528002, "grad_norm": 0.6317704974116358, "learning_rate": 2.300721356115988e-06, "loss": 0.019, "step": 181135 }, { "epoch": 0.7558144386677905, "grad_norm": 0.832137417719092, "learning_rate": 2.3006896023628884e-06, "loss": 0.0208, "step": 181140 }, { "epoch": 0.7558353013827808, "grad_norm": 0.7963679777971464, "learning_rate": 2.300657849924515e-06, "loss": 0.0188, "step": 181145 }, { "epoch": 0.755856164097771, "grad_norm": 3.673147291953235, "learning_rate": 2.300626098800777e-06, "loss": 0.0189, "step": 181150 }, { "epoch": 0.7558770268127613, "grad_norm": 0.5493007324282239, "learning_rate": 2.3005943489915837e-06, "loss": 0.0207, "step": 181155 }, { "epoch": 0.7558978895277516, "grad_norm": 0.3773752786964633, "learning_rate": 2.3005626004968453e-06, "loss": 0.0274, "step": 181160 }, { "epoch": 0.7559187522427419, "grad_norm": 0.5322122657582823, "learning_rate": 2.3005308533164704e-06, "loss": 0.0196, "step": 181165 }, { "epoch": 0.7559396149577321, "grad_norm": 0.8748926652164825, "learning_rate": 2.3004991074503675e-06, "loss": 0.0195, "step": 181170 }, { "epoch": 0.7559604776727225, "grad_norm": 0.5882682056357081, "learning_rate": 2.3004673628984477e-06, "loss": 0.0231, "step": 181175 }, { "epoch": 0.7559813403877127, "grad_norm": 0.4451417269133173, "learning_rate": 2.3004356196606185e-06, "loss": 0.0238, "step": 181180 }, { "epoch": 0.7560022031027029, "grad_norm": 0.7992880047868238, "learning_rate": 2.300403877736791e-06, "loss": 0.023, "step": 181185 }, { "epoch": 0.7560230658176933, "grad_norm": 0.674245921004154, "learning_rate": 2.3003721371268733e-06, "loss": 0.0167, "step": 181190 }, { "epoch": 0.7560439285326835, "grad_norm": 0.32357659387302035, "learning_rate": 2.300340397830775e-06, "loss": 0.019, "step": 181195 }, { "epoch": 0.7560647912476738, "grad_norm": 0.7011189913605768, "learning_rate": 2.3003086598484056e-06, "loss": 0.0212, "step": 181200 }, { "epoch": 0.7560856539626641, "grad_norm": 0.8315020290317366, "learning_rate": 2.300276923179675e-06, "loss": 0.0212, "step": 181205 }, { "epoch": 0.7561065166776544, "grad_norm": 0.9179630763929846, "learning_rate": 2.300245187824492e-06, "loss": 0.0276, "step": 181210 }, { "epoch": 0.7561273793926446, "grad_norm": 0.40742018342727904, "learning_rate": 2.300213453782765e-06, "loss": 0.0151, "step": 181215 }, { "epoch": 0.7561482421076349, "grad_norm": 0.6501506577525062, "learning_rate": 2.3001817210544054e-06, "loss": 0.0153, "step": 181220 }, { "epoch": 0.7561691048226252, "grad_norm": 0.5344335776353262, "learning_rate": 2.3001499896393216e-06, "loss": 0.0225, "step": 181225 }, { "epoch": 0.7561899675376155, "grad_norm": 0.5633427378079781, "learning_rate": 2.3001182595374226e-06, "loss": 0.0193, "step": 181230 }, { "epoch": 0.7562108302526057, "grad_norm": 0.6566842234938023, "learning_rate": 2.3000865307486186e-06, "loss": 0.0204, "step": 181235 }, { "epoch": 0.7562316929675961, "grad_norm": 0.6268717569317348, "learning_rate": 2.3000548032728186e-06, "loss": 0.0234, "step": 181240 }, { "epoch": 0.7562525556825863, "grad_norm": 0.28247773494361555, "learning_rate": 2.300023077109933e-06, "loss": 0.0202, "step": 181245 }, { "epoch": 0.7562734183975766, "grad_norm": 0.5598759159723928, "learning_rate": 2.2999913522598697e-06, "loss": 0.0182, "step": 181250 }, { "epoch": 0.7562942811125669, "grad_norm": 0.6337694504705209, "learning_rate": 2.2999596287225387e-06, "loss": 0.0278, "step": 181255 }, { "epoch": 0.7563151438275572, "grad_norm": 0.7296055128974477, "learning_rate": 2.2999279064978504e-06, "loss": 0.0243, "step": 181260 }, { "epoch": 0.7563360065425474, "grad_norm": 0.5405280663125124, "learning_rate": 2.2998961855857125e-06, "loss": 0.0173, "step": 181265 }, { "epoch": 0.7563568692575376, "grad_norm": 0.634339089184706, "learning_rate": 2.2998644659860365e-06, "loss": 0.0191, "step": 181270 }, { "epoch": 0.756377731972528, "grad_norm": 0.6996001840083269, "learning_rate": 2.2998327476987305e-06, "loss": 0.0209, "step": 181275 }, { "epoch": 0.7563985946875182, "grad_norm": 0.6937946175969445, "learning_rate": 2.2998010307237046e-06, "loss": 0.0265, "step": 181280 }, { "epoch": 0.7564194574025085, "grad_norm": 1.7466866614309269, "learning_rate": 2.2997693150608675e-06, "loss": 0.0319, "step": 181285 }, { "epoch": 0.7564403201174988, "grad_norm": 1.4201237058123994, "learning_rate": 2.2997376007101298e-06, "loss": 0.019, "step": 181290 }, { "epoch": 0.7564611828324891, "grad_norm": 0.8071957172523756, "learning_rate": 2.299705887671401e-06, "loss": 0.019, "step": 181295 }, { "epoch": 0.7564820455474793, "grad_norm": 1.0678570527442885, "learning_rate": 2.29967417594459e-06, "loss": 0.0186, "step": 181300 }, { "epoch": 0.7565029082624697, "grad_norm": 0.7246994784445093, "learning_rate": 2.2996424655296067e-06, "loss": 0.0188, "step": 181305 }, { "epoch": 0.7565237709774599, "grad_norm": 0.426106831159277, "learning_rate": 2.2996107564263604e-06, "loss": 0.0161, "step": 181310 }, { "epoch": 0.7565446336924502, "grad_norm": 0.590380714304554, "learning_rate": 2.299579048634761e-06, "loss": 0.028, "step": 181315 }, { "epoch": 0.7565654964074405, "grad_norm": 0.8977981127341298, "learning_rate": 2.2995473421547175e-06, "loss": 0.024, "step": 181320 }, { "epoch": 0.7565863591224308, "grad_norm": 0.6229741646727462, "learning_rate": 2.29951563698614e-06, "loss": 0.0209, "step": 181325 }, { "epoch": 0.756607221837421, "grad_norm": 0.8136107513440759, "learning_rate": 2.2994839331289386e-06, "loss": 0.0252, "step": 181330 }, { "epoch": 0.7566280845524113, "grad_norm": 0.6208513500606619, "learning_rate": 2.2994522305830216e-06, "loss": 0.0254, "step": 181335 }, { "epoch": 0.7566489472674016, "grad_norm": 0.5431921269053431, "learning_rate": 2.2994205293482993e-06, "loss": 0.0271, "step": 181340 }, { "epoch": 0.7566698099823919, "grad_norm": 0.8331252342978118, "learning_rate": 2.2993888294246816e-06, "loss": 0.0216, "step": 181345 }, { "epoch": 0.7566906726973821, "grad_norm": 0.3828422713068801, "learning_rate": 2.2993571308120777e-06, "loss": 0.023, "step": 181350 }, { "epoch": 0.7567115354123725, "grad_norm": 0.9904985585600224, "learning_rate": 2.2993254335103975e-06, "loss": 0.0326, "step": 181355 }, { "epoch": 0.7567323981273627, "grad_norm": 0.548531710262247, "learning_rate": 2.2992937375195503e-06, "loss": 0.0205, "step": 181360 }, { "epoch": 0.756753260842353, "grad_norm": 0.8459399043614696, "learning_rate": 2.299262042839446e-06, "loss": 0.0296, "step": 181365 }, { "epoch": 0.7567741235573433, "grad_norm": 0.7152208488033664, "learning_rate": 2.2992303494699943e-06, "loss": 0.0198, "step": 181370 }, { "epoch": 0.7567949862723335, "grad_norm": 0.5556443208315014, "learning_rate": 2.2991986574111045e-06, "loss": 0.0141, "step": 181375 }, { "epoch": 0.7568158489873238, "grad_norm": 0.2750213457630428, "learning_rate": 2.2991669666626874e-06, "loss": 0.0155, "step": 181380 }, { "epoch": 0.7568367117023141, "grad_norm": 0.6212165209680222, "learning_rate": 2.2991352772246515e-06, "loss": 0.021, "step": 181385 }, { "epoch": 0.7568575744173044, "grad_norm": 0.7263168300075056, "learning_rate": 2.2991035890969064e-06, "loss": 0.0174, "step": 181390 }, { "epoch": 0.7568784371322946, "grad_norm": 0.541688631920915, "learning_rate": 2.299071902279363e-06, "loss": 0.0204, "step": 181395 }, { "epoch": 0.7568992998472849, "grad_norm": 1.1496100499986883, "learning_rate": 2.2990402167719296e-06, "loss": 0.0274, "step": 181400 }, { "epoch": 0.7569201625622752, "grad_norm": 0.7588443578948553, "learning_rate": 2.299008532574517e-06, "loss": 0.0172, "step": 181405 }, { "epoch": 0.7569410252772655, "grad_norm": 1.0233013993021072, "learning_rate": 2.2989768496870345e-06, "loss": 0.0298, "step": 181410 }, { "epoch": 0.7569618879922557, "grad_norm": 0.9471782242192182, "learning_rate": 2.298945168109392e-06, "loss": 0.0274, "step": 181415 }, { "epoch": 0.7569827507072461, "grad_norm": 0.41848326767043253, "learning_rate": 2.298913487841499e-06, "loss": 0.0219, "step": 181420 }, { "epoch": 0.7570036134222363, "grad_norm": 0.8076782112645479, "learning_rate": 2.298881808883265e-06, "loss": 0.0205, "step": 181425 }, { "epoch": 0.7570244761372266, "grad_norm": 0.6281786106806084, "learning_rate": 2.2988501312346008e-06, "loss": 0.0184, "step": 181430 }, { "epoch": 0.7570453388522169, "grad_norm": 0.5298513541787169, "learning_rate": 2.298818454895415e-06, "loss": 0.0192, "step": 181435 }, { "epoch": 0.7570662015672072, "grad_norm": 0.37027162558290044, "learning_rate": 2.2987867798656185e-06, "loss": 0.0166, "step": 181440 }, { "epoch": 0.7570870642821974, "grad_norm": 0.5742918807167121, "learning_rate": 2.29875510614512e-06, "loss": 0.0244, "step": 181445 }, { "epoch": 0.7571079269971877, "grad_norm": 0.4939023432462813, "learning_rate": 2.2987234337338303e-06, "loss": 0.0214, "step": 181450 }, { "epoch": 0.757128789712178, "grad_norm": 0.5354919419485059, "learning_rate": 2.2986917626316584e-06, "loss": 0.0163, "step": 181455 }, { "epoch": 0.7571496524271683, "grad_norm": 0.5925348222909705, "learning_rate": 2.2986600928385145e-06, "loss": 0.0175, "step": 181460 }, { "epoch": 0.7571705151421585, "grad_norm": 0.6999601091228123, "learning_rate": 2.2986284243543085e-06, "loss": 0.0258, "step": 181465 }, { "epoch": 0.7571913778571489, "grad_norm": 0.5794277415595527, "learning_rate": 2.2985967571789504e-06, "loss": 0.0172, "step": 181470 }, { "epoch": 0.7572122405721391, "grad_norm": 0.5245191444964837, "learning_rate": 2.298565091312349e-06, "loss": 0.0203, "step": 181475 }, { "epoch": 0.7572331032871293, "grad_norm": 0.37110087705046757, "learning_rate": 2.2985334267544154e-06, "loss": 0.021, "step": 181480 }, { "epoch": 0.7572539660021197, "grad_norm": 0.8910086241208295, "learning_rate": 2.2985017635050593e-06, "loss": 0.0269, "step": 181485 }, { "epoch": 0.7572748287171099, "grad_norm": 0.4854325138598861, "learning_rate": 2.298470101564189e-06, "loss": 0.0226, "step": 181490 }, { "epoch": 0.7572956914321002, "grad_norm": 4.579733199333082, "learning_rate": 2.2984384409317167e-06, "loss": 0.0215, "step": 181495 }, { "epoch": 0.7573165541470905, "grad_norm": 0.4494001234891476, "learning_rate": 2.298406781607551e-06, "loss": 0.0209, "step": 181500 }, { "epoch": 0.7573374168620808, "grad_norm": 0.2967542726839957, "learning_rate": 2.298375123591602e-06, "loss": 0.0183, "step": 181505 }, { "epoch": 0.757358279577071, "grad_norm": 0.44836168555477046, "learning_rate": 2.2983434668837794e-06, "loss": 0.027, "step": 181510 }, { "epoch": 0.7573791422920613, "grad_norm": 0.6993380456959826, "learning_rate": 2.298311811483994e-06, "loss": 0.023, "step": 181515 }, { "epoch": 0.7574000050070516, "grad_norm": 0.6409194904999023, "learning_rate": 2.2982801573921542e-06, "loss": 0.0245, "step": 181520 }, { "epoch": 0.7574208677220419, "grad_norm": 0.8172609645323428, "learning_rate": 2.2982485046081714e-06, "loss": 0.0243, "step": 181525 }, { "epoch": 0.7574417304370321, "grad_norm": 1.3337429290633698, "learning_rate": 2.2982168531319545e-06, "loss": 0.0212, "step": 181530 }, { "epoch": 0.7574625931520225, "grad_norm": 0.9404168072011213, "learning_rate": 2.2981852029634146e-06, "loss": 0.0284, "step": 181535 }, { "epoch": 0.7574834558670127, "grad_norm": 0.7363031920369865, "learning_rate": 2.29815355410246e-06, "loss": 0.017, "step": 181540 }, { "epoch": 0.757504318582003, "grad_norm": 0.8139092172950544, "learning_rate": 2.298121906549002e-06, "loss": 0.0194, "step": 181545 }, { "epoch": 0.7575251812969933, "grad_norm": 0.6877112277796368, "learning_rate": 2.2980902603029505e-06, "loss": 0.0185, "step": 181550 }, { "epoch": 0.7575460440119836, "grad_norm": 0.48974655915336424, "learning_rate": 2.298058615364215e-06, "loss": 0.0174, "step": 181555 }, { "epoch": 0.7575669067269738, "grad_norm": 0.7561640365805842, "learning_rate": 2.2980269717327055e-06, "loss": 0.0242, "step": 181560 }, { "epoch": 0.7575877694419642, "grad_norm": 1.4093487180624662, "learning_rate": 2.297995329408332e-06, "loss": 0.0258, "step": 181565 }, { "epoch": 0.7576086321569544, "grad_norm": 0.3421057423398368, "learning_rate": 2.297963688391005e-06, "loss": 0.0156, "step": 181570 }, { "epoch": 0.7576294948719446, "grad_norm": 0.5158154074974138, "learning_rate": 2.297932048680634e-06, "loss": 0.0235, "step": 181575 }, { "epoch": 0.7576503575869349, "grad_norm": 0.7367056162426985, "learning_rate": 2.2979004102771293e-06, "loss": 0.0194, "step": 181580 }, { "epoch": 0.7576712203019252, "grad_norm": 0.49943391683173355, "learning_rate": 2.2978687731804013e-06, "loss": 0.02, "step": 181585 }, { "epoch": 0.7576920830169155, "grad_norm": 0.8453244581765981, "learning_rate": 2.297837137390359e-06, "loss": 0.0186, "step": 181590 }, { "epoch": 0.7577129457319057, "grad_norm": 0.41883440435337627, "learning_rate": 2.2978055029069128e-06, "loss": 0.02, "step": 181595 }, { "epoch": 0.7577338084468961, "grad_norm": 0.7448856450937403, "learning_rate": 2.2977738697299736e-06, "loss": 0.0248, "step": 181600 }, { "epoch": 0.7577546711618863, "grad_norm": 0.3592838961693451, "learning_rate": 2.297742237859451e-06, "loss": 0.0208, "step": 181605 }, { "epoch": 0.7577755338768766, "grad_norm": 0.38994587101780437, "learning_rate": 2.297710607295254e-06, "loss": 0.0183, "step": 181610 }, { "epoch": 0.7577963965918669, "grad_norm": 0.9993768029678549, "learning_rate": 2.2976789780372947e-06, "loss": 0.0236, "step": 181615 }, { "epoch": 0.7578172593068572, "grad_norm": 0.6693481996412376, "learning_rate": 2.297647350085482e-06, "loss": 0.017, "step": 181620 }, { "epoch": 0.7578381220218474, "grad_norm": 0.5906064235237406, "learning_rate": 2.297615723439726e-06, "loss": 0.0214, "step": 181625 }, { "epoch": 0.7578589847368377, "grad_norm": 0.5506137454841585, "learning_rate": 2.297584098099937e-06, "loss": 0.0193, "step": 181630 }, { "epoch": 0.757879847451828, "grad_norm": 0.7926175141709457, "learning_rate": 2.297552474066025e-06, "loss": 0.0201, "step": 181635 }, { "epoch": 0.7579007101668183, "grad_norm": 0.3820022083750876, "learning_rate": 2.2975208513379e-06, "loss": 0.0193, "step": 181640 }, { "epoch": 0.7579215728818085, "grad_norm": 0.8527491721739237, "learning_rate": 2.2974892299154726e-06, "loss": 0.0322, "step": 181645 }, { "epoch": 0.7579424355967989, "grad_norm": 0.857609614994831, "learning_rate": 2.2974576097986524e-06, "loss": 0.0179, "step": 181650 }, { "epoch": 0.7579632983117891, "grad_norm": 0.7670249964198246, "learning_rate": 2.2974259909873506e-06, "loss": 0.0213, "step": 181655 }, { "epoch": 0.7579841610267793, "grad_norm": 0.6053552779622492, "learning_rate": 2.2973943734814764e-06, "loss": 0.0189, "step": 181660 }, { "epoch": 0.7580050237417697, "grad_norm": 0.6331781495784095, "learning_rate": 2.29736275728094e-06, "loss": 0.0194, "step": 181665 }, { "epoch": 0.75802588645676, "grad_norm": 0.9856918803620556, "learning_rate": 2.297331142385652e-06, "loss": 0.0324, "step": 181670 }, { "epoch": 0.7580467491717502, "grad_norm": 1.0598033210601927, "learning_rate": 2.2972995287955223e-06, "loss": 0.0207, "step": 181675 }, { "epoch": 0.7580676118867405, "grad_norm": 0.7604632420985088, "learning_rate": 2.297267916510461e-06, "loss": 0.0242, "step": 181680 }, { "epoch": 0.7580884746017308, "grad_norm": 0.7701580954304317, "learning_rate": 2.297236305530379e-06, "loss": 0.021, "step": 181685 }, { "epoch": 0.758109337316721, "grad_norm": 0.6345482467395487, "learning_rate": 2.2972046958551856e-06, "loss": 0.0224, "step": 181690 }, { "epoch": 0.7581302000317113, "grad_norm": 0.6987776156442888, "learning_rate": 2.2971730874847914e-06, "loss": 0.0162, "step": 181695 }, { "epoch": 0.7581510627467016, "grad_norm": 0.5845762244394846, "learning_rate": 2.2971414804191074e-06, "loss": 0.019, "step": 181700 }, { "epoch": 0.7581719254616919, "grad_norm": 0.5594226032853128, "learning_rate": 2.2971098746580426e-06, "loss": 0.0165, "step": 181705 }, { "epoch": 0.7581927881766821, "grad_norm": 1.5519304283342246, "learning_rate": 2.297078270201507e-06, "loss": 0.0245, "step": 181710 }, { "epoch": 0.7582136508916725, "grad_norm": 0.5796613725245728, "learning_rate": 2.297046667049413e-06, "loss": 0.016, "step": 181715 }, { "epoch": 0.7582345136066627, "grad_norm": 1.2068422009951598, "learning_rate": 2.297015065201669e-06, "loss": 0.023, "step": 181720 }, { "epoch": 0.758255376321653, "grad_norm": 0.579629599988508, "learning_rate": 2.2969834646581856e-06, "loss": 0.0164, "step": 181725 }, { "epoch": 0.7582762390366433, "grad_norm": 0.6028063533929449, "learning_rate": 2.2969518654188734e-06, "loss": 0.0215, "step": 181730 }, { "epoch": 0.7582971017516336, "grad_norm": 0.8056283555322381, "learning_rate": 2.2969202674836426e-06, "loss": 0.0242, "step": 181735 }, { "epoch": 0.7583179644666238, "grad_norm": 0.4892542680727724, "learning_rate": 2.2968886708524037e-06, "loss": 0.0225, "step": 181740 }, { "epoch": 0.7583388271816142, "grad_norm": 0.679104848116774, "learning_rate": 2.2968570755250662e-06, "loss": 0.0204, "step": 181745 }, { "epoch": 0.7583596898966044, "grad_norm": 0.47569508077150036, "learning_rate": 2.296825481501541e-06, "loss": 0.0197, "step": 181750 }, { "epoch": 0.7583805526115946, "grad_norm": 0.4981214000280482, "learning_rate": 2.2967938887817394e-06, "loss": 0.0164, "step": 181755 }, { "epoch": 0.7584014153265849, "grad_norm": 0.42690393096304835, "learning_rate": 2.2967622973655698e-06, "loss": 0.0178, "step": 181760 }, { "epoch": 0.7584222780415752, "grad_norm": 1.0578302602608127, "learning_rate": 2.296730707252944e-06, "loss": 0.0246, "step": 181765 }, { "epoch": 0.7584431407565655, "grad_norm": 0.9540215748254005, "learning_rate": 2.296699118443772e-06, "loss": 0.0264, "step": 181770 }, { "epoch": 0.7584640034715557, "grad_norm": 0.47632901323633153, "learning_rate": 2.2966675309379638e-06, "loss": 0.0215, "step": 181775 }, { "epoch": 0.7584848661865461, "grad_norm": 0.39396833084849836, "learning_rate": 2.29663594473543e-06, "loss": 0.0153, "step": 181780 }, { "epoch": 0.7585057289015363, "grad_norm": 0.4804640094504145, "learning_rate": 2.2966043598360812e-06, "loss": 0.0193, "step": 181785 }, { "epoch": 0.7585265916165266, "grad_norm": 1.0238744598908862, "learning_rate": 2.2965727762398277e-06, "loss": 0.0192, "step": 181790 }, { "epoch": 0.7585474543315169, "grad_norm": 0.47394292745830835, "learning_rate": 2.29654119394658e-06, "loss": 0.019, "step": 181795 }, { "epoch": 0.7585683170465072, "grad_norm": 0.7447579755502166, "learning_rate": 2.2965096129562473e-06, "loss": 0.025, "step": 181800 }, { "epoch": 0.7585891797614974, "grad_norm": 0.5367141668097501, "learning_rate": 2.296478033268742e-06, "loss": 0.0186, "step": 181805 }, { "epoch": 0.7586100424764877, "grad_norm": 1.340329418155802, "learning_rate": 2.2964464548839732e-06, "loss": 0.0248, "step": 181810 }, { "epoch": 0.758630905191478, "grad_norm": 0.6899700457015164, "learning_rate": 2.2964148778018516e-06, "loss": 0.0275, "step": 181815 }, { "epoch": 0.7586517679064683, "grad_norm": 0.8849404242595487, "learning_rate": 2.296383302022288e-06, "loss": 0.0283, "step": 181820 }, { "epoch": 0.7586726306214585, "grad_norm": 0.8281623504587982, "learning_rate": 2.296351727545193e-06, "loss": 0.0239, "step": 181825 }, { "epoch": 0.7586934933364489, "grad_norm": 0.7444403653937969, "learning_rate": 2.2963201543704757e-06, "loss": 0.0164, "step": 181830 }, { "epoch": 0.7587143560514391, "grad_norm": 0.5576086742790899, "learning_rate": 2.2962885824980476e-06, "loss": 0.0185, "step": 181835 }, { "epoch": 0.7587352187664294, "grad_norm": 0.367632068555198, "learning_rate": 2.2962570119278195e-06, "loss": 0.0215, "step": 181840 }, { "epoch": 0.7587560814814197, "grad_norm": 0.7476959674722732, "learning_rate": 2.2962254426597013e-06, "loss": 0.027, "step": 181845 }, { "epoch": 0.75877694419641, "grad_norm": 0.7368779799474106, "learning_rate": 2.2961938746936037e-06, "loss": 0.0208, "step": 181850 }, { "epoch": 0.7587978069114002, "grad_norm": 0.5787861135085576, "learning_rate": 2.296162308029437e-06, "loss": 0.0187, "step": 181855 }, { "epoch": 0.7588186696263906, "grad_norm": 1.0707811447650255, "learning_rate": 2.2961307426671124e-06, "loss": 0.022, "step": 181860 }, { "epoch": 0.7588395323413808, "grad_norm": 0.6961320194317249, "learning_rate": 2.2960991786065397e-06, "loss": 0.0207, "step": 181865 }, { "epoch": 0.758860395056371, "grad_norm": 0.6001636494875286, "learning_rate": 2.296067615847629e-06, "loss": 0.0248, "step": 181870 }, { "epoch": 0.7588812577713613, "grad_norm": 0.6876123062599475, "learning_rate": 2.2960360543902927e-06, "loss": 0.0183, "step": 181875 }, { "epoch": 0.7589021204863516, "grad_norm": 0.7085801054036627, "learning_rate": 2.296004494234439e-06, "loss": 0.0163, "step": 181880 }, { "epoch": 0.7589229832013419, "grad_norm": 0.43526074593333464, "learning_rate": 2.29597293537998e-06, "loss": 0.0171, "step": 181885 }, { "epoch": 0.7589438459163321, "grad_norm": 0.6434284973039937, "learning_rate": 2.2959413778268257e-06, "loss": 0.0199, "step": 181890 }, { "epoch": 0.7589647086313225, "grad_norm": 0.20544277584035375, "learning_rate": 2.295909821574887e-06, "loss": 0.0178, "step": 181895 }, { "epoch": 0.7589855713463127, "grad_norm": 0.6611715314599106, "learning_rate": 2.295878266624074e-06, "loss": 0.0236, "step": 181900 }, { "epoch": 0.759006434061303, "grad_norm": 0.5386789606957401, "learning_rate": 2.295846712974298e-06, "loss": 0.0135, "step": 181905 }, { "epoch": 0.7590272967762933, "grad_norm": 0.6005343817714844, "learning_rate": 2.295815160625469e-06, "loss": 0.0206, "step": 181910 }, { "epoch": 0.7590481594912836, "grad_norm": 3.563578479014741, "learning_rate": 2.2957836095774976e-06, "loss": 0.0267, "step": 181915 }, { "epoch": 0.7590690222062738, "grad_norm": 0.6089209966836598, "learning_rate": 2.2957520598302947e-06, "loss": 0.0199, "step": 181920 }, { "epoch": 0.7590898849212642, "grad_norm": 0.5033228673841522, "learning_rate": 2.2957205113837714e-06, "loss": 0.0225, "step": 181925 }, { "epoch": 0.7591107476362544, "grad_norm": 0.4146684550526891, "learning_rate": 2.295688964237837e-06, "loss": 0.017, "step": 181930 }, { "epoch": 0.7591316103512447, "grad_norm": 0.36947091770816237, "learning_rate": 2.2956574183924026e-06, "loss": 0.0201, "step": 181935 }, { "epoch": 0.7591524730662349, "grad_norm": 0.49975724375526887, "learning_rate": 2.2956258738473796e-06, "loss": 0.0231, "step": 181940 }, { "epoch": 0.7591733357812253, "grad_norm": 0.7670749648636757, "learning_rate": 2.295594330602679e-06, "loss": 0.0194, "step": 181945 }, { "epoch": 0.7591941984962155, "grad_norm": 0.5694242653752595, "learning_rate": 2.295562788658209e-06, "loss": 0.0144, "step": 181950 }, { "epoch": 0.7592150612112057, "grad_norm": 0.7919557900060888, "learning_rate": 2.2955312480138834e-06, "loss": 0.0249, "step": 181955 }, { "epoch": 0.7592359239261961, "grad_norm": 0.9967044522098226, "learning_rate": 2.295499708669611e-06, "loss": 0.0307, "step": 181960 }, { "epoch": 0.7592567866411863, "grad_norm": 0.5783324468509264, "learning_rate": 2.295468170625303e-06, "loss": 0.0279, "step": 181965 }, { "epoch": 0.7592776493561766, "grad_norm": 0.36337279247766807, "learning_rate": 2.2954366338808692e-06, "loss": 0.0166, "step": 181970 }, { "epoch": 0.7592985120711669, "grad_norm": 0.5928058393324747, "learning_rate": 2.295405098436222e-06, "loss": 0.0272, "step": 181975 }, { "epoch": 0.7593193747861572, "grad_norm": 1.0487739133451812, "learning_rate": 2.295373564291271e-06, "loss": 0.0258, "step": 181980 }, { "epoch": 0.7593402375011474, "grad_norm": 0.6868984202348765, "learning_rate": 2.295342031445927e-06, "loss": 0.0163, "step": 181985 }, { "epoch": 0.7593611002161377, "grad_norm": 0.47888601893549126, "learning_rate": 2.295310499900101e-06, "loss": 0.0222, "step": 181990 }, { "epoch": 0.759381962931128, "grad_norm": 0.7368810855114873, "learning_rate": 2.295278969653704e-06, "loss": 0.0221, "step": 181995 }, { "epoch": 0.7594028256461183, "grad_norm": 0.5399874121147304, "learning_rate": 2.295247440706646e-06, "loss": 0.0188, "step": 182000 }, { "epoch": 0.7594236883611085, "grad_norm": 0.45001923840305924, "learning_rate": 2.2952159130588384e-06, "loss": 0.0189, "step": 182005 }, { "epoch": 0.7594445510760989, "grad_norm": 0.7644979516186156, "learning_rate": 2.295184386710191e-06, "loss": 0.0232, "step": 182010 }, { "epoch": 0.7594654137910891, "grad_norm": 0.5834716598199621, "learning_rate": 2.2951528616606163e-06, "loss": 0.0177, "step": 182015 }, { "epoch": 0.7594862765060794, "grad_norm": 0.7405847828300927, "learning_rate": 2.295121337910024e-06, "loss": 0.0223, "step": 182020 }, { "epoch": 0.7595071392210697, "grad_norm": 0.6162403510261062, "learning_rate": 2.2950898154583247e-06, "loss": 0.0228, "step": 182025 }, { "epoch": 0.75952800193606, "grad_norm": 0.45166391515856674, "learning_rate": 2.2950582943054295e-06, "loss": 0.0236, "step": 182030 }, { "epoch": 0.7595488646510502, "grad_norm": 0.49250351106784046, "learning_rate": 2.2950267744512487e-06, "loss": 0.0183, "step": 182035 }, { "epoch": 0.7595697273660406, "grad_norm": 0.7130123917211466, "learning_rate": 2.2949952558956945e-06, "loss": 0.0212, "step": 182040 }, { "epoch": 0.7595905900810308, "grad_norm": 0.4353229624037586, "learning_rate": 2.2949637386386764e-06, "loss": 0.0214, "step": 182045 }, { "epoch": 0.759611452796021, "grad_norm": 0.5242993347848715, "learning_rate": 2.294932222680106e-06, "loss": 0.038, "step": 182050 }, { "epoch": 0.7596323155110113, "grad_norm": 0.4373852922796837, "learning_rate": 2.2949007080198933e-06, "loss": 0.0175, "step": 182055 }, { "epoch": 0.7596531782260016, "grad_norm": 1.1366725041508094, "learning_rate": 2.2948691946579503e-06, "loss": 0.0205, "step": 182060 }, { "epoch": 0.7596740409409919, "grad_norm": 0.8998677746133611, "learning_rate": 2.294837682594187e-06, "loss": 0.0223, "step": 182065 }, { "epoch": 0.7596949036559821, "grad_norm": 0.8395759948985543, "learning_rate": 2.2948061718285148e-06, "loss": 0.0199, "step": 182070 }, { "epoch": 0.7597157663709725, "grad_norm": 0.9074055041080821, "learning_rate": 2.2947746623608443e-06, "loss": 0.0229, "step": 182075 }, { "epoch": 0.7597366290859627, "grad_norm": 0.9898813877222276, "learning_rate": 2.2947431541910854e-06, "loss": 0.0206, "step": 182080 }, { "epoch": 0.759757491800953, "grad_norm": 1.8865772403911332, "learning_rate": 2.2947116473191512e-06, "loss": 0.0255, "step": 182085 }, { "epoch": 0.7597783545159433, "grad_norm": 0.6856432195175541, "learning_rate": 2.294680141744951e-06, "loss": 0.021, "step": 182090 }, { "epoch": 0.7597992172309336, "grad_norm": 0.9923125034453135, "learning_rate": 2.2946486374683963e-06, "loss": 0.0204, "step": 182095 }, { "epoch": 0.7598200799459238, "grad_norm": 0.5816365970741029, "learning_rate": 2.294617134489398e-06, "loss": 0.0203, "step": 182100 }, { "epoch": 0.7598409426609142, "grad_norm": 1.0084129310978813, "learning_rate": 2.294585632807866e-06, "loss": 0.025, "step": 182105 }, { "epoch": 0.7598618053759044, "grad_norm": 1.868599873510915, "learning_rate": 2.2945541324237126e-06, "loss": 0.022, "step": 182110 }, { "epoch": 0.7598826680908947, "grad_norm": 0.587755638186909, "learning_rate": 2.2945226333368485e-06, "loss": 0.028, "step": 182115 }, { "epoch": 0.7599035308058849, "grad_norm": 1.0258459711906873, "learning_rate": 2.2944911355471845e-06, "loss": 0.0329, "step": 182120 }, { "epoch": 0.7599243935208753, "grad_norm": 0.7902257961313109, "learning_rate": 2.2944596390546315e-06, "loss": 0.0245, "step": 182125 }, { "epoch": 0.7599452562358655, "grad_norm": 0.3329344210343845, "learning_rate": 2.2944281438591e-06, "loss": 0.0159, "step": 182130 }, { "epoch": 0.7599661189508558, "grad_norm": 0.47334307108811896, "learning_rate": 2.294396649960502e-06, "loss": 0.0176, "step": 182135 }, { "epoch": 0.7599869816658461, "grad_norm": 0.4243370318335893, "learning_rate": 2.294365157358748e-06, "loss": 0.0168, "step": 182140 }, { "epoch": 0.7600078443808364, "grad_norm": 0.7683068305563457, "learning_rate": 2.2943336660537486e-06, "loss": 0.0156, "step": 182145 }, { "epoch": 0.7600287070958266, "grad_norm": 0.5956566326500383, "learning_rate": 2.294302176045415e-06, "loss": 0.0224, "step": 182150 }, { "epoch": 0.760049569810817, "grad_norm": 0.3391973142270333, "learning_rate": 2.294270687333659e-06, "loss": 0.0291, "step": 182155 }, { "epoch": 0.7600704325258072, "grad_norm": 0.30975744304115904, "learning_rate": 2.2942391999183904e-06, "loss": 0.0183, "step": 182160 }, { "epoch": 0.7600912952407974, "grad_norm": 0.552226567363379, "learning_rate": 2.2942077137995207e-06, "loss": 0.0248, "step": 182165 }, { "epoch": 0.7601121579557877, "grad_norm": 0.25672083119643097, "learning_rate": 2.2941762289769616e-06, "loss": 0.0164, "step": 182170 }, { "epoch": 0.760133020670778, "grad_norm": 0.8028439207496608, "learning_rate": 2.2941447454506234e-06, "loss": 0.0193, "step": 182175 }, { "epoch": 0.7601538833857683, "grad_norm": 0.6698733572129015, "learning_rate": 2.2941132632204176e-06, "loss": 0.0221, "step": 182180 }, { "epoch": 0.7601747461007585, "grad_norm": 0.6029727700833852, "learning_rate": 2.294081782286255e-06, "loss": 0.0321, "step": 182185 }, { "epoch": 0.7601956088157489, "grad_norm": 0.4273507851800963, "learning_rate": 2.2940503026480464e-06, "loss": 0.0267, "step": 182190 }, { "epoch": 0.7602164715307391, "grad_norm": 0.9325490857998388, "learning_rate": 2.2940188243057036e-06, "loss": 0.024, "step": 182195 }, { "epoch": 0.7602373342457294, "grad_norm": 0.9249015913715982, "learning_rate": 2.2939873472591374e-06, "loss": 0.0179, "step": 182200 }, { "epoch": 0.7602581969607197, "grad_norm": 0.6567235290946075, "learning_rate": 2.2939558715082584e-06, "loss": 0.0183, "step": 182205 }, { "epoch": 0.76027905967571, "grad_norm": 1.0151682899036392, "learning_rate": 2.2939243970529786e-06, "loss": 0.0207, "step": 182210 }, { "epoch": 0.7602999223907002, "grad_norm": 1.0164384673115077, "learning_rate": 2.2938929238932085e-06, "loss": 0.023, "step": 182215 }, { "epoch": 0.7603207851056906, "grad_norm": 0.9468533163240085, "learning_rate": 2.2938614520288594e-06, "loss": 0.0299, "step": 182220 }, { "epoch": 0.7603416478206808, "grad_norm": 0.6356014760639591, "learning_rate": 2.293829981459842e-06, "loss": 0.0311, "step": 182225 }, { "epoch": 0.760362510535671, "grad_norm": 1.107867099121135, "learning_rate": 2.2937985121860682e-06, "loss": 0.0238, "step": 182230 }, { "epoch": 0.7603833732506613, "grad_norm": 0.25208143356968, "learning_rate": 2.2937670442074496e-06, "loss": 0.0165, "step": 182235 }, { "epoch": 0.7604042359656517, "grad_norm": 0.5505224956232384, "learning_rate": 2.2937355775238957e-06, "loss": 0.0157, "step": 182240 }, { "epoch": 0.7604250986806419, "grad_norm": 0.5337349527547773, "learning_rate": 2.293704112135319e-06, "loss": 0.0205, "step": 182245 }, { "epoch": 0.7604459613956321, "grad_norm": 0.7096062128705433, "learning_rate": 2.2936726480416296e-06, "loss": 0.0194, "step": 182250 }, { "epoch": 0.7604668241106225, "grad_norm": 0.5670637131719947, "learning_rate": 2.29364118524274e-06, "loss": 0.0239, "step": 182255 }, { "epoch": 0.7604876868256127, "grad_norm": 0.5957224012313722, "learning_rate": 2.2936097237385604e-06, "loss": 0.0252, "step": 182260 }, { "epoch": 0.760508549540603, "grad_norm": 0.18357573084487538, "learning_rate": 2.2935782635290025e-06, "loss": 0.0157, "step": 182265 }, { "epoch": 0.7605294122555933, "grad_norm": 0.7131882577352048, "learning_rate": 2.293546804613977e-06, "loss": 0.0221, "step": 182270 }, { "epoch": 0.7605502749705836, "grad_norm": 0.7099321614886804, "learning_rate": 2.2935153469933957e-06, "loss": 0.0203, "step": 182275 }, { "epoch": 0.7605711376855738, "grad_norm": 1.2944375038685276, "learning_rate": 2.2934838906671697e-06, "loss": 0.0256, "step": 182280 }, { "epoch": 0.7605920004005642, "grad_norm": 0.2569138770992395, "learning_rate": 2.2934524356352104e-06, "loss": 0.0117, "step": 182285 }, { "epoch": 0.7606128631155544, "grad_norm": 0.49832385297228793, "learning_rate": 2.293420981897428e-06, "loss": 0.0248, "step": 182290 }, { "epoch": 0.7606337258305447, "grad_norm": 0.741954998565499, "learning_rate": 2.293389529453735e-06, "loss": 0.0232, "step": 182295 }, { "epoch": 0.7606545885455349, "grad_norm": 0.49557060205635467, "learning_rate": 2.2933580783040426e-06, "loss": 0.0288, "step": 182300 }, { "epoch": 0.7606754512605253, "grad_norm": 1.1348298476517609, "learning_rate": 2.2933266284482614e-06, "loss": 0.0263, "step": 182305 }, { "epoch": 0.7606963139755155, "grad_norm": 1.185675942303567, "learning_rate": 2.2932951798863028e-06, "loss": 0.02, "step": 182310 }, { "epoch": 0.7607171766905058, "grad_norm": 0.6242802286861477, "learning_rate": 2.293263732618078e-06, "loss": 0.0271, "step": 182315 }, { "epoch": 0.7607380394054961, "grad_norm": 0.49105039535905204, "learning_rate": 2.2932322866434993e-06, "loss": 0.0184, "step": 182320 }, { "epoch": 0.7607589021204864, "grad_norm": 0.9223717673721009, "learning_rate": 2.293200841962477e-06, "loss": 0.02, "step": 182325 }, { "epoch": 0.7607797648354766, "grad_norm": 0.46624293389395705, "learning_rate": 2.2931693985749224e-06, "loss": 0.0185, "step": 182330 }, { "epoch": 0.760800627550467, "grad_norm": 0.60049772231884, "learning_rate": 2.2931379564807473e-06, "loss": 0.016, "step": 182335 }, { "epoch": 0.7608214902654572, "grad_norm": 1.6071485131552452, "learning_rate": 2.2931065156798627e-06, "loss": 0.0267, "step": 182340 }, { "epoch": 0.7608423529804474, "grad_norm": 0.7657395849459706, "learning_rate": 2.2930750761721805e-06, "loss": 0.0439, "step": 182345 }, { "epoch": 0.7608632156954377, "grad_norm": 4.784509246054238, "learning_rate": 2.293043637957611e-06, "loss": 0.0217, "step": 182350 }, { "epoch": 0.760884078410428, "grad_norm": 0.4239161123379522, "learning_rate": 2.2930122010360667e-06, "loss": 0.0156, "step": 182355 }, { "epoch": 0.7609049411254183, "grad_norm": 0.45726704924888495, "learning_rate": 2.2929807654074583e-06, "loss": 0.0271, "step": 182360 }, { "epoch": 0.7609258038404085, "grad_norm": 0.6502924834370416, "learning_rate": 2.2929493310716975e-06, "loss": 0.0179, "step": 182365 }, { "epoch": 0.7609466665553989, "grad_norm": 0.7893588566706821, "learning_rate": 2.292917898028696e-06, "loss": 0.0289, "step": 182370 }, { "epoch": 0.7609675292703891, "grad_norm": 0.6171815313346379, "learning_rate": 2.2928864662783638e-06, "loss": 0.0204, "step": 182375 }, { "epoch": 0.7609883919853794, "grad_norm": 0.23470198447182247, "learning_rate": 2.2928550358206133e-06, "loss": 0.0206, "step": 182380 }, { "epoch": 0.7610092547003697, "grad_norm": 0.568349103775155, "learning_rate": 2.292823606655356e-06, "loss": 0.0168, "step": 182385 }, { "epoch": 0.76103011741536, "grad_norm": 0.34175139566293794, "learning_rate": 2.292792178782503e-06, "loss": 0.0166, "step": 182390 }, { "epoch": 0.7610509801303502, "grad_norm": 0.6744788396439039, "learning_rate": 2.2927607522019663e-06, "loss": 0.0205, "step": 182395 }, { "epoch": 0.7610718428453406, "grad_norm": 0.6839943880703523, "learning_rate": 2.2927293269136563e-06, "loss": 0.0195, "step": 182400 }, { "epoch": 0.7610927055603308, "grad_norm": 0.5221823991044243, "learning_rate": 2.2926979029174856e-06, "loss": 0.0173, "step": 182405 }, { "epoch": 0.7611135682753211, "grad_norm": 0.606895283498407, "learning_rate": 2.292666480213365e-06, "loss": 0.026, "step": 182410 }, { "epoch": 0.7611344309903113, "grad_norm": 0.9193425234665974, "learning_rate": 2.292635058801206e-06, "loss": 0.024, "step": 182415 }, { "epoch": 0.7611552937053017, "grad_norm": 0.764323101314771, "learning_rate": 2.29260363868092e-06, "loss": 0.0284, "step": 182420 }, { "epoch": 0.7611761564202919, "grad_norm": 0.2960793358724534, "learning_rate": 2.2925722198524192e-06, "loss": 0.0203, "step": 182425 }, { "epoch": 0.7611970191352821, "grad_norm": 1.3631722898387328, "learning_rate": 2.292540802315614e-06, "loss": 0.0232, "step": 182430 }, { "epoch": 0.7612178818502725, "grad_norm": 0.6408024199598021, "learning_rate": 2.292509386070416e-06, "loss": 0.0166, "step": 182435 }, { "epoch": 0.7612387445652627, "grad_norm": 1.2824999147228369, "learning_rate": 2.292477971116738e-06, "loss": 0.0274, "step": 182440 }, { "epoch": 0.761259607280253, "grad_norm": 0.49228053921817483, "learning_rate": 2.29244655745449e-06, "loss": 0.0249, "step": 182445 }, { "epoch": 0.7612804699952433, "grad_norm": 0.3224453352415447, "learning_rate": 2.292415145083585e-06, "loss": 0.021, "step": 182450 }, { "epoch": 0.7613013327102336, "grad_norm": 0.6293572560401518, "learning_rate": 2.2923837340039327e-06, "loss": 0.0207, "step": 182455 }, { "epoch": 0.7613221954252238, "grad_norm": 0.35932023621173886, "learning_rate": 2.292352324215446e-06, "loss": 0.0136, "step": 182460 }, { "epoch": 0.7613430581402141, "grad_norm": 0.3670131788204101, "learning_rate": 2.2923209157180356e-06, "loss": 0.0211, "step": 182465 }, { "epoch": 0.7613639208552044, "grad_norm": 0.6678203030685466, "learning_rate": 2.2922895085116135e-06, "loss": 0.0229, "step": 182470 }, { "epoch": 0.7613847835701947, "grad_norm": 0.5854619106402535, "learning_rate": 2.292258102596092e-06, "loss": 0.0265, "step": 182475 }, { "epoch": 0.7614056462851849, "grad_norm": 0.6944335542982248, "learning_rate": 2.292226697971381e-06, "loss": 0.0194, "step": 182480 }, { "epoch": 0.7614265090001753, "grad_norm": 1.0679348746100947, "learning_rate": 2.292195294637394e-06, "loss": 0.0219, "step": 182485 }, { "epoch": 0.7614473717151655, "grad_norm": 0.8675389741660918, "learning_rate": 2.2921638925940413e-06, "loss": 0.0212, "step": 182490 }, { "epoch": 0.7614682344301558, "grad_norm": 0.36988957201246786, "learning_rate": 2.292132491841235e-06, "loss": 0.0237, "step": 182495 }, { "epoch": 0.7614890971451461, "grad_norm": 0.5193806340235503, "learning_rate": 2.2921010923788857e-06, "loss": 0.0254, "step": 182500 }, { "epoch": 0.7615099598601364, "grad_norm": 0.4602207347522929, "learning_rate": 2.292069694206906e-06, "loss": 0.0215, "step": 182505 }, { "epoch": 0.7615308225751266, "grad_norm": 0.5888267046601089, "learning_rate": 2.2920382973252083e-06, "loss": 0.0243, "step": 182510 }, { "epoch": 0.761551685290117, "grad_norm": 0.6067008248627174, "learning_rate": 2.2920069017337022e-06, "loss": 0.0227, "step": 182515 }, { "epoch": 0.7615725480051072, "grad_norm": 0.44921424333571447, "learning_rate": 2.291975507432301e-06, "loss": 0.0202, "step": 182520 }, { "epoch": 0.7615934107200975, "grad_norm": 0.6933530401590099, "learning_rate": 2.2919441144209154e-06, "loss": 0.0235, "step": 182525 }, { "epoch": 0.7616142734350877, "grad_norm": 0.7077830179253327, "learning_rate": 2.2919127226994576e-06, "loss": 0.0145, "step": 182530 }, { "epoch": 0.761635136150078, "grad_norm": 0.360594123664054, "learning_rate": 2.291881332267839e-06, "loss": 0.0217, "step": 182535 }, { "epoch": 0.7616559988650683, "grad_norm": 0.8283462923451792, "learning_rate": 2.2918499431259714e-06, "loss": 0.0281, "step": 182540 }, { "epoch": 0.7616768615800585, "grad_norm": 0.4096537678923369, "learning_rate": 2.2918185552737664e-06, "loss": 0.0194, "step": 182545 }, { "epoch": 0.7616977242950489, "grad_norm": 0.7525755480730154, "learning_rate": 2.291787168711136e-06, "loss": 0.028, "step": 182550 }, { "epoch": 0.7617185870100391, "grad_norm": 0.8813243554336638, "learning_rate": 2.2917557834379915e-06, "loss": 0.0171, "step": 182555 }, { "epoch": 0.7617394497250294, "grad_norm": 0.593149584566968, "learning_rate": 2.291724399454245e-06, "loss": 0.0254, "step": 182560 }, { "epoch": 0.7617603124400197, "grad_norm": 0.45433175647020246, "learning_rate": 2.2916930167598073e-06, "loss": 0.0218, "step": 182565 }, { "epoch": 0.76178117515501, "grad_norm": 0.2509594028992327, "learning_rate": 2.2916616353545907e-06, "loss": 0.0314, "step": 182570 }, { "epoch": 0.7618020378700002, "grad_norm": 0.5593632171926186, "learning_rate": 2.291630255238507e-06, "loss": 0.0244, "step": 182575 }, { "epoch": 0.7618229005849906, "grad_norm": 0.6572940970880108, "learning_rate": 2.291598876411469e-06, "loss": 0.0206, "step": 182580 }, { "epoch": 0.7618437632999808, "grad_norm": 0.38616785798019415, "learning_rate": 2.2915674988733865e-06, "loss": 0.0195, "step": 182585 }, { "epoch": 0.7618646260149711, "grad_norm": 0.4347795782745337, "learning_rate": 2.2915361226241726e-06, "loss": 0.0181, "step": 182590 }, { "epoch": 0.7618854887299613, "grad_norm": 1.0796588091603394, "learning_rate": 2.2915047476637384e-06, "loss": 0.024, "step": 182595 }, { "epoch": 0.7619063514449517, "grad_norm": 0.4770923745337737, "learning_rate": 2.2914733739919957e-06, "loss": 0.0151, "step": 182600 }, { "epoch": 0.7619272141599419, "grad_norm": 0.38466297058337706, "learning_rate": 2.291442001608856e-06, "loss": 0.0224, "step": 182605 }, { "epoch": 0.7619480768749322, "grad_norm": 0.665085401380504, "learning_rate": 2.291410630514233e-06, "loss": 0.0195, "step": 182610 }, { "epoch": 0.7619689395899225, "grad_norm": 0.7813588825583164, "learning_rate": 2.291379260708036e-06, "loss": 0.031, "step": 182615 }, { "epoch": 0.7619898023049128, "grad_norm": 0.767565173384331, "learning_rate": 2.291347892190178e-06, "loss": 0.02, "step": 182620 }, { "epoch": 0.762010665019903, "grad_norm": 0.4634788504881838, "learning_rate": 2.291316524960571e-06, "loss": 0.0156, "step": 182625 }, { "epoch": 0.7620315277348934, "grad_norm": 0.8392301064568805, "learning_rate": 2.291285159019126e-06, "loss": 0.0215, "step": 182630 }, { "epoch": 0.7620523904498836, "grad_norm": 0.5111255237936813, "learning_rate": 2.291253794365756e-06, "loss": 0.0197, "step": 182635 }, { "epoch": 0.7620732531648738, "grad_norm": 1.5069439769308048, "learning_rate": 2.2912224310003718e-06, "loss": 0.0237, "step": 182640 }, { "epoch": 0.7620941158798641, "grad_norm": 0.5587967959755504, "learning_rate": 2.291191068922886e-06, "loss": 0.0207, "step": 182645 }, { "epoch": 0.7621149785948544, "grad_norm": 0.4949531489576312, "learning_rate": 2.291159708133209e-06, "loss": 0.0193, "step": 182650 }, { "epoch": 0.7621358413098447, "grad_norm": 2.0096749315285196, "learning_rate": 2.2911283486312554e-06, "loss": 0.0301, "step": 182655 }, { "epoch": 0.7621567040248349, "grad_norm": 0.5106565783352163, "learning_rate": 2.291096990416934e-06, "loss": 0.0238, "step": 182660 }, { "epoch": 0.7621775667398253, "grad_norm": 0.6356163608570011, "learning_rate": 2.2910656334901595e-06, "loss": 0.0226, "step": 182665 }, { "epoch": 0.7621984294548155, "grad_norm": 1.2150167976027983, "learning_rate": 2.291034277850841e-06, "loss": 0.0313, "step": 182670 }, { "epoch": 0.7622192921698058, "grad_norm": 0.24082453359684908, "learning_rate": 2.291002923498893e-06, "loss": 0.0204, "step": 182675 }, { "epoch": 0.7622401548847961, "grad_norm": 1.435109133275245, "learning_rate": 2.2909715704342257e-06, "loss": 0.0295, "step": 182680 }, { "epoch": 0.7622610175997864, "grad_norm": 0.7244029877965448, "learning_rate": 2.2909402186567516e-06, "loss": 0.0292, "step": 182685 }, { "epoch": 0.7622818803147766, "grad_norm": 0.8406944192880055, "learning_rate": 2.2909088681663826e-06, "loss": 0.0182, "step": 182690 }, { "epoch": 0.762302743029767, "grad_norm": 0.6459646781063112, "learning_rate": 2.2908775189630302e-06, "loss": 0.0167, "step": 182695 }, { "epoch": 0.7623236057447572, "grad_norm": 0.61676038547262, "learning_rate": 2.2908461710466076e-06, "loss": 0.0279, "step": 182700 }, { "epoch": 0.7623444684597475, "grad_norm": 0.8087868062592148, "learning_rate": 2.2908148244170254e-06, "loss": 0.022, "step": 182705 }, { "epoch": 0.7623653311747377, "grad_norm": 0.44970159403724885, "learning_rate": 2.2907834790741963e-06, "loss": 0.019, "step": 182710 }, { "epoch": 0.7623861938897281, "grad_norm": 1.2471536293529009, "learning_rate": 2.290752135018032e-06, "loss": 0.0262, "step": 182715 }, { "epoch": 0.7624070566047183, "grad_norm": 0.4456976338382063, "learning_rate": 2.290720792248444e-06, "loss": 0.0197, "step": 182720 }, { "epoch": 0.7624279193197085, "grad_norm": 0.8232897790775638, "learning_rate": 2.2906894507653455e-06, "loss": 0.0244, "step": 182725 }, { "epoch": 0.7624487820346989, "grad_norm": 0.8217879129245895, "learning_rate": 2.2906581105686478e-06, "loss": 0.0258, "step": 182730 }, { "epoch": 0.7624696447496891, "grad_norm": 0.1997197874602807, "learning_rate": 2.2906267716582627e-06, "loss": 0.0157, "step": 182735 }, { "epoch": 0.7624905074646794, "grad_norm": 0.6755875810008977, "learning_rate": 2.2905954340341014e-06, "loss": 0.019, "step": 182740 }, { "epoch": 0.7625113701796697, "grad_norm": 0.5737598931114667, "learning_rate": 2.2905640976960784e-06, "loss": 0.0251, "step": 182745 }, { "epoch": 0.76253223289466, "grad_norm": 0.34130147581465525, "learning_rate": 2.2905327626441033e-06, "loss": 0.0211, "step": 182750 }, { "epoch": 0.7625530956096502, "grad_norm": 0.5828039892246862, "learning_rate": 2.29050142887809e-06, "loss": 0.0199, "step": 182755 }, { "epoch": 0.7625739583246406, "grad_norm": 0.2458825734763722, "learning_rate": 2.290470096397949e-06, "loss": 0.0145, "step": 182760 }, { "epoch": 0.7625948210396308, "grad_norm": 0.8438871467736346, "learning_rate": 2.2904387652035933e-06, "loss": 0.0244, "step": 182765 }, { "epoch": 0.7626156837546211, "grad_norm": 0.8212812504653975, "learning_rate": 2.290407435294934e-06, "loss": 0.0181, "step": 182770 }, { "epoch": 0.7626365464696113, "grad_norm": 0.3314816490703962, "learning_rate": 2.290376106671885e-06, "loss": 0.0161, "step": 182775 }, { "epoch": 0.7626574091846017, "grad_norm": 0.9715238091840616, "learning_rate": 2.290344779334356e-06, "loss": 0.0211, "step": 182780 }, { "epoch": 0.7626782718995919, "grad_norm": 0.5060956773294117, "learning_rate": 2.290313453282261e-06, "loss": 0.0173, "step": 182785 }, { "epoch": 0.7626991346145822, "grad_norm": 0.8126195104810863, "learning_rate": 2.2902821285155113e-06, "loss": 0.0222, "step": 182790 }, { "epoch": 0.7627199973295725, "grad_norm": 0.45267800684234816, "learning_rate": 2.290250805034019e-06, "loss": 0.019, "step": 182795 }, { "epoch": 0.7627408600445628, "grad_norm": 0.8123182655305402, "learning_rate": 2.290219482837696e-06, "loss": 0.0286, "step": 182800 }, { "epoch": 0.762761722759553, "grad_norm": 0.7186575166116175, "learning_rate": 2.2901881619264553e-06, "loss": 0.0197, "step": 182805 }, { "epoch": 0.7627825854745434, "grad_norm": 1.0701588310112873, "learning_rate": 2.2901568423002076e-06, "loss": 0.0477, "step": 182810 }, { "epoch": 0.7628034481895336, "grad_norm": 0.32624645604758123, "learning_rate": 2.2901255239588665e-06, "loss": 0.0137, "step": 182815 }, { "epoch": 0.7628243109045239, "grad_norm": 0.4023630732945313, "learning_rate": 2.2900942069023436e-06, "loss": 0.0247, "step": 182820 }, { "epoch": 0.7628451736195141, "grad_norm": 0.5672060406438375, "learning_rate": 2.2900628911305507e-06, "loss": 0.023, "step": 182825 }, { "epoch": 0.7628660363345044, "grad_norm": 0.31511288856352976, "learning_rate": 2.2900315766434e-06, "loss": 0.0138, "step": 182830 }, { "epoch": 0.7628868990494947, "grad_norm": 0.5377949940513932, "learning_rate": 2.290000263440805e-06, "loss": 0.0223, "step": 182835 }, { "epoch": 0.7629077617644849, "grad_norm": 0.9359298322015228, "learning_rate": 2.289968951522676e-06, "loss": 0.0233, "step": 182840 }, { "epoch": 0.7629286244794753, "grad_norm": 0.7712565087980233, "learning_rate": 2.2899376408889264e-06, "loss": 0.0181, "step": 182845 }, { "epoch": 0.7629494871944655, "grad_norm": 1.5829786753248078, "learning_rate": 2.2899063315394673e-06, "loss": 0.0241, "step": 182850 }, { "epoch": 0.7629703499094558, "grad_norm": 0.5917997129168179, "learning_rate": 2.289875023474212e-06, "loss": 0.0194, "step": 182855 }, { "epoch": 0.7629912126244461, "grad_norm": 0.9945668735660496, "learning_rate": 2.2898437166930723e-06, "loss": 0.0256, "step": 182860 }, { "epoch": 0.7630120753394364, "grad_norm": 0.49216871341644675, "learning_rate": 2.2898124111959605e-06, "loss": 0.0176, "step": 182865 }, { "epoch": 0.7630329380544266, "grad_norm": 0.6893600584607393, "learning_rate": 2.2897811069827886e-06, "loss": 0.0232, "step": 182870 }, { "epoch": 0.763053800769417, "grad_norm": 0.46702289917463086, "learning_rate": 2.289749804053469e-06, "loss": 0.0262, "step": 182875 }, { "epoch": 0.7630746634844072, "grad_norm": 0.5702017627579274, "learning_rate": 2.289718502407914e-06, "loss": 0.0211, "step": 182880 }, { "epoch": 0.7630955261993975, "grad_norm": 0.3680236543113992, "learning_rate": 2.2896872020460356e-06, "loss": 0.0169, "step": 182885 }, { "epoch": 0.7631163889143877, "grad_norm": 0.6233173938382381, "learning_rate": 2.2896559029677464e-06, "loss": 0.0195, "step": 182890 }, { "epoch": 0.7631372516293781, "grad_norm": 0.8479477166398023, "learning_rate": 2.289624605172959e-06, "loss": 0.0206, "step": 182895 }, { "epoch": 0.7631581143443683, "grad_norm": 0.7113602418490853, "learning_rate": 2.2895933086615848e-06, "loss": 0.0276, "step": 182900 }, { "epoch": 0.7631789770593586, "grad_norm": 0.3405955347799924, "learning_rate": 2.2895620134335363e-06, "loss": 0.0159, "step": 182905 }, { "epoch": 0.7631998397743489, "grad_norm": 0.7402582265316354, "learning_rate": 2.2895307194887257e-06, "loss": 0.0247, "step": 182910 }, { "epoch": 0.7632207024893392, "grad_norm": 0.5986508810731995, "learning_rate": 2.2894994268270664e-06, "loss": 0.024, "step": 182915 }, { "epoch": 0.7632415652043294, "grad_norm": 0.7660026122509341, "learning_rate": 2.2894681354484696e-06, "loss": 0.0216, "step": 182920 }, { "epoch": 0.7632624279193198, "grad_norm": 0.85724011097704, "learning_rate": 2.289436845352848e-06, "loss": 0.0211, "step": 182925 }, { "epoch": 0.76328329063431, "grad_norm": 0.805633067320839, "learning_rate": 2.2894055565401134e-06, "loss": 0.0211, "step": 182930 }, { "epoch": 0.7633041533493002, "grad_norm": 0.8288000991391073, "learning_rate": 2.289374269010179e-06, "loss": 0.0242, "step": 182935 }, { "epoch": 0.7633250160642906, "grad_norm": 0.6797705077481159, "learning_rate": 2.2893429827629566e-06, "loss": 0.018, "step": 182940 }, { "epoch": 0.7633458787792808, "grad_norm": 0.8158565559773863, "learning_rate": 2.289311697798359e-06, "loss": 0.0261, "step": 182945 }, { "epoch": 0.7633667414942711, "grad_norm": 0.5279519922079196, "learning_rate": 2.289280414116298e-06, "loss": 0.0163, "step": 182950 }, { "epoch": 0.7633876042092613, "grad_norm": 0.8195038860704862, "learning_rate": 2.289249131716686e-06, "loss": 0.0209, "step": 182955 }, { "epoch": 0.7634084669242517, "grad_norm": 0.5281573918091126, "learning_rate": 2.2892178505994365e-06, "loss": 0.017, "step": 182960 }, { "epoch": 0.7634293296392419, "grad_norm": 0.6051964698230806, "learning_rate": 2.28918657076446e-06, "loss": 0.0271, "step": 182965 }, { "epoch": 0.7634501923542322, "grad_norm": 0.9226912565233533, "learning_rate": 2.2891552922116706e-06, "loss": 0.021, "step": 182970 }, { "epoch": 0.7634710550692225, "grad_norm": 0.3538659877547805, "learning_rate": 2.28912401494098e-06, "loss": 0.0198, "step": 182975 }, { "epoch": 0.7634919177842128, "grad_norm": 0.6911294457750508, "learning_rate": 2.2890927389523e-06, "loss": 0.0215, "step": 182980 }, { "epoch": 0.763512780499203, "grad_norm": 0.6197317975882413, "learning_rate": 2.289061464245544e-06, "loss": 0.0198, "step": 182985 }, { "epoch": 0.7635336432141934, "grad_norm": 0.8048953885821806, "learning_rate": 2.2890301908206246e-06, "loss": 0.0219, "step": 182990 }, { "epoch": 0.7635545059291836, "grad_norm": 0.6516984235283836, "learning_rate": 2.288998918677453e-06, "loss": 0.022, "step": 182995 }, { "epoch": 0.7635753686441739, "grad_norm": 0.6715399100302519, "learning_rate": 2.2889676478159423e-06, "loss": 0.0217, "step": 183000 }, { "epoch": 0.7635962313591641, "grad_norm": 0.6484243393156498, "learning_rate": 2.2889363782360057e-06, "loss": 0.0175, "step": 183005 }, { "epoch": 0.7636170940741545, "grad_norm": 0.4468388419348023, "learning_rate": 2.2889051099375545e-06, "loss": 0.0176, "step": 183010 }, { "epoch": 0.7636379567891447, "grad_norm": 0.4280524174550261, "learning_rate": 2.288873842920502e-06, "loss": 0.0194, "step": 183015 }, { "epoch": 0.763658819504135, "grad_norm": 0.3556619766401078, "learning_rate": 2.2888425771847597e-06, "loss": 0.0185, "step": 183020 }, { "epoch": 0.7636796822191253, "grad_norm": 0.437352429534977, "learning_rate": 2.2888113127302415e-06, "loss": 0.0213, "step": 183025 }, { "epoch": 0.7637005449341155, "grad_norm": 0.36243667630411985, "learning_rate": 2.288780049556859e-06, "loss": 0.0129, "step": 183030 }, { "epoch": 0.7637214076491058, "grad_norm": 0.6025078993574284, "learning_rate": 2.2887487876645247e-06, "loss": 0.0255, "step": 183035 }, { "epoch": 0.7637422703640961, "grad_norm": 0.48804788867202176, "learning_rate": 2.288717527053151e-06, "loss": 0.0232, "step": 183040 }, { "epoch": 0.7637631330790864, "grad_norm": 0.7721939654825204, "learning_rate": 2.288686267722651e-06, "loss": 0.0217, "step": 183045 }, { "epoch": 0.7637839957940766, "grad_norm": 0.6270663477544882, "learning_rate": 2.2886550096729367e-06, "loss": 0.024, "step": 183050 }, { "epoch": 0.763804858509067, "grad_norm": 0.9413610865015185, "learning_rate": 2.288623752903921e-06, "loss": 0.0147, "step": 183055 }, { "epoch": 0.7638257212240572, "grad_norm": 0.46399476671119405, "learning_rate": 2.2885924974155164e-06, "loss": 0.0217, "step": 183060 }, { "epoch": 0.7638465839390475, "grad_norm": 0.7059985487796107, "learning_rate": 2.288561243207635e-06, "loss": 0.0236, "step": 183065 }, { "epoch": 0.7638674466540377, "grad_norm": 0.43029989932405016, "learning_rate": 2.2885299902801895e-06, "loss": 0.028, "step": 183070 }, { "epoch": 0.7638883093690281, "grad_norm": 0.8586381474360263, "learning_rate": 2.2884987386330932e-06, "loss": 0.0212, "step": 183075 }, { "epoch": 0.7639091720840183, "grad_norm": 0.7998637086076132, "learning_rate": 2.288467488266258e-06, "loss": 0.0178, "step": 183080 }, { "epoch": 0.7639300347990086, "grad_norm": 0.3665610704286499, "learning_rate": 2.288436239179597e-06, "loss": 0.0148, "step": 183085 }, { "epoch": 0.7639508975139989, "grad_norm": 0.39577168697403964, "learning_rate": 2.2884049913730223e-06, "loss": 0.022, "step": 183090 }, { "epoch": 0.7639717602289892, "grad_norm": 0.7564927450265609, "learning_rate": 2.2883737448464464e-06, "loss": 0.0183, "step": 183095 }, { "epoch": 0.7639926229439794, "grad_norm": 0.6040514963721202, "learning_rate": 2.2883424995997826e-06, "loss": 0.0198, "step": 183100 }, { "epoch": 0.7640134856589698, "grad_norm": 0.5344554821603574, "learning_rate": 2.2883112556329424e-06, "loss": 0.014, "step": 183105 }, { "epoch": 0.76403434837396, "grad_norm": 0.5349803932390265, "learning_rate": 2.2882800129458394e-06, "loss": 0.0185, "step": 183110 }, { "epoch": 0.7640552110889502, "grad_norm": 1.0527532970971936, "learning_rate": 2.288248771538387e-06, "loss": 0.0256, "step": 183115 }, { "epoch": 0.7640760738039406, "grad_norm": 0.9365852090309374, "learning_rate": 2.2882175314104956e-06, "loss": 0.0174, "step": 183120 }, { "epoch": 0.7640969365189308, "grad_norm": 1.0940393936885124, "learning_rate": 2.2881862925620794e-06, "loss": 0.0197, "step": 183125 }, { "epoch": 0.7641177992339211, "grad_norm": 0.9664915149450523, "learning_rate": 2.288155054993051e-06, "loss": 0.0221, "step": 183130 }, { "epoch": 0.7641386619489113, "grad_norm": 0.38888295553056257, "learning_rate": 2.2881238187033223e-06, "loss": 0.0209, "step": 183135 }, { "epoch": 0.7641595246639017, "grad_norm": 0.805090057811001, "learning_rate": 2.2880925836928074e-06, "loss": 0.0233, "step": 183140 }, { "epoch": 0.7641803873788919, "grad_norm": 0.9610678605446398, "learning_rate": 2.2880613499614176e-06, "loss": 0.0236, "step": 183145 }, { "epoch": 0.7642012500938822, "grad_norm": 0.6875862302387887, "learning_rate": 2.2880301175090658e-06, "loss": 0.0283, "step": 183150 }, { "epoch": 0.7642221128088725, "grad_norm": 0.6652664063593453, "learning_rate": 2.287998886335665e-06, "loss": 0.0237, "step": 183155 }, { "epoch": 0.7642429755238628, "grad_norm": 0.38306794159769847, "learning_rate": 2.2879676564411283e-06, "loss": 0.0205, "step": 183160 }, { "epoch": 0.764263838238853, "grad_norm": 1.0681506773547698, "learning_rate": 2.287936427825368e-06, "loss": 0.0244, "step": 183165 }, { "epoch": 0.7642847009538434, "grad_norm": 1.0715733476205245, "learning_rate": 2.2879052004882967e-06, "loss": 0.0253, "step": 183170 }, { "epoch": 0.7643055636688336, "grad_norm": 0.6952602810798977, "learning_rate": 2.2878739744298277e-06, "loss": 0.021, "step": 183175 }, { "epoch": 0.7643264263838239, "grad_norm": 0.7635297842372893, "learning_rate": 2.287842749649873e-06, "loss": 0.024, "step": 183180 }, { "epoch": 0.7643472890988141, "grad_norm": 0.4872280731193308, "learning_rate": 2.2878115261483456e-06, "loss": 0.0173, "step": 183185 }, { "epoch": 0.7643681518138045, "grad_norm": 0.979804643837162, "learning_rate": 2.2877803039251583e-06, "loss": 0.0263, "step": 183190 }, { "epoch": 0.7643890145287947, "grad_norm": 0.9796433339431533, "learning_rate": 2.2877490829802247e-06, "loss": 0.0189, "step": 183195 }, { "epoch": 0.764409877243785, "grad_norm": 0.7349190644361059, "learning_rate": 2.2877178633134562e-06, "loss": 0.0205, "step": 183200 }, { "epoch": 0.7644307399587753, "grad_norm": 0.9063065439039403, "learning_rate": 2.287686644924766e-06, "loss": 0.0204, "step": 183205 }, { "epoch": 0.7644516026737656, "grad_norm": 0.3712037729296938, "learning_rate": 2.2876554278140676e-06, "loss": 0.0168, "step": 183210 }, { "epoch": 0.7644724653887558, "grad_norm": 0.5925193279256387, "learning_rate": 2.287624211981273e-06, "loss": 0.0198, "step": 183215 }, { "epoch": 0.7644933281037462, "grad_norm": 0.3768583695552152, "learning_rate": 2.2875929974262954e-06, "loss": 0.0214, "step": 183220 }, { "epoch": 0.7645141908187364, "grad_norm": 0.6964993922428637, "learning_rate": 2.287561784149048e-06, "loss": 0.0222, "step": 183225 }, { "epoch": 0.7645350535337266, "grad_norm": 0.9087504608493745, "learning_rate": 2.287530572149443e-06, "loss": 0.0173, "step": 183230 }, { "epoch": 0.764555916248717, "grad_norm": 0.49830087160880443, "learning_rate": 2.287499361427393e-06, "loss": 0.0149, "step": 183235 }, { "epoch": 0.7645767789637072, "grad_norm": 1.0215886009442816, "learning_rate": 2.2874681519828122e-06, "loss": 0.0234, "step": 183240 }, { "epoch": 0.7645976416786975, "grad_norm": 0.6462436642154772, "learning_rate": 2.2874369438156116e-06, "loss": 0.0237, "step": 183245 }, { "epoch": 0.7646185043936877, "grad_norm": 0.7253430484524224, "learning_rate": 2.2874057369257053e-06, "loss": 0.0236, "step": 183250 }, { "epoch": 0.7646393671086781, "grad_norm": 0.8660299910372633, "learning_rate": 2.287374531313006e-06, "loss": 0.0236, "step": 183255 }, { "epoch": 0.7646602298236683, "grad_norm": 0.8915125601228336, "learning_rate": 2.287343326977427e-06, "loss": 0.0246, "step": 183260 }, { "epoch": 0.7646810925386586, "grad_norm": 1.0068249390400064, "learning_rate": 2.2873121239188797e-06, "loss": 0.0257, "step": 183265 }, { "epoch": 0.7647019552536489, "grad_norm": 0.510618521928046, "learning_rate": 2.2872809221372788e-06, "loss": 0.0174, "step": 183270 }, { "epoch": 0.7647228179686392, "grad_norm": 0.5688868749999009, "learning_rate": 2.2872497216325356e-06, "loss": 0.017, "step": 183275 }, { "epoch": 0.7647436806836294, "grad_norm": 0.2613907443561891, "learning_rate": 2.287218522404564e-06, "loss": 0.0157, "step": 183280 }, { "epoch": 0.7647645433986198, "grad_norm": 0.8012328091814254, "learning_rate": 2.287187324453277e-06, "loss": 0.0291, "step": 183285 }, { "epoch": 0.76478540611361, "grad_norm": 0.37195899547268507, "learning_rate": 2.2871561277785873e-06, "loss": 0.0217, "step": 183290 }, { "epoch": 0.7648062688286003, "grad_norm": 0.6854079594465672, "learning_rate": 2.2871249323804075e-06, "loss": 0.0176, "step": 183295 }, { "epoch": 0.7648271315435906, "grad_norm": 0.5171269152675341, "learning_rate": 2.2870937382586513e-06, "loss": 0.0212, "step": 183300 }, { "epoch": 0.7648479942585809, "grad_norm": 0.4133251591855046, "learning_rate": 2.28706254541323e-06, "loss": 0.0183, "step": 183305 }, { "epoch": 0.7648688569735711, "grad_norm": 0.41521933317688936, "learning_rate": 2.2870313538440586e-06, "loss": 0.0203, "step": 183310 }, { "epoch": 0.7648897196885613, "grad_norm": 0.6196677413945396, "learning_rate": 2.2870001635510493e-06, "loss": 0.0183, "step": 183315 }, { "epoch": 0.7649105824035517, "grad_norm": 0.4976893540258725, "learning_rate": 2.2869689745341146e-06, "loss": 0.0188, "step": 183320 }, { "epoch": 0.7649314451185419, "grad_norm": 0.8772932050448001, "learning_rate": 2.286937786793168e-06, "loss": 0.0253, "step": 183325 }, { "epoch": 0.7649523078335322, "grad_norm": 0.8048702763826036, "learning_rate": 2.2869066003281227e-06, "loss": 0.0259, "step": 183330 }, { "epoch": 0.7649731705485225, "grad_norm": 0.49472396248292094, "learning_rate": 2.2868754151388913e-06, "loss": 0.0216, "step": 183335 }, { "epoch": 0.7649940332635128, "grad_norm": 0.34562862028862235, "learning_rate": 2.2868442312253865e-06, "loss": 0.0213, "step": 183340 }, { "epoch": 0.765014895978503, "grad_norm": 0.5887429619878319, "learning_rate": 2.286813048587522e-06, "loss": 0.0293, "step": 183345 }, { "epoch": 0.7650357586934934, "grad_norm": 0.47484713522778976, "learning_rate": 2.2867818672252106e-06, "loss": 0.0231, "step": 183350 }, { "epoch": 0.7650566214084836, "grad_norm": 0.6545768754047627, "learning_rate": 2.2867506871383656e-06, "loss": 0.0266, "step": 183355 }, { "epoch": 0.7650774841234739, "grad_norm": 0.507556453265861, "learning_rate": 2.2867195083268993e-06, "loss": 0.0145, "step": 183360 }, { "epoch": 0.7650983468384641, "grad_norm": 0.5388745745150438, "learning_rate": 2.2866883307907255e-06, "loss": 0.0304, "step": 183365 }, { "epoch": 0.7651192095534545, "grad_norm": 0.6604996273977215, "learning_rate": 2.2866571545297563e-06, "loss": 0.0252, "step": 183370 }, { "epoch": 0.7651400722684447, "grad_norm": 0.7071355073546307, "learning_rate": 2.2866259795439063e-06, "loss": 0.023, "step": 183375 }, { "epoch": 0.765160934983435, "grad_norm": 0.8390374706648215, "learning_rate": 2.2865948058330873e-06, "loss": 0.0184, "step": 183380 }, { "epoch": 0.7651817976984253, "grad_norm": 0.7369998327088613, "learning_rate": 2.286563633397213e-06, "loss": 0.0172, "step": 183385 }, { "epoch": 0.7652026604134156, "grad_norm": 0.6592247246052637, "learning_rate": 2.2865324622361963e-06, "loss": 0.0133, "step": 183390 }, { "epoch": 0.7652235231284058, "grad_norm": 1.1877432180158436, "learning_rate": 2.28650129234995e-06, "loss": 0.0215, "step": 183395 }, { "epoch": 0.7652443858433962, "grad_norm": 0.897289886828147, "learning_rate": 2.2864701237383876e-06, "loss": 0.0249, "step": 183400 }, { "epoch": 0.7652652485583864, "grad_norm": 0.9864714663624076, "learning_rate": 2.2864389564014226e-06, "loss": 0.0234, "step": 183405 }, { "epoch": 0.7652861112733766, "grad_norm": 0.7489065365237173, "learning_rate": 2.286407790338967e-06, "loss": 0.0241, "step": 183410 }, { "epoch": 0.765306973988367, "grad_norm": 0.5422555015133466, "learning_rate": 2.2863766255509353e-06, "loss": 0.0254, "step": 183415 }, { "epoch": 0.7653278367033572, "grad_norm": 1.1079260429751747, "learning_rate": 2.28634546203724e-06, "loss": 0.0196, "step": 183420 }, { "epoch": 0.7653486994183475, "grad_norm": 1.1213529033515826, "learning_rate": 2.2863142997977936e-06, "loss": 0.0256, "step": 183425 }, { "epoch": 0.7653695621333377, "grad_norm": 0.6501321823372325, "learning_rate": 2.2862831388325098e-06, "loss": 0.0337, "step": 183430 }, { "epoch": 0.7653904248483281, "grad_norm": 0.9426299274067111, "learning_rate": 2.2862519791413025e-06, "loss": 0.028, "step": 183435 }, { "epoch": 0.7654112875633183, "grad_norm": 0.38380276822683473, "learning_rate": 2.2862208207240844e-06, "loss": 0.0175, "step": 183440 }, { "epoch": 0.7654321502783086, "grad_norm": 0.6119392338863379, "learning_rate": 2.286189663580768e-06, "loss": 0.0233, "step": 183445 }, { "epoch": 0.7654530129932989, "grad_norm": 0.5132560840495826, "learning_rate": 2.2861585077112673e-06, "loss": 0.0207, "step": 183450 }, { "epoch": 0.7654738757082892, "grad_norm": 1.0459410624693988, "learning_rate": 2.2861273531154947e-06, "loss": 0.0271, "step": 183455 }, { "epoch": 0.7654947384232794, "grad_norm": 0.48961962793499975, "learning_rate": 2.2860961997933644e-06, "loss": 0.0204, "step": 183460 }, { "epoch": 0.7655156011382698, "grad_norm": 0.55658557319716, "learning_rate": 2.2860650477447893e-06, "loss": 0.0207, "step": 183465 }, { "epoch": 0.76553646385326, "grad_norm": 0.9175605776279706, "learning_rate": 2.2860338969696824e-06, "loss": 0.0217, "step": 183470 }, { "epoch": 0.7655573265682503, "grad_norm": 0.41584686158258233, "learning_rate": 2.286002747467957e-06, "loss": 0.0125, "step": 183475 }, { "epoch": 0.7655781892832406, "grad_norm": 0.7882926350204302, "learning_rate": 2.2859715992395263e-06, "loss": 0.0226, "step": 183480 }, { "epoch": 0.7655990519982309, "grad_norm": 0.41131750081972546, "learning_rate": 2.285940452284304e-06, "loss": 0.0209, "step": 183485 }, { "epoch": 0.7656199147132211, "grad_norm": 0.9207628583151415, "learning_rate": 2.2859093066022024e-06, "loss": 0.0208, "step": 183490 }, { "epoch": 0.7656407774282113, "grad_norm": 0.4061341570917484, "learning_rate": 2.285878162193136e-06, "loss": 0.018, "step": 183495 }, { "epoch": 0.7656616401432017, "grad_norm": 0.49521922664769197, "learning_rate": 2.2858470190570174e-06, "loss": 0.0163, "step": 183500 }, { "epoch": 0.765682502858192, "grad_norm": 0.8072999878744472, "learning_rate": 2.2858158771937597e-06, "loss": 0.0308, "step": 183505 }, { "epoch": 0.7657033655731822, "grad_norm": 0.4531814585525676, "learning_rate": 2.2857847366032767e-06, "loss": 0.0166, "step": 183510 }, { "epoch": 0.7657242282881725, "grad_norm": 0.7407028143663793, "learning_rate": 2.2857535972854805e-06, "loss": 0.0231, "step": 183515 }, { "epoch": 0.7657450910031628, "grad_norm": 1.231450181299549, "learning_rate": 2.2857224592402866e-06, "loss": 0.023, "step": 183520 }, { "epoch": 0.765765953718153, "grad_norm": 0.4636967874777333, "learning_rate": 2.2856913224676063e-06, "loss": 0.0166, "step": 183525 }, { "epoch": 0.7657868164331434, "grad_norm": 0.6459084992841708, "learning_rate": 2.2856601869673537e-06, "loss": 0.0261, "step": 183530 }, { "epoch": 0.7658076791481336, "grad_norm": 0.32766530258223264, "learning_rate": 2.285629052739443e-06, "loss": 0.0173, "step": 183535 }, { "epoch": 0.7658285418631239, "grad_norm": 0.6145923665554204, "learning_rate": 2.285597919783786e-06, "loss": 0.0154, "step": 183540 }, { "epoch": 0.7658494045781141, "grad_norm": 0.4388219492423578, "learning_rate": 2.2855667881002967e-06, "loss": 0.0186, "step": 183545 }, { "epoch": 0.7658702672931045, "grad_norm": 0.6249244600600734, "learning_rate": 2.2855356576888885e-06, "loss": 0.0201, "step": 183550 }, { "epoch": 0.7658911300080947, "grad_norm": 0.8072006165735378, "learning_rate": 2.2855045285494752e-06, "loss": 0.0145, "step": 183555 }, { "epoch": 0.765911992723085, "grad_norm": 0.7606989380478173, "learning_rate": 2.2854734006819696e-06, "loss": 0.0256, "step": 183560 }, { "epoch": 0.7659328554380753, "grad_norm": 1.253108409044967, "learning_rate": 2.2854422740862847e-06, "loss": 0.0508, "step": 183565 }, { "epoch": 0.7659537181530656, "grad_norm": 0.646911679181007, "learning_rate": 2.285411148762335e-06, "loss": 0.022, "step": 183570 }, { "epoch": 0.7659745808680558, "grad_norm": 1.9227059104474926, "learning_rate": 2.285380024710034e-06, "loss": 0.0226, "step": 183575 }, { "epoch": 0.7659954435830462, "grad_norm": 0.9644850770055232, "learning_rate": 2.285348901929293e-06, "loss": 0.0234, "step": 183580 }, { "epoch": 0.7660163062980364, "grad_norm": 0.5061993284337479, "learning_rate": 2.285317780420028e-06, "loss": 0.0179, "step": 183585 }, { "epoch": 0.7660371690130267, "grad_norm": 0.6119988338573761, "learning_rate": 2.285286660182151e-06, "loss": 0.0262, "step": 183590 }, { "epoch": 0.766058031728017, "grad_norm": 0.37994126582038806, "learning_rate": 2.2852555412155753e-06, "loss": 0.0175, "step": 183595 }, { "epoch": 0.7660788944430073, "grad_norm": 0.6601416900327934, "learning_rate": 2.285224423520215e-06, "loss": 0.0231, "step": 183600 }, { "epoch": 0.7660997571579975, "grad_norm": 0.8598292291825183, "learning_rate": 2.2851933070959838e-06, "loss": 0.0224, "step": 183605 }, { "epoch": 0.7661206198729877, "grad_norm": 0.9291040684448016, "learning_rate": 2.2851621919427946e-06, "loss": 0.0196, "step": 183610 }, { "epoch": 0.7661414825879781, "grad_norm": 0.7169952870734735, "learning_rate": 2.285131078060561e-06, "loss": 0.0162, "step": 183615 }, { "epoch": 0.7661623453029683, "grad_norm": 0.4148577780733085, "learning_rate": 2.285099965449196e-06, "loss": 0.0219, "step": 183620 }, { "epoch": 0.7661832080179586, "grad_norm": 0.5047481601333617, "learning_rate": 2.285068854108614e-06, "loss": 0.0154, "step": 183625 }, { "epoch": 0.7662040707329489, "grad_norm": 0.47321817021945445, "learning_rate": 2.2850377440387275e-06, "loss": 0.024, "step": 183630 }, { "epoch": 0.7662249334479392, "grad_norm": 0.836294560343461, "learning_rate": 2.285006635239451e-06, "loss": 0.0155, "step": 183635 }, { "epoch": 0.7662457961629294, "grad_norm": 0.8600995233911696, "learning_rate": 2.2849755277106975e-06, "loss": 0.022, "step": 183640 }, { "epoch": 0.7662666588779198, "grad_norm": 0.44251130735419647, "learning_rate": 2.2849444214523804e-06, "loss": 0.0234, "step": 183645 }, { "epoch": 0.76628752159291, "grad_norm": 0.6016326428629416, "learning_rate": 2.2849133164644137e-06, "loss": 0.0168, "step": 183650 }, { "epoch": 0.7663083843079003, "grad_norm": 0.7315858990885282, "learning_rate": 2.2848822127467106e-06, "loss": 0.022, "step": 183655 }, { "epoch": 0.7663292470228906, "grad_norm": 1.3125999151479113, "learning_rate": 2.284851110299184e-06, "loss": 0.0239, "step": 183660 }, { "epoch": 0.7663501097378809, "grad_norm": 0.5606649615376621, "learning_rate": 2.284820009121749e-06, "loss": 0.019, "step": 183665 }, { "epoch": 0.7663709724528711, "grad_norm": 0.9058392210140112, "learning_rate": 2.2847889092143176e-06, "loss": 0.0197, "step": 183670 }, { "epoch": 0.7663918351678614, "grad_norm": 0.5386273447169321, "learning_rate": 2.2847578105768046e-06, "loss": 0.0277, "step": 183675 }, { "epoch": 0.7664126978828517, "grad_norm": 0.35875588738492137, "learning_rate": 2.284726713209123e-06, "loss": 0.0169, "step": 183680 }, { "epoch": 0.766433560597842, "grad_norm": 0.5226705335121702, "learning_rate": 2.2846956171111866e-06, "loss": 0.0154, "step": 183685 }, { "epoch": 0.7664544233128322, "grad_norm": 0.6050321251576718, "learning_rate": 2.2846645222829083e-06, "loss": 0.02, "step": 183690 }, { "epoch": 0.7664752860278226, "grad_norm": 0.7276254820933914, "learning_rate": 2.2846334287242024e-06, "loss": 0.0225, "step": 183695 }, { "epoch": 0.7664961487428128, "grad_norm": 1.0253757933518075, "learning_rate": 2.284602336434982e-06, "loss": 0.0201, "step": 183700 }, { "epoch": 0.766517011457803, "grad_norm": 1.047544228929567, "learning_rate": 2.2845712454151612e-06, "loss": 0.0199, "step": 183705 }, { "epoch": 0.7665378741727934, "grad_norm": 0.4972971813406613, "learning_rate": 2.284540155664654e-06, "loss": 0.0176, "step": 183710 }, { "epoch": 0.7665587368877836, "grad_norm": 0.5047625886215888, "learning_rate": 2.284509067183373e-06, "loss": 0.0186, "step": 183715 }, { "epoch": 0.7665795996027739, "grad_norm": 0.8401483634225654, "learning_rate": 2.2844779799712323e-06, "loss": 0.0263, "step": 183720 }, { "epoch": 0.7666004623177641, "grad_norm": 0.6387262075485195, "learning_rate": 2.2844468940281462e-06, "loss": 0.0237, "step": 183725 }, { "epoch": 0.7666213250327545, "grad_norm": 0.5117098167545591, "learning_rate": 2.2844158093540273e-06, "loss": 0.025, "step": 183730 }, { "epoch": 0.7666421877477447, "grad_norm": 0.7008757866954846, "learning_rate": 2.28438472594879e-06, "loss": 0.0167, "step": 183735 }, { "epoch": 0.766663050462735, "grad_norm": 0.6272767731035137, "learning_rate": 2.284353643812347e-06, "loss": 0.0161, "step": 183740 }, { "epoch": 0.7666839131777253, "grad_norm": 1.1378470746340181, "learning_rate": 2.2843225629446135e-06, "loss": 0.0211, "step": 183745 }, { "epoch": 0.7667047758927156, "grad_norm": 0.3637422140689042, "learning_rate": 2.284291483345502e-06, "loss": 0.0185, "step": 183750 }, { "epoch": 0.7667256386077058, "grad_norm": 0.4555253211390746, "learning_rate": 2.2842604050149266e-06, "loss": 0.0211, "step": 183755 }, { "epoch": 0.7667465013226962, "grad_norm": 0.2107213815107993, "learning_rate": 2.284229327952802e-06, "loss": 0.0197, "step": 183760 }, { "epoch": 0.7667673640376864, "grad_norm": 0.38544433797483796, "learning_rate": 2.284198252159039e-06, "loss": 0.0209, "step": 183765 }, { "epoch": 0.7667882267526767, "grad_norm": 0.6224107071468158, "learning_rate": 2.2841671776335543e-06, "loss": 0.0317, "step": 183770 }, { "epoch": 0.766809089467667, "grad_norm": 1.050666735471282, "learning_rate": 2.2841361043762605e-06, "loss": 0.0193, "step": 183775 }, { "epoch": 0.7668299521826573, "grad_norm": 0.8123448995972028, "learning_rate": 2.2841050323870715e-06, "loss": 0.0191, "step": 183780 }, { "epoch": 0.7668508148976475, "grad_norm": 0.6273135585077032, "learning_rate": 2.2840739616659006e-06, "loss": 0.0173, "step": 183785 }, { "epoch": 0.7668716776126377, "grad_norm": 1.027231938078131, "learning_rate": 2.284042892212662e-06, "loss": 0.0243, "step": 183790 }, { "epoch": 0.7668925403276281, "grad_norm": 0.6512424976087717, "learning_rate": 2.28401182402727e-06, "loss": 0.0313, "step": 183795 }, { "epoch": 0.7669134030426183, "grad_norm": 0.7448246583657502, "learning_rate": 2.2839807571096374e-06, "loss": 0.0232, "step": 183800 }, { "epoch": 0.7669342657576086, "grad_norm": 0.6838268422925547, "learning_rate": 2.283949691459678e-06, "loss": 0.028, "step": 183805 }, { "epoch": 0.766955128472599, "grad_norm": 0.46800801652478213, "learning_rate": 2.283918627077306e-06, "loss": 0.0201, "step": 183810 }, { "epoch": 0.7669759911875892, "grad_norm": 0.650215540402243, "learning_rate": 2.2838875639624354e-06, "loss": 0.0186, "step": 183815 }, { "epoch": 0.7669968539025794, "grad_norm": 0.510163538288624, "learning_rate": 2.2838565021149795e-06, "loss": 0.0245, "step": 183820 }, { "epoch": 0.7670177166175698, "grad_norm": 0.8967494982402999, "learning_rate": 2.283825441534852e-06, "loss": 0.0235, "step": 183825 }, { "epoch": 0.76703857933256, "grad_norm": 0.6129039742782801, "learning_rate": 2.283794382221968e-06, "loss": 0.019, "step": 183830 }, { "epoch": 0.7670594420475503, "grad_norm": 0.9473484362329746, "learning_rate": 2.2837633241762396e-06, "loss": 0.0197, "step": 183835 }, { "epoch": 0.7670803047625406, "grad_norm": 0.43223952233775553, "learning_rate": 2.283732267397582e-06, "loss": 0.0247, "step": 183840 }, { "epoch": 0.7671011674775309, "grad_norm": 0.3787248283976709, "learning_rate": 2.283701211885908e-06, "loss": 0.0182, "step": 183845 }, { "epoch": 0.7671220301925211, "grad_norm": 0.7945364456437058, "learning_rate": 2.2836701576411327e-06, "loss": 0.0141, "step": 183850 }, { "epoch": 0.7671428929075114, "grad_norm": 0.4597475013022425, "learning_rate": 2.2836391046631684e-06, "loss": 0.0212, "step": 183855 }, { "epoch": 0.7671637556225017, "grad_norm": 0.7700723780880379, "learning_rate": 2.28360805295193e-06, "loss": 0.0283, "step": 183860 }, { "epoch": 0.767184618337492, "grad_norm": 0.3487942093215606, "learning_rate": 2.2835770025073315e-06, "loss": 0.0233, "step": 183865 }, { "epoch": 0.7672054810524822, "grad_norm": 0.7420694346785622, "learning_rate": 2.2835459533292866e-06, "loss": 0.0388, "step": 183870 }, { "epoch": 0.7672263437674726, "grad_norm": 0.7565518098502183, "learning_rate": 2.283514905417708e-06, "loss": 0.021, "step": 183875 }, { "epoch": 0.7672472064824628, "grad_norm": 0.824795619321765, "learning_rate": 2.2834838587725118e-06, "loss": 0.0162, "step": 183880 }, { "epoch": 0.767268069197453, "grad_norm": 0.5705804451031938, "learning_rate": 2.28345281339361e-06, "loss": 0.0212, "step": 183885 }, { "epoch": 0.7672889319124434, "grad_norm": 0.6927955976732175, "learning_rate": 2.2834217692809177e-06, "loss": 0.0195, "step": 183890 }, { "epoch": 0.7673097946274337, "grad_norm": 1.285959248272117, "learning_rate": 2.283390726434349e-06, "loss": 0.0163, "step": 183895 }, { "epoch": 0.7673306573424239, "grad_norm": 0.6736453724841195, "learning_rate": 2.283359684853816e-06, "loss": 0.0153, "step": 183900 }, { "epoch": 0.7673515200574141, "grad_norm": 0.7243837515653072, "learning_rate": 2.2833286445392343e-06, "loss": 0.019, "step": 183905 }, { "epoch": 0.7673723827724045, "grad_norm": 0.7743532521707371, "learning_rate": 2.2832976054905175e-06, "loss": 0.0221, "step": 183910 }, { "epoch": 0.7673932454873947, "grad_norm": 0.4392476403639534, "learning_rate": 2.28326656770758e-06, "loss": 0.0196, "step": 183915 }, { "epoch": 0.767414108202385, "grad_norm": 0.3166571979971135, "learning_rate": 2.2832355311903353e-06, "loss": 0.0155, "step": 183920 }, { "epoch": 0.7674349709173753, "grad_norm": 0.759209371083728, "learning_rate": 2.2832044959386966e-06, "loss": 0.0216, "step": 183925 }, { "epoch": 0.7674558336323656, "grad_norm": 1.6124300821185682, "learning_rate": 2.28317346195258e-06, "loss": 0.0293, "step": 183930 }, { "epoch": 0.7674766963473558, "grad_norm": 1.4231835891294717, "learning_rate": 2.2831424292318967e-06, "loss": 0.0262, "step": 183935 }, { "epoch": 0.7674975590623462, "grad_norm": 0.5703226441119628, "learning_rate": 2.283111397776563e-06, "loss": 0.0281, "step": 183940 }, { "epoch": 0.7675184217773364, "grad_norm": 1.0084617167137953, "learning_rate": 2.2830803675864914e-06, "loss": 0.0178, "step": 183945 }, { "epoch": 0.7675392844923267, "grad_norm": 0.7851437162599798, "learning_rate": 2.283049338661597e-06, "loss": 0.0227, "step": 183950 }, { "epoch": 0.767560147207317, "grad_norm": 0.6635982151502369, "learning_rate": 2.283018311001794e-06, "loss": 0.0213, "step": 183955 }, { "epoch": 0.7675810099223073, "grad_norm": 0.4955154939067579, "learning_rate": 2.2829872846069957e-06, "loss": 0.026, "step": 183960 }, { "epoch": 0.7676018726372975, "grad_norm": 0.6491518728808723, "learning_rate": 2.282956259477116e-06, "loss": 0.0257, "step": 183965 }, { "epoch": 0.7676227353522878, "grad_norm": 1.0315295493292935, "learning_rate": 2.2829252356120693e-06, "loss": 0.0236, "step": 183970 }, { "epoch": 0.7676435980672781, "grad_norm": 0.48644907463526776, "learning_rate": 2.2828942130117694e-06, "loss": 0.0299, "step": 183975 }, { "epoch": 0.7676644607822684, "grad_norm": 0.6146130915673814, "learning_rate": 2.2828631916761307e-06, "loss": 0.0216, "step": 183980 }, { "epoch": 0.7676853234972586, "grad_norm": 1.1619101706410917, "learning_rate": 2.282832171605067e-06, "loss": 0.0196, "step": 183985 }, { "epoch": 0.767706186212249, "grad_norm": 0.47574412715757997, "learning_rate": 2.2828011527984924e-06, "loss": 0.0277, "step": 183990 }, { "epoch": 0.7677270489272392, "grad_norm": 0.5512945099775379, "learning_rate": 2.2827701352563217e-06, "loss": 0.017, "step": 183995 }, { "epoch": 0.7677479116422294, "grad_norm": 0.4505802883811804, "learning_rate": 2.2827391189784687e-06, "loss": 0.0242, "step": 184000 }, { "epoch": 0.7677687743572198, "grad_norm": 1.0879371468976755, "learning_rate": 2.282708103964847e-06, "loss": 0.0357, "step": 184005 }, { "epoch": 0.76778963707221, "grad_norm": 1.4428191948046842, "learning_rate": 2.2826770902153703e-06, "loss": 0.0196, "step": 184010 }, { "epoch": 0.7678104997872003, "grad_norm": 0.5414362823610395, "learning_rate": 2.282646077729954e-06, "loss": 0.0174, "step": 184015 }, { "epoch": 0.7678313625021906, "grad_norm": 0.6287922837351743, "learning_rate": 2.2826150665085118e-06, "loss": 0.0233, "step": 184020 }, { "epoch": 0.7678522252171809, "grad_norm": 0.6951288703280774, "learning_rate": 2.2825840565509574e-06, "loss": 0.0193, "step": 184025 }, { "epoch": 0.7678730879321711, "grad_norm": 0.49890462479018705, "learning_rate": 2.2825530478572054e-06, "loss": 0.0183, "step": 184030 }, { "epoch": 0.7678939506471614, "grad_norm": 0.35095300685255654, "learning_rate": 2.28252204042717e-06, "loss": 0.017, "step": 184035 }, { "epoch": 0.7679148133621517, "grad_norm": 0.5843784724201101, "learning_rate": 2.2824910342607642e-06, "loss": 0.0218, "step": 184040 }, { "epoch": 0.767935676077142, "grad_norm": 0.6950933732573593, "learning_rate": 2.282460029357904e-06, "loss": 0.0275, "step": 184045 }, { "epoch": 0.7679565387921322, "grad_norm": 0.5903504256061968, "learning_rate": 2.2824290257185026e-06, "loss": 0.0201, "step": 184050 }, { "epoch": 0.7679774015071226, "grad_norm": 0.651380579203554, "learning_rate": 2.282398023342474e-06, "loss": 0.0202, "step": 184055 }, { "epoch": 0.7679982642221128, "grad_norm": 0.3945099349643463, "learning_rate": 2.282367022229733e-06, "loss": 0.0263, "step": 184060 }, { "epoch": 0.7680191269371031, "grad_norm": 0.6435990489900006, "learning_rate": 2.2823360223801934e-06, "loss": 0.0225, "step": 184065 }, { "epoch": 0.7680399896520934, "grad_norm": 0.7050301463116555, "learning_rate": 2.2823050237937697e-06, "loss": 0.0235, "step": 184070 }, { "epoch": 0.7680608523670837, "grad_norm": 0.594477751758568, "learning_rate": 2.2822740264703753e-06, "loss": 0.018, "step": 184075 }, { "epoch": 0.7680817150820739, "grad_norm": 1.3092259879122032, "learning_rate": 2.2822430304099262e-06, "loss": 0.0212, "step": 184080 }, { "epoch": 0.7681025777970641, "grad_norm": 0.45769352660815416, "learning_rate": 2.282212035612335e-06, "loss": 0.0157, "step": 184085 }, { "epoch": 0.7681234405120545, "grad_norm": 0.6625018247837521, "learning_rate": 2.282181042077516e-06, "loss": 0.0255, "step": 184090 }, { "epoch": 0.7681443032270447, "grad_norm": 0.5344037153486454, "learning_rate": 2.2821500498053847e-06, "loss": 0.0165, "step": 184095 }, { "epoch": 0.768165165942035, "grad_norm": 0.36636952299633774, "learning_rate": 2.2821190587958544e-06, "loss": 0.0183, "step": 184100 }, { "epoch": 0.7681860286570253, "grad_norm": 2.099597341415515, "learning_rate": 2.282088069048839e-06, "loss": 0.0201, "step": 184105 }, { "epoch": 0.7682068913720156, "grad_norm": 0.6337482156747092, "learning_rate": 2.2820570805642543e-06, "loss": 0.0193, "step": 184110 }, { "epoch": 0.7682277540870058, "grad_norm": 1.020856122689449, "learning_rate": 2.282026093342013e-06, "loss": 0.0297, "step": 184115 }, { "epoch": 0.7682486168019962, "grad_norm": 0.6947651533752903, "learning_rate": 2.2819951073820306e-06, "loss": 0.0221, "step": 184120 }, { "epoch": 0.7682694795169864, "grad_norm": 0.5719716321719355, "learning_rate": 2.28196412268422e-06, "loss": 0.0173, "step": 184125 }, { "epoch": 0.7682903422319767, "grad_norm": 0.9587539233605409, "learning_rate": 2.2819331392484968e-06, "loss": 0.017, "step": 184130 }, { "epoch": 0.768311204946967, "grad_norm": 0.6243711273913096, "learning_rate": 2.2819021570747754e-06, "loss": 0.0219, "step": 184135 }, { "epoch": 0.7683320676619573, "grad_norm": 0.861488708826662, "learning_rate": 2.281871176162969e-06, "loss": 0.0231, "step": 184140 }, { "epoch": 0.7683529303769475, "grad_norm": 0.9442235968704662, "learning_rate": 2.281840196512993e-06, "loss": 0.0282, "step": 184145 }, { "epoch": 0.7683737930919378, "grad_norm": 0.4692341323775694, "learning_rate": 2.2818092181247604e-06, "loss": 0.0242, "step": 184150 }, { "epoch": 0.7683946558069281, "grad_norm": 0.5493518334845842, "learning_rate": 2.2817782409981874e-06, "loss": 0.0195, "step": 184155 }, { "epoch": 0.7684155185219184, "grad_norm": 0.6509625680186804, "learning_rate": 2.281747265133187e-06, "loss": 0.0257, "step": 184160 }, { "epoch": 0.7684363812369086, "grad_norm": 0.5098500729362689, "learning_rate": 2.2817162905296744e-06, "loss": 0.0213, "step": 184165 }, { "epoch": 0.768457243951899, "grad_norm": 0.38027943057739116, "learning_rate": 2.281685317187563e-06, "loss": 0.0257, "step": 184170 }, { "epoch": 0.7684781066668892, "grad_norm": 0.5294121000943846, "learning_rate": 2.281654345106768e-06, "loss": 0.0175, "step": 184175 }, { "epoch": 0.7684989693818794, "grad_norm": 0.6615263152082128, "learning_rate": 2.2816233742872036e-06, "loss": 0.0168, "step": 184180 }, { "epoch": 0.7685198320968698, "grad_norm": 0.49156601329888633, "learning_rate": 2.2815924047287845e-06, "loss": 0.0192, "step": 184185 }, { "epoch": 0.76854069481186, "grad_norm": 0.889147133718904, "learning_rate": 2.281561436431424e-06, "loss": 0.0188, "step": 184190 }, { "epoch": 0.7685615575268503, "grad_norm": 0.7107215277831723, "learning_rate": 2.2815304693950375e-06, "loss": 0.0178, "step": 184195 }, { "epoch": 0.7685824202418406, "grad_norm": 0.8175110278752542, "learning_rate": 2.28149950361954e-06, "loss": 0.0219, "step": 184200 }, { "epoch": 0.7686032829568309, "grad_norm": 0.5895892628959118, "learning_rate": 2.2814685391048445e-06, "loss": 0.0231, "step": 184205 }, { "epoch": 0.7686241456718211, "grad_norm": 0.5949778142568739, "learning_rate": 2.2814375758508657e-06, "loss": 0.0179, "step": 184210 }, { "epoch": 0.7686450083868114, "grad_norm": 0.722340846418928, "learning_rate": 2.2814066138575193e-06, "loss": 0.0278, "step": 184215 }, { "epoch": 0.7686658711018017, "grad_norm": 0.2849324780062802, "learning_rate": 2.281375653124719e-06, "loss": 0.0156, "step": 184220 }, { "epoch": 0.768686733816792, "grad_norm": 0.5048397213489537, "learning_rate": 2.281344693652378e-06, "loss": 0.0233, "step": 184225 }, { "epoch": 0.7687075965317822, "grad_norm": 0.7500424681437476, "learning_rate": 2.2813137354404127e-06, "loss": 0.0327, "step": 184230 }, { "epoch": 0.7687284592467726, "grad_norm": 0.30961718103662594, "learning_rate": 2.281282778488737e-06, "loss": 0.0188, "step": 184235 }, { "epoch": 0.7687493219617628, "grad_norm": 0.9432395205084099, "learning_rate": 2.281251822797265e-06, "loss": 0.0247, "step": 184240 }, { "epoch": 0.7687701846767531, "grad_norm": 0.45814707604360466, "learning_rate": 2.2812208683659116e-06, "loss": 0.0167, "step": 184245 }, { "epoch": 0.7687910473917434, "grad_norm": 0.46224616954505265, "learning_rate": 2.281189915194591e-06, "loss": 0.0232, "step": 184250 }, { "epoch": 0.7688119101067337, "grad_norm": 0.7025073697724288, "learning_rate": 2.2811589632832184e-06, "loss": 0.0205, "step": 184255 }, { "epoch": 0.7688327728217239, "grad_norm": 0.31183211155027385, "learning_rate": 2.2811280126317066e-06, "loss": 0.018, "step": 184260 }, { "epoch": 0.7688536355367142, "grad_norm": 0.6075185271674963, "learning_rate": 2.281097063239972e-06, "loss": 0.0214, "step": 184265 }, { "epoch": 0.7688744982517045, "grad_norm": 0.6356318337948034, "learning_rate": 2.2810661151079286e-06, "loss": 0.0164, "step": 184270 }, { "epoch": 0.7688953609666948, "grad_norm": 0.7133047715366072, "learning_rate": 2.281035168235491e-06, "loss": 0.0273, "step": 184275 }, { "epoch": 0.768916223681685, "grad_norm": 0.6350312119290618, "learning_rate": 2.281004222622573e-06, "loss": 0.0234, "step": 184280 }, { "epoch": 0.7689370863966754, "grad_norm": 0.7905753024942609, "learning_rate": 2.28097327826909e-06, "loss": 0.0228, "step": 184285 }, { "epoch": 0.7689579491116656, "grad_norm": 0.5584308574739426, "learning_rate": 2.2809423351749563e-06, "loss": 0.0173, "step": 184290 }, { "epoch": 0.7689788118266558, "grad_norm": 0.37045722552216936, "learning_rate": 2.2809113933400865e-06, "loss": 0.0209, "step": 184295 }, { "epoch": 0.7689996745416462, "grad_norm": 0.6989321441904688, "learning_rate": 2.280880452764395e-06, "loss": 0.0189, "step": 184300 }, { "epoch": 0.7690205372566364, "grad_norm": 0.7475261508194455, "learning_rate": 2.2808495134477973e-06, "loss": 0.0205, "step": 184305 }, { "epoch": 0.7690413999716267, "grad_norm": 0.8931773349253984, "learning_rate": 2.280818575390207e-06, "loss": 0.0213, "step": 184310 }, { "epoch": 0.769062262686617, "grad_norm": 0.2954209377073987, "learning_rate": 2.2807876385915383e-06, "loss": 0.0199, "step": 184315 }, { "epoch": 0.7690831254016073, "grad_norm": 0.5460996971979434, "learning_rate": 2.280756703051707e-06, "loss": 0.0203, "step": 184320 }, { "epoch": 0.7691039881165975, "grad_norm": 0.7607409246651208, "learning_rate": 2.280725768770627e-06, "loss": 0.016, "step": 184325 }, { "epoch": 0.7691248508315878, "grad_norm": 0.3518098244697386, "learning_rate": 2.280694835748214e-06, "loss": 0.0171, "step": 184330 }, { "epoch": 0.7691457135465781, "grad_norm": 0.6308274584669042, "learning_rate": 2.2806639039843807e-06, "loss": 0.0203, "step": 184335 }, { "epoch": 0.7691665762615684, "grad_norm": 0.5930761625823877, "learning_rate": 2.280632973479044e-06, "loss": 0.0193, "step": 184340 }, { "epoch": 0.7691874389765586, "grad_norm": 0.8543063176274988, "learning_rate": 2.280602044232117e-06, "loss": 0.0193, "step": 184345 }, { "epoch": 0.769208301691549, "grad_norm": 0.8033425481754519, "learning_rate": 2.2805711162435144e-06, "loss": 0.0217, "step": 184350 }, { "epoch": 0.7692291644065392, "grad_norm": 0.9996094798933126, "learning_rate": 2.2805401895131516e-06, "loss": 0.0241, "step": 184355 }, { "epoch": 0.7692500271215295, "grad_norm": 0.6667376727459481, "learning_rate": 2.280509264040943e-06, "loss": 0.017, "step": 184360 }, { "epoch": 0.7692708898365198, "grad_norm": 0.603485843461985, "learning_rate": 2.2804783398268036e-06, "loss": 0.0186, "step": 184365 }, { "epoch": 0.76929175255151, "grad_norm": 0.5392701482263563, "learning_rate": 2.2804474168706477e-06, "loss": 0.0161, "step": 184370 }, { "epoch": 0.7693126152665003, "grad_norm": 0.8442319340301526, "learning_rate": 2.28041649517239e-06, "loss": 0.0199, "step": 184375 }, { "epoch": 0.7693334779814907, "grad_norm": 0.4380888726418907, "learning_rate": 2.2803855747319455e-06, "loss": 0.0232, "step": 184380 }, { "epoch": 0.7693543406964809, "grad_norm": 0.7256495456965846, "learning_rate": 2.2803546555492286e-06, "loss": 0.0243, "step": 184385 }, { "epoch": 0.7693752034114711, "grad_norm": 0.7083866487705073, "learning_rate": 2.2803237376241536e-06, "loss": 0.0264, "step": 184390 }, { "epoch": 0.7693960661264614, "grad_norm": 0.5726245413794838, "learning_rate": 2.280292820956637e-06, "loss": 0.0203, "step": 184395 }, { "epoch": 0.7694169288414517, "grad_norm": 0.2655489227547333, "learning_rate": 2.280261905546592e-06, "loss": 0.0211, "step": 184400 }, { "epoch": 0.769437791556442, "grad_norm": 0.4285854590972304, "learning_rate": 2.2802309913939335e-06, "loss": 0.0216, "step": 184405 }, { "epoch": 0.7694586542714322, "grad_norm": 0.8243958239900732, "learning_rate": 2.2802000784985766e-06, "loss": 0.0284, "step": 184410 }, { "epoch": 0.7694795169864226, "grad_norm": 0.5449571921600447, "learning_rate": 2.2801691668604364e-06, "loss": 0.02, "step": 184415 }, { "epoch": 0.7695003797014128, "grad_norm": 0.6751769627798376, "learning_rate": 2.2801382564794264e-06, "loss": 0.0181, "step": 184420 }, { "epoch": 0.7695212424164031, "grad_norm": 0.949910656330556, "learning_rate": 2.280107347355463e-06, "loss": 0.0256, "step": 184425 }, { "epoch": 0.7695421051313934, "grad_norm": 0.6993101240361386, "learning_rate": 2.2800764394884604e-06, "loss": 0.0283, "step": 184430 }, { "epoch": 0.7695629678463837, "grad_norm": 0.5491698077857035, "learning_rate": 2.2800455328783323e-06, "loss": 0.0217, "step": 184435 }, { "epoch": 0.7695838305613739, "grad_norm": 0.36762982799943494, "learning_rate": 2.2800146275249953e-06, "loss": 0.0167, "step": 184440 }, { "epoch": 0.7696046932763642, "grad_norm": 2.20572618590507, "learning_rate": 2.279983723428363e-06, "loss": 0.0218, "step": 184445 }, { "epoch": 0.7696255559913545, "grad_norm": 0.566013368972238, "learning_rate": 2.2799528205883514e-06, "loss": 0.0216, "step": 184450 }, { "epoch": 0.7696464187063448, "grad_norm": 0.6230295557455481, "learning_rate": 2.2799219190048742e-06, "loss": 0.0277, "step": 184455 }, { "epoch": 0.769667281421335, "grad_norm": 0.6206802395841007, "learning_rate": 2.2798910186778464e-06, "loss": 0.0196, "step": 184460 }, { "epoch": 0.7696881441363254, "grad_norm": 0.5794277362316184, "learning_rate": 2.2798601196071835e-06, "loss": 0.0201, "step": 184465 }, { "epoch": 0.7697090068513156, "grad_norm": 0.5090489683961125, "learning_rate": 2.2798292217927993e-06, "loss": 0.0286, "step": 184470 }, { "epoch": 0.7697298695663058, "grad_norm": 0.5679824937711886, "learning_rate": 2.2797983252346103e-06, "loss": 0.0268, "step": 184475 }, { "epoch": 0.7697507322812962, "grad_norm": 0.9056859197089046, "learning_rate": 2.27976742993253e-06, "loss": 0.029, "step": 184480 }, { "epoch": 0.7697715949962864, "grad_norm": 0.6061617551134235, "learning_rate": 2.2797365358864734e-06, "loss": 0.0198, "step": 184485 }, { "epoch": 0.7697924577112767, "grad_norm": 0.7737586914158066, "learning_rate": 2.279705643096356e-06, "loss": 0.0217, "step": 184490 }, { "epoch": 0.769813320426267, "grad_norm": 0.6556597675289537, "learning_rate": 2.2796747515620927e-06, "loss": 0.0206, "step": 184495 }, { "epoch": 0.7698341831412573, "grad_norm": 0.5730168881151624, "learning_rate": 2.2796438612835976e-06, "loss": 0.0181, "step": 184500 }, { "epoch": 0.7698550458562475, "grad_norm": 0.33108335167626185, "learning_rate": 2.279612972260787e-06, "loss": 0.0166, "step": 184505 }, { "epoch": 0.7698759085712378, "grad_norm": 0.536370693086572, "learning_rate": 2.279582084493574e-06, "loss": 0.02, "step": 184510 }, { "epoch": 0.7698967712862281, "grad_norm": 0.4196456810053344, "learning_rate": 2.279551197981875e-06, "loss": 0.0094, "step": 184515 }, { "epoch": 0.7699176340012184, "grad_norm": 0.7694329696971489, "learning_rate": 2.2795203127256044e-06, "loss": 0.0221, "step": 184520 }, { "epoch": 0.7699384967162086, "grad_norm": 0.7120772107547316, "learning_rate": 2.2794894287246775e-06, "loss": 0.0238, "step": 184525 }, { "epoch": 0.769959359431199, "grad_norm": 0.8992607063289579, "learning_rate": 2.279458545979009e-06, "loss": 0.0279, "step": 184530 }, { "epoch": 0.7699802221461892, "grad_norm": 0.9160030278812049, "learning_rate": 2.2794276644885132e-06, "loss": 0.0166, "step": 184535 }, { "epoch": 0.7700010848611795, "grad_norm": 0.9507139202561828, "learning_rate": 2.279396784253106e-06, "loss": 0.0261, "step": 184540 }, { "epoch": 0.7700219475761698, "grad_norm": 0.6815112151737243, "learning_rate": 2.2793659052727023e-06, "loss": 0.0214, "step": 184545 }, { "epoch": 0.7700428102911601, "grad_norm": 0.35290006940911145, "learning_rate": 2.279335027547217e-06, "loss": 0.0308, "step": 184550 }, { "epoch": 0.7700636730061503, "grad_norm": 0.6069611432584047, "learning_rate": 2.2793041510765653e-06, "loss": 0.0131, "step": 184555 }, { "epoch": 0.7700845357211407, "grad_norm": 0.5453528136533466, "learning_rate": 2.2792732758606618e-06, "loss": 0.0216, "step": 184560 }, { "epoch": 0.7701053984361309, "grad_norm": 0.5269044301177342, "learning_rate": 2.279242401899421e-06, "loss": 0.0203, "step": 184565 }, { "epoch": 0.7701262611511211, "grad_norm": 0.6598558133260922, "learning_rate": 2.279211529192759e-06, "loss": 0.0184, "step": 184570 }, { "epoch": 0.7701471238661114, "grad_norm": 0.6469277450064955, "learning_rate": 2.2791806577405907e-06, "loss": 0.0308, "step": 184575 }, { "epoch": 0.7701679865811017, "grad_norm": 0.9956134761405699, "learning_rate": 2.27914978754283e-06, "loss": 0.0249, "step": 184580 }, { "epoch": 0.770188849296092, "grad_norm": 0.4354861817441631, "learning_rate": 2.279118918599394e-06, "loss": 0.0185, "step": 184585 }, { "epoch": 0.7702097120110822, "grad_norm": 0.42145989456728106, "learning_rate": 2.2790880509101957e-06, "loss": 0.0276, "step": 184590 }, { "epoch": 0.7702305747260726, "grad_norm": 0.6616121034773002, "learning_rate": 2.279057184475151e-06, "loss": 0.0249, "step": 184595 }, { "epoch": 0.7702514374410628, "grad_norm": 0.9892613399162763, "learning_rate": 2.2790263192941762e-06, "loss": 0.0176, "step": 184600 }, { "epoch": 0.7702723001560531, "grad_norm": 0.7241260296180277, "learning_rate": 2.2789954553671844e-06, "loss": 0.0203, "step": 184605 }, { "epoch": 0.7702931628710434, "grad_norm": 0.6427022678691593, "learning_rate": 2.2789645926940914e-06, "loss": 0.0184, "step": 184610 }, { "epoch": 0.7703140255860337, "grad_norm": 0.7871515459230977, "learning_rate": 2.2789337312748126e-06, "loss": 0.0203, "step": 184615 }, { "epoch": 0.7703348883010239, "grad_norm": 0.41627375072398587, "learning_rate": 2.278902871109262e-06, "loss": 0.0288, "step": 184620 }, { "epoch": 0.7703557510160142, "grad_norm": 0.6915058842697219, "learning_rate": 2.2788720121973566e-06, "loss": 0.0247, "step": 184625 }, { "epoch": 0.7703766137310045, "grad_norm": 0.6817748276623428, "learning_rate": 2.2788411545390104e-06, "loss": 0.0137, "step": 184630 }, { "epoch": 0.7703974764459948, "grad_norm": 0.9925916757003393, "learning_rate": 2.2788102981341382e-06, "loss": 0.0232, "step": 184635 }, { "epoch": 0.770418339160985, "grad_norm": 0.845973141084868, "learning_rate": 2.278779442982656e-06, "loss": 0.0254, "step": 184640 }, { "epoch": 0.7704392018759754, "grad_norm": 0.906811138703765, "learning_rate": 2.2787485890844783e-06, "loss": 0.0216, "step": 184645 }, { "epoch": 0.7704600645909656, "grad_norm": 0.7049788860647954, "learning_rate": 2.278717736439521e-06, "loss": 0.0234, "step": 184650 }, { "epoch": 0.7704809273059559, "grad_norm": 0.4217574685500968, "learning_rate": 2.278686885047698e-06, "loss": 0.0152, "step": 184655 }, { "epoch": 0.7705017900209462, "grad_norm": 0.49684223434415115, "learning_rate": 2.2786560349089255e-06, "loss": 0.0232, "step": 184660 }, { "epoch": 0.7705226527359365, "grad_norm": 1.059102073014588, "learning_rate": 2.278625186023119e-06, "loss": 0.0231, "step": 184665 }, { "epoch": 0.7705435154509267, "grad_norm": 0.4042273170668776, "learning_rate": 2.278594338390193e-06, "loss": 0.0164, "step": 184670 }, { "epoch": 0.770564378165917, "grad_norm": 0.4884590328216069, "learning_rate": 2.2785634920100625e-06, "loss": 0.0212, "step": 184675 }, { "epoch": 0.7705852408809073, "grad_norm": 0.6862781172859822, "learning_rate": 2.278532646882643e-06, "loss": 0.0213, "step": 184680 }, { "epoch": 0.7706061035958975, "grad_norm": 0.6860955076067782, "learning_rate": 2.2785018030078496e-06, "loss": 0.0112, "step": 184685 }, { "epoch": 0.7706269663108878, "grad_norm": 0.4607867598286297, "learning_rate": 2.278470960385598e-06, "loss": 0.0232, "step": 184690 }, { "epoch": 0.7706478290258781, "grad_norm": 0.45373459800701904, "learning_rate": 2.2784401190158027e-06, "loss": 0.0213, "step": 184695 }, { "epoch": 0.7706686917408684, "grad_norm": 0.6243704686172418, "learning_rate": 2.27840927889838e-06, "loss": 0.0176, "step": 184700 }, { "epoch": 0.7706895544558586, "grad_norm": 0.28791658926259134, "learning_rate": 2.278378440033244e-06, "loss": 0.0172, "step": 184705 }, { "epoch": 0.770710417170849, "grad_norm": 0.4933930927999443, "learning_rate": 2.27834760242031e-06, "loss": 0.0138, "step": 184710 }, { "epoch": 0.7707312798858392, "grad_norm": 0.6345389179605778, "learning_rate": 2.2783167660594942e-06, "loss": 0.0211, "step": 184715 }, { "epoch": 0.7707521426008295, "grad_norm": 0.33931944963625227, "learning_rate": 2.278285930950711e-06, "loss": 0.0185, "step": 184720 }, { "epoch": 0.7707730053158198, "grad_norm": 1.1565191076898005, "learning_rate": 2.278255097093876e-06, "loss": 0.0202, "step": 184725 }, { "epoch": 0.7707938680308101, "grad_norm": 0.6034720594144215, "learning_rate": 2.278224264488905e-06, "loss": 0.02, "step": 184730 }, { "epoch": 0.7708147307458003, "grad_norm": 0.6727245575452744, "learning_rate": 2.2781934331357123e-06, "loss": 0.0233, "step": 184735 }, { "epoch": 0.7708355934607906, "grad_norm": 0.5134900815672531, "learning_rate": 2.2781626030342134e-06, "loss": 0.0213, "step": 184740 }, { "epoch": 0.7708564561757809, "grad_norm": 1.0049250340337672, "learning_rate": 2.2781317741843246e-06, "loss": 0.0314, "step": 184745 }, { "epoch": 0.7708773188907712, "grad_norm": 0.8355534252720439, "learning_rate": 2.2781009465859605e-06, "loss": 0.0203, "step": 184750 }, { "epoch": 0.7708981816057614, "grad_norm": 0.6067863215145374, "learning_rate": 2.2780701202390364e-06, "loss": 0.0299, "step": 184755 }, { "epoch": 0.7709190443207518, "grad_norm": 0.5244639303016754, "learning_rate": 2.278039295143467e-06, "loss": 0.0186, "step": 184760 }, { "epoch": 0.770939907035742, "grad_norm": 0.6930706147064453, "learning_rate": 2.2780084712991686e-06, "loss": 0.0209, "step": 184765 }, { "epoch": 0.7709607697507322, "grad_norm": 0.9868567434866129, "learning_rate": 2.2779776487060564e-06, "loss": 0.0224, "step": 184770 }, { "epoch": 0.7709816324657226, "grad_norm": 0.5436748740887761, "learning_rate": 2.277946827364046e-06, "loss": 0.0202, "step": 184775 }, { "epoch": 0.7710024951807128, "grad_norm": 0.7507824900022957, "learning_rate": 2.2779160072730516e-06, "loss": 0.0216, "step": 184780 }, { "epoch": 0.7710233578957031, "grad_norm": 0.8347973288885596, "learning_rate": 2.27788518843299e-06, "loss": 0.0222, "step": 184785 }, { "epoch": 0.7710442206106934, "grad_norm": 0.3232598279161439, "learning_rate": 2.2778543708437754e-06, "loss": 0.019, "step": 184790 }, { "epoch": 0.7710650833256837, "grad_norm": 1.1156154286007731, "learning_rate": 2.277823554505324e-06, "loss": 0.0148, "step": 184795 }, { "epoch": 0.7710859460406739, "grad_norm": 0.9042957679849454, "learning_rate": 2.2777927394175513e-06, "loss": 0.0217, "step": 184800 }, { "epoch": 0.7711068087556642, "grad_norm": 0.6939405228157038, "learning_rate": 2.2777619255803716e-06, "loss": 0.023, "step": 184805 }, { "epoch": 0.7711276714706545, "grad_norm": 0.5216744105335469, "learning_rate": 2.2777311129937014e-06, "loss": 0.0177, "step": 184810 }, { "epoch": 0.7711485341856448, "grad_norm": 0.7724469413119457, "learning_rate": 2.2777003016574555e-06, "loss": 0.0185, "step": 184815 }, { "epoch": 0.771169396900635, "grad_norm": 0.21958562877249063, "learning_rate": 2.27766949157155e-06, "loss": 0.0128, "step": 184820 }, { "epoch": 0.7711902596156254, "grad_norm": 0.9890900339883573, "learning_rate": 2.2776386827358996e-06, "loss": 0.0205, "step": 184825 }, { "epoch": 0.7712111223306156, "grad_norm": 0.8691871160765696, "learning_rate": 2.27760787515042e-06, "loss": 0.0279, "step": 184830 }, { "epoch": 0.7712319850456059, "grad_norm": 0.5435947083250741, "learning_rate": 2.277577068815027e-06, "loss": 0.0155, "step": 184835 }, { "epoch": 0.7712528477605962, "grad_norm": 0.18969220938408707, "learning_rate": 2.277546263729636e-06, "loss": 0.0152, "step": 184840 }, { "epoch": 0.7712737104755865, "grad_norm": 0.34129846055784185, "learning_rate": 2.277515459894161e-06, "loss": 0.0236, "step": 184845 }, { "epoch": 0.7712945731905767, "grad_norm": 0.8655213220506407, "learning_rate": 2.27748465730852e-06, "loss": 0.0187, "step": 184850 }, { "epoch": 0.7713154359055671, "grad_norm": 0.49321958203141125, "learning_rate": 2.2774538559726266e-06, "loss": 0.0227, "step": 184855 }, { "epoch": 0.7713362986205573, "grad_norm": 0.30523220025086223, "learning_rate": 2.2774230558863974e-06, "loss": 0.0182, "step": 184860 }, { "epoch": 0.7713571613355475, "grad_norm": 0.656545042940307, "learning_rate": 2.277392257049747e-06, "loss": 0.0188, "step": 184865 }, { "epoch": 0.7713780240505378, "grad_norm": 1.088753320434075, "learning_rate": 2.277361459462592e-06, "loss": 0.0356, "step": 184870 }, { "epoch": 0.7713988867655281, "grad_norm": 0.8988215894874708, "learning_rate": 2.277330663124846e-06, "loss": 0.0181, "step": 184875 }, { "epoch": 0.7714197494805184, "grad_norm": 0.43026033508542255, "learning_rate": 2.277299868036427e-06, "loss": 0.0192, "step": 184880 }, { "epoch": 0.7714406121955086, "grad_norm": 0.35471899825703457, "learning_rate": 2.2772690741972478e-06, "loss": 0.0191, "step": 184885 }, { "epoch": 0.771461474910499, "grad_norm": 0.4992071441044627, "learning_rate": 2.2772382816072267e-06, "loss": 0.0158, "step": 184890 }, { "epoch": 0.7714823376254892, "grad_norm": 0.8424306648249674, "learning_rate": 2.2772074902662773e-06, "loss": 0.0253, "step": 184895 }, { "epoch": 0.7715032003404795, "grad_norm": 0.8652857291364467, "learning_rate": 2.277176700174316e-06, "loss": 0.0217, "step": 184900 }, { "epoch": 0.7715240630554698, "grad_norm": 0.9457751316307008, "learning_rate": 2.2771459113312585e-06, "loss": 0.025, "step": 184905 }, { "epoch": 0.7715449257704601, "grad_norm": 0.8065294312188115, "learning_rate": 2.2771151237370196e-06, "loss": 0.0196, "step": 184910 }, { "epoch": 0.7715657884854503, "grad_norm": 0.5276525584917442, "learning_rate": 2.277084337391516e-06, "loss": 0.0203, "step": 184915 }, { "epoch": 0.7715866512004406, "grad_norm": 0.4747343967081808, "learning_rate": 2.277053552294662e-06, "loss": 0.0259, "step": 184920 }, { "epoch": 0.7716075139154309, "grad_norm": 0.774841687140574, "learning_rate": 2.277022768446374e-06, "loss": 0.0156, "step": 184925 }, { "epoch": 0.7716283766304212, "grad_norm": 1.1168165885464398, "learning_rate": 2.276991985846567e-06, "loss": 0.0208, "step": 184930 }, { "epoch": 0.7716492393454114, "grad_norm": 0.5839869986988154, "learning_rate": 2.276961204495158e-06, "loss": 0.0187, "step": 184935 }, { "epoch": 0.7716701020604018, "grad_norm": 1.0198952304291107, "learning_rate": 2.276930424392061e-06, "loss": 0.0269, "step": 184940 }, { "epoch": 0.771690964775392, "grad_norm": 0.7356121956259181, "learning_rate": 2.276899645537192e-06, "loss": 0.0211, "step": 184945 }, { "epoch": 0.7717118274903823, "grad_norm": 0.9430756254209022, "learning_rate": 2.276868867930468e-06, "loss": 0.0212, "step": 184950 }, { "epoch": 0.7717326902053726, "grad_norm": 0.6891573073607576, "learning_rate": 2.2768380915718025e-06, "loss": 0.0192, "step": 184955 }, { "epoch": 0.7717535529203629, "grad_norm": 0.8150989901849923, "learning_rate": 2.276807316461113e-06, "loss": 0.0242, "step": 184960 }, { "epoch": 0.7717744156353531, "grad_norm": 0.5622121449988878, "learning_rate": 2.2767765425983138e-06, "loss": 0.0282, "step": 184965 }, { "epoch": 0.7717952783503434, "grad_norm": 1.2076983435635746, "learning_rate": 2.276745769983322e-06, "loss": 0.0246, "step": 184970 }, { "epoch": 0.7718161410653337, "grad_norm": 0.7059753730713044, "learning_rate": 2.276714998616052e-06, "loss": 0.0223, "step": 184975 }, { "epoch": 0.7718370037803239, "grad_norm": 0.6669848909433669, "learning_rate": 2.2766842284964194e-06, "loss": 0.0156, "step": 184980 }, { "epoch": 0.7718578664953142, "grad_norm": 0.9152462114613726, "learning_rate": 2.2766534596243408e-06, "loss": 0.0282, "step": 184985 }, { "epoch": 0.7718787292103045, "grad_norm": 0.5957889255227531, "learning_rate": 2.2766226919997316e-06, "loss": 0.0201, "step": 184990 }, { "epoch": 0.7718995919252948, "grad_norm": 0.5462899212011217, "learning_rate": 2.276591925622507e-06, "loss": 0.0187, "step": 184995 }, { "epoch": 0.771920454640285, "grad_norm": 0.4994301358313206, "learning_rate": 2.276561160492583e-06, "loss": 0.021, "step": 185000 }, { "epoch": 0.7719413173552754, "grad_norm": 0.33599392357708024, "learning_rate": 2.2765303966098763e-06, "loss": 0.0253, "step": 185005 }, { "epoch": 0.7719621800702656, "grad_norm": 0.8011364708679095, "learning_rate": 2.2764996339743013e-06, "loss": 0.0213, "step": 185010 }, { "epoch": 0.7719830427852559, "grad_norm": 0.797438817310467, "learning_rate": 2.276468872585775e-06, "loss": 0.0203, "step": 185015 }, { "epoch": 0.7720039055002462, "grad_norm": 0.6794529170271921, "learning_rate": 2.276438112444211e-06, "loss": 0.02, "step": 185020 }, { "epoch": 0.7720247682152365, "grad_norm": 0.7397769621005418, "learning_rate": 2.2764073535495273e-06, "loss": 0.0192, "step": 185025 }, { "epoch": 0.7720456309302267, "grad_norm": 0.37890979987291096, "learning_rate": 2.276376595901639e-06, "loss": 0.0178, "step": 185030 }, { "epoch": 0.7720664936452171, "grad_norm": 0.9104458141219158, "learning_rate": 2.276345839500461e-06, "loss": 0.0198, "step": 185035 }, { "epoch": 0.7720873563602073, "grad_norm": 0.28157992405667776, "learning_rate": 2.27631508434591e-06, "loss": 0.0193, "step": 185040 }, { "epoch": 0.7721082190751976, "grad_norm": 0.7195413099992201, "learning_rate": 2.276284330437902e-06, "loss": 0.028, "step": 185045 }, { "epoch": 0.7721290817901878, "grad_norm": 0.5047881019176385, "learning_rate": 2.2762535777763516e-06, "loss": 0.0244, "step": 185050 }, { "epoch": 0.7721499445051782, "grad_norm": 0.6490272221240386, "learning_rate": 2.2762228263611756e-06, "loss": 0.0269, "step": 185055 }, { "epoch": 0.7721708072201684, "grad_norm": 0.6216174221706954, "learning_rate": 2.2761920761922896e-06, "loss": 0.02, "step": 185060 }, { "epoch": 0.7721916699351586, "grad_norm": 0.5087387759060432, "learning_rate": 2.2761613272696097e-06, "loss": 0.023, "step": 185065 }, { "epoch": 0.772212532650149, "grad_norm": 0.5505553462830153, "learning_rate": 2.2761305795930503e-06, "loss": 0.0157, "step": 185070 }, { "epoch": 0.7722333953651392, "grad_norm": 0.3629980906584239, "learning_rate": 2.2760998331625294e-06, "loss": 0.0185, "step": 185075 }, { "epoch": 0.7722542580801295, "grad_norm": 0.9152928754911611, "learning_rate": 2.2760690879779617e-06, "loss": 0.0291, "step": 185080 }, { "epoch": 0.7722751207951198, "grad_norm": 0.4544112214892236, "learning_rate": 2.2760383440392626e-06, "loss": 0.0257, "step": 185085 }, { "epoch": 0.7722959835101101, "grad_norm": 1.1451566870598253, "learning_rate": 2.276007601346349e-06, "loss": 0.0205, "step": 185090 }, { "epoch": 0.7723168462251003, "grad_norm": 0.5511126711613278, "learning_rate": 2.275976859899136e-06, "loss": 0.0244, "step": 185095 }, { "epoch": 0.7723377089400906, "grad_norm": 0.9226101470547792, "learning_rate": 2.27594611969754e-06, "loss": 0.0274, "step": 185100 }, { "epoch": 0.7723585716550809, "grad_norm": 0.633731626379868, "learning_rate": 2.2759153807414765e-06, "loss": 0.0228, "step": 185105 }, { "epoch": 0.7723794343700712, "grad_norm": 0.3913968372244672, "learning_rate": 2.275884643030861e-06, "loss": 0.0218, "step": 185110 }, { "epoch": 0.7724002970850614, "grad_norm": 0.9662422232083069, "learning_rate": 2.2758539065656105e-06, "loss": 0.0208, "step": 185115 }, { "epoch": 0.7724211598000518, "grad_norm": 0.44453806127653733, "learning_rate": 2.2758231713456396e-06, "loss": 0.0205, "step": 185120 }, { "epoch": 0.772442022515042, "grad_norm": 0.3774811221788799, "learning_rate": 2.275792437370866e-06, "loss": 0.0225, "step": 185125 }, { "epoch": 0.7724628852300323, "grad_norm": 0.9093024658641242, "learning_rate": 2.275761704641204e-06, "loss": 0.0257, "step": 185130 }, { "epoch": 0.7724837479450226, "grad_norm": 0.2695681983225783, "learning_rate": 2.2757309731565695e-06, "loss": 0.0175, "step": 185135 }, { "epoch": 0.7725046106600129, "grad_norm": 0.8889957864902859, "learning_rate": 2.2757002429168796e-06, "loss": 0.0198, "step": 185140 }, { "epoch": 0.7725254733750031, "grad_norm": 0.4243106991562173, "learning_rate": 2.27566951392205e-06, "loss": 0.02, "step": 185145 }, { "epoch": 0.7725463360899935, "grad_norm": 0.24580642311121248, "learning_rate": 2.2756387861719957e-06, "loss": 0.0208, "step": 185150 }, { "epoch": 0.7725671988049837, "grad_norm": 1.2047585351231698, "learning_rate": 2.2756080596666335e-06, "loss": 0.0281, "step": 185155 }, { "epoch": 0.772588061519974, "grad_norm": 0.38356305324027473, "learning_rate": 2.275577334405879e-06, "loss": 0.0206, "step": 185160 }, { "epoch": 0.7726089242349642, "grad_norm": 0.7703841341856895, "learning_rate": 2.275546610389649e-06, "loss": 0.0223, "step": 185165 }, { "epoch": 0.7726297869499545, "grad_norm": 0.4957272624108312, "learning_rate": 2.2755158876178586e-06, "loss": 0.0188, "step": 185170 }, { "epoch": 0.7726506496649448, "grad_norm": 0.5663530192492533, "learning_rate": 2.2754851660904236e-06, "loss": 0.0155, "step": 185175 }, { "epoch": 0.772671512379935, "grad_norm": 1.7656759169600997, "learning_rate": 2.2754544458072604e-06, "loss": 0.0281, "step": 185180 }, { "epoch": 0.7726923750949254, "grad_norm": 0.7781881655558651, "learning_rate": 2.2754237267682853e-06, "loss": 0.0186, "step": 185185 }, { "epoch": 0.7727132378099156, "grad_norm": 0.4718535347121213, "learning_rate": 2.2753930089734144e-06, "loss": 0.0212, "step": 185190 }, { "epoch": 0.7727341005249059, "grad_norm": 1.5481488961348084, "learning_rate": 2.275362292422563e-06, "loss": 0.0296, "step": 185195 }, { "epoch": 0.7727549632398962, "grad_norm": 0.8359974644825205, "learning_rate": 2.2753315771156477e-06, "loss": 0.0272, "step": 185200 }, { "epoch": 0.7727758259548865, "grad_norm": 0.7562462574953541, "learning_rate": 2.2753008630525837e-06, "loss": 0.0303, "step": 185205 }, { "epoch": 0.7727966886698767, "grad_norm": 0.6511441320082108, "learning_rate": 2.275270150233288e-06, "loss": 0.0235, "step": 185210 }, { "epoch": 0.7728175513848671, "grad_norm": 1.910560658846247, "learning_rate": 2.2752394386576766e-06, "loss": 0.0268, "step": 185215 }, { "epoch": 0.7728384140998573, "grad_norm": 0.3174354750342337, "learning_rate": 2.2752087283256653e-06, "loss": 0.0203, "step": 185220 }, { "epoch": 0.7728592768148476, "grad_norm": 0.36995622555142355, "learning_rate": 2.27517801923717e-06, "loss": 0.0187, "step": 185225 }, { "epoch": 0.7728801395298378, "grad_norm": 0.6518232087473048, "learning_rate": 2.275147311392107e-06, "loss": 0.0222, "step": 185230 }, { "epoch": 0.7729010022448282, "grad_norm": 0.32389010604707175, "learning_rate": 2.2751166047903924e-06, "loss": 0.0125, "step": 185235 }, { "epoch": 0.7729218649598184, "grad_norm": 0.6349748070429371, "learning_rate": 2.2750858994319426e-06, "loss": 0.018, "step": 185240 }, { "epoch": 0.7729427276748086, "grad_norm": 0.7395020408988284, "learning_rate": 2.275055195316673e-06, "loss": 0.0211, "step": 185245 }, { "epoch": 0.772963590389799, "grad_norm": 0.8265344086547993, "learning_rate": 2.2750244924445e-06, "loss": 0.016, "step": 185250 }, { "epoch": 0.7729844531047892, "grad_norm": 0.4477778908949622, "learning_rate": 2.2749937908153403e-06, "loss": 0.016, "step": 185255 }, { "epoch": 0.7730053158197795, "grad_norm": 0.7557561677654016, "learning_rate": 2.2749630904291094e-06, "loss": 0.0179, "step": 185260 }, { "epoch": 0.7730261785347698, "grad_norm": 0.47669523336354475, "learning_rate": 2.274932391285723e-06, "loss": 0.0235, "step": 185265 }, { "epoch": 0.7730470412497601, "grad_norm": 0.4935235406293234, "learning_rate": 2.274901693385098e-06, "loss": 0.0192, "step": 185270 }, { "epoch": 0.7730679039647503, "grad_norm": 0.9843625589385576, "learning_rate": 2.274870996727151e-06, "loss": 0.0238, "step": 185275 }, { "epoch": 0.7730887666797406, "grad_norm": 0.9346731494356543, "learning_rate": 2.274840301311797e-06, "loss": 0.0148, "step": 185280 }, { "epoch": 0.7731096293947309, "grad_norm": 0.7390416529830007, "learning_rate": 2.274809607138953e-06, "loss": 0.0221, "step": 185285 }, { "epoch": 0.7731304921097212, "grad_norm": 4.857086435057701, "learning_rate": 2.2747789142085346e-06, "loss": 0.0227, "step": 185290 }, { "epoch": 0.7731513548247114, "grad_norm": 1.1533697960857128, "learning_rate": 2.2747482225204588e-06, "loss": 0.0181, "step": 185295 }, { "epoch": 0.7731722175397018, "grad_norm": 0.8877502454229178, "learning_rate": 2.274717532074641e-06, "loss": 0.0249, "step": 185300 }, { "epoch": 0.773193080254692, "grad_norm": 0.35446871522843026, "learning_rate": 2.274686842870997e-06, "loss": 0.0159, "step": 185305 }, { "epoch": 0.7732139429696823, "grad_norm": 0.8873593356867958, "learning_rate": 2.2746561549094444e-06, "loss": 0.0208, "step": 185310 }, { "epoch": 0.7732348056846726, "grad_norm": 0.5784430253031813, "learning_rate": 2.2746254681898983e-06, "loss": 0.0222, "step": 185315 }, { "epoch": 0.7732556683996629, "grad_norm": 0.6850146159423013, "learning_rate": 2.2745947827122756e-06, "loss": 0.0228, "step": 185320 }, { "epoch": 0.7732765311146531, "grad_norm": 0.6850116277257737, "learning_rate": 2.2745640984764927e-06, "loss": 0.0209, "step": 185325 }, { "epoch": 0.7732973938296435, "grad_norm": 0.5973653817061275, "learning_rate": 2.2745334154824642e-06, "loss": 0.0188, "step": 185330 }, { "epoch": 0.7733182565446337, "grad_norm": 0.8555277785915641, "learning_rate": 2.2745027337301086e-06, "loss": 0.0188, "step": 185335 }, { "epoch": 0.773339119259624, "grad_norm": 1.1082270602455537, "learning_rate": 2.27447205321934e-06, "loss": 0.0234, "step": 185340 }, { "epoch": 0.7733599819746142, "grad_norm": 0.7550611918427986, "learning_rate": 2.274441373950077e-06, "loss": 0.027, "step": 185345 }, { "epoch": 0.7733808446896046, "grad_norm": 0.335109219690071, "learning_rate": 2.2744106959222335e-06, "loss": 0.0205, "step": 185350 }, { "epoch": 0.7734017074045948, "grad_norm": 0.6746883134345717, "learning_rate": 2.274380019135728e-06, "loss": 0.0217, "step": 185355 }, { "epoch": 0.773422570119585, "grad_norm": 0.633673938852954, "learning_rate": 2.2743493435904746e-06, "loss": 0.0207, "step": 185360 }, { "epoch": 0.7734434328345754, "grad_norm": 0.5802821922067439, "learning_rate": 2.274318669286391e-06, "loss": 0.0149, "step": 185365 }, { "epoch": 0.7734642955495656, "grad_norm": 0.3687367918355243, "learning_rate": 2.2742879962233936e-06, "loss": 0.02, "step": 185370 }, { "epoch": 0.7734851582645559, "grad_norm": 1.0065689327722078, "learning_rate": 2.274257324401398e-06, "loss": 0.0265, "step": 185375 }, { "epoch": 0.7735060209795462, "grad_norm": 0.8165852186220696, "learning_rate": 2.2742266538203207e-06, "loss": 0.0274, "step": 185380 }, { "epoch": 0.7735268836945365, "grad_norm": 0.8687883646811587, "learning_rate": 2.274195984480079e-06, "loss": 0.0139, "step": 185385 }, { "epoch": 0.7735477464095267, "grad_norm": 0.4409658874846489, "learning_rate": 2.2741653163805873e-06, "loss": 0.0159, "step": 185390 }, { "epoch": 0.7735686091245171, "grad_norm": 0.6543684952429091, "learning_rate": 2.2741346495217636e-06, "loss": 0.0202, "step": 185395 }, { "epoch": 0.7735894718395073, "grad_norm": 0.3847527652410764, "learning_rate": 2.2741039839035234e-06, "loss": 0.0188, "step": 185400 }, { "epoch": 0.7736103345544976, "grad_norm": 0.8216298663470316, "learning_rate": 2.2740733195257837e-06, "loss": 0.0223, "step": 185405 }, { "epoch": 0.7736311972694878, "grad_norm": 0.2732296299007725, "learning_rate": 2.2740426563884602e-06, "loss": 0.013, "step": 185410 }, { "epoch": 0.7736520599844782, "grad_norm": 0.4447980864896939, "learning_rate": 2.2740119944914695e-06, "loss": 0.0143, "step": 185415 }, { "epoch": 0.7736729226994684, "grad_norm": 1.095676082999009, "learning_rate": 2.2739813338347286e-06, "loss": 0.0245, "step": 185420 }, { "epoch": 0.7736937854144587, "grad_norm": 1.2013875943719736, "learning_rate": 2.2739506744181532e-06, "loss": 0.0243, "step": 185425 }, { "epoch": 0.773714648129449, "grad_norm": 0.2666554353493571, "learning_rate": 2.2739200162416595e-06, "loss": 0.0246, "step": 185430 }, { "epoch": 0.7737355108444393, "grad_norm": 1.0062172061310888, "learning_rate": 2.2738893593051644e-06, "loss": 0.0166, "step": 185435 }, { "epoch": 0.7737563735594295, "grad_norm": 0.8088633070766396, "learning_rate": 2.273858703608584e-06, "loss": 0.0246, "step": 185440 }, { "epoch": 0.7737772362744199, "grad_norm": 1.0098695734745364, "learning_rate": 2.273828049151835e-06, "loss": 0.0242, "step": 185445 }, { "epoch": 0.7737980989894101, "grad_norm": 1.6028743551567448, "learning_rate": 2.2737973959348335e-06, "loss": 0.0279, "step": 185450 }, { "epoch": 0.7738189617044003, "grad_norm": 0.6527317662400973, "learning_rate": 2.273766743957497e-06, "loss": 0.0187, "step": 185455 }, { "epoch": 0.7738398244193906, "grad_norm": 0.9844339354582854, "learning_rate": 2.27373609321974e-06, "loss": 0.0266, "step": 185460 }, { "epoch": 0.7738606871343809, "grad_norm": 0.46754680943976873, "learning_rate": 2.273705443721481e-06, "loss": 0.0256, "step": 185465 }, { "epoch": 0.7738815498493712, "grad_norm": 0.5291590494682296, "learning_rate": 2.2736747954626355e-06, "loss": 0.024, "step": 185470 }, { "epoch": 0.7739024125643614, "grad_norm": 0.9134596919030944, "learning_rate": 2.2736441484431196e-06, "loss": 0.0264, "step": 185475 }, { "epoch": 0.7739232752793518, "grad_norm": 0.6458685048862134, "learning_rate": 2.2736135026628496e-06, "loss": 0.0243, "step": 185480 }, { "epoch": 0.773944137994342, "grad_norm": 0.47619768798655165, "learning_rate": 2.273582858121743e-06, "loss": 0.0163, "step": 185485 }, { "epoch": 0.7739650007093323, "grad_norm": 1.1198495914661737, "learning_rate": 2.2735522148197164e-06, "loss": 0.0159, "step": 185490 }, { "epoch": 0.7739858634243226, "grad_norm": 0.5767551068182254, "learning_rate": 2.2735215727566853e-06, "loss": 0.017, "step": 185495 }, { "epoch": 0.7740067261393129, "grad_norm": 2.580432913096276, "learning_rate": 2.2734909319325665e-06, "loss": 0.0275, "step": 185500 }, { "epoch": 0.7740275888543031, "grad_norm": 0.35977431951402394, "learning_rate": 2.2734602923472775e-06, "loss": 0.0179, "step": 185505 }, { "epoch": 0.7740484515692935, "grad_norm": 0.42199572518923817, "learning_rate": 2.273429654000733e-06, "loss": 0.0212, "step": 185510 }, { "epoch": 0.7740693142842837, "grad_norm": 0.5626449711798076, "learning_rate": 2.2733990168928506e-06, "loss": 0.0228, "step": 185515 }, { "epoch": 0.774090176999274, "grad_norm": 1.164013849988196, "learning_rate": 2.2733683810235468e-06, "loss": 0.0269, "step": 185520 }, { "epoch": 0.7741110397142642, "grad_norm": 1.0093888992140994, "learning_rate": 2.273337746392739e-06, "loss": 0.0252, "step": 185525 }, { "epoch": 0.7741319024292546, "grad_norm": 0.4448127769007158, "learning_rate": 2.2733071130003424e-06, "loss": 0.0208, "step": 185530 }, { "epoch": 0.7741527651442448, "grad_norm": 0.4305214327661182, "learning_rate": 2.2732764808462733e-06, "loss": 0.0192, "step": 185535 }, { "epoch": 0.774173627859235, "grad_norm": 0.37366782908091317, "learning_rate": 2.27324584993045e-06, "loss": 0.0259, "step": 185540 }, { "epoch": 0.7741944905742254, "grad_norm": 0.8408956962829767, "learning_rate": 2.2732152202527875e-06, "loss": 0.0296, "step": 185545 }, { "epoch": 0.7742153532892156, "grad_norm": 0.4987487762546083, "learning_rate": 2.273184591813203e-06, "loss": 0.0167, "step": 185550 }, { "epoch": 0.7742362160042059, "grad_norm": 0.5043681781509731, "learning_rate": 2.2731539646116132e-06, "loss": 0.0173, "step": 185555 }, { "epoch": 0.7742570787191962, "grad_norm": 0.5589899600853803, "learning_rate": 2.273123338647935e-06, "loss": 0.0205, "step": 185560 }, { "epoch": 0.7742779414341865, "grad_norm": 0.2901556908952005, "learning_rate": 2.273092713922084e-06, "loss": 0.0222, "step": 185565 }, { "epoch": 0.7742988041491767, "grad_norm": 1.2277093402849053, "learning_rate": 2.273062090433977e-06, "loss": 0.0218, "step": 185570 }, { "epoch": 0.7743196668641671, "grad_norm": 0.6276521256876204, "learning_rate": 2.273031468183532e-06, "loss": 0.0261, "step": 185575 }, { "epoch": 0.7743405295791573, "grad_norm": 5.476108256924747, "learning_rate": 2.2730008471706643e-06, "loss": 0.0236, "step": 185580 }, { "epoch": 0.7743613922941476, "grad_norm": 0.5369934399331933, "learning_rate": 2.2729702273952906e-06, "loss": 0.0259, "step": 185585 }, { "epoch": 0.7743822550091378, "grad_norm": 0.7727358451008849, "learning_rate": 2.2729396088573286e-06, "loss": 0.0201, "step": 185590 }, { "epoch": 0.7744031177241282, "grad_norm": 1.0491043829282698, "learning_rate": 2.2729089915566933e-06, "loss": 0.0213, "step": 185595 }, { "epoch": 0.7744239804391184, "grad_norm": 0.6224975191324881, "learning_rate": 2.2728783754933026e-06, "loss": 0.0168, "step": 185600 }, { "epoch": 0.7744448431541087, "grad_norm": 0.5349441959040039, "learning_rate": 2.2728477606670733e-06, "loss": 0.0299, "step": 185605 }, { "epoch": 0.774465705869099, "grad_norm": 0.6342524847442041, "learning_rate": 2.2728171470779212e-06, "loss": 0.0173, "step": 185610 }, { "epoch": 0.7744865685840893, "grad_norm": 0.458079990451205, "learning_rate": 2.2727865347257634e-06, "loss": 0.0167, "step": 185615 }, { "epoch": 0.7745074312990795, "grad_norm": 0.5774750449796099, "learning_rate": 2.2727559236105167e-06, "loss": 0.0177, "step": 185620 }, { "epoch": 0.7745282940140699, "grad_norm": 0.7860405433325846, "learning_rate": 2.2727253137320983e-06, "loss": 0.016, "step": 185625 }, { "epoch": 0.7745491567290601, "grad_norm": 0.5656251503153454, "learning_rate": 2.2726947050904238e-06, "loss": 0.0243, "step": 185630 }, { "epoch": 0.7745700194440504, "grad_norm": 0.6109547117808485, "learning_rate": 2.27266409768541e-06, "loss": 0.0238, "step": 185635 }, { "epoch": 0.7745908821590406, "grad_norm": 0.6147246048110492, "learning_rate": 2.2726334915169744e-06, "loss": 0.0252, "step": 185640 }, { "epoch": 0.774611744874031, "grad_norm": 0.430277094027889, "learning_rate": 2.272602886585034e-06, "loss": 0.0139, "step": 185645 }, { "epoch": 0.7746326075890212, "grad_norm": 0.5974672025444804, "learning_rate": 2.2725722828895045e-06, "loss": 0.0237, "step": 185650 }, { "epoch": 0.7746534703040114, "grad_norm": 0.837147846648705, "learning_rate": 2.272541680430303e-06, "loss": 0.0179, "step": 185655 }, { "epoch": 0.7746743330190018, "grad_norm": 1.1137197529117269, "learning_rate": 2.2725110792073472e-06, "loss": 0.0316, "step": 185660 }, { "epoch": 0.774695195733992, "grad_norm": 0.8583228816093227, "learning_rate": 2.2724804792205524e-06, "loss": 0.0218, "step": 185665 }, { "epoch": 0.7747160584489823, "grad_norm": 0.8436817390561997, "learning_rate": 2.2724498804698358e-06, "loss": 0.0187, "step": 185670 }, { "epoch": 0.7747369211639726, "grad_norm": 0.9161847969069641, "learning_rate": 2.2724192829551147e-06, "loss": 0.0206, "step": 185675 }, { "epoch": 0.7747577838789629, "grad_norm": 0.7794476763713308, "learning_rate": 2.2723886866763057e-06, "loss": 0.0222, "step": 185680 }, { "epoch": 0.7747786465939531, "grad_norm": 0.6247822954257152, "learning_rate": 2.2723580916333254e-06, "loss": 0.0202, "step": 185685 }, { "epoch": 0.7747995093089435, "grad_norm": 1.314402265195812, "learning_rate": 2.2723274978260904e-06, "loss": 0.0358, "step": 185690 }, { "epoch": 0.7748203720239337, "grad_norm": 0.7399285737734115, "learning_rate": 2.2722969052545185e-06, "loss": 0.021, "step": 185695 }, { "epoch": 0.774841234738924, "grad_norm": 0.7525947294465607, "learning_rate": 2.2722663139185254e-06, "loss": 0.0216, "step": 185700 }, { "epoch": 0.7748620974539142, "grad_norm": 0.21206808507336597, "learning_rate": 2.2722357238180285e-06, "loss": 0.0216, "step": 185705 }, { "epoch": 0.7748829601689046, "grad_norm": 0.7333483439545777, "learning_rate": 2.272205134952945e-06, "loss": 0.0318, "step": 185710 }, { "epoch": 0.7749038228838948, "grad_norm": 1.1019283644347255, "learning_rate": 2.2721745473231902e-06, "loss": 0.0307, "step": 185715 }, { "epoch": 0.774924685598885, "grad_norm": 0.5859008081854303, "learning_rate": 2.2721439609286823e-06, "loss": 0.0205, "step": 185720 }, { "epoch": 0.7749455483138754, "grad_norm": 0.8610242195003133, "learning_rate": 2.2721133757693386e-06, "loss": 0.0166, "step": 185725 }, { "epoch": 0.7749664110288657, "grad_norm": 1.3276932492595543, "learning_rate": 2.2720827918450748e-06, "loss": 0.0343, "step": 185730 }, { "epoch": 0.7749872737438559, "grad_norm": 0.6573621658902811, "learning_rate": 2.2720522091558083e-06, "loss": 0.0203, "step": 185735 }, { "epoch": 0.7750081364588463, "grad_norm": 0.37940527318043976, "learning_rate": 2.2720216277014557e-06, "loss": 0.0292, "step": 185740 }, { "epoch": 0.7750289991738365, "grad_norm": 0.27900396931768023, "learning_rate": 2.271991047481935e-06, "loss": 0.0185, "step": 185745 }, { "epoch": 0.7750498618888267, "grad_norm": 0.4617713929471434, "learning_rate": 2.2719604684971607e-06, "loss": 0.0254, "step": 185750 }, { "epoch": 0.7750707246038171, "grad_norm": 0.3787744324323927, "learning_rate": 2.271929890747052e-06, "loss": 0.0215, "step": 185755 }, { "epoch": 0.7750915873188073, "grad_norm": 0.805451635515059, "learning_rate": 2.271899314231525e-06, "loss": 0.0139, "step": 185760 }, { "epoch": 0.7751124500337976, "grad_norm": 1.5144840014639276, "learning_rate": 2.271868738950497e-06, "loss": 0.0284, "step": 185765 }, { "epoch": 0.7751333127487878, "grad_norm": 0.5107174809974839, "learning_rate": 2.2718381649038845e-06, "loss": 0.019, "step": 185770 }, { "epoch": 0.7751541754637782, "grad_norm": 0.5139256507600971, "learning_rate": 2.2718075920916037e-06, "loss": 0.0169, "step": 185775 }, { "epoch": 0.7751750381787684, "grad_norm": 1.3999709721255382, "learning_rate": 2.2717770205135735e-06, "loss": 0.0237, "step": 185780 }, { "epoch": 0.7751959008937587, "grad_norm": 0.6165289289111047, "learning_rate": 2.2717464501697093e-06, "loss": 0.0167, "step": 185785 }, { "epoch": 0.775216763608749, "grad_norm": 0.27407319364950755, "learning_rate": 2.2717158810599285e-06, "loss": 0.0131, "step": 185790 }, { "epoch": 0.7752376263237393, "grad_norm": 1.1585832305471084, "learning_rate": 2.271685313184148e-06, "loss": 0.0253, "step": 185795 }, { "epoch": 0.7752584890387295, "grad_norm": 0.9255639491115698, "learning_rate": 2.2716547465422855e-06, "loss": 0.0298, "step": 185800 }, { "epoch": 0.7752793517537199, "grad_norm": 0.8060237825584092, "learning_rate": 2.2716241811342563e-06, "loss": 0.0248, "step": 185805 }, { "epoch": 0.7753002144687101, "grad_norm": 0.9270053627168803, "learning_rate": 2.2715936169599786e-06, "loss": 0.0247, "step": 185810 }, { "epoch": 0.7753210771837004, "grad_norm": 0.9623757171686911, "learning_rate": 2.2715630540193695e-06, "loss": 0.0269, "step": 185815 }, { "epoch": 0.7753419398986906, "grad_norm": 0.7347977656647515, "learning_rate": 2.2715324923123463e-06, "loss": 0.0179, "step": 185820 }, { "epoch": 0.775362802613681, "grad_norm": 0.5513317295094954, "learning_rate": 2.2715019318388245e-06, "loss": 0.0198, "step": 185825 }, { "epoch": 0.7753836653286712, "grad_norm": 0.59569069616632, "learning_rate": 2.271471372598723e-06, "loss": 0.0221, "step": 185830 }, { "epoch": 0.7754045280436614, "grad_norm": 0.6910516661559861, "learning_rate": 2.2714408145919574e-06, "loss": 0.0223, "step": 185835 }, { "epoch": 0.7754253907586518, "grad_norm": 0.5090295654675108, "learning_rate": 2.2714102578184454e-06, "loss": 0.0244, "step": 185840 }, { "epoch": 0.775446253473642, "grad_norm": 0.801352543588117, "learning_rate": 2.2713797022781036e-06, "loss": 0.0221, "step": 185845 }, { "epoch": 0.7754671161886323, "grad_norm": 0.9310148766305953, "learning_rate": 2.27134914797085e-06, "loss": 0.0263, "step": 185850 }, { "epoch": 0.7754879789036226, "grad_norm": 1.093721000570815, "learning_rate": 2.2713185948966005e-06, "loss": 0.028, "step": 185855 }, { "epoch": 0.7755088416186129, "grad_norm": 0.3798027732570145, "learning_rate": 2.271288043055273e-06, "loss": 0.0291, "step": 185860 }, { "epoch": 0.7755297043336031, "grad_norm": 0.8006142344279352, "learning_rate": 2.271257492446784e-06, "loss": 0.0276, "step": 185865 }, { "epoch": 0.7755505670485935, "grad_norm": 0.8548058914702887, "learning_rate": 2.2712269430710516e-06, "loss": 0.0239, "step": 185870 }, { "epoch": 0.7755714297635837, "grad_norm": 0.6272682535586201, "learning_rate": 2.2711963949279913e-06, "loss": 0.0244, "step": 185875 }, { "epoch": 0.775592292478574, "grad_norm": 0.2890770158410178, "learning_rate": 2.2711658480175215e-06, "loss": 0.0191, "step": 185880 }, { "epoch": 0.7756131551935642, "grad_norm": 0.6651403033763374, "learning_rate": 2.2711353023395587e-06, "loss": 0.024, "step": 185885 }, { "epoch": 0.7756340179085546, "grad_norm": 0.6109083941937676, "learning_rate": 2.2711047578940204e-06, "loss": 0.0264, "step": 185890 }, { "epoch": 0.7756548806235448, "grad_norm": 1.1767679411569014, "learning_rate": 2.271074214680824e-06, "loss": 0.022, "step": 185895 }, { "epoch": 0.7756757433385351, "grad_norm": 0.6226016732236763, "learning_rate": 2.2710436726998855e-06, "loss": 0.016, "step": 185900 }, { "epoch": 0.7756966060535254, "grad_norm": 0.6437988300826812, "learning_rate": 2.271013131951123e-06, "loss": 0.0179, "step": 185905 }, { "epoch": 0.7757174687685157, "grad_norm": 0.5047491947612688, "learning_rate": 2.2709825924344537e-06, "loss": 0.024, "step": 185910 }, { "epoch": 0.7757383314835059, "grad_norm": 0.5291892960645126, "learning_rate": 2.270952054149794e-06, "loss": 0.0161, "step": 185915 }, { "epoch": 0.7757591941984963, "grad_norm": 0.484274709762613, "learning_rate": 2.2709215170970612e-06, "loss": 0.017, "step": 185920 }, { "epoch": 0.7757800569134865, "grad_norm": 1.34670105609699, "learning_rate": 2.2708909812761738e-06, "loss": 0.0185, "step": 185925 }, { "epoch": 0.7758009196284767, "grad_norm": 0.8193533744216887, "learning_rate": 2.270860446687047e-06, "loss": 0.025, "step": 185930 }, { "epoch": 0.7758217823434671, "grad_norm": 0.5395740538868836, "learning_rate": 2.2708299133295995e-06, "loss": 0.0197, "step": 185935 }, { "epoch": 0.7758426450584573, "grad_norm": 0.7665115364018809, "learning_rate": 2.2707993812037475e-06, "loss": 0.0229, "step": 185940 }, { "epoch": 0.7758635077734476, "grad_norm": 0.8030987918842608, "learning_rate": 2.2707688503094095e-06, "loss": 0.0179, "step": 185945 }, { "epoch": 0.7758843704884378, "grad_norm": 0.506347534121506, "learning_rate": 2.2707383206465015e-06, "loss": 0.02, "step": 185950 }, { "epoch": 0.7759052332034282, "grad_norm": 0.6745666947377826, "learning_rate": 2.2707077922149406e-06, "loss": 0.0152, "step": 185955 }, { "epoch": 0.7759260959184184, "grad_norm": 0.3420268440094831, "learning_rate": 2.2706772650146445e-06, "loss": 0.0126, "step": 185960 }, { "epoch": 0.7759469586334087, "grad_norm": 1.1832946736845997, "learning_rate": 2.270646739045531e-06, "loss": 0.0195, "step": 185965 }, { "epoch": 0.775967821348399, "grad_norm": 0.779733514786109, "learning_rate": 2.270616214307517e-06, "loss": 0.0193, "step": 185970 }, { "epoch": 0.7759886840633893, "grad_norm": 0.41172065675567404, "learning_rate": 2.270585690800519e-06, "loss": 0.0158, "step": 185975 }, { "epoch": 0.7760095467783795, "grad_norm": 0.620974333708599, "learning_rate": 2.270555168524455e-06, "loss": 0.0197, "step": 185980 }, { "epoch": 0.7760304094933699, "grad_norm": 0.9590821901350888, "learning_rate": 2.2705246474792426e-06, "loss": 0.0252, "step": 185985 }, { "epoch": 0.7760512722083601, "grad_norm": 1.0180068093336276, "learning_rate": 2.2704941276647977e-06, "loss": 0.0219, "step": 185990 }, { "epoch": 0.7760721349233504, "grad_norm": 0.2147500246137377, "learning_rate": 2.2704636090810387e-06, "loss": 0.0173, "step": 185995 }, { "epoch": 0.7760929976383406, "grad_norm": 0.5279361291355871, "learning_rate": 2.270433091727883e-06, "loss": 0.023, "step": 186000 }, { "epoch": 0.776113860353331, "grad_norm": 0.5098426711314078, "learning_rate": 2.270402575605247e-06, "loss": 0.0271, "step": 186005 }, { "epoch": 0.7761347230683212, "grad_norm": 0.35039593451141743, "learning_rate": 2.270372060713049e-06, "loss": 0.0166, "step": 186010 }, { "epoch": 0.7761555857833115, "grad_norm": 0.5392100518120823, "learning_rate": 2.270341547051206e-06, "loss": 0.0202, "step": 186015 }, { "epoch": 0.7761764484983018, "grad_norm": 1.1145253555924384, "learning_rate": 2.2703110346196344e-06, "loss": 0.0248, "step": 186020 }, { "epoch": 0.776197311213292, "grad_norm": 0.4937366841385615, "learning_rate": 2.270280523418253e-06, "loss": 0.0187, "step": 186025 }, { "epoch": 0.7762181739282823, "grad_norm": 0.5378913020286432, "learning_rate": 2.2702500134469784e-06, "loss": 0.0244, "step": 186030 }, { "epoch": 0.7762390366432727, "grad_norm": 0.6774644577847707, "learning_rate": 2.2702195047057283e-06, "loss": 0.0219, "step": 186035 }, { "epoch": 0.7762598993582629, "grad_norm": 0.9102204056972045, "learning_rate": 2.270188997194419e-06, "loss": 0.0175, "step": 186040 }, { "epoch": 0.7762807620732531, "grad_norm": 0.4592230081621919, "learning_rate": 2.270158490912969e-06, "loss": 0.0216, "step": 186045 }, { "epoch": 0.7763016247882435, "grad_norm": 0.5087675392925486, "learning_rate": 2.270127985861295e-06, "loss": 0.0158, "step": 186050 }, { "epoch": 0.7763224875032337, "grad_norm": 0.8246932238480387, "learning_rate": 2.2700974820393147e-06, "loss": 0.03, "step": 186055 }, { "epoch": 0.776343350218224, "grad_norm": 0.462090241287294, "learning_rate": 2.2700669794469457e-06, "loss": 0.0235, "step": 186060 }, { "epoch": 0.7763642129332142, "grad_norm": 0.8892650414907115, "learning_rate": 2.270036478084105e-06, "loss": 0.0216, "step": 186065 }, { "epoch": 0.7763850756482046, "grad_norm": 0.3540096887169741, "learning_rate": 2.270005977950711e-06, "loss": 0.0164, "step": 186070 }, { "epoch": 0.7764059383631948, "grad_norm": 0.7185617235439925, "learning_rate": 2.269975479046679e-06, "loss": 0.0283, "step": 186075 }, { "epoch": 0.7764268010781851, "grad_norm": 0.36006355863927064, "learning_rate": 2.269944981371928e-06, "loss": 0.0171, "step": 186080 }, { "epoch": 0.7764476637931754, "grad_norm": 0.6206512898393121, "learning_rate": 2.2699144849263755e-06, "loss": 0.0193, "step": 186085 }, { "epoch": 0.7764685265081657, "grad_norm": 0.5731171803732713, "learning_rate": 2.269883989709938e-06, "loss": 0.02, "step": 186090 }, { "epoch": 0.7764893892231559, "grad_norm": 1.2146965923982913, "learning_rate": 2.269853495722534e-06, "loss": 0.0256, "step": 186095 }, { "epoch": 0.7765102519381463, "grad_norm": 0.24693981385532093, "learning_rate": 2.26982300296408e-06, "loss": 0.0182, "step": 186100 }, { "epoch": 0.7765311146531365, "grad_norm": 0.6974391290150226, "learning_rate": 2.269792511434494e-06, "loss": 0.0176, "step": 186105 }, { "epoch": 0.7765519773681268, "grad_norm": 0.4933959334534463, "learning_rate": 2.2697620211336928e-06, "loss": 0.0242, "step": 186110 }, { "epoch": 0.7765728400831171, "grad_norm": 0.5163600925439994, "learning_rate": 2.2697315320615955e-06, "loss": 0.0195, "step": 186115 }, { "epoch": 0.7765937027981074, "grad_norm": 0.5870147441845269, "learning_rate": 2.2697010442181174e-06, "loss": 0.0315, "step": 186120 }, { "epoch": 0.7766145655130976, "grad_norm": 0.787936904852447, "learning_rate": 2.269670557603178e-06, "loss": 0.0173, "step": 186125 }, { "epoch": 0.7766354282280878, "grad_norm": 0.37295905958539355, "learning_rate": 2.269640072216693e-06, "loss": 0.0172, "step": 186130 }, { "epoch": 0.7766562909430782, "grad_norm": 0.5534851735519747, "learning_rate": 2.2696095880585815e-06, "loss": 0.0284, "step": 186135 }, { "epoch": 0.7766771536580684, "grad_norm": 0.8931804595473595, "learning_rate": 2.2695791051287598e-06, "loss": 0.0257, "step": 186140 }, { "epoch": 0.7766980163730587, "grad_norm": 1.2581214372469909, "learning_rate": 2.269548623427146e-06, "loss": 0.0249, "step": 186145 }, { "epoch": 0.776718879088049, "grad_norm": 0.7214941922449909, "learning_rate": 2.2695181429536574e-06, "loss": 0.0172, "step": 186150 }, { "epoch": 0.7767397418030393, "grad_norm": 0.756915630285052, "learning_rate": 2.2694876637082113e-06, "loss": 0.0221, "step": 186155 }, { "epoch": 0.7767606045180295, "grad_norm": 0.5884627027549686, "learning_rate": 2.269457185690726e-06, "loss": 0.0174, "step": 186160 }, { "epoch": 0.7767814672330199, "grad_norm": 0.6567186645748333, "learning_rate": 2.2694267089011187e-06, "loss": 0.0223, "step": 186165 }, { "epoch": 0.7768023299480101, "grad_norm": 0.8770625859700095, "learning_rate": 2.269396233339307e-06, "loss": 0.028, "step": 186170 }, { "epoch": 0.7768231926630004, "grad_norm": 0.5046646790413581, "learning_rate": 2.2693657590052082e-06, "loss": 0.0183, "step": 186175 }, { "epoch": 0.7768440553779906, "grad_norm": 0.7690348670960222, "learning_rate": 2.26933528589874e-06, "loss": 0.0206, "step": 186180 }, { "epoch": 0.776864918092981, "grad_norm": 0.30282717254382907, "learning_rate": 2.2693048140198196e-06, "loss": 0.0236, "step": 186185 }, { "epoch": 0.7768857808079712, "grad_norm": 0.9211140351089594, "learning_rate": 2.269274343368365e-06, "loss": 0.0155, "step": 186190 }, { "epoch": 0.7769066435229615, "grad_norm": 1.3539505622323915, "learning_rate": 2.269243873944295e-06, "loss": 0.03, "step": 186195 }, { "epoch": 0.7769275062379518, "grad_norm": 0.865674693276233, "learning_rate": 2.269213405747525e-06, "loss": 0.0231, "step": 186200 }, { "epoch": 0.7769483689529421, "grad_norm": 0.8018769056877588, "learning_rate": 2.269182938777973e-06, "loss": 0.0242, "step": 186205 }, { "epoch": 0.7769692316679323, "grad_norm": 0.34157282616319556, "learning_rate": 2.269152473035558e-06, "loss": 0.0227, "step": 186210 }, { "epoch": 0.7769900943829227, "grad_norm": 0.500201852956302, "learning_rate": 2.2691220085201966e-06, "loss": 0.0172, "step": 186215 }, { "epoch": 0.7770109570979129, "grad_norm": 0.43350880116298274, "learning_rate": 2.2690915452318072e-06, "loss": 0.0195, "step": 186220 }, { "epoch": 0.7770318198129031, "grad_norm": 1.424981446834147, "learning_rate": 2.2690610831703065e-06, "loss": 0.0177, "step": 186225 }, { "epoch": 0.7770526825278935, "grad_norm": 0.43066843567146296, "learning_rate": 2.2690306223356124e-06, "loss": 0.0163, "step": 186230 }, { "epoch": 0.7770735452428837, "grad_norm": 0.6812816425126388, "learning_rate": 2.2690001627276425e-06, "loss": 0.0223, "step": 186235 }, { "epoch": 0.777094407957874, "grad_norm": 0.4584497565823789, "learning_rate": 2.2689697043463154e-06, "loss": 0.0165, "step": 186240 }, { "epoch": 0.7771152706728642, "grad_norm": 0.6556680655583161, "learning_rate": 2.2689392471915478e-06, "loss": 0.0179, "step": 186245 }, { "epoch": 0.7771361333878546, "grad_norm": 0.6038385331114068, "learning_rate": 2.2689087912632572e-06, "loss": 0.0197, "step": 186250 }, { "epoch": 0.7771569961028448, "grad_norm": 0.765276567441648, "learning_rate": 2.268878336561362e-06, "loss": 0.0205, "step": 186255 }, { "epoch": 0.7771778588178351, "grad_norm": 0.463055290589969, "learning_rate": 2.26884788308578e-06, "loss": 0.0273, "step": 186260 }, { "epoch": 0.7771987215328254, "grad_norm": 0.5473691739503161, "learning_rate": 2.2688174308364278e-06, "loss": 0.0194, "step": 186265 }, { "epoch": 0.7772195842478157, "grad_norm": 0.577010670027823, "learning_rate": 2.268786979813224e-06, "loss": 0.0195, "step": 186270 }, { "epoch": 0.7772404469628059, "grad_norm": 0.7700563704064107, "learning_rate": 2.2687565300160863e-06, "loss": 0.0249, "step": 186275 }, { "epoch": 0.7772613096777963, "grad_norm": 0.8127061685523566, "learning_rate": 2.268726081444932e-06, "loss": 0.0226, "step": 186280 }, { "epoch": 0.7772821723927865, "grad_norm": 0.34184559432571987, "learning_rate": 2.2686956340996797e-06, "loss": 0.0198, "step": 186285 }, { "epoch": 0.7773030351077768, "grad_norm": 0.4254408082788021, "learning_rate": 2.268665187980246e-06, "loss": 0.0169, "step": 186290 }, { "epoch": 0.7773238978227671, "grad_norm": 1.3149355725468161, "learning_rate": 2.2686347430865495e-06, "loss": 0.0184, "step": 186295 }, { "epoch": 0.7773447605377574, "grad_norm": 0.7438487130500464, "learning_rate": 2.2686042994185078e-06, "loss": 0.0141, "step": 186300 }, { "epoch": 0.7773656232527476, "grad_norm": 0.4468207084997394, "learning_rate": 2.2685738569760376e-06, "loss": 0.0183, "step": 186305 }, { "epoch": 0.7773864859677379, "grad_norm": 0.6171626778023049, "learning_rate": 2.2685434157590584e-06, "loss": 0.0183, "step": 186310 }, { "epoch": 0.7774073486827282, "grad_norm": 0.5604278780316457, "learning_rate": 2.268512975767487e-06, "loss": 0.018, "step": 186315 }, { "epoch": 0.7774282113977184, "grad_norm": 0.9075511764925187, "learning_rate": 2.2684825370012414e-06, "loss": 0.0197, "step": 186320 }, { "epoch": 0.7774490741127087, "grad_norm": 0.5547926071676159, "learning_rate": 2.2684520994602395e-06, "loss": 0.0213, "step": 186325 }, { "epoch": 0.777469936827699, "grad_norm": 0.5140477915489191, "learning_rate": 2.2684216631443984e-06, "loss": 0.0191, "step": 186330 }, { "epoch": 0.7774907995426893, "grad_norm": 0.6053092450549403, "learning_rate": 2.2683912280536368e-06, "loss": 0.0177, "step": 186335 }, { "epoch": 0.7775116622576795, "grad_norm": 0.9041513843851327, "learning_rate": 2.268360794187872e-06, "loss": 0.0186, "step": 186340 }, { "epoch": 0.7775325249726699, "grad_norm": 0.5685413082091234, "learning_rate": 2.268330361547022e-06, "loss": 0.0239, "step": 186345 }, { "epoch": 0.7775533876876601, "grad_norm": 0.5772983890734871, "learning_rate": 2.2682999301310046e-06, "loss": 0.0188, "step": 186350 }, { "epoch": 0.7775742504026504, "grad_norm": 0.7963967034654452, "learning_rate": 2.268269499939738e-06, "loss": 0.0291, "step": 186355 }, { "epoch": 0.7775951131176406, "grad_norm": 1.2015062190911232, "learning_rate": 2.26823907097314e-06, "loss": 0.0238, "step": 186360 }, { "epoch": 0.777615975832631, "grad_norm": 0.15243326070403573, "learning_rate": 2.268208643231128e-06, "loss": 0.0205, "step": 186365 }, { "epoch": 0.7776368385476212, "grad_norm": 0.7086074621958927, "learning_rate": 2.2681782167136197e-06, "loss": 0.017, "step": 186370 }, { "epoch": 0.7776577012626115, "grad_norm": 0.8779815368169122, "learning_rate": 2.268147791420533e-06, "loss": 0.02, "step": 186375 }, { "epoch": 0.7776785639776018, "grad_norm": 0.26268150112258365, "learning_rate": 2.2681173673517867e-06, "loss": 0.0227, "step": 186380 }, { "epoch": 0.7776994266925921, "grad_norm": 0.6063908439140202, "learning_rate": 2.268086944507298e-06, "loss": 0.0318, "step": 186385 }, { "epoch": 0.7777202894075823, "grad_norm": 0.6134548069331419, "learning_rate": 2.2680565228869845e-06, "loss": 0.0246, "step": 186390 }, { "epoch": 0.7777411521225727, "grad_norm": 0.7444192184089358, "learning_rate": 2.2680261024907656e-06, "loss": 0.0153, "step": 186395 }, { "epoch": 0.7777620148375629, "grad_norm": 1.035430629172628, "learning_rate": 2.267995683318557e-06, "loss": 0.0271, "step": 186400 }, { "epoch": 0.7777828775525532, "grad_norm": 0.36367528089620466, "learning_rate": 2.267965265370278e-06, "loss": 0.0228, "step": 186405 }, { "epoch": 0.7778037402675435, "grad_norm": 0.4193336602718557, "learning_rate": 2.2679348486458462e-06, "loss": 0.0182, "step": 186410 }, { "epoch": 0.7778246029825338, "grad_norm": 0.9136774526050165, "learning_rate": 2.2679044331451796e-06, "loss": 0.0173, "step": 186415 }, { "epoch": 0.777845465697524, "grad_norm": 1.2443602982460433, "learning_rate": 2.2678740188681964e-06, "loss": 0.024, "step": 186420 }, { "epoch": 0.7778663284125142, "grad_norm": 0.7237420708385475, "learning_rate": 2.267843605814814e-06, "loss": 0.0194, "step": 186425 }, { "epoch": 0.7778871911275046, "grad_norm": 0.4209306084949242, "learning_rate": 2.267813193984951e-06, "loss": 0.0213, "step": 186430 }, { "epoch": 0.7779080538424948, "grad_norm": 0.3710200568828993, "learning_rate": 2.2677827833785247e-06, "loss": 0.0156, "step": 186435 }, { "epoch": 0.7779289165574851, "grad_norm": 0.5172942370727394, "learning_rate": 2.2677523739954535e-06, "loss": 0.0207, "step": 186440 }, { "epoch": 0.7779497792724754, "grad_norm": 0.9672485529872952, "learning_rate": 2.2677219658356546e-06, "loss": 0.0295, "step": 186445 }, { "epoch": 0.7779706419874657, "grad_norm": 0.878191062690663, "learning_rate": 2.267691558899048e-06, "loss": 0.0287, "step": 186450 }, { "epoch": 0.7779915047024559, "grad_norm": 0.7783722576420709, "learning_rate": 2.267661153185549e-06, "loss": 0.02, "step": 186455 }, { "epoch": 0.7780123674174463, "grad_norm": 0.2517837436964643, "learning_rate": 2.2676307486950776e-06, "loss": 0.0193, "step": 186460 }, { "epoch": 0.7780332301324365, "grad_norm": 0.9070239174406419, "learning_rate": 2.267600345427551e-06, "loss": 0.0175, "step": 186465 }, { "epoch": 0.7780540928474268, "grad_norm": 1.6299343012037517, "learning_rate": 2.2675699433828873e-06, "loss": 0.0232, "step": 186470 }, { "epoch": 0.7780749555624171, "grad_norm": 0.8251541087614314, "learning_rate": 2.2675395425610043e-06, "loss": 0.0228, "step": 186475 }, { "epoch": 0.7780958182774074, "grad_norm": 1.0198837419148272, "learning_rate": 2.267509142961821e-06, "loss": 0.0231, "step": 186480 }, { "epoch": 0.7781166809923976, "grad_norm": 0.6650951130003461, "learning_rate": 2.2674787445852543e-06, "loss": 0.0181, "step": 186485 }, { "epoch": 0.7781375437073879, "grad_norm": 0.7424686368663804, "learning_rate": 2.267448347431223e-06, "loss": 0.0173, "step": 186490 }, { "epoch": 0.7781584064223782, "grad_norm": 0.5488313819748847, "learning_rate": 2.267417951499645e-06, "loss": 0.0242, "step": 186495 }, { "epoch": 0.7781792691373685, "grad_norm": 0.1722878779692483, "learning_rate": 2.267387556790438e-06, "loss": 0.0172, "step": 186500 }, { "epoch": 0.7782001318523587, "grad_norm": 0.6463422837537504, "learning_rate": 2.26735716330352e-06, "loss": 0.0217, "step": 186505 }, { "epoch": 0.7782209945673491, "grad_norm": 0.8754457775234995, "learning_rate": 2.26732677103881e-06, "loss": 0.0184, "step": 186510 }, { "epoch": 0.7782418572823393, "grad_norm": 0.5078529841268385, "learning_rate": 2.267296379996225e-06, "loss": 0.0162, "step": 186515 }, { "epoch": 0.7782627199973295, "grad_norm": 0.3416789992583206, "learning_rate": 2.267265990175684e-06, "loss": 0.0206, "step": 186520 }, { "epoch": 0.7782835827123199, "grad_norm": 0.3356570292398027, "learning_rate": 2.267235601577105e-06, "loss": 0.0205, "step": 186525 }, { "epoch": 0.7783044454273101, "grad_norm": 0.3885713490930798, "learning_rate": 2.2672052142004048e-06, "loss": 0.0151, "step": 186530 }, { "epoch": 0.7783253081423004, "grad_norm": 0.7767736425971415, "learning_rate": 2.2671748280455033e-06, "loss": 0.0183, "step": 186535 }, { "epoch": 0.7783461708572906, "grad_norm": 0.32676605541915, "learning_rate": 2.2671444431123172e-06, "loss": 0.0265, "step": 186540 }, { "epoch": 0.778367033572281, "grad_norm": 0.9969340495575308, "learning_rate": 2.267114059400766e-06, "loss": 0.018, "step": 186545 }, { "epoch": 0.7783878962872712, "grad_norm": 0.406173998236482, "learning_rate": 2.2670836769107666e-06, "loss": 0.0156, "step": 186550 }, { "epoch": 0.7784087590022615, "grad_norm": 0.7805872147968403, "learning_rate": 2.267053295642238e-06, "loss": 0.0262, "step": 186555 }, { "epoch": 0.7784296217172518, "grad_norm": 0.5210755266492996, "learning_rate": 2.2670229155950977e-06, "loss": 0.0151, "step": 186560 }, { "epoch": 0.7784504844322421, "grad_norm": 0.6812410172696951, "learning_rate": 2.2669925367692644e-06, "loss": 0.0137, "step": 186565 }, { "epoch": 0.7784713471472323, "grad_norm": 0.6733532708929029, "learning_rate": 2.266962159164656e-06, "loss": 0.0248, "step": 186570 }, { "epoch": 0.7784922098622227, "grad_norm": 0.6725513791850125, "learning_rate": 2.2669317827811903e-06, "loss": 0.0114, "step": 186575 }, { "epoch": 0.7785130725772129, "grad_norm": 0.6369845791840136, "learning_rate": 2.2669014076187868e-06, "loss": 0.0227, "step": 186580 }, { "epoch": 0.7785339352922032, "grad_norm": 0.5543475428833413, "learning_rate": 2.2668710336773626e-06, "loss": 0.0157, "step": 186585 }, { "epoch": 0.7785547980071935, "grad_norm": 0.488414817598051, "learning_rate": 2.266840660956836e-06, "loss": 0.02, "step": 186590 }, { "epoch": 0.7785756607221838, "grad_norm": 0.3988469139946065, "learning_rate": 2.2668102894571246e-06, "loss": 0.0193, "step": 186595 }, { "epoch": 0.778596523437174, "grad_norm": 0.3644764356332166, "learning_rate": 2.2667799191781485e-06, "loss": 0.0169, "step": 186600 }, { "epoch": 0.7786173861521642, "grad_norm": 0.2247779207753798, "learning_rate": 2.2667495501198245e-06, "loss": 0.019, "step": 186605 }, { "epoch": 0.7786382488671546, "grad_norm": 0.42777098798563296, "learning_rate": 2.2667191822820706e-06, "loss": 0.0128, "step": 186610 }, { "epoch": 0.7786591115821448, "grad_norm": 0.4384724765951372, "learning_rate": 2.266688815664806e-06, "loss": 0.0209, "step": 186615 }, { "epoch": 0.7786799742971351, "grad_norm": 0.8486967410184103, "learning_rate": 2.2666584502679484e-06, "loss": 0.0203, "step": 186620 }, { "epoch": 0.7787008370121254, "grad_norm": 0.5732288892406541, "learning_rate": 2.2666280860914165e-06, "loss": 0.0242, "step": 186625 }, { "epoch": 0.7787216997271157, "grad_norm": 0.6168866540749167, "learning_rate": 2.2665977231351275e-06, "loss": 0.0167, "step": 186630 }, { "epoch": 0.7787425624421059, "grad_norm": 0.5463876007911217, "learning_rate": 2.266567361399001e-06, "loss": 0.0261, "step": 186635 }, { "epoch": 0.7787634251570963, "grad_norm": 0.5012168777215457, "learning_rate": 2.266537000882955e-06, "loss": 0.0265, "step": 186640 }, { "epoch": 0.7787842878720865, "grad_norm": 1.0630481728896903, "learning_rate": 2.2665066415869067e-06, "loss": 0.0192, "step": 186645 }, { "epoch": 0.7788051505870768, "grad_norm": 0.5683936236622203, "learning_rate": 2.2664762835107756e-06, "loss": 0.0246, "step": 186650 }, { "epoch": 0.7788260133020671, "grad_norm": 0.9535590716223833, "learning_rate": 2.2664459266544794e-06, "loss": 0.0282, "step": 186655 }, { "epoch": 0.7788468760170574, "grad_norm": 0.6082749939376256, "learning_rate": 2.266415571017936e-06, "loss": 0.0165, "step": 186660 }, { "epoch": 0.7788677387320476, "grad_norm": 0.5800940378637454, "learning_rate": 2.2663852166010653e-06, "loss": 0.0213, "step": 186665 }, { "epoch": 0.7788886014470379, "grad_norm": 0.8363339798010894, "learning_rate": 2.2663548634037847e-06, "loss": 0.0232, "step": 186670 }, { "epoch": 0.7789094641620282, "grad_norm": 0.9234969144749324, "learning_rate": 2.2663245114260117e-06, "loss": 0.0276, "step": 186675 }, { "epoch": 0.7789303268770185, "grad_norm": 0.6848237222978047, "learning_rate": 2.2662941606676655e-06, "loss": 0.019, "step": 186680 }, { "epoch": 0.7789511895920087, "grad_norm": 0.6868201603294917, "learning_rate": 2.266263811128665e-06, "loss": 0.0175, "step": 186685 }, { "epoch": 0.7789720523069991, "grad_norm": 0.5275439850136331, "learning_rate": 2.2662334628089275e-06, "loss": 0.0186, "step": 186690 }, { "epoch": 0.7789929150219893, "grad_norm": 0.5742016351753927, "learning_rate": 2.2662031157083715e-06, "loss": 0.023, "step": 186695 }, { "epoch": 0.7790137777369796, "grad_norm": 0.34698335559372095, "learning_rate": 2.2661727698269157e-06, "loss": 0.0129, "step": 186700 }, { "epoch": 0.7790346404519699, "grad_norm": 0.523758307170107, "learning_rate": 2.2661424251644792e-06, "loss": 0.0236, "step": 186705 }, { "epoch": 0.7790555031669602, "grad_norm": 0.3814429148242959, "learning_rate": 2.2661120817209787e-06, "loss": 0.0224, "step": 186710 }, { "epoch": 0.7790763658819504, "grad_norm": 0.9545962119545174, "learning_rate": 2.2660817394963343e-06, "loss": 0.0254, "step": 186715 }, { "epoch": 0.7790972285969406, "grad_norm": 1.1714107516319532, "learning_rate": 2.266051398490463e-06, "loss": 0.017, "step": 186720 }, { "epoch": 0.779118091311931, "grad_norm": 0.3878102087423518, "learning_rate": 2.2660210587032838e-06, "loss": 0.0244, "step": 186725 }, { "epoch": 0.7791389540269212, "grad_norm": 1.1303081993800272, "learning_rate": 2.2659907201347153e-06, "loss": 0.0209, "step": 186730 }, { "epoch": 0.7791598167419115, "grad_norm": 0.3260264681997052, "learning_rate": 2.2659603827846753e-06, "loss": 0.0223, "step": 186735 }, { "epoch": 0.7791806794569018, "grad_norm": 1.913054480173931, "learning_rate": 2.2659300466530837e-06, "loss": 0.021, "step": 186740 }, { "epoch": 0.7792015421718921, "grad_norm": 0.3253707131036116, "learning_rate": 2.2658997117398575e-06, "loss": 0.0302, "step": 186745 }, { "epoch": 0.7792224048868823, "grad_norm": 1.0052902706747886, "learning_rate": 2.2658693780449155e-06, "loss": 0.0219, "step": 186750 }, { "epoch": 0.7792432676018727, "grad_norm": 2.568641731965468, "learning_rate": 2.265839045568176e-06, "loss": 0.0227, "step": 186755 }, { "epoch": 0.7792641303168629, "grad_norm": 1.0656721237928009, "learning_rate": 2.2658087143095584e-06, "loss": 0.0234, "step": 186760 }, { "epoch": 0.7792849930318532, "grad_norm": 0.8039658541926114, "learning_rate": 2.26577838426898e-06, "loss": 0.0279, "step": 186765 }, { "epoch": 0.7793058557468435, "grad_norm": 0.6670242607705391, "learning_rate": 2.2657480554463594e-06, "loss": 0.0206, "step": 186770 }, { "epoch": 0.7793267184618338, "grad_norm": 0.7229948262623718, "learning_rate": 2.2657177278416164e-06, "loss": 0.0191, "step": 186775 }, { "epoch": 0.779347581176824, "grad_norm": 0.9272994717627586, "learning_rate": 2.2656874014546675e-06, "loss": 0.0238, "step": 186780 }, { "epoch": 0.7793684438918143, "grad_norm": 0.6345984969499691, "learning_rate": 2.2656570762854326e-06, "loss": 0.0212, "step": 186785 }, { "epoch": 0.7793893066068046, "grad_norm": 0.42373886454430415, "learning_rate": 2.26562675233383e-06, "loss": 0.0171, "step": 186790 }, { "epoch": 0.7794101693217949, "grad_norm": 0.6349933847590742, "learning_rate": 2.265596429599778e-06, "loss": 0.0133, "step": 186795 }, { "epoch": 0.7794310320367851, "grad_norm": 0.30176536669416226, "learning_rate": 2.2655661080831954e-06, "loss": 0.0119, "step": 186800 }, { "epoch": 0.7794518947517755, "grad_norm": 0.3253192374264925, "learning_rate": 2.2655357877840006e-06, "loss": 0.0187, "step": 186805 }, { "epoch": 0.7794727574667657, "grad_norm": 0.28032283165361405, "learning_rate": 2.2655054687021116e-06, "loss": 0.0256, "step": 186810 }, { "epoch": 0.7794936201817559, "grad_norm": 1.2422325601796034, "learning_rate": 2.2654751508374475e-06, "loss": 0.0233, "step": 186815 }, { "epoch": 0.7795144828967463, "grad_norm": 0.5191268568046706, "learning_rate": 2.265444834189927e-06, "loss": 0.017, "step": 186820 }, { "epoch": 0.7795353456117365, "grad_norm": 0.6360093835702246, "learning_rate": 2.2654145187594683e-06, "loss": 0.0235, "step": 186825 }, { "epoch": 0.7795562083267268, "grad_norm": 0.6339153910681354, "learning_rate": 2.26538420454599e-06, "loss": 0.022, "step": 186830 }, { "epoch": 0.7795770710417171, "grad_norm": 0.5768980030350358, "learning_rate": 2.2653538915494106e-06, "loss": 0.016, "step": 186835 }, { "epoch": 0.7795979337567074, "grad_norm": 0.42360709983328637, "learning_rate": 2.2653235797696495e-06, "loss": 0.026, "step": 186840 }, { "epoch": 0.7796187964716976, "grad_norm": 0.8242054050417619, "learning_rate": 2.265293269206624e-06, "loss": 0.0269, "step": 186845 }, { "epoch": 0.7796396591866879, "grad_norm": 0.8646835696579204, "learning_rate": 2.265262959860254e-06, "loss": 0.0223, "step": 186850 }, { "epoch": 0.7796605219016782, "grad_norm": 0.40839305891710087, "learning_rate": 2.265232651730457e-06, "loss": 0.0212, "step": 186855 }, { "epoch": 0.7796813846166685, "grad_norm": 0.5970396103604839, "learning_rate": 2.265202344817152e-06, "loss": 0.0192, "step": 186860 }, { "epoch": 0.7797022473316587, "grad_norm": 0.34287096005291084, "learning_rate": 2.265172039120258e-06, "loss": 0.0151, "step": 186865 }, { "epoch": 0.7797231100466491, "grad_norm": 0.5847471892236269, "learning_rate": 2.2651417346396934e-06, "loss": 0.0193, "step": 186870 }, { "epoch": 0.7797439727616393, "grad_norm": 0.7462405382974463, "learning_rate": 2.265111431375377e-06, "loss": 0.0271, "step": 186875 }, { "epoch": 0.7797648354766296, "grad_norm": 1.3009075056793604, "learning_rate": 2.2650811293272264e-06, "loss": 0.0213, "step": 186880 }, { "epoch": 0.7797856981916199, "grad_norm": 0.7303751142299103, "learning_rate": 2.2650508284951615e-06, "loss": 0.0169, "step": 186885 }, { "epoch": 0.7798065609066102, "grad_norm": 0.5029899808172069, "learning_rate": 2.2650205288791006e-06, "loss": 0.0237, "step": 186890 }, { "epoch": 0.7798274236216004, "grad_norm": 0.3156370915758777, "learning_rate": 2.2649902304789622e-06, "loss": 0.0146, "step": 186895 }, { "epoch": 0.7798482863365906, "grad_norm": 0.7913397775079829, "learning_rate": 2.2649599332946656e-06, "loss": 0.0193, "step": 186900 }, { "epoch": 0.779869149051581, "grad_norm": 0.24276097583302567, "learning_rate": 2.2649296373261284e-06, "loss": 0.0218, "step": 186905 }, { "epoch": 0.7798900117665712, "grad_norm": 0.9598224364464312, "learning_rate": 2.26489934257327e-06, "loss": 0.0266, "step": 186910 }, { "epoch": 0.7799108744815615, "grad_norm": 0.6551677115513218, "learning_rate": 2.264869049036009e-06, "loss": 0.0234, "step": 186915 }, { "epoch": 0.7799317371965518, "grad_norm": 0.7741497172369279, "learning_rate": 2.264838756714264e-06, "loss": 0.0173, "step": 186920 }, { "epoch": 0.7799525999115421, "grad_norm": 0.697698497607275, "learning_rate": 2.2648084656079537e-06, "loss": 0.0245, "step": 186925 }, { "epoch": 0.7799734626265323, "grad_norm": 0.5560699343047044, "learning_rate": 2.264778175716997e-06, "loss": 0.016, "step": 186930 }, { "epoch": 0.7799943253415227, "grad_norm": 1.224090453015417, "learning_rate": 2.264747887041312e-06, "loss": 0.0166, "step": 186935 }, { "epoch": 0.7800151880565129, "grad_norm": 0.5916246839903243, "learning_rate": 2.264717599580819e-06, "loss": 0.0205, "step": 186940 }, { "epoch": 0.7800360507715032, "grad_norm": 0.8174793300358782, "learning_rate": 2.2646873133354353e-06, "loss": 0.0206, "step": 186945 }, { "epoch": 0.7800569134864935, "grad_norm": 0.1440738008369432, "learning_rate": 2.26465702830508e-06, "loss": 0.017, "step": 186950 }, { "epoch": 0.7800777762014838, "grad_norm": 0.8476029215344003, "learning_rate": 2.2646267444896718e-06, "loss": 0.0162, "step": 186955 }, { "epoch": 0.780098638916474, "grad_norm": 0.4337223859671252, "learning_rate": 2.26459646188913e-06, "loss": 0.0259, "step": 186960 }, { "epoch": 0.7801195016314643, "grad_norm": 0.6100925407356844, "learning_rate": 2.264566180503373e-06, "loss": 0.0192, "step": 186965 }, { "epoch": 0.7801403643464546, "grad_norm": 0.41275666208159323, "learning_rate": 2.2645359003323193e-06, "loss": 0.0189, "step": 186970 }, { "epoch": 0.7801612270614449, "grad_norm": 0.4254525770132464, "learning_rate": 2.2645056213758883e-06, "loss": 0.0218, "step": 186975 }, { "epoch": 0.7801820897764351, "grad_norm": 0.8153386550769937, "learning_rate": 2.264475343633998e-06, "loss": 0.0315, "step": 186980 }, { "epoch": 0.7802029524914255, "grad_norm": 0.7271201394144566, "learning_rate": 2.2644450671065677e-06, "loss": 0.0223, "step": 186985 }, { "epoch": 0.7802238152064157, "grad_norm": 0.8578639806322388, "learning_rate": 2.264414791793517e-06, "loss": 0.0154, "step": 186990 }, { "epoch": 0.780244677921406, "grad_norm": 0.6333665384804646, "learning_rate": 2.2643845176947626e-06, "loss": 0.0206, "step": 186995 }, { "epoch": 0.7802655406363963, "grad_norm": 0.8766380484320843, "learning_rate": 2.2643542448102255e-06, "loss": 0.0154, "step": 187000 }, { "epoch": 0.7802864033513865, "grad_norm": 1.175828571173145, "learning_rate": 2.2643239731398232e-06, "loss": 0.019, "step": 187005 }, { "epoch": 0.7803072660663768, "grad_norm": 1.0888836057366744, "learning_rate": 2.2642937026834754e-06, "loss": 0.019, "step": 187010 }, { "epoch": 0.7803281287813671, "grad_norm": 0.725336241849697, "learning_rate": 2.2642634334411003e-06, "loss": 0.0237, "step": 187015 }, { "epoch": 0.7803489914963574, "grad_norm": 0.7369029252763084, "learning_rate": 2.2642331654126175e-06, "loss": 0.0167, "step": 187020 }, { "epoch": 0.7803698542113476, "grad_norm": 0.5295993018563145, "learning_rate": 2.2642028985979447e-06, "loss": 0.0149, "step": 187025 }, { "epoch": 0.7803907169263379, "grad_norm": 0.8070343881702989, "learning_rate": 2.264172632997002e-06, "loss": 0.0206, "step": 187030 }, { "epoch": 0.7804115796413282, "grad_norm": 0.4338429953616699, "learning_rate": 2.264142368609707e-06, "loss": 0.0249, "step": 187035 }, { "epoch": 0.7804324423563185, "grad_norm": 0.6338991205867569, "learning_rate": 2.2641121054359804e-06, "loss": 0.0157, "step": 187040 }, { "epoch": 0.7804533050713087, "grad_norm": 0.42447942423851037, "learning_rate": 2.26408184347574e-06, "loss": 0.0173, "step": 187045 }, { "epoch": 0.7804741677862991, "grad_norm": 1.0931694805091579, "learning_rate": 2.264051582728904e-06, "loss": 0.0222, "step": 187050 }, { "epoch": 0.7804950305012893, "grad_norm": 0.42206344722245126, "learning_rate": 2.264021323195392e-06, "loss": 0.0155, "step": 187055 }, { "epoch": 0.7805158932162796, "grad_norm": 1.2178812889866923, "learning_rate": 2.263991064875123e-06, "loss": 0.0266, "step": 187060 }, { "epoch": 0.7805367559312699, "grad_norm": 0.6554098195178122, "learning_rate": 2.2639608077680167e-06, "loss": 0.0225, "step": 187065 }, { "epoch": 0.7805576186462602, "grad_norm": 0.6772136093504318, "learning_rate": 2.2639305518739906e-06, "loss": 0.0215, "step": 187070 }, { "epoch": 0.7805784813612504, "grad_norm": 0.40223898279172043, "learning_rate": 2.263900297192965e-06, "loss": 0.02, "step": 187075 }, { "epoch": 0.7805993440762407, "grad_norm": 0.7038474637747716, "learning_rate": 2.263870043724857e-06, "loss": 0.017, "step": 187080 }, { "epoch": 0.780620206791231, "grad_norm": 0.6750777488445469, "learning_rate": 2.2638397914695873e-06, "loss": 0.0191, "step": 187085 }, { "epoch": 0.7806410695062213, "grad_norm": 0.8263835037782911, "learning_rate": 2.263809540427074e-06, "loss": 0.0255, "step": 187090 }, { "epoch": 0.7806619322212115, "grad_norm": 0.707854464556702, "learning_rate": 2.263779290597237e-06, "loss": 0.0145, "step": 187095 }, { "epoch": 0.7806827949362019, "grad_norm": 0.8047889776215611, "learning_rate": 2.263749041979994e-06, "loss": 0.0259, "step": 187100 }, { "epoch": 0.7807036576511921, "grad_norm": 0.6486711912138104, "learning_rate": 2.2637187945752648e-06, "loss": 0.0263, "step": 187105 }, { "epoch": 0.7807245203661823, "grad_norm": 0.6476439119551062, "learning_rate": 2.2636885483829677e-06, "loss": 0.0258, "step": 187110 }, { "epoch": 0.7807453830811727, "grad_norm": 0.8203274111320058, "learning_rate": 2.263658303403023e-06, "loss": 0.0251, "step": 187115 }, { "epoch": 0.7807662457961629, "grad_norm": 0.5573903632151211, "learning_rate": 2.263628059635349e-06, "loss": 0.0217, "step": 187120 }, { "epoch": 0.7807871085111532, "grad_norm": 0.8308389785566112, "learning_rate": 2.2635978170798636e-06, "loss": 0.0286, "step": 187125 }, { "epoch": 0.7808079712261435, "grad_norm": 0.7385420993326927, "learning_rate": 2.263567575736488e-06, "loss": 0.022, "step": 187130 }, { "epoch": 0.7808288339411338, "grad_norm": 0.6296442728638368, "learning_rate": 2.2635373356051394e-06, "loss": 0.0177, "step": 187135 }, { "epoch": 0.780849696656124, "grad_norm": 0.6123993834191791, "learning_rate": 2.2635070966857375e-06, "loss": 0.0175, "step": 187140 }, { "epoch": 0.7808705593711143, "grad_norm": 0.7661714574665771, "learning_rate": 2.263476858978202e-06, "loss": 0.0158, "step": 187145 }, { "epoch": 0.7808914220861046, "grad_norm": 0.5530577712849792, "learning_rate": 2.2634466224824506e-06, "loss": 0.019, "step": 187150 }, { "epoch": 0.7809122848010949, "grad_norm": 0.5899010169258023, "learning_rate": 2.263416387198403e-06, "loss": 0.022, "step": 187155 }, { "epoch": 0.7809331475160851, "grad_norm": 0.39544630034948136, "learning_rate": 2.263386153125979e-06, "loss": 0.0175, "step": 187160 }, { "epoch": 0.7809540102310755, "grad_norm": 0.5518728573645132, "learning_rate": 2.263355920265097e-06, "loss": 0.0303, "step": 187165 }, { "epoch": 0.7809748729460657, "grad_norm": 0.540337108124308, "learning_rate": 2.263325688615676e-06, "loss": 0.0208, "step": 187170 }, { "epoch": 0.780995735661056, "grad_norm": 0.34034125909813007, "learning_rate": 2.2632954581776353e-06, "loss": 0.0206, "step": 187175 }, { "epoch": 0.7810165983760463, "grad_norm": 0.6762263547479043, "learning_rate": 2.263265228950894e-06, "loss": 0.0125, "step": 187180 }, { "epoch": 0.7810374610910366, "grad_norm": 0.6459449721975999, "learning_rate": 2.2632350009353714e-06, "loss": 0.0249, "step": 187185 }, { "epoch": 0.7810583238060268, "grad_norm": 0.9099682772164549, "learning_rate": 2.2632047741309855e-06, "loss": 0.0235, "step": 187190 }, { "epoch": 0.781079186521017, "grad_norm": 0.6833153768627359, "learning_rate": 2.2631745485376565e-06, "loss": 0.0305, "step": 187195 }, { "epoch": 0.7811000492360074, "grad_norm": 0.8923404624209205, "learning_rate": 2.263144324155304e-06, "loss": 0.0258, "step": 187200 }, { "epoch": 0.7811209119509976, "grad_norm": 0.6861637472001081, "learning_rate": 2.263114100983846e-06, "loss": 0.0251, "step": 187205 }, { "epoch": 0.7811417746659879, "grad_norm": 0.5886831688844009, "learning_rate": 2.263083879023203e-06, "loss": 0.0178, "step": 187210 }, { "epoch": 0.7811626373809782, "grad_norm": 0.7155592389664978, "learning_rate": 2.263053658273292e-06, "loss": 0.022, "step": 187215 }, { "epoch": 0.7811835000959685, "grad_norm": 0.6142689389296973, "learning_rate": 2.263023438734034e-06, "loss": 0.0204, "step": 187220 }, { "epoch": 0.7812043628109587, "grad_norm": 1.088039691762572, "learning_rate": 2.2629932204053478e-06, "loss": 0.028, "step": 187225 }, { "epoch": 0.7812252255259491, "grad_norm": 0.7380298826465199, "learning_rate": 2.2629630032871517e-06, "loss": 0.0213, "step": 187230 }, { "epoch": 0.7812460882409393, "grad_norm": 0.7017658328392243, "learning_rate": 2.262932787379366e-06, "loss": 0.0217, "step": 187235 }, { "epoch": 0.7812669509559296, "grad_norm": 0.8223969203732474, "learning_rate": 2.262902572681909e-06, "loss": 0.0188, "step": 187240 }, { "epoch": 0.7812878136709199, "grad_norm": 0.9588933885960862, "learning_rate": 2.262872359194701e-06, "loss": 0.0223, "step": 187245 }, { "epoch": 0.7813086763859102, "grad_norm": 1.205765598778974, "learning_rate": 2.26284214691766e-06, "loss": 0.0314, "step": 187250 }, { "epoch": 0.7813295391009004, "grad_norm": 0.35814966151559224, "learning_rate": 2.2628119358507065e-06, "loss": 0.0274, "step": 187255 }, { "epoch": 0.7813504018158907, "grad_norm": 0.697950897413227, "learning_rate": 2.262781725993759e-06, "loss": 0.0217, "step": 187260 }, { "epoch": 0.781371264530881, "grad_norm": 0.23594577316942442, "learning_rate": 2.262751517346736e-06, "loss": 0.0192, "step": 187265 }, { "epoch": 0.7813921272458713, "grad_norm": 0.4565026230588767, "learning_rate": 2.262721309909558e-06, "loss": 0.0142, "step": 187270 }, { "epoch": 0.7814129899608615, "grad_norm": 0.6923534709751205, "learning_rate": 2.2626911036821436e-06, "loss": 0.0217, "step": 187275 }, { "epoch": 0.7814338526758519, "grad_norm": 0.9093065133687508, "learning_rate": 2.262660898664412e-06, "loss": 0.0181, "step": 187280 }, { "epoch": 0.7814547153908421, "grad_norm": 0.6864807567113237, "learning_rate": 2.262630694856283e-06, "loss": 0.0293, "step": 187285 }, { "epoch": 0.7814755781058323, "grad_norm": 0.6190691325043236, "learning_rate": 2.262600492257675e-06, "loss": 0.0208, "step": 187290 }, { "epoch": 0.7814964408208227, "grad_norm": 1.2919120854694324, "learning_rate": 2.262570290868508e-06, "loss": 0.0247, "step": 187295 }, { "epoch": 0.781517303535813, "grad_norm": 0.49522322451817463, "learning_rate": 2.262540090688701e-06, "loss": 0.0205, "step": 187300 }, { "epoch": 0.7815381662508032, "grad_norm": 0.4125186333071315, "learning_rate": 2.2625098917181737e-06, "loss": 0.018, "step": 187305 }, { "epoch": 0.7815590289657935, "grad_norm": 0.4752898582019338, "learning_rate": 2.262479693956845e-06, "loss": 0.0182, "step": 187310 }, { "epoch": 0.7815798916807838, "grad_norm": 0.7821379376206772, "learning_rate": 2.2624494974046336e-06, "loss": 0.0141, "step": 187315 }, { "epoch": 0.781600754395774, "grad_norm": 0.2788247037101401, "learning_rate": 2.2624193020614604e-06, "loss": 0.0162, "step": 187320 }, { "epoch": 0.7816216171107643, "grad_norm": 0.7594726751565078, "learning_rate": 2.2623891079272437e-06, "loss": 0.0234, "step": 187325 }, { "epoch": 0.7816424798257546, "grad_norm": 0.3499621417845809, "learning_rate": 2.2623589150019023e-06, "loss": 0.0164, "step": 187330 }, { "epoch": 0.7816633425407449, "grad_norm": 0.8447895910608789, "learning_rate": 2.2623287232853565e-06, "loss": 0.0153, "step": 187335 }, { "epoch": 0.7816842052557351, "grad_norm": 0.2944718530168778, "learning_rate": 2.262298532777525e-06, "loss": 0.0269, "step": 187340 }, { "epoch": 0.7817050679707255, "grad_norm": 0.26287465833329116, "learning_rate": 2.2622683434783275e-06, "loss": 0.0185, "step": 187345 }, { "epoch": 0.7817259306857157, "grad_norm": 0.7727515558363178, "learning_rate": 2.2622381553876837e-06, "loss": 0.0232, "step": 187350 }, { "epoch": 0.781746793400706, "grad_norm": 0.45477646860103943, "learning_rate": 2.2622079685055125e-06, "loss": 0.0226, "step": 187355 }, { "epoch": 0.7817676561156963, "grad_norm": 0.7669082455537833, "learning_rate": 2.262177782831733e-06, "loss": 0.0212, "step": 187360 }, { "epoch": 0.7817885188306866, "grad_norm": 0.6552871738148803, "learning_rate": 2.262147598366265e-06, "loss": 0.0205, "step": 187365 }, { "epoch": 0.7818093815456768, "grad_norm": 1.4157656894007986, "learning_rate": 2.2621174151090285e-06, "loss": 0.0231, "step": 187370 }, { "epoch": 0.781830244260667, "grad_norm": 0.7988662002955579, "learning_rate": 2.262087233059942e-06, "loss": 0.0211, "step": 187375 }, { "epoch": 0.7818511069756574, "grad_norm": 0.407091072814115, "learning_rate": 2.2620570522189245e-06, "loss": 0.0196, "step": 187380 }, { "epoch": 0.7818719696906477, "grad_norm": 0.6073141104607219, "learning_rate": 2.2620268725858963e-06, "loss": 0.023, "step": 187385 }, { "epoch": 0.7818928324056379, "grad_norm": 0.7606530095664665, "learning_rate": 2.2619966941607768e-06, "loss": 0.0294, "step": 187390 }, { "epoch": 0.7819136951206282, "grad_norm": 0.3632859206751249, "learning_rate": 2.2619665169434848e-06, "loss": 0.0182, "step": 187395 }, { "epoch": 0.7819345578356185, "grad_norm": 0.6683417714511769, "learning_rate": 2.2619363409339402e-06, "loss": 0.0317, "step": 187400 }, { "epoch": 0.7819554205506087, "grad_norm": 0.612645590422265, "learning_rate": 2.261906166132063e-06, "loss": 0.0185, "step": 187405 }, { "epoch": 0.7819762832655991, "grad_norm": 0.8261756184781344, "learning_rate": 2.261875992537771e-06, "loss": 0.0261, "step": 187410 }, { "epoch": 0.7819971459805893, "grad_norm": 0.4707274103705116, "learning_rate": 2.2618458201509856e-06, "loss": 0.0164, "step": 187415 }, { "epoch": 0.7820180086955796, "grad_norm": 0.40247808623611575, "learning_rate": 2.2618156489716244e-06, "loss": 0.0161, "step": 187420 }, { "epoch": 0.7820388714105699, "grad_norm": 0.6548245621204611, "learning_rate": 2.2617854789996087e-06, "loss": 0.0277, "step": 187425 }, { "epoch": 0.7820597341255602, "grad_norm": 0.47695948589464127, "learning_rate": 2.261755310234857e-06, "loss": 0.0189, "step": 187430 }, { "epoch": 0.7820805968405504, "grad_norm": 0.6060339397803496, "learning_rate": 2.2617251426772878e-06, "loss": 0.017, "step": 187435 }, { "epoch": 0.7821014595555407, "grad_norm": 0.6058059658908324, "learning_rate": 2.2616949763268233e-06, "loss": 0.0286, "step": 187440 }, { "epoch": 0.782122322270531, "grad_norm": 0.552764695981687, "learning_rate": 2.26166481118338e-06, "loss": 0.0191, "step": 187445 }, { "epoch": 0.7821431849855213, "grad_norm": 1.396932831159095, "learning_rate": 2.261634647246879e-06, "loss": 0.0245, "step": 187450 }, { "epoch": 0.7821640477005115, "grad_norm": 0.5447234632652473, "learning_rate": 2.26160448451724e-06, "loss": 0.0184, "step": 187455 }, { "epoch": 0.7821849104155019, "grad_norm": 0.6555848584238415, "learning_rate": 2.2615743229943823e-06, "loss": 0.022, "step": 187460 }, { "epoch": 0.7822057731304921, "grad_norm": 0.5353466376365328, "learning_rate": 2.2615441626782245e-06, "loss": 0.018, "step": 187465 }, { "epoch": 0.7822266358454824, "grad_norm": 1.2272246818615242, "learning_rate": 2.261514003568687e-06, "loss": 0.0305, "step": 187470 }, { "epoch": 0.7822474985604727, "grad_norm": 0.916287488752261, "learning_rate": 2.26148384566569e-06, "loss": 0.0222, "step": 187475 }, { "epoch": 0.782268361275463, "grad_norm": 0.721596081414398, "learning_rate": 2.2614536889691514e-06, "loss": 0.0242, "step": 187480 }, { "epoch": 0.7822892239904532, "grad_norm": 0.4827355012566582, "learning_rate": 2.2614235334789922e-06, "loss": 0.0251, "step": 187485 }, { "epoch": 0.7823100867054436, "grad_norm": 0.372397666716092, "learning_rate": 2.2613933791951313e-06, "loss": 0.0193, "step": 187490 }, { "epoch": 0.7823309494204338, "grad_norm": 0.47368979554509166, "learning_rate": 2.261363226117488e-06, "loss": 0.023, "step": 187495 }, { "epoch": 0.782351812135424, "grad_norm": 0.8424136003053604, "learning_rate": 2.261333074245983e-06, "loss": 0.0226, "step": 187500 }, { "epoch": 0.7823726748504143, "grad_norm": 0.8053770241914578, "learning_rate": 2.2613029235805348e-06, "loss": 0.0216, "step": 187505 }, { "epoch": 0.7823935375654046, "grad_norm": 0.628003164685316, "learning_rate": 2.2612727741210634e-06, "loss": 0.0168, "step": 187510 }, { "epoch": 0.7824144002803949, "grad_norm": 0.6561073694626952, "learning_rate": 2.2612426258674884e-06, "loss": 0.0265, "step": 187515 }, { "epoch": 0.7824352629953851, "grad_norm": 0.6454888441219527, "learning_rate": 2.2612124788197295e-06, "loss": 0.0232, "step": 187520 }, { "epoch": 0.7824561257103755, "grad_norm": 0.9308811049627014, "learning_rate": 2.261182332977706e-06, "loss": 0.0217, "step": 187525 }, { "epoch": 0.7824769884253657, "grad_norm": 0.8952296433317783, "learning_rate": 2.2611521883413376e-06, "loss": 0.024, "step": 187530 }, { "epoch": 0.782497851140356, "grad_norm": 1.094648959847325, "learning_rate": 2.2611220449105445e-06, "loss": 0.0222, "step": 187535 }, { "epoch": 0.7825187138553463, "grad_norm": 0.7760247359311793, "learning_rate": 2.2610919026852457e-06, "loss": 0.0233, "step": 187540 }, { "epoch": 0.7825395765703366, "grad_norm": 0.6366759114524133, "learning_rate": 2.261061761665361e-06, "loss": 0.0214, "step": 187545 }, { "epoch": 0.7825604392853268, "grad_norm": 1.0753491732086813, "learning_rate": 2.26103162185081e-06, "loss": 0.0257, "step": 187550 }, { "epoch": 0.7825813020003171, "grad_norm": 0.7084413298457735, "learning_rate": 2.261001483241513e-06, "loss": 0.0154, "step": 187555 }, { "epoch": 0.7826021647153074, "grad_norm": 0.5192199312513254, "learning_rate": 2.260971345837389e-06, "loss": 0.0291, "step": 187560 }, { "epoch": 0.7826230274302977, "grad_norm": 0.6409846606014131, "learning_rate": 2.2609412096383576e-06, "loss": 0.0234, "step": 187565 }, { "epoch": 0.7826438901452879, "grad_norm": 0.5811146942439255, "learning_rate": 2.2609110746443388e-06, "loss": 0.0282, "step": 187570 }, { "epoch": 0.7826647528602783, "grad_norm": 0.6919342050322823, "learning_rate": 2.2608809408552527e-06, "loss": 0.0203, "step": 187575 }, { "epoch": 0.7826856155752685, "grad_norm": 0.32697161637328725, "learning_rate": 2.260850808271018e-06, "loss": 0.0232, "step": 187580 }, { "epoch": 0.7827064782902587, "grad_norm": 0.8937835817641506, "learning_rate": 2.260820676891555e-06, "loss": 0.0153, "step": 187585 }, { "epoch": 0.7827273410052491, "grad_norm": 1.2715950932367468, "learning_rate": 2.2607905467167837e-06, "loss": 0.0252, "step": 187590 }, { "epoch": 0.7827482037202393, "grad_norm": 0.733214690229072, "learning_rate": 2.2607604177466234e-06, "loss": 0.018, "step": 187595 }, { "epoch": 0.7827690664352296, "grad_norm": 0.5880977783757625, "learning_rate": 2.260730289980994e-06, "loss": 0.0217, "step": 187600 }, { "epoch": 0.7827899291502199, "grad_norm": 0.6178713384831817, "learning_rate": 2.2607001634198146e-06, "loss": 0.0176, "step": 187605 }, { "epoch": 0.7828107918652102, "grad_norm": 0.5014583114971554, "learning_rate": 2.2606700380630067e-06, "loss": 0.0152, "step": 187610 }, { "epoch": 0.7828316545802004, "grad_norm": 0.5591444185335219, "learning_rate": 2.2606399139104883e-06, "loss": 0.027, "step": 187615 }, { "epoch": 0.7828525172951907, "grad_norm": 0.5373950740239009, "learning_rate": 2.260609790962179e-06, "loss": 0.0149, "step": 187620 }, { "epoch": 0.782873380010181, "grad_norm": 0.41815035141464774, "learning_rate": 2.2605796692180003e-06, "loss": 0.02, "step": 187625 }, { "epoch": 0.7828942427251713, "grad_norm": 0.5954781829704989, "learning_rate": 2.2605495486778707e-06, "loss": 0.0204, "step": 187630 }, { "epoch": 0.7829151054401615, "grad_norm": 0.7065855527116406, "learning_rate": 2.2605194293417103e-06, "loss": 0.0262, "step": 187635 }, { "epoch": 0.7829359681551519, "grad_norm": 0.44147429924377884, "learning_rate": 2.260489311209439e-06, "loss": 0.0155, "step": 187640 }, { "epoch": 0.7829568308701421, "grad_norm": 1.6900494260832353, "learning_rate": 2.2604591942809767e-06, "loss": 0.0244, "step": 187645 }, { "epoch": 0.7829776935851324, "grad_norm": 0.37179517048237937, "learning_rate": 2.2604290785562425e-06, "loss": 0.018, "step": 187650 }, { "epoch": 0.7829985563001227, "grad_norm": 0.5532245519459553, "learning_rate": 2.260398964035157e-06, "loss": 0.0198, "step": 187655 }, { "epoch": 0.783019419015113, "grad_norm": 0.7321615293767817, "learning_rate": 2.260368850717639e-06, "loss": 0.0179, "step": 187660 }, { "epoch": 0.7830402817301032, "grad_norm": 1.366626187162313, "learning_rate": 2.2603387386036104e-06, "loss": 0.0249, "step": 187665 }, { "epoch": 0.7830611444450936, "grad_norm": 0.5255724618034346, "learning_rate": 2.260308627692989e-06, "loss": 0.0214, "step": 187670 }, { "epoch": 0.7830820071600838, "grad_norm": 0.5817419239578968, "learning_rate": 2.2602785179856953e-06, "loss": 0.0265, "step": 187675 }, { "epoch": 0.783102869875074, "grad_norm": 0.7852170804114891, "learning_rate": 2.2602484094816498e-06, "loss": 0.0211, "step": 187680 }, { "epoch": 0.7831237325900643, "grad_norm": 0.7386553497355074, "learning_rate": 2.260218302180771e-06, "loss": 0.0201, "step": 187685 }, { "epoch": 0.7831445953050546, "grad_norm": 1.0419634692209425, "learning_rate": 2.26018819608298e-06, "loss": 0.0268, "step": 187690 }, { "epoch": 0.7831654580200449, "grad_norm": 0.5263130872929488, "learning_rate": 2.260158091188196e-06, "loss": 0.0215, "step": 187695 }, { "epoch": 0.7831863207350351, "grad_norm": 0.46997751616227007, "learning_rate": 2.260127987496339e-06, "loss": 0.0184, "step": 187700 }, { "epoch": 0.7832071834500255, "grad_norm": 0.5322017649617663, "learning_rate": 2.260097885007329e-06, "loss": 0.0229, "step": 187705 }, { "epoch": 0.7832280461650157, "grad_norm": 0.5123034959597198, "learning_rate": 2.2600677837210862e-06, "loss": 0.0202, "step": 187710 }, { "epoch": 0.783248908880006, "grad_norm": 1.0427141306525274, "learning_rate": 2.2600376836375303e-06, "loss": 0.014, "step": 187715 }, { "epoch": 0.7832697715949963, "grad_norm": 0.4869371871793015, "learning_rate": 2.2600075847565804e-06, "loss": 0.0184, "step": 187720 }, { "epoch": 0.7832906343099866, "grad_norm": 0.4348070063361243, "learning_rate": 2.259977487078158e-06, "loss": 0.0165, "step": 187725 }, { "epoch": 0.7833114970249768, "grad_norm": 0.6188558882901845, "learning_rate": 2.259947390602182e-06, "loss": 0.0173, "step": 187730 }, { "epoch": 0.7833323597399671, "grad_norm": 0.7320984120766477, "learning_rate": 2.259917295328572e-06, "loss": 0.0223, "step": 187735 }, { "epoch": 0.7833532224549574, "grad_norm": 0.7167742955980456, "learning_rate": 2.2598872012572488e-06, "loss": 0.0215, "step": 187740 }, { "epoch": 0.7833740851699477, "grad_norm": 1.0497208532519626, "learning_rate": 2.2598571083881317e-06, "loss": 0.0281, "step": 187745 }, { "epoch": 0.7833949478849379, "grad_norm": 0.7270375185819118, "learning_rate": 2.2598270167211415e-06, "loss": 0.0194, "step": 187750 }, { "epoch": 0.7834158105999283, "grad_norm": 1.0454165635822092, "learning_rate": 2.2597969262561974e-06, "loss": 0.0247, "step": 187755 }, { "epoch": 0.7834366733149185, "grad_norm": 0.5909833769460328, "learning_rate": 2.259766836993219e-06, "loss": 0.0255, "step": 187760 }, { "epoch": 0.7834575360299088, "grad_norm": 0.6576603383568362, "learning_rate": 2.259736748932127e-06, "loss": 0.0217, "step": 187765 }, { "epoch": 0.7834783987448991, "grad_norm": 0.43850622943251977, "learning_rate": 2.259706662072842e-06, "loss": 0.0171, "step": 187770 }, { "epoch": 0.7834992614598894, "grad_norm": 0.4083545257516127, "learning_rate": 2.259676576415283e-06, "loss": 0.016, "step": 187775 }, { "epoch": 0.7835201241748796, "grad_norm": 0.39824034924556395, "learning_rate": 2.25964649195937e-06, "loss": 0.0208, "step": 187780 }, { "epoch": 0.78354098688987, "grad_norm": 0.6060451734651053, "learning_rate": 2.2596164087050234e-06, "loss": 0.0185, "step": 187785 }, { "epoch": 0.7835618496048602, "grad_norm": 0.7556274713804787, "learning_rate": 2.2595863266521634e-06, "loss": 0.0283, "step": 187790 }, { "epoch": 0.7835827123198504, "grad_norm": 0.5055924635122176, "learning_rate": 2.259556245800709e-06, "loss": 0.0186, "step": 187795 }, { "epoch": 0.7836035750348407, "grad_norm": 0.7558805372850969, "learning_rate": 2.2595261661505817e-06, "loss": 0.0346, "step": 187800 }, { "epoch": 0.783624437749831, "grad_norm": 0.6654274176567623, "learning_rate": 2.2594960877017003e-06, "loss": 0.0239, "step": 187805 }, { "epoch": 0.7836453004648213, "grad_norm": 0.43614011205608183, "learning_rate": 2.259466010453986e-06, "loss": 0.0194, "step": 187810 }, { "epoch": 0.7836661631798115, "grad_norm": 0.35719233486706353, "learning_rate": 2.2594359344073577e-06, "loss": 0.0169, "step": 187815 }, { "epoch": 0.7836870258948019, "grad_norm": 0.6359570868241389, "learning_rate": 2.259405859561736e-06, "loss": 0.0262, "step": 187820 }, { "epoch": 0.7837078886097921, "grad_norm": 0.3390205612713612, "learning_rate": 2.259375785917041e-06, "loss": 0.0155, "step": 187825 }, { "epoch": 0.7837287513247824, "grad_norm": 0.714069131639318, "learning_rate": 2.2593457134731927e-06, "loss": 0.0197, "step": 187830 }, { "epoch": 0.7837496140397727, "grad_norm": 0.6812425339538373, "learning_rate": 2.259315642230111e-06, "loss": 0.0099, "step": 187835 }, { "epoch": 0.783770476754763, "grad_norm": 0.40635588820777296, "learning_rate": 2.259285572187716e-06, "loss": 0.0229, "step": 187840 }, { "epoch": 0.7837913394697532, "grad_norm": 1.0627762215134235, "learning_rate": 2.259255503345928e-06, "loss": 0.0195, "step": 187845 }, { "epoch": 0.7838122021847436, "grad_norm": 0.7980971621267987, "learning_rate": 2.2592254357046674e-06, "loss": 0.0215, "step": 187850 }, { "epoch": 0.7838330648997338, "grad_norm": 0.8853033926080689, "learning_rate": 2.259195369263854e-06, "loss": 0.0225, "step": 187855 }, { "epoch": 0.783853927614724, "grad_norm": 0.5201869464964027, "learning_rate": 2.259165304023408e-06, "loss": 0.0255, "step": 187860 }, { "epoch": 0.7838747903297143, "grad_norm": 0.598953628013129, "learning_rate": 2.2591352399832495e-06, "loss": 0.0167, "step": 187865 }, { "epoch": 0.7838956530447047, "grad_norm": 0.7070641760417283, "learning_rate": 2.2591051771432986e-06, "loss": 0.0167, "step": 187870 }, { "epoch": 0.7839165157596949, "grad_norm": 0.7983016377177294, "learning_rate": 2.2590751155034753e-06, "loss": 0.0184, "step": 187875 }, { "epoch": 0.7839373784746851, "grad_norm": 0.4773850953241333, "learning_rate": 2.2590450550637e-06, "loss": 0.025, "step": 187880 }, { "epoch": 0.7839582411896755, "grad_norm": 0.7232062591848994, "learning_rate": 2.2590149958238926e-06, "loss": 0.0243, "step": 187885 }, { "epoch": 0.7839791039046657, "grad_norm": 0.7774343962736748, "learning_rate": 2.2589849377839732e-06, "loss": 0.0214, "step": 187890 }, { "epoch": 0.783999966619656, "grad_norm": 0.46193323084842164, "learning_rate": 2.258954880943863e-06, "loss": 0.0162, "step": 187895 }, { "epoch": 0.7840208293346463, "grad_norm": 0.7225019263233794, "learning_rate": 2.2589248253034803e-06, "loss": 0.0179, "step": 187900 }, { "epoch": 0.7840416920496366, "grad_norm": 0.6149976499817355, "learning_rate": 2.2588947708627475e-06, "loss": 0.0167, "step": 187905 }, { "epoch": 0.7840625547646268, "grad_norm": 0.6253467154310431, "learning_rate": 2.2588647176215827e-06, "loss": 0.0215, "step": 187910 }, { "epoch": 0.7840834174796171, "grad_norm": 1.3744637735879857, "learning_rate": 2.2588346655799077e-06, "loss": 0.0203, "step": 187915 }, { "epoch": 0.7841042801946074, "grad_norm": 0.37651899909983794, "learning_rate": 2.258804614737642e-06, "loss": 0.0204, "step": 187920 }, { "epoch": 0.7841251429095977, "grad_norm": 0.9124825233275866, "learning_rate": 2.2587745650947053e-06, "loss": 0.0214, "step": 187925 }, { "epoch": 0.7841460056245879, "grad_norm": 0.5291925463806735, "learning_rate": 2.258744516651019e-06, "loss": 0.02, "step": 187930 }, { "epoch": 0.7841668683395783, "grad_norm": 0.9804178373916784, "learning_rate": 2.258714469406503e-06, "loss": 0.0195, "step": 187935 }, { "epoch": 0.7841877310545685, "grad_norm": 0.7300749210481823, "learning_rate": 2.258684423361077e-06, "loss": 0.019, "step": 187940 }, { "epoch": 0.7842085937695588, "grad_norm": 0.5042434780192641, "learning_rate": 2.258654378514662e-06, "loss": 0.0268, "step": 187945 }, { "epoch": 0.7842294564845491, "grad_norm": 0.5391082334636599, "learning_rate": 2.2586243348671765e-06, "loss": 0.0168, "step": 187950 }, { "epoch": 0.7842503191995394, "grad_norm": 0.4132899867164593, "learning_rate": 2.2585942924185434e-06, "loss": 0.0192, "step": 187955 }, { "epoch": 0.7842711819145296, "grad_norm": 0.1931952748309918, "learning_rate": 2.258564251168681e-06, "loss": 0.0179, "step": 187960 }, { "epoch": 0.78429204462952, "grad_norm": 1.6188468080236165, "learning_rate": 2.2585342111175103e-06, "loss": 0.0197, "step": 187965 }, { "epoch": 0.7843129073445102, "grad_norm": 0.7347711148167952, "learning_rate": 2.2585041722649514e-06, "loss": 0.0232, "step": 187970 }, { "epoch": 0.7843337700595004, "grad_norm": 1.0937386251595247, "learning_rate": 2.258474134610925e-06, "loss": 0.0269, "step": 187975 }, { "epoch": 0.7843546327744907, "grad_norm": 0.5574760251826887, "learning_rate": 2.258444098155351e-06, "loss": 0.0176, "step": 187980 }, { "epoch": 0.784375495489481, "grad_norm": 0.5784252304932279, "learning_rate": 2.2584140628981495e-06, "loss": 0.0245, "step": 187985 }, { "epoch": 0.7843963582044713, "grad_norm": 0.643611670281714, "learning_rate": 2.258384028839242e-06, "loss": 0.0196, "step": 187990 }, { "epoch": 0.7844172209194615, "grad_norm": 0.49586648539229305, "learning_rate": 2.258353995978547e-06, "loss": 0.0172, "step": 187995 }, { "epoch": 0.7844380836344519, "grad_norm": 0.6655925680738238, "learning_rate": 2.258323964315986e-06, "loss": 0.019, "step": 188000 }, { "epoch": 0.7844589463494421, "grad_norm": 0.7131693041900505, "learning_rate": 2.25829393385148e-06, "loss": 0.0248, "step": 188005 }, { "epoch": 0.7844798090644324, "grad_norm": 0.565838393754139, "learning_rate": 2.2582639045849474e-06, "loss": 0.0257, "step": 188010 }, { "epoch": 0.7845006717794227, "grad_norm": 1.00810977596744, "learning_rate": 2.25823387651631e-06, "loss": 0.0227, "step": 188015 }, { "epoch": 0.784521534494413, "grad_norm": 0.5584135647679648, "learning_rate": 2.258203849645488e-06, "loss": 0.0176, "step": 188020 }, { "epoch": 0.7845423972094032, "grad_norm": 0.7726579229778324, "learning_rate": 2.2581738239724013e-06, "loss": 0.0214, "step": 188025 }, { "epoch": 0.7845632599243936, "grad_norm": 0.6070759962871555, "learning_rate": 2.25814379949697e-06, "loss": 0.0311, "step": 188030 }, { "epoch": 0.7845841226393838, "grad_norm": 0.6643427989002736, "learning_rate": 2.2581137762191156e-06, "loss": 0.0222, "step": 188035 }, { "epoch": 0.7846049853543741, "grad_norm": 0.6216130520600628, "learning_rate": 2.258083754138758e-06, "loss": 0.0234, "step": 188040 }, { "epoch": 0.7846258480693643, "grad_norm": 1.1198631007040356, "learning_rate": 2.2580537332558173e-06, "loss": 0.0246, "step": 188045 }, { "epoch": 0.7846467107843547, "grad_norm": 0.49795398825454695, "learning_rate": 2.258023713570214e-06, "loss": 0.0146, "step": 188050 }, { "epoch": 0.7846675734993449, "grad_norm": 0.6613184696782125, "learning_rate": 2.2579936950818686e-06, "loss": 0.0273, "step": 188055 }, { "epoch": 0.7846884362143351, "grad_norm": 0.3275867786714831, "learning_rate": 2.2579636777907015e-06, "loss": 0.0186, "step": 188060 }, { "epoch": 0.7847092989293255, "grad_norm": 0.7612292413733043, "learning_rate": 2.2579336616966334e-06, "loss": 0.0229, "step": 188065 }, { "epoch": 0.7847301616443157, "grad_norm": 1.3904690396538977, "learning_rate": 2.257903646799584e-06, "loss": 0.0312, "step": 188070 }, { "epoch": 0.784751024359306, "grad_norm": 0.986295970521063, "learning_rate": 2.2578736330994755e-06, "loss": 0.0208, "step": 188075 }, { "epoch": 0.7847718870742963, "grad_norm": 0.9053456501763405, "learning_rate": 2.2578436205962257e-06, "loss": 0.0291, "step": 188080 }, { "epoch": 0.7847927497892866, "grad_norm": 0.4244610163050556, "learning_rate": 2.257813609289757e-06, "loss": 0.0154, "step": 188085 }, { "epoch": 0.7848136125042768, "grad_norm": 0.45951377683896427, "learning_rate": 2.257783599179989e-06, "loss": 0.03, "step": 188090 }, { "epoch": 0.7848344752192671, "grad_norm": 0.5278911863240494, "learning_rate": 2.257753590266843e-06, "loss": 0.029, "step": 188095 }, { "epoch": 0.7848553379342574, "grad_norm": 0.7157105967688983, "learning_rate": 2.2577235825502383e-06, "loss": 0.0174, "step": 188100 }, { "epoch": 0.7848762006492477, "grad_norm": 0.5873842195280771, "learning_rate": 2.2576935760300963e-06, "loss": 0.0206, "step": 188105 }, { "epoch": 0.7848970633642379, "grad_norm": 0.7869131862696375, "learning_rate": 2.257663570706338e-06, "loss": 0.0206, "step": 188110 }, { "epoch": 0.7849179260792283, "grad_norm": 0.6623348360325084, "learning_rate": 2.2576335665788824e-06, "loss": 0.0171, "step": 188115 }, { "epoch": 0.7849387887942185, "grad_norm": 0.7463301540795809, "learning_rate": 2.2576035636476504e-06, "loss": 0.0206, "step": 188120 }, { "epoch": 0.7849596515092088, "grad_norm": 1.0451117260895517, "learning_rate": 2.2575735619125636e-06, "loss": 0.0222, "step": 188125 }, { "epoch": 0.7849805142241991, "grad_norm": 0.3285048125258717, "learning_rate": 2.2575435613735415e-06, "loss": 0.0225, "step": 188130 }, { "epoch": 0.7850013769391894, "grad_norm": 0.6481289299708725, "learning_rate": 2.2575135620305045e-06, "loss": 0.0207, "step": 188135 }, { "epoch": 0.7850222396541796, "grad_norm": 0.5117038986577711, "learning_rate": 2.257483563883374e-06, "loss": 0.0276, "step": 188140 }, { "epoch": 0.78504310236917, "grad_norm": 1.1445148149041946, "learning_rate": 2.2574535669320694e-06, "loss": 0.0184, "step": 188145 }, { "epoch": 0.7850639650841602, "grad_norm": 0.6210545673856525, "learning_rate": 2.2574235711765134e-06, "loss": 0.0238, "step": 188150 }, { "epoch": 0.7850848277991505, "grad_norm": 0.353910823655536, "learning_rate": 2.2573935766166236e-06, "loss": 0.016, "step": 188155 }, { "epoch": 0.7851056905141407, "grad_norm": 1.2992172301987484, "learning_rate": 2.2573635832523226e-06, "loss": 0.0274, "step": 188160 }, { "epoch": 0.785126553229131, "grad_norm": 0.6290914432223607, "learning_rate": 2.2573335910835305e-06, "loss": 0.0188, "step": 188165 }, { "epoch": 0.7851474159441213, "grad_norm": 0.665641688493868, "learning_rate": 2.257303600110168e-06, "loss": 0.0175, "step": 188170 }, { "epoch": 0.7851682786591115, "grad_norm": 0.6477964708091137, "learning_rate": 2.2572736103321557e-06, "loss": 0.0235, "step": 188175 }, { "epoch": 0.7851891413741019, "grad_norm": 0.929034213209699, "learning_rate": 2.2572436217494136e-06, "loss": 0.0257, "step": 188180 }, { "epoch": 0.7852100040890921, "grad_norm": 0.33700662821733973, "learning_rate": 2.257213634361863e-06, "loss": 0.0213, "step": 188185 }, { "epoch": 0.7852308668040824, "grad_norm": 0.34193053387250266, "learning_rate": 2.2571836481694233e-06, "loss": 0.0226, "step": 188190 }, { "epoch": 0.7852517295190727, "grad_norm": 0.6730528075386846, "learning_rate": 2.2571536631720175e-06, "loss": 0.0229, "step": 188195 }, { "epoch": 0.785272592234063, "grad_norm": 0.5712448309298865, "learning_rate": 2.257123679369564e-06, "loss": 0.022, "step": 188200 }, { "epoch": 0.7852934549490532, "grad_norm": 0.4670699085655493, "learning_rate": 2.257093696761984e-06, "loss": 0.017, "step": 188205 }, { "epoch": 0.7853143176640436, "grad_norm": 0.34892816567689844, "learning_rate": 2.257063715349199e-06, "loss": 0.0166, "step": 188210 }, { "epoch": 0.7853351803790338, "grad_norm": 0.669777644300479, "learning_rate": 2.2570337351311286e-06, "loss": 0.0213, "step": 188215 }, { "epoch": 0.7853560430940241, "grad_norm": 1.1714314858205022, "learning_rate": 2.2570037561076937e-06, "loss": 0.0185, "step": 188220 }, { "epoch": 0.7853769058090143, "grad_norm": 0.5354033174031025, "learning_rate": 2.256973778278815e-06, "loss": 0.0234, "step": 188225 }, { "epoch": 0.7853977685240047, "grad_norm": 0.6121478978850273, "learning_rate": 2.256943801644414e-06, "loss": 0.0223, "step": 188230 }, { "epoch": 0.7854186312389949, "grad_norm": 0.2551214590525384, "learning_rate": 2.25691382620441e-06, "loss": 0.0259, "step": 188235 }, { "epoch": 0.7854394939539852, "grad_norm": 0.5991220419424766, "learning_rate": 2.2568838519587247e-06, "loss": 0.0218, "step": 188240 }, { "epoch": 0.7854603566689755, "grad_norm": 0.5028061998234272, "learning_rate": 2.2568538789072787e-06, "loss": 0.0135, "step": 188245 }, { "epoch": 0.7854812193839658, "grad_norm": 0.4339772589819012, "learning_rate": 2.2568239070499916e-06, "loss": 0.0215, "step": 188250 }, { "epoch": 0.785502082098956, "grad_norm": 0.7792226395351352, "learning_rate": 2.256793936386786e-06, "loss": 0.0218, "step": 188255 }, { "epoch": 0.7855229448139464, "grad_norm": 0.4557479982773928, "learning_rate": 2.2567639669175807e-06, "loss": 0.0224, "step": 188260 }, { "epoch": 0.7855438075289366, "grad_norm": 0.4966836178641301, "learning_rate": 2.256733998642298e-06, "loss": 0.0228, "step": 188265 }, { "epoch": 0.7855646702439268, "grad_norm": 0.7017094329982029, "learning_rate": 2.2567040315608576e-06, "loss": 0.0198, "step": 188270 }, { "epoch": 0.7855855329589171, "grad_norm": 0.2815822180106781, "learning_rate": 2.2566740656731805e-06, "loss": 0.0186, "step": 188275 }, { "epoch": 0.7856063956739074, "grad_norm": 0.675437298768556, "learning_rate": 2.2566441009791875e-06, "loss": 0.0182, "step": 188280 }, { "epoch": 0.7856272583888977, "grad_norm": 0.4532724580044037, "learning_rate": 2.2566141374787994e-06, "loss": 0.019, "step": 188285 }, { "epoch": 0.7856481211038879, "grad_norm": 0.53673658376856, "learning_rate": 2.256584175171937e-06, "loss": 0.0173, "step": 188290 }, { "epoch": 0.7856689838188783, "grad_norm": 0.5431844173058131, "learning_rate": 2.256554214058521e-06, "loss": 0.016, "step": 188295 }, { "epoch": 0.7856898465338685, "grad_norm": 0.6280048861375295, "learning_rate": 2.2565242541384717e-06, "loss": 0.0238, "step": 188300 }, { "epoch": 0.7857107092488588, "grad_norm": 0.5439246916637722, "learning_rate": 2.2564942954117106e-06, "loss": 0.0191, "step": 188305 }, { "epoch": 0.7857315719638491, "grad_norm": 1.7816599196108225, "learning_rate": 2.2564643378781585e-06, "loss": 0.0274, "step": 188310 }, { "epoch": 0.7857524346788394, "grad_norm": 0.4879501075548369, "learning_rate": 2.2564343815377357e-06, "loss": 0.0162, "step": 188315 }, { "epoch": 0.7857732973938296, "grad_norm": 0.774686533056506, "learning_rate": 2.2564044263903635e-06, "loss": 0.0293, "step": 188320 }, { "epoch": 0.78579416010882, "grad_norm": 0.27946249094930675, "learning_rate": 2.2563744724359618e-06, "loss": 0.0212, "step": 188325 }, { "epoch": 0.7858150228238102, "grad_norm": 0.2691701056631746, "learning_rate": 2.256344519674453e-06, "loss": 0.0165, "step": 188330 }, { "epoch": 0.7858358855388005, "grad_norm": 0.32283480195302433, "learning_rate": 2.2563145681057565e-06, "loss": 0.0227, "step": 188335 }, { "epoch": 0.7858567482537907, "grad_norm": 0.5495949400721681, "learning_rate": 2.2562846177297933e-06, "loss": 0.0239, "step": 188340 }, { "epoch": 0.7858776109687811, "grad_norm": 0.5515049159916879, "learning_rate": 2.2562546685464847e-06, "loss": 0.0206, "step": 188345 }, { "epoch": 0.7858984736837713, "grad_norm": 0.39027061968346866, "learning_rate": 2.256224720555752e-06, "loss": 0.0277, "step": 188350 }, { "epoch": 0.7859193363987615, "grad_norm": 0.72268536847991, "learning_rate": 2.2561947737575148e-06, "loss": 0.0209, "step": 188355 }, { "epoch": 0.7859401991137519, "grad_norm": 0.43911078337309367, "learning_rate": 2.2561648281516943e-06, "loss": 0.0188, "step": 188360 }, { "epoch": 0.7859610618287421, "grad_norm": 0.328363272123878, "learning_rate": 2.256134883738213e-06, "loss": 0.0165, "step": 188365 }, { "epoch": 0.7859819245437324, "grad_norm": 0.575042306263664, "learning_rate": 2.2561049405169895e-06, "loss": 0.0211, "step": 188370 }, { "epoch": 0.7860027872587227, "grad_norm": 0.5879062088149375, "learning_rate": 2.2560749984879452e-06, "loss": 0.018, "step": 188375 }, { "epoch": 0.786023649973713, "grad_norm": 0.8746926429975577, "learning_rate": 2.2560450576510024e-06, "loss": 0.0227, "step": 188380 }, { "epoch": 0.7860445126887032, "grad_norm": 0.4633222020740126, "learning_rate": 2.256015118006081e-06, "loss": 0.0212, "step": 188385 }, { "epoch": 0.7860653754036936, "grad_norm": 0.4626138704886007, "learning_rate": 2.2559851795531013e-06, "loss": 0.0219, "step": 188390 }, { "epoch": 0.7860862381186838, "grad_norm": 0.554353894989096, "learning_rate": 2.255955242291985e-06, "loss": 0.0218, "step": 188395 }, { "epoch": 0.7861071008336741, "grad_norm": 0.5028835105843328, "learning_rate": 2.255925306222653e-06, "loss": 0.0216, "step": 188400 }, { "epoch": 0.7861279635486643, "grad_norm": 0.5211047156136972, "learning_rate": 2.2558953713450263e-06, "loss": 0.0226, "step": 188405 }, { "epoch": 0.7861488262636547, "grad_norm": 0.8246239487876366, "learning_rate": 2.2558654376590257e-06, "loss": 0.0157, "step": 188410 }, { "epoch": 0.7861696889786449, "grad_norm": 1.726846130745377, "learning_rate": 2.255835505164572e-06, "loss": 0.0219, "step": 188415 }, { "epoch": 0.7861905516936352, "grad_norm": 0.8953573249692705, "learning_rate": 2.2558055738615857e-06, "loss": 0.0217, "step": 188420 }, { "epoch": 0.7862114144086255, "grad_norm": 0.9133381187490529, "learning_rate": 2.255775643749989e-06, "loss": 0.0202, "step": 188425 }, { "epoch": 0.7862322771236158, "grad_norm": 0.6750809741563925, "learning_rate": 2.255745714829702e-06, "loss": 0.0237, "step": 188430 }, { "epoch": 0.786253139838606, "grad_norm": 0.9244907020035468, "learning_rate": 2.255715787100646e-06, "loss": 0.0248, "step": 188435 }, { "epoch": 0.7862740025535964, "grad_norm": 0.7785956718255972, "learning_rate": 2.2556858605627414e-06, "loss": 0.0234, "step": 188440 }, { "epoch": 0.7862948652685866, "grad_norm": 0.5850808798563343, "learning_rate": 2.2556559352159097e-06, "loss": 0.0144, "step": 188445 }, { "epoch": 0.7863157279835769, "grad_norm": 0.5561907357282776, "learning_rate": 2.255626011060072e-06, "loss": 0.0232, "step": 188450 }, { "epoch": 0.7863365906985671, "grad_norm": 0.5454841792164447, "learning_rate": 2.2555960880951487e-06, "loss": 0.0156, "step": 188455 }, { "epoch": 0.7863574534135575, "grad_norm": 0.5096311332071497, "learning_rate": 2.2555661663210614e-06, "loss": 0.0142, "step": 188460 }, { "epoch": 0.7863783161285477, "grad_norm": 0.5928975284645069, "learning_rate": 2.255536245737731e-06, "loss": 0.0249, "step": 188465 }, { "epoch": 0.7863991788435379, "grad_norm": 0.5208267960001066, "learning_rate": 2.2555063263450792e-06, "loss": 0.0236, "step": 188470 }, { "epoch": 0.7864200415585283, "grad_norm": 0.8380171027779143, "learning_rate": 2.2554764081430247e-06, "loss": 0.0149, "step": 188475 }, { "epoch": 0.7864409042735185, "grad_norm": 1.2536160438648625, "learning_rate": 2.2554464911314915e-06, "loss": 0.0165, "step": 188480 }, { "epoch": 0.7864617669885088, "grad_norm": 1.0557438066850156, "learning_rate": 2.2554165753103983e-06, "loss": 0.0272, "step": 188485 }, { "epoch": 0.7864826297034991, "grad_norm": 0.655855922414945, "learning_rate": 2.2553866606796676e-06, "loss": 0.0216, "step": 188490 }, { "epoch": 0.7865034924184894, "grad_norm": 0.6376467539488739, "learning_rate": 2.25535674723922e-06, "loss": 0.0241, "step": 188495 }, { "epoch": 0.7865243551334796, "grad_norm": 0.6021944973837375, "learning_rate": 2.2553268349889766e-06, "loss": 0.0198, "step": 188500 }, { "epoch": 0.78654521784847, "grad_norm": 0.8326022602723077, "learning_rate": 2.2552969239288582e-06, "loss": 0.0225, "step": 188505 }, { "epoch": 0.7865660805634602, "grad_norm": 0.6390887278401759, "learning_rate": 2.2552670140587864e-06, "loss": 0.0239, "step": 188510 }, { "epoch": 0.7865869432784505, "grad_norm": 0.5268708383570796, "learning_rate": 2.2552371053786816e-06, "loss": 0.0219, "step": 188515 }, { "epoch": 0.7866078059934407, "grad_norm": 0.7294893843362871, "learning_rate": 2.2552071978884653e-06, "loss": 0.0251, "step": 188520 }, { "epoch": 0.7866286687084311, "grad_norm": 1.0800647001337398, "learning_rate": 2.255177291588059e-06, "loss": 0.0243, "step": 188525 }, { "epoch": 0.7866495314234213, "grad_norm": 0.5772033823538895, "learning_rate": 2.255147386477383e-06, "loss": 0.0197, "step": 188530 }, { "epoch": 0.7866703941384116, "grad_norm": 0.5777862596130664, "learning_rate": 2.255117482556359e-06, "loss": 0.016, "step": 188535 }, { "epoch": 0.7866912568534019, "grad_norm": 0.9513137219543372, "learning_rate": 2.2550875798249078e-06, "loss": 0.0194, "step": 188540 }, { "epoch": 0.7867121195683922, "grad_norm": 0.7985864044452555, "learning_rate": 2.2550576782829507e-06, "loss": 0.0248, "step": 188545 }, { "epoch": 0.7867329822833824, "grad_norm": 0.7060785046379378, "learning_rate": 2.255027777930409e-06, "loss": 0.0325, "step": 188550 }, { "epoch": 0.7867538449983728, "grad_norm": 0.5216024060083364, "learning_rate": 2.254997878767204e-06, "loss": 0.0157, "step": 188555 }, { "epoch": 0.786774707713363, "grad_norm": 0.6176323570846202, "learning_rate": 2.254967980793256e-06, "loss": 0.0229, "step": 188560 }, { "epoch": 0.7867955704283532, "grad_norm": 1.581060081781793, "learning_rate": 2.2549380840084867e-06, "loss": 0.055, "step": 188565 }, { "epoch": 0.7868164331433436, "grad_norm": 0.524173781813251, "learning_rate": 2.2549081884128174e-06, "loss": 0.0199, "step": 188570 }, { "epoch": 0.7868372958583338, "grad_norm": 0.46758353967750177, "learning_rate": 2.2548782940061694e-06, "loss": 0.0163, "step": 188575 }, { "epoch": 0.7868581585733241, "grad_norm": 1.0349300113977602, "learning_rate": 2.2548484007884634e-06, "loss": 0.0247, "step": 188580 }, { "epoch": 0.7868790212883143, "grad_norm": 0.5362824902018468, "learning_rate": 2.2548185087596206e-06, "loss": 0.0212, "step": 188585 }, { "epoch": 0.7868998840033047, "grad_norm": 0.37570669523582206, "learning_rate": 2.2547886179195624e-06, "loss": 0.013, "step": 188590 }, { "epoch": 0.7869207467182949, "grad_norm": 0.5725918393471235, "learning_rate": 2.2547587282682102e-06, "loss": 0.0218, "step": 188595 }, { "epoch": 0.7869416094332852, "grad_norm": 0.154898962115352, "learning_rate": 2.2547288398054854e-06, "loss": 0.0164, "step": 188600 }, { "epoch": 0.7869624721482755, "grad_norm": 0.4344573203949079, "learning_rate": 2.254698952531309e-06, "loss": 0.0163, "step": 188605 }, { "epoch": 0.7869833348632658, "grad_norm": 0.5426646382549182, "learning_rate": 2.2546690664456007e-06, "loss": 0.0211, "step": 188610 }, { "epoch": 0.787004197578256, "grad_norm": 1.0452993644818236, "learning_rate": 2.254639181548284e-06, "loss": 0.0232, "step": 188615 }, { "epoch": 0.7870250602932464, "grad_norm": 1.3580026777375604, "learning_rate": 2.2546092978392798e-06, "loss": 0.0212, "step": 188620 }, { "epoch": 0.7870459230082366, "grad_norm": 1.0940882698915375, "learning_rate": 2.2545794153185085e-06, "loss": 0.0266, "step": 188625 }, { "epoch": 0.7870667857232269, "grad_norm": 1.0677147623886825, "learning_rate": 2.254549533985891e-06, "loss": 0.0178, "step": 188630 }, { "epoch": 0.7870876484382171, "grad_norm": 0.6818108074883626, "learning_rate": 2.25451965384135e-06, "loss": 0.0182, "step": 188635 }, { "epoch": 0.7871085111532075, "grad_norm": 0.622797604225785, "learning_rate": 2.2544897748848057e-06, "loss": 0.0173, "step": 188640 }, { "epoch": 0.7871293738681977, "grad_norm": 0.6884742171236029, "learning_rate": 2.2544598971161798e-06, "loss": 0.0193, "step": 188645 }, { "epoch": 0.787150236583188, "grad_norm": 0.858625848187725, "learning_rate": 2.2544300205353935e-06, "loss": 0.0215, "step": 188650 }, { "epoch": 0.7871710992981783, "grad_norm": 0.4314337463765258, "learning_rate": 2.254400145142368e-06, "loss": 0.0196, "step": 188655 }, { "epoch": 0.7871919620131685, "grad_norm": 0.5445754026087951, "learning_rate": 2.2543702709370246e-06, "loss": 0.0246, "step": 188660 }, { "epoch": 0.7872128247281588, "grad_norm": 0.7204495874254216, "learning_rate": 2.2543403979192853e-06, "loss": 0.0284, "step": 188665 }, { "epoch": 0.7872336874431491, "grad_norm": 0.515292514820513, "learning_rate": 2.2543105260890698e-06, "loss": 0.017, "step": 188670 }, { "epoch": 0.7872545501581394, "grad_norm": 1.036799553030592, "learning_rate": 2.2542806554463012e-06, "loss": 0.039, "step": 188675 }, { "epoch": 0.7872754128731296, "grad_norm": 0.8430920764219669, "learning_rate": 2.2542507859908996e-06, "loss": 0.0271, "step": 188680 }, { "epoch": 0.78729627558812, "grad_norm": 1.0084370373500198, "learning_rate": 2.254220917722787e-06, "loss": 0.0201, "step": 188685 }, { "epoch": 0.7873171383031102, "grad_norm": 1.2279043507746963, "learning_rate": 2.2541910506418844e-06, "loss": 0.0274, "step": 188690 }, { "epoch": 0.7873380010181005, "grad_norm": 1.1497661587956112, "learning_rate": 2.2541611847481137e-06, "loss": 0.0231, "step": 188695 }, { "epoch": 0.7873588637330907, "grad_norm": 0.41059187532064545, "learning_rate": 2.254131320041395e-06, "loss": 0.0227, "step": 188700 }, { "epoch": 0.7873797264480811, "grad_norm": 0.7023314571320198, "learning_rate": 2.254101456521652e-06, "loss": 0.0191, "step": 188705 }, { "epoch": 0.7874005891630713, "grad_norm": 0.572170189091259, "learning_rate": 2.2540715941888035e-06, "loss": 0.0198, "step": 188710 }, { "epoch": 0.7874214518780616, "grad_norm": 1.0711642744919212, "learning_rate": 2.254041733042772e-06, "loss": 0.0247, "step": 188715 }, { "epoch": 0.7874423145930519, "grad_norm": 0.8595768592545578, "learning_rate": 2.254011873083479e-06, "loss": 0.0251, "step": 188720 }, { "epoch": 0.7874631773080422, "grad_norm": 0.6277450307410065, "learning_rate": 2.253982014310846e-06, "loss": 0.022, "step": 188725 }, { "epoch": 0.7874840400230324, "grad_norm": 0.9695958452264558, "learning_rate": 2.2539521567247943e-06, "loss": 0.0149, "step": 188730 }, { "epoch": 0.7875049027380228, "grad_norm": 1.0021470175032938, "learning_rate": 2.2539223003252443e-06, "loss": 0.0338, "step": 188735 }, { "epoch": 0.787525765453013, "grad_norm": 0.9818098138648037, "learning_rate": 2.253892445112119e-06, "loss": 0.0209, "step": 188740 }, { "epoch": 0.7875466281680032, "grad_norm": 0.34393611710518796, "learning_rate": 2.2538625910853395e-06, "loss": 0.0178, "step": 188745 }, { "epoch": 0.7875674908829936, "grad_norm": 1.3085408070022166, "learning_rate": 2.253832738244826e-06, "loss": 0.0217, "step": 188750 }, { "epoch": 0.7875883535979838, "grad_norm": 0.5931369712092903, "learning_rate": 2.253802886590501e-06, "loss": 0.0214, "step": 188755 }, { "epoch": 0.7876092163129741, "grad_norm": 0.8394823044990556, "learning_rate": 2.253773036122286e-06, "loss": 0.0221, "step": 188760 }, { "epoch": 0.7876300790279643, "grad_norm": 0.3575233191353657, "learning_rate": 2.2537431868401024e-06, "loss": 0.0151, "step": 188765 }, { "epoch": 0.7876509417429547, "grad_norm": 0.7976240202195966, "learning_rate": 2.2537133387438707e-06, "loss": 0.0284, "step": 188770 }, { "epoch": 0.7876718044579449, "grad_norm": 0.7077294160962391, "learning_rate": 2.253683491833514e-06, "loss": 0.0184, "step": 188775 }, { "epoch": 0.7876926671729352, "grad_norm": 0.24983270756428508, "learning_rate": 2.2536536461089525e-06, "loss": 0.0192, "step": 188780 }, { "epoch": 0.7877135298879255, "grad_norm": 0.576659813470732, "learning_rate": 2.253623801570108e-06, "loss": 0.0273, "step": 188785 }, { "epoch": 0.7877343926029158, "grad_norm": 0.7096825517350916, "learning_rate": 2.253593958216902e-06, "loss": 0.0226, "step": 188790 }, { "epoch": 0.787755255317906, "grad_norm": 0.3380751293732231, "learning_rate": 2.253564116049257e-06, "loss": 0.0213, "step": 188795 }, { "epoch": 0.7877761180328964, "grad_norm": 0.37195536078331526, "learning_rate": 2.2535342750670927e-06, "loss": 0.0174, "step": 188800 }, { "epoch": 0.7877969807478866, "grad_norm": 0.4102405457805137, "learning_rate": 2.2535044352703316e-06, "loss": 0.0183, "step": 188805 }, { "epoch": 0.7878178434628769, "grad_norm": 1.0417077439062759, "learning_rate": 2.2534745966588954e-06, "loss": 0.0226, "step": 188810 }, { "epoch": 0.7878387061778671, "grad_norm": 0.4688221107206192, "learning_rate": 2.253444759232705e-06, "loss": 0.02, "step": 188815 }, { "epoch": 0.7878595688928575, "grad_norm": 0.7891943636747859, "learning_rate": 2.2534149229916825e-06, "loss": 0.0209, "step": 188820 }, { "epoch": 0.7878804316078477, "grad_norm": 0.5508235761482293, "learning_rate": 2.2533850879357496e-06, "loss": 0.025, "step": 188825 }, { "epoch": 0.787901294322838, "grad_norm": 0.4246394159781774, "learning_rate": 2.253355254064827e-06, "loss": 0.0141, "step": 188830 }, { "epoch": 0.7879221570378283, "grad_norm": 0.3217213853243592, "learning_rate": 2.253325421378836e-06, "loss": 0.0174, "step": 188835 }, { "epoch": 0.7879430197528186, "grad_norm": 0.5731663924926115, "learning_rate": 2.2532955898777003e-06, "loss": 0.0202, "step": 188840 }, { "epoch": 0.7879638824678088, "grad_norm": 0.6770000462253905, "learning_rate": 2.2532657595613394e-06, "loss": 0.0277, "step": 188845 }, { "epoch": 0.7879847451827992, "grad_norm": 0.21297092458843137, "learning_rate": 2.2532359304296757e-06, "loss": 0.0241, "step": 188850 }, { "epoch": 0.7880056078977894, "grad_norm": 0.5493783166200802, "learning_rate": 2.253206102482631e-06, "loss": 0.0189, "step": 188855 }, { "epoch": 0.7880264706127796, "grad_norm": 0.740905291906363, "learning_rate": 2.2531762757201257e-06, "loss": 0.0118, "step": 188860 }, { "epoch": 0.78804733332777, "grad_norm": 1.0893975278065793, "learning_rate": 2.253146450142083e-06, "loss": 0.0238, "step": 188865 }, { "epoch": 0.7880681960427602, "grad_norm": 1.1259059835498622, "learning_rate": 2.2531166257484228e-06, "loss": 0.0194, "step": 188870 }, { "epoch": 0.7880890587577505, "grad_norm": 0.8484016288906846, "learning_rate": 2.2530868025390686e-06, "loss": 0.0303, "step": 188875 }, { "epoch": 0.7881099214727407, "grad_norm": 0.5094247276784172, "learning_rate": 2.253056980513941e-06, "loss": 0.0253, "step": 188880 }, { "epoch": 0.7881307841877311, "grad_norm": 1.9849474896719288, "learning_rate": 2.2530271596729616e-06, "loss": 0.0157, "step": 188885 }, { "epoch": 0.7881516469027213, "grad_norm": 1.0956210417449261, "learning_rate": 2.2529973400160516e-06, "loss": 0.0287, "step": 188890 }, { "epoch": 0.7881725096177116, "grad_norm": 1.1765295780953475, "learning_rate": 2.2529675215431337e-06, "loss": 0.0214, "step": 188895 }, { "epoch": 0.7881933723327019, "grad_norm": 0.5739187760119157, "learning_rate": 2.252937704254129e-06, "loss": 0.0208, "step": 188900 }, { "epoch": 0.7882142350476922, "grad_norm": 0.7947526855804344, "learning_rate": 2.2529078881489587e-06, "loss": 0.0221, "step": 188905 }, { "epoch": 0.7882350977626824, "grad_norm": 0.5311328103454802, "learning_rate": 2.2528780732275453e-06, "loss": 0.0198, "step": 188910 }, { "epoch": 0.7882559604776728, "grad_norm": 0.43451819333818426, "learning_rate": 2.2528482594898105e-06, "loss": 0.0216, "step": 188915 }, { "epoch": 0.788276823192663, "grad_norm": 0.8240470964962922, "learning_rate": 2.2528184469356754e-06, "loss": 0.0237, "step": 188920 }, { "epoch": 0.7882976859076533, "grad_norm": 0.5690331787423138, "learning_rate": 2.252788635565062e-06, "loss": 0.0244, "step": 188925 }, { "epoch": 0.7883185486226436, "grad_norm": 0.8205570333697996, "learning_rate": 2.2527588253778913e-06, "loss": 0.0203, "step": 188930 }, { "epoch": 0.7883394113376339, "grad_norm": 0.2719622320829103, "learning_rate": 2.252729016374086e-06, "loss": 0.0173, "step": 188935 }, { "epoch": 0.7883602740526241, "grad_norm": 0.6006957803435279, "learning_rate": 2.2526992085535675e-06, "loss": 0.0298, "step": 188940 }, { "epoch": 0.7883811367676143, "grad_norm": 0.3322611561878275, "learning_rate": 2.2526694019162575e-06, "loss": 0.0213, "step": 188945 }, { "epoch": 0.7884019994826047, "grad_norm": 0.7348424019307488, "learning_rate": 2.252639596462077e-06, "loss": 0.0225, "step": 188950 }, { "epoch": 0.7884228621975949, "grad_norm": 0.640101033513128, "learning_rate": 2.2526097921909494e-06, "loss": 0.0221, "step": 188955 }, { "epoch": 0.7884437249125852, "grad_norm": 0.8089162915452305, "learning_rate": 2.2525799891027946e-06, "loss": 0.0205, "step": 188960 }, { "epoch": 0.7884645876275755, "grad_norm": 0.9627888034982675, "learning_rate": 2.2525501871975358e-06, "loss": 0.0248, "step": 188965 }, { "epoch": 0.7884854503425658, "grad_norm": 0.40369352843055273, "learning_rate": 2.2525203864750937e-06, "loss": 0.0217, "step": 188970 }, { "epoch": 0.788506313057556, "grad_norm": 0.4944569069469852, "learning_rate": 2.2524905869353903e-06, "loss": 0.0177, "step": 188975 }, { "epoch": 0.7885271757725464, "grad_norm": 0.7089417237112919, "learning_rate": 2.2524607885783483e-06, "loss": 0.0202, "step": 188980 }, { "epoch": 0.7885480384875366, "grad_norm": 0.5706477900935142, "learning_rate": 2.2524309914038883e-06, "loss": 0.0193, "step": 188985 }, { "epoch": 0.7885689012025269, "grad_norm": 1.5383035274962524, "learning_rate": 2.2524011954119325e-06, "loss": 0.0262, "step": 188990 }, { "epoch": 0.7885897639175171, "grad_norm": 0.28834987854769745, "learning_rate": 2.252371400602402e-06, "loss": 0.0222, "step": 188995 }, { "epoch": 0.7886106266325075, "grad_norm": 0.6106953134582058, "learning_rate": 2.2523416069752203e-06, "loss": 0.0202, "step": 189000 }, { "epoch": 0.7886314893474977, "grad_norm": 0.8678132538841894, "learning_rate": 2.2523118145303077e-06, "loss": 0.0162, "step": 189005 }, { "epoch": 0.788652352062488, "grad_norm": 0.6317840315836675, "learning_rate": 2.2522820232675867e-06, "loss": 0.0245, "step": 189010 }, { "epoch": 0.7886732147774783, "grad_norm": 0.7987984201737643, "learning_rate": 2.252252233186979e-06, "loss": 0.0197, "step": 189015 }, { "epoch": 0.7886940774924686, "grad_norm": 0.5110225889517267, "learning_rate": 2.252222444288406e-06, "loss": 0.0209, "step": 189020 }, { "epoch": 0.7887149402074588, "grad_norm": 0.9217332456269418, "learning_rate": 2.2521926565717903e-06, "loss": 0.0184, "step": 189025 }, { "epoch": 0.7887358029224492, "grad_norm": 0.34347023021882905, "learning_rate": 2.2521628700370526e-06, "loss": 0.0175, "step": 189030 }, { "epoch": 0.7887566656374394, "grad_norm": 0.3941750952951846, "learning_rate": 2.2521330846841165e-06, "loss": 0.0165, "step": 189035 }, { "epoch": 0.7887775283524296, "grad_norm": 0.34035421765980833, "learning_rate": 2.252103300512902e-06, "loss": 0.0246, "step": 189040 }, { "epoch": 0.78879839106742, "grad_norm": 0.6735919708691908, "learning_rate": 2.252073517523332e-06, "loss": 0.0147, "step": 189045 }, { "epoch": 0.7888192537824102, "grad_norm": 0.6657422757426942, "learning_rate": 2.2520437357153284e-06, "loss": 0.0233, "step": 189050 }, { "epoch": 0.7888401164974005, "grad_norm": 0.6935492047706296, "learning_rate": 2.252013955088812e-06, "loss": 0.0176, "step": 189055 }, { "epoch": 0.7888609792123907, "grad_norm": 1.3700919133175673, "learning_rate": 2.2519841756437066e-06, "loss": 0.0177, "step": 189060 }, { "epoch": 0.7888818419273811, "grad_norm": 0.7513623447731886, "learning_rate": 2.2519543973799322e-06, "loss": 0.0185, "step": 189065 }, { "epoch": 0.7889027046423713, "grad_norm": 0.5103904002060204, "learning_rate": 2.2519246202974123e-06, "loss": 0.0216, "step": 189070 }, { "epoch": 0.7889235673573616, "grad_norm": 1.198105853354262, "learning_rate": 2.251894844396067e-06, "loss": 0.0287, "step": 189075 }, { "epoch": 0.7889444300723519, "grad_norm": 0.5369209949903648, "learning_rate": 2.25186506967582e-06, "loss": 0.0172, "step": 189080 }, { "epoch": 0.7889652927873422, "grad_norm": 0.6736136246820598, "learning_rate": 2.251835296136592e-06, "loss": 0.016, "step": 189085 }, { "epoch": 0.7889861555023324, "grad_norm": 1.9630152346652103, "learning_rate": 2.2518055237783055e-06, "loss": 0.0211, "step": 189090 }, { "epoch": 0.7890070182173228, "grad_norm": 0.45860147784746763, "learning_rate": 2.251775752600882e-06, "loss": 0.0232, "step": 189095 }, { "epoch": 0.789027880932313, "grad_norm": 1.0278842940447555, "learning_rate": 2.251745982604244e-06, "loss": 0.0293, "step": 189100 }, { "epoch": 0.7890487436473033, "grad_norm": 0.6169006454719466, "learning_rate": 2.2517162137883135e-06, "loss": 0.013, "step": 189105 }, { "epoch": 0.7890696063622936, "grad_norm": 0.4544018451541935, "learning_rate": 2.251686446153012e-06, "loss": 0.0187, "step": 189110 }, { "epoch": 0.7890904690772839, "grad_norm": 0.5549036635296961, "learning_rate": 2.251656679698261e-06, "loss": 0.0248, "step": 189115 }, { "epoch": 0.7891113317922741, "grad_norm": 0.5765417299262726, "learning_rate": 2.2516269144239842e-06, "loss": 0.0267, "step": 189120 }, { "epoch": 0.7891321945072644, "grad_norm": 1.031169340283067, "learning_rate": 2.2515971503301017e-06, "loss": 0.0179, "step": 189125 }, { "epoch": 0.7891530572222547, "grad_norm": 0.4541502579240114, "learning_rate": 2.251567387416536e-06, "loss": 0.0172, "step": 189130 }, { "epoch": 0.789173919937245, "grad_norm": 0.47383124628968176, "learning_rate": 2.2515376256832095e-06, "loss": 0.0221, "step": 189135 }, { "epoch": 0.7891947826522352, "grad_norm": 1.0992886702361366, "learning_rate": 2.251507865130044e-06, "loss": 0.0313, "step": 189140 }, { "epoch": 0.7892156453672255, "grad_norm": 0.4484627910154874, "learning_rate": 2.2514781057569616e-06, "loss": 0.0231, "step": 189145 }, { "epoch": 0.7892365080822158, "grad_norm": 0.5587098969385182, "learning_rate": 2.2514483475638845e-06, "loss": 0.0265, "step": 189150 }, { "epoch": 0.789257370797206, "grad_norm": 0.7192488959105727, "learning_rate": 2.2514185905507345e-06, "loss": 0.0178, "step": 189155 }, { "epoch": 0.7892782335121964, "grad_norm": 0.43330457209697554, "learning_rate": 2.251388834717433e-06, "loss": 0.0226, "step": 189160 }, { "epoch": 0.7892990962271866, "grad_norm": 0.7196952960819695, "learning_rate": 2.251359080063903e-06, "loss": 0.0153, "step": 189165 }, { "epoch": 0.7893199589421769, "grad_norm": 0.7253110985355062, "learning_rate": 2.2513293265900663e-06, "loss": 0.0266, "step": 189170 }, { "epoch": 0.7893408216571671, "grad_norm": 1.0271970738800793, "learning_rate": 2.2512995742958448e-06, "loss": 0.0215, "step": 189175 }, { "epoch": 0.7893616843721575, "grad_norm": 0.5000736545711144, "learning_rate": 2.2512698231811603e-06, "loss": 0.0166, "step": 189180 }, { "epoch": 0.7893825470871477, "grad_norm": 2.119738927209706, "learning_rate": 2.251240073245935e-06, "loss": 0.02, "step": 189185 }, { "epoch": 0.789403409802138, "grad_norm": 1.1687843415558745, "learning_rate": 2.251210324490091e-06, "loss": 0.0216, "step": 189190 }, { "epoch": 0.7894242725171283, "grad_norm": 0.5236765065219249, "learning_rate": 2.2511805769135513e-06, "loss": 0.0214, "step": 189195 }, { "epoch": 0.7894451352321186, "grad_norm": 0.49188855707856066, "learning_rate": 2.251150830516237e-06, "loss": 0.0147, "step": 189200 }, { "epoch": 0.7894659979471088, "grad_norm": 0.7218667997526338, "learning_rate": 2.25112108529807e-06, "loss": 0.0195, "step": 189205 }, { "epoch": 0.7894868606620992, "grad_norm": 0.6490385393132809, "learning_rate": 2.2510913412589725e-06, "loss": 0.0221, "step": 189210 }, { "epoch": 0.7895077233770894, "grad_norm": 0.8745169623438172, "learning_rate": 2.251061598398867e-06, "loss": 0.0185, "step": 189215 }, { "epoch": 0.7895285860920797, "grad_norm": 1.096870051636293, "learning_rate": 2.2510318567176763e-06, "loss": 0.0183, "step": 189220 }, { "epoch": 0.78954944880707, "grad_norm": 0.41576095133300534, "learning_rate": 2.2510021162153206e-06, "loss": 0.0145, "step": 189225 }, { "epoch": 0.7895703115220603, "grad_norm": 0.7193502231586708, "learning_rate": 2.250972376891724e-06, "loss": 0.0223, "step": 189230 }, { "epoch": 0.7895911742370505, "grad_norm": 0.27306876926521667, "learning_rate": 2.250942638746807e-06, "loss": 0.0236, "step": 189235 }, { "epoch": 0.7896120369520407, "grad_norm": 0.6193181150512035, "learning_rate": 2.250912901780493e-06, "loss": 0.0182, "step": 189240 }, { "epoch": 0.7896328996670311, "grad_norm": 0.36442231575382833, "learning_rate": 2.2508831659927037e-06, "loss": 0.0202, "step": 189245 }, { "epoch": 0.7896537623820213, "grad_norm": 0.5084310914298054, "learning_rate": 2.250853431383361e-06, "loss": 0.0159, "step": 189250 }, { "epoch": 0.7896746250970116, "grad_norm": 0.6616695979179404, "learning_rate": 2.2508236979523873e-06, "loss": 0.0231, "step": 189255 }, { "epoch": 0.7896954878120019, "grad_norm": 1.0349398245118446, "learning_rate": 2.250793965699705e-06, "loss": 0.0328, "step": 189260 }, { "epoch": 0.7897163505269922, "grad_norm": 0.9275085946630677, "learning_rate": 2.2507642346252354e-06, "loss": 0.0259, "step": 189265 }, { "epoch": 0.7897372132419824, "grad_norm": 0.9622754781518114, "learning_rate": 2.2507345047289016e-06, "loss": 0.0246, "step": 189270 }, { "epoch": 0.7897580759569728, "grad_norm": 0.8432938831897233, "learning_rate": 2.250704776010626e-06, "loss": 0.023, "step": 189275 }, { "epoch": 0.789778938671963, "grad_norm": 0.3103741456628173, "learning_rate": 2.2506750484703295e-06, "loss": 0.0245, "step": 189280 }, { "epoch": 0.7897998013869533, "grad_norm": 0.5822518927824072, "learning_rate": 2.2506453221079357e-06, "loss": 0.0229, "step": 189285 }, { "epoch": 0.7898206641019436, "grad_norm": 0.3265757897237412, "learning_rate": 2.2506155969233658e-06, "loss": 0.0173, "step": 189290 }, { "epoch": 0.7898415268169339, "grad_norm": 0.4826764794716549, "learning_rate": 2.2505858729165427e-06, "loss": 0.0185, "step": 189295 }, { "epoch": 0.7898623895319241, "grad_norm": 0.302447702719847, "learning_rate": 2.250556150087389e-06, "loss": 0.0176, "step": 189300 }, { "epoch": 0.7898832522469144, "grad_norm": 1.0045223651569826, "learning_rate": 2.2505264284358256e-06, "loss": 0.0201, "step": 189305 }, { "epoch": 0.7899041149619047, "grad_norm": 0.6978123774560253, "learning_rate": 2.2504967079617757e-06, "loss": 0.0274, "step": 189310 }, { "epoch": 0.789924977676895, "grad_norm": 0.39091863163917934, "learning_rate": 2.250466988665161e-06, "loss": 0.0209, "step": 189315 }, { "epoch": 0.7899458403918852, "grad_norm": 1.2870497984069846, "learning_rate": 2.2504372705459042e-06, "loss": 0.0167, "step": 189320 }, { "epoch": 0.7899667031068756, "grad_norm": 0.8768272648107076, "learning_rate": 2.250407553603928e-06, "loss": 0.0271, "step": 189325 }, { "epoch": 0.7899875658218658, "grad_norm": 0.6368293272762797, "learning_rate": 2.2503778378391535e-06, "loss": 0.027, "step": 189330 }, { "epoch": 0.790008428536856, "grad_norm": 0.7596823121109274, "learning_rate": 2.2503481232515036e-06, "loss": 0.0203, "step": 189335 }, { "epoch": 0.7900292912518464, "grad_norm": 0.28010259451077035, "learning_rate": 2.2503184098409013e-06, "loss": 0.0141, "step": 189340 }, { "epoch": 0.7900501539668366, "grad_norm": 0.8297070963590938, "learning_rate": 2.250288697607267e-06, "loss": 0.0192, "step": 189345 }, { "epoch": 0.7900710166818269, "grad_norm": 0.3535721276338084, "learning_rate": 2.2502589865505248e-06, "loss": 0.0139, "step": 189350 }, { "epoch": 0.7900918793968171, "grad_norm": 0.6821635197018567, "learning_rate": 2.2502292766705964e-06, "loss": 0.0209, "step": 189355 }, { "epoch": 0.7901127421118075, "grad_norm": 0.5574773302945236, "learning_rate": 2.2501995679674043e-06, "loss": 0.025, "step": 189360 }, { "epoch": 0.7901336048267977, "grad_norm": 0.4748877376905234, "learning_rate": 2.2501698604408705e-06, "loss": 0.0252, "step": 189365 }, { "epoch": 0.790154467541788, "grad_norm": 0.6848164318133401, "learning_rate": 2.2501401540909167e-06, "loss": 0.0179, "step": 189370 }, { "epoch": 0.7901753302567783, "grad_norm": 1.4075391738442278, "learning_rate": 2.2501104489174668e-06, "loss": 0.0231, "step": 189375 }, { "epoch": 0.7901961929717686, "grad_norm": 0.9568976796594918, "learning_rate": 2.250080744920442e-06, "loss": 0.0247, "step": 189380 }, { "epoch": 0.7902170556867588, "grad_norm": 0.5205416392459332, "learning_rate": 2.2500510420997656e-06, "loss": 0.0203, "step": 189385 }, { "epoch": 0.7902379184017492, "grad_norm": 0.7137445853579786, "learning_rate": 2.2500213404553585e-06, "loss": 0.0242, "step": 189390 }, { "epoch": 0.7902587811167394, "grad_norm": 0.8916315998153943, "learning_rate": 2.2499916399871444e-06, "loss": 0.0238, "step": 189395 }, { "epoch": 0.7902796438317297, "grad_norm": 0.8976243793525838, "learning_rate": 2.2499619406950452e-06, "loss": 0.0297, "step": 189400 }, { "epoch": 0.79030050654672, "grad_norm": 1.1894464272698981, "learning_rate": 2.249932242578983e-06, "loss": 0.0204, "step": 189405 }, { "epoch": 0.7903213692617103, "grad_norm": 0.34631417846625384, "learning_rate": 2.2499025456388806e-06, "loss": 0.0172, "step": 189410 }, { "epoch": 0.7903422319767005, "grad_norm": 0.5703470051066438, "learning_rate": 2.2498728498746597e-06, "loss": 0.0251, "step": 189415 }, { "epoch": 0.7903630946916907, "grad_norm": 0.6412194791053655, "learning_rate": 2.2498431552862438e-06, "loss": 0.0264, "step": 189420 }, { "epoch": 0.7903839574066811, "grad_norm": 0.5370367014468084, "learning_rate": 2.2498134618735553e-06, "loss": 0.0207, "step": 189425 }, { "epoch": 0.7904048201216713, "grad_norm": 0.45496865975669426, "learning_rate": 2.2497837696365147e-06, "loss": 0.0155, "step": 189430 }, { "epoch": 0.7904256828366616, "grad_norm": 0.29705094645976404, "learning_rate": 2.249754078575047e-06, "loss": 0.0129, "step": 189435 }, { "epoch": 0.790446545551652, "grad_norm": 0.44685390965450483, "learning_rate": 2.2497243886890725e-06, "loss": 0.0238, "step": 189440 }, { "epoch": 0.7904674082666422, "grad_norm": 7.683321265994139, "learning_rate": 2.2496946999785155e-06, "loss": 0.0217, "step": 189445 }, { "epoch": 0.7904882709816324, "grad_norm": 0.56250259176379, "learning_rate": 2.249665012443297e-06, "loss": 0.0214, "step": 189450 }, { "epoch": 0.7905091336966228, "grad_norm": 0.5725125730493404, "learning_rate": 2.2496353260833396e-06, "loss": 0.0171, "step": 189455 }, { "epoch": 0.790529996411613, "grad_norm": 0.6719648948639106, "learning_rate": 2.2496056408985666e-06, "loss": 0.0257, "step": 189460 }, { "epoch": 0.7905508591266033, "grad_norm": 0.2260244297762044, "learning_rate": 2.2495759568888997e-06, "loss": 0.0176, "step": 189465 }, { "epoch": 0.7905717218415935, "grad_norm": 0.645007375821021, "learning_rate": 2.249546274054262e-06, "loss": 0.0197, "step": 189470 }, { "epoch": 0.7905925845565839, "grad_norm": 0.93777714772447, "learning_rate": 2.249516592394575e-06, "loss": 0.0301, "step": 189475 }, { "epoch": 0.7906134472715741, "grad_norm": 2.4624286213930424, "learning_rate": 2.249486911909763e-06, "loss": 0.0287, "step": 189480 }, { "epoch": 0.7906343099865644, "grad_norm": 0.7654515007193804, "learning_rate": 2.249457232599746e-06, "loss": 0.0196, "step": 189485 }, { "epoch": 0.7906551727015547, "grad_norm": 1.0163533891156624, "learning_rate": 2.2494275544644483e-06, "loss": 0.0202, "step": 189490 }, { "epoch": 0.790676035416545, "grad_norm": 0.6287278304177356, "learning_rate": 2.249397877503792e-06, "loss": 0.017, "step": 189495 }, { "epoch": 0.7906968981315352, "grad_norm": 0.563353037738477, "learning_rate": 2.2493682017176997e-06, "loss": 0.026, "step": 189500 }, { "epoch": 0.7907177608465256, "grad_norm": 0.56685036127424, "learning_rate": 2.2493385271060937e-06, "loss": 0.0221, "step": 189505 }, { "epoch": 0.7907386235615158, "grad_norm": 0.648334106412523, "learning_rate": 2.2493088536688964e-06, "loss": 0.0176, "step": 189510 }, { "epoch": 0.790759486276506, "grad_norm": 1.0303963889399537, "learning_rate": 2.2492791814060307e-06, "loss": 0.0172, "step": 189515 }, { "epoch": 0.7907803489914964, "grad_norm": 0.9020899308467141, "learning_rate": 2.249249510317419e-06, "loss": 0.0228, "step": 189520 }, { "epoch": 0.7908012117064867, "grad_norm": 0.5891072676856914, "learning_rate": 2.2492198404029835e-06, "loss": 0.027, "step": 189525 }, { "epoch": 0.7908220744214769, "grad_norm": 0.46348193230602464, "learning_rate": 2.2491901716626475e-06, "loss": 0.0191, "step": 189530 }, { "epoch": 0.7908429371364671, "grad_norm": 0.2486766396667099, "learning_rate": 2.2491605040963325e-06, "loss": 0.0179, "step": 189535 }, { "epoch": 0.7908637998514575, "grad_norm": 0.7575286474605469, "learning_rate": 2.2491308377039625e-06, "loss": 0.0202, "step": 189540 }, { "epoch": 0.7908846625664477, "grad_norm": 0.45368582120393613, "learning_rate": 2.2491011724854594e-06, "loss": 0.0161, "step": 189545 }, { "epoch": 0.790905525281438, "grad_norm": 0.4169022965169964, "learning_rate": 2.249071508440745e-06, "loss": 0.02, "step": 189550 }, { "epoch": 0.7909263879964283, "grad_norm": 0.5399962227226878, "learning_rate": 2.2490418455697428e-06, "loss": 0.0169, "step": 189555 }, { "epoch": 0.7909472507114186, "grad_norm": 0.5500707314658856, "learning_rate": 2.249012183872375e-06, "loss": 0.0223, "step": 189560 }, { "epoch": 0.7909681134264088, "grad_norm": 0.7051788582656224, "learning_rate": 2.248982523348565e-06, "loss": 0.0201, "step": 189565 }, { "epoch": 0.7909889761413992, "grad_norm": 0.4519256096226205, "learning_rate": 2.248952863998234e-06, "loss": 0.0239, "step": 189570 }, { "epoch": 0.7910098388563894, "grad_norm": 1.3863422432840804, "learning_rate": 2.248923205821306e-06, "loss": 0.0259, "step": 189575 }, { "epoch": 0.7910307015713797, "grad_norm": 0.44742773439861117, "learning_rate": 2.2488935488177037e-06, "loss": 0.0216, "step": 189580 }, { "epoch": 0.79105156428637, "grad_norm": 0.7691042581969111, "learning_rate": 2.248863892987348e-06, "loss": 0.0204, "step": 189585 }, { "epoch": 0.7910724270013603, "grad_norm": 0.7245063876365718, "learning_rate": 2.248834238330163e-06, "loss": 0.0222, "step": 189590 }, { "epoch": 0.7910932897163505, "grad_norm": 0.8591533266338266, "learning_rate": 2.248804584846071e-06, "loss": 0.0198, "step": 189595 }, { "epoch": 0.7911141524313408, "grad_norm": 0.9973831749360577, "learning_rate": 2.248774932534995e-06, "loss": 0.0197, "step": 189600 }, { "epoch": 0.7911350151463311, "grad_norm": 0.513993918739101, "learning_rate": 2.248745281396857e-06, "loss": 0.0168, "step": 189605 }, { "epoch": 0.7911558778613214, "grad_norm": 0.6066681863384614, "learning_rate": 2.2487156314315796e-06, "loss": 0.0195, "step": 189610 }, { "epoch": 0.7911767405763116, "grad_norm": 0.453927971073502, "learning_rate": 2.248685982639087e-06, "loss": 0.0219, "step": 189615 }, { "epoch": 0.791197603291302, "grad_norm": 0.8395397370700798, "learning_rate": 2.2486563350192995e-06, "loss": 0.02, "step": 189620 }, { "epoch": 0.7912184660062922, "grad_norm": 0.6071855473800027, "learning_rate": 2.248626688572142e-06, "loss": 0.0184, "step": 189625 }, { "epoch": 0.7912393287212824, "grad_norm": 0.46922960095709493, "learning_rate": 2.2485970432975363e-06, "loss": 0.0271, "step": 189630 }, { "epoch": 0.7912601914362728, "grad_norm": 0.8897828668877292, "learning_rate": 2.2485673991954045e-06, "loss": 0.0257, "step": 189635 }, { "epoch": 0.791281054151263, "grad_norm": 0.46862325136483945, "learning_rate": 2.24853775626567e-06, "loss": 0.0203, "step": 189640 }, { "epoch": 0.7913019168662533, "grad_norm": 0.6973798942193563, "learning_rate": 2.2485081145082555e-06, "loss": 0.0218, "step": 189645 }, { "epoch": 0.7913227795812435, "grad_norm": 0.6592638023983983, "learning_rate": 2.2484784739230837e-06, "loss": 0.0225, "step": 189650 }, { "epoch": 0.7913436422962339, "grad_norm": 0.8085958466605822, "learning_rate": 2.2484488345100776e-06, "loss": 0.0262, "step": 189655 }, { "epoch": 0.7913645050112241, "grad_norm": 0.4385306420099324, "learning_rate": 2.2484191962691592e-06, "loss": 0.0212, "step": 189660 }, { "epoch": 0.7913853677262144, "grad_norm": 1.4942969485730266, "learning_rate": 2.248389559200252e-06, "loss": 0.0283, "step": 189665 }, { "epoch": 0.7914062304412047, "grad_norm": 1.0851563170351246, "learning_rate": 2.248359923303278e-06, "loss": 0.0215, "step": 189670 }, { "epoch": 0.791427093156195, "grad_norm": 0.5662713605058891, "learning_rate": 2.2483302885781604e-06, "loss": 0.0179, "step": 189675 }, { "epoch": 0.7914479558711852, "grad_norm": 0.5639897172627074, "learning_rate": 2.2483006550248224e-06, "loss": 0.0203, "step": 189680 }, { "epoch": 0.7914688185861756, "grad_norm": 0.7700962554203774, "learning_rate": 2.2482710226431863e-06, "loss": 0.0207, "step": 189685 }, { "epoch": 0.7914896813011658, "grad_norm": 1.2809448589261847, "learning_rate": 2.248241391433175e-06, "loss": 0.0258, "step": 189690 }, { "epoch": 0.7915105440161561, "grad_norm": 0.7286698970071273, "learning_rate": 2.2482117613947107e-06, "loss": 0.0213, "step": 189695 }, { "epoch": 0.7915314067311464, "grad_norm": 0.7157851757637068, "learning_rate": 2.2481821325277175e-06, "loss": 0.0189, "step": 189700 }, { "epoch": 0.7915522694461367, "grad_norm": 0.4861675151974322, "learning_rate": 2.248152504832117e-06, "loss": 0.0192, "step": 189705 }, { "epoch": 0.7915731321611269, "grad_norm": 0.5776530088601799, "learning_rate": 2.2481228783078324e-06, "loss": 0.0264, "step": 189710 }, { "epoch": 0.7915939948761171, "grad_norm": 0.5946738887936187, "learning_rate": 2.248093252954787e-06, "loss": 0.0194, "step": 189715 }, { "epoch": 0.7916148575911075, "grad_norm": 0.46544753095757035, "learning_rate": 2.2480636287729026e-06, "loss": 0.015, "step": 189720 }, { "epoch": 0.7916357203060977, "grad_norm": 0.5047810545367007, "learning_rate": 2.2480340057621034e-06, "loss": 0.0298, "step": 189725 }, { "epoch": 0.791656583021088, "grad_norm": 0.7345776559188981, "learning_rate": 2.2480043839223107e-06, "loss": 0.0201, "step": 189730 }, { "epoch": 0.7916774457360783, "grad_norm": 0.6215346023445695, "learning_rate": 2.2479747632534487e-06, "loss": 0.024, "step": 189735 }, { "epoch": 0.7916983084510686, "grad_norm": 0.6578616308966116, "learning_rate": 2.2479451437554395e-06, "loss": 0.0283, "step": 189740 }, { "epoch": 0.7917191711660588, "grad_norm": 0.5237623662536581, "learning_rate": 2.2479155254282065e-06, "loss": 0.02, "step": 189745 }, { "epoch": 0.7917400338810492, "grad_norm": 1.581579208280263, "learning_rate": 2.247885908271672e-06, "loss": 0.0242, "step": 189750 }, { "epoch": 0.7917608965960394, "grad_norm": 0.663177818949394, "learning_rate": 2.2478562922857592e-06, "loss": 0.0338, "step": 189755 }, { "epoch": 0.7917817593110297, "grad_norm": 1.1491440541056925, "learning_rate": 2.247826677470391e-06, "loss": 0.0267, "step": 189760 }, { "epoch": 0.79180262202602, "grad_norm": 0.8094066769595089, "learning_rate": 2.24779706382549e-06, "loss": 0.0234, "step": 189765 }, { "epoch": 0.7918234847410103, "grad_norm": 0.3488005649015137, "learning_rate": 2.2477674513509796e-06, "loss": 0.0213, "step": 189770 }, { "epoch": 0.7918443474560005, "grad_norm": 0.707453477441539, "learning_rate": 2.2477378400467818e-06, "loss": 0.0205, "step": 189775 }, { "epoch": 0.7918652101709908, "grad_norm": 0.43453883064295407, "learning_rate": 2.2477082299128205e-06, "loss": 0.0152, "step": 189780 }, { "epoch": 0.7918860728859811, "grad_norm": 0.790014076297329, "learning_rate": 2.2476786209490187e-06, "loss": 0.03, "step": 189785 }, { "epoch": 0.7919069356009714, "grad_norm": 0.5423519351870785, "learning_rate": 2.2476490131552985e-06, "loss": 0.0166, "step": 189790 }, { "epoch": 0.7919277983159616, "grad_norm": 0.7558897266290756, "learning_rate": 2.2476194065315832e-06, "loss": 0.0206, "step": 189795 }, { "epoch": 0.791948661030952, "grad_norm": 0.9051788020094561, "learning_rate": 2.2475898010777958e-06, "loss": 0.0221, "step": 189800 }, { "epoch": 0.7919695237459422, "grad_norm": 0.621676428282822, "learning_rate": 2.247560196793859e-06, "loss": 0.0226, "step": 189805 }, { "epoch": 0.7919903864609324, "grad_norm": 1.0763158024603288, "learning_rate": 2.247530593679696e-06, "loss": 0.0263, "step": 189810 }, { "epoch": 0.7920112491759228, "grad_norm": 0.725665553611367, "learning_rate": 2.2475009917352305e-06, "loss": 0.0166, "step": 189815 }, { "epoch": 0.792032111890913, "grad_norm": 0.7970854873295643, "learning_rate": 2.247471390960384e-06, "loss": 0.0264, "step": 189820 }, { "epoch": 0.7920529746059033, "grad_norm": 1.3055146023237452, "learning_rate": 2.2474417913550804e-06, "loss": 0.0281, "step": 189825 }, { "epoch": 0.7920738373208935, "grad_norm": 0.6288777659008528, "learning_rate": 2.2474121929192425e-06, "loss": 0.0148, "step": 189830 }, { "epoch": 0.7920947000358839, "grad_norm": 0.3833320881284715, "learning_rate": 2.2473825956527934e-06, "loss": 0.0242, "step": 189835 }, { "epoch": 0.7921155627508741, "grad_norm": 0.5602788887448898, "learning_rate": 2.2473529995556555e-06, "loss": 0.024, "step": 189840 }, { "epoch": 0.7921364254658644, "grad_norm": 0.6836802936516071, "learning_rate": 2.247323404627753e-06, "loss": 0.025, "step": 189845 }, { "epoch": 0.7921572881808547, "grad_norm": 0.42437634312209704, "learning_rate": 2.247293810869008e-06, "loss": 0.021, "step": 189850 }, { "epoch": 0.792178150895845, "grad_norm": 0.8520688018563248, "learning_rate": 2.2472642182793435e-06, "loss": 0.0251, "step": 189855 }, { "epoch": 0.7921990136108352, "grad_norm": 1.5250595320261886, "learning_rate": 2.2472346268586825e-06, "loss": 0.0203, "step": 189860 }, { "epoch": 0.7922198763258256, "grad_norm": 0.30035263676756646, "learning_rate": 2.2472050366069486e-06, "loss": 0.0127, "step": 189865 }, { "epoch": 0.7922407390408158, "grad_norm": 1.0329330083401436, "learning_rate": 2.2471754475240646e-06, "loss": 0.0207, "step": 189870 }, { "epoch": 0.7922616017558061, "grad_norm": 0.2632637835027508, "learning_rate": 2.2471458596099536e-06, "loss": 0.0157, "step": 189875 }, { "epoch": 0.7922824644707964, "grad_norm": 0.7333075236902179, "learning_rate": 2.2471162728645384e-06, "loss": 0.0227, "step": 189880 }, { "epoch": 0.7923033271857867, "grad_norm": 0.6683309217280092, "learning_rate": 2.247086687287742e-06, "loss": 0.0321, "step": 189885 }, { "epoch": 0.7923241899007769, "grad_norm": 0.5779133531195146, "learning_rate": 2.2470571028794875e-06, "loss": 0.0178, "step": 189890 }, { "epoch": 0.7923450526157672, "grad_norm": 0.6960254522265916, "learning_rate": 2.2470275196396986e-06, "loss": 0.0236, "step": 189895 }, { "epoch": 0.7923659153307575, "grad_norm": 0.8326884790978303, "learning_rate": 2.246997937568298e-06, "loss": 0.0229, "step": 189900 }, { "epoch": 0.7923867780457478, "grad_norm": 0.4883146256937729, "learning_rate": 2.246968356665209e-06, "loss": 0.0248, "step": 189905 }, { "epoch": 0.792407640760738, "grad_norm": 0.4155096076523042, "learning_rate": 2.246938776930354e-06, "loss": 0.017, "step": 189910 }, { "epoch": 0.7924285034757284, "grad_norm": 0.984505663819853, "learning_rate": 2.2469091983636564e-06, "loss": 0.0219, "step": 189915 }, { "epoch": 0.7924493661907186, "grad_norm": 0.4264434485251496, "learning_rate": 2.2468796209650397e-06, "loss": 0.0209, "step": 189920 }, { "epoch": 0.7924702289057088, "grad_norm": 0.8000611521593098, "learning_rate": 2.2468500447344268e-06, "loss": 0.0164, "step": 189925 }, { "epoch": 0.7924910916206992, "grad_norm": 0.627353674982105, "learning_rate": 2.2468204696717406e-06, "loss": 0.0181, "step": 189930 }, { "epoch": 0.7925119543356894, "grad_norm": 0.5739681435249029, "learning_rate": 2.246790895776904e-06, "loss": 0.0237, "step": 189935 }, { "epoch": 0.7925328170506797, "grad_norm": 1.1101365848325573, "learning_rate": 2.2467613230498418e-06, "loss": 0.0267, "step": 189940 }, { "epoch": 0.79255367976567, "grad_norm": 0.5810936870794711, "learning_rate": 2.2467317514904754e-06, "loss": 0.0214, "step": 189945 }, { "epoch": 0.7925745424806603, "grad_norm": 0.7046465829031728, "learning_rate": 2.246702181098728e-06, "loss": 0.0175, "step": 189950 }, { "epoch": 0.7925954051956505, "grad_norm": 0.38265842331990363, "learning_rate": 2.2466726118745233e-06, "loss": 0.0192, "step": 189955 }, { "epoch": 0.7926162679106408, "grad_norm": 0.5714316932090028, "learning_rate": 2.2466430438177847e-06, "loss": 0.0233, "step": 189960 }, { "epoch": 0.7926371306256311, "grad_norm": 0.4292206652591691, "learning_rate": 2.2466134769284353e-06, "loss": 0.0253, "step": 189965 }, { "epoch": 0.7926579933406214, "grad_norm": 0.6199234368844371, "learning_rate": 2.246583911206398e-06, "loss": 0.0169, "step": 189970 }, { "epoch": 0.7926788560556116, "grad_norm": 0.5708695497115817, "learning_rate": 2.246554346651596e-06, "loss": 0.0191, "step": 189975 }, { "epoch": 0.792699718770602, "grad_norm": 0.9975033766576136, "learning_rate": 2.2465247832639526e-06, "loss": 0.0218, "step": 189980 }, { "epoch": 0.7927205814855922, "grad_norm": 0.6415200439439693, "learning_rate": 2.246495221043391e-06, "loss": 0.0224, "step": 189985 }, { "epoch": 0.7927414442005825, "grad_norm": 0.5686382448944726, "learning_rate": 2.2464656599898347e-06, "loss": 0.0177, "step": 189990 }, { "epoch": 0.7927623069155728, "grad_norm": 0.6747932355015862, "learning_rate": 2.246436100103206e-06, "loss": 0.0202, "step": 189995 }, { "epoch": 0.7927831696305631, "grad_norm": 0.6583719571199893, "learning_rate": 2.2464065413834286e-06, "loss": 0.0215, "step": 190000 }, { "epoch": 0.7928040323455533, "grad_norm": 0.669436792748192, "learning_rate": 2.2463769838304267e-06, "loss": 0.0222, "step": 190005 }, { "epoch": 0.7928248950605435, "grad_norm": 0.711964080349059, "learning_rate": 2.246347427444122e-06, "loss": 0.0184, "step": 190010 }, { "epoch": 0.7928457577755339, "grad_norm": 0.6745117697019448, "learning_rate": 2.246317872224439e-06, "loss": 0.02, "step": 190015 }, { "epoch": 0.7928666204905241, "grad_norm": 0.6743609747247515, "learning_rate": 2.2462883181713003e-06, "loss": 0.0234, "step": 190020 }, { "epoch": 0.7928874832055144, "grad_norm": 1.1379756163794252, "learning_rate": 2.246258765284629e-06, "loss": 0.026, "step": 190025 }, { "epoch": 0.7929083459205047, "grad_norm": 0.6339473410044281, "learning_rate": 2.2462292135643494e-06, "loss": 0.0225, "step": 190030 }, { "epoch": 0.792929208635495, "grad_norm": 0.41046583872000303, "learning_rate": 2.2461996630103833e-06, "loss": 0.0195, "step": 190035 }, { "epoch": 0.7929500713504852, "grad_norm": 0.5161719858890279, "learning_rate": 2.246170113622655e-06, "loss": 0.0223, "step": 190040 }, { "epoch": 0.7929709340654756, "grad_norm": 1.2039036027568426, "learning_rate": 2.246140565401088e-06, "loss": 0.0119, "step": 190045 }, { "epoch": 0.7929917967804658, "grad_norm": 1.0846384783636551, "learning_rate": 2.246111018345604e-06, "loss": 0.0175, "step": 190050 }, { "epoch": 0.7930126594954561, "grad_norm": 0.7171639366675268, "learning_rate": 2.246081472456128e-06, "loss": 0.0215, "step": 190055 }, { "epoch": 0.7930335222104464, "grad_norm": 0.5795972217693742, "learning_rate": 2.2460519277325833e-06, "loss": 0.0156, "step": 190060 }, { "epoch": 0.7930543849254367, "grad_norm": 0.6892505950348906, "learning_rate": 2.246022384174892e-06, "loss": 0.0211, "step": 190065 }, { "epoch": 0.7930752476404269, "grad_norm": 0.33491285066121074, "learning_rate": 2.2459928417829777e-06, "loss": 0.02, "step": 190070 }, { "epoch": 0.7930961103554172, "grad_norm": 0.8253626175692454, "learning_rate": 2.245963300556765e-06, "loss": 0.0302, "step": 190075 }, { "epoch": 0.7931169730704075, "grad_norm": 0.6701496406662832, "learning_rate": 2.245933760496176e-06, "loss": 0.0279, "step": 190080 }, { "epoch": 0.7931378357853978, "grad_norm": 0.47302930946630667, "learning_rate": 2.245904221601134e-06, "loss": 0.0257, "step": 190085 }, { "epoch": 0.793158698500388, "grad_norm": 0.8126521831756931, "learning_rate": 2.245874683871563e-06, "loss": 0.0277, "step": 190090 }, { "epoch": 0.7931795612153784, "grad_norm": 0.7234964702872416, "learning_rate": 2.2458451473073867e-06, "loss": 0.0217, "step": 190095 }, { "epoch": 0.7932004239303686, "grad_norm": 0.6813251568140554, "learning_rate": 2.2458156119085273e-06, "loss": 0.0204, "step": 190100 }, { "epoch": 0.7932212866453588, "grad_norm": 0.501686635569901, "learning_rate": 2.2457860776749086e-06, "loss": 0.0211, "step": 190105 }, { "epoch": 0.7932421493603492, "grad_norm": 0.665169442393658, "learning_rate": 2.2457565446064545e-06, "loss": 0.0214, "step": 190110 }, { "epoch": 0.7932630120753394, "grad_norm": 0.630366463898327, "learning_rate": 2.2457270127030874e-06, "loss": 0.0146, "step": 190115 }, { "epoch": 0.7932838747903297, "grad_norm": 0.5998133378396548, "learning_rate": 2.245697481964732e-06, "loss": 0.0233, "step": 190120 }, { "epoch": 0.79330473750532, "grad_norm": 0.3789857147552715, "learning_rate": 2.2456679523913107e-06, "loss": 0.0199, "step": 190125 }, { "epoch": 0.7933256002203103, "grad_norm": 1.0151559616456098, "learning_rate": 2.2456384239827483e-06, "loss": 0.0184, "step": 190130 }, { "epoch": 0.7933464629353005, "grad_norm": 0.6094865153847658, "learning_rate": 2.2456088967389658e-06, "loss": 0.0142, "step": 190135 }, { "epoch": 0.7933673256502908, "grad_norm": 0.827941152940805, "learning_rate": 2.2455793706598883e-06, "loss": 0.0322, "step": 190140 }, { "epoch": 0.7933881883652811, "grad_norm": 1.216935960890908, "learning_rate": 2.245549845745439e-06, "loss": 0.0232, "step": 190145 }, { "epoch": 0.7934090510802714, "grad_norm": 0.7797334583700901, "learning_rate": 2.2455203219955414e-06, "loss": 0.0204, "step": 190150 }, { "epoch": 0.7934299137952616, "grad_norm": 1.14898284604281, "learning_rate": 2.2454907994101186e-06, "loss": 0.0133, "step": 190155 }, { "epoch": 0.793450776510252, "grad_norm": 0.9973057350057529, "learning_rate": 2.2454612779890943e-06, "loss": 0.0151, "step": 190160 }, { "epoch": 0.7934716392252422, "grad_norm": 1.0856451203306572, "learning_rate": 2.245431757732392e-06, "loss": 0.0241, "step": 190165 }, { "epoch": 0.7934925019402325, "grad_norm": 0.8156818598086745, "learning_rate": 2.2454022386399355e-06, "loss": 0.0175, "step": 190170 }, { "epoch": 0.7935133646552228, "grad_norm": 0.7737050463892767, "learning_rate": 2.2453727207116473e-06, "loss": 0.0238, "step": 190175 }, { "epoch": 0.7935342273702131, "grad_norm": 0.8825298092310327, "learning_rate": 2.2453432039474517e-06, "loss": 0.0236, "step": 190180 }, { "epoch": 0.7935550900852033, "grad_norm": 1.9476904427140749, "learning_rate": 2.2453136883472717e-06, "loss": 0.0163, "step": 190185 }, { "epoch": 0.7935759528001936, "grad_norm": 0.6393586728424587, "learning_rate": 2.2452841739110314e-06, "loss": 0.0161, "step": 190190 }, { "epoch": 0.7935968155151839, "grad_norm": 0.34305350010115165, "learning_rate": 2.2452546606386537e-06, "loss": 0.0268, "step": 190195 }, { "epoch": 0.7936176782301742, "grad_norm": 0.7107696530120381, "learning_rate": 2.245225148530062e-06, "loss": 0.0254, "step": 190200 }, { "epoch": 0.7936385409451644, "grad_norm": 0.5893189163861503, "learning_rate": 2.24519563758518e-06, "loss": 0.0216, "step": 190205 }, { "epoch": 0.7936594036601547, "grad_norm": 1.028933211713298, "learning_rate": 2.245166127803932e-06, "loss": 0.0237, "step": 190210 }, { "epoch": 0.793680266375145, "grad_norm": 0.7978131653788435, "learning_rate": 2.2451366191862412e-06, "loss": 0.0298, "step": 190215 }, { "epoch": 0.7937011290901352, "grad_norm": 1.093330888963483, "learning_rate": 2.24510711173203e-06, "loss": 0.0312, "step": 190220 }, { "epoch": 0.7937219918051256, "grad_norm": 0.8597294715738818, "learning_rate": 2.245077605441223e-06, "loss": 0.0247, "step": 190225 }, { "epoch": 0.7937428545201158, "grad_norm": 0.506706384870239, "learning_rate": 2.245048100313744e-06, "loss": 0.027, "step": 190230 }, { "epoch": 0.7937637172351061, "grad_norm": 0.47626639373046337, "learning_rate": 2.245018596349516e-06, "loss": 0.0156, "step": 190235 }, { "epoch": 0.7937845799500964, "grad_norm": 0.5109458689902308, "learning_rate": 2.244989093548462e-06, "loss": 0.021, "step": 190240 }, { "epoch": 0.7938054426650867, "grad_norm": 0.438654919491911, "learning_rate": 2.2449595919105076e-06, "loss": 0.0212, "step": 190245 }, { "epoch": 0.7938263053800769, "grad_norm": 0.16674775515038748, "learning_rate": 2.244930091435574e-06, "loss": 0.0247, "step": 190250 }, { "epoch": 0.7938471680950672, "grad_norm": 0.4522293154554733, "learning_rate": 2.2449005921235854e-06, "loss": 0.0163, "step": 190255 }, { "epoch": 0.7938680308100575, "grad_norm": 0.6579447706094821, "learning_rate": 2.2448710939744665e-06, "loss": 0.0248, "step": 190260 }, { "epoch": 0.7938888935250478, "grad_norm": 0.41499088111025817, "learning_rate": 2.2448415969881404e-06, "loss": 0.0206, "step": 190265 }, { "epoch": 0.793909756240038, "grad_norm": 0.7015379844341019, "learning_rate": 2.24481210116453e-06, "loss": 0.0196, "step": 190270 }, { "epoch": 0.7939306189550284, "grad_norm": 0.6288088935553428, "learning_rate": 2.2447826065035593e-06, "loss": 0.0159, "step": 190275 }, { "epoch": 0.7939514816700186, "grad_norm": 0.3818583256398646, "learning_rate": 2.2447531130051527e-06, "loss": 0.0206, "step": 190280 }, { "epoch": 0.7939723443850089, "grad_norm": 0.5710941338564073, "learning_rate": 2.244723620669233e-06, "loss": 0.0174, "step": 190285 }, { "epoch": 0.7939932070999992, "grad_norm": 0.3695513050685389, "learning_rate": 2.2446941294957236e-06, "loss": 0.0152, "step": 190290 }, { "epoch": 0.7940140698149895, "grad_norm": 1.5953869727458854, "learning_rate": 2.2446646394845486e-06, "loss": 0.0246, "step": 190295 }, { "epoch": 0.7940349325299797, "grad_norm": 0.7140163691136623, "learning_rate": 2.244635150635632e-06, "loss": 0.0248, "step": 190300 }, { "epoch": 0.79405579524497, "grad_norm": 0.3466980133464745, "learning_rate": 2.244605662948897e-06, "loss": 0.0144, "step": 190305 }, { "epoch": 0.7940766579599603, "grad_norm": 0.680665206525195, "learning_rate": 2.2445761764242674e-06, "loss": 0.0203, "step": 190310 }, { "epoch": 0.7940975206749505, "grad_norm": 1.1085963566830124, "learning_rate": 2.2445466910616665e-06, "loss": 0.0287, "step": 190315 }, { "epoch": 0.7941183833899408, "grad_norm": 0.6598222168922645, "learning_rate": 2.244517206861018e-06, "loss": 0.0183, "step": 190320 }, { "epoch": 0.7941392461049311, "grad_norm": 0.6472103162973771, "learning_rate": 2.244487723822247e-06, "loss": 0.0201, "step": 190325 }, { "epoch": 0.7941601088199214, "grad_norm": 0.9860927706442669, "learning_rate": 2.2444582419452747e-06, "loss": 0.023, "step": 190330 }, { "epoch": 0.7941809715349116, "grad_norm": 0.5904719596433597, "learning_rate": 2.244428761230027e-06, "loss": 0.0256, "step": 190335 }, { "epoch": 0.794201834249902, "grad_norm": 0.3360860974859442, "learning_rate": 2.244399281676427e-06, "loss": 0.0166, "step": 190340 }, { "epoch": 0.7942226969648922, "grad_norm": 1.001448301412178, "learning_rate": 2.2443698032843967e-06, "loss": 0.0183, "step": 190345 }, { "epoch": 0.7942435596798825, "grad_norm": 0.36447824260089856, "learning_rate": 2.244340326053863e-06, "loss": 0.0145, "step": 190350 }, { "epoch": 0.7942644223948728, "grad_norm": 0.6085185686865711, "learning_rate": 2.2443108499847467e-06, "loss": 0.019, "step": 190355 }, { "epoch": 0.7942852851098631, "grad_norm": 1.2282539951598326, "learning_rate": 2.244281375076974e-06, "loss": 0.0225, "step": 190360 }, { "epoch": 0.7943061478248533, "grad_norm": 1.3452065353972151, "learning_rate": 2.2442519013304663e-06, "loss": 0.0173, "step": 190365 }, { "epoch": 0.7943270105398436, "grad_norm": 0.49587970503594264, "learning_rate": 2.244222428745149e-06, "loss": 0.0213, "step": 190370 }, { "epoch": 0.7943478732548339, "grad_norm": 0.20752166182446521, "learning_rate": 2.2441929573209452e-06, "loss": 0.018, "step": 190375 }, { "epoch": 0.7943687359698242, "grad_norm": 0.26757785151894037, "learning_rate": 2.2441634870577785e-06, "loss": 0.0235, "step": 190380 }, { "epoch": 0.7943895986848144, "grad_norm": 0.27674310312019096, "learning_rate": 2.2441340179555733e-06, "loss": 0.0278, "step": 190385 }, { "epoch": 0.7944104613998048, "grad_norm": 0.5583899257413586, "learning_rate": 2.2441045500142526e-06, "loss": 0.0188, "step": 190390 }, { "epoch": 0.794431324114795, "grad_norm": 0.7536674675295673, "learning_rate": 2.244075083233741e-06, "loss": 0.0187, "step": 190395 }, { "epoch": 0.7944521868297852, "grad_norm": 0.4926000197718085, "learning_rate": 2.244045617613962e-06, "loss": 0.0171, "step": 190400 }, { "epoch": 0.7944730495447756, "grad_norm": 0.6984540859886089, "learning_rate": 2.244016153154839e-06, "loss": 0.0167, "step": 190405 }, { "epoch": 0.7944939122597658, "grad_norm": 0.6249850959264875, "learning_rate": 2.2439866898562956e-06, "loss": 0.0194, "step": 190410 }, { "epoch": 0.7945147749747561, "grad_norm": 0.6188963880563483, "learning_rate": 2.243957227718257e-06, "loss": 0.0211, "step": 190415 }, { "epoch": 0.7945356376897464, "grad_norm": 0.7178546996954981, "learning_rate": 2.243927766740646e-06, "loss": 0.0148, "step": 190420 }, { "epoch": 0.7945565004047367, "grad_norm": 0.4454173082425772, "learning_rate": 2.243898306923386e-06, "loss": 0.0204, "step": 190425 }, { "epoch": 0.7945773631197269, "grad_norm": 0.5810271705064216, "learning_rate": 2.243868848266402e-06, "loss": 0.0184, "step": 190430 }, { "epoch": 0.7945982258347172, "grad_norm": 0.8499858709903572, "learning_rate": 2.2438393907696162e-06, "loss": 0.0256, "step": 190435 }, { "epoch": 0.7946190885497075, "grad_norm": 0.28360440551158056, "learning_rate": 2.2438099344329544e-06, "loss": 0.0186, "step": 190440 }, { "epoch": 0.7946399512646978, "grad_norm": 0.6440932561812244, "learning_rate": 2.2437804792563393e-06, "loss": 0.0183, "step": 190445 }, { "epoch": 0.794660813979688, "grad_norm": 0.5965728126466913, "learning_rate": 2.2437510252396948e-06, "loss": 0.0186, "step": 190450 }, { "epoch": 0.7946816766946784, "grad_norm": 0.6725415451144758, "learning_rate": 2.243721572382945e-06, "loss": 0.0268, "step": 190455 }, { "epoch": 0.7947025394096686, "grad_norm": 1.4252356769807755, "learning_rate": 2.2436921206860137e-06, "loss": 0.0258, "step": 190460 }, { "epoch": 0.7947234021246589, "grad_norm": 0.7887865742615903, "learning_rate": 2.243662670148825e-06, "loss": 0.0241, "step": 190465 }, { "epoch": 0.7947442648396492, "grad_norm": 0.5452443697032789, "learning_rate": 2.243633220771303e-06, "loss": 0.0204, "step": 190470 }, { "epoch": 0.7947651275546395, "grad_norm": 1.0157659080396169, "learning_rate": 2.24360377255337e-06, "loss": 0.0234, "step": 190475 }, { "epoch": 0.7947859902696297, "grad_norm": 0.550676238206077, "learning_rate": 2.243574325494952e-06, "loss": 0.0278, "step": 190480 }, { "epoch": 0.7948068529846201, "grad_norm": 1.1403658772005743, "learning_rate": 2.2435448795959716e-06, "loss": 0.0301, "step": 190485 }, { "epoch": 0.7948277156996103, "grad_norm": 0.45001045158831243, "learning_rate": 2.2435154348563538e-06, "loss": 0.017, "step": 190490 }, { "epoch": 0.7948485784146005, "grad_norm": 0.6477767973965546, "learning_rate": 2.243485991276021e-06, "loss": 0.02, "step": 190495 }, { "epoch": 0.7948694411295908, "grad_norm": 0.8570008367145132, "learning_rate": 2.243456548854898e-06, "loss": 0.0437, "step": 190500 }, { "epoch": 0.7948903038445811, "grad_norm": 0.5136823673722833, "learning_rate": 2.2434271075929093e-06, "loss": 0.0207, "step": 190505 }, { "epoch": 0.7949111665595714, "grad_norm": 0.6302800492138109, "learning_rate": 2.2433976674899777e-06, "loss": 0.0165, "step": 190510 }, { "epoch": 0.7949320292745616, "grad_norm": 0.8212454114760455, "learning_rate": 2.2433682285460283e-06, "loss": 0.0215, "step": 190515 }, { "epoch": 0.794952891989552, "grad_norm": 0.2831408674685934, "learning_rate": 2.2433387907609842e-06, "loss": 0.0152, "step": 190520 }, { "epoch": 0.7949737547045422, "grad_norm": 0.5438098577194762, "learning_rate": 2.2433093541347693e-06, "loss": 0.0154, "step": 190525 }, { "epoch": 0.7949946174195325, "grad_norm": 0.6433810586862353, "learning_rate": 2.2432799186673078e-06, "loss": 0.0203, "step": 190530 }, { "epoch": 0.7950154801345228, "grad_norm": 0.45254565512998074, "learning_rate": 2.243250484358524e-06, "loss": 0.0238, "step": 190535 }, { "epoch": 0.7950363428495131, "grad_norm": 0.5473054935878238, "learning_rate": 2.243221051208342e-06, "loss": 0.0268, "step": 190540 }, { "epoch": 0.7950572055645033, "grad_norm": 0.9099365151853744, "learning_rate": 2.243191619216685e-06, "loss": 0.0282, "step": 190545 }, { "epoch": 0.7950780682794936, "grad_norm": 0.8213536579904056, "learning_rate": 2.2431621883834777e-06, "loss": 0.0222, "step": 190550 }, { "epoch": 0.7950989309944839, "grad_norm": 0.35358401311933585, "learning_rate": 2.243132758708644e-06, "loss": 0.0179, "step": 190555 }, { "epoch": 0.7951197937094742, "grad_norm": 1.1470917179381175, "learning_rate": 2.243103330192108e-06, "loss": 0.0227, "step": 190560 }, { "epoch": 0.7951406564244644, "grad_norm": 0.8353772578721932, "learning_rate": 2.2430739028337927e-06, "loss": 0.0197, "step": 190565 }, { "epoch": 0.7951615191394548, "grad_norm": 0.3783114257777668, "learning_rate": 2.2430444766336234e-06, "loss": 0.0179, "step": 190570 }, { "epoch": 0.795182381854445, "grad_norm": 0.7225354124872383, "learning_rate": 2.2430150515915234e-06, "loss": 0.0191, "step": 190575 }, { "epoch": 0.7952032445694353, "grad_norm": 0.43847350063627477, "learning_rate": 2.242985627707418e-06, "loss": 0.0179, "step": 190580 }, { "epoch": 0.7952241072844256, "grad_norm": 0.7692749867745567, "learning_rate": 2.242956204981229e-06, "loss": 0.0239, "step": 190585 }, { "epoch": 0.7952449699994159, "grad_norm": 0.6527985757152213, "learning_rate": 2.2429267834128824e-06, "loss": 0.0212, "step": 190590 }, { "epoch": 0.7952658327144061, "grad_norm": 0.5822312207747672, "learning_rate": 2.242897363002301e-06, "loss": 0.0188, "step": 190595 }, { "epoch": 0.7952866954293965, "grad_norm": 0.8501451993821201, "learning_rate": 2.24286794374941e-06, "loss": 0.0208, "step": 190600 }, { "epoch": 0.7953075581443867, "grad_norm": 0.45922056317640336, "learning_rate": 2.2428385256541325e-06, "loss": 0.0162, "step": 190605 }, { "epoch": 0.7953284208593769, "grad_norm": 1.059555826392164, "learning_rate": 2.242809108716393e-06, "loss": 0.0185, "step": 190610 }, { "epoch": 0.7953492835743672, "grad_norm": 0.4195248988394386, "learning_rate": 2.242779692936116e-06, "loss": 0.0187, "step": 190615 }, { "epoch": 0.7953701462893575, "grad_norm": 1.2058330905361097, "learning_rate": 2.2427502783132245e-06, "loss": 0.0226, "step": 190620 }, { "epoch": 0.7953910090043478, "grad_norm": 0.9337555157191811, "learning_rate": 2.2427208648476445e-06, "loss": 0.0216, "step": 190625 }, { "epoch": 0.795411871719338, "grad_norm": 0.6963295125340544, "learning_rate": 2.242691452539298e-06, "loss": 0.0199, "step": 190630 }, { "epoch": 0.7954327344343284, "grad_norm": 0.4193238682001079, "learning_rate": 2.2426620413881105e-06, "loss": 0.0227, "step": 190635 }, { "epoch": 0.7954535971493186, "grad_norm": 0.4929921966172393, "learning_rate": 2.242632631394005e-06, "loss": 0.0172, "step": 190640 }, { "epoch": 0.7954744598643089, "grad_norm": 0.9391810002117578, "learning_rate": 2.2426032225569068e-06, "loss": 0.0197, "step": 190645 }, { "epoch": 0.7954953225792992, "grad_norm": 0.4394947662899224, "learning_rate": 2.2425738148767393e-06, "loss": 0.0229, "step": 190650 }, { "epoch": 0.7955161852942895, "grad_norm": 0.37234016483185156, "learning_rate": 2.2425444083534273e-06, "loss": 0.0231, "step": 190655 }, { "epoch": 0.7955370480092797, "grad_norm": 0.4089133253685468, "learning_rate": 2.2425150029868943e-06, "loss": 0.025, "step": 190660 }, { "epoch": 0.7955579107242701, "grad_norm": 0.42425532068209887, "learning_rate": 2.242485598777064e-06, "loss": 0.0268, "step": 190665 }, { "epoch": 0.7955787734392603, "grad_norm": 0.34586688104767593, "learning_rate": 2.242456195723862e-06, "loss": 0.0201, "step": 190670 }, { "epoch": 0.7955996361542506, "grad_norm": 0.8345124329638827, "learning_rate": 2.242426793827212e-06, "loss": 0.0205, "step": 190675 }, { "epoch": 0.7956204988692408, "grad_norm": 0.40676810447074585, "learning_rate": 2.242397393087037e-06, "loss": 0.0232, "step": 190680 }, { "epoch": 0.7956413615842312, "grad_norm": 0.7419134340459614, "learning_rate": 2.2423679935032627e-06, "loss": 0.0208, "step": 190685 }, { "epoch": 0.7956622242992214, "grad_norm": 0.47852506446771603, "learning_rate": 2.2423385950758125e-06, "loss": 0.0187, "step": 190690 }, { "epoch": 0.7956830870142116, "grad_norm": 1.0891515410170742, "learning_rate": 2.242309197804611e-06, "loss": 0.0229, "step": 190695 }, { "epoch": 0.795703949729202, "grad_norm": 0.3631346915408534, "learning_rate": 2.2422798016895822e-06, "loss": 0.0146, "step": 190700 }, { "epoch": 0.7957248124441922, "grad_norm": 0.8002092741647471, "learning_rate": 2.2422504067306498e-06, "loss": 0.0164, "step": 190705 }, { "epoch": 0.7957456751591825, "grad_norm": 0.7474212455947747, "learning_rate": 2.2422210129277394e-06, "loss": 0.0204, "step": 190710 }, { "epoch": 0.7957665378741728, "grad_norm": 0.42166215090850095, "learning_rate": 2.242191620280774e-06, "loss": 0.0171, "step": 190715 }, { "epoch": 0.7957874005891631, "grad_norm": 0.8582538201255675, "learning_rate": 2.2421622287896783e-06, "loss": 0.0219, "step": 190720 }, { "epoch": 0.7958082633041533, "grad_norm": 0.9995741755372366, "learning_rate": 2.242132838454376e-06, "loss": 0.0421, "step": 190725 }, { "epoch": 0.7958291260191436, "grad_norm": 0.5509222425613621, "learning_rate": 2.242103449274792e-06, "loss": 0.0216, "step": 190730 }, { "epoch": 0.7958499887341339, "grad_norm": 0.7945547787384928, "learning_rate": 2.242074061250851e-06, "loss": 0.0214, "step": 190735 }, { "epoch": 0.7958708514491242, "grad_norm": 0.46812352143750324, "learning_rate": 2.242044674382476e-06, "loss": 0.0203, "step": 190740 }, { "epoch": 0.7958917141641144, "grad_norm": 0.6961984922470118, "learning_rate": 2.242015288669592e-06, "loss": 0.022, "step": 190745 }, { "epoch": 0.7959125768791048, "grad_norm": 0.9720276723653487, "learning_rate": 2.241985904112123e-06, "loss": 0.0186, "step": 190750 }, { "epoch": 0.795933439594095, "grad_norm": 0.416309656969802, "learning_rate": 2.2419565207099937e-06, "loss": 0.0233, "step": 190755 }, { "epoch": 0.7959543023090853, "grad_norm": 0.9351645727490487, "learning_rate": 2.241927138463128e-06, "loss": 0.0206, "step": 190760 }, { "epoch": 0.7959751650240756, "grad_norm": 0.4264112149774023, "learning_rate": 2.241897757371451e-06, "loss": 0.0218, "step": 190765 }, { "epoch": 0.7959960277390659, "grad_norm": 0.5227022712789275, "learning_rate": 2.2418683774348854e-06, "loss": 0.0244, "step": 190770 }, { "epoch": 0.7960168904540561, "grad_norm": 0.6358934130563497, "learning_rate": 2.2418389986533573e-06, "loss": 0.0159, "step": 190775 }, { "epoch": 0.7960377531690465, "grad_norm": 0.7806634292429728, "learning_rate": 2.24180962102679e-06, "loss": 0.0133, "step": 190780 }, { "epoch": 0.7960586158840367, "grad_norm": 1.1655186841856322, "learning_rate": 2.241780244555108e-06, "loss": 0.0353, "step": 190785 }, { "epoch": 0.796079478599027, "grad_norm": 0.2788586497355339, "learning_rate": 2.2417508692382355e-06, "loss": 0.0193, "step": 190790 }, { "epoch": 0.7961003413140172, "grad_norm": 0.5829885528512924, "learning_rate": 2.241721495076097e-06, "loss": 0.0217, "step": 190795 }, { "epoch": 0.7961212040290075, "grad_norm": 0.6128391685342626, "learning_rate": 2.241692122068617e-06, "loss": 0.0247, "step": 190800 }, { "epoch": 0.7961420667439978, "grad_norm": 0.6515536603325384, "learning_rate": 2.2416627502157196e-06, "loss": 0.0219, "step": 190805 }, { "epoch": 0.796162929458988, "grad_norm": 0.770268436439613, "learning_rate": 2.241633379517329e-06, "loss": 0.0219, "step": 190810 }, { "epoch": 0.7961837921739784, "grad_norm": 0.29415833020501214, "learning_rate": 2.2416040099733703e-06, "loss": 0.0243, "step": 190815 }, { "epoch": 0.7962046548889686, "grad_norm": 0.40986597514774115, "learning_rate": 2.241574641583767e-06, "loss": 0.0196, "step": 190820 }, { "epoch": 0.7962255176039589, "grad_norm": 0.47388906075851644, "learning_rate": 2.241545274348444e-06, "loss": 0.0219, "step": 190825 }, { "epoch": 0.7962463803189492, "grad_norm": 0.8448302403229914, "learning_rate": 2.2415159082673263e-06, "loss": 0.0186, "step": 190830 }, { "epoch": 0.7962672430339395, "grad_norm": 0.40025185024672105, "learning_rate": 2.241486543340337e-06, "loss": 0.026, "step": 190835 }, { "epoch": 0.7962881057489297, "grad_norm": 0.4576104017467369, "learning_rate": 2.241457179567401e-06, "loss": 0.0213, "step": 190840 }, { "epoch": 0.7963089684639201, "grad_norm": 0.910019415325126, "learning_rate": 2.241427816948443e-06, "loss": 0.0263, "step": 190845 }, { "epoch": 0.7963298311789103, "grad_norm": 0.8080238155129987, "learning_rate": 2.2413984554833876e-06, "loss": 0.0184, "step": 190850 }, { "epoch": 0.7963506938939006, "grad_norm": 0.4171249648205749, "learning_rate": 2.2413690951721582e-06, "loss": 0.0145, "step": 190855 }, { "epoch": 0.7963715566088908, "grad_norm": 0.4281392972805909, "learning_rate": 2.24133973601468e-06, "loss": 0.0204, "step": 190860 }, { "epoch": 0.7963924193238812, "grad_norm": 0.3707181890208562, "learning_rate": 2.2413103780108774e-06, "loss": 0.0212, "step": 190865 }, { "epoch": 0.7964132820388714, "grad_norm": 0.667152532334914, "learning_rate": 2.2412810211606746e-06, "loss": 0.022, "step": 190870 }, { "epoch": 0.7964341447538617, "grad_norm": 0.3966553758622999, "learning_rate": 2.2412516654639965e-06, "loss": 0.0212, "step": 190875 }, { "epoch": 0.796455007468852, "grad_norm": 0.40873222299955175, "learning_rate": 2.2412223109207672e-06, "loss": 0.019, "step": 190880 }, { "epoch": 0.7964758701838422, "grad_norm": 0.5747229443435619, "learning_rate": 2.241192957530911e-06, "loss": 0.0186, "step": 190885 }, { "epoch": 0.7964967328988325, "grad_norm": 0.3071068654913157, "learning_rate": 2.2411636052943527e-06, "loss": 0.0206, "step": 190890 }, { "epoch": 0.7965175956138228, "grad_norm": 0.9230277715706849, "learning_rate": 2.2411342542110166e-06, "loss": 0.0179, "step": 190895 }, { "epoch": 0.7965384583288131, "grad_norm": 0.4221561025051922, "learning_rate": 2.241104904280828e-06, "loss": 0.0121, "step": 190900 }, { "epoch": 0.7965593210438033, "grad_norm": 0.43279110634992235, "learning_rate": 2.2410755555037097e-06, "loss": 0.0161, "step": 190905 }, { "epoch": 0.7965801837587936, "grad_norm": 0.31694032408648953, "learning_rate": 2.2410462078795876e-06, "loss": 0.0188, "step": 190910 }, { "epoch": 0.7966010464737839, "grad_norm": 0.3973286813216468, "learning_rate": 2.241016861408386e-06, "loss": 0.0136, "step": 190915 }, { "epoch": 0.7966219091887742, "grad_norm": 0.864108596485006, "learning_rate": 2.240987516090029e-06, "loss": 0.0171, "step": 190920 }, { "epoch": 0.7966427719037644, "grad_norm": 0.7260842472185409, "learning_rate": 2.240958171924441e-06, "loss": 0.025, "step": 190925 }, { "epoch": 0.7966636346187548, "grad_norm": 0.3252962350462241, "learning_rate": 2.240928828911547e-06, "loss": 0.0232, "step": 190930 }, { "epoch": 0.796684497333745, "grad_norm": 1.081861149342394, "learning_rate": 2.2408994870512717e-06, "loss": 0.0275, "step": 190935 }, { "epoch": 0.7967053600487353, "grad_norm": 0.8678117464667136, "learning_rate": 2.240870146343539e-06, "loss": 0.0263, "step": 190940 }, { "epoch": 0.7967262227637256, "grad_norm": 0.45847068067674823, "learning_rate": 2.2408408067882736e-06, "loss": 0.0197, "step": 190945 }, { "epoch": 0.7967470854787159, "grad_norm": 0.6072224466538552, "learning_rate": 2.240811468385401e-06, "loss": 0.0164, "step": 190950 }, { "epoch": 0.7967679481937061, "grad_norm": 0.582249270757492, "learning_rate": 2.240782131134844e-06, "loss": 0.0199, "step": 190955 }, { "epoch": 0.7967888109086965, "grad_norm": 0.6809542850798825, "learning_rate": 2.240752795036529e-06, "loss": 0.0203, "step": 190960 }, { "epoch": 0.7968096736236867, "grad_norm": 0.7159599177420687, "learning_rate": 2.2407234600903797e-06, "loss": 0.0181, "step": 190965 }, { "epoch": 0.796830536338677, "grad_norm": 0.40383663483158233, "learning_rate": 2.24069412629632e-06, "loss": 0.0225, "step": 190970 }, { "epoch": 0.7968513990536672, "grad_norm": 0.7085534607826912, "learning_rate": 2.240664793654276e-06, "loss": 0.0214, "step": 190975 }, { "epoch": 0.7968722617686576, "grad_norm": 0.5510771132136345, "learning_rate": 2.2406354621641705e-06, "loss": 0.0169, "step": 190980 }, { "epoch": 0.7968931244836478, "grad_norm": 0.3086404803085847, "learning_rate": 2.24060613182593e-06, "loss": 0.0208, "step": 190985 }, { "epoch": 0.796913987198638, "grad_norm": 0.4714702146184876, "learning_rate": 2.2405768026394782e-06, "loss": 0.0176, "step": 190990 }, { "epoch": 0.7969348499136284, "grad_norm": 0.6686206364044205, "learning_rate": 2.240547474604739e-06, "loss": 0.0207, "step": 190995 }, { "epoch": 0.7969557126286186, "grad_norm": 0.7323167742904946, "learning_rate": 2.240518147721639e-06, "loss": 0.0264, "step": 191000 }, { "epoch": 0.7969765753436089, "grad_norm": 0.5411843641298373, "learning_rate": 2.2404888219901006e-06, "loss": 0.0183, "step": 191005 }, { "epoch": 0.7969974380585992, "grad_norm": 1.078633157803887, "learning_rate": 2.24045949741005e-06, "loss": 0.0304, "step": 191010 }, { "epoch": 0.7970183007735895, "grad_norm": 0.7802642254520359, "learning_rate": 2.2404301739814108e-06, "loss": 0.0223, "step": 191015 }, { "epoch": 0.7970391634885797, "grad_norm": 1.5650229780589688, "learning_rate": 2.240400851704108e-06, "loss": 0.0284, "step": 191020 }, { "epoch": 0.7970600262035701, "grad_norm": 0.4227426904777106, "learning_rate": 2.2403715305780666e-06, "loss": 0.0176, "step": 191025 }, { "epoch": 0.7970808889185603, "grad_norm": 0.8677787488788582, "learning_rate": 2.240342210603211e-06, "loss": 0.021, "step": 191030 }, { "epoch": 0.7971017516335506, "grad_norm": 0.7615739070703803, "learning_rate": 2.2403128917794666e-06, "loss": 0.0272, "step": 191035 }, { "epoch": 0.7971226143485408, "grad_norm": 0.8401219059829776, "learning_rate": 2.240283574106757e-06, "loss": 0.025, "step": 191040 }, { "epoch": 0.7971434770635312, "grad_norm": 0.7184331146542442, "learning_rate": 2.240254257585007e-06, "loss": 0.0221, "step": 191045 }, { "epoch": 0.7971643397785214, "grad_norm": 0.5352703785737274, "learning_rate": 2.240224942214142e-06, "loss": 0.0141, "step": 191050 }, { "epoch": 0.7971852024935117, "grad_norm": 0.8382440730541589, "learning_rate": 2.240195627994086e-06, "loss": 0.0236, "step": 191055 }, { "epoch": 0.797206065208502, "grad_norm": 0.7146773950390409, "learning_rate": 2.2401663149247645e-06, "loss": 0.0261, "step": 191060 }, { "epoch": 0.7972269279234923, "grad_norm": 0.4136273397008695, "learning_rate": 2.240137003006101e-06, "loss": 0.0203, "step": 191065 }, { "epoch": 0.7972477906384825, "grad_norm": 0.6622482274227196, "learning_rate": 2.2401076922380212e-06, "loss": 0.0224, "step": 191070 }, { "epoch": 0.7972686533534729, "grad_norm": 0.3480260875690863, "learning_rate": 2.24007838262045e-06, "loss": 0.0191, "step": 191075 }, { "epoch": 0.7972895160684631, "grad_norm": 0.548013815960175, "learning_rate": 2.2400490741533113e-06, "loss": 0.0214, "step": 191080 }, { "epoch": 0.7973103787834533, "grad_norm": 0.6416939695993453, "learning_rate": 2.240019766836531e-06, "loss": 0.0242, "step": 191085 }, { "epoch": 0.7973312414984436, "grad_norm": 0.5965058908240686, "learning_rate": 2.2399904606700317e-06, "loss": 0.0204, "step": 191090 }, { "epoch": 0.7973521042134339, "grad_norm": 0.8699711401228118, "learning_rate": 2.2399611556537404e-06, "loss": 0.0306, "step": 191095 }, { "epoch": 0.7973729669284242, "grad_norm": 0.5338950071657496, "learning_rate": 2.239931851787581e-06, "loss": 0.0192, "step": 191100 }, { "epoch": 0.7973938296434144, "grad_norm": 0.6018898323836774, "learning_rate": 2.2399025490714783e-06, "loss": 0.0214, "step": 191105 }, { "epoch": 0.7974146923584048, "grad_norm": 0.614088997952953, "learning_rate": 2.2398732475053568e-06, "loss": 0.0213, "step": 191110 }, { "epoch": 0.797435555073395, "grad_norm": 0.8897282400664308, "learning_rate": 2.239843947089142e-06, "loss": 0.0206, "step": 191115 }, { "epoch": 0.7974564177883853, "grad_norm": 0.7859128563417871, "learning_rate": 2.239814647822758e-06, "loss": 0.0176, "step": 191120 }, { "epoch": 0.7974772805033756, "grad_norm": 0.9422063810211934, "learning_rate": 2.2397853497061297e-06, "loss": 0.0214, "step": 191125 }, { "epoch": 0.7974981432183659, "grad_norm": 0.8729020611947306, "learning_rate": 2.2397560527391823e-06, "loss": 0.0279, "step": 191130 }, { "epoch": 0.7975190059333561, "grad_norm": 0.7452317981122055, "learning_rate": 2.23972675692184e-06, "loss": 0.0189, "step": 191135 }, { "epoch": 0.7975398686483465, "grad_norm": 0.8586965347031396, "learning_rate": 2.2396974622540284e-06, "loss": 0.0181, "step": 191140 }, { "epoch": 0.7975607313633367, "grad_norm": 1.1381184237550095, "learning_rate": 2.2396681687356715e-06, "loss": 0.0362, "step": 191145 }, { "epoch": 0.797581594078327, "grad_norm": 0.43338141630596555, "learning_rate": 2.239638876366695e-06, "loss": 0.0213, "step": 191150 }, { "epoch": 0.7976024567933172, "grad_norm": 0.4567716775331102, "learning_rate": 2.239609585147023e-06, "loss": 0.0195, "step": 191155 }, { "epoch": 0.7976233195083076, "grad_norm": 0.5582578862364909, "learning_rate": 2.2395802950765806e-06, "loss": 0.0204, "step": 191160 }, { "epoch": 0.7976441822232978, "grad_norm": 4.81983923945734, "learning_rate": 2.2395510061552925e-06, "loss": 0.0212, "step": 191165 }, { "epoch": 0.797665044938288, "grad_norm": 0.47583753854144617, "learning_rate": 2.2395217183830843e-06, "loss": 0.0216, "step": 191170 }, { "epoch": 0.7976859076532784, "grad_norm": 1.0160138390108657, "learning_rate": 2.2394924317598797e-06, "loss": 0.0335, "step": 191175 }, { "epoch": 0.7977067703682686, "grad_norm": 0.3355277166844776, "learning_rate": 2.2394631462856045e-06, "loss": 0.0182, "step": 191180 }, { "epoch": 0.7977276330832589, "grad_norm": 0.7455397433057391, "learning_rate": 2.239433861960183e-06, "loss": 0.0185, "step": 191185 }, { "epoch": 0.7977484957982492, "grad_norm": 0.8422051760749988, "learning_rate": 2.2394045787835406e-06, "loss": 0.0192, "step": 191190 }, { "epoch": 0.7977693585132395, "grad_norm": 0.8064608124819139, "learning_rate": 2.239375296755602e-06, "loss": 0.0188, "step": 191195 }, { "epoch": 0.7977902212282297, "grad_norm": 0.9172437920011713, "learning_rate": 2.239346015876291e-06, "loss": 0.0201, "step": 191200 }, { "epoch": 0.7978110839432201, "grad_norm": 0.73774790614046, "learning_rate": 2.2393167361455347e-06, "loss": 0.0232, "step": 191205 }, { "epoch": 0.7978319466582103, "grad_norm": 1.2771476485022806, "learning_rate": 2.2392874575632566e-06, "loss": 0.0266, "step": 191210 }, { "epoch": 0.7978528093732006, "grad_norm": 0.28051475158470945, "learning_rate": 2.239258180129382e-06, "loss": 0.0161, "step": 191215 }, { "epoch": 0.7978736720881908, "grad_norm": 0.28840104200090744, "learning_rate": 2.239228903843835e-06, "loss": 0.0178, "step": 191220 }, { "epoch": 0.7978945348031812, "grad_norm": 0.4083271758959009, "learning_rate": 2.239199628706542e-06, "loss": 0.0182, "step": 191225 }, { "epoch": 0.7979153975181714, "grad_norm": 0.6033491995267427, "learning_rate": 2.2391703547174267e-06, "loss": 0.0277, "step": 191230 }, { "epoch": 0.7979362602331617, "grad_norm": 0.5924691962029272, "learning_rate": 2.239141081876415e-06, "loss": 0.0194, "step": 191235 }, { "epoch": 0.797957122948152, "grad_norm": 0.37776858154735854, "learning_rate": 2.239111810183431e-06, "loss": 0.0244, "step": 191240 }, { "epoch": 0.7979779856631423, "grad_norm": 0.8341501740907944, "learning_rate": 2.2390825396384004e-06, "loss": 0.0166, "step": 191245 }, { "epoch": 0.7979988483781325, "grad_norm": 0.768120072184444, "learning_rate": 2.2390532702412477e-06, "loss": 0.0178, "step": 191250 }, { "epoch": 0.7980197110931229, "grad_norm": 0.4071833349013294, "learning_rate": 2.2390240019918974e-06, "loss": 0.0195, "step": 191255 }, { "epoch": 0.7980405738081131, "grad_norm": 0.6138511026477681, "learning_rate": 2.238994734890276e-06, "loss": 0.019, "step": 191260 }, { "epoch": 0.7980614365231034, "grad_norm": 1.2310377893597604, "learning_rate": 2.238965468936307e-06, "loss": 0.0102, "step": 191265 }, { "epoch": 0.7980822992380936, "grad_norm": 0.6862665709912565, "learning_rate": 2.2389362041299157e-06, "loss": 0.026, "step": 191270 }, { "epoch": 0.798103161953084, "grad_norm": 0.5346768892728119, "learning_rate": 2.2389069404710283e-06, "loss": 0.0192, "step": 191275 }, { "epoch": 0.7981240246680742, "grad_norm": 1.4779756871546288, "learning_rate": 2.2388776779595677e-06, "loss": 0.0218, "step": 191280 }, { "epoch": 0.7981448873830644, "grad_norm": 0.4271703661754827, "learning_rate": 2.238848416595461e-06, "loss": 0.0201, "step": 191285 }, { "epoch": 0.7981657500980548, "grad_norm": 0.9008110587954274, "learning_rate": 2.238819156378632e-06, "loss": 0.0248, "step": 191290 }, { "epoch": 0.798186612813045, "grad_norm": 0.9022509921919807, "learning_rate": 2.2387898973090062e-06, "loss": 0.0179, "step": 191295 }, { "epoch": 0.7982074755280353, "grad_norm": 0.34712015353016074, "learning_rate": 2.2387606393865084e-06, "loss": 0.0149, "step": 191300 }, { "epoch": 0.7982283382430256, "grad_norm": 0.5611198444971227, "learning_rate": 2.2387313826110635e-06, "loss": 0.0203, "step": 191305 }, { "epoch": 0.7982492009580159, "grad_norm": 0.4428316418177394, "learning_rate": 2.238702126982597e-06, "loss": 0.0176, "step": 191310 }, { "epoch": 0.7982700636730061, "grad_norm": 0.619727328181353, "learning_rate": 2.2386728725010337e-06, "loss": 0.0214, "step": 191315 }, { "epoch": 0.7982909263879965, "grad_norm": 0.8170709385924604, "learning_rate": 2.2386436191662984e-06, "loss": 0.0192, "step": 191320 }, { "epoch": 0.7983117891029867, "grad_norm": 1.1036763910429435, "learning_rate": 2.2386143669783174e-06, "loss": 0.0278, "step": 191325 }, { "epoch": 0.798332651817977, "grad_norm": 0.9573473954851923, "learning_rate": 2.238585115937014e-06, "loss": 0.0271, "step": 191330 }, { "epoch": 0.7983535145329672, "grad_norm": 0.8386882040314394, "learning_rate": 2.238555866042314e-06, "loss": 0.0265, "step": 191335 }, { "epoch": 0.7983743772479576, "grad_norm": 0.23579120894243064, "learning_rate": 2.2385266172941433e-06, "loss": 0.0192, "step": 191340 }, { "epoch": 0.7983952399629478, "grad_norm": 0.531349853932993, "learning_rate": 2.2384973696924265e-06, "loss": 0.0205, "step": 191345 }, { "epoch": 0.798416102677938, "grad_norm": 0.6014539669946121, "learning_rate": 2.2384681232370883e-06, "loss": 0.0196, "step": 191350 }, { "epoch": 0.7984369653929284, "grad_norm": 0.7381555338687008, "learning_rate": 2.2384388779280533e-06, "loss": 0.0174, "step": 191355 }, { "epoch": 0.7984578281079187, "grad_norm": 0.5534175330588462, "learning_rate": 2.2384096337652482e-06, "loss": 0.0142, "step": 191360 }, { "epoch": 0.7984786908229089, "grad_norm": 0.842348017514943, "learning_rate": 2.238380390748597e-06, "loss": 0.0186, "step": 191365 }, { "epoch": 0.7984995535378993, "grad_norm": 0.47813524346872466, "learning_rate": 2.2383511488780253e-06, "loss": 0.0202, "step": 191370 }, { "epoch": 0.7985204162528895, "grad_norm": 0.27655534686365274, "learning_rate": 2.238321908153458e-06, "loss": 0.0259, "step": 191375 }, { "epoch": 0.7985412789678797, "grad_norm": 0.49131514932675036, "learning_rate": 2.2382926685748206e-06, "loss": 0.019, "step": 191380 }, { "epoch": 0.7985621416828701, "grad_norm": 0.29056859515944666, "learning_rate": 2.2382634301420376e-06, "loss": 0.0123, "step": 191385 }, { "epoch": 0.7985830043978603, "grad_norm": 1.9924088652840544, "learning_rate": 2.2382341928550344e-06, "loss": 0.022, "step": 191390 }, { "epoch": 0.7986038671128506, "grad_norm": 0.5033387865131715, "learning_rate": 2.238204956713737e-06, "loss": 0.021, "step": 191395 }, { "epoch": 0.7986247298278408, "grad_norm": 1.066151606047355, "learning_rate": 2.238175721718069e-06, "loss": 0.023, "step": 191400 }, { "epoch": 0.7986455925428312, "grad_norm": 0.39683004473389294, "learning_rate": 2.2381464878679568e-06, "loss": 0.0241, "step": 191405 }, { "epoch": 0.7986664552578214, "grad_norm": 0.473167265345525, "learning_rate": 2.2381172551633256e-06, "loss": 0.0189, "step": 191410 }, { "epoch": 0.7986873179728117, "grad_norm": 0.21511441724018215, "learning_rate": 2.2380880236041e-06, "loss": 0.0175, "step": 191415 }, { "epoch": 0.798708180687802, "grad_norm": 0.4700406947654753, "learning_rate": 2.2380587931902056e-06, "loss": 0.026, "step": 191420 }, { "epoch": 0.7987290434027923, "grad_norm": 0.6492902528663406, "learning_rate": 2.2380295639215674e-06, "loss": 0.0175, "step": 191425 }, { "epoch": 0.7987499061177825, "grad_norm": 0.3213842165716529, "learning_rate": 2.238000335798111e-06, "loss": 0.0203, "step": 191430 }, { "epoch": 0.7987707688327729, "grad_norm": 0.5563084220566863, "learning_rate": 2.2379711088197604e-06, "loss": 0.0181, "step": 191435 }, { "epoch": 0.7987916315477631, "grad_norm": 0.8004291962303384, "learning_rate": 2.2379418829864426e-06, "loss": 0.0154, "step": 191440 }, { "epoch": 0.7988124942627534, "grad_norm": 0.36769768361575716, "learning_rate": 2.237912658298082e-06, "loss": 0.0173, "step": 191445 }, { "epoch": 0.7988333569777436, "grad_norm": 0.8925551144912222, "learning_rate": 2.2378834347546035e-06, "loss": 0.0185, "step": 191450 }, { "epoch": 0.798854219692734, "grad_norm": 0.5981980212484722, "learning_rate": 2.2378542123559327e-06, "loss": 0.0247, "step": 191455 }, { "epoch": 0.7988750824077242, "grad_norm": 0.22830542401423626, "learning_rate": 2.2378249911019943e-06, "loss": 0.0126, "step": 191460 }, { "epoch": 0.7988959451227144, "grad_norm": 0.8589273910935952, "learning_rate": 2.2377957709927147e-06, "loss": 0.0239, "step": 191465 }, { "epoch": 0.7989168078377048, "grad_norm": 0.8429071428619828, "learning_rate": 2.2377665520280188e-06, "loss": 0.0254, "step": 191470 }, { "epoch": 0.798937670552695, "grad_norm": 0.9632706222121598, "learning_rate": 2.237737334207831e-06, "loss": 0.0246, "step": 191475 }, { "epoch": 0.7989585332676853, "grad_norm": 0.5491154221341025, "learning_rate": 2.2377081175320776e-06, "loss": 0.018, "step": 191480 }, { "epoch": 0.7989793959826756, "grad_norm": 0.9122314754473179, "learning_rate": 2.2376789020006837e-06, "loss": 0.0216, "step": 191485 }, { "epoch": 0.7990002586976659, "grad_norm": 1.2097969350991975, "learning_rate": 2.237649687613574e-06, "loss": 0.0206, "step": 191490 }, { "epoch": 0.7990211214126561, "grad_norm": 0.6468863192484047, "learning_rate": 2.237620474370674e-06, "loss": 0.0274, "step": 191495 }, { "epoch": 0.7990419841276465, "grad_norm": 0.5342600782050736, "learning_rate": 2.23759126227191e-06, "loss": 0.0216, "step": 191500 }, { "epoch": 0.7990628468426367, "grad_norm": 0.46443155690059285, "learning_rate": 2.2375620513172063e-06, "loss": 0.0186, "step": 191505 }, { "epoch": 0.799083709557627, "grad_norm": 0.5229947112118365, "learning_rate": 2.2375328415064878e-06, "loss": 0.0199, "step": 191510 }, { "epoch": 0.7991045722726172, "grad_norm": 0.5478733314652325, "learning_rate": 2.2375036328396813e-06, "loss": 0.0193, "step": 191515 }, { "epoch": 0.7991254349876076, "grad_norm": 1.5891106277396896, "learning_rate": 2.237474425316711e-06, "loss": 0.02, "step": 191520 }, { "epoch": 0.7991462977025978, "grad_norm": 0.9266702301490679, "learning_rate": 2.2374452189375027e-06, "loss": 0.0164, "step": 191525 }, { "epoch": 0.7991671604175881, "grad_norm": 0.38026989764554275, "learning_rate": 2.2374160137019814e-06, "loss": 0.0186, "step": 191530 }, { "epoch": 0.7991880231325784, "grad_norm": 0.8644829923745265, "learning_rate": 2.237386809610073e-06, "loss": 0.0211, "step": 191535 }, { "epoch": 0.7992088858475687, "grad_norm": 0.6426773683258478, "learning_rate": 2.2373576066617027e-06, "loss": 0.0209, "step": 191540 }, { "epoch": 0.7992297485625589, "grad_norm": 1.5065277819589418, "learning_rate": 2.237328404856795e-06, "loss": 0.018, "step": 191545 }, { "epoch": 0.7992506112775493, "grad_norm": 0.5049289728891796, "learning_rate": 2.237299204195277e-06, "loss": 0.0233, "step": 191550 }, { "epoch": 0.7992714739925395, "grad_norm": 0.6215052091155349, "learning_rate": 2.237270004677072e-06, "loss": 0.0218, "step": 191555 }, { "epoch": 0.7992923367075297, "grad_norm": 0.7686177193102739, "learning_rate": 2.2372408063021074e-06, "loss": 0.0165, "step": 191560 }, { "epoch": 0.7993131994225201, "grad_norm": 0.802285213050141, "learning_rate": 2.2372116090703076e-06, "loss": 0.0266, "step": 191565 }, { "epoch": 0.7993340621375103, "grad_norm": 0.5392797859845324, "learning_rate": 2.2371824129815974e-06, "loss": 0.0202, "step": 191570 }, { "epoch": 0.7993549248525006, "grad_norm": 1.2291687758839507, "learning_rate": 2.2371532180359035e-06, "loss": 0.0224, "step": 191575 }, { "epoch": 0.7993757875674908, "grad_norm": 0.39952558143869016, "learning_rate": 2.2371240242331505e-06, "loss": 0.0261, "step": 191580 }, { "epoch": 0.7993966502824812, "grad_norm": 0.23599922575708204, "learning_rate": 2.237094831573265e-06, "loss": 0.0195, "step": 191585 }, { "epoch": 0.7994175129974714, "grad_norm": 0.4613508707604929, "learning_rate": 2.2370656400561706e-06, "loss": 0.0165, "step": 191590 }, { "epoch": 0.7994383757124617, "grad_norm": 0.8304963847823911, "learning_rate": 2.2370364496817932e-06, "loss": 0.0207, "step": 191595 }, { "epoch": 0.799459238427452, "grad_norm": 0.4832045295665181, "learning_rate": 2.2370072604500595e-06, "loss": 0.0197, "step": 191600 }, { "epoch": 0.7994801011424423, "grad_norm": 0.5786788866632531, "learning_rate": 2.236978072360894e-06, "loss": 0.0183, "step": 191605 }, { "epoch": 0.7995009638574325, "grad_norm": 0.6866039865856268, "learning_rate": 2.2369488854142227e-06, "loss": 0.0183, "step": 191610 }, { "epoch": 0.7995218265724229, "grad_norm": 0.8335588487346105, "learning_rate": 2.2369196996099697e-06, "loss": 0.0181, "step": 191615 }, { "epoch": 0.7995426892874131, "grad_norm": 0.7799304063813073, "learning_rate": 2.2368905149480626e-06, "loss": 0.0224, "step": 191620 }, { "epoch": 0.7995635520024034, "grad_norm": 0.7457540788199123, "learning_rate": 2.2368613314284248e-06, "loss": 0.0184, "step": 191625 }, { "epoch": 0.7995844147173936, "grad_norm": 0.8134259774336442, "learning_rate": 2.236832149050983e-06, "loss": 0.0183, "step": 191630 }, { "epoch": 0.799605277432384, "grad_norm": 1.1093877123185514, "learning_rate": 2.2368029678156624e-06, "loss": 0.0265, "step": 191635 }, { "epoch": 0.7996261401473742, "grad_norm": 0.6218718622469945, "learning_rate": 2.2367737877223885e-06, "loss": 0.0149, "step": 191640 }, { "epoch": 0.7996470028623645, "grad_norm": 1.125026196828398, "learning_rate": 2.2367446087710873e-06, "loss": 0.0277, "step": 191645 }, { "epoch": 0.7996678655773548, "grad_norm": 0.4912139832176747, "learning_rate": 2.2367154309616835e-06, "loss": 0.0192, "step": 191650 }, { "epoch": 0.799688728292345, "grad_norm": 0.9349487685816629, "learning_rate": 2.236686254294103e-06, "loss": 0.0246, "step": 191655 }, { "epoch": 0.7997095910073353, "grad_norm": 0.35399010196914676, "learning_rate": 2.2366570787682712e-06, "loss": 0.0127, "step": 191660 }, { "epoch": 0.7997304537223257, "grad_norm": 0.37276171661624874, "learning_rate": 2.2366279043841137e-06, "loss": 0.0144, "step": 191665 }, { "epoch": 0.7997513164373159, "grad_norm": 0.5613571387220688, "learning_rate": 2.2365987311415566e-06, "loss": 0.0192, "step": 191670 }, { "epoch": 0.7997721791523061, "grad_norm": 1.1888415516239181, "learning_rate": 2.2365695590405243e-06, "loss": 0.0272, "step": 191675 }, { "epoch": 0.7997930418672965, "grad_norm": 0.43521752698269517, "learning_rate": 2.2365403880809434e-06, "loss": 0.0206, "step": 191680 }, { "epoch": 0.7998139045822867, "grad_norm": 0.6646742980703052, "learning_rate": 2.236511218262739e-06, "loss": 0.0251, "step": 191685 }, { "epoch": 0.799834767297277, "grad_norm": 0.3386401654825978, "learning_rate": 2.2364820495858365e-06, "loss": 0.0207, "step": 191690 }, { "epoch": 0.7998556300122672, "grad_norm": 0.8143877610454097, "learning_rate": 2.2364528820501614e-06, "loss": 0.0165, "step": 191695 }, { "epoch": 0.7998764927272576, "grad_norm": 0.8183742284277686, "learning_rate": 2.2364237156556404e-06, "loss": 0.0213, "step": 191700 }, { "epoch": 0.7998973554422478, "grad_norm": 1.1239198443234486, "learning_rate": 2.236394550402198e-06, "loss": 0.028, "step": 191705 }, { "epoch": 0.7999182181572381, "grad_norm": 0.7308739029665087, "learning_rate": 2.2363653862897597e-06, "loss": 0.0289, "step": 191710 }, { "epoch": 0.7999390808722284, "grad_norm": 0.3972736117951466, "learning_rate": 2.236336223318252e-06, "loss": 0.03, "step": 191715 }, { "epoch": 0.7999599435872187, "grad_norm": 0.700096270853409, "learning_rate": 2.2363070614875994e-06, "loss": 0.018, "step": 191720 }, { "epoch": 0.7999808063022089, "grad_norm": 0.5920479339380043, "learning_rate": 2.236277900797729e-06, "loss": 0.0199, "step": 191725 }, { "epoch": 0.8000016690171993, "grad_norm": 0.6939607718452133, "learning_rate": 2.2362487412485647e-06, "loss": 0.017, "step": 191730 }, { "epoch": 0.8000225317321895, "grad_norm": 0.7700878318937302, "learning_rate": 2.236219582840033e-06, "loss": 0.0179, "step": 191735 }, { "epoch": 0.8000433944471798, "grad_norm": 0.7297383425355107, "learning_rate": 2.2361904255720596e-06, "loss": 0.0254, "step": 191740 }, { "epoch": 0.8000642571621701, "grad_norm": 0.2672147235394305, "learning_rate": 2.2361612694445706e-06, "loss": 0.016, "step": 191745 }, { "epoch": 0.8000851198771604, "grad_norm": 0.7239967367643997, "learning_rate": 2.23613211445749e-06, "loss": 0.0225, "step": 191750 }, { "epoch": 0.8001059825921506, "grad_norm": 0.7676241928961529, "learning_rate": 2.236102960610746e-06, "loss": 0.0185, "step": 191755 }, { "epoch": 0.8001268453071408, "grad_norm": 0.6073665304681227, "learning_rate": 2.236073807904262e-06, "loss": 0.0282, "step": 191760 }, { "epoch": 0.8001477080221312, "grad_norm": 1.09819946697679, "learning_rate": 2.2360446563379645e-06, "loss": 0.0268, "step": 191765 }, { "epoch": 0.8001685707371214, "grad_norm": 0.24473735729511048, "learning_rate": 2.2360155059117794e-06, "loss": 0.0229, "step": 191770 }, { "epoch": 0.8001894334521117, "grad_norm": 0.4377509513960108, "learning_rate": 2.235986356625632e-06, "loss": 0.0205, "step": 191775 }, { "epoch": 0.800210296167102, "grad_norm": 0.699782212739269, "learning_rate": 2.2359572084794482e-06, "loss": 0.0192, "step": 191780 }, { "epoch": 0.8002311588820923, "grad_norm": 0.3474768138945562, "learning_rate": 2.235928061473153e-06, "loss": 0.0209, "step": 191785 }, { "epoch": 0.8002520215970825, "grad_norm": 0.47783951815866005, "learning_rate": 2.2358989156066743e-06, "loss": 0.0262, "step": 191790 }, { "epoch": 0.8002728843120729, "grad_norm": 0.4600704706832606, "learning_rate": 2.235869770879935e-06, "loss": 0.0192, "step": 191795 }, { "epoch": 0.8002937470270631, "grad_norm": 0.5952786483363612, "learning_rate": 2.2358406272928624e-06, "loss": 0.019, "step": 191800 }, { "epoch": 0.8003146097420534, "grad_norm": 0.18627144312319438, "learning_rate": 2.235811484845382e-06, "loss": 0.015, "step": 191805 }, { "epoch": 0.8003354724570436, "grad_norm": 0.5573186589459308, "learning_rate": 2.2357823435374194e-06, "loss": 0.0217, "step": 191810 }, { "epoch": 0.800356335172034, "grad_norm": 0.4794561703739374, "learning_rate": 2.2357532033689007e-06, "loss": 0.0204, "step": 191815 }, { "epoch": 0.8003771978870242, "grad_norm": 1.3811567241263156, "learning_rate": 2.2357240643397514e-06, "loss": 0.0229, "step": 191820 }, { "epoch": 0.8003980606020145, "grad_norm": 0.5905247052193515, "learning_rate": 2.2356949264498965e-06, "loss": 0.0251, "step": 191825 }, { "epoch": 0.8004189233170048, "grad_norm": 0.32239430609169323, "learning_rate": 2.235665789699263e-06, "loss": 0.0174, "step": 191830 }, { "epoch": 0.8004397860319951, "grad_norm": 1.1223022412894288, "learning_rate": 2.235636654087776e-06, "loss": 0.0288, "step": 191835 }, { "epoch": 0.8004606487469853, "grad_norm": 0.5618249421361393, "learning_rate": 2.2356075196153615e-06, "loss": 0.0162, "step": 191840 }, { "epoch": 0.8004815114619757, "grad_norm": 0.4913899356772387, "learning_rate": 2.235578386281945e-06, "loss": 0.0237, "step": 191845 }, { "epoch": 0.8005023741769659, "grad_norm": 0.4148670882762547, "learning_rate": 2.2355492540874525e-06, "loss": 0.0164, "step": 191850 }, { "epoch": 0.8005232368919561, "grad_norm": 0.7878099997933645, "learning_rate": 2.2355201230318098e-06, "loss": 0.0238, "step": 191855 }, { "epoch": 0.8005440996069465, "grad_norm": 0.4342167705230064, "learning_rate": 2.2354909931149427e-06, "loss": 0.0259, "step": 191860 }, { "epoch": 0.8005649623219367, "grad_norm": 0.3532473989318143, "learning_rate": 2.2354618643367767e-06, "loss": 0.0243, "step": 191865 }, { "epoch": 0.800585825036927, "grad_norm": 0.5578649316814398, "learning_rate": 2.235432736697238e-06, "loss": 0.0193, "step": 191870 }, { "epoch": 0.8006066877519172, "grad_norm": 0.5172751169703605, "learning_rate": 2.235403610196253e-06, "loss": 0.0205, "step": 191875 }, { "epoch": 0.8006275504669076, "grad_norm": 0.658971915179506, "learning_rate": 2.235374484833746e-06, "loss": 0.0275, "step": 191880 }, { "epoch": 0.8006484131818978, "grad_norm": 0.9588912671292981, "learning_rate": 2.2353453606096437e-06, "loss": 0.0232, "step": 191885 }, { "epoch": 0.8006692758968881, "grad_norm": 0.4730832435366594, "learning_rate": 2.235316237523872e-06, "loss": 0.0191, "step": 191890 }, { "epoch": 0.8006901386118784, "grad_norm": 0.8200049625873617, "learning_rate": 2.235287115576357e-06, "loss": 0.0146, "step": 191895 }, { "epoch": 0.8007110013268687, "grad_norm": 0.6463738747241103, "learning_rate": 2.2352579947670242e-06, "loss": 0.0301, "step": 191900 }, { "epoch": 0.8007318640418589, "grad_norm": 0.8196524522962494, "learning_rate": 2.235228875095799e-06, "loss": 0.0229, "step": 191905 }, { "epoch": 0.8007527267568493, "grad_norm": 0.28803820181522294, "learning_rate": 2.2351997565626084e-06, "loss": 0.02, "step": 191910 }, { "epoch": 0.8007735894718395, "grad_norm": 1.3292113616571004, "learning_rate": 2.235170639167377e-06, "loss": 0.025, "step": 191915 }, { "epoch": 0.8007944521868298, "grad_norm": 0.28326857005297684, "learning_rate": 2.2351415229100315e-06, "loss": 0.0164, "step": 191920 }, { "epoch": 0.80081531490182, "grad_norm": 0.6172265689938975, "learning_rate": 2.235112407790498e-06, "loss": 0.0275, "step": 191925 }, { "epoch": 0.8008361776168104, "grad_norm": 1.3676791113511346, "learning_rate": 2.2350832938087016e-06, "loss": 0.0312, "step": 191930 }, { "epoch": 0.8008570403318006, "grad_norm": 0.37790064105585847, "learning_rate": 2.2350541809645687e-06, "loss": 0.0202, "step": 191935 }, { "epoch": 0.8008779030467909, "grad_norm": 0.369905766196266, "learning_rate": 2.235025069258025e-06, "loss": 0.0221, "step": 191940 }, { "epoch": 0.8008987657617812, "grad_norm": 0.4423024189450471, "learning_rate": 2.2349959586889962e-06, "loss": 0.0159, "step": 191945 }, { "epoch": 0.8009196284767715, "grad_norm": 0.6590290951728875, "learning_rate": 2.234966849257409e-06, "loss": 0.018, "step": 191950 }, { "epoch": 0.8009404911917617, "grad_norm": 0.7987704672669387, "learning_rate": 2.2349377409631888e-06, "loss": 0.0203, "step": 191955 }, { "epoch": 0.800961353906752, "grad_norm": 0.3610048027596389, "learning_rate": 2.2349086338062617e-06, "loss": 0.0155, "step": 191960 }, { "epoch": 0.8009822166217423, "grad_norm": 0.565512838494832, "learning_rate": 2.2348795277865535e-06, "loss": 0.0184, "step": 191965 }, { "epoch": 0.8010030793367325, "grad_norm": 1.4193612622765035, "learning_rate": 2.2348504229039903e-06, "loss": 0.026, "step": 191970 }, { "epoch": 0.8010239420517229, "grad_norm": 0.49274123235421247, "learning_rate": 2.2348213191584974e-06, "loss": 0.0176, "step": 191975 }, { "epoch": 0.8010448047667131, "grad_norm": 0.46075851938353557, "learning_rate": 2.2347922165500017e-06, "loss": 0.0182, "step": 191980 }, { "epoch": 0.8010656674817034, "grad_norm": 0.5195112447676433, "learning_rate": 2.234763115078429e-06, "loss": 0.0249, "step": 191985 }, { "epoch": 0.8010865301966936, "grad_norm": 1.1600254193177442, "learning_rate": 2.2347340147437046e-06, "loss": 0.0248, "step": 191990 }, { "epoch": 0.801107392911684, "grad_norm": 0.3370187080456419, "learning_rate": 2.2347049155457553e-06, "loss": 0.0226, "step": 191995 }, { "epoch": 0.8011282556266742, "grad_norm": 0.670472245949177, "learning_rate": 2.234675817484507e-06, "loss": 0.0194, "step": 192000 }, { "epoch": 0.8011491183416645, "grad_norm": 0.9167392305758317, "learning_rate": 2.2346467205598848e-06, "loss": 0.0193, "step": 192005 }, { "epoch": 0.8011699810566548, "grad_norm": 0.2784576937838154, "learning_rate": 2.234617624771816e-06, "loss": 0.0247, "step": 192010 }, { "epoch": 0.8011908437716451, "grad_norm": 0.5766546265098701, "learning_rate": 2.2345885301202256e-06, "loss": 0.0183, "step": 192015 }, { "epoch": 0.8012117064866353, "grad_norm": 0.320741039053924, "learning_rate": 2.2345594366050394e-06, "loss": 0.0147, "step": 192020 }, { "epoch": 0.8012325692016257, "grad_norm": 0.7337846706991338, "learning_rate": 2.234530344226185e-06, "loss": 0.0294, "step": 192025 }, { "epoch": 0.8012534319166159, "grad_norm": 0.48293032480014764, "learning_rate": 2.2345012529835868e-06, "loss": 0.0219, "step": 192030 }, { "epoch": 0.8012742946316062, "grad_norm": 0.5272345669237141, "learning_rate": 2.2344721628771716e-06, "loss": 0.0242, "step": 192035 }, { "epoch": 0.8012951573465965, "grad_norm": 0.7551241342241549, "learning_rate": 2.2344430739068653e-06, "loss": 0.0227, "step": 192040 }, { "epoch": 0.8013160200615868, "grad_norm": 0.5239508153085402, "learning_rate": 2.234413986072594e-06, "loss": 0.0209, "step": 192045 }, { "epoch": 0.801336882776577, "grad_norm": 0.5120189278270267, "learning_rate": 2.234384899374284e-06, "loss": 0.0135, "step": 192050 }, { "epoch": 0.8013577454915672, "grad_norm": 0.3287042616006088, "learning_rate": 2.2343558138118606e-06, "loss": 0.0201, "step": 192055 }, { "epoch": 0.8013786082065576, "grad_norm": 0.5704809033096105, "learning_rate": 2.2343267293852503e-06, "loss": 0.0217, "step": 192060 }, { "epoch": 0.8013994709215478, "grad_norm": 0.5802180387486141, "learning_rate": 2.23429764609438e-06, "loss": 0.0207, "step": 192065 }, { "epoch": 0.8014203336365381, "grad_norm": 0.4754355728638859, "learning_rate": 2.2342685639391744e-06, "loss": 0.0191, "step": 192070 }, { "epoch": 0.8014411963515284, "grad_norm": 0.7456838566007706, "learning_rate": 2.2342394829195606e-06, "loss": 0.0227, "step": 192075 }, { "epoch": 0.8014620590665187, "grad_norm": 0.3745968249687435, "learning_rate": 2.234210403035464e-06, "loss": 0.0162, "step": 192080 }, { "epoch": 0.8014829217815089, "grad_norm": 0.9290412880216006, "learning_rate": 2.2341813242868106e-06, "loss": 0.0175, "step": 192085 }, { "epoch": 0.8015037844964993, "grad_norm": 0.9229097182332644, "learning_rate": 2.2341522466735275e-06, "loss": 0.0217, "step": 192090 }, { "epoch": 0.8015246472114895, "grad_norm": 0.7239016504216859, "learning_rate": 2.23412317019554e-06, "loss": 0.0179, "step": 192095 }, { "epoch": 0.8015455099264798, "grad_norm": 0.43475564080637896, "learning_rate": 2.2340940948527744e-06, "loss": 0.0196, "step": 192100 }, { "epoch": 0.80156637264147, "grad_norm": 0.6704917694057867, "learning_rate": 2.234065020645157e-06, "loss": 0.0155, "step": 192105 }, { "epoch": 0.8015872353564604, "grad_norm": 0.4265390041019845, "learning_rate": 2.234035947572614e-06, "loss": 0.0216, "step": 192110 }, { "epoch": 0.8016080980714506, "grad_norm": 0.4660477640135337, "learning_rate": 2.234006875635071e-06, "loss": 0.0166, "step": 192115 }, { "epoch": 0.8016289607864409, "grad_norm": 0.8363301750894396, "learning_rate": 2.233977804832455e-06, "loss": 0.0225, "step": 192120 }, { "epoch": 0.8016498235014312, "grad_norm": 0.8852740915778806, "learning_rate": 2.2339487351646918e-06, "loss": 0.024, "step": 192125 }, { "epoch": 0.8016706862164215, "grad_norm": 0.7476329118052212, "learning_rate": 2.233919666631707e-06, "loss": 0.0278, "step": 192130 }, { "epoch": 0.8016915489314117, "grad_norm": 0.3771008898949171, "learning_rate": 2.233890599233427e-06, "loss": 0.0277, "step": 192135 }, { "epoch": 0.8017124116464021, "grad_norm": 0.47753547340099867, "learning_rate": 2.2338615329697787e-06, "loss": 0.0197, "step": 192140 }, { "epoch": 0.8017332743613923, "grad_norm": 0.6325004652391361, "learning_rate": 2.2338324678406872e-06, "loss": 0.025, "step": 192145 }, { "epoch": 0.8017541370763825, "grad_norm": 0.5140686439744448, "learning_rate": 2.2338034038460803e-06, "loss": 0.0205, "step": 192150 }, { "epoch": 0.8017749997913729, "grad_norm": 0.9551348035919943, "learning_rate": 2.233774340985882e-06, "loss": 0.0284, "step": 192155 }, { "epoch": 0.8017958625063631, "grad_norm": 0.5225151932586813, "learning_rate": 2.2337452792600204e-06, "loss": 0.0219, "step": 192160 }, { "epoch": 0.8018167252213534, "grad_norm": 0.5829838952604868, "learning_rate": 2.2337162186684213e-06, "loss": 0.0289, "step": 192165 }, { "epoch": 0.8018375879363436, "grad_norm": 0.3003731783177781, "learning_rate": 2.2336871592110103e-06, "loss": 0.0171, "step": 192170 }, { "epoch": 0.801858450651334, "grad_norm": 0.4838907962267537, "learning_rate": 2.2336581008877138e-06, "loss": 0.0182, "step": 192175 }, { "epoch": 0.8018793133663242, "grad_norm": 0.5117623355973275, "learning_rate": 2.233629043698458e-06, "loss": 0.0192, "step": 192180 }, { "epoch": 0.8019001760813145, "grad_norm": 0.7295512946068096, "learning_rate": 2.23359998764317e-06, "loss": 0.0144, "step": 192185 }, { "epoch": 0.8019210387963048, "grad_norm": 0.7323406456358057, "learning_rate": 2.2335709327217747e-06, "loss": 0.0172, "step": 192190 }, { "epoch": 0.8019419015112951, "grad_norm": 0.6538316052363385, "learning_rate": 2.2335418789341994e-06, "loss": 0.0186, "step": 192195 }, { "epoch": 0.8019627642262853, "grad_norm": 0.6652313125137562, "learning_rate": 2.23351282628037e-06, "loss": 0.0183, "step": 192200 }, { "epoch": 0.8019836269412757, "grad_norm": 1.0341205684916532, "learning_rate": 2.233483774760212e-06, "loss": 0.0253, "step": 192205 }, { "epoch": 0.8020044896562659, "grad_norm": 0.36843619502822, "learning_rate": 2.2334547243736533e-06, "loss": 0.0221, "step": 192210 }, { "epoch": 0.8020253523712562, "grad_norm": 0.35731623665639906, "learning_rate": 2.233425675120619e-06, "loss": 0.0193, "step": 192215 }, { "epoch": 0.8020462150862465, "grad_norm": 0.8702449904192234, "learning_rate": 2.2333966270010357e-06, "loss": 0.0231, "step": 192220 }, { "epoch": 0.8020670778012368, "grad_norm": 0.5723239808774827, "learning_rate": 2.2333675800148292e-06, "loss": 0.0191, "step": 192225 }, { "epoch": 0.802087940516227, "grad_norm": 1.6800722687810492, "learning_rate": 2.233338534161927e-06, "loss": 0.0266, "step": 192230 }, { "epoch": 0.8021088032312172, "grad_norm": 0.8561127555668432, "learning_rate": 2.233309489442255e-06, "loss": 0.019, "step": 192235 }, { "epoch": 0.8021296659462076, "grad_norm": 0.7533242829391915, "learning_rate": 2.233280445855738e-06, "loss": 0.0185, "step": 192240 }, { "epoch": 0.8021505286611978, "grad_norm": 0.345118212951565, "learning_rate": 2.233251403402304e-06, "loss": 0.0204, "step": 192245 }, { "epoch": 0.8021713913761881, "grad_norm": 0.5725859657835148, "learning_rate": 2.233222362081879e-06, "loss": 0.0197, "step": 192250 }, { "epoch": 0.8021922540911784, "grad_norm": 0.2986055754624666, "learning_rate": 2.233193321894389e-06, "loss": 0.0311, "step": 192255 }, { "epoch": 0.8022131168061687, "grad_norm": 0.5030432934462995, "learning_rate": 2.233164282839761e-06, "loss": 0.0303, "step": 192260 }, { "epoch": 0.8022339795211589, "grad_norm": 0.9356652882998077, "learning_rate": 2.2331352449179205e-06, "loss": 0.024, "step": 192265 }, { "epoch": 0.8022548422361493, "grad_norm": 0.4873780183190063, "learning_rate": 2.2331062081287942e-06, "loss": 0.0241, "step": 192270 }, { "epoch": 0.8022757049511395, "grad_norm": 0.7992309710053489, "learning_rate": 2.2330771724723086e-06, "loss": 0.0312, "step": 192275 }, { "epoch": 0.8022965676661298, "grad_norm": 0.7689667434573545, "learning_rate": 2.23304813794839e-06, "loss": 0.0235, "step": 192280 }, { "epoch": 0.80231743038112, "grad_norm": 0.9122051721046415, "learning_rate": 2.2330191045569643e-06, "loss": 0.0263, "step": 192285 }, { "epoch": 0.8023382930961104, "grad_norm": 0.759694081908243, "learning_rate": 2.2329900722979587e-06, "loss": 0.0202, "step": 192290 }, { "epoch": 0.8023591558111006, "grad_norm": 0.7064027864890328, "learning_rate": 2.232961041171299e-06, "loss": 0.0192, "step": 192295 }, { "epoch": 0.8023800185260909, "grad_norm": 0.8215620700258278, "learning_rate": 2.232932011176912e-06, "loss": 0.0201, "step": 192300 }, { "epoch": 0.8024008812410812, "grad_norm": 0.5049788009783941, "learning_rate": 2.2329029823147234e-06, "loss": 0.017, "step": 192305 }, { "epoch": 0.8024217439560715, "grad_norm": 0.7803747362292305, "learning_rate": 2.2328739545846607e-06, "loss": 0.032, "step": 192310 }, { "epoch": 0.8024426066710617, "grad_norm": 0.964088713314352, "learning_rate": 2.232844927986649e-06, "loss": 0.0294, "step": 192315 }, { "epoch": 0.8024634693860521, "grad_norm": 0.5408113199464272, "learning_rate": 2.2328159025206164e-06, "loss": 0.0216, "step": 192320 }, { "epoch": 0.8024843321010423, "grad_norm": 0.5708236073641818, "learning_rate": 2.2327868781864875e-06, "loss": 0.0208, "step": 192325 }, { "epoch": 0.8025051948160326, "grad_norm": 0.40728761708297123, "learning_rate": 2.23275785498419e-06, "loss": 0.0166, "step": 192330 }, { "epoch": 0.8025260575310229, "grad_norm": 0.715842943951473, "learning_rate": 2.2327288329136498e-06, "loss": 0.0244, "step": 192335 }, { "epoch": 0.8025469202460132, "grad_norm": 0.7238164482376744, "learning_rate": 2.232699811974793e-06, "loss": 0.0223, "step": 192340 }, { "epoch": 0.8025677829610034, "grad_norm": 0.3478091283502143, "learning_rate": 2.232670792167547e-06, "loss": 0.0129, "step": 192345 }, { "epoch": 0.8025886456759936, "grad_norm": 0.43971431275822875, "learning_rate": 2.2326417734918376e-06, "loss": 0.0196, "step": 192350 }, { "epoch": 0.802609508390984, "grad_norm": 0.4952542606072658, "learning_rate": 2.2326127559475917e-06, "loss": 0.0217, "step": 192355 }, { "epoch": 0.8026303711059742, "grad_norm": 0.4258532726772709, "learning_rate": 2.2325837395347353e-06, "loss": 0.0125, "step": 192360 }, { "epoch": 0.8026512338209645, "grad_norm": 0.743895463213089, "learning_rate": 2.232554724253195e-06, "loss": 0.0241, "step": 192365 }, { "epoch": 0.8026720965359548, "grad_norm": 0.7078863142578632, "learning_rate": 2.2325257101028985e-06, "loss": 0.02, "step": 192370 }, { "epoch": 0.8026929592509451, "grad_norm": 0.6572051784922588, "learning_rate": 2.23249669708377e-06, "loss": 0.0185, "step": 192375 }, { "epoch": 0.8027138219659353, "grad_norm": 1.1111118947241763, "learning_rate": 2.2324676851957377e-06, "loss": 0.0284, "step": 192380 }, { "epoch": 0.8027346846809257, "grad_norm": 0.5207564546154497, "learning_rate": 2.2324386744387274e-06, "loss": 0.017, "step": 192385 }, { "epoch": 0.8027555473959159, "grad_norm": 0.3337428183121231, "learning_rate": 2.2324096648126658e-06, "loss": 0.022, "step": 192390 }, { "epoch": 0.8027764101109062, "grad_norm": 0.7200210376086588, "learning_rate": 2.2323806563174795e-06, "loss": 0.027, "step": 192395 }, { "epoch": 0.8027972728258965, "grad_norm": 0.5477279607901088, "learning_rate": 2.2323516489530945e-06, "loss": 0.0236, "step": 192400 }, { "epoch": 0.8028181355408868, "grad_norm": 0.39122653695954973, "learning_rate": 2.2323226427194384e-06, "loss": 0.019, "step": 192405 }, { "epoch": 0.802838998255877, "grad_norm": 0.7268494594003201, "learning_rate": 2.2322936376164366e-06, "loss": 0.0231, "step": 192410 }, { "epoch": 0.8028598609708673, "grad_norm": 0.5361530716981139, "learning_rate": 2.2322646336440166e-06, "loss": 0.0166, "step": 192415 }, { "epoch": 0.8028807236858576, "grad_norm": 1.5359529866765758, "learning_rate": 2.2322356308021045e-06, "loss": 0.0293, "step": 192420 }, { "epoch": 0.8029015864008479, "grad_norm": 0.44046177380417695, "learning_rate": 2.232206629090627e-06, "loss": 0.018, "step": 192425 }, { "epoch": 0.8029224491158381, "grad_norm": 1.2728401813445782, "learning_rate": 2.23217762850951e-06, "loss": 0.0222, "step": 192430 }, { "epoch": 0.8029433118308285, "grad_norm": 0.7210361945982464, "learning_rate": 2.232148629058681e-06, "loss": 0.0185, "step": 192435 }, { "epoch": 0.8029641745458187, "grad_norm": 0.45689407331930526, "learning_rate": 2.2321196307380664e-06, "loss": 0.0159, "step": 192440 }, { "epoch": 0.8029850372608089, "grad_norm": 0.7161809936850475, "learning_rate": 2.232090633547593e-06, "loss": 0.0224, "step": 192445 }, { "epoch": 0.8030058999757993, "grad_norm": 0.7941688254174778, "learning_rate": 2.232061637487186e-06, "loss": 0.0255, "step": 192450 }, { "epoch": 0.8030267626907895, "grad_norm": 0.4779237826101347, "learning_rate": 2.232032642556774e-06, "loss": 0.0241, "step": 192455 }, { "epoch": 0.8030476254057798, "grad_norm": 1.3122113697725222, "learning_rate": 2.2320036487562815e-06, "loss": 0.0361, "step": 192460 }, { "epoch": 0.80306848812077, "grad_norm": 0.7090895893835101, "learning_rate": 2.231974656085637e-06, "loss": 0.0176, "step": 192465 }, { "epoch": 0.8030893508357604, "grad_norm": 0.5514143962106918, "learning_rate": 2.2319456645447657e-06, "loss": 0.0174, "step": 192470 }, { "epoch": 0.8031102135507506, "grad_norm": 1.0516085991809134, "learning_rate": 2.2319166741335956e-06, "loss": 0.0179, "step": 192475 }, { "epoch": 0.8031310762657409, "grad_norm": 0.4764466277238955, "learning_rate": 2.2318876848520524e-06, "loss": 0.012, "step": 192480 }, { "epoch": 0.8031519389807312, "grad_norm": 0.885165106563344, "learning_rate": 2.231858696700063e-06, "loss": 0.0282, "step": 192485 }, { "epoch": 0.8031728016957215, "grad_norm": 1.7223456127005214, "learning_rate": 2.2318297096775536e-06, "loss": 0.0228, "step": 192490 }, { "epoch": 0.8031936644107117, "grad_norm": 1.270492858224916, "learning_rate": 2.2318007237844518e-06, "loss": 0.0263, "step": 192495 }, { "epoch": 0.8032145271257021, "grad_norm": 0.9389284119923825, "learning_rate": 2.231771739020683e-06, "loss": 0.0256, "step": 192500 }, { "epoch": 0.8032353898406923, "grad_norm": 0.5590005489119287, "learning_rate": 2.231742755386175e-06, "loss": 0.0211, "step": 192505 }, { "epoch": 0.8032562525556826, "grad_norm": 0.8417060498859941, "learning_rate": 2.2317137728808543e-06, "loss": 0.0177, "step": 192510 }, { "epoch": 0.8032771152706729, "grad_norm": 0.3487173890526415, "learning_rate": 2.2316847915046467e-06, "loss": 0.0185, "step": 192515 }, { "epoch": 0.8032979779856632, "grad_norm": 0.5507711639217746, "learning_rate": 2.23165581125748e-06, "loss": 0.019, "step": 192520 }, { "epoch": 0.8033188407006534, "grad_norm": 0.8229770465707381, "learning_rate": 2.2316268321392805e-06, "loss": 0.0247, "step": 192525 }, { "epoch": 0.8033397034156436, "grad_norm": 0.47941023867004084, "learning_rate": 2.2315978541499744e-06, "loss": 0.0208, "step": 192530 }, { "epoch": 0.803360566130634, "grad_norm": 0.6839662458565273, "learning_rate": 2.231568877289489e-06, "loss": 0.0176, "step": 192535 }, { "epoch": 0.8033814288456242, "grad_norm": 0.9350039964370548, "learning_rate": 2.231539901557751e-06, "loss": 0.0196, "step": 192540 }, { "epoch": 0.8034022915606145, "grad_norm": 1.0384632467642194, "learning_rate": 2.2315109269546868e-06, "loss": 0.0266, "step": 192545 }, { "epoch": 0.8034231542756048, "grad_norm": 0.6783431381754099, "learning_rate": 2.231481953480224e-06, "loss": 0.0324, "step": 192550 }, { "epoch": 0.8034440169905951, "grad_norm": 0.48944624426213834, "learning_rate": 2.2314529811342875e-06, "loss": 0.0156, "step": 192555 }, { "epoch": 0.8034648797055853, "grad_norm": 0.6286149246805561, "learning_rate": 2.2314240099168053e-06, "loss": 0.0245, "step": 192560 }, { "epoch": 0.8034857424205757, "grad_norm": 0.6808556725962998, "learning_rate": 2.2313950398277048e-06, "loss": 0.0229, "step": 192565 }, { "epoch": 0.8035066051355659, "grad_norm": 0.7822250886216068, "learning_rate": 2.2313660708669114e-06, "loss": 0.021, "step": 192570 }, { "epoch": 0.8035274678505562, "grad_norm": 0.5304110443787542, "learning_rate": 2.2313371030343524e-06, "loss": 0.0144, "step": 192575 }, { "epoch": 0.8035483305655465, "grad_norm": 1.113468009432963, "learning_rate": 2.231308136329955e-06, "loss": 0.0266, "step": 192580 }, { "epoch": 0.8035691932805368, "grad_norm": 0.7658772199949622, "learning_rate": 2.2312791707536448e-06, "loss": 0.0148, "step": 192585 }, { "epoch": 0.803590055995527, "grad_norm": 1.0546571438268577, "learning_rate": 2.23125020630535e-06, "loss": 0.0182, "step": 192590 }, { "epoch": 0.8036109187105173, "grad_norm": 0.6423661383287347, "learning_rate": 2.2312212429849965e-06, "loss": 0.0167, "step": 192595 }, { "epoch": 0.8036317814255076, "grad_norm": 0.5711315080292162, "learning_rate": 2.2311922807925114e-06, "loss": 0.0261, "step": 192600 }, { "epoch": 0.8036526441404979, "grad_norm": 0.9847521012596309, "learning_rate": 2.231163319727822e-06, "loss": 0.0199, "step": 192605 }, { "epoch": 0.8036735068554881, "grad_norm": 0.5208704999392265, "learning_rate": 2.2311343597908534e-06, "loss": 0.0171, "step": 192610 }, { "epoch": 0.8036943695704785, "grad_norm": 0.9323152683396703, "learning_rate": 2.231105400981534e-06, "loss": 0.0207, "step": 192615 }, { "epoch": 0.8037152322854687, "grad_norm": 0.7351595116558032, "learning_rate": 2.2310764432997903e-06, "loss": 0.0257, "step": 192620 }, { "epoch": 0.803736095000459, "grad_norm": 6.921774298959163, "learning_rate": 2.231047486745549e-06, "loss": 0.0211, "step": 192625 }, { "epoch": 0.8037569577154493, "grad_norm": 0.9378255652258144, "learning_rate": 2.231018531318737e-06, "loss": 0.0221, "step": 192630 }, { "epoch": 0.8037778204304395, "grad_norm": 0.3668193357814152, "learning_rate": 2.230989577019281e-06, "loss": 0.0281, "step": 192635 }, { "epoch": 0.8037986831454298, "grad_norm": 0.49452179141327685, "learning_rate": 2.2309606238471082e-06, "loss": 0.0203, "step": 192640 }, { "epoch": 0.80381954586042, "grad_norm": 0.8206712177560515, "learning_rate": 2.230931671802145e-06, "loss": 0.0152, "step": 192645 }, { "epoch": 0.8038404085754104, "grad_norm": 0.5811779749613438, "learning_rate": 2.2309027208843183e-06, "loss": 0.0198, "step": 192650 }, { "epoch": 0.8038612712904006, "grad_norm": 0.6930712679720281, "learning_rate": 2.230873771093555e-06, "loss": 0.0199, "step": 192655 }, { "epoch": 0.8038821340053909, "grad_norm": 0.5511266233645788, "learning_rate": 2.2308448224297825e-06, "loss": 0.011, "step": 192660 }, { "epoch": 0.8039029967203812, "grad_norm": 0.9252690970524937, "learning_rate": 2.2308158748929265e-06, "loss": 0.0249, "step": 192665 }, { "epoch": 0.8039238594353715, "grad_norm": 1.7833502637510938, "learning_rate": 2.230786928482915e-06, "loss": 0.0198, "step": 192670 }, { "epoch": 0.8039447221503617, "grad_norm": 0.3164609890057039, "learning_rate": 2.230757983199675e-06, "loss": 0.0244, "step": 192675 }, { "epoch": 0.8039655848653521, "grad_norm": 0.4365039415907775, "learning_rate": 2.2307290390431327e-06, "loss": 0.0189, "step": 192680 }, { "epoch": 0.8039864475803423, "grad_norm": 0.6626338021863251, "learning_rate": 2.2307000960132154e-06, "loss": 0.0185, "step": 192685 }, { "epoch": 0.8040073102953326, "grad_norm": 0.23832619524224205, "learning_rate": 2.230671154109849e-06, "loss": 0.0168, "step": 192690 }, { "epoch": 0.8040281730103229, "grad_norm": 0.9617685605738598, "learning_rate": 2.2306422133329623e-06, "loss": 0.0255, "step": 192695 }, { "epoch": 0.8040490357253132, "grad_norm": 0.478716930368638, "learning_rate": 2.2306132736824808e-06, "loss": 0.0176, "step": 192700 }, { "epoch": 0.8040698984403034, "grad_norm": 0.7442175884381705, "learning_rate": 2.2305843351583315e-06, "loss": 0.017, "step": 192705 }, { "epoch": 0.8040907611552937, "grad_norm": 0.7131945916144851, "learning_rate": 2.2305553977604422e-06, "loss": 0.0254, "step": 192710 }, { "epoch": 0.804111623870284, "grad_norm": 0.7415626179671682, "learning_rate": 2.2305264614887395e-06, "loss": 0.024, "step": 192715 }, { "epoch": 0.8041324865852743, "grad_norm": 0.851301366337257, "learning_rate": 2.2304975263431493e-06, "loss": 0.0229, "step": 192720 }, { "epoch": 0.8041533493002645, "grad_norm": 0.7657454408703571, "learning_rate": 2.2304685923236e-06, "loss": 0.0211, "step": 192725 }, { "epoch": 0.8041742120152549, "grad_norm": 0.7631062349611444, "learning_rate": 2.2304396594300186e-06, "loss": 0.0185, "step": 192730 }, { "epoch": 0.8041950747302451, "grad_norm": 0.5943592193373536, "learning_rate": 2.2304107276623305e-06, "loss": 0.022, "step": 192735 }, { "epoch": 0.8042159374452353, "grad_norm": 0.6141615919007292, "learning_rate": 2.2303817970204645e-06, "loss": 0.0192, "step": 192740 }, { "epoch": 0.8042368001602257, "grad_norm": 0.6093743656081936, "learning_rate": 2.230352867504346e-06, "loss": 0.0217, "step": 192745 }, { "epoch": 0.8042576628752159, "grad_norm": 0.6734374116282469, "learning_rate": 2.2303239391139032e-06, "loss": 0.0197, "step": 192750 }, { "epoch": 0.8042785255902062, "grad_norm": 0.6288119356167934, "learning_rate": 2.2302950118490627e-06, "loss": 0.0225, "step": 192755 }, { "epoch": 0.8042993883051965, "grad_norm": 0.6909433786567566, "learning_rate": 2.230266085709751e-06, "loss": 0.0219, "step": 192760 }, { "epoch": 0.8043202510201868, "grad_norm": 0.2983504262470639, "learning_rate": 2.230237160695896e-06, "loss": 0.0152, "step": 192765 }, { "epoch": 0.804341113735177, "grad_norm": 1.1524178641730303, "learning_rate": 2.2302082368074245e-06, "loss": 0.0288, "step": 192770 }, { "epoch": 0.8043619764501673, "grad_norm": 0.4209839697569889, "learning_rate": 2.2301793140442625e-06, "loss": 0.0195, "step": 192775 }, { "epoch": 0.8043828391651576, "grad_norm": 0.5957766608887702, "learning_rate": 2.2301503924063385e-06, "loss": 0.0139, "step": 192780 }, { "epoch": 0.8044037018801479, "grad_norm": 0.4760400026373919, "learning_rate": 2.2301214718935788e-06, "loss": 0.0187, "step": 192785 }, { "epoch": 0.8044245645951381, "grad_norm": 1.0498514815163167, "learning_rate": 2.23009255250591e-06, "loss": 0.0248, "step": 192790 }, { "epoch": 0.8044454273101285, "grad_norm": 0.7017034861285117, "learning_rate": 2.2300636342432603e-06, "loss": 0.0217, "step": 192795 }, { "epoch": 0.8044662900251187, "grad_norm": 0.594798593901818, "learning_rate": 2.230034717105556e-06, "loss": 0.0227, "step": 192800 }, { "epoch": 0.804487152740109, "grad_norm": 1.0993250082817758, "learning_rate": 2.230005801092724e-06, "loss": 0.0223, "step": 192805 }, { "epoch": 0.8045080154550993, "grad_norm": 0.683901682111317, "learning_rate": 2.229976886204692e-06, "loss": 0.0199, "step": 192810 }, { "epoch": 0.8045288781700896, "grad_norm": 0.2767439700696519, "learning_rate": 2.2299479724413867e-06, "loss": 0.0217, "step": 192815 }, { "epoch": 0.8045497408850798, "grad_norm": 0.7060749671160352, "learning_rate": 2.2299190598027355e-06, "loss": 0.0211, "step": 192820 }, { "epoch": 0.80457060360007, "grad_norm": 0.46014563665725056, "learning_rate": 2.229890148288665e-06, "loss": 0.02, "step": 192825 }, { "epoch": 0.8045914663150604, "grad_norm": 0.6379132813502647, "learning_rate": 2.229861237899102e-06, "loss": 0.0179, "step": 192830 }, { "epoch": 0.8046123290300506, "grad_norm": 0.5793514695408252, "learning_rate": 2.2298323286339747e-06, "loss": 0.0223, "step": 192835 }, { "epoch": 0.8046331917450409, "grad_norm": 0.7781848224524724, "learning_rate": 2.22980342049321e-06, "loss": 0.0179, "step": 192840 }, { "epoch": 0.8046540544600312, "grad_norm": 0.9126050541366729, "learning_rate": 2.229774513476734e-06, "loss": 0.0292, "step": 192845 }, { "epoch": 0.8046749171750215, "grad_norm": 0.32961393824377017, "learning_rate": 2.2297456075844747e-06, "loss": 0.0204, "step": 192850 }, { "epoch": 0.8046957798900117, "grad_norm": 0.7405009460083134, "learning_rate": 2.229716702816359e-06, "loss": 0.0216, "step": 192855 }, { "epoch": 0.8047166426050021, "grad_norm": 1.0073343236497685, "learning_rate": 2.2296877991723144e-06, "loss": 0.0205, "step": 192860 }, { "epoch": 0.8047375053199923, "grad_norm": 1.5871924240188595, "learning_rate": 2.229658896652268e-06, "loss": 0.0263, "step": 192865 }, { "epoch": 0.8047583680349826, "grad_norm": 0.8924122872910525, "learning_rate": 2.229629995256146e-06, "loss": 0.0191, "step": 192870 }, { "epoch": 0.8047792307499729, "grad_norm": 0.541276042709693, "learning_rate": 2.2296010949838764e-06, "loss": 0.0199, "step": 192875 }, { "epoch": 0.8048000934649632, "grad_norm": 0.9241177118822927, "learning_rate": 2.229572195835386e-06, "loss": 0.021, "step": 192880 }, { "epoch": 0.8048209561799534, "grad_norm": 0.36608546309258866, "learning_rate": 2.229543297810603e-06, "loss": 0.0213, "step": 192885 }, { "epoch": 0.8048418188949437, "grad_norm": 0.6034712251769867, "learning_rate": 2.2295144009094526e-06, "loss": 0.0228, "step": 192890 }, { "epoch": 0.804862681609934, "grad_norm": 0.41249161681531127, "learning_rate": 2.229485505131864e-06, "loss": 0.0174, "step": 192895 }, { "epoch": 0.8048835443249243, "grad_norm": 0.2705214274314344, "learning_rate": 2.2294566104777634e-06, "loss": 0.0185, "step": 192900 }, { "epoch": 0.8049044070399145, "grad_norm": 0.7322310031835271, "learning_rate": 2.229427716947078e-06, "loss": 0.0195, "step": 192905 }, { "epoch": 0.8049252697549049, "grad_norm": 0.8598279518741786, "learning_rate": 2.229398824539735e-06, "loss": 0.0192, "step": 192910 }, { "epoch": 0.8049461324698951, "grad_norm": 0.6178225732717428, "learning_rate": 2.229369933255662e-06, "loss": 0.0223, "step": 192915 }, { "epoch": 0.8049669951848853, "grad_norm": 0.8185418680280251, "learning_rate": 2.229341043094786e-06, "loss": 0.016, "step": 192920 }, { "epoch": 0.8049878578998757, "grad_norm": 0.8851233574554175, "learning_rate": 2.2293121540570335e-06, "loss": 0.0271, "step": 192925 }, { "epoch": 0.805008720614866, "grad_norm": 0.4042192209830285, "learning_rate": 2.229283266142333e-06, "loss": 0.0173, "step": 192930 }, { "epoch": 0.8050295833298562, "grad_norm": 0.41069754570189365, "learning_rate": 2.2292543793506113e-06, "loss": 0.0193, "step": 192935 }, { "epoch": 0.8050504460448465, "grad_norm": 0.6461713593813024, "learning_rate": 2.229225493681795e-06, "loss": 0.0186, "step": 192940 }, { "epoch": 0.8050713087598368, "grad_norm": 0.5704548083882617, "learning_rate": 2.229196609135812e-06, "loss": 0.0217, "step": 192945 }, { "epoch": 0.805092171474827, "grad_norm": 0.8591086784977966, "learning_rate": 2.2291677257125897e-06, "loss": 0.0337, "step": 192950 }, { "epoch": 0.8051130341898173, "grad_norm": 0.3948082849544071, "learning_rate": 2.229138843412055e-06, "loss": 0.0172, "step": 192955 }, { "epoch": 0.8051338969048076, "grad_norm": 0.5455222031311003, "learning_rate": 2.229109962234135e-06, "loss": 0.0309, "step": 192960 }, { "epoch": 0.8051547596197979, "grad_norm": 0.5337860923769195, "learning_rate": 2.229081082178757e-06, "loss": 0.0176, "step": 192965 }, { "epoch": 0.8051756223347881, "grad_norm": 0.3561729440479192, "learning_rate": 2.2290522032458495e-06, "loss": 0.0237, "step": 192970 }, { "epoch": 0.8051964850497785, "grad_norm": 0.6615318551488436, "learning_rate": 2.2290233254353385e-06, "loss": 0.0257, "step": 192975 }, { "epoch": 0.8052173477647687, "grad_norm": 1.1008796247341042, "learning_rate": 2.2289944487471508e-06, "loss": 0.0221, "step": 192980 }, { "epoch": 0.805238210479759, "grad_norm": 0.34409893946485925, "learning_rate": 2.2289655731812148e-06, "loss": 0.0173, "step": 192985 }, { "epoch": 0.8052590731947493, "grad_norm": 0.9225193178522161, "learning_rate": 2.228936698737458e-06, "loss": 0.0177, "step": 192990 }, { "epoch": 0.8052799359097396, "grad_norm": 0.8131406410083228, "learning_rate": 2.2289078254158066e-06, "loss": 0.0257, "step": 192995 }, { "epoch": 0.8053007986247298, "grad_norm": 0.7143638792967373, "learning_rate": 2.2288789532161892e-06, "loss": 0.0201, "step": 193000 }, { "epoch": 0.80532166133972, "grad_norm": 0.685825291787454, "learning_rate": 2.228850082138532e-06, "loss": 0.0235, "step": 193005 }, { "epoch": 0.8053425240547104, "grad_norm": 0.7208845494408032, "learning_rate": 2.228821212182763e-06, "loss": 0.023, "step": 193010 }, { "epoch": 0.8053633867697007, "grad_norm": 0.9380708633184618, "learning_rate": 2.228792343348809e-06, "loss": 0.019, "step": 193015 }, { "epoch": 0.8053842494846909, "grad_norm": 1.3839374587959374, "learning_rate": 2.2287634756365985e-06, "loss": 0.0273, "step": 193020 }, { "epoch": 0.8054051121996813, "grad_norm": 0.5715272040332067, "learning_rate": 2.2287346090460574e-06, "loss": 0.0267, "step": 193025 }, { "epoch": 0.8054259749146715, "grad_norm": 0.7317642799613723, "learning_rate": 2.228705743577114e-06, "loss": 0.0211, "step": 193030 }, { "epoch": 0.8054468376296617, "grad_norm": 0.8788388862039617, "learning_rate": 2.228676879229695e-06, "loss": 0.0214, "step": 193035 }, { "epoch": 0.8054677003446521, "grad_norm": 1.21269807966801, "learning_rate": 2.228648016003729e-06, "loss": 0.029, "step": 193040 }, { "epoch": 0.8054885630596423, "grad_norm": 0.36550112583196287, "learning_rate": 2.228619153899142e-06, "loss": 0.0178, "step": 193045 }, { "epoch": 0.8055094257746326, "grad_norm": 1.0667820078559258, "learning_rate": 2.228590292915862e-06, "loss": 0.0217, "step": 193050 }, { "epoch": 0.8055302884896229, "grad_norm": 1.3167086618791564, "learning_rate": 2.2285614330538166e-06, "loss": 0.0203, "step": 193055 }, { "epoch": 0.8055511512046132, "grad_norm": 0.6543613975705015, "learning_rate": 2.2285325743129322e-06, "loss": 0.0172, "step": 193060 }, { "epoch": 0.8055720139196034, "grad_norm": 0.6015787684864015, "learning_rate": 2.228503716693138e-06, "loss": 0.0164, "step": 193065 }, { "epoch": 0.8055928766345937, "grad_norm": 0.5252702515216017, "learning_rate": 2.22847486019436e-06, "loss": 0.0193, "step": 193070 }, { "epoch": 0.805613739349584, "grad_norm": 1.382762188094136, "learning_rate": 2.228446004816526e-06, "loss": 0.0278, "step": 193075 }, { "epoch": 0.8056346020645743, "grad_norm": 1.2968809068063392, "learning_rate": 2.228417150559563e-06, "loss": 0.0282, "step": 193080 }, { "epoch": 0.8056554647795645, "grad_norm": 0.37271253217713124, "learning_rate": 2.228388297423399e-06, "loss": 0.0262, "step": 193085 }, { "epoch": 0.8056763274945549, "grad_norm": 0.758172981266318, "learning_rate": 2.228359445407962e-06, "loss": 0.0217, "step": 193090 }, { "epoch": 0.8056971902095451, "grad_norm": 0.8241643928798984, "learning_rate": 2.228330594513178e-06, "loss": 0.0187, "step": 193095 }, { "epoch": 0.8057180529245354, "grad_norm": 0.8248133538285862, "learning_rate": 2.2283017447389756e-06, "loss": 0.0199, "step": 193100 }, { "epoch": 0.8057389156395257, "grad_norm": 0.44250675832070896, "learning_rate": 2.2282728960852817e-06, "loss": 0.0242, "step": 193105 }, { "epoch": 0.805759778354516, "grad_norm": 0.20690135158594322, "learning_rate": 2.228244048552024e-06, "loss": 0.0143, "step": 193110 }, { "epoch": 0.8057806410695062, "grad_norm": 0.7210189603937662, "learning_rate": 2.22821520213913e-06, "loss": 0.0143, "step": 193115 }, { "epoch": 0.8058015037844966, "grad_norm": 0.5815289464771191, "learning_rate": 2.2281863568465266e-06, "loss": 0.0249, "step": 193120 }, { "epoch": 0.8058223664994868, "grad_norm": 0.40759284333448986, "learning_rate": 2.2281575126741427e-06, "loss": 0.0212, "step": 193125 }, { "epoch": 0.805843229214477, "grad_norm": 0.9363717583256275, "learning_rate": 2.228128669621904e-06, "loss": 0.0174, "step": 193130 }, { "epoch": 0.8058640919294673, "grad_norm": 0.7341699862161621, "learning_rate": 2.2280998276897396e-06, "loss": 0.0221, "step": 193135 }, { "epoch": 0.8058849546444576, "grad_norm": 0.4710188688814468, "learning_rate": 2.2280709868775756e-06, "loss": 0.0139, "step": 193140 }, { "epoch": 0.8059058173594479, "grad_norm": 0.4983939792518933, "learning_rate": 2.2280421471853408e-06, "loss": 0.0153, "step": 193145 }, { "epoch": 0.8059266800744381, "grad_norm": 0.49199313752342455, "learning_rate": 2.2280133086129614e-06, "loss": 0.014, "step": 193150 }, { "epoch": 0.8059475427894285, "grad_norm": 0.792965026005731, "learning_rate": 2.2279844711603664e-06, "loss": 0.0232, "step": 193155 }, { "epoch": 0.8059684055044187, "grad_norm": 1.0956809800634533, "learning_rate": 2.2279556348274825e-06, "loss": 0.0232, "step": 193160 }, { "epoch": 0.805989268219409, "grad_norm": 0.6385901668538276, "learning_rate": 2.227926799614237e-06, "loss": 0.0264, "step": 193165 }, { "epoch": 0.8060101309343993, "grad_norm": 0.6611192230184547, "learning_rate": 2.227897965520558e-06, "loss": 0.0213, "step": 193170 }, { "epoch": 0.8060309936493896, "grad_norm": 1.1550664991773332, "learning_rate": 2.227869132546373e-06, "loss": 0.0262, "step": 193175 }, { "epoch": 0.8060518563643798, "grad_norm": 0.6358636646484083, "learning_rate": 2.227840300691609e-06, "loss": 0.0201, "step": 193180 }, { "epoch": 0.8060727190793701, "grad_norm": 0.717432561523934, "learning_rate": 2.2278114699561944e-06, "loss": 0.0179, "step": 193185 }, { "epoch": 0.8060935817943604, "grad_norm": 0.17790884116510094, "learning_rate": 2.2277826403400556e-06, "loss": 0.0116, "step": 193190 }, { "epoch": 0.8061144445093507, "grad_norm": 1.133070024164959, "learning_rate": 2.2277538118431217e-06, "loss": 0.017, "step": 193195 }, { "epoch": 0.8061353072243409, "grad_norm": 0.44845731771278724, "learning_rate": 2.227724984465319e-06, "loss": 0.0191, "step": 193200 }, { "epoch": 0.8061561699393313, "grad_norm": 0.37027961331736065, "learning_rate": 2.2276961582065758e-06, "loss": 0.0194, "step": 193205 }, { "epoch": 0.8061770326543215, "grad_norm": 0.7806355920073503, "learning_rate": 2.2276673330668197e-06, "loss": 0.0204, "step": 193210 }, { "epoch": 0.8061978953693117, "grad_norm": 0.5016660490479714, "learning_rate": 2.227638509045978e-06, "loss": 0.0244, "step": 193215 }, { "epoch": 0.8062187580843021, "grad_norm": 0.34697226054263786, "learning_rate": 2.227609686143978e-06, "loss": 0.012, "step": 193220 }, { "epoch": 0.8062396207992923, "grad_norm": 0.4233761434241946, "learning_rate": 2.2275808643607485e-06, "loss": 0.0167, "step": 193225 }, { "epoch": 0.8062604835142826, "grad_norm": 0.40573992318740654, "learning_rate": 2.227552043696216e-06, "loss": 0.0239, "step": 193230 }, { "epoch": 0.8062813462292729, "grad_norm": 0.7786883601336827, "learning_rate": 2.227523224150308e-06, "loss": 0.0243, "step": 193235 }, { "epoch": 0.8063022089442632, "grad_norm": 0.8357205265960775, "learning_rate": 2.2274944057229534e-06, "loss": 0.0252, "step": 193240 }, { "epoch": 0.8063230716592534, "grad_norm": 0.5135596980272228, "learning_rate": 2.227465588414079e-06, "loss": 0.018, "step": 193245 }, { "epoch": 0.8063439343742437, "grad_norm": 0.7511056099010429, "learning_rate": 2.2274367722236117e-06, "loss": 0.0281, "step": 193250 }, { "epoch": 0.806364797089234, "grad_norm": 0.5131917502184465, "learning_rate": 2.2274079571514804e-06, "loss": 0.0156, "step": 193255 }, { "epoch": 0.8063856598042243, "grad_norm": 0.8062121836912097, "learning_rate": 2.227379143197613e-06, "loss": 0.0249, "step": 193260 }, { "epoch": 0.8064065225192145, "grad_norm": 0.6679959462462814, "learning_rate": 2.227350330361936e-06, "loss": 0.0253, "step": 193265 }, { "epoch": 0.8064273852342049, "grad_norm": 0.7524660057698935, "learning_rate": 2.2273215186443775e-06, "loss": 0.0215, "step": 193270 }, { "epoch": 0.8064482479491951, "grad_norm": 0.5221451722186933, "learning_rate": 2.2272927080448657e-06, "loss": 0.0216, "step": 193275 }, { "epoch": 0.8064691106641854, "grad_norm": 0.6232088071353069, "learning_rate": 2.2272638985633277e-06, "loss": 0.0219, "step": 193280 }, { "epoch": 0.8064899733791757, "grad_norm": 0.4106704572186921, "learning_rate": 2.227235090199691e-06, "loss": 0.0203, "step": 193285 }, { "epoch": 0.806510836094166, "grad_norm": 0.3330276926520844, "learning_rate": 2.227206282953884e-06, "loss": 0.0218, "step": 193290 }, { "epoch": 0.8065316988091562, "grad_norm": 0.5410426930710004, "learning_rate": 2.2271774768258343e-06, "loss": 0.0211, "step": 193295 }, { "epoch": 0.8065525615241466, "grad_norm": 0.6517620604320278, "learning_rate": 2.227148671815469e-06, "loss": 0.0216, "step": 193300 }, { "epoch": 0.8065734242391368, "grad_norm": 0.2866835552082378, "learning_rate": 2.227119867922716e-06, "loss": 0.0156, "step": 193305 }, { "epoch": 0.806594286954127, "grad_norm": 0.7934952705192099, "learning_rate": 2.2270910651475043e-06, "loss": 0.0217, "step": 193310 }, { "epoch": 0.8066151496691173, "grad_norm": 0.5341471761705883, "learning_rate": 2.22706226348976e-06, "loss": 0.0192, "step": 193315 }, { "epoch": 0.8066360123841076, "grad_norm": 0.5182668308982376, "learning_rate": 2.2270334629494113e-06, "loss": 0.0186, "step": 193320 }, { "epoch": 0.8066568750990979, "grad_norm": 1.131749510079443, "learning_rate": 2.2270046635263866e-06, "loss": 0.0223, "step": 193325 }, { "epoch": 0.8066777378140881, "grad_norm": 0.9427747805136985, "learning_rate": 2.226975865220613e-06, "loss": 0.0221, "step": 193330 }, { "epoch": 0.8066986005290785, "grad_norm": 0.5848573414513687, "learning_rate": 2.226947068032018e-06, "loss": 0.0175, "step": 193335 }, { "epoch": 0.8067194632440687, "grad_norm": 0.7456384072730169, "learning_rate": 2.22691827196053e-06, "loss": 0.0195, "step": 193340 }, { "epoch": 0.806740325959059, "grad_norm": 0.8440977143517849, "learning_rate": 2.2268894770060767e-06, "loss": 0.0165, "step": 193345 }, { "epoch": 0.8067611886740493, "grad_norm": 3.3265073925230224, "learning_rate": 2.226860683168586e-06, "loss": 0.0227, "step": 193350 }, { "epoch": 0.8067820513890396, "grad_norm": 1.2159825439008836, "learning_rate": 2.226831890447985e-06, "loss": 0.0216, "step": 193355 }, { "epoch": 0.8068029141040298, "grad_norm": 0.8474292801878297, "learning_rate": 2.2268030988442018e-06, "loss": 0.0247, "step": 193360 }, { "epoch": 0.8068237768190201, "grad_norm": 0.21153235140656138, "learning_rate": 2.226774308357165e-06, "loss": 0.0134, "step": 193365 }, { "epoch": 0.8068446395340104, "grad_norm": 0.826427328111555, "learning_rate": 2.2267455189868014e-06, "loss": 0.0248, "step": 193370 }, { "epoch": 0.8068655022490007, "grad_norm": 0.8529523133003969, "learning_rate": 2.2267167307330394e-06, "loss": 0.0301, "step": 193375 }, { "epoch": 0.8068863649639909, "grad_norm": 0.6600600159904636, "learning_rate": 2.226687943595806e-06, "loss": 0.0265, "step": 193380 }, { "epoch": 0.8069072276789813, "grad_norm": 0.5180448889207617, "learning_rate": 2.22665915757503e-06, "loss": 0.0239, "step": 193385 }, { "epoch": 0.8069280903939715, "grad_norm": 1.3342001417949245, "learning_rate": 2.226630372670639e-06, "loss": 0.0252, "step": 193390 }, { "epoch": 0.8069489531089618, "grad_norm": 0.9325580798400651, "learning_rate": 2.2266015888825603e-06, "loss": 0.0271, "step": 193395 }, { "epoch": 0.8069698158239521, "grad_norm": 0.7027668810459301, "learning_rate": 2.2265728062107226e-06, "loss": 0.0217, "step": 193400 }, { "epoch": 0.8069906785389424, "grad_norm": 0.5869484358187264, "learning_rate": 2.2265440246550527e-06, "loss": 0.0261, "step": 193405 }, { "epoch": 0.8070115412539326, "grad_norm": 0.8010635377232622, "learning_rate": 2.22651524421548e-06, "loss": 0.0209, "step": 193410 }, { "epoch": 0.807032403968923, "grad_norm": 1.2726820236313672, "learning_rate": 2.2264864648919306e-06, "loss": 0.0151, "step": 193415 }, { "epoch": 0.8070532666839132, "grad_norm": 1.0752771248675659, "learning_rate": 2.2264576866843334e-06, "loss": 0.0295, "step": 193420 }, { "epoch": 0.8070741293989034, "grad_norm": 0.7038526313581036, "learning_rate": 2.2264289095926166e-06, "loss": 0.021, "step": 193425 }, { "epoch": 0.8070949921138937, "grad_norm": 0.4319727615480005, "learning_rate": 2.226400133616707e-06, "loss": 0.017, "step": 193430 }, { "epoch": 0.807115854828884, "grad_norm": 0.8853628742114935, "learning_rate": 2.2263713587565335e-06, "loss": 0.0277, "step": 193435 }, { "epoch": 0.8071367175438743, "grad_norm": 0.4417696699644571, "learning_rate": 2.226342585012023e-06, "loss": 0.0232, "step": 193440 }, { "epoch": 0.8071575802588645, "grad_norm": 1.0838366481176598, "learning_rate": 2.226313812383104e-06, "loss": 0.0175, "step": 193445 }, { "epoch": 0.8071784429738549, "grad_norm": 0.4810121571885471, "learning_rate": 2.226285040869705e-06, "loss": 0.0178, "step": 193450 }, { "epoch": 0.8071993056888451, "grad_norm": 0.7163300066647724, "learning_rate": 2.2262562704717526e-06, "loss": 0.0208, "step": 193455 }, { "epoch": 0.8072201684038354, "grad_norm": 0.5749447816454233, "learning_rate": 2.226227501189176e-06, "loss": 0.0184, "step": 193460 }, { "epoch": 0.8072410311188257, "grad_norm": 0.9434857763678745, "learning_rate": 2.2261987330219025e-06, "loss": 0.0239, "step": 193465 }, { "epoch": 0.807261893833816, "grad_norm": 0.6452617330136546, "learning_rate": 2.2261699659698605e-06, "loss": 0.0132, "step": 193470 }, { "epoch": 0.8072827565488062, "grad_norm": 0.6491001416982576, "learning_rate": 2.226141200032977e-06, "loss": 0.0191, "step": 193475 }, { "epoch": 0.8073036192637966, "grad_norm": 1.2401220908292085, "learning_rate": 2.2261124352111806e-06, "loss": 0.0145, "step": 193480 }, { "epoch": 0.8073244819787868, "grad_norm": 0.5305739465115001, "learning_rate": 2.226083671504399e-06, "loss": 0.0149, "step": 193485 }, { "epoch": 0.8073453446937771, "grad_norm": 0.7503180109231417, "learning_rate": 2.2260549089125608e-06, "loss": 0.0219, "step": 193490 }, { "epoch": 0.8073662074087673, "grad_norm": 0.9607340185260466, "learning_rate": 2.226026147435593e-06, "loss": 0.0244, "step": 193495 }, { "epoch": 0.8073870701237577, "grad_norm": 0.3552817662451202, "learning_rate": 2.225997387073424e-06, "loss": 0.0213, "step": 193500 }, { "epoch": 0.8074079328387479, "grad_norm": 0.7973666641709141, "learning_rate": 2.225968627825983e-06, "loss": 0.0184, "step": 193505 }, { "epoch": 0.8074287955537381, "grad_norm": 1.1960603764349569, "learning_rate": 2.225939869693196e-06, "loss": 0.0337, "step": 193510 }, { "epoch": 0.8074496582687285, "grad_norm": 0.40832213046695537, "learning_rate": 2.2259111126749915e-06, "loss": 0.0189, "step": 193515 }, { "epoch": 0.8074705209837187, "grad_norm": 0.601628362873926, "learning_rate": 2.2258823567712986e-06, "loss": 0.0247, "step": 193520 }, { "epoch": 0.807491383698709, "grad_norm": 0.4028373564840491, "learning_rate": 2.2258536019820444e-06, "loss": 0.0205, "step": 193525 }, { "epoch": 0.8075122464136993, "grad_norm": 0.3965379499612459, "learning_rate": 2.225824848307157e-06, "loss": 0.0197, "step": 193530 }, { "epoch": 0.8075331091286896, "grad_norm": 0.66813048801627, "learning_rate": 2.2257960957465645e-06, "loss": 0.033, "step": 193535 }, { "epoch": 0.8075539718436798, "grad_norm": 0.5876080093668898, "learning_rate": 2.225767344300195e-06, "loss": 0.0186, "step": 193540 }, { "epoch": 0.8075748345586701, "grad_norm": 0.9398555017504859, "learning_rate": 2.2257385939679764e-06, "loss": 0.0278, "step": 193545 }, { "epoch": 0.8075956972736604, "grad_norm": 0.31060661321496397, "learning_rate": 2.2257098447498366e-06, "loss": 0.0196, "step": 193550 }, { "epoch": 0.8076165599886507, "grad_norm": 0.38335579171103285, "learning_rate": 2.2256810966457044e-06, "loss": 0.0281, "step": 193555 }, { "epoch": 0.8076374227036409, "grad_norm": 0.6733329184785699, "learning_rate": 2.2256523496555073e-06, "loss": 0.0225, "step": 193560 }, { "epoch": 0.8076582854186313, "grad_norm": 0.8885608355961436, "learning_rate": 2.225623603779173e-06, "loss": 0.0254, "step": 193565 }, { "epoch": 0.8076791481336215, "grad_norm": 0.9442058037599842, "learning_rate": 2.2255948590166302e-06, "loss": 0.0168, "step": 193570 }, { "epoch": 0.8077000108486118, "grad_norm": 0.771821877372358, "learning_rate": 2.225566115367807e-06, "loss": 0.0252, "step": 193575 }, { "epoch": 0.8077208735636021, "grad_norm": 0.5978620245842382, "learning_rate": 2.2255373728326314e-06, "loss": 0.0207, "step": 193580 }, { "epoch": 0.8077417362785924, "grad_norm": 0.42023789951439494, "learning_rate": 2.225508631411031e-06, "loss": 0.0255, "step": 193585 }, { "epoch": 0.8077625989935826, "grad_norm": 0.7331839860767531, "learning_rate": 2.225479891102934e-06, "loss": 0.0225, "step": 193590 }, { "epoch": 0.807783461708573, "grad_norm": 0.8749547061122811, "learning_rate": 2.225451151908269e-06, "loss": 0.0281, "step": 193595 }, { "epoch": 0.8078043244235632, "grad_norm": 0.4598944642527869, "learning_rate": 2.2254224138269636e-06, "loss": 0.0166, "step": 193600 }, { "epoch": 0.8078251871385534, "grad_norm": 0.3929437020985048, "learning_rate": 2.2253936768589467e-06, "loss": 0.0218, "step": 193605 }, { "epoch": 0.8078460498535437, "grad_norm": 0.5565099169476301, "learning_rate": 2.2253649410041455e-06, "loss": 0.0278, "step": 193610 }, { "epoch": 0.807866912568534, "grad_norm": 0.772505021892819, "learning_rate": 2.2253362062624887e-06, "loss": 0.0202, "step": 193615 }, { "epoch": 0.8078877752835243, "grad_norm": 0.26526596055582347, "learning_rate": 2.225307472633904e-06, "loss": 0.0142, "step": 193620 }, { "epoch": 0.8079086379985145, "grad_norm": 0.7063431204654717, "learning_rate": 2.22527874011832e-06, "loss": 0.0214, "step": 193625 }, { "epoch": 0.8079295007135049, "grad_norm": 0.756788665762195, "learning_rate": 2.225250008715664e-06, "loss": 0.0274, "step": 193630 }, { "epoch": 0.8079503634284951, "grad_norm": 0.6030783062276314, "learning_rate": 2.2252212784258655e-06, "loss": 0.017, "step": 193635 }, { "epoch": 0.8079712261434854, "grad_norm": 0.6359567699902879, "learning_rate": 2.2251925492488518e-06, "loss": 0.0183, "step": 193640 }, { "epoch": 0.8079920888584757, "grad_norm": 0.620559130489759, "learning_rate": 2.2251638211845514e-06, "loss": 0.0125, "step": 193645 }, { "epoch": 0.808012951573466, "grad_norm": 0.8544025246370902, "learning_rate": 2.225135094232892e-06, "loss": 0.0212, "step": 193650 }, { "epoch": 0.8080338142884562, "grad_norm": 0.33900092310388985, "learning_rate": 2.2251063683938016e-06, "loss": 0.0268, "step": 193655 }, { "epoch": 0.8080546770034466, "grad_norm": 0.9313502495056644, "learning_rate": 2.2250776436672095e-06, "loss": 0.0218, "step": 193660 }, { "epoch": 0.8080755397184368, "grad_norm": 1.0637859257454474, "learning_rate": 2.225048920053043e-06, "loss": 0.0218, "step": 193665 }, { "epoch": 0.8080964024334271, "grad_norm": 0.5147655850191623, "learning_rate": 2.225020197551231e-06, "loss": 0.0177, "step": 193670 }, { "epoch": 0.8081172651484173, "grad_norm": 1.303295270165399, "learning_rate": 2.224991476161701e-06, "loss": 0.0221, "step": 193675 }, { "epoch": 0.8081381278634077, "grad_norm": 0.6962563598738811, "learning_rate": 2.2249627558843816e-06, "loss": 0.0152, "step": 193680 }, { "epoch": 0.8081589905783979, "grad_norm": 0.3120611769776075, "learning_rate": 2.2249340367192003e-06, "loss": 0.0154, "step": 193685 }, { "epoch": 0.8081798532933882, "grad_norm": 0.25750128141089157, "learning_rate": 2.2249053186660865e-06, "loss": 0.0203, "step": 193690 }, { "epoch": 0.8082007160083785, "grad_norm": 0.674700583601973, "learning_rate": 2.2248766017249677e-06, "loss": 0.0214, "step": 193695 }, { "epoch": 0.8082215787233687, "grad_norm": 0.728702014265626, "learning_rate": 2.224847885895772e-06, "loss": 0.0222, "step": 193700 }, { "epoch": 0.808242441438359, "grad_norm": 0.7668314682253806, "learning_rate": 2.2248191711784284e-06, "loss": 0.0249, "step": 193705 }, { "epoch": 0.8082633041533493, "grad_norm": 0.917652526024876, "learning_rate": 2.2247904575728643e-06, "loss": 0.0187, "step": 193710 }, { "epoch": 0.8082841668683396, "grad_norm": 0.6234252567375764, "learning_rate": 2.2247617450790086e-06, "loss": 0.0222, "step": 193715 }, { "epoch": 0.8083050295833298, "grad_norm": 1.0277482825443172, "learning_rate": 2.224733033696789e-06, "loss": 0.0208, "step": 193720 }, { "epoch": 0.8083258922983201, "grad_norm": 0.3818653308173749, "learning_rate": 2.2247043234261344e-06, "loss": 0.0182, "step": 193725 }, { "epoch": 0.8083467550133104, "grad_norm": 0.4648296728050058, "learning_rate": 2.224675614266973e-06, "loss": 0.0205, "step": 193730 }, { "epoch": 0.8083676177283007, "grad_norm": 0.9689471228631129, "learning_rate": 2.224646906219232e-06, "loss": 0.0277, "step": 193735 }, { "epoch": 0.8083884804432909, "grad_norm": 0.6512776881516323, "learning_rate": 2.2246181992828416e-06, "loss": 0.0313, "step": 193740 }, { "epoch": 0.8084093431582813, "grad_norm": 0.4357433130452026, "learning_rate": 2.224589493457728e-06, "loss": 0.0217, "step": 193745 }, { "epoch": 0.8084302058732715, "grad_norm": 1.0743137772543436, "learning_rate": 2.224560788743821e-06, "loss": 0.023, "step": 193750 }, { "epoch": 0.8084510685882618, "grad_norm": 0.7505411937021774, "learning_rate": 2.2245320851410486e-06, "loss": 0.0184, "step": 193755 }, { "epoch": 0.8084719313032521, "grad_norm": 0.6795110791295781, "learning_rate": 2.224503382649339e-06, "loss": 0.025, "step": 193760 }, { "epoch": 0.8084927940182424, "grad_norm": 0.3465100164349485, "learning_rate": 2.2244746812686204e-06, "loss": 0.0195, "step": 193765 }, { "epoch": 0.8085136567332326, "grad_norm": 0.5425591227068314, "learning_rate": 2.2244459809988206e-06, "loss": 0.0233, "step": 193770 }, { "epoch": 0.808534519448223, "grad_norm": 0.6351609931601496, "learning_rate": 2.2244172818398693e-06, "loss": 0.0213, "step": 193775 }, { "epoch": 0.8085553821632132, "grad_norm": 0.5485355427545264, "learning_rate": 2.2243885837916938e-06, "loss": 0.0212, "step": 193780 }, { "epoch": 0.8085762448782035, "grad_norm": 0.6681354144350129, "learning_rate": 2.2243598868542224e-06, "loss": 0.0213, "step": 193785 }, { "epoch": 0.8085971075931937, "grad_norm": 0.35165476462218465, "learning_rate": 2.2243311910273848e-06, "loss": 0.013, "step": 193790 }, { "epoch": 0.808617970308184, "grad_norm": 0.7412034918670912, "learning_rate": 2.2243024963111073e-06, "loss": 0.0141, "step": 193795 }, { "epoch": 0.8086388330231743, "grad_norm": 0.811131708172561, "learning_rate": 2.2242738027053198e-06, "loss": 0.0154, "step": 193800 }, { "epoch": 0.8086596957381645, "grad_norm": 0.4588233723504819, "learning_rate": 2.22424511020995e-06, "loss": 0.0207, "step": 193805 }, { "epoch": 0.8086805584531549, "grad_norm": 0.8687460908173155, "learning_rate": 2.224216418824927e-06, "loss": 0.0196, "step": 193810 }, { "epoch": 0.8087014211681451, "grad_norm": 0.6010476172739949, "learning_rate": 2.224187728550178e-06, "loss": 0.0248, "step": 193815 }, { "epoch": 0.8087222838831354, "grad_norm": 0.27585138841163287, "learning_rate": 2.2241590393856325e-06, "loss": 0.0125, "step": 193820 }, { "epoch": 0.8087431465981257, "grad_norm": 0.7738271546409102, "learning_rate": 2.224130351331219e-06, "loss": 0.0167, "step": 193825 }, { "epoch": 0.808764009313116, "grad_norm": 0.5424578862385706, "learning_rate": 2.224101664386864e-06, "loss": 0.0185, "step": 193830 }, { "epoch": 0.8087848720281062, "grad_norm": 0.6743444099629564, "learning_rate": 2.2240729785524985e-06, "loss": 0.0195, "step": 193835 }, { "epoch": 0.8088057347430966, "grad_norm": 0.751168571815708, "learning_rate": 2.224044293828049e-06, "loss": 0.0209, "step": 193840 }, { "epoch": 0.8088265974580868, "grad_norm": 0.5322502657079439, "learning_rate": 2.224015610213445e-06, "loss": 0.0149, "step": 193845 }, { "epoch": 0.8088474601730771, "grad_norm": 0.7000606963839321, "learning_rate": 2.2239869277086145e-06, "loss": 0.0236, "step": 193850 }, { "epoch": 0.8088683228880673, "grad_norm": 0.8016205300979506, "learning_rate": 2.223958246313486e-06, "loss": 0.0263, "step": 193855 }, { "epoch": 0.8088891856030577, "grad_norm": 0.467225921077544, "learning_rate": 2.223929566027988e-06, "loss": 0.0218, "step": 193860 }, { "epoch": 0.8089100483180479, "grad_norm": 0.4323512180256906, "learning_rate": 2.2239008868520487e-06, "loss": 0.0256, "step": 193865 }, { "epoch": 0.8089309110330382, "grad_norm": 0.74920801655483, "learning_rate": 2.2238722087855968e-06, "loss": 0.0192, "step": 193870 }, { "epoch": 0.8089517737480285, "grad_norm": 1.2988489873998061, "learning_rate": 2.2238435318285607e-06, "loss": 0.0239, "step": 193875 }, { "epoch": 0.8089726364630188, "grad_norm": 0.7033650014337346, "learning_rate": 2.2238148559808692e-06, "loss": 0.0245, "step": 193880 }, { "epoch": 0.808993499178009, "grad_norm": 0.4744471624470477, "learning_rate": 2.2237861812424504e-06, "loss": 0.0318, "step": 193885 }, { "epoch": 0.8090143618929994, "grad_norm": 0.7627755766924194, "learning_rate": 2.2237575076132327e-06, "loss": 0.0231, "step": 193890 }, { "epoch": 0.8090352246079896, "grad_norm": 3.0164374122345614, "learning_rate": 2.2237288350931453e-06, "loss": 0.0252, "step": 193895 }, { "epoch": 0.8090560873229798, "grad_norm": 0.6748668170742268, "learning_rate": 2.223700163682115e-06, "loss": 0.0251, "step": 193900 }, { "epoch": 0.8090769500379701, "grad_norm": 1.1655058968102028, "learning_rate": 2.223671493380073e-06, "loss": 0.0221, "step": 193905 }, { "epoch": 0.8090978127529604, "grad_norm": 0.564173810872939, "learning_rate": 2.223642824186945e-06, "loss": 0.0258, "step": 193910 }, { "epoch": 0.8091186754679507, "grad_norm": 0.6398536291210414, "learning_rate": 2.2236141561026615e-06, "loss": 0.0208, "step": 193915 }, { "epoch": 0.8091395381829409, "grad_norm": 0.3417577284610461, "learning_rate": 2.2235854891271498e-06, "loss": 0.026, "step": 193920 }, { "epoch": 0.8091604008979313, "grad_norm": 0.9083093623966352, "learning_rate": 2.223556823260339e-06, "loss": 0.0237, "step": 193925 }, { "epoch": 0.8091812636129215, "grad_norm": 0.5220720929128542, "learning_rate": 2.2235281585021582e-06, "loss": 0.0222, "step": 193930 }, { "epoch": 0.8092021263279118, "grad_norm": 1.098775974556269, "learning_rate": 2.2234994948525345e-06, "loss": 0.0235, "step": 193935 }, { "epoch": 0.8092229890429021, "grad_norm": 1.176567119224788, "learning_rate": 2.2234708323113977e-06, "loss": 0.0289, "step": 193940 }, { "epoch": 0.8092438517578924, "grad_norm": 1.162402838933202, "learning_rate": 2.2234421708786756e-06, "loss": 0.0198, "step": 193945 }, { "epoch": 0.8092647144728826, "grad_norm": 0.32488340600399723, "learning_rate": 2.2234135105542978e-06, "loss": 0.0183, "step": 193950 }, { "epoch": 0.809285577187873, "grad_norm": 0.5171281154513265, "learning_rate": 2.223384851338192e-06, "loss": 0.0225, "step": 193955 }, { "epoch": 0.8093064399028632, "grad_norm": 0.9897070291351865, "learning_rate": 2.2233561932302862e-06, "loss": 0.0204, "step": 193960 }, { "epoch": 0.8093273026178535, "grad_norm": 0.41431763098868, "learning_rate": 2.22332753623051e-06, "loss": 0.0247, "step": 193965 }, { "epoch": 0.8093481653328437, "grad_norm": 0.4479648287460591, "learning_rate": 2.223298880338792e-06, "loss": 0.0185, "step": 193970 }, { "epoch": 0.8093690280478341, "grad_norm": 0.8728599947654309, "learning_rate": 2.2232702255550605e-06, "loss": 0.0216, "step": 193975 }, { "epoch": 0.8093898907628243, "grad_norm": 0.6435545356225711, "learning_rate": 2.2232415718792438e-06, "loss": 0.0193, "step": 193980 }, { "epoch": 0.8094107534778145, "grad_norm": 0.7458577323151434, "learning_rate": 2.2232129193112716e-06, "loss": 0.0235, "step": 193985 }, { "epoch": 0.8094316161928049, "grad_norm": 0.9919863205537233, "learning_rate": 2.2231842678510703e-06, "loss": 0.027, "step": 193990 }, { "epoch": 0.8094524789077951, "grad_norm": 0.39773972828753945, "learning_rate": 2.223155617498571e-06, "loss": 0.0201, "step": 193995 }, { "epoch": 0.8094733416227854, "grad_norm": 0.6341246229105874, "learning_rate": 2.223126968253701e-06, "loss": 0.0226, "step": 194000 }, { "epoch": 0.8094942043377757, "grad_norm": 1.0540034961994693, "learning_rate": 2.2230983201163893e-06, "loss": 0.0255, "step": 194005 }, { "epoch": 0.809515067052766, "grad_norm": 0.7189211118002599, "learning_rate": 2.2230696730865646e-06, "loss": 0.0222, "step": 194010 }, { "epoch": 0.8095359297677562, "grad_norm": 0.9974091995830281, "learning_rate": 2.223041027164155e-06, "loss": 0.0193, "step": 194015 }, { "epoch": 0.8095567924827466, "grad_norm": 0.6855352010746597, "learning_rate": 2.2230123823490898e-06, "loss": 0.0223, "step": 194020 }, { "epoch": 0.8095776551977368, "grad_norm": 0.4608738645985732, "learning_rate": 2.2229837386412965e-06, "loss": 0.0198, "step": 194025 }, { "epoch": 0.8095985179127271, "grad_norm": 1.1351515974483277, "learning_rate": 2.222955096040706e-06, "loss": 0.0247, "step": 194030 }, { "epoch": 0.8096193806277173, "grad_norm": 0.7021561723240778, "learning_rate": 2.2229264545472455e-06, "loss": 0.0154, "step": 194035 }, { "epoch": 0.8096402433427077, "grad_norm": 0.414048381555, "learning_rate": 2.2228978141608433e-06, "loss": 0.0184, "step": 194040 }, { "epoch": 0.8096611060576979, "grad_norm": 0.6661098573134232, "learning_rate": 2.222869174881428e-06, "loss": 0.0193, "step": 194045 }, { "epoch": 0.8096819687726882, "grad_norm": 0.43470948217048105, "learning_rate": 2.22284053670893e-06, "loss": 0.0178, "step": 194050 }, { "epoch": 0.8097028314876785, "grad_norm": 0.6706844945245338, "learning_rate": 2.2228118996432764e-06, "loss": 0.0184, "step": 194055 }, { "epoch": 0.8097236942026688, "grad_norm": 0.9150242602090333, "learning_rate": 2.2227832636843964e-06, "loss": 0.0237, "step": 194060 }, { "epoch": 0.809744556917659, "grad_norm": 0.5395260820455794, "learning_rate": 2.2227546288322186e-06, "loss": 0.016, "step": 194065 }, { "epoch": 0.8097654196326494, "grad_norm": 0.5234493552113443, "learning_rate": 2.222725995086672e-06, "loss": 0.0288, "step": 194070 }, { "epoch": 0.8097862823476396, "grad_norm": 0.6725146354338297, "learning_rate": 2.222697362447685e-06, "loss": 0.0178, "step": 194075 }, { "epoch": 0.8098071450626299, "grad_norm": 0.7376185841932051, "learning_rate": 2.2226687309151867e-06, "loss": 0.0227, "step": 194080 }, { "epoch": 0.8098280077776201, "grad_norm": 0.8047412563199587, "learning_rate": 2.2226401004891055e-06, "loss": 0.0266, "step": 194085 }, { "epoch": 0.8098488704926105, "grad_norm": 0.9757790989198332, "learning_rate": 2.22261147116937e-06, "loss": 0.0278, "step": 194090 }, { "epoch": 0.8098697332076007, "grad_norm": 0.37120687569391203, "learning_rate": 2.2225828429559092e-06, "loss": 0.0285, "step": 194095 }, { "epoch": 0.8098905959225909, "grad_norm": 0.5575176828419015, "learning_rate": 2.2225542158486526e-06, "loss": 0.0189, "step": 194100 }, { "epoch": 0.8099114586375813, "grad_norm": 0.9089116903851447, "learning_rate": 2.2225255898475275e-06, "loss": 0.0225, "step": 194105 }, { "epoch": 0.8099323213525715, "grad_norm": 0.3991184308136551, "learning_rate": 2.2224969649524635e-06, "loss": 0.0146, "step": 194110 }, { "epoch": 0.8099531840675618, "grad_norm": 0.4924598215215231, "learning_rate": 2.2224683411633894e-06, "loss": 0.0215, "step": 194115 }, { "epoch": 0.8099740467825521, "grad_norm": 0.661587579245039, "learning_rate": 2.2224397184802338e-06, "loss": 0.0203, "step": 194120 }, { "epoch": 0.8099949094975424, "grad_norm": 0.43141931568560155, "learning_rate": 2.222411096902925e-06, "loss": 0.019, "step": 194125 }, { "epoch": 0.8100157722125326, "grad_norm": 0.702351445649548, "learning_rate": 2.222382476431393e-06, "loss": 0.0248, "step": 194130 }, { "epoch": 0.810036634927523, "grad_norm": 0.5073881569334049, "learning_rate": 2.222353857065566e-06, "loss": 0.0254, "step": 194135 }, { "epoch": 0.8100574976425132, "grad_norm": 0.6969085249142968, "learning_rate": 2.2223252388053722e-06, "loss": 0.0228, "step": 194140 }, { "epoch": 0.8100783603575035, "grad_norm": 0.37086151481932966, "learning_rate": 2.222296621650741e-06, "loss": 0.0239, "step": 194145 }, { "epoch": 0.8100992230724937, "grad_norm": 0.6023348715495432, "learning_rate": 2.222268005601602e-06, "loss": 0.0176, "step": 194150 }, { "epoch": 0.8101200857874841, "grad_norm": 0.2965322520859272, "learning_rate": 2.222239390657882e-06, "loss": 0.0176, "step": 194155 }, { "epoch": 0.8101409485024743, "grad_norm": 0.7872996401447896, "learning_rate": 2.2222107768195114e-06, "loss": 0.0299, "step": 194160 }, { "epoch": 0.8101618112174646, "grad_norm": 0.6102437533381919, "learning_rate": 2.2221821640864187e-06, "loss": 0.0168, "step": 194165 }, { "epoch": 0.8101826739324549, "grad_norm": 0.6218863741220545, "learning_rate": 2.222153552458533e-06, "loss": 0.0279, "step": 194170 }, { "epoch": 0.8102035366474452, "grad_norm": 0.4383974274735754, "learning_rate": 2.2221249419357825e-06, "loss": 0.0161, "step": 194175 }, { "epoch": 0.8102243993624354, "grad_norm": 0.9467878923976014, "learning_rate": 2.2220963325180963e-06, "loss": 0.021, "step": 194180 }, { "epoch": 0.8102452620774258, "grad_norm": 1.0370561870523032, "learning_rate": 2.2220677242054043e-06, "loss": 0.0226, "step": 194185 }, { "epoch": 0.810266124792416, "grad_norm": 0.7758384928969272, "learning_rate": 2.2220391169976334e-06, "loss": 0.0223, "step": 194190 }, { "epoch": 0.8102869875074062, "grad_norm": 0.7212720612645277, "learning_rate": 2.2220105108947136e-06, "loss": 0.0198, "step": 194195 }, { "epoch": 0.8103078502223965, "grad_norm": 0.6658508076936296, "learning_rate": 2.2219819058965746e-06, "loss": 0.0259, "step": 194200 }, { "epoch": 0.8103287129373868, "grad_norm": 0.5635212502409486, "learning_rate": 2.2219533020031436e-06, "loss": 0.0221, "step": 194205 }, { "epoch": 0.8103495756523771, "grad_norm": 0.5126122195146661, "learning_rate": 2.2219246992143508e-06, "loss": 0.0164, "step": 194210 }, { "epoch": 0.8103704383673673, "grad_norm": 0.3486721828794304, "learning_rate": 2.2218960975301236e-06, "loss": 0.0177, "step": 194215 }, { "epoch": 0.8103913010823577, "grad_norm": 0.4638433688412109, "learning_rate": 2.221867496950393e-06, "loss": 0.0169, "step": 194220 }, { "epoch": 0.8104121637973479, "grad_norm": 0.44029725092613164, "learning_rate": 2.2218388974750864e-06, "loss": 0.0164, "step": 194225 }, { "epoch": 0.8104330265123382, "grad_norm": 0.5066403640912575, "learning_rate": 2.221810299104133e-06, "loss": 0.0195, "step": 194230 }, { "epoch": 0.8104538892273285, "grad_norm": 0.5910109334132124, "learning_rate": 2.221781701837462e-06, "loss": 0.0152, "step": 194235 }, { "epoch": 0.8104747519423188, "grad_norm": 0.7511392429920635, "learning_rate": 2.2217531056750023e-06, "loss": 0.0195, "step": 194240 }, { "epoch": 0.810495614657309, "grad_norm": 0.33590917583656504, "learning_rate": 2.2217245106166828e-06, "loss": 0.0201, "step": 194245 }, { "epoch": 0.8105164773722994, "grad_norm": 0.8439535069551046, "learning_rate": 2.221695916662432e-06, "loss": 0.0375, "step": 194250 }, { "epoch": 0.8105373400872896, "grad_norm": 0.6575218692062276, "learning_rate": 2.2216673238121797e-06, "loss": 0.0209, "step": 194255 }, { "epoch": 0.8105582028022799, "grad_norm": 0.453217004263832, "learning_rate": 2.221638732065854e-06, "loss": 0.0237, "step": 194260 }, { "epoch": 0.8105790655172701, "grad_norm": 0.5812817634720546, "learning_rate": 2.2216101414233846e-06, "loss": 0.0215, "step": 194265 }, { "epoch": 0.8105999282322605, "grad_norm": 0.5486053906842379, "learning_rate": 2.2215815518847e-06, "loss": 0.0206, "step": 194270 }, { "epoch": 0.8106207909472507, "grad_norm": 0.43584966039465306, "learning_rate": 2.2215529634497294e-06, "loss": 0.0194, "step": 194275 }, { "epoch": 0.810641653662241, "grad_norm": 0.6251593510214825, "learning_rate": 2.221524376118402e-06, "loss": 0.0237, "step": 194280 }, { "epoch": 0.8106625163772313, "grad_norm": 0.7397929876750559, "learning_rate": 2.2214957898906455e-06, "loss": 0.0244, "step": 194285 }, { "epoch": 0.8106833790922215, "grad_norm": 0.24611434019642578, "learning_rate": 2.221467204766391e-06, "loss": 0.0163, "step": 194290 }, { "epoch": 0.8107042418072118, "grad_norm": 0.784051547580706, "learning_rate": 2.2214386207455655e-06, "loss": 0.0257, "step": 194295 }, { "epoch": 0.8107251045222021, "grad_norm": 1.082758776811748, "learning_rate": 2.2214100378280996e-06, "loss": 0.0221, "step": 194300 }, { "epoch": 0.8107459672371924, "grad_norm": 0.6934670109332456, "learning_rate": 2.2213814560139215e-06, "loss": 0.0156, "step": 194305 }, { "epoch": 0.8107668299521826, "grad_norm": 0.5354436617425488, "learning_rate": 2.2213528753029607e-06, "loss": 0.0201, "step": 194310 }, { "epoch": 0.810787692667173, "grad_norm": 1.2321093219820345, "learning_rate": 2.2213242956951455e-06, "loss": 0.0233, "step": 194315 }, { "epoch": 0.8108085553821632, "grad_norm": 0.8315071263331956, "learning_rate": 2.2212957171904052e-06, "loss": 0.0235, "step": 194320 }, { "epoch": 0.8108294180971535, "grad_norm": 0.9113330854936639, "learning_rate": 2.221267139788669e-06, "loss": 0.0253, "step": 194325 }, { "epoch": 0.8108502808121437, "grad_norm": 1.056130976947212, "learning_rate": 2.221238563489866e-06, "loss": 0.0239, "step": 194330 }, { "epoch": 0.8108711435271341, "grad_norm": 0.9405592147938069, "learning_rate": 2.2212099882939253e-06, "loss": 0.0174, "step": 194335 }, { "epoch": 0.8108920062421243, "grad_norm": 0.9923424751918773, "learning_rate": 2.2211814142007757e-06, "loss": 0.026, "step": 194340 }, { "epoch": 0.8109128689571146, "grad_norm": 0.3814712437639549, "learning_rate": 2.221152841210347e-06, "loss": 0.0175, "step": 194345 }, { "epoch": 0.8109337316721049, "grad_norm": 0.46868543157908166, "learning_rate": 2.2211242693225667e-06, "loss": 0.0164, "step": 194350 }, { "epoch": 0.8109545943870952, "grad_norm": 0.5381099634569698, "learning_rate": 2.221095698537366e-06, "loss": 0.0146, "step": 194355 }, { "epoch": 0.8109754571020854, "grad_norm": 0.8535824739177945, "learning_rate": 2.221067128854672e-06, "loss": 0.0253, "step": 194360 }, { "epoch": 0.8109963198170758, "grad_norm": 0.5930583915247809, "learning_rate": 2.2210385602744146e-06, "loss": 0.0249, "step": 194365 }, { "epoch": 0.811017182532066, "grad_norm": 0.37958969152223054, "learning_rate": 2.2210099927965233e-06, "loss": 0.0215, "step": 194370 }, { "epoch": 0.8110380452470562, "grad_norm": 0.7576762255372791, "learning_rate": 2.2209814264209273e-06, "loss": 0.0209, "step": 194375 }, { "epoch": 0.8110589079620465, "grad_norm": 1.0130401853575794, "learning_rate": 2.2209528611475544e-06, "loss": 0.0181, "step": 194380 }, { "epoch": 0.8110797706770368, "grad_norm": 0.3661995794147456, "learning_rate": 2.220924296976335e-06, "loss": 0.0161, "step": 194385 }, { "epoch": 0.8111006333920271, "grad_norm": 0.8831146691191567, "learning_rate": 2.220895733907198e-06, "loss": 0.0228, "step": 194390 }, { "epoch": 0.8111214961070173, "grad_norm": 0.4777521990782446, "learning_rate": 2.220867171940072e-06, "loss": 0.0159, "step": 194395 }, { "epoch": 0.8111423588220077, "grad_norm": 0.780923125880516, "learning_rate": 2.2208386110748866e-06, "loss": 0.0185, "step": 194400 }, { "epoch": 0.8111632215369979, "grad_norm": 0.9551758945167702, "learning_rate": 2.220810051311571e-06, "loss": 0.0274, "step": 194405 }, { "epoch": 0.8111840842519882, "grad_norm": 0.6698074175073877, "learning_rate": 2.2207814926500543e-06, "loss": 0.0202, "step": 194410 }, { "epoch": 0.8112049469669785, "grad_norm": 0.5465583406274681, "learning_rate": 2.220752935090265e-06, "loss": 0.0221, "step": 194415 }, { "epoch": 0.8112258096819688, "grad_norm": 0.7787228332074304, "learning_rate": 2.220724378632133e-06, "loss": 0.0245, "step": 194420 }, { "epoch": 0.811246672396959, "grad_norm": 1.3076646423695928, "learning_rate": 2.220695823275588e-06, "loss": 0.0281, "step": 194425 }, { "epoch": 0.8112675351119494, "grad_norm": 0.4385801387767651, "learning_rate": 2.220667269020558e-06, "loss": 0.0154, "step": 194430 }, { "epoch": 0.8112883978269396, "grad_norm": 0.6261029244359553, "learning_rate": 2.2206387158669724e-06, "loss": 0.0221, "step": 194435 }, { "epoch": 0.8113092605419299, "grad_norm": 0.7030537734733533, "learning_rate": 2.2206101638147607e-06, "loss": 0.0202, "step": 194440 }, { "epoch": 0.8113301232569201, "grad_norm": 1.138100718915561, "learning_rate": 2.2205816128638522e-06, "loss": 0.0243, "step": 194445 }, { "epoch": 0.8113509859719105, "grad_norm": 1.0038125645780962, "learning_rate": 2.2205530630141764e-06, "loss": 0.0263, "step": 194450 }, { "epoch": 0.8113718486869007, "grad_norm": 0.7964031229032691, "learning_rate": 2.2205245142656617e-06, "loss": 0.0221, "step": 194455 }, { "epoch": 0.811392711401891, "grad_norm": 0.6932224242528252, "learning_rate": 2.2204959666182372e-06, "loss": 0.0148, "step": 194460 }, { "epoch": 0.8114135741168813, "grad_norm": 0.41761063089820355, "learning_rate": 2.2204674200718328e-06, "loss": 0.0189, "step": 194465 }, { "epoch": 0.8114344368318716, "grad_norm": 2.961991838067915, "learning_rate": 2.2204388746263776e-06, "loss": 0.0254, "step": 194470 }, { "epoch": 0.8114552995468618, "grad_norm": 0.8935774964278215, "learning_rate": 2.220410330281801e-06, "loss": 0.0265, "step": 194475 }, { "epoch": 0.8114761622618522, "grad_norm": 0.39527958459821516, "learning_rate": 2.220381787038032e-06, "loss": 0.0195, "step": 194480 }, { "epoch": 0.8114970249768424, "grad_norm": 0.8624233473127796, "learning_rate": 2.220353244894999e-06, "loss": 0.0198, "step": 194485 }, { "epoch": 0.8115178876918326, "grad_norm": 0.4367547230830009, "learning_rate": 2.2203247038526328e-06, "loss": 0.0243, "step": 194490 }, { "epoch": 0.811538750406823, "grad_norm": 0.4760434355603093, "learning_rate": 2.2202961639108627e-06, "loss": 0.0259, "step": 194495 }, { "epoch": 0.8115596131218132, "grad_norm": 0.3606333586255261, "learning_rate": 2.220267625069616e-06, "loss": 0.0146, "step": 194500 }, { "epoch": 0.8115804758368035, "grad_norm": 0.8438046150111449, "learning_rate": 2.2202390873288236e-06, "loss": 0.0188, "step": 194505 }, { "epoch": 0.8116013385517937, "grad_norm": 0.7420635188770555, "learning_rate": 2.2202105506884145e-06, "loss": 0.0239, "step": 194510 }, { "epoch": 0.8116222012667841, "grad_norm": 0.33910902852122465, "learning_rate": 2.2201820151483176e-06, "loss": 0.0191, "step": 194515 }, { "epoch": 0.8116430639817743, "grad_norm": 0.6908764702785123, "learning_rate": 2.2201534807084628e-06, "loss": 0.0239, "step": 194520 }, { "epoch": 0.8116639266967646, "grad_norm": 0.5352176860932739, "learning_rate": 2.2201249473687788e-06, "loss": 0.0231, "step": 194525 }, { "epoch": 0.8116847894117549, "grad_norm": 1.634147956481635, "learning_rate": 2.2200964151291953e-06, "loss": 0.0291, "step": 194530 }, { "epoch": 0.8117056521267452, "grad_norm": 0.8323218268621793, "learning_rate": 2.2200678839896417e-06, "loss": 0.0269, "step": 194535 }, { "epoch": 0.8117265148417354, "grad_norm": 0.5298068114937458, "learning_rate": 2.2200393539500464e-06, "loss": 0.0139, "step": 194540 }, { "epoch": 0.8117473775567258, "grad_norm": 0.24157605287042286, "learning_rate": 2.22001082501034e-06, "loss": 0.0205, "step": 194545 }, { "epoch": 0.811768240271716, "grad_norm": 0.6415126150776169, "learning_rate": 2.2199822971704514e-06, "loss": 0.0187, "step": 194550 }, { "epoch": 0.8117891029867063, "grad_norm": 0.9335707557809841, "learning_rate": 2.219953770430309e-06, "loss": 0.0265, "step": 194555 }, { "epoch": 0.8118099657016965, "grad_norm": 0.5696729388907302, "learning_rate": 2.219925244789843e-06, "loss": 0.0239, "step": 194560 }, { "epoch": 0.8118308284166869, "grad_norm": 0.4408776738276888, "learning_rate": 2.2198967202489833e-06, "loss": 0.0186, "step": 194565 }, { "epoch": 0.8118516911316771, "grad_norm": 0.539123840930059, "learning_rate": 2.219868196807658e-06, "loss": 0.02, "step": 194570 }, { "epoch": 0.8118725538466673, "grad_norm": 0.6139066555334208, "learning_rate": 2.219839674465798e-06, "loss": 0.02, "step": 194575 }, { "epoch": 0.8118934165616577, "grad_norm": 1.0045319327501556, "learning_rate": 2.2198111532233313e-06, "loss": 0.0215, "step": 194580 }, { "epoch": 0.8119142792766479, "grad_norm": 1.979247648786341, "learning_rate": 2.2197826330801872e-06, "loss": 0.0207, "step": 194585 }, { "epoch": 0.8119351419916382, "grad_norm": 0.6657033498065094, "learning_rate": 2.2197541140362963e-06, "loss": 0.0183, "step": 194590 }, { "epoch": 0.8119560047066285, "grad_norm": 0.926804709170181, "learning_rate": 2.219725596091587e-06, "loss": 0.028, "step": 194595 }, { "epoch": 0.8119768674216188, "grad_norm": 0.5894170388157635, "learning_rate": 2.219697079245989e-06, "loss": 0.0197, "step": 194600 }, { "epoch": 0.811997730136609, "grad_norm": 0.9220126526324822, "learning_rate": 2.219668563499432e-06, "loss": 0.0259, "step": 194605 }, { "epoch": 0.8120185928515994, "grad_norm": 0.47594177284881206, "learning_rate": 2.219640048851845e-06, "loss": 0.0123, "step": 194610 }, { "epoch": 0.8120394555665896, "grad_norm": 0.7182584733196097, "learning_rate": 2.219611535303157e-06, "loss": 0.0222, "step": 194615 }, { "epoch": 0.8120603182815799, "grad_norm": 1.1112678526550857, "learning_rate": 2.2195830228532984e-06, "loss": 0.0206, "step": 194620 }, { "epoch": 0.8120811809965701, "grad_norm": 0.33430441995575655, "learning_rate": 2.219554511502198e-06, "loss": 0.0209, "step": 194625 }, { "epoch": 0.8121020437115605, "grad_norm": 0.45882829964652905, "learning_rate": 2.2195260012497856e-06, "loss": 0.0222, "step": 194630 }, { "epoch": 0.8121229064265507, "grad_norm": 0.6944477578138036, "learning_rate": 2.21949749209599e-06, "loss": 0.0329, "step": 194635 }, { "epoch": 0.812143769141541, "grad_norm": 0.638556302323192, "learning_rate": 2.2194689840407413e-06, "loss": 0.0203, "step": 194640 }, { "epoch": 0.8121646318565313, "grad_norm": 0.5932851768134532, "learning_rate": 2.219440477083969e-06, "loss": 0.021, "step": 194645 }, { "epoch": 0.8121854945715216, "grad_norm": 1.0561296769046888, "learning_rate": 2.219411971225602e-06, "loss": 0.0407, "step": 194650 }, { "epoch": 0.8122063572865118, "grad_norm": 0.6308491484857554, "learning_rate": 2.2193834664655702e-06, "loss": 0.0181, "step": 194655 }, { "epoch": 0.8122272200015022, "grad_norm": 1.0893368916316954, "learning_rate": 2.2193549628038027e-06, "loss": 0.0244, "step": 194660 }, { "epoch": 0.8122480827164924, "grad_norm": 0.5658216252150432, "learning_rate": 2.2193264602402295e-06, "loss": 0.0225, "step": 194665 }, { "epoch": 0.8122689454314826, "grad_norm": 0.5083756179394826, "learning_rate": 2.21929795877478e-06, "loss": 0.0185, "step": 194670 }, { "epoch": 0.812289808146473, "grad_norm": 0.312285842358947, "learning_rate": 2.2192694584073827e-06, "loss": 0.0178, "step": 194675 }, { "epoch": 0.8123106708614632, "grad_norm": 0.34323651245003584, "learning_rate": 2.219240959137968e-06, "loss": 0.021, "step": 194680 }, { "epoch": 0.8123315335764535, "grad_norm": 0.554175941795975, "learning_rate": 2.2192124609664656e-06, "loss": 0.0207, "step": 194685 }, { "epoch": 0.8123523962914437, "grad_norm": 0.9089421435835314, "learning_rate": 2.2191839638928047e-06, "loss": 0.0237, "step": 194690 }, { "epoch": 0.8123732590064341, "grad_norm": 0.7910877407374419, "learning_rate": 2.2191554679169146e-06, "loss": 0.0181, "step": 194695 }, { "epoch": 0.8123941217214243, "grad_norm": 0.9270460149892388, "learning_rate": 2.2191269730387255e-06, "loss": 0.0242, "step": 194700 }, { "epoch": 0.8124149844364146, "grad_norm": 0.9779668426543412, "learning_rate": 2.219098479258166e-06, "loss": 0.0224, "step": 194705 }, { "epoch": 0.8124358471514049, "grad_norm": 0.518259504583538, "learning_rate": 2.2190699865751657e-06, "loss": 0.0173, "step": 194710 }, { "epoch": 0.8124567098663952, "grad_norm": 0.8920481979565489, "learning_rate": 2.2190414949896553e-06, "loss": 0.0172, "step": 194715 }, { "epoch": 0.8124775725813854, "grad_norm": 0.375035909213192, "learning_rate": 2.219013004501563e-06, "loss": 0.0222, "step": 194720 }, { "epoch": 0.8124984352963758, "grad_norm": 0.3856030833703638, "learning_rate": 2.218984515110819e-06, "loss": 0.0184, "step": 194725 }, { "epoch": 0.812519298011366, "grad_norm": 0.9716009523061843, "learning_rate": 2.2189560268173527e-06, "loss": 0.0236, "step": 194730 }, { "epoch": 0.8125401607263563, "grad_norm": 0.5241790740956234, "learning_rate": 2.218927539621094e-06, "loss": 0.0187, "step": 194735 }, { "epoch": 0.8125610234413465, "grad_norm": 0.7268999195331894, "learning_rate": 2.218899053521972e-06, "loss": 0.0208, "step": 194740 }, { "epoch": 0.8125818861563369, "grad_norm": 0.7151543200183609, "learning_rate": 2.218870568519917e-06, "loss": 0.0185, "step": 194745 }, { "epoch": 0.8126027488713271, "grad_norm": 1.1484389612957644, "learning_rate": 2.2188420846148574e-06, "loss": 0.027, "step": 194750 }, { "epoch": 0.8126236115863174, "grad_norm": 0.6293327017743479, "learning_rate": 2.218813601806724e-06, "loss": 0.0206, "step": 194755 }, { "epoch": 0.8126444743013077, "grad_norm": 0.9880135682088401, "learning_rate": 2.2187851200954454e-06, "loss": 0.0229, "step": 194760 }, { "epoch": 0.812665337016298, "grad_norm": 0.7778092562378306, "learning_rate": 2.2187566394809514e-06, "loss": 0.0215, "step": 194765 }, { "epoch": 0.8126861997312882, "grad_norm": 0.8324642041356072, "learning_rate": 2.218728159963172e-06, "loss": 0.0176, "step": 194770 }, { "epoch": 0.8127070624462785, "grad_norm": 0.8546431895421382, "learning_rate": 2.2186996815420376e-06, "loss": 0.0205, "step": 194775 }, { "epoch": 0.8127279251612688, "grad_norm": 0.6695287511427466, "learning_rate": 2.218671204217476e-06, "loss": 0.0194, "step": 194780 }, { "epoch": 0.812748787876259, "grad_norm": 0.41683876634925277, "learning_rate": 2.218642727989418e-06, "loss": 0.025, "step": 194785 }, { "epoch": 0.8127696505912494, "grad_norm": 0.8671409796116836, "learning_rate": 2.2186142528577927e-06, "loss": 0.0257, "step": 194790 }, { "epoch": 0.8127905133062396, "grad_norm": 1.007211264964744, "learning_rate": 2.21858577882253e-06, "loss": 0.0186, "step": 194795 }, { "epoch": 0.8128113760212299, "grad_norm": 0.3910828829929653, "learning_rate": 2.21855730588356e-06, "loss": 0.0186, "step": 194800 }, { "epoch": 0.8128322387362201, "grad_norm": 0.9183598262742451, "learning_rate": 2.218528834040812e-06, "loss": 0.0164, "step": 194805 }, { "epoch": 0.8128531014512105, "grad_norm": 0.7416510291162114, "learning_rate": 2.218500363294215e-06, "loss": 0.0216, "step": 194810 }, { "epoch": 0.8128739641662007, "grad_norm": 0.7173703930760109, "learning_rate": 2.2184718936436993e-06, "loss": 0.0231, "step": 194815 }, { "epoch": 0.812894826881191, "grad_norm": 0.4856270730598553, "learning_rate": 2.218443425089195e-06, "loss": 0.0165, "step": 194820 }, { "epoch": 0.8129156895961813, "grad_norm": 0.320380262049354, "learning_rate": 2.218414957630631e-06, "loss": 0.0176, "step": 194825 }, { "epoch": 0.8129365523111716, "grad_norm": 0.6205778905681069, "learning_rate": 2.2183864912679373e-06, "loss": 0.0315, "step": 194830 }, { "epoch": 0.8129574150261618, "grad_norm": 0.5284682878562184, "learning_rate": 2.2183580260010438e-06, "loss": 0.0202, "step": 194835 }, { "epoch": 0.8129782777411522, "grad_norm": 0.6199071145390876, "learning_rate": 2.2183295618298797e-06, "loss": 0.0151, "step": 194840 }, { "epoch": 0.8129991404561424, "grad_norm": 0.8273119726982932, "learning_rate": 2.2183010987543747e-06, "loss": 0.0221, "step": 194845 }, { "epoch": 0.8130200031711327, "grad_norm": 0.9466396352653933, "learning_rate": 2.2182726367744586e-06, "loss": 0.0264, "step": 194850 }, { "epoch": 0.813040865886123, "grad_norm": 0.5134830720080937, "learning_rate": 2.2182441758900623e-06, "loss": 0.0198, "step": 194855 }, { "epoch": 0.8130617286011133, "grad_norm": 0.8386215377217505, "learning_rate": 2.2182157161011138e-06, "loss": 0.0185, "step": 194860 }, { "epoch": 0.8130825913161035, "grad_norm": 0.5275823080698299, "learning_rate": 2.218187257407543e-06, "loss": 0.0219, "step": 194865 }, { "epoch": 0.8131034540310937, "grad_norm": 0.5794558501069749, "learning_rate": 2.2181587998092815e-06, "loss": 0.0216, "step": 194870 }, { "epoch": 0.8131243167460841, "grad_norm": 1.1159777670797646, "learning_rate": 2.218130343306257e-06, "loss": 0.0187, "step": 194875 }, { "epoch": 0.8131451794610743, "grad_norm": 0.3533615495948192, "learning_rate": 2.2181018878984e-06, "loss": 0.0181, "step": 194880 }, { "epoch": 0.8131660421760646, "grad_norm": 0.5394860232957028, "learning_rate": 2.21807343358564e-06, "loss": 0.0213, "step": 194885 }, { "epoch": 0.8131869048910549, "grad_norm": 0.7067902449376205, "learning_rate": 2.218044980367908e-06, "loss": 0.0191, "step": 194890 }, { "epoch": 0.8132077676060452, "grad_norm": 0.1951132549355488, "learning_rate": 2.2180165282451318e-06, "loss": 0.0214, "step": 194895 }, { "epoch": 0.8132286303210354, "grad_norm": 1.1690091835945882, "learning_rate": 2.2179880772172424e-06, "loss": 0.0228, "step": 194900 }, { "epoch": 0.8132494930360258, "grad_norm": 0.7466884700885331, "learning_rate": 2.217959627284169e-06, "loss": 0.0196, "step": 194905 }, { "epoch": 0.813270355751016, "grad_norm": 0.45127641221120923, "learning_rate": 2.217931178445842e-06, "loss": 0.0141, "step": 194910 }, { "epoch": 0.8132912184660063, "grad_norm": 0.4188168357197671, "learning_rate": 2.2179027307021907e-06, "loss": 0.0185, "step": 194915 }, { "epoch": 0.8133120811809965, "grad_norm": 0.7636720246701343, "learning_rate": 2.2178742840531453e-06, "loss": 0.0232, "step": 194920 }, { "epoch": 0.8133329438959869, "grad_norm": 0.23423188200217876, "learning_rate": 2.217845838498636e-06, "loss": 0.0179, "step": 194925 }, { "epoch": 0.8133538066109771, "grad_norm": 0.6430590132130268, "learning_rate": 2.2178173940385904e-06, "loss": 0.0216, "step": 194930 }, { "epoch": 0.8133746693259674, "grad_norm": 0.298017735219136, "learning_rate": 2.217788950672941e-06, "loss": 0.0186, "step": 194935 }, { "epoch": 0.8133955320409577, "grad_norm": 0.594133537247574, "learning_rate": 2.2177605084016165e-06, "loss": 0.0182, "step": 194940 }, { "epoch": 0.813416394755948, "grad_norm": 0.5191688348765611, "learning_rate": 2.2177320672245467e-06, "loss": 0.0261, "step": 194945 }, { "epoch": 0.8134372574709382, "grad_norm": 0.9649053081258661, "learning_rate": 2.2177036271416613e-06, "loss": 0.0159, "step": 194950 }, { "epoch": 0.8134581201859286, "grad_norm": 0.497332174995929, "learning_rate": 2.2176751881528905e-06, "loss": 0.0207, "step": 194955 }, { "epoch": 0.8134789829009188, "grad_norm": 0.8410453281418753, "learning_rate": 2.2176467502581643e-06, "loss": 0.018, "step": 194960 }, { "epoch": 0.813499845615909, "grad_norm": 0.5308670837089247, "learning_rate": 2.2176183134574118e-06, "loss": 0.0165, "step": 194965 }, { "epoch": 0.8135207083308994, "grad_norm": 0.7905514600392115, "learning_rate": 2.2175898777505637e-06, "loss": 0.0201, "step": 194970 }, { "epoch": 0.8135415710458896, "grad_norm": 1.060375574831565, "learning_rate": 2.2175614431375494e-06, "loss": 0.0234, "step": 194975 }, { "epoch": 0.8135624337608799, "grad_norm": 0.9277883837313102, "learning_rate": 2.2175330096182986e-06, "loss": 0.0262, "step": 194980 }, { "epoch": 0.8135832964758701, "grad_norm": 0.4226355120070233, "learning_rate": 2.217504577192741e-06, "loss": 0.0192, "step": 194985 }, { "epoch": 0.8136041591908605, "grad_norm": 0.5497855234734994, "learning_rate": 2.2174761458608084e-06, "loss": 0.022, "step": 194990 }, { "epoch": 0.8136250219058507, "grad_norm": 0.6323887661579114, "learning_rate": 2.217447715622428e-06, "loss": 0.0189, "step": 194995 }, { "epoch": 0.813645884620841, "grad_norm": 0.37045513726651846, "learning_rate": 2.2174192864775314e-06, "loss": 0.016, "step": 195000 }, { "epoch": 0.8136667473358313, "grad_norm": 0.5335267362327852, "learning_rate": 2.2173908584260476e-06, "loss": 0.0272, "step": 195005 }, { "epoch": 0.8136876100508216, "grad_norm": 0.2043285469735497, "learning_rate": 2.2173624314679077e-06, "loss": 0.0177, "step": 195010 }, { "epoch": 0.8137084727658118, "grad_norm": 0.5466471766642312, "learning_rate": 2.2173340056030404e-06, "loss": 0.0243, "step": 195015 }, { "epoch": 0.8137293354808022, "grad_norm": 0.3757691231329869, "learning_rate": 2.217305580831376e-06, "loss": 0.0141, "step": 195020 }, { "epoch": 0.8137501981957924, "grad_norm": 0.5067115290267333, "learning_rate": 2.2172771571528445e-06, "loss": 0.0199, "step": 195025 }, { "epoch": 0.8137710609107827, "grad_norm": 1.2351658024845729, "learning_rate": 2.217248734567376e-06, "loss": 0.0182, "step": 195030 }, { "epoch": 0.813791923625773, "grad_norm": 1.0047032963144282, "learning_rate": 2.2172203130749e-06, "loss": 0.0217, "step": 195035 }, { "epoch": 0.8138127863407633, "grad_norm": 0.6377145625200769, "learning_rate": 2.2171918926753474e-06, "loss": 0.0195, "step": 195040 }, { "epoch": 0.8138336490557535, "grad_norm": 1.4268547913525431, "learning_rate": 2.217163473368647e-06, "loss": 0.019, "step": 195045 }, { "epoch": 0.8138545117707437, "grad_norm": 0.6198060540664747, "learning_rate": 2.2171350551547298e-06, "loss": 0.0162, "step": 195050 }, { "epoch": 0.8138753744857341, "grad_norm": 0.34480951566265416, "learning_rate": 2.2171066380335244e-06, "loss": 0.0177, "step": 195055 }, { "epoch": 0.8138962372007243, "grad_norm": 0.8159906218192401, "learning_rate": 2.217078222004962e-06, "loss": 0.0212, "step": 195060 }, { "epoch": 0.8139170999157146, "grad_norm": 0.8166429066883654, "learning_rate": 2.2170498070689723e-06, "loss": 0.0137, "step": 195065 }, { "epoch": 0.813937962630705, "grad_norm": 0.9745587589726403, "learning_rate": 2.2170213932254854e-06, "loss": 0.0254, "step": 195070 }, { "epoch": 0.8139588253456952, "grad_norm": 0.6319788701154486, "learning_rate": 2.2169929804744304e-06, "loss": 0.0187, "step": 195075 }, { "epoch": 0.8139796880606854, "grad_norm": 1.1436201289524135, "learning_rate": 2.2169645688157384e-06, "loss": 0.0168, "step": 195080 }, { "epoch": 0.8140005507756758, "grad_norm": 0.659173656458529, "learning_rate": 2.216936158249339e-06, "loss": 0.0188, "step": 195085 }, { "epoch": 0.814021413490666, "grad_norm": 0.8941837574996412, "learning_rate": 2.216907748775162e-06, "loss": 0.0168, "step": 195090 }, { "epoch": 0.8140422762056563, "grad_norm": 1.134134660227858, "learning_rate": 2.216879340393138e-06, "loss": 0.0281, "step": 195095 }, { "epoch": 0.8140631389206465, "grad_norm": 0.413880190194599, "learning_rate": 2.2168509331031962e-06, "loss": 0.0161, "step": 195100 }, { "epoch": 0.8140840016356369, "grad_norm": 0.7051128480387207, "learning_rate": 2.216822526905267e-06, "loss": 0.0286, "step": 195105 }, { "epoch": 0.8141048643506271, "grad_norm": 0.5699634096142855, "learning_rate": 2.216794121799281e-06, "loss": 0.0181, "step": 195110 }, { "epoch": 0.8141257270656174, "grad_norm": 0.5701255366645553, "learning_rate": 2.2167657177851675e-06, "loss": 0.0272, "step": 195115 }, { "epoch": 0.8141465897806077, "grad_norm": 0.3578287369091876, "learning_rate": 2.2167373148628567e-06, "loss": 0.0237, "step": 195120 }, { "epoch": 0.814167452495598, "grad_norm": 0.4358398402161323, "learning_rate": 2.2167089130322787e-06, "loss": 0.0266, "step": 195125 }, { "epoch": 0.8141883152105882, "grad_norm": 0.6632670058313591, "learning_rate": 2.216680512293364e-06, "loss": 0.026, "step": 195130 }, { "epoch": 0.8142091779255786, "grad_norm": 0.6185533454761799, "learning_rate": 2.216652112646042e-06, "loss": 0.0169, "step": 195135 }, { "epoch": 0.8142300406405688, "grad_norm": 0.5426800733610405, "learning_rate": 2.216623714090243e-06, "loss": 0.0132, "step": 195140 }, { "epoch": 0.814250903355559, "grad_norm": 0.8988712026681988, "learning_rate": 2.2165953166258972e-06, "loss": 0.0337, "step": 195145 }, { "epoch": 0.8142717660705494, "grad_norm": 0.6603858026517271, "learning_rate": 2.216566920252935e-06, "loss": 0.0133, "step": 195150 }, { "epoch": 0.8142926287855397, "grad_norm": 0.7133330298484707, "learning_rate": 2.2165385249712858e-06, "loss": 0.0242, "step": 195155 }, { "epoch": 0.8143134915005299, "grad_norm": 0.6129007093470633, "learning_rate": 2.2165101307808797e-06, "loss": 0.018, "step": 195160 }, { "epoch": 0.8143343542155201, "grad_norm": 0.7991223239558715, "learning_rate": 2.2164817376816477e-06, "loss": 0.0213, "step": 195165 }, { "epoch": 0.8143552169305105, "grad_norm": 0.8121533692468863, "learning_rate": 2.2164533456735194e-06, "loss": 0.0258, "step": 195170 }, { "epoch": 0.8143760796455007, "grad_norm": 0.7023184481454425, "learning_rate": 2.2164249547564243e-06, "loss": 0.0184, "step": 195175 }, { "epoch": 0.814396942360491, "grad_norm": 0.6092979222141255, "learning_rate": 2.2163965649302936e-06, "loss": 0.026, "step": 195180 }, { "epoch": 0.8144178050754813, "grad_norm": 0.4412189027296978, "learning_rate": 2.2163681761950563e-06, "loss": 0.0199, "step": 195185 }, { "epoch": 0.8144386677904716, "grad_norm": 0.48031415842260866, "learning_rate": 2.2163397885506436e-06, "loss": 0.013, "step": 195190 }, { "epoch": 0.8144595305054618, "grad_norm": 0.3366843325918764, "learning_rate": 2.2163114019969858e-06, "loss": 0.0215, "step": 195195 }, { "epoch": 0.8144803932204522, "grad_norm": 0.462104570275549, "learning_rate": 2.2162830165340116e-06, "loss": 0.0182, "step": 195200 }, { "epoch": 0.8145012559354424, "grad_norm": 0.7784098942914651, "learning_rate": 2.2162546321616525e-06, "loss": 0.0195, "step": 195205 }, { "epoch": 0.8145221186504327, "grad_norm": 0.45951077574656796, "learning_rate": 2.2162262488798377e-06, "loss": 0.0165, "step": 195210 }, { "epoch": 0.814542981365423, "grad_norm": 0.3470266553401728, "learning_rate": 2.2161978666884986e-06, "loss": 0.031, "step": 195215 }, { "epoch": 0.8145638440804133, "grad_norm": 0.7824139377718258, "learning_rate": 2.2161694855875637e-06, "loss": 0.0288, "step": 195220 }, { "epoch": 0.8145847067954035, "grad_norm": 0.5974867113046816, "learning_rate": 2.2161411055769643e-06, "loss": 0.0183, "step": 195225 }, { "epoch": 0.8146055695103938, "grad_norm": 0.5753429886488033, "learning_rate": 2.2161127266566314e-06, "loss": 0.0198, "step": 195230 }, { "epoch": 0.8146264322253841, "grad_norm": 0.4699828728305243, "learning_rate": 2.216084348826493e-06, "loss": 0.0243, "step": 195235 }, { "epoch": 0.8146472949403744, "grad_norm": 0.7758754095115482, "learning_rate": 2.2160559720864806e-06, "loss": 0.0185, "step": 195240 }, { "epoch": 0.8146681576553646, "grad_norm": 0.5450107873591441, "learning_rate": 2.216027596436525e-06, "loss": 0.0148, "step": 195245 }, { "epoch": 0.814689020370355, "grad_norm": 0.45366001558459274, "learning_rate": 2.215999221876555e-06, "loss": 0.0204, "step": 195250 }, { "epoch": 0.8147098830853452, "grad_norm": 0.44041085066978153, "learning_rate": 2.215970848406502e-06, "loss": 0.0177, "step": 195255 }, { "epoch": 0.8147307458003354, "grad_norm": 0.40152919970387846, "learning_rate": 2.2159424760262955e-06, "loss": 0.0155, "step": 195260 }, { "epoch": 0.8147516085153258, "grad_norm": 0.8872343134362597, "learning_rate": 2.2159141047358664e-06, "loss": 0.0259, "step": 195265 }, { "epoch": 0.814772471230316, "grad_norm": 0.5313925006149942, "learning_rate": 2.215885734535144e-06, "loss": 0.0241, "step": 195270 }, { "epoch": 0.8147933339453063, "grad_norm": 0.6481342496197676, "learning_rate": 2.215857365424059e-06, "loss": 0.0226, "step": 195275 }, { "epoch": 0.8148141966602965, "grad_norm": 0.3647865448955792, "learning_rate": 2.2158289974025427e-06, "loss": 0.0207, "step": 195280 }, { "epoch": 0.8148350593752869, "grad_norm": 0.6163349249036171, "learning_rate": 2.2158006304705234e-06, "loss": 0.0261, "step": 195285 }, { "epoch": 0.8148559220902771, "grad_norm": 0.3721824598902098, "learning_rate": 2.215772264627933e-06, "loss": 0.0184, "step": 195290 }, { "epoch": 0.8148767848052674, "grad_norm": 0.5978754944924956, "learning_rate": 2.2157438998747003e-06, "loss": 0.0266, "step": 195295 }, { "epoch": 0.8148976475202577, "grad_norm": 0.4501449754453794, "learning_rate": 2.215715536210757e-06, "loss": 0.0228, "step": 195300 }, { "epoch": 0.814918510235248, "grad_norm": 0.5123632849184545, "learning_rate": 2.2156871736360326e-06, "loss": 0.02, "step": 195305 }, { "epoch": 0.8149393729502382, "grad_norm": 1.0597082608165262, "learning_rate": 2.2156588121504575e-06, "loss": 0.0249, "step": 195310 }, { "epoch": 0.8149602356652286, "grad_norm": 0.5943233224559603, "learning_rate": 2.2156304517539623e-06, "loss": 0.0205, "step": 195315 }, { "epoch": 0.8149810983802188, "grad_norm": 0.8115686181601549, "learning_rate": 2.215602092446477e-06, "loss": 0.0197, "step": 195320 }, { "epoch": 0.8150019610952091, "grad_norm": 1.0735809070176008, "learning_rate": 2.2155737342279317e-06, "loss": 0.0328, "step": 195325 }, { "epoch": 0.8150228238101994, "grad_norm": 0.8960871315748857, "learning_rate": 2.2155453770982566e-06, "loss": 0.0326, "step": 195330 }, { "epoch": 0.8150436865251897, "grad_norm": 0.7667793502391695, "learning_rate": 2.2155170210573835e-06, "loss": 0.0192, "step": 195335 }, { "epoch": 0.8150645492401799, "grad_norm": 0.5822778709222928, "learning_rate": 2.215488666105241e-06, "loss": 0.0158, "step": 195340 }, { "epoch": 0.8150854119551701, "grad_norm": 0.5202664646325066, "learning_rate": 2.21546031224176e-06, "loss": 0.0288, "step": 195345 }, { "epoch": 0.8151062746701605, "grad_norm": 1.458178440188021, "learning_rate": 2.2154319594668706e-06, "loss": 0.0228, "step": 195350 }, { "epoch": 0.8151271373851507, "grad_norm": 0.4469218032909528, "learning_rate": 2.215403607780504e-06, "loss": 0.0237, "step": 195355 }, { "epoch": 0.815148000100141, "grad_norm": 0.5288261636833614, "learning_rate": 2.2153752571825897e-06, "loss": 0.0141, "step": 195360 }, { "epoch": 0.8151688628151313, "grad_norm": 0.6359018882296071, "learning_rate": 2.2153469076730585e-06, "loss": 0.0204, "step": 195365 }, { "epoch": 0.8151897255301216, "grad_norm": 0.4947189907724812, "learning_rate": 2.2153185592518406e-06, "loss": 0.026, "step": 195370 }, { "epoch": 0.8152105882451118, "grad_norm": 0.5751195195913288, "learning_rate": 2.2152902119188664e-06, "loss": 0.0199, "step": 195375 }, { "epoch": 0.8152314509601022, "grad_norm": 0.4049470940534955, "learning_rate": 2.2152618656740657e-06, "loss": 0.0248, "step": 195380 }, { "epoch": 0.8152523136750924, "grad_norm": 1.0867038597319576, "learning_rate": 2.21523352051737e-06, "loss": 0.0273, "step": 195385 }, { "epoch": 0.8152731763900827, "grad_norm": 0.5892819802422842, "learning_rate": 2.215205176448709e-06, "loss": 0.0177, "step": 195390 }, { "epoch": 0.815294039105073, "grad_norm": 0.40788161054717714, "learning_rate": 2.2151768334680134e-06, "loss": 0.0193, "step": 195395 }, { "epoch": 0.8153149018200633, "grad_norm": 0.6630837306630727, "learning_rate": 2.215148491575213e-06, "loss": 0.0239, "step": 195400 }, { "epoch": 0.8153357645350535, "grad_norm": 0.728545319595249, "learning_rate": 2.215120150770239e-06, "loss": 0.0182, "step": 195405 }, { "epoch": 0.8153566272500438, "grad_norm": 0.9676914945665109, "learning_rate": 2.215091811053021e-06, "loss": 0.0224, "step": 195410 }, { "epoch": 0.8153774899650341, "grad_norm": 0.9485396316587775, "learning_rate": 2.21506347242349e-06, "loss": 0.0201, "step": 195415 }, { "epoch": 0.8153983526800244, "grad_norm": 0.293406077805548, "learning_rate": 2.2150351348815764e-06, "loss": 0.0176, "step": 195420 }, { "epoch": 0.8154192153950146, "grad_norm": 0.5437039718794539, "learning_rate": 2.215006798427211e-06, "loss": 0.0147, "step": 195425 }, { "epoch": 0.815440078110005, "grad_norm": 1.0154955808719734, "learning_rate": 2.214978463060323e-06, "loss": 0.0152, "step": 195430 }, { "epoch": 0.8154609408249952, "grad_norm": 0.348363921514835, "learning_rate": 2.214950128780844e-06, "loss": 0.0226, "step": 195435 }, { "epoch": 0.8154818035399855, "grad_norm": 0.2827565259450069, "learning_rate": 2.214921795588704e-06, "loss": 0.0131, "step": 195440 }, { "epoch": 0.8155026662549758, "grad_norm": 0.9511854481272798, "learning_rate": 2.214893463483833e-06, "loss": 0.0232, "step": 195445 }, { "epoch": 0.815523528969966, "grad_norm": 0.6055931323168228, "learning_rate": 2.2148651324661624e-06, "loss": 0.0147, "step": 195450 }, { "epoch": 0.8155443916849563, "grad_norm": 0.7186662703723528, "learning_rate": 2.2148368025356224e-06, "loss": 0.0242, "step": 195455 }, { "epoch": 0.8155652543999465, "grad_norm": 0.5826919507370776, "learning_rate": 2.2148084736921433e-06, "loss": 0.0153, "step": 195460 }, { "epoch": 0.8155861171149369, "grad_norm": 0.7275835301116773, "learning_rate": 2.2147801459356553e-06, "loss": 0.0206, "step": 195465 }, { "epoch": 0.8156069798299271, "grad_norm": 0.5694824150779035, "learning_rate": 2.2147518192660892e-06, "loss": 0.0119, "step": 195470 }, { "epoch": 0.8156278425449174, "grad_norm": 0.4694834882559113, "learning_rate": 2.2147234936833757e-06, "loss": 0.0134, "step": 195475 }, { "epoch": 0.8156487052599077, "grad_norm": 0.7194345546939814, "learning_rate": 2.214695169187445e-06, "loss": 0.0185, "step": 195480 }, { "epoch": 0.815669567974898, "grad_norm": 0.5084340682926598, "learning_rate": 2.2146668457782277e-06, "loss": 0.0253, "step": 195485 }, { "epoch": 0.8156904306898882, "grad_norm": 0.19601088373268716, "learning_rate": 2.2146385234556538e-06, "loss": 0.0234, "step": 195490 }, { "epoch": 0.8157112934048786, "grad_norm": 0.42291510667506266, "learning_rate": 2.2146102022196546e-06, "loss": 0.0146, "step": 195495 }, { "epoch": 0.8157321561198688, "grad_norm": 0.27422398132299425, "learning_rate": 2.2145818820701607e-06, "loss": 0.0205, "step": 195500 }, { "epoch": 0.8157530188348591, "grad_norm": 0.7104567333893951, "learning_rate": 2.2145535630071017e-06, "loss": 0.0234, "step": 195505 }, { "epoch": 0.8157738815498494, "grad_norm": 0.8632463507638624, "learning_rate": 2.214525245030409e-06, "loss": 0.0203, "step": 195510 }, { "epoch": 0.8157947442648397, "grad_norm": 0.8673676271339323, "learning_rate": 2.214496928140013e-06, "loss": 0.0224, "step": 195515 }, { "epoch": 0.8158156069798299, "grad_norm": 0.41266478248865507, "learning_rate": 2.2144686123358437e-06, "loss": 0.0136, "step": 195520 }, { "epoch": 0.8158364696948202, "grad_norm": 1.0888153436426917, "learning_rate": 2.214440297617832e-06, "loss": 0.0214, "step": 195525 }, { "epoch": 0.8158573324098105, "grad_norm": 0.5806034815484762, "learning_rate": 2.2144119839859094e-06, "loss": 0.0211, "step": 195530 }, { "epoch": 0.8158781951248008, "grad_norm": 0.5757155783283184, "learning_rate": 2.214383671440005e-06, "loss": 0.0154, "step": 195535 }, { "epoch": 0.815899057839791, "grad_norm": 0.6926056488291171, "learning_rate": 2.21435535998005e-06, "loss": 0.0261, "step": 195540 }, { "epoch": 0.8159199205547814, "grad_norm": 0.5982285667433835, "learning_rate": 2.2143270496059745e-06, "loss": 0.0239, "step": 195545 }, { "epoch": 0.8159407832697716, "grad_norm": 0.9146969058002649, "learning_rate": 2.21429874031771e-06, "loss": 0.0216, "step": 195550 }, { "epoch": 0.8159616459847618, "grad_norm": 0.8684813355808101, "learning_rate": 2.2142704321151868e-06, "loss": 0.0127, "step": 195555 }, { "epoch": 0.8159825086997522, "grad_norm": 0.909094841676231, "learning_rate": 2.214242124998335e-06, "loss": 0.0272, "step": 195560 }, { "epoch": 0.8160033714147424, "grad_norm": 0.2938169313278884, "learning_rate": 2.2142138189670857e-06, "loss": 0.0168, "step": 195565 }, { "epoch": 0.8160242341297327, "grad_norm": 0.5807904232132751, "learning_rate": 2.214185514021369e-06, "loss": 0.0242, "step": 195570 }, { "epoch": 0.816045096844723, "grad_norm": 1.4875872022887588, "learning_rate": 2.214157210161116e-06, "loss": 0.0188, "step": 195575 }, { "epoch": 0.8160659595597133, "grad_norm": 0.6620982874417563, "learning_rate": 2.2141289073862572e-06, "loss": 0.0236, "step": 195580 }, { "epoch": 0.8160868222747035, "grad_norm": 0.3041315281624058, "learning_rate": 2.2141006056967236e-06, "loss": 0.0164, "step": 195585 }, { "epoch": 0.8161076849896938, "grad_norm": 0.7349933071478295, "learning_rate": 2.214072305092445e-06, "loss": 0.0205, "step": 195590 }, { "epoch": 0.8161285477046841, "grad_norm": 0.9925769388789393, "learning_rate": 2.2140440055733525e-06, "loss": 0.0252, "step": 195595 }, { "epoch": 0.8161494104196744, "grad_norm": 0.3191739964357361, "learning_rate": 2.214015707139377e-06, "loss": 0.018, "step": 195600 }, { "epoch": 0.8161702731346646, "grad_norm": 0.9357524383444554, "learning_rate": 2.213987409790449e-06, "loss": 0.0232, "step": 195605 }, { "epoch": 0.816191135849655, "grad_norm": 0.7906721368532508, "learning_rate": 2.2139591135264984e-06, "loss": 0.0231, "step": 195610 }, { "epoch": 0.8162119985646452, "grad_norm": 1.0615877730805499, "learning_rate": 2.213930818347457e-06, "loss": 0.0202, "step": 195615 }, { "epoch": 0.8162328612796355, "grad_norm": 0.6693960734812413, "learning_rate": 2.213902524253255e-06, "loss": 0.0185, "step": 195620 }, { "epoch": 0.8162537239946258, "grad_norm": 0.9602899028388823, "learning_rate": 2.213874231243823e-06, "loss": 0.0148, "step": 195625 }, { "epoch": 0.8162745867096161, "grad_norm": 0.5324357845504024, "learning_rate": 2.213845939319092e-06, "loss": 0.0185, "step": 195630 }, { "epoch": 0.8162954494246063, "grad_norm": 1.4233620222777785, "learning_rate": 2.213817648478992e-06, "loss": 0.0182, "step": 195635 }, { "epoch": 0.8163163121395965, "grad_norm": 0.638942283305766, "learning_rate": 2.2137893587234547e-06, "loss": 0.0187, "step": 195640 }, { "epoch": 0.8163371748545869, "grad_norm": 0.4873605263612596, "learning_rate": 2.2137610700524096e-06, "loss": 0.0249, "step": 195645 }, { "epoch": 0.8163580375695771, "grad_norm": 0.6486496135530777, "learning_rate": 2.2137327824657883e-06, "loss": 0.0209, "step": 195650 }, { "epoch": 0.8163789002845674, "grad_norm": 0.9143786783491635, "learning_rate": 2.2137044959635216e-06, "loss": 0.0232, "step": 195655 }, { "epoch": 0.8163997629995577, "grad_norm": 0.481396607071559, "learning_rate": 2.21367621054554e-06, "loss": 0.0219, "step": 195660 }, { "epoch": 0.816420625714548, "grad_norm": 1.2093165994496584, "learning_rate": 2.2136479262117738e-06, "loss": 0.0371, "step": 195665 }, { "epoch": 0.8164414884295382, "grad_norm": 1.0731403350687299, "learning_rate": 2.2136196429621542e-06, "loss": 0.0217, "step": 195670 }, { "epoch": 0.8164623511445286, "grad_norm": 1.025700012503298, "learning_rate": 2.213591360796612e-06, "loss": 0.0216, "step": 195675 }, { "epoch": 0.8164832138595188, "grad_norm": 0.8567979541482524, "learning_rate": 2.2135630797150774e-06, "loss": 0.0252, "step": 195680 }, { "epoch": 0.8165040765745091, "grad_norm": 0.6632640382583695, "learning_rate": 2.2135347997174816e-06, "loss": 0.0257, "step": 195685 }, { "epoch": 0.8165249392894994, "grad_norm": 0.9649627568260367, "learning_rate": 2.2135065208037555e-06, "loss": 0.0212, "step": 195690 }, { "epoch": 0.8165458020044897, "grad_norm": 0.8803518613712783, "learning_rate": 2.21347824297383e-06, "loss": 0.0173, "step": 195695 }, { "epoch": 0.8165666647194799, "grad_norm": 0.5352809366109468, "learning_rate": 2.213449966227635e-06, "loss": 0.0235, "step": 195700 }, { "epoch": 0.8165875274344702, "grad_norm": 0.5880227376404675, "learning_rate": 2.213421690565102e-06, "loss": 0.0142, "step": 195705 }, { "epoch": 0.8166083901494605, "grad_norm": 0.5793501442878487, "learning_rate": 2.2133934159861613e-06, "loss": 0.0201, "step": 195710 }, { "epoch": 0.8166292528644508, "grad_norm": 0.45344302970620487, "learning_rate": 2.213365142490744e-06, "loss": 0.0338, "step": 195715 }, { "epoch": 0.816650115579441, "grad_norm": 0.9474398648633603, "learning_rate": 2.213336870078781e-06, "loss": 0.0212, "step": 195720 }, { "epoch": 0.8166709782944314, "grad_norm": 0.7229227790670504, "learning_rate": 2.2133085987502032e-06, "loss": 0.0249, "step": 195725 }, { "epoch": 0.8166918410094216, "grad_norm": 1.1089889110516722, "learning_rate": 2.2132803285049416e-06, "loss": 0.0202, "step": 195730 }, { "epoch": 0.8167127037244118, "grad_norm": 0.42910969198182275, "learning_rate": 2.213252059342926e-06, "loss": 0.017, "step": 195735 }, { "epoch": 0.8167335664394022, "grad_norm": 0.48338584409382324, "learning_rate": 2.2132237912640874e-06, "loss": 0.0206, "step": 195740 }, { "epoch": 0.8167544291543924, "grad_norm": 0.6764264399365398, "learning_rate": 2.213195524268358e-06, "loss": 0.0226, "step": 195745 }, { "epoch": 0.8167752918693827, "grad_norm": 0.6951589092967542, "learning_rate": 2.2131672583556673e-06, "loss": 0.0166, "step": 195750 }, { "epoch": 0.816796154584373, "grad_norm": 0.5404007403412019, "learning_rate": 2.2131389935259464e-06, "loss": 0.0173, "step": 195755 }, { "epoch": 0.8168170172993633, "grad_norm": 0.6544621973424181, "learning_rate": 2.2131107297791266e-06, "loss": 0.0246, "step": 195760 }, { "epoch": 0.8168378800143535, "grad_norm": 0.4615079146864068, "learning_rate": 2.213082467115138e-06, "loss": 0.0197, "step": 195765 }, { "epoch": 0.8168587427293438, "grad_norm": 0.8494921020678946, "learning_rate": 2.2130542055339122e-06, "loss": 0.0235, "step": 195770 }, { "epoch": 0.8168796054443341, "grad_norm": 0.40218423105825446, "learning_rate": 2.2130259450353795e-06, "loss": 0.0193, "step": 195775 }, { "epoch": 0.8169004681593244, "grad_norm": 0.33060528116459725, "learning_rate": 2.212997685619472e-06, "loss": 0.0157, "step": 195780 }, { "epoch": 0.8169213308743146, "grad_norm": 0.6105360967033061, "learning_rate": 2.212969427286119e-06, "loss": 0.0253, "step": 195785 }, { "epoch": 0.816942193589305, "grad_norm": 0.5355253571791384, "learning_rate": 2.212941170035252e-06, "loss": 0.022, "step": 195790 }, { "epoch": 0.8169630563042952, "grad_norm": 0.382264887665253, "learning_rate": 2.2129129138668016e-06, "loss": 0.0173, "step": 195795 }, { "epoch": 0.8169839190192855, "grad_norm": 1.002555761488014, "learning_rate": 2.2128846587806997e-06, "loss": 0.0222, "step": 195800 }, { "epoch": 0.8170047817342758, "grad_norm": 0.6555350337565723, "learning_rate": 2.2128564047768756e-06, "loss": 0.0238, "step": 195805 }, { "epoch": 0.8170256444492661, "grad_norm": 1.0232292932696294, "learning_rate": 2.2128281518552614e-06, "loss": 0.0306, "step": 195810 }, { "epoch": 0.8170465071642563, "grad_norm": 0.1728998566417535, "learning_rate": 2.212799900015788e-06, "loss": 0.019, "step": 195815 }, { "epoch": 0.8170673698792466, "grad_norm": 0.8723318780468544, "learning_rate": 2.2127716492583863e-06, "loss": 0.0176, "step": 195820 }, { "epoch": 0.8170882325942369, "grad_norm": 0.2946600736654736, "learning_rate": 2.2127433995829865e-06, "loss": 0.0248, "step": 195825 }, { "epoch": 0.8171090953092272, "grad_norm": 0.6814809086791049, "learning_rate": 2.21271515098952e-06, "loss": 0.0214, "step": 195830 }, { "epoch": 0.8171299580242174, "grad_norm": 0.782519477929961, "learning_rate": 2.2126869034779185e-06, "loss": 0.0203, "step": 195835 }, { "epoch": 0.8171508207392078, "grad_norm": 0.7805871210827114, "learning_rate": 2.212658657048111e-06, "loss": 0.0268, "step": 195840 }, { "epoch": 0.817171683454198, "grad_norm": 0.7941555708412543, "learning_rate": 2.21263041170003e-06, "loss": 0.0208, "step": 195845 }, { "epoch": 0.8171925461691882, "grad_norm": 1.6002182819181086, "learning_rate": 2.212602167433607e-06, "loss": 0.0268, "step": 195850 }, { "epoch": 0.8172134088841786, "grad_norm": 0.8040286812516326, "learning_rate": 2.212573924248771e-06, "loss": 0.0225, "step": 195855 }, { "epoch": 0.8172342715991688, "grad_norm": 0.2371158671962265, "learning_rate": 2.2125456821454545e-06, "loss": 0.0234, "step": 195860 }, { "epoch": 0.8172551343141591, "grad_norm": 0.9241042017234707, "learning_rate": 2.2125174411235877e-06, "loss": 0.0279, "step": 195865 }, { "epoch": 0.8172759970291494, "grad_norm": 0.43875441291570416, "learning_rate": 2.2124892011831024e-06, "loss": 0.0207, "step": 195870 }, { "epoch": 0.8172968597441397, "grad_norm": 0.5320663827615476, "learning_rate": 2.212460962323929e-06, "loss": 0.03, "step": 195875 }, { "epoch": 0.8173177224591299, "grad_norm": 0.5724741062145361, "learning_rate": 2.2124327245459986e-06, "loss": 0.0168, "step": 195880 }, { "epoch": 0.8173385851741202, "grad_norm": 0.6178637588209845, "learning_rate": 2.2124044878492416e-06, "loss": 0.0182, "step": 195885 }, { "epoch": 0.8173594478891105, "grad_norm": 0.4251124360705838, "learning_rate": 2.2123762522335905e-06, "loss": 0.0181, "step": 195890 }, { "epoch": 0.8173803106041008, "grad_norm": 0.8766084159457737, "learning_rate": 2.212348017698975e-06, "loss": 0.0257, "step": 195895 }, { "epoch": 0.817401173319091, "grad_norm": 0.5291782250137175, "learning_rate": 2.2123197842453263e-06, "loss": 0.0244, "step": 195900 }, { "epoch": 0.8174220360340814, "grad_norm": 0.6751907097396039, "learning_rate": 2.2122915518725756e-06, "loss": 0.0176, "step": 195905 }, { "epoch": 0.8174428987490716, "grad_norm": 1.2667209508180814, "learning_rate": 2.2122633205806545e-06, "loss": 0.0177, "step": 195910 }, { "epoch": 0.8174637614640619, "grad_norm": 0.37487617207881285, "learning_rate": 2.2122350903694933e-06, "loss": 0.0168, "step": 195915 }, { "epoch": 0.8174846241790522, "grad_norm": 1.3616817468737976, "learning_rate": 2.2122068612390234e-06, "loss": 0.0354, "step": 195920 }, { "epoch": 0.8175054868940425, "grad_norm": 0.6746821895128118, "learning_rate": 2.2121786331891756e-06, "loss": 0.0231, "step": 195925 }, { "epoch": 0.8175263496090327, "grad_norm": 0.7309424756765531, "learning_rate": 2.2121504062198805e-06, "loss": 0.0195, "step": 195930 }, { "epoch": 0.817547212324023, "grad_norm": 0.4176741268172597, "learning_rate": 2.2121221803310704e-06, "loss": 0.0185, "step": 195935 }, { "epoch": 0.8175680750390133, "grad_norm": 0.5644191200554024, "learning_rate": 2.212093955522676e-06, "loss": 0.0177, "step": 195940 }, { "epoch": 0.8175889377540035, "grad_norm": 0.4691825089133814, "learning_rate": 2.2120657317946273e-06, "loss": 0.0145, "step": 195945 }, { "epoch": 0.8176098004689938, "grad_norm": 0.462433477900015, "learning_rate": 2.2120375091468566e-06, "loss": 0.025, "step": 195950 }, { "epoch": 0.8176306631839841, "grad_norm": 0.37203557582860386, "learning_rate": 2.2120092875792952e-06, "loss": 0.0175, "step": 195955 }, { "epoch": 0.8176515258989744, "grad_norm": 0.46835165793258937, "learning_rate": 2.2119810670918728e-06, "loss": 0.021, "step": 195960 }, { "epoch": 0.8176723886139646, "grad_norm": 0.5746348527482812, "learning_rate": 2.211952847684521e-06, "loss": 0.0132, "step": 195965 }, { "epoch": 0.817693251328955, "grad_norm": 0.9856161528465295, "learning_rate": 2.211924629357172e-06, "loss": 0.0222, "step": 195970 }, { "epoch": 0.8177141140439452, "grad_norm": 0.4410813923240846, "learning_rate": 2.2118964121097548e-06, "loss": 0.0178, "step": 195975 }, { "epoch": 0.8177349767589355, "grad_norm": 0.7104502057717627, "learning_rate": 2.211868195942203e-06, "loss": 0.0206, "step": 195980 }, { "epoch": 0.8177558394739258, "grad_norm": 0.426186810108847, "learning_rate": 2.2118399808544465e-06, "loss": 0.0163, "step": 195985 }, { "epoch": 0.8177767021889161, "grad_norm": 0.6642049127212081, "learning_rate": 2.2118117668464157e-06, "loss": 0.0167, "step": 195990 }, { "epoch": 0.8177975649039063, "grad_norm": 0.8583537492392371, "learning_rate": 2.2117835539180428e-06, "loss": 0.0169, "step": 195995 }, { "epoch": 0.8178184276188966, "grad_norm": 0.38122854343621704, "learning_rate": 2.211755342069259e-06, "loss": 0.0184, "step": 196000 }, { "epoch": 0.8178392903338869, "grad_norm": 0.6529463169151326, "learning_rate": 2.2117271312999947e-06, "loss": 0.0206, "step": 196005 }, { "epoch": 0.8178601530488772, "grad_norm": 0.7091381291886403, "learning_rate": 2.2116989216101815e-06, "loss": 0.0278, "step": 196010 }, { "epoch": 0.8178810157638674, "grad_norm": 0.45315966456747275, "learning_rate": 2.2116707129997504e-06, "loss": 0.0224, "step": 196015 }, { "epoch": 0.8179018784788578, "grad_norm": 0.603095285075047, "learning_rate": 2.2116425054686333e-06, "loss": 0.0233, "step": 196020 }, { "epoch": 0.817922741193848, "grad_norm": 0.49290850700021843, "learning_rate": 2.2116142990167603e-06, "loss": 0.0162, "step": 196025 }, { "epoch": 0.8179436039088382, "grad_norm": 0.2830222892464159, "learning_rate": 2.2115860936440627e-06, "loss": 0.0207, "step": 196030 }, { "epoch": 0.8179644666238286, "grad_norm": 0.2785296334450328, "learning_rate": 2.2115578893504723e-06, "loss": 0.0169, "step": 196035 }, { "epoch": 0.8179853293388188, "grad_norm": 0.7341979518136922, "learning_rate": 2.2115296861359202e-06, "loss": 0.0226, "step": 196040 }, { "epoch": 0.8180061920538091, "grad_norm": 0.5608813446984372, "learning_rate": 2.211501484000337e-06, "loss": 0.022, "step": 196045 }, { "epoch": 0.8180270547687994, "grad_norm": 0.32601614768041726, "learning_rate": 2.2114732829436546e-06, "loss": 0.0168, "step": 196050 }, { "epoch": 0.8180479174837897, "grad_norm": 0.7126891776051782, "learning_rate": 2.211445082965804e-06, "loss": 0.0187, "step": 196055 }, { "epoch": 0.8180687801987799, "grad_norm": 0.3526726147076503, "learning_rate": 2.2114168840667157e-06, "loss": 0.0174, "step": 196060 }, { "epoch": 0.8180896429137702, "grad_norm": 0.41029484153276535, "learning_rate": 2.2113886862463222e-06, "loss": 0.0205, "step": 196065 }, { "epoch": 0.8181105056287605, "grad_norm": 0.46669694017898145, "learning_rate": 2.211360489504554e-06, "loss": 0.0242, "step": 196070 }, { "epoch": 0.8181313683437508, "grad_norm": 0.7870897717322611, "learning_rate": 2.211332293841342e-06, "loss": 0.0205, "step": 196075 }, { "epoch": 0.818152231058741, "grad_norm": 0.7987543958884289, "learning_rate": 2.211304099256619e-06, "loss": 0.0183, "step": 196080 }, { "epoch": 0.8181730937737314, "grad_norm": 0.7426399911606194, "learning_rate": 2.2112759057503137e-06, "loss": 0.0266, "step": 196085 }, { "epoch": 0.8181939564887216, "grad_norm": 0.943291939246153, "learning_rate": 2.2112477133223594e-06, "loss": 0.0197, "step": 196090 }, { "epoch": 0.8182148192037119, "grad_norm": 0.48932783122844237, "learning_rate": 2.2112195219726866e-06, "loss": 0.0187, "step": 196095 }, { "epoch": 0.8182356819187022, "grad_norm": 0.5047049532753444, "learning_rate": 2.211191331701227e-06, "loss": 0.0153, "step": 196100 }, { "epoch": 0.8182565446336925, "grad_norm": 0.4552888227402362, "learning_rate": 2.211163142507911e-06, "loss": 0.0141, "step": 196105 }, { "epoch": 0.8182774073486827, "grad_norm": 0.45214848652651396, "learning_rate": 2.211134954392671e-06, "loss": 0.0225, "step": 196110 }, { "epoch": 0.8182982700636731, "grad_norm": 0.41041352882702853, "learning_rate": 2.2111067673554374e-06, "loss": 0.0237, "step": 196115 }, { "epoch": 0.8183191327786633, "grad_norm": 0.7050449051364938, "learning_rate": 2.2110785813961425e-06, "loss": 0.0148, "step": 196120 }, { "epoch": 0.8183399954936535, "grad_norm": 0.38033161084652223, "learning_rate": 2.211050396514716e-06, "loss": 0.02, "step": 196125 }, { "epoch": 0.8183608582086438, "grad_norm": 0.5303089171365805, "learning_rate": 2.2110222127110903e-06, "loss": 0.0297, "step": 196130 }, { "epoch": 0.8183817209236341, "grad_norm": 0.7028672055397202, "learning_rate": 2.2109940299851967e-06, "loss": 0.0175, "step": 196135 }, { "epoch": 0.8184025836386244, "grad_norm": 0.6926186235994926, "learning_rate": 2.210965848336967e-06, "loss": 0.0199, "step": 196140 }, { "epoch": 0.8184234463536146, "grad_norm": 0.597154030690073, "learning_rate": 2.210937667766331e-06, "loss": 0.0237, "step": 196145 }, { "epoch": 0.818444309068605, "grad_norm": 0.5402307929444069, "learning_rate": 2.2109094882732206e-06, "loss": 0.0431, "step": 196150 }, { "epoch": 0.8184651717835952, "grad_norm": 1.0836780253736094, "learning_rate": 2.2108813098575685e-06, "loss": 0.0201, "step": 196155 }, { "epoch": 0.8184860344985855, "grad_norm": 0.7163245840403155, "learning_rate": 2.2108531325193046e-06, "loss": 0.0207, "step": 196160 }, { "epoch": 0.8185068972135758, "grad_norm": 0.9740816212941682, "learning_rate": 2.21082495625836e-06, "loss": 0.021, "step": 196165 }, { "epoch": 0.8185277599285661, "grad_norm": 0.6975073548997074, "learning_rate": 2.2107967810746674e-06, "loss": 0.0263, "step": 196170 }, { "epoch": 0.8185486226435563, "grad_norm": 0.848719717259195, "learning_rate": 2.2107686069681573e-06, "loss": 0.0207, "step": 196175 }, { "epoch": 0.8185694853585466, "grad_norm": 0.5380033747446117, "learning_rate": 2.210740433938761e-06, "loss": 0.0198, "step": 196180 }, { "epoch": 0.8185903480735369, "grad_norm": 0.45177395934453796, "learning_rate": 2.2107122619864094e-06, "loss": 0.0264, "step": 196185 }, { "epoch": 0.8186112107885272, "grad_norm": 0.6860494325848162, "learning_rate": 2.2106840911110357e-06, "loss": 0.0236, "step": 196190 }, { "epoch": 0.8186320735035174, "grad_norm": 0.5233903642187591, "learning_rate": 2.2106559213125695e-06, "loss": 0.0148, "step": 196195 }, { "epoch": 0.8186529362185078, "grad_norm": 0.5406248185754463, "learning_rate": 2.210627752590943e-06, "loss": 0.0173, "step": 196200 }, { "epoch": 0.818673798933498, "grad_norm": 0.6476590499925527, "learning_rate": 2.2105995849460873e-06, "loss": 0.0162, "step": 196205 }, { "epoch": 0.8186946616484883, "grad_norm": 0.6851287317478956, "learning_rate": 2.2105714183779337e-06, "loss": 0.021, "step": 196210 }, { "epoch": 0.8187155243634786, "grad_norm": 1.0616360451070217, "learning_rate": 2.2105432528864145e-06, "loss": 0.0304, "step": 196215 }, { "epoch": 0.8187363870784689, "grad_norm": 0.928506795724653, "learning_rate": 2.2105150884714597e-06, "loss": 0.0126, "step": 196220 }, { "epoch": 0.8187572497934591, "grad_norm": 0.5236905149953494, "learning_rate": 2.210486925133002e-06, "loss": 0.0252, "step": 196225 }, { "epoch": 0.8187781125084495, "grad_norm": 0.49546891611470867, "learning_rate": 2.2104587628709718e-06, "loss": 0.0231, "step": 196230 }, { "epoch": 0.8187989752234397, "grad_norm": 0.9207471495018845, "learning_rate": 2.2104306016853013e-06, "loss": 0.0156, "step": 196235 }, { "epoch": 0.8188198379384299, "grad_norm": 0.8319525711788013, "learning_rate": 2.2104024415759217e-06, "loss": 0.0185, "step": 196240 }, { "epoch": 0.8188407006534202, "grad_norm": 0.48824214149186906, "learning_rate": 2.210374282542764e-06, "loss": 0.028, "step": 196245 }, { "epoch": 0.8188615633684105, "grad_norm": 0.5217086610502836, "learning_rate": 2.2103461245857604e-06, "loss": 0.0195, "step": 196250 }, { "epoch": 0.8188824260834008, "grad_norm": 0.4565359470858392, "learning_rate": 2.210317967704842e-06, "loss": 0.019, "step": 196255 }, { "epoch": 0.818903288798391, "grad_norm": 0.8465610687261013, "learning_rate": 2.21028981189994e-06, "loss": 0.0251, "step": 196260 }, { "epoch": 0.8189241515133814, "grad_norm": 0.7433539970550833, "learning_rate": 2.2102616571709863e-06, "loss": 0.0184, "step": 196265 }, { "epoch": 0.8189450142283716, "grad_norm": 1.1605821726504246, "learning_rate": 2.2102335035179116e-06, "loss": 0.0288, "step": 196270 }, { "epoch": 0.8189658769433619, "grad_norm": 0.6095324593533862, "learning_rate": 2.210205350940649e-06, "loss": 0.0254, "step": 196275 }, { "epoch": 0.8189867396583522, "grad_norm": 0.3711374010932467, "learning_rate": 2.210177199439128e-06, "loss": 0.0196, "step": 196280 }, { "epoch": 0.8190076023733425, "grad_norm": 0.7437422692980707, "learning_rate": 2.2101490490132817e-06, "loss": 0.0232, "step": 196285 }, { "epoch": 0.8190284650883327, "grad_norm": 0.7436679933736772, "learning_rate": 2.2101208996630412e-06, "loss": 0.0223, "step": 196290 }, { "epoch": 0.8190493278033231, "grad_norm": 0.29105250534394983, "learning_rate": 2.210092751388337e-06, "loss": 0.0182, "step": 196295 }, { "epoch": 0.8190701905183133, "grad_norm": 0.5236606128751286, "learning_rate": 2.2100646041891015e-06, "loss": 0.0212, "step": 196300 }, { "epoch": 0.8190910532333036, "grad_norm": 0.8480965080347193, "learning_rate": 2.2100364580652663e-06, "loss": 0.0218, "step": 196305 }, { "epoch": 0.8191119159482938, "grad_norm": 0.6286894988229497, "learning_rate": 2.2100083130167628e-06, "loss": 0.0179, "step": 196310 }, { "epoch": 0.8191327786632842, "grad_norm": 0.5984959480499608, "learning_rate": 2.2099801690435222e-06, "loss": 0.0195, "step": 196315 }, { "epoch": 0.8191536413782744, "grad_norm": 0.5830795537741701, "learning_rate": 2.2099520261454765e-06, "loss": 0.015, "step": 196320 }, { "epoch": 0.8191745040932646, "grad_norm": 0.3418842308411935, "learning_rate": 2.209923884322557e-06, "loss": 0.0148, "step": 196325 }, { "epoch": 0.819195366808255, "grad_norm": 0.377083838532601, "learning_rate": 2.209895743574695e-06, "loss": 0.0172, "step": 196330 }, { "epoch": 0.8192162295232452, "grad_norm": 1.0612844709610498, "learning_rate": 2.2098676039018225e-06, "loss": 0.0183, "step": 196335 }, { "epoch": 0.8192370922382355, "grad_norm": 0.5917224675808402, "learning_rate": 2.209839465303871e-06, "loss": 0.0203, "step": 196340 }, { "epoch": 0.8192579549532258, "grad_norm": 0.7354108043294867, "learning_rate": 2.2098113277807722e-06, "loss": 0.0178, "step": 196345 }, { "epoch": 0.8192788176682161, "grad_norm": 0.34909407590933833, "learning_rate": 2.2097831913324567e-06, "loss": 0.0275, "step": 196350 }, { "epoch": 0.8192996803832063, "grad_norm": 0.8303721133044355, "learning_rate": 2.209755055958857e-06, "loss": 0.024, "step": 196355 }, { "epoch": 0.8193205430981966, "grad_norm": 0.22621053494258928, "learning_rate": 2.2097269216599047e-06, "loss": 0.0172, "step": 196360 }, { "epoch": 0.8193414058131869, "grad_norm": 0.585927562128071, "learning_rate": 2.209698788435531e-06, "loss": 0.0188, "step": 196365 }, { "epoch": 0.8193622685281772, "grad_norm": 0.7986288633292389, "learning_rate": 2.2096706562856674e-06, "loss": 0.0277, "step": 196370 }, { "epoch": 0.8193831312431674, "grad_norm": 0.716376332758796, "learning_rate": 2.2096425252102463e-06, "loss": 0.0305, "step": 196375 }, { "epoch": 0.8194039939581578, "grad_norm": 0.46688879665590327, "learning_rate": 2.2096143952091987e-06, "loss": 0.0151, "step": 196380 }, { "epoch": 0.819424856673148, "grad_norm": 0.5186826758031201, "learning_rate": 2.209586266282456e-06, "loss": 0.0269, "step": 196385 }, { "epoch": 0.8194457193881383, "grad_norm": 0.7787576179685409, "learning_rate": 2.20955813842995e-06, "loss": 0.0203, "step": 196390 }, { "epoch": 0.8194665821031286, "grad_norm": 0.9113918939416727, "learning_rate": 2.2095300116516134e-06, "loss": 0.0309, "step": 196395 }, { "epoch": 0.8194874448181189, "grad_norm": 0.5147146140044634, "learning_rate": 2.209501885947376e-06, "loss": 0.0157, "step": 196400 }, { "epoch": 0.8195083075331091, "grad_norm": 0.40610790017195136, "learning_rate": 2.20947376131717e-06, "loss": 0.0203, "step": 196405 }, { "epoch": 0.8195291702480995, "grad_norm": 0.6463844961007515, "learning_rate": 2.2094456377609285e-06, "loss": 0.0237, "step": 196410 }, { "epoch": 0.8195500329630897, "grad_norm": 0.42497596478557936, "learning_rate": 2.209417515278581e-06, "loss": 0.0306, "step": 196415 }, { "epoch": 0.81957089567808, "grad_norm": 0.724848105968413, "learning_rate": 2.2093893938700607e-06, "loss": 0.0185, "step": 196420 }, { "epoch": 0.8195917583930702, "grad_norm": 0.2193865263359696, "learning_rate": 2.2093612735352986e-06, "loss": 0.0175, "step": 196425 }, { "epoch": 0.8196126211080605, "grad_norm": 0.47201569128025506, "learning_rate": 2.2093331542742264e-06, "loss": 0.0239, "step": 196430 }, { "epoch": 0.8196334838230508, "grad_norm": 0.444996959661699, "learning_rate": 2.209305036086776e-06, "loss": 0.0181, "step": 196435 }, { "epoch": 0.819654346538041, "grad_norm": 0.7177372455152983, "learning_rate": 2.2092769189728784e-06, "loss": 0.0217, "step": 196440 }, { "epoch": 0.8196752092530314, "grad_norm": 0.7524270213487255, "learning_rate": 2.209248802932467e-06, "loss": 0.0281, "step": 196445 }, { "epoch": 0.8196960719680216, "grad_norm": 0.6059774573422949, "learning_rate": 2.2092206879654716e-06, "loss": 0.0272, "step": 196450 }, { "epoch": 0.8197169346830119, "grad_norm": 0.3184970159264621, "learning_rate": 2.2091925740718245e-06, "loss": 0.0125, "step": 196455 }, { "epoch": 0.8197377973980022, "grad_norm": 0.5173836302614193, "learning_rate": 2.2091644612514575e-06, "loss": 0.02, "step": 196460 }, { "epoch": 0.8197586601129925, "grad_norm": 1.738950729194427, "learning_rate": 2.209136349504303e-06, "loss": 0.0218, "step": 196465 }, { "epoch": 0.8197795228279827, "grad_norm": 0.5679966671123058, "learning_rate": 2.209108238830292e-06, "loss": 0.0142, "step": 196470 }, { "epoch": 0.8198003855429731, "grad_norm": 0.5618298194000773, "learning_rate": 2.2090801292293557e-06, "loss": 0.0215, "step": 196475 }, { "epoch": 0.8198212482579633, "grad_norm": 0.6603731693917634, "learning_rate": 2.209052020701427e-06, "loss": 0.0244, "step": 196480 }, { "epoch": 0.8198421109729536, "grad_norm": 0.7213046262330051, "learning_rate": 2.2090239132464364e-06, "loss": 0.0231, "step": 196485 }, { "epoch": 0.8198629736879438, "grad_norm": 0.638151955993934, "learning_rate": 2.208995806864317e-06, "loss": 0.021, "step": 196490 }, { "epoch": 0.8198838364029342, "grad_norm": 0.29271697031117355, "learning_rate": 2.2089677015549993e-06, "loss": 0.0159, "step": 196495 }, { "epoch": 0.8199046991179244, "grad_norm": 0.2687937098619825, "learning_rate": 2.208939597318416e-06, "loss": 0.0145, "step": 196500 }, { "epoch": 0.8199255618329147, "grad_norm": 0.715845166637559, "learning_rate": 2.2089114941544985e-06, "loss": 0.0282, "step": 196505 }, { "epoch": 0.819946424547905, "grad_norm": 0.6370384281540316, "learning_rate": 2.208883392063178e-06, "loss": 0.0239, "step": 196510 }, { "epoch": 0.8199672872628953, "grad_norm": 0.5736671253841783, "learning_rate": 2.2088552910443873e-06, "loss": 0.0248, "step": 196515 }, { "epoch": 0.8199881499778855, "grad_norm": 0.9031629424991212, "learning_rate": 2.2088271910980576e-06, "loss": 0.019, "step": 196520 }, { "epoch": 0.8200090126928758, "grad_norm": 0.2731218374590563, "learning_rate": 2.2087990922241207e-06, "loss": 0.0161, "step": 196525 }, { "epoch": 0.8200298754078661, "grad_norm": 0.41517698524399965, "learning_rate": 2.208770994422509e-06, "loss": 0.0193, "step": 196530 }, { "epoch": 0.8200507381228563, "grad_norm": 0.2678862218680405, "learning_rate": 2.208742897693153e-06, "loss": 0.018, "step": 196535 }, { "epoch": 0.8200716008378466, "grad_norm": 0.4402179294980498, "learning_rate": 2.208714802035986e-06, "loss": 0.0215, "step": 196540 }, { "epoch": 0.8200924635528369, "grad_norm": 0.4258289245713762, "learning_rate": 2.208686707450938e-06, "loss": 0.019, "step": 196545 }, { "epoch": 0.8201133262678272, "grad_norm": 0.5046861808958948, "learning_rate": 2.2086586139379427e-06, "loss": 0.0234, "step": 196550 }, { "epoch": 0.8201341889828174, "grad_norm": 1.1205019335669433, "learning_rate": 2.208630521496931e-06, "loss": 0.0262, "step": 196555 }, { "epoch": 0.8201550516978078, "grad_norm": 0.8326910599227837, "learning_rate": 2.208602430127835e-06, "loss": 0.0184, "step": 196560 }, { "epoch": 0.820175914412798, "grad_norm": 0.29623915521189664, "learning_rate": 2.2085743398305862e-06, "loss": 0.0188, "step": 196565 }, { "epoch": 0.8201967771277883, "grad_norm": 0.34981240523648943, "learning_rate": 2.2085462506051165e-06, "loss": 0.0215, "step": 196570 }, { "epoch": 0.8202176398427786, "grad_norm": 0.7977924830231853, "learning_rate": 2.208518162451358e-06, "loss": 0.0195, "step": 196575 }, { "epoch": 0.8202385025577689, "grad_norm": 0.20132658473084689, "learning_rate": 2.208490075369242e-06, "loss": 0.0209, "step": 196580 }, { "epoch": 0.8202593652727591, "grad_norm": 0.6277448374278488, "learning_rate": 2.2084619893587017e-06, "loss": 0.0201, "step": 196585 }, { "epoch": 0.8202802279877495, "grad_norm": 0.41533261613871586, "learning_rate": 2.208433904419667e-06, "loss": 0.0137, "step": 196590 }, { "epoch": 0.8203010907027397, "grad_norm": 1.134333610241513, "learning_rate": 2.2084058205520718e-06, "loss": 0.0281, "step": 196595 }, { "epoch": 0.82032195341773, "grad_norm": 0.8094035707686464, "learning_rate": 2.2083777377558464e-06, "loss": 0.015, "step": 196600 }, { "epoch": 0.8203428161327202, "grad_norm": 1.134976334124304, "learning_rate": 2.2083496560309233e-06, "loss": 0.0252, "step": 196605 }, { "epoch": 0.8203636788477106, "grad_norm": 0.5414828672418954, "learning_rate": 2.2083215753772343e-06, "loss": 0.0166, "step": 196610 }, { "epoch": 0.8203845415627008, "grad_norm": 0.42267256757183347, "learning_rate": 2.2082934957947116e-06, "loss": 0.0225, "step": 196615 }, { "epoch": 0.820405404277691, "grad_norm": 0.7658374214763947, "learning_rate": 2.2082654172832867e-06, "loss": 0.0225, "step": 196620 }, { "epoch": 0.8204262669926814, "grad_norm": 0.8128027657616577, "learning_rate": 2.2082373398428917e-06, "loss": 0.0311, "step": 196625 }, { "epoch": 0.8204471297076716, "grad_norm": 0.7274944511837222, "learning_rate": 2.208209263473458e-06, "loss": 0.0176, "step": 196630 }, { "epoch": 0.8204679924226619, "grad_norm": 0.5897665641437263, "learning_rate": 2.208181188174919e-06, "loss": 0.0158, "step": 196635 }, { "epoch": 0.8204888551376522, "grad_norm": 0.7529505711335277, "learning_rate": 2.2081531139472047e-06, "loss": 0.021, "step": 196640 }, { "epoch": 0.8205097178526425, "grad_norm": 0.42998645986181117, "learning_rate": 2.2081250407902486e-06, "loss": 0.0177, "step": 196645 }, { "epoch": 0.8205305805676327, "grad_norm": 0.8879595396961927, "learning_rate": 2.208096968703982e-06, "loss": 0.0237, "step": 196650 }, { "epoch": 0.820551443282623, "grad_norm": 0.5134836896937145, "learning_rate": 2.208068897688336e-06, "loss": 0.0168, "step": 196655 }, { "epoch": 0.8205723059976133, "grad_norm": 1.075426669178996, "learning_rate": 2.2080408277432437e-06, "loss": 0.0234, "step": 196660 }, { "epoch": 0.8205931687126036, "grad_norm": 1.0509540848214816, "learning_rate": 2.208012758868637e-06, "loss": 0.0213, "step": 196665 }, { "epoch": 0.8206140314275938, "grad_norm": 1.2871139872549646, "learning_rate": 2.2079846910644477e-06, "loss": 0.0207, "step": 196670 }, { "epoch": 0.8206348941425842, "grad_norm": 1.052835853460149, "learning_rate": 2.2079566243306076e-06, "loss": 0.0192, "step": 196675 }, { "epoch": 0.8206557568575744, "grad_norm": 0.6211752637850186, "learning_rate": 2.2079285586670483e-06, "loss": 0.0388, "step": 196680 }, { "epoch": 0.8206766195725647, "grad_norm": 0.6517985758731552, "learning_rate": 2.2079004940737024e-06, "loss": 0.0184, "step": 196685 }, { "epoch": 0.820697482287555, "grad_norm": 0.5904370609655444, "learning_rate": 2.2078724305505018e-06, "loss": 0.019, "step": 196690 }, { "epoch": 0.8207183450025453, "grad_norm": 0.9268417548550753, "learning_rate": 2.207844368097378e-06, "loss": 0.0257, "step": 196695 }, { "epoch": 0.8207392077175355, "grad_norm": 0.429597151715563, "learning_rate": 2.2078163067142643e-06, "loss": 0.0147, "step": 196700 }, { "epoch": 0.8207600704325259, "grad_norm": 1.05565278542754, "learning_rate": 2.2077882464010906e-06, "loss": 0.0291, "step": 196705 }, { "epoch": 0.8207809331475161, "grad_norm": 0.727854994611326, "learning_rate": 2.2077601871577908e-06, "loss": 0.0255, "step": 196710 }, { "epoch": 0.8208017958625063, "grad_norm": 0.5738478081754339, "learning_rate": 2.207732128984296e-06, "loss": 0.0178, "step": 196715 }, { "epoch": 0.8208226585774966, "grad_norm": 0.6670852339137523, "learning_rate": 2.2077040718805387e-06, "loss": 0.0176, "step": 196720 }, { "epoch": 0.8208435212924869, "grad_norm": 0.7263407511240341, "learning_rate": 2.2076760158464506e-06, "loss": 0.0235, "step": 196725 }, { "epoch": 0.8208643840074772, "grad_norm": 0.9098438789089927, "learning_rate": 2.2076479608819635e-06, "loss": 0.0184, "step": 196730 }, { "epoch": 0.8208852467224674, "grad_norm": 0.5271367895981326, "learning_rate": 2.20761990698701e-06, "loss": 0.0197, "step": 196735 }, { "epoch": 0.8209061094374578, "grad_norm": 0.3829401749114852, "learning_rate": 2.2075918541615217e-06, "loss": 0.0147, "step": 196740 }, { "epoch": 0.820926972152448, "grad_norm": 0.5825362602451408, "learning_rate": 2.2075638024054315e-06, "loss": 0.0235, "step": 196745 }, { "epoch": 0.8209478348674383, "grad_norm": 0.30859886637351847, "learning_rate": 2.20753575171867e-06, "loss": 0.0236, "step": 196750 }, { "epoch": 0.8209686975824286, "grad_norm": 0.6812851170899525, "learning_rate": 2.2075077021011703e-06, "loss": 0.0224, "step": 196755 }, { "epoch": 0.8209895602974189, "grad_norm": 1.3558706386746027, "learning_rate": 2.207479653552864e-06, "loss": 0.0314, "step": 196760 }, { "epoch": 0.8210104230124091, "grad_norm": 0.49098206522119603, "learning_rate": 2.2074516060736834e-06, "loss": 0.0182, "step": 196765 }, { "epoch": 0.8210312857273995, "grad_norm": 0.47009751453732274, "learning_rate": 2.207423559663561e-06, "loss": 0.0215, "step": 196770 }, { "epoch": 0.8210521484423897, "grad_norm": 0.5452973061231516, "learning_rate": 2.207395514322428e-06, "loss": 0.019, "step": 196775 }, { "epoch": 0.82107301115738, "grad_norm": 0.728352737915317, "learning_rate": 2.207367470050218e-06, "loss": 0.0204, "step": 196780 }, { "epoch": 0.8210938738723702, "grad_norm": 0.6637468037619105, "learning_rate": 2.2073394268468607e-06, "loss": 0.0137, "step": 196785 }, { "epoch": 0.8211147365873606, "grad_norm": 1.0278353900708725, "learning_rate": 2.2073113847122906e-06, "loss": 0.0204, "step": 196790 }, { "epoch": 0.8211355993023508, "grad_norm": 0.29442811358329524, "learning_rate": 2.207283343646438e-06, "loss": 0.0167, "step": 196795 }, { "epoch": 0.821156462017341, "grad_norm": 1.3212777036388352, "learning_rate": 2.207255303649236e-06, "loss": 0.0238, "step": 196800 }, { "epoch": 0.8211773247323314, "grad_norm": 0.7888656363696706, "learning_rate": 2.207227264720617e-06, "loss": 0.0244, "step": 196805 }, { "epoch": 0.8211981874473216, "grad_norm": 1.0464144346255468, "learning_rate": 2.2071992268605115e-06, "loss": 0.022, "step": 196810 }, { "epoch": 0.8212190501623119, "grad_norm": 0.4020059406759136, "learning_rate": 2.2071711900688537e-06, "loss": 0.0161, "step": 196815 }, { "epoch": 0.8212399128773022, "grad_norm": 0.6731915605638831, "learning_rate": 2.207143154345575e-06, "loss": 0.0202, "step": 196820 }, { "epoch": 0.8212607755922925, "grad_norm": 0.6208592906860011, "learning_rate": 2.207115119690607e-06, "loss": 0.0245, "step": 196825 }, { "epoch": 0.8212816383072827, "grad_norm": 0.6575947673375084, "learning_rate": 2.207087086103882e-06, "loss": 0.022, "step": 196830 }, { "epoch": 0.821302501022273, "grad_norm": 0.29505816465863627, "learning_rate": 2.207059053585333e-06, "loss": 0.0196, "step": 196835 }, { "epoch": 0.8213233637372633, "grad_norm": 0.8351956404864609, "learning_rate": 2.2070310221348916e-06, "loss": 0.0229, "step": 196840 }, { "epoch": 0.8213442264522536, "grad_norm": 0.7859151507139275, "learning_rate": 2.2070029917524894e-06, "loss": 0.0218, "step": 196845 }, { "epoch": 0.8213650891672438, "grad_norm": 0.43967255626826335, "learning_rate": 2.2069749624380594e-06, "loss": 0.021, "step": 196850 }, { "epoch": 0.8213859518822342, "grad_norm": 0.3074695902488728, "learning_rate": 2.2069469341915334e-06, "loss": 0.0127, "step": 196855 }, { "epoch": 0.8214068145972244, "grad_norm": 0.6947034303920213, "learning_rate": 2.2069189070128436e-06, "loss": 0.0258, "step": 196860 }, { "epoch": 0.8214276773122147, "grad_norm": 0.34356297158341054, "learning_rate": 2.2068908809019223e-06, "loss": 0.0182, "step": 196865 }, { "epoch": 0.821448540027205, "grad_norm": 0.7415666647959699, "learning_rate": 2.2068628558587017e-06, "loss": 0.0245, "step": 196870 }, { "epoch": 0.8214694027421953, "grad_norm": 1.2555440002351037, "learning_rate": 2.2068348318831145e-06, "loss": 0.0294, "step": 196875 }, { "epoch": 0.8214902654571855, "grad_norm": 0.43834121596858644, "learning_rate": 2.206806808975092e-06, "loss": 0.0142, "step": 196880 }, { "epoch": 0.8215111281721759, "grad_norm": 0.7077907153494847, "learning_rate": 2.206778787134567e-06, "loss": 0.0255, "step": 196885 }, { "epoch": 0.8215319908871661, "grad_norm": 0.2907628719430663, "learning_rate": 2.206750766361471e-06, "loss": 0.0246, "step": 196890 }, { "epoch": 0.8215528536021564, "grad_norm": 0.40643494430030247, "learning_rate": 2.206722746655737e-06, "loss": 0.0241, "step": 196895 }, { "epoch": 0.8215737163171466, "grad_norm": 0.6715686697933595, "learning_rate": 2.206694728017297e-06, "loss": 0.0184, "step": 196900 }, { "epoch": 0.821594579032137, "grad_norm": 0.8556399047933643, "learning_rate": 2.2066667104460833e-06, "loss": 0.0157, "step": 196905 }, { "epoch": 0.8216154417471272, "grad_norm": 0.8720730995346591, "learning_rate": 2.206638693942029e-06, "loss": 0.0221, "step": 196910 }, { "epoch": 0.8216363044621174, "grad_norm": 0.6787334521302286, "learning_rate": 2.2066106785050643e-06, "loss": 0.0149, "step": 196915 }, { "epoch": 0.8216571671771078, "grad_norm": 0.7784536873491099, "learning_rate": 2.2065826641351233e-06, "loss": 0.0179, "step": 196920 }, { "epoch": 0.821678029892098, "grad_norm": 0.3904042570418423, "learning_rate": 2.2065546508321377e-06, "loss": 0.0196, "step": 196925 }, { "epoch": 0.8216988926070883, "grad_norm": 0.6550742310905794, "learning_rate": 2.206526638596039e-06, "loss": 0.0204, "step": 196930 }, { "epoch": 0.8217197553220786, "grad_norm": 1.1527654195949661, "learning_rate": 2.2064986274267608e-06, "loss": 0.0186, "step": 196935 }, { "epoch": 0.8217406180370689, "grad_norm": 0.8044908266860229, "learning_rate": 2.2064706173242345e-06, "loss": 0.0158, "step": 196940 }, { "epoch": 0.8217614807520591, "grad_norm": 0.8136885728921288, "learning_rate": 2.206442608288393e-06, "loss": 0.0299, "step": 196945 }, { "epoch": 0.8217823434670495, "grad_norm": 0.5599778990593101, "learning_rate": 2.2064146003191673e-06, "loss": 0.016, "step": 196950 }, { "epoch": 0.8218032061820397, "grad_norm": 1.2410105377815803, "learning_rate": 2.2063865934164917e-06, "loss": 0.0212, "step": 196955 }, { "epoch": 0.82182406889703, "grad_norm": 0.5663615688184066, "learning_rate": 2.206358587580297e-06, "loss": 0.0229, "step": 196960 }, { "epoch": 0.8218449316120202, "grad_norm": 0.5309428698363605, "learning_rate": 2.2063305828105162e-06, "loss": 0.0226, "step": 196965 }, { "epoch": 0.8218657943270106, "grad_norm": 0.6390744598563819, "learning_rate": 2.206302579107081e-06, "loss": 0.0184, "step": 196970 }, { "epoch": 0.8218866570420008, "grad_norm": 0.22735800177660323, "learning_rate": 2.2062745764699246e-06, "loss": 0.017, "step": 196975 }, { "epoch": 0.8219075197569911, "grad_norm": 0.9085764591350747, "learning_rate": 2.206246574898979e-06, "loss": 0.0225, "step": 196980 }, { "epoch": 0.8219283824719814, "grad_norm": 0.5109926201733567, "learning_rate": 2.2062185743941757e-06, "loss": 0.0176, "step": 196985 }, { "epoch": 0.8219492451869717, "grad_norm": 1.135715547451788, "learning_rate": 2.206190574955448e-06, "loss": 0.0315, "step": 196990 }, { "epoch": 0.8219701079019619, "grad_norm": 0.6592728660186348, "learning_rate": 2.206162576582728e-06, "loss": 0.02, "step": 196995 }, { "epoch": 0.8219909706169523, "grad_norm": 0.7029350381969466, "learning_rate": 2.206134579275948e-06, "loss": 0.0214, "step": 197000 }, { "epoch": 0.8220118333319425, "grad_norm": 0.7838438539060836, "learning_rate": 2.2061065830350407e-06, "loss": 0.0228, "step": 197005 }, { "epoch": 0.8220326960469327, "grad_norm": 0.5360668596833131, "learning_rate": 2.206078587859938e-06, "loss": 0.0169, "step": 197010 }, { "epoch": 0.822053558761923, "grad_norm": 0.6142032712723702, "learning_rate": 2.2060505937505734e-06, "loss": 0.0203, "step": 197015 }, { "epoch": 0.8220744214769133, "grad_norm": 0.5335574862676772, "learning_rate": 2.2060226007068773e-06, "loss": 0.0188, "step": 197020 }, { "epoch": 0.8220952841919036, "grad_norm": 0.3196793108363081, "learning_rate": 2.205994608728784e-06, "loss": 0.0228, "step": 197025 }, { "epoch": 0.8221161469068938, "grad_norm": 0.9407486972747138, "learning_rate": 2.2059666178162243e-06, "loss": 0.0175, "step": 197030 }, { "epoch": 0.8221370096218842, "grad_norm": 0.9029397138872693, "learning_rate": 2.2059386279691314e-06, "loss": 0.0262, "step": 197035 }, { "epoch": 0.8221578723368744, "grad_norm": 0.3582234676280025, "learning_rate": 2.205910639187438e-06, "loss": 0.0177, "step": 197040 }, { "epoch": 0.8221787350518647, "grad_norm": 0.7404422920435345, "learning_rate": 2.205882651471076e-06, "loss": 0.0232, "step": 197045 }, { "epoch": 0.822199597766855, "grad_norm": 0.6768549927388893, "learning_rate": 2.2058546648199777e-06, "loss": 0.015, "step": 197050 }, { "epoch": 0.8222204604818453, "grad_norm": 0.6502429762960705, "learning_rate": 2.205826679234076e-06, "loss": 0.0187, "step": 197055 }, { "epoch": 0.8222413231968355, "grad_norm": 1.812467960815342, "learning_rate": 2.2057986947133032e-06, "loss": 0.0259, "step": 197060 }, { "epoch": 0.8222621859118259, "grad_norm": 0.4881993353717799, "learning_rate": 2.2057707112575923e-06, "loss": 0.0228, "step": 197065 }, { "epoch": 0.8222830486268161, "grad_norm": 0.7768992852804184, "learning_rate": 2.205742728866874e-06, "loss": 0.0219, "step": 197070 }, { "epoch": 0.8223039113418064, "grad_norm": 0.6851267903280173, "learning_rate": 2.2057147475410827e-06, "loss": 0.0185, "step": 197075 }, { "epoch": 0.8223247740567966, "grad_norm": 0.5630586973199961, "learning_rate": 2.2056867672801497e-06, "loss": 0.0138, "step": 197080 }, { "epoch": 0.822345636771787, "grad_norm": 0.7363162746149347, "learning_rate": 2.205658788084008e-06, "loss": 0.0288, "step": 197085 }, { "epoch": 0.8223664994867772, "grad_norm": 0.5746793725619446, "learning_rate": 2.2056308099525893e-06, "loss": 0.0176, "step": 197090 }, { "epoch": 0.8223873622017674, "grad_norm": 0.6594622638417709, "learning_rate": 2.2056028328858277e-06, "loss": 0.023, "step": 197095 }, { "epoch": 0.8224082249167578, "grad_norm": 0.7812452224566073, "learning_rate": 2.2055748568836535e-06, "loss": 0.0286, "step": 197100 }, { "epoch": 0.822429087631748, "grad_norm": 0.8041123543752043, "learning_rate": 2.205546881946001e-06, "loss": 0.0215, "step": 197105 }, { "epoch": 0.8224499503467383, "grad_norm": 0.39720752509798374, "learning_rate": 2.2055189080728015e-06, "loss": 0.0155, "step": 197110 }, { "epoch": 0.8224708130617286, "grad_norm": 0.8782710126146408, "learning_rate": 2.205490935263988e-06, "loss": 0.0179, "step": 197115 }, { "epoch": 0.8224916757767189, "grad_norm": 0.7730799759120806, "learning_rate": 2.2054629635194937e-06, "loss": 0.0246, "step": 197120 }, { "epoch": 0.8225125384917091, "grad_norm": 0.7147341759487534, "learning_rate": 2.20543499283925e-06, "loss": 0.0139, "step": 197125 }, { "epoch": 0.8225334012066995, "grad_norm": 3.199060409758102, "learning_rate": 2.2054070232231896e-06, "loss": 0.0284, "step": 197130 }, { "epoch": 0.8225542639216897, "grad_norm": 0.3427332478953747, "learning_rate": 2.2053790546712454e-06, "loss": 0.0209, "step": 197135 }, { "epoch": 0.82257512663668, "grad_norm": 0.256916763450704, "learning_rate": 2.20535108718335e-06, "loss": 0.0154, "step": 197140 }, { "epoch": 0.8225959893516702, "grad_norm": 0.7221377255547471, "learning_rate": 2.205323120759435e-06, "loss": 0.023, "step": 197145 }, { "epoch": 0.8226168520666606, "grad_norm": 0.7247324145495545, "learning_rate": 2.2052951553994347e-06, "loss": 0.0179, "step": 197150 }, { "epoch": 0.8226377147816508, "grad_norm": 0.628559923058045, "learning_rate": 2.2052671911032795e-06, "loss": 0.0249, "step": 197155 }, { "epoch": 0.8226585774966411, "grad_norm": 0.8600882632820683, "learning_rate": 2.2052392278709034e-06, "loss": 0.0174, "step": 197160 }, { "epoch": 0.8226794402116314, "grad_norm": 0.4781643070908528, "learning_rate": 2.2052112657022388e-06, "loss": 0.0244, "step": 197165 }, { "epoch": 0.8227003029266217, "grad_norm": 0.4606464162142978, "learning_rate": 2.205183304597218e-06, "loss": 0.0206, "step": 197170 }, { "epoch": 0.8227211656416119, "grad_norm": 0.4292919118962385, "learning_rate": 2.2051553445557738e-06, "loss": 0.0194, "step": 197175 }, { "epoch": 0.8227420283566023, "grad_norm": 0.7393835876598234, "learning_rate": 2.2051273855778382e-06, "loss": 0.0201, "step": 197180 }, { "epoch": 0.8227628910715925, "grad_norm": 0.8461717223930005, "learning_rate": 2.2050994276633445e-06, "loss": 0.0235, "step": 197185 }, { "epoch": 0.8227837537865827, "grad_norm": 0.7624073483485716, "learning_rate": 2.2050714708122245e-06, "loss": 0.0258, "step": 197190 }, { "epoch": 0.822804616501573, "grad_norm": 0.5809433533852415, "learning_rate": 2.2050435150244115e-06, "loss": 0.025, "step": 197195 }, { "epoch": 0.8228254792165633, "grad_norm": 0.6474100941159815, "learning_rate": 2.2050155602998376e-06, "loss": 0.0156, "step": 197200 }, { "epoch": 0.8228463419315536, "grad_norm": 0.398603452259114, "learning_rate": 2.204987606638436e-06, "loss": 0.0189, "step": 197205 }, { "epoch": 0.8228672046465438, "grad_norm": 0.4633878480138187, "learning_rate": 2.2049596540401392e-06, "loss": 0.0255, "step": 197210 }, { "epoch": 0.8228880673615342, "grad_norm": 0.32110493920332295, "learning_rate": 2.2049317025048794e-06, "loss": 0.0203, "step": 197215 }, { "epoch": 0.8229089300765244, "grad_norm": 1.075488100871365, "learning_rate": 2.2049037520325895e-06, "loss": 0.0203, "step": 197220 }, { "epoch": 0.8229297927915147, "grad_norm": 0.641236030472462, "learning_rate": 2.204875802623202e-06, "loss": 0.023, "step": 197225 }, { "epoch": 0.822950655506505, "grad_norm": 0.9382721694986773, "learning_rate": 2.2048478542766492e-06, "loss": 0.0166, "step": 197230 }, { "epoch": 0.8229715182214953, "grad_norm": 0.4136003642617438, "learning_rate": 2.2048199069928645e-06, "loss": 0.0202, "step": 197235 }, { "epoch": 0.8229923809364855, "grad_norm": 0.33838161905617187, "learning_rate": 2.20479196077178e-06, "loss": 0.0204, "step": 197240 }, { "epoch": 0.8230132436514759, "grad_norm": 1.1730236628044277, "learning_rate": 2.204764015613328e-06, "loss": 0.0201, "step": 197245 }, { "epoch": 0.8230341063664661, "grad_norm": 0.5466755154638796, "learning_rate": 2.2047360715174426e-06, "loss": 0.0219, "step": 197250 }, { "epoch": 0.8230549690814564, "grad_norm": 0.8107758101464113, "learning_rate": 2.204708128484055e-06, "loss": 0.0223, "step": 197255 }, { "epoch": 0.8230758317964466, "grad_norm": 0.5896755469524941, "learning_rate": 2.2046801865130984e-06, "loss": 0.0209, "step": 197260 }, { "epoch": 0.823096694511437, "grad_norm": 0.9594239100174632, "learning_rate": 2.204652245604506e-06, "loss": 0.0243, "step": 197265 }, { "epoch": 0.8231175572264272, "grad_norm": 0.621776070642523, "learning_rate": 2.2046243057582097e-06, "loss": 0.0226, "step": 197270 }, { "epoch": 0.8231384199414175, "grad_norm": 0.5299720319180924, "learning_rate": 2.2045963669741423e-06, "loss": 0.0245, "step": 197275 }, { "epoch": 0.8231592826564078, "grad_norm": 0.4263956516379655, "learning_rate": 2.2045684292522364e-06, "loss": 0.0214, "step": 197280 }, { "epoch": 0.823180145371398, "grad_norm": 0.5224398406553347, "learning_rate": 2.2045404925924256e-06, "loss": 0.0237, "step": 197285 }, { "epoch": 0.8232010080863883, "grad_norm": 0.6820154517622617, "learning_rate": 2.2045125569946416e-06, "loss": 0.0264, "step": 197290 }, { "epoch": 0.8232218708013787, "grad_norm": 0.41218483296380304, "learning_rate": 2.2044846224588176e-06, "loss": 0.0259, "step": 197295 }, { "epoch": 0.8232427335163689, "grad_norm": 0.8445327213919664, "learning_rate": 2.204456688984886e-06, "loss": 0.0205, "step": 197300 }, { "epoch": 0.8232635962313591, "grad_norm": 0.6026902474179723, "learning_rate": 2.20442875657278e-06, "loss": 0.0151, "step": 197305 }, { "epoch": 0.8232844589463495, "grad_norm": 1.2501437202552763, "learning_rate": 2.204400825222432e-06, "loss": 0.0218, "step": 197310 }, { "epoch": 0.8233053216613397, "grad_norm": 0.9810638095650588, "learning_rate": 2.2043728949337744e-06, "loss": 0.0182, "step": 197315 }, { "epoch": 0.82332618437633, "grad_norm": 0.46549639890892025, "learning_rate": 2.2043449657067403e-06, "loss": 0.0215, "step": 197320 }, { "epoch": 0.8233470470913202, "grad_norm": 0.41600165500863834, "learning_rate": 2.204317037541263e-06, "loss": 0.0206, "step": 197325 }, { "epoch": 0.8233679098063106, "grad_norm": 0.47575045739874655, "learning_rate": 2.2042891104372743e-06, "loss": 0.024, "step": 197330 }, { "epoch": 0.8233887725213008, "grad_norm": 0.8254417451892161, "learning_rate": 2.2042611843947076e-06, "loss": 0.0207, "step": 197335 }, { "epoch": 0.8234096352362911, "grad_norm": 0.5919477433390647, "learning_rate": 2.2042332594134953e-06, "loss": 0.0206, "step": 197340 }, { "epoch": 0.8234304979512814, "grad_norm": 0.4144149745062115, "learning_rate": 2.2042053354935703e-06, "loss": 0.018, "step": 197345 }, { "epoch": 0.8234513606662717, "grad_norm": 0.9928416427352954, "learning_rate": 2.2041774126348654e-06, "loss": 0.0294, "step": 197350 }, { "epoch": 0.8234722233812619, "grad_norm": 0.5665756646986919, "learning_rate": 2.204149490837314e-06, "loss": 0.0176, "step": 197355 }, { "epoch": 0.8234930860962523, "grad_norm": 0.6111358756945695, "learning_rate": 2.2041215701008476e-06, "loss": 0.0325, "step": 197360 }, { "epoch": 0.8235139488112425, "grad_norm": 0.4599211688924482, "learning_rate": 2.2040936504254e-06, "loss": 0.0214, "step": 197365 }, { "epoch": 0.8235348115262328, "grad_norm": 0.5944475644676109, "learning_rate": 2.2040657318109037e-06, "loss": 0.0232, "step": 197370 }, { "epoch": 0.823555674241223, "grad_norm": 0.661775433329046, "learning_rate": 2.204037814257291e-06, "loss": 0.0242, "step": 197375 }, { "epoch": 0.8235765369562134, "grad_norm": 0.7995539738643862, "learning_rate": 2.2040098977644957e-06, "loss": 0.0223, "step": 197380 }, { "epoch": 0.8235973996712036, "grad_norm": 0.6392157193635908, "learning_rate": 2.20398198233245e-06, "loss": 0.017, "step": 197385 }, { "epoch": 0.8236182623861938, "grad_norm": 1.5807889673368083, "learning_rate": 2.2039540679610867e-06, "loss": 0.031, "step": 197390 }, { "epoch": 0.8236391251011842, "grad_norm": 0.6439770396643226, "learning_rate": 2.203926154650339e-06, "loss": 0.0243, "step": 197395 }, { "epoch": 0.8236599878161744, "grad_norm": 0.573332294924597, "learning_rate": 2.203898242400139e-06, "loss": 0.0173, "step": 197400 }, { "epoch": 0.8236808505311647, "grad_norm": 0.6865407465536546, "learning_rate": 2.20387033121042e-06, "loss": 0.0212, "step": 197405 }, { "epoch": 0.823701713246155, "grad_norm": 0.9325550396461718, "learning_rate": 2.203842421081116e-06, "loss": 0.029, "step": 197410 }, { "epoch": 0.8237225759611453, "grad_norm": 0.441709557782448, "learning_rate": 2.203814512012158e-06, "loss": 0.0227, "step": 197415 }, { "epoch": 0.8237434386761355, "grad_norm": 0.8685342393184923, "learning_rate": 2.2037866040034797e-06, "loss": 0.0261, "step": 197420 }, { "epoch": 0.8237643013911259, "grad_norm": 0.5647426488989155, "learning_rate": 2.203758697055014e-06, "loss": 0.0236, "step": 197425 }, { "epoch": 0.8237851641061161, "grad_norm": 0.5474358928291685, "learning_rate": 2.2037307911666937e-06, "loss": 0.0265, "step": 197430 }, { "epoch": 0.8238060268211064, "grad_norm": 0.6939861015790035, "learning_rate": 2.203702886338451e-06, "loss": 0.0183, "step": 197435 }, { "epoch": 0.8238268895360966, "grad_norm": 0.33370164256633456, "learning_rate": 2.20367498257022e-06, "loss": 0.0231, "step": 197440 }, { "epoch": 0.823847752251087, "grad_norm": 0.7233842934529715, "learning_rate": 2.203647079861933e-06, "loss": 0.0264, "step": 197445 }, { "epoch": 0.8238686149660772, "grad_norm": 0.3590562292772268, "learning_rate": 2.203619178213523e-06, "loss": 0.0297, "step": 197450 }, { "epoch": 0.8238894776810675, "grad_norm": 0.4803494739726995, "learning_rate": 2.2035912776249227e-06, "loss": 0.0194, "step": 197455 }, { "epoch": 0.8239103403960578, "grad_norm": 4.153337198009354, "learning_rate": 2.203563378096065e-06, "loss": 0.0307, "step": 197460 }, { "epoch": 0.8239312031110481, "grad_norm": 0.6685658027171368, "learning_rate": 2.203535479626883e-06, "loss": 0.0268, "step": 197465 }, { "epoch": 0.8239520658260383, "grad_norm": 0.865766878943576, "learning_rate": 2.2035075822173096e-06, "loss": 0.029, "step": 197470 }, { "epoch": 0.8239729285410287, "grad_norm": 0.7641632286616988, "learning_rate": 2.203479685867278e-06, "loss": 0.024, "step": 197475 }, { "epoch": 0.8239937912560189, "grad_norm": 0.5482278881591663, "learning_rate": 2.20345179057672e-06, "loss": 0.0211, "step": 197480 }, { "epoch": 0.8240146539710091, "grad_norm": 0.956546409310267, "learning_rate": 2.20342389634557e-06, "loss": 0.0187, "step": 197485 }, { "epoch": 0.8240355166859995, "grad_norm": 0.23565633565216373, "learning_rate": 2.2033960031737603e-06, "loss": 0.0177, "step": 197490 }, { "epoch": 0.8240563794009897, "grad_norm": 0.2939224941774512, "learning_rate": 2.2033681110612235e-06, "loss": 0.0225, "step": 197495 }, { "epoch": 0.82407724211598, "grad_norm": 0.6771297099865415, "learning_rate": 2.203340220007893e-06, "loss": 0.0181, "step": 197500 }, { "epoch": 0.8240981048309702, "grad_norm": 0.6655899347129285, "learning_rate": 2.2033123300137022e-06, "loss": 0.0284, "step": 197505 }, { "epoch": 0.8241189675459606, "grad_norm": 0.620215864937091, "learning_rate": 2.203284441078583e-06, "loss": 0.0177, "step": 197510 }, { "epoch": 0.8241398302609508, "grad_norm": 0.6293613171397607, "learning_rate": 2.2032565532024692e-06, "loss": 0.0263, "step": 197515 }, { "epoch": 0.8241606929759411, "grad_norm": 0.9752930706247853, "learning_rate": 2.203228666385293e-06, "loss": 0.0257, "step": 197520 }, { "epoch": 0.8241815556909314, "grad_norm": 0.4722768250031727, "learning_rate": 2.203200780626989e-06, "loss": 0.0183, "step": 197525 }, { "epoch": 0.8242024184059217, "grad_norm": 0.7507746953959309, "learning_rate": 2.203172895927488e-06, "loss": 0.0183, "step": 197530 }, { "epoch": 0.8242232811209119, "grad_norm": 1.1148365425390887, "learning_rate": 2.2031450122867244e-06, "loss": 0.0165, "step": 197535 }, { "epoch": 0.8242441438359023, "grad_norm": 0.846136436588763, "learning_rate": 2.203117129704631e-06, "loss": 0.0206, "step": 197540 }, { "epoch": 0.8242650065508925, "grad_norm": 1.1149967251515978, "learning_rate": 2.2030892481811406e-06, "loss": 0.0314, "step": 197545 }, { "epoch": 0.8242858692658828, "grad_norm": 0.38869850969437014, "learning_rate": 2.2030613677161865e-06, "loss": 0.0203, "step": 197550 }, { "epoch": 0.824306731980873, "grad_norm": 0.5678669795020941, "learning_rate": 2.2030334883097013e-06, "loss": 0.02, "step": 197555 }, { "epoch": 0.8243275946958634, "grad_norm": 0.4036527998204086, "learning_rate": 2.203005609961618e-06, "loss": 0.0281, "step": 197560 }, { "epoch": 0.8243484574108536, "grad_norm": 0.9380619541092603, "learning_rate": 2.2029777326718704e-06, "loss": 0.0155, "step": 197565 }, { "epoch": 0.8243693201258439, "grad_norm": 1.0651697537621052, "learning_rate": 2.2029498564403908e-06, "loss": 0.0259, "step": 197570 }, { "epoch": 0.8243901828408342, "grad_norm": 0.8057676126351115, "learning_rate": 2.2029219812671124e-06, "loss": 0.0183, "step": 197575 }, { "epoch": 0.8244110455558245, "grad_norm": 0.7518571416209359, "learning_rate": 2.2028941071519683e-06, "loss": 0.0242, "step": 197580 }, { "epoch": 0.8244319082708147, "grad_norm": 0.3312489799834812, "learning_rate": 2.2028662340948916e-06, "loss": 0.0173, "step": 197585 }, { "epoch": 0.824452770985805, "grad_norm": 0.4613011722165822, "learning_rate": 2.2028383620958155e-06, "loss": 0.0178, "step": 197590 }, { "epoch": 0.8244736337007953, "grad_norm": 1.0372826605517296, "learning_rate": 2.2028104911546732e-06, "loss": 0.0223, "step": 197595 }, { "epoch": 0.8244944964157855, "grad_norm": 0.505653575508473, "learning_rate": 2.202782621271397e-06, "loss": 0.0207, "step": 197600 }, { "epoch": 0.8245153591307759, "grad_norm": 0.6350495650313748, "learning_rate": 2.2027547524459206e-06, "loss": 0.0265, "step": 197605 }, { "epoch": 0.8245362218457661, "grad_norm": 0.6929281674888266, "learning_rate": 2.2027268846781776e-06, "loss": 0.017, "step": 197610 }, { "epoch": 0.8245570845607564, "grad_norm": 0.6168894613982363, "learning_rate": 2.2026990179680996e-06, "loss": 0.0265, "step": 197615 }, { "epoch": 0.8245779472757466, "grad_norm": 0.9645243222127944, "learning_rate": 2.2026711523156206e-06, "loss": 0.0141, "step": 197620 }, { "epoch": 0.824598809990737, "grad_norm": 0.9085620023173665, "learning_rate": 2.2026432877206737e-06, "loss": 0.0167, "step": 197625 }, { "epoch": 0.8246196727057272, "grad_norm": 0.3648233721773068, "learning_rate": 2.2026154241831924e-06, "loss": 0.0194, "step": 197630 }, { "epoch": 0.8246405354207175, "grad_norm": 1.5949338778452304, "learning_rate": 2.202587561703109e-06, "loss": 0.024, "step": 197635 }, { "epoch": 0.8246613981357078, "grad_norm": 0.1904072723713817, "learning_rate": 2.202559700280357e-06, "loss": 0.0117, "step": 197640 }, { "epoch": 0.8246822608506981, "grad_norm": 0.2667598421064009, "learning_rate": 2.2025318399148698e-06, "loss": 0.0223, "step": 197645 }, { "epoch": 0.8247031235656883, "grad_norm": 0.9133675911460131, "learning_rate": 2.2025039806065803e-06, "loss": 0.0247, "step": 197650 }, { "epoch": 0.8247239862806787, "grad_norm": 0.8885973366197903, "learning_rate": 2.2024761223554215e-06, "loss": 0.0215, "step": 197655 }, { "epoch": 0.8247448489956689, "grad_norm": 1.0886469425146086, "learning_rate": 2.2024482651613267e-06, "loss": 0.0271, "step": 197660 }, { "epoch": 0.8247657117106592, "grad_norm": 0.8824284584930072, "learning_rate": 2.2024204090242286e-06, "loss": 0.0212, "step": 197665 }, { "epoch": 0.8247865744256495, "grad_norm": 0.7946335910019079, "learning_rate": 2.202392553944061e-06, "loss": 0.0309, "step": 197670 }, { "epoch": 0.8248074371406398, "grad_norm": 0.7910576325302305, "learning_rate": 2.2023646999207572e-06, "loss": 0.0166, "step": 197675 }, { "epoch": 0.82482829985563, "grad_norm": 0.4509136018378991, "learning_rate": 2.2023368469542496e-06, "loss": 0.0168, "step": 197680 }, { "epoch": 0.8248491625706202, "grad_norm": 0.4987078720906852, "learning_rate": 2.202308995044472e-06, "loss": 0.0242, "step": 197685 }, { "epoch": 0.8248700252856106, "grad_norm": 1.4326186941841923, "learning_rate": 2.2022811441913573e-06, "loss": 0.0222, "step": 197690 }, { "epoch": 0.8248908880006008, "grad_norm": 0.9498517109574159, "learning_rate": 2.2022532943948387e-06, "loss": 0.0204, "step": 197695 }, { "epoch": 0.8249117507155911, "grad_norm": 0.32518269278992035, "learning_rate": 2.2022254456548494e-06, "loss": 0.014, "step": 197700 }, { "epoch": 0.8249326134305814, "grad_norm": 0.558050378595788, "learning_rate": 2.2021975979713227e-06, "loss": 0.0238, "step": 197705 }, { "epoch": 0.8249534761455717, "grad_norm": 0.5057825932455943, "learning_rate": 2.2021697513441918e-06, "loss": 0.0155, "step": 197710 }, { "epoch": 0.8249743388605619, "grad_norm": 0.4895074959766558, "learning_rate": 2.2021419057733893e-06, "loss": 0.018, "step": 197715 }, { "epoch": 0.8249952015755523, "grad_norm": 0.8044650761432833, "learning_rate": 2.20211406125885e-06, "loss": 0.0166, "step": 197720 }, { "epoch": 0.8250160642905425, "grad_norm": 0.4447859170329879, "learning_rate": 2.2020862178005058e-06, "loss": 0.0238, "step": 197725 }, { "epoch": 0.8250369270055328, "grad_norm": 0.5144499111244767, "learning_rate": 2.2020583753982897e-06, "loss": 0.0243, "step": 197730 }, { "epoch": 0.825057789720523, "grad_norm": 0.6527781478217358, "learning_rate": 2.202030534052136e-06, "loss": 0.0187, "step": 197735 }, { "epoch": 0.8250786524355134, "grad_norm": 0.5818057078428067, "learning_rate": 2.202002693761977e-06, "loss": 0.0216, "step": 197740 }, { "epoch": 0.8250995151505036, "grad_norm": 0.6180772706089505, "learning_rate": 2.2019748545277463e-06, "loss": 0.0141, "step": 197745 }, { "epoch": 0.8251203778654939, "grad_norm": 0.5810783156255448, "learning_rate": 2.2019470163493777e-06, "loss": 0.024, "step": 197750 }, { "epoch": 0.8251412405804842, "grad_norm": 0.4509093843107414, "learning_rate": 2.201919179226804e-06, "loss": 0.0168, "step": 197755 }, { "epoch": 0.8251621032954745, "grad_norm": 1.0002749013345968, "learning_rate": 2.201891343159958e-06, "loss": 0.0192, "step": 197760 }, { "epoch": 0.8251829660104647, "grad_norm": 0.5749944068101833, "learning_rate": 2.2018635081487735e-06, "loss": 0.017, "step": 197765 }, { "epoch": 0.8252038287254551, "grad_norm": 0.9863602066592434, "learning_rate": 2.2018356741931835e-06, "loss": 0.0197, "step": 197770 }, { "epoch": 0.8252246914404453, "grad_norm": 0.47498532724900694, "learning_rate": 2.201807841293122e-06, "loss": 0.0223, "step": 197775 }, { "epoch": 0.8252455541554355, "grad_norm": 0.7554102746156773, "learning_rate": 2.2017800094485214e-06, "loss": 0.0202, "step": 197780 }, { "epoch": 0.8252664168704259, "grad_norm": 0.5476997910371164, "learning_rate": 2.201752178659315e-06, "loss": 0.024, "step": 197785 }, { "epoch": 0.8252872795854161, "grad_norm": 1.9408965264700437, "learning_rate": 2.201724348925437e-06, "loss": 0.0269, "step": 197790 }, { "epoch": 0.8253081423004064, "grad_norm": 0.7642998956985617, "learning_rate": 2.20169652024682e-06, "loss": 0.0204, "step": 197795 }, { "epoch": 0.8253290050153966, "grad_norm": 0.9593281896462081, "learning_rate": 2.2016686926233976e-06, "loss": 0.0237, "step": 197800 }, { "epoch": 0.825349867730387, "grad_norm": 0.5506762371881547, "learning_rate": 2.201640866055102e-06, "loss": 0.0218, "step": 197805 }, { "epoch": 0.8253707304453772, "grad_norm": 0.6388300958659835, "learning_rate": 2.2016130405418683e-06, "loss": 0.0196, "step": 197810 }, { "epoch": 0.8253915931603675, "grad_norm": 0.5951512836920835, "learning_rate": 2.2015852160836296e-06, "loss": 0.0254, "step": 197815 }, { "epoch": 0.8254124558753578, "grad_norm": 0.8356823413650227, "learning_rate": 2.2015573926803176e-06, "loss": 0.0208, "step": 197820 }, { "epoch": 0.8254333185903481, "grad_norm": 1.2709908094842606, "learning_rate": 2.2015295703318674e-06, "loss": 0.0218, "step": 197825 }, { "epoch": 0.8254541813053383, "grad_norm": 0.8780569664769269, "learning_rate": 2.2015017490382117e-06, "loss": 0.0182, "step": 197830 }, { "epoch": 0.8254750440203287, "grad_norm": 0.20029157129144207, "learning_rate": 2.201473928799284e-06, "loss": 0.0145, "step": 197835 }, { "epoch": 0.8254959067353189, "grad_norm": 0.9441713762438516, "learning_rate": 2.2014461096150167e-06, "loss": 0.0268, "step": 197840 }, { "epoch": 0.8255167694503092, "grad_norm": 0.6188634547008385, "learning_rate": 2.2014182914853437e-06, "loss": 0.0187, "step": 197845 }, { "epoch": 0.8255376321652995, "grad_norm": 0.5739855328300477, "learning_rate": 2.2013904744101996e-06, "loss": 0.0189, "step": 197850 }, { "epoch": 0.8255584948802898, "grad_norm": 0.7248266738023306, "learning_rate": 2.201362658389516e-06, "loss": 0.0193, "step": 197855 }, { "epoch": 0.82557935759528, "grad_norm": 0.7755723014316381, "learning_rate": 2.2013348434232275e-06, "loss": 0.0293, "step": 197860 }, { "epoch": 0.8256002203102702, "grad_norm": 0.8440774747928369, "learning_rate": 2.201307029511267e-06, "loss": 0.0291, "step": 197865 }, { "epoch": 0.8256210830252606, "grad_norm": 0.6990683346076662, "learning_rate": 2.201279216653568e-06, "loss": 0.026, "step": 197870 }, { "epoch": 0.8256419457402508, "grad_norm": 0.632028329002329, "learning_rate": 2.201251404850063e-06, "loss": 0.018, "step": 197875 }, { "epoch": 0.8256628084552411, "grad_norm": 0.7026320112341525, "learning_rate": 2.2012235941006873e-06, "loss": 0.0186, "step": 197880 }, { "epoch": 0.8256836711702314, "grad_norm": 0.780364390302983, "learning_rate": 2.201195784405373e-06, "loss": 0.0173, "step": 197885 }, { "epoch": 0.8257045338852217, "grad_norm": 0.5264951544756433, "learning_rate": 2.2011679757640535e-06, "loss": 0.0193, "step": 197890 }, { "epoch": 0.8257253966002119, "grad_norm": 1.047482327875865, "learning_rate": 2.2011401681766627e-06, "loss": 0.0188, "step": 197895 }, { "epoch": 0.8257462593152023, "grad_norm": 0.9787420627671054, "learning_rate": 2.201112361643134e-06, "loss": 0.0171, "step": 197900 }, { "epoch": 0.8257671220301925, "grad_norm": 0.589314364897647, "learning_rate": 2.2010845561634006e-06, "loss": 0.0189, "step": 197905 }, { "epoch": 0.8257879847451828, "grad_norm": 1.1574866181938306, "learning_rate": 2.201056751737396e-06, "loss": 0.0168, "step": 197910 }, { "epoch": 0.825808847460173, "grad_norm": 0.45404289254014535, "learning_rate": 2.2010289483650536e-06, "loss": 0.0172, "step": 197915 }, { "epoch": 0.8258297101751634, "grad_norm": 0.5837440115232688, "learning_rate": 2.2010011460463068e-06, "loss": 0.0215, "step": 197920 }, { "epoch": 0.8258505728901536, "grad_norm": 0.6299540212848849, "learning_rate": 2.200973344781089e-06, "loss": 0.0177, "step": 197925 }, { "epoch": 0.8258714356051439, "grad_norm": 0.40222258974317276, "learning_rate": 2.200945544569334e-06, "loss": 0.0183, "step": 197930 }, { "epoch": 0.8258922983201342, "grad_norm": 0.37406757515272987, "learning_rate": 2.2009177454109756e-06, "loss": 0.0257, "step": 197935 }, { "epoch": 0.8259131610351245, "grad_norm": 0.6090138039241274, "learning_rate": 2.2008899473059462e-06, "loss": 0.018, "step": 197940 }, { "epoch": 0.8259340237501147, "grad_norm": 0.5581904898498751, "learning_rate": 2.20086215025418e-06, "loss": 0.023, "step": 197945 }, { "epoch": 0.8259548864651051, "grad_norm": 0.49599871533046724, "learning_rate": 2.2008343542556105e-06, "loss": 0.0184, "step": 197950 }, { "epoch": 0.8259757491800953, "grad_norm": 0.8297541717557273, "learning_rate": 2.2008065593101707e-06, "loss": 0.0181, "step": 197955 }, { "epoch": 0.8259966118950856, "grad_norm": 0.7464169141003728, "learning_rate": 2.200778765417795e-06, "loss": 0.0235, "step": 197960 }, { "epoch": 0.8260174746100759, "grad_norm": 0.5175616194847394, "learning_rate": 2.200750972578416e-06, "loss": 0.0167, "step": 197965 }, { "epoch": 0.8260383373250662, "grad_norm": 0.5796120761540641, "learning_rate": 2.2007231807919675e-06, "loss": 0.0217, "step": 197970 }, { "epoch": 0.8260592000400564, "grad_norm": 0.5591430687942772, "learning_rate": 2.2006953900583834e-06, "loss": 0.017, "step": 197975 }, { "epoch": 0.8260800627550466, "grad_norm": 0.44895338853705324, "learning_rate": 2.200667600377597e-06, "loss": 0.0239, "step": 197980 }, { "epoch": 0.826100925470037, "grad_norm": 0.3551426489180324, "learning_rate": 2.200639811749541e-06, "loss": 0.0168, "step": 197985 }, { "epoch": 0.8261217881850272, "grad_norm": 0.6476552261581842, "learning_rate": 2.2006120241741505e-06, "loss": 0.0176, "step": 197990 }, { "epoch": 0.8261426509000175, "grad_norm": 0.5438999543531977, "learning_rate": 2.2005842376513574e-06, "loss": 0.023, "step": 197995 }, { "epoch": 0.8261635136150078, "grad_norm": 0.9049797189767815, "learning_rate": 2.2005564521810966e-06, "loss": 0.0247, "step": 198000 }, { "epoch": 0.8261843763299981, "grad_norm": 0.6790577820527602, "learning_rate": 2.200528667763301e-06, "loss": 0.0243, "step": 198005 }, { "epoch": 0.8262052390449883, "grad_norm": 0.5780979373339618, "learning_rate": 2.200500884397904e-06, "loss": 0.0129, "step": 198010 }, { "epoch": 0.8262261017599787, "grad_norm": 0.9026515407333972, "learning_rate": 2.20047310208484e-06, "loss": 0.0233, "step": 198015 }, { "epoch": 0.8262469644749689, "grad_norm": 0.41421744011365097, "learning_rate": 2.200445320824042e-06, "loss": 0.018, "step": 198020 }, { "epoch": 0.8262678271899592, "grad_norm": 1.3330148144583842, "learning_rate": 2.2004175406154434e-06, "loss": 0.0194, "step": 198025 }, { "epoch": 0.8262886899049495, "grad_norm": 1.0716274248621573, "learning_rate": 2.2003897614589777e-06, "loss": 0.0213, "step": 198030 }, { "epoch": 0.8263095526199398, "grad_norm": 0.42259650588210707, "learning_rate": 2.200361983354579e-06, "loss": 0.0155, "step": 198035 }, { "epoch": 0.82633041533493, "grad_norm": 1.6692027228345787, "learning_rate": 2.200334206302181e-06, "loss": 0.0253, "step": 198040 }, { "epoch": 0.8263512780499203, "grad_norm": 0.5620935123098956, "learning_rate": 2.2003064303017163e-06, "loss": 0.0173, "step": 198045 }, { "epoch": 0.8263721407649106, "grad_norm": 0.5229818604442772, "learning_rate": 2.2002786553531192e-06, "loss": 0.0213, "step": 198050 }, { "epoch": 0.8263930034799009, "grad_norm": 1.346125740269504, "learning_rate": 2.2002508814563237e-06, "loss": 0.0258, "step": 198055 }, { "epoch": 0.8264138661948911, "grad_norm": 0.5597436481570454, "learning_rate": 2.200223108611263e-06, "loss": 0.0181, "step": 198060 }, { "epoch": 0.8264347289098815, "grad_norm": 0.9256223728850203, "learning_rate": 2.20019533681787e-06, "loss": 0.0179, "step": 198065 }, { "epoch": 0.8264555916248717, "grad_norm": 0.5235656395587379, "learning_rate": 2.20016756607608e-06, "loss": 0.0221, "step": 198070 }, { "epoch": 0.8264764543398619, "grad_norm": 0.517189172166464, "learning_rate": 2.2001397963858255e-06, "loss": 0.0238, "step": 198075 }, { "epoch": 0.8264973170548523, "grad_norm": 0.5766866042884361, "learning_rate": 2.20011202774704e-06, "loss": 0.0185, "step": 198080 }, { "epoch": 0.8265181797698425, "grad_norm": 0.5967712375055376, "learning_rate": 2.200084260159657e-06, "loss": 0.0203, "step": 198085 }, { "epoch": 0.8265390424848328, "grad_norm": 0.42303662576258516, "learning_rate": 2.2000564936236114e-06, "loss": 0.0169, "step": 198090 }, { "epoch": 0.826559905199823, "grad_norm": 0.8310451607559822, "learning_rate": 2.2000287281388356e-06, "loss": 0.0181, "step": 198095 }, { "epoch": 0.8265807679148134, "grad_norm": 0.3557765746635941, "learning_rate": 2.2000009637052645e-06, "loss": 0.0194, "step": 198100 }, { "epoch": 0.8266016306298036, "grad_norm": 0.778563474900517, "learning_rate": 2.1999732003228304e-06, "loss": 0.0215, "step": 198105 }, { "epoch": 0.8266224933447939, "grad_norm": 0.4150487610216393, "learning_rate": 2.1999454379914676e-06, "loss": 0.0219, "step": 198110 }, { "epoch": 0.8266433560597842, "grad_norm": 0.426249650481078, "learning_rate": 2.1999176767111096e-06, "loss": 0.0199, "step": 198115 }, { "epoch": 0.8266642187747745, "grad_norm": 0.9142563461773547, "learning_rate": 2.1998899164816904e-06, "loss": 0.0152, "step": 198120 }, { "epoch": 0.8266850814897647, "grad_norm": 0.6485506687412872, "learning_rate": 2.1998621573031435e-06, "loss": 0.0164, "step": 198125 }, { "epoch": 0.8267059442047551, "grad_norm": 0.6808520132175054, "learning_rate": 2.1998343991754028e-06, "loss": 0.0174, "step": 198130 }, { "epoch": 0.8267268069197453, "grad_norm": 0.689160319854281, "learning_rate": 2.199806642098402e-06, "loss": 0.0213, "step": 198135 }, { "epoch": 0.8267476696347356, "grad_norm": 2.1168097535664776, "learning_rate": 2.199778886072075e-06, "loss": 0.0157, "step": 198140 }, { "epoch": 0.8267685323497259, "grad_norm": 0.5966664869804044, "learning_rate": 2.1997511310963546e-06, "loss": 0.0221, "step": 198145 }, { "epoch": 0.8267893950647162, "grad_norm": 1.2974392266376642, "learning_rate": 2.199723377171175e-06, "loss": 0.0218, "step": 198150 }, { "epoch": 0.8268102577797064, "grad_norm": 0.5172949650409412, "learning_rate": 2.1996956242964705e-06, "loss": 0.0213, "step": 198155 }, { "epoch": 0.8268311204946966, "grad_norm": 0.3923707199356325, "learning_rate": 2.199667872472174e-06, "loss": 0.0228, "step": 198160 }, { "epoch": 0.826851983209687, "grad_norm": 1.2833530489178924, "learning_rate": 2.1996401216982197e-06, "loss": 0.0268, "step": 198165 }, { "epoch": 0.8268728459246772, "grad_norm": 0.37836649207340395, "learning_rate": 2.1996123719745412e-06, "loss": 0.0271, "step": 198170 }, { "epoch": 0.8268937086396675, "grad_norm": 0.8225953692756163, "learning_rate": 2.1995846233010727e-06, "loss": 0.0256, "step": 198175 }, { "epoch": 0.8269145713546578, "grad_norm": 0.43995486729183897, "learning_rate": 2.199556875677747e-06, "loss": 0.0235, "step": 198180 }, { "epoch": 0.8269354340696481, "grad_norm": 0.8057430364429587, "learning_rate": 2.199529129104499e-06, "loss": 0.0152, "step": 198185 }, { "epoch": 0.8269562967846383, "grad_norm": 0.6066952127782884, "learning_rate": 2.1995013835812614e-06, "loss": 0.029, "step": 198190 }, { "epoch": 0.8269771594996287, "grad_norm": 0.5861998444443933, "learning_rate": 2.1994736391079692e-06, "loss": 0.0226, "step": 198195 }, { "epoch": 0.8269980222146189, "grad_norm": 1.2590986244193938, "learning_rate": 2.1994458956845545e-06, "loss": 0.025, "step": 198200 }, { "epoch": 0.8270188849296092, "grad_norm": 0.5875518123750415, "learning_rate": 2.1994181533109527e-06, "loss": 0.0273, "step": 198205 }, { "epoch": 0.8270397476445995, "grad_norm": 0.7542002493658131, "learning_rate": 2.199390411987097e-06, "loss": 0.0209, "step": 198210 }, { "epoch": 0.8270606103595898, "grad_norm": 0.5651243345280151, "learning_rate": 2.199362671712921e-06, "loss": 0.0291, "step": 198215 }, { "epoch": 0.82708147307458, "grad_norm": 0.36062828519474655, "learning_rate": 2.1993349324883585e-06, "loss": 0.0174, "step": 198220 }, { "epoch": 0.8271023357895703, "grad_norm": 0.8305952909198, "learning_rate": 2.1993071943133436e-06, "loss": 0.0251, "step": 198225 }, { "epoch": 0.8271231985045606, "grad_norm": 0.6016746172709581, "learning_rate": 2.1992794571878103e-06, "loss": 0.022, "step": 198230 }, { "epoch": 0.8271440612195509, "grad_norm": 1.2009953068085533, "learning_rate": 2.1992517211116914e-06, "loss": 0.0218, "step": 198235 }, { "epoch": 0.8271649239345411, "grad_norm": 0.45513186462548827, "learning_rate": 2.199223986084922e-06, "loss": 0.0159, "step": 198240 }, { "epoch": 0.8271857866495315, "grad_norm": 0.4593564542483654, "learning_rate": 2.199196252107435e-06, "loss": 0.0215, "step": 198245 }, { "epoch": 0.8272066493645217, "grad_norm": 1.0332149640361605, "learning_rate": 2.1991685191791647e-06, "loss": 0.023, "step": 198250 }, { "epoch": 0.827227512079512, "grad_norm": 0.4499175964363302, "learning_rate": 2.199140787300045e-06, "loss": 0.0146, "step": 198255 }, { "epoch": 0.8272483747945023, "grad_norm": 0.6465937906150168, "learning_rate": 2.1991130564700094e-06, "loss": 0.0242, "step": 198260 }, { "epoch": 0.8272692375094925, "grad_norm": 0.40827800226940386, "learning_rate": 2.199085326688992e-06, "loss": 0.0279, "step": 198265 }, { "epoch": 0.8272901002244828, "grad_norm": 0.5933343344057072, "learning_rate": 2.1990575979569266e-06, "loss": 0.023, "step": 198270 }, { "epoch": 0.827310962939473, "grad_norm": 0.4650182957087968, "learning_rate": 2.1990298702737473e-06, "loss": 0.0211, "step": 198275 }, { "epoch": 0.8273318256544634, "grad_norm": 0.9312070068019624, "learning_rate": 2.1990021436393878e-06, "loss": 0.0182, "step": 198280 }, { "epoch": 0.8273526883694536, "grad_norm": 0.872549383831699, "learning_rate": 2.1989744180537817e-06, "loss": 0.0244, "step": 198285 }, { "epoch": 0.8273735510844439, "grad_norm": 0.624139144186521, "learning_rate": 2.1989466935168634e-06, "loss": 0.0257, "step": 198290 }, { "epoch": 0.8273944137994342, "grad_norm": 0.5093589408379017, "learning_rate": 2.1989189700285664e-06, "loss": 0.019, "step": 198295 }, { "epoch": 0.8274152765144245, "grad_norm": 0.5415700912491415, "learning_rate": 2.1988912475888246e-06, "loss": 0.0173, "step": 198300 }, { "epoch": 0.8274361392294147, "grad_norm": 0.80558423698983, "learning_rate": 2.198863526197572e-06, "loss": 0.0169, "step": 198305 }, { "epoch": 0.8274570019444051, "grad_norm": 0.5552704860336501, "learning_rate": 2.198835805854743e-06, "loss": 0.0156, "step": 198310 }, { "epoch": 0.8274778646593953, "grad_norm": 0.8574930726628771, "learning_rate": 2.1988080865602703e-06, "loss": 0.0228, "step": 198315 }, { "epoch": 0.8274987273743856, "grad_norm": 0.3241439947358929, "learning_rate": 2.1987803683140895e-06, "loss": 0.0182, "step": 198320 }, { "epoch": 0.8275195900893759, "grad_norm": 0.5472383257244793, "learning_rate": 2.1987526511161323e-06, "loss": 0.0278, "step": 198325 }, { "epoch": 0.8275404528043662, "grad_norm": 0.8135993751543716, "learning_rate": 2.1987249349663357e-06, "loss": 0.0224, "step": 198330 }, { "epoch": 0.8275613155193564, "grad_norm": 0.5975922058127483, "learning_rate": 2.1986972198646305e-06, "loss": 0.022, "step": 198335 }, { "epoch": 0.8275821782343467, "grad_norm": 0.5477645602220166, "learning_rate": 2.1986695058109523e-06, "loss": 0.0181, "step": 198340 }, { "epoch": 0.827603040949337, "grad_norm": 0.7816186779008901, "learning_rate": 2.1986417928052352e-06, "loss": 0.023, "step": 198345 }, { "epoch": 0.8276239036643273, "grad_norm": 0.6021593157919953, "learning_rate": 2.1986140808474127e-06, "loss": 0.0217, "step": 198350 }, { "epoch": 0.8276447663793175, "grad_norm": 0.34523410933916215, "learning_rate": 2.198586369937418e-06, "loss": 0.0163, "step": 198355 }, { "epoch": 0.8276656290943079, "grad_norm": 0.7282501700558354, "learning_rate": 2.1985586600751864e-06, "loss": 0.02, "step": 198360 }, { "epoch": 0.8276864918092981, "grad_norm": 0.4202054290290998, "learning_rate": 2.198530951260652e-06, "loss": 0.0183, "step": 198365 }, { "epoch": 0.8277073545242883, "grad_norm": 0.48575815357421975, "learning_rate": 2.198503243493747e-06, "loss": 0.0157, "step": 198370 }, { "epoch": 0.8277282172392787, "grad_norm": 0.9001321504454405, "learning_rate": 2.198475536774407e-06, "loss": 0.0263, "step": 198375 }, { "epoch": 0.8277490799542689, "grad_norm": 0.2806338104131483, "learning_rate": 2.1984478311025655e-06, "loss": 0.0164, "step": 198380 }, { "epoch": 0.8277699426692592, "grad_norm": 0.5349973394813122, "learning_rate": 2.198420126478157e-06, "loss": 0.027, "step": 198385 }, { "epoch": 0.8277908053842495, "grad_norm": 0.7167326001702491, "learning_rate": 2.198392422901114e-06, "loss": 0.0236, "step": 198390 }, { "epoch": 0.8278116680992398, "grad_norm": 1.044255352560811, "learning_rate": 2.198364720371372e-06, "loss": 0.0184, "step": 198395 }, { "epoch": 0.82783253081423, "grad_norm": 0.866237527923768, "learning_rate": 2.198337018888865e-06, "loss": 0.0277, "step": 198400 }, { "epoch": 0.8278533935292203, "grad_norm": 0.403806501002894, "learning_rate": 2.1983093184535263e-06, "loss": 0.0166, "step": 198405 }, { "epoch": 0.8278742562442106, "grad_norm": 0.8116739987681421, "learning_rate": 2.1982816190652894e-06, "loss": 0.0227, "step": 198410 }, { "epoch": 0.8278951189592009, "grad_norm": 0.8351263215540609, "learning_rate": 2.19825392072409e-06, "loss": 0.0298, "step": 198415 }, { "epoch": 0.8279159816741911, "grad_norm": 1.2139068942252598, "learning_rate": 2.1982262234298607e-06, "loss": 0.0237, "step": 198420 }, { "epoch": 0.8279368443891815, "grad_norm": 0.5000876468108885, "learning_rate": 2.1981985271825363e-06, "loss": 0.0174, "step": 198425 }, { "epoch": 0.8279577071041717, "grad_norm": 0.5694102170787099, "learning_rate": 2.1981708319820506e-06, "loss": 0.0181, "step": 198430 }, { "epoch": 0.827978569819162, "grad_norm": 0.4541889703127988, "learning_rate": 2.198143137828338e-06, "loss": 0.014, "step": 198435 }, { "epoch": 0.8279994325341523, "grad_norm": 0.5818106146509179, "learning_rate": 2.198115444721332e-06, "loss": 0.018, "step": 198440 }, { "epoch": 0.8280202952491426, "grad_norm": 0.8390483892148097, "learning_rate": 2.1980877526609663e-06, "loss": 0.0249, "step": 198445 }, { "epoch": 0.8280411579641328, "grad_norm": 0.6934662180571168, "learning_rate": 2.198060061647176e-06, "loss": 0.0153, "step": 198450 }, { "epoch": 0.828062020679123, "grad_norm": 0.44743412891009915, "learning_rate": 2.1980323716798953e-06, "loss": 0.0186, "step": 198455 }, { "epoch": 0.8280828833941134, "grad_norm": 0.5927009606045793, "learning_rate": 2.1980046827590578e-06, "loss": 0.0214, "step": 198460 }, { "epoch": 0.8281037461091036, "grad_norm": 0.4718760024743903, "learning_rate": 2.197976994884597e-06, "loss": 0.0242, "step": 198465 }, { "epoch": 0.8281246088240939, "grad_norm": 0.385108747154733, "learning_rate": 2.1979493080564474e-06, "loss": 0.0253, "step": 198470 }, { "epoch": 0.8281454715390842, "grad_norm": 1.0328261605178344, "learning_rate": 2.1979216222745433e-06, "loss": 0.025, "step": 198475 }, { "epoch": 0.8281663342540745, "grad_norm": 0.2440403217469566, "learning_rate": 2.1978939375388193e-06, "loss": 0.013, "step": 198480 }, { "epoch": 0.8281871969690647, "grad_norm": 0.5588134846896606, "learning_rate": 2.1978662538492086e-06, "loss": 0.0269, "step": 198485 }, { "epoch": 0.8282080596840551, "grad_norm": 0.4091315376117097, "learning_rate": 2.197838571205646e-06, "loss": 0.0203, "step": 198490 }, { "epoch": 0.8282289223990453, "grad_norm": 0.5577860892795974, "learning_rate": 2.1978108896080646e-06, "loss": 0.02, "step": 198495 }, { "epoch": 0.8282497851140356, "grad_norm": 0.22271338001032528, "learning_rate": 2.1977832090564005e-06, "loss": 0.0138, "step": 198500 }, { "epoch": 0.8282706478290259, "grad_norm": 0.6327928401852575, "learning_rate": 2.1977555295505853e-06, "loss": 0.0162, "step": 198505 }, { "epoch": 0.8282915105440162, "grad_norm": 0.48213052777660104, "learning_rate": 2.197727851090555e-06, "loss": 0.0164, "step": 198510 }, { "epoch": 0.8283123732590064, "grad_norm": 0.5798093172172699, "learning_rate": 2.197700173676243e-06, "loss": 0.014, "step": 198515 }, { "epoch": 0.8283332359739967, "grad_norm": 0.4116382214864133, "learning_rate": 2.1976724973075837e-06, "loss": 0.0272, "step": 198520 }, { "epoch": 0.828354098688987, "grad_norm": 0.31472794573946183, "learning_rate": 2.1976448219845113e-06, "loss": 0.0197, "step": 198525 }, { "epoch": 0.8283749614039773, "grad_norm": 0.7879136117132822, "learning_rate": 2.1976171477069595e-06, "loss": 0.0267, "step": 198530 }, { "epoch": 0.8283958241189675, "grad_norm": 0.6412811799354152, "learning_rate": 2.1975894744748637e-06, "loss": 0.0195, "step": 198535 }, { "epoch": 0.8284166868339579, "grad_norm": 0.8517570374467928, "learning_rate": 2.1975618022881564e-06, "loss": 0.0188, "step": 198540 }, { "epoch": 0.8284375495489481, "grad_norm": 0.3118974181689893, "learning_rate": 2.1975341311467726e-06, "loss": 0.0167, "step": 198545 }, { "epoch": 0.8284584122639383, "grad_norm": 0.8745418098117973, "learning_rate": 2.197506461050647e-06, "loss": 0.028, "step": 198550 }, { "epoch": 0.8284792749789287, "grad_norm": 0.21129131806080934, "learning_rate": 2.1974787919997127e-06, "loss": 0.0175, "step": 198555 }, { "epoch": 0.828500137693919, "grad_norm": 0.7557569609157391, "learning_rate": 2.197451123993905e-06, "loss": 0.026, "step": 198560 }, { "epoch": 0.8285210004089092, "grad_norm": 0.4590861888652501, "learning_rate": 2.197423457033157e-06, "loss": 0.0234, "step": 198565 }, { "epoch": 0.8285418631238995, "grad_norm": 0.6800985012494393, "learning_rate": 2.197395791117404e-06, "loss": 0.0201, "step": 198570 }, { "epoch": 0.8285627258388898, "grad_norm": 0.5186172956077418, "learning_rate": 2.1973681262465795e-06, "loss": 0.0235, "step": 198575 }, { "epoch": 0.82858358855388, "grad_norm": 0.7213924849088819, "learning_rate": 2.1973404624206177e-06, "loss": 0.0288, "step": 198580 }, { "epoch": 0.8286044512688703, "grad_norm": 0.4927989537925685, "learning_rate": 2.197312799639454e-06, "loss": 0.0171, "step": 198585 }, { "epoch": 0.8286253139838606, "grad_norm": 0.7294209547897855, "learning_rate": 2.197285137903021e-06, "loss": 0.0257, "step": 198590 }, { "epoch": 0.8286461766988509, "grad_norm": 1.0813114622448712, "learning_rate": 2.1972574772112533e-06, "loss": 0.0407, "step": 198595 }, { "epoch": 0.8286670394138411, "grad_norm": 0.625919787517818, "learning_rate": 2.1972298175640854e-06, "loss": 0.0191, "step": 198600 }, { "epoch": 0.8286879021288315, "grad_norm": 0.7415983677656693, "learning_rate": 2.1972021589614526e-06, "loss": 0.0293, "step": 198605 }, { "epoch": 0.8287087648438217, "grad_norm": 0.7028667600217288, "learning_rate": 2.1971745014032877e-06, "loss": 0.0259, "step": 198610 }, { "epoch": 0.828729627558812, "grad_norm": 0.6984621999548071, "learning_rate": 2.1971468448895254e-06, "loss": 0.0178, "step": 198615 }, { "epoch": 0.8287504902738023, "grad_norm": 1.2516567545451418, "learning_rate": 2.1971191894201005e-06, "loss": 0.0225, "step": 198620 }, { "epoch": 0.8287713529887926, "grad_norm": 0.6905772226229309, "learning_rate": 2.197091534994946e-06, "loss": 0.0225, "step": 198625 }, { "epoch": 0.8287922157037828, "grad_norm": 0.8112996800079941, "learning_rate": 2.1970638816139975e-06, "loss": 0.0206, "step": 198630 }, { "epoch": 0.828813078418773, "grad_norm": 1.9823045510328485, "learning_rate": 2.197036229277189e-06, "loss": 0.0303, "step": 198635 }, { "epoch": 0.8288339411337634, "grad_norm": 0.4817662963593181, "learning_rate": 2.1970085779844543e-06, "loss": 0.023, "step": 198640 }, { "epoch": 0.8288548038487537, "grad_norm": 0.40497166561176245, "learning_rate": 2.1969809277357276e-06, "loss": 0.0209, "step": 198645 }, { "epoch": 0.8288756665637439, "grad_norm": 0.9150929990764198, "learning_rate": 2.196953278530944e-06, "loss": 0.0202, "step": 198650 }, { "epoch": 0.8288965292787343, "grad_norm": 0.46782680148826394, "learning_rate": 2.1969256303700375e-06, "loss": 0.0194, "step": 198655 }, { "epoch": 0.8289173919937245, "grad_norm": 0.8791881997087139, "learning_rate": 2.1968979832529423e-06, "loss": 0.0221, "step": 198660 }, { "epoch": 0.8289382547087147, "grad_norm": 1.0162606171885569, "learning_rate": 2.1968703371795925e-06, "loss": 0.0218, "step": 198665 }, { "epoch": 0.8289591174237051, "grad_norm": 0.25415069474254953, "learning_rate": 2.196842692149923e-06, "loss": 0.022, "step": 198670 }, { "epoch": 0.8289799801386953, "grad_norm": 0.8384573339067909, "learning_rate": 2.1968150481638677e-06, "loss": 0.0199, "step": 198675 }, { "epoch": 0.8290008428536856, "grad_norm": 0.7584594097289726, "learning_rate": 2.1967874052213604e-06, "loss": 0.0269, "step": 198680 }, { "epoch": 0.8290217055686759, "grad_norm": 0.6230151287780082, "learning_rate": 2.1967597633223364e-06, "loss": 0.021, "step": 198685 }, { "epoch": 0.8290425682836662, "grad_norm": 0.2407087559694313, "learning_rate": 2.1967321224667303e-06, "loss": 0.0191, "step": 198690 }, { "epoch": 0.8290634309986564, "grad_norm": 1.4882114682209322, "learning_rate": 2.1967044826544757e-06, "loss": 0.031, "step": 198695 }, { "epoch": 0.8290842937136467, "grad_norm": 0.8209671308151041, "learning_rate": 2.196676843885507e-06, "loss": 0.018, "step": 198700 }, { "epoch": 0.829105156428637, "grad_norm": 0.887337246409581, "learning_rate": 2.1966492061597587e-06, "loss": 0.0187, "step": 198705 }, { "epoch": 0.8291260191436273, "grad_norm": 0.5857664724048961, "learning_rate": 2.1966215694771655e-06, "loss": 0.021, "step": 198710 }, { "epoch": 0.8291468818586175, "grad_norm": 0.5616433849063407, "learning_rate": 2.1965939338376607e-06, "loss": 0.0209, "step": 198715 }, { "epoch": 0.8291677445736079, "grad_norm": 0.6265476361084974, "learning_rate": 2.19656629924118e-06, "loss": 0.0186, "step": 198720 }, { "epoch": 0.8291886072885981, "grad_norm": 0.8666136074885091, "learning_rate": 2.1965386656876574e-06, "loss": 0.0179, "step": 198725 }, { "epoch": 0.8292094700035884, "grad_norm": 0.38151440756925953, "learning_rate": 2.1965110331770267e-06, "loss": 0.0261, "step": 198730 }, { "epoch": 0.8292303327185787, "grad_norm": 0.3309545770541434, "learning_rate": 2.196483401709223e-06, "loss": 0.0203, "step": 198735 }, { "epoch": 0.829251195433569, "grad_norm": 0.8558079415014972, "learning_rate": 2.1964557712841806e-06, "loss": 0.0156, "step": 198740 }, { "epoch": 0.8292720581485592, "grad_norm": 1.059251491826617, "learning_rate": 2.1964281419018336e-06, "loss": 0.0261, "step": 198745 }, { "epoch": 0.8292929208635496, "grad_norm": 3.0730009134017706, "learning_rate": 2.196400513562117e-06, "loss": 0.0181, "step": 198750 }, { "epoch": 0.8293137835785398, "grad_norm": 1.0372339647908826, "learning_rate": 2.1963728862649646e-06, "loss": 0.0193, "step": 198755 }, { "epoch": 0.82933464629353, "grad_norm": 0.48963739622930946, "learning_rate": 2.1963452600103104e-06, "loss": 0.02, "step": 198760 }, { "epoch": 0.8293555090085203, "grad_norm": 0.23084925938102527, "learning_rate": 2.1963176347980904e-06, "loss": 0.0181, "step": 198765 }, { "epoch": 0.8293763717235106, "grad_norm": 0.5735322628184397, "learning_rate": 2.1962900106282376e-06, "loss": 0.025, "step": 198770 }, { "epoch": 0.8293972344385009, "grad_norm": 0.6035910768615765, "learning_rate": 2.1962623875006876e-06, "loss": 0.0179, "step": 198775 }, { "epoch": 0.8294180971534911, "grad_norm": 0.9675583067187291, "learning_rate": 2.1962347654153736e-06, "loss": 0.0245, "step": 198780 }, { "epoch": 0.8294389598684815, "grad_norm": 0.876113870863006, "learning_rate": 2.1962071443722315e-06, "loss": 0.0208, "step": 198785 }, { "epoch": 0.8294598225834717, "grad_norm": 0.4613288391301945, "learning_rate": 2.196179524371194e-06, "loss": 0.0128, "step": 198790 }, { "epoch": 0.829480685298462, "grad_norm": 0.3918907431259819, "learning_rate": 2.1961519054121973e-06, "loss": 0.0132, "step": 198795 }, { "epoch": 0.8295015480134523, "grad_norm": 0.8669331165560272, "learning_rate": 2.1961242874951748e-06, "loss": 0.0237, "step": 198800 }, { "epoch": 0.8295224107284426, "grad_norm": 0.6019369375508353, "learning_rate": 2.196096670620061e-06, "loss": 0.0221, "step": 198805 }, { "epoch": 0.8295432734434328, "grad_norm": 0.34902296096657115, "learning_rate": 2.1960690547867918e-06, "loss": 0.0193, "step": 198810 }, { "epoch": 0.8295641361584231, "grad_norm": 0.701929450907454, "learning_rate": 2.196041439995299e-06, "loss": 0.024, "step": 198815 }, { "epoch": 0.8295849988734134, "grad_norm": 0.951970941196555, "learning_rate": 2.19601382624552e-06, "loss": 0.0207, "step": 198820 }, { "epoch": 0.8296058615884037, "grad_norm": 0.6913963817144421, "learning_rate": 2.195986213537388e-06, "loss": 0.0195, "step": 198825 }, { "epoch": 0.8296267243033939, "grad_norm": 0.8039580142486882, "learning_rate": 2.195958601870837e-06, "loss": 0.0228, "step": 198830 }, { "epoch": 0.8296475870183843, "grad_norm": 0.6748047636140816, "learning_rate": 2.1959309912458016e-06, "loss": 0.0181, "step": 198835 }, { "epoch": 0.8296684497333745, "grad_norm": 0.4786753223494132, "learning_rate": 2.1959033816622175e-06, "loss": 0.0131, "step": 198840 }, { "epoch": 0.8296893124483647, "grad_norm": 0.6291064743151215, "learning_rate": 2.1958757731200183e-06, "loss": 0.0182, "step": 198845 }, { "epoch": 0.8297101751633551, "grad_norm": 0.5287451951125198, "learning_rate": 2.195848165619139e-06, "loss": 0.0286, "step": 198850 }, { "epoch": 0.8297310378783453, "grad_norm": 0.733227352032533, "learning_rate": 2.1958205591595136e-06, "loss": 0.0159, "step": 198855 }, { "epoch": 0.8297519005933356, "grad_norm": 0.3226193186155894, "learning_rate": 2.195792953741077e-06, "loss": 0.0183, "step": 198860 }, { "epoch": 0.829772763308326, "grad_norm": 0.9220570766498049, "learning_rate": 2.1957653493637636e-06, "loss": 0.0246, "step": 198865 }, { "epoch": 0.8297936260233162, "grad_norm": 0.33455993183090577, "learning_rate": 2.195737746027508e-06, "loss": 0.0164, "step": 198870 }, { "epoch": 0.8298144887383064, "grad_norm": 0.7210804073017674, "learning_rate": 2.1957101437322452e-06, "loss": 0.022, "step": 198875 }, { "epoch": 0.8298353514532967, "grad_norm": 0.8075389785856651, "learning_rate": 2.1956825424779087e-06, "loss": 0.031, "step": 198880 }, { "epoch": 0.829856214168287, "grad_norm": 0.7898351635181254, "learning_rate": 2.195654942264434e-06, "loss": 0.0219, "step": 198885 }, { "epoch": 0.8298770768832773, "grad_norm": 0.559444534674903, "learning_rate": 2.1956273430917557e-06, "loss": 0.0219, "step": 198890 }, { "epoch": 0.8298979395982675, "grad_norm": 0.5141235655645255, "learning_rate": 2.1955997449598083e-06, "loss": 0.0248, "step": 198895 }, { "epoch": 0.8299188023132579, "grad_norm": 0.8148506737674578, "learning_rate": 2.1955721478685256e-06, "loss": 0.0257, "step": 198900 }, { "epoch": 0.8299396650282481, "grad_norm": 0.5839342754104965, "learning_rate": 2.1955445518178433e-06, "loss": 0.0162, "step": 198905 }, { "epoch": 0.8299605277432384, "grad_norm": 0.3390939016431538, "learning_rate": 2.195516956807695e-06, "loss": 0.0212, "step": 198910 }, { "epoch": 0.8299813904582287, "grad_norm": 0.5266759707108056, "learning_rate": 2.1954893628380164e-06, "loss": 0.0209, "step": 198915 }, { "epoch": 0.830002253173219, "grad_norm": 0.46774216655783435, "learning_rate": 2.1954617699087415e-06, "loss": 0.0153, "step": 198920 }, { "epoch": 0.8300231158882092, "grad_norm": 0.6538927199591198, "learning_rate": 2.1954341780198043e-06, "loss": 0.0223, "step": 198925 }, { "epoch": 0.8300439786031995, "grad_norm": 0.6661724335851746, "learning_rate": 2.195406587171141e-06, "loss": 0.0207, "step": 198930 }, { "epoch": 0.8300648413181898, "grad_norm": 0.5839408656746854, "learning_rate": 2.195378997362685e-06, "loss": 0.023, "step": 198935 }, { "epoch": 0.83008570403318, "grad_norm": 0.83295280980145, "learning_rate": 2.1953514085943707e-06, "loss": 0.0226, "step": 198940 }, { "epoch": 0.8301065667481703, "grad_norm": 0.6116255444853885, "learning_rate": 2.1953238208661344e-06, "loss": 0.0177, "step": 198945 }, { "epoch": 0.8301274294631606, "grad_norm": 0.43581060255684173, "learning_rate": 2.1952962341779088e-06, "loss": 0.0204, "step": 198950 }, { "epoch": 0.8301482921781509, "grad_norm": 0.3913740772325231, "learning_rate": 2.1952686485296295e-06, "loss": 0.0187, "step": 198955 }, { "epoch": 0.8301691548931411, "grad_norm": 1.5981824472071093, "learning_rate": 2.195241063921231e-06, "loss": 0.0186, "step": 198960 }, { "epoch": 0.8301900176081315, "grad_norm": 0.9893044394918148, "learning_rate": 2.195213480352649e-06, "loss": 0.0246, "step": 198965 }, { "epoch": 0.8302108803231217, "grad_norm": 0.4709097279336782, "learning_rate": 2.195185897823816e-06, "loss": 0.0196, "step": 198970 }, { "epoch": 0.830231743038112, "grad_norm": 0.6274707796819523, "learning_rate": 2.195158316334669e-06, "loss": 0.0203, "step": 198975 }, { "epoch": 0.8302526057531023, "grad_norm": 0.5926511704197052, "learning_rate": 2.195130735885141e-06, "loss": 0.0204, "step": 198980 }, { "epoch": 0.8302734684680926, "grad_norm": 0.5175823506035016, "learning_rate": 2.1951031564751672e-06, "loss": 0.0267, "step": 198985 }, { "epoch": 0.8302943311830828, "grad_norm": 1.3565693262302803, "learning_rate": 2.1950755781046828e-06, "loss": 0.0223, "step": 198990 }, { "epoch": 0.8303151938980731, "grad_norm": 2.2831135872894084, "learning_rate": 2.1950480007736217e-06, "loss": 0.0171, "step": 198995 }, { "epoch": 0.8303360566130634, "grad_norm": 0.5581372576441319, "learning_rate": 2.195020424481919e-06, "loss": 0.0252, "step": 199000 }, { "epoch": 0.8303569193280537, "grad_norm": 0.6536208653242204, "learning_rate": 2.1949928492295095e-06, "loss": 0.0172, "step": 199005 }, { "epoch": 0.8303777820430439, "grad_norm": 0.8627384016857611, "learning_rate": 2.194965275016328e-06, "loss": 0.0146, "step": 199010 }, { "epoch": 0.8303986447580343, "grad_norm": 0.840557260282755, "learning_rate": 2.194937701842309e-06, "loss": 0.0214, "step": 199015 }, { "epoch": 0.8304195074730245, "grad_norm": 1.3869091430828189, "learning_rate": 2.1949101297073874e-06, "loss": 0.0261, "step": 199020 }, { "epoch": 0.8304403701880148, "grad_norm": 1.0299193196635716, "learning_rate": 2.1948825586114976e-06, "loss": 0.0291, "step": 199025 }, { "epoch": 0.8304612329030051, "grad_norm": 0.8312435163474617, "learning_rate": 2.1948549885545746e-06, "loss": 0.0171, "step": 199030 }, { "epoch": 0.8304820956179954, "grad_norm": 0.648193464387364, "learning_rate": 2.1948274195365536e-06, "loss": 0.0161, "step": 199035 }, { "epoch": 0.8305029583329856, "grad_norm": 0.899538436844141, "learning_rate": 2.194799851557368e-06, "loss": 0.0217, "step": 199040 }, { "epoch": 0.830523821047976, "grad_norm": 0.38337348771588975, "learning_rate": 2.194772284616954e-06, "loss": 0.02, "step": 199045 }, { "epoch": 0.8305446837629662, "grad_norm": 0.2426509149709961, "learning_rate": 2.1947447187152454e-06, "loss": 0.0187, "step": 199050 }, { "epoch": 0.8305655464779564, "grad_norm": 0.5127877290373595, "learning_rate": 2.1947171538521776e-06, "loss": 0.0247, "step": 199055 }, { "epoch": 0.8305864091929467, "grad_norm": 0.31157861386005836, "learning_rate": 2.1946895900276853e-06, "loss": 0.0178, "step": 199060 }, { "epoch": 0.830607271907937, "grad_norm": 1.0882755800152886, "learning_rate": 2.1946620272417033e-06, "loss": 0.0241, "step": 199065 }, { "epoch": 0.8306281346229273, "grad_norm": 0.608391400367794, "learning_rate": 2.1946344654941663e-06, "loss": 0.0182, "step": 199070 }, { "epoch": 0.8306489973379175, "grad_norm": 0.5094727430828461, "learning_rate": 2.1946069047850083e-06, "loss": 0.0275, "step": 199075 }, { "epoch": 0.8306698600529079, "grad_norm": 0.34217959739686343, "learning_rate": 2.194579345114165e-06, "loss": 0.0159, "step": 199080 }, { "epoch": 0.8306907227678981, "grad_norm": 0.9173385805834943, "learning_rate": 2.1945517864815716e-06, "loss": 0.0207, "step": 199085 }, { "epoch": 0.8307115854828884, "grad_norm": 0.8841003253661021, "learning_rate": 2.1945242288871617e-06, "loss": 0.016, "step": 199090 }, { "epoch": 0.8307324481978787, "grad_norm": 0.37928294461028134, "learning_rate": 2.1944966723308715e-06, "loss": 0.0245, "step": 199095 }, { "epoch": 0.830753310912869, "grad_norm": 1.1262100357580331, "learning_rate": 2.1944691168126346e-06, "loss": 0.0191, "step": 199100 }, { "epoch": 0.8307741736278592, "grad_norm": 0.626353920430461, "learning_rate": 2.1944415623323862e-06, "loss": 0.013, "step": 199105 }, { "epoch": 0.8307950363428495, "grad_norm": 1.0565175771570634, "learning_rate": 2.1944140088900615e-06, "loss": 0.0211, "step": 199110 }, { "epoch": 0.8308158990578398, "grad_norm": 0.4695370164700717, "learning_rate": 2.1943864564855948e-06, "loss": 0.0154, "step": 199115 }, { "epoch": 0.8308367617728301, "grad_norm": 0.7953807220998558, "learning_rate": 2.1943589051189217e-06, "loss": 0.0149, "step": 199120 }, { "epoch": 0.8308576244878203, "grad_norm": 1.0666495953927335, "learning_rate": 2.194331354789976e-06, "loss": 0.0244, "step": 199125 }, { "epoch": 0.8308784872028107, "grad_norm": 0.3597768934954782, "learning_rate": 2.1943038054986932e-06, "loss": 0.0158, "step": 199130 }, { "epoch": 0.8308993499178009, "grad_norm": 0.6936810463118631, "learning_rate": 2.194276257245009e-06, "loss": 0.0229, "step": 199135 }, { "epoch": 0.8309202126327911, "grad_norm": 0.6547494808749916, "learning_rate": 2.1942487100288563e-06, "loss": 0.0206, "step": 199140 }, { "epoch": 0.8309410753477815, "grad_norm": 0.6032183118089827, "learning_rate": 2.1942211638501715e-06, "loss": 0.0232, "step": 199145 }, { "epoch": 0.8309619380627717, "grad_norm": 1.0971482365295506, "learning_rate": 2.194193618708889e-06, "loss": 0.031, "step": 199150 }, { "epoch": 0.830982800777762, "grad_norm": 1.7500810821339357, "learning_rate": 2.194166074604944e-06, "loss": 0.0288, "step": 199155 }, { "epoch": 0.8310036634927523, "grad_norm": 0.5508174848280416, "learning_rate": 2.1941385315382704e-06, "loss": 0.0176, "step": 199160 }, { "epoch": 0.8310245262077426, "grad_norm": 0.6240681343121377, "learning_rate": 2.1941109895088045e-06, "loss": 0.0199, "step": 199165 }, { "epoch": 0.8310453889227328, "grad_norm": 0.8164494307037601, "learning_rate": 2.1940834485164804e-06, "loss": 0.0197, "step": 199170 }, { "epoch": 0.8310662516377231, "grad_norm": 0.6980141690754841, "learning_rate": 2.1940559085612326e-06, "loss": 0.0233, "step": 199175 }, { "epoch": 0.8310871143527134, "grad_norm": 0.35678070580102894, "learning_rate": 2.1940283696429967e-06, "loss": 0.025, "step": 199180 }, { "epoch": 0.8311079770677037, "grad_norm": 0.5460773240404927, "learning_rate": 2.194000831761708e-06, "loss": 0.0229, "step": 199185 }, { "epoch": 0.8311288397826939, "grad_norm": 0.5012406704748747, "learning_rate": 2.1939732949173007e-06, "loss": 0.0209, "step": 199190 }, { "epoch": 0.8311497024976843, "grad_norm": 0.40259843194785294, "learning_rate": 2.1939457591097098e-06, "loss": 0.0179, "step": 199195 }, { "epoch": 0.8311705652126745, "grad_norm": 0.475333746825556, "learning_rate": 2.1939182243388703e-06, "loss": 0.0173, "step": 199200 }, { "epoch": 0.8311914279276648, "grad_norm": 0.29713118636970737, "learning_rate": 2.193890690604717e-06, "loss": 0.0133, "step": 199205 }, { "epoch": 0.8312122906426551, "grad_norm": 0.7988747757872737, "learning_rate": 2.1938631579071854e-06, "loss": 0.0225, "step": 199210 }, { "epoch": 0.8312331533576454, "grad_norm": 0.8399603348081037, "learning_rate": 2.19383562624621e-06, "loss": 0.0216, "step": 199215 }, { "epoch": 0.8312540160726356, "grad_norm": 0.6098504075076688, "learning_rate": 2.193808095621726e-06, "loss": 0.0229, "step": 199220 }, { "epoch": 0.831274878787626, "grad_norm": 0.7486440743278715, "learning_rate": 2.193780566033668e-06, "loss": 0.0292, "step": 199225 }, { "epoch": 0.8312957415026162, "grad_norm": 0.5926115050129797, "learning_rate": 2.1937530374819713e-06, "loss": 0.0216, "step": 199230 }, { "epoch": 0.8313166042176064, "grad_norm": 0.6265243656093548, "learning_rate": 2.1937255099665704e-06, "loss": 0.0213, "step": 199235 }, { "epoch": 0.8313374669325967, "grad_norm": 0.5104980550558559, "learning_rate": 2.193697983487401e-06, "loss": 0.0177, "step": 199240 }, { "epoch": 0.831358329647587, "grad_norm": 0.5833771860189293, "learning_rate": 2.193670458044398e-06, "loss": 0.0248, "step": 199245 }, { "epoch": 0.8313791923625773, "grad_norm": 0.375273374925003, "learning_rate": 2.1936429336374955e-06, "loss": 0.0216, "step": 199250 }, { "epoch": 0.8314000550775675, "grad_norm": 0.8661404331302341, "learning_rate": 2.1936154102666297e-06, "loss": 0.0268, "step": 199255 }, { "epoch": 0.8314209177925579, "grad_norm": 0.8623701276201149, "learning_rate": 2.1935878879317352e-06, "loss": 0.0174, "step": 199260 }, { "epoch": 0.8314417805075481, "grad_norm": 0.2661896951735935, "learning_rate": 2.1935603666327464e-06, "loss": 0.0177, "step": 199265 }, { "epoch": 0.8314626432225384, "grad_norm": 0.6862529207095021, "learning_rate": 2.193532846369599e-06, "loss": 0.0191, "step": 199270 }, { "epoch": 0.8314835059375287, "grad_norm": 0.867975288134714, "learning_rate": 2.1935053271422276e-06, "loss": 0.0204, "step": 199275 }, { "epoch": 0.831504368652519, "grad_norm": 0.8512507586912685, "learning_rate": 2.1934778089505677e-06, "loss": 0.0345, "step": 199280 }, { "epoch": 0.8315252313675092, "grad_norm": 0.5907182709442125, "learning_rate": 2.1934502917945542e-06, "loss": 0.0184, "step": 199285 }, { "epoch": 0.8315460940824995, "grad_norm": 0.6047490901449948, "learning_rate": 2.1934227756741215e-06, "loss": 0.0176, "step": 199290 }, { "epoch": 0.8315669567974898, "grad_norm": 0.28666139646316274, "learning_rate": 2.1933952605892056e-06, "loss": 0.0145, "step": 199295 }, { "epoch": 0.8315878195124801, "grad_norm": 0.647149882928246, "learning_rate": 2.193367746539741e-06, "loss": 0.0244, "step": 199300 }, { "epoch": 0.8316086822274703, "grad_norm": 0.3165537510363305, "learning_rate": 2.1933402335256627e-06, "loss": 0.0191, "step": 199305 }, { "epoch": 0.8316295449424607, "grad_norm": 0.4856572343161278, "learning_rate": 2.193312721546906e-06, "loss": 0.0295, "step": 199310 }, { "epoch": 0.8316504076574509, "grad_norm": 0.7413548766155735, "learning_rate": 2.1932852106034065e-06, "loss": 0.0185, "step": 199315 }, { "epoch": 0.8316712703724412, "grad_norm": 0.5145368387412553, "learning_rate": 2.193257700695098e-06, "loss": 0.0199, "step": 199320 }, { "epoch": 0.8316921330874315, "grad_norm": 0.7098807058852578, "learning_rate": 2.1932301918219163e-06, "loss": 0.0213, "step": 199325 }, { "epoch": 0.8317129958024218, "grad_norm": 0.4151253233988753, "learning_rate": 2.1932026839837967e-06, "loss": 0.0186, "step": 199330 }, { "epoch": 0.831733858517412, "grad_norm": 0.5846089088148855, "learning_rate": 2.193175177180674e-06, "loss": 0.021, "step": 199335 }, { "epoch": 0.8317547212324023, "grad_norm": 0.3055696521893027, "learning_rate": 2.1931476714124835e-06, "loss": 0.0201, "step": 199340 }, { "epoch": 0.8317755839473926, "grad_norm": 0.5452405900352993, "learning_rate": 2.19312016667916e-06, "loss": 0.0158, "step": 199345 }, { "epoch": 0.8317964466623828, "grad_norm": 0.9175861993841942, "learning_rate": 2.193092662980638e-06, "loss": 0.0241, "step": 199350 }, { "epoch": 0.8318173093773731, "grad_norm": 0.459423339180778, "learning_rate": 2.193065160316854e-06, "loss": 0.0272, "step": 199355 }, { "epoch": 0.8318381720923634, "grad_norm": 0.3169721627227633, "learning_rate": 2.1930376586877425e-06, "loss": 0.0193, "step": 199360 }, { "epoch": 0.8318590348073537, "grad_norm": 0.6828132469693421, "learning_rate": 2.1930101580932386e-06, "loss": 0.0232, "step": 199365 }, { "epoch": 0.8318798975223439, "grad_norm": 0.6528626879269093, "learning_rate": 2.192982658533277e-06, "loss": 0.0219, "step": 199370 }, { "epoch": 0.8319007602373343, "grad_norm": 0.4439209748801314, "learning_rate": 2.1929551600077944e-06, "loss": 0.0158, "step": 199375 }, { "epoch": 0.8319216229523245, "grad_norm": 0.447338184695377, "learning_rate": 2.1929276625167235e-06, "loss": 0.0181, "step": 199380 }, { "epoch": 0.8319424856673148, "grad_norm": 0.38382258899871413, "learning_rate": 2.1929001660600015e-06, "loss": 0.0235, "step": 199385 }, { "epoch": 0.8319633483823051, "grad_norm": 0.6209571643083924, "learning_rate": 2.1928726706375634e-06, "loss": 0.0161, "step": 199390 }, { "epoch": 0.8319842110972954, "grad_norm": 1.0020620414243147, "learning_rate": 2.1928451762493423e-06, "loss": 0.0305, "step": 199395 }, { "epoch": 0.8320050738122856, "grad_norm": 2.1768954062752695, "learning_rate": 2.192817682895276e-06, "loss": 0.0216, "step": 199400 }, { "epoch": 0.832025936527276, "grad_norm": 0.6576248792507358, "learning_rate": 2.192790190575298e-06, "loss": 0.0217, "step": 199405 }, { "epoch": 0.8320467992422662, "grad_norm": 0.6299513291542319, "learning_rate": 2.1927626992893442e-06, "loss": 0.02, "step": 199410 }, { "epoch": 0.8320676619572565, "grad_norm": 0.7222257267072708, "learning_rate": 2.19273520903735e-06, "loss": 0.0264, "step": 199415 }, { "epoch": 0.8320885246722467, "grad_norm": 0.48429757064393464, "learning_rate": 2.1927077198192493e-06, "loss": 0.0204, "step": 199420 }, { "epoch": 0.832109387387237, "grad_norm": 0.6606463068669659, "learning_rate": 2.192680231634979e-06, "loss": 0.0161, "step": 199425 }, { "epoch": 0.8321302501022273, "grad_norm": 0.6234110293200045, "learning_rate": 2.192652744484473e-06, "loss": 0.0216, "step": 199430 }, { "epoch": 0.8321511128172175, "grad_norm": 0.4505208414716849, "learning_rate": 2.1926252583676673e-06, "loss": 0.0136, "step": 199435 }, { "epoch": 0.8321719755322079, "grad_norm": 0.6179829383017497, "learning_rate": 2.192597773284496e-06, "loss": 0.0189, "step": 199440 }, { "epoch": 0.8321928382471981, "grad_norm": 0.7576201316348915, "learning_rate": 2.1925702892348963e-06, "loss": 0.0185, "step": 199445 }, { "epoch": 0.8322137009621884, "grad_norm": 1.0389479940575068, "learning_rate": 2.192542806218801e-06, "loss": 0.0248, "step": 199450 }, { "epoch": 0.8322345636771787, "grad_norm": 0.7310044161693958, "learning_rate": 2.1925153242361476e-06, "loss": 0.0276, "step": 199455 }, { "epoch": 0.832255426392169, "grad_norm": 0.536697139359839, "learning_rate": 2.1924878432868695e-06, "loss": 0.0227, "step": 199460 }, { "epoch": 0.8322762891071592, "grad_norm": 0.8787160472764174, "learning_rate": 2.1924603633709035e-06, "loss": 0.0238, "step": 199465 }, { "epoch": 0.8322971518221495, "grad_norm": 0.21357059685097768, "learning_rate": 2.1924328844881834e-06, "loss": 0.016, "step": 199470 }, { "epoch": 0.8323180145371398, "grad_norm": 0.5045042602569959, "learning_rate": 2.1924054066386454e-06, "loss": 0.0234, "step": 199475 }, { "epoch": 0.8323388772521301, "grad_norm": 0.7210629614359907, "learning_rate": 2.1923779298222245e-06, "loss": 0.0171, "step": 199480 }, { "epoch": 0.8323597399671203, "grad_norm": 0.478475187512315, "learning_rate": 2.1923504540388555e-06, "loss": 0.0199, "step": 199485 }, { "epoch": 0.8323806026821107, "grad_norm": 0.4791055807746855, "learning_rate": 2.192322979288475e-06, "loss": 0.0211, "step": 199490 }, { "epoch": 0.8324014653971009, "grad_norm": 0.5044926662557588, "learning_rate": 2.192295505571017e-06, "loss": 0.0151, "step": 199495 }, { "epoch": 0.8324223281120912, "grad_norm": 0.3420050402772713, "learning_rate": 2.1922680328864173e-06, "loss": 0.0204, "step": 199500 }, { "epoch": 0.8324431908270815, "grad_norm": 0.9388073389282421, "learning_rate": 2.1922405612346104e-06, "loss": 0.0239, "step": 199505 }, { "epoch": 0.8324640535420718, "grad_norm": 0.597136272486569, "learning_rate": 2.1922130906155327e-06, "loss": 0.0117, "step": 199510 }, { "epoch": 0.832484916257062, "grad_norm": 0.23766362800968316, "learning_rate": 2.192185621029119e-06, "loss": 0.0222, "step": 199515 }, { "epoch": 0.8325057789720524, "grad_norm": 1.139159805813766, "learning_rate": 2.1921581524753047e-06, "loss": 0.0321, "step": 199520 }, { "epoch": 0.8325266416870426, "grad_norm": 0.6734788261532221, "learning_rate": 2.192130684954025e-06, "loss": 0.0186, "step": 199525 }, { "epoch": 0.8325475044020328, "grad_norm": 4.155652847599117, "learning_rate": 2.1921032184652157e-06, "loss": 0.0287, "step": 199530 }, { "epoch": 0.8325683671170231, "grad_norm": 0.8746340069017738, "learning_rate": 2.1920757530088106e-06, "loss": 0.0177, "step": 199535 }, { "epoch": 0.8325892298320134, "grad_norm": 1.0327290733149272, "learning_rate": 2.192048288584747e-06, "loss": 0.0246, "step": 199540 }, { "epoch": 0.8326100925470037, "grad_norm": 0.4128265164888249, "learning_rate": 2.1920208251929596e-06, "loss": 0.0152, "step": 199545 }, { "epoch": 0.8326309552619939, "grad_norm": 0.6623357142581884, "learning_rate": 2.1919933628333824e-06, "loss": 0.0139, "step": 199550 }, { "epoch": 0.8326518179769843, "grad_norm": 1.9991608596499157, "learning_rate": 2.191965901505953e-06, "loss": 0.0243, "step": 199555 }, { "epoch": 0.8326726806919745, "grad_norm": 0.5276355533735563, "learning_rate": 2.191938441210605e-06, "loss": 0.0183, "step": 199560 }, { "epoch": 0.8326935434069648, "grad_norm": 0.9463210462073011, "learning_rate": 2.1919109819472743e-06, "loss": 0.0222, "step": 199565 }, { "epoch": 0.8327144061219551, "grad_norm": 0.4110205432290495, "learning_rate": 2.1918835237158963e-06, "loss": 0.0112, "step": 199570 }, { "epoch": 0.8327352688369454, "grad_norm": 0.4032975764419711, "learning_rate": 2.191856066516406e-06, "loss": 0.0199, "step": 199575 }, { "epoch": 0.8327561315519356, "grad_norm": 0.6675126833254397, "learning_rate": 2.1918286103487397e-06, "loss": 0.0209, "step": 199580 }, { "epoch": 0.832776994266926, "grad_norm": 0.3133705984876992, "learning_rate": 2.1918011552128323e-06, "loss": 0.0242, "step": 199585 }, { "epoch": 0.8327978569819162, "grad_norm": 0.4169847352188166, "learning_rate": 2.191773701108618e-06, "loss": 0.0169, "step": 199590 }, { "epoch": 0.8328187196969065, "grad_norm": 0.7593110641642279, "learning_rate": 2.1917462480360342e-06, "loss": 0.0235, "step": 199595 }, { "epoch": 0.8328395824118967, "grad_norm": 1.3289445123142554, "learning_rate": 2.1917187959950153e-06, "loss": 0.0285, "step": 199600 }, { "epoch": 0.8328604451268871, "grad_norm": 0.47933541285759046, "learning_rate": 2.1916913449854966e-06, "loss": 0.0196, "step": 199605 }, { "epoch": 0.8328813078418773, "grad_norm": 0.6494948524883727, "learning_rate": 2.1916638950074138e-06, "loss": 0.0309, "step": 199610 }, { "epoch": 0.8329021705568675, "grad_norm": 0.5109628317135638, "learning_rate": 2.191636446060702e-06, "loss": 0.0217, "step": 199615 }, { "epoch": 0.8329230332718579, "grad_norm": 0.6412585015482527, "learning_rate": 2.191608998145297e-06, "loss": 0.0294, "step": 199620 }, { "epoch": 0.8329438959868481, "grad_norm": 0.7231742866604782, "learning_rate": 2.1915815512611335e-06, "loss": 0.0172, "step": 199625 }, { "epoch": 0.8329647587018384, "grad_norm": 0.7020305504570122, "learning_rate": 2.191554105408148e-06, "loss": 0.0183, "step": 199630 }, { "epoch": 0.8329856214168287, "grad_norm": 0.457115388322198, "learning_rate": 2.1915266605862754e-06, "loss": 0.022, "step": 199635 }, { "epoch": 0.833006484131819, "grad_norm": 0.3426559407133416, "learning_rate": 2.191499216795451e-06, "loss": 0.019, "step": 199640 }, { "epoch": 0.8330273468468092, "grad_norm": 0.8551584611177502, "learning_rate": 2.19147177403561e-06, "loss": 0.0184, "step": 199645 }, { "epoch": 0.8330482095617995, "grad_norm": 0.716361778068653, "learning_rate": 2.1914443323066884e-06, "loss": 0.0249, "step": 199650 }, { "epoch": 0.8330690722767898, "grad_norm": 1.2378759994403807, "learning_rate": 2.191416891608622e-06, "loss": 0.0186, "step": 199655 }, { "epoch": 0.8330899349917801, "grad_norm": 0.600626919264974, "learning_rate": 2.1913894519413444e-06, "loss": 0.0207, "step": 199660 }, { "epoch": 0.8331107977067703, "grad_norm": 0.28950674348412453, "learning_rate": 2.1913620133047934e-06, "loss": 0.017, "step": 199665 }, { "epoch": 0.8331316604217607, "grad_norm": 0.8375430369652939, "learning_rate": 2.1913345756989032e-06, "loss": 0.0193, "step": 199670 }, { "epoch": 0.8331525231367509, "grad_norm": 1.2606933537753626, "learning_rate": 2.1913071391236095e-06, "loss": 0.017, "step": 199675 }, { "epoch": 0.8331733858517412, "grad_norm": 0.8670952033418654, "learning_rate": 2.191279703578848e-06, "loss": 0.0175, "step": 199680 }, { "epoch": 0.8331942485667315, "grad_norm": 0.4711416017155282, "learning_rate": 2.1912522690645537e-06, "loss": 0.0231, "step": 199685 }, { "epoch": 0.8332151112817218, "grad_norm": 0.7744921839265935, "learning_rate": 2.1912248355806625e-06, "loss": 0.0172, "step": 199690 }, { "epoch": 0.833235973996712, "grad_norm": 1.0925648200320635, "learning_rate": 2.19119740312711e-06, "loss": 0.0242, "step": 199695 }, { "epoch": 0.8332568367117024, "grad_norm": 1.1529401284337388, "learning_rate": 2.1911699717038313e-06, "loss": 0.0195, "step": 199700 }, { "epoch": 0.8332776994266926, "grad_norm": 1.56048241459271, "learning_rate": 2.1911425413107625e-06, "loss": 0.017, "step": 199705 }, { "epoch": 0.8332985621416829, "grad_norm": 0.9441934304143953, "learning_rate": 2.1911151119478387e-06, "loss": 0.021, "step": 199710 }, { "epoch": 0.8333194248566731, "grad_norm": 0.41131764521058195, "learning_rate": 2.191087683614995e-06, "loss": 0.0223, "step": 199715 }, { "epoch": 0.8333402875716635, "grad_norm": 0.879962547844939, "learning_rate": 2.1910602563121675e-06, "loss": 0.0199, "step": 199720 }, { "epoch": 0.8333611502866537, "grad_norm": 0.7132584014264145, "learning_rate": 2.1910328300392915e-06, "loss": 0.0346, "step": 199725 }, { "epoch": 0.8333820130016439, "grad_norm": 0.8604173727595078, "learning_rate": 2.1910054047963033e-06, "loss": 0.0239, "step": 199730 }, { "epoch": 0.8334028757166343, "grad_norm": 0.5308991711993091, "learning_rate": 2.1909779805831377e-06, "loss": 0.0172, "step": 199735 }, { "epoch": 0.8334237384316245, "grad_norm": 0.8063333785509211, "learning_rate": 2.1909505573997296e-06, "loss": 0.0191, "step": 199740 }, { "epoch": 0.8334446011466148, "grad_norm": 0.5133633402326684, "learning_rate": 2.190923135246016e-06, "loss": 0.024, "step": 199745 }, { "epoch": 0.8334654638616051, "grad_norm": 1.156144496975684, "learning_rate": 2.1908957141219316e-06, "loss": 0.0245, "step": 199750 }, { "epoch": 0.8334863265765954, "grad_norm": 0.29722652717439435, "learning_rate": 2.190868294027412e-06, "loss": 0.016, "step": 199755 }, { "epoch": 0.8335071892915856, "grad_norm": 0.8082197664591271, "learning_rate": 2.1908408749623934e-06, "loss": 0.0209, "step": 199760 }, { "epoch": 0.833528052006576, "grad_norm": 0.4488863837149343, "learning_rate": 2.1908134569268107e-06, "loss": 0.0219, "step": 199765 }, { "epoch": 0.8335489147215662, "grad_norm": 0.6680983565851643, "learning_rate": 2.190786039920599e-06, "loss": 0.022, "step": 199770 }, { "epoch": 0.8335697774365565, "grad_norm": 0.3705873847139704, "learning_rate": 2.1907586239436957e-06, "loss": 0.015, "step": 199775 }, { "epoch": 0.8335906401515467, "grad_norm": 0.7033421290473745, "learning_rate": 2.1907312089960343e-06, "loss": 0.0323, "step": 199780 }, { "epoch": 0.8336115028665371, "grad_norm": 0.7936496276617203, "learning_rate": 2.190703795077552e-06, "loss": 0.0194, "step": 199785 }, { "epoch": 0.8336323655815273, "grad_norm": 0.8838005740717204, "learning_rate": 2.1906763821881838e-06, "loss": 0.0221, "step": 199790 }, { "epoch": 0.8336532282965176, "grad_norm": 0.5292737406764839, "learning_rate": 2.190648970327865e-06, "loss": 0.0146, "step": 199795 }, { "epoch": 0.8336740910115079, "grad_norm": 0.8229648586194037, "learning_rate": 2.1906215594965318e-06, "loss": 0.0268, "step": 199800 }, { "epoch": 0.8336949537264982, "grad_norm": 1.0428840308078597, "learning_rate": 2.190594149694119e-06, "loss": 0.0184, "step": 199805 }, { "epoch": 0.8337158164414884, "grad_norm": 0.44920001229308976, "learning_rate": 2.1905667409205633e-06, "loss": 0.019, "step": 199810 }, { "epoch": 0.8337366791564788, "grad_norm": 0.6848614616054055, "learning_rate": 2.1905393331758e-06, "loss": 0.0203, "step": 199815 }, { "epoch": 0.833757541871469, "grad_norm": 0.5281744150304267, "learning_rate": 2.1905119264597643e-06, "loss": 0.0207, "step": 199820 }, { "epoch": 0.8337784045864592, "grad_norm": 0.6351042432422308, "learning_rate": 2.1904845207723918e-06, "loss": 0.0216, "step": 199825 }, { "epoch": 0.8337992673014495, "grad_norm": 1.0555592583045037, "learning_rate": 2.190457116113619e-06, "loss": 0.0217, "step": 199830 }, { "epoch": 0.8338201300164398, "grad_norm": 0.28288830516348934, "learning_rate": 2.1904297124833805e-06, "loss": 0.0165, "step": 199835 }, { "epoch": 0.8338409927314301, "grad_norm": 0.6393609454520007, "learning_rate": 2.190402309881613e-06, "loss": 0.02, "step": 199840 }, { "epoch": 0.8338618554464203, "grad_norm": 0.5262988609576306, "learning_rate": 2.190374908308251e-06, "loss": 0.0181, "step": 199845 }, { "epoch": 0.8338827181614107, "grad_norm": 0.6870296228897719, "learning_rate": 2.190347507763231e-06, "loss": 0.0223, "step": 199850 }, { "epoch": 0.8339035808764009, "grad_norm": 0.6666502721953916, "learning_rate": 2.190320108246489e-06, "loss": 0.0169, "step": 199855 }, { "epoch": 0.8339244435913912, "grad_norm": 0.8875505573054508, "learning_rate": 2.19029270975796e-06, "loss": 0.0224, "step": 199860 }, { "epoch": 0.8339453063063815, "grad_norm": 0.8985708638947489, "learning_rate": 2.1902653122975796e-06, "loss": 0.0226, "step": 199865 }, { "epoch": 0.8339661690213718, "grad_norm": 0.40589259187665233, "learning_rate": 2.1902379158652838e-06, "loss": 0.0181, "step": 199870 }, { "epoch": 0.833987031736362, "grad_norm": 0.5331819006865505, "learning_rate": 2.1902105204610085e-06, "loss": 0.014, "step": 199875 }, { "epoch": 0.8340078944513524, "grad_norm": 0.8783917454027391, "learning_rate": 2.190183126084689e-06, "loss": 0.0289, "step": 199880 }, { "epoch": 0.8340287571663426, "grad_norm": 0.6294886064353109, "learning_rate": 2.190155732736261e-06, "loss": 0.0173, "step": 199885 }, { "epoch": 0.8340496198813329, "grad_norm": 0.6493640104102303, "learning_rate": 2.1901283404156614e-06, "loss": 0.0254, "step": 199890 }, { "epoch": 0.8340704825963231, "grad_norm": 0.37056577712921285, "learning_rate": 2.1901009491228244e-06, "loss": 0.0171, "step": 199895 }, { "epoch": 0.8340913453113135, "grad_norm": 0.5889487249061928, "learning_rate": 2.190073558857686e-06, "loss": 0.0171, "step": 199900 }, { "epoch": 0.8341122080263037, "grad_norm": 0.5015410677228526, "learning_rate": 2.1900461696201826e-06, "loss": 0.0243, "step": 199905 }, { "epoch": 0.834133070741294, "grad_norm": 0.6270863753350785, "learning_rate": 2.1900187814102494e-06, "loss": 0.0202, "step": 199910 }, { "epoch": 0.8341539334562843, "grad_norm": 0.19842485585093034, "learning_rate": 2.1899913942278223e-06, "loss": 0.0176, "step": 199915 }, { "epoch": 0.8341747961712745, "grad_norm": 0.6119252741808122, "learning_rate": 2.1899640080728373e-06, "loss": 0.0267, "step": 199920 }, { "epoch": 0.8341956588862648, "grad_norm": 0.3261479410692021, "learning_rate": 2.18993662294523e-06, "loss": 0.0162, "step": 199925 }, { "epoch": 0.8342165216012551, "grad_norm": 0.79242761915227, "learning_rate": 2.189909238844935e-06, "loss": 0.0243, "step": 199930 }, { "epoch": 0.8342373843162454, "grad_norm": 0.6086321394453217, "learning_rate": 2.1898818557718905e-06, "loss": 0.018, "step": 199935 }, { "epoch": 0.8342582470312356, "grad_norm": 0.9567987291930916, "learning_rate": 2.1898544737260304e-06, "loss": 0.0229, "step": 199940 }, { "epoch": 0.834279109746226, "grad_norm": 1.0683114394266215, "learning_rate": 2.1898270927072914e-06, "loss": 0.0207, "step": 199945 }, { "epoch": 0.8342999724612162, "grad_norm": 0.43811068989904095, "learning_rate": 2.1897997127156086e-06, "loss": 0.0165, "step": 199950 }, { "epoch": 0.8343208351762065, "grad_norm": 0.6757382764509315, "learning_rate": 2.189772333750918e-06, "loss": 0.0144, "step": 199955 }, { "epoch": 0.8343416978911967, "grad_norm": 0.6511968287433858, "learning_rate": 2.189744955813156e-06, "loss": 0.0198, "step": 199960 }, { "epoch": 0.8343625606061871, "grad_norm": 0.9586096073338576, "learning_rate": 2.189717578902258e-06, "loss": 0.0193, "step": 199965 }, { "epoch": 0.8343834233211773, "grad_norm": 0.29213180019793095, "learning_rate": 2.189690203018159e-06, "loss": 0.0195, "step": 199970 }, { "epoch": 0.8344042860361676, "grad_norm": 0.5201833881592371, "learning_rate": 2.189662828160796e-06, "loss": 0.0212, "step": 199975 }, { "epoch": 0.8344251487511579, "grad_norm": 0.5243098196700333, "learning_rate": 2.1896354543301042e-06, "loss": 0.0151, "step": 199980 }, { "epoch": 0.8344460114661482, "grad_norm": 0.9717722383159033, "learning_rate": 2.1896080815260195e-06, "loss": 0.0218, "step": 199985 }, { "epoch": 0.8344668741811384, "grad_norm": 1.023322323992903, "learning_rate": 2.1895807097484777e-06, "loss": 0.0199, "step": 199990 }, { "epoch": 0.8344877368961288, "grad_norm": 0.9196352939441526, "learning_rate": 2.1895533389974153e-06, "loss": 0.0233, "step": 199995 }, { "epoch": 0.834508599611119, "grad_norm": 0.37857382238354087, "learning_rate": 2.1895259692727675e-06, "loss": 0.0236, "step": 200000 }, { "epoch": 0.8345294623261093, "grad_norm": 1.047051081839942, "learning_rate": 2.18949860057447e-06, "loss": 0.0269, "step": 200005 }, { "epoch": 0.8345503250410995, "grad_norm": 0.941583169871157, "learning_rate": 2.1894712329024588e-06, "loss": 0.0171, "step": 200010 }, { "epoch": 0.8345711877560898, "grad_norm": 0.3594625233345831, "learning_rate": 2.1894438662566703e-06, "loss": 0.0211, "step": 200015 }, { "epoch": 0.8345920504710801, "grad_norm": 0.2939853157749585, "learning_rate": 2.18941650063704e-06, "loss": 0.0204, "step": 200020 }, { "epoch": 0.8346129131860703, "grad_norm": 1.210328498198313, "learning_rate": 2.1893891360435033e-06, "loss": 0.0386, "step": 200025 }, { "epoch": 0.8346337759010607, "grad_norm": 0.6606324327203081, "learning_rate": 2.1893617724759965e-06, "loss": 0.0203, "step": 200030 }, { "epoch": 0.8346546386160509, "grad_norm": 0.5137760732613756, "learning_rate": 2.1893344099344558e-06, "loss": 0.021, "step": 200035 }, { "epoch": 0.8346755013310412, "grad_norm": 0.6064074214590566, "learning_rate": 2.189307048418817e-06, "loss": 0.0149, "step": 200040 }, { "epoch": 0.8346963640460315, "grad_norm": 0.4561272194383953, "learning_rate": 2.1892796879290148e-06, "loss": 0.0159, "step": 200045 }, { "epoch": 0.8347172267610218, "grad_norm": 0.5572240719223911, "learning_rate": 2.189252328464987e-06, "loss": 0.0222, "step": 200050 }, { "epoch": 0.834738089476012, "grad_norm": 0.934841625748561, "learning_rate": 2.189224970026668e-06, "loss": 0.0175, "step": 200055 }, { "epoch": 0.8347589521910024, "grad_norm": 0.8581633557476653, "learning_rate": 2.189197612613995e-06, "loss": 0.0271, "step": 200060 }, { "epoch": 0.8347798149059926, "grad_norm": 0.7540631360791665, "learning_rate": 2.1891702562269026e-06, "loss": 0.0173, "step": 200065 }, { "epoch": 0.8348006776209829, "grad_norm": 0.6578798288892365, "learning_rate": 2.1891429008653273e-06, "loss": 0.0199, "step": 200070 }, { "epoch": 0.8348215403359731, "grad_norm": 0.6666694723113563, "learning_rate": 2.189115546529205e-06, "loss": 0.0155, "step": 200075 }, { "epoch": 0.8348424030509635, "grad_norm": 0.5395570912513064, "learning_rate": 2.189088193218472e-06, "loss": 0.0172, "step": 200080 }, { "epoch": 0.8348632657659537, "grad_norm": 0.20827667227933475, "learning_rate": 2.1890608409330637e-06, "loss": 0.0216, "step": 200085 }, { "epoch": 0.834884128480944, "grad_norm": 0.6156131599221878, "learning_rate": 2.1890334896729164e-06, "loss": 0.0227, "step": 200090 }, { "epoch": 0.8349049911959343, "grad_norm": 0.7441399817404025, "learning_rate": 2.189006139437966e-06, "loss": 0.0295, "step": 200095 }, { "epoch": 0.8349258539109246, "grad_norm": 0.8351321498653026, "learning_rate": 2.188978790228148e-06, "loss": 0.0281, "step": 200100 }, { "epoch": 0.8349467166259148, "grad_norm": 0.4002456481257543, "learning_rate": 2.188951442043399e-06, "loss": 0.0201, "step": 200105 }, { "epoch": 0.8349675793409052, "grad_norm": 0.7105659934516785, "learning_rate": 2.1889240948836547e-06, "loss": 0.0246, "step": 200110 }, { "epoch": 0.8349884420558954, "grad_norm": 0.6408287036764909, "learning_rate": 2.1888967487488512e-06, "loss": 0.0298, "step": 200115 }, { "epoch": 0.8350093047708856, "grad_norm": 0.3109273898545693, "learning_rate": 2.1888694036389246e-06, "loss": 0.0173, "step": 200120 }, { "epoch": 0.835030167485876, "grad_norm": 0.6437436156751077, "learning_rate": 2.18884205955381e-06, "loss": 0.0223, "step": 200125 }, { "epoch": 0.8350510302008662, "grad_norm": 0.42531067264831657, "learning_rate": 2.1888147164934443e-06, "loss": 0.0165, "step": 200130 }, { "epoch": 0.8350718929158565, "grad_norm": 0.5598218097407147, "learning_rate": 2.188787374457763e-06, "loss": 0.0189, "step": 200135 }, { "epoch": 0.8350927556308467, "grad_norm": 0.32312254333657203, "learning_rate": 2.1887600334467026e-06, "loss": 0.0146, "step": 200140 }, { "epoch": 0.8351136183458371, "grad_norm": 0.7423025751511507, "learning_rate": 2.188732693460199e-06, "loss": 0.0261, "step": 200145 }, { "epoch": 0.8351344810608273, "grad_norm": 0.7704819515675936, "learning_rate": 2.188705354498188e-06, "loss": 0.0236, "step": 200150 }, { "epoch": 0.8351553437758176, "grad_norm": 0.6555970379307354, "learning_rate": 2.188678016560605e-06, "loss": 0.0202, "step": 200155 }, { "epoch": 0.8351762064908079, "grad_norm": 0.7274214267082802, "learning_rate": 2.1886506796473875e-06, "loss": 0.0299, "step": 200160 }, { "epoch": 0.8351970692057982, "grad_norm": 0.7023349927781384, "learning_rate": 2.1886233437584708e-06, "loss": 0.0155, "step": 200165 }, { "epoch": 0.8352179319207884, "grad_norm": 0.20552737561933185, "learning_rate": 2.18859600889379e-06, "loss": 0.0163, "step": 200170 }, { "epoch": 0.8352387946357788, "grad_norm": 0.7002244577000849, "learning_rate": 2.1885686750532827e-06, "loss": 0.0186, "step": 200175 }, { "epoch": 0.835259657350769, "grad_norm": 0.8050079063670681, "learning_rate": 2.188541342236884e-06, "loss": 0.0215, "step": 200180 }, { "epoch": 0.8352805200657593, "grad_norm": 0.519259384847294, "learning_rate": 2.1885140104445306e-06, "loss": 0.0208, "step": 200185 }, { "epoch": 0.8353013827807495, "grad_norm": 1.382119387191361, "learning_rate": 2.1884866796761576e-06, "loss": 0.0202, "step": 200190 }, { "epoch": 0.8353222454957399, "grad_norm": 0.4275372734791627, "learning_rate": 2.188459349931702e-06, "loss": 0.0198, "step": 200195 }, { "epoch": 0.8353431082107301, "grad_norm": 1.1170180400911467, "learning_rate": 2.188432021211099e-06, "loss": 0.0233, "step": 200200 }, { "epoch": 0.8353639709257203, "grad_norm": 0.5141932313904569, "learning_rate": 2.1884046935142854e-06, "loss": 0.0225, "step": 200205 }, { "epoch": 0.8353848336407107, "grad_norm": 0.36073648838441175, "learning_rate": 2.188377366841197e-06, "loss": 0.0161, "step": 200210 }, { "epoch": 0.8354056963557009, "grad_norm": 0.5742776466618232, "learning_rate": 2.1883500411917704e-06, "loss": 0.0145, "step": 200215 }, { "epoch": 0.8354265590706912, "grad_norm": 1.0271233933934794, "learning_rate": 2.1883227165659405e-06, "loss": 0.02, "step": 200220 }, { "epoch": 0.8354474217856815, "grad_norm": 1.0034765087371151, "learning_rate": 2.1882953929636445e-06, "loss": 0.0236, "step": 200225 }, { "epoch": 0.8354682845006718, "grad_norm": 0.8295521510204206, "learning_rate": 2.188268070384818e-06, "loss": 0.0231, "step": 200230 }, { "epoch": 0.835489147215662, "grad_norm": 0.962903273309052, "learning_rate": 2.1882407488293974e-06, "loss": 0.0285, "step": 200235 }, { "epoch": 0.8355100099306524, "grad_norm": 0.9303407032248062, "learning_rate": 2.1882134282973184e-06, "loss": 0.0245, "step": 200240 }, { "epoch": 0.8355308726456426, "grad_norm": 0.911421654237027, "learning_rate": 2.1881861087885174e-06, "loss": 0.0268, "step": 200245 }, { "epoch": 0.8355517353606329, "grad_norm": 0.42139681860055095, "learning_rate": 2.1881587903029306e-06, "loss": 0.0174, "step": 200250 }, { "epoch": 0.8355725980756231, "grad_norm": 0.6107390635578778, "learning_rate": 2.1881314728404935e-06, "loss": 0.0214, "step": 200255 }, { "epoch": 0.8355934607906135, "grad_norm": 0.8993285363582674, "learning_rate": 2.188104156401143e-06, "loss": 0.0172, "step": 200260 }, { "epoch": 0.8356143235056037, "grad_norm": 0.39529051929891634, "learning_rate": 2.1880768409848157e-06, "loss": 0.0165, "step": 200265 }, { "epoch": 0.835635186220594, "grad_norm": 0.517932965388261, "learning_rate": 2.188049526591446e-06, "loss": 0.0131, "step": 200270 }, { "epoch": 0.8356560489355843, "grad_norm": 1.5236728222779694, "learning_rate": 2.1880222132209715e-06, "loss": 0.0282, "step": 200275 }, { "epoch": 0.8356769116505746, "grad_norm": 0.7551896741010905, "learning_rate": 2.187994900873328e-06, "loss": 0.0201, "step": 200280 }, { "epoch": 0.8356977743655648, "grad_norm": 0.893769906100836, "learning_rate": 2.187967589548452e-06, "loss": 0.0224, "step": 200285 }, { "epoch": 0.8357186370805552, "grad_norm": 1.0462026111899607, "learning_rate": 2.1879402792462786e-06, "loss": 0.0266, "step": 200290 }, { "epoch": 0.8357394997955454, "grad_norm": 0.42194542128840834, "learning_rate": 2.1879129699667447e-06, "loss": 0.0252, "step": 200295 }, { "epoch": 0.8357603625105356, "grad_norm": 0.5672816023704205, "learning_rate": 2.1878856617097867e-06, "loss": 0.0207, "step": 200300 }, { "epoch": 0.835781225225526, "grad_norm": 0.9433832017347754, "learning_rate": 2.1878583544753407e-06, "loss": 0.0217, "step": 200305 }, { "epoch": 0.8358020879405162, "grad_norm": 0.6944549468741116, "learning_rate": 2.187831048263342e-06, "loss": 0.0158, "step": 200310 }, { "epoch": 0.8358229506555065, "grad_norm": 0.78174776355024, "learning_rate": 2.1878037430737285e-06, "loss": 0.0211, "step": 200315 }, { "epoch": 0.8358438133704967, "grad_norm": 0.4768357802771041, "learning_rate": 2.1877764389064344e-06, "loss": 0.0172, "step": 200320 }, { "epoch": 0.8358646760854871, "grad_norm": 0.6905455797399176, "learning_rate": 2.1877491357613974e-06, "loss": 0.0171, "step": 200325 }, { "epoch": 0.8358855388004773, "grad_norm": 0.7884153334388275, "learning_rate": 2.187721833638553e-06, "loss": 0.0204, "step": 200330 }, { "epoch": 0.8359064015154676, "grad_norm": 0.5998604141885207, "learning_rate": 2.1876945325378374e-06, "loss": 0.0186, "step": 200335 }, { "epoch": 0.8359272642304579, "grad_norm": 0.4961138134570378, "learning_rate": 2.1876672324591875e-06, "loss": 0.0184, "step": 200340 }, { "epoch": 0.8359481269454482, "grad_norm": 0.2332429964614052, "learning_rate": 2.1876399334025392e-06, "loss": 0.0183, "step": 200345 }, { "epoch": 0.8359689896604384, "grad_norm": 1.020418463642974, "learning_rate": 2.1876126353678285e-06, "loss": 0.0212, "step": 200350 }, { "epoch": 0.8359898523754288, "grad_norm": 0.8026319170356011, "learning_rate": 2.187585338354992e-06, "loss": 0.0216, "step": 200355 }, { "epoch": 0.836010715090419, "grad_norm": 0.6015336414060569, "learning_rate": 2.1875580423639655e-06, "loss": 0.0294, "step": 200360 }, { "epoch": 0.8360315778054093, "grad_norm": 0.559269328950372, "learning_rate": 2.1875307473946857e-06, "loss": 0.0191, "step": 200365 }, { "epoch": 0.8360524405203995, "grad_norm": 0.33763028214883517, "learning_rate": 2.187503453447088e-06, "loss": 0.0178, "step": 200370 }, { "epoch": 0.8360733032353899, "grad_norm": 0.553791638294687, "learning_rate": 2.1874761605211092e-06, "loss": 0.0272, "step": 200375 }, { "epoch": 0.8360941659503801, "grad_norm": 0.5242535175878988, "learning_rate": 2.1874488686166864e-06, "loss": 0.0236, "step": 200380 }, { "epoch": 0.8361150286653704, "grad_norm": 0.6143741744430694, "learning_rate": 2.187421577733755e-06, "loss": 0.0159, "step": 200385 }, { "epoch": 0.8361358913803607, "grad_norm": 0.565029180243208, "learning_rate": 2.1873942878722514e-06, "loss": 0.024, "step": 200390 }, { "epoch": 0.836156754095351, "grad_norm": 0.5392071802603561, "learning_rate": 2.1873669990321113e-06, "loss": 0.0178, "step": 200395 }, { "epoch": 0.8361776168103412, "grad_norm": 0.5445906960282282, "learning_rate": 2.1873397112132723e-06, "loss": 0.0169, "step": 200400 }, { "epoch": 0.8361984795253316, "grad_norm": 0.45665516046334703, "learning_rate": 2.18731242441567e-06, "loss": 0.0163, "step": 200405 }, { "epoch": 0.8362193422403218, "grad_norm": 0.6837276591365007, "learning_rate": 2.1872851386392404e-06, "loss": 0.0165, "step": 200410 }, { "epoch": 0.836240204955312, "grad_norm": 0.7615259528285651, "learning_rate": 2.1872578538839197e-06, "loss": 0.022, "step": 200415 }, { "epoch": 0.8362610676703024, "grad_norm": 1.2105604050538028, "learning_rate": 2.187230570149645e-06, "loss": 0.0219, "step": 200420 }, { "epoch": 0.8362819303852926, "grad_norm": 0.4321497546633858, "learning_rate": 2.187203287436352e-06, "loss": 0.0193, "step": 200425 }, { "epoch": 0.8363027931002829, "grad_norm": 0.6599363002592452, "learning_rate": 2.187176005743978e-06, "loss": 0.0241, "step": 200430 }, { "epoch": 0.8363236558152731, "grad_norm": 2.0202172331653196, "learning_rate": 2.1871487250724584e-06, "loss": 0.026, "step": 200435 }, { "epoch": 0.8363445185302635, "grad_norm": 0.7197357045417094, "learning_rate": 2.187121445421729e-06, "loss": 0.0267, "step": 200440 }, { "epoch": 0.8363653812452537, "grad_norm": 0.9038948537222168, "learning_rate": 2.1870941667917276e-06, "loss": 0.0264, "step": 200445 }, { "epoch": 0.836386243960244, "grad_norm": 0.2351694954204019, "learning_rate": 2.1870668891823898e-06, "loss": 0.0116, "step": 200450 }, { "epoch": 0.8364071066752343, "grad_norm": 0.5826377432551799, "learning_rate": 2.1870396125936516e-06, "loss": 0.0248, "step": 200455 }, { "epoch": 0.8364279693902246, "grad_norm": 0.5559064866951303, "learning_rate": 2.1870123370254497e-06, "loss": 0.0185, "step": 200460 }, { "epoch": 0.8364488321052148, "grad_norm": 0.7795784968270163, "learning_rate": 2.1869850624777204e-06, "loss": 0.0202, "step": 200465 }, { "epoch": 0.8364696948202052, "grad_norm": 1.491483505962302, "learning_rate": 2.1869577889504008e-06, "loss": 0.0242, "step": 200470 }, { "epoch": 0.8364905575351954, "grad_norm": 0.44815308899921835, "learning_rate": 2.186930516443426e-06, "loss": 0.0208, "step": 200475 }, { "epoch": 0.8365114202501857, "grad_norm": 0.8059136874483025, "learning_rate": 2.186903244956733e-06, "loss": 0.0178, "step": 200480 }, { "epoch": 0.836532282965176, "grad_norm": 0.4746915513267414, "learning_rate": 2.186875974490259e-06, "loss": 0.0219, "step": 200485 }, { "epoch": 0.8365531456801663, "grad_norm": 0.6375048181585758, "learning_rate": 2.186848705043939e-06, "loss": 0.0217, "step": 200490 }, { "epoch": 0.8365740083951565, "grad_norm": 0.3542184331617924, "learning_rate": 2.18682143661771e-06, "loss": 0.021, "step": 200495 }, { "epoch": 0.8365948711101467, "grad_norm": 0.6356736064157675, "learning_rate": 2.186794169211508e-06, "loss": 0.0175, "step": 200500 }, { "epoch": 0.8366157338251371, "grad_norm": 1.199118902281406, "learning_rate": 2.1867669028252705e-06, "loss": 0.0229, "step": 200505 }, { "epoch": 0.8366365965401273, "grad_norm": 0.5056701488923042, "learning_rate": 2.1867396374589325e-06, "loss": 0.0166, "step": 200510 }, { "epoch": 0.8366574592551176, "grad_norm": 0.6029036740767898, "learning_rate": 2.186712373112432e-06, "loss": 0.023, "step": 200515 }, { "epoch": 0.8366783219701079, "grad_norm": 0.9934022070292928, "learning_rate": 2.1866851097857038e-06, "loss": 0.0236, "step": 200520 }, { "epoch": 0.8366991846850982, "grad_norm": 0.22095149923084292, "learning_rate": 2.1866578474786853e-06, "loss": 0.0189, "step": 200525 }, { "epoch": 0.8367200474000884, "grad_norm": 0.7201749270756418, "learning_rate": 2.1866305861913127e-06, "loss": 0.0217, "step": 200530 }, { "epoch": 0.8367409101150788, "grad_norm": 0.5941398669583937, "learning_rate": 2.1866033259235226e-06, "loss": 0.0183, "step": 200535 }, { "epoch": 0.836761772830069, "grad_norm": 0.6535806160087095, "learning_rate": 2.1865760666752514e-06, "loss": 0.0232, "step": 200540 }, { "epoch": 0.8367826355450593, "grad_norm": 0.5064628061801834, "learning_rate": 2.1865488084464352e-06, "loss": 0.0244, "step": 200545 }, { "epoch": 0.8368034982600495, "grad_norm": 0.6925708443183792, "learning_rate": 2.186521551237011e-06, "loss": 0.0212, "step": 200550 }, { "epoch": 0.8368243609750399, "grad_norm": 0.47907635088973627, "learning_rate": 2.186494295046915e-06, "loss": 0.0168, "step": 200555 }, { "epoch": 0.8368452236900301, "grad_norm": 0.6386951817412039, "learning_rate": 2.1864670398760833e-06, "loss": 0.0193, "step": 200560 }, { "epoch": 0.8368660864050204, "grad_norm": 0.7710979364582246, "learning_rate": 2.186439785724453e-06, "loss": 0.0235, "step": 200565 }, { "epoch": 0.8368869491200107, "grad_norm": 0.7274866308126877, "learning_rate": 2.18641253259196e-06, "loss": 0.022, "step": 200570 }, { "epoch": 0.836907811835001, "grad_norm": 0.9155713799578329, "learning_rate": 2.1863852804785418e-06, "loss": 0.0221, "step": 200575 }, { "epoch": 0.8369286745499912, "grad_norm": 0.565416459949404, "learning_rate": 2.1863580293841337e-06, "loss": 0.0225, "step": 200580 }, { "epoch": 0.8369495372649816, "grad_norm": 0.6865691845466201, "learning_rate": 2.1863307793086724e-06, "loss": 0.0207, "step": 200585 }, { "epoch": 0.8369703999799718, "grad_norm": 0.6616877571541531, "learning_rate": 2.1863035302520957e-06, "loss": 0.0165, "step": 200590 }, { "epoch": 0.836991262694962, "grad_norm": 0.7019626221967368, "learning_rate": 2.1862762822143377e-06, "loss": 0.0354, "step": 200595 }, { "epoch": 0.8370121254099524, "grad_norm": 0.4350552255453382, "learning_rate": 2.1862490351953373e-06, "loss": 0.0176, "step": 200600 }, { "epoch": 0.8370329881249426, "grad_norm": 0.4190778981877311, "learning_rate": 2.18622178919503e-06, "loss": 0.0197, "step": 200605 }, { "epoch": 0.8370538508399329, "grad_norm": 1.2323571517436, "learning_rate": 2.1861945442133515e-06, "loss": 0.0237, "step": 200610 }, { "epoch": 0.8370747135549231, "grad_norm": 1.0047225632801597, "learning_rate": 2.18616730025024e-06, "loss": 0.0207, "step": 200615 }, { "epoch": 0.8370955762699135, "grad_norm": 0.620124110748136, "learning_rate": 2.186140057305631e-06, "loss": 0.0243, "step": 200620 }, { "epoch": 0.8371164389849037, "grad_norm": 0.5133909990121583, "learning_rate": 2.186112815379461e-06, "loss": 0.0193, "step": 200625 }, { "epoch": 0.837137301699894, "grad_norm": 0.8624858528016636, "learning_rate": 2.186085574471667e-06, "loss": 0.0168, "step": 200630 }, { "epoch": 0.8371581644148843, "grad_norm": 0.5499497748441482, "learning_rate": 2.186058334582185e-06, "loss": 0.0248, "step": 200635 }, { "epoch": 0.8371790271298746, "grad_norm": 0.9358842655549494, "learning_rate": 2.1860310957109526e-06, "loss": 0.0187, "step": 200640 }, { "epoch": 0.8371998898448648, "grad_norm": 0.402106049440601, "learning_rate": 2.1860038578579057e-06, "loss": 0.0202, "step": 200645 }, { "epoch": 0.8372207525598552, "grad_norm": 0.4134621975624885, "learning_rate": 2.1859766210229805e-06, "loss": 0.0166, "step": 200650 }, { "epoch": 0.8372416152748454, "grad_norm": 0.43815367585791387, "learning_rate": 2.1859493852061136e-06, "loss": 0.0146, "step": 200655 }, { "epoch": 0.8372624779898357, "grad_norm": 1.2771260445547643, "learning_rate": 2.1859221504072426e-06, "loss": 0.0223, "step": 200660 }, { "epoch": 0.837283340704826, "grad_norm": 0.6950864373406456, "learning_rate": 2.185894916626303e-06, "loss": 0.0152, "step": 200665 }, { "epoch": 0.8373042034198163, "grad_norm": 0.8114166574485718, "learning_rate": 2.185867683863232e-06, "loss": 0.0212, "step": 200670 }, { "epoch": 0.8373250661348065, "grad_norm": 0.6048481304640936, "learning_rate": 2.1858404521179655e-06, "loss": 0.0176, "step": 200675 }, { "epoch": 0.8373459288497968, "grad_norm": 0.7729943411457544, "learning_rate": 2.1858132213904414e-06, "loss": 0.0242, "step": 200680 }, { "epoch": 0.8373667915647871, "grad_norm": 0.37260246968299116, "learning_rate": 2.1857859916805948e-06, "loss": 0.0198, "step": 200685 }, { "epoch": 0.8373876542797773, "grad_norm": 0.8940072249796507, "learning_rate": 2.185758762988363e-06, "loss": 0.0223, "step": 200690 }, { "epoch": 0.8374085169947676, "grad_norm": 0.9118912944169373, "learning_rate": 2.185731535313683e-06, "loss": 0.0211, "step": 200695 }, { "epoch": 0.837429379709758, "grad_norm": 0.6572798691492121, "learning_rate": 2.1857043086564904e-06, "loss": 0.0158, "step": 200700 }, { "epoch": 0.8374502424247482, "grad_norm": 0.5825657225090342, "learning_rate": 2.1856770830167226e-06, "loss": 0.0179, "step": 200705 }, { "epoch": 0.8374711051397384, "grad_norm": 0.6132071573590279, "learning_rate": 2.185649858394317e-06, "loss": 0.0221, "step": 200710 }, { "epoch": 0.8374919678547288, "grad_norm": 0.8664340287369923, "learning_rate": 2.1856226347892086e-06, "loss": 0.0199, "step": 200715 }, { "epoch": 0.837512830569719, "grad_norm": 0.8599018546300164, "learning_rate": 2.185595412201335e-06, "loss": 0.0235, "step": 200720 }, { "epoch": 0.8375336932847093, "grad_norm": 0.3586441955920663, "learning_rate": 2.1855681906306324e-06, "loss": 0.011, "step": 200725 }, { "epoch": 0.8375545559996995, "grad_norm": 0.7503149355524901, "learning_rate": 2.1855409700770375e-06, "loss": 0.0191, "step": 200730 }, { "epoch": 0.8375754187146899, "grad_norm": 0.37353602217406384, "learning_rate": 2.1855137505404877e-06, "loss": 0.0173, "step": 200735 }, { "epoch": 0.8375962814296801, "grad_norm": 0.27158942718038837, "learning_rate": 2.185486532020919e-06, "loss": 0.0161, "step": 200740 }, { "epoch": 0.8376171441446704, "grad_norm": 0.6518460428871425, "learning_rate": 2.185459314518268e-06, "loss": 0.0152, "step": 200745 }, { "epoch": 0.8376380068596607, "grad_norm": 0.49710284315529696, "learning_rate": 2.185432098032472e-06, "loss": 0.0212, "step": 200750 }, { "epoch": 0.837658869574651, "grad_norm": 0.6635061733308653, "learning_rate": 2.185404882563467e-06, "loss": 0.0246, "step": 200755 }, { "epoch": 0.8376797322896412, "grad_norm": 1.30163945202743, "learning_rate": 2.1853776681111898e-06, "loss": 0.0197, "step": 200760 }, { "epoch": 0.8377005950046316, "grad_norm": 0.7379465582742676, "learning_rate": 2.1853504546755773e-06, "loss": 0.0206, "step": 200765 }, { "epoch": 0.8377214577196218, "grad_norm": 0.8598094383043947, "learning_rate": 2.1853232422565666e-06, "loss": 0.0221, "step": 200770 }, { "epoch": 0.837742320434612, "grad_norm": 0.7597269858639951, "learning_rate": 2.1852960308540932e-06, "loss": 0.0184, "step": 200775 }, { "epoch": 0.8377631831496024, "grad_norm": 0.32729989610220317, "learning_rate": 2.1852688204680954e-06, "loss": 0.0129, "step": 200780 }, { "epoch": 0.8377840458645927, "grad_norm": 0.40963702817804, "learning_rate": 2.1852416110985088e-06, "loss": 0.0178, "step": 200785 }, { "epoch": 0.8378049085795829, "grad_norm": 0.4540341343036901, "learning_rate": 2.18521440274527e-06, "loss": 0.0218, "step": 200790 }, { "epoch": 0.8378257712945731, "grad_norm": 1.0707908293129471, "learning_rate": 2.1851871954083166e-06, "loss": 0.0236, "step": 200795 }, { "epoch": 0.8378466340095635, "grad_norm": 0.8726013599594765, "learning_rate": 2.1851599890875848e-06, "loss": 0.0263, "step": 200800 }, { "epoch": 0.8378674967245537, "grad_norm": 1.1354764198672753, "learning_rate": 2.1851327837830113e-06, "loss": 0.0223, "step": 200805 }, { "epoch": 0.837888359439544, "grad_norm": 0.5840996860084041, "learning_rate": 2.185105579494533e-06, "loss": 0.0212, "step": 200810 }, { "epoch": 0.8379092221545343, "grad_norm": 1.1977109317894448, "learning_rate": 2.185078376222087e-06, "loss": 0.0273, "step": 200815 }, { "epoch": 0.8379300848695246, "grad_norm": 0.4465767406249974, "learning_rate": 2.185051173965609e-06, "loss": 0.0214, "step": 200820 }, { "epoch": 0.8379509475845148, "grad_norm": 0.792599136301169, "learning_rate": 2.185023972725037e-06, "loss": 0.0227, "step": 200825 }, { "epoch": 0.8379718102995052, "grad_norm": 0.3666588468362304, "learning_rate": 2.184996772500307e-06, "loss": 0.0169, "step": 200830 }, { "epoch": 0.8379926730144954, "grad_norm": 0.5918196748831576, "learning_rate": 2.1849695732913555e-06, "loss": 0.0151, "step": 200835 }, { "epoch": 0.8380135357294857, "grad_norm": 0.3680206572634598, "learning_rate": 2.1849423750981204e-06, "loss": 0.0206, "step": 200840 }, { "epoch": 0.838034398444476, "grad_norm": 1.01123880447045, "learning_rate": 2.184915177920538e-06, "loss": 0.0226, "step": 200845 }, { "epoch": 0.8380552611594663, "grad_norm": 0.3825901034375482, "learning_rate": 2.1848879817585442e-06, "loss": 0.0225, "step": 200850 }, { "epoch": 0.8380761238744565, "grad_norm": 0.7038265882982614, "learning_rate": 2.1848607866120773e-06, "loss": 0.0258, "step": 200855 }, { "epoch": 0.8380969865894468, "grad_norm": 1.370179906177641, "learning_rate": 2.1848335924810725e-06, "loss": 0.0242, "step": 200860 }, { "epoch": 0.8381178493044371, "grad_norm": 0.8198796820165907, "learning_rate": 2.184806399365468e-06, "loss": 0.0264, "step": 200865 }, { "epoch": 0.8381387120194274, "grad_norm": 0.6576012887141798, "learning_rate": 2.1847792072652e-06, "loss": 0.0228, "step": 200870 }, { "epoch": 0.8381595747344176, "grad_norm": 0.4326409109966012, "learning_rate": 2.1847520161802052e-06, "loss": 0.0218, "step": 200875 }, { "epoch": 0.838180437449408, "grad_norm": 0.5572387552472072, "learning_rate": 2.184724826110421e-06, "loss": 0.0165, "step": 200880 }, { "epoch": 0.8382013001643982, "grad_norm": 0.45447414637072464, "learning_rate": 2.1846976370557833e-06, "loss": 0.0165, "step": 200885 }, { "epoch": 0.8382221628793884, "grad_norm": 0.7667627492039221, "learning_rate": 2.1846704490162294e-06, "loss": 0.0188, "step": 200890 }, { "epoch": 0.8382430255943788, "grad_norm": 1.7550740390446908, "learning_rate": 2.1846432619916968e-06, "loss": 0.0232, "step": 200895 }, { "epoch": 0.838263888309369, "grad_norm": 0.7716969481755906, "learning_rate": 2.184616075982121e-06, "loss": 0.0236, "step": 200900 }, { "epoch": 0.8382847510243593, "grad_norm": 0.5569635588545563, "learning_rate": 2.18458889098744e-06, "loss": 0.02, "step": 200905 }, { "epoch": 0.8383056137393495, "grad_norm": 0.43839912212798055, "learning_rate": 2.1845617070075898e-06, "loss": 0.0238, "step": 200910 }, { "epoch": 0.8383264764543399, "grad_norm": 0.8030389442359377, "learning_rate": 2.1845345240425082e-06, "loss": 0.0217, "step": 200915 }, { "epoch": 0.8383473391693301, "grad_norm": 0.4607050435935336, "learning_rate": 2.1845073420921315e-06, "loss": 0.0199, "step": 200920 }, { "epoch": 0.8383682018843204, "grad_norm": 1.0319626525756447, "learning_rate": 2.1844801611563965e-06, "loss": 0.0223, "step": 200925 }, { "epoch": 0.8383890645993107, "grad_norm": 1.007249774223275, "learning_rate": 2.18445298123524e-06, "loss": 0.0215, "step": 200930 }, { "epoch": 0.838409927314301, "grad_norm": 0.5013934831801389, "learning_rate": 2.1844258023285992e-06, "loss": 0.0238, "step": 200935 }, { "epoch": 0.8384307900292912, "grad_norm": 0.8594573781291246, "learning_rate": 2.1843986244364116e-06, "loss": 0.0262, "step": 200940 }, { "epoch": 0.8384516527442816, "grad_norm": 0.31955790770315173, "learning_rate": 2.1843714475586124e-06, "loss": 0.0182, "step": 200945 }, { "epoch": 0.8384725154592718, "grad_norm": 0.6896180101909984, "learning_rate": 2.1843442716951406e-06, "loss": 0.0189, "step": 200950 }, { "epoch": 0.8384933781742621, "grad_norm": 0.6477791120599438, "learning_rate": 2.184317096845931e-06, "loss": 0.0221, "step": 200955 }, { "epoch": 0.8385142408892524, "grad_norm": 0.8754581814045683, "learning_rate": 2.1842899230109216e-06, "loss": 0.0197, "step": 200960 }, { "epoch": 0.8385351036042427, "grad_norm": 0.3136440019031534, "learning_rate": 2.1842627501900494e-06, "loss": 0.0159, "step": 200965 }, { "epoch": 0.8385559663192329, "grad_norm": 0.4742463704705435, "learning_rate": 2.1842355783832514e-06, "loss": 0.0179, "step": 200970 }, { "epoch": 0.8385768290342231, "grad_norm": 0.4330924720085388, "learning_rate": 2.1842084075904636e-06, "loss": 0.0236, "step": 200975 }, { "epoch": 0.8385976917492135, "grad_norm": 1.319040386511292, "learning_rate": 2.1841812378116242e-06, "loss": 0.0248, "step": 200980 }, { "epoch": 0.8386185544642037, "grad_norm": 0.6810412748002536, "learning_rate": 2.1841540690466693e-06, "loss": 0.022, "step": 200985 }, { "epoch": 0.838639417179194, "grad_norm": 0.6603153092274571, "learning_rate": 2.184126901295536e-06, "loss": 0.0165, "step": 200990 }, { "epoch": 0.8386602798941843, "grad_norm": 0.7909157084869369, "learning_rate": 2.184099734558161e-06, "loss": 0.023, "step": 200995 }, { "epoch": 0.8386811426091746, "grad_norm": 0.5522977650122236, "learning_rate": 2.184072568834482e-06, "loss": 0.017, "step": 201000 }, { "epoch": 0.8387020053241648, "grad_norm": 0.39661295084348264, "learning_rate": 2.1840454041244356e-06, "loss": 0.0144, "step": 201005 }, { "epoch": 0.8387228680391552, "grad_norm": 0.8507270526757742, "learning_rate": 2.184018240427959e-06, "loss": 0.0181, "step": 201010 }, { "epoch": 0.8387437307541454, "grad_norm": 0.4521978310751664, "learning_rate": 2.183991077744988e-06, "loss": 0.0181, "step": 201015 }, { "epoch": 0.8387645934691357, "grad_norm": 0.8196312334709982, "learning_rate": 2.183963916075461e-06, "loss": 0.0206, "step": 201020 }, { "epoch": 0.838785456184126, "grad_norm": 1.069939742042993, "learning_rate": 2.1839367554193148e-06, "loss": 0.0192, "step": 201025 }, { "epoch": 0.8388063188991163, "grad_norm": 0.6811870485795358, "learning_rate": 2.183909595776485e-06, "loss": 0.0205, "step": 201030 }, { "epoch": 0.8388271816141065, "grad_norm": 0.6176947102757735, "learning_rate": 2.1838824371469108e-06, "loss": 0.0167, "step": 201035 }, { "epoch": 0.8388480443290968, "grad_norm": 0.6361263104248222, "learning_rate": 2.1838552795305273e-06, "loss": 0.0179, "step": 201040 }, { "epoch": 0.8388689070440871, "grad_norm": 0.9292304956593049, "learning_rate": 2.183828122927272e-06, "loss": 0.0199, "step": 201045 }, { "epoch": 0.8388897697590774, "grad_norm": 0.9267668068570016, "learning_rate": 2.183800967337083e-06, "loss": 0.0206, "step": 201050 }, { "epoch": 0.8389106324740676, "grad_norm": 0.9724111090198102, "learning_rate": 2.1837738127598957e-06, "loss": 0.0214, "step": 201055 }, { "epoch": 0.838931495189058, "grad_norm": 0.9363352171268984, "learning_rate": 2.183746659195648e-06, "loss": 0.0275, "step": 201060 }, { "epoch": 0.8389523579040482, "grad_norm": 0.24683995001037928, "learning_rate": 2.1837195066442767e-06, "loss": 0.0183, "step": 201065 }, { "epoch": 0.8389732206190385, "grad_norm": 0.33615283598872064, "learning_rate": 2.1836923551057193e-06, "loss": 0.0242, "step": 201070 }, { "epoch": 0.8389940833340288, "grad_norm": 0.9670185378102336, "learning_rate": 2.183665204579912e-06, "loss": 0.0244, "step": 201075 }, { "epoch": 0.839014946049019, "grad_norm": 0.524319340987038, "learning_rate": 2.1836380550667926e-06, "loss": 0.0214, "step": 201080 }, { "epoch": 0.8390358087640093, "grad_norm": 0.4248652501793696, "learning_rate": 2.183610906566298e-06, "loss": 0.0197, "step": 201085 }, { "epoch": 0.8390566714789995, "grad_norm": 0.4788384861755687, "learning_rate": 2.1835837590783647e-06, "loss": 0.0219, "step": 201090 }, { "epoch": 0.8390775341939899, "grad_norm": 0.33570406257772406, "learning_rate": 2.1835566126029302e-06, "loss": 0.015, "step": 201095 }, { "epoch": 0.8390983969089801, "grad_norm": 0.6248797418713933, "learning_rate": 2.1835294671399317e-06, "loss": 0.0178, "step": 201100 }, { "epoch": 0.8391192596239704, "grad_norm": 0.3783016526438048, "learning_rate": 2.1835023226893058e-06, "loss": 0.0284, "step": 201105 }, { "epoch": 0.8391401223389607, "grad_norm": 0.6780534806388735, "learning_rate": 2.1834751792509905e-06, "loss": 0.0345, "step": 201110 }, { "epoch": 0.839160985053951, "grad_norm": 0.6449781900703329, "learning_rate": 2.1834480368249215e-06, "loss": 0.0271, "step": 201115 }, { "epoch": 0.8391818477689412, "grad_norm": 0.6985166866318125, "learning_rate": 2.1834208954110374e-06, "loss": 0.0135, "step": 201120 }, { "epoch": 0.8392027104839316, "grad_norm": 0.6518272955115431, "learning_rate": 2.1833937550092738e-06, "loss": 0.0255, "step": 201125 }, { "epoch": 0.8392235731989218, "grad_norm": 0.8156349752270596, "learning_rate": 2.183366615619569e-06, "loss": 0.0165, "step": 201130 }, { "epoch": 0.8392444359139121, "grad_norm": 1.1610146607873761, "learning_rate": 2.1833394772418595e-06, "loss": 0.0207, "step": 201135 }, { "epoch": 0.8392652986289024, "grad_norm": 0.8381694132454459, "learning_rate": 2.1833123398760827e-06, "loss": 0.0153, "step": 201140 }, { "epoch": 0.8392861613438927, "grad_norm": 1.0861372876349817, "learning_rate": 2.1832852035221748e-06, "loss": 0.0242, "step": 201145 }, { "epoch": 0.8393070240588829, "grad_norm": 0.6711252132622149, "learning_rate": 2.183258068180074e-06, "loss": 0.015, "step": 201150 }, { "epoch": 0.8393278867738732, "grad_norm": 5.692902119143768, "learning_rate": 2.183230933849718e-06, "loss": 0.0198, "step": 201155 }, { "epoch": 0.8393487494888635, "grad_norm": 0.524096451557182, "learning_rate": 2.1832038005310417e-06, "loss": 0.0198, "step": 201160 }, { "epoch": 0.8393696122038538, "grad_norm": 0.3048571080172599, "learning_rate": 2.183176668223984e-06, "loss": 0.0205, "step": 201165 }, { "epoch": 0.839390474918844, "grad_norm": 0.4633993584955706, "learning_rate": 2.1831495369284824e-06, "loss": 0.0175, "step": 201170 }, { "epoch": 0.8394113376338344, "grad_norm": 0.33974512211108365, "learning_rate": 2.1831224066444723e-06, "loss": 0.0225, "step": 201175 }, { "epoch": 0.8394322003488246, "grad_norm": 0.962944658445608, "learning_rate": 2.183095277371892e-06, "loss": 0.0207, "step": 201180 }, { "epoch": 0.8394530630638148, "grad_norm": 0.5063513752111864, "learning_rate": 2.1830681491106787e-06, "loss": 0.0222, "step": 201185 }, { "epoch": 0.8394739257788052, "grad_norm": 0.5150165634498153, "learning_rate": 2.1830410218607694e-06, "loss": 0.0519, "step": 201190 }, { "epoch": 0.8394947884937954, "grad_norm": 0.4092962262067437, "learning_rate": 2.183013895622101e-06, "loss": 0.0218, "step": 201195 }, { "epoch": 0.8395156512087857, "grad_norm": 1.2071340686986174, "learning_rate": 2.182986770394611e-06, "loss": 0.0196, "step": 201200 }, { "epoch": 0.839536513923776, "grad_norm": 1.0399282062647035, "learning_rate": 2.1829596461782357e-06, "loss": 0.0221, "step": 201205 }, { "epoch": 0.8395573766387663, "grad_norm": 0.5015987809399329, "learning_rate": 2.1829325229729134e-06, "loss": 0.0167, "step": 201210 }, { "epoch": 0.8395782393537565, "grad_norm": 0.6173157875053858, "learning_rate": 2.1829054007785812e-06, "loss": 0.0205, "step": 201215 }, { "epoch": 0.8395991020687468, "grad_norm": 1.0349871981489895, "learning_rate": 2.1828782795951757e-06, "loss": 0.0358, "step": 201220 }, { "epoch": 0.8396199647837371, "grad_norm": 0.6695047934175052, "learning_rate": 2.182851159422635e-06, "loss": 0.0258, "step": 201225 }, { "epoch": 0.8396408274987274, "grad_norm": 0.5001013998596966, "learning_rate": 2.182824040260895e-06, "loss": 0.0172, "step": 201230 }, { "epoch": 0.8396616902137176, "grad_norm": 0.5003942052865106, "learning_rate": 2.1827969221098938e-06, "loss": 0.0269, "step": 201235 }, { "epoch": 0.839682552928708, "grad_norm": 0.725536570717776, "learning_rate": 2.182769804969569e-06, "loss": 0.0191, "step": 201240 }, { "epoch": 0.8397034156436982, "grad_norm": 0.7825666228953948, "learning_rate": 2.1827426888398566e-06, "loss": 0.0205, "step": 201245 }, { "epoch": 0.8397242783586885, "grad_norm": 0.5753997921647951, "learning_rate": 2.182715573720695e-06, "loss": 0.023, "step": 201250 }, { "epoch": 0.8397451410736788, "grad_norm": 0.887284021159926, "learning_rate": 2.1826884596120203e-06, "loss": 0.0193, "step": 201255 }, { "epoch": 0.8397660037886691, "grad_norm": 0.2963979027204895, "learning_rate": 2.182661346513771e-06, "loss": 0.0169, "step": 201260 }, { "epoch": 0.8397868665036593, "grad_norm": 0.6832233537241823, "learning_rate": 2.1826342344258835e-06, "loss": 0.0219, "step": 201265 }, { "epoch": 0.8398077292186495, "grad_norm": 0.5295831181362983, "learning_rate": 2.1826071233482946e-06, "loss": 0.0187, "step": 201270 }, { "epoch": 0.8398285919336399, "grad_norm": 0.4345180217217944, "learning_rate": 2.1825800132809434e-06, "loss": 0.0239, "step": 201275 }, { "epoch": 0.8398494546486301, "grad_norm": 1.1185087042677222, "learning_rate": 2.1825529042237653e-06, "loss": 0.0205, "step": 201280 }, { "epoch": 0.8398703173636204, "grad_norm": 2.8225351473249716, "learning_rate": 2.182525796176698e-06, "loss": 0.0181, "step": 201285 }, { "epoch": 0.8398911800786107, "grad_norm": 0.7124626583627941, "learning_rate": 2.1824986891396796e-06, "loss": 0.0183, "step": 201290 }, { "epoch": 0.839912042793601, "grad_norm": 0.698903701162444, "learning_rate": 2.1824715831126467e-06, "loss": 0.0158, "step": 201295 }, { "epoch": 0.8399329055085912, "grad_norm": 1.3560471312546225, "learning_rate": 2.1824444780955363e-06, "loss": 0.0206, "step": 201300 }, { "epoch": 0.8399537682235816, "grad_norm": 1.1643783637688043, "learning_rate": 2.182417374088286e-06, "loss": 0.0258, "step": 201305 }, { "epoch": 0.8399746309385718, "grad_norm": 0.4715542640018344, "learning_rate": 2.182390271090834e-06, "loss": 0.0167, "step": 201310 }, { "epoch": 0.8399954936535621, "grad_norm": 1.011748660884499, "learning_rate": 2.182363169103116e-06, "loss": 0.0234, "step": 201315 }, { "epoch": 0.8400163563685524, "grad_norm": 1.3170217254863228, "learning_rate": 2.18233606812507e-06, "loss": 0.0242, "step": 201320 }, { "epoch": 0.8400372190835427, "grad_norm": 0.6374454045548964, "learning_rate": 2.182308968156634e-06, "loss": 0.0145, "step": 201325 }, { "epoch": 0.8400580817985329, "grad_norm": 0.5137352821197332, "learning_rate": 2.182281869197744e-06, "loss": 0.018, "step": 201330 }, { "epoch": 0.8400789445135232, "grad_norm": 0.5111022202641666, "learning_rate": 2.1822547712483384e-06, "loss": 0.0172, "step": 201335 }, { "epoch": 0.8400998072285135, "grad_norm": 0.4815440109295954, "learning_rate": 2.182227674308354e-06, "loss": 0.0199, "step": 201340 }, { "epoch": 0.8401206699435038, "grad_norm": 0.3816487194143708, "learning_rate": 2.1822005783777282e-06, "loss": 0.0184, "step": 201345 }, { "epoch": 0.840141532658494, "grad_norm": 0.5721500077983473, "learning_rate": 2.182173483456399e-06, "loss": 0.0175, "step": 201350 }, { "epoch": 0.8401623953734844, "grad_norm": 0.4625120764296959, "learning_rate": 2.182146389544302e-06, "loss": 0.0229, "step": 201355 }, { "epoch": 0.8401832580884746, "grad_norm": 0.6783782271888902, "learning_rate": 2.1821192966413767e-06, "loss": 0.0202, "step": 201360 }, { "epoch": 0.8402041208034648, "grad_norm": 0.4808151854383645, "learning_rate": 2.1820922047475594e-06, "loss": 0.0138, "step": 201365 }, { "epoch": 0.8402249835184552, "grad_norm": 0.5790423230698518, "learning_rate": 2.182065113862787e-06, "loss": 0.0253, "step": 201370 }, { "epoch": 0.8402458462334454, "grad_norm": 0.8815301517087281, "learning_rate": 2.1820380239869975e-06, "loss": 0.0185, "step": 201375 }, { "epoch": 0.8402667089484357, "grad_norm": 0.5496742353044198, "learning_rate": 2.1820109351201283e-06, "loss": 0.0276, "step": 201380 }, { "epoch": 0.8402875716634259, "grad_norm": 0.692115265312314, "learning_rate": 2.181983847262117e-06, "loss": 0.0228, "step": 201385 }, { "epoch": 0.8403084343784163, "grad_norm": 0.42038118328986357, "learning_rate": 2.1819567604128996e-06, "loss": 0.0209, "step": 201390 }, { "epoch": 0.8403292970934065, "grad_norm": 1.3518354253402542, "learning_rate": 2.1819296745724152e-06, "loss": 0.0256, "step": 201395 }, { "epoch": 0.8403501598083968, "grad_norm": 0.4475671403842134, "learning_rate": 2.1819025897406005e-06, "loss": 0.0175, "step": 201400 }, { "epoch": 0.8403710225233871, "grad_norm": 0.5174345313024677, "learning_rate": 2.181875505917393e-06, "loss": 0.0147, "step": 201405 }, { "epoch": 0.8403918852383774, "grad_norm": 0.4700534009291552, "learning_rate": 2.1818484231027294e-06, "loss": 0.0233, "step": 201410 }, { "epoch": 0.8404127479533676, "grad_norm": 0.5782881772727009, "learning_rate": 2.181821341296548e-06, "loss": 0.0215, "step": 201415 }, { "epoch": 0.840433610668358, "grad_norm": 0.747426516829119, "learning_rate": 2.181794260498786e-06, "loss": 0.0264, "step": 201420 }, { "epoch": 0.8404544733833482, "grad_norm": 0.3475975837216835, "learning_rate": 2.1817671807093803e-06, "loss": 0.0227, "step": 201425 }, { "epoch": 0.8404753360983385, "grad_norm": 0.509056480486731, "learning_rate": 2.1817401019282692e-06, "loss": 0.0169, "step": 201430 }, { "epoch": 0.8404961988133288, "grad_norm": 0.44430271998574006, "learning_rate": 2.1817130241553895e-06, "loss": 0.0243, "step": 201435 }, { "epoch": 0.8405170615283191, "grad_norm": 0.7523532431161849, "learning_rate": 2.181685947390679e-06, "loss": 0.0172, "step": 201440 }, { "epoch": 0.8405379242433093, "grad_norm": 0.8573428180794463, "learning_rate": 2.1816588716340747e-06, "loss": 0.0187, "step": 201445 }, { "epoch": 0.8405587869582996, "grad_norm": 0.5906822990283624, "learning_rate": 2.1816317968855142e-06, "loss": 0.0269, "step": 201450 }, { "epoch": 0.8405796496732899, "grad_norm": 0.435432766744254, "learning_rate": 2.1816047231449353e-06, "loss": 0.018, "step": 201455 }, { "epoch": 0.8406005123882802, "grad_norm": 2.224312946285667, "learning_rate": 2.1815776504122747e-06, "loss": 0.0179, "step": 201460 }, { "epoch": 0.8406213751032704, "grad_norm": 0.3799236937844687, "learning_rate": 2.1815505786874715e-06, "loss": 0.0196, "step": 201465 }, { "epoch": 0.8406422378182608, "grad_norm": 0.603044814634273, "learning_rate": 2.181523507970461e-06, "loss": 0.0192, "step": 201470 }, { "epoch": 0.840663100533251, "grad_norm": 1.1970236058260337, "learning_rate": 2.181496438261182e-06, "loss": 0.0154, "step": 201475 }, { "epoch": 0.8406839632482412, "grad_norm": 0.4531767594927378, "learning_rate": 2.181469369559572e-06, "loss": 0.0185, "step": 201480 }, { "epoch": 0.8407048259632316, "grad_norm": 0.7766258531957776, "learning_rate": 2.1814423018655677e-06, "loss": 0.0194, "step": 201485 }, { "epoch": 0.8407256886782218, "grad_norm": 0.5302143654381605, "learning_rate": 2.181415235179107e-06, "loss": 0.0184, "step": 201490 }, { "epoch": 0.8407465513932121, "grad_norm": 0.6165202245192394, "learning_rate": 2.181388169500128e-06, "loss": 0.0184, "step": 201495 }, { "epoch": 0.8407674141082024, "grad_norm": 0.7404314210385492, "learning_rate": 2.1813611048285676e-06, "loss": 0.0185, "step": 201500 }, { "epoch": 0.8407882768231927, "grad_norm": 1.3769176188667367, "learning_rate": 2.1813340411643624e-06, "loss": 0.0431, "step": 201505 }, { "epoch": 0.8408091395381829, "grad_norm": 0.2740320898729219, "learning_rate": 2.181306978507452e-06, "loss": 0.02, "step": 201510 }, { "epoch": 0.8408300022531732, "grad_norm": 0.8062064791772706, "learning_rate": 2.1812799168577727e-06, "loss": 0.0316, "step": 201515 }, { "epoch": 0.8408508649681635, "grad_norm": 0.5137340897064335, "learning_rate": 2.1812528562152614e-06, "loss": 0.0209, "step": 201520 }, { "epoch": 0.8408717276831538, "grad_norm": 0.744442056287358, "learning_rate": 2.1812257965798567e-06, "loss": 0.0219, "step": 201525 }, { "epoch": 0.840892590398144, "grad_norm": 0.40299696173059885, "learning_rate": 2.1811987379514958e-06, "loss": 0.0176, "step": 201530 }, { "epoch": 0.8409134531131344, "grad_norm": 0.6445659790504537, "learning_rate": 2.181171680330116e-06, "loss": 0.0161, "step": 201535 }, { "epoch": 0.8409343158281246, "grad_norm": 0.5894574080033433, "learning_rate": 2.1811446237156555e-06, "loss": 0.0244, "step": 201540 }, { "epoch": 0.8409551785431149, "grad_norm": 0.8345700663502882, "learning_rate": 2.181117568108051e-06, "loss": 0.0214, "step": 201545 }, { "epoch": 0.8409760412581052, "grad_norm": 0.704979955645863, "learning_rate": 2.181090513507241e-06, "loss": 0.0216, "step": 201550 }, { "epoch": 0.8409969039730955, "grad_norm": 0.6279094420214182, "learning_rate": 2.181063459913162e-06, "loss": 0.0204, "step": 201555 }, { "epoch": 0.8410177666880857, "grad_norm": 1.0009625229320203, "learning_rate": 2.181036407325752e-06, "loss": 0.0167, "step": 201560 }, { "epoch": 0.8410386294030759, "grad_norm": 0.5433535322436491, "learning_rate": 2.1810093557449487e-06, "loss": 0.0144, "step": 201565 }, { "epoch": 0.8410594921180663, "grad_norm": 0.5947292765109592, "learning_rate": 2.1809823051706903e-06, "loss": 0.0252, "step": 201570 }, { "epoch": 0.8410803548330565, "grad_norm": 0.5213911957444766, "learning_rate": 2.1809552556029133e-06, "loss": 0.0215, "step": 201575 }, { "epoch": 0.8411012175480468, "grad_norm": 0.6911264640753588, "learning_rate": 2.180928207041555e-06, "loss": 0.0218, "step": 201580 }, { "epoch": 0.8411220802630371, "grad_norm": 0.3068396181752701, "learning_rate": 2.1809011594865546e-06, "loss": 0.019, "step": 201585 }, { "epoch": 0.8411429429780274, "grad_norm": 0.891839084735003, "learning_rate": 2.1808741129378483e-06, "loss": 0.0199, "step": 201590 }, { "epoch": 0.8411638056930176, "grad_norm": 1.2295106981737856, "learning_rate": 2.1808470673953743e-06, "loss": 0.0239, "step": 201595 }, { "epoch": 0.841184668408008, "grad_norm": 0.502704429826388, "learning_rate": 2.1808200228590705e-06, "loss": 0.0155, "step": 201600 }, { "epoch": 0.8412055311229982, "grad_norm": 0.6840393163982535, "learning_rate": 2.180792979328874e-06, "loss": 0.0189, "step": 201605 }, { "epoch": 0.8412263938379885, "grad_norm": 1.0949247403580682, "learning_rate": 2.180765936804722e-06, "loss": 0.0302, "step": 201610 }, { "epoch": 0.8412472565529788, "grad_norm": 0.5188899311591049, "learning_rate": 2.1807388952865537e-06, "loss": 0.0192, "step": 201615 }, { "epoch": 0.8412681192679691, "grad_norm": 0.7026795543724812, "learning_rate": 2.1807118547743047e-06, "loss": 0.0209, "step": 201620 }, { "epoch": 0.8412889819829593, "grad_norm": 0.6799832616555098, "learning_rate": 2.1806848152679138e-06, "loss": 0.0162, "step": 201625 }, { "epoch": 0.8413098446979496, "grad_norm": 0.7054197497969962, "learning_rate": 2.1806577767673185e-06, "loss": 0.0161, "step": 201630 }, { "epoch": 0.8413307074129399, "grad_norm": 0.27752777622932884, "learning_rate": 2.1806307392724568e-06, "loss": 0.0203, "step": 201635 }, { "epoch": 0.8413515701279302, "grad_norm": 0.6007484995995811, "learning_rate": 2.1806037027832658e-06, "loss": 0.0201, "step": 201640 }, { "epoch": 0.8413724328429204, "grad_norm": 0.9688387145442618, "learning_rate": 2.180576667299683e-06, "loss": 0.0201, "step": 201645 }, { "epoch": 0.8413932955579108, "grad_norm": 0.8680703621782956, "learning_rate": 2.1805496328216467e-06, "loss": 0.0163, "step": 201650 }, { "epoch": 0.841414158272901, "grad_norm": 0.8747604166427527, "learning_rate": 2.180522599349094e-06, "loss": 0.0227, "step": 201655 }, { "epoch": 0.8414350209878912, "grad_norm": 0.73452030651967, "learning_rate": 2.180495566881963e-06, "loss": 0.0236, "step": 201660 }, { "epoch": 0.8414558837028816, "grad_norm": 0.5613525014825811, "learning_rate": 2.1804685354201915e-06, "loss": 0.0295, "step": 201665 }, { "epoch": 0.8414767464178718, "grad_norm": 0.32053958541065053, "learning_rate": 2.180441504963717e-06, "loss": 0.0218, "step": 201670 }, { "epoch": 0.8414976091328621, "grad_norm": 0.8648695083057675, "learning_rate": 2.180414475512476e-06, "loss": 0.0226, "step": 201675 }, { "epoch": 0.8415184718478524, "grad_norm": 0.6232860042669234, "learning_rate": 2.1803874470664085e-06, "loss": 0.0184, "step": 201680 }, { "epoch": 0.8415393345628427, "grad_norm": 0.4930560890199929, "learning_rate": 2.1803604196254504e-06, "loss": 0.0213, "step": 201685 }, { "epoch": 0.8415601972778329, "grad_norm": 0.6496196578717255, "learning_rate": 2.18033339318954e-06, "loss": 0.0224, "step": 201690 }, { "epoch": 0.8415810599928232, "grad_norm": 0.5008283794322943, "learning_rate": 2.1803063677586146e-06, "loss": 0.0208, "step": 201695 }, { "epoch": 0.8416019227078135, "grad_norm": 0.9751489036302072, "learning_rate": 2.180279343332613e-06, "loss": 0.0293, "step": 201700 }, { "epoch": 0.8416227854228038, "grad_norm": 0.30762978468573265, "learning_rate": 2.1802523199114723e-06, "loss": 0.016, "step": 201705 }, { "epoch": 0.841643648137794, "grad_norm": 0.6750811572864858, "learning_rate": 2.18022529749513e-06, "loss": 0.0187, "step": 201710 }, { "epoch": 0.8416645108527844, "grad_norm": 0.5386248134059765, "learning_rate": 2.1801982760835234e-06, "loss": 0.022, "step": 201715 }, { "epoch": 0.8416853735677746, "grad_norm": 0.6935077238091762, "learning_rate": 2.1801712556765916e-06, "loss": 0.0172, "step": 201720 }, { "epoch": 0.8417062362827649, "grad_norm": 0.7521218761206934, "learning_rate": 2.1801442362742714e-06, "loss": 0.028, "step": 201725 }, { "epoch": 0.8417270989977552, "grad_norm": 0.4745596007453484, "learning_rate": 2.1801172178765006e-06, "loss": 0.0178, "step": 201730 }, { "epoch": 0.8417479617127455, "grad_norm": 0.5135478512969817, "learning_rate": 2.180090200483217e-06, "loss": 0.0221, "step": 201735 }, { "epoch": 0.8417688244277357, "grad_norm": 0.4343599250582216, "learning_rate": 2.180063184094359e-06, "loss": 0.0143, "step": 201740 }, { "epoch": 0.841789687142726, "grad_norm": 0.6691942117623517, "learning_rate": 2.180036168709863e-06, "loss": 0.0189, "step": 201745 }, { "epoch": 0.8418105498577163, "grad_norm": 0.2942451378307118, "learning_rate": 2.1800091543296685e-06, "loss": 0.0147, "step": 201750 }, { "epoch": 0.8418314125727066, "grad_norm": 0.5078150446987708, "learning_rate": 2.1799821409537116e-06, "loss": 0.0193, "step": 201755 }, { "epoch": 0.8418522752876968, "grad_norm": 0.3343723148953323, "learning_rate": 2.1799551285819313e-06, "loss": 0.0186, "step": 201760 }, { "epoch": 0.8418731380026871, "grad_norm": 0.6221909580942082, "learning_rate": 2.179928117214265e-06, "loss": 0.027, "step": 201765 }, { "epoch": 0.8418940007176774, "grad_norm": 0.4973091312919933, "learning_rate": 2.17990110685065e-06, "loss": 0.0201, "step": 201770 }, { "epoch": 0.8419148634326676, "grad_norm": 0.4078667932127047, "learning_rate": 2.179874097491024e-06, "loss": 0.021, "step": 201775 }, { "epoch": 0.841935726147658, "grad_norm": 0.43140502980166345, "learning_rate": 2.1798470891353265e-06, "loss": 0.0181, "step": 201780 }, { "epoch": 0.8419565888626482, "grad_norm": 2.1443963795091125, "learning_rate": 2.1798200817834934e-06, "loss": 0.0207, "step": 201785 }, { "epoch": 0.8419774515776385, "grad_norm": 0.2762979976406897, "learning_rate": 2.179793075435464e-06, "loss": 0.0156, "step": 201790 }, { "epoch": 0.8419983142926288, "grad_norm": 0.36211279446706773, "learning_rate": 2.1797660700911744e-06, "loss": 0.0159, "step": 201795 }, { "epoch": 0.8420191770076191, "grad_norm": 0.7945607005878605, "learning_rate": 2.179739065750564e-06, "loss": 0.0145, "step": 201800 }, { "epoch": 0.8420400397226093, "grad_norm": 0.3713103112122448, "learning_rate": 2.17971206241357e-06, "loss": 0.0159, "step": 201805 }, { "epoch": 0.8420609024375996, "grad_norm": 0.5574844365606715, "learning_rate": 2.1796850600801302e-06, "loss": 0.0171, "step": 201810 }, { "epoch": 0.8420817651525899, "grad_norm": 0.6372319895146625, "learning_rate": 2.1796580587501825e-06, "loss": 0.0242, "step": 201815 }, { "epoch": 0.8421026278675802, "grad_norm": 1.007606582496116, "learning_rate": 2.1796310584236648e-06, "loss": 0.0333, "step": 201820 }, { "epoch": 0.8421234905825704, "grad_norm": 0.43657883579602336, "learning_rate": 2.1796040591005147e-06, "loss": 0.0197, "step": 201825 }, { "epoch": 0.8421443532975608, "grad_norm": 0.5055790707301995, "learning_rate": 2.1795770607806705e-06, "loss": 0.0211, "step": 201830 }, { "epoch": 0.842165216012551, "grad_norm": 0.5332889025569392, "learning_rate": 2.1795500634640695e-06, "loss": 0.0218, "step": 201835 }, { "epoch": 0.8421860787275413, "grad_norm": 0.5613877046825856, "learning_rate": 2.1795230671506503e-06, "loss": 0.0176, "step": 201840 }, { "epoch": 0.8422069414425316, "grad_norm": 1.1195963472288188, "learning_rate": 2.1794960718403497e-06, "loss": 0.0157, "step": 201845 }, { "epoch": 0.8422278041575219, "grad_norm": 0.8717910679245242, "learning_rate": 2.179469077533107e-06, "loss": 0.0201, "step": 201850 }, { "epoch": 0.8422486668725121, "grad_norm": 0.5254233185017165, "learning_rate": 2.179442084228859e-06, "loss": 0.0136, "step": 201855 }, { "epoch": 0.8422695295875025, "grad_norm": 0.83328901927643, "learning_rate": 2.179415091927544e-06, "loss": 0.0159, "step": 201860 }, { "epoch": 0.8422903923024927, "grad_norm": 0.6534053123501924, "learning_rate": 2.1793881006290997e-06, "loss": 0.0234, "step": 201865 }, { "epoch": 0.8423112550174829, "grad_norm": 0.5326617666339883, "learning_rate": 2.179361110333464e-06, "loss": 0.0228, "step": 201870 }, { "epoch": 0.8423321177324732, "grad_norm": 0.5684518854797888, "learning_rate": 2.1793341210405755e-06, "loss": 0.0202, "step": 201875 }, { "epoch": 0.8423529804474635, "grad_norm": 0.3901109349347333, "learning_rate": 2.179307132750371e-06, "loss": 0.0173, "step": 201880 }, { "epoch": 0.8423738431624538, "grad_norm": 0.665799750910129, "learning_rate": 2.179280145462789e-06, "loss": 0.0165, "step": 201885 }, { "epoch": 0.842394705877444, "grad_norm": 0.40039320324314914, "learning_rate": 2.1792531591777673e-06, "loss": 0.0145, "step": 201890 }, { "epoch": 0.8424155685924344, "grad_norm": 0.5302638624106389, "learning_rate": 2.1792261738952437e-06, "loss": 0.0166, "step": 201895 }, { "epoch": 0.8424364313074246, "grad_norm": 0.7195556694166414, "learning_rate": 2.179199189615157e-06, "loss": 0.018, "step": 201900 }, { "epoch": 0.8424572940224149, "grad_norm": 0.7285918396414809, "learning_rate": 2.179172206337444e-06, "loss": 0.0217, "step": 201905 }, { "epoch": 0.8424781567374052, "grad_norm": 0.6690715784317872, "learning_rate": 2.1791452240620432e-06, "loss": 0.0207, "step": 201910 }, { "epoch": 0.8424990194523955, "grad_norm": 1.0810887900875825, "learning_rate": 2.1791182427888925e-06, "loss": 0.0278, "step": 201915 }, { "epoch": 0.8425198821673857, "grad_norm": 0.8709904546854552, "learning_rate": 2.1790912625179296e-06, "loss": 0.0204, "step": 201920 }, { "epoch": 0.842540744882376, "grad_norm": 0.8144863005025238, "learning_rate": 2.1790642832490935e-06, "loss": 0.0342, "step": 201925 }, { "epoch": 0.8425616075973663, "grad_norm": 0.7734803340022509, "learning_rate": 2.1790373049823202e-06, "loss": 0.0253, "step": 201930 }, { "epoch": 0.8425824703123566, "grad_norm": 0.5181652232593029, "learning_rate": 2.1790103277175497e-06, "loss": 0.0173, "step": 201935 }, { "epoch": 0.8426033330273468, "grad_norm": 0.734004857398584, "learning_rate": 2.1789833514547183e-06, "loss": 0.0224, "step": 201940 }, { "epoch": 0.8426241957423372, "grad_norm": 0.9220332737634143, "learning_rate": 2.178956376193765e-06, "loss": 0.0219, "step": 201945 }, { "epoch": 0.8426450584573274, "grad_norm": 0.9348679711002248, "learning_rate": 2.1789294019346283e-06, "loss": 0.0194, "step": 201950 }, { "epoch": 0.8426659211723176, "grad_norm": 0.7295038641725288, "learning_rate": 2.1789024286772443e-06, "loss": 0.0193, "step": 201955 }, { "epoch": 0.842686783887308, "grad_norm": 0.33307550671199426, "learning_rate": 2.178875456421553e-06, "loss": 0.0212, "step": 201960 }, { "epoch": 0.8427076466022982, "grad_norm": 0.9232663510662398, "learning_rate": 2.178848485167491e-06, "loss": 0.0235, "step": 201965 }, { "epoch": 0.8427285093172885, "grad_norm": 0.7066707331345985, "learning_rate": 2.1788215149149973e-06, "loss": 0.0153, "step": 201970 }, { "epoch": 0.8427493720322788, "grad_norm": 0.7083022684794059, "learning_rate": 2.1787945456640093e-06, "loss": 0.0179, "step": 201975 }, { "epoch": 0.8427702347472691, "grad_norm": 0.46102420965375174, "learning_rate": 2.1787675774144653e-06, "loss": 0.0173, "step": 201980 }, { "epoch": 0.8427910974622593, "grad_norm": 0.48902400946947844, "learning_rate": 2.1787406101663028e-06, "loss": 0.0216, "step": 201985 }, { "epoch": 0.8428119601772496, "grad_norm": 0.6764887468828461, "learning_rate": 2.1787136439194602e-06, "loss": 0.0276, "step": 201990 }, { "epoch": 0.8428328228922399, "grad_norm": 0.3591792376323433, "learning_rate": 2.1786866786738763e-06, "loss": 0.0187, "step": 201995 }, { "epoch": 0.8428536856072302, "grad_norm": 0.41176196125973297, "learning_rate": 2.178659714429488e-06, "loss": 0.0128, "step": 202000 }, { "epoch": 0.8428745483222204, "grad_norm": 0.48182485394051044, "learning_rate": 2.1786327511862334e-06, "loss": 0.0174, "step": 202005 }, { "epoch": 0.8428954110372108, "grad_norm": 0.43560035438215283, "learning_rate": 2.1786057889440513e-06, "loss": 0.021, "step": 202010 }, { "epoch": 0.842916273752201, "grad_norm": 0.4034436382147481, "learning_rate": 2.178578827702879e-06, "loss": 0.0203, "step": 202015 }, { "epoch": 0.8429371364671913, "grad_norm": 0.6224119183054432, "learning_rate": 2.1785518674626555e-06, "loss": 0.0222, "step": 202020 }, { "epoch": 0.8429579991821816, "grad_norm": 0.6184477377048143, "learning_rate": 2.178524908223318e-06, "loss": 0.0154, "step": 202025 }, { "epoch": 0.8429788618971719, "grad_norm": 0.7735367041191497, "learning_rate": 2.178497949984805e-06, "loss": 0.0245, "step": 202030 }, { "epoch": 0.8429997246121621, "grad_norm": 1.042527280061462, "learning_rate": 2.1784709927470545e-06, "loss": 0.0309, "step": 202035 }, { "epoch": 0.8430205873271525, "grad_norm": 0.9434860925761831, "learning_rate": 2.1784440365100043e-06, "loss": 0.0217, "step": 202040 }, { "epoch": 0.8430414500421427, "grad_norm": 0.6905269703717869, "learning_rate": 2.1784170812735927e-06, "loss": 0.0222, "step": 202045 }, { "epoch": 0.843062312757133, "grad_norm": 0.664621532046773, "learning_rate": 2.1783901270377582e-06, "loss": 0.0206, "step": 202050 }, { "epoch": 0.8430831754721232, "grad_norm": 0.6830411047542696, "learning_rate": 2.178363173802438e-06, "loss": 0.0307, "step": 202055 }, { "epoch": 0.8431040381871135, "grad_norm": 0.9995167182548589, "learning_rate": 2.178336221567571e-06, "loss": 0.0292, "step": 202060 }, { "epoch": 0.8431249009021038, "grad_norm": 1.3212132114345678, "learning_rate": 2.1783092703330948e-06, "loss": 0.0181, "step": 202065 }, { "epoch": 0.843145763617094, "grad_norm": 0.9171203428217916, "learning_rate": 2.1782823200989476e-06, "loss": 0.0237, "step": 202070 }, { "epoch": 0.8431666263320844, "grad_norm": 0.34321091224900446, "learning_rate": 2.1782553708650685e-06, "loss": 0.0152, "step": 202075 }, { "epoch": 0.8431874890470746, "grad_norm": 0.43683747210615653, "learning_rate": 2.178228422631394e-06, "loss": 0.0197, "step": 202080 }, { "epoch": 0.8432083517620649, "grad_norm": 0.4487775833534203, "learning_rate": 2.1782014753978634e-06, "loss": 0.0198, "step": 202085 }, { "epoch": 0.8432292144770552, "grad_norm": 0.8560095556221232, "learning_rate": 2.178174529164414e-06, "loss": 0.0191, "step": 202090 }, { "epoch": 0.8432500771920455, "grad_norm": 0.34723143564846, "learning_rate": 2.178147583930985e-06, "loss": 0.0213, "step": 202095 }, { "epoch": 0.8432709399070357, "grad_norm": 0.7113913327996472, "learning_rate": 2.1781206396975134e-06, "loss": 0.0143, "step": 202100 }, { "epoch": 0.843291802622026, "grad_norm": 1.1799459996850095, "learning_rate": 2.178093696463938e-06, "loss": 0.0262, "step": 202105 }, { "epoch": 0.8433126653370163, "grad_norm": 0.6811618219880619, "learning_rate": 2.178066754230197e-06, "loss": 0.0188, "step": 202110 }, { "epoch": 0.8433335280520066, "grad_norm": 0.5286654902701268, "learning_rate": 2.1780398129962286e-06, "loss": 0.0169, "step": 202115 }, { "epoch": 0.8433543907669968, "grad_norm": 0.4431192165132196, "learning_rate": 2.1780128727619708e-06, "loss": 0.017, "step": 202120 }, { "epoch": 0.8433752534819872, "grad_norm": 0.49336635639377985, "learning_rate": 2.1779859335273613e-06, "loss": 0.0215, "step": 202125 }, { "epoch": 0.8433961161969774, "grad_norm": 0.31118826795873084, "learning_rate": 2.1779589952923388e-06, "loss": 0.0191, "step": 202130 }, { "epoch": 0.8434169789119677, "grad_norm": 0.5534389510123355, "learning_rate": 2.177932058056842e-06, "loss": 0.0162, "step": 202135 }, { "epoch": 0.843437841626958, "grad_norm": 0.4727523732938934, "learning_rate": 2.177905121820808e-06, "loss": 0.0225, "step": 202140 }, { "epoch": 0.8434587043419483, "grad_norm": 0.6525314633584914, "learning_rate": 2.1778781865841755e-06, "loss": 0.0246, "step": 202145 }, { "epoch": 0.8434795670569385, "grad_norm": 0.46889748391206054, "learning_rate": 2.1778512523468834e-06, "loss": 0.0183, "step": 202150 }, { "epoch": 0.8435004297719289, "grad_norm": 0.20381020192256225, "learning_rate": 2.1778243191088682e-06, "loss": 0.0171, "step": 202155 }, { "epoch": 0.8435212924869191, "grad_norm": 0.8150825417879308, "learning_rate": 2.17779738687007e-06, "loss": 0.0297, "step": 202160 }, { "epoch": 0.8435421552019093, "grad_norm": 0.5629262792654811, "learning_rate": 2.1777704556304257e-06, "loss": 0.0221, "step": 202165 }, { "epoch": 0.8435630179168996, "grad_norm": 0.4855953816501006, "learning_rate": 2.177743525389874e-06, "loss": 0.0213, "step": 202170 }, { "epoch": 0.8435838806318899, "grad_norm": 0.5689635603151304, "learning_rate": 2.1777165961483527e-06, "loss": 0.0182, "step": 202175 }, { "epoch": 0.8436047433468802, "grad_norm": 0.8855263092438598, "learning_rate": 2.1776896679058016e-06, "loss": 0.0138, "step": 202180 }, { "epoch": 0.8436256060618704, "grad_norm": 0.8914052297851615, "learning_rate": 2.177662740662157e-06, "loss": 0.0262, "step": 202185 }, { "epoch": 0.8436464687768608, "grad_norm": 0.7526317689834695, "learning_rate": 2.1776358144173575e-06, "loss": 0.0263, "step": 202190 }, { "epoch": 0.843667331491851, "grad_norm": 0.8513974301276785, "learning_rate": 2.177608889171342e-06, "loss": 0.025, "step": 202195 }, { "epoch": 0.8436881942068413, "grad_norm": 0.6164449768279789, "learning_rate": 2.1775819649240498e-06, "loss": 0.0227, "step": 202200 }, { "epoch": 0.8437090569218316, "grad_norm": 0.6111189349174099, "learning_rate": 2.1775550416754164e-06, "loss": 0.0167, "step": 202205 }, { "epoch": 0.8437299196368219, "grad_norm": 0.6129704638625201, "learning_rate": 2.1775281194253816e-06, "loss": 0.0172, "step": 202210 }, { "epoch": 0.8437507823518121, "grad_norm": 5.221596270821562, "learning_rate": 2.1775011981738847e-06, "loss": 0.0236, "step": 202215 }, { "epoch": 0.8437716450668025, "grad_norm": 0.8036351184872611, "learning_rate": 2.1774742779208617e-06, "loss": 0.0315, "step": 202220 }, { "epoch": 0.8437925077817927, "grad_norm": 0.8752020072513923, "learning_rate": 2.177447358666253e-06, "loss": 0.0275, "step": 202225 }, { "epoch": 0.843813370496783, "grad_norm": 0.39294189513993294, "learning_rate": 2.1774204404099954e-06, "loss": 0.0252, "step": 202230 }, { "epoch": 0.8438342332117732, "grad_norm": 0.31089395461070857, "learning_rate": 2.1773935231520276e-06, "loss": 0.0163, "step": 202235 }, { "epoch": 0.8438550959267636, "grad_norm": 2.330577523299213, "learning_rate": 2.1773666068922885e-06, "loss": 0.0178, "step": 202240 }, { "epoch": 0.8438759586417538, "grad_norm": 0.8267827254676594, "learning_rate": 2.1773396916307155e-06, "loss": 0.0158, "step": 202245 }, { "epoch": 0.843896821356744, "grad_norm": 0.8923150569380519, "learning_rate": 2.177312777367248e-06, "loss": 0.0266, "step": 202250 }, { "epoch": 0.8439176840717344, "grad_norm": 0.6665633362439091, "learning_rate": 2.177285864101823e-06, "loss": 0.023, "step": 202255 }, { "epoch": 0.8439385467867246, "grad_norm": 0.5344977715758519, "learning_rate": 2.1772589518343797e-06, "loss": 0.0252, "step": 202260 }, { "epoch": 0.8439594095017149, "grad_norm": 0.46554084717100497, "learning_rate": 2.1772320405648565e-06, "loss": 0.0175, "step": 202265 }, { "epoch": 0.8439802722167052, "grad_norm": 0.6637807283292118, "learning_rate": 2.1772051302931914e-06, "loss": 0.0248, "step": 202270 }, { "epoch": 0.8440011349316955, "grad_norm": 0.43762653446080585, "learning_rate": 2.1771782210193223e-06, "loss": 0.0322, "step": 202275 }, { "epoch": 0.8440219976466857, "grad_norm": 0.4938801809125982, "learning_rate": 2.177151312743188e-06, "loss": 0.0156, "step": 202280 }, { "epoch": 0.844042860361676, "grad_norm": 0.6492637632559226, "learning_rate": 2.177124405464728e-06, "loss": 0.0204, "step": 202285 }, { "epoch": 0.8440637230766663, "grad_norm": 1.0026912790761606, "learning_rate": 2.177097499183878e-06, "loss": 0.0244, "step": 202290 }, { "epoch": 0.8440845857916566, "grad_norm": 0.5638837165777899, "learning_rate": 2.177070593900579e-06, "loss": 0.0166, "step": 202295 }, { "epoch": 0.8441054485066468, "grad_norm": 0.9792496844032503, "learning_rate": 2.1770436896147677e-06, "loss": 0.0291, "step": 202300 }, { "epoch": 0.8441263112216372, "grad_norm": 0.5037322893584005, "learning_rate": 2.177016786326383e-06, "loss": 0.0175, "step": 202305 }, { "epoch": 0.8441471739366274, "grad_norm": 0.5685861758129922, "learning_rate": 2.176989884035364e-06, "loss": 0.021, "step": 202310 }, { "epoch": 0.8441680366516177, "grad_norm": 1.2084522253901788, "learning_rate": 2.176962982741647e-06, "loss": 0.0199, "step": 202315 }, { "epoch": 0.844188899366608, "grad_norm": 0.5527041217517524, "learning_rate": 2.1769360824451727e-06, "loss": 0.0155, "step": 202320 }, { "epoch": 0.8442097620815983, "grad_norm": 0.2908760728271618, "learning_rate": 2.1769091831458784e-06, "loss": 0.0258, "step": 202325 }, { "epoch": 0.8442306247965885, "grad_norm": 0.931231085619541, "learning_rate": 2.1768822848437024e-06, "loss": 0.0241, "step": 202330 }, { "epoch": 0.8442514875115789, "grad_norm": 1.011485841315936, "learning_rate": 2.1768553875385833e-06, "loss": 0.02, "step": 202335 }, { "epoch": 0.8442723502265691, "grad_norm": 0.7984902586680174, "learning_rate": 2.1768284912304596e-06, "loss": 0.0172, "step": 202340 }, { "epoch": 0.8442932129415593, "grad_norm": 0.5534416415752165, "learning_rate": 2.1768015959192696e-06, "loss": 0.0172, "step": 202345 }, { "epoch": 0.8443140756565496, "grad_norm": 1.1571618310554792, "learning_rate": 2.176774701604952e-06, "loss": 0.0193, "step": 202350 }, { "epoch": 0.84433493837154, "grad_norm": 0.9946442591948337, "learning_rate": 2.176747808287445e-06, "loss": 0.0258, "step": 202355 }, { "epoch": 0.8443558010865302, "grad_norm": 0.8200176364327822, "learning_rate": 2.1767209159666865e-06, "loss": 0.0239, "step": 202360 }, { "epoch": 0.8443766638015204, "grad_norm": 0.5583213670701681, "learning_rate": 2.1766940246426153e-06, "loss": 0.0193, "step": 202365 }, { "epoch": 0.8443975265165108, "grad_norm": 0.5491741704831744, "learning_rate": 2.1766671343151706e-06, "loss": 0.0207, "step": 202370 }, { "epoch": 0.844418389231501, "grad_norm": 0.3534459463477405, "learning_rate": 2.1766402449842895e-06, "loss": 0.0189, "step": 202375 }, { "epoch": 0.8444392519464913, "grad_norm": 0.78555703491136, "learning_rate": 2.1766133566499117e-06, "loss": 0.0237, "step": 202380 }, { "epoch": 0.8444601146614816, "grad_norm": 0.624207017577065, "learning_rate": 2.176586469311975e-06, "loss": 0.0198, "step": 202385 }, { "epoch": 0.8444809773764719, "grad_norm": 0.546684230783691, "learning_rate": 2.176559582970418e-06, "loss": 0.0202, "step": 202390 }, { "epoch": 0.8445018400914621, "grad_norm": 0.9394758372540606, "learning_rate": 2.1765326976251787e-06, "loss": 0.0206, "step": 202395 }, { "epoch": 0.8445227028064525, "grad_norm": 0.6614008094232429, "learning_rate": 2.176505813276196e-06, "loss": 0.0186, "step": 202400 }, { "epoch": 0.8445435655214427, "grad_norm": 0.63870751866848, "learning_rate": 2.1764789299234086e-06, "loss": 0.0216, "step": 202405 }, { "epoch": 0.844564428236433, "grad_norm": 0.6232854911785084, "learning_rate": 2.176452047566755e-06, "loss": 0.0193, "step": 202410 }, { "epoch": 0.8445852909514232, "grad_norm": 0.8105280912464199, "learning_rate": 2.176425166206173e-06, "loss": 0.0214, "step": 202415 }, { "epoch": 0.8446061536664136, "grad_norm": 0.5112742682091583, "learning_rate": 2.176398285841602e-06, "loss": 0.0228, "step": 202420 }, { "epoch": 0.8446270163814038, "grad_norm": 0.30625575606330757, "learning_rate": 2.1763714064729792e-06, "loss": 0.0203, "step": 202425 }, { "epoch": 0.844647879096394, "grad_norm": 1.07344718037577, "learning_rate": 2.176344528100244e-06, "loss": 0.0189, "step": 202430 }, { "epoch": 0.8446687418113844, "grad_norm": 0.8726702746883532, "learning_rate": 2.1763176507233353e-06, "loss": 0.0189, "step": 202435 }, { "epoch": 0.8446896045263746, "grad_norm": 1.0496604714153697, "learning_rate": 2.1762907743421907e-06, "loss": 0.0213, "step": 202440 }, { "epoch": 0.8447104672413649, "grad_norm": 0.5344729157831684, "learning_rate": 2.17626389895675e-06, "loss": 0.0304, "step": 202445 }, { "epoch": 0.8447313299563552, "grad_norm": 0.4615614362029577, "learning_rate": 2.1762370245669497e-06, "loss": 0.0221, "step": 202450 }, { "epoch": 0.8447521926713455, "grad_norm": 0.595516779437854, "learning_rate": 2.17621015117273e-06, "loss": 0.0235, "step": 202455 }, { "epoch": 0.8447730553863357, "grad_norm": 0.45644113117518237, "learning_rate": 2.1761832787740286e-06, "loss": 0.0206, "step": 202460 }, { "epoch": 0.844793918101326, "grad_norm": 0.37026770008929305, "learning_rate": 2.176156407370785e-06, "loss": 0.0214, "step": 202465 }, { "epoch": 0.8448147808163163, "grad_norm": 0.7004499277715245, "learning_rate": 2.176129536962936e-06, "loss": 0.0216, "step": 202470 }, { "epoch": 0.8448356435313066, "grad_norm": 0.5144475887897061, "learning_rate": 2.176102667550422e-06, "loss": 0.0229, "step": 202475 }, { "epoch": 0.8448565062462968, "grad_norm": 0.5532409500154496, "learning_rate": 2.176075799133181e-06, "loss": 0.0218, "step": 202480 }, { "epoch": 0.8448773689612872, "grad_norm": 0.5119878647215478, "learning_rate": 2.176048931711151e-06, "loss": 0.0179, "step": 202485 }, { "epoch": 0.8448982316762774, "grad_norm": 0.50810657345544, "learning_rate": 2.176022065284271e-06, "loss": 0.0167, "step": 202490 }, { "epoch": 0.8449190943912677, "grad_norm": 0.7971931512306822, "learning_rate": 2.1759951998524794e-06, "loss": 0.0238, "step": 202495 }, { "epoch": 0.844939957106258, "grad_norm": 1.881765684398229, "learning_rate": 2.1759683354157145e-06, "loss": 0.0214, "step": 202500 }, { "epoch": 0.8449608198212483, "grad_norm": 1.161942383132207, "learning_rate": 2.175941471973916e-06, "loss": 0.0224, "step": 202505 }, { "epoch": 0.8449816825362385, "grad_norm": 1.321163297499396, "learning_rate": 2.175914609527021e-06, "loss": 0.0195, "step": 202510 }, { "epoch": 0.8450025452512289, "grad_norm": 0.6260368500629665, "learning_rate": 2.175887748074969e-06, "loss": 0.0212, "step": 202515 }, { "epoch": 0.8450234079662191, "grad_norm": 0.5144243461041166, "learning_rate": 2.1758608876176987e-06, "loss": 0.025, "step": 202520 }, { "epoch": 0.8450442706812094, "grad_norm": 0.604997655478518, "learning_rate": 2.1758340281551482e-06, "loss": 0.0253, "step": 202525 }, { "epoch": 0.8450651333961996, "grad_norm": 0.867645812166543, "learning_rate": 2.1758071696872563e-06, "loss": 0.0189, "step": 202530 }, { "epoch": 0.84508599611119, "grad_norm": 0.8654742021057339, "learning_rate": 2.175780312213962e-06, "loss": 0.0223, "step": 202535 }, { "epoch": 0.8451068588261802, "grad_norm": 0.519782311484893, "learning_rate": 2.175753455735203e-06, "loss": 0.025, "step": 202540 }, { "epoch": 0.8451277215411704, "grad_norm": 0.9116617651418758, "learning_rate": 2.1757266002509184e-06, "loss": 0.0217, "step": 202545 }, { "epoch": 0.8451485842561608, "grad_norm": 0.33518539203116043, "learning_rate": 2.1756997457610473e-06, "loss": 0.022, "step": 202550 }, { "epoch": 0.845169446971151, "grad_norm": 0.6360457739898385, "learning_rate": 2.175672892265527e-06, "loss": 0.0208, "step": 202555 }, { "epoch": 0.8451903096861413, "grad_norm": 0.8448341834001515, "learning_rate": 2.175646039764298e-06, "loss": 0.0261, "step": 202560 }, { "epoch": 0.8452111724011316, "grad_norm": 0.7049684666713415, "learning_rate": 2.175619188257298e-06, "loss": 0.0256, "step": 202565 }, { "epoch": 0.8452320351161219, "grad_norm": 0.8551352069203831, "learning_rate": 2.1755923377444653e-06, "loss": 0.0305, "step": 202570 }, { "epoch": 0.8452528978311121, "grad_norm": 0.487931636861823, "learning_rate": 2.175565488225739e-06, "loss": 0.0188, "step": 202575 }, { "epoch": 0.8452737605461025, "grad_norm": 0.9248209940561254, "learning_rate": 2.1755386397010577e-06, "loss": 0.0252, "step": 202580 }, { "epoch": 0.8452946232610927, "grad_norm": 0.8676888330821518, "learning_rate": 2.1755117921703593e-06, "loss": 0.0215, "step": 202585 }, { "epoch": 0.845315485976083, "grad_norm": 0.45053279404139324, "learning_rate": 2.1754849456335845e-06, "loss": 0.0201, "step": 202590 }, { "epoch": 0.8453363486910732, "grad_norm": 0.5754297905120358, "learning_rate": 2.17545810009067e-06, "loss": 0.0161, "step": 202595 }, { "epoch": 0.8453572114060636, "grad_norm": 0.21427983287453753, "learning_rate": 2.175431255541555e-06, "loss": 0.0173, "step": 202600 }, { "epoch": 0.8453780741210538, "grad_norm": 0.585068562215263, "learning_rate": 2.175404411986179e-06, "loss": 0.0191, "step": 202605 }, { "epoch": 0.8453989368360441, "grad_norm": 1.4439427302324412, "learning_rate": 2.1753775694244793e-06, "loss": 0.0195, "step": 202610 }, { "epoch": 0.8454197995510344, "grad_norm": 0.8265341173747487, "learning_rate": 2.1753507278563953e-06, "loss": 0.0278, "step": 202615 }, { "epoch": 0.8454406622660247, "grad_norm": 0.2983096853405946, "learning_rate": 2.175323887281866e-06, "loss": 0.0241, "step": 202620 }, { "epoch": 0.8454615249810149, "grad_norm": 1.0220818440738204, "learning_rate": 2.1752970477008297e-06, "loss": 0.0183, "step": 202625 }, { "epoch": 0.8454823876960053, "grad_norm": 0.7377096292071855, "learning_rate": 2.1752702091132254e-06, "loss": 0.0174, "step": 202630 }, { "epoch": 0.8455032504109955, "grad_norm": 0.730817371835216, "learning_rate": 2.1752433715189913e-06, "loss": 0.0186, "step": 202635 }, { "epoch": 0.8455241131259857, "grad_norm": 1.603332583022195, "learning_rate": 2.175216534918067e-06, "loss": 0.0275, "step": 202640 }, { "epoch": 0.845544975840976, "grad_norm": 0.6384996742154032, "learning_rate": 2.17518969931039e-06, "loss": 0.0133, "step": 202645 }, { "epoch": 0.8455658385559663, "grad_norm": 0.6682909849773461, "learning_rate": 2.1751628646959006e-06, "loss": 0.023, "step": 202650 }, { "epoch": 0.8455867012709566, "grad_norm": 0.6588989683077096, "learning_rate": 2.1751360310745356e-06, "loss": 0.0206, "step": 202655 }, { "epoch": 0.8456075639859468, "grad_norm": 0.5489533022151543, "learning_rate": 2.175109198446236e-06, "loss": 0.0196, "step": 202660 }, { "epoch": 0.8456284267009372, "grad_norm": 0.35298500346506817, "learning_rate": 2.1750823668109387e-06, "loss": 0.0142, "step": 202665 }, { "epoch": 0.8456492894159274, "grad_norm": 0.4355462526005688, "learning_rate": 2.175055536168583e-06, "loss": 0.019, "step": 202670 }, { "epoch": 0.8456701521309177, "grad_norm": 1.008188644666028, "learning_rate": 2.1750287065191085e-06, "loss": 0.021, "step": 202675 }, { "epoch": 0.845691014845908, "grad_norm": 0.4533110498345439, "learning_rate": 2.1750018778624526e-06, "loss": 0.025, "step": 202680 }, { "epoch": 0.8457118775608983, "grad_norm": 0.20641378980422145, "learning_rate": 2.1749750501985545e-06, "loss": 0.0253, "step": 202685 }, { "epoch": 0.8457327402758885, "grad_norm": 0.4719210601742579, "learning_rate": 2.174948223527354e-06, "loss": 0.0203, "step": 202690 }, { "epoch": 0.8457536029908789, "grad_norm": 0.8550202952680629, "learning_rate": 2.1749213978487886e-06, "loss": 0.0248, "step": 202695 }, { "epoch": 0.8457744657058691, "grad_norm": 0.9370050882810859, "learning_rate": 2.174894573162798e-06, "loss": 0.023, "step": 202700 }, { "epoch": 0.8457953284208594, "grad_norm": 0.5073723362301253, "learning_rate": 2.1748677494693197e-06, "loss": 0.0263, "step": 202705 }, { "epoch": 0.8458161911358496, "grad_norm": 0.557418927166343, "learning_rate": 2.1748409267682935e-06, "loss": 0.0165, "step": 202710 }, { "epoch": 0.84583705385084, "grad_norm": 0.7230109580550795, "learning_rate": 2.1748141050596584e-06, "loss": 0.0263, "step": 202715 }, { "epoch": 0.8458579165658302, "grad_norm": 0.6772986687898545, "learning_rate": 2.1747872843433532e-06, "loss": 0.0196, "step": 202720 }, { "epoch": 0.8458787792808204, "grad_norm": 0.527219496518929, "learning_rate": 2.174760464619316e-06, "loss": 0.019, "step": 202725 }, { "epoch": 0.8458996419958108, "grad_norm": 1.0289602429256328, "learning_rate": 2.174733645887486e-06, "loss": 0.0204, "step": 202730 }, { "epoch": 0.845920504710801, "grad_norm": 0.6632663130374622, "learning_rate": 2.174706828147802e-06, "loss": 0.0175, "step": 202735 }, { "epoch": 0.8459413674257913, "grad_norm": 0.3214645511438079, "learning_rate": 2.174680011400203e-06, "loss": 0.0186, "step": 202740 }, { "epoch": 0.8459622301407816, "grad_norm": 0.758647526478774, "learning_rate": 2.1746531956446275e-06, "loss": 0.0231, "step": 202745 }, { "epoch": 0.8459830928557719, "grad_norm": 0.5332396817134776, "learning_rate": 2.174626380881014e-06, "loss": 0.0196, "step": 202750 }, { "epoch": 0.8460039555707621, "grad_norm": 0.6899718765018619, "learning_rate": 2.1745995671093025e-06, "loss": 0.0171, "step": 202755 }, { "epoch": 0.8460248182857525, "grad_norm": 0.7488068177403036, "learning_rate": 2.174572754329431e-06, "loss": 0.0218, "step": 202760 }, { "epoch": 0.8460456810007427, "grad_norm": 1.133441929021117, "learning_rate": 2.174545942541339e-06, "loss": 0.018, "step": 202765 }, { "epoch": 0.846066543715733, "grad_norm": 1.2208671957374957, "learning_rate": 2.174519131744964e-06, "loss": 0.0206, "step": 202770 }, { "epoch": 0.8460874064307232, "grad_norm": 0.6342105190511269, "learning_rate": 2.1744923219402465e-06, "loss": 0.019, "step": 202775 }, { "epoch": 0.8461082691457136, "grad_norm": 0.5061400421896249, "learning_rate": 2.1744655131271247e-06, "loss": 0.0175, "step": 202780 }, { "epoch": 0.8461291318607038, "grad_norm": 0.8732840193302611, "learning_rate": 2.174438705305537e-06, "loss": 0.0185, "step": 202785 }, { "epoch": 0.8461499945756941, "grad_norm": 1.409358468469059, "learning_rate": 2.174411898475423e-06, "loss": 0.0195, "step": 202790 }, { "epoch": 0.8461708572906844, "grad_norm": 0.8576425529439293, "learning_rate": 2.174385092636721e-06, "loss": 0.0193, "step": 202795 }, { "epoch": 0.8461917200056747, "grad_norm": 0.6164646882740403, "learning_rate": 2.17435828778937e-06, "loss": 0.0231, "step": 202800 }, { "epoch": 0.8462125827206649, "grad_norm": 1.06183049736422, "learning_rate": 2.17433148393331e-06, "loss": 0.0216, "step": 202805 }, { "epoch": 0.8462334454356553, "grad_norm": 1.0534400580936212, "learning_rate": 2.1743046810684784e-06, "loss": 0.0197, "step": 202810 }, { "epoch": 0.8462543081506455, "grad_norm": 0.7571557636441439, "learning_rate": 2.1742778791948146e-06, "loss": 0.0207, "step": 202815 }, { "epoch": 0.8462751708656358, "grad_norm": 0.5376816049945591, "learning_rate": 2.1742510783122577e-06, "loss": 0.0175, "step": 202820 }, { "epoch": 0.846296033580626, "grad_norm": 0.7274813488482664, "learning_rate": 2.1742242784207467e-06, "loss": 0.0242, "step": 202825 }, { "epoch": 0.8463168962956163, "grad_norm": 0.7045766260565384, "learning_rate": 2.17419747952022e-06, "loss": 0.0286, "step": 202830 }, { "epoch": 0.8463377590106066, "grad_norm": 0.672066890654754, "learning_rate": 2.1741706816106177e-06, "loss": 0.0195, "step": 202835 }, { "epoch": 0.8463586217255968, "grad_norm": 0.3680346969553426, "learning_rate": 2.174143884691877e-06, "loss": 0.0177, "step": 202840 }, { "epoch": 0.8463794844405872, "grad_norm": 0.4535036314050798, "learning_rate": 2.174117088763938e-06, "loss": 0.0197, "step": 202845 }, { "epoch": 0.8464003471555774, "grad_norm": 0.5367629501764783, "learning_rate": 2.1740902938267392e-06, "loss": 0.0227, "step": 202850 }, { "epoch": 0.8464212098705677, "grad_norm": 0.8354166447557203, "learning_rate": 2.17406349988022e-06, "loss": 0.0223, "step": 202855 }, { "epoch": 0.846442072585558, "grad_norm": 1.1404368461274679, "learning_rate": 2.1740367069243194e-06, "loss": 0.03, "step": 202860 }, { "epoch": 0.8464629353005483, "grad_norm": 0.8189583227357322, "learning_rate": 2.174009914958976e-06, "loss": 0.0171, "step": 202865 }, { "epoch": 0.8464837980155385, "grad_norm": 0.4941365515569154, "learning_rate": 2.1739831239841283e-06, "loss": 0.0207, "step": 202870 }, { "epoch": 0.8465046607305289, "grad_norm": 0.805696571976444, "learning_rate": 2.173956333999716e-06, "loss": 0.0281, "step": 202875 }, { "epoch": 0.8465255234455191, "grad_norm": 0.48419898685138213, "learning_rate": 2.173929545005678e-06, "loss": 0.0182, "step": 202880 }, { "epoch": 0.8465463861605094, "grad_norm": 0.5597529912097177, "learning_rate": 2.1739027570019535e-06, "loss": 0.0162, "step": 202885 }, { "epoch": 0.8465672488754996, "grad_norm": 1.1796365762515943, "learning_rate": 2.1738759699884803e-06, "loss": 0.0259, "step": 202890 }, { "epoch": 0.84658811159049, "grad_norm": 0.6662646928532969, "learning_rate": 2.1738491839651983e-06, "loss": 0.0265, "step": 202895 }, { "epoch": 0.8466089743054802, "grad_norm": 0.8733284488634385, "learning_rate": 2.173822398932047e-06, "loss": 0.0264, "step": 202900 }, { "epoch": 0.8466298370204705, "grad_norm": 0.507127416980728, "learning_rate": 2.173795614888965e-06, "loss": 0.0253, "step": 202905 }, { "epoch": 0.8466506997354608, "grad_norm": 0.892993187723106, "learning_rate": 2.1737688318358906e-06, "loss": 0.0138, "step": 202910 }, { "epoch": 0.846671562450451, "grad_norm": 0.47398628949039967, "learning_rate": 2.1737420497727634e-06, "loss": 0.0172, "step": 202915 }, { "epoch": 0.8466924251654413, "grad_norm": 1.063891616682999, "learning_rate": 2.173715268699523e-06, "loss": 0.0196, "step": 202920 }, { "epoch": 0.8467132878804317, "grad_norm": 1.0689821073282595, "learning_rate": 2.1736884886161066e-06, "loss": 0.026, "step": 202925 }, { "epoch": 0.8467341505954219, "grad_norm": 0.4222437106750009, "learning_rate": 2.1736617095224554e-06, "loss": 0.018, "step": 202930 }, { "epoch": 0.8467550133104121, "grad_norm": 0.5045012057041435, "learning_rate": 2.1736349314185067e-06, "loss": 0.0298, "step": 202935 }, { "epoch": 0.8467758760254025, "grad_norm": 0.472889035168859, "learning_rate": 2.173608154304201e-06, "loss": 0.0236, "step": 202940 }, { "epoch": 0.8467967387403927, "grad_norm": 0.44161591456181726, "learning_rate": 2.1735813781794764e-06, "loss": 0.018, "step": 202945 }, { "epoch": 0.846817601455383, "grad_norm": 1.2286312624878355, "learning_rate": 2.173554603044272e-06, "loss": 0.0268, "step": 202950 }, { "epoch": 0.8468384641703732, "grad_norm": 0.45508378448078546, "learning_rate": 2.1735278288985272e-06, "loss": 0.0206, "step": 202955 }, { "epoch": 0.8468593268853636, "grad_norm": 0.585312963143429, "learning_rate": 2.1735010557421806e-06, "loss": 0.0159, "step": 202960 }, { "epoch": 0.8468801896003538, "grad_norm": 0.3920212073991818, "learning_rate": 2.173474283575172e-06, "loss": 0.0189, "step": 202965 }, { "epoch": 0.8469010523153441, "grad_norm": 0.961147715682235, "learning_rate": 2.1734475123974395e-06, "loss": 0.0192, "step": 202970 }, { "epoch": 0.8469219150303344, "grad_norm": 0.6568651423782047, "learning_rate": 2.173420742208923e-06, "loss": 0.0159, "step": 202975 }, { "epoch": 0.8469427777453247, "grad_norm": 0.4465709047834775, "learning_rate": 2.1733939730095616e-06, "loss": 0.0279, "step": 202980 }, { "epoch": 0.8469636404603149, "grad_norm": 1.050482237380802, "learning_rate": 2.173367204799294e-06, "loss": 0.019, "step": 202985 }, { "epoch": 0.8469845031753053, "grad_norm": 1.0780494120238369, "learning_rate": 2.1733404375780594e-06, "loss": 0.0176, "step": 202990 }, { "epoch": 0.8470053658902955, "grad_norm": 0.8640048568714155, "learning_rate": 2.1733136713457963e-06, "loss": 0.0181, "step": 202995 }, { "epoch": 0.8470262286052858, "grad_norm": 0.47064268912683, "learning_rate": 2.1732869061024446e-06, "loss": 0.0173, "step": 203000 }, { "epoch": 0.847047091320276, "grad_norm": 0.33691488285107357, "learning_rate": 2.1732601418479437e-06, "loss": 0.0229, "step": 203005 }, { "epoch": 0.8470679540352664, "grad_norm": 0.6003086303578028, "learning_rate": 2.1732333785822313e-06, "loss": 0.0194, "step": 203010 }, { "epoch": 0.8470888167502566, "grad_norm": 0.43648146728733495, "learning_rate": 2.1732066163052483e-06, "loss": 0.0214, "step": 203015 }, { "epoch": 0.8471096794652468, "grad_norm": 0.3896485408607576, "learning_rate": 2.1731798550169327e-06, "loss": 0.0146, "step": 203020 }, { "epoch": 0.8471305421802372, "grad_norm": 0.5854841031119988, "learning_rate": 2.1731530947172236e-06, "loss": 0.0195, "step": 203025 }, { "epoch": 0.8471514048952274, "grad_norm": 0.7856713208969773, "learning_rate": 2.1731263354060603e-06, "loss": 0.0228, "step": 203030 }, { "epoch": 0.8471722676102177, "grad_norm": 0.4447155357664923, "learning_rate": 2.1730995770833824e-06, "loss": 0.0168, "step": 203035 }, { "epoch": 0.847193130325208, "grad_norm": 0.5869862567428611, "learning_rate": 2.1730728197491284e-06, "loss": 0.0279, "step": 203040 }, { "epoch": 0.8472139930401983, "grad_norm": 1.0994515857540144, "learning_rate": 2.1730460634032378e-06, "loss": 0.0244, "step": 203045 }, { "epoch": 0.8472348557551885, "grad_norm": 0.7727853145515614, "learning_rate": 2.1730193080456495e-06, "loss": 0.0199, "step": 203050 }, { "epoch": 0.8472557184701789, "grad_norm": 0.5659276720842839, "learning_rate": 2.172992553676303e-06, "loss": 0.0262, "step": 203055 }, { "epoch": 0.8472765811851691, "grad_norm": 1.0682212701609184, "learning_rate": 2.1729658002951374e-06, "loss": 0.0245, "step": 203060 }, { "epoch": 0.8472974439001594, "grad_norm": 0.5489000872491212, "learning_rate": 2.1729390479020916e-06, "loss": 0.0219, "step": 203065 }, { "epoch": 0.8473183066151496, "grad_norm": 0.59866068928236, "learning_rate": 2.172912296497105e-06, "loss": 0.0218, "step": 203070 }, { "epoch": 0.84733916933014, "grad_norm": 0.6755744326389717, "learning_rate": 2.172885546080117e-06, "loss": 0.0217, "step": 203075 }, { "epoch": 0.8473600320451302, "grad_norm": 0.8589550992982069, "learning_rate": 2.172858796651066e-06, "loss": 0.0186, "step": 203080 }, { "epoch": 0.8473808947601205, "grad_norm": 1.0995902452977453, "learning_rate": 2.172832048209892e-06, "loss": 0.0228, "step": 203085 }, { "epoch": 0.8474017574751108, "grad_norm": 0.7365247500542368, "learning_rate": 2.1728053007565338e-06, "loss": 0.0192, "step": 203090 }, { "epoch": 0.8474226201901011, "grad_norm": 0.516499762180014, "learning_rate": 2.172778554290931e-06, "loss": 0.0177, "step": 203095 }, { "epoch": 0.8474434829050913, "grad_norm": 0.4377419324459053, "learning_rate": 2.1727518088130223e-06, "loss": 0.0197, "step": 203100 }, { "epoch": 0.8474643456200817, "grad_norm": 0.8471020137925548, "learning_rate": 2.1727250643227473e-06, "loss": 0.0226, "step": 203105 }, { "epoch": 0.8474852083350719, "grad_norm": 0.37642663096613377, "learning_rate": 2.1726983208200446e-06, "loss": 0.0193, "step": 203110 }, { "epoch": 0.8475060710500621, "grad_norm": 0.7633823083543465, "learning_rate": 2.172671578304854e-06, "loss": 0.0231, "step": 203115 }, { "epoch": 0.8475269337650525, "grad_norm": 0.5507435333567708, "learning_rate": 2.172644836777115e-06, "loss": 0.0189, "step": 203120 }, { "epoch": 0.8475477964800427, "grad_norm": 0.6124648233461194, "learning_rate": 2.172618096236766e-06, "loss": 0.0188, "step": 203125 }, { "epoch": 0.847568659195033, "grad_norm": 2.1205155802151956, "learning_rate": 2.1725913566837467e-06, "loss": 0.0346, "step": 203130 }, { "epoch": 0.8475895219100232, "grad_norm": 0.45834368649400564, "learning_rate": 2.1725646181179967e-06, "loss": 0.0202, "step": 203135 }, { "epoch": 0.8476103846250136, "grad_norm": 0.4039776777836616, "learning_rate": 2.1725378805394543e-06, "loss": 0.0251, "step": 203140 }, { "epoch": 0.8476312473400038, "grad_norm": 0.5579120845304716, "learning_rate": 2.1725111439480597e-06, "loss": 0.0264, "step": 203145 }, { "epoch": 0.8476521100549941, "grad_norm": 0.6852758710976645, "learning_rate": 2.1724844083437515e-06, "loss": 0.0231, "step": 203150 }, { "epoch": 0.8476729727699844, "grad_norm": 0.7748223539943997, "learning_rate": 2.1724576737264693e-06, "loss": 0.0233, "step": 203155 }, { "epoch": 0.8476938354849747, "grad_norm": 0.747796628215637, "learning_rate": 2.172430940096153e-06, "loss": 0.0222, "step": 203160 }, { "epoch": 0.8477146981999649, "grad_norm": 0.9985563244022242, "learning_rate": 2.17240420745274e-06, "loss": 0.022, "step": 203165 }, { "epoch": 0.8477355609149553, "grad_norm": 1.099985831509555, "learning_rate": 2.172377475796171e-06, "loss": 0.0225, "step": 203170 }, { "epoch": 0.8477564236299455, "grad_norm": 0.5663328953971362, "learning_rate": 2.1723507451263855e-06, "loss": 0.0185, "step": 203175 }, { "epoch": 0.8477772863449358, "grad_norm": 0.6388440920195747, "learning_rate": 2.172324015443322e-06, "loss": 0.0227, "step": 203180 }, { "epoch": 0.847798149059926, "grad_norm": 0.5246336953023815, "learning_rate": 2.1722972867469205e-06, "loss": 0.0217, "step": 203185 }, { "epoch": 0.8478190117749164, "grad_norm": 0.8180574894563032, "learning_rate": 2.17227055903712e-06, "loss": 0.0219, "step": 203190 }, { "epoch": 0.8478398744899066, "grad_norm": 0.4268587243832929, "learning_rate": 2.172243832313859e-06, "loss": 0.0257, "step": 203195 }, { "epoch": 0.8478607372048969, "grad_norm": 0.6832577450290531, "learning_rate": 2.172217106577078e-06, "loss": 0.0206, "step": 203200 }, { "epoch": 0.8478815999198872, "grad_norm": 0.4469664855186545, "learning_rate": 2.172190381826716e-06, "loss": 0.0215, "step": 203205 }, { "epoch": 0.8479024626348775, "grad_norm": 1.0256666124916145, "learning_rate": 2.1721636580627124e-06, "loss": 0.0273, "step": 203210 }, { "epoch": 0.8479233253498677, "grad_norm": 0.2557500682013277, "learning_rate": 2.1721369352850056e-06, "loss": 0.0177, "step": 203215 }, { "epoch": 0.847944188064858, "grad_norm": 0.499216704787931, "learning_rate": 2.172110213493536e-06, "loss": 0.0339, "step": 203220 }, { "epoch": 0.8479650507798483, "grad_norm": 0.5800579520529132, "learning_rate": 2.172083492688243e-06, "loss": 0.0184, "step": 203225 }, { "epoch": 0.8479859134948385, "grad_norm": 0.28407488460178654, "learning_rate": 2.172056772869065e-06, "loss": 0.0184, "step": 203230 }, { "epoch": 0.8480067762098289, "grad_norm": 0.48040179640382874, "learning_rate": 2.172030054035942e-06, "loss": 0.0172, "step": 203235 }, { "epoch": 0.8480276389248191, "grad_norm": 0.6954830939056621, "learning_rate": 2.172003336188813e-06, "loss": 0.0179, "step": 203240 }, { "epoch": 0.8480485016398094, "grad_norm": 0.2606907472362847, "learning_rate": 2.171976619327618e-06, "loss": 0.0177, "step": 203245 }, { "epoch": 0.8480693643547996, "grad_norm": 0.6349471463973161, "learning_rate": 2.171949903452296e-06, "loss": 0.0317, "step": 203250 }, { "epoch": 0.84809022706979, "grad_norm": 2.0754260416467107, "learning_rate": 2.171923188562786e-06, "loss": 0.0303, "step": 203255 }, { "epoch": 0.8481110897847802, "grad_norm": 0.6035822711097728, "learning_rate": 2.1718964746590277e-06, "loss": 0.0194, "step": 203260 }, { "epoch": 0.8481319524997705, "grad_norm": 0.7810204549447853, "learning_rate": 2.1718697617409605e-06, "loss": 0.021, "step": 203265 }, { "epoch": 0.8481528152147608, "grad_norm": 0.4891011483542915, "learning_rate": 2.1718430498085238e-06, "loss": 0.0204, "step": 203270 }, { "epoch": 0.8481736779297511, "grad_norm": 0.5606311832774765, "learning_rate": 2.171816338861657e-06, "loss": 0.0183, "step": 203275 }, { "epoch": 0.8481945406447413, "grad_norm": 0.5771888958996135, "learning_rate": 2.1717896289002992e-06, "loss": 0.0263, "step": 203280 }, { "epoch": 0.8482154033597317, "grad_norm": 0.39646011027467215, "learning_rate": 2.1717629199243903e-06, "loss": 0.0165, "step": 203285 }, { "epoch": 0.8482362660747219, "grad_norm": 0.6579363627079844, "learning_rate": 2.1717362119338696e-06, "loss": 0.0183, "step": 203290 }, { "epoch": 0.8482571287897122, "grad_norm": 0.7392069757796733, "learning_rate": 2.171709504928676e-06, "loss": 0.0166, "step": 203295 }, { "epoch": 0.8482779915047025, "grad_norm": 0.6331621045872439, "learning_rate": 2.17168279890875e-06, "loss": 0.017, "step": 203300 }, { "epoch": 0.8482988542196928, "grad_norm": 0.4343835573810115, "learning_rate": 2.1716560938740293e-06, "loss": 0.0167, "step": 203305 }, { "epoch": 0.848319716934683, "grad_norm": 0.6079484519656331, "learning_rate": 2.1716293898244547e-06, "loss": 0.0189, "step": 203310 }, { "epoch": 0.8483405796496732, "grad_norm": 0.5550794503863226, "learning_rate": 2.171602686759965e-06, "loss": 0.025, "step": 203315 }, { "epoch": 0.8483614423646636, "grad_norm": 1.1923185970214036, "learning_rate": 2.1715759846805006e-06, "loss": 0.0293, "step": 203320 }, { "epoch": 0.8483823050796538, "grad_norm": 0.9112349994562375, "learning_rate": 2.1715492835859994e-06, "loss": 0.0208, "step": 203325 }, { "epoch": 0.8484031677946441, "grad_norm": 0.6288401809014854, "learning_rate": 2.1715225834764024e-06, "loss": 0.0176, "step": 203330 }, { "epoch": 0.8484240305096344, "grad_norm": 0.8411939098728929, "learning_rate": 2.171495884351648e-06, "loss": 0.023, "step": 203335 }, { "epoch": 0.8484448932246247, "grad_norm": 1.2214733928407504, "learning_rate": 2.171469186211676e-06, "loss": 0.0201, "step": 203340 }, { "epoch": 0.8484657559396149, "grad_norm": 0.41275680283965716, "learning_rate": 2.1714424890564263e-06, "loss": 0.0186, "step": 203345 }, { "epoch": 0.8484866186546053, "grad_norm": 1.0925111764502127, "learning_rate": 2.171415792885837e-06, "loss": 0.0253, "step": 203350 }, { "epoch": 0.8485074813695955, "grad_norm": 0.4604308608504284, "learning_rate": 2.1713890976998493e-06, "loss": 0.0188, "step": 203355 }, { "epoch": 0.8485283440845858, "grad_norm": 0.6225474042106931, "learning_rate": 2.1713624034984016e-06, "loss": 0.0179, "step": 203360 }, { "epoch": 0.848549206799576, "grad_norm": 0.26147088640174604, "learning_rate": 2.1713357102814335e-06, "loss": 0.0238, "step": 203365 }, { "epoch": 0.8485700695145664, "grad_norm": 0.730526652877181, "learning_rate": 2.1713090180488846e-06, "loss": 0.0229, "step": 203370 }, { "epoch": 0.8485909322295566, "grad_norm": 0.13292228710488363, "learning_rate": 2.171282326800695e-06, "loss": 0.0126, "step": 203375 }, { "epoch": 0.8486117949445469, "grad_norm": 0.7521127188267414, "learning_rate": 2.1712556365368028e-06, "loss": 0.0268, "step": 203380 }, { "epoch": 0.8486326576595372, "grad_norm": 1.026214001540615, "learning_rate": 2.171228947257149e-06, "loss": 0.0315, "step": 203385 }, { "epoch": 0.8486535203745275, "grad_norm": 0.8646692352907817, "learning_rate": 2.1712022589616725e-06, "loss": 0.0198, "step": 203390 }, { "epoch": 0.8486743830895177, "grad_norm": 1.309005961686872, "learning_rate": 2.1711755716503126e-06, "loss": 0.0215, "step": 203395 }, { "epoch": 0.8486952458045081, "grad_norm": 0.3688756017239304, "learning_rate": 2.171148885323009e-06, "loss": 0.0147, "step": 203400 }, { "epoch": 0.8487161085194983, "grad_norm": 0.9295421427045605, "learning_rate": 2.1711221999797008e-06, "loss": 0.0198, "step": 203405 }, { "epoch": 0.8487369712344885, "grad_norm": 0.6270342893752773, "learning_rate": 2.1710955156203286e-06, "loss": 0.021, "step": 203410 }, { "epoch": 0.8487578339494789, "grad_norm": 0.9780751310691197, "learning_rate": 2.1710688322448305e-06, "loss": 0.0216, "step": 203415 }, { "epoch": 0.8487786966644691, "grad_norm": 0.24857131541152863, "learning_rate": 2.1710421498531476e-06, "loss": 0.0186, "step": 203420 }, { "epoch": 0.8487995593794594, "grad_norm": 0.6474224408564322, "learning_rate": 2.171015468445218e-06, "loss": 0.0242, "step": 203425 }, { "epoch": 0.8488204220944496, "grad_norm": 0.5456982429173374, "learning_rate": 2.1709887880209827e-06, "loss": 0.0151, "step": 203430 }, { "epoch": 0.84884128480944, "grad_norm": 0.9707236131199795, "learning_rate": 2.17096210858038e-06, "loss": 0.021, "step": 203435 }, { "epoch": 0.8488621475244302, "grad_norm": 1.4985161391447033, "learning_rate": 2.1709354301233503e-06, "loss": 0.0177, "step": 203440 }, { "epoch": 0.8488830102394205, "grad_norm": 0.5487878135743753, "learning_rate": 2.170908752649832e-06, "loss": 0.027, "step": 203445 }, { "epoch": 0.8489038729544108, "grad_norm": 0.6121279088280754, "learning_rate": 2.170882076159766e-06, "loss": 0.0177, "step": 203450 }, { "epoch": 0.8489247356694011, "grad_norm": 0.8802251045556668, "learning_rate": 2.1708554006530915e-06, "loss": 0.0215, "step": 203455 }, { "epoch": 0.8489455983843913, "grad_norm": 0.7376876227212699, "learning_rate": 2.170828726129748e-06, "loss": 0.0192, "step": 203460 }, { "epoch": 0.8489664610993817, "grad_norm": 0.5250472708559947, "learning_rate": 2.1708020525896745e-06, "loss": 0.0184, "step": 203465 }, { "epoch": 0.8489873238143719, "grad_norm": 0.7679655839980374, "learning_rate": 2.1707753800328116e-06, "loss": 0.027, "step": 203470 }, { "epoch": 0.8490081865293622, "grad_norm": 1.0174566620681647, "learning_rate": 2.170748708459098e-06, "loss": 0.0211, "step": 203475 }, { "epoch": 0.8490290492443525, "grad_norm": 0.9048128517115481, "learning_rate": 2.170722037868474e-06, "loss": 0.0192, "step": 203480 }, { "epoch": 0.8490499119593428, "grad_norm": 0.5116912326905697, "learning_rate": 2.1706953682608785e-06, "loss": 0.0206, "step": 203485 }, { "epoch": 0.849070774674333, "grad_norm": 0.9141905502557112, "learning_rate": 2.170668699636252e-06, "loss": 0.0262, "step": 203490 }, { "epoch": 0.8490916373893233, "grad_norm": 0.6060003969509697, "learning_rate": 2.1706420319945337e-06, "loss": 0.017, "step": 203495 }, { "epoch": 0.8491125001043136, "grad_norm": 1.6224612904248563, "learning_rate": 2.1706153653356637e-06, "loss": 0.0179, "step": 203500 }, { "epoch": 0.8491333628193038, "grad_norm": 0.8340224150583382, "learning_rate": 2.17058869965958e-06, "loss": 0.0232, "step": 203505 }, { "epoch": 0.8491542255342941, "grad_norm": 0.5629329026685373, "learning_rate": 2.1705620349662238e-06, "loss": 0.0136, "step": 203510 }, { "epoch": 0.8491750882492844, "grad_norm": 0.360399756852721, "learning_rate": 2.1705353712555345e-06, "loss": 0.0165, "step": 203515 }, { "epoch": 0.8491959509642747, "grad_norm": 0.7494678615351816, "learning_rate": 2.1705087085274513e-06, "loss": 0.0251, "step": 203520 }, { "epoch": 0.8492168136792649, "grad_norm": 0.7511541163052784, "learning_rate": 2.170482046781914e-06, "loss": 0.0276, "step": 203525 }, { "epoch": 0.8492376763942553, "grad_norm": 0.4679582310658789, "learning_rate": 2.1704553860188625e-06, "loss": 0.0188, "step": 203530 }, { "epoch": 0.8492585391092455, "grad_norm": 0.6598303676888831, "learning_rate": 2.1704287262382363e-06, "loss": 0.0184, "step": 203535 }, { "epoch": 0.8492794018242358, "grad_norm": 0.47849739652323897, "learning_rate": 2.170402067439975e-06, "loss": 0.0213, "step": 203540 }, { "epoch": 0.849300264539226, "grad_norm": 0.33189811518225526, "learning_rate": 2.170375409624018e-06, "loss": 0.0233, "step": 203545 }, { "epoch": 0.8493211272542164, "grad_norm": 0.4959041477340101, "learning_rate": 2.170348752790306e-06, "loss": 0.0241, "step": 203550 }, { "epoch": 0.8493419899692066, "grad_norm": 0.7221457208531589, "learning_rate": 2.170322096938778e-06, "loss": 0.0291, "step": 203555 }, { "epoch": 0.8493628526841969, "grad_norm": 0.5577059640341646, "learning_rate": 2.1702954420693733e-06, "loss": 0.0196, "step": 203560 }, { "epoch": 0.8493837153991872, "grad_norm": 0.9036688290494554, "learning_rate": 2.170268788182032e-06, "loss": 0.0273, "step": 203565 }, { "epoch": 0.8494045781141775, "grad_norm": 0.6416378054695361, "learning_rate": 2.1702421352766935e-06, "loss": 0.0223, "step": 203570 }, { "epoch": 0.8494254408291677, "grad_norm": 1.3151989211157042, "learning_rate": 2.1702154833532982e-06, "loss": 0.0223, "step": 203575 }, { "epoch": 0.8494463035441581, "grad_norm": 0.7655364083387836, "learning_rate": 2.170188832411785e-06, "loss": 0.0174, "step": 203580 }, { "epoch": 0.8494671662591483, "grad_norm": 0.40052684968944546, "learning_rate": 2.1701621824520948e-06, "loss": 0.0185, "step": 203585 }, { "epoch": 0.8494880289741386, "grad_norm": 0.5352730771823206, "learning_rate": 2.170135533474166e-06, "loss": 0.0172, "step": 203590 }, { "epoch": 0.8495088916891289, "grad_norm": 0.7719194869375398, "learning_rate": 2.170108885477939e-06, "loss": 0.0171, "step": 203595 }, { "epoch": 0.8495297544041192, "grad_norm": 0.6611516388480171, "learning_rate": 2.1700822384633524e-06, "loss": 0.0161, "step": 203600 }, { "epoch": 0.8495506171191094, "grad_norm": 0.3098517120569372, "learning_rate": 2.170055592430348e-06, "loss": 0.017, "step": 203605 }, { "epoch": 0.8495714798340996, "grad_norm": 0.644407437441817, "learning_rate": 2.1700289473788635e-06, "loss": 0.0201, "step": 203610 }, { "epoch": 0.84959234254909, "grad_norm": 0.246931167848353, "learning_rate": 2.1700023033088405e-06, "loss": 0.016, "step": 203615 }, { "epoch": 0.8496132052640802, "grad_norm": 0.40901023673190007, "learning_rate": 2.1699756602202178e-06, "loss": 0.0151, "step": 203620 }, { "epoch": 0.8496340679790705, "grad_norm": 0.8852383363110261, "learning_rate": 2.1699490181129344e-06, "loss": 0.0244, "step": 203625 }, { "epoch": 0.8496549306940608, "grad_norm": 0.6600364249153269, "learning_rate": 2.1699223769869315e-06, "loss": 0.0177, "step": 203630 }, { "epoch": 0.8496757934090511, "grad_norm": 0.6877612771087438, "learning_rate": 2.169895736842148e-06, "loss": 0.0216, "step": 203635 }, { "epoch": 0.8496966561240413, "grad_norm": 2.761128955987396, "learning_rate": 2.1698690976785236e-06, "loss": 0.0198, "step": 203640 }, { "epoch": 0.8497175188390317, "grad_norm": 0.49976209877281663, "learning_rate": 2.1698424594959987e-06, "loss": 0.0149, "step": 203645 }, { "epoch": 0.8497383815540219, "grad_norm": 0.6678558426594327, "learning_rate": 2.1698158222945125e-06, "loss": 0.0161, "step": 203650 }, { "epoch": 0.8497592442690122, "grad_norm": 0.4758901483625404, "learning_rate": 2.169789186074005e-06, "loss": 0.0226, "step": 203655 }, { "epoch": 0.8497801069840025, "grad_norm": 0.4731920353616194, "learning_rate": 2.1697625508344162e-06, "loss": 0.0169, "step": 203660 }, { "epoch": 0.8498009696989928, "grad_norm": 0.9317895521658478, "learning_rate": 2.1697359165756855e-06, "loss": 0.0246, "step": 203665 }, { "epoch": 0.849821832413983, "grad_norm": 0.6810688048850854, "learning_rate": 2.1697092832977533e-06, "loss": 0.0144, "step": 203670 }, { "epoch": 0.8498426951289733, "grad_norm": 0.5009575959356912, "learning_rate": 2.1696826510005582e-06, "loss": 0.0168, "step": 203675 }, { "epoch": 0.8498635578439636, "grad_norm": 0.6517276743827839, "learning_rate": 2.1696560196840413e-06, "loss": 0.0183, "step": 203680 }, { "epoch": 0.8498844205589539, "grad_norm": 0.6107897493752994, "learning_rate": 2.1696293893481417e-06, "loss": 0.0186, "step": 203685 }, { "epoch": 0.8499052832739441, "grad_norm": 0.38828988081096916, "learning_rate": 2.1696027599928e-06, "loss": 0.0265, "step": 203690 }, { "epoch": 0.8499261459889345, "grad_norm": 0.6726948965050831, "learning_rate": 2.169576131617955e-06, "loss": 0.0157, "step": 203695 }, { "epoch": 0.8499470087039247, "grad_norm": 0.37275291398220534, "learning_rate": 2.1695495042235474e-06, "loss": 0.02, "step": 203700 }, { "epoch": 0.8499678714189149, "grad_norm": 1.0092397104089548, "learning_rate": 2.1695228778095158e-06, "loss": 0.0244, "step": 203705 }, { "epoch": 0.8499887341339053, "grad_norm": 0.6445616951100511, "learning_rate": 2.169496252375802e-06, "loss": 0.0178, "step": 203710 }, { "epoch": 0.8500095968488955, "grad_norm": 0.6672705517910926, "learning_rate": 2.1694696279223437e-06, "loss": 0.0203, "step": 203715 }, { "epoch": 0.8500304595638858, "grad_norm": 0.3819227427401534, "learning_rate": 2.169443004449082e-06, "loss": 0.0171, "step": 203720 }, { "epoch": 0.850051322278876, "grad_norm": 0.7094192140404334, "learning_rate": 2.169416381955957e-06, "loss": 0.0213, "step": 203725 }, { "epoch": 0.8500721849938664, "grad_norm": 0.4892178144465647, "learning_rate": 2.169389760442908e-06, "loss": 0.018, "step": 203730 }, { "epoch": 0.8500930477088566, "grad_norm": 0.8412369305846874, "learning_rate": 2.169363139909875e-06, "loss": 0.0264, "step": 203735 }, { "epoch": 0.8501139104238469, "grad_norm": 0.5776879747156808, "learning_rate": 2.1693365203567974e-06, "loss": 0.0123, "step": 203740 }, { "epoch": 0.8501347731388372, "grad_norm": 0.49193424677011677, "learning_rate": 2.169309901783616e-06, "loss": 0.0246, "step": 203745 }, { "epoch": 0.8501556358538275, "grad_norm": 0.44833486619668245, "learning_rate": 2.16928328419027e-06, "loss": 0.0198, "step": 203750 }, { "epoch": 0.8501764985688177, "grad_norm": 0.4441276341447017, "learning_rate": 2.1692566675766996e-06, "loss": 0.0262, "step": 203755 }, { "epoch": 0.8501973612838081, "grad_norm": 1.0847328248421546, "learning_rate": 2.1692300519428444e-06, "loss": 0.02, "step": 203760 }, { "epoch": 0.8502182239987983, "grad_norm": 0.33024968445232417, "learning_rate": 2.1692034372886447e-06, "loss": 0.0146, "step": 203765 }, { "epoch": 0.8502390867137886, "grad_norm": 0.3774569266799405, "learning_rate": 2.16917682361404e-06, "loss": 0.0182, "step": 203770 }, { "epoch": 0.8502599494287789, "grad_norm": 0.45177234862999255, "learning_rate": 2.16915021091897e-06, "loss": 0.0276, "step": 203775 }, { "epoch": 0.8502808121437692, "grad_norm": 0.6424425974222506, "learning_rate": 2.169123599203376e-06, "loss": 0.0173, "step": 203780 }, { "epoch": 0.8503016748587594, "grad_norm": 0.7707670356087747, "learning_rate": 2.169096988467196e-06, "loss": 0.0228, "step": 203785 }, { "epoch": 0.8503225375737496, "grad_norm": 0.7283373899929902, "learning_rate": 2.169070378710371e-06, "loss": 0.0212, "step": 203790 }, { "epoch": 0.85034340028874, "grad_norm": 0.5774330480963424, "learning_rate": 2.1690437699328414e-06, "loss": 0.0209, "step": 203795 }, { "epoch": 0.8503642630037302, "grad_norm": 0.42836031351196624, "learning_rate": 2.169017162134546e-06, "loss": 0.0231, "step": 203800 }, { "epoch": 0.8503851257187205, "grad_norm": 0.7611168320438345, "learning_rate": 2.1689905553154254e-06, "loss": 0.0166, "step": 203805 }, { "epoch": 0.8504059884337108, "grad_norm": 5.948734246266518, "learning_rate": 2.168963949475419e-06, "loss": 0.0183, "step": 203810 }, { "epoch": 0.8504268511487011, "grad_norm": 0.802342369729498, "learning_rate": 2.168937344614468e-06, "loss": 0.0221, "step": 203815 }, { "epoch": 0.8504477138636913, "grad_norm": 1.0890467714300596, "learning_rate": 2.1689107407325104e-06, "loss": 0.0231, "step": 203820 }, { "epoch": 0.8504685765786817, "grad_norm": 0.46942848935040826, "learning_rate": 2.1688841378294884e-06, "loss": 0.0164, "step": 203825 }, { "epoch": 0.8504894392936719, "grad_norm": 0.7829547224841577, "learning_rate": 2.16885753590534e-06, "loss": 0.0213, "step": 203830 }, { "epoch": 0.8505103020086622, "grad_norm": 0.30633507146097133, "learning_rate": 2.1688309349600065e-06, "loss": 0.0179, "step": 203835 }, { "epoch": 0.8505311647236524, "grad_norm": 1.5120933337318196, "learning_rate": 2.168804334993427e-06, "loss": 0.0214, "step": 203840 }, { "epoch": 0.8505520274386428, "grad_norm": 0.7542108012414602, "learning_rate": 2.168777736005542e-06, "loss": 0.022, "step": 203845 }, { "epoch": 0.850572890153633, "grad_norm": 0.7795247507471676, "learning_rate": 2.1687511379962916e-06, "loss": 0.0212, "step": 203850 }, { "epoch": 0.8505937528686233, "grad_norm": 0.832844497325358, "learning_rate": 2.168724540965615e-06, "loss": 0.0199, "step": 203855 }, { "epoch": 0.8506146155836136, "grad_norm": 0.6728924642864081, "learning_rate": 2.1686979449134533e-06, "loss": 0.0213, "step": 203860 }, { "epoch": 0.8506354782986039, "grad_norm": 0.7011193052581407, "learning_rate": 2.1686713498397457e-06, "loss": 0.0231, "step": 203865 }, { "epoch": 0.8506563410135941, "grad_norm": 0.5177272448202654, "learning_rate": 2.168644755744432e-06, "loss": 0.0219, "step": 203870 }, { "epoch": 0.8506772037285845, "grad_norm": 0.8222168180422111, "learning_rate": 2.1686181626274535e-06, "loss": 0.0219, "step": 203875 }, { "epoch": 0.8506980664435747, "grad_norm": 0.45011737127286944, "learning_rate": 2.168591570488749e-06, "loss": 0.0193, "step": 203880 }, { "epoch": 0.850718929158565, "grad_norm": 0.6513123972539979, "learning_rate": 2.1685649793282587e-06, "loss": 0.0167, "step": 203885 }, { "epoch": 0.8507397918735553, "grad_norm": 0.7728327518603312, "learning_rate": 2.168538389145923e-06, "loss": 0.0209, "step": 203890 }, { "epoch": 0.8507606545885456, "grad_norm": 1.5065644554309692, "learning_rate": 2.168511799941682e-06, "loss": 0.0181, "step": 203895 }, { "epoch": 0.8507815173035358, "grad_norm": 1.0434720846550145, "learning_rate": 2.168485211715475e-06, "loss": 0.0178, "step": 203900 }, { "epoch": 0.850802380018526, "grad_norm": 0.3802745936337294, "learning_rate": 2.1684586244672428e-06, "loss": 0.0268, "step": 203905 }, { "epoch": 0.8508232427335164, "grad_norm": 0.7715227062216329, "learning_rate": 2.168432038196925e-06, "loss": 0.0188, "step": 203910 }, { "epoch": 0.8508441054485066, "grad_norm": 0.6180920888283092, "learning_rate": 2.168405452904462e-06, "loss": 0.017, "step": 203915 }, { "epoch": 0.8508649681634969, "grad_norm": 0.31815925934737016, "learning_rate": 2.168378868589793e-06, "loss": 0.0183, "step": 203920 }, { "epoch": 0.8508858308784872, "grad_norm": 0.23041698475538866, "learning_rate": 2.16835228525286e-06, "loss": 0.018, "step": 203925 }, { "epoch": 0.8509066935934775, "grad_norm": 0.8452462975562212, "learning_rate": 2.168325702893601e-06, "loss": 0.0193, "step": 203930 }, { "epoch": 0.8509275563084677, "grad_norm": 0.745125138885986, "learning_rate": 2.1682991215119575e-06, "loss": 0.0304, "step": 203935 }, { "epoch": 0.8509484190234581, "grad_norm": 0.6957748034002844, "learning_rate": 2.1682725411078682e-06, "loss": 0.0159, "step": 203940 }, { "epoch": 0.8509692817384483, "grad_norm": 0.5529341359349592, "learning_rate": 2.1682459616812745e-06, "loss": 0.017, "step": 203945 }, { "epoch": 0.8509901444534386, "grad_norm": 1.3542749447946996, "learning_rate": 2.168219383232116e-06, "loss": 0.0232, "step": 203950 }, { "epoch": 0.8510110071684289, "grad_norm": 0.48339495344804906, "learning_rate": 2.1681928057603322e-06, "loss": 0.0173, "step": 203955 }, { "epoch": 0.8510318698834192, "grad_norm": 0.5122380136481159, "learning_rate": 2.168166229265864e-06, "loss": 0.0176, "step": 203960 }, { "epoch": 0.8510527325984094, "grad_norm": 0.7758093599952136, "learning_rate": 2.1681396537486515e-06, "loss": 0.0182, "step": 203965 }, { "epoch": 0.8510735953133997, "grad_norm": 1.1583118920509705, "learning_rate": 2.1681130792086345e-06, "loss": 0.0176, "step": 203970 }, { "epoch": 0.85109445802839, "grad_norm": 0.43810850002856006, "learning_rate": 2.168086505645753e-06, "loss": 0.0262, "step": 203975 }, { "epoch": 0.8511153207433803, "grad_norm": 0.30930164177814923, "learning_rate": 2.1680599330599473e-06, "loss": 0.0148, "step": 203980 }, { "epoch": 0.8511361834583705, "grad_norm": 1.6063756139526997, "learning_rate": 2.1680333614511577e-06, "loss": 0.023, "step": 203985 }, { "epoch": 0.8511570461733609, "grad_norm": 1.0453039899142615, "learning_rate": 2.168006790819324e-06, "loss": 0.0234, "step": 203990 }, { "epoch": 0.8511779088883511, "grad_norm": 0.5971954310477076, "learning_rate": 2.1679802211643866e-06, "loss": 0.021, "step": 203995 }, { "epoch": 0.8511987716033413, "grad_norm": 0.5597471670972648, "learning_rate": 2.1679536524862853e-06, "loss": 0.0178, "step": 204000 }, { "epoch": 0.8512196343183317, "grad_norm": 0.3926716583068822, "learning_rate": 2.1679270847849603e-06, "loss": 0.0143, "step": 204005 }, { "epoch": 0.8512404970333219, "grad_norm": 0.800005262784401, "learning_rate": 2.1679005180603524e-06, "loss": 0.0234, "step": 204010 }, { "epoch": 0.8512613597483122, "grad_norm": 0.6012954767522973, "learning_rate": 2.1678739523124014e-06, "loss": 0.0161, "step": 204015 }, { "epoch": 0.8512822224633024, "grad_norm": 0.706090090563877, "learning_rate": 2.167847387541047e-06, "loss": 0.0226, "step": 204020 }, { "epoch": 0.8513030851782928, "grad_norm": 0.8113956803037288, "learning_rate": 2.1678208237462293e-06, "loss": 0.0308, "step": 204025 }, { "epoch": 0.851323947893283, "grad_norm": 1.1483325911632143, "learning_rate": 2.1677942609278896e-06, "loss": 0.0209, "step": 204030 }, { "epoch": 0.8513448106082733, "grad_norm": 1.251203576292297, "learning_rate": 2.167767699085967e-06, "loss": 0.0193, "step": 204035 }, { "epoch": 0.8513656733232636, "grad_norm": 1.145501132475653, "learning_rate": 2.167741138220402e-06, "loss": 0.0205, "step": 204040 }, { "epoch": 0.8513865360382539, "grad_norm": 0.7548628630549311, "learning_rate": 2.1677145783311346e-06, "loss": 0.0216, "step": 204045 }, { "epoch": 0.8514073987532441, "grad_norm": 1.7927815052280498, "learning_rate": 2.167688019418106e-06, "loss": 0.0241, "step": 204050 }, { "epoch": 0.8514282614682345, "grad_norm": 0.7014735457583858, "learning_rate": 2.167661461481255e-06, "loss": 0.0191, "step": 204055 }, { "epoch": 0.8514491241832247, "grad_norm": 1.2634508779230034, "learning_rate": 2.1676349045205227e-06, "loss": 0.0147, "step": 204060 }, { "epoch": 0.851469986898215, "grad_norm": 0.5662290847394414, "learning_rate": 2.167608348535849e-06, "loss": 0.0173, "step": 204065 }, { "epoch": 0.8514908496132053, "grad_norm": 0.4247820413654312, "learning_rate": 2.1675817935271735e-06, "loss": 0.022, "step": 204070 }, { "epoch": 0.8515117123281956, "grad_norm": 0.5355142320129935, "learning_rate": 2.167555239494438e-06, "loss": 0.0199, "step": 204075 }, { "epoch": 0.8515325750431858, "grad_norm": 0.9829800630113423, "learning_rate": 2.167528686437581e-06, "loss": 0.0255, "step": 204080 }, { "epoch": 0.851553437758176, "grad_norm": 0.628210306723452, "learning_rate": 2.1675021343565436e-06, "loss": 0.0192, "step": 204085 }, { "epoch": 0.8515743004731664, "grad_norm": 0.6742590776554275, "learning_rate": 2.167475583251266e-06, "loss": 0.0223, "step": 204090 }, { "epoch": 0.8515951631881566, "grad_norm": 0.41423816552620035, "learning_rate": 2.1674490331216887e-06, "loss": 0.0248, "step": 204095 }, { "epoch": 0.8516160259031469, "grad_norm": 0.40246229835236097, "learning_rate": 2.1674224839677514e-06, "loss": 0.0247, "step": 204100 }, { "epoch": 0.8516368886181372, "grad_norm": 0.6265567818737754, "learning_rate": 2.1673959357893944e-06, "loss": 0.0248, "step": 204105 }, { "epoch": 0.8516577513331275, "grad_norm": 0.9113612273269397, "learning_rate": 2.1673693885865586e-06, "loss": 0.0198, "step": 204110 }, { "epoch": 0.8516786140481177, "grad_norm": 1.229550570858628, "learning_rate": 2.1673428423591836e-06, "loss": 0.0263, "step": 204115 }, { "epoch": 0.8516994767631081, "grad_norm": 0.8566294773142225, "learning_rate": 2.167316297107209e-06, "loss": 0.0174, "step": 204120 }, { "epoch": 0.8517203394780983, "grad_norm": 0.5804444258978818, "learning_rate": 2.167289752830577e-06, "loss": 0.0153, "step": 204125 }, { "epoch": 0.8517412021930886, "grad_norm": 0.9186741987375067, "learning_rate": 2.167263209529226e-06, "loss": 0.0201, "step": 204130 }, { "epoch": 0.8517620649080789, "grad_norm": 0.6606924817291665, "learning_rate": 2.1672366672030976e-06, "loss": 0.0317, "step": 204135 }, { "epoch": 0.8517829276230692, "grad_norm": 0.6111464403427151, "learning_rate": 2.167210125852131e-06, "loss": 0.0184, "step": 204140 }, { "epoch": 0.8518037903380594, "grad_norm": 0.5411472386973571, "learning_rate": 2.167183585476268e-06, "loss": 0.0243, "step": 204145 }, { "epoch": 0.8518246530530497, "grad_norm": 0.5769297142170233, "learning_rate": 2.167157046075447e-06, "loss": 0.0224, "step": 204150 }, { "epoch": 0.85184551576804, "grad_norm": 0.658509619337764, "learning_rate": 2.1671305076496097e-06, "loss": 0.0218, "step": 204155 }, { "epoch": 0.8518663784830303, "grad_norm": 0.6054746626025498, "learning_rate": 2.1671039701986955e-06, "loss": 0.0211, "step": 204160 }, { "epoch": 0.8518872411980205, "grad_norm": 0.5809825358522269, "learning_rate": 2.1670774337226455e-06, "loss": 0.0222, "step": 204165 }, { "epoch": 0.8519081039130109, "grad_norm": 1.3165466871557692, "learning_rate": 2.1670508982213995e-06, "loss": 0.0208, "step": 204170 }, { "epoch": 0.8519289666280011, "grad_norm": 0.7633337017216646, "learning_rate": 2.1670243636948983e-06, "loss": 0.0148, "step": 204175 }, { "epoch": 0.8519498293429913, "grad_norm": 0.7591455677020865, "learning_rate": 2.166997830143082e-06, "loss": 0.0199, "step": 204180 }, { "epoch": 0.8519706920579817, "grad_norm": 0.41463629541211333, "learning_rate": 2.1669712975658897e-06, "loss": 0.031, "step": 204185 }, { "epoch": 0.851991554772972, "grad_norm": 0.7925839931253711, "learning_rate": 2.166944765963264e-06, "loss": 0.0194, "step": 204190 }, { "epoch": 0.8520124174879622, "grad_norm": 0.3944919485812413, "learning_rate": 2.166918235335143e-06, "loss": 0.0192, "step": 204195 }, { "epoch": 0.8520332802029524, "grad_norm": 0.35071446189383026, "learning_rate": 2.166891705681469e-06, "loss": 0.0174, "step": 204200 }, { "epoch": 0.8520541429179428, "grad_norm": 0.6761139890284273, "learning_rate": 2.1668651770021813e-06, "loss": 0.0234, "step": 204205 }, { "epoch": 0.852075005632933, "grad_norm": 0.47236520750548333, "learning_rate": 2.166838649297221e-06, "loss": 0.014, "step": 204210 }, { "epoch": 0.8520958683479233, "grad_norm": 0.38803996021298576, "learning_rate": 2.166812122566527e-06, "loss": 0.0173, "step": 204215 }, { "epoch": 0.8521167310629136, "grad_norm": 0.9393605265172421, "learning_rate": 2.1667855968100413e-06, "loss": 0.0221, "step": 204220 }, { "epoch": 0.8521375937779039, "grad_norm": 0.6520822986867787, "learning_rate": 2.166759072027703e-06, "loss": 0.0187, "step": 204225 }, { "epoch": 0.8521584564928941, "grad_norm": 0.4168510886425076, "learning_rate": 2.166732548219454e-06, "loss": 0.0162, "step": 204230 }, { "epoch": 0.8521793192078845, "grad_norm": 0.6903754012115554, "learning_rate": 2.166706025385233e-06, "loss": 0.0221, "step": 204235 }, { "epoch": 0.8522001819228747, "grad_norm": 0.3607835902023056, "learning_rate": 2.1666795035249807e-06, "loss": 0.0194, "step": 204240 }, { "epoch": 0.852221044637865, "grad_norm": 0.6512724305163756, "learning_rate": 2.166652982638638e-06, "loss": 0.0197, "step": 204245 }, { "epoch": 0.8522419073528553, "grad_norm": 0.7678491846211063, "learning_rate": 2.1666264627261455e-06, "loss": 0.0128, "step": 204250 }, { "epoch": 0.8522627700678456, "grad_norm": 0.7868385689236262, "learning_rate": 2.1665999437874434e-06, "loss": 0.0255, "step": 204255 }, { "epoch": 0.8522836327828358, "grad_norm": 0.7911313045279602, "learning_rate": 2.1665734258224717e-06, "loss": 0.0271, "step": 204260 }, { "epoch": 0.852304495497826, "grad_norm": 0.710774697071931, "learning_rate": 2.1665469088311712e-06, "loss": 0.0296, "step": 204265 }, { "epoch": 0.8523253582128164, "grad_norm": 0.41328633834837913, "learning_rate": 2.166520392813482e-06, "loss": 0.0217, "step": 204270 }, { "epoch": 0.8523462209278067, "grad_norm": 0.8624370541806926, "learning_rate": 2.1664938777693457e-06, "loss": 0.0262, "step": 204275 }, { "epoch": 0.8523670836427969, "grad_norm": 0.9252801082891868, "learning_rate": 2.1664673636987008e-06, "loss": 0.0273, "step": 204280 }, { "epoch": 0.8523879463577873, "grad_norm": 0.601504942775774, "learning_rate": 2.166440850601489e-06, "loss": 0.0194, "step": 204285 }, { "epoch": 0.8524088090727775, "grad_norm": 0.7859995024476759, "learning_rate": 2.16641433847765e-06, "loss": 0.0214, "step": 204290 }, { "epoch": 0.8524296717877677, "grad_norm": 0.5089453080143996, "learning_rate": 2.1663878273271255e-06, "loss": 0.0213, "step": 204295 }, { "epoch": 0.8524505345027581, "grad_norm": 0.941104055030697, "learning_rate": 2.166361317149855e-06, "loss": 0.0226, "step": 204300 }, { "epoch": 0.8524713972177483, "grad_norm": 0.8928419204896724, "learning_rate": 2.1663348079457784e-06, "loss": 0.0268, "step": 204305 }, { "epoch": 0.8524922599327386, "grad_norm": 1.0774743973976941, "learning_rate": 2.166308299714837e-06, "loss": 0.0202, "step": 204310 }, { "epoch": 0.8525131226477289, "grad_norm": 0.5252387605849667, "learning_rate": 2.1662817924569717e-06, "loss": 0.0183, "step": 204315 }, { "epoch": 0.8525339853627192, "grad_norm": 0.9989359089378048, "learning_rate": 2.166255286172122e-06, "loss": 0.024, "step": 204320 }, { "epoch": 0.8525548480777094, "grad_norm": 1.0415325810061298, "learning_rate": 2.1662287808602288e-06, "loss": 0.0265, "step": 204325 }, { "epoch": 0.8525757107926997, "grad_norm": 0.480370107479958, "learning_rate": 2.1662022765212322e-06, "loss": 0.0146, "step": 204330 }, { "epoch": 0.85259657350769, "grad_norm": 0.9213584785040043, "learning_rate": 2.1661757731550733e-06, "loss": 0.0219, "step": 204335 }, { "epoch": 0.8526174362226803, "grad_norm": 0.8720144061792112, "learning_rate": 2.1661492707616924e-06, "loss": 0.0223, "step": 204340 }, { "epoch": 0.8526382989376705, "grad_norm": 0.6191385258230191, "learning_rate": 2.1661227693410297e-06, "loss": 0.014, "step": 204345 }, { "epoch": 0.8526591616526609, "grad_norm": 0.5925206527844741, "learning_rate": 2.1660962688930263e-06, "loss": 0.0168, "step": 204350 }, { "epoch": 0.8526800243676511, "grad_norm": 0.2743448317691065, "learning_rate": 2.1660697694176218e-06, "loss": 0.0124, "step": 204355 }, { "epoch": 0.8527008870826414, "grad_norm": 0.5707238915016575, "learning_rate": 2.166043270914757e-06, "loss": 0.0248, "step": 204360 }, { "epoch": 0.8527217497976317, "grad_norm": 0.7131503333465973, "learning_rate": 2.1660167733843732e-06, "loss": 0.0161, "step": 204365 }, { "epoch": 0.852742612512622, "grad_norm": 0.49088816111155575, "learning_rate": 2.1659902768264102e-06, "loss": 0.029, "step": 204370 }, { "epoch": 0.8527634752276122, "grad_norm": 1.5457960319095276, "learning_rate": 2.1659637812408084e-06, "loss": 0.0238, "step": 204375 }, { "epoch": 0.8527843379426024, "grad_norm": 0.3606660311145457, "learning_rate": 2.165937286627509e-06, "loss": 0.0169, "step": 204380 }, { "epoch": 0.8528052006575928, "grad_norm": 0.44735665048920475, "learning_rate": 2.1659107929864517e-06, "loss": 0.0166, "step": 204385 }, { "epoch": 0.852826063372583, "grad_norm": 0.9715079374882348, "learning_rate": 2.1658843003175777e-06, "loss": 0.0223, "step": 204390 }, { "epoch": 0.8528469260875733, "grad_norm": 0.4386521107898975, "learning_rate": 2.1658578086208274e-06, "loss": 0.0271, "step": 204395 }, { "epoch": 0.8528677888025636, "grad_norm": 0.7753509041461674, "learning_rate": 2.1658313178961406e-06, "loss": 0.0197, "step": 204400 }, { "epoch": 0.8528886515175539, "grad_norm": 0.6932348690113986, "learning_rate": 2.1658048281434592e-06, "loss": 0.0202, "step": 204405 }, { "epoch": 0.8529095142325441, "grad_norm": 0.47866570614621096, "learning_rate": 2.1657783393627228e-06, "loss": 0.0175, "step": 204410 }, { "epoch": 0.8529303769475345, "grad_norm": 0.716266900568466, "learning_rate": 2.1657518515538723e-06, "loss": 0.0149, "step": 204415 }, { "epoch": 0.8529512396625247, "grad_norm": 0.7179984839331092, "learning_rate": 2.165725364716848e-06, "loss": 0.018, "step": 204420 }, { "epoch": 0.852972102377515, "grad_norm": 1.227201411739883, "learning_rate": 2.165698878851591e-06, "loss": 0.0231, "step": 204425 }, { "epoch": 0.8529929650925053, "grad_norm": 1.0945296613750681, "learning_rate": 2.1656723939580417e-06, "loss": 0.0203, "step": 204430 }, { "epoch": 0.8530138278074956, "grad_norm": 0.5869625964177662, "learning_rate": 2.1656459100361403e-06, "loss": 0.0211, "step": 204435 }, { "epoch": 0.8530346905224858, "grad_norm": 0.8125467349767219, "learning_rate": 2.1656194270858276e-06, "loss": 0.0178, "step": 204440 }, { "epoch": 0.8530555532374761, "grad_norm": 0.7734917149809882, "learning_rate": 2.1655929451070444e-06, "loss": 0.0234, "step": 204445 }, { "epoch": 0.8530764159524664, "grad_norm": 0.8085503559679367, "learning_rate": 2.1655664640997305e-06, "loss": 0.0225, "step": 204450 }, { "epoch": 0.8530972786674567, "grad_norm": 0.5796094509968539, "learning_rate": 2.1655399840638274e-06, "loss": 0.0197, "step": 204455 }, { "epoch": 0.8531181413824469, "grad_norm": 0.4858793346010473, "learning_rate": 2.165513504999276e-06, "loss": 0.0134, "step": 204460 }, { "epoch": 0.8531390040974373, "grad_norm": 0.7731169638081606, "learning_rate": 2.165487026906016e-06, "loss": 0.0211, "step": 204465 }, { "epoch": 0.8531598668124275, "grad_norm": 0.4683931956454418, "learning_rate": 2.165460549783988e-06, "loss": 0.0218, "step": 204470 }, { "epoch": 0.8531807295274177, "grad_norm": 0.6579484817858334, "learning_rate": 2.1654340736331337e-06, "loss": 0.0193, "step": 204475 }, { "epoch": 0.8532015922424081, "grad_norm": 0.6126348891105659, "learning_rate": 2.1654075984533925e-06, "loss": 0.0163, "step": 204480 }, { "epoch": 0.8532224549573983, "grad_norm": 0.4933011277734659, "learning_rate": 2.1653811242447055e-06, "loss": 0.0274, "step": 204485 }, { "epoch": 0.8532433176723886, "grad_norm": 0.7207167116129083, "learning_rate": 2.1653546510070143e-06, "loss": 0.0211, "step": 204490 }, { "epoch": 0.853264180387379, "grad_norm": 0.42638691475935747, "learning_rate": 2.165328178740258e-06, "loss": 0.0145, "step": 204495 }, { "epoch": 0.8532850431023692, "grad_norm": 0.59519775150349, "learning_rate": 2.165301707444378e-06, "loss": 0.0159, "step": 204500 }, { "epoch": 0.8533059058173594, "grad_norm": 0.5235989951080152, "learning_rate": 2.1652752371193144e-06, "loss": 0.0206, "step": 204505 }, { "epoch": 0.8533267685323497, "grad_norm": 0.7307857396602906, "learning_rate": 2.1652487677650093e-06, "loss": 0.0226, "step": 204510 }, { "epoch": 0.85334763124734, "grad_norm": 0.40667089507944776, "learning_rate": 2.1652222993814013e-06, "loss": 0.0144, "step": 204515 }, { "epoch": 0.8533684939623303, "grad_norm": 0.5496435297812045, "learning_rate": 2.1651958319684325e-06, "loss": 0.0176, "step": 204520 }, { "epoch": 0.8533893566773205, "grad_norm": 1.0525360618878024, "learning_rate": 2.1651693655260436e-06, "loss": 0.0236, "step": 204525 }, { "epoch": 0.8534102193923109, "grad_norm": 1.1435567800733772, "learning_rate": 2.165142900054175e-06, "loss": 0.0289, "step": 204530 }, { "epoch": 0.8534310821073011, "grad_norm": 0.669241910285297, "learning_rate": 2.165116435552767e-06, "loss": 0.0212, "step": 204535 }, { "epoch": 0.8534519448222914, "grad_norm": 0.8060543285407231, "learning_rate": 2.1650899720217606e-06, "loss": 0.0189, "step": 204540 }, { "epoch": 0.8534728075372817, "grad_norm": 0.46607124574528314, "learning_rate": 2.165063509461097e-06, "loss": 0.0219, "step": 204545 }, { "epoch": 0.853493670252272, "grad_norm": 0.7843536394312863, "learning_rate": 2.1650370478707154e-06, "loss": 0.0176, "step": 204550 }, { "epoch": 0.8535145329672622, "grad_norm": 0.6123837438288441, "learning_rate": 2.1650105872505587e-06, "loss": 0.023, "step": 204555 }, { "epoch": 0.8535353956822525, "grad_norm": 0.4422775689122927, "learning_rate": 2.164984127600566e-06, "loss": 0.0199, "step": 204560 }, { "epoch": 0.8535562583972428, "grad_norm": 0.5565631156154779, "learning_rate": 2.1649576689206776e-06, "loss": 0.019, "step": 204565 }, { "epoch": 0.853577121112233, "grad_norm": 0.7153320739682817, "learning_rate": 2.164931211210836e-06, "loss": 0.0286, "step": 204570 }, { "epoch": 0.8535979838272233, "grad_norm": 0.37184874914892263, "learning_rate": 2.1649047544709807e-06, "loss": 0.0207, "step": 204575 }, { "epoch": 0.8536188465422136, "grad_norm": 0.4707732190562794, "learning_rate": 2.1648782987010532e-06, "loss": 0.0184, "step": 204580 }, { "epoch": 0.8536397092572039, "grad_norm": 0.49653245717626454, "learning_rate": 2.164851843900993e-06, "loss": 0.0169, "step": 204585 }, { "epoch": 0.8536605719721941, "grad_norm": 0.5237710472572885, "learning_rate": 2.164825390070742e-06, "loss": 0.016, "step": 204590 }, { "epoch": 0.8536814346871845, "grad_norm": 0.6283716573128602, "learning_rate": 2.1647989372102406e-06, "loss": 0.0239, "step": 204595 }, { "epoch": 0.8537022974021747, "grad_norm": 0.6575122356134369, "learning_rate": 2.164772485319429e-06, "loss": 0.013, "step": 204600 }, { "epoch": 0.853723160117165, "grad_norm": 0.6999854476856918, "learning_rate": 2.1647460343982488e-06, "loss": 0.0177, "step": 204605 }, { "epoch": 0.8537440228321553, "grad_norm": 0.7465667254929522, "learning_rate": 2.1647195844466406e-06, "loss": 0.0253, "step": 204610 }, { "epoch": 0.8537648855471456, "grad_norm": 0.7667928488970236, "learning_rate": 2.1646931354645446e-06, "loss": 0.0189, "step": 204615 }, { "epoch": 0.8537857482621358, "grad_norm": 0.3805086770642114, "learning_rate": 2.1646666874519024e-06, "loss": 0.0212, "step": 204620 }, { "epoch": 0.8538066109771261, "grad_norm": 1.0253235905784475, "learning_rate": 2.164640240408654e-06, "loss": 0.0225, "step": 204625 }, { "epoch": 0.8538274736921164, "grad_norm": 0.5991670075635496, "learning_rate": 2.1646137943347405e-06, "loss": 0.0263, "step": 204630 }, { "epoch": 0.8538483364071067, "grad_norm": 0.3340001184163865, "learning_rate": 2.164587349230103e-06, "loss": 0.0231, "step": 204635 }, { "epoch": 0.8538691991220969, "grad_norm": 0.496380517946707, "learning_rate": 2.164560905094682e-06, "loss": 0.022, "step": 204640 }, { "epoch": 0.8538900618370873, "grad_norm": 0.7350164884329276, "learning_rate": 2.164534461928418e-06, "loss": 0.0151, "step": 204645 }, { "epoch": 0.8539109245520775, "grad_norm": 0.6841622173380982, "learning_rate": 2.164508019731252e-06, "loss": 0.0234, "step": 204650 }, { "epoch": 0.8539317872670678, "grad_norm": 0.7045451512640408, "learning_rate": 2.1644815785031254e-06, "loss": 0.0266, "step": 204655 }, { "epoch": 0.8539526499820581, "grad_norm": 0.8130829088514405, "learning_rate": 2.164455138243978e-06, "loss": 0.0213, "step": 204660 }, { "epoch": 0.8539735126970484, "grad_norm": 0.6177123899487272, "learning_rate": 2.164428698953752e-06, "loss": 0.0197, "step": 204665 }, { "epoch": 0.8539943754120386, "grad_norm": 0.8902395502682846, "learning_rate": 2.164402260632387e-06, "loss": 0.0196, "step": 204670 }, { "epoch": 0.854015238127029, "grad_norm": 0.5217556723153837, "learning_rate": 2.1643758232798233e-06, "loss": 0.0188, "step": 204675 }, { "epoch": 0.8540361008420192, "grad_norm": 1.6322953501447683, "learning_rate": 2.1643493868960037e-06, "loss": 0.0159, "step": 204680 }, { "epoch": 0.8540569635570094, "grad_norm": 0.42175782327455597, "learning_rate": 2.164322951480868e-06, "loss": 0.0183, "step": 204685 }, { "epoch": 0.8540778262719997, "grad_norm": 0.47912152471098957, "learning_rate": 2.1642965170343562e-06, "loss": 0.0277, "step": 204690 }, { "epoch": 0.85409868898699, "grad_norm": 1.325990776611598, "learning_rate": 2.1642700835564106e-06, "loss": 0.0216, "step": 204695 }, { "epoch": 0.8541195517019803, "grad_norm": 1.1944274687454954, "learning_rate": 2.164243651046971e-06, "loss": 0.0256, "step": 204700 }, { "epoch": 0.8541404144169705, "grad_norm": 0.8146143166133023, "learning_rate": 2.1642172195059792e-06, "loss": 0.0257, "step": 204705 }, { "epoch": 0.8541612771319609, "grad_norm": 0.6531379160197537, "learning_rate": 2.1641907889333753e-06, "loss": 0.0244, "step": 204710 }, { "epoch": 0.8541821398469511, "grad_norm": 0.5166398576246838, "learning_rate": 2.1641643593291e-06, "loss": 0.0157, "step": 204715 }, { "epoch": 0.8542030025619414, "grad_norm": 0.4873774272493453, "learning_rate": 2.164137930693095e-06, "loss": 0.0211, "step": 204720 }, { "epoch": 0.8542238652769317, "grad_norm": 0.697234677219126, "learning_rate": 2.1641115030253005e-06, "loss": 0.024, "step": 204725 }, { "epoch": 0.854244727991922, "grad_norm": 0.3367735274764651, "learning_rate": 2.164085076325658e-06, "loss": 0.0157, "step": 204730 }, { "epoch": 0.8542655907069122, "grad_norm": 0.5132071886342247, "learning_rate": 2.164058650594108e-06, "loss": 0.0188, "step": 204735 }, { "epoch": 0.8542864534219025, "grad_norm": 1.007905820093304, "learning_rate": 2.164032225830591e-06, "loss": 0.0233, "step": 204740 }, { "epoch": 0.8543073161368928, "grad_norm": 0.5371115522234761, "learning_rate": 2.164005802035049e-06, "loss": 0.032, "step": 204745 }, { "epoch": 0.8543281788518831, "grad_norm": 0.5911238027074487, "learning_rate": 2.1639793792074216e-06, "loss": 0.017, "step": 204750 }, { "epoch": 0.8543490415668733, "grad_norm": 0.47628888084746435, "learning_rate": 2.163952957347651e-06, "loss": 0.0193, "step": 204755 }, { "epoch": 0.8543699042818637, "grad_norm": 0.5375972483634204, "learning_rate": 2.1639265364556773e-06, "loss": 0.0174, "step": 204760 }, { "epoch": 0.8543907669968539, "grad_norm": 1.1540117198460398, "learning_rate": 2.1639001165314413e-06, "loss": 0.0286, "step": 204765 }, { "epoch": 0.8544116297118441, "grad_norm": 0.505070598124429, "learning_rate": 2.1638736975748837e-06, "loss": 0.0243, "step": 204770 }, { "epoch": 0.8544324924268345, "grad_norm": 0.7515630721267169, "learning_rate": 2.163847279585947e-06, "loss": 0.0171, "step": 204775 }, { "epoch": 0.8544533551418247, "grad_norm": 0.677018664889402, "learning_rate": 2.1638208625645703e-06, "loss": 0.02, "step": 204780 }, { "epoch": 0.854474217856815, "grad_norm": 0.7548529130637354, "learning_rate": 2.1637944465106955e-06, "loss": 0.019, "step": 204785 }, { "epoch": 0.8544950805718053, "grad_norm": 0.6567766456962608, "learning_rate": 2.1637680314242632e-06, "loss": 0.0202, "step": 204790 }, { "epoch": 0.8545159432867956, "grad_norm": 0.5756214205245396, "learning_rate": 2.1637416173052154e-06, "loss": 0.0212, "step": 204795 }, { "epoch": 0.8545368060017858, "grad_norm": 0.7481852553157032, "learning_rate": 2.163715204153491e-06, "loss": 0.0245, "step": 204800 }, { "epoch": 0.8545576687167761, "grad_norm": 0.4200712241521682, "learning_rate": 2.163688791969033e-06, "loss": 0.0216, "step": 204805 }, { "epoch": 0.8545785314317664, "grad_norm": 0.36125869147375667, "learning_rate": 2.1636623807517807e-06, "loss": 0.0178, "step": 204810 }, { "epoch": 0.8545993941467567, "grad_norm": 0.421191014234353, "learning_rate": 2.163635970501676e-06, "loss": 0.0236, "step": 204815 }, { "epoch": 0.8546202568617469, "grad_norm": 0.8306667625824401, "learning_rate": 2.1636095612186605e-06, "loss": 0.0163, "step": 204820 }, { "epoch": 0.8546411195767373, "grad_norm": 0.5247242428428988, "learning_rate": 2.163583152902674e-06, "loss": 0.0159, "step": 204825 }, { "epoch": 0.8546619822917275, "grad_norm": 0.9146006419706835, "learning_rate": 2.1635567455536576e-06, "loss": 0.0249, "step": 204830 }, { "epoch": 0.8546828450067178, "grad_norm": 0.4254444104975144, "learning_rate": 2.1635303391715527e-06, "loss": 0.0204, "step": 204835 }, { "epoch": 0.8547037077217081, "grad_norm": 0.7382288859953098, "learning_rate": 2.1635039337563004e-06, "loss": 0.0201, "step": 204840 }, { "epoch": 0.8547245704366984, "grad_norm": 1.1115845439134096, "learning_rate": 2.1634775293078412e-06, "loss": 0.0179, "step": 204845 }, { "epoch": 0.8547454331516886, "grad_norm": 0.4039340801716238, "learning_rate": 2.163451125826117e-06, "loss": 0.0248, "step": 204850 }, { "epoch": 0.854766295866679, "grad_norm": 0.6982799488260112, "learning_rate": 2.1634247233110673e-06, "loss": 0.0235, "step": 204855 }, { "epoch": 0.8547871585816692, "grad_norm": 0.6456771587959973, "learning_rate": 2.1633983217626342e-06, "loss": 0.0218, "step": 204860 }, { "epoch": 0.8548080212966594, "grad_norm": 0.43655397849869554, "learning_rate": 2.163371921180759e-06, "loss": 0.0214, "step": 204865 }, { "epoch": 0.8548288840116497, "grad_norm": 0.5738406549044492, "learning_rate": 2.1633455215653817e-06, "loss": 0.0217, "step": 204870 }, { "epoch": 0.85484974672664, "grad_norm": 0.43500445094794066, "learning_rate": 2.1633191229164446e-06, "loss": 0.0119, "step": 204875 }, { "epoch": 0.8548706094416303, "grad_norm": 0.7386800105112222, "learning_rate": 2.1632927252338877e-06, "loss": 0.018, "step": 204880 }, { "epoch": 0.8548914721566205, "grad_norm": 1.072961522364962, "learning_rate": 2.163266328517652e-06, "loss": 0.0223, "step": 204885 }, { "epoch": 0.8549123348716109, "grad_norm": 0.5026399006330468, "learning_rate": 2.163239932767679e-06, "loss": 0.0215, "step": 204890 }, { "epoch": 0.8549331975866011, "grad_norm": 0.4280141447683798, "learning_rate": 2.16321353798391e-06, "loss": 0.0188, "step": 204895 }, { "epoch": 0.8549540603015914, "grad_norm": 0.6600078997459361, "learning_rate": 2.1631871441662855e-06, "loss": 0.0151, "step": 204900 }, { "epoch": 0.8549749230165817, "grad_norm": 1.1001743348105117, "learning_rate": 2.163160751314747e-06, "loss": 0.0257, "step": 204905 }, { "epoch": 0.854995785731572, "grad_norm": 0.381792709058744, "learning_rate": 2.163134359429235e-06, "loss": 0.0181, "step": 204910 }, { "epoch": 0.8550166484465622, "grad_norm": 0.8757842256648546, "learning_rate": 2.1631079685096913e-06, "loss": 0.0217, "step": 204915 }, { "epoch": 0.8550375111615525, "grad_norm": 0.42014745232084305, "learning_rate": 2.163081578556056e-06, "loss": 0.0293, "step": 204920 }, { "epoch": 0.8550583738765428, "grad_norm": 0.5640886577155767, "learning_rate": 2.1630551895682715e-06, "loss": 0.0172, "step": 204925 }, { "epoch": 0.8550792365915331, "grad_norm": 0.9349993393759789, "learning_rate": 2.163028801546277e-06, "loss": 0.0232, "step": 204930 }, { "epoch": 0.8551000993065233, "grad_norm": 0.5221471182010412, "learning_rate": 2.163002414490016e-06, "loss": 0.0227, "step": 204935 }, { "epoch": 0.8551209620215137, "grad_norm": 0.689006099894864, "learning_rate": 2.162976028399428e-06, "loss": 0.0258, "step": 204940 }, { "epoch": 0.8551418247365039, "grad_norm": 0.30741521271835315, "learning_rate": 2.1629496432744546e-06, "loss": 0.021, "step": 204945 }, { "epoch": 0.8551626874514942, "grad_norm": 0.3761378754466105, "learning_rate": 2.1629232591150362e-06, "loss": 0.013, "step": 204950 }, { "epoch": 0.8551835501664845, "grad_norm": 0.6796068402256951, "learning_rate": 2.162896875921115e-06, "loss": 0.0171, "step": 204955 }, { "epoch": 0.8552044128814748, "grad_norm": 0.932192919762339, "learning_rate": 2.162870493692631e-06, "loss": 0.0228, "step": 204960 }, { "epoch": 0.855225275596465, "grad_norm": 0.5980973251521294, "learning_rate": 2.162844112429526e-06, "loss": 0.0238, "step": 204965 }, { "epoch": 0.8552461383114554, "grad_norm": 0.49166613221105726, "learning_rate": 2.1628177321317414e-06, "loss": 0.0159, "step": 204970 }, { "epoch": 0.8552670010264456, "grad_norm": 1.169991282366097, "learning_rate": 2.162791352799218e-06, "loss": 0.0255, "step": 204975 }, { "epoch": 0.8552878637414358, "grad_norm": 1.1561564414063976, "learning_rate": 2.1627649744318967e-06, "loss": 0.0216, "step": 204980 }, { "epoch": 0.8553087264564261, "grad_norm": 0.6860723009088613, "learning_rate": 2.1627385970297184e-06, "loss": 0.0223, "step": 204985 }, { "epoch": 0.8553295891714164, "grad_norm": 0.7137941691382049, "learning_rate": 2.1627122205926253e-06, "loss": 0.0215, "step": 204990 }, { "epoch": 0.8553504518864067, "grad_norm": 0.738011993581828, "learning_rate": 2.162685845120558e-06, "loss": 0.0254, "step": 204995 }, { "epoch": 0.8553713146013969, "grad_norm": 0.38154876873707305, "learning_rate": 2.1626594706134566e-06, "loss": 0.0151, "step": 205000 }, { "epoch": 0.8553921773163873, "grad_norm": 0.4220924150300521, "learning_rate": 2.1626330970712646e-06, "loss": 0.02, "step": 205005 }, { "epoch": 0.8554130400313775, "grad_norm": 0.586882522047371, "learning_rate": 2.1626067244939207e-06, "loss": 0.0146, "step": 205010 }, { "epoch": 0.8554339027463678, "grad_norm": 0.7904999953087147, "learning_rate": 2.162580352881368e-06, "loss": 0.0172, "step": 205015 }, { "epoch": 0.8554547654613581, "grad_norm": 0.6247448490522871, "learning_rate": 2.1625539822335465e-06, "loss": 0.0243, "step": 205020 }, { "epoch": 0.8554756281763484, "grad_norm": 0.6897326600226654, "learning_rate": 2.1625276125503976e-06, "loss": 0.0203, "step": 205025 }, { "epoch": 0.8554964908913386, "grad_norm": 0.4486173749101793, "learning_rate": 2.1625012438318626e-06, "loss": 0.0256, "step": 205030 }, { "epoch": 0.855517353606329, "grad_norm": 0.677907387641856, "learning_rate": 2.162474876077883e-06, "loss": 0.0253, "step": 205035 }, { "epoch": 0.8555382163213192, "grad_norm": 0.9297490304111246, "learning_rate": 2.1624485092883994e-06, "loss": 0.0254, "step": 205040 }, { "epoch": 0.8555590790363095, "grad_norm": 0.3100824186828856, "learning_rate": 2.162422143463354e-06, "loss": 0.0113, "step": 205045 }, { "epoch": 0.8555799417512997, "grad_norm": 0.70304806184031, "learning_rate": 2.1623957786026868e-06, "loss": 0.0155, "step": 205050 }, { "epoch": 0.85560080446629, "grad_norm": 0.7924481359026586, "learning_rate": 2.1623694147063396e-06, "loss": 0.0183, "step": 205055 }, { "epoch": 0.8556216671812803, "grad_norm": 0.8240254563784798, "learning_rate": 2.1623430517742534e-06, "loss": 0.0197, "step": 205060 }, { "epoch": 0.8556425298962705, "grad_norm": 0.3312467294154774, "learning_rate": 2.1623166898063698e-06, "loss": 0.0196, "step": 205065 }, { "epoch": 0.8556633926112609, "grad_norm": 0.6559312011660915, "learning_rate": 2.1622903288026295e-06, "loss": 0.0238, "step": 205070 }, { "epoch": 0.8556842553262511, "grad_norm": 0.7573533696244868, "learning_rate": 2.162263968762975e-06, "loss": 0.0263, "step": 205075 }, { "epoch": 0.8557051180412414, "grad_norm": 0.5957668184600118, "learning_rate": 2.162237609687345e-06, "loss": 0.0217, "step": 205080 }, { "epoch": 0.8557259807562317, "grad_norm": 0.7601428627616225, "learning_rate": 2.162211251575683e-06, "loss": 0.0172, "step": 205085 }, { "epoch": 0.855746843471222, "grad_norm": 0.6126689750388848, "learning_rate": 2.1621848944279302e-06, "loss": 0.0212, "step": 205090 }, { "epoch": 0.8557677061862122, "grad_norm": 0.36300111568714816, "learning_rate": 2.1621585382440268e-06, "loss": 0.0147, "step": 205095 }, { "epoch": 0.8557885689012025, "grad_norm": 0.907042520646458, "learning_rate": 2.1621321830239147e-06, "loss": 0.0189, "step": 205100 }, { "epoch": 0.8558094316161928, "grad_norm": 0.30038722743598656, "learning_rate": 2.162105828767534e-06, "loss": 0.0167, "step": 205105 }, { "epoch": 0.8558302943311831, "grad_norm": 1.0067708924792127, "learning_rate": 2.1620794754748274e-06, "loss": 0.0235, "step": 205110 }, { "epoch": 0.8558511570461733, "grad_norm": 1.1337398323309675, "learning_rate": 2.162053123145736e-06, "loss": 0.0309, "step": 205115 }, { "epoch": 0.8558720197611637, "grad_norm": 0.5248072763151167, "learning_rate": 2.1620267717802004e-06, "loss": 0.0205, "step": 205120 }, { "epoch": 0.8558928824761539, "grad_norm": 0.787740489054981, "learning_rate": 2.1620004213781625e-06, "loss": 0.0153, "step": 205125 }, { "epoch": 0.8559137451911442, "grad_norm": 0.7978376653564364, "learning_rate": 2.161974071939563e-06, "loss": 0.0133, "step": 205130 }, { "epoch": 0.8559346079061345, "grad_norm": 0.8422899928017542, "learning_rate": 2.161947723464344e-06, "loss": 0.0201, "step": 205135 }, { "epoch": 0.8559554706211248, "grad_norm": 0.570150285405345, "learning_rate": 2.161921375952446e-06, "loss": 0.022, "step": 205140 }, { "epoch": 0.855976333336115, "grad_norm": 0.7031944833452188, "learning_rate": 2.1618950294038108e-06, "loss": 0.0233, "step": 205145 }, { "epoch": 0.8559971960511054, "grad_norm": 0.6475860802044853, "learning_rate": 2.1618686838183787e-06, "loss": 0.0239, "step": 205150 }, { "epoch": 0.8560180587660956, "grad_norm": 0.40573947717098013, "learning_rate": 2.1618423391960925e-06, "loss": 0.0156, "step": 205155 }, { "epoch": 0.8560389214810858, "grad_norm": 0.5819704629655191, "learning_rate": 2.161815995536893e-06, "loss": 0.019, "step": 205160 }, { "epoch": 0.8560597841960761, "grad_norm": 0.44354084888643586, "learning_rate": 2.161789652840721e-06, "loss": 0.019, "step": 205165 }, { "epoch": 0.8560806469110664, "grad_norm": 0.7818074322587125, "learning_rate": 2.1617633111075183e-06, "loss": 0.0217, "step": 205170 }, { "epoch": 0.8561015096260567, "grad_norm": 0.4608732584406774, "learning_rate": 2.161736970337226e-06, "loss": 0.0197, "step": 205175 }, { "epoch": 0.8561223723410469, "grad_norm": 1.094671184557317, "learning_rate": 2.161710630529786e-06, "loss": 0.032, "step": 205180 }, { "epoch": 0.8561432350560373, "grad_norm": 0.9439852861135897, "learning_rate": 2.161684291685138e-06, "loss": 0.0207, "step": 205185 }, { "epoch": 0.8561640977710275, "grad_norm": 0.9976983103384511, "learning_rate": 2.161657953803226e-06, "loss": 0.0253, "step": 205190 }, { "epoch": 0.8561849604860178, "grad_norm": 0.4106840535091969, "learning_rate": 2.161631616883989e-06, "loss": 0.0141, "step": 205195 }, { "epoch": 0.8562058232010081, "grad_norm": 0.7814690477506782, "learning_rate": 2.1616052809273697e-06, "loss": 0.0252, "step": 205200 }, { "epoch": 0.8562266859159984, "grad_norm": 0.5117278394175627, "learning_rate": 2.161578945933309e-06, "loss": 0.0161, "step": 205205 }, { "epoch": 0.8562475486309886, "grad_norm": 1.3452863577126726, "learning_rate": 2.161552611901748e-06, "loss": 0.0262, "step": 205210 }, { "epoch": 0.856268411345979, "grad_norm": 2.123723733464705, "learning_rate": 2.1615262788326285e-06, "loss": 0.0223, "step": 205215 }, { "epoch": 0.8562892740609692, "grad_norm": 0.7150320947158016, "learning_rate": 2.161499946725892e-06, "loss": 0.0196, "step": 205220 }, { "epoch": 0.8563101367759595, "grad_norm": 0.46749719347715357, "learning_rate": 2.161473615581479e-06, "loss": 0.0185, "step": 205225 }, { "epoch": 0.8563309994909497, "grad_norm": 0.40371874675019354, "learning_rate": 2.161447285399332e-06, "loss": 0.0125, "step": 205230 }, { "epoch": 0.8563518622059401, "grad_norm": 1.7536011632178437, "learning_rate": 2.161420956179392e-06, "loss": 0.0179, "step": 205235 }, { "epoch": 0.8563727249209303, "grad_norm": 1.622367930001756, "learning_rate": 2.1613946279215996e-06, "loss": 0.0234, "step": 205240 }, { "epoch": 0.8563935876359206, "grad_norm": 0.6123395229724915, "learning_rate": 2.161368300625897e-06, "loss": 0.0241, "step": 205245 }, { "epoch": 0.8564144503509109, "grad_norm": 1.0157860052929553, "learning_rate": 2.161341974292226e-06, "loss": 0.0215, "step": 205250 }, { "epoch": 0.8564353130659011, "grad_norm": 1.1288049275293304, "learning_rate": 2.1613156489205274e-06, "loss": 0.0219, "step": 205255 }, { "epoch": 0.8564561757808914, "grad_norm": 0.48495023730864123, "learning_rate": 2.1612893245107424e-06, "loss": 0.0129, "step": 205260 }, { "epoch": 0.8564770384958817, "grad_norm": 0.3500157641038081, "learning_rate": 2.1612630010628133e-06, "loss": 0.0177, "step": 205265 }, { "epoch": 0.856497901210872, "grad_norm": 1.5538063824936672, "learning_rate": 2.1612366785766804e-06, "loss": 0.0255, "step": 205270 }, { "epoch": 0.8565187639258622, "grad_norm": 0.3335282228069611, "learning_rate": 2.161210357052286e-06, "loss": 0.0248, "step": 205275 }, { "epoch": 0.8565396266408525, "grad_norm": 0.72144882314846, "learning_rate": 2.1611840364895713e-06, "loss": 0.0171, "step": 205280 }, { "epoch": 0.8565604893558428, "grad_norm": 0.33126336959100044, "learning_rate": 2.1611577168884775e-06, "loss": 0.0192, "step": 205285 }, { "epoch": 0.8565813520708331, "grad_norm": 0.7422948391944555, "learning_rate": 2.1611313982489465e-06, "loss": 0.0168, "step": 205290 }, { "epoch": 0.8566022147858233, "grad_norm": 0.44405993724237813, "learning_rate": 2.161105080570919e-06, "loss": 0.0169, "step": 205295 }, { "epoch": 0.8566230775008137, "grad_norm": 0.7214617489170194, "learning_rate": 2.1610787638543374e-06, "loss": 0.0189, "step": 205300 }, { "epoch": 0.8566439402158039, "grad_norm": 0.48220447629911317, "learning_rate": 2.1610524480991425e-06, "loss": 0.0171, "step": 205305 }, { "epoch": 0.8566648029307942, "grad_norm": 0.8029725435989753, "learning_rate": 2.161026133305276e-06, "loss": 0.0267, "step": 205310 }, { "epoch": 0.8566856656457845, "grad_norm": 0.5637243100288409, "learning_rate": 2.160999819472679e-06, "loss": 0.0174, "step": 205315 }, { "epoch": 0.8567065283607748, "grad_norm": 0.5867700080728603, "learning_rate": 2.1609735066012943e-06, "loss": 0.0192, "step": 205320 }, { "epoch": 0.856727391075765, "grad_norm": 1.2192911750330777, "learning_rate": 2.1609471946910614e-06, "loss": 0.0178, "step": 205325 }, { "epoch": 0.8567482537907554, "grad_norm": 0.38727149837826125, "learning_rate": 2.160920883741923e-06, "loss": 0.0218, "step": 205330 }, { "epoch": 0.8567691165057456, "grad_norm": 1.132516048229803, "learning_rate": 2.160894573753821e-06, "loss": 0.024, "step": 205335 }, { "epoch": 0.8567899792207359, "grad_norm": 1.6662979057825342, "learning_rate": 2.1608682647266954e-06, "loss": 0.0254, "step": 205340 }, { "epoch": 0.8568108419357261, "grad_norm": 0.9198338467732505, "learning_rate": 2.1608419566604886e-06, "loss": 0.0289, "step": 205345 }, { "epoch": 0.8568317046507165, "grad_norm": 0.5233626410365618, "learning_rate": 2.160815649555143e-06, "loss": 0.0252, "step": 205350 }, { "epoch": 0.8568525673657067, "grad_norm": 1.0731345470532931, "learning_rate": 2.1607893434105985e-06, "loss": 0.0182, "step": 205355 }, { "epoch": 0.8568734300806969, "grad_norm": 0.4101655273049337, "learning_rate": 2.160763038226797e-06, "loss": 0.0159, "step": 205360 }, { "epoch": 0.8568942927956873, "grad_norm": 0.37398234435601657, "learning_rate": 2.1607367340036808e-06, "loss": 0.0122, "step": 205365 }, { "epoch": 0.8569151555106775, "grad_norm": 0.6115461508297994, "learning_rate": 2.160710430741191e-06, "loss": 0.0177, "step": 205370 }, { "epoch": 0.8569360182256678, "grad_norm": 0.7160502345435837, "learning_rate": 2.160684128439269e-06, "loss": 0.0177, "step": 205375 }, { "epoch": 0.8569568809406581, "grad_norm": 0.6374271677127432, "learning_rate": 2.1606578270978564e-06, "loss": 0.0193, "step": 205380 }, { "epoch": 0.8569777436556484, "grad_norm": 0.596562433676967, "learning_rate": 2.1606315267168947e-06, "loss": 0.0207, "step": 205385 }, { "epoch": 0.8569986063706386, "grad_norm": 0.8746801556068061, "learning_rate": 2.1606052272963257e-06, "loss": 0.0208, "step": 205390 }, { "epoch": 0.857019469085629, "grad_norm": 0.5233166492302351, "learning_rate": 2.1605789288360905e-06, "loss": 0.0158, "step": 205395 }, { "epoch": 0.8570403318006192, "grad_norm": 0.4395650776066389, "learning_rate": 2.1605526313361314e-06, "loss": 0.0159, "step": 205400 }, { "epoch": 0.8570611945156095, "grad_norm": 0.7214121308371868, "learning_rate": 2.1605263347963888e-06, "loss": 0.0215, "step": 205405 }, { "epoch": 0.8570820572305997, "grad_norm": 0.37526257675361196, "learning_rate": 2.1605000392168054e-06, "loss": 0.0199, "step": 205410 }, { "epoch": 0.8571029199455901, "grad_norm": 0.5568616701484623, "learning_rate": 2.1604737445973225e-06, "loss": 0.0188, "step": 205415 }, { "epoch": 0.8571237826605803, "grad_norm": 0.30256168866390354, "learning_rate": 2.160447450937881e-06, "loss": 0.0136, "step": 205420 }, { "epoch": 0.8571446453755706, "grad_norm": 0.3308259586868312, "learning_rate": 2.1604211582384232e-06, "loss": 0.0221, "step": 205425 }, { "epoch": 0.8571655080905609, "grad_norm": 0.685441641834571, "learning_rate": 2.1603948664988904e-06, "loss": 0.0239, "step": 205430 }, { "epoch": 0.8571863708055512, "grad_norm": 0.47107201076141364, "learning_rate": 2.1603685757192247e-06, "loss": 0.0218, "step": 205435 }, { "epoch": 0.8572072335205414, "grad_norm": 0.6249633759962597, "learning_rate": 2.1603422858993663e-06, "loss": 0.0174, "step": 205440 }, { "epoch": 0.8572280962355318, "grad_norm": 0.44009357987661474, "learning_rate": 2.1603159970392585e-06, "loss": 0.0187, "step": 205445 }, { "epoch": 0.857248958950522, "grad_norm": 1.355799554830378, "learning_rate": 2.1602897091388424e-06, "loss": 0.024, "step": 205450 }, { "epoch": 0.8572698216655122, "grad_norm": 0.645823890234658, "learning_rate": 2.1602634221980587e-06, "loss": 0.019, "step": 205455 }, { "epoch": 0.8572906843805025, "grad_norm": 0.5616977914110444, "learning_rate": 2.16023713621685e-06, "loss": 0.0204, "step": 205460 }, { "epoch": 0.8573115470954928, "grad_norm": 1.0520440848148787, "learning_rate": 2.1602108511951574e-06, "loss": 0.0233, "step": 205465 }, { "epoch": 0.8573324098104831, "grad_norm": 0.4481296594494664, "learning_rate": 2.160184567132923e-06, "loss": 0.013, "step": 205470 }, { "epoch": 0.8573532725254733, "grad_norm": 1.0945094628146688, "learning_rate": 2.160158284030088e-06, "loss": 0.0202, "step": 205475 }, { "epoch": 0.8573741352404637, "grad_norm": 0.3938936273915958, "learning_rate": 2.1601320018865943e-06, "loss": 0.0179, "step": 205480 }, { "epoch": 0.8573949979554539, "grad_norm": 0.7319698688551421, "learning_rate": 2.1601057207023833e-06, "loss": 0.0228, "step": 205485 }, { "epoch": 0.8574158606704442, "grad_norm": 0.22688566561799636, "learning_rate": 2.160079440477397e-06, "loss": 0.0191, "step": 205490 }, { "epoch": 0.8574367233854345, "grad_norm": 0.5078877861877232, "learning_rate": 2.1600531612115767e-06, "loss": 0.0202, "step": 205495 }, { "epoch": 0.8574575861004248, "grad_norm": 0.39541074535286613, "learning_rate": 2.1600268829048637e-06, "loss": 0.0266, "step": 205500 }, { "epoch": 0.857478448815415, "grad_norm": 0.6238625548693241, "learning_rate": 2.160000605557201e-06, "loss": 0.0238, "step": 205505 }, { "epoch": 0.8574993115304054, "grad_norm": 0.32790327675151415, "learning_rate": 2.1599743291685287e-06, "loss": 0.0229, "step": 205510 }, { "epoch": 0.8575201742453956, "grad_norm": 0.5852819572747124, "learning_rate": 2.1599480537387898e-06, "loss": 0.0221, "step": 205515 }, { "epoch": 0.8575410369603859, "grad_norm": 0.294452088449968, "learning_rate": 2.1599217792679256e-06, "loss": 0.015, "step": 205520 }, { "epoch": 0.8575618996753761, "grad_norm": 0.5622313448190754, "learning_rate": 2.159895505755877e-06, "loss": 0.0168, "step": 205525 }, { "epoch": 0.8575827623903665, "grad_norm": 0.7672585989500831, "learning_rate": 2.1598692332025857e-06, "loss": 0.0197, "step": 205530 }, { "epoch": 0.8576036251053567, "grad_norm": 1.3709990948510247, "learning_rate": 2.159842961607995e-06, "loss": 0.0212, "step": 205535 }, { "epoch": 0.857624487820347, "grad_norm": 1.7346260261896487, "learning_rate": 2.159816690972045e-06, "loss": 0.0287, "step": 205540 }, { "epoch": 0.8576453505353373, "grad_norm": 0.6784451314663419, "learning_rate": 2.1597904212946776e-06, "loss": 0.0215, "step": 205545 }, { "epoch": 0.8576662132503275, "grad_norm": 0.4178848101158866, "learning_rate": 2.1597641525758355e-06, "loss": 0.02, "step": 205550 }, { "epoch": 0.8576870759653178, "grad_norm": 0.42662162752400973, "learning_rate": 2.1597378848154594e-06, "loss": 0.0231, "step": 205555 }, { "epoch": 0.8577079386803081, "grad_norm": 0.5760292318263446, "learning_rate": 2.159711618013492e-06, "loss": 0.0174, "step": 205560 }, { "epoch": 0.8577288013952984, "grad_norm": 1.006718358009424, "learning_rate": 2.1596853521698734e-06, "loss": 0.0203, "step": 205565 }, { "epoch": 0.8577496641102886, "grad_norm": 0.753177928165977, "learning_rate": 2.1596590872845465e-06, "loss": 0.0185, "step": 205570 }, { "epoch": 0.857770526825279, "grad_norm": 0.3380411731527041, "learning_rate": 2.1596328233574528e-06, "loss": 0.0161, "step": 205575 }, { "epoch": 0.8577913895402692, "grad_norm": 0.9112678305570834, "learning_rate": 2.1596065603885346e-06, "loss": 0.0202, "step": 205580 }, { "epoch": 0.8578122522552595, "grad_norm": 0.7488109152136897, "learning_rate": 2.1595802983777326e-06, "loss": 0.0152, "step": 205585 }, { "epoch": 0.8578331149702497, "grad_norm": 0.7797147255607285, "learning_rate": 2.1595540373249897e-06, "loss": 0.0226, "step": 205590 }, { "epoch": 0.8578539776852401, "grad_norm": 2.279323242316162, "learning_rate": 2.159527777230246e-06, "loss": 0.035, "step": 205595 }, { "epoch": 0.8578748404002303, "grad_norm": 0.48165071429303696, "learning_rate": 2.159501518093445e-06, "loss": 0.0182, "step": 205600 }, { "epoch": 0.8578957031152206, "grad_norm": 0.5114920473809053, "learning_rate": 2.1594752599145275e-06, "loss": 0.0149, "step": 205605 }, { "epoch": 0.8579165658302109, "grad_norm": 0.41864233473869666, "learning_rate": 2.1594490026934355e-06, "loss": 0.0213, "step": 205610 }, { "epoch": 0.8579374285452012, "grad_norm": 0.6540760778781113, "learning_rate": 2.159422746430111e-06, "loss": 0.0222, "step": 205615 }, { "epoch": 0.8579582912601914, "grad_norm": 0.8142436123733575, "learning_rate": 2.1593964911244945e-06, "loss": 0.0259, "step": 205620 }, { "epoch": 0.8579791539751818, "grad_norm": 0.6405332847895892, "learning_rate": 2.1593702367765294e-06, "loss": 0.0186, "step": 205625 }, { "epoch": 0.858000016690172, "grad_norm": 0.31086798061886206, "learning_rate": 2.1593439833861575e-06, "loss": 0.0171, "step": 205630 }, { "epoch": 0.8580208794051623, "grad_norm": 0.3219612660574129, "learning_rate": 2.1593177309533194e-06, "loss": 0.0179, "step": 205635 }, { "epoch": 0.8580417421201525, "grad_norm": 0.607864164524488, "learning_rate": 2.1592914794779577e-06, "loss": 0.0192, "step": 205640 }, { "epoch": 0.8580626048351429, "grad_norm": 0.6518342901784024, "learning_rate": 2.1592652289600135e-06, "loss": 0.0182, "step": 205645 }, { "epoch": 0.8580834675501331, "grad_norm": 0.7354994677716845, "learning_rate": 2.15923897939943e-06, "loss": 0.0176, "step": 205650 }, { "epoch": 0.8581043302651233, "grad_norm": 1.176983726698803, "learning_rate": 2.1592127307961473e-06, "loss": 0.0291, "step": 205655 }, { "epoch": 0.8581251929801137, "grad_norm": 1.1899102584614527, "learning_rate": 2.1591864831501084e-06, "loss": 0.0213, "step": 205660 }, { "epoch": 0.8581460556951039, "grad_norm": 1.586388326475743, "learning_rate": 2.1591602364612545e-06, "loss": 0.0267, "step": 205665 }, { "epoch": 0.8581669184100942, "grad_norm": 0.6194333762047101, "learning_rate": 2.1591339907295275e-06, "loss": 0.0164, "step": 205670 }, { "epoch": 0.8581877811250845, "grad_norm": 0.8022893057428512, "learning_rate": 2.1591077459548702e-06, "loss": 0.0214, "step": 205675 }, { "epoch": 0.8582086438400748, "grad_norm": 0.31364400260260095, "learning_rate": 2.1590815021372225e-06, "loss": 0.0162, "step": 205680 }, { "epoch": 0.858229506555065, "grad_norm": 0.5047257050553725, "learning_rate": 2.159055259276528e-06, "loss": 0.0264, "step": 205685 }, { "epoch": 0.8582503692700554, "grad_norm": 0.7687906812414472, "learning_rate": 2.1590290173727276e-06, "loss": 0.0166, "step": 205690 }, { "epoch": 0.8582712319850456, "grad_norm": 1.2603751977826565, "learning_rate": 2.1590027764257636e-06, "loss": 0.0228, "step": 205695 }, { "epoch": 0.8582920947000359, "grad_norm": 0.5216307031461969, "learning_rate": 2.158976536435578e-06, "loss": 0.0196, "step": 205700 }, { "epoch": 0.8583129574150261, "grad_norm": 0.4821131279252124, "learning_rate": 2.1589502974021115e-06, "loss": 0.0191, "step": 205705 }, { "epoch": 0.8583338201300165, "grad_norm": 0.7369512746184418, "learning_rate": 2.1589240593253076e-06, "loss": 0.0228, "step": 205710 }, { "epoch": 0.8583546828450067, "grad_norm": 0.5286966274267443, "learning_rate": 2.158897822205107e-06, "loss": 0.0242, "step": 205715 }, { "epoch": 0.858375545559997, "grad_norm": 1.2296315995905587, "learning_rate": 2.1588715860414524e-06, "loss": 0.0183, "step": 205720 }, { "epoch": 0.8583964082749873, "grad_norm": 0.9938868255480332, "learning_rate": 2.158845350834285e-06, "loss": 0.0232, "step": 205725 }, { "epoch": 0.8584172709899776, "grad_norm": 0.39539069391612497, "learning_rate": 2.1588191165835467e-06, "loss": 0.0189, "step": 205730 }, { "epoch": 0.8584381337049678, "grad_norm": 0.6784079118230936, "learning_rate": 2.15879288328918e-06, "loss": 0.0237, "step": 205735 }, { "epoch": 0.8584589964199582, "grad_norm": 0.8264657520429074, "learning_rate": 2.1587666509511264e-06, "loss": 0.0185, "step": 205740 }, { "epoch": 0.8584798591349484, "grad_norm": 0.36724536492799176, "learning_rate": 2.1587404195693276e-06, "loss": 0.0197, "step": 205745 }, { "epoch": 0.8585007218499386, "grad_norm": 0.6265089822288126, "learning_rate": 2.1587141891437257e-06, "loss": 0.0265, "step": 205750 }, { "epoch": 0.858521584564929, "grad_norm": 2.717685480343017, "learning_rate": 2.1586879596742626e-06, "loss": 0.0172, "step": 205755 }, { "epoch": 0.8585424472799192, "grad_norm": 0.30796466375823883, "learning_rate": 2.1586617311608803e-06, "loss": 0.0197, "step": 205760 }, { "epoch": 0.8585633099949095, "grad_norm": 1.458213413990714, "learning_rate": 2.1586355036035204e-06, "loss": 0.0166, "step": 205765 }, { "epoch": 0.8585841727098997, "grad_norm": 0.8595374552622205, "learning_rate": 2.1586092770021257e-06, "loss": 0.0152, "step": 205770 }, { "epoch": 0.8586050354248901, "grad_norm": 0.5887523494564173, "learning_rate": 2.1585830513566373e-06, "loss": 0.0242, "step": 205775 }, { "epoch": 0.8586258981398803, "grad_norm": 0.639216227150951, "learning_rate": 2.1585568266669968e-06, "loss": 0.0158, "step": 205780 }, { "epoch": 0.8586467608548706, "grad_norm": 0.5705800413218208, "learning_rate": 2.1585306029331474e-06, "loss": 0.0149, "step": 205785 }, { "epoch": 0.8586676235698609, "grad_norm": 0.6334711530099318, "learning_rate": 2.1585043801550294e-06, "loss": 0.0175, "step": 205790 }, { "epoch": 0.8586884862848512, "grad_norm": 0.6441948790609349, "learning_rate": 2.1584781583325866e-06, "loss": 0.016, "step": 205795 }, { "epoch": 0.8587093489998414, "grad_norm": 0.559896712407296, "learning_rate": 2.15845193746576e-06, "loss": 0.0193, "step": 205800 }, { "epoch": 0.8587302117148318, "grad_norm": 0.4399808068478867, "learning_rate": 2.158425717554491e-06, "loss": 0.017, "step": 205805 }, { "epoch": 0.858751074429822, "grad_norm": 0.7762048390938667, "learning_rate": 2.1583994985987223e-06, "loss": 0.0171, "step": 205810 }, { "epoch": 0.8587719371448123, "grad_norm": 1.0568153369001805, "learning_rate": 2.1583732805983958e-06, "loss": 0.022, "step": 205815 }, { "epoch": 0.8587927998598025, "grad_norm": 0.7707397150179175, "learning_rate": 2.1583470635534533e-06, "loss": 0.0234, "step": 205820 }, { "epoch": 0.8588136625747929, "grad_norm": 0.6598466398867721, "learning_rate": 2.158320847463837e-06, "loss": 0.023, "step": 205825 }, { "epoch": 0.8588345252897831, "grad_norm": 0.32298011608005217, "learning_rate": 2.1582946323294888e-06, "loss": 0.0175, "step": 205830 }, { "epoch": 0.8588553880047733, "grad_norm": 0.21997784139429835, "learning_rate": 2.1582684181503506e-06, "loss": 0.0199, "step": 205835 }, { "epoch": 0.8588762507197637, "grad_norm": 0.5204222272216531, "learning_rate": 2.158242204926364e-06, "loss": 0.0202, "step": 205840 }, { "epoch": 0.858897113434754, "grad_norm": 0.5393649559913276, "learning_rate": 2.1582159926574724e-06, "loss": 0.0244, "step": 205845 }, { "epoch": 0.8589179761497442, "grad_norm": 0.5485004685461645, "learning_rate": 2.158189781343616e-06, "loss": 0.0152, "step": 205850 }, { "epoch": 0.8589388388647345, "grad_norm": 0.9213087392243645, "learning_rate": 2.1581635709847378e-06, "loss": 0.0158, "step": 205855 }, { "epoch": 0.8589597015797248, "grad_norm": 0.6947417871604007, "learning_rate": 2.158137361580779e-06, "loss": 0.0232, "step": 205860 }, { "epoch": 0.858980564294715, "grad_norm": 0.42135129328276766, "learning_rate": 2.158111153131684e-06, "loss": 0.0151, "step": 205865 }, { "epoch": 0.8590014270097054, "grad_norm": 0.5824043327177931, "learning_rate": 2.1580849456373915e-06, "loss": 0.0193, "step": 205870 }, { "epoch": 0.8590222897246956, "grad_norm": 0.4708459983057743, "learning_rate": 2.158058739097845e-06, "loss": 0.0242, "step": 205875 }, { "epoch": 0.8590431524396859, "grad_norm": 1.1389364562268205, "learning_rate": 2.158032533512988e-06, "loss": 0.0335, "step": 205880 }, { "epoch": 0.8590640151546761, "grad_norm": 0.5308969869663769, "learning_rate": 2.1580063288827604e-06, "loss": 0.0217, "step": 205885 }, { "epoch": 0.8590848778696665, "grad_norm": 0.34446846010582655, "learning_rate": 2.157980125207105e-06, "loss": 0.0142, "step": 205890 }, { "epoch": 0.8591057405846567, "grad_norm": 0.7697018243291363, "learning_rate": 2.157953922485964e-06, "loss": 0.0178, "step": 205895 }, { "epoch": 0.859126603299647, "grad_norm": 0.9862069513577026, "learning_rate": 2.157927720719279e-06, "loss": 0.0253, "step": 205900 }, { "epoch": 0.8591474660146373, "grad_norm": 0.6551963833865507, "learning_rate": 2.157901519906993e-06, "loss": 0.025, "step": 205905 }, { "epoch": 0.8591683287296276, "grad_norm": 1.0229964287446844, "learning_rate": 2.157875320049047e-06, "loss": 0.0247, "step": 205910 }, { "epoch": 0.8591891914446178, "grad_norm": 0.6518773081562602, "learning_rate": 2.1578491211453836e-06, "loss": 0.0167, "step": 205915 }, { "epoch": 0.8592100541596082, "grad_norm": 0.2608855087991182, "learning_rate": 2.1578229231959443e-06, "loss": 0.0173, "step": 205920 }, { "epoch": 0.8592309168745984, "grad_norm": 0.5900351171492774, "learning_rate": 2.1577967262006724e-06, "loss": 0.0262, "step": 205925 }, { "epoch": 0.8592517795895886, "grad_norm": 0.6134032301600717, "learning_rate": 2.157770530159509e-06, "loss": 0.0217, "step": 205930 }, { "epoch": 0.859272642304579, "grad_norm": 0.5058334366981507, "learning_rate": 2.157744335072396e-06, "loss": 0.0203, "step": 205935 }, { "epoch": 0.8592935050195692, "grad_norm": 0.5945334855515272, "learning_rate": 2.1577181409392764e-06, "loss": 0.0236, "step": 205940 }, { "epoch": 0.8593143677345595, "grad_norm": 1.4460281121119016, "learning_rate": 2.1576919477600916e-06, "loss": 0.0219, "step": 205945 }, { "epoch": 0.8593352304495497, "grad_norm": 0.5392075580232899, "learning_rate": 2.1576657555347845e-06, "loss": 0.0265, "step": 205950 }, { "epoch": 0.8593560931645401, "grad_norm": 0.5463867260219923, "learning_rate": 2.157639564263296e-06, "loss": 0.0228, "step": 205955 }, { "epoch": 0.8593769558795303, "grad_norm": 0.4293659534027004, "learning_rate": 2.1576133739455687e-06, "loss": 0.0155, "step": 205960 }, { "epoch": 0.8593978185945206, "grad_norm": 0.960146893555203, "learning_rate": 2.157587184581545e-06, "loss": 0.0215, "step": 205965 }, { "epoch": 0.8594186813095109, "grad_norm": 1.3635620787433065, "learning_rate": 2.157560996171167e-06, "loss": 0.0283, "step": 205970 }, { "epoch": 0.8594395440245012, "grad_norm": 0.6181230808259984, "learning_rate": 2.1575348087143763e-06, "loss": 0.0177, "step": 205975 }, { "epoch": 0.8594604067394914, "grad_norm": 1.3159797071145645, "learning_rate": 2.157508622211116e-06, "loss": 0.0275, "step": 205980 }, { "epoch": 0.8594812694544818, "grad_norm": 0.3252214360108841, "learning_rate": 2.157482436661327e-06, "loss": 0.0171, "step": 205985 }, { "epoch": 0.859502132169472, "grad_norm": 0.5445333641080323, "learning_rate": 2.1574562520649524e-06, "loss": 0.0175, "step": 205990 }, { "epoch": 0.8595229948844623, "grad_norm": 0.680157596044769, "learning_rate": 2.1574300684219343e-06, "loss": 0.0173, "step": 205995 }, { "epoch": 0.8595438575994525, "grad_norm": 0.844380950919639, "learning_rate": 2.1574038857322146e-06, "loss": 0.0252, "step": 206000 }, { "epoch": 0.8595647203144429, "grad_norm": 0.664202675073178, "learning_rate": 2.157377703995735e-06, "loss": 0.0251, "step": 206005 }, { "epoch": 0.8595855830294331, "grad_norm": 0.7287308687069527, "learning_rate": 2.1573515232124383e-06, "loss": 0.0197, "step": 206010 }, { "epoch": 0.8596064457444234, "grad_norm": 0.3531411721800627, "learning_rate": 2.157325343382267e-06, "loss": 0.0203, "step": 206015 }, { "epoch": 0.8596273084594137, "grad_norm": 2.991304266719096, "learning_rate": 2.157299164505162e-06, "loss": 0.0225, "step": 206020 }, { "epoch": 0.859648171174404, "grad_norm": 0.8523365578801471, "learning_rate": 2.1572729865810664e-06, "loss": 0.0198, "step": 206025 }, { "epoch": 0.8596690338893942, "grad_norm": 0.7547298392193295, "learning_rate": 2.157246809609922e-06, "loss": 0.0258, "step": 206030 }, { "epoch": 0.8596898966043846, "grad_norm": 0.8127823461303332, "learning_rate": 2.1572206335916716e-06, "loss": 0.0173, "step": 206035 }, { "epoch": 0.8597107593193748, "grad_norm": 1.0229891698711453, "learning_rate": 2.1571944585262568e-06, "loss": 0.0216, "step": 206040 }, { "epoch": 0.859731622034365, "grad_norm": 0.5305290036185799, "learning_rate": 2.1571682844136197e-06, "loss": 0.0245, "step": 206045 }, { "epoch": 0.8597524847493554, "grad_norm": 0.7210664861450763, "learning_rate": 2.157142111253703e-06, "loss": 0.0148, "step": 206050 }, { "epoch": 0.8597733474643456, "grad_norm": 0.5801326101209018, "learning_rate": 2.1571159390464487e-06, "loss": 0.0146, "step": 206055 }, { "epoch": 0.8597942101793359, "grad_norm": 0.552411220845877, "learning_rate": 2.157089767791799e-06, "loss": 0.0218, "step": 206060 }, { "epoch": 0.8598150728943261, "grad_norm": 0.5633710913213287, "learning_rate": 2.1570635974896956e-06, "loss": 0.0218, "step": 206065 }, { "epoch": 0.8598359356093165, "grad_norm": 0.5850906451151464, "learning_rate": 2.157037428140082e-06, "loss": 0.0249, "step": 206070 }, { "epoch": 0.8598567983243067, "grad_norm": 0.4945899499339122, "learning_rate": 2.1570112597428993e-06, "loss": 0.0164, "step": 206075 }, { "epoch": 0.859877661039297, "grad_norm": 0.6627354967422396, "learning_rate": 2.1569850922980895e-06, "loss": 0.0205, "step": 206080 }, { "epoch": 0.8598985237542873, "grad_norm": 0.7877385876965403, "learning_rate": 2.156958925805596e-06, "loss": 0.0157, "step": 206085 }, { "epoch": 0.8599193864692776, "grad_norm": 0.519243376962396, "learning_rate": 2.1569327602653605e-06, "loss": 0.0206, "step": 206090 }, { "epoch": 0.8599402491842678, "grad_norm": 0.5099923118441032, "learning_rate": 2.1569065956773243e-06, "loss": 0.0172, "step": 206095 }, { "epoch": 0.8599611118992582, "grad_norm": 0.6259436573972033, "learning_rate": 2.1568804320414316e-06, "loss": 0.0141, "step": 206100 }, { "epoch": 0.8599819746142484, "grad_norm": 0.6961699624594676, "learning_rate": 2.1568542693576233e-06, "loss": 0.0251, "step": 206105 }, { "epoch": 0.8600028373292387, "grad_norm": 0.426271646608278, "learning_rate": 2.156828107625842e-06, "loss": 0.0158, "step": 206110 }, { "epoch": 0.8600237000442289, "grad_norm": 0.6686277207651355, "learning_rate": 2.156801946846029e-06, "loss": 0.0222, "step": 206115 }, { "epoch": 0.8600445627592193, "grad_norm": 1.290538445088498, "learning_rate": 2.1567757870181284e-06, "loss": 0.0211, "step": 206120 }, { "epoch": 0.8600654254742095, "grad_norm": 0.9187979450491699, "learning_rate": 2.1567496281420814e-06, "loss": 0.0151, "step": 206125 }, { "epoch": 0.8600862881891997, "grad_norm": 0.41378737622505324, "learning_rate": 2.1567234702178305e-06, "loss": 0.0122, "step": 206130 }, { "epoch": 0.8601071509041901, "grad_norm": 0.7831937592068028, "learning_rate": 2.156697313245317e-06, "loss": 0.0149, "step": 206135 }, { "epoch": 0.8601280136191803, "grad_norm": 0.34958015148667515, "learning_rate": 2.1566711572244852e-06, "loss": 0.0154, "step": 206140 }, { "epoch": 0.8601488763341706, "grad_norm": 1.149975274559144, "learning_rate": 2.1566450021552758e-06, "loss": 0.0166, "step": 206145 }, { "epoch": 0.8601697390491609, "grad_norm": 0.9144188830723212, "learning_rate": 2.1566188480376316e-06, "loss": 0.0271, "step": 206150 }, { "epoch": 0.8601906017641512, "grad_norm": 1.0751714346113783, "learning_rate": 2.156592694871495e-06, "loss": 0.02, "step": 206155 }, { "epoch": 0.8602114644791414, "grad_norm": 0.20881823274139635, "learning_rate": 2.1565665426568078e-06, "loss": 0.0149, "step": 206160 }, { "epoch": 0.8602323271941318, "grad_norm": 0.5566028478842296, "learning_rate": 2.156540391393513e-06, "loss": 0.0193, "step": 206165 }, { "epoch": 0.860253189909122, "grad_norm": 0.6092082321636458, "learning_rate": 2.156514241081553e-06, "loss": 0.0239, "step": 206170 }, { "epoch": 0.8602740526241123, "grad_norm": 1.04641084138349, "learning_rate": 2.1564880917208688e-06, "loss": 0.0186, "step": 206175 }, { "epoch": 0.8602949153391025, "grad_norm": 0.22120904436546193, "learning_rate": 2.156461943311404e-06, "loss": 0.0203, "step": 206180 }, { "epoch": 0.8603157780540929, "grad_norm": 0.49288893553511814, "learning_rate": 2.156435795853101e-06, "loss": 0.0188, "step": 206185 }, { "epoch": 0.8603366407690831, "grad_norm": 0.6011957951220332, "learning_rate": 2.1564096493459015e-06, "loss": 0.0209, "step": 206190 }, { "epoch": 0.8603575034840734, "grad_norm": 2.0184306452827254, "learning_rate": 2.156383503789748e-06, "loss": 0.0325, "step": 206195 }, { "epoch": 0.8603783661990637, "grad_norm": 0.7124208660788388, "learning_rate": 2.1563573591845827e-06, "loss": 0.0188, "step": 206200 }, { "epoch": 0.860399228914054, "grad_norm": 0.9884469334998583, "learning_rate": 2.1563312155303488e-06, "loss": 0.0281, "step": 206205 }, { "epoch": 0.8604200916290442, "grad_norm": 0.6268226037365169, "learning_rate": 2.1563050728269873e-06, "loss": 0.0193, "step": 206210 }, { "epoch": 0.8604409543440346, "grad_norm": 0.7507000656401559, "learning_rate": 2.156278931074442e-06, "loss": 0.016, "step": 206215 }, { "epoch": 0.8604618170590248, "grad_norm": 1.0123617295017922, "learning_rate": 2.156252790272654e-06, "loss": 0.0279, "step": 206220 }, { "epoch": 0.860482679774015, "grad_norm": 0.7868710209963486, "learning_rate": 2.1562266504215663e-06, "loss": 0.0257, "step": 206225 }, { "epoch": 0.8605035424890054, "grad_norm": 0.6803219493092976, "learning_rate": 2.1562005115211214e-06, "loss": 0.0173, "step": 206230 }, { "epoch": 0.8605244052039956, "grad_norm": 0.4135283094125485, "learning_rate": 2.1561743735712614e-06, "loss": 0.0208, "step": 206235 }, { "epoch": 0.8605452679189859, "grad_norm": 0.7232967676013925, "learning_rate": 2.1561482365719286e-06, "loss": 0.0223, "step": 206240 }, { "epoch": 0.8605661306339761, "grad_norm": 0.42318281918291156, "learning_rate": 2.156122100523066e-06, "loss": 0.0176, "step": 206245 }, { "epoch": 0.8605869933489665, "grad_norm": 1.034028782021386, "learning_rate": 2.1560959654246147e-06, "loss": 0.0254, "step": 206250 }, { "epoch": 0.8606078560639567, "grad_norm": 0.49793826866463314, "learning_rate": 2.156069831276519e-06, "loss": 0.023, "step": 206255 }, { "epoch": 0.860628718778947, "grad_norm": 0.7230350950678379, "learning_rate": 2.156043698078719e-06, "loss": 0.0316, "step": 206260 }, { "epoch": 0.8606495814939373, "grad_norm": 1.0699549605231367, "learning_rate": 2.156017565831159e-06, "loss": 0.0274, "step": 206265 }, { "epoch": 0.8606704442089276, "grad_norm": 0.38653728294802514, "learning_rate": 2.155991434533781e-06, "loss": 0.0185, "step": 206270 }, { "epoch": 0.8606913069239178, "grad_norm": 0.40595019050088454, "learning_rate": 2.1559653041865274e-06, "loss": 0.0169, "step": 206275 }, { "epoch": 0.8607121696389082, "grad_norm": 0.5082966879404632, "learning_rate": 2.15593917478934e-06, "loss": 0.0245, "step": 206280 }, { "epoch": 0.8607330323538984, "grad_norm": 0.48897252681196973, "learning_rate": 2.1559130463421616e-06, "loss": 0.0181, "step": 206285 }, { "epoch": 0.8607538950688887, "grad_norm": 0.9059922031240903, "learning_rate": 2.155886918844935e-06, "loss": 0.0247, "step": 206290 }, { "epoch": 0.8607747577838789, "grad_norm": 0.6212519779925599, "learning_rate": 2.1558607922976017e-06, "loss": 0.0217, "step": 206295 }, { "epoch": 0.8607956204988693, "grad_norm": 0.5532452449341928, "learning_rate": 2.1558346667001053e-06, "loss": 0.0178, "step": 206300 }, { "epoch": 0.8608164832138595, "grad_norm": 0.7865169590928253, "learning_rate": 2.1558085420523873e-06, "loss": 0.0278, "step": 206305 }, { "epoch": 0.8608373459288498, "grad_norm": 0.8628808084456157, "learning_rate": 2.155782418354391e-06, "loss": 0.0226, "step": 206310 }, { "epoch": 0.8608582086438401, "grad_norm": 0.5481414236605248, "learning_rate": 2.1557562956060586e-06, "loss": 0.0206, "step": 206315 }, { "epoch": 0.8608790713588304, "grad_norm": 1.3310860003296987, "learning_rate": 2.1557301738073317e-06, "loss": 0.0182, "step": 206320 }, { "epoch": 0.8608999340738206, "grad_norm": 0.7790384411302193, "learning_rate": 2.155704052958154e-06, "loss": 0.0226, "step": 206325 }, { "epoch": 0.860920796788811, "grad_norm": 1.072148576136284, "learning_rate": 2.155677933058467e-06, "loss": 0.0345, "step": 206330 }, { "epoch": 0.8609416595038012, "grad_norm": 0.7466651907555976, "learning_rate": 2.155651814108214e-06, "loss": 0.0196, "step": 206335 }, { "epoch": 0.8609625222187914, "grad_norm": 0.39053437834551213, "learning_rate": 2.155625696107337e-06, "loss": 0.0253, "step": 206340 }, { "epoch": 0.8609833849337818, "grad_norm": 0.6713114057610847, "learning_rate": 2.1555995790557784e-06, "loss": 0.0241, "step": 206345 }, { "epoch": 0.861004247648772, "grad_norm": 0.56249453852107, "learning_rate": 2.1555734629534815e-06, "loss": 0.0158, "step": 206350 }, { "epoch": 0.8610251103637623, "grad_norm": 0.4861407891159046, "learning_rate": 2.155547347800387e-06, "loss": 0.0149, "step": 206355 }, { "epoch": 0.8610459730787525, "grad_norm": 0.41954037220127954, "learning_rate": 2.1555212335964397e-06, "loss": 0.032, "step": 206360 }, { "epoch": 0.8610668357937429, "grad_norm": 0.6696840770057011, "learning_rate": 2.1554951203415805e-06, "loss": 0.021, "step": 206365 }, { "epoch": 0.8610876985087331, "grad_norm": 0.6746634982503171, "learning_rate": 2.155469008035752e-06, "loss": 0.0148, "step": 206370 }, { "epoch": 0.8611085612237234, "grad_norm": 0.6956184226197454, "learning_rate": 2.155442896678898e-06, "loss": 0.0222, "step": 206375 }, { "epoch": 0.8611294239387137, "grad_norm": 0.48145182978632034, "learning_rate": 2.1554167862709598e-06, "loss": 0.0195, "step": 206380 }, { "epoch": 0.861150286653704, "grad_norm": 0.6373947964256431, "learning_rate": 2.1553906768118796e-06, "loss": 0.0166, "step": 206385 }, { "epoch": 0.8611711493686942, "grad_norm": 0.8852960929815104, "learning_rate": 2.1553645683016013e-06, "loss": 0.0183, "step": 206390 }, { "epoch": 0.8611920120836846, "grad_norm": 0.54743224569522, "learning_rate": 2.1553384607400668e-06, "loss": 0.0258, "step": 206395 }, { "epoch": 0.8612128747986748, "grad_norm": 0.7693527667733613, "learning_rate": 2.155312354127218e-06, "loss": 0.0223, "step": 206400 }, { "epoch": 0.861233737513665, "grad_norm": 0.7427012550657545, "learning_rate": 2.1552862484629984e-06, "loss": 0.0187, "step": 206405 }, { "epoch": 0.8612546002286554, "grad_norm": 0.6672655569000261, "learning_rate": 2.15526014374735e-06, "loss": 0.0128, "step": 206410 }, { "epoch": 0.8612754629436457, "grad_norm": 0.9850186447244437, "learning_rate": 2.155234039980216e-06, "loss": 0.0275, "step": 206415 }, { "epoch": 0.8612963256586359, "grad_norm": 0.6919397556927974, "learning_rate": 2.1552079371615377e-06, "loss": 0.0347, "step": 206420 }, { "epoch": 0.8613171883736261, "grad_norm": 0.6578132015321905, "learning_rate": 2.155181835291259e-06, "loss": 0.0236, "step": 206425 }, { "epoch": 0.8613380510886165, "grad_norm": 0.5470489269550097, "learning_rate": 2.155155734369322e-06, "loss": 0.0191, "step": 206430 }, { "epoch": 0.8613589138036067, "grad_norm": 0.5155259822361975, "learning_rate": 2.1551296343956686e-06, "loss": 0.0213, "step": 206435 }, { "epoch": 0.861379776518597, "grad_norm": 0.9131754690064231, "learning_rate": 2.1551035353702425e-06, "loss": 0.0145, "step": 206440 }, { "epoch": 0.8614006392335873, "grad_norm": 1.1360358251350986, "learning_rate": 2.155077437292986e-06, "loss": 0.0164, "step": 206445 }, { "epoch": 0.8614215019485776, "grad_norm": 0.5548193659737002, "learning_rate": 2.1550513401638407e-06, "loss": 0.0222, "step": 206450 }, { "epoch": 0.8614423646635678, "grad_norm": 0.6718934893940894, "learning_rate": 2.15502524398275e-06, "loss": 0.0224, "step": 206455 }, { "epoch": 0.8614632273785582, "grad_norm": 0.6513519065933927, "learning_rate": 2.1549991487496576e-06, "loss": 0.0225, "step": 206460 }, { "epoch": 0.8614840900935484, "grad_norm": 0.8861059347458692, "learning_rate": 2.1549730544645035e-06, "loss": 0.0267, "step": 206465 }, { "epoch": 0.8615049528085387, "grad_norm": 0.6653096468483913, "learning_rate": 2.1549469611272327e-06, "loss": 0.0223, "step": 206470 }, { "epoch": 0.8615258155235289, "grad_norm": 0.554649471097252, "learning_rate": 2.1549208687377867e-06, "loss": 0.0208, "step": 206475 }, { "epoch": 0.8615466782385193, "grad_norm": 1.09102567232148, "learning_rate": 2.154894777296108e-06, "loss": 0.0203, "step": 206480 }, { "epoch": 0.8615675409535095, "grad_norm": 0.6457393966956283, "learning_rate": 2.1548686868021394e-06, "loss": 0.0201, "step": 206485 }, { "epoch": 0.8615884036684998, "grad_norm": 0.36175889245615883, "learning_rate": 2.1548425972558242e-06, "loss": 0.0173, "step": 206490 }, { "epoch": 0.8616092663834901, "grad_norm": 1.3684472892578765, "learning_rate": 2.154816508657104e-06, "loss": 0.0279, "step": 206495 }, { "epoch": 0.8616301290984804, "grad_norm": 0.44567701005750704, "learning_rate": 2.1547904210059224e-06, "loss": 0.016, "step": 206500 }, { "epoch": 0.8616509918134706, "grad_norm": 0.9941468444212105, "learning_rate": 2.154764334302221e-06, "loss": 0.0206, "step": 206505 }, { "epoch": 0.861671854528461, "grad_norm": 0.928795277252861, "learning_rate": 2.1547382485459434e-06, "loss": 0.0272, "step": 206510 }, { "epoch": 0.8616927172434512, "grad_norm": 0.44849054432188457, "learning_rate": 2.154712163737032e-06, "loss": 0.0185, "step": 206515 }, { "epoch": 0.8617135799584414, "grad_norm": 0.6081652165838006, "learning_rate": 2.154686079875429e-06, "loss": 0.0247, "step": 206520 }, { "epoch": 0.8617344426734318, "grad_norm": 0.6378707222194775, "learning_rate": 2.1546599969610775e-06, "loss": 0.0181, "step": 206525 }, { "epoch": 0.861755305388422, "grad_norm": 0.9685079842989022, "learning_rate": 2.1546339149939204e-06, "loss": 0.0275, "step": 206530 }, { "epoch": 0.8617761681034123, "grad_norm": 0.721743901142697, "learning_rate": 2.1546078339738994e-06, "loss": 0.0282, "step": 206535 }, { "epoch": 0.8617970308184025, "grad_norm": 0.6447100745727106, "learning_rate": 2.154581753900958e-06, "loss": 0.0258, "step": 206540 }, { "epoch": 0.8618178935333929, "grad_norm": 0.5528753664363958, "learning_rate": 2.1545556747750386e-06, "loss": 0.0181, "step": 206545 }, { "epoch": 0.8618387562483831, "grad_norm": 0.5482054887096354, "learning_rate": 2.1545295965960843e-06, "loss": 0.0159, "step": 206550 }, { "epoch": 0.8618596189633734, "grad_norm": 0.26468172218059893, "learning_rate": 2.1545035193640374e-06, "loss": 0.0189, "step": 206555 }, { "epoch": 0.8618804816783637, "grad_norm": 0.5304244708622277, "learning_rate": 2.1544774430788404e-06, "loss": 0.0195, "step": 206560 }, { "epoch": 0.861901344393354, "grad_norm": 0.7727719033575061, "learning_rate": 2.1544513677404366e-06, "loss": 0.0275, "step": 206565 }, { "epoch": 0.8619222071083442, "grad_norm": 0.8482970102295249, "learning_rate": 2.154425293348768e-06, "loss": 0.0253, "step": 206570 }, { "epoch": 0.8619430698233346, "grad_norm": 0.9317784695833893, "learning_rate": 2.154399219903778e-06, "loss": 0.023, "step": 206575 }, { "epoch": 0.8619639325383248, "grad_norm": 0.4625416701284987, "learning_rate": 2.154373147405409e-06, "loss": 0.0205, "step": 206580 }, { "epoch": 0.8619847952533151, "grad_norm": 0.5383602123701965, "learning_rate": 2.154347075853604e-06, "loss": 0.02, "step": 206585 }, { "epoch": 0.8620056579683054, "grad_norm": 0.5996630979150251, "learning_rate": 2.1543210052483053e-06, "loss": 0.0176, "step": 206590 }, { "epoch": 0.8620265206832957, "grad_norm": 0.8096512262012634, "learning_rate": 2.154294935589455e-06, "loss": 0.0293, "step": 206595 }, { "epoch": 0.8620473833982859, "grad_norm": 0.37235238370930485, "learning_rate": 2.154268866876998e-06, "loss": 0.0258, "step": 206600 }, { "epoch": 0.8620682461132761, "grad_norm": 1.0166279255902626, "learning_rate": 2.1542427991108745e-06, "loss": 0.0228, "step": 206605 }, { "epoch": 0.8620891088282665, "grad_norm": 0.964690151003151, "learning_rate": 2.154216732291029e-06, "loss": 0.0284, "step": 206610 }, { "epoch": 0.8621099715432567, "grad_norm": 0.5867881404923302, "learning_rate": 2.1541906664174033e-06, "loss": 0.0253, "step": 206615 }, { "epoch": 0.862130834258247, "grad_norm": 0.38915965938445396, "learning_rate": 2.1541646014899407e-06, "loss": 0.0205, "step": 206620 }, { "epoch": 0.8621516969732373, "grad_norm": 0.7800637184279667, "learning_rate": 2.1541385375085838e-06, "loss": 0.0209, "step": 206625 }, { "epoch": 0.8621725596882276, "grad_norm": 0.4471722090445745, "learning_rate": 2.1541124744732754e-06, "loss": 0.0185, "step": 206630 }, { "epoch": 0.8621934224032178, "grad_norm": 0.6397686096021026, "learning_rate": 2.1540864123839576e-06, "loss": 0.0239, "step": 206635 }, { "epoch": 0.8622142851182082, "grad_norm": 0.9280134804966298, "learning_rate": 2.1540603512405744e-06, "loss": 0.0248, "step": 206640 }, { "epoch": 0.8622351478331984, "grad_norm": 0.6509826761567786, "learning_rate": 2.154034291043068e-06, "loss": 0.0235, "step": 206645 }, { "epoch": 0.8622560105481887, "grad_norm": 0.604776116332952, "learning_rate": 2.154008231791381e-06, "loss": 0.0225, "step": 206650 }, { "epoch": 0.8622768732631789, "grad_norm": 0.6900504786896517, "learning_rate": 2.1539821734854567e-06, "loss": 0.0216, "step": 206655 }, { "epoch": 0.8622977359781693, "grad_norm": 0.7361558352557674, "learning_rate": 2.153956116125237e-06, "loss": 0.023, "step": 206660 }, { "epoch": 0.8623185986931595, "grad_norm": 0.31532594071359543, "learning_rate": 2.153930059710665e-06, "loss": 0.0182, "step": 206665 }, { "epoch": 0.8623394614081498, "grad_norm": 0.6118836556068592, "learning_rate": 2.1539040042416847e-06, "loss": 0.0131, "step": 206670 }, { "epoch": 0.8623603241231401, "grad_norm": 1.0286583604668729, "learning_rate": 2.153877949718237e-06, "loss": 0.0275, "step": 206675 }, { "epoch": 0.8623811868381304, "grad_norm": 0.8039992349996838, "learning_rate": 2.153851896140266e-06, "loss": 0.0217, "step": 206680 }, { "epoch": 0.8624020495531206, "grad_norm": 0.574074165620237, "learning_rate": 2.1538258435077144e-06, "loss": 0.0261, "step": 206685 }, { "epoch": 0.862422912268111, "grad_norm": 0.5773855835630112, "learning_rate": 2.1537997918205244e-06, "loss": 0.0227, "step": 206690 }, { "epoch": 0.8624437749831012, "grad_norm": 0.37919794316114325, "learning_rate": 2.153773741078639e-06, "loss": 0.0171, "step": 206695 }, { "epoch": 0.8624646376980915, "grad_norm": 0.3471367536223731, "learning_rate": 2.153747691282002e-06, "loss": 0.0187, "step": 206700 }, { "epoch": 0.8624855004130818, "grad_norm": 0.5284261083992349, "learning_rate": 2.1537216424305555e-06, "loss": 0.0104, "step": 206705 }, { "epoch": 0.862506363128072, "grad_norm": 0.6349896566515494, "learning_rate": 2.1536955945242415e-06, "loss": 0.0217, "step": 206710 }, { "epoch": 0.8625272258430623, "grad_norm": 0.6010629662799579, "learning_rate": 2.153669547563004e-06, "loss": 0.0179, "step": 206715 }, { "epoch": 0.8625480885580525, "grad_norm": 0.46488486860330064, "learning_rate": 2.153643501546786e-06, "loss": 0.0177, "step": 206720 }, { "epoch": 0.8625689512730429, "grad_norm": 0.5783461846500271, "learning_rate": 2.15361745647553e-06, "loss": 0.0174, "step": 206725 }, { "epoch": 0.8625898139880331, "grad_norm": 0.3047589567915345, "learning_rate": 2.153591412349178e-06, "loss": 0.0171, "step": 206730 }, { "epoch": 0.8626106767030234, "grad_norm": 1.3849771317002608, "learning_rate": 2.153565369167674e-06, "loss": 0.0202, "step": 206735 }, { "epoch": 0.8626315394180137, "grad_norm": 0.7833145733181135, "learning_rate": 2.1535393269309603e-06, "loss": 0.016, "step": 206740 }, { "epoch": 0.862652402133004, "grad_norm": 0.8554352795669041, "learning_rate": 2.1535132856389805e-06, "loss": 0.0151, "step": 206745 }, { "epoch": 0.8626732648479942, "grad_norm": 0.3962050926650924, "learning_rate": 2.1534872452916762e-06, "loss": 0.0155, "step": 206750 }, { "epoch": 0.8626941275629846, "grad_norm": 0.5210391939339432, "learning_rate": 2.1534612058889916e-06, "loss": 0.0165, "step": 206755 }, { "epoch": 0.8627149902779748, "grad_norm": 0.5012220830583174, "learning_rate": 2.153435167430869e-06, "loss": 0.023, "step": 206760 }, { "epoch": 0.8627358529929651, "grad_norm": 0.47692734171387496, "learning_rate": 2.1534091299172515e-06, "loss": 0.0159, "step": 206765 }, { "epoch": 0.8627567157079554, "grad_norm": 0.5359167150637492, "learning_rate": 2.1533830933480814e-06, "loss": 0.0247, "step": 206770 }, { "epoch": 0.8627775784229457, "grad_norm": 0.7279283961450402, "learning_rate": 2.153357057723302e-06, "loss": 0.0201, "step": 206775 }, { "epoch": 0.8627984411379359, "grad_norm": 0.5467571295456155, "learning_rate": 2.1533310230428562e-06, "loss": 0.0197, "step": 206780 }, { "epoch": 0.8628193038529262, "grad_norm": 0.5572646546340243, "learning_rate": 2.1533049893066873e-06, "loss": 0.0249, "step": 206785 }, { "epoch": 0.8628401665679165, "grad_norm": 0.7383086190143534, "learning_rate": 2.153278956514738e-06, "loss": 0.0233, "step": 206790 }, { "epoch": 0.8628610292829068, "grad_norm": 0.688946756509654, "learning_rate": 2.1532529246669513e-06, "loss": 0.0238, "step": 206795 }, { "epoch": 0.862881891997897, "grad_norm": 1.0430685502346018, "learning_rate": 2.153226893763269e-06, "loss": 0.0294, "step": 206800 }, { "epoch": 0.8629027547128874, "grad_norm": 0.4619584322838067, "learning_rate": 2.1532008638036355e-06, "loss": 0.0209, "step": 206805 }, { "epoch": 0.8629236174278776, "grad_norm": 0.7954341790169616, "learning_rate": 2.1531748347879934e-06, "loss": 0.019, "step": 206810 }, { "epoch": 0.8629444801428678, "grad_norm": 0.4954824630667349, "learning_rate": 2.153148806716285e-06, "loss": 0.0247, "step": 206815 }, { "epoch": 0.8629653428578582, "grad_norm": 0.48551568814153717, "learning_rate": 2.153122779588454e-06, "loss": 0.0136, "step": 206820 }, { "epoch": 0.8629862055728484, "grad_norm": 0.6108159697108514, "learning_rate": 2.153096753404443e-06, "loss": 0.0166, "step": 206825 }, { "epoch": 0.8630070682878387, "grad_norm": 0.5213380373536584, "learning_rate": 2.153070728164195e-06, "loss": 0.0214, "step": 206830 }, { "epoch": 0.8630279310028289, "grad_norm": 0.5916596181404512, "learning_rate": 2.1530447038676527e-06, "loss": 0.0189, "step": 206835 }, { "epoch": 0.8630487937178193, "grad_norm": 0.6983999348316889, "learning_rate": 2.1530186805147597e-06, "loss": 0.0208, "step": 206840 }, { "epoch": 0.8630696564328095, "grad_norm": 0.32448104324311167, "learning_rate": 2.152992658105459e-06, "loss": 0.0228, "step": 206845 }, { "epoch": 0.8630905191477998, "grad_norm": 0.8641340786389774, "learning_rate": 2.1529666366396925e-06, "loss": 0.0206, "step": 206850 }, { "epoch": 0.8631113818627901, "grad_norm": 0.8125868650640976, "learning_rate": 2.152940616117404e-06, "loss": 0.0229, "step": 206855 }, { "epoch": 0.8631322445777804, "grad_norm": 0.6808026501616153, "learning_rate": 2.152914596538536e-06, "loss": 0.026, "step": 206860 }, { "epoch": 0.8631531072927706, "grad_norm": 0.7997656135746863, "learning_rate": 2.152888577903032e-06, "loss": 0.0213, "step": 206865 }, { "epoch": 0.863173970007761, "grad_norm": 0.6703311415860433, "learning_rate": 2.1528625602108357e-06, "loss": 0.0165, "step": 206870 }, { "epoch": 0.8631948327227512, "grad_norm": 0.4920657855516914, "learning_rate": 2.152836543461889e-06, "loss": 0.0248, "step": 206875 }, { "epoch": 0.8632156954377415, "grad_norm": 0.8393191354352825, "learning_rate": 2.1528105276561347e-06, "loss": 0.0265, "step": 206880 }, { "epoch": 0.8632365581527318, "grad_norm": 0.5663208269945335, "learning_rate": 2.1527845127935162e-06, "loss": 0.0157, "step": 206885 }, { "epoch": 0.8632574208677221, "grad_norm": 0.9605388769164771, "learning_rate": 2.152758498873977e-06, "loss": 0.0299, "step": 206890 }, { "epoch": 0.8632782835827123, "grad_norm": 0.4333172817012705, "learning_rate": 2.1527324858974595e-06, "loss": 0.0212, "step": 206895 }, { "epoch": 0.8632991462977025, "grad_norm": 0.7917705327020593, "learning_rate": 2.1527064738639074e-06, "loss": 0.0193, "step": 206900 }, { "epoch": 0.8633200090126929, "grad_norm": 0.6274278612585473, "learning_rate": 2.1526804627732625e-06, "loss": 0.0281, "step": 206905 }, { "epoch": 0.8633408717276831, "grad_norm": 0.3407824391202936, "learning_rate": 2.152654452625469e-06, "loss": 0.0114, "step": 206910 }, { "epoch": 0.8633617344426734, "grad_norm": 0.469617803988058, "learning_rate": 2.1526284434204696e-06, "loss": 0.0186, "step": 206915 }, { "epoch": 0.8633825971576637, "grad_norm": 0.6736871576988566, "learning_rate": 2.152602435158207e-06, "loss": 0.0177, "step": 206920 }, { "epoch": 0.863403459872654, "grad_norm": 0.36158735607539455, "learning_rate": 2.152576427838625e-06, "loss": 0.0225, "step": 206925 }, { "epoch": 0.8634243225876442, "grad_norm": 0.30455107825956973, "learning_rate": 2.152550421461666e-06, "loss": 0.025, "step": 206930 }, { "epoch": 0.8634451853026346, "grad_norm": 0.40993867257824207, "learning_rate": 2.152524416027273e-06, "loss": 0.0279, "step": 206935 }, { "epoch": 0.8634660480176248, "grad_norm": 0.5983612802224688, "learning_rate": 2.1524984115353896e-06, "loss": 0.0247, "step": 206940 }, { "epoch": 0.8634869107326151, "grad_norm": 0.7625448403265189, "learning_rate": 2.1524724079859583e-06, "loss": 0.0197, "step": 206945 }, { "epoch": 0.8635077734476054, "grad_norm": 1.0564927710238503, "learning_rate": 2.1524464053789227e-06, "loss": 0.0227, "step": 206950 }, { "epoch": 0.8635286361625957, "grad_norm": 0.6172167214718647, "learning_rate": 2.1524204037142253e-06, "loss": 0.0175, "step": 206955 }, { "epoch": 0.8635494988775859, "grad_norm": 0.7720309078607981, "learning_rate": 2.15239440299181e-06, "loss": 0.0221, "step": 206960 }, { "epoch": 0.8635703615925762, "grad_norm": 0.5480984923866475, "learning_rate": 2.1523684032116195e-06, "loss": 0.0233, "step": 206965 }, { "epoch": 0.8635912243075665, "grad_norm": 0.5431092667289392, "learning_rate": 2.152342404373596e-06, "loss": 0.0164, "step": 206970 }, { "epoch": 0.8636120870225568, "grad_norm": 0.47414488255330894, "learning_rate": 2.152316406477685e-06, "loss": 0.021, "step": 206975 }, { "epoch": 0.863632949737547, "grad_norm": 0.6453410493249084, "learning_rate": 2.1522904095238263e-06, "loss": 0.02, "step": 206980 }, { "epoch": 0.8636538124525374, "grad_norm": 0.3949800009773211, "learning_rate": 2.1522644135119655e-06, "loss": 0.0227, "step": 206985 }, { "epoch": 0.8636746751675276, "grad_norm": 0.2670541181145611, "learning_rate": 2.1522384184420446e-06, "loss": 0.0201, "step": 206990 }, { "epoch": 0.8636955378825178, "grad_norm": 0.8023716223460527, "learning_rate": 2.1522124243140075e-06, "loss": 0.0233, "step": 206995 }, { "epoch": 0.8637164005975082, "grad_norm": 0.7301053061099874, "learning_rate": 2.1521864311277964e-06, "loss": 0.0227, "step": 207000 }, { "epoch": 0.8637372633124984, "grad_norm": 1.2366698424513096, "learning_rate": 2.152160438883355e-06, "loss": 0.0241, "step": 207005 }, { "epoch": 0.8637581260274887, "grad_norm": 0.4851559519686613, "learning_rate": 2.1521344475806265e-06, "loss": 0.02, "step": 207010 }, { "epoch": 0.8637789887424789, "grad_norm": 0.3407780165129942, "learning_rate": 2.1521084572195537e-06, "loss": 0.0207, "step": 207015 }, { "epoch": 0.8637998514574693, "grad_norm": 0.6329788965434804, "learning_rate": 2.1520824678000804e-06, "loss": 0.0245, "step": 207020 }, { "epoch": 0.8638207141724595, "grad_norm": 0.40122185138168415, "learning_rate": 2.152056479322149e-06, "loss": 0.0214, "step": 207025 }, { "epoch": 0.8638415768874498, "grad_norm": 0.7504557754549147, "learning_rate": 2.1520304917857023e-06, "loss": 0.0191, "step": 207030 }, { "epoch": 0.8638624396024401, "grad_norm": 0.7672443415959461, "learning_rate": 2.152004505190685e-06, "loss": 0.0207, "step": 207035 }, { "epoch": 0.8638833023174304, "grad_norm": 0.549717157549507, "learning_rate": 2.1519785195370384e-06, "loss": 0.0173, "step": 207040 }, { "epoch": 0.8639041650324206, "grad_norm": 0.41829234491679734, "learning_rate": 2.151952534824707e-06, "loss": 0.0143, "step": 207045 }, { "epoch": 0.863925027747411, "grad_norm": 0.5147560163796215, "learning_rate": 2.151926551053634e-06, "loss": 0.0232, "step": 207050 }, { "epoch": 0.8639458904624012, "grad_norm": 0.7024320287101802, "learning_rate": 2.1519005682237616e-06, "loss": 0.0156, "step": 207055 }, { "epoch": 0.8639667531773915, "grad_norm": 0.7505729786569413, "learning_rate": 2.1518745863350337e-06, "loss": 0.02, "step": 207060 }, { "epoch": 0.8639876158923818, "grad_norm": 0.41403208532554, "learning_rate": 2.1518486053873934e-06, "loss": 0.0198, "step": 207065 }, { "epoch": 0.8640084786073721, "grad_norm": 0.6114466318112627, "learning_rate": 2.1518226253807838e-06, "loss": 0.0188, "step": 207070 }, { "epoch": 0.8640293413223623, "grad_norm": 0.3729287663474298, "learning_rate": 2.1517966463151477e-06, "loss": 0.0162, "step": 207075 }, { "epoch": 0.8640502040373526, "grad_norm": 0.7410210054933168, "learning_rate": 2.1517706681904293e-06, "loss": 0.0178, "step": 207080 }, { "epoch": 0.8640710667523429, "grad_norm": 0.18084548647285212, "learning_rate": 2.1517446910065705e-06, "loss": 0.023, "step": 207085 }, { "epoch": 0.8640919294673332, "grad_norm": 0.6371351627742432, "learning_rate": 2.1517187147635155e-06, "loss": 0.0234, "step": 207090 }, { "epoch": 0.8641127921823234, "grad_norm": 0.5726014059236475, "learning_rate": 2.151692739461207e-06, "loss": 0.0214, "step": 207095 }, { "epoch": 0.8641336548973138, "grad_norm": 0.6427518334707532, "learning_rate": 2.1516667650995887e-06, "loss": 0.0201, "step": 207100 }, { "epoch": 0.864154517612304, "grad_norm": 0.3995051886402342, "learning_rate": 2.1516407916786035e-06, "loss": 0.0169, "step": 207105 }, { "epoch": 0.8641753803272942, "grad_norm": 0.6739681328609868, "learning_rate": 2.1516148191981946e-06, "loss": 0.0218, "step": 207110 }, { "epoch": 0.8641962430422846, "grad_norm": 0.6358908597879557, "learning_rate": 2.1515888476583057e-06, "loss": 0.0176, "step": 207115 }, { "epoch": 0.8642171057572748, "grad_norm": 1.1519257507432665, "learning_rate": 2.151562877058879e-06, "loss": 0.0224, "step": 207120 }, { "epoch": 0.8642379684722651, "grad_norm": 0.6427511613286523, "learning_rate": 2.151536907399859e-06, "loss": 0.0189, "step": 207125 }, { "epoch": 0.8642588311872554, "grad_norm": 1.2227654317925294, "learning_rate": 2.151510938681188e-06, "loss": 0.0243, "step": 207130 }, { "epoch": 0.8642796939022457, "grad_norm": 0.640334252289315, "learning_rate": 2.15148497090281e-06, "loss": 0.0198, "step": 207135 }, { "epoch": 0.8643005566172359, "grad_norm": 1.2450718145205844, "learning_rate": 2.151459004064667e-06, "loss": 0.0162, "step": 207140 }, { "epoch": 0.8643214193322262, "grad_norm": 0.6683858599169246, "learning_rate": 2.1514330381667038e-06, "loss": 0.0182, "step": 207145 }, { "epoch": 0.8643422820472165, "grad_norm": 0.9207983660133057, "learning_rate": 2.151407073208863e-06, "loss": 0.0213, "step": 207150 }, { "epoch": 0.8643631447622068, "grad_norm": 0.26646248375128173, "learning_rate": 2.151381109191087e-06, "loss": 0.0152, "step": 207155 }, { "epoch": 0.864384007477197, "grad_norm": 0.546256136271525, "learning_rate": 2.1513551461133207e-06, "loss": 0.0203, "step": 207160 }, { "epoch": 0.8644048701921874, "grad_norm": 0.6129887352825365, "learning_rate": 2.151329183975507e-06, "loss": 0.0189, "step": 207165 }, { "epoch": 0.8644257329071776, "grad_norm": 0.31070430822517353, "learning_rate": 2.1513032227775878e-06, "loss": 0.0146, "step": 207170 }, { "epoch": 0.8644465956221679, "grad_norm": 0.7660837613199224, "learning_rate": 2.1512772625195077e-06, "loss": 0.0141, "step": 207175 }, { "epoch": 0.8644674583371582, "grad_norm": 0.2794428482337075, "learning_rate": 2.15125130320121e-06, "loss": 0.015, "step": 207180 }, { "epoch": 0.8644883210521485, "grad_norm": 0.6032851020523722, "learning_rate": 2.1512253448226374e-06, "loss": 0.0194, "step": 207185 }, { "epoch": 0.8645091837671387, "grad_norm": 0.589319830229664, "learning_rate": 2.1511993873837337e-06, "loss": 0.0233, "step": 207190 }, { "epoch": 0.8645300464821289, "grad_norm": 0.5013030355825562, "learning_rate": 2.151173430884442e-06, "loss": 0.0209, "step": 207195 }, { "epoch": 0.8645509091971193, "grad_norm": 0.6212907653029812, "learning_rate": 2.1511474753247054e-06, "loss": 0.0147, "step": 207200 }, { "epoch": 0.8645717719121095, "grad_norm": 0.458599241532912, "learning_rate": 2.1511215207044673e-06, "loss": 0.0194, "step": 207205 }, { "epoch": 0.8645926346270998, "grad_norm": 0.7435727236898385, "learning_rate": 2.151095567023671e-06, "loss": 0.0181, "step": 207210 }, { "epoch": 0.8646134973420901, "grad_norm": 0.5404538095588518, "learning_rate": 2.1510696142822603e-06, "loss": 0.0217, "step": 207215 }, { "epoch": 0.8646343600570804, "grad_norm": 0.7156232285740958, "learning_rate": 2.1510436624801783e-06, "loss": 0.0169, "step": 207220 }, { "epoch": 0.8646552227720706, "grad_norm": 1.092303549737264, "learning_rate": 2.1510177116173682e-06, "loss": 0.0392, "step": 207225 }, { "epoch": 0.864676085487061, "grad_norm": 0.48162537969493513, "learning_rate": 2.1509917616937732e-06, "loss": 0.0138, "step": 207230 }, { "epoch": 0.8646969482020512, "grad_norm": 1.3634878756205877, "learning_rate": 2.150965812709337e-06, "loss": 0.0257, "step": 207235 }, { "epoch": 0.8647178109170415, "grad_norm": 0.5494998674805431, "learning_rate": 2.150939864664003e-06, "loss": 0.0173, "step": 207240 }, { "epoch": 0.8647386736320318, "grad_norm": 0.7450300295644393, "learning_rate": 2.150913917557714e-06, "loss": 0.019, "step": 207245 }, { "epoch": 0.8647595363470221, "grad_norm": 0.8516879589873048, "learning_rate": 2.1508879713904137e-06, "loss": 0.0176, "step": 207250 }, { "epoch": 0.8647803990620123, "grad_norm": 0.7766391648726761, "learning_rate": 2.1508620261620456e-06, "loss": 0.021, "step": 207255 }, { "epoch": 0.8648012617770026, "grad_norm": 0.4734501938853005, "learning_rate": 2.1508360818725533e-06, "loss": 0.0175, "step": 207260 }, { "epoch": 0.8648221244919929, "grad_norm": 0.39811660571271057, "learning_rate": 2.150810138521879e-06, "loss": 0.0217, "step": 207265 }, { "epoch": 0.8648429872069832, "grad_norm": 2.2442039926225577, "learning_rate": 2.150784196109968e-06, "loss": 0.031, "step": 207270 }, { "epoch": 0.8648638499219734, "grad_norm": 1.5504582128177706, "learning_rate": 2.1507582546367615e-06, "loss": 0.0204, "step": 207275 }, { "epoch": 0.8648847126369638, "grad_norm": 0.5069612241945454, "learning_rate": 2.1507323141022044e-06, "loss": 0.0195, "step": 207280 }, { "epoch": 0.864905575351954, "grad_norm": 0.8877953585677318, "learning_rate": 2.1507063745062395e-06, "loss": 0.0231, "step": 207285 }, { "epoch": 0.8649264380669442, "grad_norm": 0.8434797083551363, "learning_rate": 2.1506804358488107e-06, "loss": 0.0173, "step": 207290 }, { "epoch": 0.8649473007819346, "grad_norm": 0.5770084261804884, "learning_rate": 2.1506544981298606e-06, "loss": 0.0257, "step": 207295 }, { "epoch": 0.8649681634969248, "grad_norm": 0.48297108610533857, "learning_rate": 2.150628561349334e-06, "loss": 0.0158, "step": 207300 }, { "epoch": 0.8649890262119151, "grad_norm": 0.42065334626480594, "learning_rate": 2.1506026255071727e-06, "loss": 0.0121, "step": 207305 }, { "epoch": 0.8650098889269054, "grad_norm": 0.2998400671205055, "learning_rate": 2.150576690603321e-06, "loss": 0.0189, "step": 207310 }, { "epoch": 0.8650307516418957, "grad_norm": 0.6456391187607836, "learning_rate": 2.150550756637722e-06, "loss": 0.0151, "step": 207315 }, { "epoch": 0.8650516143568859, "grad_norm": 0.4892538985907102, "learning_rate": 2.1505248236103198e-06, "loss": 0.0128, "step": 207320 }, { "epoch": 0.8650724770718762, "grad_norm": 0.43271326314386743, "learning_rate": 2.1504988915210564e-06, "loss": 0.0181, "step": 207325 }, { "epoch": 0.8650933397868665, "grad_norm": 1.1001658788948747, "learning_rate": 2.150472960369877e-06, "loss": 0.0283, "step": 207330 }, { "epoch": 0.8651142025018568, "grad_norm": 0.4145364999747746, "learning_rate": 2.150447030156724e-06, "loss": 0.0182, "step": 207335 }, { "epoch": 0.865135065216847, "grad_norm": 0.5060830832413611, "learning_rate": 2.1504211008815407e-06, "loss": 0.0191, "step": 207340 }, { "epoch": 0.8651559279318374, "grad_norm": 0.7717018193044219, "learning_rate": 2.1503951725442717e-06, "loss": 0.0177, "step": 207345 }, { "epoch": 0.8651767906468276, "grad_norm": 0.30692816741601087, "learning_rate": 2.1503692451448586e-06, "loss": 0.0166, "step": 207350 }, { "epoch": 0.8651976533618179, "grad_norm": 0.3123674277574996, "learning_rate": 2.1503433186832466e-06, "loss": 0.0197, "step": 207355 }, { "epoch": 0.8652185160768082, "grad_norm": 0.5391743772193627, "learning_rate": 2.150317393159378e-06, "loss": 0.0215, "step": 207360 }, { "epoch": 0.8652393787917985, "grad_norm": 0.4576997317008432, "learning_rate": 2.1502914685731973e-06, "loss": 0.0251, "step": 207365 }, { "epoch": 0.8652602415067887, "grad_norm": 0.45211191978826915, "learning_rate": 2.150265544924647e-06, "loss": 0.0197, "step": 207370 }, { "epoch": 0.865281104221779, "grad_norm": 0.3231355269172354, "learning_rate": 2.1502396222136714e-06, "loss": 0.0144, "step": 207375 }, { "epoch": 0.8653019669367693, "grad_norm": 0.45553307517088004, "learning_rate": 2.150213700440213e-06, "loss": 0.0187, "step": 207380 }, { "epoch": 0.8653228296517596, "grad_norm": 1.2920886673619152, "learning_rate": 2.1501877796042166e-06, "loss": 0.0333, "step": 207385 }, { "epoch": 0.8653436923667498, "grad_norm": 0.9545747232338863, "learning_rate": 2.1501618597056246e-06, "loss": 0.0225, "step": 207390 }, { "epoch": 0.8653645550817401, "grad_norm": 1.0632856811566238, "learning_rate": 2.1501359407443813e-06, "loss": 0.0235, "step": 207395 }, { "epoch": 0.8653854177967304, "grad_norm": 0.8794593519591943, "learning_rate": 2.150110022720429e-06, "loss": 0.0224, "step": 207400 }, { "epoch": 0.8654062805117206, "grad_norm": 0.5985402661201421, "learning_rate": 2.1500841056337126e-06, "loss": 0.0278, "step": 207405 }, { "epoch": 0.865427143226711, "grad_norm": 0.24631280078637008, "learning_rate": 2.150058189484175e-06, "loss": 0.0298, "step": 207410 }, { "epoch": 0.8654480059417012, "grad_norm": 0.6005412093006258, "learning_rate": 2.1500322742717592e-06, "loss": 0.0154, "step": 207415 }, { "epoch": 0.8654688686566915, "grad_norm": 0.6454988878671006, "learning_rate": 2.1500063599964102e-06, "loss": 0.0249, "step": 207420 }, { "epoch": 0.8654897313716818, "grad_norm": 1.1726718194900478, "learning_rate": 2.1499804466580702e-06, "loss": 0.0231, "step": 207425 }, { "epoch": 0.8655105940866721, "grad_norm": 0.4362754892013361, "learning_rate": 2.1499545342566825e-06, "loss": 0.0221, "step": 207430 }, { "epoch": 0.8655314568016623, "grad_norm": 0.9576848787260153, "learning_rate": 2.149928622792192e-06, "loss": 0.0176, "step": 207435 }, { "epoch": 0.8655523195166526, "grad_norm": 0.7453663070477922, "learning_rate": 2.149902712264541e-06, "loss": 0.0178, "step": 207440 }, { "epoch": 0.8655731822316429, "grad_norm": 0.6102190920000722, "learning_rate": 2.149876802673674e-06, "loss": 0.0135, "step": 207445 }, { "epoch": 0.8655940449466332, "grad_norm": 0.44915992178068864, "learning_rate": 2.149850894019534e-06, "loss": 0.019, "step": 207450 }, { "epoch": 0.8656149076616234, "grad_norm": 0.5896295757413105, "learning_rate": 2.149824986302065e-06, "loss": 0.0149, "step": 207455 }, { "epoch": 0.8656357703766138, "grad_norm": 0.9714683576527542, "learning_rate": 2.1497990795212097e-06, "loss": 0.0231, "step": 207460 }, { "epoch": 0.865656633091604, "grad_norm": 0.5690740164691238, "learning_rate": 2.1497731736769124e-06, "loss": 0.0215, "step": 207465 }, { "epoch": 0.8656774958065943, "grad_norm": 0.6518586015753169, "learning_rate": 2.1497472687691164e-06, "loss": 0.0203, "step": 207470 }, { "epoch": 0.8656983585215846, "grad_norm": 0.6302082074449021, "learning_rate": 2.1497213647977656e-06, "loss": 0.0172, "step": 207475 }, { "epoch": 0.8657192212365749, "grad_norm": 0.5349086332430651, "learning_rate": 2.1496954617628026e-06, "loss": 0.0109, "step": 207480 }, { "epoch": 0.8657400839515651, "grad_norm": 0.8915696172889632, "learning_rate": 2.1496695596641727e-06, "loss": 0.0231, "step": 207485 }, { "epoch": 0.8657609466665555, "grad_norm": 0.5789279165110266, "learning_rate": 2.149643658501818e-06, "loss": 0.0173, "step": 207490 }, { "epoch": 0.8657818093815457, "grad_norm": 0.5402899927650989, "learning_rate": 2.1496177582756825e-06, "loss": 0.0217, "step": 207495 }, { "epoch": 0.8658026720965359, "grad_norm": 0.8311123829489635, "learning_rate": 2.14959185898571e-06, "loss": 0.0177, "step": 207500 }, { "epoch": 0.8658235348115262, "grad_norm": 0.5839769437212513, "learning_rate": 2.149565960631844e-06, "loss": 0.0162, "step": 207505 }, { "epoch": 0.8658443975265165, "grad_norm": 0.3839677582522246, "learning_rate": 2.149540063214028e-06, "loss": 0.0154, "step": 207510 }, { "epoch": 0.8658652602415068, "grad_norm": 0.634473825616558, "learning_rate": 2.149514166732206e-06, "loss": 0.0163, "step": 207515 }, { "epoch": 0.865886122956497, "grad_norm": 0.16635747682818572, "learning_rate": 2.1494882711863212e-06, "loss": 0.0206, "step": 207520 }, { "epoch": 0.8659069856714874, "grad_norm": 0.8169248505421174, "learning_rate": 2.1494623765763176e-06, "loss": 0.0185, "step": 207525 }, { "epoch": 0.8659278483864776, "grad_norm": 0.29276087355244546, "learning_rate": 2.1494364829021385e-06, "loss": 0.0165, "step": 207530 }, { "epoch": 0.8659487111014679, "grad_norm": 1.031975042303777, "learning_rate": 2.149410590163727e-06, "loss": 0.0164, "step": 207535 }, { "epoch": 0.8659695738164582, "grad_norm": 0.9325074987036068, "learning_rate": 2.1493846983610286e-06, "loss": 0.0209, "step": 207540 }, { "epoch": 0.8659904365314485, "grad_norm": 0.9946919551869162, "learning_rate": 2.149358807493985e-06, "loss": 0.0207, "step": 207545 }, { "epoch": 0.8660112992464387, "grad_norm": 1.1182581277762376, "learning_rate": 2.1493329175625406e-06, "loss": 0.0331, "step": 207550 }, { "epoch": 0.866032161961429, "grad_norm": 0.284213999826459, "learning_rate": 2.1493070285666388e-06, "loss": 0.0209, "step": 207555 }, { "epoch": 0.8660530246764193, "grad_norm": 0.7010982199906249, "learning_rate": 2.149281140506224e-06, "loss": 0.0202, "step": 207560 }, { "epoch": 0.8660738873914096, "grad_norm": 0.7441073439153091, "learning_rate": 2.149255253381239e-06, "loss": 0.0213, "step": 207565 }, { "epoch": 0.8660947501063998, "grad_norm": 0.4472682125405809, "learning_rate": 2.149229367191628e-06, "loss": 0.0173, "step": 207570 }, { "epoch": 0.8661156128213902, "grad_norm": 0.8205032141125691, "learning_rate": 2.1492034819373346e-06, "loss": 0.022, "step": 207575 }, { "epoch": 0.8661364755363804, "grad_norm": 0.6527056225092766, "learning_rate": 2.149177597618302e-06, "loss": 0.0196, "step": 207580 }, { "epoch": 0.8661573382513706, "grad_norm": 0.6923744309515117, "learning_rate": 2.1491517142344745e-06, "loss": 0.0193, "step": 207585 }, { "epoch": 0.866178200966361, "grad_norm": 0.2958587174137203, "learning_rate": 2.1491258317857955e-06, "loss": 0.0217, "step": 207590 }, { "epoch": 0.8661990636813512, "grad_norm": 0.8677757336436672, "learning_rate": 2.1490999502722084e-06, "loss": 0.0215, "step": 207595 }, { "epoch": 0.8662199263963415, "grad_norm": 0.6912817468664594, "learning_rate": 2.1490740696936575e-06, "loss": 0.0208, "step": 207600 }, { "epoch": 0.8662407891113318, "grad_norm": 0.670159693297308, "learning_rate": 2.149048190050086e-06, "loss": 0.0162, "step": 207605 }, { "epoch": 0.8662616518263221, "grad_norm": 0.7925252146015027, "learning_rate": 2.149022311341438e-06, "loss": 0.0255, "step": 207610 }, { "epoch": 0.8662825145413123, "grad_norm": 0.752031794938776, "learning_rate": 2.148996433567657e-06, "loss": 0.0167, "step": 207615 }, { "epoch": 0.8663033772563026, "grad_norm": 0.4588524494576642, "learning_rate": 2.1489705567286868e-06, "loss": 0.0241, "step": 207620 }, { "epoch": 0.8663242399712929, "grad_norm": 0.43179892473153086, "learning_rate": 2.148944680824471e-06, "loss": 0.0136, "step": 207625 }, { "epoch": 0.8663451026862832, "grad_norm": 0.3548151514138949, "learning_rate": 2.1489188058549533e-06, "loss": 0.0164, "step": 207630 }, { "epoch": 0.8663659654012734, "grad_norm": 1.3462367255390681, "learning_rate": 2.1488929318200776e-06, "loss": 0.0184, "step": 207635 }, { "epoch": 0.8663868281162638, "grad_norm": 0.7233680197374606, "learning_rate": 2.148867058719788e-06, "loss": 0.0193, "step": 207640 }, { "epoch": 0.866407690831254, "grad_norm": 0.8997679159955688, "learning_rate": 2.148841186554027e-06, "loss": 0.0228, "step": 207645 }, { "epoch": 0.8664285535462443, "grad_norm": 0.5017904189442304, "learning_rate": 2.1488153153227397e-06, "loss": 0.02, "step": 207650 }, { "epoch": 0.8664494162612346, "grad_norm": 0.7776817477954534, "learning_rate": 2.148789445025869e-06, "loss": 0.0204, "step": 207655 }, { "epoch": 0.8664702789762249, "grad_norm": 0.5482404235444327, "learning_rate": 2.148763575663359e-06, "loss": 0.0152, "step": 207660 }, { "epoch": 0.8664911416912151, "grad_norm": 0.6758128957658892, "learning_rate": 2.1487377072351532e-06, "loss": 0.0203, "step": 207665 }, { "epoch": 0.8665120044062055, "grad_norm": 0.5167960394765493, "learning_rate": 2.148711839741196e-06, "loss": 0.0224, "step": 207670 }, { "epoch": 0.8665328671211957, "grad_norm": 0.4441495998883929, "learning_rate": 2.1486859731814303e-06, "loss": 0.0163, "step": 207675 }, { "epoch": 0.866553729836186, "grad_norm": 0.40654321605699917, "learning_rate": 2.1486601075558007e-06, "loss": 0.0211, "step": 207680 }, { "epoch": 0.8665745925511762, "grad_norm": 0.7163485682796144, "learning_rate": 2.14863424286425e-06, "loss": 0.0187, "step": 207685 }, { "epoch": 0.8665954552661665, "grad_norm": 1.1611468403483778, "learning_rate": 2.148608379106723e-06, "loss": 0.0193, "step": 207690 }, { "epoch": 0.8666163179811568, "grad_norm": 0.6463614215736768, "learning_rate": 2.148582516283163e-06, "loss": 0.0162, "step": 207695 }, { "epoch": 0.866637180696147, "grad_norm": 0.45587145725647954, "learning_rate": 2.1485566543935135e-06, "loss": 0.0125, "step": 207700 }, { "epoch": 0.8666580434111374, "grad_norm": 0.8133950935937043, "learning_rate": 2.1485307934377187e-06, "loss": 0.0206, "step": 207705 }, { "epoch": 0.8666789061261276, "grad_norm": 0.28157430717119153, "learning_rate": 2.1485049334157228e-06, "loss": 0.0175, "step": 207710 }, { "epoch": 0.8666997688411179, "grad_norm": 0.5770575629189302, "learning_rate": 2.148479074327469e-06, "loss": 0.0173, "step": 207715 }, { "epoch": 0.8667206315561082, "grad_norm": 0.6402401877269316, "learning_rate": 2.1484532161729006e-06, "loss": 0.0172, "step": 207720 }, { "epoch": 0.8667414942710985, "grad_norm": 0.6222890352075549, "learning_rate": 2.1484273589519625e-06, "loss": 0.0211, "step": 207725 }, { "epoch": 0.8667623569860887, "grad_norm": 0.5016460074879737, "learning_rate": 2.1484015026645983e-06, "loss": 0.02, "step": 207730 }, { "epoch": 0.866783219701079, "grad_norm": 0.8283728000413482, "learning_rate": 2.1483756473107513e-06, "loss": 0.0199, "step": 207735 }, { "epoch": 0.8668040824160693, "grad_norm": 0.2577644745864428, "learning_rate": 2.1483497928903655e-06, "loss": 0.0158, "step": 207740 }, { "epoch": 0.8668249451310596, "grad_norm": 0.41320104219086556, "learning_rate": 2.1483239394033854e-06, "loss": 0.0191, "step": 207745 }, { "epoch": 0.8668458078460498, "grad_norm": 0.5345014165458133, "learning_rate": 2.148298086849754e-06, "loss": 0.0165, "step": 207750 }, { "epoch": 0.8668666705610402, "grad_norm": 0.6118464680962059, "learning_rate": 2.1482722352294147e-06, "loss": 0.0195, "step": 207755 }, { "epoch": 0.8668875332760304, "grad_norm": 0.47906081071312495, "learning_rate": 2.1482463845423126e-06, "loss": 0.0156, "step": 207760 }, { "epoch": 0.8669083959910207, "grad_norm": 0.8715858617446871, "learning_rate": 2.1482205347883914e-06, "loss": 0.0264, "step": 207765 }, { "epoch": 0.866929258706011, "grad_norm": 0.42539191930247605, "learning_rate": 2.1481946859675942e-06, "loss": 0.02, "step": 207770 }, { "epoch": 0.8669501214210013, "grad_norm": 0.688085605218035, "learning_rate": 2.1481688380798653e-06, "loss": 0.0284, "step": 207775 }, { "epoch": 0.8669709841359915, "grad_norm": 0.8688251780849938, "learning_rate": 2.1481429911251487e-06, "loss": 0.021, "step": 207780 }, { "epoch": 0.8669918468509819, "grad_norm": 0.7145304545391767, "learning_rate": 2.148117145103388e-06, "loss": 0.0216, "step": 207785 }, { "epoch": 0.8670127095659721, "grad_norm": 0.902777923721851, "learning_rate": 2.1480913000145275e-06, "loss": 0.0271, "step": 207790 }, { "epoch": 0.8670335722809623, "grad_norm": 0.39343916602307516, "learning_rate": 2.1480654558585098e-06, "loss": 0.0183, "step": 207795 }, { "epoch": 0.8670544349959526, "grad_norm": 0.2719325296328044, "learning_rate": 2.14803961263528e-06, "loss": 0.0174, "step": 207800 }, { "epoch": 0.8670752977109429, "grad_norm": 0.20666112693788202, "learning_rate": 2.1480137703447824e-06, "loss": 0.0233, "step": 207805 }, { "epoch": 0.8670961604259332, "grad_norm": 0.552355689184608, "learning_rate": 2.1479879289869597e-06, "loss": 0.0173, "step": 207810 }, { "epoch": 0.8671170231409234, "grad_norm": 1.153335173720659, "learning_rate": 2.1479620885617566e-06, "loss": 0.0178, "step": 207815 }, { "epoch": 0.8671378858559138, "grad_norm": 0.4757215519877003, "learning_rate": 2.1479362490691165e-06, "loss": 0.0194, "step": 207820 }, { "epoch": 0.867158748570904, "grad_norm": 0.5268723958821365, "learning_rate": 2.147910410508984e-06, "loss": 0.0242, "step": 207825 }, { "epoch": 0.8671796112858943, "grad_norm": 0.34301516733439685, "learning_rate": 2.1478845728813015e-06, "loss": 0.0201, "step": 207830 }, { "epoch": 0.8672004740008846, "grad_norm": 0.6568362289431767, "learning_rate": 2.147858736186015e-06, "loss": 0.0207, "step": 207835 }, { "epoch": 0.8672213367158749, "grad_norm": 0.34103992223748225, "learning_rate": 2.1478329004230666e-06, "loss": 0.0143, "step": 207840 }, { "epoch": 0.8672421994308651, "grad_norm": 0.836975388544817, "learning_rate": 2.1478070655924014e-06, "loss": 0.0269, "step": 207845 }, { "epoch": 0.8672630621458555, "grad_norm": 0.6331772475466138, "learning_rate": 2.147781231693963e-06, "loss": 0.0243, "step": 207850 }, { "epoch": 0.8672839248608457, "grad_norm": 1.3844344284520347, "learning_rate": 2.1477553987276947e-06, "loss": 0.0219, "step": 207855 }, { "epoch": 0.867304787575836, "grad_norm": 1.325989978384383, "learning_rate": 2.1477295666935418e-06, "loss": 0.0162, "step": 207860 }, { "epoch": 0.8673256502908262, "grad_norm": 0.7778731168876777, "learning_rate": 2.147703735591447e-06, "loss": 0.0168, "step": 207865 }, { "epoch": 0.8673465130058166, "grad_norm": 0.6707682821907004, "learning_rate": 2.147677905421355e-06, "loss": 0.0331, "step": 207870 }, { "epoch": 0.8673673757208068, "grad_norm": 0.5448052376589331, "learning_rate": 2.1476520761832094e-06, "loss": 0.0198, "step": 207875 }, { "epoch": 0.867388238435797, "grad_norm": 0.6154811769838466, "learning_rate": 2.147626247876954e-06, "loss": 0.0195, "step": 207880 }, { "epoch": 0.8674091011507874, "grad_norm": 1.7187448804688765, "learning_rate": 2.1476004205025335e-06, "loss": 0.0229, "step": 207885 }, { "epoch": 0.8674299638657776, "grad_norm": 0.35168529104359997, "learning_rate": 2.1475745940598907e-06, "loss": 0.0145, "step": 207890 }, { "epoch": 0.8674508265807679, "grad_norm": 0.8615906218035244, "learning_rate": 2.1475487685489706e-06, "loss": 0.0175, "step": 207895 }, { "epoch": 0.8674716892957582, "grad_norm": 1.0169702671391274, "learning_rate": 2.147522943969717e-06, "loss": 0.0205, "step": 207900 }, { "epoch": 0.8674925520107485, "grad_norm": 1.231852874550129, "learning_rate": 2.147497120322073e-06, "loss": 0.031, "step": 207905 }, { "epoch": 0.8675134147257387, "grad_norm": 0.47005730082032, "learning_rate": 2.147471297605984e-06, "loss": 0.0162, "step": 207910 }, { "epoch": 0.867534277440729, "grad_norm": 0.840243356065263, "learning_rate": 2.147445475821393e-06, "loss": 0.0221, "step": 207915 }, { "epoch": 0.8675551401557193, "grad_norm": 0.5752997687558747, "learning_rate": 2.1474196549682445e-06, "loss": 0.0268, "step": 207920 }, { "epoch": 0.8675760028707096, "grad_norm": 1.2849677522742722, "learning_rate": 2.147393835046482e-06, "loss": 0.0211, "step": 207925 }, { "epoch": 0.8675968655856998, "grad_norm": 0.48951760977883296, "learning_rate": 2.14736801605605e-06, "loss": 0.0155, "step": 207930 }, { "epoch": 0.8676177283006902, "grad_norm": 0.8966790218027716, "learning_rate": 2.147342197996892e-06, "loss": 0.0192, "step": 207935 }, { "epoch": 0.8676385910156804, "grad_norm": 1.3000271056338715, "learning_rate": 2.1473163808689528e-06, "loss": 0.0213, "step": 207940 }, { "epoch": 0.8676594537306707, "grad_norm": 1.0368916328505213, "learning_rate": 2.1472905646721755e-06, "loss": 0.0215, "step": 207945 }, { "epoch": 0.867680316445661, "grad_norm": 0.3755604382561917, "learning_rate": 2.147264749406505e-06, "loss": 0.0233, "step": 207950 }, { "epoch": 0.8677011791606513, "grad_norm": 0.6162329094982926, "learning_rate": 2.1472389350718846e-06, "loss": 0.0179, "step": 207955 }, { "epoch": 0.8677220418756415, "grad_norm": 0.3034850384909973, "learning_rate": 2.1472131216682586e-06, "loss": 0.0223, "step": 207960 }, { "epoch": 0.8677429045906319, "grad_norm": 0.7491752647311194, "learning_rate": 2.1471873091955714e-06, "loss": 0.0236, "step": 207965 }, { "epoch": 0.8677637673056221, "grad_norm": 0.6775652908154588, "learning_rate": 2.1471614976537667e-06, "loss": 0.0265, "step": 207970 }, { "epoch": 0.8677846300206123, "grad_norm": 0.7042494276924075, "learning_rate": 2.147135687042788e-06, "loss": 0.0153, "step": 207975 }, { "epoch": 0.8678054927356026, "grad_norm": 0.5121217368575438, "learning_rate": 2.147109877362581e-06, "loss": 0.0167, "step": 207980 }, { "epoch": 0.867826355450593, "grad_norm": 0.3822127632570676, "learning_rate": 2.147084068613088e-06, "loss": 0.0201, "step": 207985 }, { "epoch": 0.8678472181655832, "grad_norm": 0.8077565558372657, "learning_rate": 2.1470582607942534e-06, "loss": 0.022, "step": 207990 }, { "epoch": 0.8678680808805734, "grad_norm": 0.7435833208491127, "learning_rate": 2.1470324539060227e-06, "loss": 0.0245, "step": 207995 }, { "epoch": 0.8678889435955638, "grad_norm": 0.5747812012816664, "learning_rate": 2.1470066479483377e-06, "loss": 0.0241, "step": 208000 }, { "epoch": 0.867909806310554, "grad_norm": 0.33540111079025464, "learning_rate": 2.1469808429211446e-06, "loss": 0.0263, "step": 208005 }, { "epoch": 0.8679306690255443, "grad_norm": 0.6369858153004112, "learning_rate": 2.146955038824386e-06, "loss": 0.0247, "step": 208010 }, { "epoch": 0.8679515317405346, "grad_norm": 0.3844429693791217, "learning_rate": 2.146929235658007e-06, "loss": 0.0219, "step": 208015 }, { "epoch": 0.8679723944555249, "grad_norm": 0.47913596485259663, "learning_rate": 2.146903433421951e-06, "loss": 0.018, "step": 208020 }, { "epoch": 0.8679932571705151, "grad_norm": 0.8147383529295044, "learning_rate": 2.146877632116162e-06, "loss": 0.0257, "step": 208025 }, { "epoch": 0.8680141198855055, "grad_norm": 0.9316587008892132, "learning_rate": 2.1468518317405846e-06, "loss": 0.0274, "step": 208030 }, { "epoch": 0.8680349826004957, "grad_norm": 1.6388008462818193, "learning_rate": 2.1468260322951634e-06, "loss": 0.033, "step": 208035 }, { "epoch": 0.868055845315486, "grad_norm": 0.7509488515440635, "learning_rate": 2.1468002337798415e-06, "loss": 0.0257, "step": 208040 }, { "epoch": 0.8680767080304762, "grad_norm": 0.9585404967052894, "learning_rate": 2.1467744361945626e-06, "loss": 0.0182, "step": 208045 }, { "epoch": 0.8680975707454666, "grad_norm": 0.21546583201797145, "learning_rate": 2.1467486395392725e-06, "loss": 0.0164, "step": 208050 }, { "epoch": 0.8681184334604568, "grad_norm": 0.579192736750508, "learning_rate": 2.1467228438139145e-06, "loss": 0.0205, "step": 208055 }, { "epoch": 0.868139296175447, "grad_norm": 0.38643945836949556, "learning_rate": 2.146697049018432e-06, "loss": 0.0161, "step": 208060 }, { "epoch": 0.8681601588904374, "grad_norm": 0.6774653048138951, "learning_rate": 2.14667125515277e-06, "loss": 0.02, "step": 208065 }, { "epoch": 0.8681810216054276, "grad_norm": 0.3460857546827862, "learning_rate": 2.146645462216873e-06, "loss": 0.0138, "step": 208070 }, { "epoch": 0.8682018843204179, "grad_norm": 0.8128205348393923, "learning_rate": 2.146619670210684e-06, "loss": 0.0201, "step": 208075 }, { "epoch": 0.8682227470354082, "grad_norm": 0.6410013932686053, "learning_rate": 2.146593879134148e-06, "loss": 0.0141, "step": 208080 }, { "epoch": 0.8682436097503985, "grad_norm": 0.9086541370897366, "learning_rate": 2.1465680889872086e-06, "loss": 0.0177, "step": 208085 }, { "epoch": 0.8682644724653887, "grad_norm": 0.6134878726341697, "learning_rate": 2.14654229976981e-06, "loss": 0.0215, "step": 208090 }, { "epoch": 0.868285335180379, "grad_norm": 0.660988594941233, "learning_rate": 2.1465165114818973e-06, "loss": 0.0175, "step": 208095 }, { "epoch": 0.8683061978953693, "grad_norm": 1.059597299301093, "learning_rate": 2.146490724123414e-06, "loss": 0.0248, "step": 208100 }, { "epoch": 0.8683270606103596, "grad_norm": 0.4815774117707674, "learning_rate": 2.1464649376943035e-06, "loss": 0.021, "step": 208105 }, { "epoch": 0.8683479233253498, "grad_norm": 0.729157745259382, "learning_rate": 2.1464391521945106e-06, "loss": 0.0249, "step": 208110 }, { "epoch": 0.8683687860403402, "grad_norm": 1.323852481341135, "learning_rate": 2.1464133676239806e-06, "loss": 0.0214, "step": 208115 }, { "epoch": 0.8683896487553304, "grad_norm": 0.5111190396062038, "learning_rate": 2.1463875839826555e-06, "loss": 0.0288, "step": 208120 }, { "epoch": 0.8684105114703207, "grad_norm": 0.610248277107204, "learning_rate": 2.146361801270482e-06, "loss": 0.0196, "step": 208125 }, { "epoch": 0.868431374185311, "grad_norm": 0.46202308460664954, "learning_rate": 2.146336019487402e-06, "loss": 0.0168, "step": 208130 }, { "epoch": 0.8684522369003013, "grad_norm": 0.41902883856131984, "learning_rate": 2.146310238633361e-06, "loss": 0.0219, "step": 208135 }, { "epoch": 0.8684730996152915, "grad_norm": 0.43304826851244865, "learning_rate": 2.146284458708303e-06, "loss": 0.0207, "step": 208140 }, { "epoch": 0.8684939623302819, "grad_norm": 0.8026214184875017, "learning_rate": 2.146258679712172e-06, "loss": 0.0199, "step": 208145 }, { "epoch": 0.8685148250452721, "grad_norm": 0.4909296959023039, "learning_rate": 2.146232901644912e-06, "loss": 0.0153, "step": 208150 }, { "epoch": 0.8685356877602624, "grad_norm": 0.8323089867960034, "learning_rate": 2.146207124506468e-06, "loss": 0.0209, "step": 208155 }, { "epoch": 0.8685565504752526, "grad_norm": 0.7050658706302018, "learning_rate": 2.146181348296783e-06, "loss": 0.019, "step": 208160 }, { "epoch": 0.868577413190243, "grad_norm": 0.810620072220298, "learning_rate": 2.146155573015803e-06, "loss": 0.017, "step": 208165 }, { "epoch": 0.8685982759052332, "grad_norm": 0.5198530280592596, "learning_rate": 2.1461297986634703e-06, "loss": 0.0184, "step": 208170 }, { "epoch": 0.8686191386202234, "grad_norm": 0.23494878708263714, "learning_rate": 2.1461040252397304e-06, "loss": 0.0318, "step": 208175 }, { "epoch": 0.8686400013352138, "grad_norm": 0.6945186864687084, "learning_rate": 2.146078252744527e-06, "loss": 0.0249, "step": 208180 }, { "epoch": 0.868660864050204, "grad_norm": 0.5852976351181053, "learning_rate": 2.146052481177805e-06, "loss": 0.0212, "step": 208185 }, { "epoch": 0.8686817267651943, "grad_norm": 0.779299333572816, "learning_rate": 2.146026710539508e-06, "loss": 0.0203, "step": 208190 }, { "epoch": 0.8687025894801846, "grad_norm": 0.7205952303952214, "learning_rate": 2.1460009408295797e-06, "loss": 0.0199, "step": 208195 }, { "epoch": 0.8687234521951749, "grad_norm": 0.7844567071099534, "learning_rate": 2.1459751720479662e-06, "loss": 0.0252, "step": 208200 }, { "epoch": 0.8687443149101651, "grad_norm": 0.6772687884702948, "learning_rate": 2.1459494041946098e-06, "loss": 0.0177, "step": 208205 }, { "epoch": 0.8687651776251555, "grad_norm": 1.0414371672045408, "learning_rate": 2.1459236372694566e-06, "loss": 0.0208, "step": 208210 }, { "epoch": 0.8687860403401457, "grad_norm": 0.6685320988479374, "learning_rate": 2.1458978712724495e-06, "loss": 0.0197, "step": 208215 }, { "epoch": 0.868806903055136, "grad_norm": 0.6811742968593033, "learning_rate": 2.1458721062035326e-06, "loss": 0.0187, "step": 208220 }, { "epoch": 0.8688277657701262, "grad_norm": 0.8849471201103837, "learning_rate": 2.1458463420626512e-06, "loss": 0.0204, "step": 208225 }, { "epoch": 0.8688486284851166, "grad_norm": 0.8870892369463611, "learning_rate": 2.1458205788497496e-06, "loss": 0.0238, "step": 208230 }, { "epoch": 0.8688694912001068, "grad_norm": 0.6430439247416234, "learning_rate": 2.145794816564771e-06, "loss": 0.0201, "step": 208235 }, { "epoch": 0.8688903539150971, "grad_norm": 0.7152680102686967, "learning_rate": 2.1457690552076607e-06, "loss": 0.019, "step": 208240 }, { "epoch": 0.8689112166300874, "grad_norm": 0.2771553463564311, "learning_rate": 2.145743294778363e-06, "loss": 0.0168, "step": 208245 }, { "epoch": 0.8689320793450777, "grad_norm": 0.994669124581845, "learning_rate": 2.145717535276821e-06, "loss": 0.0119, "step": 208250 }, { "epoch": 0.8689529420600679, "grad_norm": 0.49814874801472453, "learning_rate": 2.1456917767029805e-06, "loss": 0.0191, "step": 208255 }, { "epoch": 0.8689738047750583, "grad_norm": 0.6808148979489951, "learning_rate": 2.1456660190567852e-06, "loss": 0.0205, "step": 208260 }, { "epoch": 0.8689946674900485, "grad_norm": 0.8783755013858643, "learning_rate": 2.1456402623381796e-06, "loss": 0.0198, "step": 208265 }, { "epoch": 0.8690155302050387, "grad_norm": 0.571665463532961, "learning_rate": 2.145614506547107e-06, "loss": 0.02, "step": 208270 }, { "epoch": 0.869036392920029, "grad_norm": 0.8768007278110089, "learning_rate": 2.1455887516835135e-06, "loss": 0.0195, "step": 208275 }, { "epoch": 0.8690572556350193, "grad_norm": 0.6280327145336687, "learning_rate": 2.145562997747342e-06, "loss": 0.0273, "step": 208280 }, { "epoch": 0.8690781183500096, "grad_norm": 1.3988140716851718, "learning_rate": 2.1455372447385374e-06, "loss": 0.0223, "step": 208285 }, { "epoch": 0.8690989810649998, "grad_norm": 0.4688187595526508, "learning_rate": 2.145511492657044e-06, "loss": 0.0171, "step": 208290 }, { "epoch": 0.8691198437799902, "grad_norm": 0.40466308018077757, "learning_rate": 2.1454857415028067e-06, "loss": 0.017, "step": 208295 }, { "epoch": 0.8691407064949804, "grad_norm": 0.518852668750323, "learning_rate": 2.1454599912757692e-06, "loss": 0.0206, "step": 208300 }, { "epoch": 0.8691615692099707, "grad_norm": 0.5797513210719106, "learning_rate": 2.1454342419758754e-06, "loss": 0.0257, "step": 208305 }, { "epoch": 0.869182431924961, "grad_norm": 0.5636045737616271, "learning_rate": 2.145408493603071e-06, "loss": 0.0207, "step": 208310 }, { "epoch": 0.8692032946399513, "grad_norm": 0.9668704848189758, "learning_rate": 2.1453827461572992e-06, "loss": 0.0188, "step": 208315 }, { "epoch": 0.8692241573549415, "grad_norm": 0.7962722087998504, "learning_rate": 2.1453569996385047e-06, "loss": 0.0139, "step": 208320 }, { "epoch": 0.8692450200699319, "grad_norm": 0.5174017231687655, "learning_rate": 2.145331254046632e-06, "loss": 0.0142, "step": 208325 }, { "epoch": 0.8692658827849221, "grad_norm": 0.6961666541834942, "learning_rate": 2.1453055093816256e-06, "loss": 0.0172, "step": 208330 }, { "epoch": 0.8692867454999124, "grad_norm": 0.6551319675746629, "learning_rate": 2.1452797656434293e-06, "loss": 0.0222, "step": 208335 }, { "epoch": 0.8693076082149026, "grad_norm": 0.5491243618106464, "learning_rate": 2.1452540228319883e-06, "loss": 0.0255, "step": 208340 }, { "epoch": 0.869328470929893, "grad_norm": 0.4621342497667367, "learning_rate": 2.145228280947247e-06, "loss": 0.0143, "step": 208345 }, { "epoch": 0.8693493336448832, "grad_norm": 0.6811181007337946, "learning_rate": 2.1452025399891484e-06, "loss": 0.0232, "step": 208350 }, { "epoch": 0.8693701963598734, "grad_norm": 0.5463958474503539, "learning_rate": 2.145176799957639e-06, "loss": 0.0166, "step": 208355 }, { "epoch": 0.8693910590748638, "grad_norm": 0.5889212833539793, "learning_rate": 2.1451510608526612e-06, "loss": 0.0196, "step": 208360 }, { "epoch": 0.869411921789854, "grad_norm": 0.6092890073108181, "learning_rate": 2.1451253226741613e-06, "loss": 0.0166, "step": 208365 }, { "epoch": 0.8694327845048443, "grad_norm": 0.813079874709791, "learning_rate": 2.145099585422082e-06, "loss": 0.0245, "step": 208370 }, { "epoch": 0.8694536472198346, "grad_norm": 1.0036337026884452, "learning_rate": 2.145073849096369e-06, "loss": 0.0253, "step": 208375 }, { "epoch": 0.8694745099348249, "grad_norm": 0.6770694293640716, "learning_rate": 2.145048113696966e-06, "loss": 0.0163, "step": 208380 }, { "epoch": 0.8694953726498151, "grad_norm": 0.45936455395696746, "learning_rate": 2.1450223792238176e-06, "loss": 0.0144, "step": 208385 }, { "epoch": 0.8695162353648055, "grad_norm": 0.8983963213697023, "learning_rate": 2.1449966456768685e-06, "loss": 0.0249, "step": 208390 }, { "epoch": 0.8695370980797957, "grad_norm": 0.9435106970441638, "learning_rate": 2.1449709130560635e-06, "loss": 0.0219, "step": 208395 }, { "epoch": 0.869557960794786, "grad_norm": 0.5206101967351167, "learning_rate": 2.1449451813613454e-06, "loss": 0.0198, "step": 208400 }, { "epoch": 0.8695788235097762, "grad_norm": 0.24512569080248955, "learning_rate": 2.1449194505926605e-06, "loss": 0.0218, "step": 208405 }, { "epoch": 0.8695996862247666, "grad_norm": 0.6163023334222193, "learning_rate": 2.144893720749952e-06, "loss": 0.0178, "step": 208410 }, { "epoch": 0.8696205489397568, "grad_norm": 0.7478558493220311, "learning_rate": 2.1448679918331654e-06, "loss": 0.0264, "step": 208415 }, { "epoch": 0.8696414116547471, "grad_norm": 0.7811674267422041, "learning_rate": 2.144842263842244e-06, "loss": 0.0248, "step": 208420 }, { "epoch": 0.8696622743697374, "grad_norm": 0.4578906692775777, "learning_rate": 2.1448165367771333e-06, "loss": 0.0153, "step": 208425 }, { "epoch": 0.8696831370847277, "grad_norm": 0.5261096869909496, "learning_rate": 2.144790810637778e-06, "loss": 0.0196, "step": 208430 }, { "epoch": 0.8697039997997179, "grad_norm": 0.599726894600047, "learning_rate": 2.144765085424121e-06, "loss": 0.0204, "step": 208435 }, { "epoch": 0.8697248625147083, "grad_norm": 0.9158693446552654, "learning_rate": 2.144739361136108e-06, "loss": 0.0211, "step": 208440 }, { "epoch": 0.8697457252296985, "grad_norm": 0.49346347669883833, "learning_rate": 2.144713637773684e-06, "loss": 0.014, "step": 208445 }, { "epoch": 0.8697665879446888, "grad_norm": 0.472906613135424, "learning_rate": 2.144687915336792e-06, "loss": 0.0243, "step": 208450 }, { "epoch": 0.869787450659679, "grad_norm": 0.7531856886426762, "learning_rate": 2.1446621938253772e-06, "loss": 0.0212, "step": 208455 }, { "epoch": 0.8698083133746694, "grad_norm": 1.1544875601562423, "learning_rate": 2.144636473239384e-06, "loss": 0.0212, "step": 208460 }, { "epoch": 0.8698291760896596, "grad_norm": 0.6854034218154218, "learning_rate": 2.1446107535787574e-06, "loss": 0.0215, "step": 208465 }, { "epoch": 0.8698500388046498, "grad_norm": 0.5580249403852517, "learning_rate": 2.144585034843442e-06, "loss": 0.0204, "step": 208470 }, { "epoch": 0.8698709015196402, "grad_norm": 0.32522594638057667, "learning_rate": 2.1445593170333815e-06, "loss": 0.0163, "step": 208475 }, { "epoch": 0.8698917642346304, "grad_norm": 0.32948379918710924, "learning_rate": 2.1445336001485203e-06, "loss": 0.0167, "step": 208480 }, { "epoch": 0.8699126269496207, "grad_norm": 0.4796977192483948, "learning_rate": 2.144507884188804e-06, "loss": 0.0261, "step": 208485 }, { "epoch": 0.869933489664611, "grad_norm": 0.6750781824125599, "learning_rate": 2.144482169154177e-06, "loss": 0.0146, "step": 208490 }, { "epoch": 0.8699543523796013, "grad_norm": 0.34358798065040175, "learning_rate": 2.1444564550445824e-06, "loss": 0.0189, "step": 208495 }, { "epoch": 0.8699752150945915, "grad_norm": 0.36323487734376425, "learning_rate": 2.144430741859966e-06, "loss": 0.0181, "step": 208500 }, { "epoch": 0.8699960778095819, "grad_norm": 0.40442872752973225, "learning_rate": 2.144405029600273e-06, "loss": 0.0149, "step": 208505 }, { "epoch": 0.8700169405245721, "grad_norm": 0.5536510400225413, "learning_rate": 2.1443793182654465e-06, "loss": 0.0197, "step": 208510 }, { "epoch": 0.8700378032395624, "grad_norm": 0.598220101715994, "learning_rate": 2.144353607855431e-06, "loss": 0.0169, "step": 208515 }, { "epoch": 0.8700586659545526, "grad_norm": 0.7613357231209295, "learning_rate": 2.144327898370172e-06, "loss": 0.0141, "step": 208520 }, { "epoch": 0.870079528669543, "grad_norm": 0.7354502427603447, "learning_rate": 2.1443021898096137e-06, "loss": 0.0181, "step": 208525 }, { "epoch": 0.8701003913845332, "grad_norm": 1.080500456076413, "learning_rate": 2.144276482173701e-06, "loss": 0.0193, "step": 208530 }, { "epoch": 0.8701212540995235, "grad_norm": 0.38414831351363066, "learning_rate": 2.144250775462378e-06, "loss": 0.027, "step": 208535 }, { "epoch": 0.8701421168145138, "grad_norm": 0.6739556466214828, "learning_rate": 2.1442250696755896e-06, "loss": 0.0157, "step": 208540 }, { "epoch": 0.870162979529504, "grad_norm": 0.2894288339967482, "learning_rate": 2.1441993648132808e-06, "loss": 0.024, "step": 208545 }, { "epoch": 0.8701838422444943, "grad_norm": 0.5895543601637694, "learning_rate": 2.1441736608753945e-06, "loss": 0.0149, "step": 208550 }, { "epoch": 0.8702047049594847, "grad_norm": 0.5421592230097848, "learning_rate": 2.144147957861877e-06, "loss": 0.0201, "step": 208555 }, { "epoch": 0.8702255676744749, "grad_norm": 0.574621613060266, "learning_rate": 2.1441222557726722e-06, "loss": 0.0182, "step": 208560 }, { "epoch": 0.8702464303894651, "grad_norm": 0.8857443510480766, "learning_rate": 2.144096554607725e-06, "loss": 0.0269, "step": 208565 }, { "epoch": 0.8702672931044554, "grad_norm": 0.8526508958177557, "learning_rate": 2.14407085436698e-06, "loss": 0.0225, "step": 208570 }, { "epoch": 0.8702881558194457, "grad_norm": 0.7329660845133369, "learning_rate": 2.144045155050381e-06, "loss": 0.024, "step": 208575 }, { "epoch": 0.870309018534436, "grad_norm": 0.8137894093744387, "learning_rate": 2.1440194566578735e-06, "loss": 0.0216, "step": 208580 }, { "epoch": 0.8703298812494262, "grad_norm": 0.7290641692958426, "learning_rate": 2.143993759189402e-06, "loss": 0.0208, "step": 208585 }, { "epoch": 0.8703507439644166, "grad_norm": 0.6554365532927412, "learning_rate": 2.1439680626449107e-06, "loss": 0.0251, "step": 208590 }, { "epoch": 0.8703716066794068, "grad_norm": 0.7634502574196984, "learning_rate": 2.143942367024345e-06, "loss": 0.0211, "step": 208595 }, { "epoch": 0.8703924693943971, "grad_norm": 0.718230773573427, "learning_rate": 2.1439166723276487e-06, "loss": 0.0205, "step": 208600 }, { "epoch": 0.8704133321093874, "grad_norm": 0.5629547780216924, "learning_rate": 2.1438909785547668e-06, "loss": 0.0285, "step": 208605 }, { "epoch": 0.8704341948243777, "grad_norm": 0.5570438889953682, "learning_rate": 2.143865285705644e-06, "loss": 0.0255, "step": 208610 }, { "epoch": 0.8704550575393679, "grad_norm": 0.48303096625330433, "learning_rate": 2.1438395937802254e-06, "loss": 0.016, "step": 208615 }, { "epoch": 0.8704759202543583, "grad_norm": 0.4992970960813734, "learning_rate": 2.1438139027784546e-06, "loss": 0.0183, "step": 208620 }, { "epoch": 0.8704967829693485, "grad_norm": 1.5814879476830004, "learning_rate": 2.143788212700277e-06, "loss": 0.0275, "step": 208625 }, { "epoch": 0.8705176456843388, "grad_norm": 0.9579505309893178, "learning_rate": 2.143762523545637e-06, "loss": 0.0184, "step": 208630 }, { "epoch": 0.870538508399329, "grad_norm": 0.5469084421204381, "learning_rate": 2.143736835314479e-06, "loss": 0.0172, "step": 208635 }, { "epoch": 0.8705593711143194, "grad_norm": 1.2684261150533078, "learning_rate": 2.1437111480067484e-06, "loss": 0.0272, "step": 208640 }, { "epoch": 0.8705802338293096, "grad_norm": 0.27864576231944527, "learning_rate": 2.1436854616223896e-06, "loss": 0.0145, "step": 208645 }, { "epoch": 0.8706010965442998, "grad_norm": 0.48695622820210216, "learning_rate": 2.143659776161347e-06, "loss": 0.0217, "step": 208650 }, { "epoch": 0.8706219592592902, "grad_norm": 0.4919210245963108, "learning_rate": 2.1436340916235655e-06, "loss": 0.0209, "step": 208655 }, { "epoch": 0.8706428219742804, "grad_norm": 1.0201005307308186, "learning_rate": 2.1436084080089897e-06, "loss": 0.0205, "step": 208660 }, { "epoch": 0.8706636846892707, "grad_norm": 0.7195005326785824, "learning_rate": 2.143582725317564e-06, "loss": 0.0202, "step": 208665 }, { "epoch": 0.870684547404261, "grad_norm": 0.47429695916390024, "learning_rate": 2.1435570435492338e-06, "loss": 0.0187, "step": 208670 }, { "epoch": 0.8707054101192513, "grad_norm": 0.4429915896375043, "learning_rate": 2.143531362703944e-06, "loss": 0.0223, "step": 208675 }, { "epoch": 0.8707262728342415, "grad_norm": 0.6134914782089742, "learning_rate": 2.143505682781638e-06, "loss": 0.0186, "step": 208680 }, { "epoch": 0.8707471355492319, "grad_norm": 0.6954013303678426, "learning_rate": 2.143480003782261e-06, "loss": 0.0235, "step": 208685 }, { "epoch": 0.8707679982642221, "grad_norm": 0.6685053952968488, "learning_rate": 2.143454325705759e-06, "loss": 0.0236, "step": 208690 }, { "epoch": 0.8707888609792124, "grad_norm": 0.4328552825947279, "learning_rate": 2.143428648552075e-06, "loss": 0.0213, "step": 208695 }, { "epoch": 0.8708097236942026, "grad_norm": 0.4900555937661569, "learning_rate": 2.1434029723211545e-06, "loss": 0.0212, "step": 208700 }, { "epoch": 0.870830586409193, "grad_norm": 0.6502779757486903, "learning_rate": 2.1433772970129425e-06, "loss": 0.0295, "step": 208705 }, { "epoch": 0.8708514491241832, "grad_norm": 0.5518382730296896, "learning_rate": 2.143351622627383e-06, "loss": 0.0162, "step": 208710 }, { "epoch": 0.8708723118391735, "grad_norm": 0.4301578062619197, "learning_rate": 2.1433259491644213e-06, "loss": 0.0225, "step": 208715 }, { "epoch": 0.8708931745541638, "grad_norm": 0.6478437885605381, "learning_rate": 2.143300276624002e-06, "loss": 0.0204, "step": 208720 }, { "epoch": 0.8709140372691541, "grad_norm": 0.48137546078256765, "learning_rate": 2.14327460500607e-06, "loss": 0.025, "step": 208725 }, { "epoch": 0.8709348999841443, "grad_norm": 0.5470286583139289, "learning_rate": 2.1432489343105696e-06, "loss": 0.0139, "step": 208730 }, { "epoch": 0.8709557626991347, "grad_norm": 0.769041301458271, "learning_rate": 2.1432232645374456e-06, "loss": 0.0205, "step": 208735 }, { "epoch": 0.8709766254141249, "grad_norm": 0.49089500943277853, "learning_rate": 2.1431975956866434e-06, "loss": 0.017, "step": 208740 }, { "epoch": 0.8709974881291151, "grad_norm": 0.2522398951260498, "learning_rate": 2.1431719277581076e-06, "loss": 0.0169, "step": 208745 }, { "epoch": 0.8710183508441054, "grad_norm": 0.5358891499580557, "learning_rate": 2.1431462607517824e-06, "loss": 0.0174, "step": 208750 }, { "epoch": 0.8710392135590957, "grad_norm": 0.5844468501083016, "learning_rate": 2.1431205946676134e-06, "loss": 0.0174, "step": 208755 }, { "epoch": 0.871060076274086, "grad_norm": 0.48866236106451416, "learning_rate": 2.143094929505545e-06, "loss": 0.0184, "step": 208760 }, { "epoch": 0.8710809389890762, "grad_norm": 0.6703546887948976, "learning_rate": 2.143069265265521e-06, "loss": 0.0295, "step": 208765 }, { "epoch": 0.8711018017040666, "grad_norm": 0.8880548815388921, "learning_rate": 2.143043601947488e-06, "loss": 0.022, "step": 208770 }, { "epoch": 0.8711226644190568, "grad_norm": 0.36315617112403026, "learning_rate": 2.1430179395513897e-06, "loss": 0.0183, "step": 208775 }, { "epoch": 0.8711435271340471, "grad_norm": 0.29432493948003247, "learning_rate": 2.1429922780771704e-06, "loss": 0.0125, "step": 208780 }, { "epoch": 0.8711643898490374, "grad_norm": 0.929979284008531, "learning_rate": 2.1429666175247763e-06, "loss": 0.0211, "step": 208785 }, { "epoch": 0.8711852525640277, "grad_norm": 0.3370201837007855, "learning_rate": 2.1429409578941514e-06, "loss": 0.026, "step": 208790 }, { "epoch": 0.8712061152790179, "grad_norm": 1.5839948250832503, "learning_rate": 2.142915299185241e-06, "loss": 0.0239, "step": 208795 }, { "epoch": 0.8712269779940083, "grad_norm": 0.7908877495243101, "learning_rate": 2.1428896413979893e-06, "loss": 0.0198, "step": 208800 }, { "epoch": 0.8712478407089985, "grad_norm": 0.558645984520786, "learning_rate": 2.142863984532341e-06, "loss": 0.0211, "step": 208805 }, { "epoch": 0.8712687034239888, "grad_norm": 0.33182767712373745, "learning_rate": 2.1428383285882417e-06, "loss": 0.0201, "step": 208810 }, { "epoch": 0.871289566138979, "grad_norm": 1.0360688311490203, "learning_rate": 2.1428126735656356e-06, "loss": 0.0216, "step": 208815 }, { "epoch": 0.8713104288539694, "grad_norm": 0.45137206038999184, "learning_rate": 2.142787019464468e-06, "loss": 0.0179, "step": 208820 }, { "epoch": 0.8713312915689596, "grad_norm": 0.9235701104709381, "learning_rate": 2.1427613662846834e-06, "loss": 0.0243, "step": 208825 }, { "epoch": 0.8713521542839499, "grad_norm": 0.7161852871492744, "learning_rate": 2.1427357140262267e-06, "loss": 0.0182, "step": 208830 }, { "epoch": 0.8713730169989402, "grad_norm": 1.0249853241337812, "learning_rate": 2.1427100626890428e-06, "loss": 0.0243, "step": 208835 }, { "epoch": 0.8713938797139305, "grad_norm": 0.38212332183768266, "learning_rate": 2.142684412273077e-06, "loss": 0.0163, "step": 208840 }, { "epoch": 0.8714147424289207, "grad_norm": 0.7168150032804564, "learning_rate": 2.1426587627782733e-06, "loss": 0.0209, "step": 208845 }, { "epoch": 0.871435605143911, "grad_norm": 0.5323673687966851, "learning_rate": 2.142633114204577e-06, "loss": 0.022, "step": 208850 }, { "epoch": 0.8714564678589013, "grad_norm": 0.5756653669330248, "learning_rate": 2.1426074665519337e-06, "loss": 0.0168, "step": 208855 }, { "epoch": 0.8714773305738915, "grad_norm": 0.6056495647187113, "learning_rate": 2.142581819820287e-06, "loss": 0.0185, "step": 208860 }, { "epoch": 0.8714981932888819, "grad_norm": 0.6599927173919045, "learning_rate": 2.142556174009582e-06, "loss": 0.0193, "step": 208865 }, { "epoch": 0.8715190560038721, "grad_norm": 0.6592659082244798, "learning_rate": 2.1425305291197644e-06, "loss": 0.02, "step": 208870 }, { "epoch": 0.8715399187188624, "grad_norm": 0.5886179636109437, "learning_rate": 2.1425048851507787e-06, "loss": 0.0251, "step": 208875 }, { "epoch": 0.8715607814338526, "grad_norm": 0.8222619834730217, "learning_rate": 2.1424792421025695e-06, "loss": 0.0171, "step": 208880 }, { "epoch": 0.871581644148843, "grad_norm": 0.7159158402168507, "learning_rate": 2.142453599975082e-06, "loss": 0.0147, "step": 208885 }, { "epoch": 0.8716025068638332, "grad_norm": 1.2976886367590736, "learning_rate": 2.142427958768261e-06, "loss": 0.025, "step": 208890 }, { "epoch": 0.8716233695788235, "grad_norm": 0.8727483150022255, "learning_rate": 2.1424023184820517e-06, "loss": 0.0191, "step": 208895 }, { "epoch": 0.8716442322938138, "grad_norm": 0.6369155580146816, "learning_rate": 2.1423766791163982e-06, "loss": 0.0165, "step": 208900 }, { "epoch": 0.8716650950088041, "grad_norm": 0.7855737304350827, "learning_rate": 2.142351040671246e-06, "loss": 0.0276, "step": 208905 }, { "epoch": 0.8716859577237943, "grad_norm": 0.5084416107799908, "learning_rate": 2.1423254031465402e-06, "loss": 0.0179, "step": 208910 }, { "epoch": 0.8717068204387847, "grad_norm": 0.4776761396511709, "learning_rate": 2.1422997665422256e-06, "loss": 0.0235, "step": 208915 }, { "epoch": 0.8717276831537749, "grad_norm": 1.0894026752879542, "learning_rate": 2.1422741308582466e-06, "loss": 0.0278, "step": 208920 }, { "epoch": 0.8717485458687652, "grad_norm": 0.6116388252958104, "learning_rate": 2.142248496094549e-06, "loss": 0.0133, "step": 208925 }, { "epoch": 0.8717694085837554, "grad_norm": 0.4678879683939763, "learning_rate": 2.1422228622510772e-06, "loss": 0.0246, "step": 208930 }, { "epoch": 0.8717902712987458, "grad_norm": 0.5181834913520368, "learning_rate": 2.1421972293277764e-06, "loss": 0.0266, "step": 208935 }, { "epoch": 0.871811134013736, "grad_norm": 0.564458867577977, "learning_rate": 2.142171597324591e-06, "loss": 0.021, "step": 208940 }, { "epoch": 0.8718319967287262, "grad_norm": 0.6412608074040739, "learning_rate": 2.1421459662414665e-06, "loss": 0.0197, "step": 208945 }, { "epoch": 0.8718528594437166, "grad_norm": 0.9728626150707128, "learning_rate": 2.142120336078348e-06, "loss": 0.0194, "step": 208950 }, { "epoch": 0.8718737221587068, "grad_norm": 0.442208352976319, "learning_rate": 2.14209470683518e-06, "loss": 0.0194, "step": 208955 }, { "epoch": 0.8718945848736971, "grad_norm": 0.5621907793365709, "learning_rate": 2.1420690785119074e-06, "loss": 0.0124, "step": 208960 }, { "epoch": 0.8719154475886874, "grad_norm": 1.4559232436452905, "learning_rate": 2.1420434511084754e-06, "loss": 0.0288, "step": 208965 }, { "epoch": 0.8719363103036777, "grad_norm": 0.3621968807038816, "learning_rate": 2.1420178246248294e-06, "loss": 0.0251, "step": 208970 }, { "epoch": 0.8719571730186679, "grad_norm": 0.5577623407727832, "learning_rate": 2.1419921990609137e-06, "loss": 0.0192, "step": 208975 }, { "epoch": 0.8719780357336583, "grad_norm": 0.457782302527254, "learning_rate": 2.1419665744166734e-06, "loss": 0.0245, "step": 208980 }, { "epoch": 0.8719988984486485, "grad_norm": 0.570745221816873, "learning_rate": 2.1419409506920537e-06, "loss": 0.0147, "step": 208985 }, { "epoch": 0.8720197611636388, "grad_norm": 1.164507590068981, "learning_rate": 2.141915327887e-06, "loss": 0.0265, "step": 208990 }, { "epoch": 0.872040623878629, "grad_norm": 0.34148897678761236, "learning_rate": 2.1418897060014564e-06, "loss": 0.0204, "step": 208995 }, { "epoch": 0.8720614865936194, "grad_norm": 0.843595245283559, "learning_rate": 2.1418640850353685e-06, "loss": 0.0263, "step": 209000 }, { "epoch": 0.8720823493086096, "grad_norm": 0.5911478604312587, "learning_rate": 2.141838464988681e-06, "loss": 0.0257, "step": 209005 }, { "epoch": 0.8721032120235999, "grad_norm": 0.6238364589774665, "learning_rate": 2.141812845861339e-06, "loss": 0.0189, "step": 209010 }, { "epoch": 0.8721240747385902, "grad_norm": 0.5421067801489181, "learning_rate": 2.1417872276532885e-06, "loss": 0.0142, "step": 209015 }, { "epoch": 0.8721449374535805, "grad_norm": 0.5708501936070789, "learning_rate": 2.141761610364473e-06, "loss": 0.0232, "step": 209020 }, { "epoch": 0.8721658001685707, "grad_norm": 0.6958753514843813, "learning_rate": 2.141735993994838e-06, "loss": 0.0199, "step": 209025 }, { "epoch": 0.8721866628835611, "grad_norm": 0.6030087379695198, "learning_rate": 2.1417103785443287e-06, "loss": 0.0125, "step": 209030 }, { "epoch": 0.8722075255985513, "grad_norm": 0.409111909702424, "learning_rate": 2.14168476401289e-06, "loss": 0.0249, "step": 209035 }, { "epoch": 0.8722283883135415, "grad_norm": 0.5173732170145641, "learning_rate": 2.141659150400467e-06, "loss": 0.0166, "step": 209040 }, { "epoch": 0.8722492510285319, "grad_norm": 0.7723480395780679, "learning_rate": 2.1416335377070052e-06, "loss": 0.0236, "step": 209045 }, { "epoch": 0.8722701137435221, "grad_norm": 0.5419672607091958, "learning_rate": 2.1416079259324492e-06, "loss": 0.0248, "step": 209050 }, { "epoch": 0.8722909764585124, "grad_norm": 0.5451829903196624, "learning_rate": 2.1415823150767444e-06, "loss": 0.0195, "step": 209055 }, { "epoch": 0.8723118391735026, "grad_norm": 0.6649320616965763, "learning_rate": 2.141556705139835e-06, "loss": 0.0161, "step": 209060 }, { "epoch": 0.872332701888493, "grad_norm": 0.7358948815312222, "learning_rate": 2.141531096121667e-06, "loss": 0.0135, "step": 209065 }, { "epoch": 0.8723535646034832, "grad_norm": 0.6694046606496552, "learning_rate": 2.141505488022185e-06, "loss": 0.0233, "step": 209070 }, { "epoch": 0.8723744273184735, "grad_norm": 0.705917175944458, "learning_rate": 2.141479880841334e-06, "loss": 0.0255, "step": 209075 }, { "epoch": 0.8723952900334638, "grad_norm": 0.663868321177582, "learning_rate": 2.1414542745790594e-06, "loss": 0.0178, "step": 209080 }, { "epoch": 0.8724161527484541, "grad_norm": 0.8006648844189878, "learning_rate": 2.141428669235306e-06, "loss": 0.0175, "step": 209085 }, { "epoch": 0.8724370154634443, "grad_norm": 1.4017543958967866, "learning_rate": 2.141403064810019e-06, "loss": 0.019, "step": 209090 }, { "epoch": 0.8724578781784347, "grad_norm": 0.7588950133264105, "learning_rate": 2.1413774613031436e-06, "loss": 0.0235, "step": 209095 }, { "epoch": 0.8724787408934249, "grad_norm": 0.6174218884273883, "learning_rate": 2.141351858714625e-06, "loss": 0.0173, "step": 209100 }, { "epoch": 0.8724996036084152, "grad_norm": 0.5185150629008524, "learning_rate": 2.141326257044408e-06, "loss": 0.0179, "step": 209105 }, { "epoch": 0.8725204663234054, "grad_norm": 0.600837865672536, "learning_rate": 2.1413006562924377e-06, "loss": 0.0199, "step": 209110 }, { "epoch": 0.8725413290383958, "grad_norm": 0.6661359281418153, "learning_rate": 2.1412750564586593e-06, "loss": 0.0273, "step": 209115 }, { "epoch": 0.872562191753386, "grad_norm": 0.7766762364170644, "learning_rate": 2.141249457543018e-06, "loss": 0.0188, "step": 209120 }, { "epoch": 0.8725830544683763, "grad_norm": 1.530012140872345, "learning_rate": 2.141223859545459e-06, "loss": 0.0289, "step": 209125 }, { "epoch": 0.8726039171833666, "grad_norm": 0.7081563445299027, "learning_rate": 2.141198262465927e-06, "loss": 0.0231, "step": 209130 }, { "epoch": 0.8726247798983569, "grad_norm": 0.5534396510654279, "learning_rate": 2.141172666304368e-06, "loss": 0.018, "step": 209135 }, { "epoch": 0.8726456426133471, "grad_norm": 0.9367928181499328, "learning_rate": 2.141147071060726e-06, "loss": 0.026, "step": 209140 }, { "epoch": 0.8726665053283374, "grad_norm": 0.43156444076162265, "learning_rate": 2.141121476734947e-06, "loss": 0.0237, "step": 209145 }, { "epoch": 0.8726873680433277, "grad_norm": 0.45598528782805803, "learning_rate": 2.1410958833269756e-06, "loss": 0.0126, "step": 209150 }, { "epoch": 0.8727082307583179, "grad_norm": 0.3644742537421128, "learning_rate": 2.1410702908367577e-06, "loss": 0.0183, "step": 209155 }, { "epoch": 0.8727290934733083, "grad_norm": 0.5692238350026869, "learning_rate": 2.1410446992642365e-06, "loss": 0.0234, "step": 209160 }, { "epoch": 0.8727499561882985, "grad_norm": 0.7203609132207243, "learning_rate": 2.14101910860936e-06, "loss": 0.0183, "step": 209165 }, { "epoch": 0.8727708189032888, "grad_norm": 0.7860789402144516, "learning_rate": 2.140993518872072e-06, "loss": 0.0202, "step": 209170 }, { "epoch": 0.872791681618279, "grad_norm": 1.1155818482894233, "learning_rate": 2.140967930052317e-06, "loss": 0.0264, "step": 209175 }, { "epoch": 0.8728125443332694, "grad_norm": 0.3181130546379259, "learning_rate": 2.1409423421500406e-06, "loss": 0.013, "step": 209180 }, { "epoch": 0.8728334070482596, "grad_norm": 0.34318377726906224, "learning_rate": 2.140916755165189e-06, "loss": 0.0117, "step": 209185 }, { "epoch": 0.8728542697632499, "grad_norm": 0.5002804832847311, "learning_rate": 2.140891169097706e-06, "loss": 0.0178, "step": 209190 }, { "epoch": 0.8728751324782402, "grad_norm": 1.0665022974438132, "learning_rate": 2.140865583947537e-06, "loss": 0.0367, "step": 209195 }, { "epoch": 0.8728959951932305, "grad_norm": 0.7385781724414593, "learning_rate": 2.1408399997146284e-06, "loss": 0.0212, "step": 209200 }, { "epoch": 0.8729168579082207, "grad_norm": 0.3039878780022688, "learning_rate": 2.140814416398924e-06, "loss": 0.0188, "step": 209205 }, { "epoch": 0.8729377206232111, "grad_norm": 0.4764199144372155, "learning_rate": 2.140788834000369e-06, "loss": 0.0129, "step": 209210 }, { "epoch": 0.8729585833382013, "grad_norm": 0.6060012305974177, "learning_rate": 2.1407632525189098e-06, "loss": 0.0244, "step": 209215 }, { "epoch": 0.8729794460531916, "grad_norm": 0.40809338321090377, "learning_rate": 2.1407376719544906e-06, "loss": 0.0204, "step": 209220 }, { "epoch": 0.8730003087681819, "grad_norm": 0.6670209151302676, "learning_rate": 2.1407120923070574e-06, "loss": 0.021, "step": 209225 }, { "epoch": 0.8730211714831722, "grad_norm": 0.7858420944804995, "learning_rate": 2.1406865135765544e-06, "loss": 0.0236, "step": 209230 }, { "epoch": 0.8730420341981624, "grad_norm": 0.5401115737252391, "learning_rate": 2.140660935762927e-06, "loss": 0.0166, "step": 209235 }, { "epoch": 0.8730628969131526, "grad_norm": 1.4729989054616393, "learning_rate": 2.1406353588661217e-06, "loss": 0.0238, "step": 209240 }, { "epoch": 0.873083759628143, "grad_norm": 0.34823995062414204, "learning_rate": 2.1406097828860824e-06, "loss": 0.0158, "step": 209245 }, { "epoch": 0.8731046223431332, "grad_norm": 0.7200656200680454, "learning_rate": 2.1405842078227547e-06, "loss": 0.0163, "step": 209250 }, { "epoch": 0.8731254850581235, "grad_norm": 0.24519371658287217, "learning_rate": 2.1405586336760844e-06, "loss": 0.02, "step": 209255 }, { "epoch": 0.8731463477731138, "grad_norm": 1.6247282511089018, "learning_rate": 2.1405330604460155e-06, "loss": 0.0221, "step": 209260 }, { "epoch": 0.8731672104881041, "grad_norm": 0.8535803527799499, "learning_rate": 2.1405074881324943e-06, "loss": 0.0228, "step": 209265 }, { "epoch": 0.8731880732030943, "grad_norm": 0.7008939571084065, "learning_rate": 2.140481916735466e-06, "loss": 0.0188, "step": 209270 }, { "epoch": 0.8732089359180847, "grad_norm": 0.6893450149502092, "learning_rate": 2.140456346254875e-06, "loss": 0.0264, "step": 209275 }, { "epoch": 0.8732297986330749, "grad_norm": 0.8751640610172974, "learning_rate": 2.140430776690668e-06, "loss": 0.0221, "step": 209280 }, { "epoch": 0.8732506613480652, "grad_norm": 0.3937163959688911, "learning_rate": 2.140405208042789e-06, "loss": 0.0198, "step": 209285 }, { "epoch": 0.8732715240630554, "grad_norm": 0.35266015341179985, "learning_rate": 2.1403796403111835e-06, "loss": 0.0237, "step": 209290 }, { "epoch": 0.8732923867780458, "grad_norm": 0.5184373301933228, "learning_rate": 2.140354073495797e-06, "loss": 0.0207, "step": 209295 }, { "epoch": 0.873313249493036, "grad_norm": 0.8077884491856833, "learning_rate": 2.1403285075965744e-06, "loss": 0.0199, "step": 209300 }, { "epoch": 0.8733341122080263, "grad_norm": 0.4397518392454787, "learning_rate": 2.140302942613462e-06, "loss": 0.0261, "step": 209305 }, { "epoch": 0.8733549749230166, "grad_norm": 0.8144250740544916, "learning_rate": 2.1402773785464044e-06, "loss": 0.018, "step": 209310 }, { "epoch": 0.8733758376380069, "grad_norm": 0.8201162210972918, "learning_rate": 2.1402518153953466e-06, "loss": 0.0211, "step": 209315 }, { "epoch": 0.8733967003529971, "grad_norm": 0.4464423249880624, "learning_rate": 2.140226253160234e-06, "loss": 0.016, "step": 209320 }, { "epoch": 0.8734175630679875, "grad_norm": 0.6839168837226365, "learning_rate": 2.1402006918410126e-06, "loss": 0.0196, "step": 209325 }, { "epoch": 0.8734384257829777, "grad_norm": 1.4735511749144372, "learning_rate": 2.140175131437627e-06, "loss": 0.0157, "step": 209330 }, { "epoch": 0.873459288497968, "grad_norm": 2.612088102267621, "learning_rate": 2.140149571950023e-06, "loss": 0.0213, "step": 209335 }, { "epoch": 0.8734801512129583, "grad_norm": 0.38565717819294987, "learning_rate": 2.1401240133781454e-06, "loss": 0.0181, "step": 209340 }, { "epoch": 0.8735010139279485, "grad_norm": 0.7507091736913974, "learning_rate": 2.1400984557219394e-06, "loss": 0.0183, "step": 209345 }, { "epoch": 0.8735218766429388, "grad_norm": 0.6935099430539563, "learning_rate": 2.1400728989813514e-06, "loss": 0.0217, "step": 209350 }, { "epoch": 0.873542739357929, "grad_norm": 0.7612350481540002, "learning_rate": 2.1400473431563256e-06, "loss": 0.0238, "step": 209355 }, { "epoch": 0.8735636020729194, "grad_norm": 0.8672904313787863, "learning_rate": 2.140021788246808e-06, "loss": 0.0244, "step": 209360 }, { "epoch": 0.8735844647879096, "grad_norm": 0.6310604275105611, "learning_rate": 2.1399962342527436e-06, "loss": 0.0243, "step": 209365 }, { "epoch": 0.8736053275028999, "grad_norm": 0.5513489073538719, "learning_rate": 2.1399706811740777e-06, "loss": 0.0322, "step": 209370 }, { "epoch": 0.8736261902178902, "grad_norm": 0.7518068269139238, "learning_rate": 2.139945129010756e-06, "loss": 0.0192, "step": 209375 }, { "epoch": 0.8736470529328805, "grad_norm": 0.6486149023600509, "learning_rate": 2.1399195777627233e-06, "loss": 0.0233, "step": 209380 }, { "epoch": 0.8736679156478707, "grad_norm": 0.5431141235060412, "learning_rate": 2.139894027429926e-06, "loss": 0.0193, "step": 209385 }, { "epoch": 0.8736887783628611, "grad_norm": 0.8658986483220339, "learning_rate": 2.1398684780123085e-06, "loss": 0.0166, "step": 209390 }, { "epoch": 0.8737096410778513, "grad_norm": 0.9182450737789337, "learning_rate": 2.139842929509816e-06, "loss": 0.0203, "step": 209395 }, { "epoch": 0.8737305037928416, "grad_norm": 0.6921400485297388, "learning_rate": 2.1398173819223944e-06, "loss": 0.0201, "step": 209400 }, { "epoch": 0.8737513665078319, "grad_norm": 1.073247536217021, "learning_rate": 2.13979183524999e-06, "loss": 0.0204, "step": 209405 }, { "epoch": 0.8737722292228222, "grad_norm": 0.4604868646371494, "learning_rate": 2.139766289492546e-06, "loss": 0.0255, "step": 209410 }, { "epoch": 0.8737930919378124, "grad_norm": 0.5498796147191404, "learning_rate": 2.139740744650009e-06, "loss": 0.0245, "step": 209415 }, { "epoch": 0.8738139546528026, "grad_norm": 0.6176887801775519, "learning_rate": 2.139715200722325e-06, "loss": 0.0274, "step": 209420 }, { "epoch": 0.873834817367793, "grad_norm": 0.7310341854474397, "learning_rate": 2.1396896577094385e-06, "loss": 0.0199, "step": 209425 }, { "epoch": 0.8738556800827832, "grad_norm": 1.1199964725667468, "learning_rate": 2.1396641156112948e-06, "loss": 0.0351, "step": 209430 }, { "epoch": 0.8738765427977735, "grad_norm": 0.6034050967937055, "learning_rate": 2.13963857442784e-06, "loss": 0.0149, "step": 209435 }, { "epoch": 0.8738974055127638, "grad_norm": 0.5570478695016439, "learning_rate": 2.139613034159019e-06, "loss": 0.0197, "step": 209440 }, { "epoch": 0.8739182682277541, "grad_norm": 0.3278728873554828, "learning_rate": 2.1395874948047774e-06, "loss": 0.0291, "step": 209445 }, { "epoch": 0.8739391309427443, "grad_norm": 0.5877595413828152, "learning_rate": 2.1395619563650605e-06, "loss": 0.0199, "step": 209450 }, { "epoch": 0.8739599936577347, "grad_norm": 0.49149984818083436, "learning_rate": 2.139536418839814e-06, "loss": 0.0186, "step": 209455 }, { "epoch": 0.8739808563727249, "grad_norm": 0.5858666035037383, "learning_rate": 2.139510882228983e-06, "loss": 0.0172, "step": 209460 }, { "epoch": 0.8740017190877152, "grad_norm": 0.3242786522534076, "learning_rate": 2.139485346532513e-06, "loss": 0.0209, "step": 209465 }, { "epoch": 0.8740225818027054, "grad_norm": 0.6755035416219154, "learning_rate": 2.1394598117503497e-06, "loss": 0.0193, "step": 209470 }, { "epoch": 0.8740434445176958, "grad_norm": 0.9540183884504473, "learning_rate": 2.139434277882438e-06, "loss": 0.0199, "step": 209475 }, { "epoch": 0.874064307232686, "grad_norm": 0.4075498362334548, "learning_rate": 2.139408744928724e-06, "loss": 0.0185, "step": 209480 }, { "epoch": 0.8740851699476763, "grad_norm": 0.49168886530607464, "learning_rate": 2.1393832128891524e-06, "loss": 0.0378, "step": 209485 }, { "epoch": 0.8741060326626666, "grad_norm": 0.633690108385844, "learning_rate": 2.139357681763669e-06, "loss": 0.0144, "step": 209490 }, { "epoch": 0.8741268953776569, "grad_norm": 0.7604601427662597, "learning_rate": 2.13933215155222e-06, "loss": 0.0204, "step": 209495 }, { "epoch": 0.8741477580926471, "grad_norm": 0.4391691596987728, "learning_rate": 2.13930662225475e-06, "loss": 0.0165, "step": 209500 }, { "epoch": 0.8741686208076375, "grad_norm": 0.4448966023164347, "learning_rate": 2.1392810938712042e-06, "loss": 0.0243, "step": 209505 }, { "epoch": 0.8741894835226277, "grad_norm": 0.30247150665663025, "learning_rate": 2.139255566401529e-06, "loss": 0.0144, "step": 209510 }, { "epoch": 0.874210346237618, "grad_norm": 0.6801752847450113, "learning_rate": 2.1392300398456697e-06, "loss": 0.028, "step": 209515 }, { "epoch": 0.8742312089526083, "grad_norm": 0.7461822036799646, "learning_rate": 2.1392045142035707e-06, "loss": 0.0245, "step": 209520 }, { "epoch": 0.8742520716675986, "grad_norm": 0.6637249943290834, "learning_rate": 2.139178989475179e-06, "loss": 0.0232, "step": 209525 }, { "epoch": 0.8742729343825888, "grad_norm": 0.37114742677727125, "learning_rate": 2.1391534656604386e-06, "loss": 0.0201, "step": 209530 }, { "epoch": 0.874293797097579, "grad_norm": 0.5583764570528637, "learning_rate": 2.1391279427592963e-06, "loss": 0.0171, "step": 209535 }, { "epoch": 0.8743146598125694, "grad_norm": 0.6033372078587347, "learning_rate": 2.139102420771697e-06, "loss": 0.0241, "step": 209540 }, { "epoch": 0.8743355225275596, "grad_norm": 0.7411025772898551, "learning_rate": 2.139076899697587e-06, "loss": 0.0199, "step": 209545 }, { "epoch": 0.8743563852425499, "grad_norm": 0.505536419016526, "learning_rate": 2.13905137953691e-06, "loss": 0.0225, "step": 209550 }, { "epoch": 0.8743772479575402, "grad_norm": 0.3513114092549028, "learning_rate": 2.139025860289613e-06, "loss": 0.0218, "step": 209555 }, { "epoch": 0.8743981106725305, "grad_norm": 0.7141355377709813, "learning_rate": 2.139000341955641e-06, "loss": 0.0248, "step": 209560 }, { "epoch": 0.8744189733875207, "grad_norm": 0.5567935877574671, "learning_rate": 2.1389748245349393e-06, "loss": 0.0228, "step": 209565 }, { "epoch": 0.8744398361025111, "grad_norm": 0.40792836247370456, "learning_rate": 2.138949308027454e-06, "loss": 0.0135, "step": 209570 }, { "epoch": 0.8744606988175013, "grad_norm": 0.3366425353739573, "learning_rate": 2.1389237924331306e-06, "loss": 0.0202, "step": 209575 }, { "epoch": 0.8744815615324916, "grad_norm": 0.5634504242705779, "learning_rate": 2.1388982777519145e-06, "loss": 0.0224, "step": 209580 }, { "epoch": 0.8745024242474819, "grad_norm": 0.5152598296004735, "learning_rate": 2.1388727639837512e-06, "loss": 0.018, "step": 209585 }, { "epoch": 0.8745232869624722, "grad_norm": 0.5490902475314506, "learning_rate": 2.1388472511285857e-06, "loss": 0.0187, "step": 209590 }, { "epoch": 0.8745441496774624, "grad_norm": 1.0624155756387381, "learning_rate": 2.1388217391863646e-06, "loss": 0.0213, "step": 209595 }, { "epoch": 0.8745650123924527, "grad_norm": 0.7364202744249565, "learning_rate": 2.1387962281570328e-06, "loss": 0.0192, "step": 209600 }, { "epoch": 0.874585875107443, "grad_norm": 0.40909481169633005, "learning_rate": 2.1387707180405352e-06, "loss": 0.0702, "step": 209605 }, { "epoch": 0.8746067378224333, "grad_norm": 0.2841992364200383, "learning_rate": 2.1387452088368186e-06, "loss": 0.0116, "step": 209610 }, { "epoch": 0.8746276005374235, "grad_norm": 0.7470858021699623, "learning_rate": 2.1387197005458287e-06, "loss": 0.0199, "step": 209615 }, { "epoch": 0.8746484632524139, "grad_norm": 0.38199955081333303, "learning_rate": 2.13869419316751e-06, "loss": 0.0303, "step": 209620 }, { "epoch": 0.8746693259674041, "grad_norm": 1.3324071892565499, "learning_rate": 2.138668686701809e-06, "loss": 0.0223, "step": 209625 }, { "epoch": 0.8746901886823943, "grad_norm": 0.6759994196503887, "learning_rate": 2.1386431811486704e-06, "loss": 0.0228, "step": 209630 }, { "epoch": 0.8747110513973847, "grad_norm": 0.24723640942410968, "learning_rate": 2.13861767650804e-06, "loss": 0.0197, "step": 209635 }, { "epoch": 0.8747319141123749, "grad_norm": 0.9952958998459768, "learning_rate": 2.138592172779864e-06, "loss": 0.0202, "step": 209640 }, { "epoch": 0.8747527768273652, "grad_norm": 0.4955314871596515, "learning_rate": 2.1385666699640877e-06, "loss": 0.014, "step": 209645 }, { "epoch": 0.8747736395423554, "grad_norm": 0.7272174376198609, "learning_rate": 2.138541168060656e-06, "loss": 0.0237, "step": 209650 }, { "epoch": 0.8747945022573458, "grad_norm": 0.8478162590920754, "learning_rate": 2.1385156670695153e-06, "loss": 0.0229, "step": 209655 }, { "epoch": 0.874815364972336, "grad_norm": 0.6834191898398515, "learning_rate": 2.1384901669906114e-06, "loss": 0.0221, "step": 209660 }, { "epoch": 0.8748362276873263, "grad_norm": 0.2209841749214246, "learning_rate": 2.1384646678238892e-06, "loss": 0.0161, "step": 209665 }, { "epoch": 0.8748570904023166, "grad_norm": 0.5819840347785201, "learning_rate": 2.1384391695692945e-06, "loss": 0.0144, "step": 209670 }, { "epoch": 0.8748779531173069, "grad_norm": 0.30106456203346454, "learning_rate": 2.1384136722267734e-06, "loss": 0.0194, "step": 209675 }, { "epoch": 0.8748988158322971, "grad_norm": 0.730195392050399, "learning_rate": 2.1383881757962712e-06, "loss": 0.0277, "step": 209680 }, { "epoch": 0.8749196785472875, "grad_norm": 0.761219307619165, "learning_rate": 2.1383626802777335e-06, "loss": 0.02, "step": 209685 }, { "epoch": 0.8749405412622777, "grad_norm": 0.5167440295607388, "learning_rate": 2.138337185671106e-06, "loss": 0.0213, "step": 209690 }, { "epoch": 0.874961403977268, "grad_norm": 1.1829881923662044, "learning_rate": 2.1383116919763335e-06, "loss": 0.0249, "step": 209695 }, { "epoch": 0.8749822666922583, "grad_norm": 1.087671006703753, "learning_rate": 2.1382861991933636e-06, "loss": 0.0213, "step": 209700 }, { "epoch": 0.8750031294072486, "grad_norm": 0.7658673360947672, "learning_rate": 2.13826070732214e-06, "loss": 0.0211, "step": 209705 }, { "epoch": 0.8750239921222388, "grad_norm": 0.7048268641411198, "learning_rate": 2.1382352163626093e-06, "loss": 0.0218, "step": 209710 }, { "epoch": 0.875044854837229, "grad_norm": 0.28175733423238214, "learning_rate": 2.1382097263147177e-06, "loss": 0.024, "step": 209715 }, { "epoch": 0.8750657175522194, "grad_norm": 0.9952871706295028, "learning_rate": 2.1381842371784093e-06, "loss": 0.0177, "step": 209720 }, { "epoch": 0.8750865802672096, "grad_norm": 0.27164041807652595, "learning_rate": 2.1381587489536308e-06, "loss": 0.0152, "step": 209725 }, { "epoch": 0.8751074429821999, "grad_norm": 0.5999032656852357, "learning_rate": 2.1381332616403283e-06, "loss": 0.0226, "step": 209730 }, { "epoch": 0.8751283056971902, "grad_norm": 0.6976555027296348, "learning_rate": 2.138107775238446e-06, "loss": 0.0194, "step": 209735 }, { "epoch": 0.8751491684121805, "grad_norm": 0.6163567930635974, "learning_rate": 2.138082289747931e-06, "loss": 0.0198, "step": 209740 }, { "epoch": 0.8751700311271707, "grad_norm": 1.0158881823476769, "learning_rate": 2.138056805168728e-06, "loss": 0.017, "step": 209745 }, { "epoch": 0.8751908938421611, "grad_norm": 0.7431128369083496, "learning_rate": 2.1380313215007838e-06, "loss": 0.02, "step": 209750 }, { "epoch": 0.8752117565571513, "grad_norm": 0.48275719244705273, "learning_rate": 2.138005838744043e-06, "loss": 0.0173, "step": 209755 }, { "epoch": 0.8752326192721416, "grad_norm": 0.6759025764680444, "learning_rate": 2.137980356898452e-06, "loss": 0.0228, "step": 209760 }, { "epoch": 0.8752534819871319, "grad_norm": 0.8402252972468617, "learning_rate": 2.137954875963956e-06, "loss": 0.0207, "step": 209765 }, { "epoch": 0.8752743447021222, "grad_norm": 0.8590562270341436, "learning_rate": 2.137929395940501e-06, "loss": 0.0182, "step": 209770 }, { "epoch": 0.8752952074171124, "grad_norm": 0.7306433874119181, "learning_rate": 2.137903916828033e-06, "loss": 0.0185, "step": 209775 }, { "epoch": 0.8753160701321027, "grad_norm": 0.4827343188833521, "learning_rate": 2.1378784386264967e-06, "loss": 0.029, "step": 209780 }, { "epoch": 0.875336932847093, "grad_norm": 0.5486053431645199, "learning_rate": 2.137852961335839e-06, "loss": 0.0159, "step": 209785 }, { "epoch": 0.8753577955620833, "grad_norm": 0.5849187979511431, "learning_rate": 2.137827484956005e-06, "loss": 0.0226, "step": 209790 }, { "epoch": 0.8753786582770735, "grad_norm": 0.9098531477667359, "learning_rate": 2.1378020094869403e-06, "loss": 0.0253, "step": 209795 }, { "epoch": 0.8753995209920639, "grad_norm": 0.6041416856002914, "learning_rate": 2.1377765349285907e-06, "loss": 0.0177, "step": 209800 }, { "epoch": 0.8754203837070541, "grad_norm": 0.7870220428598645, "learning_rate": 2.1377510612809027e-06, "loss": 0.0256, "step": 209805 }, { "epoch": 0.8754412464220444, "grad_norm": 0.8569932293228791, "learning_rate": 2.1377255885438214e-06, "loss": 0.0184, "step": 209810 }, { "epoch": 0.8754621091370347, "grad_norm": 0.541770847122891, "learning_rate": 2.1377001167172928e-06, "loss": 0.0176, "step": 209815 }, { "epoch": 0.875482971852025, "grad_norm": 0.38188996377014467, "learning_rate": 2.137674645801262e-06, "loss": 0.0169, "step": 209820 }, { "epoch": 0.8755038345670152, "grad_norm": 0.6343770428328659, "learning_rate": 2.1376491757956755e-06, "loss": 0.0182, "step": 209825 }, { "epoch": 0.8755246972820054, "grad_norm": 0.9486434414381332, "learning_rate": 2.1376237067004784e-06, "loss": 0.0252, "step": 209830 }, { "epoch": 0.8755455599969958, "grad_norm": 0.3236250624928083, "learning_rate": 2.1375982385156176e-06, "loss": 0.0218, "step": 209835 }, { "epoch": 0.875566422711986, "grad_norm": 0.7819721405105284, "learning_rate": 2.1375727712410377e-06, "loss": 0.0173, "step": 209840 }, { "epoch": 0.8755872854269763, "grad_norm": 0.5208607186836555, "learning_rate": 2.137547304876685e-06, "loss": 0.0288, "step": 209845 }, { "epoch": 0.8756081481419666, "grad_norm": 0.47375711618547983, "learning_rate": 2.137521839422505e-06, "loss": 0.0179, "step": 209850 }, { "epoch": 0.8756290108569569, "grad_norm": 0.5877359259881851, "learning_rate": 2.137496374878444e-06, "loss": 0.0228, "step": 209855 }, { "epoch": 0.8756498735719471, "grad_norm": 0.6034950057404441, "learning_rate": 2.137470911244447e-06, "loss": 0.0252, "step": 209860 }, { "epoch": 0.8756707362869375, "grad_norm": 0.6405134418792446, "learning_rate": 2.13744544852046e-06, "loss": 0.0238, "step": 209865 }, { "epoch": 0.8756915990019277, "grad_norm": 0.3929061378920579, "learning_rate": 2.13741998670643e-06, "loss": 0.0173, "step": 209870 }, { "epoch": 0.875712461716918, "grad_norm": 0.8314280364204966, "learning_rate": 2.137394525802301e-06, "loss": 0.0198, "step": 209875 }, { "epoch": 0.8757333244319083, "grad_norm": 0.46773791756612804, "learning_rate": 2.1373690658080203e-06, "loss": 0.0243, "step": 209880 }, { "epoch": 0.8757541871468986, "grad_norm": 0.943240583510442, "learning_rate": 2.137343606723533e-06, "loss": 0.0193, "step": 209885 }, { "epoch": 0.8757750498618888, "grad_norm": 0.6637383331459246, "learning_rate": 2.1373181485487846e-06, "loss": 0.0181, "step": 209890 }, { "epoch": 0.875795912576879, "grad_norm": 0.4298342122314936, "learning_rate": 2.1372926912837214e-06, "loss": 0.0174, "step": 209895 }, { "epoch": 0.8758167752918694, "grad_norm": 0.29307256540557314, "learning_rate": 2.1372672349282895e-06, "loss": 0.021, "step": 209900 }, { "epoch": 0.8758376380068597, "grad_norm": 0.20995671658865467, "learning_rate": 2.1372417794824345e-06, "loss": 0.0153, "step": 209905 }, { "epoch": 0.8758585007218499, "grad_norm": 0.4083591434236562, "learning_rate": 2.1372163249461015e-06, "loss": 0.0156, "step": 209910 }, { "epoch": 0.8758793634368403, "grad_norm": 1.0850388075688626, "learning_rate": 2.137190871319237e-06, "loss": 0.023, "step": 209915 }, { "epoch": 0.8759002261518305, "grad_norm": 0.359861126197101, "learning_rate": 2.1371654186017874e-06, "loss": 0.0238, "step": 209920 }, { "epoch": 0.8759210888668207, "grad_norm": 0.6903462890573395, "learning_rate": 2.137139966793697e-06, "loss": 0.0197, "step": 209925 }, { "epoch": 0.8759419515818111, "grad_norm": 1.0657738847142948, "learning_rate": 2.1371145158949137e-06, "loss": 0.0227, "step": 209930 }, { "epoch": 0.8759628142968013, "grad_norm": 0.7655531185663822, "learning_rate": 2.1370890659053815e-06, "loss": 0.0225, "step": 209935 }, { "epoch": 0.8759836770117916, "grad_norm": 0.7893898307626658, "learning_rate": 2.1370636168250473e-06, "loss": 0.0224, "step": 209940 }, { "epoch": 0.8760045397267819, "grad_norm": 0.6747800636435719, "learning_rate": 2.1370381686538565e-06, "loss": 0.0291, "step": 209945 }, { "epoch": 0.8760254024417722, "grad_norm": 1.0191537541835438, "learning_rate": 2.137012721391755e-06, "loss": 0.017, "step": 209950 }, { "epoch": 0.8760462651567624, "grad_norm": 0.5432586074017313, "learning_rate": 2.1369872750386892e-06, "loss": 0.0162, "step": 209955 }, { "epoch": 0.8760671278717527, "grad_norm": 0.31140311190891257, "learning_rate": 2.1369618295946044e-06, "loss": 0.021, "step": 209960 }, { "epoch": 0.876087990586743, "grad_norm": 0.6869714459722837, "learning_rate": 2.1369363850594465e-06, "loss": 0.0232, "step": 209965 }, { "epoch": 0.8761088533017333, "grad_norm": 0.758053725314095, "learning_rate": 2.136910941433162e-06, "loss": 0.0127, "step": 209970 }, { "epoch": 0.8761297160167235, "grad_norm": 0.632890503334886, "learning_rate": 2.1368854987156966e-06, "loss": 0.0216, "step": 209975 }, { "epoch": 0.8761505787317139, "grad_norm": 0.6815695482242956, "learning_rate": 2.1368600569069954e-06, "loss": 0.0208, "step": 209980 }, { "epoch": 0.8761714414467041, "grad_norm": 0.4413514304341732, "learning_rate": 2.1368346160070048e-06, "loss": 0.0178, "step": 209985 }, { "epoch": 0.8761923041616944, "grad_norm": 0.3607782175560112, "learning_rate": 2.136809176015671e-06, "loss": 0.0147, "step": 209990 }, { "epoch": 0.8762131668766847, "grad_norm": 0.3574345378992933, "learning_rate": 2.1367837369329398e-06, "loss": 0.0242, "step": 209995 }, { "epoch": 0.876234029591675, "grad_norm": 0.6864809525226723, "learning_rate": 2.1367582987587566e-06, "loss": 0.0205, "step": 210000 }, { "epoch": 0.8762548923066652, "grad_norm": 0.5732434426868135, "learning_rate": 2.1367328614930684e-06, "loss": 0.0172, "step": 210005 }, { "epoch": 0.8762757550216554, "grad_norm": 0.9281693057493365, "learning_rate": 2.1367074251358196e-06, "loss": 0.0268, "step": 210010 }, { "epoch": 0.8762966177366458, "grad_norm": 0.3791996552245098, "learning_rate": 2.1366819896869577e-06, "loss": 0.0219, "step": 210015 }, { "epoch": 0.876317480451636, "grad_norm": 0.7604060607114035, "learning_rate": 2.1366565551464273e-06, "loss": 0.015, "step": 210020 }, { "epoch": 0.8763383431666263, "grad_norm": 0.4310485759002038, "learning_rate": 2.1366311215141754e-06, "loss": 0.0175, "step": 210025 }, { "epoch": 0.8763592058816166, "grad_norm": 0.5405126597985368, "learning_rate": 2.136605688790148e-06, "loss": 0.0253, "step": 210030 }, { "epoch": 0.8763800685966069, "grad_norm": 0.5616623921907592, "learning_rate": 2.1365802569742894e-06, "loss": 0.0182, "step": 210035 }, { "epoch": 0.8764009313115971, "grad_norm": 0.5474695472992117, "learning_rate": 2.1365548260665474e-06, "loss": 0.0292, "step": 210040 }, { "epoch": 0.8764217940265875, "grad_norm": 1.2185381577201122, "learning_rate": 2.1365293960668674e-06, "loss": 0.0231, "step": 210045 }, { "epoch": 0.8764426567415777, "grad_norm": 0.8053903500267875, "learning_rate": 2.1365039669751945e-06, "loss": 0.0229, "step": 210050 }, { "epoch": 0.876463519456568, "grad_norm": 0.5894399581970768, "learning_rate": 2.1364785387914757e-06, "loss": 0.0287, "step": 210055 }, { "epoch": 0.8764843821715583, "grad_norm": 0.7508070751241098, "learning_rate": 2.1364531115156568e-06, "loss": 0.0193, "step": 210060 }, { "epoch": 0.8765052448865486, "grad_norm": 0.6889560218501785, "learning_rate": 2.1364276851476835e-06, "loss": 0.0165, "step": 210065 }, { "epoch": 0.8765261076015388, "grad_norm": 0.6714504326105136, "learning_rate": 2.1364022596875017e-06, "loss": 0.0279, "step": 210070 }, { "epoch": 0.8765469703165291, "grad_norm": 0.8359050450041917, "learning_rate": 2.136376835135058e-06, "loss": 0.0187, "step": 210075 }, { "epoch": 0.8765678330315194, "grad_norm": 0.5451583861573865, "learning_rate": 2.1363514114902974e-06, "loss": 0.0161, "step": 210080 }, { "epoch": 0.8765886957465097, "grad_norm": 0.6758902130359358, "learning_rate": 2.1363259887531673e-06, "loss": 0.0185, "step": 210085 }, { "epoch": 0.8766095584614999, "grad_norm": 0.443421441350935, "learning_rate": 2.1363005669236127e-06, "loss": 0.0129, "step": 210090 }, { "epoch": 0.8766304211764903, "grad_norm": 0.5712932246939455, "learning_rate": 2.136275146001579e-06, "loss": 0.0268, "step": 210095 }, { "epoch": 0.8766512838914805, "grad_norm": 1.0016677942922272, "learning_rate": 2.1362497259870134e-06, "loss": 0.0205, "step": 210100 }, { "epoch": 0.8766721466064707, "grad_norm": 0.49300345599818235, "learning_rate": 2.1362243068798617e-06, "loss": 0.0167, "step": 210105 }, { "epoch": 0.8766930093214611, "grad_norm": 0.8541182872834306, "learning_rate": 2.1361988886800694e-06, "loss": 0.0231, "step": 210110 }, { "epoch": 0.8767138720364513, "grad_norm": 0.8667579040461642, "learning_rate": 2.136173471387583e-06, "loss": 0.0192, "step": 210115 }, { "epoch": 0.8767347347514416, "grad_norm": 1.2584719616331315, "learning_rate": 2.1361480550023485e-06, "loss": 0.0172, "step": 210120 }, { "epoch": 0.876755597466432, "grad_norm": 1.1849461374758499, "learning_rate": 2.1361226395243115e-06, "loss": 0.0162, "step": 210125 }, { "epoch": 0.8767764601814222, "grad_norm": 0.765863491543521, "learning_rate": 2.136097224953418e-06, "loss": 0.02, "step": 210130 }, { "epoch": 0.8767973228964124, "grad_norm": 0.7621868372987065, "learning_rate": 2.1360718112896146e-06, "loss": 0.0235, "step": 210135 }, { "epoch": 0.8768181856114027, "grad_norm": 0.7664812983734749, "learning_rate": 2.1360463985328468e-06, "loss": 0.0184, "step": 210140 }, { "epoch": 0.876839048326393, "grad_norm": 0.7570642199109859, "learning_rate": 2.136020986683062e-06, "loss": 0.0188, "step": 210145 }, { "epoch": 0.8768599110413833, "grad_norm": 0.7287075653485475, "learning_rate": 2.135995575740204e-06, "loss": 0.0171, "step": 210150 }, { "epoch": 0.8768807737563735, "grad_norm": 0.6086731901235631, "learning_rate": 2.1359701657042207e-06, "loss": 0.0222, "step": 210155 }, { "epoch": 0.8769016364713639, "grad_norm": 0.7976507399016673, "learning_rate": 2.1359447565750573e-06, "loss": 0.0227, "step": 210160 }, { "epoch": 0.8769224991863541, "grad_norm": 1.4724747330656778, "learning_rate": 2.13591934835266e-06, "loss": 0.0209, "step": 210165 }, { "epoch": 0.8769433619013444, "grad_norm": 0.8049454586596794, "learning_rate": 2.1358939410369748e-06, "loss": 0.0222, "step": 210170 }, { "epoch": 0.8769642246163347, "grad_norm": 0.5602080419267396, "learning_rate": 2.135868534627948e-06, "loss": 0.0222, "step": 210175 }, { "epoch": 0.876985087331325, "grad_norm": 0.3928893122410305, "learning_rate": 2.1358431291255256e-06, "loss": 0.0214, "step": 210180 }, { "epoch": 0.8770059500463152, "grad_norm": 0.4342634936199089, "learning_rate": 2.135817724529654e-06, "loss": 0.0149, "step": 210185 }, { "epoch": 0.8770268127613055, "grad_norm": 0.9490851784692744, "learning_rate": 2.135792320840278e-06, "loss": 0.0224, "step": 210190 }, { "epoch": 0.8770476754762958, "grad_norm": 0.5133816078528115, "learning_rate": 2.135766918057346e-06, "loss": 0.0218, "step": 210195 }, { "epoch": 0.877068538191286, "grad_norm": 4.392406367679562, "learning_rate": 2.1357415161808017e-06, "loss": 0.0184, "step": 210200 }, { "epoch": 0.8770894009062763, "grad_norm": 0.4898331931061652, "learning_rate": 2.1357161152105926e-06, "loss": 0.0179, "step": 210205 }, { "epoch": 0.8771102636212667, "grad_norm": 0.6281842224029902, "learning_rate": 2.1356907151466642e-06, "loss": 0.0165, "step": 210210 }, { "epoch": 0.8771311263362569, "grad_norm": 0.6722832946573442, "learning_rate": 2.135665315988963e-06, "loss": 0.0175, "step": 210215 }, { "epoch": 0.8771519890512471, "grad_norm": 0.47436793422115653, "learning_rate": 2.1356399177374347e-06, "loss": 0.0231, "step": 210220 }, { "epoch": 0.8771728517662375, "grad_norm": 0.31005874144203405, "learning_rate": 2.135614520392026e-06, "loss": 0.0266, "step": 210225 }, { "epoch": 0.8771937144812277, "grad_norm": 0.4422444052540408, "learning_rate": 2.1355891239526825e-06, "loss": 0.0236, "step": 210230 }, { "epoch": 0.877214577196218, "grad_norm": 0.45394240312488215, "learning_rate": 2.135563728419351e-06, "loss": 0.0151, "step": 210235 }, { "epoch": 0.8772354399112083, "grad_norm": 0.646683498583651, "learning_rate": 2.135538333791977e-06, "loss": 0.0205, "step": 210240 }, { "epoch": 0.8772563026261986, "grad_norm": 0.289272727488867, "learning_rate": 2.1355129400705062e-06, "loss": 0.0201, "step": 210245 }, { "epoch": 0.8772771653411888, "grad_norm": 1.0396198338066034, "learning_rate": 2.1354875472548857e-06, "loss": 0.0241, "step": 210250 }, { "epoch": 0.8772980280561791, "grad_norm": 0.5781686704726833, "learning_rate": 2.1354621553450615e-06, "loss": 0.0167, "step": 210255 }, { "epoch": 0.8773188907711694, "grad_norm": 0.6795589503935442, "learning_rate": 2.1354367643409793e-06, "loss": 0.0212, "step": 210260 }, { "epoch": 0.8773397534861597, "grad_norm": 0.6458900275992381, "learning_rate": 2.135411374242586e-06, "loss": 0.0206, "step": 210265 }, { "epoch": 0.8773606162011499, "grad_norm": 0.6031079664068052, "learning_rate": 2.135385985049826e-06, "loss": 0.0235, "step": 210270 }, { "epoch": 0.8773814789161403, "grad_norm": 0.2356879197252112, "learning_rate": 2.135360596762648e-06, "loss": 0.0156, "step": 210275 }, { "epoch": 0.8774023416311305, "grad_norm": 0.8285263096337974, "learning_rate": 2.135335209380996e-06, "loss": 0.0187, "step": 210280 }, { "epoch": 0.8774232043461208, "grad_norm": 1.0847017900593896, "learning_rate": 2.1353098229048175e-06, "loss": 0.0206, "step": 210285 }, { "epoch": 0.8774440670611111, "grad_norm": 0.6692831618005038, "learning_rate": 2.1352844373340582e-06, "loss": 0.0192, "step": 210290 }, { "epoch": 0.8774649297761014, "grad_norm": 0.5098487273552963, "learning_rate": 2.1352590526686646e-06, "loss": 0.0195, "step": 210295 }, { "epoch": 0.8774857924910916, "grad_norm": 0.5810113103217165, "learning_rate": 2.1352336689085822e-06, "loss": 0.017, "step": 210300 }, { "epoch": 0.877506655206082, "grad_norm": 0.3783537625191, "learning_rate": 2.1352082860537575e-06, "loss": 0.0148, "step": 210305 }, { "epoch": 0.8775275179210722, "grad_norm": 0.6306387153408521, "learning_rate": 2.1351829041041365e-06, "loss": 0.0206, "step": 210310 }, { "epoch": 0.8775483806360624, "grad_norm": 0.7698999041112674, "learning_rate": 2.1351575230596664e-06, "loss": 0.0226, "step": 210315 }, { "epoch": 0.8775692433510527, "grad_norm": 0.7447486344911378, "learning_rate": 2.135132142920292e-06, "loss": 0.0264, "step": 210320 }, { "epoch": 0.877590106066043, "grad_norm": 0.908188508939156, "learning_rate": 2.1351067636859605e-06, "loss": 0.0266, "step": 210325 }, { "epoch": 0.8776109687810333, "grad_norm": 0.5130389523691666, "learning_rate": 2.135081385356618e-06, "loss": 0.0167, "step": 210330 }, { "epoch": 0.8776318314960235, "grad_norm": 0.7410927189993818, "learning_rate": 2.135056007932211e-06, "loss": 0.0271, "step": 210335 }, { "epoch": 0.8776526942110139, "grad_norm": 1.3208668881137617, "learning_rate": 2.135030631412684e-06, "loss": 0.0302, "step": 210340 }, { "epoch": 0.8776735569260041, "grad_norm": 0.7338552091096194, "learning_rate": 2.135005255797985e-06, "loss": 0.0258, "step": 210345 }, { "epoch": 0.8776944196409944, "grad_norm": 0.8911563876148458, "learning_rate": 2.1349798810880597e-06, "loss": 0.0191, "step": 210350 }, { "epoch": 0.8777152823559847, "grad_norm": 1.325495958877743, "learning_rate": 2.1349545072828545e-06, "loss": 0.0286, "step": 210355 }, { "epoch": 0.877736145070975, "grad_norm": 0.549979968712517, "learning_rate": 2.134929134382315e-06, "loss": 0.0175, "step": 210360 }, { "epoch": 0.8777570077859652, "grad_norm": 0.9802491645976606, "learning_rate": 2.134903762386389e-06, "loss": 0.0273, "step": 210365 }, { "epoch": 0.8777778705009555, "grad_norm": 0.6565470656549437, "learning_rate": 2.1348783912950205e-06, "loss": 0.0163, "step": 210370 }, { "epoch": 0.8777987332159458, "grad_norm": 0.7953346547107532, "learning_rate": 2.134853021108157e-06, "loss": 0.0173, "step": 210375 }, { "epoch": 0.8778195959309361, "grad_norm": 1.0017072293124922, "learning_rate": 2.1348276518257452e-06, "loss": 0.0253, "step": 210380 }, { "epoch": 0.8778404586459263, "grad_norm": 0.7971832068867369, "learning_rate": 2.1348022834477304e-06, "loss": 0.0237, "step": 210385 }, { "epoch": 0.8778613213609167, "grad_norm": 0.5707748178443792, "learning_rate": 2.1347769159740596e-06, "loss": 0.0207, "step": 210390 }, { "epoch": 0.8778821840759069, "grad_norm": 0.5690029128680488, "learning_rate": 2.1347515494046787e-06, "loss": 0.0223, "step": 210395 }, { "epoch": 0.8779030467908971, "grad_norm": 0.6136230444142181, "learning_rate": 2.1347261837395337e-06, "loss": 0.0181, "step": 210400 }, { "epoch": 0.8779239095058875, "grad_norm": 1.0512576766321418, "learning_rate": 2.1347008189785715e-06, "loss": 0.0243, "step": 210405 }, { "epoch": 0.8779447722208777, "grad_norm": 0.9563188998916324, "learning_rate": 2.134675455121738e-06, "loss": 0.0173, "step": 210410 }, { "epoch": 0.877965634935868, "grad_norm": 0.4322583466611193, "learning_rate": 2.13465009216898e-06, "loss": 0.0196, "step": 210415 }, { "epoch": 0.8779864976508583, "grad_norm": 0.541107269112873, "learning_rate": 2.134624730120243e-06, "loss": 0.0229, "step": 210420 }, { "epoch": 0.8780073603658486, "grad_norm": 1.716547534683179, "learning_rate": 2.1345993689754737e-06, "loss": 0.0231, "step": 210425 }, { "epoch": 0.8780282230808388, "grad_norm": 0.7938230669380021, "learning_rate": 2.1345740087346186e-06, "loss": 0.0222, "step": 210430 }, { "epoch": 0.8780490857958291, "grad_norm": 0.5889348178369342, "learning_rate": 2.1345486493976238e-06, "loss": 0.0213, "step": 210435 }, { "epoch": 0.8780699485108194, "grad_norm": 0.5244514171619272, "learning_rate": 2.134523290964435e-06, "loss": 0.022, "step": 210440 }, { "epoch": 0.8780908112258097, "grad_norm": 1.005278453267544, "learning_rate": 2.1344979334349998e-06, "loss": 0.0245, "step": 210445 }, { "epoch": 0.8781116739407999, "grad_norm": 0.42046847546563415, "learning_rate": 2.134472576809264e-06, "loss": 0.0272, "step": 210450 }, { "epoch": 0.8781325366557903, "grad_norm": 0.5112640751547243, "learning_rate": 2.134447221087173e-06, "loss": 0.0238, "step": 210455 }, { "epoch": 0.8781533993707805, "grad_norm": 0.4805015769781332, "learning_rate": 2.1344218662686744e-06, "loss": 0.0259, "step": 210460 }, { "epoch": 0.8781742620857708, "grad_norm": 0.7047358116009822, "learning_rate": 2.1343965123537137e-06, "loss": 0.0209, "step": 210465 }, { "epoch": 0.8781951248007611, "grad_norm": 1.178095074455382, "learning_rate": 2.134371159342238e-06, "loss": 0.0191, "step": 210470 }, { "epoch": 0.8782159875157514, "grad_norm": 0.8239985332748777, "learning_rate": 2.1343458072341926e-06, "loss": 0.021, "step": 210475 }, { "epoch": 0.8782368502307416, "grad_norm": 0.6035410763180408, "learning_rate": 2.134320456029525e-06, "loss": 0.02, "step": 210480 }, { "epoch": 0.878257712945732, "grad_norm": 1.013107214705815, "learning_rate": 2.134295105728181e-06, "loss": 0.0209, "step": 210485 }, { "epoch": 0.8782785756607222, "grad_norm": 0.40884290564892495, "learning_rate": 2.1342697563301066e-06, "loss": 0.0174, "step": 210490 }, { "epoch": 0.8782994383757124, "grad_norm": 0.5908042890819222, "learning_rate": 2.1342444078352483e-06, "loss": 0.0195, "step": 210495 }, { "epoch": 0.8783203010907027, "grad_norm": 0.9479238472462479, "learning_rate": 2.1342190602435532e-06, "loss": 0.0222, "step": 210500 }, { "epoch": 0.878341163805693, "grad_norm": 0.7975168583444409, "learning_rate": 2.134193713554967e-06, "loss": 0.0229, "step": 210505 }, { "epoch": 0.8783620265206833, "grad_norm": 2.894075504342554, "learning_rate": 2.1341683677694362e-06, "loss": 0.0173, "step": 210510 }, { "epoch": 0.8783828892356735, "grad_norm": 0.7803781463275893, "learning_rate": 2.1341430228869075e-06, "loss": 0.0194, "step": 210515 }, { "epoch": 0.8784037519506639, "grad_norm": 0.34591738466764044, "learning_rate": 2.1341176789073264e-06, "loss": 0.0223, "step": 210520 }, { "epoch": 0.8784246146656541, "grad_norm": 0.5095982704006253, "learning_rate": 2.1340923358306398e-06, "loss": 0.0145, "step": 210525 }, { "epoch": 0.8784454773806444, "grad_norm": 0.8970212159843085, "learning_rate": 2.134066993656795e-06, "loss": 0.026, "step": 210530 }, { "epoch": 0.8784663400956347, "grad_norm": 0.5334057471952184, "learning_rate": 2.134041652385737e-06, "loss": 0.0232, "step": 210535 }, { "epoch": 0.878487202810625, "grad_norm": 0.5695093558612715, "learning_rate": 2.1340163120174126e-06, "loss": 0.0218, "step": 210540 }, { "epoch": 0.8785080655256152, "grad_norm": 0.5021042899944109, "learning_rate": 2.1339909725517684e-06, "loss": 0.0194, "step": 210545 }, { "epoch": 0.8785289282406055, "grad_norm": 0.9654153263612844, "learning_rate": 2.133965633988751e-06, "loss": 0.0248, "step": 210550 }, { "epoch": 0.8785497909555958, "grad_norm": 0.5075271983679633, "learning_rate": 2.1339402963283067e-06, "loss": 0.0222, "step": 210555 }, { "epoch": 0.8785706536705861, "grad_norm": 0.5956764578646976, "learning_rate": 2.133914959570381e-06, "loss": 0.0166, "step": 210560 }, { "epoch": 0.8785915163855763, "grad_norm": 0.4207555476444407, "learning_rate": 2.1338896237149222e-06, "loss": 0.022, "step": 210565 }, { "epoch": 0.8786123791005667, "grad_norm": 0.1884801249814088, "learning_rate": 2.1338642887618753e-06, "loss": 0.0178, "step": 210570 }, { "epoch": 0.8786332418155569, "grad_norm": 0.852989126076552, "learning_rate": 2.1338389547111865e-06, "loss": 0.0264, "step": 210575 }, { "epoch": 0.8786541045305472, "grad_norm": 0.5644996307879245, "learning_rate": 2.1338136215628034e-06, "loss": 0.0189, "step": 210580 }, { "epoch": 0.8786749672455375, "grad_norm": 0.8538616591253331, "learning_rate": 2.1337882893166713e-06, "loss": 0.0223, "step": 210585 }, { "epoch": 0.8786958299605278, "grad_norm": 0.42251030286464875, "learning_rate": 2.1337629579727377e-06, "loss": 0.0157, "step": 210590 }, { "epoch": 0.878716692675518, "grad_norm": 0.6654622516149703, "learning_rate": 2.1337376275309484e-06, "loss": 0.017, "step": 210595 }, { "epoch": 0.8787375553905084, "grad_norm": 0.6873555837025834, "learning_rate": 2.13371229799125e-06, "loss": 0.0215, "step": 210600 }, { "epoch": 0.8787584181054986, "grad_norm": 0.45820121834148714, "learning_rate": 2.1336869693535893e-06, "loss": 0.0286, "step": 210605 }, { "epoch": 0.8787792808204888, "grad_norm": 0.4391682234353007, "learning_rate": 2.133661641617912e-06, "loss": 0.02, "step": 210610 }, { "epoch": 0.8788001435354791, "grad_norm": 0.5871710254294413, "learning_rate": 2.133636314784165e-06, "loss": 0.0232, "step": 210615 }, { "epoch": 0.8788210062504694, "grad_norm": 0.9299502147860589, "learning_rate": 2.1336109888522945e-06, "loss": 0.0262, "step": 210620 }, { "epoch": 0.8788418689654597, "grad_norm": 0.9553185861109299, "learning_rate": 2.133585663822248e-06, "loss": 0.0188, "step": 210625 }, { "epoch": 0.8788627316804499, "grad_norm": 0.7171211419840339, "learning_rate": 2.1335603396939707e-06, "loss": 0.0177, "step": 210630 }, { "epoch": 0.8788835943954403, "grad_norm": 0.2142262355731602, "learning_rate": 2.1335350164674097e-06, "loss": 0.0141, "step": 210635 }, { "epoch": 0.8789044571104305, "grad_norm": 0.6333083120225232, "learning_rate": 2.1335096941425114e-06, "loss": 0.0166, "step": 210640 }, { "epoch": 0.8789253198254208, "grad_norm": 1.0178770691860548, "learning_rate": 2.133484372719222e-06, "loss": 0.0227, "step": 210645 }, { "epoch": 0.8789461825404111, "grad_norm": 0.5646592805324465, "learning_rate": 2.1334590521974884e-06, "loss": 0.0217, "step": 210650 }, { "epoch": 0.8789670452554014, "grad_norm": 0.8736453142724376, "learning_rate": 2.1334337325772573e-06, "loss": 0.0239, "step": 210655 }, { "epoch": 0.8789879079703916, "grad_norm": 0.7852465226842087, "learning_rate": 2.1334084138584748e-06, "loss": 0.0233, "step": 210660 }, { "epoch": 0.879008770685382, "grad_norm": 0.4076108888266736, "learning_rate": 2.1333830960410873e-06, "loss": 0.0171, "step": 210665 }, { "epoch": 0.8790296334003722, "grad_norm": 0.5656123232195186, "learning_rate": 2.1333577791250416e-06, "loss": 0.0166, "step": 210670 }, { "epoch": 0.8790504961153625, "grad_norm": 0.8411224427363937, "learning_rate": 2.133332463110284e-06, "loss": 0.0185, "step": 210675 }, { "epoch": 0.8790713588303527, "grad_norm": 0.36192061244091017, "learning_rate": 2.1333071479967614e-06, "loss": 0.0151, "step": 210680 }, { "epoch": 0.879092221545343, "grad_norm": 0.8532045863512017, "learning_rate": 2.13328183378442e-06, "loss": 0.023, "step": 210685 }, { "epoch": 0.8791130842603333, "grad_norm": 0.7953920589963035, "learning_rate": 2.1332565204732066e-06, "loss": 0.0207, "step": 210690 }, { "epoch": 0.8791339469753235, "grad_norm": 0.925479064565756, "learning_rate": 2.133231208063067e-06, "loss": 0.0225, "step": 210695 }, { "epoch": 0.8791548096903139, "grad_norm": 0.6022897256507729, "learning_rate": 2.1332058965539485e-06, "loss": 0.0276, "step": 210700 }, { "epoch": 0.8791756724053041, "grad_norm": 0.7427792092051754, "learning_rate": 2.1331805859457978e-06, "loss": 0.0204, "step": 210705 }, { "epoch": 0.8791965351202944, "grad_norm": 0.7023820900367327, "learning_rate": 2.1331552762385606e-06, "loss": 0.0209, "step": 210710 }, { "epoch": 0.8792173978352847, "grad_norm": 0.42320718258419265, "learning_rate": 2.133129967432184e-06, "loss": 0.0224, "step": 210715 }, { "epoch": 0.879238260550275, "grad_norm": 0.40528976875748246, "learning_rate": 2.133104659526615e-06, "loss": 0.0154, "step": 210720 }, { "epoch": 0.8792591232652652, "grad_norm": 0.5379044949135147, "learning_rate": 2.1330793525217994e-06, "loss": 0.0209, "step": 210725 }, { "epoch": 0.8792799859802555, "grad_norm": 0.7849616840761109, "learning_rate": 2.1330540464176846e-06, "loss": 0.0227, "step": 210730 }, { "epoch": 0.8793008486952458, "grad_norm": 0.5442034227526741, "learning_rate": 2.1330287412142156e-06, "loss": 0.0201, "step": 210735 }, { "epoch": 0.8793217114102361, "grad_norm": 1.1316290849630048, "learning_rate": 2.1330034369113405e-06, "loss": 0.0256, "step": 210740 }, { "epoch": 0.8793425741252263, "grad_norm": 0.6600275862645721, "learning_rate": 2.1329781335090053e-06, "loss": 0.0253, "step": 210745 }, { "epoch": 0.8793634368402167, "grad_norm": 0.8937763491929012, "learning_rate": 2.1329528310071564e-06, "loss": 0.0195, "step": 210750 }, { "epoch": 0.8793842995552069, "grad_norm": 0.7881893096855557, "learning_rate": 2.1329275294057412e-06, "loss": 0.019, "step": 210755 }, { "epoch": 0.8794051622701972, "grad_norm": 1.0803173501743983, "learning_rate": 2.1329022287047056e-06, "loss": 0.0222, "step": 210760 }, { "epoch": 0.8794260249851875, "grad_norm": 0.37589203218683587, "learning_rate": 2.1328769289039964e-06, "loss": 0.0187, "step": 210765 }, { "epoch": 0.8794468877001778, "grad_norm": 0.9271236603576027, "learning_rate": 2.13285163000356e-06, "loss": 0.0224, "step": 210770 }, { "epoch": 0.879467750415168, "grad_norm": 0.7837830742369123, "learning_rate": 2.132826332003343e-06, "loss": 0.0199, "step": 210775 }, { "epoch": 0.8794886131301584, "grad_norm": 0.9441784273501402, "learning_rate": 2.1328010349032927e-06, "loss": 0.0252, "step": 210780 }, { "epoch": 0.8795094758451486, "grad_norm": 0.28594873823168415, "learning_rate": 2.1327757387033546e-06, "loss": 0.0178, "step": 210785 }, { "epoch": 0.8795303385601388, "grad_norm": 0.47334292336189376, "learning_rate": 2.132750443403476e-06, "loss": 0.014, "step": 210790 }, { "epoch": 0.8795512012751291, "grad_norm": 0.9427764181389422, "learning_rate": 2.132725149003604e-06, "loss": 0.0226, "step": 210795 }, { "epoch": 0.8795720639901194, "grad_norm": 0.6625357120994028, "learning_rate": 2.1326998555036843e-06, "loss": 0.021, "step": 210800 }, { "epoch": 0.8795929267051097, "grad_norm": 0.5560591125996888, "learning_rate": 2.1326745629036636e-06, "loss": 0.019, "step": 210805 }, { "epoch": 0.8796137894200999, "grad_norm": 0.5514963126173009, "learning_rate": 2.1326492712034897e-06, "loss": 0.0153, "step": 210810 }, { "epoch": 0.8796346521350903, "grad_norm": 0.6558600172528849, "learning_rate": 2.132623980403108e-06, "loss": 0.0264, "step": 210815 }, { "epoch": 0.8796555148500805, "grad_norm": 0.4863213995716331, "learning_rate": 2.132598690502465e-06, "loss": 0.0194, "step": 210820 }, { "epoch": 0.8796763775650708, "grad_norm": 0.24857013529040572, "learning_rate": 2.132573401501508e-06, "loss": 0.0171, "step": 210825 }, { "epoch": 0.8796972402800611, "grad_norm": 0.9767225530707464, "learning_rate": 2.132548113400184e-06, "loss": 0.0198, "step": 210830 }, { "epoch": 0.8797181029950514, "grad_norm": 0.5714136839101867, "learning_rate": 2.132522826198439e-06, "loss": 0.0165, "step": 210835 }, { "epoch": 0.8797389657100416, "grad_norm": 0.5242214380516399, "learning_rate": 2.13249753989622e-06, "loss": 0.015, "step": 210840 }, { "epoch": 0.8797598284250319, "grad_norm": 1.185400252410767, "learning_rate": 2.1324722544934736e-06, "loss": 0.0146, "step": 210845 }, { "epoch": 0.8797806911400222, "grad_norm": 0.867724100295195, "learning_rate": 2.132446969990146e-06, "loss": 0.0176, "step": 210850 }, { "epoch": 0.8798015538550125, "grad_norm": 0.8740262574891433, "learning_rate": 2.132421686386185e-06, "loss": 0.0246, "step": 210855 }, { "epoch": 0.8798224165700027, "grad_norm": 0.5663144288267858, "learning_rate": 2.132396403681536e-06, "loss": 0.024, "step": 210860 }, { "epoch": 0.8798432792849931, "grad_norm": 0.48832830839909486, "learning_rate": 2.1323711218761466e-06, "loss": 0.0139, "step": 210865 }, { "epoch": 0.8798641419999833, "grad_norm": 0.6041363685423653, "learning_rate": 2.1323458409699634e-06, "loss": 0.0148, "step": 210870 }, { "epoch": 0.8798850047149736, "grad_norm": 1.792775936211035, "learning_rate": 2.1323205609629324e-06, "loss": 0.0186, "step": 210875 }, { "epoch": 0.8799058674299639, "grad_norm": 1.2762660617865367, "learning_rate": 2.132295281855001e-06, "loss": 0.0212, "step": 210880 }, { "epoch": 0.8799267301449542, "grad_norm": 0.7077712798396039, "learning_rate": 2.1322700036461155e-06, "loss": 0.0177, "step": 210885 }, { "epoch": 0.8799475928599444, "grad_norm": 0.6874199546365031, "learning_rate": 2.1322447263362233e-06, "loss": 0.0246, "step": 210890 }, { "epoch": 0.8799684555749347, "grad_norm": 0.7156728829622963, "learning_rate": 2.13221944992527e-06, "loss": 0.0232, "step": 210895 }, { "epoch": 0.879989318289925, "grad_norm": 0.5700100456153022, "learning_rate": 2.132194174413203e-06, "loss": 0.0192, "step": 210900 }, { "epoch": 0.8800101810049152, "grad_norm": 0.7582672077668683, "learning_rate": 2.1321688997999693e-06, "loss": 0.0234, "step": 210905 }, { "epoch": 0.8800310437199055, "grad_norm": 0.6333685866517565, "learning_rate": 2.132143626085515e-06, "loss": 0.0162, "step": 210910 }, { "epoch": 0.8800519064348958, "grad_norm": 0.27886060383433703, "learning_rate": 2.1321183532697875e-06, "loss": 0.0189, "step": 210915 }, { "epoch": 0.8800727691498861, "grad_norm": 0.6191433870357835, "learning_rate": 2.132093081352732e-06, "loss": 0.0243, "step": 210920 }, { "epoch": 0.8800936318648763, "grad_norm": 0.3339028661446646, "learning_rate": 2.1320678103342974e-06, "loss": 0.0139, "step": 210925 }, { "epoch": 0.8801144945798667, "grad_norm": 0.6434694535816351, "learning_rate": 2.1320425402144296e-06, "loss": 0.0252, "step": 210930 }, { "epoch": 0.8801353572948569, "grad_norm": 0.6801406668822196, "learning_rate": 2.1320172709930744e-06, "loss": 0.02, "step": 210935 }, { "epoch": 0.8801562200098472, "grad_norm": 0.3672108424800236, "learning_rate": 2.1319920026701798e-06, "loss": 0.0158, "step": 210940 }, { "epoch": 0.8801770827248375, "grad_norm": 0.7174283877237302, "learning_rate": 2.1319667352456923e-06, "loss": 0.0219, "step": 210945 }, { "epoch": 0.8801979454398278, "grad_norm": 0.9618431991231947, "learning_rate": 2.1319414687195582e-06, "loss": 0.0177, "step": 210950 }, { "epoch": 0.880218808154818, "grad_norm": 0.9420881834984262, "learning_rate": 2.1319162030917242e-06, "loss": 0.0164, "step": 210955 }, { "epoch": 0.8802396708698084, "grad_norm": 0.535520419229132, "learning_rate": 2.1318909383621377e-06, "loss": 0.0224, "step": 210960 }, { "epoch": 0.8802605335847986, "grad_norm": 0.7883946400392379, "learning_rate": 2.131865674530746e-06, "loss": 0.0202, "step": 210965 }, { "epoch": 0.8802813962997889, "grad_norm": 0.7285472361941067, "learning_rate": 2.131840411597494e-06, "loss": 0.0182, "step": 210970 }, { "epoch": 0.8803022590147791, "grad_norm": 0.6204091640184869, "learning_rate": 2.1318151495623298e-06, "loss": 0.0181, "step": 210975 }, { "epoch": 0.8803231217297695, "grad_norm": 1.3840026441162476, "learning_rate": 2.1317898884252e-06, "loss": 0.0209, "step": 210980 }, { "epoch": 0.8803439844447597, "grad_norm": 1.1716738733900314, "learning_rate": 2.131764628186051e-06, "loss": 0.0226, "step": 210985 }, { "epoch": 0.8803648471597499, "grad_norm": 0.7439306526451652, "learning_rate": 2.1317393688448306e-06, "loss": 0.0166, "step": 210990 }, { "epoch": 0.8803857098747403, "grad_norm": 0.9605200157184414, "learning_rate": 2.1317141104014843e-06, "loss": 0.0238, "step": 210995 }, { "epoch": 0.8804065725897305, "grad_norm": 0.9768012100951418, "learning_rate": 2.13168885285596e-06, "loss": 0.0179, "step": 211000 }, { "epoch": 0.8804274353047208, "grad_norm": 0.7853801083187646, "learning_rate": 2.1316635962082036e-06, "loss": 0.0186, "step": 211005 }, { "epoch": 0.8804482980197111, "grad_norm": 0.4965447310373732, "learning_rate": 2.1316383404581625e-06, "loss": 0.023, "step": 211010 }, { "epoch": 0.8804691607347014, "grad_norm": 0.5278061221329442, "learning_rate": 2.1316130856057835e-06, "loss": 0.0187, "step": 211015 }, { "epoch": 0.8804900234496916, "grad_norm": 0.782233643514486, "learning_rate": 2.1315878316510135e-06, "loss": 0.0168, "step": 211020 }, { "epoch": 0.8805108861646819, "grad_norm": 0.32884687363878784, "learning_rate": 2.131562578593799e-06, "loss": 0.0146, "step": 211025 }, { "epoch": 0.8805317488796722, "grad_norm": 0.6006406388328529, "learning_rate": 2.1315373264340874e-06, "loss": 0.0145, "step": 211030 }, { "epoch": 0.8805526115946625, "grad_norm": 1.0342215318227033, "learning_rate": 2.131512075171824e-06, "loss": 0.0298, "step": 211035 }, { "epoch": 0.8805734743096527, "grad_norm": 0.427309091435137, "learning_rate": 2.131486824806958e-06, "loss": 0.0184, "step": 211040 }, { "epoch": 0.8805943370246431, "grad_norm": 0.7783542674241285, "learning_rate": 2.131461575339434e-06, "loss": 0.0255, "step": 211045 }, { "epoch": 0.8806151997396333, "grad_norm": 0.8171171411529501, "learning_rate": 2.1314363267692002e-06, "loss": 0.021, "step": 211050 }, { "epoch": 0.8806360624546236, "grad_norm": 0.6479251031418332, "learning_rate": 2.1314110790962036e-06, "loss": 0.0262, "step": 211055 }, { "epoch": 0.8806569251696139, "grad_norm": 0.8733490157926852, "learning_rate": 2.13138583232039e-06, "loss": 0.0177, "step": 211060 }, { "epoch": 0.8806777878846042, "grad_norm": 0.8871274755277633, "learning_rate": 2.1313605864417074e-06, "loss": 0.0171, "step": 211065 }, { "epoch": 0.8806986505995944, "grad_norm": 0.9406890658362103, "learning_rate": 2.1313353414601015e-06, "loss": 0.0279, "step": 211070 }, { "epoch": 0.8807195133145848, "grad_norm": 0.45724215324118245, "learning_rate": 2.13131009737552e-06, "loss": 0.0183, "step": 211075 }, { "epoch": 0.880740376029575, "grad_norm": 0.6153642027730722, "learning_rate": 2.1312848541879097e-06, "loss": 0.0234, "step": 211080 }, { "epoch": 0.8807612387445652, "grad_norm": 0.730979802631316, "learning_rate": 2.1312596118972175e-06, "loss": 0.0226, "step": 211085 }, { "epoch": 0.8807821014595555, "grad_norm": 0.4311178258750461, "learning_rate": 2.13123437050339e-06, "loss": 0.0193, "step": 211090 }, { "epoch": 0.8808029641745458, "grad_norm": 0.8616754253744217, "learning_rate": 2.131209130006374e-06, "loss": 0.0181, "step": 211095 }, { "epoch": 0.8808238268895361, "grad_norm": 0.8274137328395224, "learning_rate": 2.1311838904061165e-06, "loss": 0.0214, "step": 211100 }, { "epoch": 0.8808446896045263, "grad_norm": 0.8692820487013321, "learning_rate": 2.131158651702565e-06, "loss": 0.0235, "step": 211105 }, { "epoch": 0.8808655523195167, "grad_norm": 0.7203486685457785, "learning_rate": 2.1311334138956655e-06, "loss": 0.0261, "step": 211110 }, { "epoch": 0.8808864150345069, "grad_norm": 0.44004180611835136, "learning_rate": 2.131108176985366e-06, "loss": 0.0158, "step": 211115 }, { "epoch": 0.8809072777494972, "grad_norm": 0.6348203034060707, "learning_rate": 2.1310829409716118e-06, "loss": 0.0189, "step": 211120 }, { "epoch": 0.8809281404644875, "grad_norm": 0.9123557444202336, "learning_rate": 2.1310577058543513e-06, "loss": 0.0184, "step": 211125 }, { "epoch": 0.8809490031794778, "grad_norm": 0.9339354692277018, "learning_rate": 2.131032471633531e-06, "loss": 0.0186, "step": 211130 }, { "epoch": 0.880969865894468, "grad_norm": 0.6261958916516673, "learning_rate": 2.131007238309098e-06, "loss": 0.0143, "step": 211135 }, { "epoch": 0.8809907286094584, "grad_norm": 0.22606548827460937, "learning_rate": 2.1309820058809984e-06, "loss": 0.0178, "step": 211140 }, { "epoch": 0.8810115913244486, "grad_norm": 0.6276040242265837, "learning_rate": 2.1309567743491793e-06, "loss": 0.0212, "step": 211145 }, { "epoch": 0.8810324540394389, "grad_norm": 0.3833168166233225, "learning_rate": 2.130931543713589e-06, "loss": 0.0147, "step": 211150 }, { "epoch": 0.8810533167544291, "grad_norm": 0.6389202138323767, "learning_rate": 2.1309063139741727e-06, "loss": 0.0198, "step": 211155 }, { "epoch": 0.8810741794694195, "grad_norm": 0.23931120769782407, "learning_rate": 2.1308810851308786e-06, "loss": 0.0132, "step": 211160 }, { "epoch": 0.8810950421844097, "grad_norm": 0.4993553530322411, "learning_rate": 2.1308558571836533e-06, "loss": 0.0213, "step": 211165 }, { "epoch": 0.8811159048994, "grad_norm": 0.7844231721517281, "learning_rate": 2.1308306301324433e-06, "loss": 0.0181, "step": 211170 }, { "epoch": 0.8811367676143903, "grad_norm": 0.4251334810438875, "learning_rate": 2.130805403977196e-06, "loss": 0.0169, "step": 211175 }, { "epoch": 0.8811576303293805, "grad_norm": 0.27762265642361045, "learning_rate": 2.1307801787178573e-06, "loss": 0.0274, "step": 211180 }, { "epoch": 0.8811784930443708, "grad_norm": 1.0680972898249057, "learning_rate": 2.1307549543543764e-06, "loss": 0.0238, "step": 211185 }, { "epoch": 0.8811993557593611, "grad_norm": 0.7187456155585813, "learning_rate": 2.1307297308866985e-06, "loss": 0.0223, "step": 211190 }, { "epoch": 0.8812202184743514, "grad_norm": 1.0801117645273042, "learning_rate": 2.130704508314771e-06, "loss": 0.0185, "step": 211195 }, { "epoch": 0.8812410811893416, "grad_norm": 1.0727299838165394, "learning_rate": 2.1306792866385415e-06, "loss": 0.0217, "step": 211200 }, { "epoch": 0.8812619439043319, "grad_norm": 0.5160713720085589, "learning_rate": 2.130654065857956e-06, "loss": 0.0196, "step": 211205 }, { "epoch": 0.8812828066193222, "grad_norm": 0.8587865987611417, "learning_rate": 2.130628845972962e-06, "loss": 0.0215, "step": 211210 }, { "epoch": 0.8813036693343125, "grad_norm": 0.4428437212737236, "learning_rate": 2.1306036269835067e-06, "loss": 0.0196, "step": 211215 }, { "epoch": 0.8813245320493027, "grad_norm": 0.8441168750154475, "learning_rate": 2.1305784088895366e-06, "loss": 0.0324, "step": 211220 }, { "epoch": 0.8813453947642931, "grad_norm": 0.4667640528520759, "learning_rate": 2.130553191690999e-06, "loss": 0.0191, "step": 211225 }, { "epoch": 0.8813662574792833, "grad_norm": 0.4518389598625793, "learning_rate": 2.130527975387841e-06, "loss": 0.0202, "step": 211230 }, { "epoch": 0.8813871201942736, "grad_norm": 0.6701057773576812, "learning_rate": 2.130502759980009e-06, "loss": 0.0223, "step": 211235 }, { "epoch": 0.8814079829092639, "grad_norm": 0.5140255359334895, "learning_rate": 2.130477545467451e-06, "loss": 0.0194, "step": 211240 }, { "epoch": 0.8814288456242542, "grad_norm": 0.4620795404498859, "learning_rate": 2.130452331850113e-06, "loss": 0.0178, "step": 211245 }, { "epoch": 0.8814497083392444, "grad_norm": 0.506348737407003, "learning_rate": 2.130427119127943e-06, "loss": 0.0204, "step": 211250 }, { "epoch": 0.8814705710542348, "grad_norm": 1.0905365154991196, "learning_rate": 2.130401907300888e-06, "loss": 0.0174, "step": 211255 }, { "epoch": 0.881491433769225, "grad_norm": 0.6860971880582667, "learning_rate": 2.1303766963688936e-06, "loss": 0.0173, "step": 211260 }, { "epoch": 0.8815122964842153, "grad_norm": 0.47312302078785695, "learning_rate": 2.130351486331908e-06, "loss": 0.028, "step": 211265 }, { "epoch": 0.8815331591992055, "grad_norm": 0.49922303570014503, "learning_rate": 2.1303262771898786e-06, "loss": 0.0288, "step": 211270 }, { "epoch": 0.8815540219141959, "grad_norm": 0.755876543421537, "learning_rate": 2.130301068942752e-06, "loss": 0.021, "step": 211275 }, { "epoch": 0.8815748846291861, "grad_norm": 0.9724446413968405, "learning_rate": 2.130275861590475e-06, "loss": 0.0196, "step": 211280 }, { "epoch": 0.8815957473441763, "grad_norm": 0.46784133383797194, "learning_rate": 2.1302506551329948e-06, "loss": 0.0159, "step": 211285 }, { "epoch": 0.8816166100591667, "grad_norm": 1.2353559205218645, "learning_rate": 2.130225449570259e-06, "loss": 0.0214, "step": 211290 }, { "epoch": 0.8816374727741569, "grad_norm": 0.6434355789511632, "learning_rate": 2.1302002449022137e-06, "loss": 0.0148, "step": 211295 }, { "epoch": 0.8816583354891472, "grad_norm": 0.7422271381246615, "learning_rate": 2.1301750411288065e-06, "loss": 0.0225, "step": 211300 }, { "epoch": 0.8816791982041375, "grad_norm": 0.29145653789821163, "learning_rate": 2.1301498382499843e-06, "loss": 0.018, "step": 211305 }, { "epoch": 0.8817000609191278, "grad_norm": 0.24123520944692808, "learning_rate": 2.1301246362656945e-06, "loss": 0.0224, "step": 211310 }, { "epoch": 0.881720923634118, "grad_norm": 1.3219815249840963, "learning_rate": 2.1300994351758838e-06, "loss": 0.0168, "step": 211315 }, { "epoch": 0.8817417863491084, "grad_norm": 0.7252340426131965, "learning_rate": 2.1300742349805e-06, "loss": 0.0218, "step": 211320 }, { "epoch": 0.8817626490640986, "grad_norm": 0.6722555990043405, "learning_rate": 2.1300490356794894e-06, "loss": 0.0219, "step": 211325 }, { "epoch": 0.8817835117790889, "grad_norm": 1.0728855163602284, "learning_rate": 2.1300238372727996e-06, "loss": 0.0217, "step": 211330 }, { "epoch": 0.8818043744940791, "grad_norm": 0.43869248019403473, "learning_rate": 2.129998639760377e-06, "loss": 0.0133, "step": 211335 }, { "epoch": 0.8818252372090695, "grad_norm": 1.6873592879945647, "learning_rate": 2.1299734431421692e-06, "loss": 0.0289, "step": 211340 }, { "epoch": 0.8818460999240597, "grad_norm": 0.8947292926346069, "learning_rate": 2.129948247418124e-06, "loss": 0.0154, "step": 211345 }, { "epoch": 0.88186696263905, "grad_norm": 0.8684167733049503, "learning_rate": 2.129923052588187e-06, "loss": 0.0253, "step": 211350 }, { "epoch": 0.8818878253540403, "grad_norm": 0.8846560678957212, "learning_rate": 2.1298978586523066e-06, "loss": 0.0147, "step": 211355 }, { "epoch": 0.8819086880690306, "grad_norm": 0.7080127353552205, "learning_rate": 2.1298726656104295e-06, "loss": 0.02, "step": 211360 }, { "epoch": 0.8819295507840208, "grad_norm": 0.4405018805818403, "learning_rate": 2.1298474734625025e-06, "loss": 0.0211, "step": 211365 }, { "epoch": 0.8819504134990112, "grad_norm": 0.2771373585666834, "learning_rate": 2.1298222822084735e-06, "loss": 0.0185, "step": 211370 }, { "epoch": 0.8819712762140014, "grad_norm": 0.5676539675183356, "learning_rate": 2.1297970918482887e-06, "loss": 0.0309, "step": 211375 }, { "epoch": 0.8819921389289916, "grad_norm": 0.5331805811082039, "learning_rate": 2.1297719023818965e-06, "loss": 0.0318, "step": 211380 }, { "epoch": 0.8820130016439819, "grad_norm": 0.47147729747966094, "learning_rate": 2.1297467138092422e-06, "loss": 0.0192, "step": 211385 }, { "epoch": 0.8820338643589722, "grad_norm": 1.3622811807490307, "learning_rate": 2.1297215261302746e-06, "loss": 0.0302, "step": 211390 }, { "epoch": 0.8820547270739625, "grad_norm": 0.5302803640523189, "learning_rate": 2.1296963393449403e-06, "loss": 0.0276, "step": 211395 }, { "epoch": 0.8820755897889527, "grad_norm": 0.5045978904809397, "learning_rate": 2.1296711534531863e-06, "loss": 0.0166, "step": 211400 }, { "epoch": 0.8820964525039431, "grad_norm": 1.1495645216297103, "learning_rate": 2.12964596845496e-06, "loss": 0.0266, "step": 211405 }, { "epoch": 0.8821173152189333, "grad_norm": 0.6445019254760842, "learning_rate": 2.129620784350208e-06, "loss": 0.0213, "step": 211410 }, { "epoch": 0.8821381779339236, "grad_norm": 1.0350154660311426, "learning_rate": 2.129595601138878e-06, "loss": 0.03, "step": 211415 }, { "epoch": 0.8821590406489139, "grad_norm": 0.7640529943089095, "learning_rate": 2.129570418820917e-06, "loss": 0.0203, "step": 211420 }, { "epoch": 0.8821799033639042, "grad_norm": 0.38621797202349156, "learning_rate": 2.129545237396273e-06, "loss": 0.0196, "step": 211425 }, { "epoch": 0.8822007660788944, "grad_norm": 0.4619217333343472, "learning_rate": 2.1295200568648922e-06, "loss": 0.0187, "step": 211430 }, { "epoch": 0.8822216287938848, "grad_norm": 0.3855080793075671, "learning_rate": 2.129494877226722e-06, "loss": 0.0311, "step": 211435 }, { "epoch": 0.882242491508875, "grad_norm": 1.1516085005704604, "learning_rate": 2.1294696984817094e-06, "loss": 0.0188, "step": 211440 }, { "epoch": 0.8822633542238653, "grad_norm": 0.45906865796227075, "learning_rate": 2.129444520629802e-06, "loss": 0.0177, "step": 211445 }, { "epoch": 0.8822842169388555, "grad_norm": 1.099080426872005, "learning_rate": 2.129419343670947e-06, "loss": 0.0229, "step": 211450 }, { "epoch": 0.8823050796538459, "grad_norm": 0.9772973986910928, "learning_rate": 2.129394167605091e-06, "loss": 0.0212, "step": 211455 }, { "epoch": 0.8823259423688361, "grad_norm": 0.8368500531484423, "learning_rate": 2.129368992432182e-06, "loss": 0.0316, "step": 211460 }, { "epoch": 0.8823468050838263, "grad_norm": 1.5363696551778474, "learning_rate": 2.1293438181521674e-06, "loss": 0.0319, "step": 211465 }, { "epoch": 0.8823676677988167, "grad_norm": 0.5737572551248222, "learning_rate": 2.129318644764993e-06, "loss": 0.0169, "step": 211470 }, { "epoch": 0.882388530513807, "grad_norm": 1.2286396299937639, "learning_rate": 2.1292934722706073e-06, "loss": 0.0225, "step": 211475 }, { "epoch": 0.8824093932287972, "grad_norm": 0.5031993982533092, "learning_rate": 2.1292683006689573e-06, "loss": 0.019, "step": 211480 }, { "epoch": 0.8824302559437875, "grad_norm": 0.4991524253575016, "learning_rate": 2.12924312995999e-06, "loss": 0.0249, "step": 211485 }, { "epoch": 0.8824511186587778, "grad_norm": 0.8249334042414965, "learning_rate": 2.1292179601436528e-06, "loss": 0.0263, "step": 211490 }, { "epoch": 0.882471981373768, "grad_norm": 0.8581218999469051, "learning_rate": 2.129192791219893e-06, "loss": 0.0215, "step": 211495 }, { "epoch": 0.8824928440887584, "grad_norm": 0.7249682200374226, "learning_rate": 2.1291676231886574e-06, "loss": 0.0176, "step": 211500 }, { "epoch": 0.8825137068037486, "grad_norm": 0.5513641423575114, "learning_rate": 2.1291424560498936e-06, "loss": 0.0171, "step": 211505 }, { "epoch": 0.8825345695187389, "grad_norm": 0.48112574279514364, "learning_rate": 2.129117289803549e-06, "loss": 0.0172, "step": 211510 }, { "epoch": 0.8825554322337291, "grad_norm": 0.7159086223297836, "learning_rate": 2.1290921244495707e-06, "loss": 0.0199, "step": 211515 }, { "epoch": 0.8825762949487195, "grad_norm": 0.623681286617366, "learning_rate": 2.1290669599879053e-06, "loss": 0.0264, "step": 211520 }, { "epoch": 0.8825971576637097, "grad_norm": 0.7770874640747847, "learning_rate": 2.1290417964185015e-06, "loss": 0.0173, "step": 211525 }, { "epoch": 0.8826180203787, "grad_norm": 0.4978852149787691, "learning_rate": 2.129016633741306e-06, "loss": 0.0192, "step": 211530 }, { "epoch": 0.8826388830936903, "grad_norm": 0.6290762198052561, "learning_rate": 2.128991471956265e-06, "loss": 0.0179, "step": 211535 }, { "epoch": 0.8826597458086806, "grad_norm": 1.121319091279148, "learning_rate": 2.1289663110633273e-06, "loss": 0.0296, "step": 211540 }, { "epoch": 0.8826806085236708, "grad_norm": 0.43165960209529747, "learning_rate": 2.128941151062439e-06, "loss": 0.0164, "step": 211545 }, { "epoch": 0.8827014712386612, "grad_norm": 0.5651786266780505, "learning_rate": 2.1289159919535483e-06, "loss": 0.0202, "step": 211550 }, { "epoch": 0.8827223339536514, "grad_norm": 0.7405813951723718, "learning_rate": 2.128890833736602e-06, "loss": 0.0232, "step": 211555 }, { "epoch": 0.8827431966686416, "grad_norm": 0.47393555787834685, "learning_rate": 2.1288656764115475e-06, "loss": 0.0204, "step": 211560 }, { "epoch": 0.8827640593836319, "grad_norm": 0.8696323576243856, "learning_rate": 2.1288405199783323e-06, "loss": 0.0207, "step": 211565 }, { "epoch": 0.8827849220986222, "grad_norm": 0.8059592830889033, "learning_rate": 2.1288153644369035e-06, "loss": 0.0266, "step": 211570 }, { "epoch": 0.8828057848136125, "grad_norm": 0.5181428526281795, "learning_rate": 2.128790209787209e-06, "loss": 0.0262, "step": 211575 }, { "epoch": 0.8828266475286027, "grad_norm": 0.4202564121386173, "learning_rate": 2.128765056029195e-06, "loss": 0.0208, "step": 211580 }, { "epoch": 0.8828475102435931, "grad_norm": 0.3057287537182011, "learning_rate": 2.128739903162809e-06, "loss": 0.0138, "step": 211585 }, { "epoch": 0.8828683729585833, "grad_norm": 0.46470577358885967, "learning_rate": 2.128714751187999e-06, "loss": 0.0189, "step": 211590 }, { "epoch": 0.8828892356735736, "grad_norm": 0.54784086734466, "learning_rate": 2.128689600104712e-06, "loss": 0.0195, "step": 211595 }, { "epoch": 0.8829100983885639, "grad_norm": 0.3273001883446549, "learning_rate": 2.128664449912896e-06, "loss": 0.0173, "step": 211600 }, { "epoch": 0.8829309611035542, "grad_norm": 0.8901881661288482, "learning_rate": 2.128639300612497e-06, "loss": 0.031, "step": 211605 }, { "epoch": 0.8829518238185444, "grad_norm": 0.3878398216382492, "learning_rate": 2.128614152203463e-06, "loss": 0.0227, "step": 211610 }, { "epoch": 0.8829726865335348, "grad_norm": 0.7042748990546912, "learning_rate": 2.128589004685742e-06, "loss": 0.0213, "step": 211615 }, { "epoch": 0.882993549248525, "grad_norm": 0.6367008327861516, "learning_rate": 2.1285638580592803e-06, "loss": 0.0188, "step": 211620 }, { "epoch": 0.8830144119635153, "grad_norm": 0.6955396146039493, "learning_rate": 2.1285387123240255e-06, "loss": 0.0156, "step": 211625 }, { "epoch": 0.8830352746785055, "grad_norm": 0.4649236706360085, "learning_rate": 2.1285135674799253e-06, "loss": 0.024, "step": 211630 }, { "epoch": 0.8830561373934959, "grad_norm": 0.4403688929282377, "learning_rate": 2.1284884235269274e-06, "loss": 0.0114, "step": 211635 }, { "epoch": 0.8830770001084861, "grad_norm": 0.5239744178800246, "learning_rate": 2.128463280464978e-06, "loss": 0.0156, "step": 211640 }, { "epoch": 0.8830978628234764, "grad_norm": 0.6801885227599218, "learning_rate": 2.1284381382940258e-06, "loss": 0.0339, "step": 211645 }, { "epoch": 0.8831187255384667, "grad_norm": 0.5021580156847463, "learning_rate": 2.128412997014017e-06, "loss": 0.022, "step": 211650 }, { "epoch": 0.883139588253457, "grad_norm": 0.7684687277431009, "learning_rate": 2.1283878566249e-06, "loss": 0.0184, "step": 211655 }, { "epoch": 0.8831604509684472, "grad_norm": 0.49049079666714657, "learning_rate": 2.1283627171266213e-06, "loss": 0.023, "step": 211660 }, { "epoch": 0.8831813136834376, "grad_norm": 0.3779634068473002, "learning_rate": 2.1283375785191293e-06, "loss": 0.0207, "step": 211665 }, { "epoch": 0.8832021763984278, "grad_norm": 0.8631329262498639, "learning_rate": 2.1283124408023704e-06, "loss": 0.0212, "step": 211670 }, { "epoch": 0.883223039113418, "grad_norm": 0.9040108217345576, "learning_rate": 2.1282873039762925e-06, "loss": 0.0167, "step": 211675 }, { "epoch": 0.8832439018284084, "grad_norm": 0.6145814386309505, "learning_rate": 2.1282621680408427e-06, "loss": 0.0175, "step": 211680 }, { "epoch": 0.8832647645433986, "grad_norm": 0.7532764472535659, "learning_rate": 2.1282370329959687e-06, "loss": 0.0199, "step": 211685 }, { "epoch": 0.8832856272583889, "grad_norm": 0.9069531847992007, "learning_rate": 2.1282118988416183e-06, "loss": 0.0213, "step": 211690 }, { "epoch": 0.8833064899733791, "grad_norm": 0.7686668080165711, "learning_rate": 2.128186765577738e-06, "loss": 0.041, "step": 211695 }, { "epoch": 0.8833273526883695, "grad_norm": 0.47227551433629383, "learning_rate": 2.1281616332042754e-06, "loss": 0.0208, "step": 211700 }, { "epoch": 0.8833482154033597, "grad_norm": 0.7861079598860026, "learning_rate": 2.1281365017211784e-06, "loss": 0.0293, "step": 211705 }, { "epoch": 0.88336907811835, "grad_norm": 0.31549921221731597, "learning_rate": 2.1281113711283944e-06, "loss": 0.0167, "step": 211710 }, { "epoch": 0.8833899408333403, "grad_norm": 0.855152526821139, "learning_rate": 2.1280862414258703e-06, "loss": 0.0251, "step": 211715 }, { "epoch": 0.8834108035483306, "grad_norm": 0.3439400326278506, "learning_rate": 2.1280611126135542e-06, "loss": 0.0238, "step": 211720 }, { "epoch": 0.8834316662633208, "grad_norm": 0.5936617818628025, "learning_rate": 2.1280359846913935e-06, "loss": 0.0218, "step": 211725 }, { "epoch": 0.8834525289783112, "grad_norm": 0.5634734019512192, "learning_rate": 2.1280108576593348e-06, "loss": 0.021, "step": 211730 }, { "epoch": 0.8834733916933014, "grad_norm": 0.7236388659543022, "learning_rate": 2.1279857315173264e-06, "loss": 0.021, "step": 211735 }, { "epoch": 0.8834942544082917, "grad_norm": 0.574372825746426, "learning_rate": 2.127960606265315e-06, "loss": 0.0273, "step": 211740 }, { "epoch": 0.8835151171232819, "grad_norm": 1.1268447382701423, "learning_rate": 2.1279354819032495e-06, "loss": 0.0258, "step": 211745 }, { "epoch": 0.8835359798382723, "grad_norm": 0.3931043467916064, "learning_rate": 2.127910358431076e-06, "loss": 0.011, "step": 211750 }, { "epoch": 0.8835568425532625, "grad_norm": 0.5115662471878765, "learning_rate": 2.127885235848742e-06, "loss": 0.023, "step": 211755 }, { "epoch": 0.8835777052682527, "grad_norm": 0.6951182942603122, "learning_rate": 2.127860114156196e-06, "loss": 0.0267, "step": 211760 }, { "epoch": 0.8835985679832431, "grad_norm": 0.5767907071270724, "learning_rate": 2.1278349933533842e-06, "loss": 0.0231, "step": 211765 }, { "epoch": 0.8836194306982333, "grad_norm": 0.9164249653265311, "learning_rate": 2.1278098734402554e-06, "loss": 0.0249, "step": 211770 }, { "epoch": 0.8836402934132236, "grad_norm": 0.4645364327919532, "learning_rate": 2.127784754416756e-06, "loss": 0.0249, "step": 211775 }, { "epoch": 0.8836611561282139, "grad_norm": 0.4277664324341411, "learning_rate": 2.1277596362828335e-06, "loss": 0.0243, "step": 211780 }, { "epoch": 0.8836820188432042, "grad_norm": 0.929363091538155, "learning_rate": 2.127734519038436e-06, "loss": 0.0155, "step": 211785 }, { "epoch": 0.8837028815581944, "grad_norm": 0.9206528535492495, "learning_rate": 2.127709402683511e-06, "loss": 0.0183, "step": 211790 }, { "epoch": 0.8837237442731848, "grad_norm": 0.8170520350009449, "learning_rate": 2.1276842872180058e-06, "loss": 0.0226, "step": 211795 }, { "epoch": 0.883744606988175, "grad_norm": 0.3370994781989238, "learning_rate": 2.127659172641868e-06, "loss": 0.0229, "step": 211800 }, { "epoch": 0.8837654697031653, "grad_norm": 0.7781919316829387, "learning_rate": 2.1276340589550447e-06, "loss": 0.0174, "step": 211805 }, { "epoch": 0.8837863324181555, "grad_norm": 0.6425801275503681, "learning_rate": 2.127608946157484e-06, "loss": 0.0188, "step": 211810 }, { "epoch": 0.8838071951331459, "grad_norm": 0.5715519376634045, "learning_rate": 2.127583834249133e-06, "loss": 0.0209, "step": 211815 }, { "epoch": 0.8838280578481361, "grad_norm": 0.5454654323811898, "learning_rate": 2.1275587232299394e-06, "loss": 0.0221, "step": 211820 }, { "epoch": 0.8838489205631264, "grad_norm": 0.8902670456024127, "learning_rate": 2.12753361309985e-06, "loss": 0.0176, "step": 211825 }, { "epoch": 0.8838697832781167, "grad_norm": 0.400938426744715, "learning_rate": 2.127508503858814e-06, "loss": 0.0194, "step": 211830 }, { "epoch": 0.883890645993107, "grad_norm": 0.6338984661407767, "learning_rate": 2.1274833955067777e-06, "loss": 0.0209, "step": 211835 }, { "epoch": 0.8839115087080972, "grad_norm": 0.34808246277380517, "learning_rate": 2.127458288043689e-06, "loss": 0.0215, "step": 211840 }, { "epoch": 0.8839323714230876, "grad_norm": 0.7519702213889493, "learning_rate": 2.1274331814694953e-06, "loss": 0.0177, "step": 211845 }, { "epoch": 0.8839532341380778, "grad_norm": 0.4689089438388846, "learning_rate": 2.1274080757841436e-06, "loss": 0.018, "step": 211850 }, { "epoch": 0.883974096853068, "grad_norm": 0.4120582813691709, "learning_rate": 2.127382970987583e-06, "loss": 0.0156, "step": 211855 }, { "epoch": 0.8839949595680584, "grad_norm": 0.5046166468206231, "learning_rate": 2.1273578670797595e-06, "loss": 0.0161, "step": 211860 }, { "epoch": 0.8840158222830486, "grad_norm": 1.0763413769432177, "learning_rate": 2.1273327640606215e-06, "loss": 0.0253, "step": 211865 }, { "epoch": 0.8840366849980389, "grad_norm": 0.611605763591487, "learning_rate": 2.127307661930116e-06, "loss": 0.0166, "step": 211870 }, { "epoch": 0.8840575477130291, "grad_norm": 0.9600069374293563, "learning_rate": 2.1272825606881916e-06, "loss": 0.0244, "step": 211875 }, { "epoch": 0.8840784104280195, "grad_norm": 0.5434041071370939, "learning_rate": 2.1272574603347946e-06, "loss": 0.0147, "step": 211880 }, { "epoch": 0.8840992731430097, "grad_norm": 0.25934908770491366, "learning_rate": 2.1272323608698734e-06, "loss": 0.0177, "step": 211885 }, { "epoch": 0.884120135858, "grad_norm": 0.6359139720088535, "learning_rate": 2.127207262293376e-06, "loss": 0.0221, "step": 211890 }, { "epoch": 0.8841409985729903, "grad_norm": 0.47172095598898095, "learning_rate": 2.1271821646052485e-06, "loss": 0.0207, "step": 211895 }, { "epoch": 0.8841618612879806, "grad_norm": 0.7667506465778191, "learning_rate": 2.1271570678054398e-06, "loss": 0.0247, "step": 211900 }, { "epoch": 0.8841827240029708, "grad_norm": 0.41356256624433446, "learning_rate": 2.1271319718938967e-06, "loss": 0.0135, "step": 211905 }, { "epoch": 0.8842035867179612, "grad_norm": 0.5971087355473578, "learning_rate": 2.1271068768705676e-06, "loss": 0.0189, "step": 211910 }, { "epoch": 0.8842244494329514, "grad_norm": 0.6481287986191143, "learning_rate": 2.127081782735399e-06, "loss": 0.0222, "step": 211915 }, { "epoch": 0.8842453121479417, "grad_norm": 0.7240551086295388, "learning_rate": 2.1270566894883397e-06, "loss": 0.0165, "step": 211920 }, { "epoch": 0.8842661748629319, "grad_norm": 0.7005171719584969, "learning_rate": 2.1270315971293363e-06, "loss": 0.0168, "step": 211925 }, { "epoch": 0.8842870375779223, "grad_norm": 0.8572138761708593, "learning_rate": 2.1270065056583376e-06, "loss": 0.0213, "step": 211930 }, { "epoch": 0.8843079002929125, "grad_norm": 0.6587933429874218, "learning_rate": 2.12698141507529e-06, "loss": 0.0183, "step": 211935 }, { "epoch": 0.8843287630079028, "grad_norm": 1.5586143044810938, "learning_rate": 2.1269563253801424e-06, "loss": 0.0232, "step": 211940 }, { "epoch": 0.8843496257228931, "grad_norm": 0.6302312865391161, "learning_rate": 2.1269312365728413e-06, "loss": 0.0214, "step": 211945 }, { "epoch": 0.8843704884378834, "grad_norm": 0.49558239758093325, "learning_rate": 2.1269061486533345e-06, "loss": 0.0164, "step": 211950 }, { "epoch": 0.8843913511528736, "grad_norm": 0.8770726253271389, "learning_rate": 2.12688106162157e-06, "loss": 0.0246, "step": 211955 }, { "epoch": 0.884412213867864, "grad_norm": 0.3686110671098186, "learning_rate": 2.1268559754774953e-06, "loss": 0.0159, "step": 211960 }, { "epoch": 0.8844330765828542, "grad_norm": 0.8654023121795684, "learning_rate": 2.1268308902210584e-06, "loss": 0.0224, "step": 211965 }, { "epoch": 0.8844539392978444, "grad_norm": 0.4675443563102891, "learning_rate": 2.126805805852206e-06, "loss": 0.0221, "step": 211970 }, { "epoch": 0.8844748020128348, "grad_norm": 0.6137432007091704, "learning_rate": 2.1267807223708868e-06, "loss": 0.0155, "step": 211975 }, { "epoch": 0.884495664727825, "grad_norm": 0.39831115759899366, "learning_rate": 2.126755639777048e-06, "loss": 0.0175, "step": 211980 }, { "epoch": 0.8845165274428153, "grad_norm": 0.8689716501519157, "learning_rate": 2.126730558070637e-06, "loss": 0.0286, "step": 211985 }, { "epoch": 0.8845373901578055, "grad_norm": 0.5276336390337716, "learning_rate": 2.126705477251603e-06, "loss": 0.0275, "step": 211990 }, { "epoch": 0.8845582528727959, "grad_norm": 0.7260872775951486, "learning_rate": 2.126680397319891e-06, "loss": 0.0228, "step": 211995 }, { "epoch": 0.8845791155877861, "grad_norm": 0.6788811625433855, "learning_rate": 2.126655318275451e-06, "loss": 0.0191, "step": 212000 }, { "epoch": 0.8845999783027764, "grad_norm": 0.4831422139007383, "learning_rate": 2.1266302401182294e-06, "loss": 0.019, "step": 212005 }, { "epoch": 0.8846208410177667, "grad_norm": 0.8994567675022903, "learning_rate": 2.1266051628481747e-06, "loss": 0.0189, "step": 212010 }, { "epoch": 0.884641703732757, "grad_norm": 0.8904255987856445, "learning_rate": 2.1265800864652338e-06, "loss": 0.024, "step": 212015 }, { "epoch": 0.8846625664477472, "grad_norm": 0.591864596217528, "learning_rate": 2.126555010969355e-06, "loss": 0.014, "step": 212020 }, { "epoch": 0.8846834291627376, "grad_norm": 0.5784371453096374, "learning_rate": 2.126529936360486e-06, "loss": 0.0224, "step": 212025 }, { "epoch": 0.8847042918777278, "grad_norm": 0.17140027917820747, "learning_rate": 2.1265048626385743e-06, "loss": 0.0145, "step": 212030 }, { "epoch": 0.884725154592718, "grad_norm": 0.658581235808501, "learning_rate": 2.126479789803567e-06, "loss": 0.0225, "step": 212035 }, { "epoch": 0.8847460173077084, "grad_norm": 0.3912480819440031, "learning_rate": 2.1264547178554133e-06, "loss": 0.0179, "step": 212040 }, { "epoch": 0.8847668800226987, "grad_norm": 0.43496821230513283, "learning_rate": 2.12642964679406e-06, "loss": 0.0177, "step": 212045 }, { "epoch": 0.8847877427376889, "grad_norm": 0.67611980129349, "learning_rate": 2.1264045766194544e-06, "loss": 0.0178, "step": 212050 }, { "epoch": 0.8848086054526791, "grad_norm": 0.6947994990415094, "learning_rate": 2.1263795073315448e-06, "loss": 0.0197, "step": 212055 }, { "epoch": 0.8848294681676695, "grad_norm": 0.6899707396348187, "learning_rate": 2.1263544389302795e-06, "loss": 0.0208, "step": 212060 }, { "epoch": 0.8848503308826597, "grad_norm": 1.1669511674448072, "learning_rate": 2.126329371415605e-06, "loss": 0.0208, "step": 212065 }, { "epoch": 0.88487119359765, "grad_norm": 0.5636276934538745, "learning_rate": 2.12630430478747e-06, "loss": 0.0191, "step": 212070 }, { "epoch": 0.8848920563126403, "grad_norm": 0.38070199046470843, "learning_rate": 2.126279239045821e-06, "loss": 0.023, "step": 212075 }, { "epoch": 0.8849129190276306, "grad_norm": 0.6868987290227554, "learning_rate": 2.1262541741906077e-06, "loss": 0.015, "step": 212080 }, { "epoch": 0.8849337817426208, "grad_norm": 0.20921352059560672, "learning_rate": 2.1262291102217765e-06, "loss": 0.016, "step": 212085 }, { "epoch": 0.8849546444576112, "grad_norm": 0.9341691526914134, "learning_rate": 2.126204047139275e-06, "loss": 0.0232, "step": 212090 }, { "epoch": 0.8849755071726014, "grad_norm": 0.42869209522158064, "learning_rate": 2.126178984943052e-06, "loss": 0.0236, "step": 212095 }, { "epoch": 0.8849963698875917, "grad_norm": 0.6159440062699637, "learning_rate": 2.126153923633054e-06, "loss": 0.0248, "step": 212100 }, { "epoch": 0.8850172326025819, "grad_norm": 0.2733347485306187, "learning_rate": 2.12612886320923e-06, "loss": 0.0186, "step": 212105 }, { "epoch": 0.8850380953175723, "grad_norm": 0.9517316473593207, "learning_rate": 2.1261038036715275e-06, "loss": 0.0257, "step": 212110 }, { "epoch": 0.8850589580325625, "grad_norm": 0.5268013751620183, "learning_rate": 2.126078745019893e-06, "loss": 0.0299, "step": 212115 }, { "epoch": 0.8850798207475528, "grad_norm": 1.1168091166896994, "learning_rate": 2.126053687254276e-06, "loss": 0.0289, "step": 212120 }, { "epoch": 0.8851006834625431, "grad_norm": 0.7772210487211657, "learning_rate": 2.1260286303746236e-06, "loss": 0.0246, "step": 212125 }, { "epoch": 0.8851215461775334, "grad_norm": 0.3857636410400057, "learning_rate": 2.1260035743808837e-06, "loss": 0.0245, "step": 212130 }, { "epoch": 0.8851424088925236, "grad_norm": 0.5976744433602026, "learning_rate": 2.125978519273003e-06, "loss": 0.0188, "step": 212135 }, { "epoch": 0.885163271607514, "grad_norm": 0.5158053942092128, "learning_rate": 2.125953465050931e-06, "loss": 0.021, "step": 212140 }, { "epoch": 0.8851841343225042, "grad_norm": 0.2739618801504839, "learning_rate": 2.125928411714615e-06, "loss": 0.0149, "step": 212145 }, { "epoch": 0.8852049970374944, "grad_norm": 0.9839990160575026, "learning_rate": 2.1259033592640022e-06, "loss": 0.0228, "step": 212150 }, { "epoch": 0.8852258597524848, "grad_norm": 0.42428152632378935, "learning_rate": 2.125878307699041e-06, "loss": 0.0228, "step": 212155 }, { "epoch": 0.885246722467475, "grad_norm": 0.26847480212524566, "learning_rate": 2.125853257019679e-06, "loss": 0.0135, "step": 212160 }, { "epoch": 0.8852675851824653, "grad_norm": 0.642482275812661, "learning_rate": 2.125828207225864e-06, "loss": 0.0166, "step": 212165 }, { "epoch": 0.8852884478974555, "grad_norm": 0.6338201395746202, "learning_rate": 2.1258031583175436e-06, "loss": 0.0182, "step": 212170 }, { "epoch": 0.8853093106124459, "grad_norm": 0.4042652044102543, "learning_rate": 2.1257781102946663e-06, "loss": 0.0246, "step": 212175 }, { "epoch": 0.8853301733274361, "grad_norm": 0.5547402268234565, "learning_rate": 2.1257530631571794e-06, "loss": 0.026, "step": 212180 }, { "epoch": 0.8853510360424264, "grad_norm": 0.792703903121297, "learning_rate": 2.125728016905031e-06, "loss": 0.0123, "step": 212185 }, { "epoch": 0.8853718987574167, "grad_norm": 1.0179025957873689, "learning_rate": 2.1257029715381684e-06, "loss": 0.0202, "step": 212190 }, { "epoch": 0.885392761472407, "grad_norm": 0.7665775744874378, "learning_rate": 2.12567792705654e-06, "loss": 0.0163, "step": 212195 }, { "epoch": 0.8854136241873972, "grad_norm": 0.5597938717445564, "learning_rate": 2.1256528834600936e-06, "loss": 0.0187, "step": 212200 }, { "epoch": 0.8854344869023876, "grad_norm": 0.4305669520606074, "learning_rate": 2.1256278407487773e-06, "loss": 0.0128, "step": 212205 }, { "epoch": 0.8854553496173778, "grad_norm": 0.4708050330910001, "learning_rate": 2.125602798922538e-06, "loss": 0.0277, "step": 212210 }, { "epoch": 0.8854762123323681, "grad_norm": 0.49961291339817376, "learning_rate": 2.125577757981325e-06, "loss": 0.0181, "step": 212215 }, { "epoch": 0.8854970750473584, "grad_norm": 0.712443580659925, "learning_rate": 2.1255527179250844e-06, "loss": 0.0192, "step": 212220 }, { "epoch": 0.8855179377623487, "grad_norm": 1.0634545821208687, "learning_rate": 2.1255276787537658e-06, "loss": 0.0307, "step": 212225 }, { "epoch": 0.8855388004773389, "grad_norm": 0.7038790059985521, "learning_rate": 2.125502640467316e-06, "loss": 0.0169, "step": 212230 }, { "epoch": 0.8855596631923291, "grad_norm": 0.4849562251171592, "learning_rate": 2.1254776030656836e-06, "loss": 0.0185, "step": 212235 }, { "epoch": 0.8855805259073195, "grad_norm": 0.4280023901069997, "learning_rate": 2.125452566548816e-06, "loss": 0.0238, "step": 212240 }, { "epoch": 0.8856013886223097, "grad_norm": 0.5329533208888647, "learning_rate": 2.125427530916661e-06, "loss": 0.017, "step": 212245 }, { "epoch": 0.8856222513373, "grad_norm": 0.4956343363208017, "learning_rate": 2.1254024961691666e-06, "loss": 0.0164, "step": 212250 }, { "epoch": 0.8856431140522903, "grad_norm": 0.3912486688131848, "learning_rate": 2.125377462306281e-06, "loss": 0.0173, "step": 212255 }, { "epoch": 0.8856639767672806, "grad_norm": 0.6129226424264275, "learning_rate": 2.125352429327952e-06, "loss": 0.0204, "step": 212260 }, { "epoch": 0.8856848394822708, "grad_norm": 0.47663622506137476, "learning_rate": 2.1253273972341275e-06, "loss": 0.0136, "step": 212265 }, { "epoch": 0.8857057021972612, "grad_norm": 0.5214892781920315, "learning_rate": 2.1253023660247547e-06, "loss": 0.0182, "step": 212270 }, { "epoch": 0.8857265649122514, "grad_norm": 0.5412124880914612, "learning_rate": 2.125277335699782e-06, "loss": 0.0145, "step": 212275 }, { "epoch": 0.8857474276272417, "grad_norm": 1.788618374002414, "learning_rate": 2.125252306259158e-06, "loss": 0.0251, "step": 212280 }, { "epoch": 0.8857682903422319, "grad_norm": 0.8313446609508011, "learning_rate": 2.12522727770283e-06, "loss": 0.0195, "step": 212285 }, { "epoch": 0.8857891530572223, "grad_norm": 0.7796502843400251, "learning_rate": 2.1252022500307464e-06, "loss": 0.0195, "step": 212290 }, { "epoch": 0.8858100157722125, "grad_norm": 0.6060578663451539, "learning_rate": 2.1251772232428546e-06, "loss": 0.0278, "step": 212295 }, { "epoch": 0.8858308784872028, "grad_norm": 0.5167665642016248, "learning_rate": 2.125152197339102e-06, "loss": 0.0221, "step": 212300 }, { "epoch": 0.8858517412021931, "grad_norm": 0.7939135770944052, "learning_rate": 2.1251271723194382e-06, "loss": 0.0207, "step": 212305 }, { "epoch": 0.8858726039171834, "grad_norm": 0.41672204050719297, "learning_rate": 2.1251021481838095e-06, "loss": 0.0246, "step": 212310 }, { "epoch": 0.8858934666321736, "grad_norm": 0.5910009206693615, "learning_rate": 2.1250771249321646e-06, "loss": 0.0182, "step": 212315 }, { "epoch": 0.885914329347164, "grad_norm": 0.771513359481181, "learning_rate": 2.125052102564451e-06, "loss": 0.0192, "step": 212320 }, { "epoch": 0.8859351920621542, "grad_norm": 1.1677326560777805, "learning_rate": 2.125027081080618e-06, "loss": 0.0196, "step": 212325 }, { "epoch": 0.8859560547771445, "grad_norm": 0.6004190469308809, "learning_rate": 2.1250020604806123e-06, "loss": 0.0173, "step": 212330 }, { "epoch": 0.8859769174921348, "grad_norm": 0.662689258935439, "learning_rate": 2.1249770407643817e-06, "loss": 0.0309, "step": 212335 }, { "epoch": 0.885997780207125, "grad_norm": 0.550461506951496, "learning_rate": 2.1249520219318755e-06, "loss": 0.0216, "step": 212340 }, { "epoch": 0.8860186429221153, "grad_norm": 0.5878004848844588, "learning_rate": 2.12492700398304e-06, "loss": 0.0231, "step": 212345 }, { "epoch": 0.8860395056371055, "grad_norm": 0.5789919696909925, "learning_rate": 2.1249019869178246e-06, "loss": 0.0204, "step": 212350 }, { "epoch": 0.8860603683520959, "grad_norm": 0.6258577979900284, "learning_rate": 2.1248769707361765e-06, "loss": 0.0229, "step": 212355 }, { "epoch": 0.8860812310670861, "grad_norm": 1.0078152148589017, "learning_rate": 2.1248519554380436e-06, "loss": 0.0272, "step": 212360 }, { "epoch": 0.8861020937820764, "grad_norm": 0.4094036974258769, "learning_rate": 2.1248269410233745e-06, "loss": 0.0396, "step": 212365 }, { "epoch": 0.8861229564970667, "grad_norm": 0.2686792975571255, "learning_rate": 2.124801927492117e-06, "loss": 0.0171, "step": 212370 }, { "epoch": 0.886143819212057, "grad_norm": 0.2472662752466129, "learning_rate": 2.124776914844219e-06, "loss": 0.0208, "step": 212375 }, { "epoch": 0.8861646819270472, "grad_norm": 1.1116454231439965, "learning_rate": 2.1247519030796284e-06, "loss": 0.0277, "step": 212380 }, { "epoch": 0.8861855446420376, "grad_norm": 0.7279601113841324, "learning_rate": 2.124726892198293e-06, "loss": 0.0174, "step": 212385 }, { "epoch": 0.8862064073570278, "grad_norm": 0.2867210894100786, "learning_rate": 2.1247018822001615e-06, "loss": 0.0172, "step": 212390 }, { "epoch": 0.8862272700720181, "grad_norm": 0.7838799512193402, "learning_rate": 2.1246768730851815e-06, "loss": 0.0265, "step": 212395 }, { "epoch": 0.8862481327870084, "grad_norm": 0.5810301697979537, "learning_rate": 2.1246518648533006e-06, "loss": 0.018, "step": 212400 }, { "epoch": 0.8862689955019987, "grad_norm": 0.4319221602024399, "learning_rate": 2.1246268575044677e-06, "loss": 0.019, "step": 212405 }, { "epoch": 0.8862898582169889, "grad_norm": 0.43542685622482685, "learning_rate": 2.1246018510386306e-06, "loss": 0.0186, "step": 212410 }, { "epoch": 0.8863107209319792, "grad_norm": 0.8308548301411016, "learning_rate": 2.1245768454557373e-06, "loss": 0.0227, "step": 212415 }, { "epoch": 0.8863315836469695, "grad_norm": 0.438221104428183, "learning_rate": 2.1245518407557352e-06, "loss": 0.0229, "step": 212420 }, { "epoch": 0.8863524463619598, "grad_norm": 0.33142473393934857, "learning_rate": 2.124526836938573e-06, "loss": 0.016, "step": 212425 }, { "epoch": 0.88637330907695, "grad_norm": 0.6597236564790179, "learning_rate": 2.1245018340041986e-06, "loss": 0.0203, "step": 212430 }, { "epoch": 0.8863941717919404, "grad_norm": 0.8533417884341783, "learning_rate": 2.12447683195256e-06, "loss": 0.0207, "step": 212435 }, { "epoch": 0.8864150345069306, "grad_norm": 1.1459043269616946, "learning_rate": 2.1244518307836064e-06, "loss": 0.022, "step": 212440 }, { "epoch": 0.8864358972219208, "grad_norm": 0.838455230326774, "learning_rate": 2.1244268304972834e-06, "loss": 0.0194, "step": 212445 }, { "epoch": 0.8864567599369112, "grad_norm": 0.6272126418162385, "learning_rate": 2.1244018310935407e-06, "loss": 0.0186, "step": 212450 }, { "epoch": 0.8864776226519014, "grad_norm": 0.2832011757090688, "learning_rate": 2.1243768325723268e-06, "loss": 0.0148, "step": 212455 }, { "epoch": 0.8864984853668917, "grad_norm": 1.5386101690877438, "learning_rate": 2.1243518349335886e-06, "loss": 0.0204, "step": 212460 }, { "epoch": 0.8865193480818819, "grad_norm": 0.4546017545428763, "learning_rate": 2.1243268381772746e-06, "loss": 0.0157, "step": 212465 }, { "epoch": 0.8865402107968723, "grad_norm": 0.1822493090343789, "learning_rate": 2.1243018423033333e-06, "loss": 0.0187, "step": 212470 }, { "epoch": 0.8865610735118625, "grad_norm": 0.8498839255180985, "learning_rate": 2.1242768473117125e-06, "loss": 0.0236, "step": 212475 }, { "epoch": 0.8865819362268528, "grad_norm": 0.39749885513672684, "learning_rate": 2.1242518532023594e-06, "loss": 0.0166, "step": 212480 }, { "epoch": 0.8866027989418431, "grad_norm": 0.5427849150096535, "learning_rate": 2.1242268599752242e-06, "loss": 0.0138, "step": 212485 }, { "epoch": 0.8866236616568334, "grad_norm": 0.5484238456364496, "learning_rate": 2.1242018676302527e-06, "loss": 0.0161, "step": 212490 }, { "epoch": 0.8866445243718236, "grad_norm": 0.687938109871024, "learning_rate": 2.1241768761673944e-06, "loss": 0.0234, "step": 212495 }, { "epoch": 0.886665387086814, "grad_norm": 0.8475461981147017, "learning_rate": 2.1241518855865976e-06, "loss": 0.0203, "step": 212500 }, { "epoch": 0.8866862498018042, "grad_norm": 0.6870417413900559, "learning_rate": 2.1241268958878095e-06, "loss": 0.0177, "step": 212505 }, { "epoch": 0.8867071125167945, "grad_norm": 0.9076000100406327, "learning_rate": 2.1241019070709788e-06, "loss": 0.0245, "step": 212510 }, { "epoch": 0.8867279752317848, "grad_norm": 0.7781685487380211, "learning_rate": 2.124076919136053e-06, "loss": 0.0297, "step": 212515 }, { "epoch": 0.8867488379467751, "grad_norm": 0.40504298939071176, "learning_rate": 2.1240519320829807e-06, "loss": 0.0206, "step": 212520 }, { "epoch": 0.8867697006617653, "grad_norm": 0.6875515417873637, "learning_rate": 2.1240269459117104e-06, "loss": 0.0139, "step": 212525 }, { "epoch": 0.8867905633767555, "grad_norm": 0.9919065584773188, "learning_rate": 2.124001960622189e-06, "loss": 0.0207, "step": 212530 }, { "epoch": 0.8868114260917459, "grad_norm": 0.8630509052535738, "learning_rate": 2.123976976214366e-06, "loss": 0.0199, "step": 212535 }, { "epoch": 0.8868322888067361, "grad_norm": 0.36420364150979717, "learning_rate": 2.1239519926881895e-06, "loss": 0.0175, "step": 212540 }, { "epoch": 0.8868531515217264, "grad_norm": 0.5781464291269225, "learning_rate": 2.123927010043606e-06, "loss": 0.0198, "step": 212545 }, { "epoch": 0.8868740142367167, "grad_norm": 0.6990691215688412, "learning_rate": 2.123902028280566e-06, "loss": 0.0219, "step": 212550 }, { "epoch": 0.886894876951707, "grad_norm": 0.7692711483755007, "learning_rate": 2.123877047399016e-06, "loss": 0.022, "step": 212555 }, { "epoch": 0.8869157396666972, "grad_norm": 0.712809852898152, "learning_rate": 2.1238520673989046e-06, "loss": 0.0187, "step": 212560 }, { "epoch": 0.8869366023816876, "grad_norm": 0.8726667221753859, "learning_rate": 2.1238270882801798e-06, "loss": 0.0218, "step": 212565 }, { "epoch": 0.8869574650966778, "grad_norm": 0.2716961070621054, "learning_rate": 2.12380211004279e-06, "loss": 0.0159, "step": 212570 }, { "epoch": 0.8869783278116681, "grad_norm": 0.8230874059727435, "learning_rate": 2.1237771326866836e-06, "loss": 0.0183, "step": 212575 }, { "epoch": 0.8869991905266584, "grad_norm": 0.4836550023563667, "learning_rate": 2.1237521562118086e-06, "loss": 0.0347, "step": 212580 }, { "epoch": 0.8870200532416487, "grad_norm": 0.661495646708166, "learning_rate": 2.123727180618113e-06, "loss": 0.0224, "step": 212585 }, { "epoch": 0.8870409159566389, "grad_norm": 0.7644597998835847, "learning_rate": 2.123702205905545e-06, "loss": 0.0281, "step": 212590 }, { "epoch": 0.8870617786716292, "grad_norm": 0.7891789443799276, "learning_rate": 2.1236772320740524e-06, "loss": 0.0191, "step": 212595 }, { "epoch": 0.8870826413866195, "grad_norm": 0.5361094493761772, "learning_rate": 2.123652259123585e-06, "loss": 0.0173, "step": 212600 }, { "epoch": 0.8871035041016098, "grad_norm": 0.7734453371299933, "learning_rate": 2.123627287054089e-06, "loss": 0.0201, "step": 212605 }, { "epoch": 0.8871243668166, "grad_norm": 0.6204731545257293, "learning_rate": 2.1236023158655134e-06, "loss": 0.0203, "step": 212610 }, { "epoch": 0.8871452295315904, "grad_norm": 0.637260178958461, "learning_rate": 2.123577345557807e-06, "loss": 0.0253, "step": 212615 }, { "epoch": 0.8871660922465806, "grad_norm": 0.7148763596253673, "learning_rate": 2.123552376130917e-06, "loss": 0.0226, "step": 212620 }, { "epoch": 0.8871869549615709, "grad_norm": 0.6699945891458399, "learning_rate": 2.1235274075847925e-06, "loss": 0.0283, "step": 212625 }, { "epoch": 0.8872078176765612, "grad_norm": 0.7153526477989487, "learning_rate": 2.1235024399193816e-06, "loss": 0.0221, "step": 212630 }, { "epoch": 0.8872286803915514, "grad_norm": 0.8579018736695164, "learning_rate": 2.1234774731346317e-06, "loss": 0.0184, "step": 212635 }, { "epoch": 0.8872495431065417, "grad_norm": 1.1014042803887825, "learning_rate": 2.123452507230492e-06, "loss": 0.0218, "step": 212640 }, { "epoch": 0.8872704058215319, "grad_norm": 0.24845785711461438, "learning_rate": 2.12342754220691e-06, "loss": 0.0157, "step": 212645 }, { "epoch": 0.8872912685365223, "grad_norm": 0.5388734095644654, "learning_rate": 2.123402578063834e-06, "loss": 0.0182, "step": 212650 }, { "epoch": 0.8873121312515125, "grad_norm": 0.8402533597955533, "learning_rate": 2.1233776148012133e-06, "loss": 0.0186, "step": 212655 }, { "epoch": 0.8873329939665028, "grad_norm": 0.6071464317277178, "learning_rate": 2.123352652418995e-06, "loss": 0.0253, "step": 212660 }, { "epoch": 0.8873538566814931, "grad_norm": 0.551041098662565, "learning_rate": 2.123327690917128e-06, "loss": 0.0144, "step": 212665 }, { "epoch": 0.8873747193964834, "grad_norm": 0.6022261473972661, "learning_rate": 2.1233027302955595e-06, "loss": 0.026, "step": 212670 }, { "epoch": 0.8873955821114736, "grad_norm": 0.4704605798546356, "learning_rate": 2.1232777705542396e-06, "loss": 0.0202, "step": 212675 }, { "epoch": 0.887416444826464, "grad_norm": 0.5954905946220314, "learning_rate": 2.123252811693115e-06, "loss": 0.015, "step": 212680 }, { "epoch": 0.8874373075414542, "grad_norm": 0.4503110388984372, "learning_rate": 2.1232278537121336e-06, "loss": 0.0158, "step": 212685 }, { "epoch": 0.8874581702564445, "grad_norm": 0.29770254675616575, "learning_rate": 2.1232028966112454e-06, "loss": 0.0134, "step": 212690 }, { "epoch": 0.8874790329714348, "grad_norm": 0.9194507163241786, "learning_rate": 2.1231779403903978e-06, "loss": 0.0214, "step": 212695 }, { "epoch": 0.8874998956864251, "grad_norm": 0.387555456642704, "learning_rate": 2.123152985049539e-06, "loss": 0.016, "step": 212700 }, { "epoch": 0.8875207584014153, "grad_norm": 0.5151152273232146, "learning_rate": 2.1231280305886175e-06, "loss": 0.0116, "step": 212705 }, { "epoch": 0.8875416211164056, "grad_norm": 0.4351306195644638, "learning_rate": 2.123103077007581e-06, "loss": 0.0189, "step": 212710 }, { "epoch": 0.8875624838313959, "grad_norm": 0.6191689923853139, "learning_rate": 2.1230781243063785e-06, "loss": 0.0234, "step": 212715 }, { "epoch": 0.8875833465463862, "grad_norm": 0.5053063370829174, "learning_rate": 2.123053172484958e-06, "loss": 0.0185, "step": 212720 }, { "epoch": 0.8876042092613764, "grad_norm": 0.5022327278660542, "learning_rate": 2.123028221543268e-06, "loss": 0.0219, "step": 212725 }, { "epoch": 0.8876250719763668, "grad_norm": 0.6250071349800426, "learning_rate": 2.123003271481257e-06, "loss": 0.0202, "step": 212730 }, { "epoch": 0.887645934691357, "grad_norm": 0.8132633206019847, "learning_rate": 2.1229783222988724e-06, "loss": 0.0255, "step": 212735 }, { "epoch": 0.8876667974063472, "grad_norm": 0.8048916190895009, "learning_rate": 2.1229533739960633e-06, "loss": 0.0189, "step": 212740 }, { "epoch": 0.8876876601213376, "grad_norm": 1.2138157863766312, "learning_rate": 2.1229284265727777e-06, "loss": 0.0189, "step": 212745 }, { "epoch": 0.8877085228363278, "grad_norm": 0.5437427886054623, "learning_rate": 2.1229034800289642e-06, "loss": 0.0201, "step": 212750 }, { "epoch": 0.8877293855513181, "grad_norm": 0.7774779584163581, "learning_rate": 2.122878534364571e-06, "loss": 0.0192, "step": 212755 }, { "epoch": 0.8877502482663084, "grad_norm": 0.43972565059730995, "learning_rate": 2.122853589579546e-06, "loss": 0.0199, "step": 212760 }, { "epoch": 0.8877711109812987, "grad_norm": 0.7134466633779271, "learning_rate": 2.1228286456738383e-06, "loss": 0.0212, "step": 212765 }, { "epoch": 0.8877919736962889, "grad_norm": 0.35695022523538456, "learning_rate": 2.1228037026473957e-06, "loss": 0.0166, "step": 212770 }, { "epoch": 0.8878128364112792, "grad_norm": 0.7446229414236231, "learning_rate": 2.122778760500167e-06, "loss": 0.0165, "step": 212775 }, { "epoch": 0.8878336991262695, "grad_norm": 0.7011159700911206, "learning_rate": 2.1227538192321003e-06, "loss": 0.0273, "step": 212780 }, { "epoch": 0.8878545618412598, "grad_norm": 0.7233442270931653, "learning_rate": 2.1227288788431433e-06, "loss": 0.0279, "step": 212785 }, { "epoch": 0.88787542455625, "grad_norm": 1.3375523987052311, "learning_rate": 2.1227039393332456e-06, "loss": 0.0204, "step": 212790 }, { "epoch": 0.8878962872712404, "grad_norm": 0.7864310889907569, "learning_rate": 2.1226790007023547e-06, "loss": 0.023, "step": 212795 }, { "epoch": 0.8879171499862306, "grad_norm": 0.7897175759537505, "learning_rate": 2.1226540629504193e-06, "loss": 0.0232, "step": 212800 }, { "epoch": 0.8879380127012209, "grad_norm": 0.6146489934228587, "learning_rate": 2.1226291260773873e-06, "loss": 0.016, "step": 212805 }, { "epoch": 0.8879588754162112, "grad_norm": 1.2124680502096243, "learning_rate": 2.1226041900832074e-06, "loss": 0.0281, "step": 212810 }, { "epoch": 0.8879797381312015, "grad_norm": 0.6471996788914595, "learning_rate": 2.122579254967829e-06, "loss": 0.0156, "step": 212815 }, { "epoch": 0.8880006008461917, "grad_norm": 0.48449277529933715, "learning_rate": 2.1225543207311983e-06, "loss": 0.0218, "step": 212820 }, { "epoch": 0.888021463561182, "grad_norm": 0.5690885230307456, "learning_rate": 2.1225293873732654e-06, "loss": 0.0327, "step": 212825 }, { "epoch": 0.8880423262761723, "grad_norm": 0.48508519375513415, "learning_rate": 2.1225044548939783e-06, "loss": 0.0206, "step": 212830 }, { "epoch": 0.8880631889911625, "grad_norm": 0.6336888724761803, "learning_rate": 2.1224795232932847e-06, "loss": 0.0168, "step": 212835 }, { "epoch": 0.8880840517061528, "grad_norm": 0.4273110235045042, "learning_rate": 2.1224545925711344e-06, "loss": 0.019, "step": 212840 }, { "epoch": 0.8881049144211431, "grad_norm": 0.6385578347929114, "learning_rate": 2.1224296627274747e-06, "loss": 0.0159, "step": 212845 }, { "epoch": 0.8881257771361334, "grad_norm": 0.4826147557766953, "learning_rate": 2.122404733762254e-06, "loss": 0.0202, "step": 212850 }, { "epoch": 0.8881466398511236, "grad_norm": 0.5476825784693387, "learning_rate": 2.1223798056754212e-06, "loss": 0.0229, "step": 212855 }, { "epoch": 0.888167502566114, "grad_norm": 0.4760757696511066, "learning_rate": 2.122354878466925e-06, "loss": 0.018, "step": 212860 }, { "epoch": 0.8881883652811042, "grad_norm": 0.40412278630479126, "learning_rate": 2.1223299521367126e-06, "loss": 0.0187, "step": 212865 }, { "epoch": 0.8882092279960945, "grad_norm": 0.9083236207300425, "learning_rate": 2.122305026684733e-06, "loss": 0.0177, "step": 212870 }, { "epoch": 0.8882300907110848, "grad_norm": 0.5112854363244239, "learning_rate": 2.1222801021109355e-06, "loss": 0.0217, "step": 212875 }, { "epoch": 0.8882509534260751, "grad_norm": 1.1812918378514923, "learning_rate": 2.122255178415268e-06, "loss": 0.02, "step": 212880 }, { "epoch": 0.8882718161410653, "grad_norm": 0.26282151796358355, "learning_rate": 2.1222302555976782e-06, "loss": 0.0188, "step": 212885 }, { "epoch": 0.8882926788560556, "grad_norm": 0.6658039318336638, "learning_rate": 2.1222053336581153e-06, "loss": 0.0229, "step": 212890 }, { "epoch": 0.8883135415710459, "grad_norm": 0.7667991642729661, "learning_rate": 2.122180412596528e-06, "loss": 0.0268, "step": 212895 }, { "epoch": 0.8883344042860362, "grad_norm": 0.6076544191016355, "learning_rate": 2.122155492412864e-06, "loss": 0.0179, "step": 212900 }, { "epoch": 0.8883552670010264, "grad_norm": 0.5678825281257511, "learning_rate": 2.122130573107072e-06, "loss": 0.0177, "step": 212905 }, { "epoch": 0.8883761297160168, "grad_norm": 1.116514621426066, "learning_rate": 2.1221056546791e-06, "loss": 0.0221, "step": 212910 }, { "epoch": 0.888396992431007, "grad_norm": 0.630519297737187, "learning_rate": 2.122080737128898e-06, "loss": 0.0167, "step": 212915 }, { "epoch": 0.8884178551459972, "grad_norm": 0.7686019055735188, "learning_rate": 2.1220558204564125e-06, "loss": 0.0227, "step": 212920 }, { "epoch": 0.8884387178609876, "grad_norm": 0.8154338192936397, "learning_rate": 2.1220309046615933e-06, "loss": 0.0176, "step": 212925 }, { "epoch": 0.8884595805759778, "grad_norm": 0.8887604968354488, "learning_rate": 2.122005989744389e-06, "loss": 0.0198, "step": 212930 }, { "epoch": 0.8884804432909681, "grad_norm": 0.5915230536891561, "learning_rate": 2.121981075704747e-06, "loss": 0.0198, "step": 212935 }, { "epoch": 0.8885013060059584, "grad_norm": 0.8147644273431501, "learning_rate": 2.1219561625426163e-06, "loss": 0.0236, "step": 212940 }, { "epoch": 0.8885221687209487, "grad_norm": 0.5003958777488794, "learning_rate": 2.1219312502579457e-06, "loss": 0.0184, "step": 212945 }, { "epoch": 0.8885430314359389, "grad_norm": 0.6849133186269066, "learning_rate": 2.1219063388506835e-06, "loss": 0.0251, "step": 212950 }, { "epoch": 0.8885638941509292, "grad_norm": 0.7786028948027848, "learning_rate": 2.121881428320778e-06, "loss": 0.0229, "step": 212955 }, { "epoch": 0.8885847568659195, "grad_norm": 0.8553257315749957, "learning_rate": 2.121856518668178e-06, "loss": 0.0232, "step": 212960 }, { "epoch": 0.8886056195809098, "grad_norm": 0.5659090250740616, "learning_rate": 2.121831609892832e-06, "loss": 0.0199, "step": 212965 }, { "epoch": 0.8886264822959, "grad_norm": 0.4301401392976173, "learning_rate": 2.121806701994689e-06, "loss": 0.02, "step": 212970 }, { "epoch": 0.8886473450108904, "grad_norm": 0.8789259669558563, "learning_rate": 2.1217817949736953e-06, "loss": 0.0308, "step": 212975 }, { "epoch": 0.8886682077258806, "grad_norm": 0.5259978019178668, "learning_rate": 2.1217568888298022e-06, "loss": 0.0212, "step": 212980 }, { "epoch": 0.8886890704408709, "grad_norm": 0.6928396176765869, "learning_rate": 2.1217319835629563e-06, "loss": 0.0207, "step": 212985 }, { "epoch": 0.8887099331558612, "grad_norm": 0.7335229800870943, "learning_rate": 2.121707079173107e-06, "loss": 0.018, "step": 212990 }, { "epoch": 0.8887307958708515, "grad_norm": 0.51167028286456, "learning_rate": 2.1216821756602027e-06, "loss": 0.0243, "step": 212995 }, { "epoch": 0.8887516585858417, "grad_norm": 0.35548923783526, "learning_rate": 2.1216572730241924e-06, "loss": 0.0193, "step": 213000 }, { "epoch": 0.888772521300832, "grad_norm": 0.5283394281752225, "learning_rate": 2.1216323712650234e-06, "loss": 0.0284, "step": 213005 }, { "epoch": 0.8887933840158223, "grad_norm": 1.2393608288591924, "learning_rate": 2.121607470382646e-06, "loss": 0.0265, "step": 213010 }, { "epoch": 0.8888142467308126, "grad_norm": 0.621083457542899, "learning_rate": 2.121582570377007e-06, "loss": 0.0211, "step": 213015 }, { "epoch": 0.8888351094458028, "grad_norm": 0.2639509106582767, "learning_rate": 2.121557671248056e-06, "loss": 0.0209, "step": 213020 }, { "epoch": 0.8888559721607932, "grad_norm": 0.5475943428469757, "learning_rate": 2.121532772995741e-06, "loss": 0.0209, "step": 213025 }, { "epoch": 0.8888768348757834, "grad_norm": 1.2814544282012967, "learning_rate": 2.121507875620011e-06, "loss": 0.0261, "step": 213030 }, { "epoch": 0.8888976975907736, "grad_norm": 0.7959740999849111, "learning_rate": 2.1214829791208146e-06, "loss": 0.0189, "step": 213035 }, { "epoch": 0.888918560305764, "grad_norm": 0.9504769166636221, "learning_rate": 2.1214580834981e-06, "loss": 0.0301, "step": 213040 }, { "epoch": 0.8889394230207542, "grad_norm": 1.20329474758269, "learning_rate": 2.1214331887518157e-06, "loss": 0.0333, "step": 213045 }, { "epoch": 0.8889602857357445, "grad_norm": 0.8571948273057849, "learning_rate": 2.1214082948819103e-06, "loss": 0.0194, "step": 213050 }, { "epoch": 0.8889811484507348, "grad_norm": 0.7877494898945692, "learning_rate": 2.1213834018883334e-06, "loss": 0.0241, "step": 213055 }, { "epoch": 0.8890020111657251, "grad_norm": 0.5289567891045356, "learning_rate": 2.121358509771032e-06, "loss": 0.0249, "step": 213060 }, { "epoch": 0.8890228738807153, "grad_norm": 1.0148055067170334, "learning_rate": 2.121333618529956e-06, "loss": 0.0183, "step": 213065 }, { "epoch": 0.8890437365957056, "grad_norm": 1.1192374841295354, "learning_rate": 2.121308728165053e-06, "loss": 0.0183, "step": 213070 }, { "epoch": 0.8890645993106959, "grad_norm": 0.8153161994211948, "learning_rate": 2.1212838386762727e-06, "loss": 0.0255, "step": 213075 }, { "epoch": 0.8890854620256862, "grad_norm": 0.6438239682780049, "learning_rate": 2.121258950063562e-06, "loss": 0.0169, "step": 213080 }, { "epoch": 0.8891063247406764, "grad_norm": 0.41240756026059344, "learning_rate": 2.1212340623268715e-06, "loss": 0.0177, "step": 213085 }, { "epoch": 0.8891271874556668, "grad_norm": 1.0396673201027085, "learning_rate": 2.1212091754661485e-06, "loss": 0.0257, "step": 213090 }, { "epoch": 0.889148050170657, "grad_norm": 0.8626737535179149, "learning_rate": 2.1211842894813423e-06, "loss": 0.0166, "step": 213095 }, { "epoch": 0.8891689128856473, "grad_norm": 0.5307675591225341, "learning_rate": 2.1211594043724012e-06, "loss": 0.024, "step": 213100 }, { "epoch": 0.8891897756006376, "grad_norm": 0.5318972333265003, "learning_rate": 2.1211345201392736e-06, "loss": 0.0182, "step": 213105 }, { "epoch": 0.8892106383156279, "grad_norm": 0.9119990007929231, "learning_rate": 2.1211096367819085e-06, "loss": 0.0242, "step": 213110 }, { "epoch": 0.8892315010306181, "grad_norm": 0.5480516972426207, "learning_rate": 2.1210847543002544e-06, "loss": 0.0159, "step": 213115 }, { "epoch": 0.8892523637456085, "grad_norm": 0.9409724001412094, "learning_rate": 2.1210598726942596e-06, "loss": 0.0237, "step": 213120 }, { "epoch": 0.8892732264605987, "grad_norm": 0.6313251171282951, "learning_rate": 2.1210349919638736e-06, "loss": 0.0172, "step": 213125 }, { "epoch": 0.8892940891755889, "grad_norm": 0.952145125416929, "learning_rate": 2.1210101121090444e-06, "loss": 0.0265, "step": 213130 }, { "epoch": 0.8893149518905792, "grad_norm": 0.825983084562441, "learning_rate": 2.1209852331297208e-06, "loss": 0.0194, "step": 213135 }, { "epoch": 0.8893358146055695, "grad_norm": 0.9810974467579076, "learning_rate": 2.1209603550258514e-06, "loss": 0.0156, "step": 213140 }, { "epoch": 0.8893566773205598, "grad_norm": 1.6563652540869902, "learning_rate": 2.1209354777973847e-06, "loss": 0.017, "step": 213145 }, { "epoch": 0.88937754003555, "grad_norm": 0.4594831054151473, "learning_rate": 2.1209106014442697e-06, "loss": 0.0228, "step": 213150 }, { "epoch": 0.8893984027505404, "grad_norm": 1.495388115725697, "learning_rate": 2.120885725966455e-06, "loss": 0.0236, "step": 213155 }, { "epoch": 0.8894192654655306, "grad_norm": 0.3057856374637765, "learning_rate": 2.120860851363889e-06, "loss": 0.0128, "step": 213160 }, { "epoch": 0.8894401281805209, "grad_norm": 0.6337860737537904, "learning_rate": 2.1208359776365203e-06, "loss": 0.0177, "step": 213165 }, { "epoch": 0.8894609908955112, "grad_norm": 0.8136914933505365, "learning_rate": 2.1208111047842984e-06, "loss": 0.0175, "step": 213170 }, { "epoch": 0.8894818536105015, "grad_norm": 0.4102586133554576, "learning_rate": 2.1207862328071713e-06, "loss": 0.017, "step": 213175 }, { "epoch": 0.8895027163254917, "grad_norm": 0.738422105876452, "learning_rate": 2.120761361705087e-06, "loss": 0.027, "step": 213180 }, { "epoch": 0.889523579040482, "grad_norm": 1.1907818867991677, "learning_rate": 2.120736491477996e-06, "loss": 0.0223, "step": 213185 }, { "epoch": 0.8895444417554723, "grad_norm": 0.7579886485271318, "learning_rate": 2.1207116221258457e-06, "loss": 0.022, "step": 213190 }, { "epoch": 0.8895653044704626, "grad_norm": 0.8742032499346657, "learning_rate": 2.1206867536485854e-06, "loss": 0.0192, "step": 213195 }, { "epoch": 0.8895861671854528, "grad_norm": 1.5162813622096585, "learning_rate": 2.120661886046163e-06, "loss": 0.0315, "step": 213200 }, { "epoch": 0.8896070299004432, "grad_norm": 0.6493631191430641, "learning_rate": 2.120637019318528e-06, "loss": 0.0177, "step": 213205 }, { "epoch": 0.8896278926154334, "grad_norm": 0.3176633679856738, "learning_rate": 2.120612153465629e-06, "loss": 0.0193, "step": 213210 }, { "epoch": 0.8896487553304236, "grad_norm": 0.5657362279182138, "learning_rate": 2.1205872884874142e-06, "loss": 0.0218, "step": 213215 }, { "epoch": 0.889669618045414, "grad_norm": 0.9567332896486349, "learning_rate": 2.120562424383833e-06, "loss": 0.0167, "step": 213220 }, { "epoch": 0.8896904807604042, "grad_norm": 0.4765161946958097, "learning_rate": 2.120537561154833e-06, "loss": 0.0188, "step": 213225 }, { "epoch": 0.8897113434753945, "grad_norm": 0.968259446204434, "learning_rate": 2.1205126988003644e-06, "loss": 0.0169, "step": 213230 }, { "epoch": 0.8897322061903848, "grad_norm": 0.6852188284444629, "learning_rate": 2.120487837320375e-06, "loss": 0.0235, "step": 213235 }, { "epoch": 0.8897530689053751, "grad_norm": 0.5039104169601742, "learning_rate": 2.1204629767148143e-06, "loss": 0.0228, "step": 213240 }, { "epoch": 0.8897739316203653, "grad_norm": 0.3944894237143089, "learning_rate": 2.1204381169836304e-06, "loss": 0.0161, "step": 213245 }, { "epoch": 0.8897947943353556, "grad_norm": 0.479711856765149, "learning_rate": 2.1204132581267717e-06, "loss": 0.0186, "step": 213250 }, { "epoch": 0.8898156570503459, "grad_norm": 1.0974419647888096, "learning_rate": 2.1203884001441874e-06, "loss": 0.0159, "step": 213255 }, { "epoch": 0.8898365197653362, "grad_norm": 0.982465051833011, "learning_rate": 2.120363543035827e-06, "loss": 0.0159, "step": 213260 }, { "epoch": 0.8898573824803264, "grad_norm": 0.42393047385197424, "learning_rate": 2.1203386868016377e-06, "loss": 0.0164, "step": 213265 }, { "epoch": 0.8898782451953168, "grad_norm": 1.0544777679064097, "learning_rate": 2.1203138314415694e-06, "loss": 0.0277, "step": 213270 }, { "epoch": 0.889899107910307, "grad_norm": 0.5479113789433476, "learning_rate": 2.120288976955571e-06, "loss": 0.0249, "step": 213275 }, { "epoch": 0.8899199706252973, "grad_norm": 1.269827413508286, "learning_rate": 2.1202641233435903e-06, "loss": 0.0198, "step": 213280 }, { "epoch": 0.8899408333402876, "grad_norm": 1.3152166176490439, "learning_rate": 2.1202392706055773e-06, "loss": 0.0215, "step": 213285 }, { "epoch": 0.8899616960552779, "grad_norm": 0.8610349396158202, "learning_rate": 2.120214418741479e-06, "loss": 0.023, "step": 213290 }, { "epoch": 0.8899825587702681, "grad_norm": 0.6268318483624601, "learning_rate": 2.1201895677512462e-06, "loss": 0.0245, "step": 213295 }, { "epoch": 0.8900034214852584, "grad_norm": 1.2512306170699192, "learning_rate": 2.1201647176348265e-06, "loss": 0.0286, "step": 213300 }, { "epoch": 0.8900242842002487, "grad_norm": 1.3564004754075853, "learning_rate": 2.120139868392169e-06, "loss": 0.0292, "step": 213305 }, { "epoch": 0.890045146915239, "grad_norm": 0.39679906664130865, "learning_rate": 2.1201150200232228e-06, "loss": 0.0203, "step": 213310 }, { "epoch": 0.8900660096302292, "grad_norm": 0.5498826849723277, "learning_rate": 2.120090172527936e-06, "loss": 0.023, "step": 213315 }, { "epoch": 0.8900868723452195, "grad_norm": 0.7107750304608003, "learning_rate": 2.1200653259062572e-06, "loss": 0.0214, "step": 213320 }, { "epoch": 0.8901077350602098, "grad_norm": 0.4598729530699625, "learning_rate": 2.1200404801581363e-06, "loss": 0.0246, "step": 213325 }, { "epoch": 0.8901285977752, "grad_norm": 0.5092434398617407, "learning_rate": 2.1200156352835215e-06, "loss": 0.02, "step": 213330 }, { "epoch": 0.8901494604901904, "grad_norm": 0.3120381782409427, "learning_rate": 2.119990791282362e-06, "loss": 0.0138, "step": 213335 }, { "epoch": 0.8901703232051806, "grad_norm": 0.8064357662704744, "learning_rate": 2.1199659481546066e-06, "loss": 0.0223, "step": 213340 }, { "epoch": 0.8901911859201709, "grad_norm": 0.6378215418551072, "learning_rate": 2.119941105900203e-06, "loss": 0.0205, "step": 213345 }, { "epoch": 0.8902120486351612, "grad_norm": 0.578680676255924, "learning_rate": 2.1199162645191015e-06, "loss": 0.026, "step": 213350 }, { "epoch": 0.8902329113501515, "grad_norm": 0.8456510654037241, "learning_rate": 2.1198914240112503e-06, "loss": 0.023, "step": 213355 }, { "epoch": 0.8902537740651417, "grad_norm": 0.41446548023736707, "learning_rate": 2.119866584376598e-06, "loss": 0.0229, "step": 213360 }, { "epoch": 0.890274636780132, "grad_norm": 1.0317508916232114, "learning_rate": 2.1198417456150943e-06, "loss": 0.0292, "step": 213365 }, { "epoch": 0.8902954994951223, "grad_norm": 0.550808010446728, "learning_rate": 2.1198169077266865e-06, "loss": 0.025, "step": 213370 }, { "epoch": 0.8903163622101126, "grad_norm": 0.6064779258874387, "learning_rate": 2.119792070711325e-06, "loss": 0.019, "step": 213375 }, { "epoch": 0.8903372249251028, "grad_norm": 0.7299186604922857, "learning_rate": 2.119767234568958e-06, "loss": 0.0213, "step": 213380 }, { "epoch": 0.8903580876400932, "grad_norm": 0.4870771725089482, "learning_rate": 2.119742399299534e-06, "loss": 0.0185, "step": 213385 }, { "epoch": 0.8903789503550834, "grad_norm": 0.3750876024529607, "learning_rate": 2.119717564903003e-06, "loss": 0.0208, "step": 213390 }, { "epoch": 0.8903998130700737, "grad_norm": 0.9555431025750164, "learning_rate": 2.119692731379313e-06, "loss": 0.0264, "step": 213395 }, { "epoch": 0.890420675785064, "grad_norm": 0.7846491406200127, "learning_rate": 2.1196678987284124e-06, "loss": 0.0175, "step": 213400 }, { "epoch": 0.8904415385000543, "grad_norm": 0.33165097428919044, "learning_rate": 2.1196430669502513e-06, "loss": 0.017, "step": 213405 }, { "epoch": 0.8904624012150445, "grad_norm": 0.5655253104704212, "learning_rate": 2.1196182360447776e-06, "loss": 0.0186, "step": 213410 }, { "epoch": 0.8904832639300349, "grad_norm": 1.1316101978423343, "learning_rate": 2.119593406011941e-06, "loss": 0.0227, "step": 213415 }, { "epoch": 0.8905041266450251, "grad_norm": 0.47382187058541586, "learning_rate": 2.11956857685169e-06, "loss": 0.013, "step": 213420 }, { "epoch": 0.8905249893600153, "grad_norm": 0.426989936237387, "learning_rate": 2.119543748563973e-06, "loss": 0.0156, "step": 213425 }, { "epoch": 0.8905458520750056, "grad_norm": 0.536937448008902, "learning_rate": 2.1195189211487395e-06, "loss": 0.0186, "step": 213430 }, { "epoch": 0.8905667147899959, "grad_norm": 0.6298411649059138, "learning_rate": 2.1194940946059383e-06, "loss": 0.0193, "step": 213435 }, { "epoch": 0.8905875775049862, "grad_norm": 0.4530217142821017, "learning_rate": 2.119469268935518e-06, "loss": 0.0182, "step": 213440 }, { "epoch": 0.8906084402199764, "grad_norm": 0.7442693916653368, "learning_rate": 2.119444444137428e-06, "loss": 0.0257, "step": 213445 }, { "epoch": 0.8906293029349668, "grad_norm": 0.41995101333890705, "learning_rate": 2.1194196202116172e-06, "loss": 0.0146, "step": 213450 }, { "epoch": 0.890650165649957, "grad_norm": 1.707357435279782, "learning_rate": 2.1193947971580344e-06, "loss": 0.0217, "step": 213455 }, { "epoch": 0.8906710283649473, "grad_norm": 0.4131909448042124, "learning_rate": 2.119369974976628e-06, "loss": 0.022, "step": 213460 }, { "epoch": 0.8906918910799376, "grad_norm": 0.6371593571595465, "learning_rate": 2.119345153667347e-06, "loss": 0.0215, "step": 213465 }, { "epoch": 0.8907127537949279, "grad_norm": 0.6434758848496198, "learning_rate": 2.1193203332301415e-06, "loss": 0.0147, "step": 213470 }, { "epoch": 0.8907336165099181, "grad_norm": 0.5493029224471139, "learning_rate": 2.119295513664959e-06, "loss": 0.0191, "step": 213475 }, { "epoch": 0.8907544792249084, "grad_norm": 0.5605485403590236, "learning_rate": 2.1192706949717496e-06, "loss": 0.0192, "step": 213480 }, { "epoch": 0.8907753419398987, "grad_norm": 0.5055718064932865, "learning_rate": 2.1192458771504613e-06, "loss": 0.022, "step": 213485 }, { "epoch": 0.890796204654889, "grad_norm": 1.1292829743651902, "learning_rate": 2.1192210602010434e-06, "loss": 0.0224, "step": 213490 }, { "epoch": 0.8908170673698792, "grad_norm": 0.39523720104124027, "learning_rate": 2.119196244123445e-06, "loss": 0.0145, "step": 213495 }, { "epoch": 0.8908379300848696, "grad_norm": 1.751782257238813, "learning_rate": 2.119171428917615e-06, "loss": 0.0235, "step": 213500 }, { "epoch": 0.8908587927998598, "grad_norm": 0.36247314594435837, "learning_rate": 2.1191466145835024e-06, "loss": 0.0191, "step": 213505 }, { "epoch": 0.89087965551485, "grad_norm": 0.7129087680953694, "learning_rate": 2.1191218011210558e-06, "loss": 0.0202, "step": 213510 }, { "epoch": 0.8909005182298404, "grad_norm": 0.5539966306206277, "learning_rate": 2.1190969885302255e-06, "loss": 0.0185, "step": 213515 }, { "epoch": 0.8909213809448306, "grad_norm": 0.45656105083588794, "learning_rate": 2.119072176810958e-06, "loss": 0.0194, "step": 213520 }, { "epoch": 0.8909422436598209, "grad_norm": 0.5036019684804306, "learning_rate": 2.1190473659632045e-06, "loss": 0.0119, "step": 213525 }, { "epoch": 0.8909631063748112, "grad_norm": 4.479794823693012, "learning_rate": 2.1190225559869125e-06, "loss": 0.0184, "step": 213530 }, { "epoch": 0.8909839690898015, "grad_norm": 1.0277430503756821, "learning_rate": 2.1189977468820324e-06, "loss": 0.021, "step": 213535 }, { "epoch": 0.8910048318047917, "grad_norm": 0.36308341676052974, "learning_rate": 2.1189729386485124e-06, "loss": 0.0131, "step": 213540 }, { "epoch": 0.891025694519782, "grad_norm": 0.7327198533287198, "learning_rate": 2.118948131286301e-06, "loss": 0.0104, "step": 213545 }, { "epoch": 0.8910465572347723, "grad_norm": 1.1367886847133155, "learning_rate": 2.1189233247953483e-06, "loss": 0.0267, "step": 213550 }, { "epoch": 0.8910674199497626, "grad_norm": 0.4333835213555166, "learning_rate": 2.118898519175602e-06, "loss": 0.0284, "step": 213555 }, { "epoch": 0.8910882826647528, "grad_norm": 0.34692435289539003, "learning_rate": 2.118873714427013e-06, "loss": 0.0182, "step": 213560 }, { "epoch": 0.8911091453797432, "grad_norm": 0.7371272873943436, "learning_rate": 2.118848910549528e-06, "loss": 0.0255, "step": 213565 }, { "epoch": 0.8911300080947334, "grad_norm": 0.7242222725836343, "learning_rate": 2.118824107543098e-06, "loss": 0.0236, "step": 213570 }, { "epoch": 0.8911508708097237, "grad_norm": 0.9325361185384967, "learning_rate": 2.118799305407671e-06, "loss": 0.0224, "step": 213575 }, { "epoch": 0.891171733524714, "grad_norm": 0.9610685037452066, "learning_rate": 2.1187745041431958e-06, "loss": 0.0201, "step": 213580 }, { "epoch": 0.8911925962397043, "grad_norm": 0.47386645098825503, "learning_rate": 2.1187497037496226e-06, "loss": 0.014, "step": 213585 }, { "epoch": 0.8912134589546945, "grad_norm": 0.521411333913988, "learning_rate": 2.118724904226899e-06, "loss": 0.0178, "step": 213590 }, { "epoch": 0.8912343216696849, "grad_norm": 1.3537137319879389, "learning_rate": 2.1187001055749753e-06, "loss": 0.0173, "step": 213595 }, { "epoch": 0.8912551843846751, "grad_norm": 1.239775371430737, "learning_rate": 2.1186753077937995e-06, "loss": 0.0277, "step": 213600 }, { "epoch": 0.8912760470996653, "grad_norm": 0.8896498679990414, "learning_rate": 2.118650510883321e-06, "loss": 0.0178, "step": 213605 }, { "epoch": 0.8912969098146556, "grad_norm": 0.2235651854485918, "learning_rate": 2.118625714843489e-06, "loss": 0.0192, "step": 213610 }, { "epoch": 0.891317772529646, "grad_norm": 0.541034370540701, "learning_rate": 2.1186009196742526e-06, "loss": 0.0153, "step": 213615 }, { "epoch": 0.8913386352446362, "grad_norm": 1.022574180773286, "learning_rate": 2.118576125375561e-06, "loss": 0.0242, "step": 213620 }, { "epoch": 0.8913594979596264, "grad_norm": 0.42731949393539426, "learning_rate": 2.118551331947363e-06, "loss": 0.0183, "step": 213625 }, { "epoch": 0.8913803606746168, "grad_norm": 0.3654339749775681, "learning_rate": 2.118526539389607e-06, "loss": 0.0153, "step": 213630 }, { "epoch": 0.891401223389607, "grad_norm": 0.7638000256768273, "learning_rate": 2.118501747702244e-06, "loss": 0.0155, "step": 213635 }, { "epoch": 0.8914220861045973, "grad_norm": 0.4670331905005716, "learning_rate": 2.118476956885221e-06, "loss": 0.0195, "step": 213640 }, { "epoch": 0.8914429488195876, "grad_norm": 0.489746035722015, "learning_rate": 2.1184521669384876e-06, "loss": 0.0195, "step": 213645 }, { "epoch": 0.8914638115345779, "grad_norm": 0.6573566789647142, "learning_rate": 2.1184273778619937e-06, "loss": 0.0206, "step": 213650 }, { "epoch": 0.8914846742495681, "grad_norm": 0.6967977693536302, "learning_rate": 2.1184025896556877e-06, "loss": 0.0287, "step": 213655 }, { "epoch": 0.8915055369645584, "grad_norm": 0.6718169394459595, "learning_rate": 2.118377802319519e-06, "loss": 0.0166, "step": 213660 }, { "epoch": 0.8915263996795487, "grad_norm": 1.2168718196215327, "learning_rate": 2.1183530158534363e-06, "loss": 0.0196, "step": 213665 }, { "epoch": 0.891547262394539, "grad_norm": 0.6932808662525732, "learning_rate": 2.118328230257389e-06, "loss": 0.018, "step": 213670 }, { "epoch": 0.8915681251095292, "grad_norm": 0.30772697142721905, "learning_rate": 2.1183034455313266e-06, "loss": 0.0266, "step": 213675 }, { "epoch": 0.8915889878245196, "grad_norm": 0.519290943323267, "learning_rate": 2.1182786616751967e-06, "loss": 0.0276, "step": 213680 }, { "epoch": 0.8916098505395098, "grad_norm": 1.0283994566005867, "learning_rate": 2.1182538786889506e-06, "loss": 0.0246, "step": 213685 }, { "epoch": 0.8916307132545, "grad_norm": 0.5434035495585577, "learning_rate": 2.118229096572536e-06, "loss": 0.0243, "step": 213690 }, { "epoch": 0.8916515759694904, "grad_norm": 0.7729183902199217, "learning_rate": 2.118204315325902e-06, "loss": 0.0189, "step": 213695 }, { "epoch": 0.8916724386844807, "grad_norm": 0.7846376625389326, "learning_rate": 2.118179534948998e-06, "loss": 0.0209, "step": 213700 }, { "epoch": 0.8916933013994709, "grad_norm": 0.9903488470618622, "learning_rate": 2.1181547554417736e-06, "loss": 0.0207, "step": 213705 }, { "epoch": 0.8917141641144612, "grad_norm": 1.3465110091792236, "learning_rate": 2.1181299768041773e-06, "loss": 0.0196, "step": 213710 }, { "epoch": 0.8917350268294515, "grad_norm": 0.6961464260808502, "learning_rate": 2.118105199036158e-06, "loss": 0.0192, "step": 213715 }, { "epoch": 0.8917558895444417, "grad_norm": 1.4127751120348386, "learning_rate": 2.118080422137666e-06, "loss": 0.0226, "step": 213720 }, { "epoch": 0.891776752259432, "grad_norm": 0.40560516143317243, "learning_rate": 2.118055646108649e-06, "loss": 0.0168, "step": 213725 }, { "epoch": 0.8917976149744223, "grad_norm": 1.0725826454692227, "learning_rate": 2.118030870949057e-06, "loss": 0.0231, "step": 213730 }, { "epoch": 0.8918184776894126, "grad_norm": 0.5877350224963954, "learning_rate": 2.1180060966588396e-06, "loss": 0.0348, "step": 213735 }, { "epoch": 0.8918393404044028, "grad_norm": 0.5640422379061834, "learning_rate": 2.117981323237945e-06, "loss": 0.0142, "step": 213740 }, { "epoch": 0.8918602031193932, "grad_norm": 0.5141324038012028, "learning_rate": 2.1179565506863224e-06, "loss": 0.0194, "step": 213745 }, { "epoch": 0.8918810658343834, "grad_norm": 0.4796245405008511, "learning_rate": 2.117931779003921e-06, "loss": 0.0191, "step": 213750 }, { "epoch": 0.8919019285493737, "grad_norm": 0.2506887580066975, "learning_rate": 2.1179070081906914e-06, "loss": 0.0181, "step": 213755 }, { "epoch": 0.891922791264364, "grad_norm": 0.7910214538935434, "learning_rate": 2.1178822382465813e-06, "loss": 0.0185, "step": 213760 }, { "epoch": 0.8919436539793543, "grad_norm": 0.44496362684024376, "learning_rate": 2.1178574691715397e-06, "loss": 0.0177, "step": 213765 }, { "epoch": 0.8919645166943445, "grad_norm": 0.6673496074769426, "learning_rate": 2.1178327009655162e-06, "loss": 0.018, "step": 213770 }, { "epoch": 0.8919853794093349, "grad_norm": 0.49405747186213117, "learning_rate": 2.117807933628461e-06, "loss": 0.0177, "step": 213775 }, { "epoch": 0.8920062421243251, "grad_norm": 0.8299214963397071, "learning_rate": 2.1177831671603216e-06, "loss": 0.026, "step": 213780 }, { "epoch": 0.8920271048393154, "grad_norm": 0.36001033445124186, "learning_rate": 2.117758401561048e-06, "loss": 0.0177, "step": 213785 }, { "epoch": 0.8920479675543056, "grad_norm": 0.7893210687049399, "learning_rate": 2.1177336368305896e-06, "loss": 0.0211, "step": 213790 }, { "epoch": 0.892068830269296, "grad_norm": 0.9563531576734782, "learning_rate": 2.1177088729688952e-06, "loss": 0.0254, "step": 213795 }, { "epoch": 0.8920896929842862, "grad_norm": 0.6653226447061318, "learning_rate": 2.1176841099759144e-06, "loss": 0.0171, "step": 213800 }, { "epoch": 0.8921105556992764, "grad_norm": 0.7071290712509912, "learning_rate": 2.117659347851596e-06, "loss": 0.0158, "step": 213805 }, { "epoch": 0.8921314184142668, "grad_norm": 1.0113208620757859, "learning_rate": 2.117634586595889e-06, "loss": 0.0206, "step": 213810 }, { "epoch": 0.892152281129257, "grad_norm": 1.1433474441431803, "learning_rate": 2.1176098262087434e-06, "loss": 0.0271, "step": 213815 }, { "epoch": 0.8921731438442473, "grad_norm": 0.5118998383406945, "learning_rate": 2.117585066690108e-06, "loss": 0.0207, "step": 213820 }, { "epoch": 0.8921940065592376, "grad_norm": 0.48604453343510634, "learning_rate": 2.1175603080399324e-06, "loss": 0.0157, "step": 213825 }, { "epoch": 0.8922148692742279, "grad_norm": 0.5097192380825118, "learning_rate": 2.1175355502581647e-06, "loss": 0.0254, "step": 213830 }, { "epoch": 0.8922357319892181, "grad_norm": 0.7757371519784944, "learning_rate": 2.1175107933447557e-06, "loss": 0.0192, "step": 213835 }, { "epoch": 0.8922565947042084, "grad_norm": 1.016910287071709, "learning_rate": 2.1174860372996533e-06, "loss": 0.0236, "step": 213840 }, { "epoch": 0.8922774574191987, "grad_norm": 0.5878911862655373, "learning_rate": 2.117461282122808e-06, "loss": 0.0214, "step": 213845 }, { "epoch": 0.892298320134189, "grad_norm": 0.558596065013812, "learning_rate": 2.117436527814168e-06, "loss": 0.0192, "step": 213850 }, { "epoch": 0.8923191828491792, "grad_norm": 1.029183380430559, "learning_rate": 2.1174117743736825e-06, "loss": 0.0287, "step": 213855 }, { "epoch": 0.8923400455641696, "grad_norm": 0.5855608544935593, "learning_rate": 2.1173870218013017e-06, "loss": 0.0183, "step": 213860 }, { "epoch": 0.8923609082791598, "grad_norm": 0.7123280555155654, "learning_rate": 2.117362270096974e-06, "loss": 0.0229, "step": 213865 }, { "epoch": 0.8923817709941501, "grad_norm": 1.0858218595972902, "learning_rate": 2.1173375192606493e-06, "loss": 0.029, "step": 213870 }, { "epoch": 0.8924026337091404, "grad_norm": 0.652662976044957, "learning_rate": 2.1173127692922764e-06, "loss": 0.0163, "step": 213875 }, { "epoch": 0.8924234964241307, "grad_norm": 0.5726843391854332, "learning_rate": 2.1172880201918045e-06, "loss": 0.0208, "step": 213880 }, { "epoch": 0.8924443591391209, "grad_norm": 0.5774220136154725, "learning_rate": 2.1172632719591834e-06, "loss": 0.0188, "step": 213885 }, { "epoch": 0.8924652218541113, "grad_norm": 0.8550334991828817, "learning_rate": 2.117238524594362e-06, "loss": 0.0176, "step": 213890 }, { "epoch": 0.8924860845691015, "grad_norm": 0.8075521449597903, "learning_rate": 2.1172137780972894e-06, "loss": 0.0262, "step": 213895 }, { "epoch": 0.8925069472840917, "grad_norm": 0.6123229338488843, "learning_rate": 2.1171890324679157e-06, "loss": 0.0165, "step": 213900 }, { "epoch": 0.892527809999082, "grad_norm": 0.8732555868563405, "learning_rate": 2.117164287706189e-06, "loss": 0.022, "step": 213905 }, { "epoch": 0.8925486727140723, "grad_norm": 0.5339349817090722, "learning_rate": 2.1171395438120602e-06, "loss": 0.0234, "step": 213910 }, { "epoch": 0.8925695354290626, "grad_norm": 0.29222188173667535, "learning_rate": 2.1171148007854764e-06, "loss": 0.0138, "step": 213915 }, { "epoch": 0.8925903981440528, "grad_norm": 0.8455361840347301, "learning_rate": 2.117090058626389e-06, "loss": 0.0283, "step": 213920 }, { "epoch": 0.8926112608590432, "grad_norm": 0.6636071656787986, "learning_rate": 2.1170653173347465e-06, "loss": 0.0293, "step": 213925 }, { "epoch": 0.8926321235740334, "grad_norm": 0.6076891326687539, "learning_rate": 2.117040576910498e-06, "loss": 0.025, "step": 213930 }, { "epoch": 0.8926529862890237, "grad_norm": 0.4635339941343709, "learning_rate": 2.117015837353593e-06, "loss": 0.0155, "step": 213935 }, { "epoch": 0.892673849004014, "grad_norm": 0.4300223890797434, "learning_rate": 2.1169910986639806e-06, "loss": 0.0192, "step": 213940 }, { "epoch": 0.8926947117190043, "grad_norm": 0.4734470211822523, "learning_rate": 2.1169663608416113e-06, "loss": 0.0151, "step": 213945 }, { "epoch": 0.8927155744339945, "grad_norm": 0.49359653510463436, "learning_rate": 2.1169416238864326e-06, "loss": 0.0196, "step": 213950 }, { "epoch": 0.8927364371489849, "grad_norm": 0.4504114855735424, "learning_rate": 2.116916887798395e-06, "loss": 0.0146, "step": 213955 }, { "epoch": 0.8927572998639751, "grad_norm": 0.46717618792733995, "learning_rate": 2.1168921525774477e-06, "loss": 0.0218, "step": 213960 }, { "epoch": 0.8927781625789654, "grad_norm": 0.5750700443226975, "learning_rate": 2.11686741822354e-06, "loss": 0.0199, "step": 213965 }, { "epoch": 0.8927990252939556, "grad_norm": 0.37863888007293744, "learning_rate": 2.1168426847366206e-06, "loss": 0.0152, "step": 213970 }, { "epoch": 0.892819888008946, "grad_norm": 0.4125414404930796, "learning_rate": 2.1168179521166395e-06, "loss": 0.0202, "step": 213975 }, { "epoch": 0.8928407507239362, "grad_norm": 0.5667560652455091, "learning_rate": 2.1167932203635463e-06, "loss": 0.0172, "step": 213980 }, { "epoch": 0.8928616134389264, "grad_norm": 0.9795177501123837, "learning_rate": 2.1167684894772892e-06, "loss": 0.022, "step": 213985 }, { "epoch": 0.8928824761539168, "grad_norm": 0.8073304132103339, "learning_rate": 2.116743759457819e-06, "loss": 0.0317, "step": 213990 }, { "epoch": 0.892903338868907, "grad_norm": 0.5830640018976203, "learning_rate": 2.116719030305085e-06, "loss": 0.0234, "step": 213995 }, { "epoch": 0.8929242015838973, "grad_norm": 0.5664638323064558, "learning_rate": 2.1166943020190357e-06, "loss": 0.0322, "step": 214000 }, { "epoch": 0.8929450642988876, "grad_norm": 0.5601232075534822, "learning_rate": 2.1166695745996205e-06, "loss": 0.0234, "step": 214005 }, { "epoch": 0.8929659270138779, "grad_norm": 0.32101664320835704, "learning_rate": 2.1166448480467895e-06, "loss": 0.0123, "step": 214010 }, { "epoch": 0.8929867897288681, "grad_norm": 0.8425749192101419, "learning_rate": 2.1166201223604914e-06, "loss": 0.0241, "step": 214015 }, { "epoch": 0.8930076524438584, "grad_norm": 0.6110227280756652, "learning_rate": 2.1165953975406754e-06, "loss": 0.023, "step": 214020 }, { "epoch": 0.8930285151588487, "grad_norm": 0.4007581471489421, "learning_rate": 2.116570673587292e-06, "loss": 0.0157, "step": 214025 }, { "epoch": 0.893049377873839, "grad_norm": 0.30946484425397697, "learning_rate": 2.11654595050029e-06, "loss": 0.0209, "step": 214030 }, { "epoch": 0.8930702405888292, "grad_norm": 0.7002675339886332, "learning_rate": 2.116521228279618e-06, "loss": 0.0165, "step": 214035 }, { "epoch": 0.8930911033038196, "grad_norm": 0.7694333463401182, "learning_rate": 2.116496506925227e-06, "loss": 0.0295, "step": 214040 }, { "epoch": 0.8931119660188098, "grad_norm": 0.5138925411020037, "learning_rate": 2.116471786437065e-06, "loss": 0.0225, "step": 214045 }, { "epoch": 0.8931328287338001, "grad_norm": 0.8554782419583588, "learning_rate": 2.116447066815082e-06, "loss": 0.0226, "step": 214050 }, { "epoch": 0.8931536914487904, "grad_norm": 0.6058098615861709, "learning_rate": 2.116422348059228e-06, "loss": 0.0215, "step": 214055 }, { "epoch": 0.8931745541637807, "grad_norm": 0.5474884162131515, "learning_rate": 2.116397630169451e-06, "loss": 0.0224, "step": 214060 }, { "epoch": 0.8931954168787709, "grad_norm": 0.7844453320077418, "learning_rate": 2.116372913145702e-06, "loss": 0.0203, "step": 214065 }, { "epoch": 0.8932162795937613, "grad_norm": 0.7246153565508041, "learning_rate": 2.116348196987929e-06, "loss": 0.0191, "step": 214070 }, { "epoch": 0.8932371423087515, "grad_norm": 1.0471946168597077, "learning_rate": 2.1163234816960827e-06, "loss": 0.0188, "step": 214075 }, { "epoch": 0.8932580050237418, "grad_norm": 0.37829650183592406, "learning_rate": 2.1162987672701114e-06, "loss": 0.0179, "step": 214080 }, { "epoch": 0.893278867738732, "grad_norm": 0.5178528813148449, "learning_rate": 2.116274053709965e-06, "loss": 0.0161, "step": 214085 }, { "epoch": 0.8932997304537224, "grad_norm": 0.6213759596309095, "learning_rate": 2.1162493410155938e-06, "loss": 0.0194, "step": 214090 }, { "epoch": 0.8933205931687126, "grad_norm": 0.9281296046964768, "learning_rate": 2.116224629186946e-06, "loss": 0.022, "step": 214095 }, { "epoch": 0.8933414558837028, "grad_norm": 0.4546737700738541, "learning_rate": 2.1161999182239717e-06, "loss": 0.0183, "step": 214100 }, { "epoch": 0.8933623185986932, "grad_norm": 0.5994358288501932, "learning_rate": 2.11617520812662e-06, "loss": 0.0239, "step": 214105 }, { "epoch": 0.8933831813136834, "grad_norm": 0.8719576079491743, "learning_rate": 2.1161504988948407e-06, "loss": 0.017, "step": 214110 }, { "epoch": 0.8934040440286737, "grad_norm": 0.7137252747369676, "learning_rate": 2.116125790528583e-06, "loss": 0.0187, "step": 214115 }, { "epoch": 0.893424906743664, "grad_norm": 0.6218071119988847, "learning_rate": 2.116101083027797e-06, "loss": 0.0229, "step": 214120 }, { "epoch": 0.8934457694586543, "grad_norm": 0.6136837474977632, "learning_rate": 2.116076376392431e-06, "loss": 0.0205, "step": 214125 }, { "epoch": 0.8934666321736445, "grad_norm": 0.7593860535003139, "learning_rate": 2.1160516706224356e-06, "loss": 0.0262, "step": 214130 }, { "epoch": 0.8934874948886349, "grad_norm": 0.4028463791983112, "learning_rate": 2.1160269657177597e-06, "loss": 0.0249, "step": 214135 }, { "epoch": 0.8935083576036251, "grad_norm": 0.9336358469970385, "learning_rate": 2.116002261678353e-06, "loss": 0.0257, "step": 214140 }, { "epoch": 0.8935292203186154, "grad_norm": 0.5801758311448072, "learning_rate": 2.115977558504165e-06, "loss": 0.0199, "step": 214145 }, { "epoch": 0.8935500830336056, "grad_norm": 0.5141836419748184, "learning_rate": 2.1159528561951448e-06, "loss": 0.0188, "step": 214150 }, { "epoch": 0.893570945748596, "grad_norm": 0.4706123944978453, "learning_rate": 2.1159281547512426e-06, "loss": 0.0163, "step": 214155 }, { "epoch": 0.8935918084635862, "grad_norm": 0.8505943334736464, "learning_rate": 2.1159034541724076e-06, "loss": 0.0212, "step": 214160 }, { "epoch": 0.8936126711785765, "grad_norm": 0.8483835423992945, "learning_rate": 2.115878754458589e-06, "loss": 0.0189, "step": 214165 }, { "epoch": 0.8936335338935668, "grad_norm": 0.5635906543663219, "learning_rate": 2.115854055609736e-06, "loss": 0.0186, "step": 214170 }, { "epoch": 0.8936543966085571, "grad_norm": 0.8183598422009933, "learning_rate": 2.115829357625799e-06, "loss": 0.0201, "step": 214175 }, { "epoch": 0.8936752593235473, "grad_norm": 1.2974879071229608, "learning_rate": 2.1158046605067274e-06, "loss": 0.0328, "step": 214180 }, { "epoch": 0.8936961220385377, "grad_norm": 1.162869981677844, "learning_rate": 2.1157799642524708e-06, "loss": 0.0214, "step": 214185 }, { "epoch": 0.8937169847535279, "grad_norm": 0.850189318511807, "learning_rate": 2.115755268862978e-06, "loss": 0.0199, "step": 214190 }, { "epoch": 0.8937378474685181, "grad_norm": 0.6616256824136935, "learning_rate": 2.115730574338199e-06, "loss": 0.0188, "step": 214195 }, { "epoch": 0.8937587101835084, "grad_norm": 0.9410840519636101, "learning_rate": 2.115705880678084e-06, "loss": 0.0332, "step": 214200 }, { "epoch": 0.8937795728984987, "grad_norm": 0.4423491889875481, "learning_rate": 2.1156811878825808e-06, "loss": 0.0165, "step": 214205 }, { "epoch": 0.893800435613489, "grad_norm": 0.7105106794706036, "learning_rate": 2.1156564959516404e-06, "loss": 0.0231, "step": 214210 }, { "epoch": 0.8938212983284792, "grad_norm": 0.3701639447803819, "learning_rate": 2.1156318048852116e-06, "loss": 0.0144, "step": 214215 }, { "epoch": 0.8938421610434696, "grad_norm": 0.2043560758038449, "learning_rate": 2.115607114683245e-06, "loss": 0.0165, "step": 214220 }, { "epoch": 0.8938630237584598, "grad_norm": 0.6148746511240861, "learning_rate": 2.1155824253456896e-06, "loss": 0.025, "step": 214225 }, { "epoch": 0.8938838864734501, "grad_norm": 0.34586703995062357, "learning_rate": 2.115557736872494e-06, "loss": 0.0189, "step": 214230 }, { "epoch": 0.8939047491884404, "grad_norm": 0.8475504309689185, "learning_rate": 2.1155330492636088e-06, "loss": 0.0284, "step": 214235 }, { "epoch": 0.8939256119034307, "grad_norm": 0.4786584015056223, "learning_rate": 2.115508362518984e-06, "loss": 0.015, "step": 214240 }, { "epoch": 0.8939464746184209, "grad_norm": 0.7493120175609793, "learning_rate": 2.115483676638568e-06, "loss": 0.0185, "step": 214245 }, { "epoch": 0.8939673373334113, "grad_norm": 0.432015377440699, "learning_rate": 2.115458991622311e-06, "loss": 0.0225, "step": 214250 }, { "epoch": 0.8939882000484015, "grad_norm": 0.4977870361572642, "learning_rate": 2.1154343074701623e-06, "loss": 0.0231, "step": 214255 }, { "epoch": 0.8940090627633918, "grad_norm": 0.6523648742028517, "learning_rate": 2.1154096241820717e-06, "loss": 0.0216, "step": 214260 }, { "epoch": 0.894029925478382, "grad_norm": 0.7764837577316579, "learning_rate": 2.115384941757989e-06, "loss": 0.0155, "step": 214265 }, { "epoch": 0.8940507881933724, "grad_norm": 0.5112998652605013, "learning_rate": 2.115360260197864e-06, "loss": 0.0146, "step": 214270 }, { "epoch": 0.8940716509083626, "grad_norm": 0.44427305747123946, "learning_rate": 2.1153355795016454e-06, "loss": 0.0216, "step": 214275 }, { "epoch": 0.8940925136233528, "grad_norm": 0.5584444433719434, "learning_rate": 2.1153108996692832e-06, "loss": 0.0184, "step": 214280 }, { "epoch": 0.8941133763383432, "grad_norm": 0.8698333976356155, "learning_rate": 2.115286220700727e-06, "loss": 0.0261, "step": 214285 }, { "epoch": 0.8941342390533334, "grad_norm": 0.3180468351053353, "learning_rate": 2.1152615425959266e-06, "loss": 0.02, "step": 214290 }, { "epoch": 0.8941551017683237, "grad_norm": 0.7685448873406939, "learning_rate": 2.1152368653548317e-06, "loss": 0.0156, "step": 214295 }, { "epoch": 0.894175964483314, "grad_norm": 1.0912079238730312, "learning_rate": 2.115212188977391e-06, "loss": 0.0199, "step": 214300 }, { "epoch": 0.8941968271983043, "grad_norm": 0.6135201854613951, "learning_rate": 2.1151875134635557e-06, "loss": 0.0191, "step": 214305 }, { "epoch": 0.8942176899132945, "grad_norm": 0.6497383144167307, "learning_rate": 2.1151628388132746e-06, "loss": 0.0244, "step": 214310 }, { "epoch": 0.8942385526282849, "grad_norm": 0.6774373236369471, "learning_rate": 2.115138165026497e-06, "loss": 0.0176, "step": 214315 }, { "epoch": 0.8942594153432751, "grad_norm": 0.8675831829064985, "learning_rate": 2.115113492103173e-06, "loss": 0.0219, "step": 214320 }, { "epoch": 0.8942802780582654, "grad_norm": 0.7865666801842945, "learning_rate": 2.1150888200432516e-06, "loss": 0.0223, "step": 214325 }, { "epoch": 0.8943011407732556, "grad_norm": 1.2011074170310958, "learning_rate": 2.1150641488466834e-06, "loss": 0.024, "step": 214330 }, { "epoch": 0.894322003488246, "grad_norm": 0.8892032552268497, "learning_rate": 2.1150394785134175e-06, "loss": 0.0176, "step": 214335 }, { "epoch": 0.8943428662032362, "grad_norm": 0.7866006429145055, "learning_rate": 2.1150148090434036e-06, "loss": 0.0203, "step": 214340 }, { "epoch": 0.8943637289182265, "grad_norm": 0.37366785442348055, "learning_rate": 2.114990140436591e-06, "loss": 0.0178, "step": 214345 }, { "epoch": 0.8943845916332168, "grad_norm": 0.8185414775816886, "learning_rate": 2.1149654726929307e-06, "loss": 0.0222, "step": 214350 }, { "epoch": 0.8944054543482071, "grad_norm": 0.3421814664782651, "learning_rate": 2.1149408058123715e-06, "loss": 0.0245, "step": 214355 }, { "epoch": 0.8944263170631973, "grad_norm": 1.076504922593739, "learning_rate": 2.114916139794862e-06, "loss": 0.0236, "step": 214360 }, { "epoch": 0.8944471797781877, "grad_norm": 0.7667548030617644, "learning_rate": 2.114891474640353e-06, "loss": 0.0225, "step": 214365 }, { "epoch": 0.8944680424931779, "grad_norm": 0.621507924732196, "learning_rate": 2.1148668103487946e-06, "loss": 0.0193, "step": 214370 }, { "epoch": 0.8944889052081682, "grad_norm": 0.38368615725627303, "learning_rate": 2.114842146920136e-06, "loss": 0.0167, "step": 214375 }, { "epoch": 0.8945097679231584, "grad_norm": 1.0293835129338136, "learning_rate": 2.114817484354326e-06, "loss": 0.0202, "step": 214380 }, { "epoch": 0.8945306306381487, "grad_norm": 0.9042330557998529, "learning_rate": 2.1147928226513155e-06, "loss": 0.0167, "step": 214385 }, { "epoch": 0.894551493353139, "grad_norm": 0.3015541074529938, "learning_rate": 2.1147681618110536e-06, "loss": 0.0177, "step": 214390 }, { "epoch": 0.8945723560681292, "grad_norm": 0.4276365321943555, "learning_rate": 2.114743501833491e-06, "loss": 0.0186, "step": 214395 }, { "epoch": 0.8945932187831196, "grad_norm": 0.5532690573907517, "learning_rate": 2.114718842718576e-06, "loss": 0.016, "step": 214400 }, { "epoch": 0.8946140814981098, "grad_norm": 0.5545624519411855, "learning_rate": 2.1146941844662585e-06, "loss": 0.0201, "step": 214405 }, { "epoch": 0.8946349442131001, "grad_norm": 0.33458486023630196, "learning_rate": 2.1146695270764896e-06, "loss": 0.016, "step": 214410 }, { "epoch": 0.8946558069280904, "grad_norm": 0.5960603270276496, "learning_rate": 2.114644870549217e-06, "loss": 0.019, "step": 214415 }, { "epoch": 0.8946766696430807, "grad_norm": 0.4751504635856088, "learning_rate": 2.114620214884392e-06, "loss": 0.0218, "step": 214420 }, { "epoch": 0.8946975323580709, "grad_norm": 0.6371864862937436, "learning_rate": 2.1145955600819636e-06, "loss": 0.0183, "step": 214425 }, { "epoch": 0.8947183950730613, "grad_norm": 0.8303129885211659, "learning_rate": 2.1145709061418816e-06, "loss": 0.0174, "step": 214430 }, { "epoch": 0.8947392577880515, "grad_norm": 0.49377927278503303, "learning_rate": 2.114546253064096e-06, "loss": 0.0169, "step": 214435 }, { "epoch": 0.8947601205030418, "grad_norm": 1.089340871814558, "learning_rate": 2.1145216008485566e-06, "loss": 0.0191, "step": 214440 }, { "epoch": 0.894780983218032, "grad_norm": 0.4353600464633866, "learning_rate": 2.1144969494952124e-06, "loss": 0.0147, "step": 214445 }, { "epoch": 0.8948018459330224, "grad_norm": 0.6106719224085655, "learning_rate": 2.114472299004014e-06, "loss": 0.0154, "step": 214450 }, { "epoch": 0.8948227086480126, "grad_norm": 0.684315217682305, "learning_rate": 2.1144476493749107e-06, "loss": 0.0231, "step": 214455 }, { "epoch": 0.8948435713630029, "grad_norm": 0.9105593521491689, "learning_rate": 2.1144230006078525e-06, "loss": 0.0164, "step": 214460 }, { "epoch": 0.8948644340779932, "grad_norm": 0.7272033809682795, "learning_rate": 2.1143983527027883e-06, "loss": 0.0208, "step": 214465 }, { "epoch": 0.8948852967929835, "grad_norm": 0.4453241965178164, "learning_rate": 2.114373705659669e-06, "loss": 0.0194, "step": 214470 }, { "epoch": 0.8949061595079737, "grad_norm": 0.5802512788591627, "learning_rate": 2.1143490594784444e-06, "loss": 0.0177, "step": 214475 }, { "epoch": 0.894927022222964, "grad_norm": 1.2832286361272083, "learning_rate": 2.114324414159063e-06, "loss": 0.018, "step": 214480 }, { "epoch": 0.8949478849379543, "grad_norm": 0.5598289958169554, "learning_rate": 2.114299769701476e-06, "loss": 0.0212, "step": 214485 }, { "epoch": 0.8949687476529445, "grad_norm": 0.7071337006376284, "learning_rate": 2.1142751261056322e-06, "loss": 0.0176, "step": 214490 }, { "epoch": 0.8949896103679349, "grad_norm": 0.8122686480550678, "learning_rate": 2.114250483371482e-06, "loss": 0.0224, "step": 214495 }, { "epoch": 0.8950104730829251, "grad_norm": 0.601048847620377, "learning_rate": 2.1142258414989745e-06, "loss": 0.0197, "step": 214500 }, { "epoch": 0.8950313357979154, "grad_norm": 0.674437872257907, "learning_rate": 2.11420120048806e-06, "loss": 0.0201, "step": 214505 }, { "epoch": 0.8950521985129056, "grad_norm": 0.5574346514207894, "learning_rate": 2.1141765603386884e-06, "loss": 0.0158, "step": 214510 }, { "epoch": 0.895073061227896, "grad_norm": 1.017964891212346, "learning_rate": 2.1141519210508093e-06, "loss": 0.0253, "step": 214515 }, { "epoch": 0.8950939239428862, "grad_norm": 0.6601256996192795, "learning_rate": 2.114127282624372e-06, "loss": 0.0229, "step": 214520 }, { "epoch": 0.8951147866578765, "grad_norm": 0.5133802394046686, "learning_rate": 2.114102645059327e-06, "loss": 0.0163, "step": 214525 }, { "epoch": 0.8951356493728668, "grad_norm": 0.44308860971455993, "learning_rate": 2.1140780083556243e-06, "loss": 0.0169, "step": 214530 }, { "epoch": 0.8951565120878571, "grad_norm": 0.2900538700457076, "learning_rate": 2.1140533725132127e-06, "loss": 0.0186, "step": 214535 }, { "epoch": 0.8951773748028473, "grad_norm": 0.4991192620803733, "learning_rate": 2.114028737532043e-06, "loss": 0.0232, "step": 214540 }, { "epoch": 0.8951982375178377, "grad_norm": 0.701408121448206, "learning_rate": 2.1140041034120643e-06, "loss": 0.0251, "step": 214545 }, { "epoch": 0.8952191002328279, "grad_norm": 1.0949935545362446, "learning_rate": 2.1139794701532273e-06, "loss": 0.0273, "step": 214550 }, { "epoch": 0.8952399629478182, "grad_norm": 0.5164797605427522, "learning_rate": 2.113954837755481e-06, "loss": 0.017, "step": 214555 }, { "epoch": 0.8952608256628084, "grad_norm": 1.6721630904450608, "learning_rate": 2.1139302062187754e-06, "loss": 0.026, "step": 214560 }, { "epoch": 0.8952816883777988, "grad_norm": 1.256433694854224, "learning_rate": 2.1139055755430606e-06, "loss": 0.0241, "step": 214565 }, { "epoch": 0.895302551092789, "grad_norm": 0.7950256926378432, "learning_rate": 2.113880945728286e-06, "loss": 0.0203, "step": 214570 }, { "epoch": 0.8953234138077792, "grad_norm": 0.6782769371850466, "learning_rate": 2.113856316774402e-06, "loss": 0.0284, "step": 214575 }, { "epoch": 0.8953442765227696, "grad_norm": 0.8801332401231488, "learning_rate": 2.1138316886813585e-06, "loss": 0.0174, "step": 214580 }, { "epoch": 0.8953651392377598, "grad_norm": 0.37758912756571417, "learning_rate": 2.113807061449105e-06, "loss": 0.0204, "step": 214585 }, { "epoch": 0.8953860019527501, "grad_norm": 0.9472817497200148, "learning_rate": 2.113782435077591e-06, "loss": 0.0313, "step": 214590 }, { "epoch": 0.8954068646677404, "grad_norm": 0.7443815872197626, "learning_rate": 2.1137578095667673e-06, "loss": 0.0166, "step": 214595 }, { "epoch": 0.8954277273827307, "grad_norm": 0.6166501360944786, "learning_rate": 2.1137331849165827e-06, "loss": 0.0232, "step": 214600 }, { "epoch": 0.8954485900977209, "grad_norm": 0.5618942513898344, "learning_rate": 2.1137085611269883e-06, "loss": 0.0252, "step": 214605 }, { "epoch": 0.8954694528127113, "grad_norm": 0.48248133528285553, "learning_rate": 2.1136839381979327e-06, "loss": 0.0264, "step": 214610 }, { "epoch": 0.8954903155277015, "grad_norm": 0.8315977266940519, "learning_rate": 2.1136593161293665e-06, "loss": 0.0272, "step": 214615 }, { "epoch": 0.8955111782426918, "grad_norm": 0.5796230544460323, "learning_rate": 2.1136346949212392e-06, "loss": 0.0264, "step": 214620 }, { "epoch": 0.895532040957682, "grad_norm": 0.592476341412961, "learning_rate": 2.1136100745735013e-06, "loss": 0.0178, "step": 214625 }, { "epoch": 0.8955529036726724, "grad_norm": 0.47051849938973006, "learning_rate": 2.1135854550861024e-06, "loss": 0.022, "step": 214630 }, { "epoch": 0.8955737663876626, "grad_norm": 0.5180613279323076, "learning_rate": 2.1135608364589917e-06, "loss": 0.0191, "step": 214635 }, { "epoch": 0.8955946291026529, "grad_norm": 0.39390317196745867, "learning_rate": 2.113536218692121e-06, "loss": 0.0168, "step": 214640 }, { "epoch": 0.8956154918176432, "grad_norm": 0.906162431091756, "learning_rate": 2.1135116017854378e-06, "loss": 0.0217, "step": 214645 }, { "epoch": 0.8956363545326335, "grad_norm": 1.206985885782653, "learning_rate": 2.1134869857388934e-06, "loss": 0.025, "step": 214650 }, { "epoch": 0.8956572172476237, "grad_norm": 0.829647854397982, "learning_rate": 2.1134623705524373e-06, "loss": 0.0197, "step": 214655 }, { "epoch": 0.8956780799626141, "grad_norm": 0.987361617940686, "learning_rate": 2.11343775622602e-06, "loss": 0.0225, "step": 214660 }, { "epoch": 0.8956989426776043, "grad_norm": 0.6762343447764315, "learning_rate": 2.1134131427595903e-06, "loss": 0.0181, "step": 214665 }, { "epoch": 0.8957198053925945, "grad_norm": 0.4582319766486546, "learning_rate": 2.1133885301530996e-06, "loss": 0.0275, "step": 214670 }, { "epoch": 0.8957406681075849, "grad_norm": 1.0271523385988974, "learning_rate": 2.1133639184064968e-06, "loss": 0.0223, "step": 214675 }, { "epoch": 0.8957615308225751, "grad_norm": 0.3517348086586011, "learning_rate": 2.1133393075197315e-06, "loss": 0.0208, "step": 214680 }, { "epoch": 0.8957823935375654, "grad_norm": 0.5130371999976278, "learning_rate": 2.113314697492755e-06, "loss": 0.0188, "step": 214685 }, { "epoch": 0.8958032562525556, "grad_norm": 0.5743223326204515, "learning_rate": 2.113290088325516e-06, "loss": 0.0161, "step": 214690 }, { "epoch": 0.895824118967546, "grad_norm": 1.0767264023745617, "learning_rate": 2.1132654800179646e-06, "loss": 0.0221, "step": 214695 }, { "epoch": 0.8958449816825362, "grad_norm": 1.0918729690827038, "learning_rate": 2.113240872570051e-06, "loss": 0.02, "step": 214700 }, { "epoch": 0.8958658443975265, "grad_norm": 0.5888539656398317, "learning_rate": 2.1132162659817256e-06, "loss": 0.0178, "step": 214705 }, { "epoch": 0.8958867071125168, "grad_norm": 0.2545467287037568, "learning_rate": 2.1131916602529378e-06, "loss": 0.0187, "step": 214710 }, { "epoch": 0.8959075698275071, "grad_norm": 1.4212650681519359, "learning_rate": 2.113167055383638e-06, "loss": 0.0265, "step": 214715 }, { "epoch": 0.8959284325424973, "grad_norm": 0.1380926656600108, "learning_rate": 2.113142451373776e-06, "loss": 0.022, "step": 214720 }, { "epoch": 0.8959492952574877, "grad_norm": 0.3958388377079732, "learning_rate": 2.113117848223301e-06, "loss": 0.0174, "step": 214725 }, { "epoch": 0.8959701579724779, "grad_norm": 6.541550094633311, "learning_rate": 2.1130932459321637e-06, "loss": 0.0169, "step": 214730 }, { "epoch": 0.8959910206874682, "grad_norm": 0.4572562108280096, "learning_rate": 2.113068644500314e-06, "loss": 0.0199, "step": 214735 }, { "epoch": 0.8960118834024584, "grad_norm": 0.28990419678691626, "learning_rate": 2.1130440439277015e-06, "loss": 0.0279, "step": 214740 }, { "epoch": 0.8960327461174488, "grad_norm": 0.7975616485299609, "learning_rate": 2.1130194442142775e-06, "loss": 0.0219, "step": 214745 }, { "epoch": 0.896053608832439, "grad_norm": 0.545246679561978, "learning_rate": 2.11299484535999e-06, "loss": 0.0184, "step": 214750 }, { "epoch": 0.8960744715474293, "grad_norm": 0.6175496888622033, "learning_rate": 2.1129702473647905e-06, "loss": 0.0214, "step": 214755 }, { "epoch": 0.8960953342624196, "grad_norm": 0.41003458402359244, "learning_rate": 2.1129456502286285e-06, "loss": 0.0123, "step": 214760 }, { "epoch": 0.8961161969774099, "grad_norm": 0.39603193276587584, "learning_rate": 2.112921053951454e-06, "loss": 0.0181, "step": 214765 }, { "epoch": 0.8961370596924001, "grad_norm": 0.41117319877071834, "learning_rate": 2.1128964585332176e-06, "loss": 0.0156, "step": 214770 }, { "epoch": 0.8961579224073905, "grad_norm": 0.6443208753794357, "learning_rate": 2.112871863973868e-06, "loss": 0.0302, "step": 214775 }, { "epoch": 0.8961787851223807, "grad_norm": 0.7805171293918526, "learning_rate": 2.112847270273356e-06, "loss": 0.0167, "step": 214780 }, { "epoch": 0.8961996478373709, "grad_norm": 0.7622172584049218, "learning_rate": 2.112822677431632e-06, "loss": 0.0176, "step": 214785 }, { "epoch": 0.8962205105523613, "grad_norm": 0.6168081306362169, "learning_rate": 2.112798085448645e-06, "loss": 0.0178, "step": 214790 }, { "epoch": 0.8962413732673515, "grad_norm": 0.9561263095764697, "learning_rate": 2.112773494324346e-06, "loss": 0.0202, "step": 214795 }, { "epoch": 0.8962622359823418, "grad_norm": 0.6546879420558416, "learning_rate": 2.1127489040586846e-06, "loss": 0.0159, "step": 214800 }, { "epoch": 0.896283098697332, "grad_norm": 0.6888316260945524, "learning_rate": 2.1127243146516106e-06, "loss": 0.0166, "step": 214805 }, { "epoch": 0.8963039614123224, "grad_norm": 0.7146448453062965, "learning_rate": 2.1126997261030748e-06, "loss": 0.0213, "step": 214810 }, { "epoch": 0.8963248241273126, "grad_norm": 0.4584439517968334, "learning_rate": 2.112675138413026e-06, "loss": 0.0178, "step": 214815 }, { "epoch": 0.8963456868423029, "grad_norm": 0.3924931634015853, "learning_rate": 2.1126505515814157e-06, "loss": 0.0152, "step": 214820 }, { "epoch": 0.8963665495572932, "grad_norm": 0.6608442645044015, "learning_rate": 2.1126259656081925e-06, "loss": 0.019, "step": 214825 }, { "epoch": 0.8963874122722835, "grad_norm": 0.748280161301021, "learning_rate": 2.1126013804933076e-06, "loss": 0.0187, "step": 214830 }, { "epoch": 0.8964082749872737, "grad_norm": 0.45290456899630754, "learning_rate": 2.112576796236711e-06, "loss": 0.0198, "step": 214835 }, { "epoch": 0.8964291377022641, "grad_norm": 1.3311971611652607, "learning_rate": 2.1125522128383523e-06, "loss": 0.0169, "step": 214840 }, { "epoch": 0.8964500004172543, "grad_norm": 0.6096850632057451, "learning_rate": 2.1125276302981816e-06, "loss": 0.0156, "step": 214845 }, { "epoch": 0.8964708631322446, "grad_norm": 0.6937098161425974, "learning_rate": 2.112503048616149e-06, "loss": 0.0162, "step": 214850 }, { "epoch": 0.8964917258472349, "grad_norm": 0.47653731135231764, "learning_rate": 2.1124784677922045e-06, "loss": 0.0233, "step": 214855 }, { "epoch": 0.8965125885622252, "grad_norm": 0.6271676856941137, "learning_rate": 2.1124538878262982e-06, "loss": 0.0244, "step": 214860 }, { "epoch": 0.8965334512772154, "grad_norm": 0.5359935267870449, "learning_rate": 2.1124293087183804e-06, "loss": 0.0195, "step": 214865 }, { "epoch": 0.8965543139922056, "grad_norm": 0.9644971872180351, "learning_rate": 2.112404730468402e-06, "loss": 0.0148, "step": 214870 }, { "epoch": 0.896575176707196, "grad_norm": 0.3529602632726633, "learning_rate": 2.1123801530763107e-06, "loss": 0.0196, "step": 214875 }, { "epoch": 0.8965960394221862, "grad_norm": 0.7659119360145032, "learning_rate": 2.112355576542059e-06, "loss": 0.0163, "step": 214880 }, { "epoch": 0.8966169021371765, "grad_norm": 0.8124747334769559, "learning_rate": 2.1123310008655956e-06, "loss": 0.0185, "step": 214885 }, { "epoch": 0.8966377648521668, "grad_norm": 0.7599281230032678, "learning_rate": 2.1123064260468713e-06, "loss": 0.0211, "step": 214890 }, { "epoch": 0.8966586275671571, "grad_norm": 1.124518750831983, "learning_rate": 2.1122818520858353e-06, "loss": 0.0239, "step": 214895 }, { "epoch": 0.8966794902821473, "grad_norm": 0.6059165525808795, "learning_rate": 2.112257278982439e-06, "loss": 0.0194, "step": 214900 }, { "epoch": 0.8967003529971377, "grad_norm": 0.5826565899258442, "learning_rate": 2.1122327067366325e-06, "loss": 0.0165, "step": 214905 }, { "epoch": 0.8967212157121279, "grad_norm": 0.3013795682522555, "learning_rate": 2.1122081353483646e-06, "loss": 0.0142, "step": 214910 }, { "epoch": 0.8967420784271182, "grad_norm": 0.8958693120152935, "learning_rate": 2.112183564817586e-06, "loss": 0.0108, "step": 214915 }, { "epoch": 0.8967629411421084, "grad_norm": 0.6024334523800856, "learning_rate": 2.112158995144247e-06, "loss": 0.0229, "step": 214920 }, { "epoch": 0.8967838038570988, "grad_norm": 1.0411682837132044, "learning_rate": 2.112134426328298e-06, "loss": 0.0224, "step": 214925 }, { "epoch": 0.896804666572089, "grad_norm": 0.754565549148218, "learning_rate": 2.1121098583696884e-06, "loss": 0.0215, "step": 214930 }, { "epoch": 0.8968255292870793, "grad_norm": 0.8604740490873816, "learning_rate": 2.1120852912683694e-06, "loss": 0.0191, "step": 214935 }, { "epoch": 0.8968463920020696, "grad_norm": 0.6061088284216652, "learning_rate": 2.1120607250242897e-06, "loss": 0.0167, "step": 214940 }, { "epoch": 0.8968672547170599, "grad_norm": 0.9438667873541796, "learning_rate": 2.1120361596374006e-06, "loss": 0.016, "step": 214945 }, { "epoch": 0.8968881174320501, "grad_norm": 0.3528505933209486, "learning_rate": 2.112011595107652e-06, "loss": 0.0124, "step": 214950 }, { "epoch": 0.8969089801470405, "grad_norm": 0.9145624571596025, "learning_rate": 2.1119870314349945e-06, "loss": 0.0256, "step": 214955 }, { "epoch": 0.8969298428620307, "grad_norm": 0.9113770724267565, "learning_rate": 2.111962468619377e-06, "loss": 0.0133, "step": 214960 }, { "epoch": 0.896950705577021, "grad_norm": 0.9164616868744545, "learning_rate": 2.11193790666075e-06, "loss": 0.0188, "step": 214965 }, { "epoch": 0.8969715682920113, "grad_norm": 1.240620309368879, "learning_rate": 2.111913345559065e-06, "loss": 0.0267, "step": 214970 }, { "epoch": 0.8969924310070015, "grad_norm": 0.3506740161865417, "learning_rate": 2.111888785314271e-06, "loss": 0.0181, "step": 214975 }, { "epoch": 0.8970132937219918, "grad_norm": 0.572911479490677, "learning_rate": 2.111864225926318e-06, "loss": 0.0185, "step": 214980 }, { "epoch": 0.897034156436982, "grad_norm": 0.4920650687735936, "learning_rate": 2.111839667395157e-06, "loss": 0.0267, "step": 214985 }, { "epoch": 0.8970550191519724, "grad_norm": 0.4872943710951554, "learning_rate": 2.1118151097207373e-06, "loss": 0.0294, "step": 214990 }, { "epoch": 0.8970758818669626, "grad_norm": 1.3533553077161475, "learning_rate": 2.11179055290301e-06, "loss": 0.0201, "step": 214995 }, { "epoch": 0.8970967445819529, "grad_norm": 0.9919187930386999, "learning_rate": 2.1117659969419245e-06, "loss": 0.0224, "step": 215000 }, { "epoch": 0.8971176072969432, "grad_norm": 0.6269180974089041, "learning_rate": 2.1117414418374317e-06, "loss": 0.0138, "step": 215005 }, { "epoch": 0.8971384700119335, "grad_norm": 0.3931593419943169, "learning_rate": 2.1117168875894816e-06, "loss": 0.0182, "step": 215010 }, { "epoch": 0.8971593327269237, "grad_norm": 0.47582095366221827, "learning_rate": 2.1116923341980236e-06, "loss": 0.0207, "step": 215015 }, { "epoch": 0.8971801954419141, "grad_norm": 0.636253635965395, "learning_rate": 2.1116677816630086e-06, "loss": 0.0176, "step": 215020 }, { "epoch": 0.8972010581569043, "grad_norm": 0.9384103034722308, "learning_rate": 2.1116432299843872e-06, "loss": 0.0252, "step": 215025 }, { "epoch": 0.8972219208718946, "grad_norm": 0.8277785940207188, "learning_rate": 2.111618679162109e-06, "loss": 0.0199, "step": 215030 }, { "epoch": 0.8972427835868849, "grad_norm": 0.48267972440883256, "learning_rate": 2.111594129196124e-06, "loss": 0.0116, "step": 215035 }, { "epoch": 0.8972636463018752, "grad_norm": 0.8050096625613742, "learning_rate": 2.1115695800863834e-06, "loss": 0.0256, "step": 215040 }, { "epoch": 0.8972845090168654, "grad_norm": 0.6458921119073459, "learning_rate": 2.1115450318328367e-06, "loss": 0.0196, "step": 215045 }, { "epoch": 0.8973053717318556, "grad_norm": 0.6495016406637135, "learning_rate": 2.111520484435434e-06, "loss": 0.0204, "step": 215050 }, { "epoch": 0.897326234446846, "grad_norm": 0.7676732725526043, "learning_rate": 2.111495937894126e-06, "loss": 0.0185, "step": 215055 }, { "epoch": 0.8973470971618362, "grad_norm": 0.6724434806368557, "learning_rate": 2.1114713922088627e-06, "loss": 0.0144, "step": 215060 }, { "epoch": 0.8973679598768265, "grad_norm": 0.6184090382104886, "learning_rate": 2.1114468473795944e-06, "loss": 0.0188, "step": 215065 }, { "epoch": 0.8973888225918168, "grad_norm": 1.0494217855516412, "learning_rate": 2.111422303406272e-06, "loss": 0.0199, "step": 215070 }, { "epoch": 0.8974096853068071, "grad_norm": 0.6989326762634795, "learning_rate": 2.1113977602888443e-06, "loss": 0.0234, "step": 215075 }, { "epoch": 0.8974305480217973, "grad_norm": 0.7258440550855336, "learning_rate": 2.1113732180272625e-06, "loss": 0.0169, "step": 215080 }, { "epoch": 0.8974514107367877, "grad_norm": 0.4015326067663584, "learning_rate": 2.111348676621477e-06, "loss": 0.0146, "step": 215085 }, { "epoch": 0.8974722734517779, "grad_norm": 0.3194557035120299, "learning_rate": 2.111324136071438e-06, "loss": 0.0162, "step": 215090 }, { "epoch": 0.8974931361667682, "grad_norm": 0.35369029971123966, "learning_rate": 2.111299596377095e-06, "loss": 0.0145, "step": 215095 }, { "epoch": 0.8975139988817584, "grad_norm": 0.865078574121924, "learning_rate": 2.1112750575383988e-06, "loss": 0.0202, "step": 215100 }, { "epoch": 0.8975348615967488, "grad_norm": 0.7543347705602458, "learning_rate": 2.1112505195552998e-06, "loss": 0.0173, "step": 215105 }, { "epoch": 0.897555724311739, "grad_norm": 0.6995136154464885, "learning_rate": 2.1112259824277485e-06, "loss": 0.0198, "step": 215110 }, { "epoch": 0.8975765870267293, "grad_norm": 0.5777791473295188, "learning_rate": 2.1112014461556946e-06, "loss": 0.0202, "step": 215115 }, { "epoch": 0.8975974497417196, "grad_norm": 0.35333135155727513, "learning_rate": 2.1111769107390884e-06, "loss": 0.0178, "step": 215120 }, { "epoch": 0.8976183124567099, "grad_norm": 0.6643690940257222, "learning_rate": 2.1111523761778814e-06, "loss": 0.0161, "step": 215125 }, { "epoch": 0.8976391751717001, "grad_norm": 0.8403368456754179, "learning_rate": 2.1111278424720222e-06, "loss": 0.0202, "step": 215130 }, { "epoch": 0.8976600378866905, "grad_norm": 0.6535727417239732, "learning_rate": 2.1111033096214618e-06, "loss": 0.0183, "step": 215135 }, { "epoch": 0.8976809006016807, "grad_norm": 0.26177947709742816, "learning_rate": 2.1110787776261505e-06, "loss": 0.0179, "step": 215140 }, { "epoch": 0.897701763316671, "grad_norm": 0.3208226925498492, "learning_rate": 2.111054246486039e-06, "loss": 0.0219, "step": 215145 }, { "epoch": 0.8977226260316613, "grad_norm": 0.4696551686735091, "learning_rate": 2.111029716201077e-06, "loss": 0.0227, "step": 215150 }, { "epoch": 0.8977434887466516, "grad_norm": 0.7599028423633022, "learning_rate": 2.1110051867712156e-06, "loss": 0.0218, "step": 215155 }, { "epoch": 0.8977643514616418, "grad_norm": 0.3870586555285388, "learning_rate": 2.110980658196404e-06, "loss": 0.0168, "step": 215160 }, { "epoch": 0.897785214176632, "grad_norm": 0.6953483911063528, "learning_rate": 2.1109561304765938e-06, "loss": 0.0309, "step": 215165 }, { "epoch": 0.8978060768916224, "grad_norm": 0.37169529578705157, "learning_rate": 2.1109316036117343e-06, "loss": 0.0197, "step": 215170 }, { "epoch": 0.8978269396066126, "grad_norm": 0.5184100290596299, "learning_rate": 2.1109070776017764e-06, "loss": 0.0129, "step": 215175 }, { "epoch": 0.8978478023216029, "grad_norm": 0.3484455703999164, "learning_rate": 2.11088255244667e-06, "loss": 0.0157, "step": 215180 }, { "epoch": 0.8978686650365932, "grad_norm": 0.6676791352786289, "learning_rate": 2.110858028146366e-06, "loss": 0.0258, "step": 215185 }, { "epoch": 0.8978895277515835, "grad_norm": 1.155558068009228, "learning_rate": 2.1108335047008136e-06, "loss": 0.0199, "step": 215190 }, { "epoch": 0.8979103904665737, "grad_norm": 0.627037992364665, "learning_rate": 2.1108089821099647e-06, "loss": 0.0183, "step": 215195 }, { "epoch": 0.8979312531815641, "grad_norm": 0.6020425963481031, "learning_rate": 2.1107844603737687e-06, "loss": 0.0241, "step": 215200 }, { "epoch": 0.8979521158965543, "grad_norm": 0.6497008789439699, "learning_rate": 2.1107599394921764e-06, "loss": 0.0214, "step": 215205 }, { "epoch": 0.8979729786115446, "grad_norm": 0.9503893945366529, "learning_rate": 2.1107354194651378e-06, "loss": 0.0197, "step": 215210 }, { "epoch": 0.8979938413265349, "grad_norm": 0.3571025534468383, "learning_rate": 2.1107109002926034e-06, "loss": 0.0177, "step": 215215 }, { "epoch": 0.8980147040415252, "grad_norm": 0.9215274231752401, "learning_rate": 2.1106863819745237e-06, "loss": 0.0249, "step": 215220 }, { "epoch": 0.8980355667565154, "grad_norm": 0.4206854698947227, "learning_rate": 2.1106618645108487e-06, "loss": 0.017, "step": 215225 }, { "epoch": 0.8980564294715057, "grad_norm": 0.9229114173235967, "learning_rate": 2.1106373479015296e-06, "loss": 0.0191, "step": 215230 }, { "epoch": 0.898077292186496, "grad_norm": 0.33423095898471183, "learning_rate": 2.1106128321465157e-06, "loss": 0.0181, "step": 215235 }, { "epoch": 0.8980981549014863, "grad_norm": 0.44980034056806745, "learning_rate": 2.110588317245758e-06, "loss": 0.0223, "step": 215240 }, { "epoch": 0.8981190176164765, "grad_norm": 0.36724626295562013, "learning_rate": 2.110563803199207e-06, "loss": 0.0212, "step": 215245 }, { "epoch": 0.8981398803314669, "grad_norm": 0.6310682498842927, "learning_rate": 2.1105392900068126e-06, "loss": 0.0165, "step": 215250 }, { "epoch": 0.8981607430464571, "grad_norm": 0.3789241532911446, "learning_rate": 2.110514777668526e-06, "loss": 0.0205, "step": 215255 }, { "epoch": 0.8981816057614473, "grad_norm": 0.6697127410180274, "learning_rate": 2.1104902661842966e-06, "loss": 0.017, "step": 215260 }, { "epoch": 0.8982024684764377, "grad_norm": 0.9405700852515488, "learning_rate": 2.1104657555540755e-06, "loss": 0.0239, "step": 215265 }, { "epoch": 0.8982233311914279, "grad_norm": 0.3519851046909532, "learning_rate": 2.1104412457778127e-06, "loss": 0.0172, "step": 215270 }, { "epoch": 0.8982441939064182, "grad_norm": 0.7566233820338562, "learning_rate": 2.110416736855459e-06, "loss": 0.0196, "step": 215275 }, { "epoch": 0.8982650566214084, "grad_norm": 0.7448829558269043, "learning_rate": 2.110392228786965e-06, "loss": 0.0161, "step": 215280 }, { "epoch": 0.8982859193363988, "grad_norm": 0.6620774174219695, "learning_rate": 2.11036772157228e-06, "loss": 0.022, "step": 215285 }, { "epoch": 0.898306782051389, "grad_norm": 0.6448108422425368, "learning_rate": 2.1103432152113556e-06, "loss": 0.0214, "step": 215290 }, { "epoch": 0.8983276447663793, "grad_norm": 0.2627878623724454, "learning_rate": 2.110318709704142e-06, "loss": 0.0173, "step": 215295 }, { "epoch": 0.8983485074813696, "grad_norm": 0.24532243998395672, "learning_rate": 2.110294205050589e-06, "loss": 0.0164, "step": 215300 }, { "epoch": 0.8983693701963599, "grad_norm": 0.3774189330762124, "learning_rate": 2.110269701250648e-06, "loss": 0.0157, "step": 215305 }, { "epoch": 0.8983902329113501, "grad_norm": 0.863150044696253, "learning_rate": 2.110245198304268e-06, "loss": 0.0246, "step": 215310 }, { "epoch": 0.8984110956263405, "grad_norm": 0.9677311952139382, "learning_rate": 2.1102206962114015e-06, "loss": 0.0192, "step": 215315 }, { "epoch": 0.8984319583413307, "grad_norm": 0.35502640861441803, "learning_rate": 2.1101961949719975e-06, "loss": 0.0157, "step": 215320 }, { "epoch": 0.898452821056321, "grad_norm": 1.3933327097823982, "learning_rate": 2.1101716945860064e-06, "loss": 0.0244, "step": 215325 }, { "epoch": 0.8984736837713113, "grad_norm": 0.9760232231667698, "learning_rate": 2.1101471950533793e-06, "loss": 0.0181, "step": 215330 }, { "epoch": 0.8984945464863016, "grad_norm": 0.5507883459874148, "learning_rate": 2.1101226963740668e-06, "loss": 0.0243, "step": 215335 }, { "epoch": 0.8985154092012918, "grad_norm": 1.2392003155232778, "learning_rate": 2.110098198548018e-06, "loss": 0.0155, "step": 215340 }, { "epoch": 0.898536271916282, "grad_norm": 1.0925731122309204, "learning_rate": 2.1100737015751855e-06, "loss": 0.0285, "step": 215345 }, { "epoch": 0.8985571346312724, "grad_norm": 0.41787117307140487, "learning_rate": 2.1100492054555176e-06, "loss": 0.0209, "step": 215350 }, { "epoch": 0.8985779973462626, "grad_norm": 0.4990335821315329, "learning_rate": 2.1100247101889665e-06, "loss": 0.0156, "step": 215355 }, { "epoch": 0.8985988600612529, "grad_norm": 0.49919820005116516, "learning_rate": 2.1100002157754817e-06, "loss": 0.0163, "step": 215360 }, { "epoch": 0.8986197227762432, "grad_norm": 0.6833160034448025, "learning_rate": 2.109975722215014e-06, "loss": 0.0231, "step": 215365 }, { "epoch": 0.8986405854912335, "grad_norm": 0.8360800157510829, "learning_rate": 2.109951229507514e-06, "loss": 0.0216, "step": 215370 }, { "epoch": 0.8986614482062237, "grad_norm": 0.8883184070509974, "learning_rate": 2.109926737652932e-06, "loss": 0.0139, "step": 215375 }, { "epoch": 0.8986823109212141, "grad_norm": 0.6573100456572833, "learning_rate": 2.109902246651218e-06, "loss": 0.0177, "step": 215380 }, { "epoch": 0.8987031736362043, "grad_norm": 1.0466863945422071, "learning_rate": 2.1098777565023237e-06, "loss": 0.03, "step": 215385 }, { "epoch": 0.8987240363511946, "grad_norm": 0.5886769425985541, "learning_rate": 2.109853267206199e-06, "loss": 0.0163, "step": 215390 }, { "epoch": 0.8987448990661849, "grad_norm": 0.8240705938750867, "learning_rate": 2.109828778762794e-06, "loss": 0.0204, "step": 215395 }, { "epoch": 0.8987657617811752, "grad_norm": 0.4017693688720118, "learning_rate": 2.1098042911720596e-06, "loss": 0.0201, "step": 215400 }, { "epoch": 0.8987866244961654, "grad_norm": 0.5471428930996282, "learning_rate": 2.109779804433946e-06, "loss": 0.0194, "step": 215405 }, { "epoch": 0.8988074872111557, "grad_norm": 0.6074647659030216, "learning_rate": 2.1097553185484047e-06, "loss": 0.0226, "step": 215410 }, { "epoch": 0.898828349926146, "grad_norm": 0.7011660628518754, "learning_rate": 2.1097308335153855e-06, "loss": 0.0191, "step": 215415 }, { "epoch": 0.8988492126411363, "grad_norm": 0.4793931487173842, "learning_rate": 2.109706349334839e-06, "loss": 0.0204, "step": 215420 }, { "epoch": 0.8988700753561265, "grad_norm": 0.3431795456421001, "learning_rate": 2.1096818660067152e-06, "loss": 0.0179, "step": 215425 }, { "epoch": 0.8988909380711169, "grad_norm": 0.8828738763329136, "learning_rate": 2.1096573835309656e-06, "loss": 0.0208, "step": 215430 }, { "epoch": 0.8989118007861071, "grad_norm": 0.7237438255925119, "learning_rate": 2.1096329019075402e-06, "loss": 0.0156, "step": 215435 }, { "epoch": 0.8989326635010974, "grad_norm": 0.5889036220638763, "learning_rate": 2.1096084211363897e-06, "loss": 0.0178, "step": 215440 }, { "epoch": 0.8989535262160877, "grad_norm": 0.7866923816380541, "learning_rate": 2.1095839412174645e-06, "loss": 0.0199, "step": 215445 }, { "epoch": 0.898974388931078, "grad_norm": 0.6110179914474243, "learning_rate": 2.1095594621507153e-06, "loss": 0.0154, "step": 215450 }, { "epoch": 0.8989952516460682, "grad_norm": 1.2054008130819882, "learning_rate": 2.1095349839360924e-06, "loss": 0.0299, "step": 215455 }, { "epoch": 0.8990161143610584, "grad_norm": 0.6943656648785375, "learning_rate": 2.109510506573547e-06, "loss": 0.0156, "step": 215460 }, { "epoch": 0.8990369770760488, "grad_norm": 0.6759856435113745, "learning_rate": 2.109486030063029e-06, "loss": 0.0228, "step": 215465 }, { "epoch": 0.899057839791039, "grad_norm": 0.8776361934682806, "learning_rate": 2.109461554404489e-06, "loss": 0.0267, "step": 215470 }, { "epoch": 0.8990787025060293, "grad_norm": 0.48186265395154854, "learning_rate": 2.1094370795978782e-06, "loss": 0.0155, "step": 215475 }, { "epoch": 0.8990995652210196, "grad_norm": 0.8340547562829252, "learning_rate": 2.109412605643146e-06, "loss": 0.013, "step": 215480 }, { "epoch": 0.8991204279360099, "grad_norm": 0.4003525510282532, "learning_rate": 2.109388132540245e-06, "loss": 0.0278, "step": 215485 }, { "epoch": 0.8991412906510001, "grad_norm": 0.5196324433680259, "learning_rate": 2.1093636602891237e-06, "loss": 0.0186, "step": 215490 }, { "epoch": 0.8991621533659905, "grad_norm": 1.302994505285515, "learning_rate": 2.1093391888897337e-06, "loss": 0.0198, "step": 215495 }, { "epoch": 0.8991830160809807, "grad_norm": 0.49027908283154675, "learning_rate": 2.1093147183420252e-06, "loss": 0.0145, "step": 215500 }, { "epoch": 0.899203878795971, "grad_norm": 0.7632112577697505, "learning_rate": 2.1092902486459495e-06, "loss": 0.0349, "step": 215505 }, { "epoch": 0.8992247415109613, "grad_norm": 0.4461463723880087, "learning_rate": 2.1092657798014566e-06, "loss": 0.0184, "step": 215510 }, { "epoch": 0.8992456042259516, "grad_norm": 0.42213793623359797, "learning_rate": 2.109241311808497e-06, "loss": 0.0227, "step": 215515 }, { "epoch": 0.8992664669409418, "grad_norm": 0.34925403633930285, "learning_rate": 2.1092168446670217e-06, "loss": 0.0178, "step": 215520 }, { "epoch": 0.899287329655932, "grad_norm": 0.5431570451939224, "learning_rate": 2.109192378376981e-06, "loss": 0.0272, "step": 215525 }, { "epoch": 0.8993081923709224, "grad_norm": 0.888377504551491, "learning_rate": 2.109167912938326e-06, "loss": 0.0233, "step": 215530 }, { "epoch": 0.8993290550859127, "grad_norm": 0.336647251527789, "learning_rate": 2.1091434483510065e-06, "loss": 0.0183, "step": 215535 }, { "epoch": 0.8993499178009029, "grad_norm": 0.6963881294560931, "learning_rate": 2.109118984614974e-06, "loss": 0.0128, "step": 215540 }, { "epoch": 0.8993707805158933, "grad_norm": 0.9310127239193808, "learning_rate": 2.109094521730178e-06, "loss": 0.0215, "step": 215545 }, { "epoch": 0.8993916432308835, "grad_norm": 0.46809049148513787, "learning_rate": 2.109070059696571e-06, "loss": 0.0185, "step": 215550 }, { "epoch": 0.8994125059458737, "grad_norm": 0.6766404965610282, "learning_rate": 2.1090455985141025e-06, "loss": 0.0179, "step": 215555 }, { "epoch": 0.8994333686608641, "grad_norm": 0.36925345195372655, "learning_rate": 2.1090211381827227e-06, "loss": 0.0152, "step": 215560 }, { "epoch": 0.8994542313758543, "grad_norm": 0.9716626888818897, "learning_rate": 2.1089966787023827e-06, "loss": 0.0236, "step": 215565 }, { "epoch": 0.8994750940908446, "grad_norm": 0.5103853567304621, "learning_rate": 2.108972220073034e-06, "loss": 0.021, "step": 215570 }, { "epoch": 0.8994959568058348, "grad_norm": 0.43878606701365364, "learning_rate": 2.1089477622946252e-06, "loss": 0.0171, "step": 215575 }, { "epoch": 0.8995168195208252, "grad_norm": 0.5742538774157504, "learning_rate": 2.1089233053671083e-06, "loss": 0.0216, "step": 215580 }, { "epoch": 0.8995376822358154, "grad_norm": 1.4920887400699359, "learning_rate": 2.1088988492904347e-06, "loss": 0.032, "step": 215585 }, { "epoch": 0.8995585449508057, "grad_norm": 0.7073772325538629, "learning_rate": 2.1088743940645536e-06, "loss": 0.0179, "step": 215590 }, { "epoch": 0.899579407665796, "grad_norm": 0.9537948099705982, "learning_rate": 2.1088499396894167e-06, "loss": 0.024, "step": 215595 }, { "epoch": 0.8996002703807863, "grad_norm": 0.7044870344442004, "learning_rate": 2.1088254861649736e-06, "loss": 0.0176, "step": 215600 }, { "epoch": 0.8996211330957765, "grad_norm": 1.576640361776608, "learning_rate": 2.1088010334911765e-06, "loss": 0.0192, "step": 215605 }, { "epoch": 0.8996419958107669, "grad_norm": 0.532161046499161, "learning_rate": 2.108776581667975e-06, "loss": 0.0187, "step": 215610 }, { "epoch": 0.8996628585257571, "grad_norm": 0.42277840678869527, "learning_rate": 2.1087521306953194e-06, "loss": 0.0157, "step": 215615 }, { "epoch": 0.8996837212407474, "grad_norm": 0.8055173745785288, "learning_rate": 2.108727680573162e-06, "loss": 0.0265, "step": 215620 }, { "epoch": 0.8997045839557377, "grad_norm": 0.5876660698461316, "learning_rate": 2.108703231301452e-06, "loss": 0.0178, "step": 215625 }, { "epoch": 0.899725446670728, "grad_norm": 1.313200032761839, "learning_rate": 2.1086787828801406e-06, "loss": 0.0248, "step": 215630 }, { "epoch": 0.8997463093857182, "grad_norm": 0.5648240741533233, "learning_rate": 2.1086543353091783e-06, "loss": 0.0136, "step": 215635 }, { "epoch": 0.8997671721007084, "grad_norm": 0.858462000840563, "learning_rate": 2.1086298885885164e-06, "loss": 0.0208, "step": 215640 }, { "epoch": 0.8997880348156988, "grad_norm": 1.1824220709238873, "learning_rate": 2.1086054427181044e-06, "loss": 0.0217, "step": 215645 }, { "epoch": 0.899808897530689, "grad_norm": 0.24650711711661133, "learning_rate": 2.108580997697895e-06, "loss": 0.0215, "step": 215650 }, { "epoch": 0.8998297602456793, "grad_norm": 0.4327264590407177, "learning_rate": 2.1085565535278373e-06, "loss": 0.0177, "step": 215655 }, { "epoch": 0.8998506229606696, "grad_norm": 0.5750872889097516, "learning_rate": 2.1085321102078827e-06, "loss": 0.0205, "step": 215660 }, { "epoch": 0.8998714856756599, "grad_norm": 1.8375629651605103, "learning_rate": 2.108507667737981e-06, "loss": 0.0284, "step": 215665 }, { "epoch": 0.8998923483906501, "grad_norm": 0.6678847825491144, "learning_rate": 2.108483226118084e-06, "loss": 0.0195, "step": 215670 }, { "epoch": 0.8999132111056405, "grad_norm": 0.8482611238102904, "learning_rate": 2.1084587853481425e-06, "loss": 0.018, "step": 215675 }, { "epoch": 0.8999340738206307, "grad_norm": 0.4758736200490826, "learning_rate": 2.108434345428106e-06, "loss": 0.0182, "step": 215680 }, { "epoch": 0.899954936535621, "grad_norm": 0.6331832150646456, "learning_rate": 2.108409906357927e-06, "loss": 0.014, "step": 215685 }, { "epoch": 0.8999757992506113, "grad_norm": 0.783114905736674, "learning_rate": 2.108385468137555e-06, "loss": 0.0248, "step": 215690 }, { "epoch": 0.8999966619656016, "grad_norm": 0.9307825787671962, "learning_rate": 2.1083610307669404e-06, "loss": 0.0185, "step": 215695 }, { "epoch": 0.9000175246805918, "grad_norm": 1.1371418911309017, "learning_rate": 2.1083365942460353e-06, "loss": 0.0232, "step": 215700 }, { "epoch": 0.9000383873955821, "grad_norm": 0.41772443539651516, "learning_rate": 2.1083121585747894e-06, "loss": 0.0138, "step": 215705 }, { "epoch": 0.9000592501105724, "grad_norm": 0.4110665367605148, "learning_rate": 2.1082877237531544e-06, "loss": 0.0189, "step": 215710 }, { "epoch": 0.9000801128255627, "grad_norm": 0.9191584770757074, "learning_rate": 2.10826328978108e-06, "loss": 0.0177, "step": 215715 }, { "epoch": 0.9001009755405529, "grad_norm": 0.40583494623233046, "learning_rate": 2.1082388566585176e-06, "loss": 0.0165, "step": 215720 }, { "epoch": 0.9001218382555433, "grad_norm": 0.6563069315450346, "learning_rate": 2.108214424385418e-06, "loss": 0.0205, "step": 215725 }, { "epoch": 0.9001427009705335, "grad_norm": 0.633180298511169, "learning_rate": 2.1081899929617315e-06, "loss": 0.0127, "step": 215730 }, { "epoch": 0.9001635636855237, "grad_norm": 0.6253745853751395, "learning_rate": 2.1081655623874094e-06, "loss": 0.0172, "step": 215735 }, { "epoch": 0.9001844264005141, "grad_norm": 0.45170940095695955, "learning_rate": 2.1081411326624025e-06, "loss": 0.0221, "step": 215740 }, { "epoch": 0.9002052891155043, "grad_norm": 0.2774871906575624, "learning_rate": 2.1081167037866614e-06, "loss": 0.018, "step": 215745 }, { "epoch": 0.9002261518304946, "grad_norm": 0.9719948086338123, "learning_rate": 2.1080922757601365e-06, "loss": 0.0203, "step": 215750 }, { "epoch": 0.9002470145454848, "grad_norm": 0.8015252507247063, "learning_rate": 2.108067848582779e-06, "loss": 0.0217, "step": 215755 }, { "epoch": 0.9002678772604752, "grad_norm": 1.2486813496603577, "learning_rate": 2.1080434222545403e-06, "loss": 0.0222, "step": 215760 }, { "epoch": 0.9002887399754654, "grad_norm": 0.7387305123331245, "learning_rate": 2.10801899677537e-06, "loss": 0.0172, "step": 215765 }, { "epoch": 0.9003096026904557, "grad_norm": 0.6516394206839655, "learning_rate": 2.1079945721452198e-06, "loss": 0.0155, "step": 215770 }, { "epoch": 0.900330465405446, "grad_norm": 2.9050134274306862, "learning_rate": 2.10797014836404e-06, "loss": 0.0217, "step": 215775 }, { "epoch": 0.9003513281204363, "grad_norm": 0.8910968439126243, "learning_rate": 2.1079457254317817e-06, "loss": 0.02, "step": 215780 }, { "epoch": 0.9003721908354265, "grad_norm": 0.8276734836075388, "learning_rate": 2.1079213033483957e-06, "loss": 0.0225, "step": 215785 }, { "epoch": 0.9003930535504169, "grad_norm": 0.8835796484444317, "learning_rate": 2.107896882113833e-06, "loss": 0.0178, "step": 215790 }, { "epoch": 0.9004139162654071, "grad_norm": 0.43293945730225947, "learning_rate": 2.107872461728044e-06, "loss": 0.0206, "step": 215795 }, { "epoch": 0.9004347789803974, "grad_norm": 0.4159024092209324, "learning_rate": 2.1078480421909796e-06, "loss": 0.0194, "step": 215800 }, { "epoch": 0.9004556416953877, "grad_norm": 0.37374217225155987, "learning_rate": 2.107823623502591e-06, "loss": 0.0184, "step": 215805 }, { "epoch": 0.900476504410378, "grad_norm": 0.7440597501013992, "learning_rate": 2.107799205662829e-06, "loss": 0.0199, "step": 215810 }, { "epoch": 0.9004973671253682, "grad_norm": 1.0492795477307624, "learning_rate": 2.107774788671644e-06, "loss": 0.0268, "step": 215815 }, { "epoch": 0.9005182298403585, "grad_norm": 0.46871833067650764, "learning_rate": 2.1077503725289873e-06, "loss": 0.0215, "step": 215820 }, { "epoch": 0.9005390925553488, "grad_norm": 0.6546686522763585, "learning_rate": 2.1077259572348096e-06, "loss": 0.0203, "step": 215825 }, { "epoch": 0.900559955270339, "grad_norm": 0.4916842511441797, "learning_rate": 2.1077015427890616e-06, "loss": 0.0157, "step": 215830 }, { "epoch": 0.9005808179853293, "grad_norm": 0.36231077527050237, "learning_rate": 2.1076771291916947e-06, "loss": 0.0227, "step": 215835 }, { "epoch": 0.9006016807003197, "grad_norm": 0.4392948808819934, "learning_rate": 2.1076527164426587e-06, "loss": 0.0179, "step": 215840 }, { "epoch": 0.9006225434153099, "grad_norm": 0.9618039035922804, "learning_rate": 2.107628304541906e-06, "loss": 0.0193, "step": 215845 }, { "epoch": 0.9006434061303001, "grad_norm": 0.5662601166088743, "learning_rate": 2.107603893489386e-06, "loss": 0.0292, "step": 215850 }, { "epoch": 0.9006642688452905, "grad_norm": 0.7136170430800409, "learning_rate": 2.10757948328505e-06, "loss": 0.0151, "step": 215855 }, { "epoch": 0.9006851315602807, "grad_norm": 0.3103382273167156, "learning_rate": 2.10755507392885e-06, "loss": 0.0179, "step": 215860 }, { "epoch": 0.900705994275271, "grad_norm": 1.0061156851332784, "learning_rate": 2.1075306654207356e-06, "loss": 0.0188, "step": 215865 }, { "epoch": 0.9007268569902613, "grad_norm": 1.1177107666176993, "learning_rate": 2.1075062577606575e-06, "loss": 0.0148, "step": 215870 }, { "epoch": 0.9007477197052516, "grad_norm": 0.5232907250040307, "learning_rate": 2.107481850948568e-06, "loss": 0.0161, "step": 215875 }, { "epoch": 0.9007685824202418, "grad_norm": 0.8746819969861317, "learning_rate": 2.107457444984417e-06, "loss": 0.0172, "step": 215880 }, { "epoch": 0.9007894451352321, "grad_norm": 0.5571048619477013, "learning_rate": 2.1074330398681546e-06, "loss": 0.0174, "step": 215885 }, { "epoch": 0.9008103078502224, "grad_norm": 0.6728476602855703, "learning_rate": 2.1074086355997337e-06, "loss": 0.017, "step": 215890 }, { "epoch": 0.9008311705652127, "grad_norm": 0.32045935445488083, "learning_rate": 2.107384232179104e-06, "loss": 0.0144, "step": 215895 }, { "epoch": 0.9008520332802029, "grad_norm": 0.7508731754855869, "learning_rate": 2.1073598296062166e-06, "loss": 0.0212, "step": 215900 }, { "epoch": 0.9008728959951933, "grad_norm": 0.5751298939869227, "learning_rate": 2.1073354278810223e-06, "loss": 0.0206, "step": 215905 }, { "epoch": 0.9008937587101835, "grad_norm": 0.6886580606466232, "learning_rate": 2.107311027003472e-06, "loss": 0.0152, "step": 215910 }, { "epoch": 0.9009146214251738, "grad_norm": 0.7549138730261051, "learning_rate": 2.107286626973517e-06, "loss": 0.0235, "step": 215915 }, { "epoch": 0.9009354841401641, "grad_norm": 0.710874398419154, "learning_rate": 2.107262227791108e-06, "loss": 0.0217, "step": 215920 }, { "epoch": 0.9009563468551544, "grad_norm": 0.45488219276563113, "learning_rate": 2.1072378294561957e-06, "loss": 0.0197, "step": 215925 }, { "epoch": 0.9009772095701446, "grad_norm": 0.7984636934658395, "learning_rate": 2.1072134319687313e-06, "loss": 0.018, "step": 215930 }, { "epoch": 0.9009980722851348, "grad_norm": 0.9280054856363186, "learning_rate": 2.107189035328666e-06, "loss": 0.0266, "step": 215935 }, { "epoch": 0.9010189350001252, "grad_norm": 0.874102089773093, "learning_rate": 2.10716463953595e-06, "loss": 0.0249, "step": 215940 }, { "epoch": 0.9010397977151154, "grad_norm": 0.6054223976687539, "learning_rate": 2.1071402445905353e-06, "loss": 0.0178, "step": 215945 }, { "epoch": 0.9010606604301057, "grad_norm": 0.6072377656950698, "learning_rate": 2.107115850492372e-06, "loss": 0.0235, "step": 215950 }, { "epoch": 0.901081523145096, "grad_norm": 0.47460414506619236, "learning_rate": 2.107091457241411e-06, "loss": 0.0193, "step": 215955 }, { "epoch": 0.9011023858600863, "grad_norm": 0.479823257698768, "learning_rate": 2.107067064837604e-06, "loss": 0.0155, "step": 215960 }, { "epoch": 0.9011232485750765, "grad_norm": 0.5112041189663379, "learning_rate": 2.1070426732809015e-06, "loss": 0.015, "step": 215965 }, { "epoch": 0.9011441112900669, "grad_norm": 0.7207190888975907, "learning_rate": 2.107018282571254e-06, "loss": 0.0247, "step": 215970 }, { "epoch": 0.9011649740050571, "grad_norm": 0.720963059628871, "learning_rate": 2.1069938927086136e-06, "loss": 0.0151, "step": 215975 }, { "epoch": 0.9011858367200474, "grad_norm": 0.37784099130943866, "learning_rate": 2.106969503692931e-06, "loss": 0.0189, "step": 215980 }, { "epoch": 0.9012066994350377, "grad_norm": 0.3733011463221473, "learning_rate": 2.1069451155241562e-06, "loss": 0.0214, "step": 215985 }, { "epoch": 0.901227562150028, "grad_norm": 0.5298371893841728, "learning_rate": 2.106920728202241e-06, "loss": 0.0149, "step": 215990 }, { "epoch": 0.9012484248650182, "grad_norm": 0.6873953467318802, "learning_rate": 2.106896341727136e-06, "loss": 0.019, "step": 215995 }, { "epoch": 0.9012692875800085, "grad_norm": 1.0936873881570224, "learning_rate": 2.106871956098793e-06, "loss": 0.0208, "step": 216000 }, { "epoch": 0.9012901502949988, "grad_norm": 0.8253024151049284, "learning_rate": 2.106847571317162e-06, "loss": 0.027, "step": 216005 }, { "epoch": 0.9013110130099891, "grad_norm": 0.46380622718386594, "learning_rate": 2.1068231873821942e-06, "loss": 0.0182, "step": 216010 }, { "epoch": 0.9013318757249793, "grad_norm": 0.8125206780100357, "learning_rate": 2.1067988042938414e-06, "loss": 0.018, "step": 216015 }, { "epoch": 0.9013527384399697, "grad_norm": 0.1847448241719378, "learning_rate": 2.1067744220520536e-06, "loss": 0.0221, "step": 216020 }, { "epoch": 0.9013736011549599, "grad_norm": 0.8873072332277286, "learning_rate": 2.106750040656783e-06, "loss": 0.0221, "step": 216025 }, { "epoch": 0.9013944638699501, "grad_norm": 1.0249337508365814, "learning_rate": 2.106725660107979e-06, "loss": 0.0246, "step": 216030 }, { "epoch": 0.9014153265849405, "grad_norm": 0.8473257388501276, "learning_rate": 2.106701280405594e-06, "loss": 0.0257, "step": 216035 }, { "epoch": 0.9014361892999307, "grad_norm": 0.29395408835354275, "learning_rate": 2.106676901549578e-06, "loss": 0.0244, "step": 216040 }, { "epoch": 0.901457052014921, "grad_norm": 0.45137807041530853, "learning_rate": 2.106652523539883e-06, "loss": 0.0238, "step": 216045 }, { "epoch": 0.9014779147299113, "grad_norm": 0.6314793522433917, "learning_rate": 2.1066281463764597e-06, "loss": 0.0191, "step": 216050 }, { "epoch": 0.9014987774449016, "grad_norm": 0.3878449569542906, "learning_rate": 2.1066037700592586e-06, "loss": 0.0199, "step": 216055 }, { "epoch": 0.9015196401598918, "grad_norm": 0.6085525240713371, "learning_rate": 2.1065793945882316e-06, "loss": 0.0209, "step": 216060 }, { "epoch": 0.9015405028748821, "grad_norm": 0.8298946152149473, "learning_rate": 2.1065550199633296e-06, "loss": 0.03, "step": 216065 }, { "epoch": 0.9015613655898724, "grad_norm": 1.052604390724031, "learning_rate": 2.1065306461845025e-06, "loss": 0.0258, "step": 216070 }, { "epoch": 0.9015822283048627, "grad_norm": 0.6864256355085194, "learning_rate": 2.1065062732517025e-06, "loss": 0.0176, "step": 216075 }, { "epoch": 0.9016030910198529, "grad_norm": 0.46337530271420013, "learning_rate": 2.1064819011648804e-06, "loss": 0.0233, "step": 216080 }, { "epoch": 0.9016239537348433, "grad_norm": 1.3747577197757637, "learning_rate": 2.1064575299239877e-06, "loss": 0.0221, "step": 216085 }, { "epoch": 0.9016448164498335, "grad_norm": 0.8989091096922088, "learning_rate": 2.1064331595289746e-06, "loss": 0.0269, "step": 216090 }, { "epoch": 0.9016656791648238, "grad_norm": 0.4068842836619034, "learning_rate": 2.106408789979792e-06, "loss": 0.0149, "step": 216095 }, { "epoch": 0.9016865418798141, "grad_norm": 1.282135573005798, "learning_rate": 2.1063844212763928e-06, "loss": 0.0169, "step": 216100 }, { "epoch": 0.9017074045948044, "grad_norm": 0.66644367055428, "learning_rate": 2.1063600534187258e-06, "loss": 0.0184, "step": 216105 }, { "epoch": 0.9017282673097946, "grad_norm": 0.6045058417795097, "learning_rate": 2.1063356864067437e-06, "loss": 0.0197, "step": 216110 }, { "epoch": 0.9017491300247849, "grad_norm": 0.4901661280442593, "learning_rate": 2.106311320240397e-06, "loss": 0.0177, "step": 216115 }, { "epoch": 0.9017699927397752, "grad_norm": 0.6431386446481516, "learning_rate": 2.1062869549196363e-06, "loss": 0.0177, "step": 216120 }, { "epoch": 0.9017908554547654, "grad_norm": 0.6859266523820188, "learning_rate": 2.1062625904444133e-06, "loss": 0.0128, "step": 216125 }, { "epoch": 0.9018117181697557, "grad_norm": 0.8121340704983304, "learning_rate": 2.106238226814679e-06, "loss": 0.0208, "step": 216130 }, { "epoch": 0.901832580884746, "grad_norm": 1.2221695691134695, "learning_rate": 2.106213864030385e-06, "loss": 0.0262, "step": 216135 }, { "epoch": 0.9018534435997363, "grad_norm": 0.2585713933823939, "learning_rate": 2.106189502091481e-06, "loss": 0.0243, "step": 216140 }, { "epoch": 0.9018743063147265, "grad_norm": 0.8008662052440069, "learning_rate": 2.1061651409979196e-06, "loss": 0.0177, "step": 216145 }, { "epoch": 0.9018951690297169, "grad_norm": 0.3853961898074412, "learning_rate": 2.106140780749651e-06, "loss": 0.0178, "step": 216150 }, { "epoch": 0.9019160317447071, "grad_norm": 0.24891825190739642, "learning_rate": 2.106116421346627e-06, "loss": 0.0142, "step": 216155 }, { "epoch": 0.9019368944596974, "grad_norm": 0.5711794980867404, "learning_rate": 2.106092062788798e-06, "loss": 0.0185, "step": 216160 }, { "epoch": 0.9019577571746877, "grad_norm": 0.6713107068352069, "learning_rate": 2.1060677050761153e-06, "loss": 0.0155, "step": 216165 }, { "epoch": 0.901978619889678, "grad_norm": 0.6925574528298394, "learning_rate": 2.1060433482085306e-06, "loss": 0.0244, "step": 216170 }, { "epoch": 0.9019994826046682, "grad_norm": 0.30995614661294746, "learning_rate": 2.1060189921859946e-06, "loss": 0.0216, "step": 216175 }, { "epoch": 0.9020203453196585, "grad_norm": 0.34140918383772845, "learning_rate": 2.1059946370084582e-06, "loss": 0.0129, "step": 216180 }, { "epoch": 0.9020412080346488, "grad_norm": 0.5095128196844456, "learning_rate": 2.105970282675873e-06, "loss": 0.0235, "step": 216185 }, { "epoch": 0.9020620707496391, "grad_norm": 0.5517473343562596, "learning_rate": 2.1059459291881896e-06, "loss": 0.0291, "step": 216190 }, { "epoch": 0.9020829334646293, "grad_norm": 0.5329417158629325, "learning_rate": 2.10592157654536e-06, "loss": 0.0172, "step": 216195 }, { "epoch": 0.9021037961796197, "grad_norm": 0.5367992421903894, "learning_rate": 2.1058972247473343e-06, "loss": 0.019, "step": 216200 }, { "epoch": 0.9021246588946099, "grad_norm": 1.1784301221517293, "learning_rate": 2.1058728737940652e-06, "loss": 0.0297, "step": 216205 }, { "epoch": 0.9021455216096002, "grad_norm": 0.7356824144970211, "learning_rate": 2.105848523685502e-06, "loss": 0.0173, "step": 216210 }, { "epoch": 0.9021663843245905, "grad_norm": 0.5695420294101949, "learning_rate": 2.105824174421597e-06, "loss": 0.0222, "step": 216215 }, { "epoch": 0.9021872470395808, "grad_norm": 0.5549226087873408, "learning_rate": 2.105799826002301e-06, "loss": 0.0193, "step": 216220 }, { "epoch": 0.902208109754571, "grad_norm": 0.5902177915505368, "learning_rate": 2.1057754784275653e-06, "loss": 0.0269, "step": 216225 }, { "epoch": 0.9022289724695614, "grad_norm": 0.43778409001493307, "learning_rate": 2.105751131697341e-06, "loss": 0.0209, "step": 216230 }, { "epoch": 0.9022498351845516, "grad_norm": 0.9413825096513894, "learning_rate": 2.105726785811579e-06, "loss": 0.027, "step": 216235 }, { "epoch": 0.9022706978995418, "grad_norm": 0.4908291446985046, "learning_rate": 2.1057024407702316e-06, "loss": 0.0168, "step": 216240 }, { "epoch": 0.9022915606145321, "grad_norm": 0.30288452679858374, "learning_rate": 2.1056780965732488e-06, "loss": 0.0194, "step": 216245 }, { "epoch": 0.9023124233295224, "grad_norm": 0.6569450908834354, "learning_rate": 2.105653753220582e-06, "loss": 0.0151, "step": 216250 }, { "epoch": 0.9023332860445127, "grad_norm": 1.1371439086929274, "learning_rate": 2.105629410712183e-06, "loss": 0.0187, "step": 216255 }, { "epoch": 0.9023541487595029, "grad_norm": 0.5418641672370116, "learning_rate": 2.1056050690480025e-06, "loss": 0.0191, "step": 216260 }, { "epoch": 0.9023750114744933, "grad_norm": 0.49052013906904873, "learning_rate": 2.1055807282279914e-06, "loss": 0.0218, "step": 216265 }, { "epoch": 0.9023958741894835, "grad_norm": 0.5680109410886818, "learning_rate": 2.1055563882521016e-06, "loss": 0.0214, "step": 216270 }, { "epoch": 0.9024167369044738, "grad_norm": 0.786085176757946, "learning_rate": 2.1055320491202843e-06, "loss": 0.0152, "step": 216275 }, { "epoch": 0.9024375996194641, "grad_norm": 0.3925013254855728, "learning_rate": 2.1055077108324896e-06, "loss": 0.0169, "step": 216280 }, { "epoch": 0.9024584623344544, "grad_norm": 1.2983114421792643, "learning_rate": 2.10548337338867e-06, "loss": 0.0276, "step": 216285 }, { "epoch": 0.9024793250494446, "grad_norm": 0.8087150714591227, "learning_rate": 2.1054590367887766e-06, "loss": 0.0156, "step": 216290 }, { "epoch": 0.9025001877644349, "grad_norm": 0.7304373898865264, "learning_rate": 2.1054347010327605e-06, "loss": 0.0225, "step": 216295 }, { "epoch": 0.9025210504794252, "grad_norm": 0.591307569192977, "learning_rate": 2.105410366120572e-06, "loss": 0.0201, "step": 216300 }, { "epoch": 0.9025419131944155, "grad_norm": 0.5516039459653705, "learning_rate": 2.1053860320521636e-06, "loss": 0.02, "step": 216305 }, { "epoch": 0.9025627759094057, "grad_norm": 0.9173371464039651, "learning_rate": 2.1053616988274855e-06, "loss": 0.0258, "step": 216310 }, { "epoch": 0.9025836386243961, "grad_norm": 0.44410379196256705, "learning_rate": 2.1053373664464895e-06, "loss": 0.013, "step": 216315 }, { "epoch": 0.9026045013393863, "grad_norm": 0.8454514165613873, "learning_rate": 2.1053130349091273e-06, "loss": 0.0144, "step": 216320 }, { "epoch": 0.9026253640543765, "grad_norm": 0.9040852040114187, "learning_rate": 2.1052887042153493e-06, "loss": 0.0309, "step": 216325 }, { "epoch": 0.9026462267693669, "grad_norm": 0.4248171255642395, "learning_rate": 2.1052643743651073e-06, "loss": 0.0192, "step": 216330 }, { "epoch": 0.9026670894843571, "grad_norm": 0.9401256364308099, "learning_rate": 2.1052400453583518e-06, "loss": 0.0274, "step": 216335 }, { "epoch": 0.9026879521993474, "grad_norm": 0.7577750009442249, "learning_rate": 2.1052157171950356e-06, "loss": 0.0259, "step": 216340 }, { "epoch": 0.9027088149143377, "grad_norm": 0.7324467942221559, "learning_rate": 2.1051913898751085e-06, "loss": 0.0189, "step": 216345 }, { "epoch": 0.902729677629328, "grad_norm": 0.8761108680682274, "learning_rate": 2.105167063398522e-06, "loss": 0.0224, "step": 216350 }, { "epoch": 0.9027505403443182, "grad_norm": 0.30032602801579367, "learning_rate": 2.1051427377652277e-06, "loss": 0.0192, "step": 216355 }, { "epoch": 0.9027714030593085, "grad_norm": 0.7401545409395415, "learning_rate": 2.1051184129751774e-06, "loss": 0.0199, "step": 216360 }, { "epoch": 0.9027922657742988, "grad_norm": 0.8598292149793317, "learning_rate": 2.1050940890283214e-06, "loss": 0.0212, "step": 216365 }, { "epoch": 0.9028131284892891, "grad_norm": 0.5421815858736087, "learning_rate": 2.1050697659246113e-06, "loss": 0.0201, "step": 216370 }, { "epoch": 0.9028339912042793, "grad_norm": 0.6137482365165612, "learning_rate": 2.1050454436639984e-06, "loss": 0.0184, "step": 216375 }, { "epoch": 0.9028548539192697, "grad_norm": 0.5738015837056444, "learning_rate": 2.1050211222464344e-06, "loss": 0.0224, "step": 216380 }, { "epoch": 0.9028757166342599, "grad_norm": 0.485858118386803, "learning_rate": 2.1049968016718702e-06, "loss": 0.0162, "step": 216385 }, { "epoch": 0.9028965793492502, "grad_norm": 0.582057404575987, "learning_rate": 2.104972481940257e-06, "loss": 0.0286, "step": 216390 }, { "epoch": 0.9029174420642405, "grad_norm": 0.7764484396292192, "learning_rate": 2.1049481630515465e-06, "loss": 0.0226, "step": 216395 }, { "epoch": 0.9029383047792308, "grad_norm": 0.3149370946064522, "learning_rate": 2.10492384500569e-06, "loss": 0.0182, "step": 216400 }, { "epoch": 0.902959167494221, "grad_norm": 1.0342204584804358, "learning_rate": 2.104899527802638e-06, "loss": 0.0245, "step": 216405 }, { "epoch": 0.9029800302092114, "grad_norm": 0.4127383974803085, "learning_rate": 2.104875211442343e-06, "loss": 0.0171, "step": 216410 }, { "epoch": 0.9030008929242016, "grad_norm": 0.5542165359935404, "learning_rate": 2.1048508959247557e-06, "loss": 0.0228, "step": 216415 }, { "epoch": 0.9030217556391918, "grad_norm": 0.5605638878586067, "learning_rate": 2.1048265812498268e-06, "loss": 0.0234, "step": 216420 }, { "epoch": 0.9030426183541821, "grad_norm": 1.225819190434506, "learning_rate": 2.104802267417509e-06, "loss": 0.0257, "step": 216425 }, { "epoch": 0.9030634810691724, "grad_norm": 0.6260123230053514, "learning_rate": 2.104777954427753e-06, "loss": 0.0214, "step": 216430 }, { "epoch": 0.9030843437841627, "grad_norm": 0.5444454013682303, "learning_rate": 2.1047536422805097e-06, "loss": 0.0175, "step": 216435 }, { "epoch": 0.9031052064991529, "grad_norm": 0.5630808669509193, "learning_rate": 2.1047293309757305e-06, "loss": 0.0213, "step": 216440 }, { "epoch": 0.9031260692141433, "grad_norm": 1.2103211588475735, "learning_rate": 2.104705020513368e-06, "loss": 0.029, "step": 216445 }, { "epoch": 0.9031469319291335, "grad_norm": 1.0407155175863536, "learning_rate": 2.1046807108933716e-06, "loss": 0.0249, "step": 216450 }, { "epoch": 0.9031677946441238, "grad_norm": 0.44425062479162847, "learning_rate": 2.1046564021156945e-06, "loss": 0.023, "step": 216455 }, { "epoch": 0.9031886573591141, "grad_norm": 0.5071695816936065, "learning_rate": 2.104632094180287e-06, "loss": 0.0253, "step": 216460 }, { "epoch": 0.9032095200741044, "grad_norm": 0.5222003288851789, "learning_rate": 2.1046077870871005e-06, "loss": 0.0232, "step": 216465 }, { "epoch": 0.9032303827890946, "grad_norm": 0.5279759783049529, "learning_rate": 2.1045834808360867e-06, "loss": 0.0165, "step": 216470 }, { "epoch": 0.9032512455040849, "grad_norm": 1.0135207895625853, "learning_rate": 2.104559175427197e-06, "loss": 0.0223, "step": 216475 }, { "epoch": 0.9032721082190752, "grad_norm": 0.5381915521655338, "learning_rate": 2.1045348708603826e-06, "loss": 0.023, "step": 216480 }, { "epoch": 0.9032929709340655, "grad_norm": 1.24981292019545, "learning_rate": 2.1045105671355946e-06, "loss": 0.019, "step": 216485 }, { "epoch": 0.9033138336490557, "grad_norm": 0.6054267580660252, "learning_rate": 2.1044862642527848e-06, "loss": 0.0224, "step": 216490 }, { "epoch": 0.9033346963640461, "grad_norm": 0.8697068846107331, "learning_rate": 2.104461962211904e-06, "loss": 0.024, "step": 216495 }, { "epoch": 0.9033555590790363, "grad_norm": 0.35694813493997085, "learning_rate": 2.104437661012905e-06, "loss": 0.0134, "step": 216500 }, { "epoch": 0.9033764217940266, "grad_norm": 0.41666364742932865, "learning_rate": 2.1044133606557372e-06, "loss": 0.0189, "step": 216505 }, { "epoch": 0.9033972845090169, "grad_norm": 0.2815265453597937, "learning_rate": 2.104389061140354e-06, "loss": 0.0173, "step": 216510 }, { "epoch": 0.9034181472240072, "grad_norm": 0.29861178237867286, "learning_rate": 2.104364762466705e-06, "loss": 0.0191, "step": 216515 }, { "epoch": 0.9034390099389974, "grad_norm": 0.5612553527330869, "learning_rate": 2.104340464634743e-06, "loss": 0.0181, "step": 216520 }, { "epoch": 0.9034598726539878, "grad_norm": 0.6407965522048967, "learning_rate": 2.1043161676444182e-06, "loss": 0.0229, "step": 216525 }, { "epoch": 0.903480735368978, "grad_norm": 0.5425499344667681, "learning_rate": 2.1042918714956834e-06, "loss": 0.0213, "step": 216530 }, { "epoch": 0.9035015980839682, "grad_norm": 1.1590855437602137, "learning_rate": 2.104267576188489e-06, "loss": 0.0172, "step": 216535 }, { "epoch": 0.9035224607989585, "grad_norm": 0.9125025246166067, "learning_rate": 2.1042432817227867e-06, "loss": 0.0184, "step": 216540 }, { "epoch": 0.9035433235139488, "grad_norm": 0.7583923445384464, "learning_rate": 2.1042189880985277e-06, "loss": 0.0179, "step": 216545 }, { "epoch": 0.9035641862289391, "grad_norm": 0.5690537317702633, "learning_rate": 2.104194695315664e-06, "loss": 0.0167, "step": 216550 }, { "epoch": 0.9035850489439293, "grad_norm": 0.32682815504429263, "learning_rate": 2.1041704033741463e-06, "loss": 0.0157, "step": 216555 }, { "epoch": 0.9036059116589197, "grad_norm": 0.30124883057569707, "learning_rate": 2.1041461122739265e-06, "loss": 0.0217, "step": 216560 }, { "epoch": 0.9036267743739099, "grad_norm": 0.517478084498111, "learning_rate": 2.1041218220149557e-06, "loss": 0.0223, "step": 216565 }, { "epoch": 0.9036476370889002, "grad_norm": 0.4630339388989025, "learning_rate": 2.1040975325971856e-06, "loss": 0.022, "step": 216570 }, { "epoch": 0.9036684998038905, "grad_norm": 0.6268890816278964, "learning_rate": 2.1040732440205683e-06, "loss": 0.0234, "step": 216575 }, { "epoch": 0.9036893625188808, "grad_norm": 0.6740445512446166, "learning_rate": 2.1040489562850543e-06, "loss": 0.0217, "step": 216580 }, { "epoch": 0.903710225233871, "grad_norm": 0.7029672147599463, "learning_rate": 2.1040246693905954e-06, "loss": 0.0215, "step": 216585 }, { "epoch": 0.9037310879488614, "grad_norm": 0.7079633134643643, "learning_rate": 2.1040003833371427e-06, "loss": 0.0184, "step": 216590 }, { "epoch": 0.9037519506638516, "grad_norm": 0.6733307236557664, "learning_rate": 2.1039760981246485e-06, "loss": 0.0249, "step": 216595 }, { "epoch": 0.9037728133788419, "grad_norm": 0.5079288706680505, "learning_rate": 2.1039518137530632e-06, "loss": 0.0257, "step": 216600 }, { "epoch": 0.9037936760938321, "grad_norm": 1.3479873396934885, "learning_rate": 2.1039275302223394e-06, "loss": 0.0248, "step": 216605 }, { "epoch": 0.9038145388088225, "grad_norm": 0.6396719109177358, "learning_rate": 2.1039032475324275e-06, "loss": 0.0202, "step": 216610 }, { "epoch": 0.9038354015238127, "grad_norm": 0.45866507236466425, "learning_rate": 2.1038789656832796e-06, "loss": 0.0195, "step": 216615 }, { "epoch": 0.9038562642388029, "grad_norm": 0.9020604992013054, "learning_rate": 2.103854684674847e-06, "loss": 0.0177, "step": 216620 }, { "epoch": 0.9038771269537933, "grad_norm": 0.5359934324344549, "learning_rate": 2.1038304045070816e-06, "loss": 0.0196, "step": 216625 }, { "epoch": 0.9038979896687835, "grad_norm": 0.37581283381095903, "learning_rate": 2.103806125179934e-06, "loss": 0.0162, "step": 216630 }, { "epoch": 0.9039188523837738, "grad_norm": 0.9442610709124231, "learning_rate": 2.1037818466933567e-06, "loss": 0.0358, "step": 216635 }, { "epoch": 0.9039397150987641, "grad_norm": 0.6705774419592188, "learning_rate": 2.1037575690473007e-06, "loss": 0.0155, "step": 216640 }, { "epoch": 0.9039605778137544, "grad_norm": 0.7640427506430183, "learning_rate": 2.103733292241717e-06, "loss": 0.0226, "step": 216645 }, { "epoch": 0.9039814405287446, "grad_norm": 0.4903729702365666, "learning_rate": 2.1037090162765583e-06, "loss": 0.0243, "step": 216650 }, { "epoch": 0.9040023032437349, "grad_norm": 0.8054788182329095, "learning_rate": 2.103684741151775e-06, "loss": 0.0161, "step": 216655 }, { "epoch": 0.9040231659587252, "grad_norm": 0.40046830078293494, "learning_rate": 2.1036604668673188e-06, "loss": 0.0179, "step": 216660 }, { "epoch": 0.9040440286737155, "grad_norm": 0.24888739598044837, "learning_rate": 2.103636193423142e-06, "loss": 0.0215, "step": 216665 }, { "epoch": 0.9040648913887057, "grad_norm": 0.292198737402186, "learning_rate": 2.1036119208191953e-06, "loss": 0.0141, "step": 216670 }, { "epoch": 0.9040857541036961, "grad_norm": 0.470518421973263, "learning_rate": 2.103587649055431e-06, "loss": 0.017, "step": 216675 }, { "epoch": 0.9041066168186863, "grad_norm": 0.4145479320942572, "learning_rate": 2.1035633781318e-06, "loss": 0.0213, "step": 216680 }, { "epoch": 0.9041274795336766, "grad_norm": 0.7814249108639114, "learning_rate": 2.1035391080482536e-06, "loss": 0.016, "step": 216685 }, { "epoch": 0.9041483422486669, "grad_norm": 0.5468595647061203, "learning_rate": 2.1035148388047445e-06, "loss": 0.0191, "step": 216690 }, { "epoch": 0.9041692049636572, "grad_norm": 0.31588681802574115, "learning_rate": 2.1034905704012223e-06, "loss": 0.0226, "step": 216695 }, { "epoch": 0.9041900676786474, "grad_norm": 0.524877724083772, "learning_rate": 2.103466302837641e-06, "loss": 0.0188, "step": 216700 }, { "epoch": 0.9042109303936378, "grad_norm": 0.6285243691983041, "learning_rate": 2.1034420361139497e-06, "loss": 0.0175, "step": 216705 }, { "epoch": 0.904231793108628, "grad_norm": 0.8625241768135351, "learning_rate": 2.103417770230102e-06, "loss": 0.0269, "step": 216710 }, { "epoch": 0.9042526558236182, "grad_norm": 0.3414469225339306, "learning_rate": 2.1033935051860484e-06, "loss": 0.0135, "step": 216715 }, { "epoch": 0.9042735185386085, "grad_norm": 0.7995218921300554, "learning_rate": 2.1033692409817403e-06, "loss": 0.0276, "step": 216720 }, { "epoch": 0.9042943812535988, "grad_norm": 0.7345369323941451, "learning_rate": 2.10334497761713e-06, "loss": 0.0249, "step": 216725 }, { "epoch": 0.9043152439685891, "grad_norm": 0.5252278309682569, "learning_rate": 2.1033207150921687e-06, "loss": 0.0157, "step": 216730 }, { "epoch": 0.9043361066835793, "grad_norm": 0.5676720753048763, "learning_rate": 2.1032964534068075e-06, "loss": 0.0173, "step": 216735 }, { "epoch": 0.9043569693985697, "grad_norm": 0.8221918738544557, "learning_rate": 2.1032721925609986e-06, "loss": 0.0204, "step": 216740 }, { "epoch": 0.9043778321135599, "grad_norm": 0.7311423712441082, "learning_rate": 2.1032479325546938e-06, "loss": 0.0157, "step": 216745 }, { "epoch": 0.9043986948285502, "grad_norm": 0.6182520541101557, "learning_rate": 2.1032236733878444e-06, "loss": 0.019, "step": 216750 }, { "epoch": 0.9044195575435405, "grad_norm": 0.5985287093536866, "learning_rate": 2.1031994150604012e-06, "loss": 0.0244, "step": 216755 }, { "epoch": 0.9044404202585308, "grad_norm": 0.7017345438538573, "learning_rate": 2.1031751575723165e-06, "loss": 0.0204, "step": 216760 }, { "epoch": 0.904461282973521, "grad_norm": 0.4618826244995746, "learning_rate": 2.1031509009235423e-06, "loss": 0.0196, "step": 216765 }, { "epoch": 0.9044821456885114, "grad_norm": 0.40653872789938517, "learning_rate": 2.10312664511403e-06, "loss": 0.0257, "step": 216770 }, { "epoch": 0.9045030084035016, "grad_norm": 0.851857132381913, "learning_rate": 2.1031023901437303e-06, "loss": 0.0202, "step": 216775 }, { "epoch": 0.9045238711184919, "grad_norm": 0.7010455937477297, "learning_rate": 2.1030781360125955e-06, "loss": 0.0193, "step": 216780 }, { "epoch": 0.9045447338334821, "grad_norm": 0.5182865703211609, "learning_rate": 2.1030538827205777e-06, "loss": 0.0169, "step": 216785 }, { "epoch": 0.9045655965484725, "grad_norm": 0.6177656492401169, "learning_rate": 2.1030296302676283e-06, "loss": 0.0219, "step": 216790 }, { "epoch": 0.9045864592634627, "grad_norm": 0.3444205820203595, "learning_rate": 2.103005378653698e-06, "loss": 0.0226, "step": 216795 }, { "epoch": 0.904607321978453, "grad_norm": 0.600504138235209, "learning_rate": 2.1029811278787394e-06, "loss": 0.0245, "step": 216800 }, { "epoch": 0.9046281846934433, "grad_norm": 0.750839458606873, "learning_rate": 2.102956877942704e-06, "loss": 0.0153, "step": 216805 }, { "epoch": 0.9046490474084335, "grad_norm": 0.5845440670456847, "learning_rate": 2.102932628845542e-06, "loss": 0.0246, "step": 216810 }, { "epoch": 0.9046699101234238, "grad_norm": 0.5742075311838352, "learning_rate": 2.102908380587208e-06, "loss": 0.0195, "step": 216815 }, { "epoch": 0.9046907728384141, "grad_norm": 0.5836587834409342, "learning_rate": 2.102884133167651e-06, "loss": 0.0179, "step": 216820 }, { "epoch": 0.9047116355534044, "grad_norm": 0.3023586808001995, "learning_rate": 2.1028598865868233e-06, "loss": 0.0195, "step": 216825 }, { "epoch": 0.9047324982683946, "grad_norm": 1.9053301428864213, "learning_rate": 2.102835640844677e-06, "loss": 0.0384, "step": 216830 }, { "epoch": 0.9047533609833849, "grad_norm": 0.39048253514132286, "learning_rate": 2.1028113959411637e-06, "loss": 0.0173, "step": 216835 }, { "epoch": 0.9047742236983752, "grad_norm": 0.22657030013519175, "learning_rate": 2.102787151876235e-06, "loss": 0.0247, "step": 216840 }, { "epoch": 0.9047950864133655, "grad_norm": 0.5603000537417199, "learning_rate": 2.1027629086498417e-06, "loss": 0.0153, "step": 216845 }, { "epoch": 0.9048159491283557, "grad_norm": 0.8796158328217303, "learning_rate": 2.102738666261937e-06, "loss": 0.0262, "step": 216850 }, { "epoch": 0.9048368118433461, "grad_norm": 0.7833897279925668, "learning_rate": 2.102714424712472e-06, "loss": 0.0177, "step": 216855 }, { "epoch": 0.9048576745583363, "grad_norm": 0.37941062249151336, "learning_rate": 2.1026901840013974e-06, "loss": 0.0228, "step": 216860 }, { "epoch": 0.9048785372733266, "grad_norm": 1.43176775209046, "learning_rate": 2.1026659441286663e-06, "loss": 0.0178, "step": 216865 }, { "epoch": 0.9048993999883169, "grad_norm": 0.5860359283931982, "learning_rate": 2.10264170509423e-06, "loss": 0.0261, "step": 216870 }, { "epoch": 0.9049202627033072, "grad_norm": 0.4346009043208745, "learning_rate": 2.1026174668980388e-06, "loss": 0.0201, "step": 216875 }, { "epoch": 0.9049411254182974, "grad_norm": 0.3244665957259774, "learning_rate": 2.102593229540046e-06, "loss": 0.0121, "step": 216880 }, { "epoch": 0.9049619881332878, "grad_norm": 0.6108066098380712, "learning_rate": 2.1025689930202026e-06, "loss": 0.0181, "step": 216885 }, { "epoch": 0.904982850848278, "grad_norm": 0.6811823231522803, "learning_rate": 2.102544757338461e-06, "loss": 0.0251, "step": 216890 }, { "epoch": 0.9050037135632683, "grad_norm": 0.6134096777862704, "learning_rate": 2.102520522494772e-06, "loss": 0.0182, "step": 216895 }, { "epoch": 0.9050245762782585, "grad_norm": 0.7751281120660307, "learning_rate": 2.1024962884890873e-06, "loss": 0.0197, "step": 216900 }, { "epoch": 0.9050454389932489, "grad_norm": 0.8880521074729351, "learning_rate": 2.1024720553213594e-06, "loss": 0.0205, "step": 216905 }, { "epoch": 0.9050663017082391, "grad_norm": 0.5531053601276458, "learning_rate": 2.1024478229915398e-06, "loss": 0.0204, "step": 216910 }, { "epoch": 0.9050871644232293, "grad_norm": 0.7890835030623798, "learning_rate": 2.10242359149958e-06, "loss": 0.0242, "step": 216915 }, { "epoch": 0.9051080271382197, "grad_norm": 0.42005131993522743, "learning_rate": 2.102399360845431e-06, "loss": 0.0268, "step": 216920 }, { "epoch": 0.9051288898532099, "grad_norm": 1.0136113044226813, "learning_rate": 2.1023751310290457e-06, "loss": 0.0231, "step": 216925 }, { "epoch": 0.9051497525682002, "grad_norm": 0.7771593714232866, "learning_rate": 2.1023509020503756e-06, "loss": 0.0245, "step": 216930 }, { "epoch": 0.9051706152831905, "grad_norm": 1.1096531454838412, "learning_rate": 2.102326673909372e-06, "loss": 0.0304, "step": 216935 }, { "epoch": 0.9051914779981808, "grad_norm": 0.5121370578834777, "learning_rate": 2.102302446605987e-06, "loss": 0.027, "step": 216940 }, { "epoch": 0.905212340713171, "grad_norm": 1.1299705883154638, "learning_rate": 2.102278220140172e-06, "loss": 0.0179, "step": 216945 }, { "epoch": 0.9052332034281614, "grad_norm": 0.7641281497072698, "learning_rate": 2.1022539945118785e-06, "loss": 0.017, "step": 216950 }, { "epoch": 0.9052540661431516, "grad_norm": 0.4987509645045842, "learning_rate": 2.1022297697210595e-06, "loss": 0.0195, "step": 216955 }, { "epoch": 0.9052749288581419, "grad_norm": 0.45021780296604197, "learning_rate": 2.1022055457676656e-06, "loss": 0.0256, "step": 216960 }, { "epoch": 0.9052957915731321, "grad_norm": 0.8231336842781946, "learning_rate": 2.1021813226516485e-06, "loss": 0.0172, "step": 216965 }, { "epoch": 0.9053166542881225, "grad_norm": 0.6934451657369998, "learning_rate": 2.1021571003729602e-06, "loss": 0.0234, "step": 216970 }, { "epoch": 0.9053375170031127, "grad_norm": 0.6822008053846538, "learning_rate": 2.102132878931553e-06, "loss": 0.0193, "step": 216975 }, { "epoch": 0.905358379718103, "grad_norm": 0.4111553949541697, "learning_rate": 2.1021086583273783e-06, "loss": 0.0222, "step": 216980 }, { "epoch": 0.9053792424330933, "grad_norm": 0.47538460685901757, "learning_rate": 2.1020844385603875e-06, "loss": 0.0158, "step": 216985 }, { "epoch": 0.9054001051480836, "grad_norm": 0.4170846874228909, "learning_rate": 2.102060219630533e-06, "loss": 0.018, "step": 216990 }, { "epoch": 0.9054209678630738, "grad_norm": 0.6507996402187086, "learning_rate": 2.102036001537766e-06, "loss": 0.0177, "step": 216995 }, { "epoch": 0.9054418305780642, "grad_norm": 0.6132465990217869, "learning_rate": 2.1020117842820386e-06, "loss": 0.0247, "step": 217000 }, { "epoch": 0.9054626932930544, "grad_norm": 0.4666943359858914, "learning_rate": 2.101987567863302e-06, "loss": 0.0281, "step": 217005 }, { "epoch": 0.9054835560080446, "grad_norm": 0.5134289945620665, "learning_rate": 2.1019633522815097e-06, "loss": 0.0229, "step": 217010 }, { "epoch": 0.9055044187230349, "grad_norm": 0.39859912337200964, "learning_rate": 2.101939137536611e-06, "loss": 0.0225, "step": 217015 }, { "epoch": 0.9055252814380252, "grad_norm": 0.42957820178130435, "learning_rate": 2.10191492362856e-06, "loss": 0.0226, "step": 217020 }, { "epoch": 0.9055461441530155, "grad_norm": 0.6287174397836692, "learning_rate": 2.101890710557307e-06, "loss": 0.0155, "step": 217025 }, { "epoch": 0.9055670068680057, "grad_norm": 0.5558311969055431, "learning_rate": 2.1018664983228046e-06, "loss": 0.0221, "step": 217030 }, { "epoch": 0.9055878695829961, "grad_norm": 0.37492929104049066, "learning_rate": 2.1018422869250042e-06, "loss": 0.0206, "step": 217035 }, { "epoch": 0.9056087322979863, "grad_norm": 0.7139782852057931, "learning_rate": 2.1018180763638573e-06, "loss": 0.0258, "step": 217040 }, { "epoch": 0.9056295950129766, "grad_norm": 0.4521685220149091, "learning_rate": 2.101793866639317e-06, "loss": 0.0112, "step": 217045 }, { "epoch": 0.9056504577279669, "grad_norm": 0.8194295830164325, "learning_rate": 2.1017696577513333e-06, "loss": 0.0218, "step": 217050 }, { "epoch": 0.9056713204429572, "grad_norm": 0.604493594259501, "learning_rate": 2.1017454496998594e-06, "loss": 0.0208, "step": 217055 }, { "epoch": 0.9056921831579474, "grad_norm": 0.803433194281668, "learning_rate": 2.1017212424848466e-06, "loss": 0.0294, "step": 217060 }, { "epoch": 0.9057130458729378, "grad_norm": 0.559201035512417, "learning_rate": 2.101697036106247e-06, "loss": 0.0236, "step": 217065 }, { "epoch": 0.905733908587928, "grad_norm": 1.0348760296645267, "learning_rate": 2.101672830564012e-06, "loss": 0.0222, "step": 217070 }, { "epoch": 0.9057547713029183, "grad_norm": 0.6168403170566382, "learning_rate": 2.1016486258580944e-06, "loss": 0.0174, "step": 217075 }, { "epoch": 0.9057756340179085, "grad_norm": 0.6014451919018365, "learning_rate": 2.101624421988445e-06, "loss": 0.0232, "step": 217080 }, { "epoch": 0.9057964967328989, "grad_norm": 1.3209242314358762, "learning_rate": 2.1016002189550157e-06, "loss": 0.0285, "step": 217085 }, { "epoch": 0.9058173594478891, "grad_norm": 1.2274431884445933, "learning_rate": 2.101576016757759e-06, "loss": 0.0173, "step": 217090 }, { "epoch": 0.9058382221628793, "grad_norm": 0.5507369749824375, "learning_rate": 2.101551815396626e-06, "loss": 0.0189, "step": 217095 }, { "epoch": 0.9058590848778697, "grad_norm": 0.8537594177948837, "learning_rate": 2.1015276148715687e-06, "loss": 0.0171, "step": 217100 }, { "epoch": 0.90587994759286, "grad_norm": 0.3207920030260717, "learning_rate": 2.10150341518254e-06, "loss": 0.0325, "step": 217105 }, { "epoch": 0.9059008103078502, "grad_norm": 0.7756087975105367, "learning_rate": 2.1014792163294904e-06, "loss": 0.017, "step": 217110 }, { "epoch": 0.9059216730228405, "grad_norm": 0.48818453570265785, "learning_rate": 2.101455018312373e-06, "loss": 0.0186, "step": 217115 }, { "epoch": 0.9059425357378308, "grad_norm": 0.6738385210701873, "learning_rate": 2.1014308211311387e-06, "loss": 0.0239, "step": 217120 }, { "epoch": 0.905963398452821, "grad_norm": 0.7613256051141779, "learning_rate": 2.1014066247857393e-06, "loss": 0.0235, "step": 217125 }, { "epoch": 0.9059842611678114, "grad_norm": 1.2157517008763195, "learning_rate": 2.1013824292761272e-06, "loss": 0.0268, "step": 217130 }, { "epoch": 0.9060051238828016, "grad_norm": 0.14010971803800468, "learning_rate": 2.101358234602254e-06, "loss": 0.0115, "step": 217135 }, { "epoch": 0.9060259865977919, "grad_norm": 0.588783479268992, "learning_rate": 2.1013340407640723e-06, "loss": 0.0151, "step": 217140 }, { "epoch": 0.9060468493127821, "grad_norm": 0.5595253423049967, "learning_rate": 2.1013098477615334e-06, "loss": 0.0203, "step": 217145 }, { "epoch": 0.9060677120277725, "grad_norm": 0.5891326532693444, "learning_rate": 2.1012856555945894e-06, "loss": 0.0227, "step": 217150 }, { "epoch": 0.9060885747427627, "grad_norm": 0.6151714726960172, "learning_rate": 2.101261464263192e-06, "loss": 0.0204, "step": 217155 }, { "epoch": 0.906109437457753, "grad_norm": 0.7417845524554767, "learning_rate": 2.1012372737672923e-06, "loss": 0.0254, "step": 217160 }, { "epoch": 0.9061303001727433, "grad_norm": 1.2190453648201405, "learning_rate": 2.101213084106844e-06, "loss": 0.0237, "step": 217165 }, { "epoch": 0.9061511628877336, "grad_norm": 0.4915530194001073, "learning_rate": 2.1011888952817976e-06, "loss": 0.0224, "step": 217170 }, { "epoch": 0.9061720256027238, "grad_norm": 0.7688213277794597, "learning_rate": 2.1011647072921055e-06, "loss": 0.0203, "step": 217175 }, { "epoch": 0.9061928883177142, "grad_norm": 0.4823526911299943, "learning_rate": 2.10114052013772e-06, "loss": 0.0208, "step": 217180 }, { "epoch": 0.9062137510327044, "grad_norm": 0.3825720108465436, "learning_rate": 2.1011163338185925e-06, "loss": 0.0189, "step": 217185 }, { "epoch": 0.9062346137476947, "grad_norm": 0.8589229697162393, "learning_rate": 2.1010921483346746e-06, "loss": 0.0236, "step": 217190 }, { "epoch": 0.9062554764626849, "grad_norm": 0.6798183620537771, "learning_rate": 2.1010679636859193e-06, "loss": 0.0169, "step": 217195 }, { "epoch": 0.9062763391776752, "grad_norm": 0.7625907169900641, "learning_rate": 2.1010437798722775e-06, "loss": 0.0212, "step": 217200 }, { "epoch": 0.9062972018926655, "grad_norm": 0.4220443144534229, "learning_rate": 2.1010195968937013e-06, "loss": 0.0243, "step": 217205 }, { "epoch": 0.9063180646076557, "grad_norm": 0.3347161143340057, "learning_rate": 2.1009954147501434e-06, "loss": 0.0187, "step": 217210 }, { "epoch": 0.9063389273226461, "grad_norm": 0.44246497982244615, "learning_rate": 2.1009712334415553e-06, "loss": 0.0181, "step": 217215 }, { "epoch": 0.9063597900376363, "grad_norm": 1.6866422540207684, "learning_rate": 2.1009470529678885e-06, "loss": 0.0295, "step": 217220 }, { "epoch": 0.9063806527526266, "grad_norm": 0.4220072220724823, "learning_rate": 2.100922873329096e-06, "loss": 0.0192, "step": 217225 }, { "epoch": 0.9064015154676169, "grad_norm": 0.7090830045117067, "learning_rate": 2.100898694525129e-06, "loss": 0.0178, "step": 217230 }, { "epoch": 0.9064223781826072, "grad_norm": 0.8848926552487161, "learning_rate": 2.100874516555939e-06, "loss": 0.0228, "step": 217235 }, { "epoch": 0.9064432408975974, "grad_norm": 0.7739285454233766, "learning_rate": 2.100850339421479e-06, "loss": 0.0241, "step": 217240 }, { "epoch": 0.9064641036125878, "grad_norm": 0.5522608528607101, "learning_rate": 2.1008261631217004e-06, "loss": 0.0201, "step": 217245 }, { "epoch": 0.906484966327578, "grad_norm": 2.1026008153720013, "learning_rate": 2.100801987656555e-06, "loss": 0.0207, "step": 217250 }, { "epoch": 0.9065058290425683, "grad_norm": 0.983276047891683, "learning_rate": 2.1007778130259956e-06, "loss": 0.0201, "step": 217255 }, { "epoch": 0.9065266917575585, "grad_norm": 0.6822567187709225, "learning_rate": 2.1007536392299736e-06, "loss": 0.0145, "step": 217260 }, { "epoch": 0.9065475544725489, "grad_norm": 0.660496079039398, "learning_rate": 2.1007294662684404e-06, "loss": 0.018, "step": 217265 }, { "epoch": 0.9065684171875391, "grad_norm": 0.5201725731343069, "learning_rate": 2.1007052941413495e-06, "loss": 0.0318, "step": 217270 }, { "epoch": 0.9065892799025294, "grad_norm": 0.19944945096580155, "learning_rate": 2.100681122848651e-06, "loss": 0.0267, "step": 217275 }, { "epoch": 0.9066101426175197, "grad_norm": 0.8854662879578262, "learning_rate": 2.100656952390299e-06, "loss": 0.0198, "step": 217280 }, { "epoch": 0.90663100533251, "grad_norm": 0.9160311666115798, "learning_rate": 2.1006327827662438e-06, "loss": 0.0211, "step": 217285 }, { "epoch": 0.9066518680475002, "grad_norm": 0.7475319803085729, "learning_rate": 2.100608613976438e-06, "loss": 0.0207, "step": 217290 }, { "epoch": 0.9066727307624906, "grad_norm": 0.7146094515635099, "learning_rate": 2.100584446020834e-06, "loss": 0.0236, "step": 217295 }, { "epoch": 0.9066935934774808, "grad_norm": 0.7469551630826319, "learning_rate": 2.1005602788993834e-06, "loss": 0.0186, "step": 217300 }, { "epoch": 0.906714456192471, "grad_norm": 0.621737469303863, "learning_rate": 2.100536112612038e-06, "loss": 0.0221, "step": 217305 }, { "epoch": 0.9067353189074614, "grad_norm": 1.0773374860575127, "learning_rate": 2.10051194715875e-06, "loss": 0.0139, "step": 217310 }, { "epoch": 0.9067561816224516, "grad_norm": 0.424601429760693, "learning_rate": 2.100487782539472e-06, "loss": 0.0219, "step": 217315 }, { "epoch": 0.9067770443374419, "grad_norm": 1.1254368357819813, "learning_rate": 2.100463618754155e-06, "loss": 0.0194, "step": 217320 }, { "epoch": 0.9067979070524321, "grad_norm": 0.3642035625557356, "learning_rate": 2.1004394558027517e-06, "loss": 0.0226, "step": 217325 }, { "epoch": 0.9068187697674225, "grad_norm": 0.3744080158040722, "learning_rate": 2.1004152936852142e-06, "loss": 0.0174, "step": 217330 }, { "epoch": 0.9068396324824127, "grad_norm": 0.4395417797725534, "learning_rate": 2.1003911324014943e-06, "loss": 0.0199, "step": 217335 }, { "epoch": 0.906860495197403, "grad_norm": 0.5817736571699971, "learning_rate": 2.100366971951544e-06, "loss": 0.0176, "step": 217340 }, { "epoch": 0.9068813579123933, "grad_norm": 0.7258781868564632, "learning_rate": 2.1003428123353153e-06, "loss": 0.0218, "step": 217345 }, { "epoch": 0.9069022206273836, "grad_norm": 1.04505842721802, "learning_rate": 2.100318653552761e-06, "loss": 0.0219, "step": 217350 }, { "epoch": 0.9069230833423738, "grad_norm": 0.8213855110249668, "learning_rate": 2.1002944956038317e-06, "loss": 0.0204, "step": 217355 }, { "epoch": 0.9069439460573642, "grad_norm": 0.48724537249027383, "learning_rate": 2.1002703384884807e-06, "loss": 0.02, "step": 217360 }, { "epoch": 0.9069648087723544, "grad_norm": 0.29994495204481936, "learning_rate": 2.1002461822066598e-06, "loss": 0.0221, "step": 217365 }, { "epoch": 0.9069856714873447, "grad_norm": 0.35396371755136297, "learning_rate": 2.100222026758321e-06, "loss": 0.0186, "step": 217370 }, { "epoch": 0.9070065342023349, "grad_norm": 0.8716124871824477, "learning_rate": 2.100197872143416e-06, "loss": 0.0253, "step": 217375 }, { "epoch": 0.9070273969173253, "grad_norm": 0.37393528030858364, "learning_rate": 2.1001737183618973e-06, "loss": 0.0176, "step": 217380 }, { "epoch": 0.9070482596323155, "grad_norm": 1.2267997194176583, "learning_rate": 2.100149565413717e-06, "loss": 0.0234, "step": 217385 }, { "epoch": 0.9070691223473057, "grad_norm": 0.3155205937709405, "learning_rate": 2.1001254132988266e-06, "loss": 0.0157, "step": 217390 }, { "epoch": 0.9070899850622961, "grad_norm": 0.512438127134439, "learning_rate": 2.100101262017179e-06, "loss": 0.0166, "step": 217395 }, { "epoch": 0.9071108477772863, "grad_norm": 1.0860421945790566, "learning_rate": 2.1000771115687256e-06, "loss": 0.012, "step": 217400 }, { "epoch": 0.9071317104922766, "grad_norm": 0.6241809639759941, "learning_rate": 2.1000529619534194e-06, "loss": 0.0196, "step": 217405 }, { "epoch": 0.9071525732072669, "grad_norm": 0.8898945342810101, "learning_rate": 2.1000288131712112e-06, "loss": 0.0153, "step": 217410 }, { "epoch": 0.9071734359222572, "grad_norm": 0.3913779737000993, "learning_rate": 2.100004665222054e-06, "loss": 0.0206, "step": 217415 }, { "epoch": 0.9071942986372474, "grad_norm": 0.5076741068949235, "learning_rate": 2.0999805181059e-06, "loss": 0.017, "step": 217420 }, { "epoch": 0.9072151613522378, "grad_norm": 0.3393674390518183, "learning_rate": 2.0999563718227007e-06, "loss": 0.0231, "step": 217425 }, { "epoch": 0.907236024067228, "grad_norm": 0.761508378925051, "learning_rate": 2.099932226372409e-06, "loss": 0.0187, "step": 217430 }, { "epoch": 0.9072568867822183, "grad_norm": 0.554687554654914, "learning_rate": 2.0999080817549765e-06, "loss": 0.0223, "step": 217435 }, { "epoch": 0.9072777494972085, "grad_norm": 0.7164639980540646, "learning_rate": 2.0998839379703547e-06, "loss": 0.0225, "step": 217440 }, { "epoch": 0.9072986122121989, "grad_norm": 0.6993680617549121, "learning_rate": 2.0998597950184966e-06, "loss": 0.0182, "step": 217445 }, { "epoch": 0.9073194749271891, "grad_norm": 0.8339894474182148, "learning_rate": 2.0998356528993545e-06, "loss": 0.0241, "step": 217450 }, { "epoch": 0.9073403376421794, "grad_norm": 0.7669827981479964, "learning_rate": 2.0998115116128797e-06, "loss": 0.0183, "step": 217455 }, { "epoch": 0.9073612003571697, "grad_norm": 0.6796025500279957, "learning_rate": 2.0997873711590256e-06, "loss": 0.0241, "step": 217460 }, { "epoch": 0.90738206307216, "grad_norm": 0.6494632071356953, "learning_rate": 2.0997632315377426e-06, "loss": 0.0178, "step": 217465 }, { "epoch": 0.9074029257871502, "grad_norm": 0.8298923121210141, "learning_rate": 2.0997390927489846e-06, "loss": 0.0226, "step": 217470 }, { "epoch": 0.9074237885021406, "grad_norm": 0.37028315952623425, "learning_rate": 2.0997149547927024e-06, "loss": 0.0168, "step": 217475 }, { "epoch": 0.9074446512171308, "grad_norm": 0.969899527170564, "learning_rate": 2.0996908176688486e-06, "loss": 0.0231, "step": 217480 }, { "epoch": 0.907465513932121, "grad_norm": 0.6722016122928688, "learning_rate": 2.0996666813773758e-06, "loss": 0.0232, "step": 217485 }, { "epoch": 0.9074863766471114, "grad_norm": 0.49984618862572366, "learning_rate": 2.0996425459182352e-06, "loss": 0.0216, "step": 217490 }, { "epoch": 0.9075072393621016, "grad_norm": 0.81859449970994, "learning_rate": 2.0996184112913803e-06, "loss": 0.0195, "step": 217495 }, { "epoch": 0.9075281020770919, "grad_norm": 0.355565106208468, "learning_rate": 2.099594277496762e-06, "loss": 0.0106, "step": 217500 }, { "epoch": 0.9075489647920821, "grad_norm": 0.6078470089344705, "learning_rate": 2.0995701445343333e-06, "loss": 0.0185, "step": 217505 }, { "epoch": 0.9075698275070725, "grad_norm": 0.531827619246049, "learning_rate": 2.0995460124040457e-06, "loss": 0.0179, "step": 217510 }, { "epoch": 0.9075906902220627, "grad_norm": 0.4685897600862445, "learning_rate": 2.0995218811058525e-06, "loss": 0.0196, "step": 217515 }, { "epoch": 0.907611552937053, "grad_norm": 0.5915069345831275, "learning_rate": 2.099497750639704e-06, "loss": 0.0184, "step": 217520 }, { "epoch": 0.9076324156520433, "grad_norm": 0.5087412055476211, "learning_rate": 2.0994736210055543e-06, "loss": 0.0282, "step": 217525 }, { "epoch": 0.9076532783670336, "grad_norm": 0.6192861280094956, "learning_rate": 2.099449492203355e-06, "loss": 0.0222, "step": 217530 }, { "epoch": 0.9076741410820238, "grad_norm": 0.6173468609545719, "learning_rate": 2.099425364233057e-06, "loss": 0.0206, "step": 217535 }, { "epoch": 0.9076950037970142, "grad_norm": 0.8277683857038269, "learning_rate": 2.099401237094615e-06, "loss": 0.0265, "step": 217540 }, { "epoch": 0.9077158665120044, "grad_norm": 0.9260780493673196, "learning_rate": 2.099377110787979e-06, "loss": 0.0209, "step": 217545 }, { "epoch": 0.9077367292269947, "grad_norm": 0.6063806634740806, "learning_rate": 2.099352985313102e-06, "loss": 0.0193, "step": 217550 }, { "epoch": 0.9077575919419849, "grad_norm": 0.617865664072663, "learning_rate": 2.099328860669936e-06, "loss": 0.0173, "step": 217555 }, { "epoch": 0.9077784546569753, "grad_norm": 0.4756325170412446, "learning_rate": 2.099304736858434e-06, "loss": 0.0212, "step": 217560 }, { "epoch": 0.9077993173719655, "grad_norm": 0.3146237194406504, "learning_rate": 2.099280613878548e-06, "loss": 0.0193, "step": 217565 }, { "epoch": 0.9078201800869558, "grad_norm": 0.7222032857846682, "learning_rate": 2.099256491730229e-06, "loss": 0.0164, "step": 217570 }, { "epoch": 0.9078410428019461, "grad_norm": 0.5692704725044865, "learning_rate": 2.0992323704134305e-06, "loss": 0.0254, "step": 217575 }, { "epoch": 0.9078619055169364, "grad_norm": 0.4396372166882185, "learning_rate": 2.099208249928104e-06, "loss": 0.0172, "step": 217580 }, { "epoch": 0.9078827682319266, "grad_norm": 0.4497098361817066, "learning_rate": 2.0991841302742023e-06, "loss": 0.0177, "step": 217585 }, { "epoch": 0.907903630946917, "grad_norm": 0.28486403386685927, "learning_rate": 2.0991600114516774e-06, "loss": 0.0202, "step": 217590 }, { "epoch": 0.9079244936619072, "grad_norm": 0.7955246407537323, "learning_rate": 2.0991358934604814e-06, "loss": 0.0156, "step": 217595 }, { "epoch": 0.9079453563768974, "grad_norm": 0.526768740242979, "learning_rate": 2.0991117763005666e-06, "loss": 0.019, "step": 217600 }, { "epoch": 0.9079662190918878, "grad_norm": 0.32691927377637403, "learning_rate": 2.0990876599718854e-06, "loss": 0.0174, "step": 217605 }, { "epoch": 0.907987081806878, "grad_norm": 0.3295527491067477, "learning_rate": 2.0990635444743904e-06, "loss": 0.0219, "step": 217610 }, { "epoch": 0.9080079445218683, "grad_norm": 0.4890567113234812, "learning_rate": 2.0990394298080325e-06, "loss": 0.016, "step": 217615 }, { "epoch": 0.9080288072368585, "grad_norm": 1.3138858263797972, "learning_rate": 2.099015315972766e-06, "loss": 0.0296, "step": 217620 }, { "epoch": 0.9080496699518489, "grad_norm": 0.45962494113766483, "learning_rate": 2.0989912029685413e-06, "loss": 0.0212, "step": 217625 }, { "epoch": 0.9080705326668391, "grad_norm": 0.7032671042505276, "learning_rate": 2.0989670907953114e-06, "loss": 0.031, "step": 217630 }, { "epoch": 0.9080913953818294, "grad_norm": 0.48747294965476234, "learning_rate": 2.0989429794530285e-06, "loss": 0.0172, "step": 217635 }, { "epoch": 0.9081122580968197, "grad_norm": 0.595347604265565, "learning_rate": 2.0989188689416454e-06, "loss": 0.0192, "step": 217640 }, { "epoch": 0.90813312081181, "grad_norm": 0.6401843766523289, "learning_rate": 2.098894759261114e-06, "loss": 0.0184, "step": 217645 }, { "epoch": 0.9081539835268002, "grad_norm": 0.4655999707052215, "learning_rate": 2.098870650411386e-06, "loss": 0.0191, "step": 217650 }, { "epoch": 0.9081748462417906, "grad_norm": 0.40192373994008446, "learning_rate": 2.0988465423924145e-06, "loss": 0.0193, "step": 217655 }, { "epoch": 0.9081957089567808, "grad_norm": 0.8927021631475083, "learning_rate": 2.0988224352041513e-06, "loss": 0.0196, "step": 217660 }, { "epoch": 0.9082165716717711, "grad_norm": 1.1582837872377365, "learning_rate": 2.0987983288465497e-06, "loss": 0.0273, "step": 217665 }, { "epoch": 0.9082374343867614, "grad_norm": 0.6145890284501166, "learning_rate": 2.09877422331956e-06, "loss": 0.0202, "step": 217670 }, { "epoch": 0.9082582971017517, "grad_norm": 0.6563371891099549, "learning_rate": 2.0987501186231365e-06, "loss": 0.0192, "step": 217675 }, { "epoch": 0.9082791598167419, "grad_norm": 0.9898506553871876, "learning_rate": 2.0987260147572306e-06, "loss": 0.0218, "step": 217680 }, { "epoch": 0.9083000225317321, "grad_norm": 1.0328084088833713, "learning_rate": 2.0987019117217942e-06, "loss": 0.0251, "step": 217685 }, { "epoch": 0.9083208852467225, "grad_norm": 0.4590658175492209, "learning_rate": 2.0986778095167805e-06, "loss": 0.0171, "step": 217690 }, { "epoch": 0.9083417479617127, "grad_norm": 0.21050815731181957, "learning_rate": 2.098653708142142e-06, "loss": 0.02, "step": 217695 }, { "epoch": 0.908362610676703, "grad_norm": 0.5260548504457044, "learning_rate": 2.0986296075978296e-06, "loss": 0.0271, "step": 217700 }, { "epoch": 0.9083834733916933, "grad_norm": 0.49029714735449037, "learning_rate": 2.098605507883796e-06, "loss": 0.0223, "step": 217705 }, { "epoch": 0.9084043361066836, "grad_norm": 0.1984424322243925, "learning_rate": 2.0985814089999953e-06, "loss": 0.0184, "step": 217710 }, { "epoch": 0.9084251988216738, "grad_norm": 0.5262956627390166, "learning_rate": 2.098557310946378e-06, "loss": 0.0205, "step": 217715 }, { "epoch": 0.9084460615366642, "grad_norm": 0.5023171150537589, "learning_rate": 2.098533213722897e-06, "loss": 0.018, "step": 217720 }, { "epoch": 0.9084669242516544, "grad_norm": 1.075274063584525, "learning_rate": 2.0985091173295045e-06, "loss": 0.0162, "step": 217725 }, { "epoch": 0.9084877869666447, "grad_norm": 1.0264047465146993, "learning_rate": 2.0984850217661532e-06, "loss": 0.0197, "step": 217730 }, { "epoch": 0.9085086496816349, "grad_norm": 0.297126228532325, "learning_rate": 2.098460927032795e-06, "loss": 0.0246, "step": 217735 }, { "epoch": 0.9085295123966253, "grad_norm": 0.5175486314169144, "learning_rate": 2.0984368331293824e-06, "loss": 0.0148, "step": 217740 }, { "epoch": 0.9085503751116155, "grad_norm": 0.7171896395553258, "learning_rate": 2.098412740055868e-06, "loss": 0.022, "step": 217745 }, { "epoch": 0.9085712378266058, "grad_norm": 1.1322224915632253, "learning_rate": 2.0983886478122035e-06, "loss": 0.0202, "step": 217750 }, { "epoch": 0.9085921005415961, "grad_norm": 0.7847097676569726, "learning_rate": 2.0983645563983425e-06, "loss": 0.0341, "step": 217755 }, { "epoch": 0.9086129632565864, "grad_norm": 0.7140766839734491, "learning_rate": 2.0983404658142363e-06, "loss": 0.0222, "step": 217760 }, { "epoch": 0.9086338259715766, "grad_norm": 0.6662426409721587, "learning_rate": 2.0983163760598374e-06, "loss": 0.0214, "step": 217765 }, { "epoch": 0.908654688686567, "grad_norm": 0.6693231834971987, "learning_rate": 2.0982922871350983e-06, "loss": 0.0202, "step": 217770 }, { "epoch": 0.9086755514015572, "grad_norm": 0.5608306175750456, "learning_rate": 2.0982681990399717e-06, "loss": 0.0203, "step": 217775 }, { "epoch": 0.9086964141165474, "grad_norm": 0.8415728451398715, "learning_rate": 2.0982441117744097e-06, "loss": 0.0237, "step": 217780 }, { "epoch": 0.9087172768315378, "grad_norm": 0.3420448285649722, "learning_rate": 2.0982200253383645e-06, "loss": 0.0157, "step": 217785 }, { "epoch": 0.908738139546528, "grad_norm": 0.48895743206009595, "learning_rate": 2.0981959397317885e-06, "loss": 0.0207, "step": 217790 }, { "epoch": 0.9087590022615183, "grad_norm": 1.0870364684376699, "learning_rate": 2.0981718549546344e-06, "loss": 0.0228, "step": 217795 }, { "epoch": 0.9087798649765085, "grad_norm": 0.3803967586060614, "learning_rate": 2.0981477710068547e-06, "loss": 0.0179, "step": 217800 }, { "epoch": 0.9088007276914989, "grad_norm": 0.7024377164716312, "learning_rate": 2.0981236878884015e-06, "loss": 0.0222, "step": 217805 }, { "epoch": 0.9088215904064891, "grad_norm": 0.9907037658616399, "learning_rate": 2.0980996055992276e-06, "loss": 0.0241, "step": 217810 }, { "epoch": 0.9088424531214794, "grad_norm": 0.44051000066886425, "learning_rate": 2.0980755241392844e-06, "loss": 0.0165, "step": 217815 }, { "epoch": 0.9088633158364697, "grad_norm": 0.5975680073222154, "learning_rate": 2.0980514435085256e-06, "loss": 0.0171, "step": 217820 }, { "epoch": 0.90888417855146, "grad_norm": 0.7263970784710861, "learning_rate": 2.0980273637069023e-06, "loss": 0.0188, "step": 217825 }, { "epoch": 0.9089050412664502, "grad_norm": 0.4372273669557769, "learning_rate": 2.0980032847343684e-06, "loss": 0.0185, "step": 217830 }, { "epoch": 0.9089259039814406, "grad_norm": 0.8824416103875837, "learning_rate": 2.0979792065908756e-06, "loss": 0.0188, "step": 217835 }, { "epoch": 0.9089467666964308, "grad_norm": 0.48785795051327546, "learning_rate": 2.0979551292763757e-06, "loss": 0.0142, "step": 217840 }, { "epoch": 0.9089676294114211, "grad_norm": 0.4174528809577927, "learning_rate": 2.097931052790822e-06, "loss": 0.0241, "step": 217845 }, { "epoch": 0.9089884921264114, "grad_norm": 0.3436750480367906, "learning_rate": 2.0979069771341663e-06, "loss": 0.0179, "step": 217850 }, { "epoch": 0.9090093548414017, "grad_norm": 0.5547538503501658, "learning_rate": 2.097882902306362e-06, "loss": 0.0168, "step": 217855 }, { "epoch": 0.9090302175563919, "grad_norm": 0.6830598385882009, "learning_rate": 2.0978588283073603e-06, "loss": 0.0216, "step": 217860 }, { "epoch": 0.9090510802713822, "grad_norm": 0.7507772772319227, "learning_rate": 2.0978347551371148e-06, "loss": 0.0197, "step": 217865 }, { "epoch": 0.9090719429863725, "grad_norm": 0.7169825763515753, "learning_rate": 2.097810682795577e-06, "loss": 0.017, "step": 217870 }, { "epoch": 0.9090928057013627, "grad_norm": 0.5772673391551099, "learning_rate": 2.0977866112827e-06, "loss": 0.0234, "step": 217875 }, { "epoch": 0.909113668416353, "grad_norm": 0.9316752562341925, "learning_rate": 2.0977625405984365e-06, "loss": 0.0252, "step": 217880 }, { "epoch": 0.9091345311313433, "grad_norm": 0.7433376256577259, "learning_rate": 2.097738470742738e-06, "loss": 0.0195, "step": 217885 }, { "epoch": 0.9091553938463336, "grad_norm": 0.5891600460056704, "learning_rate": 2.0977144017155572e-06, "loss": 0.0245, "step": 217890 }, { "epoch": 0.9091762565613238, "grad_norm": 0.6022557330001179, "learning_rate": 2.097690333516848e-06, "loss": 0.0178, "step": 217895 }, { "epoch": 0.9091971192763142, "grad_norm": 0.34746563039518213, "learning_rate": 2.0976662661465605e-06, "loss": 0.0148, "step": 217900 }, { "epoch": 0.9092179819913044, "grad_norm": 0.6407389452775197, "learning_rate": 2.0976421996046493e-06, "loss": 0.0133, "step": 217905 }, { "epoch": 0.9092388447062947, "grad_norm": 1.0395427364745584, "learning_rate": 2.097618133891065e-06, "loss": 0.0185, "step": 217910 }, { "epoch": 0.9092597074212849, "grad_norm": 0.5194835576158726, "learning_rate": 2.0975940690057615e-06, "loss": 0.0166, "step": 217915 }, { "epoch": 0.9092805701362753, "grad_norm": 0.3953646786280129, "learning_rate": 2.0975700049486914e-06, "loss": 0.0228, "step": 217920 }, { "epoch": 0.9093014328512655, "grad_norm": 0.8046465444987968, "learning_rate": 2.097545941719806e-06, "loss": 0.0224, "step": 217925 }, { "epoch": 0.9093222955662558, "grad_norm": 0.716295807441647, "learning_rate": 2.0975218793190584e-06, "loss": 0.0235, "step": 217930 }, { "epoch": 0.9093431582812461, "grad_norm": 0.4191289389494861, "learning_rate": 2.097497817746402e-06, "loss": 0.0264, "step": 217935 }, { "epoch": 0.9093640209962364, "grad_norm": 0.7928454188952744, "learning_rate": 2.0974737570017875e-06, "loss": 0.0169, "step": 217940 }, { "epoch": 0.9093848837112266, "grad_norm": 0.84932318170306, "learning_rate": 2.0974496970851685e-06, "loss": 0.0154, "step": 217945 }, { "epoch": 0.909405746426217, "grad_norm": 1.0208017083601089, "learning_rate": 2.0974256379964976e-06, "loss": 0.0195, "step": 217950 }, { "epoch": 0.9094266091412072, "grad_norm": 0.7177572095723309, "learning_rate": 2.097401579735727e-06, "loss": 0.0169, "step": 217955 }, { "epoch": 0.9094474718561975, "grad_norm": 0.7503434325713983, "learning_rate": 2.0973775223028092e-06, "loss": 0.0164, "step": 217960 }, { "epoch": 0.9094683345711878, "grad_norm": 0.7198184423447546, "learning_rate": 2.097353465697697e-06, "loss": 0.0194, "step": 217965 }, { "epoch": 0.909489197286178, "grad_norm": 0.9025538928906568, "learning_rate": 2.0973294099203433e-06, "loss": 0.0232, "step": 217970 }, { "epoch": 0.9095100600011683, "grad_norm": 0.6324649752694878, "learning_rate": 2.097305354970699e-06, "loss": 0.0278, "step": 217975 }, { "epoch": 0.9095309227161585, "grad_norm": 0.7727048010715145, "learning_rate": 2.0972813008487183e-06, "loss": 0.0201, "step": 217980 }, { "epoch": 0.9095517854311489, "grad_norm": 0.511844973704854, "learning_rate": 2.097257247554353e-06, "loss": 0.017, "step": 217985 }, { "epoch": 0.9095726481461391, "grad_norm": 0.7359106734374067, "learning_rate": 2.097233195087556e-06, "loss": 0.0196, "step": 217990 }, { "epoch": 0.9095935108611294, "grad_norm": 0.5966212674908374, "learning_rate": 2.0972091434482793e-06, "loss": 0.024, "step": 217995 }, { "epoch": 0.9096143735761197, "grad_norm": 0.6043911133480026, "learning_rate": 2.0971850926364764e-06, "loss": 0.0207, "step": 218000 }, { "epoch": 0.90963523629111, "grad_norm": 0.4619297018498388, "learning_rate": 2.0971610426520984e-06, "loss": 0.0249, "step": 218005 }, { "epoch": 0.9096560990061002, "grad_norm": 0.3470097439966369, "learning_rate": 2.0971369934950993e-06, "loss": 0.018, "step": 218010 }, { "epoch": 0.9096769617210906, "grad_norm": 0.5321512414430122, "learning_rate": 2.097112945165431e-06, "loss": 0.0175, "step": 218015 }, { "epoch": 0.9096978244360808, "grad_norm": 0.4578847601216655, "learning_rate": 2.097088897663046e-06, "loss": 0.0256, "step": 218020 }, { "epoch": 0.9097186871510711, "grad_norm": 0.11671495296495345, "learning_rate": 2.097064850987897e-06, "loss": 0.0184, "step": 218025 }, { "epoch": 0.9097395498660613, "grad_norm": 0.5683611881504146, "learning_rate": 2.0970408051399367e-06, "loss": 0.0158, "step": 218030 }, { "epoch": 0.9097604125810517, "grad_norm": 0.6201907628235127, "learning_rate": 2.0970167601191176e-06, "loss": 0.0095, "step": 218035 }, { "epoch": 0.9097812752960419, "grad_norm": 0.5318115492264551, "learning_rate": 2.096992715925392e-06, "loss": 0.0127, "step": 218040 }, { "epoch": 0.9098021380110322, "grad_norm": 0.4096013188146641, "learning_rate": 2.0969686725587125e-06, "loss": 0.0149, "step": 218045 }, { "epoch": 0.9098230007260225, "grad_norm": 0.6738673022404299, "learning_rate": 2.096944630019032e-06, "loss": 0.0245, "step": 218050 }, { "epoch": 0.9098438634410128, "grad_norm": 0.5206843647009796, "learning_rate": 2.0969205883063036e-06, "loss": 0.0203, "step": 218055 }, { "epoch": 0.909864726156003, "grad_norm": 0.6345978730249571, "learning_rate": 2.096896547420479e-06, "loss": 0.0246, "step": 218060 }, { "epoch": 0.9098855888709934, "grad_norm": 0.5141948782502542, "learning_rate": 2.096872507361511e-06, "loss": 0.0231, "step": 218065 }, { "epoch": 0.9099064515859836, "grad_norm": 0.43195890593496505, "learning_rate": 2.0968484681293526e-06, "loss": 0.0227, "step": 218070 }, { "epoch": 0.9099273143009738, "grad_norm": 0.5092171753561217, "learning_rate": 2.0968244297239556e-06, "loss": 0.0119, "step": 218075 }, { "epoch": 0.9099481770159642, "grad_norm": 1.150805268147755, "learning_rate": 2.0968003921452735e-06, "loss": 0.0275, "step": 218080 }, { "epoch": 0.9099690397309544, "grad_norm": 0.7545174616920416, "learning_rate": 2.0967763553932578e-06, "loss": 0.0192, "step": 218085 }, { "epoch": 0.9099899024459447, "grad_norm": 0.7116454721272526, "learning_rate": 2.096752319467863e-06, "loss": 0.0173, "step": 218090 }, { "epoch": 0.9100107651609349, "grad_norm": 2.1867618772545785, "learning_rate": 2.0967282843690394e-06, "loss": 0.0191, "step": 218095 }, { "epoch": 0.9100316278759253, "grad_norm": 0.5947202900515937, "learning_rate": 2.0967042500967415e-06, "loss": 0.0237, "step": 218100 }, { "epoch": 0.9100524905909155, "grad_norm": 0.3511895612576364, "learning_rate": 2.0966802166509215e-06, "loss": 0.013, "step": 218105 }, { "epoch": 0.9100733533059058, "grad_norm": 1.1496002636686535, "learning_rate": 2.096656184031531e-06, "loss": 0.0285, "step": 218110 }, { "epoch": 0.9100942160208961, "grad_norm": 0.5003834603258546, "learning_rate": 2.0966321522385238e-06, "loss": 0.0187, "step": 218115 }, { "epoch": 0.9101150787358864, "grad_norm": 0.5125519525423604, "learning_rate": 2.096608121271852e-06, "loss": 0.0142, "step": 218120 }, { "epoch": 0.9101359414508766, "grad_norm": 0.6996071028216707, "learning_rate": 2.0965840911314684e-06, "loss": 0.0248, "step": 218125 }, { "epoch": 0.910156804165867, "grad_norm": 1.0055472184381706, "learning_rate": 2.096560061817325e-06, "loss": 0.0223, "step": 218130 }, { "epoch": 0.9101776668808572, "grad_norm": 0.575106117146919, "learning_rate": 2.096536033329376e-06, "loss": 0.0156, "step": 218135 }, { "epoch": 0.9101985295958475, "grad_norm": 0.822272403309154, "learning_rate": 2.096512005667573e-06, "loss": 0.0237, "step": 218140 }, { "epoch": 0.9102193923108378, "grad_norm": 1.1042982265145294, "learning_rate": 2.096487978831869e-06, "loss": 0.0236, "step": 218145 }, { "epoch": 0.9102402550258281, "grad_norm": 1.057016332106476, "learning_rate": 2.096463952822216e-06, "loss": 0.0193, "step": 218150 }, { "epoch": 0.9102611177408183, "grad_norm": 0.9632741047570728, "learning_rate": 2.096439927638567e-06, "loss": 0.0181, "step": 218155 }, { "epoch": 0.9102819804558085, "grad_norm": 0.4369436018127989, "learning_rate": 2.0964159032808754e-06, "loss": 0.0206, "step": 218160 }, { "epoch": 0.9103028431707989, "grad_norm": 0.7809778761977584, "learning_rate": 2.096391879749093e-06, "loss": 0.0205, "step": 218165 }, { "epoch": 0.9103237058857891, "grad_norm": 1.23273425163728, "learning_rate": 2.096367857043172e-06, "loss": 0.0194, "step": 218170 }, { "epoch": 0.9103445686007794, "grad_norm": 0.8255090608904025, "learning_rate": 2.096343835163067e-06, "loss": 0.02, "step": 218175 }, { "epoch": 0.9103654313157697, "grad_norm": 0.44685166384520686, "learning_rate": 2.096319814108729e-06, "loss": 0.0168, "step": 218180 }, { "epoch": 0.91038629403076, "grad_norm": 0.27708485229141894, "learning_rate": 2.0962957938801113e-06, "loss": 0.0132, "step": 218185 }, { "epoch": 0.9104071567457502, "grad_norm": 0.5087708242013611, "learning_rate": 2.0962717744771667e-06, "loss": 0.0235, "step": 218190 }, { "epoch": 0.9104280194607406, "grad_norm": 0.7188948463127421, "learning_rate": 2.096247755899847e-06, "loss": 0.0232, "step": 218195 }, { "epoch": 0.9104488821757308, "grad_norm": 0.6888140681216818, "learning_rate": 2.0962237381481065e-06, "loss": 0.0271, "step": 218200 }, { "epoch": 0.9104697448907211, "grad_norm": 0.6389773676227725, "learning_rate": 2.0961997212218965e-06, "loss": 0.0217, "step": 218205 }, { "epoch": 0.9104906076057113, "grad_norm": 0.6762586547059437, "learning_rate": 2.0961757051211704e-06, "loss": 0.0179, "step": 218210 }, { "epoch": 0.9105114703207017, "grad_norm": 0.4464050504986155, "learning_rate": 2.0961516898458805e-06, "loss": 0.0201, "step": 218215 }, { "epoch": 0.9105323330356919, "grad_norm": 0.4304291579628289, "learning_rate": 2.09612767539598e-06, "loss": 0.0286, "step": 218220 }, { "epoch": 0.9105531957506822, "grad_norm": 0.6687400001551519, "learning_rate": 2.096103661771422e-06, "loss": 0.0188, "step": 218225 }, { "epoch": 0.9105740584656725, "grad_norm": 0.3567286671991152, "learning_rate": 2.0960796489721575e-06, "loss": 0.0141, "step": 218230 }, { "epoch": 0.9105949211806628, "grad_norm": 1.0207303242892227, "learning_rate": 2.0960556369981404e-06, "loss": 0.0256, "step": 218235 }, { "epoch": 0.910615783895653, "grad_norm": 0.9993188854035054, "learning_rate": 2.096031625849324e-06, "loss": 0.0188, "step": 218240 }, { "epoch": 0.9106366466106434, "grad_norm": 0.48058317381218757, "learning_rate": 2.09600761552566e-06, "loss": 0.0148, "step": 218245 }, { "epoch": 0.9106575093256336, "grad_norm": 0.7194590732419086, "learning_rate": 2.095983606027102e-06, "loss": 0.0228, "step": 218250 }, { "epoch": 0.9106783720406239, "grad_norm": 0.38290002659066635, "learning_rate": 2.0959595973536013e-06, "loss": 0.0222, "step": 218255 }, { "epoch": 0.9106992347556142, "grad_norm": 0.6868971314867887, "learning_rate": 2.0959355895051124e-06, "loss": 0.0172, "step": 218260 }, { "epoch": 0.9107200974706045, "grad_norm": 0.3867556837999684, "learning_rate": 2.0959115824815878e-06, "loss": 0.0266, "step": 218265 }, { "epoch": 0.9107409601855947, "grad_norm": 0.7559597251290164, "learning_rate": 2.095887576282979e-06, "loss": 0.0212, "step": 218270 }, { "epoch": 0.9107618229005849, "grad_norm": 0.8425107230669769, "learning_rate": 2.0958635709092394e-06, "loss": 0.0163, "step": 218275 }, { "epoch": 0.9107826856155753, "grad_norm": 1.2614829831016443, "learning_rate": 2.095839566360322e-06, "loss": 0.0295, "step": 218280 }, { "epoch": 0.9108035483305655, "grad_norm": 0.2694183442772591, "learning_rate": 2.0958155626361796e-06, "loss": 0.0192, "step": 218285 }, { "epoch": 0.9108244110455558, "grad_norm": 0.36972758010478907, "learning_rate": 2.0957915597367644e-06, "loss": 0.0178, "step": 218290 }, { "epoch": 0.9108452737605461, "grad_norm": 0.6775600367898275, "learning_rate": 2.0957675576620302e-06, "loss": 0.0157, "step": 218295 }, { "epoch": 0.9108661364755364, "grad_norm": 0.8005235574114566, "learning_rate": 2.0957435564119284e-06, "loss": 0.0212, "step": 218300 }, { "epoch": 0.9108869991905266, "grad_norm": 4.058885609349018, "learning_rate": 2.095719555986413e-06, "loss": 0.026, "step": 218305 }, { "epoch": 0.910907861905517, "grad_norm": 0.5566619178613554, "learning_rate": 2.095695556385436e-06, "loss": 0.0223, "step": 218310 }, { "epoch": 0.9109287246205072, "grad_norm": 0.5932364788863876, "learning_rate": 2.095671557608951e-06, "loss": 0.0199, "step": 218315 }, { "epoch": 0.9109495873354975, "grad_norm": 0.5881771108002137, "learning_rate": 2.0956475596569098e-06, "loss": 0.0147, "step": 218320 }, { "epoch": 0.9109704500504878, "grad_norm": 0.5936958301837643, "learning_rate": 2.095623562529266e-06, "loss": 0.0212, "step": 218325 }, { "epoch": 0.9109913127654781, "grad_norm": 0.6301363941190614, "learning_rate": 2.095599566225972e-06, "loss": 0.0269, "step": 218330 }, { "epoch": 0.9110121754804683, "grad_norm": 0.6227420108666725, "learning_rate": 2.0955755707469806e-06, "loss": 0.0208, "step": 218335 }, { "epoch": 0.9110330381954586, "grad_norm": 0.5071040411362896, "learning_rate": 2.095551576092245e-06, "loss": 0.0223, "step": 218340 }, { "epoch": 0.9110539009104489, "grad_norm": 0.5026307007122416, "learning_rate": 2.0955275822617175e-06, "loss": 0.0184, "step": 218345 }, { "epoch": 0.9110747636254392, "grad_norm": 0.7497082221082573, "learning_rate": 2.095503589255351e-06, "loss": 0.0176, "step": 218350 }, { "epoch": 0.9110956263404294, "grad_norm": 3.356245507506493, "learning_rate": 2.095479597073099e-06, "loss": 0.0418, "step": 218355 }, { "epoch": 0.9111164890554198, "grad_norm": 0.44750124366446525, "learning_rate": 2.095455605714913e-06, "loss": 0.0149, "step": 218360 }, { "epoch": 0.91113735177041, "grad_norm": 0.5200650064171334, "learning_rate": 2.095431615180747e-06, "loss": 0.0203, "step": 218365 }, { "epoch": 0.9111582144854002, "grad_norm": 0.3748749735123707, "learning_rate": 2.0954076254705534e-06, "loss": 0.0267, "step": 218370 }, { "epoch": 0.9111790772003906, "grad_norm": 0.6791241272928591, "learning_rate": 2.0953836365842853e-06, "loss": 0.0234, "step": 218375 }, { "epoch": 0.9111999399153808, "grad_norm": 0.32514567923884297, "learning_rate": 2.095359648521895e-06, "loss": 0.0204, "step": 218380 }, { "epoch": 0.9112208026303711, "grad_norm": 0.5559809461851802, "learning_rate": 2.0953356612833356e-06, "loss": 0.0176, "step": 218385 }, { "epoch": 0.9112416653453613, "grad_norm": 0.6232293471073097, "learning_rate": 2.09531167486856e-06, "loss": 0.0168, "step": 218390 }, { "epoch": 0.9112625280603517, "grad_norm": 0.5101287222676497, "learning_rate": 2.0952876892775217e-06, "loss": 0.0262, "step": 218395 }, { "epoch": 0.9112833907753419, "grad_norm": 0.431219029493071, "learning_rate": 2.095263704510172e-06, "loss": 0.0184, "step": 218400 }, { "epoch": 0.9113042534903322, "grad_norm": 0.42045879509853623, "learning_rate": 2.095239720566465e-06, "loss": 0.016, "step": 218405 }, { "epoch": 0.9113251162053225, "grad_norm": 0.5133233618628752, "learning_rate": 2.0952157374463534e-06, "loss": 0.0247, "step": 218410 }, { "epoch": 0.9113459789203128, "grad_norm": 0.5076456919435226, "learning_rate": 2.09519175514979e-06, "loss": 0.0231, "step": 218415 }, { "epoch": 0.911366841635303, "grad_norm": 0.6314703720729202, "learning_rate": 2.095167773676727e-06, "loss": 0.0195, "step": 218420 }, { "epoch": 0.9113877043502934, "grad_norm": 0.5341956132091805, "learning_rate": 2.0951437930271186e-06, "loss": 0.0171, "step": 218425 }, { "epoch": 0.9114085670652836, "grad_norm": 0.43336639562293383, "learning_rate": 2.095119813200916e-06, "loss": 0.0275, "step": 218430 }, { "epoch": 0.9114294297802739, "grad_norm": 0.4396688757889125, "learning_rate": 2.0950958341980736e-06, "loss": 0.0143, "step": 218435 }, { "epoch": 0.9114502924952642, "grad_norm": 0.6770287797463479, "learning_rate": 2.0950718560185433e-06, "loss": 0.0188, "step": 218440 }, { "epoch": 0.9114711552102545, "grad_norm": 0.8804519406801896, "learning_rate": 2.0950478786622786e-06, "loss": 0.0206, "step": 218445 }, { "epoch": 0.9114920179252447, "grad_norm": 0.716553647562649, "learning_rate": 2.0950239021292318e-06, "loss": 0.024, "step": 218450 }, { "epoch": 0.911512880640235, "grad_norm": 0.6802960607700126, "learning_rate": 2.0949999264193565e-06, "loss": 0.024, "step": 218455 }, { "epoch": 0.9115337433552253, "grad_norm": 0.4175574089459354, "learning_rate": 2.094975951532605e-06, "loss": 0.0118, "step": 218460 }, { "epoch": 0.9115546060702155, "grad_norm": 0.6117558278459266, "learning_rate": 2.09495197746893e-06, "loss": 0.022, "step": 218465 }, { "epoch": 0.9115754687852058, "grad_norm": 0.6929433645689426, "learning_rate": 2.0949280042282855e-06, "loss": 0.0242, "step": 218470 }, { "epoch": 0.9115963315001961, "grad_norm": 0.7543775861921076, "learning_rate": 2.094904031810624e-06, "loss": 0.0132, "step": 218475 }, { "epoch": 0.9116171942151864, "grad_norm": 0.3682666655954379, "learning_rate": 2.094880060215898e-06, "loss": 0.0165, "step": 218480 }, { "epoch": 0.9116380569301766, "grad_norm": 0.9300084015442112, "learning_rate": 2.09485608944406e-06, "loss": 0.0187, "step": 218485 }, { "epoch": 0.911658919645167, "grad_norm": 0.9896511449693789, "learning_rate": 2.094832119495064e-06, "loss": 0.026, "step": 218490 }, { "epoch": 0.9116797823601572, "grad_norm": 0.9016868771870314, "learning_rate": 2.094808150368862e-06, "loss": 0.0296, "step": 218495 }, { "epoch": 0.9117006450751475, "grad_norm": 0.5182633615880963, "learning_rate": 2.094784182065408e-06, "loss": 0.0227, "step": 218500 }, { "epoch": 0.9117215077901378, "grad_norm": 0.7164708436097362, "learning_rate": 2.0947602145846536e-06, "loss": 0.021, "step": 218505 }, { "epoch": 0.9117423705051281, "grad_norm": 0.5638767250852424, "learning_rate": 2.0947362479265527e-06, "loss": 0.0215, "step": 218510 }, { "epoch": 0.9117632332201183, "grad_norm": 0.5184021055763146, "learning_rate": 2.094712282091058e-06, "loss": 0.0183, "step": 218515 }, { "epoch": 0.9117840959351086, "grad_norm": 0.3550385636295674, "learning_rate": 2.094688317078123e-06, "loss": 0.0108, "step": 218520 }, { "epoch": 0.9118049586500989, "grad_norm": 0.8326421911138089, "learning_rate": 2.0946643528876993e-06, "loss": 0.0273, "step": 218525 }, { "epoch": 0.9118258213650892, "grad_norm": 0.6267841924113436, "learning_rate": 2.0946403895197404e-06, "loss": 0.0232, "step": 218530 }, { "epoch": 0.9118466840800794, "grad_norm": 1.2432947297081811, "learning_rate": 2.0946164269742004e-06, "loss": 0.024, "step": 218535 }, { "epoch": 0.9118675467950698, "grad_norm": 0.6762036833560127, "learning_rate": 2.0945924652510304e-06, "loss": 0.0133, "step": 218540 }, { "epoch": 0.91188840951006, "grad_norm": 0.7531573955648972, "learning_rate": 2.094568504350185e-06, "loss": 0.0249, "step": 218545 }, { "epoch": 0.9119092722250502, "grad_norm": 0.985826832110248, "learning_rate": 2.0945445442716166e-06, "loss": 0.0196, "step": 218550 }, { "epoch": 0.9119301349400406, "grad_norm": 1.0860176520790286, "learning_rate": 2.094520585015277e-06, "loss": 0.0229, "step": 218555 }, { "epoch": 0.9119509976550308, "grad_norm": 0.5306600131782037, "learning_rate": 2.0944966265811208e-06, "loss": 0.024, "step": 218560 }, { "epoch": 0.9119718603700211, "grad_norm": 0.3601875715399425, "learning_rate": 2.0944726689691e-06, "loss": 0.012, "step": 218565 }, { "epoch": 0.9119927230850113, "grad_norm": 0.5212368326666629, "learning_rate": 2.0944487121791685e-06, "loss": 0.0184, "step": 218570 }, { "epoch": 0.9120135858000017, "grad_norm": 0.8249960422888163, "learning_rate": 2.0944247562112786e-06, "loss": 0.0189, "step": 218575 }, { "epoch": 0.9120344485149919, "grad_norm": 0.6842060988002363, "learning_rate": 2.0944008010653834e-06, "loss": 0.0195, "step": 218580 }, { "epoch": 0.9120553112299822, "grad_norm": 0.42976384246420096, "learning_rate": 2.0943768467414358e-06, "loss": 0.0253, "step": 218585 }, { "epoch": 0.9120761739449725, "grad_norm": 0.7931434916039874, "learning_rate": 2.0943528932393896e-06, "loss": 0.024, "step": 218590 }, { "epoch": 0.9120970366599628, "grad_norm": 0.2177570105934626, "learning_rate": 2.0943289405591963e-06, "loss": 0.0159, "step": 218595 }, { "epoch": 0.912117899374953, "grad_norm": 1.665470780469347, "learning_rate": 2.0943049887008095e-06, "loss": 0.0282, "step": 218600 }, { "epoch": 0.9121387620899434, "grad_norm": 0.26080281710818465, "learning_rate": 2.0942810376641832e-06, "loss": 0.0129, "step": 218605 }, { "epoch": 0.9121596248049336, "grad_norm": 0.14334958190471367, "learning_rate": 2.094257087449269e-06, "loss": 0.0124, "step": 218610 }, { "epoch": 0.9121804875199239, "grad_norm": 0.8552952745667486, "learning_rate": 2.0942331380560214e-06, "loss": 0.0207, "step": 218615 }, { "epoch": 0.9122013502349142, "grad_norm": 0.8704829049674985, "learning_rate": 2.094209189484392e-06, "loss": 0.0213, "step": 218620 }, { "epoch": 0.9122222129499045, "grad_norm": 0.9385564296343615, "learning_rate": 2.0941852417343343e-06, "loss": 0.0246, "step": 218625 }, { "epoch": 0.9122430756648947, "grad_norm": 0.5581964579423774, "learning_rate": 2.0941612948058017e-06, "loss": 0.0249, "step": 218630 }, { "epoch": 0.912263938379885, "grad_norm": 1.2227389097906207, "learning_rate": 2.094137348698747e-06, "loss": 0.0245, "step": 218635 }, { "epoch": 0.9122848010948753, "grad_norm": 0.5617076763312434, "learning_rate": 2.0941134034131234e-06, "loss": 0.0222, "step": 218640 }, { "epoch": 0.9123056638098656, "grad_norm": 0.5449413915307763, "learning_rate": 2.0940894589488832e-06, "loss": 0.0222, "step": 218645 }, { "epoch": 0.9123265265248558, "grad_norm": 0.595341108454432, "learning_rate": 2.0940655153059804e-06, "loss": 0.0197, "step": 218650 }, { "epoch": 0.9123473892398462, "grad_norm": 0.6749527518313349, "learning_rate": 2.094041572484367e-06, "loss": 0.025, "step": 218655 }, { "epoch": 0.9123682519548364, "grad_norm": 0.4756162480271732, "learning_rate": 2.094017630483997e-06, "loss": 0.0252, "step": 218660 }, { "epoch": 0.9123891146698266, "grad_norm": 0.6117731528289704, "learning_rate": 2.093993689304823e-06, "loss": 0.0141, "step": 218665 }, { "epoch": 0.912409977384817, "grad_norm": 1.0585393587235654, "learning_rate": 2.0939697489467988e-06, "loss": 0.0255, "step": 218670 }, { "epoch": 0.9124308400998072, "grad_norm": 0.42831759354586574, "learning_rate": 2.0939458094098767e-06, "loss": 0.0194, "step": 218675 }, { "epoch": 0.9124517028147975, "grad_norm": 0.5733463322827724, "learning_rate": 2.0939218706940096e-06, "loss": 0.0203, "step": 218680 }, { "epoch": 0.9124725655297878, "grad_norm": 0.7501652026537378, "learning_rate": 2.0938979327991507e-06, "loss": 0.0218, "step": 218685 }, { "epoch": 0.9124934282447781, "grad_norm": 0.7030877464486853, "learning_rate": 2.093873995725254e-06, "loss": 0.0248, "step": 218690 }, { "epoch": 0.9125142909597683, "grad_norm": 1.6420291651370698, "learning_rate": 2.0938500594722706e-06, "loss": 0.0217, "step": 218695 }, { "epoch": 0.9125351536747586, "grad_norm": 1.1065555783320848, "learning_rate": 2.0938261240401554e-06, "loss": 0.0231, "step": 218700 }, { "epoch": 0.9125560163897489, "grad_norm": 0.5696632498345666, "learning_rate": 2.0938021894288616e-06, "loss": 0.0155, "step": 218705 }, { "epoch": 0.9125768791047392, "grad_norm": 0.71747322368374, "learning_rate": 2.0937782556383407e-06, "loss": 0.0312, "step": 218710 }, { "epoch": 0.9125977418197294, "grad_norm": 0.44970485521198217, "learning_rate": 2.093754322668547e-06, "loss": 0.0186, "step": 218715 }, { "epoch": 0.9126186045347198, "grad_norm": 0.4843019155520723, "learning_rate": 2.0937303905194335e-06, "loss": 0.0138, "step": 218720 }, { "epoch": 0.91263946724971, "grad_norm": 0.546738036037596, "learning_rate": 2.0937064591909523e-06, "loss": 0.0215, "step": 218725 }, { "epoch": 0.9126603299647003, "grad_norm": 0.5463318889253034, "learning_rate": 2.0936825286830576e-06, "loss": 0.0195, "step": 218730 }, { "epoch": 0.9126811926796906, "grad_norm": 1.3446871998438918, "learning_rate": 2.093658598995702e-06, "loss": 0.0229, "step": 218735 }, { "epoch": 0.9127020553946809, "grad_norm": 0.8136435243812754, "learning_rate": 2.093634670128839e-06, "loss": 0.0343, "step": 218740 }, { "epoch": 0.9127229181096711, "grad_norm": 0.5059482114143069, "learning_rate": 2.0936107420824218e-06, "loss": 0.0198, "step": 218745 }, { "epoch": 0.9127437808246613, "grad_norm": 0.48583316657362324, "learning_rate": 2.093586814856403e-06, "loss": 0.0217, "step": 218750 }, { "epoch": 0.9127646435396517, "grad_norm": 0.3837615256996085, "learning_rate": 2.0935628884507357e-06, "loss": 0.0182, "step": 218755 }, { "epoch": 0.9127855062546419, "grad_norm": 0.6408689018014632, "learning_rate": 2.0935389628653734e-06, "loss": 0.018, "step": 218760 }, { "epoch": 0.9128063689696322, "grad_norm": 0.6708367468964528, "learning_rate": 2.093515038100269e-06, "loss": 0.0204, "step": 218765 }, { "epoch": 0.9128272316846225, "grad_norm": 0.39004710416428523, "learning_rate": 2.0934911141553756e-06, "loss": 0.0216, "step": 218770 }, { "epoch": 0.9128480943996128, "grad_norm": 0.3544989016005397, "learning_rate": 2.0934671910306465e-06, "loss": 0.0218, "step": 218775 }, { "epoch": 0.912868957114603, "grad_norm": 1.041865191306509, "learning_rate": 2.093443268726035e-06, "loss": 0.0177, "step": 218780 }, { "epoch": 0.9128898198295934, "grad_norm": 0.5874088154621437, "learning_rate": 2.093419347241494e-06, "loss": 0.0223, "step": 218785 }, { "epoch": 0.9129106825445836, "grad_norm": 0.41721701468826444, "learning_rate": 2.0933954265769767e-06, "loss": 0.0179, "step": 218790 }, { "epoch": 0.9129315452595739, "grad_norm": 1.8523542110349591, "learning_rate": 2.093371506732436e-06, "loss": 0.0217, "step": 218795 }, { "epoch": 0.9129524079745642, "grad_norm": 1.5369761334039311, "learning_rate": 2.0933475877078255e-06, "loss": 0.0281, "step": 218800 }, { "epoch": 0.9129732706895545, "grad_norm": 0.4502351695776893, "learning_rate": 2.093323669503098e-06, "loss": 0.0209, "step": 218805 }, { "epoch": 0.9129941334045447, "grad_norm": 1.3050967071096788, "learning_rate": 2.0932997521182067e-06, "loss": 0.0241, "step": 218810 }, { "epoch": 0.913014996119535, "grad_norm": 0.4761086034689435, "learning_rate": 2.093275835553105e-06, "loss": 0.0189, "step": 218815 }, { "epoch": 0.9130358588345253, "grad_norm": 0.6539944552712845, "learning_rate": 2.093251919807746e-06, "loss": 0.0204, "step": 218820 }, { "epoch": 0.9130567215495156, "grad_norm": 0.7795989420175665, "learning_rate": 2.0932280048820824e-06, "loss": 0.0188, "step": 218825 }, { "epoch": 0.9130775842645058, "grad_norm": 0.43132846304768807, "learning_rate": 2.093204090776068e-06, "loss": 0.0213, "step": 218830 }, { "epoch": 0.9130984469794962, "grad_norm": 0.43171290063742795, "learning_rate": 2.093180177489656e-06, "loss": 0.0221, "step": 218835 }, { "epoch": 0.9131193096944864, "grad_norm": 0.44057289556499823, "learning_rate": 2.093156265022799e-06, "loss": 0.0245, "step": 218840 }, { "epoch": 0.9131401724094766, "grad_norm": 0.7314765153271205, "learning_rate": 2.0931323533754506e-06, "loss": 0.0144, "step": 218845 }, { "epoch": 0.913161035124467, "grad_norm": 0.41883915586488935, "learning_rate": 2.0931084425475643e-06, "loss": 0.0186, "step": 218850 }, { "epoch": 0.9131818978394572, "grad_norm": 0.7710354403786854, "learning_rate": 2.093084532539093e-06, "loss": 0.0195, "step": 218855 }, { "epoch": 0.9132027605544475, "grad_norm": 0.15962041585911768, "learning_rate": 2.0930606233499893e-06, "loss": 0.0183, "step": 218860 }, { "epoch": 0.9132236232694378, "grad_norm": 0.698790700075719, "learning_rate": 2.093036714980207e-06, "loss": 0.0245, "step": 218865 }, { "epoch": 0.9132444859844281, "grad_norm": 0.8881848140786232, "learning_rate": 2.0930128074296995e-06, "loss": 0.0214, "step": 218870 }, { "epoch": 0.9132653486994183, "grad_norm": 0.6244591566644779, "learning_rate": 2.0929889006984194e-06, "loss": 0.0171, "step": 218875 }, { "epoch": 0.9132862114144086, "grad_norm": 2.15461913217176, "learning_rate": 2.0929649947863206e-06, "loss": 0.0177, "step": 218880 }, { "epoch": 0.9133070741293989, "grad_norm": 0.497444368800032, "learning_rate": 2.092941089693356e-06, "loss": 0.0181, "step": 218885 }, { "epoch": 0.9133279368443892, "grad_norm": 0.5630594477571936, "learning_rate": 2.0929171854194785e-06, "loss": 0.0125, "step": 218890 }, { "epoch": 0.9133487995593794, "grad_norm": 0.7389798352104409, "learning_rate": 2.0928932819646418e-06, "loss": 0.0202, "step": 218895 }, { "epoch": 0.9133696622743698, "grad_norm": 0.9696135048861675, "learning_rate": 2.0928693793287986e-06, "loss": 0.0186, "step": 218900 }, { "epoch": 0.91339052498936, "grad_norm": 1.0758103449885077, "learning_rate": 2.092845477511903e-06, "loss": 0.0225, "step": 218905 }, { "epoch": 0.9134113877043503, "grad_norm": 0.4609074433761321, "learning_rate": 2.092821576513907e-06, "loss": 0.0183, "step": 218910 }, { "epoch": 0.9134322504193406, "grad_norm": 1.010994943360858, "learning_rate": 2.0927976763347654e-06, "loss": 0.0193, "step": 218915 }, { "epoch": 0.9134531131343309, "grad_norm": 0.5000063097075657, "learning_rate": 2.09277377697443e-06, "loss": 0.0185, "step": 218920 }, { "epoch": 0.9134739758493211, "grad_norm": 0.3979258267497822, "learning_rate": 2.092749878432855e-06, "loss": 0.0208, "step": 218925 }, { "epoch": 0.9134948385643114, "grad_norm": 0.5675790366100059, "learning_rate": 2.0927259807099933e-06, "loss": 0.0144, "step": 218930 }, { "epoch": 0.9135157012793017, "grad_norm": 0.5881940513028432, "learning_rate": 2.092702083805798e-06, "loss": 0.0184, "step": 218935 }, { "epoch": 0.913536563994292, "grad_norm": 1.1333870280192055, "learning_rate": 2.0926781877202224e-06, "loss": 0.0181, "step": 218940 }, { "epoch": 0.9135574267092822, "grad_norm": 1.0921240566060497, "learning_rate": 2.0926542924532207e-06, "loss": 0.0179, "step": 218945 }, { "epoch": 0.9135782894242725, "grad_norm": 0.40277359378469346, "learning_rate": 2.0926303980047443e-06, "loss": 0.0227, "step": 218950 }, { "epoch": 0.9135991521392628, "grad_norm": 0.6537613647494354, "learning_rate": 2.0926065043747477e-06, "loss": 0.0163, "step": 218955 }, { "epoch": 0.913620014854253, "grad_norm": 0.5861657613115093, "learning_rate": 2.092582611563185e-06, "loss": 0.0149, "step": 218960 }, { "epoch": 0.9136408775692434, "grad_norm": 0.8790432206378194, "learning_rate": 2.0925587195700074e-06, "loss": 0.0209, "step": 218965 }, { "epoch": 0.9136617402842336, "grad_norm": 0.7066471553275161, "learning_rate": 2.0925348283951697e-06, "loss": 0.0248, "step": 218970 }, { "epoch": 0.9136826029992239, "grad_norm": 0.4898662654058855, "learning_rate": 2.0925109380386247e-06, "loss": 0.0172, "step": 218975 }, { "epoch": 0.9137034657142142, "grad_norm": 0.6120208542306121, "learning_rate": 2.0924870485003253e-06, "loss": 0.0188, "step": 218980 }, { "epoch": 0.9137243284292045, "grad_norm": 0.520471845287166, "learning_rate": 2.0924631597802255e-06, "loss": 0.0158, "step": 218985 }, { "epoch": 0.9137451911441947, "grad_norm": 0.6134617742259229, "learning_rate": 2.0924392718782786e-06, "loss": 0.0216, "step": 218990 }, { "epoch": 0.913766053859185, "grad_norm": 0.9568630954629073, "learning_rate": 2.0924153847944377e-06, "loss": 0.0248, "step": 218995 }, { "epoch": 0.9137869165741753, "grad_norm": 0.5797519539912585, "learning_rate": 2.0923914985286556e-06, "loss": 0.0206, "step": 219000 }, { "epoch": 0.9138077792891656, "grad_norm": 0.8435154790539252, "learning_rate": 2.092367613080886e-06, "loss": 0.0226, "step": 219005 }, { "epoch": 0.9138286420041558, "grad_norm": 0.6899166799519625, "learning_rate": 2.0923437284510824e-06, "loss": 0.0182, "step": 219010 }, { "epoch": 0.9138495047191462, "grad_norm": 0.8738484171468116, "learning_rate": 2.092319844639198e-06, "loss": 0.0218, "step": 219015 }, { "epoch": 0.9138703674341364, "grad_norm": 0.43324881956239525, "learning_rate": 2.092295961645186e-06, "loss": 0.0156, "step": 219020 }, { "epoch": 0.9138912301491267, "grad_norm": 0.9123280243260461, "learning_rate": 2.0922720794689995e-06, "loss": 0.0172, "step": 219025 }, { "epoch": 0.913912092864117, "grad_norm": 0.32268485117606405, "learning_rate": 2.0922481981105926e-06, "loss": 0.0157, "step": 219030 }, { "epoch": 0.9139329555791073, "grad_norm": 0.7230751346843964, "learning_rate": 2.0922243175699177e-06, "loss": 0.0208, "step": 219035 }, { "epoch": 0.9139538182940975, "grad_norm": 1.1888048388141286, "learning_rate": 2.092200437846929e-06, "loss": 0.0194, "step": 219040 }, { "epoch": 0.9139746810090879, "grad_norm": 1.3462157704664295, "learning_rate": 2.092176558941579e-06, "loss": 0.022, "step": 219045 }, { "epoch": 0.9139955437240781, "grad_norm": 0.4830549884059121, "learning_rate": 2.092152680853822e-06, "loss": 0.0231, "step": 219050 }, { "epoch": 0.9140164064390683, "grad_norm": 0.8268632067907389, "learning_rate": 2.092128803583611e-06, "loss": 0.0176, "step": 219055 }, { "epoch": 0.9140372691540586, "grad_norm": 0.58386155645002, "learning_rate": 2.0921049271308984e-06, "loss": 0.0166, "step": 219060 }, { "epoch": 0.9140581318690489, "grad_norm": 0.8068924246446088, "learning_rate": 2.092081051495639e-06, "loss": 0.0272, "step": 219065 }, { "epoch": 0.9140789945840392, "grad_norm": 0.9120658077784942, "learning_rate": 2.092057176677785e-06, "loss": 0.0215, "step": 219070 }, { "epoch": 0.9140998572990294, "grad_norm": 0.4298194337289443, "learning_rate": 2.0920333026772904e-06, "loss": 0.0215, "step": 219075 }, { "epoch": 0.9141207200140198, "grad_norm": 0.46373019865442344, "learning_rate": 2.092009429494108e-06, "loss": 0.0287, "step": 219080 }, { "epoch": 0.91414158272901, "grad_norm": 0.4875639571546402, "learning_rate": 2.091985557128192e-06, "loss": 0.018, "step": 219085 }, { "epoch": 0.9141624454440003, "grad_norm": 0.6641765176437631, "learning_rate": 2.0919616855794953e-06, "loss": 0.0144, "step": 219090 }, { "epoch": 0.9141833081589906, "grad_norm": 0.40878689816523456, "learning_rate": 2.0919378148479714e-06, "loss": 0.024, "step": 219095 }, { "epoch": 0.9142041708739809, "grad_norm": 0.6989934260040835, "learning_rate": 2.091913944933573e-06, "loss": 0.0312, "step": 219100 }, { "epoch": 0.9142250335889711, "grad_norm": 0.5212258762719119, "learning_rate": 2.091890075836255e-06, "loss": 0.0169, "step": 219105 }, { "epoch": 0.9142458963039614, "grad_norm": 0.6896817108850246, "learning_rate": 2.091866207555969e-06, "loss": 0.0208, "step": 219110 }, { "epoch": 0.9142667590189517, "grad_norm": 0.5523851365843421, "learning_rate": 2.0918423400926695e-06, "loss": 0.0211, "step": 219115 }, { "epoch": 0.914287621733942, "grad_norm": 0.4917775985170061, "learning_rate": 2.0918184734463097e-06, "loss": 0.0197, "step": 219120 }, { "epoch": 0.9143084844489322, "grad_norm": 0.32314635852704876, "learning_rate": 2.0917946076168433e-06, "loss": 0.0192, "step": 219125 }, { "epoch": 0.9143293471639226, "grad_norm": 0.732139253214841, "learning_rate": 2.091770742604223e-06, "loss": 0.018, "step": 219130 }, { "epoch": 0.9143502098789128, "grad_norm": 0.370406930960202, "learning_rate": 2.0917468784084023e-06, "loss": 0.0192, "step": 219135 }, { "epoch": 0.914371072593903, "grad_norm": 0.43220538642362055, "learning_rate": 2.0917230150293353e-06, "loss": 0.0157, "step": 219140 }, { "epoch": 0.9143919353088934, "grad_norm": 0.49118770191657424, "learning_rate": 2.0916991524669745e-06, "loss": 0.0202, "step": 219145 }, { "epoch": 0.9144127980238836, "grad_norm": 1.15160415607926, "learning_rate": 2.091675290721274e-06, "loss": 0.0242, "step": 219150 }, { "epoch": 0.9144336607388739, "grad_norm": 0.531540200587282, "learning_rate": 2.0916514297921873e-06, "loss": 0.0186, "step": 219155 }, { "epoch": 0.9144545234538642, "grad_norm": 0.599346744951735, "learning_rate": 2.091627569679667e-06, "loss": 0.02, "step": 219160 }, { "epoch": 0.9144753861688545, "grad_norm": 0.58534321835956, "learning_rate": 2.091603710383667e-06, "loss": 0.023, "step": 219165 }, { "epoch": 0.9144962488838447, "grad_norm": 0.90367639061435, "learning_rate": 2.091579851904141e-06, "loss": 0.0219, "step": 219170 }, { "epoch": 0.914517111598835, "grad_norm": 0.9526324943559124, "learning_rate": 2.0915559942410425e-06, "loss": 0.018, "step": 219175 }, { "epoch": 0.9145379743138253, "grad_norm": 0.49982937332286537, "learning_rate": 2.091532137394324e-06, "loss": 0.0242, "step": 219180 }, { "epoch": 0.9145588370288156, "grad_norm": 0.6556844568802138, "learning_rate": 2.0915082813639405e-06, "loss": 0.0165, "step": 219185 }, { "epoch": 0.9145796997438058, "grad_norm": 0.5396120933927321, "learning_rate": 2.091484426149844e-06, "loss": 0.0235, "step": 219190 }, { "epoch": 0.9146005624587962, "grad_norm": 0.21105888400185657, "learning_rate": 2.0914605717519882e-06, "loss": 0.0191, "step": 219195 }, { "epoch": 0.9146214251737864, "grad_norm": 0.8148427683767219, "learning_rate": 2.0914367181703273e-06, "loss": 0.0167, "step": 219200 }, { "epoch": 0.9146422878887767, "grad_norm": 0.2656332636425578, "learning_rate": 2.091412865404814e-06, "loss": 0.0298, "step": 219205 }, { "epoch": 0.914663150603767, "grad_norm": 0.3286684795299011, "learning_rate": 2.091389013455402e-06, "loss": 0.012, "step": 219210 }, { "epoch": 0.9146840133187573, "grad_norm": 0.3832478023687274, "learning_rate": 2.091365162322045e-06, "loss": 0.0175, "step": 219215 }, { "epoch": 0.9147048760337475, "grad_norm": 0.2673705110369897, "learning_rate": 2.0913413120046965e-06, "loss": 0.0139, "step": 219220 }, { "epoch": 0.9147257387487379, "grad_norm": 0.618151164995315, "learning_rate": 2.0913174625033093e-06, "loss": 0.0242, "step": 219225 }, { "epoch": 0.9147466014637281, "grad_norm": 0.5504372449307188, "learning_rate": 2.0912936138178376e-06, "loss": 0.0217, "step": 219230 }, { "epoch": 0.9147674641787183, "grad_norm": 1.0480093830712418, "learning_rate": 2.0912697659482344e-06, "loss": 0.0272, "step": 219235 }, { "epoch": 0.9147883268937086, "grad_norm": 0.9494624492857603, "learning_rate": 2.091245918894454e-06, "loss": 0.0307, "step": 219240 }, { "epoch": 0.914809189608699, "grad_norm": 0.6974521146621727, "learning_rate": 2.0912220726564484e-06, "loss": 0.018, "step": 219245 }, { "epoch": 0.9148300523236892, "grad_norm": 0.6325678241154141, "learning_rate": 2.091198227234173e-06, "loss": 0.0237, "step": 219250 }, { "epoch": 0.9148509150386794, "grad_norm": 0.5649995563278246, "learning_rate": 2.0911743826275794e-06, "loss": 0.0178, "step": 219255 }, { "epoch": 0.9148717777536698, "grad_norm": 0.44173623849653676, "learning_rate": 2.0911505388366224e-06, "loss": 0.0226, "step": 219260 }, { "epoch": 0.91489264046866, "grad_norm": 1.482673742048382, "learning_rate": 2.0911266958612548e-06, "loss": 0.026, "step": 219265 }, { "epoch": 0.9149135031836503, "grad_norm": 0.9441740087971828, "learning_rate": 2.0911028537014304e-06, "loss": 0.0201, "step": 219270 }, { "epoch": 0.9149343658986406, "grad_norm": 0.45733548798249574, "learning_rate": 2.091079012357103e-06, "loss": 0.0189, "step": 219275 }, { "epoch": 0.9149552286136309, "grad_norm": 0.9683168896374887, "learning_rate": 2.0910551718282253e-06, "loss": 0.022, "step": 219280 }, { "epoch": 0.9149760913286211, "grad_norm": 0.4607992623117811, "learning_rate": 2.0910313321147515e-06, "loss": 0.0167, "step": 219285 }, { "epoch": 0.9149969540436114, "grad_norm": 0.4269106364705703, "learning_rate": 2.091007493216635e-06, "loss": 0.0289, "step": 219290 }, { "epoch": 0.9150178167586017, "grad_norm": 0.4747563532313497, "learning_rate": 2.090983655133829e-06, "loss": 0.0184, "step": 219295 }, { "epoch": 0.915038679473592, "grad_norm": 1.0014126960244076, "learning_rate": 2.0909598178662875e-06, "loss": 0.0204, "step": 219300 }, { "epoch": 0.9150595421885822, "grad_norm": 0.6034923685833499, "learning_rate": 2.0909359814139638e-06, "loss": 0.0267, "step": 219305 }, { "epoch": 0.9150804049035726, "grad_norm": 0.517162648453408, "learning_rate": 2.090912145776812e-06, "loss": 0.0133, "step": 219310 }, { "epoch": 0.9151012676185628, "grad_norm": 0.6469930620909121, "learning_rate": 2.090888310954784e-06, "loss": 0.0149, "step": 219315 }, { "epoch": 0.915122130333553, "grad_norm": 0.851826271159206, "learning_rate": 2.090864476947835e-06, "loss": 0.0223, "step": 219320 }, { "epoch": 0.9151429930485434, "grad_norm": 0.4219820300718527, "learning_rate": 2.0908406437559177e-06, "loss": 0.0165, "step": 219325 }, { "epoch": 0.9151638557635337, "grad_norm": 0.5190597815828948, "learning_rate": 2.0908168113789863e-06, "loss": 0.0202, "step": 219330 }, { "epoch": 0.9151847184785239, "grad_norm": 0.5894210951780647, "learning_rate": 2.0907929798169938e-06, "loss": 0.0221, "step": 219335 }, { "epoch": 0.9152055811935143, "grad_norm": 0.6812240153653835, "learning_rate": 2.0907691490698935e-06, "loss": 0.0243, "step": 219340 }, { "epoch": 0.9152264439085045, "grad_norm": 1.0903696089860364, "learning_rate": 2.0907453191376402e-06, "loss": 0.0207, "step": 219345 }, { "epoch": 0.9152473066234947, "grad_norm": 0.7826582723001726, "learning_rate": 2.0907214900201856e-06, "loss": 0.0284, "step": 219350 }, { "epoch": 0.915268169338485, "grad_norm": 0.421222457518449, "learning_rate": 2.0906976617174856e-06, "loss": 0.013, "step": 219355 }, { "epoch": 0.9152890320534753, "grad_norm": 0.48412323589224016, "learning_rate": 2.0906738342294916e-06, "loss": 0.0194, "step": 219360 }, { "epoch": 0.9153098947684656, "grad_norm": 1.0338385743931882, "learning_rate": 2.090650007556158e-06, "loss": 0.0297, "step": 219365 }, { "epoch": 0.9153307574834558, "grad_norm": 0.5091109446774914, "learning_rate": 2.0906261816974392e-06, "loss": 0.0253, "step": 219370 }, { "epoch": 0.9153516201984462, "grad_norm": 0.6473374432178925, "learning_rate": 2.0906023566532877e-06, "loss": 0.0185, "step": 219375 }, { "epoch": 0.9153724829134364, "grad_norm": 0.7513227520288523, "learning_rate": 2.090578532423657e-06, "loss": 0.0223, "step": 219380 }, { "epoch": 0.9153933456284267, "grad_norm": 0.7033800702074567, "learning_rate": 2.090554709008501e-06, "loss": 0.019, "step": 219385 }, { "epoch": 0.915414208343417, "grad_norm": 0.5037907222777342, "learning_rate": 2.090530886407774e-06, "loss": 0.0198, "step": 219390 }, { "epoch": 0.9154350710584073, "grad_norm": 0.9647616134082605, "learning_rate": 2.0905070646214295e-06, "loss": 0.0274, "step": 219395 }, { "epoch": 0.9154559337733975, "grad_norm": 0.365703219083298, "learning_rate": 2.0904832436494198e-06, "loss": 0.031, "step": 219400 }, { "epoch": 0.9154767964883879, "grad_norm": 0.39849249935771386, "learning_rate": 2.0904594234916993e-06, "loss": 0.0236, "step": 219405 }, { "epoch": 0.9154976592033781, "grad_norm": 1.1756102338362322, "learning_rate": 2.0904356041482214e-06, "loss": 0.016, "step": 219410 }, { "epoch": 0.9155185219183684, "grad_norm": 0.45548014184679037, "learning_rate": 2.0904117856189403e-06, "loss": 0.0232, "step": 219415 }, { "epoch": 0.9155393846333586, "grad_norm": 0.2721934324537343, "learning_rate": 2.090387967903809e-06, "loss": 0.0163, "step": 219420 }, { "epoch": 0.915560247348349, "grad_norm": 0.7781012901965983, "learning_rate": 2.0903641510027817e-06, "loss": 0.0189, "step": 219425 }, { "epoch": 0.9155811100633392, "grad_norm": 0.5787562857544732, "learning_rate": 2.0903403349158113e-06, "loss": 0.0258, "step": 219430 }, { "epoch": 0.9156019727783294, "grad_norm": 0.7070208678839045, "learning_rate": 2.090316519642852e-06, "loss": 0.0232, "step": 219435 }, { "epoch": 0.9156228354933198, "grad_norm": 0.988000849257264, "learning_rate": 2.0902927051838572e-06, "loss": 0.0235, "step": 219440 }, { "epoch": 0.91564369820831, "grad_norm": 0.5200462483310972, "learning_rate": 2.0902688915387808e-06, "loss": 0.0148, "step": 219445 }, { "epoch": 0.9156645609233003, "grad_norm": 0.6704148563808136, "learning_rate": 2.0902450787075755e-06, "loss": 0.016, "step": 219450 }, { "epoch": 0.9156854236382906, "grad_norm": 0.8692139399791305, "learning_rate": 2.090221266690197e-06, "loss": 0.0152, "step": 219455 }, { "epoch": 0.9157062863532809, "grad_norm": 2.024869781809837, "learning_rate": 2.0901974554865964e-06, "loss": 0.0284, "step": 219460 }, { "epoch": 0.9157271490682711, "grad_norm": 0.5789658755097302, "learning_rate": 2.090173645096729e-06, "loss": 0.0172, "step": 219465 }, { "epoch": 0.9157480117832614, "grad_norm": 0.5287514694984572, "learning_rate": 2.090149835520548e-06, "loss": 0.0147, "step": 219470 }, { "epoch": 0.9157688744982517, "grad_norm": 0.8087462832760245, "learning_rate": 2.090126026758007e-06, "loss": 0.0189, "step": 219475 }, { "epoch": 0.915789737213242, "grad_norm": 0.5293237955952083, "learning_rate": 2.0901022188090595e-06, "loss": 0.0154, "step": 219480 }, { "epoch": 0.9158105999282322, "grad_norm": 0.44458669956213315, "learning_rate": 2.0900784116736595e-06, "loss": 0.0293, "step": 219485 }, { "epoch": 0.9158314626432226, "grad_norm": 0.5642243683933087, "learning_rate": 2.090054605351761e-06, "loss": 0.022, "step": 219490 }, { "epoch": 0.9158523253582128, "grad_norm": 0.48290749997722915, "learning_rate": 2.0900307998433165e-06, "loss": 0.02, "step": 219495 }, { "epoch": 0.9158731880732031, "grad_norm": 1.5171138271406046, "learning_rate": 2.090006995148281e-06, "loss": 0.0216, "step": 219500 }, { "epoch": 0.9158940507881934, "grad_norm": 0.46326120701420553, "learning_rate": 2.089983191266607e-06, "loss": 0.019, "step": 219505 }, { "epoch": 0.9159149135031837, "grad_norm": 0.66047387681684, "learning_rate": 2.0899593881982493e-06, "loss": 0.016, "step": 219510 }, { "epoch": 0.9159357762181739, "grad_norm": 0.8439102391295021, "learning_rate": 2.089935585943161e-06, "loss": 0.0156, "step": 219515 }, { "epoch": 0.9159566389331643, "grad_norm": 0.546668419551229, "learning_rate": 2.0899117845012958e-06, "loss": 0.0183, "step": 219520 }, { "epoch": 0.9159775016481545, "grad_norm": 0.944375087829558, "learning_rate": 2.0898879838726072e-06, "loss": 0.0219, "step": 219525 }, { "epoch": 0.9159983643631447, "grad_norm": 0.16535470361403667, "learning_rate": 2.0898641840570496e-06, "loss": 0.0131, "step": 219530 }, { "epoch": 0.916019227078135, "grad_norm": 0.5512465805200943, "learning_rate": 2.0898403850545758e-06, "loss": 0.0197, "step": 219535 }, { "epoch": 0.9160400897931253, "grad_norm": 1.5656121425889025, "learning_rate": 2.0898165868651397e-06, "loss": 0.0295, "step": 219540 }, { "epoch": 0.9160609525081156, "grad_norm": 0.45615602956667733, "learning_rate": 2.0897927894886955e-06, "loss": 0.0185, "step": 219545 }, { "epoch": 0.9160818152231058, "grad_norm": 0.42972346376264997, "learning_rate": 2.0897689929251968e-06, "loss": 0.0172, "step": 219550 }, { "epoch": 0.9161026779380962, "grad_norm": 0.6044586725897815, "learning_rate": 2.0897451971745972e-06, "loss": 0.0192, "step": 219555 }, { "epoch": 0.9161235406530864, "grad_norm": 0.5566721415241687, "learning_rate": 2.0897214022368503e-06, "loss": 0.0188, "step": 219560 }, { "epoch": 0.9161444033680767, "grad_norm": 2.268466195231622, "learning_rate": 2.08969760811191e-06, "loss": 0.0251, "step": 219565 }, { "epoch": 0.916165266083067, "grad_norm": 0.7034997213038406, "learning_rate": 2.0896738147997293e-06, "loss": 0.0224, "step": 219570 }, { "epoch": 0.9161861287980573, "grad_norm": 1.0824910686590845, "learning_rate": 2.0896500223002637e-06, "loss": 0.0228, "step": 219575 }, { "epoch": 0.9162069915130475, "grad_norm": 0.47604902319797754, "learning_rate": 2.089626230613465e-06, "loss": 0.0146, "step": 219580 }, { "epoch": 0.9162278542280379, "grad_norm": 0.745861690265443, "learning_rate": 2.089602439739288e-06, "loss": 0.0167, "step": 219585 }, { "epoch": 0.9162487169430281, "grad_norm": 3.330527126843668, "learning_rate": 2.089578649677686e-06, "loss": 0.0188, "step": 219590 }, { "epoch": 0.9162695796580184, "grad_norm": 0.678308133518954, "learning_rate": 2.0895548604286135e-06, "loss": 0.0184, "step": 219595 }, { "epoch": 0.9162904423730086, "grad_norm": 0.9101241980488967, "learning_rate": 2.0895310719920228e-06, "loss": 0.0178, "step": 219600 }, { "epoch": 0.916311305087999, "grad_norm": 0.6356233386918391, "learning_rate": 2.089507284367869e-06, "loss": 0.0214, "step": 219605 }, { "epoch": 0.9163321678029892, "grad_norm": 0.36925403694059433, "learning_rate": 2.0894834975561053e-06, "loss": 0.0223, "step": 219610 }, { "epoch": 0.9163530305179794, "grad_norm": 0.6975107709595196, "learning_rate": 2.089459711556686e-06, "loss": 0.0234, "step": 219615 }, { "epoch": 0.9163738932329698, "grad_norm": 0.5443780219142079, "learning_rate": 2.0894359263695634e-06, "loss": 0.0137, "step": 219620 }, { "epoch": 0.91639475594796, "grad_norm": 0.33177174084762623, "learning_rate": 2.089412141994693e-06, "loss": 0.0162, "step": 219625 }, { "epoch": 0.9164156186629503, "grad_norm": 0.6332022164619343, "learning_rate": 2.089388358432028e-06, "loss": 0.0166, "step": 219630 }, { "epoch": 0.9164364813779406, "grad_norm": 1.0056501062194958, "learning_rate": 2.0893645756815216e-06, "loss": 0.0162, "step": 219635 }, { "epoch": 0.9164573440929309, "grad_norm": 0.3757543112971427, "learning_rate": 2.089340793743128e-06, "loss": 0.0215, "step": 219640 }, { "epoch": 0.9164782068079211, "grad_norm": 0.5537996774450722, "learning_rate": 2.0893170126168013e-06, "loss": 0.0128, "step": 219645 }, { "epoch": 0.9164990695229114, "grad_norm": 0.7135509192715391, "learning_rate": 2.0892932323024946e-06, "loss": 0.0183, "step": 219650 }, { "epoch": 0.9165199322379017, "grad_norm": 0.6823219860135701, "learning_rate": 2.0892694528001626e-06, "loss": 0.0153, "step": 219655 }, { "epoch": 0.916540794952892, "grad_norm": 0.36822257396181307, "learning_rate": 2.0892456741097582e-06, "loss": 0.0333, "step": 219660 }, { "epoch": 0.9165616576678822, "grad_norm": 0.46069034436743644, "learning_rate": 2.0892218962312354e-06, "loss": 0.0208, "step": 219665 }, { "epoch": 0.9165825203828726, "grad_norm": 0.6874470631362468, "learning_rate": 2.0891981191645487e-06, "loss": 0.0231, "step": 219670 }, { "epoch": 0.9166033830978628, "grad_norm": 1.207535475609185, "learning_rate": 2.0891743429096504e-06, "loss": 0.0197, "step": 219675 }, { "epoch": 0.9166242458128531, "grad_norm": 1.8108345489635307, "learning_rate": 2.0891505674664964e-06, "loss": 0.022, "step": 219680 }, { "epoch": 0.9166451085278434, "grad_norm": 0.966177507950212, "learning_rate": 2.0891267928350384e-06, "loss": 0.0213, "step": 219685 }, { "epoch": 0.9166659712428337, "grad_norm": 1.0207234785826202, "learning_rate": 2.089103019015232e-06, "loss": 0.0581, "step": 219690 }, { "epoch": 0.9166868339578239, "grad_norm": 0.8566695711217107, "learning_rate": 2.08907924600703e-06, "loss": 0.0208, "step": 219695 }, { "epoch": 0.9167076966728143, "grad_norm": 0.5414847788931533, "learning_rate": 2.089055473810386e-06, "loss": 0.0177, "step": 219700 }, { "epoch": 0.9167285593878045, "grad_norm": 0.3930958862585811, "learning_rate": 2.0890317024252544e-06, "loss": 0.0155, "step": 219705 }, { "epoch": 0.9167494221027948, "grad_norm": 0.7189481066744333, "learning_rate": 2.089007931851589e-06, "loss": 0.0222, "step": 219710 }, { "epoch": 0.916770284817785, "grad_norm": 0.6280674937154003, "learning_rate": 2.088984162089344e-06, "loss": 0.0211, "step": 219715 }, { "epoch": 0.9167911475327754, "grad_norm": 0.9336516185999926, "learning_rate": 2.088960393138472e-06, "loss": 0.0145, "step": 219720 }, { "epoch": 0.9168120102477656, "grad_norm": 0.951485889551695, "learning_rate": 2.0889366249989274e-06, "loss": 0.0205, "step": 219725 }, { "epoch": 0.9168328729627558, "grad_norm": 0.6362598516682608, "learning_rate": 2.0889128576706654e-06, "loss": 0.0165, "step": 219730 }, { "epoch": 0.9168537356777462, "grad_norm": 0.5553394516727558, "learning_rate": 2.088889091153638e-06, "loss": 0.0163, "step": 219735 }, { "epoch": 0.9168745983927364, "grad_norm": 0.3453250938133392, "learning_rate": 2.0888653254477996e-06, "loss": 0.0198, "step": 219740 }, { "epoch": 0.9168954611077267, "grad_norm": 0.37201601504215526, "learning_rate": 2.0888415605531043e-06, "loss": 0.0209, "step": 219745 }, { "epoch": 0.916916323822717, "grad_norm": 0.8977064766004496, "learning_rate": 2.088817796469506e-06, "loss": 0.0172, "step": 219750 }, { "epoch": 0.9169371865377073, "grad_norm": 0.6306371526318101, "learning_rate": 2.0887940331969585e-06, "loss": 0.0229, "step": 219755 }, { "epoch": 0.9169580492526975, "grad_norm": 0.7918879677769134, "learning_rate": 2.0887702707354153e-06, "loss": 0.0203, "step": 219760 }, { "epoch": 0.9169789119676879, "grad_norm": 0.47270762581733095, "learning_rate": 2.088746509084831e-06, "loss": 0.0121, "step": 219765 }, { "epoch": 0.9169997746826781, "grad_norm": 0.5815875442002798, "learning_rate": 2.0887227482451584e-06, "loss": 0.0222, "step": 219770 }, { "epoch": 0.9170206373976684, "grad_norm": 0.9215202263956233, "learning_rate": 2.0886989882163523e-06, "loss": 0.0223, "step": 219775 }, { "epoch": 0.9170415001126586, "grad_norm": 0.726532187865753, "learning_rate": 2.0886752289983663e-06, "loss": 0.0212, "step": 219780 }, { "epoch": 0.917062362827649, "grad_norm": 0.5152876976031251, "learning_rate": 2.0886514705911544e-06, "loss": 0.0161, "step": 219785 }, { "epoch": 0.9170832255426392, "grad_norm": 0.9526117369525896, "learning_rate": 2.0886277129946707e-06, "loss": 0.0145, "step": 219790 }, { "epoch": 0.9171040882576295, "grad_norm": 0.522905894730648, "learning_rate": 2.088603956208868e-06, "loss": 0.024, "step": 219795 }, { "epoch": 0.9171249509726198, "grad_norm": 0.30031788414499583, "learning_rate": 2.0885802002337014e-06, "loss": 0.0159, "step": 219800 }, { "epoch": 0.9171458136876101, "grad_norm": 1.530088482520744, "learning_rate": 2.0885564450691243e-06, "loss": 0.0273, "step": 219805 }, { "epoch": 0.9171666764026003, "grad_norm": 0.6040863362073516, "learning_rate": 2.088532690715091e-06, "loss": 0.0179, "step": 219810 }, { "epoch": 0.9171875391175907, "grad_norm": 0.5112456578970638, "learning_rate": 2.0885089371715544e-06, "loss": 0.0181, "step": 219815 }, { "epoch": 0.9172084018325809, "grad_norm": 0.7591688103795473, "learning_rate": 2.088485184438469e-06, "loss": 0.0201, "step": 219820 }, { "epoch": 0.9172292645475711, "grad_norm": 0.5185689854836453, "learning_rate": 2.0884614325157896e-06, "loss": 0.0168, "step": 219825 }, { "epoch": 0.9172501272625614, "grad_norm": 0.29153417242195345, "learning_rate": 2.0884376814034685e-06, "loss": 0.0166, "step": 219830 }, { "epoch": 0.9172709899775517, "grad_norm": 0.8479999557924189, "learning_rate": 2.088413931101461e-06, "loss": 0.0176, "step": 219835 }, { "epoch": 0.917291852692542, "grad_norm": 0.5413111170236051, "learning_rate": 2.0883901816097206e-06, "loss": 0.0179, "step": 219840 }, { "epoch": 0.9173127154075322, "grad_norm": 0.5448820380193502, "learning_rate": 2.0883664329282003e-06, "loss": 0.0253, "step": 219845 }, { "epoch": 0.9173335781225226, "grad_norm": 0.5724675017569459, "learning_rate": 2.0883426850568555e-06, "loss": 0.0161, "step": 219850 }, { "epoch": 0.9173544408375128, "grad_norm": 0.7575220930943026, "learning_rate": 2.0883189379956393e-06, "loss": 0.0253, "step": 219855 }, { "epoch": 0.9173753035525031, "grad_norm": 1.1511340233902416, "learning_rate": 2.0882951917445055e-06, "loss": 0.0243, "step": 219860 }, { "epoch": 0.9173961662674934, "grad_norm": 0.5657455885506838, "learning_rate": 2.088271446303409e-06, "loss": 0.0248, "step": 219865 }, { "epoch": 0.9174170289824837, "grad_norm": 0.5986947502590612, "learning_rate": 2.0882477016723026e-06, "loss": 0.0251, "step": 219870 }, { "epoch": 0.9174378916974739, "grad_norm": 0.5302094584883811, "learning_rate": 2.088223957851141e-06, "loss": 0.0191, "step": 219875 }, { "epoch": 0.9174587544124643, "grad_norm": 0.5589651253644238, "learning_rate": 2.0882002148398775e-06, "loss": 0.02, "step": 219880 }, { "epoch": 0.9174796171274545, "grad_norm": 0.7848422055396799, "learning_rate": 2.0881764726384667e-06, "loss": 0.021, "step": 219885 }, { "epoch": 0.9175004798424448, "grad_norm": 0.6316490265156696, "learning_rate": 2.088152731246862e-06, "loss": 0.018, "step": 219890 }, { "epoch": 0.917521342557435, "grad_norm": 1.26583544306326, "learning_rate": 2.088128990665018e-06, "loss": 0.02, "step": 219895 }, { "epoch": 0.9175422052724254, "grad_norm": 0.5630652535823569, "learning_rate": 2.0881052508928885e-06, "loss": 0.0266, "step": 219900 }, { "epoch": 0.9175630679874156, "grad_norm": 0.592301140588801, "learning_rate": 2.088081511930427e-06, "loss": 0.0215, "step": 219905 }, { "epoch": 0.9175839307024058, "grad_norm": 0.5684284195753012, "learning_rate": 2.088057773777588e-06, "loss": 0.0263, "step": 219910 }, { "epoch": 0.9176047934173962, "grad_norm": 0.623210082535712, "learning_rate": 2.088034036434325e-06, "loss": 0.0138, "step": 219915 }, { "epoch": 0.9176256561323864, "grad_norm": 0.4495221859443021, "learning_rate": 2.088010299900593e-06, "loss": 0.0114, "step": 219920 }, { "epoch": 0.9176465188473767, "grad_norm": 0.7619199436917142, "learning_rate": 2.0879865641763443e-06, "loss": 0.0166, "step": 219925 }, { "epoch": 0.917667381562367, "grad_norm": 1.0522362378291397, "learning_rate": 2.087962829261534e-06, "loss": 0.0189, "step": 219930 }, { "epoch": 0.9176882442773573, "grad_norm": 0.45689941700301495, "learning_rate": 2.0879390951561166e-06, "loss": 0.0189, "step": 219935 }, { "epoch": 0.9177091069923475, "grad_norm": 0.9515347252461291, "learning_rate": 2.087915361860045e-06, "loss": 0.0117, "step": 219940 }, { "epoch": 0.9177299697073379, "grad_norm": 0.29303068742606136, "learning_rate": 2.087891629373274e-06, "loss": 0.0127, "step": 219945 }, { "epoch": 0.9177508324223281, "grad_norm": 0.8592089287514779, "learning_rate": 2.0878678976957565e-06, "loss": 0.0212, "step": 219950 }, { "epoch": 0.9177716951373184, "grad_norm": 0.5127188584390547, "learning_rate": 2.087844166827448e-06, "loss": 0.0144, "step": 219955 }, { "epoch": 0.9177925578523086, "grad_norm": 0.6474198746568952, "learning_rate": 2.0878204367683014e-06, "loss": 0.0176, "step": 219960 }, { "epoch": 0.917813420567299, "grad_norm": 0.5896214338888679, "learning_rate": 2.0877967075182713e-06, "loss": 0.0231, "step": 219965 }, { "epoch": 0.9178342832822892, "grad_norm": 0.14928433074214192, "learning_rate": 2.0877729790773113e-06, "loss": 0.023, "step": 219970 }, { "epoch": 0.9178551459972795, "grad_norm": 0.5998248614283065, "learning_rate": 2.087749251445376e-06, "loss": 0.0237, "step": 219975 }, { "epoch": 0.9178760087122698, "grad_norm": 0.6287528164098584, "learning_rate": 2.0877255246224186e-06, "loss": 0.0238, "step": 219980 }, { "epoch": 0.9178968714272601, "grad_norm": 0.3348952407312736, "learning_rate": 2.087701798608393e-06, "loss": 0.0195, "step": 219985 }, { "epoch": 0.9179177341422503, "grad_norm": 0.39350498257452776, "learning_rate": 2.087678073403255e-06, "loss": 0.0236, "step": 219990 }, { "epoch": 0.9179385968572407, "grad_norm": 0.95929936392018, "learning_rate": 2.0876543490069574e-06, "loss": 0.0185, "step": 219995 }, { "epoch": 0.9179594595722309, "grad_norm": 0.7255472379259419, "learning_rate": 2.087630625419454e-06, "loss": 0.0291, "step": 220000 }, { "epoch": 0.9179803222872212, "grad_norm": 0.5120170934484329, "learning_rate": 2.087606902640699e-06, "loss": 0.0153, "step": 220005 }, { "epoch": 0.9180011850022114, "grad_norm": 0.6408483105826872, "learning_rate": 2.0875831806706465e-06, "loss": 0.0225, "step": 220010 }, { "epoch": 0.9180220477172018, "grad_norm": 0.43894324194202994, "learning_rate": 2.087559459509251e-06, "loss": 0.0166, "step": 220015 }, { "epoch": 0.918042910432192, "grad_norm": 0.40847250053774437, "learning_rate": 2.087535739156466e-06, "loss": 0.0169, "step": 220020 }, { "epoch": 0.9180637731471822, "grad_norm": 0.7429262979250749, "learning_rate": 2.087512019612246e-06, "loss": 0.0224, "step": 220025 }, { "epoch": 0.9180846358621726, "grad_norm": 0.8698386050452507, "learning_rate": 2.0874883008765447e-06, "loss": 0.0187, "step": 220030 }, { "epoch": 0.9181054985771628, "grad_norm": 0.45321694727939793, "learning_rate": 2.0874645829493164e-06, "loss": 0.0191, "step": 220035 }, { "epoch": 0.9181263612921531, "grad_norm": 0.9955226669574129, "learning_rate": 2.087440865830515e-06, "loss": 0.022, "step": 220040 }, { "epoch": 0.9181472240071434, "grad_norm": 0.379079322920518, "learning_rate": 2.0874171495200947e-06, "loss": 0.0206, "step": 220045 }, { "epoch": 0.9181680867221337, "grad_norm": 0.947128050650773, "learning_rate": 2.0873934340180094e-06, "loss": 0.0176, "step": 220050 }, { "epoch": 0.9181889494371239, "grad_norm": 0.8299849630270156, "learning_rate": 2.0873697193242134e-06, "loss": 0.0228, "step": 220055 }, { "epoch": 0.9182098121521143, "grad_norm": 0.5943541217408395, "learning_rate": 2.087346005438661e-06, "loss": 0.021, "step": 220060 }, { "epoch": 0.9182306748671045, "grad_norm": 0.8439499826564304, "learning_rate": 2.0873222923613056e-06, "loss": 0.0219, "step": 220065 }, { "epoch": 0.9182515375820948, "grad_norm": 0.4327442053729551, "learning_rate": 2.0872985800921015e-06, "loss": 0.0123, "step": 220070 }, { "epoch": 0.918272400297085, "grad_norm": 0.6181879323066094, "learning_rate": 2.0872748686310033e-06, "loss": 0.0168, "step": 220075 }, { "epoch": 0.9182932630120754, "grad_norm": 0.754264056859888, "learning_rate": 2.087251157977965e-06, "loss": 0.0213, "step": 220080 }, { "epoch": 0.9183141257270656, "grad_norm": 0.9304368130323748, "learning_rate": 2.0872274481329403e-06, "loss": 0.0172, "step": 220085 }, { "epoch": 0.9183349884420559, "grad_norm": 1.1237466920286678, "learning_rate": 2.0872037390958835e-06, "loss": 0.0209, "step": 220090 }, { "epoch": 0.9183558511570462, "grad_norm": 1.4548343476461545, "learning_rate": 2.0871800308667484e-06, "loss": 0.0231, "step": 220095 }, { "epoch": 0.9183767138720365, "grad_norm": 1.315353316659954, "learning_rate": 2.08715632344549e-06, "loss": 0.0258, "step": 220100 }, { "epoch": 0.9183975765870267, "grad_norm": 0.6702210264553571, "learning_rate": 2.0871326168320613e-06, "loss": 0.0238, "step": 220105 }, { "epoch": 0.918418439302017, "grad_norm": 0.432387787144673, "learning_rate": 2.0871089110264174e-06, "loss": 0.0175, "step": 220110 }, { "epoch": 0.9184393020170073, "grad_norm": 0.41185796857959955, "learning_rate": 2.0870852060285118e-06, "loss": 0.0227, "step": 220115 }, { "epoch": 0.9184601647319975, "grad_norm": 0.6612322392965427, "learning_rate": 2.087061501838299e-06, "loss": 0.0261, "step": 220120 }, { "epoch": 0.9184810274469879, "grad_norm": 0.49167687453686654, "learning_rate": 2.0870377984557327e-06, "loss": 0.019, "step": 220125 }, { "epoch": 0.9185018901619781, "grad_norm": 0.5325075501734996, "learning_rate": 2.0870140958807674e-06, "loss": 0.0184, "step": 220130 }, { "epoch": 0.9185227528769684, "grad_norm": 0.492581428486557, "learning_rate": 2.0869903941133573e-06, "loss": 0.0205, "step": 220135 }, { "epoch": 0.9185436155919586, "grad_norm": 0.318800651754366, "learning_rate": 2.0869666931534564e-06, "loss": 0.0221, "step": 220140 }, { "epoch": 0.918564478306949, "grad_norm": 0.5358926220494167, "learning_rate": 2.0869429930010184e-06, "loss": 0.0178, "step": 220145 }, { "epoch": 0.9185853410219392, "grad_norm": 1.1677811897636017, "learning_rate": 2.0869192936559985e-06, "loss": 0.0195, "step": 220150 }, { "epoch": 0.9186062037369295, "grad_norm": 0.26289394291126317, "learning_rate": 2.0868955951183497e-06, "loss": 0.018, "step": 220155 }, { "epoch": 0.9186270664519198, "grad_norm": 0.7446131161205195, "learning_rate": 2.0868718973880274e-06, "loss": 0.0223, "step": 220160 }, { "epoch": 0.9186479291669101, "grad_norm": 1.0901574516691934, "learning_rate": 2.0868482004649843e-06, "loss": 0.0296, "step": 220165 }, { "epoch": 0.9186687918819003, "grad_norm": 0.515702304719743, "learning_rate": 2.086824504349176e-06, "loss": 0.0175, "step": 220170 }, { "epoch": 0.9186896545968907, "grad_norm": 0.5700657934631536, "learning_rate": 2.0868008090405555e-06, "loss": 0.0193, "step": 220175 }, { "epoch": 0.9187105173118809, "grad_norm": 0.372451034551102, "learning_rate": 2.0867771145390775e-06, "loss": 0.0135, "step": 220180 }, { "epoch": 0.9187313800268712, "grad_norm": 0.875570449235412, "learning_rate": 2.086753420844696e-06, "loss": 0.0208, "step": 220185 }, { "epoch": 0.9187522427418614, "grad_norm": 0.6294084176376953, "learning_rate": 2.086729727957366e-06, "loss": 0.0175, "step": 220190 }, { "epoch": 0.9187731054568518, "grad_norm": 0.42377694283721523, "learning_rate": 2.0867060358770406e-06, "loss": 0.0223, "step": 220195 }, { "epoch": 0.918793968171842, "grad_norm": 1.114517541602883, "learning_rate": 2.0866823446036742e-06, "loss": 0.021, "step": 220200 }, { "epoch": 0.9188148308868322, "grad_norm": 0.9913518378789432, "learning_rate": 2.0866586541372215e-06, "loss": 0.0219, "step": 220205 }, { "epoch": 0.9188356936018226, "grad_norm": 0.44333338318783777, "learning_rate": 2.0866349644776366e-06, "loss": 0.0161, "step": 220210 }, { "epoch": 0.9188565563168128, "grad_norm": 0.43511318218454065, "learning_rate": 2.086611275624873e-06, "loss": 0.0188, "step": 220215 }, { "epoch": 0.9188774190318031, "grad_norm": 0.27929707395484277, "learning_rate": 2.0865875875788856e-06, "loss": 0.0196, "step": 220220 }, { "epoch": 0.9188982817467934, "grad_norm": 0.25838416211344484, "learning_rate": 2.086563900339629e-06, "loss": 0.021, "step": 220225 }, { "epoch": 0.9189191444617837, "grad_norm": 0.9663152779637162, "learning_rate": 2.0865402139070557e-06, "loss": 0.0304, "step": 220230 }, { "epoch": 0.9189400071767739, "grad_norm": 0.5174294489133537, "learning_rate": 2.086516528281122e-06, "loss": 0.0238, "step": 220235 }, { "epoch": 0.9189608698917643, "grad_norm": 0.41929153081244974, "learning_rate": 2.0864928434617806e-06, "loss": 0.0385, "step": 220240 }, { "epoch": 0.9189817326067545, "grad_norm": 0.4408457757726145, "learning_rate": 2.0864691594489866e-06, "loss": 0.0178, "step": 220245 }, { "epoch": 0.9190025953217448, "grad_norm": 1.1943256066573327, "learning_rate": 2.0864454762426934e-06, "loss": 0.0173, "step": 220250 }, { "epoch": 0.919023458036735, "grad_norm": 0.640765426804296, "learning_rate": 2.086421793842856e-06, "loss": 0.0243, "step": 220255 }, { "epoch": 0.9190443207517254, "grad_norm": 0.6277642272831294, "learning_rate": 2.0863981122494287e-06, "loss": 0.0153, "step": 220260 }, { "epoch": 0.9190651834667156, "grad_norm": 0.5248439057454989, "learning_rate": 2.0863744314623654e-06, "loss": 0.0166, "step": 220265 }, { "epoch": 0.9190860461817059, "grad_norm": 0.6165186679520477, "learning_rate": 2.0863507514816193e-06, "loss": 0.0138, "step": 220270 }, { "epoch": 0.9191069088966962, "grad_norm": 0.41784748177585596, "learning_rate": 2.0863270723071466e-06, "loss": 0.0234, "step": 220275 }, { "epoch": 0.9191277716116865, "grad_norm": 0.6175069502545721, "learning_rate": 2.0863033939389006e-06, "loss": 0.0274, "step": 220280 }, { "epoch": 0.9191486343266767, "grad_norm": 0.4995244837672671, "learning_rate": 2.0862797163768356e-06, "loss": 0.0207, "step": 220285 }, { "epoch": 0.9191694970416671, "grad_norm": 0.5149504476888684, "learning_rate": 2.0862560396209054e-06, "loss": 0.0137, "step": 220290 }, { "epoch": 0.9191903597566573, "grad_norm": 0.7045497787627826, "learning_rate": 2.086232363671065e-06, "loss": 0.0169, "step": 220295 }, { "epoch": 0.9192112224716475, "grad_norm": 0.6021329735426147, "learning_rate": 2.0862086885272685e-06, "loss": 0.0206, "step": 220300 }, { "epoch": 0.9192320851866378, "grad_norm": 0.5000135478140135, "learning_rate": 2.0861850141894695e-06, "loss": 0.0168, "step": 220305 }, { "epoch": 0.9192529479016281, "grad_norm": 0.5377761848415538, "learning_rate": 2.0861613406576232e-06, "loss": 0.0232, "step": 220310 }, { "epoch": 0.9192738106166184, "grad_norm": 0.7291597506522616, "learning_rate": 2.0861376679316835e-06, "loss": 0.018, "step": 220315 }, { "epoch": 0.9192946733316086, "grad_norm": 0.452658039518828, "learning_rate": 2.086113996011604e-06, "loss": 0.0238, "step": 220320 }, { "epoch": 0.919315536046599, "grad_norm": 0.7930062386357424, "learning_rate": 2.0860903248973402e-06, "loss": 0.0216, "step": 220325 }, { "epoch": 0.9193363987615892, "grad_norm": 0.599735142794903, "learning_rate": 2.0860666545888456e-06, "loss": 0.0162, "step": 220330 }, { "epoch": 0.9193572614765795, "grad_norm": 0.5914911940596977, "learning_rate": 2.086042985086075e-06, "loss": 0.0175, "step": 220335 }, { "epoch": 0.9193781241915698, "grad_norm": 0.9144465075988679, "learning_rate": 2.086019316388982e-06, "loss": 0.0179, "step": 220340 }, { "epoch": 0.9193989869065601, "grad_norm": 0.39832290014544597, "learning_rate": 2.0859956484975212e-06, "loss": 0.0147, "step": 220345 }, { "epoch": 0.9194198496215503, "grad_norm": 1.353025870097635, "learning_rate": 2.0859719814116473e-06, "loss": 0.0259, "step": 220350 }, { "epoch": 0.9194407123365407, "grad_norm": 0.8130657676171681, "learning_rate": 2.085948315131314e-06, "loss": 0.0311, "step": 220355 }, { "epoch": 0.9194615750515309, "grad_norm": 0.9056991184084989, "learning_rate": 2.0859246496564757e-06, "loss": 0.0247, "step": 220360 }, { "epoch": 0.9194824377665212, "grad_norm": 0.9544472012608399, "learning_rate": 2.0859009849870877e-06, "loss": 0.0262, "step": 220365 }, { "epoch": 0.9195033004815114, "grad_norm": 0.6134853649450868, "learning_rate": 2.0858773211231027e-06, "loss": 0.0193, "step": 220370 }, { "epoch": 0.9195241631965018, "grad_norm": 0.45162784052375626, "learning_rate": 2.0858536580644758e-06, "loss": 0.0204, "step": 220375 }, { "epoch": 0.919545025911492, "grad_norm": 0.3255310231663193, "learning_rate": 2.0858299958111616e-06, "loss": 0.0161, "step": 220380 }, { "epoch": 0.9195658886264823, "grad_norm": 0.721162095973577, "learning_rate": 2.0858063343631136e-06, "loss": 0.0197, "step": 220385 }, { "epoch": 0.9195867513414726, "grad_norm": 0.7953934939576446, "learning_rate": 2.0857826737202877e-06, "loss": 0.019, "step": 220390 }, { "epoch": 0.9196076140564629, "grad_norm": 1.1351639267740832, "learning_rate": 2.0857590138826366e-06, "loss": 0.0233, "step": 220395 }, { "epoch": 0.9196284767714531, "grad_norm": 0.6012695713143301, "learning_rate": 2.0857353548501152e-06, "loss": 0.0182, "step": 220400 }, { "epoch": 0.9196493394864435, "grad_norm": 0.48331389290873306, "learning_rate": 2.085711696622678e-06, "loss": 0.0192, "step": 220405 }, { "epoch": 0.9196702022014337, "grad_norm": 0.890689085607678, "learning_rate": 2.0856880392002794e-06, "loss": 0.0259, "step": 220410 }, { "epoch": 0.9196910649164239, "grad_norm": 0.49150244534064824, "learning_rate": 2.085664382582873e-06, "loss": 0.0205, "step": 220415 }, { "epoch": 0.9197119276314143, "grad_norm": 0.5775724473997194, "learning_rate": 2.085640726770414e-06, "loss": 0.0166, "step": 220420 }, { "epoch": 0.9197327903464045, "grad_norm": 0.9433027788697226, "learning_rate": 2.0856170717628567e-06, "loss": 0.0177, "step": 220425 }, { "epoch": 0.9197536530613948, "grad_norm": 0.5813308652533853, "learning_rate": 2.085593417560155e-06, "loss": 0.0184, "step": 220430 }, { "epoch": 0.919774515776385, "grad_norm": 0.40337134379020023, "learning_rate": 2.0855697641622636e-06, "loss": 0.0156, "step": 220435 }, { "epoch": 0.9197953784913754, "grad_norm": 1.0832964222648707, "learning_rate": 2.0855461115691365e-06, "loss": 0.0236, "step": 220440 }, { "epoch": 0.9198162412063656, "grad_norm": 0.4496080118965932, "learning_rate": 2.0855224597807285e-06, "loss": 0.0189, "step": 220445 }, { "epoch": 0.9198371039213559, "grad_norm": 0.6169293710308058, "learning_rate": 2.0854988087969933e-06, "loss": 0.0144, "step": 220450 }, { "epoch": 0.9198579666363462, "grad_norm": 0.33594205829406326, "learning_rate": 2.0854751586178865e-06, "loss": 0.0179, "step": 220455 }, { "epoch": 0.9198788293513365, "grad_norm": 1.028976926502827, "learning_rate": 2.0854515092433614e-06, "loss": 0.0281, "step": 220460 }, { "epoch": 0.9198996920663267, "grad_norm": 0.4042982463770888, "learning_rate": 2.085427860673373e-06, "loss": 0.0215, "step": 220465 }, { "epoch": 0.9199205547813171, "grad_norm": 1.1322638872649138, "learning_rate": 2.085404212907875e-06, "loss": 0.0278, "step": 220470 }, { "epoch": 0.9199414174963073, "grad_norm": 0.5661711971740785, "learning_rate": 2.085380565946822e-06, "loss": 0.0213, "step": 220475 }, { "epoch": 0.9199622802112976, "grad_norm": 0.5021591178852962, "learning_rate": 2.0853569197901687e-06, "loss": 0.0204, "step": 220480 }, { "epoch": 0.9199831429262878, "grad_norm": 0.46926708135348394, "learning_rate": 2.08533327443787e-06, "loss": 0.0172, "step": 220485 }, { "epoch": 0.9200040056412782, "grad_norm": 0.5903122107942661, "learning_rate": 2.0853096298898787e-06, "loss": 0.0198, "step": 220490 }, { "epoch": 0.9200248683562684, "grad_norm": 0.7901607399757669, "learning_rate": 2.085285986146151e-06, "loss": 0.0217, "step": 220495 }, { "epoch": 0.9200457310712586, "grad_norm": 0.9856874589223572, "learning_rate": 2.08526234320664e-06, "loss": 0.0238, "step": 220500 }, { "epoch": 0.920066593786249, "grad_norm": 0.7478632433181119, "learning_rate": 2.0852387010713007e-06, "loss": 0.0188, "step": 220505 }, { "epoch": 0.9200874565012392, "grad_norm": 0.7749409244965314, "learning_rate": 2.085215059740087e-06, "loss": 0.0149, "step": 220510 }, { "epoch": 0.9201083192162295, "grad_norm": 0.8612833732723673, "learning_rate": 2.0851914192129545e-06, "loss": 0.0155, "step": 220515 }, { "epoch": 0.9201291819312198, "grad_norm": 0.5519755343190604, "learning_rate": 2.0851677794898566e-06, "loss": 0.0174, "step": 220520 }, { "epoch": 0.9201500446462101, "grad_norm": 0.8737015621302868, "learning_rate": 2.085144140570748e-06, "loss": 0.0167, "step": 220525 }, { "epoch": 0.9201709073612003, "grad_norm": 1.126820579271695, "learning_rate": 2.085120502455583e-06, "loss": 0.024, "step": 220530 }, { "epoch": 0.9201917700761907, "grad_norm": 0.704923688710247, "learning_rate": 2.085096865144316e-06, "loss": 0.0211, "step": 220535 }, { "epoch": 0.9202126327911809, "grad_norm": 0.466532666847226, "learning_rate": 2.0850732286369015e-06, "loss": 0.0185, "step": 220540 }, { "epoch": 0.9202334955061712, "grad_norm": 0.3809237703341932, "learning_rate": 2.085049592933294e-06, "loss": 0.0175, "step": 220545 }, { "epoch": 0.9202543582211614, "grad_norm": 0.7808161035063907, "learning_rate": 2.085025958033448e-06, "loss": 0.0172, "step": 220550 }, { "epoch": 0.9202752209361518, "grad_norm": 0.6710893443074427, "learning_rate": 2.0850023239373178e-06, "loss": 0.019, "step": 220555 }, { "epoch": 0.920296083651142, "grad_norm": 0.4408508957514919, "learning_rate": 2.0849786906448584e-06, "loss": 0.0221, "step": 220560 }, { "epoch": 0.9203169463661323, "grad_norm": 1.087069273479121, "learning_rate": 2.0849550581560232e-06, "loss": 0.0249, "step": 220565 }, { "epoch": 0.9203378090811226, "grad_norm": 0.4454741811430086, "learning_rate": 2.084931426470768e-06, "loss": 0.0274, "step": 220570 }, { "epoch": 0.9203586717961129, "grad_norm": 1.8009770323356984, "learning_rate": 2.0849077955890455e-06, "loss": 0.0237, "step": 220575 }, { "epoch": 0.9203795345111031, "grad_norm": 0.6779611893001496, "learning_rate": 2.0848841655108115e-06, "loss": 0.0188, "step": 220580 }, { "epoch": 0.9204003972260935, "grad_norm": 0.5566152149593239, "learning_rate": 2.084860536236021e-06, "loss": 0.0177, "step": 220585 }, { "epoch": 0.9204212599410837, "grad_norm": 0.9599588586934288, "learning_rate": 2.0848369077646265e-06, "loss": 0.018, "step": 220590 }, { "epoch": 0.920442122656074, "grad_norm": 0.5741383432387506, "learning_rate": 2.084813280096584e-06, "loss": 0.0173, "step": 220595 }, { "epoch": 0.9204629853710643, "grad_norm": 0.6039084314290298, "learning_rate": 2.0847896532318477e-06, "loss": 0.0202, "step": 220600 }, { "epoch": 0.9204838480860545, "grad_norm": 1.5669706451177807, "learning_rate": 2.084766027170372e-06, "loss": 0.0293, "step": 220605 }, { "epoch": 0.9205047108010448, "grad_norm": 0.36758358185697193, "learning_rate": 2.084742401912111e-06, "loss": 0.015, "step": 220610 }, { "epoch": 0.920525573516035, "grad_norm": 0.930969961596622, "learning_rate": 2.0847187774570195e-06, "loss": 0.0235, "step": 220615 }, { "epoch": 0.9205464362310254, "grad_norm": 0.5477627704987774, "learning_rate": 2.084695153805052e-06, "loss": 0.0166, "step": 220620 }, { "epoch": 0.9205672989460156, "grad_norm": 0.45432342096905964, "learning_rate": 2.0846715309561634e-06, "loss": 0.0232, "step": 220625 }, { "epoch": 0.9205881616610059, "grad_norm": 1.0300208495945258, "learning_rate": 2.0846479089103076e-06, "loss": 0.0281, "step": 220630 }, { "epoch": 0.9206090243759962, "grad_norm": 0.5064038804409885, "learning_rate": 2.0846242876674395e-06, "loss": 0.017, "step": 220635 }, { "epoch": 0.9206298870909865, "grad_norm": 0.658669554010078, "learning_rate": 2.0846006672275134e-06, "loss": 0.0199, "step": 220640 }, { "epoch": 0.9206507498059767, "grad_norm": 0.7375220023986746, "learning_rate": 2.0845770475904834e-06, "loss": 0.0139, "step": 220645 }, { "epoch": 0.9206716125209671, "grad_norm": 0.5965763337817026, "learning_rate": 2.084553428756305e-06, "loss": 0.0175, "step": 220650 }, { "epoch": 0.9206924752359573, "grad_norm": 0.6404507651271999, "learning_rate": 2.084529810724932e-06, "loss": 0.0227, "step": 220655 }, { "epoch": 0.9207133379509476, "grad_norm": 0.8653766464175132, "learning_rate": 2.0845061934963186e-06, "loss": 0.023, "step": 220660 }, { "epoch": 0.9207342006659378, "grad_norm": 0.6490120143622534, "learning_rate": 2.08448257707042e-06, "loss": 0.0189, "step": 220665 }, { "epoch": 0.9207550633809282, "grad_norm": 0.6178275254717289, "learning_rate": 2.0844589614471913e-06, "loss": 0.0201, "step": 220670 }, { "epoch": 0.9207759260959184, "grad_norm": 4.95333837205461, "learning_rate": 2.084435346626586e-06, "loss": 0.0193, "step": 220675 }, { "epoch": 0.9207967888109087, "grad_norm": 0.7201885675244224, "learning_rate": 2.084411732608558e-06, "loss": 0.0183, "step": 220680 }, { "epoch": 0.920817651525899, "grad_norm": 0.5746598033053582, "learning_rate": 2.0843881193930636e-06, "loss": 0.018, "step": 220685 }, { "epoch": 0.9208385142408892, "grad_norm": 0.38428702218192107, "learning_rate": 2.084364506980056e-06, "loss": 0.0148, "step": 220690 }, { "epoch": 0.9208593769558795, "grad_norm": 0.9568687452040354, "learning_rate": 2.084340895369491e-06, "loss": 0.0226, "step": 220695 }, { "epoch": 0.9208802396708698, "grad_norm": 0.3579592533437965, "learning_rate": 2.084317284561322e-06, "loss": 0.0225, "step": 220700 }, { "epoch": 0.9209011023858601, "grad_norm": 0.5904128889415791, "learning_rate": 2.084293674555504e-06, "loss": 0.0166, "step": 220705 }, { "epoch": 0.9209219651008503, "grad_norm": 0.492749340633785, "learning_rate": 2.084270065351991e-06, "loss": 0.0159, "step": 220710 }, { "epoch": 0.9209428278158407, "grad_norm": 0.42278404718777857, "learning_rate": 2.084246456950739e-06, "loss": 0.0156, "step": 220715 }, { "epoch": 0.9209636905308309, "grad_norm": 0.6778712539081096, "learning_rate": 2.084222849351701e-06, "loss": 0.0247, "step": 220720 }, { "epoch": 0.9209845532458212, "grad_norm": 0.7239080000538475, "learning_rate": 2.0841992425548323e-06, "loss": 0.0142, "step": 220725 }, { "epoch": 0.9210054159608114, "grad_norm": 0.9087583715893431, "learning_rate": 2.0841756365600874e-06, "loss": 0.0179, "step": 220730 }, { "epoch": 0.9210262786758018, "grad_norm": 0.4509606824623921, "learning_rate": 2.0841520313674205e-06, "loss": 0.0198, "step": 220735 }, { "epoch": 0.921047141390792, "grad_norm": 0.8322294258200974, "learning_rate": 2.084128426976787e-06, "loss": 0.0161, "step": 220740 }, { "epoch": 0.9210680041057823, "grad_norm": 0.9453867071006736, "learning_rate": 2.0841048233881413e-06, "loss": 0.0199, "step": 220745 }, { "epoch": 0.9210888668207726, "grad_norm": 0.6019171962069052, "learning_rate": 2.084081220601437e-06, "loss": 0.0199, "step": 220750 }, { "epoch": 0.9211097295357629, "grad_norm": 0.7108466685117095, "learning_rate": 2.0840576186166297e-06, "loss": 0.0288, "step": 220755 }, { "epoch": 0.9211305922507531, "grad_norm": 0.6035934461324541, "learning_rate": 2.0840340174336736e-06, "loss": 0.0207, "step": 220760 }, { "epoch": 0.9211514549657435, "grad_norm": 0.8587175777865137, "learning_rate": 2.0840104170525235e-06, "loss": 0.0326, "step": 220765 }, { "epoch": 0.9211723176807337, "grad_norm": 1.4211635609358608, "learning_rate": 2.0839868174731338e-06, "loss": 0.0185, "step": 220770 }, { "epoch": 0.921193180395724, "grad_norm": 0.6849706681771048, "learning_rate": 2.083963218695459e-06, "loss": 0.0207, "step": 220775 }, { "epoch": 0.9212140431107143, "grad_norm": 0.17194597298814351, "learning_rate": 2.0839396207194543e-06, "loss": 0.0188, "step": 220780 }, { "epoch": 0.9212349058257046, "grad_norm": 0.5676120367328529, "learning_rate": 2.083916023545074e-06, "loss": 0.0322, "step": 220785 }, { "epoch": 0.9212557685406948, "grad_norm": 1.0250337294641851, "learning_rate": 2.0838924271722723e-06, "loss": 0.0226, "step": 220790 }, { "epoch": 0.921276631255685, "grad_norm": 0.5456286126857651, "learning_rate": 2.0838688316010044e-06, "loss": 0.0205, "step": 220795 }, { "epoch": 0.9212974939706754, "grad_norm": 0.5892323540047012, "learning_rate": 2.0838452368312244e-06, "loss": 0.0235, "step": 220800 }, { "epoch": 0.9213183566856656, "grad_norm": 0.7303830665458679, "learning_rate": 2.083821642862887e-06, "loss": 0.0152, "step": 220805 }, { "epoch": 0.9213392194006559, "grad_norm": 0.9417685829667286, "learning_rate": 2.0837980496959475e-06, "loss": 0.0193, "step": 220810 }, { "epoch": 0.9213600821156462, "grad_norm": 1.1500416402370188, "learning_rate": 2.0837744573303596e-06, "loss": 0.0202, "step": 220815 }, { "epoch": 0.9213809448306365, "grad_norm": 0.6023254082853566, "learning_rate": 2.083750865766079e-06, "loss": 0.0236, "step": 220820 }, { "epoch": 0.9214018075456267, "grad_norm": 0.9313544735542872, "learning_rate": 2.0837272750030595e-06, "loss": 0.0217, "step": 220825 }, { "epoch": 0.9214226702606171, "grad_norm": 0.7400804265717863, "learning_rate": 2.0837036850412558e-06, "loss": 0.023, "step": 220830 }, { "epoch": 0.9214435329756073, "grad_norm": 0.5795188417557375, "learning_rate": 2.083680095880623e-06, "loss": 0.0152, "step": 220835 }, { "epoch": 0.9214643956905976, "grad_norm": 0.6593779393087952, "learning_rate": 2.083656507521115e-06, "loss": 0.0174, "step": 220840 }, { "epoch": 0.9214852584055878, "grad_norm": 0.36061486543809496, "learning_rate": 2.0836329199626876e-06, "loss": 0.0222, "step": 220845 }, { "epoch": 0.9215061211205782, "grad_norm": 0.7254247465296587, "learning_rate": 2.0836093332052942e-06, "loss": 0.0251, "step": 220850 }, { "epoch": 0.9215269838355684, "grad_norm": 0.568249364018657, "learning_rate": 2.0835857472488904e-06, "loss": 0.0186, "step": 220855 }, { "epoch": 0.9215478465505587, "grad_norm": 0.4785287527983763, "learning_rate": 2.0835621620934303e-06, "loss": 0.0205, "step": 220860 }, { "epoch": 0.921568709265549, "grad_norm": 0.8345549002021487, "learning_rate": 2.083538577738869e-06, "loss": 0.0187, "step": 220865 }, { "epoch": 0.9215895719805393, "grad_norm": 0.45057169470667996, "learning_rate": 2.083514994185161e-06, "loss": 0.0305, "step": 220870 }, { "epoch": 0.9216104346955295, "grad_norm": 0.6528181152963022, "learning_rate": 2.0834914114322603e-06, "loss": 0.0142, "step": 220875 }, { "epoch": 0.9216312974105199, "grad_norm": 1.3590282678969015, "learning_rate": 2.0834678294801234e-06, "loss": 0.034, "step": 220880 }, { "epoch": 0.9216521601255101, "grad_norm": 0.3880666163947331, "learning_rate": 2.083444248328703e-06, "loss": 0.0209, "step": 220885 }, { "epoch": 0.9216730228405003, "grad_norm": 0.38713077809352825, "learning_rate": 2.083420667977955e-06, "loss": 0.0173, "step": 220890 }, { "epoch": 0.9216938855554907, "grad_norm": 0.993482822995367, "learning_rate": 2.0833970884278332e-06, "loss": 0.0266, "step": 220895 }, { "epoch": 0.9217147482704809, "grad_norm": 0.5616902795132682, "learning_rate": 2.083373509678293e-06, "loss": 0.0137, "step": 220900 }, { "epoch": 0.9217356109854712, "grad_norm": 1.0174326496715975, "learning_rate": 2.083349931729289e-06, "loss": 0.0283, "step": 220905 }, { "epoch": 0.9217564737004614, "grad_norm": 0.6465774945747179, "learning_rate": 2.0833263545807755e-06, "loss": 0.0255, "step": 220910 }, { "epoch": 0.9217773364154518, "grad_norm": 0.6924940341585689, "learning_rate": 2.0833027782327076e-06, "loss": 0.0195, "step": 220915 }, { "epoch": 0.921798199130442, "grad_norm": 0.5095419830668849, "learning_rate": 2.0832792026850406e-06, "loss": 0.0179, "step": 220920 }, { "epoch": 0.9218190618454323, "grad_norm": 0.4921552684830305, "learning_rate": 2.0832556279377274e-06, "loss": 0.0181, "step": 220925 }, { "epoch": 0.9218399245604226, "grad_norm": 0.8310914270496755, "learning_rate": 2.0832320539907244e-06, "loss": 0.0227, "step": 220930 }, { "epoch": 0.9218607872754129, "grad_norm": 0.45961773071935236, "learning_rate": 2.083208480843986e-06, "loss": 0.0137, "step": 220935 }, { "epoch": 0.9218816499904031, "grad_norm": 0.5109311442680854, "learning_rate": 2.083184908497466e-06, "loss": 0.0211, "step": 220940 }, { "epoch": 0.9219025127053935, "grad_norm": 0.6807050088387524, "learning_rate": 2.08316133695112e-06, "loss": 0.0178, "step": 220945 }, { "epoch": 0.9219233754203837, "grad_norm": 0.607391558744008, "learning_rate": 2.0831377662049028e-06, "loss": 0.0174, "step": 220950 }, { "epoch": 0.921944238135374, "grad_norm": 0.3979269104879835, "learning_rate": 2.083114196258769e-06, "loss": 0.0214, "step": 220955 }, { "epoch": 0.9219651008503643, "grad_norm": 0.6399309814937193, "learning_rate": 2.0830906271126723e-06, "loss": 0.0175, "step": 220960 }, { "epoch": 0.9219859635653546, "grad_norm": 0.4201022040033073, "learning_rate": 2.083067058766569e-06, "loss": 0.0155, "step": 220965 }, { "epoch": 0.9220068262803448, "grad_norm": 1.458121054228645, "learning_rate": 2.0830434912204133e-06, "loss": 0.0288, "step": 220970 }, { "epoch": 0.922027688995335, "grad_norm": 0.5027081900503045, "learning_rate": 2.08301992447416e-06, "loss": 0.0209, "step": 220975 }, { "epoch": 0.9220485517103254, "grad_norm": 0.5851268530973979, "learning_rate": 2.082996358527763e-06, "loss": 0.0174, "step": 220980 }, { "epoch": 0.9220694144253156, "grad_norm": 0.4717343123101534, "learning_rate": 2.0829727933811785e-06, "loss": 0.0224, "step": 220985 }, { "epoch": 0.9220902771403059, "grad_norm": 0.5256432547914401, "learning_rate": 2.0829492290343596e-06, "loss": 0.0203, "step": 220990 }, { "epoch": 0.9221111398552962, "grad_norm": 0.812284849271765, "learning_rate": 2.0829256654872627e-06, "loss": 0.0185, "step": 220995 }, { "epoch": 0.9221320025702865, "grad_norm": 0.40330081017001795, "learning_rate": 2.0829021027398416e-06, "loss": 0.0223, "step": 221000 }, { "epoch": 0.9221528652852767, "grad_norm": 0.33745825002985225, "learning_rate": 2.082878540792051e-06, "loss": 0.0177, "step": 221005 }, { "epoch": 0.9221737280002671, "grad_norm": 0.6291249731762624, "learning_rate": 2.0828549796438465e-06, "loss": 0.0178, "step": 221010 }, { "epoch": 0.9221945907152573, "grad_norm": 0.4717926548728792, "learning_rate": 2.0828314192951824e-06, "loss": 0.0205, "step": 221015 }, { "epoch": 0.9222154534302476, "grad_norm": 0.43419872214078836, "learning_rate": 2.082807859746013e-06, "loss": 0.022, "step": 221020 }, { "epoch": 0.9222363161452378, "grad_norm": 0.6341225728995998, "learning_rate": 2.0827843009962933e-06, "loss": 0.0173, "step": 221025 }, { "epoch": 0.9222571788602282, "grad_norm": 0.43535420773760797, "learning_rate": 2.082760743045979e-06, "loss": 0.0199, "step": 221030 }, { "epoch": 0.9222780415752184, "grad_norm": 0.5222618009342336, "learning_rate": 2.0827371858950234e-06, "loss": 0.0135, "step": 221035 }, { "epoch": 0.9222989042902087, "grad_norm": 0.6560594509197453, "learning_rate": 2.082713629543383e-06, "loss": 0.0159, "step": 221040 }, { "epoch": 0.922319767005199, "grad_norm": 0.9474373287478336, "learning_rate": 2.082690073991011e-06, "loss": 0.0233, "step": 221045 }, { "epoch": 0.9223406297201893, "grad_norm": 0.28597593839325114, "learning_rate": 2.082666519237863e-06, "loss": 0.0147, "step": 221050 }, { "epoch": 0.9223614924351795, "grad_norm": 0.5124446317492471, "learning_rate": 2.0826429652838938e-06, "loss": 0.0349, "step": 221055 }, { "epoch": 0.9223823551501699, "grad_norm": 0.4008592232810573, "learning_rate": 2.0826194121290585e-06, "loss": 0.0177, "step": 221060 }, { "epoch": 0.9224032178651601, "grad_norm": 0.8469117082388034, "learning_rate": 2.082595859773311e-06, "loss": 0.02, "step": 221065 }, { "epoch": 0.9224240805801504, "grad_norm": 0.36837167436803747, "learning_rate": 2.0825723082166068e-06, "loss": 0.0125, "step": 221070 }, { "epoch": 0.9224449432951407, "grad_norm": 0.5205140127425203, "learning_rate": 2.0825487574589005e-06, "loss": 0.0203, "step": 221075 }, { "epoch": 0.922465806010131, "grad_norm": 0.1999047436100222, "learning_rate": 2.0825252075001467e-06, "loss": 0.0203, "step": 221080 }, { "epoch": 0.9224866687251212, "grad_norm": 0.8575680941892607, "learning_rate": 2.0825016583403013e-06, "loss": 0.0246, "step": 221085 }, { "epoch": 0.9225075314401114, "grad_norm": 0.8296595321963375, "learning_rate": 2.0824781099793175e-06, "loss": 0.0176, "step": 221090 }, { "epoch": 0.9225283941551018, "grad_norm": 0.6287928759777245, "learning_rate": 2.082454562417151e-06, "loss": 0.0159, "step": 221095 }, { "epoch": 0.922549256870092, "grad_norm": 2.0320993882125586, "learning_rate": 2.0824310156537575e-06, "loss": 0.0382, "step": 221100 }, { "epoch": 0.9225701195850823, "grad_norm": 0.49451036341753407, "learning_rate": 2.08240746968909e-06, "loss": 0.0179, "step": 221105 }, { "epoch": 0.9225909823000726, "grad_norm": 0.8140869663406494, "learning_rate": 2.082383924523105e-06, "loss": 0.0155, "step": 221110 }, { "epoch": 0.9226118450150629, "grad_norm": 0.5598609965577834, "learning_rate": 2.082360380155756e-06, "loss": 0.0194, "step": 221115 }, { "epoch": 0.9226327077300531, "grad_norm": 0.6283652404291804, "learning_rate": 2.082336836586999e-06, "loss": 0.0214, "step": 221120 }, { "epoch": 0.9226535704450435, "grad_norm": 0.7302451268611759, "learning_rate": 2.0823132938167886e-06, "loss": 0.0211, "step": 221125 }, { "epoch": 0.9226744331600337, "grad_norm": 0.4823838721855195, "learning_rate": 2.082289751845079e-06, "loss": 0.0161, "step": 221130 }, { "epoch": 0.922695295875024, "grad_norm": 0.4855363803915769, "learning_rate": 2.082266210671825e-06, "loss": 0.0218, "step": 221135 }, { "epoch": 0.9227161585900143, "grad_norm": 0.6202406418298803, "learning_rate": 2.0822426702969826e-06, "loss": 0.0175, "step": 221140 }, { "epoch": 0.9227370213050046, "grad_norm": 0.41604901138030875, "learning_rate": 2.0822191307205056e-06, "loss": 0.0186, "step": 221145 }, { "epoch": 0.9227578840199948, "grad_norm": 1.0946600531256279, "learning_rate": 2.08219559194235e-06, "loss": 0.0173, "step": 221150 }, { "epoch": 0.9227787467349851, "grad_norm": 0.2607080588796384, "learning_rate": 2.0821720539624695e-06, "loss": 0.0237, "step": 221155 }, { "epoch": 0.9227996094499754, "grad_norm": 0.6631894025305787, "learning_rate": 2.0821485167808193e-06, "loss": 0.0215, "step": 221160 }, { "epoch": 0.9228204721649657, "grad_norm": 0.8067520230023518, "learning_rate": 2.082124980397354e-06, "loss": 0.024, "step": 221165 }, { "epoch": 0.9228413348799559, "grad_norm": 0.562293134552045, "learning_rate": 2.08210144481203e-06, "loss": 0.0215, "step": 221170 }, { "epoch": 0.9228621975949463, "grad_norm": 0.3012672025099886, "learning_rate": 2.0820779100248003e-06, "loss": 0.0644, "step": 221175 }, { "epoch": 0.9228830603099365, "grad_norm": 0.40393819186107277, "learning_rate": 2.082054376035621e-06, "loss": 0.0199, "step": 221180 }, { "epoch": 0.9229039230249267, "grad_norm": 0.6482860835260529, "learning_rate": 2.0820308428444465e-06, "loss": 0.0134, "step": 221185 }, { "epoch": 0.9229247857399171, "grad_norm": 0.30826238847440895, "learning_rate": 2.082007310451232e-06, "loss": 0.0183, "step": 221190 }, { "epoch": 0.9229456484549073, "grad_norm": 0.5100675952853727, "learning_rate": 2.0819837788559314e-06, "loss": 0.0197, "step": 221195 }, { "epoch": 0.9229665111698976, "grad_norm": 0.9943394169254353, "learning_rate": 2.0819602480585015e-06, "loss": 0.0316, "step": 221200 }, { "epoch": 0.9229873738848878, "grad_norm": 0.38161671653406753, "learning_rate": 2.0819367180588954e-06, "loss": 0.0169, "step": 221205 }, { "epoch": 0.9230082365998782, "grad_norm": 0.7022088714602416, "learning_rate": 2.0819131888570693e-06, "loss": 0.0188, "step": 221210 }, { "epoch": 0.9230290993148684, "grad_norm": 0.6780139830313566, "learning_rate": 2.0818896604529772e-06, "loss": 0.0252, "step": 221215 }, { "epoch": 0.9230499620298587, "grad_norm": 0.8936098747560892, "learning_rate": 2.0818661328465746e-06, "loss": 0.0229, "step": 221220 }, { "epoch": 0.923070824744849, "grad_norm": 0.19926044214574212, "learning_rate": 2.081842606037816e-06, "loss": 0.0173, "step": 221225 }, { "epoch": 0.9230916874598393, "grad_norm": 0.919924320815768, "learning_rate": 2.0818190800266564e-06, "loss": 0.0216, "step": 221230 }, { "epoch": 0.9231125501748295, "grad_norm": 0.6238824532057484, "learning_rate": 2.081795554813051e-06, "loss": 0.0273, "step": 221235 }, { "epoch": 0.9231334128898199, "grad_norm": 0.6440226205146097, "learning_rate": 2.081772030396955e-06, "loss": 0.0222, "step": 221240 }, { "epoch": 0.9231542756048101, "grad_norm": 1.0826257239629895, "learning_rate": 2.0817485067783224e-06, "loss": 0.0187, "step": 221245 }, { "epoch": 0.9231751383198004, "grad_norm": 0.49940230172901157, "learning_rate": 2.0817249839571095e-06, "loss": 0.031, "step": 221250 }, { "epoch": 0.9231960010347907, "grad_norm": 0.4197592116329929, "learning_rate": 2.08170146193327e-06, "loss": 0.0174, "step": 221255 }, { "epoch": 0.923216863749781, "grad_norm": 0.626861966464644, "learning_rate": 2.081677940706759e-06, "loss": 0.0266, "step": 221260 }, { "epoch": 0.9232377264647712, "grad_norm": 2.111604157975452, "learning_rate": 2.081654420277532e-06, "loss": 0.0147, "step": 221265 }, { "epoch": 0.9232585891797614, "grad_norm": 0.3271902536271723, "learning_rate": 2.081630900645544e-06, "loss": 0.0196, "step": 221270 }, { "epoch": 0.9232794518947518, "grad_norm": 0.8433036249326926, "learning_rate": 2.0816073818107495e-06, "loss": 0.0223, "step": 221275 }, { "epoch": 0.923300314609742, "grad_norm": 0.6345215041658862, "learning_rate": 2.081583863773103e-06, "loss": 0.0218, "step": 221280 }, { "epoch": 0.9233211773247323, "grad_norm": 0.7184726768594045, "learning_rate": 2.0815603465325614e-06, "loss": 0.0258, "step": 221285 }, { "epoch": 0.9233420400397226, "grad_norm": 0.9317848294798591, "learning_rate": 2.0815368300890777e-06, "loss": 0.0236, "step": 221290 }, { "epoch": 0.9233629027547129, "grad_norm": 0.592843208217668, "learning_rate": 2.0815133144426074e-06, "loss": 0.0206, "step": 221295 }, { "epoch": 0.9233837654697031, "grad_norm": 0.8896655730520966, "learning_rate": 2.081489799593106e-06, "loss": 0.0237, "step": 221300 }, { "epoch": 0.9234046281846935, "grad_norm": 0.531931814355488, "learning_rate": 2.081466285540528e-06, "loss": 0.0232, "step": 221305 }, { "epoch": 0.9234254908996837, "grad_norm": 0.261850952197949, "learning_rate": 2.0814427722848286e-06, "loss": 0.0172, "step": 221310 }, { "epoch": 0.923446353614674, "grad_norm": 0.3919097112933681, "learning_rate": 2.0814192598259628e-06, "loss": 0.0156, "step": 221315 }, { "epoch": 0.9234672163296643, "grad_norm": 1.5553994282247043, "learning_rate": 2.081395748163885e-06, "loss": 0.0179, "step": 221320 }, { "epoch": 0.9234880790446546, "grad_norm": 0.33334855636383026, "learning_rate": 2.0813722372985517e-06, "loss": 0.0155, "step": 221325 }, { "epoch": 0.9235089417596448, "grad_norm": 0.4602030099987805, "learning_rate": 2.081348727229916e-06, "loss": 0.0186, "step": 221330 }, { "epoch": 0.9235298044746351, "grad_norm": 0.6869085792277971, "learning_rate": 2.0813252179579343e-06, "loss": 0.0225, "step": 221335 }, { "epoch": 0.9235506671896254, "grad_norm": 0.5512279253007711, "learning_rate": 2.081301709482561e-06, "loss": 0.0233, "step": 221340 }, { "epoch": 0.9235715299046157, "grad_norm": 0.38952181828293153, "learning_rate": 2.081278201803751e-06, "loss": 0.0209, "step": 221345 }, { "epoch": 0.9235923926196059, "grad_norm": 0.4193763194902569, "learning_rate": 2.0812546949214597e-06, "loss": 0.0177, "step": 221350 }, { "epoch": 0.9236132553345963, "grad_norm": 0.4130323592829403, "learning_rate": 2.081231188835642e-06, "loss": 0.0178, "step": 221355 }, { "epoch": 0.9236341180495865, "grad_norm": 1.172692513272485, "learning_rate": 2.0812076835462538e-06, "loss": 0.0233, "step": 221360 }, { "epoch": 0.9236549807645767, "grad_norm": 0.970018116969104, "learning_rate": 2.081184179053248e-06, "loss": 0.0223, "step": 221365 }, { "epoch": 0.9236758434795671, "grad_norm": 0.6689238246411549, "learning_rate": 2.0811606753565812e-06, "loss": 0.0244, "step": 221370 }, { "epoch": 0.9236967061945573, "grad_norm": 0.16954606151254822, "learning_rate": 2.081137172456208e-06, "loss": 0.0154, "step": 221375 }, { "epoch": 0.9237175689095476, "grad_norm": 0.4008532446836022, "learning_rate": 2.0811136703520838e-06, "loss": 0.0342, "step": 221380 }, { "epoch": 0.9237384316245378, "grad_norm": 0.5571005544918302, "learning_rate": 2.081090169044163e-06, "loss": 0.015, "step": 221385 }, { "epoch": 0.9237592943395282, "grad_norm": 1.3536584194251726, "learning_rate": 2.0810666685324018e-06, "loss": 0.023, "step": 221390 }, { "epoch": 0.9237801570545184, "grad_norm": 0.7339168213050451, "learning_rate": 2.0810431688167536e-06, "loss": 0.02, "step": 221395 }, { "epoch": 0.9238010197695087, "grad_norm": 0.6389139752205849, "learning_rate": 2.081019669897175e-06, "loss": 0.0236, "step": 221400 }, { "epoch": 0.923821882484499, "grad_norm": 0.6905765083198718, "learning_rate": 2.08099617177362e-06, "loss": 0.0162, "step": 221405 }, { "epoch": 0.9238427451994893, "grad_norm": 0.8567218262272742, "learning_rate": 2.0809726744460436e-06, "loss": 0.0212, "step": 221410 }, { "epoch": 0.9238636079144795, "grad_norm": 0.36852924526102643, "learning_rate": 2.0809491779144015e-06, "loss": 0.0222, "step": 221415 }, { "epoch": 0.9238844706294699, "grad_norm": 0.523893863995474, "learning_rate": 2.080925682178649e-06, "loss": 0.0226, "step": 221420 }, { "epoch": 0.9239053333444601, "grad_norm": 0.4560041450416828, "learning_rate": 2.080902187238741e-06, "loss": 0.0225, "step": 221425 }, { "epoch": 0.9239261960594504, "grad_norm": 0.8889880973751685, "learning_rate": 2.080878693094632e-06, "loss": 0.014, "step": 221430 }, { "epoch": 0.9239470587744407, "grad_norm": 0.8224747468444785, "learning_rate": 2.0808551997462768e-06, "loss": 0.0211, "step": 221435 }, { "epoch": 0.923967921489431, "grad_norm": 0.3284369028542554, "learning_rate": 2.0808317071936317e-06, "loss": 0.0292, "step": 221440 }, { "epoch": 0.9239887842044212, "grad_norm": 0.8815210541523856, "learning_rate": 2.080808215436651e-06, "loss": 0.0212, "step": 221445 }, { "epoch": 0.9240096469194115, "grad_norm": 0.49416904216296426, "learning_rate": 2.0807847244752898e-06, "loss": 0.0146, "step": 221450 }, { "epoch": 0.9240305096344018, "grad_norm": 0.5519764979829661, "learning_rate": 2.0807612343095035e-06, "loss": 0.022, "step": 221455 }, { "epoch": 0.924051372349392, "grad_norm": 0.5962596249362153, "learning_rate": 2.080737744939247e-06, "loss": 0.0126, "step": 221460 }, { "epoch": 0.9240722350643823, "grad_norm": 0.8505467661403878, "learning_rate": 2.080714256364475e-06, "loss": 0.0201, "step": 221465 }, { "epoch": 0.9240930977793727, "grad_norm": 0.43490314068762714, "learning_rate": 2.0806907685851437e-06, "loss": 0.0184, "step": 221470 }, { "epoch": 0.9241139604943629, "grad_norm": 0.46686435303960655, "learning_rate": 2.080667281601207e-06, "loss": 0.0188, "step": 221475 }, { "epoch": 0.9241348232093531, "grad_norm": 0.644908410892531, "learning_rate": 2.080643795412621e-06, "loss": 0.0183, "step": 221480 }, { "epoch": 0.9241556859243435, "grad_norm": 1.2987779619077124, "learning_rate": 2.0806203100193397e-06, "loss": 0.0195, "step": 221485 }, { "epoch": 0.9241765486393337, "grad_norm": 0.9450259598990547, "learning_rate": 2.080596825421319e-06, "loss": 0.0265, "step": 221490 }, { "epoch": 0.924197411354324, "grad_norm": 0.7957146232645366, "learning_rate": 2.0805733416185147e-06, "loss": 0.04, "step": 221495 }, { "epoch": 0.9242182740693143, "grad_norm": 0.605582035274675, "learning_rate": 2.08054985861088e-06, "loss": 0.0214, "step": 221500 }, { "epoch": 0.9242391367843046, "grad_norm": 0.41279299616115434, "learning_rate": 2.0805263763983717e-06, "loss": 0.026, "step": 221505 }, { "epoch": 0.9242599994992948, "grad_norm": 0.6762458625753315, "learning_rate": 2.0805028949809446e-06, "loss": 0.0179, "step": 221510 }, { "epoch": 0.9242808622142851, "grad_norm": 0.46430810429378927, "learning_rate": 2.080479414358553e-06, "loss": 0.0224, "step": 221515 }, { "epoch": 0.9243017249292754, "grad_norm": 0.43631359293109, "learning_rate": 2.0804559345311526e-06, "loss": 0.02, "step": 221520 }, { "epoch": 0.9243225876442657, "grad_norm": 0.6755860152356761, "learning_rate": 2.0804324554986992e-06, "loss": 0.0219, "step": 221525 }, { "epoch": 0.9243434503592559, "grad_norm": 0.7050322868974226, "learning_rate": 2.080408977261147e-06, "loss": 0.02, "step": 221530 }, { "epoch": 0.9243643130742463, "grad_norm": 0.4287595436823933, "learning_rate": 2.080385499818451e-06, "loss": 0.0216, "step": 221535 }, { "epoch": 0.9243851757892365, "grad_norm": 0.46612530473211605, "learning_rate": 2.0803620231705675e-06, "loss": 0.0197, "step": 221540 }, { "epoch": 0.9244060385042268, "grad_norm": 0.6195539352829024, "learning_rate": 2.080338547317451e-06, "loss": 0.0279, "step": 221545 }, { "epoch": 0.9244269012192171, "grad_norm": 0.5765428987126389, "learning_rate": 2.080315072259056e-06, "loss": 0.021, "step": 221550 }, { "epoch": 0.9244477639342074, "grad_norm": 0.7926478775931534, "learning_rate": 2.0802915979953384e-06, "loss": 0.0237, "step": 221555 }, { "epoch": 0.9244686266491976, "grad_norm": 0.46597726652097765, "learning_rate": 2.0802681245262537e-06, "loss": 0.0192, "step": 221560 }, { "epoch": 0.9244894893641878, "grad_norm": 0.6616249193163924, "learning_rate": 2.0802446518517563e-06, "loss": 0.0234, "step": 221565 }, { "epoch": 0.9245103520791782, "grad_norm": 0.5109750990752246, "learning_rate": 2.0802211799718016e-06, "loss": 0.0221, "step": 221570 }, { "epoch": 0.9245312147941684, "grad_norm": 0.7034347112039823, "learning_rate": 2.080197708886345e-06, "loss": 0.0196, "step": 221575 }, { "epoch": 0.9245520775091587, "grad_norm": 0.46507846921899765, "learning_rate": 2.0801742385953417e-06, "loss": 0.0259, "step": 221580 }, { "epoch": 0.924572940224149, "grad_norm": 0.7654858327523307, "learning_rate": 2.0801507690987467e-06, "loss": 0.0189, "step": 221585 }, { "epoch": 0.9245938029391393, "grad_norm": 0.7632037512993668, "learning_rate": 2.080127300396515e-06, "loss": 0.0274, "step": 221590 }, { "epoch": 0.9246146656541295, "grad_norm": 1.319047554273222, "learning_rate": 2.080103832488602e-06, "loss": 0.0172, "step": 221595 }, { "epoch": 0.9246355283691199, "grad_norm": 0.9451600533666329, "learning_rate": 2.0800803653749633e-06, "loss": 0.022, "step": 221600 }, { "epoch": 0.9246563910841101, "grad_norm": 0.6153299851175992, "learning_rate": 2.0800568990555537e-06, "loss": 0.0207, "step": 221605 }, { "epoch": 0.9246772537991004, "grad_norm": 0.6361717139048668, "learning_rate": 2.080033433530328e-06, "loss": 0.0177, "step": 221610 }, { "epoch": 0.9246981165140907, "grad_norm": 0.5407062425001188, "learning_rate": 2.0800099687992422e-06, "loss": 0.0221, "step": 221615 }, { "epoch": 0.924718979229081, "grad_norm": 0.4556963934736275, "learning_rate": 2.0799865048622505e-06, "loss": 0.0149, "step": 221620 }, { "epoch": 0.9247398419440712, "grad_norm": 0.5677173534787995, "learning_rate": 2.079963041719309e-06, "loss": 0.0242, "step": 221625 }, { "epoch": 0.9247607046590615, "grad_norm": 0.7094899587888085, "learning_rate": 2.079939579370373e-06, "loss": 0.0218, "step": 221630 }, { "epoch": 0.9247815673740518, "grad_norm": 1.053191936724395, "learning_rate": 2.079916117815397e-06, "loss": 0.0236, "step": 221635 }, { "epoch": 0.9248024300890421, "grad_norm": 1.0557380866196633, "learning_rate": 2.079892657054337e-06, "loss": 0.0262, "step": 221640 }, { "epoch": 0.9248232928040323, "grad_norm": 0.5290814578223496, "learning_rate": 2.079869197087147e-06, "loss": 0.0178, "step": 221645 }, { "epoch": 0.9248441555190227, "grad_norm": 0.6906561442511364, "learning_rate": 2.0798457379137836e-06, "loss": 0.0213, "step": 221650 }, { "epoch": 0.9248650182340129, "grad_norm": 0.6313920812333097, "learning_rate": 2.0798222795342012e-06, "loss": 0.0162, "step": 221655 }, { "epoch": 0.9248858809490031, "grad_norm": 0.8174711086110753, "learning_rate": 2.0797988219483557e-06, "loss": 0.0215, "step": 221660 }, { "epoch": 0.9249067436639935, "grad_norm": 0.40263650735414164, "learning_rate": 2.0797753651562012e-06, "loss": 0.0251, "step": 221665 }, { "epoch": 0.9249276063789837, "grad_norm": 0.4737230248416109, "learning_rate": 2.079751909157694e-06, "loss": 0.0225, "step": 221670 }, { "epoch": 0.924948469093974, "grad_norm": 0.4027641387422405, "learning_rate": 2.0797284539527896e-06, "loss": 0.0158, "step": 221675 }, { "epoch": 0.9249693318089643, "grad_norm": 0.8525226773563426, "learning_rate": 2.079704999541442e-06, "loss": 0.0265, "step": 221680 }, { "epoch": 0.9249901945239546, "grad_norm": 0.39981253959868424, "learning_rate": 2.079681545923608e-06, "loss": 0.0177, "step": 221685 }, { "epoch": 0.9250110572389448, "grad_norm": 0.5664397989000659, "learning_rate": 2.0796580930992414e-06, "loss": 0.0167, "step": 221690 }, { "epoch": 0.9250319199539351, "grad_norm": 0.5975785212887678, "learning_rate": 2.079634641068298e-06, "loss": 0.0233, "step": 221695 }, { "epoch": 0.9250527826689254, "grad_norm": 0.7234153810974104, "learning_rate": 2.0796111898307333e-06, "loss": 0.0148, "step": 221700 }, { "epoch": 0.9250736453839157, "grad_norm": 0.4369101494544131, "learning_rate": 2.079587739386502e-06, "loss": 0.0155, "step": 221705 }, { "epoch": 0.9250945080989059, "grad_norm": 0.38822883828179056, "learning_rate": 2.07956428973556e-06, "loss": 0.0117, "step": 221710 }, { "epoch": 0.9251153708138963, "grad_norm": 0.9916949178636767, "learning_rate": 2.0795408408778625e-06, "loss": 0.0193, "step": 221715 }, { "epoch": 0.9251362335288865, "grad_norm": 0.5053835465430111, "learning_rate": 2.0795173928133644e-06, "loss": 0.0191, "step": 221720 }, { "epoch": 0.9251570962438768, "grad_norm": 0.49485773149170204, "learning_rate": 2.079493945542021e-06, "loss": 0.0217, "step": 221725 }, { "epoch": 0.9251779589588671, "grad_norm": 0.5398204042837396, "learning_rate": 2.0794704990637883e-06, "loss": 0.0175, "step": 221730 }, { "epoch": 0.9251988216738574, "grad_norm": 0.3088814893810197, "learning_rate": 2.079447053378621e-06, "loss": 0.0189, "step": 221735 }, { "epoch": 0.9252196843888476, "grad_norm": 0.3525672778159524, "learning_rate": 2.079423608486474e-06, "loss": 0.015, "step": 221740 }, { "epoch": 0.9252405471038379, "grad_norm": 0.660060068037814, "learning_rate": 2.0794001643873034e-06, "loss": 0.0231, "step": 221745 }, { "epoch": 0.9252614098188282, "grad_norm": 0.34549726846976747, "learning_rate": 2.079376721081064e-06, "loss": 0.0167, "step": 221750 }, { "epoch": 0.9252822725338185, "grad_norm": 0.8445901856425074, "learning_rate": 2.0793532785677116e-06, "loss": 0.0171, "step": 221755 }, { "epoch": 0.9253031352488087, "grad_norm": 0.5919817936028043, "learning_rate": 2.0793298368472e-06, "loss": 0.0209, "step": 221760 }, { "epoch": 0.925323997963799, "grad_norm": 0.7443453774416728, "learning_rate": 2.079306395919487e-06, "loss": 0.017, "step": 221765 }, { "epoch": 0.9253448606787893, "grad_norm": 0.5874378287967984, "learning_rate": 2.0792829557845264e-06, "loss": 0.0223, "step": 221770 }, { "epoch": 0.9253657233937795, "grad_norm": 0.4521476172084524, "learning_rate": 2.0792595164422734e-06, "loss": 0.0112, "step": 221775 }, { "epoch": 0.9253865861087699, "grad_norm": 0.6711182645441451, "learning_rate": 2.0792360778926836e-06, "loss": 0.0144, "step": 221780 }, { "epoch": 0.9254074488237601, "grad_norm": 0.7377053082837094, "learning_rate": 2.0792126401357125e-06, "loss": 0.0185, "step": 221785 }, { "epoch": 0.9254283115387504, "grad_norm": 0.8351218125232421, "learning_rate": 2.0791892031713152e-06, "loss": 0.0259, "step": 221790 }, { "epoch": 0.9254491742537407, "grad_norm": 0.882620702080438, "learning_rate": 2.079165766999447e-06, "loss": 0.0155, "step": 221795 }, { "epoch": 0.925470036968731, "grad_norm": 0.34964225492159035, "learning_rate": 2.0791423316200638e-06, "loss": 0.0184, "step": 221800 }, { "epoch": 0.9254908996837212, "grad_norm": 0.42107843006303364, "learning_rate": 2.07911889703312e-06, "loss": 0.0177, "step": 221805 }, { "epoch": 0.9255117623987115, "grad_norm": 0.9275151688938047, "learning_rate": 2.0790954632385714e-06, "loss": 0.0184, "step": 221810 }, { "epoch": 0.9255326251137018, "grad_norm": 0.28091199539712347, "learning_rate": 2.0790720302363735e-06, "loss": 0.0161, "step": 221815 }, { "epoch": 0.9255534878286921, "grad_norm": 0.2851821401145783, "learning_rate": 2.079048598026482e-06, "loss": 0.0166, "step": 221820 }, { "epoch": 0.9255743505436823, "grad_norm": 0.6724545286305131, "learning_rate": 2.079025166608851e-06, "loss": 0.0181, "step": 221825 }, { "epoch": 0.9255952132586727, "grad_norm": 0.5782132456560203, "learning_rate": 2.079001735983437e-06, "loss": 0.0154, "step": 221830 }, { "epoch": 0.9256160759736629, "grad_norm": 0.603440938936558, "learning_rate": 2.078978306150195e-06, "loss": 0.0232, "step": 221835 }, { "epoch": 0.9256369386886532, "grad_norm": 0.7136323522685915, "learning_rate": 2.0789548771090805e-06, "loss": 0.0127, "step": 221840 }, { "epoch": 0.9256578014036435, "grad_norm": 0.25569695889210065, "learning_rate": 2.0789314488600483e-06, "loss": 0.0172, "step": 221845 }, { "epoch": 0.9256786641186338, "grad_norm": 0.790935263164497, "learning_rate": 2.078908021403054e-06, "loss": 0.0196, "step": 221850 }, { "epoch": 0.925699526833624, "grad_norm": 0.21630768378241513, "learning_rate": 2.078884594738054e-06, "loss": 0.0172, "step": 221855 }, { "epoch": 0.9257203895486144, "grad_norm": 0.635135557813657, "learning_rate": 2.0788611688650026e-06, "loss": 0.0219, "step": 221860 }, { "epoch": 0.9257412522636046, "grad_norm": 0.7168067114586179, "learning_rate": 2.078837743783855e-06, "loss": 0.0189, "step": 221865 }, { "epoch": 0.9257621149785948, "grad_norm": 0.40455768603180015, "learning_rate": 2.078814319494567e-06, "loss": 0.0156, "step": 221870 }, { "epoch": 0.9257829776935851, "grad_norm": 0.6774141490839594, "learning_rate": 2.0787908959970946e-06, "loss": 0.0255, "step": 221875 }, { "epoch": 0.9258038404085754, "grad_norm": 0.41447116608015344, "learning_rate": 2.078767473291392e-06, "loss": 0.0246, "step": 221880 }, { "epoch": 0.9258247031235657, "grad_norm": 0.6172760416146195, "learning_rate": 2.0787440513774158e-06, "loss": 0.0183, "step": 221885 }, { "epoch": 0.9258455658385559, "grad_norm": 0.46680964791372437, "learning_rate": 2.0787206302551197e-06, "loss": 0.0169, "step": 221890 }, { "epoch": 0.9258664285535463, "grad_norm": 0.43911274893935387, "learning_rate": 2.0786972099244615e-06, "loss": 0.0209, "step": 221895 }, { "epoch": 0.9258872912685365, "grad_norm": 0.7273067646790102, "learning_rate": 2.078673790385394e-06, "loss": 0.0161, "step": 221900 }, { "epoch": 0.9259081539835268, "grad_norm": 0.7576855976852106, "learning_rate": 2.078650371637875e-06, "loss": 0.0241, "step": 221905 }, { "epoch": 0.9259290166985171, "grad_norm": 0.8366718379643424, "learning_rate": 2.0786269536818583e-06, "loss": 0.026, "step": 221910 }, { "epoch": 0.9259498794135074, "grad_norm": 0.42445269874900965, "learning_rate": 2.0786035365172995e-06, "loss": 0.0211, "step": 221915 }, { "epoch": 0.9259707421284976, "grad_norm": 0.3409328508654392, "learning_rate": 2.0785801201441546e-06, "loss": 0.0171, "step": 221920 }, { "epoch": 0.9259916048434879, "grad_norm": 0.4830256841785192, "learning_rate": 2.0785567045623794e-06, "loss": 0.0176, "step": 221925 }, { "epoch": 0.9260124675584782, "grad_norm": 0.4350566479917809, "learning_rate": 2.0785332897719283e-06, "loss": 0.0186, "step": 221930 }, { "epoch": 0.9260333302734685, "grad_norm": 0.6915747545435142, "learning_rate": 2.0785098757727566e-06, "loss": 0.015, "step": 221935 }, { "epoch": 0.9260541929884587, "grad_norm": 0.7579014238128848, "learning_rate": 2.0784864625648205e-06, "loss": 0.0214, "step": 221940 }, { "epoch": 0.9260750557034491, "grad_norm": 0.759319908260643, "learning_rate": 2.078463050148075e-06, "loss": 0.0165, "step": 221945 }, { "epoch": 0.9260959184184393, "grad_norm": 0.8463276247602167, "learning_rate": 2.0784396385224765e-06, "loss": 0.0213, "step": 221950 }, { "epoch": 0.9261167811334295, "grad_norm": 1.521333330906977, "learning_rate": 2.078416227687979e-06, "loss": 0.0262, "step": 221955 }, { "epoch": 0.9261376438484199, "grad_norm": 0.33370297542845667, "learning_rate": 2.078392817644539e-06, "loss": 0.0215, "step": 221960 }, { "epoch": 0.9261585065634101, "grad_norm": 0.42466624861449276, "learning_rate": 2.0783694083921113e-06, "loss": 0.017, "step": 221965 }, { "epoch": 0.9261793692784004, "grad_norm": 0.6555594772306547, "learning_rate": 2.078345999930652e-06, "loss": 0.0226, "step": 221970 }, { "epoch": 0.9262002319933907, "grad_norm": 0.6302919468420206, "learning_rate": 2.0783225922601156e-06, "loss": 0.0173, "step": 221975 }, { "epoch": 0.926221094708381, "grad_norm": 0.6605384227400009, "learning_rate": 2.0782991853804584e-06, "loss": 0.0164, "step": 221980 }, { "epoch": 0.9262419574233712, "grad_norm": 0.6615618991354979, "learning_rate": 2.078275779291636e-06, "loss": 0.0162, "step": 221985 }, { "epoch": 0.9262628201383615, "grad_norm": 0.47013380989936143, "learning_rate": 2.078252373993603e-06, "loss": 0.0142, "step": 221990 }, { "epoch": 0.9262836828533518, "grad_norm": 0.8408721413084378, "learning_rate": 2.0782289694863157e-06, "loss": 0.0268, "step": 221995 }, { "epoch": 0.9263045455683421, "grad_norm": 0.7204054157902628, "learning_rate": 2.078205565769729e-06, "loss": 0.0211, "step": 222000 }, { "epoch": 0.9263254082833323, "grad_norm": 0.8472745485029113, "learning_rate": 2.0781821628437983e-06, "loss": 0.0185, "step": 222005 }, { "epoch": 0.9263462709983227, "grad_norm": 0.6326644047401201, "learning_rate": 2.0781587607084797e-06, "loss": 0.0185, "step": 222010 }, { "epoch": 0.9263671337133129, "grad_norm": 0.7573118474196292, "learning_rate": 2.0781353593637283e-06, "loss": 0.0205, "step": 222015 }, { "epoch": 0.9263879964283032, "grad_norm": 0.4746480073931087, "learning_rate": 2.0781119588094997e-06, "loss": 0.0227, "step": 222020 }, { "epoch": 0.9264088591432935, "grad_norm": 0.9868824416829926, "learning_rate": 2.0780885590457497e-06, "loss": 0.0197, "step": 222025 }, { "epoch": 0.9264297218582838, "grad_norm": 0.38621952777087243, "learning_rate": 2.078065160072433e-06, "loss": 0.0184, "step": 222030 }, { "epoch": 0.926450584573274, "grad_norm": 0.5987286721428589, "learning_rate": 2.0780417618895057e-06, "loss": 0.0184, "step": 222035 }, { "epoch": 0.9264714472882644, "grad_norm": 0.5506101128908074, "learning_rate": 2.0780183644969233e-06, "loss": 0.0142, "step": 222040 }, { "epoch": 0.9264923100032546, "grad_norm": 0.8609991932947348, "learning_rate": 2.0779949678946408e-06, "loss": 0.0205, "step": 222045 }, { "epoch": 0.9265131727182448, "grad_norm": 0.5918124642514079, "learning_rate": 2.077971572082614e-06, "loss": 0.0177, "step": 222050 }, { "epoch": 0.9265340354332351, "grad_norm": 0.5443246413273752, "learning_rate": 2.0779481770607994e-06, "loss": 0.0175, "step": 222055 }, { "epoch": 0.9265548981482254, "grad_norm": 0.7041466361050678, "learning_rate": 2.077924782829151e-06, "loss": 0.0232, "step": 222060 }, { "epoch": 0.9265757608632157, "grad_norm": 0.6035781336021074, "learning_rate": 2.077901389387625e-06, "loss": 0.0194, "step": 222065 }, { "epoch": 0.9265966235782059, "grad_norm": 0.9288282171453095, "learning_rate": 2.077877996736177e-06, "loss": 0.0169, "step": 222070 }, { "epoch": 0.9266174862931963, "grad_norm": 1.0385406634415275, "learning_rate": 2.0778546048747624e-06, "loss": 0.0221, "step": 222075 }, { "epoch": 0.9266383490081865, "grad_norm": 0.5677243728844339, "learning_rate": 2.077831213803336e-06, "loss": 0.0176, "step": 222080 }, { "epoch": 0.9266592117231768, "grad_norm": 1.0277272604313779, "learning_rate": 2.077807823521855e-06, "loss": 0.0203, "step": 222085 }, { "epoch": 0.9266800744381671, "grad_norm": 0.6007719794794342, "learning_rate": 2.077784434030273e-06, "loss": 0.0264, "step": 222090 }, { "epoch": 0.9267009371531574, "grad_norm": 0.5160622029644362, "learning_rate": 2.0777610453285474e-06, "loss": 0.02, "step": 222095 }, { "epoch": 0.9267217998681476, "grad_norm": 1.4251435929316805, "learning_rate": 2.0777376574166324e-06, "loss": 0.0315, "step": 222100 }, { "epoch": 0.9267426625831379, "grad_norm": 0.7053932623329292, "learning_rate": 2.077714270294484e-06, "loss": 0.0227, "step": 222105 }, { "epoch": 0.9267635252981282, "grad_norm": 0.6784722954844099, "learning_rate": 2.0776908839620584e-06, "loss": 0.0149, "step": 222110 }, { "epoch": 0.9267843880131185, "grad_norm": 0.8673899229662747, "learning_rate": 2.0776674984193098e-06, "loss": 0.0204, "step": 222115 }, { "epoch": 0.9268052507281087, "grad_norm": 1.000599150340575, "learning_rate": 2.0776441136661944e-06, "loss": 0.0208, "step": 222120 }, { "epoch": 0.9268261134430991, "grad_norm": 0.3874971346329765, "learning_rate": 2.0776207297026684e-06, "loss": 0.0186, "step": 222125 }, { "epoch": 0.9268469761580893, "grad_norm": 0.9790375306491736, "learning_rate": 2.0775973465286868e-06, "loss": 0.02, "step": 222130 }, { "epoch": 0.9268678388730796, "grad_norm": 1.0433440830664675, "learning_rate": 2.077573964144205e-06, "loss": 0.0205, "step": 222135 }, { "epoch": 0.9268887015880699, "grad_norm": 0.5088892937320488, "learning_rate": 2.0775505825491792e-06, "loss": 0.023, "step": 222140 }, { "epoch": 0.9269095643030602, "grad_norm": 0.6461148086115569, "learning_rate": 2.077527201743564e-06, "loss": 0.0201, "step": 222145 }, { "epoch": 0.9269304270180504, "grad_norm": 0.5992033589068709, "learning_rate": 2.0775038217273156e-06, "loss": 0.0205, "step": 222150 }, { "epoch": 0.9269512897330408, "grad_norm": 0.4470698618301604, "learning_rate": 2.07748044250039e-06, "loss": 0.0174, "step": 222155 }, { "epoch": 0.926972152448031, "grad_norm": 0.5159575470280038, "learning_rate": 2.0774570640627415e-06, "loss": 0.0208, "step": 222160 }, { "epoch": 0.9269930151630212, "grad_norm": 0.9734778329421178, "learning_rate": 2.077433686414327e-06, "loss": 0.0191, "step": 222165 }, { "epoch": 0.9270138778780115, "grad_norm": 0.8832933428818781, "learning_rate": 2.077410309555101e-06, "loss": 0.0234, "step": 222170 }, { "epoch": 0.9270347405930018, "grad_norm": 0.7725918045450163, "learning_rate": 2.0773869334850205e-06, "loss": 0.0174, "step": 222175 }, { "epoch": 0.9270556033079921, "grad_norm": 0.6100664300349031, "learning_rate": 2.07736355820404e-06, "loss": 0.0163, "step": 222180 }, { "epoch": 0.9270764660229823, "grad_norm": 0.39766851035852724, "learning_rate": 2.0773401837121156e-06, "loss": 0.0105, "step": 222185 }, { "epoch": 0.9270973287379727, "grad_norm": 0.6354795567731171, "learning_rate": 2.0773168100092024e-06, "loss": 0.0162, "step": 222190 }, { "epoch": 0.9271181914529629, "grad_norm": 0.8784452168200623, "learning_rate": 2.077293437095256e-06, "loss": 0.0192, "step": 222195 }, { "epoch": 0.9271390541679532, "grad_norm": 0.495266981986461, "learning_rate": 2.077270064970233e-06, "loss": 0.021, "step": 222200 }, { "epoch": 0.9271599168829435, "grad_norm": 0.3593056711014104, "learning_rate": 2.0772466936340877e-06, "loss": 0.0179, "step": 222205 }, { "epoch": 0.9271807795979338, "grad_norm": 0.5999723180858225, "learning_rate": 2.077223323086777e-06, "loss": 0.0192, "step": 222210 }, { "epoch": 0.927201642312924, "grad_norm": 0.6443140844570125, "learning_rate": 2.0771999533282553e-06, "loss": 0.0199, "step": 222215 }, { "epoch": 0.9272225050279144, "grad_norm": 0.7612639430645836, "learning_rate": 2.077176584358479e-06, "loss": 0.0194, "step": 222220 }, { "epoch": 0.9272433677429046, "grad_norm": 0.4731509982209454, "learning_rate": 2.0771532161774038e-06, "loss": 0.0115, "step": 222225 }, { "epoch": 0.9272642304578949, "grad_norm": 1.0724154963428525, "learning_rate": 2.077129848784985e-06, "loss": 0.028, "step": 222230 }, { "epoch": 0.9272850931728851, "grad_norm": 1.1797472710717245, "learning_rate": 2.0771064821811786e-06, "loss": 0.0163, "step": 222235 }, { "epoch": 0.9273059558878755, "grad_norm": 0.9025649606746987, "learning_rate": 2.0770831163659396e-06, "loss": 0.0255, "step": 222240 }, { "epoch": 0.9273268186028657, "grad_norm": 0.771691251870516, "learning_rate": 2.077059751339224e-06, "loss": 0.0225, "step": 222245 }, { "epoch": 0.9273476813178559, "grad_norm": 0.5820520151946922, "learning_rate": 2.0770363871009872e-06, "loss": 0.0132, "step": 222250 }, { "epoch": 0.9273685440328463, "grad_norm": 0.27597478514295665, "learning_rate": 2.0770130236511855e-06, "loss": 0.0138, "step": 222255 }, { "epoch": 0.9273894067478365, "grad_norm": 0.6249756670262313, "learning_rate": 2.0769896609897745e-06, "loss": 0.0211, "step": 222260 }, { "epoch": 0.9274102694628268, "grad_norm": 0.7744594101410586, "learning_rate": 2.0769662991167093e-06, "loss": 0.012, "step": 222265 }, { "epoch": 0.9274311321778171, "grad_norm": 0.992840121306911, "learning_rate": 2.0769429380319455e-06, "loss": 0.0241, "step": 222270 }, { "epoch": 0.9274519948928074, "grad_norm": 0.5211001221742297, "learning_rate": 2.0769195777354395e-06, "loss": 0.0145, "step": 222275 }, { "epoch": 0.9274728576077976, "grad_norm": 0.5559938450337027, "learning_rate": 2.0768962182271463e-06, "loss": 0.0164, "step": 222280 }, { "epoch": 0.9274937203227879, "grad_norm": 0.746595125376431, "learning_rate": 2.076872859507022e-06, "loss": 0.0174, "step": 222285 }, { "epoch": 0.9275145830377782, "grad_norm": 1.1033046472089774, "learning_rate": 2.076849501575022e-06, "loss": 0.0205, "step": 222290 }, { "epoch": 0.9275354457527685, "grad_norm": 0.9783215910782446, "learning_rate": 2.0768261444311022e-06, "loss": 0.0211, "step": 222295 }, { "epoch": 0.9275563084677587, "grad_norm": 0.5938036205023765, "learning_rate": 2.076802788075218e-06, "loss": 0.0182, "step": 222300 }, { "epoch": 0.9275771711827491, "grad_norm": 0.7325263005876429, "learning_rate": 2.0767794325073254e-06, "loss": 0.0187, "step": 222305 }, { "epoch": 0.9275980338977393, "grad_norm": 0.5979435820926241, "learning_rate": 2.0767560777273798e-06, "loss": 0.0157, "step": 222310 }, { "epoch": 0.9276188966127296, "grad_norm": 0.886237876204226, "learning_rate": 2.076732723735337e-06, "loss": 0.0175, "step": 222315 }, { "epoch": 0.9276397593277199, "grad_norm": 0.5239670559932127, "learning_rate": 2.076709370531153e-06, "loss": 0.0163, "step": 222320 }, { "epoch": 0.9276606220427102, "grad_norm": 1.0284365305257435, "learning_rate": 2.076686018114783e-06, "loss": 0.0185, "step": 222325 }, { "epoch": 0.9276814847577004, "grad_norm": 0.5865221358375535, "learning_rate": 2.076662666486183e-06, "loss": 0.032, "step": 222330 }, { "epoch": 0.9277023474726908, "grad_norm": 0.41288232389424095, "learning_rate": 2.0766393156453092e-06, "loss": 0.023, "step": 222335 }, { "epoch": 0.927723210187681, "grad_norm": 0.6807520320492477, "learning_rate": 2.076615965592116e-06, "loss": 0.0223, "step": 222340 }, { "epoch": 0.9277440729026712, "grad_norm": 0.5120105756978155, "learning_rate": 2.0765926163265605e-06, "loss": 0.0156, "step": 222345 }, { "epoch": 0.9277649356176615, "grad_norm": 1.1041404806088775, "learning_rate": 2.0765692678485976e-06, "loss": 0.0191, "step": 222350 }, { "epoch": 0.9277857983326518, "grad_norm": 0.6072163441057241, "learning_rate": 2.076545920158183e-06, "loss": 0.0144, "step": 222355 }, { "epoch": 0.9278066610476421, "grad_norm": 0.2042856835742157, "learning_rate": 2.076522573255273e-06, "loss": 0.0124, "step": 222360 }, { "epoch": 0.9278275237626323, "grad_norm": 0.317750346196546, "learning_rate": 2.076499227139823e-06, "loss": 0.0178, "step": 222365 }, { "epoch": 0.9278483864776227, "grad_norm": 0.9348287485501722, "learning_rate": 2.0764758818117888e-06, "loss": 0.0201, "step": 222370 }, { "epoch": 0.9278692491926129, "grad_norm": 0.5461646536222053, "learning_rate": 2.0764525372711255e-06, "loss": 0.0165, "step": 222375 }, { "epoch": 0.9278901119076032, "grad_norm": 0.4731069262538058, "learning_rate": 2.07642919351779e-06, "loss": 0.0139, "step": 222380 }, { "epoch": 0.9279109746225935, "grad_norm": 1.1224123290972075, "learning_rate": 2.0764058505517373e-06, "loss": 0.03, "step": 222385 }, { "epoch": 0.9279318373375838, "grad_norm": 0.9740169858777632, "learning_rate": 2.0763825083729234e-06, "loss": 0.0162, "step": 222390 }, { "epoch": 0.927952700052574, "grad_norm": 0.8539710024551933, "learning_rate": 2.0763591669813034e-06, "loss": 0.0181, "step": 222395 }, { "epoch": 0.9279735627675644, "grad_norm": 0.4961990282603908, "learning_rate": 2.0763358263768344e-06, "loss": 0.018, "step": 222400 }, { "epoch": 0.9279944254825546, "grad_norm": 0.7299450781166787, "learning_rate": 2.076312486559471e-06, "loss": 0.0186, "step": 222405 }, { "epoch": 0.9280152881975449, "grad_norm": 0.4068439170767227, "learning_rate": 2.0762891475291695e-06, "loss": 0.0158, "step": 222410 }, { "epoch": 0.9280361509125351, "grad_norm": 0.7623092883069101, "learning_rate": 2.076265809285885e-06, "loss": 0.0224, "step": 222415 }, { "epoch": 0.9280570136275255, "grad_norm": 0.7351933225967697, "learning_rate": 2.0762424718295745e-06, "loss": 0.0171, "step": 222420 }, { "epoch": 0.9280778763425157, "grad_norm": 0.6687605113206447, "learning_rate": 2.0762191351601928e-06, "loss": 0.0185, "step": 222425 }, { "epoch": 0.928098739057506, "grad_norm": 0.6615644273673689, "learning_rate": 2.0761957992776958e-06, "loss": 0.0338, "step": 222430 }, { "epoch": 0.9281196017724963, "grad_norm": 0.38927102661402185, "learning_rate": 2.0761724641820394e-06, "loss": 0.0216, "step": 222435 }, { "epoch": 0.9281404644874865, "grad_norm": 0.7201533369272057, "learning_rate": 2.0761491298731793e-06, "loss": 0.0322, "step": 222440 }, { "epoch": 0.9281613272024768, "grad_norm": 0.46420627797201597, "learning_rate": 2.0761257963510713e-06, "loss": 0.0146, "step": 222445 }, { "epoch": 0.9281821899174671, "grad_norm": 0.9077292792276697, "learning_rate": 2.0761024636156715e-06, "loss": 0.02, "step": 222450 }, { "epoch": 0.9282030526324574, "grad_norm": 0.8130741315096304, "learning_rate": 2.0760791316669352e-06, "loss": 0.0209, "step": 222455 }, { "epoch": 0.9282239153474476, "grad_norm": 0.4667940075324018, "learning_rate": 2.076055800504819e-06, "loss": 0.0242, "step": 222460 }, { "epoch": 0.9282447780624379, "grad_norm": 0.7684156642589708, "learning_rate": 2.076032470129278e-06, "loss": 0.0197, "step": 222465 }, { "epoch": 0.9282656407774282, "grad_norm": 0.8178601995142027, "learning_rate": 2.0760091405402675e-06, "loss": 0.0266, "step": 222470 }, { "epoch": 0.9282865034924185, "grad_norm": 0.43792630059599663, "learning_rate": 2.0759858117377445e-06, "loss": 0.0216, "step": 222475 }, { "epoch": 0.9283073662074087, "grad_norm": 0.5398482605223319, "learning_rate": 2.0759624837216645e-06, "loss": 0.0177, "step": 222480 }, { "epoch": 0.9283282289223991, "grad_norm": 0.7763359093216419, "learning_rate": 2.0759391564919826e-06, "loss": 0.0212, "step": 222485 }, { "epoch": 0.9283490916373893, "grad_norm": 0.7439996128834201, "learning_rate": 2.075915830048655e-06, "loss": 0.0191, "step": 222490 }, { "epoch": 0.9283699543523796, "grad_norm": 0.49800894805929696, "learning_rate": 2.075892504391638e-06, "loss": 0.0122, "step": 222495 }, { "epoch": 0.9283908170673699, "grad_norm": 0.546673507874803, "learning_rate": 2.075869179520887e-06, "loss": 0.0187, "step": 222500 }, { "epoch": 0.9284116797823602, "grad_norm": 0.6614698808463518, "learning_rate": 2.0758458554363584e-06, "loss": 0.0161, "step": 222505 }, { "epoch": 0.9284325424973504, "grad_norm": 0.9227667981649084, "learning_rate": 2.0758225321380067e-06, "loss": 0.0183, "step": 222510 }, { "epoch": 0.9284534052123408, "grad_norm": 0.4062877311802617, "learning_rate": 2.075799209625789e-06, "loss": 0.0145, "step": 222515 }, { "epoch": 0.928474267927331, "grad_norm": 0.47871250280870953, "learning_rate": 2.0757758878996604e-06, "loss": 0.0166, "step": 222520 }, { "epoch": 0.9284951306423213, "grad_norm": 0.3878924968203501, "learning_rate": 2.0757525669595776e-06, "loss": 0.0111, "step": 222525 }, { "epoch": 0.9285159933573115, "grad_norm": 0.6714139128240239, "learning_rate": 2.075729246805495e-06, "loss": 0.0264, "step": 222530 }, { "epoch": 0.9285368560723019, "grad_norm": 0.6715252202252265, "learning_rate": 2.0757059274373696e-06, "loss": 0.0231, "step": 222535 }, { "epoch": 0.9285577187872921, "grad_norm": 1.0627401248760049, "learning_rate": 2.075682608855157e-06, "loss": 0.0244, "step": 222540 }, { "epoch": 0.9285785815022823, "grad_norm": 0.3963259756889723, "learning_rate": 2.0756592910588134e-06, "loss": 0.0166, "step": 222545 }, { "epoch": 0.9285994442172727, "grad_norm": 0.8383152103832229, "learning_rate": 2.075635974048294e-06, "loss": 0.0253, "step": 222550 }, { "epoch": 0.9286203069322629, "grad_norm": 0.34511754465162103, "learning_rate": 2.0756126578235555e-06, "loss": 0.0238, "step": 222555 }, { "epoch": 0.9286411696472532, "grad_norm": 0.78921389851275, "learning_rate": 2.0755893423845523e-06, "loss": 0.0236, "step": 222560 }, { "epoch": 0.9286620323622435, "grad_norm": 1.2313787258633366, "learning_rate": 2.0755660277312416e-06, "loss": 0.0177, "step": 222565 }, { "epoch": 0.9286828950772338, "grad_norm": 0.7247119063162157, "learning_rate": 2.0755427138635786e-06, "loss": 0.012, "step": 222570 }, { "epoch": 0.928703757792224, "grad_norm": 0.5599656435999966, "learning_rate": 2.07551940078152e-06, "loss": 0.0189, "step": 222575 }, { "epoch": 0.9287246205072144, "grad_norm": 1.3370919902240233, "learning_rate": 2.075496088485021e-06, "loss": 0.0216, "step": 222580 }, { "epoch": 0.9287454832222046, "grad_norm": 0.6314658500782151, "learning_rate": 2.0754727769740373e-06, "loss": 0.0189, "step": 222585 }, { "epoch": 0.9287663459371949, "grad_norm": 0.5772240980223149, "learning_rate": 2.075449466248525e-06, "loss": 0.0141, "step": 222590 }, { "epoch": 0.9287872086521851, "grad_norm": 1.0084517183490658, "learning_rate": 2.0754261563084404e-06, "loss": 0.0214, "step": 222595 }, { "epoch": 0.9288080713671755, "grad_norm": 0.8911957846996963, "learning_rate": 2.075402847153739e-06, "loss": 0.0218, "step": 222600 }, { "epoch": 0.9288289340821657, "grad_norm": 0.5609967351292824, "learning_rate": 2.075379538784377e-06, "loss": 0.0185, "step": 222605 }, { "epoch": 0.928849796797156, "grad_norm": 0.4038544259093956, "learning_rate": 2.0753562312003095e-06, "loss": 0.0175, "step": 222610 }, { "epoch": 0.9288706595121463, "grad_norm": 0.5765386276823331, "learning_rate": 2.0753329244014935e-06, "loss": 0.0141, "step": 222615 }, { "epoch": 0.9288915222271366, "grad_norm": 0.7825256583491562, "learning_rate": 2.075309618387884e-06, "loss": 0.0221, "step": 222620 }, { "epoch": 0.9289123849421268, "grad_norm": 0.45199507287279256, "learning_rate": 2.0752863131594375e-06, "loss": 0.0248, "step": 222625 }, { "epoch": 0.9289332476571172, "grad_norm": 0.5813350889689392, "learning_rate": 2.0752630087161095e-06, "loss": 0.0216, "step": 222630 }, { "epoch": 0.9289541103721074, "grad_norm": 0.7544016453551721, "learning_rate": 2.075239705057857e-06, "loss": 0.0209, "step": 222635 }, { "epoch": 0.9289749730870976, "grad_norm": 0.9937243352764096, "learning_rate": 2.075216402184634e-06, "loss": 0.0191, "step": 222640 }, { "epoch": 0.9289958358020879, "grad_norm": 0.4392369541115293, "learning_rate": 2.0751931000963983e-06, "loss": 0.0188, "step": 222645 }, { "epoch": 0.9290166985170782, "grad_norm": 0.825151778163141, "learning_rate": 2.0751697987931045e-06, "loss": 0.0198, "step": 222650 }, { "epoch": 0.9290375612320685, "grad_norm": 0.9939572371629595, "learning_rate": 2.075146498274709e-06, "loss": 0.0236, "step": 222655 }, { "epoch": 0.9290584239470587, "grad_norm": 0.431341753088733, "learning_rate": 2.0751231985411684e-06, "loss": 0.0149, "step": 222660 }, { "epoch": 0.9290792866620491, "grad_norm": 0.3951185888614328, "learning_rate": 2.075099899592437e-06, "loss": 0.0147, "step": 222665 }, { "epoch": 0.9291001493770393, "grad_norm": 1.072506715793417, "learning_rate": 2.0750766014284724e-06, "loss": 0.0205, "step": 222670 }, { "epoch": 0.9291210120920296, "grad_norm": 0.4650745860316798, "learning_rate": 2.0750533040492303e-06, "loss": 0.0234, "step": 222675 }, { "epoch": 0.9291418748070199, "grad_norm": 0.6546690921599445, "learning_rate": 2.0750300074546654e-06, "loss": 0.0204, "step": 222680 }, { "epoch": 0.9291627375220102, "grad_norm": 0.6256452102202986, "learning_rate": 2.075006711644735e-06, "loss": 0.0323, "step": 222685 }, { "epoch": 0.9291836002370004, "grad_norm": 0.5669481626971083, "learning_rate": 2.0749834166193946e-06, "loss": 0.0208, "step": 222690 }, { "epoch": 0.9292044629519908, "grad_norm": 0.8213084565403528, "learning_rate": 2.0749601223785996e-06, "loss": 0.0149, "step": 222695 }, { "epoch": 0.929225325666981, "grad_norm": 1.8895393881698237, "learning_rate": 2.074936828922307e-06, "loss": 0.0199, "step": 222700 }, { "epoch": 0.9292461883819713, "grad_norm": 0.4571699504545607, "learning_rate": 2.074913536250472e-06, "loss": 0.024, "step": 222705 }, { "epoch": 0.9292670510969615, "grad_norm": 0.7145755073968181, "learning_rate": 2.0748902443630515e-06, "loss": 0.0275, "step": 222710 }, { "epoch": 0.9292879138119519, "grad_norm": 0.8465432703096465, "learning_rate": 2.07486695326e-06, "loss": 0.0204, "step": 222715 }, { "epoch": 0.9293087765269421, "grad_norm": 0.31735900118999494, "learning_rate": 2.0748436629412745e-06, "loss": 0.0166, "step": 222720 }, { "epoch": 0.9293296392419323, "grad_norm": 0.47241339935816246, "learning_rate": 2.074820373406831e-06, "loss": 0.0217, "step": 222725 }, { "epoch": 0.9293505019569227, "grad_norm": 0.6117555202564721, "learning_rate": 2.074797084656625e-06, "loss": 0.0179, "step": 222730 }, { "epoch": 0.929371364671913, "grad_norm": 0.2803433175796348, "learning_rate": 2.074773796690613e-06, "loss": 0.0195, "step": 222735 }, { "epoch": 0.9293922273869032, "grad_norm": 0.3218658591148913, "learning_rate": 2.0747505095087504e-06, "loss": 0.0233, "step": 222740 }, { "epoch": 0.9294130901018935, "grad_norm": 0.5640342025887044, "learning_rate": 2.0747272231109935e-06, "loss": 0.0225, "step": 222745 }, { "epoch": 0.9294339528168838, "grad_norm": 0.7457778238300269, "learning_rate": 2.0747039374972986e-06, "loss": 0.0239, "step": 222750 }, { "epoch": 0.929454815531874, "grad_norm": 0.7179331265531096, "learning_rate": 2.074680652667621e-06, "loss": 0.018, "step": 222755 }, { "epoch": 0.9294756782468643, "grad_norm": 0.44926945122918577, "learning_rate": 2.074657368621917e-06, "loss": 0.0171, "step": 222760 }, { "epoch": 0.9294965409618546, "grad_norm": 0.23929848787084737, "learning_rate": 2.0746340853601435e-06, "loss": 0.0114, "step": 222765 }, { "epoch": 0.9295174036768449, "grad_norm": 0.5836432778326508, "learning_rate": 2.0746108028822556e-06, "loss": 0.018, "step": 222770 }, { "epoch": 0.9295382663918351, "grad_norm": 0.6207831082448122, "learning_rate": 2.0745875211882094e-06, "loss": 0.0188, "step": 222775 }, { "epoch": 0.9295591291068255, "grad_norm": 0.5658067003225495, "learning_rate": 2.0745642402779605e-06, "loss": 0.0252, "step": 222780 }, { "epoch": 0.9295799918218157, "grad_norm": 1.1798997721152031, "learning_rate": 2.0745409601514657e-06, "loss": 0.0242, "step": 222785 }, { "epoch": 0.929600854536806, "grad_norm": 0.8404628919539672, "learning_rate": 2.074517680808681e-06, "loss": 0.0222, "step": 222790 }, { "epoch": 0.9296217172517963, "grad_norm": 0.6382877068466876, "learning_rate": 2.0744944022495618e-06, "loss": 0.0143, "step": 222795 }, { "epoch": 0.9296425799667866, "grad_norm": 0.4379240793523545, "learning_rate": 2.074471124474065e-06, "loss": 0.0268, "step": 222800 }, { "epoch": 0.9296634426817768, "grad_norm": 0.8010419378927719, "learning_rate": 2.074447847482145e-06, "loss": 0.0212, "step": 222805 }, { "epoch": 0.9296843053967672, "grad_norm": 0.3309607526313341, "learning_rate": 2.0744245712737596e-06, "loss": 0.0193, "step": 222810 }, { "epoch": 0.9297051681117574, "grad_norm": 0.6824786380024525, "learning_rate": 2.074401295848865e-06, "loss": 0.0216, "step": 222815 }, { "epoch": 0.9297260308267477, "grad_norm": 0.48832878457406953, "learning_rate": 2.0743780212074153e-06, "loss": 0.0199, "step": 222820 }, { "epoch": 0.9297468935417379, "grad_norm": 0.5661141836144387, "learning_rate": 2.074354747349368e-06, "loss": 0.0229, "step": 222825 }, { "epoch": 0.9297677562567283, "grad_norm": 1.0907017554057625, "learning_rate": 2.074331474274679e-06, "loss": 0.0235, "step": 222830 }, { "epoch": 0.9297886189717185, "grad_norm": 0.6104647996412087, "learning_rate": 2.0743082019833042e-06, "loss": 0.0194, "step": 222835 }, { "epoch": 0.9298094816867087, "grad_norm": 0.36814765529020366, "learning_rate": 2.0742849304751994e-06, "loss": 0.0171, "step": 222840 }, { "epoch": 0.9298303444016991, "grad_norm": 0.4366394478551604, "learning_rate": 2.0742616597503214e-06, "loss": 0.0127, "step": 222845 }, { "epoch": 0.9298512071166893, "grad_norm": 0.6185096775161195, "learning_rate": 2.0742383898086257e-06, "loss": 0.0168, "step": 222850 }, { "epoch": 0.9298720698316796, "grad_norm": 0.5461325757660621, "learning_rate": 2.0742151206500683e-06, "loss": 0.0199, "step": 222855 }, { "epoch": 0.9298929325466699, "grad_norm": 1.0043261979303235, "learning_rate": 2.0741918522746056e-06, "loss": 0.0209, "step": 222860 }, { "epoch": 0.9299137952616602, "grad_norm": 0.7577109357970125, "learning_rate": 2.074168584682194e-06, "loss": 0.0199, "step": 222865 }, { "epoch": 0.9299346579766504, "grad_norm": 0.6180058893365635, "learning_rate": 2.074145317872788e-06, "loss": 0.0208, "step": 222870 }, { "epoch": 0.9299555206916408, "grad_norm": 0.6584123908813567, "learning_rate": 2.0741220518463454e-06, "loss": 0.0205, "step": 222875 }, { "epoch": 0.929976383406631, "grad_norm": 0.512993227614063, "learning_rate": 2.0740987866028217e-06, "loss": 0.0222, "step": 222880 }, { "epoch": 0.9299972461216213, "grad_norm": 1.3861781298669338, "learning_rate": 2.074075522142173e-06, "loss": 0.0235, "step": 222885 }, { "epoch": 0.9300181088366115, "grad_norm": 0.3764705607696176, "learning_rate": 2.074052258464355e-06, "loss": 0.0184, "step": 222890 }, { "epoch": 0.9300389715516019, "grad_norm": 0.6117653911919146, "learning_rate": 2.0740289955693245e-06, "loss": 0.0274, "step": 222895 }, { "epoch": 0.9300598342665921, "grad_norm": 1.00099730120908, "learning_rate": 2.0740057334570373e-06, "loss": 0.0209, "step": 222900 }, { "epoch": 0.9300806969815824, "grad_norm": 0.6356294362277077, "learning_rate": 2.0739824721274496e-06, "loss": 0.021, "step": 222905 }, { "epoch": 0.9301015596965727, "grad_norm": 0.6192791943285328, "learning_rate": 2.073959211580517e-06, "loss": 0.0226, "step": 222910 }, { "epoch": 0.930122422411563, "grad_norm": 0.439657537747489, "learning_rate": 2.073935951816196e-06, "loss": 0.0144, "step": 222915 }, { "epoch": 0.9301432851265532, "grad_norm": 0.2558412526444773, "learning_rate": 2.0739126928344427e-06, "loss": 0.0186, "step": 222920 }, { "epoch": 0.9301641478415436, "grad_norm": 1.0388184540930352, "learning_rate": 2.0738894346352135e-06, "loss": 0.0205, "step": 222925 }, { "epoch": 0.9301850105565338, "grad_norm": 0.7932171682423691, "learning_rate": 2.0738661772184644e-06, "loss": 0.0138, "step": 222930 }, { "epoch": 0.930205873271524, "grad_norm": 0.488583258932499, "learning_rate": 2.0738429205841506e-06, "loss": 0.0228, "step": 222935 }, { "epoch": 0.9302267359865143, "grad_norm": 0.5964329493212747, "learning_rate": 2.0738196647322296e-06, "loss": 0.0257, "step": 222940 }, { "epoch": 0.9302475987015046, "grad_norm": 0.6119664965810238, "learning_rate": 2.073796409662657e-06, "loss": 0.0191, "step": 222945 }, { "epoch": 0.9302684614164949, "grad_norm": 0.4596814401599491, "learning_rate": 2.0737731553753886e-06, "loss": 0.0207, "step": 222950 }, { "epoch": 0.9302893241314851, "grad_norm": 0.34517355710433545, "learning_rate": 2.0737499018703808e-06, "loss": 0.0158, "step": 222955 }, { "epoch": 0.9303101868464755, "grad_norm": 1.7219318993801376, "learning_rate": 2.0737266491475897e-06, "loss": 0.0242, "step": 222960 }, { "epoch": 0.9303310495614657, "grad_norm": 1.1392505081353892, "learning_rate": 2.073703397206972e-06, "loss": 0.0182, "step": 222965 }, { "epoch": 0.930351912276456, "grad_norm": 0.49322563842519024, "learning_rate": 2.0736801460484832e-06, "loss": 0.0226, "step": 222970 }, { "epoch": 0.9303727749914463, "grad_norm": 0.4761066293805351, "learning_rate": 2.0736568956720793e-06, "loss": 0.0208, "step": 222975 }, { "epoch": 0.9303936377064366, "grad_norm": 0.35497712063965775, "learning_rate": 2.0736336460777168e-06, "loss": 0.0175, "step": 222980 }, { "epoch": 0.9304145004214268, "grad_norm": 0.8543833091496812, "learning_rate": 2.073610397265352e-06, "loss": 0.0226, "step": 222985 }, { "epoch": 0.9304353631364172, "grad_norm": 0.751294100667739, "learning_rate": 2.073587149234941e-06, "loss": 0.0186, "step": 222990 }, { "epoch": 0.9304562258514074, "grad_norm": 0.6125308301253772, "learning_rate": 2.073563901986439e-06, "loss": 0.0166, "step": 222995 }, { "epoch": 0.9304770885663977, "grad_norm": 0.41063208134551393, "learning_rate": 2.073540655519804e-06, "loss": 0.0209, "step": 223000 }, { "epoch": 0.9304979512813879, "grad_norm": 0.577833832106558, "learning_rate": 2.073517409834991e-06, "loss": 0.0201, "step": 223005 }, { "epoch": 0.9305188139963783, "grad_norm": 0.44182742662660973, "learning_rate": 2.073494164931956e-06, "loss": 0.0214, "step": 223010 }, { "epoch": 0.9305396767113685, "grad_norm": 0.3579192277222293, "learning_rate": 2.0734709208106555e-06, "loss": 0.0175, "step": 223015 }, { "epoch": 0.9305605394263587, "grad_norm": 0.5755183519084757, "learning_rate": 2.0734476774710464e-06, "loss": 0.0168, "step": 223020 }, { "epoch": 0.9305814021413491, "grad_norm": 0.8465069832898283, "learning_rate": 2.073424434913084e-06, "loss": 0.0192, "step": 223025 }, { "epoch": 0.9306022648563393, "grad_norm": 0.5553856125650593, "learning_rate": 2.0734011931367246e-06, "loss": 0.0263, "step": 223030 }, { "epoch": 0.9306231275713296, "grad_norm": 0.8134433767590754, "learning_rate": 2.0733779521419246e-06, "loss": 0.0185, "step": 223035 }, { "epoch": 0.9306439902863199, "grad_norm": 0.42582746335753563, "learning_rate": 2.07335471192864e-06, "loss": 0.0164, "step": 223040 }, { "epoch": 0.9306648530013102, "grad_norm": 0.8497267124923392, "learning_rate": 2.073331472496827e-06, "loss": 0.0209, "step": 223045 }, { "epoch": 0.9306857157163004, "grad_norm": 1.0448637485861731, "learning_rate": 2.073308233846442e-06, "loss": 0.0269, "step": 223050 }, { "epoch": 0.9307065784312908, "grad_norm": 1.1386951161773253, "learning_rate": 2.0732849959774413e-06, "loss": 0.0208, "step": 223055 }, { "epoch": 0.930727441146281, "grad_norm": 0.3575368345115377, "learning_rate": 2.0732617588897807e-06, "loss": 0.022, "step": 223060 }, { "epoch": 0.9307483038612713, "grad_norm": 0.3499542210270789, "learning_rate": 2.0732385225834165e-06, "loss": 0.0252, "step": 223065 }, { "epoch": 0.9307691665762615, "grad_norm": 0.2867980619573034, "learning_rate": 2.0732152870583055e-06, "loss": 0.0251, "step": 223070 }, { "epoch": 0.9307900292912519, "grad_norm": 0.5223704777230977, "learning_rate": 2.0731920523144033e-06, "loss": 0.0126, "step": 223075 }, { "epoch": 0.9308108920062421, "grad_norm": 0.846062591890079, "learning_rate": 2.073168818351666e-06, "loss": 0.0204, "step": 223080 }, { "epoch": 0.9308317547212324, "grad_norm": 0.3726677611089361, "learning_rate": 2.073145585170051e-06, "loss": 0.0186, "step": 223085 }, { "epoch": 0.9308526174362227, "grad_norm": 0.264119536389484, "learning_rate": 2.0731223527695132e-06, "loss": 0.0193, "step": 223090 }, { "epoch": 0.930873480151213, "grad_norm": 0.47567362318689455, "learning_rate": 2.0730991211500093e-06, "loss": 0.022, "step": 223095 }, { "epoch": 0.9308943428662032, "grad_norm": 0.9285268604715218, "learning_rate": 2.073075890311495e-06, "loss": 0.0243, "step": 223100 }, { "epoch": 0.9309152055811936, "grad_norm": 0.86975180055079, "learning_rate": 2.0730526602539277e-06, "loss": 0.0182, "step": 223105 }, { "epoch": 0.9309360682961838, "grad_norm": 0.9625887682532278, "learning_rate": 2.073029430977263e-06, "loss": 0.0261, "step": 223110 }, { "epoch": 0.930956931011174, "grad_norm": 0.2226470124119577, "learning_rate": 2.073006202481457e-06, "loss": 0.0179, "step": 223115 }, { "epoch": 0.9309777937261643, "grad_norm": 0.8972227585918393, "learning_rate": 2.0729829747664663e-06, "loss": 0.017, "step": 223120 }, { "epoch": 0.9309986564411546, "grad_norm": 0.6823251201915076, "learning_rate": 2.0729597478322472e-06, "loss": 0.0201, "step": 223125 }, { "epoch": 0.9310195191561449, "grad_norm": 0.6128114564344159, "learning_rate": 2.0729365216787558e-06, "loss": 0.0173, "step": 223130 }, { "epoch": 0.9310403818711351, "grad_norm": 1.0052807126698884, "learning_rate": 2.072913296305948e-06, "loss": 0.0202, "step": 223135 }, { "epoch": 0.9310612445861255, "grad_norm": 0.7801224365931457, "learning_rate": 2.0728900717137804e-06, "loss": 0.0279, "step": 223140 }, { "epoch": 0.9310821073011157, "grad_norm": 0.5854294917355732, "learning_rate": 2.0728668479022093e-06, "loss": 0.0202, "step": 223145 }, { "epoch": 0.931102970016106, "grad_norm": 0.409188263913081, "learning_rate": 2.072843624871191e-06, "loss": 0.0299, "step": 223150 }, { "epoch": 0.9311238327310963, "grad_norm": 0.4120881767400018, "learning_rate": 2.072820402620682e-06, "loss": 0.0213, "step": 223155 }, { "epoch": 0.9311446954460866, "grad_norm": 0.5827801052892996, "learning_rate": 2.0727971811506377e-06, "loss": 0.0157, "step": 223160 }, { "epoch": 0.9311655581610768, "grad_norm": 0.28081059428317146, "learning_rate": 2.072773960461015e-06, "loss": 0.0153, "step": 223165 }, { "epoch": 0.9311864208760672, "grad_norm": 0.4738655388567098, "learning_rate": 2.0727507405517707e-06, "loss": 0.02, "step": 223170 }, { "epoch": 0.9312072835910574, "grad_norm": 0.7074044466746434, "learning_rate": 2.0727275214228603e-06, "loss": 0.0191, "step": 223175 }, { "epoch": 0.9312281463060477, "grad_norm": 0.4596917682977393, "learning_rate": 2.0727043030742404e-06, "loss": 0.0157, "step": 223180 }, { "epoch": 0.9312490090210379, "grad_norm": 0.6161848196024381, "learning_rate": 2.072681085505867e-06, "loss": 0.0227, "step": 223185 }, { "epoch": 0.9312698717360283, "grad_norm": 0.22717489338933947, "learning_rate": 2.072657868717697e-06, "loss": 0.014, "step": 223190 }, { "epoch": 0.9312907344510185, "grad_norm": 0.521502329115149, "learning_rate": 2.0726346527096863e-06, "loss": 0.0236, "step": 223195 }, { "epoch": 0.9313115971660088, "grad_norm": 0.40687275076927176, "learning_rate": 2.072611437481791e-06, "loss": 0.015, "step": 223200 }, { "epoch": 0.9313324598809991, "grad_norm": 0.8974266488804402, "learning_rate": 2.072588223033968e-06, "loss": 0.0175, "step": 223205 }, { "epoch": 0.9313533225959894, "grad_norm": 0.8705525801818425, "learning_rate": 2.0725650093661726e-06, "loss": 0.016, "step": 223210 }, { "epoch": 0.9313741853109796, "grad_norm": 0.7648438450426658, "learning_rate": 2.0725417964783628e-06, "loss": 0.025, "step": 223215 }, { "epoch": 0.93139504802597, "grad_norm": 0.4815401960229846, "learning_rate": 2.0725185843704928e-06, "loss": 0.0129, "step": 223220 }, { "epoch": 0.9314159107409602, "grad_norm": 0.8782701781077105, "learning_rate": 2.072495373042521e-06, "loss": 0.0188, "step": 223225 }, { "epoch": 0.9314367734559504, "grad_norm": 0.661825088555367, "learning_rate": 2.0724721624944026e-06, "loss": 0.0183, "step": 223230 }, { "epoch": 0.9314576361709408, "grad_norm": 0.46469172474092857, "learning_rate": 2.072448952726094e-06, "loss": 0.017, "step": 223235 }, { "epoch": 0.931478498885931, "grad_norm": 0.7766580504284833, "learning_rate": 2.0724257437375516e-06, "loss": 0.0171, "step": 223240 }, { "epoch": 0.9314993616009213, "grad_norm": 0.8945327282849114, "learning_rate": 2.0724025355287318e-06, "loss": 0.0251, "step": 223245 }, { "epoch": 0.9315202243159115, "grad_norm": 1.01562668558595, "learning_rate": 2.072379328099591e-06, "loss": 0.0182, "step": 223250 }, { "epoch": 0.9315410870309019, "grad_norm": 0.6249536990477711, "learning_rate": 2.072356121450085e-06, "loss": 0.017, "step": 223255 }, { "epoch": 0.9315619497458921, "grad_norm": 0.8016653198716226, "learning_rate": 2.072332915580171e-06, "loss": 0.0261, "step": 223260 }, { "epoch": 0.9315828124608824, "grad_norm": 0.7617747870033168, "learning_rate": 2.0723097104898056e-06, "loss": 0.0237, "step": 223265 }, { "epoch": 0.9316036751758727, "grad_norm": 0.41767903102022297, "learning_rate": 2.0722865061789433e-06, "loss": 0.0182, "step": 223270 }, { "epoch": 0.931624537890863, "grad_norm": 0.6253722572052457, "learning_rate": 2.0722633026475424e-06, "loss": 0.0202, "step": 223275 }, { "epoch": 0.9316454006058532, "grad_norm": 0.6370500886415538, "learning_rate": 2.0722400998955587e-06, "loss": 0.017, "step": 223280 }, { "epoch": 0.9316662633208436, "grad_norm": 1.1726099304219695, "learning_rate": 2.072216897922948e-06, "loss": 0.0255, "step": 223285 }, { "epoch": 0.9316871260358338, "grad_norm": 0.998742796483975, "learning_rate": 2.072193696729667e-06, "loss": 0.0201, "step": 223290 }, { "epoch": 0.9317079887508241, "grad_norm": 0.36776801551916394, "learning_rate": 2.0721704963156726e-06, "loss": 0.0191, "step": 223295 }, { "epoch": 0.9317288514658143, "grad_norm": 0.6829871357024747, "learning_rate": 2.072147296680921e-06, "loss": 0.0178, "step": 223300 }, { "epoch": 0.9317497141808047, "grad_norm": 0.8024948149840218, "learning_rate": 2.072124097825367e-06, "loss": 0.0287, "step": 223305 }, { "epoch": 0.9317705768957949, "grad_norm": 1.1199084277141171, "learning_rate": 2.07210089974897e-06, "loss": 0.0247, "step": 223310 }, { "epoch": 0.9317914396107851, "grad_norm": 0.7125440176838469, "learning_rate": 2.0720777024516835e-06, "loss": 0.0274, "step": 223315 }, { "epoch": 0.9318123023257755, "grad_norm": 0.28915655084846387, "learning_rate": 2.0720545059334655e-06, "loss": 0.0182, "step": 223320 }, { "epoch": 0.9318331650407657, "grad_norm": 0.4020517200093064, "learning_rate": 2.072031310194272e-06, "loss": 0.0179, "step": 223325 }, { "epoch": 0.931854027755756, "grad_norm": 1.0164677565851283, "learning_rate": 2.0720081152340595e-06, "loss": 0.0225, "step": 223330 }, { "epoch": 0.9318748904707463, "grad_norm": 0.9369765460546601, "learning_rate": 2.0719849210527838e-06, "loss": 0.0202, "step": 223335 }, { "epoch": 0.9318957531857366, "grad_norm": 0.47347230509111465, "learning_rate": 2.071961727650402e-06, "loss": 0.0318, "step": 223340 }, { "epoch": 0.9319166159007268, "grad_norm": 0.6649963343604095, "learning_rate": 2.07193853502687e-06, "loss": 0.0223, "step": 223345 }, { "epoch": 0.9319374786157172, "grad_norm": 0.956076175274736, "learning_rate": 2.071915343182145e-06, "loss": 0.0187, "step": 223350 }, { "epoch": 0.9319583413307074, "grad_norm": 0.3653752925500795, "learning_rate": 2.0718921521161828e-06, "loss": 0.0121, "step": 223355 }, { "epoch": 0.9319792040456977, "grad_norm": 0.8860588720137429, "learning_rate": 2.07186896182894e-06, "loss": 0.0227, "step": 223360 }, { "epoch": 0.9320000667606879, "grad_norm": 0.4957518001815494, "learning_rate": 2.0718457723203727e-06, "loss": 0.018, "step": 223365 }, { "epoch": 0.9320209294756783, "grad_norm": 0.5516542854022556, "learning_rate": 2.0718225835904375e-06, "loss": 0.016, "step": 223370 }, { "epoch": 0.9320417921906685, "grad_norm": 0.7781194363114389, "learning_rate": 2.071799395639091e-06, "loss": 0.0173, "step": 223375 }, { "epoch": 0.9320626549056588, "grad_norm": 0.6545799362730165, "learning_rate": 2.0717762084662894e-06, "loss": 0.0175, "step": 223380 }, { "epoch": 0.9320835176206491, "grad_norm": 0.3237649701759917, "learning_rate": 2.0717530220719896e-06, "loss": 0.0215, "step": 223385 }, { "epoch": 0.9321043803356394, "grad_norm": 1.2692301728098838, "learning_rate": 2.071729836456147e-06, "loss": 0.03, "step": 223390 }, { "epoch": 0.9321252430506296, "grad_norm": 0.555292801091294, "learning_rate": 2.071706651618719e-06, "loss": 0.0141, "step": 223395 }, { "epoch": 0.93214610576562, "grad_norm": 0.3761569304063815, "learning_rate": 2.0716834675596625e-06, "loss": 0.0225, "step": 223400 }, { "epoch": 0.9321669684806102, "grad_norm": 0.5461746458806439, "learning_rate": 2.0716602842789325e-06, "loss": 0.0125, "step": 223405 }, { "epoch": 0.9321878311956004, "grad_norm": 0.9027909712366899, "learning_rate": 2.0716371017764865e-06, "loss": 0.0233, "step": 223410 }, { "epoch": 0.9322086939105908, "grad_norm": 0.9249210608841874, "learning_rate": 2.0716139200522804e-06, "loss": 0.0166, "step": 223415 }, { "epoch": 0.932229556625581, "grad_norm": 0.842313144394906, "learning_rate": 2.071590739106271e-06, "loss": 0.0147, "step": 223420 }, { "epoch": 0.9322504193405713, "grad_norm": 0.629607052323518, "learning_rate": 2.0715675589384143e-06, "loss": 0.0225, "step": 223425 }, { "epoch": 0.9322712820555615, "grad_norm": 0.46621961838522574, "learning_rate": 2.0715443795486683e-06, "loss": 0.0186, "step": 223430 }, { "epoch": 0.9322921447705519, "grad_norm": 0.555287192376902, "learning_rate": 2.071521200936987e-06, "loss": 0.0208, "step": 223435 }, { "epoch": 0.9323130074855421, "grad_norm": 0.6414538185020816, "learning_rate": 2.0714980231033284e-06, "loss": 0.0263, "step": 223440 }, { "epoch": 0.9323338702005324, "grad_norm": 0.6017487738693503, "learning_rate": 2.071474846047649e-06, "loss": 0.0208, "step": 223445 }, { "epoch": 0.9323547329155227, "grad_norm": 0.43163201702335274, "learning_rate": 2.071451669769905e-06, "loss": 0.0165, "step": 223450 }, { "epoch": 0.932375595630513, "grad_norm": 0.4842948833962187, "learning_rate": 2.071428494270053e-06, "loss": 0.0213, "step": 223455 }, { "epoch": 0.9323964583455032, "grad_norm": 0.6083383100525449, "learning_rate": 2.0714053195480495e-06, "loss": 0.0208, "step": 223460 }, { "epoch": 0.9324173210604936, "grad_norm": 0.3379379040842444, "learning_rate": 2.0713821456038503e-06, "loss": 0.0198, "step": 223465 }, { "epoch": 0.9324381837754838, "grad_norm": 0.7201857932210176, "learning_rate": 2.0713589724374125e-06, "loss": 0.023, "step": 223470 }, { "epoch": 0.9324590464904741, "grad_norm": 0.7006967428791699, "learning_rate": 2.071335800048693e-06, "loss": 0.0232, "step": 223475 }, { "epoch": 0.9324799092054643, "grad_norm": 0.4156879621208149, "learning_rate": 2.071312628437648e-06, "loss": 0.014, "step": 223480 }, { "epoch": 0.9325007719204547, "grad_norm": 3.0648794947177036, "learning_rate": 2.0712894576042332e-06, "loss": 0.028, "step": 223485 }, { "epoch": 0.9325216346354449, "grad_norm": 0.48221311076974727, "learning_rate": 2.071266287548406e-06, "loss": 0.0239, "step": 223490 }, { "epoch": 0.9325424973504352, "grad_norm": 0.28291818507830624, "learning_rate": 2.0712431182701227e-06, "loss": 0.0145, "step": 223495 }, { "epoch": 0.9325633600654255, "grad_norm": 0.6288084281069861, "learning_rate": 2.07121994976934e-06, "loss": 0.0219, "step": 223500 }, { "epoch": 0.9325842227804158, "grad_norm": 0.6847733755390444, "learning_rate": 2.071196782046014e-06, "loss": 0.0223, "step": 223505 }, { "epoch": 0.932605085495406, "grad_norm": 0.5985666577030452, "learning_rate": 2.071173615100102e-06, "loss": 0.0183, "step": 223510 }, { "epoch": 0.9326259482103963, "grad_norm": 0.6174684661705838, "learning_rate": 2.071150448931559e-06, "loss": 0.0138, "step": 223515 }, { "epoch": 0.9326468109253866, "grad_norm": 0.6439231089687436, "learning_rate": 2.071127283540343e-06, "loss": 0.0235, "step": 223520 }, { "epoch": 0.9326676736403768, "grad_norm": 0.28723533900485104, "learning_rate": 2.0711041189264095e-06, "loss": 0.0165, "step": 223525 }, { "epoch": 0.9326885363553672, "grad_norm": 0.7268981692537433, "learning_rate": 2.0710809550897165e-06, "loss": 0.0189, "step": 223530 }, { "epoch": 0.9327093990703574, "grad_norm": 0.9568835139369442, "learning_rate": 2.071057792030219e-06, "loss": 0.0195, "step": 223535 }, { "epoch": 0.9327302617853477, "grad_norm": 0.574913325465291, "learning_rate": 2.071034629747874e-06, "loss": 0.0181, "step": 223540 }, { "epoch": 0.9327511245003379, "grad_norm": 0.8796457994069177, "learning_rate": 2.0710114682426384e-06, "loss": 0.0296, "step": 223545 }, { "epoch": 0.9327719872153283, "grad_norm": 1.0178367860495519, "learning_rate": 2.070988307514468e-06, "loss": 0.0248, "step": 223550 }, { "epoch": 0.9327928499303185, "grad_norm": 0.7597699433973025, "learning_rate": 2.0709651475633206e-06, "loss": 0.0174, "step": 223555 }, { "epoch": 0.9328137126453088, "grad_norm": 1.0373898658016223, "learning_rate": 2.0709419883891517e-06, "loss": 0.0207, "step": 223560 }, { "epoch": 0.9328345753602991, "grad_norm": 1.0139815032828767, "learning_rate": 2.070918829991918e-06, "loss": 0.0206, "step": 223565 }, { "epoch": 0.9328554380752894, "grad_norm": 0.7278531457401841, "learning_rate": 2.0708956723715763e-06, "loss": 0.0213, "step": 223570 }, { "epoch": 0.9328763007902796, "grad_norm": 0.3153824648397317, "learning_rate": 2.070872515528083e-06, "loss": 0.0298, "step": 223575 }, { "epoch": 0.93289716350527, "grad_norm": 0.6015606493785074, "learning_rate": 2.0708493594613947e-06, "loss": 0.0133, "step": 223580 }, { "epoch": 0.9329180262202602, "grad_norm": 0.6954755911759456, "learning_rate": 2.070826204171468e-06, "loss": 0.0228, "step": 223585 }, { "epoch": 0.9329388889352505, "grad_norm": 0.8590985359090635, "learning_rate": 2.07080304965826e-06, "loss": 0.0202, "step": 223590 }, { "epoch": 0.9329597516502408, "grad_norm": 0.37530311051931153, "learning_rate": 2.0707798959217266e-06, "loss": 0.0165, "step": 223595 }, { "epoch": 0.932980614365231, "grad_norm": 0.9122045640495975, "learning_rate": 2.0707567429618247e-06, "loss": 0.0283, "step": 223600 }, { "epoch": 0.9330014770802213, "grad_norm": 0.4393661425827441, "learning_rate": 2.0707335907785105e-06, "loss": 0.0206, "step": 223605 }, { "epoch": 0.9330223397952115, "grad_norm": 0.5796158947664118, "learning_rate": 2.0707104393717405e-06, "loss": 0.0208, "step": 223610 }, { "epoch": 0.9330432025102019, "grad_norm": 1.107660151717189, "learning_rate": 2.070687288741472e-06, "loss": 0.026, "step": 223615 }, { "epoch": 0.9330640652251921, "grad_norm": 0.7032862297213671, "learning_rate": 2.070664138887661e-06, "loss": 0.0219, "step": 223620 }, { "epoch": 0.9330849279401824, "grad_norm": 0.9502332993958761, "learning_rate": 2.0706409898102646e-06, "loss": 0.0245, "step": 223625 }, { "epoch": 0.9331057906551727, "grad_norm": 1.0622537621513288, "learning_rate": 2.0706178415092387e-06, "loss": 0.0214, "step": 223630 }, { "epoch": 0.933126653370163, "grad_norm": 0.634857519446335, "learning_rate": 2.070594693984541e-06, "loss": 0.0257, "step": 223635 }, { "epoch": 0.9331475160851532, "grad_norm": 0.6685056664692828, "learning_rate": 2.070571547236127e-06, "loss": 0.0194, "step": 223640 }, { "epoch": 0.9331683788001436, "grad_norm": 0.6106218841034607, "learning_rate": 2.0705484012639533e-06, "loss": 0.0182, "step": 223645 }, { "epoch": 0.9331892415151338, "grad_norm": 0.723829415894004, "learning_rate": 2.070525256067978e-06, "loss": 0.0229, "step": 223650 }, { "epoch": 0.9332101042301241, "grad_norm": 0.2783195562023857, "learning_rate": 2.0705021116481556e-06, "loss": 0.0173, "step": 223655 }, { "epoch": 0.9332309669451143, "grad_norm": 0.5912463854636888, "learning_rate": 2.0704789680044442e-06, "loss": 0.0114, "step": 223660 }, { "epoch": 0.9332518296601047, "grad_norm": 0.40165541201866517, "learning_rate": 2.0704558251368e-06, "loss": 0.0224, "step": 223665 }, { "epoch": 0.9332726923750949, "grad_norm": 0.7460830494837488, "learning_rate": 2.07043268304518e-06, "loss": 0.0214, "step": 223670 }, { "epoch": 0.9332935550900852, "grad_norm": 1.1706696383844237, "learning_rate": 2.07040954172954e-06, "loss": 0.0178, "step": 223675 }, { "epoch": 0.9333144178050755, "grad_norm": 0.8612524156819205, "learning_rate": 2.0703864011898373e-06, "loss": 0.0164, "step": 223680 }, { "epoch": 0.9333352805200658, "grad_norm": 0.5425669270199963, "learning_rate": 2.070363261426028e-06, "loss": 0.0188, "step": 223685 }, { "epoch": 0.933356143235056, "grad_norm": 1.5841666519403617, "learning_rate": 2.07034012243807e-06, "loss": 0.0242, "step": 223690 }, { "epoch": 0.9333770059500464, "grad_norm": 0.8076978201410763, "learning_rate": 2.070316984225918e-06, "loss": 0.0149, "step": 223695 }, { "epoch": 0.9333978686650366, "grad_norm": 1.1014208796152802, "learning_rate": 2.0702938467895307e-06, "loss": 0.0191, "step": 223700 }, { "epoch": 0.9334187313800268, "grad_norm": 0.8607165855364758, "learning_rate": 2.070270710128863e-06, "loss": 0.0187, "step": 223705 }, { "epoch": 0.9334395940950172, "grad_norm": 0.6340949763888215, "learning_rate": 2.0702475742438723e-06, "loss": 0.0174, "step": 223710 }, { "epoch": 0.9334604568100074, "grad_norm": 0.40393752728360455, "learning_rate": 2.0702244391345155e-06, "loss": 0.0275, "step": 223715 }, { "epoch": 0.9334813195249977, "grad_norm": 0.4932265755557939, "learning_rate": 2.070201304800749e-06, "loss": 0.0331, "step": 223720 }, { "epoch": 0.9335021822399879, "grad_norm": 0.9161044232424386, "learning_rate": 2.07017817124253e-06, "loss": 0.0169, "step": 223725 }, { "epoch": 0.9335230449549783, "grad_norm": 1.1404008688364236, "learning_rate": 2.0701550384598134e-06, "loss": 0.0195, "step": 223730 }, { "epoch": 0.9335439076699685, "grad_norm": 0.6428564823286533, "learning_rate": 2.070131906452558e-06, "loss": 0.0167, "step": 223735 }, { "epoch": 0.9335647703849588, "grad_norm": 0.5498945367662261, "learning_rate": 2.0701087752207195e-06, "loss": 0.0246, "step": 223740 }, { "epoch": 0.9335856330999491, "grad_norm": 0.7221238815583542, "learning_rate": 2.0700856447642544e-06, "loss": 0.0252, "step": 223745 }, { "epoch": 0.9336064958149394, "grad_norm": 0.7804980939275183, "learning_rate": 2.07006251508312e-06, "loss": 0.0226, "step": 223750 }, { "epoch": 0.9336273585299296, "grad_norm": 1.1340395885549353, "learning_rate": 2.0700393861772726e-06, "loss": 0.0244, "step": 223755 }, { "epoch": 0.93364822124492, "grad_norm": 0.5028170911331661, "learning_rate": 2.070016258046669e-06, "loss": 0.0138, "step": 223760 }, { "epoch": 0.9336690839599102, "grad_norm": 0.5695997089072886, "learning_rate": 2.0699931306912656e-06, "loss": 0.0143, "step": 223765 }, { "epoch": 0.9336899466749005, "grad_norm": 0.744292712822985, "learning_rate": 2.0699700041110195e-06, "loss": 0.018, "step": 223770 }, { "epoch": 0.9337108093898908, "grad_norm": 0.9930314585154345, "learning_rate": 2.0699468783058868e-06, "loss": 0.0215, "step": 223775 }, { "epoch": 0.9337316721048811, "grad_norm": 1.174380447825034, "learning_rate": 2.069923753275825e-06, "loss": 0.0213, "step": 223780 }, { "epoch": 0.9337525348198713, "grad_norm": 1.652763220467681, "learning_rate": 2.0699006290207903e-06, "loss": 0.0191, "step": 223785 }, { "epoch": 0.9337733975348615, "grad_norm": 0.5140664916133434, "learning_rate": 2.06987750554074e-06, "loss": 0.0173, "step": 223790 }, { "epoch": 0.9337942602498519, "grad_norm": 0.3682054217633497, "learning_rate": 2.0698543828356295e-06, "loss": 0.0179, "step": 223795 }, { "epoch": 0.9338151229648421, "grad_norm": 0.29153029355886606, "learning_rate": 2.0698312609054168e-06, "loss": 0.0141, "step": 223800 }, { "epoch": 0.9338359856798324, "grad_norm": 0.48610223040462935, "learning_rate": 2.0698081397500583e-06, "loss": 0.0176, "step": 223805 }, { "epoch": 0.9338568483948227, "grad_norm": 0.9101169563094187, "learning_rate": 2.0697850193695105e-06, "loss": 0.0215, "step": 223810 }, { "epoch": 0.933877711109813, "grad_norm": 0.6835204632292989, "learning_rate": 2.0697618997637303e-06, "loss": 0.0178, "step": 223815 }, { "epoch": 0.9338985738248032, "grad_norm": 0.5167893909555592, "learning_rate": 2.0697387809326742e-06, "loss": 0.0206, "step": 223820 }, { "epoch": 0.9339194365397936, "grad_norm": 0.9201331145152871, "learning_rate": 2.0697156628762998e-06, "loss": 0.0187, "step": 223825 }, { "epoch": 0.9339402992547838, "grad_norm": 0.542179813659019, "learning_rate": 2.0696925455945623e-06, "loss": 0.0159, "step": 223830 }, { "epoch": 0.9339611619697741, "grad_norm": 0.867090861850462, "learning_rate": 2.0696694290874195e-06, "loss": 0.0228, "step": 223835 }, { "epoch": 0.9339820246847643, "grad_norm": 0.8860496287017947, "learning_rate": 2.0696463133548283e-06, "loss": 0.0294, "step": 223840 }, { "epoch": 0.9340028873997547, "grad_norm": 0.8414474781152054, "learning_rate": 2.0696231983967444e-06, "loss": 0.0249, "step": 223845 }, { "epoch": 0.9340237501147449, "grad_norm": 1.3274854286515376, "learning_rate": 2.0696000842131257e-06, "loss": 0.0227, "step": 223850 }, { "epoch": 0.9340446128297352, "grad_norm": 0.5471679956550495, "learning_rate": 2.069576970803929e-06, "loss": 0.0257, "step": 223855 }, { "epoch": 0.9340654755447255, "grad_norm": 0.4245333080429216, "learning_rate": 2.0695538581691095e-06, "loss": 0.0197, "step": 223860 }, { "epoch": 0.9340863382597158, "grad_norm": 0.5095653797154334, "learning_rate": 2.069530746308625e-06, "loss": 0.0185, "step": 223865 }, { "epoch": 0.934107200974706, "grad_norm": 0.56797853927249, "learning_rate": 2.069507635222433e-06, "loss": 0.0246, "step": 223870 }, { "epoch": 0.9341280636896964, "grad_norm": 0.754571696193508, "learning_rate": 2.0694845249104893e-06, "loss": 0.0194, "step": 223875 }, { "epoch": 0.9341489264046866, "grad_norm": 0.6965930937304703, "learning_rate": 2.0694614153727506e-06, "loss": 0.0279, "step": 223880 }, { "epoch": 0.9341697891196769, "grad_norm": 0.48110927105325685, "learning_rate": 2.0694383066091745e-06, "loss": 0.0186, "step": 223885 }, { "epoch": 0.9341906518346672, "grad_norm": 1.3475698925637953, "learning_rate": 2.0694151986197167e-06, "loss": 0.0286, "step": 223890 }, { "epoch": 0.9342115145496575, "grad_norm": 0.47620071679019504, "learning_rate": 2.069392091404335e-06, "loss": 0.0192, "step": 223895 }, { "epoch": 0.9342323772646477, "grad_norm": 0.6935807591088816, "learning_rate": 2.069368984962985e-06, "loss": 0.0194, "step": 223900 }, { "epoch": 0.9342532399796379, "grad_norm": 1.3804659715744914, "learning_rate": 2.0693458792956246e-06, "loss": 0.0299, "step": 223905 }, { "epoch": 0.9342741026946283, "grad_norm": 0.9278391459078196, "learning_rate": 2.0693227744022107e-06, "loss": 0.0249, "step": 223910 }, { "epoch": 0.9342949654096185, "grad_norm": 0.595434080018009, "learning_rate": 2.0692996702826986e-06, "loss": 0.0214, "step": 223915 }, { "epoch": 0.9343158281246088, "grad_norm": 0.9094121950625361, "learning_rate": 2.0692765669370464e-06, "loss": 0.0189, "step": 223920 }, { "epoch": 0.9343366908395991, "grad_norm": 0.5095579809741501, "learning_rate": 2.0692534643652114e-06, "loss": 0.0193, "step": 223925 }, { "epoch": 0.9343575535545894, "grad_norm": 0.643235194992619, "learning_rate": 2.069230362567149e-06, "loss": 0.0208, "step": 223930 }, { "epoch": 0.9343784162695796, "grad_norm": 0.5139810277525207, "learning_rate": 2.0692072615428164e-06, "loss": 0.0102, "step": 223935 }, { "epoch": 0.93439927898457, "grad_norm": 0.43746215750052225, "learning_rate": 2.0691841612921705e-06, "loss": 0.0222, "step": 223940 }, { "epoch": 0.9344201416995602, "grad_norm": 0.4477541506707545, "learning_rate": 2.0691610618151685e-06, "loss": 0.0171, "step": 223945 }, { "epoch": 0.9344410044145505, "grad_norm": 0.24014664623285756, "learning_rate": 2.0691379631117672e-06, "loss": 0.0196, "step": 223950 }, { "epoch": 0.9344618671295408, "grad_norm": 0.9345898833098972, "learning_rate": 2.069114865181923e-06, "loss": 0.0211, "step": 223955 }, { "epoch": 0.9344827298445311, "grad_norm": 0.5182945535422108, "learning_rate": 2.0690917680255927e-06, "loss": 0.0226, "step": 223960 }, { "epoch": 0.9345035925595213, "grad_norm": 1.0175597611878477, "learning_rate": 2.0690686716427335e-06, "loss": 0.0215, "step": 223965 }, { "epoch": 0.9345244552745116, "grad_norm": 0.7071309602153579, "learning_rate": 2.0690455760333017e-06, "loss": 0.0199, "step": 223970 }, { "epoch": 0.9345453179895019, "grad_norm": 0.8429384279048645, "learning_rate": 2.0690224811972546e-06, "loss": 0.0184, "step": 223975 }, { "epoch": 0.9345661807044922, "grad_norm": 0.5686225483988482, "learning_rate": 2.068999387134549e-06, "loss": 0.0244, "step": 223980 }, { "epoch": 0.9345870434194824, "grad_norm": 1.139736662044458, "learning_rate": 2.068976293845142e-06, "loss": 0.0241, "step": 223985 }, { "epoch": 0.9346079061344728, "grad_norm": 0.27763856457283104, "learning_rate": 2.0689532013289897e-06, "loss": 0.015, "step": 223990 }, { "epoch": 0.934628768849463, "grad_norm": 0.36739163469795316, "learning_rate": 2.068930109586049e-06, "loss": 0.0263, "step": 223995 }, { "epoch": 0.9346496315644532, "grad_norm": 0.4227116987933759, "learning_rate": 2.068907018616278e-06, "loss": 0.0145, "step": 224000 }, { "epoch": 0.9346704942794436, "grad_norm": 0.29073891690863085, "learning_rate": 2.068883928419632e-06, "loss": 0.016, "step": 224005 }, { "epoch": 0.9346913569944338, "grad_norm": 0.5171232592696737, "learning_rate": 2.068860838996069e-06, "loss": 0.0153, "step": 224010 }, { "epoch": 0.9347122197094241, "grad_norm": 0.43893737927274723, "learning_rate": 2.068837750345545e-06, "loss": 0.0206, "step": 224015 }, { "epoch": 0.9347330824244143, "grad_norm": 1.3332022983829281, "learning_rate": 2.068814662468017e-06, "loss": 0.0211, "step": 224020 }, { "epoch": 0.9347539451394047, "grad_norm": 0.3408526660237464, "learning_rate": 2.068791575363443e-06, "loss": 0.0266, "step": 224025 }, { "epoch": 0.9347748078543949, "grad_norm": 0.7798046361538287, "learning_rate": 2.0687684890317785e-06, "loss": 0.0195, "step": 224030 }, { "epoch": 0.9347956705693852, "grad_norm": 0.7645012518918732, "learning_rate": 2.0687454034729805e-06, "loss": 0.0178, "step": 224035 }, { "epoch": 0.9348165332843755, "grad_norm": 0.18137148387721458, "learning_rate": 2.0687223186870065e-06, "loss": 0.015, "step": 224040 }, { "epoch": 0.9348373959993658, "grad_norm": 0.505191473344078, "learning_rate": 2.068699234673813e-06, "loss": 0.028, "step": 224045 }, { "epoch": 0.934858258714356, "grad_norm": 0.5574713149202765, "learning_rate": 2.068676151433357e-06, "loss": 0.0183, "step": 224050 }, { "epoch": 0.9348791214293464, "grad_norm": 0.7385124443959262, "learning_rate": 2.0686530689655957e-06, "loss": 0.0224, "step": 224055 }, { "epoch": 0.9348999841443366, "grad_norm": 0.8974794479603433, "learning_rate": 2.0686299872704856e-06, "loss": 0.0202, "step": 224060 }, { "epoch": 0.9349208468593269, "grad_norm": 0.5313659493295793, "learning_rate": 2.0686069063479834e-06, "loss": 0.0211, "step": 224065 }, { "epoch": 0.9349417095743172, "grad_norm": 0.5733496898330775, "learning_rate": 2.068583826198047e-06, "loss": 0.015, "step": 224070 }, { "epoch": 0.9349625722893075, "grad_norm": 0.4997689159032135, "learning_rate": 2.0685607468206315e-06, "loss": 0.0135, "step": 224075 }, { "epoch": 0.9349834350042977, "grad_norm": 0.42840712529988556, "learning_rate": 2.0685376682156953e-06, "loss": 0.0252, "step": 224080 }, { "epoch": 0.935004297719288, "grad_norm": 0.5255757497044427, "learning_rate": 2.068514590383195e-06, "loss": 0.0173, "step": 224085 }, { "epoch": 0.9350251604342783, "grad_norm": 0.2884722942965003, "learning_rate": 2.068491513323088e-06, "loss": 0.0209, "step": 224090 }, { "epoch": 0.9350460231492685, "grad_norm": 0.566392701398205, "learning_rate": 2.06846843703533e-06, "loss": 0.0174, "step": 224095 }, { "epoch": 0.9350668858642588, "grad_norm": 0.5793201425392884, "learning_rate": 2.0684453615198784e-06, "loss": 0.0186, "step": 224100 }, { "epoch": 0.9350877485792491, "grad_norm": 0.8422432099640702, "learning_rate": 2.0684222867766904e-06, "loss": 0.0184, "step": 224105 }, { "epoch": 0.9351086112942394, "grad_norm": 0.7258079381961353, "learning_rate": 2.068399212805723e-06, "loss": 0.0237, "step": 224110 }, { "epoch": 0.9351294740092296, "grad_norm": 0.6096184822546472, "learning_rate": 2.0683761396069325e-06, "loss": 0.0164, "step": 224115 }, { "epoch": 0.93515033672422, "grad_norm": 0.5209064562348933, "learning_rate": 2.0683530671802766e-06, "loss": 0.0214, "step": 224120 }, { "epoch": 0.9351711994392102, "grad_norm": 0.5155840791085121, "learning_rate": 2.0683299955257117e-06, "loss": 0.0163, "step": 224125 }, { "epoch": 0.9351920621542005, "grad_norm": 0.7995385735756079, "learning_rate": 2.068306924643195e-06, "loss": 0.0193, "step": 224130 }, { "epoch": 0.9352129248691908, "grad_norm": 0.4428629336508579, "learning_rate": 2.0682838545326836e-06, "loss": 0.0196, "step": 224135 }, { "epoch": 0.9352337875841811, "grad_norm": 0.48626870872412636, "learning_rate": 2.0682607851941337e-06, "loss": 0.0096, "step": 224140 }, { "epoch": 0.9352546502991713, "grad_norm": 0.4962071847900721, "learning_rate": 2.068237716627503e-06, "loss": 0.0162, "step": 224145 }, { "epoch": 0.9352755130141616, "grad_norm": 0.43727029199907963, "learning_rate": 2.0682146488327487e-06, "loss": 0.0187, "step": 224150 }, { "epoch": 0.9352963757291519, "grad_norm": 0.6578973227764011, "learning_rate": 2.068191581809827e-06, "loss": 0.0183, "step": 224155 }, { "epoch": 0.9353172384441422, "grad_norm": 0.27948838957576183, "learning_rate": 2.0681685155586947e-06, "loss": 0.0186, "step": 224160 }, { "epoch": 0.9353381011591324, "grad_norm": 0.3247118147346405, "learning_rate": 2.06814545007931e-06, "loss": 0.0215, "step": 224165 }, { "epoch": 0.9353589638741228, "grad_norm": 0.6828421496578417, "learning_rate": 2.068122385371628e-06, "loss": 0.0179, "step": 224170 }, { "epoch": 0.935379826589113, "grad_norm": 1.2738313912861183, "learning_rate": 2.0680993214356076e-06, "loss": 0.0216, "step": 224175 }, { "epoch": 0.9354006893041032, "grad_norm": 0.8534912369857728, "learning_rate": 2.068076258271204e-06, "loss": 0.0228, "step": 224180 }, { "epoch": 0.9354215520190936, "grad_norm": 0.7849517803433314, "learning_rate": 2.068053195878376e-06, "loss": 0.0182, "step": 224185 }, { "epoch": 0.9354424147340838, "grad_norm": 0.7471534830712646, "learning_rate": 2.0680301342570793e-06, "loss": 0.018, "step": 224190 }, { "epoch": 0.9354632774490741, "grad_norm": 0.7847611324518058, "learning_rate": 2.0680070734072713e-06, "loss": 0.0224, "step": 224195 }, { "epoch": 0.9354841401640643, "grad_norm": 0.5765501855350785, "learning_rate": 2.067984013328909e-06, "loss": 0.019, "step": 224200 }, { "epoch": 0.9355050028790547, "grad_norm": 0.49466581488637035, "learning_rate": 2.0679609540219487e-06, "loss": 0.02, "step": 224205 }, { "epoch": 0.9355258655940449, "grad_norm": 0.5810713478517011, "learning_rate": 2.067937895486349e-06, "loss": 0.0212, "step": 224210 }, { "epoch": 0.9355467283090352, "grad_norm": 0.5477693390127708, "learning_rate": 2.067914837722065e-06, "loss": 0.0224, "step": 224215 }, { "epoch": 0.9355675910240255, "grad_norm": 0.30479230447100314, "learning_rate": 2.067891780729055e-06, "loss": 0.0152, "step": 224220 }, { "epoch": 0.9355884537390158, "grad_norm": 0.6385521721726085, "learning_rate": 2.0678687245072755e-06, "loss": 0.0154, "step": 224225 }, { "epoch": 0.935609316454006, "grad_norm": 1.2906669689881227, "learning_rate": 2.067845669056684e-06, "loss": 0.019, "step": 224230 }, { "epoch": 0.9356301791689964, "grad_norm": 0.320634287543651, "learning_rate": 2.067822614377237e-06, "loss": 0.0206, "step": 224235 }, { "epoch": 0.9356510418839866, "grad_norm": 0.33191363596848567, "learning_rate": 2.0677995604688916e-06, "loss": 0.0191, "step": 224240 }, { "epoch": 0.9356719045989769, "grad_norm": 0.7307161433355467, "learning_rate": 2.0677765073316044e-06, "loss": 0.026, "step": 224245 }, { "epoch": 0.9356927673139672, "grad_norm": 0.8046654748460808, "learning_rate": 2.0677534549653337e-06, "loss": 0.0156, "step": 224250 }, { "epoch": 0.9357136300289575, "grad_norm": 0.5951911235414172, "learning_rate": 2.067730403370035e-06, "loss": 0.0238, "step": 224255 }, { "epoch": 0.9357344927439477, "grad_norm": 0.5579767730738836, "learning_rate": 2.067707352545666e-06, "loss": 0.0178, "step": 224260 }, { "epoch": 0.935755355458938, "grad_norm": 1.7711147250185852, "learning_rate": 2.0676843024921843e-06, "loss": 0.0201, "step": 224265 }, { "epoch": 0.9357762181739283, "grad_norm": 0.4712988798377238, "learning_rate": 2.0676612532095463e-06, "loss": 0.0152, "step": 224270 }, { "epoch": 0.9357970808889186, "grad_norm": 0.4835128604710876, "learning_rate": 2.0676382046977082e-06, "loss": 0.0236, "step": 224275 }, { "epoch": 0.9358179436039088, "grad_norm": 0.5124929554834747, "learning_rate": 2.067615156956629e-06, "loss": 0.0184, "step": 224280 }, { "epoch": 0.9358388063188992, "grad_norm": 0.4062268949971124, "learning_rate": 2.0675921099862644e-06, "loss": 0.0199, "step": 224285 }, { "epoch": 0.9358596690338894, "grad_norm": 0.8046346854464943, "learning_rate": 2.0675690637865718e-06, "loss": 0.0211, "step": 224290 }, { "epoch": 0.9358805317488796, "grad_norm": 0.6941519636521786, "learning_rate": 2.067546018357508e-06, "loss": 0.0181, "step": 224295 }, { "epoch": 0.93590139446387, "grad_norm": 0.6990429735829254, "learning_rate": 2.0675229736990304e-06, "loss": 0.0189, "step": 224300 }, { "epoch": 0.9359222571788602, "grad_norm": 0.3988420977534347, "learning_rate": 2.067499929811096e-06, "loss": 0.0195, "step": 224305 }, { "epoch": 0.9359431198938505, "grad_norm": 1.0944481674436222, "learning_rate": 2.067476886693661e-06, "loss": 0.0216, "step": 224310 }, { "epoch": 0.9359639826088408, "grad_norm": 0.5265935483587233, "learning_rate": 2.067453844346684e-06, "loss": 0.0184, "step": 224315 }, { "epoch": 0.9359848453238311, "grad_norm": 0.6248749105208197, "learning_rate": 2.0674308027701215e-06, "loss": 0.0196, "step": 224320 }, { "epoch": 0.9360057080388213, "grad_norm": 0.5096872577725161, "learning_rate": 2.06740776196393e-06, "loss": 0.0158, "step": 224325 }, { "epoch": 0.9360265707538116, "grad_norm": 0.6341596328977895, "learning_rate": 2.0673847219280667e-06, "loss": 0.0158, "step": 224330 }, { "epoch": 0.9360474334688019, "grad_norm": 0.5798086614951551, "learning_rate": 2.0673616826624895e-06, "loss": 0.0146, "step": 224335 }, { "epoch": 0.9360682961837922, "grad_norm": 0.7050944472123571, "learning_rate": 2.067338644167155e-06, "loss": 0.024, "step": 224340 }, { "epoch": 0.9360891588987824, "grad_norm": 0.4807303185194491, "learning_rate": 2.0673156064420192e-06, "loss": 0.0134, "step": 224345 }, { "epoch": 0.9361100216137728, "grad_norm": 0.7853672078826457, "learning_rate": 2.0672925694870407e-06, "loss": 0.0311, "step": 224350 }, { "epoch": 0.936130884328763, "grad_norm": 0.41758313974482036, "learning_rate": 2.0672695333021765e-06, "loss": 0.0177, "step": 224355 }, { "epoch": 0.9361517470437533, "grad_norm": 0.9096907955599847, "learning_rate": 2.0672464978873825e-06, "loss": 0.0264, "step": 224360 }, { "epoch": 0.9361726097587436, "grad_norm": 0.3081639763695653, "learning_rate": 2.067223463242617e-06, "loss": 0.0221, "step": 224365 }, { "epoch": 0.9361934724737339, "grad_norm": 0.5277633372008481, "learning_rate": 2.067200429367837e-06, "loss": 0.0157, "step": 224370 }, { "epoch": 0.9362143351887241, "grad_norm": 0.593194377917408, "learning_rate": 2.0671773962629983e-06, "loss": 0.021, "step": 224375 }, { "epoch": 0.9362351979037143, "grad_norm": 0.7982437241719793, "learning_rate": 2.0671543639280593e-06, "loss": 0.0245, "step": 224380 }, { "epoch": 0.9362560606187047, "grad_norm": 0.5490646516312987, "learning_rate": 2.0671313323629776e-06, "loss": 0.0282, "step": 224385 }, { "epoch": 0.9362769233336949, "grad_norm": 0.5009671460199607, "learning_rate": 2.0671083015677086e-06, "loss": 0.0164, "step": 224390 }, { "epoch": 0.9362977860486852, "grad_norm": 0.5396201594847788, "learning_rate": 2.067085271542211e-06, "loss": 0.0229, "step": 224395 }, { "epoch": 0.9363186487636755, "grad_norm": 0.6464093318035893, "learning_rate": 2.0670622422864404e-06, "loss": 0.0218, "step": 224400 }, { "epoch": 0.9363395114786658, "grad_norm": 0.7501918760311497, "learning_rate": 2.067039213800355e-06, "loss": 0.0196, "step": 224405 }, { "epoch": 0.936360374193656, "grad_norm": 1.4959408343356455, "learning_rate": 2.067016186083912e-06, "loss": 0.017, "step": 224410 }, { "epoch": 0.9363812369086464, "grad_norm": 0.9485282784444959, "learning_rate": 2.066993159137068e-06, "loss": 0.0152, "step": 224415 }, { "epoch": 0.9364020996236366, "grad_norm": 0.7926713303490306, "learning_rate": 2.0669701329597797e-06, "loss": 0.0209, "step": 224420 }, { "epoch": 0.9364229623386269, "grad_norm": 0.6632998058478864, "learning_rate": 2.066947107552006e-06, "loss": 0.0206, "step": 224425 }, { "epoch": 0.9364438250536172, "grad_norm": 0.3419063053396354, "learning_rate": 2.0669240829137025e-06, "loss": 0.0235, "step": 224430 }, { "epoch": 0.9364646877686075, "grad_norm": 0.4700365332520562, "learning_rate": 2.0669010590448265e-06, "loss": 0.0159, "step": 224435 }, { "epoch": 0.9364855504835977, "grad_norm": 0.5946597781653518, "learning_rate": 2.0668780359453355e-06, "loss": 0.0139, "step": 224440 }, { "epoch": 0.936506413198588, "grad_norm": 0.7733184483216885, "learning_rate": 2.0668550136151867e-06, "loss": 0.0242, "step": 224445 }, { "epoch": 0.9365272759135783, "grad_norm": 0.8610768051321533, "learning_rate": 2.066831992054337e-06, "loss": 0.0128, "step": 224450 }, { "epoch": 0.9365481386285686, "grad_norm": 0.9723025510506919, "learning_rate": 2.0668089712627436e-06, "loss": 0.0299, "step": 224455 }, { "epoch": 0.9365690013435588, "grad_norm": 0.45944580187594825, "learning_rate": 2.0667859512403636e-06, "loss": 0.0177, "step": 224460 }, { "epoch": 0.9365898640585492, "grad_norm": 1.2752488264501038, "learning_rate": 2.066762931987155e-06, "loss": 0.0164, "step": 224465 }, { "epoch": 0.9366107267735394, "grad_norm": 0.35517337737630633, "learning_rate": 2.066739913503073e-06, "loss": 0.0136, "step": 224470 }, { "epoch": 0.9366315894885296, "grad_norm": 0.6367045312244923, "learning_rate": 2.0667168957880774e-06, "loss": 0.0168, "step": 224475 }, { "epoch": 0.93665245220352, "grad_norm": 0.46807217481867863, "learning_rate": 2.066693878842123e-06, "loss": 0.0159, "step": 224480 }, { "epoch": 0.9366733149185102, "grad_norm": 0.5457540367720567, "learning_rate": 2.0666708626651682e-06, "loss": 0.018, "step": 224485 }, { "epoch": 0.9366941776335005, "grad_norm": 0.8628792255935303, "learning_rate": 2.0666478472571705e-06, "loss": 0.0246, "step": 224490 }, { "epoch": 0.9367150403484908, "grad_norm": 0.558520779999279, "learning_rate": 2.066624832618086e-06, "loss": 0.0159, "step": 224495 }, { "epoch": 0.9367359030634811, "grad_norm": 0.2851056236845281, "learning_rate": 2.0666018187478728e-06, "loss": 0.017, "step": 224500 }, { "epoch": 0.9367567657784713, "grad_norm": 0.6056414901726933, "learning_rate": 2.0665788056464872e-06, "loss": 0.0161, "step": 224505 }, { "epoch": 0.9367776284934616, "grad_norm": 0.64071336815589, "learning_rate": 2.066555793313887e-06, "loss": 0.0235, "step": 224510 }, { "epoch": 0.9367984912084519, "grad_norm": 1.4880151049287007, "learning_rate": 2.06653278175003e-06, "loss": 0.0246, "step": 224515 }, { "epoch": 0.9368193539234422, "grad_norm": 0.6694090739071427, "learning_rate": 2.0665097709548722e-06, "loss": 0.0255, "step": 224520 }, { "epoch": 0.9368402166384324, "grad_norm": 1.5745356577767613, "learning_rate": 2.0664867609283713e-06, "loss": 0.0228, "step": 224525 }, { "epoch": 0.9368610793534228, "grad_norm": 0.5448372035211492, "learning_rate": 2.0664637516704845e-06, "loss": 0.0161, "step": 224530 }, { "epoch": 0.936881942068413, "grad_norm": 0.503386221734219, "learning_rate": 2.0664407431811692e-06, "loss": 0.0189, "step": 224535 }, { "epoch": 0.9369028047834033, "grad_norm": 2.1430167357906242, "learning_rate": 2.066417735460382e-06, "loss": 0.0235, "step": 224540 }, { "epoch": 0.9369236674983936, "grad_norm": 0.7105324304445914, "learning_rate": 2.0663947285080815e-06, "loss": 0.0222, "step": 224545 }, { "epoch": 0.9369445302133839, "grad_norm": 0.5186199216672684, "learning_rate": 2.066371722324223e-06, "loss": 0.0189, "step": 224550 }, { "epoch": 0.9369653929283741, "grad_norm": 0.5275672255667876, "learning_rate": 2.066348716908765e-06, "loss": 0.023, "step": 224555 }, { "epoch": 0.9369862556433644, "grad_norm": 0.8512888065083102, "learning_rate": 2.0663257122616647e-06, "loss": 0.0196, "step": 224560 }, { "epoch": 0.9370071183583547, "grad_norm": 1.1918078078914789, "learning_rate": 2.0663027083828785e-06, "loss": 0.0202, "step": 224565 }, { "epoch": 0.937027981073345, "grad_norm": 0.7971888357326197, "learning_rate": 2.0662797052723647e-06, "loss": 0.0193, "step": 224570 }, { "epoch": 0.9370488437883352, "grad_norm": 0.30676542227750225, "learning_rate": 2.06625670293008e-06, "loss": 0.0175, "step": 224575 }, { "epoch": 0.9370697065033256, "grad_norm": 2.8784064109577185, "learning_rate": 2.0662337013559817e-06, "loss": 0.027, "step": 224580 }, { "epoch": 0.9370905692183158, "grad_norm": 0.8059618674816827, "learning_rate": 2.066210700550027e-06, "loss": 0.0234, "step": 224585 }, { "epoch": 0.937111431933306, "grad_norm": 9.133958407687635, "learning_rate": 2.066187700512173e-06, "loss": 0.0233, "step": 224590 }, { "epoch": 0.9371322946482964, "grad_norm": 0.6911087664076266, "learning_rate": 2.066164701242377e-06, "loss": 0.0223, "step": 224595 }, { "epoch": 0.9371531573632866, "grad_norm": 0.42455441504485897, "learning_rate": 2.0661417027405964e-06, "loss": 0.0253, "step": 224600 }, { "epoch": 0.9371740200782769, "grad_norm": 0.5021826301151167, "learning_rate": 2.0661187050067885e-06, "loss": 0.0221, "step": 224605 }, { "epoch": 0.9371948827932672, "grad_norm": 0.6583556967927278, "learning_rate": 2.066095708040911e-06, "loss": 0.0225, "step": 224610 }, { "epoch": 0.9372157455082575, "grad_norm": 0.4600204241681987, "learning_rate": 2.06607271184292e-06, "loss": 0.0176, "step": 224615 }, { "epoch": 0.9372366082232477, "grad_norm": 0.7660004177253462, "learning_rate": 2.0660497164127733e-06, "loss": 0.0203, "step": 224620 }, { "epoch": 0.937257470938238, "grad_norm": 0.8473998854891472, "learning_rate": 2.0660267217504285e-06, "loss": 0.0171, "step": 224625 }, { "epoch": 0.9372783336532283, "grad_norm": 0.7308238609185095, "learning_rate": 2.066003727855843e-06, "loss": 0.0163, "step": 224630 }, { "epoch": 0.9372991963682186, "grad_norm": 0.4629168952286052, "learning_rate": 2.0659807347289735e-06, "loss": 0.0223, "step": 224635 }, { "epoch": 0.9373200590832088, "grad_norm": 1.2503203445328162, "learning_rate": 2.0659577423697773e-06, "loss": 0.0228, "step": 224640 }, { "epoch": 0.9373409217981992, "grad_norm": 0.4851710441901744, "learning_rate": 2.065934750778212e-06, "loss": 0.0163, "step": 224645 }, { "epoch": 0.9373617845131894, "grad_norm": 1.1496648329959565, "learning_rate": 2.065911759954235e-06, "loss": 0.0347, "step": 224650 }, { "epoch": 0.9373826472281797, "grad_norm": 0.40249370315903354, "learning_rate": 2.0658887698978035e-06, "loss": 0.0111, "step": 224655 }, { "epoch": 0.93740350994317, "grad_norm": 0.6006665214535527, "learning_rate": 2.0658657806088743e-06, "loss": 0.0305, "step": 224660 }, { "epoch": 0.9374243726581603, "grad_norm": 1.2097168004332757, "learning_rate": 2.0658427920874053e-06, "loss": 0.0139, "step": 224665 }, { "epoch": 0.9374452353731505, "grad_norm": 0.4648648725197201, "learning_rate": 2.0658198043333537e-06, "loss": 0.0156, "step": 224670 }, { "epoch": 0.9374660980881409, "grad_norm": 0.6192394631874942, "learning_rate": 2.0657968173466757e-06, "loss": 0.0213, "step": 224675 }, { "epoch": 0.9374869608031311, "grad_norm": 0.42682903895058294, "learning_rate": 2.065773831127331e-06, "loss": 0.0158, "step": 224680 }, { "epoch": 0.9375078235181213, "grad_norm": 0.6545727546090458, "learning_rate": 2.0657508456752747e-06, "loss": 0.0179, "step": 224685 }, { "epoch": 0.9375286862331116, "grad_norm": 0.8300406887188903, "learning_rate": 2.0657278609904647e-06, "loss": 0.0212, "step": 224690 }, { "epoch": 0.9375495489481019, "grad_norm": 0.841922128379185, "learning_rate": 2.065704877072859e-06, "loss": 0.0214, "step": 224695 }, { "epoch": 0.9375704116630922, "grad_norm": 0.39974982128352615, "learning_rate": 2.0656818939224145e-06, "loss": 0.0204, "step": 224700 }, { "epoch": 0.9375912743780824, "grad_norm": 0.27340239300580094, "learning_rate": 2.065658911539088e-06, "loss": 0.0168, "step": 224705 }, { "epoch": 0.9376121370930728, "grad_norm": 0.788481916350877, "learning_rate": 2.0656359299228377e-06, "loss": 0.0288, "step": 224710 }, { "epoch": 0.937632999808063, "grad_norm": 0.49546946249999513, "learning_rate": 2.0656129490736203e-06, "loss": 0.0239, "step": 224715 }, { "epoch": 0.9376538625230533, "grad_norm": 0.8868364449728137, "learning_rate": 2.0655899689913934e-06, "loss": 0.0261, "step": 224720 }, { "epoch": 0.9376747252380436, "grad_norm": 0.7872646578589658, "learning_rate": 2.065566989676114e-06, "loss": 0.0213, "step": 224725 }, { "epoch": 0.9376955879530339, "grad_norm": 0.4896944783444613, "learning_rate": 2.06554401112774e-06, "loss": 0.0178, "step": 224730 }, { "epoch": 0.9377164506680241, "grad_norm": 0.6728490685047039, "learning_rate": 2.0655210333462284e-06, "loss": 0.0197, "step": 224735 }, { "epoch": 0.9377373133830144, "grad_norm": 0.8667992121842132, "learning_rate": 2.065498056331537e-06, "loss": 0.0175, "step": 224740 }, { "epoch": 0.9377581760980047, "grad_norm": 0.8945364583662923, "learning_rate": 2.0654750800836223e-06, "loss": 0.0216, "step": 224745 }, { "epoch": 0.937779038812995, "grad_norm": 0.8448824199247325, "learning_rate": 2.0654521046024427e-06, "loss": 0.0236, "step": 224750 }, { "epoch": 0.9377999015279852, "grad_norm": 0.6677272558750305, "learning_rate": 2.0654291298879542e-06, "loss": 0.0238, "step": 224755 }, { "epoch": 0.9378207642429756, "grad_norm": 0.47214499949690003, "learning_rate": 2.0654061559401157e-06, "loss": 0.0151, "step": 224760 }, { "epoch": 0.9378416269579658, "grad_norm": 1.563143554152959, "learning_rate": 2.0653831827588834e-06, "loss": 0.0282, "step": 224765 }, { "epoch": 0.937862489672956, "grad_norm": 0.409099786721654, "learning_rate": 2.065360210344215e-06, "loss": 0.0179, "step": 224770 }, { "epoch": 0.9378833523879464, "grad_norm": 0.6573283328945632, "learning_rate": 2.0653372386960677e-06, "loss": 0.022, "step": 224775 }, { "epoch": 0.9379042151029366, "grad_norm": 0.9700942748593557, "learning_rate": 2.0653142678144e-06, "loss": 0.0203, "step": 224780 }, { "epoch": 0.9379250778179269, "grad_norm": 1.061028335930004, "learning_rate": 2.0652912976991677e-06, "loss": 0.0174, "step": 224785 }, { "epoch": 0.9379459405329172, "grad_norm": 0.413506345716177, "learning_rate": 2.065268328350329e-06, "loss": 0.0134, "step": 224790 }, { "epoch": 0.9379668032479075, "grad_norm": 0.6553946969915183, "learning_rate": 2.0652453597678413e-06, "loss": 0.0188, "step": 224795 }, { "epoch": 0.9379876659628977, "grad_norm": 0.6422314262525421, "learning_rate": 2.0652223919516616e-06, "loss": 0.0248, "step": 224800 }, { "epoch": 0.938008528677888, "grad_norm": 0.7320633095186888, "learning_rate": 2.0651994249017476e-06, "loss": 0.0211, "step": 224805 }, { "epoch": 0.9380293913928783, "grad_norm": 0.9889099963222061, "learning_rate": 2.065176458618057e-06, "loss": 0.0244, "step": 224810 }, { "epoch": 0.9380502541078686, "grad_norm": 0.5479070548103584, "learning_rate": 2.065153493100546e-06, "loss": 0.0189, "step": 224815 }, { "epoch": 0.9380711168228588, "grad_norm": 0.8264561920141889, "learning_rate": 2.065130528349174e-06, "loss": 0.0175, "step": 224820 }, { "epoch": 0.9380919795378492, "grad_norm": 0.7119367252773281, "learning_rate": 2.0651075643638958e-06, "loss": 0.0177, "step": 224825 }, { "epoch": 0.9381128422528394, "grad_norm": 0.8152554476785965, "learning_rate": 2.0650846011446714e-06, "loss": 0.0195, "step": 224830 }, { "epoch": 0.9381337049678297, "grad_norm": 0.5445876981669521, "learning_rate": 2.0650616386914566e-06, "loss": 0.0244, "step": 224835 }, { "epoch": 0.93815456768282, "grad_norm": 0.40718848030026683, "learning_rate": 2.065038677004209e-06, "loss": 0.016, "step": 224840 }, { "epoch": 0.9381754303978103, "grad_norm": 0.3297143745309659, "learning_rate": 2.0650157160828872e-06, "loss": 0.0189, "step": 224845 }, { "epoch": 0.9381962931128005, "grad_norm": 1.2055882490444747, "learning_rate": 2.0649927559274467e-06, "loss": 0.0208, "step": 224850 }, { "epoch": 0.9382171558277909, "grad_norm": 0.6197416494864989, "learning_rate": 2.0649697965378467e-06, "loss": 0.0287, "step": 224855 }, { "epoch": 0.9382380185427811, "grad_norm": 0.6124045313186569, "learning_rate": 2.064946837914043e-06, "loss": 0.022, "step": 224860 }, { "epoch": 0.9382588812577713, "grad_norm": 0.3575593147834719, "learning_rate": 2.064923880055994e-06, "loss": 0.0175, "step": 224865 }, { "epoch": 0.9382797439727616, "grad_norm": 0.25296453487245907, "learning_rate": 2.0649009229636576e-06, "loss": 0.0213, "step": 224870 }, { "epoch": 0.938300606687752, "grad_norm": 0.3802960841556252, "learning_rate": 2.06487796663699e-06, "loss": 0.0251, "step": 224875 }, { "epoch": 0.9383214694027422, "grad_norm": 0.3609901086565421, "learning_rate": 2.0648550110759495e-06, "loss": 0.0179, "step": 224880 }, { "epoch": 0.9383423321177324, "grad_norm": 0.4725948626311745, "learning_rate": 2.0648320562804936e-06, "loss": 0.0194, "step": 224885 }, { "epoch": 0.9383631948327228, "grad_norm": 0.4760429038913713, "learning_rate": 2.0648091022505793e-06, "loss": 0.0194, "step": 224890 }, { "epoch": 0.938384057547713, "grad_norm": 0.40296154617432667, "learning_rate": 2.0647861489861643e-06, "loss": 0.0179, "step": 224895 }, { "epoch": 0.9384049202627033, "grad_norm": 0.7502147797607338, "learning_rate": 2.064763196487206e-06, "loss": 0.0178, "step": 224900 }, { "epoch": 0.9384257829776936, "grad_norm": 1.1379746074223505, "learning_rate": 2.0647402447536617e-06, "loss": 0.0239, "step": 224905 }, { "epoch": 0.9384466456926839, "grad_norm": 0.8690368356023294, "learning_rate": 2.064717293785489e-06, "loss": 0.0159, "step": 224910 }, { "epoch": 0.9384675084076741, "grad_norm": 0.2536302586381275, "learning_rate": 2.0646943435826454e-06, "loss": 0.0166, "step": 224915 }, { "epoch": 0.9384883711226644, "grad_norm": 0.6615189614786466, "learning_rate": 2.0646713941450884e-06, "loss": 0.0208, "step": 224920 }, { "epoch": 0.9385092338376547, "grad_norm": 0.7314582648889288, "learning_rate": 2.064648445472775e-06, "loss": 0.0185, "step": 224925 }, { "epoch": 0.938530096552645, "grad_norm": 0.7846038017919168, "learning_rate": 2.0646254975656636e-06, "loss": 0.0235, "step": 224930 }, { "epoch": 0.9385509592676352, "grad_norm": 1.011894380472896, "learning_rate": 2.0646025504237107e-06, "loss": 0.0191, "step": 224935 }, { "epoch": 0.9385718219826256, "grad_norm": 3.6387943367266478, "learning_rate": 2.064579604046874e-06, "loss": 0.0304, "step": 224940 }, { "epoch": 0.9385926846976158, "grad_norm": 0.7175749784042, "learning_rate": 2.0645566584351117e-06, "loss": 0.0239, "step": 224945 }, { "epoch": 0.938613547412606, "grad_norm": 0.80517164685101, "learning_rate": 2.064533713588381e-06, "loss": 0.0278, "step": 224950 }, { "epoch": 0.9386344101275964, "grad_norm": 0.625887972299942, "learning_rate": 2.0645107695066387e-06, "loss": 0.0232, "step": 224955 }, { "epoch": 0.9386552728425867, "grad_norm": 0.6541074837750157, "learning_rate": 2.064487826189843e-06, "loss": 0.0189, "step": 224960 }, { "epoch": 0.9386761355575769, "grad_norm": 0.6991410681524235, "learning_rate": 2.0644648836379506e-06, "loss": 0.0284, "step": 224965 }, { "epoch": 0.9386969982725673, "grad_norm": 0.7289051708802068, "learning_rate": 2.06444194185092e-06, "loss": 0.0192, "step": 224970 }, { "epoch": 0.9387178609875575, "grad_norm": 0.8431322263640476, "learning_rate": 2.0644190008287086e-06, "loss": 0.0234, "step": 224975 }, { "epoch": 0.9387387237025477, "grad_norm": 0.3326869611215928, "learning_rate": 2.064396060571273e-06, "loss": 0.0188, "step": 224980 }, { "epoch": 0.938759586417538, "grad_norm": 0.5889199556150815, "learning_rate": 2.0643731210785715e-06, "loss": 0.0202, "step": 224985 }, { "epoch": 0.9387804491325283, "grad_norm": 0.7486221027991588, "learning_rate": 2.0643501823505615e-06, "loss": 0.0153, "step": 224990 }, { "epoch": 0.9388013118475186, "grad_norm": 0.37933353052254853, "learning_rate": 2.0643272443872e-06, "loss": 0.02, "step": 224995 }, { "epoch": 0.9388221745625088, "grad_norm": 0.6048447693245788, "learning_rate": 2.0643043071884457e-06, "loss": 0.0192, "step": 225000 }, { "epoch": 0.9388430372774992, "grad_norm": 0.48141279182065555, "learning_rate": 2.064281370754255e-06, "loss": 0.0186, "step": 225005 }, { "epoch": 0.9388638999924894, "grad_norm": 0.519843695688307, "learning_rate": 2.0642584350845855e-06, "loss": 0.0126, "step": 225010 }, { "epoch": 0.9388847627074797, "grad_norm": 1.0655980563094747, "learning_rate": 2.064235500179395e-06, "loss": 0.0262, "step": 225015 }, { "epoch": 0.93890562542247, "grad_norm": 0.4529553773268311, "learning_rate": 2.064212566038641e-06, "loss": 0.0147, "step": 225020 }, { "epoch": 0.9389264881374603, "grad_norm": 0.5002214907681939, "learning_rate": 2.0641896326622817e-06, "loss": 0.0213, "step": 225025 }, { "epoch": 0.9389473508524505, "grad_norm": 1.0927404961729756, "learning_rate": 2.064166700050273e-06, "loss": 0.0243, "step": 225030 }, { "epoch": 0.9389682135674408, "grad_norm": 0.6068675773997894, "learning_rate": 2.064143768202574e-06, "loss": 0.0193, "step": 225035 }, { "epoch": 0.9389890762824311, "grad_norm": 0.6501632931091942, "learning_rate": 2.064120837119142e-06, "loss": 0.016, "step": 225040 }, { "epoch": 0.9390099389974214, "grad_norm": 0.42247699188666493, "learning_rate": 2.064097906799934e-06, "loss": 0.0175, "step": 225045 }, { "epoch": 0.9390308017124116, "grad_norm": 0.5682515289424364, "learning_rate": 2.0640749772449078e-06, "loss": 0.0184, "step": 225050 }, { "epoch": 0.939051664427402, "grad_norm": 1.0048320109778168, "learning_rate": 2.0640520484540206e-06, "loss": 0.0243, "step": 225055 }, { "epoch": 0.9390725271423922, "grad_norm": 0.4132978838066618, "learning_rate": 2.064029120427231e-06, "loss": 0.0319, "step": 225060 }, { "epoch": 0.9390933898573824, "grad_norm": 0.7322043918912644, "learning_rate": 2.0640061931644955e-06, "loss": 0.0183, "step": 225065 }, { "epoch": 0.9391142525723728, "grad_norm": 0.629973685442137, "learning_rate": 2.063983266665772e-06, "loss": 0.0194, "step": 225070 }, { "epoch": 0.939135115287363, "grad_norm": 0.28180411856922033, "learning_rate": 2.063960340931018e-06, "loss": 0.0219, "step": 225075 }, { "epoch": 0.9391559780023533, "grad_norm": 0.5357542523254429, "learning_rate": 2.063937415960191e-06, "loss": 0.0224, "step": 225080 }, { "epoch": 0.9391768407173436, "grad_norm": 0.7856064365947523, "learning_rate": 2.0639144917532495e-06, "loss": 0.0158, "step": 225085 }, { "epoch": 0.9391977034323339, "grad_norm": 0.6229117102334789, "learning_rate": 2.0638915683101498e-06, "loss": 0.0226, "step": 225090 }, { "epoch": 0.9392185661473241, "grad_norm": 0.7172176823099454, "learning_rate": 2.06386864563085e-06, "loss": 0.0194, "step": 225095 }, { "epoch": 0.9392394288623144, "grad_norm": 0.6012372664564196, "learning_rate": 2.063845723715308e-06, "loss": 0.0253, "step": 225100 }, { "epoch": 0.9392602915773047, "grad_norm": 0.6989251608354148, "learning_rate": 2.0638228025634806e-06, "loss": 0.0186, "step": 225105 }, { "epoch": 0.939281154292295, "grad_norm": 0.6208289842124318, "learning_rate": 2.0637998821753263e-06, "loss": 0.0176, "step": 225110 }, { "epoch": 0.9393020170072852, "grad_norm": 0.29325163547077765, "learning_rate": 2.0637769625508024e-06, "loss": 0.0195, "step": 225115 }, { "epoch": 0.9393228797222756, "grad_norm": 0.6264326471583174, "learning_rate": 2.063754043689866e-06, "loss": 0.0206, "step": 225120 }, { "epoch": 0.9393437424372658, "grad_norm": 0.6432149961206092, "learning_rate": 2.0637311255924754e-06, "loss": 0.0184, "step": 225125 }, { "epoch": 0.9393646051522561, "grad_norm": 0.5539869532394956, "learning_rate": 2.0637082082585873e-06, "loss": 0.0143, "step": 225130 }, { "epoch": 0.9393854678672464, "grad_norm": 0.6285549997227755, "learning_rate": 2.0636852916881604e-06, "loss": 0.0212, "step": 225135 }, { "epoch": 0.9394063305822367, "grad_norm": 0.3329680801643607, "learning_rate": 2.063662375881152e-06, "loss": 0.014, "step": 225140 }, { "epoch": 0.9394271932972269, "grad_norm": 1.023363825547276, "learning_rate": 2.0636394608375187e-06, "loss": 0.0253, "step": 225145 }, { "epoch": 0.9394480560122173, "grad_norm": 1.240376652601735, "learning_rate": 2.06361654655722e-06, "loss": 0.0252, "step": 225150 }, { "epoch": 0.9394689187272075, "grad_norm": 0.7664189201091418, "learning_rate": 2.0635936330402113e-06, "loss": 0.0178, "step": 225155 }, { "epoch": 0.9394897814421977, "grad_norm": 0.541879178157789, "learning_rate": 2.0635707202864523e-06, "loss": 0.0136, "step": 225160 }, { "epoch": 0.939510644157188, "grad_norm": 0.6239103903428119, "learning_rate": 2.063547808295899e-06, "loss": 0.0146, "step": 225165 }, { "epoch": 0.9395315068721783, "grad_norm": 0.4767242957966601, "learning_rate": 2.06352489706851e-06, "loss": 0.0216, "step": 225170 }, { "epoch": 0.9395523695871686, "grad_norm": 1.282956490462912, "learning_rate": 2.0635019866042433e-06, "loss": 0.0244, "step": 225175 }, { "epoch": 0.9395732323021588, "grad_norm": 0.4310477104059214, "learning_rate": 2.063479076903055e-06, "loss": 0.0185, "step": 225180 }, { "epoch": 0.9395940950171492, "grad_norm": 2.6448756382271124, "learning_rate": 2.063456167964904e-06, "loss": 0.0244, "step": 225185 }, { "epoch": 0.9396149577321394, "grad_norm": 0.5523263450032253, "learning_rate": 2.0634332597897478e-06, "loss": 0.0273, "step": 225190 }, { "epoch": 0.9396358204471297, "grad_norm": 0.7082122935025871, "learning_rate": 2.0634103523775436e-06, "loss": 0.0131, "step": 225195 }, { "epoch": 0.93965668316212, "grad_norm": 0.5363635862684552, "learning_rate": 2.0633874457282494e-06, "loss": 0.0181, "step": 225200 }, { "epoch": 0.9396775458771103, "grad_norm": 0.8232657566408444, "learning_rate": 2.0633645398418222e-06, "loss": 0.0234, "step": 225205 }, { "epoch": 0.9396984085921005, "grad_norm": 0.9474713939921563, "learning_rate": 2.063341634718221e-06, "loss": 0.0166, "step": 225210 }, { "epoch": 0.9397192713070908, "grad_norm": 0.6339335020177035, "learning_rate": 2.0633187303574024e-06, "loss": 0.0212, "step": 225215 }, { "epoch": 0.9397401340220811, "grad_norm": 1.5169352202891107, "learning_rate": 2.0632958267593245e-06, "loss": 0.0235, "step": 225220 }, { "epoch": 0.9397609967370714, "grad_norm": 1.0995169595215848, "learning_rate": 2.0632729239239443e-06, "loss": 0.0245, "step": 225225 }, { "epoch": 0.9397818594520616, "grad_norm": 0.7430473465019266, "learning_rate": 2.0632500218512203e-06, "loss": 0.022, "step": 225230 }, { "epoch": 0.939802722167052, "grad_norm": 0.6207052704175011, "learning_rate": 2.06322712054111e-06, "loss": 0.0197, "step": 225235 }, { "epoch": 0.9398235848820422, "grad_norm": 0.8010173252263203, "learning_rate": 2.063204219993571e-06, "loss": 0.0177, "step": 225240 }, { "epoch": 0.9398444475970325, "grad_norm": 0.4302933223469592, "learning_rate": 2.0631813202085603e-06, "loss": 0.019, "step": 225245 }, { "epoch": 0.9398653103120228, "grad_norm": 0.6466033001959544, "learning_rate": 2.0631584211860366e-06, "loss": 0.0142, "step": 225250 }, { "epoch": 0.939886173027013, "grad_norm": 1.1791376538847833, "learning_rate": 2.063135522925957e-06, "loss": 0.0245, "step": 225255 }, { "epoch": 0.9399070357420033, "grad_norm": 1.1606469358782805, "learning_rate": 2.0631126254282795e-06, "loss": 0.024, "step": 225260 }, { "epoch": 0.9399278984569936, "grad_norm": 0.4559382804624321, "learning_rate": 2.0630897286929617e-06, "loss": 0.0163, "step": 225265 }, { "epoch": 0.9399487611719839, "grad_norm": 0.4452537260483033, "learning_rate": 2.063066832719961e-06, "loss": 0.0204, "step": 225270 }, { "epoch": 0.9399696238869741, "grad_norm": 0.4722204331970899, "learning_rate": 2.063043937509236e-06, "loss": 0.0235, "step": 225275 }, { "epoch": 0.9399904866019644, "grad_norm": 0.6306078590349513, "learning_rate": 2.063021043060743e-06, "loss": 0.0219, "step": 225280 }, { "epoch": 0.9400113493169547, "grad_norm": 0.6269048226579899, "learning_rate": 2.0629981493744406e-06, "loss": 0.0257, "step": 225285 }, { "epoch": 0.940032212031945, "grad_norm": 0.6224522037489812, "learning_rate": 2.0629752564502865e-06, "loss": 0.0228, "step": 225290 }, { "epoch": 0.9400530747469352, "grad_norm": 0.5876065924679494, "learning_rate": 2.0629523642882385e-06, "loss": 0.0229, "step": 225295 }, { "epoch": 0.9400739374619256, "grad_norm": 0.80112444961992, "learning_rate": 2.0629294728882538e-06, "loss": 0.0204, "step": 225300 }, { "epoch": 0.9400948001769158, "grad_norm": 0.4522061221878322, "learning_rate": 2.0629065822502907e-06, "loss": 0.0185, "step": 225305 }, { "epoch": 0.9401156628919061, "grad_norm": 0.760217744907215, "learning_rate": 2.0628836923743063e-06, "loss": 0.0191, "step": 225310 }, { "epoch": 0.9401365256068964, "grad_norm": 0.29658825820744383, "learning_rate": 2.062860803260259e-06, "loss": 0.023, "step": 225315 }, { "epoch": 0.9401573883218867, "grad_norm": 0.6850066251853683, "learning_rate": 2.0628379149081058e-06, "loss": 0.0229, "step": 225320 }, { "epoch": 0.9401782510368769, "grad_norm": 0.5675313152638007, "learning_rate": 2.0628150273178054e-06, "loss": 0.0166, "step": 225325 }, { "epoch": 0.9401991137518673, "grad_norm": 2.7581465235698905, "learning_rate": 2.0627921404893147e-06, "loss": 0.0179, "step": 225330 }, { "epoch": 0.9402199764668575, "grad_norm": 0.570128627395337, "learning_rate": 2.0627692544225915e-06, "loss": 0.0207, "step": 225335 }, { "epoch": 0.9402408391818478, "grad_norm": 0.48449626956019953, "learning_rate": 2.0627463691175937e-06, "loss": 0.0217, "step": 225340 }, { "epoch": 0.940261701896838, "grad_norm": 0.6288297367072416, "learning_rate": 2.0627234845742797e-06, "loss": 0.0161, "step": 225345 }, { "epoch": 0.9402825646118284, "grad_norm": 0.605416708509064, "learning_rate": 2.062700600792606e-06, "loss": 0.0171, "step": 225350 }, { "epoch": 0.9403034273268186, "grad_norm": 0.4675460510531434, "learning_rate": 2.0626777177725314e-06, "loss": 0.016, "step": 225355 }, { "epoch": 0.9403242900418088, "grad_norm": 0.7325413826629286, "learning_rate": 2.0626548355140134e-06, "loss": 0.0226, "step": 225360 }, { "epoch": 0.9403451527567992, "grad_norm": 0.4958744013140321, "learning_rate": 2.0626319540170093e-06, "loss": 0.023, "step": 225365 }, { "epoch": 0.9403660154717894, "grad_norm": 1.2361942306950315, "learning_rate": 2.062609073281477e-06, "loss": 0.0256, "step": 225370 }, { "epoch": 0.9403868781867797, "grad_norm": 0.7448213477437972, "learning_rate": 2.0625861933073744e-06, "loss": 0.0231, "step": 225375 }, { "epoch": 0.94040774090177, "grad_norm": 0.6171740544494414, "learning_rate": 2.0625633140946602e-06, "loss": 0.0198, "step": 225380 }, { "epoch": 0.9404286036167603, "grad_norm": 0.7548946418199478, "learning_rate": 2.0625404356432906e-06, "loss": 0.0135, "step": 225385 }, { "epoch": 0.9404494663317505, "grad_norm": 0.7507529527709915, "learning_rate": 2.062517557953224e-06, "loss": 0.0275, "step": 225390 }, { "epoch": 0.9404703290467408, "grad_norm": 0.6943945914312278, "learning_rate": 2.0624946810244185e-06, "loss": 0.0224, "step": 225395 }, { "epoch": 0.9404911917617311, "grad_norm": 1.0595578485330799, "learning_rate": 2.062471804856832e-06, "loss": 0.0224, "step": 225400 }, { "epoch": 0.9405120544767214, "grad_norm": 0.35503591232931536, "learning_rate": 2.0624489294504215e-06, "loss": 0.0194, "step": 225405 }, { "epoch": 0.9405329171917116, "grad_norm": 0.6909502493430691, "learning_rate": 2.062426054805145e-06, "loss": 0.0153, "step": 225410 }, { "epoch": 0.940553779906702, "grad_norm": 0.42356003824452215, "learning_rate": 2.0624031809209603e-06, "loss": 0.0212, "step": 225415 }, { "epoch": 0.9405746426216922, "grad_norm": 0.3734423197379573, "learning_rate": 2.062380307797826e-06, "loss": 0.0128, "step": 225420 }, { "epoch": 0.9405955053366825, "grad_norm": 0.7895921781866126, "learning_rate": 2.062357435435699e-06, "loss": 0.0242, "step": 225425 }, { "epoch": 0.9406163680516728, "grad_norm": 0.6393426000444083, "learning_rate": 2.0623345638345374e-06, "loss": 0.0176, "step": 225430 }, { "epoch": 0.9406372307666631, "grad_norm": 0.7546616831150437, "learning_rate": 2.0623116929942987e-06, "loss": 0.0153, "step": 225435 }, { "epoch": 0.9406580934816533, "grad_norm": 0.530681156445247, "learning_rate": 2.0622888229149417e-06, "loss": 0.0202, "step": 225440 }, { "epoch": 0.9406789561966437, "grad_norm": 0.7420264583414011, "learning_rate": 2.062265953596423e-06, "loss": 0.0203, "step": 225445 }, { "epoch": 0.9406998189116339, "grad_norm": 0.31723686078570795, "learning_rate": 2.062243085038701e-06, "loss": 0.0125, "step": 225450 }, { "epoch": 0.9407206816266241, "grad_norm": 0.5565994875747786, "learning_rate": 2.0622202172417333e-06, "loss": 0.0198, "step": 225455 }, { "epoch": 0.9407415443416144, "grad_norm": 0.6670915987179812, "learning_rate": 2.0621973502054786e-06, "loss": 0.0187, "step": 225460 }, { "epoch": 0.9407624070566047, "grad_norm": 0.7417517067580646, "learning_rate": 2.0621744839298937e-06, "loss": 0.0221, "step": 225465 }, { "epoch": 0.940783269771595, "grad_norm": 0.5214747185573724, "learning_rate": 2.0621516184149364e-06, "loss": 0.0194, "step": 225470 }, { "epoch": 0.9408041324865852, "grad_norm": 0.5469155220870452, "learning_rate": 2.062128753660565e-06, "loss": 0.016, "step": 225475 }, { "epoch": 0.9408249952015756, "grad_norm": 0.7924575280905584, "learning_rate": 2.062105889666737e-06, "loss": 0.0139, "step": 225480 }, { "epoch": 0.9408458579165658, "grad_norm": 0.3583747472006673, "learning_rate": 2.062083026433411e-06, "loss": 0.0171, "step": 225485 }, { "epoch": 0.9408667206315561, "grad_norm": 0.6370879663264205, "learning_rate": 2.0620601639605436e-06, "loss": 0.0184, "step": 225490 }, { "epoch": 0.9408875833465464, "grad_norm": 0.4685244619249499, "learning_rate": 2.0620373022480937e-06, "loss": 0.0169, "step": 225495 }, { "epoch": 0.9409084460615367, "grad_norm": 0.5617154370449005, "learning_rate": 2.0620144412960186e-06, "loss": 0.0192, "step": 225500 }, { "epoch": 0.9409293087765269, "grad_norm": 1.2331517096928482, "learning_rate": 2.0619915811042763e-06, "loss": 0.0272, "step": 225505 }, { "epoch": 0.9409501714915173, "grad_norm": 1.033326739606197, "learning_rate": 2.0619687216728244e-06, "loss": 0.0252, "step": 225510 }, { "epoch": 0.9409710342065075, "grad_norm": 1.1007497781011302, "learning_rate": 2.061945863001622e-06, "loss": 0.0216, "step": 225515 }, { "epoch": 0.9409918969214978, "grad_norm": 0.7188472660848859, "learning_rate": 2.0619230050906254e-06, "loss": 0.0267, "step": 225520 }, { "epoch": 0.941012759636488, "grad_norm": 0.9756430193337623, "learning_rate": 2.061900147939793e-06, "loss": 0.0205, "step": 225525 }, { "epoch": 0.9410336223514784, "grad_norm": 0.7215431627547187, "learning_rate": 2.0618772915490823e-06, "loss": 0.0203, "step": 225530 }, { "epoch": 0.9410544850664686, "grad_norm": 0.7998394544950196, "learning_rate": 2.0618544359184523e-06, "loss": 0.0213, "step": 225535 }, { "epoch": 0.9410753477814588, "grad_norm": 0.6007509090844315, "learning_rate": 2.06183158104786e-06, "loss": 0.0194, "step": 225540 }, { "epoch": 0.9410962104964492, "grad_norm": 0.6172398933111111, "learning_rate": 2.061808726937263e-06, "loss": 0.0178, "step": 225545 }, { "epoch": 0.9411170732114394, "grad_norm": 0.2669683721050444, "learning_rate": 2.06178587358662e-06, "loss": 0.0171, "step": 225550 }, { "epoch": 0.9411379359264297, "grad_norm": 1.0219632242160621, "learning_rate": 2.0617630209958884e-06, "loss": 0.0287, "step": 225555 }, { "epoch": 0.94115879864142, "grad_norm": 1.29045296658032, "learning_rate": 2.0617401691650266e-06, "loss": 0.0252, "step": 225560 }, { "epoch": 0.9411796613564103, "grad_norm": 0.39733728576096233, "learning_rate": 2.0617173180939915e-06, "loss": 0.0194, "step": 225565 }, { "epoch": 0.9412005240714005, "grad_norm": 0.8485749570227555, "learning_rate": 2.061694467782742e-06, "loss": 0.0275, "step": 225570 }, { "epoch": 0.9412213867863908, "grad_norm": 0.621695224110214, "learning_rate": 2.061671618231235e-06, "loss": 0.018, "step": 225575 }, { "epoch": 0.9412422495013811, "grad_norm": 0.45635887128633723, "learning_rate": 2.0616487694394293e-06, "loss": 0.0241, "step": 225580 }, { "epoch": 0.9412631122163714, "grad_norm": 0.971147863880846, "learning_rate": 2.061625921407283e-06, "loss": 0.02, "step": 225585 }, { "epoch": 0.9412839749313616, "grad_norm": 0.7047721762961432, "learning_rate": 2.0616030741347526e-06, "loss": 0.0283, "step": 225590 }, { "epoch": 0.941304837646352, "grad_norm": 1.2872971644713127, "learning_rate": 2.0615802276217974e-06, "loss": 0.0224, "step": 225595 }, { "epoch": 0.9413257003613422, "grad_norm": 0.4596386643116499, "learning_rate": 2.0615573818683745e-06, "loss": 0.0291, "step": 225600 }, { "epoch": 0.9413465630763325, "grad_norm": 0.820295759919506, "learning_rate": 2.0615345368744425e-06, "loss": 0.0281, "step": 225605 }, { "epoch": 0.9413674257913228, "grad_norm": 0.1843178434152032, "learning_rate": 2.061511692639959e-06, "loss": 0.018, "step": 225610 }, { "epoch": 0.9413882885063131, "grad_norm": 0.8003994246950162, "learning_rate": 2.0614888491648815e-06, "loss": 0.0153, "step": 225615 }, { "epoch": 0.9414091512213033, "grad_norm": 1.3382718025331803, "learning_rate": 2.0614660064491687e-06, "loss": 0.0341, "step": 225620 }, { "epoch": 0.9414300139362937, "grad_norm": 0.7490213165375544, "learning_rate": 2.061443164492778e-06, "loss": 0.0315, "step": 225625 }, { "epoch": 0.9414508766512839, "grad_norm": 0.6728489800856101, "learning_rate": 2.061420323295667e-06, "loss": 0.0234, "step": 225630 }, { "epoch": 0.9414717393662742, "grad_norm": 0.8711662541575186, "learning_rate": 2.0613974828577943e-06, "loss": 0.0198, "step": 225635 }, { "epoch": 0.9414926020812644, "grad_norm": 0.46831937157941134, "learning_rate": 2.0613746431791177e-06, "loss": 0.0247, "step": 225640 }, { "epoch": 0.9415134647962548, "grad_norm": 0.7068448694532682, "learning_rate": 2.0613518042595952e-06, "loss": 0.0191, "step": 225645 }, { "epoch": 0.941534327511245, "grad_norm": 0.37808154508342445, "learning_rate": 2.0613289660991845e-06, "loss": 0.0191, "step": 225650 }, { "epoch": 0.9415551902262352, "grad_norm": 0.47804153775533675, "learning_rate": 2.0613061286978436e-06, "loss": 0.0181, "step": 225655 }, { "epoch": 0.9415760529412256, "grad_norm": 0.7254561322970533, "learning_rate": 2.0612832920555307e-06, "loss": 0.0209, "step": 225660 }, { "epoch": 0.9415969156562158, "grad_norm": 0.3048106319331601, "learning_rate": 2.0612604561722037e-06, "loss": 0.0125, "step": 225665 }, { "epoch": 0.9416177783712061, "grad_norm": 1.3458526522730183, "learning_rate": 2.0612376210478204e-06, "loss": 0.032, "step": 225670 }, { "epoch": 0.9416386410861964, "grad_norm": 1.2504714345790833, "learning_rate": 2.0612147866823383e-06, "loss": 0.0196, "step": 225675 }, { "epoch": 0.9416595038011867, "grad_norm": 0.511465234026968, "learning_rate": 2.0611919530757165e-06, "loss": 0.0215, "step": 225680 }, { "epoch": 0.9416803665161769, "grad_norm": 1.743231177594667, "learning_rate": 2.0611691202279116e-06, "loss": 0.0276, "step": 225685 }, { "epoch": 0.9417012292311673, "grad_norm": 0.42310757769194596, "learning_rate": 2.0611462881388827e-06, "loss": 0.0163, "step": 225690 }, { "epoch": 0.9417220919461575, "grad_norm": 0.9152533427979632, "learning_rate": 2.0611234568085874e-06, "loss": 0.0181, "step": 225695 }, { "epoch": 0.9417429546611478, "grad_norm": 0.608378355965437, "learning_rate": 2.0611006262369837e-06, "loss": 0.0196, "step": 225700 }, { "epoch": 0.941763817376138, "grad_norm": 0.38277847653723435, "learning_rate": 2.0610777964240293e-06, "loss": 0.0176, "step": 225705 }, { "epoch": 0.9417846800911284, "grad_norm": 0.5427338804381738, "learning_rate": 2.0610549673696827e-06, "loss": 0.0169, "step": 225710 }, { "epoch": 0.9418055428061186, "grad_norm": 0.5649774819366306, "learning_rate": 2.0610321390739016e-06, "loss": 0.0201, "step": 225715 }, { "epoch": 0.9418264055211089, "grad_norm": 0.7866427475061684, "learning_rate": 2.061009311536644e-06, "loss": 0.0213, "step": 225720 }, { "epoch": 0.9418472682360992, "grad_norm": 0.6290870288255157, "learning_rate": 2.060986484757868e-06, "loss": 0.0205, "step": 225725 }, { "epoch": 0.9418681309510895, "grad_norm": 1.3900505065305186, "learning_rate": 2.060963658737531e-06, "loss": 0.0445, "step": 225730 }, { "epoch": 0.9418889936660797, "grad_norm": 0.7891366992725857, "learning_rate": 2.060940833475592e-06, "loss": 0.025, "step": 225735 }, { "epoch": 0.94190985638107, "grad_norm": 0.6529945151452359, "learning_rate": 2.060918008972008e-06, "loss": 0.0216, "step": 225740 }, { "epoch": 0.9419307190960603, "grad_norm": 0.46727419540934184, "learning_rate": 2.0608951852267383e-06, "loss": 0.0192, "step": 225745 }, { "epoch": 0.9419515818110505, "grad_norm": 0.3553392747482952, "learning_rate": 2.0608723622397395e-06, "loss": 0.0197, "step": 225750 }, { "epoch": 0.9419724445260408, "grad_norm": 0.28119234572776336, "learning_rate": 2.0608495400109703e-06, "loss": 0.0196, "step": 225755 }, { "epoch": 0.9419933072410311, "grad_norm": 1.5598531773921414, "learning_rate": 2.0608267185403888e-06, "loss": 0.0248, "step": 225760 }, { "epoch": 0.9420141699560214, "grad_norm": 0.5401887270212151, "learning_rate": 2.060803897827953e-06, "loss": 0.017, "step": 225765 }, { "epoch": 0.9420350326710116, "grad_norm": 0.941674119075379, "learning_rate": 2.0607810778736206e-06, "loss": 0.0147, "step": 225770 }, { "epoch": 0.942055895386002, "grad_norm": 0.7078948021952071, "learning_rate": 2.0607582586773496e-06, "loss": 0.0196, "step": 225775 }, { "epoch": 0.9420767581009922, "grad_norm": 0.3944443303750002, "learning_rate": 2.060735440239098e-06, "loss": 0.0245, "step": 225780 }, { "epoch": 0.9420976208159825, "grad_norm": 0.8236661762224581, "learning_rate": 2.060712622558825e-06, "loss": 0.0295, "step": 225785 }, { "epoch": 0.9421184835309728, "grad_norm": 0.9313649493380627, "learning_rate": 2.060689805636488e-06, "loss": 0.0242, "step": 225790 }, { "epoch": 0.9421393462459631, "grad_norm": 0.8226559356818682, "learning_rate": 2.0606669894720434e-06, "loss": 0.0179, "step": 225795 }, { "epoch": 0.9421602089609533, "grad_norm": 1.2723886625257301, "learning_rate": 2.0606441740654512e-06, "loss": 0.0205, "step": 225800 }, { "epoch": 0.9421810716759437, "grad_norm": 0.4845546582230398, "learning_rate": 2.060621359416669e-06, "loss": 0.0167, "step": 225805 }, { "epoch": 0.9422019343909339, "grad_norm": 0.7631828583735937, "learning_rate": 2.060598545525655e-06, "loss": 0.0232, "step": 225810 }, { "epoch": 0.9422227971059242, "grad_norm": 0.662878116757599, "learning_rate": 2.0605757323923663e-06, "loss": 0.0267, "step": 225815 }, { "epoch": 0.9422436598209144, "grad_norm": 0.3702262536208589, "learning_rate": 2.0605529200167616e-06, "loss": 0.0181, "step": 225820 }, { "epoch": 0.9422645225359048, "grad_norm": 0.8671182596781636, "learning_rate": 2.0605301083987996e-06, "loss": 0.0252, "step": 225825 }, { "epoch": 0.942285385250895, "grad_norm": 0.754162172850954, "learning_rate": 2.0605072975384374e-06, "loss": 0.0236, "step": 225830 }, { "epoch": 0.9423062479658852, "grad_norm": 0.4645455897743122, "learning_rate": 2.0604844874356334e-06, "loss": 0.0145, "step": 225835 }, { "epoch": 0.9423271106808756, "grad_norm": 0.5236333368285733, "learning_rate": 2.060461678090346e-06, "loss": 0.0162, "step": 225840 }, { "epoch": 0.9423479733958658, "grad_norm": 0.9523271917632178, "learning_rate": 2.0604388695025324e-06, "loss": 0.0164, "step": 225845 }, { "epoch": 0.9423688361108561, "grad_norm": 0.39006798738678916, "learning_rate": 2.0604160616721515e-06, "loss": 0.016, "step": 225850 }, { "epoch": 0.9423896988258464, "grad_norm": 0.3903444399946674, "learning_rate": 2.060393254599161e-06, "loss": 0.0112, "step": 225855 }, { "epoch": 0.9424105615408367, "grad_norm": 0.5339678923376997, "learning_rate": 2.0603704482835195e-06, "loss": 0.0161, "step": 225860 }, { "epoch": 0.9424314242558269, "grad_norm": 0.4682117288562496, "learning_rate": 2.060347642725184e-06, "loss": 0.0204, "step": 225865 }, { "epoch": 0.9424522869708173, "grad_norm": 0.3443435684639057, "learning_rate": 2.0603248379241136e-06, "loss": 0.0141, "step": 225870 }, { "epoch": 0.9424731496858075, "grad_norm": 0.5544791576849503, "learning_rate": 2.0603020338802655e-06, "loss": 0.0223, "step": 225875 }, { "epoch": 0.9424940124007978, "grad_norm": 0.4364447874802844, "learning_rate": 2.0602792305935992e-06, "loss": 0.0152, "step": 225880 }, { "epoch": 0.942514875115788, "grad_norm": 0.2481575999662171, "learning_rate": 2.060256428064072e-06, "loss": 0.0213, "step": 225885 }, { "epoch": 0.9425357378307784, "grad_norm": 0.6298166044970581, "learning_rate": 2.060233626291642e-06, "loss": 0.0161, "step": 225890 }, { "epoch": 0.9425566005457686, "grad_norm": 0.43871099869505537, "learning_rate": 2.060210825276266e-06, "loss": 0.0135, "step": 225895 }, { "epoch": 0.9425774632607589, "grad_norm": 0.5968855470958838, "learning_rate": 2.0601880250179044e-06, "loss": 0.0138, "step": 225900 }, { "epoch": 0.9425983259757492, "grad_norm": 0.8190647810695948, "learning_rate": 2.0601652255165145e-06, "loss": 0.0243, "step": 225905 }, { "epoch": 0.9426191886907395, "grad_norm": 0.6141260318165999, "learning_rate": 2.0601424267720535e-06, "loss": 0.0314, "step": 225910 }, { "epoch": 0.9426400514057297, "grad_norm": 0.6729447510882918, "learning_rate": 2.06011962878448e-06, "loss": 0.0206, "step": 225915 }, { "epoch": 0.9426609141207201, "grad_norm": 0.6006834502171761, "learning_rate": 2.0600968315537533e-06, "loss": 0.0194, "step": 225920 }, { "epoch": 0.9426817768357103, "grad_norm": 0.39294230124001184, "learning_rate": 2.06007403507983e-06, "loss": 0.0174, "step": 225925 }, { "epoch": 0.9427026395507005, "grad_norm": 0.4151717813093656, "learning_rate": 2.060051239362669e-06, "loss": 0.0167, "step": 225930 }, { "epoch": 0.9427235022656908, "grad_norm": 0.7918376502498573, "learning_rate": 2.060028444402228e-06, "loss": 0.019, "step": 225935 }, { "epoch": 0.9427443649806811, "grad_norm": 0.5409965259643346, "learning_rate": 2.0600056501984655e-06, "loss": 0.0158, "step": 225940 }, { "epoch": 0.9427652276956714, "grad_norm": 0.7041289234808549, "learning_rate": 2.0599828567513392e-06, "loss": 0.0225, "step": 225945 }, { "epoch": 0.9427860904106616, "grad_norm": 0.3333324348326771, "learning_rate": 2.059960064060808e-06, "loss": 0.0235, "step": 225950 }, { "epoch": 0.942806953125652, "grad_norm": 0.7175866475545915, "learning_rate": 2.059937272126829e-06, "loss": 0.0178, "step": 225955 }, { "epoch": 0.9428278158406422, "grad_norm": 0.6285620739254808, "learning_rate": 2.0599144809493613e-06, "loss": 0.0235, "step": 225960 }, { "epoch": 0.9428486785556325, "grad_norm": 0.5481978986144282, "learning_rate": 2.0598916905283623e-06, "loss": 0.0174, "step": 225965 }, { "epoch": 0.9428695412706228, "grad_norm": 1.8857876864478424, "learning_rate": 2.059868900863791e-06, "loss": 0.0394, "step": 225970 }, { "epoch": 0.9428904039856131, "grad_norm": 0.7510564236363662, "learning_rate": 2.059846111955605e-06, "loss": 0.0202, "step": 225975 }, { "epoch": 0.9429112667006033, "grad_norm": 0.6674935089356657, "learning_rate": 2.0598233238037624e-06, "loss": 0.0219, "step": 225980 }, { "epoch": 0.9429321294155937, "grad_norm": 0.43956084617900115, "learning_rate": 2.0598005364082216e-06, "loss": 0.0319, "step": 225985 }, { "epoch": 0.9429529921305839, "grad_norm": 0.5879805572428278, "learning_rate": 2.05977774976894e-06, "loss": 0.0266, "step": 225990 }, { "epoch": 0.9429738548455742, "grad_norm": 0.2816098744777136, "learning_rate": 2.059754963885877e-06, "loss": 0.0178, "step": 225995 }, { "epoch": 0.9429947175605644, "grad_norm": 0.4880242051082318, "learning_rate": 2.0597321787589906e-06, "loss": 0.0175, "step": 226000 }, { "epoch": 0.9430155802755548, "grad_norm": 0.7871765647241712, "learning_rate": 2.0597093943882383e-06, "loss": 0.0178, "step": 226005 }, { "epoch": 0.943036442990545, "grad_norm": 0.6462737094867952, "learning_rate": 2.059686610773578e-06, "loss": 0.0224, "step": 226010 }, { "epoch": 0.9430573057055353, "grad_norm": 0.42012468653380447, "learning_rate": 2.0596638279149694e-06, "loss": 0.0205, "step": 226015 }, { "epoch": 0.9430781684205256, "grad_norm": 0.9412531781290272, "learning_rate": 2.0596410458123693e-06, "loss": 0.0209, "step": 226020 }, { "epoch": 0.9430990311355159, "grad_norm": 0.7804258023929718, "learning_rate": 2.0596182644657363e-06, "loss": 0.0162, "step": 226025 }, { "epoch": 0.9431198938505061, "grad_norm": 0.7091102091884881, "learning_rate": 2.0595954838750286e-06, "loss": 0.0197, "step": 226030 }, { "epoch": 0.9431407565654965, "grad_norm": 0.6513186066083083, "learning_rate": 2.0595727040402047e-06, "loss": 0.0205, "step": 226035 }, { "epoch": 0.9431616192804867, "grad_norm": 0.4018989109332812, "learning_rate": 2.059549924961222e-06, "loss": 0.0185, "step": 226040 }, { "epoch": 0.9431824819954769, "grad_norm": 0.6991630313061463, "learning_rate": 2.0595271466380394e-06, "loss": 0.0207, "step": 226045 }, { "epoch": 0.9432033447104673, "grad_norm": 0.21430087959752075, "learning_rate": 2.059504369070615e-06, "loss": 0.0162, "step": 226050 }, { "epoch": 0.9432242074254575, "grad_norm": 0.3528846963294606, "learning_rate": 2.059481592258907e-06, "loss": 0.0188, "step": 226055 }, { "epoch": 0.9432450701404478, "grad_norm": 0.5041334886956631, "learning_rate": 2.0594588162028737e-06, "loss": 0.0101, "step": 226060 }, { "epoch": 0.943265932855438, "grad_norm": 0.6246019272465368, "learning_rate": 2.0594360409024725e-06, "loss": 0.0238, "step": 226065 }, { "epoch": 0.9432867955704284, "grad_norm": 0.6050993064333093, "learning_rate": 2.059413266357663e-06, "loss": 0.017, "step": 226070 }, { "epoch": 0.9433076582854186, "grad_norm": 0.46490340779425215, "learning_rate": 2.0593904925684027e-06, "loss": 0.0325, "step": 226075 }, { "epoch": 0.9433285210004089, "grad_norm": 0.6039682099828276, "learning_rate": 2.0593677195346493e-06, "loss": 0.0225, "step": 226080 }, { "epoch": 0.9433493837153992, "grad_norm": 0.42454132397607414, "learning_rate": 2.059344947256362e-06, "loss": 0.017, "step": 226085 }, { "epoch": 0.9433702464303895, "grad_norm": 0.44107728934008905, "learning_rate": 2.0593221757334983e-06, "loss": 0.0156, "step": 226090 }, { "epoch": 0.9433911091453797, "grad_norm": 0.45440634959906256, "learning_rate": 2.059299404966017e-06, "loss": 0.0133, "step": 226095 }, { "epoch": 0.9434119718603701, "grad_norm": 0.5608404159396009, "learning_rate": 2.059276634953876e-06, "loss": 0.0204, "step": 226100 }, { "epoch": 0.9434328345753603, "grad_norm": 0.6345354538851804, "learning_rate": 2.0592538656970333e-06, "loss": 0.0267, "step": 226105 }, { "epoch": 0.9434536972903506, "grad_norm": 0.8017714568783268, "learning_rate": 2.0592310971954475e-06, "loss": 0.0151, "step": 226110 }, { "epoch": 0.9434745600053408, "grad_norm": 1.2872760778020804, "learning_rate": 2.059208329449077e-06, "loss": 0.0196, "step": 226115 }, { "epoch": 0.9434954227203312, "grad_norm": 1.0087986948016334, "learning_rate": 2.0591855624578802e-06, "loss": 0.0215, "step": 226120 }, { "epoch": 0.9435162854353214, "grad_norm": 3.2490824970940904, "learning_rate": 2.0591627962218145e-06, "loss": 0.0371, "step": 226125 }, { "epoch": 0.9435371481503116, "grad_norm": 0.443634307906486, "learning_rate": 2.059140030740839e-06, "loss": 0.0132, "step": 226130 }, { "epoch": 0.943558010865302, "grad_norm": 0.8058800131690483, "learning_rate": 2.0591172660149112e-06, "loss": 0.026, "step": 226135 }, { "epoch": 0.9435788735802922, "grad_norm": 0.696352473901888, "learning_rate": 2.05909450204399e-06, "loss": 0.0184, "step": 226140 }, { "epoch": 0.9435997362952825, "grad_norm": 0.8524674413492795, "learning_rate": 2.0590717388280337e-06, "loss": 0.0203, "step": 226145 }, { "epoch": 0.9436205990102728, "grad_norm": 0.2415130284220566, "learning_rate": 2.0590489763669998e-06, "loss": 0.0144, "step": 226150 }, { "epoch": 0.9436414617252631, "grad_norm": 0.5492837474369862, "learning_rate": 2.0590262146608468e-06, "loss": 0.0163, "step": 226155 }, { "epoch": 0.9436623244402533, "grad_norm": 0.5168269086320173, "learning_rate": 2.059003453709534e-06, "loss": 0.0212, "step": 226160 }, { "epoch": 0.9436831871552437, "grad_norm": 0.7510877586825041, "learning_rate": 2.058980693513019e-06, "loss": 0.0235, "step": 226165 }, { "epoch": 0.9437040498702339, "grad_norm": 0.32656778592023605, "learning_rate": 2.05895793407126e-06, "loss": 0.0173, "step": 226170 }, { "epoch": 0.9437249125852242, "grad_norm": 0.2822095298584549, "learning_rate": 2.0589351753842147e-06, "loss": 0.0229, "step": 226175 }, { "epoch": 0.9437457753002144, "grad_norm": 0.688372071361231, "learning_rate": 2.0589124174518425e-06, "loss": 0.0251, "step": 226180 }, { "epoch": 0.9437666380152048, "grad_norm": 4.9805096643831925, "learning_rate": 2.058889660274101e-06, "loss": 0.0186, "step": 226185 }, { "epoch": 0.943787500730195, "grad_norm": 0.5291315499541198, "learning_rate": 2.0588669038509486e-06, "loss": 0.0189, "step": 226190 }, { "epoch": 0.9438083634451853, "grad_norm": 1.2929357543277842, "learning_rate": 2.058844148182344e-06, "loss": 0.0175, "step": 226195 }, { "epoch": 0.9438292261601756, "grad_norm": 0.6029675222711576, "learning_rate": 2.0588213932682453e-06, "loss": 0.025, "step": 226200 }, { "epoch": 0.9438500888751659, "grad_norm": 0.5736960538627657, "learning_rate": 2.0587986391086107e-06, "loss": 0.0193, "step": 226205 }, { "epoch": 0.9438709515901561, "grad_norm": 1.3162030570852032, "learning_rate": 2.0587758857033984e-06, "loss": 0.0216, "step": 226210 }, { "epoch": 0.9438918143051465, "grad_norm": 0.4769685070190186, "learning_rate": 2.0587531330525667e-06, "loss": 0.0235, "step": 226215 }, { "epoch": 0.9439126770201367, "grad_norm": 0.7444114708118382, "learning_rate": 2.058730381156074e-06, "loss": 0.0193, "step": 226220 }, { "epoch": 0.943933539735127, "grad_norm": 0.6958354630905356, "learning_rate": 2.0587076300138784e-06, "loss": 0.0181, "step": 226225 }, { "epoch": 0.9439544024501173, "grad_norm": 1.4663507590039269, "learning_rate": 2.0586848796259393e-06, "loss": 0.0194, "step": 226230 }, { "epoch": 0.9439752651651075, "grad_norm": 0.4873561449621412, "learning_rate": 2.058662129992214e-06, "loss": 0.0177, "step": 226235 }, { "epoch": 0.9439961278800978, "grad_norm": 0.3698731309698217, "learning_rate": 2.0586393811126606e-06, "loss": 0.0181, "step": 226240 }, { "epoch": 0.944016990595088, "grad_norm": 0.6534592648049881, "learning_rate": 2.058616632987238e-06, "loss": 0.0256, "step": 226245 }, { "epoch": 0.9440378533100784, "grad_norm": 0.6279549425856047, "learning_rate": 2.058593885615904e-06, "loss": 0.0256, "step": 226250 }, { "epoch": 0.9440587160250686, "grad_norm": 0.37159303422777573, "learning_rate": 2.058571138998618e-06, "loss": 0.0156, "step": 226255 }, { "epoch": 0.9440795787400589, "grad_norm": 0.3582019940170087, "learning_rate": 2.058548393135338e-06, "loss": 0.0239, "step": 226260 }, { "epoch": 0.9441004414550492, "grad_norm": 0.5113050112730643, "learning_rate": 2.0585256480260214e-06, "loss": 0.0188, "step": 226265 }, { "epoch": 0.9441213041700395, "grad_norm": 0.3244085596400027, "learning_rate": 2.058502903670627e-06, "loss": 0.0177, "step": 226270 }, { "epoch": 0.9441421668850297, "grad_norm": 0.48990078714800134, "learning_rate": 2.058480160069114e-06, "loss": 0.018, "step": 226275 }, { "epoch": 0.9441630296000201, "grad_norm": 0.7929919331632564, "learning_rate": 2.0584574172214393e-06, "loss": 0.0188, "step": 226280 }, { "epoch": 0.9441838923150103, "grad_norm": 0.565399709656144, "learning_rate": 2.0584346751275625e-06, "loss": 0.0236, "step": 226285 }, { "epoch": 0.9442047550300006, "grad_norm": 0.33736528693866774, "learning_rate": 2.0584119337874417e-06, "loss": 0.0206, "step": 226290 }, { "epoch": 0.9442256177449908, "grad_norm": 0.4460699202274015, "learning_rate": 2.0583891932010345e-06, "loss": 0.0158, "step": 226295 }, { "epoch": 0.9442464804599812, "grad_norm": 0.6840846097545107, "learning_rate": 2.0583664533683e-06, "loss": 0.0176, "step": 226300 }, { "epoch": 0.9442673431749714, "grad_norm": 0.4779003728484829, "learning_rate": 2.0583437142891968e-06, "loss": 0.0142, "step": 226305 }, { "epoch": 0.9442882058899617, "grad_norm": 0.3667641807035193, "learning_rate": 2.0583209759636823e-06, "loss": 0.0162, "step": 226310 }, { "epoch": 0.944309068604952, "grad_norm": 0.5658147934704255, "learning_rate": 2.058298238391716e-06, "loss": 0.0153, "step": 226315 }, { "epoch": 0.9443299313199423, "grad_norm": 0.7341397288863681, "learning_rate": 2.0582755015732556e-06, "loss": 0.0181, "step": 226320 }, { "epoch": 0.9443507940349325, "grad_norm": 0.7418308771527843, "learning_rate": 2.0582527655082595e-06, "loss": 0.0211, "step": 226325 }, { "epoch": 0.9443716567499228, "grad_norm": 0.6830734504207895, "learning_rate": 2.058230030196686e-06, "loss": 0.0163, "step": 226330 }, { "epoch": 0.9443925194649131, "grad_norm": 0.6593826861729025, "learning_rate": 2.0582072956384942e-06, "loss": 0.0225, "step": 226335 }, { "epoch": 0.9444133821799033, "grad_norm": 0.9957893587481346, "learning_rate": 2.058184561833642e-06, "loss": 0.0252, "step": 226340 }, { "epoch": 0.9444342448948937, "grad_norm": 0.649867384282238, "learning_rate": 2.058161828782087e-06, "loss": 0.0225, "step": 226345 }, { "epoch": 0.9444551076098839, "grad_norm": 0.6042186578347444, "learning_rate": 2.058139096483789e-06, "loss": 0.0191, "step": 226350 }, { "epoch": 0.9444759703248742, "grad_norm": 0.5394136321552716, "learning_rate": 2.058116364938706e-06, "loss": 0.0204, "step": 226355 }, { "epoch": 0.9444968330398644, "grad_norm": 0.5454688518678483, "learning_rate": 2.0580936341467957e-06, "loss": 0.0247, "step": 226360 }, { "epoch": 0.9445176957548548, "grad_norm": 0.7767846546357565, "learning_rate": 2.0580709041080168e-06, "loss": 0.0214, "step": 226365 }, { "epoch": 0.944538558469845, "grad_norm": 0.4828818394942561, "learning_rate": 2.0580481748223285e-06, "loss": 0.016, "step": 226370 }, { "epoch": 0.9445594211848353, "grad_norm": 0.5091271149688648, "learning_rate": 2.0580254462896885e-06, "loss": 0.0215, "step": 226375 }, { "epoch": 0.9445802838998256, "grad_norm": 0.5816762651220907, "learning_rate": 2.058002718510055e-06, "loss": 0.0203, "step": 226380 }, { "epoch": 0.9446011466148159, "grad_norm": 0.5467177636120122, "learning_rate": 2.057979991483387e-06, "loss": 0.0217, "step": 226385 }, { "epoch": 0.9446220093298061, "grad_norm": 0.8292095492966248, "learning_rate": 2.0579572652096427e-06, "loss": 0.0237, "step": 226390 }, { "epoch": 0.9446428720447965, "grad_norm": 0.5563059764173325, "learning_rate": 2.0579345396887808e-06, "loss": 0.0193, "step": 226395 }, { "epoch": 0.9446637347597867, "grad_norm": 0.8341499960600165, "learning_rate": 2.057911814920759e-06, "loss": 0.0338, "step": 226400 }, { "epoch": 0.944684597474777, "grad_norm": 0.44720000952411865, "learning_rate": 2.057889090905536e-06, "loss": 0.0206, "step": 226405 }, { "epoch": 0.9447054601897673, "grad_norm": 0.6571121120199478, "learning_rate": 2.0578663676430707e-06, "loss": 0.0234, "step": 226410 }, { "epoch": 0.9447263229047576, "grad_norm": 0.763924998566707, "learning_rate": 2.0578436451333216e-06, "loss": 0.0258, "step": 226415 }, { "epoch": 0.9447471856197478, "grad_norm": 0.48991577202214803, "learning_rate": 2.0578209233762463e-06, "loss": 0.018, "step": 226420 }, { "epoch": 0.944768048334738, "grad_norm": 0.7465253844224765, "learning_rate": 2.057798202371804e-06, "loss": 0.0179, "step": 226425 }, { "epoch": 0.9447889110497284, "grad_norm": 0.8049547349645869, "learning_rate": 2.057775482119953e-06, "loss": 0.0174, "step": 226430 }, { "epoch": 0.9448097737647186, "grad_norm": 0.8016476606114348, "learning_rate": 2.0577527626206515e-06, "loss": 0.0212, "step": 226435 }, { "epoch": 0.9448306364797089, "grad_norm": 1.262028211878267, "learning_rate": 2.0577300438738577e-06, "loss": 0.0183, "step": 226440 }, { "epoch": 0.9448514991946992, "grad_norm": 0.8398276078600668, "learning_rate": 2.0577073258795315e-06, "loss": 0.0207, "step": 226445 }, { "epoch": 0.9448723619096895, "grad_norm": 0.6535586664246334, "learning_rate": 2.0576846086376293e-06, "loss": 0.0218, "step": 226450 }, { "epoch": 0.9448932246246797, "grad_norm": 0.414460188731741, "learning_rate": 2.0576618921481114e-06, "loss": 0.0256, "step": 226455 }, { "epoch": 0.9449140873396701, "grad_norm": 0.8942763064870678, "learning_rate": 2.057639176410935e-06, "loss": 0.0243, "step": 226460 }, { "epoch": 0.9449349500546603, "grad_norm": 1.1371046692847437, "learning_rate": 2.057616461426059e-06, "loss": 0.02, "step": 226465 }, { "epoch": 0.9449558127696506, "grad_norm": 0.2863956404220677, "learning_rate": 2.057593747193442e-06, "loss": 0.0212, "step": 226470 }, { "epoch": 0.9449766754846408, "grad_norm": 0.5229838065683563, "learning_rate": 2.0575710337130428e-06, "loss": 0.0214, "step": 226475 }, { "epoch": 0.9449975381996312, "grad_norm": 0.5029575288476084, "learning_rate": 2.0575483209848195e-06, "loss": 0.0283, "step": 226480 }, { "epoch": 0.9450184009146214, "grad_norm": 0.6974923384864956, "learning_rate": 2.05752560900873e-06, "loss": 0.0264, "step": 226485 }, { "epoch": 0.9450392636296117, "grad_norm": 0.408918014319606, "learning_rate": 2.0575028977847337e-06, "loss": 0.0178, "step": 226490 }, { "epoch": 0.945060126344602, "grad_norm": 4.55127529701967, "learning_rate": 2.0574801873127885e-06, "loss": 0.0212, "step": 226495 }, { "epoch": 0.9450809890595923, "grad_norm": 0.4510165193177469, "learning_rate": 2.0574574775928536e-06, "loss": 0.0186, "step": 226500 }, { "epoch": 0.9451018517745825, "grad_norm": 0.6643092463726519, "learning_rate": 2.0574347686248866e-06, "loss": 0.0283, "step": 226505 }, { "epoch": 0.9451227144895729, "grad_norm": 0.6616852872625566, "learning_rate": 2.0574120604088464e-06, "loss": 0.0187, "step": 226510 }, { "epoch": 0.9451435772045631, "grad_norm": 0.5988841177596611, "learning_rate": 2.0573893529446924e-06, "loss": 0.0161, "step": 226515 }, { "epoch": 0.9451644399195533, "grad_norm": 0.6925399661335635, "learning_rate": 2.0573666462323817e-06, "loss": 0.0207, "step": 226520 }, { "epoch": 0.9451853026345437, "grad_norm": 0.6422507960833947, "learning_rate": 2.057343940271873e-06, "loss": 0.0206, "step": 226525 }, { "epoch": 0.9452061653495339, "grad_norm": 0.36958409785549534, "learning_rate": 2.057321235063126e-06, "loss": 0.0175, "step": 226530 }, { "epoch": 0.9452270280645242, "grad_norm": 0.3234379488808841, "learning_rate": 2.0572985306060975e-06, "loss": 0.0209, "step": 226535 }, { "epoch": 0.9452478907795144, "grad_norm": 0.8240339658147688, "learning_rate": 2.0572758269007475e-06, "loss": 0.0203, "step": 226540 }, { "epoch": 0.9452687534945048, "grad_norm": 0.3132626397514306, "learning_rate": 2.057253123947034e-06, "loss": 0.0247, "step": 226545 }, { "epoch": 0.945289616209495, "grad_norm": 0.4662472667983308, "learning_rate": 2.0572304217449156e-06, "loss": 0.0123, "step": 226550 }, { "epoch": 0.9453104789244853, "grad_norm": 0.5353903464089512, "learning_rate": 2.05720772029435e-06, "loss": 0.015, "step": 226555 }, { "epoch": 0.9453313416394756, "grad_norm": 0.5864067515794616, "learning_rate": 2.0571850195952972e-06, "loss": 0.0188, "step": 226560 }, { "epoch": 0.9453522043544659, "grad_norm": 0.4980432943512768, "learning_rate": 2.057162319647715e-06, "loss": 0.0233, "step": 226565 }, { "epoch": 0.9453730670694561, "grad_norm": 0.48969906694317733, "learning_rate": 2.0571396204515612e-06, "loss": 0.0151, "step": 226570 }, { "epoch": 0.9453939297844465, "grad_norm": 0.6349668754287062, "learning_rate": 2.0571169220067957e-06, "loss": 0.0223, "step": 226575 }, { "epoch": 0.9454147924994367, "grad_norm": 0.7685966609332311, "learning_rate": 2.0570942243133763e-06, "loss": 0.0238, "step": 226580 }, { "epoch": 0.945435655214427, "grad_norm": 0.22661029605121832, "learning_rate": 2.0570715273712612e-06, "loss": 0.0316, "step": 226585 }, { "epoch": 0.9454565179294173, "grad_norm": 0.7771734036409083, "learning_rate": 2.05704883118041e-06, "loss": 0.0161, "step": 226590 }, { "epoch": 0.9454773806444076, "grad_norm": 0.5118822031015027, "learning_rate": 2.0570261357407807e-06, "loss": 0.0159, "step": 226595 }, { "epoch": 0.9454982433593978, "grad_norm": 0.41339035562308013, "learning_rate": 2.0570034410523314e-06, "loss": 0.0198, "step": 226600 }, { "epoch": 0.945519106074388, "grad_norm": 0.42632583374937427, "learning_rate": 2.0569807471150217e-06, "loss": 0.0204, "step": 226605 }, { "epoch": 0.9455399687893784, "grad_norm": 0.39625209710767334, "learning_rate": 2.056958053928809e-06, "loss": 0.0187, "step": 226610 }, { "epoch": 0.9455608315043686, "grad_norm": 0.29715603586253775, "learning_rate": 2.0569353614936525e-06, "loss": 0.0182, "step": 226615 }, { "epoch": 0.9455816942193589, "grad_norm": 0.2513332592827436, "learning_rate": 2.056912669809511e-06, "loss": 0.0146, "step": 226620 }, { "epoch": 0.9456025569343492, "grad_norm": 0.784403758992265, "learning_rate": 2.0568899788763424e-06, "loss": 0.0216, "step": 226625 }, { "epoch": 0.9456234196493395, "grad_norm": 1.0259435524945992, "learning_rate": 2.0568672886941063e-06, "loss": 0.0189, "step": 226630 }, { "epoch": 0.9456442823643297, "grad_norm": 0.3612081041357044, "learning_rate": 2.05684459926276e-06, "loss": 0.0194, "step": 226635 }, { "epoch": 0.9456651450793201, "grad_norm": 0.34088256846465154, "learning_rate": 2.056821910582263e-06, "loss": 0.0157, "step": 226640 }, { "epoch": 0.9456860077943103, "grad_norm": 0.20561862363183606, "learning_rate": 2.056799222652574e-06, "loss": 0.0183, "step": 226645 }, { "epoch": 0.9457068705093006, "grad_norm": 1.3669970342797515, "learning_rate": 2.0567765354736507e-06, "loss": 0.0231, "step": 226650 }, { "epoch": 0.9457277332242908, "grad_norm": 0.8859566013307413, "learning_rate": 2.0567538490454523e-06, "loss": 0.0176, "step": 226655 }, { "epoch": 0.9457485959392812, "grad_norm": 1.485799962553181, "learning_rate": 2.056731163367937e-06, "loss": 0.0189, "step": 226660 }, { "epoch": 0.9457694586542714, "grad_norm": 0.79654733498934, "learning_rate": 2.0567084784410642e-06, "loss": 0.0205, "step": 226665 }, { "epoch": 0.9457903213692617, "grad_norm": 1.0959474987865934, "learning_rate": 2.0566857942647915e-06, "loss": 0.0241, "step": 226670 }, { "epoch": 0.945811184084252, "grad_norm": 0.43625561685937797, "learning_rate": 2.0566631108390784e-06, "loss": 0.0209, "step": 226675 }, { "epoch": 0.9458320467992423, "grad_norm": 0.39959096939914057, "learning_rate": 2.056640428163883e-06, "loss": 0.0179, "step": 226680 }, { "epoch": 0.9458529095142325, "grad_norm": 0.26655615725257514, "learning_rate": 2.056617746239164e-06, "loss": 0.0214, "step": 226685 }, { "epoch": 0.9458737722292229, "grad_norm": 1.624462967773565, "learning_rate": 2.0565950650648807e-06, "loss": 0.0331, "step": 226690 }, { "epoch": 0.9458946349442131, "grad_norm": 0.7381966374559957, "learning_rate": 2.0565723846409906e-06, "loss": 0.0214, "step": 226695 }, { "epoch": 0.9459154976592034, "grad_norm": 0.8969957589048136, "learning_rate": 2.0565497049674527e-06, "loss": 0.0163, "step": 226700 }, { "epoch": 0.9459363603741937, "grad_norm": 0.4177412945693346, "learning_rate": 2.0565270260442263e-06, "loss": 0.0162, "step": 226705 }, { "epoch": 0.945957223089184, "grad_norm": 0.670267751077544, "learning_rate": 2.056504347871268e-06, "loss": 0.0249, "step": 226710 }, { "epoch": 0.9459780858041742, "grad_norm": 0.7519769241627992, "learning_rate": 2.0564816704485394e-06, "loss": 0.0237, "step": 226715 }, { "epoch": 0.9459989485191644, "grad_norm": 0.3116716650445037, "learning_rate": 2.0564589937759973e-06, "loss": 0.0228, "step": 226720 }, { "epoch": 0.9460198112341548, "grad_norm": 0.3796940358410587, "learning_rate": 2.0564363178536002e-06, "loss": 0.0203, "step": 226725 }, { "epoch": 0.946040673949145, "grad_norm": 0.8028447837459012, "learning_rate": 2.0564136426813076e-06, "loss": 0.0185, "step": 226730 }, { "epoch": 0.9460615366641353, "grad_norm": 0.549297195350421, "learning_rate": 2.0563909682590775e-06, "loss": 0.024, "step": 226735 }, { "epoch": 0.9460823993791256, "grad_norm": 0.9127510053062032, "learning_rate": 2.056368294586869e-06, "loss": 0.0245, "step": 226740 }, { "epoch": 0.9461032620941159, "grad_norm": 0.39097686748057153, "learning_rate": 2.0563456216646404e-06, "loss": 0.0184, "step": 226745 }, { "epoch": 0.9461241248091061, "grad_norm": 0.2362872389606422, "learning_rate": 2.056322949492351e-06, "loss": 0.018, "step": 226750 }, { "epoch": 0.9461449875240965, "grad_norm": 0.8672297364598044, "learning_rate": 2.056300278069958e-06, "loss": 0.0198, "step": 226755 }, { "epoch": 0.9461658502390867, "grad_norm": 0.4913070058185833, "learning_rate": 2.056277607397422e-06, "loss": 0.0184, "step": 226760 }, { "epoch": 0.946186712954077, "grad_norm": 0.5914985175324272, "learning_rate": 2.0562549374747e-06, "loss": 0.0141, "step": 226765 }, { "epoch": 0.9462075756690673, "grad_norm": 0.7638415885204252, "learning_rate": 2.056232268301752e-06, "loss": 0.021, "step": 226770 }, { "epoch": 0.9462284383840576, "grad_norm": 0.4681398659190154, "learning_rate": 2.0562095998785355e-06, "loss": 0.0155, "step": 226775 }, { "epoch": 0.9462493010990478, "grad_norm": 0.5333074775809836, "learning_rate": 2.0561869322050103e-06, "loss": 0.0243, "step": 226780 }, { "epoch": 0.9462701638140381, "grad_norm": 0.36070992435832727, "learning_rate": 2.056164265281134e-06, "loss": 0.0186, "step": 226785 }, { "epoch": 0.9462910265290284, "grad_norm": 0.6906193062580135, "learning_rate": 2.056141599106866e-06, "loss": 0.017, "step": 226790 }, { "epoch": 0.9463118892440187, "grad_norm": 0.37741938586903495, "learning_rate": 2.056118933682165e-06, "loss": 0.0153, "step": 226795 }, { "epoch": 0.9463327519590089, "grad_norm": 0.3729499778799285, "learning_rate": 2.056096269006989e-06, "loss": 0.0204, "step": 226800 }, { "epoch": 0.9463536146739993, "grad_norm": 0.35553879475548567, "learning_rate": 2.056073605081297e-06, "loss": 0.0185, "step": 226805 }, { "epoch": 0.9463744773889895, "grad_norm": 0.8675301903528938, "learning_rate": 2.0560509419050483e-06, "loss": 0.0184, "step": 226810 }, { "epoch": 0.9463953401039797, "grad_norm": 0.7007646911230332, "learning_rate": 2.056028279478201e-06, "loss": 0.0189, "step": 226815 }, { "epoch": 0.9464162028189701, "grad_norm": 0.664875126550543, "learning_rate": 2.0560056178007136e-06, "loss": 0.0169, "step": 226820 }, { "epoch": 0.9464370655339603, "grad_norm": 0.4103662837866408, "learning_rate": 2.0559829568725455e-06, "loss": 0.0224, "step": 226825 }, { "epoch": 0.9464579282489506, "grad_norm": 0.7451580638058992, "learning_rate": 2.055960296693655e-06, "loss": 0.0169, "step": 226830 }, { "epoch": 0.9464787909639408, "grad_norm": 0.4100522328837934, "learning_rate": 2.0559376372640012e-06, "loss": 0.0171, "step": 226835 }, { "epoch": 0.9464996536789312, "grad_norm": 0.5766551788597635, "learning_rate": 2.0559149785835417e-06, "loss": 0.0184, "step": 226840 }, { "epoch": 0.9465205163939214, "grad_norm": 0.5981106880901415, "learning_rate": 2.0558923206522367e-06, "loss": 0.0271, "step": 226845 }, { "epoch": 0.9465413791089117, "grad_norm": 0.9819519529530989, "learning_rate": 2.055869663470044e-06, "loss": 0.0256, "step": 226850 }, { "epoch": 0.946562241823902, "grad_norm": 0.5162363422436329, "learning_rate": 2.055847007036922e-06, "loss": 0.0182, "step": 226855 }, { "epoch": 0.9465831045388923, "grad_norm": 0.7462763878313747, "learning_rate": 2.0558243513528305e-06, "loss": 0.0204, "step": 226860 }, { "epoch": 0.9466039672538825, "grad_norm": 1.2535759730655522, "learning_rate": 2.0558016964177275e-06, "loss": 0.026, "step": 226865 }, { "epoch": 0.9466248299688729, "grad_norm": 0.4007511747759798, "learning_rate": 2.055779042231572e-06, "loss": 0.0203, "step": 226870 }, { "epoch": 0.9466456926838631, "grad_norm": 0.4417632710054991, "learning_rate": 2.0557563887943226e-06, "loss": 0.0206, "step": 226875 }, { "epoch": 0.9466665553988534, "grad_norm": 0.8418450098326813, "learning_rate": 2.0557337361059376e-06, "loss": 0.0203, "step": 226880 }, { "epoch": 0.9466874181138437, "grad_norm": 1.0809274112807779, "learning_rate": 2.055711084166377e-06, "loss": 0.0298, "step": 226885 }, { "epoch": 0.946708280828834, "grad_norm": 0.8096356263403482, "learning_rate": 2.055688432975598e-06, "loss": 0.0187, "step": 226890 }, { "epoch": 0.9467291435438242, "grad_norm": 0.5731386650508085, "learning_rate": 2.0556657825335607e-06, "loss": 0.019, "step": 226895 }, { "epoch": 0.9467500062588144, "grad_norm": 0.38257251408811965, "learning_rate": 2.0556431328402233e-06, "loss": 0.0167, "step": 226900 }, { "epoch": 0.9467708689738048, "grad_norm": 0.5555376550779142, "learning_rate": 2.0556204838955444e-06, "loss": 0.0173, "step": 226905 }, { "epoch": 0.946791731688795, "grad_norm": 0.6887798396469883, "learning_rate": 2.0555978356994825e-06, "loss": 0.019, "step": 226910 }, { "epoch": 0.9468125944037853, "grad_norm": 0.8060833848152804, "learning_rate": 2.0555751882519974e-06, "loss": 0.0294, "step": 226915 }, { "epoch": 0.9468334571187756, "grad_norm": 0.64647226674387, "learning_rate": 2.055552541553047e-06, "loss": 0.0189, "step": 226920 }, { "epoch": 0.9468543198337659, "grad_norm": 0.5524577767321793, "learning_rate": 2.05552989560259e-06, "loss": 0.0218, "step": 226925 }, { "epoch": 0.9468751825487561, "grad_norm": 0.288874434052804, "learning_rate": 2.0555072504005855e-06, "loss": 0.0168, "step": 226930 }, { "epoch": 0.9468960452637465, "grad_norm": 0.8563186420652684, "learning_rate": 2.055484605946992e-06, "loss": 0.0168, "step": 226935 }, { "epoch": 0.9469169079787367, "grad_norm": 0.3020069350912351, "learning_rate": 2.055461962241769e-06, "loss": 0.0147, "step": 226940 }, { "epoch": 0.946937770693727, "grad_norm": 0.7338557348713699, "learning_rate": 2.0554393192848744e-06, "loss": 0.0238, "step": 226945 }, { "epoch": 0.9469586334087173, "grad_norm": 0.5898337316748826, "learning_rate": 2.0554166770762678e-06, "loss": 0.0169, "step": 226950 }, { "epoch": 0.9469794961237076, "grad_norm": 0.35301374918711526, "learning_rate": 2.055394035615907e-06, "loss": 0.0144, "step": 226955 }, { "epoch": 0.9470003588386978, "grad_norm": 0.5152956086240523, "learning_rate": 2.0553713949037516e-06, "loss": 0.0165, "step": 226960 }, { "epoch": 0.9470212215536881, "grad_norm": 0.8457077593749753, "learning_rate": 2.05534875493976e-06, "loss": 0.0274, "step": 226965 }, { "epoch": 0.9470420842686784, "grad_norm": 0.625673965659738, "learning_rate": 2.055326115723891e-06, "loss": 0.0208, "step": 226970 }, { "epoch": 0.9470629469836687, "grad_norm": 0.4849616086922728, "learning_rate": 2.055303477256104e-06, "loss": 0.019, "step": 226975 }, { "epoch": 0.9470838096986589, "grad_norm": 0.9082110519816354, "learning_rate": 2.055280839536357e-06, "loss": 0.0212, "step": 226980 }, { "epoch": 0.9471046724136493, "grad_norm": 0.5085430808222642, "learning_rate": 2.055258202564609e-06, "loss": 0.0183, "step": 226985 }, { "epoch": 0.9471255351286395, "grad_norm": 0.9473959544316704, "learning_rate": 2.0552355663408193e-06, "loss": 0.0277, "step": 226990 }, { "epoch": 0.9471463978436298, "grad_norm": 1.0208882632478722, "learning_rate": 2.055212930864946e-06, "loss": 0.0161, "step": 226995 }, { "epoch": 0.9471672605586201, "grad_norm": 0.6153324449846406, "learning_rate": 2.0551902961369485e-06, "loss": 0.0198, "step": 227000 }, { "epoch": 0.9471881232736103, "grad_norm": 0.44108434729634544, "learning_rate": 2.0551676621567854e-06, "loss": 0.0152, "step": 227005 }, { "epoch": 0.9472089859886006, "grad_norm": 0.5879212520571564, "learning_rate": 2.0551450289244152e-06, "loss": 0.0196, "step": 227010 }, { "epoch": 0.9472298487035908, "grad_norm": 0.7791747017500402, "learning_rate": 2.0551223964397972e-06, "loss": 0.0212, "step": 227015 }, { "epoch": 0.9472507114185812, "grad_norm": 0.40359336969574605, "learning_rate": 2.05509976470289e-06, "loss": 0.0208, "step": 227020 }, { "epoch": 0.9472715741335714, "grad_norm": 0.5343726819430945, "learning_rate": 2.0550771337136528e-06, "loss": 0.0224, "step": 227025 }, { "epoch": 0.9472924368485617, "grad_norm": 0.39747643619782214, "learning_rate": 2.0550545034720433e-06, "loss": 0.0264, "step": 227030 }, { "epoch": 0.947313299563552, "grad_norm": 0.6879257333627556, "learning_rate": 2.0550318739780216e-06, "loss": 0.0189, "step": 227035 }, { "epoch": 0.9473341622785423, "grad_norm": 0.7512893261158667, "learning_rate": 2.055009245231547e-06, "loss": 0.0197, "step": 227040 }, { "epoch": 0.9473550249935325, "grad_norm": 0.4064989968907487, "learning_rate": 2.054986617232576e-06, "loss": 0.02, "step": 227045 }, { "epoch": 0.9473758877085229, "grad_norm": 0.3131019559591351, "learning_rate": 2.05496398998107e-06, "loss": 0.0225, "step": 227050 }, { "epoch": 0.9473967504235131, "grad_norm": 2.2795431629921183, "learning_rate": 2.0549413634769857e-06, "loss": 0.0176, "step": 227055 }, { "epoch": 0.9474176131385034, "grad_norm": 0.5330199885123731, "learning_rate": 2.0549187377202835e-06, "loss": 0.0159, "step": 227060 }, { "epoch": 0.9474384758534937, "grad_norm": 0.5794075043332834, "learning_rate": 2.054896112710922e-06, "loss": 0.0235, "step": 227065 }, { "epoch": 0.947459338568484, "grad_norm": 0.5554620029923598, "learning_rate": 2.0548734884488593e-06, "loss": 0.0174, "step": 227070 }, { "epoch": 0.9474802012834742, "grad_norm": 0.40475053547217676, "learning_rate": 2.0548508649340555e-06, "loss": 0.0205, "step": 227075 }, { "epoch": 0.9475010639984645, "grad_norm": 1.201439253602133, "learning_rate": 2.054828242166468e-06, "loss": 0.0278, "step": 227080 }, { "epoch": 0.9475219267134548, "grad_norm": 1.406997877904806, "learning_rate": 2.0548056201460563e-06, "loss": 0.0259, "step": 227085 }, { "epoch": 0.947542789428445, "grad_norm": 0.6783674285691376, "learning_rate": 2.05478299887278e-06, "loss": 0.0195, "step": 227090 }, { "epoch": 0.9475636521434353, "grad_norm": 0.7258582409005577, "learning_rate": 2.054760378346597e-06, "loss": 0.0198, "step": 227095 }, { "epoch": 0.9475845148584257, "grad_norm": 0.5868904674053158, "learning_rate": 2.0547377585674663e-06, "loss": 0.0234, "step": 227100 }, { "epoch": 0.9476053775734159, "grad_norm": 0.4752194688245152, "learning_rate": 2.0547151395353472e-06, "loss": 0.0121, "step": 227105 }, { "epoch": 0.9476262402884061, "grad_norm": 1.1690027330796167, "learning_rate": 2.054692521250199e-06, "loss": 0.0235, "step": 227110 }, { "epoch": 0.9476471030033965, "grad_norm": 0.6635789755108692, "learning_rate": 2.0546699037119792e-06, "loss": 0.0242, "step": 227115 }, { "epoch": 0.9476679657183867, "grad_norm": 0.5095458169648658, "learning_rate": 2.0546472869206477e-06, "loss": 0.0158, "step": 227120 }, { "epoch": 0.947688828433377, "grad_norm": 0.44503944934329237, "learning_rate": 2.054624670876163e-06, "loss": 0.0193, "step": 227125 }, { "epoch": 0.9477096911483673, "grad_norm": 0.7731679434294324, "learning_rate": 2.0546020555784843e-06, "loss": 0.0235, "step": 227130 }, { "epoch": 0.9477305538633576, "grad_norm": 0.6457595504230524, "learning_rate": 2.05457944102757e-06, "loss": 0.0184, "step": 227135 }, { "epoch": 0.9477514165783478, "grad_norm": 0.9939879060356239, "learning_rate": 2.05455682722338e-06, "loss": 0.0205, "step": 227140 }, { "epoch": 0.9477722792933381, "grad_norm": 0.4584281710115078, "learning_rate": 2.0545342141658726e-06, "loss": 0.0207, "step": 227145 }, { "epoch": 0.9477931420083284, "grad_norm": 0.39565880557305755, "learning_rate": 2.054511601855006e-06, "loss": 0.0156, "step": 227150 }, { "epoch": 0.9478140047233187, "grad_norm": 1.4224935871385782, "learning_rate": 2.0544889902907403e-06, "loss": 0.0159, "step": 227155 }, { "epoch": 0.9478348674383089, "grad_norm": 0.32433178000740254, "learning_rate": 2.0544663794730337e-06, "loss": 0.0156, "step": 227160 }, { "epoch": 0.9478557301532993, "grad_norm": 0.32649184429277917, "learning_rate": 2.0544437694018456e-06, "loss": 0.0177, "step": 227165 }, { "epoch": 0.9478765928682895, "grad_norm": 0.3680386390546956, "learning_rate": 2.054421160077134e-06, "loss": 0.0172, "step": 227170 }, { "epoch": 0.9478974555832798, "grad_norm": 0.593805635097194, "learning_rate": 2.054398551498859e-06, "loss": 0.0261, "step": 227175 }, { "epoch": 0.9479183182982701, "grad_norm": 0.677298308859225, "learning_rate": 2.054375943666979e-06, "loss": 0.0177, "step": 227180 }, { "epoch": 0.9479391810132604, "grad_norm": 0.5285051824678553, "learning_rate": 2.054353336581453e-06, "loss": 0.0153, "step": 227185 }, { "epoch": 0.9479600437282506, "grad_norm": 0.6440699757611232, "learning_rate": 2.0543307302422394e-06, "loss": 0.0195, "step": 227190 }, { "epoch": 0.9479809064432408, "grad_norm": 0.6957054660704284, "learning_rate": 2.0543081246492976e-06, "loss": 0.0227, "step": 227195 }, { "epoch": 0.9480017691582312, "grad_norm": 0.5587236559314916, "learning_rate": 2.054285519802587e-06, "loss": 0.0258, "step": 227200 }, { "epoch": 0.9480226318732214, "grad_norm": 0.4787358227042779, "learning_rate": 2.054262915702066e-06, "loss": 0.0135, "step": 227205 }, { "epoch": 0.9480434945882117, "grad_norm": 0.3516711200328321, "learning_rate": 2.0542403123476933e-06, "loss": 0.0175, "step": 227210 }, { "epoch": 0.948064357303202, "grad_norm": 0.7028293855923418, "learning_rate": 2.0542177097394288e-06, "loss": 0.0188, "step": 227215 }, { "epoch": 0.9480852200181923, "grad_norm": 0.6399469212953706, "learning_rate": 2.0541951078772304e-06, "loss": 0.0156, "step": 227220 }, { "epoch": 0.9481060827331825, "grad_norm": 0.44132783293112826, "learning_rate": 2.0541725067610576e-06, "loss": 0.0273, "step": 227225 }, { "epoch": 0.9481269454481729, "grad_norm": 0.47896025525784824, "learning_rate": 2.0541499063908692e-06, "loss": 0.0153, "step": 227230 }, { "epoch": 0.9481478081631631, "grad_norm": 0.9797340130361312, "learning_rate": 2.0541273067666246e-06, "loss": 0.024, "step": 227235 }, { "epoch": 0.9481686708781534, "grad_norm": 0.4600786427469336, "learning_rate": 2.054104707888282e-06, "loss": 0.0198, "step": 227240 }, { "epoch": 0.9481895335931437, "grad_norm": 0.5492163047424335, "learning_rate": 2.0540821097558005e-06, "loss": 0.0186, "step": 227245 }, { "epoch": 0.948210396308134, "grad_norm": 0.8073675464472467, "learning_rate": 2.05405951236914e-06, "loss": 0.0175, "step": 227250 }, { "epoch": 0.9482312590231242, "grad_norm": 0.6629115781603835, "learning_rate": 2.054036915728259e-06, "loss": 0.0152, "step": 227255 }, { "epoch": 0.9482521217381145, "grad_norm": 0.9583964260090758, "learning_rate": 2.0540143198331155e-06, "loss": 0.0218, "step": 227260 }, { "epoch": 0.9482729844531048, "grad_norm": 1.7894638442139852, "learning_rate": 2.0539917246836697e-06, "loss": 0.0218, "step": 227265 }, { "epoch": 0.9482938471680951, "grad_norm": 0.503258310320435, "learning_rate": 2.05396913027988e-06, "loss": 0.0111, "step": 227270 }, { "epoch": 0.9483147098830853, "grad_norm": 0.4287674962701762, "learning_rate": 2.0539465366217054e-06, "loss": 0.0184, "step": 227275 }, { "epoch": 0.9483355725980757, "grad_norm": 1.1275350899420773, "learning_rate": 2.053923943709105e-06, "loss": 0.014, "step": 227280 }, { "epoch": 0.9483564353130659, "grad_norm": 0.8013996617947788, "learning_rate": 2.0539013515420383e-06, "loss": 0.0219, "step": 227285 }, { "epoch": 0.9483772980280561, "grad_norm": 0.8563229067425945, "learning_rate": 2.0538787601204637e-06, "loss": 0.0198, "step": 227290 }, { "epoch": 0.9483981607430465, "grad_norm": 0.9726823053344102, "learning_rate": 2.05385616944434e-06, "loss": 0.0296, "step": 227295 }, { "epoch": 0.9484190234580367, "grad_norm": 0.5214904557118987, "learning_rate": 2.0538335795136273e-06, "loss": 0.0326, "step": 227300 }, { "epoch": 0.948439886173027, "grad_norm": 0.7280865014827717, "learning_rate": 2.0538109903282833e-06, "loss": 0.0221, "step": 227305 }, { "epoch": 0.9484607488880173, "grad_norm": 0.6997787019547893, "learning_rate": 2.0537884018882672e-06, "loss": 0.0235, "step": 227310 }, { "epoch": 0.9484816116030076, "grad_norm": 0.6765362611782865, "learning_rate": 2.053765814193539e-06, "loss": 0.0177, "step": 227315 }, { "epoch": 0.9485024743179978, "grad_norm": 1.482985398033481, "learning_rate": 2.053743227244057e-06, "loss": 0.0264, "step": 227320 }, { "epoch": 0.9485233370329881, "grad_norm": 0.668448163093497, "learning_rate": 2.05372064103978e-06, "loss": 0.0194, "step": 227325 }, { "epoch": 0.9485441997479784, "grad_norm": 0.9122473821029388, "learning_rate": 2.0536980555806675e-06, "loss": 0.0194, "step": 227330 }, { "epoch": 0.9485650624629687, "grad_norm": 0.4513784837290406, "learning_rate": 2.0536754708666787e-06, "loss": 0.0143, "step": 227335 }, { "epoch": 0.9485859251779589, "grad_norm": 0.7133502350263077, "learning_rate": 2.053652886897772e-06, "loss": 0.0245, "step": 227340 }, { "epoch": 0.9486067878929493, "grad_norm": 0.7618976682344215, "learning_rate": 2.0536303036739067e-06, "loss": 0.0181, "step": 227345 }, { "epoch": 0.9486276506079395, "grad_norm": 0.5153161460550582, "learning_rate": 2.0536077211950417e-06, "loss": 0.0218, "step": 227350 }, { "epoch": 0.9486485133229298, "grad_norm": 1.2508462179062352, "learning_rate": 2.0535851394611363e-06, "loss": 0.0266, "step": 227355 }, { "epoch": 0.9486693760379201, "grad_norm": 0.7832061953626415, "learning_rate": 2.0535625584721493e-06, "loss": 0.0215, "step": 227360 }, { "epoch": 0.9486902387529104, "grad_norm": 1.1751217430179075, "learning_rate": 2.0535399782280406e-06, "loss": 0.0184, "step": 227365 }, { "epoch": 0.9487111014679006, "grad_norm": 0.6232329613530779, "learning_rate": 2.0535173987287682e-06, "loss": 0.0196, "step": 227370 }, { "epoch": 0.9487319641828909, "grad_norm": 0.29534210279659023, "learning_rate": 2.053494819974291e-06, "loss": 0.0207, "step": 227375 }, { "epoch": 0.9487528268978812, "grad_norm": 0.49810753246669326, "learning_rate": 2.053472241964569e-06, "loss": 0.0193, "step": 227380 }, { "epoch": 0.9487736896128715, "grad_norm": 0.4155006000533913, "learning_rate": 2.053449664699561e-06, "loss": 0.017, "step": 227385 }, { "epoch": 0.9487945523278617, "grad_norm": 0.5214500118409032, "learning_rate": 2.0534270881792255e-06, "loss": 0.0233, "step": 227390 }, { "epoch": 0.948815415042852, "grad_norm": 0.3415669647462517, "learning_rate": 2.0534045124035214e-06, "loss": 0.0147, "step": 227395 }, { "epoch": 0.9488362777578423, "grad_norm": 0.7247307512506407, "learning_rate": 2.053381937372409e-06, "loss": 0.0224, "step": 227400 }, { "epoch": 0.9488571404728325, "grad_norm": 0.6850558573767799, "learning_rate": 2.053359363085847e-06, "loss": 0.0205, "step": 227405 }, { "epoch": 0.9488780031878229, "grad_norm": 0.649385811786334, "learning_rate": 2.0533367895437933e-06, "loss": 0.0155, "step": 227410 }, { "epoch": 0.9488988659028131, "grad_norm": 0.5705922529735984, "learning_rate": 2.0533142167462087e-06, "loss": 0.0177, "step": 227415 }, { "epoch": 0.9489197286178034, "grad_norm": 0.7069168351763955, "learning_rate": 2.0532916446930505e-06, "loss": 0.0242, "step": 227420 }, { "epoch": 0.9489405913327937, "grad_norm": 0.8628596723795551, "learning_rate": 2.053269073384279e-06, "loss": 0.0195, "step": 227425 }, { "epoch": 0.948961454047784, "grad_norm": 0.4918330269777853, "learning_rate": 2.0532465028198534e-06, "loss": 0.0148, "step": 227430 }, { "epoch": 0.9489823167627742, "grad_norm": 0.5594037810176784, "learning_rate": 2.0532239329997323e-06, "loss": 0.0304, "step": 227435 }, { "epoch": 0.9490031794777645, "grad_norm": 0.44708383852187816, "learning_rate": 2.0532013639238746e-06, "loss": 0.0176, "step": 227440 }, { "epoch": 0.9490240421927548, "grad_norm": 0.8535841894518905, "learning_rate": 2.053178795592239e-06, "loss": 0.0251, "step": 227445 }, { "epoch": 0.9490449049077451, "grad_norm": 0.7545319132067941, "learning_rate": 2.053156228004786e-06, "loss": 0.0192, "step": 227450 }, { "epoch": 0.9490657676227353, "grad_norm": 0.7522563533745584, "learning_rate": 2.053133661161474e-06, "loss": 0.0234, "step": 227455 }, { "epoch": 0.9490866303377257, "grad_norm": 0.7316837040950513, "learning_rate": 2.053111095062262e-06, "loss": 0.0234, "step": 227460 }, { "epoch": 0.9491074930527159, "grad_norm": 1.0154201834921222, "learning_rate": 2.053088529707109e-06, "loss": 0.0165, "step": 227465 }, { "epoch": 0.9491283557677062, "grad_norm": 0.18989658744272778, "learning_rate": 2.0530659650959743e-06, "loss": 0.0177, "step": 227470 }, { "epoch": 0.9491492184826965, "grad_norm": 0.832823975787978, "learning_rate": 2.0530434012288173e-06, "loss": 0.0301, "step": 227475 }, { "epoch": 0.9491700811976868, "grad_norm": 0.416333500193174, "learning_rate": 2.0530208381055963e-06, "loss": 0.0191, "step": 227480 }, { "epoch": 0.949190943912677, "grad_norm": 0.6258212943458702, "learning_rate": 2.0529982757262713e-06, "loss": 0.0212, "step": 227485 }, { "epoch": 0.9492118066276672, "grad_norm": 0.37083655652703323, "learning_rate": 2.052975714090801e-06, "loss": 0.0168, "step": 227490 }, { "epoch": 0.9492326693426576, "grad_norm": 0.7333385787813855, "learning_rate": 2.0529531531991445e-06, "loss": 0.0176, "step": 227495 }, { "epoch": 0.9492535320576478, "grad_norm": 0.50944928165619, "learning_rate": 2.052930593051261e-06, "loss": 0.0225, "step": 227500 }, { "epoch": 0.9492743947726381, "grad_norm": 0.38918913763359253, "learning_rate": 2.0529080336471095e-06, "loss": 0.0179, "step": 227505 }, { "epoch": 0.9492952574876284, "grad_norm": 0.4287266925375848, "learning_rate": 2.0528854749866497e-06, "loss": 0.0135, "step": 227510 }, { "epoch": 0.9493161202026187, "grad_norm": 1.0785736746338441, "learning_rate": 2.05286291706984e-06, "loss": 0.0177, "step": 227515 }, { "epoch": 0.9493369829176089, "grad_norm": 0.4827329168739573, "learning_rate": 2.0528403598966403e-06, "loss": 0.0195, "step": 227520 }, { "epoch": 0.9493578456325993, "grad_norm": 0.3924234576226306, "learning_rate": 2.0528178034670086e-06, "loss": 0.0163, "step": 227525 }, { "epoch": 0.9493787083475895, "grad_norm": 1.2312044669676454, "learning_rate": 2.0527952477809054e-06, "loss": 0.0246, "step": 227530 }, { "epoch": 0.9493995710625798, "grad_norm": 0.6452712056140825, "learning_rate": 2.052772692838289e-06, "loss": 0.0179, "step": 227535 }, { "epoch": 0.9494204337775701, "grad_norm": 0.8335029081411155, "learning_rate": 2.0527501386391184e-06, "loss": 0.025, "step": 227540 }, { "epoch": 0.9494412964925604, "grad_norm": 0.5935935319696851, "learning_rate": 2.052727585183354e-06, "loss": 0.0264, "step": 227545 }, { "epoch": 0.9494621592075506, "grad_norm": 0.7611734993388352, "learning_rate": 2.0527050324709536e-06, "loss": 0.0248, "step": 227550 }, { "epoch": 0.9494830219225409, "grad_norm": 0.6261555631322853, "learning_rate": 2.0526824805018766e-06, "loss": 0.0146, "step": 227555 }, { "epoch": 0.9495038846375312, "grad_norm": 0.4512524095528408, "learning_rate": 2.0526599292760824e-06, "loss": 0.0133, "step": 227560 }, { "epoch": 0.9495247473525215, "grad_norm": 0.4815829704951058, "learning_rate": 2.0526373787935304e-06, "loss": 0.0171, "step": 227565 }, { "epoch": 0.9495456100675117, "grad_norm": 0.5118273829025198, "learning_rate": 2.0526148290541796e-06, "loss": 0.0163, "step": 227570 }, { "epoch": 0.9495664727825021, "grad_norm": 0.8577908464244582, "learning_rate": 2.0525922800579896e-06, "loss": 0.025, "step": 227575 }, { "epoch": 0.9495873354974923, "grad_norm": 0.7086865677758083, "learning_rate": 2.0525697318049185e-06, "loss": 0.0284, "step": 227580 }, { "epoch": 0.9496081982124825, "grad_norm": 0.5625789154918764, "learning_rate": 2.0525471842949266e-06, "loss": 0.0245, "step": 227585 }, { "epoch": 0.9496290609274729, "grad_norm": 0.9958124088216592, "learning_rate": 2.0525246375279723e-06, "loss": 0.0265, "step": 227590 }, { "epoch": 0.9496499236424631, "grad_norm": 1.0843948172600577, "learning_rate": 2.052502091504015e-06, "loss": 0.0306, "step": 227595 }, { "epoch": 0.9496707863574534, "grad_norm": 0.38296277841858384, "learning_rate": 2.052479546223014e-06, "loss": 0.0166, "step": 227600 }, { "epoch": 0.9496916490724437, "grad_norm": 0.6102077718488993, "learning_rate": 2.052457001684929e-06, "loss": 0.0179, "step": 227605 }, { "epoch": 0.949712511787434, "grad_norm": 0.4147474159147964, "learning_rate": 2.0524344578897183e-06, "loss": 0.0158, "step": 227610 }, { "epoch": 0.9497333745024242, "grad_norm": 0.9542303184819686, "learning_rate": 2.0524119148373415e-06, "loss": 0.02, "step": 227615 }, { "epoch": 0.9497542372174145, "grad_norm": 0.5633922838673415, "learning_rate": 2.0523893725277575e-06, "loss": 0.0186, "step": 227620 }, { "epoch": 0.9497750999324048, "grad_norm": 0.4828798853631678, "learning_rate": 2.0523668309609265e-06, "loss": 0.0207, "step": 227625 }, { "epoch": 0.9497959626473951, "grad_norm": 0.39314180756933886, "learning_rate": 2.052344290136806e-06, "loss": 0.0154, "step": 227630 }, { "epoch": 0.9498168253623853, "grad_norm": 0.6767576257857763, "learning_rate": 2.052321750055357e-06, "loss": 0.0126, "step": 227635 }, { "epoch": 0.9498376880773757, "grad_norm": 0.6329573476661334, "learning_rate": 2.052299210716538e-06, "loss": 0.0153, "step": 227640 }, { "epoch": 0.9498585507923659, "grad_norm": 0.501591146998753, "learning_rate": 2.0522766721203082e-06, "loss": 0.0175, "step": 227645 }, { "epoch": 0.9498794135073562, "grad_norm": 1.0302643472400788, "learning_rate": 2.052254134266626e-06, "loss": 0.0251, "step": 227650 }, { "epoch": 0.9499002762223465, "grad_norm": 0.5585938508272827, "learning_rate": 2.052231597155452e-06, "loss": 0.0189, "step": 227655 }, { "epoch": 0.9499211389373368, "grad_norm": 0.3770596054354607, "learning_rate": 2.0522090607867452e-06, "loss": 0.0136, "step": 227660 }, { "epoch": 0.949942001652327, "grad_norm": 1.3892258267052577, "learning_rate": 2.0521865251604638e-06, "loss": 0.018, "step": 227665 }, { "epoch": 0.9499628643673173, "grad_norm": 0.8186309866453929, "learning_rate": 2.052163990276568e-06, "loss": 0.025, "step": 227670 }, { "epoch": 0.9499837270823076, "grad_norm": 0.6837951251959349, "learning_rate": 2.0521414561350173e-06, "loss": 0.0203, "step": 227675 }, { "epoch": 0.9500045897972978, "grad_norm": 0.6205150628013912, "learning_rate": 2.0521189227357697e-06, "loss": 0.0233, "step": 227680 }, { "epoch": 0.9500254525122881, "grad_norm": 0.6219376341548835, "learning_rate": 2.0520963900787855e-06, "loss": 0.0191, "step": 227685 }, { "epoch": 0.9500463152272784, "grad_norm": 0.5851416773016753, "learning_rate": 2.0520738581640233e-06, "loss": 0.0167, "step": 227690 }, { "epoch": 0.9500671779422687, "grad_norm": 0.7317634572703893, "learning_rate": 2.052051326991443e-06, "loss": 0.0223, "step": 227695 }, { "epoch": 0.9500880406572589, "grad_norm": 0.7324358255984615, "learning_rate": 2.0520287965610033e-06, "loss": 0.0222, "step": 227700 }, { "epoch": 0.9501089033722493, "grad_norm": 0.5120670861177464, "learning_rate": 2.052006266872664e-06, "loss": 0.0174, "step": 227705 }, { "epoch": 0.9501297660872395, "grad_norm": 0.5921382403772217, "learning_rate": 2.0519837379263836e-06, "loss": 0.0207, "step": 227710 }, { "epoch": 0.9501506288022298, "grad_norm": 0.7877472861976799, "learning_rate": 2.051961209722122e-06, "loss": 0.0227, "step": 227715 }, { "epoch": 0.9501714915172201, "grad_norm": 0.5588210549254804, "learning_rate": 2.051938682259838e-06, "loss": 0.0164, "step": 227720 }, { "epoch": 0.9501923542322104, "grad_norm": 0.3430318828090881, "learning_rate": 2.051916155539492e-06, "loss": 0.0162, "step": 227725 }, { "epoch": 0.9502132169472006, "grad_norm": 0.596594862949166, "learning_rate": 2.051893629561042e-06, "loss": 0.0182, "step": 227730 }, { "epoch": 0.9502340796621909, "grad_norm": 0.7830905995931587, "learning_rate": 2.051871104324447e-06, "loss": 0.0165, "step": 227735 }, { "epoch": 0.9502549423771812, "grad_norm": 0.8370572229643329, "learning_rate": 2.0518485798296676e-06, "loss": 0.0172, "step": 227740 }, { "epoch": 0.9502758050921715, "grad_norm": 0.6523831901481308, "learning_rate": 2.0518260560766625e-06, "loss": 0.0134, "step": 227745 }, { "epoch": 0.9502966678071617, "grad_norm": 1.0665655954870634, "learning_rate": 2.051803533065391e-06, "loss": 0.0209, "step": 227750 }, { "epoch": 0.9503175305221521, "grad_norm": 0.7784287150222361, "learning_rate": 2.0517810107958115e-06, "loss": 0.0178, "step": 227755 }, { "epoch": 0.9503383932371423, "grad_norm": 1.2014327949689658, "learning_rate": 2.0517584892678853e-06, "loss": 0.0195, "step": 227760 }, { "epoch": 0.9503592559521326, "grad_norm": 0.7182631610238266, "learning_rate": 2.05173596848157e-06, "loss": 0.0343, "step": 227765 }, { "epoch": 0.9503801186671229, "grad_norm": 0.8870496880217812, "learning_rate": 2.0517134484368253e-06, "loss": 0.0167, "step": 227770 }, { "epoch": 0.9504009813821132, "grad_norm": 0.59830464279738, "learning_rate": 2.051690929133611e-06, "loss": 0.0219, "step": 227775 }, { "epoch": 0.9504218440971034, "grad_norm": 0.6511042045014925, "learning_rate": 2.051668410571886e-06, "loss": 0.0232, "step": 227780 }, { "epoch": 0.9504427068120938, "grad_norm": 0.6084205279074294, "learning_rate": 2.0516458927516096e-06, "loss": 0.0168, "step": 227785 }, { "epoch": 0.950463569527084, "grad_norm": 0.6254755756866445, "learning_rate": 2.051623375672741e-06, "loss": 0.019, "step": 227790 }, { "epoch": 0.9504844322420742, "grad_norm": 0.7550074145577078, "learning_rate": 2.0516008593352398e-06, "loss": 0.0255, "step": 227795 }, { "epoch": 0.9505052949570645, "grad_norm": 0.4580106574393965, "learning_rate": 2.051578343739065e-06, "loss": 0.0152, "step": 227800 }, { "epoch": 0.9505261576720548, "grad_norm": 0.4492649656140921, "learning_rate": 2.051555828884176e-06, "loss": 0.0158, "step": 227805 }, { "epoch": 0.9505470203870451, "grad_norm": 0.4467850204352953, "learning_rate": 2.051533314770533e-06, "loss": 0.0182, "step": 227810 }, { "epoch": 0.9505678831020353, "grad_norm": 0.8289230103161126, "learning_rate": 2.0515108013980943e-06, "loss": 0.0167, "step": 227815 }, { "epoch": 0.9505887458170257, "grad_norm": 0.5538933217174776, "learning_rate": 2.0514882887668185e-06, "loss": 0.0135, "step": 227820 }, { "epoch": 0.9506096085320159, "grad_norm": 0.5402505618352148, "learning_rate": 2.051465776876667e-06, "loss": 0.0156, "step": 227825 }, { "epoch": 0.9506304712470062, "grad_norm": 0.504374161717067, "learning_rate": 2.051443265727598e-06, "loss": 0.0155, "step": 227830 }, { "epoch": 0.9506513339619965, "grad_norm": 0.59487967592789, "learning_rate": 2.0514207553195707e-06, "loss": 0.022, "step": 227835 }, { "epoch": 0.9506721966769868, "grad_norm": 0.4778489660064641, "learning_rate": 2.0513982456525447e-06, "loss": 0.0216, "step": 227840 }, { "epoch": 0.950693059391977, "grad_norm": 0.4074794045480777, "learning_rate": 2.051375736726479e-06, "loss": 0.0245, "step": 227845 }, { "epoch": 0.9507139221069673, "grad_norm": 0.7265101732153321, "learning_rate": 2.051353228541334e-06, "loss": 0.0192, "step": 227850 }, { "epoch": 0.9507347848219576, "grad_norm": 0.42940326544452184, "learning_rate": 2.0513307210970675e-06, "loss": 0.0179, "step": 227855 }, { "epoch": 0.9507556475369479, "grad_norm": 0.4765469155246067, "learning_rate": 2.05130821439364e-06, "loss": 0.0188, "step": 227860 }, { "epoch": 0.9507765102519381, "grad_norm": 1.2411763242077172, "learning_rate": 2.051285708431011e-06, "loss": 0.0201, "step": 227865 }, { "epoch": 0.9507973729669285, "grad_norm": 0.5628043817109253, "learning_rate": 2.0512632032091386e-06, "loss": 0.0176, "step": 227870 }, { "epoch": 0.9508182356819187, "grad_norm": 0.43328237318900054, "learning_rate": 2.0512406987279833e-06, "loss": 0.0166, "step": 227875 }, { "epoch": 0.9508390983969089, "grad_norm": 0.7138142908864514, "learning_rate": 2.051218194987504e-06, "loss": 0.0198, "step": 227880 }, { "epoch": 0.9508599611118993, "grad_norm": 0.43949976020433645, "learning_rate": 2.051195691987661e-06, "loss": 0.0182, "step": 227885 }, { "epoch": 0.9508808238268895, "grad_norm": 0.7083264961765717, "learning_rate": 2.051173189728412e-06, "loss": 0.0218, "step": 227890 }, { "epoch": 0.9509016865418798, "grad_norm": 0.5652731258339405, "learning_rate": 2.0511506882097175e-06, "loss": 0.0188, "step": 227895 }, { "epoch": 0.9509225492568701, "grad_norm": 0.43865268992347944, "learning_rate": 2.0511281874315366e-06, "loss": 0.0188, "step": 227900 }, { "epoch": 0.9509434119718604, "grad_norm": 0.5113129891352483, "learning_rate": 2.0511056873938285e-06, "loss": 0.0158, "step": 227905 }, { "epoch": 0.9509642746868506, "grad_norm": 0.6069867482026141, "learning_rate": 2.051083188096553e-06, "loss": 0.0272, "step": 227910 }, { "epoch": 0.9509851374018409, "grad_norm": 0.43926332282412184, "learning_rate": 2.0510606895396694e-06, "loss": 0.0185, "step": 227915 }, { "epoch": 0.9510060001168312, "grad_norm": 1.213476111484669, "learning_rate": 2.0510381917231367e-06, "loss": 0.0317, "step": 227920 }, { "epoch": 0.9510268628318215, "grad_norm": 0.5070551218517039, "learning_rate": 2.051015694646915e-06, "loss": 0.021, "step": 227925 }, { "epoch": 0.9510477255468117, "grad_norm": 0.4390488803311122, "learning_rate": 2.0509931983109628e-06, "loss": 0.017, "step": 227930 }, { "epoch": 0.9510685882618021, "grad_norm": 0.6909567899470176, "learning_rate": 2.0509707027152407e-06, "loss": 0.0205, "step": 227935 }, { "epoch": 0.9510894509767923, "grad_norm": 0.9389102137858875, "learning_rate": 2.050948207859707e-06, "loss": 0.0212, "step": 227940 }, { "epoch": 0.9511103136917826, "grad_norm": 0.6369105758501893, "learning_rate": 2.050925713744321e-06, "loss": 0.0209, "step": 227945 }, { "epoch": 0.9511311764067729, "grad_norm": 0.4401732865829217, "learning_rate": 2.050903220369043e-06, "loss": 0.0211, "step": 227950 }, { "epoch": 0.9511520391217632, "grad_norm": 0.7336141812466657, "learning_rate": 2.0508807277338323e-06, "loss": 0.018, "step": 227955 }, { "epoch": 0.9511729018367534, "grad_norm": 0.45302557972184526, "learning_rate": 2.0508582358386477e-06, "loss": 0.0173, "step": 227960 }, { "epoch": 0.9511937645517438, "grad_norm": 0.6944040085203399, "learning_rate": 2.0508357446834493e-06, "loss": 0.0207, "step": 227965 }, { "epoch": 0.951214627266734, "grad_norm": 0.7344157320856732, "learning_rate": 2.050813254268196e-06, "loss": 0.0228, "step": 227970 }, { "epoch": 0.9512354899817242, "grad_norm": 0.5066034410735866, "learning_rate": 2.0507907645928475e-06, "loss": 0.0226, "step": 227975 }, { "epoch": 0.9512563526967145, "grad_norm": 0.9104956503369845, "learning_rate": 2.0507682756573625e-06, "loss": 0.0278, "step": 227980 }, { "epoch": 0.9512772154117048, "grad_norm": 0.8681516169375315, "learning_rate": 2.050745787461702e-06, "loss": 0.0254, "step": 227985 }, { "epoch": 0.9512980781266951, "grad_norm": 0.7018716784215301, "learning_rate": 2.050723300005824e-06, "loss": 0.02, "step": 227990 }, { "epoch": 0.9513189408416853, "grad_norm": 0.7842970765660126, "learning_rate": 2.0507008132896887e-06, "loss": 0.0188, "step": 227995 }, { "epoch": 0.9513398035566757, "grad_norm": 0.8775799886141147, "learning_rate": 2.0506783273132556e-06, "loss": 0.014, "step": 228000 }, { "epoch": 0.9513606662716659, "grad_norm": 0.6362391799267109, "learning_rate": 2.050655842076483e-06, "loss": 0.0209, "step": 228005 }, { "epoch": 0.9513815289866562, "grad_norm": 0.3244610752103171, "learning_rate": 2.0506333575793317e-06, "loss": 0.0179, "step": 228010 }, { "epoch": 0.9514023917016465, "grad_norm": 0.5322820221974947, "learning_rate": 2.0506108738217607e-06, "loss": 0.0194, "step": 228015 }, { "epoch": 0.9514232544166368, "grad_norm": 0.4839061675529901, "learning_rate": 2.0505883908037296e-06, "loss": 0.0191, "step": 228020 }, { "epoch": 0.951444117131627, "grad_norm": 0.6555590136087607, "learning_rate": 2.0505659085251976e-06, "loss": 0.0112, "step": 228025 }, { "epoch": 0.9514649798466173, "grad_norm": 0.6832158369204138, "learning_rate": 2.0505434269861237e-06, "loss": 0.0164, "step": 228030 }, { "epoch": 0.9514858425616076, "grad_norm": 1.3348699567862543, "learning_rate": 2.0505209461864684e-06, "loss": 0.0217, "step": 228035 }, { "epoch": 0.9515067052765979, "grad_norm": 0.5392190438500637, "learning_rate": 2.050498466126191e-06, "loss": 0.0277, "step": 228040 }, { "epoch": 0.9515275679915881, "grad_norm": 0.6177471077662595, "learning_rate": 2.0504759868052497e-06, "loss": 0.0185, "step": 228045 }, { "epoch": 0.9515484307065785, "grad_norm": 0.7731120671728615, "learning_rate": 2.0504535082236057e-06, "loss": 0.0241, "step": 228050 }, { "epoch": 0.9515692934215687, "grad_norm": 0.3333545090970718, "learning_rate": 2.0504310303812173e-06, "loss": 0.0168, "step": 228055 }, { "epoch": 0.951590156136559, "grad_norm": 0.45610815968235063, "learning_rate": 2.050408553278045e-06, "loss": 0.019, "step": 228060 }, { "epoch": 0.9516110188515493, "grad_norm": 0.7485317633378974, "learning_rate": 2.050386076914047e-06, "loss": 0.0247, "step": 228065 }, { "epoch": 0.9516318815665396, "grad_norm": 0.6008464962195396, "learning_rate": 2.0503636012891833e-06, "loss": 0.0185, "step": 228070 }, { "epoch": 0.9516527442815298, "grad_norm": 0.527012965331166, "learning_rate": 2.050341126403414e-06, "loss": 0.0191, "step": 228075 }, { "epoch": 0.9516736069965201, "grad_norm": 0.48241700299644835, "learning_rate": 2.050318652256698e-06, "loss": 0.02, "step": 228080 }, { "epoch": 0.9516944697115104, "grad_norm": 0.25493426377313566, "learning_rate": 2.0502961788489947e-06, "loss": 0.0162, "step": 228085 }, { "epoch": 0.9517153324265006, "grad_norm": 0.21173556784199796, "learning_rate": 2.050273706180264e-06, "loss": 0.0186, "step": 228090 }, { "epoch": 0.9517361951414909, "grad_norm": 0.7865810715033315, "learning_rate": 2.0502512342504655e-06, "loss": 0.0227, "step": 228095 }, { "epoch": 0.9517570578564812, "grad_norm": 0.35025242413121377, "learning_rate": 2.050228763059558e-06, "loss": 0.02, "step": 228100 }, { "epoch": 0.9517779205714715, "grad_norm": 0.44674903328183657, "learning_rate": 2.0502062926075016e-06, "loss": 0.018, "step": 228105 }, { "epoch": 0.9517987832864617, "grad_norm": 0.4121306209373707, "learning_rate": 2.0501838228942557e-06, "loss": 0.0215, "step": 228110 }, { "epoch": 0.9518196460014521, "grad_norm": 0.48258257271142013, "learning_rate": 2.0501613539197797e-06, "loss": 0.0225, "step": 228115 }, { "epoch": 0.9518405087164423, "grad_norm": 0.4754180655940035, "learning_rate": 2.0501388856840333e-06, "loss": 0.0237, "step": 228120 }, { "epoch": 0.9518613714314326, "grad_norm": 0.47298320000381416, "learning_rate": 2.050116418186976e-06, "loss": 0.016, "step": 228125 }, { "epoch": 0.9518822341464229, "grad_norm": 1.368446377491998, "learning_rate": 2.050093951428567e-06, "loss": 0.0289, "step": 228130 }, { "epoch": 0.9519030968614132, "grad_norm": 1.249613593164606, "learning_rate": 2.050071485408766e-06, "loss": 0.0217, "step": 228135 }, { "epoch": 0.9519239595764034, "grad_norm": 0.49384812535741357, "learning_rate": 2.050049020127533e-06, "loss": 0.0182, "step": 228140 }, { "epoch": 0.9519448222913938, "grad_norm": 0.4438616065793659, "learning_rate": 2.0500265555848264e-06, "loss": 0.0238, "step": 228145 }, { "epoch": 0.951965685006384, "grad_norm": 1.0691601384565923, "learning_rate": 2.0500040917806073e-06, "loss": 0.019, "step": 228150 }, { "epoch": 0.9519865477213743, "grad_norm": 0.6221345884905201, "learning_rate": 2.049981628714834e-06, "loss": 0.0234, "step": 228155 }, { "epoch": 0.9520074104363645, "grad_norm": 1.2472453756276183, "learning_rate": 2.049959166387466e-06, "loss": 0.0241, "step": 228160 }, { "epoch": 0.9520282731513549, "grad_norm": 0.8119918405480541, "learning_rate": 2.049936704798464e-06, "loss": 0.0197, "step": 228165 }, { "epoch": 0.9520491358663451, "grad_norm": 0.9592917581872599, "learning_rate": 2.049914243947787e-06, "loss": 0.0267, "step": 228170 }, { "epoch": 0.9520699985813353, "grad_norm": 1.0875914839789276, "learning_rate": 2.0498917838353934e-06, "loss": 0.0161, "step": 228175 }, { "epoch": 0.9520908612963257, "grad_norm": 0.5201468197464082, "learning_rate": 2.0498693244612443e-06, "loss": 0.0225, "step": 228180 }, { "epoch": 0.9521117240113159, "grad_norm": 0.6410303331261974, "learning_rate": 2.049846865825299e-06, "loss": 0.0166, "step": 228185 }, { "epoch": 0.9521325867263062, "grad_norm": 0.5906093277029417, "learning_rate": 2.0498244079275163e-06, "loss": 0.0219, "step": 228190 }, { "epoch": 0.9521534494412965, "grad_norm": 0.8751797896543594, "learning_rate": 2.0498019507678565e-06, "loss": 0.0177, "step": 228195 }, { "epoch": 0.9521743121562868, "grad_norm": 0.48805682238132186, "learning_rate": 2.049779494346279e-06, "loss": 0.0196, "step": 228200 }, { "epoch": 0.952195174871277, "grad_norm": 0.8488746438282051, "learning_rate": 2.049757038662743e-06, "loss": 0.0211, "step": 228205 }, { "epoch": 0.9522160375862673, "grad_norm": 0.5427037869556824, "learning_rate": 2.0497345837172084e-06, "loss": 0.0159, "step": 228210 }, { "epoch": 0.9522369003012576, "grad_norm": 0.9550062543312756, "learning_rate": 2.049712129509635e-06, "loss": 0.0212, "step": 228215 }, { "epoch": 0.9522577630162479, "grad_norm": 0.3642003225511713, "learning_rate": 2.049689676039982e-06, "loss": 0.0136, "step": 228220 }, { "epoch": 0.9522786257312381, "grad_norm": 0.9457570398410683, "learning_rate": 2.049667223308209e-06, "loss": 0.0101, "step": 228225 }, { "epoch": 0.9522994884462285, "grad_norm": 0.5653416463745718, "learning_rate": 2.0496447713142756e-06, "loss": 0.0191, "step": 228230 }, { "epoch": 0.9523203511612187, "grad_norm": 1.1150961265899335, "learning_rate": 2.0496223200581415e-06, "loss": 0.0203, "step": 228235 }, { "epoch": 0.952341213876209, "grad_norm": 0.3861801956622089, "learning_rate": 2.0495998695397663e-06, "loss": 0.0148, "step": 228240 }, { "epoch": 0.9523620765911993, "grad_norm": 0.9063652571635381, "learning_rate": 2.0495774197591094e-06, "loss": 0.0167, "step": 228245 }, { "epoch": 0.9523829393061896, "grad_norm": 0.5669060579244091, "learning_rate": 2.0495549707161305e-06, "loss": 0.0218, "step": 228250 }, { "epoch": 0.9524038020211798, "grad_norm": 0.657842170750096, "learning_rate": 2.0495325224107894e-06, "loss": 0.0329, "step": 228255 }, { "epoch": 0.9524246647361702, "grad_norm": 0.4514715427476809, "learning_rate": 2.049510074843046e-06, "loss": 0.015, "step": 228260 }, { "epoch": 0.9524455274511604, "grad_norm": 0.47904671710344915, "learning_rate": 2.049487628012859e-06, "loss": 0.0209, "step": 228265 }, { "epoch": 0.9524663901661506, "grad_norm": 0.7691554785220437, "learning_rate": 2.0494651819201885e-06, "loss": 0.0189, "step": 228270 }, { "epoch": 0.9524872528811409, "grad_norm": 0.7005302650467538, "learning_rate": 2.0494427365649937e-06, "loss": 0.0188, "step": 228275 }, { "epoch": 0.9525081155961312, "grad_norm": 0.7174391440291641, "learning_rate": 2.049420291947235e-06, "loss": 0.0172, "step": 228280 }, { "epoch": 0.9525289783111215, "grad_norm": 0.596309043967726, "learning_rate": 2.0493978480668716e-06, "loss": 0.0243, "step": 228285 }, { "epoch": 0.9525498410261117, "grad_norm": 0.5780607090527212, "learning_rate": 2.049375404923863e-06, "loss": 0.0221, "step": 228290 }, { "epoch": 0.9525707037411021, "grad_norm": 0.7796472607246396, "learning_rate": 2.0493529625181695e-06, "loss": 0.0223, "step": 228295 }, { "epoch": 0.9525915664560923, "grad_norm": 0.5404012886909795, "learning_rate": 2.04933052084975e-06, "loss": 0.0163, "step": 228300 }, { "epoch": 0.9526124291710826, "grad_norm": 0.5483484312441754, "learning_rate": 2.049308079918564e-06, "loss": 0.0186, "step": 228305 }, { "epoch": 0.9526332918860729, "grad_norm": 1.2567969450841423, "learning_rate": 2.0492856397245716e-06, "loss": 0.0253, "step": 228310 }, { "epoch": 0.9526541546010632, "grad_norm": 0.47764998113947105, "learning_rate": 2.0492632002677322e-06, "loss": 0.0153, "step": 228315 }, { "epoch": 0.9526750173160534, "grad_norm": 0.7516966501842599, "learning_rate": 2.0492407615480063e-06, "loss": 0.0158, "step": 228320 }, { "epoch": 0.9526958800310438, "grad_norm": 1.5032643140793311, "learning_rate": 2.0492183235653516e-06, "loss": 0.0163, "step": 228325 }, { "epoch": 0.952716742746034, "grad_norm": 0.6237211408573342, "learning_rate": 2.04919588631973e-06, "loss": 0.0182, "step": 228330 }, { "epoch": 0.9527376054610243, "grad_norm": 0.3611431566070225, "learning_rate": 2.0491734498111e-06, "loss": 0.0143, "step": 228335 }, { "epoch": 0.9527584681760145, "grad_norm": 0.3917762279644113, "learning_rate": 2.0491510140394207e-06, "loss": 0.0155, "step": 228340 }, { "epoch": 0.9527793308910049, "grad_norm": 0.6751549694706506, "learning_rate": 2.0491285790046528e-06, "loss": 0.0167, "step": 228345 }, { "epoch": 0.9528001936059951, "grad_norm": 0.7174220732303671, "learning_rate": 2.0491061447067557e-06, "loss": 0.0178, "step": 228350 }, { "epoch": 0.9528210563209853, "grad_norm": 0.6370816791527218, "learning_rate": 2.0490837111456887e-06, "loss": 0.0217, "step": 228355 }, { "epoch": 0.9528419190359757, "grad_norm": 0.4864681761625734, "learning_rate": 2.0490612783214117e-06, "loss": 0.0187, "step": 228360 }, { "epoch": 0.952862781750966, "grad_norm": 0.8344310241626578, "learning_rate": 2.0490388462338843e-06, "loss": 0.0228, "step": 228365 }, { "epoch": 0.9528836444659562, "grad_norm": 0.9393144420075995, "learning_rate": 2.0490164148830664e-06, "loss": 0.0172, "step": 228370 }, { "epoch": 0.9529045071809465, "grad_norm": 0.4923749829596453, "learning_rate": 2.0489939842689177e-06, "loss": 0.021, "step": 228375 }, { "epoch": 0.9529253698959368, "grad_norm": 0.8715370893803317, "learning_rate": 2.048971554391398e-06, "loss": 0.0154, "step": 228380 }, { "epoch": 0.952946232610927, "grad_norm": 0.592510975897426, "learning_rate": 2.0489491252504657e-06, "loss": 0.0249, "step": 228385 }, { "epoch": 0.9529670953259173, "grad_norm": 0.46970845249381227, "learning_rate": 2.048926696846082e-06, "loss": 0.0264, "step": 228390 }, { "epoch": 0.9529879580409076, "grad_norm": 0.7364302601918821, "learning_rate": 2.048904269178206e-06, "loss": 0.0254, "step": 228395 }, { "epoch": 0.9530088207558979, "grad_norm": 0.9688896209823128, "learning_rate": 2.048881842246798e-06, "loss": 0.0191, "step": 228400 }, { "epoch": 0.9530296834708881, "grad_norm": 1.113407062711272, "learning_rate": 2.048859416051816e-06, "loss": 0.0248, "step": 228405 }, { "epoch": 0.9530505461858785, "grad_norm": 0.4199799030331866, "learning_rate": 2.048836990593222e-06, "loss": 0.0161, "step": 228410 }, { "epoch": 0.9530714089008687, "grad_norm": 0.5312705124124875, "learning_rate": 2.0488145658709743e-06, "loss": 0.0163, "step": 228415 }, { "epoch": 0.953092271615859, "grad_norm": 0.6771340184945442, "learning_rate": 2.0487921418850325e-06, "loss": 0.0194, "step": 228420 }, { "epoch": 0.9531131343308493, "grad_norm": 0.34991639368666866, "learning_rate": 2.0487697186353567e-06, "loss": 0.0236, "step": 228425 }, { "epoch": 0.9531339970458396, "grad_norm": 0.6078792107557194, "learning_rate": 2.0487472961219067e-06, "loss": 0.021, "step": 228430 }, { "epoch": 0.9531548597608298, "grad_norm": 0.2997534284131913, "learning_rate": 2.048724874344642e-06, "loss": 0.0176, "step": 228435 }, { "epoch": 0.9531757224758202, "grad_norm": 0.5684058790443235, "learning_rate": 2.0487024533035223e-06, "loss": 0.0188, "step": 228440 }, { "epoch": 0.9531965851908104, "grad_norm": 12.091480915319652, "learning_rate": 2.048680032998508e-06, "loss": 0.0188, "step": 228445 }, { "epoch": 0.9532174479058007, "grad_norm": 0.6756268623947955, "learning_rate": 2.0486576134295574e-06, "loss": 0.016, "step": 228450 }, { "epoch": 0.9532383106207909, "grad_norm": 0.3281971902099826, "learning_rate": 2.0486351945966315e-06, "loss": 0.0209, "step": 228455 }, { "epoch": 0.9532591733357813, "grad_norm": 0.3924833559518486, "learning_rate": 2.0486127764996892e-06, "loss": 0.0224, "step": 228460 }, { "epoch": 0.9532800360507715, "grad_norm": 0.603427862592422, "learning_rate": 2.0485903591386915e-06, "loss": 0.0207, "step": 228465 }, { "epoch": 0.9533008987657617, "grad_norm": 0.8329472483209154, "learning_rate": 2.0485679425135964e-06, "loss": 0.0248, "step": 228470 }, { "epoch": 0.9533217614807521, "grad_norm": 0.9324129510265425, "learning_rate": 2.0485455266243646e-06, "loss": 0.0269, "step": 228475 }, { "epoch": 0.9533426241957423, "grad_norm": 0.6627018356581694, "learning_rate": 2.0485231114709563e-06, "loss": 0.0214, "step": 228480 }, { "epoch": 0.9533634869107326, "grad_norm": 0.4736894596549311, "learning_rate": 2.0485006970533303e-06, "loss": 0.0282, "step": 228485 }, { "epoch": 0.9533843496257229, "grad_norm": 0.4895542785650549, "learning_rate": 2.048478283371447e-06, "loss": 0.0179, "step": 228490 }, { "epoch": 0.9534052123407132, "grad_norm": 0.7151577129561876, "learning_rate": 2.0484558704252653e-06, "loss": 0.0224, "step": 228495 }, { "epoch": 0.9534260750557034, "grad_norm": 0.6825797679264878, "learning_rate": 2.048433458214746e-06, "loss": 0.0158, "step": 228500 }, { "epoch": 0.9534469377706938, "grad_norm": 0.366622575061304, "learning_rate": 2.048411046739848e-06, "loss": 0.0152, "step": 228505 }, { "epoch": 0.953467800485684, "grad_norm": 0.4136359306345275, "learning_rate": 2.0483886360005315e-06, "loss": 0.0195, "step": 228510 }, { "epoch": 0.9534886632006743, "grad_norm": 0.788686130866503, "learning_rate": 2.048366225996757e-06, "loss": 0.0268, "step": 228515 }, { "epoch": 0.9535095259156645, "grad_norm": 0.5851841589285584, "learning_rate": 2.0483438167284827e-06, "loss": 0.0197, "step": 228520 }, { "epoch": 0.9535303886306549, "grad_norm": 0.5466464759120826, "learning_rate": 2.0483214081956694e-06, "loss": 0.0147, "step": 228525 }, { "epoch": 0.9535512513456451, "grad_norm": 0.47549969417437976, "learning_rate": 2.0482990003982763e-06, "loss": 0.0166, "step": 228530 }, { "epoch": 0.9535721140606354, "grad_norm": 0.6824065503495661, "learning_rate": 2.048276593336264e-06, "loss": 0.0171, "step": 228535 }, { "epoch": 0.9535929767756257, "grad_norm": 0.8799935190466927, "learning_rate": 2.0482541870095917e-06, "loss": 0.0214, "step": 228540 }, { "epoch": 0.953613839490616, "grad_norm": 0.44323669326998383, "learning_rate": 2.048231781418219e-06, "loss": 0.0211, "step": 228545 }, { "epoch": 0.9536347022056062, "grad_norm": 2.1633337830774386, "learning_rate": 2.0482093765621057e-06, "loss": 0.0226, "step": 228550 }, { "epoch": 0.9536555649205966, "grad_norm": 1.1471927426440163, "learning_rate": 2.048186972441212e-06, "loss": 0.014, "step": 228555 }, { "epoch": 0.9536764276355868, "grad_norm": 0.5975746439459151, "learning_rate": 2.048164569055498e-06, "loss": 0.0199, "step": 228560 }, { "epoch": 0.953697290350577, "grad_norm": 0.6818488727291083, "learning_rate": 2.0481421664049226e-06, "loss": 0.0193, "step": 228565 }, { "epoch": 0.9537181530655673, "grad_norm": 0.9708548384315693, "learning_rate": 2.048119764489446e-06, "loss": 0.0223, "step": 228570 }, { "epoch": 0.9537390157805576, "grad_norm": 0.6032869955555983, "learning_rate": 2.0480973633090283e-06, "loss": 0.0182, "step": 228575 }, { "epoch": 0.9537598784955479, "grad_norm": 0.5426969891943835, "learning_rate": 2.0480749628636284e-06, "loss": 0.0171, "step": 228580 }, { "epoch": 0.9537807412105381, "grad_norm": 0.2675065350816476, "learning_rate": 2.048052563153207e-06, "loss": 0.0183, "step": 228585 }, { "epoch": 0.9538016039255285, "grad_norm": 0.564779022335379, "learning_rate": 2.048030164177724e-06, "loss": 0.0175, "step": 228590 }, { "epoch": 0.9538224666405187, "grad_norm": 0.5422843856822098, "learning_rate": 2.048007765937138e-06, "loss": 0.021, "step": 228595 }, { "epoch": 0.953843329355509, "grad_norm": 0.6057514382376507, "learning_rate": 2.0479853684314104e-06, "loss": 0.016, "step": 228600 }, { "epoch": 0.9538641920704993, "grad_norm": 0.5591205303527083, "learning_rate": 2.0479629716605e-06, "loss": 0.0198, "step": 228605 }, { "epoch": 0.9538850547854896, "grad_norm": 0.39090856482391323, "learning_rate": 2.047940575624367e-06, "loss": 0.0205, "step": 228610 }, { "epoch": 0.9539059175004798, "grad_norm": 0.6041185816026166, "learning_rate": 2.0479181803229713e-06, "loss": 0.0204, "step": 228615 }, { "epoch": 0.9539267802154702, "grad_norm": 0.5648529217175022, "learning_rate": 2.047895785756272e-06, "loss": 0.0222, "step": 228620 }, { "epoch": 0.9539476429304604, "grad_norm": 1.2182330696359513, "learning_rate": 2.04787339192423e-06, "loss": 0.0236, "step": 228625 }, { "epoch": 0.9539685056454507, "grad_norm": 0.44449719047363795, "learning_rate": 2.047850998826804e-06, "loss": 0.023, "step": 228630 }, { "epoch": 0.9539893683604409, "grad_norm": 0.278191338055002, "learning_rate": 2.0478286064639546e-06, "loss": 0.021, "step": 228635 }, { "epoch": 0.9540102310754313, "grad_norm": 0.519761066964282, "learning_rate": 2.047806214835642e-06, "loss": 0.0161, "step": 228640 }, { "epoch": 0.9540310937904215, "grad_norm": 1.0605778786897768, "learning_rate": 2.047783823941825e-06, "loss": 0.0261, "step": 228645 }, { "epoch": 0.9540519565054117, "grad_norm": 0.4600931647071999, "learning_rate": 2.0477614337824643e-06, "loss": 0.0224, "step": 228650 }, { "epoch": 0.9540728192204021, "grad_norm": 0.8653499460094596, "learning_rate": 2.047739044357519e-06, "loss": 0.0136, "step": 228655 }, { "epoch": 0.9540936819353923, "grad_norm": 0.3379531452172153, "learning_rate": 2.0477166556669498e-06, "loss": 0.0198, "step": 228660 }, { "epoch": 0.9541145446503826, "grad_norm": 0.7088681080700819, "learning_rate": 2.047694267710716e-06, "loss": 0.0167, "step": 228665 }, { "epoch": 0.954135407365373, "grad_norm": 0.6902800741430553, "learning_rate": 2.0476718804887774e-06, "loss": 0.0149, "step": 228670 }, { "epoch": 0.9541562700803632, "grad_norm": 0.5110332291022907, "learning_rate": 2.047649494001094e-06, "loss": 0.0215, "step": 228675 }, { "epoch": 0.9541771327953534, "grad_norm": 0.6254606313418718, "learning_rate": 2.0476271082476256e-06, "loss": 0.021, "step": 228680 }, { "epoch": 0.9541979955103438, "grad_norm": 0.4025077112534473, "learning_rate": 2.047604723228333e-06, "loss": 0.0216, "step": 228685 }, { "epoch": 0.954218858225334, "grad_norm": 0.6978263295291203, "learning_rate": 2.0475823389431743e-06, "loss": 0.0172, "step": 228690 }, { "epoch": 0.9542397209403243, "grad_norm": 0.5366882076076219, "learning_rate": 2.0475599553921104e-06, "loss": 0.0218, "step": 228695 }, { "epoch": 0.9542605836553145, "grad_norm": 0.7208032326129474, "learning_rate": 2.0475375725751013e-06, "loss": 0.0188, "step": 228700 }, { "epoch": 0.9542814463703049, "grad_norm": 0.42778748420575496, "learning_rate": 2.0475151904921072e-06, "loss": 0.0196, "step": 228705 }, { "epoch": 0.9543023090852951, "grad_norm": 0.7242629572566267, "learning_rate": 2.0474928091430867e-06, "loss": 0.0221, "step": 228710 }, { "epoch": 0.9543231718002854, "grad_norm": 0.8441014244884649, "learning_rate": 2.0474704285280007e-06, "loss": 0.0159, "step": 228715 }, { "epoch": 0.9543440345152757, "grad_norm": 0.9557305769554147, "learning_rate": 2.0474480486468086e-06, "loss": 0.0243, "step": 228720 }, { "epoch": 0.954364897230266, "grad_norm": 1.160114494210814, "learning_rate": 2.047425669499471e-06, "loss": 0.0183, "step": 228725 }, { "epoch": 0.9543857599452562, "grad_norm": 0.7808003295958622, "learning_rate": 2.0474032910859466e-06, "loss": 0.0247, "step": 228730 }, { "epoch": 0.9544066226602466, "grad_norm": 0.7102940829147002, "learning_rate": 2.047380913406197e-06, "loss": 0.0235, "step": 228735 }, { "epoch": 0.9544274853752368, "grad_norm": 0.8423200426088996, "learning_rate": 2.0473585364601807e-06, "loss": 0.026, "step": 228740 }, { "epoch": 0.954448348090227, "grad_norm": 0.4887211807974865, "learning_rate": 2.0473361602478577e-06, "loss": 0.011, "step": 228745 }, { "epoch": 0.9544692108052173, "grad_norm": 0.6818970378460043, "learning_rate": 2.0473137847691884e-06, "loss": 0.025, "step": 228750 }, { "epoch": 0.9544900735202076, "grad_norm": 0.3704336287186321, "learning_rate": 2.0472914100241326e-06, "loss": 0.0222, "step": 228755 }, { "epoch": 0.9545109362351979, "grad_norm": 0.7537567289104056, "learning_rate": 2.04726903601265e-06, "loss": 0.018, "step": 228760 }, { "epoch": 0.9545317989501881, "grad_norm": 0.7434250359723182, "learning_rate": 2.047246662734701e-06, "loss": 0.0226, "step": 228765 }, { "epoch": 0.9545526616651785, "grad_norm": 0.7008599223891491, "learning_rate": 2.0472242901902447e-06, "loss": 0.0221, "step": 228770 }, { "epoch": 0.9545735243801687, "grad_norm": 0.16223010250825076, "learning_rate": 2.0472019183792417e-06, "loss": 0.0234, "step": 228775 }, { "epoch": 0.954594387095159, "grad_norm": 0.4329625580476937, "learning_rate": 2.047179547301652e-06, "loss": 0.0141, "step": 228780 }, { "epoch": 0.9546152498101493, "grad_norm": 0.3792962789197846, "learning_rate": 2.0471571769574345e-06, "loss": 0.0234, "step": 228785 }, { "epoch": 0.9546361125251396, "grad_norm": 0.516328389902474, "learning_rate": 2.047134807346551e-06, "loss": 0.0208, "step": 228790 }, { "epoch": 0.9546569752401298, "grad_norm": 0.3921861214792606, "learning_rate": 2.0471124384689597e-06, "loss": 0.0227, "step": 228795 }, { "epoch": 0.9546778379551202, "grad_norm": 0.7591974478832262, "learning_rate": 2.0470900703246208e-06, "loss": 0.0204, "step": 228800 }, { "epoch": 0.9546987006701104, "grad_norm": 0.37174097214444557, "learning_rate": 2.047067702913495e-06, "loss": 0.0155, "step": 228805 }, { "epoch": 0.9547195633851007, "grad_norm": 0.7344083545528483, "learning_rate": 2.047045336235542e-06, "loss": 0.0206, "step": 228810 }, { "epoch": 0.9547404261000909, "grad_norm": 0.7000148047667307, "learning_rate": 2.0470229702907213e-06, "loss": 0.0223, "step": 228815 }, { "epoch": 0.9547612888150813, "grad_norm": 0.9102562425693905, "learning_rate": 2.0470006050789935e-06, "loss": 0.0222, "step": 228820 }, { "epoch": 0.9547821515300715, "grad_norm": 1.1884755996355554, "learning_rate": 2.0469782406003176e-06, "loss": 0.0234, "step": 228825 }, { "epoch": 0.9548030142450618, "grad_norm": 0.8664997566842627, "learning_rate": 2.046955876854655e-06, "loss": 0.0164, "step": 228830 }, { "epoch": 0.9548238769600521, "grad_norm": 0.6734877692102863, "learning_rate": 2.046933513841964e-06, "loss": 0.0224, "step": 228835 }, { "epoch": 0.9548447396750424, "grad_norm": 0.609380865925695, "learning_rate": 2.046911151562206e-06, "loss": 0.0209, "step": 228840 }, { "epoch": 0.9548656023900326, "grad_norm": 0.8344742255383981, "learning_rate": 2.04688879001534e-06, "loss": 0.0176, "step": 228845 }, { "epoch": 0.954886465105023, "grad_norm": 0.6325911636494214, "learning_rate": 2.046866429201326e-06, "loss": 0.0266, "step": 228850 }, { "epoch": 0.9549073278200132, "grad_norm": 0.6270024354954195, "learning_rate": 2.046844069120125e-06, "loss": 0.0182, "step": 228855 }, { "epoch": 0.9549281905350034, "grad_norm": 0.9298343892157662, "learning_rate": 2.046821709771696e-06, "loss": 0.0223, "step": 228860 }, { "epoch": 0.9549490532499938, "grad_norm": 0.3662302790595692, "learning_rate": 2.046799351155999e-06, "loss": 0.0161, "step": 228865 }, { "epoch": 0.954969915964984, "grad_norm": 1.0145243627888318, "learning_rate": 2.046776993272994e-06, "loss": 0.0236, "step": 228870 }, { "epoch": 0.9549907786799743, "grad_norm": 0.6236247830694545, "learning_rate": 2.0467546361226417e-06, "loss": 0.0147, "step": 228875 }, { "epoch": 0.9550116413949645, "grad_norm": 0.7164295023926189, "learning_rate": 2.046732279704902e-06, "loss": 0.0237, "step": 228880 }, { "epoch": 0.9550325041099549, "grad_norm": 0.7097599626407332, "learning_rate": 2.046709924019733e-06, "loss": 0.0152, "step": 228885 }, { "epoch": 0.9550533668249451, "grad_norm": 0.5254278109423529, "learning_rate": 2.0466875690670977e-06, "loss": 0.019, "step": 228890 }, { "epoch": 0.9550742295399354, "grad_norm": 0.3698069749341255, "learning_rate": 2.0466652148469534e-06, "loss": 0.0202, "step": 228895 }, { "epoch": 0.9550950922549257, "grad_norm": 0.6071062059346105, "learning_rate": 2.046642861359262e-06, "loss": 0.0178, "step": 228900 }, { "epoch": 0.955115954969916, "grad_norm": 0.9082260816715486, "learning_rate": 2.0466205086039826e-06, "loss": 0.0206, "step": 228905 }, { "epoch": 0.9551368176849062, "grad_norm": 0.5342139447053361, "learning_rate": 2.0465981565810753e-06, "loss": 0.0184, "step": 228910 }, { "epoch": 0.9551576803998966, "grad_norm": 0.36888490336987473, "learning_rate": 2.0465758052905e-06, "loss": 0.0168, "step": 228915 }, { "epoch": 0.9551785431148868, "grad_norm": 0.7133402062136736, "learning_rate": 2.0465534547322167e-06, "loss": 0.02, "step": 228920 }, { "epoch": 0.9551994058298771, "grad_norm": 0.535332655902633, "learning_rate": 2.0465311049061863e-06, "loss": 0.0156, "step": 228925 }, { "epoch": 0.9552202685448673, "grad_norm": 0.42384884046925947, "learning_rate": 2.0465087558123676e-06, "loss": 0.0195, "step": 228930 }, { "epoch": 0.9552411312598577, "grad_norm": 0.9576908618163987, "learning_rate": 2.0464864074507216e-06, "loss": 0.0181, "step": 228935 }, { "epoch": 0.9552619939748479, "grad_norm": 0.4367171600305575, "learning_rate": 2.0464640598212073e-06, "loss": 0.0158, "step": 228940 }, { "epoch": 0.9552828566898381, "grad_norm": 0.6302681231167311, "learning_rate": 2.0464417129237853e-06, "loss": 0.0185, "step": 228945 }, { "epoch": 0.9553037194048285, "grad_norm": 0.4728921387468435, "learning_rate": 2.046419366758416e-06, "loss": 0.0187, "step": 228950 }, { "epoch": 0.9553245821198187, "grad_norm": 0.6414261247069796, "learning_rate": 2.0463970213250587e-06, "loss": 0.0174, "step": 228955 }, { "epoch": 0.955345444834809, "grad_norm": 1.9997418396923112, "learning_rate": 2.0463746766236746e-06, "loss": 0.0308, "step": 228960 }, { "epoch": 0.9553663075497993, "grad_norm": 0.8002305579509502, "learning_rate": 2.046352332654222e-06, "loss": 0.0282, "step": 228965 }, { "epoch": 0.9553871702647896, "grad_norm": 1.0853382282714596, "learning_rate": 2.0463299894166623e-06, "loss": 0.021, "step": 228970 }, { "epoch": 0.9554080329797798, "grad_norm": 0.8271076264737243, "learning_rate": 2.046307646910955e-06, "loss": 0.0275, "step": 228975 }, { "epoch": 0.9554288956947702, "grad_norm": 0.532231227588074, "learning_rate": 2.0462853051370603e-06, "loss": 0.0189, "step": 228980 }, { "epoch": 0.9554497584097604, "grad_norm": 1.0240050119864634, "learning_rate": 2.046262964094938e-06, "loss": 0.0227, "step": 228985 }, { "epoch": 0.9554706211247507, "grad_norm": 0.1723252573815381, "learning_rate": 2.0462406237845482e-06, "loss": 0.0164, "step": 228990 }, { "epoch": 0.9554914838397409, "grad_norm": 0.4066411198855234, "learning_rate": 2.046218284205852e-06, "loss": 0.0176, "step": 228995 }, { "epoch": 0.9555123465547313, "grad_norm": 0.5067905276490849, "learning_rate": 2.046195945358808e-06, "loss": 0.0171, "step": 229000 }, { "epoch": 0.9555332092697215, "grad_norm": 0.7732948416797522, "learning_rate": 2.0461736072433768e-06, "loss": 0.0162, "step": 229005 }, { "epoch": 0.9555540719847118, "grad_norm": 0.58762218715991, "learning_rate": 2.0461512698595184e-06, "loss": 0.0157, "step": 229010 }, { "epoch": 0.9555749346997021, "grad_norm": 0.9204496548845339, "learning_rate": 2.046128933207193e-06, "loss": 0.0159, "step": 229015 }, { "epoch": 0.9555957974146924, "grad_norm": 1.8487528754935643, "learning_rate": 2.046106597286361e-06, "loss": 0.0327, "step": 229020 }, { "epoch": 0.9556166601296826, "grad_norm": 0.3522132684340158, "learning_rate": 2.046084262096982e-06, "loss": 0.0132, "step": 229025 }, { "epoch": 0.955637522844673, "grad_norm": 0.48851142136788334, "learning_rate": 2.046061927639016e-06, "loss": 0.0148, "step": 229030 }, { "epoch": 0.9556583855596632, "grad_norm": 0.20483309692385912, "learning_rate": 2.0460395939124234e-06, "loss": 0.0163, "step": 229035 }, { "epoch": 0.9556792482746534, "grad_norm": 0.7870541989515802, "learning_rate": 2.0460172609171642e-06, "loss": 0.0219, "step": 229040 }, { "epoch": 0.9557001109896438, "grad_norm": 0.41955010174212964, "learning_rate": 2.045994928653198e-06, "loss": 0.0181, "step": 229045 }, { "epoch": 0.955720973704634, "grad_norm": 0.3167340458861665, "learning_rate": 2.045972597120486e-06, "loss": 0.0186, "step": 229050 }, { "epoch": 0.9557418364196243, "grad_norm": 0.8412956982711841, "learning_rate": 2.0459502663189877e-06, "loss": 0.0194, "step": 229055 }, { "epoch": 0.9557626991346145, "grad_norm": 0.8600503820409818, "learning_rate": 2.0459279362486625e-06, "loss": 0.0243, "step": 229060 }, { "epoch": 0.9557835618496049, "grad_norm": 1.1307984932831705, "learning_rate": 2.0459056069094715e-06, "loss": 0.0199, "step": 229065 }, { "epoch": 0.9558044245645951, "grad_norm": 0.830275005122866, "learning_rate": 2.0458832783013746e-06, "loss": 0.0156, "step": 229070 }, { "epoch": 0.9558252872795854, "grad_norm": 0.5823386608388981, "learning_rate": 2.0458609504243315e-06, "loss": 0.014, "step": 229075 }, { "epoch": 0.9558461499945757, "grad_norm": 0.788704627966525, "learning_rate": 2.045838623278303e-06, "loss": 0.0186, "step": 229080 }, { "epoch": 0.955867012709566, "grad_norm": 0.45713422098531986, "learning_rate": 2.0458162968632483e-06, "loss": 0.0239, "step": 229085 }, { "epoch": 0.9558878754245562, "grad_norm": 1.0050293683368128, "learning_rate": 2.045793971179128e-06, "loss": 0.0208, "step": 229090 }, { "epoch": 0.9559087381395466, "grad_norm": 0.49836158499161476, "learning_rate": 2.0457716462259022e-06, "loss": 0.0237, "step": 229095 }, { "epoch": 0.9559296008545368, "grad_norm": 0.5142792514921927, "learning_rate": 2.0457493220035314e-06, "loss": 0.0135, "step": 229100 }, { "epoch": 0.9559504635695271, "grad_norm": 0.4509359937634856, "learning_rate": 2.0457269985119747e-06, "loss": 0.0143, "step": 229105 }, { "epoch": 0.9559713262845173, "grad_norm": 1.1954601064161032, "learning_rate": 2.045704675751193e-06, "loss": 0.0215, "step": 229110 }, { "epoch": 0.9559921889995077, "grad_norm": 0.24514790497769484, "learning_rate": 2.045682353721147e-06, "loss": 0.0167, "step": 229115 }, { "epoch": 0.9560130517144979, "grad_norm": 0.49825107514178085, "learning_rate": 2.0456600324217953e-06, "loss": 0.0163, "step": 229120 }, { "epoch": 0.9560339144294882, "grad_norm": 0.7233425271443189, "learning_rate": 2.0456377118530995e-06, "loss": 0.0148, "step": 229125 }, { "epoch": 0.9560547771444785, "grad_norm": 0.6485638271823693, "learning_rate": 2.045615392015019e-06, "loss": 0.0175, "step": 229130 }, { "epoch": 0.9560756398594688, "grad_norm": 0.7989333351317295, "learning_rate": 2.045593072907514e-06, "loss": 0.0194, "step": 229135 }, { "epoch": 0.956096502574459, "grad_norm": 0.6428533419186454, "learning_rate": 2.0455707545305445e-06, "loss": 0.0162, "step": 229140 }, { "epoch": 0.9561173652894494, "grad_norm": 0.6416277058234423, "learning_rate": 2.045548436884071e-06, "loss": 0.0187, "step": 229145 }, { "epoch": 0.9561382280044396, "grad_norm": 1.1621435363706052, "learning_rate": 2.0455261199680533e-06, "loss": 0.017, "step": 229150 }, { "epoch": 0.9561590907194298, "grad_norm": 0.8820186055891475, "learning_rate": 2.045503803782452e-06, "loss": 0.0183, "step": 229155 }, { "epoch": 0.9561799534344202, "grad_norm": 1.7990301699536102, "learning_rate": 2.0454814883272265e-06, "loss": 0.0186, "step": 229160 }, { "epoch": 0.9562008161494104, "grad_norm": 0.7480264007724035, "learning_rate": 2.045459173602338e-06, "loss": 0.0219, "step": 229165 }, { "epoch": 0.9562216788644007, "grad_norm": 1.3452793473598061, "learning_rate": 2.045436859607746e-06, "loss": 0.0271, "step": 229170 }, { "epoch": 0.9562425415793909, "grad_norm": 0.6051975668987905, "learning_rate": 2.0454145463434113e-06, "loss": 0.0181, "step": 229175 }, { "epoch": 0.9562634042943813, "grad_norm": 0.6143024392527193, "learning_rate": 2.0453922338092926e-06, "loss": 0.0173, "step": 229180 }, { "epoch": 0.9562842670093715, "grad_norm": 0.6140810815746629, "learning_rate": 2.0453699220053514e-06, "loss": 0.0161, "step": 229185 }, { "epoch": 0.9563051297243618, "grad_norm": 0.5222646562694516, "learning_rate": 2.045347610931548e-06, "loss": 0.0211, "step": 229190 }, { "epoch": 0.9563259924393521, "grad_norm": 0.8989997649073094, "learning_rate": 2.0453253005878417e-06, "loss": 0.0155, "step": 229195 }, { "epoch": 0.9563468551543424, "grad_norm": 0.7529371371614809, "learning_rate": 2.045302990974193e-06, "loss": 0.0185, "step": 229200 }, { "epoch": 0.9563677178693326, "grad_norm": 1.3237831262378728, "learning_rate": 2.0452806820905624e-06, "loss": 0.0278, "step": 229205 }, { "epoch": 0.956388580584323, "grad_norm": 0.42101497123978815, "learning_rate": 2.0452583739369093e-06, "loss": 0.0203, "step": 229210 }, { "epoch": 0.9564094432993132, "grad_norm": 0.19934884095353494, "learning_rate": 2.0452360665131953e-06, "loss": 0.0159, "step": 229215 }, { "epoch": 0.9564303060143035, "grad_norm": 0.9896014056069301, "learning_rate": 2.0452137598193794e-06, "loss": 0.0195, "step": 229220 }, { "epoch": 0.9564511687292938, "grad_norm": 0.9835001205789164, "learning_rate": 2.0451914538554217e-06, "loss": 0.0257, "step": 229225 }, { "epoch": 0.956472031444284, "grad_norm": 0.6987027756111223, "learning_rate": 2.0451691486212832e-06, "loss": 0.024, "step": 229230 }, { "epoch": 0.9564928941592743, "grad_norm": 0.5387651886269785, "learning_rate": 2.045146844116924e-06, "loss": 0.0222, "step": 229235 }, { "epoch": 0.9565137568742645, "grad_norm": 0.5786431414589973, "learning_rate": 2.045124540342304e-06, "loss": 0.0232, "step": 229240 }, { "epoch": 0.9565346195892549, "grad_norm": 0.44908649685153024, "learning_rate": 2.045102237297383e-06, "loss": 0.0158, "step": 229245 }, { "epoch": 0.9565554823042451, "grad_norm": 0.5739048568892133, "learning_rate": 2.045079934982122e-06, "loss": 0.0216, "step": 229250 }, { "epoch": 0.9565763450192354, "grad_norm": 0.6746270807422129, "learning_rate": 2.045057633396481e-06, "loss": 0.0221, "step": 229255 }, { "epoch": 0.9565972077342257, "grad_norm": 0.5078035020268695, "learning_rate": 2.0450353325404195e-06, "loss": 0.0189, "step": 229260 }, { "epoch": 0.956618070449216, "grad_norm": 0.34890266328059755, "learning_rate": 2.045013032413899e-06, "loss": 0.0137, "step": 229265 }, { "epoch": 0.9566389331642062, "grad_norm": 0.34060096980724125, "learning_rate": 2.0449907330168785e-06, "loss": 0.0172, "step": 229270 }, { "epoch": 0.9566597958791966, "grad_norm": 0.4969348545101548, "learning_rate": 2.0449684343493193e-06, "loss": 0.0202, "step": 229275 }, { "epoch": 0.9566806585941868, "grad_norm": 0.6551478632784775, "learning_rate": 2.0449461364111805e-06, "loss": 0.0281, "step": 229280 }, { "epoch": 0.9567015213091771, "grad_norm": 0.5311856609985439, "learning_rate": 2.044923839202424e-06, "loss": 0.0154, "step": 229285 }, { "epoch": 0.9567223840241673, "grad_norm": 0.36421455671730546, "learning_rate": 2.044901542723008e-06, "loss": 0.0196, "step": 229290 }, { "epoch": 0.9567432467391577, "grad_norm": 0.6325130270936936, "learning_rate": 2.0448792469728936e-06, "loss": 0.0155, "step": 229295 }, { "epoch": 0.9567641094541479, "grad_norm": 0.8944797350595021, "learning_rate": 2.044856951952042e-06, "loss": 0.0178, "step": 229300 }, { "epoch": 0.9567849721691382, "grad_norm": 0.5899601381576058, "learning_rate": 2.044834657660412e-06, "loss": 0.0228, "step": 229305 }, { "epoch": 0.9568058348841285, "grad_norm": 0.33534716062594866, "learning_rate": 2.0448123640979643e-06, "loss": 0.0155, "step": 229310 }, { "epoch": 0.9568266975991188, "grad_norm": 0.5879401882722983, "learning_rate": 2.0447900712646597e-06, "loss": 0.0212, "step": 229315 }, { "epoch": 0.956847560314109, "grad_norm": 0.412566478166861, "learning_rate": 2.044767779160458e-06, "loss": 0.0167, "step": 229320 }, { "epoch": 0.9568684230290994, "grad_norm": 0.4714011015878023, "learning_rate": 2.0447454877853194e-06, "loss": 0.0203, "step": 229325 }, { "epoch": 0.9568892857440896, "grad_norm": 1.2130777543702513, "learning_rate": 2.0447231971392042e-06, "loss": 0.0236, "step": 229330 }, { "epoch": 0.9569101484590798, "grad_norm": 0.6835385403124596, "learning_rate": 2.044700907222073e-06, "loss": 0.0202, "step": 229335 }, { "epoch": 0.9569310111740702, "grad_norm": 0.4406746771970124, "learning_rate": 2.044678618033886e-06, "loss": 0.0168, "step": 229340 }, { "epoch": 0.9569518738890604, "grad_norm": 0.9736672042882484, "learning_rate": 2.0446563295746026e-06, "loss": 0.0173, "step": 229345 }, { "epoch": 0.9569727366040507, "grad_norm": 0.5271118945781635, "learning_rate": 2.044634041844184e-06, "loss": 0.0204, "step": 229350 }, { "epoch": 0.9569935993190409, "grad_norm": 0.39072508153419655, "learning_rate": 2.04461175484259e-06, "loss": 0.0135, "step": 229355 }, { "epoch": 0.9570144620340313, "grad_norm": 0.31205347322590915, "learning_rate": 2.044589468569782e-06, "loss": 0.0235, "step": 229360 }, { "epoch": 0.9570353247490215, "grad_norm": 0.5580711486114266, "learning_rate": 2.044567183025719e-06, "loss": 0.0134, "step": 229365 }, { "epoch": 0.9570561874640118, "grad_norm": 0.599698929942634, "learning_rate": 2.044544898210361e-06, "loss": 0.0143, "step": 229370 }, { "epoch": 0.9570770501790021, "grad_norm": 0.8436162009155883, "learning_rate": 2.0445226141236695e-06, "loss": 0.0136, "step": 229375 }, { "epoch": 0.9570979128939924, "grad_norm": 0.5470192582509702, "learning_rate": 2.0445003307656045e-06, "loss": 0.0182, "step": 229380 }, { "epoch": 0.9571187756089826, "grad_norm": 0.3531698442848612, "learning_rate": 2.0444780481361255e-06, "loss": 0.0181, "step": 229385 }, { "epoch": 0.957139638323973, "grad_norm": 0.5435882952371417, "learning_rate": 2.044455766235194e-06, "loss": 0.0167, "step": 229390 }, { "epoch": 0.9571605010389632, "grad_norm": 1.68005848643681, "learning_rate": 2.044433485062769e-06, "loss": 0.0183, "step": 229395 }, { "epoch": 0.9571813637539535, "grad_norm": 0.6077565577748406, "learning_rate": 2.0444112046188116e-06, "loss": 0.0212, "step": 229400 }, { "epoch": 0.9572022264689438, "grad_norm": 1.2287291389809758, "learning_rate": 2.044388924903282e-06, "loss": 0.0223, "step": 229405 }, { "epoch": 0.9572230891839341, "grad_norm": 0.6119006942123529, "learning_rate": 2.0443666459161406e-06, "loss": 0.0198, "step": 229410 }, { "epoch": 0.9572439518989243, "grad_norm": 0.753024278837826, "learning_rate": 2.0443443676573473e-06, "loss": 0.015, "step": 229415 }, { "epoch": 0.9572648146139145, "grad_norm": 0.5602692198829244, "learning_rate": 2.044322090126863e-06, "loss": 0.0172, "step": 229420 }, { "epoch": 0.9572856773289049, "grad_norm": 0.7634285601368477, "learning_rate": 2.044299813324648e-06, "loss": 0.0156, "step": 229425 }, { "epoch": 0.9573065400438951, "grad_norm": 0.4373909715579644, "learning_rate": 2.0442775372506614e-06, "loss": 0.016, "step": 229430 }, { "epoch": 0.9573274027588854, "grad_norm": 0.5912429076564796, "learning_rate": 2.044255261904865e-06, "loss": 0.0232, "step": 229435 }, { "epoch": 0.9573482654738757, "grad_norm": 0.6206819345475415, "learning_rate": 2.044232987287219e-06, "loss": 0.0212, "step": 229440 }, { "epoch": 0.957369128188866, "grad_norm": 0.4710880617648362, "learning_rate": 2.0442107133976824e-06, "loss": 0.0161, "step": 229445 }, { "epoch": 0.9573899909038562, "grad_norm": 0.9832991412754479, "learning_rate": 2.0441884402362166e-06, "loss": 0.0285, "step": 229450 }, { "epoch": 0.9574108536188466, "grad_norm": 0.7433403479084902, "learning_rate": 2.0441661678027823e-06, "loss": 0.0161, "step": 229455 }, { "epoch": 0.9574317163338368, "grad_norm": 0.4858721935612426, "learning_rate": 2.0441438960973394e-06, "loss": 0.0149, "step": 229460 }, { "epoch": 0.9574525790488271, "grad_norm": 0.539077404900305, "learning_rate": 2.0441216251198476e-06, "loss": 0.0201, "step": 229465 }, { "epoch": 0.9574734417638173, "grad_norm": 0.6106064520591655, "learning_rate": 2.044099354870268e-06, "loss": 0.0204, "step": 229470 }, { "epoch": 0.9574943044788077, "grad_norm": 0.5966940761864858, "learning_rate": 2.0440770853485607e-06, "loss": 0.0239, "step": 229475 }, { "epoch": 0.9575151671937979, "grad_norm": 0.541489513035626, "learning_rate": 2.0440548165546868e-06, "loss": 0.0221, "step": 229480 }, { "epoch": 0.9575360299087882, "grad_norm": 0.8225623600854123, "learning_rate": 2.044032548488605e-06, "loss": 0.0237, "step": 229485 }, { "epoch": 0.9575568926237785, "grad_norm": 0.7710858915457414, "learning_rate": 2.044010281150277e-06, "loss": 0.0257, "step": 229490 }, { "epoch": 0.9575777553387688, "grad_norm": 0.49283038901885456, "learning_rate": 2.043988014539663e-06, "loss": 0.0188, "step": 229495 }, { "epoch": 0.957598618053759, "grad_norm": 0.30129645893750984, "learning_rate": 2.043965748656723e-06, "loss": 0.027, "step": 229500 }, { "epoch": 0.9576194807687494, "grad_norm": 1.4180820051614664, "learning_rate": 2.0439434835014176e-06, "loss": 0.0292, "step": 229505 }, { "epoch": 0.9576403434837396, "grad_norm": 1.0521941524938387, "learning_rate": 2.0439212190737067e-06, "loss": 0.0253, "step": 229510 }, { "epoch": 0.9576612061987299, "grad_norm": 0.232470740205194, "learning_rate": 2.0438989553735514e-06, "loss": 0.0151, "step": 229515 }, { "epoch": 0.9576820689137202, "grad_norm": 0.44849107430637597, "learning_rate": 2.0438766924009115e-06, "loss": 0.0165, "step": 229520 }, { "epoch": 0.9577029316287105, "grad_norm": 0.4282797116483974, "learning_rate": 2.0438544301557483e-06, "loss": 0.014, "step": 229525 }, { "epoch": 0.9577237943437007, "grad_norm": 0.35922538494965517, "learning_rate": 2.0438321686380204e-06, "loss": 0.0251, "step": 229530 }, { "epoch": 0.9577446570586909, "grad_norm": 0.7689741596130106, "learning_rate": 2.04380990784769e-06, "loss": 0.0226, "step": 229535 }, { "epoch": 0.9577655197736813, "grad_norm": 6.833804039392634, "learning_rate": 2.0437876477847167e-06, "loss": 0.0211, "step": 229540 }, { "epoch": 0.9577863824886715, "grad_norm": 0.6995165966049313, "learning_rate": 2.043765388449061e-06, "loss": 0.0222, "step": 229545 }, { "epoch": 0.9578072452036618, "grad_norm": 0.7555446051441069, "learning_rate": 2.043743129840683e-06, "loss": 0.0151, "step": 229550 }, { "epoch": 0.9578281079186521, "grad_norm": 0.4362662681771687, "learning_rate": 2.0437208719595435e-06, "loss": 0.0158, "step": 229555 }, { "epoch": 0.9578489706336424, "grad_norm": 0.3816102202156575, "learning_rate": 2.0436986148056027e-06, "loss": 0.0169, "step": 229560 }, { "epoch": 0.9578698333486326, "grad_norm": 0.3279109184469079, "learning_rate": 2.043676358378821e-06, "loss": 0.0179, "step": 229565 }, { "epoch": 0.957890696063623, "grad_norm": 0.4210623549699773, "learning_rate": 2.043654102679159e-06, "loss": 0.0162, "step": 229570 }, { "epoch": 0.9579115587786132, "grad_norm": 0.4095659668165835, "learning_rate": 2.043631847706577e-06, "loss": 0.0171, "step": 229575 }, { "epoch": 0.9579324214936035, "grad_norm": 0.6871992015695397, "learning_rate": 2.043609593461035e-06, "loss": 0.0164, "step": 229580 }, { "epoch": 0.9579532842085938, "grad_norm": 1.035063896080686, "learning_rate": 2.043587339942494e-06, "loss": 0.0203, "step": 229585 }, { "epoch": 0.9579741469235841, "grad_norm": 0.22591458932217154, "learning_rate": 2.0435650871509145e-06, "loss": 0.0203, "step": 229590 }, { "epoch": 0.9579950096385743, "grad_norm": 0.8305254291050064, "learning_rate": 2.0435428350862564e-06, "loss": 0.0197, "step": 229595 }, { "epoch": 0.9580158723535646, "grad_norm": 0.46047274679270933, "learning_rate": 2.0435205837484805e-06, "loss": 0.0206, "step": 229600 }, { "epoch": 0.9580367350685549, "grad_norm": 0.7491532896716273, "learning_rate": 2.0434983331375465e-06, "loss": 0.0225, "step": 229605 }, { "epoch": 0.9580575977835452, "grad_norm": 1.1148988002359776, "learning_rate": 2.0434760832534163e-06, "loss": 0.0217, "step": 229610 }, { "epoch": 0.9580784604985354, "grad_norm": 0.38976690930794433, "learning_rate": 2.0434538340960493e-06, "loss": 0.014, "step": 229615 }, { "epoch": 0.9580993232135258, "grad_norm": 0.4348714210844125, "learning_rate": 2.0434315856654053e-06, "loss": 0.0211, "step": 229620 }, { "epoch": 0.958120185928516, "grad_norm": 0.605374160045468, "learning_rate": 2.043409337961446e-06, "loss": 0.0159, "step": 229625 }, { "epoch": 0.9581410486435062, "grad_norm": 0.6031112089625843, "learning_rate": 2.0433870909841318e-06, "loss": 0.0163, "step": 229630 }, { "epoch": 0.9581619113584966, "grad_norm": 0.7132677340771207, "learning_rate": 2.043364844733422e-06, "loss": 0.0258, "step": 229635 }, { "epoch": 0.9581827740734868, "grad_norm": 1.0256875022140848, "learning_rate": 2.043342599209278e-06, "loss": 0.0174, "step": 229640 }, { "epoch": 0.9582036367884771, "grad_norm": 0.4010685727322943, "learning_rate": 2.04332035441166e-06, "loss": 0.0147, "step": 229645 }, { "epoch": 0.9582244995034673, "grad_norm": 0.4640568688870152, "learning_rate": 2.043298110340529e-06, "loss": 0.0171, "step": 229650 }, { "epoch": 0.9582453622184577, "grad_norm": 0.7873403859925963, "learning_rate": 2.0432758669958443e-06, "loss": 0.0154, "step": 229655 }, { "epoch": 0.9582662249334479, "grad_norm": 0.770819521971218, "learning_rate": 2.043253624377567e-06, "loss": 0.0174, "step": 229660 }, { "epoch": 0.9582870876484382, "grad_norm": 0.4629621045656508, "learning_rate": 2.0432313824856577e-06, "loss": 0.0222, "step": 229665 }, { "epoch": 0.9583079503634285, "grad_norm": 0.541701424067072, "learning_rate": 2.0432091413200766e-06, "loss": 0.0196, "step": 229670 }, { "epoch": 0.9583288130784188, "grad_norm": 0.6862658385375641, "learning_rate": 2.0431869008807843e-06, "loss": 0.023, "step": 229675 }, { "epoch": 0.958349675793409, "grad_norm": 0.6282791013635032, "learning_rate": 2.0431646611677415e-06, "loss": 0.0157, "step": 229680 }, { "epoch": 0.9583705385083994, "grad_norm": 0.609494810523367, "learning_rate": 2.0431424221809084e-06, "loss": 0.022, "step": 229685 }, { "epoch": 0.9583914012233896, "grad_norm": 0.6007282026086203, "learning_rate": 2.0431201839202454e-06, "loss": 0.021, "step": 229690 }, { "epoch": 0.9584122639383799, "grad_norm": 0.5552857353042693, "learning_rate": 2.043097946385713e-06, "loss": 0.015, "step": 229695 }, { "epoch": 0.9584331266533702, "grad_norm": 0.9220472614791605, "learning_rate": 2.0430757095772714e-06, "loss": 0.0194, "step": 229700 }, { "epoch": 0.9584539893683605, "grad_norm": 0.33312828189589916, "learning_rate": 2.0430534734948825e-06, "loss": 0.0216, "step": 229705 }, { "epoch": 0.9584748520833507, "grad_norm": 0.580025935146549, "learning_rate": 2.043031238138505e-06, "loss": 0.0163, "step": 229710 }, { "epoch": 0.958495714798341, "grad_norm": 0.5532639451893977, "learning_rate": 2.0430090035081002e-06, "loss": 0.0182, "step": 229715 }, { "epoch": 0.9585165775133313, "grad_norm": 0.32879243199696356, "learning_rate": 2.0429867696036286e-06, "loss": 0.0168, "step": 229720 }, { "epoch": 0.9585374402283215, "grad_norm": 0.6478176305976813, "learning_rate": 2.0429645364250507e-06, "loss": 0.0227, "step": 229725 }, { "epoch": 0.9585583029433118, "grad_norm": 0.38002661152054873, "learning_rate": 2.0429423039723267e-06, "loss": 0.0293, "step": 229730 }, { "epoch": 0.9585791656583021, "grad_norm": 0.4437920479362384, "learning_rate": 2.042920072245418e-06, "loss": 0.0169, "step": 229735 }, { "epoch": 0.9586000283732924, "grad_norm": 0.3896679092687204, "learning_rate": 2.042897841244284e-06, "loss": 0.022, "step": 229740 }, { "epoch": 0.9586208910882826, "grad_norm": 0.7564833090216249, "learning_rate": 2.0428756109688856e-06, "loss": 0.02, "step": 229745 }, { "epoch": 0.958641753803273, "grad_norm": 0.6074911871169963, "learning_rate": 2.042853381419183e-06, "loss": 0.0175, "step": 229750 }, { "epoch": 0.9586626165182632, "grad_norm": 0.4618117522020944, "learning_rate": 2.042831152595138e-06, "loss": 0.0184, "step": 229755 }, { "epoch": 0.9586834792332535, "grad_norm": 0.2503908962061876, "learning_rate": 2.0428089244967098e-06, "loss": 0.0153, "step": 229760 }, { "epoch": 0.9587043419482437, "grad_norm": 0.633704754385471, "learning_rate": 2.0427866971238596e-06, "loss": 0.0177, "step": 229765 }, { "epoch": 0.9587252046632341, "grad_norm": 0.5295640862645558, "learning_rate": 2.042764470476547e-06, "loss": 0.0283, "step": 229770 }, { "epoch": 0.9587460673782243, "grad_norm": 0.6001317951301739, "learning_rate": 2.0427422445547336e-06, "loss": 0.0224, "step": 229775 }, { "epoch": 0.9587669300932146, "grad_norm": 1.386258752158993, "learning_rate": 2.04272001935838e-06, "loss": 0.0246, "step": 229780 }, { "epoch": 0.9587877928082049, "grad_norm": 1.0088799839041844, "learning_rate": 2.042697794887446e-06, "loss": 0.0275, "step": 229785 }, { "epoch": 0.9588086555231952, "grad_norm": 0.5955801274634243, "learning_rate": 2.042675571141892e-06, "loss": 0.0227, "step": 229790 }, { "epoch": 0.9588295182381854, "grad_norm": 0.8239767186680578, "learning_rate": 2.042653348121679e-06, "loss": 0.0221, "step": 229795 }, { "epoch": 0.9588503809531758, "grad_norm": 0.7077275481804021, "learning_rate": 2.042631125826768e-06, "loss": 0.0163, "step": 229800 }, { "epoch": 0.958871243668166, "grad_norm": 0.9709881510785354, "learning_rate": 2.042608904257119e-06, "loss": 0.0182, "step": 229805 }, { "epoch": 0.9588921063831563, "grad_norm": 0.8164364483904675, "learning_rate": 2.0425866834126916e-06, "loss": 0.0178, "step": 229810 }, { "epoch": 0.9589129690981466, "grad_norm": 0.5471161688164088, "learning_rate": 2.0425644632934487e-06, "loss": 0.0728, "step": 229815 }, { "epoch": 0.9589338318131368, "grad_norm": 0.7516813061539307, "learning_rate": 2.0425422438993486e-06, "loss": 0.0188, "step": 229820 }, { "epoch": 0.9589546945281271, "grad_norm": 0.5566818561574827, "learning_rate": 2.042520025230353e-06, "loss": 0.0155, "step": 229825 }, { "epoch": 0.9589755572431173, "grad_norm": 0.9476152091442073, "learning_rate": 2.042497807286422e-06, "loss": 0.0182, "step": 229830 }, { "epoch": 0.9589964199581077, "grad_norm": 0.8166062489152457, "learning_rate": 2.042475590067517e-06, "loss": 0.0197, "step": 229835 }, { "epoch": 0.9590172826730979, "grad_norm": 0.41954194596134825, "learning_rate": 2.0424533735735972e-06, "loss": 0.0199, "step": 229840 }, { "epoch": 0.9590381453880882, "grad_norm": 0.4057310831193222, "learning_rate": 2.0424311578046245e-06, "loss": 0.0195, "step": 229845 }, { "epoch": 0.9590590081030785, "grad_norm": 0.7350580782062337, "learning_rate": 2.0424089427605587e-06, "loss": 0.0214, "step": 229850 }, { "epoch": 0.9590798708180688, "grad_norm": 1.0175057488661663, "learning_rate": 2.0423867284413608e-06, "loss": 0.0293, "step": 229855 }, { "epoch": 0.959100733533059, "grad_norm": 0.6996220265026795, "learning_rate": 2.042364514846991e-06, "loss": 0.017, "step": 229860 }, { "epoch": 0.9591215962480494, "grad_norm": 0.42436200989736567, "learning_rate": 2.04234230197741e-06, "loss": 0.0221, "step": 229865 }, { "epoch": 0.9591424589630396, "grad_norm": 0.7385160798124831, "learning_rate": 2.0423200898325786e-06, "loss": 0.0196, "step": 229870 }, { "epoch": 0.9591633216780299, "grad_norm": 0.4423438998125427, "learning_rate": 2.0422978784124572e-06, "loss": 0.0188, "step": 229875 }, { "epoch": 0.9591841843930202, "grad_norm": 0.6901359018036702, "learning_rate": 2.042275667717006e-06, "loss": 0.0183, "step": 229880 }, { "epoch": 0.9592050471080105, "grad_norm": 0.7760565061130213, "learning_rate": 2.0422534577461864e-06, "loss": 0.0182, "step": 229885 }, { "epoch": 0.9592259098230007, "grad_norm": 0.6332083112342065, "learning_rate": 2.0422312484999586e-06, "loss": 0.0259, "step": 229890 }, { "epoch": 0.959246772537991, "grad_norm": 0.6525313629311148, "learning_rate": 2.0422090399782832e-06, "loss": 0.0153, "step": 229895 }, { "epoch": 0.9592676352529813, "grad_norm": 0.2247070266553884, "learning_rate": 2.042186832181121e-06, "loss": 0.015, "step": 229900 }, { "epoch": 0.9592884979679716, "grad_norm": 1.0271723154307517, "learning_rate": 2.042164625108432e-06, "loss": 0.0187, "step": 229905 }, { "epoch": 0.9593093606829618, "grad_norm": 0.2899240010910496, "learning_rate": 2.0421424187601776e-06, "loss": 0.0116, "step": 229910 }, { "epoch": 0.9593302233979522, "grad_norm": 0.3635036201837215, "learning_rate": 2.0421202131363175e-06, "loss": 0.0225, "step": 229915 }, { "epoch": 0.9593510861129424, "grad_norm": 1.681696698610615, "learning_rate": 2.0420980082368138e-06, "loss": 0.0214, "step": 229920 }, { "epoch": 0.9593719488279326, "grad_norm": 0.5662784177485249, "learning_rate": 2.0420758040616253e-06, "loss": 0.0203, "step": 229925 }, { "epoch": 0.959392811542923, "grad_norm": 0.38869439862080885, "learning_rate": 2.042053600610714e-06, "loss": 0.0165, "step": 229930 }, { "epoch": 0.9594136742579132, "grad_norm": 0.5313432932620061, "learning_rate": 2.0420313978840398e-06, "loss": 0.0214, "step": 229935 }, { "epoch": 0.9594345369729035, "grad_norm": 0.2510228643436948, "learning_rate": 2.0420091958815636e-06, "loss": 0.016, "step": 229940 }, { "epoch": 0.9594553996878937, "grad_norm": 0.9391166863530762, "learning_rate": 2.041986994603246e-06, "loss": 0.0424, "step": 229945 }, { "epoch": 0.9594762624028841, "grad_norm": 0.7865149479961752, "learning_rate": 2.041964794049048e-06, "loss": 0.0173, "step": 229950 }, { "epoch": 0.9594971251178743, "grad_norm": 1.1402192554058996, "learning_rate": 2.0419425942189292e-06, "loss": 0.0258, "step": 229955 }, { "epoch": 0.9595179878328646, "grad_norm": 0.7349996237683036, "learning_rate": 2.0419203951128513e-06, "loss": 0.0176, "step": 229960 }, { "epoch": 0.9595388505478549, "grad_norm": 0.4076733315945651, "learning_rate": 2.0418981967307744e-06, "loss": 0.0177, "step": 229965 }, { "epoch": 0.9595597132628452, "grad_norm": 0.6425521148338262, "learning_rate": 2.0418759990726595e-06, "loss": 0.0262, "step": 229970 }, { "epoch": 0.9595805759778354, "grad_norm": 0.5494746868289931, "learning_rate": 2.041853802138467e-06, "loss": 0.0235, "step": 229975 }, { "epoch": 0.9596014386928258, "grad_norm": 0.6667536194027274, "learning_rate": 2.041831605928158e-06, "loss": 0.0226, "step": 229980 }, { "epoch": 0.959622301407816, "grad_norm": 0.9264481041683783, "learning_rate": 2.041809410441692e-06, "loss": 0.0321, "step": 229985 }, { "epoch": 0.9596431641228063, "grad_norm": 0.5501701542045031, "learning_rate": 2.0417872156790307e-06, "loss": 0.0212, "step": 229990 }, { "epoch": 0.9596640268377966, "grad_norm": 1.0336479887046441, "learning_rate": 2.0417650216401343e-06, "loss": 0.0193, "step": 229995 }, { "epoch": 0.9596848895527869, "grad_norm": 0.3663499830670154, "learning_rate": 2.0417428283249636e-06, "loss": 0.0203, "step": 230000 }, { "epoch": 0.9597057522677771, "grad_norm": 0.3945599608261717, "learning_rate": 2.0417206357334796e-06, "loss": 0.0192, "step": 230005 }, { "epoch": 0.9597266149827673, "grad_norm": 0.45039006229170825, "learning_rate": 2.0416984438656425e-06, "loss": 0.026, "step": 230010 }, { "epoch": 0.9597474776977577, "grad_norm": 0.5324636193161245, "learning_rate": 2.0416762527214134e-06, "loss": 0.0196, "step": 230015 }, { "epoch": 0.959768340412748, "grad_norm": 0.5330499946204859, "learning_rate": 2.041654062300753e-06, "loss": 0.02, "step": 230020 }, { "epoch": 0.9597892031277382, "grad_norm": 0.4262909895294801, "learning_rate": 2.041631872603621e-06, "loss": 0.0191, "step": 230025 }, { "epoch": 0.9598100658427285, "grad_norm": 1.2094974737479713, "learning_rate": 2.0416096836299788e-06, "loss": 0.0226, "step": 230030 }, { "epoch": 0.9598309285577188, "grad_norm": 0.5041677194672719, "learning_rate": 2.0415874953797873e-06, "loss": 0.0147, "step": 230035 }, { "epoch": 0.959851791272709, "grad_norm": 0.2998449604041129, "learning_rate": 2.041565307853007e-06, "loss": 0.0163, "step": 230040 }, { "epoch": 0.9598726539876994, "grad_norm": 0.3721712801242595, "learning_rate": 2.0415431210495987e-06, "loss": 0.0168, "step": 230045 }, { "epoch": 0.9598935167026896, "grad_norm": 0.5014694246382729, "learning_rate": 2.041520934969523e-06, "loss": 0.0237, "step": 230050 }, { "epoch": 0.9599143794176799, "grad_norm": 0.6222519881315617, "learning_rate": 2.0414987496127405e-06, "loss": 0.0184, "step": 230055 }, { "epoch": 0.9599352421326702, "grad_norm": 0.6809044461790794, "learning_rate": 2.0414765649792116e-06, "loss": 0.0155, "step": 230060 }, { "epoch": 0.9599561048476605, "grad_norm": 0.6798551684816111, "learning_rate": 2.041454381068898e-06, "loss": 0.0183, "step": 230065 }, { "epoch": 0.9599769675626507, "grad_norm": 2.207731253530779, "learning_rate": 2.041432197881759e-06, "loss": 0.0162, "step": 230070 }, { "epoch": 0.959997830277641, "grad_norm": 0.3689419978855289, "learning_rate": 2.0414100154177563e-06, "loss": 0.0517, "step": 230075 }, { "epoch": 0.9600186929926313, "grad_norm": 0.6287695691904086, "learning_rate": 2.0413878336768508e-06, "loss": 0.0228, "step": 230080 }, { "epoch": 0.9600395557076216, "grad_norm": 0.5310685169324711, "learning_rate": 2.041365652659002e-06, "loss": 0.0224, "step": 230085 }, { "epoch": 0.9600604184226118, "grad_norm": 1.0137211232324732, "learning_rate": 2.041343472364172e-06, "loss": 0.0168, "step": 230090 }, { "epoch": 0.9600812811376022, "grad_norm": 0.6666577572831458, "learning_rate": 2.041321292792321e-06, "loss": 0.0179, "step": 230095 }, { "epoch": 0.9601021438525924, "grad_norm": 0.6203729901083883, "learning_rate": 2.0412991139434095e-06, "loss": 0.0202, "step": 230100 }, { "epoch": 0.9601230065675826, "grad_norm": 0.5581423746760489, "learning_rate": 2.0412769358173984e-06, "loss": 0.0289, "step": 230105 }, { "epoch": 0.960143869282573, "grad_norm": 0.4777886925464109, "learning_rate": 2.0412547584142485e-06, "loss": 0.0217, "step": 230110 }, { "epoch": 0.9601647319975632, "grad_norm": 0.3193343678936277, "learning_rate": 2.04123258173392e-06, "loss": 0.0115, "step": 230115 }, { "epoch": 0.9601855947125535, "grad_norm": 0.9523474155958637, "learning_rate": 2.0412104057763747e-06, "loss": 0.0185, "step": 230120 }, { "epoch": 0.9602064574275437, "grad_norm": 0.7963625426174674, "learning_rate": 2.0411882305415724e-06, "loss": 0.0259, "step": 230125 }, { "epoch": 0.9602273201425341, "grad_norm": 0.3339449808903376, "learning_rate": 2.041166056029474e-06, "loss": 0.023, "step": 230130 }, { "epoch": 0.9602481828575243, "grad_norm": 0.6304227544279173, "learning_rate": 2.041143882240041e-06, "loss": 0.0195, "step": 230135 }, { "epoch": 0.9602690455725146, "grad_norm": 0.56034485973528, "learning_rate": 2.041121709173233e-06, "loss": 0.0212, "step": 230140 }, { "epoch": 0.9602899082875049, "grad_norm": 1.0420778998216869, "learning_rate": 2.0410995368290117e-06, "loss": 0.0239, "step": 230145 }, { "epoch": 0.9603107710024952, "grad_norm": 0.6144981083848583, "learning_rate": 2.041077365207337e-06, "loss": 0.0245, "step": 230150 }, { "epoch": 0.9603316337174854, "grad_norm": 0.6271889367882907, "learning_rate": 2.04105519430817e-06, "loss": 0.0219, "step": 230155 }, { "epoch": 0.9603524964324758, "grad_norm": 0.5982294701618431, "learning_rate": 2.0410330241314726e-06, "loss": 0.014, "step": 230160 }, { "epoch": 0.960373359147466, "grad_norm": 0.6772969007335501, "learning_rate": 2.0410108546772035e-06, "loss": 0.0156, "step": 230165 }, { "epoch": 0.9603942218624563, "grad_norm": 0.5579222997218027, "learning_rate": 2.040988685945325e-06, "loss": 0.0252, "step": 230170 }, { "epoch": 0.9604150845774466, "grad_norm": 0.5427369581653846, "learning_rate": 2.0409665179357974e-06, "loss": 0.0206, "step": 230175 }, { "epoch": 0.9604359472924369, "grad_norm": 0.38636581835101313, "learning_rate": 2.040944350648581e-06, "loss": 0.0179, "step": 230180 }, { "epoch": 0.9604568100074271, "grad_norm": 0.6885185358656102, "learning_rate": 2.0409221840836375e-06, "loss": 0.022, "step": 230185 }, { "epoch": 0.9604776727224174, "grad_norm": 0.3794782378679965, "learning_rate": 2.0409000182409276e-06, "loss": 0.0283, "step": 230190 }, { "epoch": 0.9604985354374077, "grad_norm": 0.6701566719275917, "learning_rate": 2.040877853120411e-06, "loss": 0.0237, "step": 230195 }, { "epoch": 0.960519398152398, "grad_norm": 0.5109725812755139, "learning_rate": 2.0408556887220494e-06, "loss": 0.017, "step": 230200 }, { "epoch": 0.9605402608673882, "grad_norm": 0.39832631384458117, "learning_rate": 2.0408335250458033e-06, "loss": 0.0206, "step": 230205 }, { "epoch": 0.9605611235823786, "grad_norm": 0.7222605576119785, "learning_rate": 2.0408113620916337e-06, "loss": 0.0187, "step": 230210 }, { "epoch": 0.9605819862973688, "grad_norm": 0.6157856607672544, "learning_rate": 2.040789199859501e-06, "loss": 0.0227, "step": 230215 }, { "epoch": 0.960602849012359, "grad_norm": 0.9177978590289585, "learning_rate": 2.0407670383493664e-06, "loss": 0.0263, "step": 230220 }, { "epoch": 0.9606237117273494, "grad_norm": 0.4106180314454486, "learning_rate": 2.0407448775611905e-06, "loss": 0.0192, "step": 230225 }, { "epoch": 0.9606445744423396, "grad_norm": 0.8701740177422106, "learning_rate": 2.040722717494934e-06, "loss": 0.0164, "step": 230230 }, { "epoch": 0.9606654371573299, "grad_norm": 0.4945625362368681, "learning_rate": 2.040700558150558e-06, "loss": 0.0254, "step": 230235 }, { "epoch": 0.9606862998723202, "grad_norm": 1.0774380575246896, "learning_rate": 2.040678399528023e-06, "loss": 0.0346, "step": 230240 }, { "epoch": 0.9607071625873105, "grad_norm": 0.8114483035896204, "learning_rate": 2.0406562416272903e-06, "loss": 0.0163, "step": 230245 }, { "epoch": 0.9607280253023007, "grad_norm": 0.7538994938036434, "learning_rate": 2.04063408444832e-06, "loss": 0.0199, "step": 230250 }, { "epoch": 0.960748888017291, "grad_norm": 0.2928441526285125, "learning_rate": 2.0406119279910736e-06, "loss": 0.0181, "step": 230255 }, { "epoch": 0.9607697507322813, "grad_norm": 0.8106404799309755, "learning_rate": 2.0405897722555115e-06, "loss": 0.0179, "step": 230260 }, { "epoch": 0.9607906134472716, "grad_norm": 0.5799451559468102, "learning_rate": 2.0405676172415947e-06, "loss": 0.0173, "step": 230265 }, { "epoch": 0.9608114761622618, "grad_norm": 0.7363217610668402, "learning_rate": 2.040545462949284e-06, "loss": 0.0263, "step": 230270 }, { "epoch": 0.9608323388772522, "grad_norm": 0.3905999468404815, "learning_rate": 2.04052330937854e-06, "loss": 0.0194, "step": 230275 }, { "epoch": 0.9608532015922424, "grad_norm": 0.42179542946675214, "learning_rate": 2.0405011565293233e-06, "loss": 0.0175, "step": 230280 }, { "epoch": 0.9608740643072327, "grad_norm": 0.7427067954463765, "learning_rate": 2.040479004401596e-06, "loss": 0.0175, "step": 230285 }, { "epoch": 0.960894927022223, "grad_norm": 0.484414555225824, "learning_rate": 2.0404568529953175e-06, "loss": 0.0199, "step": 230290 }, { "epoch": 0.9609157897372133, "grad_norm": 0.5361625682069995, "learning_rate": 2.0404347023104497e-06, "loss": 0.0179, "step": 230295 }, { "epoch": 0.9609366524522035, "grad_norm": 0.33719763019348503, "learning_rate": 2.040412552346953e-06, "loss": 0.0222, "step": 230300 }, { "epoch": 0.9609575151671937, "grad_norm": 1.1739797705841717, "learning_rate": 2.0403904031047873e-06, "loss": 0.0252, "step": 230305 }, { "epoch": 0.9609783778821841, "grad_norm": 0.5122638256961777, "learning_rate": 2.0403682545839155e-06, "loss": 0.0131, "step": 230310 }, { "epoch": 0.9609992405971743, "grad_norm": 0.44794361133925537, "learning_rate": 2.040346106784297e-06, "loss": 0.0228, "step": 230315 }, { "epoch": 0.9610201033121646, "grad_norm": 0.43878138739359995, "learning_rate": 2.0403239597058927e-06, "loss": 0.0243, "step": 230320 }, { "epoch": 0.9610409660271549, "grad_norm": 0.7793801200341305, "learning_rate": 2.0403018133486636e-06, "loss": 0.0215, "step": 230325 }, { "epoch": 0.9610618287421452, "grad_norm": 1.061783342427637, "learning_rate": 2.0402796677125713e-06, "loss": 0.0261, "step": 230330 }, { "epoch": 0.9610826914571354, "grad_norm": 1.3761349969281433, "learning_rate": 2.0402575227975756e-06, "loss": 0.0252, "step": 230335 }, { "epoch": 0.9611035541721258, "grad_norm": 0.7188212444597076, "learning_rate": 2.0402353786036382e-06, "loss": 0.0195, "step": 230340 }, { "epoch": 0.961124416887116, "grad_norm": 0.6943763395962147, "learning_rate": 2.04021323513072e-06, "loss": 0.0226, "step": 230345 }, { "epoch": 0.9611452796021063, "grad_norm": 0.381266460698675, "learning_rate": 2.0401910923787807e-06, "loss": 0.0196, "step": 230350 }, { "epoch": 0.9611661423170966, "grad_norm": 0.43890291775940005, "learning_rate": 2.040168950347782e-06, "loss": 0.0222, "step": 230355 }, { "epoch": 0.9611870050320869, "grad_norm": 0.7651498166142676, "learning_rate": 2.0401468090376845e-06, "loss": 0.0208, "step": 230360 }, { "epoch": 0.9612078677470771, "grad_norm": 0.7294899261946449, "learning_rate": 2.04012466844845e-06, "loss": 0.0197, "step": 230365 }, { "epoch": 0.9612287304620674, "grad_norm": 0.3064543149355476, "learning_rate": 2.0401025285800384e-06, "loss": 0.0147, "step": 230370 }, { "epoch": 0.9612495931770577, "grad_norm": 0.7163723865911985, "learning_rate": 2.040080389432411e-06, "loss": 0.0212, "step": 230375 }, { "epoch": 0.961270455892048, "grad_norm": 0.2097690704189643, "learning_rate": 2.0400582510055286e-06, "loss": 0.0139, "step": 230380 }, { "epoch": 0.9612913186070382, "grad_norm": 0.6339869973573335, "learning_rate": 2.040036113299352e-06, "loss": 0.0184, "step": 230385 }, { "epoch": 0.9613121813220286, "grad_norm": 0.5985831567312123, "learning_rate": 2.040013976313842e-06, "loss": 0.0163, "step": 230390 }, { "epoch": 0.9613330440370188, "grad_norm": 0.5977481225352441, "learning_rate": 2.0399918400489598e-06, "loss": 0.0217, "step": 230395 }, { "epoch": 0.961353906752009, "grad_norm": 0.47935981667467814, "learning_rate": 2.0399697045046666e-06, "loss": 0.0162, "step": 230400 }, { "epoch": 0.9613747694669994, "grad_norm": 0.8582481865665481, "learning_rate": 2.039947569680922e-06, "loss": 0.0223, "step": 230405 }, { "epoch": 0.9613956321819896, "grad_norm": 0.286312768781447, "learning_rate": 2.039925435577688e-06, "loss": 0.0159, "step": 230410 }, { "epoch": 0.9614164948969799, "grad_norm": 0.46762032996561004, "learning_rate": 2.0399033021949256e-06, "loss": 0.0187, "step": 230415 }, { "epoch": 0.9614373576119702, "grad_norm": 1.0688680559154116, "learning_rate": 2.0398811695325956e-06, "loss": 0.0209, "step": 230420 }, { "epoch": 0.9614582203269605, "grad_norm": 0.7074037588435389, "learning_rate": 2.0398590375906586e-06, "loss": 0.021, "step": 230425 }, { "epoch": 0.9614790830419507, "grad_norm": 1.1475123177147348, "learning_rate": 2.0398369063690757e-06, "loss": 0.02, "step": 230430 }, { "epoch": 0.961499945756941, "grad_norm": 0.6016658910186982, "learning_rate": 2.0398147758678076e-06, "loss": 0.0244, "step": 230435 }, { "epoch": 0.9615208084719313, "grad_norm": 0.7955321257846119, "learning_rate": 2.0397926460868155e-06, "loss": 0.0254, "step": 230440 }, { "epoch": 0.9615416711869216, "grad_norm": 0.9117513354053259, "learning_rate": 2.03977051702606e-06, "loss": 0.0222, "step": 230445 }, { "epoch": 0.9615625339019118, "grad_norm": 0.715328703697228, "learning_rate": 2.039748388685503e-06, "loss": 0.0311, "step": 230450 }, { "epoch": 0.9615833966169022, "grad_norm": 0.88448988604064, "learning_rate": 2.039726261065104e-06, "loss": 0.0201, "step": 230455 }, { "epoch": 0.9616042593318924, "grad_norm": 0.5091840101847548, "learning_rate": 2.0397041341648247e-06, "loss": 0.0148, "step": 230460 }, { "epoch": 0.9616251220468827, "grad_norm": 1.1248947546537782, "learning_rate": 2.039682007984626e-06, "loss": 0.0188, "step": 230465 }, { "epoch": 0.961645984761873, "grad_norm": 0.7530095449577284, "learning_rate": 2.039659882524469e-06, "loss": 0.02, "step": 230470 }, { "epoch": 0.9616668474768633, "grad_norm": 0.30061548575421027, "learning_rate": 2.0396377577843144e-06, "loss": 0.0241, "step": 230475 }, { "epoch": 0.9616877101918535, "grad_norm": 0.346431749360048, "learning_rate": 2.039615633764123e-06, "loss": 0.0101, "step": 230480 }, { "epoch": 0.9617085729068438, "grad_norm": 1.091164624526997, "learning_rate": 2.0395935104638563e-06, "loss": 0.0193, "step": 230485 }, { "epoch": 0.9617294356218341, "grad_norm": 0.4268711613461748, "learning_rate": 2.039571387883475e-06, "loss": 0.0221, "step": 230490 }, { "epoch": 0.9617502983368243, "grad_norm": 0.7961531316072922, "learning_rate": 2.03954926602294e-06, "loss": 0.0305, "step": 230495 }, { "epoch": 0.9617711610518146, "grad_norm": 0.1914125748242068, "learning_rate": 2.0395271448822123e-06, "loss": 0.0218, "step": 230500 }, { "epoch": 0.961792023766805, "grad_norm": 0.4893412043087707, "learning_rate": 2.0395050244612524e-06, "loss": 0.0191, "step": 230505 }, { "epoch": 0.9618128864817952, "grad_norm": 0.6926354056480238, "learning_rate": 2.039482904760022e-06, "loss": 0.0192, "step": 230510 }, { "epoch": 0.9618337491967854, "grad_norm": 0.5732618335587211, "learning_rate": 2.0394607857784814e-06, "loss": 0.0181, "step": 230515 }, { "epoch": 0.9618546119117758, "grad_norm": 0.7281741282840007, "learning_rate": 2.0394386675165924e-06, "loss": 0.0193, "step": 230520 }, { "epoch": 0.961875474626766, "grad_norm": 0.5365695290649833, "learning_rate": 2.0394165499743154e-06, "loss": 0.0242, "step": 230525 }, { "epoch": 0.9618963373417563, "grad_norm": 0.4588830710963669, "learning_rate": 2.0393944331516116e-06, "loss": 0.0172, "step": 230530 }, { "epoch": 0.9619172000567466, "grad_norm": 0.721028019611593, "learning_rate": 2.039372317048442e-06, "loss": 0.0171, "step": 230535 }, { "epoch": 0.9619380627717369, "grad_norm": 0.7036751205947386, "learning_rate": 2.0393502016647667e-06, "loss": 0.0171, "step": 230540 }, { "epoch": 0.9619589254867271, "grad_norm": 0.5523977779175185, "learning_rate": 2.0393280870005485e-06, "loss": 0.0171, "step": 230545 }, { "epoch": 0.9619797882017174, "grad_norm": 0.3309026605374299, "learning_rate": 2.0393059730557467e-06, "loss": 0.0171, "step": 230550 }, { "epoch": 0.9620006509167077, "grad_norm": 0.49546735855319696, "learning_rate": 2.039283859830323e-06, "loss": 0.0204, "step": 230555 }, { "epoch": 0.962021513631698, "grad_norm": 0.4857262003062816, "learning_rate": 2.0392617473242386e-06, "loss": 0.0191, "step": 230560 }, { "epoch": 0.9620423763466882, "grad_norm": 0.7441436395226649, "learning_rate": 2.039239635537454e-06, "loss": 0.0139, "step": 230565 }, { "epoch": 0.9620632390616786, "grad_norm": 0.7008723634464046, "learning_rate": 2.0392175244699304e-06, "loss": 0.0125, "step": 230570 }, { "epoch": 0.9620841017766688, "grad_norm": 0.8416485966773528, "learning_rate": 2.039195414121629e-06, "loss": 0.0237, "step": 230575 }, { "epoch": 0.962104964491659, "grad_norm": 0.817268158013789, "learning_rate": 2.0391733044925103e-06, "loss": 0.0181, "step": 230580 }, { "epoch": 0.9621258272066494, "grad_norm": 0.77508401097644, "learning_rate": 2.039151195582536e-06, "loss": 0.0201, "step": 230585 }, { "epoch": 0.9621466899216397, "grad_norm": 0.729247906190615, "learning_rate": 2.0391290873916666e-06, "loss": 0.0183, "step": 230590 }, { "epoch": 0.9621675526366299, "grad_norm": 0.47166296334727276, "learning_rate": 2.0391069799198635e-06, "loss": 0.0201, "step": 230595 }, { "epoch": 0.9621884153516203, "grad_norm": 0.6691594905035911, "learning_rate": 2.0390848731670876e-06, "loss": 0.0236, "step": 230600 }, { "epoch": 0.9622092780666105, "grad_norm": 0.5260244820817714, "learning_rate": 2.0390627671333004e-06, "loss": 0.0144, "step": 230605 }, { "epoch": 0.9622301407816007, "grad_norm": 0.23100461340815492, "learning_rate": 2.039040661818461e-06, "loss": 0.0232, "step": 230610 }, { "epoch": 0.962251003496591, "grad_norm": 0.41187652716338746, "learning_rate": 2.0390185572225327e-06, "loss": 0.0185, "step": 230615 }, { "epoch": 0.9622718662115813, "grad_norm": 0.7558039708049802, "learning_rate": 2.038996453345476e-06, "loss": 0.0193, "step": 230620 }, { "epoch": 0.9622927289265716, "grad_norm": 0.23287503362853365, "learning_rate": 2.038974350187251e-06, "loss": 0.0172, "step": 230625 }, { "epoch": 0.9623135916415618, "grad_norm": 0.44190874959232157, "learning_rate": 2.038952247747819e-06, "loss": 0.022, "step": 230630 }, { "epoch": 0.9623344543565522, "grad_norm": 0.9820500171494516, "learning_rate": 2.0389301460271414e-06, "loss": 0.0193, "step": 230635 }, { "epoch": 0.9623553170715424, "grad_norm": 0.4464493571150587, "learning_rate": 2.03890804502518e-06, "loss": 0.021, "step": 230640 }, { "epoch": 0.9623761797865327, "grad_norm": 0.6035887251119617, "learning_rate": 2.0388859447418945e-06, "loss": 0.0211, "step": 230645 }, { "epoch": 0.962397042501523, "grad_norm": 0.39153929582202013, "learning_rate": 2.038863845177247e-06, "loss": 0.0124, "step": 230650 }, { "epoch": 0.9624179052165133, "grad_norm": 0.5434410410470418, "learning_rate": 2.038841746331198e-06, "loss": 0.0156, "step": 230655 }, { "epoch": 0.9624387679315035, "grad_norm": 0.34522011124887647, "learning_rate": 2.038819648203708e-06, "loss": 0.0178, "step": 230660 }, { "epoch": 0.9624596306464938, "grad_norm": 0.38657263210997705, "learning_rate": 2.0387975507947393e-06, "loss": 0.0194, "step": 230665 }, { "epoch": 0.9624804933614841, "grad_norm": 0.7252470250814647, "learning_rate": 2.038775454104252e-06, "loss": 0.0175, "step": 230670 }, { "epoch": 0.9625013560764744, "grad_norm": 0.41733604976920424, "learning_rate": 2.0387533581322073e-06, "loss": 0.0152, "step": 230675 }, { "epoch": 0.9625222187914646, "grad_norm": 0.5159029643791155, "learning_rate": 2.038731262878567e-06, "loss": 0.0181, "step": 230680 }, { "epoch": 0.962543081506455, "grad_norm": 0.7762520587120535, "learning_rate": 2.0387091683432913e-06, "loss": 0.0205, "step": 230685 }, { "epoch": 0.9625639442214452, "grad_norm": 0.8920987695334069, "learning_rate": 2.0386870745263416e-06, "loss": 0.0237, "step": 230690 }, { "epoch": 0.9625848069364354, "grad_norm": 1.6331141729613359, "learning_rate": 2.038664981427679e-06, "loss": 0.0239, "step": 230695 }, { "epoch": 0.9626056696514258, "grad_norm": 0.4372595899106732, "learning_rate": 2.0386428890472647e-06, "loss": 0.015, "step": 230700 }, { "epoch": 0.962626532366416, "grad_norm": 0.310943512131071, "learning_rate": 2.03862079738506e-06, "loss": 0.0116, "step": 230705 }, { "epoch": 0.9626473950814063, "grad_norm": 0.6713394877377749, "learning_rate": 2.038598706441025e-06, "loss": 0.0208, "step": 230710 }, { "epoch": 0.9626682577963966, "grad_norm": 0.9683020682544413, "learning_rate": 2.038576616215122e-06, "loss": 0.0182, "step": 230715 }, { "epoch": 0.9626891205113869, "grad_norm": 0.2487203489608427, "learning_rate": 2.0385545267073113e-06, "loss": 0.0157, "step": 230720 }, { "epoch": 0.9627099832263771, "grad_norm": 0.5824605933260891, "learning_rate": 2.0385324379175547e-06, "loss": 0.0128, "step": 230725 }, { "epoch": 0.9627308459413674, "grad_norm": 0.26994617180011965, "learning_rate": 2.038510349845812e-06, "loss": 0.0156, "step": 230730 }, { "epoch": 0.9627517086563577, "grad_norm": 0.7689133447983133, "learning_rate": 2.0384882624920453e-06, "loss": 0.0203, "step": 230735 }, { "epoch": 0.962772571371348, "grad_norm": 1.0953028403577292, "learning_rate": 2.038466175856216e-06, "loss": 0.0148, "step": 230740 }, { "epoch": 0.9627934340863382, "grad_norm": 0.3605275113184607, "learning_rate": 2.0384440899382842e-06, "loss": 0.0164, "step": 230745 }, { "epoch": 0.9628142968013286, "grad_norm": 0.7641958934245185, "learning_rate": 2.038422004738212e-06, "loss": 0.0213, "step": 230750 }, { "epoch": 0.9628351595163188, "grad_norm": 0.36936165626897666, "learning_rate": 2.03839992025596e-06, "loss": 0.0156, "step": 230755 }, { "epoch": 0.9628560222313091, "grad_norm": 0.37568513827260364, "learning_rate": 2.038377836491489e-06, "loss": 0.0181, "step": 230760 }, { "epoch": 0.9628768849462994, "grad_norm": 0.5186534477574204, "learning_rate": 2.038355753444761e-06, "loss": 0.0208, "step": 230765 }, { "epoch": 0.9628977476612897, "grad_norm": 0.22292682158163135, "learning_rate": 2.0383336711157363e-06, "loss": 0.0155, "step": 230770 }, { "epoch": 0.9629186103762799, "grad_norm": 0.6439253674417118, "learning_rate": 2.038311589504377e-06, "loss": 0.0148, "step": 230775 }, { "epoch": 0.9629394730912703, "grad_norm": 1.0939409145284056, "learning_rate": 2.0382895086106427e-06, "loss": 0.0332, "step": 230780 }, { "epoch": 0.9629603358062605, "grad_norm": 0.6996598410850974, "learning_rate": 2.0382674284344958e-06, "loss": 0.0238, "step": 230785 }, { "epoch": 0.9629811985212507, "grad_norm": 0.735208871331533, "learning_rate": 2.0382453489758967e-06, "loss": 0.0229, "step": 230790 }, { "epoch": 0.963002061236241, "grad_norm": 0.9168362839697054, "learning_rate": 2.0382232702348074e-06, "loss": 0.0163, "step": 230795 }, { "epoch": 0.9630229239512313, "grad_norm": 0.635023324326287, "learning_rate": 2.038201192211188e-06, "loss": 0.0205, "step": 230800 }, { "epoch": 0.9630437866662216, "grad_norm": 1.4060164249069165, "learning_rate": 2.038179114905001e-06, "loss": 0.0201, "step": 230805 }, { "epoch": 0.9630646493812118, "grad_norm": 0.3500320688275527, "learning_rate": 2.038157038316206e-06, "loss": 0.023, "step": 230810 }, { "epoch": 0.9630855120962022, "grad_norm": 1.0188350968819295, "learning_rate": 2.038134962444765e-06, "loss": 0.0202, "step": 230815 }, { "epoch": 0.9631063748111924, "grad_norm": 0.6240363139407716, "learning_rate": 2.038112887290639e-06, "loss": 0.0226, "step": 230820 }, { "epoch": 0.9631272375261827, "grad_norm": 0.5089815784538907, "learning_rate": 2.0380908128537895e-06, "loss": 0.0198, "step": 230825 }, { "epoch": 0.963148100241173, "grad_norm": 1.0359591787755689, "learning_rate": 2.0380687391341766e-06, "loss": 0.0238, "step": 230830 }, { "epoch": 0.9631689629561633, "grad_norm": 0.5330235597199291, "learning_rate": 2.0380466661317626e-06, "loss": 0.0167, "step": 230835 }, { "epoch": 0.9631898256711535, "grad_norm": 0.939034566014234, "learning_rate": 2.038024593846508e-06, "loss": 0.0175, "step": 230840 }, { "epoch": 0.9632106883861438, "grad_norm": 2.2025768139576463, "learning_rate": 2.0380025222783747e-06, "loss": 0.03, "step": 230845 }, { "epoch": 0.9632315511011341, "grad_norm": 0.7494327147972344, "learning_rate": 2.037980451427323e-06, "loss": 0.0228, "step": 230850 }, { "epoch": 0.9632524138161244, "grad_norm": 0.36237593222107056, "learning_rate": 2.0379583812933147e-06, "loss": 0.0148, "step": 230855 }, { "epoch": 0.9632732765311146, "grad_norm": 0.5218624030463047, "learning_rate": 2.0379363118763108e-06, "loss": 0.0172, "step": 230860 }, { "epoch": 0.963294139246105, "grad_norm": 0.9617821441949426, "learning_rate": 2.037914243176272e-06, "loss": 0.0195, "step": 230865 }, { "epoch": 0.9633150019610952, "grad_norm": 0.859443928430196, "learning_rate": 2.03789217519316e-06, "loss": 0.0151, "step": 230870 }, { "epoch": 0.9633358646760855, "grad_norm": 0.7218778346406346, "learning_rate": 2.0378701079269358e-06, "loss": 0.0215, "step": 230875 }, { "epoch": 0.9633567273910758, "grad_norm": 0.586525272455656, "learning_rate": 2.037848041377561e-06, "loss": 0.0145, "step": 230880 }, { "epoch": 0.963377590106066, "grad_norm": 0.6471759191770167, "learning_rate": 2.037825975544996e-06, "loss": 0.0194, "step": 230885 }, { "epoch": 0.9633984528210563, "grad_norm": 1.687474741155589, "learning_rate": 2.037803910429203e-06, "loss": 0.0288, "step": 230890 }, { "epoch": 0.9634193155360466, "grad_norm": 0.5129163185429929, "learning_rate": 2.0377818460301423e-06, "loss": 0.0183, "step": 230895 }, { "epoch": 0.9634401782510369, "grad_norm": 0.907293606210712, "learning_rate": 2.0377597823477754e-06, "loss": 0.0241, "step": 230900 }, { "epoch": 0.9634610409660271, "grad_norm": 0.7830247199127546, "learning_rate": 2.0377377193820632e-06, "loss": 0.0218, "step": 230905 }, { "epoch": 0.9634819036810174, "grad_norm": 0.5001151731643899, "learning_rate": 2.037715657132968e-06, "loss": 0.0208, "step": 230910 }, { "epoch": 0.9635027663960077, "grad_norm": 0.7945396648662533, "learning_rate": 2.0376935956004495e-06, "loss": 0.0199, "step": 230915 }, { "epoch": 0.963523629110998, "grad_norm": 0.7631888026075304, "learning_rate": 2.03767153478447e-06, "loss": 0.0284, "step": 230920 }, { "epoch": 0.9635444918259882, "grad_norm": 0.8872321382288572, "learning_rate": 2.0376494746849908e-06, "loss": 0.0218, "step": 230925 }, { "epoch": 0.9635653545409786, "grad_norm": 0.8333609208016163, "learning_rate": 2.037627415301972e-06, "loss": 0.0228, "step": 230930 }, { "epoch": 0.9635862172559688, "grad_norm": 0.39706751147636077, "learning_rate": 2.0376053566353756e-06, "loss": 0.0163, "step": 230935 }, { "epoch": 0.9636070799709591, "grad_norm": 0.6428044870219027, "learning_rate": 2.037583298685163e-06, "loss": 0.0218, "step": 230940 }, { "epoch": 0.9636279426859494, "grad_norm": 0.6271423591959688, "learning_rate": 2.037561241451295e-06, "loss": 0.0246, "step": 230945 }, { "epoch": 0.9636488054009397, "grad_norm": 0.8354554294328052, "learning_rate": 2.037539184933733e-06, "loss": 0.0222, "step": 230950 }, { "epoch": 0.9636696681159299, "grad_norm": 0.7014672270062492, "learning_rate": 2.037517129132438e-06, "loss": 0.0157, "step": 230955 }, { "epoch": 0.9636905308309203, "grad_norm": 0.38699307362513924, "learning_rate": 2.0374950740473717e-06, "loss": 0.02, "step": 230960 }, { "epoch": 0.9637113935459105, "grad_norm": 0.8818184754636613, "learning_rate": 2.0374730196784955e-06, "loss": 0.0149, "step": 230965 }, { "epoch": 0.9637322562609008, "grad_norm": 0.7648218147633361, "learning_rate": 2.0374509660257695e-06, "loss": 0.0136, "step": 230970 }, { "epoch": 0.963753118975891, "grad_norm": 0.6466012097805073, "learning_rate": 2.037428913089156e-06, "loss": 0.0261, "step": 230975 }, { "epoch": 0.9637739816908814, "grad_norm": 0.2976532694885455, "learning_rate": 2.037406860868616e-06, "loss": 0.0123, "step": 230980 }, { "epoch": 0.9637948444058716, "grad_norm": 0.381771613385488, "learning_rate": 2.0373848093641103e-06, "loss": 0.0156, "step": 230985 }, { "epoch": 0.9638157071208618, "grad_norm": 0.9356493687010359, "learning_rate": 2.0373627585756004e-06, "loss": 0.0197, "step": 230990 }, { "epoch": 0.9638365698358522, "grad_norm": 0.496899197161675, "learning_rate": 2.0373407085030485e-06, "loss": 0.0161, "step": 230995 }, { "epoch": 0.9638574325508424, "grad_norm": 0.7623942888409002, "learning_rate": 2.0373186591464143e-06, "loss": 0.0263, "step": 231000 }, { "epoch": 0.9638782952658327, "grad_norm": 0.5191758980310786, "learning_rate": 2.0372966105056598e-06, "loss": 0.0313, "step": 231005 }, { "epoch": 0.963899157980823, "grad_norm": 1.1037832263458336, "learning_rate": 2.0372745625807463e-06, "loss": 0.0221, "step": 231010 }, { "epoch": 0.9639200206958133, "grad_norm": 0.3845414797103398, "learning_rate": 2.0372525153716355e-06, "loss": 0.0197, "step": 231015 }, { "epoch": 0.9639408834108035, "grad_norm": 0.3390810437489109, "learning_rate": 2.0372304688782878e-06, "loss": 0.012, "step": 231020 }, { "epoch": 0.9639617461257938, "grad_norm": 0.684277770659426, "learning_rate": 2.0372084231006647e-06, "loss": 0.0242, "step": 231025 }, { "epoch": 0.9639826088407841, "grad_norm": 0.9536268173351365, "learning_rate": 2.0371863780387277e-06, "loss": 0.0205, "step": 231030 }, { "epoch": 0.9640034715557744, "grad_norm": 1.1824413851257178, "learning_rate": 2.0371643336924383e-06, "loss": 0.0184, "step": 231035 }, { "epoch": 0.9640243342707646, "grad_norm": 0.9729747035502138, "learning_rate": 2.037142290061757e-06, "loss": 0.0233, "step": 231040 }, { "epoch": 0.964045196985755, "grad_norm": 0.7398724046765157, "learning_rate": 2.0371202471466458e-06, "loss": 0.0212, "step": 231045 }, { "epoch": 0.9640660597007452, "grad_norm": 0.5379013431150397, "learning_rate": 2.037098204947066e-06, "loss": 0.0222, "step": 231050 }, { "epoch": 0.9640869224157355, "grad_norm": 0.46187170082001433, "learning_rate": 2.037076163462978e-06, "loss": 0.0189, "step": 231055 }, { "epoch": 0.9641077851307258, "grad_norm": 0.48427892876204715, "learning_rate": 2.0370541226943444e-06, "loss": 0.0201, "step": 231060 }, { "epoch": 0.9641286478457161, "grad_norm": 0.8080722181961939, "learning_rate": 2.037032082641126e-06, "loss": 0.0208, "step": 231065 }, { "epoch": 0.9641495105607063, "grad_norm": 0.5976072530266221, "learning_rate": 2.037010043303283e-06, "loss": 0.0243, "step": 231070 }, { "epoch": 0.9641703732756967, "grad_norm": 1.339944290426796, "learning_rate": 2.036988004680778e-06, "loss": 0.0276, "step": 231075 }, { "epoch": 0.9641912359906869, "grad_norm": 0.4766144796589938, "learning_rate": 2.036965966773572e-06, "loss": 0.0164, "step": 231080 }, { "epoch": 0.9642120987056771, "grad_norm": 1.2032776777899468, "learning_rate": 2.0369439295816265e-06, "loss": 0.0194, "step": 231085 }, { "epoch": 0.9642329614206674, "grad_norm": 0.6313382413404467, "learning_rate": 2.0369218931049025e-06, "loss": 0.0234, "step": 231090 }, { "epoch": 0.9642538241356577, "grad_norm": 0.9691587203188825, "learning_rate": 2.0368998573433615e-06, "loss": 0.0217, "step": 231095 }, { "epoch": 0.964274686850648, "grad_norm": 0.9502773429814576, "learning_rate": 2.0368778222969644e-06, "loss": 0.0236, "step": 231100 }, { "epoch": 0.9642955495656382, "grad_norm": 0.7918572891417271, "learning_rate": 2.0368557879656724e-06, "loss": 0.0201, "step": 231105 }, { "epoch": 0.9643164122806286, "grad_norm": 0.3609914620851309, "learning_rate": 2.036833754349448e-06, "loss": 0.0239, "step": 231110 }, { "epoch": 0.9643372749956188, "grad_norm": 0.6952712200861514, "learning_rate": 2.0368117214482513e-06, "loss": 0.0132, "step": 231115 }, { "epoch": 0.9643581377106091, "grad_norm": 1.7851894915314828, "learning_rate": 2.0367896892620444e-06, "loss": 0.0229, "step": 231120 }, { "epoch": 0.9643790004255994, "grad_norm": 0.7434752503650526, "learning_rate": 2.036767657790788e-06, "loss": 0.0204, "step": 231125 }, { "epoch": 0.9643998631405897, "grad_norm": 0.6249677372501256, "learning_rate": 2.0367456270344437e-06, "loss": 0.0248, "step": 231130 }, { "epoch": 0.9644207258555799, "grad_norm": 0.3563916916364681, "learning_rate": 2.036723596992973e-06, "loss": 0.0204, "step": 231135 }, { "epoch": 0.9644415885705703, "grad_norm": 0.27011090010702327, "learning_rate": 2.036701567666337e-06, "loss": 0.0164, "step": 231140 }, { "epoch": 0.9644624512855605, "grad_norm": 0.6730978038318421, "learning_rate": 2.0366795390544976e-06, "loss": 0.0175, "step": 231145 }, { "epoch": 0.9644833140005508, "grad_norm": 0.7306397810624174, "learning_rate": 2.036657511157415e-06, "loss": 0.0197, "step": 231150 }, { "epoch": 0.964504176715541, "grad_norm": 0.42884489820386495, "learning_rate": 2.036635483975052e-06, "loss": 0.0146, "step": 231155 }, { "epoch": 0.9645250394305314, "grad_norm": 0.8363972718551234, "learning_rate": 2.0366134575073686e-06, "loss": 0.0204, "step": 231160 }, { "epoch": 0.9645459021455216, "grad_norm": 0.7107383154612252, "learning_rate": 2.036591431754327e-06, "loss": 0.0234, "step": 231165 }, { "epoch": 0.9645667648605118, "grad_norm": 0.4321136323322272, "learning_rate": 2.0365694067158884e-06, "loss": 0.0214, "step": 231170 }, { "epoch": 0.9645876275755022, "grad_norm": 0.8080527043824458, "learning_rate": 2.036547382392014e-06, "loss": 0.0201, "step": 231175 }, { "epoch": 0.9646084902904924, "grad_norm": 0.34870265293287633, "learning_rate": 2.036525358782665e-06, "loss": 0.018, "step": 231180 }, { "epoch": 0.9646293530054827, "grad_norm": 0.4732813128047148, "learning_rate": 2.0365033358878034e-06, "loss": 0.021, "step": 231185 }, { "epoch": 0.964650215720473, "grad_norm": 0.6674432559960507, "learning_rate": 2.03648131370739e-06, "loss": 0.0174, "step": 231190 }, { "epoch": 0.9646710784354633, "grad_norm": 0.8213571188628117, "learning_rate": 2.0364592922413866e-06, "loss": 0.0216, "step": 231195 }, { "epoch": 0.9646919411504535, "grad_norm": 0.8021347248200751, "learning_rate": 2.036437271489754e-06, "loss": 0.0175, "step": 231200 }, { "epoch": 0.9647128038654438, "grad_norm": 0.8288438770699206, "learning_rate": 2.0364152514524537e-06, "loss": 0.0192, "step": 231205 }, { "epoch": 0.9647336665804341, "grad_norm": 0.5561593792004417, "learning_rate": 2.0363932321294475e-06, "loss": 0.0182, "step": 231210 }, { "epoch": 0.9647545292954244, "grad_norm": 0.6410715851409449, "learning_rate": 2.0363712135206967e-06, "loss": 0.0177, "step": 231215 }, { "epoch": 0.9647753920104146, "grad_norm": 0.8610482858996573, "learning_rate": 2.0363491956261625e-06, "loss": 0.0227, "step": 231220 }, { "epoch": 0.964796254725405, "grad_norm": 0.9470620771998692, "learning_rate": 2.036327178445806e-06, "loss": 0.0159, "step": 231225 }, { "epoch": 0.9648171174403952, "grad_norm": 0.44388912317757245, "learning_rate": 2.0363051619795892e-06, "loss": 0.0201, "step": 231230 }, { "epoch": 0.9648379801553855, "grad_norm": 0.7019225714495824, "learning_rate": 2.0362831462274736e-06, "loss": 0.0213, "step": 231235 }, { "epoch": 0.9648588428703758, "grad_norm": 0.6595662274332267, "learning_rate": 2.03626113118942e-06, "loss": 0.0191, "step": 231240 }, { "epoch": 0.9648797055853661, "grad_norm": 0.5822180382621627, "learning_rate": 2.0362391168653895e-06, "loss": 0.0174, "step": 231245 }, { "epoch": 0.9649005683003563, "grad_norm": 0.7254754980907037, "learning_rate": 2.0362171032553444e-06, "loss": 0.0235, "step": 231250 }, { "epoch": 0.9649214310153467, "grad_norm": 0.5966554210302871, "learning_rate": 2.036195090359246e-06, "loss": 0.022, "step": 231255 }, { "epoch": 0.9649422937303369, "grad_norm": 0.9796381191180364, "learning_rate": 2.036173078177055e-06, "loss": 0.0185, "step": 231260 }, { "epoch": 0.9649631564453272, "grad_norm": 0.7119802946714019, "learning_rate": 2.0361510667087335e-06, "loss": 0.0188, "step": 231265 }, { "epoch": 0.9649840191603174, "grad_norm": 1.0958397414854262, "learning_rate": 2.0361290559542427e-06, "loss": 0.0244, "step": 231270 }, { "epoch": 0.9650048818753078, "grad_norm": 0.800376217228329, "learning_rate": 2.0361070459135436e-06, "loss": 0.0208, "step": 231275 }, { "epoch": 0.965025744590298, "grad_norm": 0.4819826516091968, "learning_rate": 2.0360850365865985e-06, "loss": 0.0193, "step": 231280 }, { "epoch": 0.9650466073052882, "grad_norm": 0.8005622924417996, "learning_rate": 2.0360630279733676e-06, "loss": 0.0172, "step": 231285 }, { "epoch": 0.9650674700202786, "grad_norm": 0.4369003699279712, "learning_rate": 2.0360410200738136e-06, "loss": 0.0196, "step": 231290 }, { "epoch": 0.9650883327352688, "grad_norm": 0.3548769800745783, "learning_rate": 2.0360190128878976e-06, "loss": 0.0221, "step": 231295 }, { "epoch": 0.9651091954502591, "grad_norm": 0.7152794697825777, "learning_rate": 2.03599700641558e-06, "loss": 0.0214, "step": 231300 }, { "epoch": 0.9651300581652494, "grad_norm": 0.6770193939223785, "learning_rate": 2.0359750006568237e-06, "loss": 0.0242, "step": 231305 }, { "epoch": 0.9651509208802397, "grad_norm": 0.49914873922128106, "learning_rate": 2.03595299561159e-06, "loss": 0.0218, "step": 231310 }, { "epoch": 0.9651717835952299, "grad_norm": 0.7048270873728598, "learning_rate": 2.035930991279839e-06, "loss": 0.0202, "step": 231315 }, { "epoch": 0.9651926463102203, "grad_norm": 0.5705457740267673, "learning_rate": 2.035908987661533e-06, "loss": 0.0192, "step": 231320 }, { "epoch": 0.9652135090252105, "grad_norm": 0.31180659251215886, "learning_rate": 2.035886984756634e-06, "loss": 0.0256, "step": 231325 }, { "epoch": 0.9652343717402008, "grad_norm": 0.6843126611322232, "learning_rate": 2.0358649825651024e-06, "loss": 0.0216, "step": 231330 }, { "epoch": 0.965255234455191, "grad_norm": 1.1394948952798274, "learning_rate": 2.0358429810869e-06, "loss": 0.0212, "step": 231335 }, { "epoch": 0.9652760971701814, "grad_norm": 0.7025168521675468, "learning_rate": 2.035820980321989e-06, "loss": 0.0193, "step": 231340 }, { "epoch": 0.9652969598851716, "grad_norm": 1.626422636107616, "learning_rate": 2.0357989802703303e-06, "loss": 0.0249, "step": 231345 }, { "epoch": 0.9653178226001619, "grad_norm": 1.0423372406528844, "learning_rate": 2.0357769809318845e-06, "loss": 0.0275, "step": 231350 }, { "epoch": 0.9653386853151522, "grad_norm": 0.45694060523101404, "learning_rate": 2.0357549823066145e-06, "loss": 0.0255, "step": 231355 }, { "epoch": 0.9653595480301425, "grad_norm": 0.5496274701543822, "learning_rate": 2.035732984394481e-06, "loss": 0.0169, "step": 231360 }, { "epoch": 0.9653804107451327, "grad_norm": 0.9160538786006183, "learning_rate": 2.0357109871954452e-06, "loss": 0.0194, "step": 231365 }, { "epoch": 0.965401273460123, "grad_norm": 0.9162437671408921, "learning_rate": 2.0356889907094692e-06, "loss": 0.0195, "step": 231370 }, { "epoch": 0.9654221361751133, "grad_norm": 0.49713204727003074, "learning_rate": 2.0356669949365145e-06, "loss": 0.0194, "step": 231375 }, { "epoch": 0.9654429988901035, "grad_norm": 0.886779672507569, "learning_rate": 2.035644999876542e-06, "loss": 0.0201, "step": 231380 }, { "epoch": 0.9654638616050938, "grad_norm": 0.30087570133423847, "learning_rate": 2.035623005529514e-06, "loss": 0.0162, "step": 231385 }, { "epoch": 0.9654847243200841, "grad_norm": 0.46962603867581443, "learning_rate": 2.0356010118953918e-06, "loss": 0.0168, "step": 231390 }, { "epoch": 0.9655055870350744, "grad_norm": 0.490006752039524, "learning_rate": 2.0355790189741357e-06, "loss": 0.0182, "step": 231395 }, { "epoch": 0.9655264497500646, "grad_norm": 0.6817022547378769, "learning_rate": 2.0355570267657087e-06, "loss": 0.0225, "step": 231400 }, { "epoch": 0.965547312465055, "grad_norm": 1.1156933655220453, "learning_rate": 2.0355350352700713e-06, "loss": 0.0166, "step": 231405 }, { "epoch": 0.9655681751800452, "grad_norm": 0.5931469949020817, "learning_rate": 2.035513044487186e-06, "loss": 0.0165, "step": 231410 }, { "epoch": 0.9655890378950355, "grad_norm": 1.1106881402527868, "learning_rate": 2.035491054417013e-06, "loss": 0.0253, "step": 231415 }, { "epoch": 0.9656099006100258, "grad_norm": 0.5923983966419191, "learning_rate": 2.035469065059515e-06, "loss": 0.0182, "step": 231420 }, { "epoch": 0.9656307633250161, "grad_norm": 0.5069553286739278, "learning_rate": 2.0354470764146524e-06, "loss": 0.0204, "step": 231425 }, { "epoch": 0.9656516260400063, "grad_norm": 0.5715804396610388, "learning_rate": 2.0354250884823874e-06, "loss": 0.0123, "step": 231430 }, { "epoch": 0.9656724887549967, "grad_norm": 0.48851338842503605, "learning_rate": 2.035403101262682e-06, "loss": 0.0229, "step": 231435 }, { "epoch": 0.9656933514699869, "grad_norm": 0.4938566997143091, "learning_rate": 2.035381114755497e-06, "loss": 0.0157, "step": 231440 }, { "epoch": 0.9657142141849772, "grad_norm": 0.48718821613263935, "learning_rate": 2.0353591289607937e-06, "loss": 0.0238, "step": 231445 }, { "epoch": 0.9657350768999674, "grad_norm": 1.200038483098869, "learning_rate": 2.0353371438785337e-06, "loss": 0.0216, "step": 231450 }, { "epoch": 0.9657559396149578, "grad_norm": 0.8206936475345057, "learning_rate": 2.0353151595086796e-06, "loss": 0.0218, "step": 231455 }, { "epoch": 0.965776802329948, "grad_norm": 0.7746251012269638, "learning_rate": 2.0352931758511916e-06, "loss": 0.0204, "step": 231460 }, { "epoch": 0.9657976650449382, "grad_norm": 0.7517869439530512, "learning_rate": 2.035271192906032e-06, "loss": 0.0176, "step": 231465 }, { "epoch": 0.9658185277599286, "grad_norm": 0.6388232012344959, "learning_rate": 2.0352492106731617e-06, "loss": 0.0227, "step": 231470 }, { "epoch": 0.9658393904749188, "grad_norm": 0.6365193903984939, "learning_rate": 2.035227229152543e-06, "loss": 0.0204, "step": 231475 }, { "epoch": 0.9658602531899091, "grad_norm": 0.6659294187932263, "learning_rate": 2.0352052483441368e-06, "loss": 0.0259, "step": 231480 }, { "epoch": 0.9658811159048994, "grad_norm": 0.780581617544175, "learning_rate": 2.035183268247905e-06, "loss": 0.0148, "step": 231485 }, { "epoch": 0.9659019786198897, "grad_norm": 0.3329051291991639, "learning_rate": 2.035161288863809e-06, "loss": 0.0158, "step": 231490 }, { "epoch": 0.9659228413348799, "grad_norm": 1.2604067315444214, "learning_rate": 2.0351393101918105e-06, "loss": 0.0212, "step": 231495 }, { "epoch": 0.9659437040498703, "grad_norm": 0.7258422970563349, "learning_rate": 2.035117332231871e-06, "loss": 0.0253, "step": 231500 }, { "epoch": 0.9659645667648605, "grad_norm": 0.4340746866957154, "learning_rate": 2.0350953549839517e-06, "loss": 0.0247, "step": 231505 }, { "epoch": 0.9659854294798508, "grad_norm": 0.6106467041741048, "learning_rate": 2.0350733784480144e-06, "loss": 0.0182, "step": 231510 }, { "epoch": 0.966006292194841, "grad_norm": 0.8109525354835515, "learning_rate": 2.035051402624021e-06, "loss": 0.0228, "step": 231515 }, { "epoch": 0.9660271549098314, "grad_norm": 0.5554710786994175, "learning_rate": 2.035029427511932e-06, "loss": 0.0184, "step": 231520 }, { "epoch": 0.9660480176248216, "grad_norm": 0.6846649956075953, "learning_rate": 2.0350074531117103e-06, "loss": 0.016, "step": 231525 }, { "epoch": 0.9660688803398119, "grad_norm": 0.6128290256258325, "learning_rate": 2.0349854794233174e-06, "loss": 0.0177, "step": 231530 }, { "epoch": 0.9660897430548022, "grad_norm": 0.3832068361400964, "learning_rate": 2.0349635064467137e-06, "loss": 0.0203, "step": 231535 }, { "epoch": 0.9661106057697925, "grad_norm": 0.5029033140895781, "learning_rate": 2.0349415341818612e-06, "loss": 0.0194, "step": 231540 }, { "epoch": 0.9661314684847827, "grad_norm": 0.8611473345662165, "learning_rate": 2.0349195626287223e-06, "loss": 0.0185, "step": 231545 }, { "epoch": 0.9661523311997731, "grad_norm": 0.6302581772434782, "learning_rate": 2.0348975917872574e-06, "loss": 0.0248, "step": 231550 }, { "epoch": 0.9661731939147633, "grad_norm": 0.7332090027082404, "learning_rate": 2.0348756216574295e-06, "loss": 0.017, "step": 231555 }, { "epoch": 0.9661940566297536, "grad_norm": 0.595097636875733, "learning_rate": 2.034853652239199e-06, "loss": 0.0162, "step": 231560 }, { "epoch": 0.9662149193447438, "grad_norm": 1.0648328592524954, "learning_rate": 2.0348316835325277e-06, "loss": 0.0256, "step": 231565 }, { "epoch": 0.9662357820597341, "grad_norm": 0.3582424873382373, "learning_rate": 2.0348097155373774e-06, "loss": 0.0139, "step": 231570 }, { "epoch": 0.9662566447747244, "grad_norm": 0.4124319528464333, "learning_rate": 2.0347877482537092e-06, "loss": 0.017, "step": 231575 }, { "epoch": 0.9662775074897146, "grad_norm": 0.41933772153096616, "learning_rate": 2.0347657816814857e-06, "loss": 0.0225, "step": 231580 }, { "epoch": 0.966298370204705, "grad_norm": 0.7853649083776189, "learning_rate": 2.0347438158206678e-06, "loss": 0.0211, "step": 231585 }, { "epoch": 0.9663192329196952, "grad_norm": 0.8408060848600017, "learning_rate": 2.034721850671217e-06, "loss": 0.0233, "step": 231590 }, { "epoch": 0.9663400956346855, "grad_norm": 0.37757953563864133, "learning_rate": 2.0346998862330954e-06, "loss": 0.0183, "step": 231595 }, { "epoch": 0.9663609583496758, "grad_norm": 0.8310239355776715, "learning_rate": 2.034677922506264e-06, "loss": 0.0231, "step": 231600 }, { "epoch": 0.9663818210646661, "grad_norm": 0.6150960888732768, "learning_rate": 2.0346559594906847e-06, "loss": 0.0189, "step": 231605 }, { "epoch": 0.9664026837796563, "grad_norm": 0.7152053187135543, "learning_rate": 2.0346339971863192e-06, "loss": 0.0161, "step": 231610 }, { "epoch": 0.9664235464946467, "grad_norm": 0.42890927684626495, "learning_rate": 2.0346120355931294e-06, "loss": 0.0178, "step": 231615 }, { "epoch": 0.9664444092096369, "grad_norm": 0.47436111567417394, "learning_rate": 2.0345900747110767e-06, "loss": 0.0157, "step": 231620 }, { "epoch": 0.9664652719246272, "grad_norm": 0.7462272937572914, "learning_rate": 2.034568114540122e-06, "loss": 0.021, "step": 231625 }, { "epoch": 0.9664861346396174, "grad_norm": 0.797064861701807, "learning_rate": 2.034546155080228e-06, "loss": 0.0216, "step": 231630 }, { "epoch": 0.9665069973546078, "grad_norm": 0.7581547383684184, "learning_rate": 2.0345241963313558e-06, "loss": 0.0168, "step": 231635 }, { "epoch": 0.966527860069598, "grad_norm": 0.2675490817154659, "learning_rate": 2.034502238293467e-06, "loss": 0.0181, "step": 231640 }, { "epoch": 0.9665487227845883, "grad_norm": 0.6246135462053334, "learning_rate": 2.0344802809665233e-06, "loss": 0.0187, "step": 231645 }, { "epoch": 0.9665695854995786, "grad_norm": 0.7520862297712161, "learning_rate": 2.0344583243504863e-06, "loss": 0.0206, "step": 231650 }, { "epoch": 0.9665904482145689, "grad_norm": 0.9452070503639249, "learning_rate": 2.0344363684453176e-06, "loss": 0.0256, "step": 231655 }, { "epoch": 0.9666113109295591, "grad_norm": 0.9137882937454557, "learning_rate": 2.0344144132509793e-06, "loss": 0.015, "step": 231660 }, { "epoch": 0.9666321736445495, "grad_norm": 0.6461803758079561, "learning_rate": 2.0343924587674325e-06, "loss": 0.0236, "step": 231665 }, { "epoch": 0.9666530363595397, "grad_norm": 0.5940782772359314, "learning_rate": 2.034370504994639e-06, "loss": 0.0192, "step": 231670 }, { "epoch": 0.9666738990745299, "grad_norm": 0.5318731987198158, "learning_rate": 2.0343485519325606e-06, "loss": 0.0141, "step": 231675 }, { "epoch": 0.9666947617895203, "grad_norm": 0.6160466659678225, "learning_rate": 2.034326599581159e-06, "loss": 0.02, "step": 231680 }, { "epoch": 0.9667156245045105, "grad_norm": 0.5716515335460172, "learning_rate": 2.0343046479403956e-06, "loss": 0.0189, "step": 231685 }, { "epoch": 0.9667364872195008, "grad_norm": 0.5131557849195562, "learning_rate": 2.0342826970102317e-06, "loss": 0.0221, "step": 231690 }, { "epoch": 0.966757349934491, "grad_norm": 0.8041488465652399, "learning_rate": 2.0342607467906297e-06, "loss": 0.0281, "step": 231695 }, { "epoch": 0.9667782126494814, "grad_norm": 0.8238874947526358, "learning_rate": 2.034238797281551e-06, "loss": 0.02, "step": 231700 }, { "epoch": 0.9667990753644716, "grad_norm": 0.6308441031447745, "learning_rate": 2.034216848482958e-06, "loss": 0.0222, "step": 231705 }, { "epoch": 0.9668199380794619, "grad_norm": 0.7113562967072415, "learning_rate": 2.0341949003948106e-06, "loss": 0.0257, "step": 231710 }, { "epoch": 0.9668408007944522, "grad_norm": 0.949054650451834, "learning_rate": 2.034172953017072e-06, "loss": 0.0329, "step": 231715 }, { "epoch": 0.9668616635094425, "grad_norm": 0.832530113365386, "learning_rate": 2.034151006349703e-06, "loss": 0.0171, "step": 231720 }, { "epoch": 0.9668825262244327, "grad_norm": 2.122135375809579, "learning_rate": 2.034129060392666e-06, "loss": 0.0181, "step": 231725 }, { "epoch": 0.9669033889394231, "grad_norm": 0.5463281508865413, "learning_rate": 2.034107115145922e-06, "loss": 0.0284, "step": 231730 }, { "epoch": 0.9669242516544133, "grad_norm": 0.6099219949242497, "learning_rate": 2.0340851706094334e-06, "loss": 0.0195, "step": 231735 }, { "epoch": 0.9669451143694036, "grad_norm": 0.7720415051237338, "learning_rate": 2.0340632267831615e-06, "loss": 0.0237, "step": 231740 }, { "epoch": 0.9669659770843938, "grad_norm": 0.5125427346034069, "learning_rate": 2.0340412836670675e-06, "loss": 0.0233, "step": 231745 }, { "epoch": 0.9669868397993842, "grad_norm": 0.6638680418148207, "learning_rate": 2.034019341261114e-06, "loss": 0.0219, "step": 231750 }, { "epoch": 0.9670077025143744, "grad_norm": 0.7206442126080251, "learning_rate": 2.0339973995652627e-06, "loss": 0.0206, "step": 231755 }, { "epoch": 0.9670285652293646, "grad_norm": 0.7764482778075156, "learning_rate": 2.033975458579474e-06, "loss": 0.0221, "step": 231760 }, { "epoch": 0.967049427944355, "grad_norm": 1.3210562272552475, "learning_rate": 2.0339535183037114e-06, "loss": 0.0253, "step": 231765 }, { "epoch": 0.9670702906593452, "grad_norm": 0.30215836011852293, "learning_rate": 2.0339315787379352e-06, "loss": 0.0207, "step": 231770 }, { "epoch": 0.9670911533743355, "grad_norm": 0.514586030234323, "learning_rate": 2.0339096398821074e-06, "loss": 0.0226, "step": 231775 }, { "epoch": 0.9671120160893258, "grad_norm": 0.3914817266612593, "learning_rate": 2.0338877017361905e-06, "loss": 0.0165, "step": 231780 }, { "epoch": 0.9671328788043161, "grad_norm": 0.5395479585191127, "learning_rate": 2.0338657643001453e-06, "loss": 0.022, "step": 231785 }, { "epoch": 0.9671537415193063, "grad_norm": 0.5529005025299778, "learning_rate": 2.033843827573934e-06, "loss": 0.0156, "step": 231790 }, { "epoch": 0.9671746042342967, "grad_norm": 0.49293189635268136, "learning_rate": 2.0338218915575177e-06, "loss": 0.0217, "step": 231795 }, { "epoch": 0.9671954669492869, "grad_norm": 0.6891304428360749, "learning_rate": 2.033799956250859e-06, "loss": 0.021, "step": 231800 }, { "epoch": 0.9672163296642772, "grad_norm": 0.39624090493382824, "learning_rate": 2.0337780216539197e-06, "loss": 0.0171, "step": 231805 }, { "epoch": 0.9672371923792674, "grad_norm": 0.49555685809750255, "learning_rate": 2.0337560877666605e-06, "loss": 0.0182, "step": 231810 }, { "epoch": 0.9672580550942578, "grad_norm": 0.6241668952430266, "learning_rate": 2.0337341545890438e-06, "loss": 0.024, "step": 231815 }, { "epoch": 0.967278917809248, "grad_norm": 0.7687097715447129, "learning_rate": 2.0337122221210315e-06, "loss": 0.0203, "step": 231820 }, { "epoch": 0.9672997805242383, "grad_norm": 0.41836568709260635, "learning_rate": 2.0336902903625846e-06, "loss": 0.0181, "step": 231825 }, { "epoch": 0.9673206432392286, "grad_norm": 0.8610695799825544, "learning_rate": 2.0336683593136654e-06, "loss": 0.0235, "step": 231830 }, { "epoch": 0.9673415059542189, "grad_norm": 0.7550575615547549, "learning_rate": 2.033646428974236e-06, "loss": 0.0174, "step": 231835 }, { "epoch": 0.9673623686692091, "grad_norm": 0.38807735006154065, "learning_rate": 2.0336244993442574e-06, "loss": 0.0249, "step": 231840 }, { "epoch": 0.9673832313841995, "grad_norm": 0.2280458391173381, "learning_rate": 2.0336025704236915e-06, "loss": 0.0181, "step": 231845 }, { "epoch": 0.9674040940991897, "grad_norm": 0.5147088095196982, "learning_rate": 2.0335806422125e-06, "loss": 0.0191, "step": 231850 }, { "epoch": 0.96742495681418, "grad_norm": 0.44507323057658815, "learning_rate": 2.0335587147106452e-06, "loss": 0.0187, "step": 231855 }, { "epoch": 0.9674458195291703, "grad_norm": 0.6647419862449139, "learning_rate": 2.0335367879180882e-06, "loss": 0.0279, "step": 231860 }, { "epoch": 0.9674666822441605, "grad_norm": 0.6610458360170975, "learning_rate": 2.0335148618347917e-06, "loss": 0.0122, "step": 231865 }, { "epoch": 0.9674875449591508, "grad_norm": 0.538010243359526, "learning_rate": 2.0334929364607163e-06, "loss": 0.0212, "step": 231870 }, { "epoch": 0.967508407674141, "grad_norm": 0.9333251953783201, "learning_rate": 2.0334710117958244e-06, "loss": 0.018, "step": 231875 }, { "epoch": 0.9675292703891314, "grad_norm": 0.4974349554407704, "learning_rate": 2.0334490878400777e-06, "loss": 0.0168, "step": 231880 }, { "epoch": 0.9675501331041216, "grad_norm": 0.4525645202529348, "learning_rate": 2.0334271645934382e-06, "loss": 0.018, "step": 231885 }, { "epoch": 0.9675709958191119, "grad_norm": 0.5240221911226296, "learning_rate": 2.033405242055867e-06, "loss": 0.0189, "step": 231890 }, { "epoch": 0.9675918585341022, "grad_norm": 0.6359441998173417, "learning_rate": 2.0333833202273266e-06, "loss": 0.0144, "step": 231895 }, { "epoch": 0.9676127212490925, "grad_norm": 0.6681071189792737, "learning_rate": 2.0333613991077783e-06, "loss": 0.0199, "step": 231900 }, { "epoch": 0.9676335839640827, "grad_norm": 0.8881870006630248, "learning_rate": 2.0333394786971843e-06, "loss": 0.0264, "step": 231905 }, { "epoch": 0.9676544466790731, "grad_norm": 0.5195042568610315, "learning_rate": 2.033317558995506e-06, "loss": 0.0213, "step": 231910 }, { "epoch": 0.9676753093940633, "grad_norm": 2.593085206582026, "learning_rate": 2.033295640002705e-06, "loss": 0.0235, "step": 231915 }, { "epoch": 0.9676961721090536, "grad_norm": 0.6478519378426773, "learning_rate": 2.033273721718744e-06, "loss": 0.0199, "step": 231920 }, { "epoch": 0.9677170348240438, "grad_norm": 0.4110330743702046, "learning_rate": 2.033251804143584e-06, "loss": 0.0165, "step": 231925 }, { "epoch": 0.9677378975390342, "grad_norm": 0.505574696707615, "learning_rate": 2.033229887277187e-06, "loss": 0.0212, "step": 231930 }, { "epoch": 0.9677587602540244, "grad_norm": 0.30879940484209084, "learning_rate": 2.0332079711195147e-06, "loss": 0.0187, "step": 231935 }, { "epoch": 0.9677796229690147, "grad_norm": 0.7748380085660047, "learning_rate": 2.0331860556705293e-06, "loss": 0.0174, "step": 231940 }, { "epoch": 0.967800485684005, "grad_norm": 0.8374064305149663, "learning_rate": 2.033164140930192e-06, "loss": 0.0274, "step": 231945 }, { "epoch": 0.9678213483989953, "grad_norm": 0.4498856073359845, "learning_rate": 2.0331422268984654e-06, "loss": 0.0312, "step": 231950 }, { "epoch": 0.9678422111139855, "grad_norm": 0.6426734106890742, "learning_rate": 2.0331203135753107e-06, "loss": 0.0156, "step": 231955 }, { "epoch": 0.9678630738289759, "grad_norm": 0.35681114030779204, "learning_rate": 2.0330984009606897e-06, "loss": 0.02, "step": 231960 }, { "epoch": 0.9678839365439661, "grad_norm": 0.6052306060310193, "learning_rate": 2.0330764890545644e-06, "loss": 0.0168, "step": 231965 }, { "epoch": 0.9679047992589563, "grad_norm": 0.29887546971703133, "learning_rate": 2.0330545778568966e-06, "loss": 0.0139, "step": 231970 }, { "epoch": 0.9679256619739467, "grad_norm": 0.5243686899698011, "learning_rate": 2.033032667367648e-06, "loss": 0.018, "step": 231975 }, { "epoch": 0.9679465246889369, "grad_norm": 1.0376281383435515, "learning_rate": 2.033010757586781e-06, "loss": 0.0224, "step": 231980 }, { "epoch": 0.9679673874039272, "grad_norm": 0.6748098316177435, "learning_rate": 2.0329888485142566e-06, "loss": 0.024, "step": 231985 }, { "epoch": 0.9679882501189174, "grad_norm": 0.4733011910507366, "learning_rate": 2.0329669401500373e-06, "loss": 0.018, "step": 231990 }, { "epoch": 0.9680091128339078, "grad_norm": 0.6977177900667707, "learning_rate": 2.032945032494085e-06, "loss": 0.0219, "step": 231995 }, { "epoch": 0.968029975548898, "grad_norm": 0.43455639463232537, "learning_rate": 2.0329231255463604e-06, "loss": 0.0231, "step": 232000 }, { "epoch": 0.9680508382638883, "grad_norm": 0.558894228808013, "learning_rate": 2.0329012193068266e-06, "loss": 0.018, "step": 232005 }, { "epoch": 0.9680717009788786, "grad_norm": 3.270670258177094, "learning_rate": 2.032879313775445e-06, "loss": 0.0381, "step": 232010 }, { "epoch": 0.9680925636938689, "grad_norm": 0.49754383801609914, "learning_rate": 2.032857408952177e-06, "loss": 0.0148, "step": 232015 }, { "epoch": 0.9681134264088591, "grad_norm": 0.7076839490687145, "learning_rate": 2.0328355048369853e-06, "loss": 0.0201, "step": 232020 }, { "epoch": 0.9681342891238495, "grad_norm": 1.0058739896883604, "learning_rate": 2.0328136014298315e-06, "loss": 0.0258, "step": 232025 }, { "epoch": 0.9681551518388397, "grad_norm": 0.49791396611510647, "learning_rate": 2.0327916987306773e-06, "loss": 0.0206, "step": 232030 }, { "epoch": 0.96817601455383, "grad_norm": 0.15075058815372652, "learning_rate": 2.0327697967394843e-06, "loss": 0.0107, "step": 232035 }, { "epoch": 0.9681968772688203, "grad_norm": 0.46077590643527455, "learning_rate": 2.0327478954562147e-06, "loss": 0.0185, "step": 232040 }, { "epoch": 0.9682177399838106, "grad_norm": 0.6873155697791574, "learning_rate": 2.0327259948808305e-06, "loss": 0.0179, "step": 232045 }, { "epoch": 0.9682386026988008, "grad_norm": 0.3031555792374832, "learning_rate": 2.0327040950132927e-06, "loss": 0.0122, "step": 232050 }, { "epoch": 0.968259465413791, "grad_norm": 0.27913356946097495, "learning_rate": 2.0326821958535643e-06, "loss": 0.0224, "step": 232055 }, { "epoch": 0.9682803281287814, "grad_norm": 0.3720988483983248, "learning_rate": 2.032660297401607e-06, "loss": 0.0181, "step": 232060 }, { "epoch": 0.9683011908437716, "grad_norm": 0.3056876885951934, "learning_rate": 2.0326383996573817e-06, "loss": 0.0206, "step": 232065 }, { "epoch": 0.9683220535587619, "grad_norm": 0.3330905291399803, "learning_rate": 2.032616502620851e-06, "loss": 0.0201, "step": 232070 }, { "epoch": 0.9683429162737522, "grad_norm": 0.6123443765956267, "learning_rate": 2.0325946062919773e-06, "loss": 0.0157, "step": 232075 }, { "epoch": 0.9683637789887425, "grad_norm": 0.7535041174988398, "learning_rate": 2.0325727106707215e-06, "loss": 0.0171, "step": 232080 }, { "epoch": 0.9683846417037327, "grad_norm": 0.46009674695990993, "learning_rate": 2.032550815757046e-06, "loss": 0.0157, "step": 232085 }, { "epoch": 0.9684055044187231, "grad_norm": 1.8564502377872083, "learning_rate": 2.0325289215509126e-06, "loss": 0.0291, "step": 232090 }, { "epoch": 0.9684263671337133, "grad_norm": 0.37996942091670327, "learning_rate": 2.032507028052283e-06, "loss": 0.0251, "step": 232095 }, { "epoch": 0.9684472298487036, "grad_norm": 0.6622427561047719, "learning_rate": 2.0324851352611195e-06, "loss": 0.0217, "step": 232100 }, { "epoch": 0.9684680925636938, "grad_norm": 0.3606970660511381, "learning_rate": 2.0324632431773837e-06, "loss": 0.0192, "step": 232105 }, { "epoch": 0.9684889552786842, "grad_norm": 0.5630726434643228, "learning_rate": 2.032441351801038e-06, "loss": 0.0236, "step": 232110 }, { "epoch": 0.9685098179936744, "grad_norm": 0.8176824769329972, "learning_rate": 2.032419461132043e-06, "loss": 0.0212, "step": 232115 }, { "epoch": 0.9685306807086647, "grad_norm": 0.46039010477756254, "learning_rate": 2.032397571170362e-06, "loss": 0.018, "step": 232120 }, { "epoch": 0.968551543423655, "grad_norm": 0.5810158856225479, "learning_rate": 2.032375681915957e-06, "loss": 0.0213, "step": 232125 }, { "epoch": 0.9685724061386453, "grad_norm": 0.7185403163065005, "learning_rate": 2.032353793368788e-06, "loss": 0.0179, "step": 232130 }, { "epoch": 0.9685932688536355, "grad_norm": 1.0061429745640762, "learning_rate": 2.032331905528819e-06, "loss": 0.0315, "step": 232135 }, { "epoch": 0.9686141315686259, "grad_norm": 0.45358154257799005, "learning_rate": 2.0323100183960113e-06, "loss": 0.0183, "step": 232140 }, { "epoch": 0.9686349942836161, "grad_norm": 0.8566638160697899, "learning_rate": 2.032288131970327e-06, "loss": 0.0158, "step": 232145 }, { "epoch": 0.9686558569986063, "grad_norm": 0.350384943684722, "learning_rate": 2.032266246251727e-06, "loss": 0.0212, "step": 232150 }, { "epoch": 0.9686767197135967, "grad_norm": 0.7379560006541014, "learning_rate": 2.032244361240174e-06, "loss": 0.022, "step": 232155 }, { "epoch": 0.968697582428587, "grad_norm": 0.7394661703896482, "learning_rate": 2.03222247693563e-06, "loss": 0.0226, "step": 232160 }, { "epoch": 0.9687184451435772, "grad_norm": 0.8448214595366448, "learning_rate": 2.032200593338057e-06, "loss": 0.0153, "step": 232165 }, { "epoch": 0.9687393078585674, "grad_norm": 0.8790965072064693, "learning_rate": 2.0321787104474166e-06, "loss": 0.0199, "step": 232170 }, { "epoch": 0.9687601705735578, "grad_norm": 0.5865845423611751, "learning_rate": 2.0321568282636707e-06, "loss": 0.026, "step": 232175 }, { "epoch": 0.968781033288548, "grad_norm": 1.0631874993661534, "learning_rate": 2.0321349467867817e-06, "loss": 0.0227, "step": 232180 }, { "epoch": 0.9688018960035383, "grad_norm": 0.17831072212778837, "learning_rate": 2.032113066016711e-06, "loss": 0.0213, "step": 232185 }, { "epoch": 0.9688227587185286, "grad_norm": 0.4251491576233046, "learning_rate": 2.032091185953421e-06, "loss": 0.0255, "step": 232190 }, { "epoch": 0.9688436214335189, "grad_norm": 0.430280306288626, "learning_rate": 2.0320693065968734e-06, "loss": 0.0206, "step": 232195 }, { "epoch": 0.9688644841485091, "grad_norm": 0.9938408610968695, "learning_rate": 2.03204742794703e-06, "loss": 0.0167, "step": 232200 }, { "epoch": 0.9688853468634995, "grad_norm": 1.1011764421975216, "learning_rate": 2.0320255500038526e-06, "loss": 0.0181, "step": 232205 }, { "epoch": 0.9689062095784897, "grad_norm": 0.6000817591835496, "learning_rate": 2.0320036727673042e-06, "loss": 0.025, "step": 232210 }, { "epoch": 0.96892707229348, "grad_norm": 0.9564993219399199, "learning_rate": 2.0319817962373465e-06, "loss": 0.0231, "step": 232215 }, { "epoch": 0.9689479350084702, "grad_norm": 0.70801554660123, "learning_rate": 2.03195992041394e-06, "loss": 0.02, "step": 232220 }, { "epoch": 0.9689687977234606, "grad_norm": 0.7590597150287717, "learning_rate": 2.0319380452970483e-06, "loss": 0.025, "step": 232225 }, { "epoch": 0.9689896604384508, "grad_norm": 0.5000610381052764, "learning_rate": 2.031916170886633e-06, "loss": 0.0241, "step": 232230 }, { "epoch": 0.969010523153441, "grad_norm": 0.5283834392882699, "learning_rate": 2.0318942971826554e-06, "loss": 0.0149, "step": 232235 }, { "epoch": 0.9690313858684314, "grad_norm": 0.4137397294056634, "learning_rate": 2.031872424185078e-06, "loss": 0.0176, "step": 232240 }, { "epoch": 0.9690522485834216, "grad_norm": 0.531467889052341, "learning_rate": 2.031850551893863e-06, "loss": 0.0219, "step": 232245 }, { "epoch": 0.9690731112984119, "grad_norm": 0.7979719463874537, "learning_rate": 2.031828680308972e-06, "loss": 0.0277, "step": 232250 }, { "epoch": 0.9690939740134022, "grad_norm": 1.159900921574944, "learning_rate": 2.0318068094303666e-06, "loss": 0.0282, "step": 232255 }, { "epoch": 0.9691148367283925, "grad_norm": 0.877727952674098, "learning_rate": 2.03178493925801e-06, "loss": 0.0271, "step": 232260 }, { "epoch": 0.9691356994433827, "grad_norm": 0.42840244743586914, "learning_rate": 2.0317630697918635e-06, "loss": 0.019, "step": 232265 }, { "epoch": 0.9691565621583731, "grad_norm": 0.7420359768304925, "learning_rate": 2.0317412010318885e-06, "loss": 0.0185, "step": 232270 }, { "epoch": 0.9691774248733633, "grad_norm": 0.5242072832109341, "learning_rate": 2.0317193329780476e-06, "loss": 0.0212, "step": 232275 }, { "epoch": 0.9691982875883536, "grad_norm": 0.8827356557208188, "learning_rate": 2.031697465630303e-06, "loss": 0.0142, "step": 232280 }, { "epoch": 0.9692191503033438, "grad_norm": 0.6750597669160607, "learning_rate": 2.031675598988617e-06, "loss": 0.0204, "step": 232285 }, { "epoch": 0.9692400130183342, "grad_norm": 0.817195541408315, "learning_rate": 2.03165373305295e-06, "loss": 0.0234, "step": 232290 }, { "epoch": 0.9692608757333244, "grad_norm": 0.8474384640984248, "learning_rate": 2.0316318678232655e-06, "loss": 0.0165, "step": 232295 }, { "epoch": 0.9692817384483147, "grad_norm": 0.5369554004412549, "learning_rate": 2.0316100032995254e-06, "loss": 0.0157, "step": 232300 }, { "epoch": 0.969302601163305, "grad_norm": 0.5899667552613987, "learning_rate": 2.031588139481691e-06, "loss": 0.0158, "step": 232305 }, { "epoch": 0.9693234638782953, "grad_norm": 0.5203591503244568, "learning_rate": 2.0315662763697246e-06, "loss": 0.0196, "step": 232310 }, { "epoch": 0.9693443265932855, "grad_norm": 0.3756945189492654, "learning_rate": 2.031544413963589e-06, "loss": 0.0145, "step": 232315 }, { "epoch": 0.9693651893082759, "grad_norm": 0.7246743657054531, "learning_rate": 2.031522552263245e-06, "loss": 0.0232, "step": 232320 }, { "epoch": 0.9693860520232661, "grad_norm": 0.6502606364703847, "learning_rate": 2.031500691268655e-06, "loss": 0.0217, "step": 232325 }, { "epoch": 0.9694069147382564, "grad_norm": 0.700192302539982, "learning_rate": 2.0314788309797817e-06, "loss": 0.0189, "step": 232330 }, { "epoch": 0.9694277774532467, "grad_norm": 1.128130162250498, "learning_rate": 2.0314569713965866e-06, "loss": 0.0223, "step": 232335 }, { "epoch": 0.969448640168237, "grad_norm": 1.005662960008763, "learning_rate": 2.0314351125190318e-06, "loss": 0.02, "step": 232340 }, { "epoch": 0.9694695028832272, "grad_norm": 0.38186023403792624, "learning_rate": 2.0314132543470787e-06, "loss": 0.0195, "step": 232345 }, { "epoch": 0.9694903655982174, "grad_norm": 0.7425034375134635, "learning_rate": 2.0313913968806914e-06, "loss": 0.0224, "step": 232350 }, { "epoch": 0.9695112283132078, "grad_norm": 0.9925933438797864, "learning_rate": 2.031369540119829e-06, "loss": 0.0193, "step": 232355 }, { "epoch": 0.969532091028198, "grad_norm": 0.9707575125112832, "learning_rate": 2.0313476840644555e-06, "loss": 0.024, "step": 232360 }, { "epoch": 0.9695529537431883, "grad_norm": 0.5092221545713259, "learning_rate": 2.0313258287145325e-06, "loss": 0.0205, "step": 232365 }, { "epoch": 0.9695738164581786, "grad_norm": 0.4878285928582417, "learning_rate": 2.0313039740700223e-06, "loss": 0.0187, "step": 232370 }, { "epoch": 0.9695946791731689, "grad_norm": 0.3589801846298449, "learning_rate": 2.0312821201308864e-06, "loss": 0.0297, "step": 232375 }, { "epoch": 0.9696155418881591, "grad_norm": 0.4322666171115674, "learning_rate": 2.031260266897087e-06, "loss": 0.0233, "step": 232380 }, { "epoch": 0.9696364046031495, "grad_norm": 0.6707306044592495, "learning_rate": 2.0312384143685867e-06, "loss": 0.0254, "step": 232385 }, { "epoch": 0.9696572673181397, "grad_norm": 0.5334030649664083, "learning_rate": 2.031216562545347e-06, "loss": 0.0129, "step": 232390 }, { "epoch": 0.96967813003313, "grad_norm": 0.3958295216772228, "learning_rate": 2.0311947114273304e-06, "loss": 0.028, "step": 232395 }, { "epoch": 0.9696989927481202, "grad_norm": 0.703476963247875, "learning_rate": 2.031172861014498e-06, "loss": 0.0168, "step": 232400 }, { "epoch": 0.9697198554631106, "grad_norm": 0.5983539317779867, "learning_rate": 2.0311510113068135e-06, "loss": 0.0237, "step": 232405 }, { "epoch": 0.9697407181781008, "grad_norm": 0.6808901136895517, "learning_rate": 2.0311291623042373e-06, "loss": 0.0226, "step": 232410 }, { "epoch": 0.9697615808930911, "grad_norm": 0.4971590839137782, "learning_rate": 2.0311073140067326e-06, "loss": 0.0182, "step": 232415 }, { "epoch": 0.9697824436080814, "grad_norm": 0.5923080687896822, "learning_rate": 2.031085466414261e-06, "loss": 0.02, "step": 232420 }, { "epoch": 0.9698033063230717, "grad_norm": 0.715216459207944, "learning_rate": 2.031063619526785e-06, "loss": 0.0255, "step": 232425 }, { "epoch": 0.9698241690380619, "grad_norm": 0.7807608314670069, "learning_rate": 2.031041773344266e-06, "loss": 0.0241, "step": 232430 }, { "epoch": 0.9698450317530523, "grad_norm": 0.8575260981325057, "learning_rate": 2.031019927866666e-06, "loss": 0.0207, "step": 232435 }, { "epoch": 0.9698658944680425, "grad_norm": 0.8514348231898589, "learning_rate": 2.0309980830939487e-06, "loss": 0.0175, "step": 232440 }, { "epoch": 0.9698867571830327, "grad_norm": 1.0866420943086965, "learning_rate": 2.030976239026074e-06, "loss": 0.0195, "step": 232445 }, { "epoch": 0.9699076198980231, "grad_norm": 1.006887736720046, "learning_rate": 2.0309543956630053e-06, "loss": 0.0244, "step": 232450 }, { "epoch": 0.9699284826130133, "grad_norm": 0.2551566908433112, "learning_rate": 2.030932553004705e-06, "loss": 0.0175, "step": 232455 }, { "epoch": 0.9699493453280036, "grad_norm": 1.1760370950739525, "learning_rate": 2.030910711051134e-06, "loss": 0.0193, "step": 232460 }, { "epoch": 0.9699702080429938, "grad_norm": 0.984787234037517, "learning_rate": 2.0308888698022555e-06, "loss": 0.0157, "step": 232465 }, { "epoch": 0.9699910707579842, "grad_norm": 0.4933138910139166, "learning_rate": 2.030867029258031e-06, "loss": 0.022, "step": 232470 }, { "epoch": 0.9700119334729744, "grad_norm": 0.7163850634406018, "learning_rate": 2.0308451894184224e-06, "loss": 0.024, "step": 232475 }, { "epoch": 0.9700327961879647, "grad_norm": 0.754215290908403, "learning_rate": 2.0308233502833926e-06, "loss": 0.021, "step": 232480 }, { "epoch": 0.970053658902955, "grad_norm": 0.8141582847834312, "learning_rate": 2.0308015118529035e-06, "loss": 0.019, "step": 232485 }, { "epoch": 0.9700745216179453, "grad_norm": 0.6045987464401232, "learning_rate": 2.0307796741269163e-06, "loss": 0.0153, "step": 232490 }, { "epoch": 0.9700953843329355, "grad_norm": 0.7097733052510224, "learning_rate": 2.0307578371053946e-06, "loss": 0.0154, "step": 232495 }, { "epoch": 0.9701162470479259, "grad_norm": 0.49651125714306665, "learning_rate": 2.0307360007882993e-06, "loss": 0.0214, "step": 232500 }, { "epoch": 0.9701371097629161, "grad_norm": 0.8329201390389108, "learning_rate": 2.0307141651755937e-06, "loss": 0.0231, "step": 232505 }, { "epoch": 0.9701579724779064, "grad_norm": 0.2708827897706271, "learning_rate": 2.0306923302672387e-06, "loss": 0.021, "step": 232510 }, { "epoch": 0.9701788351928967, "grad_norm": 0.5908140563533802, "learning_rate": 2.0306704960631963e-06, "loss": 0.0165, "step": 232515 }, { "epoch": 0.970199697907887, "grad_norm": 0.7530179890515277, "learning_rate": 2.0306486625634304e-06, "loss": 0.0237, "step": 232520 }, { "epoch": 0.9702205606228772, "grad_norm": 0.39050903703486173, "learning_rate": 2.030626829767901e-06, "loss": 0.0261, "step": 232525 }, { "epoch": 0.9702414233378674, "grad_norm": 0.44034335923003254, "learning_rate": 2.030604997676572e-06, "loss": 0.0211, "step": 232530 }, { "epoch": 0.9702622860528578, "grad_norm": 0.9201644213153852, "learning_rate": 2.0305831662894047e-06, "loss": 0.0228, "step": 232535 }, { "epoch": 0.970283148767848, "grad_norm": 0.3710013063578269, "learning_rate": 2.0305613356063614e-06, "loss": 0.016, "step": 232540 }, { "epoch": 0.9703040114828383, "grad_norm": 0.6461577107424946, "learning_rate": 2.030539505627404e-06, "loss": 0.0172, "step": 232545 }, { "epoch": 0.9703248741978286, "grad_norm": 0.6748620767982534, "learning_rate": 2.0305176763524953e-06, "loss": 0.0162, "step": 232550 }, { "epoch": 0.9703457369128189, "grad_norm": 0.7128150716984165, "learning_rate": 2.030495847781597e-06, "loss": 0.0211, "step": 232555 }, { "epoch": 0.9703665996278091, "grad_norm": 0.8874216594098548, "learning_rate": 2.0304740199146704e-06, "loss": 0.0216, "step": 232560 }, { "epoch": 0.9703874623427995, "grad_norm": 0.5641284917468063, "learning_rate": 2.0304521927516796e-06, "loss": 0.0174, "step": 232565 }, { "epoch": 0.9704083250577897, "grad_norm": 0.7083368260544365, "learning_rate": 2.030430366292585e-06, "loss": 0.0206, "step": 232570 }, { "epoch": 0.97042918777278, "grad_norm": 0.5953142577472648, "learning_rate": 2.03040854053735e-06, "loss": 0.019, "step": 232575 }, { "epoch": 0.9704500504877702, "grad_norm": 0.5297791467891567, "learning_rate": 2.030386715485936e-06, "loss": 0.0197, "step": 232580 }, { "epoch": 0.9704709132027606, "grad_norm": 0.6597396567227052, "learning_rate": 2.030364891138305e-06, "loss": 0.0134, "step": 232585 }, { "epoch": 0.9704917759177508, "grad_norm": 0.7676467966566228, "learning_rate": 2.0303430674944207e-06, "loss": 0.0221, "step": 232590 }, { "epoch": 0.9705126386327411, "grad_norm": 0.6444719572306061, "learning_rate": 2.030321244554243e-06, "loss": 0.017, "step": 232595 }, { "epoch": 0.9705335013477314, "grad_norm": 1.4228152219430308, "learning_rate": 2.030299422317736e-06, "loss": 0.0263, "step": 232600 }, { "epoch": 0.9705543640627217, "grad_norm": 0.9785685631693346, "learning_rate": 2.0302776007848615e-06, "loss": 0.0201, "step": 232605 }, { "epoch": 0.9705752267777119, "grad_norm": 0.9930971268837644, "learning_rate": 2.0302557799555807e-06, "loss": 0.0217, "step": 232610 }, { "epoch": 0.9705960894927023, "grad_norm": 0.823293212364438, "learning_rate": 2.030233959829856e-06, "loss": 0.0217, "step": 232615 }, { "epoch": 0.9706169522076925, "grad_norm": 0.5453375878507996, "learning_rate": 2.0302121404076508e-06, "loss": 0.0206, "step": 232620 }, { "epoch": 0.9706378149226828, "grad_norm": 0.7934543671007993, "learning_rate": 2.0301903216889266e-06, "loss": 0.019, "step": 232625 }, { "epoch": 0.9706586776376731, "grad_norm": 0.750219718285436, "learning_rate": 2.030168503673645e-06, "loss": 0.0148, "step": 232630 }, { "epoch": 0.9706795403526634, "grad_norm": 0.7180281651216449, "learning_rate": 2.030146686361769e-06, "loss": 0.0191, "step": 232635 }, { "epoch": 0.9707004030676536, "grad_norm": 0.5081129208848133, "learning_rate": 2.0301248697532606e-06, "loss": 0.0146, "step": 232640 }, { "epoch": 0.9707212657826438, "grad_norm": 0.5968552260711559, "learning_rate": 2.030103053848082e-06, "loss": 0.0213, "step": 232645 }, { "epoch": 0.9707421284976342, "grad_norm": 0.3999338968461745, "learning_rate": 2.0300812386461953e-06, "loss": 0.0236, "step": 232650 }, { "epoch": 0.9707629912126244, "grad_norm": 0.5746802576005116, "learning_rate": 2.030059424147562e-06, "loss": 0.0172, "step": 232655 }, { "epoch": 0.9707838539276147, "grad_norm": 0.5137794511073879, "learning_rate": 2.0300376103521462e-06, "loss": 0.0145, "step": 232660 }, { "epoch": 0.970804716642605, "grad_norm": 0.5634985295905017, "learning_rate": 2.0300157972599083e-06, "loss": 0.0205, "step": 232665 }, { "epoch": 0.9708255793575953, "grad_norm": 0.5785210707927737, "learning_rate": 2.0299939848708115e-06, "loss": 0.0214, "step": 232670 }, { "epoch": 0.9708464420725855, "grad_norm": 0.7260786807908095, "learning_rate": 2.0299721731848177e-06, "loss": 0.0195, "step": 232675 }, { "epoch": 0.9708673047875759, "grad_norm": 0.6392292053674227, "learning_rate": 2.0299503622018892e-06, "loss": 0.0197, "step": 232680 }, { "epoch": 0.9708881675025661, "grad_norm": 0.3530358055852073, "learning_rate": 2.029928551921988e-06, "loss": 0.0192, "step": 232685 }, { "epoch": 0.9709090302175564, "grad_norm": 0.6913595266454298, "learning_rate": 2.029906742345077e-06, "loss": 0.0184, "step": 232690 }, { "epoch": 0.9709298929325467, "grad_norm": 0.6890464241115456, "learning_rate": 2.0298849334711177e-06, "loss": 0.0247, "step": 232695 }, { "epoch": 0.970950755647537, "grad_norm": 0.5424225384096739, "learning_rate": 2.029863125300072e-06, "loss": 0.0216, "step": 232700 }, { "epoch": 0.9709716183625272, "grad_norm": 0.6489815872664157, "learning_rate": 2.0298413178319035e-06, "loss": 0.0146, "step": 232705 }, { "epoch": 0.9709924810775175, "grad_norm": 0.5490658549214543, "learning_rate": 2.0298195110665736e-06, "loss": 0.0151, "step": 232710 }, { "epoch": 0.9710133437925078, "grad_norm": 0.9127662816337669, "learning_rate": 2.0297977050040442e-06, "loss": 0.0235, "step": 232715 }, { "epoch": 0.971034206507498, "grad_norm": 1.612438516383776, "learning_rate": 2.0297758996442786e-06, "loss": 0.0193, "step": 232720 }, { "epoch": 0.9710550692224883, "grad_norm": 0.3417604736608984, "learning_rate": 2.029754094987238e-06, "loss": 0.0176, "step": 232725 }, { "epoch": 0.9710759319374787, "grad_norm": 0.7671021482516484, "learning_rate": 2.0297322910328854e-06, "loss": 0.0204, "step": 232730 }, { "epoch": 0.9710967946524689, "grad_norm": 0.8685108501568369, "learning_rate": 2.0297104877811823e-06, "loss": 0.0245, "step": 232735 }, { "epoch": 0.9711176573674591, "grad_norm": 0.5149732355781369, "learning_rate": 2.029688685232092e-06, "loss": 0.0163, "step": 232740 }, { "epoch": 0.9711385200824495, "grad_norm": 1.359441549315002, "learning_rate": 2.029666883385576e-06, "loss": 0.0257, "step": 232745 }, { "epoch": 0.9711593827974397, "grad_norm": 0.644084607722654, "learning_rate": 2.0296450822415965e-06, "loss": 0.0212, "step": 232750 }, { "epoch": 0.97118024551243, "grad_norm": 0.4666632355003975, "learning_rate": 2.0296232818001163e-06, "loss": 0.0218, "step": 232755 }, { "epoch": 0.9712011082274202, "grad_norm": 0.6655288236639968, "learning_rate": 2.0296014820610974e-06, "loss": 0.0156, "step": 232760 }, { "epoch": 0.9712219709424106, "grad_norm": 0.568640093151586, "learning_rate": 2.029579683024502e-06, "loss": 0.0214, "step": 232765 }, { "epoch": 0.9712428336574008, "grad_norm": 0.5182138460393472, "learning_rate": 2.0295578846902926e-06, "loss": 0.0215, "step": 232770 }, { "epoch": 0.9712636963723911, "grad_norm": 0.6740819408101693, "learning_rate": 2.029536087058431e-06, "loss": 0.0169, "step": 232775 }, { "epoch": 0.9712845590873814, "grad_norm": 0.3508243244033054, "learning_rate": 2.02951429012888e-06, "loss": 0.0161, "step": 232780 }, { "epoch": 0.9713054218023717, "grad_norm": 1.4481411973198075, "learning_rate": 2.0294924939016013e-06, "loss": 0.0252, "step": 232785 }, { "epoch": 0.9713262845173619, "grad_norm": 0.6811603769870976, "learning_rate": 2.029470698376558e-06, "loss": 0.0215, "step": 232790 }, { "epoch": 0.9713471472323523, "grad_norm": 0.37401537488277625, "learning_rate": 2.0294489035537123e-06, "loss": 0.025, "step": 232795 }, { "epoch": 0.9713680099473425, "grad_norm": 0.4832290430569289, "learning_rate": 2.029427109433026e-06, "loss": 0.0192, "step": 232800 }, { "epoch": 0.9713888726623328, "grad_norm": 0.3928471180287866, "learning_rate": 2.029405316014461e-06, "loss": 0.0262, "step": 232805 }, { "epoch": 0.9714097353773231, "grad_norm": 0.8784643150488962, "learning_rate": 2.0293835232979807e-06, "loss": 0.0173, "step": 232810 }, { "epoch": 0.9714305980923134, "grad_norm": 0.2665385295182972, "learning_rate": 2.029361731283547e-06, "loss": 0.0236, "step": 232815 }, { "epoch": 0.9714514608073036, "grad_norm": 0.8350145481639409, "learning_rate": 2.029339939971122e-06, "loss": 0.0166, "step": 232820 }, { "epoch": 0.9714723235222938, "grad_norm": 0.7813913534443022, "learning_rate": 2.0293181493606678e-06, "loss": 0.0188, "step": 232825 }, { "epoch": 0.9714931862372842, "grad_norm": 1.6602322459715246, "learning_rate": 2.0292963594521477e-06, "loss": 0.0237, "step": 232830 }, { "epoch": 0.9715140489522744, "grad_norm": 0.29169143827274463, "learning_rate": 2.0292745702455227e-06, "loss": 0.0119, "step": 232835 }, { "epoch": 0.9715349116672647, "grad_norm": 0.47030755217519615, "learning_rate": 2.029252781740756e-06, "loss": 0.0145, "step": 232840 }, { "epoch": 0.971555774382255, "grad_norm": 0.5372418340072345, "learning_rate": 2.0292309939378095e-06, "loss": 0.0208, "step": 232845 }, { "epoch": 0.9715766370972453, "grad_norm": 0.6923074759319101, "learning_rate": 2.0292092068366455e-06, "loss": 0.0182, "step": 232850 }, { "epoch": 0.9715974998122355, "grad_norm": 0.4991553694720277, "learning_rate": 2.029187420437227e-06, "loss": 0.0235, "step": 232855 }, { "epoch": 0.9716183625272259, "grad_norm": 0.23344328119461524, "learning_rate": 2.0291656347395157e-06, "loss": 0.0194, "step": 232860 }, { "epoch": 0.9716392252422161, "grad_norm": 0.4737195263352306, "learning_rate": 2.029143849743474e-06, "loss": 0.0162, "step": 232865 }, { "epoch": 0.9716600879572064, "grad_norm": 0.9787032907591051, "learning_rate": 2.0291220654490643e-06, "loss": 0.0184, "step": 232870 }, { "epoch": 0.9716809506721967, "grad_norm": 1.043527196670882, "learning_rate": 2.029100281856249e-06, "loss": 0.0202, "step": 232875 }, { "epoch": 0.971701813387187, "grad_norm": 0.8920586615910693, "learning_rate": 2.0290784989649907e-06, "loss": 0.0224, "step": 232880 }, { "epoch": 0.9717226761021772, "grad_norm": 0.8624514220767132, "learning_rate": 2.029056716775251e-06, "loss": 0.0197, "step": 232885 }, { "epoch": 0.9717435388171675, "grad_norm": 0.7870801344442185, "learning_rate": 2.0290349352869927e-06, "loss": 0.0183, "step": 232890 }, { "epoch": 0.9717644015321578, "grad_norm": 0.21499410325184584, "learning_rate": 2.0290131545001785e-06, "loss": 0.0144, "step": 232895 }, { "epoch": 0.9717852642471481, "grad_norm": 1.1161746964045716, "learning_rate": 2.0289913744147702e-06, "loss": 0.017, "step": 232900 }, { "epoch": 0.9718061269621383, "grad_norm": 0.4329684910970865, "learning_rate": 2.0289695950307303e-06, "loss": 0.017, "step": 232905 }, { "epoch": 0.9718269896771287, "grad_norm": 0.643066363163142, "learning_rate": 2.0289478163480217e-06, "loss": 0.0256, "step": 232910 }, { "epoch": 0.9718478523921189, "grad_norm": 0.8704953171133316, "learning_rate": 2.0289260383666056e-06, "loss": 0.0196, "step": 232915 }, { "epoch": 0.9718687151071091, "grad_norm": 0.5280771129373618, "learning_rate": 2.028904261086445e-06, "loss": 0.026, "step": 232920 }, { "epoch": 0.9718895778220995, "grad_norm": 1.1264904620745109, "learning_rate": 2.028882484507503e-06, "loss": 0.0258, "step": 232925 }, { "epoch": 0.9719104405370897, "grad_norm": 0.8891445590830472, "learning_rate": 2.028860708629741e-06, "loss": 0.0178, "step": 232930 }, { "epoch": 0.97193130325208, "grad_norm": 0.44386494929328146, "learning_rate": 2.028838933453121e-06, "loss": 0.0189, "step": 232935 }, { "epoch": 0.9719521659670702, "grad_norm": 0.6229695230380563, "learning_rate": 2.0288171589776066e-06, "loss": 0.0196, "step": 232940 }, { "epoch": 0.9719730286820606, "grad_norm": 0.7146926338627427, "learning_rate": 2.0287953852031596e-06, "loss": 0.0169, "step": 232945 }, { "epoch": 0.9719938913970508, "grad_norm": 0.7842730905631039, "learning_rate": 2.0287736121297425e-06, "loss": 0.0172, "step": 232950 }, { "epoch": 0.9720147541120411, "grad_norm": 0.6157678013058504, "learning_rate": 2.028751839757317e-06, "loss": 0.0172, "step": 232955 }, { "epoch": 0.9720356168270314, "grad_norm": 0.6080842448360201, "learning_rate": 2.0287300680858463e-06, "loss": 0.0185, "step": 232960 }, { "epoch": 0.9720564795420217, "grad_norm": 0.7745106505035039, "learning_rate": 2.0287082971152927e-06, "loss": 0.0189, "step": 232965 }, { "epoch": 0.9720773422570119, "grad_norm": 0.8563128265164949, "learning_rate": 2.0286865268456186e-06, "loss": 0.019, "step": 232970 }, { "epoch": 0.9720982049720023, "grad_norm": 0.37056761438559177, "learning_rate": 2.0286647572767855e-06, "loss": 0.0205, "step": 232975 }, { "epoch": 0.9721190676869925, "grad_norm": 0.8739423952624125, "learning_rate": 2.0286429884087574e-06, "loss": 0.0149, "step": 232980 }, { "epoch": 0.9721399304019828, "grad_norm": 0.9062456583519793, "learning_rate": 2.0286212202414953e-06, "loss": 0.0293, "step": 232985 }, { "epoch": 0.9721607931169731, "grad_norm": 0.7154884817368031, "learning_rate": 2.028599452774962e-06, "loss": 0.0142, "step": 232990 }, { "epoch": 0.9721816558319634, "grad_norm": 0.686255512827519, "learning_rate": 2.02857768600912e-06, "loss": 0.0226, "step": 232995 }, { "epoch": 0.9722025185469536, "grad_norm": 0.6535835803846615, "learning_rate": 2.028555919943932e-06, "loss": 0.0146, "step": 233000 }, { "epoch": 0.9722233812619439, "grad_norm": 2.2195819484050494, "learning_rate": 2.02853415457936e-06, "loss": 0.0215, "step": 233005 }, { "epoch": 0.9722442439769342, "grad_norm": 0.7650645303335105, "learning_rate": 2.028512389915367e-06, "loss": 0.0206, "step": 233010 }, { "epoch": 0.9722651066919245, "grad_norm": 0.2910877801109268, "learning_rate": 2.0284906259519146e-06, "loss": 0.0182, "step": 233015 }, { "epoch": 0.9722859694069147, "grad_norm": 0.5167133423932749, "learning_rate": 2.028468862688966e-06, "loss": 0.0201, "step": 233020 }, { "epoch": 0.972306832121905, "grad_norm": 0.695113609840711, "learning_rate": 2.0284471001264825e-06, "loss": 0.0166, "step": 233025 }, { "epoch": 0.9723276948368953, "grad_norm": 0.47817633844707635, "learning_rate": 2.0284253382644277e-06, "loss": 0.0251, "step": 233030 }, { "epoch": 0.9723485575518855, "grad_norm": 0.365360404393392, "learning_rate": 2.0284035771027634e-06, "loss": 0.0169, "step": 233035 }, { "epoch": 0.9723694202668759, "grad_norm": 0.8440122174070626, "learning_rate": 2.028381816641452e-06, "loss": 0.0187, "step": 233040 }, { "epoch": 0.9723902829818661, "grad_norm": 0.7196029649922326, "learning_rate": 2.0283600568804563e-06, "loss": 0.0211, "step": 233045 }, { "epoch": 0.9724111456968564, "grad_norm": 4.589892409637615, "learning_rate": 2.028338297819739e-06, "loss": 0.0219, "step": 233050 }, { "epoch": 0.9724320084118467, "grad_norm": 0.5365732295689625, "learning_rate": 2.0283165394592618e-06, "loss": 0.015, "step": 233055 }, { "epoch": 0.972452871126837, "grad_norm": 0.8041606264018296, "learning_rate": 2.0282947817989877e-06, "loss": 0.0247, "step": 233060 }, { "epoch": 0.9724737338418272, "grad_norm": 0.756137749671487, "learning_rate": 2.0282730248388787e-06, "loss": 0.0151, "step": 233065 }, { "epoch": 0.9724945965568175, "grad_norm": 0.6875601707446827, "learning_rate": 2.0282512685788974e-06, "loss": 0.0194, "step": 233070 }, { "epoch": 0.9725154592718078, "grad_norm": 0.5107409689107623, "learning_rate": 2.0282295130190063e-06, "loss": 0.0171, "step": 233075 }, { "epoch": 0.9725363219867981, "grad_norm": 0.49818261228146643, "learning_rate": 2.028207758159168e-06, "loss": 0.0212, "step": 233080 }, { "epoch": 0.9725571847017883, "grad_norm": 0.649502815749132, "learning_rate": 2.0281860039993448e-06, "loss": 0.018, "step": 233085 }, { "epoch": 0.9725780474167787, "grad_norm": 0.4485378002066645, "learning_rate": 2.028164250539499e-06, "loss": 0.0203, "step": 233090 }, { "epoch": 0.9725989101317689, "grad_norm": 0.2535970509424183, "learning_rate": 2.0281424977795935e-06, "loss": 0.0183, "step": 233095 }, { "epoch": 0.9726197728467592, "grad_norm": 0.24043284823913888, "learning_rate": 2.02812074571959e-06, "loss": 0.0168, "step": 233100 }, { "epoch": 0.9726406355617495, "grad_norm": 0.20488476636036032, "learning_rate": 2.0280989943594525e-06, "loss": 0.0163, "step": 233105 }, { "epoch": 0.9726614982767398, "grad_norm": 0.4332401326505653, "learning_rate": 2.0280772436991413e-06, "loss": 0.0231, "step": 233110 }, { "epoch": 0.97268236099173, "grad_norm": 0.5752751458335554, "learning_rate": 2.0280554937386207e-06, "loss": 0.0134, "step": 233115 }, { "epoch": 0.9727032237067202, "grad_norm": 0.6284861984126178, "learning_rate": 2.0280337444778523e-06, "loss": 0.0161, "step": 233120 }, { "epoch": 0.9727240864217106, "grad_norm": 0.9367023454089032, "learning_rate": 2.0280119959167987e-06, "loss": 0.019, "step": 233125 }, { "epoch": 0.9727449491367008, "grad_norm": 0.15236885199822617, "learning_rate": 2.0279902480554227e-06, "loss": 0.0223, "step": 233130 }, { "epoch": 0.9727658118516911, "grad_norm": 0.5826643745993787, "learning_rate": 2.0279685008936862e-06, "loss": 0.0197, "step": 233135 }, { "epoch": 0.9727866745666814, "grad_norm": 1.3185630022955745, "learning_rate": 2.0279467544315524e-06, "loss": 0.0271, "step": 233140 }, { "epoch": 0.9728075372816717, "grad_norm": 0.294263871637272, "learning_rate": 2.027925008668983e-06, "loss": 0.0238, "step": 233145 }, { "epoch": 0.9728283999966619, "grad_norm": 1.2974633947867842, "learning_rate": 2.0279032636059413e-06, "loss": 0.0287, "step": 233150 }, { "epoch": 0.9728492627116523, "grad_norm": 0.6661638392748519, "learning_rate": 2.027881519242389e-06, "loss": 0.014, "step": 233155 }, { "epoch": 0.9728701254266425, "grad_norm": 0.5989172592947742, "learning_rate": 2.0278597755782893e-06, "loss": 0.0173, "step": 233160 }, { "epoch": 0.9728909881416328, "grad_norm": 0.30029227493816424, "learning_rate": 2.0278380326136044e-06, "loss": 0.0141, "step": 233165 }, { "epoch": 0.9729118508566231, "grad_norm": 0.7282232908255873, "learning_rate": 2.027816290348297e-06, "loss": 0.0167, "step": 233170 }, { "epoch": 0.9729327135716134, "grad_norm": 0.8276338719765268, "learning_rate": 2.0277945487823294e-06, "loss": 0.0173, "step": 233175 }, { "epoch": 0.9729535762866036, "grad_norm": 0.3654158698913957, "learning_rate": 2.0277728079156634e-06, "loss": 0.0186, "step": 233180 }, { "epoch": 0.9729744390015939, "grad_norm": 0.33055841850246365, "learning_rate": 2.027751067748263e-06, "loss": 0.0258, "step": 233185 }, { "epoch": 0.9729953017165842, "grad_norm": 0.5708267159264294, "learning_rate": 2.02772932828009e-06, "loss": 0.0169, "step": 233190 }, { "epoch": 0.9730161644315745, "grad_norm": 0.9021989084348873, "learning_rate": 2.027707589511106e-06, "loss": 0.0193, "step": 233195 }, { "epoch": 0.9730370271465647, "grad_norm": 0.546986736266518, "learning_rate": 2.0276858514412755e-06, "loss": 0.021, "step": 233200 }, { "epoch": 0.9730578898615551, "grad_norm": 0.6246630283150781, "learning_rate": 2.0276641140705593e-06, "loss": 0.0247, "step": 233205 }, { "epoch": 0.9730787525765453, "grad_norm": 0.5548283010693754, "learning_rate": 2.0276423773989205e-06, "loss": 0.027, "step": 233210 }, { "epoch": 0.9730996152915355, "grad_norm": 0.6591186720579505, "learning_rate": 2.0276206414263224e-06, "loss": 0.0183, "step": 233215 }, { "epoch": 0.9731204780065259, "grad_norm": 0.34828664729765807, "learning_rate": 2.0275989061527263e-06, "loss": 0.0171, "step": 233220 }, { "epoch": 0.9731413407215161, "grad_norm": 0.8791967487585202, "learning_rate": 2.027577171578095e-06, "loss": 0.0228, "step": 233225 }, { "epoch": 0.9731622034365064, "grad_norm": 0.19708327053497346, "learning_rate": 2.0275554377023914e-06, "loss": 0.0206, "step": 233230 }, { "epoch": 0.9731830661514967, "grad_norm": 0.5141351772891676, "learning_rate": 2.0275337045255784e-06, "loss": 0.0138, "step": 233235 }, { "epoch": 0.973203928866487, "grad_norm": 0.9543347002443732, "learning_rate": 2.0275119720476175e-06, "loss": 0.0253, "step": 233240 }, { "epoch": 0.9732247915814772, "grad_norm": 0.7562088671141043, "learning_rate": 2.027490240268472e-06, "loss": 0.0143, "step": 233245 }, { "epoch": 0.9732456542964675, "grad_norm": 0.595679057521865, "learning_rate": 2.0274685091881045e-06, "loss": 0.0153, "step": 233250 }, { "epoch": 0.9732665170114578, "grad_norm": 0.6621333545475004, "learning_rate": 2.0274467788064774e-06, "loss": 0.0197, "step": 233255 }, { "epoch": 0.9732873797264481, "grad_norm": 0.6507705895351301, "learning_rate": 2.027425049123553e-06, "loss": 0.0193, "step": 233260 }, { "epoch": 0.9733082424414383, "grad_norm": 0.6983253834446255, "learning_rate": 2.027403320139294e-06, "loss": 0.0209, "step": 233265 }, { "epoch": 0.9733291051564287, "grad_norm": 0.8593326819820605, "learning_rate": 2.0273815918536628e-06, "loss": 0.023, "step": 233270 }, { "epoch": 0.9733499678714189, "grad_norm": 0.5305049217861743, "learning_rate": 2.0273598642666226e-06, "loss": 0.0206, "step": 233275 }, { "epoch": 0.9733708305864092, "grad_norm": 0.7121050071153447, "learning_rate": 2.0273381373781352e-06, "loss": 0.0178, "step": 233280 }, { "epoch": 0.9733916933013995, "grad_norm": 0.3007469200872123, "learning_rate": 2.0273164111881637e-06, "loss": 0.0209, "step": 233285 }, { "epoch": 0.9734125560163898, "grad_norm": 1.0775132315761464, "learning_rate": 2.0272946856966708e-06, "loss": 0.0282, "step": 233290 }, { "epoch": 0.97343341873138, "grad_norm": 0.31352376404155907, "learning_rate": 2.0272729609036184e-06, "loss": 0.0159, "step": 233295 }, { "epoch": 0.9734542814463703, "grad_norm": 0.6944867350939212, "learning_rate": 2.027251236808969e-06, "loss": 0.0212, "step": 233300 }, { "epoch": 0.9734751441613606, "grad_norm": 0.4588327701644844, "learning_rate": 2.027229513412686e-06, "loss": 0.0135, "step": 233305 }, { "epoch": 0.9734960068763509, "grad_norm": 0.42045587652684013, "learning_rate": 2.027207790714732e-06, "loss": 0.0284, "step": 233310 }, { "epoch": 0.9735168695913411, "grad_norm": 0.8624452223270658, "learning_rate": 2.0271860687150683e-06, "loss": 0.0178, "step": 233315 }, { "epoch": 0.9735377323063314, "grad_norm": 0.47454817343353733, "learning_rate": 2.027164347413659e-06, "loss": 0.0229, "step": 233320 }, { "epoch": 0.9735585950213217, "grad_norm": 0.4585420316090485, "learning_rate": 2.0271426268104656e-06, "loss": 0.0198, "step": 233325 }, { "epoch": 0.9735794577363119, "grad_norm": 1.180468258502033, "learning_rate": 2.027120906905452e-06, "loss": 0.0208, "step": 233330 }, { "epoch": 0.9736003204513023, "grad_norm": 1.259275702852609, "learning_rate": 2.0270991876985787e-06, "loss": 0.0247, "step": 233335 }, { "epoch": 0.9736211831662925, "grad_norm": 0.3185537515229909, "learning_rate": 2.0270774691898103e-06, "loss": 0.0177, "step": 233340 }, { "epoch": 0.9736420458812828, "grad_norm": 0.5740154270480363, "learning_rate": 2.027055751379109e-06, "loss": 0.0167, "step": 233345 }, { "epoch": 0.9736629085962731, "grad_norm": 0.601147918304084, "learning_rate": 2.0270340342664366e-06, "loss": 0.015, "step": 233350 }, { "epoch": 0.9736837713112634, "grad_norm": 0.3289147842875385, "learning_rate": 2.027012317851756e-06, "loss": 0.0215, "step": 233355 }, { "epoch": 0.9737046340262536, "grad_norm": 0.5685038499409433, "learning_rate": 2.0269906021350304e-06, "loss": 0.0179, "step": 233360 }, { "epoch": 0.9737254967412439, "grad_norm": 0.5434808402339886, "learning_rate": 2.0269688871162215e-06, "loss": 0.0156, "step": 233365 }, { "epoch": 0.9737463594562342, "grad_norm": 0.5660003895243396, "learning_rate": 2.0269471727952924e-06, "loss": 0.0164, "step": 233370 }, { "epoch": 0.9737672221712245, "grad_norm": 0.7311874472389734, "learning_rate": 2.026925459172206e-06, "loss": 0.0179, "step": 233375 }, { "epoch": 0.9737880848862147, "grad_norm": 0.863423328560161, "learning_rate": 2.0269037462469248e-06, "loss": 0.0241, "step": 233380 }, { "epoch": 0.9738089476012051, "grad_norm": 0.4842943042831775, "learning_rate": 2.0268820340194113e-06, "loss": 0.0203, "step": 233385 }, { "epoch": 0.9738298103161953, "grad_norm": 0.7315228598098542, "learning_rate": 2.0268603224896277e-06, "loss": 0.0182, "step": 233390 }, { "epoch": 0.9738506730311856, "grad_norm": 0.5834164398774382, "learning_rate": 2.026838611657538e-06, "loss": 0.024, "step": 233395 }, { "epoch": 0.9738715357461759, "grad_norm": 0.5048462067533802, "learning_rate": 2.0268169015231026e-06, "loss": 0.0162, "step": 233400 }, { "epoch": 0.9738923984611662, "grad_norm": 0.4344603568028356, "learning_rate": 2.026795192086286e-06, "loss": 0.0167, "step": 233405 }, { "epoch": 0.9739132611761564, "grad_norm": 0.9122595299719507, "learning_rate": 2.0267734833470504e-06, "loss": 0.0151, "step": 233410 }, { "epoch": 0.9739341238911468, "grad_norm": 0.700444685399948, "learning_rate": 2.026751775305358e-06, "loss": 0.0241, "step": 233415 }, { "epoch": 0.973954986606137, "grad_norm": 0.6417961576799668, "learning_rate": 2.026730067961172e-06, "loss": 0.018, "step": 233420 }, { "epoch": 0.9739758493211272, "grad_norm": 0.6756997034821343, "learning_rate": 2.0267083613144544e-06, "loss": 0.0219, "step": 233425 }, { "epoch": 0.9739967120361175, "grad_norm": 0.7879969989517785, "learning_rate": 2.0266866553651686e-06, "loss": 0.0243, "step": 233430 }, { "epoch": 0.9740175747511078, "grad_norm": 0.7181758880177649, "learning_rate": 2.0266649501132764e-06, "loss": 0.0177, "step": 233435 }, { "epoch": 0.9740384374660981, "grad_norm": 0.6755998315853643, "learning_rate": 2.0266432455587413e-06, "loss": 0.017, "step": 233440 }, { "epoch": 0.9740593001810883, "grad_norm": 0.8096737290243008, "learning_rate": 2.026621541701526e-06, "loss": 0.0203, "step": 233445 }, { "epoch": 0.9740801628960787, "grad_norm": 0.7700309778053361, "learning_rate": 2.0265998385415923e-06, "loss": 0.0259, "step": 233450 }, { "epoch": 0.9741010256110689, "grad_norm": 0.4881665650606614, "learning_rate": 2.0265781360789033e-06, "loss": 0.0189, "step": 233455 }, { "epoch": 0.9741218883260592, "grad_norm": 0.6927221523940799, "learning_rate": 2.0265564343134216e-06, "loss": 0.0213, "step": 233460 }, { "epoch": 0.9741427510410495, "grad_norm": 1.0846866211813841, "learning_rate": 2.0265347332451103e-06, "loss": 0.0248, "step": 233465 }, { "epoch": 0.9741636137560398, "grad_norm": 1.1826206827478143, "learning_rate": 2.026513032873932e-06, "loss": 0.0187, "step": 233470 }, { "epoch": 0.97418447647103, "grad_norm": 0.7197870090685677, "learning_rate": 2.0264913331998483e-06, "loss": 0.0243, "step": 233475 }, { "epoch": 0.9742053391860203, "grad_norm": 0.20151538423818804, "learning_rate": 2.0264696342228228e-06, "loss": 0.0153, "step": 233480 }, { "epoch": 0.9742262019010106, "grad_norm": 0.49369657411895734, "learning_rate": 2.026447935942819e-06, "loss": 0.0196, "step": 233485 }, { "epoch": 0.9742470646160009, "grad_norm": 0.3184004567302967, "learning_rate": 2.026426238359798e-06, "loss": 0.0169, "step": 233490 }, { "epoch": 0.9742679273309911, "grad_norm": 0.80955330575806, "learning_rate": 2.026404541473723e-06, "loss": 0.0236, "step": 233495 }, { "epoch": 0.9742887900459815, "grad_norm": 0.49246716102199894, "learning_rate": 2.026382845284557e-06, "loss": 0.0198, "step": 233500 }, { "epoch": 0.9743096527609717, "grad_norm": 0.5967256136333208, "learning_rate": 2.0263611497922623e-06, "loss": 0.0144, "step": 233505 }, { "epoch": 0.974330515475962, "grad_norm": 3.5602392899427793, "learning_rate": 2.026339454996802e-06, "loss": 0.0143, "step": 233510 }, { "epoch": 0.9743513781909523, "grad_norm": 0.921103009973251, "learning_rate": 2.026317760898139e-06, "loss": 0.0264, "step": 233515 }, { "epoch": 0.9743722409059425, "grad_norm": 0.2980002051935564, "learning_rate": 2.0262960674962353e-06, "loss": 0.0187, "step": 233520 }, { "epoch": 0.9743931036209328, "grad_norm": 0.6994428168662387, "learning_rate": 2.026274374791054e-06, "loss": 0.022, "step": 233525 }, { "epoch": 0.9744139663359231, "grad_norm": 0.6508611119424785, "learning_rate": 2.0262526827825577e-06, "loss": 0.0234, "step": 233530 }, { "epoch": 0.9744348290509134, "grad_norm": 0.2588406801064949, "learning_rate": 2.026230991470709e-06, "loss": 0.0198, "step": 233535 }, { "epoch": 0.9744556917659036, "grad_norm": 0.3363898546190803, "learning_rate": 2.0262093008554707e-06, "loss": 0.018, "step": 233540 }, { "epoch": 0.9744765544808939, "grad_norm": 0.6093059742011379, "learning_rate": 2.026187610936806e-06, "loss": 0.0207, "step": 233545 }, { "epoch": 0.9744974171958842, "grad_norm": 0.6890961631218395, "learning_rate": 2.0261659217146774e-06, "loss": 0.0231, "step": 233550 }, { "epoch": 0.9745182799108745, "grad_norm": 0.5415404818580556, "learning_rate": 2.0261442331890465e-06, "loss": 0.015, "step": 233555 }, { "epoch": 0.9745391426258647, "grad_norm": 0.8706725818094493, "learning_rate": 2.0261225453598775e-06, "loss": 0.0162, "step": 233560 }, { "epoch": 0.9745600053408551, "grad_norm": 1.2070625694204162, "learning_rate": 2.0261008582271326e-06, "loss": 0.0219, "step": 233565 }, { "epoch": 0.9745808680558453, "grad_norm": 0.4844161956798815, "learning_rate": 2.026079171790774e-06, "loss": 0.0175, "step": 233570 }, { "epoch": 0.9746017307708356, "grad_norm": 0.50343978086229, "learning_rate": 2.0260574860507658e-06, "loss": 0.0233, "step": 233575 }, { "epoch": 0.9746225934858259, "grad_norm": 1.2502452108017539, "learning_rate": 2.0260358010070692e-06, "loss": 0.0266, "step": 233580 }, { "epoch": 0.9746434562008162, "grad_norm": 0.38895148251559786, "learning_rate": 2.0260141166596474e-06, "loss": 0.0261, "step": 233585 }, { "epoch": 0.9746643189158064, "grad_norm": 0.774509647582666, "learning_rate": 2.0259924330084637e-06, "loss": 0.02, "step": 233590 }, { "epoch": 0.9746851816307968, "grad_norm": 0.6350081209769886, "learning_rate": 2.0259707500534805e-06, "loss": 0.0218, "step": 233595 }, { "epoch": 0.974706044345787, "grad_norm": 0.6564603010467877, "learning_rate": 2.0259490677946606e-06, "loss": 0.0212, "step": 233600 }, { "epoch": 0.9747269070607772, "grad_norm": 0.8408931323800815, "learning_rate": 2.0259273862319663e-06, "loss": 0.0253, "step": 233605 }, { "epoch": 0.9747477697757675, "grad_norm": 0.5025458057796511, "learning_rate": 2.025905705365361e-06, "loss": 0.0212, "step": 233610 }, { "epoch": 0.9747686324907578, "grad_norm": 0.6049599540708174, "learning_rate": 2.0258840251948073e-06, "loss": 0.0201, "step": 233615 }, { "epoch": 0.9747894952057481, "grad_norm": 0.39225578204922357, "learning_rate": 2.0258623457202673e-06, "loss": 0.0186, "step": 233620 }, { "epoch": 0.9748103579207383, "grad_norm": 1.8491305743196904, "learning_rate": 2.025840666941705e-06, "loss": 0.0206, "step": 233625 }, { "epoch": 0.9748312206357287, "grad_norm": 0.5108210025438461, "learning_rate": 2.025818988859082e-06, "loss": 0.019, "step": 233630 }, { "epoch": 0.9748520833507189, "grad_norm": 0.493704316278818, "learning_rate": 2.025797311472362e-06, "loss": 0.0225, "step": 233635 }, { "epoch": 0.9748729460657092, "grad_norm": 0.8036055580335142, "learning_rate": 2.025775634781507e-06, "loss": 0.0245, "step": 233640 }, { "epoch": 0.9748938087806995, "grad_norm": 0.7091920921414026, "learning_rate": 2.0257539587864796e-06, "loss": 0.0223, "step": 233645 }, { "epoch": 0.9749146714956898, "grad_norm": 0.6341756431610516, "learning_rate": 2.0257322834872437e-06, "loss": 0.024, "step": 233650 }, { "epoch": 0.97493553421068, "grad_norm": 0.7940195469942614, "learning_rate": 2.025710608883761e-06, "loss": 0.0135, "step": 233655 }, { "epoch": 0.9749563969256703, "grad_norm": 0.8729940972297385, "learning_rate": 2.0256889349759944e-06, "loss": 0.0153, "step": 233660 }, { "epoch": 0.9749772596406606, "grad_norm": 0.9115380772524871, "learning_rate": 2.0256672617639078e-06, "loss": 0.0207, "step": 233665 }, { "epoch": 0.9749981223556509, "grad_norm": 0.5258401832369315, "learning_rate": 2.0256455892474626e-06, "loss": 0.0208, "step": 233670 }, { "epoch": 0.9750189850706411, "grad_norm": 0.42732218272405054, "learning_rate": 2.025623917426622e-06, "loss": 0.0224, "step": 233675 }, { "epoch": 0.9750398477856315, "grad_norm": 0.6061505232848212, "learning_rate": 2.0256022463013498e-06, "loss": 0.0191, "step": 233680 }, { "epoch": 0.9750607105006217, "grad_norm": 0.9233916197771512, "learning_rate": 2.025580575871607e-06, "loss": 0.0255, "step": 233685 }, { "epoch": 0.975081573215612, "grad_norm": 1.2709313317343016, "learning_rate": 2.0255589061373578e-06, "loss": 0.0284, "step": 233690 }, { "epoch": 0.9751024359306023, "grad_norm": 0.5316866912941314, "learning_rate": 2.0255372370985644e-06, "loss": 0.0173, "step": 233695 }, { "epoch": 0.9751232986455926, "grad_norm": 0.8468179502038283, "learning_rate": 2.0255155687551897e-06, "loss": 0.0188, "step": 233700 }, { "epoch": 0.9751441613605828, "grad_norm": 0.7383996508637982, "learning_rate": 2.025493901107197e-06, "loss": 0.0182, "step": 233705 }, { "epoch": 0.9751650240755732, "grad_norm": 0.9562435468300036, "learning_rate": 2.025472234154548e-06, "loss": 0.0239, "step": 233710 }, { "epoch": 0.9751858867905634, "grad_norm": 0.5252009873738965, "learning_rate": 2.025450567897206e-06, "loss": 0.02, "step": 233715 }, { "epoch": 0.9752067495055536, "grad_norm": 0.5555194694903033, "learning_rate": 2.0254289023351342e-06, "loss": 0.0319, "step": 233720 }, { "epoch": 0.9752276122205439, "grad_norm": 0.7513085722772046, "learning_rate": 2.0254072374682953e-06, "loss": 0.0271, "step": 233725 }, { "epoch": 0.9752484749355342, "grad_norm": 0.6118966856163545, "learning_rate": 2.025385573296652e-06, "loss": 0.0168, "step": 233730 }, { "epoch": 0.9752693376505245, "grad_norm": 0.7832692374539807, "learning_rate": 2.025363909820167e-06, "loss": 0.0215, "step": 233735 }, { "epoch": 0.9752902003655147, "grad_norm": 0.7136955272343622, "learning_rate": 2.0253422470388034e-06, "loss": 0.0178, "step": 233740 }, { "epoch": 0.9753110630805051, "grad_norm": 0.6462955894727135, "learning_rate": 2.0253205849525237e-06, "loss": 0.0197, "step": 233745 }, { "epoch": 0.9753319257954953, "grad_norm": 1.31662261808602, "learning_rate": 2.025298923561291e-06, "loss": 0.0225, "step": 233750 }, { "epoch": 0.9753527885104856, "grad_norm": 0.9945594326764847, "learning_rate": 2.0252772628650678e-06, "loss": 0.0207, "step": 233755 }, { "epoch": 0.9753736512254759, "grad_norm": 0.5847750531498784, "learning_rate": 2.0252556028638173e-06, "loss": 0.0168, "step": 233760 }, { "epoch": 0.9753945139404662, "grad_norm": 0.9226239900395208, "learning_rate": 2.0252339435575024e-06, "loss": 0.0173, "step": 233765 }, { "epoch": 0.9754153766554564, "grad_norm": 0.4858309569422502, "learning_rate": 2.0252122849460856e-06, "loss": 0.0162, "step": 233770 }, { "epoch": 0.9754362393704468, "grad_norm": 0.2615095798550331, "learning_rate": 2.0251906270295298e-06, "loss": 0.0143, "step": 233775 }, { "epoch": 0.975457102085437, "grad_norm": 0.3123689997948796, "learning_rate": 2.0251689698077976e-06, "loss": 0.0165, "step": 233780 }, { "epoch": 0.9754779648004273, "grad_norm": 0.6456642349776507, "learning_rate": 2.0251473132808527e-06, "loss": 0.0184, "step": 233785 }, { "epoch": 0.9754988275154175, "grad_norm": 0.8024726507135632, "learning_rate": 2.0251256574486574e-06, "loss": 0.0287, "step": 233790 }, { "epoch": 0.9755196902304079, "grad_norm": 0.4976739697050297, "learning_rate": 2.0251040023111743e-06, "loss": 0.0142, "step": 233795 }, { "epoch": 0.9755405529453981, "grad_norm": 0.453004352968922, "learning_rate": 2.0250823478683667e-06, "loss": 0.0178, "step": 233800 }, { "epoch": 0.9755614156603883, "grad_norm": 0.4367986832774066, "learning_rate": 2.0250606941201976e-06, "loss": 0.0195, "step": 233805 }, { "epoch": 0.9755822783753787, "grad_norm": 0.2858324403941126, "learning_rate": 2.0250390410666295e-06, "loss": 0.0164, "step": 233810 }, { "epoch": 0.9756031410903689, "grad_norm": 0.7912214403345723, "learning_rate": 2.0250173887076245e-06, "loss": 0.022, "step": 233815 }, { "epoch": 0.9756240038053592, "grad_norm": 0.8997442480686076, "learning_rate": 2.024995737043147e-06, "loss": 0.0275, "step": 233820 }, { "epoch": 0.9756448665203495, "grad_norm": 5.079804245946119, "learning_rate": 2.0249740860731594e-06, "loss": 0.0214, "step": 233825 }, { "epoch": 0.9756657292353398, "grad_norm": 0.3222062604037185, "learning_rate": 2.0249524357976234e-06, "loss": 0.0144, "step": 233830 }, { "epoch": 0.97568659195033, "grad_norm": 0.6066740385257083, "learning_rate": 2.0249307862165037e-06, "loss": 0.0281, "step": 233835 }, { "epoch": 0.9757074546653203, "grad_norm": 0.5444518148265646, "learning_rate": 2.024909137329762e-06, "loss": 0.0209, "step": 233840 }, { "epoch": 0.9757283173803106, "grad_norm": 0.514584653353725, "learning_rate": 2.0248874891373613e-06, "loss": 0.0267, "step": 233845 }, { "epoch": 0.9757491800953009, "grad_norm": 0.7146941150645142, "learning_rate": 2.024865841639265e-06, "loss": 0.0188, "step": 233850 }, { "epoch": 0.9757700428102911, "grad_norm": 0.44722487302480035, "learning_rate": 2.0248441948354355e-06, "loss": 0.0164, "step": 233855 }, { "epoch": 0.9757909055252815, "grad_norm": 0.7205623700130249, "learning_rate": 2.024822548725836e-06, "loss": 0.0181, "step": 233860 }, { "epoch": 0.9758117682402717, "grad_norm": 0.6034305622399869, "learning_rate": 2.0248009033104288e-06, "loss": 0.0198, "step": 233865 }, { "epoch": 0.975832630955262, "grad_norm": 0.9361558831403098, "learning_rate": 2.0247792585891775e-06, "loss": 0.0275, "step": 233870 }, { "epoch": 0.9758534936702523, "grad_norm": 0.8023342148655345, "learning_rate": 2.0247576145620447e-06, "loss": 0.0256, "step": 233875 }, { "epoch": 0.9758743563852426, "grad_norm": 0.4702072407355758, "learning_rate": 2.0247359712289932e-06, "loss": 0.014, "step": 233880 }, { "epoch": 0.9758952191002328, "grad_norm": 0.666553908755753, "learning_rate": 2.024714328589986e-06, "loss": 0.0228, "step": 233885 }, { "epoch": 0.9759160818152232, "grad_norm": 0.9025584800007629, "learning_rate": 2.0246926866449866e-06, "loss": 0.0175, "step": 233890 }, { "epoch": 0.9759369445302134, "grad_norm": 1.160324590148904, "learning_rate": 2.0246710453939573e-06, "loss": 0.0251, "step": 233895 }, { "epoch": 0.9759578072452036, "grad_norm": 0.7253251263736095, "learning_rate": 2.0246494048368606e-06, "loss": 0.0215, "step": 233900 }, { "epoch": 0.9759786699601939, "grad_norm": 0.7589657188787412, "learning_rate": 2.0246277649736597e-06, "loss": 0.0163, "step": 233905 }, { "epoch": 0.9759995326751842, "grad_norm": 0.4764201889163374, "learning_rate": 2.0246061258043185e-06, "loss": 0.0182, "step": 233910 }, { "epoch": 0.9760203953901745, "grad_norm": 0.4943223675775711, "learning_rate": 2.0245844873287985e-06, "loss": 0.0213, "step": 233915 }, { "epoch": 0.9760412581051647, "grad_norm": 0.7884111934858584, "learning_rate": 2.0245628495470633e-06, "loss": 0.0195, "step": 233920 }, { "epoch": 0.9760621208201551, "grad_norm": 0.5075158374620332, "learning_rate": 2.024541212459076e-06, "loss": 0.0187, "step": 233925 }, { "epoch": 0.9760829835351453, "grad_norm": 0.3743544488169611, "learning_rate": 2.024519576064799e-06, "loss": 0.0175, "step": 233930 }, { "epoch": 0.9761038462501356, "grad_norm": 0.39718219921599607, "learning_rate": 2.0244979403641958e-06, "loss": 0.0199, "step": 233935 }, { "epoch": 0.9761247089651259, "grad_norm": 0.28742728882982016, "learning_rate": 2.024476305357229e-06, "loss": 0.0111, "step": 233940 }, { "epoch": 0.9761455716801162, "grad_norm": 1.7678665203854504, "learning_rate": 2.0244546710438613e-06, "loss": 0.019, "step": 233945 }, { "epoch": 0.9761664343951064, "grad_norm": 0.6830134658120908, "learning_rate": 2.0244330374240564e-06, "loss": 0.0185, "step": 233950 }, { "epoch": 0.9761872971100968, "grad_norm": 0.5759041011528084, "learning_rate": 2.0244114044977766e-06, "loss": 0.0191, "step": 233955 }, { "epoch": 0.976208159825087, "grad_norm": 0.5786927125567153, "learning_rate": 2.024389772264985e-06, "loss": 0.0195, "step": 233960 }, { "epoch": 0.9762290225400773, "grad_norm": 0.30919643245547596, "learning_rate": 2.0243681407256448e-06, "loss": 0.0247, "step": 233965 }, { "epoch": 0.9762498852550675, "grad_norm": 0.6064888509439944, "learning_rate": 2.024346509879718e-06, "loss": 0.0255, "step": 233970 }, { "epoch": 0.9762707479700579, "grad_norm": 0.6787543186268661, "learning_rate": 2.0243248797271693e-06, "loss": 0.0178, "step": 233975 }, { "epoch": 0.9762916106850481, "grad_norm": 0.9303150131189138, "learning_rate": 2.02430325026796e-06, "loss": 0.0236, "step": 233980 }, { "epoch": 0.9763124734000383, "grad_norm": 0.4445281100431648, "learning_rate": 2.0242816215020538e-06, "loss": 0.0199, "step": 233985 }, { "epoch": 0.9763333361150287, "grad_norm": 0.5160936468837086, "learning_rate": 2.0242599934294142e-06, "loss": 0.0179, "step": 233990 }, { "epoch": 0.976354198830019, "grad_norm": 1.099198288527199, "learning_rate": 2.0242383660500026e-06, "loss": 0.0164, "step": 233995 }, { "epoch": 0.9763750615450092, "grad_norm": 0.7938463939421098, "learning_rate": 2.0242167393637836e-06, "loss": 0.0226, "step": 234000 }, { "epoch": 0.9763959242599995, "grad_norm": 0.48942149436565946, "learning_rate": 2.024195113370719e-06, "loss": 0.0211, "step": 234005 }, { "epoch": 0.9764167869749898, "grad_norm": 0.5578764231019934, "learning_rate": 2.0241734880707726e-06, "loss": 0.0144, "step": 234010 }, { "epoch": 0.97643764968998, "grad_norm": 0.5689516570334819, "learning_rate": 2.024151863463907e-06, "loss": 0.0194, "step": 234015 }, { "epoch": 0.9764585124049703, "grad_norm": 0.7621775899467024, "learning_rate": 2.024130239550085e-06, "loss": 0.0244, "step": 234020 }, { "epoch": 0.9764793751199606, "grad_norm": 0.7590299383447722, "learning_rate": 2.02410861632927e-06, "loss": 0.0214, "step": 234025 }, { "epoch": 0.9765002378349509, "grad_norm": 0.5285998936431564, "learning_rate": 2.0240869938014244e-06, "loss": 0.0226, "step": 234030 }, { "epoch": 0.9765211005499411, "grad_norm": 0.36304183379593613, "learning_rate": 2.0240653719665117e-06, "loss": 0.0101, "step": 234035 }, { "epoch": 0.9765419632649315, "grad_norm": 0.48326676699064686, "learning_rate": 2.0240437508244948e-06, "loss": 0.0157, "step": 234040 }, { "epoch": 0.9765628259799217, "grad_norm": 0.6653132484064386, "learning_rate": 2.0240221303753364e-06, "loss": 0.0235, "step": 234045 }, { "epoch": 0.976583688694912, "grad_norm": 0.8688239886168349, "learning_rate": 2.024000510619e-06, "loss": 0.0258, "step": 234050 }, { "epoch": 0.9766045514099023, "grad_norm": 0.4599018162279621, "learning_rate": 2.0239788915554484e-06, "loss": 0.0209, "step": 234055 }, { "epoch": 0.9766254141248926, "grad_norm": 0.6394658691014362, "learning_rate": 2.0239572731846446e-06, "loss": 0.0178, "step": 234060 }, { "epoch": 0.9766462768398828, "grad_norm": 0.35210786063542243, "learning_rate": 2.0239356555065516e-06, "loss": 0.0199, "step": 234065 }, { "epoch": 0.9766671395548732, "grad_norm": 0.7467349802255165, "learning_rate": 2.023914038521132e-06, "loss": 0.0182, "step": 234070 }, { "epoch": 0.9766880022698634, "grad_norm": 0.49511673398330974, "learning_rate": 2.023892422228349e-06, "loss": 0.0239, "step": 234075 }, { "epoch": 0.9767088649848537, "grad_norm": 0.9995336438712603, "learning_rate": 2.0238708066281664e-06, "loss": 0.0208, "step": 234080 }, { "epoch": 0.9767297276998439, "grad_norm": 0.32647804592889096, "learning_rate": 2.023849191720546e-06, "loss": 0.0191, "step": 234085 }, { "epoch": 0.9767505904148343, "grad_norm": 2.036738362030119, "learning_rate": 2.023827577505452e-06, "loss": 0.0171, "step": 234090 }, { "epoch": 0.9767714531298245, "grad_norm": 0.423933988643386, "learning_rate": 2.0238059639828464e-06, "loss": 0.0187, "step": 234095 }, { "epoch": 0.9767923158448147, "grad_norm": 1.320713325903556, "learning_rate": 2.023784351152693e-06, "loss": 0.0196, "step": 234100 }, { "epoch": 0.9768131785598051, "grad_norm": 0.8399201755162186, "learning_rate": 2.023762739014954e-06, "loss": 0.0168, "step": 234105 }, { "epoch": 0.9768340412747953, "grad_norm": 0.7436913604379507, "learning_rate": 2.0237411275695932e-06, "loss": 0.0236, "step": 234110 }, { "epoch": 0.9768549039897856, "grad_norm": 0.4196314500310887, "learning_rate": 2.023719516816573e-06, "loss": 0.0175, "step": 234115 }, { "epoch": 0.9768757667047759, "grad_norm": 0.5128240328563916, "learning_rate": 2.023697906755857e-06, "loss": 0.0199, "step": 234120 }, { "epoch": 0.9768966294197662, "grad_norm": 0.5855547017986797, "learning_rate": 2.0236762973874084e-06, "loss": 0.0135, "step": 234125 }, { "epoch": 0.9769174921347564, "grad_norm": 1.0994554671528856, "learning_rate": 2.0236546887111894e-06, "loss": 0.0222, "step": 234130 }, { "epoch": 0.9769383548497468, "grad_norm": 0.6996742671189502, "learning_rate": 2.0236330807271635e-06, "loss": 0.018, "step": 234135 }, { "epoch": 0.976959217564737, "grad_norm": 0.6603621959521341, "learning_rate": 2.023611473435294e-06, "loss": 0.0244, "step": 234140 }, { "epoch": 0.9769800802797273, "grad_norm": 0.3479110147533509, "learning_rate": 2.0235898668355437e-06, "loss": 0.0215, "step": 234145 }, { "epoch": 0.9770009429947175, "grad_norm": 0.5670681099770298, "learning_rate": 2.023568260927875e-06, "loss": 0.0214, "step": 234150 }, { "epoch": 0.9770218057097079, "grad_norm": 0.573369089963871, "learning_rate": 2.0235466557122524e-06, "loss": 0.0189, "step": 234155 }, { "epoch": 0.9770426684246981, "grad_norm": 1.0725193819695187, "learning_rate": 2.0235250511886377e-06, "loss": 0.0215, "step": 234160 }, { "epoch": 0.9770635311396884, "grad_norm": 0.4939309695842647, "learning_rate": 2.0235034473569944e-06, "loss": 0.0214, "step": 234165 }, { "epoch": 0.9770843938546787, "grad_norm": 0.42937989678581595, "learning_rate": 2.0234818442172855e-06, "loss": 0.0154, "step": 234170 }, { "epoch": 0.977105256569669, "grad_norm": 0.36905887522881486, "learning_rate": 2.0234602417694742e-06, "loss": 0.0186, "step": 234175 }, { "epoch": 0.9771261192846592, "grad_norm": 0.5223750416811849, "learning_rate": 2.023438640013524e-06, "loss": 0.0225, "step": 234180 }, { "epoch": 0.9771469819996496, "grad_norm": 0.6247594960374276, "learning_rate": 2.0234170389493966e-06, "loss": 0.0173, "step": 234185 }, { "epoch": 0.9771678447146398, "grad_norm": 0.7755616634394101, "learning_rate": 2.0233954385770566e-06, "loss": 0.0208, "step": 234190 }, { "epoch": 0.97718870742963, "grad_norm": 0.43946010177844985, "learning_rate": 2.023373838896466e-06, "loss": 0.0136, "step": 234195 }, { "epoch": 0.9772095701446203, "grad_norm": 0.3126672229543451, "learning_rate": 2.0233522399075886e-06, "loss": 0.0134, "step": 234200 }, { "epoch": 0.9772304328596106, "grad_norm": 0.40286376581234573, "learning_rate": 2.023330641610387e-06, "loss": 0.0197, "step": 234205 }, { "epoch": 0.9772512955746009, "grad_norm": 0.6261851147746038, "learning_rate": 2.0233090440048247e-06, "loss": 0.0262, "step": 234210 }, { "epoch": 0.9772721582895911, "grad_norm": 0.6312017682842369, "learning_rate": 2.0232874470908643e-06, "loss": 0.0223, "step": 234215 }, { "epoch": 0.9772930210045815, "grad_norm": 0.7209250375421591, "learning_rate": 2.0232658508684694e-06, "loss": 0.0209, "step": 234220 }, { "epoch": 0.9773138837195717, "grad_norm": 0.23877945549903368, "learning_rate": 2.0232442553376024e-06, "loss": 0.0221, "step": 234225 }, { "epoch": 0.977334746434562, "grad_norm": 0.39219870368448084, "learning_rate": 2.0232226604982273e-06, "loss": 0.0151, "step": 234230 }, { "epoch": 0.9773556091495523, "grad_norm": 1.8504176149529372, "learning_rate": 2.0232010663503064e-06, "loss": 0.0228, "step": 234235 }, { "epoch": 0.9773764718645426, "grad_norm": 0.43626208675045514, "learning_rate": 2.0231794728938036e-06, "loss": 0.0181, "step": 234240 }, { "epoch": 0.9773973345795328, "grad_norm": 0.36148124748543464, "learning_rate": 2.023157880128681e-06, "loss": 0.021, "step": 234245 }, { "epoch": 0.9774181972945232, "grad_norm": 0.48824335247941586, "learning_rate": 2.0231362880549022e-06, "loss": 0.0144, "step": 234250 }, { "epoch": 0.9774390600095134, "grad_norm": 0.8413632164660868, "learning_rate": 2.0231146966724303e-06, "loss": 0.018, "step": 234255 }, { "epoch": 0.9774599227245037, "grad_norm": 0.27262085575986666, "learning_rate": 2.023093105981229e-06, "loss": 0.017, "step": 234260 }, { "epoch": 0.9774807854394939, "grad_norm": 0.36991323941742854, "learning_rate": 2.023071515981261e-06, "loss": 0.0215, "step": 234265 }, { "epoch": 0.9775016481544843, "grad_norm": 1.22247987836905, "learning_rate": 2.0230499266724887e-06, "loss": 0.0187, "step": 234270 }, { "epoch": 0.9775225108694745, "grad_norm": 0.7778113288961784, "learning_rate": 2.023028338054876e-06, "loss": 0.0146, "step": 234275 }, { "epoch": 0.9775433735844647, "grad_norm": 0.822326077012086, "learning_rate": 2.023006750128386e-06, "loss": 0.0199, "step": 234280 }, { "epoch": 0.9775642362994551, "grad_norm": 0.4783224527615552, "learning_rate": 2.0229851628929812e-06, "loss": 0.0147, "step": 234285 }, { "epoch": 0.9775850990144453, "grad_norm": 0.47693951394460177, "learning_rate": 2.0229635763486256e-06, "loss": 0.0182, "step": 234290 }, { "epoch": 0.9776059617294356, "grad_norm": 0.4495766602023821, "learning_rate": 2.0229419904952816e-06, "loss": 0.0207, "step": 234295 }, { "epoch": 0.977626824444426, "grad_norm": 0.7193021046514616, "learning_rate": 2.022920405332913e-06, "loss": 0.017, "step": 234300 }, { "epoch": 0.9776476871594162, "grad_norm": 0.6498639071824852, "learning_rate": 2.0228988208614826e-06, "loss": 0.016, "step": 234305 }, { "epoch": 0.9776685498744064, "grad_norm": 1.0510034954361551, "learning_rate": 2.022877237080953e-06, "loss": 0.0351, "step": 234310 }, { "epoch": 0.9776894125893968, "grad_norm": 0.7067886506595212, "learning_rate": 2.0228556539912887e-06, "loss": 0.018, "step": 234315 }, { "epoch": 0.977710275304387, "grad_norm": 0.7673200970800119, "learning_rate": 2.0228340715924512e-06, "loss": 0.0144, "step": 234320 }, { "epoch": 0.9777311380193773, "grad_norm": 0.6626227568150738, "learning_rate": 2.022812489884405e-06, "loss": 0.0218, "step": 234325 }, { "epoch": 0.9777520007343675, "grad_norm": 0.9350367668633142, "learning_rate": 2.0227909088671123e-06, "loss": 0.0185, "step": 234330 }, { "epoch": 0.9777728634493579, "grad_norm": 0.5290760582622828, "learning_rate": 2.022769328540537e-06, "loss": 0.0227, "step": 234335 }, { "epoch": 0.9777937261643481, "grad_norm": 0.8235261141585802, "learning_rate": 2.022747748904642e-06, "loss": 0.0162, "step": 234340 }, { "epoch": 0.9778145888793384, "grad_norm": 0.7529439946210695, "learning_rate": 2.02272616995939e-06, "loss": 0.0161, "step": 234345 }, { "epoch": 0.9778354515943287, "grad_norm": 1.097144952148543, "learning_rate": 2.0227045917047445e-06, "loss": 0.0174, "step": 234350 }, { "epoch": 0.977856314309319, "grad_norm": 1.1053153154212083, "learning_rate": 2.022683014140669e-06, "loss": 0.0246, "step": 234355 }, { "epoch": 0.9778771770243092, "grad_norm": 0.5818342541827244, "learning_rate": 2.022661437267126e-06, "loss": 0.022, "step": 234360 }, { "epoch": 0.9778980397392996, "grad_norm": 0.6392557865015555, "learning_rate": 2.02263986108408e-06, "loss": 0.0146, "step": 234365 }, { "epoch": 0.9779189024542898, "grad_norm": 0.4145591467035374, "learning_rate": 2.022618285591493e-06, "loss": 0.0111, "step": 234370 }, { "epoch": 0.97793976516928, "grad_norm": 0.8835678460240567, "learning_rate": 2.0225967107893276e-06, "loss": 0.0253, "step": 234375 }, { "epoch": 0.9779606278842703, "grad_norm": 0.9845181728055742, "learning_rate": 2.022575136677548e-06, "loss": 0.0166, "step": 234380 }, { "epoch": 0.9779814905992606, "grad_norm": 1.241718967184783, "learning_rate": 2.0225535632561175e-06, "loss": 0.0152, "step": 234385 }, { "epoch": 0.9780023533142509, "grad_norm": 0.4468444950263727, "learning_rate": 2.0225319905249985e-06, "loss": 0.016, "step": 234390 }, { "epoch": 0.9780232160292411, "grad_norm": 0.8662139209544678, "learning_rate": 2.0225104184841547e-06, "loss": 0.0192, "step": 234395 }, { "epoch": 0.9780440787442315, "grad_norm": 0.27847279851066636, "learning_rate": 2.0224888471335496e-06, "loss": 0.014, "step": 234400 }, { "epoch": 0.9780649414592217, "grad_norm": 0.29333201742526444, "learning_rate": 2.022467276473146e-06, "loss": 0.0159, "step": 234405 }, { "epoch": 0.978085804174212, "grad_norm": 0.74985294340676, "learning_rate": 2.0224457065029066e-06, "loss": 0.0183, "step": 234410 }, { "epoch": 0.9781066668892023, "grad_norm": 0.9097305248238687, "learning_rate": 2.022424137222795e-06, "loss": 0.0207, "step": 234415 }, { "epoch": 0.9781275296041926, "grad_norm": 1.1896395447540846, "learning_rate": 2.0224025686327754e-06, "loss": 0.0266, "step": 234420 }, { "epoch": 0.9781483923191828, "grad_norm": 0.8152196540616197, "learning_rate": 2.0223810007328093e-06, "loss": 0.0243, "step": 234425 }, { "epoch": 0.9781692550341732, "grad_norm": 0.7960069270297336, "learning_rate": 2.022359433522861e-06, "loss": 0.0256, "step": 234430 }, { "epoch": 0.9781901177491634, "grad_norm": 0.4126299580104735, "learning_rate": 2.022337867002893e-06, "loss": 0.0143, "step": 234435 }, { "epoch": 0.9782109804641537, "grad_norm": 0.6125888932805644, "learning_rate": 2.0223163011728695e-06, "loss": 0.0166, "step": 234440 }, { "epoch": 0.9782318431791439, "grad_norm": 0.6662554499536912, "learning_rate": 2.022294736032753e-06, "loss": 0.0218, "step": 234445 }, { "epoch": 0.9782527058941343, "grad_norm": 0.8397155590175898, "learning_rate": 2.0222731715825065e-06, "loss": 0.0227, "step": 234450 }, { "epoch": 0.9782735686091245, "grad_norm": 0.3433274614754317, "learning_rate": 2.0222516078220938e-06, "loss": 0.0182, "step": 234455 }, { "epoch": 0.9782944313241148, "grad_norm": 0.49351653080083396, "learning_rate": 2.022230044751478e-06, "loss": 0.0197, "step": 234460 }, { "epoch": 0.9783152940391051, "grad_norm": 0.7457597524768249, "learning_rate": 2.022208482370622e-06, "loss": 0.0247, "step": 234465 }, { "epoch": 0.9783361567540954, "grad_norm": 0.5406673368149704, "learning_rate": 2.0221869206794893e-06, "loss": 0.0222, "step": 234470 }, { "epoch": 0.9783570194690856, "grad_norm": 2.657407551607464, "learning_rate": 2.022165359678043e-06, "loss": 0.0216, "step": 234475 }, { "epoch": 0.978377882184076, "grad_norm": 0.548264233740261, "learning_rate": 2.0221437993662467e-06, "loss": 0.0234, "step": 234480 }, { "epoch": 0.9783987448990662, "grad_norm": 0.5450721288197458, "learning_rate": 2.0221222397440634e-06, "loss": 0.0243, "step": 234485 }, { "epoch": 0.9784196076140564, "grad_norm": 0.7703462490986515, "learning_rate": 2.022100680811456e-06, "loss": 0.0203, "step": 234490 }, { "epoch": 0.9784404703290467, "grad_norm": 0.8512740015001804, "learning_rate": 2.022079122568388e-06, "loss": 0.0256, "step": 234495 }, { "epoch": 0.978461333044037, "grad_norm": 0.8500288035223735, "learning_rate": 2.0220575650148224e-06, "loss": 0.0197, "step": 234500 }, { "epoch": 0.9784821957590273, "grad_norm": 0.7598747748247513, "learning_rate": 2.0220360081507236e-06, "loss": 0.0161, "step": 234505 }, { "epoch": 0.9785030584740175, "grad_norm": 0.461513008120317, "learning_rate": 2.022014451976053e-06, "loss": 0.0124, "step": 234510 }, { "epoch": 0.9785239211890079, "grad_norm": 0.4382322543856884, "learning_rate": 2.021992896490775e-06, "loss": 0.0237, "step": 234515 }, { "epoch": 0.9785447839039981, "grad_norm": 0.6542233944431508, "learning_rate": 2.021971341694853e-06, "loss": 0.0197, "step": 234520 }, { "epoch": 0.9785656466189884, "grad_norm": 0.4404019440370119, "learning_rate": 2.02194978758825e-06, "loss": 0.0179, "step": 234525 }, { "epoch": 0.9785865093339787, "grad_norm": 0.8218894237070189, "learning_rate": 2.0219282341709285e-06, "loss": 0.0199, "step": 234530 }, { "epoch": 0.978607372048969, "grad_norm": 0.627435427618259, "learning_rate": 2.0219066814428535e-06, "loss": 0.0175, "step": 234535 }, { "epoch": 0.9786282347639592, "grad_norm": 0.3976310807988024, "learning_rate": 2.0218851294039862e-06, "loss": 0.0208, "step": 234540 }, { "epoch": 0.9786490974789496, "grad_norm": 0.687373533869337, "learning_rate": 2.0218635780542913e-06, "loss": 0.0204, "step": 234545 }, { "epoch": 0.9786699601939398, "grad_norm": 0.8059407561422837, "learning_rate": 2.0218420273937318e-06, "loss": 0.0207, "step": 234550 }, { "epoch": 0.9786908229089301, "grad_norm": 0.5247008811040177, "learning_rate": 2.0218204774222704e-06, "loss": 0.0171, "step": 234555 }, { "epoch": 0.9787116856239203, "grad_norm": 0.9669528650738664, "learning_rate": 2.0217989281398716e-06, "loss": 0.0164, "step": 234560 }, { "epoch": 0.9787325483389107, "grad_norm": 0.49900490037366807, "learning_rate": 2.0217773795464973e-06, "loss": 0.0189, "step": 234565 }, { "epoch": 0.9787534110539009, "grad_norm": 0.4399017714649994, "learning_rate": 2.0217558316421114e-06, "loss": 0.0142, "step": 234570 }, { "epoch": 0.9787742737688911, "grad_norm": 0.6715143608213262, "learning_rate": 2.0217342844266767e-06, "loss": 0.0133, "step": 234575 }, { "epoch": 0.9787951364838815, "grad_norm": 0.8015215331134669, "learning_rate": 2.0217127379001576e-06, "loss": 0.0185, "step": 234580 }, { "epoch": 0.9788159991988717, "grad_norm": 0.5748889882714499, "learning_rate": 2.021691192062517e-06, "loss": 0.0169, "step": 234585 }, { "epoch": 0.978836861913862, "grad_norm": 0.29969154947470406, "learning_rate": 2.021669646913717e-06, "loss": 0.0218, "step": 234590 }, { "epoch": 0.9788577246288523, "grad_norm": 0.8644313300675379, "learning_rate": 2.0216481024537226e-06, "loss": 0.021, "step": 234595 }, { "epoch": 0.9788785873438426, "grad_norm": 0.8104353820112056, "learning_rate": 2.0216265586824958e-06, "loss": 0.0165, "step": 234600 }, { "epoch": 0.9788994500588328, "grad_norm": 0.45863672535734185, "learning_rate": 2.0216050156000004e-06, "loss": 0.0178, "step": 234605 }, { "epoch": 0.9789203127738232, "grad_norm": 0.8294317942200327, "learning_rate": 2.0215834732062e-06, "loss": 0.0253, "step": 234610 }, { "epoch": 0.9789411754888134, "grad_norm": 0.7791638870757465, "learning_rate": 2.021561931501057e-06, "loss": 0.0171, "step": 234615 }, { "epoch": 0.9789620382038037, "grad_norm": 0.7981281968449454, "learning_rate": 2.021540390484536e-06, "loss": 0.0224, "step": 234620 }, { "epoch": 0.9789829009187939, "grad_norm": 0.7193912049284744, "learning_rate": 2.0215188501566e-06, "loss": 0.0234, "step": 234625 }, { "epoch": 0.9790037636337843, "grad_norm": 1.1084533717900122, "learning_rate": 2.0214973105172113e-06, "loss": 0.0278, "step": 234630 }, { "epoch": 0.9790246263487745, "grad_norm": 1.6253955972738672, "learning_rate": 2.021475771566334e-06, "loss": 0.0215, "step": 234635 }, { "epoch": 0.9790454890637648, "grad_norm": 0.6921185309174612, "learning_rate": 2.0214542333039315e-06, "loss": 0.0188, "step": 234640 }, { "epoch": 0.9790663517787551, "grad_norm": 0.7507919537480665, "learning_rate": 2.021432695729967e-06, "loss": 0.0227, "step": 234645 }, { "epoch": 0.9790872144937454, "grad_norm": 0.4055727455083179, "learning_rate": 2.0214111588444036e-06, "loss": 0.0168, "step": 234650 }, { "epoch": 0.9791080772087356, "grad_norm": 0.48757404418773115, "learning_rate": 2.021389622647205e-06, "loss": 0.027, "step": 234655 }, { "epoch": 0.979128939923726, "grad_norm": 1.1064245819193155, "learning_rate": 2.021368087138334e-06, "loss": 0.0201, "step": 234660 }, { "epoch": 0.9791498026387162, "grad_norm": 1.0255674182118906, "learning_rate": 2.021346552317754e-06, "loss": 0.0239, "step": 234665 }, { "epoch": 0.9791706653537064, "grad_norm": 0.7427203294898799, "learning_rate": 2.021325018185429e-06, "loss": 0.02, "step": 234670 }, { "epoch": 0.9791915280686967, "grad_norm": 0.4059549429283203, "learning_rate": 2.0213034847413215e-06, "loss": 0.017, "step": 234675 }, { "epoch": 0.979212390783687, "grad_norm": 0.44062993115178617, "learning_rate": 2.021281951985396e-06, "loss": 0.0206, "step": 234680 }, { "epoch": 0.9792332534986773, "grad_norm": 0.43083910308912554, "learning_rate": 2.0212604199176146e-06, "loss": 0.02, "step": 234685 }, { "epoch": 0.9792541162136675, "grad_norm": 0.39200096587448324, "learning_rate": 2.021238888537941e-06, "loss": 0.0197, "step": 234690 }, { "epoch": 0.9792749789286579, "grad_norm": 0.36254224238221566, "learning_rate": 2.021217357846339e-06, "loss": 0.0201, "step": 234695 }, { "epoch": 0.9792958416436481, "grad_norm": 0.6816929675634273, "learning_rate": 2.021195827842772e-06, "loss": 0.022, "step": 234700 }, { "epoch": 0.9793167043586384, "grad_norm": 0.47926996173553255, "learning_rate": 2.021174298527202e-06, "loss": 0.0177, "step": 234705 }, { "epoch": 0.9793375670736287, "grad_norm": 0.5577692601102507, "learning_rate": 2.021152769899594e-06, "loss": 0.0204, "step": 234710 }, { "epoch": 0.979358429788619, "grad_norm": 1.0721160078371605, "learning_rate": 2.0211312419599113e-06, "loss": 0.0289, "step": 234715 }, { "epoch": 0.9793792925036092, "grad_norm": 0.5299893028185764, "learning_rate": 2.0211097147081156e-06, "loss": 0.0188, "step": 234720 }, { "epoch": 0.9794001552185996, "grad_norm": 0.7220161348679384, "learning_rate": 2.021088188144172e-06, "loss": 0.0241, "step": 234725 }, { "epoch": 0.9794210179335898, "grad_norm": 0.7657518228591198, "learning_rate": 2.021066662268043e-06, "loss": 0.0191, "step": 234730 }, { "epoch": 0.9794418806485801, "grad_norm": 0.6975579870298639, "learning_rate": 2.021045137079693e-06, "loss": 0.0219, "step": 234735 }, { "epoch": 0.9794627433635703, "grad_norm": 0.49564786464081845, "learning_rate": 2.0210236125790837e-06, "loss": 0.017, "step": 234740 }, { "epoch": 0.9794836060785607, "grad_norm": 0.4807333650142781, "learning_rate": 2.0210020887661796e-06, "loss": 0.0247, "step": 234745 }, { "epoch": 0.9795044687935509, "grad_norm": 0.7018609099025672, "learning_rate": 2.020980565640944e-06, "loss": 0.0172, "step": 234750 }, { "epoch": 0.9795253315085412, "grad_norm": 0.8053377532747523, "learning_rate": 2.02095904320334e-06, "loss": 0.0161, "step": 234755 }, { "epoch": 0.9795461942235315, "grad_norm": 0.40589068801017564, "learning_rate": 2.020937521453331e-06, "loss": 0.0158, "step": 234760 }, { "epoch": 0.9795670569385218, "grad_norm": 0.4577639741877636, "learning_rate": 2.0209160003908803e-06, "loss": 0.0188, "step": 234765 }, { "epoch": 0.979587919653512, "grad_norm": 0.44331272843750535, "learning_rate": 2.020894480015952e-06, "loss": 0.0135, "step": 234770 }, { "epoch": 0.9796087823685024, "grad_norm": 1.1715242907422563, "learning_rate": 2.020872960328509e-06, "loss": 0.0234, "step": 234775 }, { "epoch": 0.9796296450834926, "grad_norm": 0.3822314213710891, "learning_rate": 2.0208514413285145e-06, "loss": 0.0133, "step": 234780 }, { "epoch": 0.9796505077984828, "grad_norm": 0.47315808277252813, "learning_rate": 2.020829923015932e-06, "loss": 0.0169, "step": 234785 }, { "epoch": 0.9796713705134732, "grad_norm": 0.2691252592603574, "learning_rate": 2.020808405390725e-06, "loss": 0.0201, "step": 234790 }, { "epoch": 0.9796922332284634, "grad_norm": 0.5566126924732776, "learning_rate": 2.020786888452857e-06, "loss": 0.0201, "step": 234795 }, { "epoch": 0.9797130959434537, "grad_norm": 0.6082872957871115, "learning_rate": 2.020765372202291e-06, "loss": 0.0336, "step": 234800 }, { "epoch": 0.9797339586584439, "grad_norm": 0.49193804565189014, "learning_rate": 2.0207438566389913e-06, "loss": 0.0176, "step": 234805 }, { "epoch": 0.9797548213734343, "grad_norm": 0.643531367051059, "learning_rate": 2.02072234176292e-06, "loss": 0.0273, "step": 234810 }, { "epoch": 0.9797756840884245, "grad_norm": 0.8673024468758461, "learning_rate": 2.020700827574042e-06, "loss": 0.0188, "step": 234815 }, { "epoch": 0.9797965468034148, "grad_norm": 0.6283537380292656, "learning_rate": 2.020679314072319e-06, "loss": 0.0224, "step": 234820 }, { "epoch": 0.9798174095184051, "grad_norm": 0.5988694518902677, "learning_rate": 2.0206578012577162e-06, "loss": 0.0241, "step": 234825 }, { "epoch": 0.9798382722333954, "grad_norm": 0.5631730518861409, "learning_rate": 2.0206362891301957e-06, "loss": 0.02, "step": 234830 }, { "epoch": 0.9798591349483856, "grad_norm": 0.7930971537429395, "learning_rate": 2.020614777689722e-06, "loss": 0.0311, "step": 234835 }, { "epoch": 0.979879997663376, "grad_norm": 0.9230324744897437, "learning_rate": 2.020593266936258e-06, "loss": 0.029, "step": 234840 }, { "epoch": 0.9799008603783662, "grad_norm": 0.6356115226818623, "learning_rate": 2.0205717568697667e-06, "loss": 0.0241, "step": 234845 }, { "epoch": 0.9799217230933565, "grad_norm": 0.7999600552800041, "learning_rate": 2.020550247490212e-06, "loss": 0.0204, "step": 234850 }, { "epoch": 0.9799425858083467, "grad_norm": 0.8330076798992008, "learning_rate": 2.020528738797557e-06, "loss": 0.0187, "step": 234855 }, { "epoch": 0.979963448523337, "grad_norm": 1.078865251310752, "learning_rate": 2.0205072307917655e-06, "loss": 0.0203, "step": 234860 }, { "epoch": 0.9799843112383273, "grad_norm": 0.3672071030323738, "learning_rate": 2.020485723472801e-06, "loss": 0.0177, "step": 234865 }, { "epoch": 0.9800051739533175, "grad_norm": 0.8008982232841094, "learning_rate": 2.020464216840627e-06, "loss": 0.0379, "step": 234870 }, { "epoch": 0.9800260366683079, "grad_norm": 0.7013997165086518, "learning_rate": 2.020442710895207e-06, "loss": 0.021, "step": 234875 }, { "epoch": 0.9800468993832981, "grad_norm": 0.49264916196136094, "learning_rate": 2.0204212056365032e-06, "loss": 0.0147, "step": 234880 }, { "epoch": 0.9800677620982884, "grad_norm": 0.5673515080466683, "learning_rate": 2.020399701064481e-06, "loss": 0.0226, "step": 234885 }, { "epoch": 0.9800886248132787, "grad_norm": 1.078276420743979, "learning_rate": 2.0203781971791023e-06, "loss": 0.0228, "step": 234890 }, { "epoch": 0.980109487528269, "grad_norm": 0.9396966181824759, "learning_rate": 2.0203566939803315e-06, "loss": 0.0189, "step": 234895 }, { "epoch": 0.9801303502432592, "grad_norm": 0.7567382334021967, "learning_rate": 2.020335191468132e-06, "loss": 0.0184, "step": 234900 }, { "epoch": 0.9801512129582496, "grad_norm": 0.5315434652813859, "learning_rate": 2.020313689642466e-06, "loss": 0.0123, "step": 234905 }, { "epoch": 0.9801720756732398, "grad_norm": 0.45022696316699884, "learning_rate": 2.020292188503299e-06, "loss": 0.0214, "step": 234910 }, { "epoch": 0.9801929383882301, "grad_norm": 0.3654034346979556, "learning_rate": 2.0202706880505928e-06, "loss": 0.0134, "step": 234915 }, { "epoch": 0.9802138011032203, "grad_norm": 0.39133078513513087, "learning_rate": 2.020249188284312e-06, "loss": 0.022, "step": 234920 }, { "epoch": 0.9802346638182107, "grad_norm": 0.8822794074462604, "learning_rate": 2.0202276892044192e-06, "loss": 0.0219, "step": 234925 }, { "epoch": 0.9802555265332009, "grad_norm": 0.7405625785096598, "learning_rate": 2.020206190810878e-06, "loss": 0.0229, "step": 234930 }, { "epoch": 0.9802763892481912, "grad_norm": 0.376316259973896, "learning_rate": 2.020184693103653e-06, "loss": 0.0153, "step": 234935 }, { "epoch": 0.9802972519631815, "grad_norm": 0.40694867228093734, "learning_rate": 2.020163196082706e-06, "loss": 0.0145, "step": 234940 }, { "epoch": 0.9803181146781718, "grad_norm": 0.3151149727470075, "learning_rate": 2.0201416997480018e-06, "loss": 0.0164, "step": 234945 }, { "epoch": 0.980338977393162, "grad_norm": 0.40449227385718356, "learning_rate": 2.0201202040995035e-06, "loss": 0.0256, "step": 234950 }, { "epoch": 0.9803598401081524, "grad_norm": 1.1165284368125694, "learning_rate": 2.0200987091371743e-06, "loss": 0.0264, "step": 234955 }, { "epoch": 0.9803807028231426, "grad_norm": 0.5526719865177266, "learning_rate": 2.0200772148609777e-06, "loss": 0.0168, "step": 234960 }, { "epoch": 0.9804015655381328, "grad_norm": 0.5416861882101668, "learning_rate": 2.0200557212708775e-06, "loss": 0.021, "step": 234965 }, { "epoch": 0.9804224282531232, "grad_norm": 0.6179229186924529, "learning_rate": 2.0200342283668375e-06, "loss": 0.0143, "step": 234970 }, { "epoch": 0.9804432909681134, "grad_norm": 0.481061720378384, "learning_rate": 2.0200127361488205e-06, "loss": 0.0272, "step": 234975 }, { "epoch": 0.9804641536831037, "grad_norm": 0.5387643650513938, "learning_rate": 2.01999124461679e-06, "loss": 0.0141, "step": 234980 }, { "epoch": 0.9804850163980939, "grad_norm": 1.185854228225988, "learning_rate": 2.0199697537707106e-06, "loss": 0.0204, "step": 234985 }, { "epoch": 0.9805058791130843, "grad_norm": 0.8314219435869753, "learning_rate": 2.0199482636105445e-06, "loss": 0.0185, "step": 234990 }, { "epoch": 0.9805267418280745, "grad_norm": 0.3954479947291179, "learning_rate": 2.0199267741362557e-06, "loss": 0.0143, "step": 234995 }, { "epoch": 0.9805476045430648, "grad_norm": 0.8838856902273158, "learning_rate": 2.0199052853478073e-06, "loss": 0.0228, "step": 235000 }, { "epoch": 0.9805684672580551, "grad_norm": 0.48270314146619264, "learning_rate": 2.0198837972451643e-06, "loss": 0.0171, "step": 235005 }, { "epoch": 0.9805893299730454, "grad_norm": 0.43397207267730803, "learning_rate": 2.0198623098282884e-06, "loss": 0.0114, "step": 235010 }, { "epoch": 0.9806101926880356, "grad_norm": 0.48200615098328037, "learning_rate": 2.019840823097144e-06, "loss": 0.0114, "step": 235015 }, { "epoch": 0.980631055403026, "grad_norm": 0.918245845988269, "learning_rate": 2.019819337051695e-06, "loss": 0.015, "step": 235020 }, { "epoch": 0.9806519181180162, "grad_norm": 0.5644971205722527, "learning_rate": 2.0197978516919044e-06, "loss": 0.0179, "step": 235025 }, { "epoch": 0.9806727808330065, "grad_norm": 0.6839910229100005, "learning_rate": 2.0197763670177352e-06, "loss": 0.0177, "step": 235030 }, { "epoch": 0.9806936435479967, "grad_norm": 0.8961715089080706, "learning_rate": 2.0197548830291524e-06, "loss": 0.0232, "step": 235035 }, { "epoch": 0.9807145062629871, "grad_norm": 0.8191193724281298, "learning_rate": 2.0197333997261178e-06, "loss": 0.0196, "step": 235040 }, { "epoch": 0.9807353689779773, "grad_norm": 0.9497400988552626, "learning_rate": 2.0197119171085962e-06, "loss": 0.0263, "step": 235045 }, { "epoch": 0.9807562316929676, "grad_norm": 0.48852939184587224, "learning_rate": 2.0196904351765514e-06, "loss": 0.0163, "step": 235050 }, { "epoch": 0.9807770944079579, "grad_norm": 0.735438187356461, "learning_rate": 2.019668953929945e-06, "loss": 0.021, "step": 235055 }, { "epoch": 0.9807979571229481, "grad_norm": 1.1224743121187102, "learning_rate": 2.0196474733687434e-06, "loss": 0.0289, "step": 235060 }, { "epoch": 0.9808188198379384, "grad_norm": 0.8301969785569231, "learning_rate": 2.019625993492908e-06, "loss": 0.0189, "step": 235065 }, { "epoch": 0.9808396825529287, "grad_norm": 0.6376496778755624, "learning_rate": 2.019604514302403e-06, "loss": 0.0144, "step": 235070 }, { "epoch": 0.980860545267919, "grad_norm": 0.45356377959188693, "learning_rate": 2.019583035797191e-06, "loss": 0.0151, "step": 235075 }, { "epoch": 0.9808814079829092, "grad_norm": 0.6007493927042843, "learning_rate": 2.0195615579772375e-06, "loss": 0.0163, "step": 235080 }, { "epoch": 0.9809022706978996, "grad_norm": 1.3690743039417608, "learning_rate": 2.0195400808425047e-06, "loss": 0.0207, "step": 235085 }, { "epoch": 0.9809231334128898, "grad_norm": 0.5259417088741258, "learning_rate": 2.0195186043929564e-06, "loss": 0.0208, "step": 235090 }, { "epoch": 0.9809439961278801, "grad_norm": 1.0678797277745504, "learning_rate": 2.0194971286285565e-06, "loss": 0.0235, "step": 235095 }, { "epoch": 0.9809648588428703, "grad_norm": 0.9101325643194037, "learning_rate": 2.0194756535492687e-06, "loss": 0.024, "step": 235100 }, { "epoch": 0.9809857215578607, "grad_norm": 0.7605035195333069, "learning_rate": 2.019454179155056e-06, "loss": 0.0229, "step": 235105 }, { "epoch": 0.9810065842728509, "grad_norm": 0.5221676449026057, "learning_rate": 2.0194327054458816e-06, "loss": 0.0206, "step": 235110 }, { "epoch": 0.9810274469878412, "grad_norm": 0.4271006478075028, "learning_rate": 2.0194112324217104e-06, "loss": 0.0292, "step": 235115 }, { "epoch": 0.9810483097028315, "grad_norm": 1.481525255999146, "learning_rate": 2.019389760082505e-06, "loss": 0.0168, "step": 235120 }, { "epoch": 0.9810691724178218, "grad_norm": 0.4917120482770394, "learning_rate": 2.0193682884282296e-06, "loss": 0.0168, "step": 235125 }, { "epoch": 0.981090035132812, "grad_norm": 0.41380935983042455, "learning_rate": 2.019346817458847e-06, "loss": 0.0186, "step": 235130 }, { "epoch": 0.9811108978478024, "grad_norm": 0.6306845556419851, "learning_rate": 2.019325347174321e-06, "loss": 0.0199, "step": 235135 }, { "epoch": 0.9811317605627926, "grad_norm": 0.3186506815596846, "learning_rate": 2.019303877574616e-06, "loss": 0.0248, "step": 235140 }, { "epoch": 0.9811526232777829, "grad_norm": 0.4972074142261509, "learning_rate": 2.0192824086596947e-06, "loss": 0.0173, "step": 235145 }, { "epoch": 0.9811734859927732, "grad_norm": 0.8032291856075637, "learning_rate": 2.019260940429521e-06, "loss": 0.0216, "step": 235150 }, { "epoch": 0.9811943487077635, "grad_norm": 0.41930481281383297, "learning_rate": 2.019239472884059e-06, "loss": 0.0294, "step": 235155 }, { "epoch": 0.9812152114227537, "grad_norm": 0.8455917418940311, "learning_rate": 2.0192180060232713e-06, "loss": 0.0202, "step": 235160 }, { "epoch": 0.9812360741377439, "grad_norm": 0.3458020261206027, "learning_rate": 2.0191965398471226e-06, "loss": 0.0172, "step": 235165 }, { "epoch": 0.9812569368527343, "grad_norm": 0.6972813815580975, "learning_rate": 2.0191750743555756e-06, "loss": 0.0244, "step": 235170 }, { "epoch": 0.9812777995677245, "grad_norm": 0.5631251955462018, "learning_rate": 2.0191536095485943e-06, "loss": 0.0255, "step": 235175 }, { "epoch": 0.9812986622827148, "grad_norm": 1.2959748398343676, "learning_rate": 2.019132145426142e-06, "loss": 0.0144, "step": 235180 }, { "epoch": 0.9813195249977051, "grad_norm": 0.4601478173064577, "learning_rate": 2.0191106819881825e-06, "loss": 0.0213, "step": 235185 }, { "epoch": 0.9813403877126954, "grad_norm": 0.8164190812435501, "learning_rate": 2.0190892192346805e-06, "loss": 0.02, "step": 235190 }, { "epoch": 0.9813612504276856, "grad_norm": 0.4781922437371403, "learning_rate": 2.019067757165597e-06, "loss": 0.0238, "step": 235195 }, { "epoch": 0.981382113142676, "grad_norm": 0.5737297494577466, "learning_rate": 2.0190462957808983e-06, "loss": 0.0203, "step": 235200 }, { "epoch": 0.9814029758576662, "grad_norm": 0.6420124173894188, "learning_rate": 2.019024835080547e-06, "loss": 0.0199, "step": 235205 }, { "epoch": 0.9814238385726565, "grad_norm": 0.8492891954408203, "learning_rate": 2.0190033750645068e-06, "loss": 0.0192, "step": 235210 }, { "epoch": 0.9814447012876467, "grad_norm": 0.6902381240172073, "learning_rate": 2.0189819157327405e-06, "loss": 0.025, "step": 235215 }, { "epoch": 0.9814655640026371, "grad_norm": 0.7946841870838665, "learning_rate": 2.0189604570852133e-06, "loss": 0.0241, "step": 235220 }, { "epoch": 0.9814864267176273, "grad_norm": 1.1487917692134693, "learning_rate": 2.0189389991218876e-06, "loss": 0.0187, "step": 235225 }, { "epoch": 0.9815072894326176, "grad_norm": 1.0800946762506123, "learning_rate": 2.0189175418427277e-06, "loss": 0.0227, "step": 235230 }, { "epoch": 0.9815281521476079, "grad_norm": 0.3349087404023316, "learning_rate": 2.0188960852476965e-06, "loss": 0.0159, "step": 235235 }, { "epoch": 0.9815490148625982, "grad_norm": 1.0121225587732985, "learning_rate": 2.0188746293367587e-06, "loss": 0.0187, "step": 235240 }, { "epoch": 0.9815698775775884, "grad_norm": 0.5229677198537466, "learning_rate": 2.0188531741098767e-06, "loss": 0.0225, "step": 235245 }, { "epoch": 0.9815907402925788, "grad_norm": 0.5100001603782355, "learning_rate": 2.0188317195670157e-06, "loss": 0.0185, "step": 235250 }, { "epoch": 0.981611603007569, "grad_norm": 0.5751101916921423, "learning_rate": 2.018810265708138e-06, "loss": 0.0161, "step": 235255 }, { "epoch": 0.9816324657225592, "grad_norm": 0.7419822952755631, "learning_rate": 2.0187888125332082e-06, "loss": 0.0184, "step": 235260 }, { "epoch": 0.9816533284375496, "grad_norm": 0.46554695529713097, "learning_rate": 2.018767360042189e-06, "loss": 0.0184, "step": 235265 }, { "epoch": 0.9816741911525398, "grad_norm": 0.6499088176505795, "learning_rate": 2.0187459082350448e-06, "loss": 0.0193, "step": 235270 }, { "epoch": 0.9816950538675301, "grad_norm": 0.4932008811401549, "learning_rate": 2.018724457111739e-06, "loss": 0.018, "step": 235275 }, { "epoch": 0.9817159165825203, "grad_norm": 0.7492478183968717, "learning_rate": 2.0187030066722358e-06, "loss": 0.0201, "step": 235280 }, { "epoch": 0.9817367792975107, "grad_norm": 1.267682773156997, "learning_rate": 2.0186815569164975e-06, "loss": 0.0243, "step": 235285 }, { "epoch": 0.9817576420125009, "grad_norm": 0.5111912271322919, "learning_rate": 2.0186601078444894e-06, "loss": 0.0172, "step": 235290 }, { "epoch": 0.9817785047274912, "grad_norm": 0.6808804671108215, "learning_rate": 2.018638659456174e-06, "loss": 0.0175, "step": 235295 }, { "epoch": 0.9817993674424815, "grad_norm": 0.9643865430379994, "learning_rate": 2.018617211751516e-06, "loss": 0.0234, "step": 235300 }, { "epoch": 0.9818202301574718, "grad_norm": 1.0691154814702404, "learning_rate": 2.0185957647304777e-06, "loss": 0.0249, "step": 235305 }, { "epoch": 0.981841092872462, "grad_norm": 0.43249329052228935, "learning_rate": 2.0185743183930244e-06, "loss": 0.0192, "step": 235310 }, { "epoch": 0.9818619555874524, "grad_norm": 0.6414486378810874, "learning_rate": 2.0185528727391186e-06, "loss": 0.0156, "step": 235315 }, { "epoch": 0.9818828183024426, "grad_norm": 0.3871651127058901, "learning_rate": 2.018531427768724e-06, "loss": 0.0204, "step": 235320 }, { "epoch": 0.9819036810174329, "grad_norm": 0.6478522796764059, "learning_rate": 2.0185099834818048e-06, "loss": 0.018, "step": 235325 }, { "epoch": 0.9819245437324232, "grad_norm": 1.1656637434107644, "learning_rate": 2.0184885398783247e-06, "loss": 0.0176, "step": 235330 }, { "epoch": 0.9819454064474135, "grad_norm": 0.5553115031890883, "learning_rate": 2.018467096958248e-06, "loss": 0.0128, "step": 235335 }, { "epoch": 0.9819662691624037, "grad_norm": 0.5719105579684695, "learning_rate": 2.018445654721537e-06, "loss": 0.0145, "step": 235340 }, { "epoch": 0.981987131877394, "grad_norm": 0.5115233346573075, "learning_rate": 2.0184242131681556e-06, "loss": 0.0204, "step": 235345 }, { "epoch": 0.9820079945923843, "grad_norm": 0.4298543004825906, "learning_rate": 2.0184027722980683e-06, "loss": 0.021, "step": 235350 }, { "epoch": 0.9820288573073745, "grad_norm": 0.571140348403299, "learning_rate": 2.0183813321112388e-06, "loss": 0.0181, "step": 235355 }, { "epoch": 0.9820497200223648, "grad_norm": 0.6329611529555912, "learning_rate": 2.01835989260763e-06, "loss": 0.0191, "step": 235360 }, { "epoch": 0.9820705827373551, "grad_norm": 0.8073240188914248, "learning_rate": 2.018338453787207e-06, "loss": 0.0205, "step": 235365 }, { "epoch": 0.9820914454523454, "grad_norm": 0.49759743909227866, "learning_rate": 2.0183170156499316e-06, "loss": 0.028, "step": 235370 }, { "epoch": 0.9821123081673356, "grad_norm": 0.5929949527008862, "learning_rate": 2.018295578195769e-06, "loss": 0.0199, "step": 235375 }, { "epoch": 0.982133170882326, "grad_norm": 0.4256465773088351, "learning_rate": 2.0182741414246823e-06, "loss": 0.0154, "step": 235380 }, { "epoch": 0.9821540335973162, "grad_norm": 1.0177523286964623, "learning_rate": 2.0182527053366355e-06, "loss": 0.0196, "step": 235385 }, { "epoch": 0.9821748963123065, "grad_norm": 0.6619405138341786, "learning_rate": 2.0182312699315925e-06, "loss": 0.0184, "step": 235390 }, { "epoch": 0.9821957590272967, "grad_norm": 0.5284383388307141, "learning_rate": 2.018209835209516e-06, "loss": 0.0228, "step": 235395 }, { "epoch": 0.9822166217422871, "grad_norm": 0.7309762374757875, "learning_rate": 2.018188401170371e-06, "loss": 0.0211, "step": 235400 }, { "epoch": 0.9822374844572773, "grad_norm": 0.7841383644994676, "learning_rate": 2.0181669678141204e-06, "loss": 0.0205, "step": 235405 }, { "epoch": 0.9822583471722676, "grad_norm": 0.5936351704703571, "learning_rate": 2.018145535140729e-06, "loss": 0.0197, "step": 235410 }, { "epoch": 0.9822792098872579, "grad_norm": 0.5450276334703477, "learning_rate": 2.0181241031501587e-06, "loss": 0.019, "step": 235415 }, { "epoch": 0.9823000726022482, "grad_norm": 0.7964137591738228, "learning_rate": 2.0181026718423754e-06, "loss": 0.0246, "step": 235420 }, { "epoch": 0.9823209353172384, "grad_norm": 0.41384831846430276, "learning_rate": 2.0180812412173413e-06, "loss": 0.0194, "step": 235425 }, { "epoch": 0.9823417980322288, "grad_norm": 0.5516179325884565, "learning_rate": 2.0180598112750204e-06, "loss": 0.019, "step": 235430 }, { "epoch": 0.982362660747219, "grad_norm": 0.8098108342912415, "learning_rate": 2.0180383820153767e-06, "loss": 0.0151, "step": 235435 }, { "epoch": 0.9823835234622093, "grad_norm": 0.5112194591809558, "learning_rate": 2.0180169534383747e-06, "loss": 0.0126, "step": 235440 }, { "epoch": 0.9824043861771996, "grad_norm": 0.674799453939314, "learning_rate": 2.017995525543976e-06, "loss": 0.0196, "step": 235445 }, { "epoch": 0.9824252488921899, "grad_norm": 0.8744246643321825, "learning_rate": 2.0179740983321473e-06, "loss": 0.02, "step": 235450 }, { "epoch": 0.9824461116071801, "grad_norm": 0.4761404603949008, "learning_rate": 2.01795267180285e-06, "loss": 0.0191, "step": 235455 }, { "epoch": 0.9824669743221703, "grad_norm": 0.8435973381573774, "learning_rate": 2.017931245956049e-06, "loss": 0.0175, "step": 235460 }, { "epoch": 0.9824878370371607, "grad_norm": 0.27559785043690077, "learning_rate": 2.0179098207917075e-06, "loss": 0.0163, "step": 235465 }, { "epoch": 0.9825086997521509, "grad_norm": 0.6393517865746542, "learning_rate": 2.0178883963097896e-06, "loss": 0.0248, "step": 235470 }, { "epoch": 0.9825295624671412, "grad_norm": 0.9811162288732443, "learning_rate": 2.017866972510259e-06, "loss": 0.027, "step": 235475 }, { "epoch": 0.9825504251821315, "grad_norm": 0.458016779760014, "learning_rate": 2.017845549393079e-06, "loss": 0.0146, "step": 235480 }, { "epoch": 0.9825712878971218, "grad_norm": 0.9153199616311534, "learning_rate": 2.0178241269582145e-06, "loss": 0.0153, "step": 235485 }, { "epoch": 0.982592150612112, "grad_norm": 0.6503610942569995, "learning_rate": 2.0178027052056286e-06, "loss": 0.0256, "step": 235490 }, { "epoch": 0.9826130133271024, "grad_norm": 0.9218714395725497, "learning_rate": 2.0177812841352854e-06, "loss": 0.0209, "step": 235495 }, { "epoch": 0.9826338760420926, "grad_norm": 0.7049750715929208, "learning_rate": 2.017759863747148e-06, "loss": 0.0235, "step": 235500 }, { "epoch": 0.9826547387570829, "grad_norm": 0.4928131863458452, "learning_rate": 2.01773844404118e-06, "loss": 0.0184, "step": 235505 }, { "epoch": 0.9826756014720732, "grad_norm": 0.35527016690390084, "learning_rate": 2.017717025017347e-06, "loss": 0.0202, "step": 235510 }, { "epoch": 0.9826964641870635, "grad_norm": 0.30780014260964095, "learning_rate": 2.017695606675611e-06, "loss": 0.0278, "step": 235515 }, { "epoch": 0.9827173269020537, "grad_norm": 0.6601601571354666, "learning_rate": 2.017674189015936e-06, "loss": 0.0158, "step": 235520 }, { "epoch": 0.982738189617044, "grad_norm": 0.49060220198987725, "learning_rate": 2.0176527720382867e-06, "loss": 0.0173, "step": 235525 }, { "epoch": 0.9827590523320343, "grad_norm": 0.5485078057342592, "learning_rate": 2.0176313557426263e-06, "loss": 0.0214, "step": 235530 }, { "epoch": 0.9827799150470246, "grad_norm": 0.2569312978776771, "learning_rate": 2.017609940128919e-06, "loss": 0.023, "step": 235535 }, { "epoch": 0.9828007777620148, "grad_norm": 0.8171456989426343, "learning_rate": 2.0175885251971276e-06, "loss": 0.0365, "step": 235540 }, { "epoch": 0.9828216404770052, "grad_norm": 1.3165744035925697, "learning_rate": 2.017567110947217e-06, "loss": 0.0209, "step": 235545 }, { "epoch": 0.9828425031919954, "grad_norm": 0.9629491934128283, "learning_rate": 2.017545697379151e-06, "loss": 0.0189, "step": 235550 }, { "epoch": 0.9828633659069856, "grad_norm": 0.6882260369167399, "learning_rate": 2.0175242844928924e-06, "loss": 0.0184, "step": 235555 }, { "epoch": 0.982884228621976, "grad_norm": 0.40697846591333886, "learning_rate": 2.017502872288406e-06, "loss": 0.0266, "step": 235560 }, { "epoch": 0.9829050913369662, "grad_norm": 0.7059996049045046, "learning_rate": 2.017481460765655e-06, "loss": 0.0229, "step": 235565 }, { "epoch": 0.9829259540519565, "grad_norm": 0.45575692611726015, "learning_rate": 2.0174600499246038e-06, "loss": 0.0196, "step": 235570 }, { "epoch": 0.9829468167669467, "grad_norm": 0.6236377463124081, "learning_rate": 2.0174386397652155e-06, "loss": 0.0185, "step": 235575 }, { "epoch": 0.9829676794819371, "grad_norm": 0.5603462694236541, "learning_rate": 2.017417230287455e-06, "loss": 0.0204, "step": 235580 }, { "epoch": 0.9829885421969273, "grad_norm": 0.4584607575070074, "learning_rate": 2.0173958214912854e-06, "loss": 0.0167, "step": 235585 }, { "epoch": 0.9830094049119176, "grad_norm": 0.649410691214872, "learning_rate": 2.0173744133766702e-06, "loss": 0.0248, "step": 235590 }, { "epoch": 0.9830302676269079, "grad_norm": 0.3682564150722513, "learning_rate": 2.017353005943574e-06, "loss": 0.0224, "step": 235595 }, { "epoch": 0.9830511303418982, "grad_norm": 0.34687614878889833, "learning_rate": 2.0173315991919602e-06, "loss": 0.0229, "step": 235600 }, { "epoch": 0.9830719930568884, "grad_norm": 0.35969577459387597, "learning_rate": 2.017310193121793e-06, "loss": 0.0188, "step": 235605 }, { "epoch": 0.9830928557718788, "grad_norm": 0.3833883622511913, "learning_rate": 2.0172887877330357e-06, "loss": 0.0135, "step": 235610 }, { "epoch": 0.983113718486869, "grad_norm": 0.6900760518189522, "learning_rate": 2.017267383025653e-06, "loss": 0.0243, "step": 235615 }, { "epoch": 0.9831345812018593, "grad_norm": 0.3923424884252212, "learning_rate": 2.0172459789996078e-06, "loss": 0.016, "step": 235620 }, { "epoch": 0.9831554439168496, "grad_norm": 0.9708474027348839, "learning_rate": 2.0172245756548643e-06, "loss": 0.0213, "step": 235625 }, { "epoch": 0.9831763066318399, "grad_norm": 0.2031860229509324, "learning_rate": 2.0172031729913864e-06, "loss": 0.0125, "step": 235630 }, { "epoch": 0.9831971693468301, "grad_norm": 0.4606736677788127, "learning_rate": 2.0171817710091378e-06, "loss": 0.0197, "step": 235635 }, { "epoch": 0.9832180320618203, "grad_norm": 0.9577216045505708, "learning_rate": 2.017160369708083e-06, "loss": 0.0241, "step": 235640 }, { "epoch": 0.9832388947768107, "grad_norm": 0.6186918693113576, "learning_rate": 2.017138969088185e-06, "loss": 0.0232, "step": 235645 }, { "epoch": 0.983259757491801, "grad_norm": 0.7579950316116892, "learning_rate": 2.0171175691494084e-06, "loss": 0.0233, "step": 235650 }, { "epoch": 0.9832806202067912, "grad_norm": 0.3399854834876039, "learning_rate": 2.017096169891717e-06, "loss": 0.0212, "step": 235655 }, { "epoch": 0.9833014829217815, "grad_norm": 0.8547619839834029, "learning_rate": 2.017074771315074e-06, "loss": 0.017, "step": 235660 }, { "epoch": 0.9833223456367718, "grad_norm": 0.8081601179001007, "learning_rate": 2.017053373419444e-06, "loss": 0.0213, "step": 235665 }, { "epoch": 0.983343208351762, "grad_norm": 0.5752019177158043, "learning_rate": 2.01703197620479e-06, "loss": 0.0164, "step": 235670 }, { "epoch": 0.9833640710667524, "grad_norm": 1.1996538878796752, "learning_rate": 2.0170105796710774e-06, "loss": 0.0211, "step": 235675 }, { "epoch": 0.9833849337817426, "grad_norm": 0.6457792413087498, "learning_rate": 2.0169891838182683e-06, "loss": 0.0155, "step": 235680 }, { "epoch": 0.9834057964967329, "grad_norm": 0.26561147752916714, "learning_rate": 2.0169677886463276e-06, "loss": 0.0186, "step": 235685 }, { "epoch": 0.9834266592117232, "grad_norm": 0.6514018433898385, "learning_rate": 2.016946394155219e-06, "loss": 0.0156, "step": 235690 }, { "epoch": 0.9834475219267135, "grad_norm": 0.6704340608060813, "learning_rate": 2.016925000344907e-06, "loss": 0.0181, "step": 235695 }, { "epoch": 0.9834683846417037, "grad_norm": 1.158455210697406, "learning_rate": 2.016903607215354e-06, "loss": 0.0254, "step": 235700 }, { "epoch": 0.983489247356694, "grad_norm": 0.4199303923123934, "learning_rate": 2.016882214766525e-06, "loss": 0.0124, "step": 235705 }, { "epoch": 0.9835101100716843, "grad_norm": 0.6870358767843757, "learning_rate": 2.0168608229983843e-06, "loss": 0.0145, "step": 235710 }, { "epoch": 0.9835309727866746, "grad_norm": 0.31539028343061615, "learning_rate": 2.016839431910895e-06, "loss": 0.0139, "step": 235715 }, { "epoch": 0.9835518355016648, "grad_norm": 0.53365904934878, "learning_rate": 2.0168180415040206e-06, "loss": 0.0199, "step": 235720 }, { "epoch": 0.9835726982166552, "grad_norm": 0.6963290757843856, "learning_rate": 2.0167966517777263e-06, "loss": 0.0158, "step": 235725 }, { "epoch": 0.9835935609316454, "grad_norm": 0.5362938814791454, "learning_rate": 2.016775262731975e-06, "loss": 0.0166, "step": 235730 }, { "epoch": 0.9836144236466356, "grad_norm": 0.44060908546897176, "learning_rate": 2.0167538743667306e-06, "loss": 0.0166, "step": 235735 }, { "epoch": 0.983635286361626, "grad_norm": 0.4910421228976627, "learning_rate": 2.016732486681958e-06, "loss": 0.0175, "step": 235740 }, { "epoch": 0.9836561490766162, "grad_norm": 0.40684194965613624, "learning_rate": 2.0167110996776202e-06, "loss": 0.0255, "step": 235745 }, { "epoch": 0.9836770117916065, "grad_norm": 0.6864826822318325, "learning_rate": 2.0166897133536815e-06, "loss": 0.0191, "step": 235750 }, { "epoch": 0.9836978745065967, "grad_norm": 0.7631988646645764, "learning_rate": 2.0166683277101055e-06, "loss": 0.0253, "step": 235755 }, { "epoch": 0.9837187372215871, "grad_norm": 0.6494110684738995, "learning_rate": 2.016646942746857e-06, "loss": 0.0212, "step": 235760 }, { "epoch": 0.9837395999365773, "grad_norm": 0.7617945217524641, "learning_rate": 2.0166255584638984e-06, "loss": 0.0274, "step": 235765 }, { "epoch": 0.9837604626515676, "grad_norm": 0.7163716761235992, "learning_rate": 2.016604174861195e-06, "loss": 0.0164, "step": 235770 }, { "epoch": 0.9837813253665579, "grad_norm": 0.4346181801758306, "learning_rate": 2.01658279193871e-06, "loss": 0.0306, "step": 235775 }, { "epoch": 0.9838021880815482, "grad_norm": 0.32884675293139026, "learning_rate": 2.016561409696408e-06, "loss": 0.017, "step": 235780 }, { "epoch": 0.9838230507965384, "grad_norm": 0.44424308551909164, "learning_rate": 2.0165400281342518e-06, "loss": 0.0219, "step": 235785 }, { "epoch": 0.9838439135115288, "grad_norm": 0.2987274861701191, "learning_rate": 2.0165186472522067e-06, "loss": 0.0157, "step": 235790 }, { "epoch": 0.983864776226519, "grad_norm": 0.7581904139102258, "learning_rate": 2.016497267050236e-06, "loss": 0.0215, "step": 235795 }, { "epoch": 0.9838856389415093, "grad_norm": 0.7947534580801124, "learning_rate": 2.0164758875283033e-06, "loss": 0.0243, "step": 235800 }, { "epoch": 0.9839065016564996, "grad_norm": 0.20489422091390425, "learning_rate": 2.016454508686373e-06, "loss": 0.0244, "step": 235805 }, { "epoch": 0.9839273643714899, "grad_norm": 0.43475733306406417, "learning_rate": 2.016433130524409e-06, "loss": 0.0164, "step": 235810 }, { "epoch": 0.9839482270864801, "grad_norm": 0.5606902944983849, "learning_rate": 2.016411753042375e-06, "loss": 0.0252, "step": 235815 }, { "epoch": 0.9839690898014704, "grad_norm": 0.7474844621516221, "learning_rate": 2.016390376240236e-06, "loss": 0.0189, "step": 235820 }, { "epoch": 0.9839899525164607, "grad_norm": 0.8728736454247064, "learning_rate": 2.0163690001179545e-06, "loss": 0.0171, "step": 235825 }, { "epoch": 0.984010815231451, "grad_norm": 0.37683654038919306, "learning_rate": 2.0163476246754953e-06, "loss": 0.0188, "step": 235830 }, { "epoch": 0.9840316779464412, "grad_norm": 0.6663463243902753, "learning_rate": 2.0163262499128215e-06, "loss": 0.0202, "step": 235835 }, { "epoch": 0.9840525406614316, "grad_norm": 0.4563215310779685, "learning_rate": 2.0163048758298984e-06, "loss": 0.0221, "step": 235840 }, { "epoch": 0.9840734033764218, "grad_norm": 0.5072153114566297, "learning_rate": 2.0162835024266896e-06, "loss": 0.0222, "step": 235845 }, { "epoch": 0.984094266091412, "grad_norm": 0.8763200191335178, "learning_rate": 2.016262129703158e-06, "loss": 0.0306, "step": 235850 }, { "epoch": 0.9841151288064024, "grad_norm": 0.9397155592145037, "learning_rate": 2.016240757659269e-06, "loss": 0.0239, "step": 235855 }, { "epoch": 0.9841359915213926, "grad_norm": 0.5319093838020346, "learning_rate": 2.0162193862949857e-06, "loss": 0.0159, "step": 235860 }, { "epoch": 0.9841568542363829, "grad_norm": 0.45547728009319216, "learning_rate": 2.016198015610272e-06, "loss": 0.0214, "step": 235865 }, { "epoch": 0.9841777169513732, "grad_norm": 0.5497619420114884, "learning_rate": 2.0161766456050925e-06, "loss": 0.0182, "step": 235870 }, { "epoch": 0.9841985796663635, "grad_norm": 1.900228551854657, "learning_rate": 2.016155276279411e-06, "loss": 0.0229, "step": 235875 }, { "epoch": 0.9842194423813537, "grad_norm": 0.5337030082182903, "learning_rate": 2.0161339076331918e-06, "loss": 0.0141, "step": 235880 }, { "epoch": 0.984240305096344, "grad_norm": 0.30262193778365215, "learning_rate": 2.0161125396663976e-06, "loss": 0.0155, "step": 235885 }, { "epoch": 0.9842611678113343, "grad_norm": 0.4001761007119425, "learning_rate": 2.0160911723789937e-06, "loss": 0.0135, "step": 235890 }, { "epoch": 0.9842820305263246, "grad_norm": 0.7845273540990496, "learning_rate": 2.0160698057709437e-06, "loss": 0.0156, "step": 235895 }, { "epoch": 0.9843028932413148, "grad_norm": 0.25808644712273787, "learning_rate": 2.0160484398422115e-06, "loss": 0.0266, "step": 235900 }, { "epoch": 0.9843237559563052, "grad_norm": 0.5936560974239444, "learning_rate": 2.0160270745927617e-06, "loss": 0.0193, "step": 235905 }, { "epoch": 0.9843446186712954, "grad_norm": 0.9336255092169017, "learning_rate": 2.0160057100225573e-06, "loss": 0.0239, "step": 235910 }, { "epoch": 0.9843654813862857, "grad_norm": 0.9496200122624321, "learning_rate": 2.0159843461315624e-06, "loss": 0.0189, "step": 235915 }, { "epoch": 0.984386344101276, "grad_norm": 0.9433708412233385, "learning_rate": 2.015962982919742e-06, "loss": 0.0317, "step": 235920 }, { "epoch": 0.9844072068162663, "grad_norm": 0.36785360656404875, "learning_rate": 2.0159416203870596e-06, "loss": 0.0147, "step": 235925 }, { "epoch": 0.9844280695312565, "grad_norm": 0.7061330609592698, "learning_rate": 2.015920258533479e-06, "loss": 0.029, "step": 235930 }, { "epoch": 0.9844489322462467, "grad_norm": 1.3189466344531169, "learning_rate": 2.0158988973589643e-06, "loss": 0.0267, "step": 235935 }, { "epoch": 0.9844697949612371, "grad_norm": 0.5511091359318553, "learning_rate": 2.0158775368634797e-06, "loss": 0.0391, "step": 235940 }, { "epoch": 0.9844906576762273, "grad_norm": 0.5749377970173881, "learning_rate": 2.015856177046989e-06, "loss": 0.0259, "step": 235945 }, { "epoch": 0.9845115203912176, "grad_norm": 0.7133844500754046, "learning_rate": 2.0158348179094565e-06, "loss": 0.016, "step": 235950 }, { "epoch": 0.9845323831062079, "grad_norm": 0.7694106457182325, "learning_rate": 2.0158134594508456e-06, "loss": 0.0169, "step": 235955 }, { "epoch": 0.9845532458211982, "grad_norm": 0.7086725990644365, "learning_rate": 2.0157921016711214e-06, "loss": 0.0202, "step": 235960 }, { "epoch": 0.9845741085361884, "grad_norm": 1.4125467201048436, "learning_rate": 2.015770744570247e-06, "loss": 0.0271, "step": 235965 }, { "epoch": 0.9845949712511788, "grad_norm": 0.5712283631293928, "learning_rate": 2.0157493881481868e-06, "loss": 0.0176, "step": 235970 }, { "epoch": 0.984615833966169, "grad_norm": 0.6974189830623351, "learning_rate": 2.015728032404905e-06, "loss": 0.021, "step": 235975 }, { "epoch": 0.9846366966811593, "grad_norm": 0.5101227452273318, "learning_rate": 2.0157066773403654e-06, "loss": 0.0207, "step": 235980 }, { "epoch": 0.9846575593961496, "grad_norm": 0.550031345179958, "learning_rate": 2.0156853229545324e-06, "loss": 0.019, "step": 235985 }, { "epoch": 0.9846784221111399, "grad_norm": 0.7263407782837588, "learning_rate": 2.0156639692473697e-06, "loss": 0.0239, "step": 235990 }, { "epoch": 0.9846992848261301, "grad_norm": 0.5957790927103344, "learning_rate": 2.015642616218841e-06, "loss": 0.014, "step": 235995 }, { "epoch": 0.9847201475411204, "grad_norm": 0.8304878401884763, "learning_rate": 2.015621263868911e-06, "loss": 0.0171, "step": 236000 }, { "epoch": 0.9847410102561107, "grad_norm": 0.8325649247395829, "learning_rate": 2.0155999121975437e-06, "loss": 0.0232, "step": 236005 }, { "epoch": 0.984761872971101, "grad_norm": 0.1425506782739044, "learning_rate": 2.0155785612047027e-06, "loss": 0.0112, "step": 236010 }, { "epoch": 0.9847827356860912, "grad_norm": 0.5889180711254569, "learning_rate": 2.0155572108903526e-06, "loss": 0.0227, "step": 236015 }, { "epoch": 0.9848035984010816, "grad_norm": 0.4001554929751243, "learning_rate": 2.015535861254457e-06, "loss": 0.0199, "step": 236020 }, { "epoch": 0.9848244611160718, "grad_norm": 2.0567510737089645, "learning_rate": 2.0155145122969805e-06, "loss": 0.021, "step": 236025 }, { "epoch": 0.984845323831062, "grad_norm": 0.2496859535211184, "learning_rate": 2.015493164017887e-06, "loss": 0.0163, "step": 236030 }, { "epoch": 0.9848661865460524, "grad_norm": 0.4354654444098837, "learning_rate": 2.01547181641714e-06, "loss": 0.0157, "step": 236035 }, { "epoch": 0.9848870492610426, "grad_norm": 0.7447443467105698, "learning_rate": 2.015450469494704e-06, "loss": 0.0219, "step": 236040 }, { "epoch": 0.9849079119760329, "grad_norm": 0.26546493616873756, "learning_rate": 2.015429123250543e-06, "loss": 0.0293, "step": 236045 }, { "epoch": 0.9849287746910232, "grad_norm": 0.8266167302147633, "learning_rate": 2.0154077776846215e-06, "loss": 0.0217, "step": 236050 }, { "epoch": 0.9849496374060135, "grad_norm": 0.486009792116609, "learning_rate": 2.015386432796903e-06, "loss": 0.0235, "step": 236055 }, { "epoch": 0.9849705001210037, "grad_norm": 0.45442102474612095, "learning_rate": 2.0153650885873523e-06, "loss": 0.0247, "step": 236060 }, { "epoch": 0.984991362835994, "grad_norm": 0.5168290443445777, "learning_rate": 2.0153437450559327e-06, "loss": 0.0168, "step": 236065 }, { "epoch": 0.9850122255509843, "grad_norm": 1.1121697633238217, "learning_rate": 2.0153224022026087e-06, "loss": 0.019, "step": 236070 }, { "epoch": 0.9850330882659746, "grad_norm": 0.738717038288068, "learning_rate": 2.0153010600273444e-06, "loss": 0.02, "step": 236075 }, { "epoch": 0.9850539509809648, "grad_norm": 0.5853976365709873, "learning_rate": 2.0152797185301036e-06, "loss": 0.0178, "step": 236080 }, { "epoch": 0.9850748136959552, "grad_norm": 0.8726759202005632, "learning_rate": 2.015258377710851e-06, "loss": 0.0238, "step": 236085 }, { "epoch": 0.9850956764109454, "grad_norm": 1.6167694149060852, "learning_rate": 2.0152370375695494e-06, "loss": 0.0293, "step": 236090 }, { "epoch": 0.9851165391259357, "grad_norm": 0.8842062072542507, "learning_rate": 2.0152156981061648e-06, "loss": 0.0168, "step": 236095 }, { "epoch": 0.985137401840926, "grad_norm": 0.6578229065014906, "learning_rate": 2.01519435932066e-06, "loss": 0.019, "step": 236100 }, { "epoch": 0.9851582645559163, "grad_norm": 0.3375168497261045, "learning_rate": 2.0151730212129996e-06, "loss": 0.0212, "step": 236105 }, { "epoch": 0.9851791272709065, "grad_norm": 0.6440363415823471, "learning_rate": 2.0151516837831476e-06, "loss": 0.0219, "step": 236110 }, { "epoch": 0.9851999899858968, "grad_norm": 0.4157181614333811, "learning_rate": 2.0151303470310675e-06, "loss": 0.015, "step": 236115 }, { "epoch": 0.9852208527008871, "grad_norm": 0.7041374395087299, "learning_rate": 2.015109010956725e-06, "loss": 0.0178, "step": 236120 }, { "epoch": 0.9852417154158774, "grad_norm": 0.9643631936289976, "learning_rate": 2.015087675560082e-06, "loss": 0.021, "step": 236125 }, { "epoch": 0.9852625781308676, "grad_norm": 0.7284066517891689, "learning_rate": 2.015066340841105e-06, "loss": 0.0163, "step": 236130 }, { "epoch": 0.985283440845858, "grad_norm": 0.5976189630935259, "learning_rate": 2.0150450067997564e-06, "loss": 0.0173, "step": 236135 }, { "epoch": 0.9853043035608482, "grad_norm": 0.6770325041800386, "learning_rate": 2.015023673436001e-06, "loss": 0.0153, "step": 236140 }, { "epoch": 0.9853251662758384, "grad_norm": 0.6615845477355892, "learning_rate": 2.0150023407498027e-06, "loss": 0.015, "step": 236145 }, { "epoch": 0.9853460289908288, "grad_norm": 0.8220843613834082, "learning_rate": 2.014981008741126e-06, "loss": 0.0173, "step": 236150 }, { "epoch": 0.985366891705819, "grad_norm": 0.7956392193528201, "learning_rate": 2.0149596774099346e-06, "loss": 0.0168, "step": 236155 }, { "epoch": 0.9853877544208093, "grad_norm": 0.7201737776260055, "learning_rate": 2.014938346756193e-06, "loss": 0.0275, "step": 236160 }, { "epoch": 0.9854086171357996, "grad_norm": 0.566296493403048, "learning_rate": 2.0149170167798652e-06, "loss": 0.0165, "step": 236165 }, { "epoch": 0.9854294798507899, "grad_norm": 0.9423827277575931, "learning_rate": 2.014895687480915e-06, "loss": 0.0172, "step": 236170 }, { "epoch": 0.9854503425657801, "grad_norm": 0.42587264992811946, "learning_rate": 2.014874358859307e-06, "loss": 0.0141, "step": 236175 }, { "epoch": 0.9854712052807704, "grad_norm": 0.45371733751353027, "learning_rate": 2.0148530309150057e-06, "loss": 0.0328, "step": 236180 }, { "epoch": 0.9854920679957607, "grad_norm": 0.7940996795300687, "learning_rate": 2.0148317036479744e-06, "loss": 0.0245, "step": 236185 }, { "epoch": 0.985512930710751, "grad_norm": 0.4291786418589154, "learning_rate": 2.0148103770581776e-06, "loss": 0.0151, "step": 236190 }, { "epoch": 0.9855337934257412, "grad_norm": 0.9110867690253428, "learning_rate": 2.0147890511455795e-06, "loss": 0.0169, "step": 236195 }, { "epoch": 0.9855546561407316, "grad_norm": 0.6178715022341794, "learning_rate": 2.014767725910144e-06, "loss": 0.0214, "step": 236200 }, { "epoch": 0.9855755188557218, "grad_norm": 1.4124171019162344, "learning_rate": 2.0147464013518363e-06, "loss": 0.0193, "step": 236205 }, { "epoch": 0.985596381570712, "grad_norm": 0.5511872734692822, "learning_rate": 2.0147250774706193e-06, "loss": 0.0171, "step": 236210 }, { "epoch": 0.9856172442857024, "grad_norm": 0.4172588021017765, "learning_rate": 2.0147037542664577e-06, "loss": 0.0149, "step": 236215 }, { "epoch": 0.9856381070006927, "grad_norm": 0.41835164201870995, "learning_rate": 2.0146824317393156e-06, "loss": 0.0138, "step": 236220 }, { "epoch": 0.9856589697156829, "grad_norm": 0.6987155794970661, "learning_rate": 2.0146611098891574e-06, "loss": 0.0206, "step": 236225 }, { "epoch": 0.9856798324306733, "grad_norm": 0.7381745853087968, "learning_rate": 2.0146397887159466e-06, "loss": 0.0155, "step": 236230 }, { "epoch": 0.9857006951456635, "grad_norm": 0.8234699192299798, "learning_rate": 2.0146184682196486e-06, "loss": 0.0239, "step": 236235 }, { "epoch": 0.9857215578606537, "grad_norm": 0.5908621237048703, "learning_rate": 2.0145971484002264e-06, "loss": 0.0162, "step": 236240 }, { "epoch": 0.985742420575644, "grad_norm": 0.3099702277713515, "learning_rate": 2.0145758292576446e-06, "loss": 0.0209, "step": 236245 }, { "epoch": 0.9857632832906343, "grad_norm": 0.8392537095635915, "learning_rate": 2.0145545107918675e-06, "loss": 0.0177, "step": 236250 }, { "epoch": 0.9857841460056246, "grad_norm": 0.2537762356732173, "learning_rate": 2.014533193002859e-06, "loss": 0.0201, "step": 236255 }, { "epoch": 0.9858050087206148, "grad_norm": 0.5243106762265686, "learning_rate": 2.014511875890584e-06, "loss": 0.0146, "step": 236260 }, { "epoch": 0.9858258714356052, "grad_norm": 1.2459702981988336, "learning_rate": 2.014490559455006e-06, "loss": 0.0186, "step": 236265 }, { "epoch": 0.9858467341505954, "grad_norm": 0.4627559370607231, "learning_rate": 2.0144692436960894e-06, "loss": 0.0144, "step": 236270 }, { "epoch": 0.9858675968655857, "grad_norm": 0.5406039316358476, "learning_rate": 2.014447928613798e-06, "loss": 0.0184, "step": 236275 }, { "epoch": 0.985888459580576, "grad_norm": 0.6232130749699356, "learning_rate": 2.014426614208097e-06, "loss": 0.0183, "step": 236280 }, { "epoch": 0.9859093222955663, "grad_norm": 0.27758142274899966, "learning_rate": 2.0144053004789496e-06, "loss": 0.0198, "step": 236285 }, { "epoch": 0.9859301850105565, "grad_norm": 0.5428378983213191, "learning_rate": 2.0143839874263208e-06, "loss": 0.0159, "step": 236290 }, { "epoch": 0.9859510477255468, "grad_norm": 0.7088271083609292, "learning_rate": 2.0143626750501744e-06, "loss": 0.0211, "step": 236295 }, { "epoch": 0.9859719104405371, "grad_norm": 0.42966887956361005, "learning_rate": 2.0143413633504744e-06, "loss": 0.0144, "step": 236300 }, { "epoch": 0.9859927731555274, "grad_norm": 0.5827757593458505, "learning_rate": 2.0143200523271854e-06, "loss": 0.0181, "step": 236305 }, { "epoch": 0.9860136358705176, "grad_norm": 0.7676578443108614, "learning_rate": 2.014298741980271e-06, "loss": 0.0189, "step": 236310 }, { "epoch": 0.986034498585508, "grad_norm": 0.8176652987913171, "learning_rate": 2.0142774323096966e-06, "loss": 0.0172, "step": 236315 }, { "epoch": 0.9860553613004982, "grad_norm": 0.7136004324787376, "learning_rate": 2.014256123315426e-06, "loss": 0.025, "step": 236320 }, { "epoch": 0.9860762240154884, "grad_norm": 0.460657267567457, "learning_rate": 2.0142348149974224e-06, "loss": 0.0158, "step": 236325 }, { "epoch": 0.9860970867304788, "grad_norm": 0.550555496540466, "learning_rate": 2.014213507355651e-06, "loss": 0.0241, "step": 236330 }, { "epoch": 0.986117949445469, "grad_norm": 0.5931916073895819, "learning_rate": 2.0141922003900754e-06, "loss": 0.0211, "step": 236335 }, { "epoch": 0.9861388121604593, "grad_norm": 0.6579260889293573, "learning_rate": 2.014170894100661e-06, "loss": 0.0162, "step": 236340 }, { "epoch": 0.9861596748754496, "grad_norm": 0.5161900017273602, "learning_rate": 2.014149588487371e-06, "loss": 0.0259, "step": 236345 }, { "epoch": 0.9861805375904399, "grad_norm": 0.13120916131141352, "learning_rate": 2.01412828355017e-06, "loss": 0.0105, "step": 236350 }, { "epoch": 0.9862014003054301, "grad_norm": 0.5416682121266838, "learning_rate": 2.0141069792890224e-06, "loss": 0.0187, "step": 236355 }, { "epoch": 0.9862222630204204, "grad_norm": 0.6169049044012138, "learning_rate": 2.0140856757038917e-06, "loss": 0.0266, "step": 236360 }, { "epoch": 0.9862431257354107, "grad_norm": 0.5624657101200038, "learning_rate": 2.014064372794743e-06, "loss": 0.0212, "step": 236365 }, { "epoch": 0.986263988450401, "grad_norm": 0.425479987357264, "learning_rate": 2.01404307056154e-06, "loss": 0.0187, "step": 236370 }, { "epoch": 0.9862848511653912, "grad_norm": 0.6248599966114146, "learning_rate": 2.0140217690042476e-06, "loss": 0.0176, "step": 236375 }, { "epoch": 0.9863057138803816, "grad_norm": 1.2716809419409234, "learning_rate": 2.0140004681228292e-06, "loss": 0.0234, "step": 236380 }, { "epoch": 0.9863265765953718, "grad_norm": 0.7157319792122938, "learning_rate": 2.0139791679172495e-06, "loss": 0.0176, "step": 236385 }, { "epoch": 0.9863474393103621, "grad_norm": 0.6281640514563661, "learning_rate": 2.0139578683874733e-06, "loss": 0.0176, "step": 236390 }, { "epoch": 0.9863683020253524, "grad_norm": 0.8083870089064478, "learning_rate": 2.0139365695334638e-06, "loss": 0.023, "step": 236395 }, { "epoch": 0.9863891647403427, "grad_norm": 0.8234341876647099, "learning_rate": 2.013915271355186e-06, "loss": 0.0171, "step": 236400 }, { "epoch": 0.9864100274553329, "grad_norm": 0.4728192656666002, "learning_rate": 2.013893973852604e-06, "loss": 0.0203, "step": 236405 }, { "epoch": 0.9864308901703233, "grad_norm": 0.6410505511004038, "learning_rate": 2.013872677025682e-06, "loss": 0.0185, "step": 236410 }, { "epoch": 0.9864517528853135, "grad_norm": 0.716554028722508, "learning_rate": 2.013851380874384e-06, "loss": 0.0193, "step": 236415 }, { "epoch": 0.9864726156003037, "grad_norm": 0.7651594134076963, "learning_rate": 2.013830085398675e-06, "loss": 0.0241, "step": 236420 }, { "epoch": 0.986493478315294, "grad_norm": 0.41139126183306435, "learning_rate": 2.013808790598519e-06, "loss": 0.0267, "step": 236425 }, { "epoch": 0.9865143410302843, "grad_norm": 0.8268876807214214, "learning_rate": 2.01378749647388e-06, "loss": 0.0201, "step": 236430 }, { "epoch": 0.9865352037452746, "grad_norm": 0.3166218515402344, "learning_rate": 2.013766203024722e-06, "loss": 0.0198, "step": 236435 }, { "epoch": 0.9865560664602648, "grad_norm": 0.693479083171194, "learning_rate": 2.0137449102510105e-06, "loss": 0.0139, "step": 236440 }, { "epoch": 0.9865769291752552, "grad_norm": 0.3615750318212977, "learning_rate": 2.013723618152709e-06, "loss": 0.0139, "step": 236445 }, { "epoch": 0.9865977918902454, "grad_norm": 0.3422079326913422, "learning_rate": 2.0137023267297808e-06, "loss": 0.0177, "step": 236450 }, { "epoch": 0.9866186546052357, "grad_norm": 0.7961869919579543, "learning_rate": 2.0136810359821917e-06, "loss": 0.021, "step": 236455 }, { "epoch": 0.986639517320226, "grad_norm": 1.11855397078035, "learning_rate": 2.013659745909906e-06, "loss": 0.0205, "step": 236460 }, { "epoch": 0.9866603800352163, "grad_norm": 0.3859616969991322, "learning_rate": 2.0136384565128867e-06, "loss": 0.0163, "step": 236465 }, { "epoch": 0.9866812427502065, "grad_norm": 0.6784115617371962, "learning_rate": 2.0136171677910998e-06, "loss": 0.0175, "step": 236470 }, { "epoch": 0.9867021054651968, "grad_norm": 0.5987421170479453, "learning_rate": 2.013595879744508e-06, "loss": 0.0197, "step": 236475 }, { "epoch": 0.9867229681801871, "grad_norm": 0.36556729295785934, "learning_rate": 2.013574592373077e-06, "loss": 0.0194, "step": 236480 }, { "epoch": 0.9867438308951774, "grad_norm": 0.6135113079976673, "learning_rate": 2.01355330567677e-06, "loss": 0.0208, "step": 236485 }, { "epoch": 0.9867646936101676, "grad_norm": 0.5954436917512347, "learning_rate": 2.0135320196555515e-06, "loss": 0.019, "step": 236490 }, { "epoch": 0.986785556325158, "grad_norm": 0.6065639305392153, "learning_rate": 2.0135107343093865e-06, "loss": 0.0193, "step": 236495 }, { "epoch": 0.9868064190401482, "grad_norm": 0.5947748706241901, "learning_rate": 2.013489449638239e-06, "loss": 0.016, "step": 236500 }, { "epoch": 0.9868272817551385, "grad_norm": 0.41349827350075363, "learning_rate": 2.0134681656420733e-06, "loss": 0.02, "step": 236505 }, { "epoch": 0.9868481444701288, "grad_norm": 0.40241878898081596, "learning_rate": 2.013446882320853e-06, "loss": 0.0143, "step": 236510 }, { "epoch": 0.986869007185119, "grad_norm": 0.4850792593260844, "learning_rate": 2.013425599674544e-06, "loss": 0.0202, "step": 236515 }, { "epoch": 0.9868898699001093, "grad_norm": 0.4215447225949862, "learning_rate": 2.013404317703109e-06, "loss": 0.0202, "step": 236520 }, { "epoch": 0.9869107326150997, "grad_norm": 0.7536670589960242, "learning_rate": 2.013383036406513e-06, "loss": 0.0193, "step": 236525 }, { "epoch": 0.9869315953300899, "grad_norm": 0.7897633481641763, "learning_rate": 2.013361755784721e-06, "loss": 0.0229, "step": 236530 }, { "epoch": 0.9869524580450801, "grad_norm": 0.531131142035377, "learning_rate": 2.013340475837696e-06, "loss": 0.0208, "step": 236535 }, { "epoch": 0.9869733207600704, "grad_norm": 0.8678434910920585, "learning_rate": 2.0133191965654034e-06, "loss": 0.0209, "step": 236540 }, { "epoch": 0.9869941834750607, "grad_norm": 0.410978346309124, "learning_rate": 2.0132979179678074e-06, "loss": 0.0139, "step": 236545 }, { "epoch": 0.987015046190051, "grad_norm": 0.4806427085942377, "learning_rate": 2.013276640044872e-06, "loss": 0.0148, "step": 236550 }, { "epoch": 0.9870359089050412, "grad_norm": 1.0821299539285023, "learning_rate": 2.013255362796561e-06, "loss": 0.0161, "step": 236555 }, { "epoch": 0.9870567716200316, "grad_norm": 0.5914519278083658, "learning_rate": 2.0132340862228406e-06, "loss": 0.0234, "step": 236560 }, { "epoch": 0.9870776343350218, "grad_norm": 0.8059829159461088, "learning_rate": 2.0132128103236735e-06, "loss": 0.0193, "step": 236565 }, { "epoch": 0.9870984970500121, "grad_norm": 0.6265901421741021, "learning_rate": 2.013191535099024e-06, "loss": 0.0172, "step": 236570 }, { "epoch": 0.9871193597650024, "grad_norm": 0.5236979281490685, "learning_rate": 2.013170260548858e-06, "loss": 0.0181, "step": 236575 }, { "epoch": 0.9871402224799927, "grad_norm": 0.7920840636082208, "learning_rate": 2.0131489866731382e-06, "loss": 0.0215, "step": 236580 }, { "epoch": 0.9871610851949829, "grad_norm": 0.4720019857022869, "learning_rate": 2.01312771347183e-06, "loss": 0.02, "step": 236585 }, { "epoch": 0.9871819479099733, "grad_norm": 0.21957845599780504, "learning_rate": 2.0131064409448973e-06, "loss": 0.0195, "step": 236590 }, { "epoch": 0.9872028106249635, "grad_norm": 0.6570309321595112, "learning_rate": 2.0130851690923047e-06, "loss": 0.0212, "step": 236595 }, { "epoch": 0.9872236733399538, "grad_norm": 0.38110797878286123, "learning_rate": 2.0130638979140164e-06, "loss": 0.0173, "step": 236600 }, { "epoch": 0.987244536054944, "grad_norm": 0.8554660073740635, "learning_rate": 2.0130426274099967e-06, "loss": 0.0232, "step": 236605 }, { "epoch": 0.9872653987699344, "grad_norm": 0.5178433989744036, "learning_rate": 2.01302135758021e-06, "loss": 0.0202, "step": 236610 }, { "epoch": 0.9872862614849246, "grad_norm": 0.8280383122264036, "learning_rate": 2.0130000884246214e-06, "loss": 0.0257, "step": 236615 }, { "epoch": 0.9873071241999148, "grad_norm": 0.8795142860096833, "learning_rate": 2.012978819943194e-06, "loss": 0.0183, "step": 236620 }, { "epoch": 0.9873279869149052, "grad_norm": 0.6635472139832986, "learning_rate": 2.012957552135893e-06, "loss": 0.0226, "step": 236625 }, { "epoch": 0.9873488496298954, "grad_norm": 0.5991268984734633, "learning_rate": 2.012936285002683e-06, "loss": 0.0202, "step": 236630 }, { "epoch": 0.9873697123448857, "grad_norm": 1.591095214178734, "learning_rate": 2.012915018543528e-06, "loss": 0.0246, "step": 236635 }, { "epoch": 0.987390575059876, "grad_norm": 0.35687821783103874, "learning_rate": 2.012893752758392e-06, "loss": 0.0166, "step": 236640 }, { "epoch": 0.9874114377748663, "grad_norm": 0.610752724301863, "learning_rate": 2.0128724876472404e-06, "loss": 0.0193, "step": 236645 }, { "epoch": 0.9874323004898565, "grad_norm": 0.31416964607440395, "learning_rate": 2.0128512232100362e-06, "loss": 0.0164, "step": 236650 }, { "epoch": 0.9874531632048468, "grad_norm": 0.7803588151593448, "learning_rate": 2.0128299594467455e-06, "loss": 0.0182, "step": 236655 }, { "epoch": 0.9874740259198371, "grad_norm": 1.261943495215449, "learning_rate": 2.0128086963573317e-06, "loss": 0.0197, "step": 236660 }, { "epoch": 0.9874948886348274, "grad_norm": 0.5091757407820272, "learning_rate": 2.0127874339417593e-06, "loss": 0.0223, "step": 236665 }, { "epoch": 0.9875157513498176, "grad_norm": 0.5007401666893895, "learning_rate": 2.0127661721999927e-06, "loss": 0.0177, "step": 236670 }, { "epoch": 0.987536614064808, "grad_norm": 1.1175892013881417, "learning_rate": 2.012744911131996e-06, "loss": 0.0171, "step": 236675 }, { "epoch": 0.9875574767797982, "grad_norm": 0.7511780040176185, "learning_rate": 2.012723650737734e-06, "loss": 0.0148, "step": 236680 }, { "epoch": 0.9875783394947885, "grad_norm": 0.8675344734351715, "learning_rate": 2.0127023910171716e-06, "loss": 0.0211, "step": 236685 }, { "epoch": 0.9875992022097788, "grad_norm": 0.9932593956179961, "learning_rate": 2.0126811319702725e-06, "loss": 0.0225, "step": 236690 }, { "epoch": 0.9876200649247691, "grad_norm": 0.5482604659539894, "learning_rate": 2.0126598735970014e-06, "loss": 0.0144, "step": 236695 }, { "epoch": 0.9876409276397593, "grad_norm": 0.46349632251219497, "learning_rate": 2.012638615897323e-06, "loss": 0.0224, "step": 236700 }, { "epoch": 0.9876617903547497, "grad_norm": 0.2611928497862342, "learning_rate": 2.012617358871201e-06, "loss": 0.0193, "step": 236705 }, { "epoch": 0.9876826530697399, "grad_norm": 0.8036822154994742, "learning_rate": 2.0125961025186e-06, "loss": 0.02, "step": 236710 }, { "epoch": 0.9877035157847301, "grad_norm": 0.4902091730849287, "learning_rate": 2.0125748468394855e-06, "loss": 0.0244, "step": 236715 }, { "epoch": 0.9877243784997204, "grad_norm": 0.5812732661742391, "learning_rate": 2.0125535918338204e-06, "loss": 0.0239, "step": 236720 }, { "epoch": 0.9877452412147107, "grad_norm": 0.5793629747357625, "learning_rate": 2.01253233750157e-06, "loss": 0.031, "step": 236725 }, { "epoch": 0.987766103929701, "grad_norm": 0.9964820999807917, "learning_rate": 2.0125110838426985e-06, "loss": 0.0188, "step": 236730 }, { "epoch": 0.9877869666446912, "grad_norm": 0.6718127855480225, "learning_rate": 2.0124898308571706e-06, "loss": 0.0187, "step": 236735 }, { "epoch": 0.9878078293596816, "grad_norm": 0.6184665958642594, "learning_rate": 2.0124685785449504e-06, "loss": 0.0184, "step": 236740 }, { "epoch": 0.9878286920746718, "grad_norm": 0.7016739060489902, "learning_rate": 2.012447326906003e-06, "loss": 0.0221, "step": 236745 }, { "epoch": 0.9878495547896621, "grad_norm": 0.41782206526324955, "learning_rate": 2.0124260759402915e-06, "loss": 0.0156, "step": 236750 }, { "epoch": 0.9878704175046524, "grad_norm": 0.6572319383779313, "learning_rate": 2.012404825647782e-06, "loss": 0.0198, "step": 236755 }, { "epoch": 0.9878912802196427, "grad_norm": 0.4185298541090843, "learning_rate": 2.0123835760284376e-06, "loss": 0.023, "step": 236760 }, { "epoch": 0.9879121429346329, "grad_norm": 0.902989041309371, "learning_rate": 2.012362327082224e-06, "loss": 0.0183, "step": 236765 }, { "epoch": 0.9879330056496233, "grad_norm": 0.49505376915312227, "learning_rate": 2.0123410788091044e-06, "loss": 0.0231, "step": 236770 }, { "epoch": 0.9879538683646135, "grad_norm": 0.739436277254844, "learning_rate": 2.012319831209044e-06, "loss": 0.0225, "step": 236775 }, { "epoch": 0.9879747310796038, "grad_norm": 0.6834269930685906, "learning_rate": 2.0122985842820075e-06, "loss": 0.0183, "step": 236780 }, { "epoch": 0.987995593794594, "grad_norm": 0.5322539498522528, "learning_rate": 2.012277338027959e-06, "loss": 0.0241, "step": 236785 }, { "epoch": 0.9880164565095844, "grad_norm": 0.5789121939417504, "learning_rate": 2.012256092446863e-06, "loss": 0.0224, "step": 236790 }, { "epoch": 0.9880373192245746, "grad_norm": 0.8686077674530746, "learning_rate": 2.0122348475386836e-06, "loss": 0.0189, "step": 236795 }, { "epoch": 0.9880581819395649, "grad_norm": 0.7796670665658659, "learning_rate": 2.0122136033033855e-06, "loss": 0.0216, "step": 236800 }, { "epoch": 0.9880790446545552, "grad_norm": 0.8685554715049787, "learning_rate": 2.012192359740934e-06, "loss": 0.0298, "step": 236805 }, { "epoch": 0.9880999073695454, "grad_norm": 0.7064337469918872, "learning_rate": 2.012171116851292e-06, "loss": 0.0247, "step": 236810 }, { "epoch": 0.9881207700845357, "grad_norm": 0.859440572825308, "learning_rate": 2.0121498746344256e-06, "loss": 0.0173, "step": 236815 }, { "epoch": 0.988141632799526, "grad_norm": 0.3032683497254803, "learning_rate": 2.0121286330902986e-06, "loss": 0.0201, "step": 236820 }, { "epoch": 0.9881624955145163, "grad_norm": 0.317597212248528, "learning_rate": 2.0121073922188753e-06, "loss": 0.0179, "step": 236825 }, { "epoch": 0.9881833582295065, "grad_norm": 0.6494709501227357, "learning_rate": 2.0120861520201203e-06, "loss": 0.0169, "step": 236830 }, { "epoch": 0.9882042209444968, "grad_norm": 0.9091719987395008, "learning_rate": 2.012064912493998e-06, "loss": 0.0196, "step": 236835 }, { "epoch": 0.9882250836594871, "grad_norm": 0.5095673663913937, "learning_rate": 2.0120436736404737e-06, "loss": 0.017, "step": 236840 }, { "epoch": 0.9882459463744774, "grad_norm": 1.307265709969494, "learning_rate": 2.0120224354595104e-06, "loss": 0.0187, "step": 236845 }, { "epoch": 0.9882668090894676, "grad_norm": 0.8311123496601699, "learning_rate": 2.0120011979510744e-06, "loss": 0.0151, "step": 236850 }, { "epoch": 0.988287671804458, "grad_norm": 0.8154078102543362, "learning_rate": 2.0119799611151287e-06, "loss": 0.0112, "step": 236855 }, { "epoch": 0.9883085345194482, "grad_norm": 0.6839294280136466, "learning_rate": 2.0119587249516383e-06, "loss": 0.013, "step": 236860 }, { "epoch": 0.9883293972344385, "grad_norm": 0.5461388319832398, "learning_rate": 2.011937489460568e-06, "loss": 0.0181, "step": 236865 }, { "epoch": 0.9883502599494288, "grad_norm": 0.8031890065561591, "learning_rate": 2.011916254641882e-06, "loss": 0.0155, "step": 236870 }, { "epoch": 0.9883711226644191, "grad_norm": 1.1217820234101636, "learning_rate": 2.011895020495545e-06, "loss": 0.0222, "step": 236875 }, { "epoch": 0.9883919853794093, "grad_norm": 0.5907649297894986, "learning_rate": 2.0118737870215215e-06, "loss": 0.0228, "step": 236880 }, { "epoch": 0.9884128480943997, "grad_norm": 0.48811704790031707, "learning_rate": 2.011852554219776e-06, "loss": 0.0145, "step": 236885 }, { "epoch": 0.9884337108093899, "grad_norm": 0.4851549302519414, "learning_rate": 2.011831322090273e-06, "loss": 0.0139, "step": 236890 }, { "epoch": 0.9884545735243802, "grad_norm": 0.7646577959223301, "learning_rate": 2.0118100906329766e-06, "loss": 0.0242, "step": 236895 }, { "epoch": 0.9884754362393704, "grad_norm": 0.8537231808457171, "learning_rate": 2.0117888598478523e-06, "loss": 0.0169, "step": 236900 }, { "epoch": 0.9884962989543608, "grad_norm": 0.6119086791792379, "learning_rate": 2.0117676297348643e-06, "loss": 0.0308, "step": 236905 }, { "epoch": 0.988517161669351, "grad_norm": 0.5225993367401355, "learning_rate": 2.0117464002939762e-06, "loss": 0.0176, "step": 236910 }, { "epoch": 0.9885380243843412, "grad_norm": 0.46700179804606945, "learning_rate": 2.0117251715251533e-06, "loss": 0.0209, "step": 236915 }, { "epoch": 0.9885588870993316, "grad_norm": 0.2424016609616966, "learning_rate": 2.011703943428361e-06, "loss": 0.0214, "step": 236920 }, { "epoch": 0.9885797498143218, "grad_norm": 0.6002551512655536, "learning_rate": 2.0116827160035623e-06, "loss": 0.0146, "step": 236925 }, { "epoch": 0.9886006125293121, "grad_norm": 0.5020011273612839, "learning_rate": 2.0116614892507223e-06, "loss": 0.0199, "step": 236930 }, { "epoch": 0.9886214752443024, "grad_norm": 0.26719639802597533, "learning_rate": 2.0116402631698055e-06, "loss": 0.0175, "step": 236935 }, { "epoch": 0.9886423379592927, "grad_norm": 0.6350564337759983, "learning_rate": 2.0116190377607773e-06, "loss": 0.0134, "step": 236940 }, { "epoch": 0.9886632006742829, "grad_norm": 0.5112609996048816, "learning_rate": 2.0115978130236012e-06, "loss": 0.0201, "step": 236945 }, { "epoch": 0.9886840633892732, "grad_norm": 0.34088238494704004, "learning_rate": 2.011576588958242e-06, "loss": 0.0163, "step": 236950 }, { "epoch": 0.9887049261042635, "grad_norm": 1.630815373472248, "learning_rate": 2.0115553655646645e-06, "loss": 0.0238, "step": 236955 }, { "epoch": 0.9887257888192538, "grad_norm": 0.9750559044765517, "learning_rate": 2.011534142842833e-06, "loss": 0.0223, "step": 236960 }, { "epoch": 0.988746651534244, "grad_norm": 0.5376603381911447, "learning_rate": 2.0115129207927123e-06, "loss": 0.0178, "step": 236965 }, { "epoch": 0.9887675142492344, "grad_norm": 0.2603299117586404, "learning_rate": 2.011491699414267e-06, "loss": 0.0191, "step": 236970 }, { "epoch": 0.9887883769642246, "grad_norm": 1.1033639027275115, "learning_rate": 2.0114704787074614e-06, "loss": 0.023, "step": 236975 }, { "epoch": 0.9888092396792149, "grad_norm": 0.6246424826993137, "learning_rate": 2.0114492586722596e-06, "loss": 0.0212, "step": 236980 }, { "epoch": 0.9888301023942052, "grad_norm": 0.8314559156448982, "learning_rate": 2.0114280393086276e-06, "loss": 0.0248, "step": 236985 }, { "epoch": 0.9888509651091955, "grad_norm": 0.5378828391772454, "learning_rate": 2.011406820616529e-06, "loss": 0.0195, "step": 236990 }, { "epoch": 0.9888718278241857, "grad_norm": 0.9858015154314383, "learning_rate": 2.0113856025959288e-06, "loss": 0.0169, "step": 236995 }, { "epoch": 0.9888926905391761, "grad_norm": 1.0402051695436692, "learning_rate": 2.0113643852467905e-06, "loss": 0.0241, "step": 237000 }, { "epoch": 0.9889135532541663, "grad_norm": 0.47512196318444344, "learning_rate": 2.01134316856908e-06, "loss": 0.019, "step": 237005 }, { "epoch": 0.9889344159691565, "grad_norm": 0.4893094265552386, "learning_rate": 2.0113219525627616e-06, "loss": 0.0191, "step": 237010 }, { "epoch": 0.9889552786841468, "grad_norm": 0.7761054586852858, "learning_rate": 2.011300737227799e-06, "loss": 0.0206, "step": 237015 }, { "epoch": 0.9889761413991371, "grad_norm": 0.30707361223792945, "learning_rate": 2.0112795225641584e-06, "loss": 0.0172, "step": 237020 }, { "epoch": 0.9889970041141274, "grad_norm": 0.5158926033034056, "learning_rate": 2.011258308571803e-06, "loss": 0.019, "step": 237025 }, { "epoch": 0.9890178668291176, "grad_norm": 0.4785531176671674, "learning_rate": 2.0112370952506975e-06, "loss": 0.0176, "step": 237030 }, { "epoch": 0.989038729544108, "grad_norm": 0.5001212902675336, "learning_rate": 2.0112158826008074e-06, "loss": 0.0146, "step": 237035 }, { "epoch": 0.9890595922590982, "grad_norm": 0.2695563120405758, "learning_rate": 2.011194670622097e-06, "loss": 0.0178, "step": 237040 }, { "epoch": 0.9890804549740885, "grad_norm": 0.6072539040057195, "learning_rate": 2.0111734593145303e-06, "loss": 0.0226, "step": 237045 }, { "epoch": 0.9891013176890788, "grad_norm": 0.9640879549943859, "learning_rate": 2.0111522486780722e-06, "loss": 0.0253, "step": 237050 }, { "epoch": 0.9891221804040691, "grad_norm": 0.6341907233403677, "learning_rate": 2.0111310387126874e-06, "loss": 0.016, "step": 237055 }, { "epoch": 0.9891430431190593, "grad_norm": 0.6263422617775887, "learning_rate": 2.0111098294183408e-06, "loss": 0.0186, "step": 237060 }, { "epoch": 0.9891639058340497, "grad_norm": 0.7510916865718655, "learning_rate": 2.0110886207949967e-06, "loss": 0.0218, "step": 237065 }, { "epoch": 0.9891847685490399, "grad_norm": 0.7572588447765635, "learning_rate": 2.0110674128426195e-06, "loss": 0.0249, "step": 237070 }, { "epoch": 0.9892056312640302, "grad_norm": 0.7561724616417781, "learning_rate": 2.0110462055611747e-06, "loss": 0.0219, "step": 237075 }, { "epoch": 0.9892264939790204, "grad_norm": 0.5667584441748853, "learning_rate": 2.0110249989506256e-06, "loss": 0.0253, "step": 237080 }, { "epoch": 0.9892473566940108, "grad_norm": 0.7428630376737483, "learning_rate": 2.0110037930109377e-06, "loss": 0.0182, "step": 237085 }, { "epoch": 0.989268219409001, "grad_norm": 0.4686141483156472, "learning_rate": 2.0109825877420758e-06, "loss": 0.0191, "step": 237090 }, { "epoch": 0.9892890821239912, "grad_norm": 0.48464946982192164, "learning_rate": 2.0109613831440037e-06, "loss": 0.0136, "step": 237095 }, { "epoch": 0.9893099448389816, "grad_norm": 0.3694579260910392, "learning_rate": 2.010940179216687e-06, "loss": 0.022, "step": 237100 }, { "epoch": 0.9893308075539718, "grad_norm": 0.4211848241539436, "learning_rate": 2.01091897596009e-06, "loss": 0.017, "step": 237105 }, { "epoch": 0.9893516702689621, "grad_norm": 0.5026166300980502, "learning_rate": 2.0108977733741763e-06, "loss": 0.0178, "step": 237110 }, { "epoch": 0.9893725329839524, "grad_norm": 0.33199393032111574, "learning_rate": 2.010876571458912e-06, "loss": 0.025, "step": 237115 }, { "epoch": 0.9893933956989427, "grad_norm": 0.7990198778545395, "learning_rate": 2.0108553702142614e-06, "loss": 0.0289, "step": 237120 }, { "epoch": 0.9894142584139329, "grad_norm": 0.9468276320027019, "learning_rate": 2.010834169640189e-06, "loss": 0.0196, "step": 237125 }, { "epoch": 0.9894351211289232, "grad_norm": 0.7545723922157952, "learning_rate": 2.0108129697366593e-06, "loss": 0.0249, "step": 237130 }, { "epoch": 0.9894559838439135, "grad_norm": 1.3060411822252707, "learning_rate": 2.0107917705036364e-06, "loss": 0.0193, "step": 237135 }, { "epoch": 0.9894768465589038, "grad_norm": 0.3493721773038749, "learning_rate": 2.0107705719410866e-06, "loss": 0.0101, "step": 237140 }, { "epoch": 0.989497709273894, "grad_norm": 0.6192339638370954, "learning_rate": 2.010749374048973e-06, "loss": 0.0187, "step": 237145 }, { "epoch": 0.9895185719888844, "grad_norm": 0.7482345094280848, "learning_rate": 2.010728176827261e-06, "loss": 0.0165, "step": 237150 }, { "epoch": 0.9895394347038746, "grad_norm": 0.2918900347720681, "learning_rate": 2.010706980275915e-06, "loss": 0.0173, "step": 237155 }, { "epoch": 0.9895602974188649, "grad_norm": 0.5668349283800997, "learning_rate": 2.0106857843949e-06, "loss": 0.0175, "step": 237160 }, { "epoch": 0.9895811601338552, "grad_norm": 0.5146159572271832, "learning_rate": 2.0106645891841804e-06, "loss": 0.0182, "step": 237165 }, { "epoch": 0.9896020228488455, "grad_norm": 0.9057640656034903, "learning_rate": 2.0106433946437204e-06, "loss": 0.0201, "step": 237170 }, { "epoch": 0.9896228855638357, "grad_norm": 0.4896522145735041, "learning_rate": 2.0106222007734854e-06, "loss": 0.0191, "step": 237175 }, { "epoch": 0.9896437482788261, "grad_norm": 0.6314424518668268, "learning_rate": 2.0106010075734403e-06, "loss": 0.0241, "step": 237180 }, { "epoch": 0.9896646109938163, "grad_norm": 0.8453021888875295, "learning_rate": 2.010579815043549e-06, "loss": 0.0246, "step": 237185 }, { "epoch": 0.9896854737088066, "grad_norm": 0.398712090206889, "learning_rate": 2.0105586231837763e-06, "loss": 0.0148, "step": 237190 }, { "epoch": 0.9897063364237968, "grad_norm": 0.3904219252706303, "learning_rate": 2.0105374319940876e-06, "loss": 0.0212, "step": 237195 }, { "epoch": 0.9897271991387872, "grad_norm": 0.6039319585256404, "learning_rate": 2.010516241474447e-06, "loss": 0.0162, "step": 237200 }, { "epoch": 0.9897480618537774, "grad_norm": 0.7474757691817534, "learning_rate": 2.0104950516248185e-06, "loss": 0.0279, "step": 237205 }, { "epoch": 0.9897689245687676, "grad_norm": 0.7050983460043386, "learning_rate": 2.0104738624451687e-06, "loss": 0.0233, "step": 237210 }, { "epoch": 0.989789787283758, "grad_norm": 0.41721967776603297, "learning_rate": 2.0104526739354604e-06, "loss": 0.0187, "step": 237215 }, { "epoch": 0.9898106499987482, "grad_norm": 1.098462007518816, "learning_rate": 2.010431486095659e-06, "loss": 0.0172, "step": 237220 }, { "epoch": 0.9898315127137385, "grad_norm": 0.6377888498288368, "learning_rate": 2.01041029892573e-06, "loss": 0.0221, "step": 237225 }, { "epoch": 0.9898523754287288, "grad_norm": 2.016872987120275, "learning_rate": 2.0103891124256368e-06, "loss": 0.0246, "step": 237230 }, { "epoch": 0.9898732381437191, "grad_norm": 0.5308593186684194, "learning_rate": 2.010367926595345e-06, "loss": 0.0188, "step": 237235 }, { "epoch": 0.9898941008587093, "grad_norm": 0.47447103670120383, "learning_rate": 2.0103467414348185e-06, "loss": 0.0118, "step": 237240 }, { "epoch": 0.9899149635736997, "grad_norm": 0.6115909338338367, "learning_rate": 2.010325556944023e-06, "loss": 0.0211, "step": 237245 }, { "epoch": 0.9899358262886899, "grad_norm": 0.6331756701672716, "learning_rate": 2.0103043731229224e-06, "loss": 0.0231, "step": 237250 }, { "epoch": 0.9899566890036802, "grad_norm": 0.6610862125114216, "learning_rate": 2.0102831899714815e-06, "loss": 0.0175, "step": 237255 }, { "epoch": 0.9899775517186704, "grad_norm": 2.1279563705193696, "learning_rate": 2.0102620074896655e-06, "loss": 0.0235, "step": 237260 }, { "epoch": 0.9899984144336608, "grad_norm": 0.593062374042399, "learning_rate": 2.0102408256774396e-06, "loss": 0.0323, "step": 237265 }, { "epoch": 0.990019277148651, "grad_norm": 0.5002536084658998, "learning_rate": 2.0102196445347666e-06, "loss": 0.0188, "step": 237270 }, { "epoch": 0.9900401398636413, "grad_norm": 0.5638408098211629, "learning_rate": 2.0101984640616125e-06, "loss": 0.0169, "step": 237275 }, { "epoch": 0.9900610025786316, "grad_norm": 0.6608171688842137, "learning_rate": 2.0101772842579427e-06, "loss": 0.0243, "step": 237280 }, { "epoch": 0.9900818652936219, "grad_norm": 0.5602315661581746, "learning_rate": 2.0101561051237208e-06, "loss": 0.0231, "step": 237285 }, { "epoch": 0.9901027280086121, "grad_norm": 0.4984067726002554, "learning_rate": 2.010134926658912e-06, "loss": 0.0182, "step": 237290 }, { "epoch": 0.9901235907236025, "grad_norm": 0.8083339085890623, "learning_rate": 2.010113748863481e-06, "loss": 0.0192, "step": 237295 }, { "epoch": 0.9901444534385927, "grad_norm": 0.544147989811543, "learning_rate": 2.0100925717373925e-06, "loss": 0.0186, "step": 237300 }, { "epoch": 0.9901653161535829, "grad_norm": 0.5980992386374372, "learning_rate": 2.010071395280611e-06, "loss": 0.0176, "step": 237305 }, { "epoch": 0.9901861788685732, "grad_norm": 0.4303735373989782, "learning_rate": 2.010050219493101e-06, "loss": 0.0158, "step": 237310 }, { "epoch": 0.9902070415835635, "grad_norm": 0.6358442015401219, "learning_rate": 2.0100290443748287e-06, "loss": 0.019, "step": 237315 }, { "epoch": 0.9902279042985538, "grad_norm": 0.5612397806426441, "learning_rate": 2.0100078699257578e-06, "loss": 0.0165, "step": 237320 }, { "epoch": 0.990248767013544, "grad_norm": 0.9143715745271728, "learning_rate": 2.0099866961458526e-06, "loss": 0.0219, "step": 237325 }, { "epoch": 0.9902696297285344, "grad_norm": 0.42211099903165505, "learning_rate": 2.009965523035079e-06, "loss": 0.0252, "step": 237330 }, { "epoch": 0.9902904924435246, "grad_norm": 0.9558133352484061, "learning_rate": 2.0099443505934e-06, "loss": 0.0193, "step": 237335 }, { "epoch": 0.9903113551585149, "grad_norm": 0.6013240033317759, "learning_rate": 2.009923178820782e-06, "loss": 0.0234, "step": 237340 }, { "epoch": 0.9903322178735052, "grad_norm": 0.4435924882145831, "learning_rate": 2.0099020077171898e-06, "loss": 0.0176, "step": 237345 }, { "epoch": 0.9903530805884955, "grad_norm": 0.4632167489071003, "learning_rate": 2.0098808372825874e-06, "loss": 0.0191, "step": 237350 }, { "epoch": 0.9903739433034857, "grad_norm": 0.6088941963404207, "learning_rate": 2.0098596675169395e-06, "loss": 0.0198, "step": 237355 }, { "epoch": 0.9903948060184761, "grad_norm": 0.4140087427801731, "learning_rate": 2.0098384984202117e-06, "loss": 0.0173, "step": 237360 }, { "epoch": 0.9904156687334663, "grad_norm": 0.821415345211478, "learning_rate": 2.0098173299923676e-06, "loss": 0.0181, "step": 237365 }, { "epoch": 0.9904365314484566, "grad_norm": 0.4004553802520153, "learning_rate": 2.009796162233373e-06, "loss": 0.0115, "step": 237370 }, { "epoch": 0.9904573941634468, "grad_norm": 0.539097808640231, "learning_rate": 2.009774995143192e-06, "loss": 0.0171, "step": 237375 }, { "epoch": 0.9904782568784372, "grad_norm": 0.808212692164625, "learning_rate": 2.00975382872179e-06, "loss": 0.0279, "step": 237380 }, { "epoch": 0.9904991195934274, "grad_norm": 0.8831195009186585, "learning_rate": 2.009732662969131e-06, "loss": 0.0154, "step": 237385 }, { "epoch": 0.9905199823084176, "grad_norm": 0.9087342586746054, "learning_rate": 2.0097114978851804e-06, "loss": 0.0163, "step": 237390 }, { "epoch": 0.990540845023408, "grad_norm": 0.8316811043576654, "learning_rate": 2.0096903334699034e-06, "loss": 0.0221, "step": 237395 }, { "epoch": 0.9905617077383982, "grad_norm": 0.4730636153747083, "learning_rate": 2.0096691697232635e-06, "loss": 0.0164, "step": 237400 }, { "epoch": 0.9905825704533885, "grad_norm": 0.46793393588145155, "learning_rate": 2.0096480066452266e-06, "loss": 0.0191, "step": 237405 }, { "epoch": 0.9906034331683788, "grad_norm": 0.27971977574044077, "learning_rate": 2.009626844235757e-06, "loss": 0.0147, "step": 237410 }, { "epoch": 0.9906242958833691, "grad_norm": 0.40911879230660614, "learning_rate": 2.0096056824948196e-06, "loss": 0.0211, "step": 237415 }, { "epoch": 0.9906451585983593, "grad_norm": 0.3335147918141922, "learning_rate": 2.0095845214223796e-06, "loss": 0.018, "step": 237420 }, { "epoch": 0.9906660213133497, "grad_norm": 0.8365921456452385, "learning_rate": 2.009563361018401e-06, "loss": 0.019, "step": 237425 }, { "epoch": 0.9906868840283399, "grad_norm": 0.16547963302320054, "learning_rate": 2.0095422012828485e-06, "loss": 0.0187, "step": 237430 }, { "epoch": 0.9907077467433302, "grad_norm": 0.6917478222901736, "learning_rate": 2.0095210422156883e-06, "loss": 0.0163, "step": 237435 }, { "epoch": 0.9907286094583204, "grad_norm": 0.38185071957805133, "learning_rate": 2.009499883816884e-06, "loss": 0.0198, "step": 237440 }, { "epoch": 0.9907494721733108, "grad_norm": 0.45668827738171586, "learning_rate": 2.009478726086401e-06, "loss": 0.0165, "step": 237445 }, { "epoch": 0.990770334888301, "grad_norm": 1.2748552175403967, "learning_rate": 2.0094575690242037e-06, "loss": 0.0195, "step": 237450 }, { "epoch": 0.9907911976032913, "grad_norm": 0.667026716974866, "learning_rate": 2.0094364126302575e-06, "loss": 0.0189, "step": 237455 }, { "epoch": 0.9908120603182816, "grad_norm": 0.3892399488077729, "learning_rate": 2.0094152569045258e-06, "loss": 0.0256, "step": 237460 }, { "epoch": 0.9908329230332719, "grad_norm": 0.479534967333068, "learning_rate": 2.0093941018469752e-06, "loss": 0.0194, "step": 237465 }, { "epoch": 0.9908537857482621, "grad_norm": 0.38898444965707774, "learning_rate": 2.0093729474575697e-06, "loss": 0.0191, "step": 237470 }, { "epoch": 0.9908746484632525, "grad_norm": 0.642361989902552, "learning_rate": 2.009351793736274e-06, "loss": 0.0167, "step": 237475 }, { "epoch": 0.9908955111782427, "grad_norm": 0.599951682973852, "learning_rate": 2.0093306406830535e-06, "loss": 0.0172, "step": 237480 }, { "epoch": 0.990916373893233, "grad_norm": 0.6515877735129358, "learning_rate": 2.0093094882978728e-06, "loss": 0.0272, "step": 237485 }, { "epoch": 0.9909372366082232, "grad_norm": 0.690041600697096, "learning_rate": 2.009288336580696e-06, "loss": 0.026, "step": 237490 }, { "epoch": 0.9909580993232135, "grad_norm": 0.42825785253087906, "learning_rate": 2.009267185531489e-06, "loss": 0.018, "step": 237495 }, { "epoch": 0.9909789620382038, "grad_norm": 0.5356183915150812, "learning_rate": 2.009246035150216e-06, "loss": 0.0172, "step": 237500 }, { "epoch": 0.990999824753194, "grad_norm": 0.42985717504305143, "learning_rate": 2.0092248854368426e-06, "loss": 0.0156, "step": 237505 }, { "epoch": 0.9910206874681844, "grad_norm": 0.7505325653161004, "learning_rate": 2.009203736391332e-06, "loss": 0.0272, "step": 237510 }, { "epoch": 0.9910415501831746, "grad_norm": 0.6468253428230379, "learning_rate": 2.0091825880136505e-06, "loss": 0.0256, "step": 237515 }, { "epoch": 0.9910624128981649, "grad_norm": 0.6236460547600599, "learning_rate": 2.009161440303763e-06, "loss": 0.0182, "step": 237520 }, { "epoch": 0.9910832756131552, "grad_norm": 1.1627990326379696, "learning_rate": 2.009140293261634e-06, "loss": 0.0292, "step": 237525 }, { "epoch": 0.9911041383281455, "grad_norm": 0.580865709854512, "learning_rate": 2.009119146887228e-06, "loss": 0.0192, "step": 237530 }, { "epoch": 0.9911250010431357, "grad_norm": 0.32392379861902215, "learning_rate": 2.00909800118051e-06, "loss": 0.0134, "step": 237535 }, { "epoch": 0.9911458637581261, "grad_norm": 0.513504358479658, "learning_rate": 2.0090768561414455e-06, "loss": 0.0218, "step": 237540 }, { "epoch": 0.9911667264731163, "grad_norm": 0.4467815015042243, "learning_rate": 2.0090557117699986e-06, "loss": 0.0157, "step": 237545 }, { "epoch": 0.9911875891881066, "grad_norm": 0.43696169348451797, "learning_rate": 2.009034568066135e-06, "loss": 0.0163, "step": 237550 }, { "epoch": 0.9912084519030968, "grad_norm": 0.49306446956021416, "learning_rate": 2.0090134250298186e-06, "loss": 0.0186, "step": 237555 }, { "epoch": 0.9912293146180872, "grad_norm": 0.9042401007217993, "learning_rate": 2.0089922826610147e-06, "loss": 0.0209, "step": 237560 }, { "epoch": 0.9912501773330774, "grad_norm": 0.7688476309284591, "learning_rate": 2.008971140959688e-06, "loss": 0.0162, "step": 237565 }, { "epoch": 0.9912710400480677, "grad_norm": 0.6154382692065202, "learning_rate": 2.0089499999258038e-06, "loss": 0.0219, "step": 237570 }, { "epoch": 0.991291902763058, "grad_norm": 0.949838880113616, "learning_rate": 2.008928859559327e-06, "loss": 0.0188, "step": 237575 }, { "epoch": 0.9913127654780483, "grad_norm": 0.43202423973764587, "learning_rate": 2.0089077198602218e-06, "loss": 0.0155, "step": 237580 }, { "epoch": 0.9913336281930385, "grad_norm": 2.2199241116859394, "learning_rate": 2.0088865808284537e-06, "loss": 0.0255, "step": 237585 }, { "epoch": 0.9913544909080289, "grad_norm": 0.6441389419426207, "learning_rate": 2.0088654424639874e-06, "loss": 0.0202, "step": 237590 }, { "epoch": 0.9913753536230191, "grad_norm": 0.7082838282212375, "learning_rate": 2.0088443047667884e-06, "loss": 0.0191, "step": 237595 }, { "epoch": 0.9913962163380093, "grad_norm": 0.5118126412593373, "learning_rate": 2.00882316773682e-06, "loss": 0.0174, "step": 237600 }, { "epoch": 0.9914170790529997, "grad_norm": 1.6748612490563055, "learning_rate": 2.0088020313740485e-06, "loss": 0.0242, "step": 237605 }, { "epoch": 0.9914379417679899, "grad_norm": 0.5970762368271212, "learning_rate": 2.0087808956784386e-06, "loss": 0.0166, "step": 237610 }, { "epoch": 0.9914588044829802, "grad_norm": 0.6572447244547441, "learning_rate": 2.008759760649955e-06, "loss": 0.0174, "step": 237615 }, { "epoch": 0.9914796671979704, "grad_norm": 0.3571780473264893, "learning_rate": 2.0087386262885626e-06, "loss": 0.0212, "step": 237620 }, { "epoch": 0.9915005299129608, "grad_norm": 0.44236535873339056, "learning_rate": 2.008717492594226e-06, "loss": 0.018, "step": 237625 }, { "epoch": 0.991521392627951, "grad_norm": 0.590242955196885, "learning_rate": 2.008696359566911e-06, "loss": 0.0181, "step": 237630 }, { "epoch": 0.9915422553429413, "grad_norm": 0.6698901822554177, "learning_rate": 2.0086752272065816e-06, "loss": 0.0135, "step": 237635 }, { "epoch": 0.9915631180579316, "grad_norm": 0.9301499458180542, "learning_rate": 2.0086540955132032e-06, "loss": 0.0208, "step": 237640 }, { "epoch": 0.9915839807729219, "grad_norm": 0.5468382081242694, "learning_rate": 2.00863296448674e-06, "loss": 0.0264, "step": 237645 }, { "epoch": 0.9916048434879121, "grad_norm": 0.41755334801725824, "learning_rate": 2.0086118341271584e-06, "loss": 0.0227, "step": 237650 }, { "epoch": 0.9916257062029025, "grad_norm": 0.4583501210848426, "learning_rate": 2.0085907044344223e-06, "loss": 0.0245, "step": 237655 }, { "epoch": 0.9916465689178927, "grad_norm": 0.8363239891144779, "learning_rate": 2.0085695754084964e-06, "loss": 0.0235, "step": 237660 }, { "epoch": 0.991667431632883, "grad_norm": 0.3762958326582974, "learning_rate": 2.0085484470493456e-06, "loss": 0.0167, "step": 237665 }, { "epoch": 0.9916882943478732, "grad_norm": 0.7205473236551879, "learning_rate": 2.008527319356936e-06, "loss": 0.0262, "step": 237670 }, { "epoch": 0.9917091570628636, "grad_norm": 0.3673726456635581, "learning_rate": 2.0085061923312317e-06, "loss": 0.0196, "step": 237675 }, { "epoch": 0.9917300197778538, "grad_norm": 0.638032832404672, "learning_rate": 2.0084850659721967e-06, "loss": 0.0173, "step": 237680 }, { "epoch": 0.991750882492844, "grad_norm": 0.5529001849682375, "learning_rate": 2.008463940279798e-06, "loss": 0.021, "step": 237685 }, { "epoch": 0.9917717452078344, "grad_norm": 0.6047716064432261, "learning_rate": 2.008442815253999e-06, "loss": 0.0241, "step": 237690 }, { "epoch": 0.9917926079228246, "grad_norm": 0.6468630097471587, "learning_rate": 2.008421690894765e-06, "loss": 0.022, "step": 237695 }, { "epoch": 0.9918134706378149, "grad_norm": 0.7476874238069637, "learning_rate": 2.008400567202061e-06, "loss": 0.0198, "step": 237700 }, { "epoch": 0.9918343333528052, "grad_norm": 0.9876953521093713, "learning_rate": 2.0083794441758526e-06, "loss": 0.0271, "step": 237705 }, { "epoch": 0.9918551960677955, "grad_norm": 0.9981109818977966, "learning_rate": 2.008358321816104e-06, "loss": 0.0189, "step": 237710 }, { "epoch": 0.9918760587827857, "grad_norm": 0.5757568037902928, "learning_rate": 2.00833720012278e-06, "loss": 0.0175, "step": 237715 }, { "epoch": 0.9918969214977761, "grad_norm": 0.5245073130730907, "learning_rate": 2.0083160790958463e-06, "loss": 0.0161, "step": 237720 }, { "epoch": 0.9919177842127663, "grad_norm": 0.4526412488522615, "learning_rate": 2.008294958735267e-06, "loss": 0.0236, "step": 237725 }, { "epoch": 0.9919386469277566, "grad_norm": 0.9609079655443064, "learning_rate": 2.0082738390410074e-06, "loss": 0.0149, "step": 237730 }, { "epoch": 0.9919595096427468, "grad_norm": 0.2894073720984946, "learning_rate": 2.0082527200130324e-06, "loss": 0.0199, "step": 237735 }, { "epoch": 0.9919803723577372, "grad_norm": 0.3686671606549337, "learning_rate": 2.0082316016513076e-06, "loss": 0.0181, "step": 237740 }, { "epoch": 0.9920012350727274, "grad_norm": 0.2733429231795034, "learning_rate": 2.0082104839557976e-06, "loss": 0.015, "step": 237745 }, { "epoch": 0.9920220977877177, "grad_norm": 0.5296419726874241, "learning_rate": 2.0081893669264667e-06, "loss": 0.0179, "step": 237750 }, { "epoch": 0.992042960502708, "grad_norm": 0.8016456908252175, "learning_rate": 2.008168250563281e-06, "loss": 0.0197, "step": 237755 }, { "epoch": 0.9920638232176983, "grad_norm": 0.7095536082286276, "learning_rate": 2.0081471348662047e-06, "loss": 0.0203, "step": 237760 }, { "epoch": 0.9920846859326885, "grad_norm": 0.7358993599607052, "learning_rate": 2.008126019835203e-06, "loss": 0.0178, "step": 237765 }, { "epoch": 0.9921055486476789, "grad_norm": 0.5975576250583028, "learning_rate": 2.0081049054702407e-06, "loss": 0.0156, "step": 237770 }, { "epoch": 0.9921264113626691, "grad_norm": 0.3932390980914736, "learning_rate": 2.0080837917712833e-06, "loss": 0.0131, "step": 237775 }, { "epoch": 0.9921472740776593, "grad_norm": 0.7123876341086561, "learning_rate": 2.008062678738295e-06, "loss": 0.0232, "step": 237780 }, { "epoch": 0.9921681367926497, "grad_norm": 0.5098739222187147, "learning_rate": 2.0080415663712417e-06, "loss": 0.0223, "step": 237785 }, { "epoch": 0.99218899950764, "grad_norm": 0.4122783768748005, "learning_rate": 2.0080204546700877e-06, "loss": 0.0223, "step": 237790 }, { "epoch": 0.9922098622226302, "grad_norm": 0.26245205636104, "learning_rate": 2.007999343634798e-06, "loss": 0.0206, "step": 237795 }, { "epoch": 0.9922307249376204, "grad_norm": 0.720232614074036, "learning_rate": 2.0079782332653384e-06, "loss": 0.0177, "step": 237800 }, { "epoch": 0.9922515876526108, "grad_norm": 0.7611054137471791, "learning_rate": 2.007957123561673e-06, "loss": 0.0178, "step": 237805 }, { "epoch": 0.992272450367601, "grad_norm": 0.8769804130437415, "learning_rate": 2.0079360145237673e-06, "loss": 0.0202, "step": 237810 }, { "epoch": 0.9922933130825913, "grad_norm": 0.13904288801388312, "learning_rate": 2.007914906151586e-06, "loss": 0.0135, "step": 237815 }, { "epoch": 0.9923141757975816, "grad_norm": 0.4978505242314295, "learning_rate": 2.0078937984450942e-06, "loss": 0.0187, "step": 237820 }, { "epoch": 0.9923350385125719, "grad_norm": 0.8954336735371345, "learning_rate": 2.0078726914042575e-06, "loss": 0.0277, "step": 237825 }, { "epoch": 0.9923559012275621, "grad_norm": 0.6118539314314221, "learning_rate": 2.0078515850290396e-06, "loss": 0.0219, "step": 237830 }, { "epoch": 0.9923767639425525, "grad_norm": 0.606478807155674, "learning_rate": 2.0078304793194066e-06, "loss": 0.0192, "step": 237835 }, { "epoch": 0.9923976266575427, "grad_norm": 0.728617080591252, "learning_rate": 2.0078093742753234e-06, "loss": 0.0161, "step": 237840 }, { "epoch": 0.992418489372533, "grad_norm": 0.4938974962339985, "learning_rate": 2.007788269896755e-06, "loss": 0.0257, "step": 237845 }, { "epoch": 0.9924393520875232, "grad_norm": 1.8903406654726693, "learning_rate": 2.0077671661836657e-06, "loss": 0.0292, "step": 237850 }, { "epoch": 0.9924602148025136, "grad_norm": 0.7095711984829689, "learning_rate": 2.0077460631360214e-06, "loss": 0.0231, "step": 237855 }, { "epoch": 0.9924810775175038, "grad_norm": 0.6921900099588358, "learning_rate": 2.0077249607537867e-06, "loss": 0.015, "step": 237860 }, { "epoch": 0.992501940232494, "grad_norm": 0.6926207201827982, "learning_rate": 2.007703859036927e-06, "loss": 0.0206, "step": 237865 }, { "epoch": 0.9925228029474844, "grad_norm": 0.25959611696024226, "learning_rate": 2.0076827579854067e-06, "loss": 0.0175, "step": 237870 }, { "epoch": 0.9925436656624747, "grad_norm": 0.3878533230356186, "learning_rate": 2.007661657599191e-06, "loss": 0.0175, "step": 237875 }, { "epoch": 0.9925645283774649, "grad_norm": 0.8179214926171141, "learning_rate": 2.007640557878246e-06, "loss": 0.0283, "step": 237880 }, { "epoch": 0.9925853910924552, "grad_norm": 0.8790834802085807, "learning_rate": 2.007619458822535e-06, "loss": 0.0179, "step": 237885 }, { "epoch": 0.9926062538074455, "grad_norm": 0.779971751006612, "learning_rate": 2.0075983604320245e-06, "loss": 0.025, "step": 237890 }, { "epoch": 0.9926271165224357, "grad_norm": 1.1445322556748179, "learning_rate": 2.007577262706679e-06, "loss": 0.0215, "step": 237895 }, { "epoch": 0.9926479792374261, "grad_norm": 1.18926720026515, "learning_rate": 2.0075561656464634e-06, "loss": 0.0252, "step": 237900 }, { "epoch": 0.9926688419524163, "grad_norm": 1.2037603240020331, "learning_rate": 2.007535069251343e-06, "loss": 0.0251, "step": 237905 }, { "epoch": 0.9926897046674066, "grad_norm": 0.3800714426381961, "learning_rate": 2.0075139735212827e-06, "loss": 0.0228, "step": 237910 }, { "epoch": 0.9927105673823968, "grad_norm": 0.6016085915312264, "learning_rate": 2.007492878456247e-06, "loss": 0.0168, "step": 237915 }, { "epoch": 0.9927314300973872, "grad_norm": 0.3996398816639664, "learning_rate": 2.0074717840562023e-06, "loss": 0.0179, "step": 237920 }, { "epoch": 0.9927522928123774, "grad_norm": 0.5676120161362337, "learning_rate": 2.0074506903211126e-06, "loss": 0.0163, "step": 237925 }, { "epoch": 0.9927731555273677, "grad_norm": 0.5883399520980448, "learning_rate": 2.0074295972509434e-06, "loss": 0.015, "step": 237930 }, { "epoch": 0.992794018242358, "grad_norm": 0.3765108573024736, "learning_rate": 2.00740850484566e-06, "loss": 0.019, "step": 237935 }, { "epoch": 0.9928148809573483, "grad_norm": 0.4176283824369503, "learning_rate": 2.007387413105227e-06, "loss": 0.0166, "step": 237940 }, { "epoch": 0.9928357436723385, "grad_norm": 0.46617737008748983, "learning_rate": 2.007366322029609e-06, "loss": 0.0225, "step": 237945 }, { "epoch": 0.9928566063873289, "grad_norm": 0.5110919199944982, "learning_rate": 2.007345231618772e-06, "loss": 0.0234, "step": 237950 }, { "epoch": 0.9928774691023191, "grad_norm": 0.7361489145039452, "learning_rate": 2.007324141872681e-06, "loss": 0.025, "step": 237955 }, { "epoch": 0.9928983318173094, "grad_norm": 0.4796862311088436, "learning_rate": 2.007303052791301e-06, "loss": 0.0208, "step": 237960 }, { "epoch": 0.9929191945322997, "grad_norm": 1.001042793347583, "learning_rate": 2.0072819643745966e-06, "loss": 0.0267, "step": 237965 }, { "epoch": 0.99294005724729, "grad_norm": 0.456522761368429, "learning_rate": 2.0072608766225334e-06, "loss": 0.0174, "step": 237970 }, { "epoch": 0.9929609199622802, "grad_norm": 2.916769842455055, "learning_rate": 2.007239789535076e-06, "loss": 0.0294, "step": 237975 }, { "epoch": 0.9929817826772704, "grad_norm": 0.45623949338402553, "learning_rate": 2.0072187031121897e-06, "loss": 0.0177, "step": 237980 }, { "epoch": 0.9930026453922608, "grad_norm": 0.6312496070595384, "learning_rate": 2.00719761735384e-06, "loss": 0.0211, "step": 237985 }, { "epoch": 0.993023508107251, "grad_norm": 0.7905496314166623, "learning_rate": 2.0071765322599917e-06, "loss": 0.0183, "step": 237990 }, { "epoch": 0.9930443708222413, "grad_norm": 0.7423517093434295, "learning_rate": 2.0071554478306097e-06, "loss": 0.0203, "step": 237995 }, { "epoch": 0.9930652335372316, "grad_norm": 0.850336966960632, "learning_rate": 2.0071343640656594e-06, "loss": 0.0248, "step": 238000 }, { "epoch": 0.9930860962522219, "grad_norm": 0.6056061723630628, "learning_rate": 2.0071132809651057e-06, "loss": 0.015, "step": 238005 }, { "epoch": 0.9931069589672121, "grad_norm": 0.6619073304114668, "learning_rate": 2.0070921985289137e-06, "loss": 0.0207, "step": 238010 }, { "epoch": 0.9931278216822025, "grad_norm": 0.9862898431330263, "learning_rate": 2.007071116757049e-06, "loss": 0.0221, "step": 238015 }, { "epoch": 0.9931486843971927, "grad_norm": 0.6877706496795515, "learning_rate": 2.0070500356494756e-06, "loss": 0.0193, "step": 238020 }, { "epoch": 0.993169547112183, "grad_norm": 0.8186781803889345, "learning_rate": 2.00702895520616e-06, "loss": 0.0169, "step": 238025 }, { "epoch": 0.9931904098271732, "grad_norm": 0.6107632711544452, "learning_rate": 2.0070078754270663e-06, "loss": 0.0205, "step": 238030 }, { "epoch": 0.9932112725421636, "grad_norm": 0.5912367117637706, "learning_rate": 2.00698679631216e-06, "loss": 0.0175, "step": 238035 }, { "epoch": 0.9932321352571538, "grad_norm": 0.7178272878913443, "learning_rate": 2.006965717861406e-06, "loss": 0.0191, "step": 238040 }, { "epoch": 0.9932529979721441, "grad_norm": 0.6347637800106796, "learning_rate": 2.00694464007477e-06, "loss": 0.0179, "step": 238045 }, { "epoch": 0.9932738606871344, "grad_norm": 0.5846625704668332, "learning_rate": 2.0069235629522166e-06, "loss": 0.0165, "step": 238050 }, { "epoch": 0.9932947234021247, "grad_norm": 0.9270319472084991, "learning_rate": 2.006902486493711e-06, "loss": 0.018, "step": 238055 }, { "epoch": 0.9933155861171149, "grad_norm": 0.689341168963437, "learning_rate": 2.0068814106992184e-06, "loss": 0.0179, "step": 238060 }, { "epoch": 0.9933364488321053, "grad_norm": 1.0494568630156074, "learning_rate": 2.0068603355687034e-06, "loss": 0.0337, "step": 238065 }, { "epoch": 0.9933573115470955, "grad_norm": 0.4684906413896503, "learning_rate": 2.0068392611021327e-06, "loss": 0.0179, "step": 238070 }, { "epoch": 0.9933781742620857, "grad_norm": 0.686577724740564, "learning_rate": 2.0068181872994698e-06, "loss": 0.0236, "step": 238075 }, { "epoch": 0.9933990369770761, "grad_norm": 0.5862941025343432, "learning_rate": 2.0067971141606804e-06, "loss": 0.0172, "step": 238080 }, { "epoch": 0.9934198996920663, "grad_norm": 0.26157830959074685, "learning_rate": 2.0067760416857297e-06, "loss": 0.0185, "step": 238085 }, { "epoch": 0.9934407624070566, "grad_norm": 0.4275784178091757, "learning_rate": 2.006754969874583e-06, "loss": 0.0188, "step": 238090 }, { "epoch": 0.9934616251220468, "grad_norm": 0.371972869171009, "learning_rate": 2.0067338987272054e-06, "loss": 0.0218, "step": 238095 }, { "epoch": 0.9934824878370372, "grad_norm": 0.883700965106606, "learning_rate": 2.0067128282435615e-06, "loss": 0.0222, "step": 238100 }, { "epoch": 0.9935033505520274, "grad_norm": 0.45905047465133575, "learning_rate": 2.006691758423618e-06, "loss": 0.0177, "step": 238105 }, { "epoch": 0.9935242132670177, "grad_norm": 1.231762164338067, "learning_rate": 2.0066706892673376e-06, "loss": 0.0235, "step": 238110 }, { "epoch": 0.993545075982008, "grad_norm": 0.4561969281297388, "learning_rate": 2.0066496207746874e-06, "loss": 0.0164, "step": 238115 }, { "epoch": 0.9935659386969983, "grad_norm": 0.7883399589970641, "learning_rate": 2.006628552945632e-06, "loss": 0.0237, "step": 238120 }, { "epoch": 0.9935868014119885, "grad_norm": 0.25753467205353975, "learning_rate": 2.0066074857801363e-06, "loss": 0.0104, "step": 238125 }, { "epoch": 0.9936076641269789, "grad_norm": 0.5022890061338042, "learning_rate": 2.006586419278166e-06, "loss": 0.0192, "step": 238130 }, { "epoch": 0.9936285268419691, "grad_norm": 0.427899038996878, "learning_rate": 2.006565353439686e-06, "loss": 0.025, "step": 238135 }, { "epoch": 0.9936493895569594, "grad_norm": 0.22555148111247064, "learning_rate": 2.0065442882646613e-06, "loss": 0.0187, "step": 238140 }, { "epoch": 0.9936702522719497, "grad_norm": 0.37713253434123695, "learning_rate": 2.006523223753057e-06, "loss": 0.0152, "step": 238145 }, { "epoch": 0.99369111498694, "grad_norm": 0.47585727783154175, "learning_rate": 2.0065021599048388e-06, "loss": 0.0234, "step": 238150 }, { "epoch": 0.9937119777019302, "grad_norm": 2.3654469040149713, "learning_rate": 2.0064810967199715e-06, "loss": 0.0215, "step": 238155 }, { "epoch": 0.9937328404169204, "grad_norm": 0.6111888205776576, "learning_rate": 2.0064600341984203e-06, "loss": 0.0177, "step": 238160 }, { "epoch": 0.9937537031319108, "grad_norm": 0.7635543697415578, "learning_rate": 2.0064389723401505e-06, "loss": 0.019, "step": 238165 }, { "epoch": 0.993774565846901, "grad_norm": 0.4827577335223667, "learning_rate": 2.0064179111451275e-06, "loss": 0.0186, "step": 238170 }, { "epoch": 0.9937954285618913, "grad_norm": 0.6299519600464984, "learning_rate": 2.006396850613316e-06, "loss": 0.0121, "step": 238175 }, { "epoch": 0.9938162912768816, "grad_norm": 0.6766501538425966, "learning_rate": 2.0063757907446813e-06, "loss": 0.0224, "step": 238180 }, { "epoch": 0.9938371539918719, "grad_norm": 0.7453585900309797, "learning_rate": 2.006354731539189e-06, "loss": 0.0272, "step": 238185 }, { "epoch": 0.9938580167068621, "grad_norm": 0.46869862559845166, "learning_rate": 2.006333672996804e-06, "loss": 0.0218, "step": 238190 }, { "epoch": 0.9938788794218525, "grad_norm": 0.9779935611684352, "learning_rate": 2.006312615117491e-06, "loss": 0.0296, "step": 238195 }, { "epoch": 0.9938997421368427, "grad_norm": 0.30824856616078367, "learning_rate": 2.0062915579012163e-06, "loss": 0.0144, "step": 238200 }, { "epoch": 0.993920604851833, "grad_norm": 0.8225307143231444, "learning_rate": 2.006270501347945e-06, "loss": 0.0171, "step": 238205 }, { "epoch": 0.9939414675668232, "grad_norm": 0.6206567989867794, "learning_rate": 2.006249445457641e-06, "loss": 0.0179, "step": 238210 }, { "epoch": 0.9939623302818136, "grad_norm": 0.7081131403894942, "learning_rate": 2.0062283902302704e-06, "loss": 0.0174, "step": 238215 }, { "epoch": 0.9939831929968038, "grad_norm": 3.067816847826287, "learning_rate": 2.006207335665799e-06, "loss": 0.0184, "step": 238220 }, { "epoch": 0.9940040557117941, "grad_norm": 0.7634889661246075, "learning_rate": 2.0061862817641906e-06, "loss": 0.0211, "step": 238225 }, { "epoch": 0.9940249184267844, "grad_norm": 0.9616991878026387, "learning_rate": 2.0061652285254116e-06, "loss": 0.0257, "step": 238230 }, { "epoch": 0.9940457811417747, "grad_norm": 0.8310598298687426, "learning_rate": 2.0061441759494265e-06, "loss": 0.0169, "step": 238235 }, { "epoch": 0.9940666438567649, "grad_norm": 0.47221141331976746, "learning_rate": 2.0061231240362015e-06, "loss": 0.0192, "step": 238240 }, { "epoch": 0.9940875065717553, "grad_norm": 0.2315618369362261, "learning_rate": 2.0061020727857e-06, "loss": 0.0127, "step": 238245 }, { "epoch": 0.9941083692867455, "grad_norm": 0.3008587276189558, "learning_rate": 2.00608102219789e-06, "loss": 0.012, "step": 238250 }, { "epoch": 0.9941292320017358, "grad_norm": 0.3804752911226283, "learning_rate": 2.0060599722727342e-06, "loss": 0.0136, "step": 238255 }, { "epoch": 0.9941500947167261, "grad_norm": 0.7347202761127503, "learning_rate": 2.006038923010199e-06, "loss": 0.0145, "step": 238260 }, { "epoch": 0.9941709574317164, "grad_norm": 0.4132623694817425, "learning_rate": 2.0060178744102487e-06, "loss": 0.025, "step": 238265 }, { "epoch": 0.9941918201467066, "grad_norm": 0.49616032389893827, "learning_rate": 2.0059968264728503e-06, "loss": 0.0215, "step": 238270 }, { "epoch": 0.9942126828616968, "grad_norm": 0.557391634402686, "learning_rate": 2.0059757791979675e-06, "loss": 0.0196, "step": 238275 }, { "epoch": 0.9942335455766872, "grad_norm": 0.6318630966429366, "learning_rate": 2.0059547325855658e-06, "loss": 0.0174, "step": 238280 }, { "epoch": 0.9942544082916774, "grad_norm": 0.6186341240321213, "learning_rate": 2.005933686635611e-06, "loss": 0.016, "step": 238285 }, { "epoch": 0.9942752710066677, "grad_norm": 0.8411596566221393, "learning_rate": 2.005912641348068e-06, "loss": 0.0147, "step": 238290 }, { "epoch": 0.994296133721658, "grad_norm": 0.684660865965154, "learning_rate": 2.005891596722902e-06, "loss": 0.0168, "step": 238295 }, { "epoch": 0.9943169964366483, "grad_norm": 1.1987537665528405, "learning_rate": 2.0058705527600787e-06, "loss": 0.025, "step": 238300 }, { "epoch": 0.9943378591516385, "grad_norm": 0.86695751158814, "learning_rate": 2.0058495094595626e-06, "loss": 0.0232, "step": 238305 }, { "epoch": 0.9943587218666289, "grad_norm": 0.8497176136007615, "learning_rate": 2.005828466821319e-06, "loss": 0.0183, "step": 238310 }, { "epoch": 0.9943795845816191, "grad_norm": 0.37191816652496484, "learning_rate": 2.0058074248453145e-06, "loss": 0.0188, "step": 238315 }, { "epoch": 0.9944004472966094, "grad_norm": 1.3296365282008127, "learning_rate": 2.005786383531513e-06, "loss": 0.0206, "step": 238320 }, { "epoch": 0.9944213100115997, "grad_norm": 0.627737449700692, "learning_rate": 2.00576534287988e-06, "loss": 0.0183, "step": 238325 }, { "epoch": 0.99444217272659, "grad_norm": 0.8111385685697846, "learning_rate": 2.005744302890381e-06, "loss": 0.0255, "step": 238330 }, { "epoch": 0.9944630354415802, "grad_norm": 0.8507795922113942, "learning_rate": 2.0057232635629808e-06, "loss": 0.0246, "step": 238335 }, { "epoch": 0.9944838981565705, "grad_norm": 0.8656283179888791, "learning_rate": 2.005702224897646e-06, "loss": 0.0222, "step": 238340 }, { "epoch": 0.9945047608715608, "grad_norm": 0.732067425759938, "learning_rate": 2.0056811868943396e-06, "loss": 0.0143, "step": 238345 }, { "epoch": 0.994525623586551, "grad_norm": 0.7720629344861375, "learning_rate": 2.0056601495530294e-06, "loss": 0.0175, "step": 238350 }, { "epoch": 0.9945464863015413, "grad_norm": 1.0688345802665788, "learning_rate": 2.005639112873679e-06, "loss": 0.0192, "step": 238355 }, { "epoch": 0.9945673490165317, "grad_norm": 0.7374643162455672, "learning_rate": 2.005618076856254e-06, "loss": 0.0149, "step": 238360 }, { "epoch": 0.9945882117315219, "grad_norm": 0.4736628023782134, "learning_rate": 2.0055970415007202e-06, "loss": 0.0224, "step": 238365 }, { "epoch": 0.9946090744465121, "grad_norm": 0.7490621735479469, "learning_rate": 2.0055760068070423e-06, "loss": 0.0256, "step": 238370 }, { "epoch": 0.9946299371615025, "grad_norm": 0.7986448039644122, "learning_rate": 2.0055549727751864e-06, "loss": 0.0221, "step": 238375 }, { "epoch": 0.9946507998764927, "grad_norm": 0.3826718613795637, "learning_rate": 2.005533939405117e-06, "loss": 0.0152, "step": 238380 }, { "epoch": 0.994671662591483, "grad_norm": 0.893639427458574, "learning_rate": 2.0055129066967995e-06, "loss": 0.0204, "step": 238385 }, { "epoch": 0.9946925253064732, "grad_norm": 0.8663251670107467, "learning_rate": 2.005491874650199e-06, "loss": 0.0208, "step": 238390 }, { "epoch": 0.9947133880214636, "grad_norm": 0.42828778155440667, "learning_rate": 2.005470843265282e-06, "loss": 0.0253, "step": 238395 }, { "epoch": 0.9947342507364538, "grad_norm": 0.785435065503482, "learning_rate": 2.0054498125420123e-06, "loss": 0.019, "step": 238400 }, { "epoch": 0.9947551134514441, "grad_norm": 0.44862412306557575, "learning_rate": 2.005428782480356e-06, "loss": 0.016, "step": 238405 }, { "epoch": 0.9947759761664344, "grad_norm": 0.5375710408503672, "learning_rate": 2.005407753080279e-06, "loss": 0.0161, "step": 238410 }, { "epoch": 0.9947968388814247, "grad_norm": 1.001985319315374, "learning_rate": 2.0053867243417456e-06, "loss": 0.0354, "step": 238415 }, { "epoch": 0.9948177015964149, "grad_norm": 0.7446017847723032, "learning_rate": 2.0053656962647206e-06, "loss": 0.0189, "step": 238420 }, { "epoch": 0.9948385643114053, "grad_norm": 0.2662627156089765, "learning_rate": 2.0053446688491708e-06, "loss": 0.0178, "step": 238425 }, { "epoch": 0.9948594270263955, "grad_norm": 0.410639682238348, "learning_rate": 2.0053236420950613e-06, "loss": 0.0174, "step": 238430 }, { "epoch": 0.9948802897413858, "grad_norm": 0.860397064196954, "learning_rate": 2.005302616002356e-06, "loss": 0.0203, "step": 238435 }, { "epoch": 0.9949011524563761, "grad_norm": 0.5968897145368741, "learning_rate": 2.0052815905710218e-06, "loss": 0.0207, "step": 238440 }, { "epoch": 0.9949220151713664, "grad_norm": 0.5739816551589748, "learning_rate": 2.005260565801023e-06, "loss": 0.0174, "step": 238445 }, { "epoch": 0.9949428778863566, "grad_norm": 0.47443529957398367, "learning_rate": 2.005239541692326e-06, "loss": 0.0194, "step": 238450 }, { "epoch": 0.9949637406013468, "grad_norm": 0.6712681688756781, "learning_rate": 2.005218518244895e-06, "loss": 0.0204, "step": 238455 }, { "epoch": 0.9949846033163372, "grad_norm": 0.33706368907659023, "learning_rate": 2.005197495458696e-06, "loss": 0.0216, "step": 238460 }, { "epoch": 0.9950054660313274, "grad_norm": 0.841873016268735, "learning_rate": 2.005176473333694e-06, "loss": 0.0216, "step": 238465 }, { "epoch": 0.9950263287463177, "grad_norm": 0.8237367297401325, "learning_rate": 2.0051554518698547e-06, "loss": 0.0145, "step": 238470 }, { "epoch": 0.995047191461308, "grad_norm": 0.7291033757155995, "learning_rate": 2.005134431067143e-06, "loss": 0.0159, "step": 238475 }, { "epoch": 0.9950680541762983, "grad_norm": 0.3993916004939897, "learning_rate": 2.0051134109255248e-06, "loss": 0.0141, "step": 238480 }, { "epoch": 0.9950889168912885, "grad_norm": 0.5191003431517259, "learning_rate": 2.0050923914449656e-06, "loss": 0.0189, "step": 238485 }, { "epoch": 0.9951097796062789, "grad_norm": 0.6438795831455028, "learning_rate": 2.0050713726254296e-06, "loss": 0.025, "step": 238490 }, { "epoch": 0.9951306423212691, "grad_norm": 0.5252835050105882, "learning_rate": 2.005050354466883e-06, "loss": 0.0131, "step": 238495 }, { "epoch": 0.9951515050362594, "grad_norm": 0.45688520207904126, "learning_rate": 2.005029336969291e-06, "loss": 0.0186, "step": 238500 }, { "epoch": 0.9951723677512497, "grad_norm": 0.5558197484027847, "learning_rate": 2.005008320132619e-06, "loss": 0.0194, "step": 238505 }, { "epoch": 0.99519323046624, "grad_norm": 0.8779074398262912, "learning_rate": 2.004987303956832e-06, "loss": 0.0133, "step": 238510 }, { "epoch": 0.9952140931812302, "grad_norm": 0.3893420040118581, "learning_rate": 2.004966288441896e-06, "loss": 0.0164, "step": 238515 }, { "epoch": 0.9952349558962205, "grad_norm": 0.6996195492087557, "learning_rate": 2.0049452735877768e-06, "loss": 0.0257, "step": 238520 }, { "epoch": 0.9952558186112108, "grad_norm": 3.3064154101407373, "learning_rate": 2.0049242593944383e-06, "loss": 0.0189, "step": 238525 }, { "epoch": 0.9952766813262011, "grad_norm": 0.776931204903054, "learning_rate": 2.0049032458618466e-06, "loss": 0.0247, "step": 238530 }, { "epoch": 0.9952975440411913, "grad_norm": 0.6457443178906817, "learning_rate": 2.004882232989967e-06, "loss": 0.0175, "step": 238535 }, { "epoch": 0.9953184067561817, "grad_norm": 0.7618109925175724, "learning_rate": 2.0048612207787655e-06, "loss": 0.0166, "step": 238540 }, { "epoch": 0.9953392694711719, "grad_norm": 0.3080081678042767, "learning_rate": 2.0048402092282063e-06, "loss": 0.0116, "step": 238545 }, { "epoch": 0.9953601321861621, "grad_norm": 0.6115279595718716, "learning_rate": 2.004819198338256e-06, "loss": 0.0172, "step": 238550 }, { "epoch": 0.9953809949011525, "grad_norm": 0.8507810717018693, "learning_rate": 2.004798188108879e-06, "loss": 0.0212, "step": 238555 }, { "epoch": 0.9954018576161427, "grad_norm": 2.2416007395915765, "learning_rate": 2.0047771785400414e-06, "loss": 0.0168, "step": 238560 }, { "epoch": 0.995422720331133, "grad_norm": 0.7743132365283242, "learning_rate": 2.0047561696317083e-06, "loss": 0.019, "step": 238565 }, { "epoch": 0.9954435830461232, "grad_norm": 0.457012593421895, "learning_rate": 2.004735161383845e-06, "loss": 0.0176, "step": 238570 }, { "epoch": 0.9954644457611136, "grad_norm": 0.7846716032585952, "learning_rate": 2.0047141537964164e-06, "loss": 0.0175, "step": 238575 }, { "epoch": 0.9954853084761038, "grad_norm": 0.47634214429604743, "learning_rate": 2.004693146869389e-06, "loss": 0.017, "step": 238580 }, { "epoch": 0.9955061711910941, "grad_norm": 0.6875192914713353, "learning_rate": 2.0046721406027277e-06, "loss": 0.0245, "step": 238585 }, { "epoch": 0.9955270339060844, "grad_norm": 0.8450573558189267, "learning_rate": 2.0046511349963975e-06, "loss": 0.0153, "step": 238590 }, { "epoch": 0.9955478966210747, "grad_norm": 0.9941241657685927, "learning_rate": 2.0046301300503647e-06, "loss": 0.0278, "step": 238595 }, { "epoch": 0.9955687593360649, "grad_norm": 1.2655252195522957, "learning_rate": 2.004609125764594e-06, "loss": 0.0249, "step": 238600 }, { "epoch": 0.9955896220510553, "grad_norm": 0.58732457182341, "learning_rate": 2.0045881221390508e-06, "loss": 0.0179, "step": 238605 }, { "epoch": 0.9956104847660455, "grad_norm": 0.5043320078241023, "learning_rate": 2.004567119173701e-06, "loss": 0.0289, "step": 238610 }, { "epoch": 0.9956313474810358, "grad_norm": 0.9359927086767797, "learning_rate": 2.0045461168685095e-06, "loss": 0.0209, "step": 238615 }, { "epoch": 0.9956522101960261, "grad_norm": 0.8239389531005153, "learning_rate": 2.004525115223442e-06, "loss": 0.021, "step": 238620 }, { "epoch": 0.9956730729110164, "grad_norm": 0.8256125571317178, "learning_rate": 2.004504114238464e-06, "loss": 0.0182, "step": 238625 }, { "epoch": 0.9956939356260066, "grad_norm": 0.640294047432017, "learning_rate": 2.00448311391354e-06, "loss": 0.0228, "step": 238630 }, { "epoch": 0.9957147983409969, "grad_norm": 0.3278229946413859, "learning_rate": 2.0044621142486372e-06, "loss": 0.0159, "step": 238635 }, { "epoch": 0.9957356610559872, "grad_norm": 0.6683025051508563, "learning_rate": 2.0044411152437197e-06, "loss": 0.0208, "step": 238640 }, { "epoch": 0.9957565237709775, "grad_norm": 0.8999293630384176, "learning_rate": 2.0044201168987527e-06, "loss": 0.0247, "step": 238645 }, { "epoch": 0.9957773864859677, "grad_norm": 1.0362443735062588, "learning_rate": 2.004399119213703e-06, "loss": 0.0176, "step": 238650 }, { "epoch": 0.995798249200958, "grad_norm": 0.7188813788259837, "learning_rate": 2.004378122188535e-06, "loss": 0.0171, "step": 238655 }, { "epoch": 0.9958191119159483, "grad_norm": 0.815887079204811, "learning_rate": 2.0043571258232144e-06, "loss": 0.0205, "step": 238660 }, { "epoch": 0.9958399746309385, "grad_norm": 0.7066906359909727, "learning_rate": 2.0043361301177067e-06, "loss": 0.0202, "step": 238665 }, { "epoch": 0.9958608373459289, "grad_norm": 0.53545880964229, "learning_rate": 2.004315135071977e-06, "loss": 0.0216, "step": 238670 }, { "epoch": 0.9958817000609191, "grad_norm": 0.5131768461157594, "learning_rate": 2.004294140685991e-06, "loss": 0.0165, "step": 238675 }, { "epoch": 0.9959025627759094, "grad_norm": 0.6607381170476959, "learning_rate": 2.004273146959714e-06, "loss": 0.02, "step": 238680 }, { "epoch": 0.9959234254908997, "grad_norm": 0.5630275442395313, "learning_rate": 2.0042521538931124e-06, "loss": 0.0166, "step": 238685 }, { "epoch": 0.99594428820589, "grad_norm": 0.4939296700875346, "learning_rate": 2.00423116148615e-06, "loss": 0.02, "step": 238690 }, { "epoch": 0.9959651509208802, "grad_norm": 0.7666689478975324, "learning_rate": 2.0042101697387933e-06, "loss": 0.0203, "step": 238695 }, { "epoch": 0.9959860136358705, "grad_norm": 0.4482935556746419, "learning_rate": 2.004189178651008e-06, "loss": 0.0185, "step": 238700 }, { "epoch": 0.9960068763508608, "grad_norm": 2.0057340886413866, "learning_rate": 2.0041681882227587e-06, "loss": 0.0266, "step": 238705 }, { "epoch": 0.9960277390658511, "grad_norm": 0.3900530656436472, "learning_rate": 2.0041471984540113e-06, "loss": 0.0129, "step": 238710 }, { "epoch": 0.9960486017808413, "grad_norm": 0.7860417708438582, "learning_rate": 2.0041262093447315e-06, "loss": 0.0209, "step": 238715 }, { "epoch": 0.9960694644958317, "grad_norm": 0.36513740760107394, "learning_rate": 2.0041052208948843e-06, "loss": 0.0178, "step": 238720 }, { "epoch": 0.9960903272108219, "grad_norm": 0.5545719874236121, "learning_rate": 2.0040842331044352e-06, "loss": 0.0208, "step": 238725 }, { "epoch": 0.9961111899258122, "grad_norm": 0.7220207614276383, "learning_rate": 2.00406324597335e-06, "loss": 0.0268, "step": 238730 }, { "epoch": 0.9961320526408025, "grad_norm": 0.4381338038103762, "learning_rate": 2.0040422595015946e-06, "loss": 0.02, "step": 238735 }, { "epoch": 0.9961529153557928, "grad_norm": 0.5416078706551551, "learning_rate": 2.0040212736891335e-06, "loss": 0.0172, "step": 238740 }, { "epoch": 0.996173778070783, "grad_norm": 1.0281996294304734, "learning_rate": 2.004000288535932e-06, "loss": 0.0233, "step": 238745 }, { "epoch": 0.9961946407857732, "grad_norm": 0.3189720910211756, "learning_rate": 2.003979304041957e-06, "loss": 0.0138, "step": 238750 }, { "epoch": 0.9962155035007636, "grad_norm": 0.6053446532915535, "learning_rate": 2.003958320207173e-06, "loss": 0.0211, "step": 238755 }, { "epoch": 0.9962363662157538, "grad_norm": 0.3115513637026727, "learning_rate": 2.003937337031546e-06, "loss": 0.0135, "step": 238760 }, { "epoch": 0.9962572289307441, "grad_norm": 0.6290420161473859, "learning_rate": 2.0039163545150403e-06, "loss": 0.0194, "step": 238765 }, { "epoch": 0.9962780916457344, "grad_norm": 0.7569786718387472, "learning_rate": 2.0038953726576225e-06, "loss": 0.0282, "step": 238770 }, { "epoch": 0.9962989543607247, "grad_norm": 0.9466582279465399, "learning_rate": 2.0038743914592582e-06, "loss": 0.0217, "step": 238775 }, { "epoch": 0.9963198170757149, "grad_norm": 0.5026066447593315, "learning_rate": 2.0038534109199123e-06, "loss": 0.0163, "step": 238780 }, { "epoch": 0.9963406797907053, "grad_norm": 1.026396364352601, "learning_rate": 2.0038324310395507e-06, "loss": 0.0249, "step": 238785 }, { "epoch": 0.9963615425056955, "grad_norm": 0.3995021658270047, "learning_rate": 2.003811451818139e-06, "loss": 0.0202, "step": 238790 }, { "epoch": 0.9963824052206858, "grad_norm": 0.8418723384592101, "learning_rate": 2.003790473255642e-06, "loss": 0.0195, "step": 238795 }, { "epoch": 0.9964032679356761, "grad_norm": 0.679037339016885, "learning_rate": 2.003769495352026e-06, "loss": 0.0219, "step": 238800 }, { "epoch": 0.9964241306506664, "grad_norm": 0.4063546620367921, "learning_rate": 2.003748518107255e-06, "loss": 0.023, "step": 238805 }, { "epoch": 0.9964449933656566, "grad_norm": 0.6137808499489866, "learning_rate": 2.003727541521297e-06, "loss": 0.0194, "step": 238810 }, { "epoch": 0.9964658560806469, "grad_norm": 0.737067475016482, "learning_rate": 2.003706565594116e-06, "loss": 0.0221, "step": 238815 }, { "epoch": 0.9964867187956372, "grad_norm": 0.47488607180396275, "learning_rate": 2.0036855903256774e-06, "loss": 0.02, "step": 238820 }, { "epoch": 0.9965075815106275, "grad_norm": 0.5538328258325897, "learning_rate": 2.0036646157159474e-06, "loss": 0.0233, "step": 238825 }, { "epoch": 0.9965284442256177, "grad_norm": 0.7050075597470431, "learning_rate": 2.0036436417648906e-06, "loss": 0.0208, "step": 238830 }, { "epoch": 0.9965493069406081, "grad_norm": 0.4151843283555518, "learning_rate": 2.0036226684724735e-06, "loss": 0.0179, "step": 238835 }, { "epoch": 0.9965701696555983, "grad_norm": 0.7416338897787302, "learning_rate": 2.003601695838661e-06, "loss": 0.0237, "step": 238840 }, { "epoch": 0.9965910323705885, "grad_norm": 0.7060367333197548, "learning_rate": 2.003580723863419e-06, "loss": 0.019, "step": 238845 }, { "epoch": 0.9966118950855789, "grad_norm": 1.2294246657656924, "learning_rate": 2.003559752546713e-06, "loss": 0.0232, "step": 238850 }, { "epoch": 0.9966327578005691, "grad_norm": 0.32958380337544707, "learning_rate": 2.0035387818885083e-06, "loss": 0.0157, "step": 238855 }, { "epoch": 0.9966536205155594, "grad_norm": 0.8225734642256034, "learning_rate": 2.003517811888771e-06, "loss": 0.0205, "step": 238860 }, { "epoch": 0.9966744832305497, "grad_norm": 1.298615744668166, "learning_rate": 2.003496842547465e-06, "loss": 0.0269, "step": 238865 }, { "epoch": 0.99669534594554, "grad_norm": 0.8475492233303743, "learning_rate": 2.003475873864558e-06, "loss": 0.0143, "step": 238870 }, { "epoch": 0.9967162086605302, "grad_norm": 0.7904807849351718, "learning_rate": 2.0034549058400146e-06, "loss": 0.0192, "step": 238875 }, { "epoch": 0.9967370713755205, "grad_norm": 1.2569744125057867, "learning_rate": 2.0034339384738e-06, "loss": 0.0244, "step": 238880 }, { "epoch": 0.9967579340905108, "grad_norm": 0.5897137681085468, "learning_rate": 2.0034129717658803e-06, "loss": 0.0232, "step": 238885 }, { "epoch": 0.9967787968055011, "grad_norm": 0.7170427989480277, "learning_rate": 2.003392005716221e-06, "loss": 0.0205, "step": 238890 }, { "epoch": 0.9967996595204913, "grad_norm": 0.32308999916174613, "learning_rate": 2.0033710403247873e-06, "loss": 0.0195, "step": 238895 }, { "epoch": 0.9968205222354817, "grad_norm": 0.9222255564429149, "learning_rate": 2.0033500755915445e-06, "loss": 0.0189, "step": 238900 }, { "epoch": 0.9968413849504719, "grad_norm": 0.4786501026749143, "learning_rate": 2.0033291115164595e-06, "loss": 0.0186, "step": 238905 }, { "epoch": 0.9968622476654622, "grad_norm": 0.6713010328291327, "learning_rate": 2.003308148099496e-06, "loss": 0.0149, "step": 238910 }, { "epoch": 0.9968831103804525, "grad_norm": 0.7245060553631609, "learning_rate": 2.0032871853406215e-06, "loss": 0.0184, "step": 238915 }, { "epoch": 0.9969039730954428, "grad_norm": 0.8520065941591235, "learning_rate": 2.0032662232398e-06, "loss": 0.0228, "step": 238920 }, { "epoch": 0.996924835810433, "grad_norm": 0.4169382370886903, "learning_rate": 2.003245261796998e-06, "loss": 0.017, "step": 238925 }, { "epoch": 0.9969456985254233, "grad_norm": 0.8075557801356746, "learning_rate": 2.003224301012181e-06, "loss": 0.0163, "step": 238930 }, { "epoch": 0.9969665612404136, "grad_norm": 0.6342308990227593, "learning_rate": 2.0032033408853137e-06, "loss": 0.018, "step": 238935 }, { "epoch": 0.9969874239554039, "grad_norm": 0.6981525858173411, "learning_rate": 2.003182381416363e-06, "loss": 0.0208, "step": 238940 }, { "epoch": 0.9970082866703941, "grad_norm": 0.49091949656442124, "learning_rate": 2.0031614226052933e-06, "loss": 0.0214, "step": 238945 }, { "epoch": 0.9970291493853844, "grad_norm": 0.3722618557511778, "learning_rate": 2.003140464452071e-06, "loss": 0.0191, "step": 238950 }, { "epoch": 0.9970500121003747, "grad_norm": 0.4108178942303086, "learning_rate": 2.003119506956661e-06, "loss": 0.0188, "step": 238955 }, { "epoch": 0.9970708748153649, "grad_norm": 0.6579065949220539, "learning_rate": 2.0030985501190297e-06, "loss": 0.0179, "step": 238960 }, { "epoch": 0.9970917375303553, "grad_norm": 0.8673520173767346, "learning_rate": 2.003077593939142e-06, "loss": 0.0171, "step": 238965 }, { "epoch": 0.9971126002453455, "grad_norm": 0.9332986215953768, "learning_rate": 2.003056638416964e-06, "loss": 0.0248, "step": 238970 }, { "epoch": 0.9971334629603358, "grad_norm": 0.6909807357340654, "learning_rate": 2.003035683552461e-06, "loss": 0.0134, "step": 238975 }, { "epoch": 0.9971543256753261, "grad_norm": 0.5988754929973318, "learning_rate": 2.003014729345599e-06, "loss": 0.0172, "step": 238980 }, { "epoch": 0.9971751883903164, "grad_norm": 0.3411627785781758, "learning_rate": 2.0029937757963424e-06, "loss": 0.0173, "step": 238985 }, { "epoch": 0.9971960511053066, "grad_norm": 0.4194902549120103, "learning_rate": 2.002972822904658e-06, "loss": 0.0196, "step": 238990 }, { "epoch": 0.9972169138202969, "grad_norm": 0.8004804003236469, "learning_rate": 2.002951870670511e-06, "loss": 0.018, "step": 238995 }, { "epoch": 0.9972377765352872, "grad_norm": 0.3296024832831609, "learning_rate": 2.0029309190938673e-06, "loss": 0.0223, "step": 239000 }, { "epoch": 0.9972586392502775, "grad_norm": 0.4234708415371247, "learning_rate": 2.0029099681746926e-06, "loss": 0.0163, "step": 239005 }, { "epoch": 0.9972795019652677, "grad_norm": 0.6691189415356593, "learning_rate": 2.0028890179129514e-06, "loss": 0.023, "step": 239010 }, { "epoch": 0.9973003646802581, "grad_norm": 0.2335822976110196, "learning_rate": 2.0028680683086107e-06, "loss": 0.0137, "step": 239015 }, { "epoch": 0.9973212273952483, "grad_norm": 0.8200465754936832, "learning_rate": 2.0028471193616357e-06, "loss": 0.0141, "step": 239020 }, { "epoch": 0.9973420901102386, "grad_norm": 0.4137458656317012, "learning_rate": 2.0028261710719913e-06, "loss": 0.0233, "step": 239025 }, { "epoch": 0.9973629528252289, "grad_norm": 0.5297237906312483, "learning_rate": 2.0028052234396437e-06, "loss": 0.0195, "step": 239030 }, { "epoch": 0.9973838155402192, "grad_norm": 0.7711277006106612, "learning_rate": 2.002784276464559e-06, "loss": 0.0159, "step": 239035 }, { "epoch": 0.9974046782552094, "grad_norm": 0.4672362541222387, "learning_rate": 2.0027633301467017e-06, "loss": 0.0249, "step": 239040 }, { "epoch": 0.9974255409701998, "grad_norm": 0.3131321273209339, "learning_rate": 2.002742384486039e-06, "loss": 0.0181, "step": 239045 }, { "epoch": 0.99744640368519, "grad_norm": 0.6409638435269284, "learning_rate": 2.0027214394825346e-06, "loss": 0.025, "step": 239050 }, { "epoch": 0.9974672664001802, "grad_norm": 0.5841588002905492, "learning_rate": 2.002700495136156e-06, "loss": 0.0236, "step": 239055 }, { "epoch": 0.9974881291151705, "grad_norm": 0.6405304147928195, "learning_rate": 2.0026795514468674e-06, "loss": 0.0174, "step": 239060 }, { "epoch": 0.9975089918301608, "grad_norm": 1.049762905467943, "learning_rate": 2.002658608414635e-06, "loss": 0.0169, "step": 239065 }, { "epoch": 0.9975298545451511, "grad_norm": 0.6105894813290511, "learning_rate": 2.0026376660394253e-06, "loss": 0.0264, "step": 239070 }, { "epoch": 0.9975507172601413, "grad_norm": 0.3954014180518205, "learning_rate": 2.0026167243212023e-06, "loss": 0.0265, "step": 239075 }, { "epoch": 0.9975715799751317, "grad_norm": 0.465442087038409, "learning_rate": 2.002595783259933e-06, "loss": 0.0205, "step": 239080 }, { "epoch": 0.9975924426901219, "grad_norm": 0.2778387569705012, "learning_rate": 2.002574842855582e-06, "loss": 0.0231, "step": 239085 }, { "epoch": 0.9976133054051122, "grad_norm": 0.4266005311992744, "learning_rate": 2.0025539031081156e-06, "loss": 0.0133, "step": 239090 }, { "epoch": 0.9976341681201025, "grad_norm": 0.28418914968980935, "learning_rate": 2.0025329640174997e-06, "loss": 0.0236, "step": 239095 }, { "epoch": 0.9976550308350928, "grad_norm": 1.232986557697448, "learning_rate": 2.0025120255836995e-06, "loss": 0.0195, "step": 239100 }, { "epoch": 0.997675893550083, "grad_norm": 0.7430662342307741, "learning_rate": 2.0024910878066806e-06, "loss": 0.0276, "step": 239105 }, { "epoch": 0.9976967562650733, "grad_norm": 0.6101430843643374, "learning_rate": 2.0024701506864086e-06, "loss": 0.0273, "step": 239110 }, { "epoch": 0.9977176189800636, "grad_norm": 0.7125014864320104, "learning_rate": 2.00244921422285e-06, "loss": 0.0147, "step": 239115 }, { "epoch": 0.9977384816950539, "grad_norm": 0.8303240381061698, "learning_rate": 2.0024282784159694e-06, "loss": 0.0199, "step": 239120 }, { "epoch": 0.9977593444100441, "grad_norm": 4.108744211874322, "learning_rate": 2.002407343265733e-06, "loss": 0.0228, "step": 239125 }, { "epoch": 0.9977802071250345, "grad_norm": 0.8473055387675559, "learning_rate": 2.0023864087721064e-06, "loss": 0.0236, "step": 239130 }, { "epoch": 0.9978010698400247, "grad_norm": 0.4877091191354345, "learning_rate": 2.0023654749350553e-06, "loss": 0.0153, "step": 239135 }, { "epoch": 0.997821932555015, "grad_norm": 0.3943581570981131, "learning_rate": 2.002344541754546e-06, "loss": 0.0211, "step": 239140 }, { "epoch": 0.9978427952700053, "grad_norm": 0.34999079324387705, "learning_rate": 2.0023236092305427e-06, "loss": 0.0221, "step": 239145 }, { "epoch": 0.9978636579849955, "grad_norm": 0.5669684051611844, "learning_rate": 2.0023026773630122e-06, "loss": 0.0156, "step": 239150 }, { "epoch": 0.9978845206999858, "grad_norm": 0.32799239987791357, "learning_rate": 2.00228174615192e-06, "loss": 0.0158, "step": 239155 }, { "epoch": 0.9979053834149761, "grad_norm": 0.4194272465385865, "learning_rate": 2.0022608155972316e-06, "loss": 0.0226, "step": 239160 }, { "epoch": 0.9979262461299664, "grad_norm": 0.43362095160835135, "learning_rate": 2.002239885698913e-06, "loss": 0.018, "step": 239165 }, { "epoch": 0.9979471088449566, "grad_norm": 1.4324435940243487, "learning_rate": 2.0022189564569296e-06, "loss": 0.0212, "step": 239170 }, { "epoch": 0.9979679715599469, "grad_norm": 0.3486930001176945, "learning_rate": 2.002198027871247e-06, "loss": 0.0207, "step": 239175 }, { "epoch": 0.9979888342749372, "grad_norm": 0.4030366339432331, "learning_rate": 2.002177099941831e-06, "loss": 0.0136, "step": 239180 }, { "epoch": 0.9980096969899275, "grad_norm": 0.6950532227762679, "learning_rate": 2.0021561726686477e-06, "loss": 0.0233, "step": 239185 }, { "epoch": 0.9980305597049177, "grad_norm": 0.5606771355234865, "learning_rate": 2.0021352460516627e-06, "loss": 0.0294, "step": 239190 }, { "epoch": 0.9980514224199081, "grad_norm": 0.7684148134468594, "learning_rate": 2.002114320090841e-06, "loss": 0.0249, "step": 239195 }, { "epoch": 0.9980722851348983, "grad_norm": 0.985747814327689, "learning_rate": 2.0020933947861493e-06, "loss": 0.0209, "step": 239200 }, { "epoch": 0.9980931478498886, "grad_norm": 0.5214809560691706, "learning_rate": 2.002072470137553e-06, "loss": 0.0186, "step": 239205 }, { "epoch": 0.9981140105648789, "grad_norm": 0.6329945887652871, "learning_rate": 2.0020515461450168e-06, "loss": 0.025, "step": 239210 }, { "epoch": 0.9981348732798692, "grad_norm": 0.47298512322100955, "learning_rate": 2.0020306228085077e-06, "loss": 0.0237, "step": 239215 }, { "epoch": 0.9981557359948594, "grad_norm": 0.8278062510539403, "learning_rate": 2.002009700127991e-06, "loss": 0.017, "step": 239220 }, { "epoch": 0.9981765987098496, "grad_norm": 0.9590152742367302, "learning_rate": 2.0019887781034327e-06, "loss": 0.0214, "step": 239225 }, { "epoch": 0.99819746142484, "grad_norm": 0.33897316015392803, "learning_rate": 2.001967856734798e-06, "loss": 0.0135, "step": 239230 }, { "epoch": 0.9982183241398302, "grad_norm": 0.3314161634841572, "learning_rate": 2.0019469360220528e-06, "loss": 0.0187, "step": 239235 }, { "epoch": 0.9982391868548205, "grad_norm": 0.6066410761086541, "learning_rate": 2.0019260159651634e-06, "loss": 0.021, "step": 239240 }, { "epoch": 0.9982600495698108, "grad_norm": 2.4148664183293618, "learning_rate": 2.0019050965640944e-06, "loss": 0.0178, "step": 239245 }, { "epoch": 0.9982809122848011, "grad_norm": 0.8234473074630559, "learning_rate": 2.0018841778188123e-06, "loss": 0.0234, "step": 239250 }, { "epoch": 0.9983017749997913, "grad_norm": 1.0305129738559227, "learning_rate": 2.0018632597292827e-06, "loss": 0.0169, "step": 239255 }, { "epoch": 0.9983226377147817, "grad_norm": 0.5318574496277773, "learning_rate": 2.0018423422954715e-06, "loss": 0.0173, "step": 239260 }, { "epoch": 0.9983435004297719, "grad_norm": 0.7557630653841843, "learning_rate": 2.001821425517344e-06, "loss": 0.0194, "step": 239265 }, { "epoch": 0.9983643631447622, "grad_norm": 0.8464589165156872, "learning_rate": 2.0018005093948665e-06, "loss": 0.016, "step": 239270 }, { "epoch": 0.9983852258597525, "grad_norm": 0.8540807484557336, "learning_rate": 2.0017795939280047e-06, "loss": 0.0173, "step": 239275 }, { "epoch": 0.9984060885747428, "grad_norm": 0.613874362038586, "learning_rate": 2.001758679116724e-06, "loss": 0.0236, "step": 239280 }, { "epoch": 0.998426951289733, "grad_norm": 0.34277421798118074, "learning_rate": 2.00173776496099e-06, "loss": 0.0148, "step": 239285 }, { "epoch": 0.9984478140047233, "grad_norm": 0.5966447411279282, "learning_rate": 2.0017168514607686e-06, "loss": 0.0217, "step": 239290 }, { "epoch": 0.9984686767197136, "grad_norm": 0.9430978165046178, "learning_rate": 2.001695938616026e-06, "loss": 0.0213, "step": 239295 }, { "epoch": 0.9984895394347039, "grad_norm": 0.7915206437669997, "learning_rate": 2.0016750264267277e-06, "loss": 0.0188, "step": 239300 }, { "epoch": 0.9985104021496941, "grad_norm": 0.7665494860853845, "learning_rate": 2.0016541148928397e-06, "loss": 0.0201, "step": 239305 }, { "epoch": 0.9985312648646845, "grad_norm": 0.4901484127929213, "learning_rate": 2.0016332040143265e-06, "loss": 0.014, "step": 239310 }, { "epoch": 0.9985521275796747, "grad_norm": 1.4358099259314971, "learning_rate": 2.001612293791156e-06, "loss": 0.0239, "step": 239315 }, { "epoch": 0.998572990294665, "grad_norm": 0.35105933257486094, "learning_rate": 2.001591384223292e-06, "loss": 0.0171, "step": 239320 }, { "epoch": 0.9985938530096553, "grad_norm": 0.41148674526823775, "learning_rate": 2.001570475310701e-06, "loss": 0.0196, "step": 239325 }, { "epoch": 0.9986147157246456, "grad_norm": 0.6057060296370129, "learning_rate": 2.0015495670533492e-06, "loss": 0.0205, "step": 239330 }, { "epoch": 0.9986355784396358, "grad_norm": 0.9876656912997236, "learning_rate": 2.0015286594512027e-06, "loss": 0.0204, "step": 239335 }, { "epoch": 0.9986564411546262, "grad_norm": 0.6404555653578042, "learning_rate": 2.001507752504226e-06, "loss": 0.0211, "step": 239340 }, { "epoch": 0.9986773038696164, "grad_norm": 0.7917322940083934, "learning_rate": 2.0014868462123856e-06, "loss": 0.0218, "step": 239345 }, { "epoch": 0.9986981665846066, "grad_norm": 1.7959444973595988, "learning_rate": 2.001465940575647e-06, "loss": 0.0199, "step": 239350 }, { "epoch": 0.9987190292995969, "grad_norm": 0.8387952451209524, "learning_rate": 2.0014450355939764e-06, "loss": 0.0229, "step": 239355 }, { "epoch": 0.9987398920145872, "grad_norm": 0.5354058587877462, "learning_rate": 2.0014241312673393e-06, "loss": 0.019, "step": 239360 }, { "epoch": 0.9987607547295775, "grad_norm": 0.5710689016542759, "learning_rate": 2.0014032275957017e-06, "loss": 0.0176, "step": 239365 }, { "epoch": 0.9987816174445677, "grad_norm": 0.49415190384776614, "learning_rate": 2.001382324579029e-06, "loss": 0.0141, "step": 239370 }, { "epoch": 0.9988024801595581, "grad_norm": 0.37382943753257203, "learning_rate": 2.0013614222172877e-06, "loss": 0.0207, "step": 239375 }, { "epoch": 0.9988233428745483, "grad_norm": 0.8219752003665, "learning_rate": 2.001340520510442e-06, "loss": 0.0202, "step": 239380 }, { "epoch": 0.9988442055895386, "grad_norm": 0.597534301067389, "learning_rate": 2.0013196194584607e-06, "loss": 0.0119, "step": 239385 }, { "epoch": 0.9988650683045289, "grad_norm": 0.8406609849122529, "learning_rate": 2.0012987190613065e-06, "loss": 0.015, "step": 239390 }, { "epoch": 0.9988859310195192, "grad_norm": 2.59350088737943, "learning_rate": 2.0012778193189468e-06, "loss": 0.0395, "step": 239395 }, { "epoch": 0.9989067937345094, "grad_norm": 0.7340020919542752, "learning_rate": 2.0012569202313465e-06, "loss": 0.018, "step": 239400 }, { "epoch": 0.9989276564494997, "grad_norm": 0.6529673110433768, "learning_rate": 2.0012360217984727e-06, "loss": 0.0184, "step": 239405 }, { "epoch": 0.99894851916449, "grad_norm": 0.386609518849263, "learning_rate": 2.0012151240202905e-06, "loss": 0.0217, "step": 239410 }, { "epoch": 0.9989693818794803, "grad_norm": 0.7456511285192472, "learning_rate": 2.0011942268967657e-06, "loss": 0.0219, "step": 239415 }, { "epoch": 0.9989902445944705, "grad_norm": 0.4965666291096219, "learning_rate": 2.001173330427864e-06, "loss": 0.014, "step": 239420 }, { "epoch": 0.9990111073094609, "grad_norm": 0.5425957786718881, "learning_rate": 2.0011524346135513e-06, "loss": 0.0173, "step": 239425 }, { "epoch": 0.9990319700244511, "grad_norm": 0.44713433490444876, "learning_rate": 2.0011315394537936e-06, "loss": 0.0189, "step": 239430 }, { "epoch": 0.9990528327394413, "grad_norm": 0.7630504381511208, "learning_rate": 2.001110644948557e-06, "loss": 0.0247, "step": 239435 }, { "epoch": 0.9990736954544317, "grad_norm": 0.6101360726229927, "learning_rate": 2.001089751097807e-06, "loss": 0.0218, "step": 239440 }, { "epoch": 0.9990945581694219, "grad_norm": 0.48636580875320845, "learning_rate": 2.0010688579015084e-06, "loss": 0.0208, "step": 239445 }, { "epoch": 0.9991154208844122, "grad_norm": 0.4051404869866098, "learning_rate": 2.001047965359629e-06, "loss": 0.0237, "step": 239450 }, { "epoch": 0.9991362835994025, "grad_norm": 0.4579819917410459, "learning_rate": 2.001027073472133e-06, "loss": 0.019, "step": 239455 }, { "epoch": 0.9991571463143928, "grad_norm": 0.6443829194659078, "learning_rate": 2.001006182238987e-06, "loss": 0.0218, "step": 239460 }, { "epoch": 0.999178009029383, "grad_norm": 0.6476349725046802, "learning_rate": 2.0009852916601572e-06, "loss": 0.0171, "step": 239465 }, { "epoch": 0.9991988717443733, "grad_norm": 3.2355288076069626, "learning_rate": 2.0009644017356086e-06, "loss": 0.0177, "step": 239470 }, { "epoch": 0.9992197344593636, "grad_norm": 0.7994029478460783, "learning_rate": 2.0009435124653074e-06, "loss": 0.0245, "step": 239475 }, { "epoch": 0.9992405971743539, "grad_norm": 1.73024763402218, "learning_rate": 2.0009226238492198e-06, "loss": 0.0261, "step": 239480 }, { "epoch": 0.9992614598893441, "grad_norm": 0.6621221190657051, "learning_rate": 2.0009017358873113e-06, "loss": 0.017, "step": 239485 }, { "epoch": 0.9992823226043345, "grad_norm": 0.8087144298783825, "learning_rate": 2.0008808485795477e-06, "loss": 0.0185, "step": 239490 }, { "epoch": 0.9993031853193247, "grad_norm": 0.5411575800293288, "learning_rate": 2.000859961925895e-06, "loss": 0.0156, "step": 239495 }, { "epoch": 0.999324048034315, "grad_norm": 1.4835239689392405, "learning_rate": 2.000839075926319e-06, "loss": 0.0286, "step": 239500 }, { "epoch": 0.9993449107493053, "grad_norm": 0.832494866325747, "learning_rate": 2.0008181905807856e-06, "loss": 0.0176, "step": 239505 }, { "epoch": 0.9993657734642956, "grad_norm": 0.6736485705689738, "learning_rate": 2.000797305889261e-06, "loss": 0.0199, "step": 239510 }, { "epoch": 0.9993866361792858, "grad_norm": 0.4229232343324356, "learning_rate": 2.00077642185171e-06, "loss": 0.0199, "step": 239515 }, { "epoch": 0.9994074988942762, "grad_norm": 0.723859750309742, "learning_rate": 2.0007555384680995e-06, "loss": 0.018, "step": 239520 }, { "epoch": 0.9994283616092664, "grad_norm": 0.578616273113665, "learning_rate": 2.000734655738395e-06, "loss": 0.016, "step": 239525 }, { "epoch": 0.9994492243242566, "grad_norm": 0.4074942682259703, "learning_rate": 2.0007137736625628e-06, "loss": 0.0165, "step": 239530 }, { "epoch": 0.9994700870392469, "grad_norm": 0.5222579721161569, "learning_rate": 2.000692892240568e-06, "loss": 0.0178, "step": 239535 }, { "epoch": 0.9994909497542372, "grad_norm": 0.8847626228973715, "learning_rate": 2.0006720114723775e-06, "loss": 0.0173, "step": 239540 }, { "epoch": 0.9995118124692275, "grad_norm": 0.47914075978889703, "learning_rate": 2.0006511313579557e-06, "loss": 0.0163, "step": 239545 }, { "epoch": 0.9995326751842177, "grad_norm": 0.43794459196707614, "learning_rate": 2.0006302518972693e-06, "loss": 0.0187, "step": 239550 }, { "epoch": 0.9995535378992081, "grad_norm": 0.5094352256375799, "learning_rate": 2.000609373090285e-06, "loss": 0.0126, "step": 239555 }, { "epoch": 0.9995744006141983, "grad_norm": 0.4694153188377285, "learning_rate": 2.000588494936968e-06, "loss": 0.0203, "step": 239560 }, { "epoch": 0.9995952633291886, "grad_norm": 0.6220134020155456, "learning_rate": 2.000567617437284e-06, "loss": 0.0194, "step": 239565 }, { "epoch": 0.9996161260441789, "grad_norm": 0.75829976515193, "learning_rate": 2.0005467405911984e-06, "loss": 0.0187, "step": 239570 }, { "epoch": 0.9996369887591692, "grad_norm": 0.38724729934909435, "learning_rate": 2.0005258643986785e-06, "loss": 0.02, "step": 239575 }, { "epoch": 0.9996578514741594, "grad_norm": 0.7960184168795765, "learning_rate": 2.0005049888596885e-06, "loss": 0.0187, "step": 239580 }, { "epoch": 0.9996787141891497, "grad_norm": 1.0089907227442492, "learning_rate": 2.000484113974196e-06, "loss": 0.0231, "step": 239585 }, { "epoch": 0.99969957690414, "grad_norm": 0.6247912591180929, "learning_rate": 2.000463239742166e-06, "loss": 0.0202, "step": 239590 }, { "epoch": 0.9997204396191303, "grad_norm": 1.2277116375664725, "learning_rate": 2.0004423661635643e-06, "loss": 0.0221, "step": 239595 }, { "epoch": 0.9997413023341205, "grad_norm": 0.3175661236760325, "learning_rate": 2.000421493238357e-06, "loss": 0.0217, "step": 239600 }, { "epoch": 0.9997621650491109, "grad_norm": 0.46213109838016886, "learning_rate": 2.0004006209665104e-06, "loss": 0.0151, "step": 239605 }, { "epoch": 0.9997830277641011, "grad_norm": 0.7121864299964167, "learning_rate": 2.00037974934799e-06, "loss": 0.0164, "step": 239610 }, { "epoch": 0.9998038904790914, "grad_norm": 0.6410321826357909, "learning_rate": 2.0003588783827615e-06, "loss": 0.0187, "step": 239615 }, { "epoch": 0.9998247531940817, "grad_norm": 0.2705688698882331, "learning_rate": 2.0003380080707916e-06, "loss": 0.0136, "step": 239620 }, { "epoch": 0.999845615909072, "grad_norm": 0.44149426433505623, "learning_rate": 2.0003171384120454e-06, "loss": 0.0204, "step": 239625 }, { "epoch": 0.9998664786240622, "grad_norm": 0.6935522507682282, "learning_rate": 2.000296269406489e-06, "loss": 0.0207, "step": 239630 }, { "epoch": 0.9998873413390525, "grad_norm": 0.42283646682216, "learning_rate": 2.0002754010540887e-06, "loss": 0.0182, "step": 239635 }, { "epoch": 0.9999082040540428, "grad_norm": 0.34671684314227047, "learning_rate": 2.0002545333548097e-06, "loss": 0.0173, "step": 239640 }, { "epoch": 0.999929066769033, "grad_norm": 0.9233220744041376, "learning_rate": 2.0002336663086193e-06, "loss": 0.0257, "step": 239645 }, { "epoch": 0.9999499294840233, "grad_norm": 0.9424049178780639, "learning_rate": 2.0002127999154824e-06, "loss": 0.0213, "step": 239650 }, { "epoch": 0.9999707921990136, "grad_norm": 0.7231310220525968, "learning_rate": 2.000191934175365e-06, "loss": 0.0198, "step": 239655 }, { "epoch": 0.9999916549140039, "grad_norm": 0.5687254631394869, "learning_rate": 2.000171069088233e-06, "loss": 0.021, "step": 239660 }, { "epoch": 1.0, "step": 239662, "total_flos": 2097228602327040.0, "train_loss": 0.050915672732994244, "train_runtime": 738268.7103, "train_samples_per_second": 10.388, "train_steps_per_second": 0.325 } ], "logging_steps": 5, "max_steps": 239662, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2097228602327040.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }