{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1219932390859013, "eval_steps": 500, "global_step": 155000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.238666058618717e-05, "grad_norm": 0.5545526742935181, "learning_rate": 4.999934852005473e-06, "loss": 1.5857, "step": 10 }, { "epoch": 0.00014477332117237434, "grad_norm": 0.44032129645347595, "learning_rate": 4.999862465344887e-06, "loss": 1.5257, "step": 20 }, { "epoch": 0.00021715998175856154, "grad_norm": 0.40077871084213257, "learning_rate": 4.999790078684301e-06, "loss": 1.4955, "step": 30 }, { "epoch": 0.0002895466423447487, "grad_norm": 0.3221718966960907, "learning_rate": 4.999717692023714e-06, "loss": 1.4903, "step": 40 }, { "epoch": 0.00036193330293093586, "grad_norm": 0.3151167631149292, "learning_rate": 4.999645305363128e-06, "loss": 1.4603, "step": 50 }, { "epoch": 0.0004343199635171231, "grad_norm": 0.27057284116744995, "learning_rate": 4.9995729187025424e-06, "loss": 1.4502, "step": 60 }, { "epoch": 0.0005067066241033103, "grad_norm": 0.3064720928668976, "learning_rate": 4.999500532041956e-06, "loss": 1.4473, "step": 70 }, { "epoch": 0.0005790932846894974, "grad_norm": 0.2519373595714569, "learning_rate": 4.99942814538137e-06, "loss": 1.4421, "step": 80 }, { "epoch": 0.0006514799452756846, "grad_norm": 0.23665937781333923, "learning_rate": 4.999355758720783e-06, "loss": 1.4143, "step": 90 }, { "epoch": 0.0007238666058618717, "grad_norm": 0.26254650950431824, "learning_rate": 4.999283372060198e-06, "loss": 1.4186, "step": 100 }, { "epoch": 0.000796253266448059, "grad_norm": 0.21838980913162231, "learning_rate": 4.9992109853996105e-06, "loss": 1.3901, "step": 110 }, { "epoch": 0.0008686399270342462, "grad_norm": 0.22787261009216309, "learning_rate": 4.999138598739024e-06, "loss": 1.393, "step": 120 }, { "epoch": 0.0009410265876204333, "grad_norm": 0.24213093519210815, "learning_rate": 4.999066212078439e-06, "loss": 1.3777, "step": 130 }, { "epoch": 0.0010134132482066205, "grad_norm": 0.31372249126434326, "learning_rate": 4.998993825417852e-06, "loss": 1.3858, "step": 140 }, { "epoch": 0.0010857999087928076, "grad_norm": 0.23661810159683228, "learning_rate": 4.998921438757266e-06, "loss": 1.3904, "step": 150 }, { "epoch": 0.0011581865693789948, "grad_norm": 0.245035320520401, "learning_rate": 4.9988490520966794e-06, "loss": 1.3719, "step": 160 }, { "epoch": 0.001230573229965182, "grad_norm": 0.2323773056268692, "learning_rate": 4.998776665436094e-06, "loss": 1.3662, "step": 170 }, { "epoch": 0.0013029598905513692, "grad_norm": 0.20842741429805756, "learning_rate": 4.9987042787755075e-06, "loss": 1.3549, "step": 180 }, { "epoch": 0.0013753465511375563, "grad_norm": 0.2077568769454956, "learning_rate": 4.998631892114921e-06, "loss": 1.351, "step": 190 }, { "epoch": 0.0014477332117237434, "grad_norm": 0.2298651784658432, "learning_rate": 4.998559505454335e-06, "loss": 1.3661, "step": 200 }, { "epoch": 0.0015201198723099308, "grad_norm": 0.23133844137191772, "learning_rate": 4.998487118793749e-06, "loss": 1.349, "step": 210 }, { "epoch": 0.001592506532896118, "grad_norm": 0.2279757559299469, "learning_rate": 4.998414732133163e-06, "loss": 1.3391, "step": 220 }, { "epoch": 0.001664893193482305, "grad_norm": 0.22140687704086304, "learning_rate": 4.9983423454725765e-06, "loss": 1.3491, "step": 230 }, { "epoch": 0.0017372798540684924, "grad_norm": 0.2116747796535492, "learning_rate": 4.99826995881199e-06, "loss": 1.3488, "step": 240 }, { "epoch": 0.0018096665146546795, "grad_norm": 0.2300599366426468, "learning_rate": 4.9981975721514045e-06, "loss": 1.3383, "step": 250 }, { "epoch": 0.0018820531752408666, "grad_norm": 0.2189347743988037, "learning_rate": 4.998125185490818e-06, "loss": 1.3214, "step": 260 }, { "epoch": 0.001954439835827054, "grad_norm": 0.2163819819688797, "learning_rate": 4.998052798830232e-06, "loss": 1.331, "step": 270 }, { "epoch": 0.002026826496413241, "grad_norm": 0.24370183050632477, "learning_rate": 4.997980412169645e-06, "loss": 1.3307, "step": 280 }, { "epoch": 0.002099213156999428, "grad_norm": 0.21121959388256073, "learning_rate": 4.99790802550906e-06, "loss": 1.326, "step": 290 }, { "epoch": 0.0021715998175856153, "grad_norm": 0.2269212156534195, "learning_rate": 4.9978356388484735e-06, "loss": 1.3192, "step": 300 }, { "epoch": 0.0022439864781718024, "grad_norm": 0.19639967381954193, "learning_rate": 4.997763252187887e-06, "loss": 1.3322, "step": 310 }, { "epoch": 0.0023163731387579895, "grad_norm": 0.2111475169658661, "learning_rate": 4.997690865527301e-06, "loss": 1.3282, "step": 320 }, { "epoch": 0.002388759799344177, "grad_norm": 0.21942846477031708, "learning_rate": 4.997618478866715e-06, "loss": 1.3358, "step": 330 }, { "epoch": 0.002461146459930364, "grad_norm": 0.24269279837608337, "learning_rate": 4.997546092206129e-06, "loss": 1.2974, "step": 340 }, { "epoch": 0.0025335331205165513, "grad_norm": 0.2175322324037552, "learning_rate": 4.997473705545542e-06, "loss": 1.3239, "step": 350 }, { "epoch": 0.0026059197811027384, "grad_norm": 0.2417760044336319, "learning_rate": 4.997401318884956e-06, "loss": 1.3016, "step": 360 }, { "epoch": 0.0026783064416889255, "grad_norm": 0.21008457243442535, "learning_rate": 4.9973289322243705e-06, "loss": 1.3162, "step": 370 }, { "epoch": 0.0027506931022751127, "grad_norm": 0.21002964675426483, "learning_rate": 4.997256545563784e-06, "loss": 1.2806, "step": 380 }, { "epoch": 0.0028230797628612998, "grad_norm": 0.2070387601852417, "learning_rate": 4.997184158903198e-06, "loss": 1.3012, "step": 390 }, { "epoch": 0.002895466423447487, "grad_norm": 0.22568874061107635, "learning_rate": 4.997111772242611e-06, "loss": 1.2917, "step": 400 }, { "epoch": 0.0029678530840336744, "grad_norm": 0.21147729456424713, "learning_rate": 4.997039385582026e-06, "loss": 1.3087, "step": 410 }, { "epoch": 0.0030402397446198616, "grad_norm": 0.2232261747121811, "learning_rate": 4.996966998921439e-06, "loss": 1.3038, "step": 420 }, { "epoch": 0.0031126264052060487, "grad_norm": 0.2272012084722519, "learning_rate": 4.996894612260853e-06, "loss": 1.2891, "step": 430 }, { "epoch": 0.003185013065792236, "grad_norm": 0.20684392750263214, "learning_rate": 4.996822225600267e-06, "loss": 1.2942, "step": 440 }, { "epoch": 0.003257399726378423, "grad_norm": 0.20101316273212433, "learning_rate": 4.996749838939681e-06, "loss": 1.2889, "step": 450 }, { "epoch": 0.00332978638696461, "grad_norm": 0.22801408171653748, "learning_rate": 4.996677452279095e-06, "loss": 1.285, "step": 460 }, { "epoch": 0.003402173047550797, "grad_norm": 0.222897469997406, "learning_rate": 4.996605065618508e-06, "loss": 1.2837, "step": 470 }, { "epoch": 0.0034745597081369847, "grad_norm": 0.2286527454853058, "learning_rate": 4.996532678957922e-06, "loss": 1.2894, "step": 480 }, { "epoch": 0.003546946368723172, "grad_norm": 0.23111595213413239, "learning_rate": 4.9964602922973356e-06, "loss": 1.2842, "step": 490 }, { "epoch": 0.003619333029309359, "grad_norm": 0.20559774339199066, "learning_rate": 4.99638790563675e-06, "loss": 1.2789, "step": 500 }, { "epoch": 0.003691719689895546, "grad_norm": 0.2076191008090973, "learning_rate": 4.996315518976164e-06, "loss": 1.2871, "step": 510 }, { "epoch": 0.003764106350481733, "grad_norm": 0.1894315779209137, "learning_rate": 4.996243132315577e-06, "loss": 1.2697, "step": 520 }, { "epoch": 0.0038364930110679203, "grad_norm": 0.21035927534103394, "learning_rate": 4.996170745654991e-06, "loss": 1.2744, "step": 530 }, { "epoch": 0.003908879671654108, "grad_norm": 0.22824759781360626, "learning_rate": 4.996098358994405e-06, "loss": 1.2881, "step": 540 }, { "epoch": 0.003981266332240295, "grad_norm": 0.21655461192131042, "learning_rate": 4.996025972333819e-06, "loss": 1.273, "step": 550 }, { "epoch": 0.004053652992826482, "grad_norm": 0.2505953907966614, "learning_rate": 4.9959535856732326e-06, "loss": 1.2879, "step": 560 }, { "epoch": 0.004126039653412669, "grad_norm": 0.21900534629821777, "learning_rate": 4.995881199012646e-06, "loss": 1.2763, "step": 570 }, { "epoch": 0.004198426313998856, "grad_norm": 0.21123918890953064, "learning_rate": 4.995808812352061e-06, "loss": 1.2833, "step": 580 }, { "epoch": 0.0042708129745850434, "grad_norm": 0.20754094421863556, "learning_rate": 4.995736425691474e-06, "loss": 1.2692, "step": 590 }, { "epoch": 0.0043431996351712306, "grad_norm": 0.23484139144420624, "learning_rate": 4.995664039030888e-06, "loss": 1.2596, "step": 600 }, { "epoch": 0.004415586295757418, "grad_norm": 0.2571958303451538, "learning_rate": 4.9955916523703015e-06, "loss": 1.2714, "step": 610 }, { "epoch": 0.004487972956343605, "grad_norm": 0.21375253796577454, "learning_rate": 4.995519265709716e-06, "loss": 1.2718, "step": 620 }, { "epoch": 0.004560359616929792, "grad_norm": 0.20837347209453583, "learning_rate": 4.9954468790491296e-06, "loss": 1.2555, "step": 630 }, { "epoch": 0.004632746277515979, "grad_norm": 0.21926173567771912, "learning_rate": 4.995374492388543e-06, "loss": 1.2565, "step": 640 }, { "epoch": 0.004705132938102166, "grad_norm": 0.21038663387298584, "learning_rate": 4.995302105727957e-06, "loss": 1.2713, "step": 650 }, { "epoch": 0.004777519598688354, "grad_norm": 0.2313099056482315, "learning_rate": 4.99522971906737e-06, "loss": 1.2441, "step": 660 }, { "epoch": 0.004849906259274541, "grad_norm": 0.2143174558877945, "learning_rate": 4.995157332406784e-06, "loss": 1.2497, "step": 670 }, { "epoch": 0.004922292919860728, "grad_norm": 0.21607981622219086, "learning_rate": 4.995084945746198e-06, "loss": 1.2455, "step": 680 }, { "epoch": 0.0049946795804469155, "grad_norm": 0.2096307873725891, "learning_rate": 4.995012559085612e-06, "loss": 1.2614, "step": 690 }, { "epoch": 0.005067066241033103, "grad_norm": 0.20711645483970642, "learning_rate": 4.994940172425026e-06, "loss": 1.2529, "step": 700 }, { "epoch": 0.00513945290161929, "grad_norm": 0.2200125753879547, "learning_rate": 4.994867785764439e-06, "loss": 1.2626, "step": 710 }, { "epoch": 0.005211839562205477, "grad_norm": 0.24058933556079865, "learning_rate": 4.994795399103853e-06, "loss": 1.2615, "step": 720 }, { "epoch": 0.005284226222791664, "grad_norm": 0.21051721274852753, "learning_rate": 4.9947230124432674e-06, "loss": 1.2608, "step": 730 }, { "epoch": 0.005356612883377851, "grad_norm": 0.18748053908348083, "learning_rate": 4.994650625782681e-06, "loss": 1.2579, "step": 740 }, { "epoch": 0.005428999543964038, "grad_norm": 0.4778112769126892, "learning_rate": 4.994578239122095e-06, "loss": 1.2514, "step": 750 }, { "epoch": 0.005501386204550225, "grad_norm": 0.22301283478736877, "learning_rate": 4.994505852461508e-06, "loss": 1.2391, "step": 760 }, { "epoch": 0.0055737728651364124, "grad_norm": 0.21762309968471527, "learning_rate": 4.994433465800923e-06, "loss": 1.2562, "step": 770 }, { "epoch": 0.0056461595257225996, "grad_norm": 0.216139554977417, "learning_rate": 4.994361079140336e-06, "loss": 1.2587, "step": 780 }, { "epoch": 0.005718546186308787, "grad_norm": 0.19104599952697754, "learning_rate": 4.99428869247975e-06, "loss": 1.2622, "step": 790 }, { "epoch": 0.005790932846894974, "grad_norm": 0.205087810754776, "learning_rate": 4.994216305819164e-06, "loss": 1.2465, "step": 800 }, { "epoch": 0.005863319507481162, "grad_norm": 0.20174795389175415, "learning_rate": 4.994143919158578e-06, "loss": 1.2285, "step": 810 }, { "epoch": 0.005935706168067349, "grad_norm": 0.1900462955236435, "learning_rate": 4.994071532497992e-06, "loss": 1.2433, "step": 820 }, { "epoch": 0.006008092828653536, "grad_norm": 0.21302379667758942, "learning_rate": 4.993999145837405e-06, "loss": 1.2355, "step": 830 }, { "epoch": 0.006080479489239723, "grad_norm": 0.19449925422668457, "learning_rate": 4.993926759176819e-06, "loss": 1.2538, "step": 840 }, { "epoch": 0.00615286614982591, "grad_norm": 0.2037796676158905, "learning_rate": 4.993854372516233e-06, "loss": 1.2643, "step": 850 }, { "epoch": 0.006225252810412097, "grad_norm": 0.22023706138134003, "learning_rate": 4.993781985855647e-06, "loss": 1.2358, "step": 860 }, { "epoch": 0.0062976394709982845, "grad_norm": 0.1888815462589264, "learning_rate": 4.993709599195061e-06, "loss": 1.2499, "step": 870 }, { "epoch": 0.006370026131584472, "grad_norm": 0.18520642817020416, "learning_rate": 4.993637212534474e-06, "loss": 1.2459, "step": 880 }, { "epoch": 0.006442412792170659, "grad_norm": 0.1910271942615509, "learning_rate": 4.993564825873889e-06, "loss": 1.2406, "step": 890 }, { "epoch": 0.006514799452756846, "grad_norm": 0.1944640874862671, "learning_rate": 4.993492439213302e-06, "loss": 1.2331, "step": 900 }, { "epoch": 0.006587186113343033, "grad_norm": 0.19614140689373016, "learning_rate": 4.993420052552716e-06, "loss": 1.2319, "step": 910 }, { "epoch": 0.00665957277392922, "grad_norm": 0.20917336642742157, "learning_rate": 4.9933476658921295e-06, "loss": 1.2433, "step": 920 }, { "epoch": 0.006731959434515407, "grad_norm": 0.18660324811935425, "learning_rate": 4.993275279231544e-06, "loss": 1.2266, "step": 930 }, { "epoch": 0.006804346095101594, "grad_norm": 0.20741026103496552, "learning_rate": 4.993202892570958e-06, "loss": 1.2376, "step": 940 }, { "epoch": 0.006876732755687781, "grad_norm": 0.20696569979190826, "learning_rate": 4.993130505910371e-06, "loss": 1.2479, "step": 950 }, { "epoch": 0.006949119416273969, "grad_norm": 0.19880063831806183, "learning_rate": 4.993058119249785e-06, "loss": 1.246, "step": 960 }, { "epoch": 0.0070215060768601565, "grad_norm": 0.21153710782527924, "learning_rate": 4.992985732589199e-06, "loss": 1.2399, "step": 970 }, { "epoch": 0.007093892737446344, "grad_norm": 0.22593656182289124, "learning_rate": 4.992913345928613e-06, "loss": 1.2423, "step": 980 }, { "epoch": 0.007166279398032531, "grad_norm": 0.22546051442623138, "learning_rate": 4.9928409592680265e-06, "loss": 1.2099, "step": 990 }, { "epoch": 0.007238666058618718, "grad_norm": 0.2021579146385193, "learning_rate": 4.99276857260744e-06, "loss": 1.2264, "step": 1000 }, { "epoch": 0.007311052719204905, "grad_norm": 0.2759735584259033, "learning_rate": 4.992696185946855e-06, "loss": 1.2178, "step": 1010 }, { "epoch": 0.007383439379791092, "grad_norm": 0.20149949193000793, "learning_rate": 4.992623799286268e-06, "loss": 1.2289, "step": 1020 }, { "epoch": 0.007455826040377279, "grad_norm": 0.1932690441608429, "learning_rate": 4.992551412625682e-06, "loss": 1.2506, "step": 1030 }, { "epoch": 0.007528212700963466, "grad_norm": 0.2068718671798706, "learning_rate": 4.9924790259650955e-06, "loss": 1.215, "step": 1040 }, { "epoch": 0.0076005993615496535, "grad_norm": 0.22313687205314636, "learning_rate": 4.99240663930451e-06, "loss": 1.2356, "step": 1050 }, { "epoch": 0.007672986022135841, "grad_norm": 0.2034347951412201, "learning_rate": 4.9923342526439235e-06, "loss": 1.2195, "step": 1060 }, { "epoch": 0.007745372682722028, "grad_norm": 0.2164289355278015, "learning_rate": 4.992261865983337e-06, "loss": 1.2069, "step": 1070 }, { "epoch": 0.007817759343308216, "grad_norm": 0.20517635345458984, "learning_rate": 4.992189479322751e-06, "loss": 1.2181, "step": 1080 }, { "epoch": 0.007890146003894403, "grad_norm": 0.19486500322818756, "learning_rate": 4.992117092662165e-06, "loss": 1.229, "step": 1090 }, { "epoch": 0.00796253266448059, "grad_norm": 0.20392891764640808, "learning_rate": 4.992044706001579e-06, "loss": 1.2325, "step": 1100 }, { "epoch": 0.008034919325066777, "grad_norm": 0.19018247723579407, "learning_rate": 4.9919723193409925e-06, "loss": 1.2242, "step": 1110 }, { "epoch": 0.008107305985652964, "grad_norm": 0.2185811698436737, "learning_rate": 4.991899932680406e-06, "loss": 1.2287, "step": 1120 }, { "epoch": 0.008179692646239151, "grad_norm": 0.20160779356956482, "learning_rate": 4.99182754601982e-06, "loss": 1.2131, "step": 1130 }, { "epoch": 0.008252079306825338, "grad_norm": 0.19665688276290894, "learning_rate": 4.991755159359234e-06, "loss": 1.219, "step": 1140 }, { "epoch": 0.008324465967411526, "grad_norm": 0.19984516501426697, "learning_rate": 4.991682772698648e-06, "loss": 1.241, "step": 1150 }, { "epoch": 0.008396852627997713, "grad_norm": 0.19308070838451385, "learning_rate": 4.991610386038061e-06, "loss": 1.213, "step": 1160 }, { "epoch": 0.0084692392885839, "grad_norm": 0.2210283875465393, "learning_rate": 4.991537999377475e-06, "loss": 1.2214, "step": 1170 }, { "epoch": 0.008541625949170087, "grad_norm": 0.19817933440208435, "learning_rate": 4.991465612716889e-06, "loss": 1.2446, "step": 1180 }, { "epoch": 0.008614012609756274, "grad_norm": 0.198905810713768, "learning_rate": 4.991393226056302e-06, "loss": 1.2252, "step": 1190 }, { "epoch": 0.008686399270342461, "grad_norm": 0.21226166188716888, "learning_rate": 4.991320839395717e-06, "loss": 1.2189, "step": 1200 }, { "epoch": 0.008758785930928648, "grad_norm": 0.20412813127040863, "learning_rate": 4.99124845273513e-06, "loss": 1.2299, "step": 1210 }, { "epoch": 0.008831172591514835, "grad_norm": 0.18132169544696808, "learning_rate": 4.991176066074544e-06, "loss": 1.2236, "step": 1220 }, { "epoch": 0.008903559252101022, "grad_norm": 0.2028530240058899, "learning_rate": 4.9911036794139576e-06, "loss": 1.2205, "step": 1230 }, { "epoch": 0.00897594591268721, "grad_norm": 0.21371830999851227, "learning_rate": 4.991031292753372e-06, "loss": 1.213, "step": 1240 }, { "epoch": 0.009048332573273397, "grad_norm": 0.20845739543437958, "learning_rate": 4.990958906092786e-06, "loss": 1.2153, "step": 1250 }, { "epoch": 0.009120719233859584, "grad_norm": 0.19972443580627441, "learning_rate": 4.990886519432199e-06, "loss": 1.2211, "step": 1260 }, { "epoch": 0.009193105894445771, "grad_norm": 0.21171468496322632, "learning_rate": 4.990814132771613e-06, "loss": 1.2159, "step": 1270 }, { "epoch": 0.009265492555031958, "grad_norm": 0.18781454861164093, "learning_rate": 4.9907417461110265e-06, "loss": 1.2305, "step": 1280 }, { "epoch": 0.009337879215618145, "grad_norm": 0.20194801688194275, "learning_rate": 4.990669359450441e-06, "loss": 1.2119, "step": 1290 }, { "epoch": 0.009410265876204332, "grad_norm": 0.20740105211734772, "learning_rate": 4.9905969727898546e-06, "loss": 1.2171, "step": 1300 }, { "epoch": 0.00948265253679052, "grad_norm": 0.19401483237743378, "learning_rate": 4.990524586129268e-06, "loss": 1.2198, "step": 1310 }, { "epoch": 0.009555039197376708, "grad_norm": 0.21922868490219116, "learning_rate": 4.990452199468682e-06, "loss": 1.2135, "step": 1320 }, { "epoch": 0.009627425857962895, "grad_norm": 0.19779783487319946, "learning_rate": 4.990379812808096e-06, "loss": 1.2111, "step": 1330 }, { "epoch": 0.009699812518549083, "grad_norm": 0.23003704845905304, "learning_rate": 4.99030742614751e-06, "loss": 1.2131, "step": 1340 }, { "epoch": 0.00977219917913527, "grad_norm": 0.2075931429862976, "learning_rate": 4.9902350394869235e-06, "loss": 1.2128, "step": 1350 }, { "epoch": 0.009844585839721457, "grad_norm": 0.19751273095607758, "learning_rate": 4.990162652826337e-06, "loss": 1.199, "step": 1360 }, { "epoch": 0.009916972500307644, "grad_norm": 0.20674757659435272, "learning_rate": 4.9900902661657516e-06, "loss": 1.2362, "step": 1370 }, { "epoch": 0.009989359160893831, "grad_norm": 0.19264833629131317, "learning_rate": 4.990017879505165e-06, "loss": 1.229, "step": 1380 }, { "epoch": 0.010061745821480018, "grad_norm": 0.19390186667442322, "learning_rate": 4.989945492844579e-06, "loss": 1.2308, "step": 1390 }, { "epoch": 0.010134132482066205, "grad_norm": 0.20780542492866516, "learning_rate": 4.989873106183992e-06, "loss": 1.1996, "step": 1400 }, { "epoch": 0.010206519142652392, "grad_norm": 0.20205456018447876, "learning_rate": 4.989800719523407e-06, "loss": 1.2062, "step": 1410 }, { "epoch": 0.01027890580323858, "grad_norm": 0.2114095389842987, "learning_rate": 4.9897283328628205e-06, "loss": 1.2147, "step": 1420 }, { "epoch": 0.010351292463824767, "grad_norm": 0.2318940907716751, "learning_rate": 4.989655946202234e-06, "loss": 1.2316, "step": 1430 }, { "epoch": 0.010423679124410954, "grad_norm": 0.20557528734207153, "learning_rate": 4.989583559541648e-06, "loss": 1.2094, "step": 1440 }, { "epoch": 0.01049606578499714, "grad_norm": 0.23943296074867249, "learning_rate": 4.989511172881062e-06, "loss": 1.2044, "step": 1450 }, { "epoch": 0.010568452445583328, "grad_norm": 0.19559936225414276, "learning_rate": 4.989438786220476e-06, "loss": 1.2108, "step": 1460 }, { "epoch": 0.010640839106169515, "grad_norm": 0.22766897082328796, "learning_rate": 4.9893663995598894e-06, "loss": 1.2156, "step": 1470 }, { "epoch": 0.010713225766755702, "grad_norm": 0.1910102516412735, "learning_rate": 4.989294012899303e-06, "loss": 1.2147, "step": 1480 }, { "epoch": 0.01078561242734189, "grad_norm": 0.20596715807914734, "learning_rate": 4.9892216262387175e-06, "loss": 1.2146, "step": 1490 }, { "epoch": 0.010857999087928076, "grad_norm": 0.21965187788009644, "learning_rate": 4.989149239578131e-06, "loss": 1.2115, "step": 1500 }, { "epoch": 0.010930385748514264, "grad_norm": 0.2052324265241623, "learning_rate": 4.989076852917545e-06, "loss": 1.2269, "step": 1510 }, { "epoch": 0.01100277240910045, "grad_norm": 0.19294176995754242, "learning_rate": 4.989004466256958e-06, "loss": 1.2152, "step": 1520 }, { "epoch": 0.011075159069686638, "grad_norm": 0.20509877800941467, "learning_rate": 4.988932079596373e-06, "loss": 1.2036, "step": 1530 }, { "epoch": 0.011147545730272825, "grad_norm": 0.20573221147060394, "learning_rate": 4.9888596929357864e-06, "loss": 1.2033, "step": 1540 }, { "epoch": 0.011219932390859012, "grad_norm": 0.1944323480129242, "learning_rate": 4.9887873062752e-06, "loss": 1.2002, "step": 1550 }, { "epoch": 0.011292319051445199, "grad_norm": 0.20238761603832245, "learning_rate": 4.988714919614614e-06, "loss": 1.1859, "step": 1560 }, { "epoch": 0.011364705712031386, "grad_norm": 0.21487738192081451, "learning_rate": 4.988642532954028e-06, "loss": 1.1908, "step": 1570 }, { "epoch": 0.011437092372617573, "grad_norm": 0.19267559051513672, "learning_rate": 4.988570146293442e-06, "loss": 1.2108, "step": 1580 }, { "epoch": 0.01150947903320376, "grad_norm": 0.21015968918800354, "learning_rate": 4.988497759632855e-06, "loss": 1.1977, "step": 1590 }, { "epoch": 0.011581865693789948, "grad_norm": 0.1899135857820511, "learning_rate": 4.988425372972269e-06, "loss": 1.1996, "step": 1600 }, { "epoch": 0.011654252354376135, "grad_norm": 0.21136519312858582, "learning_rate": 4.9883529863116834e-06, "loss": 1.2074, "step": 1610 }, { "epoch": 0.011726639014962324, "grad_norm": 0.18629726767539978, "learning_rate": 4.988280599651097e-06, "loss": 1.1995, "step": 1620 }, { "epoch": 0.01179902567554851, "grad_norm": 0.18262408673763275, "learning_rate": 4.988208212990511e-06, "loss": 1.1972, "step": 1630 }, { "epoch": 0.011871412336134698, "grad_norm": 0.216887965798378, "learning_rate": 4.988135826329924e-06, "loss": 1.1925, "step": 1640 }, { "epoch": 0.011943798996720885, "grad_norm": 0.18681691586971283, "learning_rate": 4.988063439669339e-06, "loss": 1.21, "step": 1650 }, { "epoch": 0.012016185657307072, "grad_norm": 0.199030801653862, "learning_rate": 4.987991053008752e-06, "loss": 1.2014, "step": 1660 }, { "epoch": 0.01208857231789326, "grad_norm": 0.23438072204589844, "learning_rate": 4.987918666348166e-06, "loss": 1.1913, "step": 1670 }, { "epoch": 0.012160958978479446, "grad_norm": 0.18666905164718628, "learning_rate": 4.98784627968758e-06, "loss": 1.2013, "step": 1680 }, { "epoch": 0.012233345639065633, "grad_norm": 0.20759981870651245, "learning_rate": 4.987773893026994e-06, "loss": 1.2058, "step": 1690 }, { "epoch": 0.01230573229965182, "grad_norm": 0.20983868837356567, "learning_rate": 4.987701506366407e-06, "loss": 1.2153, "step": 1700 }, { "epoch": 0.012378118960238008, "grad_norm": 0.21793246269226074, "learning_rate": 4.9876291197058205e-06, "loss": 1.1994, "step": 1710 }, { "epoch": 0.012450505620824195, "grad_norm": 0.20397932827472687, "learning_rate": 4.987556733045235e-06, "loss": 1.2112, "step": 1720 }, { "epoch": 0.012522892281410382, "grad_norm": 0.202115997672081, "learning_rate": 4.9874843463846485e-06, "loss": 1.2132, "step": 1730 }, { "epoch": 0.012595278941996569, "grad_norm": 0.1943732500076294, "learning_rate": 4.987411959724062e-06, "loss": 1.1877, "step": 1740 }, { "epoch": 0.012667665602582756, "grad_norm": 0.20084722340106964, "learning_rate": 4.987339573063476e-06, "loss": 1.1773, "step": 1750 }, { "epoch": 0.012740052263168943, "grad_norm": 0.19970197975635529, "learning_rate": 4.98726718640289e-06, "loss": 1.1826, "step": 1760 }, { "epoch": 0.01281243892375513, "grad_norm": 0.20870880782604218, "learning_rate": 4.987194799742304e-06, "loss": 1.1833, "step": 1770 }, { "epoch": 0.012884825584341317, "grad_norm": 0.2071852833032608, "learning_rate": 4.9871224130817175e-06, "loss": 1.1955, "step": 1780 }, { "epoch": 0.012957212244927505, "grad_norm": 0.21110813319683075, "learning_rate": 4.987050026421131e-06, "loss": 1.1876, "step": 1790 }, { "epoch": 0.013029598905513692, "grad_norm": 0.18131721019744873, "learning_rate": 4.9869776397605455e-06, "loss": 1.1867, "step": 1800 }, { "epoch": 0.013101985566099879, "grad_norm": 0.22368177771568298, "learning_rate": 4.986905253099959e-06, "loss": 1.199, "step": 1810 }, { "epoch": 0.013174372226686066, "grad_norm": 0.19988280534744263, "learning_rate": 4.986832866439373e-06, "loss": 1.1972, "step": 1820 }, { "epoch": 0.013246758887272253, "grad_norm": 0.2156451791524887, "learning_rate": 4.986760479778786e-06, "loss": 1.1999, "step": 1830 }, { "epoch": 0.01331914554785844, "grad_norm": 0.1884111911058426, "learning_rate": 4.986688093118201e-06, "loss": 1.2085, "step": 1840 }, { "epoch": 0.013391532208444627, "grad_norm": 0.20055466890335083, "learning_rate": 4.9866157064576145e-06, "loss": 1.2014, "step": 1850 }, { "epoch": 0.013463918869030814, "grad_norm": 0.22643277049064636, "learning_rate": 4.986543319797028e-06, "loss": 1.194, "step": 1860 }, { "epoch": 0.013536305529617002, "grad_norm": 0.2079644799232483, "learning_rate": 4.986470933136442e-06, "loss": 1.1998, "step": 1870 }, { "epoch": 0.013608692190203189, "grad_norm": 0.19740234315395355, "learning_rate": 4.986398546475856e-06, "loss": 1.1917, "step": 1880 }, { "epoch": 0.013681078850789376, "grad_norm": 0.18155385553836823, "learning_rate": 4.98632615981527e-06, "loss": 1.1904, "step": 1890 }, { "epoch": 0.013753465511375563, "grad_norm": 0.1898881047964096, "learning_rate": 4.986253773154683e-06, "loss": 1.2094, "step": 1900 }, { "epoch": 0.013825852171961752, "grad_norm": 0.19113953411579132, "learning_rate": 4.986181386494097e-06, "loss": 1.1973, "step": 1910 }, { "epoch": 0.013898238832547939, "grad_norm": 0.18314586579799652, "learning_rate": 4.986108999833511e-06, "loss": 1.1954, "step": 1920 }, { "epoch": 0.013970625493134126, "grad_norm": 0.21783554553985596, "learning_rate": 4.986036613172925e-06, "loss": 1.1947, "step": 1930 }, { "epoch": 0.014043012153720313, "grad_norm": 0.18775740265846252, "learning_rate": 4.985964226512339e-06, "loss": 1.1773, "step": 1940 }, { "epoch": 0.0141153988143065, "grad_norm": 0.1981116682291031, "learning_rate": 4.985891839851752e-06, "loss": 1.1887, "step": 1950 }, { "epoch": 0.014187785474892687, "grad_norm": 0.19518138468265533, "learning_rate": 4.985819453191166e-06, "loss": 1.1832, "step": 1960 }, { "epoch": 0.014260172135478874, "grad_norm": 0.18999353051185608, "learning_rate": 4.98574706653058e-06, "loss": 1.1854, "step": 1970 }, { "epoch": 0.014332558796065062, "grad_norm": 0.18030518293380737, "learning_rate": 4.985674679869994e-06, "loss": 1.1873, "step": 1980 }, { "epoch": 0.014404945456651249, "grad_norm": 0.19327767193317413, "learning_rate": 4.985602293209408e-06, "loss": 1.1978, "step": 1990 }, { "epoch": 0.014477332117237436, "grad_norm": 0.18995395302772522, "learning_rate": 4.985529906548821e-06, "loss": 1.1941, "step": 2000 }, { "epoch": 0.014549718777823623, "grad_norm": 0.19528520107269287, "learning_rate": 4.985457519888236e-06, "loss": 1.1841, "step": 2010 }, { "epoch": 0.01462210543840981, "grad_norm": 0.20308247208595276, "learning_rate": 4.985385133227649e-06, "loss": 1.19, "step": 2020 }, { "epoch": 0.014694492098995997, "grad_norm": 0.22538486123085022, "learning_rate": 4.985312746567063e-06, "loss": 1.1979, "step": 2030 }, { "epoch": 0.014766878759582184, "grad_norm": 0.19460053741931915, "learning_rate": 4.9852403599064766e-06, "loss": 1.1837, "step": 2040 }, { "epoch": 0.014839265420168371, "grad_norm": 0.25119996070861816, "learning_rate": 4.985167973245891e-06, "loss": 1.1851, "step": 2050 }, { "epoch": 0.014911652080754558, "grad_norm": 0.1847628504037857, "learning_rate": 4.985095586585305e-06, "loss": 1.1955, "step": 2060 }, { "epoch": 0.014984038741340746, "grad_norm": 0.18682530522346497, "learning_rate": 4.985023199924718e-06, "loss": 1.1957, "step": 2070 }, { "epoch": 0.015056425401926933, "grad_norm": 0.18913131952285767, "learning_rate": 4.984950813264132e-06, "loss": 1.1825, "step": 2080 }, { "epoch": 0.01512881206251312, "grad_norm": 0.18790295720100403, "learning_rate": 4.984878426603546e-06, "loss": 1.1794, "step": 2090 }, { "epoch": 0.015201198723099307, "grad_norm": 0.17898432910442352, "learning_rate": 4.98480603994296e-06, "loss": 1.1643, "step": 2100 }, { "epoch": 0.015273585383685494, "grad_norm": 0.18662859499454498, "learning_rate": 4.9847336532823736e-06, "loss": 1.1796, "step": 2110 }, { "epoch": 0.015345972044271681, "grad_norm": 0.1902911514043808, "learning_rate": 4.984661266621787e-06, "loss": 1.1846, "step": 2120 }, { "epoch": 0.015418358704857868, "grad_norm": 0.21089226007461548, "learning_rate": 4.984588879961202e-06, "loss": 1.1826, "step": 2130 }, { "epoch": 0.015490745365444055, "grad_norm": 0.19871971011161804, "learning_rate": 4.984516493300615e-06, "loss": 1.1881, "step": 2140 }, { "epoch": 0.015563132026030243, "grad_norm": 0.1826990395784378, "learning_rate": 4.984444106640029e-06, "loss": 1.1654, "step": 2150 }, { "epoch": 0.01563551868661643, "grad_norm": 0.22978819906711578, "learning_rate": 4.9843717199794425e-06, "loss": 1.1748, "step": 2160 }, { "epoch": 0.01570790534720262, "grad_norm": 0.22456589341163635, "learning_rate": 4.984299333318857e-06, "loss": 1.1922, "step": 2170 }, { "epoch": 0.015780292007788806, "grad_norm": 0.19744843244552612, "learning_rate": 4.984226946658271e-06, "loss": 1.1794, "step": 2180 }, { "epoch": 0.015852678668374993, "grad_norm": 0.19209572672843933, "learning_rate": 4.984154559997684e-06, "loss": 1.1828, "step": 2190 }, { "epoch": 0.01592506532896118, "grad_norm": 0.22542759776115417, "learning_rate": 4.984082173337098e-06, "loss": 1.1772, "step": 2200 }, { "epoch": 0.015997451989547367, "grad_norm": 0.21402914822101593, "learning_rate": 4.984009786676512e-06, "loss": 1.1847, "step": 2210 }, { "epoch": 0.016069838650133554, "grad_norm": 0.3712976574897766, "learning_rate": 4.983937400015926e-06, "loss": 1.1727, "step": 2220 }, { "epoch": 0.01614222531071974, "grad_norm": 0.21875779330730438, "learning_rate": 4.983865013355339e-06, "loss": 1.1844, "step": 2230 }, { "epoch": 0.01621461197130593, "grad_norm": 0.1886206865310669, "learning_rate": 4.983792626694753e-06, "loss": 1.1875, "step": 2240 }, { "epoch": 0.016286998631892115, "grad_norm": 0.2019553780555725, "learning_rate": 4.983720240034167e-06, "loss": 1.1729, "step": 2250 }, { "epoch": 0.016359385292478303, "grad_norm": 0.21517504751682281, "learning_rate": 4.98364785337358e-06, "loss": 1.1713, "step": 2260 }, { "epoch": 0.01643177195306449, "grad_norm": 0.18494921922683716, "learning_rate": 4.983575466712994e-06, "loss": 1.1786, "step": 2270 }, { "epoch": 0.016504158613650677, "grad_norm": 0.3039611876010895, "learning_rate": 4.9835030800524084e-06, "loss": 1.1819, "step": 2280 }, { "epoch": 0.016576545274236864, "grad_norm": 0.23030070960521698, "learning_rate": 4.983430693391822e-06, "loss": 1.1915, "step": 2290 }, { "epoch": 0.01664893193482305, "grad_norm": 0.1895866096019745, "learning_rate": 4.983358306731236e-06, "loss": 1.1782, "step": 2300 }, { "epoch": 0.016721318595409238, "grad_norm": 0.19012758135795593, "learning_rate": 4.983285920070649e-06, "loss": 1.1894, "step": 2310 }, { "epoch": 0.016793705255995425, "grad_norm": 0.18805542588233948, "learning_rate": 4.983213533410064e-06, "loss": 1.1802, "step": 2320 }, { "epoch": 0.016866091916581612, "grad_norm": 0.19105587899684906, "learning_rate": 4.983141146749477e-06, "loss": 1.1797, "step": 2330 }, { "epoch": 0.0169384785771678, "grad_norm": 0.22354061901569366, "learning_rate": 4.983068760088891e-06, "loss": 1.1898, "step": 2340 }, { "epoch": 0.017010865237753987, "grad_norm": 0.1948034018278122, "learning_rate": 4.982996373428305e-06, "loss": 1.166, "step": 2350 }, { "epoch": 0.017083251898340174, "grad_norm": 0.18683524429798126, "learning_rate": 4.982923986767719e-06, "loss": 1.1826, "step": 2360 }, { "epoch": 0.01715563855892636, "grad_norm": 0.19329576194286346, "learning_rate": 4.982851600107133e-06, "loss": 1.1853, "step": 2370 }, { "epoch": 0.017228025219512548, "grad_norm": 0.2141970992088318, "learning_rate": 4.982779213446546e-06, "loss": 1.172, "step": 2380 }, { "epoch": 0.017300411880098735, "grad_norm": 0.20139099657535553, "learning_rate": 4.98270682678596e-06, "loss": 1.178, "step": 2390 }, { "epoch": 0.017372798540684922, "grad_norm": 0.22393347322940826, "learning_rate": 4.982634440125374e-06, "loss": 1.1724, "step": 2400 }, { "epoch": 0.01744518520127111, "grad_norm": 0.18857796490192413, "learning_rate": 4.982562053464788e-06, "loss": 1.1663, "step": 2410 }, { "epoch": 0.017517571861857296, "grad_norm": 0.18142379820346832, "learning_rate": 4.982489666804202e-06, "loss": 1.1972, "step": 2420 }, { "epoch": 0.017589958522443484, "grad_norm": 0.20119883120059967, "learning_rate": 4.982417280143615e-06, "loss": 1.1719, "step": 2430 }, { "epoch": 0.01766234518302967, "grad_norm": 0.20527637004852295, "learning_rate": 4.98234489348303e-06, "loss": 1.1612, "step": 2440 }, { "epoch": 0.017734731843615858, "grad_norm": 0.19073791801929474, "learning_rate": 4.982272506822443e-06, "loss": 1.1752, "step": 2450 }, { "epoch": 0.017807118504202045, "grad_norm": 0.19779928028583527, "learning_rate": 4.982200120161857e-06, "loss": 1.1877, "step": 2460 }, { "epoch": 0.017879505164788232, "grad_norm": 0.2000398486852646, "learning_rate": 4.9821277335012705e-06, "loss": 1.169, "step": 2470 }, { "epoch": 0.01795189182537442, "grad_norm": 0.19572487473487854, "learning_rate": 4.982055346840685e-06, "loss": 1.1985, "step": 2480 }, { "epoch": 0.018024278485960606, "grad_norm": 0.2484935224056244, "learning_rate": 4.981982960180099e-06, "loss": 1.1692, "step": 2490 }, { "epoch": 0.018096665146546793, "grad_norm": 0.18102866411209106, "learning_rate": 4.981910573519512e-06, "loss": 1.1632, "step": 2500 }, { "epoch": 0.01816905180713298, "grad_norm": 0.18723329901695251, "learning_rate": 4.981838186858926e-06, "loss": 1.1763, "step": 2510 }, { "epoch": 0.018241438467719168, "grad_norm": 0.201924130320549, "learning_rate": 4.98176580019834e-06, "loss": 1.1712, "step": 2520 }, { "epoch": 0.018313825128305355, "grad_norm": 0.2887810468673706, "learning_rate": 4.981693413537754e-06, "loss": 1.1868, "step": 2530 }, { "epoch": 0.018386211788891542, "grad_norm": 0.17751596868038177, "learning_rate": 4.9816210268771675e-06, "loss": 1.1815, "step": 2540 }, { "epoch": 0.01845859844947773, "grad_norm": 0.20777058601379395, "learning_rate": 4.981548640216581e-06, "loss": 1.1607, "step": 2550 }, { "epoch": 0.018530985110063916, "grad_norm": 0.2111022174358368, "learning_rate": 4.981476253555995e-06, "loss": 1.1709, "step": 2560 }, { "epoch": 0.018603371770650103, "grad_norm": 0.1838046908378601, "learning_rate": 4.981403866895409e-06, "loss": 1.1658, "step": 2570 }, { "epoch": 0.01867575843123629, "grad_norm": 0.20409898459911346, "learning_rate": 4.981331480234823e-06, "loss": 1.161, "step": 2580 }, { "epoch": 0.018748145091822477, "grad_norm": 0.25109410285949707, "learning_rate": 4.9812590935742365e-06, "loss": 1.1483, "step": 2590 }, { "epoch": 0.018820531752408665, "grad_norm": 0.25918787717819214, "learning_rate": 4.98118670691365e-06, "loss": 1.1781, "step": 2600 }, { "epoch": 0.01889291841299485, "grad_norm": 0.18770872056484222, "learning_rate": 4.9811143202530645e-06, "loss": 1.1732, "step": 2610 }, { "epoch": 0.01896530507358104, "grad_norm": 0.195766419172287, "learning_rate": 4.981041933592478e-06, "loss": 1.1673, "step": 2620 }, { "epoch": 0.019037691734167226, "grad_norm": 0.19628261029720306, "learning_rate": 4.980969546931892e-06, "loss": 1.1687, "step": 2630 }, { "epoch": 0.019110078394753417, "grad_norm": 0.22135888040065765, "learning_rate": 4.980897160271305e-06, "loss": 1.1664, "step": 2640 }, { "epoch": 0.019182465055339604, "grad_norm": 0.19807446002960205, "learning_rate": 4.98082477361072e-06, "loss": 1.1762, "step": 2650 }, { "epoch": 0.01925485171592579, "grad_norm": 0.3103126883506775, "learning_rate": 4.9807523869501335e-06, "loss": 1.1723, "step": 2660 }, { "epoch": 0.019327238376511978, "grad_norm": 0.18378371000289917, "learning_rate": 4.980680000289547e-06, "loss": 1.185, "step": 2670 }, { "epoch": 0.019399625037098165, "grad_norm": 0.1914234310388565, "learning_rate": 4.980607613628961e-06, "loss": 1.17, "step": 2680 }, { "epoch": 0.019472011697684352, "grad_norm": 0.18758605420589447, "learning_rate": 4.980535226968375e-06, "loss": 1.1857, "step": 2690 }, { "epoch": 0.01954439835827054, "grad_norm": 0.19784751534461975, "learning_rate": 4.980462840307789e-06, "loss": 1.1627, "step": 2700 }, { "epoch": 0.019616785018856726, "grad_norm": 0.2056863158941269, "learning_rate": 4.980390453647202e-06, "loss": 1.1744, "step": 2710 }, { "epoch": 0.019689171679442913, "grad_norm": 0.23083150386810303, "learning_rate": 4.980318066986616e-06, "loss": 1.1682, "step": 2720 }, { "epoch": 0.0197615583400291, "grad_norm": 0.2089979201555252, "learning_rate": 4.9802456803260305e-06, "loss": 1.171, "step": 2730 }, { "epoch": 0.019833945000615288, "grad_norm": 0.36265090107917786, "learning_rate": 4.980173293665444e-06, "loss": 1.1738, "step": 2740 }, { "epoch": 0.019906331661201475, "grad_norm": 0.21013633906841278, "learning_rate": 4.980100907004858e-06, "loss": 1.1591, "step": 2750 }, { "epoch": 0.019978718321787662, "grad_norm": 0.190397247672081, "learning_rate": 4.980028520344271e-06, "loss": 1.1761, "step": 2760 }, { "epoch": 0.02005110498237385, "grad_norm": 0.2045404314994812, "learning_rate": 4.979956133683685e-06, "loss": 1.1701, "step": 2770 }, { "epoch": 0.020123491642960036, "grad_norm": 0.18745577335357666, "learning_rate": 4.9798837470230986e-06, "loss": 1.1736, "step": 2780 }, { "epoch": 0.020195878303546223, "grad_norm": 0.2338806539773941, "learning_rate": 4.979811360362512e-06, "loss": 1.1565, "step": 2790 }, { "epoch": 0.02026826496413241, "grad_norm": 0.21875229477882385, "learning_rate": 4.979738973701927e-06, "loss": 1.1576, "step": 2800 }, { "epoch": 0.020340651624718598, "grad_norm": 0.18577270209789276, "learning_rate": 4.97966658704134e-06, "loss": 1.1519, "step": 2810 }, { "epoch": 0.020413038285304785, "grad_norm": 0.20935316383838654, "learning_rate": 4.979594200380754e-06, "loss": 1.168, "step": 2820 }, { "epoch": 0.020485424945890972, "grad_norm": 0.2035789042711258, "learning_rate": 4.9795218137201675e-06, "loss": 1.1657, "step": 2830 }, { "epoch": 0.02055781160647716, "grad_norm": 0.1842500865459442, "learning_rate": 4.979449427059582e-06, "loss": 1.1898, "step": 2840 }, { "epoch": 0.020630198267063346, "grad_norm": 0.18557128310203552, "learning_rate": 4.9793770403989956e-06, "loss": 1.1708, "step": 2850 }, { "epoch": 0.020702584927649533, "grad_norm": 0.19317933917045593, "learning_rate": 4.979304653738409e-06, "loss": 1.1572, "step": 2860 }, { "epoch": 0.02077497158823572, "grad_norm": 0.17931902408599854, "learning_rate": 4.979232267077823e-06, "loss": 1.1581, "step": 2870 }, { "epoch": 0.020847358248821907, "grad_norm": 0.20684286952018738, "learning_rate": 4.979159880417237e-06, "loss": 1.1828, "step": 2880 }, { "epoch": 0.020919744909408095, "grad_norm": 0.19323229789733887, "learning_rate": 4.979087493756651e-06, "loss": 1.1456, "step": 2890 }, { "epoch": 0.02099213156999428, "grad_norm": 0.19767233729362488, "learning_rate": 4.9790151070960645e-06, "loss": 1.1742, "step": 2900 }, { "epoch": 0.02106451823058047, "grad_norm": 0.2398158460855484, "learning_rate": 4.978942720435478e-06, "loss": 1.1721, "step": 2910 }, { "epoch": 0.021136904891166656, "grad_norm": 0.20536300539970398, "learning_rate": 4.978870333774893e-06, "loss": 1.158, "step": 2920 }, { "epoch": 0.021209291551752843, "grad_norm": 0.20128795504570007, "learning_rate": 4.978797947114306e-06, "loss": 1.1632, "step": 2930 }, { "epoch": 0.02128167821233903, "grad_norm": 0.19307374954223633, "learning_rate": 4.97872556045372e-06, "loss": 1.1718, "step": 2940 }, { "epoch": 0.021354064872925217, "grad_norm": 0.19423744082450867, "learning_rate": 4.9786531737931334e-06, "loss": 1.1568, "step": 2950 }, { "epoch": 0.021426451533511404, "grad_norm": 0.2026156485080719, "learning_rate": 4.978580787132548e-06, "loss": 1.168, "step": 2960 }, { "epoch": 0.02149883819409759, "grad_norm": 0.21222802996635437, "learning_rate": 4.9785084004719615e-06, "loss": 1.1588, "step": 2970 }, { "epoch": 0.02157122485468378, "grad_norm": 0.1813318431377411, "learning_rate": 4.978436013811375e-06, "loss": 1.1752, "step": 2980 }, { "epoch": 0.021643611515269966, "grad_norm": 0.1816568523645401, "learning_rate": 4.978363627150789e-06, "loss": 1.1586, "step": 2990 }, { "epoch": 0.021715998175856153, "grad_norm": 0.18530064821243286, "learning_rate": 4.978291240490203e-06, "loss": 1.1684, "step": 3000 }, { "epoch": 0.02178838483644234, "grad_norm": 0.2098444104194641, "learning_rate": 4.978218853829617e-06, "loss": 1.1685, "step": 3010 }, { "epoch": 0.021860771497028527, "grad_norm": 0.17709197103977203, "learning_rate": 4.9781464671690304e-06, "loss": 1.1557, "step": 3020 }, { "epoch": 0.021933158157614714, "grad_norm": 0.21442793309688568, "learning_rate": 4.978074080508444e-06, "loss": 1.1649, "step": 3030 }, { "epoch": 0.0220055448182009, "grad_norm": 0.1951083093881607, "learning_rate": 4.9780016938478585e-06, "loss": 1.1499, "step": 3040 }, { "epoch": 0.02207793147878709, "grad_norm": 0.18640479445457458, "learning_rate": 4.977929307187272e-06, "loss": 1.1664, "step": 3050 }, { "epoch": 0.022150318139373276, "grad_norm": 0.18357819318771362, "learning_rate": 4.977856920526686e-06, "loss": 1.1744, "step": 3060 }, { "epoch": 0.022222704799959463, "grad_norm": 0.23373299837112427, "learning_rate": 4.977784533866099e-06, "loss": 1.1594, "step": 3070 }, { "epoch": 0.02229509146054565, "grad_norm": 0.2267906218767166, "learning_rate": 4.977712147205514e-06, "loss": 1.161, "step": 3080 }, { "epoch": 0.022367478121131837, "grad_norm": 0.19738420844078064, "learning_rate": 4.9776397605449274e-06, "loss": 1.1524, "step": 3090 }, { "epoch": 0.022439864781718024, "grad_norm": 0.22471626102924347, "learning_rate": 4.977567373884341e-06, "loss": 1.1426, "step": 3100 }, { "epoch": 0.02251225144230421, "grad_norm": 0.18693824112415314, "learning_rate": 4.977494987223755e-06, "loss": 1.1516, "step": 3110 }, { "epoch": 0.022584638102890398, "grad_norm": 0.29740896821022034, "learning_rate": 4.977422600563169e-06, "loss": 1.1545, "step": 3120 }, { "epoch": 0.022657024763476585, "grad_norm": 0.19464527070522308, "learning_rate": 4.977350213902583e-06, "loss": 1.1474, "step": 3130 }, { "epoch": 0.022729411424062772, "grad_norm": 0.19471614062786102, "learning_rate": 4.977277827241996e-06, "loss": 1.1441, "step": 3140 }, { "epoch": 0.02280179808464896, "grad_norm": 0.1917818784713745, "learning_rate": 4.97720544058141e-06, "loss": 1.1588, "step": 3150 }, { "epoch": 0.022874184745235147, "grad_norm": 0.20581530034542084, "learning_rate": 4.977133053920824e-06, "loss": 1.1644, "step": 3160 }, { "epoch": 0.022946571405821334, "grad_norm": 0.19986987113952637, "learning_rate": 4.977060667260238e-06, "loss": 1.153, "step": 3170 }, { "epoch": 0.02301895806640752, "grad_norm": 0.195315420627594, "learning_rate": 4.976988280599652e-06, "loss": 1.1563, "step": 3180 }, { "epoch": 0.023091344726993708, "grad_norm": 0.2056894749403, "learning_rate": 4.976915893939065e-06, "loss": 1.1605, "step": 3190 }, { "epoch": 0.023163731387579895, "grad_norm": 0.19704991579055786, "learning_rate": 4.976843507278479e-06, "loss": 1.1635, "step": 3200 }, { "epoch": 0.023236118048166082, "grad_norm": 0.2168043702840805, "learning_rate": 4.976771120617893e-06, "loss": 1.165, "step": 3210 }, { "epoch": 0.02330850470875227, "grad_norm": 0.21489858627319336, "learning_rate": 4.976698733957307e-06, "loss": 1.1592, "step": 3220 }, { "epoch": 0.02338089136933846, "grad_norm": 0.18227837979793549, "learning_rate": 4.976626347296721e-06, "loss": 1.1573, "step": 3230 }, { "epoch": 0.023453278029924647, "grad_norm": 0.20387127995491028, "learning_rate": 4.976553960636134e-06, "loss": 1.1611, "step": 3240 }, { "epoch": 0.023525664690510834, "grad_norm": 0.17616231739521027, "learning_rate": 4.976481573975549e-06, "loss": 1.1614, "step": 3250 }, { "epoch": 0.02359805135109702, "grad_norm": 0.1885817050933838, "learning_rate": 4.976409187314962e-06, "loss": 1.152, "step": 3260 }, { "epoch": 0.02367043801168321, "grad_norm": 0.22119595110416412, "learning_rate": 4.976336800654376e-06, "loss": 1.1541, "step": 3270 }, { "epoch": 0.023742824672269396, "grad_norm": 0.24721552431583405, "learning_rate": 4.9762644139937895e-06, "loss": 1.1419, "step": 3280 }, { "epoch": 0.023815211332855583, "grad_norm": 0.20840491354465485, "learning_rate": 4.976192027333203e-06, "loss": 1.157, "step": 3290 }, { "epoch": 0.02388759799344177, "grad_norm": 0.1903543770313263, "learning_rate": 4.976119640672617e-06, "loss": 1.1492, "step": 3300 }, { "epoch": 0.023959984654027957, "grad_norm": 0.1851651966571808, "learning_rate": 4.976047254012031e-06, "loss": 1.1473, "step": 3310 }, { "epoch": 0.024032371314614144, "grad_norm": 0.18348610401153564, "learning_rate": 4.975974867351445e-06, "loss": 1.1615, "step": 3320 }, { "epoch": 0.02410475797520033, "grad_norm": 0.19050352275371552, "learning_rate": 4.9759024806908585e-06, "loss": 1.1705, "step": 3330 }, { "epoch": 0.02417714463578652, "grad_norm": 0.2005050927400589, "learning_rate": 4.975830094030272e-06, "loss": 1.1582, "step": 3340 }, { "epoch": 0.024249531296372705, "grad_norm": 0.19381307065486908, "learning_rate": 4.975757707369686e-06, "loss": 1.165, "step": 3350 }, { "epoch": 0.024321917956958893, "grad_norm": 0.1775844395160675, "learning_rate": 4.9756853207091e-06, "loss": 1.1466, "step": 3360 }, { "epoch": 0.02439430461754508, "grad_norm": 0.1731746643781662, "learning_rate": 4.975612934048514e-06, "loss": 1.1399, "step": 3370 }, { "epoch": 0.024466691278131267, "grad_norm": 0.2276047319173813, "learning_rate": 4.975540547387927e-06, "loss": 1.1665, "step": 3380 }, { "epoch": 0.024539077938717454, "grad_norm": 0.19553668797016144, "learning_rate": 4.975468160727341e-06, "loss": 1.1632, "step": 3390 }, { "epoch": 0.02461146459930364, "grad_norm": 0.19757987558841705, "learning_rate": 4.9753957740667555e-06, "loss": 1.1549, "step": 3400 }, { "epoch": 0.024683851259889828, "grad_norm": 0.19470013678073883, "learning_rate": 4.975323387406169e-06, "loss": 1.1677, "step": 3410 }, { "epoch": 0.024756237920476015, "grad_norm": 0.20019254088401794, "learning_rate": 4.975251000745583e-06, "loss": 1.1456, "step": 3420 }, { "epoch": 0.024828624581062202, "grad_norm": 0.20071542263031006, "learning_rate": 4.975178614084996e-06, "loss": 1.149, "step": 3430 }, { "epoch": 0.02490101124164839, "grad_norm": 0.18767118453979492, "learning_rate": 4.975106227424411e-06, "loss": 1.1633, "step": 3440 }, { "epoch": 0.024973397902234577, "grad_norm": 0.19544526934623718, "learning_rate": 4.975033840763824e-06, "loss": 1.145, "step": 3450 }, { "epoch": 0.025045784562820764, "grad_norm": 0.18626125156879425, "learning_rate": 4.974961454103238e-06, "loss": 1.1457, "step": 3460 }, { "epoch": 0.02511817122340695, "grad_norm": 0.19342540204524994, "learning_rate": 4.974889067442652e-06, "loss": 1.1648, "step": 3470 }, { "epoch": 0.025190557883993138, "grad_norm": 0.20179611444473267, "learning_rate": 4.974816680782066e-06, "loss": 1.1574, "step": 3480 }, { "epoch": 0.025262944544579325, "grad_norm": 0.20184293389320374, "learning_rate": 4.97474429412148e-06, "loss": 1.1542, "step": 3490 }, { "epoch": 0.025335331205165512, "grad_norm": 0.18145424127578735, "learning_rate": 4.974671907460893e-06, "loss": 1.1478, "step": 3500 }, { "epoch": 0.0254077178657517, "grad_norm": 0.2179315686225891, "learning_rate": 4.974599520800307e-06, "loss": 1.1552, "step": 3510 }, { "epoch": 0.025480104526337886, "grad_norm": 0.20705650746822357, "learning_rate": 4.974527134139721e-06, "loss": 1.1547, "step": 3520 }, { "epoch": 0.025552491186924074, "grad_norm": 0.19656804203987122, "learning_rate": 4.974454747479135e-06, "loss": 1.1541, "step": 3530 }, { "epoch": 0.02562487784751026, "grad_norm": 0.18042577803134918, "learning_rate": 4.974382360818549e-06, "loss": 1.1442, "step": 3540 }, { "epoch": 0.025697264508096448, "grad_norm": 0.20010463893413544, "learning_rate": 4.974309974157962e-06, "loss": 1.1454, "step": 3550 }, { "epoch": 0.025769651168682635, "grad_norm": 0.17687994241714478, "learning_rate": 4.974237587497377e-06, "loss": 1.1548, "step": 3560 }, { "epoch": 0.025842037829268822, "grad_norm": 0.17953871190547943, "learning_rate": 4.97416520083679e-06, "loss": 1.1388, "step": 3570 }, { "epoch": 0.02591442448985501, "grad_norm": 0.1789717972278595, "learning_rate": 4.974092814176204e-06, "loss": 1.1522, "step": 3580 }, { "epoch": 0.025986811150441196, "grad_norm": 0.1992194801568985, "learning_rate": 4.9740204275156176e-06, "loss": 1.1413, "step": 3590 }, { "epoch": 0.026059197811027383, "grad_norm": 0.18657977879047394, "learning_rate": 4.973948040855032e-06, "loss": 1.1566, "step": 3600 }, { "epoch": 0.02613158447161357, "grad_norm": 0.1899375021457672, "learning_rate": 4.973875654194446e-06, "loss": 1.1589, "step": 3610 }, { "epoch": 0.026203971132199758, "grad_norm": 0.18726755678653717, "learning_rate": 4.973803267533859e-06, "loss": 1.1479, "step": 3620 }, { "epoch": 0.026276357792785945, "grad_norm": 0.19596411287784576, "learning_rate": 4.973730880873273e-06, "loss": 1.1555, "step": 3630 }, { "epoch": 0.026348744453372132, "grad_norm": 0.19859635829925537, "learning_rate": 4.973658494212687e-06, "loss": 1.1535, "step": 3640 }, { "epoch": 0.02642113111395832, "grad_norm": 0.2296416461467743, "learning_rate": 4.973586107552101e-06, "loss": 1.129, "step": 3650 }, { "epoch": 0.026493517774544506, "grad_norm": 0.1784886121749878, "learning_rate": 4.973513720891515e-06, "loss": 1.148, "step": 3660 }, { "epoch": 0.026565904435130693, "grad_norm": 0.19031654298305511, "learning_rate": 4.973441334230928e-06, "loss": 1.1486, "step": 3670 }, { "epoch": 0.02663829109571688, "grad_norm": 0.192035511136055, "learning_rate": 4.973368947570343e-06, "loss": 1.1436, "step": 3680 }, { "epoch": 0.026710677756303067, "grad_norm": 0.19924819469451904, "learning_rate": 4.973296560909756e-06, "loss": 1.1522, "step": 3690 }, { "epoch": 0.026783064416889255, "grad_norm": 0.17904998362064362, "learning_rate": 4.97322417424917e-06, "loss": 1.1435, "step": 3700 }, { "epoch": 0.02685545107747544, "grad_norm": 0.17476427555084229, "learning_rate": 4.9731517875885835e-06, "loss": 1.1375, "step": 3710 }, { "epoch": 0.02692783773806163, "grad_norm": 0.18144498765468597, "learning_rate": 4.973079400927998e-06, "loss": 1.1413, "step": 3720 }, { "epoch": 0.027000224398647816, "grad_norm": 0.29269012808799744, "learning_rate": 4.973007014267412e-06, "loss": 1.1434, "step": 3730 }, { "epoch": 0.027072611059234003, "grad_norm": 0.3143029510974884, "learning_rate": 4.972934627606825e-06, "loss": 1.127, "step": 3740 }, { "epoch": 0.02714499771982019, "grad_norm": 0.2110947221517563, "learning_rate": 4.972862240946239e-06, "loss": 1.1477, "step": 3750 }, { "epoch": 0.027217384380406377, "grad_norm": 0.18889427185058594, "learning_rate": 4.972789854285653e-06, "loss": 1.1458, "step": 3760 }, { "epoch": 0.027289771040992564, "grad_norm": 0.19204457104206085, "learning_rate": 4.972717467625067e-06, "loss": 1.1528, "step": 3770 }, { "epoch": 0.02736215770157875, "grad_norm": 0.19252029061317444, "learning_rate": 4.9726450809644805e-06, "loss": 1.1411, "step": 3780 }, { "epoch": 0.02743454436216494, "grad_norm": 0.1872684508562088, "learning_rate": 4.972572694303894e-06, "loss": 1.1486, "step": 3790 }, { "epoch": 0.027506931022751126, "grad_norm": 0.189093217253685, "learning_rate": 4.972500307643308e-06, "loss": 1.151, "step": 3800 }, { "epoch": 0.027579317683337316, "grad_norm": 0.19076119363307953, "learning_rate": 4.972427920982722e-06, "loss": 1.1442, "step": 3810 }, { "epoch": 0.027651704343923503, "grad_norm": 0.2054983228445053, "learning_rate": 4.972355534322135e-06, "loss": 1.149, "step": 3820 }, { "epoch": 0.02772409100450969, "grad_norm": 0.19762204587459564, "learning_rate": 4.9722831476615494e-06, "loss": 1.1373, "step": 3830 }, { "epoch": 0.027796477665095878, "grad_norm": 0.20214678347110748, "learning_rate": 4.972210761000963e-06, "loss": 1.1302, "step": 3840 }, { "epoch": 0.027868864325682065, "grad_norm": 0.19313126802444458, "learning_rate": 4.972138374340377e-06, "loss": 1.152, "step": 3850 }, { "epoch": 0.027941250986268252, "grad_norm": 0.19163060188293457, "learning_rate": 4.97206598767979e-06, "loss": 1.1401, "step": 3860 }, { "epoch": 0.02801363764685444, "grad_norm": 0.1946219801902771, "learning_rate": 4.971993601019205e-06, "loss": 1.1586, "step": 3870 }, { "epoch": 0.028086024307440626, "grad_norm": 0.18935218453407288, "learning_rate": 4.971921214358618e-06, "loss": 1.1657, "step": 3880 }, { "epoch": 0.028158410968026813, "grad_norm": 0.864253044128418, "learning_rate": 4.971848827698032e-06, "loss": 1.1543, "step": 3890 }, { "epoch": 0.028230797628613, "grad_norm": 0.1859940141439438, "learning_rate": 4.971776441037446e-06, "loss": 1.1487, "step": 3900 }, { "epoch": 0.028303184289199187, "grad_norm": 0.18587446212768555, "learning_rate": 4.97170405437686e-06, "loss": 1.1423, "step": 3910 }, { "epoch": 0.028375570949785375, "grad_norm": 0.22516964375972748, "learning_rate": 4.971631667716274e-06, "loss": 1.1224, "step": 3920 }, { "epoch": 0.02844795761037156, "grad_norm": 0.19674985110759735, "learning_rate": 4.971559281055687e-06, "loss": 1.1492, "step": 3930 }, { "epoch": 0.02852034427095775, "grad_norm": 0.1964777559041977, "learning_rate": 4.971486894395101e-06, "loss": 1.1433, "step": 3940 }, { "epoch": 0.028592730931543936, "grad_norm": 0.1828773319721222, "learning_rate": 4.9714145077345145e-06, "loss": 1.1462, "step": 3950 }, { "epoch": 0.028665117592130123, "grad_norm": 0.19454605877399445, "learning_rate": 4.971342121073929e-06, "loss": 1.137, "step": 3960 }, { "epoch": 0.02873750425271631, "grad_norm": 0.20798833668231964, "learning_rate": 4.971269734413343e-06, "loss": 1.1352, "step": 3970 }, { "epoch": 0.028809890913302497, "grad_norm": 0.1934082806110382, "learning_rate": 4.971197347752756e-06, "loss": 1.1256, "step": 3980 }, { "epoch": 0.028882277573888684, "grad_norm": 0.19269952178001404, "learning_rate": 4.97112496109217e-06, "loss": 1.147, "step": 3990 }, { "epoch": 0.02895466423447487, "grad_norm": 0.1940053403377533, "learning_rate": 4.971052574431584e-06, "loss": 1.1449, "step": 4000 }, { "epoch": 0.02902705089506106, "grad_norm": 0.18359413743019104, "learning_rate": 4.970980187770998e-06, "loss": 1.1507, "step": 4010 }, { "epoch": 0.029099437555647246, "grad_norm": 0.19637279212474823, "learning_rate": 4.9709078011104115e-06, "loss": 1.14, "step": 4020 }, { "epoch": 0.029171824216233433, "grad_norm": 0.19355088472366333, "learning_rate": 4.970835414449825e-06, "loss": 1.1521, "step": 4030 }, { "epoch": 0.02924421087681962, "grad_norm": 0.18721257150173187, "learning_rate": 4.97076302778924e-06, "loss": 1.1391, "step": 4040 }, { "epoch": 0.029316597537405807, "grad_norm": 0.2009786069393158, "learning_rate": 4.970690641128653e-06, "loss": 1.1205, "step": 4050 }, { "epoch": 0.029388984197991994, "grad_norm": 0.19062365591526031, "learning_rate": 4.970618254468067e-06, "loss": 1.1506, "step": 4060 }, { "epoch": 0.02946137085857818, "grad_norm": 0.2137022167444229, "learning_rate": 4.9705458678074805e-06, "loss": 1.1407, "step": 4070 }, { "epoch": 0.02953375751916437, "grad_norm": 0.18308189511299133, "learning_rate": 4.970473481146895e-06, "loss": 1.1483, "step": 4080 }, { "epoch": 0.029606144179750556, "grad_norm": 0.1955224871635437, "learning_rate": 4.9704010944863085e-06, "loss": 1.1315, "step": 4090 }, { "epoch": 0.029678530840336743, "grad_norm": 0.1963161826133728, "learning_rate": 4.970328707825722e-06, "loss": 1.1519, "step": 4100 }, { "epoch": 0.02975091750092293, "grad_norm": 0.20369768142700195, "learning_rate": 4.970256321165136e-06, "loss": 1.1286, "step": 4110 }, { "epoch": 0.029823304161509117, "grad_norm": 0.18953342735767365, "learning_rate": 4.97018393450455e-06, "loss": 1.1546, "step": 4120 }, { "epoch": 0.029895690822095304, "grad_norm": 0.22297842800617218, "learning_rate": 4.970111547843964e-06, "loss": 1.1373, "step": 4130 }, { "epoch": 0.02996807748268149, "grad_norm": 0.23974451422691345, "learning_rate": 4.9700391611833775e-06, "loss": 1.1384, "step": 4140 }, { "epoch": 0.03004046414326768, "grad_norm": 0.2132536619901657, "learning_rate": 4.969966774522791e-06, "loss": 1.145, "step": 4150 }, { "epoch": 0.030112850803853865, "grad_norm": 0.18748712539672852, "learning_rate": 4.9698943878622056e-06, "loss": 1.129, "step": 4160 }, { "epoch": 0.030185237464440053, "grad_norm": 0.18784211575984955, "learning_rate": 4.969822001201619e-06, "loss": 1.1376, "step": 4170 }, { "epoch": 0.03025762412502624, "grad_norm": 0.19498911499977112, "learning_rate": 4.969749614541033e-06, "loss": 1.1282, "step": 4180 }, { "epoch": 0.030330010785612427, "grad_norm": 0.19395703077316284, "learning_rate": 4.969677227880446e-06, "loss": 1.1319, "step": 4190 }, { "epoch": 0.030402397446198614, "grad_norm": 0.1815170794725418, "learning_rate": 4.969604841219861e-06, "loss": 1.135, "step": 4200 }, { "epoch": 0.0304747841067848, "grad_norm": 0.18967507779598236, "learning_rate": 4.9695324545592745e-06, "loss": 1.1281, "step": 4210 }, { "epoch": 0.030547170767370988, "grad_norm": 0.19109271466732025, "learning_rate": 4.969460067898688e-06, "loss": 1.1424, "step": 4220 }, { "epoch": 0.030619557427957175, "grad_norm": 0.23208124935626984, "learning_rate": 4.969387681238102e-06, "loss": 1.1408, "step": 4230 }, { "epoch": 0.030691944088543362, "grad_norm": 0.18746989965438843, "learning_rate": 4.969315294577516e-06, "loss": 1.143, "step": 4240 }, { "epoch": 0.03076433074912955, "grad_norm": 0.21662504971027374, "learning_rate": 4.96924290791693e-06, "loss": 1.1353, "step": 4250 }, { "epoch": 0.030836717409715737, "grad_norm": 0.2007625848054886, "learning_rate": 4.969170521256343e-06, "loss": 1.1345, "step": 4260 }, { "epoch": 0.030909104070301924, "grad_norm": 0.18364295363426208, "learning_rate": 4.969098134595757e-06, "loss": 1.1357, "step": 4270 }, { "epoch": 0.03098149073088811, "grad_norm": 0.19245269894599915, "learning_rate": 4.9690257479351715e-06, "loss": 1.1326, "step": 4280 }, { "epoch": 0.031053877391474298, "grad_norm": 0.1952354460954666, "learning_rate": 4.968953361274585e-06, "loss": 1.1253, "step": 4290 }, { "epoch": 0.031126264052060485, "grad_norm": 0.18464967608451843, "learning_rate": 4.968880974613999e-06, "loss": 1.1324, "step": 4300 }, { "epoch": 0.031198650712646672, "grad_norm": 0.19587458670139313, "learning_rate": 4.968808587953412e-06, "loss": 1.1399, "step": 4310 }, { "epoch": 0.03127103737323286, "grad_norm": 0.1744762659072876, "learning_rate": 4.968736201292827e-06, "loss": 1.1389, "step": 4320 }, { "epoch": 0.031343424033819046, "grad_norm": 0.17935672402381897, "learning_rate": 4.96866381463224e-06, "loss": 1.1392, "step": 4330 }, { "epoch": 0.03141581069440524, "grad_norm": 0.19451332092285156, "learning_rate": 4.968591427971654e-06, "loss": 1.1409, "step": 4340 }, { "epoch": 0.03148819735499142, "grad_norm": 0.20897798240184784, "learning_rate": 4.968519041311068e-06, "loss": 1.1205, "step": 4350 }, { "epoch": 0.03156058401557761, "grad_norm": 0.20010434091091156, "learning_rate": 4.968446654650481e-06, "loss": 1.1408, "step": 4360 }, { "epoch": 0.031632970676163795, "grad_norm": 0.19705431163311005, "learning_rate": 4.968374267989895e-06, "loss": 1.1392, "step": 4370 }, { "epoch": 0.031705357336749986, "grad_norm": 0.19512054324150085, "learning_rate": 4.9683018813293085e-06, "loss": 1.1481, "step": 4380 }, { "epoch": 0.03177774399733617, "grad_norm": 0.2135220468044281, "learning_rate": 4.968229494668723e-06, "loss": 1.1294, "step": 4390 }, { "epoch": 0.03185013065792236, "grad_norm": 0.18849720060825348, "learning_rate": 4.9681571080081366e-06, "loss": 1.137, "step": 4400 }, { "epoch": 0.03192251731850854, "grad_norm": 0.18328508734703064, "learning_rate": 4.96808472134755e-06, "loss": 1.1302, "step": 4410 }, { "epoch": 0.031994903979094734, "grad_norm": 0.19263693690299988, "learning_rate": 4.968012334686964e-06, "loss": 1.1344, "step": 4420 }, { "epoch": 0.03206729063968092, "grad_norm": 0.17624565958976746, "learning_rate": 4.967939948026378e-06, "loss": 1.1283, "step": 4430 }, { "epoch": 0.03213967730026711, "grad_norm": 0.2214570790529251, "learning_rate": 4.967867561365792e-06, "loss": 1.1212, "step": 4440 }, { "epoch": 0.03221206396085329, "grad_norm": 0.19538843631744385, "learning_rate": 4.9677951747052055e-06, "loss": 1.1519, "step": 4450 }, { "epoch": 0.03228445062143948, "grad_norm": 0.19535070657730103, "learning_rate": 4.967722788044619e-06, "loss": 1.1206, "step": 4460 }, { "epoch": 0.032356837282025666, "grad_norm": 0.20394045114517212, "learning_rate": 4.967650401384034e-06, "loss": 1.1546, "step": 4470 }, { "epoch": 0.03242922394261186, "grad_norm": 0.18652796745300293, "learning_rate": 4.967578014723447e-06, "loss": 1.1371, "step": 4480 }, { "epoch": 0.03250161060319804, "grad_norm": 0.18646922707557678, "learning_rate": 4.967505628062861e-06, "loss": 1.1479, "step": 4490 }, { "epoch": 0.03257399726378423, "grad_norm": 0.1993158608675003, "learning_rate": 4.9674332414022744e-06, "loss": 1.1402, "step": 4500 }, { "epoch": 0.032646383924370415, "grad_norm": 0.18076984584331512, "learning_rate": 4.967360854741689e-06, "loss": 1.1291, "step": 4510 }, { "epoch": 0.032718770584956605, "grad_norm": 0.9336857199668884, "learning_rate": 4.9672884680811025e-06, "loss": 1.136, "step": 4520 }, { "epoch": 0.03279115724554279, "grad_norm": 0.1966332644224167, "learning_rate": 4.967216081420516e-06, "loss": 1.1248, "step": 4530 }, { "epoch": 0.03286354390612898, "grad_norm": 0.20412597060203552, "learning_rate": 4.96714369475993e-06, "loss": 1.1375, "step": 4540 }, { "epoch": 0.03293593056671516, "grad_norm": 0.1918439120054245, "learning_rate": 4.967071308099344e-06, "loss": 1.1298, "step": 4550 }, { "epoch": 0.033008317227301354, "grad_norm": 0.18617042899131775, "learning_rate": 4.966998921438758e-06, "loss": 1.1299, "step": 4560 }, { "epoch": 0.03308070388788754, "grad_norm": 0.18576890230178833, "learning_rate": 4.9669265347781714e-06, "loss": 1.127, "step": 4570 }, { "epoch": 0.03315309054847373, "grad_norm": 0.18330788612365723, "learning_rate": 4.966854148117585e-06, "loss": 1.1222, "step": 4580 }, { "epoch": 0.03322547720905991, "grad_norm": 0.19359582662582397, "learning_rate": 4.966781761456999e-06, "loss": 1.1218, "step": 4590 }, { "epoch": 0.0332978638696461, "grad_norm": 0.19401496648788452, "learning_rate": 4.966709374796413e-06, "loss": 1.1386, "step": 4600 }, { "epoch": 0.033370250530232286, "grad_norm": 0.18727423250675201, "learning_rate": 4.966636988135827e-06, "loss": 1.1225, "step": 4610 }, { "epoch": 0.033442637190818476, "grad_norm": 0.19162550568580627, "learning_rate": 4.96656460147524e-06, "loss": 1.1226, "step": 4620 }, { "epoch": 0.03351502385140466, "grad_norm": 0.18989378213882446, "learning_rate": 4.966492214814654e-06, "loss": 1.1409, "step": 4630 }, { "epoch": 0.03358741051199085, "grad_norm": 0.17028242349624634, "learning_rate": 4.9664198281540684e-06, "loss": 1.1353, "step": 4640 }, { "epoch": 0.033659797172577034, "grad_norm": 0.18227070569992065, "learning_rate": 4.966347441493482e-06, "loss": 1.136, "step": 4650 }, { "epoch": 0.033732183833163225, "grad_norm": 0.1902882605791092, "learning_rate": 4.966275054832896e-06, "loss": 1.1392, "step": 4660 }, { "epoch": 0.03380457049374941, "grad_norm": 0.17739436030387878, "learning_rate": 4.966202668172309e-06, "loss": 1.1258, "step": 4670 }, { "epoch": 0.0338769571543356, "grad_norm": 0.187397763133049, "learning_rate": 4.966130281511724e-06, "loss": 1.1261, "step": 4680 }, { "epoch": 0.03394934381492179, "grad_norm": 0.17765313386917114, "learning_rate": 4.966057894851137e-06, "loss": 1.1252, "step": 4690 }, { "epoch": 0.03402173047550797, "grad_norm": 0.18505384027957916, "learning_rate": 4.965985508190551e-06, "loss": 1.1257, "step": 4700 }, { "epoch": 0.034094117136094164, "grad_norm": 0.19399258494377136, "learning_rate": 4.965913121529965e-06, "loss": 1.1236, "step": 4710 }, { "epoch": 0.03416650379668035, "grad_norm": 0.17900791764259338, "learning_rate": 4.965840734869379e-06, "loss": 1.13, "step": 4720 }, { "epoch": 0.03423889045726654, "grad_norm": 0.17912939190864563, "learning_rate": 4.965768348208793e-06, "loss": 1.1416, "step": 4730 }, { "epoch": 0.03431127711785272, "grad_norm": 0.1887941062450409, "learning_rate": 4.965695961548206e-06, "loss": 1.1308, "step": 4740 }, { "epoch": 0.03438366377843891, "grad_norm": 0.19085676968097687, "learning_rate": 4.96562357488762e-06, "loss": 1.1374, "step": 4750 }, { "epoch": 0.034456050439025096, "grad_norm": 0.1932060867547989, "learning_rate": 4.965551188227034e-06, "loss": 1.1412, "step": 4760 }, { "epoch": 0.03452843709961129, "grad_norm": 0.1827419102191925, "learning_rate": 4.965478801566448e-06, "loss": 1.1221, "step": 4770 }, { "epoch": 0.03460082376019747, "grad_norm": 0.31538698077201843, "learning_rate": 4.965406414905862e-06, "loss": 1.1423, "step": 4780 }, { "epoch": 0.03467321042078366, "grad_norm": 0.23767271637916565, "learning_rate": 4.965334028245275e-06, "loss": 1.1166, "step": 4790 }, { "epoch": 0.034745597081369844, "grad_norm": 0.1903272569179535, "learning_rate": 4.96526164158469e-06, "loss": 1.1551, "step": 4800 }, { "epoch": 0.034817983741956035, "grad_norm": 0.1915259212255478, "learning_rate": 4.965189254924103e-06, "loss": 1.1427, "step": 4810 }, { "epoch": 0.03489037040254222, "grad_norm": 0.18546457588672638, "learning_rate": 4.965116868263517e-06, "loss": 1.1335, "step": 4820 }, { "epoch": 0.03496275706312841, "grad_norm": 0.17920000851154327, "learning_rate": 4.9650444816029305e-06, "loss": 1.1362, "step": 4830 }, { "epoch": 0.03503514372371459, "grad_norm": 0.1886454075574875, "learning_rate": 4.964972094942345e-06, "loss": 1.1328, "step": 4840 }, { "epoch": 0.035107530384300784, "grad_norm": 0.18787150084972382, "learning_rate": 4.964899708281759e-06, "loss": 1.1359, "step": 4850 }, { "epoch": 0.03517991704488697, "grad_norm": 0.18325144052505493, "learning_rate": 4.964827321621172e-06, "loss": 1.1281, "step": 4860 }, { "epoch": 0.03525230370547316, "grad_norm": 0.17951847612857819, "learning_rate": 4.964754934960586e-06, "loss": 1.1304, "step": 4870 }, { "epoch": 0.03532469036605934, "grad_norm": 0.17793092131614685, "learning_rate": 4.9646825482999995e-06, "loss": 1.1348, "step": 4880 }, { "epoch": 0.03539707702664553, "grad_norm": 0.19709095358848572, "learning_rate": 4.964610161639413e-06, "loss": 1.1046, "step": 4890 }, { "epoch": 0.035469463687231716, "grad_norm": 0.2029866725206375, "learning_rate": 4.964537774978827e-06, "loss": 1.1269, "step": 4900 }, { "epoch": 0.035541850347817906, "grad_norm": 0.1873743087053299, "learning_rate": 4.964465388318241e-06, "loss": 1.1214, "step": 4910 }, { "epoch": 0.03561423700840409, "grad_norm": 0.18011152744293213, "learning_rate": 4.964393001657655e-06, "loss": 1.1244, "step": 4920 }, { "epoch": 0.03568662366899028, "grad_norm": 0.2078821361064911, "learning_rate": 4.964320614997068e-06, "loss": 1.123, "step": 4930 }, { "epoch": 0.035759010329576464, "grad_norm": 0.22960899770259857, "learning_rate": 4.964248228336482e-06, "loss": 1.1354, "step": 4940 }, { "epoch": 0.035831396990162655, "grad_norm": 0.19076983630657196, "learning_rate": 4.9641758416758965e-06, "loss": 1.1237, "step": 4950 }, { "epoch": 0.03590378365074884, "grad_norm": 0.19131101667881012, "learning_rate": 4.96410345501531e-06, "loss": 1.1253, "step": 4960 }, { "epoch": 0.03597617031133503, "grad_norm": 0.1835022121667862, "learning_rate": 4.964031068354724e-06, "loss": 1.1338, "step": 4970 }, { "epoch": 0.03604855697192121, "grad_norm": 0.17867615818977356, "learning_rate": 4.963958681694137e-06, "loss": 1.112, "step": 4980 }, { "epoch": 0.0361209436325074, "grad_norm": 0.18737174570560455, "learning_rate": 4.963886295033552e-06, "loss": 1.1277, "step": 4990 }, { "epoch": 0.03619333029309359, "grad_norm": 0.21853433549404144, "learning_rate": 4.963813908372965e-06, "loss": 1.1324, "step": 5000 }, { "epoch": 0.03626571695367978, "grad_norm": 0.18206347525119781, "learning_rate": 4.963741521712379e-06, "loss": 1.1275, "step": 5010 }, { "epoch": 0.03633810361426596, "grad_norm": 0.20839586853981018, "learning_rate": 4.963669135051793e-06, "loss": 1.1235, "step": 5020 }, { "epoch": 0.03641049027485215, "grad_norm": 0.208791121840477, "learning_rate": 4.963596748391207e-06, "loss": 1.1197, "step": 5030 }, { "epoch": 0.036482876935438335, "grad_norm": 0.18400295078754425, "learning_rate": 4.963524361730621e-06, "loss": 1.1413, "step": 5040 }, { "epoch": 0.036555263596024526, "grad_norm": 0.1914309412240982, "learning_rate": 4.963451975070034e-06, "loss": 1.1389, "step": 5050 }, { "epoch": 0.03662765025661071, "grad_norm": 0.18529996275901794, "learning_rate": 4.963379588409448e-06, "loss": 1.1152, "step": 5060 }, { "epoch": 0.0367000369171969, "grad_norm": 0.1869628131389618, "learning_rate": 4.963307201748862e-06, "loss": 1.1276, "step": 5070 }, { "epoch": 0.036772423577783084, "grad_norm": 0.1835557073354721, "learning_rate": 4.963234815088276e-06, "loss": 1.1408, "step": 5080 }, { "epoch": 0.036844810238369274, "grad_norm": 0.19675718247890472, "learning_rate": 4.96316242842769e-06, "loss": 1.1053, "step": 5090 }, { "epoch": 0.03691719689895546, "grad_norm": 0.18781916797161102, "learning_rate": 4.963090041767103e-06, "loss": 1.1331, "step": 5100 }, { "epoch": 0.03698958355954165, "grad_norm": 0.24024321138858795, "learning_rate": 4.963017655106518e-06, "loss": 1.1233, "step": 5110 }, { "epoch": 0.03706197022012783, "grad_norm": 0.17619359493255615, "learning_rate": 4.962945268445931e-06, "loss": 1.123, "step": 5120 }, { "epoch": 0.03713435688071402, "grad_norm": 0.19211329519748688, "learning_rate": 4.962872881785345e-06, "loss": 1.1162, "step": 5130 }, { "epoch": 0.037206743541300207, "grad_norm": 0.18998998403549194, "learning_rate": 4.9628004951247586e-06, "loss": 1.1298, "step": 5140 }, { "epoch": 0.0372791302018864, "grad_norm": 0.18230006098747253, "learning_rate": 4.962728108464173e-06, "loss": 1.1313, "step": 5150 }, { "epoch": 0.03735151686247258, "grad_norm": 0.18055368959903717, "learning_rate": 4.962655721803587e-06, "loss": 1.1395, "step": 5160 }, { "epoch": 0.03742390352305877, "grad_norm": 0.1782815009355545, "learning_rate": 4.962583335143e-06, "loss": 1.1295, "step": 5170 }, { "epoch": 0.037496290183644955, "grad_norm": 0.21609275043010712, "learning_rate": 4.962510948482414e-06, "loss": 1.1273, "step": 5180 }, { "epoch": 0.037568676844231146, "grad_norm": 0.19607985019683838, "learning_rate": 4.962438561821828e-06, "loss": 1.116, "step": 5190 }, { "epoch": 0.03764106350481733, "grad_norm": 0.26767560839653015, "learning_rate": 4.962366175161242e-06, "loss": 1.1085, "step": 5200 }, { "epoch": 0.03771345016540352, "grad_norm": 0.2847156524658203, "learning_rate": 4.962293788500656e-06, "loss": 1.1067, "step": 5210 }, { "epoch": 0.0377858368259897, "grad_norm": 0.20309562981128693, "learning_rate": 4.962221401840069e-06, "loss": 1.1204, "step": 5220 }, { "epoch": 0.037858223486575894, "grad_norm": 0.20030486583709717, "learning_rate": 4.962149015179483e-06, "loss": 1.1203, "step": 5230 }, { "epoch": 0.03793061014716208, "grad_norm": 0.16900622844696045, "learning_rate": 4.962076628518897e-06, "loss": 1.1398, "step": 5240 }, { "epoch": 0.03800299680774827, "grad_norm": 0.18107786774635315, "learning_rate": 4.962004241858311e-06, "loss": 1.1224, "step": 5250 }, { "epoch": 0.03807538346833445, "grad_norm": 0.19224514067173004, "learning_rate": 4.9619318551977245e-06, "loss": 1.1198, "step": 5260 }, { "epoch": 0.03814777012892064, "grad_norm": 0.18875695765018463, "learning_rate": 4.961859468537138e-06, "loss": 1.1281, "step": 5270 }, { "epoch": 0.03822015678950683, "grad_norm": 0.20869621634483337, "learning_rate": 4.961787081876553e-06, "loss": 1.0947, "step": 5280 }, { "epoch": 0.03829254345009302, "grad_norm": 0.18768785893917084, "learning_rate": 4.961714695215966e-06, "loss": 1.1132, "step": 5290 }, { "epoch": 0.03836493011067921, "grad_norm": 0.19424548745155334, "learning_rate": 4.96164230855538e-06, "loss": 1.1311, "step": 5300 }, { "epoch": 0.03843731677126539, "grad_norm": 0.18979331851005554, "learning_rate": 4.9615699218947934e-06, "loss": 1.1254, "step": 5310 }, { "epoch": 0.03850970343185158, "grad_norm": 0.18203093111515045, "learning_rate": 4.961497535234208e-06, "loss": 1.133, "step": 5320 }, { "epoch": 0.038582090092437765, "grad_norm": 0.1737833172082901, "learning_rate": 4.9614251485736215e-06, "loss": 1.1132, "step": 5330 }, { "epoch": 0.038654476753023956, "grad_norm": 0.18952320516109467, "learning_rate": 4.961352761913035e-06, "loss": 1.1311, "step": 5340 }, { "epoch": 0.03872686341361014, "grad_norm": 0.25203409790992737, "learning_rate": 4.961280375252449e-06, "loss": 1.1111, "step": 5350 }, { "epoch": 0.03879925007419633, "grad_norm": 0.20354746282100677, "learning_rate": 4.961207988591863e-06, "loss": 1.1149, "step": 5360 }, { "epoch": 0.038871636734782514, "grad_norm": 0.21158255636692047, "learning_rate": 4.961135601931277e-06, "loss": 1.1342, "step": 5370 }, { "epoch": 0.038944023395368704, "grad_norm": 0.19399100542068481, "learning_rate": 4.9610632152706904e-06, "loss": 1.1173, "step": 5380 }, { "epoch": 0.03901641005595489, "grad_norm": 0.18055978417396545, "learning_rate": 4.960990828610104e-06, "loss": 1.1228, "step": 5390 }, { "epoch": 0.03908879671654108, "grad_norm": 0.17884358763694763, "learning_rate": 4.9609184419495185e-06, "loss": 1.1214, "step": 5400 }, { "epoch": 0.03916118337712726, "grad_norm": 0.1775633841753006, "learning_rate": 4.960846055288932e-06, "loss": 1.121, "step": 5410 }, { "epoch": 0.03923357003771345, "grad_norm": 0.18697598576545715, "learning_rate": 4.960773668628345e-06, "loss": 1.147, "step": 5420 }, { "epoch": 0.039305956698299636, "grad_norm": 0.8238439559936523, "learning_rate": 4.960701281967759e-06, "loss": 1.1239, "step": 5430 }, { "epoch": 0.03937834335888583, "grad_norm": 0.18985433876514435, "learning_rate": 4.960628895307173e-06, "loss": 1.1123, "step": 5440 }, { "epoch": 0.03945073001947201, "grad_norm": 0.18031606078147888, "learning_rate": 4.960556508646587e-06, "loss": 1.1203, "step": 5450 }, { "epoch": 0.0395231166800582, "grad_norm": 0.16933594644069672, "learning_rate": 4.960484121986e-06, "loss": 1.118, "step": 5460 }, { "epoch": 0.039595503340644385, "grad_norm": 0.1860426515340805, "learning_rate": 4.960411735325415e-06, "loss": 1.1193, "step": 5470 }, { "epoch": 0.039667890001230575, "grad_norm": 0.18198014795780182, "learning_rate": 4.960339348664828e-06, "loss": 1.1229, "step": 5480 }, { "epoch": 0.03974027666181676, "grad_norm": 0.19671514630317688, "learning_rate": 4.960266962004242e-06, "loss": 1.1263, "step": 5490 }, { "epoch": 0.03981266332240295, "grad_norm": 0.18220868706703186, "learning_rate": 4.9601945753436555e-06, "loss": 1.122, "step": 5500 }, { "epoch": 0.03988504998298913, "grad_norm": 0.19989323616027832, "learning_rate": 4.96012218868307e-06, "loss": 1.1202, "step": 5510 }, { "epoch": 0.039957436643575324, "grad_norm": 0.1794479936361313, "learning_rate": 4.960049802022484e-06, "loss": 1.1221, "step": 5520 }, { "epoch": 0.04002982330416151, "grad_norm": 0.23208436369895935, "learning_rate": 4.959977415361897e-06, "loss": 1.1197, "step": 5530 }, { "epoch": 0.0401022099647477, "grad_norm": 0.20182034373283386, "learning_rate": 4.959905028701311e-06, "loss": 1.1272, "step": 5540 }, { "epoch": 0.04017459662533388, "grad_norm": 0.18753409385681152, "learning_rate": 4.959832642040725e-06, "loss": 1.1078, "step": 5550 }, { "epoch": 0.04024698328592007, "grad_norm": 0.20286722481250763, "learning_rate": 4.959760255380139e-06, "loss": 1.1251, "step": 5560 }, { "epoch": 0.040319369946506256, "grad_norm": 0.21133583784103394, "learning_rate": 4.9596878687195525e-06, "loss": 1.1299, "step": 5570 }, { "epoch": 0.04039175660709245, "grad_norm": 0.1748242825269699, "learning_rate": 4.959615482058966e-06, "loss": 1.1263, "step": 5580 }, { "epoch": 0.04046414326767863, "grad_norm": 0.19774052500724792, "learning_rate": 4.959543095398381e-06, "loss": 1.1134, "step": 5590 }, { "epoch": 0.04053652992826482, "grad_norm": 0.2021917700767517, "learning_rate": 4.959470708737794e-06, "loss": 1.1222, "step": 5600 }, { "epoch": 0.040608916588851005, "grad_norm": 0.20801125466823578, "learning_rate": 4.959398322077208e-06, "loss": 1.1187, "step": 5610 }, { "epoch": 0.040681303249437195, "grad_norm": 0.17507006227970123, "learning_rate": 4.9593259354166215e-06, "loss": 1.1209, "step": 5620 }, { "epoch": 0.04075368991002338, "grad_norm": 0.1864837110042572, "learning_rate": 4.959253548756036e-06, "loss": 1.1278, "step": 5630 }, { "epoch": 0.04082607657060957, "grad_norm": 0.17824417352676392, "learning_rate": 4.9591811620954495e-06, "loss": 1.114, "step": 5640 }, { "epoch": 0.04089846323119575, "grad_norm": 0.18606680631637573, "learning_rate": 4.959108775434863e-06, "loss": 1.133, "step": 5650 }, { "epoch": 0.040970849891781944, "grad_norm": 0.1736188679933548, "learning_rate": 4.959036388774277e-06, "loss": 1.1071, "step": 5660 }, { "epoch": 0.04104323655236813, "grad_norm": 0.1926575005054474, "learning_rate": 4.958964002113691e-06, "loss": 1.1257, "step": 5670 }, { "epoch": 0.04111562321295432, "grad_norm": 0.1755741983652115, "learning_rate": 4.958891615453105e-06, "loss": 1.108, "step": 5680 }, { "epoch": 0.0411880098735405, "grad_norm": 0.19758310914039612, "learning_rate": 4.9588192287925185e-06, "loss": 1.1305, "step": 5690 }, { "epoch": 0.04126039653412669, "grad_norm": 0.21883004903793335, "learning_rate": 4.958746842131932e-06, "loss": 1.1153, "step": 5700 }, { "epoch": 0.041332783194712876, "grad_norm": 0.19593234360218048, "learning_rate": 4.9586744554713466e-06, "loss": 1.1249, "step": 5710 }, { "epoch": 0.041405169855299066, "grad_norm": 0.19684365391731262, "learning_rate": 4.95860206881076e-06, "loss": 1.1193, "step": 5720 }, { "epoch": 0.04147755651588525, "grad_norm": 0.19557367265224457, "learning_rate": 4.958529682150174e-06, "loss": 1.1251, "step": 5730 }, { "epoch": 0.04154994317647144, "grad_norm": 0.21626479923725128, "learning_rate": 4.958457295489587e-06, "loss": 1.1123, "step": 5740 }, { "epoch": 0.041622329837057624, "grad_norm": 0.18775388598442078, "learning_rate": 4.958384908829002e-06, "loss": 1.128, "step": 5750 }, { "epoch": 0.041694716497643815, "grad_norm": 0.19671566784381866, "learning_rate": 4.9583125221684155e-06, "loss": 1.1164, "step": 5760 }, { "epoch": 0.04176710315823, "grad_norm": 0.23783642053604126, "learning_rate": 4.958240135507829e-06, "loss": 1.1256, "step": 5770 }, { "epoch": 0.04183948981881619, "grad_norm": 0.18829865753650665, "learning_rate": 4.958167748847243e-06, "loss": 1.1186, "step": 5780 }, { "epoch": 0.04191187647940237, "grad_norm": 0.1892513930797577, "learning_rate": 4.958095362186657e-06, "loss": 1.1372, "step": 5790 }, { "epoch": 0.04198426313998856, "grad_norm": 0.2102990746498108, "learning_rate": 4.958022975526071e-06, "loss": 1.1055, "step": 5800 }, { "epoch": 0.04205664980057475, "grad_norm": 0.18884964287281036, "learning_rate": 4.957950588865484e-06, "loss": 1.1284, "step": 5810 }, { "epoch": 0.04212903646116094, "grad_norm": 0.17858710885047913, "learning_rate": 4.957878202204898e-06, "loss": 1.1122, "step": 5820 }, { "epoch": 0.04220142312174712, "grad_norm": 0.1783481240272522, "learning_rate": 4.957805815544312e-06, "loss": 1.1289, "step": 5830 }, { "epoch": 0.04227380978233331, "grad_norm": 0.18287035822868347, "learning_rate": 4.957733428883726e-06, "loss": 1.1005, "step": 5840 }, { "epoch": 0.042346196442919495, "grad_norm": 0.20345322787761688, "learning_rate": 4.95766104222314e-06, "loss": 1.1115, "step": 5850 }, { "epoch": 0.042418583103505686, "grad_norm": 0.1760493516921997, "learning_rate": 4.957588655562553e-06, "loss": 1.1119, "step": 5860 }, { "epoch": 0.04249096976409188, "grad_norm": 0.1835738718509674, "learning_rate": 4.957516268901967e-06, "loss": 1.1132, "step": 5870 }, { "epoch": 0.04256335642467806, "grad_norm": 0.17547370493412018, "learning_rate": 4.957443882241381e-06, "loss": 1.1409, "step": 5880 }, { "epoch": 0.04263574308526425, "grad_norm": 0.17572353780269623, "learning_rate": 4.957371495580795e-06, "loss": 1.1132, "step": 5890 }, { "epoch": 0.042708129745850434, "grad_norm": 0.18828994035720825, "learning_rate": 4.957299108920209e-06, "loss": 1.1331, "step": 5900 }, { "epoch": 0.042780516406436625, "grad_norm": 0.20104007422924042, "learning_rate": 4.957226722259622e-06, "loss": 1.0998, "step": 5910 }, { "epoch": 0.04285290306702281, "grad_norm": 0.1787911206483841, "learning_rate": 4.957154335599037e-06, "loss": 1.105, "step": 5920 }, { "epoch": 0.042925289727609, "grad_norm": 0.1929769665002823, "learning_rate": 4.95708194893845e-06, "loss": 1.1265, "step": 5930 }, { "epoch": 0.04299767638819518, "grad_norm": 0.18230368196964264, "learning_rate": 4.957009562277864e-06, "loss": 1.1114, "step": 5940 }, { "epoch": 0.043070063048781373, "grad_norm": 0.18805713951587677, "learning_rate": 4.956937175617278e-06, "loss": 1.1119, "step": 5950 }, { "epoch": 0.04314244970936756, "grad_norm": 0.178778737783432, "learning_rate": 4.956864788956691e-06, "loss": 1.105, "step": 5960 }, { "epoch": 0.04321483636995375, "grad_norm": 0.19835583865642548, "learning_rate": 4.956792402296105e-06, "loss": 1.1146, "step": 5970 }, { "epoch": 0.04328722303053993, "grad_norm": 0.18930941820144653, "learning_rate": 4.956720015635519e-06, "loss": 1.1172, "step": 5980 }, { "epoch": 0.04335960969112612, "grad_norm": 0.18036046624183655, "learning_rate": 4.956647628974933e-06, "loss": 1.1233, "step": 5990 }, { "epoch": 0.043431996351712306, "grad_norm": 0.18780753016471863, "learning_rate": 4.9565752423143465e-06, "loss": 1.1101, "step": 6000 }, { "epoch": 0.043504383012298496, "grad_norm": 0.2190426141023636, "learning_rate": 4.95650285565376e-06, "loss": 1.1199, "step": 6010 }, { "epoch": 0.04357676967288468, "grad_norm": 0.20030994713306427, "learning_rate": 4.956430468993174e-06, "loss": 1.1194, "step": 6020 }, { "epoch": 0.04364915633347087, "grad_norm": 0.17827999591827393, "learning_rate": 4.956358082332588e-06, "loss": 1.1142, "step": 6030 }, { "epoch": 0.043721542994057054, "grad_norm": 0.22356738150119781, "learning_rate": 4.956285695672002e-06, "loss": 1.1218, "step": 6040 }, { "epoch": 0.043793929654643245, "grad_norm": 0.18917842209339142, "learning_rate": 4.9562133090114154e-06, "loss": 1.1133, "step": 6050 }, { "epoch": 0.04386631631522943, "grad_norm": 0.18079480528831482, "learning_rate": 4.956140922350829e-06, "loss": 1.1137, "step": 6060 }, { "epoch": 0.04393870297581562, "grad_norm": 0.18289583921432495, "learning_rate": 4.9560685356902435e-06, "loss": 1.1261, "step": 6070 }, { "epoch": 0.0440110896364018, "grad_norm": 0.2138090580701828, "learning_rate": 4.955996149029657e-06, "loss": 1.1005, "step": 6080 }, { "epoch": 0.04408347629698799, "grad_norm": 0.20163343846797943, "learning_rate": 4.955923762369071e-06, "loss": 1.1079, "step": 6090 }, { "epoch": 0.04415586295757418, "grad_norm": 0.20078091323375702, "learning_rate": 4.955851375708484e-06, "loss": 1.0869, "step": 6100 }, { "epoch": 0.04422824961816037, "grad_norm": 0.2066076546907425, "learning_rate": 4.955778989047899e-06, "loss": 1.1208, "step": 6110 }, { "epoch": 0.04430063627874655, "grad_norm": 0.18639633059501648, "learning_rate": 4.9557066023873124e-06, "loss": 1.1256, "step": 6120 }, { "epoch": 0.04437302293933274, "grad_norm": 0.1844189614057541, "learning_rate": 4.955634215726726e-06, "loss": 1.1258, "step": 6130 }, { "epoch": 0.044445409599918925, "grad_norm": 0.1896459013223648, "learning_rate": 4.95556182906614e-06, "loss": 1.1393, "step": 6140 }, { "epoch": 0.044517796260505116, "grad_norm": 0.17254671454429626, "learning_rate": 4.955489442405554e-06, "loss": 1.1274, "step": 6150 }, { "epoch": 0.0445901829210913, "grad_norm": 0.22296860814094543, "learning_rate": 4.955417055744968e-06, "loss": 1.125, "step": 6160 }, { "epoch": 0.04466256958167749, "grad_norm": 0.20404869318008423, "learning_rate": 4.955344669084381e-06, "loss": 1.1093, "step": 6170 }, { "epoch": 0.044734956242263674, "grad_norm": 0.18639911711215973, "learning_rate": 4.955272282423795e-06, "loss": 1.1051, "step": 6180 }, { "epoch": 0.044807342902849864, "grad_norm": 0.1889095902442932, "learning_rate": 4.9551998957632095e-06, "loss": 1.1199, "step": 6190 }, { "epoch": 0.04487972956343605, "grad_norm": 0.19607719779014587, "learning_rate": 4.955127509102623e-06, "loss": 1.1052, "step": 6200 }, { "epoch": 0.04495211622402224, "grad_norm": 0.18083210289478302, "learning_rate": 4.955055122442037e-06, "loss": 1.129, "step": 6210 }, { "epoch": 0.04502450288460842, "grad_norm": 0.2071446180343628, "learning_rate": 4.95498273578145e-06, "loss": 1.0987, "step": 6220 }, { "epoch": 0.04509688954519461, "grad_norm": 0.1783336102962494, "learning_rate": 4.954910349120865e-06, "loss": 1.1306, "step": 6230 }, { "epoch": 0.045169276205780796, "grad_norm": 0.18706344068050385, "learning_rate": 4.954837962460278e-06, "loss": 1.1217, "step": 6240 }, { "epoch": 0.04524166286636699, "grad_norm": 0.19343775510787964, "learning_rate": 4.954765575799692e-06, "loss": 1.1079, "step": 6250 }, { "epoch": 0.04531404952695317, "grad_norm": 0.18519844114780426, "learning_rate": 4.954693189139106e-06, "loss": 1.1148, "step": 6260 }, { "epoch": 0.04538643618753936, "grad_norm": 0.2034037858247757, "learning_rate": 4.95462080247852e-06, "loss": 1.1, "step": 6270 }, { "epoch": 0.045458822848125545, "grad_norm": 0.22080808877944946, "learning_rate": 4.954548415817934e-06, "loss": 1.1046, "step": 6280 }, { "epoch": 0.045531209508711736, "grad_norm": 0.19020305573940277, "learning_rate": 4.954476029157347e-06, "loss": 1.1003, "step": 6290 }, { "epoch": 0.04560359616929792, "grad_norm": 0.18018420040607452, "learning_rate": 4.954403642496761e-06, "loss": 1.1247, "step": 6300 }, { "epoch": 0.04567598282988411, "grad_norm": 0.18034905195236206, "learning_rate": 4.954331255836175e-06, "loss": 1.103, "step": 6310 }, { "epoch": 0.04574836949047029, "grad_norm": 0.18934369087219238, "learning_rate": 4.954258869175589e-06, "loss": 1.1229, "step": 6320 }, { "epoch": 0.045820756151056484, "grad_norm": 0.18742787837982178, "learning_rate": 4.954186482515003e-06, "loss": 1.1131, "step": 6330 }, { "epoch": 0.04589314281164267, "grad_norm": 0.1858285516500473, "learning_rate": 4.954114095854416e-06, "loss": 1.1242, "step": 6340 }, { "epoch": 0.04596552947222886, "grad_norm": 0.17056511342525482, "learning_rate": 4.954041709193831e-06, "loss": 1.0997, "step": 6350 }, { "epoch": 0.04603791613281504, "grad_norm": 0.2107161581516266, "learning_rate": 4.953969322533244e-06, "loss": 1.1075, "step": 6360 }, { "epoch": 0.04611030279340123, "grad_norm": 0.17084477841854095, "learning_rate": 4.953896935872658e-06, "loss": 1.1113, "step": 6370 }, { "epoch": 0.046182689453987416, "grad_norm": 0.18742406368255615, "learning_rate": 4.9538245492120715e-06, "loss": 1.115, "step": 6380 }, { "epoch": 0.04625507611457361, "grad_norm": 0.18658484518527985, "learning_rate": 4.953752162551486e-06, "loss": 1.112, "step": 6390 }, { "epoch": 0.04632746277515979, "grad_norm": 0.19460472464561462, "learning_rate": 4.9536797758909e-06, "loss": 1.1297, "step": 6400 }, { "epoch": 0.04639984943574598, "grad_norm": 0.1897081881761551, "learning_rate": 4.953607389230313e-06, "loss": 1.1045, "step": 6410 }, { "epoch": 0.046472236096332165, "grad_norm": 0.1974371373653412, "learning_rate": 4.953535002569727e-06, "loss": 1.106, "step": 6420 }, { "epoch": 0.046544622756918355, "grad_norm": 0.18914976716041565, "learning_rate": 4.953462615909141e-06, "loss": 1.1199, "step": 6430 }, { "epoch": 0.04661700941750454, "grad_norm": 0.18014445900917053, "learning_rate": 4.953390229248555e-06, "loss": 1.1011, "step": 6440 }, { "epoch": 0.04668939607809073, "grad_norm": 0.17030683159828186, "learning_rate": 4.9533178425879686e-06, "loss": 1.1098, "step": 6450 }, { "epoch": 0.04676178273867692, "grad_norm": 0.19075506925582886, "learning_rate": 4.953245455927382e-06, "loss": 1.1064, "step": 6460 }, { "epoch": 0.046834169399263104, "grad_norm": 0.19481076300144196, "learning_rate": 4.953173069266796e-06, "loss": 1.1031, "step": 6470 }, { "epoch": 0.046906556059849294, "grad_norm": 0.1836860328912735, "learning_rate": 4.953100682606209e-06, "loss": 1.1134, "step": 6480 }, { "epoch": 0.04697894272043548, "grad_norm": 0.18626393377780914, "learning_rate": 4.953028295945623e-06, "loss": 1.1165, "step": 6490 }, { "epoch": 0.04705132938102167, "grad_norm": 0.17867518961429596, "learning_rate": 4.9529559092850375e-06, "loss": 1.1102, "step": 6500 }, { "epoch": 0.04712371604160785, "grad_norm": 0.1940499097108841, "learning_rate": 4.952883522624451e-06, "loss": 1.1063, "step": 6510 }, { "epoch": 0.04719610270219404, "grad_norm": 0.17317171394824982, "learning_rate": 4.952811135963865e-06, "loss": 1.1254, "step": 6520 }, { "epoch": 0.047268489362780226, "grad_norm": 0.19576644897460938, "learning_rate": 4.952738749303278e-06, "loss": 1.1251, "step": 6530 }, { "epoch": 0.04734087602336642, "grad_norm": 0.17316751182079315, "learning_rate": 4.952666362642693e-06, "loss": 1.1097, "step": 6540 }, { "epoch": 0.0474132626839526, "grad_norm": 0.1811741143465042, "learning_rate": 4.952593975982106e-06, "loss": 1.122, "step": 6550 }, { "epoch": 0.04748564934453879, "grad_norm": 0.1905275136232376, "learning_rate": 4.95252158932152e-06, "loss": 1.1074, "step": 6560 }, { "epoch": 0.047558036005124975, "grad_norm": 0.19564856588840485, "learning_rate": 4.952449202660934e-06, "loss": 1.1139, "step": 6570 }, { "epoch": 0.047630422665711165, "grad_norm": 0.17164203524589539, "learning_rate": 4.952376816000348e-06, "loss": 1.1148, "step": 6580 }, { "epoch": 0.04770280932629735, "grad_norm": 0.19059689342975616, "learning_rate": 4.952304429339762e-06, "loss": 1.1193, "step": 6590 }, { "epoch": 0.04777519598688354, "grad_norm": 0.18997924029827118, "learning_rate": 4.952232042679175e-06, "loss": 1.1091, "step": 6600 }, { "epoch": 0.04784758264746972, "grad_norm": 0.2226713001728058, "learning_rate": 4.952159656018589e-06, "loss": 1.1131, "step": 6610 }, { "epoch": 0.047919969308055914, "grad_norm": 0.18311481177806854, "learning_rate": 4.9520872693580026e-06, "loss": 1.1152, "step": 6620 }, { "epoch": 0.0479923559686421, "grad_norm": 0.18285690248012543, "learning_rate": 4.952014882697417e-06, "loss": 1.1209, "step": 6630 }, { "epoch": 0.04806474262922829, "grad_norm": 0.18869620561599731, "learning_rate": 4.951942496036831e-06, "loss": 1.0976, "step": 6640 }, { "epoch": 0.04813712928981447, "grad_norm": 0.17298665642738342, "learning_rate": 4.951870109376244e-06, "loss": 1.1105, "step": 6650 }, { "epoch": 0.04820951595040066, "grad_norm": 0.1727328598499298, "learning_rate": 4.951797722715658e-06, "loss": 1.1042, "step": 6660 }, { "epoch": 0.048281902610986846, "grad_norm": 0.2000247985124588, "learning_rate": 4.951725336055072e-06, "loss": 1.1056, "step": 6670 }, { "epoch": 0.04835428927157304, "grad_norm": 0.18341891467571259, "learning_rate": 4.951652949394486e-06, "loss": 1.1081, "step": 6680 }, { "epoch": 0.04842667593215922, "grad_norm": 0.17036312818527222, "learning_rate": 4.9515805627339e-06, "loss": 1.1055, "step": 6690 }, { "epoch": 0.04849906259274541, "grad_norm": 0.1787406951189041, "learning_rate": 4.951508176073313e-06, "loss": 1.1104, "step": 6700 }, { "epoch": 0.048571449253331594, "grad_norm": 0.21522028744220734, "learning_rate": 4.951435789412728e-06, "loss": 1.1306, "step": 6710 }, { "epoch": 0.048643835913917785, "grad_norm": 0.18588437139987946, "learning_rate": 4.951363402752141e-06, "loss": 1.1115, "step": 6720 }, { "epoch": 0.04871622257450397, "grad_norm": 0.18174554407596588, "learning_rate": 4.951291016091555e-06, "loss": 1.1072, "step": 6730 }, { "epoch": 0.04878860923509016, "grad_norm": 0.20772916078567505, "learning_rate": 4.9512186294309685e-06, "loss": 1.1141, "step": 6740 }, { "epoch": 0.04886099589567634, "grad_norm": 0.1825239658355713, "learning_rate": 4.951146242770383e-06, "loss": 1.0956, "step": 6750 }, { "epoch": 0.048933382556262534, "grad_norm": 0.1815953105688095, "learning_rate": 4.951073856109797e-06, "loss": 1.1061, "step": 6760 }, { "epoch": 0.04900576921684872, "grad_norm": 0.20214617252349854, "learning_rate": 4.95100146944921e-06, "loss": 1.1076, "step": 6770 }, { "epoch": 0.04907815587743491, "grad_norm": 0.18477530777454376, "learning_rate": 4.950929082788624e-06, "loss": 1.1158, "step": 6780 }, { "epoch": 0.04915054253802109, "grad_norm": 0.18946325778961182, "learning_rate": 4.950856696128038e-06, "loss": 1.0897, "step": 6790 }, { "epoch": 0.04922292919860728, "grad_norm": 0.18577060103416443, "learning_rate": 4.950784309467452e-06, "loss": 1.1187, "step": 6800 }, { "epoch": 0.049295315859193466, "grad_norm": 0.21096892654895782, "learning_rate": 4.9507119228068655e-06, "loss": 1.117, "step": 6810 }, { "epoch": 0.049367702519779656, "grad_norm": 0.18540360033512115, "learning_rate": 4.950639536146279e-06, "loss": 1.1034, "step": 6820 }, { "epoch": 0.04944008918036584, "grad_norm": 0.18726834654808044, "learning_rate": 4.950567149485694e-06, "loss": 1.1083, "step": 6830 }, { "epoch": 0.04951247584095203, "grad_norm": 0.1806015968322754, "learning_rate": 4.950494762825107e-06, "loss": 1.1017, "step": 6840 }, { "epoch": 0.049584862501538214, "grad_norm": 0.19054830074310303, "learning_rate": 4.950422376164521e-06, "loss": 1.1093, "step": 6850 }, { "epoch": 0.049657249162124405, "grad_norm": 0.18223927915096283, "learning_rate": 4.9503499895039344e-06, "loss": 1.1138, "step": 6860 }, { "epoch": 0.04972963582271059, "grad_norm": 0.18125773966312408, "learning_rate": 4.950277602843349e-06, "loss": 1.0931, "step": 6870 }, { "epoch": 0.04980202248329678, "grad_norm": 0.18000420928001404, "learning_rate": 4.9502052161827625e-06, "loss": 1.1019, "step": 6880 }, { "epoch": 0.04987440914388296, "grad_norm": 0.21712656319141388, "learning_rate": 4.950132829522176e-06, "loss": 1.1233, "step": 6890 }, { "epoch": 0.04994679580446915, "grad_norm": 0.20299050211906433, "learning_rate": 4.95006044286159e-06, "loss": 1.1055, "step": 6900 }, { "epoch": 0.05001918246505534, "grad_norm": 0.1789911538362503, "learning_rate": 4.949988056201004e-06, "loss": 1.0976, "step": 6910 }, { "epoch": 0.05009156912564153, "grad_norm": 0.18480534851551056, "learning_rate": 4.949915669540418e-06, "loss": 1.1133, "step": 6920 }, { "epoch": 0.05016395578622771, "grad_norm": 0.17870941758155823, "learning_rate": 4.9498432828798315e-06, "loss": 1.1187, "step": 6930 }, { "epoch": 0.0502363424468139, "grad_norm": 0.18759605288505554, "learning_rate": 4.949770896219245e-06, "loss": 1.0889, "step": 6940 }, { "epoch": 0.050308729107400085, "grad_norm": 0.19115033745765686, "learning_rate": 4.9496985095586595e-06, "loss": 1.1194, "step": 6950 }, { "epoch": 0.050381115767986276, "grad_norm": 0.19017775356769562, "learning_rate": 4.949626122898073e-06, "loss": 1.1183, "step": 6960 }, { "epoch": 0.05045350242857246, "grad_norm": 0.16906103491783142, "learning_rate": 4.949553736237487e-06, "loss": 1.1165, "step": 6970 }, { "epoch": 0.05052588908915865, "grad_norm": 0.18214966356754303, "learning_rate": 4.9494813495769e-06, "loss": 1.0915, "step": 6980 }, { "epoch": 0.050598275749744834, "grad_norm": 0.19243939220905304, "learning_rate": 4.949408962916315e-06, "loss": 1.1123, "step": 6990 }, { "epoch": 0.050670662410331024, "grad_norm": 0.1790931522846222, "learning_rate": 4.9493365762557285e-06, "loss": 1.1139, "step": 7000 }, { "epoch": 0.05074304907091721, "grad_norm": 0.1852511316537857, "learning_rate": 4.949264189595141e-06, "loss": 1.1077, "step": 7010 }, { "epoch": 0.0508154357315034, "grad_norm": 0.24233588576316833, "learning_rate": 4.949191802934556e-06, "loss": 1.1023, "step": 7020 }, { "epoch": 0.05088782239208959, "grad_norm": 0.22311051189899445, "learning_rate": 4.949119416273969e-06, "loss": 1.0985, "step": 7030 }, { "epoch": 0.05096020905267577, "grad_norm": 0.21093599498271942, "learning_rate": 4.949047029613383e-06, "loss": 1.0985, "step": 7040 }, { "epoch": 0.05103259571326196, "grad_norm": 0.19992297887802124, "learning_rate": 4.9489746429527965e-06, "loss": 1.1149, "step": 7050 }, { "epoch": 0.05110498237384815, "grad_norm": 0.23999755084514618, "learning_rate": 4.948902256292211e-06, "loss": 1.1069, "step": 7060 }, { "epoch": 0.05117736903443434, "grad_norm": 0.1987914890050888, "learning_rate": 4.948829869631625e-06, "loss": 1.1077, "step": 7070 }, { "epoch": 0.05124975569502052, "grad_norm": 0.2037319839000702, "learning_rate": 4.948757482971038e-06, "loss": 1.107, "step": 7080 }, { "epoch": 0.05132214235560671, "grad_norm": 0.1777563840150833, "learning_rate": 4.948685096310452e-06, "loss": 1.1182, "step": 7090 }, { "epoch": 0.051394529016192896, "grad_norm": 0.18838225305080414, "learning_rate": 4.948612709649866e-06, "loss": 1.1226, "step": 7100 }, { "epoch": 0.051466915676779086, "grad_norm": 0.19094857573509216, "learning_rate": 4.94854032298928e-06, "loss": 1.0974, "step": 7110 }, { "epoch": 0.05153930233736527, "grad_norm": 0.22589007019996643, "learning_rate": 4.9484679363286935e-06, "loss": 1.1015, "step": 7120 }, { "epoch": 0.05161168899795146, "grad_norm": 0.1975346952676773, "learning_rate": 4.948395549668107e-06, "loss": 1.1143, "step": 7130 }, { "epoch": 0.051684075658537644, "grad_norm": 0.17119309306144714, "learning_rate": 4.948323163007522e-06, "loss": 1.1089, "step": 7140 }, { "epoch": 0.051756462319123835, "grad_norm": 0.1853228211402893, "learning_rate": 4.948250776346935e-06, "loss": 1.1202, "step": 7150 }, { "epoch": 0.05182884897971002, "grad_norm": 0.21450157463550568, "learning_rate": 4.948178389686349e-06, "loss": 1.1093, "step": 7160 }, { "epoch": 0.05190123564029621, "grad_norm": 0.1728169471025467, "learning_rate": 4.9481060030257625e-06, "loss": 1.0914, "step": 7170 }, { "epoch": 0.05197362230088239, "grad_norm": 0.21489717066287994, "learning_rate": 4.948033616365177e-06, "loss": 1.0987, "step": 7180 }, { "epoch": 0.05204600896146858, "grad_norm": 0.19044430553913116, "learning_rate": 4.9479612297045906e-06, "loss": 1.1089, "step": 7190 }, { "epoch": 0.05211839562205477, "grad_norm": 0.18932093679904938, "learning_rate": 4.947888843044004e-06, "loss": 1.1093, "step": 7200 }, { "epoch": 0.05219078228264096, "grad_norm": 0.18193960189819336, "learning_rate": 4.947816456383418e-06, "loss": 1.1116, "step": 7210 }, { "epoch": 0.05226316894322714, "grad_norm": 0.18055914342403412, "learning_rate": 4.947744069722832e-06, "loss": 1.1052, "step": 7220 }, { "epoch": 0.05233555560381333, "grad_norm": 0.18837417662143707, "learning_rate": 4.947671683062246e-06, "loss": 1.0917, "step": 7230 }, { "epoch": 0.052407942264399515, "grad_norm": 0.18194933235645294, "learning_rate": 4.9475992964016595e-06, "loss": 1.0992, "step": 7240 }, { "epoch": 0.052480328924985706, "grad_norm": 0.2614051103591919, "learning_rate": 4.947526909741073e-06, "loss": 1.0997, "step": 7250 }, { "epoch": 0.05255271558557189, "grad_norm": 0.1798228770494461, "learning_rate": 4.947454523080487e-06, "loss": 1.1035, "step": 7260 }, { "epoch": 0.05262510224615808, "grad_norm": 0.1857793927192688, "learning_rate": 4.947382136419901e-06, "loss": 1.099, "step": 7270 }, { "epoch": 0.052697488906744264, "grad_norm": 0.1768251359462738, "learning_rate": 4.947309749759315e-06, "loss": 1.0987, "step": 7280 }, { "epoch": 0.052769875567330454, "grad_norm": 0.18754634261131287, "learning_rate": 4.947237363098728e-06, "loss": 1.098, "step": 7290 }, { "epoch": 0.05284226222791664, "grad_norm": 0.18987387418746948, "learning_rate": 4.947164976438142e-06, "loss": 1.1097, "step": 7300 }, { "epoch": 0.05291464888850283, "grad_norm": 0.17656712234020233, "learning_rate": 4.9470925897775565e-06, "loss": 1.1031, "step": 7310 }, { "epoch": 0.05298703554908901, "grad_norm": 0.20419563353061676, "learning_rate": 4.94702020311697e-06, "loss": 1.1133, "step": 7320 }, { "epoch": 0.0530594222096752, "grad_norm": 0.19128115475177765, "learning_rate": 4.946947816456384e-06, "loss": 1.1022, "step": 7330 }, { "epoch": 0.053131808870261386, "grad_norm": 0.17749454081058502, "learning_rate": 4.946875429795797e-06, "loss": 1.1134, "step": 7340 }, { "epoch": 0.05320419553084758, "grad_norm": 0.18553034961223602, "learning_rate": 4.946803043135212e-06, "loss": 1.1059, "step": 7350 }, { "epoch": 0.05327658219143376, "grad_norm": 0.2216414511203766, "learning_rate": 4.946730656474625e-06, "loss": 1.1099, "step": 7360 }, { "epoch": 0.05334896885201995, "grad_norm": 0.1996411681175232, "learning_rate": 4.946658269814039e-06, "loss": 1.0844, "step": 7370 }, { "epoch": 0.053421355512606135, "grad_norm": 0.17310328781604767, "learning_rate": 4.946585883153453e-06, "loss": 1.0895, "step": 7380 }, { "epoch": 0.053493742173192325, "grad_norm": 0.18325687944889069, "learning_rate": 4.946513496492867e-06, "loss": 1.1017, "step": 7390 }, { "epoch": 0.05356612883377851, "grad_norm": 0.1925450563430786, "learning_rate": 4.946441109832281e-06, "loss": 1.1152, "step": 7400 }, { "epoch": 0.0536385154943647, "grad_norm": 0.20546835660934448, "learning_rate": 4.946368723171694e-06, "loss": 1.0937, "step": 7410 }, { "epoch": 0.05371090215495088, "grad_norm": 0.19936460256576538, "learning_rate": 4.946296336511108e-06, "loss": 1.0876, "step": 7420 }, { "epoch": 0.053783288815537074, "grad_norm": 0.18377749621868134, "learning_rate": 4.9462239498505224e-06, "loss": 1.1009, "step": 7430 }, { "epoch": 0.05385567547612326, "grad_norm": 0.18654756247997284, "learning_rate": 4.946151563189936e-06, "loss": 1.1017, "step": 7440 }, { "epoch": 0.05392806213670945, "grad_norm": 0.17584462463855743, "learning_rate": 4.94607917652935e-06, "loss": 1.1111, "step": 7450 }, { "epoch": 0.05400044879729563, "grad_norm": 0.1823701709508896, "learning_rate": 4.946006789868763e-06, "loss": 1.0983, "step": 7460 }, { "epoch": 0.05407283545788182, "grad_norm": 0.17409732937812805, "learning_rate": 4.945934403208178e-06, "loss": 1.1101, "step": 7470 }, { "epoch": 0.054145222118468006, "grad_norm": 0.19750286638736725, "learning_rate": 4.945862016547591e-06, "loss": 1.1043, "step": 7480 }, { "epoch": 0.0542176087790542, "grad_norm": 0.18154850602149963, "learning_rate": 4.945789629887005e-06, "loss": 1.1078, "step": 7490 }, { "epoch": 0.05428999543964038, "grad_norm": 0.17519652843475342, "learning_rate": 4.945717243226419e-06, "loss": 1.0995, "step": 7500 }, { "epoch": 0.05436238210022657, "grad_norm": 0.18698933720588684, "learning_rate": 4.945644856565833e-06, "loss": 1.0991, "step": 7510 }, { "epoch": 0.054434768760812755, "grad_norm": 0.1779969483613968, "learning_rate": 4.945572469905247e-06, "loss": 1.1029, "step": 7520 }, { "epoch": 0.054507155421398945, "grad_norm": 0.1794944554567337, "learning_rate": 4.94550008324466e-06, "loss": 1.0927, "step": 7530 }, { "epoch": 0.05457954208198513, "grad_norm": 0.1742369830608368, "learning_rate": 4.945427696584074e-06, "loss": 1.1085, "step": 7540 }, { "epoch": 0.05465192874257132, "grad_norm": 0.17996317148208618, "learning_rate": 4.9453553099234875e-06, "loss": 1.1053, "step": 7550 }, { "epoch": 0.0547243154031575, "grad_norm": 0.18489129841327667, "learning_rate": 4.945282923262901e-06, "loss": 1.0965, "step": 7560 }, { "epoch": 0.054796702063743694, "grad_norm": 0.2375306636095047, "learning_rate": 4.945210536602315e-06, "loss": 1.1016, "step": 7570 }, { "epoch": 0.05486908872432988, "grad_norm": 0.18841832876205444, "learning_rate": 4.945138149941729e-06, "loss": 1.1089, "step": 7580 }, { "epoch": 0.05494147538491607, "grad_norm": 0.17497693002223969, "learning_rate": 4.945065763281143e-06, "loss": 1.0913, "step": 7590 }, { "epoch": 0.05501386204550225, "grad_norm": 0.20578007400035858, "learning_rate": 4.9449933766205564e-06, "loss": 1.0975, "step": 7600 }, { "epoch": 0.05508624870608844, "grad_norm": 0.1815917193889618, "learning_rate": 4.94492098995997e-06, "loss": 1.0931, "step": 7610 }, { "epoch": 0.05515863536667463, "grad_norm": 0.18187057971954346, "learning_rate": 4.9448486032993845e-06, "loss": 1.0993, "step": 7620 }, { "epoch": 0.055231022027260816, "grad_norm": 0.1740749329328537, "learning_rate": 4.944776216638798e-06, "loss": 1.0941, "step": 7630 }, { "epoch": 0.05530340868784701, "grad_norm": 0.17215760052204132, "learning_rate": 4.944703829978212e-06, "loss": 1.1036, "step": 7640 }, { "epoch": 0.05537579534843319, "grad_norm": 0.21924471855163574, "learning_rate": 4.944631443317625e-06, "loss": 1.0897, "step": 7650 }, { "epoch": 0.05544818200901938, "grad_norm": 0.1766793131828308, "learning_rate": 4.94455905665704e-06, "loss": 1.0977, "step": 7660 }, { "epoch": 0.055520568669605565, "grad_norm": 0.20563791692256927, "learning_rate": 4.9444866699964535e-06, "loss": 1.0981, "step": 7670 }, { "epoch": 0.055592955330191755, "grad_norm": 0.17593294382095337, "learning_rate": 4.944414283335867e-06, "loss": 1.0932, "step": 7680 }, { "epoch": 0.05566534199077794, "grad_norm": 0.18366055190563202, "learning_rate": 4.944341896675281e-06, "loss": 1.1067, "step": 7690 }, { "epoch": 0.05573772865136413, "grad_norm": 0.19099287688732147, "learning_rate": 4.944269510014695e-06, "loss": 1.0877, "step": 7700 }, { "epoch": 0.05581011531195031, "grad_norm": 0.17935815453529358, "learning_rate": 4.944197123354109e-06, "loss": 1.0943, "step": 7710 }, { "epoch": 0.055882501972536504, "grad_norm": 0.18561848998069763, "learning_rate": 4.944124736693522e-06, "loss": 1.1099, "step": 7720 }, { "epoch": 0.05595488863312269, "grad_norm": 0.18966755270957947, "learning_rate": 4.944052350032936e-06, "loss": 1.0981, "step": 7730 }, { "epoch": 0.05602727529370888, "grad_norm": 0.18499694764614105, "learning_rate": 4.9439799633723505e-06, "loss": 1.103, "step": 7740 }, { "epoch": 0.05609966195429506, "grad_norm": 0.17918939888477325, "learning_rate": 4.943907576711764e-06, "loss": 1.1007, "step": 7750 }, { "epoch": 0.05617204861488125, "grad_norm": 0.19508133828639984, "learning_rate": 4.943835190051178e-06, "loss": 1.1104, "step": 7760 }, { "epoch": 0.056244435275467436, "grad_norm": 0.19462929666042328, "learning_rate": 4.943762803390591e-06, "loss": 1.1089, "step": 7770 }, { "epoch": 0.056316821936053627, "grad_norm": 0.3181241750717163, "learning_rate": 4.943690416730006e-06, "loss": 1.1036, "step": 7780 }, { "epoch": 0.05638920859663981, "grad_norm": 0.175667867064476, "learning_rate": 4.943618030069419e-06, "loss": 1.1137, "step": 7790 }, { "epoch": 0.056461595257226, "grad_norm": 0.17442381381988525, "learning_rate": 4.943545643408833e-06, "loss": 1.0801, "step": 7800 }, { "epoch": 0.056533981917812184, "grad_norm": 0.17181989550590515, "learning_rate": 4.943473256748247e-06, "loss": 1.1102, "step": 7810 }, { "epoch": 0.056606368578398375, "grad_norm": 0.19522684812545776, "learning_rate": 4.943400870087661e-06, "loss": 1.103, "step": 7820 }, { "epoch": 0.05667875523898456, "grad_norm": 0.3031037747859955, "learning_rate": 4.943328483427075e-06, "loss": 1.1028, "step": 7830 }, { "epoch": 0.05675114189957075, "grad_norm": 0.21572640538215637, "learning_rate": 4.943256096766488e-06, "loss": 1.0969, "step": 7840 }, { "epoch": 0.05682352856015693, "grad_norm": 0.1849983185529709, "learning_rate": 4.943183710105902e-06, "loss": 1.0988, "step": 7850 }, { "epoch": 0.05689591522074312, "grad_norm": 0.17931009829044342, "learning_rate": 4.9431113234453155e-06, "loss": 1.0871, "step": 7860 }, { "epoch": 0.05696830188132931, "grad_norm": 0.19952841103076935, "learning_rate": 4.94303893678473e-06, "loss": 1.1064, "step": 7870 }, { "epoch": 0.0570406885419155, "grad_norm": 0.19907739758491516, "learning_rate": 4.942966550124144e-06, "loss": 1.0837, "step": 7880 }, { "epoch": 0.05711307520250168, "grad_norm": 0.1972847580909729, "learning_rate": 4.942894163463557e-06, "loss": 1.0981, "step": 7890 }, { "epoch": 0.05718546186308787, "grad_norm": 0.17437316477298737, "learning_rate": 4.942821776802971e-06, "loss": 1.0794, "step": 7900 }, { "epoch": 0.057257848523674056, "grad_norm": 0.24157637357711792, "learning_rate": 4.942749390142385e-06, "loss": 1.0933, "step": 7910 }, { "epoch": 0.057330235184260246, "grad_norm": 0.17602761089801788, "learning_rate": 4.942677003481799e-06, "loss": 1.1007, "step": 7920 }, { "epoch": 0.05740262184484643, "grad_norm": 0.17508484423160553, "learning_rate": 4.9426046168212126e-06, "loss": 1.1054, "step": 7930 }, { "epoch": 0.05747500850543262, "grad_norm": 0.18290212750434875, "learning_rate": 4.942532230160626e-06, "loss": 1.0949, "step": 7940 }, { "epoch": 0.057547395166018804, "grad_norm": 0.17786473035812378, "learning_rate": 4.942459843500041e-06, "loss": 1.1041, "step": 7950 }, { "epoch": 0.057619781826604995, "grad_norm": 0.19254614412784576, "learning_rate": 4.942387456839454e-06, "loss": 1.0975, "step": 7960 }, { "epoch": 0.05769216848719118, "grad_norm": 0.16799108684062958, "learning_rate": 4.942315070178868e-06, "loss": 1.0947, "step": 7970 }, { "epoch": 0.05776455514777737, "grad_norm": 0.21218262612819672, "learning_rate": 4.9422426835182815e-06, "loss": 1.0826, "step": 7980 }, { "epoch": 0.05783694180836355, "grad_norm": 0.1867387741804123, "learning_rate": 4.942170296857696e-06, "loss": 1.0918, "step": 7990 }, { "epoch": 0.05790932846894974, "grad_norm": 0.19001053273677826, "learning_rate": 4.9420979101971096e-06, "loss": 1.113, "step": 8000 }, { "epoch": 0.05798171512953593, "grad_norm": 0.23286646604537964, "learning_rate": 4.942025523536523e-06, "loss": 1.0957, "step": 8010 }, { "epoch": 0.05805410179012212, "grad_norm": 0.184943288564682, "learning_rate": 4.941953136875937e-06, "loss": 1.1023, "step": 8020 }, { "epoch": 0.0581264884507083, "grad_norm": 0.20318259298801422, "learning_rate": 4.941880750215351e-06, "loss": 1.1059, "step": 8030 }, { "epoch": 0.05819887511129449, "grad_norm": 0.17356903851032257, "learning_rate": 4.941808363554765e-06, "loss": 1.0779, "step": 8040 }, { "epoch": 0.058271261771880675, "grad_norm": 0.19621440768241882, "learning_rate": 4.9417359768941785e-06, "loss": 1.0961, "step": 8050 }, { "epoch": 0.058343648432466866, "grad_norm": 0.18054349720478058, "learning_rate": 4.941663590233592e-06, "loss": 1.0977, "step": 8060 }, { "epoch": 0.05841603509305305, "grad_norm": 0.18659920990467072, "learning_rate": 4.941591203573006e-06, "loss": 1.0989, "step": 8070 }, { "epoch": 0.05848842175363924, "grad_norm": 0.1870632916688919, "learning_rate": 4.941518816912419e-06, "loss": 1.1024, "step": 8080 }, { "epoch": 0.058560808414225424, "grad_norm": 0.18549518287181854, "learning_rate": 4.941446430251833e-06, "loss": 1.0826, "step": 8090 }, { "epoch": 0.058633195074811614, "grad_norm": 0.1786888837814331, "learning_rate": 4.941374043591247e-06, "loss": 1.0928, "step": 8100 }, { "epoch": 0.0587055817353978, "grad_norm": 0.1912764608860016, "learning_rate": 4.941301656930661e-06, "loss": 1.0815, "step": 8110 }, { "epoch": 0.05877796839598399, "grad_norm": 0.17277614772319794, "learning_rate": 4.941229270270075e-06, "loss": 1.0938, "step": 8120 }, { "epoch": 0.05885035505657017, "grad_norm": 0.18441365659236908, "learning_rate": 4.941156883609488e-06, "loss": 1.1084, "step": 8130 }, { "epoch": 0.05892274171715636, "grad_norm": 0.24493847787380219, "learning_rate": 4.941084496948903e-06, "loss": 1.0924, "step": 8140 }, { "epoch": 0.058995128377742546, "grad_norm": 0.1992846131324768, "learning_rate": 4.941012110288316e-06, "loss": 1.0969, "step": 8150 }, { "epoch": 0.05906751503832874, "grad_norm": 0.20065009593963623, "learning_rate": 4.94093972362773e-06, "loss": 1.1019, "step": 8160 }, { "epoch": 0.05913990169891492, "grad_norm": 0.17659001052379608, "learning_rate": 4.940867336967144e-06, "loss": 1.0926, "step": 8170 }, { "epoch": 0.05921228835950111, "grad_norm": 0.19576965272426605, "learning_rate": 4.940794950306558e-06, "loss": 1.1049, "step": 8180 }, { "epoch": 0.059284675020087295, "grad_norm": 0.20090840756893158, "learning_rate": 4.940722563645972e-06, "loss": 1.1027, "step": 8190 }, { "epoch": 0.059357061680673485, "grad_norm": 0.16997766494750977, "learning_rate": 4.940650176985385e-06, "loss": 1.0915, "step": 8200 }, { "epoch": 0.059429448341259676, "grad_norm": 0.20561368763446808, "learning_rate": 4.940577790324799e-06, "loss": 1.0983, "step": 8210 }, { "epoch": 0.05950183500184586, "grad_norm": 0.20617088675498962, "learning_rate": 4.940505403664213e-06, "loss": 1.0889, "step": 8220 }, { "epoch": 0.05957422166243205, "grad_norm": 0.1871093362569809, "learning_rate": 4.940433017003627e-06, "loss": 1.0964, "step": 8230 }, { "epoch": 0.059646608323018234, "grad_norm": 0.1626369059085846, "learning_rate": 4.940360630343041e-06, "loss": 1.0938, "step": 8240 }, { "epoch": 0.059718994983604425, "grad_norm": 0.18901404738426208, "learning_rate": 4.940288243682454e-06, "loss": 1.0823, "step": 8250 }, { "epoch": 0.05979138164419061, "grad_norm": 0.18634545803070068, "learning_rate": 4.940215857021869e-06, "loss": 1.0863, "step": 8260 }, { "epoch": 0.0598637683047768, "grad_norm": 0.1944178342819214, "learning_rate": 4.940143470361282e-06, "loss": 1.0806, "step": 8270 }, { "epoch": 0.05993615496536298, "grad_norm": 0.1961442083120346, "learning_rate": 4.940071083700696e-06, "loss": 1.0876, "step": 8280 }, { "epoch": 0.06000854162594917, "grad_norm": 0.17273077368736267, "learning_rate": 4.9399986970401095e-06, "loss": 1.091, "step": 8290 }, { "epoch": 0.06008092828653536, "grad_norm": 0.24428923428058624, "learning_rate": 4.939926310379524e-06, "loss": 1.0987, "step": 8300 }, { "epoch": 0.06015331494712155, "grad_norm": 0.18236348032951355, "learning_rate": 4.939853923718938e-06, "loss": 1.0951, "step": 8310 }, { "epoch": 0.06022570160770773, "grad_norm": 0.18725799024105072, "learning_rate": 4.939781537058351e-06, "loss": 1.0989, "step": 8320 }, { "epoch": 0.06029808826829392, "grad_norm": 0.21253715455532074, "learning_rate": 4.939709150397765e-06, "loss": 1.1017, "step": 8330 }, { "epoch": 0.060370474928880105, "grad_norm": 0.17409272491931915, "learning_rate": 4.939636763737179e-06, "loss": 1.0809, "step": 8340 }, { "epoch": 0.060442861589466296, "grad_norm": 0.21270409226417542, "learning_rate": 4.939564377076593e-06, "loss": 1.084, "step": 8350 }, { "epoch": 0.06051524825005248, "grad_norm": 0.17901867628097534, "learning_rate": 4.9394919904160065e-06, "loss": 1.0841, "step": 8360 }, { "epoch": 0.06058763491063867, "grad_norm": 0.18769490718841553, "learning_rate": 4.93941960375542e-06, "loss": 1.0789, "step": 8370 }, { "epoch": 0.060660021571224854, "grad_norm": 0.18529854714870453, "learning_rate": 4.939347217094835e-06, "loss": 1.0867, "step": 8380 }, { "epoch": 0.060732408231811044, "grad_norm": 0.18796664476394653, "learning_rate": 4.939274830434248e-06, "loss": 1.0871, "step": 8390 }, { "epoch": 0.06080479489239723, "grad_norm": 0.1806957870721817, "learning_rate": 4.939202443773662e-06, "loss": 1.1102, "step": 8400 }, { "epoch": 0.06087718155298342, "grad_norm": 0.19365540146827698, "learning_rate": 4.9391300571130754e-06, "loss": 1.0842, "step": 8410 }, { "epoch": 0.0609495682135696, "grad_norm": 0.18744197487831116, "learning_rate": 4.93905767045249e-06, "loss": 1.1065, "step": 8420 }, { "epoch": 0.06102195487415579, "grad_norm": 0.1846928596496582, "learning_rate": 4.9389852837919035e-06, "loss": 1.0979, "step": 8430 }, { "epoch": 0.061094341534741976, "grad_norm": 0.17853565514087677, "learning_rate": 4.938912897131317e-06, "loss": 1.0965, "step": 8440 }, { "epoch": 0.06116672819532817, "grad_norm": 0.18652281165122986, "learning_rate": 4.938840510470731e-06, "loss": 1.0835, "step": 8450 }, { "epoch": 0.06123911485591435, "grad_norm": 0.2018202245235443, "learning_rate": 4.938768123810145e-06, "loss": 1.0853, "step": 8460 }, { "epoch": 0.06131150151650054, "grad_norm": 0.17888695001602173, "learning_rate": 4.938695737149559e-06, "loss": 1.0994, "step": 8470 }, { "epoch": 0.061383888177086725, "grad_norm": 0.18451561033725739, "learning_rate": 4.9386233504889725e-06, "loss": 1.0874, "step": 8480 }, { "epoch": 0.061456274837672915, "grad_norm": 0.18623459339141846, "learning_rate": 4.938550963828386e-06, "loss": 1.108, "step": 8490 }, { "epoch": 0.0615286614982591, "grad_norm": 0.1710231751203537, "learning_rate": 4.9384785771678e-06, "loss": 1.0908, "step": 8500 }, { "epoch": 0.06160104815884529, "grad_norm": 0.2325315922498703, "learning_rate": 4.938406190507214e-06, "loss": 1.0932, "step": 8510 }, { "epoch": 0.06167343481943147, "grad_norm": 0.17885486781597137, "learning_rate": 4.938333803846628e-06, "loss": 1.088, "step": 8520 }, { "epoch": 0.061745821480017664, "grad_norm": 0.18864506483078003, "learning_rate": 4.938261417186041e-06, "loss": 1.0981, "step": 8530 }, { "epoch": 0.06181820814060385, "grad_norm": 0.18470600247383118, "learning_rate": 4.938189030525455e-06, "loss": 1.0974, "step": 8540 }, { "epoch": 0.06189059480119004, "grad_norm": 0.18703562021255493, "learning_rate": 4.9381166438648695e-06, "loss": 1.0987, "step": 8550 }, { "epoch": 0.06196298146177622, "grad_norm": 0.18582236766815186, "learning_rate": 4.938044257204283e-06, "loss": 1.0925, "step": 8560 }, { "epoch": 0.06203536812236241, "grad_norm": 0.2140107899904251, "learning_rate": 4.937971870543697e-06, "loss": 1.0907, "step": 8570 }, { "epoch": 0.062107754782948596, "grad_norm": 0.19368021190166473, "learning_rate": 4.93789948388311e-06, "loss": 1.0877, "step": 8580 }, { "epoch": 0.06218014144353479, "grad_norm": 0.17043587565422058, "learning_rate": 4.937827097222525e-06, "loss": 1.0851, "step": 8590 }, { "epoch": 0.06225252810412097, "grad_norm": 0.18179139494895935, "learning_rate": 4.9377547105619375e-06, "loss": 1.095, "step": 8600 }, { "epoch": 0.06232491476470716, "grad_norm": 0.19978579878807068, "learning_rate": 4.937682323901352e-06, "loss": 1.0989, "step": 8610 }, { "epoch": 0.062397301425293344, "grad_norm": 0.1800697147846222, "learning_rate": 4.937609937240766e-06, "loss": 1.1074, "step": 8620 }, { "epoch": 0.062469688085879535, "grad_norm": 0.176766499876976, "learning_rate": 4.937537550580179e-06, "loss": 1.0897, "step": 8630 }, { "epoch": 0.06254207474646573, "grad_norm": 0.19286136329174042, "learning_rate": 4.937465163919593e-06, "loss": 1.1041, "step": 8640 }, { "epoch": 0.0626144614070519, "grad_norm": 0.2166859209537506, "learning_rate": 4.937392777259007e-06, "loss": 1.0827, "step": 8650 }, { "epoch": 0.06268684806763809, "grad_norm": 0.17568807303905487, "learning_rate": 4.937320390598421e-06, "loss": 1.1016, "step": 8660 }, { "epoch": 0.06275923472822428, "grad_norm": 0.19446556270122528, "learning_rate": 4.9372480039378346e-06, "loss": 1.0951, "step": 8670 }, { "epoch": 0.06283162138881047, "grad_norm": 0.1830175518989563, "learning_rate": 4.937175617277248e-06, "loss": 1.0891, "step": 8680 }, { "epoch": 0.06290400804939665, "grad_norm": 0.170004740357399, "learning_rate": 4.937103230616662e-06, "loss": 1.0872, "step": 8690 }, { "epoch": 0.06297639470998284, "grad_norm": 0.1958620399236679, "learning_rate": 4.937030843956076e-06, "loss": 1.0987, "step": 8700 }, { "epoch": 0.06304878137056903, "grad_norm": 0.19869394600391388, "learning_rate": 4.93695845729549e-06, "loss": 1.0727, "step": 8710 }, { "epoch": 0.06312116803115522, "grad_norm": 0.17475168406963348, "learning_rate": 4.9368860706349035e-06, "loss": 1.0991, "step": 8720 }, { "epoch": 0.0631935546917414, "grad_norm": 0.19080884754657745, "learning_rate": 4.936813683974317e-06, "loss": 1.0988, "step": 8730 }, { "epoch": 0.06326594135232759, "grad_norm": 0.17002183198928833, "learning_rate": 4.9367412973137316e-06, "loss": 1.0859, "step": 8740 }, { "epoch": 0.06333832801291378, "grad_norm": 0.19736173748970032, "learning_rate": 4.936668910653145e-06, "loss": 1.0757, "step": 8750 }, { "epoch": 0.06341071467349997, "grad_norm": 0.20306764543056488, "learning_rate": 4.936596523992559e-06, "loss": 1.091, "step": 8760 }, { "epoch": 0.06348310133408615, "grad_norm": 0.16739881038665771, "learning_rate": 4.936524137331972e-06, "loss": 1.0796, "step": 8770 }, { "epoch": 0.06355548799467234, "grad_norm": 0.18593810498714447, "learning_rate": 4.936451750671387e-06, "loss": 1.0916, "step": 8780 }, { "epoch": 0.06362787465525853, "grad_norm": 0.17287929356098175, "learning_rate": 4.9363793640108005e-06, "loss": 1.0747, "step": 8790 }, { "epoch": 0.06370026131584472, "grad_norm": 0.19802750647068024, "learning_rate": 4.936306977350214e-06, "loss": 1.1005, "step": 8800 }, { "epoch": 0.06377264797643091, "grad_norm": 0.17878587543964386, "learning_rate": 4.936234590689628e-06, "loss": 1.075, "step": 8810 }, { "epoch": 0.06384503463701709, "grad_norm": 0.1824096143245697, "learning_rate": 4.936162204029042e-06, "loss": 1.0706, "step": 8820 }, { "epoch": 0.06391742129760328, "grad_norm": 0.20461733639240265, "learning_rate": 4.936089817368456e-06, "loss": 1.0816, "step": 8830 }, { "epoch": 0.06398980795818947, "grad_norm": 0.17579731345176697, "learning_rate": 4.936017430707869e-06, "loss": 1.0805, "step": 8840 }, { "epoch": 0.06406219461877566, "grad_norm": 0.18281228840351105, "learning_rate": 4.935945044047283e-06, "loss": 1.0857, "step": 8850 }, { "epoch": 0.06413458127936184, "grad_norm": 0.21296283602714539, "learning_rate": 4.9358726573866975e-06, "loss": 1.098, "step": 8860 }, { "epoch": 0.06420696793994803, "grad_norm": 0.28200826048851013, "learning_rate": 4.935800270726111e-06, "loss": 1.0917, "step": 8870 }, { "epoch": 0.06427935460053422, "grad_norm": 0.1753077208995819, "learning_rate": 4.935727884065525e-06, "loss": 1.0908, "step": 8880 }, { "epoch": 0.06435174126112041, "grad_norm": 0.17760993540287018, "learning_rate": 4.935655497404938e-06, "loss": 1.0828, "step": 8890 }, { "epoch": 0.06442412792170658, "grad_norm": 0.1909879595041275, "learning_rate": 4.935583110744353e-06, "loss": 1.0867, "step": 8900 }, { "epoch": 0.06449651458229277, "grad_norm": 0.19046929478645325, "learning_rate": 4.9355107240837664e-06, "loss": 1.0913, "step": 8910 }, { "epoch": 0.06456890124287896, "grad_norm": 0.19228294491767883, "learning_rate": 4.93543833742318e-06, "loss": 1.0848, "step": 8920 }, { "epoch": 0.06464128790346516, "grad_norm": 0.1879161149263382, "learning_rate": 4.935365950762594e-06, "loss": 1.0996, "step": 8930 }, { "epoch": 0.06471367456405133, "grad_norm": 0.18242213129997253, "learning_rate": 4.935293564102008e-06, "loss": 1.0873, "step": 8940 }, { "epoch": 0.06478606122463752, "grad_norm": 0.19306018948554993, "learning_rate": 4.935221177441422e-06, "loss": 1.092, "step": 8950 }, { "epoch": 0.06485844788522371, "grad_norm": 0.18336234986782074, "learning_rate": 4.935148790780835e-06, "loss": 1.1006, "step": 8960 }, { "epoch": 0.0649308345458099, "grad_norm": 0.1992163509130478, "learning_rate": 4.935076404120249e-06, "loss": 1.0969, "step": 8970 }, { "epoch": 0.06500322120639608, "grad_norm": 0.187247171998024, "learning_rate": 4.9350040174596634e-06, "loss": 1.0934, "step": 8980 }, { "epoch": 0.06507560786698227, "grad_norm": 0.35581186413764954, "learning_rate": 4.934931630799077e-06, "loss": 1.0897, "step": 8990 }, { "epoch": 0.06514799452756846, "grad_norm": 0.18312859535217285, "learning_rate": 4.934859244138491e-06, "loss": 1.0841, "step": 9000 }, { "epoch": 0.06522038118815465, "grad_norm": 0.18880075216293335, "learning_rate": 4.934786857477904e-06, "loss": 1.0709, "step": 9010 }, { "epoch": 0.06529276784874083, "grad_norm": 0.1736619919538498, "learning_rate": 4.934714470817319e-06, "loss": 1.0891, "step": 9020 }, { "epoch": 0.06536515450932702, "grad_norm": 0.1821936070919037, "learning_rate": 4.934642084156732e-06, "loss": 1.0867, "step": 9030 }, { "epoch": 0.06543754116991321, "grad_norm": 0.16732348501682281, "learning_rate": 4.934569697496146e-06, "loss": 1.0913, "step": 9040 }, { "epoch": 0.0655099278304994, "grad_norm": 0.17859619855880737, "learning_rate": 4.93449731083556e-06, "loss": 1.0942, "step": 9050 }, { "epoch": 0.06558231449108558, "grad_norm": 0.1915702074766159, "learning_rate": 4.934424924174974e-06, "loss": 1.0917, "step": 9060 }, { "epoch": 0.06565470115167177, "grad_norm": 0.17785701155662537, "learning_rate": 4.934352537514388e-06, "loss": 1.0749, "step": 9070 }, { "epoch": 0.06572708781225796, "grad_norm": 0.17183399200439453, "learning_rate": 4.934280150853801e-06, "loss": 1.0896, "step": 9080 }, { "epoch": 0.06579947447284415, "grad_norm": 0.16996833682060242, "learning_rate": 4.934207764193215e-06, "loss": 1.0842, "step": 9090 }, { "epoch": 0.06587186113343033, "grad_norm": 0.17851723730564117, "learning_rate": 4.934135377532629e-06, "loss": 1.0824, "step": 9100 }, { "epoch": 0.06594424779401652, "grad_norm": 0.18523283302783966, "learning_rate": 4.934062990872043e-06, "loss": 1.081, "step": 9110 }, { "epoch": 0.06601663445460271, "grad_norm": 0.1726391464471817, "learning_rate": 4.933990604211457e-06, "loss": 1.0866, "step": 9120 }, { "epoch": 0.0660890211151889, "grad_norm": 0.18292652070522308, "learning_rate": 4.93391821755087e-06, "loss": 1.0933, "step": 9130 }, { "epoch": 0.06616140777577507, "grad_norm": 0.18601487576961517, "learning_rate": 4.933845830890284e-06, "loss": 1.0834, "step": 9140 }, { "epoch": 0.06623379443636127, "grad_norm": 0.16572198271751404, "learning_rate": 4.9337734442296974e-06, "loss": 1.0698, "step": 9150 }, { "epoch": 0.06630618109694746, "grad_norm": 0.18648765981197357, "learning_rate": 4.933701057569111e-06, "loss": 1.0965, "step": 9160 }, { "epoch": 0.06637856775753365, "grad_norm": 0.1860000491142273, "learning_rate": 4.9336286709085255e-06, "loss": 1.0782, "step": 9170 }, { "epoch": 0.06645095441811982, "grad_norm": 0.2106804996728897, "learning_rate": 4.933556284247939e-06, "loss": 1.0899, "step": 9180 }, { "epoch": 0.06652334107870601, "grad_norm": 0.16858477890491486, "learning_rate": 4.933483897587353e-06, "loss": 1.0902, "step": 9190 }, { "epoch": 0.0665957277392922, "grad_norm": 0.17290134727954865, "learning_rate": 4.933411510926766e-06, "loss": 1.0991, "step": 9200 }, { "epoch": 0.0666681143998784, "grad_norm": 0.18629100918769836, "learning_rate": 4.933339124266181e-06, "loss": 1.0849, "step": 9210 }, { "epoch": 0.06674050106046457, "grad_norm": 0.18276071548461914, "learning_rate": 4.9332667376055945e-06, "loss": 1.08, "step": 9220 }, { "epoch": 0.06681288772105076, "grad_norm": 0.2082335501909256, "learning_rate": 4.933194350945008e-06, "loss": 1.0747, "step": 9230 }, { "epoch": 0.06688527438163695, "grad_norm": 0.20962758362293243, "learning_rate": 4.933121964284422e-06, "loss": 1.0784, "step": 9240 }, { "epoch": 0.06695766104222314, "grad_norm": 0.20550638437271118, "learning_rate": 4.933049577623836e-06, "loss": 1.0856, "step": 9250 }, { "epoch": 0.06703004770280932, "grad_norm": 0.17833620309829712, "learning_rate": 4.93297719096325e-06, "loss": 1.0797, "step": 9260 }, { "epoch": 0.06710243436339551, "grad_norm": 0.17725925147533417, "learning_rate": 4.932904804302663e-06, "loss": 1.0872, "step": 9270 }, { "epoch": 0.0671748210239817, "grad_norm": 0.1763269156217575, "learning_rate": 4.932832417642077e-06, "loss": 1.0956, "step": 9280 }, { "epoch": 0.06724720768456789, "grad_norm": 0.17126107215881348, "learning_rate": 4.932760030981491e-06, "loss": 1.0727, "step": 9290 }, { "epoch": 0.06731959434515407, "grad_norm": 0.16544747352600098, "learning_rate": 4.932687644320905e-06, "loss": 1.0782, "step": 9300 }, { "epoch": 0.06739198100574026, "grad_norm": 0.175096794962883, "learning_rate": 4.932615257660319e-06, "loss": 1.0884, "step": 9310 }, { "epoch": 0.06746436766632645, "grad_norm": 0.1752748042345047, "learning_rate": 4.932542870999732e-06, "loss": 1.0963, "step": 9320 }, { "epoch": 0.06753675432691264, "grad_norm": 0.17110379040241241, "learning_rate": 4.932470484339146e-06, "loss": 1.1028, "step": 9330 }, { "epoch": 0.06760914098749882, "grad_norm": 0.20907478034496307, "learning_rate": 4.93239809767856e-06, "loss": 1.0764, "step": 9340 }, { "epoch": 0.06768152764808501, "grad_norm": 0.19506807625293732, "learning_rate": 4.932325711017974e-06, "loss": 1.065, "step": 9350 }, { "epoch": 0.0677539143086712, "grad_norm": 0.1756713092327118, "learning_rate": 4.932253324357388e-06, "loss": 1.1012, "step": 9360 }, { "epoch": 0.06782630096925739, "grad_norm": 0.18884608149528503, "learning_rate": 4.932180937696801e-06, "loss": 1.0739, "step": 9370 }, { "epoch": 0.06789868762984358, "grad_norm": 0.19393788278102875, "learning_rate": 4.932108551036216e-06, "loss": 1.0933, "step": 9380 }, { "epoch": 0.06797107429042976, "grad_norm": 0.18844221532344818, "learning_rate": 4.932036164375629e-06, "loss": 1.0796, "step": 9390 }, { "epoch": 0.06804346095101595, "grad_norm": 0.17534899711608887, "learning_rate": 4.931963777715043e-06, "loss": 1.0807, "step": 9400 }, { "epoch": 0.06811584761160214, "grad_norm": 0.1886800080537796, "learning_rate": 4.9318913910544566e-06, "loss": 1.0834, "step": 9410 }, { "epoch": 0.06818823427218833, "grad_norm": 0.20587556064128876, "learning_rate": 4.931819004393871e-06, "loss": 1.1054, "step": 9420 }, { "epoch": 0.0682606209327745, "grad_norm": 0.18019132316112518, "learning_rate": 4.931746617733285e-06, "loss": 1.076, "step": 9430 }, { "epoch": 0.0683330075933607, "grad_norm": 0.18654106557369232, "learning_rate": 4.931674231072698e-06, "loss": 1.0768, "step": 9440 }, { "epoch": 0.06840539425394689, "grad_norm": 0.1790568232536316, "learning_rate": 4.931601844412112e-06, "loss": 1.0751, "step": 9450 }, { "epoch": 0.06847778091453308, "grad_norm": 0.17355284094810486, "learning_rate": 4.931529457751526e-06, "loss": 1.0755, "step": 9460 }, { "epoch": 0.06855016757511925, "grad_norm": 0.18090367317199707, "learning_rate": 4.93145707109094e-06, "loss": 1.0927, "step": 9470 }, { "epoch": 0.06862255423570544, "grad_norm": 0.18429531157016754, "learning_rate": 4.9313846844303536e-06, "loss": 1.0857, "step": 9480 }, { "epoch": 0.06869494089629163, "grad_norm": 0.16996966302394867, "learning_rate": 4.931312297769767e-06, "loss": 1.0955, "step": 9490 }, { "epoch": 0.06876732755687782, "grad_norm": 0.17073926329612732, "learning_rate": 4.931239911109182e-06, "loss": 1.0934, "step": 9500 }, { "epoch": 0.068839714217464, "grad_norm": 0.19295451045036316, "learning_rate": 4.931167524448595e-06, "loss": 1.0843, "step": 9510 }, { "epoch": 0.06891210087805019, "grad_norm": 0.19370391964912415, "learning_rate": 4.931095137788009e-06, "loss": 1.0939, "step": 9520 }, { "epoch": 0.06898448753863638, "grad_norm": 0.18708017468452454, "learning_rate": 4.9310227511274225e-06, "loss": 1.0761, "step": 9530 }, { "epoch": 0.06905687419922257, "grad_norm": 0.16977204382419586, "learning_rate": 4.930950364466837e-06, "loss": 1.0833, "step": 9540 }, { "epoch": 0.06912926085980875, "grad_norm": 0.1928476095199585, "learning_rate": 4.9308779778062506e-06, "loss": 1.0773, "step": 9550 }, { "epoch": 0.06920164752039494, "grad_norm": 0.19466859102249146, "learning_rate": 4.930805591145664e-06, "loss": 1.07, "step": 9560 }, { "epoch": 0.06927403418098113, "grad_norm": 0.18574132025241852, "learning_rate": 4.930733204485078e-06, "loss": 1.0887, "step": 9570 }, { "epoch": 0.06934642084156732, "grad_norm": 0.17645835876464844, "learning_rate": 4.930660817824492e-06, "loss": 1.0841, "step": 9580 }, { "epoch": 0.0694188075021535, "grad_norm": 0.17621047794818878, "learning_rate": 4.930588431163906e-06, "loss": 1.0909, "step": 9590 }, { "epoch": 0.06949119416273969, "grad_norm": 0.1808473765850067, "learning_rate": 4.9305160445033195e-06, "loss": 1.0885, "step": 9600 }, { "epoch": 0.06956358082332588, "grad_norm": 0.1906941682100296, "learning_rate": 4.930443657842733e-06, "loss": 1.0699, "step": 9610 }, { "epoch": 0.06963596748391207, "grad_norm": 0.18440672755241394, "learning_rate": 4.9303712711821476e-06, "loss": 1.0753, "step": 9620 }, { "epoch": 0.06970835414449825, "grad_norm": 0.31207403540611267, "learning_rate": 4.930298884521561e-06, "loss": 1.0821, "step": 9630 }, { "epoch": 0.06978074080508444, "grad_norm": 0.178600013256073, "learning_rate": 4.930226497860975e-06, "loss": 1.0827, "step": 9640 }, { "epoch": 0.06985312746567063, "grad_norm": 0.1899259239435196, "learning_rate": 4.930154111200388e-06, "loss": 1.0673, "step": 9650 }, { "epoch": 0.06992551412625682, "grad_norm": 0.1868361085653305, "learning_rate": 4.930081724539802e-06, "loss": 1.0838, "step": 9660 }, { "epoch": 0.069997900786843, "grad_norm": 0.19484713673591614, "learning_rate": 4.930009337879216e-06, "loss": 1.0806, "step": 9670 }, { "epoch": 0.07007028744742919, "grad_norm": 0.1849289834499359, "learning_rate": 4.929936951218629e-06, "loss": 1.0847, "step": 9680 }, { "epoch": 0.07014267410801538, "grad_norm": 0.1799035519361496, "learning_rate": 4.929864564558044e-06, "loss": 1.0791, "step": 9690 }, { "epoch": 0.07021506076860157, "grad_norm": 0.19453303515911102, "learning_rate": 4.929792177897457e-06, "loss": 1.0923, "step": 9700 }, { "epoch": 0.07028744742918774, "grad_norm": 0.20945830643177032, "learning_rate": 4.929719791236871e-06, "loss": 1.1022, "step": 9710 }, { "epoch": 0.07035983408977393, "grad_norm": 0.1825156956911087, "learning_rate": 4.929647404576285e-06, "loss": 1.0819, "step": 9720 }, { "epoch": 0.07043222075036012, "grad_norm": 0.18250644207000732, "learning_rate": 4.929575017915699e-06, "loss": 1.0681, "step": 9730 }, { "epoch": 0.07050460741094632, "grad_norm": 0.20941105484962463, "learning_rate": 4.929502631255113e-06, "loss": 1.0651, "step": 9740 }, { "epoch": 0.07057699407153249, "grad_norm": 0.1895821988582611, "learning_rate": 4.929430244594526e-06, "loss": 1.0919, "step": 9750 }, { "epoch": 0.07064938073211868, "grad_norm": 0.16658397018909454, "learning_rate": 4.92935785793394e-06, "loss": 1.0787, "step": 9760 }, { "epoch": 0.07072176739270487, "grad_norm": 0.19257685542106628, "learning_rate": 4.929285471273354e-06, "loss": 1.0788, "step": 9770 }, { "epoch": 0.07079415405329106, "grad_norm": 0.17767976224422455, "learning_rate": 4.929213084612768e-06, "loss": 1.0762, "step": 9780 }, { "epoch": 0.07086654071387724, "grad_norm": 0.2234843373298645, "learning_rate": 4.929140697952182e-06, "loss": 1.0751, "step": 9790 }, { "epoch": 0.07093892737446343, "grad_norm": 0.2335948348045349, "learning_rate": 4.929068311291595e-06, "loss": 1.0819, "step": 9800 }, { "epoch": 0.07101131403504962, "grad_norm": 0.1768556535243988, "learning_rate": 4.92899592463101e-06, "loss": 1.0837, "step": 9810 }, { "epoch": 0.07108370069563581, "grad_norm": 0.17308947443962097, "learning_rate": 4.928923537970423e-06, "loss": 1.074, "step": 9820 }, { "epoch": 0.07115608735622199, "grad_norm": 0.18633995950222015, "learning_rate": 4.928851151309837e-06, "loss": 1.0755, "step": 9830 }, { "epoch": 0.07122847401680818, "grad_norm": 0.19569677114486694, "learning_rate": 4.9287787646492505e-06, "loss": 1.0815, "step": 9840 }, { "epoch": 0.07130086067739437, "grad_norm": 0.19831666350364685, "learning_rate": 4.928706377988665e-06, "loss": 1.084, "step": 9850 }, { "epoch": 0.07137324733798056, "grad_norm": 0.18258638679981232, "learning_rate": 4.928633991328079e-06, "loss": 1.0666, "step": 9860 }, { "epoch": 0.07144563399856674, "grad_norm": 0.16318519413471222, "learning_rate": 4.928561604667492e-06, "loss": 1.0754, "step": 9870 }, { "epoch": 0.07151802065915293, "grad_norm": 0.17054226994514465, "learning_rate": 4.928489218006906e-06, "loss": 1.0815, "step": 9880 }, { "epoch": 0.07159040731973912, "grad_norm": 0.1713060736656189, "learning_rate": 4.92841683134632e-06, "loss": 1.0822, "step": 9890 }, { "epoch": 0.07166279398032531, "grad_norm": 0.17956681549549103, "learning_rate": 4.928344444685734e-06, "loss": 1.0747, "step": 9900 }, { "epoch": 0.07173518064091149, "grad_norm": 0.19176030158996582, "learning_rate": 4.9282720580251475e-06, "loss": 1.074, "step": 9910 }, { "epoch": 0.07180756730149768, "grad_norm": 0.17614677548408508, "learning_rate": 4.928199671364561e-06, "loss": 1.0894, "step": 9920 }, { "epoch": 0.07187995396208387, "grad_norm": 0.1864258348941803, "learning_rate": 4.928127284703975e-06, "loss": 1.0826, "step": 9930 }, { "epoch": 0.07195234062267006, "grad_norm": 0.18494221568107605, "learning_rate": 4.928054898043389e-06, "loss": 1.0613, "step": 9940 }, { "epoch": 0.07202472728325623, "grad_norm": 0.17159555852413177, "learning_rate": 4.927982511382803e-06, "loss": 1.073, "step": 9950 }, { "epoch": 0.07209711394384243, "grad_norm": 0.18387450277805328, "learning_rate": 4.9279101247222165e-06, "loss": 1.0729, "step": 9960 }, { "epoch": 0.07216950060442862, "grad_norm": 0.1786179393529892, "learning_rate": 4.92783773806163e-06, "loss": 1.0867, "step": 9970 }, { "epoch": 0.0722418872650148, "grad_norm": 0.1791893094778061, "learning_rate": 4.9277653514010445e-06, "loss": 1.0783, "step": 9980 }, { "epoch": 0.072314273925601, "grad_norm": 0.18308618664741516, "learning_rate": 4.927692964740458e-06, "loss": 1.0667, "step": 9990 }, { "epoch": 0.07238666058618717, "grad_norm": 0.18215444684028625, "learning_rate": 4.927620578079872e-06, "loss": 1.0804, "step": 10000 }, { "epoch": 0.07245904724677336, "grad_norm": 0.1691853404045105, "learning_rate": 4.927548191419285e-06, "loss": 1.0837, "step": 10010 }, { "epoch": 0.07253143390735955, "grad_norm": 0.1819877028465271, "learning_rate": 4.9274758047587e-06, "loss": 1.0799, "step": 10020 }, { "epoch": 0.07260382056794575, "grad_norm": 0.17936040461063385, "learning_rate": 4.9274034180981135e-06, "loss": 1.0675, "step": 10030 }, { "epoch": 0.07267620722853192, "grad_norm": 0.17777326703071594, "learning_rate": 4.927331031437527e-06, "loss": 1.0674, "step": 10040 }, { "epoch": 0.07274859388911811, "grad_norm": 0.22706569731235504, "learning_rate": 4.927258644776941e-06, "loss": 1.0901, "step": 10050 }, { "epoch": 0.0728209805497043, "grad_norm": 0.17453064024448395, "learning_rate": 4.927186258116355e-06, "loss": 1.0665, "step": 10060 }, { "epoch": 0.0728933672102905, "grad_norm": 0.1731829047203064, "learning_rate": 4.927113871455769e-06, "loss": 1.0734, "step": 10070 }, { "epoch": 0.07296575387087667, "grad_norm": 0.1648159921169281, "learning_rate": 4.927041484795182e-06, "loss": 1.0762, "step": 10080 }, { "epoch": 0.07303814053146286, "grad_norm": 0.19320763647556305, "learning_rate": 4.926969098134596e-06, "loss": 1.0672, "step": 10090 }, { "epoch": 0.07311052719204905, "grad_norm": 0.17869828641414642, "learning_rate": 4.9268967114740105e-06, "loss": 1.0748, "step": 10100 }, { "epoch": 0.07318291385263524, "grad_norm": 0.17317935824394226, "learning_rate": 4.926824324813424e-06, "loss": 1.0725, "step": 10110 }, { "epoch": 0.07325530051322142, "grad_norm": 0.18840764462947845, "learning_rate": 4.926751938152838e-06, "loss": 1.0902, "step": 10120 }, { "epoch": 0.07332768717380761, "grad_norm": 0.18961584568023682, "learning_rate": 4.926679551492251e-06, "loss": 1.0677, "step": 10130 }, { "epoch": 0.0734000738343938, "grad_norm": 0.1739116609096527, "learning_rate": 4.926607164831666e-06, "loss": 1.0744, "step": 10140 }, { "epoch": 0.07347246049497999, "grad_norm": 0.19770686328411102, "learning_rate": 4.926534778171079e-06, "loss": 1.0968, "step": 10150 }, { "epoch": 0.07354484715556617, "grad_norm": 0.17552542686462402, "learning_rate": 4.926462391510493e-06, "loss": 1.0851, "step": 10160 }, { "epoch": 0.07361723381615236, "grad_norm": 0.18830542266368866, "learning_rate": 4.926390004849907e-06, "loss": 1.0855, "step": 10170 }, { "epoch": 0.07368962047673855, "grad_norm": 0.2055501937866211, "learning_rate": 4.926317618189321e-06, "loss": 1.0803, "step": 10180 }, { "epoch": 0.07376200713732474, "grad_norm": 0.18813519179821014, "learning_rate": 4.926245231528734e-06, "loss": 1.071, "step": 10190 }, { "epoch": 0.07383439379791092, "grad_norm": 0.2169976532459259, "learning_rate": 4.9261728448681475e-06, "loss": 1.0872, "step": 10200 }, { "epoch": 0.0739067804584971, "grad_norm": 0.2091887891292572, "learning_rate": 4.926100458207562e-06, "loss": 1.0811, "step": 10210 }, { "epoch": 0.0739791671190833, "grad_norm": 0.17298966646194458, "learning_rate": 4.9260280715469756e-06, "loss": 1.0846, "step": 10220 }, { "epoch": 0.07405155377966949, "grad_norm": 0.19214802980422974, "learning_rate": 4.925955684886389e-06, "loss": 1.0804, "step": 10230 }, { "epoch": 0.07412394044025566, "grad_norm": 0.18159951269626617, "learning_rate": 4.925883298225803e-06, "loss": 1.0818, "step": 10240 }, { "epoch": 0.07419632710084186, "grad_norm": 0.2073182910680771, "learning_rate": 4.925810911565217e-06, "loss": 1.0717, "step": 10250 }, { "epoch": 0.07426871376142805, "grad_norm": 0.17683623731136322, "learning_rate": 4.925738524904631e-06, "loss": 1.0691, "step": 10260 }, { "epoch": 0.07434110042201424, "grad_norm": 0.17853355407714844, "learning_rate": 4.9256661382440445e-06, "loss": 1.0807, "step": 10270 }, { "epoch": 0.07441348708260041, "grad_norm": 0.1861315667629242, "learning_rate": 4.925593751583458e-06, "loss": 1.075, "step": 10280 }, { "epoch": 0.0744858737431866, "grad_norm": 0.22674742341041565, "learning_rate": 4.9255213649228726e-06, "loss": 1.0824, "step": 10290 }, { "epoch": 0.0745582604037728, "grad_norm": 0.1941600888967514, "learning_rate": 4.925448978262286e-06, "loss": 1.0663, "step": 10300 }, { "epoch": 0.07463064706435898, "grad_norm": 0.1759897917509079, "learning_rate": 4.9253765916017e-06, "loss": 1.0761, "step": 10310 }, { "epoch": 0.07470303372494516, "grad_norm": 0.1744421124458313, "learning_rate": 4.925304204941113e-06, "loss": 1.0705, "step": 10320 }, { "epoch": 0.07477542038553135, "grad_norm": 0.1926373541355133, "learning_rate": 4.925231818280528e-06, "loss": 1.0714, "step": 10330 }, { "epoch": 0.07484780704611754, "grad_norm": 0.19619183242321014, "learning_rate": 4.9251594316199415e-06, "loss": 1.0728, "step": 10340 }, { "epoch": 0.07492019370670373, "grad_norm": 0.23807357251644135, "learning_rate": 4.925087044959355e-06, "loss": 1.0772, "step": 10350 }, { "epoch": 0.07499258036728991, "grad_norm": 0.1988050788640976, "learning_rate": 4.925014658298769e-06, "loss": 1.0829, "step": 10360 }, { "epoch": 0.0750649670278761, "grad_norm": 0.16706973314285278, "learning_rate": 4.924942271638183e-06, "loss": 1.0642, "step": 10370 }, { "epoch": 0.07513735368846229, "grad_norm": 0.19122160971164703, "learning_rate": 4.924869884977597e-06, "loss": 1.0973, "step": 10380 }, { "epoch": 0.07520974034904848, "grad_norm": 0.1846979856491089, "learning_rate": 4.92479749831701e-06, "loss": 1.0775, "step": 10390 }, { "epoch": 0.07528212700963466, "grad_norm": 0.17939844727516174, "learning_rate": 4.924725111656424e-06, "loss": 1.0644, "step": 10400 }, { "epoch": 0.07535451367022085, "grad_norm": 0.17780403792858124, "learning_rate": 4.9246527249958385e-06, "loss": 1.0655, "step": 10410 }, { "epoch": 0.07542690033080704, "grad_norm": 0.2274876832962036, "learning_rate": 4.924580338335252e-06, "loss": 1.0866, "step": 10420 }, { "epoch": 0.07549928699139323, "grad_norm": 0.23748233914375305, "learning_rate": 4.924507951674666e-06, "loss": 1.0637, "step": 10430 }, { "epoch": 0.0755716736519794, "grad_norm": 0.22824735939502716, "learning_rate": 4.924435565014079e-06, "loss": 1.0908, "step": 10440 }, { "epoch": 0.0756440603125656, "grad_norm": 0.1775079220533371, "learning_rate": 4.924363178353494e-06, "loss": 1.0736, "step": 10450 }, { "epoch": 0.07571644697315179, "grad_norm": 0.1669524759054184, "learning_rate": 4.9242907916929074e-06, "loss": 1.0772, "step": 10460 }, { "epoch": 0.07578883363373798, "grad_norm": 0.19081354141235352, "learning_rate": 4.924218405032321e-06, "loss": 1.0668, "step": 10470 }, { "epoch": 0.07586122029432416, "grad_norm": 0.18212305009365082, "learning_rate": 4.924146018371735e-06, "loss": 1.0824, "step": 10480 }, { "epoch": 0.07593360695491035, "grad_norm": 0.19392240047454834, "learning_rate": 4.924073631711149e-06, "loss": 1.086, "step": 10490 }, { "epoch": 0.07600599361549654, "grad_norm": 0.17266540229320526, "learning_rate": 4.924001245050563e-06, "loss": 1.0713, "step": 10500 }, { "epoch": 0.07607838027608273, "grad_norm": 0.16641221940517426, "learning_rate": 4.923928858389976e-06, "loss": 1.0734, "step": 10510 }, { "epoch": 0.0761507669366689, "grad_norm": 0.19562187790870667, "learning_rate": 4.92385647172939e-06, "loss": 1.0696, "step": 10520 }, { "epoch": 0.0762231535972551, "grad_norm": 0.1799498200416565, "learning_rate": 4.923784085068804e-06, "loss": 1.0691, "step": 10530 }, { "epoch": 0.07629554025784129, "grad_norm": 0.21454447507858276, "learning_rate": 4.923711698408218e-06, "loss": 1.0651, "step": 10540 }, { "epoch": 0.07636792691842748, "grad_norm": 0.16438615322113037, "learning_rate": 4.923639311747632e-06, "loss": 1.0758, "step": 10550 }, { "epoch": 0.07644031357901367, "grad_norm": 0.17331558465957642, "learning_rate": 4.923566925087045e-06, "loss": 1.092, "step": 10560 }, { "epoch": 0.07651270023959984, "grad_norm": 0.17129570245742798, "learning_rate": 4.923494538426459e-06, "loss": 1.0644, "step": 10570 }, { "epoch": 0.07658508690018603, "grad_norm": 0.17949286103248596, "learning_rate": 4.923422151765873e-06, "loss": 1.0765, "step": 10580 }, { "epoch": 0.07665747356077222, "grad_norm": 0.20298872888088226, "learning_rate": 4.923349765105287e-06, "loss": 1.0813, "step": 10590 }, { "epoch": 0.07672986022135841, "grad_norm": 0.17841701209545135, "learning_rate": 4.923277378444701e-06, "loss": 1.0763, "step": 10600 }, { "epoch": 0.07680224688194459, "grad_norm": 0.1926995813846588, "learning_rate": 4.923204991784114e-06, "loss": 1.0666, "step": 10610 }, { "epoch": 0.07687463354253078, "grad_norm": 0.20982502400875092, "learning_rate": 4.923132605123529e-06, "loss": 1.0674, "step": 10620 }, { "epoch": 0.07694702020311697, "grad_norm": 0.18079523742198944, "learning_rate": 4.923060218462942e-06, "loss": 1.0782, "step": 10630 }, { "epoch": 0.07701940686370316, "grad_norm": 0.18885524570941925, "learning_rate": 4.922987831802356e-06, "loss": 1.0675, "step": 10640 }, { "epoch": 0.07709179352428934, "grad_norm": 0.1845076084136963, "learning_rate": 4.9229154451417695e-06, "loss": 1.0691, "step": 10650 }, { "epoch": 0.07716418018487553, "grad_norm": 0.18656429648399353, "learning_rate": 4.922843058481184e-06, "loss": 1.0698, "step": 10660 }, { "epoch": 0.07723656684546172, "grad_norm": 0.1843183934688568, "learning_rate": 4.922770671820598e-06, "loss": 1.0685, "step": 10670 }, { "epoch": 0.07730895350604791, "grad_norm": 0.22134332358837128, "learning_rate": 4.922698285160011e-06, "loss": 1.0716, "step": 10680 }, { "epoch": 0.07738134016663409, "grad_norm": 0.23194286227226257, "learning_rate": 4.922625898499425e-06, "loss": 1.0655, "step": 10690 }, { "epoch": 0.07745372682722028, "grad_norm": 0.18523964285850525, "learning_rate": 4.922553511838839e-06, "loss": 1.0793, "step": 10700 }, { "epoch": 0.07752611348780647, "grad_norm": 0.19442328810691833, "learning_rate": 4.922481125178253e-06, "loss": 1.0722, "step": 10710 }, { "epoch": 0.07759850014839266, "grad_norm": 0.17840822041034698, "learning_rate": 4.922408738517666e-06, "loss": 1.0698, "step": 10720 }, { "epoch": 0.07767088680897884, "grad_norm": 0.1624884307384491, "learning_rate": 4.92233635185708e-06, "loss": 1.0767, "step": 10730 }, { "epoch": 0.07774327346956503, "grad_norm": 0.1825413703918457, "learning_rate": 4.922263965196494e-06, "loss": 1.0707, "step": 10740 }, { "epoch": 0.07781566013015122, "grad_norm": 0.18537434935569763, "learning_rate": 4.922191578535907e-06, "loss": 1.0802, "step": 10750 }, { "epoch": 0.07788804679073741, "grad_norm": 0.16724461317062378, "learning_rate": 4.922119191875321e-06, "loss": 1.0879, "step": 10760 }, { "epoch": 0.07796043345132359, "grad_norm": 0.20884546637535095, "learning_rate": 4.9220468052147355e-06, "loss": 1.0533, "step": 10770 }, { "epoch": 0.07803282011190978, "grad_norm": 0.19194482266902924, "learning_rate": 4.921974418554149e-06, "loss": 1.083, "step": 10780 }, { "epoch": 0.07810520677249597, "grad_norm": 0.17105208337306976, "learning_rate": 4.921902031893563e-06, "loss": 1.0832, "step": 10790 }, { "epoch": 0.07817759343308216, "grad_norm": 0.18788471817970276, "learning_rate": 4.921829645232976e-06, "loss": 1.0891, "step": 10800 }, { "epoch": 0.07824998009366833, "grad_norm": 0.20326700806617737, "learning_rate": 4.921757258572391e-06, "loss": 1.0925, "step": 10810 }, { "epoch": 0.07832236675425452, "grad_norm": 0.17056359350681305, "learning_rate": 4.921684871911804e-06, "loss": 1.06, "step": 10820 }, { "epoch": 0.07839475341484071, "grad_norm": 0.17205092310905457, "learning_rate": 4.921612485251218e-06, "loss": 1.0682, "step": 10830 }, { "epoch": 0.0784671400754269, "grad_norm": 0.17760831117630005, "learning_rate": 4.921540098590632e-06, "loss": 1.079, "step": 10840 }, { "epoch": 0.07853952673601308, "grad_norm": 0.2105284333229065, "learning_rate": 4.921467711930046e-06, "loss": 1.0787, "step": 10850 }, { "epoch": 0.07861191339659927, "grad_norm": 0.1877516359090805, "learning_rate": 4.92139532526946e-06, "loss": 1.0756, "step": 10860 }, { "epoch": 0.07868430005718546, "grad_norm": 0.15930220484733582, "learning_rate": 4.921322938608873e-06, "loss": 1.0785, "step": 10870 }, { "epoch": 0.07875668671777165, "grad_norm": 0.19607755541801453, "learning_rate": 4.921250551948287e-06, "loss": 1.0908, "step": 10880 }, { "epoch": 0.07882907337835783, "grad_norm": 0.18164999783039093, "learning_rate": 4.921178165287701e-06, "loss": 1.049, "step": 10890 }, { "epoch": 0.07890146003894402, "grad_norm": 0.17521744966506958, "learning_rate": 4.921105778627115e-06, "loss": 1.0801, "step": 10900 }, { "epoch": 0.07897384669953021, "grad_norm": 0.1904889941215515, "learning_rate": 4.921033391966529e-06, "loss": 1.0697, "step": 10910 }, { "epoch": 0.0790462333601164, "grad_norm": 0.1861046850681305, "learning_rate": 4.920961005305942e-06, "loss": 1.0619, "step": 10920 }, { "epoch": 0.07911862002070258, "grad_norm": 0.18160459399223328, "learning_rate": 4.920888618645357e-06, "loss": 1.075, "step": 10930 }, { "epoch": 0.07919100668128877, "grad_norm": 0.1644423007965088, "learning_rate": 4.92081623198477e-06, "loss": 1.0704, "step": 10940 }, { "epoch": 0.07926339334187496, "grad_norm": 0.192913219332695, "learning_rate": 4.920743845324184e-06, "loss": 1.0905, "step": 10950 }, { "epoch": 0.07933578000246115, "grad_norm": 0.18490082025527954, "learning_rate": 4.9206714586635976e-06, "loss": 1.0704, "step": 10960 }, { "epoch": 0.07940816666304733, "grad_norm": 0.17639735341072083, "learning_rate": 4.920599072003012e-06, "loss": 1.0789, "step": 10970 }, { "epoch": 0.07948055332363352, "grad_norm": 0.1718800812959671, "learning_rate": 4.920526685342426e-06, "loss": 1.0849, "step": 10980 }, { "epoch": 0.07955293998421971, "grad_norm": 0.1801319122314453, "learning_rate": 4.920454298681839e-06, "loss": 1.0621, "step": 10990 }, { "epoch": 0.0796253266448059, "grad_norm": 0.17438428103923798, "learning_rate": 4.920381912021253e-06, "loss": 1.0694, "step": 11000 }, { "epoch": 0.07969771330539208, "grad_norm": 0.19011586904525757, "learning_rate": 4.920309525360667e-06, "loss": 1.0672, "step": 11010 }, { "epoch": 0.07977009996597827, "grad_norm": 0.17366407811641693, "learning_rate": 4.920237138700081e-06, "loss": 1.0692, "step": 11020 }, { "epoch": 0.07984248662656446, "grad_norm": 0.18017128109931946, "learning_rate": 4.9201647520394946e-06, "loss": 1.0839, "step": 11030 }, { "epoch": 0.07991487328715065, "grad_norm": 0.18107180297374725, "learning_rate": 4.920092365378908e-06, "loss": 1.0789, "step": 11040 }, { "epoch": 0.07998725994773682, "grad_norm": 0.1829778552055359, "learning_rate": 4.920019978718323e-06, "loss": 1.0716, "step": 11050 }, { "epoch": 0.08005964660832302, "grad_norm": 0.17643770575523376, "learning_rate": 4.919947592057736e-06, "loss": 1.0778, "step": 11060 }, { "epoch": 0.0801320332689092, "grad_norm": 0.17540425062179565, "learning_rate": 4.91987520539715e-06, "loss": 1.0685, "step": 11070 }, { "epoch": 0.0802044199294954, "grad_norm": 0.18910111486911774, "learning_rate": 4.9198028187365635e-06, "loss": 1.0639, "step": 11080 }, { "epoch": 0.08027680659008157, "grad_norm": 0.17153383791446686, "learning_rate": 4.919730432075978e-06, "loss": 1.0872, "step": 11090 }, { "epoch": 0.08034919325066776, "grad_norm": 0.16545794904232025, "learning_rate": 4.9196580454153916e-06, "loss": 1.0681, "step": 11100 }, { "epoch": 0.08042157991125395, "grad_norm": 0.19072282314300537, "learning_rate": 4.919585658754805e-06, "loss": 1.0723, "step": 11110 }, { "epoch": 0.08049396657184014, "grad_norm": 0.190969780087471, "learning_rate": 4.919513272094219e-06, "loss": 1.0853, "step": 11120 }, { "epoch": 0.08056635323242634, "grad_norm": 0.17297950387001038, "learning_rate": 4.919440885433633e-06, "loss": 1.071, "step": 11130 }, { "epoch": 0.08063873989301251, "grad_norm": 0.19058924913406372, "learning_rate": 4.919368498773047e-06, "loss": 1.0879, "step": 11140 }, { "epoch": 0.0807111265535987, "grad_norm": 0.19707483053207397, "learning_rate": 4.9192961121124605e-06, "loss": 1.0816, "step": 11150 }, { "epoch": 0.0807835132141849, "grad_norm": 0.1789708286523819, "learning_rate": 4.919223725451874e-06, "loss": 1.0836, "step": 11160 }, { "epoch": 0.08085589987477108, "grad_norm": 0.17154861986637115, "learning_rate": 4.919151338791288e-06, "loss": 1.0631, "step": 11170 }, { "epoch": 0.08092828653535726, "grad_norm": 0.1575436145067215, "learning_rate": 4.919078952130702e-06, "loss": 1.0772, "step": 11180 }, { "epoch": 0.08100067319594345, "grad_norm": 0.18095077574253082, "learning_rate": 4.919006565470116e-06, "loss": 1.0588, "step": 11190 }, { "epoch": 0.08107305985652964, "grad_norm": 0.18087738752365112, "learning_rate": 4.9189341788095294e-06, "loss": 1.0859, "step": 11200 }, { "epoch": 0.08114544651711583, "grad_norm": 0.41991138458251953, "learning_rate": 4.918861792148943e-06, "loss": 1.0671, "step": 11210 }, { "epoch": 0.08121783317770201, "grad_norm": 0.19493845105171204, "learning_rate": 4.9187894054883575e-06, "loss": 1.0691, "step": 11220 }, { "epoch": 0.0812902198382882, "grad_norm": 0.16449686884880066, "learning_rate": 4.918717018827771e-06, "loss": 1.0713, "step": 11230 }, { "epoch": 0.08136260649887439, "grad_norm": 0.16916941106319427, "learning_rate": 4.918644632167185e-06, "loss": 1.0779, "step": 11240 }, { "epoch": 0.08143499315946058, "grad_norm": 0.1702130287885666, "learning_rate": 4.918572245506598e-06, "loss": 1.0677, "step": 11250 }, { "epoch": 0.08150737982004676, "grad_norm": 0.1953752636909485, "learning_rate": 4.918499858846012e-06, "loss": 1.0766, "step": 11260 }, { "epoch": 0.08157976648063295, "grad_norm": 0.18306571245193481, "learning_rate": 4.918427472185426e-06, "loss": 1.0737, "step": 11270 }, { "epoch": 0.08165215314121914, "grad_norm": 0.15860667824745178, "learning_rate": 4.91835508552484e-06, "loss": 1.0552, "step": 11280 }, { "epoch": 0.08172453980180533, "grad_norm": 0.1841406524181366, "learning_rate": 4.918282698864254e-06, "loss": 1.0779, "step": 11290 }, { "epoch": 0.0817969264623915, "grad_norm": 0.22922413051128387, "learning_rate": 4.918210312203667e-06, "loss": 1.0727, "step": 11300 }, { "epoch": 0.0818693131229777, "grad_norm": 0.17833411693572998, "learning_rate": 4.918137925543081e-06, "loss": 1.0751, "step": 11310 }, { "epoch": 0.08194169978356389, "grad_norm": 0.1674196720123291, "learning_rate": 4.9180655388824945e-06, "loss": 1.0797, "step": 11320 }, { "epoch": 0.08201408644415008, "grad_norm": 0.18351003527641296, "learning_rate": 4.917993152221909e-06, "loss": 1.0893, "step": 11330 }, { "epoch": 0.08208647310473625, "grad_norm": 0.17393292486667633, "learning_rate": 4.917920765561323e-06, "loss": 1.073, "step": 11340 }, { "epoch": 0.08215885976532245, "grad_norm": 0.18493473529815674, "learning_rate": 4.917848378900736e-06, "loss": 1.0783, "step": 11350 }, { "epoch": 0.08223124642590864, "grad_norm": 0.18359433114528656, "learning_rate": 4.91777599224015e-06, "loss": 1.0538, "step": 11360 }, { "epoch": 0.08230363308649483, "grad_norm": 0.1751643419265747, "learning_rate": 4.917703605579564e-06, "loss": 1.0779, "step": 11370 }, { "epoch": 0.082376019747081, "grad_norm": 0.16881217062473297, "learning_rate": 4.917631218918978e-06, "loss": 1.0722, "step": 11380 }, { "epoch": 0.0824484064076672, "grad_norm": 0.19173048436641693, "learning_rate": 4.9175588322583915e-06, "loss": 1.0616, "step": 11390 }, { "epoch": 0.08252079306825338, "grad_norm": 0.1824052780866623, "learning_rate": 4.917486445597805e-06, "loss": 1.0691, "step": 11400 }, { "epoch": 0.08259317972883957, "grad_norm": 0.1803470402956009, "learning_rate": 4.91741405893722e-06, "loss": 1.0615, "step": 11410 }, { "epoch": 0.08266556638942575, "grad_norm": 0.18563689291477203, "learning_rate": 4.917341672276633e-06, "loss": 1.0701, "step": 11420 }, { "epoch": 0.08273795305001194, "grad_norm": 0.17787225544452667, "learning_rate": 4.917269285616047e-06, "loss": 1.0812, "step": 11430 }, { "epoch": 0.08281033971059813, "grad_norm": 0.1732112318277359, "learning_rate": 4.9171968989554605e-06, "loss": 1.0754, "step": 11440 }, { "epoch": 0.08288272637118432, "grad_norm": 0.19006459414958954, "learning_rate": 4.917124512294875e-06, "loss": 1.0577, "step": 11450 }, { "epoch": 0.0829551130317705, "grad_norm": 0.17567865550518036, "learning_rate": 4.9170521256342885e-06, "loss": 1.0775, "step": 11460 }, { "epoch": 0.08302749969235669, "grad_norm": 0.1794678121805191, "learning_rate": 4.916979738973702e-06, "loss": 1.0655, "step": 11470 }, { "epoch": 0.08309988635294288, "grad_norm": 0.20485854148864746, "learning_rate": 4.916907352313116e-06, "loss": 1.065, "step": 11480 }, { "epoch": 0.08317227301352907, "grad_norm": 0.2057936191558838, "learning_rate": 4.91683496565253e-06, "loss": 1.0769, "step": 11490 }, { "epoch": 0.08324465967411525, "grad_norm": 0.17958250641822815, "learning_rate": 4.916762578991944e-06, "loss": 1.0644, "step": 11500 }, { "epoch": 0.08331704633470144, "grad_norm": 0.16068291664123535, "learning_rate": 4.9166901923313575e-06, "loss": 1.0761, "step": 11510 }, { "epoch": 0.08338943299528763, "grad_norm": 0.18293248116970062, "learning_rate": 4.916617805670771e-06, "loss": 1.0632, "step": 11520 }, { "epoch": 0.08346181965587382, "grad_norm": 0.19636189937591553, "learning_rate": 4.9165454190101855e-06, "loss": 1.0827, "step": 11530 }, { "epoch": 0.08353420631646, "grad_norm": 0.18141409754753113, "learning_rate": 4.916473032349599e-06, "loss": 1.077, "step": 11540 }, { "epoch": 0.08360659297704619, "grad_norm": 0.18197061121463776, "learning_rate": 4.916400645689013e-06, "loss": 1.0639, "step": 11550 }, { "epoch": 0.08367897963763238, "grad_norm": 0.18665997684001923, "learning_rate": 4.916328259028426e-06, "loss": 1.0709, "step": 11560 }, { "epoch": 0.08375136629821857, "grad_norm": 0.18202146887779236, "learning_rate": 4.916255872367841e-06, "loss": 1.0797, "step": 11570 }, { "epoch": 0.08382375295880475, "grad_norm": 0.16882570087909698, "learning_rate": 4.9161834857072545e-06, "loss": 1.0683, "step": 11580 }, { "epoch": 0.08389613961939094, "grad_norm": 0.17632512748241425, "learning_rate": 4.916111099046668e-06, "loss": 1.0526, "step": 11590 }, { "epoch": 0.08396852627997713, "grad_norm": 0.1826392412185669, "learning_rate": 4.916038712386082e-06, "loss": 1.0694, "step": 11600 }, { "epoch": 0.08404091294056332, "grad_norm": 0.17934630811214447, "learning_rate": 4.915966325725496e-06, "loss": 1.0621, "step": 11610 }, { "epoch": 0.0841132996011495, "grad_norm": 0.23041057586669922, "learning_rate": 4.91589393906491e-06, "loss": 1.0716, "step": 11620 }, { "epoch": 0.08418568626173568, "grad_norm": 0.18051432073116302, "learning_rate": 4.915821552404323e-06, "loss": 1.0688, "step": 11630 }, { "epoch": 0.08425807292232187, "grad_norm": 0.18257080018520355, "learning_rate": 4.915749165743737e-06, "loss": 1.0637, "step": 11640 }, { "epoch": 0.08433045958290807, "grad_norm": 0.17362256348133087, "learning_rate": 4.9156767790831515e-06, "loss": 1.0703, "step": 11650 }, { "epoch": 0.08440284624349424, "grad_norm": 0.1855854094028473, "learning_rate": 4.915604392422565e-06, "loss": 1.0711, "step": 11660 }, { "epoch": 0.08447523290408043, "grad_norm": 0.17203544080257416, "learning_rate": 4.915532005761979e-06, "loss": 1.0666, "step": 11670 }, { "epoch": 0.08454761956466662, "grad_norm": 0.18600568175315857, "learning_rate": 4.915459619101392e-06, "loss": 1.0635, "step": 11680 }, { "epoch": 0.08462000622525281, "grad_norm": 0.1972518265247345, "learning_rate": 4.915387232440807e-06, "loss": 1.0761, "step": 11690 }, { "epoch": 0.08469239288583899, "grad_norm": 0.1868593841791153, "learning_rate": 4.91531484578022e-06, "loss": 1.0943, "step": 11700 }, { "epoch": 0.08476477954642518, "grad_norm": 0.18218737840652466, "learning_rate": 4.915242459119634e-06, "loss": 1.0787, "step": 11710 }, { "epoch": 0.08483716620701137, "grad_norm": 0.19324436783790588, "learning_rate": 4.915170072459048e-06, "loss": 1.0549, "step": 11720 }, { "epoch": 0.08490955286759756, "grad_norm": 0.16747941076755524, "learning_rate": 4.915097685798462e-06, "loss": 1.0751, "step": 11730 }, { "epoch": 0.08498193952818375, "grad_norm": 0.1799047440290451, "learning_rate": 4.915025299137876e-06, "loss": 1.072, "step": 11740 }, { "epoch": 0.08505432618876993, "grad_norm": 0.17832840979099274, "learning_rate": 4.914952912477289e-06, "loss": 1.0627, "step": 11750 }, { "epoch": 0.08512671284935612, "grad_norm": 0.21101170778274536, "learning_rate": 4.914880525816703e-06, "loss": 1.0887, "step": 11760 }, { "epoch": 0.08519909950994231, "grad_norm": 0.20275020599365234, "learning_rate": 4.9148081391561166e-06, "loss": 1.0752, "step": 11770 }, { "epoch": 0.0852714861705285, "grad_norm": 0.173346146941185, "learning_rate": 4.91473575249553e-06, "loss": 1.0769, "step": 11780 }, { "epoch": 0.08534387283111468, "grad_norm": 0.19621768593788147, "learning_rate": 4.914663365834944e-06, "loss": 1.0652, "step": 11790 }, { "epoch": 0.08541625949170087, "grad_norm": 0.19705529510974884, "learning_rate": 4.914590979174358e-06, "loss": 1.0729, "step": 11800 }, { "epoch": 0.08548864615228706, "grad_norm": 0.1750185489654541, "learning_rate": 4.914518592513772e-06, "loss": 1.0859, "step": 11810 }, { "epoch": 0.08556103281287325, "grad_norm": 0.17165139317512512, "learning_rate": 4.9144462058531855e-06, "loss": 1.0737, "step": 11820 }, { "epoch": 0.08563341947345943, "grad_norm": 0.18447789549827576, "learning_rate": 4.914373819192599e-06, "loss": 1.0659, "step": 11830 }, { "epoch": 0.08570580613404562, "grad_norm": 0.18128694593906403, "learning_rate": 4.9143014325320136e-06, "loss": 1.0664, "step": 11840 }, { "epoch": 0.08577819279463181, "grad_norm": 0.1879102736711502, "learning_rate": 4.914229045871427e-06, "loss": 1.0826, "step": 11850 }, { "epoch": 0.085850579455218, "grad_norm": 0.1664225310087204, "learning_rate": 4.914156659210841e-06, "loss": 1.0587, "step": 11860 }, { "epoch": 0.08592296611580418, "grad_norm": 0.18422931432724, "learning_rate": 4.914084272550254e-06, "loss": 1.0793, "step": 11870 }, { "epoch": 0.08599535277639037, "grad_norm": 0.17721515893936157, "learning_rate": 4.914011885889669e-06, "loss": 1.0602, "step": 11880 }, { "epoch": 0.08606773943697656, "grad_norm": 0.17775796353816986, "learning_rate": 4.9139394992290825e-06, "loss": 1.0734, "step": 11890 }, { "epoch": 0.08614012609756275, "grad_norm": 0.19729198515415192, "learning_rate": 4.913867112568496e-06, "loss": 1.0694, "step": 11900 }, { "epoch": 0.08621251275814892, "grad_norm": 0.17630425095558167, "learning_rate": 4.91379472590791e-06, "loss": 1.0646, "step": 11910 }, { "epoch": 0.08628489941873511, "grad_norm": 0.1867901086807251, "learning_rate": 4.913722339247324e-06, "loss": 1.0823, "step": 11920 }, { "epoch": 0.0863572860793213, "grad_norm": 0.17957784235477448, "learning_rate": 4.913649952586738e-06, "loss": 1.0621, "step": 11930 }, { "epoch": 0.0864296727399075, "grad_norm": 0.18513678014278412, "learning_rate": 4.9135775659261514e-06, "loss": 1.0737, "step": 11940 }, { "epoch": 0.08650205940049367, "grad_norm": 0.18073897063732147, "learning_rate": 4.913505179265565e-06, "loss": 1.0567, "step": 11950 }, { "epoch": 0.08657444606107986, "grad_norm": 0.19034330546855927, "learning_rate": 4.913432792604979e-06, "loss": 1.0701, "step": 11960 }, { "epoch": 0.08664683272166605, "grad_norm": 0.19168923795223236, "learning_rate": 4.913360405944393e-06, "loss": 1.0592, "step": 11970 }, { "epoch": 0.08671921938225224, "grad_norm": 0.1833886355161667, "learning_rate": 4.913288019283807e-06, "loss": 1.0812, "step": 11980 }, { "epoch": 0.08679160604283842, "grad_norm": 0.1789073795080185, "learning_rate": 4.91321563262322e-06, "loss": 1.0813, "step": 11990 }, { "epoch": 0.08686399270342461, "grad_norm": 0.19956181943416595, "learning_rate": 4.913143245962634e-06, "loss": 1.0602, "step": 12000 }, { "epoch": 0.0869363793640108, "grad_norm": 0.16924233734607697, "learning_rate": 4.9130708593020484e-06, "loss": 1.0682, "step": 12010 }, { "epoch": 0.08700876602459699, "grad_norm": 0.20101603865623474, "learning_rate": 4.912998472641462e-06, "loss": 1.0896, "step": 12020 }, { "epoch": 0.08708115268518317, "grad_norm": 0.17488181591033936, "learning_rate": 4.912926085980876e-06, "loss": 1.0551, "step": 12030 }, { "epoch": 0.08715353934576936, "grad_norm": 0.18475006520748138, "learning_rate": 4.912853699320289e-06, "loss": 1.0825, "step": 12040 }, { "epoch": 0.08722592600635555, "grad_norm": 0.2061099112033844, "learning_rate": 4.912781312659704e-06, "loss": 1.0836, "step": 12050 }, { "epoch": 0.08729831266694174, "grad_norm": 0.7837763428688049, "learning_rate": 4.912708925999117e-06, "loss": 1.0716, "step": 12060 }, { "epoch": 0.08737069932752792, "grad_norm": 0.2470492273569107, "learning_rate": 4.912636539338531e-06, "loss": 1.0686, "step": 12070 }, { "epoch": 0.08744308598811411, "grad_norm": 0.18343280255794525, "learning_rate": 4.912564152677945e-06, "loss": 1.0704, "step": 12080 }, { "epoch": 0.0875154726487003, "grad_norm": 0.18261706829071045, "learning_rate": 4.912491766017359e-06, "loss": 1.0632, "step": 12090 }, { "epoch": 0.08758785930928649, "grad_norm": 0.25385862588882446, "learning_rate": 4.912419379356773e-06, "loss": 1.0686, "step": 12100 }, { "epoch": 0.08766024596987267, "grad_norm": 0.1851423978805542, "learning_rate": 4.912346992696186e-06, "loss": 1.0647, "step": 12110 }, { "epoch": 0.08773263263045886, "grad_norm": 0.16765649616718292, "learning_rate": 4.9122746060356e-06, "loss": 1.0625, "step": 12120 }, { "epoch": 0.08780501929104505, "grad_norm": 0.17632648348808289, "learning_rate": 4.912202219375014e-06, "loss": 1.0767, "step": 12130 }, { "epoch": 0.08787740595163124, "grad_norm": 0.20516112446784973, "learning_rate": 4.912129832714428e-06, "loss": 1.0764, "step": 12140 }, { "epoch": 0.08794979261221741, "grad_norm": 0.19637328386306763, "learning_rate": 4.912057446053842e-06, "loss": 1.0819, "step": 12150 }, { "epoch": 0.0880221792728036, "grad_norm": 0.17330960929393768, "learning_rate": 4.911985059393255e-06, "loss": 1.0731, "step": 12160 }, { "epoch": 0.0880945659333898, "grad_norm": 0.17274819314479828, "learning_rate": 4.91191267273267e-06, "loss": 1.0811, "step": 12170 }, { "epoch": 0.08816695259397599, "grad_norm": 0.18579480051994324, "learning_rate": 4.911840286072083e-06, "loss": 1.0769, "step": 12180 }, { "epoch": 0.08823933925456216, "grad_norm": 0.18373064696788788, "learning_rate": 4.911767899411497e-06, "loss": 1.0749, "step": 12190 }, { "epoch": 0.08831172591514835, "grad_norm": 0.1849633753299713, "learning_rate": 4.9116955127509105e-06, "loss": 1.0789, "step": 12200 }, { "epoch": 0.08838411257573454, "grad_norm": 0.1739869862794876, "learning_rate": 4.911623126090325e-06, "loss": 1.0805, "step": 12210 }, { "epoch": 0.08845649923632073, "grad_norm": 0.18374896049499512, "learning_rate": 4.911550739429739e-06, "loss": 1.0491, "step": 12220 }, { "epoch": 0.08852888589690691, "grad_norm": 0.20483864843845367, "learning_rate": 4.911478352769152e-06, "loss": 1.0698, "step": 12230 }, { "epoch": 0.0886012725574931, "grad_norm": 0.17698989808559418, "learning_rate": 4.911405966108566e-06, "loss": 1.0757, "step": 12240 }, { "epoch": 0.08867365921807929, "grad_norm": 0.16468937695026398, "learning_rate": 4.91133357944798e-06, "loss": 1.0577, "step": 12250 }, { "epoch": 0.08874604587866548, "grad_norm": 0.18862901628017426, "learning_rate": 4.911261192787394e-06, "loss": 1.0779, "step": 12260 }, { "epoch": 0.08881843253925166, "grad_norm": 0.17492325603961945, "learning_rate": 4.9111888061268075e-06, "loss": 1.0662, "step": 12270 }, { "epoch": 0.08889081919983785, "grad_norm": 0.16824495792388916, "learning_rate": 4.911116419466221e-06, "loss": 1.0617, "step": 12280 }, { "epoch": 0.08896320586042404, "grad_norm": 0.17155864834785461, "learning_rate": 4.911044032805636e-06, "loss": 1.058, "step": 12290 }, { "epoch": 0.08903559252101023, "grad_norm": 0.17374593019485474, "learning_rate": 4.910971646145049e-06, "loss": 1.0538, "step": 12300 }, { "epoch": 0.08910797918159642, "grad_norm": 0.170820415019989, "learning_rate": 4.910899259484462e-06, "loss": 1.0719, "step": 12310 }, { "epoch": 0.0891803658421826, "grad_norm": 0.1844668984413147, "learning_rate": 4.9108268728238765e-06, "loss": 1.058, "step": 12320 }, { "epoch": 0.08925275250276879, "grad_norm": 0.17644762992858887, "learning_rate": 4.91075448616329e-06, "loss": 1.0533, "step": 12330 }, { "epoch": 0.08932513916335498, "grad_norm": 0.19445423781871796, "learning_rate": 4.910682099502704e-06, "loss": 1.0617, "step": 12340 }, { "epoch": 0.08939752582394117, "grad_norm": 0.20080770552158356, "learning_rate": 4.910609712842117e-06, "loss": 1.0592, "step": 12350 }, { "epoch": 0.08946991248452735, "grad_norm": 0.17183251678943634, "learning_rate": 4.910537326181532e-06, "loss": 1.0629, "step": 12360 }, { "epoch": 0.08954229914511354, "grad_norm": 0.17015236616134644, "learning_rate": 4.910464939520945e-06, "loss": 1.0571, "step": 12370 }, { "epoch": 0.08961468580569973, "grad_norm": 0.19391165673732758, "learning_rate": 4.910392552860359e-06, "loss": 1.0634, "step": 12380 }, { "epoch": 0.08968707246628592, "grad_norm": 0.21788957715034485, "learning_rate": 4.910320166199773e-06, "loss": 1.0633, "step": 12390 }, { "epoch": 0.0897594591268721, "grad_norm": 0.1834946870803833, "learning_rate": 4.910247779539187e-06, "loss": 1.0594, "step": 12400 }, { "epoch": 0.08983184578745829, "grad_norm": 0.1719583421945572, "learning_rate": 4.910175392878601e-06, "loss": 1.0485, "step": 12410 }, { "epoch": 0.08990423244804448, "grad_norm": 0.21385973691940308, "learning_rate": 4.910103006218014e-06, "loss": 1.068, "step": 12420 }, { "epoch": 0.08997661910863067, "grad_norm": 0.1749151051044464, "learning_rate": 4.910030619557428e-06, "loss": 1.0658, "step": 12430 }, { "epoch": 0.09004900576921684, "grad_norm": 0.17845019698143005, "learning_rate": 4.909958232896842e-06, "loss": 1.0693, "step": 12440 }, { "epoch": 0.09012139242980303, "grad_norm": 0.16557128727436066, "learning_rate": 4.909885846236256e-06, "loss": 1.05, "step": 12450 }, { "epoch": 0.09019377909038923, "grad_norm": 0.1660662740468979, "learning_rate": 4.90981345957567e-06, "loss": 1.0756, "step": 12460 }, { "epoch": 0.09026616575097542, "grad_norm": 0.17898871004581451, "learning_rate": 4.909741072915083e-06, "loss": 1.0692, "step": 12470 }, { "epoch": 0.09033855241156159, "grad_norm": 0.18854671716690063, "learning_rate": 4.909668686254498e-06, "loss": 1.0648, "step": 12480 }, { "epoch": 0.09041093907214778, "grad_norm": 0.18120893836021423, "learning_rate": 4.909596299593911e-06, "loss": 1.0646, "step": 12490 }, { "epoch": 0.09048332573273397, "grad_norm": 0.1960660070180893, "learning_rate": 4.909523912933325e-06, "loss": 1.067, "step": 12500 }, { "epoch": 0.09055571239332016, "grad_norm": 0.2270880788564682, "learning_rate": 4.9094515262727386e-06, "loss": 1.057, "step": 12510 }, { "epoch": 0.09062809905390634, "grad_norm": 0.19658519327640533, "learning_rate": 4.909379139612153e-06, "loss": 1.0789, "step": 12520 }, { "epoch": 0.09070048571449253, "grad_norm": 0.1924048811197281, "learning_rate": 4.909306752951567e-06, "loss": 1.0608, "step": 12530 }, { "epoch": 0.09077287237507872, "grad_norm": 0.1948341578245163, "learning_rate": 4.90923436629098e-06, "loss": 1.0594, "step": 12540 }, { "epoch": 0.09084525903566491, "grad_norm": 0.21154290437698364, "learning_rate": 4.909161979630394e-06, "loss": 1.0635, "step": 12550 }, { "epoch": 0.09091764569625109, "grad_norm": 0.18303339183330536, "learning_rate": 4.909089592969808e-06, "loss": 1.0802, "step": 12560 }, { "epoch": 0.09099003235683728, "grad_norm": 0.17858447134494781, "learning_rate": 4.909017206309222e-06, "loss": 1.065, "step": 12570 }, { "epoch": 0.09106241901742347, "grad_norm": 0.186380535364151, "learning_rate": 4.9089448196486356e-06, "loss": 1.0611, "step": 12580 }, { "epoch": 0.09113480567800966, "grad_norm": 0.17138642072677612, "learning_rate": 4.908872432988049e-06, "loss": 1.0598, "step": 12590 }, { "epoch": 0.09120719233859584, "grad_norm": 0.21660137176513672, "learning_rate": 4.908800046327463e-06, "loss": 1.0604, "step": 12600 }, { "epoch": 0.09127957899918203, "grad_norm": 0.17236590385437012, "learning_rate": 4.908727659666877e-06, "loss": 1.0554, "step": 12610 }, { "epoch": 0.09135196565976822, "grad_norm": 0.16949652135372162, "learning_rate": 4.908655273006291e-06, "loss": 1.0523, "step": 12620 }, { "epoch": 0.09142435232035441, "grad_norm": 0.18573276698589325, "learning_rate": 4.9085828863457045e-06, "loss": 1.0574, "step": 12630 }, { "epoch": 0.09149673898094059, "grad_norm": 0.17692014575004578, "learning_rate": 4.908510499685118e-06, "loss": 1.0683, "step": 12640 }, { "epoch": 0.09156912564152678, "grad_norm": 0.17320817708969116, "learning_rate": 4.908438113024533e-06, "loss": 1.0725, "step": 12650 }, { "epoch": 0.09164151230211297, "grad_norm": 0.18194791674613953, "learning_rate": 4.908365726363946e-06, "loss": 1.0769, "step": 12660 }, { "epoch": 0.09171389896269916, "grad_norm": 0.17334748804569244, "learning_rate": 4.90829333970336e-06, "loss": 1.0648, "step": 12670 }, { "epoch": 0.09178628562328534, "grad_norm": 0.18237444758415222, "learning_rate": 4.9082209530427734e-06, "loss": 1.0701, "step": 12680 }, { "epoch": 0.09185867228387153, "grad_norm": 0.18935726583003998, "learning_rate": 4.908148566382188e-06, "loss": 1.0576, "step": 12690 }, { "epoch": 0.09193105894445772, "grad_norm": 0.17094674706459045, "learning_rate": 4.9080761797216015e-06, "loss": 1.0695, "step": 12700 }, { "epoch": 0.09200344560504391, "grad_norm": 0.1917349398136139, "learning_rate": 4.908003793061015e-06, "loss": 1.061, "step": 12710 }, { "epoch": 0.09207583226563008, "grad_norm": 0.18850326538085938, "learning_rate": 4.907931406400429e-06, "loss": 1.0772, "step": 12720 }, { "epoch": 0.09214821892621627, "grad_norm": 0.20601466298103333, "learning_rate": 4.907859019739843e-06, "loss": 1.0626, "step": 12730 }, { "epoch": 0.09222060558680246, "grad_norm": 0.1699492633342743, "learning_rate": 4.907786633079257e-06, "loss": 1.054, "step": 12740 }, { "epoch": 0.09229299224738866, "grad_norm": 0.16967886686325073, "learning_rate": 4.9077142464186704e-06, "loss": 1.0633, "step": 12750 }, { "epoch": 0.09236537890797483, "grad_norm": 0.16590727865695953, "learning_rate": 4.907641859758084e-06, "loss": 1.0436, "step": 12760 }, { "epoch": 0.09243776556856102, "grad_norm": 0.18494842946529388, "learning_rate": 4.9075694730974985e-06, "loss": 1.0639, "step": 12770 }, { "epoch": 0.09251015222914721, "grad_norm": 0.19467203319072723, "learning_rate": 4.907497086436912e-06, "loss": 1.059, "step": 12780 }, { "epoch": 0.0925825388897334, "grad_norm": 0.1718284636735916, "learning_rate": 4.907424699776326e-06, "loss": 1.0615, "step": 12790 }, { "epoch": 0.09265492555031958, "grad_norm": 0.16538646817207336, "learning_rate": 4.907352313115739e-06, "loss": 1.0653, "step": 12800 }, { "epoch": 0.09272731221090577, "grad_norm": 0.17664389312267303, "learning_rate": 4.907279926455154e-06, "loss": 1.0595, "step": 12810 }, { "epoch": 0.09279969887149196, "grad_norm": 0.1798931062221527, "learning_rate": 4.9072075397945674e-06, "loss": 1.0578, "step": 12820 }, { "epoch": 0.09287208553207815, "grad_norm": 0.20928549766540527, "learning_rate": 4.907135153133981e-06, "loss": 1.0699, "step": 12830 }, { "epoch": 0.09294447219266433, "grad_norm": 0.1914556324481964, "learning_rate": 4.907062766473395e-06, "loss": 1.078, "step": 12840 }, { "epoch": 0.09301685885325052, "grad_norm": 0.17535455524921417, "learning_rate": 4.906990379812808e-06, "loss": 1.062, "step": 12850 }, { "epoch": 0.09308924551383671, "grad_norm": 0.209900364279747, "learning_rate": 4.906917993152222e-06, "loss": 1.0651, "step": 12860 }, { "epoch": 0.0931616321744229, "grad_norm": 0.19271911680698395, "learning_rate": 4.9068456064916355e-06, "loss": 1.0695, "step": 12870 }, { "epoch": 0.09323401883500908, "grad_norm": 0.17206203937530518, "learning_rate": 4.90677321983105e-06, "loss": 1.054, "step": 12880 }, { "epoch": 0.09330640549559527, "grad_norm": 0.17323225736618042, "learning_rate": 4.906700833170464e-06, "loss": 1.0545, "step": 12890 }, { "epoch": 0.09337879215618146, "grad_norm": 0.18585805594921112, "learning_rate": 4.906628446509877e-06, "loss": 1.0638, "step": 12900 }, { "epoch": 0.09345117881676765, "grad_norm": 0.18131840229034424, "learning_rate": 4.906556059849291e-06, "loss": 1.0575, "step": 12910 }, { "epoch": 0.09352356547735384, "grad_norm": 0.18972773849964142, "learning_rate": 4.906483673188705e-06, "loss": 1.0561, "step": 12920 }, { "epoch": 0.09359595213794002, "grad_norm": 0.17242597043514252, "learning_rate": 4.906411286528119e-06, "loss": 1.0754, "step": 12930 }, { "epoch": 0.09366833879852621, "grad_norm": 0.17874747514724731, "learning_rate": 4.9063388998675325e-06, "loss": 1.0769, "step": 12940 }, { "epoch": 0.0937407254591124, "grad_norm": 0.1628153920173645, "learning_rate": 4.906266513206946e-06, "loss": 1.0701, "step": 12950 }, { "epoch": 0.09381311211969859, "grad_norm": 0.1775561273097992, "learning_rate": 4.906194126546361e-06, "loss": 1.0496, "step": 12960 }, { "epoch": 0.09388549878028477, "grad_norm": 0.17346498370170593, "learning_rate": 4.906121739885774e-06, "loss": 1.0592, "step": 12970 }, { "epoch": 0.09395788544087096, "grad_norm": 0.1621558666229248, "learning_rate": 4.906049353225188e-06, "loss": 1.0639, "step": 12980 }, { "epoch": 0.09403027210145715, "grad_norm": 0.16274607181549072, "learning_rate": 4.9059769665646015e-06, "loss": 1.0593, "step": 12990 }, { "epoch": 0.09410265876204334, "grad_norm": 0.24660325050354004, "learning_rate": 4.905904579904016e-06, "loss": 1.0687, "step": 13000 }, { "epoch": 0.09417504542262951, "grad_norm": 0.17829085886478424, "learning_rate": 4.9058321932434295e-06, "loss": 1.061, "step": 13010 }, { "epoch": 0.0942474320832157, "grad_norm": 0.19128848612308502, "learning_rate": 4.905759806582843e-06, "loss": 1.0612, "step": 13020 }, { "epoch": 0.0943198187438019, "grad_norm": 0.1639917492866516, "learning_rate": 4.905687419922257e-06, "loss": 1.0514, "step": 13030 }, { "epoch": 0.09439220540438809, "grad_norm": 0.18937060236930847, "learning_rate": 4.905615033261671e-06, "loss": 1.0471, "step": 13040 }, { "epoch": 0.09446459206497426, "grad_norm": 0.20053894817829132, "learning_rate": 4.905542646601085e-06, "loss": 1.051, "step": 13050 }, { "epoch": 0.09453697872556045, "grad_norm": 0.17322196066379547, "learning_rate": 4.9054702599404985e-06, "loss": 1.0626, "step": 13060 }, { "epoch": 0.09460936538614664, "grad_norm": 0.18326787650585175, "learning_rate": 4.905397873279912e-06, "loss": 1.0556, "step": 13070 }, { "epoch": 0.09468175204673283, "grad_norm": 0.18732202053070068, "learning_rate": 4.9053254866193265e-06, "loss": 1.0697, "step": 13080 }, { "epoch": 0.09475413870731901, "grad_norm": 0.1722288727760315, "learning_rate": 4.90525309995874e-06, "loss": 1.0558, "step": 13090 }, { "epoch": 0.0948265253679052, "grad_norm": 0.1705879271030426, "learning_rate": 4.905180713298154e-06, "loss": 1.0436, "step": 13100 }, { "epoch": 0.09489891202849139, "grad_norm": 0.19368794560432434, "learning_rate": 4.905108326637567e-06, "loss": 1.0541, "step": 13110 }, { "epoch": 0.09497129868907758, "grad_norm": 0.16703177988529205, "learning_rate": 4.905035939976982e-06, "loss": 1.061, "step": 13120 }, { "epoch": 0.09504368534966376, "grad_norm": 0.1757340431213379, "learning_rate": 4.9049635533163955e-06, "loss": 1.054, "step": 13130 }, { "epoch": 0.09511607201024995, "grad_norm": 0.1705889105796814, "learning_rate": 4.904891166655809e-06, "loss": 1.0499, "step": 13140 }, { "epoch": 0.09518845867083614, "grad_norm": 0.18532103300094604, "learning_rate": 4.904818779995223e-06, "loss": 1.073, "step": 13150 }, { "epoch": 0.09526084533142233, "grad_norm": 0.17054064571857452, "learning_rate": 4.904746393334637e-06, "loss": 1.0548, "step": 13160 }, { "epoch": 0.09533323199200851, "grad_norm": 0.18268738687038422, "learning_rate": 4.904674006674051e-06, "loss": 1.0489, "step": 13170 }, { "epoch": 0.0954056186525947, "grad_norm": 0.1656617373228073, "learning_rate": 4.904601620013464e-06, "loss": 1.048, "step": 13180 }, { "epoch": 0.09547800531318089, "grad_norm": 0.16891372203826904, "learning_rate": 4.904529233352878e-06, "loss": 1.0719, "step": 13190 }, { "epoch": 0.09555039197376708, "grad_norm": 0.19069869816303253, "learning_rate": 4.904456846692292e-06, "loss": 1.0673, "step": 13200 }, { "epoch": 0.09562277863435326, "grad_norm": 0.17400185763835907, "learning_rate": 4.904384460031706e-06, "loss": 1.0586, "step": 13210 }, { "epoch": 0.09569516529493945, "grad_norm": 0.17865540087223053, "learning_rate": 4.90431207337112e-06, "loss": 1.0509, "step": 13220 }, { "epoch": 0.09576755195552564, "grad_norm": 0.22971458733081818, "learning_rate": 4.904239686710533e-06, "loss": 1.0619, "step": 13230 }, { "epoch": 0.09583993861611183, "grad_norm": 0.16705693304538727, "learning_rate": 4.904167300049947e-06, "loss": 1.0616, "step": 13240 }, { "epoch": 0.095912325276698, "grad_norm": 0.17722009122371674, "learning_rate": 4.904094913389361e-06, "loss": 1.046, "step": 13250 }, { "epoch": 0.0959847119372842, "grad_norm": 0.18104061484336853, "learning_rate": 4.904022526728775e-06, "loss": 1.0516, "step": 13260 }, { "epoch": 0.09605709859787039, "grad_norm": 0.1646488904953003, "learning_rate": 4.903950140068189e-06, "loss": 1.0612, "step": 13270 }, { "epoch": 0.09612948525845658, "grad_norm": 0.17843876779079437, "learning_rate": 4.903877753407602e-06, "loss": 1.0584, "step": 13280 }, { "epoch": 0.09620187191904275, "grad_norm": 0.18090327084064484, "learning_rate": 4.903805366747017e-06, "loss": 1.063, "step": 13290 }, { "epoch": 0.09627425857962894, "grad_norm": 0.17758344113826752, "learning_rate": 4.90373298008643e-06, "loss": 1.0633, "step": 13300 }, { "epoch": 0.09634664524021513, "grad_norm": 0.17629143595695496, "learning_rate": 4.903660593425844e-06, "loss": 1.0546, "step": 13310 }, { "epoch": 0.09641903190080132, "grad_norm": 0.17930828034877777, "learning_rate": 4.9035882067652576e-06, "loss": 1.0679, "step": 13320 }, { "epoch": 0.0964914185613875, "grad_norm": 0.22573702037334442, "learning_rate": 4.903515820104672e-06, "loss": 1.062, "step": 13330 }, { "epoch": 0.09656380522197369, "grad_norm": 0.19880887866020203, "learning_rate": 4.903443433444086e-06, "loss": 1.0614, "step": 13340 }, { "epoch": 0.09663619188255988, "grad_norm": 0.20379464328289032, "learning_rate": 4.903371046783499e-06, "loss": 1.0585, "step": 13350 }, { "epoch": 0.09670857854314607, "grad_norm": 0.1923993080854416, "learning_rate": 4.903298660122913e-06, "loss": 1.0647, "step": 13360 }, { "epoch": 0.09678096520373225, "grad_norm": 0.18714511394500732, "learning_rate": 4.9032262734623265e-06, "loss": 1.0439, "step": 13370 }, { "epoch": 0.09685335186431844, "grad_norm": 0.20776358246803284, "learning_rate": 4.90315388680174e-06, "loss": 1.0717, "step": 13380 }, { "epoch": 0.09692573852490463, "grad_norm": 0.1841840147972107, "learning_rate": 4.903081500141154e-06, "loss": 1.0716, "step": 13390 }, { "epoch": 0.09699812518549082, "grad_norm": 0.1891954392194748, "learning_rate": 4.903009113480568e-06, "loss": 1.0572, "step": 13400 }, { "epoch": 0.097070511846077, "grad_norm": 0.2061716616153717, "learning_rate": 4.902936726819982e-06, "loss": 1.0587, "step": 13410 }, { "epoch": 0.09714289850666319, "grad_norm": 0.1809329241514206, "learning_rate": 4.9028643401593954e-06, "loss": 1.0772, "step": 13420 }, { "epoch": 0.09721528516724938, "grad_norm": 0.18182168900966644, "learning_rate": 4.902791953498809e-06, "loss": 1.0707, "step": 13430 }, { "epoch": 0.09728767182783557, "grad_norm": 0.16881795227527618, "learning_rate": 4.9027195668382235e-06, "loss": 1.0617, "step": 13440 }, { "epoch": 0.09736005848842175, "grad_norm": 0.18087784945964813, "learning_rate": 4.902647180177637e-06, "loss": 1.0566, "step": 13450 }, { "epoch": 0.09743244514900794, "grad_norm": 0.19071871042251587, "learning_rate": 4.902574793517051e-06, "loss": 1.0479, "step": 13460 }, { "epoch": 0.09750483180959413, "grad_norm": 0.18914766609668732, "learning_rate": 4.902502406856464e-06, "loss": 1.0657, "step": 13470 }, { "epoch": 0.09757721847018032, "grad_norm": 0.16709379851818085, "learning_rate": 4.902430020195879e-06, "loss": 1.0522, "step": 13480 }, { "epoch": 0.09764960513076651, "grad_norm": 0.178353950381279, "learning_rate": 4.9023576335352924e-06, "loss": 1.0385, "step": 13490 }, { "epoch": 0.09772199179135269, "grad_norm": 0.18520021438598633, "learning_rate": 4.902285246874706e-06, "loss": 1.0431, "step": 13500 }, { "epoch": 0.09779437845193888, "grad_norm": 0.1669263392686844, "learning_rate": 4.90221286021412e-06, "loss": 1.0552, "step": 13510 }, { "epoch": 0.09786676511252507, "grad_norm": 0.1864238828420639, "learning_rate": 4.902140473553534e-06, "loss": 1.0536, "step": 13520 }, { "epoch": 0.09793915177311126, "grad_norm": 0.1777682602405548, "learning_rate": 4.902068086892948e-06, "loss": 1.0604, "step": 13530 }, { "epoch": 0.09801153843369743, "grad_norm": 0.1826665848493576, "learning_rate": 4.901995700232361e-06, "loss": 1.0642, "step": 13540 }, { "epoch": 0.09808392509428362, "grad_norm": 0.17385782301425934, "learning_rate": 4.901923313571775e-06, "loss": 1.0643, "step": 13550 }, { "epoch": 0.09815631175486982, "grad_norm": 0.18327641487121582, "learning_rate": 4.9018509269111894e-06, "loss": 1.0438, "step": 13560 }, { "epoch": 0.098228698415456, "grad_norm": 0.18838289380073547, "learning_rate": 4.901778540250603e-06, "loss": 1.0529, "step": 13570 }, { "epoch": 0.09830108507604218, "grad_norm": 0.1839076280593872, "learning_rate": 4.901706153590017e-06, "loss": 1.0553, "step": 13580 }, { "epoch": 0.09837347173662837, "grad_norm": 0.21451406180858612, "learning_rate": 4.90163376692943e-06, "loss": 1.0549, "step": 13590 }, { "epoch": 0.09844585839721456, "grad_norm": 0.1959172636270523, "learning_rate": 4.901561380268845e-06, "loss": 1.0824, "step": 13600 }, { "epoch": 0.09851824505780075, "grad_norm": 0.17677393555641174, "learning_rate": 4.901488993608258e-06, "loss": 1.0723, "step": 13610 }, { "epoch": 0.09859063171838693, "grad_norm": 0.17665037512779236, "learning_rate": 4.901416606947672e-06, "loss": 1.0616, "step": 13620 }, { "epoch": 0.09866301837897312, "grad_norm": 0.20014891028404236, "learning_rate": 4.901344220287086e-06, "loss": 1.0484, "step": 13630 }, { "epoch": 0.09873540503955931, "grad_norm": 0.18088600039482117, "learning_rate": 4.9012718336265e-06, "loss": 1.0507, "step": 13640 }, { "epoch": 0.0988077917001455, "grad_norm": 0.19113531708717346, "learning_rate": 4.901199446965914e-06, "loss": 1.063, "step": 13650 }, { "epoch": 0.09888017836073168, "grad_norm": 0.18885605037212372, "learning_rate": 4.901127060305327e-06, "loss": 1.0525, "step": 13660 }, { "epoch": 0.09895256502131787, "grad_norm": 0.21783652901649475, "learning_rate": 4.901054673644741e-06, "loss": 1.0417, "step": 13670 }, { "epoch": 0.09902495168190406, "grad_norm": 0.17940004169940948, "learning_rate": 4.900982286984155e-06, "loss": 1.0507, "step": 13680 }, { "epoch": 0.09909733834249025, "grad_norm": 0.18767113983631134, "learning_rate": 4.900909900323569e-06, "loss": 1.0595, "step": 13690 }, { "epoch": 0.09916972500307643, "grad_norm": 0.21696849167346954, "learning_rate": 4.900837513662983e-06, "loss": 1.0526, "step": 13700 }, { "epoch": 0.09924211166366262, "grad_norm": 0.18748457729816437, "learning_rate": 4.900765127002396e-06, "loss": 1.0464, "step": 13710 }, { "epoch": 0.09931449832424881, "grad_norm": 0.16807325184345245, "learning_rate": 4.900692740341811e-06, "loss": 1.0509, "step": 13720 }, { "epoch": 0.099386884984835, "grad_norm": 0.19087384641170502, "learning_rate": 4.900620353681224e-06, "loss": 1.0599, "step": 13730 }, { "epoch": 0.09945927164542118, "grad_norm": 0.2038954794406891, "learning_rate": 4.900547967020638e-06, "loss": 1.064, "step": 13740 }, { "epoch": 0.09953165830600737, "grad_norm": 0.18865635991096497, "learning_rate": 4.9004755803600515e-06, "loss": 1.0605, "step": 13750 }, { "epoch": 0.09960404496659356, "grad_norm": 0.1682719886302948, "learning_rate": 4.900403193699466e-06, "loss": 1.053, "step": 13760 }, { "epoch": 0.09967643162717975, "grad_norm": 0.17732466757297516, "learning_rate": 4.90033080703888e-06, "loss": 1.0541, "step": 13770 }, { "epoch": 0.09974881828776593, "grad_norm": 0.18870952725410461, "learning_rate": 4.900258420378293e-06, "loss": 1.0635, "step": 13780 }, { "epoch": 0.09982120494835212, "grad_norm": 0.17982196807861328, "learning_rate": 4.900186033717707e-06, "loss": 1.064, "step": 13790 }, { "epoch": 0.0998935916089383, "grad_norm": 0.17786167562007904, "learning_rate": 4.900113647057121e-06, "loss": 1.0619, "step": 13800 }, { "epoch": 0.0999659782695245, "grad_norm": 0.1713530421257019, "learning_rate": 4.900041260396535e-06, "loss": 1.051, "step": 13810 }, { "epoch": 0.10003836493011067, "grad_norm": 0.17207489907741547, "learning_rate": 4.8999688737359485e-06, "loss": 1.0744, "step": 13820 }, { "epoch": 0.10011075159069686, "grad_norm": 0.18473108112812042, "learning_rate": 4.899896487075362e-06, "loss": 1.0505, "step": 13830 }, { "epoch": 0.10018313825128305, "grad_norm": 0.16173668205738068, "learning_rate": 4.899824100414776e-06, "loss": 1.054, "step": 13840 }, { "epoch": 0.10025552491186925, "grad_norm": 0.18166185915470123, "learning_rate": 4.89975171375419e-06, "loss": 1.0566, "step": 13850 }, { "epoch": 0.10032791157245542, "grad_norm": 0.1847759187221527, "learning_rate": 4.899679327093604e-06, "loss": 1.0589, "step": 13860 }, { "epoch": 0.10040029823304161, "grad_norm": 0.23197191953659058, "learning_rate": 4.8996069404330175e-06, "loss": 1.0468, "step": 13870 }, { "epoch": 0.1004726848936278, "grad_norm": 0.1927204728126526, "learning_rate": 4.899534553772431e-06, "loss": 1.0602, "step": 13880 }, { "epoch": 0.100545071554214, "grad_norm": 0.18343763053417206, "learning_rate": 4.8994621671118456e-06, "loss": 1.0436, "step": 13890 }, { "epoch": 0.10061745821480017, "grad_norm": 0.18282248079776764, "learning_rate": 4.899389780451258e-06, "loss": 1.0505, "step": 13900 }, { "epoch": 0.10068984487538636, "grad_norm": 0.17692866921424866, "learning_rate": 4.899317393790673e-06, "loss": 1.0826, "step": 13910 }, { "epoch": 0.10076223153597255, "grad_norm": 0.17446507513523102, "learning_rate": 4.899245007130086e-06, "loss": 1.06, "step": 13920 }, { "epoch": 0.10083461819655874, "grad_norm": 0.1633169800043106, "learning_rate": 4.8991726204695e-06, "loss": 1.0358, "step": 13930 }, { "epoch": 0.10090700485714492, "grad_norm": 0.17512056231498718, "learning_rate": 4.899100233808914e-06, "loss": 1.0552, "step": 13940 }, { "epoch": 0.10097939151773111, "grad_norm": 0.1872796267271042, "learning_rate": 4.899027847148328e-06, "loss": 1.0467, "step": 13950 }, { "epoch": 0.1010517781783173, "grad_norm": 0.17610350251197815, "learning_rate": 4.898955460487742e-06, "loss": 1.0536, "step": 13960 }, { "epoch": 0.10112416483890349, "grad_norm": 0.16492041945457458, "learning_rate": 4.898883073827155e-06, "loss": 1.0724, "step": 13970 }, { "epoch": 0.10119655149948967, "grad_norm": 0.16583912074565887, "learning_rate": 4.898810687166569e-06, "loss": 1.0532, "step": 13980 }, { "epoch": 0.10126893816007586, "grad_norm": 0.20664115250110626, "learning_rate": 4.8987383005059826e-06, "loss": 1.0643, "step": 13990 }, { "epoch": 0.10134132482066205, "grad_norm": 0.17064620554447174, "learning_rate": 4.898665913845397e-06, "loss": 1.042, "step": 14000 }, { "epoch": 0.10141371148124824, "grad_norm": 0.17799104750156403, "learning_rate": 4.898593527184811e-06, "loss": 1.0575, "step": 14010 }, { "epoch": 0.10148609814183442, "grad_norm": 0.16274908185005188, "learning_rate": 4.898521140524224e-06, "loss": 1.0514, "step": 14020 }, { "epoch": 0.1015584848024206, "grad_norm": 0.1757798045873642, "learning_rate": 4.898448753863638e-06, "loss": 1.0512, "step": 14030 }, { "epoch": 0.1016308714630068, "grad_norm": 0.18405269086360931, "learning_rate": 4.898376367203052e-06, "loss": 1.0545, "step": 14040 }, { "epoch": 0.10170325812359299, "grad_norm": 0.1723915934562683, "learning_rate": 4.898303980542466e-06, "loss": 1.0563, "step": 14050 }, { "epoch": 0.10177564478417918, "grad_norm": 0.18672221899032593, "learning_rate": 4.8982315938818796e-06, "loss": 1.0542, "step": 14060 }, { "epoch": 0.10184803144476536, "grad_norm": 0.20487704873085022, "learning_rate": 4.898159207221293e-06, "loss": 1.0567, "step": 14070 }, { "epoch": 0.10192041810535155, "grad_norm": 0.1787646859884262, "learning_rate": 4.898086820560708e-06, "loss": 1.0683, "step": 14080 }, { "epoch": 0.10199280476593774, "grad_norm": 0.18574008345603943, "learning_rate": 4.898014433900121e-06, "loss": 1.0517, "step": 14090 }, { "epoch": 0.10206519142652393, "grad_norm": 0.17313173413276672, "learning_rate": 4.897942047239535e-06, "loss": 1.0451, "step": 14100 }, { "epoch": 0.1021375780871101, "grad_norm": 0.18771642446517944, "learning_rate": 4.8978696605789485e-06, "loss": 1.0599, "step": 14110 }, { "epoch": 0.1022099647476963, "grad_norm": 0.22109109163284302, "learning_rate": 4.897797273918363e-06, "loss": 1.0622, "step": 14120 }, { "epoch": 0.10228235140828248, "grad_norm": 0.1844063252210617, "learning_rate": 4.8977248872577766e-06, "loss": 1.0602, "step": 14130 }, { "epoch": 0.10235473806886868, "grad_norm": 0.18542051315307617, "learning_rate": 4.89765250059719e-06, "loss": 1.0554, "step": 14140 }, { "epoch": 0.10242712472945485, "grad_norm": 0.17631708085536957, "learning_rate": 4.897580113936604e-06, "loss": 1.053, "step": 14150 }, { "epoch": 0.10249951139004104, "grad_norm": 0.17057740688323975, "learning_rate": 4.897507727276018e-06, "loss": 1.0535, "step": 14160 }, { "epoch": 0.10257189805062723, "grad_norm": 0.17483165860176086, "learning_rate": 4.897435340615432e-06, "loss": 1.0433, "step": 14170 }, { "epoch": 0.10264428471121342, "grad_norm": 0.18016666173934937, "learning_rate": 4.8973629539548455e-06, "loss": 1.031, "step": 14180 }, { "epoch": 0.1027166713717996, "grad_norm": 0.1845754235982895, "learning_rate": 4.897290567294259e-06, "loss": 1.0498, "step": 14190 }, { "epoch": 0.10278905803238579, "grad_norm": 0.1925055980682373, "learning_rate": 4.897218180633674e-06, "loss": 1.0588, "step": 14200 }, { "epoch": 0.10286144469297198, "grad_norm": 0.19144387543201447, "learning_rate": 4.897145793973087e-06, "loss": 1.0514, "step": 14210 }, { "epoch": 0.10293383135355817, "grad_norm": 0.1973779946565628, "learning_rate": 4.897073407312501e-06, "loss": 1.0468, "step": 14220 }, { "epoch": 0.10300621801414435, "grad_norm": 0.16684280335903168, "learning_rate": 4.8970010206519144e-06, "loss": 1.0578, "step": 14230 }, { "epoch": 0.10307860467473054, "grad_norm": 0.22712835669517517, "learning_rate": 4.896928633991329e-06, "loss": 1.055, "step": 14240 }, { "epoch": 0.10315099133531673, "grad_norm": 0.189208522439003, "learning_rate": 4.8968562473307425e-06, "loss": 1.0338, "step": 14250 }, { "epoch": 0.10322337799590292, "grad_norm": 0.18186455965042114, "learning_rate": 4.896783860670156e-06, "loss": 1.0497, "step": 14260 }, { "epoch": 0.1032957646564891, "grad_norm": 0.1724977344274521, "learning_rate": 4.89671147400957e-06, "loss": 1.0475, "step": 14270 }, { "epoch": 0.10336815131707529, "grad_norm": 0.1740335077047348, "learning_rate": 4.896639087348984e-06, "loss": 1.0406, "step": 14280 }, { "epoch": 0.10344053797766148, "grad_norm": 0.21872451901435852, "learning_rate": 4.896566700688398e-06, "loss": 1.0514, "step": 14290 }, { "epoch": 0.10351292463824767, "grad_norm": 0.18021239340305328, "learning_rate": 4.8964943140278114e-06, "loss": 1.0619, "step": 14300 }, { "epoch": 0.10358531129883385, "grad_norm": 0.17084094882011414, "learning_rate": 4.896421927367225e-06, "loss": 1.0567, "step": 14310 }, { "epoch": 0.10365769795942004, "grad_norm": 0.18873821198940277, "learning_rate": 4.8963495407066395e-06, "loss": 1.0565, "step": 14320 }, { "epoch": 0.10373008462000623, "grad_norm": 0.16363616287708282, "learning_rate": 4.896277154046053e-06, "loss": 1.0444, "step": 14330 }, { "epoch": 0.10380247128059242, "grad_norm": 0.20980127155780792, "learning_rate": 4.896204767385467e-06, "loss": 1.0515, "step": 14340 }, { "epoch": 0.1038748579411786, "grad_norm": 0.17421722412109375, "learning_rate": 4.89613238072488e-06, "loss": 1.0513, "step": 14350 }, { "epoch": 0.10394724460176478, "grad_norm": 0.20598512887954712, "learning_rate": 4.896059994064295e-06, "loss": 1.0549, "step": 14360 }, { "epoch": 0.10401963126235098, "grad_norm": 0.1713586449623108, "learning_rate": 4.8959876074037084e-06, "loss": 1.0456, "step": 14370 }, { "epoch": 0.10409201792293717, "grad_norm": 0.1770513504743576, "learning_rate": 4.895915220743122e-06, "loss": 1.0492, "step": 14380 }, { "epoch": 0.10416440458352334, "grad_norm": 0.18118791282176971, "learning_rate": 4.895842834082536e-06, "loss": 1.0547, "step": 14390 }, { "epoch": 0.10423679124410953, "grad_norm": 0.18534211814403534, "learning_rate": 4.89577044742195e-06, "loss": 1.057, "step": 14400 }, { "epoch": 0.10430917790469572, "grad_norm": 0.20665541291236877, "learning_rate": 4.895698060761364e-06, "loss": 1.0455, "step": 14410 }, { "epoch": 0.10438156456528191, "grad_norm": 0.17217250168323517, "learning_rate": 4.895625674100777e-06, "loss": 1.0524, "step": 14420 }, { "epoch": 0.10445395122586809, "grad_norm": 0.1958875209093094, "learning_rate": 4.895553287440191e-06, "loss": 1.0457, "step": 14430 }, { "epoch": 0.10452633788645428, "grad_norm": 0.18812295794487, "learning_rate": 4.895480900779605e-06, "loss": 1.0523, "step": 14440 }, { "epoch": 0.10459872454704047, "grad_norm": 0.16309994459152222, "learning_rate": 4.895408514119018e-06, "loss": 1.0446, "step": 14450 }, { "epoch": 0.10467111120762666, "grad_norm": 0.1937844455242157, "learning_rate": 4.895336127458432e-06, "loss": 1.0534, "step": 14460 }, { "epoch": 0.10474349786821284, "grad_norm": 0.17462307214736938, "learning_rate": 4.895263740797846e-06, "loss": 1.0362, "step": 14470 }, { "epoch": 0.10481588452879903, "grad_norm": 0.17586156725883484, "learning_rate": 4.89519135413726e-06, "loss": 1.034, "step": 14480 }, { "epoch": 0.10488827118938522, "grad_norm": 0.1698175072669983, "learning_rate": 4.8951189674766735e-06, "loss": 1.0439, "step": 14490 }, { "epoch": 0.10496065784997141, "grad_norm": 0.18947486579418182, "learning_rate": 4.895046580816087e-06, "loss": 1.0507, "step": 14500 }, { "epoch": 0.10503304451055759, "grad_norm": 0.17008043825626373, "learning_rate": 4.894974194155502e-06, "loss": 1.0368, "step": 14510 }, { "epoch": 0.10510543117114378, "grad_norm": 0.18763844668865204, "learning_rate": 4.894901807494915e-06, "loss": 1.0634, "step": 14520 }, { "epoch": 0.10517781783172997, "grad_norm": 0.18135419487953186, "learning_rate": 4.894829420834329e-06, "loss": 1.0502, "step": 14530 }, { "epoch": 0.10525020449231616, "grad_norm": 0.18853351473808289, "learning_rate": 4.8947570341737425e-06, "loss": 1.0538, "step": 14540 }, { "epoch": 0.10532259115290234, "grad_norm": 0.17148560285568237, "learning_rate": 4.894684647513157e-06, "loss": 1.0524, "step": 14550 }, { "epoch": 0.10539497781348853, "grad_norm": 0.18300309777259827, "learning_rate": 4.8946122608525705e-06, "loss": 1.0528, "step": 14560 }, { "epoch": 0.10546736447407472, "grad_norm": 0.17703156173229218, "learning_rate": 4.894539874191984e-06, "loss": 1.057, "step": 14570 }, { "epoch": 0.10553975113466091, "grad_norm": 0.18114346265792847, "learning_rate": 4.894467487531398e-06, "loss": 1.0477, "step": 14580 }, { "epoch": 0.10561213779524709, "grad_norm": 0.18363863229751587, "learning_rate": 4.894395100870812e-06, "loss": 1.0485, "step": 14590 }, { "epoch": 0.10568452445583328, "grad_norm": 0.17243093252182007, "learning_rate": 4.894322714210226e-06, "loss": 1.0512, "step": 14600 }, { "epoch": 0.10575691111641947, "grad_norm": 0.16274864971637726, "learning_rate": 4.8942503275496395e-06, "loss": 1.0321, "step": 14610 }, { "epoch": 0.10582929777700566, "grad_norm": 0.1914278268814087, "learning_rate": 4.894177940889053e-06, "loss": 1.0623, "step": 14620 }, { "epoch": 0.10590168443759183, "grad_norm": 0.187461256980896, "learning_rate": 4.894105554228467e-06, "loss": 1.0426, "step": 14630 }, { "epoch": 0.10597407109817802, "grad_norm": 0.17549605667591095, "learning_rate": 4.894033167567881e-06, "loss": 1.0341, "step": 14640 }, { "epoch": 0.10604645775876421, "grad_norm": 0.17880699038505554, "learning_rate": 4.893960780907295e-06, "loss": 1.0549, "step": 14650 }, { "epoch": 0.1061188444193504, "grad_norm": 0.17565539479255676, "learning_rate": 4.893888394246708e-06, "loss": 1.048, "step": 14660 }, { "epoch": 0.1061912310799366, "grad_norm": 0.17139199376106262, "learning_rate": 4.893816007586122e-06, "loss": 1.0557, "step": 14670 }, { "epoch": 0.10626361774052277, "grad_norm": 0.18786922097206116, "learning_rate": 4.8937436209255365e-06, "loss": 1.059, "step": 14680 }, { "epoch": 0.10633600440110896, "grad_norm": 0.18301083147525787, "learning_rate": 4.89367123426495e-06, "loss": 1.0397, "step": 14690 }, { "epoch": 0.10640839106169515, "grad_norm": 0.1840226799249649, "learning_rate": 4.893598847604364e-06, "loss": 1.0548, "step": 14700 }, { "epoch": 0.10648077772228134, "grad_norm": 0.18379652500152588, "learning_rate": 4.893526460943777e-06, "loss": 1.0441, "step": 14710 }, { "epoch": 0.10655316438286752, "grad_norm": 0.17776694893836975, "learning_rate": 4.893454074283192e-06, "loss": 1.0594, "step": 14720 }, { "epoch": 0.10662555104345371, "grad_norm": 0.17674773931503296, "learning_rate": 4.893381687622605e-06, "loss": 1.0594, "step": 14730 }, { "epoch": 0.1066979377040399, "grad_norm": 0.17039507627487183, "learning_rate": 4.893309300962019e-06, "loss": 1.0525, "step": 14740 }, { "epoch": 0.10677032436462609, "grad_norm": 0.16657809913158417, "learning_rate": 4.893236914301433e-06, "loss": 1.0669, "step": 14750 }, { "epoch": 0.10684271102521227, "grad_norm": 0.17446079850196838, "learning_rate": 4.893164527640847e-06, "loss": 1.0464, "step": 14760 }, { "epoch": 0.10691509768579846, "grad_norm": 0.16530625522136688, "learning_rate": 4.893092140980261e-06, "loss": 1.0426, "step": 14770 }, { "epoch": 0.10698748434638465, "grad_norm": 0.180899515748024, "learning_rate": 4.893019754319674e-06, "loss": 1.0426, "step": 14780 }, { "epoch": 0.10705987100697084, "grad_norm": 0.18140670657157898, "learning_rate": 4.892947367659088e-06, "loss": 1.0468, "step": 14790 }, { "epoch": 0.10713225766755702, "grad_norm": 0.1905767023563385, "learning_rate": 4.892874980998502e-06, "loss": 1.0518, "step": 14800 }, { "epoch": 0.10720464432814321, "grad_norm": 0.17752863466739655, "learning_rate": 4.892802594337916e-06, "loss": 1.048, "step": 14810 }, { "epoch": 0.1072770309887294, "grad_norm": 0.16803748905658722, "learning_rate": 4.89273020767733e-06, "loss": 1.0472, "step": 14820 }, { "epoch": 0.10734941764931559, "grad_norm": 0.17945846915245056, "learning_rate": 4.892657821016743e-06, "loss": 1.0396, "step": 14830 }, { "epoch": 0.10742180430990177, "grad_norm": 0.19865509867668152, "learning_rate": 4.892585434356158e-06, "loss": 1.0592, "step": 14840 }, { "epoch": 0.10749419097048796, "grad_norm": 0.18445666134357452, "learning_rate": 4.892513047695571e-06, "loss": 1.0411, "step": 14850 }, { "epoch": 0.10756657763107415, "grad_norm": 0.1645526885986328, "learning_rate": 4.892440661034985e-06, "loss": 1.0588, "step": 14860 }, { "epoch": 0.10763896429166034, "grad_norm": 0.1656254678964615, "learning_rate": 4.8923682743743986e-06, "loss": 1.0547, "step": 14870 }, { "epoch": 0.10771135095224652, "grad_norm": 0.1796533614397049, "learning_rate": 4.892295887713813e-06, "loss": 1.0504, "step": 14880 }, { "epoch": 0.1077837376128327, "grad_norm": 0.17627054452896118, "learning_rate": 4.892223501053227e-06, "loss": 1.064, "step": 14890 }, { "epoch": 0.1078561242734189, "grad_norm": 0.1647876799106598, "learning_rate": 4.89215111439264e-06, "loss": 1.0305, "step": 14900 }, { "epoch": 0.10792851093400509, "grad_norm": 0.17485207319259644, "learning_rate": 4.892078727732054e-06, "loss": 1.058, "step": 14910 }, { "epoch": 0.10800089759459126, "grad_norm": 0.18584197759628296, "learning_rate": 4.892006341071468e-06, "loss": 1.0459, "step": 14920 }, { "epoch": 0.10807328425517745, "grad_norm": 0.17022201418876648, "learning_rate": 4.891933954410882e-06, "loss": 1.0758, "step": 14930 }, { "epoch": 0.10814567091576364, "grad_norm": 0.159866064786911, "learning_rate": 4.891861567750296e-06, "loss": 1.0397, "step": 14940 }, { "epoch": 0.10821805757634984, "grad_norm": 0.16421310603618622, "learning_rate": 4.891789181089709e-06, "loss": 1.0472, "step": 14950 }, { "epoch": 0.10829044423693601, "grad_norm": 0.17276668548583984, "learning_rate": 4.891716794429123e-06, "loss": 1.0433, "step": 14960 }, { "epoch": 0.1083628308975222, "grad_norm": 0.1813632696866989, "learning_rate": 4.8916444077685364e-06, "loss": 1.0355, "step": 14970 }, { "epoch": 0.1084352175581084, "grad_norm": 0.16496697068214417, "learning_rate": 4.89157202110795e-06, "loss": 1.0368, "step": 14980 }, { "epoch": 0.10850760421869458, "grad_norm": 0.1636180728673935, "learning_rate": 4.8914996344473645e-06, "loss": 1.042, "step": 14990 }, { "epoch": 0.10857999087928076, "grad_norm": 0.1825898438692093, "learning_rate": 4.891427247786778e-06, "loss": 1.0519, "step": 15000 }, { "epoch": 0.10865237753986695, "grad_norm": 0.19418931007385254, "learning_rate": 4.891354861126192e-06, "loss": 1.0613, "step": 15010 }, { "epoch": 0.10872476420045314, "grad_norm": 0.1902722269296646, "learning_rate": 4.891282474465605e-06, "loss": 1.0433, "step": 15020 }, { "epoch": 0.10879715086103933, "grad_norm": 0.1725703924894333, "learning_rate": 4.89121008780502e-06, "loss": 1.0537, "step": 15030 }, { "epoch": 0.10886953752162551, "grad_norm": 0.2096461057662964, "learning_rate": 4.8911377011444334e-06, "loss": 1.0416, "step": 15040 }, { "epoch": 0.1089419241822117, "grad_norm": 0.1624659150838852, "learning_rate": 4.891065314483847e-06, "loss": 1.0572, "step": 15050 }, { "epoch": 0.10901431084279789, "grad_norm": 0.1570434719324112, "learning_rate": 4.890992927823261e-06, "loss": 1.0396, "step": 15060 }, { "epoch": 0.10908669750338408, "grad_norm": 0.17029106616973877, "learning_rate": 4.890920541162675e-06, "loss": 1.0596, "step": 15070 }, { "epoch": 0.10915908416397026, "grad_norm": 0.16916610300540924, "learning_rate": 4.890848154502089e-06, "loss": 1.0534, "step": 15080 }, { "epoch": 0.10923147082455645, "grad_norm": 0.18326231837272644, "learning_rate": 4.890775767841502e-06, "loss": 1.0517, "step": 15090 }, { "epoch": 0.10930385748514264, "grad_norm": 0.2638685405254364, "learning_rate": 4.890703381180916e-06, "loss": 1.0556, "step": 15100 }, { "epoch": 0.10937624414572883, "grad_norm": 0.17363910377025604, "learning_rate": 4.8906309945203304e-06, "loss": 1.0463, "step": 15110 }, { "epoch": 0.109448630806315, "grad_norm": 0.17666375637054443, "learning_rate": 4.890558607859744e-06, "loss": 1.0603, "step": 15120 }, { "epoch": 0.1095210174669012, "grad_norm": 0.16066348552703857, "learning_rate": 4.890486221199158e-06, "loss": 1.0578, "step": 15130 }, { "epoch": 0.10959340412748739, "grad_norm": 0.18254084885120392, "learning_rate": 4.890413834538571e-06, "loss": 1.0439, "step": 15140 }, { "epoch": 0.10966579078807358, "grad_norm": 0.18753020465373993, "learning_rate": 4.890341447877986e-06, "loss": 1.0529, "step": 15150 }, { "epoch": 0.10973817744865975, "grad_norm": 0.21900314092636108, "learning_rate": 4.890269061217399e-06, "loss": 1.0389, "step": 15160 }, { "epoch": 0.10981056410924595, "grad_norm": 0.17943327128887177, "learning_rate": 4.890196674556813e-06, "loss": 1.0558, "step": 15170 }, { "epoch": 0.10988295076983214, "grad_norm": 0.16995948553085327, "learning_rate": 4.890124287896227e-06, "loss": 1.0474, "step": 15180 }, { "epoch": 0.10995533743041833, "grad_norm": 0.17363294959068298, "learning_rate": 4.890051901235641e-06, "loss": 1.0629, "step": 15190 }, { "epoch": 0.1100277240910045, "grad_norm": 0.1756804883480072, "learning_rate": 4.889979514575055e-06, "loss": 1.0547, "step": 15200 }, { "epoch": 0.1101001107515907, "grad_norm": 0.1712425798177719, "learning_rate": 4.889907127914468e-06, "loss": 1.047, "step": 15210 }, { "epoch": 0.11017249741217688, "grad_norm": 0.1906772404909134, "learning_rate": 4.889834741253882e-06, "loss": 1.0554, "step": 15220 }, { "epoch": 0.11024488407276307, "grad_norm": 0.1886027455329895, "learning_rate": 4.8897623545932955e-06, "loss": 1.0429, "step": 15230 }, { "epoch": 0.11031727073334927, "grad_norm": 0.17662407457828522, "learning_rate": 4.88968996793271e-06, "loss": 1.0659, "step": 15240 }, { "epoch": 0.11038965739393544, "grad_norm": 0.1786973476409912, "learning_rate": 4.889617581272124e-06, "loss": 1.0657, "step": 15250 }, { "epoch": 0.11046204405452163, "grad_norm": 0.18255244195461273, "learning_rate": 4.889545194611537e-06, "loss": 1.0411, "step": 15260 }, { "epoch": 0.11053443071510782, "grad_norm": 0.18926455080509186, "learning_rate": 4.889472807950951e-06, "loss": 1.0492, "step": 15270 }, { "epoch": 0.11060681737569401, "grad_norm": 0.17178624868392944, "learning_rate": 4.889400421290365e-06, "loss": 1.0424, "step": 15280 }, { "epoch": 0.11067920403628019, "grad_norm": 0.17576183378696442, "learning_rate": 4.889328034629779e-06, "loss": 1.0464, "step": 15290 }, { "epoch": 0.11075159069686638, "grad_norm": 0.19050145149230957, "learning_rate": 4.8892556479691925e-06, "loss": 1.0531, "step": 15300 }, { "epoch": 0.11082397735745257, "grad_norm": 0.17656132578849792, "learning_rate": 4.889183261308606e-06, "loss": 1.0312, "step": 15310 }, { "epoch": 0.11089636401803876, "grad_norm": 0.20985843241214752, "learning_rate": 4.889110874648021e-06, "loss": 1.0405, "step": 15320 }, { "epoch": 0.11096875067862494, "grad_norm": 0.17128346860408783, "learning_rate": 4.889038487987434e-06, "loss": 1.0542, "step": 15330 }, { "epoch": 0.11104113733921113, "grad_norm": 0.18605564534664154, "learning_rate": 4.888966101326848e-06, "loss": 1.0492, "step": 15340 }, { "epoch": 0.11111352399979732, "grad_norm": 0.17718972265720367, "learning_rate": 4.8888937146662615e-06, "loss": 1.0379, "step": 15350 }, { "epoch": 0.11118591066038351, "grad_norm": 0.263354629278183, "learning_rate": 4.888821328005676e-06, "loss": 1.0562, "step": 15360 }, { "epoch": 0.11125829732096969, "grad_norm": 0.2226659655570984, "learning_rate": 4.8887489413450895e-06, "loss": 1.0404, "step": 15370 }, { "epoch": 0.11133068398155588, "grad_norm": 0.18347443640232086, "learning_rate": 4.888676554684503e-06, "loss": 1.0458, "step": 15380 }, { "epoch": 0.11140307064214207, "grad_norm": 0.2046961486339569, "learning_rate": 4.888604168023917e-06, "loss": 1.0678, "step": 15390 }, { "epoch": 0.11147545730272826, "grad_norm": 0.18448476493358612, "learning_rate": 4.888531781363331e-06, "loss": 1.0627, "step": 15400 }, { "epoch": 0.11154784396331444, "grad_norm": 0.17062750458717346, "learning_rate": 4.888459394702745e-06, "loss": 1.0404, "step": 15410 }, { "epoch": 0.11162023062390063, "grad_norm": 0.18066982924938202, "learning_rate": 4.8883870080421585e-06, "loss": 1.0386, "step": 15420 }, { "epoch": 0.11169261728448682, "grad_norm": 0.18624179065227509, "learning_rate": 4.888314621381572e-06, "loss": 1.0547, "step": 15430 }, { "epoch": 0.11176500394507301, "grad_norm": 0.23639068007469177, "learning_rate": 4.8882422347209866e-06, "loss": 1.0424, "step": 15440 }, { "epoch": 0.11183739060565918, "grad_norm": 0.2034512162208557, "learning_rate": 4.8881698480604e-06, "loss": 1.0349, "step": 15450 }, { "epoch": 0.11190977726624537, "grad_norm": 0.1779569387435913, "learning_rate": 4.888097461399814e-06, "loss": 1.0517, "step": 15460 }, { "epoch": 0.11198216392683157, "grad_norm": 0.1710672378540039, "learning_rate": 4.888025074739227e-06, "loss": 1.0476, "step": 15470 }, { "epoch": 0.11205455058741776, "grad_norm": 0.1749650537967682, "learning_rate": 4.887952688078642e-06, "loss": 1.0481, "step": 15480 }, { "epoch": 0.11212693724800393, "grad_norm": 0.17475520074367523, "learning_rate": 4.887880301418055e-06, "loss": 1.0611, "step": 15490 }, { "epoch": 0.11219932390859012, "grad_norm": 0.16181065142154694, "learning_rate": 4.887807914757468e-06, "loss": 1.0518, "step": 15500 }, { "epoch": 0.11227171056917631, "grad_norm": 0.2130517065525055, "learning_rate": 4.887735528096883e-06, "loss": 1.0441, "step": 15510 }, { "epoch": 0.1123440972297625, "grad_norm": 0.18250152468681335, "learning_rate": 4.887663141436296e-06, "loss": 1.0524, "step": 15520 }, { "epoch": 0.11241648389034868, "grad_norm": 0.17363341152668, "learning_rate": 4.88759075477571e-06, "loss": 1.0465, "step": 15530 }, { "epoch": 0.11248887055093487, "grad_norm": 0.17302308976650238, "learning_rate": 4.8875183681151236e-06, "loss": 1.0467, "step": 15540 }, { "epoch": 0.11256125721152106, "grad_norm": 0.18607738614082336, "learning_rate": 4.887445981454538e-06, "loss": 1.0449, "step": 15550 }, { "epoch": 0.11263364387210725, "grad_norm": 0.2096407413482666, "learning_rate": 4.887373594793952e-06, "loss": 1.0541, "step": 15560 }, { "epoch": 0.11270603053269343, "grad_norm": 0.17433767020702362, "learning_rate": 4.887301208133365e-06, "loss": 1.052, "step": 15570 }, { "epoch": 0.11277841719327962, "grad_norm": 0.16963066160678864, "learning_rate": 4.887228821472779e-06, "loss": 1.0411, "step": 15580 }, { "epoch": 0.11285080385386581, "grad_norm": 0.19870556890964508, "learning_rate": 4.887156434812193e-06, "loss": 1.0467, "step": 15590 }, { "epoch": 0.112923190514452, "grad_norm": 0.17876464128494263, "learning_rate": 4.887084048151607e-06, "loss": 1.0401, "step": 15600 }, { "epoch": 0.11299557717503818, "grad_norm": 0.15952976047992706, "learning_rate": 4.8870116614910206e-06, "loss": 1.0339, "step": 15610 }, { "epoch": 0.11306796383562437, "grad_norm": 0.17333056032657623, "learning_rate": 4.886939274830434e-06, "loss": 1.0465, "step": 15620 }, { "epoch": 0.11314035049621056, "grad_norm": 0.1765410602092743, "learning_rate": 4.886866888169849e-06, "loss": 1.0402, "step": 15630 }, { "epoch": 0.11321273715679675, "grad_norm": 0.17773103713989258, "learning_rate": 4.886794501509262e-06, "loss": 1.0483, "step": 15640 }, { "epoch": 0.11328512381738293, "grad_norm": 0.18877708911895752, "learning_rate": 4.886722114848676e-06, "loss": 1.0528, "step": 15650 }, { "epoch": 0.11335751047796912, "grad_norm": 0.20413969457149506, "learning_rate": 4.8866497281880895e-06, "loss": 1.0667, "step": 15660 }, { "epoch": 0.11342989713855531, "grad_norm": 0.16380807757377625, "learning_rate": 4.886577341527504e-06, "loss": 1.0583, "step": 15670 }, { "epoch": 0.1135022837991415, "grad_norm": 0.17399290204048157, "learning_rate": 4.886504954866918e-06, "loss": 1.0675, "step": 15680 }, { "epoch": 0.11357467045972768, "grad_norm": 0.18681904673576355, "learning_rate": 4.886432568206331e-06, "loss": 1.0452, "step": 15690 }, { "epoch": 0.11364705712031387, "grad_norm": 0.15756523609161377, "learning_rate": 4.886360181545745e-06, "loss": 1.0391, "step": 15700 }, { "epoch": 0.11371944378090006, "grad_norm": 0.18316251039505005, "learning_rate": 4.886287794885159e-06, "loss": 1.0474, "step": 15710 }, { "epoch": 0.11379183044148625, "grad_norm": 0.19340310990810394, "learning_rate": 4.886215408224573e-06, "loss": 1.0438, "step": 15720 }, { "epoch": 0.11386421710207242, "grad_norm": 0.16754977405071259, "learning_rate": 4.8861430215639865e-06, "loss": 1.0435, "step": 15730 }, { "epoch": 0.11393660376265861, "grad_norm": 0.16273082792758942, "learning_rate": 4.8860706349034e-06, "loss": 1.0434, "step": 15740 }, { "epoch": 0.1140089904232448, "grad_norm": 0.19182349741458893, "learning_rate": 4.885998248242815e-06, "loss": 1.0516, "step": 15750 }, { "epoch": 0.114081377083831, "grad_norm": 0.2044076770544052, "learning_rate": 4.885925861582228e-06, "loss": 1.042, "step": 15760 }, { "epoch": 0.11415376374441717, "grad_norm": 0.17020849883556366, "learning_rate": 4.885853474921642e-06, "loss": 1.0367, "step": 15770 }, { "epoch": 0.11422615040500336, "grad_norm": 0.17558561265468597, "learning_rate": 4.8857810882610554e-06, "loss": 1.0528, "step": 15780 }, { "epoch": 0.11429853706558955, "grad_norm": 0.20141629874706268, "learning_rate": 4.88570870160047e-06, "loss": 1.0485, "step": 15790 }, { "epoch": 0.11437092372617574, "grad_norm": 0.17599183320999146, "learning_rate": 4.8856363149398835e-06, "loss": 1.0499, "step": 15800 }, { "epoch": 0.11444331038676192, "grad_norm": 0.17712847888469696, "learning_rate": 4.885563928279297e-06, "loss": 1.0386, "step": 15810 }, { "epoch": 0.11451569704734811, "grad_norm": 0.19977372884750366, "learning_rate": 4.885491541618711e-06, "loss": 1.0372, "step": 15820 }, { "epoch": 0.1145880837079343, "grad_norm": 0.17353418469429016, "learning_rate": 4.885419154958125e-06, "loss": 1.0647, "step": 15830 }, { "epoch": 0.11466047036852049, "grad_norm": 0.1808134913444519, "learning_rate": 4.885346768297539e-06, "loss": 1.0505, "step": 15840 }, { "epoch": 0.11473285702910668, "grad_norm": 0.1848040372133255, "learning_rate": 4.8852743816369524e-06, "loss": 1.0363, "step": 15850 }, { "epoch": 0.11480524368969286, "grad_norm": 0.1654992550611496, "learning_rate": 4.885201994976366e-06, "loss": 1.0361, "step": 15860 }, { "epoch": 0.11487763035027905, "grad_norm": 0.17101933062076569, "learning_rate": 4.88512960831578e-06, "loss": 1.0407, "step": 15870 }, { "epoch": 0.11495001701086524, "grad_norm": 0.2501929998397827, "learning_rate": 4.885057221655194e-06, "loss": 1.0494, "step": 15880 }, { "epoch": 0.11502240367145143, "grad_norm": 0.17349718511104584, "learning_rate": 4.884984834994608e-06, "loss": 1.0417, "step": 15890 }, { "epoch": 0.11509479033203761, "grad_norm": 0.20508350431919098, "learning_rate": 4.884912448334021e-06, "loss": 1.0481, "step": 15900 }, { "epoch": 0.1151671769926238, "grad_norm": 0.17247234284877777, "learning_rate": 4.884840061673435e-06, "loss": 1.0362, "step": 15910 }, { "epoch": 0.11523956365320999, "grad_norm": 0.16966472566127777, "learning_rate": 4.8847676750128495e-06, "loss": 1.0501, "step": 15920 }, { "epoch": 0.11531195031379618, "grad_norm": 0.18211546540260315, "learning_rate": 4.884695288352263e-06, "loss": 1.0437, "step": 15930 }, { "epoch": 0.11538433697438236, "grad_norm": 0.1784241944551468, "learning_rate": 4.884622901691677e-06, "loss": 1.0439, "step": 15940 }, { "epoch": 0.11545672363496855, "grad_norm": 0.18033133447170258, "learning_rate": 4.88455051503109e-06, "loss": 1.0441, "step": 15950 }, { "epoch": 0.11552911029555474, "grad_norm": 0.1717543601989746, "learning_rate": 4.884478128370505e-06, "loss": 1.032, "step": 15960 }, { "epoch": 0.11560149695614093, "grad_norm": 0.18930160999298096, "learning_rate": 4.884405741709918e-06, "loss": 1.0425, "step": 15970 }, { "epoch": 0.1156738836167271, "grad_norm": 0.17767852544784546, "learning_rate": 4.884333355049332e-06, "loss": 1.039, "step": 15980 }, { "epoch": 0.1157462702773133, "grad_norm": 0.20390869677066803, "learning_rate": 4.884260968388746e-06, "loss": 1.0554, "step": 15990 }, { "epoch": 0.11581865693789949, "grad_norm": 0.17186413705348969, "learning_rate": 4.88418858172816e-06, "loss": 1.0445, "step": 16000 }, { "epoch": 0.11589104359848568, "grad_norm": 0.17237228155136108, "learning_rate": 4.884116195067574e-06, "loss": 1.0376, "step": 16010 }, { "epoch": 0.11596343025907185, "grad_norm": 0.17973795533180237, "learning_rate": 4.884043808406987e-06, "loss": 1.0304, "step": 16020 }, { "epoch": 0.11603581691965804, "grad_norm": 0.16578522324562073, "learning_rate": 4.883971421746401e-06, "loss": 1.0475, "step": 16030 }, { "epoch": 0.11610820358024423, "grad_norm": 0.16535358130931854, "learning_rate": 4.8838990350858145e-06, "loss": 1.0501, "step": 16040 }, { "epoch": 0.11618059024083043, "grad_norm": 0.18786129355430603, "learning_rate": 4.883826648425228e-06, "loss": 1.05, "step": 16050 }, { "epoch": 0.1162529769014166, "grad_norm": 0.19526006281375885, "learning_rate": 4.883754261764642e-06, "loss": 1.0365, "step": 16060 }, { "epoch": 0.11632536356200279, "grad_norm": 0.166265606880188, "learning_rate": 4.883681875104056e-06, "loss": 1.0481, "step": 16070 }, { "epoch": 0.11639775022258898, "grad_norm": 0.18460646271705627, "learning_rate": 4.88360948844347e-06, "loss": 1.0422, "step": 16080 }, { "epoch": 0.11647013688317517, "grad_norm": 0.16810445487499237, "learning_rate": 4.8835371017828835e-06, "loss": 1.0535, "step": 16090 }, { "epoch": 0.11654252354376135, "grad_norm": 0.469312459230423, "learning_rate": 4.883464715122297e-06, "loss": 1.0527, "step": 16100 }, { "epoch": 0.11661491020434754, "grad_norm": 0.1745699644088745, "learning_rate": 4.8833923284617115e-06, "loss": 1.0331, "step": 16110 }, { "epoch": 0.11668729686493373, "grad_norm": 0.17050036787986755, "learning_rate": 4.883319941801125e-06, "loss": 1.0449, "step": 16120 }, { "epoch": 0.11675968352551992, "grad_norm": 0.18037088215351105, "learning_rate": 4.883247555140539e-06, "loss": 1.044, "step": 16130 }, { "epoch": 0.1168320701861061, "grad_norm": 0.23608429729938507, "learning_rate": 4.883175168479952e-06, "loss": 1.0464, "step": 16140 }, { "epoch": 0.11690445684669229, "grad_norm": 0.175536647439003, "learning_rate": 4.883102781819367e-06, "loss": 1.051, "step": 16150 }, { "epoch": 0.11697684350727848, "grad_norm": 0.17264996469020844, "learning_rate": 4.8830303951587805e-06, "loss": 1.0428, "step": 16160 }, { "epoch": 0.11704923016786467, "grad_norm": 0.17153437435626984, "learning_rate": 4.882958008498194e-06, "loss": 1.035, "step": 16170 }, { "epoch": 0.11712161682845085, "grad_norm": 0.16962124407291412, "learning_rate": 4.882885621837608e-06, "loss": 1.0428, "step": 16180 }, { "epoch": 0.11719400348903704, "grad_norm": 0.17661970853805542, "learning_rate": 4.882813235177022e-06, "loss": 1.0442, "step": 16190 }, { "epoch": 0.11726639014962323, "grad_norm": 0.1694341003894806, "learning_rate": 4.882740848516436e-06, "loss": 1.0409, "step": 16200 }, { "epoch": 0.11733877681020942, "grad_norm": 0.1740081012248993, "learning_rate": 4.882668461855849e-06, "loss": 1.042, "step": 16210 }, { "epoch": 0.1174111634707956, "grad_norm": 0.20887626707553864, "learning_rate": 4.882596075195263e-06, "loss": 1.0573, "step": 16220 }, { "epoch": 0.11748355013138179, "grad_norm": 0.1686887890100479, "learning_rate": 4.8825236885346775e-06, "loss": 1.0396, "step": 16230 }, { "epoch": 0.11755593679196798, "grad_norm": 0.654279351234436, "learning_rate": 4.882451301874091e-06, "loss": 1.0511, "step": 16240 }, { "epoch": 0.11762832345255417, "grad_norm": 0.18497861921787262, "learning_rate": 4.882378915213505e-06, "loss": 1.0568, "step": 16250 }, { "epoch": 0.11770071011314034, "grad_norm": 0.1842462718486786, "learning_rate": 4.882306528552918e-06, "loss": 1.0534, "step": 16260 }, { "epoch": 0.11777309677372653, "grad_norm": 0.16614331305027008, "learning_rate": 4.882234141892333e-06, "loss": 1.0546, "step": 16270 }, { "epoch": 0.11784548343431273, "grad_norm": 0.17406275868415833, "learning_rate": 4.882161755231746e-06, "loss": 1.0451, "step": 16280 }, { "epoch": 0.11791787009489892, "grad_norm": 0.18107841908931732, "learning_rate": 4.88208936857116e-06, "loss": 1.0556, "step": 16290 }, { "epoch": 0.11799025675548509, "grad_norm": 0.17502503097057343, "learning_rate": 4.882016981910574e-06, "loss": 1.0464, "step": 16300 }, { "epoch": 0.11806264341607128, "grad_norm": 0.18071506917476654, "learning_rate": 4.881944595249988e-06, "loss": 1.0354, "step": 16310 }, { "epoch": 0.11813503007665747, "grad_norm": 0.16707313060760498, "learning_rate": 4.881872208589402e-06, "loss": 1.0548, "step": 16320 }, { "epoch": 0.11820741673724366, "grad_norm": 0.18432946503162384, "learning_rate": 4.881799821928815e-06, "loss": 1.0413, "step": 16330 }, { "epoch": 0.11827980339782984, "grad_norm": 0.19203749299049377, "learning_rate": 4.881727435268229e-06, "loss": 1.0507, "step": 16340 }, { "epoch": 0.11835219005841603, "grad_norm": 0.20910878479480743, "learning_rate": 4.881655048607643e-06, "loss": 1.0511, "step": 16350 }, { "epoch": 0.11842457671900222, "grad_norm": 0.16124652326107025, "learning_rate": 4.881582661947057e-06, "loss": 1.0418, "step": 16360 }, { "epoch": 0.11849696337958841, "grad_norm": 0.1836594045162201, "learning_rate": 4.881510275286471e-06, "loss": 1.0411, "step": 16370 }, { "epoch": 0.11856935004017459, "grad_norm": 0.1791921854019165, "learning_rate": 4.881437888625884e-06, "loss": 1.0399, "step": 16380 }, { "epoch": 0.11864173670076078, "grad_norm": 0.1730756163597107, "learning_rate": 4.881365501965299e-06, "loss": 1.0393, "step": 16390 }, { "epoch": 0.11871412336134697, "grad_norm": 0.17524994909763336, "learning_rate": 4.881293115304712e-06, "loss": 1.0573, "step": 16400 }, { "epoch": 0.11878651002193316, "grad_norm": 0.17954038083553314, "learning_rate": 4.881220728644126e-06, "loss": 1.0378, "step": 16410 }, { "epoch": 0.11885889668251935, "grad_norm": 0.17099116742610931, "learning_rate": 4.88114834198354e-06, "loss": 1.047, "step": 16420 }, { "epoch": 0.11893128334310553, "grad_norm": 0.1616821140050888, "learning_rate": 4.881075955322954e-06, "loss": 1.0512, "step": 16430 }, { "epoch": 0.11900367000369172, "grad_norm": 0.18617095053195953, "learning_rate": 4.881003568662368e-06, "loss": 1.0634, "step": 16440 }, { "epoch": 0.11907605666427791, "grad_norm": 0.19976121187210083, "learning_rate": 4.880931182001781e-06, "loss": 1.0464, "step": 16450 }, { "epoch": 0.1191484433248641, "grad_norm": 0.17326410114765167, "learning_rate": 4.880858795341195e-06, "loss": 1.0619, "step": 16460 }, { "epoch": 0.11922082998545028, "grad_norm": 0.16668163239955902, "learning_rate": 4.880786408680609e-06, "loss": 1.03, "step": 16470 }, { "epoch": 0.11929321664603647, "grad_norm": 0.16474226117134094, "learning_rate": 4.880714022020023e-06, "loss": 1.0532, "step": 16480 }, { "epoch": 0.11936560330662266, "grad_norm": 0.18454322218894958, "learning_rate": 4.880641635359437e-06, "loss": 1.0407, "step": 16490 }, { "epoch": 0.11943798996720885, "grad_norm": 0.18234814703464508, "learning_rate": 4.88056924869885e-06, "loss": 1.0338, "step": 16500 }, { "epoch": 0.11951037662779503, "grad_norm": 0.2152404487133026, "learning_rate": 4.880496862038264e-06, "loss": 1.0384, "step": 16510 }, { "epoch": 0.11958276328838122, "grad_norm": 0.1833188831806183, "learning_rate": 4.880424475377678e-06, "loss": 1.052, "step": 16520 }, { "epoch": 0.11965514994896741, "grad_norm": 0.16804316639900208, "learning_rate": 4.880352088717092e-06, "loss": 1.0423, "step": 16530 }, { "epoch": 0.1197275366095536, "grad_norm": 0.21003463864326477, "learning_rate": 4.8802797020565055e-06, "loss": 1.0444, "step": 16540 }, { "epoch": 0.11979992327013977, "grad_norm": 0.17130064964294434, "learning_rate": 4.880207315395919e-06, "loss": 1.0572, "step": 16550 }, { "epoch": 0.11987230993072596, "grad_norm": 0.17350070178508759, "learning_rate": 4.880134928735333e-06, "loss": 1.0547, "step": 16560 }, { "epoch": 0.11994469659131216, "grad_norm": 0.17210082709789276, "learning_rate": 4.880062542074746e-06, "loss": 1.0394, "step": 16570 }, { "epoch": 0.12001708325189835, "grad_norm": 0.17453570663928986, "learning_rate": 4.879990155414161e-06, "loss": 1.044, "step": 16580 }, { "epoch": 0.12008946991248452, "grad_norm": 0.16361692547798157, "learning_rate": 4.8799177687535744e-06, "loss": 1.0393, "step": 16590 }, { "epoch": 0.12016185657307071, "grad_norm": 0.17388883233070374, "learning_rate": 4.879845382092988e-06, "loss": 1.0354, "step": 16600 }, { "epoch": 0.1202342432336569, "grad_norm": 0.18685205280780792, "learning_rate": 4.879772995432402e-06, "loss": 1.0414, "step": 16610 }, { "epoch": 0.1203066298942431, "grad_norm": 0.20947107672691345, "learning_rate": 4.879700608771816e-06, "loss": 1.0482, "step": 16620 }, { "epoch": 0.12037901655482927, "grad_norm": 0.16179795563220978, "learning_rate": 4.87962822211123e-06, "loss": 1.0336, "step": 16630 }, { "epoch": 0.12045140321541546, "grad_norm": 0.18635103106498718, "learning_rate": 4.879555835450643e-06, "loss": 1.0425, "step": 16640 }, { "epoch": 0.12052378987600165, "grad_norm": 0.17454616725444794, "learning_rate": 4.879483448790057e-06, "loss": 1.0383, "step": 16650 }, { "epoch": 0.12059617653658784, "grad_norm": 0.17645971477031708, "learning_rate": 4.879411062129471e-06, "loss": 1.036, "step": 16660 }, { "epoch": 0.12066856319717402, "grad_norm": 0.20391476154327393, "learning_rate": 4.879338675468885e-06, "loss": 1.0259, "step": 16670 }, { "epoch": 0.12074094985776021, "grad_norm": 0.1724318414926529, "learning_rate": 4.879266288808299e-06, "loss": 1.0425, "step": 16680 }, { "epoch": 0.1208133365183464, "grad_norm": 0.17643970251083374, "learning_rate": 4.879193902147712e-06, "loss": 1.0654, "step": 16690 }, { "epoch": 0.12088572317893259, "grad_norm": 0.339347243309021, "learning_rate": 4.879121515487126e-06, "loss": 1.048, "step": 16700 }, { "epoch": 0.12095810983951877, "grad_norm": 0.17284011840820312, "learning_rate": 4.87904912882654e-06, "loss": 1.0377, "step": 16710 }, { "epoch": 0.12103049650010496, "grad_norm": 0.1662074625492096, "learning_rate": 4.878976742165954e-06, "loss": 1.0272, "step": 16720 }, { "epoch": 0.12110288316069115, "grad_norm": 0.1813957393169403, "learning_rate": 4.878904355505368e-06, "loss": 1.0351, "step": 16730 }, { "epoch": 0.12117526982127734, "grad_norm": 0.17036522924900055, "learning_rate": 4.878831968844781e-06, "loss": 1.0373, "step": 16740 }, { "epoch": 0.12124765648186352, "grad_norm": 0.17929202318191528, "learning_rate": 4.878759582184196e-06, "loss": 1.0419, "step": 16750 }, { "epoch": 0.12132004314244971, "grad_norm": 0.173272043466568, "learning_rate": 4.878687195523609e-06, "loss": 1.0535, "step": 16760 }, { "epoch": 0.1213924298030359, "grad_norm": 0.18325883150100708, "learning_rate": 4.878614808863023e-06, "loss": 1.0403, "step": 16770 }, { "epoch": 0.12146481646362209, "grad_norm": 0.18894407153129578, "learning_rate": 4.8785424222024365e-06, "loss": 1.0472, "step": 16780 }, { "epoch": 0.12153720312420827, "grad_norm": 0.17379698157310486, "learning_rate": 4.878470035541851e-06, "loss": 1.0422, "step": 16790 }, { "epoch": 0.12160958978479446, "grad_norm": 0.17601001262664795, "learning_rate": 4.878397648881265e-06, "loss": 1.0497, "step": 16800 }, { "epoch": 0.12168197644538065, "grad_norm": 0.16135278344154358, "learning_rate": 4.878325262220678e-06, "loss": 1.0349, "step": 16810 }, { "epoch": 0.12175436310596684, "grad_norm": 0.17922668159008026, "learning_rate": 4.878252875560092e-06, "loss": 1.0377, "step": 16820 }, { "epoch": 0.12182674976655301, "grad_norm": 0.17484663426876068, "learning_rate": 4.878180488899506e-06, "loss": 1.0463, "step": 16830 }, { "epoch": 0.1218991364271392, "grad_norm": 0.16210487484931946, "learning_rate": 4.87810810223892e-06, "loss": 1.0429, "step": 16840 }, { "epoch": 0.1219715230877254, "grad_norm": 0.17364494502544403, "learning_rate": 4.8780357155783335e-06, "loss": 1.0426, "step": 16850 }, { "epoch": 0.12204390974831159, "grad_norm": 0.1732948124408722, "learning_rate": 4.877963328917747e-06, "loss": 1.0417, "step": 16860 }, { "epoch": 0.12211629640889776, "grad_norm": 0.18654009699821472, "learning_rate": 4.877890942257162e-06, "loss": 1.0351, "step": 16870 }, { "epoch": 0.12218868306948395, "grad_norm": 0.17695607244968414, "learning_rate": 4.877818555596575e-06, "loss": 1.0412, "step": 16880 }, { "epoch": 0.12226106973007014, "grad_norm": 0.173911452293396, "learning_rate": 4.877746168935989e-06, "loss": 1.0419, "step": 16890 }, { "epoch": 0.12233345639065633, "grad_norm": 0.17585676908493042, "learning_rate": 4.8776737822754025e-06, "loss": 1.0414, "step": 16900 }, { "epoch": 0.12240584305124251, "grad_norm": 0.179653599858284, "learning_rate": 4.877601395614817e-06, "loss": 1.0412, "step": 16910 }, { "epoch": 0.1224782297118287, "grad_norm": 0.16631865501403809, "learning_rate": 4.8775290089542306e-06, "loss": 1.0408, "step": 16920 }, { "epoch": 0.12255061637241489, "grad_norm": 0.17161540687084198, "learning_rate": 4.877456622293644e-06, "loss": 1.0323, "step": 16930 }, { "epoch": 0.12262300303300108, "grad_norm": 0.1741219311952591, "learning_rate": 4.877384235633058e-06, "loss": 1.0435, "step": 16940 }, { "epoch": 0.12269538969358726, "grad_norm": 0.2024671733379364, "learning_rate": 4.877311848972472e-06, "loss": 1.0446, "step": 16950 }, { "epoch": 0.12276777635417345, "grad_norm": 0.1737942099571228, "learning_rate": 4.877239462311886e-06, "loss": 1.0464, "step": 16960 }, { "epoch": 0.12284016301475964, "grad_norm": 0.21153798699378967, "learning_rate": 4.8771670756512995e-06, "loss": 1.0438, "step": 16970 }, { "epoch": 0.12291254967534583, "grad_norm": 0.17885242402553558, "learning_rate": 4.877094688990713e-06, "loss": 1.0402, "step": 16980 }, { "epoch": 0.12298493633593202, "grad_norm": 0.18069690465927124, "learning_rate": 4.8770223023301276e-06, "loss": 1.0519, "step": 16990 }, { "epoch": 0.1230573229965182, "grad_norm": 0.17245696485042572, "learning_rate": 4.876949915669541e-06, "loss": 1.0395, "step": 17000 }, { "epoch": 0.12312970965710439, "grad_norm": 0.16451863944530487, "learning_rate": 4.876877529008955e-06, "loss": 1.0378, "step": 17010 }, { "epoch": 0.12320209631769058, "grad_norm": 0.18624931573867798, "learning_rate": 4.876805142348368e-06, "loss": 1.0513, "step": 17020 }, { "epoch": 0.12327448297827677, "grad_norm": 0.16225986182689667, "learning_rate": 4.876732755687783e-06, "loss": 1.0374, "step": 17030 }, { "epoch": 0.12334686963886295, "grad_norm": 0.19967621564865112, "learning_rate": 4.8766603690271965e-06, "loss": 1.0455, "step": 17040 }, { "epoch": 0.12341925629944914, "grad_norm": 0.18386250734329224, "learning_rate": 4.87658798236661e-06, "loss": 1.0349, "step": 17050 }, { "epoch": 0.12349164296003533, "grad_norm": 0.17461447417736053, "learning_rate": 4.876515595706024e-06, "loss": 1.0374, "step": 17060 }, { "epoch": 0.12356402962062152, "grad_norm": 0.16911447048187256, "learning_rate": 4.876443209045438e-06, "loss": 1.0312, "step": 17070 }, { "epoch": 0.1236364162812077, "grad_norm": 0.17958052456378937, "learning_rate": 4.876370822384851e-06, "loss": 1.038, "step": 17080 }, { "epoch": 0.12370880294179389, "grad_norm": 0.16912564635276794, "learning_rate": 4.8762984357242646e-06, "loss": 1.0268, "step": 17090 }, { "epoch": 0.12378118960238008, "grad_norm": 0.18362846970558167, "learning_rate": 4.876226049063679e-06, "loss": 1.0457, "step": 17100 }, { "epoch": 0.12385357626296627, "grad_norm": 0.18593281507492065, "learning_rate": 4.876153662403093e-06, "loss": 1.0296, "step": 17110 }, { "epoch": 0.12392596292355244, "grad_norm": 0.1746986359357834, "learning_rate": 4.876081275742506e-06, "loss": 1.0429, "step": 17120 }, { "epoch": 0.12399834958413863, "grad_norm": 0.16610664129257202, "learning_rate": 4.87600888908192e-06, "loss": 1.0432, "step": 17130 }, { "epoch": 0.12407073624472482, "grad_norm": 0.1753586083650589, "learning_rate": 4.875936502421334e-06, "loss": 1.0408, "step": 17140 }, { "epoch": 0.12414312290531102, "grad_norm": 0.1812318116426468, "learning_rate": 4.875864115760748e-06, "loss": 1.0482, "step": 17150 }, { "epoch": 0.12421550956589719, "grad_norm": 0.18393771350383759, "learning_rate": 4.875791729100162e-06, "loss": 1.0439, "step": 17160 }, { "epoch": 0.12428789622648338, "grad_norm": 0.2628847062587738, "learning_rate": 4.875719342439575e-06, "loss": 1.0498, "step": 17170 }, { "epoch": 0.12436028288706957, "grad_norm": 0.18083707988262177, "learning_rate": 4.87564695577899e-06, "loss": 1.0428, "step": 17180 }, { "epoch": 0.12443266954765576, "grad_norm": 0.19616307318210602, "learning_rate": 4.875574569118403e-06, "loss": 1.0391, "step": 17190 }, { "epoch": 0.12450505620824194, "grad_norm": 0.16640251874923706, "learning_rate": 4.875502182457817e-06, "loss": 1.0412, "step": 17200 }, { "epoch": 0.12457744286882813, "grad_norm": 0.2135634571313858, "learning_rate": 4.8754297957972305e-06, "loss": 1.0415, "step": 17210 }, { "epoch": 0.12464982952941432, "grad_norm": 0.17053596675395966, "learning_rate": 4.875357409136645e-06, "loss": 1.0467, "step": 17220 }, { "epoch": 0.12472221619000051, "grad_norm": 0.16077913343906403, "learning_rate": 4.875285022476059e-06, "loss": 1.0309, "step": 17230 }, { "epoch": 0.12479460285058669, "grad_norm": 0.17686612904071808, "learning_rate": 4.875212635815472e-06, "loss": 1.0304, "step": 17240 }, { "epoch": 0.12486698951117288, "grad_norm": 0.1883876919746399, "learning_rate": 4.875140249154886e-06, "loss": 1.0413, "step": 17250 }, { "epoch": 0.12493937617175907, "grad_norm": 0.20872871577739716, "learning_rate": 4.8750678624943e-06, "loss": 1.0434, "step": 17260 }, { "epoch": 0.12501176283234525, "grad_norm": 0.17581568658351898, "learning_rate": 4.874995475833714e-06, "loss": 1.0488, "step": 17270 }, { "epoch": 0.12508414949293145, "grad_norm": 0.17273147404193878, "learning_rate": 4.8749230891731275e-06, "loss": 1.0498, "step": 17280 }, { "epoch": 0.12515653615351763, "grad_norm": 0.21244202554225922, "learning_rate": 4.874850702512541e-06, "loss": 1.0332, "step": 17290 }, { "epoch": 0.1252289228141038, "grad_norm": 0.17866666615009308, "learning_rate": 4.874778315851955e-06, "loss": 1.0346, "step": 17300 }, { "epoch": 0.12530130947469, "grad_norm": 0.1749383807182312, "learning_rate": 4.874705929191369e-06, "loss": 1.0452, "step": 17310 }, { "epoch": 0.12537369613527619, "grad_norm": 0.175296813249588, "learning_rate": 4.874633542530783e-06, "loss": 1.0376, "step": 17320 }, { "epoch": 0.1254460827958624, "grad_norm": 0.18073537945747375, "learning_rate": 4.8745611558701964e-06, "loss": 1.0316, "step": 17330 }, { "epoch": 0.12551846945644857, "grad_norm": 0.18828389048576355, "learning_rate": 4.87448876920961e-06, "loss": 1.0376, "step": 17340 }, { "epoch": 0.12559085611703474, "grad_norm": 0.25895076990127563, "learning_rate": 4.8744163825490245e-06, "loss": 1.0266, "step": 17350 }, { "epoch": 0.12566324277762095, "grad_norm": 0.200386181473732, "learning_rate": 4.874343995888438e-06, "loss": 1.0438, "step": 17360 }, { "epoch": 0.12573562943820712, "grad_norm": 0.1703101396560669, "learning_rate": 4.874271609227852e-06, "loss": 1.0397, "step": 17370 }, { "epoch": 0.1258080160987933, "grad_norm": 0.18138134479522705, "learning_rate": 4.874199222567265e-06, "loss": 1.0447, "step": 17380 }, { "epoch": 0.1258804027593795, "grad_norm": 0.16725878417491913, "learning_rate": 4.87412683590668e-06, "loss": 1.0437, "step": 17390 }, { "epoch": 0.12595278941996568, "grad_norm": 0.19038145244121552, "learning_rate": 4.8740544492460934e-06, "loss": 1.0377, "step": 17400 }, { "epoch": 0.1260251760805519, "grad_norm": 0.20098719000816345, "learning_rate": 4.873982062585507e-06, "loss": 1.0406, "step": 17410 }, { "epoch": 0.12609756274113806, "grad_norm": 0.17163150012493134, "learning_rate": 4.873909675924921e-06, "loss": 1.0316, "step": 17420 }, { "epoch": 0.12616994940172424, "grad_norm": 0.17717736959457397, "learning_rate": 4.873837289264335e-06, "loss": 1.0378, "step": 17430 }, { "epoch": 0.12624233606231045, "grad_norm": 0.1882064789533615, "learning_rate": 4.873764902603749e-06, "loss": 1.0284, "step": 17440 }, { "epoch": 0.12631472272289662, "grad_norm": 0.1787366420030594, "learning_rate": 4.873692515943162e-06, "loss": 1.0418, "step": 17450 }, { "epoch": 0.1263871093834828, "grad_norm": 0.17708836495876312, "learning_rate": 4.873620129282576e-06, "loss": 1.047, "step": 17460 }, { "epoch": 0.126459496044069, "grad_norm": 0.1702563464641571, "learning_rate": 4.8735477426219905e-06, "loss": 1.038, "step": 17470 }, { "epoch": 0.12653188270465518, "grad_norm": 0.1806415468454361, "learning_rate": 4.873475355961404e-06, "loss": 1.0358, "step": 17480 }, { "epoch": 0.12660426936524138, "grad_norm": 0.24682064354419708, "learning_rate": 4.873402969300818e-06, "loss": 1.0479, "step": 17490 }, { "epoch": 0.12667665602582756, "grad_norm": 0.18831071257591248, "learning_rate": 4.873330582640231e-06, "loss": 1.0333, "step": 17500 }, { "epoch": 0.12674904268641374, "grad_norm": 0.17342287302017212, "learning_rate": 4.873258195979646e-06, "loss": 1.0253, "step": 17510 }, { "epoch": 0.12682142934699994, "grad_norm": 0.17534632980823517, "learning_rate": 4.873185809319059e-06, "loss": 1.0478, "step": 17520 }, { "epoch": 0.12689381600758612, "grad_norm": 0.16948920488357544, "learning_rate": 4.873113422658473e-06, "loss": 1.0396, "step": 17530 }, { "epoch": 0.1269662026681723, "grad_norm": 0.19890448451042175, "learning_rate": 4.873041035997887e-06, "loss": 1.035, "step": 17540 }, { "epoch": 0.1270385893287585, "grad_norm": 0.17209777235984802, "learning_rate": 4.872968649337301e-06, "loss": 1.031, "step": 17550 }, { "epoch": 0.12711097598934468, "grad_norm": 0.1808318793773651, "learning_rate": 4.872896262676715e-06, "loss": 1.0491, "step": 17560 }, { "epoch": 0.12718336264993088, "grad_norm": 0.24130041897296906, "learning_rate": 4.872823876016128e-06, "loss": 1.0217, "step": 17570 }, { "epoch": 0.12725574931051706, "grad_norm": 0.22134606540203094, "learning_rate": 4.872751489355542e-06, "loss": 1.0452, "step": 17580 }, { "epoch": 0.12732813597110323, "grad_norm": 0.18467597663402557, "learning_rate": 4.872679102694956e-06, "loss": 1.0515, "step": 17590 }, { "epoch": 0.12740052263168944, "grad_norm": 0.1730322241783142, "learning_rate": 4.87260671603437e-06, "loss": 1.0315, "step": 17600 }, { "epoch": 0.12747290929227562, "grad_norm": 0.1717580109834671, "learning_rate": 4.872534329373783e-06, "loss": 1.0351, "step": 17610 }, { "epoch": 0.12754529595286182, "grad_norm": 0.17170915007591248, "learning_rate": 4.872461942713197e-06, "loss": 1.033, "step": 17620 }, { "epoch": 0.127617682613448, "grad_norm": 0.17856451869010925, "learning_rate": 4.872389556052611e-06, "loss": 1.0123, "step": 17630 }, { "epoch": 0.12769006927403417, "grad_norm": 0.19595351815223694, "learning_rate": 4.8723171693920245e-06, "loss": 1.0247, "step": 17640 }, { "epoch": 0.12776245593462038, "grad_norm": 0.16732360422611237, "learning_rate": 4.872244782731438e-06, "loss": 1.0421, "step": 17650 }, { "epoch": 0.12783484259520655, "grad_norm": 0.18419161438941956, "learning_rate": 4.8721723960708526e-06, "loss": 1.0563, "step": 17660 }, { "epoch": 0.12790722925579273, "grad_norm": 0.20935377478599548, "learning_rate": 4.872100009410266e-06, "loss": 1.0111, "step": 17670 }, { "epoch": 0.12797961591637894, "grad_norm": 0.1993681639432907, "learning_rate": 4.87202762274968e-06, "loss": 1.0407, "step": 17680 }, { "epoch": 0.1280520025769651, "grad_norm": 0.16351039707660675, "learning_rate": 4.871955236089093e-06, "loss": 1.0276, "step": 17690 }, { "epoch": 0.12812438923755132, "grad_norm": 0.17967389523983002, "learning_rate": 4.871882849428508e-06, "loss": 1.0442, "step": 17700 }, { "epoch": 0.1281967758981375, "grad_norm": 0.1645398736000061, "learning_rate": 4.8718104627679215e-06, "loss": 1.0285, "step": 17710 }, { "epoch": 0.12826916255872367, "grad_norm": 0.16454453766345978, "learning_rate": 4.871738076107335e-06, "loss": 1.0418, "step": 17720 }, { "epoch": 0.12834154921930988, "grad_norm": 0.16446729004383087, "learning_rate": 4.871665689446749e-06, "loss": 1.0381, "step": 17730 }, { "epoch": 0.12841393587989605, "grad_norm": 0.16874778270721436, "learning_rate": 4.871593302786163e-06, "loss": 1.0239, "step": 17740 }, { "epoch": 0.12848632254048223, "grad_norm": 0.17436982691287994, "learning_rate": 4.871520916125577e-06, "loss": 1.0345, "step": 17750 }, { "epoch": 0.12855870920106843, "grad_norm": 0.18663202226161957, "learning_rate": 4.87144852946499e-06, "loss": 1.0282, "step": 17760 }, { "epoch": 0.1286310958616546, "grad_norm": 0.1899365931749344, "learning_rate": 4.871376142804404e-06, "loss": 1.0335, "step": 17770 }, { "epoch": 0.12870348252224081, "grad_norm": 0.17862582206726074, "learning_rate": 4.8713037561438185e-06, "loss": 1.0453, "step": 17780 }, { "epoch": 0.128775869182827, "grad_norm": 0.1662217378616333, "learning_rate": 4.871231369483232e-06, "loss": 1.0389, "step": 17790 }, { "epoch": 0.12884825584341317, "grad_norm": 0.1717958003282547, "learning_rate": 4.871158982822646e-06, "loss": 1.0385, "step": 17800 }, { "epoch": 0.12892064250399937, "grad_norm": 0.17093853652477264, "learning_rate": 4.871086596162059e-06, "loss": 1.0347, "step": 17810 }, { "epoch": 0.12899302916458555, "grad_norm": 0.212626650929451, "learning_rate": 4.871014209501474e-06, "loss": 1.0299, "step": 17820 }, { "epoch": 0.12906541582517173, "grad_norm": 0.16492265462875366, "learning_rate": 4.870941822840887e-06, "loss": 1.0366, "step": 17830 }, { "epoch": 0.12913780248575793, "grad_norm": 0.17654773592948914, "learning_rate": 4.870869436180301e-06, "loss": 1.0359, "step": 17840 }, { "epoch": 0.1292101891463441, "grad_norm": 0.17455124855041504, "learning_rate": 4.870797049519715e-06, "loss": 1.0263, "step": 17850 }, { "epoch": 0.1292825758069303, "grad_norm": 0.16440659761428833, "learning_rate": 4.870724662859129e-06, "loss": 1.0193, "step": 17860 }, { "epoch": 0.1293549624675165, "grad_norm": 0.1920333057641983, "learning_rate": 4.870652276198543e-06, "loss": 1.0347, "step": 17870 }, { "epoch": 0.12942734912810266, "grad_norm": 0.18916349112987518, "learning_rate": 4.870579889537956e-06, "loss": 1.036, "step": 17880 }, { "epoch": 0.12949973578868887, "grad_norm": 0.20579378306865692, "learning_rate": 4.87050750287737e-06, "loss": 1.0291, "step": 17890 }, { "epoch": 0.12957212244927505, "grad_norm": 0.16314570605754852, "learning_rate": 4.870435116216784e-06, "loss": 1.0316, "step": 17900 }, { "epoch": 0.12964450910986122, "grad_norm": 0.18022793531417847, "learning_rate": 4.870362729556198e-06, "loss": 1.0444, "step": 17910 }, { "epoch": 0.12971689577044743, "grad_norm": 0.1798829734325409, "learning_rate": 4.870290342895612e-06, "loss": 1.0351, "step": 17920 }, { "epoch": 0.1297892824310336, "grad_norm": 0.1796688586473465, "learning_rate": 4.870217956235025e-06, "loss": 1.0431, "step": 17930 }, { "epoch": 0.1298616690916198, "grad_norm": 0.1828147917985916, "learning_rate": 4.870145569574439e-06, "loss": 1.0343, "step": 17940 }, { "epoch": 0.12993405575220598, "grad_norm": 0.21611171960830688, "learning_rate": 4.870073182913853e-06, "loss": 1.0343, "step": 17950 }, { "epoch": 0.13000644241279216, "grad_norm": 0.17004896700382233, "learning_rate": 4.870000796253267e-06, "loss": 1.0468, "step": 17960 }, { "epoch": 0.13007882907337837, "grad_norm": 0.18028271198272705, "learning_rate": 4.869928409592681e-06, "loss": 1.0504, "step": 17970 }, { "epoch": 0.13015121573396454, "grad_norm": 0.18797443807125092, "learning_rate": 4.869856022932094e-06, "loss": 1.0274, "step": 17980 }, { "epoch": 0.13022360239455072, "grad_norm": 0.18303246796131134, "learning_rate": 4.869783636271509e-06, "loss": 1.0087, "step": 17990 }, { "epoch": 0.13029598905513692, "grad_norm": 0.18918801844120026, "learning_rate": 4.869711249610922e-06, "loss": 1.0417, "step": 18000 }, { "epoch": 0.1303683757157231, "grad_norm": 0.16673238575458527, "learning_rate": 4.869638862950336e-06, "loss": 1.0255, "step": 18010 }, { "epoch": 0.1304407623763093, "grad_norm": 0.16749219596385956, "learning_rate": 4.8695664762897495e-06, "loss": 1.0267, "step": 18020 }, { "epoch": 0.13051314903689548, "grad_norm": 0.1816338449716568, "learning_rate": 4.869494089629164e-06, "loss": 1.0432, "step": 18030 }, { "epoch": 0.13058553569748166, "grad_norm": 0.1622341275215149, "learning_rate": 4.869421702968578e-06, "loss": 1.023, "step": 18040 }, { "epoch": 0.13065792235806786, "grad_norm": 0.1742738038301468, "learning_rate": 4.869349316307991e-06, "loss": 1.0301, "step": 18050 }, { "epoch": 0.13073030901865404, "grad_norm": 0.16464455425739288, "learning_rate": 4.869276929647405e-06, "loss": 1.0151, "step": 18060 }, { "epoch": 0.13080269567924022, "grad_norm": 0.16827940940856934, "learning_rate": 4.869204542986819e-06, "loss": 1.0237, "step": 18070 }, { "epoch": 0.13087508233982642, "grad_norm": 0.17623087763786316, "learning_rate": 4.869132156326233e-06, "loss": 1.0521, "step": 18080 }, { "epoch": 0.1309474690004126, "grad_norm": 0.1728028506040573, "learning_rate": 4.8690597696656465e-06, "loss": 1.0373, "step": 18090 }, { "epoch": 0.1310198556609988, "grad_norm": 0.17844252288341522, "learning_rate": 4.86898738300506e-06, "loss": 1.0461, "step": 18100 }, { "epoch": 0.13109224232158498, "grad_norm": 0.16047002375125885, "learning_rate": 4.868914996344475e-06, "loss": 1.0312, "step": 18110 }, { "epoch": 0.13116462898217116, "grad_norm": 0.18902169167995453, "learning_rate": 4.868842609683888e-06, "loss": 1.0386, "step": 18120 }, { "epoch": 0.13123701564275736, "grad_norm": 0.18937572836875916, "learning_rate": 4.868770223023302e-06, "loss": 1.0421, "step": 18130 }, { "epoch": 0.13130940230334354, "grad_norm": 0.17996446788311005, "learning_rate": 4.8686978363627154e-06, "loss": 1.0457, "step": 18140 }, { "epoch": 0.1313817889639297, "grad_norm": 0.1748812198638916, "learning_rate": 4.868625449702129e-06, "loss": 1.0539, "step": 18150 }, { "epoch": 0.13145417562451592, "grad_norm": 0.17151296138763428, "learning_rate": 4.868553063041543e-06, "loss": 1.0268, "step": 18160 }, { "epoch": 0.1315265622851021, "grad_norm": 0.17143696546554565, "learning_rate": 4.868480676380956e-06, "loss": 1.0318, "step": 18170 }, { "epoch": 0.1315989489456883, "grad_norm": 0.167258158326149, "learning_rate": 4.868408289720371e-06, "loss": 1.0264, "step": 18180 }, { "epoch": 0.13167133560627448, "grad_norm": 0.17534078657627106, "learning_rate": 4.868335903059784e-06, "loss": 1.0408, "step": 18190 }, { "epoch": 0.13174372226686065, "grad_norm": 0.17414483428001404, "learning_rate": 4.868263516399198e-06, "loss": 1.0265, "step": 18200 }, { "epoch": 0.13181610892744686, "grad_norm": 0.1946071982383728, "learning_rate": 4.868191129738612e-06, "loss": 1.0186, "step": 18210 }, { "epoch": 0.13188849558803303, "grad_norm": 0.1678534746170044, "learning_rate": 4.868118743078026e-06, "loss": 1.0336, "step": 18220 }, { "epoch": 0.13196088224861924, "grad_norm": 0.1735198199748993, "learning_rate": 4.86804635641744e-06, "loss": 1.0279, "step": 18230 }, { "epoch": 0.13203326890920541, "grad_norm": 0.19873082637786865, "learning_rate": 4.867973969756853e-06, "loss": 1.0294, "step": 18240 }, { "epoch": 0.1321056555697916, "grad_norm": 0.17502813041210175, "learning_rate": 4.867901583096267e-06, "loss": 1.0235, "step": 18250 }, { "epoch": 0.1321780422303778, "grad_norm": 0.18205420672893524, "learning_rate": 4.867829196435681e-06, "loss": 1.0364, "step": 18260 }, { "epoch": 0.13225042889096397, "grad_norm": 0.16406495869159698, "learning_rate": 4.867756809775095e-06, "loss": 1.0416, "step": 18270 }, { "epoch": 0.13232281555155015, "grad_norm": 0.2004392445087433, "learning_rate": 4.867684423114509e-06, "loss": 1.0369, "step": 18280 }, { "epoch": 0.13239520221213635, "grad_norm": 0.16325534880161285, "learning_rate": 4.867612036453922e-06, "loss": 1.0315, "step": 18290 }, { "epoch": 0.13246758887272253, "grad_norm": 0.17578786611557007, "learning_rate": 4.867539649793337e-06, "loss": 1.0262, "step": 18300 }, { "epoch": 0.13253997553330873, "grad_norm": 0.18025441467761993, "learning_rate": 4.86746726313275e-06, "loss": 1.0369, "step": 18310 }, { "epoch": 0.1326123621938949, "grad_norm": 0.1580292284488678, "learning_rate": 4.867394876472164e-06, "loss": 1.023, "step": 18320 }, { "epoch": 0.1326847488544811, "grad_norm": 0.16052091121673584, "learning_rate": 4.8673224898115775e-06, "loss": 1.0332, "step": 18330 }, { "epoch": 0.1327571355150673, "grad_norm": 0.17163227498531342, "learning_rate": 4.867250103150992e-06, "loss": 1.0384, "step": 18340 }, { "epoch": 0.13282952217565347, "grad_norm": 0.17826439440250397, "learning_rate": 4.867177716490406e-06, "loss": 1.0355, "step": 18350 }, { "epoch": 0.13290190883623965, "grad_norm": 0.16754212975502014, "learning_rate": 4.867105329829819e-06, "loss": 1.044, "step": 18360 }, { "epoch": 0.13297429549682585, "grad_norm": 0.17139877378940582, "learning_rate": 4.867032943169233e-06, "loss": 1.0249, "step": 18370 }, { "epoch": 0.13304668215741203, "grad_norm": 0.1751633733510971, "learning_rate": 4.866960556508647e-06, "loss": 1.0354, "step": 18380 }, { "epoch": 0.13311906881799823, "grad_norm": 0.1777397096157074, "learning_rate": 4.866888169848061e-06, "loss": 1.0224, "step": 18390 }, { "epoch": 0.1331914554785844, "grad_norm": 0.1866157501935959, "learning_rate": 4.8668157831874746e-06, "loss": 1.0231, "step": 18400 }, { "epoch": 0.13326384213917059, "grad_norm": 0.2078569382429123, "learning_rate": 4.866743396526888e-06, "loss": 1.0317, "step": 18410 }, { "epoch": 0.1333362287997568, "grad_norm": 0.1714392751455307, "learning_rate": 4.866671009866303e-06, "loss": 1.0303, "step": 18420 }, { "epoch": 0.13340861546034297, "grad_norm": 0.1851416379213333, "learning_rate": 4.866598623205716e-06, "loss": 1.0321, "step": 18430 }, { "epoch": 0.13348100212092914, "grad_norm": 0.17238068580627441, "learning_rate": 4.86652623654513e-06, "loss": 1.0313, "step": 18440 }, { "epoch": 0.13355338878151535, "grad_norm": 0.18135380744934082, "learning_rate": 4.8664538498845435e-06, "loss": 1.0359, "step": 18450 }, { "epoch": 0.13362577544210152, "grad_norm": 0.1726706624031067, "learning_rate": 4.866381463223958e-06, "loss": 1.0164, "step": 18460 }, { "epoch": 0.13369816210268773, "grad_norm": 0.1773628294467926, "learning_rate": 4.8663090765633716e-06, "loss": 1.0397, "step": 18470 }, { "epoch": 0.1337705487632739, "grad_norm": 0.17249688506126404, "learning_rate": 4.866236689902785e-06, "loss": 1.0314, "step": 18480 }, { "epoch": 0.13384293542386008, "grad_norm": 0.17601031064987183, "learning_rate": 4.866164303242199e-06, "loss": 1.0412, "step": 18490 }, { "epoch": 0.1339153220844463, "grad_norm": 0.17362290620803833, "learning_rate": 4.866091916581613e-06, "loss": 1.0335, "step": 18500 }, { "epoch": 0.13398770874503246, "grad_norm": 0.17129851877689362, "learning_rate": 4.866019529921027e-06, "loss": 1.0282, "step": 18510 }, { "epoch": 0.13406009540561864, "grad_norm": 0.18305164575576782, "learning_rate": 4.8659471432604405e-06, "loss": 1.0276, "step": 18520 }, { "epoch": 0.13413248206620484, "grad_norm": 0.16306072473526, "learning_rate": 4.865874756599854e-06, "loss": 1.026, "step": 18530 }, { "epoch": 0.13420486872679102, "grad_norm": 0.17902541160583496, "learning_rate": 4.865802369939268e-06, "loss": 1.0372, "step": 18540 }, { "epoch": 0.13427725538737723, "grad_norm": 0.17102956771850586, "learning_rate": 4.865729983278682e-06, "loss": 1.0272, "step": 18550 }, { "epoch": 0.1343496420479634, "grad_norm": 0.1690719872713089, "learning_rate": 4.865657596618096e-06, "loss": 1.0411, "step": 18560 }, { "epoch": 0.13442202870854958, "grad_norm": 0.18555520474910736, "learning_rate": 4.865585209957509e-06, "loss": 1.0406, "step": 18570 }, { "epoch": 0.13449441536913578, "grad_norm": 0.17667360603809357, "learning_rate": 4.865512823296923e-06, "loss": 1.0216, "step": 18580 }, { "epoch": 0.13456680202972196, "grad_norm": 0.15590178966522217, "learning_rate": 4.8654404366363375e-06, "loss": 1.025, "step": 18590 }, { "epoch": 0.13463918869030814, "grad_norm": 0.17240816354751587, "learning_rate": 4.865368049975751e-06, "loss": 1.0211, "step": 18600 }, { "epoch": 0.13471157535089434, "grad_norm": 0.16866736114025116, "learning_rate": 4.865295663315165e-06, "loss": 1.0422, "step": 18610 }, { "epoch": 0.13478396201148052, "grad_norm": 0.17667965590953827, "learning_rate": 4.865223276654578e-06, "loss": 1.0312, "step": 18620 }, { "epoch": 0.13485634867206672, "grad_norm": 0.176561638712883, "learning_rate": 4.865150889993993e-06, "loss": 1.0363, "step": 18630 }, { "epoch": 0.1349287353326529, "grad_norm": 0.18116410076618195, "learning_rate": 4.8650785033334064e-06, "loss": 1.0333, "step": 18640 }, { "epoch": 0.13500112199323908, "grad_norm": 0.17595870792865753, "learning_rate": 4.86500611667282e-06, "loss": 1.0195, "step": 18650 }, { "epoch": 0.13507350865382528, "grad_norm": 0.17091989517211914, "learning_rate": 4.864933730012234e-06, "loss": 1.0266, "step": 18660 }, { "epoch": 0.13514589531441146, "grad_norm": 0.18030595779418945, "learning_rate": 4.864861343351647e-06, "loss": 1.0377, "step": 18670 }, { "epoch": 0.13521828197499763, "grad_norm": 0.1966775506734848, "learning_rate": 4.864788956691061e-06, "loss": 1.0233, "step": 18680 }, { "epoch": 0.13529066863558384, "grad_norm": 0.1909208595752716, "learning_rate": 4.8647165700304745e-06, "loss": 1.0294, "step": 18690 }, { "epoch": 0.13536305529617002, "grad_norm": 0.17203876376152039, "learning_rate": 4.864644183369889e-06, "loss": 1.0422, "step": 18700 }, { "epoch": 0.13543544195675622, "grad_norm": 0.164755716919899, "learning_rate": 4.864571796709303e-06, "loss": 1.0307, "step": 18710 }, { "epoch": 0.1355078286173424, "grad_norm": 0.17235048115253448, "learning_rate": 4.864499410048716e-06, "loss": 1.025, "step": 18720 }, { "epoch": 0.13558021527792857, "grad_norm": 0.17906124889850616, "learning_rate": 4.86442702338813e-06, "loss": 1.0251, "step": 18730 }, { "epoch": 0.13565260193851478, "grad_norm": 0.1699845939874649, "learning_rate": 4.864354636727544e-06, "loss": 1.0513, "step": 18740 }, { "epoch": 0.13572498859910095, "grad_norm": 0.2316533476114273, "learning_rate": 4.864282250066958e-06, "loss": 1.0299, "step": 18750 }, { "epoch": 0.13579737525968716, "grad_norm": 0.1772417426109314, "learning_rate": 4.8642098634063715e-06, "loss": 1.014, "step": 18760 }, { "epoch": 0.13586976192027334, "grad_norm": 0.17594146728515625, "learning_rate": 4.864137476745785e-06, "loss": 1.0376, "step": 18770 }, { "epoch": 0.1359421485808595, "grad_norm": 0.1800992637872696, "learning_rate": 4.8640650900852e-06, "loss": 1.0218, "step": 18780 }, { "epoch": 0.13601453524144572, "grad_norm": 0.2082536369562149, "learning_rate": 4.863992703424613e-06, "loss": 1.029, "step": 18790 }, { "epoch": 0.1360869219020319, "grad_norm": 0.17295007407665253, "learning_rate": 4.863920316764027e-06, "loss": 1.0321, "step": 18800 }, { "epoch": 0.13615930856261807, "grad_norm": 0.2445904165506363, "learning_rate": 4.8638479301034404e-06, "loss": 1.034, "step": 18810 }, { "epoch": 0.13623169522320427, "grad_norm": 0.16870233416557312, "learning_rate": 4.863775543442855e-06, "loss": 1.0237, "step": 18820 }, { "epoch": 0.13630408188379045, "grad_norm": 0.1675586849451065, "learning_rate": 4.8637031567822685e-06, "loss": 1.0238, "step": 18830 }, { "epoch": 0.13637646854437666, "grad_norm": 0.1609523743391037, "learning_rate": 4.863630770121682e-06, "loss": 1.0141, "step": 18840 }, { "epoch": 0.13644885520496283, "grad_norm": 0.1770259588956833, "learning_rate": 4.863558383461096e-06, "loss": 1.0507, "step": 18850 }, { "epoch": 0.136521241865549, "grad_norm": 0.17467375099658966, "learning_rate": 4.86348599680051e-06, "loss": 1.0226, "step": 18860 }, { "epoch": 0.1365936285261352, "grad_norm": 0.17384403944015503, "learning_rate": 4.863413610139924e-06, "loss": 1.0342, "step": 18870 }, { "epoch": 0.1366660151867214, "grad_norm": 0.18507815897464752, "learning_rate": 4.8633412234793374e-06, "loss": 1.0338, "step": 18880 }, { "epoch": 0.13673840184730757, "grad_norm": 0.173783078789711, "learning_rate": 4.863268836818751e-06, "loss": 1.0354, "step": 18890 }, { "epoch": 0.13681078850789377, "grad_norm": 0.1754179745912552, "learning_rate": 4.8631964501581655e-06, "loss": 1.031, "step": 18900 }, { "epoch": 0.13688317516847995, "grad_norm": 0.17524704337120056, "learning_rate": 4.863124063497579e-06, "loss": 1.0221, "step": 18910 }, { "epoch": 0.13695556182906615, "grad_norm": 0.1778000146150589, "learning_rate": 4.863051676836993e-06, "loss": 1.0122, "step": 18920 }, { "epoch": 0.13702794848965233, "grad_norm": 0.19337837398052216, "learning_rate": 4.862979290176406e-06, "loss": 1.0321, "step": 18930 }, { "epoch": 0.1371003351502385, "grad_norm": 0.16937635838985443, "learning_rate": 4.862906903515821e-06, "loss": 1.0281, "step": 18940 }, { "epoch": 0.1371727218108247, "grad_norm": 0.161665141582489, "learning_rate": 4.8628345168552345e-06, "loss": 1.0256, "step": 18950 }, { "epoch": 0.1372451084714109, "grad_norm": 0.18332736194133759, "learning_rate": 4.862762130194648e-06, "loss": 1.034, "step": 18960 }, { "epoch": 0.13731749513199706, "grad_norm": 0.17856566607952118, "learning_rate": 4.862689743534062e-06, "loss": 1.0334, "step": 18970 }, { "epoch": 0.13738988179258327, "grad_norm": 0.16888518631458282, "learning_rate": 4.862617356873476e-06, "loss": 1.0146, "step": 18980 }, { "epoch": 0.13746226845316944, "grad_norm": 0.18556654453277588, "learning_rate": 4.86254497021289e-06, "loss": 1.0241, "step": 18990 }, { "epoch": 0.13753465511375565, "grad_norm": 0.21083232760429382, "learning_rate": 4.862472583552303e-06, "loss": 1.0315, "step": 19000 }, { "epoch": 0.13760704177434183, "grad_norm": 0.194644495844841, "learning_rate": 4.862400196891717e-06, "loss": 1.0312, "step": 19010 }, { "epoch": 0.137679428434928, "grad_norm": 0.19104093313217163, "learning_rate": 4.8623278102311315e-06, "loss": 1.0296, "step": 19020 }, { "epoch": 0.1377518150955142, "grad_norm": 0.16693130135536194, "learning_rate": 4.862255423570545e-06, "loss": 1.0217, "step": 19030 }, { "epoch": 0.13782420175610038, "grad_norm": 0.18064992129802704, "learning_rate": 4.862183036909959e-06, "loss": 1.0308, "step": 19040 }, { "epoch": 0.13789658841668656, "grad_norm": 0.19371187686920166, "learning_rate": 4.862110650249372e-06, "loss": 1.032, "step": 19050 }, { "epoch": 0.13796897507727277, "grad_norm": 0.17446281015872955, "learning_rate": 4.862038263588787e-06, "loss": 1.0323, "step": 19060 }, { "epoch": 0.13804136173785894, "grad_norm": 0.17750053107738495, "learning_rate": 4.8619658769282e-06, "loss": 1.0269, "step": 19070 }, { "epoch": 0.13811374839844515, "grad_norm": 0.1809011548757553, "learning_rate": 4.861893490267614e-06, "loss": 1.0242, "step": 19080 }, { "epoch": 0.13818613505903132, "grad_norm": 0.19166511297225952, "learning_rate": 4.861821103607028e-06, "loss": 1.0228, "step": 19090 }, { "epoch": 0.1382585217196175, "grad_norm": 0.17810720205307007, "learning_rate": 4.861748716946442e-06, "loss": 1.038, "step": 19100 }, { "epoch": 0.1383309083802037, "grad_norm": 0.16615994274616241, "learning_rate": 4.861676330285856e-06, "loss": 1.027, "step": 19110 }, { "epoch": 0.13840329504078988, "grad_norm": 0.16814115643501282, "learning_rate": 4.861603943625269e-06, "loss": 1.0254, "step": 19120 }, { "epoch": 0.13847568170137606, "grad_norm": 0.1832701861858368, "learning_rate": 4.861531556964683e-06, "loss": 1.029, "step": 19130 }, { "epoch": 0.13854806836196226, "grad_norm": 0.17128700017929077, "learning_rate": 4.861459170304097e-06, "loss": 1.0295, "step": 19140 }, { "epoch": 0.13862045502254844, "grad_norm": 0.1750701367855072, "learning_rate": 4.861386783643511e-06, "loss": 1.0251, "step": 19150 }, { "epoch": 0.13869284168313464, "grad_norm": 0.2172747105360031, "learning_rate": 4.861314396982925e-06, "loss": 1.022, "step": 19160 }, { "epoch": 0.13876522834372082, "grad_norm": 0.17995861172676086, "learning_rate": 4.861242010322338e-06, "loss": 1.0308, "step": 19170 }, { "epoch": 0.138837615004307, "grad_norm": 0.17807912826538086, "learning_rate": 4.861169623661752e-06, "loss": 1.0221, "step": 19180 }, { "epoch": 0.1389100016648932, "grad_norm": 0.17291560769081116, "learning_rate": 4.861097237001166e-06, "loss": 1.023, "step": 19190 }, { "epoch": 0.13898238832547938, "grad_norm": 0.21126574277877808, "learning_rate": 4.861024850340579e-06, "loss": 1.0267, "step": 19200 }, { "epoch": 0.13905477498606555, "grad_norm": 0.178102508187294, "learning_rate": 4.8609524636799936e-06, "loss": 1.028, "step": 19210 }, { "epoch": 0.13912716164665176, "grad_norm": 0.18170644342899323, "learning_rate": 4.860880077019407e-06, "loss": 1.0209, "step": 19220 }, { "epoch": 0.13919954830723794, "grad_norm": 0.17773956060409546, "learning_rate": 4.860807690358821e-06, "loss": 1.0319, "step": 19230 }, { "epoch": 0.13927193496782414, "grad_norm": 0.25084516406059265, "learning_rate": 4.860735303698234e-06, "loss": 1.027, "step": 19240 }, { "epoch": 0.13934432162841032, "grad_norm": 0.18037767708301544, "learning_rate": 4.860662917037649e-06, "loss": 1.0325, "step": 19250 }, { "epoch": 0.1394167082889965, "grad_norm": 0.1826416552066803, "learning_rate": 4.8605905303770625e-06, "loss": 1.0292, "step": 19260 }, { "epoch": 0.1394890949495827, "grad_norm": 0.16938439011573792, "learning_rate": 4.860518143716476e-06, "loss": 1.0303, "step": 19270 }, { "epoch": 0.13956148161016887, "grad_norm": 0.18216344714164734, "learning_rate": 4.86044575705589e-06, "loss": 1.0351, "step": 19280 }, { "epoch": 0.13963386827075505, "grad_norm": 0.17035633325576782, "learning_rate": 4.860373370395304e-06, "loss": 1.0357, "step": 19290 }, { "epoch": 0.13970625493134126, "grad_norm": 0.17245067656040192, "learning_rate": 4.860300983734718e-06, "loss": 1.0377, "step": 19300 }, { "epoch": 0.13977864159192743, "grad_norm": 0.17423754930496216, "learning_rate": 4.860228597074131e-06, "loss": 1.0384, "step": 19310 }, { "epoch": 0.13985102825251364, "grad_norm": 0.20927661657333374, "learning_rate": 4.860156210413545e-06, "loss": 1.0141, "step": 19320 }, { "epoch": 0.13992341491309981, "grad_norm": 0.18298327922821045, "learning_rate": 4.860083823752959e-06, "loss": 1.0114, "step": 19330 }, { "epoch": 0.139995801573686, "grad_norm": 0.1714993566274643, "learning_rate": 4.860011437092373e-06, "loss": 1.0275, "step": 19340 }, { "epoch": 0.1400681882342722, "grad_norm": 0.1708700954914093, "learning_rate": 4.859939050431787e-06, "loss": 1.0364, "step": 19350 }, { "epoch": 0.14014057489485837, "grad_norm": 0.18635700643062592, "learning_rate": 4.8598666637712e-06, "loss": 1.034, "step": 19360 }, { "epoch": 0.14021296155544458, "grad_norm": 0.17284467816352844, "learning_rate": 4.859794277110614e-06, "loss": 1.0332, "step": 19370 }, { "epoch": 0.14028534821603075, "grad_norm": 0.17173859477043152, "learning_rate": 4.859721890450028e-06, "loss": 1.0151, "step": 19380 }, { "epoch": 0.14035773487661693, "grad_norm": 0.17994263768196106, "learning_rate": 4.859649503789442e-06, "loss": 1.0139, "step": 19390 }, { "epoch": 0.14043012153720313, "grad_norm": 0.21482296288013458, "learning_rate": 4.859577117128856e-06, "loss": 1.0116, "step": 19400 }, { "epoch": 0.1405025081977893, "grad_norm": 0.17747275531291962, "learning_rate": 4.859504730468269e-06, "loss": 1.0267, "step": 19410 }, { "epoch": 0.1405748948583755, "grad_norm": 0.17190630733966827, "learning_rate": 4.859432343807684e-06, "loss": 1.0314, "step": 19420 }, { "epoch": 0.1406472815189617, "grad_norm": 0.1733713150024414, "learning_rate": 4.859359957147097e-06, "loss": 1.0189, "step": 19430 }, { "epoch": 0.14071966817954787, "grad_norm": 0.18654613196849823, "learning_rate": 4.859287570486511e-06, "loss": 1.0353, "step": 19440 }, { "epoch": 0.14079205484013407, "grad_norm": 0.19242019951343536, "learning_rate": 4.859215183825925e-06, "loss": 1.0235, "step": 19450 }, { "epoch": 0.14086444150072025, "grad_norm": 0.17690154910087585, "learning_rate": 4.859142797165339e-06, "loss": 1.0262, "step": 19460 }, { "epoch": 0.14093682816130643, "grad_norm": 0.17208655178546906, "learning_rate": 4.859070410504753e-06, "loss": 1.026, "step": 19470 }, { "epoch": 0.14100921482189263, "grad_norm": 0.1762927770614624, "learning_rate": 4.858998023844166e-06, "loss": 1.0367, "step": 19480 }, { "epoch": 0.1410816014824788, "grad_norm": 0.18810449540615082, "learning_rate": 4.85892563718358e-06, "loss": 1.0267, "step": 19490 }, { "epoch": 0.14115398814306498, "grad_norm": 0.23858876526355743, "learning_rate": 4.858853250522994e-06, "loss": 1.033, "step": 19500 }, { "epoch": 0.1412263748036512, "grad_norm": 0.186942458152771, "learning_rate": 4.858780863862408e-06, "loss": 1.0223, "step": 19510 }, { "epoch": 0.14129876146423737, "grad_norm": 0.1946224570274353, "learning_rate": 4.858708477201822e-06, "loss": 1.0169, "step": 19520 }, { "epoch": 0.14137114812482357, "grad_norm": 0.1714710295200348, "learning_rate": 4.858636090541235e-06, "loss": 1.0199, "step": 19530 }, { "epoch": 0.14144353478540975, "grad_norm": 0.17258310317993164, "learning_rate": 4.85856370388065e-06, "loss": 1.0274, "step": 19540 }, { "epoch": 0.14151592144599592, "grad_norm": 0.18983785808086395, "learning_rate": 4.858491317220063e-06, "loss": 1.0382, "step": 19550 }, { "epoch": 0.14158830810658213, "grad_norm": 0.17009888589382172, "learning_rate": 4.858418930559477e-06, "loss": 1.0138, "step": 19560 }, { "epoch": 0.1416606947671683, "grad_norm": 0.17958617210388184, "learning_rate": 4.8583465438988905e-06, "loss": 1.0258, "step": 19570 }, { "epoch": 0.14173308142775448, "grad_norm": 0.17548643052577972, "learning_rate": 4.858274157238305e-06, "loss": 1.0213, "step": 19580 }, { "epoch": 0.14180546808834069, "grad_norm": 0.16976386308670044, "learning_rate": 4.858201770577719e-06, "loss": 1.0292, "step": 19590 }, { "epoch": 0.14187785474892686, "grad_norm": 0.16231513023376465, "learning_rate": 4.858129383917132e-06, "loss": 1.0299, "step": 19600 }, { "epoch": 0.14195024140951307, "grad_norm": 0.17872096598148346, "learning_rate": 4.858056997256546e-06, "loss": 1.0275, "step": 19610 }, { "epoch": 0.14202262807009924, "grad_norm": 0.17281471192836761, "learning_rate": 4.85798461059596e-06, "loss": 1.0325, "step": 19620 }, { "epoch": 0.14209501473068542, "grad_norm": 0.19203288853168488, "learning_rate": 4.857912223935374e-06, "loss": 1.0231, "step": 19630 }, { "epoch": 0.14216740139127163, "grad_norm": 0.1869240552186966, "learning_rate": 4.8578398372747875e-06, "loss": 1.0303, "step": 19640 }, { "epoch": 0.1422397880518578, "grad_norm": 0.18349246680736542, "learning_rate": 4.857767450614201e-06, "loss": 1.051, "step": 19650 }, { "epoch": 0.14231217471244398, "grad_norm": 0.16620957851409912, "learning_rate": 4.857695063953616e-06, "loss": 1.0302, "step": 19660 }, { "epoch": 0.14238456137303018, "grad_norm": 0.16156761348247528, "learning_rate": 4.857622677293029e-06, "loss": 1.0181, "step": 19670 }, { "epoch": 0.14245694803361636, "grad_norm": 0.19458822906017303, "learning_rate": 4.857550290632443e-06, "loss": 1.027, "step": 19680 }, { "epoch": 0.14252933469420256, "grad_norm": 0.17083300650119781, "learning_rate": 4.8574779039718565e-06, "loss": 1.0365, "step": 19690 }, { "epoch": 0.14260172135478874, "grad_norm": 0.1720832884311676, "learning_rate": 4.857405517311271e-06, "loss": 1.0303, "step": 19700 }, { "epoch": 0.14267410801537492, "grad_norm": 0.17224328219890594, "learning_rate": 4.8573331306506845e-06, "loss": 1.0318, "step": 19710 }, { "epoch": 0.14274649467596112, "grad_norm": 0.16815780103206635, "learning_rate": 4.857260743990098e-06, "loss": 1.041, "step": 19720 }, { "epoch": 0.1428188813365473, "grad_norm": 0.19083335995674133, "learning_rate": 4.857188357329512e-06, "loss": 1.0303, "step": 19730 }, { "epoch": 0.14289126799713348, "grad_norm": 0.167524054646492, "learning_rate": 4.857115970668925e-06, "loss": 1.0343, "step": 19740 }, { "epoch": 0.14296365465771968, "grad_norm": 0.17465034127235413, "learning_rate": 4.857043584008339e-06, "loss": 1.0161, "step": 19750 }, { "epoch": 0.14303604131830586, "grad_norm": 0.18341541290283203, "learning_rate": 4.856971197347753e-06, "loss": 1.0134, "step": 19760 }, { "epoch": 0.14310842797889206, "grad_norm": 0.1703953891992569, "learning_rate": 4.856898810687167e-06, "loss": 1.0333, "step": 19770 }, { "epoch": 0.14318081463947824, "grad_norm": 0.17636318504810333, "learning_rate": 4.856826424026581e-06, "loss": 1.0271, "step": 19780 }, { "epoch": 0.14325320130006441, "grad_norm": 0.17411428689956665, "learning_rate": 4.856754037365994e-06, "loss": 1.0399, "step": 19790 }, { "epoch": 0.14332558796065062, "grad_norm": 0.18185856938362122, "learning_rate": 4.856681650705408e-06, "loss": 1.0305, "step": 19800 }, { "epoch": 0.1433979746212368, "grad_norm": 0.17615829408168793, "learning_rate": 4.856609264044822e-06, "loss": 1.0266, "step": 19810 }, { "epoch": 0.14347036128182297, "grad_norm": 0.19287104904651642, "learning_rate": 4.856536877384236e-06, "loss": 1.012, "step": 19820 }, { "epoch": 0.14354274794240918, "grad_norm": 0.17633087933063507, "learning_rate": 4.85646449072365e-06, "loss": 1.0288, "step": 19830 }, { "epoch": 0.14361513460299535, "grad_norm": 0.16220209002494812, "learning_rate": 4.856392104063063e-06, "loss": 1.0151, "step": 19840 }, { "epoch": 0.14368752126358156, "grad_norm": 0.16982801258563995, "learning_rate": 4.856319717402478e-06, "loss": 1.0261, "step": 19850 }, { "epoch": 0.14375990792416773, "grad_norm": 0.1811235100030899, "learning_rate": 4.856247330741891e-06, "loss": 1.0323, "step": 19860 }, { "epoch": 0.1438322945847539, "grad_norm": 0.17505569756031036, "learning_rate": 4.856174944081305e-06, "loss": 1.0212, "step": 19870 }, { "epoch": 0.14390468124534012, "grad_norm": 0.17915651202201843, "learning_rate": 4.8561025574207186e-06, "loss": 1.0347, "step": 19880 }, { "epoch": 0.1439770679059263, "grad_norm": 0.16607633233070374, "learning_rate": 4.856030170760133e-06, "loss": 1.0331, "step": 19890 }, { "epoch": 0.14404945456651247, "grad_norm": 0.1832243949174881, "learning_rate": 4.855957784099547e-06, "loss": 1.0218, "step": 19900 }, { "epoch": 0.14412184122709867, "grad_norm": 0.17596963047981262, "learning_rate": 4.85588539743896e-06, "loss": 1.0321, "step": 19910 }, { "epoch": 0.14419422788768485, "grad_norm": 0.17509303987026215, "learning_rate": 4.855813010778374e-06, "loss": 1.0224, "step": 19920 }, { "epoch": 0.14426661454827105, "grad_norm": 0.159066841006279, "learning_rate": 4.855740624117788e-06, "loss": 1.0205, "step": 19930 }, { "epoch": 0.14433900120885723, "grad_norm": 0.18700556457042694, "learning_rate": 4.855668237457202e-06, "loss": 1.0297, "step": 19940 }, { "epoch": 0.1444113878694434, "grad_norm": 0.17160135507583618, "learning_rate": 4.8555958507966156e-06, "loss": 1.034, "step": 19950 }, { "epoch": 0.1444837745300296, "grad_norm": 0.17142252624034882, "learning_rate": 4.855523464136029e-06, "loss": 1.0225, "step": 19960 }, { "epoch": 0.1445561611906158, "grad_norm": 0.18951663374900818, "learning_rate": 4.855451077475443e-06, "loss": 1.0276, "step": 19970 }, { "epoch": 0.144628547851202, "grad_norm": 0.1778341382741928, "learning_rate": 4.855378690814857e-06, "loss": 1.0116, "step": 19980 }, { "epoch": 0.14470093451178817, "grad_norm": 0.1711929589509964, "learning_rate": 4.855306304154271e-06, "loss": 1.0469, "step": 19990 }, { "epoch": 0.14477332117237435, "grad_norm": 0.1792709082365036, "learning_rate": 4.8552339174936845e-06, "loss": 1.0369, "step": 20000 }, { "epoch": 0.14484570783296055, "grad_norm": 0.1821603775024414, "learning_rate": 4.855161530833098e-06, "loss": 1.0374, "step": 20010 }, { "epoch": 0.14491809449354673, "grad_norm": 0.17707721889019012, "learning_rate": 4.8550891441725126e-06, "loss": 1.0276, "step": 20020 }, { "epoch": 0.1449904811541329, "grad_norm": 0.18702900409698486, "learning_rate": 4.855016757511926e-06, "loss": 1.0198, "step": 20030 }, { "epoch": 0.1450628678147191, "grad_norm": 0.16952428221702576, "learning_rate": 4.85494437085134e-06, "loss": 1.0333, "step": 20040 }, { "epoch": 0.1451352544753053, "grad_norm": 0.17305567860603333, "learning_rate": 4.854871984190753e-06, "loss": 1.0139, "step": 20050 }, { "epoch": 0.1452076411358915, "grad_norm": 0.17391686141490936, "learning_rate": 4.854799597530168e-06, "loss": 1.029, "step": 20060 }, { "epoch": 0.14528002779647767, "grad_norm": 0.17623725533485413, "learning_rate": 4.8547272108695815e-06, "loss": 1.0245, "step": 20070 }, { "epoch": 0.14535241445706384, "grad_norm": 0.17333059012889862, "learning_rate": 4.854654824208995e-06, "loss": 1.0297, "step": 20080 }, { "epoch": 0.14542480111765005, "grad_norm": 0.1761116087436676, "learning_rate": 4.854582437548409e-06, "loss": 1.0236, "step": 20090 }, { "epoch": 0.14549718777823623, "grad_norm": 0.16909906268119812, "learning_rate": 4.854510050887823e-06, "loss": 1.0219, "step": 20100 }, { "epoch": 0.1455695744388224, "grad_norm": 0.1722634732723236, "learning_rate": 4.854437664227237e-06, "loss": 1.0271, "step": 20110 }, { "epoch": 0.1456419610994086, "grad_norm": 0.32046079635620117, "learning_rate": 4.85436527756665e-06, "loss": 1.0361, "step": 20120 }, { "epoch": 0.14571434775999478, "grad_norm": 0.18109475076198578, "learning_rate": 4.854292890906064e-06, "loss": 1.0265, "step": 20130 }, { "epoch": 0.145786734420581, "grad_norm": 0.17990946769714355, "learning_rate": 4.8542205042454785e-06, "loss": 1.033, "step": 20140 }, { "epoch": 0.14585912108116716, "grad_norm": 0.17502890527248383, "learning_rate": 4.854148117584892e-06, "loss": 1.0363, "step": 20150 }, { "epoch": 0.14593150774175334, "grad_norm": 0.17126423120498657, "learning_rate": 4.854075730924306e-06, "loss": 1.0339, "step": 20160 }, { "epoch": 0.14600389440233955, "grad_norm": 0.1773746907711029, "learning_rate": 4.854003344263719e-06, "loss": 1.038, "step": 20170 }, { "epoch": 0.14607628106292572, "grad_norm": 0.1729922592639923, "learning_rate": 4.853930957603134e-06, "loss": 1.0151, "step": 20180 }, { "epoch": 0.1461486677235119, "grad_norm": 0.1790771633386612, "learning_rate": 4.8538585709425474e-06, "loss": 1.0436, "step": 20190 }, { "epoch": 0.1462210543840981, "grad_norm": 0.17333535850048065, "learning_rate": 4.853786184281961e-06, "loss": 1.0241, "step": 20200 }, { "epoch": 0.14629344104468428, "grad_norm": 0.19140805304050446, "learning_rate": 4.853713797621375e-06, "loss": 1.0242, "step": 20210 }, { "epoch": 0.14636582770527048, "grad_norm": 0.16760674118995667, "learning_rate": 4.853641410960789e-06, "loss": 1.0257, "step": 20220 }, { "epoch": 0.14643821436585666, "grad_norm": 0.17233441770076752, "learning_rate": 4.853569024300203e-06, "loss": 1.0338, "step": 20230 }, { "epoch": 0.14651060102644284, "grad_norm": 0.17265230417251587, "learning_rate": 4.853496637639616e-06, "loss": 1.037, "step": 20240 }, { "epoch": 0.14658298768702904, "grad_norm": 0.17247281968593597, "learning_rate": 4.85342425097903e-06, "loss": 1.0141, "step": 20250 }, { "epoch": 0.14665537434761522, "grad_norm": 0.1745438128709793, "learning_rate": 4.853351864318444e-06, "loss": 1.023, "step": 20260 }, { "epoch": 0.1467277610082014, "grad_norm": 0.15753747522830963, "learning_rate": 4.853279477657857e-06, "loss": 1.0326, "step": 20270 }, { "epoch": 0.1468001476687876, "grad_norm": 0.16413787007331848, "learning_rate": 4.853207090997271e-06, "loss": 1.021, "step": 20280 }, { "epoch": 0.14687253432937378, "grad_norm": 0.1920785754919052, "learning_rate": 4.853134704336685e-06, "loss": 1.039, "step": 20290 }, { "epoch": 0.14694492098995998, "grad_norm": 0.1672215759754181, "learning_rate": 4.853062317676099e-06, "loss": 1.0227, "step": 20300 }, { "epoch": 0.14701730765054616, "grad_norm": 0.17362061142921448, "learning_rate": 4.8529899310155125e-06, "loss": 1.0167, "step": 20310 }, { "epoch": 0.14708969431113234, "grad_norm": 0.22914515435695648, "learning_rate": 4.852917544354926e-06, "loss": 1.0369, "step": 20320 }, { "epoch": 0.14716208097171854, "grad_norm": 0.2375790923833847, "learning_rate": 4.852845157694341e-06, "loss": 1.0304, "step": 20330 }, { "epoch": 0.14723446763230472, "grad_norm": 0.1776515245437622, "learning_rate": 4.852772771033754e-06, "loss": 1.0327, "step": 20340 }, { "epoch": 0.1473068542928909, "grad_norm": 0.18703965842723846, "learning_rate": 4.852700384373168e-06, "loss": 1.0377, "step": 20350 }, { "epoch": 0.1473792409534771, "grad_norm": 0.18549805879592896, "learning_rate": 4.8526279977125814e-06, "loss": 1.0297, "step": 20360 }, { "epoch": 0.14745162761406327, "grad_norm": 0.1732458919286728, "learning_rate": 4.852555611051996e-06, "loss": 1.0432, "step": 20370 }, { "epoch": 0.14752401427464948, "grad_norm": 0.1937321275472641, "learning_rate": 4.8524832243914095e-06, "loss": 1.0219, "step": 20380 }, { "epoch": 0.14759640093523566, "grad_norm": 0.2144630402326584, "learning_rate": 4.852410837730823e-06, "loss": 1.025, "step": 20390 }, { "epoch": 0.14766878759582183, "grad_norm": 0.1818886548280716, "learning_rate": 4.852338451070237e-06, "loss": 1.0292, "step": 20400 }, { "epoch": 0.14774117425640804, "grad_norm": 0.1875666230916977, "learning_rate": 4.852266064409651e-06, "loss": 1.0262, "step": 20410 }, { "epoch": 0.1478135609169942, "grad_norm": 0.15505819022655487, "learning_rate": 4.852193677749065e-06, "loss": 1.0158, "step": 20420 }, { "epoch": 0.1478859475775804, "grad_norm": 0.16805370151996613, "learning_rate": 4.8521212910884785e-06, "loss": 1.0337, "step": 20430 }, { "epoch": 0.1479583342381666, "grad_norm": 0.18419931828975677, "learning_rate": 4.852048904427892e-06, "loss": 1.0314, "step": 20440 }, { "epoch": 0.14803072089875277, "grad_norm": 0.1946050375699997, "learning_rate": 4.8519765177673065e-06, "loss": 1.0217, "step": 20450 }, { "epoch": 0.14810310755933898, "grad_norm": 0.1928732991218567, "learning_rate": 4.85190413110672e-06, "loss": 1.0272, "step": 20460 }, { "epoch": 0.14817549421992515, "grad_norm": 0.20039114356040955, "learning_rate": 4.851831744446134e-06, "loss": 1.0294, "step": 20470 }, { "epoch": 0.14824788088051133, "grad_norm": 0.1852181851863861, "learning_rate": 4.851759357785547e-06, "loss": 1.0188, "step": 20480 }, { "epoch": 0.14832026754109753, "grad_norm": 0.16493256390094757, "learning_rate": 4.851686971124962e-06, "loss": 1.0354, "step": 20490 }, { "epoch": 0.1483926542016837, "grad_norm": 0.17907851934432983, "learning_rate": 4.8516145844643755e-06, "loss": 1.0362, "step": 20500 }, { "epoch": 0.14846504086226991, "grad_norm": 0.19379720091819763, "learning_rate": 4.851542197803789e-06, "loss": 1.0367, "step": 20510 }, { "epoch": 0.1485374275228561, "grad_norm": 0.1685412973165512, "learning_rate": 4.851469811143203e-06, "loss": 1.0292, "step": 20520 }, { "epoch": 0.14860981418344227, "grad_norm": 0.17223286628723145, "learning_rate": 4.851397424482617e-06, "loss": 1.0302, "step": 20530 }, { "epoch": 0.14868220084402847, "grad_norm": 0.17199258506298065, "learning_rate": 4.851325037822031e-06, "loss": 1.0299, "step": 20540 }, { "epoch": 0.14875458750461465, "grad_norm": 0.1630372256040573, "learning_rate": 4.851252651161444e-06, "loss": 1.0267, "step": 20550 }, { "epoch": 0.14882697416520083, "grad_norm": 0.17937202751636505, "learning_rate": 4.851180264500858e-06, "loss": 1.0288, "step": 20560 }, { "epoch": 0.14889936082578703, "grad_norm": 0.1685437560081482, "learning_rate": 4.851107877840272e-06, "loss": 1.0382, "step": 20570 }, { "epoch": 0.1489717474863732, "grad_norm": 0.7421558499336243, "learning_rate": 4.851035491179686e-06, "loss": 1.0395, "step": 20580 }, { "epoch": 0.1490441341469594, "grad_norm": 0.1732497662305832, "learning_rate": 4.8509631045191e-06, "loss": 1.024, "step": 20590 }, { "epoch": 0.1491165208075456, "grad_norm": 0.15808378159999847, "learning_rate": 4.850890717858513e-06, "loss": 1.0197, "step": 20600 }, { "epoch": 0.14918890746813177, "grad_norm": 0.177708238363266, "learning_rate": 4.850818331197927e-06, "loss": 1.026, "step": 20610 }, { "epoch": 0.14926129412871797, "grad_norm": 0.16309256851673126, "learning_rate": 4.850745944537341e-06, "loss": 1.0317, "step": 20620 }, { "epoch": 0.14933368078930415, "grad_norm": 0.16510923206806183, "learning_rate": 4.850673557876755e-06, "loss": 1.0207, "step": 20630 }, { "epoch": 0.14940606744989032, "grad_norm": 0.18781432509422302, "learning_rate": 4.850601171216169e-06, "loss": 1.0201, "step": 20640 }, { "epoch": 0.14947845411047653, "grad_norm": 0.19044159352779388, "learning_rate": 4.850528784555582e-06, "loss": 1.0272, "step": 20650 }, { "epoch": 0.1495508407710627, "grad_norm": 0.17361673712730408, "learning_rate": 4.850456397894997e-06, "loss": 1.0183, "step": 20660 }, { "epoch": 0.1496232274316489, "grad_norm": 0.1650736778974533, "learning_rate": 4.85038401123441e-06, "loss": 1.0283, "step": 20670 }, { "epoch": 0.14969561409223509, "grad_norm": 0.16908712685108185, "learning_rate": 4.850311624573824e-06, "loss": 1.0159, "step": 20680 }, { "epoch": 0.14976800075282126, "grad_norm": 0.17872673273086548, "learning_rate": 4.8502392379132376e-06, "loss": 1.0202, "step": 20690 }, { "epoch": 0.14984038741340747, "grad_norm": 0.16816575825214386, "learning_rate": 4.850166851252652e-06, "loss": 1.0228, "step": 20700 }, { "epoch": 0.14991277407399364, "grad_norm": 0.16545483469963074, "learning_rate": 4.850094464592066e-06, "loss": 1.026, "step": 20710 }, { "epoch": 0.14998516073457982, "grad_norm": 0.17488573491573334, "learning_rate": 4.850022077931479e-06, "loss": 1.0393, "step": 20720 }, { "epoch": 0.15005754739516602, "grad_norm": 0.16280938684940338, "learning_rate": 4.849949691270893e-06, "loss": 1.0309, "step": 20730 }, { "epoch": 0.1501299340557522, "grad_norm": 0.18939673900604248, "learning_rate": 4.849877304610307e-06, "loss": 1.034, "step": 20740 }, { "epoch": 0.1502023207163384, "grad_norm": 0.16721650958061218, "learning_rate": 4.849804917949721e-06, "loss": 1.0227, "step": 20750 }, { "epoch": 0.15027470737692458, "grad_norm": 0.1724279224872589, "learning_rate": 4.8497325312891346e-06, "loss": 1.0207, "step": 20760 }, { "epoch": 0.15034709403751076, "grad_norm": 0.1731773465871811, "learning_rate": 4.849660144628548e-06, "loss": 1.032, "step": 20770 }, { "epoch": 0.15041948069809696, "grad_norm": 0.15489158034324646, "learning_rate": 4.849587757967963e-06, "loss": 1.0216, "step": 20780 }, { "epoch": 0.15049186735868314, "grad_norm": 0.1700417697429657, "learning_rate": 4.849515371307375e-06, "loss": 1.0307, "step": 20790 }, { "epoch": 0.15056425401926932, "grad_norm": 0.19111177325248718, "learning_rate": 4.849442984646789e-06, "loss": 1.0101, "step": 20800 }, { "epoch": 0.15063664067985552, "grad_norm": 0.17551042139530182, "learning_rate": 4.8493705979862035e-06, "loss": 1.0204, "step": 20810 }, { "epoch": 0.1507090273404417, "grad_norm": 0.18322902917861938, "learning_rate": 4.849298211325617e-06, "loss": 1.0231, "step": 20820 }, { "epoch": 0.1507814140010279, "grad_norm": 0.23962242901325226, "learning_rate": 4.849225824665031e-06, "loss": 1.0276, "step": 20830 }, { "epoch": 0.15085380066161408, "grad_norm": 0.24165092408657074, "learning_rate": 4.849153438004444e-06, "loss": 1.0114, "step": 20840 }, { "epoch": 0.15092618732220026, "grad_norm": 0.16115596890449524, "learning_rate": 4.849081051343859e-06, "loss": 1.0203, "step": 20850 }, { "epoch": 0.15099857398278646, "grad_norm": 0.1693454384803772, "learning_rate": 4.849008664683272e-06, "loss": 1.0206, "step": 20860 }, { "epoch": 0.15107096064337264, "grad_norm": 0.1848178654909134, "learning_rate": 4.848936278022686e-06, "loss": 1.0206, "step": 20870 }, { "epoch": 0.1511433473039588, "grad_norm": 0.17669790983200073, "learning_rate": 4.8488638913621e-06, "loss": 1.0219, "step": 20880 }, { "epoch": 0.15121573396454502, "grad_norm": 0.17501038312911987, "learning_rate": 4.848791504701514e-06, "loss": 1.0268, "step": 20890 }, { "epoch": 0.1512881206251312, "grad_norm": 0.17689432203769684, "learning_rate": 4.848719118040928e-06, "loss": 1.0276, "step": 20900 }, { "epoch": 0.1513605072857174, "grad_norm": 0.19285716116428375, "learning_rate": 4.848646731380341e-06, "loss": 1.0148, "step": 20910 }, { "epoch": 0.15143289394630358, "grad_norm": 0.17144864797592163, "learning_rate": 4.848574344719755e-06, "loss": 1.0195, "step": 20920 }, { "epoch": 0.15150528060688975, "grad_norm": 0.18200930953025818, "learning_rate": 4.8485019580591694e-06, "loss": 1.0325, "step": 20930 }, { "epoch": 0.15157766726747596, "grad_norm": 0.17781402170658112, "learning_rate": 4.848429571398583e-06, "loss": 1.028, "step": 20940 }, { "epoch": 0.15165005392806213, "grad_norm": 0.16629651188850403, "learning_rate": 4.848357184737997e-06, "loss": 1.0354, "step": 20950 }, { "epoch": 0.1517224405886483, "grad_norm": 0.1842113435268402, "learning_rate": 4.84828479807741e-06, "loss": 1.0181, "step": 20960 }, { "epoch": 0.15179482724923452, "grad_norm": 0.16665078699588776, "learning_rate": 4.848212411416825e-06, "loss": 1.0333, "step": 20970 }, { "epoch": 0.1518672139098207, "grad_norm": 0.17545108497142792, "learning_rate": 4.848140024756238e-06, "loss": 1.0224, "step": 20980 }, { "epoch": 0.1519396005704069, "grad_norm": 0.17534632980823517, "learning_rate": 4.848067638095652e-06, "loss": 1.035, "step": 20990 }, { "epoch": 0.15201198723099307, "grad_norm": 0.16326074302196503, "learning_rate": 4.847995251435066e-06, "loss": 1.0243, "step": 21000 }, { "epoch": 0.15208437389157925, "grad_norm": 0.16196899116039276, "learning_rate": 4.84792286477448e-06, "loss": 1.0239, "step": 21010 }, { "epoch": 0.15215676055216545, "grad_norm": 0.17832061648368835, "learning_rate": 4.847850478113894e-06, "loss": 1.0319, "step": 21020 }, { "epoch": 0.15222914721275163, "grad_norm": 0.16528917849063873, "learning_rate": 4.847778091453307e-06, "loss": 1.024, "step": 21030 }, { "epoch": 0.1523015338733378, "grad_norm": 0.18502266705036163, "learning_rate": 4.847705704792721e-06, "loss": 1.0254, "step": 21040 }, { "epoch": 0.152373920533924, "grad_norm": 0.21463727951049805, "learning_rate": 4.847633318132135e-06, "loss": 1.0331, "step": 21050 }, { "epoch": 0.1524463071945102, "grad_norm": 0.1813105046749115, "learning_rate": 4.847560931471549e-06, "loss": 1.022, "step": 21060 }, { "epoch": 0.1525186938550964, "grad_norm": 0.17173029482364655, "learning_rate": 4.847488544810963e-06, "loss": 1.0358, "step": 21070 }, { "epoch": 0.15259108051568257, "grad_norm": 0.17391446232795715, "learning_rate": 4.847416158150376e-06, "loss": 1.0353, "step": 21080 }, { "epoch": 0.15266346717626875, "grad_norm": 0.18049128353595734, "learning_rate": 4.847343771489791e-06, "loss": 1.0328, "step": 21090 }, { "epoch": 0.15273585383685495, "grad_norm": 0.16343386471271515, "learning_rate": 4.847271384829204e-06, "loss": 1.0207, "step": 21100 }, { "epoch": 0.15280824049744113, "grad_norm": 0.17857620120048523, "learning_rate": 4.847198998168618e-06, "loss": 1.0256, "step": 21110 }, { "epoch": 0.15288062715802733, "grad_norm": 0.17615753412246704, "learning_rate": 4.8471266115080315e-06, "loss": 1.0197, "step": 21120 }, { "epoch": 0.1529530138186135, "grad_norm": 0.26710131764411926, "learning_rate": 4.847054224847446e-06, "loss": 1.0344, "step": 21130 }, { "epoch": 0.15302540047919969, "grad_norm": 0.17339998483657837, "learning_rate": 4.84698183818686e-06, "loss": 1.0031, "step": 21140 }, { "epoch": 0.1530977871397859, "grad_norm": 0.1859639585018158, "learning_rate": 4.846909451526273e-06, "loss": 1.0236, "step": 21150 }, { "epoch": 0.15317017380037207, "grad_norm": 0.16950179636478424, "learning_rate": 4.846837064865687e-06, "loss": 1.0251, "step": 21160 }, { "epoch": 0.15324256046095824, "grad_norm": 0.18173764646053314, "learning_rate": 4.846764678205101e-06, "loss": 1.0126, "step": 21170 }, { "epoch": 0.15331494712154445, "grad_norm": 0.1840379685163498, "learning_rate": 4.846692291544515e-06, "loss": 1.0327, "step": 21180 }, { "epoch": 0.15338733378213062, "grad_norm": 0.1827646642923355, "learning_rate": 4.8466199048839285e-06, "loss": 1.0287, "step": 21190 }, { "epoch": 0.15345972044271683, "grad_norm": 0.20936237275600433, "learning_rate": 4.846547518223342e-06, "loss": 1.0154, "step": 21200 }, { "epoch": 0.153532107103303, "grad_norm": 0.20079828798770905, "learning_rate": 4.846475131562756e-06, "loss": 1.0166, "step": 21210 }, { "epoch": 0.15360449376388918, "grad_norm": 0.1706727296113968, "learning_rate": 4.84640274490217e-06, "loss": 1.0317, "step": 21220 }, { "epoch": 0.1536768804244754, "grad_norm": 0.1730235069990158, "learning_rate": 4.846330358241584e-06, "loss": 1.0206, "step": 21230 }, { "epoch": 0.15374926708506156, "grad_norm": 0.17368663847446442, "learning_rate": 4.8462579715809975e-06, "loss": 1.026, "step": 21240 }, { "epoch": 0.15382165374564774, "grad_norm": 0.19320766627788544, "learning_rate": 4.846185584920411e-06, "loss": 1.0455, "step": 21250 }, { "epoch": 0.15389404040623395, "grad_norm": 0.17245323956012726, "learning_rate": 4.8461131982598255e-06, "loss": 1.0254, "step": 21260 }, { "epoch": 0.15396642706682012, "grad_norm": 0.28055688738822937, "learning_rate": 4.846040811599239e-06, "loss": 1.0158, "step": 21270 }, { "epoch": 0.15403881372740633, "grad_norm": 0.1800316721200943, "learning_rate": 4.845968424938653e-06, "loss": 1.0278, "step": 21280 }, { "epoch": 0.1541112003879925, "grad_norm": 0.16145852208137512, "learning_rate": 4.845896038278066e-06, "loss": 1.0221, "step": 21290 }, { "epoch": 0.15418358704857868, "grad_norm": 0.16589269042015076, "learning_rate": 4.845823651617481e-06, "loss": 1.029, "step": 21300 }, { "epoch": 0.15425597370916488, "grad_norm": 0.17117495834827423, "learning_rate": 4.8457512649568945e-06, "loss": 1.012, "step": 21310 }, { "epoch": 0.15432836036975106, "grad_norm": 0.16930687427520752, "learning_rate": 4.845678878296308e-06, "loss": 1.0132, "step": 21320 }, { "epoch": 0.15440074703033724, "grad_norm": 0.15988081693649292, "learning_rate": 4.845606491635722e-06, "loss": 1.0226, "step": 21330 }, { "epoch": 0.15447313369092344, "grad_norm": 0.16869573295116425, "learning_rate": 4.845534104975135e-06, "loss": 1.0183, "step": 21340 }, { "epoch": 0.15454552035150962, "grad_norm": 0.1683894544839859, "learning_rate": 4.845461718314549e-06, "loss": 1.02, "step": 21350 }, { "epoch": 0.15461790701209582, "grad_norm": 0.16887131333351135, "learning_rate": 4.8453893316539625e-06, "loss": 1.0191, "step": 21360 }, { "epoch": 0.154690293672682, "grad_norm": 0.1669628620147705, "learning_rate": 4.845316944993377e-06, "loss": 1.0215, "step": 21370 }, { "epoch": 0.15476268033326818, "grad_norm": 0.16394099593162537, "learning_rate": 4.845244558332791e-06, "loss": 1.0435, "step": 21380 }, { "epoch": 0.15483506699385438, "grad_norm": 0.298396497964859, "learning_rate": 4.845172171672204e-06, "loss": 1.0062, "step": 21390 }, { "epoch": 0.15490745365444056, "grad_norm": 0.1778578758239746, "learning_rate": 4.845099785011618e-06, "loss": 1.0383, "step": 21400 }, { "epoch": 0.15497984031502673, "grad_norm": 0.16672207415103912, "learning_rate": 4.845027398351032e-06, "loss": 1.0257, "step": 21410 }, { "epoch": 0.15505222697561294, "grad_norm": 0.1904023140668869, "learning_rate": 4.844955011690446e-06, "loss": 1.0183, "step": 21420 }, { "epoch": 0.15512461363619912, "grad_norm": 0.19985061883926392, "learning_rate": 4.8448826250298596e-06, "loss": 1.0243, "step": 21430 }, { "epoch": 0.15519700029678532, "grad_norm": 0.18588073551654816, "learning_rate": 4.844810238369273e-06, "loss": 1.0255, "step": 21440 }, { "epoch": 0.1552693869573715, "grad_norm": 0.18870490789413452, "learning_rate": 4.844737851708688e-06, "loss": 1.0333, "step": 21450 }, { "epoch": 0.15534177361795767, "grad_norm": 0.1897239238023758, "learning_rate": 4.844665465048101e-06, "loss": 1.0228, "step": 21460 }, { "epoch": 0.15541416027854388, "grad_norm": 0.2040310651063919, "learning_rate": 4.844593078387515e-06, "loss": 1.0144, "step": 21470 }, { "epoch": 0.15548654693913005, "grad_norm": 0.18327052891254425, "learning_rate": 4.8445206917269285e-06, "loss": 1.0259, "step": 21480 }, { "epoch": 0.15555893359971623, "grad_norm": 0.2118920236825943, "learning_rate": 4.844448305066343e-06, "loss": 1.0299, "step": 21490 }, { "epoch": 0.15563132026030244, "grad_norm": 0.17221951484680176, "learning_rate": 4.8443759184057566e-06, "loss": 1.0316, "step": 21500 }, { "epoch": 0.1557037069208886, "grad_norm": 0.1662265807390213, "learning_rate": 4.84430353174517e-06, "loss": 1.0094, "step": 21510 }, { "epoch": 0.15577609358147482, "grad_norm": 0.18612660467624664, "learning_rate": 4.844231145084584e-06, "loss": 1.0185, "step": 21520 }, { "epoch": 0.155848480242061, "grad_norm": 0.17847616970539093, "learning_rate": 4.844158758423998e-06, "loss": 1.0138, "step": 21530 }, { "epoch": 0.15592086690264717, "grad_norm": 0.17742417752742767, "learning_rate": 4.844086371763412e-06, "loss": 1.0129, "step": 21540 }, { "epoch": 0.15599325356323337, "grad_norm": 0.17097236216068268, "learning_rate": 4.8440139851028255e-06, "loss": 1.0194, "step": 21550 }, { "epoch": 0.15606564022381955, "grad_norm": 0.2134924679994583, "learning_rate": 4.843941598442239e-06, "loss": 1.0192, "step": 21560 }, { "epoch": 0.15613802688440573, "grad_norm": 0.17918936908245087, "learning_rate": 4.8438692117816536e-06, "loss": 1.0286, "step": 21570 }, { "epoch": 0.15621041354499193, "grad_norm": 0.16962049901485443, "learning_rate": 4.843796825121067e-06, "loss": 1.0243, "step": 21580 }, { "epoch": 0.1562828002055781, "grad_norm": 0.18653210997581482, "learning_rate": 4.843724438460481e-06, "loss": 1.0321, "step": 21590 }, { "epoch": 0.15635518686616431, "grad_norm": 0.1704837828874588, "learning_rate": 4.843652051799894e-06, "loss": 1.031, "step": 21600 }, { "epoch": 0.1564275735267505, "grad_norm": 0.18651624023914337, "learning_rate": 4.843579665139309e-06, "loss": 1.0029, "step": 21610 }, { "epoch": 0.15649996018733667, "grad_norm": 0.18372347950935364, "learning_rate": 4.8435072784787225e-06, "loss": 1.0356, "step": 21620 }, { "epoch": 0.15657234684792287, "grad_norm": 0.18019145727157593, "learning_rate": 4.843434891818136e-06, "loss": 1.0289, "step": 21630 }, { "epoch": 0.15664473350850905, "grad_norm": 0.1659417301416397, "learning_rate": 4.84336250515755e-06, "loss": 1.0187, "step": 21640 }, { "epoch": 0.15671712016909523, "grad_norm": 0.18040567636489868, "learning_rate": 4.843290118496964e-06, "loss": 1.0321, "step": 21650 }, { "epoch": 0.15678950682968143, "grad_norm": 0.1708545982837677, "learning_rate": 4.843217731836378e-06, "loss": 1.0207, "step": 21660 }, { "epoch": 0.1568618934902676, "grad_norm": 0.16297031939029694, "learning_rate": 4.8431453451757914e-06, "loss": 1.0146, "step": 21670 }, { "epoch": 0.1569342801508538, "grad_norm": 0.19065815210342407, "learning_rate": 4.843072958515205e-06, "loss": 1.0256, "step": 21680 }, { "epoch": 0.15700666681144, "grad_norm": 0.17626740038394928, "learning_rate": 4.8430005718546195e-06, "loss": 1.025, "step": 21690 }, { "epoch": 0.15707905347202616, "grad_norm": 0.1907099336385727, "learning_rate": 4.842928185194033e-06, "loss": 1.0163, "step": 21700 }, { "epoch": 0.15715144013261237, "grad_norm": 0.1677362322807312, "learning_rate": 4.842855798533447e-06, "loss": 1.0341, "step": 21710 }, { "epoch": 0.15722382679319855, "grad_norm": 0.18757909536361694, "learning_rate": 4.84278341187286e-06, "loss": 1.0201, "step": 21720 }, { "epoch": 0.15729621345378475, "grad_norm": 0.16965140402317047, "learning_rate": 4.842711025212275e-06, "loss": 1.0217, "step": 21730 }, { "epoch": 0.15736860011437093, "grad_norm": 0.16649499535560608, "learning_rate": 4.8426386385516884e-06, "loss": 1.0142, "step": 21740 }, { "epoch": 0.1574409867749571, "grad_norm": 0.1895909309387207, "learning_rate": 4.842566251891102e-06, "loss": 1.017, "step": 21750 }, { "epoch": 0.1575133734355433, "grad_norm": 0.17109711468219757, "learning_rate": 4.842493865230516e-06, "loss": 1.0203, "step": 21760 }, { "epoch": 0.15758576009612948, "grad_norm": 0.16221435368061066, "learning_rate": 4.84242147856993e-06, "loss": 1.0098, "step": 21770 }, { "epoch": 0.15765814675671566, "grad_norm": 0.17318131029605865, "learning_rate": 4.842349091909344e-06, "loss": 1.0307, "step": 21780 }, { "epoch": 0.15773053341730187, "grad_norm": 0.17340058088302612, "learning_rate": 4.842276705248757e-06, "loss": 1.021, "step": 21790 }, { "epoch": 0.15780292007788804, "grad_norm": 0.16992861032485962, "learning_rate": 4.842204318588171e-06, "loss": 1.0237, "step": 21800 }, { "epoch": 0.15787530673847425, "grad_norm": 0.17952974140644073, "learning_rate": 4.842131931927585e-06, "loss": 1.0169, "step": 21810 }, { "epoch": 0.15794769339906042, "grad_norm": 0.19869837164878845, "learning_rate": 4.842059545266999e-06, "loss": 1.0267, "step": 21820 }, { "epoch": 0.1580200800596466, "grad_norm": 0.1871790587902069, "learning_rate": 4.841987158606413e-06, "loss": 1.017, "step": 21830 }, { "epoch": 0.1580924667202328, "grad_norm": 0.183842271566391, "learning_rate": 4.841914771945826e-06, "loss": 1.0101, "step": 21840 }, { "epoch": 0.15816485338081898, "grad_norm": 0.17067475616931915, "learning_rate": 4.84184238528524e-06, "loss": 1.0251, "step": 21850 }, { "epoch": 0.15823724004140516, "grad_norm": 0.17248547077178955, "learning_rate": 4.8417699986246535e-06, "loss": 1.0154, "step": 21860 }, { "epoch": 0.15830962670199136, "grad_norm": 0.17312128841876984, "learning_rate": 4.841697611964067e-06, "loss": 1.0098, "step": 21870 }, { "epoch": 0.15838201336257754, "grad_norm": 0.17725808918476105, "learning_rate": 4.841625225303482e-06, "loss": 1.016, "step": 21880 }, { "epoch": 0.15845440002316374, "grad_norm": 0.16956385970115662, "learning_rate": 4.841552838642895e-06, "loss": 1.0214, "step": 21890 }, { "epoch": 0.15852678668374992, "grad_norm": 0.16064795851707458, "learning_rate": 4.841480451982309e-06, "loss": 1.0117, "step": 21900 }, { "epoch": 0.1585991733443361, "grad_norm": 0.1749652624130249, "learning_rate": 4.8414080653217225e-06, "loss": 1.0257, "step": 21910 }, { "epoch": 0.1586715600049223, "grad_norm": 0.18944776058197021, "learning_rate": 4.841335678661137e-06, "loss": 0.9973, "step": 21920 }, { "epoch": 0.15874394666550848, "grad_norm": 0.24421219527721405, "learning_rate": 4.8412632920005505e-06, "loss": 1.0302, "step": 21930 }, { "epoch": 0.15881633332609466, "grad_norm": 0.1638653576374054, "learning_rate": 4.841190905339964e-06, "loss": 1.0135, "step": 21940 }, { "epoch": 0.15888871998668086, "grad_norm": 0.18156132102012634, "learning_rate": 4.841118518679378e-06, "loss": 1.0098, "step": 21950 }, { "epoch": 0.15896110664726704, "grad_norm": 0.170707106590271, "learning_rate": 4.841046132018792e-06, "loss": 1.0142, "step": 21960 }, { "epoch": 0.15903349330785324, "grad_norm": 0.17522519826889038, "learning_rate": 4.840973745358206e-06, "loss": 1.0156, "step": 21970 }, { "epoch": 0.15910587996843942, "grad_norm": 0.17544645071029663, "learning_rate": 4.8409013586976195e-06, "loss": 1.0216, "step": 21980 }, { "epoch": 0.1591782666290256, "grad_norm": 0.16112075746059418, "learning_rate": 4.840828972037033e-06, "loss": 1.0137, "step": 21990 }, { "epoch": 0.1592506532896118, "grad_norm": 0.166812926530838, "learning_rate": 4.840756585376447e-06, "loss": 1.0234, "step": 22000 }, { "epoch": 0.15932303995019798, "grad_norm": 0.19277237355709076, "learning_rate": 4.840684198715861e-06, "loss": 1.0112, "step": 22010 }, { "epoch": 0.15939542661078415, "grad_norm": 0.18085378408432007, "learning_rate": 4.840611812055275e-06, "loss": 1.0177, "step": 22020 }, { "epoch": 0.15946781327137036, "grad_norm": 0.2057359516620636, "learning_rate": 4.840539425394688e-06, "loss": 1.0294, "step": 22030 }, { "epoch": 0.15954019993195653, "grad_norm": 0.165378138422966, "learning_rate": 4.840467038734102e-06, "loss": 1.0127, "step": 22040 }, { "epoch": 0.15961258659254274, "grad_norm": 0.18065567314624786, "learning_rate": 4.8403946520735165e-06, "loss": 1.0238, "step": 22050 }, { "epoch": 0.15968497325312891, "grad_norm": 0.1699487715959549, "learning_rate": 4.84032226541293e-06, "loss": 1.0217, "step": 22060 }, { "epoch": 0.1597573599137151, "grad_norm": 0.22719596326351166, "learning_rate": 4.840249878752344e-06, "loss": 1.032, "step": 22070 }, { "epoch": 0.1598297465743013, "grad_norm": 0.17852869629859924, "learning_rate": 4.840177492091757e-06, "loss": 1.0022, "step": 22080 }, { "epoch": 0.15990213323488747, "grad_norm": 0.18880029022693634, "learning_rate": 4.840105105431172e-06, "loss": 1.0214, "step": 22090 }, { "epoch": 0.15997451989547365, "grad_norm": 0.15838246047496796, "learning_rate": 4.840032718770585e-06, "loss": 1.0162, "step": 22100 }, { "epoch": 0.16004690655605985, "grad_norm": 0.15795861184597015, "learning_rate": 4.839960332109999e-06, "loss": 1.0199, "step": 22110 }, { "epoch": 0.16011929321664603, "grad_norm": 0.2182161509990692, "learning_rate": 4.839887945449413e-06, "loss": 1.0107, "step": 22120 }, { "epoch": 0.16019167987723223, "grad_norm": 0.16949397325515747, "learning_rate": 4.839815558788827e-06, "loss": 1.0171, "step": 22130 }, { "epoch": 0.1602640665378184, "grad_norm": 0.16689515113830566, "learning_rate": 4.839743172128241e-06, "loss": 1.0128, "step": 22140 }, { "epoch": 0.1603364531984046, "grad_norm": 0.19180209934711456, "learning_rate": 4.839670785467654e-06, "loss": 1.0162, "step": 22150 }, { "epoch": 0.1604088398589908, "grad_norm": 0.16757026314735413, "learning_rate": 4.839598398807068e-06, "loss": 1.0052, "step": 22160 }, { "epoch": 0.16048122651957697, "grad_norm": 0.1716785430908203, "learning_rate": 4.839526012146482e-06, "loss": 1.0202, "step": 22170 }, { "epoch": 0.16055361318016315, "grad_norm": 0.17269423604011536, "learning_rate": 4.839453625485896e-06, "loss": 1.0103, "step": 22180 }, { "epoch": 0.16062599984074935, "grad_norm": 0.22084921598434448, "learning_rate": 4.83938123882531e-06, "loss": 1.0143, "step": 22190 }, { "epoch": 0.16069838650133553, "grad_norm": 0.18779946863651276, "learning_rate": 4.839308852164723e-06, "loss": 1.016, "step": 22200 }, { "epoch": 0.16077077316192173, "grad_norm": 0.17556487023830414, "learning_rate": 4.839236465504138e-06, "loss": 1.0178, "step": 22210 }, { "epoch": 0.1608431598225079, "grad_norm": 0.164267897605896, "learning_rate": 4.839164078843551e-06, "loss": 1.0224, "step": 22220 }, { "epoch": 0.16091554648309409, "grad_norm": 0.20508578419685364, "learning_rate": 4.839091692182965e-06, "loss": 1.0087, "step": 22230 }, { "epoch": 0.1609879331436803, "grad_norm": 0.46417108178138733, "learning_rate": 4.8390193055223786e-06, "loss": 1.0203, "step": 22240 }, { "epoch": 0.16106031980426647, "grad_norm": 0.19800300896167755, "learning_rate": 4.838946918861793e-06, "loss": 1.0203, "step": 22250 }, { "epoch": 0.16113270646485267, "grad_norm": 0.17719148099422455, "learning_rate": 4.838874532201207e-06, "loss": 1.0306, "step": 22260 }, { "epoch": 0.16120509312543885, "grad_norm": 0.16901084780693054, "learning_rate": 4.83880214554062e-06, "loss": 1.014, "step": 22270 }, { "epoch": 0.16127747978602502, "grad_norm": 0.1782984584569931, "learning_rate": 4.838729758880034e-06, "loss": 1.0268, "step": 22280 }, { "epoch": 0.16134986644661123, "grad_norm": 0.18408305943012238, "learning_rate": 4.838657372219448e-06, "loss": 1.0163, "step": 22290 }, { "epoch": 0.1614222531071974, "grad_norm": 0.16512493789196014, "learning_rate": 4.838584985558862e-06, "loss": 1.0251, "step": 22300 }, { "epoch": 0.16149463976778358, "grad_norm": 0.180845245718956, "learning_rate": 4.8385125988982756e-06, "loss": 1.019, "step": 22310 }, { "epoch": 0.1615670264283698, "grad_norm": 0.17334243655204773, "learning_rate": 4.838440212237689e-06, "loss": 1.0123, "step": 22320 }, { "epoch": 0.16163941308895596, "grad_norm": 0.16331173479557037, "learning_rate": 4.838367825577104e-06, "loss": 1.0139, "step": 22330 }, { "epoch": 0.16171179974954217, "grad_norm": 0.17761701345443726, "learning_rate": 4.838295438916517e-06, "loss": 1.0381, "step": 22340 }, { "epoch": 0.16178418641012834, "grad_norm": 0.162800133228302, "learning_rate": 4.838223052255931e-06, "loss": 1.026, "step": 22350 }, { "epoch": 0.16185657307071452, "grad_norm": 0.18399173021316528, "learning_rate": 4.8381506655953445e-06, "loss": 1.0155, "step": 22360 }, { "epoch": 0.16192895973130073, "grad_norm": 0.18210694193840027, "learning_rate": 4.838078278934759e-06, "loss": 1.0326, "step": 22370 }, { "epoch": 0.1620013463918869, "grad_norm": 0.19371944665908813, "learning_rate": 4.838005892274173e-06, "loss": 1.0091, "step": 22380 }, { "epoch": 0.16207373305247308, "grad_norm": 0.2175704389810562, "learning_rate": 4.837933505613585e-06, "loss": 1.0094, "step": 22390 }, { "epoch": 0.16214611971305928, "grad_norm": 0.17889146506786346, "learning_rate": 4.837861118953e-06, "loss": 1.0349, "step": 22400 }, { "epoch": 0.16221850637364546, "grad_norm": 0.17176759243011475, "learning_rate": 4.8377887322924134e-06, "loss": 1.0191, "step": 22410 }, { "epoch": 0.16229089303423166, "grad_norm": 0.17913545668125153, "learning_rate": 4.837716345631827e-06, "loss": 1.0136, "step": 22420 }, { "epoch": 0.16236327969481784, "grad_norm": 0.17989039421081543, "learning_rate": 4.837643958971241e-06, "loss": 1.0442, "step": 22430 }, { "epoch": 0.16243566635540402, "grad_norm": 0.165592759847641, "learning_rate": 4.837571572310655e-06, "loss": 1.0248, "step": 22440 }, { "epoch": 0.16250805301599022, "grad_norm": 0.179484561085701, "learning_rate": 4.837499185650069e-06, "loss": 1.0218, "step": 22450 }, { "epoch": 0.1625804396765764, "grad_norm": 0.17062394320964813, "learning_rate": 4.837426798989482e-06, "loss": 1.0168, "step": 22460 }, { "epoch": 0.16265282633716258, "grad_norm": 0.1736491620540619, "learning_rate": 4.837354412328896e-06, "loss": 1.0122, "step": 22470 }, { "epoch": 0.16272521299774878, "grad_norm": 0.17264223098754883, "learning_rate": 4.8372820256683104e-06, "loss": 1.0215, "step": 22480 }, { "epoch": 0.16279759965833496, "grad_norm": 0.18589048087596893, "learning_rate": 4.837209639007724e-06, "loss": 1.0197, "step": 22490 }, { "epoch": 0.16286998631892116, "grad_norm": 0.16932156682014465, "learning_rate": 4.837137252347138e-06, "loss": 1.0257, "step": 22500 }, { "epoch": 0.16294237297950734, "grad_norm": 0.17345447838306427, "learning_rate": 4.837064865686551e-06, "loss": 1.0228, "step": 22510 }, { "epoch": 0.16301475964009352, "grad_norm": 0.16551077365875244, "learning_rate": 4.836992479025966e-06, "loss": 1.0266, "step": 22520 }, { "epoch": 0.16308714630067972, "grad_norm": 0.1778588742017746, "learning_rate": 4.836920092365379e-06, "loss": 1.0194, "step": 22530 }, { "epoch": 0.1631595329612659, "grad_norm": 0.16605743765830994, "learning_rate": 4.836847705704793e-06, "loss": 1.0151, "step": 22540 }, { "epoch": 0.16323191962185207, "grad_norm": 0.18560338020324707, "learning_rate": 4.836775319044207e-06, "loss": 1.017, "step": 22550 }, { "epoch": 0.16330430628243828, "grad_norm": 0.1800818145275116, "learning_rate": 4.836702932383621e-06, "loss": 1.0375, "step": 22560 }, { "epoch": 0.16337669294302445, "grad_norm": 0.20372332632541656, "learning_rate": 4.836630545723035e-06, "loss": 1.0101, "step": 22570 }, { "epoch": 0.16344907960361066, "grad_norm": 0.17202037572860718, "learning_rate": 4.836558159062448e-06, "loss": 1.0169, "step": 22580 }, { "epoch": 0.16352146626419684, "grad_norm": 0.17716839909553528, "learning_rate": 4.836485772401862e-06, "loss": 1.0207, "step": 22590 }, { "epoch": 0.163593852924783, "grad_norm": 0.18483179807662964, "learning_rate": 4.836413385741276e-06, "loss": 1.004, "step": 22600 }, { "epoch": 0.16366623958536922, "grad_norm": 0.18177522718906403, "learning_rate": 4.83634099908069e-06, "loss": 1.005, "step": 22610 }, { "epoch": 0.1637386262459554, "grad_norm": 0.1648116558790207, "learning_rate": 4.836268612420104e-06, "loss": 1.0109, "step": 22620 }, { "epoch": 0.16381101290654157, "grad_norm": 0.169694185256958, "learning_rate": 4.836196225759517e-06, "loss": 1.0065, "step": 22630 }, { "epoch": 0.16388339956712777, "grad_norm": 0.1901792734861374, "learning_rate": 4.836123839098931e-06, "loss": 1.0206, "step": 22640 }, { "epoch": 0.16395578622771395, "grad_norm": 0.1724863350391388, "learning_rate": 4.836051452438345e-06, "loss": 1.0276, "step": 22650 }, { "epoch": 0.16402817288830016, "grad_norm": 0.17653776705265045, "learning_rate": 4.835979065777759e-06, "loss": 1.0317, "step": 22660 }, { "epoch": 0.16410055954888633, "grad_norm": 0.1690499484539032, "learning_rate": 4.8359066791171725e-06, "loss": 1.0252, "step": 22670 }, { "epoch": 0.1641729462094725, "grad_norm": 0.1637679785490036, "learning_rate": 4.835834292456586e-06, "loss": 1.0172, "step": 22680 }, { "epoch": 0.1642453328700587, "grad_norm": 0.17211398482322693, "learning_rate": 4.835761905796001e-06, "loss": 1.018, "step": 22690 }, { "epoch": 0.1643177195306449, "grad_norm": 0.18994399905204773, "learning_rate": 4.835689519135414e-06, "loss": 1.0102, "step": 22700 }, { "epoch": 0.16439010619123107, "grad_norm": 0.17697584629058838, "learning_rate": 4.835617132474828e-06, "loss": 1.0071, "step": 22710 }, { "epoch": 0.16446249285181727, "grad_norm": 0.17998354136943817, "learning_rate": 4.8355447458142415e-06, "loss": 1.0074, "step": 22720 }, { "epoch": 0.16453487951240345, "grad_norm": 0.17211481928825378, "learning_rate": 4.835472359153656e-06, "loss": 1.0121, "step": 22730 }, { "epoch": 0.16460726617298965, "grad_norm": 0.19927141070365906, "learning_rate": 4.8353999724930695e-06, "loss": 1.026, "step": 22740 }, { "epoch": 0.16467965283357583, "grad_norm": 0.18247176706790924, "learning_rate": 4.835327585832483e-06, "loss": 1.0036, "step": 22750 }, { "epoch": 0.164752039494162, "grad_norm": 0.1697113811969757, "learning_rate": 4.835255199171897e-06, "loss": 1.011, "step": 22760 }, { "epoch": 0.1648244261547482, "grad_norm": 0.1692165732383728, "learning_rate": 4.835182812511311e-06, "loss": 1.0219, "step": 22770 }, { "epoch": 0.1648968128153344, "grad_norm": 0.18225090205669403, "learning_rate": 4.835110425850725e-06, "loss": 1.0236, "step": 22780 }, { "epoch": 0.16496919947592056, "grad_norm": 0.17765475809574127, "learning_rate": 4.8350380391901385e-06, "loss": 1.0097, "step": 22790 }, { "epoch": 0.16504158613650677, "grad_norm": 0.17133235931396484, "learning_rate": 4.834965652529552e-06, "loss": 1.0139, "step": 22800 }, { "epoch": 0.16511397279709294, "grad_norm": 0.1649719774723053, "learning_rate": 4.8348932658689665e-06, "loss": 1.0256, "step": 22810 }, { "epoch": 0.16518635945767915, "grad_norm": 0.1826830953359604, "learning_rate": 4.83482087920838e-06, "loss": 1.0307, "step": 22820 }, { "epoch": 0.16525874611826533, "grad_norm": 0.16446372866630554, "learning_rate": 4.834748492547794e-06, "loss": 1.0148, "step": 22830 }, { "epoch": 0.1653311327788515, "grad_norm": 0.16916634142398834, "learning_rate": 4.834676105887207e-06, "loss": 1.0137, "step": 22840 }, { "epoch": 0.1654035194394377, "grad_norm": 0.18869148194789886, "learning_rate": 4.834603719226622e-06, "loss": 1.0212, "step": 22850 }, { "epoch": 0.16547590610002388, "grad_norm": 0.2165064662694931, "learning_rate": 4.8345313325660355e-06, "loss": 1.0222, "step": 22860 }, { "epoch": 0.1655482927606101, "grad_norm": 0.17655514180660248, "learning_rate": 4.834458945905449e-06, "loss": 1.0227, "step": 22870 }, { "epoch": 0.16562067942119627, "grad_norm": 0.17349255084991455, "learning_rate": 4.834386559244863e-06, "loss": 1.0176, "step": 22880 }, { "epoch": 0.16569306608178244, "grad_norm": 0.1745917946100235, "learning_rate": 4.834314172584277e-06, "loss": 1.0105, "step": 22890 }, { "epoch": 0.16576545274236865, "grad_norm": 0.17585240304470062, "learning_rate": 4.834241785923691e-06, "loss": 1.0113, "step": 22900 }, { "epoch": 0.16583783940295482, "grad_norm": 0.1638375222682953, "learning_rate": 4.834169399263104e-06, "loss": 1.0205, "step": 22910 }, { "epoch": 0.165910226063541, "grad_norm": 0.24316856265068054, "learning_rate": 4.834097012602518e-06, "loss": 1.0187, "step": 22920 }, { "epoch": 0.1659826127241272, "grad_norm": 0.18205313384532928, "learning_rate": 4.834024625941932e-06, "loss": 1.0046, "step": 22930 }, { "epoch": 0.16605499938471338, "grad_norm": 0.16292521357536316, "learning_rate": 4.833952239281345e-06, "loss": 1.0175, "step": 22940 }, { "epoch": 0.16612738604529959, "grad_norm": 0.1718023717403412, "learning_rate": 4.833879852620759e-06, "loss": 1.0144, "step": 22950 }, { "epoch": 0.16619977270588576, "grad_norm": 0.17306740581989288, "learning_rate": 4.833807465960173e-06, "loss": 1.0277, "step": 22960 }, { "epoch": 0.16627215936647194, "grad_norm": 0.2184477001428604, "learning_rate": 4.833735079299587e-06, "loss": 1.0086, "step": 22970 }, { "epoch": 0.16634454602705814, "grad_norm": 0.18039968609809875, "learning_rate": 4.8336626926390006e-06, "loss": 1.0144, "step": 22980 }, { "epoch": 0.16641693268764432, "grad_norm": 0.1612757295370102, "learning_rate": 4.833590305978414e-06, "loss": 1.0096, "step": 22990 }, { "epoch": 0.1664893193482305, "grad_norm": 0.16724608838558197, "learning_rate": 4.833517919317829e-06, "loss": 1.0224, "step": 23000 }, { "epoch": 0.1665617060088167, "grad_norm": 0.19500578939914703, "learning_rate": 4.833445532657242e-06, "loss": 1.0279, "step": 23010 }, { "epoch": 0.16663409266940288, "grad_norm": 0.1689131259918213, "learning_rate": 4.833373145996656e-06, "loss": 1.0225, "step": 23020 }, { "epoch": 0.16670647932998908, "grad_norm": 0.18741093575954437, "learning_rate": 4.8333007593360695e-06, "loss": 1.0067, "step": 23030 }, { "epoch": 0.16677886599057526, "grad_norm": 0.16117849946022034, "learning_rate": 4.833228372675484e-06, "loss": 1.0135, "step": 23040 }, { "epoch": 0.16685125265116144, "grad_norm": 0.17075563967227936, "learning_rate": 4.8331559860148976e-06, "loss": 1.0091, "step": 23050 }, { "epoch": 0.16692363931174764, "grad_norm": 0.16465437412261963, "learning_rate": 4.833083599354311e-06, "loss": 1.013, "step": 23060 }, { "epoch": 0.16699602597233382, "grad_norm": 0.17264023423194885, "learning_rate": 4.833011212693725e-06, "loss": 1.0298, "step": 23070 }, { "epoch": 0.16706841263292, "grad_norm": 0.27394869923591614, "learning_rate": 4.832938826033139e-06, "loss": 1.0207, "step": 23080 }, { "epoch": 0.1671407992935062, "grad_norm": 0.16792115569114685, "learning_rate": 4.832866439372553e-06, "loss": 1.0246, "step": 23090 }, { "epoch": 0.16721318595409237, "grad_norm": 0.17486481368541718, "learning_rate": 4.8327940527119665e-06, "loss": 0.9999, "step": 23100 }, { "epoch": 0.16728557261467858, "grad_norm": 0.18093885481357574, "learning_rate": 4.83272166605138e-06, "loss": 1.0158, "step": 23110 }, { "epoch": 0.16735795927526476, "grad_norm": 0.16081741452217102, "learning_rate": 4.8326492793907946e-06, "loss": 1.0061, "step": 23120 }, { "epoch": 0.16743034593585093, "grad_norm": 0.1673981100320816, "learning_rate": 4.832576892730208e-06, "loss": 1.0152, "step": 23130 }, { "epoch": 0.16750273259643714, "grad_norm": 0.1669563502073288, "learning_rate": 4.832504506069622e-06, "loss": 1.0121, "step": 23140 }, { "epoch": 0.16757511925702331, "grad_norm": 0.19718532264232635, "learning_rate": 4.8324321194090354e-06, "loss": 1.0223, "step": 23150 }, { "epoch": 0.1676475059176095, "grad_norm": 0.2987012565135956, "learning_rate": 4.83235973274845e-06, "loss": 1.0097, "step": 23160 }, { "epoch": 0.1677198925781957, "grad_norm": 0.16320352256298065, "learning_rate": 4.8322873460878635e-06, "loss": 1.006, "step": 23170 }, { "epoch": 0.16779227923878187, "grad_norm": 0.16298076510429382, "learning_rate": 4.832214959427277e-06, "loss": 1.0035, "step": 23180 }, { "epoch": 0.16786466589936808, "grad_norm": 0.1666620373725891, "learning_rate": 4.832142572766691e-06, "loss": 1.0136, "step": 23190 }, { "epoch": 0.16793705255995425, "grad_norm": 0.16718272864818573, "learning_rate": 4.832070186106105e-06, "loss": 1.0126, "step": 23200 }, { "epoch": 0.16800943922054043, "grad_norm": 0.2104543298482895, "learning_rate": 4.831997799445519e-06, "loss": 1.0133, "step": 23210 }, { "epoch": 0.16808182588112663, "grad_norm": 0.17877444624900818, "learning_rate": 4.8319254127849324e-06, "loss": 1.0188, "step": 23220 }, { "epoch": 0.1681542125417128, "grad_norm": 0.16311542689800262, "learning_rate": 4.831853026124346e-06, "loss": 1.0098, "step": 23230 }, { "epoch": 0.168226599202299, "grad_norm": 0.1771518886089325, "learning_rate": 4.83178063946376e-06, "loss": 1.0004, "step": 23240 }, { "epoch": 0.1682989858628852, "grad_norm": 0.20852121710777283, "learning_rate": 4.831708252803174e-06, "loss": 1.014, "step": 23250 }, { "epoch": 0.16837137252347137, "grad_norm": 0.17537963390350342, "learning_rate": 4.831635866142588e-06, "loss": 1.0188, "step": 23260 }, { "epoch": 0.16844375918405757, "grad_norm": 0.16795283555984497, "learning_rate": 4.831563479482001e-06, "loss": 1.0063, "step": 23270 }, { "epoch": 0.16851614584464375, "grad_norm": 0.18271319568157196, "learning_rate": 4.831491092821415e-06, "loss": 1.0066, "step": 23280 }, { "epoch": 0.16858853250522993, "grad_norm": 0.16966833174228668, "learning_rate": 4.8314187061608294e-06, "loss": 1.0109, "step": 23290 }, { "epoch": 0.16866091916581613, "grad_norm": 0.1862793117761612, "learning_rate": 4.831346319500243e-06, "loss": 1.0195, "step": 23300 }, { "epoch": 0.1687333058264023, "grad_norm": 0.17365515232086182, "learning_rate": 4.831273932839657e-06, "loss": 1.04, "step": 23310 }, { "epoch": 0.16880569248698848, "grad_norm": 0.17782513797283173, "learning_rate": 4.83120154617907e-06, "loss": 1.0078, "step": 23320 }, { "epoch": 0.1688780791475747, "grad_norm": 0.16336967051029205, "learning_rate": 4.831129159518485e-06, "loss": 1.0081, "step": 23330 }, { "epoch": 0.16895046580816087, "grad_norm": 0.18843859434127808, "learning_rate": 4.831056772857898e-06, "loss": 1.0103, "step": 23340 }, { "epoch": 0.16902285246874707, "grad_norm": 0.1720830351114273, "learning_rate": 4.830984386197312e-06, "loss": 1.0168, "step": 23350 }, { "epoch": 0.16909523912933325, "grad_norm": 0.24049195647239685, "learning_rate": 4.830911999536726e-06, "loss": 1.0181, "step": 23360 }, { "epoch": 0.16916762578991942, "grad_norm": 0.17097869515419006, "learning_rate": 4.83083961287614e-06, "loss": 1.0042, "step": 23370 }, { "epoch": 0.16924001245050563, "grad_norm": 0.2235918790102005, "learning_rate": 4.830767226215554e-06, "loss": 1.0094, "step": 23380 }, { "epoch": 0.1693123991110918, "grad_norm": 0.16128119826316833, "learning_rate": 4.830694839554967e-06, "loss": 1.0185, "step": 23390 }, { "epoch": 0.16938478577167798, "grad_norm": 0.16598336398601532, "learning_rate": 4.830622452894381e-06, "loss": 1.0118, "step": 23400 }, { "epoch": 0.16945717243226419, "grad_norm": 0.16268615424633026, "learning_rate": 4.830550066233795e-06, "loss": 1.0171, "step": 23410 }, { "epoch": 0.16952955909285036, "grad_norm": 0.1621619611978531, "learning_rate": 4.830477679573209e-06, "loss": 1.0176, "step": 23420 }, { "epoch": 0.16960194575343657, "grad_norm": 0.1687730848789215, "learning_rate": 4.830405292912623e-06, "loss": 1.0058, "step": 23430 }, { "epoch": 0.16967433241402274, "grad_norm": 0.17967501282691956, "learning_rate": 4.830332906252036e-06, "loss": 1.0141, "step": 23440 }, { "epoch": 0.16974671907460892, "grad_norm": 0.23088906705379486, "learning_rate": 4.83026051959145e-06, "loss": 1.0283, "step": 23450 }, { "epoch": 0.16981910573519512, "grad_norm": 0.16651782393455505, "learning_rate": 4.8301881329308635e-06, "loss": 1.0155, "step": 23460 }, { "epoch": 0.1698914923957813, "grad_norm": 0.16330918669700623, "learning_rate": 4.830115746270277e-06, "loss": 1.0126, "step": 23470 }, { "epoch": 0.1699638790563675, "grad_norm": 0.1758406162261963, "learning_rate": 4.8300433596096915e-06, "loss": 1.007, "step": 23480 }, { "epoch": 0.17003626571695368, "grad_norm": 0.18241257965564728, "learning_rate": 4.829970972949105e-06, "loss": 1.0032, "step": 23490 }, { "epoch": 0.17010865237753986, "grad_norm": 0.18768127262592316, "learning_rate": 4.829898586288519e-06, "loss": 1.0193, "step": 23500 }, { "epoch": 0.17018103903812606, "grad_norm": 0.1854964941740036, "learning_rate": 4.829826199627932e-06, "loss": 1.0254, "step": 23510 }, { "epoch": 0.17025342569871224, "grad_norm": 0.164667010307312, "learning_rate": 4.829753812967347e-06, "loss": 1.0093, "step": 23520 }, { "epoch": 0.17032581235929842, "grad_norm": 0.1953144520521164, "learning_rate": 4.8296814263067605e-06, "loss": 1.0138, "step": 23530 }, { "epoch": 0.17039819901988462, "grad_norm": 0.170278862118721, "learning_rate": 4.829609039646174e-06, "loss": 1.0114, "step": 23540 }, { "epoch": 0.1704705856804708, "grad_norm": 0.19750620424747467, "learning_rate": 4.829536652985588e-06, "loss": 1.0215, "step": 23550 }, { "epoch": 0.170542972341057, "grad_norm": 0.19091200828552246, "learning_rate": 4.829464266325002e-06, "loss": 1.0166, "step": 23560 }, { "epoch": 0.17061535900164318, "grad_norm": 0.1851140260696411, "learning_rate": 4.829391879664416e-06, "loss": 1.0062, "step": 23570 }, { "epoch": 0.17068774566222936, "grad_norm": 0.19524310529232025, "learning_rate": 4.829319493003829e-06, "loss": 1.0217, "step": 23580 }, { "epoch": 0.17076013232281556, "grad_norm": 0.17677630484104156, "learning_rate": 4.829247106343243e-06, "loss": 1.0066, "step": 23590 }, { "epoch": 0.17083251898340174, "grad_norm": 0.1704902946949005, "learning_rate": 4.8291747196826575e-06, "loss": 1.0033, "step": 23600 }, { "epoch": 0.17090490564398791, "grad_norm": 0.16769947111606598, "learning_rate": 4.829102333022071e-06, "loss": 0.9979, "step": 23610 }, { "epoch": 0.17097729230457412, "grad_norm": 0.16838715970516205, "learning_rate": 4.829029946361485e-06, "loss": 1.0022, "step": 23620 }, { "epoch": 0.1710496789651603, "grad_norm": 0.18555735051631927, "learning_rate": 4.828957559700898e-06, "loss": 1.0036, "step": 23630 }, { "epoch": 0.1711220656257465, "grad_norm": 0.17811746895313263, "learning_rate": 4.828885173040313e-06, "loss": 1.0122, "step": 23640 }, { "epoch": 0.17119445228633268, "grad_norm": 0.17292849719524384, "learning_rate": 4.828812786379726e-06, "loss": 0.9983, "step": 23650 }, { "epoch": 0.17126683894691885, "grad_norm": 0.18011000752449036, "learning_rate": 4.82874039971914e-06, "loss": 1.0149, "step": 23660 }, { "epoch": 0.17133922560750506, "grad_norm": 0.1814233809709549, "learning_rate": 4.828668013058554e-06, "loss": 1.0192, "step": 23670 }, { "epoch": 0.17141161226809123, "grad_norm": 0.17261269688606262, "learning_rate": 4.828595626397968e-06, "loss": 1.0306, "step": 23680 }, { "epoch": 0.1714839989286774, "grad_norm": 0.18298877775669098, "learning_rate": 4.828523239737382e-06, "loss": 1.0157, "step": 23690 }, { "epoch": 0.17155638558926362, "grad_norm": 0.16827493906021118, "learning_rate": 4.828450853076795e-06, "loss": 1.0056, "step": 23700 }, { "epoch": 0.1716287722498498, "grad_norm": 0.21955229341983795, "learning_rate": 4.828378466416209e-06, "loss": 1.0198, "step": 23710 }, { "epoch": 0.171701158910436, "grad_norm": 0.17503350973129272, "learning_rate": 4.828306079755623e-06, "loss": 1.0191, "step": 23720 }, { "epoch": 0.17177354557102217, "grad_norm": 0.1629331111907959, "learning_rate": 4.828233693095037e-06, "loss": 1.0135, "step": 23730 }, { "epoch": 0.17184593223160835, "grad_norm": 0.1784103661775589, "learning_rate": 4.828161306434451e-06, "loss": 1.0184, "step": 23740 }, { "epoch": 0.17191831889219455, "grad_norm": 0.17589734494686127, "learning_rate": 4.828088919773864e-06, "loss": 1.0039, "step": 23750 }, { "epoch": 0.17199070555278073, "grad_norm": 0.17863091826438904, "learning_rate": 4.828016533113279e-06, "loss": 0.9991, "step": 23760 }, { "epoch": 0.1720630922133669, "grad_norm": 0.16851244866847992, "learning_rate": 4.827944146452692e-06, "loss": 1.0054, "step": 23770 }, { "epoch": 0.1721354788739531, "grad_norm": 0.1785619854927063, "learning_rate": 4.827871759792106e-06, "loss": 1.0108, "step": 23780 }, { "epoch": 0.1722078655345393, "grad_norm": 0.18070313334465027, "learning_rate": 4.8277993731315196e-06, "loss": 1.0221, "step": 23790 }, { "epoch": 0.1722802521951255, "grad_norm": 0.16967083513736725, "learning_rate": 4.827726986470934e-06, "loss": 1.0178, "step": 23800 }, { "epoch": 0.17235263885571167, "grad_norm": 0.19012100994586945, "learning_rate": 4.827654599810348e-06, "loss": 1.0037, "step": 23810 }, { "epoch": 0.17242502551629785, "grad_norm": 0.18341712653636932, "learning_rate": 4.827582213149761e-06, "loss": 1.0065, "step": 23820 }, { "epoch": 0.17249741217688405, "grad_norm": 0.1624983698129654, "learning_rate": 4.827509826489175e-06, "loss": 1.0026, "step": 23830 }, { "epoch": 0.17256979883747023, "grad_norm": 0.1717177778482437, "learning_rate": 4.827437439828589e-06, "loss": 1.0206, "step": 23840 }, { "epoch": 0.1726421854980564, "grad_norm": 0.18346939980983734, "learning_rate": 4.827365053168003e-06, "loss": 1.0212, "step": 23850 }, { "epoch": 0.1727145721586426, "grad_norm": 0.2184140831232071, "learning_rate": 4.8272926665074166e-06, "loss": 1.0239, "step": 23860 }, { "epoch": 0.1727869588192288, "grad_norm": 0.15121838450431824, "learning_rate": 4.82722027984683e-06, "loss": 0.9996, "step": 23870 }, { "epoch": 0.172859345479815, "grad_norm": 0.1719536930322647, "learning_rate": 4.827147893186244e-06, "loss": 1.0067, "step": 23880 }, { "epoch": 0.17293173214040117, "grad_norm": 0.18498043715953827, "learning_rate": 4.827075506525658e-06, "loss": 1.011, "step": 23890 }, { "epoch": 0.17300411880098734, "grad_norm": 0.16693322360515594, "learning_rate": 4.827003119865072e-06, "loss": 1.001, "step": 23900 }, { "epoch": 0.17307650546157355, "grad_norm": 0.19714364409446716, "learning_rate": 4.8269307332044855e-06, "loss": 1.0202, "step": 23910 }, { "epoch": 0.17314889212215973, "grad_norm": 0.1966882348060608, "learning_rate": 4.826858346543899e-06, "loss": 1.0124, "step": 23920 }, { "epoch": 0.1732212787827459, "grad_norm": 0.1639029085636139, "learning_rate": 4.826785959883314e-06, "loss": 1.012, "step": 23930 }, { "epoch": 0.1732936654433321, "grad_norm": 0.18476232886314392, "learning_rate": 4.826713573222727e-06, "loss": 1.0252, "step": 23940 }, { "epoch": 0.17336605210391828, "grad_norm": 0.1863066405057907, "learning_rate": 4.826641186562141e-06, "loss": 1.0282, "step": 23950 }, { "epoch": 0.1734384387645045, "grad_norm": 0.1653691977262497, "learning_rate": 4.8265687999015544e-06, "loss": 1.0146, "step": 23960 }, { "epoch": 0.17351082542509066, "grad_norm": 0.18651318550109863, "learning_rate": 4.826496413240969e-06, "loss": 1.0117, "step": 23970 }, { "epoch": 0.17358321208567684, "grad_norm": 0.16590899229049683, "learning_rate": 4.826424026580382e-06, "loss": 1.0129, "step": 23980 }, { "epoch": 0.17365559874626305, "grad_norm": 0.15830731391906738, "learning_rate": 4.826351639919796e-06, "loss": 1.0247, "step": 23990 }, { "epoch": 0.17372798540684922, "grad_norm": 0.20496408641338348, "learning_rate": 4.82627925325921e-06, "loss": 1.0094, "step": 24000 }, { "epoch": 0.1738003720674354, "grad_norm": 0.17806655168533325, "learning_rate": 4.826206866598623e-06, "loss": 1.0311, "step": 24010 }, { "epoch": 0.1738727587280216, "grad_norm": 0.18243646621704102, "learning_rate": 4.826134479938037e-06, "loss": 1.001, "step": 24020 }, { "epoch": 0.17394514538860778, "grad_norm": 0.16999110579490662, "learning_rate": 4.826062093277451e-06, "loss": 1.0067, "step": 24030 }, { "epoch": 0.17401753204919398, "grad_norm": 0.16639530658721924, "learning_rate": 4.825989706616865e-06, "loss": 0.9953, "step": 24040 }, { "epoch": 0.17408991870978016, "grad_norm": 0.20730364322662354, "learning_rate": 4.825917319956279e-06, "loss": 1.0058, "step": 24050 }, { "epoch": 0.17416230537036634, "grad_norm": 0.17685486376285553, "learning_rate": 4.825844933295692e-06, "loss": 1.005, "step": 24060 }, { "epoch": 0.17423469203095254, "grad_norm": 0.16955289244651794, "learning_rate": 4.825772546635106e-06, "loss": 1.0101, "step": 24070 }, { "epoch": 0.17430707869153872, "grad_norm": 0.1870541274547577, "learning_rate": 4.82570015997452e-06, "loss": 1.0102, "step": 24080 }, { "epoch": 0.17437946535212492, "grad_norm": 0.22333884239196777, "learning_rate": 4.825627773313934e-06, "loss": 1.0087, "step": 24090 }, { "epoch": 0.1744518520127111, "grad_norm": 0.18519848585128784, "learning_rate": 4.825555386653348e-06, "loss": 1.005, "step": 24100 }, { "epoch": 0.17452423867329728, "grad_norm": 0.18657277524471283, "learning_rate": 4.825482999992761e-06, "loss": 1.0142, "step": 24110 }, { "epoch": 0.17459662533388348, "grad_norm": 0.20050521194934845, "learning_rate": 4.825410613332176e-06, "loss": 1.0069, "step": 24120 }, { "epoch": 0.17466901199446966, "grad_norm": 0.18028657138347626, "learning_rate": 4.825338226671589e-06, "loss": 1.0045, "step": 24130 }, { "epoch": 0.17474139865505584, "grad_norm": 0.17001697421073914, "learning_rate": 4.825265840011003e-06, "loss": 1.011, "step": 24140 }, { "epoch": 0.17481378531564204, "grad_norm": 0.16994266211986542, "learning_rate": 4.8251934533504165e-06, "loss": 1.0136, "step": 24150 }, { "epoch": 0.17488617197622822, "grad_norm": 0.17373910546302795, "learning_rate": 4.825121066689831e-06, "loss": 1.0038, "step": 24160 }, { "epoch": 0.17495855863681442, "grad_norm": 0.17860816419124603, "learning_rate": 4.825048680029245e-06, "loss": 1.0161, "step": 24170 }, { "epoch": 0.1750309452974006, "grad_norm": 0.16933861374855042, "learning_rate": 4.824976293368658e-06, "loss": 1.0084, "step": 24180 }, { "epoch": 0.17510333195798677, "grad_norm": 0.18152697384357452, "learning_rate": 4.824903906708072e-06, "loss": 0.9913, "step": 24190 }, { "epoch": 0.17517571861857298, "grad_norm": 0.15832629799842834, "learning_rate": 4.824831520047486e-06, "loss": 1.0034, "step": 24200 }, { "epoch": 0.17524810527915916, "grad_norm": 0.1804628223180771, "learning_rate": 4.8247591333869e-06, "loss": 1.0056, "step": 24210 }, { "epoch": 0.17532049193974533, "grad_norm": 0.1735430359840393, "learning_rate": 4.8246867467263135e-06, "loss": 1.0073, "step": 24220 }, { "epoch": 0.17539287860033154, "grad_norm": 0.176886186003685, "learning_rate": 4.824614360065727e-06, "loss": 1.0106, "step": 24230 }, { "epoch": 0.1754652652609177, "grad_norm": 0.16210952401161194, "learning_rate": 4.824541973405142e-06, "loss": 1.0199, "step": 24240 }, { "epoch": 0.17553765192150392, "grad_norm": 0.16794593632221222, "learning_rate": 4.824469586744555e-06, "loss": 0.9955, "step": 24250 }, { "epoch": 0.1756100385820901, "grad_norm": 0.18100325763225555, "learning_rate": 4.824397200083969e-06, "loss": 1.0069, "step": 24260 }, { "epoch": 0.17568242524267627, "grad_norm": 0.16271761059761047, "learning_rate": 4.8243248134233825e-06, "loss": 1.0122, "step": 24270 }, { "epoch": 0.17575481190326248, "grad_norm": 0.16619524359703064, "learning_rate": 4.824252426762797e-06, "loss": 1.0177, "step": 24280 }, { "epoch": 0.17582719856384865, "grad_norm": 0.18534304201602936, "learning_rate": 4.8241800401022105e-06, "loss": 1.0133, "step": 24290 }, { "epoch": 0.17589958522443483, "grad_norm": 0.179828941822052, "learning_rate": 4.824107653441624e-06, "loss": 1.0115, "step": 24300 }, { "epoch": 0.17597197188502103, "grad_norm": 0.19218121469020844, "learning_rate": 4.824035266781038e-06, "loss": 1.0002, "step": 24310 }, { "epoch": 0.1760443585456072, "grad_norm": 0.1730458289384842, "learning_rate": 4.823962880120452e-06, "loss": 1.0093, "step": 24320 }, { "epoch": 0.17611674520619341, "grad_norm": 0.1910993903875351, "learning_rate": 4.823890493459866e-06, "loss": 1.0133, "step": 24330 }, { "epoch": 0.1761891318667796, "grad_norm": 0.1970120370388031, "learning_rate": 4.8238181067992795e-06, "loss": 1.0102, "step": 24340 }, { "epoch": 0.17626151852736577, "grad_norm": 0.17400617897510529, "learning_rate": 4.823745720138693e-06, "loss": 1.0102, "step": 24350 }, { "epoch": 0.17633390518795197, "grad_norm": 0.17096106708049774, "learning_rate": 4.8236733334781075e-06, "loss": 1.0161, "step": 24360 }, { "epoch": 0.17640629184853815, "grad_norm": 0.185289666056633, "learning_rate": 4.823600946817521e-06, "loss": 1.0013, "step": 24370 }, { "epoch": 0.17647867850912433, "grad_norm": 0.17100411653518677, "learning_rate": 4.823528560156935e-06, "loss": 0.994, "step": 24380 }, { "epoch": 0.17655106516971053, "grad_norm": 0.1876966655254364, "learning_rate": 4.823456173496348e-06, "loss": 1.0041, "step": 24390 }, { "epoch": 0.1766234518302967, "grad_norm": 0.1617269068956375, "learning_rate": 4.823383786835763e-06, "loss": 0.997, "step": 24400 }, { "epoch": 0.1766958384908829, "grad_norm": 0.17999807000160217, "learning_rate": 4.8233114001751765e-06, "loss": 1.0085, "step": 24410 }, { "epoch": 0.1767682251514691, "grad_norm": 0.16914209723472595, "learning_rate": 4.82323901351459e-06, "loss": 1.0218, "step": 24420 }, { "epoch": 0.17684061181205527, "grad_norm": 0.20446030795574188, "learning_rate": 4.823166626854004e-06, "loss": 1.0115, "step": 24430 }, { "epoch": 0.17691299847264147, "grad_norm": 0.16619598865509033, "learning_rate": 4.823094240193418e-06, "loss": 1.0197, "step": 24440 }, { "epoch": 0.17698538513322765, "grad_norm": 0.173335999250412, "learning_rate": 4.823021853532832e-06, "loss": 1.0128, "step": 24450 }, { "epoch": 0.17705777179381382, "grad_norm": 0.17360666394233704, "learning_rate": 4.822949466872245e-06, "loss": 1.0073, "step": 24460 }, { "epoch": 0.17713015845440003, "grad_norm": 0.16045695543289185, "learning_rate": 4.822877080211659e-06, "loss": 0.999, "step": 24470 }, { "epoch": 0.1772025451149862, "grad_norm": 0.17634133994579315, "learning_rate": 4.822804693551073e-06, "loss": 1.0113, "step": 24480 }, { "epoch": 0.1772749317755724, "grad_norm": 0.15489248931407928, "learning_rate": 4.822732306890487e-06, "loss": 1.0033, "step": 24490 }, { "epoch": 0.17734731843615859, "grad_norm": 0.17164482176303864, "learning_rate": 4.822659920229901e-06, "loss": 1.0036, "step": 24500 }, { "epoch": 0.17741970509674476, "grad_norm": 0.17662885785102844, "learning_rate": 4.822587533569314e-06, "loss": 1.0162, "step": 24510 }, { "epoch": 0.17749209175733097, "grad_norm": 0.1665455847978592, "learning_rate": 4.822515146908728e-06, "loss": 1.0061, "step": 24520 }, { "epoch": 0.17756447841791714, "grad_norm": 0.17285911738872528, "learning_rate": 4.8224427602481416e-06, "loss": 1.0259, "step": 24530 }, { "epoch": 0.17763686507850332, "grad_norm": 0.1706143617630005, "learning_rate": 4.822370373587555e-06, "loss": 1.0258, "step": 24540 }, { "epoch": 0.17770925173908952, "grad_norm": 0.17605163156986237, "learning_rate": 4.82229798692697e-06, "loss": 1.0184, "step": 24550 }, { "epoch": 0.1777816383996757, "grad_norm": 0.1811869591474533, "learning_rate": 4.822225600266383e-06, "loss": 1.018, "step": 24560 }, { "epoch": 0.1778540250602619, "grad_norm": 0.16938798129558563, "learning_rate": 4.822153213605797e-06, "loss": 1.0101, "step": 24570 }, { "epoch": 0.17792641172084808, "grad_norm": 0.17794403433799744, "learning_rate": 4.8220808269452105e-06, "loss": 1.0068, "step": 24580 }, { "epoch": 0.17799879838143426, "grad_norm": 0.1750117838382721, "learning_rate": 4.822008440284625e-06, "loss": 1.0183, "step": 24590 }, { "epoch": 0.17807118504202046, "grad_norm": 0.19311663508415222, "learning_rate": 4.8219360536240386e-06, "loss": 1.0059, "step": 24600 }, { "epoch": 0.17814357170260664, "grad_norm": 0.17027834057807922, "learning_rate": 4.821863666963452e-06, "loss": 1.0117, "step": 24610 }, { "epoch": 0.17821595836319284, "grad_norm": 0.16142655909061432, "learning_rate": 4.821791280302866e-06, "loss": 1.0039, "step": 24620 }, { "epoch": 0.17828834502377902, "grad_norm": 0.20361775159835815, "learning_rate": 4.82171889364228e-06, "loss": 0.9984, "step": 24630 }, { "epoch": 0.1783607316843652, "grad_norm": 0.1685153841972351, "learning_rate": 4.821646506981694e-06, "loss": 1.0199, "step": 24640 }, { "epoch": 0.1784331183449514, "grad_norm": 0.17604348063468933, "learning_rate": 4.8215741203211075e-06, "loss": 1.0111, "step": 24650 }, { "epoch": 0.17850550500553758, "grad_norm": 0.17381654679775238, "learning_rate": 4.821501733660521e-06, "loss": 1.0142, "step": 24660 }, { "epoch": 0.17857789166612376, "grad_norm": 0.1625945121049881, "learning_rate": 4.821429346999935e-06, "loss": 1.0036, "step": 24670 }, { "epoch": 0.17865027832670996, "grad_norm": 0.16681507229804993, "learning_rate": 4.821356960339349e-06, "loss": 1.0174, "step": 24680 }, { "epoch": 0.17872266498729614, "grad_norm": 0.17053383588790894, "learning_rate": 4.821284573678763e-06, "loss": 1.0073, "step": 24690 }, { "epoch": 0.17879505164788234, "grad_norm": 0.16991731524467468, "learning_rate": 4.8212121870181764e-06, "loss": 1.0232, "step": 24700 }, { "epoch": 0.17886743830846852, "grad_norm": 0.18369825184345245, "learning_rate": 4.82113980035759e-06, "loss": 1.0144, "step": 24710 }, { "epoch": 0.1789398249690547, "grad_norm": 0.18356874585151672, "learning_rate": 4.8210674136970045e-06, "loss": 1.0159, "step": 24720 }, { "epoch": 0.1790122116296409, "grad_norm": 0.21696501970291138, "learning_rate": 4.820995027036418e-06, "loss": 1.001, "step": 24730 }, { "epoch": 0.17908459829022708, "grad_norm": 0.1677326261997223, "learning_rate": 4.820922640375832e-06, "loss": 1.02, "step": 24740 }, { "epoch": 0.17915698495081325, "grad_norm": 0.17450115084648132, "learning_rate": 4.820850253715245e-06, "loss": 1.0215, "step": 24750 }, { "epoch": 0.17922937161139946, "grad_norm": 0.1796158254146576, "learning_rate": 4.82077786705466e-06, "loss": 1.0213, "step": 24760 }, { "epoch": 0.17930175827198563, "grad_norm": 0.1706731915473938, "learning_rate": 4.8207054803940734e-06, "loss": 1.0065, "step": 24770 }, { "epoch": 0.17937414493257184, "grad_norm": 0.16157269477844238, "learning_rate": 4.820633093733487e-06, "loss": 1.0122, "step": 24780 }, { "epoch": 0.17944653159315802, "grad_norm": 0.17014949023723602, "learning_rate": 4.820560707072901e-06, "loss": 1.0163, "step": 24790 }, { "epoch": 0.1795189182537442, "grad_norm": 0.17551864683628082, "learning_rate": 4.820488320412315e-06, "loss": 1.001, "step": 24800 }, { "epoch": 0.1795913049143304, "grad_norm": 0.17576205730438232, "learning_rate": 4.820415933751729e-06, "loss": 1.0058, "step": 24810 }, { "epoch": 0.17966369157491657, "grad_norm": 0.16241174936294556, "learning_rate": 4.820343547091142e-06, "loss": 1.001, "step": 24820 }, { "epoch": 0.17973607823550275, "grad_norm": 0.2229170948266983, "learning_rate": 4.820271160430556e-06, "loss": 1.0258, "step": 24830 }, { "epoch": 0.17980846489608895, "grad_norm": 0.16988316178321838, "learning_rate": 4.8201987737699704e-06, "loss": 1.013, "step": 24840 }, { "epoch": 0.17988085155667513, "grad_norm": 0.2061333805322647, "learning_rate": 4.820126387109384e-06, "loss": 1.0025, "step": 24850 }, { "epoch": 0.17995323821726134, "grad_norm": 0.19015344977378845, "learning_rate": 4.820054000448798e-06, "loss": 1.014, "step": 24860 }, { "epoch": 0.1800256248778475, "grad_norm": 0.17161577939987183, "learning_rate": 4.819981613788211e-06, "loss": 0.9984, "step": 24870 }, { "epoch": 0.1800980115384337, "grad_norm": 0.17109227180480957, "learning_rate": 4.819909227127626e-06, "loss": 1.0087, "step": 24880 }, { "epoch": 0.1801703981990199, "grad_norm": 0.1876406967639923, "learning_rate": 4.819836840467039e-06, "loss": 1.0225, "step": 24890 }, { "epoch": 0.18024278485960607, "grad_norm": 0.17785070836544037, "learning_rate": 4.819764453806453e-06, "loss": 1.0034, "step": 24900 }, { "epoch": 0.18031517152019225, "grad_norm": 0.17659418284893036, "learning_rate": 4.819692067145867e-06, "loss": 1.0196, "step": 24910 }, { "epoch": 0.18038755818077845, "grad_norm": 0.19268226623535156, "learning_rate": 4.819619680485281e-06, "loss": 1.0012, "step": 24920 }, { "epoch": 0.18045994484136463, "grad_norm": 0.1869351863861084, "learning_rate": 4.819547293824695e-06, "loss": 1.0112, "step": 24930 }, { "epoch": 0.18053233150195083, "grad_norm": 0.17009639739990234, "learning_rate": 4.819474907164108e-06, "loss": 1.0161, "step": 24940 }, { "epoch": 0.180604718162537, "grad_norm": 0.20268967747688293, "learning_rate": 4.819402520503522e-06, "loss": 1.0077, "step": 24950 }, { "epoch": 0.18067710482312319, "grad_norm": 0.1628827154636383, "learning_rate": 4.819330133842936e-06, "loss": 1.0127, "step": 24960 }, { "epoch": 0.1807494914837094, "grad_norm": 0.18746638298034668, "learning_rate": 4.81925774718235e-06, "loss": 1.015, "step": 24970 }, { "epoch": 0.18082187814429557, "grad_norm": 0.16319945454597473, "learning_rate": 4.819185360521764e-06, "loss": 0.9972, "step": 24980 }, { "epoch": 0.18089426480488174, "grad_norm": 0.1594797521829605, "learning_rate": 4.819112973861177e-06, "loss": 1.0161, "step": 24990 }, { "epoch": 0.18096665146546795, "grad_norm": 0.20105616748332977, "learning_rate": 4.819040587200592e-06, "loss": 1.014, "step": 25000 }, { "epoch": 0.18103903812605412, "grad_norm": 0.1811130940914154, "learning_rate": 4.818968200540005e-06, "loss": 1.0114, "step": 25010 }, { "epoch": 0.18111142478664033, "grad_norm": 0.17586207389831543, "learning_rate": 4.818895813879419e-06, "loss": 1.0274, "step": 25020 }, { "epoch": 0.1811838114472265, "grad_norm": 0.19107623398303986, "learning_rate": 4.8188234272188325e-06, "loss": 1.0176, "step": 25030 }, { "epoch": 0.18125619810781268, "grad_norm": 0.16346633434295654, "learning_rate": 4.818751040558246e-06, "loss": 1.0137, "step": 25040 }, { "epoch": 0.1813285847683989, "grad_norm": 0.17446771264076233, "learning_rate": 4.81867865389766e-06, "loss": 0.989, "step": 25050 }, { "epoch": 0.18140097142898506, "grad_norm": 0.1719013899564743, "learning_rate": 4.818606267237073e-06, "loss": 1.0155, "step": 25060 }, { "epoch": 0.18147335808957124, "grad_norm": 0.1678323745727539, "learning_rate": 4.818533880576488e-06, "loss": 1.0097, "step": 25070 }, { "epoch": 0.18154574475015745, "grad_norm": 0.1604251116514206, "learning_rate": 4.8184614939159015e-06, "loss": 1.0109, "step": 25080 }, { "epoch": 0.18161813141074362, "grad_norm": 0.20874586701393127, "learning_rate": 4.818389107255315e-06, "loss": 1.0054, "step": 25090 }, { "epoch": 0.18169051807132983, "grad_norm": 0.17557349801063538, "learning_rate": 4.818316720594729e-06, "loss": 1.0165, "step": 25100 }, { "epoch": 0.181762904731916, "grad_norm": 0.17090950906276703, "learning_rate": 4.818244333934143e-06, "loss": 1.0049, "step": 25110 }, { "epoch": 0.18183529139250218, "grad_norm": 0.20330511033535004, "learning_rate": 4.818171947273557e-06, "loss": 1.0095, "step": 25120 }, { "epoch": 0.18190767805308838, "grad_norm": 0.1726309359073639, "learning_rate": 4.81809956061297e-06, "loss": 1.0183, "step": 25130 }, { "epoch": 0.18198006471367456, "grad_norm": 0.18963780999183655, "learning_rate": 4.818027173952384e-06, "loss": 0.9986, "step": 25140 }, { "epoch": 0.18205245137426074, "grad_norm": 0.18106186389923096, "learning_rate": 4.8179547872917985e-06, "loss": 1.0016, "step": 25150 }, { "epoch": 0.18212483803484694, "grad_norm": 0.17316314578056335, "learning_rate": 4.817882400631212e-06, "loss": 0.9989, "step": 25160 }, { "epoch": 0.18219722469543312, "grad_norm": 0.17553074657917023, "learning_rate": 4.817810013970626e-06, "loss": 0.9927, "step": 25170 }, { "epoch": 0.18226961135601932, "grad_norm": 0.21649324893951416, "learning_rate": 4.817737627310039e-06, "loss": 1.0065, "step": 25180 }, { "epoch": 0.1823419980166055, "grad_norm": 0.19755081832408905, "learning_rate": 4.817665240649454e-06, "loss": 1.0105, "step": 25190 }, { "epoch": 0.18241438467719168, "grad_norm": 0.15995916724205017, "learning_rate": 4.817592853988867e-06, "loss": 1.0064, "step": 25200 }, { "epoch": 0.18248677133777788, "grad_norm": 0.17277465760707855, "learning_rate": 4.817520467328281e-06, "loss": 1.0104, "step": 25210 }, { "epoch": 0.18255915799836406, "grad_norm": 0.17675970494747162, "learning_rate": 4.817448080667695e-06, "loss": 0.9952, "step": 25220 }, { "epoch": 0.18263154465895026, "grad_norm": 0.18293781578540802, "learning_rate": 4.817375694007109e-06, "loss": 0.9949, "step": 25230 }, { "epoch": 0.18270393131953644, "grad_norm": 0.18516694009304047, "learning_rate": 4.817303307346523e-06, "loss": 1.0042, "step": 25240 }, { "epoch": 0.18277631798012262, "grad_norm": 0.1832907646894455, "learning_rate": 4.817230920685936e-06, "loss": 1.0153, "step": 25250 }, { "epoch": 0.18284870464070882, "grad_norm": 0.17651574313640594, "learning_rate": 4.81715853402535e-06, "loss": 1.0011, "step": 25260 }, { "epoch": 0.182921091301295, "grad_norm": 0.17321477830410004, "learning_rate": 4.8170861473647636e-06, "loss": 0.9985, "step": 25270 }, { "epoch": 0.18299347796188117, "grad_norm": 0.16472671926021576, "learning_rate": 4.817013760704178e-06, "loss": 1.0067, "step": 25280 }, { "epoch": 0.18306586462246738, "grad_norm": 0.16202005743980408, "learning_rate": 4.816941374043592e-06, "loss": 1.0109, "step": 25290 }, { "epoch": 0.18313825128305355, "grad_norm": 0.19252698123455048, "learning_rate": 4.816868987383005e-06, "loss": 0.9949, "step": 25300 }, { "epoch": 0.18321063794363976, "grad_norm": 0.18542930483818054, "learning_rate": 4.816796600722419e-06, "loss": 1.0235, "step": 25310 }, { "epoch": 0.18328302460422594, "grad_norm": 0.17838090658187866, "learning_rate": 4.816724214061833e-06, "loss": 1.0196, "step": 25320 }, { "epoch": 0.1833554112648121, "grad_norm": 0.1758023202419281, "learning_rate": 4.816651827401247e-06, "loss": 1.0076, "step": 25330 }, { "epoch": 0.18342779792539832, "grad_norm": 0.16187690198421478, "learning_rate": 4.8165794407406606e-06, "loss": 1.0086, "step": 25340 }, { "epoch": 0.1835001845859845, "grad_norm": 0.16450618207454681, "learning_rate": 4.816507054080074e-06, "loss": 1.0065, "step": 25350 }, { "epoch": 0.18357257124657067, "grad_norm": 0.18235477805137634, "learning_rate": 4.816434667419489e-06, "loss": 1.0105, "step": 25360 }, { "epoch": 0.18364495790715687, "grad_norm": 0.1665814220905304, "learning_rate": 4.816362280758902e-06, "loss": 0.9997, "step": 25370 }, { "epoch": 0.18371734456774305, "grad_norm": 0.17312000691890717, "learning_rate": 4.816289894098316e-06, "loss": 1.0143, "step": 25380 }, { "epoch": 0.18378973122832926, "grad_norm": 0.16814294457435608, "learning_rate": 4.8162175074377295e-06, "loss": 0.987, "step": 25390 }, { "epoch": 0.18386211788891543, "grad_norm": 0.17651145160198212, "learning_rate": 4.816145120777144e-06, "loss": 1.0058, "step": 25400 }, { "epoch": 0.1839345045495016, "grad_norm": 0.1924390345811844, "learning_rate": 4.816072734116558e-06, "loss": 1.0145, "step": 25410 }, { "epoch": 0.18400689121008781, "grad_norm": 0.1660076081752777, "learning_rate": 4.816000347455971e-06, "loss": 0.9972, "step": 25420 }, { "epoch": 0.184079277870674, "grad_norm": 0.16928330063819885, "learning_rate": 4.815927960795385e-06, "loss": 1.0043, "step": 25430 }, { "epoch": 0.18415166453126017, "grad_norm": 0.2533145546913147, "learning_rate": 4.815855574134799e-06, "loss": 0.9977, "step": 25440 }, { "epoch": 0.18422405119184637, "grad_norm": 0.17467600107192993, "learning_rate": 4.815783187474213e-06, "loss": 1.0004, "step": 25450 }, { "epoch": 0.18429643785243255, "grad_norm": 0.16953355073928833, "learning_rate": 4.8157108008136265e-06, "loss": 1.0088, "step": 25460 }, { "epoch": 0.18436882451301875, "grad_norm": 0.17765529453754425, "learning_rate": 4.81563841415304e-06, "loss": 1.0144, "step": 25470 }, { "epoch": 0.18444121117360493, "grad_norm": 0.16797038912773132, "learning_rate": 4.815566027492455e-06, "loss": 1.0036, "step": 25480 }, { "epoch": 0.1845135978341911, "grad_norm": 0.17482724785804749, "learning_rate": 4.815493640831868e-06, "loss": 1.0054, "step": 25490 }, { "epoch": 0.1845859844947773, "grad_norm": 0.18403145670890808, "learning_rate": 4.815421254171282e-06, "loss": 1.0197, "step": 25500 }, { "epoch": 0.1846583711553635, "grad_norm": 0.1704588383436203, "learning_rate": 4.8153488675106954e-06, "loss": 1.0074, "step": 25510 }, { "epoch": 0.18473075781594966, "grad_norm": 0.1732064187526703, "learning_rate": 4.81527648085011e-06, "loss": 1.0107, "step": 25520 }, { "epoch": 0.18480314447653587, "grad_norm": 0.16858525574207306, "learning_rate": 4.8152040941895235e-06, "loss": 1.0017, "step": 25530 }, { "epoch": 0.18487553113712205, "grad_norm": 0.17689578235149384, "learning_rate": 4.815131707528937e-06, "loss": 0.995, "step": 25540 }, { "epoch": 0.18494791779770825, "grad_norm": 0.16487176716327667, "learning_rate": 4.815059320868351e-06, "loss": 1.0149, "step": 25550 }, { "epoch": 0.18502030445829443, "grad_norm": 0.15995535254478455, "learning_rate": 4.814986934207765e-06, "loss": 1.0098, "step": 25560 }, { "epoch": 0.1850926911188806, "grad_norm": 0.18028096854686737, "learning_rate": 4.814914547547178e-06, "loss": 0.9971, "step": 25570 }, { "epoch": 0.1851650777794668, "grad_norm": 0.1847427487373352, "learning_rate": 4.814842160886592e-06, "loss": 0.9958, "step": 25580 }, { "epoch": 0.18523746444005298, "grad_norm": 0.17257408797740936, "learning_rate": 4.814769774226006e-06, "loss": 1.0213, "step": 25590 }, { "epoch": 0.18530985110063916, "grad_norm": 0.17929920554161072, "learning_rate": 4.81469738756542e-06, "loss": 1.0089, "step": 25600 }, { "epoch": 0.18538223776122537, "grad_norm": 0.16596031188964844, "learning_rate": 4.814625000904833e-06, "loss": 1.0069, "step": 25610 }, { "epoch": 0.18545462442181154, "grad_norm": 0.17687474191188812, "learning_rate": 4.814552614244247e-06, "loss": 1.0072, "step": 25620 }, { "epoch": 0.18552701108239775, "grad_norm": 0.18298721313476562, "learning_rate": 4.814480227583661e-06, "loss": 1.0038, "step": 25630 }, { "epoch": 0.18559939774298392, "grad_norm": 0.17622758448123932, "learning_rate": 4.814407840923075e-06, "loss": 1.0052, "step": 25640 }, { "epoch": 0.1856717844035701, "grad_norm": 0.17824731767177582, "learning_rate": 4.814335454262489e-06, "loss": 1.0, "step": 25650 }, { "epoch": 0.1857441710641563, "grad_norm": 0.16804032027721405, "learning_rate": 4.814263067601902e-06, "loss": 1.0189, "step": 25660 }, { "epoch": 0.18581655772474248, "grad_norm": 0.16727906465530396, "learning_rate": 4.814190680941317e-06, "loss": 1.0104, "step": 25670 }, { "epoch": 0.18588894438532866, "grad_norm": 0.16687215864658356, "learning_rate": 4.81411829428073e-06, "loss": 1.0073, "step": 25680 }, { "epoch": 0.18596133104591486, "grad_norm": 0.16591374576091766, "learning_rate": 4.814045907620144e-06, "loss": 1.0267, "step": 25690 }, { "epoch": 0.18603371770650104, "grad_norm": 0.19451576471328735, "learning_rate": 4.8139735209595575e-06, "loss": 0.9904, "step": 25700 }, { "epoch": 0.18610610436708724, "grad_norm": 0.179254949092865, "learning_rate": 4.813901134298972e-06, "loss": 1.0082, "step": 25710 }, { "epoch": 0.18617849102767342, "grad_norm": 0.17360366880893707, "learning_rate": 4.813828747638386e-06, "loss": 1.0003, "step": 25720 }, { "epoch": 0.1862508776882596, "grad_norm": 0.15899336338043213, "learning_rate": 4.813756360977799e-06, "loss": 1.0011, "step": 25730 }, { "epoch": 0.1863232643488458, "grad_norm": 0.16736361384391785, "learning_rate": 4.813683974317213e-06, "loss": 1.0155, "step": 25740 }, { "epoch": 0.18639565100943198, "grad_norm": 0.18747742474079132, "learning_rate": 4.813611587656627e-06, "loss": 0.9963, "step": 25750 }, { "epoch": 0.18646803767001816, "grad_norm": 0.177093505859375, "learning_rate": 4.813539200996041e-06, "loss": 1.0041, "step": 25760 }, { "epoch": 0.18654042433060436, "grad_norm": 0.16255925595760345, "learning_rate": 4.8134668143354545e-06, "loss": 1.0181, "step": 25770 }, { "epoch": 0.18661281099119054, "grad_norm": 0.1680876463651657, "learning_rate": 4.813394427674868e-06, "loss": 1.0146, "step": 25780 }, { "epoch": 0.18668519765177674, "grad_norm": 0.16530559957027435, "learning_rate": 4.813322041014283e-06, "loss": 0.9985, "step": 25790 }, { "epoch": 0.18675758431236292, "grad_norm": 0.17303499579429626, "learning_rate": 4.813249654353696e-06, "loss": 1.0045, "step": 25800 }, { "epoch": 0.1868299709729491, "grad_norm": 0.18719731271266937, "learning_rate": 4.81317726769311e-06, "loss": 1.0188, "step": 25810 }, { "epoch": 0.1869023576335353, "grad_norm": 0.17420874536037445, "learning_rate": 4.8131048810325235e-06, "loss": 1.0054, "step": 25820 }, { "epoch": 0.18697474429412148, "grad_norm": 0.16283327341079712, "learning_rate": 4.813032494371938e-06, "loss": 1.01, "step": 25830 }, { "epoch": 0.18704713095470768, "grad_norm": 0.16841505467891693, "learning_rate": 4.8129601077113515e-06, "loss": 1.0008, "step": 25840 }, { "epoch": 0.18711951761529386, "grad_norm": 0.16842392086982727, "learning_rate": 4.812887721050765e-06, "loss": 0.9914, "step": 25850 }, { "epoch": 0.18719190427588003, "grad_norm": 0.16453711688518524, "learning_rate": 4.812815334390179e-06, "loss": 1.0025, "step": 25860 }, { "epoch": 0.18726429093646624, "grad_norm": 0.15623344480991364, "learning_rate": 4.812742947729593e-06, "loss": 1.0011, "step": 25870 }, { "epoch": 0.18733667759705241, "grad_norm": 0.1698412448167801, "learning_rate": 4.812670561069007e-06, "loss": 1.0064, "step": 25880 }, { "epoch": 0.1874090642576386, "grad_norm": 0.17024236917495728, "learning_rate": 4.8125981744084205e-06, "loss": 0.9973, "step": 25890 }, { "epoch": 0.1874814509182248, "grad_norm": 0.1828537881374359, "learning_rate": 4.812525787747834e-06, "loss": 1.0015, "step": 25900 }, { "epoch": 0.18755383757881097, "grad_norm": 0.25711390376091003, "learning_rate": 4.812453401087248e-06, "loss": 1.0139, "step": 25910 }, { "epoch": 0.18762622423939718, "grad_norm": 0.17483864724636078, "learning_rate": 4.812381014426662e-06, "loss": 1.0105, "step": 25920 }, { "epoch": 0.18769861089998335, "grad_norm": 0.18660502135753632, "learning_rate": 4.812308627766076e-06, "loss": 1.0001, "step": 25930 }, { "epoch": 0.18777099756056953, "grad_norm": 0.16732463240623474, "learning_rate": 4.812236241105489e-06, "loss": 0.9918, "step": 25940 }, { "epoch": 0.18784338422115573, "grad_norm": 0.16313007473945618, "learning_rate": 4.812163854444903e-06, "loss": 0.9983, "step": 25950 }, { "epoch": 0.1879157708817419, "grad_norm": 0.17107586562633514, "learning_rate": 4.8120914677843175e-06, "loss": 1.0116, "step": 25960 }, { "epoch": 0.1879881575423281, "grad_norm": 0.17458048462867737, "learning_rate": 4.812019081123731e-06, "loss": 0.998, "step": 25970 }, { "epoch": 0.1880605442029143, "grad_norm": 0.16045020520687103, "learning_rate": 4.811946694463145e-06, "loss": 0.9783, "step": 25980 }, { "epoch": 0.18813293086350047, "grad_norm": 0.1742679625749588, "learning_rate": 4.811874307802558e-06, "loss": 1.0056, "step": 25990 }, { "epoch": 0.18820531752408667, "grad_norm": 0.21241635084152222, "learning_rate": 4.811801921141973e-06, "loss": 1.0108, "step": 26000 }, { "epoch": 0.18827770418467285, "grad_norm": 0.2042105346918106, "learning_rate": 4.811729534481386e-06, "loss": 0.9975, "step": 26010 }, { "epoch": 0.18835009084525903, "grad_norm": 0.18309561908245087, "learning_rate": 4.8116571478208e-06, "loss": 1.0094, "step": 26020 }, { "epoch": 0.18842247750584523, "grad_norm": 0.17432443797588348, "learning_rate": 4.811584761160214e-06, "loss": 0.9972, "step": 26030 }, { "epoch": 0.1884948641664314, "grad_norm": 0.17750132083892822, "learning_rate": 4.811512374499628e-06, "loss": 0.9996, "step": 26040 }, { "epoch": 0.18856725082701759, "grad_norm": 0.17087271809577942, "learning_rate": 4.811439987839042e-06, "loss": 1.0022, "step": 26050 }, { "epoch": 0.1886396374876038, "grad_norm": 0.1847958117723465, "learning_rate": 4.811367601178455e-06, "loss": 1.0217, "step": 26060 }, { "epoch": 0.18871202414818997, "grad_norm": 0.20137399435043335, "learning_rate": 4.811295214517869e-06, "loss": 0.9926, "step": 26070 }, { "epoch": 0.18878441080877617, "grad_norm": 0.18774452805519104, "learning_rate": 4.811222827857283e-06, "loss": 1.0058, "step": 26080 }, { "epoch": 0.18885679746936235, "grad_norm": 0.18850480020046234, "learning_rate": 4.811150441196697e-06, "loss": 0.9942, "step": 26090 }, { "epoch": 0.18892918412994852, "grad_norm": 0.18268634378910065, "learning_rate": 4.81107805453611e-06, "loss": 0.9998, "step": 26100 }, { "epoch": 0.18900157079053473, "grad_norm": 0.16116634011268616, "learning_rate": 4.811005667875524e-06, "loss": 0.9966, "step": 26110 }, { "epoch": 0.1890739574511209, "grad_norm": 0.18132539093494415, "learning_rate": 4.810933281214938e-06, "loss": 1.006, "step": 26120 }, { "epoch": 0.18914634411170708, "grad_norm": 0.19577182829380035, "learning_rate": 4.8108608945543515e-06, "loss": 1.0125, "step": 26130 }, { "epoch": 0.1892187307722933, "grad_norm": 0.2031572461128235, "learning_rate": 4.810788507893765e-06, "loss": 1.0017, "step": 26140 }, { "epoch": 0.18929111743287946, "grad_norm": 0.1615428924560547, "learning_rate": 4.81071612123318e-06, "loss": 0.9947, "step": 26150 }, { "epoch": 0.18936350409346567, "grad_norm": 0.18610787391662598, "learning_rate": 4.810643734572593e-06, "loss": 1.0, "step": 26160 }, { "epoch": 0.18943589075405184, "grad_norm": 0.1651063859462738, "learning_rate": 4.810571347912007e-06, "loss": 0.9999, "step": 26170 }, { "epoch": 0.18950827741463802, "grad_norm": 0.1639026403427124, "learning_rate": 4.8104989612514204e-06, "loss": 1.0054, "step": 26180 }, { "epoch": 0.18958066407522423, "grad_norm": 0.17324158549308777, "learning_rate": 4.810426574590835e-06, "loss": 1.0045, "step": 26190 }, { "epoch": 0.1896530507358104, "grad_norm": 0.1581469178199768, "learning_rate": 4.8103541879302485e-06, "loss": 0.9932, "step": 26200 }, { "epoch": 0.18972543739639658, "grad_norm": 0.17741291224956512, "learning_rate": 4.810281801269662e-06, "loss": 0.9954, "step": 26210 }, { "epoch": 0.18979782405698278, "grad_norm": 0.21022439002990723, "learning_rate": 4.810209414609076e-06, "loss": 1.0093, "step": 26220 }, { "epoch": 0.18987021071756896, "grad_norm": 0.1657036393880844, "learning_rate": 4.81013702794849e-06, "loss": 0.9934, "step": 26230 }, { "epoch": 0.18994259737815516, "grad_norm": 0.17548425495624542, "learning_rate": 4.810064641287904e-06, "loss": 1.0048, "step": 26240 }, { "epoch": 0.19001498403874134, "grad_norm": 0.17935971915721893, "learning_rate": 4.8099922546273174e-06, "loss": 1.0143, "step": 26250 }, { "epoch": 0.19008737069932752, "grad_norm": 0.16744934022426605, "learning_rate": 4.809919867966731e-06, "loss": 0.9974, "step": 26260 }, { "epoch": 0.19015975735991372, "grad_norm": 0.18624848127365112, "learning_rate": 4.8098474813061455e-06, "loss": 1.0077, "step": 26270 }, { "epoch": 0.1902321440204999, "grad_norm": 0.21752850711345673, "learning_rate": 4.809775094645559e-06, "loss": 1.0113, "step": 26280 }, { "epoch": 0.19030453068108608, "grad_norm": 0.16781753301620483, "learning_rate": 4.809702707984973e-06, "loss": 1.0066, "step": 26290 }, { "epoch": 0.19037691734167228, "grad_norm": 0.16836953163146973, "learning_rate": 4.809630321324386e-06, "loss": 1.0001, "step": 26300 }, { "epoch": 0.19044930400225846, "grad_norm": 0.2364221215248108, "learning_rate": 4.809557934663801e-06, "loss": 0.9922, "step": 26310 }, { "epoch": 0.19052169066284466, "grad_norm": 0.18490445613861084, "learning_rate": 4.8094855480032144e-06, "loss": 1.0104, "step": 26320 }, { "epoch": 0.19059407732343084, "grad_norm": 0.1624281406402588, "learning_rate": 4.809413161342628e-06, "loss": 0.9976, "step": 26330 }, { "epoch": 0.19066646398401702, "grad_norm": 0.17725110054016113, "learning_rate": 4.809340774682042e-06, "loss": 1.0087, "step": 26340 }, { "epoch": 0.19073885064460322, "grad_norm": 0.1625022143125534, "learning_rate": 4.809268388021456e-06, "loss": 1.0036, "step": 26350 }, { "epoch": 0.1908112373051894, "grad_norm": 0.1683979332447052, "learning_rate": 4.80919600136087e-06, "loss": 0.9992, "step": 26360 }, { "epoch": 0.1908836239657756, "grad_norm": 0.22802671790122986, "learning_rate": 4.809123614700283e-06, "loss": 1.0043, "step": 26370 }, { "epoch": 0.19095601062636178, "grad_norm": 0.16152718663215637, "learning_rate": 4.809051228039697e-06, "loss": 0.9968, "step": 26380 }, { "epoch": 0.19102839728694795, "grad_norm": 0.17749905586242676, "learning_rate": 4.8089788413791115e-06, "loss": 1.0124, "step": 26390 }, { "epoch": 0.19110078394753416, "grad_norm": 0.16626082360744476, "learning_rate": 4.808906454718525e-06, "loss": 0.9949, "step": 26400 }, { "epoch": 0.19117317060812034, "grad_norm": 0.1978878676891327, "learning_rate": 4.808834068057939e-06, "loss": 1.0022, "step": 26410 }, { "epoch": 0.1912455572687065, "grad_norm": 0.1693107634782791, "learning_rate": 4.808761681397352e-06, "loss": 0.9924, "step": 26420 }, { "epoch": 0.19131794392929272, "grad_norm": 0.18437570333480835, "learning_rate": 4.808689294736767e-06, "loss": 1.0065, "step": 26430 }, { "epoch": 0.1913903305898789, "grad_norm": 0.1534915566444397, "learning_rate": 4.80861690807618e-06, "loss": 1.0144, "step": 26440 }, { "epoch": 0.1914627172504651, "grad_norm": 0.17550812661647797, "learning_rate": 4.808544521415594e-06, "loss": 0.9951, "step": 26450 }, { "epoch": 0.19153510391105127, "grad_norm": 0.19502116739749908, "learning_rate": 4.808472134755008e-06, "loss": 0.9999, "step": 26460 }, { "epoch": 0.19160749057163745, "grad_norm": 0.1792958527803421, "learning_rate": 4.808399748094422e-06, "loss": 0.9968, "step": 26470 }, { "epoch": 0.19167987723222366, "grad_norm": 0.16823455691337585, "learning_rate": 4.808327361433836e-06, "loss": 1.0029, "step": 26480 }, { "epoch": 0.19175226389280983, "grad_norm": 0.17597924172878265, "learning_rate": 4.808254974773249e-06, "loss": 1.008, "step": 26490 }, { "epoch": 0.191824650553396, "grad_norm": 0.1772463321685791, "learning_rate": 4.808182588112663e-06, "loss": 0.9921, "step": 26500 }, { "epoch": 0.1918970372139822, "grad_norm": 0.1926368921995163, "learning_rate": 4.808110201452077e-06, "loss": 0.9993, "step": 26510 }, { "epoch": 0.1919694238745684, "grad_norm": 0.16659504175186157, "learning_rate": 4.808037814791491e-06, "loss": 1.0063, "step": 26520 }, { "epoch": 0.1920418105351546, "grad_norm": 0.1627788245677948, "learning_rate": 4.807965428130905e-06, "loss": 0.9983, "step": 26530 }, { "epoch": 0.19211419719574077, "grad_norm": 0.30011650919914246, "learning_rate": 4.807893041470318e-06, "loss": 0.9909, "step": 26540 }, { "epoch": 0.19218658385632695, "grad_norm": 0.17447486519813538, "learning_rate": 4.807820654809732e-06, "loss": 0.9992, "step": 26550 }, { "epoch": 0.19225897051691315, "grad_norm": 0.17184774577617645, "learning_rate": 4.807748268149146e-06, "loss": 1.0044, "step": 26560 }, { "epoch": 0.19233135717749933, "grad_norm": 0.17131789028644562, "learning_rate": 4.80767588148856e-06, "loss": 1.0048, "step": 26570 }, { "epoch": 0.1924037438380855, "grad_norm": 0.1681704968214035, "learning_rate": 4.8076034948279735e-06, "loss": 1.0105, "step": 26580 }, { "epoch": 0.1924761304986717, "grad_norm": 0.18246489763259888, "learning_rate": 4.807531108167387e-06, "loss": 0.9853, "step": 26590 }, { "epoch": 0.1925485171592579, "grad_norm": 0.1730792075395584, "learning_rate": 4.807458721506802e-06, "loss": 1.0048, "step": 26600 }, { "epoch": 0.1926209038198441, "grad_norm": 0.1860353797674179, "learning_rate": 4.807386334846215e-06, "loss": 1.0024, "step": 26610 }, { "epoch": 0.19269329048043027, "grad_norm": 0.16695286333560944, "learning_rate": 4.807313948185629e-06, "loss": 1.0142, "step": 26620 }, { "epoch": 0.19276567714101644, "grad_norm": 0.17245034873485565, "learning_rate": 4.8072415615250425e-06, "loss": 1.0003, "step": 26630 }, { "epoch": 0.19283806380160265, "grad_norm": 0.16842077672481537, "learning_rate": 4.807169174864456e-06, "loss": 1.0141, "step": 26640 }, { "epoch": 0.19291045046218883, "grad_norm": 0.16834910213947296, "learning_rate": 4.80709678820387e-06, "loss": 0.9903, "step": 26650 }, { "epoch": 0.192982837122775, "grad_norm": 0.1826198846101761, "learning_rate": 4.807024401543284e-06, "loss": 1.0076, "step": 26660 }, { "epoch": 0.1930552237833612, "grad_norm": 0.17187774181365967, "learning_rate": 4.806952014882698e-06, "loss": 1.0037, "step": 26670 }, { "epoch": 0.19312761044394738, "grad_norm": 0.1815541386604309, "learning_rate": 4.806879628222111e-06, "loss": 1.0086, "step": 26680 }, { "epoch": 0.1931999971045336, "grad_norm": 0.18151985108852386, "learning_rate": 4.806807241561525e-06, "loss": 0.9894, "step": 26690 }, { "epoch": 0.19327238376511977, "grad_norm": 0.17643725872039795, "learning_rate": 4.806734854900939e-06, "loss": 1.0044, "step": 26700 }, { "epoch": 0.19334477042570594, "grad_norm": 0.17447534203529358, "learning_rate": 4.806662468240353e-06, "loss": 1.0114, "step": 26710 }, { "epoch": 0.19341715708629215, "grad_norm": 0.16424672305583954, "learning_rate": 4.806590081579767e-06, "loss": 1.0033, "step": 26720 }, { "epoch": 0.19348954374687832, "grad_norm": 0.16802680492401123, "learning_rate": 4.80651769491918e-06, "loss": 1.0072, "step": 26730 }, { "epoch": 0.1935619304074645, "grad_norm": 0.17925448715686798, "learning_rate": 4.806445308258594e-06, "loss": 0.9935, "step": 26740 }, { "epoch": 0.1936343170680507, "grad_norm": 0.17291328310966492, "learning_rate": 4.806372921598008e-06, "loss": 0.9942, "step": 26750 }, { "epoch": 0.19370670372863688, "grad_norm": 0.18986187875270844, "learning_rate": 4.806300534937422e-06, "loss": 1.0095, "step": 26760 }, { "epoch": 0.19377909038922309, "grad_norm": 0.15968811511993408, "learning_rate": 4.806228148276836e-06, "loss": 1.0083, "step": 26770 }, { "epoch": 0.19385147704980926, "grad_norm": 0.18246155977249146, "learning_rate": 4.806155761616249e-06, "loss": 0.9959, "step": 26780 }, { "epoch": 0.19392386371039544, "grad_norm": 0.1724570244550705, "learning_rate": 4.806083374955664e-06, "loss": 0.993, "step": 26790 }, { "epoch": 0.19399625037098164, "grad_norm": 0.16867926716804504, "learning_rate": 4.806010988295077e-06, "loss": 1.0064, "step": 26800 }, { "epoch": 0.19406863703156782, "grad_norm": 0.16909553110599518, "learning_rate": 4.805938601634491e-06, "loss": 1.0073, "step": 26810 }, { "epoch": 0.194141023692154, "grad_norm": 0.2134760469198227, "learning_rate": 4.8058662149739046e-06, "loss": 0.9893, "step": 26820 }, { "epoch": 0.1942134103527402, "grad_norm": 0.16521146893501282, "learning_rate": 4.805793828313319e-06, "loss": 0.9962, "step": 26830 }, { "epoch": 0.19428579701332638, "grad_norm": 0.16288983821868896, "learning_rate": 4.805721441652733e-06, "loss": 0.9958, "step": 26840 }, { "epoch": 0.19435818367391258, "grad_norm": 0.17485104501247406, "learning_rate": 4.805649054992146e-06, "loss": 1.0048, "step": 26850 }, { "epoch": 0.19443057033449876, "grad_norm": 0.15907074511051178, "learning_rate": 4.80557666833156e-06, "loss": 1.0056, "step": 26860 }, { "epoch": 0.19450295699508494, "grad_norm": 0.17798353731632233, "learning_rate": 4.805504281670974e-06, "loss": 1.0024, "step": 26870 }, { "epoch": 0.19457534365567114, "grad_norm": 0.18437181413173676, "learning_rate": 4.805431895010388e-06, "loss": 0.9971, "step": 26880 }, { "epoch": 0.19464773031625732, "grad_norm": 0.15452776849269867, "learning_rate": 4.805359508349802e-06, "loss": 1.0092, "step": 26890 }, { "epoch": 0.1947201169768435, "grad_norm": 0.16308918595314026, "learning_rate": 4.805287121689215e-06, "loss": 1.0055, "step": 26900 }, { "epoch": 0.1947925036374297, "grad_norm": 0.1587022840976715, "learning_rate": 4.80521473502863e-06, "loss": 1.0006, "step": 26910 }, { "epoch": 0.19486489029801587, "grad_norm": 0.1751098334789276, "learning_rate": 4.805142348368043e-06, "loss": 0.9859, "step": 26920 }, { "epoch": 0.19493727695860208, "grad_norm": 0.15153715014457703, "learning_rate": 4.805069961707457e-06, "loss": 1.0112, "step": 26930 }, { "epoch": 0.19500966361918826, "grad_norm": 0.16435284912586212, "learning_rate": 4.8049975750468705e-06, "loss": 1.0057, "step": 26940 }, { "epoch": 0.19508205027977443, "grad_norm": 0.17554335296154022, "learning_rate": 4.804925188386285e-06, "loss": 1.0051, "step": 26950 }, { "epoch": 0.19515443694036064, "grad_norm": 0.23302575945854187, "learning_rate": 4.804852801725699e-06, "loss": 1.0029, "step": 26960 }, { "epoch": 0.1952268236009468, "grad_norm": 0.16776177287101746, "learning_rate": 4.804780415065112e-06, "loss": 0.9986, "step": 26970 }, { "epoch": 0.19529921026153302, "grad_norm": 0.15875661373138428, "learning_rate": 4.804708028404526e-06, "loss": 0.9944, "step": 26980 }, { "epoch": 0.1953715969221192, "grad_norm": 0.16342279314994812, "learning_rate": 4.80463564174394e-06, "loss": 1.0014, "step": 26990 }, { "epoch": 0.19544398358270537, "grad_norm": 0.16927549242973328, "learning_rate": 4.804563255083354e-06, "loss": 1.0005, "step": 27000 }, { "epoch": 0.19551637024329158, "grad_norm": 0.17107799649238586, "learning_rate": 4.8044908684227675e-06, "loss": 0.9865, "step": 27010 }, { "epoch": 0.19558875690387775, "grad_norm": 0.156640887260437, "learning_rate": 4.804418481762181e-06, "loss": 0.9957, "step": 27020 }, { "epoch": 0.19566114356446393, "grad_norm": 0.20385904610157013, "learning_rate": 4.804346095101596e-06, "loss": 0.9996, "step": 27030 }, { "epoch": 0.19573353022505013, "grad_norm": 0.15625648200511932, "learning_rate": 4.804273708441009e-06, "loss": 1.0155, "step": 27040 }, { "epoch": 0.1958059168856363, "grad_norm": 0.17145240306854248, "learning_rate": 4.804201321780423e-06, "loss": 0.9947, "step": 27050 }, { "epoch": 0.19587830354622252, "grad_norm": 0.20977072417736053, "learning_rate": 4.8041289351198364e-06, "loss": 1.0036, "step": 27060 }, { "epoch": 0.1959506902068087, "grad_norm": 0.163489431142807, "learning_rate": 4.804056548459251e-06, "loss": 1.0106, "step": 27070 }, { "epoch": 0.19602307686739487, "grad_norm": 0.17504650354385376, "learning_rate": 4.8039841617986645e-06, "loss": 0.9896, "step": 27080 }, { "epoch": 0.19609546352798107, "grad_norm": 0.16525472700595856, "learning_rate": 4.803911775138078e-06, "loss": 1.0065, "step": 27090 }, { "epoch": 0.19616785018856725, "grad_norm": 0.20987266302108765, "learning_rate": 4.803839388477492e-06, "loss": 1.0003, "step": 27100 }, { "epoch": 0.19624023684915343, "grad_norm": 0.17910590767860413, "learning_rate": 4.803767001816906e-06, "loss": 0.9886, "step": 27110 }, { "epoch": 0.19631262350973963, "grad_norm": 0.17571011185646057, "learning_rate": 4.80369461515632e-06, "loss": 0.9865, "step": 27120 }, { "epoch": 0.1963850101703258, "grad_norm": 0.1787402182817459, "learning_rate": 4.8036222284957334e-06, "loss": 1.0061, "step": 27130 }, { "epoch": 0.196457396830912, "grad_norm": 0.18772943317890167, "learning_rate": 4.803549841835147e-06, "loss": 0.9935, "step": 27140 }, { "epoch": 0.1965297834914982, "grad_norm": 0.17668381333351135, "learning_rate": 4.803477455174561e-06, "loss": 1.008, "step": 27150 }, { "epoch": 0.19660217015208437, "grad_norm": 0.16147243976593018, "learning_rate": 4.803405068513974e-06, "loss": 0.996, "step": 27160 }, { "epoch": 0.19667455681267057, "grad_norm": 0.1802648901939392, "learning_rate": 4.803332681853388e-06, "loss": 1.0033, "step": 27170 }, { "epoch": 0.19674694347325675, "grad_norm": 0.1721193492412567, "learning_rate": 4.803260295192802e-06, "loss": 1.0023, "step": 27180 }, { "epoch": 0.19681933013384292, "grad_norm": 0.1651720553636551, "learning_rate": 4.803187908532216e-06, "loss": 1.0125, "step": 27190 }, { "epoch": 0.19689171679442913, "grad_norm": 0.2428152859210968, "learning_rate": 4.80311552187163e-06, "loss": 1.0131, "step": 27200 }, { "epoch": 0.1969641034550153, "grad_norm": 0.17188434302806854, "learning_rate": 4.803043135211043e-06, "loss": 0.9972, "step": 27210 }, { "epoch": 0.1970364901156015, "grad_norm": 0.1973094493150711, "learning_rate": 4.802970748550458e-06, "loss": 0.9983, "step": 27220 }, { "epoch": 0.19710887677618769, "grad_norm": 0.16881392896175385, "learning_rate": 4.802898361889871e-06, "loss": 1.0107, "step": 27230 }, { "epoch": 0.19718126343677386, "grad_norm": 0.17442533373832703, "learning_rate": 4.802825975229285e-06, "loss": 1.0153, "step": 27240 }, { "epoch": 0.19725365009736007, "grad_norm": 0.18059037625789642, "learning_rate": 4.8027535885686985e-06, "loss": 1.0168, "step": 27250 }, { "epoch": 0.19732603675794624, "grad_norm": 0.19683484733104706, "learning_rate": 4.802681201908113e-06, "loss": 0.9942, "step": 27260 }, { "epoch": 0.19739842341853242, "grad_norm": 0.1710311323404312, "learning_rate": 4.802608815247527e-06, "loss": 1.0155, "step": 27270 }, { "epoch": 0.19747081007911862, "grad_norm": 0.16936126351356506, "learning_rate": 4.80253642858694e-06, "loss": 0.9969, "step": 27280 }, { "epoch": 0.1975431967397048, "grad_norm": 0.19875222444534302, "learning_rate": 4.802464041926354e-06, "loss": 0.9913, "step": 27290 }, { "epoch": 0.197615583400291, "grad_norm": 0.16911375522613525, "learning_rate": 4.802391655265768e-06, "loss": 1.0025, "step": 27300 }, { "epoch": 0.19768797006087718, "grad_norm": 0.16510553658008575, "learning_rate": 4.802319268605182e-06, "loss": 1.0083, "step": 27310 }, { "epoch": 0.19776035672146336, "grad_norm": 0.17663301527500153, "learning_rate": 4.8022468819445955e-06, "loss": 0.9859, "step": 27320 }, { "epoch": 0.19783274338204956, "grad_norm": 0.18130449950695038, "learning_rate": 4.802174495284009e-06, "loss": 1.0023, "step": 27330 }, { "epoch": 0.19790513004263574, "grad_norm": 0.1663258969783783, "learning_rate": 4.802102108623423e-06, "loss": 1.0032, "step": 27340 }, { "epoch": 0.19797751670322192, "grad_norm": 0.17057499289512634, "learning_rate": 4.802029721962837e-06, "loss": 0.9985, "step": 27350 }, { "epoch": 0.19804990336380812, "grad_norm": 0.1627529412508011, "learning_rate": 4.801957335302251e-06, "loss": 1.021, "step": 27360 }, { "epoch": 0.1981222900243943, "grad_norm": 0.1599203497171402, "learning_rate": 4.8018849486416645e-06, "loss": 1.0064, "step": 27370 }, { "epoch": 0.1981946766849805, "grad_norm": 0.199107825756073, "learning_rate": 4.801812561981078e-06, "loss": 1.0142, "step": 27380 }, { "epoch": 0.19826706334556668, "grad_norm": 0.19576513767242432, "learning_rate": 4.8017401753204926e-06, "loss": 1.0037, "step": 27390 }, { "epoch": 0.19833945000615286, "grad_norm": 0.16550497710704803, "learning_rate": 4.801667788659906e-06, "loss": 0.9853, "step": 27400 }, { "epoch": 0.19841183666673906, "grad_norm": 0.17709434032440186, "learning_rate": 4.80159540199932e-06, "loss": 1.0031, "step": 27410 }, { "epoch": 0.19848422332732524, "grad_norm": 0.1671217530965805, "learning_rate": 4.801523015338733e-06, "loss": 0.9927, "step": 27420 }, { "epoch": 0.19855660998791141, "grad_norm": 0.16205404698848724, "learning_rate": 4.801450628678148e-06, "loss": 1.004, "step": 27430 }, { "epoch": 0.19862899664849762, "grad_norm": 0.21994493901729584, "learning_rate": 4.8013782420175615e-06, "loss": 0.9901, "step": 27440 }, { "epoch": 0.1987013833090838, "grad_norm": 0.17155443131923676, "learning_rate": 4.801305855356975e-06, "loss": 1.011, "step": 27450 }, { "epoch": 0.19877376996967, "grad_norm": 0.1630110889673233, "learning_rate": 4.801233468696389e-06, "loss": 0.9974, "step": 27460 }, { "epoch": 0.19884615663025618, "grad_norm": 0.17589977383613586, "learning_rate": 4.801161082035803e-06, "loss": 0.9816, "step": 27470 }, { "epoch": 0.19891854329084235, "grad_norm": 0.16798698902130127, "learning_rate": 4.801088695375217e-06, "loss": 1.0086, "step": 27480 }, { "epoch": 0.19899092995142856, "grad_norm": 0.18393242359161377, "learning_rate": 4.80101630871463e-06, "loss": 1.01, "step": 27490 }, { "epoch": 0.19906331661201473, "grad_norm": 0.1627999097108841, "learning_rate": 4.800943922054044e-06, "loss": 1.0033, "step": 27500 }, { "epoch": 0.1991357032726009, "grad_norm": 0.1707964688539505, "learning_rate": 4.8008715353934585e-06, "loss": 0.9966, "step": 27510 }, { "epoch": 0.19920808993318712, "grad_norm": 0.154254749417305, "learning_rate": 4.800799148732872e-06, "loss": 0.9943, "step": 27520 }, { "epoch": 0.1992804765937733, "grad_norm": 0.17467810213565826, "learning_rate": 4.800726762072286e-06, "loss": 0.9986, "step": 27530 }, { "epoch": 0.1993528632543595, "grad_norm": 0.17175132036209106, "learning_rate": 4.800654375411699e-06, "loss": 1.0, "step": 27540 }, { "epoch": 0.19942524991494567, "grad_norm": 0.17211025953292847, "learning_rate": 4.800581988751114e-06, "loss": 1.011, "step": 27550 }, { "epoch": 0.19949763657553185, "grad_norm": 0.16861172020435333, "learning_rate": 4.800509602090527e-06, "loss": 1.0082, "step": 27560 }, { "epoch": 0.19957002323611805, "grad_norm": 0.20472027361392975, "learning_rate": 4.800437215429941e-06, "loss": 0.9906, "step": 27570 }, { "epoch": 0.19964240989670423, "grad_norm": 0.16416484117507935, "learning_rate": 4.800364828769355e-06, "loss": 0.9934, "step": 27580 }, { "epoch": 0.19971479655729044, "grad_norm": 0.16409383714199066, "learning_rate": 4.800292442108769e-06, "loss": 0.9885, "step": 27590 }, { "epoch": 0.1997871832178766, "grad_norm": 0.17067715525627136, "learning_rate": 4.800220055448183e-06, "loss": 0.9954, "step": 27600 }, { "epoch": 0.1998595698784628, "grad_norm": 0.18361663818359375, "learning_rate": 4.800147668787596e-06, "loss": 1.0033, "step": 27610 }, { "epoch": 0.199931956539049, "grad_norm": 0.1660345494747162, "learning_rate": 4.80007528212701e-06, "loss": 1.0032, "step": 27620 }, { "epoch": 0.20000434319963517, "grad_norm": 0.16931919753551483, "learning_rate": 4.8000028954664244e-06, "loss": 1.0006, "step": 27630 }, { "epoch": 0.20007672986022135, "grad_norm": 0.1660221815109253, "learning_rate": 4.799930508805838e-06, "loss": 1.0126, "step": 27640 }, { "epoch": 0.20014911652080755, "grad_norm": 0.179294615983963, "learning_rate": 4.799858122145252e-06, "loss": 1.0143, "step": 27650 }, { "epoch": 0.20022150318139373, "grad_norm": 0.19794364273548126, "learning_rate": 4.799785735484665e-06, "loss": 1.0057, "step": 27660 }, { "epoch": 0.20029388984197993, "grad_norm": 0.1863810420036316, "learning_rate": 4.79971334882408e-06, "loss": 0.9972, "step": 27670 }, { "epoch": 0.2003662765025661, "grad_norm": 0.17206966876983643, "learning_rate": 4.799640962163493e-06, "loss": 1.0025, "step": 27680 }, { "epoch": 0.2004386631631523, "grad_norm": 0.18598827719688416, "learning_rate": 4.799568575502906e-06, "loss": 1.0035, "step": 27690 }, { "epoch": 0.2005110498237385, "grad_norm": 0.17448210716247559, "learning_rate": 4.799496188842321e-06, "loss": 1.0121, "step": 27700 }, { "epoch": 0.20058343648432467, "grad_norm": 0.1639571338891983, "learning_rate": 4.799423802181734e-06, "loss": 1.0005, "step": 27710 }, { "epoch": 0.20065582314491084, "grad_norm": 0.16156673431396484, "learning_rate": 4.799351415521148e-06, "loss": 0.9937, "step": 27720 }, { "epoch": 0.20072820980549705, "grad_norm": 0.1673559546470642, "learning_rate": 4.7992790288605614e-06, "loss": 0.9872, "step": 27730 }, { "epoch": 0.20080059646608323, "grad_norm": 0.16240374743938446, "learning_rate": 4.799206642199976e-06, "loss": 1.0194, "step": 27740 }, { "epoch": 0.20087298312666943, "grad_norm": 0.16212816536426544, "learning_rate": 4.7991342555393895e-06, "loss": 0.9928, "step": 27750 }, { "epoch": 0.2009453697872556, "grad_norm": 0.15658411383628845, "learning_rate": 4.799061868878803e-06, "loss": 0.9976, "step": 27760 }, { "epoch": 0.20101775644784178, "grad_norm": 0.16336016356945038, "learning_rate": 4.798989482218217e-06, "loss": 1.0189, "step": 27770 }, { "epoch": 0.201090143108428, "grad_norm": 0.18533754348754883, "learning_rate": 4.798917095557631e-06, "loss": 0.9867, "step": 27780 }, { "epoch": 0.20116252976901416, "grad_norm": 0.1857583224773407, "learning_rate": 4.798844708897045e-06, "loss": 0.9994, "step": 27790 }, { "epoch": 0.20123491642960034, "grad_norm": 0.19050684571266174, "learning_rate": 4.7987723222364584e-06, "loss": 0.997, "step": 27800 }, { "epoch": 0.20130730309018655, "grad_norm": 0.17281419038772583, "learning_rate": 4.798699935575872e-06, "loss": 1.0018, "step": 27810 }, { "epoch": 0.20137968975077272, "grad_norm": 0.169466033577919, "learning_rate": 4.7986275489152865e-06, "loss": 0.997, "step": 27820 }, { "epoch": 0.20145207641135893, "grad_norm": 0.16908937692642212, "learning_rate": 4.7985551622547e-06, "loss": 1.0002, "step": 27830 }, { "epoch": 0.2015244630719451, "grad_norm": 0.17299243807792664, "learning_rate": 4.798482775594114e-06, "loss": 1.0038, "step": 27840 }, { "epoch": 0.20159684973253128, "grad_norm": 0.16563472151756287, "learning_rate": 4.798410388933527e-06, "loss": 0.9915, "step": 27850 }, { "epoch": 0.20166923639311748, "grad_norm": 0.19840186834335327, "learning_rate": 4.798338002272942e-06, "loss": 1.0037, "step": 27860 }, { "epoch": 0.20174162305370366, "grad_norm": 0.17237645387649536, "learning_rate": 4.7982656156123554e-06, "loss": 1.012, "step": 27870 }, { "epoch": 0.20181400971428984, "grad_norm": 0.18129993975162506, "learning_rate": 4.798193228951769e-06, "loss": 1.0009, "step": 27880 }, { "epoch": 0.20188639637487604, "grad_norm": 0.16735847294330597, "learning_rate": 4.798120842291183e-06, "loss": 1.0198, "step": 27890 }, { "epoch": 0.20195878303546222, "grad_norm": 0.16451966762542725, "learning_rate": 4.798048455630597e-06, "loss": 1.0038, "step": 27900 }, { "epoch": 0.20203116969604842, "grad_norm": 0.1658269166946411, "learning_rate": 4.797976068970011e-06, "loss": 0.9934, "step": 27910 }, { "epoch": 0.2021035563566346, "grad_norm": 0.17041842639446259, "learning_rate": 4.797903682309424e-06, "loss": 0.9873, "step": 27920 }, { "epoch": 0.20217594301722078, "grad_norm": 0.18290521204471588, "learning_rate": 4.797831295648838e-06, "loss": 0.9885, "step": 27930 }, { "epoch": 0.20224832967780698, "grad_norm": 0.173233300447464, "learning_rate": 4.797758908988252e-06, "loss": 0.9977, "step": 27940 }, { "epoch": 0.20232071633839316, "grad_norm": 0.1651332825422287, "learning_rate": 4.797686522327666e-06, "loss": 0.9989, "step": 27950 }, { "epoch": 0.20239310299897934, "grad_norm": 0.18143868446350098, "learning_rate": 4.79761413566708e-06, "loss": 1.0013, "step": 27960 }, { "epoch": 0.20246548965956554, "grad_norm": 0.18052855134010315, "learning_rate": 4.797541749006493e-06, "loss": 1.0127, "step": 27970 }, { "epoch": 0.20253787632015172, "grad_norm": 0.1647488921880722, "learning_rate": 4.797469362345907e-06, "loss": 0.9766, "step": 27980 }, { "epoch": 0.20261026298073792, "grad_norm": 0.18632908165454865, "learning_rate": 4.797396975685321e-06, "loss": 0.9951, "step": 27990 }, { "epoch": 0.2026826496413241, "grad_norm": 0.1738438606262207, "learning_rate": 4.797324589024735e-06, "loss": 1.0055, "step": 28000 }, { "epoch": 0.20275503630191027, "grad_norm": 0.17190273106098175, "learning_rate": 4.797252202364149e-06, "loss": 1.003, "step": 28010 }, { "epoch": 0.20282742296249648, "grad_norm": 0.16988669335842133, "learning_rate": 4.797179815703562e-06, "loss": 0.9932, "step": 28020 }, { "epoch": 0.20289980962308266, "grad_norm": 0.16658180952072144, "learning_rate": 4.797107429042977e-06, "loss": 1.0075, "step": 28030 }, { "epoch": 0.20297219628366883, "grad_norm": 0.16553157567977905, "learning_rate": 4.79703504238239e-06, "loss": 1.0056, "step": 28040 }, { "epoch": 0.20304458294425504, "grad_norm": 0.17701905965805054, "learning_rate": 4.796962655721804e-06, "loss": 1.0115, "step": 28050 }, { "epoch": 0.2031169696048412, "grad_norm": 0.16136251389980316, "learning_rate": 4.7968902690612175e-06, "loss": 0.9909, "step": 28060 }, { "epoch": 0.20318935626542742, "grad_norm": 0.17143204808235168, "learning_rate": 4.796817882400632e-06, "loss": 0.9944, "step": 28070 }, { "epoch": 0.2032617429260136, "grad_norm": 0.16544604301452637, "learning_rate": 4.796745495740046e-06, "loss": 1.0009, "step": 28080 }, { "epoch": 0.20333412958659977, "grad_norm": 0.1553160548210144, "learning_rate": 4.796673109079459e-06, "loss": 1.0123, "step": 28090 }, { "epoch": 0.20340651624718598, "grad_norm": 0.16683773696422577, "learning_rate": 4.796600722418873e-06, "loss": 1.0095, "step": 28100 }, { "epoch": 0.20347890290777215, "grad_norm": 0.16187171638011932, "learning_rate": 4.796528335758287e-06, "loss": 0.9973, "step": 28110 }, { "epoch": 0.20355128956835836, "grad_norm": 0.1778879463672638, "learning_rate": 4.796455949097701e-06, "loss": 1.012, "step": 28120 }, { "epoch": 0.20362367622894453, "grad_norm": 0.1824272871017456, "learning_rate": 4.7963835624371146e-06, "loss": 0.9874, "step": 28130 }, { "epoch": 0.2036960628895307, "grad_norm": 0.15840831398963928, "learning_rate": 4.796311175776528e-06, "loss": 1.0078, "step": 28140 }, { "epoch": 0.20376844955011691, "grad_norm": 0.1720242202281952, "learning_rate": 4.796238789115943e-06, "loss": 0.9946, "step": 28150 }, { "epoch": 0.2038408362107031, "grad_norm": 0.18525436520576477, "learning_rate": 4.796166402455356e-06, "loss": 0.9854, "step": 28160 }, { "epoch": 0.20391322287128927, "grad_norm": 0.16889312863349915, "learning_rate": 4.79609401579477e-06, "loss": 0.9788, "step": 28170 }, { "epoch": 0.20398560953187547, "grad_norm": 0.1737358719110489, "learning_rate": 4.7960216291341835e-06, "loss": 1.008, "step": 28180 }, { "epoch": 0.20405799619246165, "grad_norm": 0.1770784854888916, "learning_rate": 4.795949242473598e-06, "loss": 0.9884, "step": 28190 }, { "epoch": 0.20413038285304785, "grad_norm": 0.17560461163520813, "learning_rate": 4.7958768558130116e-06, "loss": 0.9923, "step": 28200 }, { "epoch": 0.20420276951363403, "grad_norm": 0.210534930229187, "learning_rate": 4.795804469152425e-06, "loss": 1.0002, "step": 28210 }, { "epoch": 0.2042751561742202, "grad_norm": 0.1665802001953125, "learning_rate": 4.795732082491839e-06, "loss": 0.9887, "step": 28220 }, { "epoch": 0.2043475428348064, "grad_norm": 0.17272193729877472, "learning_rate": 4.795659695831252e-06, "loss": 1.007, "step": 28230 }, { "epoch": 0.2044199294953926, "grad_norm": 0.16487373411655426, "learning_rate": 4.795587309170666e-06, "loss": 1.0067, "step": 28240 }, { "epoch": 0.20449231615597876, "grad_norm": 0.16630111634731293, "learning_rate": 4.79551492251008e-06, "loss": 0.9795, "step": 28250 }, { "epoch": 0.20456470281656497, "grad_norm": 0.17112283408641815, "learning_rate": 4.795442535849494e-06, "loss": 1.001, "step": 28260 }, { "epoch": 0.20463708947715115, "grad_norm": 0.17622940242290497, "learning_rate": 4.795370149188908e-06, "loss": 0.9859, "step": 28270 }, { "epoch": 0.20470947613773735, "grad_norm": 0.16943074762821198, "learning_rate": 4.795297762528321e-06, "loss": 1.0008, "step": 28280 }, { "epoch": 0.20478186279832353, "grad_norm": 0.16673767566680908, "learning_rate": 4.795225375867735e-06, "loss": 0.9991, "step": 28290 }, { "epoch": 0.2048542494589097, "grad_norm": 0.16490839421749115, "learning_rate": 4.795152989207149e-06, "loss": 1.0065, "step": 28300 }, { "epoch": 0.2049266361194959, "grad_norm": 0.16834768652915955, "learning_rate": 4.795080602546563e-06, "loss": 1.0002, "step": 28310 }, { "epoch": 0.20499902278008209, "grad_norm": 0.15660522878170013, "learning_rate": 4.795008215885977e-06, "loss": 1.008, "step": 28320 }, { "epoch": 0.20507140944066826, "grad_norm": 0.1734970211982727, "learning_rate": 4.79493582922539e-06, "loss": 0.9975, "step": 28330 }, { "epoch": 0.20514379610125447, "grad_norm": 0.17806610465049744, "learning_rate": 4.794863442564805e-06, "loss": 0.9932, "step": 28340 }, { "epoch": 0.20521618276184064, "grad_norm": 0.1722010225057602, "learning_rate": 4.794791055904218e-06, "loss": 0.9841, "step": 28350 }, { "epoch": 0.20528856942242685, "grad_norm": 0.1715139001607895, "learning_rate": 4.794718669243632e-06, "loss": 0.9977, "step": 28360 }, { "epoch": 0.20536095608301302, "grad_norm": 0.17599177360534668, "learning_rate": 4.7946462825830456e-06, "loss": 1.0029, "step": 28370 }, { "epoch": 0.2054333427435992, "grad_norm": 0.16538375616073608, "learning_rate": 4.79457389592246e-06, "loss": 0.999, "step": 28380 }, { "epoch": 0.2055057294041854, "grad_norm": 0.1635717749595642, "learning_rate": 4.794501509261874e-06, "loss": 1.0158, "step": 28390 }, { "epoch": 0.20557811606477158, "grad_norm": 0.17137548327445984, "learning_rate": 4.794429122601287e-06, "loss": 1.0063, "step": 28400 }, { "epoch": 0.20565050272535776, "grad_norm": 0.26129329204559326, "learning_rate": 4.794356735940701e-06, "loss": 1.0029, "step": 28410 }, { "epoch": 0.20572288938594396, "grad_norm": 0.19296756386756897, "learning_rate": 4.794284349280115e-06, "loss": 0.9955, "step": 28420 }, { "epoch": 0.20579527604653014, "grad_norm": 0.17898808419704437, "learning_rate": 4.794211962619529e-06, "loss": 0.9865, "step": 28430 }, { "epoch": 0.20586766270711634, "grad_norm": 0.16566985845565796, "learning_rate": 4.794139575958943e-06, "loss": 0.9895, "step": 28440 }, { "epoch": 0.20594004936770252, "grad_norm": 0.1738710254430771, "learning_rate": 4.794067189298356e-06, "loss": 1.0066, "step": 28450 }, { "epoch": 0.2060124360282887, "grad_norm": 0.18403783440589905, "learning_rate": 4.793994802637771e-06, "loss": 0.9924, "step": 28460 }, { "epoch": 0.2060848226888749, "grad_norm": 0.1839311420917511, "learning_rate": 4.793922415977184e-06, "loss": 0.9973, "step": 28470 }, { "epoch": 0.20615720934946108, "grad_norm": 0.17490077018737793, "learning_rate": 4.793850029316598e-06, "loss": 1.0049, "step": 28480 }, { "epoch": 0.20622959601004726, "grad_norm": 0.17090052366256714, "learning_rate": 4.7937776426560115e-06, "loss": 0.987, "step": 28490 }, { "epoch": 0.20630198267063346, "grad_norm": 0.1909414380788803, "learning_rate": 4.793705255995426e-06, "loss": 1.0039, "step": 28500 }, { "epoch": 0.20637436933121964, "grad_norm": 0.18218588829040527, "learning_rate": 4.79363286933484e-06, "loss": 1.0067, "step": 28510 }, { "epoch": 0.20644675599180584, "grad_norm": 0.2051839381456375, "learning_rate": 4.793560482674253e-06, "loss": 0.9922, "step": 28520 }, { "epoch": 0.20651914265239202, "grad_norm": 0.17690230906009674, "learning_rate": 4.793488096013667e-06, "loss": 0.9973, "step": 28530 }, { "epoch": 0.2065915293129782, "grad_norm": 0.32738569378852844, "learning_rate": 4.793415709353081e-06, "loss": 0.998, "step": 28540 }, { "epoch": 0.2066639159735644, "grad_norm": 0.16709066927433014, "learning_rate": 4.793343322692495e-06, "loss": 0.9968, "step": 28550 }, { "epoch": 0.20673630263415058, "grad_norm": 0.16835230588912964, "learning_rate": 4.7932709360319085e-06, "loss": 0.9936, "step": 28560 }, { "epoch": 0.20680868929473675, "grad_norm": 0.18569424748420715, "learning_rate": 4.793198549371322e-06, "loss": 0.9828, "step": 28570 }, { "epoch": 0.20688107595532296, "grad_norm": 0.16183647513389587, "learning_rate": 4.793126162710736e-06, "loss": 1.0112, "step": 28580 }, { "epoch": 0.20695346261590913, "grad_norm": 0.1736779808998108, "learning_rate": 4.79305377605015e-06, "loss": 0.9918, "step": 28590 }, { "epoch": 0.20702584927649534, "grad_norm": 0.6868466138839722, "learning_rate": 4.792981389389564e-06, "loss": 0.9918, "step": 28600 }, { "epoch": 0.20709823593708152, "grad_norm": 0.1731596291065216, "learning_rate": 4.7929090027289774e-06, "loss": 0.9935, "step": 28610 }, { "epoch": 0.2071706225976677, "grad_norm": 0.18018291890621185, "learning_rate": 4.792836616068391e-06, "loss": 1.0124, "step": 28620 }, { "epoch": 0.2072430092582539, "grad_norm": 0.1815289855003357, "learning_rate": 4.7927642294078055e-06, "loss": 1.0116, "step": 28630 }, { "epoch": 0.20731539591884007, "grad_norm": 0.18109048902988434, "learning_rate": 4.792691842747219e-06, "loss": 0.9968, "step": 28640 }, { "epoch": 0.20738778257942625, "grad_norm": 0.1723286211490631, "learning_rate": 4.792619456086633e-06, "loss": 1.0088, "step": 28650 }, { "epoch": 0.20746016924001245, "grad_norm": 0.17731237411499023, "learning_rate": 4.792547069426046e-06, "loss": 1.0042, "step": 28660 }, { "epoch": 0.20753255590059863, "grad_norm": 0.17550349235534668, "learning_rate": 4.792474682765461e-06, "loss": 1.0046, "step": 28670 }, { "epoch": 0.20760494256118484, "grad_norm": 0.17998819053173065, "learning_rate": 4.7924022961048745e-06, "loss": 0.9919, "step": 28680 }, { "epoch": 0.207677329221771, "grad_norm": 0.1708000898361206, "learning_rate": 4.792329909444288e-06, "loss": 1.0019, "step": 28690 }, { "epoch": 0.2077497158823572, "grad_norm": 0.16459348797798157, "learning_rate": 4.792257522783702e-06, "loss": 1.0008, "step": 28700 }, { "epoch": 0.2078221025429434, "grad_norm": 0.17152118682861328, "learning_rate": 4.792185136123116e-06, "loss": 0.9919, "step": 28710 }, { "epoch": 0.20789448920352957, "grad_norm": 0.1598610132932663, "learning_rate": 4.79211274946253e-06, "loss": 1.0093, "step": 28720 }, { "epoch": 0.20796687586411577, "grad_norm": 0.18231916427612305, "learning_rate": 4.792040362801943e-06, "loss": 1.0098, "step": 28730 }, { "epoch": 0.20803926252470195, "grad_norm": 0.16905340552330017, "learning_rate": 4.791967976141357e-06, "loss": 0.992, "step": 28740 }, { "epoch": 0.20811164918528813, "grad_norm": 0.1717272698879242, "learning_rate": 4.791895589480771e-06, "loss": 0.9876, "step": 28750 }, { "epoch": 0.20818403584587433, "grad_norm": 0.16113172471523285, "learning_rate": 4.791823202820184e-06, "loss": 0.9987, "step": 28760 }, { "epoch": 0.2082564225064605, "grad_norm": 0.18557120859622955, "learning_rate": 4.791750816159598e-06, "loss": 1.0109, "step": 28770 }, { "epoch": 0.20832880916704669, "grad_norm": 0.17728063464164734, "learning_rate": 4.791678429499012e-06, "loss": 0.9922, "step": 28780 }, { "epoch": 0.2084011958276329, "grad_norm": 0.19753523170948029, "learning_rate": 4.791606042838426e-06, "loss": 1.0016, "step": 28790 }, { "epoch": 0.20847358248821907, "grad_norm": 0.16373999416828156, "learning_rate": 4.7915336561778395e-06, "loss": 0.9975, "step": 28800 }, { "epoch": 0.20854596914880527, "grad_norm": 0.17161045968532562, "learning_rate": 4.791461269517253e-06, "loss": 0.9926, "step": 28810 }, { "epoch": 0.20861835580939145, "grad_norm": 0.16987286508083344, "learning_rate": 4.791388882856668e-06, "loss": 0.9974, "step": 28820 }, { "epoch": 0.20869074246997762, "grad_norm": 0.2174030840396881, "learning_rate": 4.791316496196081e-06, "loss": 1.003, "step": 28830 }, { "epoch": 0.20876312913056383, "grad_norm": 0.1620844602584839, "learning_rate": 4.791244109535495e-06, "loss": 1.0006, "step": 28840 }, { "epoch": 0.20883551579115, "grad_norm": 0.18147195875644684, "learning_rate": 4.7911717228749085e-06, "loss": 1.0035, "step": 28850 }, { "epoch": 0.20890790245173618, "grad_norm": 0.1799267679452896, "learning_rate": 4.791099336214323e-06, "loss": 1.0044, "step": 28860 }, { "epoch": 0.2089802891123224, "grad_norm": 0.2170770764350891, "learning_rate": 4.7910269495537366e-06, "loss": 0.9663, "step": 28870 }, { "epoch": 0.20905267577290856, "grad_norm": 0.17900538444519043, "learning_rate": 4.79095456289315e-06, "loss": 0.99, "step": 28880 }, { "epoch": 0.20912506243349477, "grad_norm": 0.1686086356639862, "learning_rate": 4.790882176232564e-06, "loss": 1.0001, "step": 28890 }, { "epoch": 0.20919744909408095, "grad_norm": 0.19090880453586578, "learning_rate": 4.790809789571978e-06, "loss": 1.0074, "step": 28900 }, { "epoch": 0.20926983575466712, "grad_norm": 0.16574802994728088, "learning_rate": 4.790737402911392e-06, "loss": 0.9998, "step": 28910 }, { "epoch": 0.20934222241525333, "grad_norm": 0.17160232365131378, "learning_rate": 4.7906650162508055e-06, "loss": 1.0036, "step": 28920 }, { "epoch": 0.2094146090758395, "grad_norm": 0.17954379320144653, "learning_rate": 4.790592629590219e-06, "loss": 0.9932, "step": 28930 }, { "epoch": 0.20948699573642568, "grad_norm": 0.17119750380516052, "learning_rate": 4.7905202429296336e-06, "loss": 0.9804, "step": 28940 }, { "epoch": 0.20955938239701188, "grad_norm": 0.16228660941123962, "learning_rate": 4.790447856269047e-06, "loss": 1.0078, "step": 28950 }, { "epoch": 0.20963176905759806, "grad_norm": 0.16871187090873718, "learning_rate": 4.790375469608461e-06, "loss": 1.0053, "step": 28960 }, { "epoch": 0.20970415571818427, "grad_norm": 0.1674191802740097, "learning_rate": 4.790303082947874e-06, "loss": 1.0003, "step": 28970 }, { "epoch": 0.20977654237877044, "grad_norm": 0.16644033789634705, "learning_rate": 4.790230696287289e-06, "loss": 0.9774, "step": 28980 }, { "epoch": 0.20984892903935662, "grad_norm": 0.17086544632911682, "learning_rate": 4.7901583096267025e-06, "loss": 1.0096, "step": 28990 }, { "epoch": 0.20992131569994282, "grad_norm": 0.16685600578784943, "learning_rate": 4.790085922966116e-06, "loss": 0.9982, "step": 29000 }, { "epoch": 0.209993702360529, "grad_norm": 0.16532373428344727, "learning_rate": 4.79001353630553e-06, "loss": 0.976, "step": 29010 }, { "epoch": 0.21006608902111518, "grad_norm": 0.36561527848243713, "learning_rate": 4.789941149644944e-06, "loss": 0.9778, "step": 29020 }, { "epoch": 0.21013847568170138, "grad_norm": 0.1807478368282318, "learning_rate": 4.789868762984358e-06, "loss": 1.0065, "step": 29030 }, { "epoch": 0.21021086234228756, "grad_norm": 0.17527340352535248, "learning_rate": 4.789796376323771e-06, "loss": 0.998, "step": 29040 }, { "epoch": 0.21028324900287376, "grad_norm": 0.1826309859752655, "learning_rate": 4.789723989663185e-06, "loss": 0.9983, "step": 29050 }, { "epoch": 0.21035563566345994, "grad_norm": 0.17075999081134796, "learning_rate": 4.7896516030025995e-06, "loss": 1.0071, "step": 29060 }, { "epoch": 0.21042802232404612, "grad_norm": 0.20827743411064148, "learning_rate": 4.789579216342013e-06, "loss": 0.9932, "step": 29070 }, { "epoch": 0.21050040898463232, "grad_norm": 0.18631266057491302, "learning_rate": 4.789506829681427e-06, "loss": 1.0037, "step": 29080 }, { "epoch": 0.2105727956452185, "grad_norm": 0.1681118607521057, "learning_rate": 4.78943444302084e-06, "loss": 0.9982, "step": 29090 }, { "epoch": 0.21064518230580467, "grad_norm": 0.16737908124923706, "learning_rate": 4.789362056360255e-06, "loss": 0.9948, "step": 29100 }, { "epoch": 0.21071756896639088, "grad_norm": 0.17825300991535187, "learning_rate": 4.789289669699668e-06, "loss": 1.0048, "step": 29110 }, { "epoch": 0.21078995562697705, "grad_norm": 0.16629593074321747, "learning_rate": 4.789217283039082e-06, "loss": 1.002, "step": 29120 }, { "epoch": 0.21086234228756326, "grad_norm": 0.16453807055950165, "learning_rate": 4.789144896378496e-06, "loss": 0.9952, "step": 29130 }, { "epoch": 0.21093472894814944, "grad_norm": 0.18079112470149994, "learning_rate": 4.78907250971791e-06, "loss": 0.994, "step": 29140 }, { "epoch": 0.2110071156087356, "grad_norm": 0.16734014451503754, "learning_rate": 4.789000123057324e-06, "loss": 0.9832, "step": 29150 }, { "epoch": 0.21107950226932182, "grad_norm": 0.16586540639400482, "learning_rate": 4.788927736396737e-06, "loss": 1.0078, "step": 29160 }, { "epoch": 0.211151888929908, "grad_norm": 0.1610448658466339, "learning_rate": 4.788855349736151e-06, "loss": 0.9818, "step": 29170 }, { "epoch": 0.21122427559049417, "grad_norm": 0.16332942247390747, "learning_rate": 4.788782963075565e-06, "loss": 0.9989, "step": 29180 }, { "epoch": 0.21129666225108037, "grad_norm": 0.17163671553134918, "learning_rate": 4.788710576414979e-06, "loss": 1.0085, "step": 29190 }, { "epoch": 0.21136904891166655, "grad_norm": 0.17151540517807007, "learning_rate": 4.788638189754393e-06, "loss": 0.9963, "step": 29200 }, { "epoch": 0.21144143557225276, "grad_norm": 0.16174449026584625, "learning_rate": 4.788565803093806e-06, "loss": 1.0069, "step": 29210 }, { "epoch": 0.21151382223283893, "grad_norm": 0.1772051900625229, "learning_rate": 4.78849341643322e-06, "loss": 1.0059, "step": 29220 }, { "epoch": 0.2115862088934251, "grad_norm": 0.1666724532842636, "learning_rate": 4.788421029772634e-06, "loss": 0.9863, "step": 29230 }, { "epoch": 0.21165859555401131, "grad_norm": 0.1606568545103073, "learning_rate": 4.788348643112048e-06, "loss": 0.9924, "step": 29240 }, { "epoch": 0.2117309822145975, "grad_norm": 0.1638329178094864, "learning_rate": 4.788276256451462e-06, "loss": 0.9987, "step": 29250 }, { "epoch": 0.21180336887518367, "grad_norm": 0.17658962309360504, "learning_rate": 4.788203869790875e-06, "loss": 0.9978, "step": 29260 }, { "epoch": 0.21187575553576987, "grad_norm": 0.17163963615894318, "learning_rate": 4.78813148313029e-06, "loss": 0.9986, "step": 29270 }, { "epoch": 0.21194814219635605, "grad_norm": 0.17665480077266693, "learning_rate": 4.7880590964697024e-06, "loss": 0.9921, "step": 29280 }, { "epoch": 0.21202052885694225, "grad_norm": 0.17104417085647583, "learning_rate": 4.787986709809117e-06, "loss": 0.9853, "step": 29290 }, { "epoch": 0.21209291551752843, "grad_norm": 0.17565962672233582, "learning_rate": 4.7879143231485305e-06, "loss": 1.0039, "step": 29300 }, { "epoch": 0.2121653021781146, "grad_norm": 0.2072620391845703, "learning_rate": 4.787841936487944e-06, "loss": 0.9881, "step": 29310 }, { "epoch": 0.2122376888387008, "grad_norm": 0.16268736124038696, "learning_rate": 4.787769549827358e-06, "loss": 0.9767, "step": 29320 }, { "epoch": 0.212310075499287, "grad_norm": 0.19353044033050537, "learning_rate": 4.787697163166772e-06, "loss": 1.002, "step": 29330 }, { "epoch": 0.2123824621598732, "grad_norm": 0.1635683923959732, "learning_rate": 4.787624776506186e-06, "loss": 0.9846, "step": 29340 }, { "epoch": 0.21245484882045937, "grad_norm": 0.16228455305099487, "learning_rate": 4.7875523898455994e-06, "loss": 0.996, "step": 29350 }, { "epoch": 0.21252723548104555, "grad_norm": 0.18255174160003662, "learning_rate": 4.787480003185013e-06, "loss": 0.9805, "step": 29360 }, { "epoch": 0.21259962214163175, "grad_norm": 0.17667743563652039, "learning_rate": 4.787407616524427e-06, "loss": 0.994, "step": 29370 }, { "epoch": 0.21267200880221793, "grad_norm": 0.15924212336540222, "learning_rate": 4.787335229863841e-06, "loss": 0.9877, "step": 29380 }, { "epoch": 0.2127443954628041, "grad_norm": 0.27366387844085693, "learning_rate": 4.787262843203255e-06, "loss": 0.9891, "step": 29390 }, { "epoch": 0.2128167821233903, "grad_norm": 0.17497357726097107, "learning_rate": 4.787190456542668e-06, "loss": 0.9717, "step": 29400 }, { "epoch": 0.21288916878397648, "grad_norm": 0.17088007926940918, "learning_rate": 4.787118069882082e-06, "loss": 0.9952, "step": 29410 }, { "epoch": 0.2129615554445627, "grad_norm": 0.16863124072551727, "learning_rate": 4.7870456832214965e-06, "loss": 0.9988, "step": 29420 }, { "epoch": 0.21303394210514887, "grad_norm": 0.16611674427986145, "learning_rate": 4.78697329656091e-06, "loss": 0.989, "step": 29430 }, { "epoch": 0.21310632876573504, "grad_norm": 0.2923133373260498, "learning_rate": 4.786900909900324e-06, "loss": 1.0054, "step": 29440 }, { "epoch": 0.21317871542632125, "grad_norm": 0.18671530485153198, "learning_rate": 4.786828523239737e-06, "loss": 1.003, "step": 29450 }, { "epoch": 0.21325110208690742, "grad_norm": 0.17407409846782684, "learning_rate": 4.786756136579152e-06, "loss": 0.9949, "step": 29460 }, { "epoch": 0.2133234887474936, "grad_norm": 0.27453675866127014, "learning_rate": 4.786683749918565e-06, "loss": 0.9984, "step": 29470 }, { "epoch": 0.2133958754080798, "grad_norm": 0.16514623165130615, "learning_rate": 4.786611363257979e-06, "loss": 0.9985, "step": 29480 }, { "epoch": 0.21346826206866598, "grad_norm": 0.17301639914512634, "learning_rate": 4.786538976597393e-06, "loss": 1.0006, "step": 29490 }, { "epoch": 0.21354064872925219, "grad_norm": 0.1778867542743683, "learning_rate": 4.786466589936807e-06, "loss": 0.9948, "step": 29500 }, { "epoch": 0.21361303538983836, "grad_norm": 0.1603461503982544, "learning_rate": 4.786394203276221e-06, "loss": 0.9911, "step": 29510 }, { "epoch": 0.21368542205042454, "grad_norm": 0.234296053647995, "learning_rate": 4.786321816615634e-06, "loss": 0.9971, "step": 29520 }, { "epoch": 0.21375780871101074, "grad_norm": 0.158965066075325, "learning_rate": 4.786249429955048e-06, "loss": 0.9987, "step": 29530 }, { "epoch": 0.21383019537159692, "grad_norm": 0.1825021207332611, "learning_rate": 4.786177043294462e-06, "loss": 0.9916, "step": 29540 }, { "epoch": 0.2139025820321831, "grad_norm": 0.18081267178058624, "learning_rate": 4.786104656633876e-06, "loss": 0.9879, "step": 29550 }, { "epoch": 0.2139749686927693, "grad_norm": 0.1616426259279251, "learning_rate": 4.78603226997329e-06, "loss": 0.9958, "step": 29560 }, { "epoch": 0.21404735535335548, "grad_norm": 0.17399275302886963, "learning_rate": 4.785959883312703e-06, "loss": 1.0119, "step": 29570 }, { "epoch": 0.21411974201394168, "grad_norm": 0.22257418930530548, "learning_rate": 4.785887496652118e-06, "loss": 0.9888, "step": 29580 }, { "epoch": 0.21419212867452786, "grad_norm": 0.162775918841362, "learning_rate": 4.785815109991531e-06, "loss": 0.9989, "step": 29590 }, { "epoch": 0.21426451533511404, "grad_norm": 0.16926290094852448, "learning_rate": 4.785742723330945e-06, "loss": 0.9965, "step": 29600 }, { "epoch": 0.21433690199570024, "grad_norm": 0.18034987151622772, "learning_rate": 4.7856703366703585e-06, "loss": 0.995, "step": 29610 }, { "epoch": 0.21440928865628642, "grad_norm": 0.17207445204257965, "learning_rate": 4.785597950009773e-06, "loss": 1.0046, "step": 29620 }, { "epoch": 0.2144816753168726, "grad_norm": 0.21093519032001495, "learning_rate": 4.785525563349187e-06, "loss": 0.9917, "step": 29630 }, { "epoch": 0.2145540619774588, "grad_norm": 0.16876211762428284, "learning_rate": 4.7854531766886e-06, "loss": 0.9903, "step": 29640 }, { "epoch": 0.21462644863804498, "grad_norm": 0.24008895456790924, "learning_rate": 4.785380790028014e-06, "loss": 0.9946, "step": 29650 }, { "epoch": 0.21469883529863118, "grad_norm": 0.16969846189022064, "learning_rate": 4.785308403367428e-06, "loss": 0.9987, "step": 29660 }, { "epoch": 0.21477122195921736, "grad_norm": 0.1585167944431305, "learning_rate": 4.785236016706842e-06, "loss": 0.9916, "step": 29670 }, { "epoch": 0.21484360861980353, "grad_norm": 0.16442425549030304, "learning_rate": 4.7851636300462556e-06, "loss": 0.9765, "step": 29680 }, { "epoch": 0.21491599528038974, "grad_norm": 0.19266870617866516, "learning_rate": 4.785091243385669e-06, "loss": 0.9818, "step": 29690 }, { "epoch": 0.21498838194097591, "grad_norm": 0.16447675228118896, "learning_rate": 4.785018856725084e-06, "loss": 1.0, "step": 29700 }, { "epoch": 0.2150607686015621, "grad_norm": 0.1561741828918457, "learning_rate": 4.784946470064497e-06, "loss": 0.9833, "step": 29710 }, { "epoch": 0.2151331552621483, "grad_norm": 0.1617199033498764, "learning_rate": 4.784874083403911e-06, "loss": 0.997, "step": 29720 }, { "epoch": 0.21520554192273447, "grad_norm": 0.16869542002677917, "learning_rate": 4.7848016967433245e-06, "loss": 0.9877, "step": 29730 }, { "epoch": 0.21527792858332068, "grad_norm": 0.1657578945159912, "learning_rate": 4.784729310082739e-06, "loss": 0.984, "step": 29740 }, { "epoch": 0.21535031524390685, "grad_norm": 0.1783364713191986, "learning_rate": 4.7846569234221526e-06, "loss": 0.9972, "step": 29750 }, { "epoch": 0.21542270190449303, "grad_norm": 0.16609250009059906, "learning_rate": 4.784584536761566e-06, "loss": 0.9815, "step": 29760 }, { "epoch": 0.21549508856507923, "grad_norm": 0.16367986798286438, "learning_rate": 4.78451215010098e-06, "loss": 0.9864, "step": 29770 }, { "epoch": 0.2155674752256654, "grad_norm": 0.1632063388824463, "learning_rate": 4.784439763440394e-06, "loss": 0.9857, "step": 29780 }, { "epoch": 0.2156398618862516, "grad_norm": 0.2034599334001541, "learning_rate": 4.784367376779808e-06, "loss": 0.9864, "step": 29790 }, { "epoch": 0.2157122485468378, "grad_norm": 0.15968771278858185, "learning_rate": 4.7842949901192215e-06, "loss": 0.982, "step": 29800 }, { "epoch": 0.21578463520742397, "grad_norm": 0.16494300961494446, "learning_rate": 4.784222603458635e-06, "loss": 0.9871, "step": 29810 }, { "epoch": 0.21585702186801017, "grad_norm": 0.1890561729669571, "learning_rate": 4.784150216798049e-06, "loss": 0.9845, "step": 29820 }, { "epoch": 0.21592940852859635, "grad_norm": 0.18601875007152557, "learning_rate": 4.784077830137462e-06, "loss": 0.9968, "step": 29830 }, { "epoch": 0.21600179518918253, "grad_norm": 0.18021321296691895, "learning_rate": 4.784005443476876e-06, "loss": 0.9904, "step": 29840 }, { "epoch": 0.21607418184976873, "grad_norm": 0.17945921421051025, "learning_rate": 4.78393305681629e-06, "loss": 0.9995, "step": 29850 }, { "epoch": 0.2161465685103549, "grad_norm": 0.16598504781723022, "learning_rate": 4.783860670155704e-06, "loss": 1.0009, "step": 29860 }, { "epoch": 0.2162189551709411, "grad_norm": 0.1580289602279663, "learning_rate": 4.783788283495118e-06, "loss": 0.9999, "step": 29870 }, { "epoch": 0.2162913418315273, "grad_norm": 0.16476024687290192, "learning_rate": 4.783715896834531e-06, "loss": 0.9882, "step": 29880 }, { "epoch": 0.21636372849211347, "grad_norm": 0.17372757196426392, "learning_rate": 4.783643510173946e-06, "loss": 0.9873, "step": 29890 }, { "epoch": 0.21643611515269967, "grad_norm": 0.17424210906028748, "learning_rate": 4.783571123513359e-06, "loss": 0.9848, "step": 29900 }, { "epoch": 0.21650850181328585, "grad_norm": 0.16753096878528595, "learning_rate": 4.783498736852773e-06, "loss": 0.9753, "step": 29910 }, { "epoch": 0.21658088847387202, "grad_norm": 0.16922076046466827, "learning_rate": 4.783426350192187e-06, "loss": 0.9874, "step": 29920 }, { "epoch": 0.21665327513445823, "grad_norm": 0.18576353788375854, "learning_rate": 4.783353963531601e-06, "loss": 0.9952, "step": 29930 }, { "epoch": 0.2167256617950444, "grad_norm": 0.15824037790298462, "learning_rate": 4.783281576871015e-06, "loss": 0.9921, "step": 29940 }, { "epoch": 0.2167980484556306, "grad_norm": 0.18869732320308685, "learning_rate": 4.783209190210428e-06, "loss": 1.0058, "step": 29950 }, { "epoch": 0.2168704351162168, "grad_norm": 0.17720650136470795, "learning_rate": 4.783136803549842e-06, "loss": 0.9959, "step": 29960 }, { "epoch": 0.21694282177680296, "grad_norm": 0.17192226648330688, "learning_rate": 4.783064416889256e-06, "loss": 0.9915, "step": 29970 }, { "epoch": 0.21701520843738917, "grad_norm": 0.17724481225013733, "learning_rate": 4.78299203022867e-06, "loss": 0.9957, "step": 29980 }, { "epoch": 0.21708759509797534, "grad_norm": 0.16848604381084442, "learning_rate": 4.782919643568084e-06, "loss": 0.995, "step": 29990 }, { "epoch": 0.21715998175856152, "grad_norm": 0.16832269728183746, "learning_rate": 4.782847256907497e-06, "loss": 1.0102, "step": 30000 }, { "epoch": 0.21723236841914773, "grad_norm": 0.16782933473587036, "learning_rate": 4.782774870246911e-06, "loss": 0.9875, "step": 30010 }, { "epoch": 0.2173047550797339, "grad_norm": 0.17606335878372192, "learning_rate": 4.782702483586325e-06, "loss": 0.9907, "step": 30020 }, { "epoch": 0.2173771417403201, "grad_norm": 0.16667865216732025, "learning_rate": 4.782630096925739e-06, "loss": 0.9906, "step": 30030 }, { "epoch": 0.21744952840090628, "grad_norm": 0.17801982164382935, "learning_rate": 4.7825577102651525e-06, "loss": 0.995, "step": 30040 }, { "epoch": 0.21752191506149246, "grad_norm": 0.19045329093933105, "learning_rate": 4.782485323604566e-06, "loss": 0.9944, "step": 30050 }, { "epoch": 0.21759430172207866, "grad_norm": 0.19208842515945435, "learning_rate": 4.782412936943981e-06, "loss": 0.9972, "step": 30060 }, { "epoch": 0.21766668838266484, "grad_norm": 0.19080910086631775, "learning_rate": 4.782340550283394e-06, "loss": 0.9907, "step": 30070 }, { "epoch": 0.21773907504325102, "grad_norm": 0.16448453068733215, "learning_rate": 4.782268163622808e-06, "loss": 0.9954, "step": 30080 }, { "epoch": 0.21781146170383722, "grad_norm": 0.16412755846977234, "learning_rate": 4.7821957769622214e-06, "loss": 0.9929, "step": 30090 }, { "epoch": 0.2178838483644234, "grad_norm": 0.17973798513412476, "learning_rate": 4.782123390301636e-06, "loss": 0.9898, "step": 30100 }, { "epoch": 0.2179562350250096, "grad_norm": 0.16456930339336395, "learning_rate": 4.7820510036410495e-06, "loss": 0.9943, "step": 30110 }, { "epoch": 0.21802862168559578, "grad_norm": 0.1790180802345276, "learning_rate": 4.781978616980463e-06, "loss": 1.0058, "step": 30120 }, { "epoch": 0.21810100834618196, "grad_norm": 0.15905308723449707, "learning_rate": 4.781906230319877e-06, "loss": 0.985, "step": 30130 }, { "epoch": 0.21817339500676816, "grad_norm": 0.17000477015972137, "learning_rate": 4.781833843659291e-06, "loss": 0.9802, "step": 30140 }, { "epoch": 0.21824578166735434, "grad_norm": 0.19705595076084137, "learning_rate": 4.781761456998705e-06, "loss": 1.002, "step": 30150 }, { "epoch": 0.21831816832794051, "grad_norm": 0.16319391131401062, "learning_rate": 4.7816890703381185e-06, "loss": 0.997, "step": 30160 }, { "epoch": 0.21839055498852672, "grad_norm": 0.1748110055923462, "learning_rate": 4.781616683677532e-06, "loss": 0.9865, "step": 30170 }, { "epoch": 0.2184629416491129, "grad_norm": 0.16149266064167023, "learning_rate": 4.7815442970169465e-06, "loss": 0.9952, "step": 30180 }, { "epoch": 0.2185353283096991, "grad_norm": 0.22713536024093628, "learning_rate": 4.78147191035636e-06, "loss": 1.0025, "step": 30190 }, { "epoch": 0.21860771497028528, "grad_norm": 0.1786499321460724, "learning_rate": 4.781399523695774e-06, "loss": 0.9896, "step": 30200 }, { "epoch": 0.21868010163087145, "grad_norm": 0.19902971386909485, "learning_rate": 4.781327137035187e-06, "loss": 0.9976, "step": 30210 }, { "epoch": 0.21875248829145766, "grad_norm": 0.17950847744941711, "learning_rate": 4.781254750374602e-06, "loss": 1.0033, "step": 30220 }, { "epoch": 0.21882487495204384, "grad_norm": 0.1599850207567215, "learning_rate": 4.7811823637140155e-06, "loss": 0.9735, "step": 30230 }, { "epoch": 0.21889726161263, "grad_norm": 0.15841835737228394, "learning_rate": 4.781109977053429e-06, "loss": 1.0022, "step": 30240 }, { "epoch": 0.21896964827321622, "grad_norm": 0.17060723900794983, "learning_rate": 4.781037590392843e-06, "loss": 0.9909, "step": 30250 }, { "epoch": 0.2190420349338024, "grad_norm": 0.16815410554409027, "learning_rate": 4.780965203732257e-06, "loss": 0.9865, "step": 30260 }, { "epoch": 0.2191144215943886, "grad_norm": 0.2049429714679718, "learning_rate": 4.780892817071671e-06, "loss": 0.978, "step": 30270 }, { "epoch": 0.21918680825497477, "grad_norm": 0.1744859516620636, "learning_rate": 4.780820430411084e-06, "loss": 0.9946, "step": 30280 }, { "epoch": 0.21925919491556095, "grad_norm": 0.17535239458084106, "learning_rate": 4.780748043750498e-06, "loss": 0.9893, "step": 30290 }, { "epoch": 0.21933158157614716, "grad_norm": 0.16993948817253113, "learning_rate": 4.7806756570899125e-06, "loss": 0.995, "step": 30300 }, { "epoch": 0.21940396823673333, "grad_norm": 0.1737825572490692, "learning_rate": 4.780603270429326e-06, "loss": 0.979, "step": 30310 }, { "epoch": 0.2194763548973195, "grad_norm": 0.19781017303466797, "learning_rate": 4.78053088376874e-06, "loss": 0.9894, "step": 30320 }, { "epoch": 0.2195487415579057, "grad_norm": 0.15426485240459442, "learning_rate": 4.780458497108153e-06, "loss": 0.9906, "step": 30330 }, { "epoch": 0.2196211282184919, "grad_norm": 0.17458288371562958, "learning_rate": 4.780386110447567e-06, "loss": 0.9847, "step": 30340 }, { "epoch": 0.2196935148790781, "grad_norm": 0.17659978568553925, "learning_rate": 4.7803137237869805e-06, "loss": 0.9937, "step": 30350 }, { "epoch": 0.21976590153966427, "grad_norm": 0.16080795228481293, "learning_rate": 4.780241337126394e-06, "loss": 0.975, "step": 30360 }, { "epoch": 0.21983828820025045, "grad_norm": 0.1639893800020218, "learning_rate": 4.780168950465809e-06, "loss": 1.0061, "step": 30370 }, { "epoch": 0.21991067486083665, "grad_norm": 0.17804419994354248, "learning_rate": 4.780096563805222e-06, "loss": 0.9952, "step": 30380 }, { "epoch": 0.21998306152142283, "grad_norm": 0.17205168306827545, "learning_rate": 4.780024177144636e-06, "loss": 0.9953, "step": 30390 }, { "epoch": 0.220055448182009, "grad_norm": 0.2006746232509613, "learning_rate": 4.7799517904840495e-06, "loss": 0.9882, "step": 30400 }, { "epoch": 0.2201278348425952, "grad_norm": 0.18707719445228577, "learning_rate": 4.779879403823464e-06, "loss": 1.0003, "step": 30410 }, { "epoch": 0.2202002215031814, "grad_norm": 0.178603395819664, "learning_rate": 4.7798070171628776e-06, "loss": 0.9928, "step": 30420 }, { "epoch": 0.2202726081637676, "grad_norm": 0.19374102354049683, "learning_rate": 4.779734630502291e-06, "loss": 0.9865, "step": 30430 }, { "epoch": 0.22034499482435377, "grad_norm": 0.1638360470533371, "learning_rate": 4.779662243841705e-06, "loss": 0.9922, "step": 30440 }, { "epoch": 0.22041738148493994, "grad_norm": 0.16339033842086792, "learning_rate": 4.779589857181119e-06, "loss": 0.995, "step": 30450 }, { "epoch": 0.22048976814552615, "grad_norm": 0.17286980152130127, "learning_rate": 4.779517470520533e-06, "loss": 0.9862, "step": 30460 }, { "epoch": 0.22056215480611233, "grad_norm": 0.17059637606143951, "learning_rate": 4.7794450838599465e-06, "loss": 0.988, "step": 30470 }, { "epoch": 0.22063454146669853, "grad_norm": 0.15443108975887299, "learning_rate": 4.77937269719936e-06, "loss": 0.9902, "step": 30480 }, { "epoch": 0.2207069281272847, "grad_norm": 0.16582870483398438, "learning_rate": 4.7793003105387746e-06, "loss": 0.9947, "step": 30490 }, { "epoch": 0.22077931478787088, "grad_norm": 0.16786135733127594, "learning_rate": 4.779227923878188e-06, "loss": 0.9932, "step": 30500 }, { "epoch": 0.2208517014484571, "grad_norm": 0.18378998339176178, "learning_rate": 4.779155537217602e-06, "loss": 0.9923, "step": 30510 }, { "epoch": 0.22092408810904327, "grad_norm": 0.1661590337753296, "learning_rate": 4.779083150557015e-06, "loss": 0.9963, "step": 30520 }, { "epoch": 0.22099647476962944, "grad_norm": 0.16045540571212769, "learning_rate": 4.77901076389643e-06, "loss": 0.9888, "step": 30530 }, { "epoch": 0.22106886143021565, "grad_norm": 0.17073200643062592, "learning_rate": 4.7789383772358435e-06, "loss": 0.9863, "step": 30540 }, { "epoch": 0.22114124809080182, "grad_norm": 0.2749057710170746, "learning_rate": 4.778865990575257e-06, "loss": 0.9993, "step": 30550 }, { "epoch": 0.22121363475138803, "grad_norm": 0.17265433073043823, "learning_rate": 4.778793603914671e-06, "loss": 0.9898, "step": 30560 }, { "epoch": 0.2212860214119742, "grad_norm": 0.1878434419631958, "learning_rate": 4.778721217254085e-06, "loss": 0.998, "step": 30570 }, { "epoch": 0.22135840807256038, "grad_norm": 0.16679958999156952, "learning_rate": 4.778648830593499e-06, "loss": 0.9897, "step": 30580 }, { "epoch": 0.22143079473314659, "grad_norm": 0.18159635365009308, "learning_rate": 4.778576443932912e-06, "loss": 0.9953, "step": 30590 }, { "epoch": 0.22150318139373276, "grad_norm": 0.18782582879066467, "learning_rate": 4.778504057272326e-06, "loss": 0.9949, "step": 30600 }, { "epoch": 0.22157556805431894, "grad_norm": 0.16997790336608887, "learning_rate": 4.77843167061174e-06, "loss": 0.9956, "step": 30610 }, { "epoch": 0.22164795471490514, "grad_norm": 0.19015435874462128, "learning_rate": 4.778359283951154e-06, "loss": 0.9788, "step": 30620 }, { "epoch": 0.22172034137549132, "grad_norm": 0.15905068814754486, "learning_rate": 4.778286897290568e-06, "loss": 0.9862, "step": 30630 }, { "epoch": 0.22179272803607752, "grad_norm": 0.21960864961147308, "learning_rate": 4.778214510629981e-06, "loss": 0.9855, "step": 30640 }, { "epoch": 0.2218651146966637, "grad_norm": 0.1731971949338913, "learning_rate": 4.778142123969395e-06, "loss": 0.9855, "step": 30650 }, { "epoch": 0.22193750135724988, "grad_norm": 0.1737217754125595, "learning_rate": 4.7780697373088094e-06, "loss": 0.9929, "step": 30660 }, { "epoch": 0.22200988801783608, "grad_norm": 0.16913598775863647, "learning_rate": 4.777997350648223e-06, "loss": 0.9939, "step": 30670 }, { "epoch": 0.22208227467842226, "grad_norm": 0.19872261583805084, "learning_rate": 4.777924963987637e-06, "loss": 1.0059, "step": 30680 }, { "epoch": 0.22215466133900844, "grad_norm": 0.17020894587039948, "learning_rate": 4.77785257732705e-06, "loss": 0.9992, "step": 30690 }, { "epoch": 0.22222704799959464, "grad_norm": 0.19177848100662231, "learning_rate": 4.777780190666465e-06, "loss": 0.9826, "step": 30700 }, { "epoch": 0.22229943466018082, "grad_norm": 0.17280960083007812, "learning_rate": 4.777707804005878e-06, "loss": 0.9868, "step": 30710 }, { "epoch": 0.22237182132076702, "grad_norm": 0.1845826506614685, "learning_rate": 4.777635417345292e-06, "loss": 0.994, "step": 30720 }, { "epoch": 0.2224442079813532, "grad_norm": 0.20441372692584991, "learning_rate": 4.777563030684706e-06, "loss": 0.9852, "step": 30730 }, { "epoch": 0.22251659464193937, "grad_norm": 0.17806535959243774, "learning_rate": 4.77749064402412e-06, "loss": 0.9922, "step": 30740 }, { "epoch": 0.22258898130252558, "grad_norm": 0.1843547821044922, "learning_rate": 4.777418257363534e-06, "loss": 0.9931, "step": 30750 }, { "epoch": 0.22266136796311176, "grad_norm": 0.1745116412639618, "learning_rate": 4.777345870702947e-06, "loss": 0.9939, "step": 30760 }, { "epoch": 0.22273375462369793, "grad_norm": 0.18230880796909332, "learning_rate": 4.777273484042361e-06, "loss": 0.9821, "step": 30770 }, { "epoch": 0.22280614128428414, "grad_norm": 0.17460425198078156, "learning_rate": 4.777201097381775e-06, "loss": 0.9932, "step": 30780 }, { "epoch": 0.2228785279448703, "grad_norm": 0.16138465702533722, "learning_rate": 4.777128710721189e-06, "loss": 0.9921, "step": 30790 }, { "epoch": 0.22295091460545652, "grad_norm": 0.1702491044998169, "learning_rate": 4.777056324060603e-06, "loss": 1.0047, "step": 30800 }, { "epoch": 0.2230233012660427, "grad_norm": 0.1869451254606247, "learning_rate": 4.776983937400016e-06, "loss": 0.9977, "step": 30810 }, { "epoch": 0.22309568792662887, "grad_norm": 0.17127086222171783, "learning_rate": 4.776911550739431e-06, "loss": 1.0077, "step": 30820 }, { "epoch": 0.22316807458721508, "grad_norm": 0.17618906497955322, "learning_rate": 4.776839164078844e-06, "loss": 0.9919, "step": 30830 }, { "epoch": 0.22324046124780125, "grad_norm": 0.19127856194972992, "learning_rate": 4.776766777418258e-06, "loss": 0.9839, "step": 30840 }, { "epoch": 0.22331284790838743, "grad_norm": 0.16776026785373688, "learning_rate": 4.7766943907576715e-06, "loss": 0.9903, "step": 30850 }, { "epoch": 0.22338523456897363, "grad_norm": 0.17897354066371918, "learning_rate": 4.776622004097086e-06, "loss": 0.9847, "step": 30860 }, { "epoch": 0.2234576212295598, "grad_norm": 0.16106632351875305, "learning_rate": 4.776549617436499e-06, "loss": 1.0027, "step": 30870 }, { "epoch": 0.22353000789014602, "grad_norm": 0.15779045224189758, "learning_rate": 4.776477230775912e-06, "loss": 0.9942, "step": 30880 }, { "epoch": 0.2236023945507322, "grad_norm": 0.19655205309391022, "learning_rate": 4.776404844115327e-06, "loss": 1.0076, "step": 30890 }, { "epoch": 0.22367478121131837, "grad_norm": 0.1667492836713791, "learning_rate": 4.7763324574547405e-06, "loss": 0.9975, "step": 30900 }, { "epoch": 0.22374716787190457, "grad_norm": 0.18029555678367615, "learning_rate": 4.776260070794154e-06, "loss": 0.9857, "step": 30910 }, { "epoch": 0.22381955453249075, "grad_norm": 0.1663108766078949, "learning_rate": 4.776187684133568e-06, "loss": 0.9952, "step": 30920 }, { "epoch": 0.22389194119307693, "grad_norm": 0.18549077212810516, "learning_rate": 4.776115297472982e-06, "loss": 0.9825, "step": 30930 }, { "epoch": 0.22396432785366313, "grad_norm": 0.17091688513755798, "learning_rate": 4.776042910812396e-06, "loss": 1.0017, "step": 30940 }, { "epoch": 0.2240367145142493, "grad_norm": 0.18328119814395905, "learning_rate": 4.775970524151809e-06, "loss": 0.9957, "step": 30950 }, { "epoch": 0.2241091011748355, "grad_norm": 0.18230809271335602, "learning_rate": 4.775898137491223e-06, "loss": 1.0105, "step": 30960 }, { "epoch": 0.2241814878354217, "grad_norm": 0.17327918112277985, "learning_rate": 4.7758257508306375e-06, "loss": 0.9785, "step": 30970 }, { "epoch": 0.22425387449600787, "grad_norm": 0.1592261642217636, "learning_rate": 4.775753364170051e-06, "loss": 0.9965, "step": 30980 }, { "epoch": 0.22432626115659407, "grad_norm": 0.16673846542835236, "learning_rate": 4.775680977509465e-06, "loss": 0.986, "step": 30990 }, { "epoch": 0.22439864781718025, "grad_norm": 0.1908463090658188, "learning_rate": 4.775608590848878e-06, "loss": 1.0013, "step": 31000 }, { "epoch": 0.22447103447776642, "grad_norm": 0.16515257954597473, "learning_rate": 4.775536204188293e-06, "loss": 1.0092, "step": 31010 }, { "epoch": 0.22454342113835263, "grad_norm": 0.1733829379081726, "learning_rate": 4.775463817527706e-06, "loss": 0.9834, "step": 31020 }, { "epoch": 0.2246158077989388, "grad_norm": 0.17411421239376068, "learning_rate": 4.77539143086712e-06, "loss": 0.9866, "step": 31030 }, { "epoch": 0.224688194459525, "grad_norm": 0.16972704231739044, "learning_rate": 4.775319044206534e-06, "loss": 0.986, "step": 31040 }, { "epoch": 0.22476058112011119, "grad_norm": 0.1979493349790573, "learning_rate": 4.775246657545948e-06, "loss": 0.9816, "step": 31050 }, { "epoch": 0.22483296778069736, "grad_norm": 0.18027926981449127, "learning_rate": 4.775174270885362e-06, "loss": 0.9853, "step": 31060 }, { "epoch": 0.22490535444128357, "grad_norm": 0.16481108963489532, "learning_rate": 4.775101884224775e-06, "loss": 0.9876, "step": 31070 }, { "epoch": 0.22497774110186974, "grad_norm": 0.1805502474308014, "learning_rate": 4.775029497564189e-06, "loss": 0.9897, "step": 31080 }, { "epoch": 0.22505012776245595, "grad_norm": 0.1621004194021225, "learning_rate": 4.774957110903603e-06, "loss": 0.9909, "step": 31090 }, { "epoch": 0.22512251442304212, "grad_norm": 0.1652892827987671, "learning_rate": 4.774884724243017e-06, "loss": 0.9781, "step": 31100 }, { "epoch": 0.2251949010836283, "grad_norm": 0.16680368781089783, "learning_rate": 4.774812337582431e-06, "loss": 0.9995, "step": 31110 }, { "epoch": 0.2252672877442145, "grad_norm": 0.17814873158931732, "learning_rate": 4.774739950921844e-06, "loss": 0.9923, "step": 31120 }, { "epoch": 0.22533967440480068, "grad_norm": 0.15377861261367798, "learning_rate": 4.774667564261259e-06, "loss": 0.9837, "step": 31130 }, { "epoch": 0.22541206106538686, "grad_norm": 0.18391862511634827, "learning_rate": 4.774595177600672e-06, "loss": 0.9995, "step": 31140 }, { "epoch": 0.22548444772597306, "grad_norm": 0.17734524607658386, "learning_rate": 4.774522790940086e-06, "loss": 1.0083, "step": 31150 }, { "epoch": 0.22555683438655924, "grad_norm": 0.17555080354213715, "learning_rate": 4.7744504042794996e-06, "loss": 1.0057, "step": 31160 }, { "epoch": 0.22562922104714545, "grad_norm": 0.16660809516906738, "learning_rate": 4.774378017618914e-06, "loss": 0.9793, "step": 31170 }, { "epoch": 0.22570160770773162, "grad_norm": 0.19567494094371796, "learning_rate": 4.774305630958328e-06, "loss": 0.9865, "step": 31180 }, { "epoch": 0.2257739943683178, "grad_norm": 0.15271180868148804, "learning_rate": 4.774233244297741e-06, "loss": 0.9831, "step": 31190 }, { "epoch": 0.225846381028904, "grad_norm": 0.16450783610343933, "learning_rate": 4.774160857637155e-06, "loss": 0.996, "step": 31200 }, { "epoch": 0.22591876768949018, "grad_norm": 0.2669644057750702, "learning_rate": 4.774088470976569e-06, "loss": 0.9876, "step": 31210 }, { "epoch": 0.22599115435007636, "grad_norm": 0.1570923775434494, "learning_rate": 4.774016084315983e-06, "loss": 0.9838, "step": 31220 }, { "epoch": 0.22606354101066256, "grad_norm": 0.1822049915790558, "learning_rate": 4.7739436976553966e-06, "loss": 0.976, "step": 31230 }, { "epoch": 0.22613592767124874, "grad_norm": 0.17041492462158203, "learning_rate": 4.77387131099481e-06, "loss": 1.0074, "step": 31240 }, { "epoch": 0.22620831433183494, "grad_norm": 0.1686597615480423, "learning_rate": 4.773798924334224e-06, "loss": 1.0004, "step": 31250 }, { "epoch": 0.22628070099242112, "grad_norm": 0.16727478802204132, "learning_rate": 4.773726537673638e-06, "loss": 1.0044, "step": 31260 }, { "epoch": 0.2263530876530073, "grad_norm": 0.16306477785110474, "learning_rate": 4.773654151013052e-06, "loss": 0.9966, "step": 31270 }, { "epoch": 0.2264254743135935, "grad_norm": 0.17092132568359375, "learning_rate": 4.7735817643524655e-06, "loss": 1.0063, "step": 31280 }, { "epoch": 0.22649786097417968, "grad_norm": 0.16931623220443726, "learning_rate": 4.773509377691879e-06, "loss": 0.9868, "step": 31290 }, { "epoch": 0.22657024763476585, "grad_norm": 0.1603298783302307, "learning_rate": 4.7734369910312936e-06, "loss": 0.9837, "step": 31300 }, { "epoch": 0.22664263429535206, "grad_norm": 0.19515341520309448, "learning_rate": 4.773364604370707e-06, "loss": 0.9844, "step": 31310 }, { "epoch": 0.22671502095593823, "grad_norm": 0.16910241544246674, "learning_rate": 4.773292217710121e-06, "loss": 0.9948, "step": 31320 }, { "epoch": 0.22678740761652444, "grad_norm": 0.19384412467479706, "learning_rate": 4.773219831049534e-06, "loss": 0.9617, "step": 31330 }, { "epoch": 0.22685979427711062, "grad_norm": 0.17638161778450012, "learning_rate": 4.773147444388949e-06, "loss": 0.9865, "step": 31340 }, { "epoch": 0.2269321809376968, "grad_norm": 0.17400164902210236, "learning_rate": 4.7730750577283625e-06, "loss": 0.997, "step": 31350 }, { "epoch": 0.227004567598283, "grad_norm": 0.18208061158657074, "learning_rate": 4.773002671067776e-06, "loss": 1.0006, "step": 31360 }, { "epoch": 0.22707695425886917, "grad_norm": 0.17312349379062653, "learning_rate": 4.77293028440719e-06, "loss": 0.973, "step": 31370 }, { "epoch": 0.22714934091945535, "grad_norm": 0.18779698014259338, "learning_rate": 4.772857897746604e-06, "loss": 0.9899, "step": 31380 }, { "epoch": 0.22722172758004155, "grad_norm": 0.1715383231639862, "learning_rate": 4.772785511086018e-06, "loss": 0.9891, "step": 31390 }, { "epoch": 0.22729411424062773, "grad_norm": 0.18335622549057007, "learning_rate": 4.772713124425431e-06, "loss": 0.9925, "step": 31400 }, { "epoch": 0.22736650090121394, "grad_norm": 0.1920541524887085, "learning_rate": 4.772640737764845e-06, "loss": 0.9875, "step": 31410 }, { "epoch": 0.2274388875618001, "grad_norm": 0.16819268465042114, "learning_rate": 4.772568351104259e-06, "loss": 0.9909, "step": 31420 }, { "epoch": 0.2275112742223863, "grad_norm": 0.1579744815826416, "learning_rate": 4.772495964443672e-06, "loss": 0.9903, "step": 31430 }, { "epoch": 0.2275836608829725, "grad_norm": 0.1756497621536255, "learning_rate": 4.772423577783086e-06, "loss": 0.9948, "step": 31440 }, { "epoch": 0.22765604754355867, "grad_norm": 0.1611769050359726, "learning_rate": 4.7723511911225e-06, "loss": 0.9925, "step": 31450 }, { "epoch": 0.22772843420414485, "grad_norm": 0.19173139333724976, "learning_rate": 4.772278804461914e-06, "loss": 0.9761, "step": 31460 }, { "epoch": 0.22780082086473105, "grad_norm": 0.20270651578903198, "learning_rate": 4.772206417801328e-06, "loss": 0.9898, "step": 31470 }, { "epoch": 0.22787320752531723, "grad_norm": 0.17298614978790283, "learning_rate": 4.772134031140741e-06, "loss": 0.9871, "step": 31480 }, { "epoch": 0.22794559418590343, "grad_norm": 0.15793482959270477, "learning_rate": 4.772061644480156e-06, "loss": 0.9874, "step": 31490 }, { "epoch": 0.2280179808464896, "grad_norm": 0.1665399670600891, "learning_rate": 4.771989257819569e-06, "loss": 0.9963, "step": 31500 }, { "epoch": 0.2280903675070758, "grad_norm": 0.1729333996772766, "learning_rate": 4.771916871158983e-06, "loss": 0.9995, "step": 31510 }, { "epoch": 0.228162754167662, "grad_norm": 0.1829392910003662, "learning_rate": 4.7718444844983965e-06, "loss": 0.9848, "step": 31520 }, { "epoch": 0.22823514082824817, "grad_norm": 0.17102506756782532, "learning_rate": 4.771772097837811e-06, "loss": 0.9867, "step": 31530 }, { "epoch": 0.22830752748883434, "grad_norm": 0.1742861568927765, "learning_rate": 4.771699711177225e-06, "loss": 0.9888, "step": 31540 }, { "epoch": 0.22837991414942055, "grad_norm": 0.17398032546043396, "learning_rate": 4.771627324516638e-06, "loss": 0.9985, "step": 31550 }, { "epoch": 0.22845230081000673, "grad_norm": 0.168184295296669, "learning_rate": 4.771554937856052e-06, "loss": 0.9877, "step": 31560 }, { "epoch": 0.22852468747059293, "grad_norm": 0.1651362031698227, "learning_rate": 4.771482551195466e-06, "loss": 0.996, "step": 31570 }, { "epoch": 0.2285970741311791, "grad_norm": 0.17777453362941742, "learning_rate": 4.77141016453488e-06, "loss": 0.9945, "step": 31580 }, { "epoch": 0.22866946079176528, "grad_norm": 0.17504815757274628, "learning_rate": 4.7713377778742935e-06, "loss": 1.0095, "step": 31590 }, { "epoch": 0.2287418474523515, "grad_norm": 0.16557878255844116, "learning_rate": 4.771265391213707e-06, "loss": 0.9934, "step": 31600 }, { "epoch": 0.22881423411293766, "grad_norm": 0.1588304191827774, "learning_rate": 4.771193004553122e-06, "loss": 0.9674, "step": 31610 }, { "epoch": 0.22888662077352384, "grad_norm": 0.17581112682819366, "learning_rate": 4.771120617892535e-06, "loss": 0.9903, "step": 31620 }, { "epoch": 0.22895900743411005, "grad_norm": 0.15916821360588074, "learning_rate": 4.771048231231949e-06, "loss": 0.9846, "step": 31630 }, { "epoch": 0.22903139409469622, "grad_norm": 0.16646379232406616, "learning_rate": 4.7709758445713625e-06, "loss": 0.9747, "step": 31640 }, { "epoch": 0.22910378075528243, "grad_norm": 0.18210920691490173, "learning_rate": 4.770903457910777e-06, "loss": 0.9999, "step": 31650 }, { "epoch": 0.2291761674158686, "grad_norm": 0.1829901784658432, "learning_rate": 4.7708310712501905e-06, "loss": 0.9917, "step": 31660 }, { "epoch": 0.22924855407645478, "grad_norm": 0.16289442777633667, "learning_rate": 4.770758684589604e-06, "loss": 0.9858, "step": 31670 }, { "epoch": 0.22932094073704098, "grad_norm": 0.1644997000694275, "learning_rate": 4.770686297929018e-06, "loss": 0.9918, "step": 31680 }, { "epoch": 0.22939332739762716, "grad_norm": 0.1583040952682495, "learning_rate": 4.770613911268432e-06, "loss": 0.9852, "step": 31690 }, { "epoch": 0.22946571405821337, "grad_norm": 0.15572409331798553, "learning_rate": 4.770541524607846e-06, "loss": 1.0034, "step": 31700 }, { "epoch": 0.22953810071879954, "grad_norm": 0.185507133603096, "learning_rate": 4.7704691379472595e-06, "loss": 0.996, "step": 31710 }, { "epoch": 0.22961048737938572, "grad_norm": 0.16780561208724976, "learning_rate": 4.770396751286673e-06, "loss": 0.9902, "step": 31720 }, { "epoch": 0.22968287403997192, "grad_norm": 0.1608486771583557, "learning_rate": 4.7703243646260875e-06, "loss": 0.9956, "step": 31730 }, { "epoch": 0.2297552607005581, "grad_norm": 0.16754546761512756, "learning_rate": 4.770251977965501e-06, "loss": 0.9933, "step": 31740 }, { "epoch": 0.22982764736114428, "grad_norm": 0.16332760453224182, "learning_rate": 4.770179591304915e-06, "loss": 0.9819, "step": 31750 }, { "epoch": 0.22990003402173048, "grad_norm": 0.23022449016571045, "learning_rate": 4.770107204644328e-06, "loss": 0.9909, "step": 31760 }, { "epoch": 0.22997242068231666, "grad_norm": 0.18331989645957947, "learning_rate": 4.770034817983743e-06, "loss": 1.0003, "step": 31770 }, { "epoch": 0.23004480734290286, "grad_norm": 0.1962156593799591, "learning_rate": 4.7699624313231565e-06, "loss": 0.9884, "step": 31780 }, { "epoch": 0.23011719400348904, "grad_norm": 0.16017916798591614, "learning_rate": 4.76989004466257e-06, "loss": 0.9935, "step": 31790 }, { "epoch": 0.23018958066407522, "grad_norm": 0.16973306238651276, "learning_rate": 4.769817658001984e-06, "loss": 1.0018, "step": 31800 }, { "epoch": 0.23026196732466142, "grad_norm": 0.1658056229352951, "learning_rate": 4.769745271341398e-06, "loss": 0.9564, "step": 31810 }, { "epoch": 0.2303343539852476, "grad_norm": 0.1671288162469864, "learning_rate": 4.769672884680812e-06, "loss": 0.9972, "step": 31820 }, { "epoch": 0.23040674064583377, "grad_norm": 0.17118290066719055, "learning_rate": 4.769600498020225e-06, "loss": 0.9972, "step": 31830 }, { "epoch": 0.23047912730641998, "grad_norm": 0.1895531266927719, "learning_rate": 4.769528111359639e-06, "loss": 0.9919, "step": 31840 }, { "epoch": 0.23055151396700616, "grad_norm": 0.1677425503730774, "learning_rate": 4.769455724699053e-06, "loss": 0.9709, "step": 31850 }, { "epoch": 0.23062390062759236, "grad_norm": 0.15633396804332733, "learning_rate": 4.769383338038467e-06, "loss": 0.9762, "step": 31860 }, { "epoch": 0.23069628728817854, "grad_norm": 0.15433230996131897, "learning_rate": 4.769310951377881e-06, "loss": 0.9902, "step": 31870 }, { "epoch": 0.2307686739487647, "grad_norm": 0.17825239896774292, "learning_rate": 4.769238564717294e-06, "loss": 0.9933, "step": 31880 }, { "epoch": 0.23084106060935092, "grad_norm": 0.1626371145248413, "learning_rate": 4.769166178056708e-06, "loss": 0.9893, "step": 31890 }, { "epoch": 0.2309134472699371, "grad_norm": 0.17792868614196777, "learning_rate": 4.769093791396122e-06, "loss": 0.9825, "step": 31900 }, { "epoch": 0.23098583393052327, "grad_norm": 0.1791241466999054, "learning_rate": 4.769021404735536e-06, "loss": 0.9916, "step": 31910 }, { "epoch": 0.23105822059110948, "grad_norm": 0.1636131852865219, "learning_rate": 4.76894901807495e-06, "loss": 1.0103, "step": 31920 }, { "epoch": 0.23113060725169565, "grad_norm": 0.1715201586484909, "learning_rate": 4.768876631414363e-06, "loss": 0.988, "step": 31930 }, { "epoch": 0.23120299391228186, "grad_norm": 0.19205105304718018, "learning_rate": 4.768804244753777e-06, "loss": 0.9843, "step": 31940 }, { "epoch": 0.23127538057286803, "grad_norm": 0.177320197224617, "learning_rate": 4.7687318580931905e-06, "loss": 1.0003, "step": 31950 }, { "epoch": 0.2313477672334542, "grad_norm": 0.16277745366096497, "learning_rate": 4.768659471432605e-06, "loss": 0.9858, "step": 31960 }, { "epoch": 0.23142015389404041, "grad_norm": 0.16934184730052948, "learning_rate": 4.7685870847720186e-06, "loss": 1.0059, "step": 31970 }, { "epoch": 0.2314925405546266, "grad_norm": 0.17568084597587585, "learning_rate": 4.768514698111432e-06, "loss": 0.9886, "step": 31980 }, { "epoch": 0.23156492721521277, "grad_norm": 0.16883017122745514, "learning_rate": 4.768442311450846e-06, "loss": 1.0016, "step": 31990 }, { "epoch": 0.23163731387579897, "grad_norm": 0.170290008187294, "learning_rate": 4.76836992479026e-06, "loss": 0.9923, "step": 32000 }, { "epoch": 0.23170970053638515, "grad_norm": 0.17787618935108185, "learning_rate": 4.768297538129674e-06, "loss": 0.974, "step": 32010 }, { "epoch": 0.23178208719697135, "grad_norm": 0.16933457553386688, "learning_rate": 4.7682251514690875e-06, "loss": 0.9861, "step": 32020 }, { "epoch": 0.23185447385755753, "grad_norm": 0.1682525873184204, "learning_rate": 4.768152764808501e-06, "loss": 0.9785, "step": 32030 }, { "epoch": 0.2319268605181437, "grad_norm": 0.17858469486236572, "learning_rate": 4.768080378147915e-06, "loss": 0.9788, "step": 32040 }, { "epoch": 0.2319992471787299, "grad_norm": 0.16218888759613037, "learning_rate": 4.768007991487329e-06, "loss": 0.9851, "step": 32050 }, { "epoch": 0.2320716338393161, "grad_norm": 0.1646839827299118, "learning_rate": 4.767935604826743e-06, "loss": 0.9798, "step": 32060 }, { "epoch": 0.23214402049990226, "grad_norm": 0.20934203267097473, "learning_rate": 4.767863218166156e-06, "loss": 0.9786, "step": 32070 }, { "epoch": 0.23221640716048847, "grad_norm": 0.1663549393415451, "learning_rate": 4.76779083150557e-06, "loss": 0.9759, "step": 32080 }, { "epoch": 0.23228879382107465, "grad_norm": 0.16773077845573425, "learning_rate": 4.7677184448449845e-06, "loss": 0.997, "step": 32090 }, { "epoch": 0.23236118048166085, "grad_norm": 0.18589721620082855, "learning_rate": 4.767646058184398e-06, "loss": 0.991, "step": 32100 }, { "epoch": 0.23243356714224703, "grad_norm": 0.17491789162158966, "learning_rate": 4.767573671523812e-06, "loss": 0.9837, "step": 32110 }, { "epoch": 0.2325059538028332, "grad_norm": 0.18429701030254364, "learning_rate": 4.767501284863225e-06, "loss": 0.9795, "step": 32120 }, { "epoch": 0.2325783404634194, "grad_norm": 0.16141755878925323, "learning_rate": 4.76742889820264e-06, "loss": 1.0045, "step": 32130 }, { "epoch": 0.23265072712400559, "grad_norm": 0.18277022242546082, "learning_rate": 4.7673565115420534e-06, "loss": 0.9937, "step": 32140 }, { "epoch": 0.23272311378459176, "grad_norm": 0.17076079547405243, "learning_rate": 4.767284124881467e-06, "loss": 0.9845, "step": 32150 }, { "epoch": 0.23279550044517797, "grad_norm": 0.20235513150691986, "learning_rate": 4.767211738220881e-06, "loss": 0.9765, "step": 32160 }, { "epoch": 0.23286788710576414, "grad_norm": 0.2140243798494339, "learning_rate": 4.767139351560295e-06, "loss": 0.9939, "step": 32170 }, { "epoch": 0.23294027376635035, "grad_norm": 0.17782087624073029, "learning_rate": 4.767066964899709e-06, "loss": 0.9904, "step": 32180 }, { "epoch": 0.23301266042693652, "grad_norm": 0.15998302400112152, "learning_rate": 4.766994578239122e-06, "loss": 0.9819, "step": 32190 }, { "epoch": 0.2330850470875227, "grad_norm": 0.17034156620502472, "learning_rate": 4.766922191578536e-06, "loss": 0.9825, "step": 32200 }, { "epoch": 0.2331574337481089, "grad_norm": 0.1846659928560257, "learning_rate": 4.7668498049179504e-06, "loss": 0.9788, "step": 32210 }, { "epoch": 0.23322982040869508, "grad_norm": 0.1705406755208969, "learning_rate": 4.766777418257364e-06, "loss": 0.9898, "step": 32220 }, { "epoch": 0.2333022070692813, "grad_norm": 0.16298960149288177, "learning_rate": 4.766705031596778e-06, "loss": 1.008, "step": 32230 }, { "epoch": 0.23337459372986746, "grad_norm": 0.1563328206539154, "learning_rate": 4.766632644936191e-06, "loss": 0.9991, "step": 32240 }, { "epoch": 0.23344698039045364, "grad_norm": 0.19101522862911224, "learning_rate": 4.766560258275606e-06, "loss": 0.9866, "step": 32250 }, { "epoch": 0.23351936705103984, "grad_norm": 0.16439121961593628, "learning_rate": 4.766487871615019e-06, "loss": 0.9864, "step": 32260 }, { "epoch": 0.23359175371162602, "grad_norm": 0.16966257989406586, "learning_rate": 4.766415484954433e-06, "loss": 0.99, "step": 32270 }, { "epoch": 0.2336641403722122, "grad_norm": 0.17119558155536652, "learning_rate": 4.766343098293847e-06, "loss": 0.985, "step": 32280 }, { "epoch": 0.2337365270327984, "grad_norm": 0.16533613204956055, "learning_rate": 4.766270711633261e-06, "loss": 0.9725, "step": 32290 }, { "epoch": 0.23380891369338458, "grad_norm": 0.21587282419204712, "learning_rate": 4.766198324972675e-06, "loss": 0.9851, "step": 32300 }, { "epoch": 0.23388130035397078, "grad_norm": 0.1782499998807907, "learning_rate": 4.766125938312088e-06, "loss": 0.9908, "step": 32310 }, { "epoch": 0.23395368701455696, "grad_norm": 0.17237718403339386, "learning_rate": 4.766053551651502e-06, "loss": 0.9953, "step": 32320 }, { "epoch": 0.23402607367514314, "grad_norm": 0.16807182133197784, "learning_rate": 4.765981164990916e-06, "loss": 0.9937, "step": 32330 }, { "epoch": 0.23409846033572934, "grad_norm": 0.17400771379470825, "learning_rate": 4.76590877833033e-06, "loss": 0.9917, "step": 32340 }, { "epoch": 0.23417084699631552, "grad_norm": 0.16965161263942719, "learning_rate": 4.765836391669744e-06, "loss": 0.9828, "step": 32350 }, { "epoch": 0.2342432336569017, "grad_norm": 0.17362885177135468, "learning_rate": 4.765764005009157e-06, "loss": 0.9778, "step": 32360 }, { "epoch": 0.2343156203174879, "grad_norm": 0.16530746221542358, "learning_rate": 4.765691618348572e-06, "loss": 1.0049, "step": 32370 }, { "epoch": 0.23438800697807408, "grad_norm": 0.19061051309108734, "learning_rate": 4.765619231687985e-06, "loss": 1.0037, "step": 32380 }, { "epoch": 0.23446039363866028, "grad_norm": 0.19029873609542847, "learning_rate": 4.765546845027399e-06, "loss": 0.9754, "step": 32390 }, { "epoch": 0.23453278029924646, "grad_norm": 0.17064446210861206, "learning_rate": 4.7654744583668125e-06, "loss": 0.996, "step": 32400 }, { "epoch": 0.23460516695983263, "grad_norm": 0.17918898165225983, "learning_rate": 4.765402071706227e-06, "loss": 1.0014, "step": 32410 }, { "epoch": 0.23467755362041884, "grad_norm": 0.16727639734745026, "learning_rate": 4.765329685045641e-06, "loss": 0.9911, "step": 32420 }, { "epoch": 0.23474994028100502, "grad_norm": 0.16551131010055542, "learning_rate": 4.765257298385054e-06, "loss": 1.0024, "step": 32430 }, { "epoch": 0.2348223269415912, "grad_norm": 0.18284402787685394, "learning_rate": 4.765184911724468e-06, "loss": 0.9876, "step": 32440 }, { "epoch": 0.2348947136021774, "grad_norm": 0.1902831643819809, "learning_rate": 4.765112525063882e-06, "loss": 0.9884, "step": 32450 }, { "epoch": 0.23496710026276357, "grad_norm": 0.19926203787326813, "learning_rate": 4.765040138403295e-06, "loss": 0.9889, "step": 32460 }, { "epoch": 0.23503948692334978, "grad_norm": 0.1620095819234848, "learning_rate": 4.764967751742709e-06, "loss": 0.9882, "step": 32470 }, { "epoch": 0.23511187358393595, "grad_norm": 0.19026455283164978, "learning_rate": 4.764895365082123e-06, "loss": 0.9652, "step": 32480 }, { "epoch": 0.23518426024452213, "grad_norm": 0.18305736780166626, "learning_rate": 4.764822978421537e-06, "loss": 0.984, "step": 32490 }, { "epoch": 0.23525664690510834, "grad_norm": 0.16484969854354858, "learning_rate": 4.76475059176095e-06, "loss": 0.9798, "step": 32500 }, { "epoch": 0.2353290335656945, "grad_norm": 0.1810574233531952, "learning_rate": 4.764678205100364e-06, "loss": 0.9762, "step": 32510 }, { "epoch": 0.2354014202262807, "grad_norm": 0.16701748967170715, "learning_rate": 4.7646058184397785e-06, "loss": 0.9907, "step": 32520 }, { "epoch": 0.2354738068868669, "grad_norm": 0.17856666445732117, "learning_rate": 4.764533431779192e-06, "loss": 0.9772, "step": 32530 }, { "epoch": 0.23554619354745307, "grad_norm": 0.19198378920555115, "learning_rate": 4.764461045118606e-06, "loss": 0.9729, "step": 32540 }, { "epoch": 0.23561858020803927, "grad_norm": 0.18789614737033844, "learning_rate": 4.764388658458019e-06, "loss": 0.986, "step": 32550 }, { "epoch": 0.23569096686862545, "grad_norm": 0.1816016137599945, "learning_rate": 4.764316271797434e-06, "loss": 0.9902, "step": 32560 }, { "epoch": 0.23576335352921163, "grad_norm": 0.17151084542274475, "learning_rate": 4.764243885136847e-06, "loss": 1.0051, "step": 32570 }, { "epoch": 0.23583574018979783, "grad_norm": 0.16930073499679565, "learning_rate": 4.764171498476261e-06, "loss": 0.9954, "step": 32580 }, { "epoch": 0.235908126850384, "grad_norm": 0.16622294485569, "learning_rate": 4.764099111815675e-06, "loss": 0.9795, "step": 32590 }, { "epoch": 0.23598051351097019, "grad_norm": 0.1712275892496109, "learning_rate": 4.764026725155089e-06, "loss": 0.978, "step": 32600 }, { "epoch": 0.2360529001715564, "grad_norm": 0.17688055336475372, "learning_rate": 4.763954338494503e-06, "loss": 0.9731, "step": 32610 }, { "epoch": 0.23612528683214257, "grad_norm": 0.1737777143716812, "learning_rate": 4.763881951833916e-06, "loss": 0.9967, "step": 32620 }, { "epoch": 0.23619767349272877, "grad_norm": 0.2481534481048584, "learning_rate": 4.76380956517333e-06, "loss": 0.9943, "step": 32630 }, { "epoch": 0.23627006015331495, "grad_norm": 0.1767176389694214, "learning_rate": 4.7637371785127436e-06, "loss": 0.9884, "step": 32640 }, { "epoch": 0.23634244681390112, "grad_norm": 0.17765220999717712, "learning_rate": 4.763664791852158e-06, "loss": 0.9768, "step": 32650 }, { "epoch": 0.23641483347448733, "grad_norm": 0.17526815831661224, "learning_rate": 4.763592405191572e-06, "loss": 0.9778, "step": 32660 }, { "epoch": 0.2364872201350735, "grad_norm": 0.22117316722869873, "learning_rate": 4.763520018530985e-06, "loss": 0.9754, "step": 32670 }, { "epoch": 0.23655960679565968, "grad_norm": 0.17404435575008392, "learning_rate": 4.763447631870399e-06, "loss": 0.9928, "step": 32680 }, { "epoch": 0.2366319934562459, "grad_norm": 0.17994847893714905, "learning_rate": 4.763375245209813e-06, "loss": 0.9899, "step": 32690 }, { "epoch": 0.23670438011683206, "grad_norm": 0.2047555297613144, "learning_rate": 4.763302858549227e-06, "loss": 0.986, "step": 32700 }, { "epoch": 0.23677676677741827, "grad_norm": 0.19384850561618805, "learning_rate": 4.7632304718886406e-06, "loss": 0.9829, "step": 32710 }, { "epoch": 0.23684915343800444, "grad_norm": 0.18670782446861267, "learning_rate": 4.763158085228054e-06, "loss": 0.979, "step": 32720 }, { "epoch": 0.23692154009859062, "grad_norm": 0.1731204390525818, "learning_rate": 4.763085698567469e-06, "loss": 0.9934, "step": 32730 }, { "epoch": 0.23699392675917683, "grad_norm": 0.1660483479499817, "learning_rate": 4.763013311906882e-06, "loss": 0.9775, "step": 32740 }, { "epoch": 0.237066313419763, "grad_norm": 0.1656530648469925, "learning_rate": 4.762940925246296e-06, "loss": 0.9899, "step": 32750 }, { "epoch": 0.23713870008034918, "grad_norm": 0.16526754200458527, "learning_rate": 4.7628685385857095e-06, "loss": 0.9766, "step": 32760 }, { "epoch": 0.23721108674093538, "grad_norm": 0.16604174673557281, "learning_rate": 4.762796151925124e-06, "loss": 0.9883, "step": 32770 }, { "epoch": 0.23728347340152156, "grad_norm": 0.1578882932662964, "learning_rate": 4.7627237652645376e-06, "loss": 0.9888, "step": 32780 }, { "epoch": 0.23735586006210777, "grad_norm": 0.1657019555568695, "learning_rate": 4.762651378603951e-06, "loss": 0.9808, "step": 32790 }, { "epoch": 0.23742824672269394, "grad_norm": 0.2883431017398834, "learning_rate": 4.762578991943365e-06, "loss": 0.9934, "step": 32800 }, { "epoch": 0.23750063338328012, "grad_norm": 0.17559395730495453, "learning_rate": 4.762506605282779e-06, "loss": 0.9899, "step": 32810 }, { "epoch": 0.23757302004386632, "grad_norm": 0.16590864956378937, "learning_rate": 4.762434218622193e-06, "loss": 0.9745, "step": 32820 }, { "epoch": 0.2376454067044525, "grad_norm": 0.17122603952884674, "learning_rate": 4.7623618319616065e-06, "loss": 0.9744, "step": 32830 }, { "epoch": 0.2377177933650387, "grad_norm": 0.16767603158950806, "learning_rate": 4.76228944530102e-06, "loss": 0.9875, "step": 32840 }, { "epoch": 0.23779018002562488, "grad_norm": 0.16585096716880798, "learning_rate": 4.7622170586404346e-06, "loss": 0.9873, "step": 32850 }, { "epoch": 0.23786256668621106, "grad_norm": 0.16618821024894714, "learning_rate": 4.762144671979848e-06, "loss": 0.9841, "step": 32860 }, { "epoch": 0.23793495334679726, "grad_norm": 0.18130016326904297, "learning_rate": 4.762072285319262e-06, "loss": 0.9948, "step": 32870 }, { "epoch": 0.23800734000738344, "grad_norm": 0.16200359165668488, "learning_rate": 4.7619998986586754e-06, "loss": 0.9913, "step": 32880 }, { "epoch": 0.23807972666796962, "grad_norm": 0.16042807698249817, "learning_rate": 4.76192751199809e-06, "loss": 0.9835, "step": 32890 }, { "epoch": 0.23815211332855582, "grad_norm": 0.1726037710905075, "learning_rate": 4.7618551253375035e-06, "loss": 0.9665, "step": 32900 }, { "epoch": 0.238224499989142, "grad_norm": 0.16508442163467407, "learning_rate": 4.761782738676917e-06, "loss": 1.0037, "step": 32910 }, { "epoch": 0.2382968866497282, "grad_norm": 0.16878628730773926, "learning_rate": 4.761710352016331e-06, "loss": 0.9897, "step": 32920 }, { "epoch": 0.23836927331031438, "grad_norm": 0.17071186006069183, "learning_rate": 4.761637965355745e-06, "loss": 1.0066, "step": 32930 }, { "epoch": 0.23844165997090055, "grad_norm": 0.16093328595161438, "learning_rate": 4.761565578695159e-06, "loss": 0.9938, "step": 32940 }, { "epoch": 0.23851404663148676, "grad_norm": 0.181466743350029, "learning_rate": 4.7614931920345724e-06, "loss": 0.9863, "step": 32950 }, { "epoch": 0.23858643329207294, "grad_norm": 0.19711147248744965, "learning_rate": 4.761420805373986e-06, "loss": 0.9867, "step": 32960 }, { "epoch": 0.2386588199526591, "grad_norm": 0.17717312276363373, "learning_rate": 4.7613484187134005e-06, "loss": 0.9923, "step": 32970 }, { "epoch": 0.23873120661324532, "grad_norm": 0.17629402875900269, "learning_rate": 4.761276032052814e-06, "loss": 1.0001, "step": 32980 }, { "epoch": 0.2388035932738315, "grad_norm": 0.16870364546775818, "learning_rate": 4.761203645392227e-06, "loss": 0.9904, "step": 32990 }, { "epoch": 0.2388759799344177, "grad_norm": 0.17565900087356567, "learning_rate": 4.761131258731641e-06, "loss": 0.9871, "step": 33000 }, { "epoch": 0.23894836659500387, "grad_norm": 0.20653510093688965, "learning_rate": 4.761058872071055e-06, "loss": 0.9983, "step": 33010 }, { "epoch": 0.23902075325559005, "grad_norm": 0.165169820189476, "learning_rate": 4.760986485410469e-06, "loss": 0.9812, "step": 33020 }, { "epoch": 0.23909313991617626, "grad_norm": 0.29448917508125305, "learning_rate": 4.760914098749882e-06, "loss": 0.9681, "step": 33030 }, { "epoch": 0.23916552657676243, "grad_norm": 0.1737428605556488, "learning_rate": 4.760841712089297e-06, "loss": 0.9895, "step": 33040 }, { "epoch": 0.2392379132373486, "grad_norm": 0.15930554270744324, "learning_rate": 4.76076932542871e-06, "loss": 0.9923, "step": 33050 }, { "epoch": 0.23931029989793481, "grad_norm": 0.15467441082000732, "learning_rate": 4.760696938768124e-06, "loss": 0.9791, "step": 33060 }, { "epoch": 0.239382686558521, "grad_norm": 0.16547755897045135, "learning_rate": 4.7606245521075375e-06, "loss": 0.9836, "step": 33070 }, { "epoch": 0.2394550732191072, "grad_norm": 0.1736575812101364, "learning_rate": 4.760552165446952e-06, "loss": 0.9818, "step": 33080 }, { "epoch": 0.23952745987969337, "grad_norm": 0.17099529504776, "learning_rate": 4.760479778786366e-06, "loss": 0.9936, "step": 33090 }, { "epoch": 0.23959984654027955, "grad_norm": 0.16323433816432953, "learning_rate": 4.760407392125779e-06, "loss": 0.9748, "step": 33100 }, { "epoch": 0.23967223320086575, "grad_norm": 0.174141988158226, "learning_rate": 4.760335005465193e-06, "loss": 0.9935, "step": 33110 }, { "epoch": 0.23974461986145193, "grad_norm": 0.17322736978530884, "learning_rate": 4.760262618804607e-06, "loss": 0.9852, "step": 33120 }, { "epoch": 0.2398170065220381, "grad_norm": 0.1788867712020874, "learning_rate": 4.760190232144021e-06, "loss": 0.9934, "step": 33130 }, { "epoch": 0.2398893931826243, "grad_norm": 0.1551644206047058, "learning_rate": 4.7601178454834345e-06, "loss": 0.9858, "step": 33140 }, { "epoch": 0.2399617798432105, "grad_norm": 0.16504356265068054, "learning_rate": 4.760045458822848e-06, "loss": 0.974, "step": 33150 }, { "epoch": 0.2400341665037967, "grad_norm": 0.16401821374893188, "learning_rate": 4.759973072162263e-06, "loss": 0.9849, "step": 33160 }, { "epoch": 0.24010655316438287, "grad_norm": 0.1985578089952469, "learning_rate": 4.759900685501676e-06, "loss": 0.9823, "step": 33170 }, { "epoch": 0.24017893982496905, "grad_norm": 0.1741316169500351, "learning_rate": 4.75982829884109e-06, "loss": 0.9926, "step": 33180 }, { "epoch": 0.24025132648555525, "grad_norm": 0.17418281733989716, "learning_rate": 4.7597559121805035e-06, "loss": 0.9871, "step": 33190 }, { "epoch": 0.24032371314614143, "grad_norm": 0.16395236551761627, "learning_rate": 4.759683525519918e-06, "loss": 0.9792, "step": 33200 }, { "epoch": 0.2403960998067276, "grad_norm": 0.1797424554824829, "learning_rate": 4.7596111388593315e-06, "loss": 0.9794, "step": 33210 }, { "epoch": 0.2404684864673138, "grad_norm": 0.15799680352210999, "learning_rate": 4.759538752198745e-06, "loss": 0.9801, "step": 33220 }, { "epoch": 0.24054087312789998, "grad_norm": 0.1746169626712799, "learning_rate": 4.759466365538159e-06, "loss": 0.9923, "step": 33230 }, { "epoch": 0.2406132597884862, "grad_norm": 0.16156165301799774, "learning_rate": 4.759393978877573e-06, "loss": 0.9762, "step": 33240 }, { "epoch": 0.24068564644907237, "grad_norm": 0.1618238091468811, "learning_rate": 4.759321592216987e-06, "loss": 0.9839, "step": 33250 }, { "epoch": 0.24075803310965854, "grad_norm": 0.17595741152763367, "learning_rate": 4.7592492055564005e-06, "loss": 0.9902, "step": 33260 }, { "epoch": 0.24083041977024475, "grad_norm": 0.16890239715576172, "learning_rate": 4.759176818895814e-06, "loss": 0.9763, "step": 33270 }, { "epoch": 0.24090280643083092, "grad_norm": 0.17769628763198853, "learning_rate": 4.759104432235228e-06, "loss": 0.9644, "step": 33280 }, { "epoch": 0.2409751930914171, "grad_norm": 0.17169831693172455, "learning_rate": 4.759032045574642e-06, "loss": 0.9999, "step": 33290 }, { "epoch": 0.2410475797520033, "grad_norm": 0.16996458172798157, "learning_rate": 4.758959658914056e-06, "loss": 0.9833, "step": 33300 }, { "epoch": 0.24111996641258948, "grad_norm": 0.1796058863401413, "learning_rate": 4.758887272253469e-06, "loss": 0.9757, "step": 33310 }, { "epoch": 0.24119235307317569, "grad_norm": 0.17819707095623016, "learning_rate": 4.758814885592883e-06, "loss": 0.9874, "step": 33320 }, { "epoch": 0.24126473973376186, "grad_norm": 0.16091227531433105, "learning_rate": 4.7587424989322975e-06, "loss": 0.9809, "step": 33330 }, { "epoch": 0.24133712639434804, "grad_norm": 0.17495249211788177, "learning_rate": 4.758670112271711e-06, "loss": 0.9992, "step": 33340 }, { "epoch": 0.24140951305493424, "grad_norm": 0.16773755848407745, "learning_rate": 4.758597725611125e-06, "loss": 0.9884, "step": 33350 }, { "epoch": 0.24148189971552042, "grad_norm": 0.23348310589790344, "learning_rate": 4.758525338950538e-06, "loss": 0.9905, "step": 33360 }, { "epoch": 0.2415542863761066, "grad_norm": 0.1686268001794815, "learning_rate": 4.758452952289953e-06, "loss": 1.0038, "step": 33370 }, { "epoch": 0.2416266730366928, "grad_norm": 0.15924613177776337, "learning_rate": 4.758380565629366e-06, "loss": 0.9911, "step": 33380 }, { "epoch": 0.24169905969727898, "grad_norm": 0.19010992348194122, "learning_rate": 4.75830817896878e-06, "loss": 0.9824, "step": 33390 }, { "epoch": 0.24177144635786518, "grad_norm": 0.18180611729621887, "learning_rate": 4.758235792308194e-06, "loss": 0.9813, "step": 33400 }, { "epoch": 0.24184383301845136, "grad_norm": 0.1697949916124344, "learning_rate": 4.758163405647608e-06, "loss": 0.9783, "step": 33410 }, { "epoch": 0.24191621967903754, "grad_norm": 0.1699085682630539, "learning_rate": 4.758091018987022e-06, "loss": 0.9754, "step": 33420 }, { "epoch": 0.24198860633962374, "grad_norm": 0.18635696172714233, "learning_rate": 4.758018632326435e-06, "loss": 0.9779, "step": 33430 }, { "epoch": 0.24206099300020992, "grad_norm": 0.17529450356960297, "learning_rate": 4.757946245665849e-06, "loss": 0.9768, "step": 33440 }, { "epoch": 0.24213337966079612, "grad_norm": 0.1667739450931549, "learning_rate": 4.757873859005263e-06, "loss": 0.9699, "step": 33450 }, { "epoch": 0.2422057663213823, "grad_norm": 0.17115871608257294, "learning_rate": 4.757801472344677e-06, "loss": 0.9964, "step": 33460 }, { "epoch": 0.24227815298196848, "grad_norm": 0.19105277955532074, "learning_rate": 4.757729085684091e-06, "loss": 0.9917, "step": 33470 }, { "epoch": 0.24235053964255468, "grad_norm": 0.16427116096019745, "learning_rate": 4.757656699023504e-06, "loss": 0.9769, "step": 33480 }, { "epoch": 0.24242292630314086, "grad_norm": 0.17592819035053253, "learning_rate": 4.757584312362919e-06, "loss": 0.9819, "step": 33490 }, { "epoch": 0.24249531296372703, "grad_norm": 0.22349461913108826, "learning_rate": 4.757511925702332e-06, "loss": 0.9884, "step": 33500 }, { "epoch": 0.24256769962431324, "grad_norm": 0.15861837565898895, "learning_rate": 4.757439539041746e-06, "loss": 0.9811, "step": 33510 }, { "epoch": 0.24264008628489941, "grad_norm": 0.1849350929260254, "learning_rate": 4.7573671523811596e-06, "loss": 0.9861, "step": 33520 }, { "epoch": 0.24271247294548562, "grad_norm": 0.18118155002593994, "learning_rate": 4.757294765720573e-06, "loss": 0.9915, "step": 33530 }, { "epoch": 0.2427848596060718, "grad_norm": 0.1626245677471161, "learning_rate": 4.757222379059987e-06, "loss": 1.0021, "step": 33540 }, { "epoch": 0.24285724626665797, "grad_norm": 0.1634633094072342, "learning_rate": 4.7571499923994e-06, "loss": 0.982, "step": 33550 }, { "epoch": 0.24292963292724418, "grad_norm": 0.23890525102615356, "learning_rate": 4.757077605738815e-06, "loss": 0.9864, "step": 33560 }, { "epoch": 0.24300201958783035, "grad_norm": 0.1567389816045761, "learning_rate": 4.7570052190782285e-06, "loss": 0.9626, "step": 33570 }, { "epoch": 0.24307440624841653, "grad_norm": 0.16346383094787598, "learning_rate": 4.756932832417642e-06, "loss": 0.9792, "step": 33580 }, { "epoch": 0.24314679290900273, "grad_norm": 0.1675373762845993, "learning_rate": 4.756860445757056e-06, "loss": 0.9689, "step": 33590 }, { "epoch": 0.2432191795695889, "grad_norm": 0.1947060525417328, "learning_rate": 4.75678805909647e-06, "loss": 0.9719, "step": 33600 }, { "epoch": 0.24329156623017512, "grad_norm": 0.17084960639476776, "learning_rate": 4.756715672435884e-06, "loss": 0.9709, "step": 33610 }, { "epoch": 0.2433639528907613, "grad_norm": 0.16235877573490143, "learning_rate": 4.756643285775297e-06, "loss": 0.9808, "step": 33620 }, { "epoch": 0.24343633955134747, "grad_norm": 0.16720472276210785, "learning_rate": 4.756570899114711e-06, "loss": 0.9817, "step": 33630 }, { "epoch": 0.24350872621193367, "grad_norm": 0.16974382102489471, "learning_rate": 4.7564985124541255e-06, "loss": 0.9774, "step": 33640 }, { "epoch": 0.24358111287251985, "grad_norm": 0.16924746334552765, "learning_rate": 4.756426125793539e-06, "loss": 0.9672, "step": 33650 }, { "epoch": 0.24365349953310603, "grad_norm": 0.19946692883968353, "learning_rate": 4.756353739132953e-06, "loss": 1.0038, "step": 33660 }, { "epoch": 0.24372588619369223, "grad_norm": 0.17359711229801178, "learning_rate": 4.756281352472366e-06, "loss": 0.9763, "step": 33670 }, { "epoch": 0.2437982728542784, "grad_norm": 0.1743346005678177, "learning_rate": 4.756208965811781e-06, "loss": 0.9859, "step": 33680 }, { "epoch": 0.2438706595148646, "grad_norm": 0.1658143252134323, "learning_rate": 4.7561365791511944e-06, "loss": 1.0053, "step": 33690 }, { "epoch": 0.2439430461754508, "grad_norm": 0.17681175470352173, "learning_rate": 4.756064192490608e-06, "loss": 0.9842, "step": 33700 }, { "epoch": 0.24401543283603697, "grad_norm": 0.17499324679374695, "learning_rate": 4.755991805830022e-06, "loss": 0.9822, "step": 33710 }, { "epoch": 0.24408781949662317, "grad_norm": 0.15076082944869995, "learning_rate": 4.755919419169436e-06, "loss": 0.97, "step": 33720 }, { "epoch": 0.24416020615720935, "grad_norm": 0.17643196880817413, "learning_rate": 4.75584703250885e-06, "loss": 0.9888, "step": 33730 }, { "epoch": 0.24423259281779552, "grad_norm": 0.24239002168178558, "learning_rate": 4.755774645848263e-06, "loss": 0.9886, "step": 33740 }, { "epoch": 0.24430497947838173, "grad_norm": 0.16595035791397095, "learning_rate": 4.755702259187677e-06, "loss": 0.991, "step": 33750 }, { "epoch": 0.2443773661389679, "grad_norm": 0.1724718064069748, "learning_rate": 4.7556298725270914e-06, "loss": 0.9744, "step": 33760 }, { "epoch": 0.2444497527995541, "grad_norm": 0.17158448696136475, "learning_rate": 4.755557485866505e-06, "loss": 0.9853, "step": 33770 }, { "epoch": 0.2445221394601403, "grad_norm": 0.23816993832588196, "learning_rate": 4.755485099205919e-06, "loss": 1.0021, "step": 33780 }, { "epoch": 0.24459452612072646, "grad_norm": 0.16831299662590027, "learning_rate": 4.755412712545332e-06, "loss": 0.9886, "step": 33790 }, { "epoch": 0.24466691278131267, "grad_norm": 0.1661224067211151, "learning_rate": 4.755340325884747e-06, "loss": 1.002, "step": 33800 }, { "epoch": 0.24473929944189884, "grad_norm": 0.16861529648303986, "learning_rate": 4.75526793922416e-06, "loss": 0.9858, "step": 33810 }, { "epoch": 0.24481168610248502, "grad_norm": 0.17031748592853546, "learning_rate": 4.755195552563574e-06, "loss": 0.9806, "step": 33820 }, { "epoch": 0.24488407276307123, "grad_norm": 0.17578351497650146, "learning_rate": 4.755123165902988e-06, "loss": 0.9785, "step": 33830 }, { "epoch": 0.2449564594236574, "grad_norm": 0.18001072108745575, "learning_rate": 4.755050779242402e-06, "loss": 0.9939, "step": 33840 }, { "epoch": 0.2450288460842436, "grad_norm": 0.18239019811153412, "learning_rate": 4.754978392581816e-06, "loss": 0.9748, "step": 33850 }, { "epoch": 0.24510123274482978, "grad_norm": 0.17754653096199036, "learning_rate": 4.754906005921229e-06, "loss": 0.989, "step": 33860 }, { "epoch": 0.24517361940541596, "grad_norm": 0.1725796014070511, "learning_rate": 4.754833619260643e-06, "loss": 0.9888, "step": 33870 }, { "epoch": 0.24524600606600216, "grad_norm": 0.17696940898895264, "learning_rate": 4.754761232600057e-06, "loss": 0.9773, "step": 33880 }, { "epoch": 0.24531839272658834, "grad_norm": 0.18559856712818146, "learning_rate": 4.754688845939471e-06, "loss": 0.9736, "step": 33890 }, { "epoch": 0.24539077938717452, "grad_norm": 0.17658168077468872, "learning_rate": 4.754616459278885e-06, "loss": 1.0012, "step": 33900 }, { "epoch": 0.24546316604776072, "grad_norm": 0.15067683160305023, "learning_rate": 4.754544072618298e-06, "loss": 0.982, "step": 33910 }, { "epoch": 0.2455355527083469, "grad_norm": 0.18849797546863556, "learning_rate": 4.754471685957712e-06, "loss": 0.9798, "step": 33920 }, { "epoch": 0.2456079393689331, "grad_norm": 0.16818571090698242, "learning_rate": 4.754399299297126e-06, "loss": 0.9772, "step": 33930 }, { "epoch": 0.24568032602951928, "grad_norm": 0.17547225952148438, "learning_rate": 4.75432691263654e-06, "loss": 0.9829, "step": 33940 }, { "epoch": 0.24575271269010546, "grad_norm": 0.17536889016628265, "learning_rate": 4.7542545259759535e-06, "loss": 0.9678, "step": 33950 }, { "epoch": 0.24582509935069166, "grad_norm": 0.17033427953720093, "learning_rate": 4.754182139315367e-06, "loss": 0.9967, "step": 33960 }, { "epoch": 0.24589748601127784, "grad_norm": 0.18375976383686066, "learning_rate": 4.754109752654782e-06, "loss": 0.9946, "step": 33970 }, { "epoch": 0.24596987267186404, "grad_norm": 0.15287001430988312, "learning_rate": 4.754037365994195e-06, "loss": 0.9848, "step": 33980 }, { "epoch": 0.24604225933245022, "grad_norm": 0.17237313091754913, "learning_rate": 4.753964979333609e-06, "loss": 0.9876, "step": 33990 }, { "epoch": 0.2461146459930364, "grad_norm": 0.16331906616687775, "learning_rate": 4.7538925926730225e-06, "loss": 0.9934, "step": 34000 }, { "epoch": 0.2461870326536226, "grad_norm": 0.17287370562553406, "learning_rate": 4.753820206012437e-06, "loss": 0.9888, "step": 34010 }, { "epoch": 0.24625941931420878, "grad_norm": 0.16767475008964539, "learning_rate": 4.7537478193518505e-06, "loss": 0.9969, "step": 34020 }, { "epoch": 0.24633180597479495, "grad_norm": 0.1597083956003189, "learning_rate": 4.753675432691264e-06, "loss": 0.9748, "step": 34030 }, { "epoch": 0.24640419263538116, "grad_norm": 0.17577607929706573, "learning_rate": 4.753603046030678e-06, "loss": 0.9894, "step": 34040 }, { "epoch": 0.24647657929596734, "grad_norm": 0.1721176654100418, "learning_rate": 4.753530659370091e-06, "loss": 0.9785, "step": 34050 }, { "epoch": 0.24654896595655354, "grad_norm": 0.17841075360774994, "learning_rate": 4.753458272709505e-06, "loss": 0.9734, "step": 34060 }, { "epoch": 0.24662135261713972, "grad_norm": 0.18525052070617676, "learning_rate": 4.753385886048919e-06, "loss": 0.9842, "step": 34070 }, { "epoch": 0.2466937392777259, "grad_norm": 0.18916094303131104, "learning_rate": 4.753313499388333e-06, "loss": 0.9919, "step": 34080 }, { "epoch": 0.2467661259383121, "grad_norm": 0.21833638846874237, "learning_rate": 4.753241112727747e-06, "loss": 0.9952, "step": 34090 }, { "epoch": 0.24683851259889827, "grad_norm": 0.1600702404975891, "learning_rate": 4.75316872606716e-06, "loss": 0.9731, "step": 34100 }, { "epoch": 0.24691089925948445, "grad_norm": 0.16110806167125702, "learning_rate": 4.753096339406574e-06, "loss": 0.9743, "step": 34110 }, { "epoch": 0.24698328592007066, "grad_norm": 0.16948141157627106, "learning_rate": 4.753023952745988e-06, "loss": 0.9904, "step": 34120 }, { "epoch": 0.24705567258065683, "grad_norm": 0.16437900066375732, "learning_rate": 4.752951566085402e-06, "loss": 0.9714, "step": 34130 }, { "epoch": 0.24712805924124304, "grad_norm": 0.1707761436700821, "learning_rate": 4.752879179424816e-06, "loss": 0.9948, "step": 34140 }, { "epoch": 0.2472004459018292, "grad_norm": 0.17766614258289337, "learning_rate": 4.752806792764229e-06, "loss": 0.9811, "step": 34150 }, { "epoch": 0.2472728325624154, "grad_norm": 0.16982759535312653, "learning_rate": 4.752734406103644e-06, "loss": 0.988, "step": 34160 }, { "epoch": 0.2473452192230016, "grad_norm": 0.1820429265499115, "learning_rate": 4.752662019443057e-06, "loss": 0.9801, "step": 34170 }, { "epoch": 0.24741760588358777, "grad_norm": 0.17136429250240326, "learning_rate": 4.752589632782471e-06, "loss": 0.975, "step": 34180 }, { "epoch": 0.24748999254417395, "grad_norm": 0.17014792561531067, "learning_rate": 4.7525172461218846e-06, "loss": 0.985, "step": 34190 }, { "epoch": 0.24756237920476015, "grad_norm": 0.17695526778697968, "learning_rate": 4.752444859461299e-06, "loss": 0.9866, "step": 34200 }, { "epoch": 0.24763476586534633, "grad_norm": 0.18461161851882935, "learning_rate": 4.752372472800713e-06, "loss": 0.9922, "step": 34210 }, { "epoch": 0.24770715252593253, "grad_norm": 0.23707006871700287, "learning_rate": 4.752300086140126e-06, "loss": 0.9964, "step": 34220 }, { "epoch": 0.2477795391865187, "grad_norm": 0.16921405494213104, "learning_rate": 4.75222769947954e-06, "loss": 0.9831, "step": 34230 }, { "epoch": 0.2478519258471049, "grad_norm": 0.18006683886051178, "learning_rate": 4.752155312818954e-06, "loss": 0.9915, "step": 34240 }, { "epoch": 0.2479243125076911, "grad_norm": 0.1586627960205078, "learning_rate": 4.752082926158368e-06, "loss": 0.9765, "step": 34250 }, { "epoch": 0.24799669916827727, "grad_norm": 0.17502956092357635, "learning_rate": 4.7520105394977816e-06, "loss": 0.9833, "step": 34260 }, { "epoch": 0.24806908582886344, "grad_norm": 0.17137262225151062, "learning_rate": 4.751938152837195e-06, "loss": 0.9813, "step": 34270 }, { "epoch": 0.24814147248944965, "grad_norm": 0.1806812733411789, "learning_rate": 4.75186576617661e-06, "loss": 0.9949, "step": 34280 }, { "epoch": 0.24821385915003583, "grad_norm": 0.17505182325839996, "learning_rate": 4.751793379516023e-06, "loss": 0.9949, "step": 34290 }, { "epoch": 0.24828624581062203, "grad_norm": 0.17659908533096313, "learning_rate": 4.751720992855437e-06, "loss": 0.9938, "step": 34300 }, { "epoch": 0.2483586324712082, "grad_norm": 0.18105976283550262, "learning_rate": 4.7516486061948505e-06, "loss": 0.9778, "step": 34310 }, { "epoch": 0.24843101913179438, "grad_norm": 0.18048040568828583, "learning_rate": 4.751576219534265e-06, "loss": 0.9918, "step": 34320 }, { "epoch": 0.2485034057923806, "grad_norm": 0.17734596133232117, "learning_rate": 4.7515038328736786e-06, "loss": 0.9889, "step": 34330 }, { "epoch": 0.24857579245296677, "grad_norm": 0.16741421818733215, "learning_rate": 4.751431446213092e-06, "loss": 0.9854, "step": 34340 }, { "epoch": 0.24864817911355294, "grad_norm": 0.17737902700901031, "learning_rate": 4.751359059552506e-06, "loss": 0.9793, "step": 34350 }, { "epoch": 0.24872056577413915, "grad_norm": 0.1893605887889862, "learning_rate": 4.75128667289192e-06, "loss": 0.9778, "step": 34360 }, { "epoch": 0.24879295243472532, "grad_norm": 0.16508178412914276, "learning_rate": 4.751214286231334e-06, "loss": 0.9873, "step": 34370 }, { "epoch": 0.24886533909531153, "grad_norm": 0.1740712970495224, "learning_rate": 4.7511418995707475e-06, "loss": 0.9862, "step": 34380 }, { "epoch": 0.2489377257558977, "grad_norm": 0.1603364646434784, "learning_rate": 4.751069512910161e-06, "loss": 0.978, "step": 34390 }, { "epoch": 0.24901011241648388, "grad_norm": 0.18249155580997467, "learning_rate": 4.750997126249576e-06, "loss": 0.9909, "step": 34400 }, { "epoch": 0.24908249907707009, "grad_norm": 0.1699957698583603, "learning_rate": 4.750924739588989e-06, "loss": 0.9915, "step": 34410 }, { "epoch": 0.24915488573765626, "grad_norm": 0.17712600529193878, "learning_rate": 4.750852352928403e-06, "loss": 0.976, "step": 34420 }, { "epoch": 0.24922727239824244, "grad_norm": 0.17744386196136475, "learning_rate": 4.7507799662678164e-06, "loss": 0.9728, "step": 34430 }, { "epoch": 0.24929965905882864, "grad_norm": 0.15788348019123077, "learning_rate": 4.750707579607231e-06, "loss": 0.9756, "step": 34440 }, { "epoch": 0.24937204571941482, "grad_norm": 0.1607343703508377, "learning_rate": 4.7506351929466445e-06, "loss": 0.9801, "step": 34450 }, { "epoch": 0.24944443238000102, "grad_norm": 0.18192967772483826, "learning_rate": 4.750562806286058e-06, "loss": 0.992, "step": 34460 }, { "epoch": 0.2495168190405872, "grad_norm": 0.17542362213134766, "learning_rate": 4.750490419625472e-06, "loss": 0.9864, "step": 34470 }, { "epoch": 0.24958920570117338, "grad_norm": 0.17715929448604584, "learning_rate": 4.750418032964886e-06, "loss": 0.9606, "step": 34480 }, { "epoch": 0.24966159236175958, "grad_norm": 0.16454724967479706, "learning_rate": 4.7503456463043e-06, "loss": 0.9867, "step": 34490 }, { "epoch": 0.24973397902234576, "grad_norm": 0.1616222858428955, "learning_rate": 4.7502732596437134e-06, "loss": 0.9823, "step": 34500 }, { "epoch": 0.24980636568293194, "grad_norm": 0.21941696107387543, "learning_rate": 4.750200872983127e-06, "loss": 0.9895, "step": 34510 }, { "epoch": 0.24987875234351814, "grad_norm": 0.20772811770439148, "learning_rate": 4.750128486322541e-06, "loss": 0.9953, "step": 34520 }, { "epoch": 0.24995113900410432, "grad_norm": 0.172512024641037, "learning_rate": 4.750056099661955e-06, "loss": 0.9808, "step": 34530 }, { "epoch": 0.2500235256646905, "grad_norm": 0.1570970118045807, "learning_rate": 4.749983713001369e-06, "loss": 0.9893, "step": 34540 }, { "epoch": 0.25009591232527667, "grad_norm": 0.1649353802204132, "learning_rate": 4.749911326340782e-06, "loss": 0.9848, "step": 34550 }, { "epoch": 0.2501682989858629, "grad_norm": 0.18175838887691498, "learning_rate": 4.749838939680196e-06, "loss": 0.9766, "step": 34560 }, { "epoch": 0.2502406856464491, "grad_norm": 0.16872644424438477, "learning_rate": 4.7497665530196104e-06, "loss": 0.9747, "step": 34570 }, { "epoch": 0.25031307230703526, "grad_norm": 0.16489218175411224, "learning_rate": 4.749694166359023e-06, "loss": 0.9861, "step": 34580 }, { "epoch": 0.25038545896762143, "grad_norm": 0.15996752679347992, "learning_rate": 4.749621779698438e-06, "loss": 0.9866, "step": 34590 }, { "epoch": 0.2504578456282076, "grad_norm": 0.1725120097398758, "learning_rate": 4.749549393037851e-06, "loss": 0.9784, "step": 34600 }, { "epoch": 0.25053023228879384, "grad_norm": 0.167087122797966, "learning_rate": 4.749477006377265e-06, "loss": 0.9864, "step": 34610 }, { "epoch": 0.25060261894938, "grad_norm": 0.1733073741197586, "learning_rate": 4.7494046197166785e-06, "loss": 0.964, "step": 34620 }, { "epoch": 0.2506750056099662, "grad_norm": 0.1666453331708908, "learning_rate": 4.749332233056093e-06, "loss": 0.9815, "step": 34630 }, { "epoch": 0.25074739227055237, "grad_norm": 0.2046760767698288, "learning_rate": 4.749259846395507e-06, "loss": 0.974, "step": 34640 }, { "epoch": 0.25081977893113855, "grad_norm": 0.1714048534631729, "learning_rate": 4.74918745973492e-06, "loss": 0.9842, "step": 34650 }, { "epoch": 0.2508921655917248, "grad_norm": 0.17349305748939514, "learning_rate": 4.749115073074334e-06, "loss": 0.9822, "step": 34660 }, { "epoch": 0.25096455225231096, "grad_norm": 0.1701555848121643, "learning_rate": 4.749042686413748e-06, "loss": 0.9751, "step": 34670 }, { "epoch": 0.25103693891289713, "grad_norm": 0.1766299158334732, "learning_rate": 4.748970299753162e-06, "loss": 0.9783, "step": 34680 }, { "epoch": 0.2511093255734833, "grad_norm": 0.16215893626213074, "learning_rate": 4.7488979130925755e-06, "loss": 0.9798, "step": 34690 }, { "epoch": 0.2511817122340695, "grad_norm": 0.16153815388679504, "learning_rate": 4.748825526431989e-06, "loss": 0.9638, "step": 34700 }, { "epoch": 0.2512540988946557, "grad_norm": 0.17262734472751617, "learning_rate": 4.748753139771403e-06, "loss": 0.9585, "step": 34710 }, { "epoch": 0.2513264855552419, "grad_norm": 0.1748388707637787, "learning_rate": 4.748680753110817e-06, "loss": 0.9755, "step": 34720 }, { "epoch": 0.2513988722158281, "grad_norm": 0.16861870884895325, "learning_rate": 4.748608366450231e-06, "loss": 0.9901, "step": 34730 }, { "epoch": 0.25147125887641425, "grad_norm": 0.1585874855518341, "learning_rate": 4.7485359797896445e-06, "loss": 0.9944, "step": 34740 }, { "epoch": 0.2515436455370004, "grad_norm": 0.16749711334705353, "learning_rate": 4.748463593129058e-06, "loss": 0.967, "step": 34750 }, { "epoch": 0.2516160321975866, "grad_norm": 0.19604967534542084, "learning_rate": 4.7483912064684725e-06, "loss": 0.991, "step": 34760 }, { "epoch": 0.25168841885817284, "grad_norm": 0.16052144765853882, "learning_rate": 4.748318819807886e-06, "loss": 0.9695, "step": 34770 }, { "epoch": 0.251760805518759, "grad_norm": 0.18518640100955963, "learning_rate": 4.7482464331473e-06, "loss": 0.9893, "step": 34780 }, { "epoch": 0.2518331921793452, "grad_norm": 0.18303367495536804, "learning_rate": 4.748174046486713e-06, "loss": 0.9683, "step": 34790 }, { "epoch": 0.25190557883993137, "grad_norm": 0.178907573223114, "learning_rate": 4.748101659826128e-06, "loss": 0.9822, "step": 34800 }, { "epoch": 0.25197796550051754, "grad_norm": 0.1701265573501587, "learning_rate": 4.7480292731655415e-06, "loss": 0.9788, "step": 34810 }, { "epoch": 0.2520503521611038, "grad_norm": 0.15824520587921143, "learning_rate": 4.747956886504955e-06, "loss": 0.9705, "step": 34820 }, { "epoch": 0.25212273882168995, "grad_norm": 0.16767218708992004, "learning_rate": 4.747884499844369e-06, "loss": 0.9993, "step": 34830 }, { "epoch": 0.25219512548227613, "grad_norm": 0.16409531235694885, "learning_rate": 4.747812113183783e-06, "loss": 0.986, "step": 34840 }, { "epoch": 0.2522675121428623, "grad_norm": 0.17260675132274628, "learning_rate": 4.747739726523197e-06, "loss": 0.9677, "step": 34850 }, { "epoch": 0.2523398988034485, "grad_norm": 0.15789243578910828, "learning_rate": 4.74766733986261e-06, "loss": 0.9795, "step": 34860 }, { "epoch": 0.2524122854640347, "grad_norm": 0.9138031005859375, "learning_rate": 4.747594953202024e-06, "loss": 0.9824, "step": 34870 }, { "epoch": 0.2524846721246209, "grad_norm": 0.1509862244129181, "learning_rate": 4.7475225665414385e-06, "loss": 0.9766, "step": 34880 }, { "epoch": 0.25255705878520707, "grad_norm": 0.15754196047782898, "learning_rate": 4.747450179880852e-06, "loss": 0.9781, "step": 34890 }, { "epoch": 0.25262944544579324, "grad_norm": 0.1629686951637268, "learning_rate": 4.747377793220266e-06, "loss": 0.9731, "step": 34900 }, { "epoch": 0.2527018321063794, "grad_norm": 0.17362354695796967, "learning_rate": 4.747305406559679e-06, "loss": 0.9777, "step": 34910 }, { "epoch": 0.2527742187669656, "grad_norm": 0.1758502572774887, "learning_rate": 4.747233019899094e-06, "loss": 0.9851, "step": 34920 }, { "epoch": 0.25284660542755183, "grad_norm": 0.16180376708507538, "learning_rate": 4.747160633238507e-06, "loss": 0.9728, "step": 34930 }, { "epoch": 0.252918992088138, "grad_norm": 0.18018090724945068, "learning_rate": 4.747088246577921e-06, "loss": 0.9682, "step": 34940 }, { "epoch": 0.2529913787487242, "grad_norm": 0.1669149100780487, "learning_rate": 4.747015859917335e-06, "loss": 0.9744, "step": 34950 }, { "epoch": 0.25306376540931036, "grad_norm": 0.16746865212917328, "learning_rate": 4.746943473256749e-06, "loss": 0.986, "step": 34960 }, { "epoch": 0.25313615206989654, "grad_norm": 0.1823040395975113, "learning_rate": 4.746871086596163e-06, "loss": 0.9739, "step": 34970 }, { "epoch": 0.25320853873048277, "grad_norm": 0.17070458829402924, "learning_rate": 4.746798699935576e-06, "loss": 0.9951, "step": 34980 }, { "epoch": 0.25328092539106895, "grad_norm": 0.16862110793590546, "learning_rate": 4.74672631327499e-06, "loss": 0.9872, "step": 34990 }, { "epoch": 0.2533533120516551, "grad_norm": 0.17179900407791138, "learning_rate": 4.746653926614404e-06, "loss": 0.9854, "step": 35000 }, { "epoch": 0.2534256987122413, "grad_norm": 0.17400874197483063, "learning_rate": 4.746581539953818e-06, "loss": 0.9763, "step": 35010 }, { "epoch": 0.2534980853728275, "grad_norm": 0.20990721881389618, "learning_rate": 4.746509153293232e-06, "loss": 0.9752, "step": 35020 }, { "epoch": 0.2535704720334137, "grad_norm": 0.17433054745197296, "learning_rate": 4.746436766632645e-06, "loss": 0.9773, "step": 35030 }, { "epoch": 0.2536428586939999, "grad_norm": 0.17180733382701874, "learning_rate": 4.74636437997206e-06, "loss": 0.9757, "step": 35040 }, { "epoch": 0.25371524535458606, "grad_norm": 0.19720107316970825, "learning_rate": 4.746291993311473e-06, "loss": 0.9789, "step": 35050 }, { "epoch": 0.25378763201517224, "grad_norm": 0.18892458081245422, "learning_rate": 4.746219606650887e-06, "loss": 0.9868, "step": 35060 }, { "epoch": 0.2538600186757584, "grad_norm": 0.18329143524169922, "learning_rate": 4.7461472199903006e-06, "loss": 0.9795, "step": 35070 }, { "epoch": 0.2539324053363446, "grad_norm": 0.17473115026950836, "learning_rate": 4.746074833329715e-06, "loss": 0.9798, "step": 35080 }, { "epoch": 0.2540047919969308, "grad_norm": 0.17442865669727325, "learning_rate": 4.746002446669129e-06, "loss": 0.9803, "step": 35090 }, { "epoch": 0.254077178657517, "grad_norm": 0.17204399406909943, "learning_rate": 4.745930060008542e-06, "loss": 0.9771, "step": 35100 }, { "epoch": 0.2541495653181032, "grad_norm": 0.1870034784078598, "learning_rate": 4.745857673347956e-06, "loss": 0.9718, "step": 35110 }, { "epoch": 0.25422195197868935, "grad_norm": 0.1639021635055542, "learning_rate": 4.7457852866873695e-06, "loss": 0.9805, "step": 35120 }, { "epoch": 0.25429433863927553, "grad_norm": 0.16658686101436615, "learning_rate": 4.745712900026783e-06, "loss": 0.982, "step": 35130 }, { "epoch": 0.25436672529986176, "grad_norm": 0.164857879281044, "learning_rate": 4.745640513366197e-06, "loss": 0.9885, "step": 35140 }, { "epoch": 0.25443911196044794, "grad_norm": 0.18031135201454163, "learning_rate": 4.745568126705611e-06, "loss": 0.9763, "step": 35150 }, { "epoch": 0.2545114986210341, "grad_norm": 0.1674937754869461, "learning_rate": 4.745495740045025e-06, "loss": 0.9891, "step": 35160 }, { "epoch": 0.2545838852816203, "grad_norm": 0.16859376430511475, "learning_rate": 4.7454233533844384e-06, "loss": 0.9738, "step": 35170 }, { "epoch": 0.25465627194220647, "grad_norm": 0.1623847633600235, "learning_rate": 4.745350966723852e-06, "loss": 0.9772, "step": 35180 }, { "epoch": 0.2547286586027927, "grad_norm": 0.25162413716316223, "learning_rate": 4.7452785800632665e-06, "loss": 0.9767, "step": 35190 }, { "epoch": 0.2548010452633789, "grad_norm": 0.16929900646209717, "learning_rate": 4.74520619340268e-06, "loss": 0.9745, "step": 35200 }, { "epoch": 0.25487343192396505, "grad_norm": 0.18202784657478333, "learning_rate": 4.745133806742094e-06, "loss": 0.9777, "step": 35210 }, { "epoch": 0.25494581858455123, "grad_norm": 0.16864082217216492, "learning_rate": 4.745061420081507e-06, "loss": 0.9695, "step": 35220 }, { "epoch": 0.2550182052451374, "grad_norm": 0.18244844675064087, "learning_rate": 4.744989033420922e-06, "loss": 0.9793, "step": 35230 }, { "epoch": 0.25509059190572364, "grad_norm": 0.15838764607906342, "learning_rate": 4.7449166467603354e-06, "loss": 0.9749, "step": 35240 }, { "epoch": 0.2551629785663098, "grad_norm": 0.1680920273065567, "learning_rate": 4.744844260099749e-06, "loss": 0.9788, "step": 35250 }, { "epoch": 0.255235365226896, "grad_norm": 0.1684524267911911, "learning_rate": 4.744771873439163e-06, "loss": 0.9859, "step": 35260 }, { "epoch": 0.25530775188748217, "grad_norm": 0.16353952884674072, "learning_rate": 4.744699486778577e-06, "loss": 0.9842, "step": 35270 }, { "epoch": 0.25538013854806835, "grad_norm": 0.17483973503112793, "learning_rate": 4.744627100117991e-06, "loss": 0.9818, "step": 35280 }, { "epoch": 0.2554525252086545, "grad_norm": 0.16171082854270935, "learning_rate": 4.744554713457404e-06, "loss": 0.9814, "step": 35290 }, { "epoch": 0.25552491186924076, "grad_norm": 0.16629914939403534, "learning_rate": 4.744482326796818e-06, "loss": 0.9932, "step": 35300 }, { "epoch": 0.25559729852982693, "grad_norm": 0.1688779890537262, "learning_rate": 4.744409940136232e-06, "loss": 0.9748, "step": 35310 }, { "epoch": 0.2556696851904131, "grad_norm": 0.18332324922084808, "learning_rate": 4.744337553475646e-06, "loss": 0.983, "step": 35320 }, { "epoch": 0.2557420718509993, "grad_norm": 3.5396459102630615, "learning_rate": 4.74426516681506e-06, "loss": 0.9881, "step": 35330 }, { "epoch": 0.25581445851158546, "grad_norm": 0.16247138381004333, "learning_rate": 4.744192780154473e-06, "loss": 0.9893, "step": 35340 }, { "epoch": 0.2558868451721717, "grad_norm": 0.17081671953201294, "learning_rate": 4.744120393493887e-06, "loss": 0.9899, "step": 35350 }, { "epoch": 0.25595923183275787, "grad_norm": 0.1555139124393463, "learning_rate": 4.744048006833301e-06, "loss": 0.9697, "step": 35360 }, { "epoch": 0.25603161849334405, "grad_norm": 0.19195972383022308, "learning_rate": 4.743975620172715e-06, "loss": 0.9819, "step": 35370 }, { "epoch": 0.2561040051539302, "grad_norm": 0.2612147927284241, "learning_rate": 4.743903233512129e-06, "loss": 0.9856, "step": 35380 }, { "epoch": 0.2561763918145164, "grad_norm": 0.17265565693378448, "learning_rate": 4.743830846851542e-06, "loss": 0.973, "step": 35390 }, { "epoch": 0.25624877847510263, "grad_norm": 0.17444762587547302, "learning_rate": 4.743758460190957e-06, "loss": 0.9811, "step": 35400 }, { "epoch": 0.2563211651356888, "grad_norm": 0.17848478257656097, "learning_rate": 4.74368607353037e-06, "loss": 0.9701, "step": 35410 }, { "epoch": 0.256393551796275, "grad_norm": 0.16675600409507751, "learning_rate": 4.743613686869784e-06, "loss": 0.9836, "step": 35420 }, { "epoch": 0.25646593845686116, "grad_norm": 0.16786222159862518, "learning_rate": 4.7435413002091975e-06, "loss": 0.9816, "step": 35430 }, { "epoch": 0.25653832511744734, "grad_norm": 0.18785636126995087, "learning_rate": 4.743468913548612e-06, "loss": 0.9693, "step": 35440 }, { "epoch": 0.2566107117780335, "grad_norm": 0.16622446477413177, "learning_rate": 4.743396526888026e-06, "loss": 0.9818, "step": 35450 }, { "epoch": 0.25668309843861975, "grad_norm": 0.1868736445903778, "learning_rate": 4.743324140227439e-06, "loss": 0.9806, "step": 35460 }, { "epoch": 0.2567554850992059, "grad_norm": 0.17753900587558746, "learning_rate": 4.743251753566853e-06, "loss": 0.9739, "step": 35470 }, { "epoch": 0.2568278717597921, "grad_norm": 0.18363986909389496, "learning_rate": 4.743179366906267e-06, "loss": 0.9767, "step": 35480 }, { "epoch": 0.2569002584203783, "grad_norm": 0.1745624840259552, "learning_rate": 4.743106980245681e-06, "loss": 0.9889, "step": 35490 }, { "epoch": 0.25697264508096446, "grad_norm": 0.16066277027130127, "learning_rate": 4.7430345935850945e-06, "loss": 0.9692, "step": 35500 }, { "epoch": 0.2570450317415507, "grad_norm": 0.16065356135368347, "learning_rate": 4.742962206924508e-06, "loss": 0.9864, "step": 35510 }, { "epoch": 0.25711741840213687, "grad_norm": 0.1665170043706894, "learning_rate": 4.742889820263923e-06, "loss": 0.979, "step": 35520 }, { "epoch": 0.25718980506272304, "grad_norm": 0.15713395178318024, "learning_rate": 4.742817433603336e-06, "loss": 0.9603, "step": 35530 }, { "epoch": 0.2572621917233092, "grad_norm": 0.18510933220386505, "learning_rate": 4.74274504694275e-06, "loss": 0.9726, "step": 35540 }, { "epoch": 0.2573345783838954, "grad_norm": 0.16515351831912994, "learning_rate": 4.7426726602821635e-06, "loss": 0.9782, "step": 35550 }, { "epoch": 0.25740696504448163, "grad_norm": 0.17086593806743622, "learning_rate": 4.742600273621578e-06, "loss": 0.9634, "step": 35560 }, { "epoch": 0.2574793517050678, "grad_norm": 0.17308638989925385, "learning_rate": 4.7425278869609915e-06, "loss": 0.978, "step": 35570 }, { "epoch": 0.257551738365654, "grad_norm": 0.18150749802589417, "learning_rate": 4.742455500300405e-06, "loss": 0.976, "step": 35580 }, { "epoch": 0.25762412502624016, "grad_norm": 0.1714148074388504, "learning_rate": 4.742383113639819e-06, "loss": 0.9786, "step": 35590 }, { "epoch": 0.25769651168682634, "grad_norm": 0.16008371114730835, "learning_rate": 4.742310726979233e-06, "loss": 0.9789, "step": 35600 }, { "epoch": 0.2577688983474125, "grad_norm": 0.17920182645320892, "learning_rate": 4.742238340318647e-06, "loss": 0.9875, "step": 35610 }, { "epoch": 0.25784128500799874, "grad_norm": 0.1833036094903946, "learning_rate": 4.7421659536580605e-06, "loss": 1.0031, "step": 35620 }, { "epoch": 0.2579136716685849, "grad_norm": 0.17967666685581207, "learning_rate": 4.742093566997474e-06, "loss": 0.9975, "step": 35630 }, { "epoch": 0.2579860583291711, "grad_norm": 0.29100435972213745, "learning_rate": 4.742021180336888e-06, "loss": 0.974, "step": 35640 }, { "epoch": 0.2580584449897573, "grad_norm": 0.1635904461145401, "learning_rate": 4.741948793676301e-06, "loss": 0.9854, "step": 35650 }, { "epoch": 0.25813083165034345, "grad_norm": 0.20104598999023438, "learning_rate": 4.741876407015715e-06, "loss": 0.9847, "step": 35660 }, { "epoch": 0.2582032183109297, "grad_norm": 0.1948116570711136, "learning_rate": 4.741804020355129e-06, "loss": 0.9766, "step": 35670 }, { "epoch": 0.25827560497151586, "grad_norm": 0.18129467964172363, "learning_rate": 4.741731633694543e-06, "loss": 0.9788, "step": 35680 }, { "epoch": 0.25834799163210204, "grad_norm": 0.19102171063423157, "learning_rate": 4.741659247033957e-06, "loss": 0.9826, "step": 35690 }, { "epoch": 0.2584203782926882, "grad_norm": 0.15986351668834686, "learning_rate": 4.74158686037337e-06, "loss": 0.9818, "step": 35700 }, { "epoch": 0.2584927649532744, "grad_norm": 0.16768378019332886, "learning_rate": 4.741514473712785e-06, "loss": 0.9833, "step": 35710 }, { "epoch": 0.2585651516138606, "grad_norm": 0.16951607167720795, "learning_rate": 4.741442087052198e-06, "loss": 0.9796, "step": 35720 }, { "epoch": 0.2586375382744468, "grad_norm": 0.18605226278305054, "learning_rate": 4.741369700391612e-06, "loss": 0.9878, "step": 35730 }, { "epoch": 0.258709924935033, "grad_norm": 0.15698987245559692, "learning_rate": 4.7412973137310256e-06, "loss": 0.9664, "step": 35740 }, { "epoch": 0.25878231159561915, "grad_norm": 0.17848019301891327, "learning_rate": 4.74122492707044e-06, "loss": 0.968, "step": 35750 }, { "epoch": 0.25885469825620533, "grad_norm": 0.18096424639225006, "learning_rate": 4.741152540409854e-06, "loss": 0.9781, "step": 35760 }, { "epoch": 0.25892708491679156, "grad_norm": 0.16998852789402008, "learning_rate": 4.741080153749267e-06, "loss": 0.9852, "step": 35770 }, { "epoch": 0.25899947157737774, "grad_norm": 0.16071717441082, "learning_rate": 4.741007767088681e-06, "loss": 0.9852, "step": 35780 }, { "epoch": 0.2590718582379639, "grad_norm": 0.16710597276687622, "learning_rate": 4.740935380428095e-06, "loss": 0.9728, "step": 35790 }, { "epoch": 0.2591442448985501, "grad_norm": 0.15692850947380066, "learning_rate": 4.740862993767509e-06, "loss": 0.983, "step": 35800 }, { "epoch": 0.25921663155913627, "grad_norm": 0.16748061776161194, "learning_rate": 4.7407906071069226e-06, "loss": 0.984, "step": 35810 }, { "epoch": 0.25928901821972244, "grad_norm": 0.16068707406520844, "learning_rate": 4.740718220446336e-06, "loss": 0.9745, "step": 35820 }, { "epoch": 0.2593614048803087, "grad_norm": 0.17079469561576843, "learning_rate": 4.740645833785751e-06, "loss": 0.9656, "step": 35830 }, { "epoch": 0.25943379154089485, "grad_norm": 0.18786334991455078, "learning_rate": 4.740573447125164e-06, "loss": 0.9619, "step": 35840 }, { "epoch": 0.25950617820148103, "grad_norm": 0.1623920202255249, "learning_rate": 4.740501060464578e-06, "loss": 0.9768, "step": 35850 }, { "epoch": 0.2595785648620672, "grad_norm": 0.16342291235923767, "learning_rate": 4.7404286738039915e-06, "loss": 0.9867, "step": 35860 }, { "epoch": 0.2596509515226534, "grad_norm": 0.16781893372535706, "learning_rate": 4.740356287143406e-06, "loss": 0.9758, "step": 35870 }, { "epoch": 0.2597233381832396, "grad_norm": 0.1712564378976822, "learning_rate": 4.74028390048282e-06, "loss": 0.9558, "step": 35880 }, { "epoch": 0.2597957248438258, "grad_norm": 0.16046634316444397, "learning_rate": 4.740211513822233e-06, "loss": 0.9723, "step": 35890 }, { "epoch": 0.25986811150441197, "grad_norm": 0.1630384773015976, "learning_rate": 4.740139127161647e-06, "loss": 0.9766, "step": 35900 }, { "epoch": 0.25994049816499815, "grad_norm": 0.16825400292873383, "learning_rate": 4.740066740501061e-06, "loss": 0.9824, "step": 35910 }, { "epoch": 0.2600128848255843, "grad_norm": 0.17138569056987762, "learning_rate": 4.739994353840475e-06, "loss": 0.9749, "step": 35920 }, { "epoch": 0.26008527148617056, "grad_norm": 0.1562861055135727, "learning_rate": 4.7399219671798885e-06, "loss": 0.9786, "step": 35930 }, { "epoch": 0.26015765814675673, "grad_norm": 0.1865861713886261, "learning_rate": 4.739849580519302e-06, "loss": 0.9761, "step": 35940 }, { "epoch": 0.2602300448073429, "grad_norm": 0.168324813246727, "learning_rate": 4.739777193858716e-06, "loss": 0.9813, "step": 35950 }, { "epoch": 0.2603024314679291, "grad_norm": 0.1690807342529297, "learning_rate": 4.73970480719813e-06, "loss": 0.9779, "step": 35960 }, { "epoch": 0.26037481812851526, "grad_norm": 0.17235450446605682, "learning_rate": 4.739632420537544e-06, "loss": 0.9858, "step": 35970 }, { "epoch": 0.26044720478910144, "grad_norm": 0.188417449593544, "learning_rate": 4.7395600338769574e-06, "loss": 0.9765, "step": 35980 }, { "epoch": 0.26051959144968767, "grad_norm": 0.17113593220710754, "learning_rate": 4.739487647216371e-06, "loss": 0.9687, "step": 35990 }, { "epoch": 0.26059197811027385, "grad_norm": 0.1908288598060608, "learning_rate": 4.7394152605557855e-06, "loss": 0.9831, "step": 36000 }, { "epoch": 0.26066436477086, "grad_norm": 0.17590002715587616, "learning_rate": 4.739342873895199e-06, "loss": 0.9856, "step": 36010 }, { "epoch": 0.2607367514314462, "grad_norm": 0.16687145829200745, "learning_rate": 4.739270487234613e-06, "loss": 0.9793, "step": 36020 }, { "epoch": 0.2608091380920324, "grad_norm": 0.1600182056427002, "learning_rate": 4.739198100574026e-06, "loss": 0.9738, "step": 36030 }, { "epoch": 0.2608815247526186, "grad_norm": 0.16335642337799072, "learning_rate": 4.739125713913441e-06, "loss": 0.9686, "step": 36040 }, { "epoch": 0.2609539114132048, "grad_norm": 0.16597791016101837, "learning_rate": 4.7390533272528544e-06, "loss": 0.9796, "step": 36050 }, { "epoch": 0.26102629807379096, "grad_norm": 0.1797637939453125, "learning_rate": 4.738980940592268e-06, "loss": 0.9771, "step": 36060 }, { "epoch": 0.26109868473437714, "grad_norm": 0.17320837080478668, "learning_rate": 4.738908553931682e-06, "loss": 0.9889, "step": 36070 }, { "epoch": 0.2611710713949633, "grad_norm": 0.16971297562122345, "learning_rate": 4.738836167271096e-06, "loss": 0.9947, "step": 36080 }, { "epoch": 0.26124345805554955, "grad_norm": 0.17000246047973633, "learning_rate": 4.73876378061051e-06, "loss": 0.9631, "step": 36090 }, { "epoch": 0.2613158447161357, "grad_norm": 0.16442660987377167, "learning_rate": 4.738691393949923e-06, "loss": 0.9566, "step": 36100 }, { "epoch": 0.2613882313767219, "grad_norm": 0.15749797224998474, "learning_rate": 4.738619007289337e-06, "loss": 0.9857, "step": 36110 }, { "epoch": 0.2614606180373081, "grad_norm": 0.1590905487537384, "learning_rate": 4.7385466206287514e-06, "loss": 0.9741, "step": 36120 }, { "epoch": 0.26153300469789426, "grad_norm": 0.20736895501613617, "learning_rate": 4.738474233968165e-06, "loss": 0.9741, "step": 36130 }, { "epoch": 0.26160539135848043, "grad_norm": 0.1713234931230545, "learning_rate": 4.738401847307579e-06, "loss": 0.9652, "step": 36140 }, { "epoch": 0.26167777801906666, "grad_norm": 0.17479397356510162, "learning_rate": 4.738329460646992e-06, "loss": 0.962, "step": 36150 }, { "epoch": 0.26175016467965284, "grad_norm": 0.15319569408893585, "learning_rate": 4.738257073986407e-06, "loss": 0.9914, "step": 36160 }, { "epoch": 0.261822551340239, "grad_norm": 0.17199110984802246, "learning_rate": 4.7381846873258195e-06, "loss": 0.9701, "step": 36170 }, { "epoch": 0.2618949380008252, "grad_norm": 0.17923112213611603, "learning_rate": 4.738112300665233e-06, "loss": 0.9866, "step": 36180 }, { "epoch": 0.26196732466141137, "grad_norm": 0.17701056599617004, "learning_rate": 4.738039914004648e-06, "loss": 0.9905, "step": 36190 }, { "epoch": 0.2620397113219976, "grad_norm": 0.18318752944469452, "learning_rate": 4.737967527344061e-06, "loss": 0.9769, "step": 36200 }, { "epoch": 0.2621120979825838, "grad_norm": 0.16948123276233673, "learning_rate": 4.737895140683475e-06, "loss": 0.9701, "step": 36210 }, { "epoch": 0.26218448464316996, "grad_norm": 0.16804496943950653, "learning_rate": 4.7378227540228885e-06, "loss": 0.9758, "step": 36220 }, { "epoch": 0.26225687130375613, "grad_norm": 0.16858185827732086, "learning_rate": 4.737750367362303e-06, "loss": 0.9778, "step": 36230 }, { "epoch": 0.2623292579643423, "grad_norm": 0.16800491511821747, "learning_rate": 4.7376779807017165e-06, "loss": 0.96, "step": 36240 }, { "epoch": 0.26240164462492854, "grad_norm": 0.16772626340389252, "learning_rate": 4.73760559404113e-06, "loss": 0.958, "step": 36250 }, { "epoch": 0.2624740312855147, "grad_norm": 0.16602137684822083, "learning_rate": 4.737533207380544e-06, "loss": 0.9761, "step": 36260 }, { "epoch": 0.2625464179461009, "grad_norm": 0.17461955547332764, "learning_rate": 4.737460820719958e-06, "loss": 0.9546, "step": 36270 }, { "epoch": 0.2626188046066871, "grad_norm": 0.1623477339744568, "learning_rate": 4.737388434059372e-06, "loss": 0.981, "step": 36280 }, { "epoch": 0.26269119126727325, "grad_norm": 0.17813843488693237, "learning_rate": 4.7373160473987855e-06, "loss": 0.9779, "step": 36290 }, { "epoch": 0.2627635779278594, "grad_norm": 0.18442265689373016, "learning_rate": 4.737243660738199e-06, "loss": 0.9541, "step": 36300 }, { "epoch": 0.26283596458844566, "grad_norm": 0.16500839591026306, "learning_rate": 4.7371712740776135e-06, "loss": 0.9601, "step": 36310 }, { "epoch": 0.26290835124903184, "grad_norm": 0.17317786812782288, "learning_rate": 4.737098887417027e-06, "loss": 0.9704, "step": 36320 }, { "epoch": 0.262980737909618, "grad_norm": 0.1605556458234787, "learning_rate": 4.737026500756441e-06, "loss": 0.9793, "step": 36330 }, { "epoch": 0.2630531245702042, "grad_norm": 0.16761939227581024, "learning_rate": 4.736954114095854e-06, "loss": 0.9669, "step": 36340 }, { "epoch": 0.26312551123079037, "grad_norm": 0.19816842675209045, "learning_rate": 4.736881727435269e-06, "loss": 0.9749, "step": 36350 }, { "epoch": 0.2631978978913766, "grad_norm": 0.15934444963932037, "learning_rate": 4.7368093407746825e-06, "loss": 0.9762, "step": 36360 }, { "epoch": 0.2632702845519628, "grad_norm": 0.16128577291965485, "learning_rate": 4.736736954114096e-06, "loss": 0.9648, "step": 36370 }, { "epoch": 0.26334267121254895, "grad_norm": 0.1702321320772171, "learning_rate": 4.73666456745351e-06, "loss": 0.9762, "step": 36380 }, { "epoch": 0.26341505787313513, "grad_norm": 0.16586679220199585, "learning_rate": 4.736592180792924e-06, "loss": 0.9676, "step": 36390 }, { "epoch": 0.2634874445337213, "grad_norm": 0.18088088929653168, "learning_rate": 4.736519794132338e-06, "loss": 0.974, "step": 36400 }, { "epoch": 0.26355983119430754, "grad_norm": 0.2222166806459427, "learning_rate": 4.736447407471751e-06, "loss": 0.9776, "step": 36410 }, { "epoch": 0.2636322178548937, "grad_norm": 0.15918581187725067, "learning_rate": 4.736375020811165e-06, "loss": 0.9787, "step": 36420 }, { "epoch": 0.2637046045154799, "grad_norm": 0.1603013426065445, "learning_rate": 4.7363026341505795e-06, "loss": 0.9617, "step": 36430 }, { "epoch": 0.26377699117606607, "grad_norm": 0.1752447783946991, "learning_rate": 4.736230247489993e-06, "loss": 0.9724, "step": 36440 }, { "epoch": 0.26384937783665224, "grad_norm": 0.27862676978111267, "learning_rate": 4.736157860829407e-06, "loss": 0.9675, "step": 36450 }, { "epoch": 0.2639217644972385, "grad_norm": 0.1704714149236679, "learning_rate": 4.73608547416882e-06, "loss": 0.962, "step": 36460 }, { "epoch": 0.26399415115782465, "grad_norm": 0.2055920511484146, "learning_rate": 4.736013087508235e-06, "loss": 0.9712, "step": 36470 }, { "epoch": 0.26406653781841083, "grad_norm": 0.168257936835289, "learning_rate": 4.735940700847648e-06, "loss": 0.9722, "step": 36480 }, { "epoch": 0.264138924478997, "grad_norm": 0.1615269035100937, "learning_rate": 4.735868314187062e-06, "loss": 0.9708, "step": 36490 }, { "epoch": 0.2642113111395832, "grad_norm": 0.16784435510635376, "learning_rate": 4.735795927526476e-06, "loss": 0.9741, "step": 36500 }, { "epoch": 0.26428369780016936, "grad_norm": 0.16948504745960236, "learning_rate": 4.73572354086589e-06, "loss": 0.9692, "step": 36510 }, { "epoch": 0.2643560844607556, "grad_norm": 0.16476517915725708, "learning_rate": 4.735651154205304e-06, "loss": 0.9673, "step": 36520 }, { "epoch": 0.26442847112134177, "grad_norm": 0.18049409985542297, "learning_rate": 4.735578767544717e-06, "loss": 0.9676, "step": 36530 }, { "epoch": 0.26450085778192794, "grad_norm": 0.1603485643863678, "learning_rate": 4.735506380884131e-06, "loss": 0.9789, "step": 36540 }, { "epoch": 0.2645732444425141, "grad_norm": 0.16971318423748016, "learning_rate": 4.735433994223545e-06, "loss": 0.9847, "step": 36550 }, { "epoch": 0.2646456311031003, "grad_norm": 0.19369632005691528, "learning_rate": 4.735361607562959e-06, "loss": 0.9731, "step": 36560 }, { "epoch": 0.26471801776368653, "grad_norm": 0.1709279865026474, "learning_rate": 4.735289220902373e-06, "loss": 0.9596, "step": 36570 }, { "epoch": 0.2647904044242727, "grad_norm": 0.1615041047334671, "learning_rate": 4.735216834241786e-06, "loss": 0.9585, "step": 36580 }, { "epoch": 0.2648627910848589, "grad_norm": 0.17136956751346588, "learning_rate": 4.7351444475812e-06, "loss": 0.9857, "step": 36590 }, { "epoch": 0.26493517774544506, "grad_norm": 0.1596122533082962, "learning_rate": 4.735072060920614e-06, "loss": 0.9646, "step": 36600 }, { "epoch": 0.26500756440603124, "grad_norm": 0.16344700753688812, "learning_rate": 4.734999674260028e-06, "loss": 0.9773, "step": 36610 }, { "epoch": 0.26507995106661747, "grad_norm": 0.17619621753692627, "learning_rate": 4.734927287599442e-06, "loss": 0.97, "step": 36620 }, { "epoch": 0.26515233772720365, "grad_norm": 0.16314248740673065, "learning_rate": 4.734854900938855e-06, "loss": 0.9712, "step": 36630 }, { "epoch": 0.2652247243877898, "grad_norm": 0.1628977507352829, "learning_rate": 4.73478251427827e-06, "loss": 0.9813, "step": 36640 }, { "epoch": 0.265297111048376, "grad_norm": 0.18795722723007202, "learning_rate": 4.734710127617683e-06, "loss": 0.9799, "step": 36650 }, { "epoch": 0.2653694977089622, "grad_norm": 0.16708257794380188, "learning_rate": 4.734637740957097e-06, "loss": 0.964, "step": 36660 }, { "epoch": 0.26544188436954835, "grad_norm": 0.15541145205497742, "learning_rate": 4.7345653542965105e-06, "loss": 0.9709, "step": 36670 }, { "epoch": 0.2655142710301346, "grad_norm": 0.6976906657218933, "learning_rate": 4.734492967635925e-06, "loss": 0.9633, "step": 36680 }, { "epoch": 0.26558665769072076, "grad_norm": 0.18083471059799194, "learning_rate": 4.734420580975339e-06, "loss": 0.9868, "step": 36690 }, { "epoch": 0.26565904435130694, "grad_norm": 0.16377471387386322, "learning_rate": 4.734348194314752e-06, "loss": 0.9749, "step": 36700 }, { "epoch": 0.2657314310118931, "grad_norm": 0.17875248193740845, "learning_rate": 4.734275807654166e-06, "loss": 0.9759, "step": 36710 }, { "epoch": 0.2658038176724793, "grad_norm": 0.1629532426595688, "learning_rate": 4.7342034209935794e-06, "loss": 0.9764, "step": 36720 }, { "epoch": 0.2658762043330655, "grad_norm": 0.18493175506591797, "learning_rate": 4.734131034332993e-06, "loss": 0.9693, "step": 36730 }, { "epoch": 0.2659485909936517, "grad_norm": 0.1734851896762848, "learning_rate": 4.734058647672407e-06, "loss": 0.9604, "step": 36740 }, { "epoch": 0.2660209776542379, "grad_norm": 0.15318694710731506, "learning_rate": 4.733986261011821e-06, "loss": 0.9816, "step": 36750 }, { "epoch": 0.26609336431482405, "grad_norm": 0.1794171929359436, "learning_rate": 4.733913874351235e-06, "loss": 0.977, "step": 36760 }, { "epoch": 0.26616575097541023, "grad_norm": 0.18346603214740753, "learning_rate": 4.733841487690648e-06, "loss": 0.9839, "step": 36770 }, { "epoch": 0.26623813763599646, "grad_norm": 0.16709034144878387, "learning_rate": 4.733769101030062e-06, "loss": 0.9943, "step": 36780 }, { "epoch": 0.26631052429658264, "grad_norm": 0.16048607230186462, "learning_rate": 4.7336967143694764e-06, "loss": 0.963, "step": 36790 }, { "epoch": 0.2663829109571688, "grad_norm": 0.1640567034482956, "learning_rate": 4.73362432770889e-06, "loss": 0.9744, "step": 36800 }, { "epoch": 0.266455297617755, "grad_norm": 0.18880528211593628, "learning_rate": 4.733551941048304e-06, "loss": 0.9689, "step": 36810 }, { "epoch": 0.26652768427834117, "grad_norm": 0.17127233743667603, "learning_rate": 4.733479554387717e-06, "loss": 0.9747, "step": 36820 }, { "epoch": 0.26660007093892735, "grad_norm": 0.17141376435756683, "learning_rate": 4.733407167727132e-06, "loss": 0.9732, "step": 36830 }, { "epoch": 0.2666724575995136, "grad_norm": 0.1688418984413147, "learning_rate": 4.733334781066545e-06, "loss": 0.9744, "step": 36840 }, { "epoch": 0.26674484426009976, "grad_norm": 0.18066221475601196, "learning_rate": 4.733262394405959e-06, "loss": 0.9735, "step": 36850 }, { "epoch": 0.26681723092068593, "grad_norm": 0.15719619393348694, "learning_rate": 4.733190007745373e-06, "loss": 0.9797, "step": 36860 }, { "epoch": 0.2668896175812721, "grad_norm": 0.1722886562347412, "learning_rate": 4.733117621084787e-06, "loss": 0.9744, "step": 36870 }, { "epoch": 0.2669620042418583, "grad_norm": 0.19963102042675018, "learning_rate": 4.733045234424201e-06, "loss": 0.9828, "step": 36880 }, { "epoch": 0.2670343909024445, "grad_norm": 0.16132299602031708, "learning_rate": 4.732972847763614e-06, "loss": 0.9712, "step": 36890 }, { "epoch": 0.2671067775630307, "grad_norm": 0.16427114605903625, "learning_rate": 4.732900461103028e-06, "loss": 0.9779, "step": 36900 }, { "epoch": 0.26717916422361687, "grad_norm": 0.18113258481025696, "learning_rate": 4.732828074442442e-06, "loss": 0.9688, "step": 36910 }, { "epoch": 0.26725155088420305, "grad_norm": 0.1653916984796524, "learning_rate": 4.732755687781856e-06, "loss": 0.9717, "step": 36920 }, { "epoch": 0.2673239375447892, "grad_norm": 0.179592102766037, "learning_rate": 4.73268330112127e-06, "loss": 0.9656, "step": 36930 }, { "epoch": 0.26739632420537546, "grad_norm": 0.19769518077373505, "learning_rate": 4.732610914460683e-06, "loss": 0.9637, "step": 36940 }, { "epoch": 0.26746871086596163, "grad_norm": 0.17146340012550354, "learning_rate": 4.732538527800098e-06, "loss": 0.9619, "step": 36950 }, { "epoch": 0.2675410975265478, "grad_norm": 0.1564941555261612, "learning_rate": 4.732466141139511e-06, "loss": 0.9688, "step": 36960 }, { "epoch": 0.267613484187134, "grad_norm": 0.1622597575187683, "learning_rate": 4.732393754478925e-06, "loss": 0.9848, "step": 36970 }, { "epoch": 0.26768587084772016, "grad_norm": 0.17522403597831726, "learning_rate": 4.7323213678183385e-06, "loss": 0.9857, "step": 36980 }, { "epoch": 0.2677582575083064, "grad_norm": 0.18009519577026367, "learning_rate": 4.732248981157753e-06, "loss": 0.9746, "step": 36990 }, { "epoch": 0.2678306441688926, "grad_norm": 0.16934752464294434, "learning_rate": 4.732176594497167e-06, "loss": 0.9715, "step": 37000 }, { "epoch": 0.26790303082947875, "grad_norm": 0.17400366067886353, "learning_rate": 4.73210420783658e-06, "loss": 0.985, "step": 37010 }, { "epoch": 0.2679754174900649, "grad_norm": 0.15376758575439453, "learning_rate": 4.732031821175994e-06, "loss": 0.976, "step": 37020 }, { "epoch": 0.2680478041506511, "grad_norm": 0.17052818834781647, "learning_rate": 4.731959434515408e-06, "loss": 0.9854, "step": 37030 }, { "epoch": 0.2681201908112373, "grad_norm": 0.16806358098983765, "learning_rate": 4.731887047854822e-06, "loss": 0.9739, "step": 37040 }, { "epoch": 0.2681925774718235, "grad_norm": 0.16140809655189514, "learning_rate": 4.7318146611942355e-06, "loss": 0.9864, "step": 37050 }, { "epoch": 0.2682649641324097, "grad_norm": 0.16904619336128235, "learning_rate": 4.731742274533649e-06, "loss": 0.9594, "step": 37060 }, { "epoch": 0.26833735079299587, "grad_norm": 0.1671932488679886, "learning_rate": 4.731669887873064e-06, "loss": 0.9694, "step": 37070 }, { "epoch": 0.26840973745358204, "grad_norm": 0.17153862118721008, "learning_rate": 4.731597501212477e-06, "loss": 0.9811, "step": 37080 }, { "epoch": 0.2684821241141682, "grad_norm": 0.16389037668704987, "learning_rate": 4.731525114551891e-06, "loss": 0.9758, "step": 37090 }, { "epoch": 0.26855451077475445, "grad_norm": 0.16687390208244324, "learning_rate": 4.7314527278913045e-06, "loss": 0.9687, "step": 37100 }, { "epoch": 0.26862689743534063, "grad_norm": 0.2017953097820282, "learning_rate": 4.731380341230719e-06, "loss": 0.9653, "step": 37110 }, { "epoch": 0.2686992840959268, "grad_norm": 0.15654923021793365, "learning_rate": 4.7313079545701326e-06, "loss": 0.98, "step": 37120 }, { "epoch": 0.268771670756513, "grad_norm": 0.18685537576675415, "learning_rate": 4.731235567909546e-06, "loss": 0.9759, "step": 37130 }, { "epoch": 0.26884405741709916, "grad_norm": 0.1896306425333023, "learning_rate": 4.73116318124896e-06, "loss": 0.975, "step": 37140 }, { "epoch": 0.2689164440776854, "grad_norm": 0.16155771911144257, "learning_rate": 4.731090794588374e-06, "loss": 0.9734, "step": 37150 }, { "epoch": 0.26898883073827157, "grad_norm": 0.18064841628074646, "learning_rate": 4.731018407927788e-06, "loss": 0.9724, "step": 37160 }, { "epoch": 0.26906121739885774, "grad_norm": 0.1620803028345108, "learning_rate": 4.7309460212672015e-06, "loss": 0.9608, "step": 37170 }, { "epoch": 0.2691336040594439, "grad_norm": 0.15905041992664337, "learning_rate": 4.730873634606615e-06, "loss": 0.974, "step": 37180 }, { "epoch": 0.2692059907200301, "grad_norm": 0.16241392493247986, "learning_rate": 4.730801247946029e-06, "loss": 0.971, "step": 37190 }, { "epoch": 0.2692783773806163, "grad_norm": 0.1652408093214035, "learning_rate": 4.730728861285443e-06, "loss": 0.9561, "step": 37200 }, { "epoch": 0.2693507640412025, "grad_norm": 0.1611439734697342, "learning_rate": 4.730656474624857e-06, "loss": 0.9889, "step": 37210 }, { "epoch": 0.2694231507017887, "grad_norm": 0.16467610001564026, "learning_rate": 4.73058408796427e-06, "loss": 0.9639, "step": 37220 }, { "epoch": 0.26949553736237486, "grad_norm": 0.17998045682907104, "learning_rate": 4.730511701303684e-06, "loss": 0.9692, "step": 37230 }, { "epoch": 0.26956792402296104, "grad_norm": 0.16246424615383148, "learning_rate": 4.730439314643098e-06, "loss": 0.981, "step": 37240 }, { "epoch": 0.2696403106835472, "grad_norm": 0.15857288241386414, "learning_rate": 4.730366927982511e-06, "loss": 0.9596, "step": 37250 }, { "epoch": 0.26971269734413345, "grad_norm": 0.17110489308834076, "learning_rate": 4.730294541321926e-06, "loss": 0.9907, "step": 37260 }, { "epoch": 0.2697850840047196, "grad_norm": 0.16102251410484314, "learning_rate": 4.730222154661339e-06, "loss": 0.9728, "step": 37270 }, { "epoch": 0.2698574706653058, "grad_norm": 0.15859219431877136, "learning_rate": 4.730149768000753e-06, "loss": 0.9693, "step": 37280 }, { "epoch": 0.269929857325892, "grad_norm": 0.16720248758792877, "learning_rate": 4.7300773813401666e-06, "loss": 0.975, "step": 37290 }, { "epoch": 0.27000224398647815, "grad_norm": 0.1726258546113968, "learning_rate": 4.730004994679581e-06, "loss": 0.9823, "step": 37300 }, { "epoch": 0.2700746306470644, "grad_norm": 0.17747798562049866, "learning_rate": 4.729932608018995e-06, "loss": 0.9637, "step": 37310 }, { "epoch": 0.27014701730765056, "grad_norm": 0.17301778495311737, "learning_rate": 4.729860221358408e-06, "loss": 0.9755, "step": 37320 }, { "epoch": 0.27021940396823674, "grad_norm": 0.1715429127216339, "learning_rate": 4.729787834697822e-06, "loss": 0.9838, "step": 37330 }, { "epoch": 0.2702917906288229, "grad_norm": 0.17954185605049133, "learning_rate": 4.729715448037236e-06, "loss": 0.981, "step": 37340 }, { "epoch": 0.2703641772894091, "grad_norm": 0.1605345904827118, "learning_rate": 4.72964306137665e-06, "loss": 0.9726, "step": 37350 }, { "epoch": 0.27043656394999527, "grad_norm": 0.1734982430934906, "learning_rate": 4.7295706747160636e-06, "loss": 0.9621, "step": 37360 }, { "epoch": 0.2705089506105815, "grad_norm": 0.16955071687698364, "learning_rate": 4.729498288055477e-06, "loss": 0.9831, "step": 37370 }, { "epoch": 0.2705813372711677, "grad_norm": 0.18525801599025726, "learning_rate": 4.729425901394891e-06, "loss": 0.9803, "step": 37380 }, { "epoch": 0.27065372393175385, "grad_norm": 0.1983799785375595, "learning_rate": 4.729353514734305e-06, "loss": 0.9768, "step": 37390 }, { "epoch": 0.27072611059234003, "grad_norm": 0.169651597738266, "learning_rate": 4.729281128073719e-06, "loss": 0.9703, "step": 37400 }, { "epoch": 0.2707984972529262, "grad_norm": 0.20270761847496033, "learning_rate": 4.7292087414131325e-06, "loss": 0.9909, "step": 37410 }, { "epoch": 0.27087088391351244, "grad_norm": 0.19444452226161957, "learning_rate": 4.729136354752546e-06, "loss": 0.9811, "step": 37420 }, { "epoch": 0.2709432705740986, "grad_norm": 0.17284293472766876, "learning_rate": 4.729063968091961e-06, "loss": 0.9821, "step": 37430 }, { "epoch": 0.2710156572346848, "grad_norm": 0.1620352417230606, "learning_rate": 4.728991581431374e-06, "loss": 0.9725, "step": 37440 }, { "epoch": 0.27108804389527097, "grad_norm": 0.17737331986427307, "learning_rate": 4.728919194770788e-06, "loss": 0.9739, "step": 37450 }, { "epoch": 0.27116043055585715, "grad_norm": 0.15988625586032867, "learning_rate": 4.7288468081102014e-06, "loss": 0.9737, "step": 37460 }, { "epoch": 0.2712328172164434, "grad_norm": 0.1713685542345047, "learning_rate": 4.728774421449616e-06, "loss": 0.98, "step": 37470 }, { "epoch": 0.27130520387702955, "grad_norm": 0.17601056396961212, "learning_rate": 4.7287020347890295e-06, "loss": 0.9833, "step": 37480 }, { "epoch": 0.27137759053761573, "grad_norm": 0.16959619522094727, "learning_rate": 4.728629648128443e-06, "loss": 0.9658, "step": 37490 }, { "epoch": 0.2714499771982019, "grad_norm": 0.174661785364151, "learning_rate": 4.728557261467857e-06, "loss": 0.9721, "step": 37500 }, { "epoch": 0.2715223638587881, "grad_norm": 0.16935496032238007, "learning_rate": 4.728484874807271e-06, "loss": 0.963, "step": 37510 }, { "epoch": 0.2715947505193743, "grad_norm": 0.1676948070526123, "learning_rate": 4.728412488146685e-06, "loss": 0.969, "step": 37520 }, { "epoch": 0.2716671371799605, "grad_norm": 0.17413829267024994, "learning_rate": 4.7283401014860984e-06, "loss": 0.9767, "step": 37530 }, { "epoch": 0.27173952384054667, "grad_norm": 0.1575450748205185, "learning_rate": 4.728267714825512e-06, "loss": 0.9855, "step": 37540 }, { "epoch": 0.27181191050113285, "grad_norm": 0.15776337683200836, "learning_rate": 4.7281953281649265e-06, "loss": 0.9669, "step": 37550 }, { "epoch": 0.271884297161719, "grad_norm": 0.16901899874210358, "learning_rate": 4.72812294150434e-06, "loss": 0.9696, "step": 37560 }, { "epoch": 0.2719566838223052, "grad_norm": 0.18395544588565826, "learning_rate": 4.728050554843754e-06, "loss": 0.9743, "step": 37570 }, { "epoch": 0.27202907048289143, "grad_norm": 0.15982641279697418, "learning_rate": 4.727978168183167e-06, "loss": 0.9691, "step": 37580 }, { "epoch": 0.2721014571434776, "grad_norm": 0.16578301787376404, "learning_rate": 4.727905781522582e-06, "loss": 0.9807, "step": 37590 }, { "epoch": 0.2721738438040638, "grad_norm": 0.1732582002878189, "learning_rate": 4.7278333948619954e-06, "loss": 0.9776, "step": 37600 }, { "epoch": 0.27224623046464996, "grad_norm": 0.1836758553981781, "learning_rate": 4.727761008201409e-06, "loss": 0.9741, "step": 37610 }, { "epoch": 0.27231861712523614, "grad_norm": 0.1627260446548462, "learning_rate": 4.727688621540823e-06, "loss": 0.9747, "step": 37620 }, { "epoch": 0.27239100378582237, "grad_norm": 0.16746656596660614, "learning_rate": 4.727616234880237e-06, "loss": 0.9808, "step": 37630 }, { "epoch": 0.27246339044640855, "grad_norm": 0.18597693741321564, "learning_rate": 4.727543848219651e-06, "loss": 0.9778, "step": 37640 }, { "epoch": 0.2725357771069947, "grad_norm": 0.1623985916376114, "learning_rate": 4.727471461559064e-06, "loss": 0.9741, "step": 37650 }, { "epoch": 0.2726081637675809, "grad_norm": 0.1913863867521286, "learning_rate": 4.727399074898478e-06, "loss": 0.9838, "step": 37660 }, { "epoch": 0.2726805504281671, "grad_norm": 0.1891731321811676, "learning_rate": 4.7273266882378925e-06, "loss": 0.9743, "step": 37670 }, { "epoch": 0.2727529370887533, "grad_norm": 0.1674305498600006, "learning_rate": 4.727254301577306e-06, "loss": 0.9798, "step": 37680 }, { "epoch": 0.2728253237493395, "grad_norm": 0.1666889786720276, "learning_rate": 4.72718191491672e-06, "loss": 0.9566, "step": 37690 }, { "epoch": 0.27289771040992566, "grad_norm": 0.17124493420124054, "learning_rate": 4.727109528256133e-06, "loss": 0.9701, "step": 37700 }, { "epoch": 0.27297009707051184, "grad_norm": 0.17866984009742737, "learning_rate": 4.727037141595548e-06, "loss": 0.9893, "step": 37710 }, { "epoch": 0.273042483731098, "grad_norm": 0.16738882660865784, "learning_rate": 4.726964754934961e-06, "loss": 0.9757, "step": 37720 }, { "epoch": 0.2731148703916842, "grad_norm": 0.1580776870250702, "learning_rate": 4.726892368274375e-06, "loss": 0.9758, "step": 37730 }, { "epoch": 0.2731872570522704, "grad_norm": 0.1834014505147934, "learning_rate": 4.726819981613789e-06, "loss": 0.9687, "step": 37740 }, { "epoch": 0.2732596437128566, "grad_norm": 0.15350131690502167, "learning_rate": 4.726747594953203e-06, "loss": 0.9674, "step": 37750 }, { "epoch": 0.2733320303734428, "grad_norm": 0.1681162267923355, "learning_rate": 4.726675208292616e-06, "loss": 0.9744, "step": 37760 }, { "epoch": 0.27340441703402896, "grad_norm": 0.17525209486484528, "learning_rate": 4.7266028216320295e-06, "loss": 0.9663, "step": 37770 }, { "epoch": 0.27347680369461513, "grad_norm": 0.16645973920822144, "learning_rate": 4.726530434971444e-06, "loss": 0.9631, "step": 37780 }, { "epoch": 0.27354919035520137, "grad_norm": 0.15926945209503174, "learning_rate": 4.7264580483108575e-06, "loss": 0.9706, "step": 37790 }, { "epoch": 0.27362157701578754, "grad_norm": 0.18003691732883453, "learning_rate": 4.726385661650271e-06, "loss": 0.961, "step": 37800 }, { "epoch": 0.2736939636763737, "grad_norm": 0.1554681807756424, "learning_rate": 4.726313274989685e-06, "loss": 0.9708, "step": 37810 }, { "epoch": 0.2737663503369599, "grad_norm": 0.165873184800148, "learning_rate": 4.726240888329099e-06, "loss": 0.9873, "step": 37820 }, { "epoch": 0.2738387369975461, "grad_norm": 0.16069376468658447, "learning_rate": 4.726168501668513e-06, "loss": 0.9704, "step": 37830 }, { "epoch": 0.2739111236581323, "grad_norm": 0.16646680235862732, "learning_rate": 4.7260961150079265e-06, "loss": 0.9794, "step": 37840 }, { "epoch": 0.2739835103187185, "grad_norm": 0.1622561812400818, "learning_rate": 4.72602372834734e-06, "loss": 0.9655, "step": 37850 }, { "epoch": 0.27405589697930466, "grad_norm": 0.17769238352775574, "learning_rate": 4.7259513416867546e-06, "loss": 0.9845, "step": 37860 }, { "epoch": 0.27412828363989084, "grad_norm": 0.1598799079656601, "learning_rate": 4.725878955026168e-06, "loss": 0.9684, "step": 37870 }, { "epoch": 0.274200670300477, "grad_norm": 0.15777985751628876, "learning_rate": 4.725806568365582e-06, "loss": 0.9754, "step": 37880 }, { "epoch": 0.2742730569610632, "grad_norm": 0.16836735606193542, "learning_rate": 4.725734181704995e-06, "loss": 0.9621, "step": 37890 }, { "epoch": 0.2743454436216494, "grad_norm": 0.1695423424243927, "learning_rate": 4.72566179504441e-06, "loss": 0.9573, "step": 37900 }, { "epoch": 0.2744178302822356, "grad_norm": 0.17105446755886078, "learning_rate": 4.7255894083838235e-06, "loss": 0.9736, "step": 37910 }, { "epoch": 0.2744902169428218, "grad_norm": 0.17636360228061676, "learning_rate": 4.725517021723237e-06, "loss": 0.9589, "step": 37920 }, { "epoch": 0.27456260360340795, "grad_norm": 0.2081209272146225, "learning_rate": 4.725444635062651e-06, "loss": 0.9717, "step": 37930 }, { "epoch": 0.2746349902639941, "grad_norm": 0.1700514554977417, "learning_rate": 4.725372248402065e-06, "loss": 0.9788, "step": 37940 }, { "epoch": 0.27470737692458036, "grad_norm": 0.18585331737995148, "learning_rate": 4.725299861741479e-06, "loss": 0.9673, "step": 37950 }, { "epoch": 0.27477976358516654, "grad_norm": 0.1638772189617157, "learning_rate": 4.725227475080892e-06, "loss": 0.977, "step": 37960 }, { "epoch": 0.2748521502457527, "grad_norm": 0.16300378739833832, "learning_rate": 4.725155088420306e-06, "loss": 0.9904, "step": 37970 }, { "epoch": 0.2749245369063389, "grad_norm": 0.16224254667758942, "learning_rate": 4.72508270175972e-06, "loss": 0.9626, "step": 37980 }, { "epoch": 0.27499692356692507, "grad_norm": 0.1576361060142517, "learning_rate": 4.725010315099134e-06, "loss": 0.9815, "step": 37990 }, { "epoch": 0.2750693102275113, "grad_norm": 0.265109121799469, "learning_rate": 4.724937928438548e-06, "loss": 0.9741, "step": 38000 }, { "epoch": 0.2751416968880975, "grad_norm": 0.16923700273036957, "learning_rate": 4.724865541777961e-06, "loss": 0.9641, "step": 38010 }, { "epoch": 0.27521408354868365, "grad_norm": 0.15976910293102264, "learning_rate": 4.724793155117375e-06, "loss": 0.9833, "step": 38020 }, { "epoch": 0.27528647020926983, "grad_norm": 0.1535775065422058, "learning_rate": 4.724720768456789e-06, "loss": 0.985, "step": 38030 }, { "epoch": 0.275358856869856, "grad_norm": 0.1646791249513626, "learning_rate": 4.724648381796203e-06, "loss": 0.9652, "step": 38040 }, { "epoch": 0.2754312435304422, "grad_norm": 0.17050626873970032, "learning_rate": 4.724575995135617e-06, "loss": 0.9634, "step": 38050 }, { "epoch": 0.2755036301910284, "grad_norm": 0.16307681798934937, "learning_rate": 4.72450360847503e-06, "loss": 0.9639, "step": 38060 }, { "epoch": 0.2755760168516146, "grad_norm": 0.17553727328777313, "learning_rate": 4.724431221814445e-06, "loss": 0.9771, "step": 38070 }, { "epoch": 0.27564840351220077, "grad_norm": 0.1655888557434082, "learning_rate": 4.724358835153858e-06, "loss": 0.9717, "step": 38080 }, { "epoch": 0.27572079017278694, "grad_norm": 0.1691659390926361, "learning_rate": 4.724286448493272e-06, "loss": 0.9491, "step": 38090 }, { "epoch": 0.2757931768333731, "grad_norm": 0.15853264927864075, "learning_rate": 4.7242140618326856e-06, "loss": 0.9666, "step": 38100 }, { "epoch": 0.27586556349395935, "grad_norm": 0.15642456710338593, "learning_rate": 4.7241416751721e-06, "loss": 0.9665, "step": 38110 }, { "epoch": 0.27593795015454553, "grad_norm": 0.16785617172718048, "learning_rate": 4.724069288511514e-06, "loss": 0.9697, "step": 38120 }, { "epoch": 0.2760103368151317, "grad_norm": 0.2900194823741913, "learning_rate": 4.723996901850927e-06, "loss": 0.9717, "step": 38130 }, { "epoch": 0.2760827234757179, "grad_norm": 0.23790918290615082, "learning_rate": 4.723924515190341e-06, "loss": 0.9698, "step": 38140 }, { "epoch": 0.27615511013630406, "grad_norm": 0.15893487632274628, "learning_rate": 4.723852128529755e-06, "loss": 0.975, "step": 38150 }, { "epoch": 0.2762274967968903, "grad_norm": 0.15974752604961395, "learning_rate": 4.723779741869169e-06, "loss": 0.9819, "step": 38160 }, { "epoch": 0.27629988345747647, "grad_norm": 0.16646121442317963, "learning_rate": 4.723707355208583e-06, "loss": 0.9664, "step": 38170 }, { "epoch": 0.27637227011806265, "grad_norm": 0.15674103796482086, "learning_rate": 4.723634968547996e-06, "loss": 0.9783, "step": 38180 }, { "epoch": 0.2764446567786488, "grad_norm": 0.1640247255563736, "learning_rate": 4.723562581887411e-06, "loss": 0.9746, "step": 38190 }, { "epoch": 0.276517043439235, "grad_norm": 0.18196041882038116, "learning_rate": 4.723490195226824e-06, "loss": 0.9697, "step": 38200 }, { "epoch": 0.27658943009982123, "grad_norm": 0.16162289679050446, "learning_rate": 4.723417808566238e-06, "loss": 0.9823, "step": 38210 }, { "epoch": 0.2766618167604074, "grad_norm": 0.16687557101249695, "learning_rate": 4.7233454219056515e-06, "loss": 0.9688, "step": 38220 }, { "epoch": 0.2767342034209936, "grad_norm": 0.16218598186969757, "learning_rate": 4.723273035245066e-06, "loss": 0.9888, "step": 38230 }, { "epoch": 0.27680659008157976, "grad_norm": 0.15919815003871918, "learning_rate": 4.72320064858448e-06, "loss": 0.9664, "step": 38240 }, { "epoch": 0.27687897674216594, "grad_norm": 0.15851202607154846, "learning_rate": 4.723128261923893e-06, "loss": 0.9657, "step": 38250 }, { "epoch": 0.2769513634027521, "grad_norm": 0.17171426117420197, "learning_rate": 4.723055875263307e-06, "loss": 0.9671, "step": 38260 }, { "epoch": 0.27702375006333835, "grad_norm": 0.1656324565410614, "learning_rate": 4.722983488602721e-06, "loss": 0.9644, "step": 38270 }, { "epoch": 0.2770961367239245, "grad_norm": 0.16705715656280518, "learning_rate": 4.722911101942135e-06, "loss": 0.969, "step": 38280 }, { "epoch": 0.2771685233845107, "grad_norm": 0.1705339401960373, "learning_rate": 4.722838715281548e-06, "loss": 0.9657, "step": 38290 }, { "epoch": 0.2772409100450969, "grad_norm": 0.18638475239276886, "learning_rate": 4.722766328620962e-06, "loss": 0.9715, "step": 38300 }, { "epoch": 0.27731329670568305, "grad_norm": 0.16748540103435516, "learning_rate": 4.722693941960376e-06, "loss": 0.9616, "step": 38310 }, { "epoch": 0.2773856833662693, "grad_norm": 0.15937179327011108, "learning_rate": 4.722621555299789e-06, "loss": 0.9764, "step": 38320 }, { "epoch": 0.27745807002685546, "grad_norm": 0.19340196251869202, "learning_rate": 4.722549168639203e-06, "loss": 0.9746, "step": 38330 }, { "epoch": 0.27753045668744164, "grad_norm": 0.17282438278198242, "learning_rate": 4.7224767819786174e-06, "loss": 0.9691, "step": 38340 }, { "epoch": 0.2776028433480278, "grad_norm": 0.17909948527812958, "learning_rate": 4.722404395318031e-06, "loss": 0.9802, "step": 38350 }, { "epoch": 0.277675230008614, "grad_norm": 0.1845092922449112, "learning_rate": 4.722332008657445e-06, "loss": 0.9723, "step": 38360 }, { "epoch": 0.2777476166692002, "grad_norm": 0.18336127698421478, "learning_rate": 4.722259621996858e-06, "loss": 0.9782, "step": 38370 }, { "epoch": 0.2778200033297864, "grad_norm": 0.1571909338235855, "learning_rate": 4.722187235336273e-06, "loss": 0.9594, "step": 38380 }, { "epoch": 0.2778923899903726, "grad_norm": 0.18232674896717072, "learning_rate": 4.722114848675686e-06, "loss": 0.9559, "step": 38390 }, { "epoch": 0.27796477665095876, "grad_norm": 0.20004482567310333, "learning_rate": 4.7220424620151e-06, "loss": 0.9725, "step": 38400 }, { "epoch": 0.27803716331154493, "grad_norm": 0.18284298479557037, "learning_rate": 4.721970075354514e-06, "loss": 0.9719, "step": 38410 }, { "epoch": 0.2781095499721311, "grad_norm": 0.16327325999736786, "learning_rate": 4.721897688693928e-06, "loss": 0.9698, "step": 38420 }, { "epoch": 0.27818193663271734, "grad_norm": 0.16780653595924377, "learning_rate": 4.721825302033342e-06, "loss": 0.9678, "step": 38430 }, { "epoch": 0.2782543232933035, "grad_norm": 0.16471821069717407, "learning_rate": 4.721752915372755e-06, "loss": 0.9706, "step": 38440 }, { "epoch": 0.2783267099538897, "grad_norm": 0.1728796362876892, "learning_rate": 4.721680528712169e-06, "loss": 0.9657, "step": 38450 }, { "epoch": 0.27839909661447587, "grad_norm": 0.15473975241184235, "learning_rate": 4.721608142051583e-06, "loss": 0.9683, "step": 38460 }, { "epoch": 0.27847148327506205, "grad_norm": 0.15741673111915588, "learning_rate": 4.721535755390997e-06, "loss": 0.9775, "step": 38470 }, { "epoch": 0.2785438699356483, "grad_norm": 0.1729719191789627, "learning_rate": 4.721463368730411e-06, "loss": 0.973, "step": 38480 }, { "epoch": 0.27861625659623446, "grad_norm": 0.16401894390583038, "learning_rate": 4.721390982069824e-06, "loss": 0.9589, "step": 38490 }, { "epoch": 0.27868864325682063, "grad_norm": 0.16134223341941833, "learning_rate": 4.721318595409239e-06, "loss": 0.9797, "step": 38500 }, { "epoch": 0.2787610299174068, "grad_norm": 0.1627780795097351, "learning_rate": 4.721246208748652e-06, "loss": 0.976, "step": 38510 }, { "epoch": 0.278833416577993, "grad_norm": 0.15430185198783875, "learning_rate": 4.721173822088066e-06, "loss": 0.9683, "step": 38520 }, { "epoch": 0.2789058032385792, "grad_norm": 0.18733610212802887, "learning_rate": 4.7211014354274795e-06, "loss": 0.9688, "step": 38530 }, { "epoch": 0.2789781898991654, "grad_norm": 0.16492965817451477, "learning_rate": 4.721029048766894e-06, "loss": 0.9683, "step": 38540 }, { "epoch": 0.2790505765597516, "grad_norm": 0.1765565425157547, "learning_rate": 4.720956662106308e-06, "loss": 0.9645, "step": 38550 }, { "epoch": 0.27912296322033775, "grad_norm": 0.18229708075523376, "learning_rate": 4.720884275445721e-06, "loss": 0.9562, "step": 38560 }, { "epoch": 0.2791953498809239, "grad_norm": 0.175114244222641, "learning_rate": 4.720811888785135e-06, "loss": 0.959, "step": 38570 }, { "epoch": 0.2792677365415101, "grad_norm": 0.18212807178497314, "learning_rate": 4.720739502124549e-06, "loss": 0.9662, "step": 38580 }, { "epoch": 0.27934012320209634, "grad_norm": 0.20221389830112457, "learning_rate": 4.720667115463963e-06, "loss": 0.9644, "step": 38590 }, { "epoch": 0.2794125098626825, "grad_norm": 0.16933338344097137, "learning_rate": 4.7205947288033766e-06, "loss": 0.9783, "step": 38600 }, { "epoch": 0.2794848965232687, "grad_norm": 0.16471229493618011, "learning_rate": 4.72052234214279e-06, "loss": 0.9816, "step": 38610 }, { "epoch": 0.27955728318385487, "grad_norm": 0.16443496942520142, "learning_rate": 4.720449955482204e-06, "loss": 0.9757, "step": 38620 }, { "epoch": 0.27962966984444104, "grad_norm": 0.17601965367794037, "learning_rate": 4.720377568821618e-06, "loss": 0.9697, "step": 38630 }, { "epoch": 0.2797020565050273, "grad_norm": 0.1578618586063385, "learning_rate": 4.720305182161032e-06, "loss": 0.9539, "step": 38640 }, { "epoch": 0.27977444316561345, "grad_norm": 0.1648857295513153, "learning_rate": 4.7202327955004455e-06, "loss": 0.9604, "step": 38650 }, { "epoch": 0.27984682982619963, "grad_norm": 0.1769489347934723, "learning_rate": 4.720160408839859e-06, "loss": 0.9612, "step": 38660 }, { "epoch": 0.2799192164867858, "grad_norm": 0.17651750147342682, "learning_rate": 4.7200880221792736e-06, "loss": 0.976, "step": 38670 }, { "epoch": 0.279991603147372, "grad_norm": 0.1547250896692276, "learning_rate": 4.720015635518687e-06, "loss": 0.9679, "step": 38680 }, { "epoch": 0.2800639898079582, "grad_norm": 0.15348443388938904, "learning_rate": 4.719943248858101e-06, "loss": 0.9766, "step": 38690 }, { "epoch": 0.2801363764685444, "grad_norm": 0.1610197126865387, "learning_rate": 4.719870862197514e-06, "loss": 0.9837, "step": 38700 }, { "epoch": 0.28020876312913057, "grad_norm": 0.21418476104736328, "learning_rate": 4.719798475536929e-06, "loss": 0.9665, "step": 38710 }, { "epoch": 0.28028114978971674, "grad_norm": 0.17171064019203186, "learning_rate": 4.7197260888763425e-06, "loss": 0.9687, "step": 38720 }, { "epoch": 0.2803535364503029, "grad_norm": 0.1554517149925232, "learning_rate": 4.719653702215756e-06, "loss": 0.9636, "step": 38730 }, { "epoch": 0.28042592311088915, "grad_norm": 0.16150425374507904, "learning_rate": 4.71958131555517e-06, "loss": 0.978, "step": 38740 }, { "epoch": 0.28049830977147533, "grad_norm": 0.16200552880764008, "learning_rate": 4.719508928894584e-06, "loss": 0.9618, "step": 38750 }, { "epoch": 0.2805706964320615, "grad_norm": 0.16305282711982727, "learning_rate": 4.719436542233998e-06, "loss": 0.9762, "step": 38760 }, { "epoch": 0.2806430830926477, "grad_norm": 0.16002973914146423, "learning_rate": 4.719364155573411e-06, "loss": 0.9829, "step": 38770 }, { "epoch": 0.28071546975323386, "grad_norm": 0.1760619580745697, "learning_rate": 4.719291768912825e-06, "loss": 0.9737, "step": 38780 }, { "epoch": 0.28078785641382004, "grad_norm": 0.15010814368724823, "learning_rate": 4.7192193822522395e-06, "loss": 0.9658, "step": 38790 }, { "epoch": 0.28086024307440627, "grad_norm": 0.1825767457485199, "learning_rate": 4.719146995591653e-06, "loss": 0.975, "step": 38800 }, { "epoch": 0.28093262973499245, "grad_norm": 0.17386701703071594, "learning_rate": 4.719074608931067e-06, "loss": 0.9684, "step": 38810 }, { "epoch": 0.2810050163955786, "grad_norm": 0.15754207968711853, "learning_rate": 4.71900222227048e-06, "loss": 0.9836, "step": 38820 }, { "epoch": 0.2810774030561648, "grad_norm": 0.1641159951686859, "learning_rate": 4.718929835609894e-06, "loss": 0.9741, "step": 38830 }, { "epoch": 0.281149789716751, "grad_norm": 0.16686658561229706, "learning_rate": 4.7188574489493076e-06, "loss": 0.9835, "step": 38840 }, { "epoch": 0.2812221763773372, "grad_norm": 0.17113393545150757, "learning_rate": 4.718785062288721e-06, "loss": 0.9675, "step": 38850 }, { "epoch": 0.2812945630379234, "grad_norm": 0.19541935622692108, "learning_rate": 4.718712675628136e-06, "loss": 0.971, "step": 38860 }, { "epoch": 0.28136694969850956, "grad_norm": 0.171500563621521, "learning_rate": 4.718640288967549e-06, "loss": 0.9693, "step": 38870 }, { "epoch": 0.28143933635909574, "grad_norm": 0.1516040712594986, "learning_rate": 4.718567902306963e-06, "loss": 0.9637, "step": 38880 }, { "epoch": 0.2815117230196819, "grad_norm": 0.1661519557237625, "learning_rate": 4.7184955156463765e-06, "loss": 0.9658, "step": 38890 }, { "epoch": 0.28158410968026815, "grad_norm": 0.16510553658008575, "learning_rate": 4.718423128985791e-06, "loss": 0.9776, "step": 38900 }, { "epoch": 0.2816564963408543, "grad_norm": 0.386662095785141, "learning_rate": 4.718350742325205e-06, "loss": 0.964, "step": 38910 }, { "epoch": 0.2817288830014405, "grad_norm": 0.1649596393108368, "learning_rate": 4.718278355664618e-06, "loss": 0.9717, "step": 38920 }, { "epoch": 0.2818012696620267, "grad_norm": 0.17319610714912415, "learning_rate": 4.718205969004032e-06, "loss": 0.9752, "step": 38930 }, { "epoch": 0.28187365632261285, "grad_norm": 0.16159221529960632, "learning_rate": 4.718133582343446e-06, "loss": 0.9815, "step": 38940 }, { "epoch": 0.28194604298319903, "grad_norm": 0.17223593592643738, "learning_rate": 4.71806119568286e-06, "loss": 0.9752, "step": 38950 }, { "epoch": 0.28201842964378526, "grad_norm": 0.15941330790519714, "learning_rate": 4.7179888090222735e-06, "loss": 0.97, "step": 38960 }, { "epoch": 0.28209081630437144, "grad_norm": 0.17768092453479767, "learning_rate": 4.717916422361687e-06, "loss": 0.9608, "step": 38970 }, { "epoch": 0.2821632029649576, "grad_norm": 0.18235769867897034, "learning_rate": 4.717844035701102e-06, "loss": 0.9795, "step": 38980 }, { "epoch": 0.2822355896255438, "grad_norm": 0.1654520183801651, "learning_rate": 4.717771649040515e-06, "loss": 0.9743, "step": 38990 }, { "epoch": 0.28230797628612997, "grad_norm": 0.17783403396606445, "learning_rate": 4.717699262379929e-06, "loss": 0.977, "step": 39000 }, { "epoch": 0.2823803629467162, "grad_norm": 0.15796136856079102, "learning_rate": 4.7176268757193424e-06, "loss": 0.9702, "step": 39010 }, { "epoch": 0.2824527496073024, "grad_norm": 0.16155339777469635, "learning_rate": 4.717554489058757e-06, "loss": 0.9734, "step": 39020 }, { "epoch": 0.28252513626788855, "grad_norm": 0.16858787834644318, "learning_rate": 4.7174821023981705e-06, "loss": 0.9644, "step": 39030 }, { "epoch": 0.28259752292847473, "grad_norm": 0.17033177614212036, "learning_rate": 4.717409715737584e-06, "loss": 0.981, "step": 39040 }, { "epoch": 0.2826699095890609, "grad_norm": 0.17069222033023834, "learning_rate": 4.717337329076998e-06, "loss": 0.974, "step": 39050 }, { "epoch": 0.28274229624964714, "grad_norm": 0.17629200220108032, "learning_rate": 4.717264942416412e-06, "loss": 0.9717, "step": 39060 }, { "epoch": 0.2828146829102333, "grad_norm": 0.1703384816646576, "learning_rate": 4.717192555755826e-06, "loss": 0.9781, "step": 39070 }, { "epoch": 0.2828870695708195, "grad_norm": 0.16142185032367706, "learning_rate": 4.7171201690952394e-06, "loss": 0.9576, "step": 39080 }, { "epoch": 0.28295945623140567, "grad_norm": 0.1647561937570572, "learning_rate": 4.717047782434653e-06, "loss": 0.9738, "step": 39090 }, { "epoch": 0.28303184289199185, "grad_norm": 0.16790105402469635, "learning_rate": 4.7169753957740675e-06, "loss": 0.9632, "step": 39100 }, { "epoch": 0.283104229552578, "grad_norm": 0.16049116849899292, "learning_rate": 4.716903009113481e-06, "loss": 0.9809, "step": 39110 }, { "epoch": 0.28317661621316426, "grad_norm": 0.16163350641727448, "learning_rate": 4.716830622452895e-06, "loss": 0.9568, "step": 39120 }, { "epoch": 0.28324900287375043, "grad_norm": 0.16700412333011627, "learning_rate": 4.716758235792308e-06, "loss": 0.9724, "step": 39130 }, { "epoch": 0.2833213895343366, "grad_norm": 0.17913348972797394, "learning_rate": 4.716685849131723e-06, "loss": 0.9634, "step": 39140 }, { "epoch": 0.2833937761949228, "grad_norm": 0.1574385166168213, "learning_rate": 4.7166134624711365e-06, "loss": 0.98, "step": 39150 }, { "epoch": 0.28346616285550896, "grad_norm": 0.15847839415073395, "learning_rate": 4.71654107581055e-06, "loss": 0.9686, "step": 39160 }, { "epoch": 0.2835385495160952, "grad_norm": 0.18322902917861938, "learning_rate": 4.716468689149964e-06, "loss": 0.9731, "step": 39170 }, { "epoch": 0.28361093617668137, "grad_norm": 0.155546173453331, "learning_rate": 4.716396302489378e-06, "loss": 0.9539, "step": 39180 }, { "epoch": 0.28368332283726755, "grad_norm": 0.17779536545276642, "learning_rate": 4.716323915828792e-06, "loss": 0.9655, "step": 39190 }, { "epoch": 0.2837557094978537, "grad_norm": 0.1730605959892273, "learning_rate": 4.716251529168205e-06, "loss": 0.9726, "step": 39200 }, { "epoch": 0.2838280961584399, "grad_norm": 0.19149181246757507, "learning_rate": 4.716179142507619e-06, "loss": 0.9723, "step": 39210 }, { "epoch": 0.28390048281902613, "grad_norm": 0.15683695673942566, "learning_rate": 4.716106755847033e-06, "loss": 0.974, "step": 39220 }, { "epoch": 0.2839728694796123, "grad_norm": 0.1871178299188614, "learning_rate": 4.716034369186447e-06, "loss": 0.9814, "step": 39230 }, { "epoch": 0.2840452561401985, "grad_norm": 0.25293681025505066, "learning_rate": 4.715961982525861e-06, "loss": 0.9697, "step": 39240 }, { "epoch": 0.28411764280078466, "grad_norm": 0.18877732753753662, "learning_rate": 4.715889595865274e-06, "loss": 0.9743, "step": 39250 }, { "epoch": 0.28419002946137084, "grad_norm": 0.17080391943454742, "learning_rate": 4.715817209204688e-06, "loss": 0.9627, "step": 39260 }, { "epoch": 0.2842624161219571, "grad_norm": 0.1600443720817566, "learning_rate": 4.715744822544102e-06, "loss": 0.9624, "step": 39270 }, { "epoch": 0.28433480278254325, "grad_norm": 0.18293151259422302, "learning_rate": 4.715672435883516e-06, "loss": 0.9775, "step": 39280 }, { "epoch": 0.2844071894431294, "grad_norm": 0.17442253232002258, "learning_rate": 4.71560004922293e-06, "loss": 0.9586, "step": 39290 }, { "epoch": 0.2844795761037156, "grad_norm": 0.1556355059146881, "learning_rate": 4.715527662562343e-06, "loss": 0.9798, "step": 39300 }, { "epoch": 0.2845519627643018, "grad_norm": 0.18220852315425873, "learning_rate": 4.715455275901758e-06, "loss": 0.97, "step": 39310 }, { "epoch": 0.28462434942488796, "grad_norm": 0.1559944748878479, "learning_rate": 4.715382889241171e-06, "loss": 0.9807, "step": 39320 }, { "epoch": 0.2846967360854742, "grad_norm": 0.1595684438943863, "learning_rate": 4.715310502580585e-06, "loss": 0.9567, "step": 39330 }, { "epoch": 0.28476912274606037, "grad_norm": 0.18207839131355286, "learning_rate": 4.7152381159199985e-06, "loss": 0.9801, "step": 39340 }, { "epoch": 0.28484150940664654, "grad_norm": 0.1691935509443283, "learning_rate": 4.715165729259413e-06, "loss": 0.9671, "step": 39350 }, { "epoch": 0.2849138960672327, "grad_norm": 0.16009429097175598, "learning_rate": 4.715093342598826e-06, "loss": 0.9612, "step": 39360 }, { "epoch": 0.2849862827278189, "grad_norm": 0.16721150279045105, "learning_rate": 4.71502095593824e-06, "loss": 0.9633, "step": 39370 }, { "epoch": 0.28505866938840513, "grad_norm": 0.16945244371891022, "learning_rate": 4.714948569277654e-06, "loss": 0.9679, "step": 39380 }, { "epoch": 0.2851310560489913, "grad_norm": 0.16579945385456085, "learning_rate": 4.7148761826170675e-06, "loss": 0.9525, "step": 39390 }, { "epoch": 0.2852034427095775, "grad_norm": 0.17413263022899628, "learning_rate": 4.714803795956481e-06, "loss": 0.9631, "step": 39400 }, { "epoch": 0.28527582937016366, "grad_norm": 0.16317112743854523, "learning_rate": 4.714731409295895e-06, "loss": 0.9699, "step": 39410 }, { "epoch": 0.28534821603074983, "grad_norm": 0.16122910380363464, "learning_rate": 4.714659022635309e-06, "loss": 0.9658, "step": 39420 }, { "epoch": 0.28542060269133607, "grad_norm": 0.17023932933807373, "learning_rate": 4.714586635974723e-06, "loss": 0.9697, "step": 39430 }, { "epoch": 0.28549298935192224, "grad_norm": 0.17641928791999817, "learning_rate": 4.714514249314136e-06, "loss": 0.9823, "step": 39440 }, { "epoch": 0.2855653760125084, "grad_norm": 0.16606158018112183, "learning_rate": 4.71444186265355e-06, "loss": 0.9721, "step": 39450 }, { "epoch": 0.2856377626730946, "grad_norm": 0.1534787118434906, "learning_rate": 4.7143694759929645e-06, "loss": 0.976, "step": 39460 }, { "epoch": 0.2857101493336808, "grad_norm": 0.15776877105236053, "learning_rate": 4.714297089332378e-06, "loss": 0.9708, "step": 39470 }, { "epoch": 0.28578253599426695, "grad_norm": 0.1588585376739502, "learning_rate": 4.714224702671792e-06, "loss": 0.9766, "step": 39480 }, { "epoch": 0.2858549226548532, "grad_norm": 0.16405107080936432, "learning_rate": 4.714152316011205e-06, "loss": 0.9766, "step": 39490 }, { "epoch": 0.28592730931543936, "grad_norm": 0.15826919674873352, "learning_rate": 4.71407992935062e-06, "loss": 0.987, "step": 39500 }, { "epoch": 0.28599969597602554, "grad_norm": 0.166813924908638, "learning_rate": 4.714007542690033e-06, "loss": 0.9616, "step": 39510 }, { "epoch": 0.2860720826366117, "grad_norm": 0.15735673904418945, "learning_rate": 4.713935156029447e-06, "loss": 0.9719, "step": 39520 }, { "epoch": 0.2861444692971979, "grad_norm": 0.16577894985675812, "learning_rate": 4.713862769368861e-06, "loss": 0.9661, "step": 39530 }, { "epoch": 0.2862168559577841, "grad_norm": 0.1636783629655838, "learning_rate": 4.713790382708275e-06, "loss": 0.966, "step": 39540 }, { "epoch": 0.2862892426183703, "grad_norm": 0.16773077845573425, "learning_rate": 4.713717996047689e-06, "loss": 0.9675, "step": 39550 }, { "epoch": 0.2863616292789565, "grad_norm": 0.16528858244419098, "learning_rate": 4.713645609387102e-06, "loss": 0.953, "step": 39560 }, { "epoch": 0.28643401593954265, "grad_norm": 0.178873211145401, "learning_rate": 4.713573222726516e-06, "loss": 0.9768, "step": 39570 }, { "epoch": 0.28650640260012883, "grad_norm": 0.18183675408363342, "learning_rate": 4.71350083606593e-06, "loss": 0.9704, "step": 39580 }, { "epoch": 0.28657878926071506, "grad_norm": 0.16120202839374542, "learning_rate": 4.713428449405344e-06, "loss": 0.9816, "step": 39590 }, { "epoch": 0.28665117592130124, "grad_norm": 0.20635010302066803, "learning_rate": 4.713356062744758e-06, "loss": 0.9583, "step": 39600 }, { "epoch": 0.2867235625818874, "grad_norm": 0.1598854809999466, "learning_rate": 4.713283676084171e-06, "loss": 0.9664, "step": 39610 }, { "epoch": 0.2867959492424736, "grad_norm": 0.16559354960918427, "learning_rate": 4.713211289423586e-06, "loss": 0.9564, "step": 39620 }, { "epoch": 0.28686833590305977, "grad_norm": 0.18220894038677216, "learning_rate": 4.713138902762999e-06, "loss": 0.9688, "step": 39630 }, { "epoch": 0.28694072256364594, "grad_norm": 0.16989223659038544, "learning_rate": 4.713066516102413e-06, "loss": 0.9781, "step": 39640 }, { "epoch": 0.2870131092242322, "grad_norm": 0.16962113976478577, "learning_rate": 4.712994129441827e-06, "loss": 0.9771, "step": 39650 }, { "epoch": 0.28708549588481835, "grad_norm": 0.16623146831989288, "learning_rate": 4.712921742781241e-06, "loss": 0.9551, "step": 39660 }, { "epoch": 0.28715788254540453, "grad_norm": 0.1545698344707489, "learning_rate": 4.712849356120655e-06, "loss": 0.9565, "step": 39670 }, { "epoch": 0.2872302692059907, "grad_norm": 0.19971857964992523, "learning_rate": 4.712776969460068e-06, "loss": 0.9566, "step": 39680 }, { "epoch": 0.2873026558665769, "grad_norm": 0.1566108763217926, "learning_rate": 4.712704582799482e-06, "loss": 0.9696, "step": 39690 }, { "epoch": 0.2873750425271631, "grad_norm": 0.16341611742973328, "learning_rate": 4.712632196138896e-06, "loss": 0.9674, "step": 39700 }, { "epoch": 0.2874474291877493, "grad_norm": 0.18505293130874634, "learning_rate": 4.71255980947831e-06, "loss": 0.9693, "step": 39710 }, { "epoch": 0.28751981584833547, "grad_norm": 0.167031392455101, "learning_rate": 4.712487422817724e-06, "loss": 0.9645, "step": 39720 }, { "epoch": 0.28759220250892165, "grad_norm": 0.17869937419891357, "learning_rate": 4.712415036157137e-06, "loss": 0.9805, "step": 39730 }, { "epoch": 0.2876645891695078, "grad_norm": 0.16606245934963226, "learning_rate": 4.712342649496552e-06, "loss": 0.9612, "step": 39740 }, { "epoch": 0.28773697583009405, "grad_norm": 0.19421431422233582, "learning_rate": 4.712270262835965e-06, "loss": 0.9731, "step": 39750 }, { "epoch": 0.28780936249068023, "grad_norm": 0.17621836066246033, "learning_rate": 4.712197876175379e-06, "loss": 0.9697, "step": 39760 }, { "epoch": 0.2878817491512664, "grad_norm": 0.1548440009355545, "learning_rate": 4.7121254895147925e-06, "loss": 0.9666, "step": 39770 }, { "epoch": 0.2879541358118526, "grad_norm": 0.18306025862693787, "learning_rate": 4.712053102854207e-06, "loss": 0.9628, "step": 39780 }, { "epoch": 0.28802652247243876, "grad_norm": 0.16512498259544373, "learning_rate": 4.711980716193621e-06, "loss": 0.968, "step": 39790 }, { "epoch": 0.28809890913302494, "grad_norm": 0.17831869423389435, "learning_rate": 4.711908329533034e-06, "loss": 0.9739, "step": 39800 }, { "epoch": 0.28817129579361117, "grad_norm": 0.15997859835624695, "learning_rate": 4.711835942872448e-06, "loss": 0.9684, "step": 39810 }, { "epoch": 0.28824368245419735, "grad_norm": 0.1619550734758377, "learning_rate": 4.711763556211862e-06, "loss": 0.9792, "step": 39820 }, { "epoch": 0.2883160691147835, "grad_norm": 0.1704218089580536, "learning_rate": 4.711691169551276e-06, "loss": 0.9676, "step": 39830 }, { "epoch": 0.2883884557753697, "grad_norm": 0.17263592779636383, "learning_rate": 4.7116187828906895e-06, "loss": 0.9774, "step": 39840 }, { "epoch": 0.2884608424359559, "grad_norm": 0.1720539629459381, "learning_rate": 4.711546396230103e-06, "loss": 0.9691, "step": 39850 }, { "epoch": 0.2885332290965421, "grad_norm": 0.2420406937599182, "learning_rate": 4.711474009569517e-06, "loss": 0.9788, "step": 39860 }, { "epoch": 0.2886056157571283, "grad_norm": 0.17428769171237946, "learning_rate": 4.711401622908931e-06, "loss": 0.9727, "step": 39870 }, { "epoch": 0.28867800241771446, "grad_norm": 0.17469775676727295, "learning_rate": 4.711329236248345e-06, "loss": 0.9748, "step": 39880 }, { "epoch": 0.28875038907830064, "grad_norm": 0.17111310362815857, "learning_rate": 4.7112568495877585e-06, "loss": 0.9626, "step": 39890 }, { "epoch": 0.2888227757388868, "grad_norm": 0.19012178480625153, "learning_rate": 4.711184462927172e-06, "loss": 0.9675, "step": 39900 }, { "epoch": 0.28889516239947305, "grad_norm": 0.16587407886981964, "learning_rate": 4.711112076266586e-06, "loss": 0.9665, "step": 39910 }, { "epoch": 0.2889675490600592, "grad_norm": 0.17747098207473755, "learning_rate": 4.711039689605999e-06, "loss": 0.975, "step": 39920 }, { "epoch": 0.2890399357206454, "grad_norm": 0.15955297648906708, "learning_rate": 4.710967302945414e-06, "loss": 0.9633, "step": 39930 }, { "epoch": 0.2891123223812316, "grad_norm": 0.1651538610458374, "learning_rate": 4.710894916284827e-06, "loss": 0.9611, "step": 39940 }, { "epoch": 0.28918470904181776, "grad_norm": 0.15846842527389526, "learning_rate": 4.710822529624241e-06, "loss": 0.9699, "step": 39950 }, { "epoch": 0.289257095702404, "grad_norm": 0.17200906574726105, "learning_rate": 4.710750142963655e-06, "loss": 0.9759, "step": 39960 }, { "epoch": 0.28932948236299016, "grad_norm": 0.18310053646564484, "learning_rate": 4.710677756303069e-06, "loss": 0.9855, "step": 39970 }, { "epoch": 0.28940186902357634, "grad_norm": 0.17805758118629456, "learning_rate": 4.710605369642483e-06, "loss": 0.9754, "step": 39980 }, { "epoch": 0.2894742556841625, "grad_norm": 0.164411723613739, "learning_rate": 4.710532982981896e-06, "loss": 0.9643, "step": 39990 }, { "epoch": 0.2895466423447487, "grad_norm": 0.17421521246433258, "learning_rate": 4.71046059632131e-06, "loss": 0.9709, "step": 40000 }, { "epoch": 0.28961902900533487, "grad_norm": 0.17790335416793823, "learning_rate": 4.710388209660724e-06, "loss": 0.9615, "step": 40010 }, { "epoch": 0.2896914156659211, "grad_norm": 0.17614027857780457, "learning_rate": 4.710315823000138e-06, "loss": 0.972, "step": 40020 }, { "epoch": 0.2897638023265073, "grad_norm": 0.17423102259635925, "learning_rate": 4.710243436339552e-06, "loss": 0.9799, "step": 40030 }, { "epoch": 0.28983618898709346, "grad_norm": 0.16734719276428223, "learning_rate": 4.710171049678965e-06, "loss": 0.9802, "step": 40040 }, { "epoch": 0.28990857564767963, "grad_norm": 0.18581393361091614, "learning_rate": 4.710098663018379e-06, "loss": 0.9651, "step": 40050 }, { "epoch": 0.2899809623082658, "grad_norm": 0.16907867789268494, "learning_rate": 4.710026276357793e-06, "loss": 0.981, "step": 40060 }, { "epoch": 0.29005334896885204, "grad_norm": 0.164125457406044, "learning_rate": 4.709953889697207e-06, "loss": 0.9739, "step": 40070 }, { "epoch": 0.2901257356294382, "grad_norm": 0.1936989575624466, "learning_rate": 4.7098815030366205e-06, "loss": 0.9565, "step": 40080 }, { "epoch": 0.2901981222900244, "grad_norm": 0.16564348340034485, "learning_rate": 4.709809116376034e-06, "loss": 0.9671, "step": 40090 }, { "epoch": 0.2902705089506106, "grad_norm": 0.17586882412433624, "learning_rate": 4.709736729715449e-06, "loss": 0.9709, "step": 40100 }, { "epoch": 0.29034289561119675, "grad_norm": 0.18018198013305664, "learning_rate": 4.709664343054862e-06, "loss": 0.9725, "step": 40110 }, { "epoch": 0.290415282271783, "grad_norm": 0.1636233627796173, "learning_rate": 4.709591956394276e-06, "loss": 0.9762, "step": 40120 }, { "epoch": 0.29048766893236916, "grad_norm": 0.1633533090353012, "learning_rate": 4.7095195697336895e-06, "loss": 0.972, "step": 40130 }, { "epoch": 0.29056005559295534, "grad_norm": 0.18192791938781738, "learning_rate": 4.709447183073104e-06, "loss": 0.9612, "step": 40140 }, { "epoch": 0.2906324422535415, "grad_norm": 0.17899948358535767, "learning_rate": 4.7093747964125176e-06, "loss": 0.9708, "step": 40150 }, { "epoch": 0.2907048289141277, "grad_norm": 0.17250950634479523, "learning_rate": 4.709302409751931e-06, "loss": 0.9764, "step": 40160 }, { "epoch": 0.29077721557471387, "grad_norm": 0.1687665432691574, "learning_rate": 4.709230023091345e-06, "loss": 0.9702, "step": 40170 }, { "epoch": 0.2908496022353001, "grad_norm": 0.1669207513332367, "learning_rate": 4.709157636430759e-06, "loss": 0.9824, "step": 40180 }, { "epoch": 0.2909219888958863, "grad_norm": 0.16087938845157623, "learning_rate": 4.709085249770173e-06, "loss": 0.9607, "step": 40190 }, { "epoch": 0.29099437555647245, "grad_norm": 0.18034091591835022, "learning_rate": 4.7090128631095865e-06, "loss": 0.9671, "step": 40200 }, { "epoch": 0.2910667622170586, "grad_norm": 0.1801455020904541, "learning_rate": 4.708940476449e-06, "loss": 0.975, "step": 40210 }, { "epoch": 0.2911391488776448, "grad_norm": 0.16992124915122986, "learning_rate": 4.7088680897884146e-06, "loss": 0.9597, "step": 40220 }, { "epoch": 0.29121153553823104, "grad_norm": 0.1644294410943985, "learning_rate": 4.708795703127828e-06, "loss": 0.97, "step": 40230 }, { "epoch": 0.2912839221988172, "grad_norm": 0.16996628046035767, "learning_rate": 4.708723316467242e-06, "loss": 0.9661, "step": 40240 }, { "epoch": 0.2913563088594034, "grad_norm": 0.1804681122303009, "learning_rate": 4.708650929806655e-06, "loss": 0.9691, "step": 40250 }, { "epoch": 0.29142869551998957, "grad_norm": 0.1625206172466278, "learning_rate": 4.70857854314607e-06, "loss": 0.9725, "step": 40260 }, { "epoch": 0.29150108218057574, "grad_norm": 0.15384940803050995, "learning_rate": 4.7085061564854835e-06, "loss": 0.9657, "step": 40270 }, { "epoch": 0.291573468841162, "grad_norm": 0.18380320072174072, "learning_rate": 4.708433769824897e-06, "loss": 0.9646, "step": 40280 }, { "epoch": 0.29164585550174815, "grad_norm": 0.17061907052993774, "learning_rate": 4.708361383164311e-06, "loss": 0.9681, "step": 40290 }, { "epoch": 0.29171824216233433, "grad_norm": 0.18094012141227722, "learning_rate": 4.708288996503725e-06, "loss": 0.9638, "step": 40300 }, { "epoch": 0.2917906288229205, "grad_norm": 0.1657949686050415, "learning_rate": 4.708216609843139e-06, "loss": 0.9689, "step": 40310 }, { "epoch": 0.2918630154835067, "grad_norm": 0.28880590200424194, "learning_rate": 4.708144223182552e-06, "loss": 0.9702, "step": 40320 }, { "epoch": 0.29193540214409286, "grad_norm": 0.16735604405403137, "learning_rate": 4.708071836521966e-06, "loss": 0.9845, "step": 40330 }, { "epoch": 0.2920077888046791, "grad_norm": 0.17504291236400604, "learning_rate": 4.7079994498613805e-06, "loss": 0.9572, "step": 40340 }, { "epoch": 0.29208017546526527, "grad_norm": 0.15563970804214478, "learning_rate": 4.707927063200794e-06, "loss": 0.9574, "step": 40350 }, { "epoch": 0.29215256212585144, "grad_norm": 0.15227772295475006, "learning_rate": 4.707854676540208e-06, "loss": 0.9612, "step": 40360 }, { "epoch": 0.2922249487864376, "grad_norm": 0.16515760123729706, "learning_rate": 4.707782289879621e-06, "loss": 0.9519, "step": 40370 }, { "epoch": 0.2922973354470238, "grad_norm": 0.17312021553516388, "learning_rate": 4.707709903219036e-06, "loss": 0.9707, "step": 40380 }, { "epoch": 0.29236972210761003, "grad_norm": 0.16830036044120789, "learning_rate": 4.7076375165584494e-06, "loss": 0.9557, "step": 40390 }, { "epoch": 0.2924421087681962, "grad_norm": 0.15725110471248627, "learning_rate": 4.707565129897863e-06, "loss": 0.9652, "step": 40400 }, { "epoch": 0.2925144954287824, "grad_norm": 0.16694726049900055, "learning_rate": 4.707492743237277e-06, "loss": 0.9682, "step": 40410 }, { "epoch": 0.29258688208936856, "grad_norm": 0.16128145158290863, "learning_rate": 4.70742035657669e-06, "loss": 0.9658, "step": 40420 }, { "epoch": 0.29265926874995474, "grad_norm": 0.16756875813007355, "learning_rate": 4.707347969916104e-06, "loss": 0.9768, "step": 40430 }, { "epoch": 0.29273165541054097, "grad_norm": 0.1622963845729828, "learning_rate": 4.7072755832555175e-06, "loss": 0.9754, "step": 40440 }, { "epoch": 0.29280404207112715, "grad_norm": 0.15665726363658905, "learning_rate": 4.707203196594932e-06, "loss": 0.9553, "step": 40450 }, { "epoch": 0.2928764287317133, "grad_norm": 0.17446359992027283, "learning_rate": 4.707130809934346e-06, "loss": 0.9724, "step": 40460 }, { "epoch": 0.2929488153922995, "grad_norm": 0.16420406103134155, "learning_rate": 4.707058423273759e-06, "loss": 0.972, "step": 40470 }, { "epoch": 0.2930212020528857, "grad_norm": 0.1752181053161621, "learning_rate": 4.706986036613173e-06, "loss": 0.9645, "step": 40480 }, { "epoch": 0.2930935887134719, "grad_norm": 0.16292783617973328, "learning_rate": 4.706913649952587e-06, "loss": 0.9608, "step": 40490 }, { "epoch": 0.2931659753740581, "grad_norm": 0.15894421935081482, "learning_rate": 4.706841263292001e-06, "loss": 0.955, "step": 40500 }, { "epoch": 0.29323836203464426, "grad_norm": 0.1527421772480011, "learning_rate": 4.7067688766314145e-06, "loss": 0.9607, "step": 40510 }, { "epoch": 0.29331074869523044, "grad_norm": 0.1576659232378006, "learning_rate": 4.706696489970828e-06, "loss": 0.9614, "step": 40520 }, { "epoch": 0.2933831353558166, "grad_norm": 0.1652728021144867, "learning_rate": 4.706624103310243e-06, "loss": 0.9544, "step": 40530 }, { "epoch": 0.2934555220164028, "grad_norm": 0.16338014602661133, "learning_rate": 4.706551716649656e-06, "loss": 0.9539, "step": 40540 }, { "epoch": 0.293527908676989, "grad_norm": 0.16020265221595764, "learning_rate": 4.70647932998907e-06, "loss": 0.9732, "step": 40550 }, { "epoch": 0.2936002953375752, "grad_norm": 0.17139659821987152, "learning_rate": 4.7064069433284834e-06, "loss": 0.9645, "step": 40560 }, { "epoch": 0.2936726819981614, "grad_norm": 0.1736135184764862, "learning_rate": 4.706334556667898e-06, "loss": 0.9746, "step": 40570 }, { "epoch": 0.29374506865874755, "grad_norm": 0.16610664129257202, "learning_rate": 4.7062621700073115e-06, "loss": 0.9673, "step": 40580 }, { "epoch": 0.29381745531933373, "grad_norm": 0.195650115609169, "learning_rate": 4.706189783346725e-06, "loss": 0.9613, "step": 40590 }, { "epoch": 0.29388984197991996, "grad_norm": 0.1645543873310089, "learning_rate": 4.706117396686139e-06, "loss": 0.9577, "step": 40600 }, { "epoch": 0.29396222864050614, "grad_norm": 0.17229747772216797, "learning_rate": 4.706045010025553e-06, "loss": 0.9701, "step": 40610 }, { "epoch": 0.2940346153010923, "grad_norm": 0.18451061844825745, "learning_rate": 4.705972623364967e-06, "loss": 0.9746, "step": 40620 }, { "epoch": 0.2941070019616785, "grad_norm": 0.16431106626987457, "learning_rate": 4.7059002367043805e-06, "loss": 0.9575, "step": 40630 }, { "epoch": 0.29417938862226467, "grad_norm": 0.15613338351249695, "learning_rate": 4.705827850043794e-06, "loss": 0.9684, "step": 40640 }, { "epoch": 0.2942517752828509, "grad_norm": 0.1578802615404129, "learning_rate": 4.705755463383208e-06, "loss": 0.9641, "step": 40650 }, { "epoch": 0.2943241619434371, "grad_norm": 0.1636672019958496, "learning_rate": 4.705683076722622e-06, "loss": 0.9686, "step": 40660 }, { "epoch": 0.29439654860402326, "grad_norm": 0.2066519856452942, "learning_rate": 4.705610690062036e-06, "loss": 0.9649, "step": 40670 }, { "epoch": 0.29446893526460943, "grad_norm": 0.16676735877990723, "learning_rate": 4.705538303401449e-06, "loss": 0.9666, "step": 40680 }, { "epoch": 0.2945413219251956, "grad_norm": 0.17513099312782288, "learning_rate": 4.705465916740863e-06, "loss": 0.9768, "step": 40690 }, { "epoch": 0.2946137085857818, "grad_norm": 0.17959053814411163, "learning_rate": 4.7053935300802775e-06, "loss": 0.9748, "step": 40700 }, { "epoch": 0.294686095246368, "grad_norm": 0.16386069357395172, "learning_rate": 4.705321143419691e-06, "loss": 0.9679, "step": 40710 }, { "epoch": 0.2947584819069542, "grad_norm": 0.16159318387508392, "learning_rate": 4.705248756759105e-06, "loss": 0.9673, "step": 40720 }, { "epoch": 0.29483086856754037, "grad_norm": 0.16015946865081787, "learning_rate": 4.705176370098518e-06, "loss": 0.9542, "step": 40730 }, { "epoch": 0.29490325522812655, "grad_norm": 0.16827359795570374, "learning_rate": 4.705103983437933e-06, "loss": 0.973, "step": 40740 }, { "epoch": 0.2949756418887127, "grad_norm": 0.16736365854740143, "learning_rate": 4.705031596777346e-06, "loss": 0.9778, "step": 40750 }, { "epoch": 0.29504802854929896, "grad_norm": 0.17061574757099152, "learning_rate": 4.70495921011676e-06, "loss": 0.9558, "step": 40760 }, { "epoch": 0.29512041520988513, "grad_norm": 0.16562294960021973, "learning_rate": 4.704886823456174e-06, "loss": 0.9805, "step": 40770 }, { "epoch": 0.2951928018704713, "grad_norm": 0.17279978096485138, "learning_rate": 4.704814436795588e-06, "loss": 0.9656, "step": 40780 }, { "epoch": 0.2952651885310575, "grad_norm": 0.20112721621990204, "learning_rate": 4.704742050135002e-06, "loss": 0.96, "step": 40790 }, { "epoch": 0.29533757519164366, "grad_norm": 0.1690005213022232, "learning_rate": 4.704669663474415e-06, "loss": 0.961, "step": 40800 }, { "epoch": 0.2954099618522299, "grad_norm": 0.16406698524951935, "learning_rate": 4.704597276813829e-06, "loss": 0.9705, "step": 40810 }, { "epoch": 0.2954823485128161, "grad_norm": 0.16599249839782715, "learning_rate": 4.704524890153243e-06, "loss": 0.9577, "step": 40820 }, { "epoch": 0.29555473517340225, "grad_norm": 0.19940651953220367, "learning_rate": 4.704452503492657e-06, "loss": 0.9678, "step": 40830 }, { "epoch": 0.2956271218339884, "grad_norm": 0.1629924476146698, "learning_rate": 4.704380116832071e-06, "loss": 0.9655, "step": 40840 }, { "epoch": 0.2956995084945746, "grad_norm": 0.17306756973266602, "learning_rate": 4.704307730171484e-06, "loss": 0.974, "step": 40850 }, { "epoch": 0.2957718951551608, "grad_norm": 0.15905143320560455, "learning_rate": 4.704235343510899e-06, "loss": 0.9605, "step": 40860 }, { "epoch": 0.295844281815747, "grad_norm": 0.172456756234169, "learning_rate": 4.704162956850312e-06, "loss": 0.9663, "step": 40870 }, { "epoch": 0.2959166684763332, "grad_norm": 0.16704027354717255, "learning_rate": 4.704090570189726e-06, "loss": 0.9636, "step": 40880 }, { "epoch": 0.29598905513691937, "grad_norm": 0.17687085270881653, "learning_rate": 4.7040181835291396e-06, "loss": 0.9696, "step": 40890 }, { "epoch": 0.29606144179750554, "grad_norm": 0.16065765917301178, "learning_rate": 4.703945796868554e-06, "loss": 0.9753, "step": 40900 }, { "epoch": 0.2961338284580917, "grad_norm": 0.2607700526714325, "learning_rate": 4.703873410207968e-06, "loss": 0.9674, "step": 40910 }, { "epoch": 0.29620621511867795, "grad_norm": 0.1637168526649475, "learning_rate": 4.703801023547381e-06, "loss": 0.9579, "step": 40920 }, { "epoch": 0.29627860177926413, "grad_norm": 0.16693775355815887, "learning_rate": 4.703728636886795e-06, "loss": 0.9653, "step": 40930 }, { "epoch": 0.2963509884398503, "grad_norm": 0.16066871583461761, "learning_rate": 4.703656250226209e-06, "loss": 0.9694, "step": 40940 }, { "epoch": 0.2964233751004365, "grad_norm": 0.16873207688331604, "learning_rate": 4.703583863565622e-06, "loss": 0.9804, "step": 40950 }, { "epoch": 0.29649576176102266, "grad_norm": 0.17132726311683655, "learning_rate": 4.703511476905036e-06, "loss": 0.9662, "step": 40960 }, { "epoch": 0.2965681484216089, "grad_norm": 0.16967126727104187, "learning_rate": 4.70343909024445e-06, "loss": 0.9615, "step": 40970 }, { "epoch": 0.29664053508219507, "grad_norm": 0.16376537084579468, "learning_rate": 4.703366703583864e-06, "loss": 0.9654, "step": 40980 }, { "epoch": 0.29671292174278124, "grad_norm": 0.18798090517520905, "learning_rate": 4.703294316923277e-06, "loss": 0.9677, "step": 40990 }, { "epoch": 0.2967853084033674, "grad_norm": 0.1704019010066986, "learning_rate": 4.703221930262691e-06, "loss": 0.9697, "step": 41000 }, { "epoch": 0.2968576950639536, "grad_norm": 0.15598341822624207, "learning_rate": 4.7031495436021055e-06, "loss": 0.9642, "step": 41010 }, { "epoch": 0.29693008172453983, "grad_norm": 0.17868080735206604, "learning_rate": 4.703077156941519e-06, "loss": 0.9692, "step": 41020 }, { "epoch": 0.297002468385126, "grad_norm": 0.16686975955963135, "learning_rate": 4.703004770280933e-06, "loss": 0.9695, "step": 41030 }, { "epoch": 0.2970748550457122, "grad_norm": 0.16504326462745667, "learning_rate": 4.702932383620346e-06, "loss": 0.9697, "step": 41040 }, { "epoch": 0.29714724170629836, "grad_norm": 0.17151391506195068, "learning_rate": 4.702859996959761e-06, "loss": 0.9686, "step": 41050 }, { "epoch": 0.29721962836688454, "grad_norm": 0.16817405819892883, "learning_rate": 4.702787610299174e-06, "loss": 0.9692, "step": 41060 }, { "epoch": 0.2972920150274707, "grad_norm": 0.1832583099603653, "learning_rate": 4.702715223638588e-06, "loss": 0.9846, "step": 41070 }, { "epoch": 0.29736440168805695, "grad_norm": 0.171138733625412, "learning_rate": 4.702642836978002e-06, "loss": 0.9749, "step": 41080 }, { "epoch": 0.2974367883486431, "grad_norm": 0.15057332813739777, "learning_rate": 4.702570450317416e-06, "loss": 0.9668, "step": 41090 }, { "epoch": 0.2975091750092293, "grad_norm": 0.16757220029830933, "learning_rate": 4.70249806365683e-06, "loss": 0.9772, "step": 41100 }, { "epoch": 0.2975815616698155, "grad_norm": 0.20239600539207458, "learning_rate": 4.702425676996243e-06, "loss": 0.9588, "step": 41110 }, { "epoch": 0.29765394833040165, "grad_norm": 0.17006611824035645, "learning_rate": 4.702353290335657e-06, "loss": 0.9626, "step": 41120 }, { "epoch": 0.2977263349909879, "grad_norm": 0.17428216338157654, "learning_rate": 4.7022809036750714e-06, "loss": 0.963, "step": 41130 }, { "epoch": 0.29779872165157406, "grad_norm": 0.16272498667240143, "learning_rate": 4.702208517014485e-06, "loss": 0.9712, "step": 41140 }, { "epoch": 0.29787110831216024, "grad_norm": 0.15650291740894318, "learning_rate": 4.702136130353899e-06, "loss": 0.9564, "step": 41150 }, { "epoch": 0.2979434949727464, "grad_norm": 0.17676356434822083, "learning_rate": 4.702063743693312e-06, "loss": 0.9722, "step": 41160 }, { "epoch": 0.2980158816333326, "grad_norm": 0.17107968032360077, "learning_rate": 4.701991357032727e-06, "loss": 0.9638, "step": 41170 }, { "epoch": 0.2980882682939188, "grad_norm": 0.16407465934753418, "learning_rate": 4.70191897037214e-06, "loss": 0.9646, "step": 41180 }, { "epoch": 0.298160654954505, "grad_norm": 0.1607753485441208, "learning_rate": 4.701846583711554e-06, "loss": 0.9712, "step": 41190 }, { "epoch": 0.2982330416150912, "grad_norm": 0.1688770204782486, "learning_rate": 4.701774197050968e-06, "loss": 0.9638, "step": 41200 }, { "epoch": 0.29830542827567735, "grad_norm": 0.16739125549793243, "learning_rate": 4.701701810390382e-06, "loss": 0.9555, "step": 41210 }, { "epoch": 0.29837781493626353, "grad_norm": 0.17620337009429932, "learning_rate": 4.701629423729796e-06, "loss": 0.9672, "step": 41220 }, { "epoch": 0.2984502015968497, "grad_norm": 0.16256146132946014, "learning_rate": 4.701557037069209e-06, "loss": 0.9719, "step": 41230 }, { "epoch": 0.29852258825743594, "grad_norm": 0.17721201479434967, "learning_rate": 4.701484650408623e-06, "loss": 0.9726, "step": 41240 }, { "epoch": 0.2985949749180221, "grad_norm": 0.1829133778810501, "learning_rate": 4.701412263748037e-06, "loss": 0.9632, "step": 41250 }, { "epoch": 0.2986673615786083, "grad_norm": 0.15692444145679474, "learning_rate": 4.701339877087451e-06, "loss": 0.9641, "step": 41260 }, { "epoch": 0.29873974823919447, "grad_norm": 0.1705268919467926, "learning_rate": 4.701267490426865e-06, "loss": 0.9684, "step": 41270 }, { "epoch": 0.29881213489978065, "grad_norm": 0.16059453785419464, "learning_rate": 4.701195103766278e-06, "loss": 0.9658, "step": 41280 }, { "epoch": 0.2988845215603669, "grad_norm": 0.16799260675907135, "learning_rate": 4.701122717105692e-06, "loss": 0.9679, "step": 41290 }, { "epoch": 0.29895690822095305, "grad_norm": 0.15362030267715454, "learning_rate": 4.701050330445106e-06, "loss": 0.9547, "step": 41300 }, { "epoch": 0.29902929488153923, "grad_norm": 0.15962745249271393, "learning_rate": 4.70097794378452e-06, "loss": 0.9745, "step": 41310 }, { "epoch": 0.2991016815421254, "grad_norm": 0.17490293085575104, "learning_rate": 4.7009055571239335e-06, "loss": 0.965, "step": 41320 }, { "epoch": 0.2991740682027116, "grad_norm": 0.1677936166524887, "learning_rate": 4.700833170463347e-06, "loss": 0.9546, "step": 41330 }, { "epoch": 0.2992464548632978, "grad_norm": 0.16804441809654236, "learning_rate": 4.700760783802762e-06, "loss": 0.9712, "step": 41340 }, { "epoch": 0.299318841523884, "grad_norm": 0.21531559526920319, "learning_rate": 4.700688397142175e-06, "loss": 0.9689, "step": 41350 }, { "epoch": 0.29939122818447017, "grad_norm": 0.18383803963661194, "learning_rate": 4.700616010481589e-06, "loss": 0.9736, "step": 41360 }, { "epoch": 0.29946361484505635, "grad_norm": 0.17313213646411896, "learning_rate": 4.7005436238210024e-06, "loss": 0.9589, "step": 41370 }, { "epoch": 0.2995360015056425, "grad_norm": 0.18341746926307678, "learning_rate": 4.700471237160417e-06, "loss": 0.9629, "step": 41380 }, { "epoch": 0.2996083881662287, "grad_norm": 0.1760926991701126, "learning_rate": 4.7003988504998305e-06, "loss": 0.9681, "step": 41390 }, { "epoch": 0.29968077482681493, "grad_norm": 0.18593202531337738, "learning_rate": 4.700326463839244e-06, "loss": 0.9667, "step": 41400 }, { "epoch": 0.2997531614874011, "grad_norm": 0.16508474946022034, "learning_rate": 4.700254077178658e-06, "loss": 0.9772, "step": 41410 }, { "epoch": 0.2998255481479873, "grad_norm": 0.16628380119800568, "learning_rate": 4.700181690518072e-06, "loss": 0.9648, "step": 41420 }, { "epoch": 0.29989793480857346, "grad_norm": 0.19085149466991425, "learning_rate": 4.700109303857486e-06, "loss": 0.9726, "step": 41430 }, { "epoch": 0.29997032146915964, "grad_norm": 0.16664601862430573, "learning_rate": 4.7000369171968995e-06, "loss": 0.9628, "step": 41440 }, { "epoch": 0.30004270812974587, "grad_norm": 0.17423871159553528, "learning_rate": 4.699964530536313e-06, "loss": 0.966, "step": 41450 }, { "epoch": 0.30011509479033205, "grad_norm": 0.18593478202819824, "learning_rate": 4.6998921438757275e-06, "loss": 0.9664, "step": 41460 }, { "epoch": 0.3001874814509182, "grad_norm": 0.19175831973552704, "learning_rate": 4.699819757215141e-06, "loss": 0.9789, "step": 41470 }, { "epoch": 0.3002598681115044, "grad_norm": 0.15855441987514496, "learning_rate": 4.699747370554554e-06, "loss": 0.9516, "step": 41480 }, { "epoch": 0.3003322547720906, "grad_norm": 0.2066076695919037, "learning_rate": 4.699674983893968e-06, "loss": 0.9594, "step": 41490 }, { "epoch": 0.3004046414326768, "grad_norm": 0.9006432890892029, "learning_rate": 4.699602597233382e-06, "loss": 0.9761, "step": 41500 }, { "epoch": 0.300477028093263, "grad_norm": 0.17038564383983612, "learning_rate": 4.699530210572796e-06, "loss": 0.9686, "step": 41510 }, { "epoch": 0.30054941475384916, "grad_norm": 0.16303139925003052, "learning_rate": 4.699457823912209e-06, "loss": 0.9774, "step": 41520 }, { "epoch": 0.30062180141443534, "grad_norm": 0.16983075439929962, "learning_rate": 4.699385437251624e-06, "loss": 0.9731, "step": 41530 }, { "epoch": 0.3006941880750215, "grad_norm": 0.17250801622867584, "learning_rate": 4.699313050591037e-06, "loss": 0.9432, "step": 41540 }, { "epoch": 0.3007665747356077, "grad_norm": 0.16830387711524963, "learning_rate": 4.699240663930451e-06, "loss": 0.964, "step": 41550 }, { "epoch": 0.3008389613961939, "grad_norm": 0.16739524900913239, "learning_rate": 4.6991682772698645e-06, "loss": 0.9785, "step": 41560 }, { "epoch": 0.3009113480567801, "grad_norm": 0.17023219168186188, "learning_rate": 4.699095890609279e-06, "loss": 0.9729, "step": 41570 }, { "epoch": 0.3009837347173663, "grad_norm": 0.17168667912483215, "learning_rate": 4.699023503948693e-06, "loss": 0.961, "step": 41580 }, { "epoch": 0.30105612137795246, "grad_norm": 0.16515317559242249, "learning_rate": 4.698951117288106e-06, "loss": 0.9731, "step": 41590 }, { "epoch": 0.30112850803853863, "grad_norm": 0.16387787461280823, "learning_rate": 4.69887873062752e-06, "loss": 0.9587, "step": 41600 }, { "epoch": 0.30120089469912487, "grad_norm": 0.18492285907268524, "learning_rate": 4.698806343966934e-06, "loss": 0.9635, "step": 41610 }, { "epoch": 0.30127328135971104, "grad_norm": 0.15610884130001068, "learning_rate": 4.698733957306348e-06, "loss": 0.9579, "step": 41620 }, { "epoch": 0.3013456680202972, "grad_norm": 0.1656864881515503, "learning_rate": 4.6986615706457616e-06, "loss": 0.9637, "step": 41630 }, { "epoch": 0.3014180546808834, "grad_norm": 0.16656151413917542, "learning_rate": 4.698589183985175e-06, "loss": 0.9704, "step": 41640 }, { "epoch": 0.3014904413414696, "grad_norm": 0.163262277841568, "learning_rate": 4.69851679732459e-06, "loss": 0.954, "step": 41650 }, { "epoch": 0.3015628280020558, "grad_norm": 0.17654089629650116, "learning_rate": 4.698444410664003e-06, "loss": 0.981, "step": 41660 }, { "epoch": 0.301635214662642, "grad_norm": 0.16009564697742462, "learning_rate": 4.698372024003417e-06, "loss": 0.964, "step": 41670 }, { "epoch": 0.30170760132322816, "grad_norm": 0.17271243035793304, "learning_rate": 4.6982996373428305e-06, "loss": 0.9637, "step": 41680 }, { "epoch": 0.30177998798381434, "grad_norm": 0.16152308881282806, "learning_rate": 4.698227250682245e-06, "loss": 0.9601, "step": 41690 }, { "epoch": 0.3018523746444005, "grad_norm": 0.16311658918857574, "learning_rate": 4.6981548640216586e-06, "loss": 0.9648, "step": 41700 }, { "epoch": 0.30192476130498674, "grad_norm": 0.17179886996746063, "learning_rate": 4.698082477361072e-06, "loss": 0.9636, "step": 41710 }, { "epoch": 0.3019971479655729, "grad_norm": 0.16331078112125397, "learning_rate": 4.698010090700486e-06, "loss": 0.9694, "step": 41720 }, { "epoch": 0.3020695346261591, "grad_norm": 0.16688616573810577, "learning_rate": 4.6979377040399e-06, "loss": 0.9646, "step": 41730 }, { "epoch": 0.3021419212867453, "grad_norm": 0.19026783108711243, "learning_rate": 4.697865317379314e-06, "loss": 0.9706, "step": 41740 }, { "epoch": 0.30221430794733145, "grad_norm": 0.169419065117836, "learning_rate": 4.6977929307187275e-06, "loss": 0.9559, "step": 41750 }, { "epoch": 0.3022866946079176, "grad_norm": 0.1800675243139267, "learning_rate": 4.697720544058141e-06, "loss": 0.9822, "step": 41760 }, { "epoch": 0.30235908126850386, "grad_norm": 0.15072910487651825, "learning_rate": 4.6976481573975556e-06, "loss": 0.962, "step": 41770 }, { "epoch": 0.30243146792909004, "grad_norm": 0.15950879454612732, "learning_rate": 4.697575770736969e-06, "loss": 0.9697, "step": 41780 }, { "epoch": 0.3025038545896762, "grad_norm": 0.1841602772474289, "learning_rate": 4.697503384076383e-06, "loss": 0.9731, "step": 41790 }, { "epoch": 0.3025762412502624, "grad_norm": 0.22333543002605438, "learning_rate": 4.697430997415796e-06, "loss": 0.9594, "step": 41800 }, { "epoch": 0.30264862791084857, "grad_norm": 0.17645373940467834, "learning_rate": 4.697358610755211e-06, "loss": 0.9595, "step": 41810 }, { "epoch": 0.3027210145714348, "grad_norm": 0.16420337557792664, "learning_rate": 4.6972862240946245e-06, "loss": 0.9829, "step": 41820 }, { "epoch": 0.302793401232021, "grad_norm": 0.1616903692483902, "learning_rate": 4.697213837434038e-06, "loss": 0.9627, "step": 41830 }, { "epoch": 0.30286578789260715, "grad_norm": 0.1770700216293335, "learning_rate": 4.697141450773452e-06, "loss": 0.9546, "step": 41840 }, { "epoch": 0.30293817455319333, "grad_norm": 0.16673099994659424, "learning_rate": 4.697069064112866e-06, "loss": 0.972, "step": 41850 }, { "epoch": 0.3030105612137795, "grad_norm": 0.15225054323673248, "learning_rate": 4.69699667745228e-06, "loss": 0.9707, "step": 41860 }, { "epoch": 0.30308294787436574, "grad_norm": 0.1643424779176712, "learning_rate": 4.6969242907916934e-06, "loss": 0.9601, "step": 41870 }, { "epoch": 0.3031553345349519, "grad_norm": 0.20740066468715668, "learning_rate": 4.696851904131107e-06, "loss": 0.962, "step": 41880 }, { "epoch": 0.3032277211955381, "grad_norm": 0.17226648330688477, "learning_rate": 4.696779517470521e-06, "loss": 0.9739, "step": 41890 }, { "epoch": 0.30330010785612427, "grad_norm": 0.17776334285736084, "learning_rate": 4.696707130809935e-06, "loss": 0.979, "step": 41900 }, { "epoch": 0.30337249451671044, "grad_norm": 0.15923330187797546, "learning_rate": 4.696634744149349e-06, "loss": 0.9613, "step": 41910 }, { "epoch": 0.3034448811772966, "grad_norm": 0.1711985170841217, "learning_rate": 4.696562357488762e-06, "loss": 0.9588, "step": 41920 }, { "epoch": 0.30351726783788285, "grad_norm": 0.1879815310239792, "learning_rate": 4.696489970828176e-06, "loss": 0.9679, "step": 41930 }, { "epoch": 0.30358965449846903, "grad_norm": 0.17883126437664032, "learning_rate": 4.6964175841675904e-06, "loss": 0.9755, "step": 41940 }, { "epoch": 0.3036620411590552, "grad_norm": 0.16554105281829834, "learning_rate": 4.696345197507004e-06, "loss": 0.9611, "step": 41950 }, { "epoch": 0.3037344278196414, "grad_norm": 0.1649336963891983, "learning_rate": 4.696272810846418e-06, "loss": 0.9611, "step": 41960 }, { "epoch": 0.30380681448022756, "grad_norm": 0.15734368562698364, "learning_rate": 4.696200424185831e-06, "loss": 0.952, "step": 41970 }, { "epoch": 0.3038792011408138, "grad_norm": 0.17791064083576202, "learning_rate": 4.696128037525246e-06, "loss": 0.9507, "step": 41980 }, { "epoch": 0.30395158780139997, "grad_norm": 0.18303173780441284, "learning_rate": 4.696055650864659e-06, "loss": 0.9595, "step": 41990 }, { "epoch": 0.30402397446198615, "grad_norm": 0.1724705994129181, "learning_rate": 4.695983264204073e-06, "loss": 0.9704, "step": 42000 }, { "epoch": 0.3040963611225723, "grad_norm": 0.164495587348938, "learning_rate": 4.695910877543487e-06, "loss": 0.9598, "step": 42010 }, { "epoch": 0.3041687477831585, "grad_norm": 0.16240546107292175, "learning_rate": 4.6958384908829e-06, "loss": 0.9621, "step": 42020 }, { "epoch": 0.30424113444374473, "grad_norm": 0.17704802751541138, "learning_rate": 4.695766104222314e-06, "loss": 0.9589, "step": 42030 }, { "epoch": 0.3043135211043309, "grad_norm": 0.17840299010276794, "learning_rate": 4.695693717561728e-06, "loss": 0.9773, "step": 42040 }, { "epoch": 0.3043859077649171, "grad_norm": 0.18059472739696503, "learning_rate": 4.695621330901142e-06, "loss": 0.9743, "step": 42050 }, { "epoch": 0.30445829442550326, "grad_norm": 0.18101860582828522, "learning_rate": 4.6955489442405555e-06, "loss": 0.9618, "step": 42060 }, { "epoch": 0.30453068108608944, "grad_norm": 0.16693522036075592, "learning_rate": 4.695476557579969e-06, "loss": 0.9533, "step": 42070 }, { "epoch": 0.3046030677466756, "grad_norm": 0.1905452311038971, "learning_rate": 4.695404170919383e-06, "loss": 0.9576, "step": 42080 }, { "epoch": 0.30467545440726185, "grad_norm": 0.16793328523635864, "learning_rate": 4.695331784258797e-06, "loss": 0.9592, "step": 42090 }, { "epoch": 0.304747841067848, "grad_norm": 0.17872589826583862, "learning_rate": 4.695259397598211e-06, "loss": 0.9747, "step": 42100 }, { "epoch": 0.3048202277284342, "grad_norm": 0.1794547736644745, "learning_rate": 4.6951870109376244e-06, "loss": 0.951, "step": 42110 }, { "epoch": 0.3048926143890204, "grad_norm": 0.1795600950717926, "learning_rate": 4.695114624277038e-06, "loss": 0.9495, "step": 42120 }, { "epoch": 0.30496500104960655, "grad_norm": 0.16469037532806396, "learning_rate": 4.6950422376164525e-06, "loss": 0.9677, "step": 42130 }, { "epoch": 0.3050373877101928, "grad_norm": 0.15928620100021362, "learning_rate": 4.694969850955866e-06, "loss": 0.9597, "step": 42140 }, { "epoch": 0.30510977437077896, "grad_norm": 0.17253397405147552, "learning_rate": 4.69489746429528e-06, "loss": 0.9603, "step": 42150 }, { "epoch": 0.30518216103136514, "grad_norm": 0.1894710212945938, "learning_rate": 4.694825077634693e-06, "loss": 0.9537, "step": 42160 }, { "epoch": 0.3052545476919513, "grad_norm": 0.16059796512126923, "learning_rate": 4.694752690974108e-06, "loss": 0.9595, "step": 42170 }, { "epoch": 0.3053269343525375, "grad_norm": 0.156858429312706, "learning_rate": 4.6946803043135215e-06, "loss": 0.9604, "step": 42180 }, { "epoch": 0.3053993210131237, "grad_norm": 0.17593984305858612, "learning_rate": 4.694607917652935e-06, "loss": 0.955, "step": 42190 }, { "epoch": 0.3054717076737099, "grad_norm": 0.17821641266345978, "learning_rate": 4.694535530992349e-06, "loss": 0.959, "step": 42200 }, { "epoch": 0.3055440943342961, "grad_norm": 0.25540754199028015, "learning_rate": 4.694463144331763e-06, "loss": 0.9672, "step": 42210 }, { "epoch": 0.30561648099488226, "grad_norm": 0.19185206294059753, "learning_rate": 4.694390757671177e-06, "loss": 0.9605, "step": 42220 }, { "epoch": 0.30568886765546843, "grad_norm": 0.21133938431739807, "learning_rate": 4.69431837101059e-06, "loss": 0.9783, "step": 42230 }, { "epoch": 0.30576125431605466, "grad_norm": 0.17313557863235474, "learning_rate": 4.694245984350004e-06, "loss": 0.964, "step": 42240 }, { "epoch": 0.30583364097664084, "grad_norm": 0.18542583286762238, "learning_rate": 4.6941735976894185e-06, "loss": 0.9428, "step": 42250 }, { "epoch": 0.305906027637227, "grad_norm": 0.17269088327884674, "learning_rate": 4.694101211028832e-06, "loss": 0.974, "step": 42260 }, { "epoch": 0.3059784142978132, "grad_norm": 0.16022367775440216, "learning_rate": 4.694028824368246e-06, "loss": 0.9644, "step": 42270 }, { "epoch": 0.30605080095839937, "grad_norm": 0.17748059332370758, "learning_rate": 4.693956437707659e-06, "loss": 0.9604, "step": 42280 }, { "epoch": 0.30612318761898555, "grad_norm": 0.16204720735549927, "learning_rate": 4.693884051047074e-06, "loss": 0.9672, "step": 42290 }, { "epoch": 0.3061955742795718, "grad_norm": 0.16240718960762024, "learning_rate": 4.693811664386487e-06, "loss": 0.9703, "step": 42300 }, { "epoch": 0.30626796094015796, "grad_norm": 0.1833665817975998, "learning_rate": 4.693739277725901e-06, "loss": 0.9671, "step": 42310 }, { "epoch": 0.30634034760074413, "grad_norm": 0.16456997394561768, "learning_rate": 4.693666891065315e-06, "loss": 0.9566, "step": 42320 }, { "epoch": 0.3064127342613303, "grad_norm": 0.1582067459821701, "learning_rate": 4.693594504404729e-06, "loss": 0.9696, "step": 42330 }, { "epoch": 0.3064851209219165, "grad_norm": 0.16254618763923645, "learning_rate": 4.693522117744143e-06, "loss": 0.9624, "step": 42340 }, { "epoch": 0.3065575075825027, "grad_norm": 0.22306321561336517, "learning_rate": 4.693449731083556e-06, "loss": 0.9663, "step": 42350 }, { "epoch": 0.3066298942430889, "grad_norm": 0.1735800951719284, "learning_rate": 4.69337734442297e-06, "loss": 0.9727, "step": 42360 }, { "epoch": 0.3067022809036751, "grad_norm": 0.16660434007644653, "learning_rate": 4.693304957762384e-06, "loss": 0.9558, "step": 42370 }, { "epoch": 0.30677466756426125, "grad_norm": 0.1673038899898529, "learning_rate": 4.693232571101798e-06, "loss": 0.9772, "step": 42380 }, { "epoch": 0.3068470542248474, "grad_norm": 0.1805497705936432, "learning_rate": 4.693160184441212e-06, "loss": 0.9679, "step": 42390 }, { "epoch": 0.30691944088543366, "grad_norm": 0.16256369650363922, "learning_rate": 4.693087797780625e-06, "loss": 0.9636, "step": 42400 }, { "epoch": 0.30699182754601984, "grad_norm": 0.16821902990341187, "learning_rate": 4.69301541112004e-06, "loss": 0.9604, "step": 42410 }, { "epoch": 0.307064214206606, "grad_norm": 0.170756995677948, "learning_rate": 4.692943024459453e-06, "loss": 0.9493, "step": 42420 }, { "epoch": 0.3071366008671922, "grad_norm": 0.1680414229631424, "learning_rate": 4.692870637798867e-06, "loss": 0.9669, "step": 42430 }, { "epoch": 0.30720898752777837, "grad_norm": 0.17271284759044647, "learning_rate": 4.6927982511382806e-06, "loss": 0.9572, "step": 42440 }, { "epoch": 0.30728137418836454, "grad_norm": 0.20150049030780792, "learning_rate": 4.692725864477695e-06, "loss": 0.976, "step": 42450 }, { "epoch": 0.3073537608489508, "grad_norm": 0.16273252665996552, "learning_rate": 4.692653477817109e-06, "loss": 0.9646, "step": 42460 }, { "epoch": 0.30742614750953695, "grad_norm": 0.17150864005088806, "learning_rate": 4.692581091156522e-06, "loss": 0.9734, "step": 42470 }, { "epoch": 0.30749853417012313, "grad_norm": 0.16339828073978424, "learning_rate": 4.692508704495936e-06, "loss": 0.959, "step": 42480 }, { "epoch": 0.3075709208307093, "grad_norm": 0.17035742104053497, "learning_rate": 4.69243631783535e-06, "loss": 0.9637, "step": 42490 }, { "epoch": 0.3076433074912955, "grad_norm": 0.17820675671100616, "learning_rate": 4.692363931174764e-06, "loss": 0.9647, "step": 42500 }, { "epoch": 0.3077156941518817, "grad_norm": 0.15689398348331451, "learning_rate": 4.6922915445141776e-06, "loss": 0.966, "step": 42510 }, { "epoch": 0.3077880808124679, "grad_norm": 0.17147964239120483, "learning_rate": 4.692219157853591e-06, "loss": 0.9748, "step": 42520 }, { "epoch": 0.30786046747305407, "grad_norm": 0.17200268805027008, "learning_rate": 4.692146771193005e-06, "loss": 0.9656, "step": 42530 }, { "epoch": 0.30793285413364024, "grad_norm": 0.18745240569114685, "learning_rate": 4.692074384532418e-06, "loss": 0.9666, "step": 42540 }, { "epoch": 0.3080052407942264, "grad_norm": 0.16291543841362, "learning_rate": 4.692001997871832e-06, "loss": 0.9636, "step": 42550 }, { "epoch": 0.30807762745481265, "grad_norm": 0.16443657875061035, "learning_rate": 4.6919296112112465e-06, "loss": 0.9673, "step": 42560 }, { "epoch": 0.30815001411539883, "grad_norm": 0.1615900695323944, "learning_rate": 4.69185722455066e-06, "loss": 0.9578, "step": 42570 }, { "epoch": 0.308222400775985, "grad_norm": 0.17126181721687317, "learning_rate": 4.691784837890074e-06, "loss": 0.9695, "step": 42580 }, { "epoch": 0.3082947874365712, "grad_norm": 0.15183715522289276, "learning_rate": 4.691712451229487e-06, "loss": 0.9695, "step": 42590 }, { "epoch": 0.30836717409715736, "grad_norm": 0.15649786591529846, "learning_rate": 4.691640064568902e-06, "loss": 0.9661, "step": 42600 }, { "epoch": 0.30843956075774354, "grad_norm": 0.15690255165100098, "learning_rate": 4.691567677908315e-06, "loss": 0.9723, "step": 42610 }, { "epoch": 0.30851194741832977, "grad_norm": 0.16935759782791138, "learning_rate": 4.691495291247729e-06, "loss": 0.9786, "step": 42620 }, { "epoch": 0.30858433407891595, "grad_norm": 0.19255200028419495, "learning_rate": 4.691422904587143e-06, "loss": 0.9625, "step": 42630 }, { "epoch": 0.3086567207395021, "grad_norm": 0.16277821362018585, "learning_rate": 4.691350517926557e-06, "loss": 0.9501, "step": 42640 }, { "epoch": 0.3087291074000883, "grad_norm": 0.15837381780147552, "learning_rate": 4.691278131265971e-06, "loss": 0.9564, "step": 42650 }, { "epoch": 0.3088014940606745, "grad_norm": 0.16267119348049164, "learning_rate": 4.691205744605384e-06, "loss": 0.961, "step": 42660 }, { "epoch": 0.3088738807212607, "grad_norm": 0.17577193677425385, "learning_rate": 4.691133357944798e-06, "loss": 0.9807, "step": 42670 }, { "epoch": 0.3089462673818469, "grad_norm": 0.16068010032176971, "learning_rate": 4.691060971284212e-06, "loss": 0.9623, "step": 42680 }, { "epoch": 0.30901865404243306, "grad_norm": 0.16693639755249023, "learning_rate": 4.690988584623626e-06, "loss": 0.9602, "step": 42690 }, { "epoch": 0.30909104070301924, "grad_norm": 0.17629244923591614, "learning_rate": 4.69091619796304e-06, "loss": 0.9785, "step": 42700 }, { "epoch": 0.3091634273636054, "grad_norm": 0.15694406628608704, "learning_rate": 4.690843811302453e-06, "loss": 0.9756, "step": 42710 }, { "epoch": 0.30923581402419165, "grad_norm": 0.16043122112751007, "learning_rate": 4.690771424641867e-06, "loss": 0.9697, "step": 42720 }, { "epoch": 0.3093082006847778, "grad_norm": 0.18830914795398712, "learning_rate": 4.690699037981281e-06, "loss": 0.9642, "step": 42730 }, { "epoch": 0.309380587345364, "grad_norm": 0.1912577599287033, "learning_rate": 4.690626651320695e-06, "loss": 0.962, "step": 42740 }, { "epoch": 0.3094529740059502, "grad_norm": 0.16711075603961945, "learning_rate": 4.690554264660109e-06, "loss": 0.959, "step": 42750 }, { "epoch": 0.30952536066653635, "grad_norm": 0.17442958056926727, "learning_rate": 4.690481877999522e-06, "loss": 0.9752, "step": 42760 }, { "epoch": 0.3095977473271226, "grad_norm": 0.17477945983409882, "learning_rate": 4.690409491338937e-06, "loss": 0.9708, "step": 42770 }, { "epoch": 0.30967013398770876, "grad_norm": 0.1687520295381546, "learning_rate": 4.69033710467835e-06, "loss": 0.9602, "step": 42780 }, { "epoch": 0.30974252064829494, "grad_norm": 0.1918717324733734, "learning_rate": 4.690264718017764e-06, "loss": 0.9622, "step": 42790 }, { "epoch": 0.3098149073088811, "grad_norm": 0.17581704258918762, "learning_rate": 4.6901923313571775e-06, "loss": 0.9502, "step": 42800 }, { "epoch": 0.3098872939694673, "grad_norm": 0.1803891360759735, "learning_rate": 4.690119944696592e-06, "loss": 0.9743, "step": 42810 }, { "epoch": 0.30995968063005347, "grad_norm": 0.16164544224739075, "learning_rate": 4.690047558036006e-06, "loss": 0.9597, "step": 42820 }, { "epoch": 0.3100320672906397, "grad_norm": 0.15992704033851624, "learning_rate": 4.689975171375419e-06, "loss": 0.9745, "step": 42830 }, { "epoch": 0.3101044539512259, "grad_norm": 0.16522592306137085, "learning_rate": 4.689902784714833e-06, "loss": 0.9392, "step": 42840 }, { "epoch": 0.31017684061181205, "grad_norm": 0.17075496912002563, "learning_rate": 4.689830398054247e-06, "loss": 0.9706, "step": 42850 }, { "epoch": 0.31024922727239823, "grad_norm": 0.16021926701068878, "learning_rate": 4.689758011393661e-06, "loss": 0.9651, "step": 42860 }, { "epoch": 0.3103216139329844, "grad_norm": 0.16339780390262604, "learning_rate": 4.6896856247330745e-06, "loss": 0.9626, "step": 42870 }, { "epoch": 0.31039400059357064, "grad_norm": 0.15929320454597473, "learning_rate": 4.689613238072488e-06, "loss": 0.9484, "step": 42880 }, { "epoch": 0.3104663872541568, "grad_norm": 0.1761467158794403, "learning_rate": 4.689540851411903e-06, "loss": 0.9623, "step": 42890 }, { "epoch": 0.310538773914743, "grad_norm": 0.16710932552814484, "learning_rate": 4.689468464751316e-06, "loss": 0.959, "step": 42900 }, { "epoch": 0.31061116057532917, "grad_norm": 0.16465498507022858, "learning_rate": 4.68939607809073e-06, "loss": 0.9584, "step": 42910 }, { "epoch": 0.31068354723591535, "grad_norm": 0.1967269629240036, "learning_rate": 4.6893236914301435e-06, "loss": 0.9583, "step": 42920 }, { "epoch": 0.3107559338965016, "grad_norm": 0.16868357360363007, "learning_rate": 4.689251304769558e-06, "loss": 0.9559, "step": 42930 }, { "epoch": 0.31082832055708776, "grad_norm": 0.15778405964374542, "learning_rate": 4.6891789181089715e-06, "loss": 0.9707, "step": 42940 }, { "epoch": 0.31090070721767393, "grad_norm": 0.20243608951568604, "learning_rate": 4.689106531448385e-06, "loss": 0.9471, "step": 42950 }, { "epoch": 0.3109730938782601, "grad_norm": 0.17772892117500305, "learning_rate": 4.689034144787799e-06, "loss": 0.9589, "step": 42960 }, { "epoch": 0.3110454805388463, "grad_norm": 0.17439591884613037, "learning_rate": 4.688961758127213e-06, "loss": 0.9604, "step": 42970 }, { "epoch": 0.31111786719943246, "grad_norm": 0.15958340466022491, "learning_rate": 4.688889371466627e-06, "loss": 0.967, "step": 42980 }, { "epoch": 0.3111902538600187, "grad_norm": 0.16158387064933777, "learning_rate": 4.6888169848060405e-06, "loss": 0.9569, "step": 42990 }, { "epoch": 0.31126264052060487, "grad_norm": 0.17039985954761505, "learning_rate": 4.688744598145454e-06, "loss": 0.9571, "step": 43000 }, { "epoch": 0.31133502718119105, "grad_norm": 0.1581193506717682, "learning_rate": 4.6886722114848685e-06, "loss": 0.9568, "step": 43010 }, { "epoch": 0.3114074138417772, "grad_norm": 0.16639426350593567, "learning_rate": 4.688599824824282e-06, "loss": 0.9495, "step": 43020 }, { "epoch": 0.3114798005023634, "grad_norm": 0.1791532039642334, "learning_rate": 4.688527438163696e-06, "loss": 0.973, "step": 43030 }, { "epoch": 0.31155218716294963, "grad_norm": 0.16932880878448486, "learning_rate": 4.688455051503109e-06, "loss": 0.966, "step": 43040 }, { "epoch": 0.3116245738235358, "grad_norm": 0.17780663073062897, "learning_rate": 4.688382664842524e-06, "loss": 0.9554, "step": 43050 }, { "epoch": 0.311696960484122, "grad_norm": 0.1656964272260666, "learning_rate": 4.6883102781819375e-06, "loss": 0.9728, "step": 43060 }, { "epoch": 0.31176934714470816, "grad_norm": 0.15379230678081512, "learning_rate": 4.68823789152135e-06, "loss": 0.9739, "step": 43070 }, { "epoch": 0.31184173380529434, "grad_norm": 0.17420606315135956, "learning_rate": 4.688165504860765e-06, "loss": 0.9573, "step": 43080 }, { "epoch": 0.3119141204658806, "grad_norm": 0.17237995564937592, "learning_rate": 4.688093118200178e-06, "loss": 0.9633, "step": 43090 }, { "epoch": 0.31198650712646675, "grad_norm": 0.254451185464859, "learning_rate": 4.688020731539592e-06, "loss": 0.9597, "step": 43100 }, { "epoch": 0.3120588937870529, "grad_norm": 0.17835448682308197, "learning_rate": 4.6879483448790056e-06, "loss": 0.9491, "step": 43110 }, { "epoch": 0.3121312804476391, "grad_norm": 0.1625124216079712, "learning_rate": 4.68787595821842e-06, "loss": 0.9623, "step": 43120 }, { "epoch": 0.3122036671082253, "grad_norm": 0.1648617535829544, "learning_rate": 4.687803571557834e-06, "loss": 0.9626, "step": 43130 }, { "epoch": 0.31227605376881146, "grad_norm": 0.15769249200820923, "learning_rate": 4.687731184897247e-06, "loss": 0.9585, "step": 43140 }, { "epoch": 0.3123484404293977, "grad_norm": 0.16843098402023315, "learning_rate": 4.687658798236661e-06, "loss": 0.9519, "step": 43150 }, { "epoch": 0.31242082708998387, "grad_norm": 0.17640751600265503, "learning_rate": 4.687586411576075e-06, "loss": 0.9519, "step": 43160 }, { "epoch": 0.31249321375057004, "grad_norm": 0.2160252183675766, "learning_rate": 4.687514024915489e-06, "loss": 0.9505, "step": 43170 }, { "epoch": 0.3125656004111562, "grad_norm": 0.1599891185760498, "learning_rate": 4.6874416382549026e-06, "loss": 0.9592, "step": 43180 }, { "epoch": 0.3126379870717424, "grad_norm": 0.17435148358345032, "learning_rate": 4.687369251594316e-06, "loss": 0.9647, "step": 43190 }, { "epoch": 0.31271037373232863, "grad_norm": 0.18090033531188965, "learning_rate": 4.687296864933731e-06, "loss": 0.9587, "step": 43200 }, { "epoch": 0.3127827603929148, "grad_norm": 0.15829293429851532, "learning_rate": 4.687224478273144e-06, "loss": 0.9601, "step": 43210 }, { "epoch": 0.312855147053501, "grad_norm": 0.17692400515079498, "learning_rate": 4.687152091612558e-06, "loss": 0.964, "step": 43220 }, { "epoch": 0.31292753371408716, "grad_norm": 0.18780209124088287, "learning_rate": 4.6870797049519715e-06, "loss": 0.9679, "step": 43230 }, { "epoch": 0.31299992037467333, "grad_norm": 0.20248176157474518, "learning_rate": 4.687007318291386e-06, "loss": 0.9639, "step": 43240 }, { "epoch": 0.31307230703525957, "grad_norm": 0.1714249551296234, "learning_rate": 4.6869349316307996e-06, "loss": 0.9524, "step": 43250 }, { "epoch": 0.31314469369584574, "grad_norm": 0.1626296043395996, "learning_rate": 4.686862544970213e-06, "loss": 0.9643, "step": 43260 }, { "epoch": 0.3132170803564319, "grad_norm": 0.21206989884376526, "learning_rate": 4.686790158309627e-06, "loss": 0.9681, "step": 43270 }, { "epoch": 0.3132894670170181, "grad_norm": 0.16204078495502472, "learning_rate": 4.686717771649041e-06, "loss": 0.9671, "step": 43280 }, { "epoch": 0.3133618536776043, "grad_norm": 0.1752130091190338, "learning_rate": 4.686645384988455e-06, "loss": 0.969, "step": 43290 }, { "epoch": 0.31343424033819045, "grad_norm": 0.16563290357589722, "learning_rate": 4.6865729983278685e-06, "loss": 0.9686, "step": 43300 }, { "epoch": 0.3135066269987767, "grad_norm": 0.16079412400722504, "learning_rate": 4.686500611667282e-06, "loss": 0.9783, "step": 43310 }, { "epoch": 0.31357901365936286, "grad_norm": 0.19905321300029755, "learning_rate": 4.686428225006696e-06, "loss": 0.9657, "step": 43320 }, { "epoch": 0.31365140031994904, "grad_norm": 0.1775355190038681, "learning_rate": 4.68635583834611e-06, "loss": 0.9536, "step": 43330 }, { "epoch": 0.3137237869805352, "grad_norm": 0.16202837228775024, "learning_rate": 4.686283451685524e-06, "loss": 0.9783, "step": 43340 }, { "epoch": 0.3137961736411214, "grad_norm": 0.17238853871822357, "learning_rate": 4.686211065024937e-06, "loss": 0.9829, "step": 43350 }, { "epoch": 0.3138685603017076, "grad_norm": 0.16124895215034485, "learning_rate": 4.686138678364351e-06, "loss": 0.9646, "step": 43360 }, { "epoch": 0.3139409469622938, "grad_norm": 0.3990856111049652, "learning_rate": 4.6860662917037655e-06, "loss": 0.9583, "step": 43370 }, { "epoch": 0.31401333362288, "grad_norm": 0.168771892786026, "learning_rate": 4.685993905043179e-06, "loss": 0.9749, "step": 43380 }, { "epoch": 0.31408572028346615, "grad_norm": 0.16756808757781982, "learning_rate": 4.685921518382593e-06, "loss": 0.9551, "step": 43390 }, { "epoch": 0.31415810694405233, "grad_norm": 0.16926801204681396, "learning_rate": 4.685849131722006e-06, "loss": 0.9635, "step": 43400 }, { "epoch": 0.31423049360463856, "grad_norm": 0.17521944642066956, "learning_rate": 4.685776745061421e-06, "loss": 0.967, "step": 43410 }, { "epoch": 0.31430288026522474, "grad_norm": 0.16481991112232208, "learning_rate": 4.6857043584008344e-06, "loss": 0.9695, "step": 43420 }, { "epoch": 0.3143752669258109, "grad_norm": 0.15918724238872528, "learning_rate": 4.685631971740248e-06, "loss": 0.9679, "step": 43430 }, { "epoch": 0.3144476535863971, "grad_norm": 0.1576610803604126, "learning_rate": 4.685559585079662e-06, "loss": 0.9572, "step": 43440 }, { "epoch": 0.31452004024698327, "grad_norm": 0.16008280217647552, "learning_rate": 4.685487198419076e-06, "loss": 0.9626, "step": 43450 }, { "epoch": 0.3145924269075695, "grad_norm": 0.16831348836421967, "learning_rate": 4.68541481175849e-06, "loss": 0.9513, "step": 43460 }, { "epoch": 0.3146648135681557, "grad_norm": 0.2367899864912033, "learning_rate": 4.685342425097903e-06, "loss": 0.9394, "step": 43470 }, { "epoch": 0.31473720022874185, "grad_norm": 0.16397932171821594, "learning_rate": 4.685270038437317e-06, "loss": 0.9559, "step": 43480 }, { "epoch": 0.31480958688932803, "grad_norm": 0.16159677505493164, "learning_rate": 4.6851976517767314e-06, "loss": 0.9623, "step": 43490 }, { "epoch": 0.3148819735499142, "grad_norm": 0.16152982413768768, "learning_rate": 4.685125265116145e-06, "loss": 0.9662, "step": 43500 }, { "epoch": 0.3149543602105004, "grad_norm": 0.19313648343086243, "learning_rate": 4.685052878455559e-06, "loss": 0.9582, "step": 43510 }, { "epoch": 0.3150267468710866, "grad_norm": 0.16740714013576508, "learning_rate": 4.684980491794972e-06, "loss": 0.959, "step": 43520 }, { "epoch": 0.3150991335316728, "grad_norm": 0.1715911626815796, "learning_rate": 4.684908105134387e-06, "loss": 0.9513, "step": 43530 }, { "epoch": 0.31517152019225897, "grad_norm": 0.1693735122680664, "learning_rate": 4.6848357184738e-06, "loss": 0.9747, "step": 43540 }, { "epoch": 0.31524390685284515, "grad_norm": 0.16398315131664276, "learning_rate": 4.684763331813214e-06, "loss": 0.9639, "step": 43550 }, { "epoch": 0.3153162935134313, "grad_norm": 0.16864167153835297, "learning_rate": 4.684690945152628e-06, "loss": 0.9509, "step": 43560 }, { "epoch": 0.31538868017401755, "grad_norm": 0.15336643159389496, "learning_rate": 4.684618558492042e-06, "loss": 0.9669, "step": 43570 }, { "epoch": 0.31546106683460373, "grad_norm": 0.17718195915222168, "learning_rate": 4.684546171831456e-06, "loss": 0.9624, "step": 43580 }, { "epoch": 0.3155334534951899, "grad_norm": 0.17874468863010406, "learning_rate": 4.684473785170869e-06, "loss": 0.9659, "step": 43590 }, { "epoch": 0.3156058401557761, "grad_norm": 0.1714981645345688, "learning_rate": 4.684401398510283e-06, "loss": 0.9621, "step": 43600 }, { "epoch": 0.31567822681636226, "grad_norm": 0.15359127521514893, "learning_rate": 4.6843290118496965e-06, "loss": 0.9701, "step": 43610 }, { "epoch": 0.3157506134769485, "grad_norm": 0.2659284472465515, "learning_rate": 4.68425662518911e-06, "loss": 0.9573, "step": 43620 }, { "epoch": 0.31582300013753467, "grad_norm": 0.15532797574996948, "learning_rate": 4.684184238528524e-06, "loss": 0.96, "step": 43630 }, { "epoch": 0.31589538679812085, "grad_norm": 0.17984911799430847, "learning_rate": 4.684111851867938e-06, "loss": 0.9591, "step": 43640 }, { "epoch": 0.315967773458707, "grad_norm": 0.16166174411773682, "learning_rate": 4.684039465207352e-06, "loss": 0.9545, "step": 43650 }, { "epoch": 0.3160401601192932, "grad_norm": 0.16693688929080963, "learning_rate": 4.6839670785467655e-06, "loss": 0.972, "step": 43660 }, { "epoch": 0.3161125467798794, "grad_norm": 0.1600932478904724, "learning_rate": 4.683894691886179e-06, "loss": 0.9576, "step": 43670 }, { "epoch": 0.3161849334404656, "grad_norm": 0.1789911538362503, "learning_rate": 4.6838223052255935e-06, "loss": 0.9519, "step": 43680 }, { "epoch": 0.3162573201010518, "grad_norm": 0.16878649592399597, "learning_rate": 4.683749918565007e-06, "loss": 0.9537, "step": 43690 }, { "epoch": 0.31632970676163796, "grad_norm": 0.18558557331562042, "learning_rate": 4.683677531904421e-06, "loss": 0.9598, "step": 43700 }, { "epoch": 0.31640209342222414, "grad_norm": 0.1579427868127823, "learning_rate": 4.683605145243834e-06, "loss": 0.9635, "step": 43710 }, { "epoch": 0.3164744800828103, "grad_norm": 0.16161714494228363, "learning_rate": 4.683532758583249e-06, "loss": 0.9515, "step": 43720 }, { "epoch": 0.31654686674339655, "grad_norm": 0.15619002282619476, "learning_rate": 4.6834603719226625e-06, "loss": 0.9658, "step": 43730 }, { "epoch": 0.3166192534039827, "grad_norm": 0.17198404669761658, "learning_rate": 4.683387985262076e-06, "loss": 0.9492, "step": 43740 }, { "epoch": 0.3166916400645689, "grad_norm": 0.18472731113433838, "learning_rate": 4.68331559860149e-06, "loss": 0.9596, "step": 43750 }, { "epoch": 0.3167640267251551, "grad_norm": 0.17256297171115875, "learning_rate": 4.683243211940904e-06, "loss": 0.9668, "step": 43760 }, { "epoch": 0.31683641338574126, "grad_norm": 0.17887750267982483, "learning_rate": 4.683170825280318e-06, "loss": 0.9639, "step": 43770 }, { "epoch": 0.3169088000463275, "grad_norm": 0.17158237099647522, "learning_rate": 4.683098438619731e-06, "loss": 0.9678, "step": 43780 }, { "epoch": 0.31698118670691366, "grad_norm": 0.1695108711719513, "learning_rate": 4.683026051959145e-06, "loss": 0.9517, "step": 43790 }, { "epoch": 0.31705357336749984, "grad_norm": 0.1559198945760727, "learning_rate": 4.6829536652985595e-06, "loss": 0.9627, "step": 43800 }, { "epoch": 0.317125960028086, "grad_norm": 0.15818607807159424, "learning_rate": 4.682881278637973e-06, "loss": 0.9667, "step": 43810 }, { "epoch": 0.3171983466886722, "grad_norm": 0.1614452749490738, "learning_rate": 4.682808891977387e-06, "loss": 0.9597, "step": 43820 }, { "epoch": 0.31727073334925837, "grad_norm": 0.17955391108989716, "learning_rate": 4.6827365053168e-06, "loss": 0.9531, "step": 43830 }, { "epoch": 0.3173431200098446, "grad_norm": 0.1729903519153595, "learning_rate": 4.682664118656215e-06, "loss": 0.9437, "step": 43840 }, { "epoch": 0.3174155066704308, "grad_norm": 0.17108261585235596, "learning_rate": 4.682591731995628e-06, "loss": 0.9588, "step": 43850 }, { "epoch": 0.31748789333101696, "grad_norm": 0.16403260827064514, "learning_rate": 4.682519345335042e-06, "loss": 0.9561, "step": 43860 }, { "epoch": 0.31756027999160313, "grad_norm": 0.17436562478542328, "learning_rate": 4.682446958674456e-06, "loss": 0.9576, "step": 43870 }, { "epoch": 0.3176326666521893, "grad_norm": 0.16612768173217773, "learning_rate": 4.68237457201387e-06, "loss": 0.9558, "step": 43880 }, { "epoch": 0.31770505331277554, "grad_norm": 0.16263121366500854, "learning_rate": 4.682302185353284e-06, "loss": 0.9564, "step": 43890 }, { "epoch": 0.3177774399733617, "grad_norm": 0.24618317186832428, "learning_rate": 4.682229798692697e-06, "loss": 0.9667, "step": 43900 }, { "epoch": 0.3178498266339479, "grad_norm": 0.16008852422237396, "learning_rate": 4.682157412032111e-06, "loss": 0.973, "step": 43910 }, { "epoch": 0.3179222132945341, "grad_norm": 0.16652582585811615, "learning_rate": 4.682085025371525e-06, "loss": 0.9534, "step": 43920 }, { "epoch": 0.31799459995512025, "grad_norm": 0.19293102622032166, "learning_rate": 4.682012638710939e-06, "loss": 0.9546, "step": 43930 }, { "epoch": 0.3180669866157065, "grad_norm": 0.16652163863182068, "learning_rate": 4.681940252050353e-06, "loss": 0.9599, "step": 43940 }, { "epoch": 0.31813937327629266, "grad_norm": 0.16719068586826324, "learning_rate": 4.681867865389766e-06, "loss": 0.9807, "step": 43950 }, { "epoch": 0.31821175993687884, "grad_norm": 0.16578418016433716, "learning_rate": 4.68179547872918e-06, "loss": 0.9627, "step": 43960 }, { "epoch": 0.318284146597465, "grad_norm": 0.20773844420909882, "learning_rate": 4.681723092068594e-06, "loss": 0.9506, "step": 43970 }, { "epoch": 0.3183565332580512, "grad_norm": 0.20871974527835846, "learning_rate": 4.681650705408008e-06, "loss": 0.9609, "step": 43980 }, { "epoch": 0.3184289199186374, "grad_norm": 0.19961602985858917, "learning_rate": 4.6815783187474216e-06, "loss": 0.9696, "step": 43990 }, { "epoch": 0.3185013065792236, "grad_norm": 0.1632424145936966, "learning_rate": 4.681505932086835e-06, "loss": 0.9569, "step": 44000 }, { "epoch": 0.3185736932398098, "grad_norm": 0.15213897824287415, "learning_rate": 4.68143354542625e-06, "loss": 0.9503, "step": 44010 }, { "epoch": 0.31864607990039595, "grad_norm": 0.16649749875068665, "learning_rate": 4.681361158765663e-06, "loss": 0.9645, "step": 44020 }, { "epoch": 0.3187184665609821, "grad_norm": 0.16990630328655243, "learning_rate": 4.681288772105077e-06, "loss": 0.9532, "step": 44030 }, { "epoch": 0.3187908532215683, "grad_norm": 0.15943020582199097, "learning_rate": 4.6812163854444905e-06, "loss": 0.9601, "step": 44040 }, { "epoch": 0.31886323988215454, "grad_norm": 0.1748354434967041, "learning_rate": 4.681143998783905e-06, "loss": 0.9482, "step": 44050 }, { "epoch": 0.3189356265427407, "grad_norm": 0.16795013844966888, "learning_rate": 4.6810716121233186e-06, "loss": 0.9617, "step": 44060 }, { "epoch": 0.3190080132033269, "grad_norm": 0.17851009964942932, "learning_rate": 4.680999225462732e-06, "loss": 0.9599, "step": 44070 }, { "epoch": 0.31908039986391307, "grad_norm": 0.16591772437095642, "learning_rate": 4.680926838802146e-06, "loss": 0.9617, "step": 44080 }, { "epoch": 0.31915278652449924, "grad_norm": 0.16842985153198242, "learning_rate": 4.68085445214156e-06, "loss": 0.9649, "step": 44090 }, { "epoch": 0.3192251731850855, "grad_norm": 0.1664043366909027, "learning_rate": 4.680782065480974e-06, "loss": 0.9515, "step": 44100 }, { "epoch": 0.31929755984567165, "grad_norm": 0.1651202142238617, "learning_rate": 4.6807096788203875e-06, "loss": 0.9699, "step": 44110 }, { "epoch": 0.31936994650625783, "grad_norm": 0.18120470643043518, "learning_rate": 4.680637292159801e-06, "loss": 0.96, "step": 44120 }, { "epoch": 0.319442333166844, "grad_norm": 0.1606922298669815, "learning_rate": 4.680564905499215e-06, "loss": 0.9478, "step": 44130 }, { "epoch": 0.3195147198274302, "grad_norm": 0.16963617503643036, "learning_rate": 4.680492518838628e-06, "loss": 0.9625, "step": 44140 }, { "epoch": 0.3195871064880164, "grad_norm": 0.17569056153297424, "learning_rate": 4.680420132178042e-06, "loss": 0.9659, "step": 44150 }, { "epoch": 0.3196594931486026, "grad_norm": 0.16258999705314636, "learning_rate": 4.6803477455174564e-06, "loss": 0.9563, "step": 44160 }, { "epoch": 0.31973187980918877, "grad_norm": 0.22039775550365448, "learning_rate": 4.68027535885687e-06, "loss": 0.9474, "step": 44170 }, { "epoch": 0.31980426646977494, "grad_norm": 0.1699448972940445, "learning_rate": 4.680202972196284e-06, "loss": 0.9796, "step": 44180 }, { "epoch": 0.3198766531303611, "grad_norm": 0.15425299108028412, "learning_rate": 4.680130585535697e-06, "loss": 0.9552, "step": 44190 }, { "epoch": 0.3199490397909473, "grad_norm": 0.17444351315498352, "learning_rate": 4.680058198875112e-06, "loss": 0.9538, "step": 44200 }, { "epoch": 0.32002142645153353, "grad_norm": 0.17369182407855988, "learning_rate": 4.679985812214525e-06, "loss": 0.9556, "step": 44210 }, { "epoch": 0.3200938131121197, "grad_norm": 0.17337919771671295, "learning_rate": 4.679913425553939e-06, "loss": 0.9626, "step": 44220 }, { "epoch": 0.3201661997727059, "grad_norm": 0.1717376410961151, "learning_rate": 4.679841038893353e-06, "loss": 0.9431, "step": 44230 }, { "epoch": 0.32023858643329206, "grad_norm": 0.16457335650920868, "learning_rate": 4.679768652232767e-06, "loss": 0.9493, "step": 44240 }, { "epoch": 0.32031097309387824, "grad_norm": 0.17030712962150574, "learning_rate": 4.679696265572181e-06, "loss": 0.9625, "step": 44250 }, { "epoch": 0.32038335975446447, "grad_norm": 0.1790560930967331, "learning_rate": 4.679623878911594e-06, "loss": 0.9629, "step": 44260 }, { "epoch": 0.32045574641505065, "grad_norm": 0.15533442795276642, "learning_rate": 4.679551492251008e-06, "loss": 0.965, "step": 44270 }, { "epoch": 0.3205281330756368, "grad_norm": 0.15475206077098846, "learning_rate": 4.679479105590422e-06, "loss": 0.9614, "step": 44280 }, { "epoch": 0.320600519736223, "grad_norm": 0.15645426511764526, "learning_rate": 4.679406718929836e-06, "loss": 0.9481, "step": 44290 }, { "epoch": 0.3206729063968092, "grad_norm": 0.17138896882534027, "learning_rate": 4.67933433226925e-06, "loss": 0.9605, "step": 44300 }, { "epoch": 0.3207452930573954, "grad_norm": 0.17752832174301147, "learning_rate": 4.679261945608663e-06, "loss": 0.9538, "step": 44310 }, { "epoch": 0.3208176797179816, "grad_norm": 0.17925305664539337, "learning_rate": 4.679189558948078e-06, "loss": 0.9536, "step": 44320 }, { "epoch": 0.32089006637856776, "grad_norm": 0.16660036146640778, "learning_rate": 4.679117172287491e-06, "loss": 0.9566, "step": 44330 }, { "epoch": 0.32096245303915394, "grad_norm": 0.16511604189872742, "learning_rate": 4.679044785626905e-06, "loss": 0.9473, "step": 44340 }, { "epoch": 0.3210348396997401, "grad_norm": 0.17227579653263092, "learning_rate": 4.6789723989663185e-06, "loss": 0.9642, "step": 44350 }, { "epoch": 0.3211072263603263, "grad_norm": 0.17164403200149536, "learning_rate": 4.678900012305733e-06, "loss": 0.9601, "step": 44360 }, { "epoch": 0.3211796130209125, "grad_norm": 0.1575680375099182, "learning_rate": 4.678827625645147e-06, "loss": 0.9672, "step": 44370 }, { "epoch": 0.3212519996814987, "grad_norm": 0.15979178249835968, "learning_rate": 4.67875523898456e-06, "loss": 0.9615, "step": 44380 }, { "epoch": 0.3213243863420849, "grad_norm": 0.19064490497112274, "learning_rate": 4.678682852323974e-06, "loss": 0.9672, "step": 44390 }, { "epoch": 0.32139677300267105, "grad_norm": 0.16969703137874603, "learning_rate": 4.678610465663388e-06, "loss": 0.9418, "step": 44400 }, { "epoch": 0.32146915966325723, "grad_norm": 0.1616748571395874, "learning_rate": 4.678538079002802e-06, "loss": 0.9613, "step": 44410 }, { "epoch": 0.32154154632384346, "grad_norm": 0.15733925998210907, "learning_rate": 4.6784656923422155e-06, "loss": 0.9679, "step": 44420 }, { "epoch": 0.32161393298442964, "grad_norm": 0.1610860526561737, "learning_rate": 4.678393305681629e-06, "loss": 0.9692, "step": 44430 }, { "epoch": 0.3216863196450158, "grad_norm": 0.16648943722248077, "learning_rate": 4.678320919021044e-06, "loss": 0.9565, "step": 44440 }, { "epoch": 0.321758706305602, "grad_norm": 0.186976358294487, "learning_rate": 4.678248532360457e-06, "loss": 0.9474, "step": 44450 }, { "epoch": 0.32183109296618817, "grad_norm": 0.1633586436510086, "learning_rate": 4.678176145699871e-06, "loss": 0.9468, "step": 44460 }, { "epoch": 0.3219034796267744, "grad_norm": 0.197808638215065, "learning_rate": 4.6781037590392845e-06, "loss": 0.9601, "step": 44470 }, { "epoch": 0.3219758662873606, "grad_norm": 0.15820321440696716, "learning_rate": 4.678031372378699e-06, "loss": 0.9537, "step": 44480 }, { "epoch": 0.32204825294794676, "grad_norm": 0.1628575474023819, "learning_rate": 4.6779589857181125e-06, "loss": 0.9503, "step": 44490 }, { "epoch": 0.32212063960853293, "grad_norm": 0.1750411093235016, "learning_rate": 4.677886599057526e-06, "loss": 0.9594, "step": 44500 }, { "epoch": 0.3221930262691191, "grad_norm": 0.17343439161777496, "learning_rate": 4.67781421239694e-06, "loss": 0.9584, "step": 44510 }, { "epoch": 0.32226541292970534, "grad_norm": 0.16742408275604248, "learning_rate": 4.677741825736354e-06, "loss": 0.9638, "step": 44520 }, { "epoch": 0.3223377995902915, "grad_norm": 0.1644875556230545, "learning_rate": 4.677669439075768e-06, "loss": 0.9536, "step": 44530 }, { "epoch": 0.3224101862508777, "grad_norm": 0.16791434586048126, "learning_rate": 4.6775970524151815e-06, "loss": 0.9669, "step": 44540 }, { "epoch": 0.32248257291146387, "grad_norm": 0.16774770617485046, "learning_rate": 4.677524665754595e-06, "loss": 0.9569, "step": 44550 }, { "epoch": 0.32255495957205005, "grad_norm": 0.15673328936100006, "learning_rate": 4.677452279094009e-06, "loss": 0.9394, "step": 44560 }, { "epoch": 0.3226273462326362, "grad_norm": 0.17572014033794403, "learning_rate": 4.677379892433423e-06, "loss": 0.957, "step": 44570 }, { "epoch": 0.32269973289322246, "grad_norm": 0.18698441982269287, "learning_rate": 4.677307505772837e-06, "loss": 0.9637, "step": 44580 }, { "epoch": 0.32277211955380863, "grad_norm": 0.17538724839687347, "learning_rate": 4.67723511911225e-06, "loss": 0.968, "step": 44590 }, { "epoch": 0.3228445062143948, "grad_norm": 0.16755321621894836, "learning_rate": 4.677162732451664e-06, "loss": 0.9542, "step": 44600 }, { "epoch": 0.322916892874981, "grad_norm": 0.1889658123254776, "learning_rate": 4.6770903457910785e-06, "loss": 0.9668, "step": 44610 }, { "epoch": 0.32298927953556716, "grad_norm": 0.15155941247940063, "learning_rate": 4.677017959130492e-06, "loss": 0.963, "step": 44620 }, { "epoch": 0.3230616661961534, "grad_norm": 0.17296727001667023, "learning_rate": 4.676945572469906e-06, "loss": 0.9658, "step": 44630 }, { "epoch": 0.3231340528567396, "grad_norm": 0.15777067840099335, "learning_rate": 4.676873185809319e-06, "loss": 0.956, "step": 44640 }, { "epoch": 0.32320643951732575, "grad_norm": 0.1767292618751526, "learning_rate": 4.676800799148734e-06, "loss": 0.9635, "step": 44650 }, { "epoch": 0.3232788261779119, "grad_norm": 0.1630258858203888, "learning_rate": 4.6767284124881466e-06, "loss": 0.9621, "step": 44660 }, { "epoch": 0.3233512128384981, "grad_norm": 0.16284741461277008, "learning_rate": 4.676656025827561e-06, "loss": 0.9644, "step": 44670 }, { "epoch": 0.32342359949908434, "grad_norm": 0.16810114681720734, "learning_rate": 4.676583639166975e-06, "loss": 0.9712, "step": 44680 }, { "epoch": 0.3234959861596705, "grad_norm": 0.16822989284992218, "learning_rate": 4.676511252506388e-06, "loss": 0.9616, "step": 44690 }, { "epoch": 0.3235683728202567, "grad_norm": 0.1614549160003662, "learning_rate": 4.676438865845802e-06, "loss": 0.9493, "step": 44700 }, { "epoch": 0.32364075948084287, "grad_norm": 0.1698823720216751, "learning_rate": 4.676366479185216e-06, "loss": 0.9479, "step": 44710 }, { "epoch": 0.32371314614142904, "grad_norm": 0.16118445992469788, "learning_rate": 4.67629409252463e-06, "loss": 0.9754, "step": 44720 }, { "epoch": 0.3237855328020152, "grad_norm": 0.16466830670833588, "learning_rate": 4.6762217058640436e-06, "loss": 0.978, "step": 44730 }, { "epoch": 0.32385791946260145, "grad_norm": 0.1972661316394806, "learning_rate": 4.676149319203457e-06, "loss": 0.9743, "step": 44740 }, { "epoch": 0.32393030612318763, "grad_norm": 0.17572824656963348, "learning_rate": 4.676076932542871e-06, "loss": 0.9744, "step": 44750 }, { "epoch": 0.3240026927837738, "grad_norm": 0.18128469586372375, "learning_rate": 4.676004545882285e-06, "loss": 0.9504, "step": 44760 }, { "epoch": 0.32407507944436, "grad_norm": 0.2079382985830307, "learning_rate": 4.675932159221699e-06, "loss": 0.9561, "step": 44770 }, { "epoch": 0.32414746610494616, "grad_norm": 0.15759167075157166, "learning_rate": 4.6758597725611125e-06, "loss": 0.9527, "step": 44780 }, { "epoch": 0.3242198527655324, "grad_norm": 0.1726982742547989, "learning_rate": 4.675787385900526e-06, "loss": 0.9738, "step": 44790 }, { "epoch": 0.32429223942611857, "grad_norm": 0.2257843315601349, "learning_rate": 4.6757149992399406e-06, "loss": 0.9718, "step": 44800 }, { "epoch": 0.32436462608670474, "grad_norm": 0.16954675316810608, "learning_rate": 4.675642612579354e-06, "loss": 0.9619, "step": 44810 }, { "epoch": 0.3244370127472909, "grad_norm": 0.17804615199565887, "learning_rate": 4.675570225918768e-06, "loss": 0.9657, "step": 44820 }, { "epoch": 0.3245093994078771, "grad_norm": 0.18081621825695038, "learning_rate": 4.675497839258181e-06, "loss": 0.9528, "step": 44830 }, { "epoch": 0.32458178606846333, "grad_norm": 0.19913703203201294, "learning_rate": 4.675425452597596e-06, "loss": 0.9552, "step": 44840 }, { "epoch": 0.3246541727290495, "grad_norm": 0.15979525446891785, "learning_rate": 4.6753530659370095e-06, "loss": 0.9465, "step": 44850 }, { "epoch": 0.3247265593896357, "grad_norm": 0.1660212129354477, "learning_rate": 4.675280679276423e-06, "loss": 0.9517, "step": 44860 }, { "epoch": 0.32479894605022186, "grad_norm": 0.1735403835773468, "learning_rate": 4.675208292615837e-06, "loss": 0.9641, "step": 44870 }, { "epoch": 0.32487133271080804, "grad_norm": 0.15234126150608063, "learning_rate": 4.675135905955251e-06, "loss": 0.9705, "step": 44880 }, { "epoch": 0.3249437193713942, "grad_norm": 0.16520267724990845, "learning_rate": 4.675063519294665e-06, "loss": 0.9587, "step": 44890 }, { "epoch": 0.32501610603198045, "grad_norm": 0.1735251098871231, "learning_rate": 4.6749911326340784e-06, "loss": 0.9668, "step": 44900 }, { "epoch": 0.3250884926925666, "grad_norm": 0.17319133877754211, "learning_rate": 4.674918745973492e-06, "loss": 0.9523, "step": 44910 }, { "epoch": 0.3251608793531528, "grad_norm": 0.17401674389839172, "learning_rate": 4.6748463593129065e-06, "loss": 0.9708, "step": 44920 }, { "epoch": 0.325233266013739, "grad_norm": 0.16565395891666412, "learning_rate": 4.67477397265232e-06, "loss": 0.9531, "step": 44930 }, { "epoch": 0.32530565267432515, "grad_norm": 0.18349653482437134, "learning_rate": 4.674701585991734e-06, "loss": 0.9586, "step": 44940 }, { "epoch": 0.3253780393349114, "grad_norm": 0.15808264911174774, "learning_rate": 4.674629199331147e-06, "loss": 0.9453, "step": 44950 }, { "epoch": 0.32545042599549756, "grad_norm": 0.16079016029834747, "learning_rate": 4.674556812670562e-06, "loss": 0.9568, "step": 44960 }, { "epoch": 0.32552281265608374, "grad_norm": 0.1582205444574356, "learning_rate": 4.6744844260099754e-06, "loss": 0.9383, "step": 44970 }, { "epoch": 0.3255951993166699, "grad_norm": 0.1639067679643631, "learning_rate": 4.674412039349389e-06, "loss": 0.9554, "step": 44980 }, { "epoch": 0.3256675859772561, "grad_norm": 0.17060793936252594, "learning_rate": 4.674339652688803e-06, "loss": 0.9534, "step": 44990 }, { "epoch": 0.3257399726378423, "grad_norm": 0.18423733115196228, "learning_rate": 4.674267266028217e-06, "loss": 0.9478, "step": 45000 }, { "epoch": 0.3258123592984285, "grad_norm": 0.18846924602985382, "learning_rate": 4.674194879367631e-06, "loss": 0.9734, "step": 45010 }, { "epoch": 0.3258847459590147, "grad_norm": 0.16957645118236542, "learning_rate": 4.674122492707044e-06, "loss": 0.9489, "step": 45020 }, { "epoch": 0.32595713261960085, "grad_norm": 0.15767230093479156, "learning_rate": 4.674050106046458e-06, "loss": 0.9565, "step": 45030 }, { "epoch": 0.32602951928018703, "grad_norm": 0.15942777693271637, "learning_rate": 4.6739777193858724e-06, "loss": 0.9586, "step": 45040 }, { "epoch": 0.3261019059407732, "grad_norm": 0.16869819164276123, "learning_rate": 4.673905332725286e-06, "loss": 0.9599, "step": 45050 }, { "epoch": 0.32617429260135944, "grad_norm": 0.17203491926193237, "learning_rate": 4.6738329460647e-06, "loss": 0.9671, "step": 45060 }, { "epoch": 0.3262466792619456, "grad_norm": 0.15850797295570374, "learning_rate": 4.673760559404113e-06, "loss": 0.9588, "step": 45070 }, { "epoch": 0.3263190659225318, "grad_norm": 0.16606780886650085, "learning_rate": 4.673688172743528e-06, "loss": 0.9549, "step": 45080 }, { "epoch": 0.32639145258311797, "grad_norm": 0.1549416184425354, "learning_rate": 4.673615786082941e-06, "loss": 0.9558, "step": 45090 }, { "epoch": 0.32646383924370415, "grad_norm": 0.16568376123905182, "learning_rate": 4.673543399422355e-06, "loss": 0.9557, "step": 45100 }, { "epoch": 0.3265362259042904, "grad_norm": 0.17167599499225616, "learning_rate": 4.673471012761769e-06, "loss": 0.9581, "step": 45110 }, { "epoch": 0.32660861256487655, "grad_norm": 0.1696111410856247, "learning_rate": 4.673398626101183e-06, "loss": 0.9523, "step": 45120 }, { "epoch": 0.32668099922546273, "grad_norm": 0.15967723727226257, "learning_rate": 4.673326239440597e-06, "loss": 0.9456, "step": 45130 }, { "epoch": 0.3267533858860489, "grad_norm": 0.1622992306947708, "learning_rate": 4.67325385278001e-06, "loss": 0.9549, "step": 45140 }, { "epoch": 0.3268257725466351, "grad_norm": 0.17789630591869354, "learning_rate": 4.673181466119424e-06, "loss": 0.9494, "step": 45150 }, { "epoch": 0.3268981592072213, "grad_norm": 0.16522136330604553, "learning_rate": 4.673109079458838e-06, "loss": 0.974, "step": 45160 }, { "epoch": 0.3269705458678075, "grad_norm": 0.17646393179893494, "learning_rate": 4.673036692798252e-06, "loss": 0.9629, "step": 45170 }, { "epoch": 0.32704293252839367, "grad_norm": 0.16956348717212677, "learning_rate": 4.672964306137666e-06, "loss": 0.96, "step": 45180 }, { "epoch": 0.32711531918897985, "grad_norm": 0.166047602891922, "learning_rate": 4.672891919477079e-06, "loss": 0.9432, "step": 45190 }, { "epoch": 0.327187705849566, "grad_norm": 0.1728239208459854, "learning_rate": 4.672819532816493e-06, "loss": 0.9631, "step": 45200 }, { "epoch": 0.32726009251015226, "grad_norm": 0.16295363008975983, "learning_rate": 4.6727471461559065e-06, "loss": 0.9609, "step": 45210 }, { "epoch": 0.32733247917073843, "grad_norm": 0.16612671315670013, "learning_rate": 4.67267475949532e-06, "loss": 0.9499, "step": 45220 }, { "epoch": 0.3274048658313246, "grad_norm": 0.16281329095363617, "learning_rate": 4.6726023728347345e-06, "loss": 0.9565, "step": 45230 }, { "epoch": 0.3274772524919108, "grad_norm": 0.15651023387908936, "learning_rate": 4.672529986174148e-06, "loss": 0.9623, "step": 45240 }, { "epoch": 0.32754963915249696, "grad_norm": 0.21686317026615143, "learning_rate": 4.672457599513562e-06, "loss": 0.9478, "step": 45250 }, { "epoch": 0.32762202581308314, "grad_norm": 0.16553688049316406, "learning_rate": 4.672385212852975e-06, "loss": 0.9527, "step": 45260 }, { "epoch": 0.32769441247366937, "grad_norm": 0.16378672420978546, "learning_rate": 4.67231282619239e-06, "loss": 0.9625, "step": 45270 }, { "epoch": 0.32776679913425555, "grad_norm": 0.16440148651599884, "learning_rate": 4.6722404395318035e-06, "loss": 0.9716, "step": 45280 }, { "epoch": 0.3278391857948417, "grad_norm": 0.17290063202381134, "learning_rate": 4.672168052871217e-06, "loss": 0.9489, "step": 45290 }, { "epoch": 0.3279115724554279, "grad_norm": 0.16323043406009674, "learning_rate": 4.672095666210631e-06, "loss": 0.9543, "step": 45300 }, { "epoch": 0.3279839591160141, "grad_norm": 0.3352501690387726, "learning_rate": 4.672023279550045e-06, "loss": 0.964, "step": 45310 }, { "epoch": 0.3280563457766003, "grad_norm": 0.16609349846839905, "learning_rate": 4.671950892889459e-06, "loss": 0.9649, "step": 45320 }, { "epoch": 0.3281287324371865, "grad_norm": 0.17989186942577362, "learning_rate": 4.671878506228872e-06, "loss": 0.9558, "step": 45330 }, { "epoch": 0.32820111909777266, "grad_norm": 0.1710348278284073, "learning_rate": 4.671806119568286e-06, "loss": 0.9666, "step": 45340 }, { "epoch": 0.32827350575835884, "grad_norm": 0.16703477501869202, "learning_rate": 4.6717337329077e-06, "loss": 0.9709, "step": 45350 }, { "epoch": 0.328345892418945, "grad_norm": 0.16510546207427979, "learning_rate": 4.671661346247114e-06, "loss": 0.9561, "step": 45360 }, { "epoch": 0.32841827907953125, "grad_norm": 0.17808891832828522, "learning_rate": 4.671588959586528e-06, "loss": 0.9671, "step": 45370 }, { "epoch": 0.3284906657401174, "grad_norm": 0.15966980159282684, "learning_rate": 4.671516572925941e-06, "loss": 0.9631, "step": 45380 }, { "epoch": 0.3285630524007036, "grad_norm": 0.18822209537029266, "learning_rate": 4.671444186265355e-06, "loss": 0.9539, "step": 45390 }, { "epoch": 0.3286354390612898, "grad_norm": 0.16146880388259888, "learning_rate": 4.671371799604769e-06, "loss": 0.9603, "step": 45400 }, { "epoch": 0.32870782572187596, "grad_norm": 0.16964489221572876, "learning_rate": 4.671299412944183e-06, "loss": 0.9706, "step": 45410 }, { "epoch": 0.32878021238246213, "grad_norm": 0.17912639677524567, "learning_rate": 4.671227026283597e-06, "loss": 0.9671, "step": 45420 }, { "epoch": 0.32885259904304837, "grad_norm": 0.15870171785354614, "learning_rate": 4.67115463962301e-06, "loss": 0.9596, "step": 45430 }, { "epoch": 0.32892498570363454, "grad_norm": 0.16293965280056, "learning_rate": 4.671082252962425e-06, "loss": 0.9536, "step": 45440 }, { "epoch": 0.3289973723642207, "grad_norm": 0.1799435019493103, "learning_rate": 4.671009866301838e-06, "loss": 0.953, "step": 45450 }, { "epoch": 0.3290697590248069, "grad_norm": 0.16505300998687744, "learning_rate": 4.670937479641252e-06, "loss": 0.9579, "step": 45460 }, { "epoch": 0.3291421456853931, "grad_norm": 0.1620611697435379, "learning_rate": 4.6708650929806656e-06, "loss": 0.9364, "step": 45470 }, { "epoch": 0.3292145323459793, "grad_norm": 0.16515015065670013, "learning_rate": 4.67079270632008e-06, "loss": 0.9711, "step": 45480 }, { "epoch": 0.3292869190065655, "grad_norm": 0.16195544600486755, "learning_rate": 4.670720319659494e-06, "loss": 0.9485, "step": 45490 }, { "epoch": 0.32935930566715166, "grad_norm": 0.1693519651889801, "learning_rate": 4.670647932998907e-06, "loss": 0.9392, "step": 45500 }, { "epoch": 0.32943169232773784, "grad_norm": 0.16994911432266235, "learning_rate": 4.670575546338321e-06, "loss": 0.9574, "step": 45510 }, { "epoch": 0.329504078988324, "grad_norm": 0.16897796094417572, "learning_rate": 4.670503159677735e-06, "loss": 0.9588, "step": 45520 }, { "epoch": 0.32957646564891024, "grad_norm": 0.1709592044353485, "learning_rate": 4.670430773017149e-06, "loss": 0.9397, "step": 45530 }, { "epoch": 0.3296488523094964, "grad_norm": 0.177882120013237, "learning_rate": 4.6703583863565626e-06, "loss": 0.9669, "step": 45540 }, { "epoch": 0.3297212389700826, "grad_norm": 0.18497739732265472, "learning_rate": 4.670285999695976e-06, "loss": 0.9451, "step": 45550 }, { "epoch": 0.3297936256306688, "grad_norm": 0.16033156216144562, "learning_rate": 4.670213613035391e-06, "loss": 0.9574, "step": 45560 }, { "epoch": 0.32986601229125495, "grad_norm": 0.1641259789466858, "learning_rate": 4.670141226374804e-06, "loss": 0.94, "step": 45570 }, { "epoch": 0.3299383989518411, "grad_norm": 0.1763799786567688, "learning_rate": 4.670068839714218e-06, "loss": 0.9508, "step": 45580 }, { "epoch": 0.33001078561242736, "grad_norm": 0.17956474423408508, "learning_rate": 4.6699964530536315e-06, "loss": 0.9598, "step": 45590 }, { "epoch": 0.33008317227301354, "grad_norm": 0.21450495719909668, "learning_rate": 4.669924066393046e-06, "loss": 0.9572, "step": 45600 }, { "epoch": 0.3301555589335997, "grad_norm": 0.1846141368150711, "learning_rate": 4.66985167973246e-06, "loss": 0.968, "step": 45610 }, { "epoch": 0.3302279455941859, "grad_norm": 0.1598512977361679, "learning_rate": 4.669779293071873e-06, "loss": 0.9591, "step": 45620 }, { "epoch": 0.33030033225477207, "grad_norm": 0.15021023154258728, "learning_rate": 4.669706906411287e-06, "loss": 0.9389, "step": 45630 }, { "epoch": 0.3303727189153583, "grad_norm": 0.16507980227470398, "learning_rate": 4.669634519750701e-06, "loss": 0.9638, "step": 45640 }, { "epoch": 0.3304451055759445, "grad_norm": 0.16119886934757233, "learning_rate": 4.669562133090115e-06, "loss": 0.9521, "step": 45650 }, { "epoch": 0.33051749223653065, "grad_norm": 0.19781506061553955, "learning_rate": 4.6694897464295285e-06, "loss": 0.9473, "step": 45660 }, { "epoch": 0.33058987889711683, "grad_norm": 0.1651494801044464, "learning_rate": 4.669417359768942e-06, "loss": 0.9482, "step": 45670 }, { "epoch": 0.330662265557703, "grad_norm": 0.16081689298152924, "learning_rate": 4.669344973108357e-06, "loss": 0.9678, "step": 45680 }, { "epoch": 0.33073465221828924, "grad_norm": 0.1630435436964035, "learning_rate": 4.66927258644777e-06, "loss": 0.9627, "step": 45690 }, { "epoch": 0.3308070388788754, "grad_norm": 0.2068457007408142, "learning_rate": 4.669200199787184e-06, "loss": 0.9521, "step": 45700 }, { "epoch": 0.3308794255394616, "grad_norm": 0.168662890791893, "learning_rate": 4.6691278131265974e-06, "loss": 0.9453, "step": 45710 }, { "epoch": 0.33095181220004777, "grad_norm": 0.18764568865299225, "learning_rate": 4.669055426466011e-06, "loss": 0.9483, "step": 45720 }, { "epoch": 0.33102419886063394, "grad_norm": 0.1561020016670227, "learning_rate": 4.668983039805425e-06, "loss": 0.9565, "step": 45730 }, { "epoch": 0.3310965855212202, "grad_norm": 0.22187693417072296, "learning_rate": 4.668910653144838e-06, "loss": 0.9594, "step": 45740 }, { "epoch": 0.33116897218180635, "grad_norm": 0.16155502200126648, "learning_rate": 4.668838266484253e-06, "loss": 0.9664, "step": 45750 }, { "epoch": 0.33124135884239253, "grad_norm": 0.1851801574230194, "learning_rate": 4.668765879823666e-06, "loss": 0.9517, "step": 45760 }, { "epoch": 0.3313137455029787, "grad_norm": 0.16832536458969116, "learning_rate": 4.66869349316308e-06, "loss": 0.9532, "step": 45770 }, { "epoch": 0.3313861321635649, "grad_norm": 0.17694327235221863, "learning_rate": 4.668621106502494e-06, "loss": 0.9466, "step": 45780 }, { "epoch": 0.33145851882415106, "grad_norm": 0.16291755437850952, "learning_rate": 4.668548719841908e-06, "loss": 0.9544, "step": 45790 }, { "epoch": 0.3315309054847373, "grad_norm": 0.1645163744688034, "learning_rate": 4.668476333181322e-06, "loss": 0.9519, "step": 45800 }, { "epoch": 0.33160329214532347, "grad_norm": 0.1698862910270691, "learning_rate": 4.668403946520735e-06, "loss": 0.9583, "step": 45810 }, { "epoch": 0.33167567880590965, "grad_norm": 0.16201475262641907, "learning_rate": 4.668331559860149e-06, "loss": 0.9518, "step": 45820 }, { "epoch": 0.3317480654664958, "grad_norm": 0.2548742890357971, "learning_rate": 4.668259173199563e-06, "loss": 0.9571, "step": 45830 }, { "epoch": 0.331820452127082, "grad_norm": 0.17558850347995758, "learning_rate": 4.668186786538977e-06, "loss": 0.9502, "step": 45840 }, { "epoch": 0.33189283878766823, "grad_norm": 0.2049540877342224, "learning_rate": 4.668114399878391e-06, "loss": 0.9686, "step": 45850 }, { "epoch": 0.3319652254482544, "grad_norm": 0.15438438951969147, "learning_rate": 4.668042013217804e-06, "loss": 0.9603, "step": 45860 }, { "epoch": 0.3320376121088406, "grad_norm": 0.15590500831604004, "learning_rate": 4.667969626557219e-06, "loss": 0.9566, "step": 45870 }, { "epoch": 0.33210999876942676, "grad_norm": 0.15624962747097015, "learning_rate": 4.667897239896632e-06, "loss": 0.9586, "step": 45880 }, { "epoch": 0.33218238543001294, "grad_norm": 0.15918295085430145, "learning_rate": 4.667824853236046e-06, "loss": 0.951, "step": 45890 }, { "epoch": 0.33225477209059917, "grad_norm": 0.16058406233787537, "learning_rate": 4.6677524665754595e-06, "loss": 0.958, "step": 45900 }, { "epoch": 0.33232715875118535, "grad_norm": 0.1862640231847763, "learning_rate": 4.667680079914874e-06, "loss": 0.9443, "step": 45910 }, { "epoch": 0.3323995454117715, "grad_norm": 0.1649313122034073, "learning_rate": 4.667607693254288e-06, "loss": 0.9523, "step": 45920 }, { "epoch": 0.3324719320723577, "grad_norm": 0.17208698391914368, "learning_rate": 4.667535306593701e-06, "loss": 0.9776, "step": 45930 }, { "epoch": 0.3325443187329439, "grad_norm": 0.19187243282794952, "learning_rate": 4.667462919933115e-06, "loss": 0.9472, "step": 45940 }, { "epoch": 0.33261670539353005, "grad_norm": 0.17830708622932434, "learning_rate": 4.667390533272529e-06, "loss": 0.9662, "step": 45950 }, { "epoch": 0.3326890920541163, "grad_norm": 0.16068105399608612, "learning_rate": 4.667318146611943e-06, "loss": 0.9581, "step": 45960 }, { "epoch": 0.33276147871470246, "grad_norm": 0.15991735458374023, "learning_rate": 4.6672457599513565e-06, "loss": 0.9495, "step": 45970 }, { "epoch": 0.33283386537528864, "grad_norm": 0.20799915492534637, "learning_rate": 4.66717337329077e-06, "loss": 0.9464, "step": 45980 }, { "epoch": 0.3329062520358748, "grad_norm": 0.16113980114459991, "learning_rate": 4.667100986630184e-06, "loss": 0.9635, "step": 45990 }, { "epoch": 0.332978638696461, "grad_norm": 0.20556610822677612, "learning_rate": 4.667028599969598e-06, "loss": 0.9714, "step": 46000 }, { "epoch": 0.3330510253570472, "grad_norm": 0.17026638984680176, "learning_rate": 4.666956213309012e-06, "loss": 0.9572, "step": 46010 }, { "epoch": 0.3331234120176334, "grad_norm": 0.1647808998823166, "learning_rate": 4.6668838266484255e-06, "loss": 0.9645, "step": 46020 }, { "epoch": 0.3331957986782196, "grad_norm": 0.17708559334278107, "learning_rate": 4.666811439987839e-06, "loss": 0.9605, "step": 46030 }, { "epoch": 0.33326818533880576, "grad_norm": 0.17343778908252716, "learning_rate": 4.6667390533272535e-06, "loss": 0.9694, "step": 46040 }, { "epoch": 0.33334057199939193, "grad_norm": 0.18600593507289886, "learning_rate": 4.666666666666667e-06, "loss": 0.9542, "step": 46050 }, { "epoch": 0.33341295865997816, "grad_norm": 0.1633295863866806, "learning_rate": 4.666594280006081e-06, "loss": 0.9564, "step": 46060 }, { "epoch": 0.33348534532056434, "grad_norm": 0.16525378823280334, "learning_rate": 4.666521893345494e-06, "loss": 0.9482, "step": 46070 }, { "epoch": 0.3335577319811505, "grad_norm": 0.16414770483970642, "learning_rate": 4.666449506684909e-06, "loss": 0.9537, "step": 46080 }, { "epoch": 0.3336301186417367, "grad_norm": 0.1624254733324051, "learning_rate": 4.6663771200243225e-06, "loss": 0.9482, "step": 46090 }, { "epoch": 0.33370250530232287, "grad_norm": 0.18863092362880707, "learning_rate": 4.666304733363736e-06, "loss": 0.9574, "step": 46100 }, { "epoch": 0.33377489196290905, "grad_norm": 0.16010300815105438, "learning_rate": 4.66623234670315e-06, "loss": 0.9503, "step": 46110 }, { "epoch": 0.3338472786234953, "grad_norm": 0.16204185783863068, "learning_rate": 4.666159960042564e-06, "loss": 0.9577, "step": 46120 }, { "epoch": 0.33391966528408146, "grad_norm": 0.16618123650550842, "learning_rate": 4.666087573381978e-06, "loss": 0.9709, "step": 46130 }, { "epoch": 0.33399205194466763, "grad_norm": 0.1760234236717224, "learning_rate": 4.666015186721391e-06, "loss": 0.9694, "step": 46140 }, { "epoch": 0.3340644386052538, "grad_norm": 0.26092368364334106, "learning_rate": 4.665942800060805e-06, "loss": 0.9568, "step": 46150 }, { "epoch": 0.33413682526584, "grad_norm": 0.17464442551136017, "learning_rate": 4.6658704134002195e-06, "loss": 0.9586, "step": 46160 }, { "epoch": 0.3342092119264262, "grad_norm": 0.15855874121189117, "learning_rate": 4.665798026739633e-06, "loss": 0.9455, "step": 46170 }, { "epoch": 0.3342815985870124, "grad_norm": 0.1912544220685959, "learning_rate": 4.665725640079047e-06, "loss": 0.9646, "step": 46180 }, { "epoch": 0.3343539852475986, "grad_norm": 0.1546599417924881, "learning_rate": 4.66565325341846e-06, "loss": 0.9577, "step": 46190 }, { "epoch": 0.33442637190818475, "grad_norm": 0.17187544703483582, "learning_rate": 4.665580866757875e-06, "loss": 0.9353, "step": 46200 }, { "epoch": 0.3344987585687709, "grad_norm": 0.17121565341949463, "learning_rate": 4.665508480097288e-06, "loss": 0.9612, "step": 46210 }, { "epoch": 0.33457114522935716, "grad_norm": 0.15924374759197235, "learning_rate": 4.665436093436702e-06, "loss": 0.9596, "step": 46220 }, { "epoch": 0.33464353188994334, "grad_norm": 0.19250038266181946, "learning_rate": 4.665363706776116e-06, "loss": 0.9633, "step": 46230 }, { "epoch": 0.3347159185505295, "grad_norm": 0.17079396545886993, "learning_rate": 4.66529132011553e-06, "loss": 0.9491, "step": 46240 }, { "epoch": 0.3347883052111157, "grad_norm": 0.19403786957263947, "learning_rate": 4.665218933454943e-06, "loss": 0.9462, "step": 46250 }, { "epoch": 0.33486069187170187, "grad_norm": 0.18236984312534332, "learning_rate": 4.6651465467943565e-06, "loss": 0.9459, "step": 46260 }, { "epoch": 0.3349330785322881, "grad_norm": 0.14901652932167053, "learning_rate": 4.665074160133771e-06, "loss": 0.9659, "step": 46270 }, { "epoch": 0.3350054651928743, "grad_norm": 0.17317718267440796, "learning_rate": 4.6650017734731846e-06, "loss": 0.9512, "step": 46280 }, { "epoch": 0.33507785185346045, "grad_norm": 0.16543887555599213, "learning_rate": 4.664929386812598e-06, "loss": 0.9405, "step": 46290 }, { "epoch": 0.33515023851404663, "grad_norm": 0.16926343739032745, "learning_rate": 4.664857000152012e-06, "loss": 0.9575, "step": 46300 }, { "epoch": 0.3352226251746328, "grad_norm": 0.16885744035243988, "learning_rate": 4.664784613491426e-06, "loss": 0.9496, "step": 46310 }, { "epoch": 0.335295011835219, "grad_norm": 0.15920351445674896, "learning_rate": 4.66471222683084e-06, "loss": 0.9579, "step": 46320 }, { "epoch": 0.3353673984958052, "grad_norm": 0.15638776123523712, "learning_rate": 4.6646398401702535e-06, "loss": 0.9599, "step": 46330 }, { "epoch": 0.3354397851563914, "grad_norm": 0.15919549763202667, "learning_rate": 4.664567453509667e-06, "loss": 0.953, "step": 46340 }, { "epoch": 0.33551217181697757, "grad_norm": 0.1604597270488739, "learning_rate": 4.664495066849082e-06, "loss": 0.9597, "step": 46350 }, { "epoch": 0.33558455847756374, "grad_norm": 0.19076739251613617, "learning_rate": 4.664422680188495e-06, "loss": 0.9795, "step": 46360 }, { "epoch": 0.3356569451381499, "grad_norm": 0.15637139976024628, "learning_rate": 4.664350293527909e-06, "loss": 0.9549, "step": 46370 }, { "epoch": 0.33572933179873615, "grad_norm": 0.17368392646312714, "learning_rate": 4.6642779068673224e-06, "loss": 0.9644, "step": 46380 }, { "epoch": 0.33580171845932233, "grad_norm": 0.1643470972776413, "learning_rate": 4.664205520206737e-06, "loss": 0.9517, "step": 46390 }, { "epoch": 0.3358741051199085, "grad_norm": 0.17304439842700958, "learning_rate": 4.6641331335461505e-06, "loss": 0.9686, "step": 46400 }, { "epoch": 0.3359464917804947, "grad_norm": 0.15978851914405823, "learning_rate": 4.664060746885564e-06, "loss": 0.9658, "step": 46410 }, { "epoch": 0.33601887844108086, "grad_norm": 0.16269192099571228, "learning_rate": 4.663988360224978e-06, "loss": 0.9419, "step": 46420 }, { "epoch": 0.3360912651016671, "grad_norm": 0.16838060319423676, "learning_rate": 4.663915973564392e-06, "loss": 0.9444, "step": 46430 }, { "epoch": 0.33616365176225327, "grad_norm": 0.1612749993801117, "learning_rate": 4.663843586903806e-06, "loss": 0.9599, "step": 46440 }, { "epoch": 0.33623603842283944, "grad_norm": 0.1748184859752655, "learning_rate": 4.6637712002432194e-06, "loss": 0.9482, "step": 46450 }, { "epoch": 0.3363084250834256, "grad_norm": 0.1573769748210907, "learning_rate": 4.663698813582633e-06, "loss": 0.9552, "step": 46460 }, { "epoch": 0.3363808117440118, "grad_norm": 0.16278138756752014, "learning_rate": 4.6636264269220475e-06, "loss": 0.9531, "step": 46470 }, { "epoch": 0.336453198404598, "grad_norm": 0.19514703750610352, "learning_rate": 4.663554040261461e-06, "loss": 0.9648, "step": 46480 }, { "epoch": 0.3365255850651842, "grad_norm": 0.15883156657218933, "learning_rate": 4.663481653600875e-06, "loss": 0.9489, "step": 46490 }, { "epoch": 0.3365979717257704, "grad_norm": 0.20735056698322296, "learning_rate": 4.663409266940288e-06, "loss": 0.9552, "step": 46500 }, { "epoch": 0.33667035838635656, "grad_norm": 0.15435609221458435, "learning_rate": 4.663336880279703e-06, "loss": 0.9573, "step": 46510 }, { "epoch": 0.33674274504694274, "grad_norm": 0.15631069242954254, "learning_rate": 4.6632644936191164e-06, "loss": 0.9501, "step": 46520 }, { "epoch": 0.3368151317075289, "grad_norm": 0.17219269275665283, "learning_rate": 4.66319210695853e-06, "loss": 0.9483, "step": 46530 }, { "epoch": 0.33688751836811515, "grad_norm": 0.19126524031162262, "learning_rate": 4.663119720297944e-06, "loss": 0.9449, "step": 46540 }, { "epoch": 0.3369599050287013, "grad_norm": 0.16317123174667358, "learning_rate": 4.663047333637358e-06, "loss": 0.9521, "step": 46550 }, { "epoch": 0.3370322916892875, "grad_norm": 0.15335845947265625, "learning_rate": 4.662974946976772e-06, "loss": 0.9538, "step": 46560 }, { "epoch": 0.3371046783498737, "grad_norm": 0.1824522316455841, "learning_rate": 4.662902560316185e-06, "loss": 0.9511, "step": 46570 }, { "epoch": 0.33717706501045985, "grad_norm": 0.16539111733436584, "learning_rate": 4.662830173655599e-06, "loss": 0.9501, "step": 46580 }, { "epoch": 0.3372494516710461, "grad_norm": 0.16969068348407745, "learning_rate": 4.6627577869950134e-06, "loss": 0.9363, "step": 46590 }, { "epoch": 0.33732183833163226, "grad_norm": 0.15810436010360718, "learning_rate": 4.662685400334427e-06, "loss": 0.9563, "step": 46600 }, { "epoch": 0.33739422499221844, "grad_norm": 0.1647157222032547, "learning_rate": 4.662613013673841e-06, "loss": 0.9552, "step": 46610 }, { "epoch": 0.3374666116528046, "grad_norm": 0.1537630707025528, "learning_rate": 4.662540627013254e-06, "loss": 0.956, "step": 46620 }, { "epoch": 0.3375389983133908, "grad_norm": 0.16150793433189392, "learning_rate": 4.662468240352668e-06, "loss": 0.9567, "step": 46630 }, { "epoch": 0.33761138497397697, "grad_norm": 0.17372149229049683, "learning_rate": 4.662395853692082e-06, "loss": 0.9595, "step": 46640 }, { "epoch": 0.3376837716345632, "grad_norm": 0.18082787096500397, "learning_rate": 4.662323467031496e-06, "loss": 0.9577, "step": 46650 }, { "epoch": 0.3377561582951494, "grad_norm": 0.17019541561603546, "learning_rate": 4.66225108037091e-06, "loss": 0.9532, "step": 46660 }, { "epoch": 0.33782854495573555, "grad_norm": 0.15364877879619598, "learning_rate": 4.662178693710323e-06, "loss": 0.9584, "step": 46670 }, { "epoch": 0.33790093161632173, "grad_norm": 0.15855498611927032, "learning_rate": 4.662106307049738e-06, "loss": 0.9702, "step": 46680 }, { "epoch": 0.3379733182769079, "grad_norm": 0.23772907257080078, "learning_rate": 4.662033920389151e-06, "loss": 0.9503, "step": 46690 }, { "epoch": 0.33804570493749414, "grad_norm": 0.16535037755966187, "learning_rate": 4.661961533728565e-06, "loss": 0.9469, "step": 46700 }, { "epoch": 0.3381180915980803, "grad_norm": 0.17086680233478546, "learning_rate": 4.6618891470679785e-06, "loss": 0.9591, "step": 46710 }, { "epoch": 0.3381904782586665, "grad_norm": 0.16868987679481506, "learning_rate": 4.661816760407393e-06, "loss": 0.9501, "step": 46720 }, { "epoch": 0.33826286491925267, "grad_norm": 0.16112284362316132, "learning_rate": 4.661744373746807e-06, "loss": 0.9581, "step": 46730 }, { "epoch": 0.33833525157983885, "grad_norm": 0.17157232761383057, "learning_rate": 4.66167198708622e-06, "loss": 0.9574, "step": 46740 }, { "epoch": 0.3384076382404251, "grad_norm": 0.16588850319385529, "learning_rate": 4.661599600425634e-06, "loss": 0.9397, "step": 46750 }, { "epoch": 0.33848002490101126, "grad_norm": 0.16449251770973206, "learning_rate": 4.661527213765048e-06, "loss": 0.9504, "step": 46760 }, { "epoch": 0.33855241156159743, "grad_norm": 0.17265821993350983, "learning_rate": 4.661454827104462e-06, "loss": 0.9542, "step": 46770 }, { "epoch": 0.3386247982221836, "grad_norm": 0.1667519360780716, "learning_rate": 4.661382440443875e-06, "loss": 0.9454, "step": 46780 }, { "epoch": 0.3386971848827698, "grad_norm": 0.16387122869491577, "learning_rate": 4.661310053783289e-06, "loss": 0.9588, "step": 46790 }, { "epoch": 0.33876957154335596, "grad_norm": 0.15918706357479095, "learning_rate": 4.661237667122703e-06, "loss": 0.9506, "step": 46800 }, { "epoch": 0.3388419582039422, "grad_norm": 0.18269628286361694, "learning_rate": 4.661165280462116e-06, "loss": 0.9541, "step": 46810 }, { "epoch": 0.33891434486452837, "grad_norm": 0.16732244193553925, "learning_rate": 4.66109289380153e-06, "loss": 0.9683, "step": 46820 }, { "epoch": 0.33898673152511455, "grad_norm": 0.17757059633731842, "learning_rate": 4.6610205071409445e-06, "loss": 0.9507, "step": 46830 }, { "epoch": 0.3390591181857007, "grad_norm": 0.16032300889492035, "learning_rate": 4.660948120480358e-06, "loss": 0.9469, "step": 46840 }, { "epoch": 0.3391315048462869, "grad_norm": 0.1686878800392151, "learning_rate": 4.660875733819772e-06, "loss": 0.9517, "step": 46850 }, { "epoch": 0.33920389150687313, "grad_norm": 0.24848999083042145, "learning_rate": 4.660803347159185e-06, "loss": 0.9647, "step": 46860 }, { "epoch": 0.3392762781674593, "grad_norm": 0.16146300733089447, "learning_rate": 4.6607309604986e-06, "loss": 0.969, "step": 46870 }, { "epoch": 0.3393486648280455, "grad_norm": 0.17109398543834686, "learning_rate": 4.660658573838013e-06, "loss": 0.9475, "step": 46880 }, { "epoch": 0.33942105148863166, "grad_norm": 0.16845859587192535, "learning_rate": 4.660586187177427e-06, "loss": 0.9659, "step": 46890 }, { "epoch": 0.33949343814921784, "grad_norm": 0.17198343575000763, "learning_rate": 4.660513800516841e-06, "loss": 0.951, "step": 46900 }, { "epoch": 0.3395658248098041, "grad_norm": 0.1685718148946762, "learning_rate": 4.660441413856255e-06, "loss": 0.9511, "step": 46910 }, { "epoch": 0.33963821147039025, "grad_norm": 0.17462746798992157, "learning_rate": 4.660369027195669e-06, "loss": 0.9518, "step": 46920 }, { "epoch": 0.3397105981309764, "grad_norm": 0.15987993776798248, "learning_rate": 4.660296640535082e-06, "loss": 0.9521, "step": 46930 }, { "epoch": 0.3397829847915626, "grad_norm": 0.16598616540431976, "learning_rate": 4.660224253874496e-06, "loss": 0.9543, "step": 46940 }, { "epoch": 0.3398553714521488, "grad_norm": 0.16702087223529816, "learning_rate": 4.66015186721391e-06, "loss": 0.9475, "step": 46950 }, { "epoch": 0.339927758112735, "grad_norm": 0.20819000899791718, "learning_rate": 4.660079480553324e-06, "loss": 0.9534, "step": 46960 }, { "epoch": 0.3400001447733212, "grad_norm": 0.1900683492422104, "learning_rate": 4.660007093892738e-06, "loss": 0.9521, "step": 46970 }, { "epoch": 0.34007253143390737, "grad_norm": 0.15491485595703125, "learning_rate": 4.659934707232151e-06, "loss": 0.9561, "step": 46980 }, { "epoch": 0.34014491809449354, "grad_norm": 0.17071206867694855, "learning_rate": 4.659862320571566e-06, "loss": 0.9486, "step": 46990 }, { "epoch": 0.3402173047550797, "grad_norm": 0.17024581134319305, "learning_rate": 4.659789933910979e-06, "loss": 0.9481, "step": 47000 }, { "epoch": 0.3402896914156659, "grad_norm": 0.1717842072248459, "learning_rate": 4.659717547250393e-06, "loss": 0.9476, "step": 47010 }, { "epoch": 0.34036207807625213, "grad_norm": 0.16945697367191315, "learning_rate": 4.6596451605898066e-06, "loss": 0.9598, "step": 47020 }, { "epoch": 0.3404344647368383, "grad_norm": 0.1646365374326706, "learning_rate": 4.659572773929221e-06, "loss": 0.9447, "step": 47030 }, { "epoch": 0.3405068513974245, "grad_norm": 0.1725275218486786, "learning_rate": 4.659500387268635e-06, "loss": 0.9625, "step": 47040 }, { "epoch": 0.34057923805801066, "grad_norm": 0.15354198217391968, "learning_rate": 4.659428000608048e-06, "loss": 0.9535, "step": 47050 }, { "epoch": 0.34065162471859683, "grad_norm": 0.15781398117542267, "learning_rate": 4.659355613947462e-06, "loss": 0.9584, "step": 47060 }, { "epoch": 0.34072401137918307, "grad_norm": 0.22969284653663635, "learning_rate": 4.659283227286876e-06, "loss": 0.9555, "step": 47070 }, { "epoch": 0.34079639803976924, "grad_norm": 0.16445045173168182, "learning_rate": 4.65921084062629e-06, "loss": 0.9676, "step": 47080 }, { "epoch": 0.3408687847003554, "grad_norm": 0.16403111815452576, "learning_rate": 4.6591384539657036e-06, "loss": 0.9633, "step": 47090 }, { "epoch": 0.3409411713609416, "grad_norm": 0.17574258148670197, "learning_rate": 4.659066067305117e-06, "loss": 0.94, "step": 47100 }, { "epoch": 0.3410135580215278, "grad_norm": 0.16448186337947845, "learning_rate": 4.658993680644532e-06, "loss": 0.9693, "step": 47110 }, { "epoch": 0.341085944682114, "grad_norm": 0.15972661972045898, "learning_rate": 4.658921293983945e-06, "loss": 0.9592, "step": 47120 }, { "epoch": 0.3411583313427002, "grad_norm": 0.15439611673355103, "learning_rate": 4.658848907323359e-06, "loss": 0.9685, "step": 47130 }, { "epoch": 0.34123071800328636, "grad_norm": 0.1713487207889557, "learning_rate": 4.6587765206627725e-06, "loss": 0.9415, "step": 47140 }, { "epoch": 0.34130310466387254, "grad_norm": 0.15895883738994598, "learning_rate": 4.658704134002187e-06, "loss": 0.9427, "step": 47150 }, { "epoch": 0.3413754913244587, "grad_norm": 0.15406803786754608, "learning_rate": 4.658631747341601e-06, "loss": 0.9464, "step": 47160 }, { "epoch": 0.3414478779850449, "grad_norm": 0.1744023710489273, "learning_rate": 4.658559360681014e-06, "loss": 0.9538, "step": 47170 }, { "epoch": 0.3415202646456311, "grad_norm": 0.16929666697978973, "learning_rate": 4.658486974020428e-06, "loss": 0.9457, "step": 47180 }, { "epoch": 0.3415926513062173, "grad_norm": 0.1510300189256668, "learning_rate": 4.658414587359842e-06, "loss": 0.9548, "step": 47190 }, { "epoch": 0.3416650379668035, "grad_norm": 0.1694670170545578, "learning_rate": 4.658342200699256e-06, "loss": 0.9565, "step": 47200 }, { "epoch": 0.34173742462738965, "grad_norm": 0.1610107421875, "learning_rate": 4.6582698140386695e-06, "loss": 0.9569, "step": 47210 }, { "epoch": 0.34180981128797583, "grad_norm": 0.1698133498430252, "learning_rate": 4.658197427378083e-06, "loss": 0.9563, "step": 47220 }, { "epoch": 0.34188219794856206, "grad_norm": 0.17170600593090057, "learning_rate": 4.658125040717497e-06, "loss": 0.9611, "step": 47230 }, { "epoch": 0.34195458460914824, "grad_norm": 0.1630273014307022, "learning_rate": 4.658052654056911e-06, "loss": 0.9517, "step": 47240 }, { "epoch": 0.3420269712697344, "grad_norm": 0.1633155643939972, "learning_rate": 4.657980267396325e-06, "loss": 0.9635, "step": 47250 }, { "epoch": 0.3420993579303206, "grad_norm": 0.1674155741930008, "learning_rate": 4.6579078807357384e-06, "loss": 0.944, "step": 47260 }, { "epoch": 0.34217174459090677, "grad_norm": 0.18419358134269714, "learning_rate": 4.657835494075152e-06, "loss": 0.9554, "step": 47270 }, { "epoch": 0.342244131251493, "grad_norm": 0.17149895429611206, "learning_rate": 4.6577631074145665e-06, "loss": 0.9533, "step": 47280 }, { "epoch": 0.3423165179120792, "grad_norm": 0.15570290386676788, "learning_rate": 4.65769072075398e-06, "loss": 0.9394, "step": 47290 }, { "epoch": 0.34238890457266535, "grad_norm": 0.1557348072528839, "learning_rate": 4.657618334093394e-06, "loss": 0.9467, "step": 47300 }, { "epoch": 0.34246129123325153, "grad_norm": 0.16203594207763672, "learning_rate": 4.657545947432807e-06, "loss": 0.9445, "step": 47310 }, { "epoch": 0.3425336778938377, "grad_norm": 0.20134076476097107, "learning_rate": 4.657473560772221e-06, "loss": 0.948, "step": 47320 }, { "epoch": 0.3426060645544239, "grad_norm": 0.1600342094898224, "learning_rate": 4.657401174111635e-06, "loss": 0.9633, "step": 47330 }, { "epoch": 0.3426784512150101, "grad_norm": 0.17118972539901733, "learning_rate": 4.657328787451049e-06, "loss": 0.9599, "step": 47340 }, { "epoch": 0.3427508378755963, "grad_norm": 0.15433147549629211, "learning_rate": 4.657256400790463e-06, "loss": 0.9496, "step": 47350 }, { "epoch": 0.34282322453618247, "grad_norm": 0.1639435738325119, "learning_rate": 4.657184014129876e-06, "loss": 0.9455, "step": 47360 }, { "epoch": 0.34289561119676865, "grad_norm": 0.1590513288974762, "learning_rate": 4.65711162746929e-06, "loss": 0.968, "step": 47370 }, { "epoch": 0.3429679978573548, "grad_norm": 0.15320120751857758, "learning_rate": 4.657039240808704e-06, "loss": 0.9613, "step": 47380 }, { "epoch": 0.34304038451794105, "grad_norm": 0.1623704880475998, "learning_rate": 4.656966854148118e-06, "loss": 0.949, "step": 47390 }, { "epoch": 0.34311277117852723, "grad_norm": 0.28263241052627563, "learning_rate": 4.656894467487532e-06, "loss": 0.9426, "step": 47400 }, { "epoch": 0.3431851578391134, "grad_norm": 0.1681540608406067, "learning_rate": 4.656822080826945e-06, "loss": 0.9484, "step": 47410 }, { "epoch": 0.3432575444996996, "grad_norm": 0.17275528609752655, "learning_rate": 4.656749694166359e-06, "loss": 0.9456, "step": 47420 }, { "epoch": 0.34332993116028576, "grad_norm": 0.1764938235282898, "learning_rate": 4.656677307505773e-06, "loss": 0.9555, "step": 47430 }, { "epoch": 0.343402317820872, "grad_norm": 0.18242251873016357, "learning_rate": 4.656604920845187e-06, "loss": 0.9464, "step": 47440 }, { "epoch": 0.34347470448145817, "grad_norm": 0.1684602051973343, "learning_rate": 4.6565325341846005e-06, "loss": 0.9521, "step": 47450 }, { "epoch": 0.34354709114204435, "grad_norm": 0.448024719953537, "learning_rate": 4.656460147524014e-06, "loss": 0.9466, "step": 47460 }, { "epoch": 0.3436194778026305, "grad_norm": 0.20128124952316284, "learning_rate": 4.656387760863429e-06, "loss": 0.9388, "step": 47470 }, { "epoch": 0.3436918644632167, "grad_norm": 0.18526539206504822, "learning_rate": 4.656315374202842e-06, "loss": 0.9565, "step": 47480 }, { "epoch": 0.34376425112380293, "grad_norm": 0.1590823531150818, "learning_rate": 4.656242987542256e-06, "loss": 0.9447, "step": 47490 }, { "epoch": 0.3438366377843891, "grad_norm": 0.21153424680233002, "learning_rate": 4.6561706008816695e-06, "loss": 0.9503, "step": 47500 }, { "epoch": 0.3439090244449753, "grad_norm": 0.18983012437820435, "learning_rate": 4.656098214221084e-06, "loss": 0.9614, "step": 47510 }, { "epoch": 0.34398141110556146, "grad_norm": 0.1883876472711563, "learning_rate": 4.6560258275604975e-06, "loss": 0.9572, "step": 47520 }, { "epoch": 0.34405379776614764, "grad_norm": 0.15937159955501556, "learning_rate": 4.655953440899911e-06, "loss": 0.9579, "step": 47530 }, { "epoch": 0.3441261844267338, "grad_norm": 0.17712438106536865, "learning_rate": 4.655881054239325e-06, "loss": 0.9693, "step": 47540 }, { "epoch": 0.34419857108732005, "grad_norm": 0.17279085516929626, "learning_rate": 4.655808667578739e-06, "loss": 0.9572, "step": 47550 }, { "epoch": 0.3442709577479062, "grad_norm": 0.16314037144184113, "learning_rate": 4.655736280918153e-06, "loss": 0.9495, "step": 47560 }, { "epoch": 0.3443433444084924, "grad_norm": 0.1699339896440506, "learning_rate": 4.6556638942575665e-06, "loss": 0.9369, "step": 47570 }, { "epoch": 0.3444157310690786, "grad_norm": 0.16625383496284485, "learning_rate": 4.65559150759698e-06, "loss": 0.9402, "step": 47580 }, { "epoch": 0.34448811772966476, "grad_norm": 0.15905995666980743, "learning_rate": 4.6555191209363946e-06, "loss": 0.96, "step": 47590 }, { "epoch": 0.344560504390251, "grad_norm": 0.1678093671798706, "learning_rate": 4.655446734275808e-06, "loss": 0.9517, "step": 47600 }, { "epoch": 0.34463289105083716, "grad_norm": 0.1573289930820465, "learning_rate": 4.655374347615222e-06, "loss": 0.9514, "step": 47610 }, { "epoch": 0.34470527771142334, "grad_norm": 0.1935252696275711, "learning_rate": 4.655301960954635e-06, "loss": 0.9646, "step": 47620 }, { "epoch": 0.3447776643720095, "grad_norm": 0.16098780930042267, "learning_rate": 4.65522957429405e-06, "loss": 0.9546, "step": 47630 }, { "epoch": 0.3448500510325957, "grad_norm": 0.1690308153629303, "learning_rate": 4.6551571876334635e-06, "loss": 0.9697, "step": 47640 }, { "epoch": 0.3449224376931819, "grad_norm": 0.159522145986557, "learning_rate": 4.655084800972877e-06, "loss": 0.9472, "step": 47650 }, { "epoch": 0.3449948243537681, "grad_norm": 0.2347215861082077, "learning_rate": 4.655012414312291e-06, "loss": 0.9475, "step": 47660 }, { "epoch": 0.3450672110143543, "grad_norm": 0.16307930648326874, "learning_rate": 4.654940027651705e-06, "loss": 0.9627, "step": 47670 }, { "epoch": 0.34513959767494046, "grad_norm": 0.4456617832183838, "learning_rate": 4.654867640991119e-06, "loss": 0.9653, "step": 47680 }, { "epoch": 0.34521198433552663, "grad_norm": 0.19592955708503723, "learning_rate": 4.654795254330532e-06, "loss": 0.958, "step": 47690 }, { "epoch": 0.3452843709961128, "grad_norm": 0.1693570464849472, "learning_rate": 4.654722867669946e-06, "loss": 0.9591, "step": 47700 }, { "epoch": 0.34535675765669904, "grad_norm": 0.16405141353607178, "learning_rate": 4.6546504810093605e-06, "loss": 0.9568, "step": 47710 }, { "epoch": 0.3454291443172852, "grad_norm": 0.15746448934078217, "learning_rate": 4.654578094348774e-06, "loss": 0.9587, "step": 47720 }, { "epoch": 0.3455015309778714, "grad_norm": 0.17681455612182617, "learning_rate": 4.654505707688188e-06, "loss": 0.9533, "step": 47730 }, { "epoch": 0.3455739176384576, "grad_norm": 0.14864014089107513, "learning_rate": 4.654433321027601e-06, "loss": 0.9444, "step": 47740 }, { "epoch": 0.34564630429904375, "grad_norm": 0.17312194406986237, "learning_rate": 4.654360934367016e-06, "loss": 0.9546, "step": 47750 }, { "epoch": 0.34571869095963, "grad_norm": 0.17767411470413208, "learning_rate": 4.654288547706429e-06, "loss": 0.9547, "step": 47760 }, { "epoch": 0.34579107762021616, "grad_norm": 0.16189977526664734, "learning_rate": 4.654216161045843e-06, "loss": 0.959, "step": 47770 }, { "epoch": 0.34586346428080234, "grad_norm": 0.15381406247615814, "learning_rate": 4.654143774385257e-06, "loss": 0.9532, "step": 47780 }, { "epoch": 0.3459358509413885, "grad_norm": 0.1887199878692627, "learning_rate": 4.654071387724671e-06, "loss": 0.9523, "step": 47790 }, { "epoch": 0.3460082376019747, "grad_norm": 0.1715165674686432, "learning_rate": 4.653999001064085e-06, "loss": 0.9387, "step": 47800 }, { "epoch": 0.3460806242625609, "grad_norm": 0.17425018548965454, "learning_rate": 4.653926614403498e-06, "loss": 0.9659, "step": 47810 }, { "epoch": 0.3461530109231471, "grad_norm": 0.16004815697669983, "learning_rate": 4.653854227742912e-06, "loss": 0.9524, "step": 47820 }, { "epoch": 0.3462253975837333, "grad_norm": 0.16618654131889343, "learning_rate": 4.653781841082326e-06, "loss": 0.9573, "step": 47830 }, { "epoch": 0.34629778424431945, "grad_norm": 0.18237411975860596, "learning_rate": 4.653709454421739e-06, "loss": 0.9503, "step": 47840 }, { "epoch": 0.3463701709049056, "grad_norm": 0.17747117578983307, "learning_rate": 4.653637067761153e-06, "loss": 0.951, "step": 47850 }, { "epoch": 0.3464425575654918, "grad_norm": 0.1547970473766327, "learning_rate": 4.653564681100567e-06, "loss": 0.9493, "step": 47860 }, { "epoch": 0.34651494422607804, "grad_norm": 0.20299063622951508, "learning_rate": 4.653492294439981e-06, "loss": 0.9549, "step": 47870 }, { "epoch": 0.3465873308866642, "grad_norm": 0.16217590868473053, "learning_rate": 4.6534199077793945e-06, "loss": 0.951, "step": 47880 }, { "epoch": 0.3466597175472504, "grad_norm": 0.21849872171878815, "learning_rate": 4.653347521118808e-06, "loss": 0.9457, "step": 47890 }, { "epoch": 0.34673210420783657, "grad_norm": 0.22776605188846588, "learning_rate": 4.653275134458223e-06, "loss": 0.9705, "step": 47900 }, { "epoch": 0.34680449086842274, "grad_norm": 0.1702580451965332, "learning_rate": 4.653202747797636e-06, "loss": 0.9504, "step": 47910 }, { "epoch": 0.346876877529009, "grad_norm": 0.16190893948078156, "learning_rate": 4.65313036113705e-06, "loss": 0.9618, "step": 47920 }, { "epoch": 0.34694926418959515, "grad_norm": 0.18085525929927826, "learning_rate": 4.6530579744764634e-06, "loss": 0.9564, "step": 47930 }, { "epoch": 0.34702165085018133, "grad_norm": 0.16075456142425537, "learning_rate": 4.652985587815878e-06, "loss": 0.9576, "step": 47940 }, { "epoch": 0.3470940375107675, "grad_norm": 0.1670777052640915, "learning_rate": 4.6529132011552915e-06, "loss": 0.9598, "step": 47950 }, { "epoch": 0.3471664241713537, "grad_norm": 0.1586279422044754, "learning_rate": 4.652840814494705e-06, "loss": 0.9469, "step": 47960 }, { "epoch": 0.3472388108319399, "grad_norm": 0.163706973195076, "learning_rate": 4.652768427834119e-06, "loss": 0.9456, "step": 47970 }, { "epoch": 0.3473111974925261, "grad_norm": 0.16007377207279205, "learning_rate": 4.652696041173533e-06, "loss": 0.9445, "step": 47980 }, { "epoch": 0.34738358415311227, "grad_norm": 0.18147924542427063, "learning_rate": 4.652623654512947e-06, "loss": 0.9527, "step": 47990 }, { "epoch": 0.34745597081369844, "grad_norm": 0.1525728702545166, "learning_rate": 4.6525512678523604e-06, "loss": 0.9419, "step": 48000 }, { "epoch": 0.3475283574742846, "grad_norm": 0.168410062789917, "learning_rate": 4.652478881191774e-06, "loss": 0.9464, "step": 48010 }, { "epoch": 0.3476007441348708, "grad_norm": 0.16513432562351227, "learning_rate": 4.652406494531188e-06, "loss": 0.9467, "step": 48020 }, { "epoch": 0.34767313079545703, "grad_norm": 0.14979685842990875, "learning_rate": 4.652334107870602e-06, "loss": 0.9547, "step": 48030 }, { "epoch": 0.3477455174560432, "grad_norm": 0.17704786360263824, "learning_rate": 4.652261721210016e-06, "loss": 0.9491, "step": 48040 }, { "epoch": 0.3478179041166294, "grad_norm": 0.1605810523033142, "learning_rate": 4.652189334549429e-06, "loss": 0.9566, "step": 48050 }, { "epoch": 0.34789029077721556, "grad_norm": 0.1907358467578888, "learning_rate": 4.652116947888843e-06, "loss": 0.9568, "step": 48060 }, { "epoch": 0.34796267743780174, "grad_norm": 0.16195763647556305, "learning_rate": 4.6520445612282574e-06, "loss": 0.9532, "step": 48070 }, { "epoch": 0.34803506409838797, "grad_norm": 0.1610960215330124, "learning_rate": 4.651972174567671e-06, "loss": 0.9452, "step": 48080 }, { "epoch": 0.34810745075897415, "grad_norm": 0.17993375658988953, "learning_rate": 4.651899787907085e-06, "loss": 0.9497, "step": 48090 }, { "epoch": 0.3481798374195603, "grad_norm": 0.16861571371555328, "learning_rate": 4.651827401246498e-06, "loss": 0.9503, "step": 48100 }, { "epoch": 0.3482522240801465, "grad_norm": 0.16078142821788788, "learning_rate": 4.651755014585913e-06, "loss": 0.9387, "step": 48110 }, { "epoch": 0.3483246107407327, "grad_norm": 0.1687428504228592, "learning_rate": 4.651682627925326e-06, "loss": 0.9459, "step": 48120 }, { "epoch": 0.3483969974013189, "grad_norm": 0.17936325073242188, "learning_rate": 4.65161024126474e-06, "loss": 0.944, "step": 48130 }, { "epoch": 0.3484693840619051, "grad_norm": 0.16002655029296875, "learning_rate": 4.651537854604154e-06, "loss": 0.9528, "step": 48140 }, { "epoch": 0.34854177072249126, "grad_norm": 0.16870175302028656, "learning_rate": 4.651465467943568e-06, "loss": 0.9461, "step": 48150 }, { "epoch": 0.34861415738307744, "grad_norm": 0.37288644909858704, "learning_rate": 4.651393081282982e-06, "loss": 0.9438, "step": 48160 }, { "epoch": 0.3486865440436636, "grad_norm": 0.1594185084104538, "learning_rate": 4.651320694622395e-06, "loss": 0.9631, "step": 48170 }, { "epoch": 0.34875893070424985, "grad_norm": 0.16958339512348175, "learning_rate": 4.651248307961809e-06, "loss": 0.9672, "step": 48180 }, { "epoch": 0.348831317364836, "grad_norm": 0.2117815464735031, "learning_rate": 4.651175921301223e-06, "loss": 0.9624, "step": 48190 }, { "epoch": 0.3489037040254222, "grad_norm": 0.16270878911018372, "learning_rate": 4.651103534640637e-06, "loss": 0.9621, "step": 48200 }, { "epoch": 0.3489760906860084, "grad_norm": 0.1788318157196045, "learning_rate": 4.651031147980051e-06, "loss": 0.9604, "step": 48210 }, { "epoch": 0.34904847734659455, "grad_norm": 0.20249681174755096, "learning_rate": 4.650958761319464e-06, "loss": 0.9452, "step": 48220 }, { "epoch": 0.34912086400718073, "grad_norm": 0.16005674004554749, "learning_rate": 4.650886374658879e-06, "loss": 0.9375, "step": 48230 }, { "epoch": 0.34919325066776696, "grad_norm": 0.182306170463562, "learning_rate": 4.650813987998292e-06, "loss": 0.9549, "step": 48240 }, { "epoch": 0.34926563732835314, "grad_norm": 0.1851535588502884, "learning_rate": 4.650741601337706e-06, "loss": 0.9459, "step": 48250 }, { "epoch": 0.3493380239889393, "grad_norm": 0.1800885647535324, "learning_rate": 4.6506692146771195e-06, "loss": 0.9594, "step": 48260 }, { "epoch": 0.3494104106495255, "grad_norm": 0.1562681943178177, "learning_rate": 4.650596828016534e-06, "loss": 0.9548, "step": 48270 }, { "epoch": 0.34948279731011167, "grad_norm": 0.164944589138031, "learning_rate": 4.650524441355948e-06, "loss": 0.9595, "step": 48280 }, { "epoch": 0.3495551839706979, "grad_norm": 0.15770290791988373, "learning_rate": 4.650452054695361e-06, "loss": 0.9573, "step": 48290 }, { "epoch": 0.3496275706312841, "grad_norm": 0.19235900044441223, "learning_rate": 4.650379668034775e-06, "loss": 0.9381, "step": 48300 }, { "epoch": 0.34969995729187026, "grad_norm": 0.17154589295387268, "learning_rate": 4.650307281374189e-06, "loss": 0.9434, "step": 48310 }, { "epoch": 0.34977234395245643, "grad_norm": 0.19060273468494415, "learning_rate": 4.650234894713603e-06, "loss": 0.9666, "step": 48320 }, { "epoch": 0.3498447306130426, "grad_norm": 0.16510871052742004, "learning_rate": 4.6501625080530165e-06, "loss": 0.9498, "step": 48330 }, { "epoch": 0.34991711727362884, "grad_norm": 0.1761731505393982, "learning_rate": 4.65009012139243e-06, "loss": 0.9522, "step": 48340 }, { "epoch": 0.349989503934215, "grad_norm": 0.17731162905693054, "learning_rate": 4.650017734731845e-06, "loss": 0.9507, "step": 48350 }, { "epoch": 0.3500618905948012, "grad_norm": 0.17625099420547485, "learning_rate": 4.649945348071258e-06, "loss": 0.9528, "step": 48360 }, { "epoch": 0.35013427725538737, "grad_norm": 0.16574423015117645, "learning_rate": 4.649872961410671e-06, "loss": 0.969, "step": 48370 }, { "epoch": 0.35020666391597355, "grad_norm": 0.16655471920967102, "learning_rate": 4.6498005747500855e-06, "loss": 0.9462, "step": 48380 }, { "epoch": 0.3502790505765597, "grad_norm": 0.17919887602329254, "learning_rate": 4.649728188089499e-06, "loss": 0.9669, "step": 48390 }, { "epoch": 0.35035143723714596, "grad_norm": 0.1737910956144333, "learning_rate": 4.649655801428913e-06, "loss": 0.9544, "step": 48400 }, { "epoch": 0.35042382389773213, "grad_norm": 0.1664748638868332, "learning_rate": 4.649583414768326e-06, "loss": 0.9394, "step": 48410 }, { "epoch": 0.3504962105583183, "grad_norm": 0.1567506492137909, "learning_rate": 4.649511028107741e-06, "loss": 0.9453, "step": 48420 }, { "epoch": 0.3505685972189045, "grad_norm": 0.17070920765399933, "learning_rate": 4.649438641447154e-06, "loss": 0.9549, "step": 48430 }, { "epoch": 0.35064098387949066, "grad_norm": 0.16167403757572174, "learning_rate": 4.649366254786568e-06, "loss": 0.9453, "step": 48440 }, { "epoch": 0.3507133705400769, "grad_norm": 0.15973427891731262, "learning_rate": 4.649293868125982e-06, "loss": 0.9561, "step": 48450 }, { "epoch": 0.3507857572006631, "grad_norm": 0.16272811591625214, "learning_rate": 4.649221481465396e-06, "loss": 0.9554, "step": 48460 }, { "epoch": 0.35085814386124925, "grad_norm": 0.1659621149301529, "learning_rate": 4.64914909480481e-06, "loss": 0.9474, "step": 48470 }, { "epoch": 0.3509305305218354, "grad_norm": 0.16306394338607788, "learning_rate": 4.649076708144223e-06, "loss": 0.9511, "step": 48480 }, { "epoch": 0.3510029171824216, "grad_norm": 0.1689397245645523, "learning_rate": 4.649004321483637e-06, "loss": 0.9528, "step": 48490 }, { "epoch": 0.35107530384300784, "grad_norm": 0.16257858276367188, "learning_rate": 4.648931934823051e-06, "loss": 0.9544, "step": 48500 }, { "epoch": 0.351147690503594, "grad_norm": 0.1643674671649933, "learning_rate": 4.648859548162465e-06, "loss": 0.9441, "step": 48510 }, { "epoch": 0.3512200771641802, "grad_norm": 0.18621480464935303, "learning_rate": 4.648787161501879e-06, "loss": 0.9677, "step": 48520 }, { "epoch": 0.35129246382476637, "grad_norm": 0.15909579396247864, "learning_rate": 4.648714774841292e-06, "loss": 0.9404, "step": 48530 }, { "epoch": 0.35136485048535254, "grad_norm": 0.16073808073997498, "learning_rate": 4.648642388180707e-06, "loss": 0.9649, "step": 48540 }, { "epoch": 0.3514372371459387, "grad_norm": 0.15749648213386536, "learning_rate": 4.64857000152012e-06, "loss": 0.9665, "step": 48550 }, { "epoch": 0.35150962380652495, "grad_norm": 0.18623562157154083, "learning_rate": 4.648497614859534e-06, "loss": 0.9529, "step": 48560 }, { "epoch": 0.35158201046711113, "grad_norm": 0.16134946048259735, "learning_rate": 4.6484252281989476e-06, "loss": 0.9499, "step": 48570 }, { "epoch": 0.3516543971276973, "grad_norm": 0.15732122957706451, "learning_rate": 4.648352841538362e-06, "loss": 0.9572, "step": 48580 }, { "epoch": 0.3517267837882835, "grad_norm": 0.1690632700920105, "learning_rate": 4.648280454877776e-06, "loss": 0.9506, "step": 48590 }, { "epoch": 0.35179917044886966, "grad_norm": 0.16241760551929474, "learning_rate": 4.648208068217189e-06, "loss": 0.9518, "step": 48600 }, { "epoch": 0.3518715571094559, "grad_norm": 0.1884649395942688, "learning_rate": 4.648135681556603e-06, "loss": 0.9577, "step": 48610 }, { "epoch": 0.35194394377004207, "grad_norm": 0.16294220089912415, "learning_rate": 4.648063294896017e-06, "loss": 0.9432, "step": 48620 }, { "epoch": 0.35201633043062824, "grad_norm": 0.18577656149864197, "learning_rate": 4.647990908235431e-06, "loss": 0.9513, "step": 48630 }, { "epoch": 0.3520887170912144, "grad_norm": 0.16119709610939026, "learning_rate": 4.647918521574845e-06, "loss": 0.9478, "step": 48640 }, { "epoch": 0.3521611037518006, "grad_norm": 0.1651376187801361, "learning_rate": 4.647846134914258e-06, "loss": 0.9487, "step": 48650 }, { "epoch": 0.35223349041238683, "grad_norm": 0.18918465077877045, "learning_rate": 4.647773748253672e-06, "loss": 0.9562, "step": 48660 }, { "epoch": 0.352305877072973, "grad_norm": 0.1643255203962326, "learning_rate": 4.647701361593086e-06, "loss": 0.9502, "step": 48670 }, { "epoch": 0.3523782637335592, "grad_norm": 0.16544142365455627, "learning_rate": 4.6476289749325e-06, "loss": 0.9399, "step": 48680 }, { "epoch": 0.35245065039414536, "grad_norm": 0.1629703789949417, "learning_rate": 4.6475565882719135e-06, "loss": 0.9514, "step": 48690 }, { "epoch": 0.35252303705473154, "grad_norm": 0.15862901508808136, "learning_rate": 4.647484201611327e-06, "loss": 0.949, "step": 48700 }, { "epoch": 0.35259542371531777, "grad_norm": 0.1527785062789917, "learning_rate": 4.647411814950742e-06, "loss": 0.9548, "step": 48710 }, { "epoch": 0.35266781037590395, "grad_norm": 0.1874808520078659, "learning_rate": 4.647339428290155e-06, "loss": 0.9653, "step": 48720 }, { "epoch": 0.3527401970364901, "grad_norm": 0.18184229731559753, "learning_rate": 4.647267041629569e-06, "loss": 0.9433, "step": 48730 }, { "epoch": 0.3528125836970763, "grad_norm": 0.16013579070568085, "learning_rate": 4.6471946549689824e-06, "loss": 0.9545, "step": 48740 }, { "epoch": 0.3528849703576625, "grad_norm": 0.1711476594209671, "learning_rate": 4.647122268308397e-06, "loss": 0.9541, "step": 48750 }, { "epoch": 0.35295735701824865, "grad_norm": 0.16671577095985413, "learning_rate": 4.6470498816478105e-06, "loss": 0.9568, "step": 48760 }, { "epoch": 0.3530297436788349, "grad_norm": 0.2846975326538086, "learning_rate": 4.646977494987224e-06, "loss": 0.9496, "step": 48770 }, { "epoch": 0.35310213033942106, "grad_norm": 0.15886595845222473, "learning_rate": 4.646905108326638e-06, "loss": 0.9667, "step": 48780 }, { "epoch": 0.35317451700000724, "grad_norm": 0.1654849648475647, "learning_rate": 4.646832721666052e-06, "loss": 0.9483, "step": 48790 }, { "epoch": 0.3532469036605934, "grad_norm": 0.1595975011587143, "learning_rate": 4.646760335005466e-06, "loss": 0.952, "step": 48800 }, { "epoch": 0.3533192903211796, "grad_norm": 0.19034674763679504, "learning_rate": 4.6466879483448794e-06, "loss": 0.9593, "step": 48810 }, { "epoch": 0.3533916769817658, "grad_norm": 0.16194772720336914, "learning_rate": 4.646615561684293e-06, "loss": 0.9536, "step": 48820 }, { "epoch": 0.353464063642352, "grad_norm": 0.19409111142158508, "learning_rate": 4.6465431750237075e-06, "loss": 0.9578, "step": 48830 }, { "epoch": 0.3535364503029382, "grad_norm": 0.15368357300758362, "learning_rate": 4.646470788363121e-06, "loss": 0.9635, "step": 48840 }, { "epoch": 0.35360883696352435, "grad_norm": 0.1651766002178192, "learning_rate": 4.646398401702535e-06, "loss": 0.9542, "step": 48850 }, { "epoch": 0.35368122362411053, "grad_norm": 0.16302619874477386, "learning_rate": 4.646326015041948e-06, "loss": 0.9451, "step": 48860 }, { "epoch": 0.35375361028469676, "grad_norm": 0.16752517223358154, "learning_rate": 4.646253628381363e-06, "loss": 0.956, "step": 48870 }, { "epoch": 0.35382599694528294, "grad_norm": 0.16661721467971802, "learning_rate": 4.6461812417207765e-06, "loss": 0.9501, "step": 48880 }, { "epoch": 0.3538983836058691, "grad_norm": 0.16403451561927795, "learning_rate": 4.64610885506019e-06, "loss": 0.945, "step": 48890 }, { "epoch": 0.3539707702664553, "grad_norm": 0.16170132160186768, "learning_rate": 4.646036468399604e-06, "loss": 0.9528, "step": 48900 }, { "epoch": 0.35404315692704147, "grad_norm": 0.15352827310562134, "learning_rate": 4.645964081739017e-06, "loss": 0.964, "step": 48910 }, { "epoch": 0.35411554358762765, "grad_norm": 0.2216578722000122, "learning_rate": 4.645891695078431e-06, "loss": 0.9571, "step": 48920 }, { "epoch": 0.3541879302482139, "grad_norm": 0.1692575216293335, "learning_rate": 4.6458193084178445e-06, "loss": 0.9552, "step": 48930 }, { "epoch": 0.35426031690880005, "grad_norm": 0.16259534657001495, "learning_rate": 4.645746921757259e-06, "loss": 0.9497, "step": 48940 }, { "epoch": 0.35433270356938623, "grad_norm": 0.2892276644706726, "learning_rate": 4.645674535096673e-06, "loss": 0.9429, "step": 48950 }, { "epoch": 0.3544050902299724, "grad_norm": 0.1655515879392624, "learning_rate": 4.645602148436086e-06, "loss": 0.9362, "step": 48960 }, { "epoch": 0.3544774768905586, "grad_norm": 0.1588042974472046, "learning_rate": 4.6455297617755e-06, "loss": 0.9581, "step": 48970 }, { "epoch": 0.3545498635511448, "grad_norm": 0.16203071177005768, "learning_rate": 4.645457375114914e-06, "loss": 0.9525, "step": 48980 }, { "epoch": 0.354622250211731, "grad_norm": 0.1807693988084793, "learning_rate": 4.645384988454328e-06, "loss": 0.9412, "step": 48990 }, { "epoch": 0.35469463687231717, "grad_norm": 0.14859171211719513, "learning_rate": 4.6453126017937415e-06, "loss": 0.955, "step": 49000 }, { "epoch": 0.35476702353290335, "grad_norm": 0.17705701291561127, "learning_rate": 4.645240215133155e-06, "loss": 0.9459, "step": 49010 }, { "epoch": 0.3548394101934895, "grad_norm": 0.1593550145626068, "learning_rate": 4.64516782847257e-06, "loss": 0.9333, "step": 49020 }, { "epoch": 0.35491179685407576, "grad_norm": 0.16969063878059387, "learning_rate": 4.645095441811983e-06, "loss": 0.9572, "step": 49030 }, { "epoch": 0.35498418351466193, "grad_norm": 0.16061443090438843, "learning_rate": 4.645023055151397e-06, "loss": 0.9582, "step": 49040 }, { "epoch": 0.3550565701752481, "grad_norm": 0.17726626992225647, "learning_rate": 4.6449506684908105e-06, "loss": 0.9508, "step": 49050 }, { "epoch": 0.3551289568358343, "grad_norm": 0.1747400313615799, "learning_rate": 4.644878281830225e-06, "loss": 0.9436, "step": 49060 }, { "epoch": 0.35520134349642046, "grad_norm": 0.1950230598449707, "learning_rate": 4.6448058951696385e-06, "loss": 0.9571, "step": 49070 }, { "epoch": 0.35527373015700664, "grad_norm": 0.1573755443096161, "learning_rate": 4.644733508509052e-06, "loss": 0.9574, "step": 49080 }, { "epoch": 0.35534611681759287, "grad_norm": 0.17132724821567535, "learning_rate": 4.644661121848466e-06, "loss": 0.9586, "step": 49090 }, { "epoch": 0.35541850347817905, "grad_norm": 0.18015651404857635, "learning_rate": 4.64458873518788e-06, "loss": 0.9312, "step": 49100 }, { "epoch": 0.3554908901387652, "grad_norm": 0.17248773574829102, "learning_rate": 4.644516348527294e-06, "loss": 0.9444, "step": 49110 }, { "epoch": 0.3555632767993514, "grad_norm": 0.16664332151412964, "learning_rate": 4.6444439618667075e-06, "loss": 0.9524, "step": 49120 }, { "epoch": 0.3556356634599376, "grad_norm": 0.16630926728248596, "learning_rate": 4.644371575206121e-06, "loss": 0.9397, "step": 49130 }, { "epoch": 0.3557080501205238, "grad_norm": 0.17303983867168427, "learning_rate": 4.6442991885455356e-06, "loss": 0.9622, "step": 49140 }, { "epoch": 0.35578043678111, "grad_norm": 0.15973907709121704, "learning_rate": 4.644226801884949e-06, "loss": 0.9364, "step": 49150 }, { "epoch": 0.35585282344169616, "grad_norm": 0.1687871366739273, "learning_rate": 4.644154415224363e-06, "loss": 0.9577, "step": 49160 }, { "epoch": 0.35592521010228234, "grad_norm": 0.16706955432891846, "learning_rate": 4.644082028563776e-06, "loss": 0.9619, "step": 49170 }, { "epoch": 0.3559975967628685, "grad_norm": 0.19494742155075073, "learning_rate": 4.644009641903191e-06, "loss": 0.947, "step": 49180 }, { "epoch": 0.35606998342345475, "grad_norm": 0.1793723702430725, "learning_rate": 4.6439372552426045e-06, "loss": 0.9618, "step": 49190 }, { "epoch": 0.3561423700840409, "grad_norm": 0.16906292736530304, "learning_rate": 4.643864868582018e-06, "loss": 0.9433, "step": 49200 }, { "epoch": 0.3562147567446271, "grad_norm": 0.1776910424232483, "learning_rate": 4.643792481921432e-06, "loss": 0.9434, "step": 49210 }, { "epoch": 0.3562871434052133, "grad_norm": 0.17651747167110443, "learning_rate": 4.643720095260846e-06, "loss": 0.9524, "step": 49220 }, { "epoch": 0.35635953006579946, "grad_norm": 0.1679781824350357, "learning_rate": 4.64364770860026e-06, "loss": 0.9429, "step": 49230 }, { "epoch": 0.3564319167263857, "grad_norm": 0.17428244650363922, "learning_rate": 4.643575321939673e-06, "loss": 0.9497, "step": 49240 }, { "epoch": 0.35650430338697187, "grad_norm": 0.16317783296108246, "learning_rate": 4.643502935279087e-06, "loss": 0.9579, "step": 49250 }, { "epoch": 0.35657669004755804, "grad_norm": 0.17340287566184998, "learning_rate": 4.643430548618501e-06, "loss": 0.9584, "step": 49260 }, { "epoch": 0.3566490767081442, "grad_norm": 0.17054349184036255, "learning_rate": 4.643358161957915e-06, "loss": 0.9482, "step": 49270 }, { "epoch": 0.3567214633687304, "grad_norm": 0.15321429073810577, "learning_rate": 4.643285775297329e-06, "loss": 0.9532, "step": 49280 }, { "epoch": 0.3567938500293166, "grad_norm": 0.16249078512191772, "learning_rate": 4.643213388636742e-06, "loss": 0.9366, "step": 49290 }, { "epoch": 0.3568662366899028, "grad_norm": 0.23813359439373016, "learning_rate": 4.643141001976156e-06, "loss": 0.9628, "step": 49300 }, { "epoch": 0.356938623350489, "grad_norm": 0.18279701471328735, "learning_rate": 4.64306861531557e-06, "loss": 0.962, "step": 49310 }, { "epoch": 0.35701101001107516, "grad_norm": 0.1736493855714798, "learning_rate": 4.642996228654984e-06, "loss": 0.9578, "step": 49320 }, { "epoch": 0.35708339667166134, "grad_norm": 0.16122330725193024, "learning_rate": 4.642923841994398e-06, "loss": 0.9503, "step": 49330 }, { "epoch": 0.3571557833322475, "grad_norm": 0.15669801831245422, "learning_rate": 4.642851455333811e-06, "loss": 0.9562, "step": 49340 }, { "epoch": 0.35722816999283374, "grad_norm": 0.16338752210140228, "learning_rate": 4.642779068673226e-06, "loss": 0.9554, "step": 49350 }, { "epoch": 0.3573005566534199, "grad_norm": 0.16918496787548065, "learning_rate": 4.642706682012639e-06, "loss": 0.946, "step": 49360 }, { "epoch": 0.3573729433140061, "grad_norm": 0.17653869092464447, "learning_rate": 4.642634295352053e-06, "loss": 0.9528, "step": 49370 }, { "epoch": 0.3574453299745923, "grad_norm": 0.1767107993364334, "learning_rate": 4.642561908691467e-06, "loss": 0.9456, "step": 49380 }, { "epoch": 0.35751771663517845, "grad_norm": 0.16276022791862488, "learning_rate": 4.642489522030881e-06, "loss": 0.9418, "step": 49390 }, { "epoch": 0.3575901032957647, "grad_norm": 0.15793178975582123, "learning_rate": 4.642417135370295e-06, "loss": 0.9514, "step": 49400 }, { "epoch": 0.35766248995635086, "grad_norm": 0.16991020739078522, "learning_rate": 4.642344748709708e-06, "loss": 0.939, "step": 49410 }, { "epoch": 0.35773487661693704, "grad_norm": 0.1681062877178192, "learning_rate": 4.642272362049122e-06, "loss": 0.9462, "step": 49420 }, { "epoch": 0.3578072632775232, "grad_norm": 0.18085841834545135, "learning_rate": 4.6421999753885355e-06, "loss": 0.945, "step": 49430 }, { "epoch": 0.3578796499381094, "grad_norm": 0.1598934829235077, "learning_rate": 4.642127588727949e-06, "loss": 0.9579, "step": 49440 }, { "epoch": 0.35795203659869557, "grad_norm": 0.19113032519817352, "learning_rate": 4.642055202067363e-06, "loss": 0.9484, "step": 49450 }, { "epoch": 0.3580244232592818, "grad_norm": 0.16052620112895966, "learning_rate": 4.641982815406777e-06, "loss": 0.9557, "step": 49460 }, { "epoch": 0.358096809919868, "grad_norm": 0.16908268630504608, "learning_rate": 4.641910428746191e-06, "loss": 0.9453, "step": 49470 }, { "epoch": 0.35816919658045415, "grad_norm": 0.19930988550186157, "learning_rate": 4.6418380420856044e-06, "loss": 0.9453, "step": 49480 }, { "epoch": 0.35824158324104033, "grad_norm": 0.16531233489513397, "learning_rate": 4.641765655425018e-06, "loss": 0.9414, "step": 49490 }, { "epoch": 0.3583139699016265, "grad_norm": 0.16358982026576996, "learning_rate": 4.6416932687644325e-06, "loss": 0.9555, "step": 49500 }, { "epoch": 0.35838635656221274, "grad_norm": 0.16903631389141083, "learning_rate": 4.641620882103846e-06, "loss": 0.9489, "step": 49510 }, { "epoch": 0.3584587432227989, "grad_norm": 0.1676577776670456, "learning_rate": 4.64154849544326e-06, "loss": 0.956, "step": 49520 }, { "epoch": 0.3585311298833851, "grad_norm": 0.1515856236219406, "learning_rate": 4.641476108782673e-06, "loss": 0.9528, "step": 49530 }, { "epoch": 0.35860351654397127, "grad_norm": 0.16165132820606232, "learning_rate": 4.641403722122088e-06, "loss": 0.9508, "step": 49540 }, { "epoch": 0.35867590320455744, "grad_norm": 0.1639813631772995, "learning_rate": 4.6413313354615014e-06, "loss": 0.9484, "step": 49550 }, { "epoch": 0.3587482898651437, "grad_norm": 0.1556709110736847, "learning_rate": 4.641258948800915e-06, "loss": 0.9368, "step": 49560 }, { "epoch": 0.35882067652572985, "grad_norm": 0.16210544109344482, "learning_rate": 4.641186562140329e-06, "loss": 0.9479, "step": 49570 }, { "epoch": 0.35889306318631603, "grad_norm": 0.16633464395999908, "learning_rate": 4.641114175479743e-06, "loss": 0.9517, "step": 49580 }, { "epoch": 0.3589654498469022, "grad_norm": 0.16160385310649872, "learning_rate": 4.641041788819157e-06, "loss": 0.9523, "step": 49590 }, { "epoch": 0.3590378365074884, "grad_norm": 0.16910237073898315, "learning_rate": 4.64096940215857e-06, "loss": 0.9453, "step": 49600 }, { "epoch": 0.35911022316807456, "grad_norm": 0.20249851047992706, "learning_rate": 4.640897015497984e-06, "loss": 0.9446, "step": 49610 }, { "epoch": 0.3591826098286608, "grad_norm": 0.16274423897266388, "learning_rate": 4.6408246288373985e-06, "loss": 0.951, "step": 49620 }, { "epoch": 0.35925499648924697, "grad_norm": 0.16763456165790558, "learning_rate": 4.640752242176812e-06, "loss": 0.9379, "step": 49630 }, { "epoch": 0.35932738314983315, "grad_norm": 0.15942569077014923, "learning_rate": 4.640679855516226e-06, "loss": 0.9472, "step": 49640 }, { "epoch": 0.3593997698104193, "grad_norm": 0.17928923666477203, "learning_rate": 4.640607468855639e-06, "loss": 0.9577, "step": 49650 }, { "epoch": 0.3594721564710055, "grad_norm": 0.16758985817432404, "learning_rate": 4.640535082195054e-06, "loss": 0.9494, "step": 49660 }, { "epoch": 0.35954454313159173, "grad_norm": 0.18791483342647552, "learning_rate": 4.640462695534467e-06, "loss": 0.9423, "step": 49670 }, { "epoch": 0.3596169297921779, "grad_norm": 0.19120506942272186, "learning_rate": 4.640390308873881e-06, "loss": 0.9602, "step": 49680 }, { "epoch": 0.3596893164527641, "grad_norm": 0.17304813861846924, "learning_rate": 4.640317922213295e-06, "loss": 0.9485, "step": 49690 }, { "epoch": 0.35976170311335026, "grad_norm": 0.16456131637096405, "learning_rate": 4.640245535552709e-06, "loss": 0.9379, "step": 49700 }, { "epoch": 0.35983408977393644, "grad_norm": 0.1641353815793991, "learning_rate": 4.640173148892123e-06, "loss": 0.9485, "step": 49710 }, { "epoch": 0.35990647643452267, "grad_norm": 0.17807172238826752, "learning_rate": 4.640100762231536e-06, "loss": 0.939, "step": 49720 }, { "epoch": 0.35997886309510885, "grad_norm": 0.17257729172706604, "learning_rate": 4.64002837557095e-06, "loss": 0.9506, "step": 49730 }, { "epoch": 0.360051249755695, "grad_norm": 0.16776679456233978, "learning_rate": 4.639955988910364e-06, "loss": 0.9543, "step": 49740 }, { "epoch": 0.3601236364162812, "grad_norm": 0.16268086433410645, "learning_rate": 4.639883602249778e-06, "loss": 0.9506, "step": 49750 }, { "epoch": 0.3601960230768674, "grad_norm": 0.18560020625591278, "learning_rate": 4.639811215589192e-06, "loss": 0.952, "step": 49760 }, { "epoch": 0.36026840973745355, "grad_norm": 0.1764240264892578, "learning_rate": 4.639738828928605e-06, "loss": 0.9481, "step": 49770 }, { "epoch": 0.3603407963980398, "grad_norm": 0.18193010985851288, "learning_rate": 4.63966644226802e-06, "loss": 0.9451, "step": 49780 }, { "epoch": 0.36041318305862596, "grad_norm": 0.15828032791614532, "learning_rate": 4.639594055607433e-06, "loss": 0.9549, "step": 49790 }, { "epoch": 0.36048556971921214, "grad_norm": 0.20637375116348267, "learning_rate": 4.639521668946847e-06, "loss": 0.9618, "step": 49800 }, { "epoch": 0.3605579563797983, "grad_norm": 0.15375225245952606, "learning_rate": 4.6394492822862605e-06, "loss": 0.9475, "step": 49810 }, { "epoch": 0.3606303430403845, "grad_norm": 0.16718937456607819, "learning_rate": 4.639376895625675e-06, "loss": 0.9517, "step": 49820 }, { "epoch": 0.3607027297009707, "grad_norm": 0.1495947241783142, "learning_rate": 4.639304508965089e-06, "loss": 0.9442, "step": 49830 }, { "epoch": 0.3607751163615569, "grad_norm": 0.16354574263095856, "learning_rate": 4.639232122304502e-06, "loss": 0.9516, "step": 49840 }, { "epoch": 0.3608475030221431, "grad_norm": 0.16006174683570862, "learning_rate": 4.639159735643916e-06, "loss": 0.9571, "step": 49850 }, { "epoch": 0.36091988968272926, "grad_norm": 0.1511203497648239, "learning_rate": 4.63908734898333e-06, "loss": 0.9498, "step": 49860 }, { "epoch": 0.36099227634331543, "grad_norm": 0.16783535480499268, "learning_rate": 4.639014962322744e-06, "loss": 0.9507, "step": 49870 }, { "epoch": 0.36106466300390166, "grad_norm": 0.1579425036907196, "learning_rate": 4.6389425756621576e-06, "loss": 0.945, "step": 49880 }, { "epoch": 0.36113704966448784, "grad_norm": 0.16360943019390106, "learning_rate": 4.638870189001571e-06, "loss": 0.9373, "step": 49890 }, { "epoch": 0.361209436325074, "grad_norm": 0.15648721158504486, "learning_rate": 4.638797802340985e-06, "loss": 0.9672, "step": 49900 }, { "epoch": 0.3612818229856602, "grad_norm": 0.16514001786708832, "learning_rate": 4.638725415680399e-06, "loss": 0.9446, "step": 49910 }, { "epoch": 0.36135420964624637, "grad_norm": 0.20961642265319824, "learning_rate": 4.638653029019813e-06, "loss": 0.9411, "step": 49920 }, { "epoch": 0.3614265963068326, "grad_norm": 0.1592184156179428, "learning_rate": 4.6385806423592265e-06, "loss": 0.9524, "step": 49930 }, { "epoch": 0.3614989829674188, "grad_norm": 0.16908025741577148, "learning_rate": 4.63850825569864e-06, "loss": 0.9332, "step": 49940 }, { "epoch": 0.36157136962800496, "grad_norm": 0.15697824954986572, "learning_rate": 4.6384358690380546e-06, "loss": 0.9351, "step": 49950 }, { "epoch": 0.36164375628859113, "grad_norm": 0.16380582749843597, "learning_rate": 4.638363482377467e-06, "loss": 0.9462, "step": 49960 }, { "epoch": 0.3617161429491773, "grad_norm": 0.15993693470954895, "learning_rate": 4.638291095716882e-06, "loss": 0.9584, "step": 49970 }, { "epoch": 0.3617885296097635, "grad_norm": 0.17109087109565735, "learning_rate": 4.638218709056295e-06, "loss": 0.9411, "step": 49980 }, { "epoch": 0.3618609162703497, "grad_norm": 0.16840317845344543, "learning_rate": 4.638146322395709e-06, "loss": 0.9362, "step": 49990 }, { "epoch": 0.3619333029309359, "grad_norm": 0.16615648567676544, "learning_rate": 4.638073935735123e-06, "loss": 0.9501, "step": 50000 }, { "epoch": 0.3620056895915221, "grad_norm": 0.16905289888381958, "learning_rate": 4.638001549074537e-06, "loss": 0.9524, "step": 50010 }, { "epoch": 0.36207807625210825, "grad_norm": 0.17198826372623444, "learning_rate": 4.637929162413951e-06, "loss": 0.9435, "step": 50020 }, { "epoch": 0.3621504629126944, "grad_norm": 0.17047207057476044, "learning_rate": 4.637856775753364e-06, "loss": 0.9492, "step": 50030 }, { "epoch": 0.36222284957328066, "grad_norm": 0.18584772944450378, "learning_rate": 4.637784389092778e-06, "loss": 0.9482, "step": 50040 }, { "epoch": 0.36229523623386684, "grad_norm": 0.16773472726345062, "learning_rate": 4.637712002432192e-06, "loss": 0.9572, "step": 50050 }, { "epoch": 0.362367622894453, "grad_norm": 0.16679717600345612, "learning_rate": 4.637639615771606e-06, "loss": 0.9529, "step": 50060 }, { "epoch": 0.3624400095550392, "grad_norm": 0.15634585916996002, "learning_rate": 4.63756722911102e-06, "loss": 0.9401, "step": 50070 }, { "epoch": 0.36251239621562537, "grad_norm": 0.15569652616977692, "learning_rate": 4.637494842450433e-06, "loss": 0.9416, "step": 50080 }, { "epoch": 0.3625847828762116, "grad_norm": 0.16276825964450836, "learning_rate": 4.637422455789847e-06, "loss": 0.9422, "step": 50090 }, { "epoch": 0.3626571695367978, "grad_norm": 0.16607090830802917, "learning_rate": 4.637350069129261e-06, "loss": 0.9587, "step": 50100 }, { "epoch": 0.36272955619738395, "grad_norm": 0.17144577205181122, "learning_rate": 4.637277682468675e-06, "loss": 0.9356, "step": 50110 }, { "epoch": 0.36280194285797013, "grad_norm": 0.16335241496562958, "learning_rate": 4.637205295808089e-06, "loss": 0.9581, "step": 50120 }, { "epoch": 0.3628743295185563, "grad_norm": 0.17196884751319885, "learning_rate": 4.637132909147502e-06, "loss": 0.9345, "step": 50130 }, { "epoch": 0.3629467161791425, "grad_norm": 0.1646929383277893, "learning_rate": 4.637060522486917e-06, "loss": 0.9388, "step": 50140 }, { "epoch": 0.3630191028397287, "grad_norm": 0.16134804487228394, "learning_rate": 4.63698813582633e-06, "loss": 0.9546, "step": 50150 }, { "epoch": 0.3630914895003149, "grad_norm": 0.16641758382320404, "learning_rate": 4.636915749165744e-06, "loss": 0.9681, "step": 50160 }, { "epoch": 0.36316387616090107, "grad_norm": 0.1564699113368988, "learning_rate": 4.6368433625051575e-06, "loss": 0.9527, "step": 50170 }, { "epoch": 0.36323626282148724, "grad_norm": 0.16455498337745667, "learning_rate": 4.636770975844572e-06, "loss": 0.9407, "step": 50180 }, { "epoch": 0.3633086494820734, "grad_norm": 0.16031110286712646, "learning_rate": 4.636698589183986e-06, "loss": 0.9418, "step": 50190 }, { "epoch": 0.36338103614265965, "grad_norm": 0.1539280265569687, "learning_rate": 4.636626202523399e-06, "loss": 0.9496, "step": 50200 }, { "epoch": 0.36345342280324583, "grad_norm": 0.20575940608978271, "learning_rate": 4.636553815862813e-06, "loss": 0.9332, "step": 50210 }, { "epoch": 0.363525809463832, "grad_norm": 0.17900900542736053, "learning_rate": 4.636481429202227e-06, "loss": 0.9533, "step": 50220 }, { "epoch": 0.3635981961244182, "grad_norm": 0.1708286702632904, "learning_rate": 4.636409042541641e-06, "loss": 0.9504, "step": 50230 }, { "epoch": 0.36367058278500436, "grad_norm": 0.16686797142028809, "learning_rate": 4.6363366558810545e-06, "loss": 0.951, "step": 50240 }, { "epoch": 0.3637429694455906, "grad_norm": 0.1779949814081192, "learning_rate": 4.636264269220468e-06, "loss": 0.954, "step": 50250 }, { "epoch": 0.36381535610617677, "grad_norm": 0.16261711716651917, "learning_rate": 4.636191882559883e-06, "loss": 0.9552, "step": 50260 }, { "epoch": 0.36388774276676294, "grad_norm": 0.15804466605186462, "learning_rate": 4.636119495899296e-06, "loss": 0.952, "step": 50270 }, { "epoch": 0.3639601294273491, "grad_norm": 0.18854446709156036, "learning_rate": 4.63604710923871e-06, "loss": 0.937, "step": 50280 }, { "epoch": 0.3640325160879353, "grad_norm": 0.16362988948822021, "learning_rate": 4.6359747225781234e-06, "loss": 0.9439, "step": 50290 }, { "epoch": 0.3641049027485215, "grad_norm": 0.1906929761171341, "learning_rate": 4.635902335917538e-06, "loss": 0.9522, "step": 50300 }, { "epoch": 0.3641772894091077, "grad_norm": 0.1719408482313156, "learning_rate": 4.6358299492569515e-06, "loss": 0.9409, "step": 50310 }, { "epoch": 0.3642496760696939, "grad_norm": 0.17714013159275055, "learning_rate": 4.635757562596365e-06, "loss": 0.9425, "step": 50320 }, { "epoch": 0.36432206273028006, "grad_norm": 0.1718074232339859, "learning_rate": 4.635685175935779e-06, "loss": 0.9431, "step": 50330 }, { "epoch": 0.36439444939086624, "grad_norm": 0.16074173152446747, "learning_rate": 4.635612789275193e-06, "loss": 0.9598, "step": 50340 }, { "epoch": 0.3644668360514524, "grad_norm": 0.1573699712753296, "learning_rate": 4.635540402614607e-06, "loss": 0.9416, "step": 50350 }, { "epoch": 0.36453922271203865, "grad_norm": 0.15923991799354553, "learning_rate": 4.6354680159540205e-06, "loss": 0.9591, "step": 50360 }, { "epoch": 0.3646116093726248, "grad_norm": 0.17145873606204987, "learning_rate": 4.635395629293434e-06, "loss": 0.9399, "step": 50370 }, { "epoch": 0.364683996033211, "grad_norm": 0.16001589596271515, "learning_rate": 4.6353232426328485e-06, "loss": 0.9558, "step": 50380 }, { "epoch": 0.3647563826937972, "grad_norm": 0.16999055445194244, "learning_rate": 4.635250855972262e-06, "loss": 0.9505, "step": 50390 }, { "epoch": 0.36482876935438335, "grad_norm": 0.1643807590007782, "learning_rate": 4.635178469311676e-06, "loss": 0.9385, "step": 50400 }, { "epoch": 0.3649011560149696, "grad_norm": 0.1788293868303299, "learning_rate": 4.635106082651089e-06, "loss": 0.9312, "step": 50410 }, { "epoch": 0.36497354267555576, "grad_norm": 0.17690715193748474, "learning_rate": 4.635033695990504e-06, "loss": 0.9524, "step": 50420 }, { "epoch": 0.36504592933614194, "grad_norm": 0.17854566872119904, "learning_rate": 4.6349613093299175e-06, "loss": 0.952, "step": 50430 }, { "epoch": 0.3651183159967281, "grad_norm": 0.4458518326282501, "learning_rate": 4.634888922669331e-06, "loss": 0.9479, "step": 50440 }, { "epoch": 0.3651907026573143, "grad_norm": 0.15616470575332642, "learning_rate": 4.634816536008745e-06, "loss": 0.9488, "step": 50450 }, { "epoch": 0.3652630893179005, "grad_norm": 0.16164493560791016, "learning_rate": 4.634744149348159e-06, "loss": 0.9507, "step": 50460 }, { "epoch": 0.3653354759784867, "grad_norm": 0.16681145131587982, "learning_rate": 4.634671762687573e-06, "loss": 0.9473, "step": 50470 }, { "epoch": 0.3654078626390729, "grad_norm": 0.1800214797258377, "learning_rate": 4.634599376026986e-06, "loss": 0.9655, "step": 50480 }, { "epoch": 0.36548024929965905, "grad_norm": 0.1790829747915268, "learning_rate": 4.6345269893664e-06, "loss": 0.9581, "step": 50490 }, { "epoch": 0.36555263596024523, "grad_norm": 0.1491861194372177, "learning_rate": 4.634454602705814e-06, "loss": 0.9409, "step": 50500 }, { "epoch": 0.3656250226208314, "grad_norm": 0.15858380496501923, "learning_rate": 4.634382216045227e-06, "loss": 0.9392, "step": 50510 }, { "epoch": 0.36569740928141764, "grad_norm": 0.17849485576152802, "learning_rate": 4.634309829384641e-06, "loss": 0.9595, "step": 50520 }, { "epoch": 0.3657697959420038, "grad_norm": 0.16567130386829376, "learning_rate": 4.634237442724055e-06, "loss": 0.9496, "step": 50530 }, { "epoch": 0.36584218260259, "grad_norm": 0.22058269381523132, "learning_rate": 4.634165056063469e-06, "loss": 0.9507, "step": 50540 }, { "epoch": 0.36591456926317617, "grad_norm": 0.16320207715034485, "learning_rate": 4.6340926694028825e-06, "loss": 0.9408, "step": 50550 }, { "epoch": 0.36598695592376235, "grad_norm": 0.19167020916938782, "learning_rate": 4.634020282742296e-06, "loss": 0.9467, "step": 50560 }, { "epoch": 0.3660593425843486, "grad_norm": 0.17240388691425323, "learning_rate": 4.633947896081711e-06, "loss": 0.9623, "step": 50570 }, { "epoch": 0.36613172924493476, "grad_norm": 0.20315563678741455, "learning_rate": 4.633875509421124e-06, "loss": 0.9401, "step": 50580 }, { "epoch": 0.36620411590552093, "grad_norm": 0.1570211499929428, "learning_rate": 4.633803122760538e-06, "loss": 0.9451, "step": 50590 }, { "epoch": 0.3662765025661071, "grad_norm": 0.18627873063087463, "learning_rate": 4.6337307360999515e-06, "loss": 0.9548, "step": 50600 }, { "epoch": 0.3663488892266933, "grad_norm": 0.19180041551589966, "learning_rate": 4.633658349439366e-06, "loss": 0.9445, "step": 50610 }, { "epoch": 0.3664212758872795, "grad_norm": 0.19508033990859985, "learning_rate": 4.6335859627787796e-06, "loss": 0.9493, "step": 50620 }, { "epoch": 0.3664936625478657, "grad_norm": 0.18519370257854462, "learning_rate": 4.633513576118193e-06, "loss": 0.9308, "step": 50630 }, { "epoch": 0.36656604920845187, "grad_norm": 0.1680838167667389, "learning_rate": 4.633441189457607e-06, "loss": 0.9335, "step": 50640 }, { "epoch": 0.36663843586903805, "grad_norm": 0.16373924911022186, "learning_rate": 4.633368802797021e-06, "loss": 0.9413, "step": 50650 }, { "epoch": 0.3667108225296242, "grad_norm": 0.16160275042057037, "learning_rate": 4.633296416136435e-06, "loss": 0.9487, "step": 50660 }, { "epoch": 0.3667832091902104, "grad_norm": 0.1721445471048355, "learning_rate": 4.6332240294758485e-06, "loss": 0.9351, "step": 50670 }, { "epoch": 0.36685559585079663, "grad_norm": 0.1686573028564453, "learning_rate": 4.633151642815262e-06, "loss": 0.9441, "step": 50680 }, { "epoch": 0.3669279825113828, "grad_norm": 0.15622854232788086, "learning_rate": 4.633079256154676e-06, "loss": 0.9586, "step": 50690 }, { "epoch": 0.367000369171969, "grad_norm": 0.14963869750499725, "learning_rate": 4.63300686949409e-06, "loss": 0.9496, "step": 50700 }, { "epoch": 0.36707275583255516, "grad_norm": 0.17733648419380188, "learning_rate": 4.632934482833504e-06, "loss": 0.9453, "step": 50710 }, { "epoch": 0.36714514249314134, "grad_norm": 0.15873117744922638, "learning_rate": 4.632862096172917e-06, "loss": 0.9551, "step": 50720 }, { "epoch": 0.3672175291537276, "grad_norm": 0.15206924080848694, "learning_rate": 4.632789709512331e-06, "loss": 0.9318, "step": 50730 }, { "epoch": 0.36728991581431375, "grad_norm": 0.16713659465312958, "learning_rate": 4.6327173228517455e-06, "loss": 0.9512, "step": 50740 }, { "epoch": 0.3673623024748999, "grad_norm": 0.1614990532398224, "learning_rate": 4.632644936191159e-06, "loss": 0.9528, "step": 50750 }, { "epoch": 0.3674346891354861, "grad_norm": 0.16960258781909943, "learning_rate": 4.632572549530573e-06, "loss": 0.9465, "step": 50760 }, { "epoch": 0.3675070757960723, "grad_norm": 0.19546057283878326, "learning_rate": 4.632500162869986e-06, "loss": 0.9463, "step": 50770 }, { "epoch": 0.3675794624566585, "grad_norm": 0.1588112711906433, "learning_rate": 4.632427776209401e-06, "loss": 0.9295, "step": 50780 }, { "epoch": 0.3676518491172447, "grad_norm": 0.17281554639339447, "learning_rate": 4.632355389548814e-06, "loss": 0.956, "step": 50790 }, { "epoch": 0.36772423577783087, "grad_norm": 0.15607163310050964, "learning_rate": 4.632283002888228e-06, "loss": 0.9452, "step": 50800 }, { "epoch": 0.36779662243841704, "grad_norm": 0.16770051419734955, "learning_rate": 4.632210616227642e-06, "loss": 0.9582, "step": 50810 }, { "epoch": 0.3678690090990032, "grad_norm": 0.15941555798053741, "learning_rate": 4.632138229567056e-06, "loss": 0.9394, "step": 50820 }, { "epoch": 0.3679413957595894, "grad_norm": 0.1637001931667328, "learning_rate": 4.63206584290647e-06, "loss": 0.9377, "step": 50830 }, { "epoch": 0.36801378242017563, "grad_norm": 0.17539703845977783, "learning_rate": 4.631993456245883e-06, "loss": 0.9532, "step": 50840 }, { "epoch": 0.3680861690807618, "grad_norm": 0.15488503873348236, "learning_rate": 4.631921069585297e-06, "loss": 0.9572, "step": 50850 }, { "epoch": 0.368158555741348, "grad_norm": 0.17145054042339325, "learning_rate": 4.6318486829247114e-06, "loss": 0.9558, "step": 50860 }, { "epoch": 0.36823094240193416, "grad_norm": 0.16223368048667908, "learning_rate": 4.631776296264125e-06, "loss": 0.9489, "step": 50870 }, { "epoch": 0.36830332906252033, "grad_norm": 0.17038589715957642, "learning_rate": 4.631703909603539e-06, "loss": 0.9607, "step": 50880 }, { "epoch": 0.36837571572310657, "grad_norm": 0.17411848902702332, "learning_rate": 4.631631522942952e-06, "loss": 0.9467, "step": 50890 }, { "epoch": 0.36844810238369274, "grad_norm": 0.15501435101032257, "learning_rate": 4.631559136282367e-06, "loss": 0.946, "step": 50900 }, { "epoch": 0.3685204890442789, "grad_norm": 0.15863391757011414, "learning_rate": 4.63148674962178e-06, "loss": 0.9441, "step": 50910 }, { "epoch": 0.3685928757048651, "grad_norm": 0.17681151628494263, "learning_rate": 4.631414362961194e-06, "loss": 0.9548, "step": 50920 }, { "epoch": 0.3686652623654513, "grad_norm": 0.1619083285331726, "learning_rate": 4.631341976300608e-06, "loss": 0.9488, "step": 50930 }, { "epoch": 0.3687376490260375, "grad_norm": 0.16921238601207733, "learning_rate": 4.631269589640022e-06, "loss": 0.9414, "step": 50940 }, { "epoch": 0.3688100356866237, "grad_norm": 0.16636481881141663, "learning_rate": 4.631197202979436e-06, "loss": 0.941, "step": 50950 }, { "epoch": 0.36888242234720986, "grad_norm": 0.16682793200016022, "learning_rate": 4.631124816318849e-06, "loss": 0.9447, "step": 50960 }, { "epoch": 0.36895480900779604, "grad_norm": 0.1645735502243042, "learning_rate": 4.631052429658263e-06, "loss": 0.9495, "step": 50970 }, { "epoch": 0.3690271956683822, "grad_norm": 0.16850468516349792, "learning_rate": 4.630980042997677e-06, "loss": 0.9512, "step": 50980 }, { "epoch": 0.36909958232896845, "grad_norm": 0.16100184619426727, "learning_rate": 4.630907656337091e-06, "loss": 0.9483, "step": 50990 }, { "epoch": 0.3691719689895546, "grad_norm": 0.1832258701324463, "learning_rate": 4.630835269676505e-06, "loss": 0.9398, "step": 51000 }, { "epoch": 0.3692443556501408, "grad_norm": 0.1679748147726059, "learning_rate": 4.630762883015918e-06, "loss": 0.9545, "step": 51010 }, { "epoch": 0.369316742310727, "grad_norm": 0.5284795165061951, "learning_rate": 4.630690496355332e-06, "loss": 0.9481, "step": 51020 }, { "epoch": 0.36938912897131315, "grad_norm": 0.16089926660060883, "learning_rate": 4.6306181096947454e-06, "loss": 0.9499, "step": 51030 }, { "epoch": 0.36946151563189933, "grad_norm": 0.16643081605434418, "learning_rate": 4.630545723034159e-06, "loss": 0.948, "step": 51040 }, { "epoch": 0.36953390229248556, "grad_norm": 0.24914591014385223, "learning_rate": 4.6304733363735735e-06, "loss": 0.9461, "step": 51050 }, { "epoch": 0.36960628895307174, "grad_norm": 0.16817229986190796, "learning_rate": 4.630400949712987e-06, "loss": 0.9431, "step": 51060 }, { "epoch": 0.3696786756136579, "grad_norm": 0.15578976273536682, "learning_rate": 4.630328563052401e-06, "loss": 0.9523, "step": 51070 }, { "epoch": 0.3697510622742441, "grad_norm": 0.1646818220615387, "learning_rate": 4.630256176391814e-06, "loss": 0.9489, "step": 51080 }, { "epoch": 0.36982344893483027, "grad_norm": 0.15397506952285767, "learning_rate": 4.630183789731229e-06, "loss": 0.939, "step": 51090 }, { "epoch": 0.3698958355954165, "grad_norm": 0.1653456836938858, "learning_rate": 4.6301114030706424e-06, "loss": 0.9552, "step": 51100 }, { "epoch": 0.3699682222560027, "grad_norm": 0.1986481249332428, "learning_rate": 4.630039016410056e-06, "loss": 0.9519, "step": 51110 }, { "epoch": 0.37004060891658885, "grad_norm": 0.17512935400009155, "learning_rate": 4.62996662974947e-06, "loss": 0.9582, "step": 51120 }, { "epoch": 0.37011299557717503, "grad_norm": 0.17302803695201874, "learning_rate": 4.629894243088884e-06, "loss": 0.9275, "step": 51130 }, { "epoch": 0.3701853822377612, "grad_norm": 0.16957373917102814, "learning_rate": 4.629821856428298e-06, "loss": 0.9455, "step": 51140 }, { "epoch": 0.37025776889834744, "grad_norm": 0.1609341949224472, "learning_rate": 4.629749469767711e-06, "loss": 0.9449, "step": 51150 }, { "epoch": 0.3703301555589336, "grad_norm": 0.2060326784849167, "learning_rate": 4.629677083107125e-06, "loss": 0.9487, "step": 51160 }, { "epoch": 0.3704025422195198, "grad_norm": 0.15667761862277985, "learning_rate": 4.6296046964465395e-06, "loss": 0.9421, "step": 51170 }, { "epoch": 0.37047492888010597, "grad_norm": 0.15692569315433502, "learning_rate": 4.629532309785953e-06, "loss": 0.9383, "step": 51180 }, { "epoch": 0.37054731554069215, "grad_norm": 0.15926668047904968, "learning_rate": 4.629459923125367e-06, "loss": 0.9483, "step": 51190 }, { "epoch": 0.3706197022012783, "grad_norm": 0.16595801711082458, "learning_rate": 4.62938753646478e-06, "loss": 0.9489, "step": 51200 }, { "epoch": 0.37069208886186455, "grad_norm": 0.1648326963186264, "learning_rate": 4.629315149804195e-06, "loss": 0.9634, "step": 51210 }, { "epoch": 0.37076447552245073, "grad_norm": 0.15535977482795715, "learning_rate": 4.629242763143608e-06, "loss": 0.9404, "step": 51220 }, { "epoch": 0.3708368621830369, "grad_norm": 0.1696339249610901, "learning_rate": 4.629170376483022e-06, "loss": 0.9539, "step": 51230 }, { "epoch": 0.3709092488436231, "grad_norm": 0.15960168838500977, "learning_rate": 4.629097989822436e-06, "loss": 0.9377, "step": 51240 }, { "epoch": 0.37098163550420926, "grad_norm": 0.20475220680236816, "learning_rate": 4.62902560316185e-06, "loss": 0.9529, "step": 51250 }, { "epoch": 0.3710540221647955, "grad_norm": 0.16864748299121857, "learning_rate": 4.628953216501264e-06, "loss": 0.9446, "step": 51260 }, { "epoch": 0.37112640882538167, "grad_norm": 0.1571142077445984, "learning_rate": 4.628880829840677e-06, "loss": 0.9558, "step": 51270 }, { "epoch": 0.37119879548596785, "grad_norm": 0.19135212898254395, "learning_rate": 4.628808443180091e-06, "loss": 0.9484, "step": 51280 }, { "epoch": 0.371271182146554, "grad_norm": 0.15567857027053833, "learning_rate": 4.628736056519505e-06, "loss": 0.9503, "step": 51290 }, { "epoch": 0.3713435688071402, "grad_norm": 0.16534672677516937, "learning_rate": 4.628663669858919e-06, "loss": 0.9329, "step": 51300 }, { "epoch": 0.37141595546772643, "grad_norm": 0.16259850561618805, "learning_rate": 4.628591283198333e-06, "loss": 0.9419, "step": 51310 }, { "epoch": 0.3714883421283126, "grad_norm": 0.20146603882312775, "learning_rate": 4.628518896537746e-06, "loss": 0.956, "step": 51320 }, { "epoch": 0.3715607287888988, "grad_norm": 0.18213625252246857, "learning_rate": 4.62844650987716e-06, "loss": 0.9639, "step": 51330 }, { "epoch": 0.37163311544948496, "grad_norm": 0.15779289603233337, "learning_rate": 4.628374123216574e-06, "loss": 0.9425, "step": 51340 }, { "epoch": 0.37170550211007114, "grad_norm": 0.16919735074043274, "learning_rate": 4.628301736555988e-06, "loss": 0.9501, "step": 51350 }, { "epoch": 0.3717778887706573, "grad_norm": 0.1676865667104721, "learning_rate": 4.6282293498954016e-06, "loss": 0.9453, "step": 51360 }, { "epoch": 0.37185027543124355, "grad_norm": 0.15803247690200806, "learning_rate": 4.628156963234815e-06, "loss": 0.9435, "step": 51370 }, { "epoch": 0.3719226620918297, "grad_norm": 0.1553627997636795, "learning_rate": 4.62808457657423e-06, "loss": 0.9638, "step": 51380 }, { "epoch": 0.3719950487524159, "grad_norm": 0.16494029760360718, "learning_rate": 4.628012189913643e-06, "loss": 0.9366, "step": 51390 }, { "epoch": 0.3720674354130021, "grad_norm": 0.16437461972236633, "learning_rate": 4.627939803253057e-06, "loss": 0.94, "step": 51400 }, { "epoch": 0.37213982207358826, "grad_norm": 0.15786989033222198, "learning_rate": 4.6278674165924705e-06, "loss": 0.9574, "step": 51410 }, { "epoch": 0.3722122087341745, "grad_norm": 0.16579070687294006, "learning_rate": 4.627795029931885e-06, "loss": 0.949, "step": 51420 }, { "epoch": 0.37228459539476066, "grad_norm": 0.16631585359573364, "learning_rate": 4.6277226432712986e-06, "loss": 0.9393, "step": 51430 }, { "epoch": 0.37235698205534684, "grad_norm": 0.21473245322704315, "learning_rate": 4.627650256610712e-06, "loss": 0.9482, "step": 51440 }, { "epoch": 0.372429368715933, "grad_norm": 0.17821331322193146, "learning_rate": 4.627577869950126e-06, "loss": 0.9511, "step": 51450 }, { "epoch": 0.3725017553765192, "grad_norm": 0.16437208652496338, "learning_rate": 4.62750548328954e-06, "loss": 0.9522, "step": 51460 }, { "epoch": 0.3725741420371054, "grad_norm": 0.1748124063014984, "learning_rate": 4.627433096628954e-06, "loss": 0.951, "step": 51470 }, { "epoch": 0.3726465286976916, "grad_norm": 0.1691410392522812, "learning_rate": 4.6273607099683675e-06, "loss": 0.9452, "step": 51480 }, { "epoch": 0.3727189153582778, "grad_norm": 0.15897352993488312, "learning_rate": 4.627288323307781e-06, "loss": 0.9503, "step": 51490 }, { "epoch": 0.37279130201886396, "grad_norm": 0.17863501608371735, "learning_rate": 4.6272159366471956e-06, "loss": 0.9485, "step": 51500 }, { "epoch": 0.37286368867945013, "grad_norm": 0.1577998846769333, "learning_rate": 4.627143549986609e-06, "loss": 0.9468, "step": 51510 }, { "epoch": 0.3729360753400363, "grad_norm": 0.17091219127178192, "learning_rate": 4.627071163326023e-06, "loss": 0.949, "step": 51520 }, { "epoch": 0.37300846200062254, "grad_norm": 0.16450020670890808, "learning_rate": 4.626998776665436e-06, "loss": 0.9375, "step": 51530 }, { "epoch": 0.3730808486612087, "grad_norm": 0.20446856319904327, "learning_rate": 4.626926390004851e-06, "loss": 0.945, "step": 51540 }, { "epoch": 0.3731532353217949, "grad_norm": 0.18482069671154022, "learning_rate": 4.626854003344264e-06, "loss": 0.9365, "step": 51550 }, { "epoch": 0.3732256219823811, "grad_norm": 0.2597537934780121, "learning_rate": 4.626781616683677e-06, "loss": 0.9474, "step": 51560 }, { "epoch": 0.37329800864296725, "grad_norm": 0.16503006219863892, "learning_rate": 4.626709230023092e-06, "loss": 0.9384, "step": 51570 }, { "epoch": 0.3733703953035535, "grad_norm": 0.14666348695755005, "learning_rate": 4.626636843362505e-06, "loss": 0.9408, "step": 51580 }, { "epoch": 0.37344278196413966, "grad_norm": 0.1652412712574005, "learning_rate": 4.626564456701919e-06, "loss": 0.9559, "step": 51590 }, { "epoch": 0.37351516862472584, "grad_norm": 0.15990935266017914, "learning_rate": 4.626492070041333e-06, "loss": 0.9363, "step": 51600 }, { "epoch": 0.373587555285312, "grad_norm": 0.16256429255008698, "learning_rate": 4.626419683380747e-06, "loss": 0.947, "step": 51610 }, { "epoch": 0.3736599419458982, "grad_norm": 0.166569322347641, "learning_rate": 4.626347296720161e-06, "loss": 0.9548, "step": 51620 }, { "epoch": 0.3737323286064844, "grad_norm": 0.18789708614349365, "learning_rate": 4.626274910059574e-06, "loss": 0.9502, "step": 51630 }, { "epoch": 0.3738047152670706, "grad_norm": 0.16426165401935577, "learning_rate": 4.626202523398988e-06, "loss": 0.942, "step": 51640 }, { "epoch": 0.3738771019276568, "grad_norm": 0.15513013303279877, "learning_rate": 4.626130136738402e-06, "loss": 0.9541, "step": 51650 }, { "epoch": 0.37394948858824295, "grad_norm": 0.1891675591468811, "learning_rate": 4.626057750077816e-06, "loss": 0.956, "step": 51660 }, { "epoch": 0.3740218752488291, "grad_norm": 0.17240507900714874, "learning_rate": 4.62598536341723e-06, "loss": 0.9564, "step": 51670 }, { "epoch": 0.37409426190941536, "grad_norm": 0.16830044984817505, "learning_rate": 4.625912976756643e-06, "loss": 0.9326, "step": 51680 }, { "epoch": 0.37416664857000154, "grad_norm": 0.16331635415554047, "learning_rate": 4.625840590096058e-06, "loss": 0.945, "step": 51690 }, { "epoch": 0.3742390352305877, "grad_norm": 0.15753015875816345, "learning_rate": 4.625768203435471e-06, "loss": 0.9527, "step": 51700 }, { "epoch": 0.3743114218911739, "grad_norm": 0.16955150663852692, "learning_rate": 4.625695816774885e-06, "loss": 0.9453, "step": 51710 }, { "epoch": 0.37438380855176007, "grad_norm": 0.17448243498802185, "learning_rate": 4.6256234301142985e-06, "loss": 0.9526, "step": 51720 }, { "epoch": 0.37445619521234624, "grad_norm": 0.17462722957134247, "learning_rate": 4.625551043453713e-06, "loss": 0.9496, "step": 51730 }, { "epoch": 0.3745285818729325, "grad_norm": 0.15947075188159943, "learning_rate": 4.625478656793127e-06, "loss": 0.9412, "step": 51740 }, { "epoch": 0.37460096853351865, "grad_norm": 0.16033557057380676, "learning_rate": 4.62540627013254e-06, "loss": 0.9442, "step": 51750 }, { "epoch": 0.37467335519410483, "grad_norm": 0.15499283373355865, "learning_rate": 4.625333883471954e-06, "loss": 0.9436, "step": 51760 }, { "epoch": 0.374745741854691, "grad_norm": 0.16719605028629303, "learning_rate": 4.625261496811368e-06, "loss": 0.9456, "step": 51770 }, { "epoch": 0.3748181285152772, "grad_norm": 0.16747929155826569, "learning_rate": 4.625189110150782e-06, "loss": 0.9396, "step": 51780 }, { "epoch": 0.3748905151758634, "grad_norm": 0.16449685394763947, "learning_rate": 4.6251167234901955e-06, "loss": 0.9478, "step": 51790 }, { "epoch": 0.3749629018364496, "grad_norm": 0.16664811968803406, "learning_rate": 4.625044336829609e-06, "loss": 0.9462, "step": 51800 }, { "epoch": 0.37503528849703577, "grad_norm": 0.16544151306152344, "learning_rate": 4.624971950169024e-06, "loss": 0.9416, "step": 51810 }, { "epoch": 0.37510767515762194, "grad_norm": 0.1873922049999237, "learning_rate": 4.624899563508437e-06, "loss": 0.9471, "step": 51820 }, { "epoch": 0.3751800618182081, "grad_norm": 0.1557958424091339, "learning_rate": 4.624827176847851e-06, "loss": 0.9429, "step": 51830 }, { "epoch": 0.37525244847879435, "grad_norm": 0.1720801442861557, "learning_rate": 4.6247547901872644e-06, "loss": 0.9529, "step": 51840 }, { "epoch": 0.37532483513938053, "grad_norm": 0.2492019236087799, "learning_rate": 4.624682403526679e-06, "loss": 0.9338, "step": 51850 }, { "epoch": 0.3753972217999667, "grad_norm": 0.15926505625247955, "learning_rate": 4.6246100168660925e-06, "loss": 0.9455, "step": 51860 }, { "epoch": 0.3754696084605529, "grad_norm": 0.16041946411132812, "learning_rate": 4.624537630205506e-06, "loss": 0.9507, "step": 51870 }, { "epoch": 0.37554199512113906, "grad_norm": 0.1602320671081543, "learning_rate": 4.62446524354492e-06, "loss": 0.9493, "step": 51880 }, { "epoch": 0.37561438178172524, "grad_norm": 0.18607132136821747, "learning_rate": 4.624392856884334e-06, "loss": 0.9591, "step": 51890 }, { "epoch": 0.37568676844231147, "grad_norm": 0.1767975091934204, "learning_rate": 4.624320470223748e-06, "loss": 0.9571, "step": 51900 }, { "epoch": 0.37575915510289765, "grad_norm": 0.15797516703605652, "learning_rate": 4.6242480835631615e-06, "loss": 0.9301, "step": 51910 }, { "epoch": 0.3758315417634838, "grad_norm": 0.16194427013397217, "learning_rate": 4.624175696902575e-06, "loss": 0.9337, "step": 51920 }, { "epoch": 0.37590392842407, "grad_norm": 0.15718939900398254, "learning_rate": 4.624103310241989e-06, "loss": 0.9361, "step": 51930 }, { "epoch": 0.3759763150846562, "grad_norm": 0.15875135362148285, "learning_rate": 4.624030923581403e-06, "loss": 0.9309, "step": 51940 }, { "epoch": 0.3760487017452424, "grad_norm": 0.1533413827419281, "learning_rate": 4.623958536920817e-06, "loss": 0.9316, "step": 51950 }, { "epoch": 0.3761210884058286, "grad_norm": 0.21360956132411957, "learning_rate": 4.62388615026023e-06, "loss": 0.9535, "step": 51960 }, { "epoch": 0.37619347506641476, "grad_norm": 0.16147691011428833, "learning_rate": 4.623813763599644e-06, "loss": 0.9492, "step": 51970 }, { "epoch": 0.37626586172700094, "grad_norm": 0.1707833856344223, "learning_rate": 4.6237413769390585e-06, "loss": 0.9319, "step": 51980 }, { "epoch": 0.3763382483875871, "grad_norm": 0.17225024104118347, "learning_rate": 4.623668990278472e-06, "loss": 0.9323, "step": 51990 }, { "epoch": 0.37641063504817335, "grad_norm": 0.16834914684295654, "learning_rate": 4.623596603617886e-06, "loss": 0.9474, "step": 52000 }, { "epoch": 0.3764830217087595, "grad_norm": 0.1597452461719513, "learning_rate": 4.623524216957299e-06, "loss": 0.938, "step": 52010 }, { "epoch": 0.3765554083693457, "grad_norm": 0.15453481674194336, "learning_rate": 4.623451830296714e-06, "loss": 0.9591, "step": 52020 }, { "epoch": 0.3766277950299319, "grad_norm": 0.16118237376213074, "learning_rate": 4.623379443636127e-06, "loss": 0.9533, "step": 52030 }, { "epoch": 0.37670018169051805, "grad_norm": 0.1625969558954239, "learning_rate": 4.623307056975541e-06, "loss": 0.9415, "step": 52040 }, { "epoch": 0.37677256835110423, "grad_norm": 0.1555289477109909, "learning_rate": 4.623234670314955e-06, "loss": 0.9301, "step": 52050 }, { "epoch": 0.37684495501169046, "grad_norm": 0.16258127987384796, "learning_rate": 4.623162283654369e-06, "loss": 0.9373, "step": 52060 }, { "epoch": 0.37691734167227664, "grad_norm": 0.15739430487155914, "learning_rate": 4.623089896993783e-06, "loss": 0.9482, "step": 52070 }, { "epoch": 0.3769897283328628, "grad_norm": 0.1597059667110443, "learning_rate": 4.623017510333196e-06, "loss": 0.9505, "step": 52080 }, { "epoch": 0.377062114993449, "grad_norm": 0.16315515339374542, "learning_rate": 4.62294512367261e-06, "loss": 0.9478, "step": 52090 }, { "epoch": 0.37713450165403517, "grad_norm": 0.17426295578479767, "learning_rate": 4.6228727370120236e-06, "loss": 0.9359, "step": 52100 }, { "epoch": 0.3772068883146214, "grad_norm": 0.16051162779331207, "learning_rate": 4.622800350351437e-06, "loss": 0.9403, "step": 52110 }, { "epoch": 0.3772792749752076, "grad_norm": 0.17289088666439056, "learning_rate": 4.622727963690851e-06, "loss": 0.9381, "step": 52120 }, { "epoch": 0.37735166163579376, "grad_norm": 0.17087596654891968, "learning_rate": 4.622655577030265e-06, "loss": 0.9479, "step": 52130 }, { "epoch": 0.37742404829637993, "grad_norm": 0.16720347106456757, "learning_rate": 4.622583190369679e-06, "loss": 0.936, "step": 52140 }, { "epoch": 0.3774964349569661, "grad_norm": 0.18241596221923828, "learning_rate": 4.6225108037090925e-06, "loss": 0.943, "step": 52150 }, { "epoch": 0.37756882161755234, "grad_norm": 0.17475546896457672, "learning_rate": 4.622438417048506e-06, "loss": 0.9408, "step": 52160 }, { "epoch": 0.3776412082781385, "grad_norm": 0.15939339995384216, "learning_rate": 4.6223660303879206e-06, "loss": 0.948, "step": 52170 }, { "epoch": 0.3777135949387247, "grad_norm": 0.1768542230129242, "learning_rate": 4.622293643727334e-06, "loss": 0.9561, "step": 52180 }, { "epoch": 0.37778598159931087, "grad_norm": 0.15933936834335327, "learning_rate": 4.622221257066748e-06, "loss": 0.9486, "step": 52190 }, { "epoch": 0.37785836825989705, "grad_norm": 0.16799773275852203, "learning_rate": 4.622148870406161e-06, "loss": 0.9352, "step": 52200 }, { "epoch": 0.3779307549204833, "grad_norm": 0.2672235369682312, "learning_rate": 4.622076483745576e-06, "loss": 0.9393, "step": 52210 }, { "epoch": 0.37800314158106946, "grad_norm": 0.1704738289117813, "learning_rate": 4.6220040970849895e-06, "loss": 0.941, "step": 52220 }, { "epoch": 0.37807552824165563, "grad_norm": 0.16287149488925934, "learning_rate": 4.621931710424403e-06, "loss": 0.9248, "step": 52230 }, { "epoch": 0.3781479149022418, "grad_norm": 0.1523078829050064, "learning_rate": 4.621859323763817e-06, "loss": 0.9593, "step": 52240 }, { "epoch": 0.378220301562828, "grad_norm": 0.16731417179107666, "learning_rate": 4.621786937103231e-06, "loss": 0.9534, "step": 52250 }, { "epoch": 0.37829268822341416, "grad_norm": 0.2127581089735031, "learning_rate": 4.621714550442645e-06, "loss": 0.9454, "step": 52260 }, { "epoch": 0.3783650748840004, "grad_norm": 0.17544521391391754, "learning_rate": 4.621642163782058e-06, "loss": 0.9495, "step": 52270 }, { "epoch": 0.3784374615445866, "grad_norm": 0.1671367883682251, "learning_rate": 4.621569777121472e-06, "loss": 0.9431, "step": 52280 }, { "epoch": 0.37850984820517275, "grad_norm": 0.15478673577308655, "learning_rate": 4.6214973904608865e-06, "loss": 0.9475, "step": 52290 }, { "epoch": 0.3785822348657589, "grad_norm": 0.1520804613828659, "learning_rate": 4.6214250038003e-06, "loss": 0.9435, "step": 52300 }, { "epoch": 0.3786546215263451, "grad_norm": 0.1676923781633377, "learning_rate": 4.621352617139714e-06, "loss": 0.9409, "step": 52310 }, { "epoch": 0.37872700818693134, "grad_norm": 0.15937332808971405, "learning_rate": 4.621280230479127e-06, "loss": 0.9341, "step": 52320 }, { "epoch": 0.3787993948475175, "grad_norm": 0.16753974556922913, "learning_rate": 4.621207843818542e-06, "loss": 0.9533, "step": 52330 }, { "epoch": 0.3788717815081037, "grad_norm": 0.17047527432441711, "learning_rate": 4.621135457157955e-06, "loss": 0.948, "step": 52340 }, { "epoch": 0.37894416816868987, "grad_norm": 0.1727355569601059, "learning_rate": 4.621063070497369e-06, "loss": 0.9421, "step": 52350 }, { "epoch": 0.37901655482927604, "grad_norm": 0.1628546565771103, "learning_rate": 4.620990683836783e-06, "loss": 0.9464, "step": 52360 }, { "epoch": 0.3790889414898623, "grad_norm": 0.17123012244701385, "learning_rate": 4.620918297176197e-06, "loss": 0.9472, "step": 52370 }, { "epoch": 0.37916132815044845, "grad_norm": 0.1762952357530594, "learning_rate": 4.620845910515611e-06, "loss": 0.9495, "step": 52380 }, { "epoch": 0.37923371481103463, "grad_norm": 0.17170877754688263, "learning_rate": 4.620773523855024e-06, "loss": 0.9504, "step": 52390 }, { "epoch": 0.3793061014716208, "grad_norm": 0.16942241787910461, "learning_rate": 4.620701137194438e-06, "loss": 0.9413, "step": 52400 }, { "epoch": 0.379378488132207, "grad_norm": 0.15613293647766113, "learning_rate": 4.6206287505338524e-06, "loss": 0.9331, "step": 52410 }, { "epoch": 0.37945087479279316, "grad_norm": 0.18218636512756348, "learning_rate": 4.620556363873266e-06, "loss": 0.9552, "step": 52420 }, { "epoch": 0.3795232614533794, "grad_norm": 0.16046783328056335, "learning_rate": 4.62048397721268e-06, "loss": 0.9505, "step": 52430 }, { "epoch": 0.37959564811396557, "grad_norm": 0.16807179152965546, "learning_rate": 4.620411590552093e-06, "loss": 0.9554, "step": 52440 }, { "epoch": 0.37966803477455174, "grad_norm": 0.18919555842876434, "learning_rate": 4.620339203891508e-06, "loss": 0.9436, "step": 52450 }, { "epoch": 0.3797404214351379, "grad_norm": 0.16013583540916443, "learning_rate": 4.620266817230921e-06, "loss": 0.9366, "step": 52460 }, { "epoch": 0.3798128080957241, "grad_norm": 0.16914841532707214, "learning_rate": 4.620194430570335e-06, "loss": 0.9469, "step": 52470 }, { "epoch": 0.37988519475631033, "grad_norm": 0.1607542335987091, "learning_rate": 4.620122043909749e-06, "loss": 0.9475, "step": 52480 }, { "epoch": 0.3799575814168965, "grad_norm": 0.15814971923828125, "learning_rate": 4.620049657249163e-06, "loss": 0.9308, "step": 52490 }, { "epoch": 0.3800299680774827, "grad_norm": 0.1508866399526596, "learning_rate": 4.619977270588577e-06, "loss": 0.9608, "step": 52500 }, { "epoch": 0.38010235473806886, "grad_norm": 0.22690674662590027, "learning_rate": 4.61990488392799e-06, "loss": 0.9331, "step": 52510 }, { "epoch": 0.38017474139865504, "grad_norm": 0.1720598340034485, "learning_rate": 4.619832497267404e-06, "loss": 0.9616, "step": 52520 }, { "epoch": 0.38024712805924127, "grad_norm": 0.16492782533168793, "learning_rate": 4.619760110606818e-06, "loss": 0.9386, "step": 52530 }, { "epoch": 0.38031951471982745, "grad_norm": 0.1547376811504364, "learning_rate": 4.619687723946232e-06, "loss": 0.9459, "step": 52540 }, { "epoch": 0.3803919013804136, "grad_norm": 0.18093635141849518, "learning_rate": 4.619615337285646e-06, "loss": 0.9616, "step": 52550 }, { "epoch": 0.3804642880409998, "grad_norm": 0.16667254269123077, "learning_rate": 4.619542950625059e-06, "loss": 0.9431, "step": 52560 }, { "epoch": 0.380536674701586, "grad_norm": 0.15666356682777405, "learning_rate": 4.619470563964473e-06, "loss": 0.9355, "step": 52570 }, { "epoch": 0.38060906136217215, "grad_norm": 0.21181073784828186, "learning_rate": 4.619398177303887e-06, "loss": 0.9516, "step": 52580 }, { "epoch": 0.3806814480227584, "grad_norm": 0.16820743680000305, "learning_rate": 4.619325790643301e-06, "loss": 0.942, "step": 52590 }, { "epoch": 0.38075383468334456, "grad_norm": 0.19321396946907043, "learning_rate": 4.6192534039827145e-06, "loss": 0.9395, "step": 52600 }, { "epoch": 0.38082622134393074, "grad_norm": 0.16616934537887573, "learning_rate": 4.619181017322128e-06, "loss": 0.9376, "step": 52610 }, { "epoch": 0.3808986080045169, "grad_norm": 0.19867192208766937, "learning_rate": 4.619108630661542e-06, "loss": 0.945, "step": 52620 }, { "epoch": 0.3809709946651031, "grad_norm": 0.15694333612918854, "learning_rate": 4.619036244000955e-06, "loss": 0.9276, "step": 52630 }, { "epoch": 0.3810433813256893, "grad_norm": 0.16102519631385803, "learning_rate": 4.61896385734037e-06, "loss": 0.9493, "step": 52640 }, { "epoch": 0.3811157679862755, "grad_norm": 0.15607930719852448, "learning_rate": 4.6188914706797835e-06, "loss": 0.9453, "step": 52650 }, { "epoch": 0.3811881546468617, "grad_norm": 0.1629965603351593, "learning_rate": 4.618819084019197e-06, "loss": 0.9471, "step": 52660 }, { "epoch": 0.38126054130744785, "grad_norm": 0.15766113996505737, "learning_rate": 4.618746697358611e-06, "loss": 0.9396, "step": 52670 }, { "epoch": 0.38133292796803403, "grad_norm": 0.1607961505651474, "learning_rate": 4.618674310698025e-06, "loss": 0.9483, "step": 52680 }, { "epoch": 0.38140531462862026, "grad_norm": 0.1705811619758606, "learning_rate": 4.618601924037439e-06, "loss": 0.9418, "step": 52690 }, { "epoch": 0.38147770128920644, "grad_norm": 0.1601417511701584, "learning_rate": 4.618529537376852e-06, "loss": 0.9441, "step": 52700 }, { "epoch": 0.3815500879497926, "grad_norm": 0.1780814677476883, "learning_rate": 4.618457150716266e-06, "loss": 0.9551, "step": 52710 }, { "epoch": 0.3816224746103788, "grad_norm": 0.15561501681804657, "learning_rate": 4.61838476405568e-06, "loss": 0.9423, "step": 52720 }, { "epoch": 0.38169486127096497, "grad_norm": 0.15728692710399628, "learning_rate": 4.618312377395094e-06, "loss": 0.9354, "step": 52730 }, { "epoch": 0.3817672479315512, "grad_norm": 0.20734450221061707, "learning_rate": 4.618239990734508e-06, "loss": 0.9548, "step": 52740 }, { "epoch": 0.3818396345921374, "grad_norm": 0.17452536523342133, "learning_rate": 4.618167604073921e-06, "loss": 0.9418, "step": 52750 }, { "epoch": 0.38191202125272355, "grad_norm": 0.17230959236621857, "learning_rate": 4.618095217413335e-06, "loss": 0.9517, "step": 52760 }, { "epoch": 0.38198440791330973, "grad_norm": 0.16141583025455475, "learning_rate": 4.618022830752749e-06, "loss": 0.9378, "step": 52770 }, { "epoch": 0.3820567945738959, "grad_norm": 0.16719284653663635, "learning_rate": 4.617950444092163e-06, "loss": 0.9458, "step": 52780 }, { "epoch": 0.3821291812344821, "grad_norm": 0.1570596694946289, "learning_rate": 4.617878057431577e-06, "loss": 0.9495, "step": 52790 }, { "epoch": 0.3822015678950683, "grad_norm": 0.17521663010120392, "learning_rate": 4.61780567077099e-06, "loss": 0.9413, "step": 52800 }, { "epoch": 0.3822739545556545, "grad_norm": 0.1793406456708908, "learning_rate": 4.617733284110405e-06, "loss": 0.9346, "step": 52810 }, { "epoch": 0.38234634121624067, "grad_norm": 0.18589749932289124, "learning_rate": 4.617660897449818e-06, "loss": 0.9405, "step": 52820 }, { "epoch": 0.38241872787682685, "grad_norm": 0.15368783473968506, "learning_rate": 4.617588510789232e-06, "loss": 0.9345, "step": 52830 }, { "epoch": 0.382491114537413, "grad_norm": 0.1717151403427124, "learning_rate": 4.6175161241286456e-06, "loss": 0.9413, "step": 52840 }, { "epoch": 0.38256350119799926, "grad_norm": 0.1748562455177307, "learning_rate": 4.61744373746806e-06, "loss": 0.9458, "step": 52850 }, { "epoch": 0.38263588785858543, "grad_norm": 0.17434179782867432, "learning_rate": 4.617371350807474e-06, "loss": 0.9401, "step": 52860 }, { "epoch": 0.3827082745191716, "grad_norm": 0.1836162507534027, "learning_rate": 4.617298964146887e-06, "loss": 0.9455, "step": 52870 }, { "epoch": 0.3827806611797578, "grad_norm": 0.15718111395835876, "learning_rate": 4.617226577486301e-06, "loss": 0.9388, "step": 52880 }, { "epoch": 0.38285304784034396, "grad_norm": 0.15703000128269196, "learning_rate": 4.617154190825715e-06, "loss": 0.9423, "step": 52890 }, { "epoch": 0.3829254345009302, "grad_norm": 0.16674445569515228, "learning_rate": 4.617081804165129e-06, "loss": 0.951, "step": 52900 }, { "epoch": 0.38299782116151637, "grad_norm": 0.157858207821846, "learning_rate": 4.6170094175045426e-06, "loss": 0.9484, "step": 52910 }, { "epoch": 0.38307020782210255, "grad_norm": 0.17335231602191925, "learning_rate": 4.616937030843956e-06, "loss": 0.9482, "step": 52920 }, { "epoch": 0.3831425944826887, "grad_norm": 0.16465920209884644, "learning_rate": 4.616864644183371e-06, "loss": 0.9378, "step": 52930 }, { "epoch": 0.3832149811432749, "grad_norm": 0.17838311195373535, "learning_rate": 4.616792257522784e-06, "loss": 0.956, "step": 52940 }, { "epoch": 0.3832873678038611, "grad_norm": 0.1656845659017563, "learning_rate": 4.616719870862198e-06, "loss": 0.9409, "step": 52950 }, { "epoch": 0.3833597544644473, "grad_norm": 0.15346546471118927, "learning_rate": 4.6166474842016115e-06, "loss": 0.9334, "step": 52960 }, { "epoch": 0.3834321411250335, "grad_norm": 0.17252103984355927, "learning_rate": 4.616575097541026e-06, "loss": 0.9422, "step": 52970 }, { "epoch": 0.38350452778561966, "grad_norm": 0.16853582859039307, "learning_rate": 4.6165027108804396e-06, "loss": 0.944, "step": 52980 }, { "epoch": 0.38357691444620584, "grad_norm": 0.17308548092842102, "learning_rate": 4.616430324219853e-06, "loss": 0.9396, "step": 52990 }, { "epoch": 0.383649301106792, "grad_norm": 0.15582707524299622, "learning_rate": 4.616357937559267e-06, "loss": 0.9445, "step": 53000 }, { "epoch": 0.38372168776737825, "grad_norm": 0.15688510239124298, "learning_rate": 4.616285550898681e-06, "loss": 0.9359, "step": 53010 }, { "epoch": 0.3837940744279644, "grad_norm": 0.16674372553825378, "learning_rate": 4.616213164238095e-06, "loss": 0.9378, "step": 53020 }, { "epoch": 0.3838664610885506, "grad_norm": 0.15617065131664276, "learning_rate": 4.6161407775775085e-06, "loss": 0.9443, "step": 53030 }, { "epoch": 0.3839388477491368, "grad_norm": 0.18050551414489746, "learning_rate": 4.616068390916922e-06, "loss": 0.9282, "step": 53040 }, { "epoch": 0.38401123440972296, "grad_norm": 0.18156303465366364, "learning_rate": 4.6159960042563366e-06, "loss": 0.9467, "step": 53050 }, { "epoch": 0.3840836210703092, "grad_norm": 0.1625535637140274, "learning_rate": 4.61592361759575e-06, "loss": 0.9361, "step": 53060 }, { "epoch": 0.38415600773089537, "grad_norm": 0.150069460272789, "learning_rate": 4.615851230935164e-06, "loss": 0.9384, "step": 53070 }, { "epoch": 0.38422839439148154, "grad_norm": 0.17181698977947235, "learning_rate": 4.615778844274577e-06, "loss": 0.9275, "step": 53080 }, { "epoch": 0.3843007810520677, "grad_norm": 0.16848745942115784, "learning_rate": 4.615706457613992e-06, "loss": 0.9463, "step": 53090 }, { "epoch": 0.3843731677126539, "grad_norm": 0.15281365811824799, "learning_rate": 4.6156340709534055e-06, "loss": 0.9435, "step": 53100 }, { "epoch": 0.3844455543732401, "grad_norm": 0.16574618220329285, "learning_rate": 4.615561684292819e-06, "loss": 0.9379, "step": 53110 }, { "epoch": 0.3845179410338263, "grad_norm": 0.1556422859430313, "learning_rate": 4.615489297632233e-06, "loss": 0.9404, "step": 53120 }, { "epoch": 0.3845903276944125, "grad_norm": 0.16364730894565582, "learning_rate": 4.615416910971647e-06, "loss": 0.9436, "step": 53130 }, { "epoch": 0.38466271435499866, "grad_norm": 0.18387825787067413, "learning_rate": 4.61534452431106e-06, "loss": 0.9542, "step": 53140 }, { "epoch": 0.38473510101558483, "grad_norm": 0.16818496584892273, "learning_rate": 4.615272137650474e-06, "loss": 0.9338, "step": 53150 }, { "epoch": 0.384807487676171, "grad_norm": 0.17835207283496857, "learning_rate": 4.615199750989888e-06, "loss": 0.949, "step": 53160 }, { "epoch": 0.38487987433675724, "grad_norm": 0.15151627361774445, "learning_rate": 4.615127364329302e-06, "loss": 0.9357, "step": 53170 }, { "epoch": 0.3849522609973434, "grad_norm": 0.1659085601568222, "learning_rate": 4.615054977668715e-06, "loss": 0.939, "step": 53180 }, { "epoch": 0.3850246476579296, "grad_norm": 0.19778911769390106, "learning_rate": 4.614982591008129e-06, "loss": 0.95, "step": 53190 }, { "epoch": 0.3850970343185158, "grad_norm": 0.1595744490623474, "learning_rate": 4.614910204347543e-06, "loss": 0.9478, "step": 53200 }, { "epoch": 0.38516942097910195, "grad_norm": 0.16915087401866913, "learning_rate": 4.614837817686957e-06, "loss": 0.9462, "step": 53210 }, { "epoch": 0.3852418076396882, "grad_norm": 0.19996778666973114, "learning_rate": 4.614765431026371e-06, "loss": 0.9251, "step": 53220 }, { "epoch": 0.38531419430027436, "grad_norm": 0.17590823769569397, "learning_rate": 4.614693044365784e-06, "loss": 0.9577, "step": 53230 }, { "epoch": 0.38538658096086054, "grad_norm": 0.16508999466896057, "learning_rate": 4.614620657705199e-06, "loss": 0.9411, "step": 53240 }, { "epoch": 0.3854589676214467, "grad_norm": 0.15081745386123657, "learning_rate": 4.614548271044612e-06, "loss": 0.9475, "step": 53250 }, { "epoch": 0.3855313542820329, "grad_norm": 0.17549897730350494, "learning_rate": 4.614475884384026e-06, "loss": 0.9367, "step": 53260 }, { "epoch": 0.38560374094261907, "grad_norm": 0.1669255644083023, "learning_rate": 4.6144034977234395e-06, "loss": 0.9399, "step": 53270 }, { "epoch": 0.3856761276032053, "grad_norm": 0.15719322860240936, "learning_rate": 4.614331111062854e-06, "loss": 0.9427, "step": 53280 }, { "epoch": 0.3857485142637915, "grad_norm": 0.1589440554380417, "learning_rate": 4.614258724402268e-06, "loss": 0.9389, "step": 53290 }, { "epoch": 0.38582090092437765, "grad_norm": 0.1575065553188324, "learning_rate": 4.614186337741681e-06, "loss": 0.9479, "step": 53300 }, { "epoch": 0.38589328758496383, "grad_norm": 0.18554028868675232, "learning_rate": 4.614113951081095e-06, "loss": 0.9357, "step": 53310 }, { "epoch": 0.38596567424555, "grad_norm": 0.1604183316230774, "learning_rate": 4.614041564420509e-06, "loss": 0.9588, "step": 53320 }, { "epoch": 0.38603806090613624, "grad_norm": 0.18416175246238708, "learning_rate": 4.613969177759923e-06, "loss": 0.9475, "step": 53330 }, { "epoch": 0.3861104475667224, "grad_norm": 0.17273585498332977, "learning_rate": 4.6138967910993365e-06, "loss": 0.9493, "step": 53340 }, { "epoch": 0.3861828342273086, "grad_norm": 0.15361787378787994, "learning_rate": 4.61382440443875e-06, "loss": 0.9467, "step": 53350 }, { "epoch": 0.38625522088789477, "grad_norm": 0.16739359498023987, "learning_rate": 4.613752017778164e-06, "loss": 0.9473, "step": 53360 }, { "epoch": 0.38632760754848094, "grad_norm": 0.17038732767105103, "learning_rate": 4.613679631117578e-06, "loss": 0.9526, "step": 53370 }, { "epoch": 0.3863999942090672, "grad_norm": 0.16086862981319427, "learning_rate": 4.613607244456992e-06, "loss": 0.9518, "step": 53380 }, { "epoch": 0.38647238086965335, "grad_norm": 0.18573077023029327, "learning_rate": 4.6135348577964055e-06, "loss": 0.9483, "step": 53390 }, { "epoch": 0.38654476753023953, "grad_norm": 0.17494763433933258, "learning_rate": 4.613462471135819e-06, "loss": 0.9391, "step": 53400 }, { "epoch": 0.3866171541908257, "grad_norm": 0.19013044238090515, "learning_rate": 4.6133900844752335e-06, "loss": 0.9467, "step": 53410 }, { "epoch": 0.3866895408514119, "grad_norm": 0.16894245147705078, "learning_rate": 4.613317697814647e-06, "loss": 0.9394, "step": 53420 }, { "epoch": 0.3867619275119981, "grad_norm": 0.15704815089702606, "learning_rate": 4.613245311154061e-06, "loss": 0.9343, "step": 53430 }, { "epoch": 0.3868343141725843, "grad_norm": 0.15952664613723755, "learning_rate": 4.613172924493474e-06, "loss": 0.9402, "step": 53440 }, { "epoch": 0.38690670083317047, "grad_norm": 0.15518204867839813, "learning_rate": 4.613100537832889e-06, "loss": 0.9413, "step": 53450 }, { "epoch": 0.38697908749375665, "grad_norm": 0.16507835686206818, "learning_rate": 4.6130281511723025e-06, "loss": 0.9396, "step": 53460 }, { "epoch": 0.3870514741543428, "grad_norm": 0.17540588974952698, "learning_rate": 4.612955764511716e-06, "loss": 0.9663, "step": 53470 }, { "epoch": 0.387123860814929, "grad_norm": 0.16836467385292053, "learning_rate": 4.61288337785113e-06, "loss": 0.9458, "step": 53480 }, { "epoch": 0.38719624747551523, "grad_norm": 0.17496466636657715, "learning_rate": 4.612810991190544e-06, "loss": 0.9404, "step": 53490 }, { "epoch": 0.3872686341361014, "grad_norm": 0.17381425201892853, "learning_rate": 4.612738604529958e-06, "loss": 0.9517, "step": 53500 }, { "epoch": 0.3873410207966876, "grad_norm": 0.15964674949645996, "learning_rate": 4.612666217869371e-06, "loss": 0.9452, "step": 53510 }, { "epoch": 0.38741340745727376, "grad_norm": 0.16532573103904724, "learning_rate": 4.612593831208785e-06, "loss": 0.9341, "step": 53520 }, { "epoch": 0.38748579411785994, "grad_norm": 0.17337869107723236, "learning_rate": 4.6125214445481995e-06, "loss": 0.9402, "step": 53530 }, { "epoch": 0.38755818077844617, "grad_norm": 0.1612296998500824, "learning_rate": 4.612449057887613e-06, "loss": 0.9458, "step": 53540 }, { "epoch": 0.38763056743903235, "grad_norm": 0.1577647626399994, "learning_rate": 4.612376671227027e-06, "loss": 0.9357, "step": 53550 }, { "epoch": 0.3877029540996185, "grad_norm": 0.1562313437461853, "learning_rate": 4.61230428456644e-06, "loss": 0.9474, "step": 53560 }, { "epoch": 0.3877753407602047, "grad_norm": 0.1620018631219864, "learning_rate": 4.612231897905855e-06, "loss": 0.9408, "step": 53570 }, { "epoch": 0.3878477274207909, "grad_norm": 0.21448646485805511, "learning_rate": 4.612159511245268e-06, "loss": 0.954, "step": 53580 }, { "epoch": 0.3879201140813771, "grad_norm": 0.16676433384418488, "learning_rate": 4.612087124584682e-06, "loss": 0.9484, "step": 53590 }, { "epoch": 0.3879925007419633, "grad_norm": 0.16710571944713593, "learning_rate": 4.612014737924096e-06, "loss": 0.9479, "step": 53600 }, { "epoch": 0.38806488740254946, "grad_norm": 0.1688128262758255, "learning_rate": 4.61194235126351e-06, "loss": 0.941, "step": 53610 }, { "epoch": 0.38813727406313564, "grad_norm": 0.15826979279518127, "learning_rate": 4.611869964602924e-06, "loss": 0.9407, "step": 53620 }, { "epoch": 0.3882096607237218, "grad_norm": 0.3778460621833801, "learning_rate": 4.611797577942337e-06, "loss": 0.9465, "step": 53630 }, { "epoch": 0.388282047384308, "grad_norm": 0.1658174842596054, "learning_rate": 4.611725191281751e-06, "loss": 0.9414, "step": 53640 }, { "epoch": 0.3883544340448942, "grad_norm": 0.16166210174560547, "learning_rate": 4.611652804621165e-06, "loss": 0.9495, "step": 53650 }, { "epoch": 0.3884268207054804, "grad_norm": 0.17662234604358673, "learning_rate": 4.611580417960579e-06, "loss": 0.9516, "step": 53660 }, { "epoch": 0.3884992073660666, "grad_norm": 0.16207055747509003, "learning_rate": 4.611508031299992e-06, "loss": 0.9417, "step": 53670 }, { "epoch": 0.38857159402665276, "grad_norm": 0.17646333575248718, "learning_rate": 4.611435644639406e-06, "loss": 0.9415, "step": 53680 }, { "epoch": 0.38864398068723893, "grad_norm": 0.15925532579421997, "learning_rate": 4.61136325797882e-06, "loss": 0.9373, "step": 53690 }, { "epoch": 0.38871636734782516, "grad_norm": 0.15857084095478058, "learning_rate": 4.6112908713182335e-06, "loss": 0.9461, "step": 53700 }, { "epoch": 0.38878875400841134, "grad_norm": 0.16620729863643646, "learning_rate": 4.611218484657647e-06, "loss": 0.9419, "step": 53710 }, { "epoch": 0.3888611406689975, "grad_norm": 0.14982983469963074, "learning_rate": 4.6111460979970616e-06, "loss": 0.9434, "step": 53720 }, { "epoch": 0.3889335273295837, "grad_norm": 0.16091054677963257, "learning_rate": 4.611073711336475e-06, "loss": 0.939, "step": 53730 }, { "epoch": 0.38900591399016987, "grad_norm": 0.17934353649616241, "learning_rate": 4.611001324675889e-06, "loss": 0.9422, "step": 53740 }, { "epoch": 0.3890783006507561, "grad_norm": 0.22706985473632812, "learning_rate": 4.610928938015302e-06, "loss": 0.9458, "step": 53750 }, { "epoch": 0.3891506873113423, "grad_norm": 0.19281072914600372, "learning_rate": 4.610856551354717e-06, "loss": 0.9466, "step": 53760 }, { "epoch": 0.38922307397192846, "grad_norm": 0.17095661163330078, "learning_rate": 4.6107841646941305e-06, "loss": 0.9353, "step": 53770 }, { "epoch": 0.38929546063251463, "grad_norm": 0.1668483316898346, "learning_rate": 4.610711778033544e-06, "loss": 0.948, "step": 53780 }, { "epoch": 0.3893678472931008, "grad_norm": 0.1767456829547882, "learning_rate": 4.610639391372958e-06, "loss": 0.9435, "step": 53790 }, { "epoch": 0.389440233953687, "grad_norm": 0.15454381704330444, "learning_rate": 4.610567004712372e-06, "loss": 0.9393, "step": 53800 }, { "epoch": 0.3895126206142732, "grad_norm": 0.16419751942157745, "learning_rate": 4.610494618051786e-06, "loss": 0.9305, "step": 53810 }, { "epoch": 0.3895850072748594, "grad_norm": 0.1819341629743576, "learning_rate": 4.610422231391199e-06, "loss": 0.9309, "step": 53820 }, { "epoch": 0.3896573939354456, "grad_norm": 0.15951742231845856, "learning_rate": 4.610349844730613e-06, "loss": 0.945, "step": 53830 }, { "epoch": 0.38972978059603175, "grad_norm": 0.15791021287441254, "learning_rate": 4.6102774580700275e-06, "loss": 0.9441, "step": 53840 }, { "epoch": 0.3898021672566179, "grad_norm": 0.16241905093193054, "learning_rate": 4.610205071409441e-06, "loss": 0.9372, "step": 53850 }, { "epoch": 0.38987455391720416, "grad_norm": 0.1761135458946228, "learning_rate": 4.610132684748855e-06, "loss": 0.9509, "step": 53860 }, { "epoch": 0.38994694057779034, "grad_norm": 0.16063787043094635, "learning_rate": 4.610060298088268e-06, "loss": 0.9453, "step": 53870 }, { "epoch": 0.3900193272383765, "grad_norm": 0.17681707441806793, "learning_rate": 4.609987911427683e-06, "loss": 0.9474, "step": 53880 }, { "epoch": 0.3900917138989627, "grad_norm": 0.18907004594802856, "learning_rate": 4.6099155247670964e-06, "loss": 0.9496, "step": 53890 }, { "epoch": 0.39016410055954887, "grad_norm": 0.1664309799671173, "learning_rate": 4.60984313810651e-06, "loss": 0.9371, "step": 53900 }, { "epoch": 0.3902364872201351, "grad_norm": 0.16614876687526703, "learning_rate": 4.609770751445924e-06, "loss": 0.9383, "step": 53910 }, { "epoch": 0.3903088738807213, "grad_norm": 0.16756542026996613, "learning_rate": 4.609698364785338e-06, "loss": 0.9444, "step": 53920 }, { "epoch": 0.39038126054130745, "grad_norm": 0.19262923300266266, "learning_rate": 4.609625978124752e-06, "loss": 0.9412, "step": 53930 }, { "epoch": 0.3904536472018936, "grad_norm": 0.1710319072008133, "learning_rate": 4.609553591464165e-06, "loss": 0.9517, "step": 53940 }, { "epoch": 0.3905260338624798, "grad_norm": 0.15746866166591644, "learning_rate": 4.609481204803579e-06, "loss": 0.9364, "step": 53950 }, { "epoch": 0.39059842052306604, "grad_norm": 0.16860923171043396, "learning_rate": 4.6094088181429934e-06, "loss": 0.9342, "step": 53960 }, { "epoch": 0.3906708071836522, "grad_norm": 0.20959050953388214, "learning_rate": 4.609336431482407e-06, "loss": 0.9483, "step": 53970 }, { "epoch": 0.3907431938442384, "grad_norm": 0.1634407639503479, "learning_rate": 4.609264044821821e-06, "loss": 0.9353, "step": 53980 }, { "epoch": 0.39081558050482457, "grad_norm": 0.16322919726371765, "learning_rate": 4.609191658161234e-06, "loss": 0.9264, "step": 53990 }, { "epoch": 0.39088796716541074, "grad_norm": 0.14622409641742706, "learning_rate": 4.609119271500648e-06, "loss": 0.9348, "step": 54000 }, { "epoch": 0.3909603538259969, "grad_norm": 0.1500609666109085, "learning_rate": 4.609046884840062e-06, "loss": 0.9372, "step": 54010 }, { "epoch": 0.39103274048658315, "grad_norm": 0.18767625093460083, "learning_rate": 4.608974498179476e-06, "loss": 0.9268, "step": 54020 }, { "epoch": 0.39110512714716933, "grad_norm": 0.16851924359798431, "learning_rate": 4.60890211151889e-06, "loss": 0.9589, "step": 54030 }, { "epoch": 0.3911775138077555, "grad_norm": 0.1965654194355011, "learning_rate": 4.608829724858303e-06, "loss": 0.9422, "step": 54040 }, { "epoch": 0.3912499004683417, "grad_norm": 0.15963523089885712, "learning_rate": 4.608757338197718e-06, "loss": 0.9442, "step": 54050 }, { "epoch": 0.39132228712892786, "grad_norm": 0.16074392199516296, "learning_rate": 4.608684951537131e-06, "loss": 0.9486, "step": 54060 }, { "epoch": 0.3913946737895141, "grad_norm": 0.1722477674484253, "learning_rate": 4.608612564876545e-06, "loss": 0.9376, "step": 54070 }, { "epoch": 0.39146706045010027, "grad_norm": 0.16781273484230042, "learning_rate": 4.6085401782159585e-06, "loss": 0.9461, "step": 54080 }, { "epoch": 0.39153944711068644, "grad_norm": 0.17819204926490784, "learning_rate": 4.608467791555373e-06, "loss": 0.9365, "step": 54090 }, { "epoch": 0.3916118337712726, "grad_norm": 0.1631801426410675, "learning_rate": 4.608395404894787e-06, "loss": 0.9405, "step": 54100 }, { "epoch": 0.3916842204318588, "grad_norm": 0.1547449231147766, "learning_rate": 4.6083230182342e-06, "loss": 0.9377, "step": 54110 }, { "epoch": 0.39175660709244503, "grad_norm": 0.16404055058956146, "learning_rate": 4.608250631573614e-06, "loss": 0.9356, "step": 54120 }, { "epoch": 0.3918289937530312, "grad_norm": 0.1674279123544693, "learning_rate": 4.608178244913028e-06, "loss": 0.9219, "step": 54130 }, { "epoch": 0.3919013804136174, "grad_norm": 0.14838223159313202, "learning_rate": 4.608105858252442e-06, "loss": 0.9361, "step": 54140 }, { "epoch": 0.39197376707420356, "grad_norm": 0.16514058411121368, "learning_rate": 4.6080334715918555e-06, "loss": 0.9414, "step": 54150 }, { "epoch": 0.39204615373478974, "grad_norm": 0.16169683635234833, "learning_rate": 4.607961084931269e-06, "loss": 0.9349, "step": 54160 }, { "epoch": 0.3921185403953759, "grad_norm": 0.1660471111536026, "learning_rate": 4.607888698270684e-06, "loss": 0.9509, "step": 54170 }, { "epoch": 0.39219092705596215, "grad_norm": 0.1746322214603424, "learning_rate": 4.607816311610097e-06, "loss": 0.9371, "step": 54180 }, { "epoch": 0.3922633137165483, "grad_norm": 0.16147038340568542, "learning_rate": 4.607743924949511e-06, "loss": 0.9442, "step": 54190 }, { "epoch": 0.3923357003771345, "grad_norm": 0.1733742505311966, "learning_rate": 4.6076715382889245e-06, "loss": 0.9382, "step": 54200 }, { "epoch": 0.3924080870377207, "grad_norm": 0.18472987413406372, "learning_rate": 4.607599151628338e-06, "loss": 0.9304, "step": 54210 }, { "epoch": 0.39248047369830685, "grad_norm": 0.16237011551856995, "learning_rate": 4.607526764967752e-06, "loss": 0.9406, "step": 54220 }, { "epoch": 0.3925528603588931, "grad_norm": 0.2291804850101471, "learning_rate": 4.607454378307165e-06, "loss": 0.9429, "step": 54230 }, { "epoch": 0.39262524701947926, "grad_norm": 0.15868277847766876, "learning_rate": 4.60738199164658e-06, "loss": 0.9355, "step": 54240 }, { "epoch": 0.39269763368006544, "grad_norm": 0.17885923385620117, "learning_rate": 4.607309604985993e-06, "loss": 0.9412, "step": 54250 }, { "epoch": 0.3927700203406516, "grad_norm": 0.1670595407485962, "learning_rate": 4.607237218325407e-06, "loss": 0.9417, "step": 54260 }, { "epoch": 0.3928424070012378, "grad_norm": 0.15766389667987823, "learning_rate": 4.607164831664821e-06, "loss": 0.9434, "step": 54270 }, { "epoch": 0.392914793661824, "grad_norm": 0.16400344669818878, "learning_rate": 4.607092445004235e-06, "loss": 0.9412, "step": 54280 }, { "epoch": 0.3929871803224102, "grad_norm": 0.16506126523017883, "learning_rate": 4.607020058343649e-06, "loss": 0.9409, "step": 54290 }, { "epoch": 0.3930595669829964, "grad_norm": 0.16669459640979767, "learning_rate": 4.606947671683062e-06, "loss": 0.9329, "step": 54300 }, { "epoch": 0.39313195364358255, "grad_norm": 0.18172895908355713, "learning_rate": 4.606875285022476e-06, "loss": 0.9446, "step": 54310 }, { "epoch": 0.39320434030416873, "grad_norm": 0.1678895652294159, "learning_rate": 4.60680289836189e-06, "loss": 0.9421, "step": 54320 }, { "epoch": 0.3932767269647549, "grad_norm": 0.15718461573123932, "learning_rate": 4.606730511701304e-06, "loss": 0.9324, "step": 54330 }, { "epoch": 0.39334911362534114, "grad_norm": 0.16904427111148834, "learning_rate": 4.606658125040718e-06, "loss": 0.9321, "step": 54340 }, { "epoch": 0.3934215002859273, "grad_norm": 0.16376027464866638, "learning_rate": 4.606585738380131e-06, "loss": 0.9483, "step": 54350 }, { "epoch": 0.3934938869465135, "grad_norm": 0.15465879440307617, "learning_rate": 4.606513351719546e-06, "loss": 0.931, "step": 54360 }, { "epoch": 0.39356627360709967, "grad_norm": 0.171060249209404, "learning_rate": 4.606440965058959e-06, "loss": 0.9366, "step": 54370 }, { "epoch": 0.39363866026768585, "grad_norm": 0.1620955467224121, "learning_rate": 4.606368578398373e-06, "loss": 0.9333, "step": 54380 }, { "epoch": 0.3937110469282721, "grad_norm": 0.1702883541584015, "learning_rate": 4.6062961917377866e-06, "loss": 0.9442, "step": 54390 }, { "epoch": 0.39378343358885826, "grad_norm": 0.16319170594215393, "learning_rate": 4.606223805077201e-06, "loss": 0.9315, "step": 54400 }, { "epoch": 0.39385582024944443, "grad_norm": 0.16425073146820068, "learning_rate": 4.606151418416615e-06, "loss": 0.9407, "step": 54410 }, { "epoch": 0.3939282069100306, "grad_norm": 0.1441926509141922, "learning_rate": 4.606079031756028e-06, "loss": 0.9393, "step": 54420 }, { "epoch": 0.3940005935706168, "grad_norm": 0.1872769445180893, "learning_rate": 4.606006645095442e-06, "loss": 0.9417, "step": 54430 }, { "epoch": 0.394072980231203, "grad_norm": 0.15714821219444275, "learning_rate": 4.605934258434856e-06, "loss": 0.9343, "step": 54440 }, { "epoch": 0.3941453668917892, "grad_norm": 0.16071169078350067, "learning_rate": 4.60586187177427e-06, "loss": 0.9447, "step": 54450 }, { "epoch": 0.39421775355237537, "grad_norm": 0.15835289657115936, "learning_rate": 4.6057894851136836e-06, "loss": 0.95, "step": 54460 }, { "epoch": 0.39429014021296155, "grad_norm": 0.15297073125839233, "learning_rate": 4.605717098453097e-06, "loss": 0.9402, "step": 54470 }, { "epoch": 0.3943625268735477, "grad_norm": 0.1636633574962616, "learning_rate": 4.605644711792512e-06, "loss": 0.9444, "step": 54480 }, { "epoch": 0.39443491353413396, "grad_norm": 0.15706664323806763, "learning_rate": 4.605572325131925e-06, "loss": 0.9578, "step": 54490 }, { "epoch": 0.39450730019472013, "grad_norm": 0.15871857106685638, "learning_rate": 4.605499938471339e-06, "loss": 0.9271, "step": 54500 }, { "epoch": 0.3945796868553063, "grad_norm": 0.16093158721923828, "learning_rate": 4.6054275518107525e-06, "loss": 0.9584, "step": 54510 }, { "epoch": 0.3946520735158925, "grad_norm": 0.2100452184677124, "learning_rate": 4.605355165150167e-06, "loss": 0.9364, "step": 54520 }, { "epoch": 0.39472446017647866, "grad_norm": 0.1580246388912201, "learning_rate": 4.6052827784895806e-06, "loss": 0.9394, "step": 54530 }, { "epoch": 0.39479684683706484, "grad_norm": 0.16030821204185486, "learning_rate": 4.605210391828994e-06, "loss": 0.9389, "step": 54540 }, { "epoch": 0.3948692334976511, "grad_norm": 0.17322495579719543, "learning_rate": 4.605138005168408e-06, "loss": 0.9363, "step": 54550 }, { "epoch": 0.39494162015823725, "grad_norm": 0.19092749059200287, "learning_rate": 4.605065618507822e-06, "loss": 0.9458, "step": 54560 }, { "epoch": 0.3950140068188234, "grad_norm": 0.14710497856140137, "learning_rate": 4.604993231847236e-06, "loss": 0.9424, "step": 54570 }, { "epoch": 0.3950863934794096, "grad_norm": 0.17393282055854797, "learning_rate": 4.6049208451866495e-06, "loss": 0.9462, "step": 54580 }, { "epoch": 0.3951587801399958, "grad_norm": 0.1747346818447113, "learning_rate": 4.604848458526063e-06, "loss": 0.9349, "step": 54590 }, { "epoch": 0.395231166800582, "grad_norm": 0.1654556542634964, "learning_rate": 4.604776071865477e-06, "loss": 0.9427, "step": 54600 }, { "epoch": 0.3953035534611682, "grad_norm": 0.1744566261768341, "learning_rate": 4.604703685204891e-06, "loss": 0.9456, "step": 54610 }, { "epoch": 0.39537594012175437, "grad_norm": 0.1695273369550705, "learning_rate": 4.604631298544305e-06, "loss": 0.943, "step": 54620 }, { "epoch": 0.39544832678234054, "grad_norm": 0.17201577126979828, "learning_rate": 4.6045589118837184e-06, "loss": 0.9452, "step": 54630 }, { "epoch": 0.3955207134429267, "grad_norm": 0.18179161846637726, "learning_rate": 4.604486525223132e-06, "loss": 0.93, "step": 54640 }, { "epoch": 0.39559310010351295, "grad_norm": 0.16751891374588013, "learning_rate": 4.6044141385625465e-06, "loss": 0.9283, "step": 54650 }, { "epoch": 0.39566548676409913, "grad_norm": 0.15544885396957397, "learning_rate": 4.60434175190196e-06, "loss": 0.946, "step": 54660 }, { "epoch": 0.3957378734246853, "grad_norm": 0.15887774527072906, "learning_rate": 4.604269365241374e-06, "loss": 0.9441, "step": 54670 }, { "epoch": 0.3958102600852715, "grad_norm": 0.16352955996990204, "learning_rate": 4.604196978580787e-06, "loss": 0.9427, "step": 54680 }, { "epoch": 0.39588264674585766, "grad_norm": 0.1617666482925415, "learning_rate": 4.604124591920202e-06, "loss": 0.9415, "step": 54690 }, { "epoch": 0.39595503340644383, "grad_norm": 0.15895050764083862, "learning_rate": 4.6040522052596154e-06, "loss": 0.953, "step": 54700 }, { "epoch": 0.39602742006703007, "grad_norm": 0.158672496676445, "learning_rate": 4.603979818599029e-06, "loss": 0.9369, "step": 54710 }, { "epoch": 0.39609980672761624, "grad_norm": 0.17853258550167084, "learning_rate": 4.603907431938443e-06, "loss": 0.9429, "step": 54720 }, { "epoch": 0.3961721933882024, "grad_norm": 0.1713215857744217, "learning_rate": 4.603835045277856e-06, "loss": 0.935, "step": 54730 }, { "epoch": 0.3962445800487886, "grad_norm": 0.1543017029762268, "learning_rate": 4.60376265861727e-06, "loss": 0.9297, "step": 54740 }, { "epoch": 0.3963169667093748, "grad_norm": 0.17130827903747559, "learning_rate": 4.603690271956684e-06, "loss": 0.9472, "step": 54750 }, { "epoch": 0.396389353369961, "grad_norm": 0.1728035807609558, "learning_rate": 4.603617885296098e-06, "loss": 0.9448, "step": 54760 }, { "epoch": 0.3964617400305472, "grad_norm": 0.16144831478595734, "learning_rate": 4.603545498635512e-06, "loss": 0.9515, "step": 54770 }, { "epoch": 0.39653412669113336, "grad_norm": 0.15876467525959015, "learning_rate": 4.603473111974925e-06, "loss": 0.945, "step": 54780 }, { "epoch": 0.39660651335171954, "grad_norm": 0.1569388210773468, "learning_rate": 4.603400725314339e-06, "loss": 0.9366, "step": 54790 }, { "epoch": 0.3966789000123057, "grad_norm": 0.1643538773059845, "learning_rate": 4.603328338653753e-06, "loss": 0.9337, "step": 54800 }, { "epoch": 0.39675128667289195, "grad_norm": 0.15085361897945404, "learning_rate": 4.603255951993167e-06, "loss": 0.9506, "step": 54810 }, { "epoch": 0.3968236733334781, "grad_norm": 0.15235082805156708, "learning_rate": 4.6031835653325805e-06, "loss": 0.9261, "step": 54820 }, { "epoch": 0.3968960599940643, "grad_norm": 0.15740101039409637, "learning_rate": 4.603111178671994e-06, "loss": 0.9483, "step": 54830 }, { "epoch": 0.3969684466546505, "grad_norm": 0.17915703356266022, "learning_rate": 4.603038792011409e-06, "loss": 0.9333, "step": 54840 }, { "epoch": 0.39704083331523665, "grad_norm": 0.18708273768424988, "learning_rate": 4.602966405350822e-06, "loss": 0.9457, "step": 54850 }, { "epoch": 0.39711321997582283, "grad_norm": 0.1566181778907776, "learning_rate": 4.602894018690236e-06, "loss": 0.9296, "step": 54860 }, { "epoch": 0.39718560663640906, "grad_norm": 0.20385728776454926, "learning_rate": 4.6028216320296495e-06, "loss": 0.9426, "step": 54870 }, { "epoch": 0.39725799329699524, "grad_norm": 0.15828682482242584, "learning_rate": 4.602749245369064e-06, "loss": 0.9304, "step": 54880 }, { "epoch": 0.3973303799575814, "grad_norm": 0.16250182688236237, "learning_rate": 4.6026768587084775e-06, "loss": 0.9444, "step": 54890 }, { "epoch": 0.3974027666181676, "grad_norm": 0.16645143926143646, "learning_rate": 4.602604472047891e-06, "loss": 0.9462, "step": 54900 }, { "epoch": 0.39747515327875377, "grad_norm": 0.17297451198101044, "learning_rate": 4.602532085387305e-06, "loss": 0.9392, "step": 54910 }, { "epoch": 0.39754753993934, "grad_norm": 0.16319099068641663, "learning_rate": 4.602459698726719e-06, "loss": 0.9384, "step": 54920 }, { "epoch": 0.3976199265999262, "grad_norm": 0.17599353194236755, "learning_rate": 4.602387312066133e-06, "loss": 0.9392, "step": 54930 }, { "epoch": 0.39769231326051235, "grad_norm": 0.16673387587070465, "learning_rate": 4.6023149254055465e-06, "loss": 0.9367, "step": 54940 }, { "epoch": 0.39776469992109853, "grad_norm": 0.17548586428165436, "learning_rate": 4.60224253874496e-06, "loss": 0.9384, "step": 54950 }, { "epoch": 0.3978370865816847, "grad_norm": 0.15889111161231995, "learning_rate": 4.6021701520843745e-06, "loss": 0.9319, "step": 54960 }, { "epoch": 0.39790947324227094, "grad_norm": 0.15126605331897736, "learning_rate": 4.602097765423788e-06, "loss": 0.9389, "step": 54970 }, { "epoch": 0.3979818599028571, "grad_norm": 0.1905619353055954, "learning_rate": 4.602025378763202e-06, "loss": 0.9516, "step": 54980 }, { "epoch": 0.3980542465634433, "grad_norm": 0.18187619745731354, "learning_rate": 4.601952992102615e-06, "loss": 0.9277, "step": 54990 }, { "epoch": 0.39812663322402947, "grad_norm": 0.1634269803762436, "learning_rate": 4.60188060544203e-06, "loss": 0.9381, "step": 55000 }, { "epoch": 0.39819901988461565, "grad_norm": 0.16369114816188812, "learning_rate": 4.6018082187814435e-06, "loss": 0.9403, "step": 55010 }, { "epoch": 0.3982714065452018, "grad_norm": 0.17202910780906677, "learning_rate": 4.601735832120857e-06, "loss": 0.9411, "step": 55020 }, { "epoch": 0.39834379320578805, "grad_norm": 0.17395305633544922, "learning_rate": 4.601663445460271e-06, "loss": 0.9236, "step": 55030 }, { "epoch": 0.39841617986637423, "grad_norm": 0.17423111200332642, "learning_rate": 4.601591058799685e-06, "loss": 0.9404, "step": 55040 }, { "epoch": 0.3984885665269604, "grad_norm": 0.15829895436763763, "learning_rate": 4.601518672139099e-06, "loss": 0.9254, "step": 55050 }, { "epoch": 0.3985609531875466, "grad_norm": 0.17353561520576477, "learning_rate": 4.601446285478512e-06, "loss": 0.9326, "step": 55060 }, { "epoch": 0.39863333984813276, "grad_norm": 0.1960097998380661, "learning_rate": 4.601373898817926e-06, "loss": 0.9582, "step": 55070 }, { "epoch": 0.398705726508719, "grad_norm": 0.15898558497428894, "learning_rate": 4.6013015121573405e-06, "loss": 0.9513, "step": 55080 }, { "epoch": 0.39877811316930517, "grad_norm": 0.17398889362812042, "learning_rate": 4.601229125496754e-06, "loss": 0.9299, "step": 55090 }, { "epoch": 0.39885049982989135, "grad_norm": 0.21108154952526093, "learning_rate": 4.601156738836168e-06, "loss": 0.9514, "step": 55100 }, { "epoch": 0.3989228864904775, "grad_norm": 0.15072329342365265, "learning_rate": 4.601084352175581e-06, "loss": 0.9276, "step": 55110 }, { "epoch": 0.3989952731510637, "grad_norm": 0.18398626148700714, "learning_rate": 4.601011965514996e-06, "loss": 0.9373, "step": 55120 }, { "epoch": 0.39906765981164993, "grad_norm": 0.26923868060112, "learning_rate": 4.600939578854409e-06, "loss": 0.9442, "step": 55130 }, { "epoch": 0.3991400464722361, "grad_norm": 0.16427594423294067, "learning_rate": 4.600867192193823e-06, "loss": 0.9444, "step": 55140 }, { "epoch": 0.3992124331328223, "grad_norm": 0.17052388191223145, "learning_rate": 4.600794805533237e-06, "loss": 0.947, "step": 55150 }, { "epoch": 0.39928481979340846, "grad_norm": 0.18360772728919983, "learning_rate": 4.600722418872651e-06, "loss": 0.9419, "step": 55160 }, { "epoch": 0.39935720645399464, "grad_norm": 0.16440702974796295, "learning_rate": 4.600650032212065e-06, "loss": 0.9214, "step": 55170 }, { "epoch": 0.39942959311458087, "grad_norm": 0.15219788253307343, "learning_rate": 4.600577645551478e-06, "loss": 0.957, "step": 55180 }, { "epoch": 0.39950197977516705, "grad_norm": 0.16597037017345428, "learning_rate": 4.600505258890892e-06, "loss": 0.9424, "step": 55190 }, { "epoch": 0.3995743664357532, "grad_norm": 0.2117663323879242, "learning_rate": 4.600432872230306e-06, "loss": 0.9397, "step": 55200 }, { "epoch": 0.3996467530963394, "grad_norm": 0.16794174909591675, "learning_rate": 4.60036048556972e-06, "loss": 0.9553, "step": 55210 }, { "epoch": 0.3997191397569256, "grad_norm": 0.1689329296350479, "learning_rate": 4.600288098909134e-06, "loss": 0.9436, "step": 55220 }, { "epoch": 0.39979152641751176, "grad_norm": 0.1604575663805008, "learning_rate": 4.600215712248547e-06, "loss": 0.942, "step": 55230 }, { "epoch": 0.399863913078098, "grad_norm": 0.16758602857589722, "learning_rate": 4.600143325587961e-06, "loss": 0.9411, "step": 55240 }, { "epoch": 0.39993629973868416, "grad_norm": 0.167888805270195, "learning_rate": 4.600070938927375e-06, "loss": 0.9467, "step": 55250 }, { "epoch": 0.40000868639927034, "grad_norm": 0.16341939568519592, "learning_rate": 4.599998552266788e-06, "loss": 0.9469, "step": 55260 }, { "epoch": 0.4000810730598565, "grad_norm": 0.15733623504638672, "learning_rate": 4.5999261656062026e-06, "loss": 0.9472, "step": 55270 }, { "epoch": 0.4001534597204427, "grad_norm": 0.15709537267684937, "learning_rate": 4.599853778945616e-06, "loss": 0.9363, "step": 55280 }, { "epoch": 0.4002258463810289, "grad_norm": 0.1718069463968277, "learning_rate": 4.59978139228503e-06, "loss": 0.9339, "step": 55290 }, { "epoch": 0.4002982330416151, "grad_norm": 0.15917843580245972, "learning_rate": 4.599709005624443e-06, "loss": 0.9363, "step": 55300 }, { "epoch": 0.4003706197022013, "grad_norm": 0.15500162541866302, "learning_rate": 4.599636618963858e-06, "loss": 0.9295, "step": 55310 }, { "epoch": 0.40044300636278746, "grad_norm": 0.1756095588207245, "learning_rate": 4.5995642323032715e-06, "loss": 0.9356, "step": 55320 }, { "epoch": 0.40051539302337363, "grad_norm": 0.16309203207492828, "learning_rate": 4.599491845642685e-06, "loss": 0.9378, "step": 55330 }, { "epoch": 0.40058777968395987, "grad_norm": 0.16361068189144135, "learning_rate": 4.599419458982099e-06, "loss": 0.9376, "step": 55340 }, { "epoch": 0.40066016634454604, "grad_norm": 0.16683772206306458, "learning_rate": 4.599347072321513e-06, "loss": 0.9455, "step": 55350 }, { "epoch": 0.4007325530051322, "grad_norm": 0.1727033108472824, "learning_rate": 4.599274685660927e-06, "loss": 0.9312, "step": 55360 }, { "epoch": 0.4008049396657184, "grad_norm": 0.22760114073753357, "learning_rate": 4.5992022990003404e-06, "loss": 0.9331, "step": 55370 }, { "epoch": 0.4008773263263046, "grad_norm": 0.16040877997875214, "learning_rate": 4.599129912339754e-06, "loss": 0.9554, "step": 55380 }, { "epoch": 0.40094971298689075, "grad_norm": 0.17181342840194702, "learning_rate": 4.599057525679168e-06, "loss": 0.9522, "step": 55390 }, { "epoch": 0.401022099647477, "grad_norm": 0.16263987123966217, "learning_rate": 4.598985139018582e-06, "loss": 0.9407, "step": 55400 }, { "epoch": 0.40109448630806316, "grad_norm": 0.16319935023784637, "learning_rate": 4.598912752357996e-06, "loss": 0.9337, "step": 55410 }, { "epoch": 0.40116687296864934, "grad_norm": 0.17521025240421295, "learning_rate": 4.598840365697409e-06, "loss": 0.9386, "step": 55420 }, { "epoch": 0.4012392596292355, "grad_norm": 0.16432663798332214, "learning_rate": 4.598767979036823e-06, "loss": 0.9387, "step": 55430 }, { "epoch": 0.4013116462898217, "grad_norm": 0.16351726651191711, "learning_rate": 4.5986955923762374e-06, "loss": 0.9424, "step": 55440 }, { "epoch": 0.4013840329504079, "grad_norm": 0.15134920179843903, "learning_rate": 4.598623205715651e-06, "loss": 0.9225, "step": 55450 }, { "epoch": 0.4014564196109941, "grad_norm": 0.1916009783744812, "learning_rate": 4.598550819055065e-06, "loss": 0.942, "step": 55460 }, { "epoch": 0.4015288062715803, "grad_norm": 0.1707172840833664, "learning_rate": 4.598478432394478e-06, "loss": 0.9395, "step": 55470 }, { "epoch": 0.40160119293216645, "grad_norm": 0.20359551906585693, "learning_rate": 4.598406045733893e-06, "loss": 0.9398, "step": 55480 }, { "epoch": 0.4016735795927526, "grad_norm": 0.16758215427398682, "learning_rate": 4.598333659073306e-06, "loss": 0.9317, "step": 55490 }, { "epoch": 0.40174596625333886, "grad_norm": 0.17241047322750092, "learning_rate": 4.59826127241272e-06, "loss": 0.9463, "step": 55500 }, { "epoch": 0.40181835291392504, "grad_norm": 0.17256489396095276, "learning_rate": 4.598188885752134e-06, "loss": 0.9383, "step": 55510 }, { "epoch": 0.4018907395745112, "grad_norm": 0.16239801049232483, "learning_rate": 4.598116499091548e-06, "loss": 0.9344, "step": 55520 }, { "epoch": 0.4019631262350974, "grad_norm": 0.15352807939052582, "learning_rate": 4.598044112430962e-06, "loss": 0.9391, "step": 55530 }, { "epoch": 0.40203551289568357, "grad_norm": 0.15835584700107574, "learning_rate": 4.597971725770375e-06, "loss": 0.9441, "step": 55540 }, { "epoch": 0.40210789955626974, "grad_norm": 0.17135757207870483, "learning_rate": 4.597899339109789e-06, "loss": 0.9403, "step": 55550 }, { "epoch": 0.402180286216856, "grad_norm": 0.18612030148506165, "learning_rate": 4.597826952449203e-06, "loss": 0.954, "step": 55560 }, { "epoch": 0.40225267287744215, "grad_norm": 0.16078492999076843, "learning_rate": 4.597754565788617e-06, "loss": 0.947, "step": 55570 }, { "epoch": 0.40232505953802833, "grad_norm": 0.16282187402248383, "learning_rate": 4.597682179128031e-06, "loss": 0.9369, "step": 55580 }, { "epoch": 0.4023974461986145, "grad_norm": 0.18029029667377472, "learning_rate": 4.597609792467444e-06, "loss": 0.9432, "step": 55590 }, { "epoch": 0.4024698328592007, "grad_norm": 0.15934710204601288, "learning_rate": 4.597537405806859e-06, "loss": 0.9534, "step": 55600 }, { "epoch": 0.4025422195197869, "grad_norm": 0.16303454339504242, "learning_rate": 4.597465019146272e-06, "loss": 0.9452, "step": 55610 }, { "epoch": 0.4026146061803731, "grad_norm": 0.16990971565246582, "learning_rate": 4.597392632485686e-06, "loss": 0.9413, "step": 55620 }, { "epoch": 0.40268699284095927, "grad_norm": 0.18538545072078705, "learning_rate": 4.5973202458250995e-06, "loss": 0.9341, "step": 55630 }, { "epoch": 0.40275937950154544, "grad_norm": 0.16913770139217377, "learning_rate": 4.597247859164514e-06, "loss": 0.9415, "step": 55640 }, { "epoch": 0.4028317661621316, "grad_norm": 0.1886037439107895, "learning_rate": 4.597175472503928e-06, "loss": 0.9407, "step": 55650 }, { "epoch": 0.40290415282271785, "grad_norm": 0.1722469925880432, "learning_rate": 4.597103085843341e-06, "loss": 0.945, "step": 55660 }, { "epoch": 0.40297653948330403, "grad_norm": 0.16154325008392334, "learning_rate": 4.597030699182755e-06, "loss": 0.9329, "step": 55670 }, { "epoch": 0.4030489261438902, "grad_norm": 0.16594886779785156, "learning_rate": 4.596958312522169e-06, "loss": 0.946, "step": 55680 }, { "epoch": 0.4031213128044764, "grad_norm": 0.15325994789600372, "learning_rate": 4.596885925861583e-06, "loss": 0.9348, "step": 55690 }, { "epoch": 0.40319369946506256, "grad_norm": 0.1615227907896042, "learning_rate": 4.5968135392009965e-06, "loss": 0.9294, "step": 55700 }, { "epoch": 0.4032660861256488, "grad_norm": 0.1949525773525238, "learning_rate": 4.59674115254041e-06, "loss": 0.9388, "step": 55710 }, { "epoch": 0.40333847278623497, "grad_norm": 0.1611870676279068, "learning_rate": 4.596668765879825e-06, "loss": 0.9464, "step": 55720 }, { "epoch": 0.40341085944682115, "grad_norm": 0.16078144311904907, "learning_rate": 4.596596379219238e-06, "loss": 0.9239, "step": 55730 }, { "epoch": 0.4034832461074073, "grad_norm": 0.16597573459148407, "learning_rate": 4.596523992558652e-06, "loss": 0.9325, "step": 55740 }, { "epoch": 0.4035556327679935, "grad_norm": 0.16398844122886658, "learning_rate": 4.5964516058980655e-06, "loss": 0.942, "step": 55750 }, { "epoch": 0.4036280194285797, "grad_norm": 0.16814014315605164, "learning_rate": 4.59637921923748e-06, "loss": 0.9423, "step": 55760 }, { "epoch": 0.4037004060891659, "grad_norm": 0.1564667820930481, "learning_rate": 4.5963068325768935e-06, "loss": 0.939, "step": 55770 }, { "epoch": 0.4037727927497521, "grad_norm": 0.16583354771137238, "learning_rate": 4.596234445916307e-06, "loss": 0.9374, "step": 55780 }, { "epoch": 0.40384517941033826, "grad_norm": 0.1725219190120697, "learning_rate": 4.596162059255721e-06, "loss": 0.9323, "step": 55790 }, { "epoch": 0.40391756607092444, "grad_norm": 0.15034452080726624, "learning_rate": 4.596089672595134e-06, "loss": 0.9276, "step": 55800 }, { "epoch": 0.4039899527315106, "grad_norm": 0.17058463394641876, "learning_rate": 4.596017285934548e-06, "loss": 0.9497, "step": 55810 }, { "epoch": 0.40406233939209685, "grad_norm": 0.15496298670768738, "learning_rate": 4.595944899273962e-06, "loss": 0.9416, "step": 55820 }, { "epoch": 0.404134726052683, "grad_norm": 0.16956356167793274, "learning_rate": 4.595872512613376e-06, "loss": 0.9354, "step": 55830 }, { "epoch": 0.4042071127132692, "grad_norm": 0.15078359842300415, "learning_rate": 4.59580012595279e-06, "loss": 0.9287, "step": 55840 }, { "epoch": 0.4042794993738554, "grad_norm": 0.1605517864227295, "learning_rate": 4.595727739292203e-06, "loss": 0.9434, "step": 55850 }, { "epoch": 0.40435188603444155, "grad_norm": 0.16339904069900513, "learning_rate": 4.595655352631617e-06, "loss": 0.9406, "step": 55860 }, { "epoch": 0.4044242726950278, "grad_norm": 0.18486179411411285, "learning_rate": 4.595582965971031e-06, "loss": 0.9444, "step": 55870 }, { "epoch": 0.40449665935561396, "grad_norm": 0.16515012085437775, "learning_rate": 4.595510579310445e-06, "loss": 0.9375, "step": 55880 }, { "epoch": 0.40456904601620014, "grad_norm": 0.16577614843845367, "learning_rate": 4.595438192649859e-06, "loss": 0.9506, "step": 55890 }, { "epoch": 0.4046414326767863, "grad_norm": 0.16598597168922424, "learning_rate": 4.595365805989272e-06, "loss": 0.9229, "step": 55900 }, { "epoch": 0.4047138193373725, "grad_norm": 0.1684425175189972, "learning_rate": 4.595293419328687e-06, "loss": 0.9365, "step": 55910 }, { "epoch": 0.40478620599795867, "grad_norm": 0.15921281278133392, "learning_rate": 4.5952210326681e-06, "loss": 0.9394, "step": 55920 }, { "epoch": 0.4048585926585449, "grad_norm": 0.16952553391456604, "learning_rate": 4.595148646007514e-06, "loss": 0.9461, "step": 55930 }, { "epoch": 0.4049309793191311, "grad_norm": 0.1839471012353897, "learning_rate": 4.5950762593469276e-06, "loss": 0.9523, "step": 55940 }, { "epoch": 0.40500336597971726, "grad_norm": 0.16442671418190002, "learning_rate": 4.595003872686342e-06, "loss": 0.9517, "step": 55950 }, { "epoch": 0.40507575264030343, "grad_norm": 0.1691816747188568, "learning_rate": 4.594931486025756e-06, "loss": 0.9393, "step": 55960 }, { "epoch": 0.4051481393008896, "grad_norm": 0.17636831104755402, "learning_rate": 4.594859099365169e-06, "loss": 0.9443, "step": 55970 }, { "epoch": 0.40522052596147584, "grad_norm": 0.16882002353668213, "learning_rate": 4.594786712704583e-06, "loss": 0.9407, "step": 55980 }, { "epoch": 0.405292912622062, "grad_norm": 0.16932517290115356, "learning_rate": 4.594714326043997e-06, "loss": 0.9443, "step": 55990 }, { "epoch": 0.4053652992826482, "grad_norm": 0.17036424577236176, "learning_rate": 4.594641939383411e-06, "loss": 0.9496, "step": 56000 }, { "epoch": 0.40543768594323437, "grad_norm": 0.16463550925254822, "learning_rate": 4.5945695527228246e-06, "loss": 0.9404, "step": 56010 }, { "epoch": 0.40551007260382055, "grad_norm": 0.16589607298374176, "learning_rate": 4.594497166062238e-06, "loss": 0.9475, "step": 56020 }, { "epoch": 0.4055824592644068, "grad_norm": 0.18529045581817627, "learning_rate": 4.594424779401652e-06, "loss": 0.9464, "step": 56030 }, { "epoch": 0.40565484592499296, "grad_norm": 0.15794023871421814, "learning_rate": 4.594352392741066e-06, "loss": 0.9315, "step": 56040 }, { "epoch": 0.40572723258557913, "grad_norm": 0.16272369027137756, "learning_rate": 4.59428000608048e-06, "loss": 0.9486, "step": 56050 }, { "epoch": 0.4057996192461653, "grad_norm": 0.16422978043556213, "learning_rate": 4.5942076194198935e-06, "loss": 0.9569, "step": 56060 }, { "epoch": 0.4058720059067515, "grad_norm": 0.15604273974895477, "learning_rate": 4.594135232759307e-06, "loss": 0.9499, "step": 56070 }, { "epoch": 0.40594439256733766, "grad_norm": 0.17885945737361908, "learning_rate": 4.5940628460987216e-06, "loss": 0.9409, "step": 56080 }, { "epoch": 0.4060167792279239, "grad_norm": 0.19414786994457245, "learning_rate": 4.593990459438135e-06, "loss": 0.9399, "step": 56090 }, { "epoch": 0.4060891658885101, "grad_norm": 0.1650743931531906, "learning_rate": 4.593918072777549e-06, "loss": 0.9367, "step": 56100 }, { "epoch": 0.40616155254909625, "grad_norm": 0.1537282019853592, "learning_rate": 4.5938456861169624e-06, "loss": 0.9325, "step": 56110 }, { "epoch": 0.4062339392096824, "grad_norm": 0.16103175282478333, "learning_rate": 4.593773299456377e-06, "loss": 0.9262, "step": 56120 }, { "epoch": 0.4063063258702686, "grad_norm": 0.1802387833595276, "learning_rate": 4.5937009127957905e-06, "loss": 0.9475, "step": 56130 }, { "epoch": 0.40637871253085484, "grad_norm": 0.16064372658729553, "learning_rate": 4.593628526135204e-06, "loss": 0.9182, "step": 56140 }, { "epoch": 0.406451099191441, "grad_norm": 0.15981996059417725, "learning_rate": 4.593556139474618e-06, "loss": 0.934, "step": 56150 }, { "epoch": 0.4065234858520272, "grad_norm": 0.153685063123703, "learning_rate": 4.593483752814032e-06, "loss": 0.9424, "step": 56160 }, { "epoch": 0.40659587251261337, "grad_norm": 0.1737099438905716, "learning_rate": 4.593411366153446e-06, "loss": 0.9264, "step": 56170 }, { "epoch": 0.40666825917319954, "grad_norm": 0.15085779130458832, "learning_rate": 4.5933389794928594e-06, "loss": 0.9393, "step": 56180 }, { "epoch": 0.4067406458337858, "grad_norm": 0.16470050811767578, "learning_rate": 4.593266592832273e-06, "loss": 0.9372, "step": 56190 }, { "epoch": 0.40681303249437195, "grad_norm": 0.17371892929077148, "learning_rate": 4.5931942061716875e-06, "loss": 0.9426, "step": 56200 }, { "epoch": 0.40688541915495813, "grad_norm": 0.1519443243741989, "learning_rate": 4.593121819511101e-06, "loss": 0.9331, "step": 56210 }, { "epoch": 0.4069578058155443, "grad_norm": 0.1687236726284027, "learning_rate": 4.593049432850515e-06, "loss": 0.9469, "step": 56220 }, { "epoch": 0.4070301924761305, "grad_norm": 0.1575406938791275, "learning_rate": 4.592977046189928e-06, "loss": 0.938, "step": 56230 }, { "epoch": 0.4071025791367167, "grad_norm": 0.15486252307891846, "learning_rate": 4.592904659529343e-06, "loss": 0.9325, "step": 56240 }, { "epoch": 0.4071749657973029, "grad_norm": 0.16617028415203094, "learning_rate": 4.5928322728687564e-06, "loss": 0.9382, "step": 56250 }, { "epoch": 0.40724735245788907, "grad_norm": 0.16430149972438812, "learning_rate": 4.59275988620817e-06, "loss": 0.9316, "step": 56260 }, { "epoch": 0.40731973911847524, "grad_norm": 0.17749901115894318, "learning_rate": 4.592687499547584e-06, "loss": 0.9332, "step": 56270 }, { "epoch": 0.4073921257790614, "grad_norm": 0.21616603434085846, "learning_rate": 4.592615112886998e-06, "loss": 0.9409, "step": 56280 }, { "epoch": 0.4074645124396476, "grad_norm": 0.1658083200454712, "learning_rate": 4.592542726226412e-06, "loss": 0.9445, "step": 56290 }, { "epoch": 0.40753689910023383, "grad_norm": 0.15681838989257812, "learning_rate": 4.592470339565825e-06, "loss": 0.9343, "step": 56300 }, { "epoch": 0.40760928576082, "grad_norm": 0.15368938446044922, "learning_rate": 4.592397952905239e-06, "loss": 0.9424, "step": 56310 }, { "epoch": 0.4076816724214062, "grad_norm": 0.24562257528305054, "learning_rate": 4.5923255662446534e-06, "loss": 0.9345, "step": 56320 }, { "epoch": 0.40775405908199236, "grad_norm": 0.1594623327255249, "learning_rate": 4.592253179584066e-06, "loss": 0.9462, "step": 56330 }, { "epoch": 0.40782644574257854, "grad_norm": 0.1747690588235855, "learning_rate": 4.59218079292348e-06, "loss": 0.9347, "step": 56340 }, { "epoch": 0.40789883240316477, "grad_norm": 0.1642814725637436, "learning_rate": 4.592108406262894e-06, "loss": 0.9402, "step": 56350 }, { "epoch": 0.40797121906375095, "grad_norm": 0.15779854357242584, "learning_rate": 4.592036019602308e-06, "loss": 0.9327, "step": 56360 }, { "epoch": 0.4080436057243371, "grad_norm": 0.15080593526363373, "learning_rate": 4.5919636329417215e-06, "loss": 0.9267, "step": 56370 }, { "epoch": 0.4081159923849233, "grad_norm": 0.1907733529806137, "learning_rate": 4.591891246281135e-06, "loss": 0.9358, "step": 56380 }, { "epoch": 0.4081883790455095, "grad_norm": 0.18039512634277344, "learning_rate": 4.59181885962055e-06, "loss": 0.9398, "step": 56390 }, { "epoch": 0.4082607657060957, "grad_norm": 0.17534703016281128, "learning_rate": 4.591746472959963e-06, "loss": 0.94, "step": 56400 }, { "epoch": 0.4083331523666819, "grad_norm": 0.16364185512065887, "learning_rate": 4.591674086299377e-06, "loss": 0.9374, "step": 56410 }, { "epoch": 0.40840553902726806, "grad_norm": 0.16373589634895325, "learning_rate": 4.5916016996387905e-06, "loss": 0.9334, "step": 56420 }, { "epoch": 0.40847792568785424, "grad_norm": 0.249722421169281, "learning_rate": 4.591529312978205e-06, "loss": 0.9358, "step": 56430 }, { "epoch": 0.4085503123484404, "grad_norm": 0.16310162842273712, "learning_rate": 4.5914569263176185e-06, "loss": 0.9516, "step": 56440 }, { "epoch": 0.4086226990090266, "grad_norm": 0.15711535513401031, "learning_rate": 4.591384539657032e-06, "loss": 0.9374, "step": 56450 }, { "epoch": 0.4086950856696128, "grad_norm": 0.16699931025505066, "learning_rate": 4.591312152996446e-06, "loss": 0.9478, "step": 56460 }, { "epoch": 0.408767472330199, "grad_norm": 0.16310204565525055, "learning_rate": 4.59123976633586e-06, "loss": 0.9335, "step": 56470 }, { "epoch": 0.4088398589907852, "grad_norm": 0.1538310945034027, "learning_rate": 4.591167379675274e-06, "loss": 0.9424, "step": 56480 }, { "epoch": 0.40891224565137135, "grad_norm": 0.1658681482076645, "learning_rate": 4.5910949930146875e-06, "loss": 0.9302, "step": 56490 }, { "epoch": 0.40898463231195753, "grad_norm": 0.18781772255897522, "learning_rate": 4.591022606354101e-06, "loss": 0.9369, "step": 56500 }, { "epoch": 0.40905701897254376, "grad_norm": 0.17703670263290405, "learning_rate": 4.5909502196935155e-06, "loss": 0.9407, "step": 56510 }, { "epoch": 0.40912940563312994, "grad_norm": 0.16049979627132416, "learning_rate": 4.590877833032929e-06, "loss": 0.9452, "step": 56520 }, { "epoch": 0.4092017922937161, "grad_norm": 0.16507183015346527, "learning_rate": 4.590805446372343e-06, "loss": 0.9304, "step": 56530 }, { "epoch": 0.4092741789543023, "grad_norm": 0.17175514996051788, "learning_rate": 4.590733059711756e-06, "loss": 0.939, "step": 56540 }, { "epoch": 0.40934656561488847, "grad_norm": 0.15628288686275482, "learning_rate": 4.590660673051171e-06, "loss": 0.9413, "step": 56550 }, { "epoch": 0.4094189522754747, "grad_norm": 0.17340143024921417, "learning_rate": 4.5905882863905845e-06, "loss": 0.9347, "step": 56560 }, { "epoch": 0.4094913389360609, "grad_norm": 0.17264612019062042, "learning_rate": 4.590515899729998e-06, "loss": 0.9275, "step": 56570 }, { "epoch": 0.40956372559664705, "grad_norm": 0.1592729389667511, "learning_rate": 4.590443513069412e-06, "loss": 0.936, "step": 56580 }, { "epoch": 0.40963611225723323, "grad_norm": 0.17017214000225067, "learning_rate": 4.590371126408826e-06, "loss": 0.9455, "step": 56590 }, { "epoch": 0.4097084989178194, "grad_norm": 0.25028762221336365, "learning_rate": 4.59029873974824e-06, "loss": 0.9368, "step": 56600 }, { "epoch": 0.4097808855784056, "grad_norm": 0.1710430234670639, "learning_rate": 4.590226353087653e-06, "loss": 0.932, "step": 56610 }, { "epoch": 0.4098532722389918, "grad_norm": 0.18898555636405945, "learning_rate": 4.590153966427067e-06, "loss": 0.9284, "step": 56620 }, { "epoch": 0.409925658899578, "grad_norm": 0.15929999947547913, "learning_rate": 4.590081579766481e-06, "loss": 0.9334, "step": 56630 }, { "epoch": 0.40999804556016417, "grad_norm": 0.18224512040615082, "learning_rate": 4.590009193105895e-06, "loss": 0.9523, "step": 56640 }, { "epoch": 0.41007043222075035, "grad_norm": 0.16598254442214966, "learning_rate": 4.589936806445309e-06, "loss": 0.9455, "step": 56650 }, { "epoch": 0.4101428188813365, "grad_norm": 0.1503967046737671, "learning_rate": 4.589864419784722e-06, "loss": 0.9469, "step": 56660 }, { "epoch": 0.41021520554192276, "grad_norm": 0.17737853527069092, "learning_rate": 4.589792033124136e-06, "loss": 0.946, "step": 56670 }, { "epoch": 0.41028759220250893, "grad_norm": 0.16925589740276337, "learning_rate": 4.58971964646355e-06, "loss": 0.9484, "step": 56680 }, { "epoch": 0.4103599788630951, "grad_norm": 0.15373647212982178, "learning_rate": 4.589647259802964e-06, "loss": 0.9357, "step": 56690 }, { "epoch": 0.4104323655236813, "grad_norm": 0.1628134548664093, "learning_rate": 4.589574873142378e-06, "loss": 0.9328, "step": 56700 }, { "epoch": 0.41050475218426746, "grad_norm": 0.1642216444015503, "learning_rate": 4.589502486481791e-06, "loss": 0.9391, "step": 56710 }, { "epoch": 0.4105771388448537, "grad_norm": 0.18736900389194489, "learning_rate": 4.589430099821206e-06, "loss": 0.9277, "step": 56720 }, { "epoch": 0.41064952550543987, "grad_norm": 0.16991931200027466, "learning_rate": 4.589357713160619e-06, "loss": 0.9257, "step": 56730 }, { "epoch": 0.41072191216602605, "grad_norm": 0.1831955909729004, "learning_rate": 4.589285326500033e-06, "loss": 0.9251, "step": 56740 }, { "epoch": 0.4107942988266122, "grad_norm": 0.16035877168178558, "learning_rate": 4.5892129398394466e-06, "loss": 0.9424, "step": 56750 }, { "epoch": 0.4108666854871984, "grad_norm": 0.16036030650138855, "learning_rate": 4.589140553178861e-06, "loss": 0.9377, "step": 56760 }, { "epoch": 0.4109390721477846, "grad_norm": 0.1828073412179947, "learning_rate": 4.589068166518275e-06, "loss": 0.9371, "step": 56770 }, { "epoch": 0.4110114588083708, "grad_norm": 0.16397011280059814, "learning_rate": 4.588995779857688e-06, "loss": 0.9394, "step": 56780 }, { "epoch": 0.411083845468957, "grad_norm": 0.14961424469947815, "learning_rate": 4.588923393197102e-06, "loss": 0.9346, "step": 56790 }, { "epoch": 0.41115623212954316, "grad_norm": 0.16437679529190063, "learning_rate": 4.588851006536516e-06, "loss": 0.9223, "step": 56800 }, { "epoch": 0.41122861879012934, "grad_norm": 0.15859009325504303, "learning_rate": 4.58877861987593e-06, "loss": 0.9479, "step": 56810 }, { "epoch": 0.4113010054507155, "grad_norm": 0.15533125400543213, "learning_rate": 4.5887062332153436e-06, "loss": 0.9188, "step": 56820 }, { "epoch": 0.41137339211130175, "grad_norm": 0.16156156361103058, "learning_rate": 4.588633846554757e-06, "loss": 0.9446, "step": 56830 }, { "epoch": 0.4114457787718879, "grad_norm": 0.169430211186409, "learning_rate": 4.588561459894172e-06, "loss": 0.9298, "step": 56840 }, { "epoch": 0.4115181654324741, "grad_norm": 0.18322457373142242, "learning_rate": 4.588489073233585e-06, "loss": 0.9431, "step": 56850 }, { "epoch": 0.4115905520930603, "grad_norm": 0.17053550481796265, "learning_rate": 4.588416686572998e-06, "loss": 0.9349, "step": 56860 }, { "epoch": 0.41166293875364646, "grad_norm": 0.18169362843036652, "learning_rate": 4.5883442999124125e-06, "loss": 0.9257, "step": 56870 }, { "epoch": 0.4117353254142327, "grad_norm": 0.2942195236682892, "learning_rate": 4.588271913251826e-06, "loss": 0.9326, "step": 56880 }, { "epoch": 0.41180771207481887, "grad_norm": 0.21721670031547546, "learning_rate": 4.58819952659124e-06, "loss": 0.9412, "step": 56890 }, { "epoch": 0.41188009873540504, "grad_norm": 0.1641770750284195, "learning_rate": 4.588127139930653e-06, "loss": 0.9299, "step": 56900 }, { "epoch": 0.4119524853959912, "grad_norm": 0.17069405317306519, "learning_rate": 4.588054753270068e-06, "loss": 0.9389, "step": 56910 }, { "epoch": 0.4120248720565774, "grad_norm": 0.16552869975566864, "learning_rate": 4.5879823666094814e-06, "loss": 0.943, "step": 56920 }, { "epoch": 0.41209725871716363, "grad_norm": 0.2061159610748291, "learning_rate": 4.587909979948895e-06, "loss": 0.9433, "step": 56930 }, { "epoch": 0.4121696453777498, "grad_norm": 0.16571536660194397, "learning_rate": 4.587837593288309e-06, "loss": 0.9441, "step": 56940 }, { "epoch": 0.412242032038336, "grad_norm": 0.15703128278255463, "learning_rate": 4.587765206627723e-06, "loss": 0.94, "step": 56950 }, { "epoch": 0.41231441869892216, "grad_norm": 0.16326795518398285, "learning_rate": 4.587692819967137e-06, "loss": 0.9363, "step": 56960 }, { "epoch": 0.41238680535950833, "grad_norm": 0.16321924328804016, "learning_rate": 4.58762043330655e-06, "loss": 0.9219, "step": 56970 }, { "epoch": 0.4124591920200945, "grad_norm": 0.16104656457901, "learning_rate": 4.587548046645964e-06, "loss": 0.9407, "step": 56980 }, { "epoch": 0.41253157868068074, "grad_norm": 0.16342920064926147, "learning_rate": 4.5874756599853784e-06, "loss": 0.9396, "step": 56990 }, { "epoch": 0.4126039653412669, "grad_norm": 0.18352803587913513, "learning_rate": 4.587403273324792e-06, "loss": 0.9289, "step": 57000 }, { "epoch": 0.4126763520018531, "grad_norm": 0.14929810166358948, "learning_rate": 4.587330886664206e-06, "loss": 0.9403, "step": 57010 }, { "epoch": 0.4127487386624393, "grad_norm": 0.15996743738651276, "learning_rate": 4.587258500003619e-06, "loss": 0.9487, "step": 57020 }, { "epoch": 0.41282112532302545, "grad_norm": 0.15918391942977905, "learning_rate": 4.587186113343034e-06, "loss": 0.9334, "step": 57030 }, { "epoch": 0.4128935119836117, "grad_norm": 0.16165630519390106, "learning_rate": 4.587113726682447e-06, "loss": 0.9335, "step": 57040 }, { "epoch": 0.41296589864419786, "grad_norm": 0.16731780767440796, "learning_rate": 4.587041340021861e-06, "loss": 0.9401, "step": 57050 }, { "epoch": 0.41303828530478404, "grad_norm": 0.21615374088287354, "learning_rate": 4.586968953361275e-06, "loss": 0.9381, "step": 57060 }, { "epoch": 0.4131106719653702, "grad_norm": 0.17595277726650238, "learning_rate": 4.586896566700689e-06, "loss": 0.9435, "step": 57070 }, { "epoch": 0.4131830586259564, "grad_norm": 0.2352835237979889, "learning_rate": 4.586824180040103e-06, "loss": 0.9311, "step": 57080 }, { "epoch": 0.4132554452865426, "grad_norm": 0.15337133407592773, "learning_rate": 4.586751793379516e-06, "loss": 0.9489, "step": 57090 }, { "epoch": 0.4133278319471288, "grad_norm": 0.18751218914985657, "learning_rate": 4.58667940671893e-06, "loss": 0.9385, "step": 57100 }, { "epoch": 0.413400218607715, "grad_norm": 0.3108257055282593, "learning_rate": 4.586607020058344e-06, "loss": 0.9399, "step": 57110 }, { "epoch": 0.41347260526830115, "grad_norm": 0.15695133805274963, "learning_rate": 4.586534633397758e-06, "loss": 0.931, "step": 57120 }, { "epoch": 0.41354499192888733, "grad_norm": 0.1587478369474411, "learning_rate": 4.586462246737172e-06, "loss": 0.9228, "step": 57130 }, { "epoch": 0.4136173785894735, "grad_norm": 0.17306017875671387, "learning_rate": 4.586389860076585e-06, "loss": 0.9294, "step": 57140 }, { "epoch": 0.41368976525005974, "grad_norm": 0.1602945774793625, "learning_rate": 4.586317473416e-06, "loss": 0.9387, "step": 57150 }, { "epoch": 0.4137621519106459, "grad_norm": 0.17871308326721191, "learning_rate": 4.586245086755413e-06, "loss": 0.9349, "step": 57160 }, { "epoch": 0.4138345385712321, "grad_norm": 0.18266141414642334, "learning_rate": 4.586172700094827e-06, "loss": 0.9463, "step": 57170 }, { "epoch": 0.41390692523181827, "grad_norm": 0.15963898599147797, "learning_rate": 4.5861003134342405e-06, "loss": 0.9383, "step": 57180 }, { "epoch": 0.41397931189240444, "grad_norm": 0.18138666450977325, "learning_rate": 4.586027926773655e-06, "loss": 0.9332, "step": 57190 }, { "epoch": 0.4140516985529907, "grad_norm": 0.16729888319969177, "learning_rate": 4.585955540113069e-06, "loss": 0.9286, "step": 57200 }, { "epoch": 0.41412408521357685, "grad_norm": 0.17177051305770874, "learning_rate": 4.585883153452482e-06, "loss": 0.9317, "step": 57210 }, { "epoch": 0.41419647187416303, "grad_norm": 0.17003445327281952, "learning_rate": 4.585810766791896e-06, "loss": 0.9415, "step": 57220 }, { "epoch": 0.4142688585347492, "grad_norm": 0.3177216649055481, "learning_rate": 4.58573838013131e-06, "loss": 0.9546, "step": 57230 }, { "epoch": 0.4143412451953354, "grad_norm": 0.15487100183963776, "learning_rate": 4.585665993470724e-06, "loss": 0.9362, "step": 57240 }, { "epoch": 0.4144136318559216, "grad_norm": 0.158743754029274, "learning_rate": 4.5855936068101375e-06, "loss": 0.941, "step": 57250 }, { "epoch": 0.4144860185165078, "grad_norm": 0.16190417110919952, "learning_rate": 4.585521220149551e-06, "loss": 0.9304, "step": 57260 }, { "epoch": 0.41455840517709397, "grad_norm": 0.16800768673419952, "learning_rate": 4.585448833488965e-06, "loss": 0.9331, "step": 57270 }, { "epoch": 0.41463079183768015, "grad_norm": 0.15928290784358978, "learning_rate": 4.585376446828379e-06, "loss": 0.9298, "step": 57280 }, { "epoch": 0.4147031784982663, "grad_norm": 0.17402148246765137, "learning_rate": 4.585304060167793e-06, "loss": 0.9381, "step": 57290 }, { "epoch": 0.4147755651588525, "grad_norm": 0.16189469397068024, "learning_rate": 4.5852316735072065e-06, "loss": 0.9407, "step": 57300 }, { "epoch": 0.41484795181943873, "grad_norm": 0.16404691338539124, "learning_rate": 4.58515928684662e-06, "loss": 0.9396, "step": 57310 }, { "epoch": 0.4149203384800249, "grad_norm": 0.15537384152412415, "learning_rate": 4.5850869001860345e-06, "loss": 0.9291, "step": 57320 }, { "epoch": 0.4149927251406111, "grad_norm": 0.16837313771247864, "learning_rate": 4.585014513525448e-06, "loss": 0.9345, "step": 57330 }, { "epoch": 0.41506511180119726, "grad_norm": 0.17618413269519806, "learning_rate": 4.584942126864862e-06, "loss": 0.9325, "step": 57340 }, { "epoch": 0.41513749846178344, "grad_norm": 0.16522805392742157, "learning_rate": 4.584869740204275e-06, "loss": 0.9328, "step": 57350 }, { "epoch": 0.41520988512236967, "grad_norm": 0.18333765864372253, "learning_rate": 4.58479735354369e-06, "loss": 0.9246, "step": 57360 }, { "epoch": 0.41528227178295585, "grad_norm": 0.18241846561431885, "learning_rate": 4.5847249668831035e-06, "loss": 0.9327, "step": 57370 }, { "epoch": 0.415354658443542, "grad_norm": 0.1651061475276947, "learning_rate": 4.584652580222517e-06, "loss": 0.9293, "step": 57380 }, { "epoch": 0.4154270451041282, "grad_norm": 0.14222125709056854, "learning_rate": 4.584580193561931e-06, "loss": 0.9349, "step": 57390 }, { "epoch": 0.4154994317647144, "grad_norm": 0.17054183781147003, "learning_rate": 4.584507806901344e-06, "loss": 0.9352, "step": 57400 }, { "epoch": 0.4155718184253006, "grad_norm": 0.16434767842292786, "learning_rate": 4.584435420240758e-06, "loss": 0.9448, "step": 57410 }, { "epoch": 0.4156442050858868, "grad_norm": 0.15329429507255554, "learning_rate": 4.584363033580172e-06, "loss": 0.9352, "step": 57420 }, { "epoch": 0.41571659174647296, "grad_norm": 0.1638394594192505, "learning_rate": 4.584290646919586e-06, "loss": 0.927, "step": 57430 }, { "epoch": 0.41578897840705914, "grad_norm": 0.1653471738100052, "learning_rate": 4.584218260259e-06, "loss": 0.9451, "step": 57440 }, { "epoch": 0.4158613650676453, "grad_norm": 0.1822325438261032, "learning_rate": 4.584145873598413e-06, "loss": 0.9372, "step": 57450 }, { "epoch": 0.41593375172823155, "grad_norm": 0.16440334916114807, "learning_rate": 4.584073486937827e-06, "loss": 0.9434, "step": 57460 }, { "epoch": 0.4160061383888177, "grad_norm": 0.1566876322031021, "learning_rate": 4.584001100277241e-06, "loss": 0.9454, "step": 57470 }, { "epoch": 0.4160785250494039, "grad_norm": 0.15633165836334229, "learning_rate": 4.583928713616655e-06, "loss": 0.9438, "step": 57480 }, { "epoch": 0.4161509117099901, "grad_norm": 0.1904531866312027, "learning_rate": 4.5838563269560686e-06, "loss": 0.9433, "step": 57490 }, { "epoch": 0.41622329837057626, "grad_norm": 0.15552623569965363, "learning_rate": 4.583783940295482e-06, "loss": 0.9351, "step": 57500 }, { "epoch": 0.41629568503116243, "grad_norm": 0.2083786427974701, "learning_rate": 4.583711553634897e-06, "loss": 0.9324, "step": 57510 }, { "epoch": 0.41636807169174866, "grad_norm": 0.16016191244125366, "learning_rate": 4.58363916697431e-06, "loss": 0.9311, "step": 57520 }, { "epoch": 0.41644045835233484, "grad_norm": 0.1558094024658203, "learning_rate": 4.583566780313724e-06, "loss": 0.9262, "step": 57530 }, { "epoch": 0.416512845012921, "grad_norm": 0.16131691634655, "learning_rate": 4.5834943936531375e-06, "loss": 0.9343, "step": 57540 }, { "epoch": 0.4165852316735072, "grad_norm": 0.1501682698726654, "learning_rate": 4.583422006992552e-06, "loss": 0.944, "step": 57550 }, { "epoch": 0.41665761833409337, "grad_norm": 0.18568944931030273, "learning_rate": 4.5833496203319656e-06, "loss": 0.9248, "step": 57560 }, { "epoch": 0.4167300049946796, "grad_norm": 0.16975927352905273, "learning_rate": 4.583277233671379e-06, "loss": 0.9348, "step": 57570 }, { "epoch": 0.4168023916552658, "grad_norm": 0.15231750905513763, "learning_rate": 4.583204847010793e-06, "loss": 0.9445, "step": 57580 }, { "epoch": 0.41687477831585196, "grad_norm": 0.2114260494709015, "learning_rate": 4.583132460350207e-06, "loss": 0.9306, "step": 57590 }, { "epoch": 0.41694716497643813, "grad_norm": 0.17215648293495178, "learning_rate": 4.583060073689621e-06, "loss": 0.9206, "step": 57600 }, { "epoch": 0.4170195516370243, "grad_norm": 0.16319842636585236, "learning_rate": 4.5829876870290345e-06, "loss": 0.9321, "step": 57610 }, { "epoch": 0.41709193829761054, "grad_norm": 0.1795549988746643, "learning_rate": 4.582915300368448e-06, "loss": 0.9346, "step": 57620 }, { "epoch": 0.4171643249581967, "grad_norm": 0.18302929401397705, "learning_rate": 4.582842913707863e-06, "loss": 0.9413, "step": 57630 }, { "epoch": 0.4172367116187829, "grad_norm": 0.16946756839752197, "learning_rate": 4.582770527047276e-06, "loss": 0.9397, "step": 57640 }, { "epoch": 0.4173090982793691, "grad_norm": 0.16745789349079132, "learning_rate": 4.58269814038669e-06, "loss": 0.9381, "step": 57650 }, { "epoch": 0.41738148493995525, "grad_norm": 0.15687045454978943, "learning_rate": 4.5826257537261034e-06, "loss": 0.9341, "step": 57660 }, { "epoch": 0.4174538716005414, "grad_norm": 0.1755257248878479, "learning_rate": 4.582553367065518e-06, "loss": 0.9442, "step": 57670 }, { "epoch": 0.41752625826112766, "grad_norm": 0.1668943166732788, "learning_rate": 4.5824809804049315e-06, "loss": 0.9397, "step": 57680 }, { "epoch": 0.41759864492171384, "grad_norm": 0.16578532755374908, "learning_rate": 4.582408593744345e-06, "loss": 0.9375, "step": 57690 }, { "epoch": 0.4176710315823, "grad_norm": 0.15865838527679443, "learning_rate": 4.582336207083759e-06, "loss": 0.9431, "step": 57700 }, { "epoch": 0.4177434182428862, "grad_norm": 0.1591697633266449, "learning_rate": 4.582263820423173e-06, "loss": 0.9313, "step": 57710 }, { "epoch": 0.41781580490347237, "grad_norm": 0.23013950884342194, "learning_rate": 4.582191433762587e-06, "loss": 0.9357, "step": 57720 }, { "epoch": 0.4178881915640586, "grad_norm": 0.1570298671722412, "learning_rate": 4.5821190471020004e-06, "loss": 0.9206, "step": 57730 }, { "epoch": 0.4179605782246448, "grad_norm": 0.16404032707214355, "learning_rate": 4.582046660441414e-06, "loss": 0.9465, "step": 57740 }, { "epoch": 0.41803296488523095, "grad_norm": 0.16130004823207855, "learning_rate": 4.5819742737808285e-06, "loss": 0.9389, "step": 57750 }, { "epoch": 0.4181053515458171, "grad_norm": 0.1546400487422943, "learning_rate": 4.581901887120242e-06, "loss": 0.9361, "step": 57760 }, { "epoch": 0.4181777382064033, "grad_norm": 0.2353212833404541, "learning_rate": 4.581829500459656e-06, "loss": 0.9321, "step": 57770 }, { "epoch": 0.41825012486698954, "grad_norm": 0.16489247977733612, "learning_rate": 4.581757113799069e-06, "loss": 0.9414, "step": 57780 }, { "epoch": 0.4183225115275757, "grad_norm": 0.17833788692951202, "learning_rate": 4.581684727138484e-06, "loss": 0.9375, "step": 57790 }, { "epoch": 0.4183948981881619, "grad_norm": 0.16194747388362885, "learning_rate": 4.5816123404778974e-06, "loss": 0.9359, "step": 57800 }, { "epoch": 0.41846728484874807, "grad_norm": 0.16397050023078918, "learning_rate": 4.581539953817311e-06, "loss": 0.9314, "step": 57810 }, { "epoch": 0.41853967150933424, "grad_norm": 0.18386350572109222, "learning_rate": 4.581467567156725e-06, "loss": 0.9458, "step": 57820 }, { "epoch": 0.4186120581699204, "grad_norm": 0.17196989059448242, "learning_rate": 4.581395180496139e-06, "loss": 0.9213, "step": 57830 }, { "epoch": 0.41868444483050665, "grad_norm": 0.15747104585170746, "learning_rate": 4.581322793835553e-06, "loss": 0.9346, "step": 57840 }, { "epoch": 0.41875683149109283, "grad_norm": 0.19495005905628204, "learning_rate": 4.581250407174966e-06, "loss": 0.9384, "step": 57850 }, { "epoch": 0.418829218151679, "grad_norm": 0.15831686556339264, "learning_rate": 4.58117802051438e-06, "loss": 0.9382, "step": 57860 }, { "epoch": 0.4189016048122652, "grad_norm": 0.16458268463611603, "learning_rate": 4.5811056338537945e-06, "loss": 0.92, "step": 57870 }, { "epoch": 0.41897399147285136, "grad_norm": 0.1708781123161316, "learning_rate": 4.581033247193208e-06, "loss": 0.9377, "step": 57880 }, { "epoch": 0.4190463781334376, "grad_norm": 0.18208129703998566, "learning_rate": 4.580960860532622e-06, "loss": 0.9399, "step": 57890 }, { "epoch": 0.41911876479402377, "grad_norm": 0.15881425142288208, "learning_rate": 4.580888473872035e-06, "loss": 0.9298, "step": 57900 }, { "epoch": 0.41919115145460994, "grad_norm": 0.1868477463722229, "learning_rate": 4.580816087211449e-06, "loss": 0.9473, "step": 57910 }, { "epoch": 0.4192635381151961, "grad_norm": 0.17207710444927216, "learning_rate": 4.5807437005508625e-06, "loss": 0.9367, "step": 57920 }, { "epoch": 0.4193359247757823, "grad_norm": 0.19365693628787994, "learning_rate": 4.580671313890276e-06, "loss": 0.9333, "step": 57930 }, { "epoch": 0.41940831143636853, "grad_norm": 0.1523778736591339, "learning_rate": 4.580598927229691e-06, "loss": 0.9295, "step": 57940 }, { "epoch": 0.4194806980969547, "grad_norm": 0.15326263010501862, "learning_rate": 4.580526540569104e-06, "loss": 0.9508, "step": 57950 }, { "epoch": 0.4195530847575409, "grad_norm": 0.1532769799232483, "learning_rate": 4.580454153908518e-06, "loss": 0.9285, "step": 57960 }, { "epoch": 0.41962547141812706, "grad_norm": 0.15870678424835205, "learning_rate": 4.5803817672479315e-06, "loss": 0.9472, "step": 57970 }, { "epoch": 0.41969785807871324, "grad_norm": 0.1667070984840393, "learning_rate": 4.580309380587346e-06, "loss": 0.939, "step": 57980 }, { "epoch": 0.41977024473929947, "grad_norm": 0.4463184177875519, "learning_rate": 4.5802369939267595e-06, "loss": 0.9313, "step": 57990 }, { "epoch": 0.41984263139988565, "grad_norm": 0.15991568565368652, "learning_rate": 4.580164607266173e-06, "loss": 0.9425, "step": 58000 }, { "epoch": 0.4199150180604718, "grad_norm": 0.16935986280441284, "learning_rate": 4.580092220605587e-06, "loss": 0.9291, "step": 58010 }, { "epoch": 0.419987404721058, "grad_norm": 0.17099948227405548, "learning_rate": 4.580019833945001e-06, "loss": 0.9335, "step": 58020 }, { "epoch": 0.4200597913816442, "grad_norm": 0.15163956582546234, "learning_rate": 4.579947447284415e-06, "loss": 0.9305, "step": 58030 }, { "epoch": 0.42013217804223035, "grad_norm": 0.1671072095632553, "learning_rate": 4.5798750606238285e-06, "loss": 0.9379, "step": 58040 }, { "epoch": 0.4202045647028166, "grad_norm": 0.16619133949279785, "learning_rate": 4.579802673963242e-06, "loss": 0.9309, "step": 58050 }, { "epoch": 0.42027695136340276, "grad_norm": 0.16173526644706726, "learning_rate": 4.579730287302656e-06, "loss": 0.9369, "step": 58060 }, { "epoch": 0.42034933802398894, "grad_norm": 0.16461287438869476, "learning_rate": 4.57965790064207e-06, "loss": 0.9386, "step": 58070 }, { "epoch": 0.4204217246845751, "grad_norm": 0.19412462413311005, "learning_rate": 4.579585513981484e-06, "loss": 0.9284, "step": 58080 }, { "epoch": 0.4204941113451613, "grad_norm": 0.18617956340312958, "learning_rate": 4.579513127320897e-06, "loss": 0.9284, "step": 58090 }, { "epoch": 0.4205664980057475, "grad_norm": 0.1594972461462021, "learning_rate": 4.579440740660311e-06, "loss": 0.9367, "step": 58100 }, { "epoch": 0.4206388846663337, "grad_norm": 0.1762949526309967, "learning_rate": 4.5793683539997255e-06, "loss": 0.9305, "step": 58110 }, { "epoch": 0.4207112713269199, "grad_norm": 0.20204119384288788, "learning_rate": 4.579295967339139e-06, "loss": 0.939, "step": 58120 }, { "epoch": 0.42078365798750605, "grad_norm": 0.16462816298007965, "learning_rate": 4.579223580678553e-06, "loss": 0.9302, "step": 58130 }, { "epoch": 0.42085604464809223, "grad_norm": 0.15783581137657166, "learning_rate": 4.579151194017966e-06, "loss": 0.9372, "step": 58140 }, { "epoch": 0.42092843130867846, "grad_norm": 0.1847236305475235, "learning_rate": 4.579078807357381e-06, "loss": 0.9338, "step": 58150 }, { "epoch": 0.42100081796926464, "grad_norm": 0.16038931906223297, "learning_rate": 4.579006420696794e-06, "loss": 0.9347, "step": 58160 }, { "epoch": 0.4210732046298508, "grad_norm": 0.17341868579387665, "learning_rate": 4.578934034036208e-06, "loss": 0.9468, "step": 58170 }, { "epoch": 0.421145591290437, "grad_norm": 0.17107468843460083, "learning_rate": 4.578861647375622e-06, "loss": 0.9385, "step": 58180 }, { "epoch": 0.42121797795102317, "grad_norm": 0.16987687349319458, "learning_rate": 4.578789260715036e-06, "loss": 0.9256, "step": 58190 }, { "epoch": 0.42129036461160935, "grad_norm": 0.15816541016101837, "learning_rate": 4.57871687405445e-06, "loss": 0.9309, "step": 58200 }, { "epoch": 0.4213627512721956, "grad_norm": 0.17220225930213928, "learning_rate": 4.578644487393863e-06, "loss": 0.931, "step": 58210 }, { "epoch": 0.42143513793278176, "grad_norm": 0.15358738601207733, "learning_rate": 4.578572100733277e-06, "loss": 0.9363, "step": 58220 }, { "epoch": 0.42150752459336793, "grad_norm": 0.1880992203950882, "learning_rate": 4.578499714072691e-06, "loss": 0.9336, "step": 58230 }, { "epoch": 0.4215799112539541, "grad_norm": 0.1707928329706192, "learning_rate": 4.578427327412105e-06, "loss": 0.9444, "step": 58240 }, { "epoch": 0.4216522979145403, "grad_norm": 0.16345572471618652, "learning_rate": 4.578354940751519e-06, "loss": 0.9401, "step": 58250 }, { "epoch": 0.4217246845751265, "grad_norm": 0.15612851083278656, "learning_rate": 4.578282554090932e-06, "loss": 0.9325, "step": 58260 }, { "epoch": 0.4217970712357127, "grad_norm": 0.15962456166744232, "learning_rate": 4.578210167430347e-06, "loss": 0.933, "step": 58270 }, { "epoch": 0.42186945789629887, "grad_norm": 0.16563986241817474, "learning_rate": 4.57813778076976e-06, "loss": 0.9284, "step": 58280 }, { "epoch": 0.42194184455688505, "grad_norm": 0.18625997006893158, "learning_rate": 4.578065394109174e-06, "loss": 0.927, "step": 58290 }, { "epoch": 0.4220142312174712, "grad_norm": 0.16373422741889954, "learning_rate": 4.5779930074485876e-06, "loss": 0.9354, "step": 58300 }, { "epoch": 0.42208661787805746, "grad_norm": 0.16232380270957947, "learning_rate": 4.577920620788002e-06, "loss": 0.9257, "step": 58310 }, { "epoch": 0.42215900453864363, "grad_norm": 0.1764630377292633, "learning_rate": 4.577848234127416e-06, "loss": 0.9374, "step": 58320 }, { "epoch": 0.4222313911992298, "grad_norm": 0.1576317995786667, "learning_rate": 4.577775847466829e-06, "loss": 0.9403, "step": 58330 }, { "epoch": 0.422303777859816, "grad_norm": 0.18781952559947968, "learning_rate": 4.577703460806243e-06, "loss": 0.9278, "step": 58340 }, { "epoch": 0.42237616452040216, "grad_norm": 0.1682749092578888, "learning_rate": 4.577631074145657e-06, "loss": 0.9339, "step": 58350 }, { "epoch": 0.42244855118098834, "grad_norm": 0.17663206160068512, "learning_rate": 4.577558687485071e-06, "loss": 0.937, "step": 58360 }, { "epoch": 0.4225209378415746, "grad_norm": 0.16293081641197205, "learning_rate": 4.577486300824485e-06, "loss": 0.924, "step": 58370 }, { "epoch": 0.42259332450216075, "grad_norm": 0.17560496926307678, "learning_rate": 4.577413914163898e-06, "loss": 0.9319, "step": 58380 }, { "epoch": 0.4226657111627469, "grad_norm": 0.1667516529560089, "learning_rate": 4.577341527503313e-06, "loss": 0.9406, "step": 58390 }, { "epoch": 0.4227380978233331, "grad_norm": 0.15829908847808838, "learning_rate": 4.577269140842726e-06, "loss": 0.9388, "step": 58400 }, { "epoch": 0.4228104844839193, "grad_norm": 0.16750116646289825, "learning_rate": 4.57719675418214e-06, "loss": 0.9415, "step": 58410 }, { "epoch": 0.4228828711445055, "grad_norm": 0.21571747958660126, "learning_rate": 4.5771243675215535e-06, "loss": 0.9387, "step": 58420 }, { "epoch": 0.4229552578050917, "grad_norm": 0.16604144871234894, "learning_rate": 4.577051980860968e-06, "loss": 0.9345, "step": 58430 }, { "epoch": 0.42302764446567787, "grad_norm": 0.16571690142154694, "learning_rate": 4.576979594200382e-06, "loss": 0.9398, "step": 58440 }, { "epoch": 0.42310003112626404, "grad_norm": 0.1598815768957138, "learning_rate": 4.576907207539794e-06, "loss": 0.938, "step": 58450 }, { "epoch": 0.4231724177868502, "grad_norm": 0.16254471242427826, "learning_rate": 4.576834820879209e-06, "loss": 0.9153, "step": 58460 }, { "epoch": 0.42324480444743645, "grad_norm": 0.17115645110607147, "learning_rate": 4.5767624342186224e-06, "loss": 0.936, "step": 58470 }, { "epoch": 0.42331719110802263, "grad_norm": 0.1529657244682312, "learning_rate": 4.576690047558036e-06, "loss": 0.9381, "step": 58480 }, { "epoch": 0.4233895777686088, "grad_norm": 0.15278260409832, "learning_rate": 4.57661766089745e-06, "loss": 0.9446, "step": 58490 }, { "epoch": 0.423461964429195, "grad_norm": 0.15874941647052765, "learning_rate": 4.576545274236864e-06, "loss": 0.943, "step": 58500 }, { "epoch": 0.42353435108978116, "grad_norm": 0.1559789478778839, "learning_rate": 4.576472887576278e-06, "loss": 0.9342, "step": 58510 }, { "epoch": 0.42360673775036733, "grad_norm": 0.1483520269393921, "learning_rate": 4.576400500915691e-06, "loss": 0.927, "step": 58520 }, { "epoch": 0.42367912441095357, "grad_norm": 0.16274036467075348, "learning_rate": 4.576328114255105e-06, "loss": 0.9228, "step": 58530 }, { "epoch": 0.42375151107153974, "grad_norm": 0.16001389920711517, "learning_rate": 4.5762557275945194e-06, "loss": 0.9284, "step": 58540 }, { "epoch": 0.4238238977321259, "grad_norm": 0.16106966137886047, "learning_rate": 4.576183340933933e-06, "loss": 0.9275, "step": 58550 }, { "epoch": 0.4238962843927121, "grad_norm": 0.17756831645965576, "learning_rate": 4.576110954273347e-06, "loss": 0.9324, "step": 58560 }, { "epoch": 0.4239686710532983, "grad_norm": 0.18322084844112396, "learning_rate": 4.57603856761276e-06, "loss": 0.9276, "step": 58570 }, { "epoch": 0.4240410577138845, "grad_norm": 0.17278200387954712, "learning_rate": 4.575966180952175e-06, "loss": 0.9419, "step": 58580 }, { "epoch": 0.4241134443744707, "grad_norm": 0.16556476056575775, "learning_rate": 4.575893794291588e-06, "loss": 0.9291, "step": 58590 }, { "epoch": 0.42418583103505686, "grad_norm": 0.15898211300373077, "learning_rate": 4.575821407631002e-06, "loss": 0.9342, "step": 58600 }, { "epoch": 0.42425821769564304, "grad_norm": 0.16985763609409332, "learning_rate": 4.575749020970416e-06, "loss": 0.9345, "step": 58610 }, { "epoch": 0.4243306043562292, "grad_norm": 0.16115114092826843, "learning_rate": 4.57567663430983e-06, "loss": 0.9404, "step": 58620 }, { "epoch": 0.42440299101681545, "grad_norm": 0.16031764447689056, "learning_rate": 4.575604247649244e-06, "loss": 0.9347, "step": 58630 }, { "epoch": 0.4244753776774016, "grad_norm": 0.15465863049030304, "learning_rate": 4.575531860988657e-06, "loss": 0.9354, "step": 58640 }, { "epoch": 0.4245477643379878, "grad_norm": 0.17583829164505005, "learning_rate": 4.575459474328071e-06, "loss": 0.9308, "step": 58650 }, { "epoch": 0.424620150998574, "grad_norm": 0.16195513308048248, "learning_rate": 4.575387087667485e-06, "loss": 0.9312, "step": 58660 }, { "epoch": 0.42469253765916015, "grad_norm": 0.24764384329319, "learning_rate": 4.575314701006899e-06, "loss": 0.9256, "step": 58670 }, { "epoch": 0.4247649243197464, "grad_norm": 0.15488171577453613, "learning_rate": 4.575242314346313e-06, "loss": 0.9204, "step": 58680 }, { "epoch": 0.42483731098033256, "grad_norm": 0.16304126381874084, "learning_rate": 4.575169927685726e-06, "loss": 0.9366, "step": 58690 }, { "epoch": 0.42490969764091874, "grad_norm": 0.15404012799263, "learning_rate": 4.57509754102514e-06, "loss": 0.9253, "step": 58700 }, { "epoch": 0.4249820843015049, "grad_norm": 0.17125609517097473, "learning_rate": 4.575025154364554e-06, "loss": 0.9325, "step": 58710 }, { "epoch": 0.4250544709620911, "grad_norm": 0.17699052393436432, "learning_rate": 4.574952767703968e-06, "loss": 0.9248, "step": 58720 }, { "epoch": 0.42512685762267727, "grad_norm": 0.1671576052904129, "learning_rate": 4.5748803810433815e-06, "loss": 0.9407, "step": 58730 }, { "epoch": 0.4251992442832635, "grad_norm": 0.48583319783210754, "learning_rate": 4.574807994382795e-06, "loss": 0.9151, "step": 58740 }, { "epoch": 0.4252716309438497, "grad_norm": 0.15986119210720062, "learning_rate": 4.57473560772221e-06, "loss": 0.9351, "step": 58750 }, { "epoch": 0.42534401760443585, "grad_norm": 0.14684616029262543, "learning_rate": 4.574663221061623e-06, "loss": 0.9456, "step": 58760 }, { "epoch": 0.42541640426502203, "grad_norm": 0.2594200074672699, "learning_rate": 4.574590834401037e-06, "loss": 0.9368, "step": 58770 }, { "epoch": 0.4254887909256082, "grad_norm": 0.15739043056964874, "learning_rate": 4.5745184477404505e-06, "loss": 0.9336, "step": 58780 }, { "epoch": 0.42556117758619444, "grad_norm": 0.1712053120136261, "learning_rate": 4.574446061079865e-06, "loss": 0.9341, "step": 58790 }, { "epoch": 0.4256335642467806, "grad_norm": 0.1579931527376175, "learning_rate": 4.5743736744192785e-06, "loss": 0.9315, "step": 58800 }, { "epoch": 0.4257059509073668, "grad_norm": 0.16759207844734192, "learning_rate": 4.574301287758692e-06, "loss": 0.9334, "step": 58810 }, { "epoch": 0.42577833756795297, "grad_norm": 0.15357771515846252, "learning_rate": 4.574228901098106e-06, "loss": 0.9341, "step": 58820 }, { "epoch": 0.42585072422853915, "grad_norm": 0.16257642209529877, "learning_rate": 4.57415651443752e-06, "loss": 0.9319, "step": 58830 }, { "epoch": 0.4259231108891254, "grad_norm": 0.1543009728193283, "learning_rate": 4.574084127776934e-06, "loss": 0.9544, "step": 58840 }, { "epoch": 0.42599549754971155, "grad_norm": 0.18277451395988464, "learning_rate": 4.5740117411163475e-06, "loss": 0.9316, "step": 58850 }, { "epoch": 0.42606788421029773, "grad_norm": 0.1689336895942688, "learning_rate": 4.573939354455761e-06, "loss": 0.9406, "step": 58860 }, { "epoch": 0.4261402708708839, "grad_norm": 0.16068103909492493, "learning_rate": 4.5738669677951756e-06, "loss": 0.9412, "step": 58870 }, { "epoch": 0.4262126575314701, "grad_norm": 0.1947869062423706, "learning_rate": 4.573794581134589e-06, "loss": 0.9303, "step": 58880 }, { "epoch": 0.42628504419205626, "grad_norm": 0.16800038516521454, "learning_rate": 4.573722194474003e-06, "loss": 0.9349, "step": 58890 }, { "epoch": 0.4263574308526425, "grad_norm": 0.16128326952457428, "learning_rate": 4.573649807813416e-06, "loss": 0.9433, "step": 58900 }, { "epoch": 0.42642981751322867, "grad_norm": 0.16078263521194458, "learning_rate": 4.573577421152831e-06, "loss": 0.9305, "step": 58910 }, { "epoch": 0.42650220417381485, "grad_norm": 0.20979563891887665, "learning_rate": 4.5735050344922445e-06, "loss": 0.9481, "step": 58920 }, { "epoch": 0.426574590834401, "grad_norm": 0.16073863208293915, "learning_rate": 4.573432647831658e-06, "loss": 0.9395, "step": 58930 }, { "epoch": 0.4266469774949872, "grad_norm": 0.17131203413009644, "learning_rate": 4.573360261171072e-06, "loss": 0.9441, "step": 58940 }, { "epoch": 0.42671936415557343, "grad_norm": 0.150382399559021, "learning_rate": 4.573287874510486e-06, "loss": 0.926, "step": 58950 }, { "epoch": 0.4267917508161596, "grad_norm": 0.15955784916877747, "learning_rate": 4.5732154878499e-06, "loss": 0.9287, "step": 58960 }, { "epoch": 0.4268641374767458, "grad_norm": 0.16103602945804596, "learning_rate": 4.573143101189313e-06, "loss": 0.9269, "step": 58970 }, { "epoch": 0.42693652413733196, "grad_norm": 0.17586109042167664, "learning_rate": 4.573070714528727e-06, "loss": 0.9279, "step": 58980 }, { "epoch": 0.42700891079791814, "grad_norm": 0.1569269299507141, "learning_rate": 4.572998327868141e-06, "loss": 0.932, "step": 58990 }, { "epoch": 0.42708129745850437, "grad_norm": 0.1663513034582138, "learning_rate": 4.572925941207554e-06, "loss": 0.9276, "step": 59000 }, { "epoch": 0.42715368411909055, "grad_norm": 0.16358311474323273, "learning_rate": 4.572853554546968e-06, "loss": 0.936, "step": 59010 }, { "epoch": 0.4272260707796767, "grad_norm": 0.15506871044635773, "learning_rate": 4.572781167886382e-06, "loss": 0.9271, "step": 59020 }, { "epoch": 0.4272984574402629, "grad_norm": 0.156698539853096, "learning_rate": 4.572708781225796e-06, "loss": 0.9327, "step": 59030 }, { "epoch": 0.4273708441008491, "grad_norm": 0.1768382340669632, "learning_rate": 4.5726363945652096e-06, "loss": 0.9408, "step": 59040 }, { "epoch": 0.42744323076143526, "grad_norm": 0.15888147056102753, "learning_rate": 4.572564007904623e-06, "loss": 0.9294, "step": 59050 }, { "epoch": 0.4275156174220215, "grad_norm": 0.1897684782743454, "learning_rate": 4.572491621244038e-06, "loss": 0.9478, "step": 59060 }, { "epoch": 0.42758800408260766, "grad_norm": 0.18450258672237396, "learning_rate": 4.572419234583451e-06, "loss": 0.9202, "step": 59070 }, { "epoch": 0.42766039074319384, "grad_norm": 0.16853807866573334, "learning_rate": 4.572346847922865e-06, "loss": 0.9362, "step": 59080 }, { "epoch": 0.42773277740378, "grad_norm": 0.16247627139091492, "learning_rate": 4.5722744612622785e-06, "loss": 0.9351, "step": 59090 }, { "epoch": 0.4278051640643662, "grad_norm": 0.17450258135795593, "learning_rate": 4.572202074601693e-06, "loss": 0.9304, "step": 59100 }, { "epoch": 0.4278775507249524, "grad_norm": 0.22442488372325897, "learning_rate": 4.572129687941107e-06, "loss": 0.9176, "step": 59110 }, { "epoch": 0.4279499373855386, "grad_norm": 0.1596069484949112, "learning_rate": 4.57205730128052e-06, "loss": 0.9467, "step": 59120 }, { "epoch": 0.4280223240461248, "grad_norm": 0.16665290296077728, "learning_rate": 4.571984914619934e-06, "loss": 0.9333, "step": 59130 }, { "epoch": 0.42809471070671096, "grad_norm": 0.15915903449058533, "learning_rate": 4.571912527959348e-06, "loss": 0.9386, "step": 59140 }, { "epoch": 0.42816709736729713, "grad_norm": 0.16277122497558594, "learning_rate": 4.571840141298762e-06, "loss": 0.9298, "step": 59150 }, { "epoch": 0.42823948402788337, "grad_norm": 0.14925530552864075, "learning_rate": 4.5717677546381755e-06, "loss": 0.9454, "step": 59160 }, { "epoch": 0.42831187068846954, "grad_norm": 0.1798754781484604, "learning_rate": 4.571695367977589e-06, "loss": 0.9397, "step": 59170 }, { "epoch": 0.4283842573490557, "grad_norm": 0.16380873322486877, "learning_rate": 4.571622981317004e-06, "loss": 0.9515, "step": 59180 }, { "epoch": 0.4284566440096419, "grad_norm": 0.16432473063468933, "learning_rate": 4.571550594656417e-06, "loss": 0.9351, "step": 59190 }, { "epoch": 0.4285290306702281, "grad_norm": 0.14648674428462982, "learning_rate": 4.571478207995831e-06, "loss": 0.9283, "step": 59200 }, { "epoch": 0.4286014173308143, "grad_norm": 0.1510661244392395, "learning_rate": 4.5714058213352444e-06, "loss": 0.9295, "step": 59210 }, { "epoch": 0.4286738039914005, "grad_norm": 0.18739067018032074, "learning_rate": 4.571333434674659e-06, "loss": 0.937, "step": 59220 }, { "epoch": 0.42874619065198666, "grad_norm": 0.16375543177127838, "learning_rate": 4.5712610480140725e-06, "loss": 0.9354, "step": 59230 }, { "epoch": 0.42881857731257284, "grad_norm": 0.16390521824359894, "learning_rate": 4.571188661353486e-06, "loss": 0.9353, "step": 59240 }, { "epoch": 0.428890963973159, "grad_norm": 0.14936009049415588, "learning_rate": 4.5711162746929e-06, "loss": 0.9253, "step": 59250 }, { "epoch": 0.4289633506337452, "grad_norm": 0.16098490357398987, "learning_rate": 4.571043888032314e-06, "loss": 0.9187, "step": 59260 }, { "epoch": 0.4290357372943314, "grad_norm": 0.16449196636676788, "learning_rate": 4.570971501371728e-06, "loss": 0.93, "step": 59270 }, { "epoch": 0.4291081239549176, "grad_norm": 0.44586077332496643, "learning_rate": 4.5708991147111414e-06, "loss": 0.9421, "step": 59280 }, { "epoch": 0.4291805106155038, "grad_norm": 0.15456175804138184, "learning_rate": 4.570826728050555e-06, "loss": 0.9223, "step": 59290 }, { "epoch": 0.42925289727608995, "grad_norm": 0.17588649690151215, "learning_rate": 4.570754341389969e-06, "loss": 0.9438, "step": 59300 }, { "epoch": 0.4293252839366761, "grad_norm": 0.15864711999893188, "learning_rate": 4.570681954729383e-06, "loss": 0.9313, "step": 59310 }, { "epoch": 0.42939767059726236, "grad_norm": 0.16494864225387573, "learning_rate": 4.570609568068797e-06, "loss": 0.9457, "step": 59320 }, { "epoch": 0.42947005725784854, "grad_norm": 0.15359218418598175, "learning_rate": 4.57053718140821e-06, "loss": 0.9295, "step": 59330 }, { "epoch": 0.4295424439184347, "grad_norm": 0.16325341165065765, "learning_rate": 4.570464794747624e-06, "loss": 0.9434, "step": 59340 }, { "epoch": 0.4296148305790209, "grad_norm": 0.15335196256637573, "learning_rate": 4.5703924080870385e-06, "loss": 0.9255, "step": 59350 }, { "epoch": 0.42968721723960707, "grad_norm": 0.15363959968090057, "learning_rate": 4.570320021426452e-06, "loss": 0.9349, "step": 59360 }, { "epoch": 0.4297596039001933, "grad_norm": 0.16724181175231934, "learning_rate": 4.570247634765866e-06, "loss": 0.9281, "step": 59370 }, { "epoch": 0.4298319905607795, "grad_norm": 0.1659107208251953, "learning_rate": 4.570175248105279e-06, "loss": 0.9412, "step": 59380 }, { "epoch": 0.42990437722136565, "grad_norm": 0.17168617248535156, "learning_rate": 4.570102861444694e-06, "loss": 0.9401, "step": 59390 }, { "epoch": 0.42997676388195183, "grad_norm": 0.15352968871593475, "learning_rate": 4.570030474784107e-06, "loss": 0.9271, "step": 59400 }, { "epoch": 0.430049150542538, "grad_norm": 0.17253489792346954, "learning_rate": 4.569958088123521e-06, "loss": 0.9324, "step": 59410 }, { "epoch": 0.4301215372031242, "grad_norm": 0.15288107097148895, "learning_rate": 4.569885701462935e-06, "loss": 0.9274, "step": 59420 }, { "epoch": 0.4301939238637104, "grad_norm": 0.17181651294231415, "learning_rate": 4.569813314802349e-06, "loss": 0.9399, "step": 59430 }, { "epoch": 0.4302663105242966, "grad_norm": 0.16761989891529083, "learning_rate": 4.569740928141763e-06, "loss": 0.938, "step": 59440 }, { "epoch": 0.43033869718488277, "grad_norm": 0.16204999387264252, "learning_rate": 4.569668541481176e-06, "loss": 0.9332, "step": 59450 }, { "epoch": 0.43041108384546894, "grad_norm": 0.1525745391845703, "learning_rate": 4.56959615482059e-06, "loss": 0.93, "step": 59460 }, { "epoch": 0.4304834705060551, "grad_norm": 0.17561571300029755, "learning_rate": 4.569523768160004e-06, "loss": 0.9375, "step": 59470 }, { "epoch": 0.43055585716664135, "grad_norm": 0.1534716933965683, "learning_rate": 4.569451381499418e-06, "loss": 0.9513, "step": 59480 }, { "epoch": 0.43062824382722753, "grad_norm": 0.16287165880203247, "learning_rate": 4.569378994838832e-06, "loss": 0.9542, "step": 59490 }, { "epoch": 0.4307006304878137, "grad_norm": 0.1597660928964615, "learning_rate": 4.569306608178245e-06, "loss": 0.926, "step": 59500 }, { "epoch": 0.4307730171483999, "grad_norm": 0.18294881284236908, "learning_rate": 4.569234221517659e-06, "loss": 0.9382, "step": 59510 }, { "epoch": 0.43084540380898606, "grad_norm": 0.1525103747844696, "learning_rate": 4.5691618348570725e-06, "loss": 0.937, "step": 59520 }, { "epoch": 0.4309177904695723, "grad_norm": 0.15872421860694885, "learning_rate": 4.569089448196486e-06, "loss": 0.9225, "step": 59530 }, { "epoch": 0.43099017713015847, "grad_norm": 0.15989533066749573, "learning_rate": 4.5690170615359005e-06, "loss": 0.926, "step": 59540 }, { "epoch": 0.43106256379074465, "grad_norm": 0.16288037598133087, "learning_rate": 4.568944674875314e-06, "loss": 0.9269, "step": 59550 }, { "epoch": 0.4311349504513308, "grad_norm": 0.16788716614246368, "learning_rate": 4.568872288214728e-06, "loss": 0.9176, "step": 59560 }, { "epoch": 0.431207337111917, "grad_norm": 0.15858720242977142, "learning_rate": 4.568799901554141e-06, "loss": 0.9246, "step": 59570 }, { "epoch": 0.4312797237725032, "grad_norm": 0.17023035883903503, "learning_rate": 4.568727514893556e-06, "loss": 0.9416, "step": 59580 }, { "epoch": 0.4313521104330894, "grad_norm": 0.16211800277233124, "learning_rate": 4.5686551282329695e-06, "loss": 0.9295, "step": 59590 }, { "epoch": 0.4314244970936756, "grad_norm": 0.16199994087219238, "learning_rate": 4.568582741572383e-06, "loss": 0.9308, "step": 59600 }, { "epoch": 0.43149688375426176, "grad_norm": 0.1606810986995697, "learning_rate": 4.568510354911797e-06, "loss": 0.9309, "step": 59610 }, { "epoch": 0.43156927041484794, "grad_norm": 0.20290134847164154, "learning_rate": 4.568437968251211e-06, "loss": 0.9311, "step": 59620 }, { "epoch": 0.4316416570754341, "grad_norm": 0.18084190785884857, "learning_rate": 4.568365581590625e-06, "loss": 0.9283, "step": 59630 }, { "epoch": 0.43171404373602035, "grad_norm": 0.15636208653450012, "learning_rate": 4.568293194930038e-06, "loss": 0.939, "step": 59640 }, { "epoch": 0.4317864303966065, "grad_norm": 0.16248819231987, "learning_rate": 4.568220808269452e-06, "loss": 0.9457, "step": 59650 }, { "epoch": 0.4318588170571927, "grad_norm": 0.16725608706474304, "learning_rate": 4.5681484216088665e-06, "loss": 0.9239, "step": 59660 }, { "epoch": 0.4319312037177789, "grad_norm": 0.15592262148857117, "learning_rate": 4.56807603494828e-06, "loss": 0.9274, "step": 59670 }, { "epoch": 0.43200359037836505, "grad_norm": 0.16762572526931763, "learning_rate": 4.568003648287694e-06, "loss": 0.9387, "step": 59680 }, { "epoch": 0.4320759770389513, "grad_norm": 0.16156117618083954, "learning_rate": 4.567931261627107e-06, "loss": 0.9486, "step": 59690 }, { "epoch": 0.43214836369953746, "grad_norm": 0.1718205362558365, "learning_rate": 4.567858874966522e-06, "loss": 0.928, "step": 59700 }, { "epoch": 0.43222075036012364, "grad_norm": 0.15854863822460175, "learning_rate": 4.567786488305935e-06, "loss": 0.9373, "step": 59710 }, { "epoch": 0.4322931370207098, "grad_norm": 0.1781233698129654, "learning_rate": 4.567714101645349e-06, "loss": 0.9324, "step": 59720 }, { "epoch": 0.432365523681296, "grad_norm": 0.15047183632850647, "learning_rate": 4.567641714984763e-06, "loss": 0.9246, "step": 59730 }, { "epoch": 0.4324379103418822, "grad_norm": 0.1613796353340149, "learning_rate": 4.567569328324177e-06, "loss": 0.9407, "step": 59740 }, { "epoch": 0.4325102970024684, "grad_norm": 0.15587370097637177, "learning_rate": 4.567496941663591e-06, "loss": 0.9229, "step": 59750 }, { "epoch": 0.4325826836630546, "grad_norm": 0.15507788956165314, "learning_rate": 4.567424555003004e-06, "loss": 0.9281, "step": 59760 }, { "epoch": 0.43265507032364076, "grad_norm": 0.15560147166252136, "learning_rate": 4.567352168342418e-06, "loss": 0.9467, "step": 59770 }, { "epoch": 0.43272745698422693, "grad_norm": 0.1680835783481598, "learning_rate": 4.567279781681832e-06, "loss": 0.9317, "step": 59780 }, { "epoch": 0.4327998436448131, "grad_norm": 0.18736176192760468, "learning_rate": 4.567207395021246e-06, "loss": 0.9314, "step": 59790 }, { "epoch": 0.43287223030539934, "grad_norm": 0.1607762575149536, "learning_rate": 4.56713500836066e-06, "loss": 0.9391, "step": 59800 }, { "epoch": 0.4329446169659855, "grad_norm": 0.16497263312339783, "learning_rate": 4.567062621700073e-06, "loss": 0.9347, "step": 59810 }, { "epoch": 0.4330170036265717, "grad_norm": 0.14769291877746582, "learning_rate": 4.566990235039488e-06, "loss": 0.9089, "step": 59820 }, { "epoch": 0.43308939028715787, "grad_norm": 0.1630723625421524, "learning_rate": 4.566917848378901e-06, "loss": 0.9294, "step": 59830 }, { "epoch": 0.43316177694774405, "grad_norm": 0.1905062049627304, "learning_rate": 4.566845461718315e-06, "loss": 0.9248, "step": 59840 }, { "epoch": 0.4332341636083303, "grad_norm": 0.16832837462425232, "learning_rate": 4.566773075057729e-06, "loss": 0.9424, "step": 59850 }, { "epoch": 0.43330655026891646, "grad_norm": 0.16252058744430542, "learning_rate": 4.566700688397143e-06, "loss": 0.9272, "step": 59860 }, { "epoch": 0.43337893692950263, "grad_norm": 0.18382105231285095, "learning_rate": 4.566628301736557e-06, "loss": 0.9257, "step": 59870 }, { "epoch": 0.4334513235900888, "grad_norm": 0.16314184665679932, "learning_rate": 4.56655591507597e-06, "loss": 0.9395, "step": 59880 }, { "epoch": 0.433523710250675, "grad_norm": 0.17523783445358276, "learning_rate": 4.566483528415384e-06, "loss": 0.926, "step": 59890 }, { "epoch": 0.4335960969112612, "grad_norm": 0.16567225754261017, "learning_rate": 4.566411141754798e-06, "loss": 0.9382, "step": 59900 }, { "epoch": 0.4336684835718474, "grad_norm": 0.16518938541412354, "learning_rate": 4.566338755094212e-06, "loss": 0.9248, "step": 59910 }, { "epoch": 0.4337408702324336, "grad_norm": 0.147177055478096, "learning_rate": 4.566266368433626e-06, "loss": 0.9317, "step": 59920 }, { "epoch": 0.43381325689301975, "grad_norm": 0.1555219441652298, "learning_rate": 4.566193981773039e-06, "loss": 0.9323, "step": 59930 }, { "epoch": 0.4338856435536059, "grad_norm": 0.1518564671278, "learning_rate": 4.566121595112453e-06, "loss": 0.9413, "step": 59940 }, { "epoch": 0.4339580302141921, "grad_norm": 0.1824176162481308, "learning_rate": 4.566049208451867e-06, "loss": 0.9254, "step": 59950 }, { "epoch": 0.43403041687477834, "grad_norm": 0.16142070293426514, "learning_rate": 4.565976821791281e-06, "loss": 0.9313, "step": 59960 }, { "epoch": 0.4341028035353645, "grad_norm": 0.1917445957660675, "learning_rate": 4.5659044351306945e-06, "loss": 0.9205, "step": 59970 }, { "epoch": 0.4341751901959507, "grad_norm": 0.1893223375082016, "learning_rate": 4.565832048470108e-06, "loss": 0.932, "step": 59980 }, { "epoch": 0.43424757685653687, "grad_norm": 0.16585801541805267, "learning_rate": 4.565759661809523e-06, "loss": 0.9321, "step": 59990 }, { "epoch": 0.43431996351712304, "grad_norm": 0.1668858528137207, "learning_rate": 4.565687275148936e-06, "loss": 0.9322, "step": 60000 }, { "epoch": 0.4343923501777093, "grad_norm": 0.16714520752429962, "learning_rate": 4.56561488848835e-06, "loss": 0.9363, "step": 60010 }, { "epoch": 0.43446473683829545, "grad_norm": 0.5191177129745483, "learning_rate": 4.5655425018277634e-06, "loss": 0.9313, "step": 60020 }, { "epoch": 0.43453712349888163, "grad_norm": 0.16211098432540894, "learning_rate": 4.565470115167178e-06, "loss": 0.9287, "step": 60030 }, { "epoch": 0.4346095101594678, "grad_norm": 0.15494702756404877, "learning_rate": 4.565397728506591e-06, "loss": 0.9223, "step": 60040 }, { "epoch": 0.434681896820054, "grad_norm": 0.1619596928358078, "learning_rate": 4.565325341846005e-06, "loss": 0.9234, "step": 60050 }, { "epoch": 0.4347542834806402, "grad_norm": 0.1678442656993866, "learning_rate": 4.565252955185419e-06, "loss": 0.931, "step": 60060 }, { "epoch": 0.4348266701412264, "grad_norm": 0.1571994423866272, "learning_rate": 4.565180568524832e-06, "loss": 0.9274, "step": 60070 }, { "epoch": 0.43489905680181257, "grad_norm": 0.16465076804161072, "learning_rate": 4.565108181864246e-06, "loss": 0.9344, "step": 60080 }, { "epoch": 0.43497144346239874, "grad_norm": 0.1529812514781952, "learning_rate": 4.56503579520366e-06, "loss": 0.9154, "step": 60090 }, { "epoch": 0.4350438301229849, "grad_norm": 0.1658780574798584, "learning_rate": 4.564963408543074e-06, "loss": 0.9321, "step": 60100 }, { "epoch": 0.4351162167835711, "grad_norm": 0.15867140889167786, "learning_rate": 4.564891021882488e-06, "loss": 0.9317, "step": 60110 }, { "epoch": 0.43518860344415733, "grad_norm": 0.1777503490447998, "learning_rate": 4.564818635221901e-06, "loss": 0.9306, "step": 60120 }, { "epoch": 0.4352609901047435, "grad_norm": 0.15686187148094177, "learning_rate": 4.564746248561315e-06, "loss": 0.9357, "step": 60130 }, { "epoch": 0.4353333767653297, "grad_norm": 0.15787990391254425, "learning_rate": 4.564673861900729e-06, "loss": 0.9264, "step": 60140 }, { "epoch": 0.43540576342591586, "grad_norm": 0.16923680901527405, "learning_rate": 4.564601475240143e-06, "loss": 0.9343, "step": 60150 }, { "epoch": 0.43547815008650204, "grad_norm": 0.1720929890871048, "learning_rate": 4.564529088579557e-06, "loss": 0.9384, "step": 60160 }, { "epoch": 0.43555053674708827, "grad_norm": 0.2149965465068817, "learning_rate": 4.56445670191897e-06, "loss": 0.9439, "step": 60170 }, { "epoch": 0.43562292340767444, "grad_norm": 0.15889641642570496, "learning_rate": 4.564384315258385e-06, "loss": 0.926, "step": 60180 }, { "epoch": 0.4356953100682606, "grad_norm": 0.203876331448555, "learning_rate": 4.564311928597798e-06, "loss": 0.9298, "step": 60190 }, { "epoch": 0.4357676967288468, "grad_norm": 0.15438738465309143, "learning_rate": 4.564239541937212e-06, "loss": 0.9273, "step": 60200 }, { "epoch": 0.435840083389433, "grad_norm": 0.1764325052499771, "learning_rate": 4.5641671552766255e-06, "loss": 0.9513, "step": 60210 }, { "epoch": 0.4359124700500192, "grad_norm": 0.1527344286441803, "learning_rate": 4.56409476861604e-06, "loss": 0.9259, "step": 60220 }, { "epoch": 0.4359848567106054, "grad_norm": 0.15969283878803253, "learning_rate": 4.564022381955454e-06, "loss": 0.9343, "step": 60230 }, { "epoch": 0.43605724337119156, "grad_norm": 0.1652412861585617, "learning_rate": 4.563949995294867e-06, "loss": 0.9318, "step": 60240 }, { "epoch": 0.43612963003177774, "grad_norm": 0.1574745625257492, "learning_rate": 4.563877608634281e-06, "loss": 0.9204, "step": 60250 }, { "epoch": 0.4362020166923639, "grad_norm": 0.16842874884605408, "learning_rate": 4.563805221973695e-06, "loss": 0.928, "step": 60260 }, { "epoch": 0.4362744033529501, "grad_norm": 0.17006808519363403, "learning_rate": 4.563732835313109e-06, "loss": 0.9243, "step": 60270 }, { "epoch": 0.4363467900135363, "grad_norm": 0.16430647671222687, "learning_rate": 4.5636604486525225e-06, "loss": 0.9332, "step": 60280 }, { "epoch": 0.4364191766741225, "grad_norm": 0.15201468765735626, "learning_rate": 4.563588061991936e-06, "loss": 0.9301, "step": 60290 }, { "epoch": 0.4364915633347087, "grad_norm": 0.15162621438503265, "learning_rate": 4.563515675331351e-06, "loss": 0.9387, "step": 60300 }, { "epoch": 0.43656394999529485, "grad_norm": 0.16102218627929688, "learning_rate": 4.563443288670764e-06, "loss": 0.9418, "step": 60310 }, { "epoch": 0.43663633665588103, "grad_norm": 0.1847027838230133, "learning_rate": 4.563370902010178e-06, "loss": 0.9279, "step": 60320 }, { "epoch": 0.43670872331646726, "grad_norm": 0.18758545815944672, "learning_rate": 4.5632985153495915e-06, "loss": 0.9381, "step": 60330 }, { "epoch": 0.43678110997705344, "grad_norm": 0.16631188988685608, "learning_rate": 4.563226128689006e-06, "loss": 0.933, "step": 60340 }, { "epoch": 0.4368534966376396, "grad_norm": 0.1626015156507492, "learning_rate": 4.5631537420284196e-06, "loss": 0.9305, "step": 60350 }, { "epoch": 0.4369258832982258, "grad_norm": 0.16860458254814148, "learning_rate": 4.563081355367833e-06, "loss": 0.9341, "step": 60360 }, { "epoch": 0.43699826995881197, "grad_norm": 0.15493610501289368, "learning_rate": 4.563008968707247e-06, "loss": 0.9321, "step": 60370 }, { "epoch": 0.4370706566193982, "grad_norm": 0.16478878259658813, "learning_rate": 4.562936582046661e-06, "loss": 0.9218, "step": 60380 }, { "epoch": 0.4371430432799844, "grad_norm": 0.16642285883426666, "learning_rate": 4.562864195386075e-06, "loss": 0.9375, "step": 60390 }, { "epoch": 0.43721542994057055, "grad_norm": 0.1687694787979126, "learning_rate": 4.5627918087254885e-06, "loss": 0.922, "step": 60400 }, { "epoch": 0.43728781660115673, "grad_norm": 0.15206386148929596, "learning_rate": 4.562719422064902e-06, "loss": 0.9289, "step": 60410 }, { "epoch": 0.4373602032617429, "grad_norm": 0.1521196812391281, "learning_rate": 4.5626470354043166e-06, "loss": 0.9391, "step": 60420 }, { "epoch": 0.43743258992232914, "grad_norm": 0.1528043895959854, "learning_rate": 4.56257464874373e-06, "loss": 0.9537, "step": 60430 }, { "epoch": 0.4375049765829153, "grad_norm": 0.16489632427692413, "learning_rate": 4.562502262083144e-06, "loss": 0.9462, "step": 60440 }, { "epoch": 0.4375773632435015, "grad_norm": 0.1611616611480713, "learning_rate": 4.562429875422557e-06, "loss": 0.94, "step": 60450 }, { "epoch": 0.43764974990408767, "grad_norm": 0.1894521713256836, "learning_rate": 4.562357488761972e-06, "loss": 0.937, "step": 60460 }, { "epoch": 0.43772213656467385, "grad_norm": 0.1543973833322525, "learning_rate": 4.5622851021013855e-06, "loss": 0.9228, "step": 60470 }, { "epoch": 0.43779452322526, "grad_norm": 0.15751250088214874, "learning_rate": 4.562212715440799e-06, "loss": 0.9269, "step": 60480 }, { "epoch": 0.43786690988584626, "grad_norm": 0.15188553929328918, "learning_rate": 4.562140328780213e-06, "loss": 0.9333, "step": 60490 }, { "epoch": 0.43793929654643243, "grad_norm": 0.1614580750465393, "learning_rate": 4.562067942119627e-06, "loss": 0.9432, "step": 60500 }, { "epoch": 0.4380116832070186, "grad_norm": 0.18022243678569794, "learning_rate": 4.561995555459041e-06, "loss": 0.9232, "step": 60510 }, { "epoch": 0.4380840698676048, "grad_norm": 0.16363121569156647, "learning_rate": 4.561923168798454e-06, "loss": 0.9374, "step": 60520 }, { "epoch": 0.43815645652819096, "grad_norm": 0.16248828172683716, "learning_rate": 4.561850782137868e-06, "loss": 0.9284, "step": 60530 }, { "epoch": 0.4382288431887772, "grad_norm": 0.20947439968585968, "learning_rate": 4.5617783954772825e-06, "loss": 0.9374, "step": 60540 }, { "epoch": 0.43830122984936337, "grad_norm": 0.1605253666639328, "learning_rate": 4.561706008816696e-06, "loss": 0.9469, "step": 60550 }, { "epoch": 0.43837361650994955, "grad_norm": 0.1608964055776596, "learning_rate": 4.56163362215611e-06, "loss": 0.9317, "step": 60560 }, { "epoch": 0.4384460031705357, "grad_norm": 0.18973439931869507, "learning_rate": 4.561561235495523e-06, "loss": 0.9383, "step": 60570 }, { "epoch": 0.4385183898311219, "grad_norm": 0.16821689903736115, "learning_rate": 4.561488848834937e-06, "loss": 0.9244, "step": 60580 }, { "epoch": 0.43859077649170813, "grad_norm": 0.16728858649730682, "learning_rate": 4.561416462174351e-06, "loss": 0.9172, "step": 60590 }, { "epoch": 0.4386631631522943, "grad_norm": 0.15404008328914642, "learning_rate": 4.561344075513764e-06, "loss": 0.9352, "step": 60600 }, { "epoch": 0.4387355498128805, "grad_norm": 0.20690277218818665, "learning_rate": 4.561271688853179e-06, "loss": 0.9347, "step": 60610 }, { "epoch": 0.43880793647346666, "grad_norm": 0.16291332244873047, "learning_rate": 4.561199302192592e-06, "loss": 0.9295, "step": 60620 }, { "epoch": 0.43888032313405284, "grad_norm": 0.18108299374580383, "learning_rate": 4.561126915532006e-06, "loss": 0.9339, "step": 60630 }, { "epoch": 0.438952709794639, "grad_norm": 0.15784204006195068, "learning_rate": 4.5610545288714195e-06, "loss": 0.9267, "step": 60640 }, { "epoch": 0.43902509645522525, "grad_norm": 0.15953408181667328, "learning_rate": 4.560982142210834e-06, "loss": 0.9392, "step": 60650 }, { "epoch": 0.4390974831158114, "grad_norm": 0.14857307076454163, "learning_rate": 4.560909755550248e-06, "loss": 0.9322, "step": 60660 }, { "epoch": 0.4391698697763976, "grad_norm": 0.16134792566299438, "learning_rate": 4.560837368889661e-06, "loss": 0.9428, "step": 60670 }, { "epoch": 0.4392422564369838, "grad_norm": 0.15805503726005554, "learning_rate": 4.560764982229075e-06, "loss": 0.9246, "step": 60680 }, { "epoch": 0.43931464309756996, "grad_norm": 0.16385342180728912, "learning_rate": 4.560692595568489e-06, "loss": 0.9318, "step": 60690 }, { "epoch": 0.4393870297581562, "grad_norm": 0.16679242253303528, "learning_rate": 4.560620208907903e-06, "loss": 0.915, "step": 60700 }, { "epoch": 0.43945941641874237, "grad_norm": 0.15610092878341675, "learning_rate": 4.5605478222473165e-06, "loss": 0.926, "step": 60710 }, { "epoch": 0.43953180307932854, "grad_norm": 0.16888998448848724, "learning_rate": 4.56047543558673e-06, "loss": 0.9093, "step": 60720 }, { "epoch": 0.4396041897399147, "grad_norm": 0.31106677651405334, "learning_rate": 4.560403048926144e-06, "loss": 0.9279, "step": 60730 }, { "epoch": 0.4396765764005009, "grad_norm": 0.16773858666419983, "learning_rate": 4.560330662265558e-06, "loss": 0.9385, "step": 60740 }, { "epoch": 0.43974896306108713, "grad_norm": 0.1473310887813568, "learning_rate": 4.560258275604972e-06, "loss": 0.9347, "step": 60750 }, { "epoch": 0.4398213497216733, "grad_norm": 0.17835259437561035, "learning_rate": 4.5601858889443854e-06, "loss": 0.9369, "step": 60760 }, { "epoch": 0.4398937363822595, "grad_norm": 0.15652461349964142, "learning_rate": 4.560113502283799e-06, "loss": 0.9309, "step": 60770 }, { "epoch": 0.43996612304284566, "grad_norm": 0.15282343327999115, "learning_rate": 4.5600411156232135e-06, "loss": 0.927, "step": 60780 }, { "epoch": 0.44003850970343183, "grad_norm": 0.16254651546478271, "learning_rate": 4.559968728962627e-06, "loss": 0.9371, "step": 60790 }, { "epoch": 0.440110896364018, "grad_norm": 0.16506025195121765, "learning_rate": 4.559896342302041e-06, "loss": 0.9312, "step": 60800 }, { "epoch": 0.44018328302460424, "grad_norm": 0.16597755253314972, "learning_rate": 4.559823955641454e-06, "loss": 0.9336, "step": 60810 }, { "epoch": 0.4402556696851904, "grad_norm": 0.15858708322048187, "learning_rate": 4.559751568980869e-06, "loss": 0.9463, "step": 60820 }, { "epoch": 0.4403280563457766, "grad_norm": 0.16279590129852295, "learning_rate": 4.5596791823202824e-06, "loss": 0.9377, "step": 60830 }, { "epoch": 0.4404004430063628, "grad_norm": 0.16087251901626587, "learning_rate": 4.559606795659696e-06, "loss": 0.9271, "step": 60840 }, { "epoch": 0.44047282966694895, "grad_norm": 0.17947107553482056, "learning_rate": 4.55953440899911e-06, "loss": 0.9367, "step": 60850 }, { "epoch": 0.4405452163275352, "grad_norm": 0.17072343826293945, "learning_rate": 4.559462022338524e-06, "loss": 0.9294, "step": 60860 }, { "epoch": 0.44061760298812136, "grad_norm": 0.16467486321926117, "learning_rate": 4.559389635677938e-06, "loss": 0.9368, "step": 60870 }, { "epoch": 0.44068998964870754, "grad_norm": 0.1682046353816986, "learning_rate": 4.559317249017351e-06, "loss": 0.9272, "step": 60880 }, { "epoch": 0.4407623763092937, "grad_norm": 0.16743090748786926, "learning_rate": 4.559244862356765e-06, "loss": 0.9356, "step": 60890 }, { "epoch": 0.4408347629698799, "grad_norm": 0.16245803236961365, "learning_rate": 4.5591724756961795e-06, "loss": 0.9326, "step": 60900 }, { "epoch": 0.4409071496304661, "grad_norm": 0.1696007251739502, "learning_rate": 4.559100089035593e-06, "loss": 0.9131, "step": 60910 }, { "epoch": 0.4409795362910523, "grad_norm": 0.16754589974880219, "learning_rate": 4.559027702375007e-06, "loss": 0.9268, "step": 60920 }, { "epoch": 0.4410519229516385, "grad_norm": 0.17982327938079834, "learning_rate": 4.55895531571442e-06, "loss": 0.9366, "step": 60930 }, { "epoch": 0.44112430961222465, "grad_norm": 0.15991713106632233, "learning_rate": 4.558882929053835e-06, "loss": 0.9275, "step": 60940 }, { "epoch": 0.44119669627281083, "grad_norm": 0.16579997539520264, "learning_rate": 4.558810542393248e-06, "loss": 0.935, "step": 60950 }, { "epoch": 0.44126908293339706, "grad_norm": 0.1842290610074997, "learning_rate": 4.558738155732662e-06, "loss": 0.9342, "step": 60960 }, { "epoch": 0.44134146959398324, "grad_norm": 0.1753285974264145, "learning_rate": 4.558665769072076e-06, "loss": 0.9382, "step": 60970 }, { "epoch": 0.4414138562545694, "grad_norm": 0.1587180495262146, "learning_rate": 4.55859338241149e-06, "loss": 0.9397, "step": 60980 }, { "epoch": 0.4414862429151556, "grad_norm": 0.2102600336074829, "learning_rate": 4.558520995750904e-06, "loss": 0.9305, "step": 60990 }, { "epoch": 0.44155862957574177, "grad_norm": 0.1546296626329422, "learning_rate": 4.558448609090317e-06, "loss": 0.9228, "step": 61000 }, { "epoch": 0.44163101623632794, "grad_norm": 0.16108326613903046, "learning_rate": 4.558376222429731e-06, "loss": 0.94, "step": 61010 }, { "epoch": 0.4417034028969142, "grad_norm": 0.17151787877082825, "learning_rate": 4.558303835769145e-06, "loss": 0.9354, "step": 61020 }, { "epoch": 0.44177578955750035, "grad_norm": 0.17950987815856934, "learning_rate": 4.558231449108559e-06, "loss": 0.9282, "step": 61030 }, { "epoch": 0.44184817621808653, "grad_norm": 0.16002346575260162, "learning_rate": 4.558159062447973e-06, "loss": 0.9411, "step": 61040 }, { "epoch": 0.4419205628786727, "grad_norm": 0.15132936835289001, "learning_rate": 4.558086675787386e-06, "loss": 0.9273, "step": 61050 }, { "epoch": 0.4419929495392589, "grad_norm": 0.19634242355823517, "learning_rate": 4.558014289126801e-06, "loss": 0.9273, "step": 61060 }, { "epoch": 0.4420653361998451, "grad_norm": 0.17866051197052002, "learning_rate": 4.557941902466214e-06, "loss": 0.9362, "step": 61070 }, { "epoch": 0.4421377228604313, "grad_norm": 0.16125331819057465, "learning_rate": 4.557869515805628e-06, "loss": 0.9297, "step": 61080 }, { "epoch": 0.44221010952101747, "grad_norm": 0.16221415996551514, "learning_rate": 4.5577971291450416e-06, "loss": 0.925, "step": 61090 }, { "epoch": 0.44228249618160365, "grad_norm": 0.1527089923620224, "learning_rate": 4.557724742484455e-06, "loss": 0.9194, "step": 61100 }, { "epoch": 0.4423548828421898, "grad_norm": 0.17006568610668182, "learning_rate": 4.557652355823869e-06, "loss": 0.9305, "step": 61110 }, { "epoch": 0.44242726950277605, "grad_norm": 0.1684480905532837, "learning_rate": 4.557579969163282e-06, "loss": 0.9277, "step": 61120 }, { "epoch": 0.44249965616336223, "grad_norm": 0.16048870980739594, "learning_rate": 4.557507582502697e-06, "loss": 0.9333, "step": 61130 }, { "epoch": 0.4425720428239484, "grad_norm": 0.16578523814678192, "learning_rate": 4.5574351958421105e-06, "loss": 0.9288, "step": 61140 }, { "epoch": 0.4426444294845346, "grad_norm": 0.16607344150543213, "learning_rate": 4.557362809181524e-06, "loss": 0.9125, "step": 61150 }, { "epoch": 0.44271681614512076, "grad_norm": 0.1659523993730545, "learning_rate": 4.557290422520938e-06, "loss": 0.9324, "step": 61160 }, { "epoch": 0.44278920280570694, "grad_norm": 0.17935402691364288, "learning_rate": 4.557218035860352e-06, "loss": 0.9362, "step": 61170 }, { "epoch": 0.44286158946629317, "grad_norm": 0.16320429742336273, "learning_rate": 4.557145649199766e-06, "loss": 0.9305, "step": 61180 }, { "epoch": 0.44293397612687935, "grad_norm": 0.15417033433914185, "learning_rate": 4.557073262539179e-06, "loss": 0.9378, "step": 61190 }, { "epoch": 0.4430063627874655, "grad_norm": 0.14864222705364227, "learning_rate": 4.557000875878593e-06, "loss": 0.9401, "step": 61200 }, { "epoch": 0.4430787494480517, "grad_norm": 0.189174622297287, "learning_rate": 4.5569284892180075e-06, "loss": 0.9357, "step": 61210 }, { "epoch": 0.4431511361086379, "grad_norm": 0.15933531522750854, "learning_rate": 4.556856102557421e-06, "loss": 0.9296, "step": 61220 }, { "epoch": 0.4432235227692241, "grad_norm": 0.15585920214653015, "learning_rate": 4.556783715896835e-06, "loss": 0.9277, "step": 61230 }, { "epoch": 0.4432959094298103, "grad_norm": 0.17847231030464172, "learning_rate": 4.556711329236248e-06, "loss": 0.9382, "step": 61240 }, { "epoch": 0.44336829609039646, "grad_norm": 0.15778236091136932, "learning_rate": 4.556638942575663e-06, "loss": 0.9191, "step": 61250 }, { "epoch": 0.44344068275098264, "grad_norm": 0.15541084110736847, "learning_rate": 4.556566555915076e-06, "loss": 0.9389, "step": 61260 }, { "epoch": 0.4435130694115688, "grad_norm": 0.17525739967823029, "learning_rate": 4.55649416925449e-06, "loss": 0.9447, "step": 61270 }, { "epoch": 0.44358545607215505, "grad_norm": 0.15737952291965485, "learning_rate": 4.556421782593904e-06, "loss": 0.9284, "step": 61280 }, { "epoch": 0.4436578427327412, "grad_norm": 0.16541706025600433, "learning_rate": 4.556349395933318e-06, "loss": 0.9284, "step": 61290 }, { "epoch": 0.4437302293933274, "grad_norm": 0.1597539633512497, "learning_rate": 4.556277009272732e-06, "loss": 0.9297, "step": 61300 }, { "epoch": 0.4438026160539136, "grad_norm": 0.15804599225521088, "learning_rate": 4.556204622612145e-06, "loss": 0.9233, "step": 61310 }, { "epoch": 0.44387500271449976, "grad_norm": 0.1523711085319519, "learning_rate": 4.556132235951559e-06, "loss": 0.9259, "step": 61320 }, { "epoch": 0.44394738937508593, "grad_norm": 0.18194106221199036, "learning_rate": 4.556059849290973e-06, "loss": 0.937, "step": 61330 }, { "epoch": 0.44401977603567216, "grad_norm": 0.15899352729320526, "learning_rate": 4.555987462630387e-06, "loss": 0.9282, "step": 61340 }, { "epoch": 0.44409216269625834, "grad_norm": 0.14628228545188904, "learning_rate": 4.555915075969801e-06, "loss": 0.9209, "step": 61350 }, { "epoch": 0.4441645493568445, "grad_norm": 0.15761525928974152, "learning_rate": 4.555842689309214e-06, "loss": 0.9334, "step": 61360 }, { "epoch": 0.4442369360174307, "grad_norm": 0.1836678832769394, "learning_rate": 4.555770302648628e-06, "loss": 0.9142, "step": 61370 }, { "epoch": 0.44430932267801687, "grad_norm": 0.1548013538122177, "learning_rate": 4.555697915988042e-06, "loss": 0.9247, "step": 61380 }, { "epoch": 0.4443817093386031, "grad_norm": 0.16135182976722717, "learning_rate": 4.555625529327456e-06, "loss": 0.9311, "step": 61390 }, { "epoch": 0.4444540959991893, "grad_norm": 0.1575031578540802, "learning_rate": 4.55555314266687e-06, "loss": 0.9356, "step": 61400 }, { "epoch": 0.44452648265977546, "grad_norm": 0.170990988612175, "learning_rate": 4.555480756006283e-06, "loss": 0.9471, "step": 61410 }, { "epoch": 0.44459886932036163, "grad_norm": 0.157973051071167, "learning_rate": 4.555408369345698e-06, "loss": 0.9324, "step": 61420 }, { "epoch": 0.4446712559809478, "grad_norm": 0.15711332857608795, "learning_rate": 4.555335982685111e-06, "loss": 0.9344, "step": 61430 }, { "epoch": 0.44474364264153404, "grad_norm": 0.16744282841682434, "learning_rate": 4.555263596024525e-06, "loss": 0.9255, "step": 61440 }, { "epoch": 0.4448160293021202, "grad_norm": 0.16048872470855713, "learning_rate": 4.5551912093639385e-06, "loss": 0.9338, "step": 61450 }, { "epoch": 0.4448884159627064, "grad_norm": 0.17134064435958862, "learning_rate": 4.555118822703353e-06, "loss": 0.9264, "step": 61460 }, { "epoch": 0.4449608026232926, "grad_norm": 0.15895238518714905, "learning_rate": 4.555046436042767e-06, "loss": 0.9251, "step": 61470 }, { "epoch": 0.44503318928387875, "grad_norm": 0.16355475783348083, "learning_rate": 4.55497404938218e-06, "loss": 0.9235, "step": 61480 }, { "epoch": 0.445105575944465, "grad_norm": 0.15274769067764282, "learning_rate": 4.554901662721594e-06, "loss": 0.9244, "step": 61490 }, { "epoch": 0.44517796260505116, "grad_norm": 0.24952305853366852, "learning_rate": 4.554829276061008e-06, "loss": 0.9222, "step": 61500 }, { "epoch": 0.44525034926563734, "grad_norm": 0.17668789625167847, "learning_rate": 4.554756889400422e-06, "loss": 0.9155, "step": 61510 }, { "epoch": 0.4453227359262235, "grad_norm": 0.22048163414001465, "learning_rate": 4.5546845027398355e-06, "loss": 0.921, "step": 61520 }, { "epoch": 0.4453951225868097, "grad_norm": 0.1538519263267517, "learning_rate": 4.554612116079249e-06, "loss": 0.9302, "step": 61530 }, { "epoch": 0.44546750924739587, "grad_norm": 0.1744624376296997, "learning_rate": 4.554539729418664e-06, "loss": 0.9266, "step": 61540 }, { "epoch": 0.4455398959079821, "grad_norm": 0.17444263398647308, "learning_rate": 4.554467342758077e-06, "loss": 0.9306, "step": 61550 }, { "epoch": 0.4456122825685683, "grad_norm": 0.16291213035583496, "learning_rate": 4.554394956097491e-06, "loss": 0.9388, "step": 61560 }, { "epoch": 0.44568466922915445, "grad_norm": 0.16425669193267822, "learning_rate": 4.5543225694369044e-06, "loss": 0.9165, "step": 61570 }, { "epoch": 0.4457570558897406, "grad_norm": 0.16991227865219116, "learning_rate": 4.554250182776319e-06, "loss": 0.9208, "step": 61580 }, { "epoch": 0.4458294425503268, "grad_norm": 0.17110396921634674, "learning_rate": 4.5541777961157325e-06, "loss": 0.9288, "step": 61590 }, { "epoch": 0.44590182921091304, "grad_norm": 0.17774710059165955, "learning_rate": 4.554105409455146e-06, "loss": 0.9334, "step": 61600 }, { "epoch": 0.4459742158714992, "grad_norm": 0.18292616307735443, "learning_rate": 4.55403302279456e-06, "loss": 0.9189, "step": 61610 }, { "epoch": 0.4460466025320854, "grad_norm": 0.15850049257278442, "learning_rate": 4.553960636133974e-06, "loss": 0.9296, "step": 61620 }, { "epoch": 0.44611898919267157, "grad_norm": 0.18073786795139313, "learning_rate": 4.553888249473387e-06, "loss": 0.9159, "step": 61630 }, { "epoch": 0.44619137585325774, "grad_norm": 0.15304653346538544, "learning_rate": 4.553815862812801e-06, "loss": 0.9312, "step": 61640 }, { "epoch": 0.446263762513844, "grad_norm": 0.18145766854286194, "learning_rate": 4.553743476152215e-06, "loss": 0.9203, "step": 61650 }, { "epoch": 0.44633614917443015, "grad_norm": 0.16524021327495575, "learning_rate": 4.553671089491629e-06, "loss": 0.9272, "step": 61660 }, { "epoch": 0.44640853583501633, "grad_norm": 0.17342203855514526, "learning_rate": 4.553598702831042e-06, "loss": 0.9182, "step": 61670 }, { "epoch": 0.4464809224956025, "grad_norm": 0.165330708026886, "learning_rate": 4.553526316170456e-06, "loss": 0.9312, "step": 61680 }, { "epoch": 0.4465533091561887, "grad_norm": 0.166887104511261, "learning_rate": 4.55345392950987e-06, "loss": 0.9259, "step": 61690 }, { "epoch": 0.44662569581677486, "grad_norm": 0.16618217527866364, "learning_rate": 4.553381542849284e-06, "loss": 0.9182, "step": 61700 }, { "epoch": 0.4466980824773611, "grad_norm": 0.15841081738471985, "learning_rate": 4.553309156188698e-06, "loss": 0.9348, "step": 61710 }, { "epoch": 0.44677046913794727, "grad_norm": 0.15774931013584137, "learning_rate": 4.553236769528111e-06, "loss": 0.9285, "step": 61720 }, { "epoch": 0.44684285579853344, "grad_norm": 0.16773931682109833, "learning_rate": 4.553164382867526e-06, "loss": 0.9249, "step": 61730 }, { "epoch": 0.4469152424591196, "grad_norm": 0.16781432926654816, "learning_rate": 4.553091996206939e-06, "loss": 0.9372, "step": 61740 }, { "epoch": 0.4469876291197058, "grad_norm": 0.15466833114624023, "learning_rate": 4.553019609546353e-06, "loss": 0.9349, "step": 61750 }, { "epoch": 0.44706001578029203, "grad_norm": 0.1791720688343048, "learning_rate": 4.5529472228857665e-06, "loss": 0.9285, "step": 61760 }, { "epoch": 0.4471324024408782, "grad_norm": 0.15556438267230988, "learning_rate": 4.552874836225181e-06, "loss": 0.9269, "step": 61770 }, { "epoch": 0.4472047891014644, "grad_norm": 0.1610628068447113, "learning_rate": 4.552802449564595e-06, "loss": 0.919, "step": 61780 }, { "epoch": 0.44727717576205056, "grad_norm": 0.16492611169815063, "learning_rate": 4.552730062904008e-06, "loss": 0.9167, "step": 61790 }, { "epoch": 0.44734956242263674, "grad_norm": 0.15720947086811066, "learning_rate": 4.552657676243422e-06, "loss": 0.9226, "step": 61800 }, { "epoch": 0.44742194908322297, "grad_norm": 0.1607959121465683, "learning_rate": 4.552585289582836e-06, "loss": 0.9386, "step": 61810 }, { "epoch": 0.44749433574380915, "grad_norm": 0.19096055626869202, "learning_rate": 4.55251290292225e-06, "loss": 0.9261, "step": 61820 }, { "epoch": 0.4475667224043953, "grad_norm": 0.1570555865764618, "learning_rate": 4.5524405162616636e-06, "loss": 0.9382, "step": 61830 }, { "epoch": 0.4476391090649815, "grad_norm": 0.16010279953479767, "learning_rate": 4.552368129601077e-06, "loss": 0.9222, "step": 61840 }, { "epoch": 0.4477114957255677, "grad_norm": 0.1684756577014923, "learning_rate": 4.552295742940492e-06, "loss": 0.9308, "step": 61850 }, { "epoch": 0.44778388238615385, "grad_norm": 0.15752823650836945, "learning_rate": 4.552223356279905e-06, "loss": 0.9248, "step": 61860 }, { "epoch": 0.4478562690467401, "grad_norm": 0.15489928424358368, "learning_rate": 4.552150969619319e-06, "loss": 0.9236, "step": 61870 }, { "epoch": 0.44792865570732626, "grad_norm": 0.1638929694890976, "learning_rate": 4.5520785829587325e-06, "loss": 0.9329, "step": 61880 }, { "epoch": 0.44800104236791244, "grad_norm": 0.15785187482833862, "learning_rate": 4.552006196298147e-06, "loss": 0.9346, "step": 61890 }, { "epoch": 0.4480734290284986, "grad_norm": 0.16453564167022705, "learning_rate": 4.5519338096375606e-06, "loss": 0.9417, "step": 61900 }, { "epoch": 0.4481458156890848, "grad_norm": 0.15828080475330353, "learning_rate": 4.551861422976974e-06, "loss": 0.9302, "step": 61910 }, { "epoch": 0.448218202349671, "grad_norm": 0.1843886375427246, "learning_rate": 4.551789036316388e-06, "loss": 0.9294, "step": 61920 }, { "epoch": 0.4482905890102572, "grad_norm": 0.17021195590496063, "learning_rate": 4.551716649655802e-06, "loss": 0.9208, "step": 61930 }, { "epoch": 0.4483629756708434, "grad_norm": 0.16324369609355927, "learning_rate": 4.551644262995216e-06, "loss": 0.9285, "step": 61940 }, { "epoch": 0.44843536233142955, "grad_norm": 0.16327841579914093, "learning_rate": 4.5515718763346295e-06, "loss": 0.9336, "step": 61950 }, { "epoch": 0.44850774899201573, "grad_norm": 0.16559500992298126, "learning_rate": 4.551499489674043e-06, "loss": 0.9416, "step": 61960 }, { "epoch": 0.44858013565260196, "grad_norm": 0.16372480988502502, "learning_rate": 4.551427103013457e-06, "loss": 0.9315, "step": 61970 }, { "epoch": 0.44865252231318814, "grad_norm": 0.16200987994670868, "learning_rate": 4.551354716352871e-06, "loss": 0.9153, "step": 61980 }, { "epoch": 0.4487249089737743, "grad_norm": 0.17236877977848053, "learning_rate": 4.551282329692285e-06, "loss": 0.9357, "step": 61990 }, { "epoch": 0.4487972956343605, "grad_norm": 0.22826656699180603, "learning_rate": 4.551209943031698e-06, "loss": 0.9265, "step": 62000 }, { "epoch": 0.44886968229494667, "grad_norm": 0.1597038060426712, "learning_rate": 4.551137556371112e-06, "loss": 0.9292, "step": 62010 }, { "epoch": 0.44894206895553285, "grad_norm": 0.15479369461536407, "learning_rate": 4.5510651697105265e-06, "loss": 0.9452, "step": 62020 }, { "epoch": 0.4490144556161191, "grad_norm": 0.1617387980222702, "learning_rate": 4.55099278304994e-06, "loss": 0.9311, "step": 62030 }, { "epoch": 0.44908684227670526, "grad_norm": 0.17810650169849396, "learning_rate": 4.550920396389354e-06, "loss": 0.9337, "step": 62040 }, { "epoch": 0.44915922893729143, "grad_norm": 0.15863506495952606, "learning_rate": 4.550848009728767e-06, "loss": 0.9391, "step": 62050 }, { "epoch": 0.4492316155978776, "grad_norm": 0.18483230471611023, "learning_rate": 4.550775623068182e-06, "loss": 0.924, "step": 62060 }, { "epoch": 0.4493040022584638, "grad_norm": 0.15691833198070526, "learning_rate": 4.550703236407595e-06, "loss": 0.9286, "step": 62070 }, { "epoch": 0.44937638891905, "grad_norm": 0.15761037170886993, "learning_rate": 4.550630849747009e-06, "loss": 0.9414, "step": 62080 }, { "epoch": 0.4494487755796362, "grad_norm": 0.15282666683197021, "learning_rate": 4.550558463086423e-06, "loss": 0.931, "step": 62090 }, { "epoch": 0.44952116224022237, "grad_norm": 0.1992768943309784, "learning_rate": 4.550486076425837e-06, "loss": 0.938, "step": 62100 }, { "epoch": 0.44959354890080855, "grad_norm": 0.15177763998508453, "learning_rate": 4.550413689765251e-06, "loss": 0.9436, "step": 62110 }, { "epoch": 0.4496659355613947, "grad_norm": 0.16348634660243988, "learning_rate": 4.550341303104664e-06, "loss": 0.9394, "step": 62120 }, { "epoch": 0.44973832222198096, "grad_norm": 0.18338032066822052, "learning_rate": 4.550268916444078e-06, "loss": 0.9319, "step": 62130 }, { "epoch": 0.44981070888256713, "grad_norm": 0.22670073807239532, "learning_rate": 4.5501965297834924e-06, "loss": 0.936, "step": 62140 }, { "epoch": 0.4498830955431533, "grad_norm": 0.17168883979320526, "learning_rate": 4.550124143122906e-06, "loss": 0.9335, "step": 62150 }, { "epoch": 0.4499554822037395, "grad_norm": 0.16021141409873962, "learning_rate": 4.550051756462319e-06, "loss": 0.9373, "step": 62160 }, { "epoch": 0.45002786886432566, "grad_norm": 0.1671023815870285, "learning_rate": 4.549979369801733e-06, "loss": 0.927, "step": 62170 }, { "epoch": 0.4501002555249119, "grad_norm": 0.15953001379966736, "learning_rate": 4.549906983141147e-06, "loss": 0.9296, "step": 62180 }, { "epoch": 0.4501726421854981, "grad_norm": 0.19120389223098755, "learning_rate": 4.5498345964805605e-06, "loss": 0.9239, "step": 62190 }, { "epoch": 0.45024502884608425, "grad_norm": 0.1692996323108673, "learning_rate": 4.549762209819974e-06, "loss": 0.9201, "step": 62200 }, { "epoch": 0.4503174155066704, "grad_norm": 0.15161260962486267, "learning_rate": 4.549689823159389e-06, "loss": 0.933, "step": 62210 }, { "epoch": 0.4503898021672566, "grad_norm": 0.1791296750307083, "learning_rate": 4.549617436498802e-06, "loss": 0.9206, "step": 62220 }, { "epoch": 0.4504621888278428, "grad_norm": 0.16728129982948303, "learning_rate": 4.549545049838216e-06, "loss": 0.9243, "step": 62230 }, { "epoch": 0.450534575488429, "grad_norm": 0.15448437631130219, "learning_rate": 4.5494726631776294e-06, "loss": 0.922, "step": 62240 }, { "epoch": 0.4506069621490152, "grad_norm": 0.17442266643047333, "learning_rate": 4.549400276517044e-06, "loss": 0.9266, "step": 62250 }, { "epoch": 0.45067934880960137, "grad_norm": 0.16572901606559753, "learning_rate": 4.5493278898564575e-06, "loss": 0.9262, "step": 62260 }, { "epoch": 0.45075173547018754, "grad_norm": 0.19222629070281982, "learning_rate": 4.549255503195871e-06, "loss": 0.9271, "step": 62270 }, { "epoch": 0.4508241221307737, "grad_norm": 0.180272176861763, "learning_rate": 4.549183116535285e-06, "loss": 0.9252, "step": 62280 }, { "epoch": 0.45089650879135995, "grad_norm": 0.1528458446264267, "learning_rate": 4.549110729874699e-06, "loss": 0.916, "step": 62290 }, { "epoch": 0.45096889545194613, "grad_norm": 0.16604572534561157, "learning_rate": 4.549038343214113e-06, "loss": 0.9209, "step": 62300 }, { "epoch": 0.4510412821125323, "grad_norm": 0.1774618774652481, "learning_rate": 4.5489659565535264e-06, "loss": 0.9266, "step": 62310 }, { "epoch": 0.4511136687731185, "grad_norm": 0.16191911697387695, "learning_rate": 4.54889356989294e-06, "loss": 0.9276, "step": 62320 }, { "epoch": 0.45118605543370466, "grad_norm": 0.18299001455307007, "learning_rate": 4.5488211832323545e-06, "loss": 0.9334, "step": 62330 }, { "epoch": 0.4512584420942909, "grad_norm": 0.14922909438610077, "learning_rate": 4.548748796571768e-06, "loss": 0.9232, "step": 62340 }, { "epoch": 0.45133082875487707, "grad_norm": 0.14895036816596985, "learning_rate": 4.548676409911182e-06, "loss": 0.9299, "step": 62350 }, { "epoch": 0.45140321541546324, "grad_norm": 0.15128493309020996, "learning_rate": 4.548604023250595e-06, "loss": 0.9409, "step": 62360 }, { "epoch": 0.4514756020760494, "grad_norm": 0.1607513725757599, "learning_rate": 4.54853163659001e-06, "loss": 0.9143, "step": 62370 }, { "epoch": 0.4515479887366356, "grad_norm": 0.1618734747171402, "learning_rate": 4.5484592499294235e-06, "loss": 0.9249, "step": 62380 }, { "epoch": 0.4516203753972218, "grad_norm": 0.18562345206737518, "learning_rate": 4.548386863268837e-06, "loss": 0.9292, "step": 62390 }, { "epoch": 0.451692762057808, "grad_norm": 0.17333509027957916, "learning_rate": 4.548314476608251e-06, "loss": 0.929, "step": 62400 }, { "epoch": 0.4517651487183942, "grad_norm": 0.1685020625591278, "learning_rate": 4.548242089947665e-06, "loss": 0.9247, "step": 62410 }, { "epoch": 0.45183753537898036, "grad_norm": 0.16872678697109222, "learning_rate": 4.548169703287079e-06, "loss": 0.9225, "step": 62420 }, { "epoch": 0.45190992203956654, "grad_norm": 0.15158583223819733, "learning_rate": 4.548097316626492e-06, "loss": 0.9277, "step": 62430 }, { "epoch": 0.4519823087001527, "grad_norm": 0.18446789681911469, "learning_rate": 4.548024929965906e-06, "loss": 0.9227, "step": 62440 }, { "epoch": 0.45205469536073895, "grad_norm": 0.17495040595531464, "learning_rate": 4.5479525433053205e-06, "loss": 0.9362, "step": 62450 }, { "epoch": 0.4521270820213251, "grad_norm": 0.25016602873802185, "learning_rate": 4.547880156644734e-06, "loss": 0.9354, "step": 62460 }, { "epoch": 0.4521994686819113, "grad_norm": 0.1564486026763916, "learning_rate": 4.547807769984148e-06, "loss": 0.9244, "step": 62470 }, { "epoch": 0.4522718553424975, "grad_norm": 0.1539490669965744, "learning_rate": 4.547735383323561e-06, "loss": 0.9209, "step": 62480 }, { "epoch": 0.45234424200308365, "grad_norm": 0.17183518409729004, "learning_rate": 4.547662996662976e-06, "loss": 0.9361, "step": 62490 }, { "epoch": 0.4524166286636699, "grad_norm": 0.17086255550384521, "learning_rate": 4.547590610002389e-06, "loss": 0.9142, "step": 62500 }, { "epoch": 0.45248901532425606, "grad_norm": 0.16200555860996246, "learning_rate": 4.547518223341803e-06, "loss": 0.942, "step": 62510 }, { "epoch": 0.45256140198484224, "grad_norm": 0.1565650850534439, "learning_rate": 4.547445836681217e-06, "loss": 0.938, "step": 62520 }, { "epoch": 0.4526337886454284, "grad_norm": 0.17040525376796722, "learning_rate": 4.547373450020631e-06, "loss": 0.9279, "step": 62530 }, { "epoch": 0.4527061753060146, "grad_norm": 0.17260707914829254, "learning_rate": 4.547301063360045e-06, "loss": 0.9246, "step": 62540 }, { "epoch": 0.45277856196660077, "grad_norm": 0.16091448068618774, "learning_rate": 4.547228676699458e-06, "loss": 0.9301, "step": 62550 }, { "epoch": 0.452850948627187, "grad_norm": 0.15594513714313507, "learning_rate": 4.547156290038872e-06, "loss": 0.9388, "step": 62560 }, { "epoch": 0.4529233352877732, "grad_norm": 0.17561206221580505, "learning_rate": 4.547083903378286e-06, "loss": 0.9311, "step": 62570 }, { "epoch": 0.45299572194835935, "grad_norm": 0.1688334196805954, "learning_rate": 4.5470115167177e-06, "loss": 0.9374, "step": 62580 }, { "epoch": 0.45306810860894553, "grad_norm": 0.26716697216033936, "learning_rate": 4.546939130057114e-06, "loss": 0.9332, "step": 62590 }, { "epoch": 0.4531404952695317, "grad_norm": 0.16558301448822021, "learning_rate": 4.546866743396527e-06, "loss": 0.9361, "step": 62600 }, { "epoch": 0.45321288193011794, "grad_norm": 0.16215580701828003, "learning_rate": 4.546794356735941e-06, "loss": 0.944, "step": 62610 }, { "epoch": 0.4532852685907041, "grad_norm": 0.17625215649604797, "learning_rate": 4.546721970075355e-06, "loss": 0.9408, "step": 62620 }, { "epoch": 0.4533576552512903, "grad_norm": 0.1480870544910431, "learning_rate": 4.546649583414769e-06, "loss": 0.929, "step": 62630 }, { "epoch": 0.45343004191187647, "grad_norm": 0.17280828952789307, "learning_rate": 4.5465771967541826e-06, "loss": 0.9177, "step": 62640 }, { "epoch": 0.45350242857246265, "grad_norm": 0.1757204234600067, "learning_rate": 4.546504810093596e-06, "loss": 0.9396, "step": 62650 }, { "epoch": 0.4535748152330489, "grad_norm": 0.16843682527542114, "learning_rate": 4.546432423433011e-06, "loss": 0.9311, "step": 62660 }, { "epoch": 0.45364720189363505, "grad_norm": 0.14993290603160858, "learning_rate": 4.546360036772424e-06, "loss": 0.9257, "step": 62670 }, { "epoch": 0.45371958855422123, "grad_norm": 0.1843591332435608, "learning_rate": 4.546287650111838e-06, "loss": 0.9305, "step": 62680 }, { "epoch": 0.4537919752148074, "grad_norm": 0.1545378565788269, "learning_rate": 4.5462152634512515e-06, "loss": 0.93, "step": 62690 }, { "epoch": 0.4538643618753936, "grad_norm": 0.1681130826473236, "learning_rate": 4.546142876790665e-06, "loss": 0.9191, "step": 62700 }, { "epoch": 0.4539367485359798, "grad_norm": 0.15913967788219452, "learning_rate": 4.546070490130079e-06, "loss": 0.9356, "step": 62710 }, { "epoch": 0.454009135196566, "grad_norm": 0.16639158129692078, "learning_rate": 4.545998103469493e-06, "loss": 0.9405, "step": 62720 }, { "epoch": 0.45408152185715217, "grad_norm": 0.1884455382823944, "learning_rate": 4.545925716808907e-06, "loss": 0.931, "step": 62730 }, { "epoch": 0.45415390851773835, "grad_norm": 0.15364967286586761, "learning_rate": 4.54585333014832e-06, "loss": 0.9278, "step": 62740 }, { "epoch": 0.4542262951783245, "grad_norm": 0.15758123993873596, "learning_rate": 4.545780943487734e-06, "loss": 0.9337, "step": 62750 }, { "epoch": 0.4542986818389107, "grad_norm": 0.16586291790008545, "learning_rate": 4.545708556827148e-06, "loss": 0.9298, "step": 62760 }, { "epoch": 0.45437106849949693, "grad_norm": 0.15221761167049408, "learning_rate": 4.545636170166562e-06, "loss": 0.9273, "step": 62770 }, { "epoch": 0.4544434551600831, "grad_norm": 0.16838103532791138, "learning_rate": 4.545563783505976e-06, "loss": 0.9418, "step": 62780 }, { "epoch": 0.4545158418206693, "grad_norm": 0.15279430150985718, "learning_rate": 4.545491396845389e-06, "loss": 0.9251, "step": 62790 }, { "epoch": 0.45458822848125546, "grad_norm": 0.161879301071167, "learning_rate": 4.545419010184803e-06, "loss": 0.9355, "step": 62800 }, { "epoch": 0.45466061514184164, "grad_norm": 0.15122537314891815, "learning_rate": 4.545346623524217e-06, "loss": 0.9282, "step": 62810 }, { "epoch": 0.45473300180242787, "grad_norm": 0.1683684140443802, "learning_rate": 4.545274236863631e-06, "loss": 0.9341, "step": 62820 }, { "epoch": 0.45480538846301405, "grad_norm": 0.16994281113147736, "learning_rate": 4.545201850203045e-06, "loss": 0.9273, "step": 62830 }, { "epoch": 0.4548777751236002, "grad_norm": 0.15986701846122742, "learning_rate": 4.545129463542458e-06, "loss": 0.9214, "step": 62840 }, { "epoch": 0.4549501617841864, "grad_norm": 0.1651432365179062, "learning_rate": 4.545057076881873e-06, "loss": 0.932, "step": 62850 }, { "epoch": 0.4550225484447726, "grad_norm": 0.1712128072977066, "learning_rate": 4.544984690221286e-06, "loss": 0.9326, "step": 62860 }, { "epoch": 0.4550949351053588, "grad_norm": 0.17609210312366486, "learning_rate": 4.5449123035607e-06, "loss": 0.938, "step": 62870 }, { "epoch": 0.455167321765945, "grad_norm": 0.17556455731391907, "learning_rate": 4.544839916900114e-06, "loss": 0.9311, "step": 62880 }, { "epoch": 0.45523970842653116, "grad_norm": 0.1542162448167801, "learning_rate": 4.544767530239528e-06, "loss": 0.9262, "step": 62890 }, { "epoch": 0.45531209508711734, "grad_norm": 0.21663711965084076, "learning_rate": 4.544695143578942e-06, "loss": 0.9301, "step": 62900 }, { "epoch": 0.4553844817477035, "grad_norm": 0.1632581651210785, "learning_rate": 4.544622756918355e-06, "loss": 0.942, "step": 62910 }, { "epoch": 0.4554568684082897, "grad_norm": 0.1683775782585144, "learning_rate": 4.544550370257769e-06, "loss": 0.9313, "step": 62920 }, { "epoch": 0.4555292550688759, "grad_norm": 0.16265662014484406, "learning_rate": 4.544477983597183e-06, "loss": 0.9255, "step": 62930 }, { "epoch": 0.4556016417294621, "grad_norm": 0.16888990998268127, "learning_rate": 4.544405596936597e-06, "loss": 0.9327, "step": 62940 }, { "epoch": 0.4556740283900483, "grad_norm": 0.1617661714553833, "learning_rate": 4.544333210276011e-06, "loss": 0.9365, "step": 62950 }, { "epoch": 0.45574641505063446, "grad_norm": 0.15065929293632507, "learning_rate": 4.544260823615424e-06, "loss": 0.9411, "step": 62960 }, { "epoch": 0.45581880171122063, "grad_norm": 0.15729419887065887, "learning_rate": 4.544188436954839e-06, "loss": 0.9305, "step": 62970 }, { "epoch": 0.45589118837180687, "grad_norm": 0.1598915308713913, "learning_rate": 4.544116050294252e-06, "loss": 0.9236, "step": 62980 }, { "epoch": 0.45596357503239304, "grad_norm": 0.1573273241519928, "learning_rate": 4.544043663633666e-06, "loss": 0.9221, "step": 62990 }, { "epoch": 0.4560359616929792, "grad_norm": 0.17503662407398224, "learning_rate": 4.5439712769730795e-06, "loss": 0.9322, "step": 63000 }, { "epoch": 0.4561083483535654, "grad_norm": 0.16443364322185516, "learning_rate": 4.543898890312494e-06, "loss": 0.9312, "step": 63010 }, { "epoch": 0.4561807350141516, "grad_norm": 0.15545983612537384, "learning_rate": 4.543826503651908e-06, "loss": 0.9267, "step": 63020 }, { "epoch": 0.4562531216747378, "grad_norm": 0.25189727544784546, "learning_rate": 4.543754116991321e-06, "loss": 0.919, "step": 63030 }, { "epoch": 0.456325508335324, "grad_norm": 0.17259828746318817, "learning_rate": 4.543681730330735e-06, "loss": 0.9431, "step": 63040 }, { "epoch": 0.45639789499591016, "grad_norm": 0.1540062129497528, "learning_rate": 4.543609343670149e-06, "loss": 0.9239, "step": 63050 }, { "epoch": 0.45647028165649634, "grad_norm": 0.16821375489234924, "learning_rate": 4.543536957009563e-06, "loss": 0.9303, "step": 63060 }, { "epoch": 0.4565426683170825, "grad_norm": 0.15353234112262726, "learning_rate": 4.5434645703489765e-06, "loss": 0.9242, "step": 63070 }, { "epoch": 0.4566150549776687, "grad_norm": 0.15900860726833344, "learning_rate": 4.54339218368839e-06, "loss": 0.9232, "step": 63080 }, { "epoch": 0.4566874416382549, "grad_norm": 0.2517249882221222, "learning_rate": 4.543319797027805e-06, "loss": 0.9415, "step": 63090 }, { "epoch": 0.4567598282988411, "grad_norm": 0.16736070811748505, "learning_rate": 4.543247410367218e-06, "loss": 0.9241, "step": 63100 }, { "epoch": 0.4568322149594273, "grad_norm": 0.1665504276752472, "learning_rate": 4.543175023706632e-06, "loss": 0.9348, "step": 63110 }, { "epoch": 0.45690460162001345, "grad_norm": 0.1781657338142395, "learning_rate": 4.5431026370460455e-06, "loss": 0.9379, "step": 63120 }, { "epoch": 0.4569769882805996, "grad_norm": 0.15879395604133606, "learning_rate": 4.54303025038546e-06, "loss": 0.9426, "step": 63130 }, { "epoch": 0.45704937494118586, "grad_norm": 0.1565670371055603, "learning_rate": 4.5429578637248735e-06, "loss": 0.9176, "step": 63140 }, { "epoch": 0.45712176160177204, "grad_norm": 0.15761685371398926, "learning_rate": 4.542885477064287e-06, "loss": 0.9228, "step": 63150 }, { "epoch": 0.4571941482623582, "grad_norm": 0.15627005696296692, "learning_rate": 4.542813090403701e-06, "loss": 0.9354, "step": 63160 }, { "epoch": 0.4572665349229444, "grad_norm": 0.15798023343086243, "learning_rate": 4.542740703743115e-06, "loss": 0.9131, "step": 63170 }, { "epoch": 0.45733892158353057, "grad_norm": 0.1624073088169098, "learning_rate": 4.542668317082529e-06, "loss": 0.927, "step": 63180 }, { "epoch": 0.4574113082441168, "grad_norm": 0.15683312714099884, "learning_rate": 4.5425959304219425e-06, "loss": 0.9402, "step": 63190 }, { "epoch": 0.457483694904703, "grad_norm": 0.18702444434165955, "learning_rate": 4.542523543761356e-06, "loss": 0.9368, "step": 63200 }, { "epoch": 0.45755608156528915, "grad_norm": 0.15603706240653992, "learning_rate": 4.54245115710077e-06, "loss": 0.9298, "step": 63210 }, { "epoch": 0.45762846822587533, "grad_norm": 0.15618003904819489, "learning_rate": 4.542378770440183e-06, "loss": 0.9393, "step": 63220 }, { "epoch": 0.4577008548864615, "grad_norm": 0.1605190932750702, "learning_rate": 4.542306383779597e-06, "loss": 0.9349, "step": 63230 }, { "epoch": 0.4577732415470477, "grad_norm": 0.16419483721256256, "learning_rate": 4.542233997119011e-06, "loss": 0.9356, "step": 63240 }, { "epoch": 0.4578456282076339, "grad_norm": 0.15912272036075592, "learning_rate": 4.542161610458425e-06, "loss": 0.932, "step": 63250 }, { "epoch": 0.4579180148682201, "grad_norm": 0.16210229694843292, "learning_rate": 4.542089223797839e-06, "loss": 0.9353, "step": 63260 }, { "epoch": 0.45799040152880627, "grad_norm": 0.16142286360263824, "learning_rate": 4.542016837137252e-06, "loss": 0.9464, "step": 63270 }, { "epoch": 0.45806278818939244, "grad_norm": 0.21197547018527985, "learning_rate": 4.541944450476667e-06, "loss": 0.9221, "step": 63280 }, { "epoch": 0.4581351748499786, "grad_norm": 0.17035934329032898, "learning_rate": 4.54187206381608e-06, "loss": 0.9299, "step": 63290 }, { "epoch": 0.45820756151056485, "grad_norm": 0.20072509348392487, "learning_rate": 4.541799677155494e-06, "loss": 0.9152, "step": 63300 }, { "epoch": 0.45827994817115103, "grad_norm": 0.1753024309873581, "learning_rate": 4.5417272904949075e-06, "loss": 0.9106, "step": 63310 }, { "epoch": 0.4583523348317372, "grad_norm": 0.16419456899166107, "learning_rate": 4.541654903834322e-06, "loss": 0.9305, "step": 63320 }, { "epoch": 0.4584247214923234, "grad_norm": 0.15261687338352203, "learning_rate": 4.541582517173736e-06, "loss": 0.9305, "step": 63330 }, { "epoch": 0.45849710815290956, "grad_norm": 0.16415435075759888, "learning_rate": 4.541510130513149e-06, "loss": 0.9342, "step": 63340 }, { "epoch": 0.4585694948134958, "grad_norm": 0.15612776577472687, "learning_rate": 4.541437743852563e-06, "loss": 0.9125, "step": 63350 }, { "epoch": 0.45864188147408197, "grad_norm": 0.1619674116373062, "learning_rate": 4.541365357191977e-06, "loss": 0.9358, "step": 63360 }, { "epoch": 0.45871426813466815, "grad_norm": 0.1574641913175583, "learning_rate": 4.541292970531391e-06, "loss": 0.9322, "step": 63370 }, { "epoch": 0.4587866547952543, "grad_norm": 0.15913856029510498, "learning_rate": 4.5412205838708046e-06, "loss": 0.9454, "step": 63380 }, { "epoch": 0.4588590414558405, "grad_norm": 0.15667736530303955, "learning_rate": 4.541148197210218e-06, "loss": 0.9376, "step": 63390 }, { "epoch": 0.45893142811642673, "grad_norm": 0.15261350572109222, "learning_rate": 4.541075810549632e-06, "loss": 0.9474, "step": 63400 }, { "epoch": 0.4590038147770129, "grad_norm": 0.1693621575832367, "learning_rate": 4.541003423889046e-06, "loss": 0.9309, "step": 63410 }, { "epoch": 0.4590762014375991, "grad_norm": 0.15610769391059875, "learning_rate": 4.54093103722846e-06, "loss": 0.9228, "step": 63420 }, { "epoch": 0.45914858809818526, "grad_norm": 0.17140141129493713, "learning_rate": 4.5408586505678735e-06, "loss": 0.9351, "step": 63430 }, { "epoch": 0.45922097475877144, "grad_norm": 0.16948135197162628, "learning_rate": 4.540786263907287e-06, "loss": 0.9241, "step": 63440 }, { "epoch": 0.4592933614193576, "grad_norm": 0.15839575231075287, "learning_rate": 4.5407138772467016e-06, "loss": 0.9205, "step": 63450 }, { "epoch": 0.45936574807994385, "grad_norm": 0.16708028316497803, "learning_rate": 4.540641490586115e-06, "loss": 0.9118, "step": 63460 }, { "epoch": 0.45943813474053, "grad_norm": 0.16213096678256989, "learning_rate": 4.540569103925529e-06, "loss": 0.9338, "step": 63470 }, { "epoch": 0.4595105214011162, "grad_norm": 0.16923314332962036, "learning_rate": 4.540496717264942e-06, "loss": 0.9167, "step": 63480 }, { "epoch": 0.4595829080617024, "grad_norm": 0.15057291090488434, "learning_rate": 4.540424330604357e-06, "loss": 0.9275, "step": 63490 }, { "epoch": 0.45965529472228855, "grad_norm": 0.15825194120407104, "learning_rate": 4.5403519439437705e-06, "loss": 0.9367, "step": 63500 }, { "epoch": 0.4597276813828748, "grad_norm": 0.16979360580444336, "learning_rate": 4.540279557283184e-06, "loss": 0.9326, "step": 63510 }, { "epoch": 0.45980006804346096, "grad_norm": 0.16070100665092468, "learning_rate": 4.540207170622598e-06, "loss": 0.9278, "step": 63520 }, { "epoch": 0.45987245470404714, "grad_norm": 0.15810810029506683, "learning_rate": 4.540134783962012e-06, "loss": 0.9187, "step": 63530 }, { "epoch": 0.4599448413646333, "grad_norm": 0.15783362090587616, "learning_rate": 4.540062397301426e-06, "loss": 0.9289, "step": 63540 }, { "epoch": 0.4600172280252195, "grad_norm": 0.15892358124256134, "learning_rate": 4.539990010640839e-06, "loss": 0.9234, "step": 63550 }, { "epoch": 0.4600896146858057, "grad_norm": 0.15859895944595337, "learning_rate": 4.539917623980253e-06, "loss": 0.9265, "step": 63560 }, { "epoch": 0.4601620013463919, "grad_norm": 0.16993699967861176, "learning_rate": 4.5398452373196675e-06, "loss": 0.9481, "step": 63570 }, { "epoch": 0.4602343880069781, "grad_norm": 0.1587960124015808, "learning_rate": 4.539772850659081e-06, "loss": 0.9308, "step": 63580 }, { "epoch": 0.46030677466756426, "grad_norm": 0.17357853055000305, "learning_rate": 4.539700463998495e-06, "loss": 0.9403, "step": 63590 }, { "epoch": 0.46037916132815043, "grad_norm": 0.172526553273201, "learning_rate": 4.539628077337908e-06, "loss": 0.9237, "step": 63600 }, { "epoch": 0.4604515479887366, "grad_norm": 0.1549219787120819, "learning_rate": 4.539555690677323e-06, "loss": 0.9297, "step": 63610 }, { "epoch": 0.46052393464932284, "grad_norm": 0.1645515263080597, "learning_rate": 4.5394833040167364e-06, "loss": 0.9213, "step": 63620 }, { "epoch": 0.460596321309909, "grad_norm": 0.1633620709180832, "learning_rate": 4.53941091735615e-06, "loss": 0.9293, "step": 63630 }, { "epoch": 0.4606687079704952, "grad_norm": 0.17372886836528778, "learning_rate": 4.539338530695564e-06, "loss": 0.9202, "step": 63640 }, { "epoch": 0.46074109463108137, "grad_norm": 0.37556585669517517, "learning_rate": 4.539266144034978e-06, "loss": 0.9344, "step": 63650 }, { "epoch": 0.46081348129166755, "grad_norm": 0.161403089761734, "learning_rate": 4.539193757374392e-06, "loss": 0.9468, "step": 63660 }, { "epoch": 0.4608858679522538, "grad_norm": 0.1543300747871399, "learning_rate": 4.539121370713805e-06, "loss": 0.9162, "step": 63670 }, { "epoch": 0.46095825461283996, "grad_norm": 0.2087777554988861, "learning_rate": 4.539048984053219e-06, "loss": 0.9278, "step": 63680 }, { "epoch": 0.46103064127342613, "grad_norm": 0.15856918692588806, "learning_rate": 4.5389765973926334e-06, "loss": 0.9155, "step": 63690 }, { "epoch": 0.4611030279340123, "grad_norm": 0.156173437833786, "learning_rate": 4.538904210732047e-06, "loss": 0.9355, "step": 63700 }, { "epoch": 0.4611754145945985, "grad_norm": 0.1753517985343933, "learning_rate": 4.538831824071461e-06, "loss": 0.9267, "step": 63710 }, { "epoch": 0.4612478012551847, "grad_norm": 0.18503955006599426, "learning_rate": 4.538759437410874e-06, "loss": 0.9342, "step": 63720 }, { "epoch": 0.4613201879157709, "grad_norm": 0.1558404415845871, "learning_rate": 4.538687050750289e-06, "loss": 0.9214, "step": 63730 }, { "epoch": 0.4613925745763571, "grad_norm": 0.154067724943161, "learning_rate": 4.538614664089702e-06, "loss": 0.9238, "step": 63740 }, { "epoch": 0.46146496123694325, "grad_norm": 0.1731044501066208, "learning_rate": 4.538542277429115e-06, "loss": 0.9452, "step": 63750 }, { "epoch": 0.4615373478975294, "grad_norm": 0.17740470170974731, "learning_rate": 4.53846989076853e-06, "loss": 0.931, "step": 63760 }, { "epoch": 0.4616097345581156, "grad_norm": 0.15755429863929749, "learning_rate": 4.538397504107943e-06, "loss": 0.918, "step": 63770 }, { "epoch": 0.46168212121870184, "grad_norm": 0.15609146654605865, "learning_rate": 4.538325117447357e-06, "loss": 0.9306, "step": 63780 }, { "epoch": 0.461754507879288, "grad_norm": 0.17046941816806793, "learning_rate": 4.5382527307867704e-06, "loss": 0.927, "step": 63790 }, { "epoch": 0.4618268945398742, "grad_norm": 0.1781318038702011, "learning_rate": 4.538180344126185e-06, "loss": 0.9184, "step": 63800 }, { "epoch": 0.46189928120046037, "grad_norm": 0.16463392972946167, "learning_rate": 4.5381079574655985e-06, "loss": 0.929, "step": 63810 }, { "epoch": 0.46197166786104654, "grad_norm": 0.1558876931667328, "learning_rate": 4.538035570805012e-06, "loss": 0.9194, "step": 63820 }, { "epoch": 0.4620440545216328, "grad_norm": 0.17599675059318542, "learning_rate": 4.537963184144426e-06, "loss": 0.9254, "step": 63830 }, { "epoch": 0.46211644118221895, "grad_norm": 0.1566462218761444, "learning_rate": 4.53789079748384e-06, "loss": 0.9412, "step": 63840 }, { "epoch": 0.46218882784280513, "grad_norm": 0.15512123703956604, "learning_rate": 4.537818410823254e-06, "loss": 0.9367, "step": 63850 }, { "epoch": 0.4622612145033913, "grad_norm": 0.16452373564243317, "learning_rate": 4.5377460241626675e-06, "loss": 0.9274, "step": 63860 }, { "epoch": 0.4623336011639775, "grad_norm": 0.16838426887989044, "learning_rate": 4.537673637502081e-06, "loss": 0.9143, "step": 63870 }, { "epoch": 0.4624059878245637, "grad_norm": 0.16255596280097961, "learning_rate": 4.5376012508414955e-06, "loss": 0.9174, "step": 63880 }, { "epoch": 0.4624783744851499, "grad_norm": 0.18602770566940308, "learning_rate": 4.537528864180909e-06, "loss": 0.9149, "step": 63890 }, { "epoch": 0.46255076114573607, "grad_norm": 0.19781062006950378, "learning_rate": 4.537456477520323e-06, "loss": 0.936, "step": 63900 }, { "epoch": 0.46262314780632224, "grad_norm": 0.1478962004184723, "learning_rate": 4.537384090859736e-06, "loss": 0.9191, "step": 63910 }, { "epoch": 0.4626955344669084, "grad_norm": 0.15507012605667114, "learning_rate": 4.537311704199151e-06, "loss": 0.9242, "step": 63920 }, { "epoch": 0.46276792112749465, "grad_norm": 0.17668579518795013, "learning_rate": 4.5372393175385645e-06, "loss": 0.9339, "step": 63930 }, { "epoch": 0.46284030778808083, "grad_norm": 0.14959698915481567, "learning_rate": 4.537166930877978e-06, "loss": 0.9114, "step": 63940 }, { "epoch": 0.462912694448667, "grad_norm": 0.17925885319709778, "learning_rate": 4.537094544217392e-06, "loss": 0.9206, "step": 63950 }, { "epoch": 0.4629850811092532, "grad_norm": 0.16243740916252136, "learning_rate": 4.537022157556806e-06, "loss": 0.9196, "step": 63960 }, { "epoch": 0.46305746776983936, "grad_norm": 0.3166872560977936, "learning_rate": 4.53694977089622e-06, "loss": 0.9352, "step": 63970 }, { "epoch": 0.46312985443042554, "grad_norm": 0.16058658063411713, "learning_rate": 4.536877384235633e-06, "loss": 0.9097, "step": 63980 }, { "epoch": 0.46320224109101177, "grad_norm": 0.15642769634723663, "learning_rate": 4.536804997575047e-06, "loss": 0.9307, "step": 63990 }, { "epoch": 0.46327462775159794, "grad_norm": 0.41400203108787537, "learning_rate": 4.5367326109144615e-06, "loss": 0.9321, "step": 64000 }, { "epoch": 0.4633470144121841, "grad_norm": 0.17071636021137238, "learning_rate": 4.536660224253875e-06, "loss": 0.9323, "step": 64010 }, { "epoch": 0.4634194010727703, "grad_norm": 0.16446176171302795, "learning_rate": 4.536587837593289e-06, "loss": 0.9172, "step": 64020 }, { "epoch": 0.4634917877333565, "grad_norm": 0.17229726910591125, "learning_rate": 4.536515450932702e-06, "loss": 0.9246, "step": 64030 }, { "epoch": 0.4635641743939427, "grad_norm": 0.14730413258075714, "learning_rate": 4.536443064272116e-06, "loss": 0.9353, "step": 64040 }, { "epoch": 0.4636365610545289, "grad_norm": 0.1702919751405716, "learning_rate": 4.53637067761153e-06, "loss": 0.9239, "step": 64050 }, { "epoch": 0.46370894771511506, "grad_norm": 0.15090477466583252, "learning_rate": 4.536298290950944e-06, "loss": 0.9189, "step": 64060 }, { "epoch": 0.46378133437570124, "grad_norm": 0.15662406384944916, "learning_rate": 4.536225904290358e-06, "loss": 0.9161, "step": 64070 }, { "epoch": 0.4638537210362874, "grad_norm": 0.15603668987751007, "learning_rate": 4.536153517629771e-06, "loss": 0.9194, "step": 64080 }, { "epoch": 0.46392610769687365, "grad_norm": 0.16202674806118011, "learning_rate": 4.536081130969186e-06, "loss": 0.9402, "step": 64090 }, { "epoch": 0.4639984943574598, "grad_norm": 0.2072402536869049, "learning_rate": 4.536008744308599e-06, "loss": 0.9153, "step": 64100 }, { "epoch": 0.464070881018046, "grad_norm": 0.16444271802902222, "learning_rate": 4.535936357648013e-06, "loss": 0.9216, "step": 64110 }, { "epoch": 0.4641432676786322, "grad_norm": 0.19051018357276917, "learning_rate": 4.5358639709874266e-06, "loss": 0.9233, "step": 64120 }, { "epoch": 0.46421565433921835, "grad_norm": 0.1983797252178192, "learning_rate": 4.535791584326841e-06, "loss": 0.9345, "step": 64130 }, { "epoch": 0.46428804099980453, "grad_norm": 0.17344747483730316, "learning_rate": 4.535719197666255e-06, "loss": 0.9311, "step": 64140 }, { "epoch": 0.46436042766039076, "grad_norm": 0.16626109182834625, "learning_rate": 4.535646811005668e-06, "loss": 0.9188, "step": 64150 }, { "epoch": 0.46443281432097694, "grad_norm": 0.162200927734375, "learning_rate": 4.535574424345082e-06, "loss": 0.9248, "step": 64160 }, { "epoch": 0.4645052009815631, "grad_norm": 0.43572789430618286, "learning_rate": 4.535502037684496e-06, "loss": 0.9225, "step": 64170 }, { "epoch": 0.4645775876421493, "grad_norm": 0.18060703575611115, "learning_rate": 4.53542965102391e-06, "loss": 0.9357, "step": 64180 }, { "epoch": 0.46464997430273547, "grad_norm": 0.16320358216762543, "learning_rate": 4.5353572643633236e-06, "loss": 0.9227, "step": 64190 }, { "epoch": 0.4647223609633217, "grad_norm": 0.1623474657535553, "learning_rate": 4.535284877702737e-06, "loss": 0.9375, "step": 64200 }, { "epoch": 0.4647947476239079, "grad_norm": 0.1604037880897522, "learning_rate": 4.535212491042152e-06, "loss": 0.9255, "step": 64210 }, { "epoch": 0.46486713428449405, "grad_norm": 0.2360948622226715, "learning_rate": 4.535140104381565e-06, "loss": 0.9329, "step": 64220 }, { "epoch": 0.46493952094508023, "grad_norm": 0.17647242546081543, "learning_rate": 4.535067717720979e-06, "loss": 0.9377, "step": 64230 }, { "epoch": 0.4650119076056664, "grad_norm": 0.1619800180196762, "learning_rate": 4.5349953310603925e-06, "loss": 0.9352, "step": 64240 }, { "epoch": 0.46508429426625264, "grad_norm": 0.2056133896112442, "learning_rate": 4.534922944399807e-06, "loss": 0.9284, "step": 64250 }, { "epoch": 0.4651566809268388, "grad_norm": 0.16904328763484955, "learning_rate": 4.5348505577392206e-06, "loss": 0.9397, "step": 64260 }, { "epoch": 0.465229067587425, "grad_norm": 0.17163747549057007, "learning_rate": 4.534778171078634e-06, "loss": 0.9299, "step": 64270 }, { "epoch": 0.46530145424801117, "grad_norm": 0.15540345013141632, "learning_rate": 4.534705784418048e-06, "loss": 0.9273, "step": 64280 }, { "epoch": 0.46537384090859735, "grad_norm": 0.16602273285388947, "learning_rate": 4.534633397757461e-06, "loss": 0.9229, "step": 64290 }, { "epoch": 0.4654462275691835, "grad_norm": 0.15529848635196686, "learning_rate": 4.534561011096875e-06, "loss": 0.9227, "step": 64300 }, { "epoch": 0.46551861422976976, "grad_norm": 0.15982522070407867, "learning_rate": 4.534488624436289e-06, "loss": 0.9184, "step": 64310 }, { "epoch": 0.46559100089035593, "grad_norm": 0.16728608310222626, "learning_rate": 4.534416237775703e-06, "loss": 0.9283, "step": 64320 }, { "epoch": 0.4656633875509421, "grad_norm": 0.1668313890695572, "learning_rate": 4.534343851115117e-06, "loss": 0.9257, "step": 64330 }, { "epoch": 0.4657357742115283, "grad_norm": 0.16521133482456207, "learning_rate": 4.53427146445453e-06, "loss": 0.9199, "step": 64340 }, { "epoch": 0.46580816087211446, "grad_norm": 0.1640012413263321, "learning_rate": 4.534199077793944e-06, "loss": 0.9251, "step": 64350 }, { "epoch": 0.4658805475327007, "grad_norm": 0.15635322034358978, "learning_rate": 4.5341266911333584e-06, "loss": 0.9295, "step": 64360 }, { "epoch": 0.46595293419328687, "grad_norm": 0.1682094931602478, "learning_rate": 4.534054304472772e-06, "loss": 0.9379, "step": 64370 }, { "epoch": 0.46602532085387305, "grad_norm": 0.1606939285993576, "learning_rate": 4.533981917812186e-06, "loss": 0.925, "step": 64380 }, { "epoch": 0.4660977075144592, "grad_norm": 0.1753852218389511, "learning_rate": 4.533909531151599e-06, "loss": 0.9238, "step": 64390 }, { "epoch": 0.4661700941750454, "grad_norm": 0.20732562243938446, "learning_rate": 4.533837144491014e-06, "loss": 0.9268, "step": 64400 }, { "epoch": 0.46624248083563163, "grad_norm": 0.16475224494934082, "learning_rate": 4.533764757830427e-06, "loss": 0.9164, "step": 64410 }, { "epoch": 0.4663148674962178, "grad_norm": 0.17399507761001587, "learning_rate": 4.533692371169841e-06, "loss": 0.9204, "step": 64420 }, { "epoch": 0.466387254156804, "grad_norm": 0.2416888028383255, "learning_rate": 4.533619984509255e-06, "loss": 0.9399, "step": 64430 }, { "epoch": 0.46645964081739016, "grad_norm": 0.1748601794242859, "learning_rate": 4.533547597848669e-06, "loss": 0.9227, "step": 64440 }, { "epoch": 0.46653202747797634, "grad_norm": 0.15142974257469177, "learning_rate": 4.533475211188083e-06, "loss": 0.9286, "step": 64450 }, { "epoch": 0.4666044141385626, "grad_norm": 0.14932338893413544, "learning_rate": 4.533402824527496e-06, "loss": 0.9318, "step": 64460 }, { "epoch": 0.46667680079914875, "grad_norm": 0.16411228477954865, "learning_rate": 4.53333043786691e-06, "loss": 0.9183, "step": 64470 }, { "epoch": 0.4667491874597349, "grad_norm": 0.16134792566299438, "learning_rate": 4.533258051206324e-06, "loss": 0.9311, "step": 64480 }, { "epoch": 0.4668215741203211, "grad_norm": 0.15893922746181488, "learning_rate": 4.533185664545738e-06, "loss": 0.9197, "step": 64490 }, { "epoch": 0.4668939607809073, "grad_norm": 0.17452390491962433, "learning_rate": 4.533113277885152e-06, "loss": 0.9372, "step": 64500 }, { "epoch": 0.46696634744149346, "grad_norm": 0.16843093931674957, "learning_rate": 4.533040891224565e-06, "loss": 0.9192, "step": 64510 }, { "epoch": 0.4670387341020797, "grad_norm": 0.15596753358840942, "learning_rate": 4.53296850456398e-06, "loss": 0.9322, "step": 64520 }, { "epoch": 0.46711112076266587, "grad_norm": 0.1649407148361206, "learning_rate": 4.532896117903393e-06, "loss": 0.9248, "step": 64530 }, { "epoch": 0.46718350742325204, "grad_norm": 0.19480155408382416, "learning_rate": 4.532823731242807e-06, "loss": 0.9253, "step": 64540 }, { "epoch": 0.4672558940838382, "grad_norm": 0.1667308658361435, "learning_rate": 4.5327513445822205e-06, "loss": 0.9249, "step": 64550 }, { "epoch": 0.4673282807444244, "grad_norm": 0.16336029767990112, "learning_rate": 4.532678957921635e-06, "loss": 0.924, "step": 64560 }, { "epoch": 0.46740066740501063, "grad_norm": 0.1569215953350067, "learning_rate": 4.532606571261049e-06, "loss": 0.9127, "step": 64570 }, { "epoch": 0.4674730540655968, "grad_norm": 0.16221965849399567, "learning_rate": 4.532534184600462e-06, "loss": 0.9228, "step": 64580 }, { "epoch": 0.467545440726183, "grad_norm": 0.16710902750492096, "learning_rate": 4.532461797939876e-06, "loss": 0.918, "step": 64590 }, { "epoch": 0.46761782738676916, "grad_norm": 0.17595240473747253, "learning_rate": 4.53238941127929e-06, "loss": 0.9273, "step": 64600 }, { "epoch": 0.46769021404735533, "grad_norm": 0.16132394969463348, "learning_rate": 4.532317024618704e-06, "loss": 0.927, "step": 64610 }, { "epoch": 0.46776260070794157, "grad_norm": 0.14544646441936493, "learning_rate": 4.5322446379581175e-06, "loss": 0.925, "step": 64620 }, { "epoch": 0.46783498736852774, "grad_norm": 0.1635567545890808, "learning_rate": 4.532172251297531e-06, "loss": 0.9207, "step": 64630 }, { "epoch": 0.4679073740291139, "grad_norm": 0.15619686245918274, "learning_rate": 4.532099864636945e-06, "loss": 0.914, "step": 64640 }, { "epoch": 0.4679797606897001, "grad_norm": 0.17399942874908447, "learning_rate": 4.532027477976359e-06, "loss": 0.9301, "step": 64650 }, { "epoch": 0.4680521473502863, "grad_norm": 0.15665535628795624, "learning_rate": 4.531955091315773e-06, "loss": 0.9231, "step": 64660 }, { "epoch": 0.46812453401087245, "grad_norm": 0.16816446185112, "learning_rate": 4.5318827046551865e-06, "loss": 0.9334, "step": 64670 }, { "epoch": 0.4681969206714587, "grad_norm": 0.16004996001720428, "learning_rate": 4.5318103179946e-06, "loss": 0.9201, "step": 64680 }, { "epoch": 0.46826930733204486, "grad_norm": 0.15259157121181488, "learning_rate": 4.5317379313340145e-06, "loss": 0.9352, "step": 64690 }, { "epoch": 0.46834169399263104, "grad_norm": 0.1640315055847168, "learning_rate": 4.531665544673428e-06, "loss": 0.9199, "step": 64700 }, { "epoch": 0.4684140806532172, "grad_norm": 0.15679945051670074, "learning_rate": 4.531593158012842e-06, "loss": 0.9235, "step": 64710 }, { "epoch": 0.4684864673138034, "grad_norm": 0.16989102959632874, "learning_rate": 4.531520771352255e-06, "loss": 0.9157, "step": 64720 }, { "epoch": 0.4685588539743896, "grad_norm": 0.16276511549949646, "learning_rate": 4.53144838469167e-06, "loss": 0.9257, "step": 64730 }, { "epoch": 0.4686312406349758, "grad_norm": 0.1543579399585724, "learning_rate": 4.5313759980310835e-06, "loss": 0.91, "step": 64740 }, { "epoch": 0.468703627295562, "grad_norm": 0.1687450408935547, "learning_rate": 4.531303611370497e-06, "loss": 0.9423, "step": 64750 }, { "epoch": 0.46877601395614815, "grad_norm": 0.15913762152194977, "learning_rate": 4.531231224709911e-06, "loss": 0.9296, "step": 64760 }, { "epoch": 0.46884840061673433, "grad_norm": 0.18352152407169342, "learning_rate": 4.531158838049325e-06, "loss": 0.9307, "step": 64770 }, { "epoch": 0.46892078727732056, "grad_norm": 0.16911835968494415, "learning_rate": 4.531086451388739e-06, "loss": 0.9296, "step": 64780 }, { "epoch": 0.46899317393790674, "grad_norm": 0.17307941615581512, "learning_rate": 4.531014064728152e-06, "loss": 0.9198, "step": 64790 }, { "epoch": 0.4690655605984929, "grad_norm": 0.16192714869976044, "learning_rate": 4.530941678067566e-06, "loss": 0.9313, "step": 64800 }, { "epoch": 0.4691379472590791, "grad_norm": 0.16948965191841125, "learning_rate": 4.53086929140698e-06, "loss": 0.9317, "step": 64810 }, { "epoch": 0.46921033391966527, "grad_norm": 0.18143318593502045, "learning_rate": 4.530796904746393e-06, "loss": 0.9153, "step": 64820 }, { "epoch": 0.46928272058025144, "grad_norm": 0.15184900164604187, "learning_rate": 4.530724518085807e-06, "loss": 0.9259, "step": 64830 }, { "epoch": 0.4693551072408377, "grad_norm": 0.23423556983470917, "learning_rate": 4.530652131425221e-06, "loss": 0.9186, "step": 64840 }, { "epoch": 0.46942749390142385, "grad_norm": 0.15671993792057037, "learning_rate": 4.530579744764635e-06, "loss": 0.9295, "step": 64850 }, { "epoch": 0.46949988056201003, "grad_norm": 0.1595701277256012, "learning_rate": 4.5305073581040486e-06, "loss": 0.917, "step": 64860 }, { "epoch": 0.4695722672225962, "grad_norm": 0.1630265712738037, "learning_rate": 4.530434971443462e-06, "loss": 0.9234, "step": 64870 }, { "epoch": 0.4696446538831824, "grad_norm": 0.16177038848400116, "learning_rate": 4.530362584782877e-06, "loss": 0.9365, "step": 64880 }, { "epoch": 0.4697170405437686, "grad_norm": 0.16307857632637024, "learning_rate": 4.53029019812229e-06, "loss": 0.9355, "step": 64890 }, { "epoch": 0.4697894272043548, "grad_norm": 0.15251775085926056, "learning_rate": 4.530217811461704e-06, "loss": 0.9335, "step": 64900 }, { "epoch": 0.46986181386494097, "grad_norm": 0.172450453042984, "learning_rate": 4.5301454248011175e-06, "loss": 0.9269, "step": 64910 }, { "epoch": 0.46993420052552715, "grad_norm": 0.1565367430448532, "learning_rate": 4.530073038140532e-06, "loss": 0.9327, "step": 64920 }, { "epoch": 0.4700065871861133, "grad_norm": 0.3131227493286133, "learning_rate": 4.5300006514799456e-06, "loss": 0.9213, "step": 64930 }, { "epoch": 0.47007897384669955, "grad_norm": 0.1655091792345047, "learning_rate": 4.529928264819359e-06, "loss": 0.9409, "step": 64940 }, { "epoch": 0.47015136050728573, "grad_norm": 0.1536223590373993, "learning_rate": 4.529855878158773e-06, "loss": 0.92, "step": 64950 }, { "epoch": 0.4702237471678719, "grad_norm": 0.1615433245897293, "learning_rate": 4.529783491498187e-06, "loss": 0.9194, "step": 64960 }, { "epoch": 0.4702961338284581, "grad_norm": 0.15238776803016663, "learning_rate": 4.529711104837601e-06, "loss": 0.9202, "step": 64970 }, { "epoch": 0.47036852048904426, "grad_norm": 0.15168006718158722, "learning_rate": 4.5296387181770145e-06, "loss": 0.9292, "step": 64980 }, { "epoch": 0.47044090714963044, "grad_norm": 0.15964418649673462, "learning_rate": 4.529566331516428e-06, "loss": 0.9273, "step": 64990 }, { "epoch": 0.47051329381021667, "grad_norm": 0.1611081212759018, "learning_rate": 4.5294939448558426e-06, "loss": 0.9226, "step": 65000 }, { "epoch": 0.47058568047080285, "grad_norm": 0.168733611702919, "learning_rate": 4.529421558195256e-06, "loss": 0.9212, "step": 65010 }, { "epoch": 0.470658067131389, "grad_norm": 0.17109130322933197, "learning_rate": 4.52934917153467e-06, "loss": 0.9261, "step": 65020 }, { "epoch": 0.4707304537919752, "grad_norm": 0.16328299045562744, "learning_rate": 4.529276784874083e-06, "loss": 0.9293, "step": 65030 }, { "epoch": 0.4708028404525614, "grad_norm": 0.16657480597496033, "learning_rate": 4.529204398213498e-06, "loss": 0.9165, "step": 65040 }, { "epoch": 0.4708752271131476, "grad_norm": 0.1572638899087906, "learning_rate": 4.5291320115529115e-06, "loss": 0.9346, "step": 65050 }, { "epoch": 0.4709476137737338, "grad_norm": 0.16349254548549652, "learning_rate": 4.529059624892325e-06, "loss": 0.9294, "step": 65060 }, { "epoch": 0.47102000043431996, "grad_norm": 0.16738827526569366, "learning_rate": 4.528987238231739e-06, "loss": 0.9208, "step": 65070 }, { "epoch": 0.47109238709490614, "grad_norm": 0.1925373077392578, "learning_rate": 4.528914851571153e-06, "loss": 0.9135, "step": 65080 }, { "epoch": 0.4711647737554923, "grad_norm": 0.1586436629295349, "learning_rate": 4.528842464910567e-06, "loss": 0.929, "step": 65090 }, { "epoch": 0.47123716041607855, "grad_norm": 0.15072402358055115, "learning_rate": 4.5287700782499804e-06, "loss": 0.9166, "step": 65100 }, { "epoch": 0.4713095470766647, "grad_norm": 0.1562575101852417, "learning_rate": 4.528697691589394e-06, "loss": 0.9136, "step": 65110 }, { "epoch": 0.4713819337372509, "grad_norm": 1.8733214139938354, "learning_rate": 4.5286253049288085e-06, "loss": 0.9095, "step": 65120 }, { "epoch": 0.4714543203978371, "grad_norm": 0.1686108261346817, "learning_rate": 4.528552918268222e-06, "loss": 0.9196, "step": 65130 }, { "epoch": 0.47152670705842326, "grad_norm": 0.17261675000190735, "learning_rate": 4.528480531607636e-06, "loss": 0.9238, "step": 65140 }, { "epoch": 0.4715990937190095, "grad_norm": 0.19091512262821198, "learning_rate": 4.528408144947049e-06, "loss": 0.9248, "step": 65150 }, { "epoch": 0.47167148037959566, "grad_norm": 0.1787658929824829, "learning_rate": 4.528335758286464e-06, "loss": 0.9335, "step": 65160 }, { "epoch": 0.47174386704018184, "grad_norm": 0.1631409078836441, "learning_rate": 4.5282633716258774e-06, "loss": 0.9183, "step": 65170 }, { "epoch": 0.471816253700768, "grad_norm": 0.16262884438037872, "learning_rate": 4.528190984965291e-06, "loss": 0.9278, "step": 65180 }, { "epoch": 0.4718886403613542, "grad_norm": 0.16872116923332214, "learning_rate": 4.528118598304705e-06, "loss": 0.9303, "step": 65190 }, { "epoch": 0.47196102702194037, "grad_norm": 0.17301858961582184, "learning_rate": 4.528046211644119e-06, "loss": 0.941, "step": 65200 }, { "epoch": 0.4720334136825266, "grad_norm": 0.17453503608703613, "learning_rate": 4.527973824983533e-06, "loss": 0.9196, "step": 65210 }, { "epoch": 0.4721058003431128, "grad_norm": 0.1696014106273651, "learning_rate": 4.527901438322946e-06, "loss": 0.9275, "step": 65220 }, { "epoch": 0.47217818700369896, "grad_norm": 0.18995621800422668, "learning_rate": 4.52782905166236e-06, "loss": 0.9225, "step": 65230 }, { "epoch": 0.47225057366428513, "grad_norm": 0.1921137422323227, "learning_rate": 4.5277566650017744e-06, "loss": 0.9353, "step": 65240 }, { "epoch": 0.4723229603248713, "grad_norm": 0.15778936445713043, "learning_rate": 4.527684278341188e-06, "loss": 0.9268, "step": 65250 }, { "epoch": 0.47239534698545754, "grad_norm": 0.17273864150047302, "learning_rate": 4.527611891680602e-06, "loss": 0.9206, "step": 65260 }, { "epoch": 0.4724677336460437, "grad_norm": 0.16963228583335876, "learning_rate": 4.527539505020015e-06, "loss": 0.9257, "step": 65270 }, { "epoch": 0.4725401203066299, "grad_norm": 0.15939907729625702, "learning_rate": 4.527467118359429e-06, "loss": 0.9197, "step": 65280 }, { "epoch": 0.4726125069672161, "grad_norm": 0.16947107017040253, "learning_rate": 4.527394731698843e-06, "loss": 0.9286, "step": 65290 }, { "epoch": 0.47268489362780225, "grad_norm": 0.2224172055721283, "learning_rate": 4.527322345038257e-06, "loss": 0.9384, "step": 65300 }, { "epoch": 0.4727572802883885, "grad_norm": 0.16941924393177032, "learning_rate": 4.527249958377671e-06, "loss": 0.9309, "step": 65310 }, { "epoch": 0.47282966694897466, "grad_norm": 0.17061692476272583, "learning_rate": 4.527177571717084e-06, "loss": 0.9248, "step": 65320 }, { "epoch": 0.47290205360956084, "grad_norm": 0.18717791140079498, "learning_rate": 4.527105185056499e-06, "loss": 0.9196, "step": 65330 }, { "epoch": 0.472974440270147, "grad_norm": 0.163266122341156, "learning_rate": 4.5270327983959114e-06, "loss": 0.9331, "step": 65340 }, { "epoch": 0.4730468269307332, "grad_norm": 0.16074928641319275, "learning_rate": 4.526960411735326e-06, "loss": 0.9223, "step": 65350 }, { "epoch": 0.47311921359131937, "grad_norm": 0.15289919078350067, "learning_rate": 4.5268880250747395e-06, "loss": 0.9275, "step": 65360 }, { "epoch": 0.4731916002519056, "grad_norm": 0.15109410881996155, "learning_rate": 4.526815638414153e-06, "loss": 0.9392, "step": 65370 }, { "epoch": 0.4732639869124918, "grad_norm": 0.1612526923418045, "learning_rate": 4.526743251753567e-06, "loss": 0.9317, "step": 65380 }, { "epoch": 0.47333637357307795, "grad_norm": 0.1555621474981308, "learning_rate": 4.526670865092981e-06, "loss": 0.9258, "step": 65390 }, { "epoch": 0.4734087602336641, "grad_norm": 0.1611545830965042, "learning_rate": 4.526598478432395e-06, "loss": 0.9283, "step": 65400 }, { "epoch": 0.4734811468942503, "grad_norm": 0.15748530626296997, "learning_rate": 4.5265260917718085e-06, "loss": 0.928, "step": 65410 }, { "epoch": 0.47355353355483654, "grad_norm": 0.15606287121772766, "learning_rate": 4.526453705111222e-06, "loss": 0.9109, "step": 65420 }, { "epoch": 0.4736259202154227, "grad_norm": 0.15752071142196655, "learning_rate": 4.526381318450636e-06, "loss": 0.9277, "step": 65430 }, { "epoch": 0.4736983068760089, "grad_norm": 0.15977409482002258, "learning_rate": 4.52630893179005e-06, "loss": 0.9256, "step": 65440 }, { "epoch": 0.47377069353659507, "grad_norm": 0.18297438323497772, "learning_rate": 4.526236545129464e-06, "loss": 0.9271, "step": 65450 }, { "epoch": 0.47384308019718124, "grad_norm": 0.16020241379737854, "learning_rate": 4.526164158468877e-06, "loss": 0.9215, "step": 65460 }, { "epoch": 0.4739154668577675, "grad_norm": 0.16484937071800232, "learning_rate": 4.526091771808291e-06, "loss": 0.9252, "step": 65470 }, { "epoch": 0.47398785351835365, "grad_norm": 0.15512751042842865, "learning_rate": 4.5260193851477055e-06, "loss": 0.9287, "step": 65480 }, { "epoch": 0.47406024017893983, "grad_norm": 0.17977750301361084, "learning_rate": 4.525946998487119e-06, "loss": 0.9197, "step": 65490 }, { "epoch": 0.474132626839526, "grad_norm": 0.17437465488910675, "learning_rate": 4.525874611826533e-06, "loss": 0.9398, "step": 65500 }, { "epoch": 0.4742050135001122, "grad_norm": 0.1768895983695984, "learning_rate": 4.525802225165946e-06, "loss": 0.9288, "step": 65510 }, { "epoch": 0.47427740016069836, "grad_norm": 0.15914186835289001, "learning_rate": 4.525729838505361e-06, "loss": 0.9102, "step": 65520 }, { "epoch": 0.4743497868212846, "grad_norm": 0.1690351516008377, "learning_rate": 4.525657451844774e-06, "loss": 0.9324, "step": 65530 }, { "epoch": 0.47442217348187077, "grad_norm": 0.15182551741600037, "learning_rate": 4.525585065184188e-06, "loss": 0.9163, "step": 65540 }, { "epoch": 0.47449456014245694, "grad_norm": 0.17388121783733368, "learning_rate": 4.525512678523602e-06, "loss": 0.9364, "step": 65550 }, { "epoch": 0.4745669468030431, "grad_norm": 0.1595803201198578, "learning_rate": 4.525440291863016e-06, "loss": 0.934, "step": 65560 }, { "epoch": 0.4746393334636293, "grad_norm": 0.15675079822540283, "learning_rate": 4.52536790520243e-06, "loss": 0.9422, "step": 65570 }, { "epoch": 0.47471172012421553, "grad_norm": 0.37786662578582764, "learning_rate": 4.525295518541843e-06, "loss": 0.9165, "step": 65580 }, { "epoch": 0.4747841067848017, "grad_norm": 0.16655652225017548, "learning_rate": 4.525223131881257e-06, "loss": 0.9209, "step": 65590 }, { "epoch": 0.4748564934453879, "grad_norm": 0.18225440382957458, "learning_rate": 4.525150745220671e-06, "loss": 0.9251, "step": 65600 }, { "epoch": 0.47492888010597406, "grad_norm": 0.17587362229824066, "learning_rate": 4.525078358560085e-06, "loss": 0.9156, "step": 65610 }, { "epoch": 0.47500126676656024, "grad_norm": 0.1652805507183075, "learning_rate": 4.525005971899499e-06, "loss": 0.9384, "step": 65620 }, { "epoch": 0.47507365342714647, "grad_norm": 0.19912905991077423, "learning_rate": 4.524933585238912e-06, "loss": 0.9288, "step": 65630 }, { "epoch": 0.47514604008773265, "grad_norm": 0.16045509278774261, "learning_rate": 4.524861198578327e-06, "loss": 0.9382, "step": 65640 }, { "epoch": 0.4752184267483188, "grad_norm": 0.15262702107429504, "learning_rate": 4.52478881191774e-06, "loss": 0.9139, "step": 65650 }, { "epoch": 0.475290813408905, "grad_norm": 0.14551232755184174, "learning_rate": 4.524716425257154e-06, "loss": 0.9337, "step": 65660 }, { "epoch": 0.4753632000694912, "grad_norm": 0.1558513045310974, "learning_rate": 4.5246440385965676e-06, "loss": 0.923, "step": 65670 }, { "epoch": 0.4754355867300774, "grad_norm": 0.15776222944259644, "learning_rate": 4.524571651935982e-06, "loss": 0.9251, "step": 65680 }, { "epoch": 0.4755079733906636, "grad_norm": 0.15665759146213531, "learning_rate": 4.524499265275396e-06, "loss": 0.9342, "step": 65690 }, { "epoch": 0.47558036005124976, "grad_norm": 0.16208912432193756, "learning_rate": 4.524426878614809e-06, "loss": 0.9177, "step": 65700 }, { "epoch": 0.47565274671183594, "grad_norm": 0.15631617605686188, "learning_rate": 4.524354491954223e-06, "loss": 0.9125, "step": 65710 }, { "epoch": 0.4757251333724221, "grad_norm": 0.14883607625961304, "learning_rate": 4.524282105293637e-06, "loss": 0.9268, "step": 65720 }, { "epoch": 0.4757975200330083, "grad_norm": 0.1741674840450287, "learning_rate": 4.524209718633051e-06, "loss": 0.9362, "step": 65730 }, { "epoch": 0.4758699066935945, "grad_norm": 0.17127837240695953, "learning_rate": 4.5241373319724646e-06, "loss": 0.9356, "step": 65740 }, { "epoch": 0.4759422933541807, "grad_norm": 0.15458397567272186, "learning_rate": 4.524064945311878e-06, "loss": 0.9194, "step": 65750 }, { "epoch": 0.4760146800147669, "grad_norm": 0.17374902963638306, "learning_rate": 4.523992558651293e-06, "loss": 0.9327, "step": 65760 }, { "epoch": 0.47608706667535305, "grad_norm": 0.16826695203781128, "learning_rate": 4.523920171990706e-06, "loss": 0.9353, "step": 65770 }, { "epoch": 0.47615945333593923, "grad_norm": 0.17883165180683136, "learning_rate": 4.52384778533012e-06, "loss": 0.9363, "step": 65780 }, { "epoch": 0.47623183999652546, "grad_norm": 0.15835782885551453, "learning_rate": 4.5237753986695335e-06, "loss": 0.9221, "step": 65790 }, { "epoch": 0.47630422665711164, "grad_norm": 0.1708236187696457, "learning_rate": 4.523703012008948e-06, "loss": 0.932, "step": 65800 }, { "epoch": 0.4763766133176978, "grad_norm": 0.15170232951641083, "learning_rate": 4.5236306253483616e-06, "loss": 0.9146, "step": 65810 }, { "epoch": 0.476448999978284, "grad_norm": 0.23991255462169647, "learning_rate": 4.523558238687775e-06, "loss": 0.9293, "step": 65820 }, { "epoch": 0.47652138663887017, "grad_norm": 0.15470875799655914, "learning_rate": 4.523485852027189e-06, "loss": 0.9262, "step": 65830 }, { "epoch": 0.4765937732994564, "grad_norm": 0.1519736647605896, "learning_rate": 4.523413465366603e-06, "loss": 0.9267, "step": 65840 }, { "epoch": 0.4766661599600426, "grad_norm": 0.15895114839076996, "learning_rate": 4.523341078706017e-06, "loss": 0.9305, "step": 65850 }, { "epoch": 0.47673854662062876, "grad_norm": 0.21327605843544006, "learning_rate": 4.5232686920454305e-06, "loss": 0.9106, "step": 65860 }, { "epoch": 0.47681093328121493, "grad_norm": 0.15450114011764526, "learning_rate": 4.523196305384844e-06, "loss": 0.9167, "step": 65870 }, { "epoch": 0.4768833199418011, "grad_norm": 0.17385606467723846, "learning_rate": 4.523123918724258e-06, "loss": 0.9264, "step": 65880 }, { "epoch": 0.4769557066023873, "grad_norm": 0.15806423127651215, "learning_rate": 4.523051532063671e-06, "loss": 0.9311, "step": 65890 }, { "epoch": 0.4770280932629735, "grad_norm": 0.15671569108963013, "learning_rate": 4.522979145403085e-06, "loss": 0.9292, "step": 65900 }, { "epoch": 0.4771004799235597, "grad_norm": 0.18585889041423798, "learning_rate": 4.5229067587424994e-06, "loss": 0.9216, "step": 65910 }, { "epoch": 0.47717286658414587, "grad_norm": 0.1866776943206787, "learning_rate": 4.522834372081913e-06, "loss": 0.9221, "step": 65920 }, { "epoch": 0.47724525324473205, "grad_norm": 0.1546396166086197, "learning_rate": 4.522761985421327e-06, "loss": 0.9335, "step": 65930 }, { "epoch": 0.4773176399053182, "grad_norm": 0.16696631908416748, "learning_rate": 4.52268959876074e-06, "loss": 0.9369, "step": 65940 }, { "epoch": 0.47739002656590446, "grad_norm": 0.1489332616329193, "learning_rate": 4.522617212100155e-06, "loss": 0.9173, "step": 65950 }, { "epoch": 0.47746241322649063, "grad_norm": 0.16513670980930328, "learning_rate": 4.522544825439568e-06, "loss": 0.9357, "step": 65960 }, { "epoch": 0.4775347998870768, "grad_norm": 0.16803127527236938, "learning_rate": 4.522472438778982e-06, "loss": 0.9284, "step": 65970 }, { "epoch": 0.477607186547663, "grad_norm": 0.16002456843852997, "learning_rate": 4.522400052118396e-06, "loss": 0.9268, "step": 65980 }, { "epoch": 0.47767957320824916, "grad_norm": 0.17356227338314056, "learning_rate": 4.52232766545781e-06, "loss": 0.9188, "step": 65990 }, { "epoch": 0.4777519598688354, "grad_norm": 0.15143811702728271, "learning_rate": 4.522255278797224e-06, "loss": 0.925, "step": 66000 }, { "epoch": 0.4778243465294216, "grad_norm": 0.16914960741996765, "learning_rate": 4.522182892136637e-06, "loss": 0.9254, "step": 66010 }, { "epoch": 0.47789673319000775, "grad_norm": 0.18097072839736938, "learning_rate": 4.522110505476051e-06, "loss": 0.9262, "step": 66020 }, { "epoch": 0.4779691198505939, "grad_norm": 0.15266379714012146, "learning_rate": 4.522038118815465e-06, "loss": 0.9339, "step": 66030 }, { "epoch": 0.4780415065111801, "grad_norm": 0.16532346606254578, "learning_rate": 4.521965732154879e-06, "loss": 0.9308, "step": 66040 }, { "epoch": 0.4781138931717663, "grad_norm": 0.15536561608314514, "learning_rate": 4.521893345494293e-06, "loss": 0.9382, "step": 66050 }, { "epoch": 0.4781862798323525, "grad_norm": 0.17557422816753387, "learning_rate": 4.521820958833706e-06, "loss": 0.9254, "step": 66060 }, { "epoch": 0.4782586664929387, "grad_norm": 0.1694338172674179, "learning_rate": 4.52174857217312e-06, "loss": 0.9163, "step": 66070 }, { "epoch": 0.47833105315352487, "grad_norm": 0.1753464937210083, "learning_rate": 4.521676185512534e-06, "loss": 0.9185, "step": 66080 }, { "epoch": 0.47840343981411104, "grad_norm": 0.17190249264240265, "learning_rate": 4.521603798851948e-06, "loss": 0.9321, "step": 66090 }, { "epoch": 0.4784758264746972, "grad_norm": 0.19599749147891998, "learning_rate": 4.5215314121913615e-06, "loss": 0.9146, "step": 66100 }, { "epoch": 0.47854821313528345, "grad_norm": 0.15936939418315887, "learning_rate": 4.521459025530775e-06, "loss": 0.9135, "step": 66110 }, { "epoch": 0.47862059979586963, "grad_norm": 0.1538030058145523, "learning_rate": 4.52138663887019e-06, "loss": 0.9398, "step": 66120 }, { "epoch": 0.4786929864564558, "grad_norm": 0.15321306884288788, "learning_rate": 4.521314252209603e-06, "loss": 0.9364, "step": 66130 }, { "epoch": 0.478765373117042, "grad_norm": 0.17162488400936127, "learning_rate": 4.521241865549017e-06, "loss": 0.9273, "step": 66140 }, { "epoch": 0.47883775977762816, "grad_norm": 0.14776068925857544, "learning_rate": 4.5211694788884305e-06, "loss": 0.9183, "step": 66150 }, { "epoch": 0.4789101464382144, "grad_norm": 0.16864345967769623, "learning_rate": 4.521097092227845e-06, "loss": 0.9167, "step": 66160 }, { "epoch": 0.47898253309880057, "grad_norm": 0.1763739287853241, "learning_rate": 4.5210247055672585e-06, "loss": 0.9372, "step": 66170 }, { "epoch": 0.47905491975938674, "grad_norm": 0.1718112826347351, "learning_rate": 4.520952318906672e-06, "loss": 0.9237, "step": 66180 }, { "epoch": 0.4791273064199729, "grad_norm": 0.1696150302886963, "learning_rate": 4.520879932246086e-06, "loss": 0.9025, "step": 66190 }, { "epoch": 0.4791996930805591, "grad_norm": 0.15991909801959991, "learning_rate": 4.5208075455855e-06, "loss": 0.9252, "step": 66200 }, { "epoch": 0.47927207974114533, "grad_norm": 0.15851566195487976, "learning_rate": 4.520735158924914e-06, "loss": 0.9346, "step": 66210 }, { "epoch": 0.4793444664017315, "grad_norm": 0.1567084640264511, "learning_rate": 4.5206627722643275e-06, "loss": 0.9178, "step": 66220 }, { "epoch": 0.4794168530623177, "grad_norm": 0.20427605509757996, "learning_rate": 4.520590385603741e-06, "loss": 0.9117, "step": 66230 }, { "epoch": 0.47948923972290386, "grad_norm": 0.17024697363376617, "learning_rate": 4.5205179989431555e-06, "loss": 0.9149, "step": 66240 }, { "epoch": 0.47956162638349004, "grad_norm": 0.15808874368667603, "learning_rate": 4.520445612282569e-06, "loss": 0.9303, "step": 66250 }, { "epoch": 0.4796340130440762, "grad_norm": 0.15470923483371735, "learning_rate": 4.520373225621983e-06, "loss": 0.9306, "step": 66260 }, { "epoch": 0.47970639970466245, "grad_norm": 0.15453216433525085, "learning_rate": 4.520300838961396e-06, "loss": 0.9206, "step": 66270 }, { "epoch": 0.4797787863652486, "grad_norm": 0.15643690526485443, "learning_rate": 4.520228452300811e-06, "loss": 0.9228, "step": 66280 }, { "epoch": 0.4798511730258348, "grad_norm": 0.16580794751644135, "learning_rate": 4.5201560656402245e-06, "loss": 0.9398, "step": 66290 }, { "epoch": 0.479923559686421, "grad_norm": 0.15789580345153809, "learning_rate": 4.520083678979638e-06, "loss": 0.9263, "step": 66300 }, { "epoch": 0.47999594634700715, "grad_norm": 0.1554155945777893, "learning_rate": 4.520011292319052e-06, "loss": 0.9264, "step": 66310 }, { "epoch": 0.4800683330075934, "grad_norm": 0.15116317570209503, "learning_rate": 4.519938905658466e-06, "loss": 0.9056, "step": 66320 }, { "epoch": 0.48014071966817956, "grad_norm": 0.1687956303358078, "learning_rate": 4.51986651899788e-06, "loss": 0.9263, "step": 66330 }, { "epoch": 0.48021310632876574, "grad_norm": 0.16320586204528809, "learning_rate": 4.519794132337293e-06, "loss": 0.9258, "step": 66340 }, { "epoch": 0.4802854929893519, "grad_norm": 0.16672076284885406, "learning_rate": 4.519721745676707e-06, "loss": 0.9144, "step": 66350 }, { "epoch": 0.4803578796499381, "grad_norm": 0.1586775779724121, "learning_rate": 4.5196493590161215e-06, "loss": 0.9242, "step": 66360 }, { "epoch": 0.4804302663105243, "grad_norm": 0.15324345231056213, "learning_rate": 4.519576972355535e-06, "loss": 0.9235, "step": 66370 }, { "epoch": 0.4805026529711105, "grad_norm": 0.1635131537914276, "learning_rate": 4.519504585694949e-06, "loss": 0.9354, "step": 66380 }, { "epoch": 0.4805750396316967, "grad_norm": 0.16541409492492676, "learning_rate": 4.519432199034362e-06, "loss": 0.9113, "step": 66390 }, { "epoch": 0.48064742629228285, "grad_norm": 0.1589830070734024, "learning_rate": 4.519359812373776e-06, "loss": 0.9271, "step": 66400 }, { "epoch": 0.48071981295286903, "grad_norm": 0.1620132029056549, "learning_rate": 4.5192874257131896e-06, "loss": 0.9318, "step": 66410 }, { "epoch": 0.4807921996134552, "grad_norm": 0.1495799869298935, "learning_rate": 4.519215039052603e-06, "loss": 0.908, "step": 66420 }, { "epoch": 0.48086458627404144, "grad_norm": 0.17354585230350494, "learning_rate": 4.519142652392018e-06, "loss": 0.9334, "step": 66430 }, { "epoch": 0.4809369729346276, "grad_norm": 0.18685661256313324, "learning_rate": 4.519070265731431e-06, "loss": 0.9207, "step": 66440 }, { "epoch": 0.4810093595952138, "grad_norm": 0.16093280911445618, "learning_rate": 4.518997879070845e-06, "loss": 0.9236, "step": 66450 }, { "epoch": 0.48108174625579997, "grad_norm": 0.15578316152095795, "learning_rate": 4.5189254924102585e-06, "loss": 0.918, "step": 66460 }, { "epoch": 0.48115413291638615, "grad_norm": 0.1621652990579605, "learning_rate": 4.518853105749673e-06, "loss": 0.9276, "step": 66470 }, { "epoch": 0.4812265195769724, "grad_norm": 0.16943508386611938, "learning_rate": 4.5187807190890866e-06, "loss": 0.9465, "step": 66480 }, { "epoch": 0.48129890623755855, "grad_norm": 0.17045237123966217, "learning_rate": 4.5187083324285e-06, "loss": 0.9317, "step": 66490 }, { "epoch": 0.48137129289814473, "grad_norm": 0.16214360296726227, "learning_rate": 4.518635945767914e-06, "loss": 0.9179, "step": 66500 }, { "epoch": 0.4814436795587309, "grad_norm": 0.15441229939460754, "learning_rate": 4.518563559107328e-06, "loss": 0.9354, "step": 66510 }, { "epoch": 0.4815160662193171, "grad_norm": 0.14522579312324524, "learning_rate": 4.518491172446742e-06, "loss": 0.9209, "step": 66520 }, { "epoch": 0.4815884528799033, "grad_norm": 0.15296971797943115, "learning_rate": 4.5184187857861555e-06, "loss": 0.9226, "step": 66530 }, { "epoch": 0.4816608395404895, "grad_norm": 0.15403321385383606, "learning_rate": 4.518346399125569e-06, "loss": 0.9182, "step": 66540 }, { "epoch": 0.48173322620107567, "grad_norm": 0.16520103812217712, "learning_rate": 4.5182740124649836e-06, "loss": 0.9209, "step": 66550 }, { "epoch": 0.48180561286166185, "grad_norm": 0.16668029129505157, "learning_rate": 4.518201625804397e-06, "loss": 0.9384, "step": 66560 }, { "epoch": 0.481877999522248, "grad_norm": 0.16062092781066895, "learning_rate": 4.518129239143811e-06, "loss": 0.9313, "step": 66570 }, { "epoch": 0.4819503861828342, "grad_norm": 0.15920652449131012, "learning_rate": 4.518056852483224e-06, "loss": 0.9146, "step": 66580 }, { "epoch": 0.48202277284342043, "grad_norm": 0.1484328955411911, "learning_rate": 4.517984465822639e-06, "loss": 0.9161, "step": 66590 }, { "epoch": 0.4820951595040066, "grad_norm": 0.16604140400886536, "learning_rate": 4.5179120791620525e-06, "loss": 0.9102, "step": 66600 }, { "epoch": 0.4821675461645928, "grad_norm": 0.16721735894680023, "learning_rate": 4.517839692501466e-06, "loss": 0.9208, "step": 66610 }, { "epoch": 0.48223993282517896, "grad_norm": 0.17339125275611877, "learning_rate": 4.51776730584088e-06, "loss": 0.9244, "step": 66620 }, { "epoch": 0.48231231948576514, "grad_norm": 0.1743367463350296, "learning_rate": 4.517694919180294e-06, "loss": 0.9294, "step": 66630 }, { "epoch": 0.48238470614635137, "grad_norm": 0.1753169298171997, "learning_rate": 4.517622532519708e-06, "loss": 0.9206, "step": 66640 }, { "epoch": 0.48245709280693755, "grad_norm": 0.16888897120952606, "learning_rate": 4.5175501458591214e-06, "loss": 0.9175, "step": 66650 }, { "epoch": 0.4825294794675237, "grad_norm": 0.18841847777366638, "learning_rate": 4.517477759198535e-06, "loss": 0.9228, "step": 66660 }, { "epoch": 0.4826018661281099, "grad_norm": 0.1644550859928131, "learning_rate": 4.517405372537949e-06, "loss": 0.9295, "step": 66670 }, { "epoch": 0.4826742527886961, "grad_norm": 0.15950177609920502, "learning_rate": 4.517332985877363e-06, "loss": 0.9303, "step": 66680 }, { "epoch": 0.4827466394492823, "grad_norm": 0.1527274250984192, "learning_rate": 4.517260599216777e-06, "loss": 0.924, "step": 66690 }, { "epoch": 0.4828190261098685, "grad_norm": 0.1690022051334381, "learning_rate": 4.51718821255619e-06, "loss": 0.9212, "step": 66700 }, { "epoch": 0.48289141277045466, "grad_norm": 0.17802709341049194, "learning_rate": 4.517115825895604e-06, "loss": 0.9246, "step": 66710 }, { "epoch": 0.48296379943104084, "grad_norm": 0.15989567339420319, "learning_rate": 4.5170434392350184e-06, "loss": 0.9205, "step": 66720 }, { "epoch": 0.483036186091627, "grad_norm": 0.16513442993164062, "learning_rate": 4.516971052574432e-06, "loss": 0.9294, "step": 66730 }, { "epoch": 0.4831085727522132, "grad_norm": 0.157196044921875, "learning_rate": 4.516898665913846e-06, "loss": 0.9186, "step": 66740 }, { "epoch": 0.4831809594127994, "grad_norm": 0.17779704928398132, "learning_rate": 4.516826279253259e-06, "loss": 0.9111, "step": 66750 }, { "epoch": 0.4832533460733856, "grad_norm": 0.15791267156600952, "learning_rate": 4.516753892592674e-06, "loss": 0.938, "step": 66760 }, { "epoch": 0.4833257327339718, "grad_norm": 0.16559448838233948, "learning_rate": 4.516681505932087e-06, "loss": 0.9158, "step": 66770 }, { "epoch": 0.48339811939455796, "grad_norm": 0.1510489284992218, "learning_rate": 4.516609119271501e-06, "loss": 0.9308, "step": 66780 }, { "epoch": 0.48347050605514413, "grad_norm": 0.16266123950481415, "learning_rate": 4.516536732610915e-06, "loss": 0.9223, "step": 66790 }, { "epoch": 0.48354289271573037, "grad_norm": 0.16285644471645355, "learning_rate": 4.516464345950329e-06, "loss": 0.9035, "step": 66800 }, { "epoch": 0.48361527937631654, "grad_norm": 0.16523510217666626, "learning_rate": 4.516391959289743e-06, "loss": 0.923, "step": 66810 }, { "epoch": 0.4836876660369027, "grad_norm": 0.1816108375787735, "learning_rate": 4.516319572629156e-06, "loss": 0.9195, "step": 66820 }, { "epoch": 0.4837600526974889, "grad_norm": 0.1596839874982834, "learning_rate": 4.51624718596857e-06, "loss": 0.932, "step": 66830 }, { "epoch": 0.4838324393580751, "grad_norm": 0.16277308762073517, "learning_rate": 4.516174799307984e-06, "loss": 0.9247, "step": 66840 }, { "epoch": 0.4839048260186613, "grad_norm": 0.1530946046113968, "learning_rate": 4.516102412647398e-06, "loss": 0.9213, "step": 66850 }, { "epoch": 0.4839772126792475, "grad_norm": 0.16606640815734863, "learning_rate": 4.516030025986812e-06, "loss": 0.9209, "step": 66860 }, { "epoch": 0.48404959933983366, "grad_norm": 0.180929034948349, "learning_rate": 4.515957639326225e-06, "loss": 0.9209, "step": 66870 }, { "epoch": 0.48412198600041983, "grad_norm": 0.15386876463890076, "learning_rate": 4.51588525266564e-06, "loss": 0.9208, "step": 66880 }, { "epoch": 0.484194372661006, "grad_norm": 0.17483779788017273, "learning_rate": 4.515812866005053e-06, "loss": 0.9147, "step": 66890 }, { "epoch": 0.48426675932159224, "grad_norm": 0.17675867676734924, "learning_rate": 4.515740479344467e-06, "loss": 0.9216, "step": 66900 }, { "epoch": 0.4843391459821784, "grad_norm": 0.16373442113399506, "learning_rate": 4.5156680926838805e-06, "loss": 0.9273, "step": 66910 }, { "epoch": 0.4844115326427646, "grad_norm": 0.16552437841892242, "learning_rate": 4.515595706023295e-06, "loss": 0.9343, "step": 66920 }, { "epoch": 0.4844839193033508, "grad_norm": 0.16545453667640686, "learning_rate": 4.515523319362708e-06, "loss": 0.9205, "step": 66930 }, { "epoch": 0.48455630596393695, "grad_norm": 0.17034026980400085, "learning_rate": 4.515450932702121e-06, "loss": 0.9213, "step": 66940 }, { "epoch": 0.4846286926245231, "grad_norm": 0.2646459639072418, "learning_rate": 4.515378546041536e-06, "loss": 0.9172, "step": 66950 }, { "epoch": 0.48470107928510936, "grad_norm": 0.16209463775157928, "learning_rate": 4.5153061593809495e-06, "loss": 0.9155, "step": 66960 }, { "epoch": 0.48477346594569554, "grad_norm": 0.1605486124753952, "learning_rate": 4.515233772720363e-06, "loss": 0.9242, "step": 66970 }, { "epoch": 0.4848458526062817, "grad_norm": 0.16971814632415771, "learning_rate": 4.515161386059777e-06, "loss": 0.9143, "step": 66980 }, { "epoch": 0.4849182392668679, "grad_norm": 0.17342694103717804, "learning_rate": 4.515088999399191e-06, "loss": 0.9302, "step": 66990 }, { "epoch": 0.48499062592745407, "grad_norm": 0.15016865730285645, "learning_rate": 4.515016612738605e-06, "loss": 0.9206, "step": 67000 }, { "epoch": 0.4850630125880403, "grad_norm": 0.15992531180381775, "learning_rate": 4.514944226078018e-06, "loss": 0.9153, "step": 67010 }, { "epoch": 0.4851353992486265, "grad_norm": 0.1591133326292038, "learning_rate": 4.514871839417432e-06, "loss": 0.9185, "step": 67020 }, { "epoch": 0.48520778590921265, "grad_norm": 0.16176247596740723, "learning_rate": 4.5147994527568465e-06, "loss": 0.9246, "step": 67030 }, { "epoch": 0.48528017256979883, "grad_norm": 0.15282560884952545, "learning_rate": 4.51472706609626e-06, "loss": 0.9323, "step": 67040 }, { "epoch": 0.485352559230385, "grad_norm": 0.1511177271604538, "learning_rate": 4.514654679435674e-06, "loss": 0.9358, "step": 67050 }, { "epoch": 0.48542494589097124, "grad_norm": 0.15375123918056488, "learning_rate": 4.514582292775087e-06, "loss": 0.9322, "step": 67060 }, { "epoch": 0.4854973325515574, "grad_norm": 0.21929968893527985, "learning_rate": 4.514509906114502e-06, "loss": 0.9129, "step": 67070 }, { "epoch": 0.4855697192121436, "grad_norm": 0.16412512958049774, "learning_rate": 4.514437519453915e-06, "loss": 0.9264, "step": 67080 }, { "epoch": 0.48564210587272977, "grad_norm": 0.15203996002674103, "learning_rate": 4.514365132793329e-06, "loss": 0.922, "step": 67090 }, { "epoch": 0.48571449253331594, "grad_norm": 0.15428316593170166, "learning_rate": 4.514292746132743e-06, "loss": 0.9196, "step": 67100 }, { "epoch": 0.4857868791939021, "grad_norm": 0.171985924243927, "learning_rate": 4.514220359472157e-06, "loss": 0.9349, "step": 67110 }, { "epoch": 0.48585926585448835, "grad_norm": 0.16062787175178528, "learning_rate": 4.514147972811571e-06, "loss": 0.9108, "step": 67120 }, { "epoch": 0.48593165251507453, "grad_norm": 0.15661896765232086, "learning_rate": 4.514075586150984e-06, "loss": 0.9236, "step": 67130 }, { "epoch": 0.4860040391756607, "grad_norm": 0.16929559409618378, "learning_rate": 4.514003199490398e-06, "loss": 0.9322, "step": 67140 }, { "epoch": 0.4860764258362469, "grad_norm": 0.1664290726184845, "learning_rate": 4.513930812829812e-06, "loss": 0.9166, "step": 67150 }, { "epoch": 0.48614881249683306, "grad_norm": 0.17513130605220795, "learning_rate": 4.513858426169226e-06, "loss": 0.917, "step": 67160 }, { "epoch": 0.4862211991574193, "grad_norm": 0.16142228245735168, "learning_rate": 4.51378603950864e-06, "loss": 0.9296, "step": 67170 }, { "epoch": 0.48629358581800547, "grad_norm": 0.17332206666469574, "learning_rate": 4.513713652848053e-06, "loss": 0.9092, "step": 67180 }, { "epoch": 0.48636597247859165, "grad_norm": 0.15262171626091003, "learning_rate": 4.513641266187468e-06, "loss": 0.935, "step": 67190 }, { "epoch": 0.4864383591391778, "grad_norm": 0.1601051539182663, "learning_rate": 4.513568879526881e-06, "loss": 0.9203, "step": 67200 }, { "epoch": 0.486510745799764, "grad_norm": 0.16139155626296997, "learning_rate": 4.513496492866295e-06, "loss": 0.9336, "step": 67210 }, { "epoch": 0.48658313246035023, "grad_norm": 0.182013601064682, "learning_rate": 4.5134241062057086e-06, "loss": 0.9269, "step": 67220 }, { "epoch": 0.4866555191209364, "grad_norm": 0.19671277701854706, "learning_rate": 4.513351719545123e-06, "loss": 0.924, "step": 67230 }, { "epoch": 0.4867279057815226, "grad_norm": 0.17239131033420563, "learning_rate": 4.513279332884537e-06, "loss": 0.9346, "step": 67240 }, { "epoch": 0.48680029244210876, "grad_norm": 0.1634131819009781, "learning_rate": 4.51320694622395e-06, "loss": 0.9198, "step": 67250 }, { "epoch": 0.48687267910269494, "grad_norm": 0.1805330216884613, "learning_rate": 4.513134559563364e-06, "loss": 0.9201, "step": 67260 }, { "epoch": 0.4869450657632811, "grad_norm": 0.15409274399280548, "learning_rate": 4.513062172902778e-06, "loss": 0.9268, "step": 67270 }, { "epoch": 0.48701745242386735, "grad_norm": 0.14754238724708557, "learning_rate": 4.512989786242192e-06, "loss": 0.92, "step": 67280 }, { "epoch": 0.4870898390844535, "grad_norm": 0.14895077049732208, "learning_rate": 4.5129173995816056e-06, "loss": 0.926, "step": 67290 }, { "epoch": 0.4871622257450397, "grad_norm": 0.16120226681232452, "learning_rate": 4.512845012921019e-06, "loss": 0.9242, "step": 67300 }, { "epoch": 0.4872346124056259, "grad_norm": 0.16055871546268463, "learning_rate": 4.512772626260433e-06, "loss": 0.9222, "step": 67310 }, { "epoch": 0.48730699906621205, "grad_norm": 0.16574375331401825, "learning_rate": 4.512700239599847e-06, "loss": 0.9205, "step": 67320 }, { "epoch": 0.4873793857267983, "grad_norm": 0.15389981865882874, "learning_rate": 4.512627852939261e-06, "loss": 0.9151, "step": 67330 }, { "epoch": 0.48745177238738446, "grad_norm": 0.15907318890094757, "learning_rate": 4.5125554662786745e-06, "loss": 0.9225, "step": 67340 }, { "epoch": 0.48752415904797064, "grad_norm": 0.16848038136959076, "learning_rate": 4.512483079618088e-06, "loss": 0.9333, "step": 67350 }, { "epoch": 0.4875965457085568, "grad_norm": 0.15980379283428192, "learning_rate": 4.512410692957503e-06, "loss": 0.9227, "step": 67360 }, { "epoch": 0.487668932369143, "grad_norm": 0.15230245888233185, "learning_rate": 4.512338306296916e-06, "loss": 0.9143, "step": 67370 }, { "epoch": 0.4877413190297292, "grad_norm": 0.16187123954296112, "learning_rate": 4.51226591963633e-06, "loss": 0.9224, "step": 67380 }, { "epoch": 0.4878137056903154, "grad_norm": 0.16506314277648926, "learning_rate": 4.5121935329757434e-06, "loss": 0.9233, "step": 67390 }, { "epoch": 0.4878860923509016, "grad_norm": 0.21859927475452423, "learning_rate": 4.512121146315158e-06, "loss": 0.9181, "step": 67400 }, { "epoch": 0.48795847901148776, "grad_norm": 0.16471314430236816, "learning_rate": 4.5120487596545715e-06, "loss": 0.9317, "step": 67410 }, { "epoch": 0.48803086567207393, "grad_norm": 0.16795067489147186, "learning_rate": 4.511976372993985e-06, "loss": 0.9162, "step": 67420 }, { "epoch": 0.48810325233266016, "grad_norm": 0.20342256128787994, "learning_rate": 4.511903986333399e-06, "loss": 0.9284, "step": 67430 }, { "epoch": 0.48817563899324634, "grad_norm": 0.31586503982543945, "learning_rate": 4.511831599672813e-06, "loss": 0.9252, "step": 67440 }, { "epoch": 0.4882480256538325, "grad_norm": 0.18031372129917145, "learning_rate": 4.511759213012227e-06, "loss": 0.9218, "step": 67450 }, { "epoch": 0.4883204123144187, "grad_norm": 0.15259408950805664, "learning_rate": 4.5116868263516404e-06, "loss": 0.9252, "step": 67460 }, { "epoch": 0.48839279897500487, "grad_norm": 0.15039771795272827, "learning_rate": 4.511614439691054e-06, "loss": 0.9228, "step": 67470 }, { "epoch": 0.48846518563559105, "grad_norm": 0.15142498910427094, "learning_rate": 4.511542053030468e-06, "loss": 0.8998, "step": 67480 }, { "epoch": 0.4885375722961773, "grad_norm": 0.15406163036823273, "learning_rate": 4.511469666369881e-06, "loss": 0.9219, "step": 67490 }, { "epoch": 0.48860995895676346, "grad_norm": 0.16428889334201813, "learning_rate": 4.511397279709295e-06, "loss": 0.9298, "step": 67500 }, { "epoch": 0.48868234561734963, "grad_norm": 0.15274979174137115, "learning_rate": 4.511324893048709e-06, "loss": 0.9374, "step": 67510 }, { "epoch": 0.4887547322779358, "grad_norm": 0.15351366996765137, "learning_rate": 4.511252506388123e-06, "loss": 0.9262, "step": 67520 }, { "epoch": 0.488827118938522, "grad_norm": 0.18971464037895203, "learning_rate": 4.511180119727537e-06, "loss": 0.9329, "step": 67530 }, { "epoch": 0.4888995055991082, "grad_norm": 0.15166032314300537, "learning_rate": 4.51110773306695e-06, "loss": 0.9232, "step": 67540 }, { "epoch": 0.4889718922596944, "grad_norm": 0.15017013251781464, "learning_rate": 4.511035346406365e-06, "loss": 0.9279, "step": 67550 }, { "epoch": 0.4890442789202806, "grad_norm": 0.16144691407680511, "learning_rate": 4.510962959745778e-06, "loss": 0.9267, "step": 67560 }, { "epoch": 0.48911666558086675, "grad_norm": 0.1623421162366867, "learning_rate": 4.510890573085192e-06, "loss": 0.9219, "step": 67570 }, { "epoch": 0.4891890522414529, "grad_norm": 0.15922850370407104, "learning_rate": 4.5108181864246055e-06, "loss": 0.919, "step": 67580 }, { "epoch": 0.48926143890203916, "grad_norm": 0.16896429657936096, "learning_rate": 4.51074579976402e-06, "loss": 0.9263, "step": 67590 }, { "epoch": 0.48933382556262534, "grad_norm": 0.1584937423467636, "learning_rate": 4.510673413103434e-06, "loss": 0.93, "step": 67600 }, { "epoch": 0.4894062122232115, "grad_norm": 0.15415899455547333, "learning_rate": 4.510601026442847e-06, "loss": 0.9178, "step": 67610 }, { "epoch": 0.4894785988837977, "grad_norm": 0.15234239399433136, "learning_rate": 4.510528639782261e-06, "loss": 0.9174, "step": 67620 }, { "epoch": 0.48955098554438387, "grad_norm": 0.1630704402923584, "learning_rate": 4.510456253121675e-06, "loss": 0.926, "step": 67630 }, { "epoch": 0.48962337220497004, "grad_norm": 0.15613876283168793, "learning_rate": 4.510383866461089e-06, "loss": 0.9233, "step": 67640 }, { "epoch": 0.4896957588655563, "grad_norm": 0.16620004177093506, "learning_rate": 4.5103114798005025e-06, "loss": 0.9264, "step": 67650 }, { "epoch": 0.48976814552614245, "grad_norm": 0.15522471070289612, "learning_rate": 4.510239093139916e-06, "loss": 0.923, "step": 67660 }, { "epoch": 0.4898405321867286, "grad_norm": 0.15571705996990204, "learning_rate": 4.510166706479331e-06, "loss": 0.927, "step": 67670 }, { "epoch": 0.4899129188473148, "grad_norm": 0.15183138847351074, "learning_rate": 4.510094319818744e-06, "loss": 0.9162, "step": 67680 }, { "epoch": 0.489985305507901, "grad_norm": 0.1504392772912979, "learning_rate": 4.510021933158158e-06, "loss": 0.9206, "step": 67690 }, { "epoch": 0.4900576921684872, "grad_norm": 0.2166612446308136, "learning_rate": 4.5099495464975715e-06, "loss": 0.9206, "step": 67700 }, { "epoch": 0.4901300788290734, "grad_norm": 0.19607232511043549, "learning_rate": 4.509877159836986e-06, "loss": 0.9225, "step": 67710 }, { "epoch": 0.49020246548965957, "grad_norm": 0.1525793820619583, "learning_rate": 4.5098047731763995e-06, "loss": 0.9161, "step": 67720 }, { "epoch": 0.49027485215024574, "grad_norm": 0.16443735361099243, "learning_rate": 4.509732386515813e-06, "loss": 0.9173, "step": 67730 }, { "epoch": 0.4903472388108319, "grad_norm": 0.17903602123260498, "learning_rate": 4.509659999855227e-06, "loss": 0.9225, "step": 67740 }, { "epoch": 0.49041962547141815, "grad_norm": 0.1686403751373291, "learning_rate": 4.509587613194641e-06, "loss": 0.9224, "step": 67750 }, { "epoch": 0.49049201213200433, "grad_norm": 0.36816513538360596, "learning_rate": 4.509515226534055e-06, "loss": 0.9186, "step": 67760 }, { "epoch": 0.4905643987925905, "grad_norm": 0.15775349736213684, "learning_rate": 4.5094428398734685e-06, "loss": 0.9361, "step": 67770 }, { "epoch": 0.4906367854531767, "grad_norm": 0.17417992651462555, "learning_rate": 4.509370453212882e-06, "loss": 0.9245, "step": 67780 }, { "epoch": 0.49070917211376286, "grad_norm": 0.1487663984298706, "learning_rate": 4.5092980665522965e-06, "loss": 0.9302, "step": 67790 }, { "epoch": 0.49078155877434904, "grad_norm": 0.1552221029996872, "learning_rate": 4.50922567989171e-06, "loss": 0.926, "step": 67800 }, { "epoch": 0.49085394543493527, "grad_norm": 0.16190670430660248, "learning_rate": 4.509153293231124e-06, "loss": 0.9284, "step": 67810 }, { "epoch": 0.49092633209552144, "grad_norm": 0.17371685802936554, "learning_rate": 4.509080906570537e-06, "loss": 0.9413, "step": 67820 }, { "epoch": 0.4909987187561076, "grad_norm": 0.1726863831281662, "learning_rate": 4.509008519909952e-06, "loss": 0.9315, "step": 67830 }, { "epoch": 0.4910711054166938, "grad_norm": 0.15722721815109253, "learning_rate": 4.5089361332493655e-06, "loss": 0.9261, "step": 67840 }, { "epoch": 0.49114349207728, "grad_norm": 0.15430855751037598, "learning_rate": 4.508863746588779e-06, "loss": 0.9288, "step": 67850 }, { "epoch": 0.4912158787378662, "grad_norm": 0.1556122750043869, "learning_rate": 4.508791359928193e-06, "loss": 0.9227, "step": 67860 }, { "epoch": 0.4912882653984524, "grad_norm": 0.1647377461194992, "learning_rate": 4.508718973267607e-06, "loss": 0.9096, "step": 67870 }, { "epoch": 0.49136065205903856, "grad_norm": 0.1618121862411499, "learning_rate": 4.508646586607021e-06, "loss": 0.9199, "step": 67880 }, { "epoch": 0.49143303871962474, "grad_norm": 0.16955524682998657, "learning_rate": 4.508574199946434e-06, "loss": 0.9285, "step": 67890 }, { "epoch": 0.4915054253802109, "grad_norm": 0.15921622514724731, "learning_rate": 4.508501813285848e-06, "loss": 0.9237, "step": 67900 }, { "epoch": 0.49157781204079715, "grad_norm": 0.17753024399280548, "learning_rate": 4.5084294266252625e-06, "loss": 0.9264, "step": 67910 }, { "epoch": 0.4916501987013833, "grad_norm": 0.15799757838249207, "learning_rate": 4.508357039964676e-06, "loss": 0.914, "step": 67920 }, { "epoch": 0.4917225853619695, "grad_norm": 0.17389525473117828, "learning_rate": 4.50828465330409e-06, "loss": 0.935, "step": 67930 }, { "epoch": 0.4917949720225557, "grad_norm": 0.15797245502471924, "learning_rate": 4.508212266643503e-06, "loss": 0.9193, "step": 67940 }, { "epoch": 0.49186735868314185, "grad_norm": 0.1706414818763733, "learning_rate": 4.508139879982917e-06, "loss": 0.9221, "step": 67950 }, { "epoch": 0.4919397453437281, "grad_norm": 0.17515702545642853, "learning_rate": 4.508067493322331e-06, "loss": 0.924, "step": 67960 }, { "epoch": 0.49201213200431426, "grad_norm": 0.16736344993114471, "learning_rate": 4.507995106661745e-06, "loss": 0.9147, "step": 67970 }, { "epoch": 0.49208451866490044, "grad_norm": 0.24362386763095856, "learning_rate": 4.507922720001159e-06, "loss": 0.9217, "step": 67980 }, { "epoch": 0.4921569053254866, "grad_norm": 0.1629912108182907, "learning_rate": 4.507850333340572e-06, "loss": 0.921, "step": 67990 }, { "epoch": 0.4922292919860728, "grad_norm": 0.16399963200092316, "learning_rate": 4.507777946679986e-06, "loss": 0.9234, "step": 68000 }, { "epoch": 0.49230167864665897, "grad_norm": 0.15990719199180603, "learning_rate": 4.5077055600193995e-06, "loss": 0.9207, "step": 68010 }, { "epoch": 0.4923740653072452, "grad_norm": 0.1633741706609726, "learning_rate": 4.507633173358814e-06, "loss": 0.9153, "step": 68020 }, { "epoch": 0.4924464519678314, "grad_norm": 0.15352605283260345, "learning_rate": 4.5075607866982276e-06, "loss": 0.9147, "step": 68030 }, { "epoch": 0.49251883862841755, "grad_norm": 0.15996570885181427, "learning_rate": 4.507488400037641e-06, "loss": 0.9193, "step": 68040 }, { "epoch": 0.49259122528900373, "grad_norm": 0.15362408757209778, "learning_rate": 4.507416013377055e-06, "loss": 0.9308, "step": 68050 }, { "epoch": 0.4926636119495899, "grad_norm": 0.1646050661802292, "learning_rate": 4.507343626716469e-06, "loss": 0.9204, "step": 68060 }, { "epoch": 0.49273599861017614, "grad_norm": 0.1549074798822403, "learning_rate": 4.507271240055883e-06, "loss": 0.9312, "step": 68070 }, { "epoch": 0.4928083852707623, "grad_norm": 0.16783104836940765, "learning_rate": 4.5071988533952965e-06, "loss": 0.9225, "step": 68080 }, { "epoch": 0.4928807719313485, "grad_norm": 0.16458235681056976, "learning_rate": 4.50712646673471e-06, "loss": 0.936, "step": 68090 }, { "epoch": 0.49295315859193467, "grad_norm": 0.16786985099315643, "learning_rate": 4.507054080074124e-06, "loss": 0.9155, "step": 68100 }, { "epoch": 0.49302554525252085, "grad_norm": 0.16184143722057343, "learning_rate": 4.506981693413538e-06, "loss": 0.9124, "step": 68110 }, { "epoch": 0.4930979319131071, "grad_norm": 0.1618042141199112, "learning_rate": 4.506909306752952e-06, "loss": 0.9182, "step": 68120 }, { "epoch": 0.49317031857369326, "grad_norm": 0.47443485260009766, "learning_rate": 4.5068369200923654e-06, "loss": 0.9171, "step": 68130 }, { "epoch": 0.49324270523427943, "grad_norm": 0.16205495595932007, "learning_rate": 4.506764533431779e-06, "loss": 0.9191, "step": 68140 }, { "epoch": 0.4933150918948656, "grad_norm": 0.15748542547225952, "learning_rate": 4.5066921467711935e-06, "loss": 0.929, "step": 68150 }, { "epoch": 0.4933874785554518, "grad_norm": 0.15539519488811493, "learning_rate": 4.506619760110607e-06, "loss": 0.9244, "step": 68160 }, { "epoch": 0.49345986521603796, "grad_norm": 0.15781527757644653, "learning_rate": 4.506547373450021e-06, "loss": 0.9215, "step": 68170 }, { "epoch": 0.4935322518766242, "grad_norm": 0.15772010385990143, "learning_rate": 4.506474986789434e-06, "loss": 0.9173, "step": 68180 }, { "epoch": 0.49360463853721037, "grad_norm": 0.15966345369815826, "learning_rate": 4.506402600128849e-06, "loss": 0.9222, "step": 68190 }, { "epoch": 0.49367702519779655, "grad_norm": 0.15698853135108948, "learning_rate": 4.5063302134682624e-06, "loss": 0.9244, "step": 68200 }, { "epoch": 0.4937494118583827, "grad_norm": 0.16982385516166687, "learning_rate": 4.506257826807676e-06, "loss": 0.9258, "step": 68210 }, { "epoch": 0.4938217985189689, "grad_norm": 0.19245341420173645, "learning_rate": 4.50618544014709e-06, "loss": 0.9141, "step": 68220 }, { "epoch": 0.49389418517955513, "grad_norm": 0.14945447444915771, "learning_rate": 4.506113053486504e-06, "loss": 0.9339, "step": 68230 }, { "epoch": 0.4939665718401413, "grad_norm": 0.16145005822181702, "learning_rate": 4.506040666825918e-06, "loss": 0.9186, "step": 68240 }, { "epoch": 0.4940389585007275, "grad_norm": 0.15670140087604523, "learning_rate": 4.505968280165331e-06, "loss": 0.9277, "step": 68250 }, { "epoch": 0.49411134516131366, "grad_norm": 0.4954744577407837, "learning_rate": 4.505895893504745e-06, "loss": 0.9307, "step": 68260 }, { "epoch": 0.49418373182189984, "grad_norm": 0.15574707090854645, "learning_rate": 4.5058235068441594e-06, "loss": 0.9254, "step": 68270 }, { "epoch": 0.4942561184824861, "grad_norm": 0.1754721999168396, "learning_rate": 4.505751120183573e-06, "loss": 0.9294, "step": 68280 }, { "epoch": 0.49432850514307225, "grad_norm": 0.18038561940193176, "learning_rate": 4.505678733522987e-06, "loss": 0.9175, "step": 68290 }, { "epoch": 0.4944008918036584, "grad_norm": 0.16185778379440308, "learning_rate": 4.5056063468624e-06, "loss": 0.9344, "step": 68300 }, { "epoch": 0.4944732784642446, "grad_norm": 0.1598776876926422, "learning_rate": 4.505533960201815e-06, "loss": 0.9228, "step": 68310 }, { "epoch": 0.4945456651248308, "grad_norm": 0.4865586459636688, "learning_rate": 4.505461573541228e-06, "loss": 0.9308, "step": 68320 }, { "epoch": 0.49461805178541696, "grad_norm": 0.1811351776123047, "learning_rate": 4.505389186880642e-06, "loss": 0.9199, "step": 68330 }, { "epoch": 0.4946904384460032, "grad_norm": 0.15315648913383484, "learning_rate": 4.505316800220056e-06, "loss": 0.9264, "step": 68340 }, { "epoch": 0.49476282510658937, "grad_norm": 0.1656007021665573, "learning_rate": 4.50524441355947e-06, "loss": 0.9213, "step": 68350 }, { "epoch": 0.49483521176717554, "grad_norm": 0.17960675060749054, "learning_rate": 4.505172026898884e-06, "loss": 0.9258, "step": 68360 }, { "epoch": 0.4949075984277617, "grad_norm": 0.18778973817825317, "learning_rate": 4.505099640238297e-06, "loss": 0.9233, "step": 68370 }, { "epoch": 0.4949799850883479, "grad_norm": 0.1481725573539734, "learning_rate": 4.505027253577711e-06, "loss": 0.9133, "step": 68380 }, { "epoch": 0.49505237174893413, "grad_norm": 0.171866312623024, "learning_rate": 4.504954866917125e-06, "loss": 0.9296, "step": 68390 }, { "epoch": 0.4951247584095203, "grad_norm": 0.16532866656780243, "learning_rate": 4.504882480256539e-06, "loss": 0.8981, "step": 68400 }, { "epoch": 0.4951971450701065, "grad_norm": 0.1649603694677353, "learning_rate": 4.504810093595953e-06, "loss": 0.9199, "step": 68410 }, { "epoch": 0.49526953173069266, "grad_norm": 0.1862333118915558, "learning_rate": 4.504737706935366e-06, "loss": 0.9284, "step": 68420 }, { "epoch": 0.49534191839127883, "grad_norm": 0.17143741250038147, "learning_rate": 4.504665320274781e-06, "loss": 0.914, "step": 68430 }, { "epoch": 0.49541430505186507, "grad_norm": 0.16334125399589539, "learning_rate": 4.504592933614194e-06, "loss": 0.9182, "step": 68440 }, { "epoch": 0.49548669171245124, "grad_norm": 0.18255673348903656, "learning_rate": 4.504520546953608e-06, "loss": 0.9099, "step": 68450 }, { "epoch": 0.4955590783730374, "grad_norm": 0.16726234555244446, "learning_rate": 4.5044481602930215e-06, "loss": 0.9263, "step": 68460 }, { "epoch": 0.4956314650336236, "grad_norm": 0.16593337059020996, "learning_rate": 4.504375773632436e-06, "loss": 0.9165, "step": 68470 }, { "epoch": 0.4957038516942098, "grad_norm": 0.173760324716568, "learning_rate": 4.50430338697185e-06, "loss": 0.9228, "step": 68480 }, { "epoch": 0.49577623835479595, "grad_norm": 0.1431112438440323, "learning_rate": 4.504231000311263e-06, "loss": 0.9308, "step": 68490 }, { "epoch": 0.4958486250153822, "grad_norm": 0.22450746595859528, "learning_rate": 4.504158613650677e-06, "loss": 0.9277, "step": 68500 }, { "epoch": 0.49592101167596836, "grad_norm": 0.16971862316131592, "learning_rate": 4.504086226990091e-06, "loss": 0.924, "step": 68510 }, { "epoch": 0.49599339833655454, "grad_norm": 0.15365462005138397, "learning_rate": 4.504013840329504e-06, "loss": 0.9172, "step": 68520 }, { "epoch": 0.4960657849971407, "grad_norm": 0.17213942110538483, "learning_rate": 4.503941453668918e-06, "loss": 0.912, "step": 68530 }, { "epoch": 0.4961381716577269, "grad_norm": 0.16275885701179504, "learning_rate": 4.503869067008332e-06, "loss": 0.9113, "step": 68540 }, { "epoch": 0.4962105583183131, "grad_norm": 0.1540212631225586, "learning_rate": 4.503796680347746e-06, "loss": 0.9075, "step": 68550 }, { "epoch": 0.4962829449788993, "grad_norm": 0.15798866748809814, "learning_rate": 4.503724293687159e-06, "loss": 0.9271, "step": 68560 }, { "epoch": 0.4963553316394855, "grad_norm": 0.1525816023349762, "learning_rate": 4.503651907026573e-06, "loss": 0.913, "step": 68570 }, { "epoch": 0.49642771830007165, "grad_norm": 0.1655115783214569, "learning_rate": 4.5035795203659875e-06, "loss": 0.9094, "step": 68580 }, { "epoch": 0.49650010496065783, "grad_norm": 0.16454185545444489, "learning_rate": 4.503507133705401e-06, "loss": 0.9232, "step": 68590 }, { "epoch": 0.49657249162124406, "grad_norm": 0.15450453758239746, "learning_rate": 4.503434747044815e-06, "loss": 0.9256, "step": 68600 }, { "epoch": 0.49664487828183024, "grad_norm": 0.15737608075141907, "learning_rate": 4.503362360384228e-06, "loss": 0.913, "step": 68610 }, { "epoch": 0.4967172649424164, "grad_norm": 0.16449777781963348, "learning_rate": 4.503289973723643e-06, "loss": 0.9236, "step": 68620 }, { "epoch": 0.4967896516030026, "grad_norm": 0.15372395515441895, "learning_rate": 4.503217587063056e-06, "loss": 0.9174, "step": 68630 }, { "epoch": 0.49686203826358877, "grad_norm": 0.1783866584300995, "learning_rate": 4.50314520040247e-06, "loss": 0.9295, "step": 68640 }, { "epoch": 0.496934424924175, "grad_norm": 0.1594487428665161, "learning_rate": 4.503072813741884e-06, "loss": 0.9224, "step": 68650 }, { "epoch": 0.4970068115847612, "grad_norm": 0.16570931673049927, "learning_rate": 4.503000427081298e-06, "loss": 0.9139, "step": 68660 }, { "epoch": 0.49707919824534735, "grad_norm": 0.16301476955413818, "learning_rate": 4.502928040420712e-06, "loss": 0.9185, "step": 68670 }, { "epoch": 0.49715158490593353, "grad_norm": 0.17501094937324524, "learning_rate": 4.502855653760125e-06, "loss": 0.9253, "step": 68680 }, { "epoch": 0.4972239715665197, "grad_norm": 0.1674194633960724, "learning_rate": 4.502783267099539e-06, "loss": 0.9193, "step": 68690 }, { "epoch": 0.4972963582271059, "grad_norm": 0.16636104881763458, "learning_rate": 4.502710880438953e-06, "loss": 0.9098, "step": 68700 }, { "epoch": 0.4973687448876921, "grad_norm": 0.16473974287509918, "learning_rate": 4.502638493778367e-06, "loss": 0.9239, "step": 68710 }, { "epoch": 0.4974411315482783, "grad_norm": 0.17471805214881897, "learning_rate": 4.502566107117781e-06, "loss": 0.9208, "step": 68720 }, { "epoch": 0.49751351820886447, "grad_norm": 0.16630442440509796, "learning_rate": 4.502493720457194e-06, "loss": 0.9233, "step": 68730 }, { "epoch": 0.49758590486945065, "grad_norm": 0.14597173035144806, "learning_rate": 4.502421333796608e-06, "loss": 0.916, "step": 68740 }, { "epoch": 0.4976582915300368, "grad_norm": 0.1661393642425537, "learning_rate": 4.502348947136022e-06, "loss": 0.9205, "step": 68750 }, { "epoch": 0.49773067819062305, "grad_norm": 0.15321460366249084, "learning_rate": 4.502276560475436e-06, "loss": 0.9276, "step": 68760 }, { "epoch": 0.49780306485120923, "grad_norm": 0.15991902351379395, "learning_rate": 4.5022041738148496e-06, "loss": 0.9238, "step": 68770 }, { "epoch": 0.4978754515117954, "grad_norm": 0.17004790902137756, "learning_rate": 4.502131787154263e-06, "loss": 0.9171, "step": 68780 }, { "epoch": 0.4979478381723816, "grad_norm": 0.17030557990074158, "learning_rate": 4.502059400493678e-06, "loss": 0.9158, "step": 68790 }, { "epoch": 0.49802022483296776, "grad_norm": 0.16426852345466614, "learning_rate": 4.501987013833091e-06, "loss": 0.927, "step": 68800 }, { "epoch": 0.498092611493554, "grad_norm": 0.14977115392684937, "learning_rate": 4.501914627172505e-06, "loss": 0.9155, "step": 68810 }, { "epoch": 0.49816499815414017, "grad_norm": 0.16252385079860687, "learning_rate": 4.5018422405119185e-06, "loss": 0.9082, "step": 68820 }, { "epoch": 0.49823738481472635, "grad_norm": 0.15293535590171814, "learning_rate": 4.501769853851333e-06, "loss": 0.9093, "step": 68830 }, { "epoch": 0.4983097714753125, "grad_norm": 0.14948716759681702, "learning_rate": 4.501697467190747e-06, "loss": 0.9099, "step": 68840 }, { "epoch": 0.4983821581358987, "grad_norm": 0.14924530684947968, "learning_rate": 4.50162508053016e-06, "loss": 0.9288, "step": 68850 }, { "epoch": 0.4984545447964849, "grad_norm": 0.16742658615112305, "learning_rate": 4.501552693869574e-06, "loss": 0.9359, "step": 68860 }, { "epoch": 0.4985269314570711, "grad_norm": 0.1566425859928131, "learning_rate": 4.501480307208988e-06, "loss": 0.9298, "step": 68870 }, { "epoch": 0.4985993181176573, "grad_norm": 0.15507277846336365, "learning_rate": 4.501407920548402e-06, "loss": 0.9209, "step": 68880 }, { "epoch": 0.49867170477824346, "grad_norm": 0.2236766368150711, "learning_rate": 4.5013355338878155e-06, "loss": 0.9195, "step": 68890 }, { "epoch": 0.49874409143882964, "grad_norm": 0.14580419659614563, "learning_rate": 4.501263147227229e-06, "loss": 0.9099, "step": 68900 }, { "epoch": 0.4988164780994158, "grad_norm": 0.1595289558172226, "learning_rate": 4.501190760566644e-06, "loss": 0.9197, "step": 68910 }, { "epoch": 0.49888886476000205, "grad_norm": 0.15660600364208221, "learning_rate": 4.501118373906057e-06, "loss": 0.9246, "step": 68920 }, { "epoch": 0.4989612514205882, "grad_norm": 0.1797994077205658, "learning_rate": 4.501045987245471e-06, "loss": 0.9251, "step": 68930 }, { "epoch": 0.4990336380811744, "grad_norm": 0.16895923018455505, "learning_rate": 4.5009736005848844e-06, "loss": 0.9208, "step": 68940 }, { "epoch": 0.4991060247417606, "grad_norm": 0.17603491246700287, "learning_rate": 4.500901213924299e-06, "loss": 0.9205, "step": 68950 }, { "epoch": 0.49917841140234676, "grad_norm": 0.1691259890794754, "learning_rate": 4.5008288272637125e-06, "loss": 0.9282, "step": 68960 }, { "epoch": 0.499250798062933, "grad_norm": 0.1619505137205124, "learning_rate": 4.500756440603126e-06, "loss": 0.9324, "step": 68970 }, { "epoch": 0.49932318472351916, "grad_norm": 0.16095148026943207, "learning_rate": 4.50068405394254e-06, "loss": 0.9167, "step": 68980 }, { "epoch": 0.49939557138410534, "grad_norm": 0.14836084842681885, "learning_rate": 4.500611667281954e-06, "loss": 0.9127, "step": 68990 }, { "epoch": 0.4994679580446915, "grad_norm": 0.15759669244289398, "learning_rate": 4.500539280621368e-06, "loss": 0.9219, "step": 69000 }, { "epoch": 0.4995403447052777, "grad_norm": 0.17617014050483704, "learning_rate": 4.5004668939607814e-06, "loss": 0.9272, "step": 69010 }, { "epoch": 0.49961273136586387, "grad_norm": 0.15363344550132751, "learning_rate": 4.500394507300195e-06, "loss": 0.914, "step": 69020 }, { "epoch": 0.4996851180264501, "grad_norm": 0.163772851228714, "learning_rate": 4.5003221206396095e-06, "loss": 0.9352, "step": 69030 }, { "epoch": 0.4997575046870363, "grad_norm": 0.16267885267734528, "learning_rate": 4.500249733979023e-06, "loss": 0.9052, "step": 69040 }, { "epoch": 0.49982989134762246, "grad_norm": 0.18130633234977722, "learning_rate": 4.500177347318436e-06, "loss": 0.9282, "step": 69050 }, { "epoch": 0.49990227800820863, "grad_norm": 0.1978328377008438, "learning_rate": 4.50010496065785e-06, "loss": 0.9345, "step": 69060 }, { "epoch": 0.4999746646687948, "grad_norm": 0.15893995761871338, "learning_rate": 4.500032573997264e-06, "loss": 0.9136, "step": 69070 }, { "epoch": 0.500047051329381, "grad_norm": 0.17726098001003265, "learning_rate": 4.499960187336678e-06, "loss": 0.9239, "step": 69080 }, { "epoch": 0.5001194379899672, "grad_norm": 0.20466522872447968, "learning_rate": 4.499887800676091e-06, "loss": 0.916, "step": 69090 }, { "epoch": 0.5001918246505533, "grad_norm": 0.1550358682870865, "learning_rate": 4.499815414015506e-06, "loss": 0.9195, "step": 69100 }, { "epoch": 0.5002642113111396, "grad_norm": 0.14892955124378204, "learning_rate": 4.499743027354919e-06, "loss": 0.9103, "step": 69110 }, { "epoch": 0.5003365979717258, "grad_norm": 0.1611659675836563, "learning_rate": 4.499670640694333e-06, "loss": 0.9123, "step": 69120 }, { "epoch": 0.500408984632312, "grad_norm": 0.19101987779140472, "learning_rate": 4.4995982540337465e-06, "loss": 0.9135, "step": 69130 }, { "epoch": 0.5004813712928982, "grad_norm": 0.1674869805574417, "learning_rate": 4.499525867373161e-06, "loss": 0.9236, "step": 69140 }, { "epoch": 0.5005537579534843, "grad_norm": 0.15739434957504272, "learning_rate": 4.499453480712575e-06, "loss": 0.9267, "step": 69150 }, { "epoch": 0.5006261446140705, "grad_norm": 0.17267724871635437, "learning_rate": 4.499381094051988e-06, "loss": 0.9148, "step": 69160 }, { "epoch": 0.5006985312746567, "grad_norm": 0.1497461199760437, "learning_rate": 4.499308707391402e-06, "loss": 0.9141, "step": 69170 }, { "epoch": 0.5007709179352429, "grad_norm": 0.15880879759788513, "learning_rate": 4.499236320730816e-06, "loss": 0.9333, "step": 69180 }, { "epoch": 0.500843304595829, "grad_norm": 0.1679236739873886, "learning_rate": 4.49916393407023e-06, "loss": 0.902, "step": 69190 }, { "epoch": 0.5009156912564152, "grad_norm": 0.1678447723388672, "learning_rate": 4.4990915474096435e-06, "loss": 0.9136, "step": 69200 }, { "epoch": 0.5009880779170015, "grad_norm": 0.16553092002868652, "learning_rate": 4.499019160749057e-06, "loss": 0.9205, "step": 69210 }, { "epoch": 0.5010604645775877, "grad_norm": 0.15976086258888245, "learning_rate": 4.498946774088472e-06, "loss": 0.9124, "step": 69220 }, { "epoch": 0.5011328512381739, "grad_norm": 0.1608666628599167, "learning_rate": 4.498874387427885e-06, "loss": 0.9233, "step": 69230 }, { "epoch": 0.50120523789876, "grad_norm": 0.19113683700561523, "learning_rate": 4.498802000767299e-06, "loss": 0.9208, "step": 69240 }, { "epoch": 0.5012776245593462, "grad_norm": 0.1584874838590622, "learning_rate": 4.4987296141067125e-06, "loss": 0.9106, "step": 69250 }, { "epoch": 0.5013500112199324, "grad_norm": 0.17393231391906738, "learning_rate": 4.498657227446127e-06, "loss": 0.915, "step": 69260 }, { "epoch": 0.5014223978805186, "grad_norm": 0.1741904765367508, "learning_rate": 4.4985848407855405e-06, "loss": 0.9318, "step": 69270 }, { "epoch": 0.5014947845411047, "grad_norm": 0.15421254932880402, "learning_rate": 4.498512454124954e-06, "loss": 0.9146, "step": 69280 }, { "epoch": 0.5015671712016909, "grad_norm": 0.19196033477783203, "learning_rate": 4.498440067464368e-06, "loss": 0.9116, "step": 69290 }, { "epoch": 0.5016395578622771, "grad_norm": 0.15833470225334167, "learning_rate": 4.498367680803782e-06, "loss": 0.9232, "step": 69300 }, { "epoch": 0.5017119445228633, "grad_norm": 0.1550120860338211, "learning_rate": 4.498295294143196e-06, "loss": 0.9222, "step": 69310 }, { "epoch": 0.5017843311834496, "grad_norm": 0.15786628425121307, "learning_rate": 4.4982229074826095e-06, "loss": 0.936, "step": 69320 }, { "epoch": 0.5018567178440357, "grad_norm": 0.1613067090511322, "learning_rate": 4.498150520822023e-06, "loss": 0.9028, "step": 69330 }, { "epoch": 0.5019291045046219, "grad_norm": 0.20446714758872986, "learning_rate": 4.498078134161437e-06, "loss": 0.8997, "step": 69340 }, { "epoch": 0.5020014911652081, "grad_norm": 0.1550551950931549, "learning_rate": 4.498005747500851e-06, "loss": 0.9199, "step": 69350 }, { "epoch": 0.5020738778257943, "grad_norm": 0.15930670499801636, "learning_rate": 4.497933360840265e-06, "loss": 0.9126, "step": 69360 }, { "epoch": 0.5021462644863804, "grad_norm": 0.16248895227909088, "learning_rate": 4.497860974179678e-06, "loss": 0.9354, "step": 69370 }, { "epoch": 0.5022186511469666, "grad_norm": 0.1518125683069229, "learning_rate": 4.497788587519092e-06, "loss": 0.9201, "step": 69380 }, { "epoch": 0.5022910378075528, "grad_norm": 0.16246938705444336, "learning_rate": 4.4977162008585065e-06, "loss": 0.9231, "step": 69390 }, { "epoch": 0.502363424468139, "grad_norm": 0.17307987809181213, "learning_rate": 4.49764381419792e-06, "loss": 0.9279, "step": 69400 }, { "epoch": 0.5024358111287252, "grad_norm": 0.1634259670972824, "learning_rate": 4.497571427537334e-06, "loss": 0.9189, "step": 69410 }, { "epoch": 0.5025081977893114, "grad_norm": 0.1655704826116562, "learning_rate": 4.497499040876747e-06, "loss": 0.8979, "step": 69420 }, { "epoch": 0.5025805844498976, "grad_norm": 0.16415700316429138, "learning_rate": 4.497426654216162e-06, "loss": 0.9202, "step": 69430 }, { "epoch": 0.5026529711104838, "grad_norm": 0.16875231266021729, "learning_rate": 4.497354267555575e-06, "loss": 0.9152, "step": 69440 }, { "epoch": 0.50272535777107, "grad_norm": 0.19432953000068665, "learning_rate": 4.497281880894989e-06, "loss": 0.9217, "step": 69450 }, { "epoch": 0.5027977444316561, "grad_norm": 0.15906265377998352, "learning_rate": 4.497209494234403e-06, "loss": 0.9239, "step": 69460 }, { "epoch": 0.5028701310922423, "grad_norm": 0.16084323823451996, "learning_rate": 4.497137107573817e-06, "loss": 0.9084, "step": 69470 }, { "epoch": 0.5029425177528285, "grad_norm": 0.16277502477169037, "learning_rate": 4.497064720913231e-06, "loss": 0.9318, "step": 69480 }, { "epoch": 0.5030149044134147, "grad_norm": 0.16054733097553253, "learning_rate": 4.496992334252644e-06, "loss": 0.9187, "step": 69490 }, { "epoch": 0.5030872910740009, "grad_norm": 0.14251933991909027, "learning_rate": 4.496919947592058e-06, "loss": 0.9271, "step": 69500 }, { "epoch": 0.503159677734587, "grad_norm": 0.1685430407524109, "learning_rate": 4.496847560931472e-06, "loss": 0.9193, "step": 69510 }, { "epoch": 0.5032320643951732, "grad_norm": 0.15440599620342255, "learning_rate": 4.496775174270886e-06, "loss": 0.9206, "step": 69520 }, { "epoch": 0.5033044510557595, "grad_norm": 0.15421442687511444, "learning_rate": 4.4967027876103e-06, "loss": 0.923, "step": 69530 }, { "epoch": 0.5033768377163457, "grad_norm": 0.1564491093158722, "learning_rate": 4.496630400949713e-06, "loss": 0.9216, "step": 69540 }, { "epoch": 0.5034492243769318, "grad_norm": 0.16401636600494385, "learning_rate": 4.496558014289128e-06, "loss": 0.9185, "step": 69550 }, { "epoch": 0.503521611037518, "grad_norm": 0.1628132462501526, "learning_rate": 4.496485627628541e-06, "loss": 0.9235, "step": 69560 }, { "epoch": 0.5035939976981042, "grad_norm": 0.16760440170764923, "learning_rate": 4.496413240967955e-06, "loss": 0.9184, "step": 69570 }, { "epoch": 0.5036663843586904, "grad_norm": 0.15936897695064545, "learning_rate": 4.496340854307369e-06, "loss": 0.9206, "step": 69580 }, { "epoch": 0.5037387710192766, "grad_norm": 0.15765340626239777, "learning_rate": 4.496268467646782e-06, "loss": 0.9306, "step": 69590 }, { "epoch": 0.5038111576798627, "grad_norm": 0.17861410975456238, "learning_rate": 4.496196080986196e-06, "loss": 0.9187, "step": 69600 }, { "epoch": 0.5038835443404489, "grad_norm": 0.24393630027770996, "learning_rate": 4.4961236943256094e-06, "loss": 0.9149, "step": 69610 }, { "epoch": 0.5039559310010351, "grad_norm": 0.20668023824691772, "learning_rate": 4.496051307665024e-06, "loss": 0.9286, "step": 69620 }, { "epoch": 0.5040283176616213, "grad_norm": 0.15350449085235596, "learning_rate": 4.4959789210044375e-06, "loss": 0.9045, "step": 69630 }, { "epoch": 0.5041007043222075, "grad_norm": 0.16766700148582458, "learning_rate": 4.495906534343851e-06, "loss": 0.9155, "step": 69640 }, { "epoch": 0.5041730909827937, "grad_norm": 0.15862970054149628, "learning_rate": 4.495834147683265e-06, "loss": 0.916, "step": 69650 }, { "epoch": 0.5042454776433799, "grad_norm": 0.1742592453956604, "learning_rate": 4.495761761022679e-06, "loss": 0.9168, "step": 69660 }, { "epoch": 0.5043178643039661, "grad_norm": 0.15328940749168396, "learning_rate": 4.495689374362093e-06, "loss": 0.9151, "step": 69670 }, { "epoch": 0.5043902509645523, "grad_norm": 0.15543635189533234, "learning_rate": 4.4956169877015064e-06, "loss": 0.917, "step": 69680 }, { "epoch": 0.5044626376251384, "grad_norm": 0.1722266972064972, "learning_rate": 4.49554460104092e-06, "loss": 0.9218, "step": 69690 }, { "epoch": 0.5045350242857246, "grad_norm": 0.1570882499217987, "learning_rate": 4.4954722143803345e-06, "loss": 0.9202, "step": 69700 }, { "epoch": 0.5046074109463108, "grad_norm": 0.15502651035785675, "learning_rate": 4.495399827719748e-06, "loss": 0.9209, "step": 69710 }, { "epoch": 0.504679797606897, "grad_norm": 0.1683962345123291, "learning_rate": 4.495327441059162e-06, "loss": 0.9239, "step": 69720 }, { "epoch": 0.5047521842674831, "grad_norm": 0.15740440785884857, "learning_rate": 4.495255054398575e-06, "loss": 0.9221, "step": 69730 }, { "epoch": 0.5048245709280694, "grad_norm": 0.15507307648658752, "learning_rate": 4.49518266773799e-06, "loss": 0.9148, "step": 69740 }, { "epoch": 0.5048969575886556, "grad_norm": 0.14979131519794464, "learning_rate": 4.4951102810774034e-06, "loss": 0.9056, "step": 69750 }, { "epoch": 0.5049693442492418, "grad_norm": 0.1572350412607193, "learning_rate": 4.495037894416817e-06, "loss": 0.9233, "step": 69760 }, { "epoch": 0.505041730909828, "grad_norm": 0.15610437095165253, "learning_rate": 4.494965507756231e-06, "loss": 0.9252, "step": 69770 }, { "epoch": 0.5051141175704141, "grad_norm": 0.15537583827972412, "learning_rate": 4.494893121095645e-06, "loss": 0.9243, "step": 69780 }, { "epoch": 0.5051865042310003, "grad_norm": 0.19823171198368073, "learning_rate": 4.494820734435059e-06, "loss": 0.9286, "step": 69790 }, { "epoch": 0.5052588908915865, "grad_norm": 0.15276482701301575, "learning_rate": 4.494748347774472e-06, "loss": 0.9221, "step": 69800 }, { "epoch": 0.5053312775521727, "grad_norm": 0.15829552710056305, "learning_rate": 4.494675961113886e-06, "loss": 0.9058, "step": 69810 }, { "epoch": 0.5054036642127588, "grad_norm": 0.1616862267255783, "learning_rate": 4.4946035744533004e-06, "loss": 0.9224, "step": 69820 }, { "epoch": 0.505476050873345, "grad_norm": 0.15852747857570648, "learning_rate": 4.494531187792714e-06, "loss": 0.9288, "step": 69830 }, { "epoch": 0.5055484375339312, "grad_norm": 0.16059504449367523, "learning_rate": 4.494458801132128e-06, "loss": 0.9153, "step": 69840 }, { "epoch": 0.5056208241945175, "grad_norm": 0.16823314130306244, "learning_rate": 4.494386414471541e-06, "loss": 0.9232, "step": 69850 }, { "epoch": 0.5056932108551037, "grad_norm": 0.14530618488788605, "learning_rate": 4.494314027810956e-06, "loss": 0.9221, "step": 69860 }, { "epoch": 0.5057655975156898, "grad_norm": 0.15647926926612854, "learning_rate": 4.494241641150369e-06, "loss": 0.9297, "step": 69870 }, { "epoch": 0.505837984176276, "grad_norm": 0.18810240924358368, "learning_rate": 4.494169254489783e-06, "loss": 0.9132, "step": 69880 }, { "epoch": 0.5059103708368622, "grad_norm": 0.16139045357704163, "learning_rate": 4.494096867829197e-06, "loss": 0.9025, "step": 69890 }, { "epoch": 0.5059827574974484, "grad_norm": 0.14970675110816956, "learning_rate": 4.494024481168611e-06, "loss": 0.9192, "step": 69900 }, { "epoch": 0.5060551441580345, "grad_norm": 0.17512159049510956, "learning_rate": 4.493952094508025e-06, "loss": 0.9159, "step": 69910 }, { "epoch": 0.5061275308186207, "grad_norm": 0.1656593680381775, "learning_rate": 4.493879707847438e-06, "loss": 0.9287, "step": 69920 }, { "epoch": 0.5061999174792069, "grad_norm": 0.14336960017681122, "learning_rate": 4.493807321186852e-06, "loss": 0.913, "step": 69930 }, { "epoch": 0.5062723041397931, "grad_norm": 0.15477393567562103, "learning_rate": 4.493734934526266e-06, "loss": 0.9236, "step": 69940 }, { "epoch": 0.5063446908003794, "grad_norm": 0.16996079683303833, "learning_rate": 4.49366254786568e-06, "loss": 0.9288, "step": 69950 }, { "epoch": 0.5064170774609655, "grad_norm": 0.16285812854766846, "learning_rate": 4.493590161205094e-06, "loss": 0.9243, "step": 69960 }, { "epoch": 0.5064894641215517, "grad_norm": 0.16963927447795868, "learning_rate": 4.493517774544507e-06, "loss": 0.9154, "step": 69970 }, { "epoch": 0.5065618507821379, "grad_norm": 0.17112816870212555, "learning_rate": 4.493445387883921e-06, "loss": 0.923, "step": 69980 }, { "epoch": 0.5066342374427241, "grad_norm": 0.17348773777484894, "learning_rate": 4.493373001223335e-06, "loss": 0.9348, "step": 69990 }, { "epoch": 0.5067066241033102, "grad_norm": 0.16326965391635895, "learning_rate": 4.493300614562749e-06, "loss": 0.9317, "step": 70000 }, { "epoch": 0.5067790107638964, "grad_norm": 0.16024067997932434, "learning_rate": 4.4932282279021625e-06, "loss": 0.9252, "step": 70010 }, { "epoch": 0.5068513974244826, "grad_norm": 0.1615953892469406, "learning_rate": 4.493155841241576e-06, "loss": 0.9135, "step": 70020 }, { "epoch": 0.5069237840850688, "grad_norm": 0.14714552462100983, "learning_rate": 4.493083454580991e-06, "loss": 0.9307, "step": 70030 }, { "epoch": 0.506996170745655, "grad_norm": 0.15000593662261963, "learning_rate": 4.493011067920404e-06, "loss": 0.9198, "step": 70040 }, { "epoch": 0.5070685574062411, "grad_norm": 0.16868354380130768, "learning_rate": 4.492938681259818e-06, "loss": 0.9192, "step": 70050 }, { "epoch": 0.5071409440668274, "grad_norm": 0.1756649762392044, "learning_rate": 4.4928662945992315e-06, "loss": 0.9145, "step": 70060 }, { "epoch": 0.5072133307274136, "grad_norm": 0.14338277280330658, "learning_rate": 4.492793907938646e-06, "loss": 0.9162, "step": 70070 }, { "epoch": 0.5072857173879998, "grad_norm": 0.15879562497138977, "learning_rate": 4.4927215212780596e-06, "loss": 0.9136, "step": 70080 }, { "epoch": 0.507358104048586, "grad_norm": 0.16323305666446686, "learning_rate": 4.492649134617473e-06, "loss": 0.9207, "step": 70090 }, { "epoch": 0.5074304907091721, "grad_norm": 0.15792213380336761, "learning_rate": 4.492576747956887e-06, "loss": 0.9112, "step": 70100 }, { "epoch": 0.5075028773697583, "grad_norm": 0.15665297210216522, "learning_rate": 4.4925043612963e-06, "loss": 0.9182, "step": 70110 }, { "epoch": 0.5075752640303445, "grad_norm": 0.16642248630523682, "learning_rate": 4.492431974635714e-06, "loss": 0.9249, "step": 70120 }, { "epoch": 0.5076476506909307, "grad_norm": 0.1554255336523056, "learning_rate": 4.492359587975128e-06, "loss": 0.9203, "step": 70130 }, { "epoch": 0.5077200373515168, "grad_norm": 0.17137949168682098, "learning_rate": 4.492287201314542e-06, "loss": 0.9248, "step": 70140 }, { "epoch": 0.507792424012103, "grad_norm": 0.16739679872989655, "learning_rate": 4.492214814653956e-06, "loss": 0.9192, "step": 70150 }, { "epoch": 0.5078648106726892, "grad_norm": 0.16433091461658478, "learning_rate": 4.492142427993369e-06, "loss": 0.919, "step": 70160 }, { "epoch": 0.5079371973332755, "grad_norm": 0.16552671790122986, "learning_rate": 4.492070041332783e-06, "loss": 0.9291, "step": 70170 }, { "epoch": 0.5080095839938616, "grad_norm": 0.15935184061527252, "learning_rate": 4.491997654672197e-06, "loss": 0.9255, "step": 70180 }, { "epoch": 0.5080819706544478, "grad_norm": 0.16075530648231506, "learning_rate": 4.491925268011611e-06, "loss": 0.9286, "step": 70190 }, { "epoch": 0.508154357315034, "grad_norm": 0.16693368554115295, "learning_rate": 4.491852881351025e-06, "loss": 0.9312, "step": 70200 }, { "epoch": 0.5082267439756202, "grad_norm": 0.1627953052520752, "learning_rate": 4.491780494690438e-06, "loss": 0.914, "step": 70210 }, { "epoch": 0.5082991306362064, "grad_norm": 0.17071197926998138, "learning_rate": 4.491708108029853e-06, "loss": 0.9145, "step": 70220 }, { "epoch": 0.5083715172967925, "grad_norm": 0.1666022539138794, "learning_rate": 4.491635721369266e-06, "loss": 0.9181, "step": 70230 }, { "epoch": 0.5084439039573787, "grad_norm": 0.16290847957134247, "learning_rate": 4.49156333470868e-06, "loss": 0.9289, "step": 70240 }, { "epoch": 0.5085162906179649, "grad_norm": 0.2862056791782379, "learning_rate": 4.4914909480480936e-06, "loss": 0.9133, "step": 70250 }, { "epoch": 0.5085886772785511, "grad_norm": 0.1533055603504181, "learning_rate": 4.491418561387508e-06, "loss": 0.9179, "step": 70260 }, { "epoch": 0.5086610639391373, "grad_norm": 0.16260363161563873, "learning_rate": 4.491346174726922e-06, "loss": 0.9203, "step": 70270 }, { "epoch": 0.5087334505997235, "grad_norm": 0.17325079441070557, "learning_rate": 4.491273788066335e-06, "loss": 0.9229, "step": 70280 }, { "epoch": 0.5088058372603097, "grad_norm": 0.17838117480278015, "learning_rate": 4.491201401405749e-06, "loss": 0.9184, "step": 70290 }, { "epoch": 0.5088782239208959, "grad_norm": 0.17130929231643677, "learning_rate": 4.491129014745163e-06, "loss": 0.9226, "step": 70300 }, { "epoch": 0.508950610581482, "grad_norm": 0.1562015265226364, "learning_rate": 4.491056628084577e-06, "loss": 0.9384, "step": 70310 }, { "epoch": 0.5090229972420682, "grad_norm": 0.1642456352710724, "learning_rate": 4.490984241423991e-06, "loss": 0.9392, "step": 70320 }, { "epoch": 0.5090953839026544, "grad_norm": 0.16102585196495056, "learning_rate": 4.490911854763404e-06, "loss": 0.9269, "step": 70330 }, { "epoch": 0.5091677705632406, "grad_norm": 0.16825130581855774, "learning_rate": 4.490839468102819e-06, "loss": 0.9312, "step": 70340 }, { "epoch": 0.5092401572238268, "grad_norm": 0.18951553106307983, "learning_rate": 4.490767081442232e-06, "loss": 0.9149, "step": 70350 }, { "epoch": 0.5093125438844129, "grad_norm": 0.1539865881204605, "learning_rate": 4.490694694781646e-06, "loss": 0.9176, "step": 70360 }, { "epoch": 0.5093849305449991, "grad_norm": 0.16370318830013275, "learning_rate": 4.4906223081210595e-06, "loss": 0.9208, "step": 70370 }, { "epoch": 0.5094573172055854, "grad_norm": 0.1988501250743866, "learning_rate": 4.490549921460474e-06, "loss": 0.9306, "step": 70380 }, { "epoch": 0.5095297038661716, "grad_norm": 0.15401297807693481, "learning_rate": 4.490477534799888e-06, "loss": 0.931, "step": 70390 }, { "epoch": 0.5096020905267578, "grad_norm": 0.15856841206550598, "learning_rate": 4.490405148139301e-06, "loss": 0.9152, "step": 70400 }, { "epoch": 0.5096744771873439, "grad_norm": 0.15859901905059814, "learning_rate": 4.490332761478715e-06, "loss": 0.9092, "step": 70410 }, { "epoch": 0.5097468638479301, "grad_norm": 0.16617213189601898, "learning_rate": 4.490260374818129e-06, "loss": 0.9061, "step": 70420 }, { "epoch": 0.5098192505085163, "grad_norm": 0.159602552652359, "learning_rate": 4.490187988157543e-06, "loss": 0.9199, "step": 70430 }, { "epoch": 0.5098916371691025, "grad_norm": 0.17478512227535248, "learning_rate": 4.4901156014969565e-06, "loss": 0.9304, "step": 70440 }, { "epoch": 0.5099640238296886, "grad_norm": 0.1556614488363266, "learning_rate": 4.49004321483637e-06, "loss": 0.9188, "step": 70450 }, { "epoch": 0.5100364104902748, "grad_norm": 0.15821588039398193, "learning_rate": 4.489970828175785e-06, "loss": 0.9139, "step": 70460 }, { "epoch": 0.510108797150861, "grad_norm": 0.1509390026330948, "learning_rate": 4.489898441515198e-06, "loss": 0.9246, "step": 70470 }, { "epoch": 0.5101811838114473, "grad_norm": 0.15749859809875488, "learning_rate": 4.489826054854612e-06, "loss": 0.9151, "step": 70480 }, { "epoch": 0.5102535704720335, "grad_norm": 0.1502166986465454, "learning_rate": 4.4897536681940254e-06, "loss": 0.9084, "step": 70490 }, { "epoch": 0.5103259571326196, "grad_norm": 0.15976519882678986, "learning_rate": 4.48968128153344e-06, "loss": 0.9106, "step": 70500 }, { "epoch": 0.5103983437932058, "grad_norm": 0.15369580686092377, "learning_rate": 4.4896088948728535e-06, "loss": 0.9203, "step": 70510 }, { "epoch": 0.510470730453792, "grad_norm": 0.17383261024951935, "learning_rate": 4.489536508212267e-06, "loss": 0.9298, "step": 70520 }, { "epoch": 0.5105431171143782, "grad_norm": 0.1546318680047989, "learning_rate": 4.489464121551681e-06, "loss": 0.9357, "step": 70530 }, { "epoch": 0.5106155037749643, "grad_norm": 0.15537676215171814, "learning_rate": 4.489391734891095e-06, "loss": 0.9096, "step": 70540 }, { "epoch": 0.5106878904355505, "grad_norm": 0.14807634055614471, "learning_rate": 4.489319348230509e-06, "loss": 0.9225, "step": 70550 }, { "epoch": 0.5107602770961367, "grad_norm": 0.17068465054035187, "learning_rate": 4.4892469615699224e-06, "loss": 0.9229, "step": 70560 }, { "epoch": 0.5108326637567229, "grad_norm": 0.16985973715782166, "learning_rate": 4.489174574909336e-06, "loss": 0.918, "step": 70570 }, { "epoch": 0.510905050417309, "grad_norm": 0.1610952913761139, "learning_rate": 4.48910218824875e-06, "loss": 0.9191, "step": 70580 }, { "epoch": 0.5109774370778953, "grad_norm": 0.19032908976078033, "learning_rate": 4.489029801588164e-06, "loss": 0.9158, "step": 70590 }, { "epoch": 0.5110498237384815, "grad_norm": 0.1557486355304718, "learning_rate": 4.488957414927578e-06, "loss": 0.9177, "step": 70600 }, { "epoch": 0.5111222103990677, "grad_norm": 0.15214890241622925, "learning_rate": 4.488885028266991e-06, "loss": 0.9175, "step": 70610 }, { "epoch": 0.5111945970596539, "grad_norm": 0.16953812539577484, "learning_rate": 4.488812641606405e-06, "loss": 0.9178, "step": 70620 }, { "epoch": 0.51126698372024, "grad_norm": 0.16255024075508118, "learning_rate": 4.4887402549458195e-06, "loss": 0.9245, "step": 70630 }, { "epoch": 0.5113393703808262, "grad_norm": 0.15664204955101013, "learning_rate": 4.488667868285232e-06, "loss": 0.9172, "step": 70640 }, { "epoch": 0.5114117570414124, "grad_norm": 0.1526808738708496, "learning_rate": 4.488595481624647e-06, "loss": 0.9152, "step": 70650 }, { "epoch": 0.5114841437019986, "grad_norm": 0.18234345316886902, "learning_rate": 4.48852309496406e-06, "loss": 0.9152, "step": 70660 }, { "epoch": 0.5115565303625847, "grad_norm": 0.15891316533088684, "learning_rate": 4.488450708303474e-06, "loss": 0.9184, "step": 70670 }, { "epoch": 0.5116289170231709, "grad_norm": 0.15766972303390503, "learning_rate": 4.4883783216428875e-06, "loss": 0.9197, "step": 70680 }, { "epoch": 0.5117013036837571, "grad_norm": 0.16315676271915436, "learning_rate": 4.488305934982302e-06, "loss": 0.9245, "step": 70690 }, { "epoch": 0.5117736903443434, "grad_norm": 0.15983930230140686, "learning_rate": 4.488233548321716e-06, "loss": 0.9192, "step": 70700 }, { "epoch": 0.5118460770049296, "grad_norm": 0.14471487700939178, "learning_rate": 4.488161161661129e-06, "loss": 0.9174, "step": 70710 }, { "epoch": 0.5119184636655157, "grad_norm": 0.1613370180130005, "learning_rate": 4.488088775000543e-06, "loss": 0.9036, "step": 70720 }, { "epoch": 0.5119908503261019, "grad_norm": 0.16052919626235962, "learning_rate": 4.488016388339957e-06, "loss": 0.9103, "step": 70730 }, { "epoch": 0.5120632369866881, "grad_norm": 0.16880005598068237, "learning_rate": 4.487944001679371e-06, "loss": 0.9251, "step": 70740 }, { "epoch": 0.5121356236472743, "grad_norm": 0.1468336135149002, "learning_rate": 4.4878716150187845e-06, "loss": 0.9009, "step": 70750 }, { "epoch": 0.5122080103078605, "grad_norm": 0.16562113165855408, "learning_rate": 4.487799228358198e-06, "loss": 0.9216, "step": 70760 }, { "epoch": 0.5122803969684466, "grad_norm": 0.15913574397563934, "learning_rate": 4.487726841697612e-06, "loss": 0.9216, "step": 70770 }, { "epoch": 0.5123527836290328, "grad_norm": 0.1655045747756958, "learning_rate": 4.487654455037026e-06, "loss": 0.9086, "step": 70780 }, { "epoch": 0.512425170289619, "grad_norm": 0.15185129642486572, "learning_rate": 4.48758206837644e-06, "loss": 0.9242, "step": 70790 }, { "epoch": 0.5124975569502053, "grad_norm": 0.1671728938817978, "learning_rate": 4.4875096817158535e-06, "loss": 0.9166, "step": 70800 }, { "epoch": 0.5125699436107914, "grad_norm": 0.1865541785955429, "learning_rate": 4.487437295055267e-06, "loss": 0.9236, "step": 70810 }, { "epoch": 0.5126423302713776, "grad_norm": 0.1669655591249466, "learning_rate": 4.4873649083946816e-06, "loss": 0.9196, "step": 70820 }, { "epoch": 0.5127147169319638, "grad_norm": 0.17508293688297272, "learning_rate": 4.487292521734095e-06, "loss": 0.9211, "step": 70830 }, { "epoch": 0.51278710359255, "grad_norm": 0.1667715311050415, "learning_rate": 4.487220135073509e-06, "loss": 0.9144, "step": 70840 }, { "epoch": 0.5128594902531362, "grad_norm": 0.17849986255168915, "learning_rate": 4.487147748412922e-06, "loss": 0.9205, "step": 70850 }, { "epoch": 0.5129318769137223, "grad_norm": 0.15619997680187225, "learning_rate": 4.487075361752337e-06, "loss": 0.9379, "step": 70860 }, { "epoch": 0.5130042635743085, "grad_norm": 0.16368311643600464, "learning_rate": 4.4870029750917505e-06, "loss": 0.9072, "step": 70870 }, { "epoch": 0.5130766502348947, "grad_norm": 0.16930024325847626, "learning_rate": 4.486930588431164e-06, "loss": 0.91, "step": 70880 }, { "epoch": 0.5131490368954809, "grad_norm": 0.17948120832443237, "learning_rate": 4.486858201770578e-06, "loss": 0.9104, "step": 70890 }, { "epoch": 0.513221423556067, "grad_norm": 0.19163572788238525, "learning_rate": 4.486785815109992e-06, "loss": 0.8944, "step": 70900 }, { "epoch": 0.5132938102166533, "grad_norm": 0.15345646440982819, "learning_rate": 4.486713428449406e-06, "loss": 0.9268, "step": 70910 }, { "epoch": 0.5133661968772395, "grad_norm": 0.16913288831710815, "learning_rate": 4.486641041788819e-06, "loss": 0.9235, "step": 70920 }, { "epoch": 0.5134385835378257, "grad_norm": 0.17389893531799316, "learning_rate": 4.486568655128233e-06, "loss": 0.9248, "step": 70930 }, { "epoch": 0.5135109701984119, "grad_norm": 0.16675566136837006, "learning_rate": 4.4864962684676475e-06, "loss": 0.9113, "step": 70940 }, { "epoch": 0.513583356858998, "grad_norm": 0.15058213472366333, "learning_rate": 4.486423881807061e-06, "loss": 0.9248, "step": 70950 }, { "epoch": 0.5136557435195842, "grad_norm": 0.33218497037887573, "learning_rate": 4.486351495146475e-06, "loss": 0.9035, "step": 70960 }, { "epoch": 0.5137281301801704, "grad_norm": 0.1726812720298767, "learning_rate": 4.486279108485888e-06, "loss": 0.904, "step": 70970 }, { "epoch": 0.5138005168407566, "grad_norm": 0.1540847271680832, "learning_rate": 4.486206721825303e-06, "loss": 0.9153, "step": 70980 }, { "epoch": 0.5138729035013427, "grad_norm": 0.1801622062921524, "learning_rate": 4.486134335164716e-06, "loss": 0.9145, "step": 70990 }, { "epoch": 0.5139452901619289, "grad_norm": 0.14557209610939026, "learning_rate": 4.48606194850413e-06, "loss": 0.9214, "step": 71000 }, { "epoch": 0.5140176768225152, "grad_norm": 0.1920100599527359, "learning_rate": 4.485989561843544e-06, "loss": 0.9251, "step": 71010 }, { "epoch": 0.5140900634831014, "grad_norm": 0.1555316001176834, "learning_rate": 4.485917175182958e-06, "loss": 0.9179, "step": 71020 }, { "epoch": 0.5141624501436876, "grad_norm": 0.16274884343147278, "learning_rate": 4.485844788522372e-06, "loss": 0.9098, "step": 71030 }, { "epoch": 0.5142348368042737, "grad_norm": 0.17160306870937347, "learning_rate": 4.485772401861785e-06, "loss": 0.918, "step": 71040 }, { "epoch": 0.5143072234648599, "grad_norm": 0.1791687160730362, "learning_rate": 4.485700015201199e-06, "loss": 0.921, "step": 71050 }, { "epoch": 0.5143796101254461, "grad_norm": 0.1617984026670456, "learning_rate": 4.485627628540613e-06, "loss": 0.9106, "step": 71060 }, { "epoch": 0.5144519967860323, "grad_norm": 0.1553729772567749, "learning_rate": 4.485555241880027e-06, "loss": 0.9223, "step": 71070 }, { "epoch": 0.5145243834466184, "grad_norm": 0.2470700591802597, "learning_rate": 4.485482855219441e-06, "loss": 0.9151, "step": 71080 }, { "epoch": 0.5145967701072046, "grad_norm": 0.16699855029582977, "learning_rate": 4.485410468558854e-06, "loss": 0.9129, "step": 71090 }, { "epoch": 0.5146691567677908, "grad_norm": 0.21680283546447754, "learning_rate": 4.485338081898269e-06, "loss": 0.9169, "step": 71100 }, { "epoch": 0.514741543428377, "grad_norm": 0.16673427820205688, "learning_rate": 4.485265695237682e-06, "loss": 0.9362, "step": 71110 }, { "epoch": 0.5148139300889633, "grad_norm": 0.1613384336233139, "learning_rate": 4.485193308577096e-06, "loss": 0.9249, "step": 71120 }, { "epoch": 0.5148863167495494, "grad_norm": 0.1817154735326767, "learning_rate": 4.48512092191651e-06, "loss": 0.9287, "step": 71130 }, { "epoch": 0.5149587034101356, "grad_norm": 0.1700638085603714, "learning_rate": 4.485048535255924e-06, "loss": 0.9126, "step": 71140 }, { "epoch": 0.5150310900707218, "grad_norm": 0.17622961103916168, "learning_rate": 4.484976148595338e-06, "loss": 0.9131, "step": 71150 }, { "epoch": 0.515103476731308, "grad_norm": 0.1659720242023468, "learning_rate": 4.484903761934751e-06, "loss": 0.909, "step": 71160 }, { "epoch": 0.5151758633918941, "grad_norm": 0.16669756174087524, "learning_rate": 4.484831375274165e-06, "loss": 0.9083, "step": 71170 }, { "epoch": 0.5152482500524803, "grad_norm": 0.16893939673900604, "learning_rate": 4.4847589886135785e-06, "loss": 0.9122, "step": 71180 }, { "epoch": 0.5153206367130665, "grad_norm": 0.15991714596748352, "learning_rate": 4.484686601952992e-06, "loss": 0.915, "step": 71190 }, { "epoch": 0.5153930233736527, "grad_norm": 0.15963132679462433, "learning_rate": 4.484614215292406e-06, "loss": 0.9193, "step": 71200 }, { "epoch": 0.5154654100342388, "grad_norm": 0.1559552103281021, "learning_rate": 4.48454182863182e-06, "loss": 0.9138, "step": 71210 }, { "epoch": 0.515537796694825, "grad_norm": 0.17159438133239746, "learning_rate": 4.484469441971234e-06, "loss": 0.9046, "step": 71220 }, { "epoch": 0.5156101833554113, "grad_norm": 0.15353378653526306, "learning_rate": 4.4843970553106474e-06, "loss": 0.9144, "step": 71230 }, { "epoch": 0.5156825700159975, "grad_norm": 0.16255958378314972, "learning_rate": 4.484324668650061e-06, "loss": 0.9019, "step": 71240 }, { "epoch": 0.5157549566765837, "grad_norm": 0.16722968220710754, "learning_rate": 4.4842522819894755e-06, "loss": 0.9364, "step": 71250 }, { "epoch": 0.5158273433371698, "grad_norm": 0.16965439915657043, "learning_rate": 4.484179895328889e-06, "loss": 0.9201, "step": 71260 }, { "epoch": 0.515899729997756, "grad_norm": 0.16112767159938812, "learning_rate": 4.484107508668303e-06, "loss": 0.9206, "step": 71270 }, { "epoch": 0.5159721166583422, "grad_norm": 0.1666184961795807, "learning_rate": 4.484035122007716e-06, "loss": 0.9133, "step": 71280 }, { "epoch": 0.5160445033189284, "grad_norm": 0.16528722643852234, "learning_rate": 4.483962735347131e-06, "loss": 0.9199, "step": 71290 }, { "epoch": 0.5161168899795145, "grad_norm": 0.1569664627313614, "learning_rate": 4.4838903486865444e-06, "loss": 0.9239, "step": 71300 }, { "epoch": 0.5161892766401007, "grad_norm": 0.1667070984840393, "learning_rate": 4.483817962025958e-06, "loss": 0.9269, "step": 71310 }, { "epoch": 0.5162616633006869, "grad_norm": 0.15985529124736786, "learning_rate": 4.483745575365372e-06, "loss": 0.913, "step": 71320 }, { "epoch": 0.5163340499612732, "grad_norm": 0.17231838405132294, "learning_rate": 4.483673188704786e-06, "loss": 0.9246, "step": 71330 }, { "epoch": 0.5164064366218594, "grad_norm": 0.16398270428180695, "learning_rate": 4.4836008020442e-06, "loss": 0.919, "step": 71340 }, { "epoch": 0.5164788232824455, "grad_norm": 0.25354573130607605, "learning_rate": 4.483528415383613e-06, "loss": 0.9369, "step": 71350 }, { "epoch": 0.5165512099430317, "grad_norm": 0.15637996792793274, "learning_rate": 4.483456028723027e-06, "loss": 0.9173, "step": 71360 }, { "epoch": 0.5166235966036179, "grad_norm": 0.1540151685476303, "learning_rate": 4.4833836420624415e-06, "loss": 0.9172, "step": 71370 }, { "epoch": 0.5166959832642041, "grad_norm": 0.1704026758670807, "learning_rate": 4.483311255401855e-06, "loss": 0.9171, "step": 71380 }, { "epoch": 0.5167683699247902, "grad_norm": 0.152908593416214, "learning_rate": 4.483238868741269e-06, "loss": 0.9149, "step": 71390 }, { "epoch": 0.5168407565853764, "grad_norm": 0.15788275003433228, "learning_rate": 4.483166482080682e-06, "loss": 0.9224, "step": 71400 }, { "epoch": 0.5169131432459626, "grad_norm": 0.1816600263118744, "learning_rate": 4.483094095420096e-06, "loss": 0.9313, "step": 71410 }, { "epoch": 0.5169855299065488, "grad_norm": 0.3124025762081146, "learning_rate": 4.48302170875951e-06, "loss": 0.9189, "step": 71420 }, { "epoch": 0.517057916567135, "grad_norm": 0.16039884090423584, "learning_rate": 4.482949322098924e-06, "loss": 0.929, "step": 71430 }, { "epoch": 0.5171303032277212, "grad_norm": 0.1553005576133728, "learning_rate": 4.482876935438338e-06, "loss": 0.9131, "step": 71440 }, { "epoch": 0.5172026898883074, "grad_norm": 0.16902105510234833, "learning_rate": 4.482804548777751e-06, "loss": 0.9111, "step": 71450 }, { "epoch": 0.5172750765488936, "grad_norm": 0.17077742516994476, "learning_rate": 4.482732162117166e-06, "loss": 0.9269, "step": 71460 }, { "epoch": 0.5173474632094798, "grad_norm": 0.17665700614452362, "learning_rate": 4.482659775456579e-06, "loss": 0.908, "step": 71470 }, { "epoch": 0.517419849870066, "grad_norm": 0.16787657141685486, "learning_rate": 4.482587388795993e-06, "loss": 0.9067, "step": 71480 }, { "epoch": 0.5174922365306521, "grad_norm": 0.168588787317276, "learning_rate": 4.4825150021354065e-06, "loss": 0.9111, "step": 71490 }, { "epoch": 0.5175646231912383, "grad_norm": 0.16920819878578186, "learning_rate": 4.482442615474821e-06, "loss": 0.9262, "step": 71500 }, { "epoch": 0.5176370098518245, "grad_norm": 0.17827217280864716, "learning_rate": 4.482370228814235e-06, "loss": 0.9352, "step": 71510 }, { "epoch": 0.5177093965124107, "grad_norm": 0.18853449821472168, "learning_rate": 4.482297842153648e-06, "loss": 0.9177, "step": 71520 }, { "epoch": 0.5177817831729968, "grad_norm": 0.1763719618320465, "learning_rate": 4.482225455493062e-06, "loss": 0.9231, "step": 71530 }, { "epoch": 0.5178541698335831, "grad_norm": 0.15667404234409332, "learning_rate": 4.482153068832476e-06, "loss": 0.9272, "step": 71540 }, { "epoch": 0.5179265564941693, "grad_norm": 0.15414214134216309, "learning_rate": 4.48208068217189e-06, "loss": 0.9289, "step": 71550 }, { "epoch": 0.5179989431547555, "grad_norm": 0.15999796986579895, "learning_rate": 4.4820082955113036e-06, "loss": 0.9122, "step": 71560 }, { "epoch": 0.5180713298153417, "grad_norm": 0.1718224138021469, "learning_rate": 4.481935908850717e-06, "loss": 0.9087, "step": 71570 }, { "epoch": 0.5181437164759278, "grad_norm": 0.14979562163352966, "learning_rate": 4.481863522190132e-06, "loss": 0.9066, "step": 71580 }, { "epoch": 0.518216103136514, "grad_norm": 0.16340069472789764, "learning_rate": 4.481791135529545e-06, "loss": 0.923, "step": 71590 }, { "epoch": 0.5182884897971002, "grad_norm": 0.15064361691474915, "learning_rate": 4.481718748868959e-06, "loss": 0.9123, "step": 71600 }, { "epoch": 0.5183608764576864, "grad_norm": 0.16256332397460938, "learning_rate": 4.4816463622083725e-06, "loss": 0.9243, "step": 71610 }, { "epoch": 0.5184332631182725, "grad_norm": 0.16027949750423431, "learning_rate": 4.481573975547787e-06, "loss": 0.9185, "step": 71620 }, { "epoch": 0.5185056497788587, "grad_norm": 0.14856921136379242, "learning_rate": 4.4815015888872006e-06, "loss": 0.914, "step": 71630 }, { "epoch": 0.5185780364394449, "grad_norm": 0.17712818086147308, "learning_rate": 4.481429202226614e-06, "loss": 0.9299, "step": 71640 }, { "epoch": 0.5186504231000312, "grad_norm": 0.15580609440803528, "learning_rate": 4.481356815566028e-06, "loss": 0.924, "step": 71650 }, { "epoch": 0.5187228097606174, "grad_norm": 0.15209463238716125, "learning_rate": 4.481284428905442e-06, "loss": 0.9227, "step": 71660 }, { "epoch": 0.5187951964212035, "grad_norm": 0.1534406542778015, "learning_rate": 4.481212042244856e-06, "loss": 0.9206, "step": 71670 }, { "epoch": 0.5188675830817897, "grad_norm": 0.1686408370733261, "learning_rate": 4.4811396555842695e-06, "loss": 0.9148, "step": 71680 }, { "epoch": 0.5189399697423759, "grad_norm": 0.1665174961090088, "learning_rate": 4.481067268923683e-06, "loss": 0.9151, "step": 71690 }, { "epoch": 0.5190123564029621, "grad_norm": 0.17360328137874603, "learning_rate": 4.480994882263097e-06, "loss": 0.9171, "step": 71700 }, { "epoch": 0.5190847430635482, "grad_norm": 0.14976924657821655, "learning_rate": 4.48092249560251e-06, "loss": 0.9203, "step": 71710 }, { "epoch": 0.5191571297241344, "grad_norm": 0.16571447253227234, "learning_rate": 4.480850108941924e-06, "loss": 0.9062, "step": 71720 }, { "epoch": 0.5192295163847206, "grad_norm": 0.1702362298965454, "learning_rate": 4.480777722281338e-06, "loss": 0.9272, "step": 71730 }, { "epoch": 0.5193019030453068, "grad_norm": 0.16296398639678955, "learning_rate": 4.480705335620752e-06, "loss": 0.915, "step": 71740 }, { "epoch": 0.5193742897058929, "grad_norm": 0.15773184597492218, "learning_rate": 4.480632948960166e-06, "loss": 0.9159, "step": 71750 }, { "epoch": 0.5194466763664792, "grad_norm": 0.15796466171741486, "learning_rate": 4.480560562299579e-06, "loss": 0.923, "step": 71760 }, { "epoch": 0.5195190630270654, "grad_norm": 0.16592220962047577, "learning_rate": 4.480488175638994e-06, "loss": 0.9204, "step": 71770 }, { "epoch": 0.5195914496876516, "grad_norm": 0.161960631608963, "learning_rate": 4.480415788978407e-06, "loss": 0.9235, "step": 71780 }, { "epoch": 0.5196638363482378, "grad_norm": 0.17310741543769836, "learning_rate": 4.480343402317821e-06, "loss": 0.9062, "step": 71790 }, { "epoch": 0.5197362230088239, "grad_norm": 0.1718437820672989, "learning_rate": 4.4802710156572346e-06, "loss": 0.9064, "step": 71800 }, { "epoch": 0.5198086096694101, "grad_norm": 0.16727685928344727, "learning_rate": 4.480198628996649e-06, "loss": 0.9363, "step": 71810 }, { "epoch": 0.5198809963299963, "grad_norm": 0.17579713463783264, "learning_rate": 4.480126242336063e-06, "loss": 0.9249, "step": 71820 }, { "epoch": 0.5199533829905825, "grad_norm": 0.15877732634544373, "learning_rate": 4.480053855675476e-06, "loss": 0.9297, "step": 71830 }, { "epoch": 0.5200257696511686, "grad_norm": 0.1620732545852661, "learning_rate": 4.47998146901489e-06, "loss": 0.9168, "step": 71840 }, { "epoch": 0.5200981563117548, "grad_norm": 0.17274366319179535, "learning_rate": 4.479909082354304e-06, "loss": 0.9086, "step": 71850 }, { "epoch": 0.5201705429723411, "grad_norm": 0.1550474315881729, "learning_rate": 4.479836695693718e-06, "loss": 0.9206, "step": 71860 }, { "epoch": 0.5202429296329273, "grad_norm": 0.17657595872879028, "learning_rate": 4.479764309033132e-06, "loss": 0.9213, "step": 71870 }, { "epoch": 0.5203153162935135, "grad_norm": 0.16544876992702484, "learning_rate": 4.479691922372545e-06, "loss": 0.929, "step": 71880 }, { "epoch": 0.5203877029540996, "grad_norm": 0.1544915735721588, "learning_rate": 4.47961953571196e-06, "loss": 0.9057, "step": 71890 }, { "epoch": 0.5204600896146858, "grad_norm": 0.15595407783985138, "learning_rate": 4.479547149051373e-06, "loss": 0.9144, "step": 71900 }, { "epoch": 0.520532476275272, "grad_norm": 0.1735614836215973, "learning_rate": 4.479474762390787e-06, "loss": 0.9156, "step": 71910 }, { "epoch": 0.5206048629358582, "grad_norm": 0.1538870483636856, "learning_rate": 4.4794023757302005e-06, "loss": 0.9117, "step": 71920 }, { "epoch": 0.5206772495964443, "grad_norm": 0.15817460417747498, "learning_rate": 4.479329989069615e-06, "loss": 0.9233, "step": 71930 }, { "epoch": 0.5207496362570305, "grad_norm": 0.16701272130012512, "learning_rate": 4.479257602409029e-06, "loss": 0.9218, "step": 71940 }, { "epoch": 0.5208220229176167, "grad_norm": 0.15565438568592072, "learning_rate": 4.479185215748442e-06, "loss": 0.9113, "step": 71950 }, { "epoch": 0.5208944095782029, "grad_norm": 0.15946054458618164, "learning_rate": 4.479112829087856e-06, "loss": 0.9165, "step": 71960 }, { "epoch": 0.5209667962387892, "grad_norm": 0.1618921309709549, "learning_rate": 4.47904044242727e-06, "loss": 0.9241, "step": 71970 }, { "epoch": 0.5210391828993753, "grad_norm": 0.19003738462924957, "learning_rate": 4.478968055766684e-06, "loss": 0.9101, "step": 71980 }, { "epoch": 0.5211115695599615, "grad_norm": 0.16518519818782806, "learning_rate": 4.4788956691060975e-06, "loss": 0.9038, "step": 71990 }, { "epoch": 0.5211839562205477, "grad_norm": 0.15592487156391144, "learning_rate": 4.478823282445511e-06, "loss": 0.9162, "step": 72000 }, { "epoch": 0.5212563428811339, "grad_norm": 0.17761656641960144, "learning_rate": 4.478750895784925e-06, "loss": 0.9126, "step": 72010 }, { "epoch": 0.52132872954172, "grad_norm": 0.1610282063484192, "learning_rate": 4.478678509124339e-06, "loss": 0.9199, "step": 72020 }, { "epoch": 0.5214011162023062, "grad_norm": 0.16208316385746002, "learning_rate": 4.478606122463753e-06, "loss": 0.9129, "step": 72030 }, { "epoch": 0.5214735028628924, "grad_norm": 0.17312684655189514, "learning_rate": 4.4785337358031664e-06, "loss": 0.9022, "step": 72040 }, { "epoch": 0.5215458895234786, "grad_norm": 0.16080522537231445, "learning_rate": 4.47846134914258e-06, "loss": 0.908, "step": 72050 }, { "epoch": 0.5216182761840648, "grad_norm": 0.17630934715270996, "learning_rate": 4.4783889624819945e-06, "loss": 0.9169, "step": 72060 }, { "epoch": 0.521690662844651, "grad_norm": 0.1585136502981186, "learning_rate": 4.478316575821408e-06, "loss": 0.9079, "step": 72070 }, { "epoch": 0.5217630495052372, "grad_norm": 0.15829670429229736, "learning_rate": 4.478244189160822e-06, "loss": 0.9209, "step": 72080 }, { "epoch": 0.5218354361658234, "grad_norm": 0.1633504331111908, "learning_rate": 4.478171802500235e-06, "loss": 0.9202, "step": 72090 }, { "epoch": 0.5219078228264096, "grad_norm": 0.1850529909133911, "learning_rate": 4.47809941583965e-06, "loss": 0.9244, "step": 72100 }, { "epoch": 0.5219802094869957, "grad_norm": 0.16101692616939545, "learning_rate": 4.4780270291790635e-06, "loss": 0.9122, "step": 72110 }, { "epoch": 0.5220525961475819, "grad_norm": 0.16224358975887299, "learning_rate": 4.477954642518477e-06, "loss": 0.9149, "step": 72120 }, { "epoch": 0.5221249828081681, "grad_norm": 0.14560241997241974, "learning_rate": 4.477882255857891e-06, "loss": 0.9203, "step": 72130 }, { "epoch": 0.5221973694687543, "grad_norm": 0.15428347885608673, "learning_rate": 4.477809869197305e-06, "loss": 0.9212, "step": 72140 }, { "epoch": 0.5222697561293405, "grad_norm": 0.1555236577987671, "learning_rate": 4.477737482536719e-06, "loss": 0.9223, "step": 72150 }, { "epoch": 0.5223421427899266, "grad_norm": 0.16414514183998108, "learning_rate": 4.477665095876132e-06, "loss": 0.9153, "step": 72160 }, { "epoch": 0.5224145294505128, "grad_norm": 0.1530592143535614, "learning_rate": 4.477592709215546e-06, "loss": 0.9168, "step": 72170 }, { "epoch": 0.5224869161110991, "grad_norm": 0.17093126475811005, "learning_rate": 4.4775203225549605e-06, "loss": 0.9175, "step": 72180 }, { "epoch": 0.5225593027716853, "grad_norm": 0.16496798396110535, "learning_rate": 4.477447935894374e-06, "loss": 0.9173, "step": 72190 }, { "epoch": 0.5226316894322715, "grad_norm": 0.2403537482023239, "learning_rate": 4.477375549233788e-06, "loss": 0.9239, "step": 72200 }, { "epoch": 0.5227040760928576, "grad_norm": 0.1915457397699356, "learning_rate": 4.477303162573201e-06, "loss": 0.9279, "step": 72210 }, { "epoch": 0.5227764627534438, "grad_norm": 0.1616457849740982, "learning_rate": 4.477230775912616e-06, "loss": 0.9078, "step": 72220 }, { "epoch": 0.52284884941403, "grad_norm": 0.19186237454414368, "learning_rate": 4.4771583892520285e-06, "loss": 0.9307, "step": 72230 }, { "epoch": 0.5229212360746162, "grad_norm": 0.16343003511428833, "learning_rate": 4.477086002591442e-06, "loss": 0.9148, "step": 72240 }, { "epoch": 0.5229936227352023, "grad_norm": 0.15922591090202332, "learning_rate": 4.477013615930857e-06, "loss": 0.9101, "step": 72250 }, { "epoch": 0.5230660093957885, "grad_norm": 0.1629839390516281, "learning_rate": 4.47694122927027e-06, "loss": 0.9133, "step": 72260 }, { "epoch": 0.5231383960563747, "grad_norm": 0.1665726751089096, "learning_rate": 4.476868842609684e-06, "loss": 0.9105, "step": 72270 }, { "epoch": 0.5232107827169609, "grad_norm": 0.22242015600204468, "learning_rate": 4.4767964559490975e-06, "loss": 0.9184, "step": 72280 }, { "epoch": 0.5232831693775472, "grad_norm": 0.15692086517810822, "learning_rate": 4.476724069288512e-06, "loss": 0.9071, "step": 72290 }, { "epoch": 0.5233555560381333, "grad_norm": 0.16244766116142273, "learning_rate": 4.4766516826279255e-06, "loss": 0.924, "step": 72300 }, { "epoch": 0.5234279426987195, "grad_norm": 0.1637459546327591, "learning_rate": 4.476579295967339e-06, "loss": 0.9181, "step": 72310 }, { "epoch": 0.5235003293593057, "grad_norm": 0.16626401245594025, "learning_rate": 4.476506909306753e-06, "loss": 0.9203, "step": 72320 }, { "epoch": 0.5235727160198919, "grad_norm": 0.15717044472694397, "learning_rate": 4.476434522646167e-06, "loss": 0.9104, "step": 72330 }, { "epoch": 0.523645102680478, "grad_norm": 0.15641281008720398, "learning_rate": 4.476362135985581e-06, "loss": 0.9045, "step": 72340 }, { "epoch": 0.5237174893410642, "grad_norm": 0.15991607308387756, "learning_rate": 4.4762897493249945e-06, "loss": 0.9179, "step": 72350 }, { "epoch": 0.5237898760016504, "grad_norm": 0.14962872862815857, "learning_rate": 4.476217362664408e-06, "loss": 0.9184, "step": 72360 }, { "epoch": 0.5238622626622366, "grad_norm": 0.14683006703853607, "learning_rate": 4.4761449760038226e-06, "loss": 0.9173, "step": 72370 }, { "epoch": 0.5239346493228227, "grad_norm": 0.17192301154136658, "learning_rate": 4.476072589343236e-06, "loss": 0.9299, "step": 72380 }, { "epoch": 0.524007035983409, "grad_norm": 0.16342616081237793, "learning_rate": 4.47600020268265e-06, "loss": 0.9071, "step": 72390 }, { "epoch": 0.5240794226439952, "grad_norm": 0.1558561623096466, "learning_rate": 4.475927816022063e-06, "loss": 0.9077, "step": 72400 }, { "epoch": 0.5241518093045814, "grad_norm": 0.18777623772621155, "learning_rate": 4.475855429361478e-06, "loss": 0.912, "step": 72410 }, { "epoch": 0.5242241959651676, "grad_norm": 0.16315610706806183, "learning_rate": 4.4757830427008915e-06, "loss": 0.9415, "step": 72420 }, { "epoch": 0.5242965826257537, "grad_norm": 0.17795760929584503, "learning_rate": 4.475710656040305e-06, "loss": 0.9197, "step": 72430 }, { "epoch": 0.5243689692863399, "grad_norm": 0.16951826214790344, "learning_rate": 4.475638269379719e-06, "loss": 0.908, "step": 72440 }, { "epoch": 0.5244413559469261, "grad_norm": 0.18716560304164886, "learning_rate": 4.475565882719133e-06, "loss": 0.9293, "step": 72450 }, { "epoch": 0.5245137426075123, "grad_norm": 0.24817276000976562, "learning_rate": 4.475493496058547e-06, "loss": 0.9103, "step": 72460 }, { "epoch": 0.5245861292680984, "grad_norm": 0.15385325253009796, "learning_rate": 4.47542110939796e-06, "loss": 0.9171, "step": 72470 }, { "epoch": 0.5246585159286846, "grad_norm": 0.15869303047657013, "learning_rate": 4.475348722737374e-06, "loss": 0.9223, "step": 72480 }, { "epoch": 0.5247309025892708, "grad_norm": 0.14980655908584595, "learning_rate": 4.4752763360767885e-06, "loss": 0.9182, "step": 72490 }, { "epoch": 0.5248032892498571, "grad_norm": 0.16237030923366547, "learning_rate": 4.475203949416202e-06, "loss": 0.9184, "step": 72500 }, { "epoch": 0.5248756759104433, "grad_norm": 0.15368181467056274, "learning_rate": 4.475131562755616e-06, "loss": 0.9164, "step": 72510 }, { "epoch": 0.5249480625710294, "grad_norm": 0.16227343678474426, "learning_rate": 4.475059176095029e-06, "loss": 0.9022, "step": 72520 }, { "epoch": 0.5250204492316156, "grad_norm": 0.15485748648643494, "learning_rate": 4.474986789434444e-06, "loss": 0.9167, "step": 72530 }, { "epoch": 0.5250928358922018, "grad_norm": 0.17310456931591034, "learning_rate": 4.474914402773857e-06, "loss": 0.928, "step": 72540 }, { "epoch": 0.525165222552788, "grad_norm": 0.1546664834022522, "learning_rate": 4.474842016113271e-06, "loss": 0.9162, "step": 72550 }, { "epoch": 0.5252376092133741, "grad_norm": 0.17586833238601685, "learning_rate": 4.474769629452685e-06, "loss": 0.9126, "step": 72560 }, { "epoch": 0.5253099958739603, "grad_norm": 0.1783231645822525, "learning_rate": 4.474697242792099e-06, "loss": 0.9093, "step": 72570 }, { "epoch": 0.5253823825345465, "grad_norm": 0.17169401049613953, "learning_rate": 4.474624856131513e-06, "loss": 0.9267, "step": 72580 }, { "epoch": 0.5254547691951327, "grad_norm": 0.478100448846817, "learning_rate": 4.474552469470926e-06, "loss": 0.9089, "step": 72590 }, { "epoch": 0.5255271558557189, "grad_norm": 0.1514614224433899, "learning_rate": 4.47448008281034e-06, "loss": 0.9243, "step": 72600 }, { "epoch": 0.5255995425163051, "grad_norm": 0.15338928997516632, "learning_rate": 4.4744076961497544e-06, "loss": 0.9111, "step": 72610 }, { "epoch": 0.5256719291768913, "grad_norm": 0.16090184450149536, "learning_rate": 4.474335309489168e-06, "loss": 0.9188, "step": 72620 }, { "epoch": 0.5257443158374775, "grad_norm": 0.1639074832201004, "learning_rate": 4.474262922828582e-06, "loss": 0.9235, "step": 72630 }, { "epoch": 0.5258167024980637, "grad_norm": 0.16050408780574799, "learning_rate": 4.474190536167995e-06, "loss": 0.9176, "step": 72640 }, { "epoch": 0.5258890891586498, "grad_norm": 0.1604773849248886, "learning_rate": 4.474118149507409e-06, "loss": 0.9258, "step": 72650 }, { "epoch": 0.525961475819236, "grad_norm": 0.15453247725963593, "learning_rate": 4.474045762846823e-06, "loss": 0.9273, "step": 72660 }, { "epoch": 0.5260338624798222, "grad_norm": 0.17497363686561584, "learning_rate": 4.473973376186237e-06, "loss": 0.9219, "step": 72670 }, { "epoch": 0.5261062491404084, "grad_norm": 0.15889976918697357, "learning_rate": 4.473900989525651e-06, "loss": 0.9325, "step": 72680 }, { "epoch": 0.5261786358009946, "grad_norm": 0.18632720410823822, "learning_rate": 4.473828602865064e-06, "loss": 0.9188, "step": 72690 }, { "epoch": 0.5262510224615807, "grad_norm": 0.16045477986335754, "learning_rate": 4.473756216204479e-06, "loss": 0.9261, "step": 72700 }, { "epoch": 0.526323409122167, "grad_norm": 0.16817103326320648, "learning_rate": 4.473683829543892e-06, "loss": 0.9212, "step": 72710 }, { "epoch": 0.5263957957827532, "grad_norm": 0.1566298007965088, "learning_rate": 4.473611442883306e-06, "loss": 0.9188, "step": 72720 }, { "epoch": 0.5264681824433394, "grad_norm": 0.1550060659646988, "learning_rate": 4.4735390562227195e-06, "loss": 0.9263, "step": 72730 }, { "epoch": 0.5265405691039255, "grad_norm": 0.15189586579799652, "learning_rate": 4.473466669562134e-06, "loss": 0.9202, "step": 72740 }, { "epoch": 0.5266129557645117, "grad_norm": 0.14698752760887146, "learning_rate": 4.473394282901548e-06, "loss": 0.9105, "step": 72750 }, { "epoch": 0.5266853424250979, "grad_norm": 0.16903573274612427, "learning_rate": 4.473321896240961e-06, "loss": 0.9135, "step": 72760 }, { "epoch": 0.5267577290856841, "grad_norm": 0.16911180317401886, "learning_rate": 4.473249509580375e-06, "loss": 0.9249, "step": 72770 }, { "epoch": 0.5268301157462703, "grad_norm": 0.1543751209974289, "learning_rate": 4.4731771229197884e-06, "loss": 0.9126, "step": 72780 }, { "epoch": 0.5269025024068564, "grad_norm": 0.15658721327781677, "learning_rate": 4.473104736259202e-06, "loss": 0.9152, "step": 72790 }, { "epoch": 0.5269748890674426, "grad_norm": 0.16438843309879303, "learning_rate": 4.473032349598616e-06, "loss": 0.9226, "step": 72800 }, { "epoch": 0.5270472757280288, "grad_norm": 0.17810095846652985, "learning_rate": 4.47295996293803e-06, "loss": 0.9093, "step": 72810 }, { "epoch": 0.5271196623886151, "grad_norm": 0.1657225638628006, "learning_rate": 4.472887576277444e-06, "loss": 0.9233, "step": 72820 }, { "epoch": 0.5271920490492013, "grad_norm": 0.16005633771419525, "learning_rate": 4.472815189616857e-06, "loss": 0.9201, "step": 72830 }, { "epoch": 0.5272644357097874, "grad_norm": 0.16548793017864227, "learning_rate": 4.472742802956271e-06, "loss": 0.9304, "step": 72840 }, { "epoch": 0.5273368223703736, "grad_norm": 0.14762021601200104, "learning_rate": 4.4726704162956855e-06, "loss": 0.9137, "step": 72850 }, { "epoch": 0.5274092090309598, "grad_norm": 0.17930978536605835, "learning_rate": 4.472598029635099e-06, "loss": 0.9203, "step": 72860 }, { "epoch": 0.527481595691546, "grad_norm": 0.1988268494606018, "learning_rate": 4.472525642974513e-06, "loss": 0.9219, "step": 72870 }, { "epoch": 0.5275539823521321, "grad_norm": 0.1670868992805481, "learning_rate": 4.472453256313926e-06, "loss": 0.9083, "step": 72880 }, { "epoch": 0.5276263690127183, "grad_norm": 0.14923833310604095, "learning_rate": 4.472380869653341e-06, "loss": 0.9207, "step": 72890 }, { "epoch": 0.5276987556733045, "grad_norm": 0.15868178009986877, "learning_rate": 4.472308482992754e-06, "loss": 0.9055, "step": 72900 }, { "epoch": 0.5277711423338907, "grad_norm": 0.1550142616033554, "learning_rate": 4.472236096332168e-06, "loss": 0.906, "step": 72910 }, { "epoch": 0.527843528994477, "grad_norm": 0.15354187786579132, "learning_rate": 4.472163709671582e-06, "loss": 0.9138, "step": 72920 }, { "epoch": 0.5279159156550631, "grad_norm": 0.15525110065937042, "learning_rate": 4.472091323010996e-06, "loss": 0.9195, "step": 72930 }, { "epoch": 0.5279883023156493, "grad_norm": 0.15831002593040466, "learning_rate": 4.47201893635041e-06, "loss": 0.9161, "step": 72940 }, { "epoch": 0.5280606889762355, "grad_norm": 0.169021874666214, "learning_rate": 4.471946549689823e-06, "loss": 0.9256, "step": 72950 }, { "epoch": 0.5281330756368217, "grad_norm": 0.14959047734737396, "learning_rate": 4.471874163029237e-06, "loss": 0.8988, "step": 72960 }, { "epoch": 0.5282054622974078, "grad_norm": 0.1501626968383789, "learning_rate": 4.471801776368651e-06, "loss": 0.9179, "step": 72970 }, { "epoch": 0.528277848957994, "grad_norm": 0.15244172513484955, "learning_rate": 4.471729389708065e-06, "loss": 0.9218, "step": 72980 }, { "epoch": 0.5283502356185802, "grad_norm": 0.21361258625984192, "learning_rate": 4.471657003047479e-06, "loss": 0.9205, "step": 72990 }, { "epoch": 0.5284226222791664, "grad_norm": 0.1592887043952942, "learning_rate": 4.471584616386892e-06, "loss": 0.9192, "step": 73000 }, { "epoch": 0.5284950089397525, "grad_norm": 0.15822738409042358, "learning_rate": 4.471512229726307e-06, "loss": 0.9095, "step": 73010 }, { "epoch": 0.5285673956003387, "grad_norm": 0.18526582419872284, "learning_rate": 4.47143984306572e-06, "loss": 0.922, "step": 73020 }, { "epoch": 0.528639782260925, "grad_norm": 0.16115911304950714, "learning_rate": 4.471367456405134e-06, "loss": 0.9071, "step": 73030 }, { "epoch": 0.5287121689215112, "grad_norm": 0.16934490203857422, "learning_rate": 4.4712950697445475e-06, "loss": 0.9206, "step": 73040 }, { "epoch": 0.5287845555820974, "grad_norm": 0.15864363312721252, "learning_rate": 4.471222683083962e-06, "loss": 0.9344, "step": 73050 }, { "epoch": 0.5288569422426835, "grad_norm": 0.16181667149066925, "learning_rate": 4.471150296423376e-06, "loss": 0.9177, "step": 73060 }, { "epoch": 0.5289293289032697, "grad_norm": 0.15380777418613434, "learning_rate": 4.471077909762789e-06, "loss": 0.9181, "step": 73070 }, { "epoch": 0.5290017155638559, "grad_norm": 0.1445995569229126, "learning_rate": 4.471005523102203e-06, "loss": 0.9232, "step": 73080 }, { "epoch": 0.5290741022244421, "grad_norm": 0.1677917093038559, "learning_rate": 4.470933136441617e-06, "loss": 0.9231, "step": 73090 }, { "epoch": 0.5291464888850282, "grad_norm": 0.19374777376651764, "learning_rate": 4.470860749781031e-06, "loss": 0.9182, "step": 73100 }, { "epoch": 0.5292188755456144, "grad_norm": 0.18379537761211395, "learning_rate": 4.4707883631204446e-06, "loss": 0.9132, "step": 73110 }, { "epoch": 0.5292912622062006, "grad_norm": 0.1719239056110382, "learning_rate": 4.470715976459858e-06, "loss": 0.9188, "step": 73120 }, { "epoch": 0.5293636488667868, "grad_norm": 0.15979036688804626, "learning_rate": 4.470643589799273e-06, "loss": 0.9093, "step": 73130 }, { "epoch": 0.5294360355273731, "grad_norm": 0.15309134125709534, "learning_rate": 4.470571203138686e-06, "loss": 0.9195, "step": 73140 }, { "epoch": 0.5295084221879592, "grad_norm": 0.1547461748123169, "learning_rate": 4.4704988164781e-06, "loss": 0.9073, "step": 73150 }, { "epoch": 0.5295808088485454, "grad_norm": 0.17491716146469116, "learning_rate": 4.4704264298175135e-06, "loss": 0.9356, "step": 73160 }, { "epoch": 0.5296531955091316, "grad_norm": 0.15330342948436737, "learning_rate": 4.470354043156928e-06, "loss": 0.9124, "step": 73170 }, { "epoch": 0.5297255821697178, "grad_norm": 0.1625756025314331, "learning_rate": 4.4702816564963416e-06, "loss": 0.9254, "step": 73180 }, { "epoch": 0.529797968830304, "grad_norm": 0.1609082669019699, "learning_rate": 4.470209269835755e-06, "loss": 0.9108, "step": 73190 }, { "epoch": 0.5298703554908901, "grad_norm": 0.15871219336986542, "learning_rate": 4.470136883175169e-06, "loss": 0.9182, "step": 73200 }, { "epoch": 0.5299427421514763, "grad_norm": 0.20798322558403015, "learning_rate": 4.470064496514583e-06, "loss": 0.9201, "step": 73210 }, { "epoch": 0.5300151288120625, "grad_norm": 0.15819989144802094, "learning_rate": 4.469992109853997e-06, "loss": 0.9164, "step": 73220 }, { "epoch": 0.5300875154726487, "grad_norm": 0.16910409927368164, "learning_rate": 4.4699197231934105e-06, "loss": 0.912, "step": 73230 }, { "epoch": 0.5301599021332349, "grad_norm": 0.16933558881282806, "learning_rate": 4.469847336532824e-06, "loss": 0.9084, "step": 73240 }, { "epoch": 0.5302322887938211, "grad_norm": 0.1705089658498764, "learning_rate": 4.469774949872238e-06, "loss": 0.9249, "step": 73250 }, { "epoch": 0.5303046754544073, "grad_norm": 0.16231994330883026, "learning_rate": 4.469702563211652e-06, "loss": 0.9106, "step": 73260 }, { "epoch": 0.5303770621149935, "grad_norm": 0.1603698879480362, "learning_rate": 4.469630176551066e-06, "loss": 0.9157, "step": 73270 }, { "epoch": 0.5304494487755796, "grad_norm": 0.1552126109600067, "learning_rate": 4.469557789890479e-06, "loss": 0.9121, "step": 73280 }, { "epoch": 0.5305218354361658, "grad_norm": 0.16099673509597778, "learning_rate": 4.469485403229893e-06, "loss": 0.9216, "step": 73290 }, { "epoch": 0.530594222096752, "grad_norm": 0.16103273630142212, "learning_rate": 4.469413016569307e-06, "loss": 0.9206, "step": 73300 }, { "epoch": 0.5306666087573382, "grad_norm": 0.16438160836696625, "learning_rate": 4.46934062990872e-06, "loss": 0.9218, "step": 73310 }, { "epoch": 0.5307389954179244, "grad_norm": 0.17345352470874786, "learning_rate": 4.469268243248135e-06, "loss": 0.9085, "step": 73320 }, { "epoch": 0.5308113820785105, "grad_norm": 0.15736123919487, "learning_rate": 4.469195856587548e-06, "loss": 0.9238, "step": 73330 }, { "epoch": 0.5308837687390967, "grad_norm": 0.17544583976268768, "learning_rate": 4.469123469926962e-06, "loss": 0.9176, "step": 73340 }, { "epoch": 0.530956155399683, "grad_norm": 0.17113934457302094, "learning_rate": 4.469051083266376e-06, "loss": 0.9092, "step": 73350 }, { "epoch": 0.5310285420602692, "grad_norm": 0.1495228260755539, "learning_rate": 4.46897869660579e-06, "loss": 0.9151, "step": 73360 }, { "epoch": 0.5311009287208553, "grad_norm": 0.16176773607730865, "learning_rate": 4.468906309945204e-06, "loss": 0.8991, "step": 73370 }, { "epoch": 0.5311733153814415, "grad_norm": 0.15699774026870728, "learning_rate": 4.468833923284617e-06, "loss": 0.9167, "step": 73380 }, { "epoch": 0.5312457020420277, "grad_norm": 0.15788871049880981, "learning_rate": 4.468761536624031e-06, "loss": 0.8985, "step": 73390 }, { "epoch": 0.5313180887026139, "grad_norm": 0.155131995677948, "learning_rate": 4.468689149963445e-06, "loss": 0.9163, "step": 73400 }, { "epoch": 0.5313904753632, "grad_norm": 0.15978825092315674, "learning_rate": 4.468616763302859e-06, "loss": 0.911, "step": 73410 }, { "epoch": 0.5314628620237862, "grad_norm": 0.18127478659152985, "learning_rate": 4.468544376642273e-06, "loss": 0.9225, "step": 73420 }, { "epoch": 0.5315352486843724, "grad_norm": 0.15009476244449615, "learning_rate": 4.468471989981686e-06, "loss": 0.9069, "step": 73430 }, { "epoch": 0.5316076353449586, "grad_norm": 0.15387962758541107, "learning_rate": 4.4683996033211e-06, "loss": 0.9209, "step": 73440 }, { "epoch": 0.5316800220055449, "grad_norm": 0.15109896659851074, "learning_rate": 4.468327216660514e-06, "loss": 0.9285, "step": 73450 }, { "epoch": 0.531752408666131, "grad_norm": 0.20869199931621552, "learning_rate": 4.468254829999928e-06, "loss": 0.9189, "step": 73460 }, { "epoch": 0.5318247953267172, "grad_norm": 0.15885184705257416, "learning_rate": 4.4681824433393415e-06, "loss": 0.9301, "step": 73470 }, { "epoch": 0.5318971819873034, "grad_norm": 0.2871398627758026, "learning_rate": 4.468110056678755e-06, "loss": 0.9291, "step": 73480 }, { "epoch": 0.5319695686478896, "grad_norm": 0.14929638803005219, "learning_rate": 4.46803767001817e-06, "loss": 0.9143, "step": 73490 }, { "epoch": 0.5320419553084758, "grad_norm": 0.15041810274124146, "learning_rate": 4.467965283357583e-06, "loss": 0.9164, "step": 73500 }, { "epoch": 0.5321143419690619, "grad_norm": 0.1545180082321167, "learning_rate": 4.467892896696997e-06, "loss": 0.9144, "step": 73510 }, { "epoch": 0.5321867286296481, "grad_norm": 0.15915392339229584, "learning_rate": 4.4678205100364104e-06, "loss": 0.9088, "step": 73520 }, { "epoch": 0.5322591152902343, "grad_norm": 0.1939249187707901, "learning_rate": 4.467748123375825e-06, "loss": 0.9066, "step": 73530 }, { "epoch": 0.5323315019508205, "grad_norm": 0.1595548391342163, "learning_rate": 4.4676757367152385e-06, "loss": 0.9067, "step": 73540 }, { "epoch": 0.5324038886114066, "grad_norm": 0.15864749252796173, "learning_rate": 4.467603350054652e-06, "loss": 0.9134, "step": 73550 }, { "epoch": 0.5324762752719929, "grad_norm": 0.171446293592453, "learning_rate": 4.467530963394066e-06, "loss": 0.9157, "step": 73560 }, { "epoch": 0.5325486619325791, "grad_norm": 0.15038402378559113, "learning_rate": 4.46745857673348e-06, "loss": 0.9159, "step": 73570 }, { "epoch": 0.5326210485931653, "grad_norm": 0.15996231138706207, "learning_rate": 4.467386190072894e-06, "loss": 0.9059, "step": 73580 }, { "epoch": 0.5326934352537515, "grad_norm": 0.16054002940654755, "learning_rate": 4.4673138034123075e-06, "loss": 0.9136, "step": 73590 }, { "epoch": 0.5327658219143376, "grad_norm": 0.15865951776504517, "learning_rate": 4.467241416751721e-06, "loss": 0.9048, "step": 73600 }, { "epoch": 0.5328382085749238, "grad_norm": 0.15602022409439087, "learning_rate": 4.4671690300911355e-06, "loss": 0.901, "step": 73610 }, { "epoch": 0.53291059523551, "grad_norm": 0.16050325334072113, "learning_rate": 4.467096643430549e-06, "loss": 0.9012, "step": 73620 }, { "epoch": 0.5329829818960962, "grad_norm": 0.15464062988758087, "learning_rate": 4.467024256769963e-06, "loss": 0.918, "step": 73630 }, { "epoch": 0.5330553685566823, "grad_norm": 0.16735030710697174, "learning_rate": 4.466951870109376e-06, "loss": 0.9048, "step": 73640 }, { "epoch": 0.5331277552172685, "grad_norm": 0.17225484549999237, "learning_rate": 4.466879483448791e-06, "loss": 0.9131, "step": 73650 }, { "epoch": 0.5332001418778547, "grad_norm": 0.17411339282989502, "learning_rate": 4.4668070967882045e-06, "loss": 0.9063, "step": 73660 }, { "epoch": 0.533272528538441, "grad_norm": 0.19198282063007355, "learning_rate": 4.466734710127618e-06, "loss": 0.9239, "step": 73670 }, { "epoch": 0.5333449151990272, "grad_norm": 0.16125676035881042, "learning_rate": 4.466662323467032e-06, "loss": 0.9251, "step": 73680 }, { "epoch": 0.5334173018596133, "grad_norm": 0.1499517261981964, "learning_rate": 4.466589936806446e-06, "loss": 0.913, "step": 73690 }, { "epoch": 0.5334896885201995, "grad_norm": 0.16353091597557068, "learning_rate": 4.46651755014586e-06, "loss": 0.9166, "step": 73700 }, { "epoch": 0.5335620751807857, "grad_norm": 0.14786536991596222, "learning_rate": 4.466445163485273e-06, "loss": 0.9174, "step": 73710 }, { "epoch": 0.5336344618413719, "grad_norm": 0.15844851732254028, "learning_rate": 4.466372776824687e-06, "loss": 0.9162, "step": 73720 }, { "epoch": 0.533706848501958, "grad_norm": 0.18850095570087433, "learning_rate": 4.4663003901641015e-06, "loss": 0.9141, "step": 73730 }, { "epoch": 0.5337792351625442, "grad_norm": 0.1814296692609787, "learning_rate": 4.466228003503515e-06, "loss": 0.9087, "step": 73740 }, { "epoch": 0.5338516218231304, "grad_norm": 0.1553386151790619, "learning_rate": 4.466155616842929e-06, "loss": 0.9115, "step": 73750 }, { "epoch": 0.5339240084837166, "grad_norm": 0.1544741988182068, "learning_rate": 4.466083230182342e-06, "loss": 0.9066, "step": 73760 }, { "epoch": 0.5339963951443029, "grad_norm": 0.15202949941158295, "learning_rate": 4.466010843521757e-06, "loss": 0.9164, "step": 73770 }, { "epoch": 0.534068781804889, "grad_norm": 0.157356858253479, "learning_rate": 4.46593845686117e-06, "loss": 0.9272, "step": 73780 }, { "epoch": 0.5341411684654752, "grad_norm": 0.1528637409210205, "learning_rate": 4.465866070200584e-06, "loss": 0.9253, "step": 73790 }, { "epoch": 0.5342135551260614, "grad_norm": 0.1515146940946579, "learning_rate": 4.465793683539998e-06, "loss": 0.9025, "step": 73800 }, { "epoch": 0.5342859417866476, "grad_norm": 0.1715908944606781, "learning_rate": 4.465721296879412e-06, "loss": 0.9274, "step": 73810 }, { "epoch": 0.5343583284472337, "grad_norm": 0.17339174449443817, "learning_rate": 4.465648910218826e-06, "loss": 0.9196, "step": 73820 }, { "epoch": 0.5344307151078199, "grad_norm": 0.18492698669433594, "learning_rate": 4.4655765235582385e-06, "loss": 0.9117, "step": 73830 }, { "epoch": 0.5345031017684061, "grad_norm": 0.18511684238910675, "learning_rate": 4.465504136897653e-06, "loss": 0.9192, "step": 73840 }, { "epoch": 0.5345754884289923, "grad_norm": 0.15983940660953522, "learning_rate": 4.4654317502370666e-06, "loss": 0.9144, "step": 73850 }, { "epoch": 0.5346478750895785, "grad_norm": 0.1514192372560501, "learning_rate": 4.46535936357648e-06, "loss": 0.9289, "step": 73860 }, { "epoch": 0.5347202617501646, "grad_norm": 0.1741372048854828, "learning_rate": 4.465286976915894e-06, "loss": 0.9224, "step": 73870 }, { "epoch": 0.5347926484107509, "grad_norm": 0.1461644023656845, "learning_rate": 4.465214590255308e-06, "loss": 0.9044, "step": 73880 }, { "epoch": 0.5348650350713371, "grad_norm": 0.1568828821182251, "learning_rate": 4.465142203594722e-06, "loss": 0.9098, "step": 73890 }, { "epoch": 0.5349374217319233, "grad_norm": 0.16432945430278778, "learning_rate": 4.4650698169341355e-06, "loss": 0.9187, "step": 73900 }, { "epoch": 0.5350098083925094, "grad_norm": 0.16285908222198486, "learning_rate": 4.464997430273549e-06, "loss": 0.9172, "step": 73910 }, { "epoch": 0.5350821950530956, "grad_norm": 0.15735489130020142, "learning_rate": 4.4649250436129636e-06, "loss": 0.9198, "step": 73920 }, { "epoch": 0.5351545817136818, "grad_norm": 0.1569201946258545, "learning_rate": 4.464852656952377e-06, "loss": 0.9162, "step": 73930 }, { "epoch": 0.535226968374268, "grad_norm": 0.1620626151561737, "learning_rate": 4.464780270291791e-06, "loss": 0.9158, "step": 73940 }, { "epoch": 0.5352993550348542, "grad_norm": 0.16271735727787018, "learning_rate": 4.464707883631204e-06, "loss": 0.9015, "step": 73950 }, { "epoch": 0.5353717416954403, "grad_norm": 0.23674176633358002, "learning_rate": 4.464635496970619e-06, "loss": 0.9143, "step": 73960 }, { "epoch": 0.5354441283560265, "grad_norm": 0.18053658306598663, "learning_rate": 4.4645631103100325e-06, "loss": 0.9125, "step": 73970 }, { "epoch": 0.5355165150166128, "grad_norm": 0.16268426179885864, "learning_rate": 4.464490723649446e-06, "loss": 0.9208, "step": 73980 }, { "epoch": 0.535588901677199, "grad_norm": 0.15412411093711853, "learning_rate": 4.46441833698886e-06, "loss": 0.9183, "step": 73990 }, { "epoch": 0.5356612883377851, "grad_norm": 0.18611110746860504, "learning_rate": 4.464345950328274e-06, "loss": 0.9177, "step": 74000 }, { "epoch": 0.5357336749983713, "grad_norm": 0.14643755555152893, "learning_rate": 4.464273563667688e-06, "loss": 0.9048, "step": 74010 }, { "epoch": 0.5358060616589575, "grad_norm": 0.17977450788021088, "learning_rate": 4.464201177007101e-06, "loss": 0.9166, "step": 74020 }, { "epoch": 0.5358784483195437, "grad_norm": 0.159438818693161, "learning_rate": 4.464128790346515e-06, "loss": 0.9098, "step": 74030 }, { "epoch": 0.5359508349801299, "grad_norm": 0.1566077172756195, "learning_rate": 4.464056403685929e-06, "loss": 0.9198, "step": 74040 }, { "epoch": 0.536023221640716, "grad_norm": 0.17558586597442627, "learning_rate": 4.463984017025343e-06, "loss": 0.9303, "step": 74050 }, { "epoch": 0.5360956083013022, "grad_norm": 0.15491542220115662, "learning_rate": 4.463911630364757e-06, "loss": 0.9193, "step": 74060 }, { "epoch": 0.5361679949618884, "grad_norm": 0.17469727993011475, "learning_rate": 4.46383924370417e-06, "loss": 0.9117, "step": 74070 }, { "epoch": 0.5362403816224746, "grad_norm": 0.1587485820055008, "learning_rate": 4.463766857043584e-06, "loss": 0.9028, "step": 74080 }, { "epoch": 0.5363127682830608, "grad_norm": 0.15624359250068665, "learning_rate": 4.4636944703829984e-06, "loss": 0.9028, "step": 74090 }, { "epoch": 0.536385154943647, "grad_norm": 0.17197231948375702, "learning_rate": 4.463622083722412e-06, "loss": 0.9141, "step": 74100 }, { "epoch": 0.5364575416042332, "grad_norm": 0.1522126942873001, "learning_rate": 4.463549697061826e-06, "loss": 0.9279, "step": 74110 }, { "epoch": 0.5365299282648194, "grad_norm": 0.1569167822599411, "learning_rate": 4.463477310401239e-06, "loss": 0.9168, "step": 74120 }, { "epoch": 0.5366023149254056, "grad_norm": 0.18380215764045715, "learning_rate": 4.463404923740654e-06, "loss": 0.9146, "step": 74130 }, { "epoch": 0.5366747015859917, "grad_norm": 0.1586330085992813, "learning_rate": 4.463332537080067e-06, "loss": 0.8948, "step": 74140 }, { "epoch": 0.5367470882465779, "grad_norm": 0.1561429798603058, "learning_rate": 4.463260150419481e-06, "loss": 0.9113, "step": 74150 }, { "epoch": 0.5368194749071641, "grad_norm": 0.1750173717737198, "learning_rate": 4.463187763758895e-06, "loss": 0.9277, "step": 74160 }, { "epoch": 0.5368918615677503, "grad_norm": 0.15227210521697998, "learning_rate": 4.463115377098309e-06, "loss": 0.9042, "step": 74170 }, { "epoch": 0.5369642482283364, "grad_norm": 0.15368933975696564, "learning_rate": 4.463042990437723e-06, "loss": 0.9009, "step": 74180 }, { "epoch": 0.5370366348889226, "grad_norm": 0.1501675844192505, "learning_rate": 4.462970603777136e-06, "loss": 0.9168, "step": 74190 }, { "epoch": 0.5371090215495089, "grad_norm": 0.16025373339653015, "learning_rate": 4.46289821711655e-06, "loss": 0.9125, "step": 74200 }, { "epoch": 0.5371814082100951, "grad_norm": 0.1600399762392044, "learning_rate": 4.462825830455964e-06, "loss": 0.8988, "step": 74210 }, { "epoch": 0.5372537948706813, "grad_norm": 0.1532607227563858, "learning_rate": 4.462753443795378e-06, "loss": 0.9254, "step": 74220 }, { "epoch": 0.5373261815312674, "grad_norm": 0.1633194237947464, "learning_rate": 4.462681057134792e-06, "loss": 0.9143, "step": 74230 }, { "epoch": 0.5373985681918536, "grad_norm": 0.15434294939041138, "learning_rate": 4.462608670474205e-06, "loss": 0.9139, "step": 74240 }, { "epoch": 0.5374709548524398, "grad_norm": 0.15733137726783752, "learning_rate": 4.46253628381362e-06, "loss": 0.9254, "step": 74250 }, { "epoch": 0.537543341513026, "grad_norm": 0.16783663630485535, "learning_rate": 4.462463897153033e-06, "loss": 0.9132, "step": 74260 }, { "epoch": 0.5376157281736121, "grad_norm": 0.16016171872615814, "learning_rate": 4.462391510492447e-06, "loss": 0.9161, "step": 74270 }, { "epoch": 0.5376881148341983, "grad_norm": 0.1455632746219635, "learning_rate": 4.4623191238318605e-06, "loss": 0.9178, "step": 74280 }, { "epoch": 0.5377605014947845, "grad_norm": 0.16944627463817596, "learning_rate": 4.462246737171275e-06, "loss": 0.9171, "step": 74290 }, { "epoch": 0.5378328881553708, "grad_norm": 0.16209137439727783, "learning_rate": 4.462174350510689e-06, "loss": 0.9148, "step": 74300 }, { "epoch": 0.537905274815957, "grad_norm": 0.16766659915447235, "learning_rate": 4.462101963850102e-06, "loss": 0.9054, "step": 74310 }, { "epoch": 0.5379776614765431, "grad_norm": 0.15169428288936615, "learning_rate": 4.462029577189516e-06, "loss": 0.916, "step": 74320 }, { "epoch": 0.5380500481371293, "grad_norm": 0.17565733194351196, "learning_rate": 4.46195719052893e-06, "loss": 0.9056, "step": 74330 }, { "epoch": 0.5381224347977155, "grad_norm": 0.1468237191438675, "learning_rate": 4.461884803868344e-06, "loss": 0.9199, "step": 74340 }, { "epoch": 0.5381948214583017, "grad_norm": 0.15995702147483826, "learning_rate": 4.4618124172077575e-06, "loss": 0.9205, "step": 74350 }, { "epoch": 0.5382672081188878, "grad_norm": 0.28701356053352356, "learning_rate": 4.461740030547171e-06, "loss": 0.9032, "step": 74360 }, { "epoch": 0.538339594779474, "grad_norm": 0.15283870697021484, "learning_rate": 4.461667643886585e-06, "loss": 0.919, "step": 74370 }, { "epoch": 0.5384119814400602, "grad_norm": 0.2051803320646286, "learning_rate": 4.461595257225998e-06, "loss": 0.9115, "step": 74380 }, { "epoch": 0.5384843681006464, "grad_norm": 0.16268160939216614, "learning_rate": 4.461522870565412e-06, "loss": 0.9043, "step": 74390 }, { "epoch": 0.5385567547612325, "grad_norm": 0.16522175073623657, "learning_rate": 4.4614504839048265e-06, "loss": 0.9193, "step": 74400 }, { "epoch": 0.5386291414218188, "grad_norm": 0.1583668738603592, "learning_rate": 4.46137809724424e-06, "loss": 0.9083, "step": 74410 }, { "epoch": 0.538701528082405, "grad_norm": 0.15010447800159454, "learning_rate": 4.461305710583654e-06, "loss": 0.9025, "step": 74420 }, { "epoch": 0.5387739147429912, "grad_norm": 0.16112802922725677, "learning_rate": 4.461233323923067e-06, "loss": 0.9152, "step": 74430 }, { "epoch": 0.5388463014035774, "grad_norm": 0.16676628589630127, "learning_rate": 4.461160937262482e-06, "loss": 0.9022, "step": 74440 }, { "epoch": 0.5389186880641635, "grad_norm": 0.1548004299402237, "learning_rate": 4.461088550601895e-06, "loss": 0.9131, "step": 74450 }, { "epoch": 0.5389910747247497, "grad_norm": 0.17125369608402252, "learning_rate": 4.461016163941309e-06, "loss": 0.9151, "step": 74460 }, { "epoch": 0.5390634613853359, "grad_norm": 0.1690664291381836, "learning_rate": 4.460943777280723e-06, "loss": 0.9138, "step": 74470 }, { "epoch": 0.5391358480459221, "grad_norm": 0.17033086717128754, "learning_rate": 4.460871390620137e-06, "loss": 0.8982, "step": 74480 }, { "epoch": 0.5392082347065082, "grad_norm": 0.15503831207752228, "learning_rate": 4.460799003959551e-06, "loss": 0.9268, "step": 74490 }, { "epoch": 0.5392806213670944, "grad_norm": 0.15129579603672028, "learning_rate": 4.460726617298964e-06, "loss": 0.9067, "step": 74500 }, { "epoch": 0.5393530080276807, "grad_norm": 0.16346019506454468, "learning_rate": 4.460654230638378e-06, "loss": 0.9138, "step": 74510 }, { "epoch": 0.5394253946882669, "grad_norm": 0.15942130982875824, "learning_rate": 4.460581843977792e-06, "loss": 0.9109, "step": 74520 }, { "epoch": 0.5394977813488531, "grad_norm": 0.14833667874336243, "learning_rate": 4.460509457317206e-06, "loss": 0.9065, "step": 74530 }, { "epoch": 0.5395701680094392, "grad_norm": 0.15694496035575867, "learning_rate": 4.46043707065662e-06, "loss": 0.9225, "step": 74540 }, { "epoch": 0.5396425546700254, "grad_norm": 0.15417462587356567, "learning_rate": 4.460364683996033e-06, "loss": 0.8993, "step": 74550 }, { "epoch": 0.5397149413306116, "grad_norm": 0.16096115112304688, "learning_rate": 4.460292297335448e-06, "loss": 0.9104, "step": 74560 }, { "epoch": 0.5397873279911978, "grad_norm": 0.1507575362920761, "learning_rate": 4.460219910674861e-06, "loss": 0.9165, "step": 74570 }, { "epoch": 0.539859714651784, "grad_norm": 0.16418473422527313, "learning_rate": 4.460147524014275e-06, "loss": 0.9192, "step": 74580 }, { "epoch": 0.5399321013123701, "grad_norm": 0.1715206801891327, "learning_rate": 4.4600751373536886e-06, "loss": 0.9093, "step": 74590 }, { "epoch": 0.5400044879729563, "grad_norm": 0.1661735624074936, "learning_rate": 4.460002750693103e-06, "loss": 0.9224, "step": 74600 }, { "epoch": 0.5400768746335425, "grad_norm": 0.15840347111225128, "learning_rate": 4.459930364032517e-06, "loss": 0.9192, "step": 74610 }, { "epoch": 0.5401492612941288, "grad_norm": 0.15837319195270538, "learning_rate": 4.45985797737193e-06, "loss": 0.9191, "step": 74620 }, { "epoch": 0.540221647954715, "grad_norm": 0.1872469186782837, "learning_rate": 4.459785590711344e-06, "loss": 0.9116, "step": 74630 }, { "epoch": 0.5402940346153011, "grad_norm": 0.16932313144207, "learning_rate": 4.459713204050758e-06, "loss": 0.8963, "step": 74640 }, { "epoch": 0.5403664212758873, "grad_norm": 0.1552034318447113, "learning_rate": 4.459640817390172e-06, "loss": 0.9058, "step": 74650 }, { "epoch": 0.5404388079364735, "grad_norm": 0.1520053595304489, "learning_rate": 4.4595684307295856e-06, "loss": 0.9006, "step": 74660 }, { "epoch": 0.5405111945970597, "grad_norm": 0.15014509856700897, "learning_rate": 4.459496044068999e-06, "loss": 0.9045, "step": 74670 }, { "epoch": 0.5405835812576458, "grad_norm": 0.18093499541282654, "learning_rate": 4.459423657408413e-06, "loss": 0.9013, "step": 74680 }, { "epoch": 0.540655967918232, "grad_norm": 0.1501227766275406, "learning_rate": 4.459351270747827e-06, "loss": 0.9043, "step": 74690 }, { "epoch": 0.5407283545788182, "grad_norm": 0.1506822109222412, "learning_rate": 4.459278884087241e-06, "loss": 0.9163, "step": 74700 }, { "epoch": 0.5408007412394044, "grad_norm": 0.1608346849679947, "learning_rate": 4.4592064974266545e-06, "loss": 0.911, "step": 74710 }, { "epoch": 0.5408731278999905, "grad_norm": 0.1549476683139801, "learning_rate": 4.459134110766068e-06, "loss": 0.9132, "step": 74720 }, { "epoch": 0.5409455145605768, "grad_norm": 0.15968433022499084, "learning_rate": 4.4590617241054826e-06, "loss": 0.9119, "step": 74730 }, { "epoch": 0.541017901221163, "grad_norm": 0.17671333253383636, "learning_rate": 4.458989337444896e-06, "loss": 0.9038, "step": 74740 }, { "epoch": 0.5410902878817492, "grad_norm": 0.15897606313228607, "learning_rate": 4.45891695078431e-06, "loss": 0.9038, "step": 74750 }, { "epoch": 0.5411626745423354, "grad_norm": 0.15166926383972168, "learning_rate": 4.458844564123723e-06, "loss": 0.9155, "step": 74760 }, { "epoch": 0.5412350612029215, "grad_norm": 0.14908485114574432, "learning_rate": 4.458772177463138e-06, "loss": 0.9107, "step": 74770 }, { "epoch": 0.5413074478635077, "grad_norm": 0.15309011936187744, "learning_rate": 4.4586997908025515e-06, "loss": 0.9124, "step": 74780 }, { "epoch": 0.5413798345240939, "grad_norm": 0.15603891015052795, "learning_rate": 4.458627404141965e-06, "loss": 0.9081, "step": 74790 }, { "epoch": 0.5414522211846801, "grad_norm": 0.17669987678527832, "learning_rate": 4.458555017481379e-06, "loss": 0.9093, "step": 74800 }, { "epoch": 0.5415246078452662, "grad_norm": 0.1566879153251648, "learning_rate": 4.458482630820793e-06, "loss": 0.9079, "step": 74810 }, { "epoch": 0.5415969945058524, "grad_norm": 0.17799584567546844, "learning_rate": 4.458410244160207e-06, "loss": 0.9145, "step": 74820 }, { "epoch": 0.5416693811664387, "grad_norm": 0.1597883254289627, "learning_rate": 4.4583378574996204e-06, "loss": 0.9082, "step": 74830 }, { "epoch": 0.5417417678270249, "grad_norm": 0.16738228499889374, "learning_rate": 4.458265470839034e-06, "loss": 0.9198, "step": 74840 }, { "epoch": 0.541814154487611, "grad_norm": 0.16331464052200317, "learning_rate": 4.4581930841784485e-06, "loss": 0.9269, "step": 74850 }, { "epoch": 0.5418865411481972, "grad_norm": 0.1695605218410492, "learning_rate": 4.458120697517862e-06, "loss": 0.9043, "step": 74860 }, { "epoch": 0.5419589278087834, "grad_norm": 0.15982206165790558, "learning_rate": 4.458048310857276e-06, "loss": 0.9224, "step": 74870 }, { "epoch": 0.5420313144693696, "grad_norm": 0.16445820033550262, "learning_rate": 4.457975924196689e-06, "loss": 0.9069, "step": 74880 }, { "epoch": 0.5421037011299558, "grad_norm": 0.18443961441516876, "learning_rate": 4.457903537536103e-06, "loss": 0.9108, "step": 74890 }, { "epoch": 0.5421760877905419, "grad_norm": 0.1700538992881775, "learning_rate": 4.457831150875517e-06, "loss": 0.924, "step": 74900 }, { "epoch": 0.5422484744511281, "grad_norm": 0.174394428730011, "learning_rate": 4.45775876421493e-06, "loss": 0.9144, "step": 74910 }, { "epoch": 0.5423208611117143, "grad_norm": 0.1507367342710495, "learning_rate": 4.457686377554345e-06, "loss": 0.9172, "step": 74920 }, { "epoch": 0.5423932477723005, "grad_norm": 0.1828458607196808, "learning_rate": 4.457613990893758e-06, "loss": 0.9018, "step": 74930 }, { "epoch": 0.5424656344328868, "grad_norm": 0.15712609887123108, "learning_rate": 4.457541604233172e-06, "loss": 0.9145, "step": 74940 }, { "epoch": 0.5425380210934729, "grad_norm": 0.1707613319158554, "learning_rate": 4.4574692175725855e-06, "loss": 0.9128, "step": 74950 }, { "epoch": 0.5426104077540591, "grad_norm": 0.17930477857589722, "learning_rate": 4.457396830912e-06, "loss": 0.9068, "step": 74960 }, { "epoch": 0.5426827944146453, "grad_norm": 0.1521557718515396, "learning_rate": 4.457324444251414e-06, "loss": 0.9035, "step": 74970 }, { "epoch": 0.5427551810752315, "grad_norm": 0.17281807959079742, "learning_rate": 4.457252057590827e-06, "loss": 0.9113, "step": 74980 }, { "epoch": 0.5428275677358176, "grad_norm": 0.147269606590271, "learning_rate": 4.457179670930241e-06, "loss": 0.9171, "step": 74990 }, { "epoch": 0.5428999543964038, "grad_norm": 0.1692981719970703, "learning_rate": 4.457107284269655e-06, "loss": 0.9118, "step": 75000 }, { "epoch": 0.54297234105699, "grad_norm": 0.16155540943145752, "learning_rate": 4.457034897609069e-06, "loss": 0.9116, "step": 75010 }, { "epoch": 0.5430447277175762, "grad_norm": 0.15603849291801453, "learning_rate": 4.4569625109484825e-06, "loss": 0.9051, "step": 75020 }, { "epoch": 0.5431171143781623, "grad_norm": 0.16101206839084625, "learning_rate": 4.456890124287896e-06, "loss": 0.9154, "step": 75030 }, { "epoch": 0.5431895010387486, "grad_norm": 0.15974144637584686, "learning_rate": 4.456817737627311e-06, "loss": 0.9182, "step": 75040 }, { "epoch": 0.5432618876993348, "grad_norm": 0.169901043176651, "learning_rate": 4.456745350966724e-06, "loss": 0.9084, "step": 75050 }, { "epoch": 0.543334274359921, "grad_norm": 0.16322177648544312, "learning_rate": 4.456672964306138e-06, "loss": 0.9178, "step": 75060 }, { "epoch": 0.5434066610205072, "grad_norm": 0.16943545639514923, "learning_rate": 4.4566005776455514e-06, "loss": 0.9102, "step": 75070 }, { "epoch": 0.5434790476810933, "grad_norm": 0.16489852964878082, "learning_rate": 4.456528190984966e-06, "loss": 0.9265, "step": 75080 }, { "epoch": 0.5435514343416795, "grad_norm": 0.198415607213974, "learning_rate": 4.4564558043243795e-06, "loss": 0.9159, "step": 75090 }, { "epoch": 0.5436238210022657, "grad_norm": 0.15088145434856415, "learning_rate": 4.456383417663793e-06, "loss": 0.9112, "step": 75100 }, { "epoch": 0.5436962076628519, "grad_norm": 0.1523219645023346, "learning_rate": 4.456311031003207e-06, "loss": 0.9206, "step": 75110 }, { "epoch": 0.543768594323438, "grad_norm": 0.1473480463027954, "learning_rate": 4.456238644342621e-06, "loss": 0.918, "step": 75120 }, { "epoch": 0.5438409809840242, "grad_norm": 0.1520926058292389, "learning_rate": 4.456166257682035e-06, "loss": 0.9063, "step": 75130 }, { "epoch": 0.5439133676446104, "grad_norm": 0.15161623060703278, "learning_rate": 4.4560938710214485e-06, "loss": 0.9107, "step": 75140 }, { "epoch": 0.5439857543051967, "grad_norm": 0.15313850343227386, "learning_rate": 4.456021484360862e-06, "loss": 0.9073, "step": 75150 }, { "epoch": 0.5440581409657829, "grad_norm": 0.14688560366630554, "learning_rate": 4.4559490977002765e-06, "loss": 0.9086, "step": 75160 }, { "epoch": 0.544130527626369, "grad_norm": 0.15404599905014038, "learning_rate": 4.45587671103969e-06, "loss": 0.9174, "step": 75170 }, { "epoch": 0.5442029142869552, "grad_norm": 0.16538633406162262, "learning_rate": 4.455804324379104e-06, "loss": 0.9209, "step": 75180 }, { "epoch": 0.5442753009475414, "grad_norm": 0.16180939972400665, "learning_rate": 4.455731937718517e-06, "loss": 0.9071, "step": 75190 }, { "epoch": 0.5443476876081276, "grad_norm": 0.1648901402950287, "learning_rate": 4.455659551057932e-06, "loss": 0.9119, "step": 75200 }, { "epoch": 0.5444200742687137, "grad_norm": 0.1480080932378769, "learning_rate": 4.4555871643973455e-06, "loss": 0.9057, "step": 75210 }, { "epoch": 0.5444924609292999, "grad_norm": 0.16776438057422638, "learning_rate": 4.455514777736759e-06, "loss": 0.9274, "step": 75220 }, { "epoch": 0.5445648475898861, "grad_norm": 0.1602822095155716, "learning_rate": 4.455442391076173e-06, "loss": 0.9214, "step": 75230 }, { "epoch": 0.5446372342504723, "grad_norm": 0.14920540153980255, "learning_rate": 4.455370004415587e-06, "loss": 0.922, "step": 75240 }, { "epoch": 0.5447096209110585, "grad_norm": 0.14813263714313507, "learning_rate": 4.455297617755001e-06, "loss": 0.9304, "step": 75250 }, { "epoch": 0.5447820075716447, "grad_norm": 0.16396941244602203, "learning_rate": 4.455225231094414e-06, "loss": 0.9075, "step": 75260 }, { "epoch": 0.5448543942322309, "grad_norm": 0.15782274305820465, "learning_rate": 4.455152844433828e-06, "loss": 0.9049, "step": 75270 }, { "epoch": 0.5449267808928171, "grad_norm": 0.15898077189922333, "learning_rate": 4.4550804577732425e-06, "loss": 0.9095, "step": 75280 }, { "epoch": 0.5449991675534033, "grad_norm": 0.15519340336322784, "learning_rate": 4.455008071112656e-06, "loss": 0.9105, "step": 75290 }, { "epoch": 0.5450715542139895, "grad_norm": 0.1506635993719101, "learning_rate": 4.45493568445207e-06, "loss": 0.9096, "step": 75300 }, { "epoch": 0.5451439408745756, "grad_norm": 0.15947678685188293, "learning_rate": 4.454863297791483e-06, "loss": 0.9099, "step": 75310 }, { "epoch": 0.5452163275351618, "grad_norm": 0.1622234433889389, "learning_rate": 4.454790911130897e-06, "loss": 0.9208, "step": 75320 }, { "epoch": 0.545288714195748, "grad_norm": 0.16322208940982819, "learning_rate": 4.454718524470311e-06, "loss": 0.8956, "step": 75330 }, { "epoch": 0.5453611008563342, "grad_norm": 0.16314153373241425, "learning_rate": 4.454646137809725e-06, "loss": 0.9099, "step": 75340 }, { "epoch": 0.5454334875169203, "grad_norm": 0.15650366246700287, "learning_rate": 4.454573751149139e-06, "loss": 0.9143, "step": 75350 }, { "epoch": 0.5455058741775066, "grad_norm": 0.16160249710083008, "learning_rate": 4.454501364488552e-06, "loss": 0.9086, "step": 75360 }, { "epoch": 0.5455782608380928, "grad_norm": 0.18455857038497925, "learning_rate": 4.454428977827967e-06, "loss": 0.916, "step": 75370 }, { "epoch": 0.545650647498679, "grad_norm": 0.16547948122024536, "learning_rate": 4.45435659116738e-06, "loss": 0.9188, "step": 75380 }, { "epoch": 0.5457230341592652, "grad_norm": 0.15037128329277039, "learning_rate": 4.454284204506794e-06, "loss": 0.9107, "step": 75390 }, { "epoch": 0.5457954208198513, "grad_norm": 0.15458130836486816, "learning_rate": 4.4542118178462076e-06, "loss": 0.9162, "step": 75400 }, { "epoch": 0.5458678074804375, "grad_norm": 0.17746815085411072, "learning_rate": 4.454139431185622e-06, "loss": 0.9108, "step": 75410 }, { "epoch": 0.5459401941410237, "grad_norm": 0.15650658309459686, "learning_rate": 4.454067044525035e-06, "loss": 0.9192, "step": 75420 }, { "epoch": 0.5460125808016099, "grad_norm": 0.16250117123126984, "learning_rate": 4.453994657864449e-06, "loss": 0.9132, "step": 75430 }, { "epoch": 0.546084967462196, "grad_norm": 0.1670369803905487, "learning_rate": 4.453922271203863e-06, "loss": 0.9135, "step": 75440 }, { "epoch": 0.5461573541227822, "grad_norm": 1.3092010021209717, "learning_rate": 4.4538498845432765e-06, "loss": 0.9078, "step": 75450 }, { "epoch": 0.5462297407833684, "grad_norm": 0.17468515038490295, "learning_rate": 4.45377749788269e-06, "loss": 0.9231, "step": 75460 }, { "epoch": 0.5463021274439547, "grad_norm": 0.17053230106830597, "learning_rate": 4.453705111222104e-06, "loss": 0.9126, "step": 75470 }, { "epoch": 0.5463745141045409, "grad_norm": 0.17557454109191895, "learning_rate": 4.453632724561518e-06, "loss": 0.9228, "step": 75480 }, { "epoch": 0.546446900765127, "grad_norm": 0.17532938718795776, "learning_rate": 4.453560337900932e-06, "loss": 0.899, "step": 75490 }, { "epoch": 0.5465192874257132, "grad_norm": 0.15332704782485962, "learning_rate": 4.453487951240345e-06, "loss": 0.9157, "step": 75500 }, { "epoch": 0.5465916740862994, "grad_norm": 0.18341203033924103, "learning_rate": 4.453415564579759e-06, "loss": 0.9216, "step": 75510 }, { "epoch": 0.5466640607468856, "grad_norm": 0.15997199714183807, "learning_rate": 4.4533431779191735e-06, "loss": 0.9244, "step": 75520 }, { "epoch": 0.5467364474074717, "grad_norm": 0.1543131321668625, "learning_rate": 4.453270791258587e-06, "loss": 0.9075, "step": 75530 }, { "epoch": 0.5468088340680579, "grad_norm": 0.16920112073421478, "learning_rate": 4.453198404598001e-06, "loss": 0.912, "step": 75540 }, { "epoch": 0.5468812207286441, "grad_norm": 0.15737563371658325, "learning_rate": 4.453126017937414e-06, "loss": 0.9132, "step": 75550 }, { "epoch": 0.5469536073892303, "grad_norm": 0.175185427069664, "learning_rate": 4.453053631276829e-06, "loss": 0.9211, "step": 75560 }, { "epoch": 0.5470259940498166, "grad_norm": 0.163910910487175, "learning_rate": 4.4529812446162424e-06, "loss": 0.9075, "step": 75570 }, { "epoch": 0.5470983807104027, "grad_norm": 0.185495063662529, "learning_rate": 4.452908857955656e-06, "loss": 0.929, "step": 75580 }, { "epoch": 0.5471707673709889, "grad_norm": 0.159888356924057, "learning_rate": 4.45283647129507e-06, "loss": 0.9159, "step": 75590 }, { "epoch": 0.5472431540315751, "grad_norm": 0.14591063559055328, "learning_rate": 4.452764084634484e-06, "loss": 0.9206, "step": 75600 }, { "epoch": 0.5473155406921613, "grad_norm": 0.16549073159694672, "learning_rate": 4.452691697973898e-06, "loss": 0.8996, "step": 75610 }, { "epoch": 0.5473879273527474, "grad_norm": 0.16032549738883972, "learning_rate": 4.452619311313311e-06, "loss": 0.9198, "step": 75620 }, { "epoch": 0.5474603140133336, "grad_norm": 0.15921436250209808, "learning_rate": 4.452546924652725e-06, "loss": 0.8986, "step": 75630 }, { "epoch": 0.5475327006739198, "grad_norm": 0.15605443716049194, "learning_rate": 4.4524745379921394e-06, "loss": 0.911, "step": 75640 }, { "epoch": 0.547605087334506, "grad_norm": 0.1790502518415451, "learning_rate": 4.452402151331553e-06, "loss": 0.9153, "step": 75650 }, { "epoch": 0.5476774739950921, "grad_norm": 0.16580620408058167, "learning_rate": 4.452329764670967e-06, "loss": 0.9162, "step": 75660 }, { "epoch": 0.5477498606556783, "grad_norm": 0.19182774424552917, "learning_rate": 4.45225737801038e-06, "loss": 0.9188, "step": 75670 }, { "epoch": 0.5478222473162646, "grad_norm": 0.16452908515930176, "learning_rate": 4.452184991349795e-06, "loss": 0.9187, "step": 75680 }, { "epoch": 0.5478946339768508, "grad_norm": 0.15444496273994446, "learning_rate": 4.452112604689208e-06, "loss": 0.9169, "step": 75690 }, { "epoch": 0.547967020637437, "grad_norm": 0.149451345205307, "learning_rate": 4.452040218028622e-06, "loss": 0.9236, "step": 75700 }, { "epoch": 0.5480394072980231, "grad_norm": 0.16976690292358398, "learning_rate": 4.451967831368036e-06, "loss": 0.9086, "step": 75710 }, { "epoch": 0.5481117939586093, "grad_norm": 0.14885394275188446, "learning_rate": 4.45189544470745e-06, "loss": 0.9012, "step": 75720 }, { "epoch": 0.5481841806191955, "grad_norm": 0.1846446394920349, "learning_rate": 4.451823058046864e-06, "loss": 0.9183, "step": 75730 }, { "epoch": 0.5482565672797817, "grad_norm": 0.15907852351665497, "learning_rate": 4.451750671386277e-06, "loss": 0.9219, "step": 75740 }, { "epoch": 0.5483289539403678, "grad_norm": 0.1452861726284027, "learning_rate": 4.451678284725691e-06, "loss": 0.9102, "step": 75750 }, { "epoch": 0.548401340600954, "grad_norm": 0.16507689654827118, "learning_rate": 4.451605898065105e-06, "loss": 0.9243, "step": 75760 }, { "epoch": 0.5484737272615402, "grad_norm": 0.15828417241573334, "learning_rate": 4.451533511404519e-06, "loss": 0.9028, "step": 75770 }, { "epoch": 0.5485461139221264, "grad_norm": 0.18392127752304077, "learning_rate": 4.451461124743933e-06, "loss": 0.908, "step": 75780 }, { "epoch": 0.5486185005827127, "grad_norm": 0.15240317583084106, "learning_rate": 4.451388738083346e-06, "loss": 0.9013, "step": 75790 }, { "epoch": 0.5486908872432988, "grad_norm": 0.14687864482402802, "learning_rate": 4.451316351422761e-06, "loss": 0.9184, "step": 75800 }, { "epoch": 0.548763273903885, "grad_norm": 0.15710905194282532, "learning_rate": 4.451243964762174e-06, "loss": 0.9165, "step": 75810 }, { "epoch": 0.5488356605644712, "grad_norm": 0.1660161018371582, "learning_rate": 4.451171578101588e-06, "loss": 0.9045, "step": 75820 }, { "epoch": 0.5489080472250574, "grad_norm": 0.1465510129928589, "learning_rate": 4.4510991914410015e-06, "loss": 0.8998, "step": 75830 }, { "epoch": 0.5489804338856435, "grad_norm": 0.17025746405124664, "learning_rate": 4.451026804780416e-06, "loss": 0.9093, "step": 75840 }, { "epoch": 0.5490528205462297, "grad_norm": 0.18562918901443481, "learning_rate": 4.45095441811983e-06, "loss": 0.9195, "step": 75850 }, { "epoch": 0.5491252072068159, "grad_norm": 0.1641816943883896, "learning_rate": 4.450882031459243e-06, "loss": 0.9039, "step": 75860 }, { "epoch": 0.5491975938674021, "grad_norm": 0.19206029176712036, "learning_rate": 4.450809644798657e-06, "loss": 0.9224, "step": 75870 }, { "epoch": 0.5492699805279883, "grad_norm": 0.15584112703800201, "learning_rate": 4.450737258138071e-06, "loss": 0.9171, "step": 75880 }, { "epoch": 0.5493423671885745, "grad_norm": 0.1531420797109604, "learning_rate": 4.450664871477485e-06, "loss": 0.8936, "step": 75890 }, { "epoch": 0.5494147538491607, "grad_norm": 0.17042067646980286, "learning_rate": 4.4505924848168985e-06, "loss": 0.9002, "step": 75900 }, { "epoch": 0.5494871405097469, "grad_norm": 0.15222638845443726, "learning_rate": 4.450520098156312e-06, "loss": 0.9058, "step": 75910 }, { "epoch": 0.5495595271703331, "grad_norm": 0.1593593806028366, "learning_rate": 4.450447711495726e-06, "loss": 0.8956, "step": 75920 }, { "epoch": 0.5496319138309192, "grad_norm": 0.16259454190731049, "learning_rate": 4.45037532483514e-06, "loss": 0.9124, "step": 75930 }, { "epoch": 0.5497043004915054, "grad_norm": 0.23078764975070953, "learning_rate": 4.450302938174554e-06, "loss": 0.9127, "step": 75940 }, { "epoch": 0.5497766871520916, "grad_norm": 0.1593347191810608, "learning_rate": 4.4502305515139675e-06, "loss": 0.8963, "step": 75950 }, { "epoch": 0.5498490738126778, "grad_norm": 0.1672598272562027, "learning_rate": 4.450158164853381e-06, "loss": 0.9212, "step": 75960 }, { "epoch": 0.549921460473264, "grad_norm": 0.1680530607700348, "learning_rate": 4.450085778192795e-06, "loss": 0.9194, "step": 75970 }, { "epoch": 0.5499938471338501, "grad_norm": 0.15082144737243652, "learning_rate": 4.450013391532208e-06, "loss": 0.9028, "step": 75980 }, { "epoch": 0.5500662337944363, "grad_norm": 0.1934494823217392, "learning_rate": 4.449941004871623e-06, "loss": 0.907, "step": 75990 }, { "epoch": 0.5501386204550226, "grad_norm": 0.15488490462303162, "learning_rate": 4.449868618211036e-06, "loss": 0.9231, "step": 76000 }, { "epoch": 0.5502110071156088, "grad_norm": 0.1557077169418335, "learning_rate": 4.44979623155045e-06, "loss": 0.9252, "step": 76010 }, { "epoch": 0.550283393776195, "grad_norm": 0.16591419279575348, "learning_rate": 4.449723844889864e-06, "loss": 0.9057, "step": 76020 }, { "epoch": 0.5503557804367811, "grad_norm": 0.16087841987609863, "learning_rate": 4.449651458229278e-06, "loss": 0.9173, "step": 76030 }, { "epoch": 0.5504281670973673, "grad_norm": 0.16236597299575806, "learning_rate": 4.449579071568692e-06, "loss": 0.9211, "step": 76040 }, { "epoch": 0.5505005537579535, "grad_norm": 0.17655915021896362, "learning_rate": 4.449506684908105e-06, "loss": 0.9211, "step": 76050 }, { "epoch": 0.5505729404185397, "grad_norm": 0.1689653992652893, "learning_rate": 4.449434298247519e-06, "loss": 0.9139, "step": 76060 }, { "epoch": 0.5506453270791258, "grad_norm": 0.16405482590198517, "learning_rate": 4.449361911586933e-06, "loss": 0.9165, "step": 76070 }, { "epoch": 0.550717713739712, "grad_norm": 0.16635659337043762, "learning_rate": 4.449289524926347e-06, "loss": 0.8991, "step": 76080 }, { "epoch": 0.5507901004002982, "grad_norm": 0.1841815710067749, "learning_rate": 4.449217138265761e-06, "loss": 0.9175, "step": 76090 }, { "epoch": 0.5508624870608844, "grad_norm": 0.15842583775520325, "learning_rate": 4.449144751605174e-06, "loss": 0.9144, "step": 76100 }, { "epoch": 0.5509348737214707, "grad_norm": 0.16369058191776276, "learning_rate": 4.449072364944588e-06, "loss": 0.9071, "step": 76110 }, { "epoch": 0.5510072603820568, "grad_norm": 0.16025404632091522, "learning_rate": 4.448999978284002e-06, "loss": 0.9108, "step": 76120 }, { "epoch": 0.551079647042643, "grad_norm": 0.15726248919963837, "learning_rate": 4.448927591623416e-06, "loss": 0.9048, "step": 76130 }, { "epoch": 0.5511520337032292, "grad_norm": 0.149103045463562, "learning_rate": 4.4488552049628296e-06, "loss": 0.9041, "step": 76140 }, { "epoch": 0.5512244203638154, "grad_norm": 0.1646459996700287, "learning_rate": 4.448782818302243e-06, "loss": 0.9207, "step": 76150 }, { "epoch": 0.5512968070244015, "grad_norm": 0.16346558928489685, "learning_rate": 4.448710431641658e-06, "loss": 0.9152, "step": 76160 }, { "epoch": 0.5513691936849877, "grad_norm": 0.15449324250221252, "learning_rate": 4.448638044981071e-06, "loss": 0.9028, "step": 76170 }, { "epoch": 0.5514415803455739, "grad_norm": 0.1610867977142334, "learning_rate": 4.448565658320485e-06, "loss": 0.9255, "step": 76180 }, { "epoch": 0.5515139670061601, "grad_norm": 0.15043221414089203, "learning_rate": 4.4484932716598985e-06, "loss": 0.9077, "step": 76190 }, { "epoch": 0.5515863536667462, "grad_norm": 0.18590010702610016, "learning_rate": 4.448420884999313e-06, "loss": 0.9125, "step": 76200 }, { "epoch": 0.5516587403273325, "grad_norm": 0.16317640244960785, "learning_rate": 4.4483484983387266e-06, "loss": 0.9219, "step": 76210 }, { "epoch": 0.5517311269879187, "grad_norm": 0.15565603971481323, "learning_rate": 4.44827611167814e-06, "loss": 0.9128, "step": 76220 }, { "epoch": 0.5518035136485049, "grad_norm": 0.1605430394411087, "learning_rate": 4.448203725017554e-06, "loss": 0.9104, "step": 76230 }, { "epoch": 0.5518759003090911, "grad_norm": 0.17234712839126587, "learning_rate": 4.448131338356968e-06, "loss": 0.9054, "step": 76240 }, { "epoch": 0.5519482869696772, "grad_norm": 0.15464408695697784, "learning_rate": 4.448058951696382e-06, "loss": 0.9115, "step": 76250 }, { "epoch": 0.5520206736302634, "grad_norm": 0.15122352540493011, "learning_rate": 4.4479865650357955e-06, "loss": 0.9039, "step": 76260 }, { "epoch": 0.5520930602908496, "grad_norm": 0.1647047996520996, "learning_rate": 4.447914178375209e-06, "loss": 0.9161, "step": 76270 }, { "epoch": 0.5521654469514358, "grad_norm": 0.16340136528015137, "learning_rate": 4.4478417917146236e-06, "loss": 0.9097, "step": 76280 }, { "epoch": 0.5522378336120219, "grad_norm": 0.1576181948184967, "learning_rate": 4.447769405054037e-06, "loss": 0.9111, "step": 76290 }, { "epoch": 0.5523102202726081, "grad_norm": 0.17520330846309662, "learning_rate": 4.447697018393451e-06, "loss": 0.9019, "step": 76300 }, { "epoch": 0.5523826069331943, "grad_norm": 0.16088417172431946, "learning_rate": 4.447624631732864e-06, "loss": 0.9159, "step": 76310 }, { "epoch": 0.5524549935937806, "grad_norm": 0.15434597432613373, "learning_rate": 4.447552245072279e-06, "loss": 0.9033, "step": 76320 }, { "epoch": 0.5525273802543668, "grad_norm": 0.16620974242687225, "learning_rate": 4.4474798584116925e-06, "loss": 0.9203, "step": 76330 }, { "epoch": 0.5525997669149529, "grad_norm": 0.159925177693367, "learning_rate": 4.447407471751106e-06, "loss": 0.9086, "step": 76340 }, { "epoch": 0.5526721535755391, "grad_norm": 0.19963112473487854, "learning_rate": 4.44733508509052e-06, "loss": 0.9212, "step": 76350 }, { "epoch": 0.5527445402361253, "grad_norm": 0.1715710163116455, "learning_rate": 4.447262698429934e-06, "loss": 0.9205, "step": 76360 }, { "epoch": 0.5528169268967115, "grad_norm": 0.1506585329771042, "learning_rate": 4.447190311769348e-06, "loss": 0.9063, "step": 76370 }, { "epoch": 0.5528893135572976, "grad_norm": 0.16564472019672394, "learning_rate": 4.4471179251087614e-06, "loss": 0.9322, "step": 76380 }, { "epoch": 0.5529617002178838, "grad_norm": 0.1629706174135208, "learning_rate": 4.447045538448175e-06, "loss": 0.8981, "step": 76390 }, { "epoch": 0.55303408687847, "grad_norm": 0.15394346415996552, "learning_rate": 4.4469731517875895e-06, "loss": 0.918, "step": 76400 }, { "epoch": 0.5531064735390562, "grad_norm": 0.1582847237586975, "learning_rate": 4.446900765127003e-06, "loss": 0.9083, "step": 76410 }, { "epoch": 0.5531788601996425, "grad_norm": 0.16456353664398193, "learning_rate": 4.446828378466417e-06, "loss": 0.9225, "step": 76420 }, { "epoch": 0.5532512468602286, "grad_norm": 0.1710321456193924, "learning_rate": 4.44675599180583e-06, "loss": 0.916, "step": 76430 }, { "epoch": 0.5533236335208148, "grad_norm": 0.15686561167240143, "learning_rate": 4.446683605145245e-06, "loss": 0.9092, "step": 76440 }, { "epoch": 0.553396020181401, "grad_norm": 0.15640173852443695, "learning_rate": 4.4466112184846584e-06, "loss": 0.9061, "step": 76450 }, { "epoch": 0.5534684068419872, "grad_norm": 0.1626739203929901, "learning_rate": 4.446538831824072e-06, "loss": 0.9156, "step": 76460 }, { "epoch": 0.5535407935025733, "grad_norm": 0.15522697567939758, "learning_rate": 4.446466445163486e-06, "loss": 0.8936, "step": 76470 }, { "epoch": 0.5536131801631595, "grad_norm": 0.15257471799850464, "learning_rate": 4.446394058502899e-06, "loss": 0.9053, "step": 76480 }, { "epoch": 0.5536855668237457, "grad_norm": 0.16405083239078522, "learning_rate": 4.446321671842313e-06, "loss": 0.9124, "step": 76490 }, { "epoch": 0.5537579534843319, "grad_norm": 0.1680813431739807, "learning_rate": 4.4462492851817265e-06, "loss": 0.9134, "step": 76500 }, { "epoch": 0.553830340144918, "grad_norm": 0.16853106021881104, "learning_rate": 4.446176898521141e-06, "loss": 0.926, "step": 76510 }, { "epoch": 0.5539027268055042, "grad_norm": 0.164813831448555, "learning_rate": 4.446104511860555e-06, "loss": 0.9091, "step": 76520 }, { "epoch": 0.5539751134660905, "grad_norm": 0.1515646129846573, "learning_rate": 4.446032125199968e-06, "loss": 0.9083, "step": 76530 }, { "epoch": 0.5540475001266767, "grad_norm": 0.16372545063495636, "learning_rate": 4.445959738539382e-06, "loss": 0.8965, "step": 76540 }, { "epoch": 0.5541198867872629, "grad_norm": 0.1647547334432602, "learning_rate": 4.445887351878796e-06, "loss": 0.8986, "step": 76550 }, { "epoch": 0.554192273447849, "grad_norm": 0.1702655404806137, "learning_rate": 4.44581496521821e-06, "loss": 0.9111, "step": 76560 }, { "epoch": 0.5542646601084352, "grad_norm": 0.1671968400478363, "learning_rate": 4.4457425785576235e-06, "loss": 0.9038, "step": 76570 }, { "epoch": 0.5543370467690214, "grad_norm": 0.23342488706111908, "learning_rate": 4.445670191897037e-06, "loss": 0.8895, "step": 76580 }, { "epoch": 0.5544094334296076, "grad_norm": 0.17012055218219757, "learning_rate": 4.445597805236452e-06, "loss": 0.9223, "step": 76590 }, { "epoch": 0.5544818200901938, "grad_norm": 0.153640478849411, "learning_rate": 4.445525418575865e-06, "loss": 0.9189, "step": 76600 }, { "epoch": 0.5545542067507799, "grad_norm": 0.1719810962677002, "learning_rate": 4.445453031915279e-06, "loss": 0.8992, "step": 76610 }, { "epoch": 0.5546265934113661, "grad_norm": 0.16828425228595734, "learning_rate": 4.4453806452546925e-06, "loss": 0.9154, "step": 76620 }, { "epoch": 0.5546989800719523, "grad_norm": 0.1485888808965683, "learning_rate": 4.445308258594107e-06, "loss": 0.9044, "step": 76630 }, { "epoch": 0.5547713667325386, "grad_norm": 0.15807074308395386, "learning_rate": 4.4452358719335205e-06, "loss": 0.9193, "step": 76640 }, { "epoch": 0.5548437533931248, "grad_norm": 0.2003334015607834, "learning_rate": 4.445163485272934e-06, "loss": 0.9226, "step": 76650 }, { "epoch": 0.5549161400537109, "grad_norm": 0.16058491170406342, "learning_rate": 4.445091098612348e-06, "loss": 0.9208, "step": 76660 }, { "epoch": 0.5549885267142971, "grad_norm": 0.22712364792823792, "learning_rate": 4.445018711951762e-06, "loss": 0.9116, "step": 76670 }, { "epoch": 0.5550609133748833, "grad_norm": 0.16164818406105042, "learning_rate": 4.444946325291176e-06, "loss": 0.9143, "step": 76680 }, { "epoch": 0.5551333000354695, "grad_norm": 0.1746838092803955, "learning_rate": 4.4448739386305895e-06, "loss": 0.9179, "step": 76690 }, { "epoch": 0.5552056866960556, "grad_norm": 0.1547100841999054, "learning_rate": 4.444801551970003e-06, "loss": 0.9122, "step": 76700 }, { "epoch": 0.5552780733566418, "grad_norm": 0.1596509963274002, "learning_rate": 4.444729165309417e-06, "loss": 0.908, "step": 76710 }, { "epoch": 0.555350460017228, "grad_norm": 0.16364139318466187, "learning_rate": 4.444656778648831e-06, "loss": 0.9158, "step": 76720 }, { "epoch": 0.5554228466778142, "grad_norm": 0.17858783900737762, "learning_rate": 4.444584391988245e-06, "loss": 0.9134, "step": 76730 }, { "epoch": 0.5554952333384005, "grad_norm": 0.14799803495407104, "learning_rate": 4.444512005327658e-06, "loss": 0.9138, "step": 76740 }, { "epoch": 0.5555676199989866, "grad_norm": 0.156327486038208, "learning_rate": 4.444439618667072e-06, "loss": 0.9222, "step": 76750 }, { "epoch": 0.5556400066595728, "grad_norm": 0.16460415720939636, "learning_rate": 4.4443672320064865e-06, "loss": 0.9107, "step": 76760 }, { "epoch": 0.555712393320159, "grad_norm": 0.1606915295124054, "learning_rate": 4.4442948453459e-06, "loss": 0.8984, "step": 76770 }, { "epoch": 0.5557847799807452, "grad_norm": 0.15452681481838226, "learning_rate": 4.444222458685314e-06, "loss": 0.8986, "step": 76780 }, { "epoch": 0.5558571666413313, "grad_norm": 0.16996990144252777, "learning_rate": 4.444150072024727e-06, "loss": 0.919, "step": 76790 }, { "epoch": 0.5559295533019175, "grad_norm": 0.16095289587974548, "learning_rate": 4.444077685364142e-06, "loss": 0.9196, "step": 76800 }, { "epoch": 0.5560019399625037, "grad_norm": 0.16664938628673553, "learning_rate": 4.444005298703555e-06, "loss": 0.915, "step": 76810 }, { "epoch": 0.5560743266230899, "grad_norm": 0.15813925862312317, "learning_rate": 4.443932912042969e-06, "loss": 0.9096, "step": 76820 }, { "epoch": 0.556146713283676, "grad_norm": 0.1628894805908203, "learning_rate": 4.443860525382383e-06, "loss": 0.9181, "step": 76830 }, { "epoch": 0.5562190999442622, "grad_norm": 0.16485193371772766, "learning_rate": 4.443788138721797e-06, "loss": 0.8998, "step": 76840 }, { "epoch": 0.5562914866048485, "grad_norm": 0.15740840137004852, "learning_rate": 4.443715752061211e-06, "loss": 0.9215, "step": 76850 }, { "epoch": 0.5563638732654347, "grad_norm": 0.16431400179862976, "learning_rate": 4.443643365400624e-06, "loss": 0.9179, "step": 76860 }, { "epoch": 0.5564362599260209, "grad_norm": 0.17064224183559418, "learning_rate": 4.443570978740038e-06, "loss": 0.9115, "step": 76870 }, { "epoch": 0.556508646586607, "grad_norm": 0.15299154818058014, "learning_rate": 4.443498592079452e-06, "loss": 0.9046, "step": 76880 }, { "epoch": 0.5565810332471932, "grad_norm": 0.19055993854999542, "learning_rate": 4.443426205418866e-06, "loss": 0.9223, "step": 76890 }, { "epoch": 0.5566534199077794, "grad_norm": 0.16058650612831116, "learning_rate": 4.44335381875828e-06, "loss": 0.9083, "step": 76900 }, { "epoch": 0.5567258065683656, "grad_norm": 0.15559203922748566, "learning_rate": 4.443281432097693e-06, "loss": 0.9158, "step": 76910 }, { "epoch": 0.5567981932289517, "grad_norm": 0.4795348346233368, "learning_rate": 4.443209045437108e-06, "loss": 0.9116, "step": 76920 }, { "epoch": 0.5568705798895379, "grad_norm": 0.1700964719057083, "learning_rate": 4.443136658776521e-06, "loss": 0.907, "step": 76930 }, { "epoch": 0.5569429665501241, "grad_norm": 0.15994440019130707, "learning_rate": 4.443064272115935e-06, "loss": 0.9222, "step": 76940 }, { "epoch": 0.5570153532107104, "grad_norm": 0.17749464511871338, "learning_rate": 4.4429918854553486e-06, "loss": 0.9154, "step": 76950 }, { "epoch": 0.5570877398712966, "grad_norm": 0.16541603207588196, "learning_rate": 4.442919498794763e-06, "loss": 0.9177, "step": 76960 }, { "epoch": 0.5571601265318827, "grad_norm": 0.16451773047447205, "learning_rate": 4.442847112134177e-06, "loss": 0.9144, "step": 76970 }, { "epoch": 0.5572325131924689, "grad_norm": 0.16148695349693298, "learning_rate": 4.44277472547359e-06, "loss": 0.9084, "step": 76980 }, { "epoch": 0.5573048998530551, "grad_norm": 0.18457120656967163, "learning_rate": 4.442702338813004e-06, "loss": 0.9008, "step": 76990 }, { "epoch": 0.5573772865136413, "grad_norm": 0.1575312465429306, "learning_rate": 4.442629952152418e-06, "loss": 0.9129, "step": 77000 }, { "epoch": 0.5574496731742274, "grad_norm": 0.16345492005348206, "learning_rate": 4.442557565491831e-06, "loss": 0.9194, "step": 77010 }, { "epoch": 0.5575220598348136, "grad_norm": 0.17905868589878082, "learning_rate": 4.442485178831245e-06, "loss": 0.9271, "step": 77020 }, { "epoch": 0.5575944464953998, "grad_norm": 0.15946638584136963, "learning_rate": 4.442412792170659e-06, "loss": 0.9158, "step": 77030 }, { "epoch": 0.557666833155986, "grad_norm": 0.1656288057565689, "learning_rate": 4.442340405510073e-06, "loss": 0.897, "step": 77040 }, { "epoch": 0.5577392198165722, "grad_norm": 0.15466462075710297, "learning_rate": 4.442268018849486e-06, "loss": 0.9194, "step": 77050 }, { "epoch": 0.5578116064771584, "grad_norm": 0.17180196940898895, "learning_rate": 4.4421956321889e-06, "loss": 0.9056, "step": 77060 }, { "epoch": 0.5578839931377446, "grad_norm": 0.16797685623168945, "learning_rate": 4.4421232455283145e-06, "loss": 0.9069, "step": 77070 }, { "epoch": 0.5579563797983308, "grad_norm": 0.15991143882274628, "learning_rate": 4.442050858867728e-06, "loss": 0.9102, "step": 77080 }, { "epoch": 0.558028766458917, "grad_norm": 0.1536579132080078, "learning_rate": 4.441978472207142e-06, "loss": 0.9134, "step": 77090 }, { "epoch": 0.5581011531195031, "grad_norm": 0.15524393320083618, "learning_rate": 4.441906085546555e-06, "loss": 0.9237, "step": 77100 }, { "epoch": 0.5581735397800893, "grad_norm": 0.15506607294082642, "learning_rate": 4.44183369888597e-06, "loss": 0.925, "step": 77110 }, { "epoch": 0.5582459264406755, "grad_norm": 0.1472669243812561, "learning_rate": 4.4417613122253834e-06, "loss": 0.8972, "step": 77120 }, { "epoch": 0.5583183131012617, "grad_norm": 0.17821435630321503, "learning_rate": 4.441688925564797e-06, "loss": 0.9146, "step": 77130 }, { "epoch": 0.5583906997618479, "grad_norm": 0.152136892080307, "learning_rate": 4.441616538904211e-06, "loss": 0.9101, "step": 77140 }, { "epoch": 0.558463086422434, "grad_norm": 0.16555529832839966, "learning_rate": 4.441544152243625e-06, "loss": 0.915, "step": 77150 }, { "epoch": 0.5585354730830202, "grad_norm": 0.18034303188323975, "learning_rate": 4.441471765583039e-06, "loss": 0.9174, "step": 77160 }, { "epoch": 0.5586078597436065, "grad_norm": 0.1521337479352951, "learning_rate": 4.441399378922452e-06, "loss": 0.9194, "step": 77170 }, { "epoch": 0.5586802464041927, "grad_norm": 0.1529371738433838, "learning_rate": 4.441326992261866e-06, "loss": 0.893, "step": 77180 }, { "epoch": 0.5587526330647788, "grad_norm": 0.18080340325832367, "learning_rate": 4.4412546056012804e-06, "loss": 0.921, "step": 77190 }, { "epoch": 0.558825019725365, "grad_norm": 0.15729157626628876, "learning_rate": 4.441182218940694e-06, "loss": 0.905, "step": 77200 }, { "epoch": 0.5588974063859512, "grad_norm": 0.16402077674865723, "learning_rate": 4.441109832280108e-06, "loss": 0.9141, "step": 77210 }, { "epoch": 0.5589697930465374, "grad_norm": 0.14309856295585632, "learning_rate": 4.441037445619521e-06, "loss": 0.909, "step": 77220 }, { "epoch": 0.5590421797071236, "grad_norm": 0.168483704328537, "learning_rate": 4.440965058958936e-06, "loss": 0.9038, "step": 77230 }, { "epoch": 0.5591145663677097, "grad_norm": 0.15855084359645844, "learning_rate": 4.440892672298349e-06, "loss": 0.9107, "step": 77240 }, { "epoch": 0.5591869530282959, "grad_norm": 0.2141026258468628, "learning_rate": 4.440820285637763e-06, "loss": 0.917, "step": 77250 }, { "epoch": 0.5592593396888821, "grad_norm": 0.14907251298427582, "learning_rate": 4.440747898977177e-06, "loss": 0.9142, "step": 77260 }, { "epoch": 0.5593317263494684, "grad_norm": 0.16853876411914825, "learning_rate": 4.440675512316591e-06, "loss": 0.9273, "step": 77270 }, { "epoch": 0.5594041130100545, "grad_norm": 0.15877121686935425, "learning_rate": 4.440603125656005e-06, "loss": 0.9126, "step": 77280 }, { "epoch": 0.5594764996706407, "grad_norm": 0.1590542197227478, "learning_rate": 4.440530738995418e-06, "loss": 0.9254, "step": 77290 }, { "epoch": 0.5595488863312269, "grad_norm": 0.15544173121452332, "learning_rate": 4.440458352334832e-06, "loss": 0.9253, "step": 77300 }, { "epoch": 0.5596212729918131, "grad_norm": 0.15453557670116425, "learning_rate": 4.440385965674246e-06, "loss": 0.9185, "step": 77310 }, { "epoch": 0.5596936596523993, "grad_norm": 0.16094282269477844, "learning_rate": 4.44031357901366e-06, "loss": 0.9194, "step": 77320 }, { "epoch": 0.5597660463129854, "grad_norm": 0.15466651320457458, "learning_rate": 4.440241192353074e-06, "loss": 0.9163, "step": 77330 }, { "epoch": 0.5598384329735716, "grad_norm": 0.1567269265651703, "learning_rate": 4.440168805692487e-06, "loss": 0.9179, "step": 77340 }, { "epoch": 0.5599108196341578, "grad_norm": 0.2014339566230774, "learning_rate": 4.440096419031901e-06, "loss": 0.9073, "step": 77350 }, { "epoch": 0.559983206294744, "grad_norm": 0.16425397992134094, "learning_rate": 4.440024032371315e-06, "loss": 0.8897, "step": 77360 }, { "epoch": 0.5600555929553301, "grad_norm": 0.16576462984085083, "learning_rate": 4.439951645710729e-06, "loss": 0.9062, "step": 77370 }, { "epoch": 0.5601279796159164, "grad_norm": 0.15141081809997559, "learning_rate": 4.4398792590501425e-06, "loss": 0.9214, "step": 77380 }, { "epoch": 0.5602003662765026, "grad_norm": 0.16057878732681274, "learning_rate": 4.439806872389556e-06, "loss": 0.9085, "step": 77390 }, { "epoch": 0.5602727529370888, "grad_norm": 0.1669279783964157, "learning_rate": 4.439734485728971e-06, "loss": 0.9171, "step": 77400 }, { "epoch": 0.560345139597675, "grad_norm": 0.17052066326141357, "learning_rate": 4.439662099068384e-06, "loss": 0.9034, "step": 77410 }, { "epoch": 0.5604175262582611, "grad_norm": 0.16968189179897308, "learning_rate": 4.439589712407798e-06, "loss": 0.9182, "step": 77420 }, { "epoch": 0.5604899129188473, "grad_norm": 0.1690019816160202, "learning_rate": 4.4395173257472115e-06, "loss": 0.9208, "step": 77430 }, { "epoch": 0.5605622995794335, "grad_norm": 0.22552861273288727, "learning_rate": 4.439444939086626e-06, "loss": 0.9157, "step": 77440 }, { "epoch": 0.5606346862400197, "grad_norm": 0.16138851642608643, "learning_rate": 4.4393725524260395e-06, "loss": 0.9054, "step": 77450 }, { "epoch": 0.5607070729006058, "grad_norm": 0.15333735942840576, "learning_rate": 4.439300165765453e-06, "loss": 0.9084, "step": 77460 }, { "epoch": 0.560779459561192, "grad_norm": 0.26510128378868103, "learning_rate": 4.439227779104867e-06, "loss": 0.9149, "step": 77470 }, { "epoch": 0.5608518462217783, "grad_norm": 0.14910390973091125, "learning_rate": 4.439155392444281e-06, "loss": 0.9052, "step": 77480 }, { "epoch": 0.5609242328823645, "grad_norm": 0.15675272047519684, "learning_rate": 4.439083005783695e-06, "loss": 0.9103, "step": 77490 }, { "epoch": 0.5609966195429507, "grad_norm": 0.16790616512298584, "learning_rate": 4.4390106191231085e-06, "loss": 0.8932, "step": 77500 }, { "epoch": 0.5610690062035368, "grad_norm": 0.15522590279579163, "learning_rate": 4.438938232462522e-06, "loss": 0.9203, "step": 77510 }, { "epoch": 0.561141392864123, "grad_norm": 0.1486007571220398, "learning_rate": 4.4388658458019365e-06, "loss": 0.893, "step": 77520 }, { "epoch": 0.5612137795247092, "grad_norm": 0.16432353854179382, "learning_rate": 4.43879345914135e-06, "loss": 0.9105, "step": 77530 }, { "epoch": 0.5612861661852954, "grad_norm": 0.16237597167491913, "learning_rate": 4.438721072480763e-06, "loss": 0.9097, "step": 77540 }, { "epoch": 0.5613585528458815, "grad_norm": 0.15193864703178406, "learning_rate": 4.438648685820177e-06, "loss": 0.9242, "step": 77550 }, { "epoch": 0.5614309395064677, "grad_norm": 0.18787966668605804, "learning_rate": 4.438576299159591e-06, "loss": 0.9021, "step": 77560 }, { "epoch": 0.5615033261670539, "grad_norm": 0.17147335410118103, "learning_rate": 4.438503912499005e-06, "loss": 0.9167, "step": 77570 }, { "epoch": 0.5615757128276401, "grad_norm": 0.16556903719902039, "learning_rate": 4.438431525838418e-06, "loss": 0.9053, "step": 77580 }, { "epoch": 0.5616480994882264, "grad_norm": 0.17590190470218658, "learning_rate": 4.438359139177833e-06, "loss": 0.9204, "step": 77590 }, { "epoch": 0.5617204861488125, "grad_norm": 0.173880934715271, "learning_rate": 4.438286752517246e-06, "loss": 0.9062, "step": 77600 }, { "epoch": 0.5617928728093987, "grad_norm": 0.16164129972457886, "learning_rate": 4.43821436585666e-06, "loss": 0.9107, "step": 77610 }, { "epoch": 0.5618652594699849, "grad_norm": 0.1576722264289856, "learning_rate": 4.4381419791960736e-06, "loss": 0.9131, "step": 77620 }, { "epoch": 0.5619376461305711, "grad_norm": 0.17891205847263336, "learning_rate": 4.438069592535488e-06, "loss": 0.9132, "step": 77630 }, { "epoch": 0.5620100327911572, "grad_norm": 0.149394229054451, "learning_rate": 4.437997205874902e-06, "loss": 0.9029, "step": 77640 }, { "epoch": 0.5620824194517434, "grad_norm": 0.1493150144815445, "learning_rate": 4.437924819214315e-06, "loss": 0.9246, "step": 77650 }, { "epoch": 0.5621548061123296, "grad_norm": 0.1570667326450348, "learning_rate": 4.437852432553729e-06, "loss": 0.9175, "step": 77660 }, { "epoch": 0.5622271927729158, "grad_norm": 0.1554926335811615, "learning_rate": 4.437780045893143e-06, "loss": 0.907, "step": 77670 }, { "epoch": 0.562299579433502, "grad_norm": 0.18290413916110992, "learning_rate": 4.437707659232557e-06, "loss": 0.9032, "step": 77680 }, { "epoch": 0.5623719660940881, "grad_norm": 0.1693306416273117, "learning_rate": 4.4376352725719706e-06, "loss": 0.9073, "step": 77690 }, { "epoch": 0.5624443527546744, "grad_norm": 0.16810953617095947, "learning_rate": 4.437562885911384e-06, "loss": 0.9203, "step": 77700 }, { "epoch": 0.5625167394152606, "grad_norm": 0.1598738580942154, "learning_rate": 4.437490499250799e-06, "loss": 0.919, "step": 77710 }, { "epoch": 0.5625891260758468, "grad_norm": 0.17527292668819427, "learning_rate": 4.437418112590212e-06, "loss": 0.9263, "step": 77720 }, { "epoch": 0.562661512736433, "grad_norm": 0.14632010459899902, "learning_rate": 4.437345725929626e-06, "loss": 0.9096, "step": 77730 }, { "epoch": 0.5627338993970191, "grad_norm": 0.16045227646827698, "learning_rate": 4.4372733392690395e-06, "loss": 0.9228, "step": 77740 }, { "epoch": 0.5628062860576053, "grad_norm": 0.1670956164598465, "learning_rate": 4.437200952608454e-06, "loss": 0.9122, "step": 77750 }, { "epoch": 0.5628786727181915, "grad_norm": 0.15484373271465302, "learning_rate": 4.4371285659478676e-06, "loss": 0.9101, "step": 77760 }, { "epoch": 0.5629510593787777, "grad_norm": 0.1494317650794983, "learning_rate": 4.437056179287281e-06, "loss": 0.9119, "step": 77770 }, { "epoch": 0.5630234460393638, "grad_norm": 0.15059392154216766, "learning_rate": 4.436983792626695e-06, "loss": 0.9092, "step": 77780 }, { "epoch": 0.56309583269995, "grad_norm": 0.19551579654216766, "learning_rate": 4.436911405966109e-06, "loss": 0.9181, "step": 77790 }, { "epoch": 0.5631682193605363, "grad_norm": 0.14669376611709595, "learning_rate": 4.436839019305523e-06, "loss": 0.9141, "step": 77800 }, { "epoch": 0.5632406060211225, "grad_norm": 0.16056394577026367, "learning_rate": 4.4367666326449365e-06, "loss": 0.9127, "step": 77810 }, { "epoch": 0.5633129926817086, "grad_norm": 0.15070192515850067, "learning_rate": 4.43669424598435e-06, "loss": 0.8972, "step": 77820 }, { "epoch": 0.5633853793422948, "grad_norm": 0.16001105308532715, "learning_rate": 4.436621859323765e-06, "loss": 0.9019, "step": 77830 }, { "epoch": 0.563457766002881, "grad_norm": 0.14969342947006226, "learning_rate": 4.436549472663178e-06, "loss": 0.9138, "step": 77840 }, { "epoch": 0.5635301526634672, "grad_norm": 0.15888501703739166, "learning_rate": 4.436477086002592e-06, "loss": 0.9168, "step": 77850 }, { "epoch": 0.5636025393240534, "grad_norm": 0.16971467435359955, "learning_rate": 4.4364046993420054e-06, "loss": 0.9011, "step": 77860 }, { "epoch": 0.5636749259846395, "grad_norm": 0.18575704097747803, "learning_rate": 4.43633231268142e-06, "loss": 0.8922, "step": 77870 }, { "epoch": 0.5637473126452257, "grad_norm": 0.161865696310997, "learning_rate": 4.4362599260208335e-06, "loss": 0.9045, "step": 77880 }, { "epoch": 0.5638196993058119, "grad_norm": 0.22121845185756683, "learning_rate": 4.436187539360247e-06, "loss": 0.9184, "step": 77890 }, { "epoch": 0.5638920859663981, "grad_norm": 0.15823763608932495, "learning_rate": 4.436115152699661e-06, "loss": 0.898, "step": 77900 }, { "epoch": 0.5639644726269843, "grad_norm": 0.1676093190908432, "learning_rate": 4.436042766039075e-06, "loss": 0.9173, "step": 77910 }, { "epoch": 0.5640368592875705, "grad_norm": 0.15236294269561768, "learning_rate": 4.435970379378489e-06, "loss": 0.9052, "step": 77920 }, { "epoch": 0.5641092459481567, "grad_norm": 0.16066612303256989, "learning_rate": 4.4358979927179024e-06, "loss": 0.9102, "step": 77930 }, { "epoch": 0.5641816326087429, "grad_norm": 0.162504181265831, "learning_rate": 4.435825606057316e-06, "loss": 0.8935, "step": 77940 }, { "epoch": 0.564254019269329, "grad_norm": 0.1564251035451889, "learning_rate": 4.4357532193967305e-06, "loss": 0.9219, "step": 77950 }, { "epoch": 0.5643264059299152, "grad_norm": 0.14474442601203918, "learning_rate": 4.435680832736144e-06, "loss": 0.9118, "step": 77960 }, { "epoch": 0.5643987925905014, "grad_norm": 0.1615808606147766, "learning_rate": 4.435608446075558e-06, "loss": 0.9025, "step": 77970 }, { "epoch": 0.5644711792510876, "grad_norm": 0.16067107021808624, "learning_rate": 4.435536059414971e-06, "loss": 0.9064, "step": 77980 }, { "epoch": 0.5645435659116738, "grad_norm": 0.1663515865802765, "learning_rate": 4.435463672754385e-06, "loss": 0.8929, "step": 77990 }, { "epoch": 0.5646159525722599, "grad_norm": 0.15532717108726501, "learning_rate": 4.4353912860937994e-06, "loss": 0.9231, "step": 78000 }, { "epoch": 0.5646883392328462, "grad_norm": 0.16401223838329315, "learning_rate": 4.435318899433213e-06, "loss": 0.9048, "step": 78010 }, { "epoch": 0.5647607258934324, "grad_norm": 0.15295349061489105, "learning_rate": 4.435246512772627e-06, "loss": 0.9135, "step": 78020 }, { "epoch": 0.5648331125540186, "grad_norm": 0.16399818658828735, "learning_rate": 4.43517412611204e-06, "loss": 0.9169, "step": 78030 }, { "epoch": 0.5649054992146048, "grad_norm": 0.15173108875751495, "learning_rate": 4.435101739451455e-06, "loss": 0.9184, "step": 78040 }, { "epoch": 0.5649778858751909, "grad_norm": 0.15851818025112152, "learning_rate": 4.435029352790868e-06, "loss": 0.907, "step": 78050 }, { "epoch": 0.5650502725357771, "grad_norm": 0.14833968877792358, "learning_rate": 4.434956966130282e-06, "loss": 0.9057, "step": 78060 }, { "epoch": 0.5651226591963633, "grad_norm": 0.15308476984500885, "learning_rate": 4.434884579469696e-06, "loss": 0.9124, "step": 78070 }, { "epoch": 0.5651950458569495, "grad_norm": 0.1614348590373993, "learning_rate": 4.434812192809109e-06, "loss": 0.9196, "step": 78080 }, { "epoch": 0.5652674325175356, "grad_norm": 0.15268519520759583, "learning_rate": 4.434739806148523e-06, "loss": 0.9167, "step": 78090 }, { "epoch": 0.5653398191781218, "grad_norm": 0.20000995695590973, "learning_rate": 4.434667419487937e-06, "loss": 0.8982, "step": 78100 }, { "epoch": 0.565412205838708, "grad_norm": 0.19799144566059113, "learning_rate": 4.434595032827351e-06, "loss": 0.9041, "step": 78110 }, { "epoch": 0.5654845924992943, "grad_norm": 0.16405373811721802, "learning_rate": 4.4345226461667645e-06, "loss": 0.9125, "step": 78120 }, { "epoch": 0.5655569791598805, "grad_norm": 0.14727067947387695, "learning_rate": 4.434450259506178e-06, "loss": 0.9143, "step": 78130 }, { "epoch": 0.5656293658204666, "grad_norm": 0.15156599879264832, "learning_rate": 4.434377872845592e-06, "loss": 0.9086, "step": 78140 }, { "epoch": 0.5657017524810528, "grad_norm": 0.16756990551948547, "learning_rate": 4.434305486185006e-06, "loss": 0.919, "step": 78150 }, { "epoch": 0.565774139141639, "grad_norm": 0.159501850605011, "learning_rate": 4.43423309952442e-06, "loss": 0.8931, "step": 78160 }, { "epoch": 0.5658465258022252, "grad_norm": 0.1752566546201706, "learning_rate": 4.4341607128638335e-06, "loss": 0.9072, "step": 78170 }, { "epoch": 0.5659189124628113, "grad_norm": 0.15048715472221375, "learning_rate": 4.434088326203247e-06, "loss": 0.8956, "step": 78180 }, { "epoch": 0.5659912991233975, "grad_norm": 0.18843425810337067, "learning_rate": 4.4340159395426615e-06, "loss": 0.9209, "step": 78190 }, { "epoch": 0.5660636857839837, "grad_norm": 0.23466771841049194, "learning_rate": 4.433943552882075e-06, "loss": 0.9072, "step": 78200 }, { "epoch": 0.5661360724445699, "grad_norm": 0.1651633083820343, "learning_rate": 4.433871166221489e-06, "loss": 0.9061, "step": 78210 }, { "epoch": 0.566208459105156, "grad_norm": 0.18153083324432373, "learning_rate": 4.433798779560902e-06, "loss": 0.9043, "step": 78220 }, { "epoch": 0.5662808457657423, "grad_norm": 0.14733073115348816, "learning_rate": 4.433726392900317e-06, "loss": 0.9011, "step": 78230 }, { "epoch": 0.5663532324263285, "grad_norm": 0.16242343187332153, "learning_rate": 4.4336540062397305e-06, "loss": 0.9121, "step": 78240 }, { "epoch": 0.5664256190869147, "grad_norm": 0.2215704470872879, "learning_rate": 4.433581619579144e-06, "loss": 0.9137, "step": 78250 }, { "epoch": 0.5664980057475009, "grad_norm": 0.15027424693107605, "learning_rate": 4.433509232918558e-06, "loss": 0.911, "step": 78260 }, { "epoch": 0.566570392408087, "grad_norm": 0.15450255572795868, "learning_rate": 4.433436846257972e-06, "loss": 0.9016, "step": 78270 }, { "epoch": 0.5666427790686732, "grad_norm": 0.15546143054962158, "learning_rate": 4.433364459597386e-06, "loss": 0.9196, "step": 78280 }, { "epoch": 0.5667151657292594, "grad_norm": 0.2212473452091217, "learning_rate": 4.433292072936799e-06, "loss": 0.9095, "step": 78290 }, { "epoch": 0.5667875523898456, "grad_norm": 0.21143324673175812, "learning_rate": 4.433219686276213e-06, "loss": 0.9151, "step": 78300 }, { "epoch": 0.5668599390504317, "grad_norm": 0.15912073850631714, "learning_rate": 4.4331472996156275e-06, "loss": 0.9131, "step": 78310 }, { "epoch": 0.5669323257110179, "grad_norm": 0.16560253500938416, "learning_rate": 4.433074912955041e-06, "loss": 0.9257, "step": 78320 }, { "epoch": 0.5670047123716042, "grad_norm": 0.15693403780460358, "learning_rate": 4.433002526294455e-06, "loss": 0.9085, "step": 78330 }, { "epoch": 0.5670770990321904, "grad_norm": 0.15633919835090637, "learning_rate": 4.432930139633868e-06, "loss": 0.9304, "step": 78340 }, { "epoch": 0.5671494856927766, "grad_norm": 0.2166070193052292, "learning_rate": 4.432857752973283e-06, "loss": 0.9092, "step": 78350 }, { "epoch": 0.5672218723533627, "grad_norm": 0.154709130525589, "learning_rate": 4.432785366312696e-06, "loss": 0.9122, "step": 78360 }, { "epoch": 0.5672942590139489, "grad_norm": 0.1582053154706955, "learning_rate": 4.43271297965211e-06, "loss": 0.9011, "step": 78370 }, { "epoch": 0.5673666456745351, "grad_norm": 0.16059152781963348, "learning_rate": 4.432640592991524e-06, "loss": 0.9082, "step": 78380 }, { "epoch": 0.5674390323351213, "grad_norm": 0.1610398292541504, "learning_rate": 4.432568206330938e-06, "loss": 0.9072, "step": 78390 }, { "epoch": 0.5675114189957075, "grad_norm": 0.1508665531873703, "learning_rate": 4.432495819670352e-06, "loss": 0.9011, "step": 78400 }, { "epoch": 0.5675838056562936, "grad_norm": 0.16580963134765625, "learning_rate": 4.432423433009765e-06, "loss": 0.9156, "step": 78410 }, { "epoch": 0.5676561923168798, "grad_norm": 0.17013192176818848, "learning_rate": 4.432351046349179e-06, "loss": 0.9119, "step": 78420 }, { "epoch": 0.567728578977466, "grad_norm": 0.15810072422027588, "learning_rate": 4.432278659688593e-06, "loss": 0.9018, "step": 78430 }, { "epoch": 0.5678009656380523, "grad_norm": 0.1526464819908142, "learning_rate": 4.432206273028007e-06, "loss": 0.9058, "step": 78440 }, { "epoch": 0.5678733522986384, "grad_norm": 0.15462712943553925, "learning_rate": 4.432133886367421e-06, "loss": 0.9117, "step": 78450 }, { "epoch": 0.5679457389592246, "grad_norm": 0.1546926200389862, "learning_rate": 4.432061499706834e-06, "loss": 0.9001, "step": 78460 }, { "epoch": 0.5680181256198108, "grad_norm": 0.152653768658638, "learning_rate": 4.431989113046249e-06, "loss": 0.8992, "step": 78470 }, { "epoch": 0.568090512280397, "grad_norm": 0.20859988033771515, "learning_rate": 4.431916726385662e-06, "loss": 0.9182, "step": 78480 }, { "epoch": 0.5681628989409832, "grad_norm": 0.16723498702049255, "learning_rate": 4.431844339725076e-06, "loss": 0.898, "step": 78490 }, { "epoch": 0.5682352856015693, "grad_norm": 0.16271011531352997, "learning_rate": 4.4317719530644896e-06, "loss": 0.9084, "step": 78500 }, { "epoch": 0.5683076722621555, "grad_norm": 0.15765948593616486, "learning_rate": 4.431699566403904e-06, "loss": 0.9245, "step": 78510 }, { "epoch": 0.5683800589227417, "grad_norm": 0.15575428307056427, "learning_rate": 4.431627179743318e-06, "loss": 0.9282, "step": 78520 }, { "epoch": 0.5684524455833279, "grad_norm": 0.17210975289344788, "learning_rate": 4.431554793082731e-06, "loss": 0.9234, "step": 78530 }, { "epoch": 0.5685248322439141, "grad_norm": 0.15548361837863922, "learning_rate": 4.431482406422145e-06, "loss": 0.9141, "step": 78540 }, { "epoch": 0.5685972189045003, "grad_norm": 0.15747930109500885, "learning_rate": 4.431410019761559e-06, "loss": 0.9105, "step": 78550 }, { "epoch": 0.5686696055650865, "grad_norm": 0.19277207553386688, "learning_rate": 4.431337633100973e-06, "loss": 0.8969, "step": 78560 }, { "epoch": 0.5687419922256727, "grad_norm": 0.15888191759586334, "learning_rate": 4.431265246440387e-06, "loss": 0.9133, "step": 78570 }, { "epoch": 0.5688143788862589, "grad_norm": 0.18703103065490723, "learning_rate": 4.4311928597798e-06, "loss": 0.9204, "step": 78580 }, { "epoch": 0.568886765546845, "grad_norm": 0.15291064977645874, "learning_rate": 4.431120473119214e-06, "loss": 0.9055, "step": 78590 }, { "epoch": 0.5689591522074312, "grad_norm": 0.15586498379707336, "learning_rate": 4.4310480864586274e-06, "loss": 0.9007, "step": 78600 }, { "epoch": 0.5690315388680174, "grad_norm": 0.17706800997257233, "learning_rate": 4.430975699798041e-06, "loss": 0.9223, "step": 78610 }, { "epoch": 0.5691039255286036, "grad_norm": 0.1869611293077469, "learning_rate": 4.4309033131374555e-06, "loss": 0.9168, "step": 78620 }, { "epoch": 0.5691763121891897, "grad_norm": 0.20068153738975525, "learning_rate": 4.430830926476869e-06, "loss": 0.9092, "step": 78630 }, { "epoch": 0.5692486988497759, "grad_norm": 0.15373751521110535, "learning_rate": 4.430758539816283e-06, "loss": 0.9068, "step": 78640 }, { "epoch": 0.5693210855103622, "grad_norm": 0.161079540848732, "learning_rate": 4.430686153155696e-06, "loss": 0.9072, "step": 78650 }, { "epoch": 0.5693934721709484, "grad_norm": 0.17405648529529572, "learning_rate": 4.430613766495111e-06, "loss": 0.9152, "step": 78660 }, { "epoch": 0.5694658588315346, "grad_norm": 0.20295019447803497, "learning_rate": 4.4305413798345244e-06, "loss": 0.9121, "step": 78670 }, { "epoch": 0.5695382454921207, "grad_norm": 0.16853387653827667, "learning_rate": 4.430468993173938e-06, "loss": 0.9134, "step": 78680 }, { "epoch": 0.5696106321527069, "grad_norm": 0.21727709472179413, "learning_rate": 4.430396606513352e-06, "loss": 0.9049, "step": 78690 }, { "epoch": 0.5696830188132931, "grad_norm": 0.15580429136753082, "learning_rate": 4.430324219852766e-06, "loss": 0.9035, "step": 78700 }, { "epoch": 0.5697554054738793, "grad_norm": 0.1704913228750229, "learning_rate": 4.43025183319218e-06, "loss": 0.9269, "step": 78710 }, { "epoch": 0.5698277921344654, "grad_norm": 0.1460624784231186, "learning_rate": 4.430179446531593e-06, "loss": 0.9113, "step": 78720 }, { "epoch": 0.5699001787950516, "grad_norm": 0.16129523515701294, "learning_rate": 4.430107059871007e-06, "loss": 0.9033, "step": 78730 }, { "epoch": 0.5699725654556378, "grad_norm": 0.15936952829360962, "learning_rate": 4.4300346732104214e-06, "loss": 0.9164, "step": 78740 }, { "epoch": 0.570044952116224, "grad_norm": 0.15949364006519318, "learning_rate": 4.429962286549835e-06, "loss": 0.9013, "step": 78750 }, { "epoch": 0.5701173387768103, "grad_norm": 0.16319353878498077, "learning_rate": 4.429889899889249e-06, "loss": 0.9086, "step": 78760 }, { "epoch": 0.5701897254373964, "grad_norm": 0.16823211312294006, "learning_rate": 4.429817513228662e-06, "loss": 0.9163, "step": 78770 }, { "epoch": 0.5702621120979826, "grad_norm": 0.16069024801254272, "learning_rate": 4.429745126568076e-06, "loss": 0.9148, "step": 78780 }, { "epoch": 0.5703344987585688, "grad_norm": 0.15689004957675934, "learning_rate": 4.42967273990749e-06, "loss": 0.9087, "step": 78790 }, { "epoch": 0.570406885419155, "grad_norm": 0.19310441613197327, "learning_rate": 4.429600353246904e-06, "loss": 0.9031, "step": 78800 }, { "epoch": 0.5704792720797411, "grad_norm": 0.15190792083740234, "learning_rate": 4.429527966586318e-06, "loss": 0.908, "step": 78810 }, { "epoch": 0.5705516587403273, "grad_norm": 0.1643807291984558, "learning_rate": 4.429455579925731e-06, "loss": 0.9052, "step": 78820 }, { "epoch": 0.5706240454009135, "grad_norm": 0.16099394857883453, "learning_rate": 4.429383193265146e-06, "loss": 0.8997, "step": 78830 }, { "epoch": 0.5706964320614997, "grad_norm": 0.1543867588043213, "learning_rate": 4.429310806604559e-06, "loss": 0.9139, "step": 78840 }, { "epoch": 0.5707688187220858, "grad_norm": 0.1512497216463089, "learning_rate": 4.429238419943973e-06, "loss": 0.8972, "step": 78850 }, { "epoch": 0.5708412053826721, "grad_norm": 0.1591501533985138, "learning_rate": 4.4291660332833865e-06, "loss": 0.9097, "step": 78860 }, { "epoch": 0.5709135920432583, "grad_norm": 0.1647246778011322, "learning_rate": 4.429093646622801e-06, "loss": 0.903, "step": 78870 }, { "epoch": 0.5709859787038445, "grad_norm": 0.1652226597070694, "learning_rate": 4.429021259962215e-06, "loss": 0.9128, "step": 78880 }, { "epoch": 0.5710583653644307, "grad_norm": 0.16023515164852142, "learning_rate": 4.428948873301628e-06, "loss": 0.9068, "step": 78890 }, { "epoch": 0.5711307520250168, "grad_norm": 0.16677772998809814, "learning_rate": 4.428876486641042e-06, "loss": 0.9067, "step": 78900 }, { "epoch": 0.571203138685603, "grad_norm": 0.15754565596580505, "learning_rate": 4.428804099980456e-06, "loss": 0.8981, "step": 78910 }, { "epoch": 0.5712755253461892, "grad_norm": 0.1568695604801178, "learning_rate": 4.42873171331987e-06, "loss": 0.9127, "step": 78920 }, { "epoch": 0.5713479120067754, "grad_norm": 0.14470389485359192, "learning_rate": 4.4286593266592835e-06, "loss": 0.908, "step": 78930 }, { "epoch": 0.5714202986673615, "grad_norm": 0.17773394286632538, "learning_rate": 4.428586939998697e-06, "loss": 0.9145, "step": 78940 }, { "epoch": 0.5714926853279477, "grad_norm": 0.18091265857219696, "learning_rate": 4.428514553338112e-06, "loss": 0.9129, "step": 78950 }, { "epoch": 0.5715650719885339, "grad_norm": 0.17283041775226593, "learning_rate": 4.428442166677525e-06, "loss": 0.9103, "step": 78960 }, { "epoch": 0.5716374586491202, "grad_norm": 0.17135843634605408, "learning_rate": 4.428369780016939e-06, "loss": 0.9144, "step": 78970 }, { "epoch": 0.5717098453097064, "grad_norm": 0.15438847243785858, "learning_rate": 4.4282973933563525e-06, "loss": 0.9039, "step": 78980 }, { "epoch": 0.5717822319702925, "grad_norm": 0.16643427312374115, "learning_rate": 4.428225006695767e-06, "loss": 0.9042, "step": 78990 }, { "epoch": 0.5718546186308787, "grad_norm": 0.17289410531520844, "learning_rate": 4.4281526200351805e-06, "loss": 0.909, "step": 79000 }, { "epoch": 0.5719270052914649, "grad_norm": 0.15444664657115936, "learning_rate": 4.428080233374594e-06, "loss": 0.9129, "step": 79010 }, { "epoch": 0.5719993919520511, "grad_norm": 0.1560271829366684, "learning_rate": 4.428007846714008e-06, "loss": 0.9051, "step": 79020 }, { "epoch": 0.5720717786126372, "grad_norm": 0.1660393625497818, "learning_rate": 4.427935460053422e-06, "loss": 0.9067, "step": 79030 }, { "epoch": 0.5721441652732234, "grad_norm": 0.16919827461242676, "learning_rate": 4.427863073392836e-06, "loss": 0.9075, "step": 79040 }, { "epoch": 0.5722165519338096, "grad_norm": 0.16163021326065063, "learning_rate": 4.4277906867322495e-06, "loss": 0.9167, "step": 79050 }, { "epoch": 0.5722889385943958, "grad_norm": 0.15363195538520813, "learning_rate": 4.427718300071663e-06, "loss": 0.9091, "step": 79060 }, { "epoch": 0.5723613252549821, "grad_norm": 0.22884926199913025, "learning_rate": 4.4276459134110776e-06, "loss": 0.9061, "step": 79070 }, { "epoch": 0.5724337119155682, "grad_norm": 0.17008480429649353, "learning_rate": 4.427573526750491e-06, "loss": 0.8936, "step": 79080 }, { "epoch": 0.5725060985761544, "grad_norm": 0.1554049849510193, "learning_rate": 4.427501140089905e-06, "loss": 0.9041, "step": 79090 }, { "epoch": 0.5725784852367406, "grad_norm": 0.15605086088180542, "learning_rate": 4.427428753429318e-06, "loss": 0.9061, "step": 79100 }, { "epoch": 0.5726508718973268, "grad_norm": 0.18999440968036652, "learning_rate": 4.427356366768733e-06, "loss": 0.918, "step": 79110 }, { "epoch": 0.572723258557913, "grad_norm": 0.14956019818782806, "learning_rate": 4.4272839801081465e-06, "loss": 0.9107, "step": 79120 }, { "epoch": 0.5727956452184991, "grad_norm": 0.16016794741153717, "learning_rate": 4.427211593447559e-06, "loss": 0.9077, "step": 79130 }, { "epoch": 0.5728680318790853, "grad_norm": 0.16640208661556244, "learning_rate": 4.427139206786974e-06, "loss": 0.9131, "step": 79140 }, { "epoch": 0.5729404185396715, "grad_norm": 0.14928004145622253, "learning_rate": 4.427066820126387e-06, "loss": 0.9216, "step": 79150 }, { "epoch": 0.5730128052002577, "grad_norm": 0.17138387262821198, "learning_rate": 4.426994433465801e-06, "loss": 0.912, "step": 79160 }, { "epoch": 0.5730851918608438, "grad_norm": 0.16592368483543396, "learning_rate": 4.4269220468052146e-06, "loss": 0.9082, "step": 79170 }, { "epoch": 0.5731575785214301, "grad_norm": 0.15400424599647522, "learning_rate": 4.426849660144629e-06, "loss": 0.8956, "step": 79180 }, { "epoch": 0.5732299651820163, "grad_norm": 0.16034607589244843, "learning_rate": 4.426777273484043e-06, "loss": 0.9104, "step": 79190 }, { "epoch": 0.5733023518426025, "grad_norm": 0.16131481528282166, "learning_rate": 4.426704886823456e-06, "loss": 0.9076, "step": 79200 }, { "epoch": 0.5733747385031887, "grad_norm": 0.15663005411624908, "learning_rate": 4.42663250016287e-06, "loss": 0.9027, "step": 79210 }, { "epoch": 0.5734471251637748, "grad_norm": 0.15593023598194122, "learning_rate": 4.426560113502284e-06, "loss": 0.8946, "step": 79220 }, { "epoch": 0.573519511824361, "grad_norm": 0.15859454870224, "learning_rate": 4.426487726841698e-06, "loss": 0.908, "step": 79230 }, { "epoch": 0.5735918984849472, "grad_norm": 0.2031136453151703, "learning_rate": 4.4264153401811116e-06, "loss": 0.903, "step": 79240 }, { "epoch": 0.5736642851455334, "grad_norm": 0.1812455952167511, "learning_rate": 4.426342953520525e-06, "loss": 0.9156, "step": 79250 }, { "epoch": 0.5737366718061195, "grad_norm": 0.16677896678447723, "learning_rate": 4.42627056685994e-06, "loss": 0.9107, "step": 79260 }, { "epoch": 0.5738090584667057, "grad_norm": 0.19645458459854126, "learning_rate": 4.426198180199353e-06, "loss": 0.9055, "step": 79270 }, { "epoch": 0.5738814451272919, "grad_norm": 0.15948496758937836, "learning_rate": 4.426125793538767e-06, "loss": 0.9226, "step": 79280 }, { "epoch": 0.5739538317878782, "grad_norm": 0.152080699801445, "learning_rate": 4.4260534068781805e-06, "loss": 0.9106, "step": 79290 }, { "epoch": 0.5740262184484644, "grad_norm": 0.16088615357875824, "learning_rate": 4.425981020217595e-06, "loss": 0.9034, "step": 79300 }, { "epoch": 0.5740986051090505, "grad_norm": 0.1661704033613205, "learning_rate": 4.425908633557009e-06, "loss": 0.9064, "step": 79310 }, { "epoch": 0.5741709917696367, "grad_norm": 0.175064355134964, "learning_rate": 4.425836246896422e-06, "loss": 0.9014, "step": 79320 }, { "epoch": 0.5742433784302229, "grad_norm": 0.1588856726884842, "learning_rate": 4.425763860235836e-06, "loss": 0.904, "step": 79330 }, { "epoch": 0.5743157650908091, "grad_norm": 0.16363567113876343, "learning_rate": 4.42569147357525e-06, "loss": 0.9076, "step": 79340 }, { "epoch": 0.5743881517513952, "grad_norm": 0.1526729166507721, "learning_rate": 4.425619086914664e-06, "loss": 0.9174, "step": 79350 }, { "epoch": 0.5744605384119814, "grad_norm": 0.15896102786064148, "learning_rate": 4.4255467002540775e-06, "loss": 0.9168, "step": 79360 }, { "epoch": 0.5745329250725676, "grad_norm": 0.15912756323814392, "learning_rate": 4.425474313593491e-06, "loss": 0.9176, "step": 79370 }, { "epoch": 0.5746053117331538, "grad_norm": 0.15687337517738342, "learning_rate": 4.425401926932905e-06, "loss": 0.9177, "step": 79380 }, { "epoch": 0.57467769839374, "grad_norm": 0.1485118865966797, "learning_rate": 4.425329540272319e-06, "loss": 0.9097, "step": 79390 }, { "epoch": 0.5747500850543262, "grad_norm": 0.1573365032672882, "learning_rate": 4.425257153611733e-06, "loss": 0.9106, "step": 79400 }, { "epoch": 0.5748224717149124, "grad_norm": 0.16917355358600616, "learning_rate": 4.4251847669511464e-06, "loss": 0.9, "step": 79410 }, { "epoch": 0.5748948583754986, "grad_norm": 0.15793545544147491, "learning_rate": 4.42511238029056e-06, "loss": 0.9028, "step": 79420 }, { "epoch": 0.5749672450360848, "grad_norm": 0.16056816279888153, "learning_rate": 4.4250399936299745e-06, "loss": 0.9103, "step": 79430 }, { "epoch": 0.5750396316966709, "grad_norm": 0.21023601293563843, "learning_rate": 4.424967606969388e-06, "loss": 0.9131, "step": 79440 }, { "epoch": 0.5751120183572571, "grad_norm": 0.16455906629562378, "learning_rate": 4.424895220308802e-06, "loss": 0.9136, "step": 79450 }, { "epoch": 0.5751844050178433, "grad_norm": 0.15624572336673737, "learning_rate": 4.424822833648215e-06, "loss": 0.8955, "step": 79460 }, { "epoch": 0.5752567916784295, "grad_norm": 0.15977784991264343, "learning_rate": 4.42475044698763e-06, "loss": 0.9127, "step": 79470 }, { "epoch": 0.5753291783390156, "grad_norm": 0.15706151723861694, "learning_rate": 4.4246780603270434e-06, "loss": 0.9218, "step": 79480 }, { "epoch": 0.5754015649996018, "grad_norm": 0.1580117791891098, "learning_rate": 4.424605673666457e-06, "loss": 0.9063, "step": 79490 }, { "epoch": 0.5754739516601881, "grad_norm": 0.1586533933877945, "learning_rate": 4.424533287005871e-06, "loss": 0.9001, "step": 79500 }, { "epoch": 0.5755463383207743, "grad_norm": 0.15484578907489777, "learning_rate": 4.424460900345285e-06, "loss": 0.9188, "step": 79510 }, { "epoch": 0.5756187249813605, "grad_norm": 0.16581545770168304, "learning_rate": 4.424388513684699e-06, "loss": 0.9029, "step": 79520 }, { "epoch": 0.5756911116419466, "grad_norm": 0.2024676501750946, "learning_rate": 4.424316127024112e-06, "loss": 0.9066, "step": 79530 }, { "epoch": 0.5757634983025328, "grad_norm": 0.17313936352729797, "learning_rate": 4.424243740363526e-06, "loss": 0.9075, "step": 79540 }, { "epoch": 0.575835884963119, "grad_norm": 0.15734641253948212, "learning_rate": 4.4241713537029404e-06, "loss": 0.891, "step": 79550 }, { "epoch": 0.5759082716237052, "grad_norm": 0.16273874044418335, "learning_rate": 4.424098967042354e-06, "loss": 0.8987, "step": 79560 }, { "epoch": 0.5759806582842913, "grad_norm": 0.1653600037097931, "learning_rate": 4.424026580381768e-06, "loss": 0.9086, "step": 79570 }, { "epoch": 0.5760530449448775, "grad_norm": 0.14946460723876953, "learning_rate": 4.423954193721181e-06, "loss": 0.9125, "step": 79580 }, { "epoch": 0.5761254316054637, "grad_norm": 0.15209373831748962, "learning_rate": 4.423881807060596e-06, "loss": 0.9055, "step": 79590 }, { "epoch": 0.5761978182660499, "grad_norm": 0.15624649822711945, "learning_rate": 4.423809420400009e-06, "loss": 0.9077, "step": 79600 }, { "epoch": 0.5762702049266362, "grad_norm": 0.15583081543445587, "learning_rate": 4.423737033739423e-06, "loss": 0.8949, "step": 79610 }, { "epoch": 0.5763425915872223, "grad_norm": 0.17833974957466125, "learning_rate": 4.423664647078837e-06, "loss": 0.9099, "step": 79620 }, { "epoch": 0.5764149782478085, "grad_norm": 0.15480463206768036, "learning_rate": 4.423592260418251e-06, "loss": 0.9146, "step": 79630 }, { "epoch": 0.5764873649083947, "grad_norm": 0.17085537314414978, "learning_rate": 4.423519873757665e-06, "loss": 0.9139, "step": 79640 }, { "epoch": 0.5765597515689809, "grad_norm": 0.14300264418125153, "learning_rate": 4.423447487097078e-06, "loss": 0.9212, "step": 79650 }, { "epoch": 0.576632138229567, "grad_norm": 0.1670181304216385, "learning_rate": 4.423375100436492e-06, "loss": 0.8988, "step": 79660 }, { "epoch": 0.5767045248901532, "grad_norm": 0.16258497536182404, "learning_rate": 4.4233027137759055e-06, "loss": 0.9033, "step": 79670 }, { "epoch": 0.5767769115507394, "grad_norm": 0.16200661659240723, "learning_rate": 4.423230327115319e-06, "loss": 0.9015, "step": 79680 }, { "epoch": 0.5768492982113256, "grad_norm": 0.1577218770980835, "learning_rate": 4.423157940454733e-06, "loss": 0.9133, "step": 79690 }, { "epoch": 0.5769216848719118, "grad_norm": 0.1620890349149704, "learning_rate": 4.423085553794147e-06, "loss": 0.9146, "step": 79700 }, { "epoch": 0.576994071532498, "grad_norm": 0.18708209693431854, "learning_rate": 4.423013167133561e-06, "loss": 0.8999, "step": 79710 }, { "epoch": 0.5770664581930842, "grad_norm": 0.1733100414276123, "learning_rate": 4.4229407804729745e-06, "loss": 0.9122, "step": 79720 }, { "epoch": 0.5771388448536704, "grad_norm": 0.15812858939170837, "learning_rate": 4.422868393812388e-06, "loss": 0.907, "step": 79730 }, { "epoch": 0.5772112315142566, "grad_norm": 0.1608811616897583, "learning_rate": 4.4227960071518025e-06, "loss": 0.9055, "step": 79740 }, { "epoch": 0.5772836181748427, "grad_norm": 0.17216205596923828, "learning_rate": 4.422723620491216e-06, "loss": 0.919, "step": 79750 }, { "epoch": 0.5773560048354289, "grad_norm": 0.36230647563934326, "learning_rate": 4.42265123383063e-06, "loss": 0.9139, "step": 79760 }, { "epoch": 0.5774283914960151, "grad_norm": 0.15506798028945923, "learning_rate": 4.422578847170043e-06, "loss": 0.9009, "step": 79770 }, { "epoch": 0.5775007781566013, "grad_norm": 0.1626974493265152, "learning_rate": 4.422506460509458e-06, "loss": 0.8987, "step": 79780 }, { "epoch": 0.5775731648171875, "grad_norm": 0.1675325185060501, "learning_rate": 4.4224340738488715e-06, "loss": 0.9121, "step": 79790 }, { "epoch": 0.5776455514777736, "grad_norm": 0.15305018424987793, "learning_rate": 4.422361687188285e-06, "loss": 0.9116, "step": 79800 }, { "epoch": 0.5777179381383598, "grad_norm": 0.16215454041957855, "learning_rate": 4.422289300527699e-06, "loss": 0.9109, "step": 79810 }, { "epoch": 0.5777903247989461, "grad_norm": 0.1520712971687317, "learning_rate": 4.422216913867113e-06, "loss": 0.9134, "step": 79820 }, { "epoch": 0.5778627114595323, "grad_norm": 0.16776016354560852, "learning_rate": 4.422144527206527e-06, "loss": 0.9024, "step": 79830 }, { "epoch": 0.5779350981201185, "grad_norm": 0.16810259222984314, "learning_rate": 4.42207214054594e-06, "loss": 0.9109, "step": 79840 }, { "epoch": 0.5780074847807046, "grad_norm": 0.16688929498195648, "learning_rate": 4.421999753885354e-06, "loss": 0.9119, "step": 79850 }, { "epoch": 0.5780798714412908, "grad_norm": 0.15468403697013855, "learning_rate": 4.4219273672247685e-06, "loss": 0.9151, "step": 79860 }, { "epoch": 0.578152258101877, "grad_norm": 0.14916957914829254, "learning_rate": 4.421854980564182e-06, "loss": 0.9155, "step": 79870 }, { "epoch": 0.5782246447624632, "grad_norm": 0.15850642323493958, "learning_rate": 4.421782593903596e-06, "loss": 0.9105, "step": 79880 }, { "epoch": 0.5782970314230493, "grad_norm": 0.15652024745941162, "learning_rate": 4.421710207243009e-06, "loss": 0.9018, "step": 79890 }, { "epoch": 0.5783694180836355, "grad_norm": 0.15815021097660065, "learning_rate": 4.421637820582424e-06, "loss": 0.9002, "step": 79900 }, { "epoch": 0.5784418047442217, "grad_norm": 0.15399345755577087, "learning_rate": 4.421565433921837e-06, "loss": 0.9184, "step": 79910 }, { "epoch": 0.578514191404808, "grad_norm": 0.16379056870937347, "learning_rate": 4.421493047261251e-06, "loss": 0.8932, "step": 79920 }, { "epoch": 0.5785865780653942, "grad_norm": 0.1618824154138565, "learning_rate": 4.421420660600665e-06, "loss": 0.9028, "step": 79930 }, { "epoch": 0.5786589647259803, "grad_norm": 0.1554708182811737, "learning_rate": 4.421348273940079e-06, "loss": 0.8998, "step": 79940 }, { "epoch": 0.5787313513865665, "grad_norm": 0.16406132280826569, "learning_rate": 4.421275887279493e-06, "loss": 0.9058, "step": 79950 }, { "epoch": 0.5788037380471527, "grad_norm": 0.15660354495048523, "learning_rate": 4.421203500618906e-06, "loss": 0.9134, "step": 79960 }, { "epoch": 0.5788761247077389, "grad_norm": 0.15395161509513855, "learning_rate": 4.42113111395832e-06, "loss": 0.8982, "step": 79970 }, { "epoch": 0.578948511368325, "grad_norm": 0.15442846715450287, "learning_rate": 4.421058727297734e-06, "loss": 0.9148, "step": 79980 }, { "epoch": 0.5790208980289112, "grad_norm": 0.156655415892601, "learning_rate": 4.420986340637148e-06, "loss": 0.9125, "step": 79990 }, { "epoch": 0.5790932846894974, "grad_norm": 0.16474436223506927, "learning_rate": 4.420913953976562e-06, "loss": 0.9103, "step": 80000 }, { "epoch": 0.5791656713500836, "grad_norm": 0.17078536748886108, "learning_rate": 4.420841567315975e-06, "loss": 0.9094, "step": 80010 }, { "epoch": 0.5792380580106697, "grad_norm": 0.1581786423921585, "learning_rate": 4.420769180655389e-06, "loss": 0.8923, "step": 80020 }, { "epoch": 0.579310444671256, "grad_norm": 0.16456280648708344, "learning_rate": 4.420696793994803e-06, "loss": 0.9152, "step": 80030 }, { "epoch": 0.5793828313318422, "grad_norm": 0.1432550996541977, "learning_rate": 4.420624407334217e-06, "loss": 0.8975, "step": 80040 }, { "epoch": 0.5794552179924284, "grad_norm": 0.1518087089061737, "learning_rate": 4.4205520206736306e-06, "loss": 0.9096, "step": 80050 }, { "epoch": 0.5795276046530146, "grad_norm": 0.15951353311538696, "learning_rate": 4.420479634013044e-06, "loss": 0.8996, "step": 80060 }, { "epoch": 0.5795999913136007, "grad_norm": 0.16211937367916107, "learning_rate": 4.420407247352459e-06, "loss": 0.912, "step": 80070 }, { "epoch": 0.5796723779741869, "grad_norm": 0.14895065128803253, "learning_rate": 4.420334860691872e-06, "loss": 0.9152, "step": 80080 }, { "epoch": 0.5797447646347731, "grad_norm": 0.14833849668502808, "learning_rate": 4.420262474031286e-06, "loss": 0.9243, "step": 80090 }, { "epoch": 0.5798171512953593, "grad_norm": 0.1672501266002655, "learning_rate": 4.4201900873706995e-06, "loss": 0.9072, "step": 80100 }, { "epoch": 0.5798895379559454, "grad_norm": 0.16042830049991608, "learning_rate": 4.420117700710114e-06, "loss": 0.9099, "step": 80110 }, { "epoch": 0.5799619246165316, "grad_norm": 0.16218118369579315, "learning_rate": 4.420045314049528e-06, "loss": 0.9036, "step": 80120 }, { "epoch": 0.5800343112771178, "grad_norm": 0.1452185958623886, "learning_rate": 4.419972927388941e-06, "loss": 0.9013, "step": 80130 }, { "epoch": 0.5801066979377041, "grad_norm": 0.1564382016658783, "learning_rate": 4.419900540728355e-06, "loss": 0.8933, "step": 80140 }, { "epoch": 0.5801790845982903, "grad_norm": 0.1605406403541565, "learning_rate": 4.419828154067769e-06, "loss": 0.8964, "step": 80150 }, { "epoch": 0.5802514712588764, "grad_norm": 0.16458991169929504, "learning_rate": 4.419755767407183e-06, "loss": 0.9147, "step": 80160 }, { "epoch": 0.5803238579194626, "grad_norm": 0.16406084597110748, "learning_rate": 4.4196833807465965e-06, "loss": 0.9032, "step": 80170 }, { "epoch": 0.5803962445800488, "grad_norm": 0.16171269118785858, "learning_rate": 4.41961099408601e-06, "loss": 0.8972, "step": 80180 }, { "epoch": 0.580468631240635, "grad_norm": 0.1607089638710022, "learning_rate": 4.419538607425424e-06, "loss": 0.8913, "step": 80190 }, { "epoch": 0.5805410179012211, "grad_norm": 0.15454769134521484, "learning_rate": 4.419466220764837e-06, "loss": 0.9014, "step": 80200 }, { "epoch": 0.5806134045618073, "grad_norm": 0.15976151823997498, "learning_rate": 4.419393834104251e-06, "loss": 0.8891, "step": 80210 }, { "epoch": 0.5806857912223935, "grad_norm": 0.15922151505947113, "learning_rate": 4.4193214474436654e-06, "loss": 0.9069, "step": 80220 }, { "epoch": 0.5807581778829797, "grad_norm": 0.1480971723794937, "learning_rate": 4.419249060783079e-06, "loss": 0.9023, "step": 80230 }, { "epoch": 0.580830564543566, "grad_norm": 0.3126855492591858, "learning_rate": 4.419176674122493e-06, "loss": 0.9113, "step": 80240 }, { "epoch": 0.5809029512041521, "grad_norm": 0.15098969638347626, "learning_rate": 4.419104287461906e-06, "loss": 0.9212, "step": 80250 }, { "epoch": 0.5809753378647383, "grad_norm": 0.16232796013355255, "learning_rate": 4.419031900801321e-06, "loss": 0.8948, "step": 80260 }, { "epoch": 0.5810477245253245, "grad_norm": 0.16059771180152893, "learning_rate": 4.418959514140734e-06, "loss": 0.9039, "step": 80270 }, { "epoch": 0.5811201111859107, "grad_norm": 0.15262576937675476, "learning_rate": 4.418887127480148e-06, "loss": 0.9075, "step": 80280 }, { "epoch": 0.5811924978464968, "grad_norm": 0.15464602410793304, "learning_rate": 4.418814740819562e-06, "loss": 0.9119, "step": 80290 }, { "epoch": 0.581264884507083, "grad_norm": 0.1607392579317093, "learning_rate": 4.418742354158976e-06, "loss": 0.903, "step": 80300 }, { "epoch": 0.5813372711676692, "grad_norm": 0.16211529076099396, "learning_rate": 4.41866996749839e-06, "loss": 0.9033, "step": 80310 }, { "epoch": 0.5814096578282554, "grad_norm": 0.1596810221672058, "learning_rate": 4.418597580837803e-06, "loss": 0.9072, "step": 80320 }, { "epoch": 0.5814820444888416, "grad_norm": 0.15583199262619019, "learning_rate": 4.418525194177217e-06, "loss": 0.9146, "step": 80330 }, { "epoch": 0.5815544311494277, "grad_norm": 0.1569822132587433, "learning_rate": 4.418452807516631e-06, "loss": 0.9046, "step": 80340 }, { "epoch": 0.581626817810014, "grad_norm": 0.15323781967163086, "learning_rate": 4.418380420856045e-06, "loss": 0.9146, "step": 80350 }, { "epoch": 0.5816992044706002, "grad_norm": 0.161820650100708, "learning_rate": 4.418308034195459e-06, "loss": 0.9147, "step": 80360 }, { "epoch": 0.5817715911311864, "grad_norm": 0.1547018438577652, "learning_rate": 4.418235647534872e-06, "loss": 0.9052, "step": 80370 }, { "epoch": 0.5818439777917725, "grad_norm": 0.16685166954994202, "learning_rate": 4.418163260874287e-06, "loss": 0.9076, "step": 80380 }, { "epoch": 0.5819163644523587, "grad_norm": 0.1596868634223938, "learning_rate": 4.4180908742137e-06, "loss": 0.8918, "step": 80390 }, { "epoch": 0.5819887511129449, "grad_norm": 0.16917213797569275, "learning_rate": 4.418018487553114e-06, "loss": 0.8946, "step": 80400 }, { "epoch": 0.5820611377735311, "grad_norm": 0.15056735277175903, "learning_rate": 4.4179461008925275e-06, "loss": 0.9045, "step": 80410 }, { "epoch": 0.5821335244341173, "grad_norm": 0.1574123352766037, "learning_rate": 4.417873714231942e-06, "loss": 0.9067, "step": 80420 }, { "epoch": 0.5822059110947034, "grad_norm": 0.15226131677627563, "learning_rate": 4.417801327571356e-06, "loss": 0.9183, "step": 80430 }, { "epoch": 0.5822782977552896, "grad_norm": 0.15113304555416107, "learning_rate": 4.417728940910769e-06, "loss": 0.9102, "step": 80440 }, { "epoch": 0.5823506844158759, "grad_norm": 0.14975102245807648, "learning_rate": 4.417656554250183e-06, "loss": 0.902, "step": 80450 }, { "epoch": 0.5824230710764621, "grad_norm": 0.1500435620546341, "learning_rate": 4.417584167589597e-06, "loss": 0.9046, "step": 80460 }, { "epoch": 0.5824954577370483, "grad_norm": 0.15473416447639465, "learning_rate": 4.417511780929011e-06, "loss": 0.9146, "step": 80470 }, { "epoch": 0.5825678443976344, "grad_norm": 0.1687445044517517, "learning_rate": 4.4174393942684245e-06, "loss": 0.9172, "step": 80480 }, { "epoch": 0.5826402310582206, "grad_norm": 0.18456241488456726, "learning_rate": 4.417367007607838e-06, "loss": 0.9059, "step": 80490 }, { "epoch": 0.5827126177188068, "grad_norm": 0.16103129088878632, "learning_rate": 4.417294620947253e-06, "loss": 0.9129, "step": 80500 }, { "epoch": 0.582785004379393, "grad_norm": 0.15495523810386658, "learning_rate": 4.417222234286666e-06, "loss": 0.8998, "step": 80510 }, { "epoch": 0.5828573910399791, "grad_norm": 0.1617932915687561, "learning_rate": 4.41714984762608e-06, "loss": 0.9129, "step": 80520 }, { "epoch": 0.5829297777005653, "grad_norm": 0.1493421494960785, "learning_rate": 4.4170774609654935e-06, "loss": 0.8968, "step": 80530 }, { "epoch": 0.5830021643611515, "grad_norm": 0.15675191581249237, "learning_rate": 4.417005074304908e-06, "loss": 0.9182, "step": 80540 }, { "epoch": 0.5830745510217377, "grad_norm": 0.16273775696754456, "learning_rate": 4.4169326876443216e-06, "loss": 0.9103, "step": 80550 }, { "epoch": 0.583146937682324, "grad_norm": 0.171338751912117, "learning_rate": 4.416860300983735e-06, "loss": 0.9151, "step": 80560 }, { "epoch": 0.5832193243429101, "grad_norm": 0.16506828367710114, "learning_rate": 4.416787914323149e-06, "loss": 0.9137, "step": 80570 }, { "epoch": 0.5832917110034963, "grad_norm": 0.1581031233072281, "learning_rate": 4.416715527662563e-06, "loss": 0.8992, "step": 80580 }, { "epoch": 0.5833640976640825, "grad_norm": 0.1575007438659668, "learning_rate": 4.416643141001977e-06, "loss": 0.9053, "step": 80590 }, { "epoch": 0.5834364843246687, "grad_norm": 0.17189882695674896, "learning_rate": 4.4165707543413905e-06, "loss": 0.9148, "step": 80600 }, { "epoch": 0.5835088709852548, "grad_norm": 0.16765065491199493, "learning_rate": 4.416498367680804e-06, "loss": 0.9063, "step": 80610 }, { "epoch": 0.583581257645841, "grad_norm": 0.15300755202770233, "learning_rate": 4.416425981020218e-06, "loss": 0.9059, "step": 80620 }, { "epoch": 0.5836536443064272, "grad_norm": 0.16409176588058472, "learning_rate": 4.416353594359632e-06, "loss": 0.9074, "step": 80630 }, { "epoch": 0.5837260309670134, "grad_norm": 0.1527976095676422, "learning_rate": 4.416281207699046e-06, "loss": 0.9057, "step": 80640 }, { "epoch": 0.5837984176275995, "grad_norm": 0.17555196583271027, "learning_rate": 4.416208821038459e-06, "loss": 0.918, "step": 80650 }, { "epoch": 0.5838708042881857, "grad_norm": 0.15772706270217896, "learning_rate": 4.416136434377873e-06, "loss": 0.9064, "step": 80660 }, { "epoch": 0.583943190948772, "grad_norm": 0.16107985377311707, "learning_rate": 4.4160640477172875e-06, "loss": 0.9153, "step": 80670 }, { "epoch": 0.5840155776093582, "grad_norm": 0.27143919467926025, "learning_rate": 4.415991661056701e-06, "loss": 0.9147, "step": 80680 }, { "epoch": 0.5840879642699444, "grad_norm": 0.1891665756702423, "learning_rate": 4.415919274396115e-06, "loss": 0.9206, "step": 80690 }, { "epoch": 0.5841603509305305, "grad_norm": 0.15576356649398804, "learning_rate": 4.415846887735528e-06, "loss": 0.9045, "step": 80700 }, { "epoch": 0.5842327375911167, "grad_norm": 0.16257961094379425, "learning_rate": 4.415774501074943e-06, "loss": 0.8978, "step": 80710 }, { "epoch": 0.5843051242517029, "grad_norm": 0.17086383700370789, "learning_rate": 4.4157021144143556e-06, "loss": 0.9039, "step": 80720 }, { "epoch": 0.5843775109122891, "grad_norm": 0.205778568983078, "learning_rate": 4.41562972775377e-06, "loss": 0.9195, "step": 80730 }, { "epoch": 0.5844498975728752, "grad_norm": 0.15765634179115295, "learning_rate": 4.415557341093184e-06, "loss": 0.9038, "step": 80740 }, { "epoch": 0.5845222842334614, "grad_norm": 0.14532588422298431, "learning_rate": 4.415484954432597e-06, "loss": 0.9122, "step": 80750 }, { "epoch": 0.5845946708940476, "grad_norm": 0.172866091132164, "learning_rate": 4.415412567772011e-06, "loss": 0.9122, "step": 80760 }, { "epoch": 0.5846670575546339, "grad_norm": 0.16551901400089264, "learning_rate": 4.415340181111425e-06, "loss": 0.9049, "step": 80770 }, { "epoch": 0.5847394442152201, "grad_norm": 0.15034866333007812, "learning_rate": 4.415267794450839e-06, "loss": 0.9006, "step": 80780 }, { "epoch": 0.5848118308758062, "grad_norm": 0.1626167744398117, "learning_rate": 4.4151954077902526e-06, "loss": 0.9122, "step": 80790 }, { "epoch": 0.5848842175363924, "grad_norm": 0.2411961555480957, "learning_rate": 4.415123021129666e-06, "loss": 0.9039, "step": 80800 }, { "epoch": 0.5849566041969786, "grad_norm": 0.16269677877426147, "learning_rate": 4.41505063446908e-06, "loss": 0.923, "step": 80810 }, { "epoch": 0.5850289908575648, "grad_norm": 0.15651679039001465, "learning_rate": 4.414978247808494e-06, "loss": 0.9018, "step": 80820 }, { "epoch": 0.585101377518151, "grad_norm": 0.1721392273902893, "learning_rate": 4.414905861147908e-06, "loss": 0.9058, "step": 80830 }, { "epoch": 0.5851737641787371, "grad_norm": 0.1486913561820984, "learning_rate": 4.4148334744873215e-06, "loss": 0.9067, "step": 80840 }, { "epoch": 0.5852461508393233, "grad_norm": 0.16526785492897034, "learning_rate": 4.414761087826735e-06, "loss": 0.9126, "step": 80850 }, { "epoch": 0.5853185374999095, "grad_norm": 0.18631219863891602, "learning_rate": 4.41468870116615e-06, "loss": 0.9067, "step": 80860 }, { "epoch": 0.5853909241604957, "grad_norm": 0.15394018590450287, "learning_rate": 4.414616314505563e-06, "loss": 0.9065, "step": 80870 }, { "epoch": 0.5854633108210819, "grad_norm": 0.15232212841510773, "learning_rate": 4.414543927844977e-06, "loss": 0.907, "step": 80880 }, { "epoch": 0.5855356974816681, "grad_norm": 0.15810738503932953, "learning_rate": 4.4144715411843904e-06, "loss": 0.9133, "step": 80890 }, { "epoch": 0.5856080841422543, "grad_norm": 0.15687547624111176, "learning_rate": 4.414399154523805e-06, "loss": 0.9222, "step": 80900 }, { "epoch": 0.5856804708028405, "grad_norm": 0.1554637998342514, "learning_rate": 4.4143267678632185e-06, "loss": 0.9004, "step": 80910 }, { "epoch": 0.5857528574634266, "grad_norm": 0.14952871203422546, "learning_rate": 4.414254381202632e-06, "loss": 0.9071, "step": 80920 }, { "epoch": 0.5858252441240128, "grad_norm": 0.15082089602947235, "learning_rate": 4.414181994542046e-06, "loss": 0.9173, "step": 80930 }, { "epoch": 0.585897630784599, "grad_norm": 0.1659436672925949, "learning_rate": 4.41410960788146e-06, "loss": 0.9038, "step": 80940 }, { "epoch": 0.5859700174451852, "grad_norm": 0.16675877571105957, "learning_rate": 4.414037221220874e-06, "loss": 0.9022, "step": 80950 }, { "epoch": 0.5860424041057714, "grad_norm": 0.17415930330753326, "learning_rate": 4.4139648345602874e-06, "loss": 0.912, "step": 80960 }, { "epoch": 0.5861147907663575, "grad_norm": 0.16868185997009277, "learning_rate": 4.413892447899701e-06, "loss": 0.9139, "step": 80970 }, { "epoch": 0.5861871774269438, "grad_norm": 0.1576145589351654, "learning_rate": 4.4138200612391155e-06, "loss": 0.9084, "step": 80980 }, { "epoch": 0.58625956408753, "grad_norm": 0.16312648355960846, "learning_rate": 4.413747674578529e-06, "loss": 0.9107, "step": 80990 }, { "epoch": 0.5863319507481162, "grad_norm": 0.1908407360315323, "learning_rate": 4.413675287917943e-06, "loss": 0.9057, "step": 81000 }, { "epoch": 0.5864043374087023, "grad_norm": 0.19931037724018097, "learning_rate": 4.413602901257356e-06, "loss": 0.9036, "step": 81010 }, { "epoch": 0.5864767240692885, "grad_norm": 0.15866592526435852, "learning_rate": 4.413530514596771e-06, "loss": 0.9027, "step": 81020 }, { "epoch": 0.5865491107298747, "grad_norm": 0.1934008151292801, "learning_rate": 4.4134581279361844e-06, "loss": 0.9087, "step": 81030 }, { "epoch": 0.5866214973904609, "grad_norm": 0.1468328833580017, "learning_rate": 4.413385741275598e-06, "loss": 0.9099, "step": 81040 }, { "epoch": 0.586693884051047, "grad_norm": 0.16777294874191284, "learning_rate": 4.413313354615012e-06, "loss": 0.8982, "step": 81050 }, { "epoch": 0.5867662707116332, "grad_norm": 0.15594753623008728, "learning_rate": 4.413240967954426e-06, "loss": 0.9146, "step": 81060 }, { "epoch": 0.5868386573722194, "grad_norm": 0.15250714123249054, "learning_rate": 4.41316858129384e-06, "loss": 0.9137, "step": 81070 }, { "epoch": 0.5869110440328056, "grad_norm": 0.15024009346961975, "learning_rate": 4.413096194633253e-06, "loss": 0.9134, "step": 81080 }, { "epoch": 0.5869834306933919, "grad_norm": 0.1556905061006546, "learning_rate": 4.413023807972667e-06, "loss": 0.9178, "step": 81090 }, { "epoch": 0.587055817353978, "grad_norm": 0.1638568490743637, "learning_rate": 4.4129514213120815e-06, "loss": 0.9126, "step": 81100 }, { "epoch": 0.5871282040145642, "grad_norm": 0.15844887495040894, "learning_rate": 4.412879034651495e-06, "loss": 0.9135, "step": 81110 }, { "epoch": 0.5872005906751504, "grad_norm": 0.1853482723236084, "learning_rate": 4.412806647990909e-06, "loss": 0.897, "step": 81120 }, { "epoch": 0.5872729773357366, "grad_norm": 0.15754495561122894, "learning_rate": 4.412734261330322e-06, "loss": 0.9092, "step": 81130 }, { "epoch": 0.5873453639963228, "grad_norm": 0.16885913908481598, "learning_rate": 4.412661874669737e-06, "loss": 0.9082, "step": 81140 }, { "epoch": 0.5874177506569089, "grad_norm": 0.16286881268024445, "learning_rate": 4.41258948800915e-06, "loss": 0.893, "step": 81150 }, { "epoch": 0.5874901373174951, "grad_norm": 0.14878816902637482, "learning_rate": 4.412517101348564e-06, "loss": 0.9028, "step": 81160 }, { "epoch": 0.5875625239780813, "grad_norm": 0.17409685254096985, "learning_rate": 4.412444714687978e-06, "loss": 0.9155, "step": 81170 }, { "epoch": 0.5876349106386675, "grad_norm": 0.1579102873802185, "learning_rate": 4.412372328027392e-06, "loss": 0.9146, "step": 81180 }, { "epoch": 0.5877072972992536, "grad_norm": 0.15453718602657318, "learning_rate": 4.412299941366806e-06, "loss": 0.8998, "step": 81190 }, { "epoch": 0.5877796839598399, "grad_norm": 0.16412879526615143, "learning_rate": 4.412227554706219e-06, "loss": 0.907, "step": 81200 }, { "epoch": 0.5878520706204261, "grad_norm": 0.1556544452905655, "learning_rate": 4.412155168045633e-06, "loss": 0.9067, "step": 81210 }, { "epoch": 0.5879244572810123, "grad_norm": 0.1514187902212143, "learning_rate": 4.412082781385047e-06, "loss": 0.909, "step": 81220 }, { "epoch": 0.5879968439415985, "grad_norm": 0.154129296541214, "learning_rate": 4.412010394724461e-06, "loss": 0.9017, "step": 81230 }, { "epoch": 0.5880692306021846, "grad_norm": 0.16333553194999695, "learning_rate": 4.411938008063875e-06, "loss": 0.9039, "step": 81240 }, { "epoch": 0.5881416172627708, "grad_norm": 0.1498739868402481, "learning_rate": 4.411865621403288e-06, "loss": 0.8942, "step": 81250 }, { "epoch": 0.588214003923357, "grad_norm": 0.1580556333065033, "learning_rate": 4.411793234742702e-06, "loss": 0.8984, "step": 81260 }, { "epoch": 0.5882863905839432, "grad_norm": 0.15687990188598633, "learning_rate": 4.4117208480821155e-06, "loss": 0.9096, "step": 81270 }, { "epoch": 0.5883587772445293, "grad_norm": 0.17736192047595978, "learning_rate": 4.411648461421529e-06, "loss": 0.9031, "step": 81280 }, { "epoch": 0.5884311639051155, "grad_norm": 0.1527496576309204, "learning_rate": 4.4115760747609435e-06, "loss": 0.8972, "step": 81290 }, { "epoch": 0.5885035505657018, "grad_norm": 0.17745208740234375, "learning_rate": 4.411503688100357e-06, "loss": 0.917, "step": 81300 }, { "epoch": 0.588575937226288, "grad_norm": 0.20360904932022095, "learning_rate": 4.411431301439771e-06, "loss": 0.9189, "step": 81310 }, { "epoch": 0.5886483238868742, "grad_norm": 0.1483248472213745, "learning_rate": 4.411358914779184e-06, "loss": 0.9098, "step": 81320 }, { "epoch": 0.5887207105474603, "grad_norm": 0.1655409038066864, "learning_rate": 4.411286528118599e-06, "loss": 0.9164, "step": 81330 }, { "epoch": 0.5887930972080465, "grad_norm": 0.1597280353307724, "learning_rate": 4.4112141414580125e-06, "loss": 0.9058, "step": 81340 }, { "epoch": 0.5888654838686327, "grad_norm": 0.16111089289188385, "learning_rate": 4.411141754797426e-06, "loss": 0.9048, "step": 81350 }, { "epoch": 0.5889378705292189, "grad_norm": 0.16747364401817322, "learning_rate": 4.41106936813684e-06, "loss": 0.9183, "step": 81360 }, { "epoch": 0.589010257189805, "grad_norm": 0.14955104887485504, "learning_rate": 4.410996981476254e-06, "loss": 0.9064, "step": 81370 }, { "epoch": 0.5890826438503912, "grad_norm": 0.15736231207847595, "learning_rate": 4.410924594815668e-06, "loss": 0.9048, "step": 81380 }, { "epoch": 0.5891550305109774, "grad_norm": 0.20818248391151428, "learning_rate": 4.410852208155081e-06, "loss": 0.8868, "step": 81390 }, { "epoch": 0.5892274171715636, "grad_norm": 0.15236660838127136, "learning_rate": 4.410779821494495e-06, "loss": 0.9082, "step": 81400 }, { "epoch": 0.5892998038321499, "grad_norm": 0.1863219141960144, "learning_rate": 4.4107074348339095e-06, "loss": 0.9106, "step": 81410 }, { "epoch": 0.589372190492736, "grad_norm": 0.14689262211322784, "learning_rate": 4.410635048173323e-06, "loss": 0.9158, "step": 81420 }, { "epoch": 0.5894445771533222, "grad_norm": 0.15385663509368896, "learning_rate": 4.410562661512737e-06, "loss": 0.9033, "step": 81430 }, { "epoch": 0.5895169638139084, "grad_norm": 0.18120580911636353, "learning_rate": 4.41049027485215e-06, "loss": 0.9147, "step": 81440 }, { "epoch": 0.5895893504744946, "grad_norm": 0.16570447385311127, "learning_rate": 4.410417888191564e-06, "loss": 0.9152, "step": 81450 }, { "epoch": 0.5896617371350807, "grad_norm": 0.15214945375919342, "learning_rate": 4.410345501530978e-06, "loss": 0.9046, "step": 81460 }, { "epoch": 0.5897341237956669, "grad_norm": 0.1923670470714569, "learning_rate": 4.410273114870392e-06, "loss": 0.918, "step": 81470 }, { "epoch": 0.5898065104562531, "grad_norm": 0.1635698527097702, "learning_rate": 4.410200728209806e-06, "loss": 0.9058, "step": 81480 }, { "epoch": 0.5898788971168393, "grad_norm": 0.1771516501903534, "learning_rate": 4.410128341549219e-06, "loss": 0.9068, "step": 81490 }, { "epoch": 0.5899512837774255, "grad_norm": 0.16354140639305115, "learning_rate": 4.410055954888634e-06, "loss": 0.9066, "step": 81500 }, { "epoch": 0.5900236704380117, "grad_norm": 0.16545943915843964, "learning_rate": 4.409983568228047e-06, "loss": 0.9138, "step": 81510 }, { "epoch": 0.5900960570985979, "grad_norm": 0.15025997161865234, "learning_rate": 4.409911181567461e-06, "loss": 0.8911, "step": 81520 }, { "epoch": 0.5901684437591841, "grad_norm": 0.17573940753936768, "learning_rate": 4.4098387949068746e-06, "loss": 0.9041, "step": 81530 }, { "epoch": 0.5902408304197703, "grad_norm": 0.17626507580280304, "learning_rate": 4.409766408246289e-06, "loss": 0.9036, "step": 81540 }, { "epoch": 0.5903132170803564, "grad_norm": 0.16366511583328247, "learning_rate": 4.409694021585703e-06, "loss": 0.9016, "step": 81550 }, { "epoch": 0.5903856037409426, "grad_norm": 0.17350682616233826, "learning_rate": 4.409621634925116e-06, "loss": 0.9112, "step": 81560 }, { "epoch": 0.5904579904015288, "grad_norm": 0.16151121258735657, "learning_rate": 4.40954924826453e-06, "loss": 0.9051, "step": 81570 }, { "epoch": 0.590530377062115, "grad_norm": 0.16566744446754456, "learning_rate": 4.409476861603944e-06, "loss": 0.9037, "step": 81580 }, { "epoch": 0.5906027637227012, "grad_norm": 0.16139444708824158, "learning_rate": 4.409404474943358e-06, "loss": 0.907, "step": 81590 }, { "epoch": 0.5906751503832873, "grad_norm": 0.16810381412506104, "learning_rate": 4.409332088282772e-06, "loss": 0.8919, "step": 81600 }, { "epoch": 0.5907475370438735, "grad_norm": 0.1652746945619583, "learning_rate": 4.409259701622185e-06, "loss": 0.9017, "step": 81610 }, { "epoch": 0.5908199237044598, "grad_norm": 0.1639404594898224, "learning_rate": 4.4091873149616e-06, "loss": 0.8995, "step": 81620 }, { "epoch": 0.590892310365046, "grad_norm": 0.1466279774904251, "learning_rate": 4.409114928301013e-06, "loss": 0.9177, "step": 81630 }, { "epoch": 0.5909646970256321, "grad_norm": 0.16004787385463715, "learning_rate": 4.409042541640427e-06, "loss": 0.9039, "step": 81640 }, { "epoch": 0.5910370836862183, "grad_norm": 0.15307894349098206, "learning_rate": 4.4089701549798405e-06, "loss": 0.9036, "step": 81650 }, { "epoch": 0.5911094703468045, "grad_norm": 0.19654370844364166, "learning_rate": 4.408897768319255e-06, "loss": 0.9107, "step": 81660 }, { "epoch": 0.5911818570073907, "grad_norm": 0.17902538180351257, "learning_rate": 4.408825381658669e-06, "loss": 0.9045, "step": 81670 }, { "epoch": 0.5912542436679769, "grad_norm": 0.1651085615158081, "learning_rate": 4.408752994998082e-06, "loss": 0.9106, "step": 81680 }, { "epoch": 0.591326630328563, "grad_norm": 0.1591925323009491, "learning_rate": 4.408680608337496e-06, "loss": 0.904, "step": 81690 }, { "epoch": 0.5913990169891492, "grad_norm": 0.1673898696899414, "learning_rate": 4.40860822167691e-06, "loss": 0.9005, "step": 81700 }, { "epoch": 0.5914714036497354, "grad_norm": 0.16726025938987732, "learning_rate": 4.408535835016324e-06, "loss": 0.9051, "step": 81710 }, { "epoch": 0.5915437903103216, "grad_norm": 0.16342008113861084, "learning_rate": 4.4084634483557375e-06, "loss": 0.9207, "step": 81720 }, { "epoch": 0.5916161769709078, "grad_norm": 0.15118926763534546, "learning_rate": 4.408391061695151e-06, "loss": 0.9022, "step": 81730 }, { "epoch": 0.591688563631494, "grad_norm": 0.15618331730365753, "learning_rate": 4.408318675034566e-06, "loss": 0.9089, "step": 81740 }, { "epoch": 0.5917609502920802, "grad_norm": 0.16020120680332184, "learning_rate": 4.408246288373979e-06, "loss": 0.9077, "step": 81750 }, { "epoch": 0.5918333369526664, "grad_norm": 0.15385285019874573, "learning_rate": 4.408173901713393e-06, "loss": 0.909, "step": 81760 }, { "epoch": 0.5919057236132526, "grad_norm": 0.15946872532367706, "learning_rate": 4.4081015150528064e-06, "loss": 0.9251, "step": 81770 }, { "epoch": 0.5919781102738387, "grad_norm": 0.16759130358695984, "learning_rate": 4.40802912839222e-06, "loss": 0.9045, "step": 81780 }, { "epoch": 0.5920504969344249, "grad_norm": 0.1478552371263504, "learning_rate": 4.407956741731634e-06, "loss": 0.9143, "step": 81790 }, { "epoch": 0.5921228835950111, "grad_norm": 0.18161530792713165, "learning_rate": 4.407884355071047e-06, "loss": 0.9115, "step": 81800 }, { "epoch": 0.5921952702555973, "grad_norm": 0.16163845360279083, "learning_rate": 4.407811968410462e-06, "loss": 0.8982, "step": 81810 }, { "epoch": 0.5922676569161834, "grad_norm": 0.15115785598754883, "learning_rate": 4.407739581749875e-06, "loss": 0.9166, "step": 81820 }, { "epoch": 0.5923400435767697, "grad_norm": 0.1659201681613922, "learning_rate": 4.407667195089289e-06, "loss": 0.9008, "step": 81830 }, { "epoch": 0.5924124302373559, "grad_norm": 0.1570078432559967, "learning_rate": 4.407594808428703e-06, "loss": 0.8988, "step": 81840 }, { "epoch": 0.5924848168979421, "grad_norm": 0.1584419459104538, "learning_rate": 4.407522421768117e-06, "loss": 0.9097, "step": 81850 }, { "epoch": 0.5925572035585283, "grad_norm": 0.18121954798698425, "learning_rate": 4.407450035107531e-06, "loss": 0.8932, "step": 81860 }, { "epoch": 0.5926295902191144, "grad_norm": 0.15848958492279053, "learning_rate": 4.407377648446944e-06, "loss": 0.9048, "step": 81870 }, { "epoch": 0.5927019768797006, "grad_norm": 0.17608241736888885, "learning_rate": 4.407305261786358e-06, "loss": 0.9077, "step": 81880 }, { "epoch": 0.5927743635402868, "grad_norm": 0.16470035910606384, "learning_rate": 4.407232875125772e-06, "loss": 0.9114, "step": 81890 }, { "epoch": 0.592846750200873, "grad_norm": 0.14142341911792755, "learning_rate": 4.407160488465186e-06, "loss": 0.8911, "step": 81900 }, { "epoch": 0.5929191368614591, "grad_norm": 0.151789128780365, "learning_rate": 4.4070881018046e-06, "loss": 0.9079, "step": 81910 }, { "epoch": 0.5929915235220453, "grad_norm": 0.15422183275222778, "learning_rate": 4.407015715144013e-06, "loss": 0.9028, "step": 81920 }, { "epoch": 0.5930639101826315, "grad_norm": 0.1466582864522934, "learning_rate": 4.406943328483428e-06, "loss": 0.9025, "step": 81930 }, { "epoch": 0.5931362968432178, "grad_norm": 0.16983705759048462, "learning_rate": 4.406870941822841e-06, "loss": 0.9048, "step": 81940 }, { "epoch": 0.593208683503804, "grad_norm": 0.1635872721672058, "learning_rate": 4.406798555162255e-06, "loss": 0.901, "step": 81950 }, { "epoch": 0.5932810701643901, "grad_norm": 0.17607009410858154, "learning_rate": 4.4067261685016685e-06, "loss": 0.9248, "step": 81960 }, { "epoch": 0.5933534568249763, "grad_norm": 0.1671140491962433, "learning_rate": 4.406653781841083e-06, "loss": 0.9005, "step": 81970 }, { "epoch": 0.5934258434855625, "grad_norm": 0.17591434717178345, "learning_rate": 4.406581395180497e-06, "loss": 0.909, "step": 81980 }, { "epoch": 0.5934982301461487, "grad_norm": 0.17302151024341583, "learning_rate": 4.40650900851991e-06, "loss": 0.9135, "step": 81990 }, { "epoch": 0.5935706168067348, "grad_norm": 0.169133722782135, "learning_rate": 4.406436621859324e-06, "loss": 0.8986, "step": 82000 }, { "epoch": 0.593643003467321, "grad_norm": 0.1573852002620697, "learning_rate": 4.406364235198738e-06, "loss": 0.9064, "step": 82010 }, { "epoch": 0.5937153901279072, "grad_norm": 0.1580667495727539, "learning_rate": 4.406291848538152e-06, "loss": 0.9076, "step": 82020 }, { "epoch": 0.5937877767884934, "grad_norm": 0.15899869799613953, "learning_rate": 4.4062194618775655e-06, "loss": 0.9142, "step": 82030 }, { "epoch": 0.5938601634490797, "grad_norm": 0.17980434000492096, "learning_rate": 4.406147075216979e-06, "loss": 0.9041, "step": 82040 }, { "epoch": 0.5939325501096658, "grad_norm": 0.1649356186389923, "learning_rate": 4.406074688556393e-06, "loss": 0.9299, "step": 82050 }, { "epoch": 0.594004936770252, "grad_norm": 0.15136809647083282, "learning_rate": 4.406002301895807e-06, "loss": 0.9082, "step": 82060 }, { "epoch": 0.5940773234308382, "grad_norm": 0.15223738551139832, "learning_rate": 4.405929915235221e-06, "loss": 0.9107, "step": 82070 }, { "epoch": 0.5941497100914244, "grad_norm": 0.1510484218597412, "learning_rate": 4.4058575285746345e-06, "loss": 0.9013, "step": 82080 }, { "epoch": 0.5942220967520105, "grad_norm": 0.17146191000938416, "learning_rate": 4.405785141914048e-06, "loss": 0.9101, "step": 82090 }, { "epoch": 0.5942944834125967, "grad_norm": 0.19103531539440155, "learning_rate": 4.4057127552534626e-06, "loss": 0.9044, "step": 82100 }, { "epoch": 0.5943668700731829, "grad_norm": 0.1852683573961258, "learning_rate": 4.405640368592876e-06, "loss": 0.905, "step": 82110 }, { "epoch": 0.5944392567337691, "grad_norm": 0.17115405201911926, "learning_rate": 4.40556798193229e-06, "loss": 0.903, "step": 82120 }, { "epoch": 0.5945116433943552, "grad_norm": 0.1565488874912262, "learning_rate": 4.405495595271703e-06, "loss": 0.8996, "step": 82130 }, { "epoch": 0.5945840300549414, "grad_norm": 0.16356854140758514, "learning_rate": 4.405423208611118e-06, "loss": 0.9087, "step": 82140 }, { "epoch": 0.5946564167155277, "grad_norm": 0.16093464195728302, "learning_rate": 4.4053508219505315e-06, "loss": 0.9107, "step": 82150 }, { "epoch": 0.5947288033761139, "grad_norm": 0.16113322973251343, "learning_rate": 4.405278435289945e-06, "loss": 0.9077, "step": 82160 }, { "epoch": 0.5948011900367001, "grad_norm": 0.19098584353923798, "learning_rate": 4.405206048629359e-06, "loss": 0.8965, "step": 82170 }, { "epoch": 0.5948735766972862, "grad_norm": 0.46058592200279236, "learning_rate": 4.405133661968773e-06, "loss": 0.9037, "step": 82180 }, { "epoch": 0.5949459633578724, "grad_norm": 0.15246206521987915, "learning_rate": 4.405061275308187e-06, "loss": 0.9013, "step": 82190 }, { "epoch": 0.5950183500184586, "grad_norm": 0.16424813866615295, "learning_rate": 4.4049888886476e-06, "loss": 0.8992, "step": 82200 }, { "epoch": 0.5950907366790448, "grad_norm": 0.16102401912212372, "learning_rate": 4.404916501987014e-06, "loss": 0.9074, "step": 82210 }, { "epoch": 0.595163123339631, "grad_norm": 0.16575241088867188, "learning_rate": 4.4048441153264285e-06, "loss": 0.9125, "step": 82220 }, { "epoch": 0.5952355100002171, "grad_norm": 0.15276582539081573, "learning_rate": 4.404771728665842e-06, "loss": 0.8948, "step": 82230 }, { "epoch": 0.5953078966608033, "grad_norm": 0.16686709225177765, "learning_rate": 4.404699342005256e-06, "loss": 0.9082, "step": 82240 }, { "epoch": 0.5953802833213895, "grad_norm": 0.19125671684741974, "learning_rate": 4.404626955344669e-06, "loss": 0.9021, "step": 82250 }, { "epoch": 0.5954526699819758, "grad_norm": 0.15619054436683655, "learning_rate": 4.404554568684084e-06, "loss": 0.9076, "step": 82260 }, { "epoch": 0.595525056642562, "grad_norm": 0.1474914848804474, "learning_rate": 4.404482182023497e-06, "loss": 0.9121, "step": 82270 }, { "epoch": 0.5955974433031481, "grad_norm": 0.1519433706998825, "learning_rate": 4.404409795362911e-06, "loss": 0.9154, "step": 82280 }, { "epoch": 0.5956698299637343, "grad_norm": 0.15717659890651703, "learning_rate": 4.404337408702325e-06, "loss": 0.9113, "step": 82290 }, { "epoch": 0.5957422166243205, "grad_norm": 0.1966000646352768, "learning_rate": 4.404265022041739e-06, "loss": 0.9156, "step": 82300 }, { "epoch": 0.5958146032849067, "grad_norm": 0.16030791401863098, "learning_rate": 4.404192635381152e-06, "loss": 0.9195, "step": 82310 }, { "epoch": 0.5958869899454928, "grad_norm": 0.16544552147388458, "learning_rate": 4.4041202487205655e-06, "loss": 0.9117, "step": 82320 }, { "epoch": 0.595959376606079, "grad_norm": 0.15709573030471802, "learning_rate": 4.40404786205998e-06, "loss": 0.9049, "step": 82330 }, { "epoch": 0.5960317632666652, "grad_norm": 0.15841509401798248, "learning_rate": 4.403975475399394e-06, "loss": 0.9054, "step": 82340 }, { "epoch": 0.5961041499272514, "grad_norm": 0.15718714892864227, "learning_rate": 4.403903088738807e-06, "loss": 0.8977, "step": 82350 }, { "epoch": 0.5961765365878376, "grad_norm": 0.1640692800283432, "learning_rate": 4.403830702078221e-06, "loss": 0.9121, "step": 82360 }, { "epoch": 0.5962489232484238, "grad_norm": 0.1538037806749344, "learning_rate": 4.403758315417635e-06, "loss": 0.9113, "step": 82370 }, { "epoch": 0.59632130990901, "grad_norm": 0.18401135504245758, "learning_rate": 4.403685928757049e-06, "loss": 0.8984, "step": 82380 }, { "epoch": 0.5963936965695962, "grad_norm": 0.1522941142320633, "learning_rate": 4.4036135420964625e-06, "loss": 0.9004, "step": 82390 }, { "epoch": 0.5964660832301824, "grad_norm": 0.16432693600654602, "learning_rate": 4.403541155435876e-06, "loss": 0.9088, "step": 82400 }, { "epoch": 0.5965384698907685, "grad_norm": 0.16186141967773438, "learning_rate": 4.403468768775291e-06, "loss": 0.8907, "step": 82410 }, { "epoch": 0.5966108565513547, "grad_norm": 0.16021370887756348, "learning_rate": 4.403396382114704e-06, "loss": 0.906, "step": 82420 }, { "epoch": 0.5966832432119409, "grad_norm": 0.1591646671295166, "learning_rate": 4.403323995454118e-06, "loss": 0.9104, "step": 82430 }, { "epoch": 0.5967556298725271, "grad_norm": 0.16218051314353943, "learning_rate": 4.4032516087935314e-06, "loss": 0.8933, "step": 82440 }, { "epoch": 0.5968280165331132, "grad_norm": 0.1592613160610199, "learning_rate": 4.403179222132946e-06, "loss": 0.9027, "step": 82450 }, { "epoch": 0.5969004031936994, "grad_norm": 0.15859758853912354, "learning_rate": 4.4031068354723595e-06, "loss": 0.9131, "step": 82460 }, { "epoch": 0.5969727898542857, "grad_norm": 0.1566203236579895, "learning_rate": 4.403034448811773e-06, "loss": 0.9032, "step": 82470 }, { "epoch": 0.5970451765148719, "grad_norm": 0.14999212324619293, "learning_rate": 4.402962062151187e-06, "loss": 0.9117, "step": 82480 }, { "epoch": 0.597117563175458, "grad_norm": 0.18934468924999237, "learning_rate": 4.402889675490601e-06, "loss": 0.909, "step": 82490 }, { "epoch": 0.5971899498360442, "grad_norm": 0.15167193114757538, "learning_rate": 4.402817288830015e-06, "loss": 0.8999, "step": 82500 }, { "epoch": 0.5972623364966304, "grad_norm": 0.16032280027866364, "learning_rate": 4.4027449021694284e-06, "loss": 0.8981, "step": 82510 }, { "epoch": 0.5973347231572166, "grad_norm": 0.16216394305229187, "learning_rate": 4.402672515508842e-06, "loss": 0.9033, "step": 82520 }, { "epoch": 0.5974071098178028, "grad_norm": 0.15446747839450836, "learning_rate": 4.4026001288482565e-06, "loss": 0.9062, "step": 82530 }, { "epoch": 0.5974794964783889, "grad_norm": 0.1542857438325882, "learning_rate": 4.40252774218767e-06, "loss": 0.9097, "step": 82540 }, { "epoch": 0.5975518831389751, "grad_norm": 0.1710725724697113, "learning_rate": 4.402455355527084e-06, "loss": 0.9131, "step": 82550 }, { "epoch": 0.5976242697995613, "grad_norm": 0.16666147112846375, "learning_rate": 4.402382968866497e-06, "loss": 0.905, "step": 82560 }, { "epoch": 0.5976966564601475, "grad_norm": 0.17649833858013153, "learning_rate": 4.402310582205912e-06, "loss": 0.8965, "step": 82570 }, { "epoch": 0.5977690431207338, "grad_norm": 0.15569385886192322, "learning_rate": 4.4022381955453255e-06, "loss": 0.8984, "step": 82580 }, { "epoch": 0.5978414297813199, "grad_norm": 0.1647312492132187, "learning_rate": 4.402165808884739e-06, "loss": 0.9114, "step": 82590 }, { "epoch": 0.5979138164419061, "grad_norm": 0.15604494512081146, "learning_rate": 4.402093422224153e-06, "loss": 0.9011, "step": 82600 }, { "epoch": 0.5979862031024923, "grad_norm": 0.16003774106502533, "learning_rate": 4.402021035563567e-06, "loss": 0.9036, "step": 82610 }, { "epoch": 0.5980585897630785, "grad_norm": 0.15925242006778717, "learning_rate": 4.401948648902981e-06, "loss": 0.8946, "step": 82620 }, { "epoch": 0.5981309764236646, "grad_norm": 0.1530761569738388, "learning_rate": 4.401876262242394e-06, "loss": 0.9071, "step": 82630 }, { "epoch": 0.5982033630842508, "grad_norm": 0.15942499041557312, "learning_rate": 4.401803875581808e-06, "loss": 0.9162, "step": 82640 }, { "epoch": 0.598275749744837, "grad_norm": 0.1493762582540512, "learning_rate": 4.4017314889212225e-06, "loss": 0.905, "step": 82650 }, { "epoch": 0.5983481364054232, "grad_norm": 0.1528092622756958, "learning_rate": 4.401659102260636e-06, "loss": 0.9097, "step": 82660 }, { "epoch": 0.5984205230660093, "grad_norm": 0.18498297035694122, "learning_rate": 4.40158671560005e-06, "loss": 0.8937, "step": 82670 }, { "epoch": 0.5984929097265956, "grad_norm": 0.19264182448387146, "learning_rate": 4.401514328939463e-06, "loss": 0.9167, "step": 82680 }, { "epoch": 0.5985652963871818, "grad_norm": 0.14750641584396362, "learning_rate": 4.401441942278877e-06, "loss": 0.913, "step": 82690 }, { "epoch": 0.598637683047768, "grad_norm": 0.2046324908733368, "learning_rate": 4.401369555618291e-06, "loss": 0.9094, "step": 82700 }, { "epoch": 0.5987100697083542, "grad_norm": 0.16712640225887299, "learning_rate": 4.401297168957705e-06, "loss": 0.9057, "step": 82710 }, { "epoch": 0.5987824563689403, "grad_norm": 0.1629089117050171, "learning_rate": 4.401224782297119e-06, "loss": 0.9194, "step": 82720 }, { "epoch": 0.5988548430295265, "grad_norm": 0.15542463958263397, "learning_rate": 4.401152395636532e-06, "loss": 0.8874, "step": 82730 }, { "epoch": 0.5989272296901127, "grad_norm": 0.16220228374004364, "learning_rate": 4.401080008975947e-06, "loss": 0.9143, "step": 82740 }, { "epoch": 0.5989996163506989, "grad_norm": 0.15400896966457367, "learning_rate": 4.40100762231536e-06, "loss": 0.8985, "step": 82750 }, { "epoch": 0.599072003011285, "grad_norm": 0.16913191974163055, "learning_rate": 4.400935235654774e-06, "loss": 0.9012, "step": 82760 }, { "epoch": 0.5991443896718712, "grad_norm": 0.1506996899843216, "learning_rate": 4.4008628489941875e-06, "loss": 0.9109, "step": 82770 }, { "epoch": 0.5992167763324574, "grad_norm": 0.15028440952301025, "learning_rate": 4.400790462333602e-06, "loss": 0.9002, "step": 82780 }, { "epoch": 0.5992891629930437, "grad_norm": 0.15182295441627502, "learning_rate": 4.400718075673016e-06, "loss": 0.8906, "step": 82790 }, { "epoch": 0.5993615496536299, "grad_norm": 0.1721028983592987, "learning_rate": 4.400645689012429e-06, "loss": 0.9059, "step": 82800 }, { "epoch": 0.599433936314216, "grad_norm": 0.1762334257364273, "learning_rate": 4.400573302351843e-06, "loss": 0.8982, "step": 82810 }, { "epoch": 0.5995063229748022, "grad_norm": 0.15542656183242798, "learning_rate": 4.400500915691257e-06, "loss": 0.9063, "step": 82820 }, { "epoch": 0.5995787096353884, "grad_norm": 0.1622530221939087, "learning_rate": 4.400428529030671e-06, "loss": 0.906, "step": 82830 }, { "epoch": 0.5996510962959746, "grad_norm": 0.19038499891757965, "learning_rate": 4.400356142370084e-06, "loss": 0.9196, "step": 82840 }, { "epoch": 0.5997234829565607, "grad_norm": 0.15594981610774994, "learning_rate": 4.400283755709498e-06, "loss": 0.8982, "step": 82850 }, { "epoch": 0.5997958696171469, "grad_norm": 0.1520778387784958, "learning_rate": 4.400211369048912e-06, "loss": 0.912, "step": 82860 }, { "epoch": 0.5998682562777331, "grad_norm": 0.15902015566825867, "learning_rate": 4.400138982388325e-06, "loss": 0.9152, "step": 82870 }, { "epoch": 0.5999406429383193, "grad_norm": 0.1633022576570511, "learning_rate": 4.400066595727739e-06, "loss": 0.8891, "step": 82880 }, { "epoch": 0.6000130295989056, "grad_norm": 0.1755109429359436, "learning_rate": 4.3999942090671535e-06, "loss": 0.906, "step": 82890 }, { "epoch": 0.6000854162594917, "grad_norm": 0.15575723350048065, "learning_rate": 4.399921822406567e-06, "loss": 0.9105, "step": 82900 }, { "epoch": 0.6001578029200779, "grad_norm": 0.1682191789150238, "learning_rate": 4.399849435745981e-06, "loss": 0.8904, "step": 82910 }, { "epoch": 0.6002301895806641, "grad_norm": 0.1746690571308136, "learning_rate": 4.399777049085394e-06, "loss": 0.9284, "step": 82920 }, { "epoch": 0.6003025762412503, "grad_norm": 0.16141672432422638, "learning_rate": 4.399704662424809e-06, "loss": 0.8947, "step": 82930 }, { "epoch": 0.6003749629018365, "grad_norm": 0.17700308561325073, "learning_rate": 4.399632275764222e-06, "loss": 0.9184, "step": 82940 }, { "epoch": 0.6004473495624226, "grad_norm": 0.16985797882080078, "learning_rate": 4.399559889103636e-06, "loss": 0.905, "step": 82950 }, { "epoch": 0.6005197362230088, "grad_norm": 0.17443716526031494, "learning_rate": 4.39948750244305e-06, "loss": 0.9197, "step": 82960 }, { "epoch": 0.600592122883595, "grad_norm": 0.1507861465215683, "learning_rate": 4.399415115782464e-06, "loss": 0.9189, "step": 82970 }, { "epoch": 0.6006645095441812, "grad_norm": 0.16039422154426575, "learning_rate": 4.399342729121878e-06, "loss": 0.9017, "step": 82980 }, { "epoch": 0.6007368962047673, "grad_norm": 0.15274271368980408, "learning_rate": 4.399270342461291e-06, "loss": 0.9117, "step": 82990 }, { "epoch": 0.6008092828653536, "grad_norm": 0.18588446080684662, "learning_rate": 4.399197955800705e-06, "loss": 0.9046, "step": 83000 }, { "epoch": 0.6008816695259398, "grad_norm": 0.15456266701221466, "learning_rate": 4.399125569140119e-06, "loss": 0.8961, "step": 83010 }, { "epoch": 0.600954056186526, "grad_norm": 0.17130137979984283, "learning_rate": 4.399053182479533e-06, "loss": 0.9042, "step": 83020 }, { "epoch": 0.6010264428471122, "grad_norm": 0.15653465688228607, "learning_rate": 4.398980795818947e-06, "loss": 0.9129, "step": 83030 }, { "epoch": 0.6010988295076983, "grad_norm": 0.1832706183195114, "learning_rate": 4.39890840915836e-06, "loss": 0.9146, "step": 83040 }, { "epoch": 0.6011712161682845, "grad_norm": 0.1502702534198761, "learning_rate": 4.398836022497775e-06, "loss": 0.8918, "step": 83050 }, { "epoch": 0.6012436028288707, "grad_norm": 0.19595682621002197, "learning_rate": 4.398763635837188e-06, "loss": 0.8992, "step": 83060 }, { "epoch": 0.6013159894894569, "grad_norm": 0.16274172067642212, "learning_rate": 4.398691249176602e-06, "loss": 0.8992, "step": 83070 }, { "epoch": 0.601388376150043, "grad_norm": 0.15060241520404816, "learning_rate": 4.398618862516016e-06, "loss": 0.9031, "step": 83080 }, { "epoch": 0.6014607628106292, "grad_norm": 0.19672121107578278, "learning_rate": 4.39854647585543e-06, "loss": 0.9027, "step": 83090 }, { "epoch": 0.6015331494712154, "grad_norm": 0.1655958890914917, "learning_rate": 4.398474089194844e-06, "loss": 0.9133, "step": 83100 }, { "epoch": 0.6016055361318017, "grad_norm": 0.16771474480628967, "learning_rate": 4.398401702534257e-06, "loss": 0.9074, "step": 83110 }, { "epoch": 0.6016779227923879, "grad_norm": 0.16556835174560547, "learning_rate": 4.398329315873671e-06, "loss": 0.8982, "step": 83120 }, { "epoch": 0.601750309452974, "grad_norm": 0.15332920849323273, "learning_rate": 4.398256929213085e-06, "loss": 0.8985, "step": 83130 }, { "epoch": 0.6018226961135602, "grad_norm": 0.1574867218732834, "learning_rate": 4.398184542552499e-06, "loss": 0.9085, "step": 83140 }, { "epoch": 0.6018950827741464, "grad_norm": 0.17546038329601288, "learning_rate": 4.398112155891913e-06, "loss": 0.9062, "step": 83150 }, { "epoch": 0.6019674694347326, "grad_norm": 0.1600867062807083, "learning_rate": 4.398039769231326e-06, "loss": 0.914, "step": 83160 }, { "epoch": 0.6020398560953187, "grad_norm": 0.1494045853614807, "learning_rate": 4.397967382570741e-06, "loss": 0.8953, "step": 83170 }, { "epoch": 0.6021122427559049, "grad_norm": 0.15468309819698334, "learning_rate": 4.397894995910154e-06, "loss": 0.9284, "step": 83180 }, { "epoch": 0.6021846294164911, "grad_norm": 0.2398003190755844, "learning_rate": 4.397822609249568e-06, "loss": 0.9161, "step": 83190 }, { "epoch": 0.6022570160770773, "grad_norm": 0.1596948802471161, "learning_rate": 4.3977502225889815e-06, "loss": 0.8963, "step": 83200 }, { "epoch": 0.6023294027376636, "grad_norm": 0.16264113783836365, "learning_rate": 4.397677835928396e-06, "loss": 0.9142, "step": 83210 }, { "epoch": 0.6024017893982497, "grad_norm": 0.1623883843421936, "learning_rate": 4.39760544926781e-06, "loss": 0.8912, "step": 83220 }, { "epoch": 0.6024741760588359, "grad_norm": 0.16414840519428253, "learning_rate": 4.397533062607223e-06, "loss": 0.9061, "step": 83230 }, { "epoch": 0.6025465627194221, "grad_norm": 0.16708609461784363, "learning_rate": 4.397460675946637e-06, "loss": 0.9081, "step": 83240 }, { "epoch": 0.6026189493800083, "grad_norm": 0.15145167708396912, "learning_rate": 4.397388289286051e-06, "loss": 0.8966, "step": 83250 }, { "epoch": 0.6026913360405944, "grad_norm": 0.31119629740715027, "learning_rate": 4.397315902625465e-06, "loss": 0.9119, "step": 83260 }, { "epoch": 0.6027637227011806, "grad_norm": 0.16512273252010345, "learning_rate": 4.3972435159648785e-06, "loss": 0.9058, "step": 83270 }, { "epoch": 0.6028361093617668, "grad_norm": 0.2106596678495407, "learning_rate": 4.397171129304292e-06, "loss": 0.896, "step": 83280 }, { "epoch": 0.602908496022353, "grad_norm": 0.16792406141757965, "learning_rate": 4.397098742643706e-06, "loss": 0.9028, "step": 83290 }, { "epoch": 0.6029808826829391, "grad_norm": 0.1586570292711258, "learning_rate": 4.39702635598312e-06, "loss": 0.9083, "step": 83300 }, { "epoch": 0.6030532693435253, "grad_norm": 0.18892773985862732, "learning_rate": 4.396953969322534e-06, "loss": 0.9072, "step": 83310 }, { "epoch": 0.6031256560041116, "grad_norm": 0.17914731800556183, "learning_rate": 4.3968815826619475e-06, "loss": 0.8988, "step": 83320 }, { "epoch": 0.6031980426646978, "grad_norm": 0.15073609352111816, "learning_rate": 4.396809196001361e-06, "loss": 0.9003, "step": 83330 }, { "epoch": 0.603270429325284, "grad_norm": 0.16799314320087433, "learning_rate": 4.3967368093407755e-06, "loss": 0.9045, "step": 83340 }, { "epoch": 0.6033428159858701, "grad_norm": 0.16455517709255219, "learning_rate": 4.396664422680189e-06, "loss": 0.9059, "step": 83350 }, { "epoch": 0.6034152026464563, "grad_norm": 0.22611026465892792, "learning_rate": 4.396592036019603e-06, "loss": 0.9087, "step": 83360 }, { "epoch": 0.6034875893070425, "grad_norm": 0.15722450613975525, "learning_rate": 4.396519649359016e-06, "loss": 0.8941, "step": 83370 }, { "epoch": 0.6035599759676287, "grad_norm": 0.1701124906539917, "learning_rate": 4.39644726269843e-06, "loss": 0.9138, "step": 83380 }, { "epoch": 0.6036323626282148, "grad_norm": 0.1639043539762497, "learning_rate": 4.396374876037844e-06, "loss": 0.91, "step": 83390 }, { "epoch": 0.603704749288801, "grad_norm": 0.15046794712543488, "learning_rate": 4.396302489377258e-06, "loss": 0.898, "step": 83400 }, { "epoch": 0.6037771359493872, "grad_norm": 0.1862155944108963, "learning_rate": 4.396230102716672e-06, "loss": 0.9124, "step": 83410 }, { "epoch": 0.6038495226099735, "grad_norm": 0.16142509877681732, "learning_rate": 4.396157716056085e-06, "loss": 0.8921, "step": 83420 }, { "epoch": 0.6039219092705597, "grad_norm": 0.14882884919643402, "learning_rate": 4.396085329395499e-06, "loss": 0.9176, "step": 83430 }, { "epoch": 0.6039942959311458, "grad_norm": 0.1499185711145401, "learning_rate": 4.396012942734913e-06, "loss": 0.9056, "step": 83440 }, { "epoch": 0.604066682591732, "grad_norm": 0.15857931971549988, "learning_rate": 4.395940556074327e-06, "loss": 0.8983, "step": 83450 }, { "epoch": 0.6041390692523182, "grad_norm": 0.16343648731708527, "learning_rate": 4.395868169413741e-06, "loss": 0.9089, "step": 83460 }, { "epoch": 0.6042114559129044, "grad_norm": 0.1648421734571457, "learning_rate": 4.395795782753154e-06, "loss": 0.9001, "step": 83470 }, { "epoch": 0.6042838425734905, "grad_norm": 0.15347935259342194, "learning_rate": 4.395723396092568e-06, "loss": 0.9173, "step": 83480 }, { "epoch": 0.6043562292340767, "grad_norm": 0.1825142502784729, "learning_rate": 4.395651009431982e-06, "loss": 0.8914, "step": 83490 }, { "epoch": 0.6044286158946629, "grad_norm": 0.17557092010974884, "learning_rate": 4.395578622771396e-06, "loss": 0.8938, "step": 83500 }, { "epoch": 0.6045010025552491, "grad_norm": 0.15079079568386078, "learning_rate": 4.3955062361108095e-06, "loss": 0.8986, "step": 83510 }, { "epoch": 0.6045733892158353, "grad_norm": 0.16225622594356537, "learning_rate": 4.395433849450223e-06, "loss": 0.8948, "step": 83520 }, { "epoch": 0.6046457758764215, "grad_norm": 0.14633281528949738, "learning_rate": 4.395361462789638e-06, "loss": 0.9021, "step": 83530 }, { "epoch": 0.6047181625370077, "grad_norm": 0.15504805743694305, "learning_rate": 4.395289076129051e-06, "loss": 0.9164, "step": 83540 }, { "epoch": 0.6047905491975939, "grad_norm": 0.1608634889125824, "learning_rate": 4.395216689468465e-06, "loss": 0.9065, "step": 83550 }, { "epoch": 0.6048629358581801, "grad_norm": 0.15265637636184692, "learning_rate": 4.3951443028078785e-06, "loss": 0.9109, "step": 83560 }, { "epoch": 0.6049353225187662, "grad_norm": 0.16638517379760742, "learning_rate": 4.395071916147293e-06, "loss": 0.8959, "step": 83570 }, { "epoch": 0.6050077091793524, "grad_norm": 0.15558472275733948, "learning_rate": 4.3949995294867066e-06, "loss": 0.8929, "step": 83580 }, { "epoch": 0.6050800958399386, "grad_norm": 0.1535881906747818, "learning_rate": 4.39492714282612e-06, "loss": 0.9172, "step": 83590 }, { "epoch": 0.6051524825005248, "grad_norm": 0.14769315719604492, "learning_rate": 4.394854756165534e-06, "loss": 0.9152, "step": 83600 }, { "epoch": 0.605224869161111, "grad_norm": 0.1421438306570053, "learning_rate": 4.394782369504948e-06, "loss": 0.91, "step": 83610 }, { "epoch": 0.6052972558216971, "grad_norm": 0.18984383344650269, "learning_rate": 4.394709982844362e-06, "loss": 0.9114, "step": 83620 }, { "epoch": 0.6053696424822833, "grad_norm": 0.15073958039283752, "learning_rate": 4.3946375961837755e-06, "loss": 0.9094, "step": 83630 }, { "epoch": 0.6054420291428696, "grad_norm": 0.15686622262001038, "learning_rate": 4.394565209523189e-06, "loss": 0.9015, "step": 83640 }, { "epoch": 0.6055144158034558, "grad_norm": 0.15890394151210785, "learning_rate": 4.3944928228626036e-06, "loss": 0.9264, "step": 83650 }, { "epoch": 0.605586802464042, "grad_norm": 0.15338025987148285, "learning_rate": 4.394420436202017e-06, "loss": 0.8971, "step": 83660 }, { "epoch": 0.6056591891246281, "grad_norm": 0.1526980698108673, "learning_rate": 4.394348049541431e-06, "loss": 0.9015, "step": 83670 }, { "epoch": 0.6057315757852143, "grad_norm": 0.15152287483215332, "learning_rate": 4.394275662880844e-06, "loss": 0.8988, "step": 83680 }, { "epoch": 0.6058039624458005, "grad_norm": 0.15338623523712158, "learning_rate": 4.394203276220259e-06, "loss": 0.9021, "step": 83690 }, { "epoch": 0.6058763491063867, "grad_norm": 0.1615828573703766, "learning_rate": 4.3941308895596725e-06, "loss": 0.9109, "step": 83700 }, { "epoch": 0.6059487357669728, "grad_norm": 0.16938403248786926, "learning_rate": 4.394058502899086e-06, "loss": 0.8999, "step": 83710 }, { "epoch": 0.606021122427559, "grad_norm": 0.154579296708107, "learning_rate": 4.3939861162385e-06, "loss": 0.8897, "step": 83720 }, { "epoch": 0.6060935090881452, "grad_norm": 0.1727149486541748, "learning_rate": 4.393913729577914e-06, "loss": 0.9162, "step": 83730 }, { "epoch": 0.6061658957487315, "grad_norm": 0.16071633994579315, "learning_rate": 4.393841342917328e-06, "loss": 0.9169, "step": 83740 }, { "epoch": 0.6062382824093177, "grad_norm": 0.17092163860797882, "learning_rate": 4.393768956256741e-06, "loss": 0.9072, "step": 83750 }, { "epoch": 0.6063106690699038, "grad_norm": 0.15256325900554657, "learning_rate": 4.393696569596155e-06, "loss": 0.8937, "step": 83760 }, { "epoch": 0.60638305573049, "grad_norm": 0.16219750046730042, "learning_rate": 4.3936241829355695e-06, "loss": 0.9149, "step": 83770 }, { "epoch": 0.6064554423910762, "grad_norm": 0.15577305853366852, "learning_rate": 4.393551796274983e-06, "loss": 0.9136, "step": 83780 }, { "epoch": 0.6065278290516624, "grad_norm": 0.20615074038505554, "learning_rate": 4.393479409614397e-06, "loss": 0.9098, "step": 83790 }, { "epoch": 0.6066002157122485, "grad_norm": 0.1500859558582306, "learning_rate": 4.39340702295381e-06, "loss": 0.908, "step": 83800 }, { "epoch": 0.6066726023728347, "grad_norm": 0.1555171012878418, "learning_rate": 4.393334636293225e-06, "loss": 0.8914, "step": 83810 }, { "epoch": 0.6067449890334209, "grad_norm": 0.16960106790065765, "learning_rate": 4.3932622496326384e-06, "loss": 0.9178, "step": 83820 }, { "epoch": 0.6068173756940071, "grad_norm": 0.1492023915052414, "learning_rate": 4.393189862972052e-06, "loss": 0.9121, "step": 83830 }, { "epoch": 0.6068897623545932, "grad_norm": 0.16274462640285492, "learning_rate": 4.393117476311466e-06, "loss": 0.9152, "step": 83840 }, { "epoch": 0.6069621490151795, "grad_norm": 0.1625206470489502, "learning_rate": 4.39304508965088e-06, "loss": 0.9116, "step": 83850 }, { "epoch": 0.6070345356757657, "grad_norm": 0.15123465657234192, "learning_rate": 4.392972702990294e-06, "loss": 0.9061, "step": 83860 }, { "epoch": 0.6071069223363519, "grad_norm": 0.17103265225887299, "learning_rate": 4.392900316329707e-06, "loss": 0.9021, "step": 83870 }, { "epoch": 0.6071793089969381, "grad_norm": 0.19635428488254547, "learning_rate": 4.392827929669121e-06, "loss": 0.8978, "step": 83880 }, { "epoch": 0.6072516956575242, "grad_norm": 0.14300402998924255, "learning_rate": 4.3927555430085354e-06, "loss": 0.9072, "step": 83890 }, { "epoch": 0.6073240823181104, "grad_norm": 0.1507839858531952, "learning_rate": 4.392683156347948e-06, "loss": 0.9102, "step": 83900 }, { "epoch": 0.6073964689786966, "grad_norm": 0.16211600601673126, "learning_rate": 4.392610769687362e-06, "loss": 0.9038, "step": 83910 }, { "epoch": 0.6074688556392828, "grad_norm": 0.1655724048614502, "learning_rate": 4.392538383026776e-06, "loss": 0.9158, "step": 83920 }, { "epoch": 0.6075412422998689, "grad_norm": 0.16653649508953094, "learning_rate": 4.39246599636619e-06, "loss": 0.8987, "step": 83930 }, { "epoch": 0.6076136289604551, "grad_norm": 0.1796729415655136, "learning_rate": 4.3923936097056035e-06, "loss": 0.9065, "step": 83940 }, { "epoch": 0.6076860156210414, "grad_norm": 0.16883957386016846, "learning_rate": 4.392321223045017e-06, "loss": 0.9116, "step": 83950 }, { "epoch": 0.6077584022816276, "grad_norm": 0.18032489717006683, "learning_rate": 4.392248836384432e-06, "loss": 0.8919, "step": 83960 }, { "epoch": 0.6078307889422138, "grad_norm": 0.18664510548114777, "learning_rate": 4.392176449723845e-06, "loss": 0.9099, "step": 83970 }, { "epoch": 0.6079031756027999, "grad_norm": 0.16412018239498138, "learning_rate": 4.392104063063259e-06, "loss": 0.902, "step": 83980 }, { "epoch": 0.6079755622633861, "grad_norm": 0.17010684311389923, "learning_rate": 4.3920316764026724e-06, "loss": 0.9389, "step": 83990 }, { "epoch": 0.6080479489239723, "grad_norm": 0.150782972574234, "learning_rate": 4.391959289742087e-06, "loss": 0.8912, "step": 84000 }, { "epoch": 0.6081203355845585, "grad_norm": 0.17593304812908173, "learning_rate": 4.3918869030815005e-06, "loss": 0.8924, "step": 84010 }, { "epoch": 0.6081927222451446, "grad_norm": 0.1518871784210205, "learning_rate": 4.391814516420914e-06, "loss": 0.9028, "step": 84020 }, { "epoch": 0.6082651089057308, "grad_norm": 0.1600775122642517, "learning_rate": 4.391742129760328e-06, "loss": 0.9032, "step": 84030 }, { "epoch": 0.608337495566317, "grad_norm": 0.1590421050786972, "learning_rate": 4.391669743099742e-06, "loss": 0.8984, "step": 84040 }, { "epoch": 0.6084098822269032, "grad_norm": 0.17040304839611053, "learning_rate": 4.391597356439156e-06, "loss": 0.8969, "step": 84050 }, { "epoch": 0.6084822688874895, "grad_norm": 0.1723751425743103, "learning_rate": 4.3915249697785694e-06, "loss": 0.8904, "step": 84060 }, { "epoch": 0.6085546555480756, "grad_norm": 0.14594772458076477, "learning_rate": 4.391452583117983e-06, "loss": 0.9142, "step": 84070 }, { "epoch": 0.6086270422086618, "grad_norm": 0.16909512877464294, "learning_rate": 4.391380196457397e-06, "loss": 0.8943, "step": 84080 }, { "epoch": 0.608699428869248, "grad_norm": 0.1714186817407608, "learning_rate": 4.391307809796811e-06, "loss": 0.9039, "step": 84090 }, { "epoch": 0.6087718155298342, "grad_norm": 0.16077734529972076, "learning_rate": 4.391235423136225e-06, "loss": 0.912, "step": 84100 }, { "epoch": 0.6088442021904203, "grad_norm": 0.1786806732416153, "learning_rate": 4.391163036475638e-06, "loss": 0.915, "step": 84110 }, { "epoch": 0.6089165888510065, "grad_norm": 0.15787489712238312, "learning_rate": 4.391090649815052e-06, "loss": 0.9037, "step": 84120 }, { "epoch": 0.6089889755115927, "grad_norm": 0.14963692426681519, "learning_rate": 4.3910182631544665e-06, "loss": 0.8954, "step": 84130 }, { "epoch": 0.6090613621721789, "grad_norm": 0.15370801091194153, "learning_rate": 4.39094587649388e-06, "loss": 0.9128, "step": 84140 }, { "epoch": 0.609133748832765, "grad_norm": 0.16809116303920746, "learning_rate": 4.390873489833294e-06, "loss": 0.9006, "step": 84150 }, { "epoch": 0.6092061354933512, "grad_norm": 0.16092905402183533, "learning_rate": 4.390801103172707e-06, "loss": 0.8976, "step": 84160 }, { "epoch": 0.6092785221539375, "grad_norm": 0.1607845276594162, "learning_rate": 4.390728716512122e-06, "loss": 0.9273, "step": 84170 }, { "epoch": 0.6093509088145237, "grad_norm": 0.15682317316532135, "learning_rate": 4.390656329851535e-06, "loss": 0.9062, "step": 84180 }, { "epoch": 0.6094232954751099, "grad_norm": 0.154841810464859, "learning_rate": 4.390583943190949e-06, "loss": 0.9231, "step": 84190 }, { "epoch": 0.609495682135696, "grad_norm": 0.16527412831783295, "learning_rate": 4.390511556530363e-06, "loss": 0.9122, "step": 84200 }, { "epoch": 0.6095680687962822, "grad_norm": 0.15766416490077972, "learning_rate": 4.390439169869777e-06, "loss": 0.8958, "step": 84210 }, { "epoch": 0.6096404554568684, "grad_norm": 0.17075148224830627, "learning_rate": 4.390366783209191e-06, "loss": 0.9055, "step": 84220 }, { "epoch": 0.6097128421174546, "grad_norm": 0.15440765023231506, "learning_rate": 4.390294396548604e-06, "loss": 0.9209, "step": 84230 }, { "epoch": 0.6097852287780408, "grad_norm": 0.1687108278274536, "learning_rate": 4.390222009888018e-06, "loss": 0.9068, "step": 84240 }, { "epoch": 0.6098576154386269, "grad_norm": 0.15964575111865997, "learning_rate": 4.390149623227432e-06, "loss": 0.9099, "step": 84250 }, { "epoch": 0.6099300020992131, "grad_norm": 0.15133626759052277, "learning_rate": 4.390077236566846e-06, "loss": 0.912, "step": 84260 }, { "epoch": 0.6100023887597994, "grad_norm": 0.16388767957687378, "learning_rate": 4.39000484990626e-06, "loss": 0.9091, "step": 84270 }, { "epoch": 0.6100747754203856, "grad_norm": 0.16977226734161377, "learning_rate": 4.389932463245673e-06, "loss": 0.9041, "step": 84280 }, { "epoch": 0.6101471620809717, "grad_norm": 0.2612716555595398, "learning_rate": 4.389860076585088e-06, "loss": 0.9024, "step": 84290 }, { "epoch": 0.6102195487415579, "grad_norm": 0.17897847294807434, "learning_rate": 4.389787689924501e-06, "loss": 0.898, "step": 84300 }, { "epoch": 0.6102919354021441, "grad_norm": 0.16913948953151703, "learning_rate": 4.389715303263915e-06, "loss": 0.9011, "step": 84310 }, { "epoch": 0.6103643220627303, "grad_norm": 0.1693422794342041, "learning_rate": 4.3896429166033286e-06, "loss": 0.91, "step": 84320 }, { "epoch": 0.6104367087233165, "grad_norm": 0.14860454201698303, "learning_rate": 4.389570529942743e-06, "loss": 0.9087, "step": 84330 }, { "epoch": 0.6105090953839026, "grad_norm": 0.17605887353420258, "learning_rate": 4.389498143282157e-06, "loss": 0.8988, "step": 84340 }, { "epoch": 0.6105814820444888, "grad_norm": 0.2861873507499695, "learning_rate": 4.38942575662157e-06, "loss": 0.8955, "step": 84350 }, { "epoch": 0.610653868705075, "grad_norm": 0.1683700680732727, "learning_rate": 4.389353369960984e-06, "loss": 0.8973, "step": 84360 }, { "epoch": 0.6107262553656612, "grad_norm": 0.15339066088199615, "learning_rate": 4.389280983300398e-06, "loss": 0.9137, "step": 84370 }, { "epoch": 0.6107986420262475, "grad_norm": 0.15601833164691925, "learning_rate": 4.389208596639812e-06, "loss": 0.9166, "step": 84380 }, { "epoch": 0.6108710286868336, "grad_norm": 0.16695159673690796, "learning_rate": 4.3891362099792256e-06, "loss": 0.902, "step": 84390 }, { "epoch": 0.6109434153474198, "grad_norm": 0.1545722484588623, "learning_rate": 4.389063823318639e-06, "loss": 0.9112, "step": 84400 }, { "epoch": 0.611015802008006, "grad_norm": 0.17581118643283844, "learning_rate": 4.388991436658054e-06, "loss": 0.9104, "step": 84410 }, { "epoch": 0.6110881886685922, "grad_norm": 0.16836950182914734, "learning_rate": 4.388919049997467e-06, "loss": 0.9007, "step": 84420 }, { "epoch": 0.6111605753291783, "grad_norm": 0.1791825294494629, "learning_rate": 4.38884666333688e-06, "loss": 0.8978, "step": 84430 }, { "epoch": 0.6112329619897645, "grad_norm": 0.17053841054439545, "learning_rate": 4.3887742766762945e-06, "loss": 0.9075, "step": 84440 }, { "epoch": 0.6113053486503507, "grad_norm": 0.15153911709785461, "learning_rate": 4.388701890015708e-06, "loss": 0.9048, "step": 84450 }, { "epoch": 0.6113777353109369, "grad_norm": 0.15731872618198395, "learning_rate": 4.388629503355122e-06, "loss": 0.9043, "step": 84460 }, { "epoch": 0.611450121971523, "grad_norm": 0.15405894815921783, "learning_rate": 4.388557116694535e-06, "loss": 0.9059, "step": 84470 }, { "epoch": 0.6115225086321093, "grad_norm": 0.15265247225761414, "learning_rate": 4.38848473003395e-06, "loss": 0.903, "step": 84480 }, { "epoch": 0.6115948952926955, "grad_norm": 0.16418184340000153, "learning_rate": 4.388412343373363e-06, "loss": 0.898, "step": 84490 }, { "epoch": 0.6116672819532817, "grad_norm": 0.16227209568023682, "learning_rate": 4.388339956712777e-06, "loss": 0.9145, "step": 84500 }, { "epoch": 0.6117396686138679, "grad_norm": 0.20757746696472168, "learning_rate": 4.388267570052191e-06, "loss": 0.9047, "step": 84510 }, { "epoch": 0.611812055274454, "grad_norm": 0.1717897206544876, "learning_rate": 4.388195183391605e-06, "loss": 0.8976, "step": 84520 }, { "epoch": 0.6118844419350402, "grad_norm": 0.1659894585609436, "learning_rate": 4.388122796731019e-06, "loss": 0.9076, "step": 84530 }, { "epoch": 0.6119568285956264, "grad_norm": 0.15432026982307434, "learning_rate": 4.388050410070432e-06, "loss": 0.9111, "step": 84540 }, { "epoch": 0.6120292152562126, "grad_norm": 0.15473110973834991, "learning_rate": 4.387978023409846e-06, "loss": 0.9041, "step": 84550 }, { "epoch": 0.6121016019167987, "grad_norm": 0.18219618499279022, "learning_rate": 4.3879056367492604e-06, "loss": 0.8956, "step": 84560 }, { "epoch": 0.6121739885773849, "grad_norm": 0.15730948746204376, "learning_rate": 4.387833250088674e-06, "loss": 0.9085, "step": 84570 }, { "epoch": 0.6122463752379711, "grad_norm": 0.14876548945903778, "learning_rate": 4.387760863428088e-06, "loss": 0.9082, "step": 84580 }, { "epoch": 0.6123187618985574, "grad_norm": 0.1581803560256958, "learning_rate": 4.387688476767501e-06, "loss": 0.9161, "step": 84590 }, { "epoch": 0.6123911485591436, "grad_norm": 0.1603010892868042, "learning_rate": 4.387616090106916e-06, "loss": 0.9022, "step": 84600 }, { "epoch": 0.6124635352197297, "grad_norm": 0.17957136034965515, "learning_rate": 4.387543703446329e-06, "loss": 0.9077, "step": 84610 }, { "epoch": 0.6125359218803159, "grad_norm": 0.1617906540632248, "learning_rate": 4.387471316785743e-06, "loss": 0.9247, "step": 84620 }, { "epoch": 0.6126083085409021, "grad_norm": 0.1665765643119812, "learning_rate": 4.387398930125157e-06, "loss": 0.9073, "step": 84630 }, { "epoch": 0.6126806952014883, "grad_norm": 0.1667291522026062, "learning_rate": 4.387326543464571e-06, "loss": 0.8995, "step": 84640 }, { "epoch": 0.6127530818620744, "grad_norm": 0.1612149178981781, "learning_rate": 4.387254156803985e-06, "loss": 0.9066, "step": 84650 }, { "epoch": 0.6128254685226606, "grad_norm": 0.15186448395252228, "learning_rate": 4.387181770143398e-06, "loss": 0.9118, "step": 84660 }, { "epoch": 0.6128978551832468, "grad_norm": 0.15720537304878235, "learning_rate": 4.387109383482812e-06, "loss": 0.8964, "step": 84670 }, { "epoch": 0.612970241843833, "grad_norm": 0.1654808670282364, "learning_rate": 4.387036996822226e-06, "loss": 0.9022, "step": 84680 }, { "epoch": 0.6130426285044192, "grad_norm": 0.15336300432682037, "learning_rate": 4.38696461016164e-06, "loss": 0.9128, "step": 84690 }, { "epoch": 0.6131150151650054, "grad_norm": 0.15341922640800476, "learning_rate": 4.386892223501054e-06, "loss": 0.9072, "step": 84700 }, { "epoch": 0.6131874018255916, "grad_norm": 0.15757760405540466, "learning_rate": 4.386819836840467e-06, "loss": 0.895, "step": 84710 }, { "epoch": 0.6132597884861778, "grad_norm": 0.1582041084766388, "learning_rate": 4.386747450179881e-06, "loss": 0.9047, "step": 84720 }, { "epoch": 0.613332175146764, "grad_norm": 0.19254916906356812, "learning_rate": 4.386675063519295e-06, "loss": 0.8918, "step": 84730 }, { "epoch": 0.6134045618073501, "grad_norm": 0.14358896017074585, "learning_rate": 4.386602676858709e-06, "loss": 0.8909, "step": 84740 }, { "epoch": 0.6134769484679363, "grad_norm": 0.15055955946445465, "learning_rate": 4.3865302901981225e-06, "loss": 0.9093, "step": 84750 }, { "epoch": 0.6135493351285225, "grad_norm": 0.1698206514120102, "learning_rate": 4.386457903537536e-06, "loss": 0.8958, "step": 84760 }, { "epoch": 0.6136217217891087, "grad_norm": 0.1727665215730667, "learning_rate": 4.386385516876951e-06, "loss": 0.9116, "step": 84770 }, { "epoch": 0.6136941084496949, "grad_norm": 0.1568523496389389, "learning_rate": 4.386313130216364e-06, "loss": 0.9059, "step": 84780 }, { "epoch": 0.613766495110281, "grad_norm": 0.17023316025733948, "learning_rate": 4.386240743555778e-06, "loss": 0.9019, "step": 84790 }, { "epoch": 0.6138388817708673, "grad_norm": 0.15724539756774902, "learning_rate": 4.3861683568951914e-06, "loss": 0.9167, "step": 84800 }, { "epoch": 0.6139112684314535, "grad_norm": 0.17113758623600006, "learning_rate": 4.386095970234606e-06, "loss": 0.8977, "step": 84810 }, { "epoch": 0.6139836550920397, "grad_norm": 0.166314959526062, "learning_rate": 4.3860235835740195e-06, "loss": 0.9104, "step": 84820 }, { "epoch": 0.6140560417526258, "grad_norm": 0.1817166805267334, "learning_rate": 4.385951196913433e-06, "loss": 0.902, "step": 84830 }, { "epoch": 0.614128428413212, "grad_norm": 0.1507468968629837, "learning_rate": 4.385878810252847e-06, "loss": 0.8974, "step": 84840 }, { "epoch": 0.6142008150737982, "grad_norm": 0.15710368752479553, "learning_rate": 4.385806423592261e-06, "loss": 0.9036, "step": 84850 }, { "epoch": 0.6142732017343844, "grad_norm": 0.14728538691997528, "learning_rate": 4.385734036931675e-06, "loss": 0.9031, "step": 84860 }, { "epoch": 0.6143455883949706, "grad_norm": 0.15462151169776917, "learning_rate": 4.3856616502710885e-06, "loss": 0.9021, "step": 84870 }, { "epoch": 0.6144179750555567, "grad_norm": 0.1638130396604538, "learning_rate": 4.385589263610502e-06, "loss": 0.92, "step": 84880 }, { "epoch": 0.6144903617161429, "grad_norm": 0.1549723744392395, "learning_rate": 4.3855168769499165e-06, "loss": 0.9028, "step": 84890 }, { "epoch": 0.6145627483767291, "grad_norm": 0.15805752575397491, "learning_rate": 4.38544449028933e-06, "loss": 0.899, "step": 84900 }, { "epoch": 0.6146351350373154, "grad_norm": 0.17812702059745789, "learning_rate": 4.385372103628744e-06, "loss": 0.9045, "step": 84910 }, { "epoch": 0.6147075216979015, "grad_norm": 0.15838688611984253, "learning_rate": 4.385299716968157e-06, "loss": 0.9154, "step": 84920 }, { "epoch": 0.6147799083584877, "grad_norm": 0.16311374306678772, "learning_rate": 4.385227330307572e-06, "loss": 0.8976, "step": 84930 }, { "epoch": 0.6148522950190739, "grad_norm": 0.15058547258377075, "learning_rate": 4.3851549436469855e-06, "loss": 0.8918, "step": 84940 }, { "epoch": 0.6149246816796601, "grad_norm": 0.15357309579849243, "learning_rate": 4.385082556986399e-06, "loss": 0.9086, "step": 84950 }, { "epoch": 0.6149970683402463, "grad_norm": 0.15655238926410675, "learning_rate": 4.385010170325813e-06, "loss": 0.9006, "step": 84960 }, { "epoch": 0.6150694550008324, "grad_norm": 0.17502596974372864, "learning_rate": 4.384937783665226e-06, "loss": 0.8993, "step": 84970 }, { "epoch": 0.6151418416614186, "grad_norm": 0.1721956878900528, "learning_rate": 4.38486539700464e-06, "loss": 0.8934, "step": 84980 }, { "epoch": 0.6152142283220048, "grad_norm": 0.1560133844614029, "learning_rate": 4.3847930103440535e-06, "loss": 0.9097, "step": 84990 }, { "epoch": 0.615286614982591, "grad_norm": 0.15889939665794373, "learning_rate": 4.384720623683468e-06, "loss": 0.8942, "step": 85000 }, { "epoch": 0.6153590016431773, "grad_norm": 0.16215567290782928, "learning_rate": 4.384648237022882e-06, "loss": 0.9017, "step": 85010 }, { "epoch": 0.6154313883037634, "grad_norm": 0.16176781058311462, "learning_rate": 4.384575850362295e-06, "loss": 0.8989, "step": 85020 }, { "epoch": 0.6155037749643496, "grad_norm": 0.1544492542743683, "learning_rate": 4.384503463701709e-06, "loss": 0.9044, "step": 85030 }, { "epoch": 0.6155761616249358, "grad_norm": 0.14962157607078552, "learning_rate": 4.384431077041123e-06, "loss": 0.8937, "step": 85040 }, { "epoch": 0.615648548285522, "grad_norm": 0.16228331625461578, "learning_rate": 4.384358690380537e-06, "loss": 0.9054, "step": 85050 }, { "epoch": 0.6157209349461081, "grad_norm": 0.1617509126663208, "learning_rate": 4.3842863037199506e-06, "loss": 0.8999, "step": 85060 }, { "epoch": 0.6157933216066943, "grad_norm": 0.15040123462677002, "learning_rate": 4.384213917059364e-06, "loss": 0.9004, "step": 85070 }, { "epoch": 0.6158657082672805, "grad_norm": 0.15811018645763397, "learning_rate": 4.384141530398779e-06, "loss": 0.8944, "step": 85080 }, { "epoch": 0.6159380949278667, "grad_norm": 0.17127995193004608, "learning_rate": 4.384069143738192e-06, "loss": 0.8957, "step": 85090 }, { "epoch": 0.6160104815884528, "grad_norm": 0.16223451495170593, "learning_rate": 4.383996757077606e-06, "loss": 0.8946, "step": 85100 }, { "epoch": 0.616082868249039, "grad_norm": 0.1593468189239502, "learning_rate": 4.3839243704170195e-06, "loss": 0.908, "step": 85110 }, { "epoch": 0.6161552549096253, "grad_norm": 0.16201543807983398, "learning_rate": 4.383851983756434e-06, "loss": 0.9083, "step": 85120 }, { "epoch": 0.6162276415702115, "grad_norm": 0.15319061279296875, "learning_rate": 4.3837795970958476e-06, "loss": 0.9078, "step": 85130 }, { "epoch": 0.6163000282307977, "grad_norm": 0.15424944460391998, "learning_rate": 4.383707210435261e-06, "loss": 0.9044, "step": 85140 }, { "epoch": 0.6163724148913838, "grad_norm": 0.17157965898513794, "learning_rate": 4.383634823774675e-06, "loss": 0.9082, "step": 85150 }, { "epoch": 0.61644480155197, "grad_norm": 0.15128067135810852, "learning_rate": 4.383562437114089e-06, "loss": 0.9109, "step": 85160 }, { "epoch": 0.6165171882125562, "grad_norm": 0.170233815908432, "learning_rate": 4.383490050453503e-06, "loss": 0.901, "step": 85170 }, { "epoch": 0.6165895748731424, "grad_norm": 0.16256168484687805, "learning_rate": 4.3834176637929165e-06, "loss": 0.9104, "step": 85180 }, { "epoch": 0.6166619615337285, "grad_norm": 0.16661547124385834, "learning_rate": 4.38334527713233e-06, "loss": 0.9079, "step": 85190 }, { "epoch": 0.6167343481943147, "grad_norm": 0.17816273868083954, "learning_rate": 4.3832728904717446e-06, "loss": 0.9076, "step": 85200 }, { "epoch": 0.6168067348549009, "grad_norm": 0.16999828815460205, "learning_rate": 4.383200503811158e-06, "loss": 0.9152, "step": 85210 }, { "epoch": 0.6168791215154871, "grad_norm": 0.14718665182590485, "learning_rate": 4.383128117150572e-06, "loss": 0.8982, "step": 85220 }, { "epoch": 0.6169515081760734, "grad_norm": 0.2667122781276703, "learning_rate": 4.383055730489985e-06, "loss": 0.9059, "step": 85230 }, { "epoch": 0.6170238948366595, "grad_norm": 0.1408415138721466, "learning_rate": 4.3829833438294e-06, "loss": 0.9018, "step": 85240 }, { "epoch": 0.6170962814972457, "grad_norm": 0.16204825043678284, "learning_rate": 4.3829109571688135e-06, "loss": 0.8952, "step": 85250 }, { "epoch": 0.6171686681578319, "grad_norm": 0.1726893186569214, "learning_rate": 4.382838570508227e-06, "loss": 0.9127, "step": 85260 }, { "epoch": 0.6172410548184181, "grad_norm": 0.1730419099330902, "learning_rate": 4.382766183847641e-06, "loss": 0.9003, "step": 85270 }, { "epoch": 0.6173134414790042, "grad_norm": 0.17287707328796387, "learning_rate": 4.382693797187055e-06, "loss": 0.8961, "step": 85280 }, { "epoch": 0.6173858281395904, "grad_norm": 0.1501106470823288, "learning_rate": 4.382621410526469e-06, "loss": 0.9104, "step": 85290 }, { "epoch": 0.6174582148001766, "grad_norm": 0.16863562166690826, "learning_rate": 4.382549023865882e-06, "loss": 0.8988, "step": 85300 }, { "epoch": 0.6175306014607628, "grad_norm": 0.1819497048854828, "learning_rate": 4.382476637205296e-06, "loss": 0.9, "step": 85310 }, { "epoch": 0.617602988121349, "grad_norm": 0.15243135392665863, "learning_rate": 4.3824042505447105e-06, "loss": 0.9003, "step": 85320 }, { "epoch": 0.6176753747819352, "grad_norm": 0.1578928381204605, "learning_rate": 4.382331863884124e-06, "loss": 0.9003, "step": 85330 }, { "epoch": 0.6177477614425214, "grad_norm": 0.15596744418144226, "learning_rate": 4.382259477223538e-06, "loss": 0.9036, "step": 85340 }, { "epoch": 0.6178201481031076, "grad_norm": 0.16625262796878815, "learning_rate": 4.382187090562951e-06, "loss": 0.9022, "step": 85350 }, { "epoch": 0.6178925347636938, "grad_norm": 0.14543043076992035, "learning_rate": 4.382114703902365e-06, "loss": 0.8922, "step": 85360 }, { "epoch": 0.61796492142428, "grad_norm": 0.14668937027454376, "learning_rate": 4.3820423172417794e-06, "loss": 0.8963, "step": 85370 }, { "epoch": 0.6180373080848661, "grad_norm": 0.15720996260643005, "learning_rate": 4.381969930581193e-06, "loss": 0.9104, "step": 85380 }, { "epoch": 0.6181096947454523, "grad_norm": 0.20788639783859253, "learning_rate": 4.381897543920607e-06, "loss": 0.908, "step": 85390 }, { "epoch": 0.6181820814060385, "grad_norm": 0.1491943746805191, "learning_rate": 4.38182515726002e-06, "loss": 0.9125, "step": 85400 }, { "epoch": 0.6182544680666247, "grad_norm": 0.16570164263248444, "learning_rate": 4.381752770599435e-06, "loss": 0.8988, "step": 85410 }, { "epoch": 0.6183268547272108, "grad_norm": 0.15333351492881775, "learning_rate": 4.381680383938848e-06, "loss": 0.9015, "step": 85420 }, { "epoch": 0.618399241387797, "grad_norm": 0.15450377762317657, "learning_rate": 4.381607997278262e-06, "loss": 0.9206, "step": 85430 }, { "epoch": 0.6184716280483833, "grad_norm": 0.15417149662971497, "learning_rate": 4.381535610617676e-06, "loss": 0.9062, "step": 85440 }, { "epoch": 0.6185440147089695, "grad_norm": 0.1625894010066986, "learning_rate": 4.38146322395709e-06, "loss": 0.9074, "step": 85450 }, { "epoch": 0.6186164013695556, "grad_norm": 0.15775802731513977, "learning_rate": 4.381390837296504e-06, "loss": 0.9068, "step": 85460 }, { "epoch": 0.6186887880301418, "grad_norm": 0.16794852912425995, "learning_rate": 4.381318450635917e-06, "loss": 0.896, "step": 85470 }, { "epoch": 0.618761174690728, "grad_norm": 0.158334419131279, "learning_rate": 4.381246063975331e-06, "loss": 0.8872, "step": 85480 }, { "epoch": 0.6188335613513142, "grad_norm": 0.15617749094963074, "learning_rate": 4.3811736773147445e-06, "loss": 0.9026, "step": 85490 }, { "epoch": 0.6189059480119004, "grad_norm": 0.16775725781917572, "learning_rate": 4.381101290654158e-06, "loss": 0.9062, "step": 85500 }, { "epoch": 0.6189783346724865, "grad_norm": 0.16923066973686218, "learning_rate": 4.381028903993572e-06, "loss": 0.9077, "step": 85510 }, { "epoch": 0.6190507213330727, "grad_norm": 0.16332891583442688, "learning_rate": 4.380956517332986e-06, "loss": 0.8984, "step": 85520 }, { "epoch": 0.6191231079936589, "grad_norm": 0.1725182682275772, "learning_rate": 4.3808841306724e-06, "loss": 0.8937, "step": 85530 }, { "epoch": 0.6191954946542452, "grad_norm": 0.17352226376533508, "learning_rate": 4.3808117440118134e-06, "loss": 0.9188, "step": 85540 }, { "epoch": 0.6192678813148313, "grad_norm": 0.1709306836128235, "learning_rate": 4.380739357351227e-06, "loss": 0.8912, "step": 85550 }, { "epoch": 0.6193402679754175, "grad_norm": 0.2021743208169937, "learning_rate": 4.3806669706906415e-06, "loss": 0.9081, "step": 85560 }, { "epoch": 0.6194126546360037, "grad_norm": 0.14920145273208618, "learning_rate": 4.380594584030055e-06, "loss": 0.8997, "step": 85570 }, { "epoch": 0.6194850412965899, "grad_norm": 0.1603516936302185, "learning_rate": 4.380522197369469e-06, "loss": 0.9109, "step": 85580 }, { "epoch": 0.619557427957176, "grad_norm": 0.14569969475269318, "learning_rate": 4.380449810708882e-06, "loss": 0.896, "step": 85590 }, { "epoch": 0.6196298146177622, "grad_norm": 0.16068822145462036, "learning_rate": 4.380377424048297e-06, "loss": 0.8974, "step": 85600 }, { "epoch": 0.6197022012783484, "grad_norm": 0.16494551301002502, "learning_rate": 4.3803050373877105e-06, "loss": 0.8991, "step": 85610 }, { "epoch": 0.6197745879389346, "grad_norm": 0.15262630581855774, "learning_rate": 4.380232650727124e-06, "loss": 0.8966, "step": 85620 }, { "epoch": 0.6198469745995208, "grad_norm": 0.15079142153263092, "learning_rate": 4.380160264066538e-06, "loss": 0.8946, "step": 85630 }, { "epoch": 0.6199193612601069, "grad_norm": 0.15024054050445557, "learning_rate": 4.380087877405952e-06, "loss": 0.9155, "step": 85640 }, { "epoch": 0.6199917479206932, "grad_norm": 0.17157381772994995, "learning_rate": 4.380015490745366e-06, "loss": 0.9006, "step": 85650 }, { "epoch": 0.6200641345812794, "grad_norm": 0.1453818827867508, "learning_rate": 4.379943104084779e-06, "loss": 0.9009, "step": 85660 }, { "epoch": 0.6201365212418656, "grad_norm": 0.15377745032310486, "learning_rate": 4.379870717424193e-06, "loss": 0.8998, "step": 85670 }, { "epoch": 0.6202089079024518, "grad_norm": 0.16755272448062897, "learning_rate": 4.3797983307636075e-06, "loss": 0.8894, "step": 85680 }, { "epoch": 0.6202812945630379, "grad_norm": 0.1771673858165741, "learning_rate": 4.379725944103021e-06, "loss": 0.9014, "step": 85690 }, { "epoch": 0.6203536812236241, "grad_norm": 0.1448373943567276, "learning_rate": 4.379653557442435e-06, "loss": 0.8946, "step": 85700 }, { "epoch": 0.6204260678842103, "grad_norm": 0.15097440779209137, "learning_rate": 4.379581170781848e-06, "loss": 0.8906, "step": 85710 }, { "epoch": 0.6204984545447965, "grad_norm": 0.1607360988855362, "learning_rate": 4.379508784121263e-06, "loss": 0.9011, "step": 85720 }, { "epoch": 0.6205708412053826, "grad_norm": 0.16542325913906097, "learning_rate": 4.379436397460676e-06, "loss": 0.9062, "step": 85730 }, { "epoch": 0.6206432278659688, "grad_norm": 0.18820612132549286, "learning_rate": 4.37936401080009e-06, "loss": 0.9047, "step": 85740 }, { "epoch": 0.620715614526555, "grad_norm": 0.15945078432559967, "learning_rate": 4.379291624139504e-06, "loss": 0.9064, "step": 85750 }, { "epoch": 0.6207880011871413, "grad_norm": 0.2132113128900528, "learning_rate": 4.379219237478918e-06, "loss": 0.9078, "step": 85760 }, { "epoch": 0.6208603878477275, "grad_norm": 0.15202796459197998, "learning_rate": 4.379146850818332e-06, "loss": 0.901, "step": 85770 }, { "epoch": 0.6209327745083136, "grad_norm": 0.15480341017246246, "learning_rate": 4.379074464157745e-06, "loss": 0.9037, "step": 85780 }, { "epoch": 0.6210051611688998, "grad_norm": 0.22787299752235413, "learning_rate": 4.379002077497159e-06, "loss": 0.9112, "step": 85790 }, { "epoch": 0.621077547829486, "grad_norm": 0.16583016514778137, "learning_rate": 4.378929690836573e-06, "loss": 0.9095, "step": 85800 }, { "epoch": 0.6211499344900722, "grad_norm": 0.1868494153022766, "learning_rate": 4.378857304175987e-06, "loss": 0.8924, "step": 85810 }, { "epoch": 0.6212223211506583, "grad_norm": 0.18490175902843475, "learning_rate": 4.378784917515401e-06, "loss": 0.8871, "step": 85820 }, { "epoch": 0.6212947078112445, "grad_norm": 0.15634137392044067, "learning_rate": 4.378712530854814e-06, "loss": 0.9004, "step": 85830 }, { "epoch": 0.6213670944718307, "grad_norm": 0.24686381220817566, "learning_rate": 4.378640144194229e-06, "loss": 0.8762, "step": 85840 }, { "epoch": 0.6214394811324169, "grad_norm": 0.1837376058101654, "learning_rate": 4.378567757533642e-06, "loss": 0.9133, "step": 85850 }, { "epoch": 0.6215118677930032, "grad_norm": 0.16701044142246246, "learning_rate": 4.378495370873056e-06, "loss": 0.9067, "step": 85860 }, { "epoch": 0.6215842544535893, "grad_norm": 0.15616366267204285, "learning_rate": 4.3784229842124696e-06, "loss": 0.9053, "step": 85870 }, { "epoch": 0.6216566411141755, "grad_norm": 0.1612260639667511, "learning_rate": 4.378350597551884e-06, "loss": 0.8892, "step": 85880 }, { "epoch": 0.6217290277747617, "grad_norm": 0.17558954656124115, "learning_rate": 4.378278210891298e-06, "loss": 0.9048, "step": 85890 }, { "epoch": 0.6218014144353479, "grad_norm": 0.175954669713974, "learning_rate": 4.378205824230711e-06, "loss": 0.8994, "step": 85900 }, { "epoch": 0.621873801095934, "grad_norm": 0.1546930968761444, "learning_rate": 4.378133437570125e-06, "loss": 0.8875, "step": 85910 }, { "epoch": 0.6219461877565202, "grad_norm": 0.1583179086446762, "learning_rate": 4.378061050909539e-06, "loss": 0.8928, "step": 85920 }, { "epoch": 0.6220185744171064, "grad_norm": 0.15575550496578217, "learning_rate": 4.377988664248953e-06, "loss": 0.9023, "step": 85930 }, { "epoch": 0.6220909610776926, "grad_norm": 0.1556166708469391, "learning_rate": 4.3779162775883666e-06, "loss": 0.9032, "step": 85940 }, { "epoch": 0.6221633477382787, "grad_norm": 0.16194866597652435, "learning_rate": 4.37784389092778e-06, "loss": 0.9187, "step": 85950 }, { "epoch": 0.6222357343988649, "grad_norm": 0.15536653995513916, "learning_rate": 4.377771504267194e-06, "loss": 0.907, "step": 85960 }, { "epoch": 0.6223081210594512, "grad_norm": 0.15262702107429504, "learning_rate": 4.377699117606608e-06, "loss": 0.8914, "step": 85970 }, { "epoch": 0.6223805077200374, "grad_norm": 0.26444554328918457, "learning_rate": 4.377626730946022e-06, "loss": 0.9096, "step": 85980 }, { "epoch": 0.6224528943806236, "grad_norm": 0.14140740036964417, "learning_rate": 4.3775543442854355e-06, "loss": 0.898, "step": 85990 }, { "epoch": 0.6225252810412097, "grad_norm": 0.17096258699893951, "learning_rate": 4.377481957624849e-06, "loss": 0.9021, "step": 86000 }, { "epoch": 0.6225976677017959, "grad_norm": 0.16008111834526062, "learning_rate": 4.3774095709642636e-06, "loss": 0.8982, "step": 86010 }, { "epoch": 0.6226700543623821, "grad_norm": 0.1551225781440735, "learning_rate": 4.377337184303676e-06, "loss": 0.9093, "step": 86020 }, { "epoch": 0.6227424410229683, "grad_norm": 0.14841316640377045, "learning_rate": 4.377264797643091e-06, "loss": 0.902, "step": 86030 }, { "epoch": 0.6228148276835545, "grad_norm": 0.18173016607761383, "learning_rate": 4.377192410982504e-06, "loss": 0.9144, "step": 86040 }, { "epoch": 0.6228872143441406, "grad_norm": 0.14934363961219788, "learning_rate": 4.377120024321918e-06, "loss": 0.8974, "step": 86050 }, { "epoch": 0.6229596010047268, "grad_norm": 0.17030176520347595, "learning_rate": 4.377047637661332e-06, "loss": 0.8943, "step": 86060 }, { "epoch": 0.623031987665313, "grad_norm": 0.16661794483661652, "learning_rate": 4.376975251000746e-06, "loss": 0.9049, "step": 86070 }, { "epoch": 0.6231043743258993, "grad_norm": 0.16051805019378662, "learning_rate": 4.37690286434016e-06, "loss": 0.899, "step": 86080 }, { "epoch": 0.6231767609864854, "grad_norm": 0.16653110086917877, "learning_rate": 4.376830477679573e-06, "loss": 0.9057, "step": 86090 }, { "epoch": 0.6232491476470716, "grad_norm": 0.16507036983966827, "learning_rate": 4.376758091018987e-06, "loss": 0.907, "step": 86100 }, { "epoch": 0.6233215343076578, "grad_norm": 0.16348743438720703, "learning_rate": 4.3766857043584014e-06, "loss": 0.9077, "step": 86110 }, { "epoch": 0.623393920968244, "grad_norm": 0.15697596967220306, "learning_rate": 4.376613317697815e-06, "loss": 0.9028, "step": 86120 }, { "epoch": 0.6234663076288302, "grad_norm": 0.15257367491722107, "learning_rate": 4.376540931037229e-06, "loss": 0.896, "step": 86130 }, { "epoch": 0.6235386942894163, "grad_norm": 0.16256658732891083, "learning_rate": 4.376468544376642e-06, "loss": 0.9138, "step": 86140 }, { "epoch": 0.6236110809500025, "grad_norm": 0.19833579659461975, "learning_rate": 4.376396157716056e-06, "loss": 0.9078, "step": 86150 }, { "epoch": 0.6236834676105887, "grad_norm": 0.15901575982570648, "learning_rate": 4.37632377105547e-06, "loss": 0.8936, "step": 86160 }, { "epoch": 0.6237558542711749, "grad_norm": 0.16411714255809784, "learning_rate": 4.376251384394884e-06, "loss": 0.9072, "step": 86170 }, { "epoch": 0.6238282409317611, "grad_norm": 0.1543898582458496, "learning_rate": 4.376178997734298e-06, "loss": 0.91, "step": 86180 }, { "epoch": 0.6239006275923473, "grad_norm": 0.15518857538700104, "learning_rate": 4.376106611073711e-06, "loss": 0.9141, "step": 86190 }, { "epoch": 0.6239730142529335, "grad_norm": 0.15521982312202454, "learning_rate": 4.376034224413126e-06, "loss": 0.9045, "step": 86200 }, { "epoch": 0.6240454009135197, "grad_norm": 0.1619000881910324, "learning_rate": 4.375961837752539e-06, "loss": 0.8963, "step": 86210 }, { "epoch": 0.6241177875741059, "grad_norm": 0.15840552747249603, "learning_rate": 4.375889451091953e-06, "loss": 0.8982, "step": 86220 }, { "epoch": 0.624190174234692, "grad_norm": 0.1774725317955017, "learning_rate": 4.3758170644313665e-06, "loss": 0.9161, "step": 86230 }, { "epoch": 0.6242625608952782, "grad_norm": 0.160127192735672, "learning_rate": 4.375744677770781e-06, "loss": 0.8993, "step": 86240 }, { "epoch": 0.6243349475558644, "grad_norm": 0.1587544083595276, "learning_rate": 4.375672291110195e-06, "loss": 0.892, "step": 86250 }, { "epoch": 0.6244073342164506, "grad_norm": 0.1745489090681076, "learning_rate": 4.375599904449608e-06, "loss": 0.9089, "step": 86260 }, { "epoch": 0.6244797208770367, "grad_norm": 0.1463412344455719, "learning_rate": 4.375527517789022e-06, "loss": 0.8934, "step": 86270 }, { "epoch": 0.6245521075376229, "grad_norm": 0.16872276365756989, "learning_rate": 4.375455131128436e-06, "loss": 0.9004, "step": 86280 }, { "epoch": 0.6246244941982092, "grad_norm": 0.1642751842737198, "learning_rate": 4.37538274446785e-06, "loss": 0.8958, "step": 86290 }, { "epoch": 0.6246968808587954, "grad_norm": 0.17172108590602875, "learning_rate": 4.3753103578072635e-06, "loss": 0.9021, "step": 86300 }, { "epoch": 0.6247692675193816, "grad_norm": 0.1646968573331833, "learning_rate": 4.375237971146677e-06, "loss": 0.8952, "step": 86310 }, { "epoch": 0.6248416541799677, "grad_norm": 0.15084651112556458, "learning_rate": 4.375165584486092e-06, "loss": 0.894, "step": 86320 }, { "epoch": 0.6249140408405539, "grad_norm": 0.15649060904979706, "learning_rate": 4.375093197825505e-06, "loss": 0.9019, "step": 86330 }, { "epoch": 0.6249864275011401, "grad_norm": 0.16130280494689941, "learning_rate": 4.375020811164919e-06, "loss": 0.9091, "step": 86340 }, { "epoch": 0.6250588141617263, "grad_norm": 0.15296019613742828, "learning_rate": 4.3749484245043325e-06, "loss": 0.9127, "step": 86350 }, { "epoch": 0.6251312008223124, "grad_norm": 0.15848460793495178, "learning_rate": 4.374876037843747e-06, "loss": 0.8889, "step": 86360 }, { "epoch": 0.6252035874828986, "grad_norm": 0.15533143281936646, "learning_rate": 4.3748036511831605e-06, "loss": 0.9016, "step": 86370 }, { "epoch": 0.6252759741434848, "grad_norm": 0.15972629189491272, "learning_rate": 4.374731264522574e-06, "loss": 0.9124, "step": 86380 }, { "epoch": 0.6253483608040711, "grad_norm": 0.18219304084777832, "learning_rate": 4.374658877861988e-06, "loss": 0.8992, "step": 86390 }, { "epoch": 0.6254207474646573, "grad_norm": 0.14863041043281555, "learning_rate": 4.374586491201402e-06, "loss": 0.8854, "step": 86400 }, { "epoch": 0.6254931341252434, "grad_norm": 0.1658528745174408, "learning_rate": 4.374514104540816e-06, "loss": 0.8979, "step": 86410 }, { "epoch": 0.6255655207858296, "grad_norm": 0.15939390659332275, "learning_rate": 4.3744417178802295e-06, "loss": 0.9003, "step": 86420 }, { "epoch": 0.6256379074464158, "grad_norm": 0.17238759994506836, "learning_rate": 4.374369331219643e-06, "loss": 0.9056, "step": 86430 }, { "epoch": 0.625710294107002, "grad_norm": 0.1758257895708084, "learning_rate": 4.3742969445590575e-06, "loss": 0.9052, "step": 86440 }, { "epoch": 0.6257826807675881, "grad_norm": 0.1517690122127533, "learning_rate": 4.374224557898471e-06, "loss": 0.8987, "step": 86450 }, { "epoch": 0.6258550674281743, "grad_norm": 0.16682100296020508, "learning_rate": 4.374152171237885e-06, "loss": 0.9072, "step": 86460 }, { "epoch": 0.6259274540887605, "grad_norm": 0.14764508605003357, "learning_rate": 4.374079784577298e-06, "loss": 0.8948, "step": 86470 }, { "epoch": 0.6259998407493467, "grad_norm": 0.1788049340248108, "learning_rate": 4.374007397916713e-06, "loss": 0.8991, "step": 86480 }, { "epoch": 0.6260722274099328, "grad_norm": 0.15142115950584412, "learning_rate": 4.3739350112561265e-06, "loss": 0.9062, "step": 86490 }, { "epoch": 0.6261446140705191, "grad_norm": 0.1992928683757782, "learning_rate": 4.37386262459554e-06, "loss": 0.9139, "step": 86500 }, { "epoch": 0.6262170007311053, "grad_norm": 0.1716473549604416, "learning_rate": 4.373790237934954e-06, "loss": 0.8933, "step": 86510 }, { "epoch": 0.6262893873916915, "grad_norm": 0.19439038634300232, "learning_rate": 4.373717851274368e-06, "loss": 0.8969, "step": 86520 }, { "epoch": 0.6263617740522777, "grad_norm": 0.13993479311466217, "learning_rate": 4.373645464613782e-06, "loss": 0.9131, "step": 86530 }, { "epoch": 0.6264341607128638, "grad_norm": 0.1706664115190506, "learning_rate": 4.373573077953195e-06, "loss": 0.9075, "step": 86540 }, { "epoch": 0.62650654737345, "grad_norm": 0.17721545696258545, "learning_rate": 4.373500691292609e-06, "loss": 0.9067, "step": 86550 }, { "epoch": 0.6265789340340362, "grad_norm": 0.1569727510213852, "learning_rate": 4.373428304632023e-06, "loss": 0.9073, "step": 86560 }, { "epoch": 0.6266513206946224, "grad_norm": 0.15974320471286774, "learning_rate": 4.373355917971436e-06, "loss": 0.9023, "step": 86570 }, { "epoch": 0.6267237073552085, "grad_norm": 0.14838136732578278, "learning_rate": 4.37328353131085e-06, "loss": 0.8885, "step": 86580 }, { "epoch": 0.6267960940157947, "grad_norm": 0.1551826149225235, "learning_rate": 4.373211144650264e-06, "loss": 0.905, "step": 86590 }, { "epoch": 0.6268684806763809, "grad_norm": 0.158196359872818, "learning_rate": 4.373138757989678e-06, "loss": 0.8934, "step": 86600 }, { "epoch": 0.6269408673369672, "grad_norm": 0.17901091277599335, "learning_rate": 4.3730663713290916e-06, "loss": 0.919, "step": 86610 }, { "epoch": 0.6270132539975534, "grad_norm": 0.1554396152496338, "learning_rate": 4.372993984668505e-06, "loss": 0.896, "step": 86620 }, { "epoch": 0.6270856406581395, "grad_norm": 0.1537899672985077, "learning_rate": 4.37292159800792e-06, "loss": 0.8975, "step": 86630 }, { "epoch": 0.6271580273187257, "grad_norm": 0.16763430833816528, "learning_rate": 4.372849211347333e-06, "loss": 0.8944, "step": 86640 }, { "epoch": 0.6272304139793119, "grad_norm": 0.15677796304225922, "learning_rate": 4.372776824686747e-06, "loss": 0.9166, "step": 86650 }, { "epoch": 0.6273028006398981, "grad_norm": 0.15729351341724396, "learning_rate": 4.3727044380261605e-06, "loss": 0.9052, "step": 86660 }, { "epoch": 0.6273751873004842, "grad_norm": 0.16490468382835388, "learning_rate": 4.372632051365575e-06, "loss": 0.8986, "step": 86670 }, { "epoch": 0.6274475739610704, "grad_norm": 0.16459213197231293, "learning_rate": 4.3725596647049886e-06, "loss": 0.8967, "step": 86680 }, { "epoch": 0.6275199606216566, "grad_norm": 0.16037428379058838, "learning_rate": 4.372487278044402e-06, "loss": 0.9144, "step": 86690 }, { "epoch": 0.6275923472822428, "grad_norm": 0.16006019711494446, "learning_rate": 4.372414891383816e-06, "loss": 0.9089, "step": 86700 }, { "epoch": 0.6276647339428291, "grad_norm": 0.1548503190279007, "learning_rate": 4.37234250472323e-06, "loss": 0.9, "step": 86710 }, { "epoch": 0.6277371206034152, "grad_norm": 0.21827849745750427, "learning_rate": 4.372270118062644e-06, "loss": 0.8975, "step": 86720 }, { "epoch": 0.6278095072640014, "grad_norm": 0.16216625273227692, "learning_rate": 4.3721977314020575e-06, "loss": 0.9022, "step": 86730 }, { "epoch": 0.6278818939245876, "grad_norm": 0.17740298807621002, "learning_rate": 4.372125344741471e-06, "loss": 0.9114, "step": 86740 }, { "epoch": 0.6279542805851738, "grad_norm": 0.15223225951194763, "learning_rate": 4.372052958080885e-06, "loss": 0.886, "step": 86750 }, { "epoch": 0.62802666724576, "grad_norm": 0.1714017689228058, "learning_rate": 4.371980571420299e-06, "loss": 0.8929, "step": 86760 }, { "epoch": 0.6280990539063461, "grad_norm": 0.17561236023902893, "learning_rate": 4.371908184759713e-06, "loss": 0.9036, "step": 86770 }, { "epoch": 0.6281714405669323, "grad_norm": 0.16735588014125824, "learning_rate": 4.371835798099126e-06, "loss": 0.9127, "step": 86780 }, { "epoch": 0.6282438272275185, "grad_norm": 0.15860114991664886, "learning_rate": 4.37176341143854e-06, "loss": 0.9031, "step": 86790 }, { "epoch": 0.6283162138881047, "grad_norm": 0.23566380143165588, "learning_rate": 4.3716910247779545e-06, "loss": 0.9014, "step": 86800 }, { "epoch": 0.6283886005486908, "grad_norm": 0.18236784636974335, "learning_rate": 4.371618638117368e-06, "loss": 0.8992, "step": 86810 }, { "epoch": 0.6284609872092771, "grad_norm": 0.1653483808040619, "learning_rate": 4.371546251456782e-06, "loss": 0.9042, "step": 86820 }, { "epoch": 0.6285333738698633, "grad_norm": 0.15193116664886475, "learning_rate": 4.371473864796195e-06, "loss": 0.8964, "step": 86830 }, { "epoch": 0.6286057605304495, "grad_norm": 0.15298867225646973, "learning_rate": 4.37140147813561e-06, "loss": 0.8856, "step": 86840 }, { "epoch": 0.6286781471910357, "grad_norm": 0.15323877334594727, "learning_rate": 4.3713290914750234e-06, "loss": 0.9021, "step": 86850 }, { "epoch": 0.6287505338516218, "grad_norm": 0.159663587808609, "learning_rate": 4.371256704814437e-06, "loss": 0.8852, "step": 86860 }, { "epoch": 0.628822920512208, "grad_norm": 0.1943221390247345, "learning_rate": 4.371184318153851e-06, "loss": 0.9021, "step": 86870 }, { "epoch": 0.6288953071727942, "grad_norm": 0.16797471046447754, "learning_rate": 4.371111931493265e-06, "loss": 0.9143, "step": 86880 }, { "epoch": 0.6289676938333804, "grad_norm": 0.17109879851341248, "learning_rate": 4.371039544832679e-06, "loss": 0.905, "step": 86890 }, { "epoch": 0.6290400804939665, "grad_norm": 0.17516149580478668, "learning_rate": 4.370967158172092e-06, "loss": 0.9148, "step": 86900 }, { "epoch": 0.6291124671545527, "grad_norm": 0.15488648414611816, "learning_rate": 4.370894771511506e-06, "loss": 0.9063, "step": 86910 }, { "epoch": 0.629184853815139, "grad_norm": 0.16343455016613007, "learning_rate": 4.3708223848509204e-06, "loss": 0.9098, "step": 86920 }, { "epoch": 0.6292572404757252, "grad_norm": 0.1545439064502716, "learning_rate": 4.370749998190334e-06, "loss": 0.9044, "step": 86930 }, { "epoch": 0.6293296271363114, "grad_norm": 0.14563705027103424, "learning_rate": 4.370677611529748e-06, "loss": 0.8909, "step": 86940 }, { "epoch": 0.6294020137968975, "grad_norm": 0.16632063686847687, "learning_rate": 4.370605224869161e-06, "loss": 0.9051, "step": 86950 }, { "epoch": 0.6294744004574837, "grad_norm": 0.1433282047510147, "learning_rate": 4.370532838208576e-06, "loss": 0.9143, "step": 86960 }, { "epoch": 0.6295467871180699, "grad_norm": 0.16106697916984558, "learning_rate": 4.370460451547989e-06, "loss": 0.8937, "step": 86970 }, { "epoch": 0.6296191737786561, "grad_norm": 0.15034160017967224, "learning_rate": 4.370388064887403e-06, "loss": 0.9059, "step": 86980 }, { "epoch": 0.6296915604392422, "grad_norm": 0.15533864498138428, "learning_rate": 4.370315678226817e-06, "loss": 0.9062, "step": 86990 }, { "epoch": 0.6297639470998284, "grad_norm": 0.17288139462471008, "learning_rate": 4.370243291566231e-06, "loss": 0.896, "step": 87000 }, { "epoch": 0.6298363337604146, "grad_norm": 0.1603853702545166, "learning_rate": 4.370170904905645e-06, "loss": 0.9013, "step": 87010 }, { "epoch": 0.6299087204210008, "grad_norm": 0.1669483482837677, "learning_rate": 4.370098518245058e-06, "loss": 0.8986, "step": 87020 }, { "epoch": 0.629981107081587, "grad_norm": 0.15310995280742645, "learning_rate": 4.370026131584472e-06, "loss": 0.9085, "step": 87030 }, { "epoch": 0.6300534937421732, "grad_norm": 0.1482187807559967, "learning_rate": 4.369953744923886e-06, "loss": 0.9044, "step": 87040 }, { "epoch": 0.6301258804027594, "grad_norm": 0.16005839407444, "learning_rate": 4.3698813582633e-06, "loss": 0.9033, "step": 87050 }, { "epoch": 0.6301982670633456, "grad_norm": 0.1608879268169403, "learning_rate": 4.369808971602714e-06, "loss": 0.8938, "step": 87060 }, { "epoch": 0.6302706537239318, "grad_norm": 0.15144924819469452, "learning_rate": 4.369736584942127e-06, "loss": 0.905, "step": 87070 }, { "epoch": 0.6303430403845179, "grad_norm": 0.1928846538066864, "learning_rate": 4.369664198281541e-06, "loss": 0.897, "step": 87080 }, { "epoch": 0.6304154270451041, "grad_norm": 0.15390628576278687, "learning_rate": 4.3695918116209545e-06, "loss": 0.908, "step": 87090 }, { "epoch": 0.6304878137056903, "grad_norm": 0.15502314269542694, "learning_rate": 4.369519424960368e-06, "loss": 0.8921, "step": 87100 }, { "epoch": 0.6305602003662765, "grad_norm": 0.1564696878194809, "learning_rate": 4.3694470382997825e-06, "loss": 0.8971, "step": 87110 }, { "epoch": 0.6306325870268626, "grad_norm": 0.1649816632270813, "learning_rate": 4.369374651639196e-06, "loss": 0.898, "step": 87120 }, { "epoch": 0.6307049736874488, "grad_norm": 0.16276071965694427, "learning_rate": 4.36930226497861e-06, "loss": 0.8981, "step": 87130 }, { "epoch": 0.6307773603480351, "grad_norm": 0.14670120179653168, "learning_rate": 4.369229878318023e-06, "loss": 0.8952, "step": 87140 }, { "epoch": 0.6308497470086213, "grad_norm": 0.15809543430805206, "learning_rate": 4.369157491657438e-06, "loss": 0.9064, "step": 87150 }, { "epoch": 0.6309221336692075, "grad_norm": 0.16645848751068115, "learning_rate": 4.3690851049968515e-06, "loss": 0.8973, "step": 87160 }, { "epoch": 0.6309945203297936, "grad_norm": 0.1631806641817093, "learning_rate": 4.369012718336265e-06, "loss": 0.8984, "step": 87170 }, { "epoch": 0.6310669069903798, "grad_norm": 0.20448337495326996, "learning_rate": 4.368940331675679e-06, "loss": 0.9061, "step": 87180 }, { "epoch": 0.631139293650966, "grad_norm": 0.3554750084877014, "learning_rate": 4.368867945015093e-06, "loss": 0.9172, "step": 87190 }, { "epoch": 0.6312116803115522, "grad_norm": 0.15781621634960175, "learning_rate": 4.368795558354507e-06, "loss": 0.9017, "step": 87200 }, { "epoch": 0.6312840669721383, "grad_norm": 0.16704939305782318, "learning_rate": 4.36872317169392e-06, "loss": 0.8875, "step": 87210 }, { "epoch": 0.6313564536327245, "grad_norm": 0.16684958338737488, "learning_rate": 4.368650785033334e-06, "loss": 0.9216, "step": 87220 }, { "epoch": 0.6314288402933107, "grad_norm": 0.16323356330394745, "learning_rate": 4.3685783983727485e-06, "loss": 0.9072, "step": 87230 }, { "epoch": 0.631501226953897, "grad_norm": 0.16825905442237854, "learning_rate": 4.368506011712162e-06, "loss": 0.8985, "step": 87240 }, { "epoch": 0.6315736136144832, "grad_norm": 0.1925610899925232, "learning_rate": 4.368433625051576e-06, "loss": 0.9055, "step": 87250 }, { "epoch": 0.6316460002750693, "grad_norm": 0.16499613225460052, "learning_rate": 4.368361238390989e-06, "loss": 0.9002, "step": 87260 }, { "epoch": 0.6317183869356555, "grad_norm": 0.16235573589801788, "learning_rate": 4.368288851730404e-06, "loss": 0.9047, "step": 87270 }, { "epoch": 0.6317907735962417, "grad_norm": 0.15934467315673828, "learning_rate": 4.368216465069817e-06, "loss": 0.8983, "step": 87280 }, { "epoch": 0.6318631602568279, "grad_norm": 0.15878109633922577, "learning_rate": 4.368144078409231e-06, "loss": 0.9026, "step": 87290 }, { "epoch": 0.631935546917414, "grad_norm": 0.14932307600975037, "learning_rate": 4.368071691748645e-06, "loss": 0.8983, "step": 87300 }, { "epoch": 0.6320079335780002, "grad_norm": 0.1681598424911499, "learning_rate": 4.367999305088059e-06, "loss": 0.9061, "step": 87310 }, { "epoch": 0.6320803202385864, "grad_norm": 0.14897821843624115, "learning_rate": 4.367926918427473e-06, "loss": 0.8846, "step": 87320 }, { "epoch": 0.6321527068991726, "grad_norm": 0.4469514787197113, "learning_rate": 4.367854531766886e-06, "loss": 0.892, "step": 87330 }, { "epoch": 0.6322250935597588, "grad_norm": 0.15947720408439636, "learning_rate": 4.3677821451063e-06, "loss": 0.8925, "step": 87340 }, { "epoch": 0.632297480220345, "grad_norm": 0.15741553902626038, "learning_rate": 4.367709758445714e-06, "loss": 0.8912, "step": 87350 }, { "epoch": 0.6323698668809312, "grad_norm": 0.1720518171787262, "learning_rate": 4.367637371785128e-06, "loss": 0.9073, "step": 87360 }, { "epoch": 0.6324422535415174, "grad_norm": 0.14960655570030212, "learning_rate": 4.367564985124542e-06, "loss": 0.8995, "step": 87370 }, { "epoch": 0.6325146402021036, "grad_norm": 0.16579873859882355, "learning_rate": 4.367492598463955e-06, "loss": 0.8921, "step": 87380 }, { "epoch": 0.6325870268626897, "grad_norm": 0.15160775184631348, "learning_rate": 4.367420211803369e-06, "loss": 0.8939, "step": 87390 }, { "epoch": 0.6326594135232759, "grad_norm": 0.15509670972824097, "learning_rate": 4.367347825142783e-06, "loss": 0.8962, "step": 87400 }, { "epoch": 0.6327318001838621, "grad_norm": 0.16785472631454468, "learning_rate": 4.367275438482197e-06, "loss": 0.9102, "step": 87410 }, { "epoch": 0.6328041868444483, "grad_norm": 0.18528971076011658, "learning_rate": 4.3672030518216106e-06, "loss": 0.901, "step": 87420 }, { "epoch": 0.6328765735050345, "grad_norm": 0.1553627848625183, "learning_rate": 4.367130665161024e-06, "loss": 0.9006, "step": 87430 }, { "epoch": 0.6329489601656206, "grad_norm": 0.15704572200775146, "learning_rate": 4.367058278500439e-06, "loss": 0.8956, "step": 87440 }, { "epoch": 0.6330213468262069, "grad_norm": 0.16988041996955872, "learning_rate": 4.366985891839852e-06, "loss": 0.8945, "step": 87450 }, { "epoch": 0.6330937334867931, "grad_norm": 0.15681475400924683, "learning_rate": 4.366913505179266e-06, "loss": 0.8865, "step": 87460 }, { "epoch": 0.6331661201473793, "grad_norm": 0.15510614216327667, "learning_rate": 4.3668411185186795e-06, "loss": 0.8965, "step": 87470 }, { "epoch": 0.6332385068079655, "grad_norm": 0.16827818751335144, "learning_rate": 4.366768731858094e-06, "loss": 0.892, "step": 87480 }, { "epoch": 0.6333108934685516, "grad_norm": 0.1560061275959015, "learning_rate": 4.3666963451975076e-06, "loss": 0.9041, "step": 87490 }, { "epoch": 0.6333832801291378, "grad_norm": 0.15482322871685028, "learning_rate": 4.366623958536921e-06, "loss": 0.9012, "step": 87500 }, { "epoch": 0.633455666789724, "grad_norm": 0.1474655419588089, "learning_rate": 4.366551571876335e-06, "loss": 0.896, "step": 87510 }, { "epoch": 0.6335280534503102, "grad_norm": 0.15873880684375763, "learning_rate": 4.366479185215749e-06, "loss": 0.8961, "step": 87520 }, { "epoch": 0.6336004401108963, "grad_norm": 0.2057919055223465, "learning_rate": 4.366406798555163e-06, "loss": 0.9079, "step": 87530 }, { "epoch": 0.6336728267714825, "grad_norm": 0.1464945524930954, "learning_rate": 4.3663344118945765e-06, "loss": 0.9039, "step": 87540 }, { "epoch": 0.6337452134320687, "grad_norm": 0.15978756546974182, "learning_rate": 4.36626202523399e-06, "loss": 0.9076, "step": 87550 }, { "epoch": 0.633817600092655, "grad_norm": 0.15238073468208313, "learning_rate": 4.366189638573405e-06, "loss": 0.8772, "step": 87560 }, { "epoch": 0.6338899867532412, "grad_norm": 0.1541658192873001, "learning_rate": 4.366117251912818e-06, "loss": 0.9014, "step": 87570 }, { "epoch": 0.6339623734138273, "grad_norm": 0.15558382868766785, "learning_rate": 4.366044865252232e-06, "loss": 0.9151, "step": 87580 }, { "epoch": 0.6340347600744135, "grad_norm": 0.22495369613170624, "learning_rate": 4.3659724785916454e-06, "loss": 0.9017, "step": 87590 }, { "epoch": 0.6341071467349997, "grad_norm": 0.14666642248630524, "learning_rate": 4.36590009193106e-06, "loss": 0.906, "step": 87600 }, { "epoch": 0.6341795333955859, "grad_norm": 0.1527988612651825, "learning_rate": 4.365827705270473e-06, "loss": 0.8978, "step": 87610 }, { "epoch": 0.634251920056172, "grad_norm": 0.2135559469461441, "learning_rate": 4.365755318609886e-06, "loss": 0.9118, "step": 87620 }, { "epoch": 0.6343243067167582, "grad_norm": 0.15013383328914642, "learning_rate": 4.365682931949301e-06, "loss": 0.8956, "step": 87630 }, { "epoch": 0.6343966933773444, "grad_norm": 0.15739452838897705, "learning_rate": 4.365610545288714e-06, "loss": 0.8912, "step": 87640 }, { "epoch": 0.6344690800379306, "grad_norm": 0.16141614317893982, "learning_rate": 4.365538158628128e-06, "loss": 0.9124, "step": 87650 }, { "epoch": 0.6345414666985167, "grad_norm": 0.1528901308774948, "learning_rate": 4.365465771967542e-06, "loss": 0.903, "step": 87660 }, { "epoch": 0.634613853359103, "grad_norm": 0.1768021136522293, "learning_rate": 4.365393385306956e-06, "loss": 0.8949, "step": 87670 }, { "epoch": 0.6346862400196892, "grad_norm": 0.15449446439743042, "learning_rate": 4.36532099864637e-06, "loss": 0.9019, "step": 87680 }, { "epoch": 0.6347586266802754, "grad_norm": 0.19363653659820557, "learning_rate": 4.365248611985783e-06, "loss": 0.9071, "step": 87690 }, { "epoch": 0.6348310133408616, "grad_norm": 0.1683684140443802, "learning_rate": 4.365176225325197e-06, "loss": 0.895, "step": 87700 }, { "epoch": 0.6349034000014477, "grad_norm": 0.15621653199195862, "learning_rate": 4.365103838664611e-06, "loss": 0.8939, "step": 87710 }, { "epoch": 0.6349757866620339, "grad_norm": 0.16487151384353638, "learning_rate": 4.365031452004025e-06, "loss": 0.8915, "step": 87720 }, { "epoch": 0.6350481733226201, "grad_norm": 0.1636609584093094, "learning_rate": 4.364959065343439e-06, "loss": 0.895, "step": 87730 }, { "epoch": 0.6351205599832063, "grad_norm": 0.1616763025522232, "learning_rate": 4.364886678682852e-06, "loss": 0.9072, "step": 87740 }, { "epoch": 0.6351929466437924, "grad_norm": 0.1616402268409729, "learning_rate": 4.364814292022267e-06, "loss": 0.8905, "step": 87750 }, { "epoch": 0.6352653333043786, "grad_norm": 0.1978462189435959, "learning_rate": 4.36474190536168e-06, "loss": 0.9042, "step": 87760 }, { "epoch": 0.6353377199649649, "grad_norm": 0.16519363224506378, "learning_rate": 4.364669518701094e-06, "loss": 0.8818, "step": 87770 }, { "epoch": 0.6354101066255511, "grad_norm": 0.16044704616069794, "learning_rate": 4.3645971320405075e-06, "loss": 0.8968, "step": 87780 }, { "epoch": 0.6354824932861373, "grad_norm": 0.16723746061325073, "learning_rate": 4.364524745379922e-06, "loss": 0.9008, "step": 87790 }, { "epoch": 0.6355548799467234, "grad_norm": 0.15763086080551147, "learning_rate": 4.364452358719336e-06, "loss": 0.9046, "step": 87800 }, { "epoch": 0.6356272666073096, "grad_norm": 0.16559143364429474, "learning_rate": 4.364379972058749e-06, "loss": 0.9074, "step": 87810 }, { "epoch": 0.6356996532678958, "grad_norm": 0.17605692148208618, "learning_rate": 4.364307585398163e-06, "loss": 0.8845, "step": 87820 }, { "epoch": 0.635772039928482, "grad_norm": 0.1688181757926941, "learning_rate": 4.364235198737577e-06, "loss": 0.9096, "step": 87830 }, { "epoch": 0.6358444265890681, "grad_norm": 0.15427358448505402, "learning_rate": 4.364162812076991e-06, "loss": 0.9048, "step": 87840 }, { "epoch": 0.6359168132496543, "grad_norm": 0.17729778587818146, "learning_rate": 4.3640904254164045e-06, "loss": 0.8993, "step": 87850 }, { "epoch": 0.6359891999102405, "grad_norm": 0.17065225541591644, "learning_rate": 4.364018038755818e-06, "loss": 0.8954, "step": 87860 }, { "epoch": 0.6360615865708267, "grad_norm": 0.15446516871452332, "learning_rate": 4.363945652095233e-06, "loss": 0.9051, "step": 87870 }, { "epoch": 0.636133973231413, "grad_norm": 0.19369186460971832, "learning_rate": 4.363873265434646e-06, "loss": 0.8988, "step": 87880 }, { "epoch": 0.6362063598919991, "grad_norm": 0.15911339223384857, "learning_rate": 4.36380087877406e-06, "loss": 0.9014, "step": 87890 }, { "epoch": 0.6362787465525853, "grad_norm": 0.1556956171989441, "learning_rate": 4.3637284921134735e-06, "loss": 0.8952, "step": 87900 }, { "epoch": 0.6363511332131715, "grad_norm": 0.176447331905365, "learning_rate": 4.363656105452888e-06, "loss": 0.8821, "step": 87910 }, { "epoch": 0.6364235198737577, "grad_norm": 0.15189889073371887, "learning_rate": 4.3635837187923015e-06, "loss": 0.896, "step": 87920 }, { "epoch": 0.6364959065343438, "grad_norm": 0.20345427095890045, "learning_rate": 4.363511332131715e-06, "loss": 0.9084, "step": 87930 }, { "epoch": 0.63656829319493, "grad_norm": 0.1482352763414383, "learning_rate": 4.363438945471129e-06, "loss": 0.8937, "step": 87940 }, { "epoch": 0.6366406798555162, "grad_norm": 0.16106894612312317, "learning_rate": 4.363366558810543e-06, "loss": 0.8992, "step": 87950 }, { "epoch": 0.6367130665161024, "grad_norm": 0.16146771609783173, "learning_rate": 4.363294172149957e-06, "loss": 0.8968, "step": 87960 }, { "epoch": 0.6367854531766886, "grad_norm": 0.15578705072402954, "learning_rate": 4.3632217854893705e-06, "loss": 0.9023, "step": 87970 }, { "epoch": 0.6368578398372748, "grad_norm": 0.19550129771232605, "learning_rate": 4.363149398828784e-06, "loss": 0.9035, "step": 87980 }, { "epoch": 0.636930226497861, "grad_norm": 0.18265287578105927, "learning_rate": 4.3630770121681985e-06, "loss": 0.9067, "step": 87990 }, { "epoch": 0.6370026131584472, "grad_norm": 0.14970242977142334, "learning_rate": 4.363004625507612e-06, "loss": 0.9117, "step": 88000 }, { "epoch": 0.6370749998190334, "grad_norm": 0.1472383439540863, "learning_rate": 4.362932238847026e-06, "loss": 0.8982, "step": 88010 }, { "epoch": 0.6371473864796195, "grad_norm": 0.14995744824409485, "learning_rate": 4.362859852186439e-06, "loss": 0.8951, "step": 88020 }, { "epoch": 0.6372197731402057, "grad_norm": 0.15421155095100403, "learning_rate": 4.362787465525853e-06, "loss": 0.8772, "step": 88030 }, { "epoch": 0.6372921598007919, "grad_norm": 0.1862330138683319, "learning_rate": 4.3627150788652675e-06, "loss": 0.9068, "step": 88040 }, { "epoch": 0.6373645464613781, "grad_norm": 0.15791213512420654, "learning_rate": 4.362642692204681e-06, "loss": 0.8989, "step": 88050 }, { "epoch": 0.6374369331219643, "grad_norm": 0.1469462811946869, "learning_rate": 4.362570305544095e-06, "loss": 0.8983, "step": 88060 }, { "epoch": 0.6375093197825504, "grad_norm": 0.15179064869880676, "learning_rate": 4.362497918883508e-06, "loss": 0.893, "step": 88070 }, { "epoch": 0.6375817064431366, "grad_norm": 0.15898427367210388, "learning_rate": 4.362425532222923e-06, "loss": 0.8959, "step": 88080 }, { "epoch": 0.6376540931037229, "grad_norm": 0.1530866026878357, "learning_rate": 4.362353145562336e-06, "loss": 0.8991, "step": 88090 }, { "epoch": 0.6377264797643091, "grad_norm": 0.15585550665855408, "learning_rate": 4.36228075890175e-06, "loss": 0.8949, "step": 88100 }, { "epoch": 0.6377988664248952, "grad_norm": 0.2193233221769333, "learning_rate": 4.362208372241164e-06, "loss": 0.9041, "step": 88110 }, { "epoch": 0.6378712530854814, "grad_norm": 0.144108384847641, "learning_rate": 4.362135985580578e-06, "loss": 0.9028, "step": 88120 }, { "epoch": 0.6379436397460676, "grad_norm": 0.18072602152824402, "learning_rate": 4.362063598919992e-06, "loss": 0.9006, "step": 88130 }, { "epoch": 0.6380160264066538, "grad_norm": 0.15635623037815094, "learning_rate": 4.361991212259405e-06, "loss": 0.9059, "step": 88140 }, { "epoch": 0.63808841306724, "grad_norm": 0.16746172308921814, "learning_rate": 4.361918825598819e-06, "loss": 0.8977, "step": 88150 }, { "epoch": 0.6381607997278261, "grad_norm": 0.1613510549068451, "learning_rate": 4.3618464389382326e-06, "loss": 0.9092, "step": 88160 }, { "epoch": 0.6382331863884123, "grad_norm": 0.1660555601119995, "learning_rate": 4.361774052277646e-06, "loss": 0.908, "step": 88170 }, { "epoch": 0.6383055730489985, "grad_norm": 0.19197924435138702, "learning_rate": 4.36170166561706e-06, "loss": 0.8946, "step": 88180 }, { "epoch": 0.6383779597095847, "grad_norm": 0.1495658904314041, "learning_rate": 4.361629278956474e-06, "loss": 0.9006, "step": 88190 }, { "epoch": 0.638450346370171, "grad_norm": 0.15957580506801605, "learning_rate": 4.361556892295888e-06, "loss": 0.9024, "step": 88200 }, { "epoch": 0.6385227330307571, "grad_norm": 0.1745164394378662, "learning_rate": 4.3614845056353015e-06, "loss": 0.8863, "step": 88210 }, { "epoch": 0.6385951196913433, "grad_norm": 0.15032242238521576, "learning_rate": 4.361412118974715e-06, "loss": 0.8906, "step": 88220 }, { "epoch": 0.6386675063519295, "grad_norm": 0.15983961522579193, "learning_rate": 4.3613397323141296e-06, "loss": 0.9114, "step": 88230 }, { "epoch": 0.6387398930125157, "grad_norm": 0.21465584635734558, "learning_rate": 4.361267345653543e-06, "loss": 0.8914, "step": 88240 }, { "epoch": 0.6388122796731018, "grad_norm": 0.15455862879753113, "learning_rate": 4.361194958992957e-06, "loss": 0.9126, "step": 88250 }, { "epoch": 0.638884666333688, "grad_norm": 0.14769776165485382, "learning_rate": 4.36112257233237e-06, "loss": 0.8858, "step": 88260 }, { "epoch": 0.6389570529942742, "grad_norm": 1.265124797821045, "learning_rate": 4.361050185671785e-06, "loss": 0.8948, "step": 88270 }, { "epoch": 0.6390294396548604, "grad_norm": 0.1922004669904709, "learning_rate": 4.3609777990111985e-06, "loss": 0.9017, "step": 88280 }, { "epoch": 0.6391018263154465, "grad_norm": 0.16533154249191284, "learning_rate": 4.360905412350612e-06, "loss": 0.8987, "step": 88290 }, { "epoch": 0.6391742129760328, "grad_norm": 0.15893571078777313, "learning_rate": 4.360833025690026e-06, "loss": 0.8976, "step": 88300 }, { "epoch": 0.639246599636619, "grad_norm": 0.150678813457489, "learning_rate": 4.36076063902944e-06, "loss": 0.894, "step": 88310 }, { "epoch": 0.6393189862972052, "grad_norm": 0.14992178976535797, "learning_rate": 4.360688252368854e-06, "loss": 0.9037, "step": 88320 }, { "epoch": 0.6393913729577914, "grad_norm": 0.1573602557182312, "learning_rate": 4.3606158657082674e-06, "loss": 0.9089, "step": 88330 }, { "epoch": 0.6394637596183775, "grad_norm": 0.1497740000486374, "learning_rate": 4.360543479047681e-06, "loss": 0.9014, "step": 88340 }, { "epoch": 0.6395361462789637, "grad_norm": 0.1509128361940384, "learning_rate": 4.3604710923870955e-06, "loss": 0.9033, "step": 88350 }, { "epoch": 0.6396085329395499, "grad_norm": 0.15733207762241364, "learning_rate": 4.360398705726509e-06, "loss": 0.9074, "step": 88360 }, { "epoch": 0.6396809196001361, "grad_norm": 0.16280068457126617, "learning_rate": 4.360326319065923e-06, "loss": 0.9005, "step": 88370 }, { "epoch": 0.6397533062607222, "grad_norm": 0.15421195328235626, "learning_rate": 4.360253932405336e-06, "loss": 0.9046, "step": 88380 }, { "epoch": 0.6398256929213084, "grad_norm": 0.22842425107955933, "learning_rate": 4.360181545744751e-06, "loss": 0.9101, "step": 88390 }, { "epoch": 0.6398980795818946, "grad_norm": 0.1527772694826126, "learning_rate": 4.3601091590841644e-06, "loss": 0.9021, "step": 88400 }, { "epoch": 0.6399704662424809, "grad_norm": 0.15494686365127563, "learning_rate": 4.360036772423578e-06, "loss": 0.8911, "step": 88410 }, { "epoch": 0.6400428529030671, "grad_norm": 0.16015708446502686, "learning_rate": 4.359964385762992e-06, "loss": 0.916, "step": 88420 }, { "epoch": 0.6401152395636532, "grad_norm": 0.15892557799816132, "learning_rate": 4.359891999102406e-06, "loss": 0.8768, "step": 88430 }, { "epoch": 0.6401876262242394, "grad_norm": 0.1498948186635971, "learning_rate": 4.35981961244182e-06, "loss": 0.9047, "step": 88440 }, { "epoch": 0.6402600128848256, "grad_norm": 0.1665375530719757, "learning_rate": 4.359747225781233e-06, "loss": 0.8941, "step": 88450 }, { "epoch": 0.6403323995454118, "grad_norm": 0.15249140560626984, "learning_rate": 4.359674839120647e-06, "loss": 0.8964, "step": 88460 }, { "epoch": 0.640404786205998, "grad_norm": 0.1530652791261673, "learning_rate": 4.3596024524600614e-06, "loss": 0.9092, "step": 88470 }, { "epoch": 0.6404771728665841, "grad_norm": 0.1995309293270111, "learning_rate": 4.359530065799475e-06, "loss": 0.8985, "step": 88480 }, { "epoch": 0.6405495595271703, "grad_norm": 0.17197419703006744, "learning_rate": 4.359457679138889e-06, "loss": 0.9123, "step": 88490 }, { "epoch": 0.6406219461877565, "grad_norm": 0.17037421464920044, "learning_rate": 4.359385292478302e-06, "loss": 0.8954, "step": 88500 }, { "epoch": 0.6406943328483428, "grad_norm": 0.14726117253303528, "learning_rate": 4.359312905817717e-06, "loss": 0.9045, "step": 88510 }, { "epoch": 0.6407667195089289, "grad_norm": 0.14530207216739655, "learning_rate": 4.35924051915713e-06, "loss": 0.8884, "step": 88520 }, { "epoch": 0.6408391061695151, "grad_norm": 0.15528494119644165, "learning_rate": 4.359168132496544e-06, "loss": 0.8854, "step": 88530 }, { "epoch": 0.6409114928301013, "grad_norm": 0.16812381148338318, "learning_rate": 4.359095745835958e-06, "loss": 0.9137, "step": 88540 }, { "epoch": 0.6409838794906875, "grad_norm": 0.15406684577465057, "learning_rate": 4.359023359175372e-06, "loss": 0.9013, "step": 88550 }, { "epoch": 0.6410562661512736, "grad_norm": 0.15804636478424072, "learning_rate": 4.358950972514786e-06, "loss": 0.903, "step": 88560 }, { "epoch": 0.6411286528118598, "grad_norm": 0.15636689960956573, "learning_rate": 4.358878585854199e-06, "loss": 0.9006, "step": 88570 }, { "epoch": 0.641201039472446, "grad_norm": 0.18669186532497406, "learning_rate": 4.358806199193613e-06, "loss": 0.9171, "step": 88580 }, { "epoch": 0.6412734261330322, "grad_norm": 0.17967639863491058, "learning_rate": 4.358733812533027e-06, "loss": 0.8925, "step": 88590 }, { "epoch": 0.6413458127936184, "grad_norm": 0.15877005457878113, "learning_rate": 4.358661425872441e-06, "loss": 0.8923, "step": 88600 }, { "epoch": 0.6414181994542045, "grad_norm": 0.1480860710144043, "learning_rate": 4.358589039211855e-06, "loss": 0.9054, "step": 88610 }, { "epoch": 0.6414905861147908, "grad_norm": 0.15211063623428345, "learning_rate": 4.358516652551268e-06, "loss": 0.9001, "step": 88620 }, { "epoch": 0.641562972775377, "grad_norm": 0.1518232822418213, "learning_rate": 4.358444265890682e-06, "loss": 0.9031, "step": 88630 }, { "epoch": 0.6416353594359632, "grad_norm": 0.1446765810251236, "learning_rate": 4.358371879230096e-06, "loss": 0.9166, "step": 88640 }, { "epoch": 0.6417077460965493, "grad_norm": 0.17465344071388245, "learning_rate": 4.35829949256951e-06, "loss": 0.8882, "step": 88650 }, { "epoch": 0.6417801327571355, "grad_norm": 0.1594177931547165, "learning_rate": 4.3582271059089235e-06, "loss": 0.9004, "step": 88660 }, { "epoch": 0.6418525194177217, "grad_norm": 0.1680103987455368, "learning_rate": 4.358154719248337e-06, "loss": 0.891, "step": 88670 }, { "epoch": 0.6419249060783079, "grad_norm": 0.1576722413301468, "learning_rate": 4.358082332587751e-06, "loss": 0.9137, "step": 88680 }, { "epoch": 0.641997292738894, "grad_norm": 0.14920315146446228, "learning_rate": 4.358009945927164e-06, "loss": 0.8867, "step": 88690 }, { "epoch": 0.6420696793994802, "grad_norm": 0.15702413022518158, "learning_rate": 4.357937559266579e-06, "loss": 0.8899, "step": 88700 }, { "epoch": 0.6421420660600664, "grad_norm": 0.14902466535568237, "learning_rate": 4.3578651726059925e-06, "loss": 0.9046, "step": 88710 }, { "epoch": 0.6422144527206526, "grad_norm": 0.1598339080810547, "learning_rate": 4.357792785945406e-06, "loss": 0.896, "step": 88720 }, { "epoch": 0.6422868393812389, "grad_norm": 0.15564772486686707, "learning_rate": 4.35772039928482e-06, "loss": 0.9102, "step": 88730 }, { "epoch": 0.642359226041825, "grad_norm": 0.14430661499500275, "learning_rate": 4.357648012624234e-06, "loss": 0.8899, "step": 88740 }, { "epoch": 0.6424316127024112, "grad_norm": 0.15602971613407135, "learning_rate": 4.357575625963648e-06, "loss": 0.8982, "step": 88750 }, { "epoch": 0.6425039993629974, "grad_norm": 0.16379621624946594, "learning_rate": 4.357503239303061e-06, "loss": 0.9031, "step": 88760 }, { "epoch": 0.6425763860235836, "grad_norm": 0.15604230761528015, "learning_rate": 4.357430852642475e-06, "loss": 0.8916, "step": 88770 }, { "epoch": 0.6426487726841698, "grad_norm": 0.15678240358829498, "learning_rate": 4.3573584659818895e-06, "loss": 0.8947, "step": 88780 }, { "epoch": 0.6427211593447559, "grad_norm": 0.1667027622461319, "learning_rate": 4.357286079321303e-06, "loss": 0.8967, "step": 88790 }, { "epoch": 0.6427935460053421, "grad_norm": 0.1623423546552658, "learning_rate": 4.357213692660717e-06, "loss": 0.8882, "step": 88800 }, { "epoch": 0.6428659326659283, "grad_norm": 0.15376125276088715, "learning_rate": 4.35714130600013e-06, "loss": 0.8956, "step": 88810 }, { "epoch": 0.6429383193265145, "grad_norm": 0.1492316871881485, "learning_rate": 4.357068919339544e-06, "loss": 0.8974, "step": 88820 }, { "epoch": 0.6430107059871008, "grad_norm": 0.1571977287530899, "learning_rate": 4.356996532678958e-06, "loss": 0.9096, "step": 88830 }, { "epoch": 0.6430830926476869, "grad_norm": 0.1929909884929657, "learning_rate": 4.356924146018372e-06, "loss": 0.8849, "step": 88840 }, { "epoch": 0.6431554793082731, "grad_norm": 0.15631908178329468, "learning_rate": 4.356851759357786e-06, "loss": 0.9151, "step": 88850 }, { "epoch": 0.6432278659688593, "grad_norm": 0.14949972927570343, "learning_rate": 4.356779372697199e-06, "loss": 0.8962, "step": 88860 }, { "epoch": 0.6433002526294455, "grad_norm": 0.16994498670101166, "learning_rate": 4.356706986036614e-06, "loss": 0.8988, "step": 88870 }, { "epoch": 0.6433726392900316, "grad_norm": 0.16940979659557343, "learning_rate": 4.356634599376027e-06, "loss": 0.9085, "step": 88880 }, { "epoch": 0.6434450259506178, "grad_norm": 0.15808720886707306, "learning_rate": 4.356562212715441e-06, "loss": 0.9043, "step": 88890 }, { "epoch": 0.643517412611204, "grad_norm": 0.15764841437339783, "learning_rate": 4.3564898260548546e-06, "loss": 0.8932, "step": 88900 }, { "epoch": 0.6435897992717902, "grad_norm": 0.15186621248722076, "learning_rate": 4.356417439394269e-06, "loss": 0.9061, "step": 88910 }, { "epoch": 0.6436621859323763, "grad_norm": 0.14821207523345947, "learning_rate": 4.356345052733683e-06, "loss": 0.8926, "step": 88920 }, { "epoch": 0.6437345725929625, "grad_norm": 0.15951776504516602, "learning_rate": 4.356272666073096e-06, "loss": 0.887, "step": 88930 }, { "epoch": 0.6438069592535488, "grad_norm": 0.1618291735649109, "learning_rate": 4.35620027941251e-06, "loss": 0.8978, "step": 88940 }, { "epoch": 0.643879345914135, "grad_norm": 0.1718137413263321, "learning_rate": 4.356127892751924e-06, "loss": 0.902, "step": 88950 }, { "epoch": 0.6439517325747212, "grad_norm": 0.1487615555524826, "learning_rate": 4.356055506091338e-06, "loss": 0.897, "step": 88960 }, { "epoch": 0.6440241192353073, "grad_norm": 0.1924024522304535, "learning_rate": 4.3559831194307516e-06, "loss": 0.9099, "step": 88970 }, { "epoch": 0.6440965058958935, "grad_norm": 0.15762527287006378, "learning_rate": 4.355910732770165e-06, "loss": 0.891, "step": 88980 }, { "epoch": 0.6441688925564797, "grad_norm": 0.16442440450191498, "learning_rate": 4.35583834610958e-06, "loss": 0.8914, "step": 88990 }, { "epoch": 0.6442412792170659, "grad_norm": 0.15198470652103424, "learning_rate": 4.355765959448993e-06, "loss": 0.9036, "step": 89000 }, { "epoch": 0.644313665877652, "grad_norm": 0.16357183456420898, "learning_rate": 4.355693572788407e-06, "loss": 0.9018, "step": 89010 }, { "epoch": 0.6443860525382382, "grad_norm": 0.15125641226768494, "learning_rate": 4.3556211861278205e-06, "loss": 0.9005, "step": 89020 }, { "epoch": 0.6444584391988244, "grad_norm": 0.1542976200580597, "learning_rate": 4.355548799467235e-06, "loss": 0.9062, "step": 89030 }, { "epoch": 0.6445308258594107, "grad_norm": 0.16405089199543, "learning_rate": 4.3554764128066486e-06, "loss": 0.8959, "step": 89040 }, { "epoch": 0.6446032125199969, "grad_norm": 0.15201352536678314, "learning_rate": 4.355404026146062e-06, "loss": 0.8916, "step": 89050 }, { "epoch": 0.644675599180583, "grad_norm": 0.1623867303133011, "learning_rate": 4.355331639485476e-06, "loss": 0.889, "step": 89060 }, { "epoch": 0.6447479858411692, "grad_norm": 0.1591363102197647, "learning_rate": 4.35525925282489e-06, "loss": 0.9057, "step": 89070 }, { "epoch": 0.6448203725017554, "grad_norm": 0.16368667781352997, "learning_rate": 4.355186866164304e-06, "loss": 0.8983, "step": 89080 }, { "epoch": 0.6448927591623416, "grad_norm": 0.15838485956192017, "learning_rate": 4.3551144795037175e-06, "loss": 0.8975, "step": 89090 }, { "epoch": 0.6449651458229277, "grad_norm": 0.14503420889377594, "learning_rate": 4.355042092843131e-06, "loss": 0.8942, "step": 89100 }, { "epoch": 0.6450375324835139, "grad_norm": 0.1557312160730362, "learning_rate": 4.354969706182546e-06, "loss": 0.8893, "step": 89110 }, { "epoch": 0.6451099191441001, "grad_norm": 0.16098271310329437, "learning_rate": 4.354897319521959e-06, "loss": 0.9019, "step": 89120 }, { "epoch": 0.6451823058046863, "grad_norm": 0.2473098188638687, "learning_rate": 4.354824932861373e-06, "loss": 0.8977, "step": 89130 }, { "epoch": 0.6452546924652725, "grad_norm": 0.15116575360298157, "learning_rate": 4.3547525462007864e-06, "loss": 0.896, "step": 89140 }, { "epoch": 0.6453270791258587, "grad_norm": 0.16768385469913483, "learning_rate": 4.354680159540201e-06, "loss": 0.8977, "step": 89150 }, { "epoch": 0.6453994657864449, "grad_norm": 0.15651331841945648, "learning_rate": 4.3546077728796145e-06, "loss": 0.8983, "step": 89160 }, { "epoch": 0.6454718524470311, "grad_norm": 0.18052604794502258, "learning_rate": 4.354535386219028e-06, "loss": 0.9187, "step": 89170 }, { "epoch": 0.6455442391076173, "grad_norm": 0.15085764229297638, "learning_rate": 4.354462999558442e-06, "loss": 0.8947, "step": 89180 }, { "epoch": 0.6456166257682034, "grad_norm": 0.15222260355949402, "learning_rate": 4.354390612897856e-06, "loss": 0.8996, "step": 89190 }, { "epoch": 0.6456890124287896, "grad_norm": 0.16957657039165497, "learning_rate": 4.354318226237269e-06, "loss": 0.9059, "step": 89200 }, { "epoch": 0.6457613990893758, "grad_norm": 0.17988649010658264, "learning_rate": 4.354245839576683e-06, "loss": 0.9038, "step": 89210 }, { "epoch": 0.645833785749962, "grad_norm": 0.15836915373802185, "learning_rate": 4.354173452916097e-06, "loss": 0.9061, "step": 89220 }, { "epoch": 0.6459061724105482, "grad_norm": 0.16917668282985687, "learning_rate": 4.354101066255511e-06, "loss": 0.9056, "step": 89230 }, { "epoch": 0.6459785590711343, "grad_norm": 0.17027825117111206, "learning_rate": 4.354028679594924e-06, "loss": 0.8929, "step": 89240 }, { "epoch": 0.6460509457317205, "grad_norm": 0.15539249777793884, "learning_rate": 4.353956292934338e-06, "loss": 0.8926, "step": 89250 }, { "epoch": 0.6461233323923068, "grad_norm": 0.17285488545894623, "learning_rate": 4.353883906273752e-06, "loss": 0.9006, "step": 89260 }, { "epoch": 0.646195719052893, "grad_norm": 0.15279121696949005, "learning_rate": 4.353811519613166e-06, "loss": 0.9059, "step": 89270 }, { "epoch": 0.6462681057134791, "grad_norm": 0.16332434117794037, "learning_rate": 4.35373913295258e-06, "loss": 0.8942, "step": 89280 }, { "epoch": 0.6463404923740653, "grad_norm": 0.15591338276863098, "learning_rate": 4.353666746291993e-06, "loss": 0.8902, "step": 89290 }, { "epoch": 0.6464128790346515, "grad_norm": 0.15603341162204742, "learning_rate": 4.353594359631408e-06, "loss": 0.8933, "step": 89300 }, { "epoch": 0.6464852656952377, "grad_norm": 0.17558693885803223, "learning_rate": 4.353521972970821e-06, "loss": 0.8918, "step": 89310 }, { "epoch": 0.6465576523558239, "grad_norm": 0.1703271120786667, "learning_rate": 4.353449586310235e-06, "loss": 0.8969, "step": 89320 }, { "epoch": 0.64663003901641, "grad_norm": 0.15776929259300232, "learning_rate": 4.3533771996496485e-06, "loss": 0.8974, "step": 89330 }, { "epoch": 0.6467024256769962, "grad_norm": 0.1533646285533905, "learning_rate": 4.353304812989063e-06, "loss": 0.8994, "step": 89340 }, { "epoch": 0.6467748123375824, "grad_norm": 0.3359949290752411, "learning_rate": 4.353232426328477e-06, "loss": 0.9038, "step": 89350 }, { "epoch": 0.6468471989981687, "grad_norm": 0.1611877977848053, "learning_rate": 4.35316003966789e-06, "loss": 0.8968, "step": 89360 }, { "epoch": 0.6469195856587548, "grad_norm": 0.1490904539823532, "learning_rate": 4.353087653007304e-06, "loss": 0.8771, "step": 89370 }, { "epoch": 0.646991972319341, "grad_norm": 0.15412895381450653, "learning_rate": 4.353015266346718e-06, "loss": 0.9061, "step": 89380 }, { "epoch": 0.6470643589799272, "grad_norm": 0.16382527351379395, "learning_rate": 4.352942879686132e-06, "loss": 0.8989, "step": 89390 }, { "epoch": 0.6471367456405134, "grad_norm": 0.15625497698783875, "learning_rate": 4.3528704930255455e-06, "loss": 0.9106, "step": 89400 }, { "epoch": 0.6472091323010996, "grad_norm": 0.149552121758461, "learning_rate": 4.352798106364959e-06, "loss": 0.9041, "step": 89410 }, { "epoch": 0.6472815189616857, "grad_norm": 0.16269992291927338, "learning_rate": 4.352725719704373e-06, "loss": 0.9069, "step": 89420 }, { "epoch": 0.6473539056222719, "grad_norm": 0.15287677943706512, "learning_rate": 4.352653333043787e-06, "loss": 0.8955, "step": 89430 }, { "epoch": 0.6474262922828581, "grad_norm": 0.1846160590648651, "learning_rate": 4.352580946383201e-06, "loss": 0.8977, "step": 89440 }, { "epoch": 0.6474986789434443, "grad_norm": 0.15473540127277374, "learning_rate": 4.3525085597226145e-06, "loss": 0.8942, "step": 89450 }, { "epoch": 0.6475710656040304, "grad_norm": 0.16567891836166382, "learning_rate": 4.352436173062028e-06, "loss": 0.9138, "step": 89460 }, { "epoch": 0.6476434522646167, "grad_norm": 0.15014810860157013, "learning_rate": 4.3523637864014425e-06, "loss": 0.9014, "step": 89470 }, { "epoch": 0.6477158389252029, "grad_norm": 0.15278102457523346, "learning_rate": 4.352291399740856e-06, "loss": 0.8891, "step": 89480 }, { "epoch": 0.6477882255857891, "grad_norm": 0.15812081098556519, "learning_rate": 4.35221901308027e-06, "loss": 0.9111, "step": 89490 }, { "epoch": 0.6478606122463753, "grad_norm": 0.15760880708694458, "learning_rate": 4.352146626419683e-06, "loss": 0.8998, "step": 89500 }, { "epoch": 0.6479329989069614, "grad_norm": 0.15096792578697205, "learning_rate": 4.352074239759098e-06, "loss": 0.8996, "step": 89510 }, { "epoch": 0.6480053855675476, "grad_norm": 0.16857214272022247, "learning_rate": 4.3520018530985115e-06, "loss": 0.8953, "step": 89520 }, { "epoch": 0.6480777722281338, "grad_norm": 0.16111542284488678, "learning_rate": 4.351929466437925e-06, "loss": 0.8887, "step": 89530 }, { "epoch": 0.64815015888872, "grad_norm": 0.15649932622909546, "learning_rate": 4.351857079777339e-06, "loss": 0.8869, "step": 89540 }, { "epoch": 0.6482225455493061, "grad_norm": 0.1554279923439026, "learning_rate": 4.351784693116753e-06, "loss": 0.8913, "step": 89550 }, { "epoch": 0.6482949322098923, "grad_norm": 0.15444056689739227, "learning_rate": 4.351712306456167e-06, "loss": 0.8978, "step": 89560 }, { "epoch": 0.6483673188704785, "grad_norm": 0.16410863399505615, "learning_rate": 4.35163991979558e-06, "loss": 0.9021, "step": 89570 }, { "epoch": 0.6484397055310648, "grad_norm": 0.181893453001976, "learning_rate": 4.351567533134994e-06, "loss": 0.8952, "step": 89580 }, { "epoch": 0.648512092191651, "grad_norm": 0.15560483932495117, "learning_rate": 4.3514951464744085e-06, "loss": 0.8824, "step": 89590 }, { "epoch": 0.6485844788522371, "grad_norm": 0.16299884021282196, "learning_rate": 4.351422759813822e-06, "loss": 0.8884, "step": 89600 }, { "epoch": 0.6486568655128233, "grad_norm": 0.16149957478046417, "learning_rate": 4.351350373153236e-06, "loss": 0.8942, "step": 89610 }, { "epoch": 0.6487292521734095, "grad_norm": 0.15844495594501495, "learning_rate": 4.351277986492649e-06, "loss": 0.8916, "step": 89620 }, { "epoch": 0.6488016388339957, "grad_norm": 0.15929342806339264, "learning_rate": 4.351205599832064e-06, "loss": 0.9046, "step": 89630 }, { "epoch": 0.6488740254945818, "grad_norm": 0.14573869109153748, "learning_rate": 4.351133213171477e-06, "loss": 0.8981, "step": 89640 }, { "epoch": 0.648946412155168, "grad_norm": 0.14629115164279938, "learning_rate": 4.351060826510891e-06, "loss": 0.9027, "step": 89650 }, { "epoch": 0.6490187988157542, "grad_norm": 0.16613242030143738, "learning_rate": 4.350988439850305e-06, "loss": 0.8883, "step": 89660 }, { "epoch": 0.6490911854763404, "grad_norm": 0.18370899558067322, "learning_rate": 4.350916053189719e-06, "loss": 0.8997, "step": 89670 }, { "epoch": 0.6491635721369267, "grad_norm": 0.15053485333919525, "learning_rate": 4.350843666529133e-06, "loss": 0.8909, "step": 89680 }, { "epoch": 0.6492359587975128, "grad_norm": 0.1765306442975998, "learning_rate": 4.350771279868546e-06, "loss": 0.8805, "step": 89690 }, { "epoch": 0.649308345458099, "grad_norm": 0.1697237193584442, "learning_rate": 4.35069889320796e-06, "loss": 0.9051, "step": 89700 }, { "epoch": 0.6493807321186852, "grad_norm": 0.15003421902656555, "learning_rate": 4.350626506547374e-06, "loss": 0.9008, "step": 89710 }, { "epoch": 0.6494531187792714, "grad_norm": 0.15888899564743042, "learning_rate": 4.350554119886788e-06, "loss": 0.9043, "step": 89720 }, { "epoch": 0.6495255054398575, "grad_norm": 0.15244218707084656, "learning_rate": 4.350481733226202e-06, "loss": 0.898, "step": 89730 }, { "epoch": 0.6495978921004437, "grad_norm": 0.1715979427099228, "learning_rate": 4.350409346565615e-06, "loss": 0.8955, "step": 89740 }, { "epoch": 0.6496702787610299, "grad_norm": 0.15656600892543793, "learning_rate": 4.350336959905029e-06, "loss": 0.8979, "step": 89750 }, { "epoch": 0.6497426654216161, "grad_norm": 0.1492892950773239, "learning_rate": 4.3502645732444425e-06, "loss": 0.8935, "step": 89760 }, { "epoch": 0.6498150520822022, "grad_norm": 0.15817688405513763, "learning_rate": 4.350192186583856e-06, "loss": 0.8975, "step": 89770 }, { "epoch": 0.6498874387427884, "grad_norm": 0.17220188677310944, "learning_rate": 4.3501197999232706e-06, "loss": 0.904, "step": 89780 }, { "epoch": 0.6499598254033747, "grad_norm": 0.15756267309188843, "learning_rate": 4.350047413262684e-06, "loss": 0.8945, "step": 89790 }, { "epoch": 0.6500322120639609, "grad_norm": 0.14775143563747406, "learning_rate": 4.349975026602098e-06, "loss": 0.892, "step": 89800 }, { "epoch": 0.6501045987245471, "grad_norm": 0.14889222383499146, "learning_rate": 4.3499026399415114e-06, "loss": 0.883, "step": 89810 }, { "epoch": 0.6501769853851332, "grad_norm": 0.15578392148017883, "learning_rate": 4.349830253280926e-06, "loss": 0.8863, "step": 89820 }, { "epoch": 0.6502493720457194, "grad_norm": 0.15579867362976074, "learning_rate": 4.3497578666203395e-06, "loss": 0.9008, "step": 89830 }, { "epoch": 0.6503217587063056, "grad_norm": 0.1711921989917755, "learning_rate": 4.349685479959753e-06, "loss": 0.9018, "step": 89840 }, { "epoch": 0.6503941453668918, "grad_norm": 0.17373885214328766, "learning_rate": 4.349613093299167e-06, "loss": 0.8876, "step": 89850 }, { "epoch": 0.650466532027478, "grad_norm": 0.15983055531978607, "learning_rate": 4.349540706638581e-06, "loss": 0.9056, "step": 89860 }, { "epoch": 0.6505389186880641, "grad_norm": 0.17014628648757935, "learning_rate": 4.349468319977995e-06, "loss": 0.8816, "step": 89870 }, { "epoch": 0.6506113053486503, "grad_norm": 0.15592381358146667, "learning_rate": 4.3493959333174084e-06, "loss": 0.8784, "step": 89880 }, { "epoch": 0.6506836920092366, "grad_norm": 0.16757424175739288, "learning_rate": 4.349323546656822e-06, "loss": 0.8985, "step": 89890 }, { "epoch": 0.6507560786698228, "grad_norm": 0.15414102375507355, "learning_rate": 4.3492511599962365e-06, "loss": 0.8938, "step": 89900 }, { "epoch": 0.650828465330409, "grad_norm": 0.16440652310848236, "learning_rate": 4.34917877333565e-06, "loss": 0.9051, "step": 89910 }, { "epoch": 0.6509008519909951, "grad_norm": 0.17210443317890167, "learning_rate": 4.349106386675064e-06, "loss": 0.8878, "step": 89920 }, { "epoch": 0.6509732386515813, "grad_norm": 0.17794910073280334, "learning_rate": 4.349034000014477e-06, "loss": 0.9026, "step": 89930 }, { "epoch": 0.6510456253121675, "grad_norm": 0.15437255799770355, "learning_rate": 4.348961613353892e-06, "loss": 0.9072, "step": 89940 }, { "epoch": 0.6511180119727537, "grad_norm": 0.15506230294704437, "learning_rate": 4.3488892266933054e-06, "loss": 0.8998, "step": 89950 }, { "epoch": 0.6511903986333398, "grad_norm": 0.1564485728740692, "learning_rate": 4.348816840032719e-06, "loss": 0.8746, "step": 89960 }, { "epoch": 0.651262785293926, "grad_norm": 0.15721721947193146, "learning_rate": 4.348744453372133e-06, "loss": 0.8953, "step": 89970 }, { "epoch": 0.6513351719545122, "grad_norm": 0.16001653671264648, "learning_rate": 4.348672066711547e-06, "loss": 0.9131, "step": 89980 }, { "epoch": 0.6514075586150984, "grad_norm": 0.15154358744621277, "learning_rate": 4.348599680050961e-06, "loss": 0.8969, "step": 89990 }, { "epoch": 0.6514799452756846, "grad_norm": 0.17069634795188904, "learning_rate": 4.348527293390374e-06, "loss": 0.9152, "step": 90000 }, { "epoch": 0.6515523319362708, "grad_norm": 0.16475622355937958, "learning_rate": 4.348454906729788e-06, "loss": 0.9022, "step": 90010 }, { "epoch": 0.651624718596857, "grad_norm": 0.16313569247722626, "learning_rate": 4.3483825200692024e-06, "loss": 0.8895, "step": 90020 }, { "epoch": 0.6516971052574432, "grad_norm": 0.15203681588172913, "learning_rate": 4.348310133408616e-06, "loss": 0.8818, "step": 90030 }, { "epoch": 0.6517694919180294, "grad_norm": 0.1589788943529129, "learning_rate": 4.34823774674803e-06, "loss": 0.9019, "step": 90040 }, { "epoch": 0.6518418785786155, "grad_norm": 0.1522030234336853, "learning_rate": 4.348165360087443e-06, "loss": 0.8986, "step": 90050 }, { "epoch": 0.6519142652392017, "grad_norm": 0.15555304288864136, "learning_rate": 4.348092973426857e-06, "loss": 0.9048, "step": 90060 }, { "epoch": 0.6519866518997879, "grad_norm": 0.15009433031082153, "learning_rate": 4.348020586766271e-06, "loss": 0.9054, "step": 90070 }, { "epoch": 0.6520590385603741, "grad_norm": 0.1470109075307846, "learning_rate": 4.347948200105685e-06, "loss": 0.9064, "step": 90080 }, { "epoch": 0.6521314252209602, "grad_norm": 0.17592494189739227, "learning_rate": 4.347875813445099e-06, "loss": 0.8949, "step": 90090 }, { "epoch": 0.6522038118815464, "grad_norm": 0.14667287468910217, "learning_rate": 4.347803426784512e-06, "loss": 0.9068, "step": 90100 }, { "epoch": 0.6522761985421327, "grad_norm": 0.15610606968402863, "learning_rate": 4.347731040123927e-06, "loss": 0.8909, "step": 90110 }, { "epoch": 0.6523485852027189, "grad_norm": 0.17158207297325134, "learning_rate": 4.34765865346334e-06, "loss": 0.8969, "step": 90120 }, { "epoch": 0.652420971863305, "grad_norm": 0.15405572950839996, "learning_rate": 4.347586266802754e-06, "loss": 0.9042, "step": 90130 }, { "epoch": 0.6524933585238912, "grad_norm": 0.1571338176727295, "learning_rate": 4.3475138801421675e-06, "loss": 0.8955, "step": 90140 }, { "epoch": 0.6525657451844774, "grad_norm": 0.14696629345417023, "learning_rate": 4.347441493481582e-06, "loss": 0.8909, "step": 90150 }, { "epoch": 0.6526381318450636, "grad_norm": 0.16139934957027435, "learning_rate": 4.347369106820996e-06, "loss": 0.8968, "step": 90160 }, { "epoch": 0.6527105185056498, "grad_norm": 0.15784136950969696, "learning_rate": 4.347296720160409e-06, "loss": 0.9019, "step": 90170 }, { "epoch": 0.6527829051662359, "grad_norm": 0.1769580841064453, "learning_rate": 4.347224333499823e-06, "loss": 0.9009, "step": 90180 }, { "epoch": 0.6528552918268221, "grad_norm": 0.15210415422916412, "learning_rate": 4.347151946839237e-06, "loss": 0.8969, "step": 90190 }, { "epoch": 0.6529276784874083, "grad_norm": 0.15724553167819977, "learning_rate": 4.347079560178651e-06, "loss": 0.8949, "step": 90200 }, { "epoch": 0.6530000651479946, "grad_norm": 0.15788982808589935, "learning_rate": 4.3470071735180645e-06, "loss": 0.8914, "step": 90210 }, { "epoch": 0.6530724518085808, "grad_norm": 0.16926227509975433, "learning_rate": 4.346934786857478e-06, "loss": 0.8868, "step": 90220 }, { "epoch": 0.6531448384691669, "grad_norm": 0.16521447896957397, "learning_rate": 4.346862400196893e-06, "loss": 0.8966, "step": 90230 }, { "epoch": 0.6532172251297531, "grad_norm": 0.17298093438148499, "learning_rate": 4.346790013536306e-06, "loss": 0.8916, "step": 90240 }, { "epoch": 0.6532896117903393, "grad_norm": 0.1661769151687622, "learning_rate": 4.34671762687572e-06, "loss": 0.8891, "step": 90250 }, { "epoch": 0.6533619984509255, "grad_norm": 0.16557759046554565, "learning_rate": 4.3466452402151335e-06, "loss": 0.9243, "step": 90260 }, { "epoch": 0.6534343851115116, "grad_norm": 0.15999002754688263, "learning_rate": 4.346572853554547e-06, "loss": 0.8908, "step": 90270 }, { "epoch": 0.6535067717720978, "grad_norm": 0.15427353978157043, "learning_rate": 4.346500466893961e-06, "loss": 0.9055, "step": 90280 }, { "epoch": 0.653579158432684, "grad_norm": 0.16352301836013794, "learning_rate": 4.346428080233374e-06, "loss": 0.8789, "step": 90290 }, { "epoch": 0.6536515450932702, "grad_norm": 0.15975026786327362, "learning_rate": 4.346355693572789e-06, "loss": 0.9023, "step": 90300 }, { "epoch": 0.6537239317538563, "grad_norm": 0.17519934475421906, "learning_rate": 4.346283306912202e-06, "loss": 0.903, "step": 90310 }, { "epoch": 0.6537963184144426, "grad_norm": 0.1745595932006836, "learning_rate": 4.346210920251616e-06, "loss": 0.9025, "step": 90320 }, { "epoch": 0.6538687050750288, "grad_norm": 0.16801311075687408, "learning_rate": 4.34613853359103e-06, "loss": 0.8968, "step": 90330 }, { "epoch": 0.653941091735615, "grad_norm": 0.1629055142402649, "learning_rate": 4.346066146930444e-06, "loss": 0.8973, "step": 90340 }, { "epoch": 0.6540134783962012, "grad_norm": 0.17443734407424927, "learning_rate": 4.345993760269858e-06, "loss": 0.8937, "step": 90350 }, { "epoch": 0.6540858650567873, "grad_norm": 0.15698504447937012, "learning_rate": 4.345921373609271e-06, "loss": 0.902, "step": 90360 }, { "epoch": 0.6541582517173735, "grad_norm": 0.1658298522233963, "learning_rate": 4.345848986948685e-06, "loss": 0.9065, "step": 90370 }, { "epoch": 0.6542306383779597, "grad_norm": 0.17639166116714478, "learning_rate": 4.345776600288099e-06, "loss": 0.8894, "step": 90380 }, { "epoch": 0.6543030250385459, "grad_norm": 0.17266248166561127, "learning_rate": 4.345704213627513e-06, "loss": 0.8957, "step": 90390 }, { "epoch": 0.654375411699132, "grad_norm": 0.1632227897644043, "learning_rate": 4.345631826966927e-06, "loss": 0.8963, "step": 90400 }, { "epoch": 0.6544477983597182, "grad_norm": 0.16069155931472778, "learning_rate": 4.34555944030634e-06, "loss": 0.9013, "step": 90410 }, { "epoch": 0.6545201850203045, "grad_norm": 0.1587107628583908, "learning_rate": 4.345487053645755e-06, "loss": 0.8857, "step": 90420 }, { "epoch": 0.6545925716808907, "grad_norm": 0.16049619019031525, "learning_rate": 4.345414666985168e-06, "loss": 0.9042, "step": 90430 }, { "epoch": 0.6546649583414769, "grad_norm": 0.16169418394565582, "learning_rate": 4.345342280324582e-06, "loss": 0.8816, "step": 90440 }, { "epoch": 0.654737345002063, "grad_norm": 0.17584997415542603, "learning_rate": 4.3452698936639956e-06, "loss": 0.8948, "step": 90450 }, { "epoch": 0.6548097316626492, "grad_norm": 0.15903721749782562, "learning_rate": 4.34519750700341e-06, "loss": 0.8819, "step": 90460 }, { "epoch": 0.6548821183232354, "grad_norm": 0.1641675978899002, "learning_rate": 4.345125120342824e-06, "loss": 0.8808, "step": 90470 }, { "epoch": 0.6549545049838216, "grad_norm": 0.15496835112571716, "learning_rate": 4.345052733682237e-06, "loss": 0.8954, "step": 90480 }, { "epoch": 0.6550268916444077, "grad_norm": 0.15261691808700562, "learning_rate": 4.344980347021651e-06, "loss": 0.9, "step": 90490 }, { "epoch": 0.6550992783049939, "grad_norm": 0.15243463218212128, "learning_rate": 4.344907960361065e-06, "loss": 0.9019, "step": 90500 }, { "epoch": 0.6551716649655801, "grad_norm": 0.16035859286785126, "learning_rate": 4.344835573700479e-06, "loss": 0.9019, "step": 90510 }, { "epoch": 0.6552440516261663, "grad_norm": 0.17638221383094788, "learning_rate": 4.3447631870398926e-06, "loss": 0.9142, "step": 90520 }, { "epoch": 0.6553164382867526, "grad_norm": 0.15220975875854492, "learning_rate": 4.344690800379306e-06, "loss": 0.9064, "step": 90530 }, { "epoch": 0.6553888249473387, "grad_norm": 0.14705011248588562, "learning_rate": 4.344618413718721e-06, "loss": 0.8898, "step": 90540 }, { "epoch": 0.6554612116079249, "grad_norm": 0.164079487323761, "learning_rate": 4.344546027058134e-06, "loss": 0.905, "step": 90550 }, { "epoch": 0.6555335982685111, "grad_norm": 0.15466266870498657, "learning_rate": 4.344473640397548e-06, "loss": 0.9071, "step": 90560 }, { "epoch": 0.6556059849290973, "grad_norm": 0.1539478898048401, "learning_rate": 4.3444012537369615e-06, "loss": 0.8988, "step": 90570 }, { "epoch": 0.6556783715896835, "grad_norm": 0.15907570719718933, "learning_rate": 4.344328867076376e-06, "loss": 0.8937, "step": 90580 }, { "epoch": 0.6557507582502696, "grad_norm": 0.16037173569202423, "learning_rate": 4.34425648041579e-06, "loss": 0.8899, "step": 90590 }, { "epoch": 0.6558231449108558, "grad_norm": 0.1608818769454956, "learning_rate": 4.344184093755203e-06, "loss": 0.8924, "step": 90600 }, { "epoch": 0.655895531571442, "grad_norm": 0.17891094088554382, "learning_rate": 4.344111707094617e-06, "loss": 0.904, "step": 90610 }, { "epoch": 0.6559679182320282, "grad_norm": 0.15963943302631378, "learning_rate": 4.344039320434031e-06, "loss": 0.8965, "step": 90620 }, { "epoch": 0.6560403048926143, "grad_norm": 0.1626274734735489, "learning_rate": 4.343966933773445e-06, "loss": 0.8977, "step": 90630 }, { "epoch": 0.6561126915532006, "grad_norm": 0.14938683807849884, "learning_rate": 4.3438945471128585e-06, "loss": 0.8972, "step": 90640 }, { "epoch": 0.6561850782137868, "grad_norm": 0.15236683189868927, "learning_rate": 4.343822160452272e-06, "loss": 0.8866, "step": 90650 }, { "epoch": 0.656257464874373, "grad_norm": 0.1745002120733261, "learning_rate": 4.343749773791686e-06, "loss": 0.8951, "step": 90660 }, { "epoch": 0.6563298515349592, "grad_norm": 0.15428422391414642, "learning_rate": 4.3436773871311e-06, "loss": 0.9125, "step": 90670 }, { "epoch": 0.6564022381955453, "grad_norm": 0.18317624926567078, "learning_rate": 4.343605000470514e-06, "loss": 0.8919, "step": 90680 }, { "epoch": 0.6564746248561315, "grad_norm": 0.15780498087406158, "learning_rate": 4.3435326138099274e-06, "loss": 0.9001, "step": 90690 }, { "epoch": 0.6565470115167177, "grad_norm": 0.1752166748046875, "learning_rate": 4.343460227149341e-06, "loss": 0.9079, "step": 90700 }, { "epoch": 0.6566193981773039, "grad_norm": 0.17048229277133942, "learning_rate": 4.3433878404887555e-06, "loss": 0.8877, "step": 90710 }, { "epoch": 0.65669178483789, "grad_norm": 0.15478329360485077, "learning_rate": 4.343315453828169e-06, "loss": 0.8885, "step": 90720 }, { "epoch": 0.6567641714984762, "grad_norm": 0.15634417533874512, "learning_rate": 4.343243067167583e-06, "loss": 0.8944, "step": 90730 }, { "epoch": 0.6568365581590625, "grad_norm": 0.15285581350326538, "learning_rate": 4.343170680506996e-06, "loss": 0.9014, "step": 90740 }, { "epoch": 0.6569089448196487, "grad_norm": 0.1574464589357376, "learning_rate": 4.343098293846411e-06, "loss": 0.8962, "step": 90750 }, { "epoch": 0.6569813314802349, "grad_norm": 0.19475358724594116, "learning_rate": 4.3430259071858244e-06, "loss": 0.8974, "step": 90760 }, { "epoch": 0.657053718140821, "grad_norm": 0.21129895746707916, "learning_rate": 4.342953520525238e-06, "loss": 0.8898, "step": 90770 }, { "epoch": 0.6571261048014072, "grad_norm": 0.1641937643289566, "learning_rate": 4.342881133864652e-06, "loss": 0.8914, "step": 90780 }, { "epoch": 0.6571984914619934, "grad_norm": 0.158699169754982, "learning_rate": 4.342808747204066e-06, "loss": 0.8855, "step": 90790 }, { "epoch": 0.6572708781225796, "grad_norm": 0.1632438600063324, "learning_rate": 4.342736360543479e-06, "loss": 0.8882, "step": 90800 }, { "epoch": 0.6573432647831657, "grad_norm": 0.16449430584907532, "learning_rate": 4.342663973882893e-06, "loss": 0.8886, "step": 90810 }, { "epoch": 0.6574156514437519, "grad_norm": 0.16445982456207275, "learning_rate": 4.342591587222307e-06, "loss": 0.8873, "step": 90820 }, { "epoch": 0.6574880381043381, "grad_norm": 0.18143968284130096, "learning_rate": 4.342519200561721e-06, "loss": 0.9047, "step": 90830 }, { "epoch": 0.6575604247649243, "grad_norm": 0.15910190343856812, "learning_rate": 4.342446813901134e-06, "loss": 0.8998, "step": 90840 }, { "epoch": 0.6576328114255106, "grad_norm": 0.1646713763475418, "learning_rate": 4.342374427240548e-06, "loss": 0.9012, "step": 90850 }, { "epoch": 0.6577051980860967, "grad_norm": 0.1441211849451065, "learning_rate": 4.342302040579962e-06, "loss": 0.897, "step": 90860 }, { "epoch": 0.6577775847466829, "grad_norm": 0.15770696103572845, "learning_rate": 4.342229653919376e-06, "loss": 0.9049, "step": 90870 }, { "epoch": 0.6578499714072691, "grad_norm": 0.1501597911119461, "learning_rate": 4.3421572672587895e-06, "loss": 0.901, "step": 90880 }, { "epoch": 0.6579223580678553, "grad_norm": 0.16416409611701965, "learning_rate": 4.342084880598203e-06, "loss": 0.9032, "step": 90890 }, { "epoch": 0.6579947447284414, "grad_norm": 0.16274628043174744, "learning_rate": 4.342012493937618e-06, "loss": 0.887, "step": 90900 }, { "epoch": 0.6580671313890276, "grad_norm": 0.14996199309825897, "learning_rate": 4.341940107277031e-06, "loss": 0.8815, "step": 90910 }, { "epoch": 0.6581395180496138, "grad_norm": 0.2303011268377304, "learning_rate": 4.341867720616445e-06, "loss": 0.8799, "step": 90920 }, { "epoch": 0.6582119047102, "grad_norm": 0.14957544207572937, "learning_rate": 4.3417953339558585e-06, "loss": 0.903, "step": 90930 }, { "epoch": 0.6582842913707861, "grad_norm": 0.15761502087116241, "learning_rate": 4.341722947295273e-06, "loss": 0.9141, "step": 90940 }, { "epoch": 0.6583566780313724, "grad_norm": 0.17332914471626282, "learning_rate": 4.3416505606346865e-06, "loss": 0.8998, "step": 90950 }, { "epoch": 0.6584290646919586, "grad_norm": 0.1537407785654068, "learning_rate": 4.3415781739741e-06, "loss": 0.8852, "step": 90960 }, { "epoch": 0.6585014513525448, "grad_norm": 0.14254407584667206, "learning_rate": 4.341505787313514e-06, "loss": 0.8973, "step": 90970 }, { "epoch": 0.658573838013131, "grad_norm": 0.1535002887248993, "learning_rate": 4.341433400652928e-06, "loss": 0.8981, "step": 90980 }, { "epoch": 0.6586462246737171, "grad_norm": 0.15489903092384338, "learning_rate": 4.341361013992342e-06, "loss": 0.8854, "step": 90990 }, { "epoch": 0.6587186113343033, "grad_norm": 0.16978639364242554, "learning_rate": 4.3412886273317555e-06, "loss": 0.9023, "step": 91000 }, { "epoch": 0.6587909979948895, "grad_norm": 0.1680452525615692, "learning_rate": 4.341216240671169e-06, "loss": 0.9008, "step": 91010 }, { "epoch": 0.6588633846554757, "grad_norm": 0.15573959052562714, "learning_rate": 4.3411438540105835e-06, "loss": 0.889, "step": 91020 }, { "epoch": 0.6589357713160618, "grad_norm": 0.1434423178434372, "learning_rate": 4.341071467349997e-06, "loss": 0.8804, "step": 91030 }, { "epoch": 0.659008157976648, "grad_norm": 0.16832110285758972, "learning_rate": 4.340999080689411e-06, "loss": 0.9065, "step": 91040 }, { "epoch": 0.6590805446372342, "grad_norm": 0.15029281377792358, "learning_rate": 4.340926694028824e-06, "loss": 0.8851, "step": 91050 }, { "epoch": 0.6591529312978205, "grad_norm": 0.1768171489238739, "learning_rate": 4.340854307368239e-06, "loss": 0.9025, "step": 91060 }, { "epoch": 0.6592253179584067, "grad_norm": 0.14737635850906372, "learning_rate": 4.3407819207076525e-06, "loss": 0.9048, "step": 91070 }, { "epoch": 0.6592977046189928, "grad_norm": 0.1616610735654831, "learning_rate": 4.340709534047066e-06, "loss": 0.9036, "step": 91080 }, { "epoch": 0.659370091279579, "grad_norm": 0.15665307641029358, "learning_rate": 4.34063714738648e-06, "loss": 0.8956, "step": 91090 }, { "epoch": 0.6594424779401652, "grad_norm": 0.15417052805423737, "learning_rate": 4.340564760725894e-06, "loss": 0.9022, "step": 91100 }, { "epoch": 0.6595148646007514, "grad_norm": 0.14735175669193268, "learning_rate": 4.340492374065308e-06, "loss": 0.8875, "step": 91110 }, { "epoch": 0.6595872512613375, "grad_norm": 0.18449713289737701, "learning_rate": 4.340419987404721e-06, "loss": 0.8886, "step": 91120 }, { "epoch": 0.6596596379219237, "grad_norm": 0.15880465507507324, "learning_rate": 4.340347600744135e-06, "loss": 0.8961, "step": 91130 }, { "epoch": 0.6597320245825099, "grad_norm": 0.1495341658592224, "learning_rate": 4.3402752140835495e-06, "loss": 0.8861, "step": 91140 }, { "epoch": 0.6598044112430961, "grad_norm": 0.1608497053384781, "learning_rate": 4.340202827422963e-06, "loss": 0.8905, "step": 91150 }, { "epoch": 0.6598767979036823, "grad_norm": 0.18891900777816772, "learning_rate": 4.340130440762377e-06, "loss": 0.894, "step": 91160 }, { "epoch": 0.6599491845642685, "grad_norm": 0.1456620842218399, "learning_rate": 4.34005805410179e-06, "loss": 0.8951, "step": 91170 }, { "epoch": 0.6600215712248547, "grad_norm": 0.16527323424816132, "learning_rate": 4.339985667441205e-06, "loss": 0.8914, "step": 91180 }, { "epoch": 0.6600939578854409, "grad_norm": 0.15947329998016357, "learning_rate": 4.339913280780618e-06, "loss": 0.8964, "step": 91190 }, { "epoch": 0.6601663445460271, "grad_norm": 0.1482989341020584, "learning_rate": 4.339840894120032e-06, "loss": 0.8923, "step": 91200 }, { "epoch": 0.6602387312066132, "grad_norm": 0.15714868903160095, "learning_rate": 4.339768507459446e-06, "loss": 0.892, "step": 91210 }, { "epoch": 0.6603111178671994, "grad_norm": 0.18459376692771912, "learning_rate": 4.33969612079886e-06, "loss": 0.8944, "step": 91220 }, { "epoch": 0.6603835045277856, "grad_norm": 0.1577734649181366, "learning_rate": 4.339623734138274e-06, "loss": 0.8919, "step": 91230 }, { "epoch": 0.6604558911883718, "grad_norm": 0.19559817016124725, "learning_rate": 4.339551347477687e-06, "loss": 0.9021, "step": 91240 }, { "epoch": 0.660528277848958, "grad_norm": 0.16929206252098083, "learning_rate": 4.339478960817101e-06, "loss": 0.9089, "step": 91250 }, { "epoch": 0.6606006645095441, "grad_norm": 0.1482090950012207, "learning_rate": 4.339406574156515e-06, "loss": 0.8995, "step": 91260 }, { "epoch": 0.6606730511701304, "grad_norm": 0.17333853244781494, "learning_rate": 4.339334187495929e-06, "loss": 0.8998, "step": 91270 }, { "epoch": 0.6607454378307166, "grad_norm": 0.16415739059448242, "learning_rate": 4.339261800835343e-06, "loss": 0.8956, "step": 91280 }, { "epoch": 0.6608178244913028, "grad_norm": 0.14835873246192932, "learning_rate": 4.339189414174756e-06, "loss": 0.8809, "step": 91290 }, { "epoch": 0.660890211151889, "grad_norm": 0.16368795931339264, "learning_rate": 4.33911702751417e-06, "loss": 0.9006, "step": 91300 }, { "epoch": 0.6609625978124751, "grad_norm": 0.1738886833190918, "learning_rate": 4.339044640853584e-06, "loss": 0.8993, "step": 91310 }, { "epoch": 0.6610349844730613, "grad_norm": 0.1818006932735443, "learning_rate": 4.338972254192998e-06, "loss": 0.8997, "step": 91320 }, { "epoch": 0.6611073711336475, "grad_norm": 0.16817954182624817, "learning_rate": 4.338899867532412e-06, "loss": 0.8994, "step": 91330 }, { "epoch": 0.6611797577942337, "grad_norm": 0.15932884812355042, "learning_rate": 4.338827480871825e-06, "loss": 0.8976, "step": 91340 }, { "epoch": 0.6612521444548198, "grad_norm": 0.15406905114650726, "learning_rate": 4.338755094211239e-06, "loss": 0.8906, "step": 91350 }, { "epoch": 0.661324531115406, "grad_norm": 0.16480666399002075, "learning_rate": 4.3386827075506524e-06, "loss": 0.9069, "step": 91360 }, { "epoch": 0.6613969177759922, "grad_norm": 0.17040395736694336, "learning_rate": 4.338610320890067e-06, "loss": 0.8925, "step": 91370 }, { "epoch": 0.6614693044365785, "grad_norm": 0.1497850865125656, "learning_rate": 4.3385379342294805e-06, "loss": 0.8956, "step": 91380 }, { "epoch": 0.6615416910971647, "grad_norm": 0.326239675283432, "learning_rate": 4.338465547568894e-06, "loss": 0.8959, "step": 91390 }, { "epoch": 0.6616140777577508, "grad_norm": 0.17361514270305634, "learning_rate": 4.338393160908308e-06, "loss": 0.9122, "step": 91400 }, { "epoch": 0.661686464418337, "grad_norm": 0.16059482097625732, "learning_rate": 4.338320774247722e-06, "loss": 0.8971, "step": 91410 }, { "epoch": 0.6617588510789232, "grad_norm": 0.16254779696464539, "learning_rate": 4.338248387587136e-06, "loss": 0.8937, "step": 91420 }, { "epoch": 0.6618312377395094, "grad_norm": 0.16201868653297424, "learning_rate": 4.3381760009265494e-06, "loss": 0.8988, "step": 91430 }, { "epoch": 0.6619036244000955, "grad_norm": 0.15529035031795502, "learning_rate": 4.338103614265963e-06, "loss": 0.8974, "step": 91440 }, { "epoch": 0.6619760110606817, "grad_norm": 0.18519388139247894, "learning_rate": 4.3380312276053775e-06, "loss": 0.8969, "step": 91450 }, { "epoch": 0.6620483977212679, "grad_norm": 0.14886817336082458, "learning_rate": 4.337958840944791e-06, "loss": 0.8927, "step": 91460 }, { "epoch": 0.6621207843818541, "grad_norm": 0.18947485089302063, "learning_rate": 4.337886454284205e-06, "loss": 0.8954, "step": 91470 }, { "epoch": 0.6621931710424404, "grad_norm": 0.14915327727794647, "learning_rate": 4.337814067623618e-06, "loss": 0.904, "step": 91480 }, { "epoch": 0.6622655577030265, "grad_norm": 0.3526378273963928, "learning_rate": 4.337741680963032e-06, "loss": 0.907, "step": 91490 }, { "epoch": 0.6623379443636127, "grad_norm": 0.17231549322605133, "learning_rate": 4.3376692943024464e-06, "loss": 0.8878, "step": 91500 }, { "epoch": 0.6624103310241989, "grad_norm": 0.15187296271324158, "learning_rate": 4.33759690764186e-06, "loss": 0.8893, "step": 91510 }, { "epoch": 0.6624827176847851, "grad_norm": 0.16822843253612518, "learning_rate": 4.337524520981274e-06, "loss": 0.893, "step": 91520 }, { "epoch": 0.6625551043453712, "grad_norm": 0.16215179860591888, "learning_rate": 4.337452134320687e-06, "loss": 0.893, "step": 91530 }, { "epoch": 0.6626274910059574, "grad_norm": 0.15672698616981506, "learning_rate": 4.337379747660102e-06, "loss": 0.8875, "step": 91540 }, { "epoch": 0.6626998776665436, "grad_norm": 0.14577756822109222, "learning_rate": 4.337307360999515e-06, "loss": 0.9028, "step": 91550 }, { "epoch": 0.6627722643271298, "grad_norm": 0.1537584662437439, "learning_rate": 4.337234974338929e-06, "loss": 0.8978, "step": 91560 }, { "epoch": 0.6628446509877159, "grad_norm": 0.15644492208957672, "learning_rate": 4.337162587678343e-06, "loss": 0.886, "step": 91570 }, { "epoch": 0.6629170376483021, "grad_norm": 0.16348648071289062, "learning_rate": 4.337090201017757e-06, "loss": 0.9081, "step": 91580 }, { "epoch": 0.6629894243088884, "grad_norm": 0.15281915664672852, "learning_rate": 4.337017814357171e-06, "loss": 0.8931, "step": 91590 }, { "epoch": 0.6630618109694746, "grad_norm": 0.1640012115240097, "learning_rate": 4.336945427696584e-06, "loss": 0.9045, "step": 91600 }, { "epoch": 0.6631341976300608, "grad_norm": 0.17783017456531525, "learning_rate": 4.336873041035998e-06, "loss": 0.8932, "step": 91610 }, { "epoch": 0.6632065842906469, "grad_norm": 0.23584623634815216, "learning_rate": 4.336800654375412e-06, "loss": 0.9074, "step": 91620 }, { "epoch": 0.6632789709512331, "grad_norm": 0.15153855085372925, "learning_rate": 4.336728267714826e-06, "loss": 0.8817, "step": 91630 }, { "epoch": 0.6633513576118193, "grad_norm": 0.1572607457637787, "learning_rate": 4.33665588105424e-06, "loss": 0.8969, "step": 91640 }, { "epoch": 0.6634237442724055, "grad_norm": 0.15882018208503723, "learning_rate": 4.336583494393653e-06, "loss": 0.8956, "step": 91650 }, { "epoch": 0.6634961309329916, "grad_norm": 0.19259294867515564, "learning_rate": 4.336511107733068e-06, "loss": 0.8947, "step": 91660 }, { "epoch": 0.6635685175935778, "grad_norm": 0.15729625523090363, "learning_rate": 4.336438721072481e-06, "loss": 0.8938, "step": 91670 }, { "epoch": 0.663640904254164, "grad_norm": 0.18913884460926056, "learning_rate": 4.336366334411895e-06, "loss": 0.898, "step": 91680 }, { "epoch": 0.6637132909147502, "grad_norm": 0.23208874464035034, "learning_rate": 4.3362939477513085e-06, "loss": 0.8831, "step": 91690 }, { "epoch": 0.6637856775753365, "grad_norm": 0.15006226301193237, "learning_rate": 4.336221561090723e-06, "loss": 0.8933, "step": 91700 }, { "epoch": 0.6638580642359226, "grad_norm": 0.17437438666820526, "learning_rate": 4.336149174430137e-06, "loss": 0.8922, "step": 91710 }, { "epoch": 0.6639304508965088, "grad_norm": 0.1585850864648819, "learning_rate": 4.33607678776955e-06, "loss": 0.8929, "step": 91720 }, { "epoch": 0.664002837557095, "grad_norm": 0.15892000496387482, "learning_rate": 4.336004401108964e-06, "loss": 0.8981, "step": 91730 }, { "epoch": 0.6640752242176812, "grad_norm": 0.15951044857501984, "learning_rate": 4.335932014448378e-06, "loss": 0.8968, "step": 91740 }, { "epoch": 0.6641476108782673, "grad_norm": 0.16389575600624084, "learning_rate": 4.335859627787792e-06, "loss": 0.9076, "step": 91750 }, { "epoch": 0.6642199975388535, "grad_norm": 0.23026235401630402, "learning_rate": 4.3357872411272055e-06, "loss": 0.8934, "step": 91760 }, { "epoch": 0.6642923841994397, "grad_norm": 0.15285776555538177, "learning_rate": 4.335714854466619e-06, "loss": 0.8961, "step": 91770 }, { "epoch": 0.6643647708600259, "grad_norm": 0.15406441688537598, "learning_rate": 4.335642467806034e-06, "loss": 0.8853, "step": 91780 }, { "epoch": 0.664437157520612, "grad_norm": 0.1550244837999344, "learning_rate": 4.335570081145447e-06, "loss": 0.9001, "step": 91790 }, { "epoch": 0.6645095441811983, "grad_norm": 0.16414350271224976, "learning_rate": 4.335497694484861e-06, "loss": 0.9078, "step": 91800 }, { "epoch": 0.6645819308417845, "grad_norm": 0.15932433307170868, "learning_rate": 4.3354253078242745e-06, "loss": 0.8989, "step": 91810 }, { "epoch": 0.6646543175023707, "grad_norm": 0.1505679041147232, "learning_rate": 4.335352921163689e-06, "loss": 0.8885, "step": 91820 }, { "epoch": 0.6647267041629569, "grad_norm": 0.16631951928138733, "learning_rate": 4.3352805345031026e-06, "loss": 0.8972, "step": 91830 }, { "epoch": 0.664799090823543, "grad_norm": 0.17262007296085358, "learning_rate": 4.335208147842516e-06, "loss": 0.8933, "step": 91840 }, { "epoch": 0.6648714774841292, "grad_norm": 0.16659270226955414, "learning_rate": 4.33513576118193e-06, "loss": 0.9059, "step": 91850 }, { "epoch": 0.6649438641447154, "grad_norm": 0.15563061833381653, "learning_rate": 4.335063374521343e-06, "loss": 0.9071, "step": 91860 }, { "epoch": 0.6650162508053016, "grad_norm": 0.16777054965496063, "learning_rate": 4.334990987860757e-06, "loss": 0.8994, "step": 91870 }, { "epoch": 0.6650886374658878, "grad_norm": 0.15433162450790405, "learning_rate": 4.334918601200171e-06, "loss": 0.9064, "step": 91880 }, { "epoch": 0.6651610241264739, "grad_norm": 0.16042456030845642, "learning_rate": 4.334846214539585e-06, "loss": 0.8952, "step": 91890 }, { "epoch": 0.6652334107870601, "grad_norm": 0.189911887049675, "learning_rate": 4.334773827878999e-06, "loss": 0.8903, "step": 91900 }, { "epoch": 0.6653057974476464, "grad_norm": 0.1497250199317932, "learning_rate": 4.334701441218412e-06, "loss": 0.8836, "step": 91910 }, { "epoch": 0.6653781841082326, "grad_norm": 0.16076631844043732, "learning_rate": 4.334629054557826e-06, "loss": 0.8971, "step": 91920 }, { "epoch": 0.6654505707688187, "grad_norm": 0.15558098256587982, "learning_rate": 4.33455666789724e-06, "loss": 0.8838, "step": 91930 }, { "epoch": 0.6655229574294049, "grad_norm": 0.1555255949497223, "learning_rate": 4.334484281236654e-06, "loss": 0.9026, "step": 91940 }, { "epoch": 0.6655953440899911, "grad_norm": 0.15714456140995026, "learning_rate": 4.334411894576068e-06, "loss": 0.8889, "step": 91950 }, { "epoch": 0.6656677307505773, "grad_norm": 0.15416781604290009, "learning_rate": 4.334339507915481e-06, "loss": 0.8943, "step": 91960 }, { "epoch": 0.6657401174111635, "grad_norm": 0.16116608679294586, "learning_rate": 4.334267121254896e-06, "loss": 0.8844, "step": 91970 }, { "epoch": 0.6658125040717496, "grad_norm": 0.141585111618042, "learning_rate": 4.334194734594309e-06, "loss": 0.8845, "step": 91980 }, { "epoch": 0.6658848907323358, "grad_norm": 0.1526327133178711, "learning_rate": 4.334122347933723e-06, "loss": 0.8922, "step": 91990 }, { "epoch": 0.665957277392922, "grad_norm": 0.16217315196990967, "learning_rate": 4.3340499612731366e-06, "loss": 0.889, "step": 92000 }, { "epoch": 0.6660296640535083, "grad_norm": 0.16622312366962433, "learning_rate": 4.333977574612551e-06, "loss": 0.8993, "step": 92010 }, { "epoch": 0.6661020507140945, "grad_norm": 0.19960935413837433, "learning_rate": 4.333905187951965e-06, "loss": 0.8982, "step": 92020 }, { "epoch": 0.6661744373746806, "grad_norm": 0.16707240045070648, "learning_rate": 4.333832801291378e-06, "loss": 0.9088, "step": 92030 }, { "epoch": 0.6662468240352668, "grad_norm": 0.15086571872234344, "learning_rate": 4.333760414630792e-06, "loss": 0.8991, "step": 92040 }, { "epoch": 0.666319210695853, "grad_norm": 0.17651046812534332, "learning_rate": 4.333688027970206e-06, "loss": 0.8987, "step": 92050 }, { "epoch": 0.6663915973564392, "grad_norm": 0.1702297031879425, "learning_rate": 4.33361564130962e-06, "loss": 0.898, "step": 92060 }, { "epoch": 0.6664639840170253, "grad_norm": 0.17619454860687256, "learning_rate": 4.333543254649034e-06, "loss": 0.894, "step": 92070 }, { "epoch": 0.6665363706776115, "grad_norm": 0.16067981719970703, "learning_rate": 4.333470867988447e-06, "loss": 0.9031, "step": 92080 }, { "epoch": 0.6666087573381977, "grad_norm": 0.16957126557826996, "learning_rate": 4.333398481327861e-06, "loss": 0.8966, "step": 92090 }, { "epoch": 0.6666811439987839, "grad_norm": 0.1614599972963333, "learning_rate": 4.333326094667275e-06, "loss": 0.8976, "step": 92100 }, { "epoch": 0.66675353065937, "grad_norm": 0.17003393173217773, "learning_rate": 4.333253708006689e-06, "loss": 0.9072, "step": 92110 }, { "epoch": 0.6668259173199563, "grad_norm": 0.17658734321594238, "learning_rate": 4.3331813213461025e-06, "loss": 0.8964, "step": 92120 }, { "epoch": 0.6668983039805425, "grad_norm": 0.15573443472385406, "learning_rate": 4.333108934685516e-06, "loss": 0.8984, "step": 92130 }, { "epoch": 0.6669706906411287, "grad_norm": 0.17280741035938263, "learning_rate": 4.333036548024931e-06, "loss": 0.8982, "step": 92140 }, { "epoch": 0.6670430773017149, "grad_norm": 0.1615643948316574, "learning_rate": 4.332964161364344e-06, "loss": 0.8958, "step": 92150 }, { "epoch": 0.667115463962301, "grad_norm": 0.16449616849422455, "learning_rate": 4.332891774703758e-06, "loss": 0.8938, "step": 92160 }, { "epoch": 0.6671878506228872, "grad_norm": 0.1627790331840515, "learning_rate": 4.3328193880431714e-06, "loss": 0.8861, "step": 92170 }, { "epoch": 0.6672602372834734, "grad_norm": 0.15564194321632385, "learning_rate": 4.332747001382586e-06, "loss": 0.9061, "step": 92180 }, { "epoch": 0.6673326239440596, "grad_norm": 0.18332041800022125, "learning_rate": 4.3326746147219995e-06, "loss": 0.9071, "step": 92190 }, { "epoch": 0.6674050106046457, "grad_norm": 0.165358766913414, "learning_rate": 4.332602228061413e-06, "loss": 0.8964, "step": 92200 }, { "epoch": 0.6674773972652319, "grad_norm": 0.1612190455198288, "learning_rate": 4.332529841400827e-06, "loss": 0.8976, "step": 92210 }, { "epoch": 0.6675497839258181, "grad_norm": 0.16194748878479004, "learning_rate": 4.332457454740241e-06, "loss": 0.8953, "step": 92220 }, { "epoch": 0.6676221705864044, "grad_norm": 0.16928349435329437, "learning_rate": 4.332385068079655e-06, "loss": 0.9029, "step": 92230 }, { "epoch": 0.6676945572469906, "grad_norm": 0.15044300258159637, "learning_rate": 4.3323126814190684e-06, "loss": 0.8986, "step": 92240 }, { "epoch": 0.6677669439075767, "grad_norm": 0.1588992476463318, "learning_rate": 4.332240294758482e-06, "loss": 0.9034, "step": 92250 }, { "epoch": 0.6678393305681629, "grad_norm": 0.16093967854976654, "learning_rate": 4.3321679080978965e-06, "loss": 0.8845, "step": 92260 }, { "epoch": 0.6679117172287491, "grad_norm": 0.17110575735569, "learning_rate": 4.33209552143731e-06, "loss": 0.8848, "step": 92270 }, { "epoch": 0.6679841038893353, "grad_norm": 0.15584023296833038, "learning_rate": 4.332023134776724e-06, "loss": 0.8858, "step": 92280 }, { "epoch": 0.6680564905499214, "grad_norm": 0.16196756064891815, "learning_rate": 4.331950748116137e-06, "loss": 0.8951, "step": 92290 }, { "epoch": 0.6681288772105076, "grad_norm": 0.1788647472858429, "learning_rate": 4.331878361455552e-06, "loss": 0.8988, "step": 92300 }, { "epoch": 0.6682012638710938, "grad_norm": 0.1447635442018509, "learning_rate": 4.3318059747949655e-06, "loss": 0.8857, "step": 92310 }, { "epoch": 0.66827365053168, "grad_norm": 0.16333886981010437, "learning_rate": 4.331733588134379e-06, "loss": 0.8831, "step": 92320 }, { "epoch": 0.6683460371922663, "grad_norm": 0.16351155936717987, "learning_rate": 4.331661201473793e-06, "loss": 0.9038, "step": 92330 }, { "epoch": 0.6684184238528524, "grad_norm": 0.16826938092708588, "learning_rate": 4.331588814813207e-06, "loss": 0.8982, "step": 92340 }, { "epoch": 0.6684908105134386, "grad_norm": 0.16592121124267578, "learning_rate": 4.331516428152621e-06, "loss": 0.8828, "step": 92350 }, { "epoch": 0.6685631971740248, "grad_norm": 0.1630096733570099, "learning_rate": 4.331444041492034e-06, "loss": 0.8867, "step": 92360 }, { "epoch": 0.668635583834611, "grad_norm": 0.18398292362689972, "learning_rate": 4.331371654831448e-06, "loss": 0.9008, "step": 92370 }, { "epoch": 0.6687079704951971, "grad_norm": 0.16176696121692657, "learning_rate": 4.3312992681708625e-06, "loss": 0.9025, "step": 92380 }, { "epoch": 0.6687803571557833, "grad_norm": 0.15559855103492737, "learning_rate": 4.331226881510275e-06, "loss": 0.9031, "step": 92390 }, { "epoch": 0.6688527438163695, "grad_norm": 0.16191458702087402, "learning_rate": 4.331154494849689e-06, "loss": 0.8954, "step": 92400 }, { "epoch": 0.6689251304769557, "grad_norm": 0.15984484553337097, "learning_rate": 4.331082108189103e-06, "loss": 0.8888, "step": 92410 }, { "epoch": 0.6689975171375419, "grad_norm": 0.15444859862327576, "learning_rate": 4.331009721528517e-06, "loss": 0.8966, "step": 92420 }, { "epoch": 0.669069903798128, "grad_norm": 0.1950903832912445, "learning_rate": 4.3309373348679305e-06, "loss": 0.901, "step": 92430 }, { "epoch": 0.6691422904587143, "grad_norm": 0.1582936942577362, "learning_rate": 4.330864948207344e-06, "loss": 0.891, "step": 92440 }, { "epoch": 0.6692146771193005, "grad_norm": 0.15177102386951447, "learning_rate": 4.330792561546759e-06, "loss": 0.8893, "step": 92450 }, { "epoch": 0.6692870637798867, "grad_norm": 0.14646144211292267, "learning_rate": 4.330720174886172e-06, "loss": 0.8964, "step": 92460 }, { "epoch": 0.6693594504404728, "grad_norm": 0.17326298356056213, "learning_rate": 4.330647788225586e-06, "loss": 0.8839, "step": 92470 }, { "epoch": 0.669431837101059, "grad_norm": 0.16231794655323029, "learning_rate": 4.3305754015649995e-06, "loss": 0.8983, "step": 92480 }, { "epoch": 0.6695042237616452, "grad_norm": 0.1864873170852661, "learning_rate": 4.330503014904414e-06, "loss": 0.897, "step": 92490 }, { "epoch": 0.6695766104222314, "grad_norm": 0.16087661683559418, "learning_rate": 4.3304306282438275e-06, "loss": 0.8815, "step": 92500 }, { "epoch": 0.6696489970828176, "grad_norm": 0.1563531458377838, "learning_rate": 4.330358241583241e-06, "loss": 0.8971, "step": 92510 }, { "epoch": 0.6697213837434037, "grad_norm": 0.15433651208877563, "learning_rate": 4.330285854922655e-06, "loss": 0.8948, "step": 92520 }, { "epoch": 0.6697937704039899, "grad_norm": 0.18349480628967285, "learning_rate": 4.330213468262069e-06, "loss": 0.9053, "step": 92530 }, { "epoch": 0.6698661570645762, "grad_norm": 0.186500683426857, "learning_rate": 4.330141081601483e-06, "loss": 0.8941, "step": 92540 }, { "epoch": 0.6699385437251624, "grad_norm": 0.17492738366127014, "learning_rate": 4.3300686949408965e-06, "loss": 0.8914, "step": 92550 }, { "epoch": 0.6700109303857485, "grad_norm": 0.1956550031900406, "learning_rate": 4.32999630828031e-06, "loss": 0.8992, "step": 92560 }, { "epoch": 0.6700833170463347, "grad_norm": 0.1706896424293518, "learning_rate": 4.3299239216197246e-06, "loss": 0.8879, "step": 92570 }, { "epoch": 0.6701557037069209, "grad_norm": 0.16048724949359894, "learning_rate": 4.329851534959138e-06, "loss": 0.8983, "step": 92580 }, { "epoch": 0.6702280903675071, "grad_norm": 0.14965306222438812, "learning_rate": 4.329779148298552e-06, "loss": 0.8997, "step": 92590 }, { "epoch": 0.6703004770280933, "grad_norm": 0.15428891777992249, "learning_rate": 4.329706761637965e-06, "loss": 0.8933, "step": 92600 }, { "epoch": 0.6703728636886794, "grad_norm": 0.15892021358013153, "learning_rate": 4.32963437497738e-06, "loss": 0.8951, "step": 92610 }, { "epoch": 0.6704452503492656, "grad_norm": 0.1577247977256775, "learning_rate": 4.3295619883167935e-06, "loss": 0.8999, "step": 92620 }, { "epoch": 0.6705176370098518, "grad_norm": 0.15700013935565948, "learning_rate": 4.329489601656207e-06, "loss": 0.8752, "step": 92630 }, { "epoch": 0.670590023670438, "grad_norm": 0.18034988641738892, "learning_rate": 4.329417214995621e-06, "loss": 0.9094, "step": 92640 }, { "epoch": 0.6706624103310243, "grad_norm": 0.15093854069709778, "learning_rate": 4.329344828335035e-06, "loss": 0.909, "step": 92650 }, { "epoch": 0.6707347969916104, "grad_norm": 0.15224012732505798, "learning_rate": 4.329272441674449e-06, "loss": 0.8964, "step": 92660 }, { "epoch": 0.6708071836521966, "grad_norm": 0.16921748220920563, "learning_rate": 4.329200055013862e-06, "loss": 0.893, "step": 92670 }, { "epoch": 0.6708795703127828, "grad_norm": 0.15135690569877625, "learning_rate": 4.329127668353276e-06, "loss": 0.8859, "step": 92680 }, { "epoch": 0.670951956973369, "grad_norm": 0.15335379540920258, "learning_rate": 4.3290552816926905e-06, "loss": 0.8956, "step": 92690 }, { "epoch": 0.6710243436339551, "grad_norm": 0.1561633050441742, "learning_rate": 4.328982895032104e-06, "loss": 0.8801, "step": 92700 }, { "epoch": 0.6710967302945413, "grad_norm": 0.1546168178319931, "learning_rate": 4.328910508371518e-06, "loss": 0.9001, "step": 92710 }, { "epoch": 0.6711691169551275, "grad_norm": 0.16860635578632355, "learning_rate": 4.328838121710931e-06, "loss": 0.9041, "step": 92720 }, { "epoch": 0.6712415036157137, "grad_norm": 0.1650303453207016, "learning_rate": 4.328765735050345e-06, "loss": 0.8919, "step": 92730 }, { "epoch": 0.6713138902762998, "grad_norm": 0.15512806177139282, "learning_rate": 4.328693348389759e-06, "loss": 0.884, "step": 92740 }, { "epoch": 0.671386276936886, "grad_norm": 0.17656339704990387, "learning_rate": 4.328620961729173e-06, "loss": 0.8928, "step": 92750 }, { "epoch": 0.6714586635974723, "grad_norm": 0.1842651218175888, "learning_rate": 4.328548575068587e-06, "loss": 0.9057, "step": 92760 }, { "epoch": 0.6715310502580585, "grad_norm": 0.15689325332641602, "learning_rate": 4.328476188408e-06, "loss": 0.8871, "step": 92770 }, { "epoch": 0.6716034369186447, "grad_norm": 0.16614311933517456, "learning_rate": 4.328403801747415e-06, "loss": 0.8998, "step": 92780 }, { "epoch": 0.6716758235792308, "grad_norm": 0.16957563161849976, "learning_rate": 4.328331415086828e-06, "loss": 0.8937, "step": 92790 }, { "epoch": 0.671748210239817, "grad_norm": 0.15638667345046997, "learning_rate": 4.328259028426242e-06, "loss": 0.894, "step": 92800 }, { "epoch": 0.6718205969004032, "grad_norm": 0.16604597866535187, "learning_rate": 4.328186641765656e-06, "loss": 0.8891, "step": 92810 }, { "epoch": 0.6718929835609894, "grad_norm": 0.1521102339029312, "learning_rate": 4.32811425510507e-06, "loss": 0.8946, "step": 92820 }, { "epoch": 0.6719653702215755, "grad_norm": 0.1847507506608963, "learning_rate": 4.328041868444484e-06, "loss": 0.906, "step": 92830 }, { "epoch": 0.6720377568821617, "grad_norm": 0.15854564309120178, "learning_rate": 4.327969481783897e-06, "loss": 0.8913, "step": 92840 }, { "epoch": 0.6721101435427479, "grad_norm": 0.16507163643836975, "learning_rate": 4.327897095123311e-06, "loss": 0.8888, "step": 92850 }, { "epoch": 0.6721825302033342, "grad_norm": 0.1640719473361969, "learning_rate": 4.327824708462725e-06, "loss": 0.8969, "step": 92860 }, { "epoch": 0.6722549168639204, "grad_norm": 0.15480723977088928, "learning_rate": 4.327752321802139e-06, "loss": 0.9023, "step": 92870 }, { "epoch": 0.6723273035245065, "grad_norm": 0.1514626145362854, "learning_rate": 4.327679935141553e-06, "loss": 0.8934, "step": 92880 }, { "epoch": 0.6723996901850927, "grad_norm": 0.16035489737987518, "learning_rate": 4.327607548480966e-06, "loss": 0.8847, "step": 92890 }, { "epoch": 0.6724720768456789, "grad_norm": 0.15631358325481415, "learning_rate": 4.327535161820381e-06, "loss": 0.9011, "step": 92900 }, { "epoch": 0.6725444635062651, "grad_norm": 0.16039063036441803, "learning_rate": 4.327462775159794e-06, "loss": 0.8872, "step": 92910 }, { "epoch": 0.6726168501668512, "grad_norm": 0.15340082347393036, "learning_rate": 4.327390388499207e-06, "loss": 0.8918, "step": 92920 }, { "epoch": 0.6726892368274374, "grad_norm": 0.16243009269237518, "learning_rate": 4.3273180018386215e-06, "loss": 0.8937, "step": 92930 }, { "epoch": 0.6727616234880236, "grad_norm": 0.15922003984451294, "learning_rate": 4.327245615178035e-06, "loss": 0.8906, "step": 92940 }, { "epoch": 0.6728340101486098, "grad_norm": 0.1569734811782837, "learning_rate": 4.327173228517449e-06, "loss": 0.8916, "step": 92950 }, { "epoch": 0.672906396809196, "grad_norm": 0.1412716507911682, "learning_rate": 4.327100841856862e-06, "loss": 0.881, "step": 92960 }, { "epoch": 0.6729787834697822, "grad_norm": 0.16564616560935974, "learning_rate": 4.327028455196277e-06, "loss": 0.8876, "step": 92970 }, { "epoch": 0.6730511701303684, "grad_norm": 0.18585346639156342, "learning_rate": 4.3269560685356904e-06, "loss": 0.8932, "step": 92980 }, { "epoch": 0.6731235567909546, "grad_norm": 0.15012194216251373, "learning_rate": 4.326883681875104e-06, "loss": 0.8916, "step": 92990 }, { "epoch": 0.6731959434515408, "grad_norm": 0.14845649898052216, "learning_rate": 4.326811295214518e-06, "loss": 0.8963, "step": 93000 }, { "epoch": 0.673268330112127, "grad_norm": 0.15878477692604065, "learning_rate": 4.326738908553932e-06, "loss": 0.8983, "step": 93010 }, { "epoch": 0.6733407167727131, "grad_norm": 0.15004906058311462, "learning_rate": 4.326666521893346e-06, "loss": 0.8929, "step": 93020 }, { "epoch": 0.6734131034332993, "grad_norm": 0.16124626994132996, "learning_rate": 4.326594135232759e-06, "loss": 0.8837, "step": 93030 }, { "epoch": 0.6734854900938855, "grad_norm": 0.15695308148860931, "learning_rate": 4.326521748572173e-06, "loss": 0.8991, "step": 93040 }, { "epoch": 0.6735578767544717, "grad_norm": 0.1830526888370514, "learning_rate": 4.3264493619115874e-06, "loss": 0.8826, "step": 93050 }, { "epoch": 0.6736302634150578, "grad_norm": 0.1436559408903122, "learning_rate": 4.326376975251001e-06, "loss": 0.8939, "step": 93060 }, { "epoch": 0.673702650075644, "grad_norm": 0.15827639400959015, "learning_rate": 4.326304588590415e-06, "loss": 0.907, "step": 93070 }, { "epoch": 0.6737750367362303, "grad_norm": 0.15548427402973175, "learning_rate": 4.326232201929828e-06, "loss": 0.9136, "step": 93080 }, { "epoch": 0.6738474233968165, "grad_norm": 0.15338309109210968, "learning_rate": 4.326159815269243e-06, "loss": 0.8979, "step": 93090 }, { "epoch": 0.6739198100574026, "grad_norm": 0.16544108092784882, "learning_rate": 4.326087428608656e-06, "loss": 0.8817, "step": 93100 }, { "epoch": 0.6739921967179888, "grad_norm": 0.16785688698291779, "learning_rate": 4.32601504194807e-06, "loss": 0.8912, "step": 93110 }, { "epoch": 0.674064583378575, "grad_norm": 0.17321330308914185, "learning_rate": 4.325942655287484e-06, "loss": 0.8863, "step": 93120 }, { "epoch": 0.6741369700391612, "grad_norm": 0.16258351504802704, "learning_rate": 4.325870268626898e-06, "loss": 0.9073, "step": 93130 }, { "epoch": 0.6742093566997474, "grad_norm": 0.16058233380317688, "learning_rate": 4.325797881966312e-06, "loss": 0.9084, "step": 93140 }, { "epoch": 0.6742817433603335, "grad_norm": 0.16303308308124542, "learning_rate": 4.325725495305725e-06, "loss": 0.8965, "step": 93150 }, { "epoch": 0.6743541300209197, "grad_norm": 0.15831464529037476, "learning_rate": 4.325653108645139e-06, "loss": 0.8865, "step": 93160 }, { "epoch": 0.6744265166815059, "grad_norm": 0.1554798036813736, "learning_rate": 4.325580721984553e-06, "loss": 0.8803, "step": 93170 }, { "epoch": 0.6744989033420922, "grad_norm": 0.16166193783283234, "learning_rate": 4.325508335323967e-06, "loss": 0.9018, "step": 93180 }, { "epoch": 0.6745712900026783, "grad_norm": 0.15309026837348938, "learning_rate": 4.325435948663381e-06, "loss": 0.8972, "step": 93190 }, { "epoch": 0.6746436766632645, "grad_norm": 0.15099987387657166, "learning_rate": 4.325363562002794e-06, "loss": 0.9075, "step": 93200 }, { "epoch": 0.6747160633238507, "grad_norm": 0.17467179894447327, "learning_rate": 4.325291175342209e-06, "loss": 0.9024, "step": 93210 }, { "epoch": 0.6747884499844369, "grad_norm": 0.15384790301322937, "learning_rate": 4.325218788681622e-06, "loss": 0.9, "step": 93220 }, { "epoch": 0.674860836645023, "grad_norm": 0.1675538271665573, "learning_rate": 4.325146402021036e-06, "loss": 0.893, "step": 93230 }, { "epoch": 0.6749332233056092, "grad_norm": 0.15132692456245422, "learning_rate": 4.3250740153604495e-06, "loss": 0.9021, "step": 93240 }, { "epoch": 0.6750056099661954, "grad_norm": 0.15508437156677246, "learning_rate": 4.325001628699864e-06, "loss": 0.9039, "step": 93250 }, { "epoch": 0.6750779966267816, "grad_norm": 0.15347804129123688, "learning_rate": 4.324929242039278e-06, "loss": 0.8787, "step": 93260 }, { "epoch": 0.6751503832873678, "grad_norm": 0.15671610832214355, "learning_rate": 4.324856855378691e-06, "loss": 0.8973, "step": 93270 }, { "epoch": 0.6752227699479539, "grad_norm": 0.15267059206962585, "learning_rate": 4.324784468718105e-06, "loss": 0.8862, "step": 93280 }, { "epoch": 0.6752951566085402, "grad_norm": 0.1843479573726654, "learning_rate": 4.324712082057519e-06, "loss": 0.9027, "step": 93290 }, { "epoch": 0.6753675432691264, "grad_norm": 0.18705353140830994, "learning_rate": 4.324639695396933e-06, "loss": 0.8873, "step": 93300 }, { "epoch": 0.6754399299297126, "grad_norm": 0.16083325445652008, "learning_rate": 4.3245673087363466e-06, "loss": 0.8973, "step": 93310 }, { "epoch": 0.6755123165902988, "grad_norm": 0.16239725053310394, "learning_rate": 4.32449492207576e-06, "loss": 0.8963, "step": 93320 }, { "epoch": 0.6755847032508849, "grad_norm": 0.15586166083812714, "learning_rate": 4.324422535415174e-06, "loss": 0.8988, "step": 93330 }, { "epoch": 0.6756570899114711, "grad_norm": 0.17255674302577972, "learning_rate": 4.324350148754588e-06, "loss": 0.8979, "step": 93340 }, { "epoch": 0.6757294765720573, "grad_norm": 0.15488573908805847, "learning_rate": 4.324277762094002e-06, "loss": 0.8938, "step": 93350 }, { "epoch": 0.6758018632326435, "grad_norm": 0.1733180284500122, "learning_rate": 4.3242053754334155e-06, "loss": 0.8936, "step": 93360 }, { "epoch": 0.6758742498932296, "grad_norm": 0.16540446877479553, "learning_rate": 4.324132988772829e-06, "loss": 0.8812, "step": 93370 }, { "epoch": 0.6759466365538158, "grad_norm": 0.16037392616271973, "learning_rate": 4.3240606021122436e-06, "loss": 0.8979, "step": 93380 }, { "epoch": 0.6760190232144021, "grad_norm": 0.17287902534008026, "learning_rate": 4.323988215451657e-06, "loss": 0.8928, "step": 93390 }, { "epoch": 0.6760914098749883, "grad_norm": 0.19038693606853485, "learning_rate": 4.323915828791071e-06, "loss": 0.896, "step": 93400 }, { "epoch": 0.6761637965355745, "grad_norm": 0.14845329523086548, "learning_rate": 4.323843442130484e-06, "loss": 0.8969, "step": 93410 }, { "epoch": 0.6762361831961606, "grad_norm": 0.15594539046287537, "learning_rate": 4.323771055469899e-06, "loss": 0.8967, "step": 93420 }, { "epoch": 0.6763085698567468, "grad_norm": 0.1664988100528717, "learning_rate": 4.3236986688093125e-06, "loss": 0.8907, "step": 93430 }, { "epoch": 0.676380956517333, "grad_norm": 0.16152945160865784, "learning_rate": 4.323626282148726e-06, "loss": 0.8887, "step": 93440 }, { "epoch": 0.6764533431779192, "grad_norm": 0.16336587071418762, "learning_rate": 4.32355389548814e-06, "loss": 0.8937, "step": 93450 }, { "epoch": 0.6765257298385053, "grad_norm": 0.17990297079086304, "learning_rate": 4.323481508827553e-06, "loss": 0.8885, "step": 93460 }, { "epoch": 0.6765981164990915, "grad_norm": 0.1632242649793625, "learning_rate": 4.323409122166967e-06, "loss": 0.9011, "step": 93470 }, { "epoch": 0.6766705031596777, "grad_norm": 0.1489850878715515, "learning_rate": 4.323336735506381e-06, "loss": 0.8834, "step": 93480 }, { "epoch": 0.6767428898202639, "grad_norm": 0.16526971757411957, "learning_rate": 4.323264348845795e-06, "loss": 0.8924, "step": 93490 }, { "epoch": 0.6768152764808502, "grad_norm": 0.14482301473617554, "learning_rate": 4.323191962185209e-06, "loss": 0.8818, "step": 93500 }, { "epoch": 0.6768876631414363, "grad_norm": 0.19359660148620605, "learning_rate": 4.323119575524622e-06, "loss": 0.9027, "step": 93510 }, { "epoch": 0.6769600498020225, "grad_norm": 0.15199698507785797, "learning_rate": 4.323047188864036e-06, "loss": 0.9103, "step": 93520 }, { "epoch": 0.6770324364626087, "grad_norm": 0.1570311337709427, "learning_rate": 4.32297480220345e-06, "loss": 0.8866, "step": 93530 }, { "epoch": 0.6771048231231949, "grad_norm": 0.16493958234786987, "learning_rate": 4.322902415542864e-06, "loss": 0.8965, "step": 93540 }, { "epoch": 0.677177209783781, "grad_norm": 0.15966171026229858, "learning_rate": 4.322830028882278e-06, "loss": 0.9059, "step": 93550 }, { "epoch": 0.6772495964443672, "grad_norm": 0.15570668876171112, "learning_rate": 4.322757642221691e-06, "loss": 0.9031, "step": 93560 }, { "epoch": 0.6773219831049534, "grad_norm": 0.15496212244033813, "learning_rate": 4.322685255561106e-06, "loss": 0.9177, "step": 93570 }, { "epoch": 0.6773943697655396, "grad_norm": 0.15817132592201233, "learning_rate": 4.322612868900519e-06, "loss": 0.8852, "step": 93580 }, { "epoch": 0.6774667564261257, "grad_norm": 0.15422877669334412, "learning_rate": 4.322540482239933e-06, "loss": 0.8944, "step": 93590 }, { "epoch": 0.6775391430867119, "grad_norm": 0.15881921350955963, "learning_rate": 4.3224680955793465e-06, "loss": 0.9014, "step": 93600 }, { "epoch": 0.6776115297472982, "grad_norm": 0.24979665875434875, "learning_rate": 4.322395708918761e-06, "loss": 0.8926, "step": 93610 }, { "epoch": 0.6776839164078844, "grad_norm": 0.19371944665908813, "learning_rate": 4.322323322258175e-06, "loss": 0.879, "step": 93620 }, { "epoch": 0.6777563030684706, "grad_norm": 0.15385429561138153, "learning_rate": 4.322250935597588e-06, "loss": 0.9084, "step": 93630 }, { "epoch": 0.6778286897290567, "grad_norm": 0.16214215755462646, "learning_rate": 4.322178548937002e-06, "loss": 0.8968, "step": 93640 }, { "epoch": 0.6779010763896429, "grad_norm": 0.21187572181224823, "learning_rate": 4.322106162276416e-06, "loss": 0.8969, "step": 93650 }, { "epoch": 0.6779734630502291, "grad_norm": 0.15346659719944, "learning_rate": 4.32203377561583e-06, "loss": 0.8985, "step": 93660 }, { "epoch": 0.6780458497108153, "grad_norm": 0.14978167414665222, "learning_rate": 4.3219613889552435e-06, "loss": 0.8998, "step": 93670 }, { "epoch": 0.6781182363714015, "grad_norm": 0.16475534439086914, "learning_rate": 4.321889002294657e-06, "loss": 0.8935, "step": 93680 }, { "epoch": 0.6781906230319876, "grad_norm": 0.1509210169315338, "learning_rate": 4.321816615634072e-06, "loss": 0.9166, "step": 93690 }, { "epoch": 0.6782630096925738, "grad_norm": 0.15706129372119904, "learning_rate": 4.321744228973485e-06, "loss": 0.884, "step": 93700 }, { "epoch": 0.6783353963531601, "grad_norm": 0.19146853685379028, "learning_rate": 4.321671842312899e-06, "loss": 0.8988, "step": 93710 }, { "epoch": 0.6784077830137463, "grad_norm": 0.14886470139026642, "learning_rate": 4.3215994556523124e-06, "loss": 0.8954, "step": 93720 }, { "epoch": 0.6784801696743324, "grad_norm": 0.15223486721515656, "learning_rate": 4.321527068991727e-06, "loss": 0.8994, "step": 93730 }, { "epoch": 0.6785525563349186, "grad_norm": 0.1696760505437851, "learning_rate": 4.3214546823311405e-06, "loss": 0.9005, "step": 93740 }, { "epoch": 0.6786249429955048, "grad_norm": 0.17853619158267975, "learning_rate": 4.321382295670554e-06, "loss": 0.883, "step": 93750 }, { "epoch": 0.678697329656091, "grad_norm": 0.1491808295249939, "learning_rate": 4.321309909009968e-06, "loss": 0.8852, "step": 93760 }, { "epoch": 0.6787697163166772, "grad_norm": 0.19821669161319733, "learning_rate": 4.321237522349382e-06, "loss": 0.8976, "step": 93770 }, { "epoch": 0.6788421029772633, "grad_norm": 0.1570088416337967, "learning_rate": 4.321165135688796e-06, "loss": 0.8913, "step": 93780 }, { "epoch": 0.6789144896378495, "grad_norm": 0.15688063204288483, "learning_rate": 4.3210927490282094e-06, "loss": 0.9008, "step": 93790 }, { "epoch": 0.6789868762984357, "grad_norm": 0.19363504648208618, "learning_rate": 4.321020362367623e-06, "loss": 0.8946, "step": 93800 }, { "epoch": 0.6790592629590219, "grad_norm": 0.16203875839710236, "learning_rate": 4.3209479757070375e-06, "loss": 0.8867, "step": 93810 }, { "epoch": 0.6791316496196081, "grad_norm": 0.15617011487483978, "learning_rate": 4.320875589046451e-06, "loss": 0.9006, "step": 93820 }, { "epoch": 0.6792040362801943, "grad_norm": 0.1507921814918518, "learning_rate": 4.320803202385865e-06, "loss": 0.9161, "step": 93830 }, { "epoch": 0.6792764229407805, "grad_norm": 0.16400021314620972, "learning_rate": 4.320730815725278e-06, "loss": 0.8899, "step": 93840 }, { "epoch": 0.6793488096013667, "grad_norm": 0.15807297825813293, "learning_rate": 4.320658429064693e-06, "loss": 0.9041, "step": 93850 }, { "epoch": 0.6794211962619529, "grad_norm": 0.15214060246944427, "learning_rate": 4.3205860424041065e-06, "loss": 0.8978, "step": 93860 }, { "epoch": 0.679493582922539, "grad_norm": 0.16088810563087463, "learning_rate": 4.32051365574352e-06, "loss": 0.8974, "step": 93870 }, { "epoch": 0.6795659695831252, "grad_norm": 0.1684071570634842, "learning_rate": 4.320441269082934e-06, "loss": 0.8949, "step": 93880 }, { "epoch": 0.6796383562437114, "grad_norm": 0.15487946569919586, "learning_rate": 4.320368882422348e-06, "loss": 0.8905, "step": 93890 }, { "epoch": 0.6797107429042976, "grad_norm": 0.1507687270641327, "learning_rate": 4.320296495761762e-06, "loss": 0.8959, "step": 93900 }, { "epoch": 0.6797831295648837, "grad_norm": 0.16048942506313324, "learning_rate": 4.320224109101175e-06, "loss": 0.8903, "step": 93910 }, { "epoch": 0.67985551622547, "grad_norm": 0.1652323603630066, "learning_rate": 4.320151722440589e-06, "loss": 0.8897, "step": 93920 }, { "epoch": 0.6799279028860562, "grad_norm": 0.16126669943332672, "learning_rate": 4.3200793357800035e-06, "loss": 0.8921, "step": 93930 }, { "epoch": 0.6800002895466424, "grad_norm": 0.15348832309246063, "learning_rate": 4.320006949119417e-06, "loss": 0.8927, "step": 93940 }, { "epoch": 0.6800726762072286, "grad_norm": 0.2017420381307602, "learning_rate": 4.319934562458831e-06, "loss": 0.8945, "step": 93950 }, { "epoch": 0.6801450628678147, "grad_norm": 0.17592447996139526, "learning_rate": 4.319862175798244e-06, "loss": 0.8905, "step": 93960 }, { "epoch": 0.6802174495284009, "grad_norm": 0.15097962319850922, "learning_rate": 4.319789789137658e-06, "loss": 0.8964, "step": 93970 }, { "epoch": 0.6802898361889871, "grad_norm": 0.16152705252170563, "learning_rate": 4.3197174024770715e-06, "loss": 0.8875, "step": 93980 }, { "epoch": 0.6803622228495733, "grad_norm": 0.17407093942165375, "learning_rate": 4.319645015816485e-06, "loss": 0.9065, "step": 93990 }, { "epoch": 0.6804346095101594, "grad_norm": 0.16338643431663513, "learning_rate": 4.3195726291559e-06, "loss": 0.907, "step": 94000 }, { "epoch": 0.6805069961707456, "grad_norm": 0.15193772315979004, "learning_rate": 4.319500242495313e-06, "loss": 0.8908, "step": 94010 }, { "epoch": 0.6805793828313318, "grad_norm": 0.15826401114463806, "learning_rate": 4.319427855834727e-06, "loss": 0.889, "step": 94020 }, { "epoch": 0.6806517694919181, "grad_norm": 0.15474501252174377, "learning_rate": 4.3193554691741405e-06, "loss": 0.8907, "step": 94030 }, { "epoch": 0.6807241561525043, "grad_norm": 0.1633521169424057, "learning_rate": 4.319283082513555e-06, "loss": 0.8997, "step": 94040 }, { "epoch": 0.6807965428130904, "grad_norm": 0.19621436297893524, "learning_rate": 4.3192106958529686e-06, "loss": 0.9016, "step": 94050 }, { "epoch": 0.6808689294736766, "grad_norm": 0.15351617336273193, "learning_rate": 4.319138309192382e-06, "loss": 0.9004, "step": 94060 }, { "epoch": 0.6809413161342628, "grad_norm": 0.1630709171295166, "learning_rate": 4.319065922531796e-06, "loss": 0.8924, "step": 94070 }, { "epoch": 0.681013702794849, "grad_norm": 0.17487677931785583, "learning_rate": 4.31899353587121e-06, "loss": 0.8983, "step": 94080 }, { "epoch": 0.6810860894554351, "grad_norm": 0.15708361566066742, "learning_rate": 4.318921149210624e-06, "loss": 0.8904, "step": 94090 }, { "epoch": 0.6811584761160213, "grad_norm": 0.15445072948932648, "learning_rate": 4.3188487625500375e-06, "loss": 0.9044, "step": 94100 }, { "epoch": 0.6812308627766075, "grad_norm": 0.16823984682559967, "learning_rate": 4.318776375889451e-06, "loss": 0.8893, "step": 94110 }, { "epoch": 0.6813032494371937, "grad_norm": 0.1907905787229538, "learning_rate": 4.318703989228865e-06, "loss": 0.906, "step": 94120 }, { "epoch": 0.6813756360977798, "grad_norm": 0.1714591085910797, "learning_rate": 4.318631602568279e-06, "loss": 0.8967, "step": 94130 }, { "epoch": 0.6814480227583661, "grad_norm": 0.15924982726573944, "learning_rate": 4.318559215907693e-06, "loss": 0.8899, "step": 94140 }, { "epoch": 0.6815204094189523, "grad_norm": 0.18446829915046692, "learning_rate": 4.318486829247106e-06, "loss": 0.9054, "step": 94150 }, { "epoch": 0.6815927960795385, "grad_norm": 0.1584208905696869, "learning_rate": 4.31841444258652e-06, "loss": 0.9035, "step": 94160 }, { "epoch": 0.6816651827401247, "grad_norm": 0.15730392932891846, "learning_rate": 4.3183420559259345e-06, "loss": 0.9004, "step": 94170 }, { "epoch": 0.6817375694007108, "grad_norm": 0.15665248036384583, "learning_rate": 4.318269669265348e-06, "loss": 0.8768, "step": 94180 }, { "epoch": 0.681809956061297, "grad_norm": 0.15125800669193268, "learning_rate": 4.318197282604762e-06, "loss": 0.9052, "step": 94190 }, { "epoch": 0.6818823427218832, "grad_norm": 0.15670722723007202, "learning_rate": 4.318124895944175e-06, "loss": 0.8914, "step": 94200 }, { "epoch": 0.6819547293824694, "grad_norm": 0.16642498970031738, "learning_rate": 4.31805250928359e-06, "loss": 0.8978, "step": 94210 }, { "epoch": 0.6820271160430555, "grad_norm": 0.17175577580928802, "learning_rate": 4.317980122623003e-06, "loss": 0.9012, "step": 94220 }, { "epoch": 0.6820995027036417, "grad_norm": 0.16106824576854706, "learning_rate": 4.317907735962417e-06, "loss": 0.8985, "step": 94230 }, { "epoch": 0.682171889364228, "grad_norm": 0.15547014772891998, "learning_rate": 4.317835349301831e-06, "loss": 0.9019, "step": 94240 }, { "epoch": 0.6822442760248142, "grad_norm": 0.16611367464065552, "learning_rate": 4.317762962641245e-06, "loss": 0.8933, "step": 94250 }, { "epoch": 0.6823166626854004, "grad_norm": 0.33474913239479065, "learning_rate": 4.317690575980659e-06, "loss": 0.8996, "step": 94260 }, { "epoch": 0.6823890493459865, "grad_norm": 0.17985767126083374, "learning_rate": 4.317618189320072e-06, "loss": 0.9111, "step": 94270 }, { "epoch": 0.6824614360065727, "grad_norm": 0.14204993844032288, "learning_rate": 4.317545802659486e-06, "loss": 0.9077, "step": 94280 }, { "epoch": 0.6825338226671589, "grad_norm": 0.14709369838237762, "learning_rate": 4.3174734159989e-06, "loss": 0.8779, "step": 94290 }, { "epoch": 0.6826062093277451, "grad_norm": 0.16410696506500244, "learning_rate": 4.317401029338314e-06, "loss": 0.8993, "step": 94300 }, { "epoch": 0.6826785959883312, "grad_norm": 0.15575379133224487, "learning_rate": 4.317328642677728e-06, "loss": 0.8887, "step": 94310 }, { "epoch": 0.6827509826489174, "grad_norm": 0.16469940543174744, "learning_rate": 4.317256256017141e-06, "loss": 0.8954, "step": 94320 }, { "epoch": 0.6828233693095036, "grad_norm": 0.15340863168239594, "learning_rate": 4.317183869356556e-06, "loss": 0.8975, "step": 94330 }, { "epoch": 0.6828957559700898, "grad_norm": 0.16109062731266022, "learning_rate": 4.317111482695969e-06, "loss": 0.8972, "step": 94340 }, { "epoch": 0.6829681426306761, "grad_norm": 0.16391950845718384, "learning_rate": 4.317039096035383e-06, "loss": 0.9028, "step": 94350 }, { "epoch": 0.6830405292912622, "grad_norm": 0.15392933785915375, "learning_rate": 4.316966709374797e-06, "loss": 0.8991, "step": 94360 }, { "epoch": 0.6831129159518484, "grad_norm": 0.16828075051307678, "learning_rate": 4.316894322714211e-06, "loss": 0.9025, "step": 94370 }, { "epoch": 0.6831853026124346, "grad_norm": 0.147588312625885, "learning_rate": 4.316821936053625e-06, "loss": 0.9008, "step": 94380 }, { "epoch": 0.6832576892730208, "grad_norm": 0.14783324301242828, "learning_rate": 4.316749549393038e-06, "loss": 0.9054, "step": 94390 }, { "epoch": 0.683330075933607, "grad_norm": 0.1539517641067505, "learning_rate": 4.316677162732452e-06, "loss": 0.8891, "step": 94400 }, { "epoch": 0.6834024625941931, "grad_norm": 0.15323641896247864, "learning_rate": 4.316604776071866e-06, "loss": 0.9112, "step": 94410 }, { "epoch": 0.6834748492547793, "grad_norm": 0.1703089028596878, "learning_rate": 4.31653238941128e-06, "loss": 0.8885, "step": 94420 }, { "epoch": 0.6835472359153655, "grad_norm": 0.15150368213653564, "learning_rate": 4.316460002750694e-06, "loss": 0.8762, "step": 94430 }, { "epoch": 0.6836196225759517, "grad_norm": 0.15605489909648895, "learning_rate": 4.316387616090107e-06, "loss": 0.9036, "step": 94440 }, { "epoch": 0.683692009236538, "grad_norm": 0.14873306453227997, "learning_rate": 4.316315229429522e-06, "loss": 0.8936, "step": 94450 }, { "epoch": 0.6837643958971241, "grad_norm": 0.15489499270915985, "learning_rate": 4.316242842768935e-06, "loss": 0.8927, "step": 94460 }, { "epoch": 0.6838367825577103, "grad_norm": 0.14702056348323822, "learning_rate": 4.316170456108349e-06, "loss": 0.8919, "step": 94470 }, { "epoch": 0.6839091692182965, "grad_norm": 0.17012947797775269, "learning_rate": 4.3160980694477625e-06, "loss": 0.9027, "step": 94480 }, { "epoch": 0.6839815558788827, "grad_norm": 0.23859137296676636, "learning_rate": 4.316025682787177e-06, "loss": 0.8989, "step": 94490 }, { "epoch": 0.6840539425394688, "grad_norm": 0.14270347356796265, "learning_rate": 4.315953296126591e-06, "loss": 0.8966, "step": 94500 }, { "epoch": 0.684126329200055, "grad_norm": 0.15070217847824097, "learning_rate": 4.315880909466003e-06, "loss": 0.8967, "step": 94510 }, { "epoch": 0.6841987158606412, "grad_norm": 0.14996741712093353, "learning_rate": 4.315808522805418e-06, "loss": 0.8973, "step": 94520 }, { "epoch": 0.6842711025212274, "grad_norm": 0.1512274593114853, "learning_rate": 4.3157361361448314e-06, "loss": 0.8937, "step": 94530 }, { "epoch": 0.6843434891818135, "grad_norm": 0.1495598703622818, "learning_rate": 4.315663749484245e-06, "loss": 0.894, "step": 94540 }, { "epoch": 0.6844158758423997, "grad_norm": 0.1526460349559784, "learning_rate": 4.315591362823659e-06, "loss": 0.9058, "step": 94550 }, { "epoch": 0.684488262502986, "grad_norm": 0.22177492082118988, "learning_rate": 4.315518976163073e-06, "loss": 0.8909, "step": 94560 }, { "epoch": 0.6845606491635722, "grad_norm": 0.1475263386964798, "learning_rate": 4.315446589502487e-06, "loss": 0.8894, "step": 94570 }, { "epoch": 0.6846330358241584, "grad_norm": 0.15898674726486206, "learning_rate": 4.3153742028419e-06, "loss": 0.9013, "step": 94580 }, { "epoch": 0.6847054224847445, "grad_norm": 0.160345658659935, "learning_rate": 4.315301816181314e-06, "loss": 0.8894, "step": 94590 }, { "epoch": 0.6847778091453307, "grad_norm": 0.16944102942943573, "learning_rate": 4.3152294295207285e-06, "loss": 0.9001, "step": 94600 }, { "epoch": 0.6848501958059169, "grad_norm": 0.17007231712341309, "learning_rate": 4.315157042860142e-06, "loss": 0.9066, "step": 94610 }, { "epoch": 0.6849225824665031, "grad_norm": 0.14328494668006897, "learning_rate": 4.315084656199556e-06, "loss": 0.8942, "step": 94620 }, { "epoch": 0.6849949691270892, "grad_norm": 0.17920292913913727, "learning_rate": 4.315012269538969e-06, "loss": 0.9016, "step": 94630 }, { "epoch": 0.6850673557876754, "grad_norm": 0.15425662696361542, "learning_rate": 4.314939882878384e-06, "loss": 0.885, "step": 94640 }, { "epoch": 0.6851397424482616, "grad_norm": 0.15390869975090027, "learning_rate": 4.314867496217797e-06, "loss": 0.8941, "step": 94650 }, { "epoch": 0.6852121291088478, "grad_norm": 0.15865157544612885, "learning_rate": 4.314795109557211e-06, "loss": 0.9016, "step": 94660 }, { "epoch": 0.685284515769434, "grad_norm": 0.14358533918857574, "learning_rate": 4.314722722896625e-06, "loss": 0.8833, "step": 94670 }, { "epoch": 0.6853569024300202, "grad_norm": 0.15622635185718536, "learning_rate": 4.314650336236039e-06, "loss": 0.8932, "step": 94680 }, { "epoch": 0.6854292890906064, "grad_norm": 0.1485530436038971, "learning_rate": 4.314577949575453e-06, "loss": 0.9085, "step": 94690 }, { "epoch": 0.6855016757511926, "grad_norm": 0.1571887731552124, "learning_rate": 4.314505562914866e-06, "loss": 0.9022, "step": 94700 }, { "epoch": 0.6855740624117788, "grad_norm": 0.15338723361492157, "learning_rate": 4.31443317625428e-06, "loss": 0.8992, "step": 94710 }, { "epoch": 0.6856464490723649, "grad_norm": 0.14642809331417084, "learning_rate": 4.314360789593694e-06, "loss": 0.8907, "step": 94720 }, { "epoch": 0.6857188357329511, "grad_norm": 0.1597226858139038, "learning_rate": 4.314288402933108e-06, "loss": 0.9018, "step": 94730 }, { "epoch": 0.6857912223935373, "grad_norm": 0.16934312880039215, "learning_rate": 4.314216016272522e-06, "loss": 0.8962, "step": 94740 }, { "epoch": 0.6858636090541235, "grad_norm": 0.16490796208381653, "learning_rate": 4.314143629611935e-06, "loss": 0.8977, "step": 94750 }, { "epoch": 0.6859359957147096, "grad_norm": 0.1624918133020401, "learning_rate": 4.314071242951349e-06, "loss": 0.8968, "step": 94760 }, { "epoch": 0.6860083823752959, "grad_norm": 0.16102252900600433, "learning_rate": 4.313998856290763e-06, "loss": 0.8991, "step": 94770 }, { "epoch": 0.6860807690358821, "grad_norm": 0.15310189127922058, "learning_rate": 4.313926469630177e-06, "loss": 0.894, "step": 94780 }, { "epoch": 0.6861531556964683, "grad_norm": 0.15352487564086914, "learning_rate": 4.3138540829695906e-06, "loss": 0.8779, "step": 94790 }, { "epoch": 0.6862255423570545, "grad_norm": 0.15823419392108917, "learning_rate": 4.313781696309004e-06, "loss": 0.906, "step": 94800 }, { "epoch": 0.6862979290176406, "grad_norm": 0.14976033568382263, "learning_rate": 4.313709309648419e-06, "loss": 0.8872, "step": 94810 }, { "epoch": 0.6863703156782268, "grad_norm": 0.1584375500679016, "learning_rate": 4.313636922987832e-06, "loss": 0.8847, "step": 94820 }, { "epoch": 0.686442702338813, "grad_norm": 0.16619914770126343, "learning_rate": 4.313564536327246e-06, "loss": 0.8942, "step": 94830 }, { "epoch": 0.6865150889993992, "grad_norm": 0.16585594415664673, "learning_rate": 4.3134921496666595e-06, "loss": 0.8958, "step": 94840 }, { "epoch": 0.6865874756599853, "grad_norm": 0.1556151807308197, "learning_rate": 4.313419763006074e-06, "loss": 0.9076, "step": 94850 }, { "epoch": 0.6866598623205715, "grad_norm": 0.16103480756282806, "learning_rate": 4.3133473763454876e-06, "loss": 0.8927, "step": 94860 }, { "epoch": 0.6867322489811577, "grad_norm": 0.15770234167575836, "learning_rate": 4.313274989684901e-06, "loss": 0.8901, "step": 94870 }, { "epoch": 0.686804635641744, "grad_norm": 0.15498584508895874, "learning_rate": 4.313202603024315e-06, "loss": 0.9026, "step": 94880 }, { "epoch": 0.6868770223023302, "grad_norm": 0.15212376415729523, "learning_rate": 4.313130216363729e-06, "loss": 0.8837, "step": 94890 }, { "epoch": 0.6869494089629163, "grad_norm": 0.16598445177078247, "learning_rate": 4.313057829703143e-06, "loss": 0.8998, "step": 94900 }, { "epoch": 0.6870217956235025, "grad_norm": 0.15342400968074799, "learning_rate": 4.3129854430425565e-06, "loss": 0.8858, "step": 94910 }, { "epoch": 0.6870941822840887, "grad_norm": 0.1750396341085434, "learning_rate": 4.31291305638197e-06, "loss": 0.8823, "step": 94920 }, { "epoch": 0.6871665689446749, "grad_norm": 0.14746147394180298, "learning_rate": 4.3128406697213846e-06, "loss": 0.8892, "step": 94930 }, { "epoch": 0.687238955605261, "grad_norm": 0.16607217490673065, "learning_rate": 4.312768283060798e-06, "loss": 0.9003, "step": 94940 }, { "epoch": 0.6873113422658472, "grad_norm": 0.15274251997470856, "learning_rate": 4.312695896400212e-06, "loss": 0.8967, "step": 94950 }, { "epoch": 0.6873837289264334, "grad_norm": 0.15561649203300476, "learning_rate": 4.312623509739625e-06, "loss": 0.8926, "step": 94960 }, { "epoch": 0.6874561155870196, "grad_norm": 0.1617891639471054, "learning_rate": 4.31255112307904e-06, "loss": 0.8919, "step": 94970 }, { "epoch": 0.6875285022476059, "grad_norm": 0.15291917324066162, "learning_rate": 4.3124787364184535e-06, "loss": 0.8961, "step": 94980 }, { "epoch": 0.687600888908192, "grad_norm": 0.15395525097846985, "learning_rate": 4.312406349757867e-06, "loss": 0.9067, "step": 94990 }, { "epoch": 0.6876732755687782, "grad_norm": 0.15367929637432098, "learning_rate": 4.312333963097281e-06, "loss": 0.8899, "step": 95000 }, { "epoch": 0.6877456622293644, "grad_norm": 0.16362635791301727, "learning_rate": 4.312261576436695e-06, "loss": 0.8971, "step": 95010 }, { "epoch": 0.6878180488899506, "grad_norm": 0.1670594960451126, "learning_rate": 4.312189189776109e-06, "loss": 0.8869, "step": 95020 }, { "epoch": 0.6878904355505367, "grad_norm": 0.161848783493042, "learning_rate": 4.312116803115522e-06, "loss": 0.8845, "step": 95030 }, { "epoch": 0.6879628222111229, "grad_norm": 0.16515681147575378, "learning_rate": 4.312044416454936e-06, "loss": 0.8909, "step": 95040 }, { "epoch": 0.6880352088717091, "grad_norm": 0.14597424864768982, "learning_rate": 4.31197202979435e-06, "loss": 0.896, "step": 95050 }, { "epoch": 0.6881075955322953, "grad_norm": 0.1595359891653061, "learning_rate": 4.311899643133763e-06, "loss": 0.8876, "step": 95060 }, { "epoch": 0.6881799821928815, "grad_norm": 0.1476903259754181, "learning_rate": 4.311827256473177e-06, "loss": 0.8913, "step": 95070 }, { "epoch": 0.6882523688534676, "grad_norm": 0.18288491666316986, "learning_rate": 4.311754869812591e-06, "loss": 0.9013, "step": 95080 }, { "epoch": 0.6883247555140539, "grad_norm": 0.16732849180698395, "learning_rate": 4.311682483152005e-06, "loss": 0.8962, "step": 95090 }, { "epoch": 0.6883971421746401, "grad_norm": 0.15418116748332977, "learning_rate": 4.311610096491419e-06, "loss": 0.8913, "step": 95100 }, { "epoch": 0.6884695288352263, "grad_norm": 0.15506073832511902, "learning_rate": 4.311537709830832e-06, "loss": 0.9081, "step": 95110 }, { "epoch": 0.6885419154958125, "grad_norm": 0.15247192978858948, "learning_rate": 4.311465323170247e-06, "loss": 0.9026, "step": 95120 }, { "epoch": 0.6886143021563986, "grad_norm": 0.15282821655273438, "learning_rate": 4.31139293650966e-06, "loss": 0.8913, "step": 95130 }, { "epoch": 0.6886866888169848, "grad_norm": 0.14951321482658386, "learning_rate": 4.311320549849074e-06, "loss": 0.8937, "step": 95140 }, { "epoch": 0.688759075477571, "grad_norm": 0.15877053141593933, "learning_rate": 4.3112481631884875e-06, "loss": 0.889, "step": 95150 }, { "epoch": 0.6888314621381572, "grad_norm": 0.15322931110858917, "learning_rate": 4.311175776527902e-06, "loss": 0.8997, "step": 95160 }, { "epoch": 0.6889038487987433, "grad_norm": 0.16098260879516602, "learning_rate": 4.311103389867316e-06, "loss": 0.8823, "step": 95170 }, { "epoch": 0.6889762354593295, "grad_norm": 0.16160458326339722, "learning_rate": 4.311031003206729e-06, "loss": 0.8972, "step": 95180 }, { "epoch": 0.6890486221199157, "grad_norm": 0.16651996970176697, "learning_rate": 4.310958616546143e-06, "loss": 0.8879, "step": 95190 }, { "epoch": 0.689121008780502, "grad_norm": 0.1751406192779541, "learning_rate": 4.310886229885557e-06, "loss": 0.9041, "step": 95200 }, { "epoch": 0.6891933954410882, "grad_norm": 0.1623631864786148, "learning_rate": 4.310813843224971e-06, "loss": 0.8999, "step": 95210 }, { "epoch": 0.6892657821016743, "grad_norm": 0.1833324134349823, "learning_rate": 4.3107414565643845e-06, "loss": 0.8994, "step": 95220 }, { "epoch": 0.6893381687622605, "grad_norm": 0.1521463841199875, "learning_rate": 4.310669069903798e-06, "loss": 0.9037, "step": 95230 }, { "epoch": 0.6894105554228467, "grad_norm": 0.16113896667957306, "learning_rate": 4.310596683243213e-06, "loss": 0.8856, "step": 95240 }, { "epoch": 0.6894829420834329, "grad_norm": 0.1714179962873459, "learning_rate": 4.310524296582626e-06, "loss": 0.8825, "step": 95250 }, { "epoch": 0.689555328744019, "grad_norm": 0.16432242095470428, "learning_rate": 4.31045190992204e-06, "loss": 0.8871, "step": 95260 }, { "epoch": 0.6896277154046052, "grad_norm": 0.16391827166080475, "learning_rate": 4.3103795232614534e-06, "loss": 0.8978, "step": 95270 }, { "epoch": 0.6897001020651914, "grad_norm": 0.1498943716287613, "learning_rate": 4.310307136600868e-06, "loss": 0.8863, "step": 95280 }, { "epoch": 0.6897724887257776, "grad_norm": 0.1618964523077011, "learning_rate": 4.3102347499402815e-06, "loss": 0.8955, "step": 95290 }, { "epoch": 0.6898448753863639, "grad_norm": 0.1474188268184662, "learning_rate": 4.310162363279695e-06, "loss": 0.8935, "step": 95300 }, { "epoch": 0.68991726204695, "grad_norm": 0.1580621600151062, "learning_rate": 4.310089976619109e-06, "loss": 0.8884, "step": 95310 }, { "epoch": 0.6899896487075362, "grad_norm": 0.16119031608104706, "learning_rate": 4.310017589958523e-06, "loss": 0.8994, "step": 95320 }, { "epoch": 0.6900620353681224, "grad_norm": 0.19750839471817017, "learning_rate": 4.309945203297937e-06, "loss": 0.8847, "step": 95330 }, { "epoch": 0.6901344220287086, "grad_norm": 0.15879863500595093, "learning_rate": 4.3098728166373505e-06, "loss": 0.8885, "step": 95340 }, { "epoch": 0.6902068086892947, "grad_norm": 0.14561788737773895, "learning_rate": 4.309800429976764e-06, "loss": 0.8881, "step": 95350 }, { "epoch": 0.6902791953498809, "grad_norm": 0.16508562862873077, "learning_rate": 4.3097280433161785e-06, "loss": 0.8895, "step": 95360 }, { "epoch": 0.6903515820104671, "grad_norm": 0.14818058907985687, "learning_rate": 4.309655656655592e-06, "loss": 0.8812, "step": 95370 }, { "epoch": 0.6904239686710533, "grad_norm": 0.15891733765602112, "learning_rate": 4.309583269995006e-06, "loss": 0.8962, "step": 95380 }, { "epoch": 0.6904963553316394, "grad_norm": 0.15448154509067535, "learning_rate": 4.309510883334419e-06, "loss": 0.8874, "step": 95390 }, { "epoch": 0.6905687419922256, "grad_norm": 0.15475550293922424, "learning_rate": 4.309438496673833e-06, "loss": 0.8954, "step": 95400 }, { "epoch": 0.6906411286528119, "grad_norm": 0.1788392812013626, "learning_rate": 4.3093661100132475e-06, "loss": 0.8905, "step": 95410 }, { "epoch": 0.6907135153133981, "grad_norm": 0.17869041860103607, "learning_rate": 4.309293723352661e-06, "loss": 0.8919, "step": 95420 }, { "epoch": 0.6907859019739843, "grad_norm": 0.1565256416797638, "learning_rate": 4.309221336692075e-06, "loss": 0.8814, "step": 95430 }, { "epoch": 0.6908582886345704, "grad_norm": 0.30670905113220215, "learning_rate": 4.309148950031488e-06, "loss": 0.8798, "step": 95440 }, { "epoch": 0.6909306752951566, "grad_norm": 0.1644957959651947, "learning_rate": 4.309076563370903e-06, "loss": 0.8952, "step": 95450 }, { "epoch": 0.6910030619557428, "grad_norm": 0.17856526374816895, "learning_rate": 4.309004176710316e-06, "loss": 0.9032, "step": 95460 }, { "epoch": 0.691075448616329, "grad_norm": 0.1568627655506134, "learning_rate": 4.30893179004973e-06, "loss": 0.893, "step": 95470 }, { "epoch": 0.6911478352769151, "grad_norm": 0.2152179330587387, "learning_rate": 4.308859403389144e-06, "loss": 0.8926, "step": 95480 }, { "epoch": 0.6912202219375013, "grad_norm": 0.16184626519680023, "learning_rate": 4.308787016728558e-06, "loss": 0.8783, "step": 95490 }, { "epoch": 0.6912926085980875, "grad_norm": 0.1587613821029663, "learning_rate": 4.308714630067972e-06, "loss": 0.8855, "step": 95500 }, { "epoch": 0.6913649952586738, "grad_norm": 0.18116620182991028, "learning_rate": 4.308642243407385e-06, "loss": 0.8977, "step": 95510 }, { "epoch": 0.69143738191926, "grad_norm": 0.14871811866760254, "learning_rate": 4.308569856746799e-06, "loss": 0.8962, "step": 95520 }, { "epoch": 0.6915097685798461, "grad_norm": 0.14631856977939606, "learning_rate": 4.308497470086213e-06, "loss": 0.8987, "step": 95530 }, { "epoch": 0.6915821552404323, "grad_norm": 0.16045540571212769, "learning_rate": 4.308425083425627e-06, "loss": 0.8954, "step": 95540 }, { "epoch": 0.6916545419010185, "grad_norm": 0.22909776866436005, "learning_rate": 4.308352696765041e-06, "loss": 0.9038, "step": 95550 }, { "epoch": 0.6917269285616047, "grad_norm": 0.1496800035238266, "learning_rate": 4.308280310104454e-06, "loss": 0.8881, "step": 95560 }, { "epoch": 0.6917993152221908, "grad_norm": 0.16365250945091248, "learning_rate": 4.308207923443868e-06, "loss": 0.8915, "step": 95570 }, { "epoch": 0.691871701882777, "grad_norm": 0.16174180805683136, "learning_rate": 4.3081355367832815e-06, "loss": 0.9061, "step": 95580 }, { "epoch": 0.6919440885433632, "grad_norm": 0.1649826467037201, "learning_rate": 4.308063150122695e-06, "loss": 0.8954, "step": 95590 }, { "epoch": 0.6920164752039494, "grad_norm": 0.17469272017478943, "learning_rate": 4.3079907634621096e-06, "loss": 0.887, "step": 95600 }, { "epoch": 0.6920888618645356, "grad_norm": 0.1564358025789261, "learning_rate": 4.307918376801523e-06, "loss": 0.8889, "step": 95610 }, { "epoch": 0.6921612485251218, "grad_norm": 0.1583586186170578, "learning_rate": 4.307845990140937e-06, "loss": 0.8859, "step": 95620 }, { "epoch": 0.692233635185708, "grad_norm": 0.1620096117258072, "learning_rate": 4.30777360348035e-06, "loss": 0.8862, "step": 95630 }, { "epoch": 0.6923060218462942, "grad_norm": 0.16032646596431732, "learning_rate": 4.307701216819765e-06, "loss": 0.8894, "step": 95640 }, { "epoch": 0.6923784085068804, "grad_norm": 0.21608605980873108, "learning_rate": 4.3076288301591785e-06, "loss": 0.8925, "step": 95650 }, { "epoch": 0.6924507951674665, "grad_norm": 0.14863435924053192, "learning_rate": 4.307556443498592e-06, "loss": 0.8989, "step": 95660 }, { "epoch": 0.6925231818280527, "grad_norm": 0.16338156163692474, "learning_rate": 4.307484056838006e-06, "loss": 0.8888, "step": 95670 }, { "epoch": 0.6925955684886389, "grad_norm": 0.15900051593780518, "learning_rate": 4.30741167017742e-06, "loss": 0.8827, "step": 95680 }, { "epoch": 0.6926679551492251, "grad_norm": 0.16256722807884216, "learning_rate": 4.307339283516834e-06, "loss": 0.8878, "step": 95690 }, { "epoch": 0.6927403418098113, "grad_norm": 0.15759159624576569, "learning_rate": 4.307266896856247e-06, "loss": 0.8929, "step": 95700 }, { "epoch": 0.6928127284703974, "grad_norm": 0.15648800134658813, "learning_rate": 4.307194510195661e-06, "loss": 0.8818, "step": 95710 }, { "epoch": 0.6928851151309836, "grad_norm": 0.1834617257118225, "learning_rate": 4.3071221235350755e-06, "loss": 0.8918, "step": 95720 }, { "epoch": 0.6929575017915699, "grad_norm": 0.162217915058136, "learning_rate": 4.307049736874489e-06, "loss": 0.9029, "step": 95730 }, { "epoch": 0.6930298884521561, "grad_norm": 0.15800444781780243, "learning_rate": 4.306977350213903e-06, "loss": 0.8949, "step": 95740 }, { "epoch": 0.6931022751127422, "grad_norm": 0.16938433051109314, "learning_rate": 4.306904963553316e-06, "loss": 0.8983, "step": 95750 }, { "epoch": 0.6931746617733284, "grad_norm": 0.16872501373291016, "learning_rate": 4.306832576892731e-06, "loss": 0.8944, "step": 95760 }, { "epoch": 0.6932470484339146, "grad_norm": 0.1726616472005844, "learning_rate": 4.306760190232144e-06, "loss": 0.8868, "step": 95770 }, { "epoch": 0.6933194350945008, "grad_norm": 0.1653449535369873, "learning_rate": 4.306687803571558e-06, "loss": 0.8915, "step": 95780 }, { "epoch": 0.693391821755087, "grad_norm": 0.15586352348327637, "learning_rate": 4.306615416910972e-06, "loss": 0.8864, "step": 95790 }, { "epoch": 0.6934642084156731, "grad_norm": 0.16180157661437988, "learning_rate": 4.306543030250386e-06, "loss": 0.8912, "step": 95800 }, { "epoch": 0.6935365950762593, "grad_norm": 0.16136401891708374, "learning_rate": 4.3064706435898e-06, "loss": 0.8899, "step": 95810 }, { "epoch": 0.6936089817368455, "grad_norm": 0.1651676446199417, "learning_rate": 4.306398256929213e-06, "loss": 0.8796, "step": 95820 }, { "epoch": 0.6936813683974318, "grad_norm": 0.16563965380191803, "learning_rate": 4.306325870268627e-06, "loss": 0.8822, "step": 95830 }, { "epoch": 0.693753755058018, "grad_norm": 0.15649251639842987, "learning_rate": 4.3062534836080414e-06, "loss": 0.8911, "step": 95840 }, { "epoch": 0.6938261417186041, "grad_norm": 0.1557883769273758, "learning_rate": 4.306181096947455e-06, "loss": 0.9081, "step": 95850 }, { "epoch": 0.6938985283791903, "grad_norm": 0.18621514737606049, "learning_rate": 4.306108710286869e-06, "loss": 0.8983, "step": 95860 }, { "epoch": 0.6939709150397765, "grad_norm": 0.16845759749412537, "learning_rate": 4.306036323626282e-06, "loss": 0.8874, "step": 95870 }, { "epoch": 0.6940433017003627, "grad_norm": 0.16313624382019043, "learning_rate": 4.305963936965697e-06, "loss": 0.8855, "step": 95880 }, { "epoch": 0.6941156883609488, "grad_norm": 0.160096675157547, "learning_rate": 4.30589155030511e-06, "loss": 0.8966, "step": 95890 }, { "epoch": 0.694188075021535, "grad_norm": 0.15571869909763336, "learning_rate": 4.305819163644524e-06, "loss": 0.8865, "step": 95900 }, { "epoch": 0.6942604616821212, "grad_norm": 0.14915607869625092, "learning_rate": 4.305746776983938e-06, "loss": 0.8954, "step": 95910 }, { "epoch": 0.6943328483427074, "grad_norm": 0.15486833453178406, "learning_rate": 4.305674390323352e-06, "loss": 0.893, "step": 95920 }, { "epoch": 0.6944052350032935, "grad_norm": 0.16171999275684357, "learning_rate": 4.305602003662766e-06, "loss": 0.8914, "step": 95930 }, { "epoch": 0.6944776216638798, "grad_norm": 0.16727794706821442, "learning_rate": 4.305529617002179e-06, "loss": 0.8979, "step": 95940 }, { "epoch": 0.694550008324466, "grad_norm": 0.19044362008571625, "learning_rate": 4.305457230341593e-06, "loss": 0.8751, "step": 95950 }, { "epoch": 0.6946223949850522, "grad_norm": 0.16111010313034058, "learning_rate": 4.305384843681007e-06, "loss": 0.9069, "step": 95960 }, { "epoch": 0.6946947816456384, "grad_norm": 0.20112472772598267, "learning_rate": 4.305312457020421e-06, "loss": 0.8972, "step": 95970 }, { "epoch": 0.6947671683062245, "grad_norm": 0.19165970385074615, "learning_rate": 4.305240070359835e-06, "loss": 0.9029, "step": 95980 }, { "epoch": 0.6948395549668107, "grad_norm": 0.1580514758825302, "learning_rate": 4.305167683699248e-06, "loss": 0.8961, "step": 95990 }, { "epoch": 0.6949119416273969, "grad_norm": 0.1498570591211319, "learning_rate": 4.305095297038662e-06, "loss": 0.8919, "step": 96000 }, { "epoch": 0.6949843282879831, "grad_norm": 0.15419946610927582, "learning_rate": 4.305022910378076e-06, "loss": 0.888, "step": 96010 }, { "epoch": 0.6950567149485692, "grad_norm": 0.17555716633796692, "learning_rate": 4.30495052371749e-06, "loss": 0.8925, "step": 96020 }, { "epoch": 0.6951291016091554, "grad_norm": 0.15583516657352448, "learning_rate": 4.3048781370569035e-06, "loss": 0.8886, "step": 96030 }, { "epoch": 0.6952014882697416, "grad_norm": 0.15992936491966248, "learning_rate": 4.304805750396317e-06, "loss": 0.9023, "step": 96040 }, { "epoch": 0.6952738749303279, "grad_norm": 0.16176897287368774, "learning_rate": 4.304733363735732e-06, "loss": 0.8834, "step": 96050 }, { "epoch": 0.6953462615909141, "grad_norm": 0.14862500131130219, "learning_rate": 4.304660977075145e-06, "loss": 0.9043, "step": 96060 }, { "epoch": 0.6954186482515002, "grad_norm": 0.16168230772018433, "learning_rate": 4.304588590414559e-06, "loss": 0.9008, "step": 96070 }, { "epoch": 0.6954910349120864, "grad_norm": 0.15463055670261383, "learning_rate": 4.3045162037539725e-06, "loss": 0.8975, "step": 96080 }, { "epoch": 0.6955634215726726, "grad_norm": 0.15740707516670227, "learning_rate": 4.304443817093387e-06, "loss": 0.8999, "step": 96090 }, { "epoch": 0.6956358082332588, "grad_norm": 0.1548217087984085, "learning_rate": 4.3043714304328e-06, "loss": 0.8997, "step": 96100 }, { "epoch": 0.695708194893845, "grad_norm": 0.17392714321613312, "learning_rate": 4.304299043772214e-06, "loss": 0.8889, "step": 96110 }, { "epoch": 0.6957805815544311, "grad_norm": 0.16982245445251465, "learning_rate": 4.304226657111628e-06, "loss": 0.8908, "step": 96120 }, { "epoch": 0.6958529682150173, "grad_norm": 0.33574557304382324, "learning_rate": 4.304154270451041e-06, "loss": 0.8943, "step": 96130 }, { "epoch": 0.6959253548756035, "grad_norm": 0.1604243814945221, "learning_rate": 4.304081883790455e-06, "loss": 0.891, "step": 96140 }, { "epoch": 0.6959977415361898, "grad_norm": 0.1695748269557953, "learning_rate": 4.3040094971298695e-06, "loss": 0.9052, "step": 96150 }, { "epoch": 0.6960701281967759, "grad_norm": 0.15508553385734558, "learning_rate": 4.303937110469283e-06, "loss": 0.8917, "step": 96160 }, { "epoch": 0.6961425148573621, "grad_norm": 0.16269119083881378, "learning_rate": 4.303864723808697e-06, "loss": 0.891, "step": 96170 }, { "epoch": 0.6962149015179483, "grad_norm": 0.15743182599544525, "learning_rate": 4.30379233714811e-06, "loss": 0.8944, "step": 96180 }, { "epoch": 0.6962872881785345, "grad_norm": 0.15545742213726044, "learning_rate": 4.303719950487524e-06, "loss": 0.8862, "step": 96190 }, { "epoch": 0.6963596748391206, "grad_norm": 0.2056836634874344, "learning_rate": 4.303647563826938e-06, "loss": 0.8911, "step": 96200 }, { "epoch": 0.6964320614997068, "grad_norm": 0.15505336225032806, "learning_rate": 4.303575177166352e-06, "loss": 0.8959, "step": 96210 }, { "epoch": 0.696504448160293, "grad_norm": 0.155269056558609, "learning_rate": 4.303502790505766e-06, "loss": 0.8989, "step": 96220 }, { "epoch": 0.6965768348208792, "grad_norm": 0.16013725101947784, "learning_rate": 4.303430403845179e-06, "loss": 0.8941, "step": 96230 }, { "epoch": 0.6966492214814654, "grad_norm": 0.15824046730995178, "learning_rate": 4.303358017184594e-06, "loss": 0.8998, "step": 96240 }, { "epoch": 0.6967216081420515, "grad_norm": 0.17412789165973663, "learning_rate": 4.303285630524007e-06, "loss": 0.9062, "step": 96250 }, { "epoch": 0.6967939948026378, "grad_norm": 0.22454731166362762, "learning_rate": 4.303213243863421e-06, "loss": 0.8884, "step": 96260 }, { "epoch": 0.696866381463224, "grad_norm": 0.16182060539722443, "learning_rate": 4.3031408572028345e-06, "loss": 0.8888, "step": 96270 }, { "epoch": 0.6969387681238102, "grad_norm": 0.17886275053024292, "learning_rate": 4.303068470542249e-06, "loss": 0.8823, "step": 96280 }, { "epoch": 0.6970111547843963, "grad_norm": 0.1985713690519333, "learning_rate": 4.302996083881663e-06, "loss": 0.8882, "step": 96290 }, { "epoch": 0.6970835414449825, "grad_norm": 0.1513681411743164, "learning_rate": 4.302923697221076e-06, "loss": 0.9012, "step": 96300 }, { "epoch": 0.6971559281055687, "grad_norm": 0.15492072701454163, "learning_rate": 4.30285131056049e-06, "loss": 0.899, "step": 96310 }, { "epoch": 0.6972283147661549, "grad_norm": 0.15350612998008728, "learning_rate": 4.302778923899904e-06, "loss": 0.9022, "step": 96320 }, { "epoch": 0.697300701426741, "grad_norm": 0.15936022996902466, "learning_rate": 4.302706537239318e-06, "loss": 0.8985, "step": 96330 }, { "epoch": 0.6973730880873272, "grad_norm": 0.15187810361385345, "learning_rate": 4.3026341505787316e-06, "loss": 0.8867, "step": 96340 }, { "epoch": 0.6974454747479134, "grad_norm": 0.16198959946632385, "learning_rate": 4.302561763918145e-06, "loss": 0.903, "step": 96350 }, { "epoch": 0.6975178614084997, "grad_norm": 0.1528894305229187, "learning_rate": 4.30248937725756e-06, "loss": 0.8793, "step": 96360 }, { "epoch": 0.6975902480690859, "grad_norm": 0.17313164472579956, "learning_rate": 4.302416990596973e-06, "loss": 0.8992, "step": 96370 }, { "epoch": 0.697662634729672, "grad_norm": 0.15843333303928375, "learning_rate": 4.302344603936387e-06, "loss": 0.8963, "step": 96380 }, { "epoch": 0.6977350213902582, "grad_norm": 0.1584440916776657, "learning_rate": 4.3022722172758005e-06, "loss": 0.9057, "step": 96390 }, { "epoch": 0.6978074080508444, "grad_norm": 0.14555677771568298, "learning_rate": 4.302199830615215e-06, "loss": 0.8841, "step": 96400 }, { "epoch": 0.6978797947114306, "grad_norm": 0.22453060746192932, "learning_rate": 4.3021274439546286e-06, "loss": 0.8912, "step": 96410 }, { "epoch": 0.6979521813720168, "grad_norm": 0.16137515008449554, "learning_rate": 4.302055057294042e-06, "loss": 0.8757, "step": 96420 }, { "epoch": 0.6980245680326029, "grad_norm": 0.2278577834367752, "learning_rate": 4.301982670633456e-06, "loss": 0.8918, "step": 96430 }, { "epoch": 0.6980969546931891, "grad_norm": 0.1604139506816864, "learning_rate": 4.30191028397287e-06, "loss": 0.8988, "step": 96440 }, { "epoch": 0.6981693413537753, "grad_norm": 0.1709796041250229, "learning_rate": 4.301837897312284e-06, "loss": 0.8999, "step": 96450 }, { "epoch": 0.6982417280143615, "grad_norm": 0.16104479134082794, "learning_rate": 4.3017655106516975e-06, "loss": 0.8961, "step": 96460 }, { "epoch": 0.6983141146749478, "grad_norm": 0.1653842329978943, "learning_rate": 4.301693123991111e-06, "loss": 0.8805, "step": 96470 }, { "epoch": 0.6983865013355339, "grad_norm": 0.16090551018714905, "learning_rate": 4.3016207373305256e-06, "loss": 0.9097, "step": 96480 }, { "epoch": 0.6984588879961201, "grad_norm": 0.3694705367088318, "learning_rate": 4.301548350669939e-06, "loss": 0.8856, "step": 96490 }, { "epoch": 0.6985312746567063, "grad_norm": 0.1503128856420517, "learning_rate": 4.301475964009353e-06, "loss": 0.8856, "step": 96500 }, { "epoch": 0.6986036613172925, "grad_norm": 0.16498462855815887, "learning_rate": 4.301403577348766e-06, "loss": 0.8975, "step": 96510 }, { "epoch": 0.6986760479778786, "grad_norm": 0.1551860123872757, "learning_rate": 4.301331190688181e-06, "loss": 0.895, "step": 96520 }, { "epoch": 0.6987484346384648, "grad_norm": 0.16694217920303345, "learning_rate": 4.3012588040275945e-06, "loss": 0.8932, "step": 96530 }, { "epoch": 0.698820821299051, "grad_norm": 0.9841502904891968, "learning_rate": 4.301186417367008e-06, "loss": 0.9151, "step": 96540 }, { "epoch": 0.6988932079596372, "grad_norm": 0.3107115626335144, "learning_rate": 4.301114030706422e-06, "loss": 0.8826, "step": 96550 }, { "epoch": 0.6989655946202233, "grad_norm": 0.3002294898033142, "learning_rate": 4.301041644045836e-06, "loss": 0.8991, "step": 96560 }, { "epoch": 0.6990379812808095, "grad_norm": 0.1559462547302246, "learning_rate": 4.30096925738525e-06, "loss": 0.8887, "step": 96570 }, { "epoch": 0.6991103679413958, "grad_norm": 0.1544226109981537, "learning_rate": 4.3008968707246634e-06, "loss": 0.8921, "step": 96580 }, { "epoch": 0.699182754601982, "grad_norm": 0.18448671698570251, "learning_rate": 4.300824484064077e-06, "loss": 0.8846, "step": 96590 }, { "epoch": 0.6992551412625682, "grad_norm": 0.1595505028963089, "learning_rate": 4.3007520974034915e-06, "loss": 0.8977, "step": 96600 }, { "epoch": 0.6993275279231543, "grad_norm": 0.15914291143417358, "learning_rate": 4.300679710742905e-06, "loss": 0.9054, "step": 96610 }, { "epoch": 0.6993999145837405, "grad_norm": 0.14967529475688934, "learning_rate": 4.300607324082319e-06, "loss": 0.8893, "step": 96620 }, { "epoch": 0.6994723012443267, "grad_norm": 0.1701008379459381, "learning_rate": 4.300534937421732e-06, "loss": 0.8854, "step": 96630 }, { "epoch": 0.6995446879049129, "grad_norm": 0.15182742476463318, "learning_rate": 4.300462550761146e-06, "loss": 0.8878, "step": 96640 }, { "epoch": 0.699617074565499, "grad_norm": 0.16047948598861694, "learning_rate": 4.30039016410056e-06, "loss": 0.8819, "step": 96650 }, { "epoch": 0.6996894612260852, "grad_norm": 0.15404923260211945, "learning_rate": 4.300317777439973e-06, "loss": 0.8876, "step": 96660 }, { "epoch": 0.6997618478866714, "grad_norm": 0.14895722270011902, "learning_rate": 4.300245390779388e-06, "loss": 0.8889, "step": 96670 }, { "epoch": 0.6998342345472577, "grad_norm": 0.1480094939470291, "learning_rate": 4.300173004118801e-06, "loss": 0.8798, "step": 96680 }, { "epoch": 0.6999066212078439, "grad_norm": 0.15281951427459717, "learning_rate": 4.300100617458215e-06, "loss": 0.8828, "step": 96690 }, { "epoch": 0.69997900786843, "grad_norm": 0.15964744985103607, "learning_rate": 4.3000282307976285e-06, "loss": 0.8885, "step": 96700 }, { "epoch": 0.7000513945290162, "grad_norm": 0.15603557229042053, "learning_rate": 4.299955844137043e-06, "loss": 0.8863, "step": 96710 }, { "epoch": 0.7001237811896024, "grad_norm": 0.1674613654613495, "learning_rate": 4.299883457476457e-06, "loss": 0.8982, "step": 96720 }, { "epoch": 0.7001961678501886, "grad_norm": 0.15703196823596954, "learning_rate": 4.29981107081587e-06, "loss": 0.8825, "step": 96730 }, { "epoch": 0.7002685545107747, "grad_norm": 0.17176085710525513, "learning_rate": 4.299738684155284e-06, "loss": 0.8873, "step": 96740 }, { "epoch": 0.7003409411713609, "grad_norm": 0.16086554527282715, "learning_rate": 4.299666297494698e-06, "loss": 0.8854, "step": 96750 }, { "epoch": 0.7004133278319471, "grad_norm": 0.16075126826763153, "learning_rate": 4.299593910834112e-06, "loss": 0.9035, "step": 96760 }, { "epoch": 0.7004857144925333, "grad_norm": 0.14747561514377594, "learning_rate": 4.2995215241735255e-06, "loss": 0.8975, "step": 96770 }, { "epoch": 0.7005581011531195, "grad_norm": 0.16974276304244995, "learning_rate": 4.299449137512939e-06, "loss": 0.8975, "step": 96780 }, { "epoch": 0.7006304878137057, "grad_norm": 0.16168342530727386, "learning_rate": 4.299376750852353e-06, "loss": 0.8864, "step": 96790 }, { "epoch": 0.7007028744742919, "grad_norm": 0.1794668585062027, "learning_rate": 4.299304364191767e-06, "loss": 0.8973, "step": 96800 }, { "epoch": 0.7007752611348781, "grad_norm": 0.16029122471809387, "learning_rate": 4.299231977531181e-06, "loss": 0.8895, "step": 96810 }, { "epoch": 0.7008476477954643, "grad_norm": 0.18477699160575867, "learning_rate": 4.2991595908705945e-06, "loss": 0.8888, "step": 96820 }, { "epoch": 0.7009200344560504, "grad_norm": 0.14988726377487183, "learning_rate": 4.299087204210008e-06, "loss": 0.8774, "step": 96830 }, { "epoch": 0.7009924211166366, "grad_norm": 0.1730933040380478, "learning_rate": 4.2990148175494225e-06, "loss": 0.9006, "step": 96840 }, { "epoch": 0.7010648077772228, "grad_norm": 0.1729445457458496, "learning_rate": 4.298942430888836e-06, "loss": 0.892, "step": 96850 }, { "epoch": 0.701137194437809, "grad_norm": 0.15627533197402954, "learning_rate": 4.29887004422825e-06, "loss": 0.8931, "step": 96860 }, { "epoch": 0.7012095810983952, "grad_norm": 0.16592872142791748, "learning_rate": 4.298797657567663e-06, "loss": 0.9052, "step": 96870 }, { "epoch": 0.7012819677589813, "grad_norm": 0.15809662640094757, "learning_rate": 4.298725270907078e-06, "loss": 0.8885, "step": 96880 }, { "epoch": 0.7013543544195676, "grad_norm": 0.1509890854358673, "learning_rate": 4.2986528842464915e-06, "loss": 0.8884, "step": 96890 }, { "epoch": 0.7014267410801538, "grad_norm": 0.14822304248809814, "learning_rate": 4.298580497585905e-06, "loss": 0.8768, "step": 96900 }, { "epoch": 0.70149912774074, "grad_norm": 0.15169335901737213, "learning_rate": 4.298508110925319e-06, "loss": 0.8876, "step": 96910 }, { "epoch": 0.7015715144013261, "grad_norm": 0.16953204572200775, "learning_rate": 4.298435724264733e-06, "loss": 0.9045, "step": 96920 }, { "epoch": 0.7016439010619123, "grad_norm": 0.16186213493347168, "learning_rate": 4.298363337604147e-06, "loss": 0.8963, "step": 96930 }, { "epoch": 0.7017162877224985, "grad_norm": 0.16287651658058167, "learning_rate": 4.29829095094356e-06, "loss": 0.9003, "step": 96940 }, { "epoch": 0.7017886743830847, "grad_norm": 0.17633283138275146, "learning_rate": 4.298218564282974e-06, "loss": 0.8685, "step": 96950 }, { "epoch": 0.7018610610436709, "grad_norm": 0.22455504536628723, "learning_rate": 4.2981461776223885e-06, "loss": 0.9052, "step": 96960 }, { "epoch": 0.701933447704257, "grad_norm": 0.14891460537910461, "learning_rate": 4.298073790961802e-06, "loss": 0.8836, "step": 96970 }, { "epoch": 0.7020058343648432, "grad_norm": 0.1637570559978485, "learning_rate": 4.298001404301216e-06, "loss": 0.8994, "step": 96980 }, { "epoch": 0.7020782210254294, "grad_norm": 0.1491258293390274, "learning_rate": 4.297929017640629e-06, "loss": 0.8972, "step": 96990 }, { "epoch": 0.7021506076860157, "grad_norm": 0.15172043442726135, "learning_rate": 4.297856630980044e-06, "loss": 0.8946, "step": 97000 }, { "epoch": 0.7022229943466018, "grad_norm": 0.1903785616159439, "learning_rate": 4.297784244319457e-06, "loss": 0.885, "step": 97010 }, { "epoch": 0.702295381007188, "grad_norm": 0.1513489931821823, "learning_rate": 4.297711857658871e-06, "loss": 0.8886, "step": 97020 }, { "epoch": 0.7023677676677742, "grad_norm": 0.14946502447128296, "learning_rate": 4.297639470998285e-06, "loss": 0.8916, "step": 97030 }, { "epoch": 0.7024401543283604, "grad_norm": 0.16224405169487, "learning_rate": 4.297567084337699e-06, "loss": 0.8992, "step": 97040 }, { "epoch": 0.7025125409889466, "grad_norm": 0.17303119599819183, "learning_rate": 4.297494697677113e-06, "loss": 0.9058, "step": 97050 }, { "epoch": 0.7025849276495327, "grad_norm": 0.16080045700073242, "learning_rate": 4.297422311016526e-06, "loss": 0.8884, "step": 97060 }, { "epoch": 0.7026573143101189, "grad_norm": 0.1557897925376892, "learning_rate": 4.29734992435594e-06, "loss": 0.8916, "step": 97070 }, { "epoch": 0.7027297009707051, "grad_norm": 0.14745230972766876, "learning_rate": 4.297277537695354e-06, "loss": 0.888, "step": 97080 }, { "epoch": 0.7028020876312913, "grad_norm": 0.16206741333007812, "learning_rate": 4.297205151034768e-06, "loss": 0.889, "step": 97090 }, { "epoch": 0.7028744742918774, "grad_norm": 0.23554551601409912, "learning_rate": 4.297132764374182e-06, "loss": 0.8866, "step": 97100 }, { "epoch": 0.7029468609524637, "grad_norm": 0.15564242005348206, "learning_rate": 4.297060377713595e-06, "loss": 0.8908, "step": 97110 }, { "epoch": 0.7030192476130499, "grad_norm": 0.15459449589252472, "learning_rate": 4.29698799105301e-06, "loss": 0.8898, "step": 97120 }, { "epoch": 0.7030916342736361, "grad_norm": 0.1636834740638733, "learning_rate": 4.296915604392423e-06, "loss": 0.8917, "step": 97130 }, { "epoch": 0.7031640209342223, "grad_norm": 0.1554258018732071, "learning_rate": 4.296843217731837e-06, "loss": 0.887, "step": 97140 }, { "epoch": 0.7032364075948084, "grad_norm": 0.1681572049856186, "learning_rate": 4.2967708310712506e-06, "loss": 0.8967, "step": 97150 }, { "epoch": 0.7033087942553946, "grad_norm": 0.15658222138881683, "learning_rate": 4.296698444410664e-06, "loss": 0.8905, "step": 97160 }, { "epoch": 0.7033811809159808, "grad_norm": 0.159566730260849, "learning_rate": 4.296626057750078e-06, "loss": 0.8944, "step": 97170 }, { "epoch": 0.703453567576567, "grad_norm": 0.16223838925361633, "learning_rate": 4.296553671089491e-06, "loss": 0.8846, "step": 97180 }, { "epoch": 0.7035259542371531, "grad_norm": 0.16831691563129425, "learning_rate": 4.296481284428906e-06, "loss": 0.8883, "step": 97190 }, { "epoch": 0.7035983408977393, "grad_norm": 0.17067785561084747, "learning_rate": 4.2964088977683195e-06, "loss": 0.8852, "step": 97200 }, { "epoch": 0.7036707275583256, "grad_norm": 0.1619025319814682, "learning_rate": 4.296336511107733e-06, "loss": 0.8959, "step": 97210 }, { "epoch": 0.7037431142189118, "grad_norm": 0.17006492614746094, "learning_rate": 4.296264124447147e-06, "loss": 0.8926, "step": 97220 }, { "epoch": 0.703815500879498, "grad_norm": 0.16066719591617584, "learning_rate": 4.296191737786561e-06, "loss": 0.8944, "step": 97230 }, { "epoch": 0.7038878875400841, "grad_norm": 0.16666296124458313, "learning_rate": 4.296119351125975e-06, "loss": 0.8865, "step": 97240 }, { "epoch": 0.7039602742006703, "grad_norm": 0.152059406042099, "learning_rate": 4.296046964465388e-06, "loss": 0.8949, "step": 97250 }, { "epoch": 0.7040326608612565, "grad_norm": 0.15612152218818665, "learning_rate": 4.295974577804802e-06, "loss": 0.9075, "step": 97260 }, { "epoch": 0.7041050475218427, "grad_norm": 0.15522941946983337, "learning_rate": 4.2959021911442165e-06, "loss": 0.8996, "step": 97270 }, { "epoch": 0.7041774341824288, "grad_norm": 0.1641325205564499, "learning_rate": 4.29582980448363e-06, "loss": 0.8991, "step": 97280 }, { "epoch": 0.704249820843015, "grad_norm": 0.1451716423034668, "learning_rate": 4.295757417823044e-06, "loss": 0.9109, "step": 97290 }, { "epoch": 0.7043222075036012, "grad_norm": 0.211027592420578, "learning_rate": 4.295685031162457e-06, "loss": 0.8876, "step": 97300 }, { "epoch": 0.7043945941641874, "grad_norm": 0.15049049258232117, "learning_rate": 4.295612644501872e-06, "loss": 0.8982, "step": 97310 }, { "epoch": 0.7044669808247737, "grad_norm": 0.1562298983335495, "learning_rate": 4.2955402578412854e-06, "loss": 0.9015, "step": 97320 }, { "epoch": 0.7045393674853598, "grad_norm": 0.1571105718612671, "learning_rate": 4.295467871180699e-06, "loss": 0.8958, "step": 97330 }, { "epoch": 0.704611754145946, "grad_norm": 0.1848883330821991, "learning_rate": 4.295395484520113e-06, "loss": 0.894, "step": 97340 }, { "epoch": 0.7046841408065322, "grad_norm": 0.15157540142536163, "learning_rate": 4.295323097859527e-06, "loss": 0.8881, "step": 97350 }, { "epoch": 0.7047565274671184, "grad_norm": 0.16974295675754547, "learning_rate": 4.295250711198941e-06, "loss": 0.8998, "step": 97360 }, { "epoch": 0.7048289141277045, "grad_norm": 0.15676848590373993, "learning_rate": 4.295178324538354e-06, "loss": 0.8847, "step": 97370 }, { "epoch": 0.7049013007882907, "grad_norm": 0.15643902122974396, "learning_rate": 4.295105937877768e-06, "loss": 0.9044, "step": 97380 }, { "epoch": 0.7049736874488769, "grad_norm": 0.15624825656414032, "learning_rate": 4.2950335512171824e-06, "loss": 0.8963, "step": 97390 }, { "epoch": 0.7050460741094631, "grad_norm": 0.16203194856643677, "learning_rate": 4.294961164556596e-06, "loss": 0.8655, "step": 97400 }, { "epoch": 0.7051184607700492, "grad_norm": 0.16650012135505676, "learning_rate": 4.29488877789601e-06, "loss": 0.9065, "step": 97410 }, { "epoch": 0.7051908474306355, "grad_norm": 0.16026648879051208, "learning_rate": 4.294816391235423e-06, "loss": 0.9028, "step": 97420 }, { "epoch": 0.7052632340912217, "grad_norm": 0.15564897656440735, "learning_rate": 4.294744004574837e-06, "loss": 0.8955, "step": 97430 }, { "epoch": 0.7053356207518079, "grad_norm": 0.15881158411502838, "learning_rate": 4.294671617914251e-06, "loss": 0.9105, "step": 97440 }, { "epoch": 0.7054080074123941, "grad_norm": 0.14864668250083923, "learning_rate": 4.294599231253665e-06, "loss": 0.8795, "step": 97450 }, { "epoch": 0.7054803940729802, "grad_norm": 0.16540266573429108, "learning_rate": 4.294526844593079e-06, "loss": 0.8881, "step": 97460 }, { "epoch": 0.7055527807335664, "grad_norm": 0.1469249576330185, "learning_rate": 4.294454457932492e-06, "loss": 0.8988, "step": 97470 }, { "epoch": 0.7056251673941526, "grad_norm": 0.1481475979089737, "learning_rate": 4.294382071271907e-06, "loss": 0.9061, "step": 97480 }, { "epoch": 0.7056975540547388, "grad_norm": 0.14965546131134033, "learning_rate": 4.29430968461132e-06, "loss": 0.9011, "step": 97490 }, { "epoch": 0.705769940715325, "grad_norm": 0.1463448852300644, "learning_rate": 4.294237297950734e-06, "loss": 0.8892, "step": 97500 }, { "epoch": 0.7058423273759111, "grad_norm": 0.157759428024292, "learning_rate": 4.2941649112901475e-06, "loss": 0.8854, "step": 97510 }, { "epoch": 0.7059147140364973, "grad_norm": 0.15894946455955505, "learning_rate": 4.294092524629562e-06, "loss": 0.8911, "step": 97520 }, { "epoch": 0.7059871006970836, "grad_norm": 0.16521485149860382, "learning_rate": 4.294020137968976e-06, "loss": 0.8939, "step": 97530 }, { "epoch": 0.7060594873576698, "grad_norm": 0.16126827895641327, "learning_rate": 4.293947751308389e-06, "loss": 0.8838, "step": 97540 }, { "epoch": 0.706131874018256, "grad_norm": 0.18010060489177704, "learning_rate": 4.293875364647803e-06, "loss": 0.9076, "step": 97550 }, { "epoch": 0.7062042606788421, "grad_norm": 0.15842019021511078, "learning_rate": 4.293802977987217e-06, "loss": 0.8983, "step": 97560 }, { "epoch": 0.7062766473394283, "grad_norm": 0.14757438004016876, "learning_rate": 4.293730591326631e-06, "loss": 0.893, "step": 97570 }, { "epoch": 0.7063490340000145, "grad_norm": 0.15686437487602234, "learning_rate": 4.2936582046660445e-06, "loss": 0.8923, "step": 97580 }, { "epoch": 0.7064214206606007, "grad_norm": 0.15694831311702728, "learning_rate": 4.293585818005458e-06, "loss": 0.9027, "step": 97590 }, { "epoch": 0.7064938073211868, "grad_norm": 0.15480537712574005, "learning_rate": 4.293513431344873e-06, "loss": 0.8853, "step": 97600 }, { "epoch": 0.706566193981773, "grad_norm": 0.17100360989570618, "learning_rate": 4.293441044684286e-06, "loss": 0.8935, "step": 97610 }, { "epoch": 0.7066385806423592, "grad_norm": 0.15572969615459442, "learning_rate": 4.2933686580237e-06, "loss": 0.8777, "step": 97620 }, { "epoch": 0.7067109673029454, "grad_norm": 0.16251128911972046, "learning_rate": 4.2932962713631135e-06, "loss": 0.8977, "step": 97630 }, { "epoch": 0.7067833539635316, "grad_norm": 0.15367302298545837, "learning_rate": 4.293223884702528e-06, "loss": 0.8939, "step": 97640 }, { "epoch": 0.7068557406241178, "grad_norm": 0.1467921882867813, "learning_rate": 4.2931514980419415e-06, "loss": 0.8904, "step": 97650 }, { "epoch": 0.706928127284704, "grad_norm": 0.14249904453754425, "learning_rate": 4.293079111381355e-06, "loss": 0.8977, "step": 97660 }, { "epoch": 0.7070005139452902, "grad_norm": 0.15507516264915466, "learning_rate": 4.293006724720769e-06, "loss": 0.9006, "step": 97670 }, { "epoch": 0.7070729006058764, "grad_norm": 0.147030770778656, "learning_rate": 4.292934338060183e-06, "loss": 0.8973, "step": 97680 }, { "epoch": 0.7071452872664625, "grad_norm": 0.14783620834350586, "learning_rate": 4.292861951399596e-06, "loss": 0.8842, "step": 97690 }, { "epoch": 0.7072176739270487, "grad_norm": 0.16624851524829865, "learning_rate": 4.29278956473901e-06, "loss": 0.8908, "step": 97700 }, { "epoch": 0.7072900605876349, "grad_norm": 0.13825160264968872, "learning_rate": 4.292717178078424e-06, "loss": 0.8955, "step": 97710 }, { "epoch": 0.7073624472482211, "grad_norm": 0.1511533409357071, "learning_rate": 4.292644791417838e-06, "loss": 0.8846, "step": 97720 }, { "epoch": 0.7074348339088072, "grad_norm": 0.1551799476146698, "learning_rate": 4.292572404757251e-06, "loss": 0.8946, "step": 97730 }, { "epoch": 0.7075072205693935, "grad_norm": 0.1462963968515396, "learning_rate": 4.292500018096665e-06, "loss": 0.8802, "step": 97740 }, { "epoch": 0.7075796072299797, "grad_norm": 0.15262961387634277, "learning_rate": 4.292427631436079e-06, "loss": 0.9035, "step": 97750 }, { "epoch": 0.7076519938905659, "grad_norm": 0.1675291806459427, "learning_rate": 4.292355244775493e-06, "loss": 0.8842, "step": 97760 }, { "epoch": 0.707724380551152, "grad_norm": 0.15412959456443787, "learning_rate": 4.292282858114907e-06, "loss": 0.8899, "step": 97770 }, { "epoch": 0.7077967672117382, "grad_norm": 0.16402597725391388, "learning_rate": 4.29221047145432e-06, "loss": 0.8956, "step": 97780 }, { "epoch": 0.7078691538723244, "grad_norm": 0.15542785823345184, "learning_rate": 4.292138084793735e-06, "loss": 0.8926, "step": 97790 }, { "epoch": 0.7079415405329106, "grad_norm": 0.16312408447265625, "learning_rate": 4.292065698133148e-06, "loss": 0.9012, "step": 97800 }, { "epoch": 0.7080139271934968, "grad_norm": 0.16359078884124756, "learning_rate": 4.291993311472562e-06, "loss": 0.889, "step": 97810 }, { "epoch": 0.7080863138540829, "grad_norm": 0.1581135094165802, "learning_rate": 4.2919209248119756e-06, "loss": 0.8854, "step": 97820 }, { "epoch": 0.7081587005146691, "grad_norm": 0.16701552271842957, "learning_rate": 4.29184853815139e-06, "loss": 0.8877, "step": 97830 }, { "epoch": 0.7082310871752553, "grad_norm": 0.18646441400051117, "learning_rate": 4.291776151490804e-06, "loss": 0.8825, "step": 97840 }, { "epoch": 0.7083034738358416, "grad_norm": 0.15613111853599548, "learning_rate": 4.291703764830217e-06, "loss": 0.8889, "step": 97850 }, { "epoch": 0.7083758604964278, "grad_norm": 0.15725113451480865, "learning_rate": 4.291631378169631e-06, "loss": 0.892, "step": 97860 }, { "epoch": 0.7084482471570139, "grad_norm": 0.1500454545021057, "learning_rate": 4.291558991509045e-06, "loss": 0.8845, "step": 97870 }, { "epoch": 0.7085206338176001, "grad_norm": 0.14962556958198547, "learning_rate": 4.291486604848459e-06, "loss": 0.9005, "step": 97880 }, { "epoch": 0.7085930204781863, "grad_norm": 0.15193293988704681, "learning_rate": 4.2914142181878726e-06, "loss": 0.898, "step": 97890 }, { "epoch": 0.7086654071387725, "grad_norm": 0.19393183290958405, "learning_rate": 4.291341831527286e-06, "loss": 0.8808, "step": 97900 }, { "epoch": 0.7087377937993586, "grad_norm": 0.16170886158943176, "learning_rate": 4.291269444866701e-06, "loss": 0.8892, "step": 97910 }, { "epoch": 0.7088101804599448, "grad_norm": 0.16696853935718536, "learning_rate": 4.291197058206114e-06, "loss": 0.8952, "step": 97920 }, { "epoch": 0.708882567120531, "grad_norm": 0.14689412713050842, "learning_rate": 4.291124671545528e-06, "loss": 0.8976, "step": 97930 }, { "epoch": 0.7089549537811172, "grad_norm": 0.18432478606700897, "learning_rate": 4.2910522848849415e-06, "loss": 0.888, "step": 97940 }, { "epoch": 0.7090273404417035, "grad_norm": 0.14978989958763123, "learning_rate": 4.290979898224356e-06, "loss": 0.8878, "step": 97950 }, { "epoch": 0.7090997271022896, "grad_norm": 0.14927172660827637, "learning_rate": 4.2909075115637696e-06, "loss": 0.9018, "step": 97960 }, { "epoch": 0.7091721137628758, "grad_norm": 0.1598445326089859, "learning_rate": 4.290835124903183e-06, "loss": 0.8776, "step": 97970 }, { "epoch": 0.709244500423462, "grad_norm": 0.15578649938106537, "learning_rate": 4.290762738242597e-06, "loss": 0.8925, "step": 97980 }, { "epoch": 0.7093168870840482, "grad_norm": 0.15951481461524963, "learning_rate": 4.290690351582011e-06, "loss": 0.8979, "step": 97990 }, { "epoch": 0.7093892737446343, "grad_norm": 0.15647555887699127, "learning_rate": 4.290617964921425e-06, "loss": 0.9094, "step": 98000 }, { "epoch": 0.7094616604052205, "grad_norm": 0.15051069855690002, "learning_rate": 4.2905455782608385e-06, "loss": 0.8905, "step": 98010 }, { "epoch": 0.7095340470658067, "grad_norm": 0.17179308831691742, "learning_rate": 4.290473191600252e-06, "loss": 0.883, "step": 98020 }, { "epoch": 0.7096064337263929, "grad_norm": 0.16709086298942566, "learning_rate": 4.290400804939666e-06, "loss": 0.9033, "step": 98030 }, { "epoch": 0.709678820386979, "grad_norm": 0.17320266366004944, "learning_rate": 4.29032841827908e-06, "loss": 0.9088, "step": 98040 }, { "epoch": 0.7097512070475652, "grad_norm": 0.16562733054161072, "learning_rate": 4.290256031618494e-06, "loss": 0.8882, "step": 98050 }, { "epoch": 0.7098235937081515, "grad_norm": 0.15800151228904724, "learning_rate": 4.2901836449579074e-06, "loss": 0.8935, "step": 98060 }, { "epoch": 0.7098959803687377, "grad_norm": 0.15598443150520325, "learning_rate": 4.290111258297321e-06, "loss": 0.8878, "step": 98070 }, { "epoch": 0.7099683670293239, "grad_norm": 0.14312607049942017, "learning_rate": 4.2900388716367355e-06, "loss": 0.8901, "step": 98080 }, { "epoch": 0.71004075368991, "grad_norm": 0.16290390491485596, "learning_rate": 4.289966484976149e-06, "loss": 0.8821, "step": 98090 }, { "epoch": 0.7101131403504962, "grad_norm": 0.1655735820531845, "learning_rate": 4.289894098315563e-06, "loss": 0.8994, "step": 98100 }, { "epoch": 0.7101855270110824, "grad_norm": 0.1505131721496582, "learning_rate": 4.289821711654976e-06, "loss": 0.8779, "step": 98110 }, { "epoch": 0.7102579136716686, "grad_norm": 0.1438320130109787, "learning_rate": 4.289749324994391e-06, "loss": 0.8743, "step": 98120 }, { "epoch": 0.7103303003322547, "grad_norm": 0.18201330304145813, "learning_rate": 4.2896769383338044e-06, "loss": 0.8858, "step": 98130 }, { "epoch": 0.7104026869928409, "grad_norm": 0.18367905914783478, "learning_rate": 4.289604551673218e-06, "loss": 0.9101, "step": 98140 }, { "epoch": 0.7104750736534271, "grad_norm": 0.15578269958496094, "learning_rate": 4.289532165012632e-06, "loss": 0.893, "step": 98150 }, { "epoch": 0.7105474603140133, "grad_norm": 0.15970683097839355, "learning_rate": 4.289459778352046e-06, "loss": 0.8996, "step": 98160 }, { "epoch": 0.7106198469745996, "grad_norm": 0.16508597135543823, "learning_rate": 4.28938739169146e-06, "loss": 0.8862, "step": 98170 }, { "epoch": 0.7106922336351857, "grad_norm": 0.174842968583107, "learning_rate": 4.289315005030873e-06, "loss": 0.9011, "step": 98180 }, { "epoch": 0.7107646202957719, "grad_norm": 0.17075632512569427, "learning_rate": 4.289242618370287e-06, "loss": 0.8765, "step": 98190 }, { "epoch": 0.7108370069563581, "grad_norm": 0.15258832275867462, "learning_rate": 4.2891702317097014e-06, "loss": 0.895, "step": 98200 }, { "epoch": 0.7109093936169443, "grad_norm": 0.14689311385154724, "learning_rate": 4.289097845049115e-06, "loss": 0.8874, "step": 98210 }, { "epoch": 0.7109817802775305, "grad_norm": 0.16319765150547028, "learning_rate": 4.289025458388528e-06, "loss": 0.8929, "step": 98220 }, { "epoch": 0.7110541669381166, "grad_norm": 0.16500282287597656, "learning_rate": 4.288953071727942e-06, "loss": 0.887, "step": 98230 }, { "epoch": 0.7111265535987028, "grad_norm": 0.15623874962329865, "learning_rate": 4.288880685067356e-06, "loss": 0.8959, "step": 98240 }, { "epoch": 0.711198940259289, "grad_norm": 0.15647245943546295, "learning_rate": 4.2888082984067695e-06, "loss": 0.8864, "step": 98250 }, { "epoch": 0.7112713269198752, "grad_norm": 0.1665528565645218, "learning_rate": 4.288735911746183e-06, "loss": 0.8903, "step": 98260 }, { "epoch": 0.7113437135804614, "grad_norm": 0.14643557369709015, "learning_rate": 4.288663525085598e-06, "loss": 0.8978, "step": 98270 }, { "epoch": 0.7114161002410476, "grad_norm": 0.154141366481781, "learning_rate": 4.288591138425011e-06, "loss": 0.8978, "step": 98280 }, { "epoch": 0.7114884869016338, "grad_norm": 0.14892645180225372, "learning_rate": 4.288518751764425e-06, "loss": 0.8925, "step": 98290 }, { "epoch": 0.71156087356222, "grad_norm": 0.17983193695545197, "learning_rate": 4.2884463651038384e-06, "loss": 0.8798, "step": 98300 }, { "epoch": 0.7116332602228062, "grad_norm": 0.15208591520786285, "learning_rate": 4.288373978443253e-06, "loss": 0.8917, "step": 98310 }, { "epoch": 0.7117056468833923, "grad_norm": 0.15199977159500122, "learning_rate": 4.2883015917826665e-06, "loss": 0.8878, "step": 98320 }, { "epoch": 0.7117780335439785, "grad_norm": 0.16912555694580078, "learning_rate": 4.28822920512208e-06, "loss": 0.8876, "step": 98330 }, { "epoch": 0.7118504202045647, "grad_norm": 0.18677960336208344, "learning_rate": 4.288156818461494e-06, "loss": 0.8971, "step": 98340 }, { "epoch": 0.7119228068651509, "grad_norm": 0.16484694182872772, "learning_rate": 4.288084431800908e-06, "loss": 0.8867, "step": 98350 }, { "epoch": 0.711995193525737, "grad_norm": 0.18507961928844452, "learning_rate": 4.288012045140322e-06, "loss": 0.9005, "step": 98360 }, { "epoch": 0.7120675801863232, "grad_norm": 0.16130656003952026, "learning_rate": 4.2879396584797355e-06, "loss": 0.9067, "step": 98370 }, { "epoch": 0.7121399668469095, "grad_norm": 0.16613750159740448, "learning_rate": 4.287867271819149e-06, "loss": 0.8972, "step": 98380 }, { "epoch": 0.7122123535074957, "grad_norm": 0.1898043304681778, "learning_rate": 4.2877948851585635e-06, "loss": 0.8951, "step": 98390 }, { "epoch": 0.7122847401680819, "grad_norm": 0.15496717393398285, "learning_rate": 4.287722498497977e-06, "loss": 0.8843, "step": 98400 }, { "epoch": 0.712357126828668, "grad_norm": 0.15950420498847961, "learning_rate": 4.287650111837391e-06, "loss": 0.8907, "step": 98410 }, { "epoch": 0.7124295134892542, "grad_norm": 0.16205261647701263, "learning_rate": 4.287577725176804e-06, "loss": 0.8871, "step": 98420 }, { "epoch": 0.7125019001498404, "grad_norm": 0.16304804384708405, "learning_rate": 4.287505338516219e-06, "loss": 0.8884, "step": 98430 }, { "epoch": 0.7125742868104266, "grad_norm": 0.15807406604290009, "learning_rate": 4.2874329518556325e-06, "loss": 0.8893, "step": 98440 }, { "epoch": 0.7126466734710127, "grad_norm": 0.16383838653564453, "learning_rate": 4.287360565195046e-06, "loss": 0.9048, "step": 98450 }, { "epoch": 0.7127190601315989, "grad_norm": 0.17542411386966705, "learning_rate": 4.28728817853446e-06, "loss": 0.8776, "step": 98460 }, { "epoch": 0.7127914467921851, "grad_norm": 0.18369115889072418, "learning_rate": 4.287215791873874e-06, "loss": 0.888, "step": 98470 }, { "epoch": 0.7128638334527714, "grad_norm": 0.21220123767852783, "learning_rate": 4.287143405213288e-06, "loss": 0.8996, "step": 98480 }, { "epoch": 0.7129362201133576, "grad_norm": 0.17068584263324738, "learning_rate": 4.287071018552701e-06, "loss": 0.8882, "step": 98490 }, { "epoch": 0.7130086067739437, "grad_norm": 0.15225493907928467, "learning_rate": 4.286998631892115e-06, "loss": 0.8924, "step": 98500 }, { "epoch": 0.7130809934345299, "grad_norm": 0.14265893399715424, "learning_rate": 4.2869262452315295e-06, "loss": 0.8832, "step": 98510 }, { "epoch": 0.7131533800951161, "grad_norm": 0.18743206560611725, "learning_rate": 4.286853858570943e-06, "loss": 0.9077, "step": 98520 }, { "epoch": 0.7132257667557023, "grad_norm": 0.15794509649276733, "learning_rate": 4.286781471910357e-06, "loss": 0.8924, "step": 98530 }, { "epoch": 0.7132981534162884, "grad_norm": 0.16649536788463593, "learning_rate": 4.28670908524977e-06, "loss": 0.8789, "step": 98540 }, { "epoch": 0.7133705400768746, "grad_norm": 0.1717004030942917, "learning_rate": 4.286636698589185e-06, "loss": 0.8966, "step": 98550 }, { "epoch": 0.7134429267374608, "grad_norm": 0.1472935825586319, "learning_rate": 4.286564311928598e-06, "loss": 0.8928, "step": 98560 }, { "epoch": 0.713515313398047, "grad_norm": 0.1435527801513672, "learning_rate": 4.286491925268012e-06, "loss": 0.872, "step": 98570 }, { "epoch": 0.7135877000586331, "grad_norm": 0.17352406680583954, "learning_rate": 4.286419538607426e-06, "loss": 0.883, "step": 98580 }, { "epoch": 0.7136600867192194, "grad_norm": 0.14839379489421844, "learning_rate": 4.28634715194684e-06, "loss": 0.8919, "step": 98590 }, { "epoch": 0.7137324733798056, "grad_norm": 0.1555548906326294, "learning_rate": 4.286274765286254e-06, "loss": 0.8863, "step": 98600 }, { "epoch": 0.7138048600403918, "grad_norm": 0.1623990833759308, "learning_rate": 4.286202378625667e-06, "loss": 0.8832, "step": 98610 }, { "epoch": 0.713877246700978, "grad_norm": 0.19231371581554413, "learning_rate": 4.286129991965081e-06, "loss": 0.8975, "step": 98620 }, { "epoch": 0.7139496333615641, "grad_norm": 0.15244658291339874, "learning_rate": 4.286057605304495e-06, "loss": 0.9031, "step": 98630 }, { "epoch": 0.7140220200221503, "grad_norm": 0.15277956426143646, "learning_rate": 4.285985218643909e-06, "loss": 0.888, "step": 98640 }, { "epoch": 0.7140944066827365, "grad_norm": 0.15656965970993042, "learning_rate": 4.285912831983323e-06, "loss": 0.8905, "step": 98650 }, { "epoch": 0.7141667933433227, "grad_norm": 0.18114785850048065, "learning_rate": 4.285840445322736e-06, "loss": 0.8754, "step": 98660 }, { "epoch": 0.7142391800039088, "grad_norm": 0.23680664598941803, "learning_rate": 4.28576805866215e-06, "loss": 0.894, "step": 98670 }, { "epoch": 0.714311566664495, "grad_norm": 0.14668525755405426, "learning_rate": 4.285695672001564e-06, "loss": 0.8903, "step": 98680 }, { "epoch": 0.7143839533250812, "grad_norm": 0.1626420021057129, "learning_rate": 4.285623285340978e-06, "loss": 0.8976, "step": 98690 }, { "epoch": 0.7144563399856675, "grad_norm": 0.154447540640831, "learning_rate": 4.2855508986803916e-06, "loss": 0.8961, "step": 98700 }, { "epoch": 0.7145287266462537, "grad_norm": 0.15234994888305664, "learning_rate": 4.285478512019805e-06, "loss": 0.8995, "step": 98710 }, { "epoch": 0.7146011133068398, "grad_norm": 0.15986403822898865, "learning_rate": 4.28540612535922e-06, "loss": 0.8968, "step": 98720 }, { "epoch": 0.714673499967426, "grad_norm": 0.16405628621578217, "learning_rate": 4.285333738698633e-06, "loss": 0.8877, "step": 98730 }, { "epoch": 0.7147458866280122, "grad_norm": 0.14801183342933655, "learning_rate": 4.285261352038047e-06, "loss": 0.8865, "step": 98740 }, { "epoch": 0.7148182732885984, "grad_norm": 0.1582917869091034, "learning_rate": 4.2851889653774605e-06, "loss": 0.8968, "step": 98750 }, { "epoch": 0.7148906599491845, "grad_norm": 0.14904256165027618, "learning_rate": 4.285116578716874e-06, "loss": 0.8825, "step": 98760 }, { "epoch": 0.7149630466097707, "grad_norm": 0.15857091546058655, "learning_rate": 4.285044192056288e-06, "loss": 0.8883, "step": 98770 }, { "epoch": 0.7150354332703569, "grad_norm": 0.1512639969587326, "learning_rate": 4.284971805395702e-06, "loss": 0.9077, "step": 98780 }, { "epoch": 0.7151078199309431, "grad_norm": 0.1620493084192276, "learning_rate": 4.284899418735116e-06, "loss": 0.8921, "step": 98790 }, { "epoch": 0.7151802065915294, "grad_norm": 0.15523122251033783, "learning_rate": 4.2848270320745294e-06, "loss": 0.8975, "step": 98800 }, { "epoch": 0.7152525932521155, "grad_norm": 0.15379586815834045, "learning_rate": 4.284754645413943e-06, "loss": 0.9056, "step": 98810 }, { "epoch": 0.7153249799127017, "grad_norm": 0.15808962285518646, "learning_rate": 4.2846822587533575e-06, "loss": 0.8923, "step": 98820 }, { "epoch": 0.7153973665732879, "grad_norm": 0.21590928733348846, "learning_rate": 4.284609872092771e-06, "loss": 0.8851, "step": 98830 }, { "epoch": 0.7154697532338741, "grad_norm": 0.16815008223056793, "learning_rate": 4.284537485432185e-06, "loss": 0.8893, "step": 98840 }, { "epoch": 0.7155421398944602, "grad_norm": 0.15785428881645203, "learning_rate": 4.284465098771598e-06, "loss": 0.8874, "step": 98850 }, { "epoch": 0.7156145265550464, "grad_norm": 0.1511503905057907, "learning_rate": 4.284392712111012e-06, "loss": 0.8789, "step": 98860 }, { "epoch": 0.7156869132156326, "grad_norm": 0.15221667289733887, "learning_rate": 4.2843203254504264e-06, "loss": 0.8909, "step": 98870 }, { "epoch": 0.7157592998762188, "grad_norm": 0.15201707184314728, "learning_rate": 4.28424793878984e-06, "loss": 0.8908, "step": 98880 }, { "epoch": 0.715831686536805, "grad_norm": 0.1675233393907547, "learning_rate": 4.284175552129254e-06, "loss": 0.9029, "step": 98890 }, { "epoch": 0.7159040731973911, "grad_norm": 0.1582505851984024, "learning_rate": 4.284103165468667e-06, "loss": 0.894, "step": 98900 }, { "epoch": 0.7159764598579774, "grad_norm": 0.14627264440059662, "learning_rate": 4.284030778808082e-06, "loss": 0.8948, "step": 98910 }, { "epoch": 0.7160488465185636, "grad_norm": 0.15614593029022217, "learning_rate": 4.283958392147495e-06, "loss": 0.8895, "step": 98920 }, { "epoch": 0.7161212331791498, "grad_norm": 0.17109185457229614, "learning_rate": 4.283886005486909e-06, "loss": 0.9064, "step": 98930 }, { "epoch": 0.716193619839736, "grad_norm": 0.17292575538158417, "learning_rate": 4.283813618826323e-06, "loss": 0.8936, "step": 98940 }, { "epoch": 0.7162660065003221, "grad_norm": 0.15228956937789917, "learning_rate": 4.283741232165737e-06, "loss": 0.8948, "step": 98950 }, { "epoch": 0.7163383931609083, "grad_norm": 0.15983666479587555, "learning_rate": 4.283668845505151e-06, "loss": 0.892, "step": 98960 }, { "epoch": 0.7164107798214945, "grad_norm": 0.15323320031166077, "learning_rate": 4.283596458844564e-06, "loss": 0.885, "step": 98970 }, { "epoch": 0.7164831664820807, "grad_norm": 0.1651761531829834, "learning_rate": 4.283524072183978e-06, "loss": 0.8907, "step": 98980 }, { "epoch": 0.7165555531426668, "grad_norm": 0.1607891172170639, "learning_rate": 4.283451685523392e-06, "loss": 0.8986, "step": 98990 }, { "epoch": 0.716627939803253, "grad_norm": 0.15800441801548004, "learning_rate": 4.283379298862806e-06, "loss": 0.8871, "step": 99000 }, { "epoch": 0.7167003264638393, "grad_norm": 0.15280944108963013, "learning_rate": 4.28330691220222e-06, "loss": 0.892, "step": 99010 }, { "epoch": 0.7167727131244255, "grad_norm": 0.15718095004558563, "learning_rate": 4.283234525541633e-06, "loss": 0.8885, "step": 99020 }, { "epoch": 0.7168450997850117, "grad_norm": 0.15296663343906403, "learning_rate": 4.283162138881048e-06, "loss": 0.8945, "step": 99030 }, { "epoch": 0.7169174864455978, "grad_norm": 0.1575295478105545, "learning_rate": 4.283089752220461e-06, "loss": 0.895, "step": 99040 }, { "epoch": 0.716989873106184, "grad_norm": 0.16054439544677734, "learning_rate": 4.283017365559875e-06, "loss": 0.8788, "step": 99050 }, { "epoch": 0.7170622597667702, "grad_norm": 0.15393316745758057, "learning_rate": 4.2829449788992885e-06, "loss": 0.881, "step": 99060 }, { "epoch": 0.7171346464273564, "grad_norm": 0.16581709682941437, "learning_rate": 4.282872592238703e-06, "loss": 0.8949, "step": 99070 }, { "epoch": 0.7172070330879425, "grad_norm": 0.16176964342594147, "learning_rate": 4.282800205578117e-06, "loss": 0.8834, "step": 99080 }, { "epoch": 0.7172794197485287, "grad_norm": 0.15313786268234253, "learning_rate": 4.28272781891753e-06, "loss": 0.8849, "step": 99090 }, { "epoch": 0.7173518064091149, "grad_norm": 0.1565057635307312, "learning_rate": 4.282655432256944e-06, "loss": 0.8898, "step": 99100 }, { "epoch": 0.7174241930697011, "grad_norm": 0.17131567001342773, "learning_rate": 4.282583045596358e-06, "loss": 0.8981, "step": 99110 }, { "epoch": 0.7174965797302874, "grad_norm": 0.16490551829338074, "learning_rate": 4.282510658935772e-06, "loss": 0.8875, "step": 99120 }, { "epoch": 0.7175689663908735, "grad_norm": 0.14752908051013947, "learning_rate": 4.2824382722751855e-06, "loss": 0.8903, "step": 99130 }, { "epoch": 0.7176413530514597, "grad_norm": 0.1532769799232483, "learning_rate": 4.282365885614599e-06, "loss": 0.8946, "step": 99140 }, { "epoch": 0.7177137397120459, "grad_norm": 0.16182607412338257, "learning_rate": 4.282293498954014e-06, "loss": 0.8948, "step": 99150 }, { "epoch": 0.7177861263726321, "grad_norm": 0.1951036900281906, "learning_rate": 4.282221112293427e-06, "loss": 0.8873, "step": 99160 }, { "epoch": 0.7178585130332182, "grad_norm": 0.16654542088508606, "learning_rate": 4.282148725632841e-06, "loss": 0.8903, "step": 99170 }, { "epoch": 0.7179308996938044, "grad_norm": 0.17811834812164307, "learning_rate": 4.2820763389722545e-06, "loss": 0.8951, "step": 99180 }, { "epoch": 0.7180032863543906, "grad_norm": 0.14857879281044006, "learning_rate": 4.282003952311669e-06, "loss": 0.8927, "step": 99190 }, { "epoch": 0.7180756730149768, "grad_norm": 0.16096261143684387, "learning_rate": 4.2819315656510825e-06, "loss": 0.8919, "step": 99200 }, { "epoch": 0.7181480596755629, "grad_norm": 0.15119074285030365, "learning_rate": 4.281859178990496e-06, "loss": 0.8969, "step": 99210 }, { "epoch": 0.7182204463361491, "grad_norm": 0.15937365591526031, "learning_rate": 4.28178679232991e-06, "loss": 0.8907, "step": 99220 }, { "epoch": 0.7182928329967354, "grad_norm": 0.15309041738510132, "learning_rate": 4.281714405669324e-06, "loss": 0.8832, "step": 99230 }, { "epoch": 0.7183652196573216, "grad_norm": 0.20644572377204895, "learning_rate": 4.281642019008738e-06, "loss": 0.8935, "step": 99240 }, { "epoch": 0.7184376063179078, "grad_norm": 0.17840343713760376, "learning_rate": 4.2815696323481515e-06, "loss": 0.8981, "step": 99250 }, { "epoch": 0.7185099929784939, "grad_norm": 0.16128534078598022, "learning_rate": 4.281497245687565e-06, "loss": 0.8925, "step": 99260 }, { "epoch": 0.7185823796390801, "grad_norm": 0.16106021404266357, "learning_rate": 4.2814248590269796e-06, "loss": 0.8902, "step": 99270 }, { "epoch": 0.7186547662996663, "grad_norm": 0.15783260762691498, "learning_rate": 4.281352472366392e-06, "loss": 0.878, "step": 99280 }, { "epoch": 0.7187271529602525, "grad_norm": 0.17158517241477966, "learning_rate": 4.281280085705806e-06, "loss": 0.895, "step": 99290 }, { "epoch": 0.7187995396208386, "grad_norm": 0.16213983297348022, "learning_rate": 4.28120769904522e-06, "loss": 0.8746, "step": 99300 }, { "epoch": 0.7188719262814248, "grad_norm": 0.17305901646614075, "learning_rate": 4.281135312384634e-06, "loss": 0.8907, "step": 99310 }, { "epoch": 0.718944312942011, "grad_norm": 0.18499404191970825, "learning_rate": 4.281062925724048e-06, "loss": 0.884, "step": 99320 }, { "epoch": 0.7190166996025973, "grad_norm": 0.15206129848957062, "learning_rate": 4.280990539063461e-06, "loss": 0.8882, "step": 99330 }, { "epoch": 0.7190890862631835, "grad_norm": 0.15170209109783173, "learning_rate": 4.280918152402876e-06, "loss": 0.9006, "step": 99340 }, { "epoch": 0.7191614729237696, "grad_norm": 0.14841248095035553, "learning_rate": 4.280845765742289e-06, "loss": 0.8907, "step": 99350 }, { "epoch": 0.7192338595843558, "grad_norm": 0.16877888143062592, "learning_rate": 4.280773379081703e-06, "loss": 0.8853, "step": 99360 }, { "epoch": 0.719306246244942, "grad_norm": 0.14914186298847198, "learning_rate": 4.2807009924211166e-06, "loss": 0.9001, "step": 99370 }, { "epoch": 0.7193786329055282, "grad_norm": 0.15811428427696228, "learning_rate": 4.280628605760531e-06, "loss": 0.8884, "step": 99380 }, { "epoch": 0.7194510195661143, "grad_norm": 0.15445317327976227, "learning_rate": 4.280556219099945e-06, "loss": 0.8969, "step": 99390 }, { "epoch": 0.7195234062267005, "grad_norm": 0.1613607257604599, "learning_rate": 4.280483832439358e-06, "loss": 0.8747, "step": 99400 }, { "epoch": 0.7195957928872867, "grad_norm": 0.1554788202047348, "learning_rate": 4.280411445778772e-06, "loss": 0.8949, "step": 99410 }, { "epoch": 0.7196681795478729, "grad_norm": 0.15722106397151947, "learning_rate": 4.280339059118186e-06, "loss": 0.888, "step": 99420 }, { "epoch": 0.719740566208459, "grad_norm": 0.1679772436618805, "learning_rate": 4.2802666724576e-06, "loss": 0.8853, "step": 99430 }, { "epoch": 0.7198129528690453, "grad_norm": 0.676620602607727, "learning_rate": 4.2801942857970136e-06, "loss": 0.884, "step": 99440 }, { "epoch": 0.7198853395296315, "grad_norm": 0.15480059385299683, "learning_rate": 4.280121899136427e-06, "loss": 0.8963, "step": 99450 }, { "epoch": 0.7199577261902177, "grad_norm": 0.15194228291511536, "learning_rate": 4.280049512475841e-06, "loss": 0.8896, "step": 99460 }, { "epoch": 0.7200301128508039, "grad_norm": 0.17319074273109436, "learning_rate": 4.279977125815255e-06, "loss": 0.8986, "step": 99470 }, { "epoch": 0.72010249951139, "grad_norm": 0.15072764456272125, "learning_rate": 4.279904739154669e-06, "loss": 0.8776, "step": 99480 }, { "epoch": 0.7201748861719762, "grad_norm": 0.16070221364498138, "learning_rate": 4.2798323524940825e-06, "loss": 0.8977, "step": 99490 }, { "epoch": 0.7202472728325624, "grad_norm": 0.156993567943573, "learning_rate": 4.279759965833496e-06, "loss": 0.8952, "step": 99500 }, { "epoch": 0.7203196594931486, "grad_norm": 0.1609060913324356, "learning_rate": 4.2796875791729106e-06, "loss": 0.896, "step": 99510 }, { "epoch": 0.7203920461537348, "grad_norm": 0.15467387437820435, "learning_rate": 4.279615192512324e-06, "loss": 0.8862, "step": 99520 }, { "epoch": 0.7204644328143209, "grad_norm": 0.2120065689086914, "learning_rate": 4.279542805851738e-06, "loss": 0.8828, "step": 99530 }, { "epoch": 0.7205368194749071, "grad_norm": 0.16634128987789154, "learning_rate": 4.279470419191151e-06, "loss": 0.8791, "step": 99540 }, { "epoch": 0.7206092061354934, "grad_norm": 0.18684551119804382, "learning_rate": 4.279398032530566e-06, "loss": 0.8796, "step": 99550 }, { "epoch": 0.7206815927960796, "grad_norm": 0.17714783549308777, "learning_rate": 4.2793256458699795e-06, "loss": 0.8972, "step": 99560 }, { "epoch": 0.7207539794566657, "grad_norm": 0.14424735307693481, "learning_rate": 4.279253259209393e-06, "loss": 0.8898, "step": 99570 }, { "epoch": 0.7208263661172519, "grad_norm": 0.1793050915002823, "learning_rate": 4.279180872548807e-06, "loss": 0.8837, "step": 99580 }, { "epoch": 0.7208987527778381, "grad_norm": 0.15566468238830566, "learning_rate": 4.279108485888221e-06, "loss": 0.8823, "step": 99590 }, { "epoch": 0.7209711394384243, "grad_norm": 0.14767152070999146, "learning_rate": 4.279036099227635e-06, "loss": 0.8748, "step": 99600 }, { "epoch": 0.7210435260990105, "grad_norm": 0.1632547825574875, "learning_rate": 4.2789637125670484e-06, "loss": 0.8904, "step": 99610 }, { "epoch": 0.7211159127595966, "grad_norm": 0.15623623132705688, "learning_rate": 4.278891325906462e-06, "loss": 0.8852, "step": 99620 }, { "epoch": 0.7211882994201828, "grad_norm": 0.20870515704154968, "learning_rate": 4.2788189392458765e-06, "loss": 0.8955, "step": 99630 }, { "epoch": 0.721260686080769, "grad_norm": 0.15594063699245453, "learning_rate": 4.27874655258529e-06, "loss": 0.9089, "step": 99640 }, { "epoch": 0.7213330727413553, "grad_norm": 0.16253912448883057, "learning_rate": 4.278674165924704e-06, "loss": 0.8859, "step": 99650 }, { "epoch": 0.7214054594019415, "grad_norm": 0.3714236319065094, "learning_rate": 4.278601779264117e-06, "loss": 0.8875, "step": 99660 }, { "epoch": 0.7214778460625276, "grad_norm": 0.14919225871562958, "learning_rate": 4.278529392603532e-06, "loss": 0.8868, "step": 99670 }, { "epoch": 0.7215502327231138, "grad_norm": 0.16863149404525757, "learning_rate": 4.2784570059429454e-06, "loss": 0.8998, "step": 99680 }, { "epoch": 0.7216226193837, "grad_norm": 0.15315097570419312, "learning_rate": 4.278384619282359e-06, "loss": 0.8859, "step": 99690 }, { "epoch": 0.7216950060442862, "grad_norm": 0.14236433804035187, "learning_rate": 4.278312232621773e-06, "loss": 0.8921, "step": 99700 }, { "epoch": 0.7217673927048723, "grad_norm": 0.16768266260623932, "learning_rate": 4.278239845961187e-06, "loss": 0.8741, "step": 99710 }, { "epoch": 0.7218397793654585, "grad_norm": 0.1807376593351364, "learning_rate": 4.278167459300601e-06, "loss": 0.888, "step": 99720 }, { "epoch": 0.7219121660260447, "grad_norm": 0.16242581605911255, "learning_rate": 4.278095072640014e-06, "loss": 0.8871, "step": 99730 }, { "epoch": 0.7219845526866309, "grad_norm": 0.1657247543334961, "learning_rate": 4.278022685979428e-06, "loss": 0.8905, "step": 99740 }, { "epoch": 0.722056939347217, "grad_norm": 0.1562010794878006, "learning_rate": 4.2779502993188424e-06, "loss": 0.865, "step": 99750 }, { "epoch": 0.7221293260078033, "grad_norm": 0.1501951813697815, "learning_rate": 4.277877912658256e-06, "loss": 0.8991, "step": 99760 }, { "epoch": 0.7222017126683895, "grad_norm": 0.15544991195201874, "learning_rate": 4.27780552599767e-06, "loss": 0.8812, "step": 99770 }, { "epoch": 0.7222740993289757, "grad_norm": 0.17391003668308258, "learning_rate": 4.277733139337083e-06, "loss": 0.8923, "step": 99780 }, { "epoch": 0.7223464859895619, "grad_norm": 0.16744978725910187, "learning_rate": 4.277660752676498e-06, "loss": 0.8973, "step": 99790 }, { "epoch": 0.722418872650148, "grad_norm": 0.1574922800064087, "learning_rate": 4.277588366015911e-06, "loss": 0.8932, "step": 99800 }, { "epoch": 0.7224912593107342, "grad_norm": 0.14947380125522614, "learning_rate": 4.277515979355324e-06, "loss": 0.8859, "step": 99810 }, { "epoch": 0.7225636459713204, "grad_norm": 0.16751673817634583, "learning_rate": 4.277443592694739e-06, "loss": 0.8879, "step": 99820 }, { "epoch": 0.7226360326319066, "grad_norm": 0.1539609283208847, "learning_rate": 4.277371206034152e-06, "loss": 0.8991, "step": 99830 }, { "epoch": 0.7227084192924927, "grad_norm": 0.19024856388568878, "learning_rate": 4.277298819373566e-06, "loss": 0.8886, "step": 99840 }, { "epoch": 0.7227808059530789, "grad_norm": 0.1468716859817505, "learning_rate": 4.2772264327129795e-06, "loss": 0.8967, "step": 99850 }, { "epoch": 0.7228531926136652, "grad_norm": 0.15970027446746826, "learning_rate": 4.277154046052394e-06, "loss": 0.8892, "step": 99860 }, { "epoch": 0.7229255792742514, "grad_norm": 0.1565774530172348, "learning_rate": 4.2770816593918075e-06, "loss": 0.8853, "step": 99870 }, { "epoch": 0.7229979659348376, "grad_norm": 0.19593319296836853, "learning_rate": 4.277009272731221e-06, "loss": 0.8866, "step": 99880 }, { "epoch": 0.7230703525954237, "grad_norm": 0.14920704066753387, "learning_rate": 4.276936886070635e-06, "loss": 0.8842, "step": 99890 }, { "epoch": 0.7231427392560099, "grad_norm": 0.18510152399539948, "learning_rate": 4.276864499410049e-06, "loss": 0.8763, "step": 99900 }, { "epoch": 0.7232151259165961, "grad_norm": 0.16031725704669952, "learning_rate": 4.276792112749463e-06, "loss": 0.8836, "step": 99910 }, { "epoch": 0.7232875125771823, "grad_norm": 0.18583592772483826, "learning_rate": 4.2767197260888765e-06, "loss": 0.8894, "step": 99920 }, { "epoch": 0.7233598992377684, "grad_norm": 0.15419869124889374, "learning_rate": 4.27664733942829e-06, "loss": 0.8858, "step": 99930 }, { "epoch": 0.7234322858983546, "grad_norm": 0.15521828830242157, "learning_rate": 4.2765749527677045e-06, "loss": 0.893, "step": 99940 }, { "epoch": 0.7235046725589408, "grad_norm": 0.15606524050235748, "learning_rate": 4.276502566107118e-06, "loss": 0.9011, "step": 99950 }, { "epoch": 0.723577059219527, "grad_norm": 0.15376631915569305, "learning_rate": 4.276430179446532e-06, "loss": 0.8898, "step": 99960 }, { "epoch": 0.7236494458801133, "grad_norm": 0.14439062774181366, "learning_rate": 4.276357792785945e-06, "loss": 0.8911, "step": 99970 }, { "epoch": 0.7237218325406994, "grad_norm": 0.1527758240699768, "learning_rate": 4.27628540612536e-06, "loss": 0.8911, "step": 99980 }, { "epoch": 0.7237942192012856, "grad_norm": 0.15964572131633759, "learning_rate": 4.2762130194647735e-06, "loss": 0.8951, "step": 99990 }, { "epoch": 0.7238666058618718, "grad_norm": 0.15122631192207336, "learning_rate": 4.276140632804187e-06, "loss": 0.8813, "step": 100000 }, { "epoch": 0.723938992522458, "grad_norm": 0.16392894089221954, "learning_rate": 4.276068246143601e-06, "loss": 0.8888, "step": 100010 }, { "epoch": 0.7240113791830441, "grad_norm": 0.1608874499797821, "learning_rate": 4.275995859483015e-06, "loss": 0.8825, "step": 100020 }, { "epoch": 0.7240837658436303, "grad_norm": 0.16042576730251312, "learning_rate": 4.275923472822429e-06, "loss": 0.8939, "step": 100030 }, { "epoch": 0.7241561525042165, "grad_norm": 0.15833930671215057, "learning_rate": 4.275851086161842e-06, "loss": 0.8763, "step": 100040 }, { "epoch": 0.7242285391648027, "grad_norm": 0.1503579467535019, "learning_rate": 4.275778699501256e-06, "loss": 0.8852, "step": 100050 }, { "epoch": 0.7243009258253889, "grad_norm": 0.1671973466873169, "learning_rate": 4.2757063128406705e-06, "loss": 0.8856, "step": 100060 }, { "epoch": 0.724373312485975, "grad_norm": 0.14335432648658752, "learning_rate": 4.275633926180084e-06, "loss": 0.8775, "step": 100070 }, { "epoch": 0.7244456991465613, "grad_norm": 0.1621866524219513, "learning_rate": 4.275561539519498e-06, "loss": 0.8868, "step": 100080 }, { "epoch": 0.7245180858071475, "grad_norm": 0.1560746431350708, "learning_rate": 4.275489152858911e-06, "loss": 0.8857, "step": 100090 }, { "epoch": 0.7245904724677337, "grad_norm": 0.1637454777956009, "learning_rate": 4.275416766198325e-06, "loss": 0.8863, "step": 100100 }, { "epoch": 0.7246628591283198, "grad_norm": 0.16514307260513306, "learning_rate": 4.275344379537739e-06, "loss": 0.893, "step": 100110 }, { "epoch": 0.724735245788906, "grad_norm": 0.15878024697303772, "learning_rate": 4.275271992877153e-06, "loss": 0.8852, "step": 100120 }, { "epoch": 0.7248076324494922, "grad_norm": 0.15798494219779968, "learning_rate": 4.275199606216567e-06, "loss": 0.8922, "step": 100130 }, { "epoch": 0.7248800191100784, "grad_norm": 0.1547410637140274, "learning_rate": 4.27512721955598e-06, "loss": 0.8779, "step": 100140 }, { "epoch": 0.7249524057706646, "grad_norm": 0.14897017180919647, "learning_rate": 4.275054832895395e-06, "loss": 0.8924, "step": 100150 }, { "epoch": 0.7250247924312507, "grad_norm": 0.1668456345796585, "learning_rate": 4.274982446234808e-06, "loss": 0.8803, "step": 100160 }, { "epoch": 0.7250971790918369, "grad_norm": 0.1529316008090973, "learning_rate": 4.274910059574222e-06, "loss": 0.8984, "step": 100170 }, { "epoch": 0.7251695657524232, "grad_norm": 0.14754995703697205, "learning_rate": 4.2748376729136356e-06, "loss": 0.8735, "step": 100180 }, { "epoch": 0.7252419524130094, "grad_norm": 0.16086558997631073, "learning_rate": 4.27476528625305e-06, "loss": 0.8852, "step": 100190 }, { "epoch": 0.7253143390735955, "grad_norm": 0.16596639156341553, "learning_rate": 4.274692899592464e-06, "loss": 0.9067, "step": 100200 }, { "epoch": 0.7253867257341817, "grad_norm": 0.18739081919193268, "learning_rate": 4.274620512931877e-06, "loss": 0.8872, "step": 100210 }, { "epoch": 0.7254591123947679, "grad_norm": 0.15418687462806702, "learning_rate": 4.274548126271291e-06, "loss": 0.8934, "step": 100220 }, { "epoch": 0.7255314990553541, "grad_norm": 0.16099806129932404, "learning_rate": 4.274475739610705e-06, "loss": 0.8943, "step": 100230 }, { "epoch": 0.7256038857159403, "grad_norm": 0.15143683552742004, "learning_rate": 4.274403352950119e-06, "loss": 0.8825, "step": 100240 }, { "epoch": 0.7256762723765264, "grad_norm": 0.15264058113098145, "learning_rate": 4.2743309662895326e-06, "loss": 0.8817, "step": 100250 }, { "epoch": 0.7257486590371126, "grad_norm": 0.1594517081975937, "learning_rate": 4.274258579628946e-06, "loss": 0.8968, "step": 100260 }, { "epoch": 0.7258210456976988, "grad_norm": 0.1539693921804428, "learning_rate": 4.274186192968361e-06, "loss": 0.8994, "step": 100270 }, { "epoch": 0.725893432358285, "grad_norm": 0.1556989848613739, "learning_rate": 4.274113806307774e-06, "loss": 0.8952, "step": 100280 }, { "epoch": 0.7259658190188713, "grad_norm": 0.15270110964775085, "learning_rate": 4.274041419647188e-06, "loss": 0.8867, "step": 100290 }, { "epoch": 0.7260382056794574, "grad_norm": 0.16615337133407593, "learning_rate": 4.2739690329866015e-06, "loss": 0.881, "step": 100300 }, { "epoch": 0.7261105923400436, "grad_norm": 0.15638002753257751, "learning_rate": 4.273896646326016e-06, "loss": 0.8889, "step": 100310 }, { "epoch": 0.7261829790006298, "grad_norm": 0.1828213930130005, "learning_rate": 4.27382425966543e-06, "loss": 0.8921, "step": 100320 }, { "epoch": 0.726255365661216, "grad_norm": 0.14612999558448792, "learning_rate": 4.273751873004843e-06, "loss": 0.889, "step": 100330 }, { "epoch": 0.7263277523218021, "grad_norm": 0.15252217650413513, "learning_rate": 4.273679486344257e-06, "loss": 0.8879, "step": 100340 }, { "epoch": 0.7264001389823883, "grad_norm": 0.1537008434534073, "learning_rate": 4.2736070996836704e-06, "loss": 0.899, "step": 100350 }, { "epoch": 0.7264725256429745, "grad_norm": 0.1628774255514145, "learning_rate": 4.273534713023084e-06, "loss": 0.8934, "step": 100360 }, { "epoch": 0.7265449123035607, "grad_norm": 0.1571381837129593, "learning_rate": 4.273462326362498e-06, "loss": 0.8825, "step": 100370 }, { "epoch": 0.7266172989641468, "grad_norm": 0.16100090742111206, "learning_rate": 4.273389939701912e-06, "loss": 0.8901, "step": 100380 }, { "epoch": 0.7266896856247331, "grad_norm": 0.14620473980903625, "learning_rate": 4.273317553041326e-06, "loss": 0.8886, "step": 100390 }, { "epoch": 0.7267620722853193, "grad_norm": 0.15393929183483124, "learning_rate": 4.273245166380739e-06, "loss": 0.8974, "step": 100400 }, { "epoch": 0.7268344589459055, "grad_norm": 0.16305720806121826, "learning_rate": 4.273172779720153e-06, "loss": 0.884, "step": 100410 }, { "epoch": 0.7269068456064917, "grad_norm": 0.1579166203737259, "learning_rate": 4.2731003930595674e-06, "loss": 0.8762, "step": 100420 }, { "epoch": 0.7269792322670778, "grad_norm": 0.14451733231544495, "learning_rate": 4.273028006398981e-06, "loss": 0.8893, "step": 100430 }, { "epoch": 0.727051618927664, "grad_norm": 0.14748463034629822, "learning_rate": 4.272955619738395e-06, "loss": 0.8861, "step": 100440 }, { "epoch": 0.7271240055882502, "grad_norm": 0.16086864471435547, "learning_rate": 4.272883233077808e-06, "loss": 0.8885, "step": 100450 }, { "epoch": 0.7271963922488364, "grad_norm": 0.14903460443019867, "learning_rate": 4.272810846417223e-06, "loss": 0.8832, "step": 100460 }, { "epoch": 0.7272687789094225, "grad_norm": 0.16146931052207947, "learning_rate": 4.272738459756636e-06, "loss": 0.8856, "step": 100470 }, { "epoch": 0.7273411655700087, "grad_norm": 0.1918579488992691, "learning_rate": 4.27266607309605e-06, "loss": 0.8953, "step": 100480 }, { "epoch": 0.7274135522305949, "grad_norm": 0.1559261530637741, "learning_rate": 4.272593686435464e-06, "loss": 0.8855, "step": 100490 }, { "epoch": 0.7274859388911812, "grad_norm": 0.15730531513690948, "learning_rate": 4.272521299774878e-06, "loss": 0.8946, "step": 100500 }, { "epoch": 0.7275583255517674, "grad_norm": 0.1542884111404419, "learning_rate": 4.272448913114292e-06, "loss": 0.8932, "step": 100510 }, { "epoch": 0.7276307122123535, "grad_norm": 0.15835349261760712, "learning_rate": 4.272376526453705e-06, "loss": 0.903, "step": 100520 }, { "epoch": 0.7277030988729397, "grad_norm": 0.1627766340970993, "learning_rate": 4.272304139793119e-06, "loss": 0.8901, "step": 100530 }, { "epoch": 0.7277754855335259, "grad_norm": 0.15794923901557922, "learning_rate": 4.272231753132533e-06, "loss": 0.8939, "step": 100540 }, { "epoch": 0.7278478721941121, "grad_norm": 0.15838219225406647, "learning_rate": 4.272159366471947e-06, "loss": 0.8933, "step": 100550 }, { "epoch": 0.7279202588546982, "grad_norm": 0.15067455172538757, "learning_rate": 4.272086979811361e-06, "loss": 0.8892, "step": 100560 }, { "epoch": 0.7279926455152844, "grad_norm": 0.158193901181221, "learning_rate": 4.272014593150774e-06, "loss": 0.8793, "step": 100570 }, { "epoch": 0.7280650321758706, "grad_norm": 0.15686438977718353, "learning_rate": 4.271942206490189e-06, "loss": 0.8775, "step": 100580 }, { "epoch": 0.7281374188364568, "grad_norm": 0.1513579934835434, "learning_rate": 4.271869819829602e-06, "loss": 0.8954, "step": 100590 }, { "epoch": 0.728209805497043, "grad_norm": 0.15484221279621124, "learning_rate": 4.271797433169016e-06, "loss": 0.8944, "step": 100600 }, { "epoch": 0.7282821921576292, "grad_norm": 0.14668524265289307, "learning_rate": 4.2717250465084295e-06, "loss": 0.8753, "step": 100610 }, { "epoch": 0.7283545788182154, "grad_norm": 0.1556718647480011, "learning_rate": 4.271652659847844e-06, "loss": 0.8875, "step": 100620 }, { "epoch": 0.7284269654788016, "grad_norm": 0.16115809977054596, "learning_rate": 4.271580273187258e-06, "loss": 0.8909, "step": 100630 }, { "epoch": 0.7284993521393878, "grad_norm": 0.152574360370636, "learning_rate": 4.271507886526671e-06, "loss": 0.8916, "step": 100640 }, { "epoch": 0.728571738799974, "grad_norm": 0.16087447106838226, "learning_rate": 4.271435499866085e-06, "loss": 0.8859, "step": 100650 }, { "epoch": 0.7286441254605601, "grad_norm": 0.21453756093978882, "learning_rate": 4.271363113205499e-06, "loss": 0.8862, "step": 100660 }, { "epoch": 0.7287165121211463, "grad_norm": 0.156850203871727, "learning_rate": 4.271290726544913e-06, "loss": 0.8844, "step": 100670 }, { "epoch": 0.7287888987817325, "grad_norm": 0.16212424635887146, "learning_rate": 4.2712183398843265e-06, "loss": 0.8892, "step": 100680 }, { "epoch": 0.7288612854423187, "grad_norm": 0.1557214856147766, "learning_rate": 4.27114595322374e-06, "loss": 0.8843, "step": 100690 }, { "epoch": 0.7289336721029048, "grad_norm": 0.1972065418958664, "learning_rate": 4.271073566563154e-06, "loss": 0.8747, "step": 100700 }, { "epoch": 0.7290060587634911, "grad_norm": 0.1524554044008255, "learning_rate": 4.271001179902568e-06, "loss": 0.8857, "step": 100710 }, { "epoch": 0.7290784454240773, "grad_norm": 0.15759634971618652, "learning_rate": 4.270928793241982e-06, "loss": 0.8856, "step": 100720 }, { "epoch": 0.7291508320846635, "grad_norm": 0.1459013819694519, "learning_rate": 4.2708564065813955e-06, "loss": 0.8737, "step": 100730 }, { "epoch": 0.7292232187452496, "grad_norm": 0.15204448997974396, "learning_rate": 4.270784019920809e-06, "loss": 0.8969, "step": 100740 }, { "epoch": 0.7292956054058358, "grad_norm": 0.16425342857837677, "learning_rate": 4.2707116332602235e-06, "loss": 0.8876, "step": 100750 }, { "epoch": 0.729367992066422, "grad_norm": 0.16584093868732452, "learning_rate": 4.270639246599637e-06, "loss": 0.885, "step": 100760 }, { "epoch": 0.7294403787270082, "grad_norm": 0.16400320827960968, "learning_rate": 4.270566859939051e-06, "loss": 0.8939, "step": 100770 }, { "epoch": 0.7295127653875944, "grad_norm": 0.15366198122501373, "learning_rate": 4.270494473278464e-06, "loss": 0.8716, "step": 100780 }, { "epoch": 0.7295851520481805, "grad_norm": 0.15150944888591766, "learning_rate": 4.270422086617879e-06, "loss": 0.89, "step": 100790 }, { "epoch": 0.7296575387087667, "grad_norm": 0.1443614810705185, "learning_rate": 4.2703496999572925e-06, "loss": 0.8889, "step": 100800 }, { "epoch": 0.7297299253693529, "grad_norm": 0.15036936104297638, "learning_rate": 4.270277313296706e-06, "loss": 0.9045, "step": 100810 }, { "epoch": 0.7298023120299392, "grad_norm": 0.16487093269824982, "learning_rate": 4.27020492663612e-06, "loss": 0.8815, "step": 100820 }, { "epoch": 0.7298746986905253, "grad_norm": 0.15365450084209442, "learning_rate": 4.270132539975534e-06, "loss": 0.894, "step": 100830 }, { "epoch": 0.7299470853511115, "grad_norm": 0.1608169972896576, "learning_rate": 4.270060153314948e-06, "loss": 0.9013, "step": 100840 }, { "epoch": 0.7300194720116977, "grad_norm": 0.16800221800804138, "learning_rate": 4.269987766654361e-06, "loss": 0.8778, "step": 100850 }, { "epoch": 0.7300918586722839, "grad_norm": 0.15907716751098633, "learning_rate": 4.269915379993775e-06, "loss": 0.8859, "step": 100860 }, { "epoch": 0.73016424533287, "grad_norm": 0.15400275588035583, "learning_rate": 4.269842993333189e-06, "loss": 0.8904, "step": 100870 }, { "epoch": 0.7302366319934562, "grad_norm": 0.1577254682779312, "learning_rate": 4.269770606672602e-06, "loss": 0.8833, "step": 100880 }, { "epoch": 0.7303090186540424, "grad_norm": 0.1583620309829712, "learning_rate": 4.269698220012016e-06, "loss": 0.8808, "step": 100890 }, { "epoch": 0.7303814053146286, "grad_norm": 0.1512048840522766, "learning_rate": 4.26962583335143e-06, "loss": 0.892, "step": 100900 }, { "epoch": 0.7304537919752148, "grad_norm": 0.1588258147239685, "learning_rate": 4.269553446690844e-06, "loss": 0.8959, "step": 100910 }, { "epoch": 0.730526178635801, "grad_norm": 0.15298683941364288, "learning_rate": 4.2694810600302576e-06, "loss": 0.885, "step": 100920 }, { "epoch": 0.7305985652963872, "grad_norm": 0.15516585111618042, "learning_rate": 4.269408673369671e-06, "loss": 0.8925, "step": 100930 }, { "epoch": 0.7306709519569734, "grad_norm": 0.1673891842365265, "learning_rate": 4.269336286709086e-06, "loss": 0.9016, "step": 100940 }, { "epoch": 0.7307433386175596, "grad_norm": 0.16964946687221527, "learning_rate": 4.269263900048499e-06, "loss": 0.9035, "step": 100950 }, { "epoch": 0.7308157252781458, "grad_norm": 0.21331481635570526, "learning_rate": 4.269191513387913e-06, "loss": 0.8832, "step": 100960 }, { "epoch": 0.7308881119387319, "grad_norm": 0.1504168063402176, "learning_rate": 4.2691191267273265e-06, "loss": 0.8759, "step": 100970 }, { "epoch": 0.7309604985993181, "grad_norm": 0.14905038475990295, "learning_rate": 4.269046740066741e-06, "loss": 0.887, "step": 100980 }, { "epoch": 0.7310328852599043, "grad_norm": 0.1641206294298172, "learning_rate": 4.2689743534061546e-06, "loss": 0.8813, "step": 100990 }, { "epoch": 0.7311052719204905, "grad_norm": 0.3293008804321289, "learning_rate": 4.268901966745568e-06, "loss": 0.874, "step": 101000 }, { "epoch": 0.7311776585810766, "grad_norm": 0.1844899207353592, "learning_rate": 4.268829580084982e-06, "loss": 0.8987, "step": 101010 }, { "epoch": 0.7312500452416628, "grad_norm": 0.17655082046985626, "learning_rate": 4.268757193424396e-06, "loss": 0.8979, "step": 101020 }, { "epoch": 0.7313224319022491, "grad_norm": 0.15127238631248474, "learning_rate": 4.26868480676381e-06, "loss": 0.8825, "step": 101030 }, { "epoch": 0.7313948185628353, "grad_norm": 0.15606975555419922, "learning_rate": 4.2686124201032235e-06, "loss": 0.8874, "step": 101040 }, { "epoch": 0.7314672052234215, "grad_norm": 0.1606336385011673, "learning_rate": 4.268540033442637e-06, "loss": 0.8774, "step": 101050 }, { "epoch": 0.7315395918840076, "grad_norm": 0.16057442128658295, "learning_rate": 4.268467646782052e-06, "loss": 0.8981, "step": 101060 }, { "epoch": 0.7316119785445938, "grad_norm": 0.1635402888059616, "learning_rate": 4.268395260121465e-06, "loss": 0.8952, "step": 101070 }, { "epoch": 0.73168436520518, "grad_norm": 0.1442507654428482, "learning_rate": 4.268322873460879e-06, "loss": 0.8981, "step": 101080 }, { "epoch": 0.7317567518657662, "grad_norm": 0.13882668316364288, "learning_rate": 4.2682504868002924e-06, "loss": 0.8905, "step": 101090 }, { "epoch": 0.7318291385263523, "grad_norm": 0.15066657960414886, "learning_rate": 4.268178100139707e-06, "loss": 0.8987, "step": 101100 }, { "epoch": 0.7319015251869385, "grad_norm": 0.1645701825618744, "learning_rate": 4.2681057134791205e-06, "loss": 0.8886, "step": 101110 }, { "epoch": 0.7319739118475247, "grad_norm": 0.15226510167121887, "learning_rate": 4.268033326818534e-06, "loss": 0.8932, "step": 101120 }, { "epoch": 0.7320462985081109, "grad_norm": 0.15741746127605438, "learning_rate": 4.267960940157948e-06, "loss": 0.8793, "step": 101130 }, { "epoch": 0.7321186851686972, "grad_norm": 0.15548457205295563, "learning_rate": 4.267888553497362e-06, "loss": 0.881, "step": 101140 }, { "epoch": 0.7321910718292833, "grad_norm": 0.2032490223646164, "learning_rate": 4.267816166836776e-06, "loss": 0.8916, "step": 101150 }, { "epoch": 0.7322634584898695, "grad_norm": 0.2026352435350418, "learning_rate": 4.2677437801761894e-06, "loss": 0.8932, "step": 101160 }, { "epoch": 0.7323358451504557, "grad_norm": 0.15487812459468842, "learning_rate": 4.267671393515603e-06, "loss": 0.8805, "step": 101170 }, { "epoch": 0.7324082318110419, "grad_norm": 0.16896316409111023, "learning_rate": 4.2675990068550175e-06, "loss": 0.8811, "step": 101180 }, { "epoch": 0.732480618471628, "grad_norm": 0.20788662135601044, "learning_rate": 4.267526620194431e-06, "loss": 0.8858, "step": 101190 }, { "epoch": 0.7325530051322142, "grad_norm": 0.15774105489253998, "learning_rate": 4.267454233533845e-06, "loss": 0.9004, "step": 101200 }, { "epoch": 0.7326253917928004, "grad_norm": 0.1576681286096573, "learning_rate": 4.267381846873258e-06, "loss": 0.9003, "step": 101210 }, { "epoch": 0.7326977784533866, "grad_norm": 0.19305309653282166, "learning_rate": 4.267309460212673e-06, "loss": 0.894, "step": 101220 }, { "epoch": 0.7327701651139727, "grad_norm": 0.16262653470039368, "learning_rate": 4.2672370735520864e-06, "loss": 0.8774, "step": 101230 }, { "epoch": 0.732842551774559, "grad_norm": 0.14455868303775787, "learning_rate": 4.2671646868915e-06, "loss": 0.8963, "step": 101240 }, { "epoch": 0.7329149384351452, "grad_norm": 0.19319617748260498, "learning_rate": 4.267092300230914e-06, "loss": 0.8832, "step": 101250 }, { "epoch": 0.7329873250957314, "grad_norm": 0.2210075408220291, "learning_rate": 4.267019913570328e-06, "loss": 0.8884, "step": 101260 }, { "epoch": 0.7330597117563176, "grad_norm": 0.16288064420223236, "learning_rate": 4.266947526909742e-06, "loss": 0.8762, "step": 101270 }, { "epoch": 0.7331320984169037, "grad_norm": 0.16447566449642181, "learning_rate": 4.266875140249155e-06, "loss": 0.8895, "step": 101280 }, { "epoch": 0.7332044850774899, "grad_norm": 0.15744997560977936, "learning_rate": 4.266802753588569e-06, "loss": 0.8948, "step": 101290 }, { "epoch": 0.7332768717380761, "grad_norm": 0.1907864809036255, "learning_rate": 4.2667303669279835e-06, "loss": 0.8816, "step": 101300 }, { "epoch": 0.7333492583986623, "grad_norm": 0.15406233072280884, "learning_rate": 4.266657980267397e-06, "loss": 0.8851, "step": 101310 }, { "epoch": 0.7334216450592485, "grad_norm": 0.16773687303066254, "learning_rate": 4.266585593606811e-06, "loss": 0.8864, "step": 101320 }, { "epoch": 0.7334940317198346, "grad_norm": 0.15521180629730225, "learning_rate": 4.266513206946224e-06, "loss": 0.8834, "step": 101330 }, { "epoch": 0.7335664183804208, "grad_norm": 0.15735216438770294, "learning_rate": 4.266440820285638e-06, "loss": 0.8814, "step": 101340 }, { "epoch": 0.7336388050410071, "grad_norm": 0.14961345493793488, "learning_rate": 4.266368433625052e-06, "loss": 0.8863, "step": 101350 }, { "epoch": 0.7337111917015933, "grad_norm": 0.15976914763450623, "learning_rate": 4.266296046964466e-06, "loss": 0.8944, "step": 101360 }, { "epoch": 0.7337835783621794, "grad_norm": 0.16493217647075653, "learning_rate": 4.26622366030388e-06, "loss": 0.8897, "step": 101370 }, { "epoch": 0.7338559650227656, "grad_norm": 0.15834347903728485, "learning_rate": 4.266151273643293e-06, "loss": 0.8842, "step": 101380 }, { "epoch": 0.7339283516833518, "grad_norm": 0.14904192090034485, "learning_rate": 4.266078886982708e-06, "loss": 0.8796, "step": 101390 }, { "epoch": 0.734000738343938, "grad_norm": 0.16918309032917023, "learning_rate": 4.2660065003221205e-06, "loss": 0.8919, "step": 101400 }, { "epoch": 0.7340731250045242, "grad_norm": 0.14453521370887756, "learning_rate": 4.265934113661535e-06, "loss": 0.888, "step": 101410 }, { "epoch": 0.7341455116651103, "grad_norm": 0.15393070876598358, "learning_rate": 4.2658617270009485e-06, "loss": 0.8833, "step": 101420 }, { "epoch": 0.7342178983256965, "grad_norm": 0.1721397191286087, "learning_rate": 4.265789340340362e-06, "loss": 0.8918, "step": 101430 }, { "epoch": 0.7342902849862827, "grad_norm": 0.15190811455249786, "learning_rate": 4.265716953679776e-06, "loss": 0.8889, "step": 101440 }, { "epoch": 0.734362671646869, "grad_norm": 0.14311714470386505, "learning_rate": 4.26564456701919e-06, "loss": 0.8907, "step": 101450 }, { "epoch": 0.7344350583074551, "grad_norm": 0.17075756192207336, "learning_rate": 4.265572180358604e-06, "loss": 0.8994, "step": 101460 }, { "epoch": 0.7345074449680413, "grad_norm": 0.1581360548734665, "learning_rate": 4.2654997936980175e-06, "loss": 0.8876, "step": 101470 }, { "epoch": 0.7345798316286275, "grad_norm": 0.15147803723812103, "learning_rate": 4.265427407037431e-06, "loss": 0.9041, "step": 101480 }, { "epoch": 0.7346522182892137, "grad_norm": 0.161724254488945, "learning_rate": 4.265355020376845e-06, "loss": 0.8929, "step": 101490 }, { "epoch": 0.7347246049497999, "grad_norm": 0.1595461070537567, "learning_rate": 4.265282633716259e-06, "loss": 0.8882, "step": 101500 }, { "epoch": 0.734796991610386, "grad_norm": 0.15513767302036285, "learning_rate": 4.265210247055673e-06, "loss": 0.8998, "step": 101510 }, { "epoch": 0.7348693782709722, "grad_norm": 0.15473343431949615, "learning_rate": 4.265137860395086e-06, "loss": 0.9025, "step": 101520 }, { "epoch": 0.7349417649315584, "grad_norm": 0.16891096532344818, "learning_rate": 4.2650654737345e-06, "loss": 0.8868, "step": 101530 }, { "epoch": 0.7350141515921446, "grad_norm": 0.15746982395648956, "learning_rate": 4.2649930870739145e-06, "loss": 0.9042, "step": 101540 }, { "epoch": 0.7350865382527307, "grad_norm": 0.16814929246902466, "learning_rate": 4.264920700413328e-06, "loss": 0.9029, "step": 101550 }, { "epoch": 0.735158924913317, "grad_norm": 0.14766864478588104, "learning_rate": 4.264848313752742e-06, "loss": 0.8741, "step": 101560 }, { "epoch": 0.7352313115739032, "grad_norm": 0.16124971210956573, "learning_rate": 4.264775927092155e-06, "loss": 0.8771, "step": 101570 }, { "epoch": 0.7353036982344894, "grad_norm": 0.16348862648010254, "learning_rate": 4.26470354043157e-06, "loss": 0.8955, "step": 101580 }, { "epoch": 0.7353760848950756, "grad_norm": 0.16436149179935455, "learning_rate": 4.264631153770983e-06, "loss": 0.8889, "step": 101590 }, { "epoch": 0.7354484715556617, "grad_norm": 0.14332278072834015, "learning_rate": 4.264558767110397e-06, "loss": 0.8866, "step": 101600 }, { "epoch": 0.7355208582162479, "grad_norm": 0.15618641674518585, "learning_rate": 4.264486380449811e-06, "loss": 0.8853, "step": 101610 }, { "epoch": 0.7355932448768341, "grad_norm": 0.1528848558664322, "learning_rate": 4.264413993789225e-06, "loss": 0.8882, "step": 101620 }, { "epoch": 0.7356656315374203, "grad_norm": 0.1625969558954239, "learning_rate": 4.264341607128639e-06, "loss": 0.8774, "step": 101630 }, { "epoch": 0.7357380181980064, "grad_norm": 0.16516810655593872, "learning_rate": 4.264269220468052e-06, "loss": 0.887, "step": 101640 }, { "epoch": 0.7358104048585926, "grad_norm": 0.1680750846862793, "learning_rate": 4.264196833807466e-06, "loss": 0.887, "step": 101650 }, { "epoch": 0.7358827915191788, "grad_norm": 0.16871851682662964, "learning_rate": 4.26412444714688e-06, "loss": 0.8886, "step": 101660 }, { "epoch": 0.7359551781797651, "grad_norm": 0.15388178825378418, "learning_rate": 4.264052060486294e-06, "loss": 0.8906, "step": 101670 }, { "epoch": 0.7360275648403513, "grad_norm": 0.15053468942642212, "learning_rate": 4.263979673825708e-06, "loss": 0.8882, "step": 101680 }, { "epoch": 0.7360999515009374, "grad_norm": 0.16372324526309967, "learning_rate": 4.263907287165121e-06, "loss": 0.8683, "step": 101690 }, { "epoch": 0.7361723381615236, "grad_norm": 0.1610361635684967, "learning_rate": 4.263834900504536e-06, "loss": 0.8854, "step": 101700 }, { "epoch": 0.7362447248221098, "grad_norm": 0.16573579609394073, "learning_rate": 4.263762513843949e-06, "loss": 0.9022, "step": 101710 }, { "epoch": 0.736317111482696, "grad_norm": 0.16359181702136993, "learning_rate": 4.263690127183363e-06, "loss": 0.8873, "step": 101720 }, { "epoch": 0.7363894981432821, "grad_norm": 0.296243280172348, "learning_rate": 4.2636177405227766e-06, "loss": 0.8911, "step": 101730 }, { "epoch": 0.7364618848038683, "grad_norm": 0.16400732100009918, "learning_rate": 4.263545353862191e-06, "loss": 0.8859, "step": 101740 }, { "epoch": 0.7365342714644545, "grad_norm": 0.15146301686763763, "learning_rate": 4.263472967201605e-06, "loss": 0.8912, "step": 101750 }, { "epoch": 0.7366066581250407, "grad_norm": 0.1514797806739807, "learning_rate": 4.263400580541018e-06, "loss": 0.8805, "step": 101760 }, { "epoch": 0.736679044785627, "grad_norm": 0.15184786915779114, "learning_rate": 4.263328193880432e-06, "loss": 0.8869, "step": 101770 }, { "epoch": 0.7367514314462131, "grad_norm": 0.1572055220603943, "learning_rate": 4.263255807219846e-06, "loss": 0.8827, "step": 101780 }, { "epoch": 0.7368238181067993, "grad_norm": 0.16821490228176117, "learning_rate": 4.26318342055926e-06, "loss": 0.9025, "step": 101790 }, { "epoch": 0.7368962047673855, "grad_norm": 0.14948932826519012, "learning_rate": 4.263111033898674e-06, "loss": 0.8809, "step": 101800 }, { "epoch": 0.7369685914279717, "grad_norm": 0.1437656432390213, "learning_rate": 4.263038647238087e-06, "loss": 0.894, "step": 101810 }, { "epoch": 0.7370409780885578, "grad_norm": 0.16621340811252594, "learning_rate": 4.262966260577502e-06, "loss": 0.881, "step": 101820 }, { "epoch": 0.737113364749144, "grad_norm": 0.14426559209823608, "learning_rate": 4.262893873916915e-06, "loss": 0.8927, "step": 101830 }, { "epoch": 0.7371857514097302, "grad_norm": 0.16779235005378723, "learning_rate": 4.262821487256329e-06, "loss": 0.889, "step": 101840 }, { "epoch": 0.7372581380703164, "grad_norm": 0.14878766238689423, "learning_rate": 4.2627491005957425e-06, "loss": 0.8903, "step": 101850 }, { "epoch": 0.7373305247309025, "grad_norm": 0.15575487911701202, "learning_rate": 4.262676713935157e-06, "loss": 0.8779, "step": 101860 }, { "epoch": 0.7374029113914887, "grad_norm": 0.1539398729801178, "learning_rate": 4.262604327274571e-06, "loss": 0.8843, "step": 101870 }, { "epoch": 0.737475298052075, "grad_norm": 0.1627013087272644, "learning_rate": 4.262531940613984e-06, "loss": 0.8893, "step": 101880 }, { "epoch": 0.7375476847126612, "grad_norm": 0.17876599729061127, "learning_rate": 4.262459553953398e-06, "loss": 0.8884, "step": 101890 }, { "epoch": 0.7376200713732474, "grad_norm": 0.16160640120506287, "learning_rate": 4.262387167292812e-06, "loss": 0.8803, "step": 101900 }, { "epoch": 0.7376924580338335, "grad_norm": 0.18297509849071503, "learning_rate": 4.262314780632226e-06, "loss": 0.8956, "step": 101910 }, { "epoch": 0.7377648446944197, "grad_norm": 0.1643095761537552, "learning_rate": 4.2622423939716395e-06, "loss": 0.8855, "step": 101920 }, { "epoch": 0.7378372313550059, "grad_norm": 0.17934858798980713, "learning_rate": 4.262170007311053e-06, "loss": 0.882, "step": 101930 }, { "epoch": 0.7379096180155921, "grad_norm": 0.15763209760189056, "learning_rate": 4.262097620650467e-06, "loss": 0.8848, "step": 101940 }, { "epoch": 0.7379820046761782, "grad_norm": 0.1518256813287735, "learning_rate": 4.26202523398988e-06, "loss": 0.8955, "step": 101950 }, { "epoch": 0.7380543913367644, "grad_norm": 0.15647168457508087, "learning_rate": 4.261952847329294e-06, "loss": 0.9097, "step": 101960 }, { "epoch": 0.7381267779973506, "grad_norm": 0.15015296638011932, "learning_rate": 4.2618804606687084e-06, "loss": 0.8752, "step": 101970 }, { "epoch": 0.7381991646579369, "grad_norm": 0.15466606616973877, "learning_rate": 4.261808074008122e-06, "loss": 0.8849, "step": 101980 }, { "epoch": 0.7382715513185231, "grad_norm": 0.17152267694473267, "learning_rate": 4.261735687347536e-06, "loss": 0.8728, "step": 101990 }, { "epoch": 0.7383439379791092, "grad_norm": 0.14666572213172913, "learning_rate": 4.261663300686949e-06, "loss": 0.8757, "step": 102000 }, { "epoch": 0.7384163246396954, "grad_norm": 0.1615079790353775, "learning_rate": 4.261590914026364e-06, "loss": 0.895, "step": 102010 }, { "epoch": 0.7384887113002816, "grad_norm": 0.17585870623588562, "learning_rate": 4.261518527365777e-06, "loss": 0.8775, "step": 102020 }, { "epoch": 0.7385610979608678, "grad_norm": 0.177878275513649, "learning_rate": 4.261446140705191e-06, "loss": 0.8664, "step": 102030 }, { "epoch": 0.738633484621454, "grad_norm": 0.15934938192367554, "learning_rate": 4.261373754044605e-06, "loss": 0.8739, "step": 102040 }, { "epoch": 0.7387058712820401, "grad_norm": 0.1503947675228119, "learning_rate": 4.261301367384019e-06, "loss": 0.8869, "step": 102050 }, { "epoch": 0.7387782579426263, "grad_norm": 0.1672418862581253, "learning_rate": 4.261228980723433e-06, "loss": 0.8923, "step": 102060 }, { "epoch": 0.7388506446032125, "grad_norm": 0.15783722698688507, "learning_rate": 4.261156594062846e-06, "loss": 0.9022, "step": 102070 }, { "epoch": 0.7389230312637987, "grad_norm": 0.15607894957065582, "learning_rate": 4.26108420740226e-06, "loss": 0.8853, "step": 102080 }, { "epoch": 0.738995417924385, "grad_norm": 0.14554698765277863, "learning_rate": 4.261011820741674e-06, "loss": 0.8896, "step": 102090 }, { "epoch": 0.7390678045849711, "grad_norm": 0.15600426495075226, "learning_rate": 4.260939434081088e-06, "loss": 0.8755, "step": 102100 }, { "epoch": 0.7391401912455573, "grad_norm": 0.14329098165035248, "learning_rate": 4.260867047420502e-06, "loss": 0.9022, "step": 102110 }, { "epoch": 0.7392125779061435, "grad_norm": 0.1529379040002823, "learning_rate": 4.260794660759915e-06, "loss": 0.8871, "step": 102120 }, { "epoch": 0.7392849645667297, "grad_norm": 0.15350185334682465, "learning_rate": 4.260722274099329e-06, "loss": 0.8908, "step": 102130 }, { "epoch": 0.7393573512273158, "grad_norm": 0.17035044729709625, "learning_rate": 4.260649887438743e-06, "loss": 0.8809, "step": 102140 }, { "epoch": 0.739429737887902, "grad_norm": 0.14859919250011444, "learning_rate": 4.260577500778157e-06, "loss": 0.901, "step": 102150 }, { "epoch": 0.7395021245484882, "grad_norm": 0.15911567211151123, "learning_rate": 4.2605051141175705e-06, "loss": 0.8832, "step": 102160 }, { "epoch": 0.7395745112090744, "grad_norm": 0.17544224858283997, "learning_rate": 4.260432727456984e-06, "loss": 0.884, "step": 102170 }, { "epoch": 0.7396468978696605, "grad_norm": 0.15622149407863617, "learning_rate": 4.260360340796399e-06, "loss": 0.8888, "step": 102180 }, { "epoch": 0.7397192845302467, "grad_norm": 0.20478832721710205, "learning_rate": 4.260287954135812e-06, "loss": 0.8916, "step": 102190 }, { "epoch": 0.739791671190833, "grad_norm": 0.15085692703723907, "learning_rate": 4.260215567475226e-06, "loss": 0.8811, "step": 102200 }, { "epoch": 0.7398640578514192, "grad_norm": 0.15468019247055054, "learning_rate": 4.2601431808146395e-06, "loss": 0.8901, "step": 102210 }, { "epoch": 0.7399364445120054, "grad_norm": 0.15799905359745026, "learning_rate": 4.260070794154054e-06, "loss": 0.8872, "step": 102220 }, { "epoch": 0.7400088311725915, "grad_norm": 0.14824278652668, "learning_rate": 4.2599984074934675e-06, "loss": 0.8929, "step": 102230 }, { "epoch": 0.7400812178331777, "grad_norm": 0.15417352318763733, "learning_rate": 4.259926020832881e-06, "loss": 0.8917, "step": 102240 }, { "epoch": 0.7401536044937639, "grad_norm": 0.16246527433395386, "learning_rate": 4.259853634172295e-06, "loss": 0.8828, "step": 102250 }, { "epoch": 0.7402259911543501, "grad_norm": 0.14646829664707184, "learning_rate": 4.259781247511709e-06, "loss": 0.8966, "step": 102260 }, { "epoch": 0.7402983778149362, "grad_norm": 0.15677151083946228, "learning_rate": 4.259708860851123e-06, "loss": 0.8897, "step": 102270 }, { "epoch": 0.7403707644755224, "grad_norm": 0.16189172863960266, "learning_rate": 4.2596364741905365e-06, "loss": 0.8789, "step": 102280 }, { "epoch": 0.7404431511361086, "grad_norm": 0.15842679142951965, "learning_rate": 4.25956408752995e-06, "loss": 0.8876, "step": 102290 }, { "epoch": 0.7405155377966949, "grad_norm": 0.14995113015174866, "learning_rate": 4.2594917008693646e-06, "loss": 0.8881, "step": 102300 }, { "epoch": 0.740587924457281, "grad_norm": 0.1539173424243927, "learning_rate": 4.259419314208778e-06, "loss": 0.8848, "step": 102310 }, { "epoch": 0.7406603111178672, "grad_norm": 0.17772093415260315, "learning_rate": 4.259346927548192e-06, "loss": 0.8902, "step": 102320 }, { "epoch": 0.7407326977784534, "grad_norm": 0.1554667055606842, "learning_rate": 4.259274540887605e-06, "loss": 0.8865, "step": 102330 }, { "epoch": 0.7408050844390396, "grad_norm": 0.15641778707504272, "learning_rate": 4.25920215422702e-06, "loss": 0.9031, "step": 102340 }, { "epoch": 0.7408774710996258, "grad_norm": 0.15458346903324127, "learning_rate": 4.2591297675664335e-06, "loss": 0.8843, "step": 102350 }, { "epoch": 0.7409498577602119, "grad_norm": 0.15070627629756927, "learning_rate": 4.259057380905847e-06, "loss": 0.8975, "step": 102360 }, { "epoch": 0.7410222444207981, "grad_norm": 0.15110282599925995, "learning_rate": 4.258984994245261e-06, "loss": 0.8928, "step": 102370 }, { "epoch": 0.7410946310813843, "grad_norm": 0.1538836807012558, "learning_rate": 4.258912607584675e-06, "loss": 0.8946, "step": 102380 }, { "epoch": 0.7411670177419705, "grad_norm": 0.15167924761772156, "learning_rate": 4.258840220924089e-06, "loss": 0.8839, "step": 102390 }, { "epoch": 0.7412394044025566, "grad_norm": 0.15393514931201935, "learning_rate": 4.258767834263502e-06, "loss": 0.887, "step": 102400 }, { "epoch": 0.7413117910631429, "grad_norm": 0.1549709439277649, "learning_rate": 4.258695447602916e-06, "loss": 0.8871, "step": 102410 }, { "epoch": 0.7413841777237291, "grad_norm": 0.16167686879634857, "learning_rate": 4.2586230609423305e-06, "loss": 0.8783, "step": 102420 }, { "epoch": 0.7414565643843153, "grad_norm": 0.15924765169620514, "learning_rate": 4.258550674281744e-06, "loss": 0.8769, "step": 102430 }, { "epoch": 0.7415289510449015, "grad_norm": 0.15444104373455048, "learning_rate": 4.258478287621158e-06, "loss": 0.8888, "step": 102440 }, { "epoch": 0.7416013377054876, "grad_norm": 0.14428524672985077, "learning_rate": 4.258405900960571e-06, "loss": 0.8683, "step": 102450 }, { "epoch": 0.7416737243660738, "grad_norm": 0.16138869524002075, "learning_rate": 4.258333514299985e-06, "loss": 0.8838, "step": 102460 }, { "epoch": 0.74174611102666, "grad_norm": 0.16185913980007172, "learning_rate": 4.2582611276393986e-06, "loss": 0.8833, "step": 102470 }, { "epoch": 0.7418184976872462, "grad_norm": 0.1485951989889145, "learning_rate": 4.258188740978812e-06, "loss": 0.8956, "step": 102480 }, { "epoch": 0.7418908843478323, "grad_norm": 0.16537578403949738, "learning_rate": 4.258116354318227e-06, "loss": 0.8927, "step": 102490 }, { "epoch": 0.7419632710084185, "grad_norm": 0.14754781126976013, "learning_rate": 4.25804396765764e-06, "loss": 0.881, "step": 102500 }, { "epoch": 0.7420356576690048, "grad_norm": 0.14885316789150238, "learning_rate": 4.257971580997054e-06, "loss": 0.8927, "step": 102510 }, { "epoch": 0.742108044329591, "grad_norm": 0.17432914674282074, "learning_rate": 4.2578991943364675e-06, "loss": 0.8965, "step": 102520 }, { "epoch": 0.7421804309901772, "grad_norm": 0.1665223240852356, "learning_rate": 4.257826807675882e-06, "loss": 0.8862, "step": 102530 }, { "epoch": 0.7422528176507633, "grad_norm": 0.15308286249637604, "learning_rate": 4.257754421015296e-06, "loss": 0.8913, "step": 102540 }, { "epoch": 0.7423252043113495, "grad_norm": 0.1635119467973709, "learning_rate": 4.257682034354709e-06, "loss": 0.8989, "step": 102550 }, { "epoch": 0.7423975909719357, "grad_norm": 0.15072479844093323, "learning_rate": 4.257609647694123e-06, "loss": 0.8836, "step": 102560 }, { "epoch": 0.7424699776325219, "grad_norm": 0.17337563633918762, "learning_rate": 4.257537261033537e-06, "loss": 0.8954, "step": 102570 }, { "epoch": 0.742542364293108, "grad_norm": 0.14887109398841858, "learning_rate": 4.257464874372951e-06, "loss": 0.8954, "step": 102580 }, { "epoch": 0.7426147509536942, "grad_norm": 0.16400760412216187, "learning_rate": 4.2573924877123645e-06, "loss": 0.8847, "step": 102590 }, { "epoch": 0.7426871376142804, "grad_norm": 0.15407805144786835, "learning_rate": 4.257320101051778e-06, "loss": 0.8885, "step": 102600 }, { "epoch": 0.7427595242748666, "grad_norm": 0.15420646965503693, "learning_rate": 4.257247714391193e-06, "loss": 0.8844, "step": 102610 }, { "epoch": 0.7428319109354529, "grad_norm": 0.15996070206165314, "learning_rate": 4.257175327730606e-06, "loss": 0.8854, "step": 102620 }, { "epoch": 0.742904297596039, "grad_norm": 0.16860343515872955, "learning_rate": 4.25710294107002e-06, "loss": 0.8879, "step": 102630 }, { "epoch": 0.7429766842566252, "grad_norm": 0.15210293233394623, "learning_rate": 4.2570305544094334e-06, "loss": 0.8922, "step": 102640 }, { "epoch": 0.7430490709172114, "grad_norm": 0.17375917732715607, "learning_rate": 4.256958167748848e-06, "loss": 0.8811, "step": 102650 }, { "epoch": 0.7431214575777976, "grad_norm": 0.16159164905548096, "learning_rate": 4.2568857810882615e-06, "loss": 0.8704, "step": 102660 }, { "epoch": 0.7431938442383837, "grad_norm": 0.1597278118133545, "learning_rate": 4.256813394427675e-06, "loss": 0.8723, "step": 102670 }, { "epoch": 0.7432662308989699, "grad_norm": 0.16430126130580902, "learning_rate": 4.256741007767089e-06, "loss": 0.8836, "step": 102680 }, { "epoch": 0.7433386175595561, "grad_norm": 0.15877282619476318, "learning_rate": 4.256668621106503e-06, "loss": 0.887, "step": 102690 }, { "epoch": 0.7434110042201423, "grad_norm": 0.15375186502933502, "learning_rate": 4.256596234445917e-06, "loss": 0.8792, "step": 102700 }, { "epoch": 0.7434833908807285, "grad_norm": 0.177475243806839, "learning_rate": 4.2565238477853304e-06, "loss": 0.8842, "step": 102710 }, { "epoch": 0.7435557775413146, "grad_norm": 0.14837265014648438, "learning_rate": 4.256451461124744e-06, "loss": 0.8891, "step": 102720 }, { "epoch": 0.7436281642019009, "grad_norm": 0.1412384957075119, "learning_rate": 4.2563790744641585e-06, "loss": 0.8849, "step": 102730 }, { "epoch": 0.7437005508624871, "grad_norm": 0.15624116361141205, "learning_rate": 4.256306687803572e-06, "loss": 0.893, "step": 102740 }, { "epoch": 0.7437729375230733, "grad_norm": 0.15126636624336243, "learning_rate": 4.256234301142986e-06, "loss": 0.8776, "step": 102750 }, { "epoch": 0.7438453241836595, "grad_norm": 0.1584903746843338, "learning_rate": 4.256161914482399e-06, "loss": 0.8855, "step": 102760 }, { "epoch": 0.7439177108442456, "grad_norm": 0.1627122163772583, "learning_rate": 4.256089527821813e-06, "loss": 0.89, "step": 102770 }, { "epoch": 0.7439900975048318, "grad_norm": 0.15770822763442993, "learning_rate": 4.2560171411612274e-06, "loss": 0.8928, "step": 102780 }, { "epoch": 0.744062484165418, "grad_norm": 0.1587304323911667, "learning_rate": 4.255944754500641e-06, "loss": 0.8867, "step": 102790 }, { "epoch": 0.7441348708260042, "grad_norm": 0.15314078330993652, "learning_rate": 4.255872367840055e-06, "loss": 0.8957, "step": 102800 }, { "epoch": 0.7442072574865903, "grad_norm": 0.14915263652801514, "learning_rate": 4.255799981179468e-06, "loss": 0.8805, "step": 102810 }, { "epoch": 0.7442796441471765, "grad_norm": 0.14680497348308563, "learning_rate": 4.255727594518883e-06, "loss": 0.8882, "step": 102820 }, { "epoch": 0.7443520308077628, "grad_norm": 0.15903522074222565, "learning_rate": 4.255655207858296e-06, "loss": 0.8976, "step": 102830 }, { "epoch": 0.744424417468349, "grad_norm": 0.1559790074825287, "learning_rate": 4.25558282119771e-06, "loss": 0.8856, "step": 102840 }, { "epoch": 0.7444968041289352, "grad_norm": 0.15489821135997772, "learning_rate": 4.255510434537124e-06, "loss": 0.8878, "step": 102850 }, { "epoch": 0.7445691907895213, "grad_norm": 0.15823949873447418, "learning_rate": 4.255438047876538e-06, "loss": 0.8764, "step": 102860 }, { "epoch": 0.7446415774501075, "grad_norm": 0.1716890037059784, "learning_rate": 4.255365661215952e-06, "loss": 0.8994, "step": 102870 }, { "epoch": 0.7447139641106937, "grad_norm": 0.15007559955120087, "learning_rate": 4.255293274555365e-06, "loss": 0.8947, "step": 102880 }, { "epoch": 0.7447863507712799, "grad_norm": 0.16091489791870117, "learning_rate": 4.255220887894779e-06, "loss": 0.8847, "step": 102890 }, { "epoch": 0.744858737431866, "grad_norm": 0.1675153523683548, "learning_rate": 4.255148501234193e-06, "loss": 0.8804, "step": 102900 }, { "epoch": 0.7449311240924522, "grad_norm": 0.15456238389015198, "learning_rate": 4.255076114573607e-06, "loss": 0.8986, "step": 102910 }, { "epoch": 0.7450035107530384, "grad_norm": 0.15441204607486725, "learning_rate": 4.255003727913021e-06, "loss": 0.8742, "step": 102920 }, { "epoch": 0.7450758974136246, "grad_norm": 0.16027353703975677, "learning_rate": 4.254931341252434e-06, "loss": 0.8856, "step": 102930 }, { "epoch": 0.7451482840742109, "grad_norm": 0.16300475597381592, "learning_rate": 4.254858954591849e-06, "loss": 0.882, "step": 102940 }, { "epoch": 0.745220670734797, "grad_norm": 0.15692980587482452, "learning_rate": 4.254786567931262e-06, "loss": 0.8875, "step": 102950 }, { "epoch": 0.7452930573953832, "grad_norm": 0.16681601107120514, "learning_rate": 4.254714181270676e-06, "loss": 0.8866, "step": 102960 }, { "epoch": 0.7453654440559694, "grad_norm": 0.16296398639678955, "learning_rate": 4.2546417946100895e-06, "loss": 0.8969, "step": 102970 }, { "epoch": 0.7454378307165556, "grad_norm": 0.1497073471546173, "learning_rate": 4.254569407949504e-06, "loss": 0.8949, "step": 102980 }, { "epoch": 0.7455102173771417, "grad_norm": 0.17505697906017303, "learning_rate": 4.254497021288917e-06, "loss": 0.8935, "step": 102990 }, { "epoch": 0.7455826040377279, "grad_norm": 0.15840458869934082, "learning_rate": 4.25442463462833e-06, "loss": 0.8957, "step": 103000 }, { "epoch": 0.7456549906983141, "grad_norm": 0.1545899212360382, "learning_rate": 4.254352247967745e-06, "loss": 0.8853, "step": 103010 }, { "epoch": 0.7457273773589003, "grad_norm": 0.1966148018836975, "learning_rate": 4.2542798613071585e-06, "loss": 0.8883, "step": 103020 }, { "epoch": 0.7457997640194864, "grad_norm": 0.1721884310245514, "learning_rate": 4.254207474646572e-06, "loss": 0.8887, "step": 103030 }, { "epoch": 0.7458721506800726, "grad_norm": 0.19111919403076172, "learning_rate": 4.254135087985986e-06, "loss": 0.8829, "step": 103040 }, { "epoch": 0.7459445373406589, "grad_norm": 0.15273134410381317, "learning_rate": 4.2540627013254e-06, "loss": 0.8952, "step": 103050 }, { "epoch": 0.7460169240012451, "grad_norm": 0.16259609162807465, "learning_rate": 4.253990314664814e-06, "loss": 0.8866, "step": 103060 }, { "epoch": 0.7460893106618313, "grad_norm": 0.15013474225997925, "learning_rate": 4.253917928004227e-06, "loss": 0.8792, "step": 103070 }, { "epoch": 0.7461616973224174, "grad_norm": 0.17235596477985382, "learning_rate": 4.253845541343641e-06, "loss": 0.9079, "step": 103080 }, { "epoch": 0.7462340839830036, "grad_norm": 0.16108207404613495, "learning_rate": 4.2537731546830555e-06, "loss": 0.8797, "step": 103090 }, { "epoch": 0.7463064706435898, "grad_norm": 0.16072994470596313, "learning_rate": 4.253700768022469e-06, "loss": 0.8804, "step": 103100 }, { "epoch": 0.746378857304176, "grad_norm": 0.1662396937608719, "learning_rate": 4.253628381361883e-06, "loss": 0.8866, "step": 103110 }, { "epoch": 0.7464512439647621, "grad_norm": 0.16367369890213013, "learning_rate": 4.253555994701296e-06, "loss": 0.8975, "step": 103120 }, { "epoch": 0.7465236306253483, "grad_norm": 0.15681147575378418, "learning_rate": 4.253483608040711e-06, "loss": 0.8995, "step": 103130 }, { "epoch": 0.7465960172859345, "grad_norm": 0.1485588252544403, "learning_rate": 4.253411221380124e-06, "loss": 0.8863, "step": 103140 }, { "epoch": 0.7466684039465208, "grad_norm": 0.1582004874944687, "learning_rate": 4.253338834719538e-06, "loss": 0.8895, "step": 103150 }, { "epoch": 0.746740790607107, "grad_norm": 0.17631734907627106, "learning_rate": 4.253266448058952e-06, "loss": 0.8993, "step": 103160 }, { "epoch": 0.7468131772676931, "grad_norm": 0.15485930442810059, "learning_rate": 4.253194061398366e-06, "loss": 0.8882, "step": 103170 }, { "epoch": 0.7468855639282793, "grad_norm": 0.18264351785182953, "learning_rate": 4.25312167473778e-06, "loss": 0.8989, "step": 103180 }, { "epoch": 0.7469579505888655, "grad_norm": 0.15158303081989288, "learning_rate": 4.253049288077193e-06, "loss": 0.9056, "step": 103190 }, { "epoch": 0.7470303372494517, "grad_norm": 0.1658327281475067, "learning_rate": 4.252976901416607e-06, "loss": 0.8896, "step": 103200 }, { "epoch": 0.7471027239100378, "grad_norm": 0.15178616344928741, "learning_rate": 4.252904514756021e-06, "loss": 0.8784, "step": 103210 }, { "epoch": 0.747175110570624, "grad_norm": 0.15876880288124084, "learning_rate": 4.252832128095435e-06, "loss": 0.8802, "step": 103220 }, { "epoch": 0.7472474972312102, "grad_norm": 0.15720294415950775, "learning_rate": 4.252759741434849e-06, "loss": 0.8837, "step": 103230 }, { "epoch": 0.7473198838917964, "grad_norm": 0.16239741444587708, "learning_rate": 4.252687354774262e-06, "loss": 0.8917, "step": 103240 }, { "epoch": 0.7473922705523826, "grad_norm": 0.19867613911628723, "learning_rate": 4.252614968113677e-06, "loss": 0.8822, "step": 103250 }, { "epoch": 0.7474646572129688, "grad_norm": 0.15661980211734772, "learning_rate": 4.25254258145309e-06, "loss": 0.8789, "step": 103260 }, { "epoch": 0.747537043873555, "grad_norm": 0.16807293891906738, "learning_rate": 4.252470194792504e-06, "loss": 0.9001, "step": 103270 }, { "epoch": 0.7476094305341412, "grad_norm": 0.15940342843532562, "learning_rate": 4.252397808131918e-06, "loss": 0.8969, "step": 103280 }, { "epoch": 0.7476818171947274, "grad_norm": 0.1640319973230362, "learning_rate": 4.252325421471332e-06, "loss": 0.8795, "step": 103290 }, { "epoch": 0.7477542038553135, "grad_norm": 0.17689263820648193, "learning_rate": 4.252253034810746e-06, "loss": 0.891, "step": 103300 }, { "epoch": 0.7478265905158997, "grad_norm": 0.14825983345508575, "learning_rate": 4.252180648150159e-06, "loss": 0.8791, "step": 103310 }, { "epoch": 0.7478989771764859, "grad_norm": 0.162586972117424, "learning_rate": 4.252108261489573e-06, "loss": 0.8809, "step": 103320 }, { "epoch": 0.7479713638370721, "grad_norm": 0.1549188792705536, "learning_rate": 4.252035874828987e-06, "loss": 0.8842, "step": 103330 }, { "epoch": 0.7480437504976583, "grad_norm": 0.1467401683330536, "learning_rate": 4.251963488168401e-06, "loss": 0.8806, "step": 103340 }, { "epoch": 0.7481161371582444, "grad_norm": 0.16220062971115112, "learning_rate": 4.251891101507815e-06, "loss": 0.8836, "step": 103350 }, { "epoch": 0.7481885238188307, "grad_norm": 0.1533544659614563, "learning_rate": 4.251818714847228e-06, "loss": 0.8901, "step": 103360 }, { "epoch": 0.7482609104794169, "grad_norm": 0.1560657024383545, "learning_rate": 4.251746328186642e-06, "loss": 0.8769, "step": 103370 }, { "epoch": 0.7483332971400031, "grad_norm": 0.1645650416612625, "learning_rate": 4.251673941526056e-06, "loss": 0.8804, "step": 103380 }, { "epoch": 0.7484056838005892, "grad_norm": 0.1500735580921173, "learning_rate": 4.25160155486547e-06, "loss": 0.8849, "step": 103390 }, { "epoch": 0.7484780704611754, "grad_norm": 0.1593628227710724, "learning_rate": 4.2515291682048835e-06, "loss": 0.8793, "step": 103400 }, { "epoch": 0.7485504571217616, "grad_norm": 0.16192486882209778, "learning_rate": 4.251456781544297e-06, "loss": 0.8873, "step": 103410 }, { "epoch": 0.7486228437823478, "grad_norm": 0.147709459066391, "learning_rate": 4.251384394883712e-06, "loss": 0.8904, "step": 103420 }, { "epoch": 0.748695230442934, "grad_norm": 0.16684521734714508, "learning_rate": 4.251312008223125e-06, "loss": 0.895, "step": 103430 }, { "epoch": 0.7487676171035201, "grad_norm": 0.14957979321479797, "learning_rate": 4.251239621562539e-06, "loss": 0.8818, "step": 103440 }, { "epoch": 0.7488400037641063, "grad_norm": 0.15316711366176605, "learning_rate": 4.2511672349019524e-06, "loss": 0.8816, "step": 103450 }, { "epoch": 0.7489123904246925, "grad_norm": 0.15910635888576508, "learning_rate": 4.251094848241367e-06, "loss": 0.8801, "step": 103460 }, { "epoch": 0.7489847770852788, "grad_norm": 0.1638316661119461, "learning_rate": 4.2510224615807805e-06, "loss": 0.8842, "step": 103470 }, { "epoch": 0.749057163745865, "grad_norm": 0.15128594636917114, "learning_rate": 4.250950074920194e-06, "loss": 0.8978, "step": 103480 }, { "epoch": 0.7491295504064511, "grad_norm": 0.15200836956501007, "learning_rate": 4.250877688259608e-06, "loss": 0.8843, "step": 103490 }, { "epoch": 0.7492019370670373, "grad_norm": 0.19085319340229034, "learning_rate": 4.250805301599022e-06, "loss": 0.8784, "step": 103500 }, { "epoch": 0.7492743237276235, "grad_norm": 0.15487466752529144, "learning_rate": 4.250732914938436e-06, "loss": 0.8954, "step": 103510 }, { "epoch": 0.7493467103882097, "grad_norm": 0.15988904237747192, "learning_rate": 4.2506605282778494e-06, "loss": 0.8951, "step": 103520 }, { "epoch": 0.7494190970487958, "grad_norm": 0.1502448171377182, "learning_rate": 4.250588141617263e-06, "loss": 0.8985, "step": 103530 }, { "epoch": 0.749491483709382, "grad_norm": 0.153715580701828, "learning_rate": 4.250515754956677e-06, "loss": 0.8955, "step": 103540 }, { "epoch": 0.7495638703699682, "grad_norm": 0.1608363538980484, "learning_rate": 4.25044336829609e-06, "loss": 0.8897, "step": 103550 }, { "epoch": 0.7496362570305544, "grad_norm": 0.1529805064201355, "learning_rate": 4.250370981635504e-06, "loss": 0.8936, "step": 103560 }, { "epoch": 0.7497086436911405, "grad_norm": 0.15650229156017303, "learning_rate": 4.250298594974918e-06, "loss": 0.8815, "step": 103570 }, { "epoch": 0.7497810303517268, "grad_norm": 0.1722974181175232, "learning_rate": 4.250226208314332e-06, "loss": 0.9026, "step": 103580 }, { "epoch": 0.749853417012313, "grad_norm": 0.16492018103599548, "learning_rate": 4.250153821653746e-06, "loss": 0.8814, "step": 103590 }, { "epoch": 0.7499258036728992, "grad_norm": 0.15420758724212646, "learning_rate": 4.250081434993159e-06, "loss": 0.888, "step": 103600 }, { "epoch": 0.7499981903334854, "grad_norm": 0.15207520127296448, "learning_rate": 4.250009048332574e-06, "loss": 0.8778, "step": 103610 }, { "epoch": 0.7500705769940715, "grad_norm": 0.15864045917987823, "learning_rate": 4.249936661671987e-06, "loss": 0.885, "step": 103620 }, { "epoch": 0.7501429636546577, "grad_norm": 0.16581547260284424, "learning_rate": 4.249864275011401e-06, "loss": 0.8845, "step": 103630 }, { "epoch": 0.7502153503152439, "grad_norm": 0.16255062818527222, "learning_rate": 4.2497918883508145e-06, "loss": 0.8849, "step": 103640 }, { "epoch": 0.7502877369758301, "grad_norm": 0.15730057656764984, "learning_rate": 4.249719501690229e-06, "loss": 0.8975, "step": 103650 }, { "epoch": 0.7503601236364162, "grad_norm": 0.16164420545101166, "learning_rate": 4.249647115029643e-06, "loss": 0.8865, "step": 103660 }, { "epoch": 0.7504325102970024, "grad_norm": 0.1671602427959442, "learning_rate": 4.249574728369056e-06, "loss": 0.8888, "step": 103670 }, { "epoch": 0.7505048969575887, "grad_norm": 0.1699591726064682, "learning_rate": 4.24950234170847e-06, "loss": 0.87, "step": 103680 }, { "epoch": 0.7505772836181749, "grad_norm": 0.16800114512443542, "learning_rate": 4.249429955047884e-06, "loss": 0.9011, "step": 103690 }, { "epoch": 0.7506496702787611, "grad_norm": 0.18705791234970093, "learning_rate": 4.249357568387298e-06, "loss": 0.8766, "step": 103700 }, { "epoch": 0.7507220569393472, "grad_norm": 0.1909557431936264, "learning_rate": 4.2492851817267115e-06, "loss": 0.9045, "step": 103710 }, { "epoch": 0.7507944435999334, "grad_norm": 0.15778642892837524, "learning_rate": 4.249212795066125e-06, "loss": 0.8715, "step": 103720 }, { "epoch": 0.7508668302605196, "grad_norm": 0.1488863229751587, "learning_rate": 4.24914040840554e-06, "loss": 0.8756, "step": 103730 }, { "epoch": 0.7509392169211058, "grad_norm": 0.15307924151420593, "learning_rate": 4.249068021744953e-06, "loss": 0.8738, "step": 103740 }, { "epoch": 0.7510116035816919, "grad_norm": 0.18600323796272278, "learning_rate": 4.248995635084367e-06, "loss": 0.8883, "step": 103750 }, { "epoch": 0.7510839902422781, "grad_norm": 0.16874092817306519, "learning_rate": 4.2489232484237805e-06, "loss": 0.8835, "step": 103760 }, { "epoch": 0.7511563769028643, "grad_norm": 0.1458248794078827, "learning_rate": 4.248850861763195e-06, "loss": 0.878, "step": 103770 }, { "epoch": 0.7512287635634505, "grad_norm": 0.1547769457101822, "learning_rate": 4.2487784751026086e-06, "loss": 0.888, "step": 103780 }, { "epoch": 0.7513011502240368, "grad_norm": 0.15332648158073425, "learning_rate": 4.248706088442022e-06, "loss": 0.8808, "step": 103790 }, { "epoch": 0.7513735368846229, "grad_norm": 0.16886559128761292, "learning_rate": 4.248633701781436e-06, "loss": 0.884, "step": 103800 }, { "epoch": 0.7514459235452091, "grad_norm": 0.16709497570991516, "learning_rate": 4.24856131512085e-06, "loss": 0.8972, "step": 103810 }, { "epoch": 0.7515183102057953, "grad_norm": 0.15135438740253448, "learning_rate": 4.248488928460264e-06, "loss": 0.8969, "step": 103820 }, { "epoch": 0.7515906968663815, "grad_norm": 0.15672151744365692, "learning_rate": 4.2484165417996775e-06, "loss": 0.893, "step": 103830 }, { "epoch": 0.7516630835269676, "grad_norm": 0.14277760684490204, "learning_rate": 4.248344155139091e-06, "loss": 0.8874, "step": 103840 }, { "epoch": 0.7517354701875538, "grad_norm": 0.15540048480033875, "learning_rate": 4.2482717684785056e-06, "loss": 0.8904, "step": 103850 }, { "epoch": 0.75180785684814, "grad_norm": 0.14748062193393707, "learning_rate": 4.248199381817919e-06, "loss": 0.8929, "step": 103860 }, { "epoch": 0.7518802435087262, "grad_norm": 0.154066801071167, "learning_rate": 4.248126995157333e-06, "loss": 0.8819, "step": 103870 }, { "epoch": 0.7519526301693124, "grad_norm": 0.15664027631282806, "learning_rate": 4.248054608496746e-06, "loss": 0.8937, "step": 103880 }, { "epoch": 0.7520250168298986, "grad_norm": 0.15651507675647736, "learning_rate": 4.247982221836161e-06, "loss": 0.8824, "step": 103890 }, { "epoch": 0.7520974034904848, "grad_norm": 0.15763552486896515, "learning_rate": 4.2479098351755745e-06, "loss": 0.8743, "step": 103900 }, { "epoch": 0.752169790151071, "grad_norm": 0.15285298228263855, "learning_rate": 4.247837448514988e-06, "loss": 0.8947, "step": 103910 }, { "epoch": 0.7522421768116572, "grad_norm": 0.15118758380413055, "learning_rate": 4.247765061854402e-06, "loss": 0.8839, "step": 103920 }, { "epoch": 0.7523145634722433, "grad_norm": 0.16061747074127197, "learning_rate": 4.247692675193816e-06, "loss": 0.8882, "step": 103930 }, { "epoch": 0.7523869501328295, "grad_norm": 0.15958862006664276, "learning_rate": 4.24762028853323e-06, "loss": 0.8741, "step": 103940 }, { "epoch": 0.7524593367934157, "grad_norm": 0.152791365981102, "learning_rate": 4.247547901872643e-06, "loss": 0.8921, "step": 103950 }, { "epoch": 0.7525317234540019, "grad_norm": 0.15019190311431885, "learning_rate": 4.247475515212057e-06, "loss": 0.8939, "step": 103960 }, { "epoch": 0.752604110114588, "grad_norm": 0.15106353163719177, "learning_rate": 4.2474031285514715e-06, "loss": 0.877, "step": 103970 }, { "epoch": 0.7526764967751742, "grad_norm": 0.15266717970371246, "learning_rate": 4.247330741890885e-06, "loss": 0.8791, "step": 103980 }, { "epoch": 0.7527488834357604, "grad_norm": 0.16882237792015076, "learning_rate": 4.247258355230299e-06, "loss": 0.8827, "step": 103990 }, { "epoch": 0.7528212700963467, "grad_norm": 0.15489928424358368, "learning_rate": 4.247185968569712e-06, "loss": 0.8958, "step": 104000 }, { "epoch": 0.7528936567569329, "grad_norm": 0.15682126581668854, "learning_rate": 4.247113581909126e-06, "loss": 0.8914, "step": 104010 }, { "epoch": 0.752966043417519, "grad_norm": 0.1847575455904007, "learning_rate": 4.24704119524854e-06, "loss": 0.8897, "step": 104020 }, { "epoch": 0.7530384300781052, "grad_norm": 0.15872369706630707, "learning_rate": 4.246968808587954e-06, "loss": 0.8898, "step": 104030 }, { "epoch": 0.7531108167386914, "grad_norm": 0.16287031769752502, "learning_rate": 4.246896421927368e-06, "loss": 0.8857, "step": 104040 }, { "epoch": 0.7531832033992776, "grad_norm": 0.14909902215003967, "learning_rate": 4.246824035266781e-06, "loss": 0.8943, "step": 104050 }, { "epoch": 0.7532555900598638, "grad_norm": 0.1613544374704361, "learning_rate": 4.246751648606195e-06, "loss": 0.8926, "step": 104060 }, { "epoch": 0.7533279767204499, "grad_norm": 0.14487162232398987, "learning_rate": 4.2466792619456085e-06, "loss": 0.8836, "step": 104070 }, { "epoch": 0.7534003633810361, "grad_norm": 0.16244520246982574, "learning_rate": 4.246606875285023e-06, "loss": 0.8857, "step": 104080 }, { "epoch": 0.7534727500416223, "grad_norm": 0.15103542804718018, "learning_rate": 4.246534488624437e-06, "loss": 0.8759, "step": 104090 }, { "epoch": 0.7535451367022085, "grad_norm": 0.15711627900600433, "learning_rate": 4.24646210196385e-06, "loss": 0.8965, "step": 104100 }, { "epoch": 0.7536175233627948, "grad_norm": 0.15874861180782318, "learning_rate": 4.246389715303264e-06, "loss": 0.9034, "step": 104110 }, { "epoch": 0.7536899100233809, "grad_norm": 0.14917586743831635, "learning_rate": 4.246317328642678e-06, "loss": 0.8858, "step": 104120 }, { "epoch": 0.7537622966839671, "grad_norm": 0.14904716610908508, "learning_rate": 4.246244941982092e-06, "loss": 0.8882, "step": 104130 }, { "epoch": 0.7538346833445533, "grad_norm": 0.15895779430866241, "learning_rate": 4.2461725553215055e-06, "loss": 0.8892, "step": 104140 }, { "epoch": 0.7539070700051395, "grad_norm": 0.2372187227010727, "learning_rate": 4.246100168660919e-06, "loss": 0.89, "step": 104150 }, { "epoch": 0.7539794566657256, "grad_norm": 0.18698617815971375, "learning_rate": 4.246027782000333e-06, "loss": 0.9019, "step": 104160 }, { "epoch": 0.7540518433263118, "grad_norm": 0.16438809037208557, "learning_rate": 4.245955395339747e-06, "loss": 0.8853, "step": 104170 }, { "epoch": 0.754124229986898, "grad_norm": 0.17748364806175232, "learning_rate": 4.245883008679161e-06, "loss": 0.8724, "step": 104180 }, { "epoch": 0.7541966166474842, "grad_norm": 0.14284367859363556, "learning_rate": 4.2458106220185744e-06, "loss": 0.889, "step": 104190 }, { "epoch": 0.7542690033080703, "grad_norm": 0.15544088184833527, "learning_rate": 4.245738235357988e-06, "loss": 0.875, "step": 104200 }, { "epoch": 0.7543413899686566, "grad_norm": 0.16390086710453033, "learning_rate": 4.2456658486974025e-06, "loss": 0.8853, "step": 104210 }, { "epoch": 0.7544137766292428, "grad_norm": 0.15978281199932098, "learning_rate": 4.245593462036816e-06, "loss": 0.8848, "step": 104220 }, { "epoch": 0.754486163289829, "grad_norm": 0.1489153504371643, "learning_rate": 4.24552107537623e-06, "loss": 0.8817, "step": 104230 }, { "epoch": 0.7545585499504152, "grad_norm": 0.15438929200172424, "learning_rate": 4.245448688715643e-06, "loss": 0.8807, "step": 104240 }, { "epoch": 0.7546309366110013, "grad_norm": 0.18427646160125732, "learning_rate": 4.245376302055058e-06, "loss": 0.89, "step": 104250 }, { "epoch": 0.7547033232715875, "grad_norm": 0.15060459077358246, "learning_rate": 4.2453039153944714e-06, "loss": 0.8951, "step": 104260 }, { "epoch": 0.7547757099321737, "grad_norm": 0.16309115290641785, "learning_rate": 4.245231528733885e-06, "loss": 0.8921, "step": 104270 }, { "epoch": 0.7548480965927599, "grad_norm": 0.15942274034023285, "learning_rate": 4.245159142073299e-06, "loss": 0.8872, "step": 104280 }, { "epoch": 0.754920483253346, "grad_norm": 0.1642897129058838, "learning_rate": 4.245086755412713e-06, "loss": 0.8889, "step": 104290 }, { "epoch": 0.7549928699139322, "grad_norm": 0.15134483575820923, "learning_rate": 4.245014368752127e-06, "loss": 0.8769, "step": 104300 }, { "epoch": 0.7550652565745184, "grad_norm": 0.16363342106342316, "learning_rate": 4.24494198209154e-06, "loss": 0.8751, "step": 104310 }, { "epoch": 0.7551376432351047, "grad_norm": 0.1584373414516449, "learning_rate": 4.244869595430954e-06, "loss": 0.8846, "step": 104320 }, { "epoch": 0.7552100298956909, "grad_norm": 0.15007972717285156, "learning_rate": 4.2447972087703685e-06, "loss": 0.8924, "step": 104330 }, { "epoch": 0.755282416556277, "grad_norm": 0.16009405255317688, "learning_rate": 4.244724822109782e-06, "loss": 0.8788, "step": 104340 }, { "epoch": 0.7553548032168632, "grad_norm": 0.14448218047618866, "learning_rate": 4.244652435449196e-06, "loss": 0.8778, "step": 104350 }, { "epoch": 0.7554271898774494, "grad_norm": 0.1468927413225174, "learning_rate": 4.244580048788609e-06, "loss": 0.8914, "step": 104360 }, { "epoch": 0.7554995765380356, "grad_norm": 0.14824378490447998, "learning_rate": 4.244507662128024e-06, "loss": 0.8868, "step": 104370 }, { "epoch": 0.7555719631986217, "grad_norm": 0.15983250737190247, "learning_rate": 4.244435275467437e-06, "loss": 0.8888, "step": 104380 }, { "epoch": 0.7556443498592079, "grad_norm": 0.15000095963478088, "learning_rate": 4.244362888806851e-06, "loss": 0.9001, "step": 104390 }, { "epoch": 0.7557167365197941, "grad_norm": 0.15501455962657928, "learning_rate": 4.244290502146265e-06, "loss": 0.8791, "step": 104400 }, { "epoch": 0.7557891231803803, "grad_norm": 0.14795927703380585, "learning_rate": 4.244218115485679e-06, "loss": 0.8769, "step": 104410 }, { "epoch": 0.7558615098409666, "grad_norm": 0.16325373947620392, "learning_rate": 4.244145728825093e-06, "loss": 0.8827, "step": 104420 }, { "epoch": 0.7559338965015527, "grad_norm": 0.15596868097782135, "learning_rate": 4.244073342164506e-06, "loss": 0.8761, "step": 104430 }, { "epoch": 0.7560062831621389, "grad_norm": 0.14845876395702362, "learning_rate": 4.24400095550392e-06, "loss": 0.8798, "step": 104440 }, { "epoch": 0.7560786698227251, "grad_norm": 0.1506635695695877, "learning_rate": 4.243928568843334e-06, "loss": 0.8831, "step": 104450 }, { "epoch": 0.7561510564833113, "grad_norm": 0.14508011937141418, "learning_rate": 4.243856182182748e-06, "loss": 0.8923, "step": 104460 }, { "epoch": 0.7562234431438974, "grad_norm": 0.16762535274028778, "learning_rate": 4.243783795522162e-06, "loss": 0.8903, "step": 104470 }, { "epoch": 0.7562958298044836, "grad_norm": 0.15426135063171387, "learning_rate": 4.243711408861575e-06, "loss": 0.8721, "step": 104480 }, { "epoch": 0.7563682164650698, "grad_norm": 0.15148262679576874, "learning_rate": 4.24363902220099e-06, "loss": 0.8973, "step": 104490 }, { "epoch": 0.756440603125656, "grad_norm": 0.14398479461669922, "learning_rate": 4.243566635540403e-06, "loss": 0.8937, "step": 104500 }, { "epoch": 0.7565129897862422, "grad_norm": 0.16111652553081512, "learning_rate": 4.243494248879817e-06, "loss": 0.8981, "step": 104510 }, { "epoch": 0.7565853764468283, "grad_norm": 0.14975638687610626, "learning_rate": 4.2434218622192306e-06, "loss": 0.8924, "step": 104520 }, { "epoch": 0.7566577631074146, "grad_norm": 0.1521756798028946, "learning_rate": 4.243349475558645e-06, "loss": 0.88, "step": 104530 }, { "epoch": 0.7567301497680008, "grad_norm": 0.17231915891170502, "learning_rate": 4.243277088898059e-06, "loss": 0.8897, "step": 104540 }, { "epoch": 0.756802536428587, "grad_norm": 0.16323207318782806, "learning_rate": 4.243204702237472e-06, "loss": 0.8935, "step": 104550 }, { "epoch": 0.7568749230891731, "grad_norm": 0.14698879420757294, "learning_rate": 4.243132315576886e-06, "loss": 0.8892, "step": 104560 }, { "epoch": 0.7569473097497593, "grad_norm": 0.15942469239234924, "learning_rate": 4.2430599289163e-06, "loss": 0.8943, "step": 104570 }, { "epoch": 0.7570196964103455, "grad_norm": 0.16518035531044006, "learning_rate": 4.242987542255713e-06, "loss": 0.8947, "step": 104580 }, { "epoch": 0.7570920830709317, "grad_norm": 0.14789389073848724, "learning_rate": 4.242915155595127e-06, "loss": 0.8858, "step": 104590 }, { "epoch": 0.7571644697315179, "grad_norm": 0.1392948478460312, "learning_rate": 4.242842768934541e-06, "loss": 0.8891, "step": 104600 }, { "epoch": 0.757236856392104, "grad_norm": 0.15600177645683289, "learning_rate": 4.242770382273955e-06, "loss": 0.9023, "step": 104610 }, { "epoch": 0.7573092430526902, "grad_norm": 0.16960683465003967, "learning_rate": 4.242697995613368e-06, "loss": 0.879, "step": 104620 }, { "epoch": 0.7573816297132764, "grad_norm": 0.15232223272323608, "learning_rate": 4.242625608952782e-06, "loss": 0.8809, "step": 104630 }, { "epoch": 0.7574540163738627, "grad_norm": 0.1578724980354309, "learning_rate": 4.2425532222921965e-06, "loss": 0.9066, "step": 104640 }, { "epoch": 0.7575264030344488, "grad_norm": 0.1544656753540039, "learning_rate": 4.24248083563161e-06, "loss": 0.8877, "step": 104650 }, { "epoch": 0.757598789695035, "grad_norm": 0.154565691947937, "learning_rate": 4.242408448971024e-06, "loss": 0.8876, "step": 104660 }, { "epoch": 0.7576711763556212, "grad_norm": 0.15601760149002075, "learning_rate": 4.242336062310437e-06, "loss": 0.8946, "step": 104670 }, { "epoch": 0.7577435630162074, "grad_norm": 0.16208244860172272, "learning_rate": 4.242263675649852e-06, "loss": 0.8835, "step": 104680 }, { "epoch": 0.7578159496767936, "grad_norm": 0.1622067540884018, "learning_rate": 4.242191288989265e-06, "loss": 0.8915, "step": 104690 }, { "epoch": 0.7578883363373797, "grad_norm": 0.1567181497812271, "learning_rate": 4.242118902328679e-06, "loss": 0.8854, "step": 104700 }, { "epoch": 0.7579607229979659, "grad_norm": 0.17146632075309753, "learning_rate": 4.242046515668093e-06, "loss": 0.8797, "step": 104710 }, { "epoch": 0.7580331096585521, "grad_norm": 0.15992100536823273, "learning_rate": 4.241974129007507e-06, "loss": 0.8882, "step": 104720 }, { "epoch": 0.7581054963191383, "grad_norm": 0.14678719639778137, "learning_rate": 4.241901742346921e-06, "loss": 0.8827, "step": 104730 }, { "epoch": 0.7581778829797245, "grad_norm": 0.15735352039337158, "learning_rate": 4.241829355686334e-06, "loss": 0.8882, "step": 104740 }, { "epoch": 0.7582502696403107, "grad_norm": 0.16120299696922302, "learning_rate": 4.241756969025748e-06, "loss": 0.8893, "step": 104750 }, { "epoch": 0.7583226563008969, "grad_norm": 0.15673673152923584, "learning_rate": 4.241684582365162e-06, "loss": 0.8916, "step": 104760 }, { "epoch": 0.7583950429614831, "grad_norm": 0.1934574395418167, "learning_rate": 4.241612195704576e-06, "loss": 0.8908, "step": 104770 }, { "epoch": 0.7584674296220693, "grad_norm": 0.17176105082035065, "learning_rate": 4.24153980904399e-06, "loss": 0.8849, "step": 104780 }, { "epoch": 0.7585398162826554, "grad_norm": 0.16662052273750305, "learning_rate": 4.241467422383403e-06, "loss": 0.8946, "step": 104790 }, { "epoch": 0.7586122029432416, "grad_norm": 0.14775685966014862, "learning_rate": 4.241395035722817e-06, "loss": 0.8891, "step": 104800 }, { "epoch": 0.7586845896038278, "grad_norm": 0.15374141931533813, "learning_rate": 4.241322649062231e-06, "loss": 0.8965, "step": 104810 }, { "epoch": 0.758756976264414, "grad_norm": 0.15306535363197327, "learning_rate": 4.241250262401645e-06, "loss": 0.8876, "step": 104820 }, { "epoch": 0.7588293629250001, "grad_norm": 0.14979711174964905, "learning_rate": 4.241177875741059e-06, "loss": 0.8802, "step": 104830 }, { "epoch": 0.7589017495855863, "grad_norm": 0.15853987634181976, "learning_rate": 4.241105489080472e-06, "loss": 0.8827, "step": 104840 }, { "epoch": 0.7589741362461726, "grad_norm": 0.15730775892734528, "learning_rate": 4.241033102419887e-06, "loss": 0.8964, "step": 104850 }, { "epoch": 0.7590465229067588, "grad_norm": 0.1486206203699112, "learning_rate": 4.2409607157593e-06, "loss": 0.9028, "step": 104860 }, { "epoch": 0.759118909567345, "grad_norm": 0.15792833268642426, "learning_rate": 4.240888329098714e-06, "loss": 0.8772, "step": 104870 }, { "epoch": 0.7591912962279311, "grad_norm": 0.16453076899051666, "learning_rate": 4.2408159424381275e-06, "loss": 0.8917, "step": 104880 }, { "epoch": 0.7592636828885173, "grad_norm": 0.1514229029417038, "learning_rate": 4.240743555777542e-06, "loss": 0.8884, "step": 104890 }, { "epoch": 0.7593360695491035, "grad_norm": 0.15336290001869202, "learning_rate": 4.240671169116956e-06, "loss": 0.8828, "step": 104900 }, { "epoch": 0.7594084562096897, "grad_norm": 0.1626693159341812, "learning_rate": 4.240598782456369e-06, "loss": 0.8948, "step": 104910 }, { "epoch": 0.7594808428702758, "grad_norm": 0.15300175547599792, "learning_rate": 4.240526395795783e-06, "loss": 0.8984, "step": 104920 }, { "epoch": 0.759553229530862, "grad_norm": 0.1569639891386032, "learning_rate": 4.240454009135197e-06, "loss": 0.869, "step": 104930 }, { "epoch": 0.7596256161914482, "grad_norm": 0.15857458114624023, "learning_rate": 4.240381622474611e-06, "loss": 0.8881, "step": 104940 }, { "epoch": 0.7596980028520345, "grad_norm": 0.15155275166034698, "learning_rate": 4.2403092358140245e-06, "loss": 0.8706, "step": 104950 }, { "epoch": 0.7597703895126207, "grad_norm": 0.16041214764118195, "learning_rate": 4.240236849153438e-06, "loss": 0.8975, "step": 104960 }, { "epoch": 0.7598427761732068, "grad_norm": 0.16059066355228424, "learning_rate": 4.240164462492853e-06, "loss": 0.8794, "step": 104970 }, { "epoch": 0.759915162833793, "grad_norm": 0.15635879337787628, "learning_rate": 4.240092075832266e-06, "loss": 0.8938, "step": 104980 }, { "epoch": 0.7599875494943792, "grad_norm": 0.14419741928577423, "learning_rate": 4.24001968917168e-06, "loss": 0.8759, "step": 104990 }, { "epoch": 0.7600599361549654, "grad_norm": 0.18103042244911194, "learning_rate": 4.2399473025110934e-06, "loss": 0.8966, "step": 105000 }, { "epoch": 0.7601323228155515, "grad_norm": 0.15891040861606598, "learning_rate": 4.239874915850508e-06, "loss": 0.8981, "step": 105010 }, { "epoch": 0.7602047094761377, "grad_norm": 0.17460259795188904, "learning_rate": 4.2398025291899215e-06, "loss": 0.8862, "step": 105020 }, { "epoch": 0.7602770961367239, "grad_norm": 0.17469242215156555, "learning_rate": 4.239730142529335e-06, "loss": 0.8902, "step": 105030 }, { "epoch": 0.7603494827973101, "grad_norm": 0.1552739441394806, "learning_rate": 4.239657755868749e-06, "loss": 0.8872, "step": 105040 }, { "epoch": 0.7604218694578962, "grad_norm": 0.16493083536624908, "learning_rate": 4.239585369208163e-06, "loss": 0.8876, "step": 105050 }, { "epoch": 0.7604942561184825, "grad_norm": 0.1514502614736557, "learning_rate": 4.239512982547577e-06, "loss": 0.876, "step": 105060 }, { "epoch": 0.7605666427790687, "grad_norm": 0.16472308337688446, "learning_rate": 4.2394405958869905e-06, "loss": 0.8893, "step": 105070 }, { "epoch": 0.7606390294396549, "grad_norm": 0.15388494729995728, "learning_rate": 4.239368209226404e-06, "loss": 0.8934, "step": 105080 }, { "epoch": 0.7607114161002411, "grad_norm": 0.1528458446264267, "learning_rate": 4.2392958225658185e-06, "loss": 0.8948, "step": 105090 }, { "epoch": 0.7607838027608272, "grad_norm": 0.1468532532453537, "learning_rate": 4.239223435905232e-06, "loss": 0.8768, "step": 105100 }, { "epoch": 0.7608561894214134, "grad_norm": 0.14190086722373962, "learning_rate": 4.239151049244645e-06, "loss": 0.8829, "step": 105110 }, { "epoch": 0.7609285760819996, "grad_norm": 0.1714107096195221, "learning_rate": 4.239078662584059e-06, "loss": 0.8895, "step": 105120 }, { "epoch": 0.7610009627425858, "grad_norm": 0.15026678144931793, "learning_rate": 4.239006275923473e-06, "loss": 0.895, "step": 105130 }, { "epoch": 0.761073349403172, "grad_norm": 0.16360147297382355, "learning_rate": 4.238933889262887e-06, "loss": 0.8825, "step": 105140 }, { "epoch": 0.7611457360637581, "grad_norm": 0.17714418470859528, "learning_rate": 4.2388615026023e-06, "loss": 0.8734, "step": 105150 }, { "epoch": 0.7612181227243443, "grad_norm": 0.1679549664258957, "learning_rate": 4.238789115941715e-06, "loss": 0.8955, "step": 105160 }, { "epoch": 0.7612905093849306, "grad_norm": 0.1560448259115219, "learning_rate": 4.238716729281128e-06, "loss": 0.893, "step": 105170 }, { "epoch": 0.7613628960455168, "grad_norm": 0.15543989837169647, "learning_rate": 4.238644342620542e-06, "loss": 0.8896, "step": 105180 }, { "epoch": 0.761435282706103, "grad_norm": 0.15329879522323608, "learning_rate": 4.2385719559599555e-06, "loss": 0.8894, "step": 105190 }, { "epoch": 0.7615076693666891, "grad_norm": 0.15145981311798096, "learning_rate": 4.23849956929937e-06, "loss": 0.8888, "step": 105200 }, { "epoch": 0.7615800560272753, "grad_norm": 0.14573727548122406, "learning_rate": 4.238427182638784e-06, "loss": 0.8841, "step": 105210 }, { "epoch": 0.7616524426878615, "grad_norm": 0.15317559242248535, "learning_rate": 4.238354795978197e-06, "loss": 0.8869, "step": 105220 }, { "epoch": 0.7617248293484477, "grad_norm": 0.17463774979114532, "learning_rate": 4.238282409317611e-06, "loss": 0.8971, "step": 105230 }, { "epoch": 0.7617972160090338, "grad_norm": 0.14841240644454956, "learning_rate": 4.238210022657025e-06, "loss": 0.8845, "step": 105240 }, { "epoch": 0.76186960266962, "grad_norm": 0.19012483954429626, "learning_rate": 4.238137635996439e-06, "loss": 0.8936, "step": 105250 }, { "epoch": 0.7619419893302062, "grad_norm": 0.2200062870979309, "learning_rate": 4.2380652493358525e-06, "loss": 0.8938, "step": 105260 }, { "epoch": 0.7620143759907925, "grad_norm": 0.16414809226989746, "learning_rate": 4.237992862675266e-06, "loss": 0.8925, "step": 105270 }, { "epoch": 0.7620867626513786, "grad_norm": 0.16722236573696136, "learning_rate": 4.237920476014681e-06, "loss": 0.8895, "step": 105280 }, { "epoch": 0.7621591493119648, "grad_norm": 0.17089565098285675, "learning_rate": 4.237848089354094e-06, "loss": 0.8922, "step": 105290 }, { "epoch": 0.762231535972551, "grad_norm": 0.1593043953180313, "learning_rate": 4.237775702693508e-06, "loss": 0.8754, "step": 105300 }, { "epoch": 0.7623039226331372, "grad_norm": 0.16244159638881683, "learning_rate": 4.2377033160329215e-06, "loss": 0.8918, "step": 105310 }, { "epoch": 0.7623763092937234, "grad_norm": 0.155963733792305, "learning_rate": 4.237630929372336e-06, "loss": 0.8795, "step": 105320 }, { "epoch": 0.7624486959543095, "grad_norm": 0.15185341238975525, "learning_rate": 4.2375585427117496e-06, "loss": 0.8946, "step": 105330 }, { "epoch": 0.7625210826148957, "grad_norm": 0.16116154193878174, "learning_rate": 4.237486156051163e-06, "loss": 0.8874, "step": 105340 }, { "epoch": 0.7625934692754819, "grad_norm": 0.15430016815662384, "learning_rate": 4.237413769390577e-06, "loss": 0.8861, "step": 105350 }, { "epoch": 0.7626658559360681, "grad_norm": 0.15945422649383545, "learning_rate": 4.237341382729991e-06, "loss": 0.8794, "step": 105360 }, { "epoch": 0.7627382425966542, "grad_norm": 0.1454370766878128, "learning_rate": 4.237268996069405e-06, "loss": 0.8782, "step": 105370 }, { "epoch": 0.7628106292572405, "grad_norm": 0.14304585754871368, "learning_rate": 4.2371966094088185e-06, "loss": 0.8795, "step": 105380 }, { "epoch": 0.7628830159178267, "grad_norm": 0.165323406457901, "learning_rate": 4.237124222748232e-06, "loss": 0.8894, "step": 105390 }, { "epoch": 0.7629554025784129, "grad_norm": 0.1540648490190506, "learning_rate": 4.2370518360876466e-06, "loss": 0.8751, "step": 105400 }, { "epoch": 0.763027789238999, "grad_norm": 0.1511969268321991, "learning_rate": 4.23697944942706e-06, "loss": 0.8812, "step": 105410 }, { "epoch": 0.7631001758995852, "grad_norm": 0.15881909430027008, "learning_rate": 4.236907062766474e-06, "loss": 0.8935, "step": 105420 }, { "epoch": 0.7631725625601714, "grad_norm": 0.1552649736404419, "learning_rate": 4.236834676105887e-06, "loss": 0.8867, "step": 105430 }, { "epoch": 0.7632449492207576, "grad_norm": 0.16431519389152527, "learning_rate": 4.236762289445301e-06, "loss": 0.8761, "step": 105440 }, { "epoch": 0.7633173358813438, "grad_norm": 0.17564263939857483, "learning_rate": 4.2366899027847155e-06, "loss": 0.8907, "step": 105450 }, { "epoch": 0.7633897225419299, "grad_norm": 0.15047433972358704, "learning_rate": 4.236617516124129e-06, "loss": 0.8874, "step": 105460 }, { "epoch": 0.7634621092025161, "grad_norm": 0.2053581178188324, "learning_rate": 4.236545129463543e-06, "loss": 0.8866, "step": 105470 }, { "epoch": 0.7635344958631024, "grad_norm": 0.16297610104084015, "learning_rate": 4.236472742802956e-06, "loss": 0.8818, "step": 105480 }, { "epoch": 0.7636068825236886, "grad_norm": 0.1815577745437622, "learning_rate": 4.236400356142371e-06, "loss": 0.8894, "step": 105490 }, { "epoch": 0.7636792691842748, "grad_norm": 0.14771369099617004, "learning_rate": 4.236327969481784e-06, "loss": 0.8805, "step": 105500 }, { "epoch": 0.7637516558448609, "grad_norm": 0.22556520998477936, "learning_rate": 4.236255582821198e-06, "loss": 0.8771, "step": 105510 }, { "epoch": 0.7638240425054471, "grad_norm": 0.15500441193580627, "learning_rate": 4.236183196160612e-06, "loss": 0.8816, "step": 105520 }, { "epoch": 0.7638964291660333, "grad_norm": 0.18774858117103577, "learning_rate": 4.236110809500026e-06, "loss": 0.8809, "step": 105530 }, { "epoch": 0.7639688158266195, "grad_norm": 0.15794971585273743, "learning_rate": 4.23603842283944e-06, "loss": 0.886, "step": 105540 }, { "epoch": 0.7640412024872056, "grad_norm": 0.16550391912460327, "learning_rate": 4.235966036178853e-06, "loss": 0.8963, "step": 105550 }, { "epoch": 0.7641135891477918, "grad_norm": 0.15253488719463348, "learning_rate": 4.235893649518267e-06, "loss": 0.8904, "step": 105560 }, { "epoch": 0.764185975808378, "grad_norm": 0.16290663182735443, "learning_rate": 4.2358212628576814e-06, "loss": 0.8908, "step": 105570 }, { "epoch": 0.7642583624689642, "grad_norm": 0.16055020689964294, "learning_rate": 4.235748876197095e-06, "loss": 0.8807, "step": 105580 }, { "epoch": 0.7643307491295505, "grad_norm": 0.1527642160654068, "learning_rate": 4.235676489536509e-06, "loss": 0.8882, "step": 105590 }, { "epoch": 0.7644031357901366, "grad_norm": 0.1476249247789383, "learning_rate": 4.235604102875922e-06, "loss": 0.8715, "step": 105600 }, { "epoch": 0.7644755224507228, "grad_norm": 0.15459424257278442, "learning_rate": 4.235531716215337e-06, "loss": 0.8853, "step": 105610 }, { "epoch": 0.764547909111309, "grad_norm": 0.17739763855934143, "learning_rate": 4.23545932955475e-06, "loss": 0.8846, "step": 105620 }, { "epoch": 0.7646202957718952, "grad_norm": 0.158504918217659, "learning_rate": 4.235386942894164e-06, "loss": 0.8892, "step": 105630 }, { "epoch": 0.7646926824324813, "grad_norm": 0.16947391629219055, "learning_rate": 4.235314556233578e-06, "loss": 0.8886, "step": 105640 }, { "epoch": 0.7647650690930675, "grad_norm": 0.15372443199157715, "learning_rate": 4.235242169572991e-06, "loss": 0.8757, "step": 105650 }, { "epoch": 0.7648374557536537, "grad_norm": 0.16209058463573456, "learning_rate": 4.235169782912405e-06, "loss": 0.8973, "step": 105660 }, { "epoch": 0.7649098424142399, "grad_norm": 0.15519694983959198, "learning_rate": 4.2350973962518184e-06, "loss": 0.8926, "step": 105670 }, { "epoch": 0.764982229074826, "grad_norm": 0.15895330905914307, "learning_rate": 4.235025009591233e-06, "loss": 0.8882, "step": 105680 }, { "epoch": 0.7650546157354122, "grad_norm": 0.14420805871486664, "learning_rate": 4.2349526229306465e-06, "loss": 0.8822, "step": 105690 }, { "epoch": 0.7651270023959985, "grad_norm": 0.15106813609600067, "learning_rate": 4.23488023627006e-06, "loss": 0.8966, "step": 105700 }, { "epoch": 0.7651993890565847, "grad_norm": 0.14976173639297485, "learning_rate": 4.234807849609474e-06, "loss": 0.8817, "step": 105710 }, { "epoch": 0.7652717757171709, "grad_norm": 0.16191081702709198, "learning_rate": 4.234735462948888e-06, "loss": 0.8792, "step": 105720 }, { "epoch": 0.765344162377757, "grad_norm": 0.15428221225738525, "learning_rate": 4.234663076288302e-06, "loss": 0.8836, "step": 105730 }, { "epoch": 0.7654165490383432, "grad_norm": 0.15489520132541656, "learning_rate": 4.2345906896277154e-06, "loss": 0.8786, "step": 105740 }, { "epoch": 0.7654889356989294, "grad_norm": 0.16444027423858643, "learning_rate": 4.234518302967129e-06, "loss": 0.883, "step": 105750 }, { "epoch": 0.7655613223595156, "grad_norm": 0.1668035089969635, "learning_rate": 4.2344459163065435e-06, "loss": 0.893, "step": 105760 }, { "epoch": 0.7656337090201017, "grad_norm": 0.15421725809574127, "learning_rate": 4.234373529645957e-06, "loss": 0.8888, "step": 105770 }, { "epoch": 0.7657060956806879, "grad_norm": 0.15628722310066223, "learning_rate": 4.234301142985371e-06, "loss": 0.8877, "step": 105780 }, { "epoch": 0.7657784823412741, "grad_norm": 0.16434313356876373, "learning_rate": 4.234228756324784e-06, "loss": 0.8856, "step": 105790 }, { "epoch": 0.7658508690018604, "grad_norm": 0.14600999653339386, "learning_rate": 4.234156369664199e-06, "loss": 0.8916, "step": 105800 }, { "epoch": 0.7659232556624466, "grad_norm": 0.23364056646823883, "learning_rate": 4.2340839830036125e-06, "loss": 0.8865, "step": 105810 }, { "epoch": 0.7659956423230327, "grad_norm": 0.15147335827350616, "learning_rate": 4.234011596343026e-06, "loss": 0.875, "step": 105820 }, { "epoch": 0.7660680289836189, "grad_norm": 0.16839240491390228, "learning_rate": 4.23393920968244e-06, "loss": 0.8807, "step": 105830 }, { "epoch": 0.7661404156442051, "grad_norm": 0.1934797465801239, "learning_rate": 4.233866823021854e-06, "loss": 0.8983, "step": 105840 }, { "epoch": 0.7662128023047913, "grad_norm": 0.15738998353481293, "learning_rate": 4.233794436361268e-06, "loss": 0.8784, "step": 105850 }, { "epoch": 0.7662851889653775, "grad_norm": 0.14478467404842377, "learning_rate": 4.233722049700681e-06, "loss": 0.896, "step": 105860 }, { "epoch": 0.7663575756259636, "grad_norm": 0.17345765233039856, "learning_rate": 4.233649663040095e-06, "loss": 0.8857, "step": 105870 }, { "epoch": 0.7664299622865498, "grad_norm": 0.15138719975948334, "learning_rate": 4.2335772763795095e-06, "loss": 0.8979, "step": 105880 }, { "epoch": 0.766502348947136, "grad_norm": 0.15025945007801056, "learning_rate": 4.233504889718923e-06, "loss": 0.8786, "step": 105890 }, { "epoch": 0.7665747356077222, "grad_norm": 0.16647055745124817, "learning_rate": 4.233432503058337e-06, "loss": 0.897, "step": 105900 }, { "epoch": 0.7666471222683084, "grad_norm": 0.16044731438159943, "learning_rate": 4.23336011639775e-06, "loss": 0.8921, "step": 105910 }, { "epoch": 0.7667195089288946, "grad_norm": 0.1512782722711563, "learning_rate": 4.233287729737165e-06, "loss": 0.8839, "step": 105920 }, { "epoch": 0.7667918955894808, "grad_norm": 0.16747620701789856, "learning_rate": 4.233215343076578e-06, "loss": 0.8857, "step": 105930 }, { "epoch": 0.766864282250067, "grad_norm": 0.17304562032222748, "learning_rate": 4.233142956415992e-06, "loss": 0.8897, "step": 105940 }, { "epoch": 0.7669366689106532, "grad_norm": 0.15581056475639343, "learning_rate": 4.233070569755406e-06, "loss": 0.8906, "step": 105950 }, { "epoch": 0.7670090555712393, "grad_norm": 0.17081882059574127, "learning_rate": 4.23299818309482e-06, "loss": 0.8815, "step": 105960 }, { "epoch": 0.7670814422318255, "grad_norm": 0.15507552027702332, "learning_rate": 4.232925796434234e-06, "loss": 0.8847, "step": 105970 }, { "epoch": 0.7671538288924117, "grad_norm": 0.16203370690345764, "learning_rate": 4.232853409773647e-06, "loss": 0.8817, "step": 105980 }, { "epoch": 0.7672262155529979, "grad_norm": 0.1585424691438675, "learning_rate": 4.232781023113061e-06, "loss": 0.8779, "step": 105990 }, { "epoch": 0.767298602213584, "grad_norm": 0.15579846501350403, "learning_rate": 4.232708636452475e-06, "loss": 0.8773, "step": 106000 }, { "epoch": 0.7673709888741703, "grad_norm": 0.1483168751001358, "learning_rate": 4.232636249791889e-06, "loss": 0.8916, "step": 106010 }, { "epoch": 0.7674433755347565, "grad_norm": 0.18050970137119293, "learning_rate": 4.232563863131303e-06, "loss": 0.8731, "step": 106020 }, { "epoch": 0.7675157621953427, "grad_norm": 0.17349663376808167, "learning_rate": 4.232491476470716e-06, "loss": 0.883, "step": 106030 }, { "epoch": 0.7675881488559289, "grad_norm": 0.17154832184314728, "learning_rate": 4.23241908981013e-06, "loss": 0.8769, "step": 106040 }, { "epoch": 0.767660535516515, "grad_norm": 0.15172946453094482, "learning_rate": 4.232346703149544e-06, "loss": 0.8715, "step": 106050 }, { "epoch": 0.7677329221771012, "grad_norm": 0.16642993688583374, "learning_rate": 4.232274316488958e-06, "loss": 0.8857, "step": 106060 }, { "epoch": 0.7678053088376874, "grad_norm": 0.15337328612804413, "learning_rate": 4.2322019298283716e-06, "loss": 0.872, "step": 106070 }, { "epoch": 0.7678776954982736, "grad_norm": 0.15362398326396942, "learning_rate": 4.232129543167785e-06, "loss": 0.8852, "step": 106080 }, { "epoch": 0.7679500821588597, "grad_norm": 0.16665634512901306, "learning_rate": 4.2320571565072e-06, "loss": 0.8877, "step": 106090 }, { "epoch": 0.7680224688194459, "grad_norm": 0.14505624771118164, "learning_rate": 4.231984769846613e-06, "loss": 0.8936, "step": 106100 }, { "epoch": 0.7680948554800321, "grad_norm": 0.16204003989696503, "learning_rate": 4.231912383186027e-06, "loss": 0.8903, "step": 106110 }, { "epoch": 0.7681672421406184, "grad_norm": 0.16394628584384918, "learning_rate": 4.2318399965254405e-06, "loss": 0.8986, "step": 106120 }, { "epoch": 0.7682396288012046, "grad_norm": 0.15969203412532806, "learning_rate": 4.231767609864855e-06, "loss": 0.8769, "step": 106130 }, { "epoch": 0.7683120154617907, "grad_norm": 0.1562148928642273, "learning_rate": 4.2316952232042686e-06, "loss": 0.872, "step": 106140 }, { "epoch": 0.7683844021223769, "grad_norm": 0.171428382396698, "learning_rate": 4.231622836543682e-06, "loss": 0.8811, "step": 106150 }, { "epoch": 0.7684567887829631, "grad_norm": 0.16853584349155426, "learning_rate": 4.231550449883096e-06, "loss": 0.8836, "step": 106160 }, { "epoch": 0.7685291754435493, "grad_norm": 0.15788713097572327, "learning_rate": 4.23147806322251e-06, "loss": 0.8912, "step": 106170 }, { "epoch": 0.7686015621041354, "grad_norm": 0.15766339004039764, "learning_rate": 4.231405676561923e-06, "loss": 0.8992, "step": 106180 }, { "epoch": 0.7686739487647216, "grad_norm": 0.14977054297924042, "learning_rate": 4.2313332899013375e-06, "loss": 0.8844, "step": 106190 }, { "epoch": 0.7687463354253078, "grad_norm": 0.15274566411972046, "learning_rate": 4.231260903240751e-06, "loss": 0.8939, "step": 106200 }, { "epoch": 0.768818722085894, "grad_norm": 0.17646168172359467, "learning_rate": 4.231188516580165e-06, "loss": 0.8851, "step": 106210 }, { "epoch": 0.7688911087464801, "grad_norm": 0.16536547243595123, "learning_rate": 4.231116129919578e-06, "loss": 0.879, "step": 106220 }, { "epoch": 0.7689634954070664, "grad_norm": 0.17476871609687805, "learning_rate": 4.231043743258992e-06, "loss": 0.8759, "step": 106230 }, { "epoch": 0.7690358820676526, "grad_norm": 0.1473916620016098, "learning_rate": 4.230971356598406e-06, "loss": 0.8778, "step": 106240 }, { "epoch": 0.7691082687282388, "grad_norm": 0.15702074766159058, "learning_rate": 4.23089896993782e-06, "loss": 0.8932, "step": 106250 }, { "epoch": 0.769180655388825, "grad_norm": 0.14984412491321564, "learning_rate": 4.230826583277234e-06, "loss": 0.8931, "step": 106260 }, { "epoch": 0.7692530420494111, "grad_norm": 0.15122853219509125, "learning_rate": 4.230754196616647e-06, "loss": 0.8831, "step": 106270 }, { "epoch": 0.7693254287099973, "grad_norm": 0.16905367374420166, "learning_rate": 4.230681809956062e-06, "loss": 0.8764, "step": 106280 }, { "epoch": 0.7693978153705835, "grad_norm": 0.15527789294719696, "learning_rate": 4.230609423295475e-06, "loss": 0.8804, "step": 106290 }, { "epoch": 0.7694702020311697, "grad_norm": 0.16785407066345215, "learning_rate": 4.230537036634889e-06, "loss": 0.8924, "step": 106300 }, { "epoch": 0.7695425886917558, "grad_norm": 0.15901590883731842, "learning_rate": 4.230464649974303e-06, "loss": 0.8922, "step": 106310 }, { "epoch": 0.769614975352342, "grad_norm": 0.17190630733966827, "learning_rate": 4.230392263313717e-06, "loss": 0.8802, "step": 106320 }, { "epoch": 0.7696873620129283, "grad_norm": 0.27805203199386597, "learning_rate": 4.230319876653131e-06, "loss": 0.8847, "step": 106330 }, { "epoch": 0.7697597486735145, "grad_norm": 0.1641608327627182, "learning_rate": 4.230247489992544e-06, "loss": 0.8951, "step": 106340 }, { "epoch": 0.7698321353341007, "grad_norm": 0.15645158290863037, "learning_rate": 4.230175103331958e-06, "loss": 0.8771, "step": 106350 }, { "epoch": 0.7699045219946868, "grad_norm": 0.15807659924030304, "learning_rate": 4.230102716671372e-06, "loss": 0.8841, "step": 106360 }, { "epoch": 0.769976908655273, "grad_norm": 0.15633799135684967, "learning_rate": 4.230030330010786e-06, "loss": 0.8917, "step": 106370 }, { "epoch": 0.7700492953158592, "grad_norm": 0.15708708763122559, "learning_rate": 4.2299579433502e-06, "loss": 0.8789, "step": 106380 }, { "epoch": 0.7701216819764454, "grad_norm": 0.15080325305461884, "learning_rate": 4.229885556689613e-06, "loss": 0.8861, "step": 106390 }, { "epoch": 0.7701940686370315, "grad_norm": 0.14527060091495514, "learning_rate": 4.229813170029028e-06, "loss": 0.8835, "step": 106400 }, { "epoch": 0.7702664552976177, "grad_norm": 0.1920609176158905, "learning_rate": 4.229740783368441e-06, "loss": 0.8916, "step": 106410 }, { "epoch": 0.7703388419582039, "grad_norm": 0.14528389275074005, "learning_rate": 4.229668396707855e-06, "loss": 0.8716, "step": 106420 }, { "epoch": 0.7704112286187901, "grad_norm": 0.1617295742034912, "learning_rate": 4.2295960100472685e-06, "loss": 0.8786, "step": 106430 }, { "epoch": 0.7704836152793764, "grad_norm": 0.15510478615760803, "learning_rate": 4.229523623386683e-06, "loss": 0.8832, "step": 106440 }, { "epoch": 0.7705560019399625, "grad_norm": 0.1545783132314682, "learning_rate": 4.229451236726097e-06, "loss": 0.8805, "step": 106450 }, { "epoch": 0.7706283886005487, "grad_norm": 0.15689776837825775, "learning_rate": 4.22937885006551e-06, "loss": 0.8718, "step": 106460 }, { "epoch": 0.7707007752611349, "grad_norm": 0.1707046777009964, "learning_rate": 4.229306463404924e-06, "loss": 0.8843, "step": 106470 }, { "epoch": 0.7707731619217211, "grad_norm": 0.16164378821849823, "learning_rate": 4.229234076744338e-06, "loss": 0.8876, "step": 106480 }, { "epoch": 0.7708455485823072, "grad_norm": 0.16344380378723145, "learning_rate": 4.229161690083752e-06, "loss": 0.8894, "step": 106490 }, { "epoch": 0.7709179352428934, "grad_norm": 0.15482939779758453, "learning_rate": 4.2290893034231655e-06, "loss": 0.887, "step": 106500 }, { "epoch": 0.7709903219034796, "grad_norm": 0.16493044793605804, "learning_rate": 4.229016916762579e-06, "loss": 0.8868, "step": 106510 }, { "epoch": 0.7710627085640658, "grad_norm": 0.15701046586036682, "learning_rate": 4.228944530101994e-06, "loss": 0.8802, "step": 106520 }, { "epoch": 0.771135095224652, "grad_norm": 0.15896005928516388, "learning_rate": 4.228872143441407e-06, "loss": 0.8828, "step": 106530 }, { "epoch": 0.7712074818852381, "grad_norm": 0.15199479460716248, "learning_rate": 4.228799756780821e-06, "loss": 0.8855, "step": 106540 }, { "epoch": 0.7712798685458244, "grad_norm": 0.14489343762397766, "learning_rate": 4.2287273701202345e-06, "loss": 0.8969, "step": 106550 }, { "epoch": 0.7713522552064106, "grad_norm": 0.1755223423242569, "learning_rate": 4.228654983459649e-06, "loss": 0.8788, "step": 106560 }, { "epoch": 0.7714246418669968, "grad_norm": 0.17546701431274414, "learning_rate": 4.2285825967990625e-06, "loss": 0.8877, "step": 106570 }, { "epoch": 0.771497028527583, "grad_norm": 0.1525055468082428, "learning_rate": 4.228510210138476e-06, "loss": 0.8842, "step": 106580 }, { "epoch": 0.7715694151881691, "grad_norm": 0.14916154742240906, "learning_rate": 4.22843782347789e-06, "loss": 0.8826, "step": 106590 }, { "epoch": 0.7716418018487553, "grad_norm": 0.16282054781913757, "learning_rate": 4.228365436817304e-06, "loss": 0.8843, "step": 106600 }, { "epoch": 0.7717141885093415, "grad_norm": 0.16194520890712738, "learning_rate": 4.228293050156718e-06, "loss": 0.8881, "step": 106610 }, { "epoch": 0.7717865751699277, "grad_norm": 0.1717979609966278, "learning_rate": 4.2282206634961315e-06, "loss": 0.8785, "step": 106620 }, { "epoch": 0.7718589618305138, "grad_norm": 0.24075248837471008, "learning_rate": 4.228148276835545e-06, "loss": 0.8907, "step": 106630 }, { "epoch": 0.7719313484911, "grad_norm": 0.3043558597564697, "learning_rate": 4.2280758901749595e-06, "loss": 0.8813, "step": 106640 }, { "epoch": 0.7720037351516863, "grad_norm": 0.17792464792728424, "learning_rate": 4.228003503514373e-06, "loss": 0.8881, "step": 106650 }, { "epoch": 0.7720761218122725, "grad_norm": 0.16710121929645538, "learning_rate": 4.227931116853787e-06, "loss": 0.8809, "step": 106660 }, { "epoch": 0.7721485084728587, "grad_norm": 0.14636112749576569, "learning_rate": 4.2278587301932e-06, "loss": 0.8869, "step": 106670 }, { "epoch": 0.7722208951334448, "grad_norm": 0.16227993369102478, "learning_rate": 4.227786343532614e-06, "loss": 0.8696, "step": 106680 }, { "epoch": 0.772293281794031, "grad_norm": 0.15392231941223145, "learning_rate": 4.2277139568720285e-06, "loss": 0.8921, "step": 106690 }, { "epoch": 0.7723656684546172, "grad_norm": 0.1547156274318695, "learning_rate": 4.227641570211442e-06, "loss": 0.8892, "step": 106700 }, { "epoch": 0.7724380551152034, "grad_norm": 0.15668383240699768, "learning_rate": 4.227569183550856e-06, "loss": 0.8795, "step": 106710 }, { "epoch": 0.7725104417757895, "grad_norm": 0.15469415485858917, "learning_rate": 4.227496796890269e-06, "loss": 0.8863, "step": 106720 }, { "epoch": 0.7725828284363757, "grad_norm": 0.16153530776500702, "learning_rate": 4.227424410229683e-06, "loss": 0.8813, "step": 106730 }, { "epoch": 0.7726552150969619, "grad_norm": 0.15066730976104736, "learning_rate": 4.2273520235690965e-06, "loss": 0.8876, "step": 106740 }, { "epoch": 0.7727276017575481, "grad_norm": 0.15237820148468018, "learning_rate": 4.227279636908511e-06, "loss": 0.8915, "step": 106750 }, { "epoch": 0.7727999884181344, "grad_norm": 0.1508363038301468, "learning_rate": 4.227207250247925e-06, "loss": 0.8846, "step": 106760 }, { "epoch": 0.7728723750787205, "grad_norm": 0.16356633603572845, "learning_rate": 4.227134863587338e-06, "loss": 0.8851, "step": 106770 }, { "epoch": 0.7729447617393067, "grad_norm": 0.1548301726579666, "learning_rate": 4.227062476926752e-06, "loss": 0.8941, "step": 106780 }, { "epoch": 0.7730171483998929, "grad_norm": 0.18178719282150269, "learning_rate": 4.226990090266166e-06, "loss": 0.8839, "step": 106790 }, { "epoch": 0.7730895350604791, "grad_norm": 0.16349080204963684, "learning_rate": 4.22691770360558e-06, "loss": 0.8818, "step": 106800 }, { "epoch": 0.7731619217210652, "grad_norm": 0.15550731122493744, "learning_rate": 4.2268453169449936e-06, "loss": 0.8791, "step": 106810 }, { "epoch": 0.7732343083816514, "grad_norm": 0.15300001204013824, "learning_rate": 4.226772930284407e-06, "loss": 0.8669, "step": 106820 }, { "epoch": 0.7733066950422376, "grad_norm": 0.15288126468658447, "learning_rate": 4.226700543623821e-06, "loss": 0.8753, "step": 106830 }, { "epoch": 0.7733790817028238, "grad_norm": 0.17350119352340698, "learning_rate": 4.226628156963235e-06, "loss": 0.8931, "step": 106840 }, { "epoch": 0.7734514683634099, "grad_norm": 0.144486203789711, "learning_rate": 4.226555770302649e-06, "loss": 0.8797, "step": 106850 }, { "epoch": 0.7735238550239962, "grad_norm": 0.15180061757564545, "learning_rate": 4.2264833836420625e-06, "loss": 0.883, "step": 106860 }, { "epoch": 0.7735962416845824, "grad_norm": 0.16879640519618988, "learning_rate": 4.226410996981476e-06, "loss": 0.8795, "step": 106870 }, { "epoch": 0.7736686283451686, "grad_norm": 0.17711849510669708, "learning_rate": 4.2263386103208906e-06, "loss": 0.881, "step": 106880 }, { "epoch": 0.7737410150057548, "grad_norm": 0.15794023871421814, "learning_rate": 4.226266223660304e-06, "loss": 0.883, "step": 106890 }, { "epoch": 0.7738134016663409, "grad_norm": 0.16354835033416748, "learning_rate": 4.226193836999718e-06, "loss": 0.8808, "step": 106900 }, { "epoch": 0.7738857883269271, "grad_norm": 0.16571597754955292, "learning_rate": 4.226121450339131e-06, "loss": 0.879, "step": 106910 }, { "epoch": 0.7739581749875133, "grad_norm": 0.16619838774204254, "learning_rate": 4.226049063678546e-06, "loss": 0.8966, "step": 106920 }, { "epoch": 0.7740305616480995, "grad_norm": 0.16915611922740936, "learning_rate": 4.2259766770179595e-06, "loss": 0.867, "step": 106930 }, { "epoch": 0.7741029483086856, "grad_norm": 0.15059566497802734, "learning_rate": 4.225904290357373e-06, "loss": 0.8969, "step": 106940 }, { "epoch": 0.7741753349692718, "grad_norm": 0.15447822213172913, "learning_rate": 4.225831903696787e-06, "loss": 0.8692, "step": 106950 }, { "epoch": 0.774247721629858, "grad_norm": 0.15346454083919525, "learning_rate": 4.225759517036201e-06, "loss": 0.879, "step": 106960 }, { "epoch": 0.7743201082904443, "grad_norm": 0.15431329607963562, "learning_rate": 4.225687130375615e-06, "loss": 0.8781, "step": 106970 }, { "epoch": 0.7743924949510305, "grad_norm": 0.15542955696582794, "learning_rate": 4.225614743715028e-06, "loss": 0.8815, "step": 106980 }, { "epoch": 0.7744648816116166, "grad_norm": 0.15447083115577698, "learning_rate": 4.225542357054442e-06, "loss": 0.8799, "step": 106990 }, { "epoch": 0.7745372682722028, "grad_norm": 0.17327377200126648, "learning_rate": 4.2254699703938565e-06, "loss": 0.8878, "step": 107000 }, { "epoch": 0.774609654932789, "grad_norm": 0.15756526589393616, "learning_rate": 4.22539758373327e-06, "loss": 0.8798, "step": 107010 }, { "epoch": 0.7746820415933752, "grad_norm": 0.15248951315879822, "learning_rate": 4.225325197072684e-06, "loss": 0.8899, "step": 107020 }, { "epoch": 0.7747544282539613, "grad_norm": 0.1477757692337036, "learning_rate": 4.225252810412097e-06, "loss": 0.8826, "step": 107030 }, { "epoch": 0.7748268149145475, "grad_norm": 0.1861177235841751, "learning_rate": 4.225180423751512e-06, "loss": 0.8875, "step": 107040 }, { "epoch": 0.7748992015751337, "grad_norm": 0.14727658033370972, "learning_rate": 4.2251080370909254e-06, "loss": 0.8786, "step": 107050 }, { "epoch": 0.7749715882357199, "grad_norm": 0.15688657760620117, "learning_rate": 4.225035650430339e-06, "loss": 0.8887, "step": 107060 }, { "epoch": 0.775043974896306, "grad_norm": 0.15266485512256622, "learning_rate": 4.224963263769753e-06, "loss": 0.8883, "step": 107070 }, { "epoch": 0.7751163615568923, "grad_norm": 0.15053589642047882, "learning_rate": 4.224890877109167e-06, "loss": 0.8763, "step": 107080 }, { "epoch": 0.7751887482174785, "grad_norm": 0.1497945785522461, "learning_rate": 4.224818490448581e-06, "loss": 0.8768, "step": 107090 }, { "epoch": 0.7752611348780647, "grad_norm": 0.16105857491493225, "learning_rate": 4.224746103787994e-06, "loss": 0.8819, "step": 107100 }, { "epoch": 0.7753335215386509, "grad_norm": 0.15862345695495605, "learning_rate": 4.224673717127408e-06, "loss": 0.8753, "step": 107110 }, { "epoch": 0.775405908199237, "grad_norm": 0.15614831447601318, "learning_rate": 4.2246013304668224e-06, "loss": 0.8811, "step": 107120 }, { "epoch": 0.7754782948598232, "grad_norm": 0.19445110857486725, "learning_rate": 4.224528943806236e-06, "loss": 0.8964, "step": 107130 }, { "epoch": 0.7755506815204094, "grad_norm": 0.15942369401454926, "learning_rate": 4.22445655714565e-06, "loss": 0.8658, "step": 107140 }, { "epoch": 0.7756230681809956, "grad_norm": 0.1634572595357895, "learning_rate": 4.224384170485063e-06, "loss": 0.8844, "step": 107150 }, { "epoch": 0.7756954548415818, "grad_norm": 0.14864301681518555, "learning_rate": 4.224311783824478e-06, "loss": 0.8885, "step": 107160 }, { "epoch": 0.7757678415021679, "grad_norm": 0.1603207290172577, "learning_rate": 4.224239397163891e-06, "loss": 0.8811, "step": 107170 }, { "epoch": 0.7758402281627542, "grad_norm": 0.15209531784057617, "learning_rate": 4.224167010503305e-06, "loss": 0.8856, "step": 107180 }, { "epoch": 0.7759126148233404, "grad_norm": 0.1611732542514801, "learning_rate": 4.224094623842719e-06, "loss": 0.8802, "step": 107190 }, { "epoch": 0.7759850014839266, "grad_norm": 0.16331136226654053, "learning_rate": 4.224022237182133e-06, "loss": 0.8931, "step": 107200 }, { "epoch": 0.7760573881445127, "grad_norm": 0.17706800997257233, "learning_rate": 4.223949850521547e-06, "loss": 0.8834, "step": 107210 }, { "epoch": 0.7761297748050989, "grad_norm": 0.14724914729595184, "learning_rate": 4.22387746386096e-06, "loss": 0.8868, "step": 107220 }, { "epoch": 0.7762021614656851, "grad_norm": 0.16513299942016602, "learning_rate": 4.223805077200374e-06, "loss": 0.8752, "step": 107230 }, { "epoch": 0.7762745481262713, "grad_norm": 0.17045806348323822, "learning_rate": 4.2237326905397875e-06, "loss": 0.8909, "step": 107240 }, { "epoch": 0.7763469347868575, "grad_norm": 0.189479261636734, "learning_rate": 4.223660303879201e-06, "loss": 0.884, "step": 107250 }, { "epoch": 0.7764193214474436, "grad_norm": 0.15895524621009827, "learning_rate": 4.223587917218615e-06, "loss": 0.8878, "step": 107260 }, { "epoch": 0.7764917081080298, "grad_norm": 0.16213759779930115, "learning_rate": 4.223515530558029e-06, "loss": 0.8825, "step": 107270 }, { "epoch": 0.776564094768616, "grad_norm": 0.1568875014781952, "learning_rate": 4.223443143897443e-06, "loss": 0.8751, "step": 107280 }, { "epoch": 0.7766364814292023, "grad_norm": 0.1578979790210724, "learning_rate": 4.2233707572368565e-06, "loss": 0.883, "step": 107290 }, { "epoch": 0.7767088680897885, "grad_norm": 0.1542271375656128, "learning_rate": 4.22329837057627e-06, "loss": 0.8786, "step": 107300 }, { "epoch": 0.7767812547503746, "grad_norm": 0.16791146993637085, "learning_rate": 4.2232259839156845e-06, "loss": 0.8914, "step": 107310 }, { "epoch": 0.7768536414109608, "grad_norm": 0.1576090306043625, "learning_rate": 4.223153597255098e-06, "loss": 0.894, "step": 107320 }, { "epoch": 0.776926028071547, "grad_norm": 0.15662311017513275, "learning_rate": 4.223081210594512e-06, "loss": 0.8998, "step": 107330 }, { "epoch": 0.7769984147321332, "grad_norm": 0.14646415412425995, "learning_rate": 4.223008823933925e-06, "loss": 0.867, "step": 107340 }, { "epoch": 0.7770708013927193, "grad_norm": 0.14766213297843933, "learning_rate": 4.22293643727334e-06, "loss": 0.8937, "step": 107350 }, { "epoch": 0.7771431880533055, "grad_norm": 0.1753314882516861, "learning_rate": 4.2228640506127535e-06, "loss": 0.8736, "step": 107360 }, { "epoch": 0.7772155747138917, "grad_norm": 0.15868641436100006, "learning_rate": 4.222791663952167e-06, "loss": 0.9, "step": 107370 }, { "epoch": 0.7772879613744779, "grad_norm": 0.15650039911270142, "learning_rate": 4.222719277291581e-06, "loss": 0.886, "step": 107380 }, { "epoch": 0.7773603480350642, "grad_norm": 0.17144428193569183, "learning_rate": 4.222646890630995e-06, "loss": 0.892, "step": 107390 }, { "epoch": 0.7774327346956503, "grad_norm": 0.1921321600675583, "learning_rate": 4.222574503970409e-06, "loss": 0.8881, "step": 107400 }, { "epoch": 0.7775051213562365, "grad_norm": 0.1584097146987915, "learning_rate": 4.222502117309822e-06, "loss": 0.8806, "step": 107410 }, { "epoch": 0.7775775080168227, "grad_norm": 0.15249648690223694, "learning_rate": 4.222429730649236e-06, "loss": 0.8956, "step": 107420 }, { "epoch": 0.7776498946774089, "grad_norm": 0.15863776206970215, "learning_rate": 4.2223573439886505e-06, "loss": 0.8773, "step": 107430 }, { "epoch": 0.777722281337995, "grad_norm": 0.16656135022640228, "learning_rate": 4.222284957328064e-06, "loss": 0.8973, "step": 107440 }, { "epoch": 0.7777946679985812, "grad_norm": 0.15339358150959015, "learning_rate": 4.222212570667478e-06, "loss": 0.8851, "step": 107450 }, { "epoch": 0.7778670546591674, "grad_norm": 0.1573115736246109, "learning_rate": 4.222140184006891e-06, "loss": 0.8841, "step": 107460 }, { "epoch": 0.7779394413197536, "grad_norm": 0.1542723923921585, "learning_rate": 4.222067797346305e-06, "loss": 0.8954, "step": 107470 }, { "epoch": 0.7780118279803397, "grad_norm": 0.1786673665046692, "learning_rate": 4.221995410685719e-06, "loss": 0.8873, "step": 107480 }, { "epoch": 0.7780842146409259, "grad_norm": 0.17204001545906067, "learning_rate": 4.221923024025133e-06, "loss": 0.8873, "step": 107490 }, { "epoch": 0.7781566013015122, "grad_norm": 0.14830465614795685, "learning_rate": 4.221850637364547e-06, "loss": 0.8775, "step": 107500 }, { "epoch": 0.7782289879620984, "grad_norm": 0.1740427315235138, "learning_rate": 4.22177825070396e-06, "loss": 0.8825, "step": 107510 }, { "epoch": 0.7783013746226846, "grad_norm": 0.1551501452922821, "learning_rate": 4.221705864043375e-06, "loss": 0.9021, "step": 107520 }, { "epoch": 0.7783737612832707, "grad_norm": 0.1553511619567871, "learning_rate": 4.221633477382788e-06, "loss": 0.8773, "step": 107530 }, { "epoch": 0.7784461479438569, "grad_norm": 0.15967567265033722, "learning_rate": 4.221561090722202e-06, "loss": 0.8829, "step": 107540 }, { "epoch": 0.7785185346044431, "grad_norm": 0.14767323434352875, "learning_rate": 4.2214887040616156e-06, "loss": 0.8687, "step": 107550 }, { "epoch": 0.7785909212650293, "grad_norm": 0.14567668735980988, "learning_rate": 4.22141631740103e-06, "loss": 0.8757, "step": 107560 }, { "epoch": 0.7786633079256154, "grad_norm": 0.14703209698200226, "learning_rate": 4.221343930740444e-06, "loss": 0.8835, "step": 107570 }, { "epoch": 0.7787356945862016, "grad_norm": 0.15349993109703064, "learning_rate": 4.221271544079857e-06, "loss": 0.8685, "step": 107580 }, { "epoch": 0.7788080812467878, "grad_norm": 0.16429787874221802, "learning_rate": 4.221199157419271e-06, "loss": 0.8891, "step": 107590 }, { "epoch": 0.778880467907374, "grad_norm": 0.15371567010879517, "learning_rate": 4.221126770758685e-06, "loss": 0.8729, "step": 107600 }, { "epoch": 0.7789528545679603, "grad_norm": 0.1502668410539627, "learning_rate": 4.221054384098099e-06, "loss": 0.8834, "step": 107610 }, { "epoch": 0.7790252412285464, "grad_norm": 0.166198268532753, "learning_rate": 4.2209819974375126e-06, "loss": 0.876, "step": 107620 }, { "epoch": 0.7790976278891326, "grad_norm": 0.1724855899810791, "learning_rate": 4.220909610776926e-06, "loss": 0.8752, "step": 107630 }, { "epoch": 0.7791700145497188, "grad_norm": 0.15645526349544525, "learning_rate": 4.220837224116341e-06, "loss": 0.8787, "step": 107640 }, { "epoch": 0.779242401210305, "grad_norm": 0.16536635160446167, "learning_rate": 4.220764837455754e-06, "loss": 0.8852, "step": 107650 }, { "epoch": 0.7793147878708911, "grad_norm": 0.1508960872888565, "learning_rate": 4.220692450795168e-06, "loss": 0.8768, "step": 107660 }, { "epoch": 0.7793871745314773, "grad_norm": 0.1536785364151001, "learning_rate": 4.2206200641345815e-06, "loss": 0.8825, "step": 107670 }, { "epoch": 0.7794595611920635, "grad_norm": 0.16157682240009308, "learning_rate": 4.220547677473996e-06, "loss": 0.8861, "step": 107680 }, { "epoch": 0.7795319478526497, "grad_norm": 0.15818680822849274, "learning_rate": 4.2204752908134096e-06, "loss": 0.8745, "step": 107690 }, { "epoch": 0.7796043345132359, "grad_norm": 0.16059935092926025, "learning_rate": 4.220402904152823e-06, "loss": 0.8934, "step": 107700 }, { "epoch": 0.7796767211738221, "grad_norm": 0.15591976046562195, "learning_rate": 4.220330517492237e-06, "loss": 0.8838, "step": 107710 }, { "epoch": 0.7797491078344083, "grad_norm": 0.15530046820640564, "learning_rate": 4.220258130831651e-06, "loss": 0.8794, "step": 107720 }, { "epoch": 0.7798214944949945, "grad_norm": 0.1756300926208496, "learning_rate": 4.220185744171065e-06, "loss": 0.8819, "step": 107730 }, { "epoch": 0.7798938811555807, "grad_norm": 0.16055084764957428, "learning_rate": 4.2201133575104785e-06, "loss": 0.8594, "step": 107740 }, { "epoch": 0.7799662678161668, "grad_norm": 0.1619112342596054, "learning_rate": 4.220040970849892e-06, "loss": 0.8905, "step": 107750 }, { "epoch": 0.780038654476753, "grad_norm": 0.15275640785694122, "learning_rate": 4.2199685841893066e-06, "loss": 0.8799, "step": 107760 }, { "epoch": 0.7801110411373392, "grad_norm": 0.1480998545885086, "learning_rate": 4.219896197528719e-06, "loss": 0.8672, "step": 107770 }, { "epoch": 0.7801834277979254, "grad_norm": 0.15729357302188873, "learning_rate": 4.219823810868133e-06, "loss": 0.8956, "step": 107780 }, { "epoch": 0.7802558144585116, "grad_norm": 0.1509513109922409, "learning_rate": 4.2197514242075474e-06, "loss": 0.8796, "step": 107790 }, { "epoch": 0.7803282011190977, "grad_norm": 0.15310880541801453, "learning_rate": 4.219679037546961e-06, "loss": 0.8726, "step": 107800 }, { "epoch": 0.7804005877796839, "grad_norm": 0.16130521893501282, "learning_rate": 4.219606650886375e-06, "loss": 0.8855, "step": 107810 }, { "epoch": 0.7804729744402702, "grad_norm": 0.18404695391654968, "learning_rate": 4.219534264225788e-06, "loss": 0.8839, "step": 107820 }, { "epoch": 0.7805453611008564, "grad_norm": 0.16077350080013275, "learning_rate": 4.219461877565203e-06, "loss": 0.8933, "step": 107830 }, { "epoch": 0.7806177477614425, "grad_norm": 0.1624414175748825, "learning_rate": 4.219389490904616e-06, "loss": 0.871, "step": 107840 }, { "epoch": 0.7806901344220287, "grad_norm": 0.16145184636116028, "learning_rate": 4.21931710424403e-06, "loss": 0.8888, "step": 107850 }, { "epoch": 0.7807625210826149, "grad_norm": 0.17542187869548798, "learning_rate": 4.219244717583444e-06, "loss": 0.8951, "step": 107860 }, { "epoch": 0.7808349077432011, "grad_norm": 0.14952528476715088, "learning_rate": 4.219172330922858e-06, "loss": 0.8886, "step": 107870 }, { "epoch": 0.7809072944037873, "grad_norm": 0.1723993569612503, "learning_rate": 4.219099944262272e-06, "loss": 0.8807, "step": 107880 }, { "epoch": 0.7809796810643734, "grad_norm": 0.1505005955696106, "learning_rate": 4.219027557601685e-06, "loss": 0.8711, "step": 107890 }, { "epoch": 0.7810520677249596, "grad_norm": 0.15281620621681213, "learning_rate": 4.218955170941099e-06, "loss": 0.887, "step": 107900 }, { "epoch": 0.7811244543855458, "grad_norm": 0.1522083282470703, "learning_rate": 4.218882784280513e-06, "loss": 0.8973, "step": 107910 }, { "epoch": 0.7811968410461321, "grad_norm": 0.16179485619068146, "learning_rate": 4.218810397619927e-06, "loss": 0.8856, "step": 107920 }, { "epoch": 0.7812692277067183, "grad_norm": 0.15184660255908966, "learning_rate": 4.218738010959341e-06, "loss": 0.8856, "step": 107930 }, { "epoch": 0.7813416143673044, "grad_norm": 0.19640418887138367, "learning_rate": 4.218665624298754e-06, "loss": 0.8837, "step": 107940 }, { "epoch": 0.7814140010278906, "grad_norm": 0.16016776859760284, "learning_rate": 4.218593237638169e-06, "loss": 0.887, "step": 107950 }, { "epoch": 0.7814863876884768, "grad_norm": 0.1551421880722046, "learning_rate": 4.218520850977582e-06, "loss": 0.8665, "step": 107960 }, { "epoch": 0.781558774349063, "grad_norm": 0.16472944617271423, "learning_rate": 4.218448464316996e-06, "loss": 0.8814, "step": 107970 }, { "epoch": 0.7816311610096491, "grad_norm": 0.1580583155155182, "learning_rate": 4.2183760776564095e-06, "loss": 0.8823, "step": 107980 }, { "epoch": 0.7817035476702353, "grad_norm": 0.1603555530309677, "learning_rate": 4.218303690995824e-06, "loss": 0.872, "step": 107990 }, { "epoch": 0.7817759343308215, "grad_norm": 0.1556985229253769, "learning_rate": 4.218231304335238e-06, "loss": 0.8787, "step": 108000 }, { "epoch": 0.7818483209914077, "grad_norm": 0.1534159928560257, "learning_rate": 4.218158917674651e-06, "loss": 0.9064, "step": 108010 }, { "epoch": 0.7819207076519938, "grad_norm": 0.1716359406709671, "learning_rate": 4.218086531014065e-06, "loss": 0.8834, "step": 108020 }, { "epoch": 0.7819930943125801, "grad_norm": 0.15171417593955994, "learning_rate": 4.218014144353479e-06, "loss": 0.8825, "step": 108030 }, { "epoch": 0.7820654809731663, "grad_norm": 0.1647900491952896, "learning_rate": 4.217941757692893e-06, "loss": 0.8718, "step": 108040 }, { "epoch": 0.7821378676337525, "grad_norm": 0.1467769294977188, "learning_rate": 4.2178693710323065e-06, "loss": 0.8832, "step": 108050 }, { "epoch": 0.7822102542943387, "grad_norm": 0.15823620557785034, "learning_rate": 4.21779698437172e-06, "loss": 0.8914, "step": 108060 }, { "epoch": 0.7822826409549248, "grad_norm": 0.17605745792388916, "learning_rate": 4.217724597711134e-06, "loss": 0.8871, "step": 108070 }, { "epoch": 0.782355027615511, "grad_norm": 0.15321215987205505, "learning_rate": 4.217652211050548e-06, "loss": 0.8906, "step": 108080 }, { "epoch": 0.7824274142760972, "grad_norm": 0.15181642770767212, "learning_rate": 4.217579824389962e-06, "loss": 0.8662, "step": 108090 }, { "epoch": 0.7824998009366834, "grad_norm": 0.17298097908496857, "learning_rate": 4.2175074377293755e-06, "loss": 0.8795, "step": 108100 }, { "epoch": 0.7825721875972695, "grad_norm": 0.1724858433008194, "learning_rate": 4.217435051068789e-06, "loss": 0.9011, "step": 108110 }, { "epoch": 0.7826445742578557, "grad_norm": 0.1518513262271881, "learning_rate": 4.2173626644082035e-06, "loss": 0.8938, "step": 108120 }, { "epoch": 0.7827169609184419, "grad_norm": 0.15819215774536133, "learning_rate": 4.217290277747617e-06, "loss": 0.8725, "step": 108130 }, { "epoch": 0.7827893475790282, "grad_norm": 0.1756516396999359, "learning_rate": 4.217217891087031e-06, "loss": 0.8867, "step": 108140 }, { "epoch": 0.7828617342396144, "grad_norm": 0.1819458156824112, "learning_rate": 4.217145504426444e-06, "loss": 0.8883, "step": 108150 }, { "epoch": 0.7829341209002005, "grad_norm": 0.14967991411685944, "learning_rate": 4.217073117765859e-06, "loss": 0.8814, "step": 108160 }, { "epoch": 0.7830065075607867, "grad_norm": 0.15189316868782043, "learning_rate": 4.2170007311052725e-06, "loss": 0.8822, "step": 108170 }, { "epoch": 0.7830788942213729, "grad_norm": 0.159009650349617, "learning_rate": 4.216928344444686e-06, "loss": 0.8774, "step": 108180 }, { "epoch": 0.7831512808819591, "grad_norm": 0.15388379991054535, "learning_rate": 4.2168559577841e-06, "loss": 0.8821, "step": 108190 }, { "epoch": 0.7832236675425452, "grad_norm": 0.1746244877576828, "learning_rate": 4.216783571123514e-06, "loss": 0.8937, "step": 108200 }, { "epoch": 0.7832960542031314, "grad_norm": 0.1544540673494339, "learning_rate": 4.216711184462928e-06, "loss": 0.8892, "step": 108210 }, { "epoch": 0.7833684408637176, "grad_norm": 0.15060152113437653, "learning_rate": 4.216638797802341e-06, "loss": 0.8786, "step": 108220 }, { "epoch": 0.7834408275243038, "grad_norm": 0.15450206398963928, "learning_rate": 4.216566411141755e-06, "loss": 0.8823, "step": 108230 }, { "epoch": 0.7835132141848901, "grad_norm": 0.1713588386774063, "learning_rate": 4.2164940244811695e-06, "loss": 0.896, "step": 108240 }, { "epoch": 0.7835856008454762, "grad_norm": 0.1668989658355713, "learning_rate": 4.216421637820583e-06, "loss": 0.8828, "step": 108250 }, { "epoch": 0.7836579875060624, "grad_norm": 0.1604335904121399, "learning_rate": 4.216349251159997e-06, "loss": 0.8747, "step": 108260 }, { "epoch": 0.7837303741666486, "grad_norm": 0.1643938422203064, "learning_rate": 4.21627686449941e-06, "loss": 0.8799, "step": 108270 }, { "epoch": 0.7838027608272348, "grad_norm": 0.1590445637702942, "learning_rate": 4.216204477838825e-06, "loss": 0.8859, "step": 108280 }, { "epoch": 0.783875147487821, "grad_norm": 0.16419881582260132, "learning_rate": 4.216132091178238e-06, "loss": 0.8834, "step": 108290 }, { "epoch": 0.7839475341484071, "grad_norm": 0.14862844347953796, "learning_rate": 4.216059704517651e-06, "loss": 0.8793, "step": 108300 }, { "epoch": 0.7840199208089933, "grad_norm": 0.1492660492658615, "learning_rate": 4.215987317857066e-06, "loss": 0.8808, "step": 108310 }, { "epoch": 0.7840923074695795, "grad_norm": 0.14114709198474884, "learning_rate": 4.215914931196479e-06, "loss": 0.865, "step": 108320 }, { "epoch": 0.7841646941301657, "grad_norm": 0.16671665012836456, "learning_rate": 4.215842544535893e-06, "loss": 0.8726, "step": 108330 }, { "epoch": 0.7842370807907518, "grad_norm": 0.1602155566215515, "learning_rate": 4.2157701578753065e-06, "loss": 0.8726, "step": 108340 }, { "epoch": 0.7843094674513381, "grad_norm": 0.1590067595243454, "learning_rate": 4.215697771214721e-06, "loss": 0.8723, "step": 108350 }, { "epoch": 0.7843818541119243, "grad_norm": 0.16449514031410217, "learning_rate": 4.2156253845541346e-06, "loss": 0.8795, "step": 108360 }, { "epoch": 0.7844542407725105, "grad_norm": 0.16773192584514618, "learning_rate": 4.215552997893548e-06, "loss": 0.8772, "step": 108370 }, { "epoch": 0.7845266274330966, "grad_norm": 0.15345753729343414, "learning_rate": 4.215480611232962e-06, "loss": 0.8893, "step": 108380 }, { "epoch": 0.7845990140936828, "grad_norm": 0.1570970118045807, "learning_rate": 4.215408224572376e-06, "loss": 0.8911, "step": 108390 }, { "epoch": 0.784671400754269, "grad_norm": 0.480564683675766, "learning_rate": 4.21533583791179e-06, "loss": 0.881, "step": 108400 }, { "epoch": 0.7847437874148552, "grad_norm": 0.1536029726266861, "learning_rate": 4.2152634512512035e-06, "loss": 0.8657, "step": 108410 }, { "epoch": 0.7848161740754414, "grad_norm": 0.15583275258541107, "learning_rate": 4.215191064590617e-06, "loss": 0.8856, "step": 108420 }, { "epoch": 0.7848885607360275, "grad_norm": 0.17179587483406067, "learning_rate": 4.2151186779300316e-06, "loss": 0.8915, "step": 108430 }, { "epoch": 0.7849609473966137, "grad_norm": 0.16397567093372345, "learning_rate": 4.215046291269445e-06, "loss": 0.87, "step": 108440 }, { "epoch": 0.7850333340572, "grad_norm": 0.1486414670944214, "learning_rate": 4.214973904608859e-06, "loss": 0.8715, "step": 108450 }, { "epoch": 0.7851057207177862, "grad_norm": 0.14893367886543274, "learning_rate": 4.214901517948272e-06, "loss": 0.88, "step": 108460 }, { "epoch": 0.7851781073783723, "grad_norm": 0.15935064852237701, "learning_rate": 4.214829131287687e-06, "loss": 0.8744, "step": 108470 }, { "epoch": 0.7852504940389585, "grad_norm": 0.16078276932239532, "learning_rate": 4.2147567446271005e-06, "loss": 0.8863, "step": 108480 }, { "epoch": 0.7853228806995447, "grad_norm": 0.16032767295837402, "learning_rate": 4.214684357966514e-06, "loss": 0.8813, "step": 108490 }, { "epoch": 0.7853952673601309, "grad_norm": 0.2287614494562149, "learning_rate": 4.214611971305928e-06, "loss": 0.8818, "step": 108500 }, { "epoch": 0.785467654020717, "grad_norm": 0.16381344199180603, "learning_rate": 4.214539584645342e-06, "loss": 0.884, "step": 108510 }, { "epoch": 0.7855400406813032, "grad_norm": 0.16530375182628632, "learning_rate": 4.214467197984756e-06, "loss": 0.8746, "step": 108520 }, { "epoch": 0.7856124273418894, "grad_norm": 0.14450208842754364, "learning_rate": 4.2143948113241694e-06, "loss": 0.8855, "step": 108530 }, { "epoch": 0.7856848140024756, "grad_norm": 0.16523830592632294, "learning_rate": 4.214322424663583e-06, "loss": 0.8822, "step": 108540 }, { "epoch": 0.7857572006630618, "grad_norm": 0.175279900431633, "learning_rate": 4.2142500380029975e-06, "loss": 0.8739, "step": 108550 }, { "epoch": 0.785829587323648, "grad_norm": 0.16809509694576263, "learning_rate": 4.214177651342411e-06, "loss": 0.8929, "step": 108560 }, { "epoch": 0.7859019739842342, "grad_norm": 0.14289383590221405, "learning_rate": 4.214105264681825e-06, "loss": 0.8728, "step": 108570 }, { "epoch": 0.7859743606448204, "grad_norm": 0.14453014731407166, "learning_rate": 4.214032878021238e-06, "loss": 0.8962, "step": 108580 }, { "epoch": 0.7860467473054066, "grad_norm": 0.1566169410943985, "learning_rate": 4.213960491360653e-06, "loss": 0.8758, "step": 108590 }, { "epoch": 0.7861191339659928, "grad_norm": 0.1643407642841339, "learning_rate": 4.2138881047000664e-06, "loss": 0.882, "step": 108600 }, { "epoch": 0.7861915206265789, "grad_norm": 0.16235338151454926, "learning_rate": 4.21381571803948e-06, "loss": 0.8701, "step": 108610 }, { "epoch": 0.7862639072871651, "grad_norm": 0.153314471244812, "learning_rate": 4.213743331378894e-06, "loss": 0.8827, "step": 108620 }, { "epoch": 0.7863362939477513, "grad_norm": 0.15935984253883362, "learning_rate": 4.213670944718308e-06, "loss": 0.8793, "step": 108630 }, { "epoch": 0.7864086806083375, "grad_norm": 0.15511925518512726, "learning_rate": 4.213598558057722e-06, "loss": 0.8916, "step": 108640 }, { "epoch": 0.7864810672689236, "grad_norm": 0.15609268844127655, "learning_rate": 4.213526171397135e-06, "loss": 0.8937, "step": 108650 }, { "epoch": 0.7865534539295098, "grad_norm": 0.14593185484409332, "learning_rate": 4.213453784736549e-06, "loss": 0.8766, "step": 108660 }, { "epoch": 0.7866258405900961, "grad_norm": 0.16142848134040833, "learning_rate": 4.2133813980759634e-06, "loss": 0.8711, "step": 108670 }, { "epoch": 0.7866982272506823, "grad_norm": 0.15131999552249908, "learning_rate": 4.213309011415377e-06, "loss": 0.8837, "step": 108680 }, { "epoch": 0.7867706139112685, "grad_norm": 0.16211840510368347, "learning_rate": 4.213236624754791e-06, "loss": 0.8916, "step": 108690 }, { "epoch": 0.7868430005718546, "grad_norm": 0.16512945294380188, "learning_rate": 4.213164238094204e-06, "loss": 0.8844, "step": 108700 }, { "epoch": 0.7869153872324408, "grad_norm": 0.15159179270267487, "learning_rate": 4.213091851433618e-06, "loss": 0.8814, "step": 108710 }, { "epoch": 0.786987773893027, "grad_norm": 0.17902809381484985, "learning_rate": 4.213019464773032e-06, "loss": 0.8774, "step": 108720 }, { "epoch": 0.7870601605536132, "grad_norm": 0.16519932448863983, "learning_rate": 4.212947078112446e-06, "loss": 0.8849, "step": 108730 }, { "epoch": 0.7871325472141993, "grad_norm": 0.16074302792549133, "learning_rate": 4.21287469145186e-06, "loss": 0.8875, "step": 108740 }, { "epoch": 0.7872049338747855, "grad_norm": 0.1652543991804123, "learning_rate": 4.212802304791273e-06, "loss": 0.8727, "step": 108750 }, { "epoch": 0.7872773205353717, "grad_norm": 0.17259052395820618, "learning_rate": 4.212729918130688e-06, "loss": 0.8849, "step": 108760 }, { "epoch": 0.787349707195958, "grad_norm": 0.17187197506427765, "learning_rate": 4.212657531470101e-06, "loss": 0.8853, "step": 108770 }, { "epoch": 0.7874220938565442, "grad_norm": 0.1663118600845337, "learning_rate": 4.212585144809515e-06, "loss": 0.8764, "step": 108780 }, { "epoch": 0.7874944805171303, "grad_norm": 0.15814106166362762, "learning_rate": 4.2125127581489285e-06, "loss": 0.8648, "step": 108790 }, { "epoch": 0.7875668671777165, "grad_norm": 0.17825469374656677, "learning_rate": 4.212440371488343e-06, "loss": 0.8818, "step": 108800 }, { "epoch": 0.7876392538383027, "grad_norm": 0.191095769405365, "learning_rate": 4.212367984827757e-06, "loss": 0.8861, "step": 108810 }, { "epoch": 0.7877116404988889, "grad_norm": 0.16488607227802277, "learning_rate": 4.21229559816717e-06, "loss": 0.8915, "step": 108820 }, { "epoch": 0.787784027159475, "grad_norm": 0.15883904695510864, "learning_rate": 4.212223211506584e-06, "loss": 0.8806, "step": 108830 }, { "epoch": 0.7878564138200612, "grad_norm": 0.1884288191795349, "learning_rate": 4.2121508248459975e-06, "loss": 0.8955, "step": 108840 }, { "epoch": 0.7879288004806474, "grad_norm": 0.15324948728084564, "learning_rate": 4.212078438185411e-06, "loss": 0.8832, "step": 108850 }, { "epoch": 0.7880011871412336, "grad_norm": 0.16065652668476105, "learning_rate": 4.2120060515248255e-06, "loss": 0.8755, "step": 108860 }, { "epoch": 0.7880735738018197, "grad_norm": 0.15885667502880096, "learning_rate": 4.211933664864239e-06, "loss": 0.8754, "step": 108870 }, { "epoch": 0.788145960462406, "grad_norm": 0.1391364336013794, "learning_rate": 4.211861278203653e-06, "loss": 0.8806, "step": 108880 }, { "epoch": 0.7882183471229922, "grad_norm": 0.15506839752197266, "learning_rate": 4.211788891543066e-06, "loss": 0.8831, "step": 108890 }, { "epoch": 0.7882907337835784, "grad_norm": 0.15888062119483948, "learning_rate": 4.21171650488248e-06, "loss": 0.8819, "step": 108900 }, { "epoch": 0.7883631204441646, "grad_norm": 0.1553460657596588, "learning_rate": 4.2116441182218945e-06, "loss": 0.8916, "step": 108910 }, { "epoch": 0.7884355071047507, "grad_norm": 0.14751103520393372, "learning_rate": 4.211571731561308e-06, "loss": 0.876, "step": 108920 }, { "epoch": 0.7885078937653369, "grad_norm": 0.17180080711841583, "learning_rate": 4.211499344900722e-06, "loss": 0.8759, "step": 108930 }, { "epoch": 0.7885802804259231, "grad_norm": 0.15554192662239075, "learning_rate": 4.211426958240135e-06, "loss": 0.8813, "step": 108940 }, { "epoch": 0.7886526670865093, "grad_norm": 0.1551918089389801, "learning_rate": 4.21135457157955e-06, "loss": 0.884, "step": 108950 }, { "epoch": 0.7887250537470955, "grad_norm": 0.17789483070373535, "learning_rate": 4.211282184918963e-06, "loss": 0.8821, "step": 108960 }, { "epoch": 0.7887974404076816, "grad_norm": 0.16537193953990936, "learning_rate": 4.211209798258377e-06, "loss": 0.8805, "step": 108970 }, { "epoch": 0.7888698270682679, "grad_norm": 0.15402130782604218, "learning_rate": 4.211137411597791e-06, "loss": 0.8804, "step": 108980 }, { "epoch": 0.7889422137288541, "grad_norm": 0.15392087399959564, "learning_rate": 4.211065024937205e-06, "loss": 0.8975, "step": 108990 }, { "epoch": 0.7890146003894403, "grad_norm": 0.1604469269514084, "learning_rate": 4.210992638276619e-06, "loss": 0.8778, "step": 109000 }, { "epoch": 0.7890869870500264, "grad_norm": 0.14916957914829254, "learning_rate": 4.210920251616032e-06, "loss": 0.8859, "step": 109010 }, { "epoch": 0.7891593737106126, "grad_norm": 0.15784524381160736, "learning_rate": 4.210847864955446e-06, "loss": 0.8833, "step": 109020 }, { "epoch": 0.7892317603711988, "grad_norm": 0.26077041029930115, "learning_rate": 4.21077547829486e-06, "loss": 0.8962, "step": 109030 }, { "epoch": 0.789304147031785, "grad_norm": 0.16116316616535187, "learning_rate": 4.210703091634274e-06, "loss": 0.8888, "step": 109040 }, { "epoch": 0.7893765336923712, "grad_norm": 0.24238155782222748, "learning_rate": 4.210630704973688e-06, "loss": 0.8827, "step": 109050 }, { "epoch": 0.7894489203529573, "grad_norm": 0.14621655642986298, "learning_rate": 4.210558318313101e-06, "loss": 0.8893, "step": 109060 }, { "epoch": 0.7895213070135435, "grad_norm": 0.2018691748380661, "learning_rate": 4.210485931652516e-06, "loss": 0.8831, "step": 109070 }, { "epoch": 0.7895936936741297, "grad_norm": 0.148194819688797, "learning_rate": 4.210413544991929e-06, "loss": 0.876, "step": 109080 }, { "epoch": 0.789666080334716, "grad_norm": 0.15002113580703735, "learning_rate": 4.210341158331343e-06, "loss": 0.8754, "step": 109090 }, { "epoch": 0.7897384669953021, "grad_norm": 0.15236304700374603, "learning_rate": 4.2102687716707566e-06, "loss": 0.8849, "step": 109100 }, { "epoch": 0.7898108536558883, "grad_norm": 0.17015591263771057, "learning_rate": 4.210196385010171e-06, "loss": 0.8818, "step": 109110 }, { "epoch": 0.7898832403164745, "grad_norm": 0.16506314277648926, "learning_rate": 4.210123998349585e-06, "loss": 0.8789, "step": 109120 }, { "epoch": 0.7899556269770607, "grad_norm": 0.16899830102920532, "learning_rate": 4.210051611688998e-06, "loss": 0.8763, "step": 109130 }, { "epoch": 0.7900280136376469, "grad_norm": 0.16103564202785492, "learning_rate": 4.209979225028412e-06, "loss": 0.8816, "step": 109140 }, { "epoch": 0.790100400298233, "grad_norm": 0.15541161596775055, "learning_rate": 4.209906838367826e-06, "loss": 0.8864, "step": 109150 }, { "epoch": 0.7901727869588192, "grad_norm": 0.15353739261627197, "learning_rate": 4.20983445170724e-06, "loss": 0.8881, "step": 109160 }, { "epoch": 0.7902451736194054, "grad_norm": 0.1619664430618286, "learning_rate": 4.2097620650466536e-06, "loss": 0.8927, "step": 109170 }, { "epoch": 0.7903175602799916, "grad_norm": 0.15257126092910767, "learning_rate": 4.209689678386067e-06, "loss": 0.8725, "step": 109180 }, { "epoch": 0.7903899469405777, "grad_norm": 0.1462344229221344, "learning_rate": 4.209617291725482e-06, "loss": 0.8893, "step": 109190 }, { "epoch": 0.790462333601164, "grad_norm": 0.15440794825553894, "learning_rate": 4.209544905064895e-06, "loss": 0.8796, "step": 109200 }, { "epoch": 0.7905347202617502, "grad_norm": 0.1578925997018814, "learning_rate": 4.209472518404309e-06, "loss": 0.8796, "step": 109210 }, { "epoch": 0.7906071069223364, "grad_norm": 0.1685589849948883, "learning_rate": 4.2094001317437225e-06, "loss": 0.8761, "step": 109220 }, { "epoch": 0.7906794935829226, "grad_norm": 0.16151396930217743, "learning_rate": 4.209327745083137e-06, "loss": 0.8849, "step": 109230 }, { "epoch": 0.7907518802435087, "grad_norm": 0.16625353693962097, "learning_rate": 4.2092553584225506e-06, "loss": 0.8767, "step": 109240 }, { "epoch": 0.7908242669040949, "grad_norm": 0.16339325904846191, "learning_rate": 4.209182971761964e-06, "loss": 0.8856, "step": 109250 }, { "epoch": 0.7908966535646811, "grad_norm": 0.16669736802577972, "learning_rate": 4.209110585101378e-06, "loss": 0.887, "step": 109260 }, { "epoch": 0.7909690402252673, "grad_norm": 0.15320435166358948, "learning_rate": 4.209038198440792e-06, "loss": 0.8849, "step": 109270 }, { "epoch": 0.7910414268858534, "grad_norm": 0.1580091118812561, "learning_rate": 4.208965811780206e-06, "loss": 0.8786, "step": 109280 }, { "epoch": 0.7911138135464396, "grad_norm": 0.15802930295467377, "learning_rate": 4.2088934251196195e-06, "loss": 0.8855, "step": 109290 }, { "epoch": 0.7911862002070259, "grad_norm": 0.1613604873418808, "learning_rate": 4.208821038459033e-06, "loss": 0.8679, "step": 109300 }, { "epoch": 0.7912585868676121, "grad_norm": 0.17970921099185944, "learning_rate": 4.208748651798448e-06, "loss": 0.8885, "step": 109310 }, { "epoch": 0.7913309735281983, "grad_norm": 0.18362759053707123, "learning_rate": 4.208676265137861e-06, "loss": 0.8811, "step": 109320 }, { "epoch": 0.7914033601887844, "grad_norm": 0.16677528619766235, "learning_rate": 4.208603878477275e-06, "loss": 0.8826, "step": 109330 }, { "epoch": 0.7914757468493706, "grad_norm": 0.17493285238742828, "learning_rate": 4.2085314918166884e-06, "loss": 0.873, "step": 109340 }, { "epoch": 0.7915481335099568, "grad_norm": 0.16501615941524506, "learning_rate": 4.208459105156102e-06, "loss": 0.8832, "step": 109350 }, { "epoch": 0.791620520170543, "grad_norm": 0.15602637827396393, "learning_rate": 4.208386718495516e-06, "loss": 0.8867, "step": 109360 }, { "epoch": 0.7916929068311291, "grad_norm": 0.1481214165687561, "learning_rate": 4.208314331834929e-06, "loss": 0.8815, "step": 109370 }, { "epoch": 0.7917652934917153, "grad_norm": 0.1560535579919815, "learning_rate": 4.208241945174344e-06, "loss": 0.8858, "step": 109380 }, { "epoch": 0.7918376801523015, "grad_norm": 0.21015238761901855, "learning_rate": 4.208169558513757e-06, "loss": 0.8827, "step": 109390 }, { "epoch": 0.7919100668128877, "grad_norm": 0.16126051545143127, "learning_rate": 4.208097171853171e-06, "loss": 0.8978, "step": 109400 }, { "epoch": 0.791982453473474, "grad_norm": 0.15194587409496307, "learning_rate": 4.208024785192585e-06, "loss": 0.8785, "step": 109410 }, { "epoch": 0.7920548401340601, "grad_norm": 0.1646006554365158, "learning_rate": 4.207952398531999e-06, "loss": 0.88, "step": 109420 }, { "epoch": 0.7921272267946463, "grad_norm": 0.21086090803146362, "learning_rate": 4.207880011871413e-06, "loss": 0.8909, "step": 109430 }, { "epoch": 0.7921996134552325, "grad_norm": 0.1583826094865799, "learning_rate": 4.207807625210826e-06, "loss": 0.8872, "step": 109440 }, { "epoch": 0.7922720001158187, "grad_norm": 0.15756238996982574, "learning_rate": 4.20773523855024e-06, "loss": 0.8844, "step": 109450 }, { "epoch": 0.7923443867764048, "grad_norm": 0.1494230180978775, "learning_rate": 4.207662851889654e-06, "loss": 0.8814, "step": 109460 }, { "epoch": 0.792416773436991, "grad_norm": 0.17796126008033752, "learning_rate": 4.207590465229068e-06, "loss": 0.8824, "step": 109470 }, { "epoch": 0.7924891600975772, "grad_norm": 0.16645440459251404, "learning_rate": 4.207518078568482e-06, "loss": 0.892, "step": 109480 }, { "epoch": 0.7925615467581634, "grad_norm": 0.16064368188381195, "learning_rate": 4.207445691907895e-06, "loss": 0.8843, "step": 109490 }, { "epoch": 0.7926339334187495, "grad_norm": 0.18398964405059814, "learning_rate": 4.207373305247309e-06, "loss": 0.8878, "step": 109500 }, { "epoch": 0.7927063200793357, "grad_norm": 0.14704552292823792, "learning_rate": 4.207300918586723e-06, "loss": 0.8858, "step": 109510 }, { "epoch": 0.792778706739922, "grad_norm": 0.15785852074623108, "learning_rate": 4.207228531926137e-06, "loss": 0.8876, "step": 109520 }, { "epoch": 0.7928510934005082, "grad_norm": 0.16036728024482727, "learning_rate": 4.2071561452655505e-06, "loss": 0.8926, "step": 109530 }, { "epoch": 0.7929234800610944, "grad_norm": 0.1538962423801422, "learning_rate": 4.207083758604964e-06, "loss": 0.89, "step": 109540 }, { "epoch": 0.7929958667216805, "grad_norm": 0.15451432764530182, "learning_rate": 4.207011371944379e-06, "loss": 0.8863, "step": 109550 }, { "epoch": 0.7930682533822667, "grad_norm": 0.1714572012424469, "learning_rate": 4.206938985283792e-06, "loss": 0.8779, "step": 109560 }, { "epoch": 0.7931406400428529, "grad_norm": 0.15013469755649567, "learning_rate": 4.206866598623206e-06, "loss": 0.8753, "step": 109570 }, { "epoch": 0.7932130267034391, "grad_norm": 0.15864507853984833, "learning_rate": 4.2067942119626195e-06, "loss": 0.8817, "step": 109580 }, { "epoch": 0.7932854133640252, "grad_norm": 0.18490475416183472, "learning_rate": 4.206721825302034e-06, "loss": 0.8838, "step": 109590 }, { "epoch": 0.7933578000246114, "grad_norm": 0.15001821517944336, "learning_rate": 4.2066494386414475e-06, "loss": 0.8807, "step": 109600 }, { "epoch": 0.7934301866851976, "grad_norm": 0.1856260895729065, "learning_rate": 4.206577051980861e-06, "loss": 0.8846, "step": 109610 }, { "epoch": 0.7935025733457839, "grad_norm": 0.14746098220348358, "learning_rate": 4.206504665320275e-06, "loss": 0.8684, "step": 109620 }, { "epoch": 0.7935749600063701, "grad_norm": 0.16342267394065857, "learning_rate": 4.206432278659689e-06, "loss": 0.8689, "step": 109630 }, { "epoch": 0.7936473466669562, "grad_norm": 0.15328651666641235, "learning_rate": 4.206359891999103e-06, "loss": 0.8678, "step": 109640 }, { "epoch": 0.7937197333275424, "grad_norm": 0.164528951048851, "learning_rate": 4.2062875053385165e-06, "loss": 0.8783, "step": 109650 }, { "epoch": 0.7937921199881286, "grad_norm": 0.1499549150466919, "learning_rate": 4.20621511867793e-06, "loss": 0.8813, "step": 109660 }, { "epoch": 0.7938645066487148, "grad_norm": 0.14804977178573608, "learning_rate": 4.2061427320173445e-06, "loss": 0.8748, "step": 109670 }, { "epoch": 0.793936893309301, "grad_norm": 0.1729278266429901, "learning_rate": 4.206070345356758e-06, "loss": 0.8744, "step": 109680 }, { "epoch": 0.7940092799698871, "grad_norm": 0.15878424048423767, "learning_rate": 4.205997958696172e-06, "loss": 0.8887, "step": 109690 }, { "epoch": 0.7940816666304733, "grad_norm": 0.15942630171775818, "learning_rate": 4.205925572035585e-06, "loss": 0.8915, "step": 109700 }, { "epoch": 0.7941540532910595, "grad_norm": 0.16669262945652008, "learning_rate": 4.205853185375e-06, "loss": 0.8915, "step": 109710 }, { "epoch": 0.7942264399516457, "grad_norm": 0.1515069305896759, "learning_rate": 4.2057807987144135e-06, "loss": 0.8784, "step": 109720 }, { "epoch": 0.794298826612232, "grad_norm": 0.1678621619939804, "learning_rate": 4.205708412053827e-06, "loss": 0.8901, "step": 109730 }, { "epoch": 0.7943712132728181, "grad_norm": 0.17552782595157623, "learning_rate": 4.205636025393241e-06, "loss": 0.8664, "step": 109740 }, { "epoch": 0.7944435999334043, "grad_norm": 0.1632026731967926, "learning_rate": 4.205563638732655e-06, "loss": 0.8854, "step": 109750 }, { "epoch": 0.7945159865939905, "grad_norm": 0.15322089195251465, "learning_rate": 4.205491252072069e-06, "loss": 0.8788, "step": 109760 }, { "epoch": 0.7945883732545767, "grad_norm": 0.1526355892419815, "learning_rate": 4.205418865411482e-06, "loss": 0.8773, "step": 109770 }, { "epoch": 0.7946607599151628, "grad_norm": 0.1596207320690155, "learning_rate": 4.205346478750896e-06, "loss": 0.8807, "step": 109780 }, { "epoch": 0.794733146575749, "grad_norm": 0.16599130630493164, "learning_rate": 4.2052740920903105e-06, "loss": 0.8716, "step": 109790 }, { "epoch": 0.7948055332363352, "grad_norm": 0.1544189453125, "learning_rate": 4.205201705429724e-06, "loss": 0.8769, "step": 109800 }, { "epoch": 0.7948779198969214, "grad_norm": 0.1597534716129303, "learning_rate": 4.205129318769138e-06, "loss": 0.8723, "step": 109810 }, { "epoch": 0.7949503065575075, "grad_norm": 0.15642835199832916, "learning_rate": 4.205056932108551e-06, "loss": 0.8835, "step": 109820 }, { "epoch": 0.7950226932180938, "grad_norm": 0.16617697477340698, "learning_rate": 4.204984545447966e-06, "loss": 0.8745, "step": 109830 }, { "epoch": 0.79509507987868, "grad_norm": 0.15826547145843506, "learning_rate": 4.204912158787379e-06, "loss": 0.8885, "step": 109840 }, { "epoch": 0.7951674665392662, "grad_norm": 0.16026552021503448, "learning_rate": 4.204839772126793e-06, "loss": 0.8901, "step": 109850 }, { "epoch": 0.7952398531998524, "grad_norm": 0.16128748655319214, "learning_rate": 4.204767385466207e-06, "loss": 0.8934, "step": 109860 }, { "epoch": 0.7953122398604385, "grad_norm": 0.1773352324962616, "learning_rate": 4.204694998805621e-06, "loss": 0.8806, "step": 109870 }, { "epoch": 0.7953846265210247, "grad_norm": 0.16145089268684387, "learning_rate": 4.204622612145035e-06, "loss": 0.8779, "step": 109880 }, { "epoch": 0.7954570131816109, "grad_norm": 0.14754821360111237, "learning_rate": 4.2045502254844475e-06, "loss": 0.8763, "step": 109890 }, { "epoch": 0.7955293998421971, "grad_norm": 0.15587793290615082, "learning_rate": 4.204477838823862e-06, "loss": 0.8794, "step": 109900 }, { "epoch": 0.7956017865027832, "grad_norm": 0.14746980369091034, "learning_rate": 4.2044054521632756e-06, "loss": 0.887, "step": 109910 }, { "epoch": 0.7956741731633694, "grad_norm": 0.15477393567562103, "learning_rate": 4.204333065502689e-06, "loss": 0.8808, "step": 109920 }, { "epoch": 0.7957465598239556, "grad_norm": 0.14774759113788605, "learning_rate": 4.204260678842103e-06, "loss": 0.8813, "step": 109930 }, { "epoch": 0.7958189464845419, "grad_norm": 0.1507684290409088, "learning_rate": 4.204188292181517e-06, "loss": 0.8908, "step": 109940 }, { "epoch": 0.795891333145128, "grad_norm": 0.15101462602615356, "learning_rate": 4.204115905520931e-06, "loss": 0.8836, "step": 109950 }, { "epoch": 0.7959637198057142, "grad_norm": 0.14433503150939941, "learning_rate": 4.2040435188603445e-06, "loss": 0.8769, "step": 109960 }, { "epoch": 0.7960361064663004, "grad_norm": 0.17462539672851562, "learning_rate": 4.203971132199758e-06, "loss": 0.8851, "step": 109970 }, { "epoch": 0.7961084931268866, "grad_norm": 0.1587734818458557, "learning_rate": 4.2038987455391726e-06, "loss": 0.8821, "step": 109980 }, { "epoch": 0.7961808797874728, "grad_norm": 0.14876559376716614, "learning_rate": 4.203826358878586e-06, "loss": 0.8717, "step": 109990 }, { "epoch": 0.7962532664480589, "grad_norm": 0.1661464273929596, "learning_rate": 4.203753972218e-06, "loss": 0.8884, "step": 110000 }, { "epoch": 0.7963256531086451, "grad_norm": 0.21970897912979126, "learning_rate": 4.203681585557413e-06, "loss": 0.8812, "step": 110010 }, { "epoch": 0.7963980397692313, "grad_norm": 0.15922003984451294, "learning_rate": 4.203609198896828e-06, "loss": 0.8924, "step": 110020 }, { "epoch": 0.7964704264298175, "grad_norm": 0.1996181309223175, "learning_rate": 4.2035368122362415e-06, "loss": 0.8815, "step": 110030 }, { "epoch": 0.7965428130904036, "grad_norm": 0.14531823992729187, "learning_rate": 4.203464425575655e-06, "loss": 0.8849, "step": 110040 }, { "epoch": 0.7966151997509899, "grad_norm": 0.15441548824310303, "learning_rate": 4.203392038915069e-06, "loss": 0.8793, "step": 110050 }, { "epoch": 0.7966875864115761, "grad_norm": 0.15467193722724915, "learning_rate": 4.203319652254483e-06, "loss": 0.8773, "step": 110060 }, { "epoch": 0.7967599730721623, "grad_norm": 0.15833494067192078, "learning_rate": 4.203247265593897e-06, "loss": 0.8748, "step": 110070 }, { "epoch": 0.7968323597327485, "grad_norm": 0.1572531759738922, "learning_rate": 4.2031748789333104e-06, "loss": 0.8769, "step": 110080 }, { "epoch": 0.7969047463933346, "grad_norm": 0.16923896968364716, "learning_rate": 4.203102492272724e-06, "loss": 0.8714, "step": 110090 }, { "epoch": 0.7969771330539208, "grad_norm": 0.15771649777889252, "learning_rate": 4.2030301056121385e-06, "loss": 0.8861, "step": 110100 }, { "epoch": 0.797049519714507, "grad_norm": 0.1439589411020279, "learning_rate": 4.202957718951552e-06, "loss": 0.8705, "step": 110110 }, { "epoch": 0.7971219063750932, "grad_norm": 0.14609919488430023, "learning_rate": 4.202885332290966e-06, "loss": 0.883, "step": 110120 }, { "epoch": 0.7971942930356793, "grad_norm": 0.15220913290977478, "learning_rate": 4.202812945630379e-06, "loss": 0.889, "step": 110130 }, { "epoch": 0.7972666796962655, "grad_norm": 0.16518516838550568, "learning_rate": 4.202740558969793e-06, "loss": 0.8745, "step": 110140 }, { "epoch": 0.7973390663568518, "grad_norm": 0.15049389004707336, "learning_rate": 4.2026681723092074e-06, "loss": 0.8761, "step": 110150 }, { "epoch": 0.797411453017438, "grad_norm": 0.13817547261714935, "learning_rate": 4.202595785648621e-06, "loss": 0.8687, "step": 110160 }, { "epoch": 0.7974838396780242, "grad_norm": 0.15669967234134674, "learning_rate": 4.202523398988035e-06, "loss": 0.8928, "step": 110170 }, { "epoch": 0.7975562263386103, "grad_norm": 0.15119336545467377, "learning_rate": 4.202451012327448e-06, "loss": 0.8827, "step": 110180 }, { "epoch": 0.7976286129991965, "grad_norm": 0.14785991609096527, "learning_rate": 4.202378625666863e-06, "loss": 0.8917, "step": 110190 }, { "epoch": 0.7977009996597827, "grad_norm": 0.15478040277957916, "learning_rate": 4.202306239006276e-06, "loss": 0.873, "step": 110200 }, { "epoch": 0.7977733863203689, "grad_norm": 0.17014062404632568, "learning_rate": 4.20223385234569e-06, "loss": 0.885, "step": 110210 }, { "epoch": 0.797845772980955, "grad_norm": 0.1526460498571396, "learning_rate": 4.202161465685104e-06, "loss": 0.8793, "step": 110220 }, { "epoch": 0.7979181596415412, "grad_norm": 0.15299007296562195, "learning_rate": 4.202089079024518e-06, "loss": 0.8705, "step": 110230 }, { "epoch": 0.7979905463021274, "grad_norm": 0.1601126492023468, "learning_rate": 4.202016692363932e-06, "loss": 0.8854, "step": 110240 }, { "epoch": 0.7980629329627136, "grad_norm": 0.14891035854816437, "learning_rate": 4.201944305703345e-06, "loss": 0.8736, "step": 110250 }, { "epoch": 0.7981353196232999, "grad_norm": 0.17252424359321594, "learning_rate": 4.201871919042759e-06, "loss": 0.8858, "step": 110260 }, { "epoch": 0.798207706283886, "grad_norm": 0.16554145514965057, "learning_rate": 4.201799532382173e-06, "loss": 0.8679, "step": 110270 }, { "epoch": 0.7982800929444722, "grad_norm": 0.17143869400024414, "learning_rate": 4.201727145721587e-06, "loss": 0.8895, "step": 110280 }, { "epoch": 0.7983524796050584, "grad_norm": 0.14797469973564148, "learning_rate": 4.201654759061001e-06, "loss": 0.8776, "step": 110290 }, { "epoch": 0.7984248662656446, "grad_norm": 0.15571478009223938, "learning_rate": 4.201582372400414e-06, "loss": 0.8765, "step": 110300 }, { "epoch": 0.7984972529262307, "grad_norm": 0.1487504541873932, "learning_rate": 4.201509985739829e-06, "loss": 0.8822, "step": 110310 }, { "epoch": 0.7985696395868169, "grad_norm": 0.15721207857131958, "learning_rate": 4.201437599079242e-06, "loss": 0.8609, "step": 110320 }, { "epoch": 0.7986420262474031, "grad_norm": 0.14724335074424744, "learning_rate": 4.201365212418656e-06, "loss": 0.8808, "step": 110330 }, { "epoch": 0.7987144129079893, "grad_norm": 0.1642737239599228, "learning_rate": 4.2012928257580695e-06, "loss": 0.8882, "step": 110340 }, { "epoch": 0.7987867995685755, "grad_norm": 0.1529485136270523, "learning_rate": 4.201220439097484e-06, "loss": 0.8793, "step": 110350 }, { "epoch": 0.7988591862291617, "grad_norm": 0.14472009241580963, "learning_rate": 4.201148052436898e-06, "loss": 0.8705, "step": 110360 }, { "epoch": 0.7989315728897479, "grad_norm": 0.16149692237377167, "learning_rate": 4.201075665776311e-06, "loss": 0.8788, "step": 110370 }, { "epoch": 0.7990039595503341, "grad_norm": 0.16890741884708405, "learning_rate": 4.201003279115725e-06, "loss": 0.8714, "step": 110380 }, { "epoch": 0.7990763462109203, "grad_norm": 0.16988790035247803, "learning_rate": 4.200930892455139e-06, "loss": 0.8946, "step": 110390 }, { "epoch": 0.7991487328715065, "grad_norm": 0.14879997074604034, "learning_rate": 4.200858505794553e-06, "loss": 0.8776, "step": 110400 }, { "epoch": 0.7992211195320926, "grad_norm": 0.38371655344963074, "learning_rate": 4.2007861191339665e-06, "loss": 0.892, "step": 110410 }, { "epoch": 0.7992935061926788, "grad_norm": 0.14874762296676636, "learning_rate": 4.20071373247338e-06, "loss": 0.8787, "step": 110420 }, { "epoch": 0.799365892853265, "grad_norm": 0.1458701342344284, "learning_rate": 4.200641345812794e-06, "loss": 0.8788, "step": 110430 }, { "epoch": 0.7994382795138512, "grad_norm": 0.1612052172422409, "learning_rate": 4.200568959152207e-06, "loss": 0.8816, "step": 110440 }, { "epoch": 0.7995106661744373, "grad_norm": 0.1589481681585312, "learning_rate": 4.200496572491621e-06, "loss": 0.8847, "step": 110450 }, { "epoch": 0.7995830528350235, "grad_norm": 0.15650621056556702, "learning_rate": 4.2004241858310355e-06, "loss": 0.8862, "step": 110460 }, { "epoch": 0.7996554394956098, "grad_norm": 0.1470274180173874, "learning_rate": 4.200351799170449e-06, "loss": 0.8822, "step": 110470 }, { "epoch": 0.799727826156196, "grad_norm": 0.15329478681087494, "learning_rate": 4.200279412509863e-06, "loss": 0.8777, "step": 110480 }, { "epoch": 0.7998002128167822, "grad_norm": 0.16613824665546417, "learning_rate": 4.200207025849276e-06, "loss": 0.8915, "step": 110490 }, { "epoch": 0.7998725994773683, "grad_norm": 0.15577919781208038, "learning_rate": 4.200134639188691e-06, "loss": 0.8799, "step": 110500 }, { "epoch": 0.7999449861379545, "grad_norm": 0.16274091601371765, "learning_rate": 4.200062252528104e-06, "loss": 0.8822, "step": 110510 }, { "epoch": 0.8000173727985407, "grad_norm": 0.16509264707565308, "learning_rate": 4.199989865867518e-06, "loss": 0.8921, "step": 110520 }, { "epoch": 0.8000897594591269, "grad_norm": 0.16428282856941223, "learning_rate": 4.199917479206932e-06, "loss": 0.861, "step": 110530 }, { "epoch": 0.800162146119713, "grad_norm": 0.15787632763385773, "learning_rate": 4.199845092546346e-06, "loss": 0.8679, "step": 110540 }, { "epoch": 0.8002345327802992, "grad_norm": 0.21818390488624573, "learning_rate": 4.19977270588576e-06, "loss": 0.8813, "step": 110550 }, { "epoch": 0.8003069194408854, "grad_norm": 0.16257944703102112, "learning_rate": 4.199700319225173e-06, "loss": 0.8811, "step": 110560 }, { "epoch": 0.8003793061014716, "grad_norm": 0.16678005456924438, "learning_rate": 4.199627932564587e-06, "loss": 0.8873, "step": 110570 }, { "epoch": 0.8004516927620579, "grad_norm": 0.15862931311130524, "learning_rate": 4.199555545904001e-06, "loss": 0.8825, "step": 110580 }, { "epoch": 0.800524079422644, "grad_norm": 0.18531429767608643, "learning_rate": 4.199483159243415e-06, "loss": 0.8642, "step": 110590 }, { "epoch": 0.8005964660832302, "grad_norm": 0.15238986909389496, "learning_rate": 4.199410772582829e-06, "loss": 0.882, "step": 110600 }, { "epoch": 0.8006688527438164, "grad_norm": 0.15185806155204773, "learning_rate": 4.199338385922242e-06, "loss": 0.8812, "step": 110610 }, { "epoch": 0.8007412394044026, "grad_norm": 0.14736582338809967, "learning_rate": 4.199265999261657e-06, "loss": 0.8764, "step": 110620 }, { "epoch": 0.8008136260649887, "grad_norm": 0.191138356924057, "learning_rate": 4.19919361260107e-06, "loss": 0.8821, "step": 110630 }, { "epoch": 0.8008860127255749, "grad_norm": 0.1546011120080948, "learning_rate": 4.199121225940484e-06, "loss": 0.8797, "step": 110640 }, { "epoch": 0.8009583993861611, "grad_norm": 0.14093956351280212, "learning_rate": 4.1990488392798976e-06, "loss": 0.8744, "step": 110650 }, { "epoch": 0.8010307860467473, "grad_norm": 0.14860482513904572, "learning_rate": 4.198976452619312e-06, "loss": 0.8811, "step": 110660 }, { "epoch": 0.8011031727073334, "grad_norm": 0.15203125774860382, "learning_rate": 4.198904065958726e-06, "loss": 0.8726, "step": 110670 }, { "epoch": 0.8011755593679197, "grad_norm": 0.1518326997756958, "learning_rate": 4.198831679298139e-06, "loss": 0.8747, "step": 110680 }, { "epoch": 0.8012479460285059, "grad_norm": 0.16520649194717407, "learning_rate": 4.198759292637553e-06, "loss": 0.8794, "step": 110690 }, { "epoch": 0.8013203326890921, "grad_norm": 0.16305768489837646, "learning_rate": 4.198686905976967e-06, "loss": 0.8866, "step": 110700 }, { "epoch": 0.8013927193496783, "grad_norm": 0.15262234210968018, "learning_rate": 4.198614519316381e-06, "loss": 0.8865, "step": 110710 }, { "epoch": 0.8014651060102644, "grad_norm": 0.163343608379364, "learning_rate": 4.1985421326557946e-06, "loss": 0.8781, "step": 110720 }, { "epoch": 0.8015374926708506, "grad_norm": 0.15089716017246246, "learning_rate": 4.198469745995208e-06, "loss": 0.883, "step": 110730 }, { "epoch": 0.8016098793314368, "grad_norm": 0.1560911238193512, "learning_rate": 4.198397359334622e-06, "loss": 0.8744, "step": 110740 }, { "epoch": 0.801682265992023, "grad_norm": 1.2095776796340942, "learning_rate": 4.198324972674036e-06, "loss": 0.8845, "step": 110750 }, { "epoch": 0.8017546526526091, "grad_norm": 0.15118081867694855, "learning_rate": 4.19825258601345e-06, "loss": 0.8761, "step": 110760 }, { "epoch": 0.8018270393131953, "grad_norm": 0.15144529938697815, "learning_rate": 4.1981801993528635e-06, "loss": 0.879, "step": 110770 }, { "epoch": 0.8018994259737815, "grad_norm": 0.16330844163894653, "learning_rate": 4.198107812692277e-06, "loss": 0.8809, "step": 110780 }, { "epoch": 0.8019718126343678, "grad_norm": 0.14975833892822266, "learning_rate": 4.198035426031692e-06, "loss": 0.8788, "step": 110790 }, { "epoch": 0.802044199294954, "grad_norm": 0.15585237741470337, "learning_rate": 4.197963039371105e-06, "loss": 0.882, "step": 110800 }, { "epoch": 0.8021165859555401, "grad_norm": 0.14996300637722015, "learning_rate": 4.197890652710519e-06, "loss": 0.8803, "step": 110810 }, { "epoch": 0.8021889726161263, "grad_norm": 0.1632111817598343, "learning_rate": 4.1978182660499324e-06, "loss": 0.8709, "step": 110820 }, { "epoch": 0.8022613592767125, "grad_norm": 0.1543271541595459, "learning_rate": 4.197745879389347e-06, "loss": 0.8823, "step": 110830 }, { "epoch": 0.8023337459372987, "grad_norm": 0.25150734186172485, "learning_rate": 4.1976734927287605e-06, "loss": 0.8912, "step": 110840 }, { "epoch": 0.8024061325978848, "grad_norm": 0.14821644127368927, "learning_rate": 4.197601106068174e-06, "loss": 0.8951, "step": 110850 }, { "epoch": 0.802478519258471, "grad_norm": 0.5984487533569336, "learning_rate": 4.197528719407588e-06, "loss": 0.8829, "step": 110860 }, { "epoch": 0.8025509059190572, "grad_norm": 0.1596580147743225, "learning_rate": 4.197456332747002e-06, "loss": 0.8795, "step": 110870 }, { "epoch": 0.8026232925796434, "grad_norm": 0.1528296023607254, "learning_rate": 4.197383946086416e-06, "loss": 0.8802, "step": 110880 }, { "epoch": 0.8026956792402297, "grad_norm": 0.15530245006084442, "learning_rate": 4.1973115594258294e-06, "loss": 0.8859, "step": 110890 }, { "epoch": 0.8027680659008158, "grad_norm": 0.1550951898097992, "learning_rate": 4.197239172765243e-06, "loss": 0.8788, "step": 110900 }, { "epoch": 0.802840452561402, "grad_norm": 0.17830154299736023, "learning_rate": 4.1971667861046575e-06, "loss": 0.8782, "step": 110910 }, { "epoch": 0.8029128392219882, "grad_norm": 0.1467135101556778, "learning_rate": 4.197094399444071e-06, "loss": 0.8776, "step": 110920 }, { "epoch": 0.8029852258825744, "grad_norm": 0.1502387672662735, "learning_rate": 4.197022012783485e-06, "loss": 0.8782, "step": 110930 }, { "epoch": 0.8030576125431605, "grad_norm": 0.14576847851276398, "learning_rate": 4.196949626122898e-06, "loss": 0.8663, "step": 110940 }, { "epoch": 0.8031299992037467, "grad_norm": 0.14512509107589722, "learning_rate": 4.196877239462312e-06, "loss": 0.878, "step": 110950 }, { "epoch": 0.8032023858643329, "grad_norm": 0.1578640192747116, "learning_rate": 4.196804852801726e-06, "loss": 0.8812, "step": 110960 }, { "epoch": 0.8032747725249191, "grad_norm": 0.1540568768978119, "learning_rate": 4.196732466141139e-06, "loss": 0.8948, "step": 110970 }, { "epoch": 0.8033471591855053, "grad_norm": 0.16598433256149292, "learning_rate": 4.196660079480554e-06, "loss": 0.8928, "step": 110980 }, { "epoch": 0.8034195458460914, "grad_norm": 0.15447190403938293, "learning_rate": 4.196587692819967e-06, "loss": 0.8822, "step": 110990 }, { "epoch": 0.8034919325066777, "grad_norm": 0.15389618277549744, "learning_rate": 4.196515306159381e-06, "loss": 0.872, "step": 111000 }, { "epoch": 0.8035643191672639, "grad_norm": 0.1530335396528244, "learning_rate": 4.1964429194987945e-06, "loss": 0.871, "step": 111010 }, { "epoch": 0.8036367058278501, "grad_norm": 0.15146398544311523, "learning_rate": 4.196370532838209e-06, "loss": 0.8834, "step": 111020 }, { "epoch": 0.8037090924884362, "grad_norm": 0.15704941749572754, "learning_rate": 4.196298146177623e-06, "loss": 0.9041, "step": 111030 }, { "epoch": 0.8037814791490224, "grad_norm": 0.15479826927185059, "learning_rate": 4.196225759517036e-06, "loss": 0.8853, "step": 111040 }, { "epoch": 0.8038538658096086, "grad_norm": 0.1650967299938202, "learning_rate": 4.19615337285645e-06, "loss": 0.8816, "step": 111050 }, { "epoch": 0.8039262524701948, "grad_norm": 0.15710927546024323, "learning_rate": 4.196080986195864e-06, "loss": 0.8873, "step": 111060 }, { "epoch": 0.803998639130781, "grad_norm": 0.15104107558727264, "learning_rate": 4.196008599535278e-06, "loss": 0.8729, "step": 111070 }, { "epoch": 0.8040710257913671, "grad_norm": 0.15065807104110718, "learning_rate": 4.1959362128746915e-06, "loss": 0.8849, "step": 111080 }, { "epoch": 0.8041434124519533, "grad_norm": 0.16708062589168549, "learning_rate": 4.195863826214105e-06, "loss": 0.8839, "step": 111090 }, { "epoch": 0.8042157991125395, "grad_norm": 0.155837744474411, "learning_rate": 4.19579143955352e-06, "loss": 0.8737, "step": 111100 }, { "epoch": 0.8042881857731258, "grad_norm": 0.1683686077594757, "learning_rate": 4.195719052892933e-06, "loss": 0.8767, "step": 111110 }, { "epoch": 0.804360572433712, "grad_norm": 0.15654446184635162, "learning_rate": 4.195646666232347e-06, "loss": 0.8707, "step": 111120 }, { "epoch": 0.8044329590942981, "grad_norm": 0.15169669687747955, "learning_rate": 4.1955742795717605e-06, "loss": 0.883, "step": 111130 }, { "epoch": 0.8045053457548843, "grad_norm": 0.15075841546058655, "learning_rate": 4.195501892911175e-06, "loss": 0.8914, "step": 111140 }, { "epoch": 0.8045777324154705, "grad_norm": 0.16259559988975525, "learning_rate": 4.1954295062505885e-06, "loss": 0.8853, "step": 111150 }, { "epoch": 0.8046501190760567, "grad_norm": 0.2971009910106659, "learning_rate": 4.195357119590002e-06, "loss": 0.8743, "step": 111160 }, { "epoch": 0.8047225057366428, "grad_norm": 0.17599447071552277, "learning_rate": 4.195284732929416e-06, "loss": 0.8837, "step": 111170 }, { "epoch": 0.804794892397229, "grad_norm": 0.1559291034936905, "learning_rate": 4.19521234626883e-06, "loss": 0.8789, "step": 111180 }, { "epoch": 0.8048672790578152, "grad_norm": 0.14445562660694122, "learning_rate": 4.195139959608244e-06, "loss": 0.8738, "step": 111190 }, { "epoch": 0.8049396657184014, "grad_norm": 0.1521482765674591, "learning_rate": 4.1950675729476575e-06, "loss": 0.8897, "step": 111200 }, { "epoch": 0.8050120523789877, "grad_norm": 0.15571485459804535, "learning_rate": 4.194995186287071e-06, "loss": 0.8981, "step": 111210 }, { "epoch": 0.8050844390395738, "grad_norm": 0.1559680998325348, "learning_rate": 4.1949227996264855e-06, "loss": 0.89, "step": 111220 }, { "epoch": 0.80515682570016, "grad_norm": 0.1483583301305771, "learning_rate": 4.194850412965899e-06, "loss": 0.8838, "step": 111230 }, { "epoch": 0.8052292123607462, "grad_norm": 0.15938538312911987, "learning_rate": 4.194778026305313e-06, "loss": 0.8759, "step": 111240 }, { "epoch": 0.8053015990213324, "grad_norm": 0.15668749809265137, "learning_rate": 4.194705639644726e-06, "loss": 0.8753, "step": 111250 }, { "epoch": 0.8053739856819185, "grad_norm": 0.1507354974746704, "learning_rate": 4.194633252984141e-06, "loss": 0.8911, "step": 111260 }, { "epoch": 0.8054463723425047, "grad_norm": 0.15574651956558228, "learning_rate": 4.1945608663235545e-06, "loss": 0.879, "step": 111270 }, { "epoch": 0.8055187590030909, "grad_norm": 0.2068503350019455, "learning_rate": 4.194488479662968e-06, "loss": 0.8897, "step": 111280 }, { "epoch": 0.8055911456636771, "grad_norm": 0.1596960425376892, "learning_rate": 4.194416093002382e-06, "loss": 0.8782, "step": 111290 }, { "epoch": 0.8056635323242632, "grad_norm": 0.1368640810251236, "learning_rate": 4.194343706341796e-06, "loss": 0.8767, "step": 111300 }, { "epoch": 0.8057359189848494, "grad_norm": 0.18080635368824005, "learning_rate": 4.19427131968121e-06, "loss": 0.8798, "step": 111310 }, { "epoch": 0.8058083056454357, "grad_norm": 0.1573970466852188, "learning_rate": 4.194198933020623e-06, "loss": 0.8708, "step": 111320 }, { "epoch": 0.8058806923060219, "grad_norm": 0.1502685248851776, "learning_rate": 4.194126546360037e-06, "loss": 0.8842, "step": 111330 }, { "epoch": 0.8059530789666081, "grad_norm": 0.15076951682567596, "learning_rate": 4.1940541596994515e-06, "loss": 0.8821, "step": 111340 }, { "epoch": 0.8060254656271942, "grad_norm": 0.15020160377025604, "learning_rate": 4.193981773038865e-06, "loss": 0.886, "step": 111350 }, { "epoch": 0.8060978522877804, "grad_norm": 0.356048047542572, "learning_rate": 4.193909386378279e-06, "loss": 0.8936, "step": 111360 }, { "epoch": 0.8061702389483666, "grad_norm": 0.15093164145946503, "learning_rate": 4.193836999717692e-06, "loss": 0.8714, "step": 111370 }, { "epoch": 0.8062426256089528, "grad_norm": 0.16395637392997742, "learning_rate": 4.193764613057106e-06, "loss": 0.876, "step": 111380 }, { "epoch": 0.8063150122695389, "grad_norm": 0.15686947107315063, "learning_rate": 4.19369222639652e-06, "loss": 0.8807, "step": 111390 }, { "epoch": 0.8063873989301251, "grad_norm": 0.15679305791854858, "learning_rate": 4.193619839735934e-06, "loss": 0.8945, "step": 111400 }, { "epoch": 0.8064597855907113, "grad_norm": 0.15202337503433228, "learning_rate": 4.193547453075348e-06, "loss": 0.8781, "step": 111410 }, { "epoch": 0.8065321722512976, "grad_norm": 0.1549692004919052, "learning_rate": 4.193475066414761e-06, "loss": 0.8694, "step": 111420 }, { "epoch": 0.8066045589118838, "grad_norm": 0.15202471613883972, "learning_rate": 4.193402679754176e-06, "loss": 0.8718, "step": 111430 }, { "epoch": 0.8066769455724699, "grad_norm": 0.15716615319252014, "learning_rate": 4.193330293093589e-06, "loss": 0.888, "step": 111440 }, { "epoch": 0.8067493322330561, "grad_norm": 2.5106468200683594, "learning_rate": 4.193257906433003e-06, "loss": 0.87, "step": 111450 }, { "epoch": 0.8068217188936423, "grad_norm": 0.15499623119831085, "learning_rate": 4.1931855197724166e-06, "loss": 0.8882, "step": 111460 }, { "epoch": 0.8068941055542285, "grad_norm": 0.14410343766212463, "learning_rate": 4.193113133111831e-06, "loss": 0.8796, "step": 111470 }, { "epoch": 0.8069664922148146, "grad_norm": 0.14761362969875336, "learning_rate": 4.193040746451244e-06, "loss": 0.8715, "step": 111480 }, { "epoch": 0.8070388788754008, "grad_norm": 0.1514003425836563, "learning_rate": 4.192968359790658e-06, "loss": 0.8761, "step": 111490 }, { "epoch": 0.807111265535987, "grad_norm": 0.15471474826335907, "learning_rate": 4.192895973130072e-06, "loss": 0.877, "step": 111500 }, { "epoch": 0.8071836521965732, "grad_norm": 0.17473427951335907, "learning_rate": 4.1928235864694855e-06, "loss": 0.8767, "step": 111510 }, { "epoch": 0.8072560388571594, "grad_norm": 0.14548401534557343, "learning_rate": 4.192751199808899e-06, "loss": 0.8961, "step": 111520 }, { "epoch": 0.8073284255177456, "grad_norm": 0.15971076488494873, "learning_rate": 4.192678813148313e-06, "loss": 0.8899, "step": 111530 }, { "epoch": 0.8074008121783318, "grad_norm": 0.16011503338813782, "learning_rate": 4.192606426487727e-06, "loss": 0.8803, "step": 111540 }, { "epoch": 0.807473198838918, "grad_norm": 0.15592749416828156, "learning_rate": 4.192534039827141e-06, "loss": 0.878, "step": 111550 }, { "epoch": 0.8075455854995042, "grad_norm": 0.14613130688667297, "learning_rate": 4.1924616531665544e-06, "loss": 0.8678, "step": 111560 }, { "epoch": 0.8076179721600903, "grad_norm": 0.15929226577281952, "learning_rate": 4.192389266505968e-06, "loss": 0.8775, "step": 111570 }, { "epoch": 0.8076903588206765, "grad_norm": 0.1534443497657776, "learning_rate": 4.1923168798453825e-06, "loss": 0.8827, "step": 111580 }, { "epoch": 0.8077627454812627, "grad_norm": 0.145346000790596, "learning_rate": 4.192244493184796e-06, "loss": 0.8658, "step": 111590 }, { "epoch": 0.8078351321418489, "grad_norm": 0.14903073012828827, "learning_rate": 4.19217210652421e-06, "loss": 0.8829, "step": 111600 }, { "epoch": 0.807907518802435, "grad_norm": 0.14224445819854736, "learning_rate": 4.192099719863623e-06, "loss": 0.8843, "step": 111610 }, { "epoch": 0.8079799054630212, "grad_norm": 0.3385683596134186, "learning_rate": 4.192027333203038e-06, "loss": 0.8851, "step": 111620 }, { "epoch": 0.8080522921236074, "grad_norm": 0.14634902775287628, "learning_rate": 4.1919549465424514e-06, "loss": 0.8848, "step": 111630 }, { "epoch": 0.8081246787841937, "grad_norm": 0.16326604783535004, "learning_rate": 4.191882559881865e-06, "loss": 0.8758, "step": 111640 }, { "epoch": 0.8081970654447799, "grad_norm": 0.15901891887187958, "learning_rate": 4.191810173221279e-06, "loss": 0.8679, "step": 111650 }, { "epoch": 0.808269452105366, "grad_norm": 0.15597860515117645, "learning_rate": 4.191737786560693e-06, "loss": 0.8848, "step": 111660 }, { "epoch": 0.8083418387659522, "grad_norm": 0.15394975244998932, "learning_rate": 4.191665399900107e-06, "loss": 0.8767, "step": 111670 }, { "epoch": 0.8084142254265384, "grad_norm": 0.1595843881368637, "learning_rate": 4.19159301323952e-06, "loss": 0.8717, "step": 111680 }, { "epoch": 0.8084866120871246, "grad_norm": 0.15284503996372223, "learning_rate": 4.191520626578934e-06, "loss": 0.8774, "step": 111690 }, { "epoch": 0.8085589987477108, "grad_norm": 0.16651558876037598, "learning_rate": 4.1914482399183484e-06, "loss": 0.8686, "step": 111700 }, { "epoch": 0.8086313854082969, "grad_norm": 0.16672812402248383, "learning_rate": 4.191375853257762e-06, "loss": 0.8755, "step": 111710 }, { "epoch": 0.8087037720688831, "grad_norm": 0.1647217869758606, "learning_rate": 4.191303466597176e-06, "loss": 0.8887, "step": 111720 }, { "epoch": 0.8087761587294693, "grad_norm": 0.16164937615394592, "learning_rate": 4.191231079936589e-06, "loss": 0.8733, "step": 111730 }, { "epoch": 0.8088485453900556, "grad_norm": 0.1532217264175415, "learning_rate": 4.191158693276004e-06, "loss": 0.877, "step": 111740 }, { "epoch": 0.8089209320506417, "grad_norm": 0.16159088909626007, "learning_rate": 4.191086306615417e-06, "loss": 0.8977, "step": 111750 }, { "epoch": 0.8089933187112279, "grad_norm": 0.1658451408147812, "learning_rate": 4.191013919954831e-06, "loss": 0.8906, "step": 111760 }, { "epoch": 0.8090657053718141, "grad_norm": 0.43993157148361206, "learning_rate": 4.190941533294245e-06, "loss": 0.8782, "step": 111770 }, { "epoch": 0.8091380920324003, "grad_norm": 0.1686534732580185, "learning_rate": 4.190869146633659e-06, "loss": 0.8833, "step": 111780 }, { "epoch": 0.8092104786929865, "grad_norm": 0.16583459079265594, "learning_rate": 4.190796759973073e-06, "loss": 0.8893, "step": 111790 }, { "epoch": 0.8092828653535726, "grad_norm": 0.21636223793029785, "learning_rate": 4.190724373312486e-06, "loss": 0.89, "step": 111800 }, { "epoch": 0.8093552520141588, "grad_norm": 0.22253377735614777, "learning_rate": 4.1906519866519e-06, "loss": 0.8847, "step": 111810 }, { "epoch": 0.809427638674745, "grad_norm": 0.15331260859966278, "learning_rate": 4.190579599991314e-06, "loss": 0.875, "step": 111820 }, { "epoch": 0.8095000253353312, "grad_norm": 0.1543157696723938, "learning_rate": 4.190507213330728e-06, "loss": 0.8843, "step": 111830 }, { "epoch": 0.8095724119959173, "grad_norm": 0.17324155569076538, "learning_rate": 4.190434826670142e-06, "loss": 0.8831, "step": 111840 }, { "epoch": 0.8096447986565036, "grad_norm": 0.16567941009998322, "learning_rate": 4.190362440009555e-06, "loss": 0.8866, "step": 111850 }, { "epoch": 0.8097171853170898, "grad_norm": 0.15467321872711182, "learning_rate": 4.19029005334897e-06, "loss": 0.8789, "step": 111860 }, { "epoch": 0.809789571977676, "grad_norm": 0.1545843631029129, "learning_rate": 4.190217666688383e-06, "loss": 0.8849, "step": 111870 }, { "epoch": 0.8098619586382622, "grad_norm": 0.15270915627479553, "learning_rate": 4.190145280027797e-06, "loss": 0.8768, "step": 111880 }, { "epoch": 0.8099343452988483, "grad_norm": 0.14915820956230164, "learning_rate": 4.1900728933672105e-06, "loss": 0.878, "step": 111890 }, { "epoch": 0.8100067319594345, "grad_norm": 0.15363983809947968, "learning_rate": 4.190000506706625e-06, "loss": 0.8841, "step": 111900 }, { "epoch": 0.8100791186200207, "grad_norm": 0.1456158310174942, "learning_rate": 4.189928120046039e-06, "loss": 0.8803, "step": 111910 }, { "epoch": 0.8101515052806069, "grad_norm": 0.16374152898788452, "learning_rate": 4.189855733385452e-06, "loss": 0.8793, "step": 111920 }, { "epoch": 0.810223891941193, "grad_norm": 0.15842416882514954, "learning_rate": 4.189783346724866e-06, "loss": 0.8783, "step": 111930 }, { "epoch": 0.8102962786017792, "grad_norm": 0.15646661818027496, "learning_rate": 4.18971096006428e-06, "loss": 0.8745, "step": 111940 }, { "epoch": 0.8103686652623655, "grad_norm": 0.16457587480545044, "learning_rate": 4.189638573403694e-06, "loss": 0.8782, "step": 111950 }, { "epoch": 0.8104410519229517, "grad_norm": 0.14522863924503326, "learning_rate": 4.1895661867431075e-06, "loss": 0.874, "step": 111960 }, { "epoch": 0.8105134385835379, "grad_norm": 0.15910083055496216, "learning_rate": 4.189493800082521e-06, "loss": 0.8912, "step": 111970 }, { "epoch": 0.810585825244124, "grad_norm": 0.164401113986969, "learning_rate": 4.189421413421935e-06, "loss": 0.8799, "step": 111980 }, { "epoch": 0.8106582119047102, "grad_norm": 0.15682414174079895, "learning_rate": 4.189349026761349e-06, "loss": 0.8782, "step": 111990 }, { "epoch": 0.8107305985652964, "grad_norm": 0.17602966725826263, "learning_rate": 4.189276640100763e-06, "loss": 0.8776, "step": 112000 }, { "epoch": 0.8108029852258826, "grad_norm": 0.15074023604393005, "learning_rate": 4.1892042534401765e-06, "loss": 0.8892, "step": 112010 }, { "epoch": 0.8108753718864687, "grad_norm": 0.1462225317955017, "learning_rate": 4.18913186677959e-06, "loss": 0.8541, "step": 112020 }, { "epoch": 0.8109477585470549, "grad_norm": 0.15102677047252655, "learning_rate": 4.189059480119004e-06, "loss": 0.8776, "step": 112030 }, { "epoch": 0.8110201452076411, "grad_norm": 0.40401482582092285, "learning_rate": 4.188987093458417e-06, "loss": 0.8831, "step": 112040 }, { "epoch": 0.8110925318682273, "grad_norm": 0.3952445983886719, "learning_rate": 4.188914706797832e-06, "loss": 0.878, "step": 112050 }, { "epoch": 0.8111649185288136, "grad_norm": 0.15976792573928833, "learning_rate": 4.188842320137245e-06, "loss": 0.8804, "step": 112060 }, { "epoch": 0.8112373051893997, "grad_norm": 0.14807958900928497, "learning_rate": 4.188769933476659e-06, "loss": 0.8783, "step": 112070 }, { "epoch": 0.8113096918499859, "grad_norm": 0.15941010415554047, "learning_rate": 4.188697546816073e-06, "loss": 0.8738, "step": 112080 }, { "epoch": 0.8113820785105721, "grad_norm": 0.14998309314250946, "learning_rate": 4.188625160155487e-06, "loss": 0.8743, "step": 112090 }, { "epoch": 0.8114544651711583, "grad_norm": 0.15989407896995544, "learning_rate": 4.188552773494901e-06, "loss": 0.8853, "step": 112100 }, { "epoch": 0.8115268518317444, "grad_norm": 0.15547819435596466, "learning_rate": 4.188480386834314e-06, "loss": 0.8755, "step": 112110 }, { "epoch": 0.8115992384923306, "grad_norm": 0.15554924309253693, "learning_rate": 4.188408000173728e-06, "loss": 0.8842, "step": 112120 }, { "epoch": 0.8116716251529168, "grad_norm": 0.15123188495635986, "learning_rate": 4.188335613513142e-06, "loss": 0.8748, "step": 112130 }, { "epoch": 0.811744011813503, "grad_norm": 0.16083499789237976, "learning_rate": 4.188263226852556e-06, "loss": 0.8765, "step": 112140 }, { "epoch": 0.8118163984740892, "grad_norm": 0.14771822094917297, "learning_rate": 4.18819084019197e-06, "loss": 0.875, "step": 112150 }, { "epoch": 0.8118887851346753, "grad_norm": 0.1459280550479889, "learning_rate": 4.188118453531383e-06, "loss": 0.8924, "step": 112160 }, { "epoch": 0.8119611717952616, "grad_norm": 0.1660243421792984, "learning_rate": 4.188046066870797e-06, "loss": 0.8802, "step": 112170 }, { "epoch": 0.8120335584558478, "grad_norm": 0.17147773504257202, "learning_rate": 4.187973680210211e-06, "loss": 0.883, "step": 112180 }, { "epoch": 0.812105945116434, "grad_norm": 0.16224630177021027, "learning_rate": 4.187901293549625e-06, "loss": 0.8721, "step": 112190 }, { "epoch": 0.8121783317770201, "grad_norm": 0.1489465981721878, "learning_rate": 4.1878289068890386e-06, "loss": 0.8739, "step": 112200 }, { "epoch": 0.8122507184376063, "grad_norm": 0.14807015657424927, "learning_rate": 4.187756520228452e-06, "loss": 0.8842, "step": 112210 }, { "epoch": 0.8123231050981925, "grad_norm": 0.15409156680107117, "learning_rate": 4.187684133567867e-06, "loss": 0.8738, "step": 112220 }, { "epoch": 0.8123954917587787, "grad_norm": 0.16499954462051392, "learning_rate": 4.18761174690728e-06, "loss": 0.8848, "step": 112230 }, { "epoch": 0.8124678784193649, "grad_norm": 0.15704099833965302, "learning_rate": 4.187539360246694e-06, "loss": 0.8767, "step": 112240 }, { "epoch": 0.812540265079951, "grad_norm": 0.1506892442703247, "learning_rate": 4.1874669735861075e-06, "loss": 0.8802, "step": 112250 }, { "epoch": 0.8126126517405372, "grad_norm": 0.1446923166513443, "learning_rate": 4.187394586925522e-06, "loss": 0.8753, "step": 112260 }, { "epoch": 0.8126850384011235, "grad_norm": 0.16159547865390778, "learning_rate": 4.187322200264936e-06, "loss": 0.8751, "step": 112270 }, { "epoch": 0.8127574250617097, "grad_norm": 0.17383888363838196, "learning_rate": 4.187249813604349e-06, "loss": 0.8793, "step": 112280 }, { "epoch": 0.8128298117222958, "grad_norm": 0.1629524528980255, "learning_rate": 4.187177426943763e-06, "loss": 0.8804, "step": 112290 }, { "epoch": 0.812902198382882, "grad_norm": 0.17006246745586395, "learning_rate": 4.187105040283177e-06, "loss": 0.8731, "step": 112300 }, { "epoch": 0.8129745850434682, "grad_norm": 0.16095757484436035, "learning_rate": 4.187032653622591e-06, "loss": 0.8835, "step": 112310 }, { "epoch": 0.8130469717040544, "grad_norm": 0.16775591671466827, "learning_rate": 4.1869602669620045e-06, "loss": 0.8859, "step": 112320 }, { "epoch": 0.8131193583646406, "grad_norm": 0.16004826128482819, "learning_rate": 4.186887880301418e-06, "loss": 0.865, "step": 112330 }, { "epoch": 0.8131917450252267, "grad_norm": 0.15483014285564423, "learning_rate": 4.186815493640833e-06, "loss": 0.8848, "step": 112340 }, { "epoch": 0.8132641316858129, "grad_norm": 0.16190212965011597, "learning_rate": 4.186743106980246e-06, "loss": 0.8848, "step": 112350 }, { "epoch": 0.8133365183463991, "grad_norm": 0.14579403400421143, "learning_rate": 4.18667072031966e-06, "loss": 0.8696, "step": 112360 }, { "epoch": 0.8134089050069853, "grad_norm": 0.16750074923038483, "learning_rate": 4.1865983336590734e-06, "loss": 0.8826, "step": 112370 }, { "epoch": 0.8134812916675715, "grad_norm": 0.1532454639673233, "learning_rate": 4.186525946998488e-06, "loss": 0.8722, "step": 112380 }, { "epoch": 0.8135536783281577, "grad_norm": 0.15037626028060913, "learning_rate": 4.1864535603379015e-06, "loss": 0.8787, "step": 112390 }, { "epoch": 0.8136260649887439, "grad_norm": 0.16376477479934692, "learning_rate": 4.186381173677315e-06, "loss": 0.8689, "step": 112400 }, { "epoch": 0.8136984516493301, "grad_norm": 0.15625233948230743, "learning_rate": 4.186308787016729e-06, "loss": 0.8647, "step": 112410 }, { "epoch": 0.8137708383099163, "grad_norm": 0.1553266942501068, "learning_rate": 4.186236400356143e-06, "loss": 0.8792, "step": 112420 }, { "epoch": 0.8138432249705024, "grad_norm": 0.1567511111497879, "learning_rate": 4.186164013695557e-06, "loss": 0.8802, "step": 112430 }, { "epoch": 0.8139156116310886, "grad_norm": 0.15767507255077362, "learning_rate": 4.1860916270349704e-06, "loss": 0.8783, "step": 112440 }, { "epoch": 0.8139879982916748, "grad_norm": 0.1529453545808792, "learning_rate": 4.186019240374384e-06, "loss": 0.8856, "step": 112450 }, { "epoch": 0.814060384952261, "grad_norm": 0.14311917126178741, "learning_rate": 4.1859468537137985e-06, "loss": 0.8824, "step": 112460 }, { "epoch": 0.8141327716128471, "grad_norm": 0.1506299078464508, "learning_rate": 4.185874467053212e-06, "loss": 0.8952, "step": 112470 }, { "epoch": 0.8142051582734334, "grad_norm": 0.14917585253715515, "learning_rate": 4.185802080392626e-06, "loss": 0.8784, "step": 112480 }, { "epoch": 0.8142775449340196, "grad_norm": 0.16028118133544922, "learning_rate": 4.185729693732039e-06, "loss": 0.8708, "step": 112490 }, { "epoch": 0.8143499315946058, "grad_norm": 0.18150749802589417, "learning_rate": 4.185657307071454e-06, "loss": 0.8822, "step": 112500 }, { "epoch": 0.814422318255192, "grad_norm": 0.1553160399198532, "learning_rate": 4.1855849204108674e-06, "loss": 0.8889, "step": 112510 }, { "epoch": 0.8144947049157781, "grad_norm": 0.15606872737407684, "learning_rate": 4.185512533750281e-06, "loss": 0.8843, "step": 112520 }, { "epoch": 0.8145670915763643, "grad_norm": 0.1746978759765625, "learning_rate": 4.185440147089695e-06, "loss": 0.8768, "step": 112530 }, { "epoch": 0.8146394782369505, "grad_norm": 0.20451655983924866, "learning_rate": 4.185367760429108e-06, "loss": 0.8694, "step": 112540 }, { "epoch": 0.8147118648975367, "grad_norm": 0.15979830920696259, "learning_rate": 4.185295373768522e-06, "loss": 0.8778, "step": 112550 }, { "epoch": 0.8147842515581228, "grad_norm": 0.15347421169281006, "learning_rate": 4.1852229871079355e-06, "loss": 0.8757, "step": 112560 }, { "epoch": 0.814856638218709, "grad_norm": 0.1704648733139038, "learning_rate": 4.18515060044735e-06, "loss": 0.8759, "step": 112570 }, { "epoch": 0.8149290248792952, "grad_norm": 0.14998449385166168, "learning_rate": 4.185078213786764e-06, "loss": 0.8819, "step": 112580 }, { "epoch": 0.8150014115398815, "grad_norm": 0.1555691659450531, "learning_rate": 4.185005827126177e-06, "loss": 0.8858, "step": 112590 }, { "epoch": 0.8150737982004677, "grad_norm": 0.15047410130500793, "learning_rate": 4.184933440465591e-06, "loss": 0.8789, "step": 112600 }, { "epoch": 0.8151461848610538, "grad_norm": 0.14893095195293427, "learning_rate": 4.184861053805005e-06, "loss": 0.8807, "step": 112610 }, { "epoch": 0.81521857152164, "grad_norm": 0.14945295453071594, "learning_rate": 4.184788667144419e-06, "loss": 0.8863, "step": 112620 }, { "epoch": 0.8152909581822262, "grad_norm": 0.1564187854528427, "learning_rate": 4.1847162804838325e-06, "loss": 0.8864, "step": 112630 }, { "epoch": 0.8153633448428124, "grad_norm": 0.15783673524856567, "learning_rate": 4.184643893823246e-06, "loss": 0.8753, "step": 112640 }, { "epoch": 0.8154357315033985, "grad_norm": 0.16287674009799957, "learning_rate": 4.184571507162661e-06, "loss": 0.8739, "step": 112650 }, { "epoch": 0.8155081181639847, "grad_norm": 0.1520308554172516, "learning_rate": 4.184499120502074e-06, "loss": 0.8993, "step": 112660 }, { "epoch": 0.8155805048245709, "grad_norm": 0.14746682345867157, "learning_rate": 4.184426733841488e-06, "loss": 0.8857, "step": 112670 }, { "epoch": 0.8156528914851571, "grad_norm": 0.15371263027191162, "learning_rate": 4.1843543471809015e-06, "loss": 0.869, "step": 112680 }, { "epoch": 0.8157252781457432, "grad_norm": 0.1501394808292389, "learning_rate": 4.184281960520316e-06, "loss": 0.8704, "step": 112690 }, { "epoch": 0.8157976648063295, "grad_norm": 0.18657702207565308, "learning_rate": 4.1842095738597295e-06, "loss": 0.8919, "step": 112700 }, { "epoch": 0.8158700514669157, "grad_norm": 0.15628504753112793, "learning_rate": 4.184137187199143e-06, "loss": 0.8848, "step": 112710 }, { "epoch": 0.8159424381275019, "grad_norm": 0.14718109369277954, "learning_rate": 4.184064800538557e-06, "loss": 0.8754, "step": 112720 }, { "epoch": 0.8160148247880881, "grad_norm": 0.15502269566059113, "learning_rate": 4.183992413877971e-06, "loss": 0.8684, "step": 112730 }, { "epoch": 0.8160872114486742, "grad_norm": 0.15216968953609467, "learning_rate": 4.183920027217385e-06, "loss": 0.8889, "step": 112740 }, { "epoch": 0.8161595981092604, "grad_norm": 0.23924054205417633, "learning_rate": 4.1838476405567985e-06, "loss": 0.8764, "step": 112750 }, { "epoch": 0.8162319847698466, "grad_norm": 0.1529170572757721, "learning_rate": 4.183775253896212e-06, "loss": 0.8778, "step": 112760 }, { "epoch": 0.8163043714304328, "grad_norm": 0.15385037660598755, "learning_rate": 4.1837028672356266e-06, "loss": 0.8746, "step": 112770 }, { "epoch": 0.816376758091019, "grad_norm": 0.15637922286987305, "learning_rate": 4.18363048057504e-06, "loss": 0.8715, "step": 112780 }, { "epoch": 0.8164491447516051, "grad_norm": 0.1572360396385193, "learning_rate": 4.183558093914454e-06, "loss": 0.8725, "step": 112790 }, { "epoch": 0.8165215314121914, "grad_norm": 0.14621710777282715, "learning_rate": 4.183485707253867e-06, "loss": 0.8683, "step": 112800 }, { "epoch": 0.8165939180727776, "grad_norm": 0.15248066186904907, "learning_rate": 4.183413320593281e-06, "loss": 0.8836, "step": 112810 }, { "epoch": 0.8166663047333638, "grad_norm": 0.16547314822673798, "learning_rate": 4.1833409339326955e-06, "loss": 0.8836, "step": 112820 }, { "epoch": 0.81673869139395, "grad_norm": 0.15647459030151367, "learning_rate": 4.183268547272109e-06, "loss": 0.88, "step": 112830 }, { "epoch": 0.8168110780545361, "grad_norm": 0.16002123057842255, "learning_rate": 4.183196160611523e-06, "loss": 0.887, "step": 112840 }, { "epoch": 0.8168834647151223, "grad_norm": 0.14930887520313263, "learning_rate": 4.183123773950936e-06, "loss": 0.8787, "step": 112850 }, { "epoch": 0.8169558513757085, "grad_norm": 0.15242883563041687, "learning_rate": 4.183051387290351e-06, "loss": 0.8712, "step": 112860 }, { "epoch": 0.8170282380362947, "grad_norm": 0.1501031219959259, "learning_rate": 4.182979000629764e-06, "loss": 0.8827, "step": 112870 }, { "epoch": 0.8171006246968808, "grad_norm": 0.14793910086154938, "learning_rate": 4.182906613969178e-06, "loss": 0.8773, "step": 112880 }, { "epoch": 0.817173011357467, "grad_norm": 0.14584796130657196, "learning_rate": 4.182834227308592e-06, "loss": 0.8774, "step": 112890 }, { "epoch": 0.8172453980180532, "grad_norm": 0.16166086494922638, "learning_rate": 4.182761840648006e-06, "loss": 0.8875, "step": 112900 }, { "epoch": 0.8173177846786395, "grad_norm": 0.16169136762619019, "learning_rate": 4.18268945398742e-06, "loss": 0.876, "step": 112910 }, { "epoch": 0.8173901713392256, "grad_norm": 0.15373431146144867, "learning_rate": 4.182617067326833e-06, "loss": 0.8835, "step": 112920 }, { "epoch": 0.8174625579998118, "grad_norm": 0.15151308476924896, "learning_rate": 4.182544680666247e-06, "loss": 0.8961, "step": 112930 }, { "epoch": 0.817534944660398, "grad_norm": 0.150752454996109, "learning_rate": 4.182472294005661e-06, "loss": 0.8785, "step": 112940 }, { "epoch": 0.8176073313209842, "grad_norm": 0.15306270122528076, "learning_rate": 4.182399907345075e-06, "loss": 0.8856, "step": 112950 }, { "epoch": 0.8176797179815704, "grad_norm": 0.15430527925491333, "learning_rate": 4.182327520684489e-06, "loss": 0.8826, "step": 112960 }, { "epoch": 0.8177521046421565, "grad_norm": 0.1908227801322937, "learning_rate": 4.182255134023902e-06, "loss": 0.8828, "step": 112970 }, { "epoch": 0.8178244913027427, "grad_norm": 0.15534339845180511, "learning_rate": 4.182182747363317e-06, "loss": 0.8882, "step": 112980 }, { "epoch": 0.8178968779633289, "grad_norm": 0.1552184671163559, "learning_rate": 4.18211036070273e-06, "loss": 0.875, "step": 112990 }, { "epoch": 0.8179692646239151, "grad_norm": 0.16356508433818817, "learning_rate": 4.182037974042144e-06, "loss": 0.8764, "step": 113000 }, { "epoch": 0.8180416512845012, "grad_norm": 0.14462150633335114, "learning_rate": 4.1819655873815576e-06, "loss": 0.8844, "step": 113010 }, { "epoch": 0.8181140379450875, "grad_norm": 0.15308737754821777, "learning_rate": 4.181893200720972e-06, "loss": 0.8758, "step": 113020 }, { "epoch": 0.8181864246056737, "grad_norm": 0.1581387221813202, "learning_rate": 4.181820814060386e-06, "loss": 0.886, "step": 113030 }, { "epoch": 0.8182588112662599, "grad_norm": 0.16034311056137085, "learning_rate": 4.181748427399799e-06, "loss": 0.8668, "step": 113040 }, { "epoch": 0.818331197926846, "grad_norm": 0.15236437320709229, "learning_rate": 4.181676040739213e-06, "loss": 0.8861, "step": 113050 }, { "epoch": 0.8184035845874322, "grad_norm": 0.20184694230556488, "learning_rate": 4.181603654078627e-06, "loss": 0.8921, "step": 113060 }, { "epoch": 0.8184759712480184, "grad_norm": 0.15360352396965027, "learning_rate": 4.18153126741804e-06, "loss": 0.8988, "step": 113070 }, { "epoch": 0.8185483579086046, "grad_norm": 0.1674332171678543, "learning_rate": 4.181458880757454e-06, "loss": 0.8742, "step": 113080 }, { "epoch": 0.8186207445691908, "grad_norm": 0.16225215792655945, "learning_rate": 4.181386494096868e-06, "loss": 0.877, "step": 113090 }, { "epoch": 0.8186931312297769, "grad_norm": 0.1606452614068985, "learning_rate": 4.181314107436282e-06, "loss": 0.8686, "step": 113100 }, { "epoch": 0.8187655178903631, "grad_norm": 0.1443140208721161, "learning_rate": 4.1812417207756954e-06, "loss": 0.9018, "step": 113110 }, { "epoch": 0.8188379045509494, "grad_norm": 0.16372881829738617, "learning_rate": 4.181169334115109e-06, "loss": 0.8783, "step": 113120 }, { "epoch": 0.8189102912115356, "grad_norm": 0.1642576903104782, "learning_rate": 4.1810969474545235e-06, "loss": 0.8701, "step": 113130 }, { "epoch": 0.8189826778721218, "grad_norm": 0.15896013379096985, "learning_rate": 4.181024560793937e-06, "loss": 0.8781, "step": 113140 }, { "epoch": 0.8190550645327079, "grad_norm": 0.17695783078670502, "learning_rate": 4.180952174133351e-06, "loss": 0.8797, "step": 113150 }, { "epoch": 0.8191274511932941, "grad_norm": 0.15274715423583984, "learning_rate": 4.180879787472764e-06, "loss": 0.8736, "step": 113160 }, { "epoch": 0.8191998378538803, "grad_norm": 0.14954541623592377, "learning_rate": 4.180807400812179e-06, "loss": 0.8771, "step": 113170 }, { "epoch": 0.8192722245144665, "grad_norm": 0.14884912967681885, "learning_rate": 4.1807350141515924e-06, "loss": 0.8703, "step": 113180 }, { "epoch": 0.8193446111750526, "grad_norm": 0.18822531402111053, "learning_rate": 4.180662627491006e-06, "loss": 0.8762, "step": 113190 }, { "epoch": 0.8194169978356388, "grad_norm": 0.16255638003349304, "learning_rate": 4.18059024083042e-06, "loss": 0.8758, "step": 113200 }, { "epoch": 0.819489384496225, "grad_norm": 0.16536031663417816, "learning_rate": 4.180517854169834e-06, "loss": 0.8704, "step": 113210 }, { "epoch": 0.8195617711568112, "grad_norm": 0.16202905774116516, "learning_rate": 4.180445467509248e-06, "loss": 0.8886, "step": 113220 }, { "epoch": 0.8196341578173975, "grad_norm": 0.2304823398590088, "learning_rate": 4.180373080848661e-06, "loss": 0.8815, "step": 113230 }, { "epoch": 0.8197065444779836, "grad_norm": 0.15564051270484924, "learning_rate": 4.180300694188075e-06, "loss": 0.8844, "step": 113240 }, { "epoch": 0.8197789311385698, "grad_norm": 0.1432119905948639, "learning_rate": 4.1802283075274894e-06, "loss": 0.8713, "step": 113250 }, { "epoch": 0.819851317799156, "grad_norm": 0.16350963711738586, "learning_rate": 4.180155920866903e-06, "loss": 0.8831, "step": 113260 }, { "epoch": 0.8199237044597422, "grad_norm": 0.16729813814163208, "learning_rate": 4.180083534206317e-06, "loss": 0.879, "step": 113270 }, { "epoch": 0.8199960911203283, "grad_norm": 0.1555587649345398, "learning_rate": 4.18001114754573e-06, "loss": 0.8793, "step": 113280 }, { "epoch": 0.8200684777809145, "grad_norm": 0.1441824585199356, "learning_rate": 4.179938760885145e-06, "loss": 0.8703, "step": 113290 }, { "epoch": 0.8201408644415007, "grad_norm": 0.1523018628358841, "learning_rate": 4.179866374224558e-06, "loss": 0.8796, "step": 113300 }, { "epoch": 0.8202132511020869, "grad_norm": 0.15191130340099335, "learning_rate": 4.179793987563972e-06, "loss": 0.8681, "step": 113310 }, { "epoch": 0.820285637762673, "grad_norm": 0.14424999058246613, "learning_rate": 4.179721600903386e-06, "loss": 0.8875, "step": 113320 }, { "epoch": 0.8203580244232593, "grad_norm": 0.1524539440870285, "learning_rate": 4.1796492142428e-06, "loss": 0.8752, "step": 113330 }, { "epoch": 0.8204304110838455, "grad_norm": 0.14882370829582214, "learning_rate": 4.179576827582214e-06, "loss": 0.8885, "step": 113340 }, { "epoch": 0.8205027977444317, "grad_norm": 0.15619249641895294, "learning_rate": 4.179504440921627e-06, "loss": 0.8795, "step": 113350 }, { "epoch": 0.8205751844050179, "grad_norm": 0.16282761096954346, "learning_rate": 4.179432054261041e-06, "loss": 0.8792, "step": 113360 }, { "epoch": 0.820647571065604, "grad_norm": 0.15634344518184662, "learning_rate": 4.179359667600455e-06, "loss": 0.8811, "step": 113370 }, { "epoch": 0.8207199577261902, "grad_norm": 0.15985414385795593, "learning_rate": 4.179287280939869e-06, "loss": 0.8803, "step": 113380 }, { "epoch": 0.8207923443867764, "grad_norm": 0.1588483601808548, "learning_rate": 4.179214894279283e-06, "loss": 0.8858, "step": 113390 }, { "epoch": 0.8208647310473626, "grad_norm": 0.1567647010087967, "learning_rate": 4.179142507618696e-06, "loss": 0.8786, "step": 113400 }, { "epoch": 0.8209371177079487, "grad_norm": 0.18194611370563507, "learning_rate": 4.17907012095811e-06, "loss": 0.8917, "step": 113410 }, { "epoch": 0.8210095043685349, "grad_norm": 0.1811460703611374, "learning_rate": 4.178997734297524e-06, "loss": 0.8777, "step": 113420 }, { "epoch": 0.8210818910291211, "grad_norm": 0.15702205896377563, "learning_rate": 4.178925347636938e-06, "loss": 0.8673, "step": 113430 }, { "epoch": 0.8211542776897074, "grad_norm": 0.1575247198343277, "learning_rate": 4.1788529609763515e-06, "loss": 0.8919, "step": 113440 }, { "epoch": 0.8212266643502936, "grad_norm": 0.16069388389587402, "learning_rate": 4.178780574315765e-06, "loss": 0.8805, "step": 113450 }, { "epoch": 0.8212990510108797, "grad_norm": 0.15721113979816437, "learning_rate": 4.17870818765518e-06, "loss": 0.8744, "step": 113460 }, { "epoch": 0.8213714376714659, "grad_norm": 0.18812179565429688, "learning_rate": 4.178635800994593e-06, "loss": 0.884, "step": 113470 }, { "epoch": 0.8214438243320521, "grad_norm": 0.1596520096063614, "learning_rate": 4.178563414334007e-06, "loss": 0.878, "step": 113480 }, { "epoch": 0.8215162109926383, "grad_norm": 0.15708090364933014, "learning_rate": 4.1784910276734205e-06, "loss": 0.8749, "step": 113490 }, { "epoch": 0.8215885976532245, "grad_norm": 0.1581474244594574, "learning_rate": 4.178418641012835e-06, "loss": 0.8897, "step": 113500 }, { "epoch": 0.8216609843138106, "grad_norm": 0.15259206295013428, "learning_rate": 4.1783462543522486e-06, "loss": 0.8834, "step": 113510 }, { "epoch": 0.8217333709743968, "grad_norm": 0.15696731209754944, "learning_rate": 4.178273867691662e-06, "loss": 0.8793, "step": 113520 }, { "epoch": 0.821805757634983, "grad_norm": 0.15300579369068146, "learning_rate": 4.178201481031076e-06, "loss": 0.878, "step": 113530 }, { "epoch": 0.8218781442955692, "grad_norm": 0.15157224237918854, "learning_rate": 4.17812909437049e-06, "loss": 0.8733, "step": 113540 }, { "epoch": 0.8219505309561554, "grad_norm": 0.15784545242786407, "learning_rate": 4.178056707709904e-06, "loss": 0.879, "step": 113550 }, { "epoch": 0.8220229176167416, "grad_norm": 0.1615738868713379, "learning_rate": 4.1779843210493175e-06, "loss": 0.8809, "step": 113560 }, { "epoch": 0.8220953042773278, "grad_norm": 0.16567973792552948, "learning_rate": 4.177911934388731e-06, "loss": 0.8967, "step": 113570 }, { "epoch": 0.822167690937914, "grad_norm": 0.15392477810382843, "learning_rate": 4.1778395477281456e-06, "loss": 0.8712, "step": 113580 }, { "epoch": 0.8222400775985002, "grad_norm": 0.17302165925502777, "learning_rate": 4.177767161067559e-06, "loss": 0.8868, "step": 113590 }, { "epoch": 0.8223124642590863, "grad_norm": 0.16119389235973358, "learning_rate": 4.177694774406972e-06, "loss": 0.8805, "step": 113600 }, { "epoch": 0.8223848509196725, "grad_norm": 0.16274970769882202, "learning_rate": 4.177622387746386e-06, "loss": 0.8818, "step": 113610 }, { "epoch": 0.8224572375802587, "grad_norm": 0.1551283448934555, "learning_rate": 4.1775500010858e-06, "loss": 0.8802, "step": 113620 }, { "epoch": 0.8225296242408449, "grad_norm": 0.1489950567483902, "learning_rate": 4.177477614425214e-06, "loss": 0.8803, "step": 113630 }, { "epoch": 0.822602010901431, "grad_norm": 0.15743660926818848, "learning_rate": 4.177405227764627e-06, "loss": 0.8856, "step": 113640 }, { "epoch": 0.8226743975620173, "grad_norm": 0.16645437479019165, "learning_rate": 4.177332841104042e-06, "loss": 0.8809, "step": 113650 }, { "epoch": 0.8227467842226035, "grad_norm": 0.14299704134464264, "learning_rate": 4.177260454443455e-06, "loss": 0.8884, "step": 113660 }, { "epoch": 0.8228191708831897, "grad_norm": 0.1666107475757599, "learning_rate": 4.177188067782869e-06, "loss": 0.8872, "step": 113670 }, { "epoch": 0.8228915575437759, "grad_norm": 0.1572570949792862, "learning_rate": 4.1771156811222826e-06, "loss": 0.8743, "step": 113680 }, { "epoch": 0.822963944204362, "grad_norm": 0.15440626442432404, "learning_rate": 4.177043294461697e-06, "loss": 0.8864, "step": 113690 }, { "epoch": 0.8230363308649482, "grad_norm": 0.14731813967227936, "learning_rate": 4.176970907801111e-06, "loss": 0.8812, "step": 113700 }, { "epoch": 0.8231087175255344, "grad_norm": 0.15947289764881134, "learning_rate": 4.176898521140524e-06, "loss": 0.8917, "step": 113710 }, { "epoch": 0.8231811041861206, "grad_norm": 0.15511277318000793, "learning_rate": 4.176826134479938e-06, "loss": 0.8687, "step": 113720 }, { "epoch": 0.8232534908467067, "grad_norm": 0.14976176619529724, "learning_rate": 4.176753747819352e-06, "loss": 0.8789, "step": 113730 }, { "epoch": 0.8233258775072929, "grad_norm": 0.1585080772638321, "learning_rate": 4.176681361158766e-06, "loss": 0.8886, "step": 113740 }, { "epoch": 0.8233982641678791, "grad_norm": 0.1489742547273636, "learning_rate": 4.1766089744981796e-06, "loss": 0.8814, "step": 113750 }, { "epoch": 0.8234706508284654, "grad_norm": 0.15232981741428375, "learning_rate": 4.176536587837593e-06, "loss": 0.8869, "step": 113760 }, { "epoch": 0.8235430374890516, "grad_norm": 0.15081308782100677, "learning_rate": 4.176464201177008e-06, "loss": 0.8853, "step": 113770 }, { "epoch": 0.8236154241496377, "grad_norm": 0.1606776863336563, "learning_rate": 4.176391814516421e-06, "loss": 0.8917, "step": 113780 }, { "epoch": 0.8236878108102239, "grad_norm": 0.15332511067390442, "learning_rate": 4.176319427855835e-06, "loss": 0.8662, "step": 113790 }, { "epoch": 0.8237601974708101, "grad_norm": 0.15498222410678864, "learning_rate": 4.1762470411952485e-06, "loss": 0.8738, "step": 113800 }, { "epoch": 0.8238325841313963, "grad_norm": 0.174130380153656, "learning_rate": 4.176174654534663e-06, "loss": 0.8872, "step": 113810 }, { "epoch": 0.8239049707919824, "grad_norm": 0.15380537509918213, "learning_rate": 4.176102267874077e-06, "loss": 0.8666, "step": 113820 }, { "epoch": 0.8239773574525686, "grad_norm": 0.15310508012771606, "learning_rate": 4.17602988121349e-06, "loss": 0.8979, "step": 113830 }, { "epoch": 0.8240497441131548, "grad_norm": 0.1675233244895935, "learning_rate": 4.175957494552904e-06, "loss": 0.8791, "step": 113840 }, { "epoch": 0.824122130773741, "grad_norm": 0.14090007543563843, "learning_rate": 4.175885107892318e-06, "loss": 0.8752, "step": 113850 }, { "epoch": 0.8241945174343273, "grad_norm": 0.15238140523433685, "learning_rate": 4.175812721231732e-06, "loss": 0.868, "step": 113860 }, { "epoch": 0.8242669040949134, "grad_norm": 0.16735051572322845, "learning_rate": 4.1757403345711455e-06, "loss": 0.8843, "step": 113870 }, { "epoch": 0.8243392907554996, "grad_norm": 0.18536004424095154, "learning_rate": 4.175667947910559e-06, "loss": 0.8736, "step": 113880 }, { "epoch": 0.8244116774160858, "grad_norm": 0.15690474212169647, "learning_rate": 4.175595561249974e-06, "loss": 0.8772, "step": 113890 }, { "epoch": 0.824484064076672, "grad_norm": 0.17251384258270264, "learning_rate": 4.175523174589387e-06, "loss": 0.878, "step": 113900 }, { "epoch": 0.8245564507372581, "grad_norm": 0.15271571278572083, "learning_rate": 4.175450787928801e-06, "loss": 0.8737, "step": 113910 }, { "epoch": 0.8246288373978443, "grad_norm": 0.1521008163690567, "learning_rate": 4.1753784012682144e-06, "loss": 0.89, "step": 113920 }, { "epoch": 0.8247012240584305, "grad_norm": 0.15803080797195435, "learning_rate": 4.175306014607629e-06, "loss": 0.8795, "step": 113930 }, { "epoch": 0.8247736107190167, "grad_norm": 0.14416874945163727, "learning_rate": 4.1752336279470425e-06, "loss": 0.8853, "step": 113940 }, { "epoch": 0.8248459973796028, "grad_norm": 0.16256214678287506, "learning_rate": 4.175161241286456e-06, "loss": 0.8969, "step": 113950 }, { "epoch": 0.824918384040189, "grad_norm": 0.15503908693790436, "learning_rate": 4.17508885462587e-06, "loss": 0.8903, "step": 113960 }, { "epoch": 0.8249907707007753, "grad_norm": 0.23007570207118988, "learning_rate": 4.175016467965284e-06, "loss": 0.8737, "step": 113970 }, { "epoch": 0.8250631573613615, "grad_norm": 0.15308153629302979, "learning_rate": 4.174944081304698e-06, "loss": 0.8723, "step": 113980 }, { "epoch": 0.8251355440219477, "grad_norm": 0.14756432175636292, "learning_rate": 4.1748716946441114e-06, "loss": 0.8749, "step": 113990 }, { "epoch": 0.8252079306825338, "grad_norm": 0.17486760020256042, "learning_rate": 4.174799307983525e-06, "loss": 0.8632, "step": 114000 }, { "epoch": 0.82528031734312, "grad_norm": 0.1607871949672699, "learning_rate": 4.1747269213229395e-06, "loss": 0.8716, "step": 114010 }, { "epoch": 0.8253527040037062, "grad_norm": 0.1484338939189911, "learning_rate": 4.174654534662353e-06, "loss": 0.8746, "step": 114020 }, { "epoch": 0.8254250906642924, "grad_norm": 0.21677958965301514, "learning_rate": 4.174582148001767e-06, "loss": 0.8857, "step": 114030 }, { "epoch": 0.8254974773248785, "grad_norm": 0.16989293694496155, "learning_rate": 4.17450976134118e-06, "loss": 0.8781, "step": 114040 }, { "epoch": 0.8255698639854647, "grad_norm": 0.1774820238351822, "learning_rate": 4.174437374680594e-06, "loss": 0.8694, "step": 114050 }, { "epoch": 0.8256422506460509, "grad_norm": 0.15557865798473358, "learning_rate": 4.1743649880200085e-06, "loss": 0.8745, "step": 114060 }, { "epoch": 0.8257146373066371, "grad_norm": 0.15898308157920837, "learning_rate": 4.174292601359422e-06, "loss": 0.8887, "step": 114070 }, { "epoch": 0.8257870239672234, "grad_norm": 0.17024999856948853, "learning_rate": 4.174220214698836e-06, "loss": 0.8772, "step": 114080 }, { "epoch": 0.8258594106278095, "grad_norm": 0.1510993391275406, "learning_rate": 4.174147828038249e-06, "loss": 0.8678, "step": 114090 }, { "epoch": 0.8259317972883957, "grad_norm": 0.15980158746242523, "learning_rate": 4.174075441377664e-06, "loss": 0.8756, "step": 114100 }, { "epoch": 0.8260041839489819, "grad_norm": 0.15219931304454803, "learning_rate": 4.174003054717077e-06, "loss": 0.8959, "step": 114110 }, { "epoch": 0.8260765706095681, "grad_norm": 0.14390040934085846, "learning_rate": 4.173930668056491e-06, "loss": 0.8794, "step": 114120 }, { "epoch": 0.8261489572701542, "grad_norm": 0.14578518271446228, "learning_rate": 4.173858281395905e-06, "loss": 0.8719, "step": 114130 }, { "epoch": 0.8262213439307404, "grad_norm": 0.1575828641653061, "learning_rate": 4.173785894735318e-06, "loss": 0.8812, "step": 114140 }, { "epoch": 0.8262937305913266, "grad_norm": 0.21511991322040558, "learning_rate": 4.173713508074732e-06, "loss": 0.8674, "step": 114150 }, { "epoch": 0.8263661172519128, "grad_norm": 0.16267074644565582, "learning_rate": 4.173641121414146e-06, "loss": 0.8874, "step": 114160 }, { "epoch": 0.826438503912499, "grad_norm": 0.14810219407081604, "learning_rate": 4.17356873475356e-06, "loss": 0.8732, "step": 114170 }, { "epoch": 0.8265108905730852, "grad_norm": 0.14335951209068298, "learning_rate": 4.1734963480929735e-06, "loss": 0.8814, "step": 114180 }, { "epoch": 0.8265832772336714, "grad_norm": 0.14947238564491272, "learning_rate": 4.173423961432387e-06, "loss": 0.8886, "step": 114190 }, { "epoch": 0.8266556638942576, "grad_norm": 0.15292997658252716, "learning_rate": 4.173351574771801e-06, "loss": 0.8808, "step": 114200 }, { "epoch": 0.8267280505548438, "grad_norm": 0.16210046410560608, "learning_rate": 4.173279188111215e-06, "loss": 0.8807, "step": 114210 }, { "epoch": 0.82680043721543, "grad_norm": 0.1552553027868271, "learning_rate": 4.173206801450629e-06, "loss": 0.9006, "step": 114220 }, { "epoch": 0.8268728238760161, "grad_norm": 0.15879151225090027, "learning_rate": 4.1731344147900425e-06, "loss": 0.8699, "step": 114230 }, { "epoch": 0.8269452105366023, "grad_norm": 0.1656922549009323, "learning_rate": 4.173062028129456e-06, "loss": 0.8742, "step": 114240 }, { "epoch": 0.8270175971971885, "grad_norm": 0.18779303133487701, "learning_rate": 4.1729896414688705e-06, "loss": 0.876, "step": 114250 }, { "epoch": 0.8270899838577747, "grad_norm": 0.15145491063594818, "learning_rate": 4.172917254808284e-06, "loss": 0.8818, "step": 114260 }, { "epoch": 0.8271623705183608, "grad_norm": 0.1533774733543396, "learning_rate": 4.172844868147698e-06, "loss": 0.8762, "step": 114270 }, { "epoch": 0.827234757178947, "grad_norm": 0.15074725449085236, "learning_rate": 4.172772481487111e-06, "loss": 0.8766, "step": 114280 }, { "epoch": 0.8273071438395333, "grad_norm": 0.15934348106384277, "learning_rate": 4.172700094826526e-06, "loss": 0.8789, "step": 114290 }, { "epoch": 0.8273795305001195, "grad_norm": 0.1573365181684494, "learning_rate": 4.1726277081659395e-06, "loss": 0.8783, "step": 114300 }, { "epoch": 0.8274519171607057, "grad_norm": 0.15947797894477844, "learning_rate": 4.172555321505353e-06, "loss": 0.8645, "step": 114310 }, { "epoch": 0.8275243038212918, "grad_norm": 0.1520025134086609, "learning_rate": 4.172482934844767e-06, "loss": 0.8753, "step": 114320 }, { "epoch": 0.827596690481878, "grad_norm": 0.15688809752464294, "learning_rate": 4.172410548184181e-06, "loss": 0.8712, "step": 114330 }, { "epoch": 0.8276690771424642, "grad_norm": 0.23521029949188232, "learning_rate": 4.172338161523595e-06, "loss": 0.8903, "step": 114340 }, { "epoch": 0.8277414638030504, "grad_norm": 0.15197953581809998, "learning_rate": 4.172265774863008e-06, "loss": 0.8847, "step": 114350 }, { "epoch": 0.8278138504636365, "grad_norm": 0.151088684797287, "learning_rate": 4.172193388202422e-06, "loss": 0.8656, "step": 114360 }, { "epoch": 0.8278862371242227, "grad_norm": 0.14569281041622162, "learning_rate": 4.1721210015418365e-06, "loss": 0.8814, "step": 114370 }, { "epoch": 0.8279586237848089, "grad_norm": 0.1544409692287445, "learning_rate": 4.17204861488125e-06, "loss": 0.8876, "step": 114380 }, { "epoch": 0.8280310104453952, "grad_norm": 0.17135676741600037, "learning_rate": 4.171976228220664e-06, "loss": 0.8809, "step": 114390 }, { "epoch": 0.8281033971059814, "grad_norm": 0.15658897161483765, "learning_rate": 4.171903841560077e-06, "loss": 0.8838, "step": 114400 }, { "epoch": 0.8281757837665675, "grad_norm": 0.14812763035297394, "learning_rate": 4.171831454899492e-06, "loss": 0.8784, "step": 114410 }, { "epoch": 0.8282481704271537, "grad_norm": 0.15863005816936493, "learning_rate": 4.171759068238905e-06, "loss": 0.89, "step": 114420 }, { "epoch": 0.8283205570877399, "grad_norm": 0.15445846319198608, "learning_rate": 4.171686681578319e-06, "loss": 0.8813, "step": 114430 }, { "epoch": 0.8283929437483261, "grad_norm": 0.15649083256721497, "learning_rate": 4.171614294917733e-06, "loss": 0.8706, "step": 114440 }, { "epoch": 0.8284653304089122, "grad_norm": 0.15367910265922546, "learning_rate": 4.171541908257147e-06, "loss": 0.8636, "step": 114450 }, { "epoch": 0.8285377170694984, "grad_norm": 0.16421107947826385, "learning_rate": 4.171469521596561e-06, "loss": 0.8749, "step": 114460 }, { "epoch": 0.8286101037300846, "grad_norm": 0.1638004332780838, "learning_rate": 4.171397134935974e-06, "loss": 0.8675, "step": 114470 }, { "epoch": 0.8286824903906708, "grad_norm": 0.15033425390720367, "learning_rate": 4.171324748275388e-06, "loss": 0.8858, "step": 114480 }, { "epoch": 0.8287548770512569, "grad_norm": 0.16598929464817047, "learning_rate": 4.171252361614802e-06, "loss": 0.8906, "step": 114490 }, { "epoch": 0.8288272637118432, "grad_norm": 0.17145119607448578, "learning_rate": 4.171179974954216e-06, "loss": 0.8716, "step": 114500 }, { "epoch": 0.8288996503724294, "grad_norm": 0.20420026779174805, "learning_rate": 4.17110758829363e-06, "loss": 0.8753, "step": 114510 }, { "epoch": 0.8289720370330156, "grad_norm": 0.15310752391815186, "learning_rate": 4.171035201633043e-06, "loss": 0.8767, "step": 114520 }, { "epoch": 0.8290444236936018, "grad_norm": 0.15935362875461578, "learning_rate": 4.170962814972458e-06, "loss": 0.8764, "step": 114530 }, { "epoch": 0.8291168103541879, "grad_norm": 0.16182856261730194, "learning_rate": 4.170890428311871e-06, "loss": 0.8739, "step": 114540 }, { "epoch": 0.8291891970147741, "grad_norm": 0.15881557762622833, "learning_rate": 4.170818041651285e-06, "loss": 0.8855, "step": 114550 }, { "epoch": 0.8292615836753603, "grad_norm": 0.1685194969177246, "learning_rate": 4.170745654990699e-06, "loss": 0.8758, "step": 114560 }, { "epoch": 0.8293339703359465, "grad_norm": 0.14720526337623596, "learning_rate": 4.170673268330113e-06, "loss": 0.873, "step": 114570 }, { "epoch": 0.8294063569965326, "grad_norm": 0.16784626245498657, "learning_rate": 4.170600881669527e-06, "loss": 0.8853, "step": 114580 }, { "epoch": 0.8294787436571188, "grad_norm": 0.1470102220773697, "learning_rate": 4.17052849500894e-06, "loss": 0.8832, "step": 114590 }, { "epoch": 0.829551130317705, "grad_norm": 0.16039815545082092, "learning_rate": 4.170456108348354e-06, "loss": 0.8632, "step": 114600 }, { "epoch": 0.8296235169782913, "grad_norm": 0.1540583372116089, "learning_rate": 4.170383721687768e-06, "loss": 0.8813, "step": 114610 }, { "epoch": 0.8296959036388775, "grad_norm": 0.15052036941051483, "learning_rate": 4.170311335027182e-06, "loss": 0.8849, "step": 114620 }, { "epoch": 0.8297682902994636, "grad_norm": 0.1492500752210617, "learning_rate": 4.170238948366596e-06, "loss": 0.885, "step": 114630 }, { "epoch": 0.8298406769600498, "grad_norm": 0.15592175722122192, "learning_rate": 4.170166561706009e-06, "loss": 0.8654, "step": 114640 }, { "epoch": 0.829913063620636, "grad_norm": 0.15170545876026154, "learning_rate": 4.170094175045423e-06, "loss": 0.8728, "step": 114650 }, { "epoch": 0.8299854502812222, "grad_norm": 0.45990192890167236, "learning_rate": 4.1700217883848364e-06, "loss": 0.8802, "step": 114660 }, { "epoch": 0.8300578369418083, "grad_norm": 0.1639576107263565, "learning_rate": 4.16994940172425e-06, "loss": 0.8959, "step": 114670 }, { "epoch": 0.8301302236023945, "grad_norm": 0.15342135727405548, "learning_rate": 4.1698770150636645e-06, "loss": 0.8907, "step": 114680 }, { "epoch": 0.8302026102629807, "grad_norm": 0.15938065946102142, "learning_rate": 4.169804628403078e-06, "loss": 0.873, "step": 114690 }, { "epoch": 0.8302749969235669, "grad_norm": 0.15566521883010864, "learning_rate": 4.169732241742492e-06, "loss": 0.8848, "step": 114700 }, { "epoch": 0.8303473835841532, "grad_norm": 0.1593281626701355, "learning_rate": 4.169659855081905e-06, "loss": 0.8775, "step": 114710 }, { "epoch": 0.8304197702447393, "grad_norm": 0.15656022727489471, "learning_rate": 4.16958746842132e-06, "loss": 0.8735, "step": 114720 }, { "epoch": 0.8304921569053255, "grad_norm": 0.15564171969890594, "learning_rate": 4.1695150817607334e-06, "loss": 0.8776, "step": 114730 }, { "epoch": 0.8305645435659117, "grad_norm": 0.15010464191436768, "learning_rate": 4.169442695100147e-06, "loss": 0.8914, "step": 114740 }, { "epoch": 0.8306369302264979, "grad_norm": 0.15904924273490906, "learning_rate": 4.169370308439561e-06, "loss": 0.8804, "step": 114750 }, { "epoch": 0.830709316887084, "grad_norm": 0.161235973238945, "learning_rate": 4.169297921778975e-06, "loss": 0.8811, "step": 114760 }, { "epoch": 0.8307817035476702, "grad_norm": 0.15518715977668762, "learning_rate": 4.169225535118389e-06, "loss": 0.8712, "step": 114770 }, { "epoch": 0.8308540902082564, "grad_norm": 0.15426094830036163, "learning_rate": 4.169153148457802e-06, "loss": 0.881, "step": 114780 }, { "epoch": 0.8309264768688426, "grad_norm": 0.15717415511608124, "learning_rate": 4.169080761797216e-06, "loss": 0.8656, "step": 114790 }, { "epoch": 0.8309988635294288, "grad_norm": 0.19086652994155884, "learning_rate": 4.1690083751366305e-06, "loss": 0.8722, "step": 114800 }, { "epoch": 0.8310712501900149, "grad_norm": 0.18602590262889862, "learning_rate": 4.168935988476044e-06, "loss": 0.8677, "step": 114810 }, { "epoch": 0.8311436368506012, "grad_norm": 0.16731368005275726, "learning_rate": 4.168863601815458e-06, "loss": 0.8814, "step": 114820 }, { "epoch": 0.8312160235111874, "grad_norm": 0.162908136844635, "learning_rate": 4.168791215154871e-06, "loss": 0.8795, "step": 114830 }, { "epoch": 0.8312884101717736, "grad_norm": 0.15628595650196075, "learning_rate": 4.168718828494285e-06, "loss": 0.8818, "step": 114840 }, { "epoch": 0.8313607968323597, "grad_norm": 0.15655048191547394, "learning_rate": 4.168646441833699e-06, "loss": 0.8835, "step": 114850 }, { "epoch": 0.8314331834929459, "grad_norm": 0.15166090428829193, "learning_rate": 4.168574055173113e-06, "loss": 0.888, "step": 114860 }, { "epoch": 0.8315055701535321, "grad_norm": 0.19675646722316742, "learning_rate": 4.168501668512527e-06, "loss": 0.8821, "step": 114870 }, { "epoch": 0.8315779568141183, "grad_norm": 0.15683965384960175, "learning_rate": 4.16842928185194e-06, "loss": 0.8894, "step": 114880 }, { "epoch": 0.8316503434747045, "grad_norm": 0.15531839430332184, "learning_rate": 4.168356895191355e-06, "loss": 0.8811, "step": 114890 }, { "epoch": 0.8317227301352906, "grad_norm": 0.1496214121580124, "learning_rate": 4.168284508530768e-06, "loss": 0.873, "step": 114900 }, { "epoch": 0.8317951167958768, "grad_norm": 0.16152338683605194, "learning_rate": 4.168212121870182e-06, "loss": 0.8725, "step": 114910 }, { "epoch": 0.8318675034564631, "grad_norm": 0.17109687626361847, "learning_rate": 4.1681397352095955e-06, "loss": 0.8678, "step": 114920 }, { "epoch": 0.8319398901170493, "grad_norm": 0.15160439908504486, "learning_rate": 4.16806734854901e-06, "loss": 0.8881, "step": 114930 }, { "epoch": 0.8320122767776355, "grad_norm": 0.15334677696228027, "learning_rate": 4.167994961888424e-06, "loss": 0.882, "step": 114940 }, { "epoch": 0.8320846634382216, "grad_norm": 0.16867488622665405, "learning_rate": 4.167922575227837e-06, "loss": 0.8815, "step": 114950 }, { "epoch": 0.8321570500988078, "grad_norm": 0.16307266056537628, "learning_rate": 4.167850188567251e-06, "loss": 0.8737, "step": 114960 }, { "epoch": 0.832229436759394, "grad_norm": 0.17095071077346802, "learning_rate": 4.167777801906665e-06, "loss": 0.868, "step": 114970 }, { "epoch": 0.8323018234199802, "grad_norm": 0.1598438024520874, "learning_rate": 4.167705415246079e-06, "loss": 0.8826, "step": 114980 }, { "epoch": 0.8323742100805663, "grad_norm": 0.15811651945114136, "learning_rate": 4.1676330285854925e-06, "loss": 0.8842, "step": 114990 }, { "epoch": 0.8324465967411525, "grad_norm": 0.14370112121105194, "learning_rate": 4.167560641924906e-06, "loss": 0.8631, "step": 115000 }, { "epoch": 0.8325189834017387, "grad_norm": 0.15269970893859863, "learning_rate": 4.167488255264321e-06, "loss": 0.8808, "step": 115010 }, { "epoch": 0.8325913700623249, "grad_norm": 0.15361453592777252, "learning_rate": 4.167415868603734e-06, "loss": 0.87, "step": 115020 }, { "epoch": 0.8326637567229112, "grad_norm": 0.1495579481124878, "learning_rate": 4.167343481943148e-06, "loss": 0.8709, "step": 115030 }, { "epoch": 0.8327361433834973, "grad_norm": 0.14331714808940887, "learning_rate": 4.1672710952825615e-06, "loss": 0.8837, "step": 115040 }, { "epoch": 0.8328085300440835, "grad_norm": 0.2528897225856781, "learning_rate": 4.167198708621976e-06, "loss": 0.8815, "step": 115050 }, { "epoch": 0.8328809167046697, "grad_norm": 0.15450528264045715, "learning_rate": 4.1671263219613896e-06, "loss": 0.8856, "step": 115060 }, { "epoch": 0.8329533033652559, "grad_norm": 0.1559913456439972, "learning_rate": 4.167053935300803e-06, "loss": 0.8908, "step": 115070 }, { "epoch": 0.833025690025842, "grad_norm": 0.1556912660598755, "learning_rate": 4.166981548640217e-06, "loss": 0.8851, "step": 115080 }, { "epoch": 0.8330980766864282, "grad_norm": 0.15410785377025604, "learning_rate": 4.166909161979631e-06, "loss": 0.8828, "step": 115090 }, { "epoch": 0.8331704633470144, "grad_norm": 0.20597724616527557, "learning_rate": 4.166836775319045e-06, "loss": 0.8725, "step": 115100 }, { "epoch": 0.8332428500076006, "grad_norm": 0.1543237566947937, "learning_rate": 4.1667643886584585e-06, "loss": 0.8656, "step": 115110 }, { "epoch": 0.8333152366681867, "grad_norm": 0.14461509883403778, "learning_rate": 4.166692001997872e-06, "loss": 0.8763, "step": 115120 }, { "epoch": 0.8333876233287729, "grad_norm": 0.14833268523216248, "learning_rate": 4.1666196153372866e-06, "loss": 0.8725, "step": 115130 }, { "epoch": 0.8334600099893592, "grad_norm": 0.16073955595493317, "learning_rate": 4.1665472286767e-06, "loss": 0.8727, "step": 115140 }, { "epoch": 0.8335323966499454, "grad_norm": 0.13932476937770844, "learning_rate": 4.166474842016114e-06, "loss": 0.8792, "step": 115150 }, { "epoch": 0.8336047833105316, "grad_norm": 0.1595870852470398, "learning_rate": 4.166402455355527e-06, "loss": 0.8769, "step": 115160 }, { "epoch": 0.8336771699711177, "grad_norm": 0.14972224831581116, "learning_rate": 4.166330068694942e-06, "loss": 0.8873, "step": 115170 }, { "epoch": 0.8337495566317039, "grad_norm": 0.14646044373512268, "learning_rate": 4.1662576820343555e-06, "loss": 0.8845, "step": 115180 }, { "epoch": 0.8338219432922901, "grad_norm": 0.1509612500667572, "learning_rate": 4.166185295373768e-06, "loss": 0.8673, "step": 115190 }, { "epoch": 0.8338943299528763, "grad_norm": 0.1453905701637268, "learning_rate": 4.166112908713183e-06, "loss": 0.8754, "step": 115200 }, { "epoch": 0.8339667166134624, "grad_norm": 0.14863267540931702, "learning_rate": 4.166040522052596e-06, "loss": 0.869, "step": 115210 }, { "epoch": 0.8340391032740486, "grad_norm": 0.1638236790895462, "learning_rate": 4.16596813539201e-06, "loss": 0.8878, "step": 115220 }, { "epoch": 0.8341114899346348, "grad_norm": 0.17066480219364166, "learning_rate": 4.1658957487314236e-06, "loss": 0.8719, "step": 115230 }, { "epoch": 0.8341838765952211, "grad_norm": 0.16603226959705353, "learning_rate": 4.165823362070838e-06, "loss": 0.8733, "step": 115240 }, { "epoch": 0.8342562632558073, "grad_norm": 0.141592338681221, "learning_rate": 4.165750975410252e-06, "loss": 0.8729, "step": 115250 }, { "epoch": 0.8343286499163934, "grad_norm": 0.15747976303100586, "learning_rate": 4.165678588749665e-06, "loss": 0.8633, "step": 115260 }, { "epoch": 0.8344010365769796, "grad_norm": 0.1882728487253189, "learning_rate": 4.165606202089079e-06, "loss": 0.8736, "step": 115270 }, { "epoch": 0.8344734232375658, "grad_norm": 0.16340763866901398, "learning_rate": 4.165533815428493e-06, "loss": 0.8885, "step": 115280 }, { "epoch": 0.834545809898152, "grad_norm": 0.1481885462999344, "learning_rate": 4.165461428767907e-06, "loss": 0.8885, "step": 115290 }, { "epoch": 0.8346181965587381, "grad_norm": 0.15606743097305298, "learning_rate": 4.165389042107321e-06, "loss": 0.8767, "step": 115300 }, { "epoch": 0.8346905832193243, "grad_norm": 0.14937707781791687, "learning_rate": 4.165316655446734e-06, "loss": 0.875, "step": 115310 }, { "epoch": 0.8347629698799105, "grad_norm": 0.16030281782150269, "learning_rate": 4.165244268786149e-06, "loss": 0.8741, "step": 115320 }, { "epoch": 0.8348353565404967, "grad_norm": 0.15192563831806183, "learning_rate": 4.165171882125562e-06, "loss": 0.8686, "step": 115330 }, { "epoch": 0.8349077432010829, "grad_norm": 0.15150035917758942, "learning_rate": 4.165099495464976e-06, "loss": 0.8875, "step": 115340 }, { "epoch": 0.8349801298616691, "grad_norm": 0.1519050896167755, "learning_rate": 4.1650271088043895e-06, "loss": 0.8844, "step": 115350 }, { "epoch": 0.8350525165222553, "grad_norm": 0.15744005143642426, "learning_rate": 4.164954722143804e-06, "loss": 0.8619, "step": 115360 }, { "epoch": 0.8351249031828415, "grad_norm": 0.16231480240821838, "learning_rate": 4.164882335483218e-06, "loss": 0.8722, "step": 115370 }, { "epoch": 0.8351972898434277, "grad_norm": 0.15285032987594604, "learning_rate": 4.164809948822631e-06, "loss": 0.8796, "step": 115380 }, { "epoch": 0.8352696765040138, "grad_norm": 0.16190151870250702, "learning_rate": 4.164737562162045e-06, "loss": 0.8804, "step": 115390 }, { "epoch": 0.8353420631646, "grad_norm": 0.16439662873744965, "learning_rate": 4.164665175501459e-06, "loss": 0.8944, "step": 115400 }, { "epoch": 0.8354144498251862, "grad_norm": 0.14439240097999573, "learning_rate": 4.164592788840873e-06, "loss": 0.8802, "step": 115410 }, { "epoch": 0.8354868364857724, "grad_norm": 0.14769050478935242, "learning_rate": 4.1645204021802865e-06, "loss": 0.8807, "step": 115420 }, { "epoch": 0.8355592231463586, "grad_norm": 0.15976214408874512, "learning_rate": 4.1644480155197e-06, "loss": 0.8796, "step": 115430 }, { "epoch": 0.8356316098069447, "grad_norm": 0.15138176083564758, "learning_rate": 4.164375628859115e-06, "loss": 0.8711, "step": 115440 }, { "epoch": 0.835703996467531, "grad_norm": 0.15986670553684235, "learning_rate": 4.164303242198528e-06, "loss": 0.889, "step": 115450 }, { "epoch": 0.8357763831281172, "grad_norm": 0.1555429995059967, "learning_rate": 4.164230855537942e-06, "loss": 0.865, "step": 115460 }, { "epoch": 0.8358487697887034, "grad_norm": 0.15238524973392487, "learning_rate": 4.1641584688773554e-06, "loss": 0.8672, "step": 115470 }, { "epoch": 0.8359211564492895, "grad_norm": 0.16362081468105316, "learning_rate": 4.164086082216769e-06, "loss": 0.8658, "step": 115480 }, { "epoch": 0.8359935431098757, "grad_norm": 0.16536161303520203, "learning_rate": 4.1640136955561835e-06, "loss": 0.8911, "step": 115490 }, { "epoch": 0.8360659297704619, "grad_norm": 0.15160474181175232, "learning_rate": 4.163941308895597e-06, "loss": 0.8678, "step": 115500 }, { "epoch": 0.8361383164310481, "grad_norm": 0.16615518927574158, "learning_rate": 4.163868922235011e-06, "loss": 0.8866, "step": 115510 }, { "epoch": 0.8362107030916343, "grad_norm": 0.15683193504810333, "learning_rate": 4.163796535574424e-06, "loss": 0.866, "step": 115520 }, { "epoch": 0.8362830897522204, "grad_norm": 0.149534672498703, "learning_rate": 4.163724148913839e-06, "loss": 0.8748, "step": 115530 }, { "epoch": 0.8363554764128066, "grad_norm": 0.15053287148475647, "learning_rate": 4.1636517622532525e-06, "loss": 0.8713, "step": 115540 }, { "epoch": 0.8364278630733928, "grad_norm": 0.16546009480953217, "learning_rate": 4.163579375592666e-06, "loss": 0.8776, "step": 115550 }, { "epoch": 0.8365002497339791, "grad_norm": 0.1460997462272644, "learning_rate": 4.16350698893208e-06, "loss": 0.8818, "step": 115560 }, { "epoch": 0.8365726363945652, "grad_norm": 0.15319761633872986, "learning_rate": 4.163434602271494e-06, "loss": 0.8797, "step": 115570 }, { "epoch": 0.8366450230551514, "grad_norm": 0.234059140086174, "learning_rate": 4.163362215610908e-06, "loss": 0.8659, "step": 115580 }, { "epoch": 0.8367174097157376, "grad_norm": 0.147258460521698, "learning_rate": 4.163289828950321e-06, "loss": 0.8754, "step": 115590 }, { "epoch": 0.8367897963763238, "grad_norm": 0.15908755362033844, "learning_rate": 4.163217442289735e-06, "loss": 0.872, "step": 115600 }, { "epoch": 0.83686218303691, "grad_norm": 0.15638591349124908, "learning_rate": 4.1631450556291495e-06, "loss": 0.8971, "step": 115610 }, { "epoch": 0.8369345696974961, "grad_norm": 0.1701616644859314, "learning_rate": 4.163072668968563e-06, "loss": 0.8726, "step": 115620 }, { "epoch": 0.8370069563580823, "grad_norm": 0.15191833674907684, "learning_rate": 4.163000282307977e-06, "loss": 0.8767, "step": 115630 }, { "epoch": 0.8370793430186685, "grad_norm": 0.15473678708076477, "learning_rate": 4.16292789564739e-06, "loss": 0.8773, "step": 115640 }, { "epoch": 0.8371517296792547, "grad_norm": 0.15959081053733826, "learning_rate": 4.162855508986805e-06, "loss": 0.8745, "step": 115650 }, { "epoch": 0.8372241163398408, "grad_norm": 0.1590321958065033, "learning_rate": 4.162783122326218e-06, "loss": 0.8818, "step": 115660 }, { "epoch": 0.8372965030004271, "grad_norm": 0.16567932069301605, "learning_rate": 4.162710735665632e-06, "loss": 0.8798, "step": 115670 }, { "epoch": 0.8373688896610133, "grad_norm": 0.15759676694869995, "learning_rate": 4.162638349005046e-06, "loss": 0.8689, "step": 115680 }, { "epoch": 0.8374412763215995, "grad_norm": 0.15198834240436554, "learning_rate": 4.16256596234446e-06, "loss": 0.8867, "step": 115690 }, { "epoch": 0.8375136629821857, "grad_norm": 0.1595429629087448, "learning_rate": 4.162493575683874e-06, "loss": 0.8792, "step": 115700 }, { "epoch": 0.8375860496427718, "grad_norm": 0.15620209276676178, "learning_rate": 4.162421189023287e-06, "loss": 0.8751, "step": 115710 }, { "epoch": 0.837658436303358, "grad_norm": 0.18191352486610413, "learning_rate": 4.162348802362701e-06, "loss": 0.8682, "step": 115720 }, { "epoch": 0.8377308229639442, "grad_norm": 0.1610066294670105, "learning_rate": 4.1622764157021145e-06, "loss": 0.8813, "step": 115730 }, { "epoch": 0.8378032096245304, "grad_norm": 0.16806887090206146, "learning_rate": 4.162204029041528e-06, "loss": 0.8786, "step": 115740 }, { "epoch": 0.8378755962851165, "grad_norm": 0.1434309184551239, "learning_rate": 4.162131642380942e-06, "loss": 0.8844, "step": 115750 }, { "epoch": 0.8379479829457027, "grad_norm": 0.17224647104740143, "learning_rate": 4.162059255720356e-06, "loss": 0.8848, "step": 115760 }, { "epoch": 0.838020369606289, "grad_norm": 0.1476389765739441, "learning_rate": 4.16198686905977e-06, "loss": 0.8813, "step": 115770 }, { "epoch": 0.8380927562668752, "grad_norm": 0.1633952558040619, "learning_rate": 4.1619144823991835e-06, "loss": 0.8842, "step": 115780 }, { "epoch": 0.8381651429274614, "grad_norm": 0.16024617850780487, "learning_rate": 4.161842095738597e-06, "loss": 0.8829, "step": 115790 }, { "epoch": 0.8382375295880475, "grad_norm": 0.15520702302455902, "learning_rate": 4.1617697090780116e-06, "loss": 0.8744, "step": 115800 }, { "epoch": 0.8383099162486337, "grad_norm": 0.161240816116333, "learning_rate": 4.161697322417425e-06, "loss": 0.8912, "step": 115810 }, { "epoch": 0.8383823029092199, "grad_norm": 0.14296600222587585, "learning_rate": 4.161624935756839e-06, "loss": 0.8727, "step": 115820 }, { "epoch": 0.8384546895698061, "grad_norm": 0.16847427189350128, "learning_rate": 4.161552549096252e-06, "loss": 0.8812, "step": 115830 }, { "epoch": 0.8385270762303922, "grad_norm": 0.14801815152168274, "learning_rate": 4.161480162435667e-06, "loss": 0.8822, "step": 115840 }, { "epoch": 0.8385994628909784, "grad_norm": 0.15415239334106445, "learning_rate": 4.1614077757750805e-06, "loss": 0.8754, "step": 115850 }, { "epoch": 0.8386718495515646, "grad_norm": 0.1483432799577713, "learning_rate": 4.161335389114494e-06, "loss": 0.8831, "step": 115860 }, { "epoch": 0.8387442362121508, "grad_norm": 0.1458449810743332, "learning_rate": 4.161263002453908e-06, "loss": 0.8847, "step": 115870 }, { "epoch": 0.8388166228727371, "grad_norm": 0.15387925505638123, "learning_rate": 4.161190615793322e-06, "loss": 0.8719, "step": 115880 }, { "epoch": 0.8388890095333232, "grad_norm": 0.14547225832939148, "learning_rate": 4.161118229132736e-06, "loss": 0.8866, "step": 115890 }, { "epoch": 0.8389613961939094, "grad_norm": 0.15607166290283203, "learning_rate": 4.161045842472149e-06, "loss": 0.8811, "step": 115900 }, { "epoch": 0.8390337828544956, "grad_norm": 0.14948482811450958, "learning_rate": 4.160973455811563e-06, "loss": 0.8838, "step": 115910 }, { "epoch": 0.8391061695150818, "grad_norm": 0.16144073009490967, "learning_rate": 4.1609010691509775e-06, "loss": 0.8786, "step": 115920 }, { "epoch": 0.839178556175668, "grad_norm": 0.15592895448207855, "learning_rate": 4.160828682490391e-06, "loss": 0.8812, "step": 115930 }, { "epoch": 0.8392509428362541, "grad_norm": 0.18400424718856812, "learning_rate": 4.160756295829805e-06, "loss": 0.87, "step": 115940 }, { "epoch": 0.8393233294968403, "grad_norm": 0.1547369509935379, "learning_rate": 4.160683909169218e-06, "loss": 0.8918, "step": 115950 }, { "epoch": 0.8393957161574265, "grad_norm": 0.16612297296524048, "learning_rate": 4.160611522508633e-06, "loss": 0.8735, "step": 115960 }, { "epoch": 0.8394681028180127, "grad_norm": 0.14709250628948212, "learning_rate": 4.160539135848046e-06, "loss": 0.8752, "step": 115970 }, { "epoch": 0.8395404894785989, "grad_norm": 0.15569816529750824, "learning_rate": 4.16046674918746e-06, "loss": 0.8725, "step": 115980 }, { "epoch": 0.8396128761391851, "grad_norm": 0.16147053241729736, "learning_rate": 4.160394362526874e-06, "loss": 0.8772, "step": 115990 }, { "epoch": 0.8396852627997713, "grad_norm": 0.15154972672462463, "learning_rate": 4.160321975866288e-06, "loss": 0.881, "step": 116000 }, { "epoch": 0.8397576494603575, "grad_norm": 0.15269294381141663, "learning_rate": 4.160249589205702e-06, "loss": 0.883, "step": 116010 }, { "epoch": 0.8398300361209436, "grad_norm": 0.17886610329151154, "learning_rate": 4.160177202545115e-06, "loss": 0.8735, "step": 116020 }, { "epoch": 0.8399024227815298, "grad_norm": 0.15143749117851257, "learning_rate": 4.160104815884529e-06, "loss": 0.8752, "step": 116030 }, { "epoch": 0.839974809442116, "grad_norm": 0.15021578967571259, "learning_rate": 4.1600324292239434e-06, "loss": 0.8707, "step": 116040 }, { "epoch": 0.8400471961027022, "grad_norm": 0.15974397957324982, "learning_rate": 4.159960042563357e-06, "loss": 0.8743, "step": 116050 }, { "epoch": 0.8401195827632884, "grad_norm": 0.15328460931777954, "learning_rate": 4.159887655902771e-06, "loss": 0.878, "step": 116060 }, { "epoch": 0.8401919694238745, "grad_norm": 0.16074135899543762, "learning_rate": 4.159815269242184e-06, "loss": 0.8775, "step": 116070 }, { "epoch": 0.8402643560844607, "grad_norm": 0.14766909182071686, "learning_rate": 4.159742882581598e-06, "loss": 0.8734, "step": 116080 }, { "epoch": 0.840336742745047, "grad_norm": 0.1620090901851654, "learning_rate": 4.159670495921012e-06, "loss": 0.884, "step": 116090 }, { "epoch": 0.8404091294056332, "grad_norm": 0.1521306186914444, "learning_rate": 4.159598109260426e-06, "loss": 0.8904, "step": 116100 }, { "epoch": 0.8404815160662193, "grad_norm": 0.15059085190296173, "learning_rate": 4.15952572259984e-06, "loss": 0.8783, "step": 116110 }, { "epoch": 0.8405539027268055, "grad_norm": 0.159867063164711, "learning_rate": 4.159453335939253e-06, "loss": 0.8777, "step": 116120 }, { "epoch": 0.8406262893873917, "grad_norm": 0.15567173063755035, "learning_rate": 4.159380949278668e-06, "loss": 0.8751, "step": 116130 }, { "epoch": 0.8406986760479779, "grad_norm": 0.16036923229694366, "learning_rate": 4.159308562618081e-06, "loss": 0.8946, "step": 116140 }, { "epoch": 0.840771062708564, "grad_norm": 0.151793971657753, "learning_rate": 4.159236175957495e-06, "loss": 0.8785, "step": 116150 }, { "epoch": 0.8408434493691502, "grad_norm": 0.17096100747585297, "learning_rate": 4.1591637892969085e-06, "loss": 0.8795, "step": 116160 }, { "epoch": 0.8409158360297364, "grad_norm": 0.1503993719816208, "learning_rate": 4.159091402636323e-06, "loss": 0.8877, "step": 116170 }, { "epoch": 0.8409882226903226, "grad_norm": 0.1676565408706665, "learning_rate": 4.159019015975737e-06, "loss": 0.8669, "step": 116180 }, { "epoch": 0.8410606093509088, "grad_norm": 0.14891086518764496, "learning_rate": 4.15894662931515e-06, "loss": 0.885, "step": 116190 }, { "epoch": 0.841132996011495, "grad_norm": 0.15103653073310852, "learning_rate": 4.158874242654564e-06, "loss": 0.8782, "step": 116200 }, { "epoch": 0.8412053826720812, "grad_norm": 0.15480820834636688, "learning_rate": 4.158801855993978e-06, "loss": 0.8741, "step": 116210 }, { "epoch": 0.8412777693326674, "grad_norm": 0.17282061278820038, "learning_rate": 4.158729469333392e-06, "loss": 0.8843, "step": 116220 }, { "epoch": 0.8413501559932536, "grad_norm": 0.15457768738269806, "learning_rate": 4.1586570826728055e-06, "loss": 0.8808, "step": 116230 }, { "epoch": 0.8414225426538398, "grad_norm": 0.16778331995010376, "learning_rate": 4.158584696012219e-06, "loss": 0.8677, "step": 116240 }, { "epoch": 0.8414949293144259, "grad_norm": 0.15565545856952667, "learning_rate": 4.158512309351633e-06, "loss": 0.8793, "step": 116250 }, { "epoch": 0.8415673159750121, "grad_norm": 0.14847850799560547, "learning_rate": 4.158439922691046e-06, "loss": 0.8677, "step": 116260 }, { "epoch": 0.8416397026355983, "grad_norm": 0.15020453929901123, "learning_rate": 4.15836753603046e-06, "loss": 0.8863, "step": 116270 }, { "epoch": 0.8417120892961845, "grad_norm": 0.16893228888511658, "learning_rate": 4.1582951493698745e-06, "loss": 0.8815, "step": 116280 }, { "epoch": 0.8417844759567706, "grad_norm": 0.17535464465618134, "learning_rate": 4.158222762709288e-06, "loss": 0.8735, "step": 116290 }, { "epoch": 0.8418568626173569, "grad_norm": 0.15316392481327057, "learning_rate": 4.158150376048702e-06, "loss": 0.876, "step": 116300 }, { "epoch": 0.8419292492779431, "grad_norm": 0.16685836017131805, "learning_rate": 4.158077989388115e-06, "loss": 0.873, "step": 116310 }, { "epoch": 0.8420016359385293, "grad_norm": 0.16232609748840332, "learning_rate": 4.15800560272753e-06, "loss": 0.8792, "step": 116320 }, { "epoch": 0.8420740225991155, "grad_norm": 0.1534125953912735, "learning_rate": 4.157933216066943e-06, "loss": 0.8802, "step": 116330 }, { "epoch": 0.8421464092597016, "grad_norm": 0.16465212404727936, "learning_rate": 4.157860829406357e-06, "loss": 0.8792, "step": 116340 }, { "epoch": 0.8422187959202878, "grad_norm": 0.1542976200580597, "learning_rate": 4.157788442745771e-06, "loss": 0.8803, "step": 116350 }, { "epoch": 0.842291182580874, "grad_norm": 0.1525125801563263, "learning_rate": 4.157716056085185e-06, "loss": 0.8787, "step": 116360 }, { "epoch": 0.8423635692414602, "grad_norm": 0.16965779662132263, "learning_rate": 4.157643669424599e-06, "loss": 0.8799, "step": 116370 }, { "epoch": 0.8424359559020463, "grad_norm": 0.14481857419013977, "learning_rate": 4.157571282764012e-06, "loss": 0.8803, "step": 116380 }, { "epoch": 0.8425083425626325, "grad_norm": 0.15240401029586792, "learning_rate": 4.157498896103426e-06, "loss": 0.8813, "step": 116390 }, { "epoch": 0.8425807292232187, "grad_norm": 0.17337167263031006, "learning_rate": 4.15742650944284e-06, "loss": 0.8711, "step": 116400 }, { "epoch": 0.842653115883805, "grad_norm": 0.14756740629673004, "learning_rate": 4.157354122782254e-06, "loss": 0.8615, "step": 116410 }, { "epoch": 0.8427255025443912, "grad_norm": 0.16462619602680206, "learning_rate": 4.157281736121668e-06, "loss": 0.8717, "step": 116420 }, { "epoch": 0.8427978892049773, "grad_norm": 0.1649799644947052, "learning_rate": 4.157209349461081e-06, "loss": 0.8778, "step": 116430 }, { "epoch": 0.8428702758655635, "grad_norm": 0.1735529750585556, "learning_rate": 4.157136962800496e-06, "loss": 0.8802, "step": 116440 }, { "epoch": 0.8429426625261497, "grad_norm": 0.19218334555625916, "learning_rate": 4.157064576139909e-06, "loss": 0.8801, "step": 116450 }, { "epoch": 0.8430150491867359, "grad_norm": 0.15204769372940063, "learning_rate": 4.156992189479323e-06, "loss": 0.8728, "step": 116460 }, { "epoch": 0.843087435847322, "grad_norm": 0.15448568761348724, "learning_rate": 4.1569198028187365e-06, "loss": 0.8687, "step": 116470 }, { "epoch": 0.8431598225079082, "grad_norm": 0.15800927579402924, "learning_rate": 4.156847416158151e-06, "loss": 0.8742, "step": 116480 }, { "epoch": 0.8432322091684944, "grad_norm": 0.1503394991159439, "learning_rate": 4.156775029497565e-06, "loss": 0.875, "step": 116490 }, { "epoch": 0.8433045958290806, "grad_norm": 0.5347150564193726, "learning_rate": 4.156702642836978e-06, "loss": 0.8686, "step": 116500 }, { "epoch": 0.8433769824896667, "grad_norm": 0.1692754626274109, "learning_rate": 4.156630256176392e-06, "loss": 0.8775, "step": 116510 }, { "epoch": 0.843449369150253, "grad_norm": 0.15633852779865265, "learning_rate": 4.156557869515806e-06, "loss": 0.8743, "step": 116520 }, { "epoch": 0.8435217558108392, "grad_norm": 0.15997891128063202, "learning_rate": 4.15648548285522e-06, "loss": 0.8746, "step": 116530 }, { "epoch": 0.8435941424714254, "grad_norm": 0.15815088152885437, "learning_rate": 4.1564130961946336e-06, "loss": 0.8701, "step": 116540 }, { "epoch": 0.8436665291320116, "grad_norm": 0.1581558734178543, "learning_rate": 4.156340709534047e-06, "loss": 0.8803, "step": 116550 }, { "epoch": 0.8437389157925977, "grad_norm": 0.15266630053520203, "learning_rate": 4.156268322873462e-06, "loss": 0.8638, "step": 116560 }, { "epoch": 0.8438113024531839, "grad_norm": 0.15142551064491272, "learning_rate": 4.156195936212875e-06, "loss": 0.8699, "step": 116570 }, { "epoch": 0.8438836891137701, "grad_norm": 0.1561920940876007, "learning_rate": 4.156123549552289e-06, "loss": 0.8756, "step": 116580 }, { "epoch": 0.8439560757743563, "grad_norm": 0.16398534178733826, "learning_rate": 4.1560511628917025e-06, "loss": 0.8767, "step": 116590 }, { "epoch": 0.8440284624349425, "grad_norm": 0.1780395209789276, "learning_rate": 4.155978776231117e-06, "loss": 0.8656, "step": 116600 }, { "epoch": 0.8441008490955286, "grad_norm": 0.16467039287090302, "learning_rate": 4.1559063895705306e-06, "loss": 0.8734, "step": 116610 }, { "epoch": 0.8441732357561149, "grad_norm": 0.16053692996501923, "learning_rate": 4.155834002909944e-06, "loss": 0.8777, "step": 116620 }, { "epoch": 0.8442456224167011, "grad_norm": 0.15318801999092102, "learning_rate": 4.155761616249358e-06, "loss": 0.8797, "step": 116630 }, { "epoch": 0.8443180090772873, "grad_norm": 0.1649479866027832, "learning_rate": 4.155689229588772e-06, "loss": 0.886, "step": 116640 }, { "epoch": 0.8443903957378734, "grad_norm": 0.15145239233970642, "learning_rate": 4.155616842928186e-06, "loss": 0.8599, "step": 116650 }, { "epoch": 0.8444627823984596, "grad_norm": 0.15609456598758698, "learning_rate": 4.1555444562675995e-06, "loss": 0.8787, "step": 116660 }, { "epoch": 0.8445351690590458, "grad_norm": 0.14667485654354095, "learning_rate": 4.155472069607013e-06, "loss": 0.8736, "step": 116670 }, { "epoch": 0.844607555719632, "grad_norm": 0.1538429707288742, "learning_rate": 4.1553996829464276e-06, "loss": 0.8859, "step": 116680 }, { "epoch": 0.8446799423802182, "grad_norm": 0.1470920741558075, "learning_rate": 4.155327296285841e-06, "loss": 0.8801, "step": 116690 }, { "epoch": 0.8447523290408043, "grad_norm": 0.2034630924463272, "learning_rate": 4.155254909625255e-06, "loss": 0.8741, "step": 116700 }, { "epoch": 0.8448247157013905, "grad_norm": 0.14927048981189728, "learning_rate": 4.155182522964668e-06, "loss": 0.876, "step": 116710 }, { "epoch": 0.8448971023619767, "grad_norm": 0.16222061216831207, "learning_rate": 4.155110136304082e-06, "loss": 0.8748, "step": 116720 }, { "epoch": 0.844969489022563, "grad_norm": 0.15882587432861328, "learning_rate": 4.1550377496434965e-06, "loss": 0.885, "step": 116730 }, { "epoch": 0.8450418756831491, "grad_norm": 0.1530759185552597, "learning_rate": 4.15496536298291e-06, "loss": 0.8758, "step": 116740 }, { "epoch": 0.8451142623437353, "grad_norm": 0.15886208415031433, "learning_rate": 4.154892976322324e-06, "loss": 0.8756, "step": 116750 }, { "epoch": 0.8451866490043215, "grad_norm": 0.1658525913953781, "learning_rate": 4.154820589661737e-06, "loss": 0.8642, "step": 116760 }, { "epoch": 0.8452590356649077, "grad_norm": 0.15931586921215057, "learning_rate": 4.154748203001152e-06, "loss": 0.8726, "step": 116770 }, { "epoch": 0.8453314223254939, "grad_norm": 0.16602514684200287, "learning_rate": 4.154675816340565e-06, "loss": 0.8694, "step": 116780 }, { "epoch": 0.84540380898608, "grad_norm": 0.14427199959754944, "learning_rate": 4.154603429679979e-06, "loss": 0.8666, "step": 116790 }, { "epoch": 0.8454761956466662, "grad_norm": 0.15520738065242767, "learning_rate": 4.154531043019393e-06, "loss": 0.8778, "step": 116800 }, { "epoch": 0.8455485823072524, "grad_norm": 0.15710857510566711, "learning_rate": 4.154458656358806e-06, "loss": 0.8681, "step": 116810 }, { "epoch": 0.8456209689678386, "grad_norm": 0.1453404724597931, "learning_rate": 4.15438626969822e-06, "loss": 0.8734, "step": 116820 }, { "epoch": 0.8456933556284248, "grad_norm": 0.16362370550632477, "learning_rate": 4.154313883037634e-06, "loss": 0.8842, "step": 116830 }, { "epoch": 0.845765742289011, "grad_norm": 0.14579138159751892, "learning_rate": 4.154241496377048e-06, "loss": 0.8635, "step": 116840 }, { "epoch": 0.8458381289495972, "grad_norm": 0.1520829200744629, "learning_rate": 4.154169109716462e-06, "loss": 0.8801, "step": 116850 }, { "epoch": 0.8459105156101834, "grad_norm": 0.1613858938217163, "learning_rate": 4.154096723055875e-06, "loss": 0.8858, "step": 116860 }, { "epoch": 0.8459829022707696, "grad_norm": 0.14903327822685242, "learning_rate": 4.154024336395289e-06, "loss": 0.8801, "step": 116870 }, { "epoch": 0.8460552889313557, "grad_norm": 0.2931915819644928, "learning_rate": 4.153951949734703e-06, "loss": 0.8663, "step": 116880 }, { "epoch": 0.8461276755919419, "grad_norm": 0.1518881767988205, "learning_rate": 4.153879563074117e-06, "loss": 0.8729, "step": 116890 }, { "epoch": 0.8462000622525281, "grad_norm": 0.15249276161193848, "learning_rate": 4.1538071764135305e-06, "loss": 0.8809, "step": 116900 }, { "epoch": 0.8462724489131143, "grad_norm": 0.1551447957754135, "learning_rate": 4.153734789752944e-06, "loss": 0.8857, "step": 116910 }, { "epoch": 0.8463448355737004, "grad_norm": 0.17283110320568085, "learning_rate": 4.153662403092359e-06, "loss": 0.8796, "step": 116920 }, { "epoch": 0.8464172222342866, "grad_norm": 0.1588117629289627, "learning_rate": 4.153590016431772e-06, "loss": 0.8738, "step": 116930 }, { "epoch": 0.8464896088948729, "grad_norm": 0.15109075605869293, "learning_rate": 4.153517629771186e-06, "loss": 0.8842, "step": 116940 }, { "epoch": 0.8465619955554591, "grad_norm": 0.15133486688137054, "learning_rate": 4.1534452431105994e-06, "loss": 0.8784, "step": 116950 }, { "epoch": 0.8466343822160453, "grad_norm": 0.1452382504940033, "learning_rate": 4.153372856450014e-06, "loss": 0.8816, "step": 116960 }, { "epoch": 0.8467067688766314, "grad_norm": 0.14603115618228912, "learning_rate": 4.1533004697894275e-06, "loss": 0.89, "step": 116970 }, { "epoch": 0.8467791555372176, "grad_norm": 0.15353167057037354, "learning_rate": 4.153228083128841e-06, "loss": 0.8786, "step": 116980 }, { "epoch": 0.8468515421978038, "grad_norm": 0.14847798645496368, "learning_rate": 4.153155696468255e-06, "loss": 0.8782, "step": 116990 }, { "epoch": 0.84692392885839, "grad_norm": 0.15351562201976776, "learning_rate": 4.153083309807669e-06, "loss": 0.8894, "step": 117000 }, { "epoch": 0.8469963155189761, "grad_norm": 0.14607466757297516, "learning_rate": 4.153010923147083e-06, "loss": 0.8665, "step": 117010 }, { "epoch": 0.8470687021795623, "grad_norm": 0.1430024653673172, "learning_rate": 4.1529385364864964e-06, "loss": 0.8724, "step": 117020 }, { "epoch": 0.8471410888401485, "grad_norm": 0.15325815975666046, "learning_rate": 4.15286614982591e-06, "loss": 0.863, "step": 117030 }, { "epoch": 0.8472134755007347, "grad_norm": 0.14200498163700104, "learning_rate": 4.1527937631653245e-06, "loss": 0.8753, "step": 117040 }, { "epoch": 0.847285862161321, "grad_norm": 0.17369510233402252, "learning_rate": 4.152721376504738e-06, "loss": 0.8768, "step": 117050 }, { "epoch": 0.8473582488219071, "grad_norm": 0.15766370296478271, "learning_rate": 4.152648989844152e-06, "loss": 0.877, "step": 117060 }, { "epoch": 0.8474306354824933, "grad_norm": 0.16461563110351562, "learning_rate": 4.152576603183565e-06, "loss": 0.8692, "step": 117070 }, { "epoch": 0.8475030221430795, "grad_norm": 0.1531338095664978, "learning_rate": 4.15250421652298e-06, "loss": 0.8897, "step": 117080 }, { "epoch": 0.8475754088036657, "grad_norm": 0.15825554728507996, "learning_rate": 4.1524318298623935e-06, "loss": 0.8883, "step": 117090 }, { "epoch": 0.8476477954642518, "grad_norm": 0.1514403074979782, "learning_rate": 4.152359443201807e-06, "loss": 0.886, "step": 117100 }, { "epoch": 0.847720182124838, "grad_norm": 0.1458583027124405, "learning_rate": 4.152287056541221e-06, "loss": 0.8762, "step": 117110 }, { "epoch": 0.8477925687854242, "grad_norm": 0.18490411341190338, "learning_rate": 4.152214669880635e-06, "loss": 0.8753, "step": 117120 }, { "epoch": 0.8478649554460104, "grad_norm": 0.16968803107738495, "learning_rate": 4.152142283220049e-06, "loss": 0.8885, "step": 117130 }, { "epoch": 0.8479373421065965, "grad_norm": 0.15850697457790375, "learning_rate": 4.152069896559462e-06, "loss": 0.8867, "step": 117140 }, { "epoch": 0.8480097287671828, "grad_norm": 0.15073266625404358, "learning_rate": 4.151997509898876e-06, "loss": 0.8838, "step": 117150 }, { "epoch": 0.848082115427769, "grad_norm": 0.1633184403181076, "learning_rate": 4.1519251232382905e-06, "loss": 0.8646, "step": 117160 }, { "epoch": 0.8481545020883552, "grad_norm": 0.1570475846529007, "learning_rate": 4.151852736577704e-06, "loss": 0.8869, "step": 117170 }, { "epoch": 0.8482268887489414, "grad_norm": 0.152681365609169, "learning_rate": 4.151780349917118e-06, "loss": 0.874, "step": 117180 }, { "epoch": 0.8482992754095275, "grad_norm": 0.15066421031951904, "learning_rate": 4.151707963256531e-06, "loss": 0.8739, "step": 117190 }, { "epoch": 0.8483716620701137, "grad_norm": 0.14346186816692352, "learning_rate": 4.151635576595946e-06, "loss": 0.8754, "step": 117200 }, { "epoch": 0.8484440487306999, "grad_norm": 0.14868153631687164, "learning_rate": 4.151563189935359e-06, "loss": 0.8744, "step": 117210 }, { "epoch": 0.8485164353912861, "grad_norm": 0.16770720481872559, "learning_rate": 4.151490803274773e-06, "loss": 0.8894, "step": 117220 }, { "epoch": 0.8485888220518722, "grad_norm": 0.15046921372413635, "learning_rate": 4.151418416614187e-06, "loss": 0.8617, "step": 117230 }, { "epoch": 0.8486612087124584, "grad_norm": 0.596076488494873, "learning_rate": 4.151346029953601e-06, "loss": 0.8723, "step": 117240 }, { "epoch": 0.8487335953730446, "grad_norm": 0.15184585750102997, "learning_rate": 4.151273643293015e-06, "loss": 0.8841, "step": 117250 }, { "epoch": 0.8488059820336309, "grad_norm": 0.14896786212921143, "learning_rate": 4.151201256632428e-06, "loss": 0.8806, "step": 117260 }, { "epoch": 0.8488783686942171, "grad_norm": 0.15315642952919006, "learning_rate": 4.151128869971842e-06, "loss": 0.8845, "step": 117270 }, { "epoch": 0.8489507553548032, "grad_norm": 0.15430568158626556, "learning_rate": 4.151056483311256e-06, "loss": 0.8775, "step": 117280 }, { "epoch": 0.8490231420153894, "grad_norm": 0.170336052775383, "learning_rate": 4.15098409665067e-06, "loss": 0.8547, "step": 117290 }, { "epoch": 0.8490955286759756, "grad_norm": 0.14343459904193878, "learning_rate": 4.150911709990084e-06, "loss": 0.873, "step": 117300 }, { "epoch": 0.8491679153365618, "grad_norm": 0.20575720071792603, "learning_rate": 4.150839323329497e-06, "loss": 0.873, "step": 117310 }, { "epoch": 0.849240301997148, "grad_norm": 0.15282517671585083, "learning_rate": 4.150766936668911e-06, "loss": 0.8895, "step": 117320 }, { "epoch": 0.8493126886577341, "grad_norm": 0.1475241631269455, "learning_rate": 4.1506945500083245e-06, "loss": 0.8727, "step": 117330 }, { "epoch": 0.8493850753183203, "grad_norm": 0.1757318675518036, "learning_rate": 4.150622163347738e-06, "loss": 0.8726, "step": 117340 }, { "epoch": 0.8494574619789065, "grad_norm": 0.1519278883934021, "learning_rate": 4.1505497766871526e-06, "loss": 0.8614, "step": 117350 }, { "epoch": 0.8495298486394928, "grad_norm": 0.14336329698562622, "learning_rate": 4.150477390026566e-06, "loss": 0.879, "step": 117360 }, { "epoch": 0.849602235300079, "grad_norm": 0.15254177153110504, "learning_rate": 4.15040500336598e-06, "loss": 0.8768, "step": 117370 }, { "epoch": 0.8496746219606651, "grad_norm": 0.15974296629428864, "learning_rate": 4.150332616705393e-06, "loss": 0.8659, "step": 117380 }, { "epoch": 0.8497470086212513, "grad_norm": 0.15729989111423492, "learning_rate": 4.150260230044808e-06, "loss": 0.8687, "step": 117390 }, { "epoch": 0.8498193952818375, "grad_norm": 0.15651953220367432, "learning_rate": 4.1501878433842215e-06, "loss": 0.8749, "step": 117400 }, { "epoch": 0.8498917819424237, "grad_norm": 0.14588718116283417, "learning_rate": 4.150115456723635e-06, "loss": 0.8758, "step": 117410 }, { "epoch": 0.8499641686030098, "grad_norm": 0.15261155366897583, "learning_rate": 4.150043070063049e-06, "loss": 0.8791, "step": 117420 }, { "epoch": 0.850036555263596, "grad_norm": 0.16166460514068604, "learning_rate": 4.149970683402463e-06, "loss": 0.8829, "step": 117430 }, { "epoch": 0.8501089419241822, "grad_norm": 0.1595904678106308, "learning_rate": 4.149898296741877e-06, "loss": 0.8713, "step": 117440 }, { "epoch": 0.8501813285847684, "grad_norm": 0.1586706042289734, "learning_rate": 4.14982591008129e-06, "loss": 0.8689, "step": 117450 }, { "epoch": 0.8502537152453545, "grad_norm": 0.17200522124767303, "learning_rate": 4.149753523420704e-06, "loss": 0.88, "step": 117460 }, { "epoch": 0.8503261019059408, "grad_norm": 0.17994257807731628, "learning_rate": 4.1496811367601185e-06, "loss": 0.8885, "step": 117470 }, { "epoch": 0.850398488566527, "grad_norm": 0.1691487431526184, "learning_rate": 4.149608750099532e-06, "loss": 0.8759, "step": 117480 }, { "epoch": 0.8504708752271132, "grad_norm": 0.1492878645658493, "learning_rate": 4.149536363438946e-06, "loss": 0.8862, "step": 117490 }, { "epoch": 0.8505432618876994, "grad_norm": 0.15258543193340302, "learning_rate": 4.149463976778359e-06, "loss": 0.8684, "step": 117500 }, { "epoch": 0.8506156485482855, "grad_norm": 0.14704549312591553, "learning_rate": 4.149391590117773e-06, "loss": 0.8682, "step": 117510 }, { "epoch": 0.8506880352088717, "grad_norm": 0.1570146381855011, "learning_rate": 4.1493192034571874e-06, "loss": 0.8638, "step": 117520 }, { "epoch": 0.8507604218694579, "grad_norm": 0.1731637567281723, "learning_rate": 4.149246816796601e-06, "loss": 0.8875, "step": 117530 }, { "epoch": 0.8508328085300441, "grad_norm": 0.15617217123508453, "learning_rate": 4.149174430136015e-06, "loss": 0.8981, "step": 117540 }, { "epoch": 0.8509051951906302, "grad_norm": 0.15860138833522797, "learning_rate": 4.149102043475428e-06, "loss": 0.8783, "step": 117550 }, { "epoch": 0.8509775818512164, "grad_norm": 0.15152351558208466, "learning_rate": 4.149029656814843e-06, "loss": 0.8784, "step": 117560 }, { "epoch": 0.8510499685118026, "grad_norm": 0.15892478823661804, "learning_rate": 4.148957270154256e-06, "loss": 0.8943, "step": 117570 }, { "epoch": 0.8511223551723889, "grad_norm": 0.13866350054740906, "learning_rate": 4.14888488349367e-06, "loss": 0.8671, "step": 117580 }, { "epoch": 0.851194741832975, "grad_norm": 0.22664818167686462, "learning_rate": 4.148812496833084e-06, "loss": 0.87, "step": 117590 }, { "epoch": 0.8512671284935612, "grad_norm": 0.1927105337381363, "learning_rate": 4.148740110172498e-06, "loss": 0.8713, "step": 117600 }, { "epoch": 0.8513395151541474, "grad_norm": 0.19467182457447052, "learning_rate": 4.148667723511912e-06, "loss": 0.879, "step": 117610 }, { "epoch": 0.8514119018147336, "grad_norm": 0.1516137421131134, "learning_rate": 4.148595336851325e-06, "loss": 0.8693, "step": 117620 }, { "epoch": 0.8514842884753198, "grad_norm": 0.15127213299274445, "learning_rate": 4.148522950190739e-06, "loss": 0.8648, "step": 117630 }, { "epoch": 0.8515566751359059, "grad_norm": 0.16966821253299713, "learning_rate": 4.148450563530153e-06, "loss": 0.8681, "step": 117640 }, { "epoch": 0.8516290617964921, "grad_norm": 0.17517682909965515, "learning_rate": 4.148378176869567e-06, "loss": 0.8782, "step": 117650 }, { "epoch": 0.8517014484570783, "grad_norm": 0.154166117310524, "learning_rate": 4.148305790208981e-06, "loss": 0.8838, "step": 117660 }, { "epoch": 0.8517738351176645, "grad_norm": 0.16335949301719666, "learning_rate": 4.148233403548394e-06, "loss": 0.88, "step": 117670 }, { "epoch": 0.8518462217782508, "grad_norm": 0.15556417405605316, "learning_rate": 4.148161016887809e-06, "loss": 0.8693, "step": 117680 }, { "epoch": 0.8519186084388369, "grad_norm": 0.14646877348423004, "learning_rate": 4.148088630227222e-06, "loss": 0.8817, "step": 117690 }, { "epoch": 0.8519909950994231, "grad_norm": 0.15498358011245728, "learning_rate": 4.148016243566636e-06, "loss": 0.875, "step": 117700 }, { "epoch": 0.8520633817600093, "grad_norm": 0.17575348913669586, "learning_rate": 4.1479438569060495e-06, "loss": 0.8792, "step": 117710 }, { "epoch": 0.8521357684205955, "grad_norm": 0.14134739339351654, "learning_rate": 4.147871470245464e-06, "loss": 0.8778, "step": 117720 }, { "epoch": 0.8522081550811816, "grad_norm": 0.18208590149879456, "learning_rate": 4.147799083584878e-06, "loss": 0.8799, "step": 117730 }, { "epoch": 0.8522805417417678, "grad_norm": 0.15532927215099335, "learning_rate": 4.147726696924291e-06, "loss": 0.8852, "step": 117740 }, { "epoch": 0.852352928402354, "grad_norm": 0.15121373534202576, "learning_rate": 4.147654310263705e-06, "loss": 0.8639, "step": 117750 }, { "epoch": 0.8524253150629402, "grad_norm": 0.14457499980926514, "learning_rate": 4.147581923603119e-06, "loss": 0.8829, "step": 117760 }, { "epoch": 0.8524977017235263, "grad_norm": 0.1544269621372223, "learning_rate": 4.147509536942533e-06, "loss": 0.8777, "step": 117770 }, { "epoch": 0.8525700883841125, "grad_norm": 0.15642063319683075, "learning_rate": 4.1474371502819465e-06, "loss": 0.879, "step": 117780 }, { "epoch": 0.8526424750446988, "grad_norm": 0.3743261694908142, "learning_rate": 4.14736476362136e-06, "loss": 0.8811, "step": 117790 }, { "epoch": 0.852714861705285, "grad_norm": 0.14951781928539276, "learning_rate": 4.147292376960775e-06, "loss": 0.8695, "step": 117800 }, { "epoch": 0.8527872483658712, "grad_norm": 0.153753861784935, "learning_rate": 4.147219990300188e-06, "loss": 0.8706, "step": 117810 }, { "epoch": 0.8528596350264573, "grad_norm": 0.15326263010501862, "learning_rate": 4.147147603639602e-06, "loss": 0.8746, "step": 117820 }, { "epoch": 0.8529320216870435, "grad_norm": 0.16046510636806488, "learning_rate": 4.1470752169790155e-06, "loss": 0.8765, "step": 117830 }, { "epoch": 0.8530044083476297, "grad_norm": 0.15566720068454742, "learning_rate": 4.147002830318429e-06, "loss": 0.8806, "step": 117840 }, { "epoch": 0.8530767950082159, "grad_norm": 0.16101893782615662, "learning_rate": 4.146930443657843e-06, "loss": 0.8745, "step": 117850 }, { "epoch": 0.853149181668802, "grad_norm": 0.15617725253105164, "learning_rate": 4.146858056997256e-06, "loss": 0.8773, "step": 117860 }, { "epoch": 0.8532215683293882, "grad_norm": 0.19228564202785492, "learning_rate": 4.146785670336671e-06, "loss": 0.8827, "step": 117870 }, { "epoch": 0.8532939549899744, "grad_norm": 0.17179349064826965, "learning_rate": 4.146713283676084e-06, "loss": 0.8657, "step": 117880 }, { "epoch": 0.8533663416505607, "grad_norm": 0.1750807911157608, "learning_rate": 4.146640897015498e-06, "loss": 0.8613, "step": 117890 }, { "epoch": 0.8534387283111469, "grad_norm": 0.17412197589874268, "learning_rate": 4.146568510354912e-06, "loss": 0.8641, "step": 117900 }, { "epoch": 0.853511114971733, "grad_norm": 0.154750794172287, "learning_rate": 4.146496123694326e-06, "loss": 0.8799, "step": 117910 }, { "epoch": 0.8535835016323192, "grad_norm": 0.17946046590805054, "learning_rate": 4.14642373703374e-06, "loss": 0.8661, "step": 117920 }, { "epoch": 0.8536558882929054, "grad_norm": 0.17114438116550446, "learning_rate": 4.146351350373153e-06, "loss": 0.8755, "step": 117930 }, { "epoch": 0.8537282749534916, "grad_norm": 0.15185768902301788, "learning_rate": 4.146278963712567e-06, "loss": 0.8746, "step": 117940 }, { "epoch": 0.8538006616140777, "grad_norm": 0.17097008228302002, "learning_rate": 4.146206577051981e-06, "loss": 0.8732, "step": 117950 }, { "epoch": 0.8538730482746639, "grad_norm": 0.154009148478508, "learning_rate": 4.146134190391395e-06, "loss": 0.8758, "step": 117960 }, { "epoch": 0.8539454349352501, "grad_norm": 0.15704505145549774, "learning_rate": 4.146061803730809e-06, "loss": 0.8888, "step": 117970 }, { "epoch": 0.8540178215958363, "grad_norm": 0.19270089268684387, "learning_rate": 4.145989417070222e-06, "loss": 0.8657, "step": 117980 }, { "epoch": 0.8540902082564225, "grad_norm": 0.16790702939033508, "learning_rate": 4.145917030409637e-06, "loss": 0.8659, "step": 117990 }, { "epoch": 0.8541625949170087, "grad_norm": 0.15922467410564423, "learning_rate": 4.14584464374905e-06, "loss": 0.8762, "step": 118000 }, { "epoch": 0.8542349815775949, "grad_norm": 0.14866970479488373, "learning_rate": 4.145772257088464e-06, "loss": 0.881, "step": 118010 }, { "epoch": 0.8543073682381811, "grad_norm": 0.15993216633796692, "learning_rate": 4.1456998704278776e-06, "loss": 0.8748, "step": 118020 }, { "epoch": 0.8543797548987673, "grad_norm": 0.1770089566707611, "learning_rate": 4.145627483767292e-06, "loss": 0.8689, "step": 118030 }, { "epoch": 0.8544521415593535, "grad_norm": 0.1457049697637558, "learning_rate": 4.145555097106706e-06, "loss": 0.8777, "step": 118040 }, { "epoch": 0.8545245282199396, "grad_norm": 0.1655452698469162, "learning_rate": 4.145482710446119e-06, "loss": 0.8742, "step": 118050 }, { "epoch": 0.8545969148805258, "grad_norm": 0.15786688029766083, "learning_rate": 4.145410323785533e-06, "loss": 0.8852, "step": 118060 }, { "epoch": 0.854669301541112, "grad_norm": 0.14149673283100128, "learning_rate": 4.145337937124947e-06, "loss": 0.8803, "step": 118070 }, { "epoch": 0.8547416882016982, "grad_norm": 0.19031822681427002, "learning_rate": 4.145265550464361e-06, "loss": 0.8798, "step": 118080 }, { "epoch": 0.8548140748622843, "grad_norm": 0.1616319864988327, "learning_rate": 4.1451931638037746e-06, "loss": 0.8837, "step": 118090 }, { "epoch": 0.8548864615228705, "grad_norm": 0.16045629978179932, "learning_rate": 4.145120777143188e-06, "loss": 0.8746, "step": 118100 }, { "epoch": 0.8549588481834568, "grad_norm": 0.14599646627902985, "learning_rate": 4.145048390482602e-06, "loss": 0.878, "step": 118110 }, { "epoch": 0.855031234844043, "grad_norm": 0.1440078318119049, "learning_rate": 4.144976003822016e-06, "loss": 0.8766, "step": 118120 }, { "epoch": 0.8551036215046292, "grad_norm": 0.15769658982753754, "learning_rate": 4.14490361716143e-06, "loss": 0.8753, "step": 118130 }, { "epoch": 0.8551760081652153, "grad_norm": 0.15050294995307922, "learning_rate": 4.1448312305008435e-06, "loss": 0.8737, "step": 118140 }, { "epoch": 0.8552483948258015, "grad_norm": 0.15845508873462677, "learning_rate": 4.144758843840257e-06, "loss": 0.8665, "step": 118150 }, { "epoch": 0.8553207814863877, "grad_norm": 0.16100691258907318, "learning_rate": 4.1446864571796716e-06, "loss": 0.8818, "step": 118160 }, { "epoch": 0.8553931681469739, "grad_norm": 0.1659129559993744, "learning_rate": 4.144614070519085e-06, "loss": 0.874, "step": 118170 }, { "epoch": 0.85546555480756, "grad_norm": 0.15678264200687408, "learning_rate": 4.144541683858499e-06, "loss": 0.8712, "step": 118180 }, { "epoch": 0.8555379414681462, "grad_norm": 0.16013620793819427, "learning_rate": 4.144469297197912e-06, "loss": 0.8695, "step": 118190 }, { "epoch": 0.8556103281287324, "grad_norm": 0.16265377402305603, "learning_rate": 4.144396910537327e-06, "loss": 0.8779, "step": 118200 }, { "epoch": 0.8556827147893187, "grad_norm": 0.15654130280017853, "learning_rate": 4.1443245238767405e-06, "loss": 0.8683, "step": 118210 }, { "epoch": 0.8557551014499049, "grad_norm": 0.17270372807979584, "learning_rate": 4.144252137216154e-06, "loss": 0.8814, "step": 118220 }, { "epoch": 0.855827488110491, "grad_norm": 0.15407659113407135, "learning_rate": 4.144179750555568e-06, "loss": 0.8727, "step": 118230 }, { "epoch": 0.8558998747710772, "grad_norm": 0.14619654417037964, "learning_rate": 4.144107363894982e-06, "loss": 0.8825, "step": 118240 }, { "epoch": 0.8559722614316634, "grad_norm": 0.15296125411987305, "learning_rate": 4.144034977234396e-06, "loss": 0.8842, "step": 118250 }, { "epoch": 0.8560446480922496, "grad_norm": 0.14285480976104736, "learning_rate": 4.143962590573809e-06, "loss": 0.8817, "step": 118260 }, { "epoch": 0.8561170347528357, "grad_norm": 0.15438437461853027, "learning_rate": 4.143890203913223e-06, "loss": 0.8721, "step": 118270 }, { "epoch": 0.8561894214134219, "grad_norm": 0.14615854620933533, "learning_rate": 4.1438178172526375e-06, "loss": 0.8703, "step": 118280 }, { "epoch": 0.8562618080740081, "grad_norm": 0.1698932945728302, "learning_rate": 4.143745430592051e-06, "loss": 0.8623, "step": 118290 }, { "epoch": 0.8563341947345943, "grad_norm": 0.14447180926799774, "learning_rate": 4.143673043931465e-06, "loss": 0.8622, "step": 118300 }, { "epoch": 0.8564065813951804, "grad_norm": 0.15869450569152832, "learning_rate": 4.143600657270878e-06, "loss": 0.8625, "step": 118310 }, { "epoch": 0.8564789680557667, "grad_norm": 0.16322019696235657, "learning_rate": 4.143528270610293e-06, "loss": 0.8714, "step": 118320 }, { "epoch": 0.8565513547163529, "grad_norm": 0.15476880967617035, "learning_rate": 4.1434558839497064e-06, "loss": 0.8786, "step": 118330 }, { "epoch": 0.8566237413769391, "grad_norm": 0.17096053063869476, "learning_rate": 4.14338349728912e-06, "loss": 0.8831, "step": 118340 }, { "epoch": 0.8566961280375253, "grad_norm": 0.15711814165115356, "learning_rate": 4.143311110628534e-06, "loss": 0.8487, "step": 118350 }, { "epoch": 0.8567685146981114, "grad_norm": 0.15190854668617249, "learning_rate": 4.143238723967948e-06, "loss": 0.872, "step": 118360 }, { "epoch": 0.8568409013586976, "grad_norm": 0.15767568349838257, "learning_rate": 4.143166337307361e-06, "loss": 0.8787, "step": 118370 }, { "epoch": 0.8569132880192838, "grad_norm": 0.13986888527870178, "learning_rate": 4.1430939506467745e-06, "loss": 0.8605, "step": 118380 }, { "epoch": 0.85698567467987, "grad_norm": 0.1628597527742386, "learning_rate": 4.143021563986189e-06, "loss": 0.8824, "step": 118390 }, { "epoch": 0.8570580613404561, "grad_norm": 0.14581960439682007, "learning_rate": 4.142949177325603e-06, "loss": 0.8637, "step": 118400 }, { "epoch": 0.8571304480010423, "grad_norm": 0.15314272046089172, "learning_rate": 4.142876790665016e-06, "loss": 0.871, "step": 118410 }, { "epoch": 0.8572028346616286, "grad_norm": 0.13960693776607513, "learning_rate": 4.14280440400443e-06, "loss": 0.8791, "step": 118420 }, { "epoch": 0.8572752213222148, "grad_norm": 0.31276777386665344, "learning_rate": 4.142732017343844e-06, "loss": 0.8804, "step": 118430 }, { "epoch": 0.857347607982801, "grad_norm": 0.15629543364048004, "learning_rate": 4.142659630683258e-06, "loss": 0.8891, "step": 118440 }, { "epoch": 0.8574199946433871, "grad_norm": 0.15342466533184052, "learning_rate": 4.1425872440226715e-06, "loss": 0.883, "step": 118450 }, { "epoch": 0.8574923813039733, "grad_norm": 0.14505906403064728, "learning_rate": 4.142514857362085e-06, "loss": 0.8773, "step": 118460 }, { "epoch": 0.8575647679645595, "grad_norm": 0.17075826227664948, "learning_rate": 4.1424424707015e-06, "loss": 0.8876, "step": 118470 }, { "epoch": 0.8576371546251457, "grad_norm": 0.14717842638492584, "learning_rate": 4.142370084040913e-06, "loss": 0.875, "step": 118480 }, { "epoch": 0.8577095412857318, "grad_norm": 0.1587383896112442, "learning_rate": 4.142297697380327e-06, "loss": 0.878, "step": 118490 }, { "epoch": 0.857781927946318, "grad_norm": 0.15151575207710266, "learning_rate": 4.1422253107197404e-06, "loss": 0.8881, "step": 118500 }, { "epoch": 0.8578543146069042, "grad_norm": 0.17024588584899902, "learning_rate": 4.142152924059155e-06, "loss": 0.8746, "step": 118510 }, { "epoch": 0.8579267012674904, "grad_norm": 0.16007643938064575, "learning_rate": 4.1420805373985685e-06, "loss": 0.8731, "step": 118520 }, { "epoch": 0.8579990879280767, "grad_norm": 0.1677238941192627, "learning_rate": 4.142008150737982e-06, "loss": 0.87, "step": 118530 }, { "epoch": 0.8580714745886628, "grad_norm": 0.1518600583076477, "learning_rate": 4.141935764077396e-06, "loss": 0.8786, "step": 118540 }, { "epoch": 0.858143861249249, "grad_norm": 0.15245488286018372, "learning_rate": 4.14186337741681e-06, "loss": 0.8882, "step": 118550 }, { "epoch": 0.8582162479098352, "grad_norm": 0.16277900338172913, "learning_rate": 4.141790990756224e-06, "loss": 0.8778, "step": 118560 }, { "epoch": 0.8582886345704214, "grad_norm": 0.1517462432384491, "learning_rate": 4.1417186040956375e-06, "loss": 0.8821, "step": 118570 }, { "epoch": 0.8583610212310075, "grad_norm": 0.1491396576166153, "learning_rate": 4.141646217435051e-06, "loss": 0.8738, "step": 118580 }, { "epoch": 0.8584334078915937, "grad_norm": 0.19470219314098358, "learning_rate": 4.1415738307744655e-06, "loss": 0.8708, "step": 118590 }, { "epoch": 0.8585057945521799, "grad_norm": 0.30775144696235657, "learning_rate": 4.141501444113879e-06, "loss": 0.8803, "step": 118600 }, { "epoch": 0.8585781812127661, "grad_norm": 0.154507115483284, "learning_rate": 4.141429057453293e-06, "loss": 0.8731, "step": 118610 }, { "epoch": 0.8586505678733523, "grad_norm": 0.17000029981136322, "learning_rate": 4.141356670792706e-06, "loss": 0.8884, "step": 118620 }, { "epoch": 0.8587229545339384, "grad_norm": 0.15258820354938507, "learning_rate": 4.141284284132121e-06, "loss": 0.8584, "step": 118630 }, { "epoch": 0.8587953411945247, "grad_norm": 0.15323597192764282, "learning_rate": 4.1412118974715345e-06, "loss": 0.8639, "step": 118640 }, { "epoch": 0.8588677278551109, "grad_norm": 0.1578139066696167, "learning_rate": 4.141139510810948e-06, "loss": 0.8929, "step": 118650 }, { "epoch": 0.8589401145156971, "grad_norm": 0.16174130141735077, "learning_rate": 4.141067124150362e-06, "loss": 0.887, "step": 118660 }, { "epoch": 0.8590125011762832, "grad_norm": 0.15842093527317047, "learning_rate": 4.140994737489776e-06, "loss": 0.8724, "step": 118670 }, { "epoch": 0.8590848878368694, "grad_norm": 0.1600816696882248, "learning_rate": 4.14092235082919e-06, "loss": 0.8744, "step": 118680 }, { "epoch": 0.8591572744974556, "grad_norm": 0.17777082324028015, "learning_rate": 4.140849964168603e-06, "loss": 0.8832, "step": 118690 }, { "epoch": 0.8592296611580418, "grad_norm": 0.15657740831375122, "learning_rate": 4.140777577508017e-06, "loss": 0.8791, "step": 118700 }, { "epoch": 0.859302047818628, "grad_norm": 0.16433501243591309, "learning_rate": 4.1407051908474315e-06, "loss": 0.879, "step": 118710 }, { "epoch": 0.8593744344792141, "grad_norm": 0.148828387260437, "learning_rate": 4.140632804186845e-06, "loss": 0.8825, "step": 118720 }, { "epoch": 0.8594468211398003, "grad_norm": 0.1595117151737213, "learning_rate": 4.140560417526259e-06, "loss": 0.8729, "step": 118730 }, { "epoch": 0.8595192078003866, "grad_norm": 0.15022175014019012, "learning_rate": 4.140488030865672e-06, "loss": 0.8778, "step": 118740 }, { "epoch": 0.8595915944609728, "grad_norm": 0.16430038213729858, "learning_rate": 4.140415644205086e-06, "loss": 0.8535, "step": 118750 }, { "epoch": 0.859663981121559, "grad_norm": 0.1567695438861847, "learning_rate": 4.1403432575445e-06, "loss": 0.8784, "step": 118760 }, { "epoch": 0.8597363677821451, "grad_norm": 0.15546707808971405, "learning_rate": 4.140270870883914e-06, "loss": 0.874, "step": 118770 }, { "epoch": 0.8598087544427313, "grad_norm": 0.16317854821681976, "learning_rate": 4.140198484223328e-06, "loss": 0.8694, "step": 118780 }, { "epoch": 0.8598811411033175, "grad_norm": 0.15389098227024078, "learning_rate": 4.140126097562741e-06, "loss": 0.8778, "step": 118790 }, { "epoch": 0.8599535277639037, "grad_norm": 0.17565035820007324, "learning_rate": 4.140053710902156e-06, "loss": 0.8855, "step": 118800 }, { "epoch": 0.8600259144244898, "grad_norm": 0.16407647728919983, "learning_rate": 4.139981324241569e-06, "loss": 0.8706, "step": 118810 }, { "epoch": 0.860098301085076, "grad_norm": 0.15503662824630737, "learning_rate": 4.139908937580983e-06, "loss": 0.8748, "step": 118820 }, { "epoch": 0.8601706877456622, "grad_norm": 0.15762311220169067, "learning_rate": 4.1398365509203966e-06, "loss": 0.8765, "step": 118830 }, { "epoch": 0.8602430744062484, "grad_norm": 0.16244664788246155, "learning_rate": 4.139764164259811e-06, "loss": 0.8762, "step": 118840 }, { "epoch": 0.8603154610668347, "grad_norm": 0.15617172420024872, "learning_rate": 4.139691777599225e-06, "loss": 0.8736, "step": 118850 }, { "epoch": 0.8603878477274208, "grad_norm": 0.1622217446565628, "learning_rate": 4.139619390938638e-06, "loss": 0.8735, "step": 118860 }, { "epoch": 0.860460234388007, "grad_norm": 0.15860269963741302, "learning_rate": 4.139547004278052e-06, "loss": 0.8614, "step": 118870 }, { "epoch": 0.8605326210485932, "grad_norm": 0.1792127788066864, "learning_rate": 4.139474617617466e-06, "loss": 0.8649, "step": 118880 }, { "epoch": 0.8606050077091794, "grad_norm": 0.14843524992465973, "learning_rate": 4.13940223095688e-06, "loss": 0.8749, "step": 118890 }, { "epoch": 0.8606773943697655, "grad_norm": 0.15046702325344086, "learning_rate": 4.1393298442962936e-06, "loss": 0.8697, "step": 118900 }, { "epoch": 0.8607497810303517, "grad_norm": 0.15024615824222565, "learning_rate": 4.139257457635707e-06, "loss": 0.8616, "step": 118910 }, { "epoch": 0.8608221676909379, "grad_norm": 0.16122505068778992, "learning_rate": 4.139185070975121e-06, "loss": 0.8701, "step": 118920 }, { "epoch": 0.8608945543515241, "grad_norm": 0.16019709408283234, "learning_rate": 4.139112684314534e-06, "loss": 0.8733, "step": 118930 }, { "epoch": 0.8609669410121102, "grad_norm": 0.21425500512123108, "learning_rate": 4.139040297653948e-06, "loss": 0.8828, "step": 118940 }, { "epoch": 0.8610393276726965, "grad_norm": 0.15902172029018402, "learning_rate": 4.1389679109933625e-06, "loss": 0.8829, "step": 118950 }, { "epoch": 0.8611117143332827, "grad_norm": 0.15424564480781555, "learning_rate": 4.138895524332776e-06, "loss": 0.8682, "step": 118960 }, { "epoch": 0.8611841009938689, "grad_norm": 0.14803998172283173, "learning_rate": 4.13882313767219e-06, "loss": 0.8536, "step": 118970 }, { "epoch": 0.8612564876544551, "grad_norm": 0.17855031788349152, "learning_rate": 4.138750751011603e-06, "loss": 0.8756, "step": 118980 }, { "epoch": 0.8613288743150412, "grad_norm": 0.19807036221027374, "learning_rate": 4.138678364351018e-06, "loss": 0.8816, "step": 118990 }, { "epoch": 0.8614012609756274, "grad_norm": 0.15931545197963715, "learning_rate": 4.138605977690431e-06, "loss": 0.8715, "step": 119000 }, { "epoch": 0.8614736476362136, "grad_norm": 0.14279036223888397, "learning_rate": 4.138533591029845e-06, "loss": 0.8736, "step": 119010 }, { "epoch": 0.8615460342967998, "grad_norm": 0.14687687158584595, "learning_rate": 4.138461204369259e-06, "loss": 0.8761, "step": 119020 }, { "epoch": 0.8616184209573859, "grad_norm": 0.15899838507175446, "learning_rate": 4.138388817708673e-06, "loss": 0.8743, "step": 119030 }, { "epoch": 0.8616908076179721, "grad_norm": 0.18828865885734558, "learning_rate": 4.138316431048087e-06, "loss": 0.8716, "step": 119040 }, { "epoch": 0.8617631942785583, "grad_norm": 0.17458096146583557, "learning_rate": 4.1382440443875e-06, "loss": 0.8581, "step": 119050 }, { "epoch": 0.8618355809391446, "grad_norm": 0.1492881029844284, "learning_rate": 4.138171657726914e-06, "loss": 0.8767, "step": 119060 }, { "epoch": 0.8619079675997308, "grad_norm": 0.1459776908159256, "learning_rate": 4.1380992710663284e-06, "loss": 0.8777, "step": 119070 }, { "epoch": 0.8619803542603169, "grad_norm": 0.15040789544582367, "learning_rate": 4.138026884405742e-06, "loss": 0.8845, "step": 119080 }, { "epoch": 0.8620527409209031, "grad_norm": 0.14359919726848602, "learning_rate": 4.137954497745156e-06, "loss": 0.8878, "step": 119090 }, { "epoch": 0.8621251275814893, "grad_norm": 0.1604946255683899, "learning_rate": 4.137882111084569e-06, "loss": 0.875, "step": 119100 }, { "epoch": 0.8621975142420755, "grad_norm": 0.15070165693759918, "learning_rate": 4.137809724423984e-06, "loss": 0.8829, "step": 119110 }, { "epoch": 0.8622699009026616, "grad_norm": 0.15339529514312744, "learning_rate": 4.137737337763397e-06, "loss": 0.881, "step": 119120 }, { "epoch": 0.8623422875632478, "grad_norm": 0.15151731669902802, "learning_rate": 4.137664951102811e-06, "loss": 0.8773, "step": 119130 }, { "epoch": 0.862414674223834, "grad_norm": 0.17294517159461975, "learning_rate": 4.137592564442225e-06, "loss": 0.8631, "step": 119140 }, { "epoch": 0.8624870608844202, "grad_norm": 0.1720060110092163, "learning_rate": 4.137520177781639e-06, "loss": 0.8931, "step": 119150 }, { "epoch": 0.8625594475450064, "grad_norm": 0.15391665697097778, "learning_rate": 4.137447791121053e-06, "loss": 0.8624, "step": 119160 }, { "epoch": 0.8626318342055926, "grad_norm": 0.14894205331802368, "learning_rate": 4.137375404460466e-06, "loss": 0.8758, "step": 119170 }, { "epoch": 0.8627042208661788, "grad_norm": 0.16286589205265045, "learning_rate": 4.13730301779988e-06, "loss": 0.8836, "step": 119180 }, { "epoch": 0.862776607526765, "grad_norm": 0.1608862280845642, "learning_rate": 4.137230631139294e-06, "loss": 0.8801, "step": 119190 }, { "epoch": 0.8628489941873512, "grad_norm": 0.1520557999610901, "learning_rate": 4.137158244478708e-06, "loss": 0.8725, "step": 119200 }, { "epoch": 0.8629213808479373, "grad_norm": 0.17940600216388702, "learning_rate": 4.137085857818122e-06, "loss": 0.8713, "step": 119210 }, { "epoch": 0.8629937675085235, "grad_norm": 0.14350375533103943, "learning_rate": 4.137013471157535e-06, "loss": 0.8842, "step": 119220 }, { "epoch": 0.8630661541691097, "grad_norm": 0.15040133893489838, "learning_rate": 4.13694108449695e-06, "loss": 0.8723, "step": 119230 }, { "epoch": 0.8631385408296959, "grad_norm": 0.16370560228824615, "learning_rate": 4.136868697836363e-06, "loss": 0.8713, "step": 119240 }, { "epoch": 0.863210927490282, "grad_norm": 0.16433537006378174, "learning_rate": 4.136796311175777e-06, "loss": 0.8691, "step": 119250 }, { "epoch": 0.8632833141508682, "grad_norm": 0.15914808213710785, "learning_rate": 4.1367239245151905e-06, "loss": 0.8863, "step": 119260 }, { "epoch": 0.8633557008114545, "grad_norm": 0.16615451872348785, "learning_rate": 4.136651537854605e-06, "loss": 0.8857, "step": 119270 }, { "epoch": 0.8634280874720407, "grad_norm": 0.157662034034729, "learning_rate": 4.136579151194019e-06, "loss": 0.887, "step": 119280 }, { "epoch": 0.8635004741326269, "grad_norm": 0.5979766249656677, "learning_rate": 4.136506764533432e-06, "loss": 0.8737, "step": 119290 }, { "epoch": 0.863572860793213, "grad_norm": 0.15033407509326935, "learning_rate": 4.136434377872846e-06, "loss": 0.8869, "step": 119300 }, { "epoch": 0.8636452474537992, "grad_norm": 0.1554785966873169, "learning_rate": 4.13636199121226e-06, "loss": 0.8854, "step": 119310 }, { "epoch": 0.8637176341143854, "grad_norm": 0.17543356120586395, "learning_rate": 4.136289604551674e-06, "loss": 0.8879, "step": 119320 }, { "epoch": 0.8637900207749716, "grad_norm": 0.14748305082321167, "learning_rate": 4.1362172178910875e-06, "loss": 0.8729, "step": 119330 }, { "epoch": 0.8638624074355578, "grad_norm": 0.15673819184303284, "learning_rate": 4.136144831230501e-06, "loss": 0.8585, "step": 119340 }, { "epoch": 0.8639347940961439, "grad_norm": 0.1523292362689972, "learning_rate": 4.136072444569916e-06, "loss": 0.8832, "step": 119350 }, { "epoch": 0.8640071807567301, "grad_norm": 0.2544263005256653, "learning_rate": 4.136000057909329e-06, "loss": 0.8712, "step": 119360 }, { "epoch": 0.8640795674173163, "grad_norm": 0.15314503014087677, "learning_rate": 4.135927671248743e-06, "loss": 0.8739, "step": 119370 }, { "epoch": 0.8641519540779026, "grad_norm": 0.15046413242816925, "learning_rate": 4.1358552845881565e-06, "loss": 0.8834, "step": 119380 }, { "epoch": 0.8642243407384887, "grad_norm": 0.4037892520427704, "learning_rate": 4.13578289792757e-06, "loss": 0.8753, "step": 119390 }, { "epoch": 0.8642967273990749, "grad_norm": 0.15684808790683746, "learning_rate": 4.1357105112669845e-06, "loss": 0.8746, "step": 119400 }, { "epoch": 0.8643691140596611, "grad_norm": 0.15342099964618683, "learning_rate": 4.135638124606398e-06, "loss": 0.8746, "step": 119410 }, { "epoch": 0.8644415007202473, "grad_norm": 0.15408830344676971, "learning_rate": 4.135565737945812e-06, "loss": 0.8748, "step": 119420 }, { "epoch": 0.8645138873808335, "grad_norm": 0.16404138505458832, "learning_rate": 4.135493351285225e-06, "loss": 0.8657, "step": 119430 }, { "epoch": 0.8645862740414196, "grad_norm": 0.14688260853290558, "learning_rate": 4.135420964624639e-06, "loss": 0.8836, "step": 119440 }, { "epoch": 0.8646586607020058, "grad_norm": 0.16032230854034424, "learning_rate": 4.135348577964053e-06, "loss": 0.8735, "step": 119450 }, { "epoch": 0.864731047362592, "grad_norm": 0.14813324809074402, "learning_rate": 4.135276191303467e-06, "loss": 0.8774, "step": 119460 }, { "epoch": 0.8648034340231782, "grad_norm": 0.1528783142566681, "learning_rate": 4.135203804642881e-06, "loss": 0.8737, "step": 119470 }, { "epoch": 0.8648758206837645, "grad_norm": 0.15664593875408173, "learning_rate": 4.135131417982294e-06, "loss": 0.88, "step": 119480 }, { "epoch": 0.8649482073443506, "grad_norm": 0.1477356255054474, "learning_rate": 4.135059031321708e-06, "loss": 0.8672, "step": 119490 }, { "epoch": 0.8650205940049368, "grad_norm": 0.1740371733903885, "learning_rate": 4.134986644661122e-06, "loss": 0.8781, "step": 119500 }, { "epoch": 0.865092980665523, "grad_norm": 0.15743084251880646, "learning_rate": 4.134914258000536e-06, "loss": 0.8829, "step": 119510 }, { "epoch": 0.8651653673261092, "grad_norm": 0.14371630549430847, "learning_rate": 4.13484187133995e-06, "loss": 0.8789, "step": 119520 }, { "epoch": 0.8652377539866953, "grad_norm": 0.1605149209499359, "learning_rate": 4.134769484679363e-06, "loss": 0.8863, "step": 119530 }, { "epoch": 0.8653101406472815, "grad_norm": 0.16169756650924683, "learning_rate": 4.134697098018777e-06, "loss": 0.8839, "step": 119540 }, { "epoch": 0.8653825273078677, "grad_norm": 0.15459372103214264, "learning_rate": 4.134624711358191e-06, "loss": 0.8706, "step": 119550 }, { "epoch": 0.8654549139684539, "grad_norm": 0.2985534369945526, "learning_rate": 4.134552324697605e-06, "loss": 0.8732, "step": 119560 }, { "epoch": 0.86552730062904, "grad_norm": 0.1482762098312378, "learning_rate": 4.1344799380370186e-06, "loss": 0.8806, "step": 119570 }, { "epoch": 0.8655996872896262, "grad_norm": 0.15104518830776215, "learning_rate": 4.134407551376432e-06, "loss": 0.884, "step": 119580 }, { "epoch": 0.8656720739502125, "grad_norm": 0.15297864377498627, "learning_rate": 4.134335164715847e-06, "loss": 0.8779, "step": 119590 }, { "epoch": 0.8657444606107987, "grad_norm": 0.1583460420370102, "learning_rate": 4.13426277805526e-06, "loss": 0.8681, "step": 119600 }, { "epoch": 0.8658168472713849, "grad_norm": 0.15396851301193237, "learning_rate": 4.134190391394674e-06, "loss": 0.8802, "step": 119610 }, { "epoch": 0.865889233931971, "grad_norm": 0.16072429716587067, "learning_rate": 4.1341180047340875e-06, "loss": 0.8759, "step": 119620 }, { "epoch": 0.8659616205925572, "grad_norm": 0.14890296757221222, "learning_rate": 4.134045618073502e-06, "loss": 0.8725, "step": 119630 }, { "epoch": 0.8660340072531434, "grad_norm": 0.1567048579454422, "learning_rate": 4.1339732314129156e-06, "loss": 0.8839, "step": 119640 }, { "epoch": 0.8661063939137296, "grad_norm": 0.15268154442310333, "learning_rate": 4.133900844752329e-06, "loss": 0.8609, "step": 119650 }, { "epoch": 0.8661787805743157, "grad_norm": 0.15023602545261383, "learning_rate": 4.133828458091743e-06, "loss": 0.8758, "step": 119660 }, { "epoch": 0.8662511672349019, "grad_norm": 0.15714387595653534, "learning_rate": 4.133756071431157e-06, "loss": 0.873, "step": 119670 }, { "epoch": 0.8663235538954881, "grad_norm": 0.15032929182052612, "learning_rate": 4.133683684770571e-06, "loss": 0.8631, "step": 119680 }, { "epoch": 0.8663959405560743, "grad_norm": 0.14457829296588898, "learning_rate": 4.1336112981099845e-06, "loss": 0.8716, "step": 119690 }, { "epoch": 0.8664683272166606, "grad_norm": 0.17244747281074524, "learning_rate": 4.133538911449398e-06, "loss": 0.8842, "step": 119700 }, { "epoch": 0.8665407138772467, "grad_norm": 0.15346111357212067, "learning_rate": 4.1334665247888126e-06, "loss": 0.884, "step": 119710 }, { "epoch": 0.8666131005378329, "grad_norm": 0.1665041148662567, "learning_rate": 4.133394138128226e-06, "loss": 0.8807, "step": 119720 }, { "epoch": 0.8666854871984191, "grad_norm": 0.1579190045595169, "learning_rate": 4.13332175146764e-06, "loss": 0.882, "step": 119730 }, { "epoch": 0.8667578738590053, "grad_norm": 1.4202749729156494, "learning_rate": 4.133249364807053e-06, "loss": 0.8748, "step": 119740 }, { "epoch": 0.8668302605195914, "grad_norm": 0.1646961271762848, "learning_rate": 4.133176978146468e-06, "loss": 0.8626, "step": 119750 }, { "epoch": 0.8669026471801776, "grad_norm": 0.1607542634010315, "learning_rate": 4.1331045914858815e-06, "loss": 0.8768, "step": 119760 }, { "epoch": 0.8669750338407638, "grad_norm": 0.1496744155883789, "learning_rate": 4.133032204825295e-06, "loss": 0.8702, "step": 119770 }, { "epoch": 0.86704742050135, "grad_norm": 0.14871948957443237, "learning_rate": 4.132959818164709e-06, "loss": 0.8846, "step": 119780 }, { "epoch": 0.8671198071619362, "grad_norm": 0.14933007955551147, "learning_rate": 4.132887431504123e-06, "loss": 0.8641, "step": 119790 }, { "epoch": 0.8671921938225224, "grad_norm": 0.16172604262828827, "learning_rate": 4.132815044843537e-06, "loss": 0.8764, "step": 119800 }, { "epoch": 0.8672645804831086, "grad_norm": 0.1538577377796173, "learning_rate": 4.1327426581829504e-06, "loss": 0.8734, "step": 119810 }, { "epoch": 0.8673369671436948, "grad_norm": 0.14605380594730377, "learning_rate": 4.132670271522364e-06, "loss": 0.8692, "step": 119820 }, { "epoch": 0.867409353804281, "grad_norm": 0.16161808371543884, "learning_rate": 4.1325978848617785e-06, "loss": 0.8729, "step": 119830 }, { "epoch": 0.8674817404648671, "grad_norm": 0.2714543044567108, "learning_rate": 4.132525498201192e-06, "loss": 0.8787, "step": 119840 }, { "epoch": 0.8675541271254533, "grad_norm": 0.1541425883769989, "learning_rate": 4.132453111540606e-06, "loss": 0.853, "step": 119850 }, { "epoch": 0.8676265137860395, "grad_norm": 0.15290531516075134, "learning_rate": 4.132380724880019e-06, "loss": 0.8684, "step": 119860 }, { "epoch": 0.8676989004466257, "grad_norm": 0.1534222811460495, "learning_rate": 4.132308338219434e-06, "loss": 0.8673, "step": 119870 }, { "epoch": 0.8677712871072119, "grad_norm": 0.16768594086170197, "learning_rate": 4.1322359515588474e-06, "loss": 0.8653, "step": 119880 }, { "epoch": 0.867843673767798, "grad_norm": 0.1455199420452118, "learning_rate": 4.132163564898261e-06, "loss": 0.8762, "step": 119890 }, { "epoch": 0.8679160604283842, "grad_norm": 0.15997451543807983, "learning_rate": 4.132091178237675e-06, "loss": 0.884, "step": 119900 }, { "epoch": 0.8679884470889705, "grad_norm": 0.15855015814304352, "learning_rate": 4.132018791577089e-06, "loss": 0.8886, "step": 119910 }, { "epoch": 0.8680608337495567, "grad_norm": 0.1565515547990799, "learning_rate": 4.131946404916503e-06, "loss": 0.8721, "step": 119920 }, { "epoch": 0.8681332204101428, "grad_norm": 0.1573457270860672, "learning_rate": 4.131874018255916e-06, "loss": 0.8682, "step": 119930 }, { "epoch": 0.868205607070729, "grad_norm": 0.1630558967590332, "learning_rate": 4.13180163159533e-06, "loss": 0.8654, "step": 119940 }, { "epoch": 0.8682779937313152, "grad_norm": 0.1974228322505951, "learning_rate": 4.1317292449347444e-06, "loss": 0.8581, "step": 119950 }, { "epoch": 0.8683503803919014, "grad_norm": 0.14881473779678345, "learning_rate": 4.131656858274157e-06, "loss": 0.8827, "step": 119960 }, { "epoch": 0.8684227670524876, "grad_norm": 0.16378778219223022, "learning_rate": 4.131584471613571e-06, "loss": 0.8598, "step": 119970 }, { "epoch": 0.8684951537130737, "grad_norm": 0.1859802007675171, "learning_rate": 4.131512084952985e-06, "loss": 0.8768, "step": 119980 }, { "epoch": 0.8685675403736599, "grad_norm": 0.16006912291049957, "learning_rate": 4.131439698292399e-06, "loss": 0.8718, "step": 119990 }, { "epoch": 0.8686399270342461, "grad_norm": 0.25071021914482117, "learning_rate": 4.1313673116318125e-06, "loss": 0.8708, "step": 120000 }, { "epoch": 0.8687123136948323, "grad_norm": 0.15014329552650452, "learning_rate": 4.131294924971226e-06, "loss": 0.8719, "step": 120010 }, { "epoch": 0.8687847003554185, "grad_norm": 0.1454050987958908, "learning_rate": 4.131222538310641e-06, "loss": 0.87, "step": 120020 }, { "epoch": 0.8688570870160047, "grad_norm": 0.1666693240404129, "learning_rate": 4.131150151650054e-06, "loss": 0.8761, "step": 120030 }, { "epoch": 0.8689294736765909, "grad_norm": 0.16259945929050446, "learning_rate": 4.131077764989468e-06, "loss": 0.8679, "step": 120040 }, { "epoch": 0.8690018603371771, "grad_norm": 0.16060245037078857, "learning_rate": 4.1310053783288815e-06, "loss": 0.872, "step": 120050 }, { "epoch": 0.8690742469977633, "grad_norm": 0.3217458724975586, "learning_rate": 4.130932991668296e-06, "loss": 0.8638, "step": 120060 }, { "epoch": 0.8691466336583494, "grad_norm": 0.14817246794700623, "learning_rate": 4.1308606050077095e-06, "loss": 0.8703, "step": 120070 }, { "epoch": 0.8692190203189356, "grad_norm": 0.1485157608985901, "learning_rate": 4.130788218347123e-06, "loss": 0.8652, "step": 120080 }, { "epoch": 0.8692914069795218, "grad_norm": 0.16461946070194244, "learning_rate": 4.130715831686537e-06, "loss": 0.858, "step": 120090 }, { "epoch": 0.869363793640108, "grad_norm": 0.15408070385456085, "learning_rate": 4.130643445025951e-06, "loss": 0.8723, "step": 120100 }, { "epoch": 0.8694361803006941, "grad_norm": 0.15531690418720245, "learning_rate": 4.130571058365365e-06, "loss": 0.8761, "step": 120110 }, { "epoch": 0.8695085669612804, "grad_norm": 0.15016768872737885, "learning_rate": 4.1304986717047785e-06, "loss": 0.8732, "step": 120120 }, { "epoch": 0.8695809536218666, "grad_norm": 0.1652849316596985, "learning_rate": 4.130426285044192e-06, "loss": 0.8677, "step": 120130 }, { "epoch": 0.8696533402824528, "grad_norm": 0.15715721249580383, "learning_rate": 4.1303538983836065e-06, "loss": 0.8773, "step": 120140 }, { "epoch": 0.869725726943039, "grad_norm": 0.2709084749221802, "learning_rate": 4.13028151172302e-06, "loss": 0.8724, "step": 120150 }, { "epoch": 0.8697981136036251, "grad_norm": 0.147217258810997, "learning_rate": 4.130209125062434e-06, "loss": 0.8757, "step": 120160 }, { "epoch": 0.8698705002642113, "grad_norm": 0.15031427145004272, "learning_rate": 4.130136738401847e-06, "loss": 0.8757, "step": 120170 }, { "epoch": 0.8699428869247975, "grad_norm": 0.1699194461107254, "learning_rate": 4.130064351741261e-06, "loss": 0.8655, "step": 120180 }, { "epoch": 0.8700152735853837, "grad_norm": 0.17292308807373047, "learning_rate": 4.1299919650806755e-06, "loss": 0.8879, "step": 120190 }, { "epoch": 0.8700876602459698, "grad_norm": 0.15235485136508942, "learning_rate": 4.129919578420089e-06, "loss": 0.8707, "step": 120200 }, { "epoch": 0.870160046906556, "grad_norm": 0.15500420331954956, "learning_rate": 4.129847191759503e-06, "loss": 0.8743, "step": 120210 }, { "epoch": 0.8702324335671422, "grad_norm": 0.1642414629459381, "learning_rate": 4.129774805098916e-06, "loss": 0.8661, "step": 120220 }, { "epoch": 0.8703048202277285, "grad_norm": 0.16339725255966187, "learning_rate": 4.129702418438331e-06, "loss": 0.8755, "step": 120230 }, { "epoch": 0.8703772068883147, "grad_norm": 0.1617591381072998, "learning_rate": 4.129630031777744e-06, "loss": 0.8659, "step": 120240 }, { "epoch": 0.8704495935489008, "grad_norm": 0.14315065741539001, "learning_rate": 4.129557645117158e-06, "loss": 0.8774, "step": 120250 }, { "epoch": 0.870521980209487, "grad_norm": 0.16367150843143463, "learning_rate": 4.129485258456572e-06, "loss": 0.8774, "step": 120260 }, { "epoch": 0.8705943668700732, "grad_norm": 0.1460050344467163, "learning_rate": 4.129412871795986e-06, "loss": 0.8779, "step": 120270 }, { "epoch": 0.8706667535306594, "grad_norm": 0.17741134762763977, "learning_rate": 4.1293404851354e-06, "loss": 0.8806, "step": 120280 }, { "epoch": 0.8707391401912455, "grad_norm": 0.1527169644832611, "learning_rate": 4.129268098474813e-06, "loss": 0.8783, "step": 120290 }, { "epoch": 0.8708115268518317, "grad_norm": 0.1656719595193863, "learning_rate": 4.129195711814227e-06, "loss": 0.874, "step": 120300 }, { "epoch": 0.8708839135124179, "grad_norm": 0.15612590312957764, "learning_rate": 4.129123325153641e-06, "loss": 0.8766, "step": 120310 }, { "epoch": 0.8709563001730041, "grad_norm": 0.182773619890213, "learning_rate": 4.129050938493055e-06, "loss": 0.863, "step": 120320 }, { "epoch": 0.8710286868335904, "grad_norm": 0.1543162316083908, "learning_rate": 4.128978551832469e-06, "loss": 0.8738, "step": 120330 }, { "epoch": 0.8711010734941765, "grad_norm": 0.18090280890464783, "learning_rate": 4.128906165171882e-06, "loss": 0.8703, "step": 120340 }, { "epoch": 0.8711734601547627, "grad_norm": 0.15451836585998535, "learning_rate": 4.128833778511297e-06, "loss": 0.8816, "step": 120350 }, { "epoch": 0.8712458468153489, "grad_norm": 0.146149680018425, "learning_rate": 4.12876139185071e-06, "loss": 0.8759, "step": 120360 }, { "epoch": 0.8713182334759351, "grad_norm": 0.14919118583202362, "learning_rate": 4.128689005190124e-06, "loss": 0.8645, "step": 120370 }, { "epoch": 0.8713906201365212, "grad_norm": 0.6079381704330444, "learning_rate": 4.1286166185295376e-06, "loss": 0.8763, "step": 120380 }, { "epoch": 0.8714630067971074, "grad_norm": 0.162376269698143, "learning_rate": 4.128544231868952e-06, "loss": 0.8675, "step": 120390 }, { "epoch": 0.8715353934576936, "grad_norm": 0.16012080013751984, "learning_rate": 4.128471845208366e-06, "loss": 0.874, "step": 120400 }, { "epoch": 0.8716077801182798, "grad_norm": 0.16266874969005585, "learning_rate": 4.128399458547779e-06, "loss": 0.868, "step": 120410 }, { "epoch": 0.871680166778866, "grad_norm": 0.15482492744922638, "learning_rate": 4.128327071887193e-06, "loss": 0.8837, "step": 120420 }, { "epoch": 0.8717525534394521, "grad_norm": 0.15119405090808868, "learning_rate": 4.128254685226607e-06, "loss": 0.8738, "step": 120430 }, { "epoch": 0.8718249401000384, "grad_norm": 0.1444867104291916, "learning_rate": 4.128182298566021e-06, "loss": 0.8617, "step": 120440 }, { "epoch": 0.8718973267606246, "grad_norm": 0.15251514315605164, "learning_rate": 4.1281099119054346e-06, "loss": 0.8706, "step": 120450 }, { "epoch": 0.8719697134212108, "grad_norm": 0.16033178567886353, "learning_rate": 4.128037525244848e-06, "loss": 0.8756, "step": 120460 }, { "epoch": 0.872042100081797, "grad_norm": 0.1551380306482315, "learning_rate": 4.127965138584263e-06, "loss": 0.8642, "step": 120470 }, { "epoch": 0.8721144867423831, "grad_norm": 0.14764218032360077, "learning_rate": 4.127892751923676e-06, "loss": 0.8646, "step": 120480 }, { "epoch": 0.8721868734029693, "grad_norm": 0.14466458559036255, "learning_rate": 4.127820365263089e-06, "loss": 0.8727, "step": 120490 }, { "epoch": 0.8722592600635555, "grad_norm": 0.15080726146697998, "learning_rate": 4.1277479786025035e-06, "loss": 0.8934, "step": 120500 }, { "epoch": 0.8723316467241417, "grad_norm": 0.16469760239124298, "learning_rate": 4.127675591941917e-06, "loss": 0.8752, "step": 120510 }, { "epoch": 0.8724040333847278, "grad_norm": 0.1857549250125885, "learning_rate": 4.127603205281331e-06, "loss": 0.866, "step": 120520 }, { "epoch": 0.872476420045314, "grad_norm": 0.18460987508296967, "learning_rate": 4.127530818620744e-06, "loss": 0.8576, "step": 120530 }, { "epoch": 0.8725488067059002, "grad_norm": 0.15353545546531677, "learning_rate": 4.127458431960159e-06, "loss": 0.8828, "step": 120540 }, { "epoch": 0.8726211933664865, "grad_norm": 0.14069363474845886, "learning_rate": 4.1273860452995724e-06, "loss": 0.868, "step": 120550 }, { "epoch": 0.8726935800270726, "grad_norm": 0.1678398996591568, "learning_rate": 4.127313658638986e-06, "loss": 0.8802, "step": 120560 }, { "epoch": 0.8727659666876588, "grad_norm": 0.15249879658222198, "learning_rate": 4.1272412719784e-06, "loss": 0.8821, "step": 120570 }, { "epoch": 0.872838353348245, "grad_norm": 0.154356449842453, "learning_rate": 4.127168885317814e-06, "loss": 0.8736, "step": 120580 }, { "epoch": 0.8729107400088312, "grad_norm": 0.16177409887313843, "learning_rate": 4.127096498657228e-06, "loss": 0.8793, "step": 120590 }, { "epoch": 0.8729831266694174, "grad_norm": 0.14435283839702606, "learning_rate": 4.127024111996641e-06, "loss": 0.8678, "step": 120600 }, { "epoch": 0.8730555133300035, "grad_norm": 0.1692737489938736, "learning_rate": 4.126951725336055e-06, "loss": 0.8701, "step": 120610 }, { "epoch": 0.8731278999905897, "grad_norm": 0.14820191264152527, "learning_rate": 4.1268793386754694e-06, "loss": 0.8674, "step": 120620 }, { "epoch": 0.8732002866511759, "grad_norm": 0.15043896436691284, "learning_rate": 4.126806952014883e-06, "loss": 0.8733, "step": 120630 }, { "epoch": 0.8732726733117621, "grad_norm": 0.1535714864730835, "learning_rate": 4.126734565354297e-06, "loss": 0.8787, "step": 120640 }, { "epoch": 0.8733450599723483, "grad_norm": 0.1565728783607483, "learning_rate": 4.12666217869371e-06, "loss": 0.8652, "step": 120650 }, { "epoch": 0.8734174466329345, "grad_norm": 0.15889082849025726, "learning_rate": 4.126589792033125e-06, "loss": 0.8727, "step": 120660 }, { "epoch": 0.8734898332935207, "grad_norm": 0.15119129419326782, "learning_rate": 4.126517405372538e-06, "loss": 0.8723, "step": 120670 }, { "epoch": 0.8735622199541069, "grad_norm": 0.14388339221477509, "learning_rate": 4.126445018711952e-06, "loss": 0.8686, "step": 120680 }, { "epoch": 0.873634606614693, "grad_norm": 0.14522388577461243, "learning_rate": 4.126372632051366e-06, "loss": 0.8696, "step": 120690 }, { "epoch": 0.8737069932752792, "grad_norm": 0.15283215045928955, "learning_rate": 4.12630024539078e-06, "loss": 0.8731, "step": 120700 }, { "epoch": 0.8737793799358654, "grad_norm": 0.15034788846969604, "learning_rate": 4.126227858730194e-06, "loss": 0.8837, "step": 120710 }, { "epoch": 0.8738517665964516, "grad_norm": 0.15683099627494812, "learning_rate": 4.126155472069607e-06, "loss": 0.8686, "step": 120720 }, { "epoch": 0.8739241532570378, "grad_norm": 0.19069434702396393, "learning_rate": 4.126083085409021e-06, "loss": 0.8651, "step": 120730 }, { "epoch": 0.8739965399176239, "grad_norm": 0.15311843156814575, "learning_rate": 4.126010698748435e-06, "loss": 0.8822, "step": 120740 }, { "epoch": 0.8740689265782101, "grad_norm": 0.1613323837518692, "learning_rate": 4.125938312087849e-06, "loss": 0.8744, "step": 120750 }, { "epoch": 0.8741413132387964, "grad_norm": 0.1653766632080078, "learning_rate": 4.125865925427263e-06, "loss": 0.8658, "step": 120760 }, { "epoch": 0.8742136998993826, "grad_norm": 0.20106831192970276, "learning_rate": 4.125793538766676e-06, "loss": 0.867, "step": 120770 }, { "epoch": 0.8742860865599688, "grad_norm": 0.1640590876340866, "learning_rate": 4.12572115210609e-06, "loss": 0.8731, "step": 120780 }, { "epoch": 0.8743584732205549, "grad_norm": 0.16234862804412842, "learning_rate": 4.125648765445504e-06, "loss": 0.8798, "step": 120790 }, { "epoch": 0.8744308598811411, "grad_norm": 0.16816940903663635, "learning_rate": 4.125576378784918e-06, "loss": 0.8721, "step": 120800 }, { "epoch": 0.8745032465417273, "grad_norm": 0.15265819430351257, "learning_rate": 4.1255039921243315e-06, "loss": 0.8658, "step": 120810 }, { "epoch": 0.8745756332023135, "grad_norm": 0.15596769750118256, "learning_rate": 4.125431605463745e-06, "loss": 0.8784, "step": 120820 }, { "epoch": 0.8746480198628996, "grad_norm": 0.15130725502967834, "learning_rate": 4.12535921880316e-06, "loss": 0.8847, "step": 120830 }, { "epoch": 0.8747204065234858, "grad_norm": 0.22041760385036469, "learning_rate": 4.125286832142573e-06, "loss": 0.8765, "step": 120840 }, { "epoch": 0.874792793184072, "grad_norm": 0.14767880737781525, "learning_rate": 4.125214445481987e-06, "loss": 0.8817, "step": 120850 }, { "epoch": 0.8748651798446583, "grad_norm": 0.21697768568992615, "learning_rate": 4.1251420588214005e-06, "loss": 0.8704, "step": 120860 }, { "epoch": 0.8749375665052445, "grad_norm": 0.1474272608757019, "learning_rate": 4.125069672160815e-06, "loss": 0.8718, "step": 120870 }, { "epoch": 0.8750099531658306, "grad_norm": 0.18004824221134186, "learning_rate": 4.1249972855002285e-06, "loss": 0.8832, "step": 120880 }, { "epoch": 0.8750823398264168, "grad_norm": 0.1615532785654068, "learning_rate": 4.124924898839642e-06, "loss": 0.869, "step": 120890 }, { "epoch": 0.875154726487003, "grad_norm": 0.14899741113185883, "learning_rate": 4.124852512179056e-06, "loss": 0.8693, "step": 120900 }, { "epoch": 0.8752271131475892, "grad_norm": 0.16730321943759918, "learning_rate": 4.12478012551847e-06, "loss": 0.8847, "step": 120910 }, { "epoch": 0.8752994998081753, "grad_norm": 0.15044957399368286, "learning_rate": 4.124707738857884e-06, "loss": 0.8684, "step": 120920 }, { "epoch": 0.8753718864687615, "grad_norm": 0.17763184010982513, "learning_rate": 4.1246353521972975e-06, "loss": 0.8853, "step": 120930 }, { "epoch": 0.8754442731293477, "grad_norm": 0.16393078863620758, "learning_rate": 4.124562965536711e-06, "loss": 0.8696, "step": 120940 }, { "epoch": 0.8755166597899339, "grad_norm": 0.15573619306087494, "learning_rate": 4.1244905788761255e-06, "loss": 0.8609, "step": 120950 }, { "epoch": 0.87558904645052, "grad_norm": 0.15625816583633423, "learning_rate": 4.124418192215539e-06, "loss": 0.8735, "step": 120960 }, { "epoch": 0.8756614331111063, "grad_norm": 0.14690211415290833, "learning_rate": 4.124345805554953e-06, "loss": 0.8688, "step": 120970 }, { "epoch": 0.8757338197716925, "grad_norm": 0.17025691270828247, "learning_rate": 4.124273418894366e-06, "loss": 0.8582, "step": 120980 }, { "epoch": 0.8758062064322787, "grad_norm": 0.159259632229805, "learning_rate": 4.124201032233781e-06, "loss": 0.8892, "step": 120990 }, { "epoch": 0.8758785930928649, "grad_norm": 0.16585783660411835, "learning_rate": 4.1241286455731945e-06, "loss": 0.8723, "step": 121000 }, { "epoch": 0.875950979753451, "grad_norm": 0.15400756895542145, "learning_rate": 4.124056258912608e-06, "loss": 0.8656, "step": 121010 }, { "epoch": 0.8760233664140372, "grad_norm": 0.16609486937522888, "learning_rate": 4.123983872252022e-06, "loss": 0.8726, "step": 121020 }, { "epoch": 0.8760957530746234, "grad_norm": 0.16967037320137024, "learning_rate": 4.123911485591435e-06, "loss": 0.8606, "step": 121030 }, { "epoch": 0.8761681397352096, "grad_norm": 0.1568365842103958, "learning_rate": 4.123839098930849e-06, "loss": 0.8669, "step": 121040 }, { "epoch": 0.8762405263957957, "grad_norm": 0.16394557058811188, "learning_rate": 4.1237667122702626e-06, "loss": 0.8723, "step": 121050 }, { "epoch": 0.8763129130563819, "grad_norm": 0.15548640489578247, "learning_rate": 4.123694325609677e-06, "loss": 0.8712, "step": 121060 }, { "epoch": 0.8763852997169681, "grad_norm": 0.1536964625120163, "learning_rate": 4.123621938949091e-06, "loss": 0.8664, "step": 121070 }, { "epoch": 0.8764576863775544, "grad_norm": 0.14496274292469025, "learning_rate": 4.123549552288504e-06, "loss": 0.8656, "step": 121080 }, { "epoch": 0.8765300730381406, "grad_norm": 0.16302034258842468, "learning_rate": 4.123477165627918e-06, "loss": 0.8763, "step": 121090 }, { "epoch": 0.8766024596987267, "grad_norm": 0.15400786697864532, "learning_rate": 4.123404778967332e-06, "loss": 0.8673, "step": 121100 }, { "epoch": 0.8766748463593129, "grad_norm": 0.1585310846567154, "learning_rate": 4.123332392306746e-06, "loss": 0.874, "step": 121110 }, { "epoch": 0.8767472330198991, "grad_norm": 0.1589457243680954, "learning_rate": 4.1232600056461596e-06, "loss": 0.8741, "step": 121120 }, { "epoch": 0.8768196196804853, "grad_norm": 0.16599847376346588, "learning_rate": 4.123187618985573e-06, "loss": 0.8783, "step": 121130 }, { "epoch": 0.8768920063410715, "grad_norm": 0.1981654316186905, "learning_rate": 4.123115232324988e-06, "loss": 0.8759, "step": 121140 }, { "epoch": 0.8769643930016576, "grad_norm": 0.19641701877117157, "learning_rate": 4.123042845664401e-06, "loss": 0.8701, "step": 121150 }, { "epoch": 0.8770367796622438, "grad_norm": 0.16379612684249878, "learning_rate": 4.122970459003815e-06, "loss": 0.8603, "step": 121160 }, { "epoch": 0.87710916632283, "grad_norm": 0.16375406086444855, "learning_rate": 4.1228980723432285e-06, "loss": 0.8772, "step": 121170 }, { "epoch": 0.8771815529834163, "grad_norm": 0.16766558587551117, "learning_rate": 4.122825685682643e-06, "loss": 0.887, "step": 121180 }, { "epoch": 0.8772539396440024, "grad_norm": 0.14811073243618011, "learning_rate": 4.1227532990220566e-06, "loss": 0.8706, "step": 121190 }, { "epoch": 0.8773263263045886, "grad_norm": 0.16084469854831696, "learning_rate": 4.12268091236147e-06, "loss": 0.873, "step": 121200 }, { "epoch": 0.8773987129651748, "grad_norm": 0.17017702758312225, "learning_rate": 4.122608525700884e-06, "loss": 0.873, "step": 121210 }, { "epoch": 0.877471099625761, "grad_norm": 0.1788460612297058, "learning_rate": 4.122536139040298e-06, "loss": 0.875, "step": 121220 }, { "epoch": 0.8775434862863472, "grad_norm": 0.1578466147184372, "learning_rate": 4.122463752379712e-06, "loss": 0.8784, "step": 121230 }, { "epoch": 0.8776158729469333, "grad_norm": 0.14853112399578094, "learning_rate": 4.1223913657191255e-06, "loss": 0.8869, "step": 121240 }, { "epoch": 0.8776882596075195, "grad_norm": 0.15817275643348694, "learning_rate": 4.122318979058539e-06, "loss": 0.871, "step": 121250 }, { "epoch": 0.8777606462681057, "grad_norm": 0.1494825780391693, "learning_rate": 4.122246592397954e-06, "loss": 0.8697, "step": 121260 }, { "epoch": 0.8778330329286919, "grad_norm": 0.1578221619129181, "learning_rate": 4.122174205737367e-06, "loss": 0.8748, "step": 121270 }, { "epoch": 0.877905419589278, "grad_norm": 0.14515846967697144, "learning_rate": 4.122101819076781e-06, "loss": 0.8718, "step": 121280 }, { "epoch": 0.8779778062498643, "grad_norm": 0.15049944818019867, "learning_rate": 4.1220294324161944e-06, "loss": 0.8619, "step": 121290 }, { "epoch": 0.8780501929104505, "grad_norm": 0.15272891521453857, "learning_rate": 4.121957045755609e-06, "loss": 0.8802, "step": 121300 }, { "epoch": 0.8781225795710367, "grad_norm": 0.16228324174880981, "learning_rate": 4.1218846590950225e-06, "loss": 0.8673, "step": 121310 }, { "epoch": 0.8781949662316229, "grad_norm": 0.15620221197605133, "learning_rate": 4.121812272434436e-06, "loss": 0.8788, "step": 121320 }, { "epoch": 0.878267352892209, "grad_norm": 0.16147710382938385, "learning_rate": 4.12173988577385e-06, "loss": 0.887, "step": 121330 }, { "epoch": 0.8783397395527952, "grad_norm": 0.15584251284599304, "learning_rate": 4.121667499113264e-06, "loss": 0.8716, "step": 121340 }, { "epoch": 0.8784121262133814, "grad_norm": 0.14494866132736206, "learning_rate": 4.121595112452678e-06, "loss": 0.8705, "step": 121350 }, { "epoch": 0.8784845128739676, "grad_norm": 0.15706604719161987, "learning_rate": 4.1215227257920914e-06, "loss": 0.8711, "step": 121360 }, { "epoch": 0.8785568995345537, "grad_norm": 0.15880705416202545, "learning_rate": 4.121450339131505e-06, "loss": 0.8671, "step": 121370 }, { "epoch": 0.8786292861951399, "grad_norm": 0.1646871417760849, "learning_rate": 4.1213779524709195e-06, "loss": 0.8686, "step": 121380 }, { "epoch": 0.8787016728557262, "grad_norm": 0.14550799131393433, "learning_rate": 4.121305565810333e-06, "loss": 0.8771, "step": 121390 }, { "epoch": 0.8787740595163124, "grad_norm": 0.16066545248031616, "learning_rate": 4.121233179149747e-06, "loss": 0.8774, "step": 121400 }, { "epoch": 0.8788464461768986, "grad_norm": 0.16294661164283752, "learning_rate": 4.12116079248916e-06, "loss": 0.889, "step": 121410 }, { "epoch": 0.8789188328374847, "grad_norm": 0.16425122320652008, "learning_rate": 4.121088405828574e-06, "loss": 0.8681, "step": 121420 }, { "epoch": 0.8789912194980709, "grad_norm": 0.1627512127161026, "learning_rate": 4.1210160191679884e-06, "loss": 0.8666, "step": 121430 }, { "epoch": 0.8790636061586571, "grad_norm": 0.13960957527160645, "learning_rate": 4.120943632507402e-06, "loss": 0.8701, "step": 121440 }, { "epoch": 0.8791359928192433, "grad_norm": 0.16151773929595947, "learning_rate": 4.120871245846816e-06, "loss": 0.8845, "step": 121450 }, { "epoch": 0.8792083794798294, "grad_norm": 0.14862598478794098, "learning_rate": 4.120798859186229e-06, "loss": 0.8767, "step": 121460 }, { "epoch": 0.8792807661404156, "grad_norm": 0.15480075776576996, "learning_rate": 4.120726472525644e-06, "loss": 0.8746, "step": 121470 }, { "epoch": 0.8793531528010018, "grad_norm": 0.15683260560035706, "learning_rate": 4.120654085865057e-06, "loss": 0.8714, "step": 121480 }, { "epoch": 0.879425539461588, "grad_norm": 0.15848079323768616, "learning_rate": 4.120581699204471e-06, "loss": 0.8831, "step": 121490 }, { "epoch": 0.8794979261221743, "grad_norm": 0.16250035166740417, "learning_rate": 4.120509312543885e-06, "loss": 0.8825, "step": 121500 }, { "epoch": 0.8795703127827604, "grad_norm": 0.21030132472515106, "learning_rate": 4.120436925883299e-06, "loss": 0.8641, "step": 121510 }, { "epoch": 0.8796426994433466, "grad_norm": 0.1805466115474701, "learning_rate": 4.120364539222713e-06, "loss": 0.872, "step": 121520 }, { "epoch": 0.8797150861039328, "grad_norm": 0.14784234762191772, "learning_rate": 4.120292152562126e-06, "loss": 0.8725, "step": 121530 }, { "epoch": 0.879787472764519, "grad_norm": 0.16358047723770142, "learning_rate": 4.12021976590154e-06, "loss": 0.8708, "step": 121540 }, { "epoch": 0.8798598594251051, "grad_norm": 0.16846664249897003, "learning_rate": 4.1201473792409535e-06, "loss": 0.874, "step": 121550 }, { "epoch": 0.8799322460856913, "grad_norm": 0.15270091593265533, "learning_rate": 4.120074992580367e-06, "loss": 0.8719, "step": 121560 }, { "epoch": 0.8800046327462775, "grad_norm": 0.1571713536977768, "learning_rate": 4.120002605919781e-06, "loss": 0.8754, "step": 121570 }, { "epoch": 0.8800770194068637, "grad_norm": 0.1490432471036911, "learning_rate": 4.119930219259195e-06, "loss": 0.8702, "step": 121580 }, { "epoch": 0.8801494060674498, "grad_norm": 0.14698143303394318, "learning_rate": 4.119857832598609e-06, "loss": 0.8756, "step": 121590 }, { "epoch": 0.880221792728036, "grad_norm": 0.14973314106464386, "learning_rate": 4.1197854459380225e-06, "loss": 0.8748, "step": 121600 }, { "epoch": 0.8802941793886223, "grad_norm": 0.20785781741142273, "learning_rate": 4.119713059277436e-06, "loss": 0.8852, "step": 121610 }, { "epoch": 0.8803665660492085, "grad_norm": 0.16038250923156738, "learning_rate": 4.1196406726168505e-06, "loss": 0.8608, "step": 121620 }, { "epoch": 0.8804389527097947, "grad_norm": 0.15908242762088776, "learning_rate": 4.119568285956264e-06, "loss": 0.8659, "step": 121630 }, { "epoch": 0.8805113393703808, "grad_norm": 0.15829870104789734, "learning_rate": 4.119495899295678e-06, "loss": 0.869, "step": 121640 }, { "epoch": 0.880583726030967, "grad_norm": 0.16468794643878937, "learning_rate": 4.119423512635091e-06, "loss": 0.8803, "step": 121650 }, { "epoch": 0.8806561126915532, "grad_norm": 0.15394102036952972, "learning_rate": 4.119351125974506e-06, "loss": 0.8783, "step": 121660 }, { "epoch": 0.8807284993521394, "grad_norm": 0.15285733342170715, "learning_rate": 4.1192787393139195e-06, "loss": 0.8683, "step": 121670 }, { "epoch": 0.8808008860127255, "grad_norm": 0.16268157958984375, "learning_rate": 4.119206352653333e-06, "loss": 0.8668, "step": 121680 }, { "epoch": 0.8808732726733117, "grad_norm": 0.15528251230716705, "learning_rate": 4.119133965992747e-06, "loss": 0.8625, "step": 121690 }, { "epoch": 0.8809456593338979, "grad_norm": 0.15688292682170868, "learning_rate": 4.119061579332161e-06, "loss": 0.8734, "step": 121700 }, { "epoch": 0.8810180459944842, "grad_norm": 0.15693509578704834, "learning_rate": 4.118989192671575e-06, "loss": 0.8748, "step": 121710 }, { "epoch": 0.8810904326550704, "grad_norm": 0.16579411923885345, "learning_rate": 4.118916806010988e-06, "loss": 0.8751, "step": 121720 }, { "epoch": 0.8811628193156565, "grad_norm": 0.16935928165912628, "learning_rate": 4.118844419350402e-06, "loss": 0.8638, "step": 121730 }, { "epoch": 0.8812352059762427, "grad_norm": 0.194980189204216, "learning_rate": 4.1187720326898165e-06, "loss": 0.8801, "step": 121740 }, { "epoch": 0.8813075926368289, "grad_norm": 0.16746121644973755, "learning_rate": 4.11869964602923e-06, "loss": 0.8841, "step": 121750 }, { "epoch": 0.8813799792974151, "grad_norm": 0.1961222141981125, "learning_rate": 4.118627259368644e-06, "loss": 0.8761, "step": 121760 }, { "epoch": 0.8814523659580012, "grad_norm": 0.15616725385189056, "learning_rate": 4.118554872708057e-06, "loss": 0.869, "step": 121770 }, { "epoch": 0.8815247526185874, "grad_norm": 0.15364345908164978, "learning_rate": 4.118482486047472e-06, "loss": 0.8766, "step": 121780 }, { "epoch": 0.8815971392791736, "grad_norm": 0.15156231820583344, "learning_rate": 4.118410099386885e-06, "loss": 0.8673, "step": 121790 }, { "epoch": 0.8816695259397598, "grad_norm": 0.2237502783536911, "learning_rate": 4.118337712726299e-06, "loss": 0.8718, "step": 121800 }, { "epoch": 0.881741912600346, "grad_norm": 0.14461927115917206, "learning_rate": 4.118265326065713e-06, "loss": 0.872, "step": 121810 }, { "epoch": 0.8818142992609322, "grad_norm": 0.17010053992271423, "learning_rate": 4.118192939405127e-06, "loss": 0.8745, "step": 121820 }, { "epoch": 0.8818866859215184, "grad_norm": 0.16288408637046814, "learning_rate": 4.118120552744541e-06, "loss": 0.8762, "step": 121830 }, { "epoch": 0.8819590725821046, "grad_norm": 0.1665002405643463, "learning_rate": 4.118048166083954e-06, "loss": 0.8678, "step": 121840 }, { "epoch": 0.8820314592426908, "grad_norm": 0.16218537092208862, "learning_rate": 4.117975779423368e-06, "loss": 0.8829, "step": 121850 }, { "epoch": 0.882103845903277, "grad_norm": 0.14851604402065277, "learning_rate": 4.117903392762782e-06, "loss": 0.874, "step": 121860 }, { "epoch": 0.8821762325638631, "grad_norm": 0.1481228917837143, "learning_rate": 4.117831006102196e-06, "loss": 0.868, "step": 121870 }, { "epoch": 0.8822486192244493, "grad_norm": 0.16877874732017517, "learning_rate": 4.11775861944161e-06, "loss": 0.8754, "step": 121880 }, { "epoch": 0.8823210058850355, "grad_norm": 0.15102118253707886, "learning_rate": 4.117686232781023e-06, "loss": 0.8721, "step": 121890 }, { "epoch": 0.8823933925456217, "grad_norm": 0.15276771783828735, "learning_rate": 4.117613846120438e-06, "loss": 0.8833, "step": 121900 }, { "epoch": 0.8824657792062078, "grad_norm": 0.14909601211547852, "learning_rate": 4.117541459459851e-06, "loss": 0.8613, "step": 121910 }, { "epoch": 0.8825381658667941, "grad_norm": 0.15736235678195953, "learning_rate": 4.117469072799265e-06, "loss": 0.8756, "step": 121920 }, { "epoch": 0.8826105525273803, "grad_norm": 0.1750316172838211, "learning_rate": 4.1173966861386786e-06, "loss": 0.8851, "step": 121930 }, { "epoch": 0.8826829391879665, "grad_norm": 0.14961692690849304, "learning_rate": 4.117324299478093e-06, "loss": 0.871, "step": 121940 }, { "epoch": 0.8827553258485527, "grad_norm": 0.13990993797779083, "learning_rate": 4.117251912817507e-06, "loss": 0.8573, "step": 121950 }, { "epoch": 0.8828277125091388, "grad_norm": 0.1395825743675232, "learning_rate": 4.11717952615692e-06, "loss": 0.8589, "step": 121960 }, { "epoch": 0.882900099169725, "grad_norm": 0.15364444255828857, "learning_rate": 4.117107139496334e-06, "loss": 0.867, "step": 121970 }, { "epoch": 0.8829724858303112, "grad_norm": 3.056195020675659, "learning_rate": 4.117034752835748e-06, "loss": 0.882, "step": 121980 }, { "epoch": 0.8830448724908974, "grad_norm": 0.14895865321159363, "learning_rate": 4.116962366175162e-06, "loss": 0.8752, "step": 121990 }, { "epoch": 0.8831172591514835, "grad_norm": 0.1695874035358429, "learning_rate": 4.116889979514576e-06, "loss": 0.877, "step": 122000 }, { "epoch": 0.8831896458120697, "grad_norm": 0.1436607986688614, "learning_rate": 4.116817592853989e-06, "loss": 0.8785, "step": 122010 }, { "epoch": 0.8832620324726559, "grad_norm": 0.15994448959827423, "learning_rate": 4.116745206193403e-06, "loss": 0.8697, "step": 122020 }, { "epoch": 0.8833344191332422, "grad_norm": 0.15391020476818085, "learning_rate": 4.116672819532817e-06, "loss": 0.8702, "step": 122030 }, { "epoch": 0.8834068057938284, "grad_norm": 0.1519947201013565, "learning_rate": 4.116600432872231e-06, "loss": 0.8728, "step": 122040 }, { "epoch": 0.8834791924544145, "grad_norm": 0.14958973228931427, "learning_rate": 4.1165280462116445e-06, "loss": 0.8603, "step": 122050 }, { "epoch": 0.8835515791150007, "grad_norm": 0.15004867315292358, "learning_rate": 4.116455659551058e-06, "loss": 0.878, "step": 122060 }, { "epoch": 0.8836239657755869, "grad_norm": 0.16854992508888245, "learning_rate": 4.116383272890473e-06, "loss": 0.8622, "step": 122070 }, { "epoch": 0.8836963524361731, "grad_norm": 0.14855122566223145, "learning_rate": 4.116310886229885e-06, "loss": 0.8679, "step": 122080 }, { "epoch": 0.8837687390967592, "grad_norm": 0.17604343593120575, "learning_rate": 4.1162384995693e-06, "loss": 0.8664, "step": 122090 }, { "epoch": 0.8838411257573454, "grad_norm": 0.15962982177734375, "learning_rate": 4.1161661129087134e-06, "loss": 0.8831, "step": 122100 }, { "epoch": 0.8839135124179316, "grad_norm": 0.15554451942443848, "learning_rate": 4.116093726248127e-06, "loss": 0.8701, "step": 122110 }, { "epoch": 0.8839858990785178, "grad_norm": 0.15746499598026276, "learning_rate": 4.116021339587541e-06, "loss": 0.8682, "step": 122120 }, { "epoch": 0.8840582857391039, "grad_norm": 0.1579037606716156, "learning_rate": 4.115948952926955e-06, "loss": 0.8738, "step": 122130 }, { "epoch": 0.8841306723996902, "grad_norm": 0.16659896075725555, "learning_rate": 4.115876566266369e-06, "loss": 0.874, "step": 122140 }, { "epoch": 0.8842030590602764, "grad_norm": 0.183345228433609, "learning_rate": 4.115804179605782e-06, "loss": 0.8697, "step": 122150 }, { "epoch": 0.8842754457208626, "grad_norm": 0.15425361692905426, "learning_rate": 4.115731792945196e-06, "loss": 0.8637, "step": 122160 }, { "epoch": 0.8843478323814488, "grad_norm": 0.1493588238954544, "learning_rate": 4.1156594062846104e-06, "loss": 0.8512, "step": 122170 }, { "epoch": 0.8844202190420349, "grad_norm": 0.14579205214977264, "learning_rate": 4.115587019624024e-06, "loss": 0.8685, "step": 122180 }, { "epoch": 0.8844926057026211, "grad_norm": 0.14941352605819702, "learning_rate": 4.115514632963438e-06, "loss": 0.8795, "step": 122190 }, { "epoch": 0.8845649923632073, "grad_norm": 0.16284583508968353, "learning_rate": 4.115442246302851e-06, "loss": 0.8814, "step": 122200 }, { "epoch": 0.8846373790237935, "grad_norm": 0.15186572074890137, "learning_rate": 4.115369859642265e-06, "loss": 0.8693, "step": 122210 }, { "epoch": 0.8847097656843796, "grad_norm": 0.15203732252120972, "learning_rate": 4.115297472981679e-06, "loss": 0.879, "step": 122220 }, { "epoch": 0.8847821523449658, "grad_norm": 0.15315940976142883, "learning_rate": 4.115225086321093e-06, "loss": 0.8679, "step": 122230 }, { "epoch": 0.8848545390055521, "grad_norm": 0.15874212980270386, "learning_rate": 4.115152699660507e-06, "loss": 0.8919, "step": 122240 }, { "epoch": 0.8849269256661383, "grad_norm": 0.15111921727657318, "learning_rate": 4.11508031299992e-06, "loss": 0.876, "step": 122250 }, { "epoch": 0.8849993123267245, "grad_norm": 0.16939522325992584, "learning_rate": 4.115007926339335e-06, "loss": 0.8728, "step": 122260 }, { "epoch": 0.8850716989873106, "grad_norm": 0.14914065599441528, "learning_rate": 4.114935539678748e-06, "loss": 0.8666, "step": 122270 }, { "epoch": 0.8851440856478968, "grad_norm": 0.17796002328395844, "learning_rate": 4.114863153018162e-06, "loss": 0.86, "step": 122280 }, { "epoch": 0.885216472308483, "grad_norm": 0.14601661264896393, "learning_rate": 4.1147907663575755e-06, "loss": 0.8799, "step": 122290 }, { "epoch": 0.8852888589690692, "grad_norm": 0.14311879873275757, "learning_rate": 4.11471837969699e-06, "loss": 0.872, "step": 122300 }, { "epoch": 0.8853612456296553, "grad_norm": 0.14968661963939667, "learning_rate": 4.114645993036404e-06, "loss": 0.8701, "step": 122310 }, { "epoch": 0.8854336322902415, "grad_norm": 0.15330290794372559, "learning_rate": 4.114573606375817e-06, "loss": 0.8773, "step": 122320 }, { "epoch": 0.8855060189508277, "grad_norm": 0.14958740770816803, "learning_rate": 4.114501219715231e-06, "loss": 0.8634, "step": 122330 }, { "epoch": 0.8855784056114139, "grad_norm": 0.15254485607147217, "learning_rate": 4.114428833054645e-06, "loss": 0.863, "step": 122340 }, { "epoch": 0.8856507922720002, "grad_norm": 0.18143147230148315, "learning_rate": 4.114356446394059e-06, "loss": 0.862, "step": 122350 }, { "epoch": 0.8857231789325863, "grad_norm": 0.1426178365945816, "learning_rate": 4.1142840597334725e-06, "loss": 0.8654, "step": 122360 }, { "epoch": 0.8857955655931725, "grad_norm": 0.15494516491889954, "learning_rate": 4.114211673072886e-06, "loss": 0.8658, "step": 122370 }, { "epoch": 0.8858679522537587, "grad_norm": 0.15428562462329865, "learning_rate": 4.114139286412301e-06, "loss": 0.8713, "step": 122380 }, { "epoch": 0.8859403389143449, "grad_norm": 0.14963556826114655, "learning_rate": 4.114066899751714e-06, "loss": 0.8621, "step": 122390 }, { "epoch": 0.886012725574931, "grad_norm": 0.16026413440704346, "learning_rate": 4.113994513091128e-06, "loss": 0.8767, "step": 122400 }, { "epoch": 0.8860851122355172, "grad_norm": 0.14730548858642578, "learning_rate": 4.1139221264305415e-06, "loss": 0.866, "step": 122410 }, { "epoch": 0.8861574988961034, "grad_norm": 0.14938196539878845, "learning_rate": 4.113849739769956e-06, "loss": 0.8596, "step": 122420 }, { "epoch": 0.8862298855566896, "grad_norm": 0.16436588764190674, "learning_rate": 4.1137773531093695e-06, "loss": 0.8769, "step": 122430 }, { "epoch": 0.8863022722172758, "grad_norm": 0.15649719536304474, "learning_rate": 4.113704966448783e-06, "loss": 0.872, "step": 122440 }, { "epoch": 0.886374658877862, "grad_norm": 0.1726832240819931, "learning_rate": 4.113632579788197e-06, "loss": 0.8779, "step": 122450 }, { "epoch": 0.8864470455384482, "grad_norm": 0.14762026071548462, "learning_rate": 4.113560193127611e-06, "loss": 0.8725, "step": 122460 }, { "epoch": 0.8865194321990344, "grad_norm": 0.14345690608024597, "learning_rate": 4.113487806467025e-06, "loss": 0.8574, "step": 122470 }, { "epoch": 0.8865918188596206, "grad_norm": 0.16033095121383667, "learning_rate": 4.1134154198064385e-06, "loss": 0.8639, "step": 122480 }, { "epoch": 0.8866642055202067, "grad_norm": 0.14597992599010468, "learning_rate": 4.113343033145852e-06, "loss": 0.8729, "step": 122490 }, { "epoch": 0.8867365921807929, "grad_norm": 0.16098947823047638, "learning_rate": 4.1132706464852666e-06, "loss": 0.8659, "step": 122500 }, { "epoch": 0.8868089788413791, "grad_norm": 0.14861907064914703, "learning_rate": 4.11319825982468e-06, "loss": 0.8686, "step": 122510 }, { "epoch": 0.8868813655019653, "grad_norm": 0.15192726254463196, "learning_rate": 4.113125873164094e-06, "loss": 0.873, "step": 122520 }, { "epoch": 0.8869537521625515, "grad_norm": 0.16257117688655853, "learning_rate": 4.113053486503507e-06, "loss": 0.8881, "step": 122530 }, { "epoch": 0.8870261388231376, "grad_norm": 0.15565155446529388, "learning_rate": 4.112981099842922e-06, "loss": 0.8813, "step": 122540 }, { "epoch": 0.8870985254837238, "grad_norm": 0.16683930158615112, "learning_rate": 4.1129087131823355e-06, "loss": 0.8844, "step": 122550 }, { "epoch": 0.8871709121443101, "grad_norm": 0.16827575862407684, "learning_rate": 4.112836326521749e-06, "loss": 0.8845, "step": 122560 }, { "epoch": 0.8872432988048963, "grad_norm": 0.14905016124248505, "learning_rate": 4.112763939861163e-06, "loss": 0.8852, "step": 122570 }, { "epoch": 0.8873156854654825, "grad_norm": 0.15825332701206207, "learning_rate": 4.112691553200577e-06, "loss": 0.8702, "step": 122580 }, { "epoch": 0.8873880721260686, "grad_norm": 0.15095843374729156, "learning_rate": 4.112619166539991e-06, "loss": 0.8667, "step": 122590 }, { "epoch": 0.8874604587866548, "grad_norm": 0.15348178148269653, "learning_rate": 4.112546779879404e-06, "loss": 0.8696, "step": 122600 }, { "epoch": 0.887532845447241, "grad_norm": 0.14814035594463348, "learning_rate": 4.112474393218818e-06, "loss": 0.8742, "step": 122610 }, { "epoch": 0.8876052321078272, "grad_norm": 0.15248076617717743, "learning_rate": 4.112402006558232e-06, "loss": 0.8855, "step": 122620 }, { "epoch": 0.8876776187684133, "grad_norm": 0.15458175539970398, "learning_rate": 4.112329619897645e-06, "loss": 0.8722, "step": 122630 }, { "epoch": 0.8877500054289995, "grad_norm": 0.15646861493587494, "learning_rate": 4.112257233237059e-06, "loss": 0.87, "step": 122640 }, { "epoch": 0.8878223920895857, "grad_norm": 0.14675036072731018, "learning_rate": 4.112184846576473e-06, "loss": 0.8763, "step": 122650 }, { "epoch": 0.8878947787501719, "grad_norm": 0.16464652121067047, "learning_rate": 4.112112459915887e-06, "loss": 0.8817, "step": 122660 }, { "epoch": 0.8879671654107582, "grad_norm": 0.15563102066516876, "learning_rate": 4.1120400732553006e-06, "loss": 0.8728, "step": 122670 }, { "epoch": 0.8880395520713443, "grad_norm": 0.15346914529800415, "learning_rate": 4.111967686594714e-06, "loss": 0.8636, "step": 122680 }, { "epoch": 0.8881119387319305, "grad_norm": 0.15571348369121552, "learning_rate": 4.111895299934129e-06, "loss": 0.8643, "step": 122690 }, { "epoch": 0.8881843253925167, "grad_norm": 0.16230016946792603, "learning_rate": 4.111822913273542e-06, "loss": 0.8615, "step": 122700 }, { "epoch": 0.8882567120531029, "grad_norm": 0.1600496470928192, "learning_rate": 4.111750526612956e-06, "loss": 0.8824, "step": 122710 }, { "epoch": 0.888329098713689, "grad_norm": 0.14802932739257812, "learning_rate": 4.1116781399523695e-06, "loss": 0.8607, "step": 122720 }, { "epoch": 0.8884014853742752, "grad_norm": 0.14707952737808228, "learning_rate": 4.111605753291784e-06, "loss": 0.8645, "step": 122730 }, { "epoch": 0.8884738720348614, "grad_norm": 0.15505805611610413, "learning_rate": 4.1115333666311976e-06, "loss": 0.874, "step": 122740 }, { "epoch": 0.8885462586954476, "grad_norm": 0.1629432737827301, "learning_rate": 4.111460979970611e-06, "loss": 0.8765, "step": 122750 }, { "epoch": 0.8886186453560337, "grad_norm": 0.15821245312690735, "learning_rate": 4.111388593310025e-06, "loss": 0.8836, "step": 122760 }, { "epoch": 0.88869103201662, "grad_norm": 0.1640791893005371, "learning_rate": 4.111316206649439e-06, "loss": 0.8718, "step": 122770 }, { "epoch": 0.8887634186772062, "grad_norm": 0.1496168076992035, "learning_rate": 4.111243819988853e-06, "loss": 0.8776, "step": 122780 }, { "epoch": 0.8888358053377924, "grad_norm": 0.15713010728359222, "learning_rate": 4.1111714333282665e-06, "loss": 0.8767, "step": 122790 }, { "epoch": 0.8889081919983786, "grad_norm": 0.15316422283649445, "learning_rate": 4.11109904666768e-06, "loss": 0.8752, "step": 122800 }, { "epoch": 0.8889805786589647, "grad_norm": 0.17414002120494843, "learning_rate": 4.111026660007095e-06, "loss": 0.8743, "step": 122810 }, { "epoch": 0.8890529653195509, "grad_norm": 0.1568707376718521, "learning_rate": 4.110954273346508e-06, "loss": 0.8779, "step": 122820 }, { "epoch": 0.8891253519801371, "grad_norm": 0.15081565082073212, "learning_rate": 4.110881886685922e-06, "loss": 0.8679, "step": 122830 }, { "epoch": 0.8891977386407233, "grad_norm": 0.14779068529605865, "learning_rate": 4.1108095000253354e-06, "loss": 0.8696, "step": 122840 }, { "epoch": 0.8892701253013094, "grad_norm": 0.17924214899539948, "learning_rate": 4.110737113364749e-06, "loss": 0.8915, "step": 122850 }, { "epoch": 0.8893425119618956, "grad_norm": 0.1565881222486496, "learning_rate": 4.1106647267041635e-06, "loss": 0.8798, "step": 122860 }, { "epoch": 0.8894148986224818, "grad_norm": 0.16054075956344604, "learning_rate": 4.110592340043577e-06, "loss": 0.8543, "step": 122870 }, { "epoch": 0.8894872852830681, "grad_norm": 0.1694210320711136, "learning_rate": 4.110519953382991e-06, "loss": 0.877, "step": 122880 }, { "epoch": 0.8895596719436543, "grad_norm": 0.15159261226654053, "learning_rate": 4.110447566722404e-06, "loss": 0.8787, "step": 122890 }, { "epoch": 0.8896320586042404, "grad_norm": 0.1697186380624771, "learning_rate": 4.110375180061819e-06, "loss": 0.8796, "step": 122900 }, { "epoch": 0.8897044452648266, "grad_norm": 0.1503906100988388, "learning_rate": 4.1103027934012324e-06, "loss": 0.873, "step": 122910 }, { "epoch": 0.8897768319254128, "grad_norm": 0.14617328345775604, "learning_rate": 4.110230406740646e-06, "loss": 0.8758, "step": 122920 }, { "epoch": 0.889849218585999, "grad_norm": 0.16019441187381744, "learning_rate": 4.11015802008006e-06, "loss": 0.8714, "step": 122930 }, { "epoch": 0.8899216052465851, "grad_norm": 0.14964258670806885, "learning_rate": 4.110085633419474e-06, "loss": 0.8641, "step": 122940 }, { "epoch": 0.8899939919071713, "grad_norm": 0.15857601165771484, "learning_rate": 4.110013246758888e-06, "loss": 0.8818, "step": 122950 }, { "epoch": 0.8900663785677575, "grad_norm": 0.1659708172082901, "learning_rate": 4.109940860098301e-06, "loss": 0.8809, "step": 122960 }, { "epoch": 0.8901387652283437, "grad_norm": 0.15709270536899567, "learning_rate": 4.109868473437715e-06, "loss": 0.878, "step": 122970 }, { "epoch": 0.89021115188893, "grad_norm": 0.15638253092765808, "learning_rate": 4.1097960867771294e-06, "loss": 0.88, "step": 122980 }, { "epoch": 0.8902835385495161, "grad_norm": 0.2203262895345688, "learning_rate": 4.109723700116543e-06, "loss": 0.872, "step": 122990 }, { "epoch": 0.8903559252101023, "grad_norm": 0.1518968641757965, "learning_rate": 4.109651313455957e-06, "loss": 0.8815, "step": 123000 }, { "epoch": 0.8904283118706885, "grad_norm": 0.1435840129852295, "learning_rate": 4.10957892679537e-06, "loss": 0.8743, "step": 123010 }, { "epoch": 0.8905006985312747, "grad_norm": 0.15603044629096985, "learning_rate": 4.109506540134785e-06, "loss": 0.8781, "step": 123020 }, { "epoch": 0.8905730851918608, "grad_norm": 0.15523672103881836, "learning_rate": 4.109434153474198e-06, "loss": 0.8721, "step": 123030 }, { "epoch": 0.890645471852447, "grad_norm": 0.15901529788970947, "learning_rate": 4.109361766813612e-06, "loss": 0.8793, "step": 123040 }, { "epoch": 0.8907178585130332, "grad_norm": 0.14832401275634766, "learning_rate": 4.109289380153026e-06, "loss": 0.8713, "step": 123050 }, { "epoch": 0.8907902451736194, "grad_norm": 0.14594705402851105, "learning_rate": 4.10921699349244e-06, "loss": 0.8588, "step": 123060 }, { "epoch": 0.8908626318342056, "grad_norm": 0.21617244184017181, "learning_rate": 4.109144606831854e-06, "loss": 0.8714, "step": 123070 }, { "epoch": 0.8909350184947917, "grad_norm": 0.15614116191864014, "learning_rate": 4.109072220171267e-06, "loss": 0.8754, "step": 123080 }, { "epoch": 0.891007405155378, "grad_norm": 0.17854410409927368, "learning_rate": 4.108999833510681e-06, "loss": 0.8709, "step": 123090 }, { "epoch": 0.8910797918159642, "grad_norm": 0.146412193775177, "learning_rate": 4.108927446850095e-06, "loss": 0.8688, "step": 123100 }, { "epoch": 0.8911521784765504, "grad_norm": 0.16947603225708008, "learning_rate": 4.108855060189509e-06, "loss": 0.8774, "step": 123110 }, { "epoch": 0.8912245651371365, "grad_norm": 0.16192446649074554, "learning_rate": 4.108782673528923e-06, "loss": 0.8858, "step": 123120 }, { "epoch": 0.8912969517977227, "grad_norm": 0.15947166085243225, "learning_rate": 4.108710286868336e-06, "loss": 0.8668, "step": 123130 }, { "epoch": 0.8913693384583089, "grad_norm": 0.14823932945728302, "learning_rate": 4.108637900207751e-06, "loss": 0.8724, "step": 123140 }, { "epoch": 0.8914417251188951, "grad_norm": 0.15632663667201996, "learning_rate": 4.1085655135471635e-06, "loss": 0.8697, "step": 123150 }, { "epoch": 0.8915141117794813, "grad_norm": 0.18125022947788239, "learning_rate": 4.108493126886577e-06, "loss": 0.8844, "step": 123160 }, { "epoch": 0.8915864984400674, "grad_norm": 0.1472315490245819, "learning_rate": 4.1084207402259915e-06, "loss": 0.8656, "step": 123170 }, { "epoch": 0.8916588851006536, "grad_norm": 0.16403494775295258, "learning_rate": 4.108348353565405e-06, "loss": 0.8732, "step": 123180 }, { "epoch": 0.8917312717612398, "grad_norm": 0.17012234032154083, "learning_rate": 4.108275966904819e-06, "loss": 0.8715, "step": 123190 }, { "epoch": 0.8918036584218261, "grad_norm": 0.16016776859760284, "learning_rate": 4.108203580244232e-06, "loss": 0.8698, "step": 123200 }, { "epoch": 0.8918760450824122, "grad_norm": 0.1564156860113144, "learning_rate": 4.108131193583647e-06, "loss": 0.8808, "step": 123210 }, { "epoch": 0.8919484317429984, "grad_norm": 0.14456337690353394, "learning_rate": 4.1080588069230605e-06, "loss": 0.8633, "step": 123220 }, { "epoch": 0.8920208184035846, "grad_norm": 0.1573518067598343, "learning_rate": 4.107986420262474e-06, "loss": 0.873, "step": 123230 }, { "epoch": 0.8920932050641708, "grad_norm": 0.14703720808029175, "learning_rate": 4.107914033601888e-06, "loss": 0.8733, "step": 123240 }, { "epoch": 0.892165591724757, "grad_norm": 0.14381085336208344, "learning_rate": 4.107841646941302e-06, "loss": 0.8629, "step": 123250 }, { "epoch": 0.8922379783853431, "grad_norm": 0.15476030111312866, "learning_rate": 4.107769260280716e-06, "loss": 0.8765, "step": 123260 }, { "epoch": 0.8923103650459293, "grad_norm": 0.1462145894765854, "learning_rate": 4.107696873620129e-06, "loss": 0.8677, "step": 123270 }, { "epoch": 0.8923827517065155, "grad_norm": 0.18984442949295044, "learning_rate": 4.107624486959543e-06, "loss": 0.8851, "step": 123280 }, { "epoch": 0.8924551383671017, "grad_norm": 0.15868966281414032, "learning_rate": 4.1075521002989575e-06, "loss": 0.864, "step": 123290 }, { "epoch": 0.892527525027688, "grad_norm": 0.1640649437904358, "learning_rate": 4.107479713638371e-06, "loss": 0.8773, "step": 123300 }, { "epoch": 0.8925999116882741, "grad_norm": 0.16619038581848145, "learning_rate": 4.107407326977785e-06, "loss": 0.8643, "step": 123310 }, { "epoch": 0.8926722983488603, "grad_norm": 0.14695781469345093, "learning_rate": 4.107334940317198e-06, "loss": 0.8755, "step": 123320 }, { "epoch": 0.8927446850094465, "grad_norm": 0.14879372715950012, "learning_rate": 4.107262553656613e-06, "loss": 0.8826, "step": 123330 }, { "epoch": 0.8928170716700327, "grad_norm": 0.16833491623401642, "learning_rate": 4.107190166996026e-06, "loss": 0.8893, "step": 123340 }, { "epoch": 0.8928894583306188, "grad_norm": 0.15543371438980103, "learning_rate": 4.10711778033544e-06, "loss": 0.8869, "step": 123350 }, { "epoch": 0.892961844991205, "grad_norm": 0.15344348549842834, "learning_rate": 4.107045393674854e-06, "loss": 0.8791, "step": 123360 }, { "epoch": 0.8930342316517912, "grad_norm": 0.15514753758907318, "learning_rate": 4.106973007014268e-06, "loss": 0.8844, "step": 123370 }, { "epoch": 0.8931066183123774, "grad_norm": 0.16524678468704224, "learning_rate": 4.106900620353682e-06, "loss": 0.867, "step": 123380 }, { "epoch": 0.8931790049729635, "grad_norm": 0.16478219628334045, "learning_rate": 4.106828233693095e-06, "loss": 0.8817, "step": 123390 }, { "epoch": 0.8932513916335497, "grad_norm": 0.15810853242874146, "learning_rate": 4.106755847032509e-06, "loss": 0.8733, "step": 123400 }, { "epoch": 0.893323778294136, "grad_norm": 0.1545628160238266, "learning_rate": 4.106683460371923e-06, "loss": 0.8761, "step": 123410 }, { "epoch": 0.8933961649547222, "grad_norm": 0.176656574010849, "learning_rate": 4.106611073711337e-06, "loss": 0.8779, "step": 123420 }, { "epoch": 0.8934685516153084, "grad_norm": 0.15695984661579132, "learning_rate": 4.106538687050751e-06, "loss": 0.8856, "step": 123430 }, { "epoch": 0.8935409382758945, "grad_norm": 0.1536366492509842, "learning_rate": 4.106466300390164e-06, "loss": 0.8712, "step": 123440 }, { "epoch": 0.8936133249364807, "grad_norm": 0.2222108393907547, "learning_rate": 4.106393913729578e-06, "loss": 0.8721, "step": 123450 }, { "epoch": 0.8936857115970669, "grad_norm": 0.15123595297336578, "learning_rate": 4.106321527068992e-06, "loss": 0.8617, "step": 123460 }, { "epoch": 0.8937580982576531, "grad_norm": 0.14488713443279266, "learning_rate": 4.106249140408406e-06, "loss": 0.8686, "step": 123470 }, { "epoch": 0.8938304849182392, "grad_norm": 0.18304714560508728, "learning_rate": 4.1061767537478196e-06, "loss": 0.8768, "step": 123480 }, { "epoch": 0.8939028715788254, "grad_norm": 0.16296693682670593, "learning_rate": 4.106104367087233e-06, "loss": 0.8661, "step": 123490 }, { "epoch": 0.8939752582394116, "grad_norm": 0.14711807668209076, "learning_rate": 4.106031980426648e-06, "loss": 0.8769, "step": 123500 }, { "epoch": 0.8940476448999978, "grad_norm": 0.1431189477443695, "learning_rate": 4.105959593766061e-06, "loss": 0.8894, "step": 123510 }, { "epoch": 0.8941200315605841, "grad_norm": 0.1538097858428955, "learning_rate": 4.105887207105475e-06, "loss": 0.8737, "step": 123520 }, { "epoch": 0.8941924182211702, "grad_norm": 0.15925562381744385, "learning_rate": 4.1058148204448885e-06, "loss": 0.8678, "step": 123530 }, { "epoch": 0.8942648048817564, "grad_norm": 0.15328852832317352, "learning_rate": 4.105742433784303e-06, "loss": 0.8692, "step": 123540 }, { "epoch": 0.8943371915423426, "grad_norm": 0.17088687419891357, "learning_rate": 4.105670047123717e-06, "loss": 0.8799, "step": 123550 }, { "epoch": 0.8944095782029288, "grad_norm": 0.16045989096164703, "learning_rate": 4.10559766046313e-06, "loss": 0.8686, "step": 123560 }, { "epoch": 0.894481964863515, "grad_norm": 0.17168870568275452, "learning_rate": 4.105525273802544e-06, "loss": 0.8733, "step": 123570 }, { "epoch": 0.8945543515241011, "grad_norm": 0.16414594650268555, "learning_rate": 4.105452887141958e-06, "loss": 0.8873, "step": 123580 }, { "epoch": 0.8946267381846873, "grad_norm": 0.15379905700683594, "learning_rate": 4.105380500481372e-06, "loss": 0.8712, "step": 123590 }, { "epoch": 0.8946991248452735, "grad_norm": 0.1613912731409073, "learning_rate": 4.1053081138207855e-06, "loss": 0.8756, "step": 123600 }, { "epoch": 0.8947715115058597, "grad_norm": 0.1442667692899704, "learning_rate": 4.105235727160199e-06, "loss": 0.8774, "step": 123610 }, { "epoch": 0.8948438981664459, "grad_norm": 0.14742349088191986, "learning_rate": 4.105163340499614e-06, "loss": 0.8749, "step": 123620 }, { "epoch": 0.8949162848270321, "grad_norm": 0.15726658701896667, "learning_rate": 4.105090953839027e-06, "loss": 0.8714, "step": 123630 }, { "epoch": 0.8949886714876183, "grad_norm": 0.1479179859161377, "learning_rate": 4.105018567178441e-06, "loss": 0.8764, "step": 123640 }, { "epoch": 0.8950610581482045, "grad_norm": 0.14696402847766876, "learning_rate": 4.1049461805178544e-06, "loss": 0.8743, "step": 123650 }, { "epoch": 0.8951334448087906, "grad_norm": 0.19279779493808746, "learning_rate": 4.104873793857269e-06, "loss": 0.876, "step": 123660 }, { "epoch": 0.8952058314693768, "grad_norm": 0.1497318148612976, "learning_rate": 4.1048014071966825e-06, "loss": 0.8704, "step": 123670 }, { "epoch": 0.895278218129963, "grad_norm": 0.16485776007175446, "learning_rate": 4.104729020536095e-06, "loss": 0.882, "step": 123680 }, { "epoch": 0.8953506047905492, "grad_norm": 0.18929380178451538, "learning_rate": 4.10465663387551e-06, "loss": 0.8752, "step": 123690 }, { "epoch": 0.8954229914511354, "grad_norm": 0.1474197655916214, "learning_rate": 4.104584247214923e-06, "loss": 0.8665, "step": 123700 }, { "epoch": 0.8954953781117215, "grad_norm": 0.1479775607585907, "learning_rate": 4.104511860554337e-06, "loss": 0.8665, "step": 123710 }, { "epoch": 0.8955677647723077, "grad_norm": 0.26325809955596924, "learning_rate": 4.104439473893751e-06, "loss": 0.8675, "step": 123720 }, { "epoch": 0.895640151432894, "grad_norm": 0.16428937017917633, "learning_rate": 4.104367087233165e-06, "loss": 0.8761, "step": 123730 }, { "epoch": 0.8957125380934802, "grad_norm": 0.14757543802261353, "learning_rate": 4.104294700572579e-06, "loss": 0.8641, "step": 123740 }, { "epoch": 0.8957849247540663, "grad_norm": 0.15158720314502716, "learning_rate": 4.104222313911992e-06, "loss": 0.8716, "step": 123750 }, { "epoch": 0.8958573114146525, "grad_norm": 0.15300332009792328, "learning_rate": 4.104149927251406e-06, "loss": 0.8746, "step": 123760 }, { "epoch": 0.8959296980752387, "grad_norm": 0.14569737017154694, "learning_rate": 4.10407754059082e-06, "loss": 0.8769, "step": 123770 }, { "epoch": 0.8960020847358249, "grad_norm": 0.1715804487466812, "learning_rate": 4.104005153930234e-06, "loss": 0.871, "step": 123780 }, { "epoch": 0.896074471396411, "grad_norm": 0.149086594581604, "learning_rate": 4.103932767269648e-06, "loss": 0.8719, "step": 123790 }, { "epoch": 0.8961468580569972, "grad_norm": 0.14175446331501007, "learning_rate": 4.103860380609061e-06, "loss": 0.8699, "step": 123800 }, { "epoch": 0.8962192447175834, "grad_norm": 0.15866614878177643, "learning_rate": 4.103787993948476e-06, "loss": 0.8739, "step": 123810 }, { "epoch": 0.8962916313781696, "grad_norm": 0.15101435780525208, "learning_rate": 4.103715607287889e-06, "loss": 0.8812, "step": 123820 }, { "epoch": 0.8963640180387559, "grad_norm": 0.16456730663776398, "learning_rate": 4.103643220627303e-06, "loss": 0.8643, "step": 123830 }, { "epoch": 0.896436404699342, "grad_norm": 0.15458862483501434, "learning_rate": 4.1035708339667165e-06, "loss": 0.8746, "step": 123840 }, { "epoch": 0.8965087913599282, "grad_norm": 0.15625207126140594, "learning_rate": 4.103498447306131e-06, "loss": 0.8852, "step": 123850 }, { "epoch": 0.8965811780205144, "grad_norm": 0.1557016670703888, "learning_rate": 4.103426060645545e-06, "loss": 0.8615, "step": 123860 }, { "epoch": 0.8966535646811006, "grad_norm": 0.15922847390174866, "learning_rate": 4.103353673984958e-06, "loss": 0.8678, "step": 123870 }, { "epoch": 0.8967259513416868, "grad_norm": 0.15058737993240356, "learning_rate": 4.103281287324372e-06, "loss": 0.8657, "step": 123880 }, { "epoch": 0.8967983380022729, "grad_norm": 0.15507520735263824, "learning_rate": 4.103208900663786e-06, "loss": 0.8639, "step": 123890 }, { "epoch": 0.8968707246628591, "grad_norm": 0.1978985220193863, "learning_rate": 4.1031365140032e-06, "loss": 0.8783, "step": 123900 }, { "epoch": 0.8969431113234453, "grad_norm": 0.1425454467535019, "learning_rate": 4.1030641273426135e-06, "loss": 0.8741, "step": 123910 }, { "epoch": 0.8970154979840315, "grad_norm": 0.14902305603027344, "learning_rate": 4.102991740682027e-06, "loss": 0.8698, "step": 123920 }, { "epoch": 0.8970878846446176, "grad_norm": 0.1497499793767929, "learning_rate": 4.102919354021442e-06, "loss": 0.8657, "step": 123930 }, { "epoch": 0.8971602713052039, "grad_norm": 0.16021916270256042, "learning_rate": 4.102846967360855e-06, "loss": 0.8808, "step": 123940 }, { "epoch": 0.8972326579657901, "grad_norm": 0.14823076128959656, "learning_rate": 4.102774580700269e-06, "loss": 0.8701, "step": 123950 }, { "epoch": 0.8973050446263763, "grad_norm": 0.16842210292816162, "learning_rate": 4.1027021940396825e-06, "loss": 0.8782, "step": 123960 }, { "epoch": 0.8973774312869625, "grad_norm": 0.14396853744983673, "learning_rate": 4.102629807379097e-06, "loss": 0.8804, "step": 123970 }, { "epoch": 0.8974498179475486, "grad_norm": 0.1455616056919098, "learning_rate": 4.1025574207185105e-06, "loss": 0.869, "step": 123980 }, { "epoch": 0.8975222046081348, "grad_norm": 0.14303292334079742, "learning_rate": 4.102485034057924e-06, "loss": 0.8642, "step": 123990 }, { "epoch": 0.897594591268721, "grad_norm": 0.15414045751094818, "learning_rate": 4.102412647397338e-06, "loss": 0.8639, "step": 124000 }, { "epoch": 0.8976669779293072, "grad_norm": 0.15850482881069183, "learning_rate": 4.102340260736752e-06, "loss": 0.8639, "step": 124010 }, { "epoch": 0.8977393645898933, "grad_norm": 0.15761762857437134, "learning_rate": 4.102267874076166e-06, "loss": 0.8722, "step": 124020 }, { "epoch": 0.8978117512504795, "grad_norm": 0.1510854810476303, "learning_rate": 4.1021954874155795e-06, "loss": 0.8801, "step": 124030 }, { "epoch": 0.8978841379110657, "grad_norm": 0.14972640573978424, "learning_rate": 4.102123100754993e-06, "loss": 0.8731, "step": 124040 }, { "epoch": 0.897956524571652, "grad_norm": 0.15360723435878754, "learning_rate": 4.1020507140944076e-06, "loss": 0.8713, "step": 124050 }, { "epoch": 0.8980289112322382, "grad_norm": 0.16010282933712006, "learning_rate": 4.101978327433821e-06, "loss": 0.8626, "step": 124060 }, { "epoch": 0.8981012978928243, "grad_norm": 0.15653282403945923, "learning_rate": 4.101905940773235e-06, "loss": 0.8747, "step": 124070 }, { "epoch": 0.8981736845534105, "grad_norm": 0.14662018418312073, "learning_rate": 4.101833554112648e-06, "loss": 0.8779, "step": 124080 }, { "epoch": 0.8982460712139967, "grad_norm": 0.1570512056350708, "learning_rate": 4.101761167452062e-06, "loss": 0.8845, "step": 124090 }, { "epoch": 0.8983184578745829, "grad_norm": 0.16235743463039398, "learning_rate": 4.1016887807914765e-06, "loss": 0.8702, "step": 124100 }, { "epoch": 0.898390844535169, "grad_norm": 0.1602431684732437, "learning_rate": 4.10161639413089e-06, "loss": 0.8693, "step": 124110 }, { "epoch": 0.8984632311957552, "grad_norm": 0.15021947026252747, "learning_rate": 4.101544007470304e-06, "loss": 0.8704, "step": 124120 }, { "epoch": 0.8985356178563414, "grad_norm": 0.17709344625473022, "learning_rate": 4.101471620809717e-06, "loss": 0.8725, "step": 124130 }, { "epoch": 0.8986080045169276, "grad_norm": 0.3144311308860779, "learning_rate": 4.101399234149132e-06, "loss": 0.8644, "step": 124140 }, { "epoch": 0.8986803911775139, "grad_norm": 0.1658957153558731, "learning_rate": 4.101326847488545e-06, "loss": 0.8645, "step": 124150 }, { "epoch": 0.8987527778381, "grad_norm": 0.1864367425441742, "learning_rate": 4.101254460827959e-06, "loss": 0.8769, "step": 124160 }, { "epoch": 0.8988251644986862, "grad_norm": 0.1641472429037094, "learning_rate": 4.101182074167373e-06, "loss": 0.8743, "step": 124170 }, { "epoch": 0.8988975511592724, "grad_norm": 0.156347393989563, "learning_rate": 4.101109687506787e-06, "loss": 0.863, "step": 124180 }, { "epoch": 0.8989699378198586, "grad_norm": 0.14861541986465454, "learning_rate": 4.101037300846201e-06, "loss": 0.8728, "step": 124190 }, { "epoch": 0.8990423244804447, "grad_norm": 0.15074244141578674, "learning_rate": 4.100964914185614e-06, "loss": 0.8771, "step": 124200 }, { "epoch": 0.8991147111410309, "grad_norm": 0.14890417456626892, "learning_rate": 4.100892527525028e-06, "loss": 0.8639, "step": 124210 }, { "epoch": 0.8991870978016171, "grad_norm": 0.1627875566482544, "learning_rate": 4.1008201408644416e-06, "loss": 0.8691, "step": 124220 }, { "epoch": 0.8992594844622033, "grad_norm": 0.15136182308197021, "learning_rate": 4.100747754203855e-06, "loss": 0.8836, "step": 124230 }, { "epoch": 0.8993318711227895, "grad_norm": 0.14906705915927887, "learning_rate": 4.100675367543269e-06, "loss": 0.8721, "step": 124240 }, { "epoch": 0.8994042577833756, "grad_norm": 0.14865903556346893, "learning_rate": 4.100602980882683e-06, "loss": 0.873, "step": 124250 }, { "epoch": 0.8994766444439619, "grad_norm": 0.19429634511470795, "learning_rate": 4.100530594222097e-06, "loss": 0.8822, "step": 124260 }, { "epoch": 0.8995490311045481, "grad_norm": 0.16122131049633026, "learning_rate": 4.1004582075615105e-06, "loss": 0.874, "step": 124270 }, { "epoch": 0.8996214177651343, "grad_norm": 0.14246490597724915, "learning_rate": 4.100385820900924e-06, "loss": 0.8657, "step": 124280 }, { "epoch": 0.8996938044257204, "grad_norm": 0.15221592783927917, "learning_rate": 4.100313434240339e-06, "loss": 0.8718, "step": 124290 }, { "epoch": 0.8997661910863066, "grad_norm": 0.1513119488954544, "learning_rate": 4.100241047579752e-06, "loss": 0.8552, "step": 124300 }, { "epoch": 0.8998385777468928, "grad_norm": 0.1578131765127182, "learning_rate": 4.100168660919166e-06, "loss": 0.873, "step": 124310 }, { "epoch": 0.899910964407479, "grad_norm": 0.1699373573064804, "learning_rate": 4.1000962742585794e-06, "loss": 0.883, "step": 124320 }, { "epoch": 0.8999833510680652, "grad_norm": 0.16044111549854279, "learning_rate": 4.100023887597994e-06, "loss": 0.8759, "step": 124330 }, { "epoch": 0.9000557377286513, "grad_norm": 0.16332702338695526, "learning_rate": 4.0999515009374075e-06, "loss": 0.8708, "step": 124340 }, { "epoch": 0.9001281243892375, "grad_norm": 0.15864451229572296, "learning_rate": 4.099879114276821e-06, "loss": 0.8728, "step": 124350 }, { "epoch": 0.9002005110498238, "grad_norm": 0.16290296614170074, "learning_rate": 4.099806727616235e-06, "loss": 0.8597, "step": 124360 }, { "epoch": 0.90027289771041, "grad_norm": 0.16254830360412598, "learning_rate": 4.099734340955649e-06, "loss": 0.8672, "step": 124370 }, { "epoch": 0.9003452843709961, "grad_norm": 0.15096203982830048, "learning_rate": 4.099661954295063e-06, "loss": 0.884, "step": 124380 }, { "epoch": 0.9004176710315823, "grad_norm": 0.1599651724100113, "learning_rate": 4.0995895676344764e-06, "loss": 0.8764, "step": 124390 }, { "epoch": 0.9004900576921685, "grad_norm": 0.1575443595647812, "learning_rate": 4.09951718097389e-06, "loss": 0.8833, "step": 124400 }, { "epoch": 0.9005624443527547, "grad_norm": 0.15898630023002625, "learning_rate": 4.0994447943133045e-06, "loss": 0.8664, "step": 124410 }, { "epoch": 0.9006348310133409, "grad_norm": 0.14191798865795135, "learning_rate": 4.099372407652718e-06, "loss": 0.8758, "step": 124420 }, { "epoch": 0.900707217673927, "grad_norm": 0.15920232236385345, "learning_rate": 4.099300020992132e-06, "loss": 0.8628, "step": 124430 }, { "epoch": 0.9007796043345132, "grad_norm": 0.15842455625534058, "learning_rate": 4.099227634331545e-06, "loss": 0.8698, "step": 124440 }, { "epoch": 0.9008519909950994, "grad_norm": 0.1533687710762024, "learning_rate": 4.09915524767096e-06, "loss": 0.8723, "step": 124450 }, { "epoch": 0.9009243776556856, "grad_norm": 0.16112767159938812, "learning_rate": 4.0990828610103734e-06, "loss": 0.8559, "step": 124460 }, { "epoch": 0.9009967643162718, "grad_norm": 0.15855056047439575, "learning_rate": 4.099010474349787e-06, "loss": 0.8793, "step": 124470 }, { "epoch": 0.901069150976858, "grad_norm": 0.15525835752487183, "learning_rate": 4.098938087689201e-06, "loss": 0.8706, "step": 124480 }, { "epoch": 0.9011415376374442, "grad_norm": 0.14787094295024872, "learning_rate": 4.098865701028615e-06, "loss": 0.8685, "step": 124490 }, { "epoch": 0.9012139242980304, "grad_norm": 0.17414870858192444, "learning_rate": 4.098793314368029e-06, "loss": 0.8715, "step": 124500 }, { "epoch": 0.9012863109586166, "grad_norm": 0.15117833018302917, "learning_rate": 4.098720927707442e-06, "loss": 0.8646, "step": 124510 }, { "epoch": 0.9013586976192027, "grad_norm": 0.22992941737174988, "learning_rate": 4.098648541046856e-06, "loss": 0.8764, "step": 124520 }, { "epoch": 0.9014310842797889, "grad_norm": 0.15416789054870605, "learning_rate": 4.0985761543862705e-06, "loss": 0.881, "step": 124530 }, { "epoch": 0.9015034709403751, "grad_norm": 0.17136387526988983, "learning_rate": 4.098503767725684e-06, "loss": 0.8801, "step": 124540 }, { "epoch": 0.9015758576009613, "grad_norm": 0.1540147364139557, "learning_rate": 4.098431381065098e-06, "loss": 0.8798, "step": 124550 }, { "epoch": 0.9016482442615474, "grad_norm": 0.15776564180850983, "learning_rate": 4.098358994404511e-06, "loss": 0.8718, "step": 124560 }, { "epoch": 0.9017206309221336, "grad_norm": 0.15084987878799438, "learning_rate": 4.098286607743926e-06, "loss": 0.8809, "step": 124570 }, { "epoch": 0.9017930175827199, "grad_norm": 0.15924064815044403, "learning_rate": 4.098214221083339e-06, "loss": 0.8815, "step": 124580 }, { "epoch": 0.9018654042433061, "grad_norm": 0.14319369196891785, "learning_rate": 4.098141834422753e-06, "loss": 0.8619, "step": 124590 }, { "epoch": 0.9019377909038923, "grad_norm": 0.1575179100036621, "learning_rate": 4.098069447762167e-06, "loss": 0.8651, "step": 124600 }, { "epoch": 0.9020101775644784, "grad_norm": 0.1562921404838562, "learning_rate": 4.097997061101581e-06, "loss": 0.8687, "step": 124610 }, { "epoch": 0.9020825642250646, "grad_norm": 0.1615796834230423, "learning_rate": 4.097924674440995e-06, "loss": 0.8539, "step": 124620 }, { "epoch": 0.9021549508856508, "grad_norm": 0.14882075786590576, "learning_rate": 4.097852287780408e-06, "loss": 0.8667, "step": 124630 }, { "epoch": 0.902227337546237, "grad_norm": 0.15828663110733032, "learning_rate": 4.097779901119822e-06, "loss": 0.8749, "step": 124640 }, { "epoch": 0.9022997242068231, "grad_norm": 0.15658116340637207, "learning_rate": 4.097707514459236e-06, "loss": 0.8891, "step": 124650 }, { "epoch": 0.9023721108674093, "grad_norm": 0.15174657106399536, "learning_rate": 4.09763512779865e-06, "loss": 0.8691, "step": 124660 }, { "epoch": 0.9024444975279955, "grad_norm": 0.14801457524299622, "learning_rate": 4.097562741138064e-06, "loss": 0.8774, "step": 124670 }, { "epoch": 0.9025168841885818, "grad_norm": 0.17308656871318817, "learning_rate": 4.097490354477477e-06, "loss": 0.8792, "step": 124680 }, { "epoch": 0.902589270849168, "grad_norm": 0.13914184272289276, "learning_rate": 4.097417967816891e-06, "loss": 0.8703, "step": 124690 }, { "epoch": 0.9026616575097541, "grad_norm": 0.16653895378112793, "learning_rate": 4.097345581156305e-06, "loss": 0.8584, "step": 124700 }, { "epoch": 0.9027340441703403, "grad_norm": 0.15545400977134705, "learning_rate": 4.097273194495719e-06, "loss": 0.8746, "step": 124710 }, { "epoch": 0.9028064308309265, "grad_norm": 0.15247924625873566, "learning_rate": 4.0972008078351325e-06, "loss": 0.8756, "step": 124720 }, { "epoch": 0.9028788174915127, "grad_norm": 0.15128053724765778, "learning_rate": 4.097128421174546e-06, "loss": 0.8757, "step": 124730 }, { "epoch": 0.9029512041520988, "grad_norm": 0.1656970977783203, "learning_rate": 4.09705603451396e-06, "loss": 0.864, "step": 124740 }, { "epoch": 0.903023590812685, "grad_norm": 0.15432408452033997, "learning_rate": 4.096983647853373e-06, "loss": 0.8673, "step": 124750 }, { "epoch": 0.9030959774732712, "grad_norm": 0.14504657685756683, "learning_rate": 4.096911261192788e-06, "loss": 0.8722, "step": 124760 }, { "epoch": 0.9031683641338574, "grad_norm": 0.181019127368927, "learning_rate": 4.0968388745322015e-06, "loss": 0.8777, "step": 124770 }, { "epoch": 0.9032407507944435, "grad_norm": 0.1841985583305359, "learning_rate": 4.096766487871615e-06, "loss": 0.8764, "step": 124780 }, { "epoch": 0.9033131374550298, "grad_norm": 0.14566998183727264, "learning_rate": 4.096694101211029e-06, "loss": 0.8788, "step": 124790 }, { "epoch": 0.903385524115616, "grad_norm": 0.15821722149848938, "learning_rate": 4.096621714550443e-06, "loss": 0.8675, "step": 124800 }, { "epoch": 0.9034579107762022, "grad_norm": 0.14918698370456696, "learning_rate": 4.096549327889857e-06, "loss": 0.8612, "step": 124810 }, { "epoch": 0.9035302974367884, "grad_norm": 0.16349859535694122, "learning_rate": 4.09647694122927e-06, "loss": 0.8707, "step": 124820 }, { "epoch": 0.9036026840973745, "grad_norm": 0.15375465154647827, "learning_rate": 4.096404554568684e-06, "loss": 0.8806, "step": 124830 }, { "epoch": 0.9036750707579607, "grad_norm": 0.152941033244133, "learning_rate": 4.0963321679080985e-06, "loss": 0.881, "step": 124840 }, { "epoch": 0.9037474574185469, "grad_norm": 0.22806885838508606, "learning_rate": 4.096259781247512e-06, "loss": 0.8626, "step": 124850 }, { "epoch": 0.9038198440791331, "grad_norm": 0.15706026554107666, "learning_rate": 4.096187394586926e-06, "loss": 0.8711, "step": 124860 }, { "epoch": 0.9038922307397192, "grad_norm": 0.1524626761674881, "learning_rate": 4.096115007926339e-06, "loss": 0.8779, "step": 124870 }, { "epoch": 0.9039646174003054, "grad_norm": 0.16618412733078003, "learning_rate": 4.096042621265753e-06, "loss": 0.8715, "step": 124880 }, { "epoch": 0.9040370040608917, "grad_norm": 0.1479671746492386, "learning_rate": 4.095970234605167e-06, "loss": 0.8798, "step": 124890 }, { "epoch": 0.9041093907214779, "grad_norm": 0.22761158645153046, "learning_rate": 4.095897847944581e-06, "loss": 0.8708, "step": 124900 }, { "epoch": 0.9041817773820641, "grad_norm": 0.15918904542922974, "learning_rate": 4.095825461283995e-06, "loss": 0.8679, "step": 124910 }, { "epoch": 0.9042541640426502, "grad_norm": 0.16984111070632935, "learning_rate": 4.095753074623408e-06, "loss": 0.8686, "step": 124920 }, { "epoch": 0.9043265507032364, "grad_norm": 0.15530432760715485, "learning_rate": 4.095680687962823e-06, "loss": 0.8733, "step": 124930 }, { "epoch": 0.9043989373638226, "grad_norm": 0.15321482717990875, "learning_rate": 4.095608301302236e-06, "loss": 0.868, "step": 124940 }, { "epoch": 0.9044713240244088, "grad_norm": 0.15977735817432404, "learning_rate": 4.09553591464165e-06, "loss": 0.8704, "step": 124950 }, { "epoch": 0.904543710684995, "grad_norm": 0.14585714042186737, "learning_rate": 4.0954635279810636e-06, "loss": 0.8688, "step": 124960 }, { "epoch": 0.9046160973455811, "grad_norm": 0.16191209852695465, "learning_rate": 4.095391141320478e-06, "loss": 0.8638, "step": 124970 }, { "epoch": 0.9046884840061673, "grad_norm": 0.27564188838005066, "learning_rate": 4.095318754659892e-06, "loss": 0.8708, "step": 124980 }, { "epoch": 0.9047608706667535, "grad_norm": 0.14919497072696686, "learning_rate": 4.095246367999305e-06, "loss": 0.8662, "step": 124990 }, { "epoch": 0.9048332573273398, "grad_norm": 0.1528129130601883, "learning_rate": 4.095173981338719e-06, "loss": 0.8759, "step": 125000 }, { "epoch": 0.904905643987926, "grad_norm": 0.15867576003074646, "learning_rate": 4.095101594678133e-06, "loss": 0.888, "step": 125010 }, { "epoch": 0.9049780306485121, "grad_norm": 0.16656945645809174, "learning_rate": 4.095029208017547e-06, "loss": 0.8661, "step": 125020 }, { "epoch": 0.9050504173090983, "grad_norm": 0.1601569950580597, "learning_rate": 4.094956821356961e-06, "loss": 0.8645, "step": 125030 }, { "epoch": 0.9051228039696845, "grad_norm": 0.1524617075920105, "learning_rate": 4.094884434696374e-06, "loss": 0.8727, "step": 125040 }, { "epoch": 0.9051951906302707, "grad_norm": 0.15600845217704773, "learning_rate": 4.094812048035789e-06, "loss": 0.8757, "step": 125050 }, { "epoch": 0.9052675772908568, "grad_norm": 0.15215645730495453, "learning_rate": 4.094739661375202e-06, "loss": 0.8556, "step": 125060 }, { "epoch": 0.905339963951443, "grad_norm": 0.14857865869998932, "learning_rate": 4.094667274714616e-06, "loss": 0.8629, "step": 125070 }, { "epoch": 0.9054123506120292, "grad_norm": 0.15590626001358032, "learning_rate": 4.0945948880540295e-06, "loss": 0.8733, "step": 125080 }, { "epoch": 0.9054847372726154, "grad_norm": 0.15361756086349487, "learning_rate": 4.094522501393444e-06, "loss": 0.8775, "step": 125090 }, { "epoch": 0.9055571239332015, "grad_norm": 0.14809496700763702, "learning_rate": 4.094450114732858e-06, "loss": 0.8752, "step": 125100 }, { "epoch": 0.9056295105937878, "grad_norm": 0.1699819564819336, "learning_rate": 4.094377728072271e-06, "loss": 0.8768, "step": 125110 }, { "epoch": 0.905701897254374, "grad_norm": 0.3279818892478943, "learning_rate": 4.094305341411685e-06, "loss": 0.8726, "step": 125120 }, { "epoch": 0.9057742839149602, "grad_norm": 0.14810748398303986, "learning_rate": 4.094232954751099e-06, "loss": 0.874, "step": 125130 }, { "epoch": 0.9058466705755464, "grad_norm": 0.14194615185260773, "learning_rate": 4.094160568090513e-06, "loss": 0.8608, "step": 125140 }, { "epoch": 0.9059190572361325, "grad_norm": 0.1805509328842163, "learning_rate": 4.0940881814299265e-06, "loss": 0.8603, "step": 125150 }, { "epoch": 0.9059914438967187, "grad_norm": 0.15983721613883972, "learning_rate": 4.09401579476934e-06, "loss": 0.8724, "step": 125160 }, { "epoch": 0.9060638305573049, "grad_norm": 0.15340054035186768, "learning_rate": 4.093943408108755e-06, "loss": 0.8659, "step": 125170 }, { "epoch": 0.9061362172178911, "grad_norm": 0.14701798558235168, "learning_rate": 4.093871021448168e-06, "loss": 0.8699, "step": 125180 }, { "epoch": 0.9062086038784772, "grad_norm": 0.14838139712810516, "learning_rate": 4.093798634787582e-06, "loss": 0.8712, "step": 125190 }, { "epoch": 0.9062809905390634, "grad_norm": 0.1553456038236618, "learning_rate": 4.0937262481269954e-06, "loss": 0.8742, "step": 125200 }, { "epoch": 0.9063533771996497, "grad_norm": 0.15716488659381866, "learning_rate": 4.09365386146641e-06, "loss": 0.8726, "step": 125210 }, { "epoch": 0.9064257638602359, "grad_norm": 0.15702371299266815, "learning_rate": 4.0935814748058235e-06, "loss": 0.8847, "step": 125220 }, { "epoch": 0.906498150520822, "grad_norm": 0.16816936433315277, "learning_rate": 4.093509088145237e-06, "loss": 0.8732, "step": 125230 }, { "epoch": 0.9065705371814082, "grad_norm": 0.14697718620300293, "learning_rate": 4.093436701484651e-06, "loss": 0.8725, "step": 125240 }, { "epoch": 0.9066429238419944, "grad_norm": 0.1436859369277954, "learning_rate": 4.093364314824065e-06, "loss": 0.8681, "step": 125250 }, { "epoch": 0.9067153105025806, "grad_norm": 0.1546768695116043, "learning_rate": 4.093291928163479e-06, "loss": 0.8904, "step": 125260 }, { "epoch": 0.9067876971631668, "grad_norm": 0.16405488550662994, "learning_rate": 4.093219541502892e-06, "loss": 0.8736, "step": 125270 }, { "epoch": 0.9068600838237529, "grad_norm": 0.15231408178806305, "learning_rate": 4.093147154842306e-06, "loss": 0.8566, "step": 125280 }, { "epoch": 0.9069324704843391, "grad_norm": 0.15230032801628113, "learning_rate": 4.09307476818172e-06, "loss": 0.8713, "step": 125290 }, { "epoch": 0.9070048571449253, "grad_norm": 0.15494680404663086, "learning_rate": 4.093002381521133e-06, "loss": 0.8725, "step": 125300 }, { "epoch": 0.9070772438055115, "grad_norm": 0.17046596109867096, "learning_rate": 4.092929994860547e-06, "loss": 0.8698, "step": 125310 }, { "epoch": 0.9071496304660978, "grad_norm": 0.16179198026657104, "learning_rate": 4.092857608199961e-06, "loss": 0.8697, "step": 125320 }, { "epoch": 0.9072220171266839, "grad_norm": 0.15248672664165497, "learning_rate": 4.092785221539375e-06, "loss": 0.8528, "step": 125330 }, { "epoch": 0.9072944037872701, "grad_norm": 0.1498788446187973, "learning_rate": 4.092712834878789e-06, "loss": 0.8753, "step": 125340 }, { "epoch": 0.9073667904478563, "grad_norm": 0.1529855877161026, "learning_rate": 4.092640448218202e-06, "loss": 0.8749, "step": 125350 }, { "epoch": 0.9074391771084425, "grad_norm": 0.154828742146492, "learning_rate": 4.092568061557617e-06, "loss": 0.8729, "step": 125360 }, { "epoch": 0.9075115637690286, "grad_norm": 0.15640468895435333, "learning_rate": 4.09249567489703e-06, "loss": 0.8613, "step": 125370 }, { "epoch": 0.9075839504296148, "grad_norm": 0.16197548806667328, "learning_rate": 4.092423288236444e-06, "loss": 0.8664, "step": 125380 }, { "epoch": 0.907656337090201, "grad_norm": 0.15120939910411835, "learning_rate": 4.0923509015758575e-06, "loss": 0.8811, "step": 125390 }, { "epoch": 0.9077287237507872, "grad_norm": 0.14062069356441498, "learning_rate": 4.092278514915272e-06, "loss": 0.8531, "step": 125400 }, { "epoch": 0.9078011104113733, "grad_norm": 0.15563814342021942, "learning_rate": 4.092206128254686e-06, "loss": 0.8861, "step": 125410 }, { "epoch": 0.9078734970719596, "grad_norm": 0.16093406081199646, "learning_rate": 4.092133741594099e-06, "loss": 0.8706, "step": 125420 }, { "epoch": 0.9079458837325458, "grad_norm": 0.15003357827663422, "learning_rate": 4.092061354933513e-06, "loss": 0.8709, "step": 125430 }, { "epoch": 0.908018270393132, "grad_norm": 0.16230422258377075, "learning_rate": 4.091988968272927e-06, "loss": 0.8653, "step": 125440 }, { "epoch": 0.9080906570537182, "grad_norm": 0.14883364737033844, "learning_rate": 4.091916581612341e-06, "loss": 0.8632, "step": 125450 }, { "epoch": 0.9081630437143043, "grad_norm": 0.15273550152778625, "learning_rate": 4.0918441949517545e-06, "loss": 0.8826, "step": 125460 }, { "epoch": 0.9082354303748905, "grad_norm": 0.1566764861345291, "learning_rate": 4.091771808291168e-06, "loss": 0.8754, "step": 125470 }, { "epoch": 0.9083078170354767, "grad_norm": 0.17311665415763855, "learning_rate": 4.091699421630582e-06, "loss": 0.8671, "step": 125480 }, { "epoch": 0.9083802036960629, "grad_norm": 0.15010863542556763, "learning_rate": 4.091627034969996e-06, "loss": 0.8626, "step": 125490 }, { "epoch": 0.908452590356649, "grad_norm": 0.1590012162923813, "learning_rate": 4.09155464830941e-06, "loss": 0.8547, "step": 125500 }, { "epoch": 0.9085249770172352, "grad_norm": 0.6873774528503418, "learning_rate": 4.0914822616488235e-06, "loss": 0.8869, "step": 125510 }, { "epoch": 0.9085973636778214, "grad_norm": 0.1598593294620514, "learning_rate": 4.091409874988237e-06, "loss": 0.8807, "step": 125520 }, { "epoch": 0.9086697503384077, "grad_norm": 0.1523318886756897, "learning_rate": 4.0913374883276516e-06, "loss": 0.8734, "step": 125530 }, { "epoch": 0.9087421369989939, "grad_norm": 0.15472367405891418, "learning_rate": 4.091265101667065e-06, "loss": 0.8717, "step": 125540 }, { "epoch": 0.90881452365958, "grad_norm": 0.15248602628707886, "learning_rate": 4.091192715006479e-06, "loss": 0.8718, "step": 125550 }, { "epoch": 0.9088869103201662, "grad_norm": 0.15343599021434784, "learning_rate": 4.091120328345892e-06, "loss": 0.878, "step": 125560 }, { "epoch": 0.9089592969807524, "grad_norm": 0.16457286477088928, "learning_rate": 4.091047941685307e-06, "loss": 0.8691, "step": 125570 }, { "epoch": 0.9090316836413386, "grad_norm": 0.15641097724437714, "learning_rate": 4.0909755550247205e-06, "loss": 0.8795, "step": 125580 }, { "epoch": 0.9091040703019247, "grad_norm": 0.18575336039066315, "learning_rate": 4.090903168364134e-06, "loss": 0.8739, "step": 125590 }, { "epoch": 0.9091764569625109, "grad_norm": 0.1508977711200714, "learning_rate": 4.090830781703548e-06, "loss": 0.8898, "step": 125600 }, { "epoch": 0.9092488436230971, "grad_norm": 0.14920225739479065, "learning_rate": 4.090758395042962e-06, "loss": 0.8587, "step": 125610 }, { "epoch": 0.9093212302836833, "grad_norm": 0.15833619236946106, "learning_rate": 4.090686008382376e-06, "loss": 0.8692, "step": 125620 }, { "epoch": 0.9093936169442695, "grad_norm": 0.15184055268764496, "learning_rate": 4.090613621721789e-06, "loss": 0.8775, "step": 125630 }, { "epoch": 0.9094660036048557, "grad_norm": 0.14898662269115448, "learning_rate": 4.090541235061203e-06, "loss": 0.8754, "step": 125640 }, { "epoch": 0.9095383902654419, "grad_norm": 0.16337692737579346, "learning_rate": 4.0904688484006175e-06, "loss": 0.875, "step": 125650 }, { "epoch": 0.9096107769260281, "grad_norm": 0.15499453246593475, "learning_rate": 4.090396461740031e-06, "loss": 0.8584, "step": 125660 }, { "epoch": 0.9096831635866143, "grad_norm": 0.15772931277751923, "learning_rate": 4.090324075079445e-06, "loss": 0.8766, "step": 125670 }, { "epoch": 0.9097555502472005, "grad_norm": 0.14884252846240997, "learning_rate": 4.090251688418858e-06, "loss": 0.8841, "step": 125680 }, { "epoch": 0.9098279369077866, "grad_norm": 0.14398325979709625, "learning_rate": 4.090179301758273e-06, "loss": 0.876, "step": 125690 }, { "epoch": 0.9099003235683728, "grad_norm": 0.1522645205259323, "learning_rate": 4.090106915097686e-06, "loss": 0.8766, "step": 125700 }, { "epoch": 0.909972710228959, "grad_norm": 0.18375910818576813, "learning_rate": 4.0900345284371e-06, "loss": 0.8715, "step": 125710 }, { "epoch": 0.9100450968895452, "grad_norm": 0.1611146479845047, "learning_rate": 4.089962141776514e-06, "loss": 0.8661, "step": 125720 }, { "epoch": 0.9101174835501313, "grad_norm": 0.2902297079563141, "learning_rate": 4.089889755115928e-06, "loss": 0.8631, "step": 125730 }, { "epoch": 0.9101898702107176, "grad_norm": 0.18060381710529327, "learning_rate": 4.089817368455342e-06, "loss": 0.8654, "step": 125740 }, { "epoch": 0.9102622568713038, "grad_norm": 0.15165190398693085, "learning_rate": 4.089744981794755e-06, "loss": 0.8734, "step": 125750 }, { "epoch": 0.91033464353189, "grad_norm": 0.16775788366794586, "learning_rate": 4.089672595134169e-06, "loss": 0.8643, "step": 125760 }, { "epoch": 0.9104070301924762, "grad_norm": 0.1607564091682434, "learning_rate": 4.0896002084735834e-06, "loss": 0.8653, "step": 125770 }, { "epoch": 0.9104794168530623, "grad_norm": 0.15660177171230316, "learning_rate": 4.089527821812997e-06, "loss": 0.8727, "step": 125780 }, { "epoch": 0.9105518035136485, "grad_norm": 0.14147257804870605, "learning_rate": 4.089455435152411e-06, "loss": 0.8755, "step": 125790 }, { "epoch": 0.9106241901742347, "grad_norm": 0.1649179309606552, "learning_rate": 4.089383048491824e-06, "loss": 0.87, "step": 125800 }, { "epoch": 0.9106965768348209, "grad_norm": 0.15854406356811523, "learning_rate": 4.089310661831238e-06, "loss": 0.8649, "step": 125810 }, { "epoch": 0.910768963495407, "grad_norm": 0.16019472479820251, "learning_rate": 4.0892382751706515e-06, "loss": 0.8806, "step": 125820 }, { "epoch": 0.9108413501559932, "grad_norm": 0.16454534232616425, "learning_rate": 4.089165888510065e-06, "loss": 0.874, "step": 125830 }, { "epoch": 0.9109137368165794, "grad_norm": 0.14616376161575317, "learning_rate": 4.08909350184948e-06, "loss": 0.8781, "step": 125840 }, { "epoch": 0.9109861234771657, "grad_norm": 0.16948239505290985, "learning_rate": 4.089021115188893e-06, "loss": 0.8746, "step": 125850 }, { "epoch": 0.9110585101377519, "grad_norm": 0.1634904146194458, "learning_rate": 4.088948728528307e-06, "loss": 0.8729, "step": 125860 }, { "epoch": 0.911130896798338, "grad_norm": 0.16780352592468262, "learning_rate": 4.0888763418677204e-06, "loss": 0.8731, "step": 125870 }, { "epoch": 0.9112032834589242, "grad_norm": 0.1554594188928604, "learning_rate": 4.088803955207135e-06, "loss": 0.8751, "step": 125880 }, { "epoch": 0.9112756701195104, "grad_norm": 0.1543656289577484, "learning_rate": 4.0887315685465485e-06, "loss": 0.8811, "step": 125890 }, { "epoch": 0.9113480567800966, "grad_norm": 0.14425186812877655, "learning_rate": 4.088659181885962e-06, "loss": 0.8709, "step": 125900 }, { "epoch": 0.9114204434406827, "grad_norm": 0.15529589354991913, "learning_rate": 4.088586795225376e-06, "loss": 0.8594, "step": 125910 }, { "epoch": 0.9114928301012689, "grad_norm": 0.1789928823709488, "learning_rate": 4.08851440856479e-06, "loss": 0.867, "step": 125920 }, { "epoch": 0.9115652167618551, "grad_norm": 0.15948614478111267, "learning_rate": 4.088442021904204e-06, "loss": 0.8622, "step": 125930 }, { "epoch": 0.9116376034224413, "grad_norm": 0.18289946019649506, "learning_rate": 4.0883696352436174e-06, "loss": 0.8776, "step": 125940 }, { "epoch": 0.9117099900830276, "grad_norm": 0.16194994747638702, "learning_rate": 4.088297248583031e-06, "loss": 0.8787, "step": 125950 }, { "epoch": 0.9117823767436137, "grad_norm": 0.15668463706970215, "learning_rate": 4.0882248619224455e-06, "loss": 0.8665, "step": 125960 }, { "epoch": 0.9118547634041999, "grad_norm": 0.14782801270484924, "learning_rate": 4.088152475261859e-06, "loss": 0.8671, "step": 125970 }, { "epoch": 0.9119271500647861, "grad_norm": 0.14951111376285553, "learning_rate": 4.088080088601273e-06, "loss": 0.8714, "step": 125980 }, { "epoch": 0.9119995367253723, "grad_norm": 0.15753866732120514, "learning_rate": 4.088007701940686e-06, "loss": 0.8804, "step": 125990 }, { "epoch": 0.9120719233859584, "grad_norm": 0.1543973982334137, "learning_rate": 4.087935315280101e-06, "loss": 0.8618, "step": 126000 }, { "epoch": 0.9121443100465446, "grad_norm": 0.16389751434326172, "learning_rate": 4.0878629286195145e-06, "loss": 0.865, "step": 126010 }, { "epoch": 0.9122166967071308, "grad_norm": 0.15064431726932526, "learning_rate": 4.087790541958928e-06, "loss": 0.8691, "step": 126020 }, { "epoch": 0.912289083367717, "grad_norm": 0.15450918674468994, "learning_rate": 4.087718155298342e-06, "loss": 0.8802, "step": 126030 }, { "epoch": 0.9123614700283031, "grad_norm": 0.15472367405891418, "learning_rate": 4.087645768637756e-06, "loss": 0.8812, "step": 126040 }, { "epoch": 0.9124338566888893, "grad_norm": 0.16118258237838745, "learning_rate": 4.08757338197717e-06, "loss": 0.8693, "step": 126050 }, { "epoch": 0.9125062433494756, "grad_norm": 0.21667267382144928, "learning_rate": 4.087500995316583e-06, "loss": 0.8782, "step": 126060 }, { "epoch": 0.9125786300100618, "grad_norm": 0.1615353375673294, "learning_rate": 4.087428608655997e-06, "loss": 0.8707, "step": 126070 }, { "epoch": 0.912651016670648, "grad_norm": 0.15932497382164001, "learning_rate": 4.0873562219954115e-06, "loss": 0.8674, "step": 126080 }, { "epoch": 0.9127234033312341, "grad_norm": 0.14429889619350433, "learning_rate": 4.087283835334825e-06, "loss": 0.8676, "step": 126090 }, { "epoch": 0.9127957899918203, "grad_norm": 0.1498817354440689, "learning_rate": 4.087211448674239e-06, "loss": 0.8839, "step": 126100 }, { "epoch": 0.9128681766524065, "grad_norm": 0.1536143273115158, "learning_rate": 4.087139062013652e-06, "loss": 0.866, "step": 126110 }, { "epoch": 0.9129405633129927, "grad_norm": 0.14300012588500977, "learning_rate": 4.087066675353066e-06, "loss": 0.8694, "step": 126120 }, { "epoch": 0.9130129499735788, "grad_norm": 0.15428438782691956, "learning_rate": 4.08699428869248e-06, "loss": 0.8779, "step": 126130 }, { "epoch": 0.913085336634165, "grad_norm": 0.17595326900482178, "learning_rate": 4.086921902031894e-06, "loss": 0.8634, "step": 126140 }, { "epoch": 0.9131577232947512, "grad_norm": 0.15784429013729095, "learning_rate": 4.086849515371308e-06, "loss": 0.8764, "step": 126150 }, { "epoch": 0.9132301099553374, "grad_norm": 0.15594114363193512, "learning_rate": 4.086777128710721e-06, "loss": 0.8722, "step": 126160 }, { "epoch": 0.9133024966159237, "grad_norm": 0.14309413731098175, "learning_rate": 4.086704742050136e-06, "loss": 0.8726, "step": 126170 }, { "epoch": 0.9133748832765098, "grad_norm": 0.15265756845474243, "learning_rate": 4.086632355389549e-06, "loss": 0.8784, "step": 126180 }, { "epoch": 0.913447269937096, "grad_norm": 0.20048628747463226, "learning_rate": 4.086559968728963e-06, "loss": 0.8717, "step": 126190 }, { "epoch": 0.9135196565976822, "grad_norm": 0.15623369812965393, "learning_rate": 4.0864875820683765e-06, "loss": 0.8691, "step": 126200 }, { "epoch": 0.9135920432582684, "grad_norm": 0.1530992090702057, "learning_rate": 4.086415195407791e-06, "loss": 0.8678, "step": 126210 }, { "epoch": 0.9136644299188545, "grad_norm": 0.15348680317401886, "learning_rate": 4.086342808747205e-06, "loss": 0.8609, "step": 126220 }, { "epoch": 0.9137368165794407, "grad_norm": 0.1623658686876297, "learning_rate": 4.086270422086618e-06, "loss": 0.874, "step": 126230 }, { "epoch": 0.9138092032400269, "grad_norm": 0.16523189842700958, "learning_rate": 4.086198035426032e-06, "loss": 0.8834, "step": 126240 }, { "epoch": 0.9138815899006131, "grad_norm": 0.148997500538826, "learning_rate": 4.086125648765446e-06, "loss": 0.8689, "step": 126250 }, { "epoch": 0.9139539765611993, "grad_norm": 0.15621952712535858, "learning_rate": 4.08605326210486e-06, "loss": 0.8786, "step": 126260 }, { "epoch": 0.9140263632217855, "grad_norm": 0.14902496337890625, "learning_rate": 4.0859808754442736e-06, "loss": 0.87, "step": 126270 }, { "epoch": 0.9140987498823717, "grad_norm": 0.15417183935642242, "learning_rate": 4.085908488783687e-06, "loss": 0.8674, "step": 126280 }, { "epoch": 0.9141711365429579, "grad_norm": 0.19318684935569763, "learning_rate": 4.085836102123102e-06, "loss": 0.858, "step": 126290 }, { "epoch": 0.9142435232035441, "grad_norm": 0.15379764139652252, "learning_rate": 4.085763715462515e-06, "loss": 0.8635, "step": 126300 }, { "epoch": 0.9143159098641302, "grad_norm": 0.37275516986846924, "learning_rate": 4.085691328801929e-06, "loss": 0.8751, "step": 126310 }, { "epoch": 0.9143882965247164, "grad_norm": 0.1587948054075241, "learning_rate": 4.0856189421413425e-06, "loss": 0.8655, "step": 126320 }, { "epoch": 0.9144606831853026, "grad_norm": 0.15148475766181946, "learning_rate": 4.085546555480756e-06, "loss": 0.863, "step": 126330 }, { "epoch": 0.9145330698458888, "grad_norm": 0.16496942937374115, "learning_rate": 4.08547416882017e-06, "loss": 0.8457, "step": 126340 }, { "epoch": 0.914605456506475, "grad_norm": 0.15391036868095398, "learning_rate": 4.085401782159583e-06, "loss": 0.8728, "step": 126350 }, { "epoch": 0.9146778431670611, "grad_norm": 0.15899689495563507, "learning_rate": 4.085329395498998e-06, "loss": 0.8644, "step": 126360 }, { "epoch": 0.9147502298276473, "grad_norm": 0.1451529711484909, "learning_rate": 4.085257008838411e-06, "loss": 0.871, "step": 126370 }, { "epoch": 0.9148226164882336, "grad_norm": 0.14316491782665253, "learning_rate": 4.085184622177825e-06, "loss": 0.8741, "step": 126380 }, { "epoch": 0.9148950031488198, "grad_norm": 0.1564132571220398, "learning_rate": 4.085112235517239e-06, "loss": 0.8763, "step": 126390 }, { "epoch": 0.914967389809406, "grad_norm": 0.16010133922100067, "learning_rate": 4.085039848856653e-06, "loss": 0.8729, "step": 126400 }, { "epoch": 0.9150397764699921, "grad_norm": 0.15824563801288605, "learning_rate": 4.084967462196067e-06, "loss": 0.8694, "step": 126410 }, { "epoch": 0.9151121631305783, "grad_norm": 0.16496235132217407, "learning_rate": 4.08489507553548e-06, "loss": 0.886, "step": 126420 }, { "epoch": 0.9151845497911645, "grad_norm": 0.15750516951084137, "learning_rate": 4.084822688874894e-06, "loss": 0.8668, "step": 126430 }, { "epoch": 0.9152569364517507, "grad_norm": 0.1529252976179123, "learning_rate": 4.084750302214308e-06, "loss": 0.8663, "step": 126440 }, { "epoch": 0.9153293231123368, "grad_norm": 0.15626297891139984, "learning_rate": 4.084677915553722e-06, "loss": 0.8725, "step": 126450 }, { "epoch": 0.915401709772923, "grad_norm": 0.15462850034236908, "learning_rate": 4.084605528893136e-06, "loss": 0.8787, "step": 126460 }, { "epoch": 0.9154740964335092, "grad_norm": 0.15536385774612427, "learning_rate": 4.084533142232549e-06, "loss": 0.8707, "step": 126470 }, { "epoch": 0.9155464830940954, "grad_norm": 0.1599668562412262, "learning_rate": 4.084460755571964e-06, "loss": 0.8703, "step": 126480 }, { "epoch": 0.9156188697546817, "grad_norm": 0.1455414891242981, "learning_rate": 4.084388368911377e-06, "loss": 0.8686, "step": 126490 }, { "epoch": 0.9156912564152678, "grad_norm": 0.15002378821372986, "learning_rate": 4.084315982250791e-06, "loss": 0.8834, "step": 126500 }, { "epoch": 0.915763643075854, "grad_norm": 0.16886621713638306, "learning_rate": 4.084243595590205e-06, "loss": 0.8663, "step": 126510 }, { "epoch": 0.9158360297364402, "grad_norm": 0.15072478353977203, "learning_rate": 4.084171208929619e-06, "loss": 0.8776, "step": 126520 }, { "epoch": 0.9159084163970264, "grad_norm": 0.18889220058918, "learning_rate": 4.084098822269033e-06, "loss": 0.8762, "step": 126530 }, { "epoch": 0.9159808030576125, "grad_norm": 0.1520804464817047, "learning_rate": 4.084026435608446e-06, "loss": 0.8669, "step": 126540 }, { "epoch": 0.9160531897181987, "grad_norm": 0.16211113333702087, "learning_rate": 4.08395404894786e-06, "loss": 0.8734, "step": 126550 }, { "epoch": 0.9161255763787849, "grad_norm": 0.21941564977169037, "learning_rate": 4.083881662287274e-06, "loss": 0.876, "step": 126560 }, { "epoch": 0.9161979630393711, "grad_norm": 0.14378584921360016, "learning_rate": 4.083809275626688e-06, "loss": 0.8728, "step": 126570 }, { "epoch": 0.9162703496999572, "grad_norm": 0.14332708716392517, "learning_rate": 4.083736888966102e-06, "loss": 0.8764, "step": 126580 }, { "epoch": 0.9163427363605435, "grad_norm": 0.14835390448570251, "learning_rate": 4.083664502305515e-06, "loss": 0.8651, "step": 126590 }, { "epoch": 0.9164151230211297, "grad_norm": 0.1542014479637146, "learning_rate": 4.08359211564493e-06, "loss": 0.8678, "step": 126600 }, { "epoch": 0.9164875096817159, "grad_norm": 0.1536477953195572, "learning_rate": 4.083519728984343e-06, "loss": 0.88, "step": 126610 }, { "epoch": 0.9165598963423021, "grad_norm": 0.16406361758708954, "learning_rate": 4.083447342323757e-06, "loss": 0.8792, "step": 126620 }, { "epoch": 0.9166322830028882, "grad_norm": 0.14225560426712036, "learning_rate": 4.0833749556631705e-06, "loss": 0.878, "step": 126630 }, { "epoch": 0.9167046696634744, "grad_norm": 0.1528317928314209, "learning_rate": 4.083302569002585e-06, "loss": 0.8706, "step": 126640 }, { "epoch": 0.9167770563240606, "grad_norm": 0.1531572937965393, "learning_rate": 4.083230182341999e-06, "loss": 0.8681, "step": 126650 }, { "epoch": 0.9168494429846468, "grad_norm": 0.163166806101799, "learning_rate": 4.083157795681412e-06, "loss": 0.8694, "step": 126660 }, { "epoch": 0.9169218296452329, "grad_norm": 0.1512100100517273, "learning_rate": 4.083085409020826e-06, "loss": 0.8655, "step": 126670 }, { "epoch": 0.9169942163058191, "grad_norm": 0.14937952160835266, "learning_rate": 4.08301302236024e-06, "loss": 0.8743, "step": 126680 }, { "epoch": 0.9170666029664053, "grad_norm": 0.16589467227458954, "learning_rate": 4.082940635699654e-06, "loss": 0.8736, "step": 126690 }, { "epoch": 0.9171389896269916, "grad_norm": 0.15574873983860016, "learning_rate": 4.0828682490390675e-06, "loss": 0.8724, "step": 126700 }, { "epoch": 0.9172113762875778, "grad_norm": 0.15169155597686768, "learning_rate": 4.082795862378481e-06, "loss": 0.8803, "step": 126710 }, { "epoch": 0.9172837629481639, "grad_norm": 0.15095655620098114, "learning_rate": 4.082723475717896e-06, "loss": 0.8695, "step": 126720 }, { "epoch": 0.9173561496087501, "grad_norm": 0.144984170794487, "learning_rate": 4.082651089057309e-06, "loss": 0.8814, "step": 126730 }, { "epoch": 0.9174285362693363, "grad_norm": 0.15031631290912628, "learning_rate": 4.082578702396723e-06, "loss": 0.8549, "step": 126740 }, { "epoch": 0.9175009229299225, "grad_norm": 0.15027374029159546, "learning_rate": 4.0825063157361364e-06, "loss": 0.8781, "step": 126750 }, { "epoch": 0.9175733095905086, "grad_norm": 0.1555909812450409, "learning_rate": 4.08243392907555e-06, "loss": 0.8563, "step": 126760 }, { "epoch": 0.9176456962510948, "grad_norm": 0.1572585105895996, "learning_rate": 4.0823615424149645e-06, "loss": 0.88, "step": 126770 }, { "epoch": 0.917718082911681, "grad_norm": 0.15121687948703766, "learning_rate": 4.082289155754378e-06, "loss": 0.864, "step": 126780 }, { "epoch": 0.9177904695722672, "grad_norm": 0.14316906034946442, "learning_rate": 4.082216769093792e-06, "loss": 0.877, "step": 126790 }, { "epoch": 0.9178628562328535, "grad_norm": 0.1634669452905655, "learning_rate": 4.082144382433205e-06, "loss": 0.8763, "step": 126800 }, { "epoch": 0.9179352428934396, "grad_norm": 0.1625238060951233, "learning_rate": 4.08207199577262e-06, "loss": 0.8826, "step": 126810 }, { "epoch": 0.9180076295540258, "grad_norm": 0.1542353332042694, "learning_rate": 4.0819996091120335e-06, "loss": 0.8715, "step": 126820 }, { "epoch": 0.918080016214612, "grad_norm": 0.156948059797287, "learning_rate": 4.081927222451447e-06, "loss": 0.8703, "step": 126830 }, { "epoch": 0.9181524028751982, "grad_norm": 0.17528340220451355, "learning_rate": 4.081854835790861e-06, "loss": 0.8721, "step": 126840 }, { "epoch": 0.9182247895357843, "grad_norm": 0.15125548839569092, "learning_rate": 4.081782449130275e-06, "loss": 0.8732, "step": 126850 }, { "epoch": 0.9182971761963705, "grad_norm": 0.1517082154750824, "learning_rate": 4.081710062469688e-06, "loss": 0.8781, "step": 126860 }, { "epoch": 0.9183695628569567, "grad_norm": 0.16170631349086761, "learning_rate": 4.081637675809102e-06, "loss": 0.8683, "step": 126870 }, { "epoch": 0.9184419495175429, "grad_norm": 0.15255410969257355, "learning_rate": 4.081565289148516e-06, "loss": 0.8665, "step": 126880 }, { "epoch": 0.918514336178129, "grad_norm": 0.158643901348114, "learning_rate": 4.08149290248793e-06, "loss": 0.8795, "step": 126890 }, { "epoch": 0.9185867228387152, "grad_norm": 0.15852761268615723, "learning_rate": 4.081420515827343e-06, "loss": 0.87, "step": 126900 }, { "epoch": 0.9186591094993015, "grad_norm": 0.2482609897851944, "learning_rate": 4.081348129166757e-06, "loss": 0.8684, "step": 126910 }, { "epoch": 0.9187314961598877, "grad_norm": 0.16423265635967255, "learning_rate": 4.081275742506171e-06, "loss": 0.8796, "step": 126920 }, { "epoch": 0.9188038828204739, "grad_norm": 0.1586569845676422, "learning_rate": 4.081203355845585e-06, "loss": 0.8862, "step": 126930 }, { "epoch": 0.91887626948106, "grad_norm": 0.1470942199230194, "learning_rate": 4.0811309691849985e-06, "loss": 0.8605, "step": 126940 }, { "epoch": 0.9189486561416462, "grad_norm": 0.17311806976795197, "learning_rate": 4.081058582524412e-06, "loss": 0.8771, "step": 126950 }, { "epoch": 0.9190210428022324, "grad_norm": 0.16579419374465942, "learning_rate": 4.080986195863827e-06, "loss": 0.8782, "step": 126960 }, { "epoch": 0.9190934294628186, "grad_norm": 0.1688244491815567, "learning_rate": 4.08091380920324e-06, "loss": 0.8752, "step": 126970 }, { "epoch": 0.9191658161234048, "grad_norm": 0.14942839741706848, "learning_rate": 4.080841422542654e-06, "loss": 0.8588, "step": 126980 }, { "epoch": 0.9192382027839909, "grad_norm": 0.18559300899505615, "learning_rate": 4.0807690358820675e-06, "loss": 0.8744, "step": 126990 }, { "epoch": 0.9193105894445771, "grad_norm": 0.15597479045391083, "learning_rate": 4.080696649221482e-06, "loss": 0.8656, "step": 127000 }, { "epoch": 0.9193829761051633, "grad_norm": 0.15126492083072662, "learning_rate": 4.0806242625608956e-06, "loss": 0.87, "step": 127010 }, { "epoch": 0.9194553627657496, "grad_norm": 0.1641504019498825, "learning_rate": 4.080551875900309e-06, "loss": 0.8796, "step": 127020 }, { "epoch": 0.9195277494263357, "grad_norm": 0.15217795968055725, "learning_rate": 4.080479489239723e-06, "loss": 0.8741, "step": 127030 }, { "epoch": 0.9196001360869219, "grad_norm": 0.16015304625034332, "learning_rate": 4.080407102579137e-06, "loss": 0.8903, "step": 127040 }, { "epoch": 0.9196725227475081, "grad_norm": 0.15204580128192902, "learning_rate": 4.080334715918551e-06, "loss": 0.8619, "step": 127050 }, { "epoch": 0.9197449094080943, "grad_norm": 0.14324235916137695, "learning_rate": 4.0802623292579645e-06, "loss": 0.8609, "step": 127060 }, { "epoch": 0.9198172960686805, "grad_norm": 0.1518729031085968, "learning_rate": 4.080189942597378e-06, "loss": 0.8761, "step": 127070 }, { "epoch": 0.9198896827292666, "grad_norm": 0.15307843685150146, "learning_rate": 4.0801175559367926e-06, "loss": 0.8673, "step": 127080 }, { "epoch": 0.9199620693898528, "grad_norm": 0.14242909848690033, "learning_rate": 4.080045169276206e-06, "loss": 0.8756, "step": 127090 }, { "epoch": 0.920034456050439, "grad_norm": 0.16166019439697266, "learning_rate": 4.07997278261562e-06, "loss": 0.8742, "step": 127100 }, { "epoch": 0.9201068427110252, "grad_norm": 0.15393179655075073, "learning_rate": 4.079900395955033e-06, "loss": 0.8783, "step": 127110 }, { "epoch": 0.9201792293716115, "grad_norm": 0.16669778525829315, "learning_rate": 4.079828009294448e-06, "loss": 0.8653, "step": 127120 }, { "epoch": 0.9202516160321976, "grad_norm": 0.15323293209075928, "learning_rate": 4.0797556226338615e-06, "loss": 0.8681, "step": 127130 }, { "epoch": 0.9203240026927838, "grad_norm": 0.1925155520439148, "learning_rate": 4.079683235973275e-06, "loss": 0.8669, "step": 127140 }, { "epoch": 0.92039638935337, "grad_norm": 0.16045774519443512, "learning_rate": 4.079610849312689e-06, "loss": 0.8688, "step": 127150 }, { "epoch": 0.9204687760139562, "grad_norm": 0.16183599829673767, "learning_rate": 4.079538462652103e-06, "loss": 0.8602, "step": 127160 }, { "epoch": 0.9205411626745423, "grad_norm": 0.15572576224803925, "learning_rate": 4.079466075991517e-06, "loss": 0.8722, "step": 127170 }, { "epoch": 0.9206135493351285, "grad_norm": 0.15877743065357208, "learning_rate": 4.07939368933093e-06, "loss": 0.8783, "step": 127180 }, { "epoch": 0.9206859359957147, "grad_norm": 0.14954781532287598, "learning_rate": 4.079321302670344e-06, "loss": 0.8597, "step": 127190 }, { "epoch": 0.9207583226563009, "grad_norm": 0.1656097173690796, "learning_rate": 4.0792489160097585e-06, "loss": 0.8725, "step": 127200 }, { "epoch": 0.920830709316887, "grad_norm": 0.15802302956581116, "learning_rate": 4.079176529349172e-06, "loss": 0.8739, "step": 127210 }, { "epoch": 0.9209030959774732, "grad_norm": 0.14850866794586182, "learning_rate": 4.079104142688586e-06, "loss": 0.854, "step": 127220 }, { "epoch": 0.9209754826380595, "grad_norm": 0.15718205273151398, "learning_rate": 4.079031756027999e-06, "loss": 0.876, "step": 127230 }, { "epoch": 0.9210478692986457, "grad_norm": 0.15593793988227844, "learning_rate": 4.078959369367414e-06, "loss": 0.8773, "step": 127240 }, { "epoch": 0.9211202559592319, "grad_norm": 0.14483477175235748, "learning_rate": 4.0788869827068274e-06, "loss": 0.8722, "step": 127250 }, { "epoch": 0.921192642619818, "grad_norm": 0.16658411920070648, "learning_rate": 4.078814596046241e-06, "loss": 0.8677, "step": 127260 }, { "epoch": 0.9212650292804042, "grad_norm": 0.1465682089328766, "learning_rate": 4.078742209385655e-06, "loss": 0.8688, "step": 127270 }, { "epoch": 0.9213374159409904, "grad_norm": 0.16307535767555237, "learning_rate": 4.078669822725069e-06, "loss": 0.8704, "step": 127280 }, { "epoch": 0.9214098026015766, "grad_norm": 0.303844153881073, "learning_rate": 4.078597436064483e-06, "loss": 0.8708, "step": 127290 }, { "epoch": 0.9214821892621627, "grad_norm": 0.14299264550209045, "learning_rate": 4.078525049403896e-06, "loss": 0.8653, "step": 127300 }, { "epoch": 0.9215545759227489, "grad_norm": 0.14078418910503387, "learning_rate": 4.07845266274331e-06, "loss": 0.8736, "step": 127310 }, { "epoch": 0.9216269625833351, "grad_norm": 0.14355109632015228, "learning_rate": 4.0783802760827244e-06, "loss": 0.8658, "step": 127320 }, { "epoch": 0.9216993492439214, "grad_norm": 0.18450787663459778, "learning_rate": 4.078307889422138e-06, "loss": 0.8626, "step": 127330 }, { "epoch": 0.9217717359045076, "grad_norm": 0.16404852271080017, "learning_rate": 4.078235502761552e-06, "loss": 0.8701, "step": 127340 }, { "epoch": 0.9218441225650937, "grad_norm": 0.15658390522003174, "learning_rate": 4.078163116100965e-06, "loss": 0.8798, "step": 127350 }, { "epoch": 0.9219165092256799, "grad_norm": 0.2226731926202774, "learning_rate": 4.078090729440379e-06, "loss": 0.8801, "step": 127360 }, { "epoch": 0.9219888958862661, "grad_norm": 0.19657254219055176, "learning_rate": 4.078018342779793e-06, "loss": 0.8749, "step": 127370 }, { "epoch": 0.9220612825468523, "grad_norm": 0.15708957612514496, "learning_rate": 4.077945956119207e-06, "loss": 0.8697, "step": 127380 }, { "epoch": 0.9221336692074384, "grad_norm": 0.16604341566562653, "learning_rate": 4.077873569458621e-06, "loss": 0.8535, "step": 127390 }, { "epoch": 0.9222060558680246, "grad_norm": 0.14287860691547394, "learning_rate": 4.077801182798034e-06, "loss": 0.8556, "step": 127400 }, { "epoch": 0.9222784425286108, "grad_norm": 0.1693638414144516, "learning_rate": 4.077728796137448e-06, "loss": 0.8789, "step": 127410 }, { "epoch": 0.922350829189197, "grad_norm": 0.1491190791130066, "learning_rate": 4.0776564094768614e-06, "loss": 0.863, "step": 127420 }, { "epoch": 0.9224232158497832, "grad_norm": 0.14451493322849274, "learning_rate": 4.077584022816276e-06, "loss": 0.8768, "step": 127430 }, { "epoch": 0.9224956025103694, "grad_norm": 0.155923992395401, "learning_rate": 4.0775116361556895e-06, "loss": 0.8745, "step": 127440 }, { "epoch": 0.9225679891709556, "grad_norm": 0.1764858216047287, "learning_rate": 4.077439249495103e-06, "loss": 0.8792, "step": 127450 }, { "epoch": 0.9226403758315418, "grad_norm": 0.16587840020656586, "learning_rate": 4.077366862834517e-06, "loss": 0.8666, "step": 127460 }, { "epoch": 0.922712762492128, "grad_norm": 0.16507935523986816, "learning_rate": 4.077294476173931e-06, "loss": 0.8765, "step": 127470 }, { "epoch": 0.9227851491527141, "grad_norm": 0.15929803252220154, "learning_rate": 4.077222089513345e-06, "loss": 0.8667, "step": 127480 }, { "epoch": 0.9228575358133003, "grad_norm": 0.15834636986255646, "learning_rate": 4.0771497028527584e-06, "loss": 0.8802, "step": 127490 }, { "epoch": 0.9229299224738865, "grad_norm": 0.1508595049381256, "learning_rate": 4.077077316192172e-06, "loss": 0.8584, "step": 127500 }, { "epoch": 0.9230023091344727, "grad_norm": 0.14436863362789154, "learning_rate": 4.0770049295315865e-06, "loss": 0.8725, "step": 127510 }, { "epoch": 0.9230746957950589, "grad_norm": 0.17195680737495422, "learning_rate": 4.076932542871e-06, "loss": 0.8717, "step": 127520 }, { "epoch": 0.923147082455645, "grad_norm": 0.16129696369171143, "learning_rate": 4.076860156210414e-06, "loss": 0.8821, "step": 127530 }, { "epoch": 0.9232194691162312, "grad_norm": 0.15177470445632935, "learning_rate": 4.076787769549827e-06, "loss": 0.8636, "step": 127540 }, { "epoch": 0.9232918557768175, "grad_norm": 0.16349272429943085, "learning_rate": 4.076715382889241e-06, "loss": 0.8705, "step": 127550 }, { "epoch": 0.9233642424374037, "grad_norm": 0.14663663506507874, "learning_rate": 4.0766429962286555e-06, "loss": 0.8571, "step": 127560 }, { "epoch": 0.9234366290979898, "grad_norm": 0.16342896223068237, "learning_rate": 4.076570609568069e-06, "loss": 0.8687, "step": 127570 }, { "epoch": 0.923509015758576, "grad_norm": 0.15321224927902222, "learning_rate": 4.076498222907483e-06, "loss": 0.8622, "step": 127580 }, { "epoch": 0.9235814024191622, "grad_norm": 0.1598549485206604, "learning_rate": 4.076425836246896e-06, "loss": 0.8786, "step": 127590 }, { "epoch": 0.9236537890797484, "grad_norm": 0.16147591173648834, "learning_rate": 4.076353449586311e-06, "loss": 0.872, "step": 127600 }, { "epoch": 0.9237261757403346, "grad_norm": 0.14562930166721344, "learning_rate": 4.076281062925724e-06, "loss": 0.8682, "step": 127610 }, { "epoch": 0.9237985624009207, "grad_norm": 0.1647154986858368, "learning_rate": 4.076208676265138e-06, "loss": 0.8815, "step": 127620 }, { "epoch": 0.9238709490615069, "grad_norm": 0.1684718132019043, "learning_rate": 4.076136289604552e-06, "loss": 0.8714, "step": 127630 }, { "epoch": 0.9239433357220931, "grad_norm": 0.16165629029273987, "learning_rate": 4.076063902943966e-06, "loss": 0.8705, "step": 127640 }, { "epoch": 0.9240157223826794, "grad_norm": 0.15720994770526886, "learning_rate": 4.07599151628338e-06, "loss": 0.8629, "step": 127650 }, { "epoch": 0.9240881090432655, "grad_norm": 0.16634640097618103, "learning_rate": 4.075919129622793e-06, "loss": 0.883, "step": 127660 }, { "epoch": 0.9241604957038517, "grad_norm": 0.1512623280286789, "learning_rate": 4.075846742962207e-06, "loss": 0.8628, "step": 127670 }, { "epoch": 0.9242328823644379, "grad_norm": 0.14942419528961182, "learning_rate": 4.075774356301621e-06, "loss": 0.8543, "step": 127680 }, { "epoch": 0.9243052690250241, "grad_norm": 0.15057924389839172, "learning_rate": 4.075701969641035e-06, "loss": 0.8695, "step": 127690 }, { "epoch": 0.9243776556856103, "grad_norm": 0.15089672803878784, "learning_rate": 4.075629582980449e-06, "loss": 0.8924, "step": 127700 }, { "epoch": 0.9244500423461964, "grad_norm": 0.15450294315814972, "learning_rate": 4.075557196319862e-06, "loss": 0.8642, "step": 127710 }, { "epoch": 0.9245224290067826, "grad_norm": 0.14952602982521057, "learning_rate": 4.075484809659277e-06, "loss": 0.8694, "step": 127720 }, { "epoch": 0.9245948156673688, "grad_norm": 0.16396868228912354, "learning_rate": 4.07541242299869e-06, "loss": 0.8905, "step": 127730 }, { "epoch": 0.924667202327955, "grad_norm": 0.1575574278831482, "learning_rate": 4.075340036338104e-06, "loss": 0.8772, "step": 127740 }, { "epoch": 0.9247395889885411, "grad_norm": 0.16081462800502777, "learning_rate": 4.0752676496775176e-06, "loss": 0.8768, "step": 127750 }, { "epoch": 0.9248119756491274, "grad_norm": 0.14505434036254883, "learning_rate": 4.075195263016932e-06, "loss": 0.8524, "step": 127760 }, { "epoch": 0.9248843623097136, "grad_norm": 0.15242122113704681, "learning_rate": 4.075122876356346e-06, "loss": 0.8701, "step": 127770 }, { "epoch": 0.9249567489702998, "grad_norm": 0.14712516963481903, "learning_rate": 4.075050489695759e-06, "loss": 0.8619, "step": 127780 }, { "epoch": 0.925029135630886, "grad_norm": 0.15457618236541748, "learning_rate": 4.074978103035173e-06, "loss": 0.8745, "step": 127790 }, { "epoch": 0.9251015222914721, "grad_norm": 0.15267246961593628, "learning_rate": 4.074905716374587e-06, "loss": 0.8584, "step": 127800 }, { "epoch": 0.9251739089520583, "grad_norm": 0.15300290286540985, "learning_rate": 4.074833329714001e-06, "loss": 0.8626, "step": 127810 }, { "epoch": 0.9252462956126445, "grad_norm": 0.14224673807621002, "learning_rate": 4.0747609430534146e-06, "loss": 0.8753, "step": 127820 }, { "epoch": 0.9253186822732307, "grad_norm": 0.15373513102531433, "learning_rate": 4.074688556392828e-06, "loss": 0.8811, "step": 127830 }, { "epoch": 0.9253910689338168, "grad_norm": 0.15759536623954773, "learning_rate": 4.074616169732243e-06, "loss": 0.8691, "step": 127840 }, { "epoch": 0.925463455594403, "grad_norm": 0.16300243139266968, "learning_rate": 4.074543783071656e-06, "loss": 0.8709, "step": 127850 }, { "epoch": 0.9255358422549893, "grad_norm": 0.16847741603851318, "learning_rate": 4.07447139641107e-06, "loss": 0.8689, "step": 127860 }, { "epoch": 0.9256082289155755, "grad_norm": 0.16154174506664276, "learning_rate": 4.0743990097504835e-06, "loss": 0.8533, "step": 127870 }, { "epoch": 0.9256806155761617, "grad_norm": 0.15363070368766785, "learning_rate": 4.074326623089898e-06, "loss": 0.8616, "step": 127880 }, { "epoch": 0.9257530022367478, "grad_norm": 0.14643029868602753, "learning_rate": 4.0742542364293116e-06, "loss": 0.8536, "step": 127890 }, { "epoch": 0.925825388897334, "grad_norm": 0.14773331582546234, "learning_rate": 4.074181849768725e-06, "loss": 0.8606, "step": 127900 }, { "epoch": 0.9258977755579202, "grad_norm": 0.1448742300271988, "learning_rate": 4.074109463108139e-06, "loss": 0.8676, "step": 127910 }, { "epoch": 0.9259701622185064, "grad_norm": 0.18105871975421906, "learning_rate": 4.074037076447552e-06, "loss": 0.8664, "step": 127920 }, { "epoch": 0.9260425488790925, "grad_norm": 0.1581772118806839, "learning_rate": 4.073964689786966e-06, "loss": 0.8803, "step": 127930 }, { "epoch": 0.9261149355396787, "grad_norm": 0.1465241014957428, "learning_rate": 4.07389230312638e-06, "loss": 0.8762, "step": 127940 }, { "epoch": 0.9261873222002649, "grad_norm": 0.16206571459770203, "learning_rate": 4.073819916465794e-06, "loss": 0.8781, "step": 127950 }, { "epoch": 0.9262597088608511, "grad_norm": 0.1485998034477234, "learning_rate": 4.073747529805208e-06, "loss": 0.8712, "step": 127960 }, { "epoch": 0.9263320955214374, "grad_norm": 0.15023688971996307, "learning_rate": 4.073675143144621e-06, "loss": 0.8626, "step": 127970 }, { "epoch": 0.9264044821820235, "grad_norm": 0.14640280604362488, "learning_rate": 4.073602756484035e-06, "loss": 0.8683, "step": 127980 }, { "epoch": 0.9264768688426097, "grad_norm": 0.15513427555561066, "learning_rate": 4.073530369823449e-06, "loss": 0.8722, "step": 127990 }, { "epoch": 0.9265492555031959, "grad_norm": 0.14883030951023102, "learning_rate": 4.073457983162863e-06, "loss": 0.8616, "step": 128000 }, { "epoch": 0.9266216421637821, "grad_norm": 0.17207932472229004, "learning_rate": 4.073385596502277e-06, "loss": 0.8649, "step": 128010 }, { "epoch": 0.9266940288243682, "grad_norm": 0.14062434434890747, "learning_rate": 4.07331320984169e-06, "loss": 0.8589, "step": 128020 }, { "epoch": 0.9267664154849544, "grad_norm": 0.14233818650245667, "learning_rate": 4.073240823181105e-06, "loss": 0.8628, "step": 128030 }, { "epoch": 0.9268388021455406, "grad_norm": 0.15095971524715424, "learning_rate": 4.073168436520518e-06, "loss": 0.8647, "step": 128040 }, { "epoch": 0.9269111888061268, "grad_norm": 0.164928138256073, "learning_rate": 4.073096049859932e-06, "loss": 0.8679, "step": 128050 }, { "epoch": 0.926983575466713, "grad_norm": 0.15687911212444305, "learning_rate": 4.073023663199346e-06, "loss": 0.8691, "step": 128060 }, { "epoch": 0.9270559621272991, "grad_norm": 0.16557860374450684, "learning_rate": 4.07295127653876e-06, "loss": 0.8834, "step": 128070 }, { "epoch": 0.9271283487878854, "grad_norm": 0.15757247805595398, "learning_rate": 4.072878889878174e-06, "loss": 0.8759, "step": 128080 }, { "epoch": 0.9272007354484716, "grad_norm": 0.15857826173305511, "learning_rate": 4.072806503217587e-06, "loss": 0.8754, "step": 128090 }, { "epoch": 0.9272731221090578, "grad_norm": 0.15188080072402954, "learning_rate": 4.072734116557001e-06, "loss": 0.8669, "step": 128100 }, { "epoch": 0.927345508769644, "grad_norm": 0.15522122383117676, "learning_rate": 4.072661729896415e-06, "loss": 0.8643, "step": 128110 }, { "epoch": 0.9274178954302301, "grad_norm": 0.14088667929172516, "learning_rate": 4.072589343235829e-06, "loss": 0.8784, "step": 128120 }, { "epoch": 0.9274902820908163, "grad_norm": 0.1532406508922577, "learning_rate": 4.072516956575243e-06, "loss": 0.8637, "step": 128130 }, { "epoch": 0.9275626687514025, "grad_norm": 0.14696896076202393, "learning_rate": 4.072444569914656e-06, "loss": 0.8601, "step": 128140 }, { "epoch": 0.9276350554119887, "grad_norm": 0.1489928513765335, "learning_rate": 4.07237218325407e-06, "loss": 0.8698, "step": 128150 }, { "epoch": 0.9277074420725748, "grad_norm": 0.1538485586643219, "learning_rate": 4.072299796593484e-06, "loss": 0.8817, "step": 128160 }, { "epoch": 0.927779828733161, "grad_norm": 0.14813606441020966, "learning_rate": 4.072227409932898e-06, "loss": 0.8636, "step": 128170 }, { "epoch": 0.9278522153937473, "grad_norm": 0.1586228609085083, "learning_rate": 4.0721550232723115e-06, "loss": 0.868, "step": 128180 }, { "epoch": 0.9279246020543335, "grad_norm": 0.21530722081661224, "learning_rate": 4.072082636611725e-06, "loss": 0.8735, "step": 128190 }, { "epoch": 0.9279969887149196, "grad_norm": 0.18316511809825897, "learning_rate": 4.07201024995114e-06, "loss": 0.8693, "step": 128200 }, { "epoch": 0.9280693753755058, "grad_norm": 0.15171143412590027, "learning_rate": 4.071937863290553e-06, "loss": 0.8617, "step": 128210 }, { "epoch": 0.928141762036092, "grad_norm": 0.15644243359565735, "learning_rate": 4.071865476629967e-06, "loss": 0.8728, "step": 128220 }, { "epoch": 0.9282141486966782, "grad_norm": 0.16042940318584442, "learning_rate": 4.0717930899693804e-06, "loss": 0.8778, "step": 128230 }, { "epoch": 0.9282865353572644, "grad_norm": 0.15380287170410156, "learning_rate": 4.071720703308795e-06, "loss": 0.8594, "step": 128240 }, { "epoch": 0.9283589220178505, "grad_norm": 0.15428663790225983, "learning_rate": 4.0716483166482085e-06, "loss": 0.8817, "step": 128250 }, { "epoch": 0.9284313086784367, "grad_norm": 0.14984562993049622, "learning_rate": 4.071575929987622e-06, "loss": 0.8781, "step": 128260 }, { "epoch": 0.9285036953390229, "grad_norm": 0.16001556813716888, "learning_rate": 4.071503543327036e-06, "loss": 0.8666, "step": 128270 }, { "epoch": 0.9285760819996091, "grad_norm": 0.1445271223783493, "learning_rate": 4.07143115666645e-06, "loss": 0.8772, "step": 128280 }, { "epoch": 0.9286484686601953, "grad_norm": 0.1755678355693817, "learning_rate": 4.071358770005864e-06, "loss": 0.8805, "step": 128290 }, { "epoch": 0.9287208553207815, "grad_norm": 0.16216261684894562, "learning_rate": 4.0712863833452775e-06, "loss": 0.8694, "step": 128300 }, { "epoch": 0.9287932419813677, "grad_norm": 0.15545831620693207, "learning_rate": 4.071213996684691e-06, "loss": 0.8681, "step": 128310 }, { "epoch": 0.9288656286419539, "grad_norm": 0.15312260389328003, "learning_rate": 4.0711416100241055e-06, "loss": 0.8721, "step": 128320 }, { "epoch": 0.92893801530254, "grad_norm": 0.1504194438457489, "learning_rate": 4.071069223363519e-06, "loss": 0.8624, "step": 128330 }, { "epoch": 0.9290104019631262, "grad_norm": 0.15465429425239563, "learning_rate": 4.070996836702933e-06, "loss": 0.8799, "step": 128340 }, { "epoch": 0.9290827886237124, "grad_norm": 0.14774566888809204, "learning_rate": 4.070924450042346e-06, "loss": 0.8717, "step": 128350 }, { "epoch": 0.9291551752842986, "grad_norm": 0.16979655623435974, "learning_rate": 4.070852063381761e-06, "loss": 0.8552, "step": 128360 }, { "epoch": 0.9292275619448848, "grad_norm": 0.15105919539928436, "learning_rate": 4.0707796767211745e-06, "loss": 0.8624, "step": 128370 }, { "epoch": 0.9292999486054709, "grad_norm": 0.155971959233284, "learning_rate": 4.070707290060588e-06, "loss": 0.8592, "step": 128380 }, { "epoch": 0.9293723352660572, "grad_norm": 0.162667378783226, "learning_rate": 4.070634903400002e-06, "loss": 0.8771, "step": 128390 }, { "epoch": 0.9294447219266434, "grad_norm": 0.1585136353969574, "learning_rate": 4.070562516739416e-06, "loss": 0.8714, "step": 128400 }, { "epoch": 0.9295171085872296, "grad_norm": 0.14089208841323853, "learning_rate": 4.07049013007883e-06, "loss": 0.8691, "step": 128410 }, { "epoch": 0.9295894952478158, "grad_norm": 0.14816270768642426, "learning_rate": 4.070417743418243e-06, "loss": 0.8754, "step": 128420 }, { "epoch": 0.9296618819084019, "grad_norm": 0.14089035987854004, "learning_rate": 4.070345356757657e-06, "loss": 0.869, "step": 128430 }, { "epoch": 0.9297342685689881, "grad_norm": 0.15903660655021667, "learning_rate": 4.0702729700970715e-06, "loss": 0.8669, "step": 128440 }, { "epoch": 0.9298066552295743, "grad_norm": 0.15594375133514404, "learning_rate": 4.070200583436484e-06, "loss": 0.8594, "step": 128450 }, { "epoch": 0.9298790418901605, "grad_norm": 0.15002770721912384, "learning_rate": 4.070128196775898e-06, "loss": 0.858, "step": 128460 }, { "epoch": 0.9299514285507466, "grad_norm": 0.1600421667098999, "learning_rate": 4.070055810115312e-06, "loss": 0.8613, "step": 128470 }, { "epoch": 0.9300238152113328, "grad_norm": 0.16615080833435059, "learning_rate": 4.069983423454726e-06, "loss": 0.8763, "step": 128480 }, { "epoch": 0.930096201871919, "grad_norm": 0.15148785710334778, "learning_rate": 4.0699110367941396e-06, "loss": 0.8695, "step": 128490 }, { "epoch": 0.9301685885325053, "grad_norm": 0.14840847253799438, "learning_rate": 4.069838650133553e-06, "loss": 0.8739, "step": 128500 }, { "epoch": 0.9302409751930915, "grad_norm": 0.13647682964801788, "learning_rate": 4.069766263472968e-06, "loss": 0.8672, "step": 128510 }, { "epoch": 0.9303133618536776, "grad_norm": 0.1561964750289917, "learning_rate": 4.069693876812381e-06, "loss": 0.8637, "step": 128520 }, { "epoch": 0.9303857485142638, "grad_norm": 0.22199493646621704, "learning_rate": 4.069621490151795e-06, "loss": 0.8707, "step": 128530 }, { "epoch": 0.93045813517485, "grad_norm": 0.1517588496208191, "learning_rate": 4.0695491034912085e-06, "loss": 0.8663, "step": 128540 }, { "epoch": 0.9305305218354362, "grad_norm": 0.15259967744350433, "learning_rate": 4.069476716830623e-06, "loss": 0.8684, "step": 128550 }, { "epoch": 0.9306029084960223, "grad_norm": 0.15910792350769043, "learning_rate": 4.0694043301700366e-06, "loss": 0.8676, "step": 128560 }, { "epoch": 0.9306752951566085, "grad_norm": 0.16429047286510468, "learning_rate": 4.06933194350945e-06, "loss": 0.8783, "step": 128570 }, { "epoch": 0.9307476818171947, "grad_norm": 0.1512821912765503, "learning_rate": 4.069259556848864e-06, "loss": 0.8691, "step": 128580 }, { "epoch": 0.9308200684777809, "grad_norm": 0.154913067817688, "learning_rate": 4.069187170188278e-06, "loss": 0.8761, "step": 128590 }, { "epoch": 0.930892455138367, "grad_norm": 0.14742477238178253, "learning_rate": 4.069114783527692e-06, "loss": 0.8685, "step": 128600 }, { "epoch": 0.9309648417989533, "grad_norm": 0.15343081951141357, "learning_rate": 4.0690423968671055e-06, "loss": 0.8675, "step": 128610 }, { "epoch": 0.9310372284595395, "grad_norm": 0.18212413787841797, "learning_rate": 4.068970010206519e-06, "loss": 0.8671, "step": 128620 }, { "epoch": 0.9311096151201257, "grad_norm": 0.14590677618980408, "learning_rate": 4.0688976235459336e-06, "loss": 0.8758, "step": 128630 }, { "epoch": 0.9311820017807119, "grad_norm": 0.15685223042964935, "learning_rate": 4.068825236885347e-06, "loss": 0.8634, "step": 128640 }, { "epoch": 0.931254388441298, "grad_norm": 0.1610851287841797, "learning_rate": 4.068752850224761e-06, "loss": 0.8678, "step": 128650 }, { "epoch": 0.9313267751018842, "grad_norm": 0.1528211534023285, "learning_rate": 4.068680463564174e-06, "loss": 0.8797, "step": 128660 }, { "epoch": 0.9313991617624704, "grad_norm": 0.1573326289653778, "learning_rate": 4.068608076903589e-06, "loss": 0.8785, "step": 128670 }, { "epoch": 0.9314715484230566, "grad_norm": 0.15934540331363678, "learning_rate": 4.0685356902430025e-06, "loss": 0.8626, "step": 128680 }, { "epoch": 0.9315439350836427, "grad_norm": 0.14272333681583405, "learning_rate": 4.068463303582416e-06, "loss": 0.8685, "step": 128690 }, { "epoch": 0.9316163217442289, "grad_norm": 0.1473149210214615, "learning_rate": 4.06839091692183e-06, "loss": 0.8655, "step": 128700 }, { "epoch": 0.9316887084048152, "grad_norm": 0.1499226689338684, "learning_rate": 4.068318530261244e-06, "loss": 0.8633, "step": 128710 }, { "epoch": 0.9317610950654014, "grad_norm": 0.14663182199001312, "learning_rate": 4.068246143600658e-06, "loss": 0.852, "step": 128720 }, { "epoch": 0.9318334817259876, "grad_norm": 0.15294471383094788, "learning_rate": 4.068173756940071e-06, "loss": 0.8769, "step": 128730 }, { "epoch": 0.9319058683865737, "grad_norm": 0.16538985073566437, "learning_rate": 4.068101370279485e-06, "loss": 0.8754, "step": 128740 }, { "epoch": 0.9319782550471599, "grad_norm": 0.14424091577529907, "learning_rate": 4.0680289836188995e-06, "loss": 0.8745, "step": 128750 }, { "epoch": 0.9320506417077461, "grad_norm": 0.18575291335582733, "learning_rate": 4.067956596958313e-06, "loss": 0.8542, "step": 128760 }, { "epoch": 0.9321230283683323, "grad_norm": 0.156608447432518, "learning_rate": 4.067884210297727e-06, "loss": 0.8703, "step": 128770 }, { "epoch": 0.9321954150289185, "grad_norm": 0.16681206226348877, "learning_rate": 4.06781182363714e-06, "loss": 0.8691, "step": 128780 }, { "epoch": 0.9322678016895046, "grad_norm": 0.18856379389762878, "learning_rate": 4.067739436976554e-06, "loss": 0.8727, "step": 128790 }, { "epoch": 0.9323401883500908, "grad_norm": 0.15221746265888214, "learning_rate": 4.0676670503159684e-06, "loss": 0.8771, "step": 128800 }, { "epoch": 0.932412575010677, "grad_norm": 0.15797759592533112, "learning_rate": 4.067594663655382e-06, "loss": 0.8682, "step": 128810 }, { "epoch": 0.9324849616712633, "grad_norm": 0.1687854826450348, "learning_rate": 4.067522276994796e-06, "loss": 0.8743, "step": 128820 }, { "epoch": 0.9325573483318494, "grad_norm": 0.15136481821537018, "learning_rate": 4.067449890334209e-06, "loss": 0.8712, "step": 128830 }, { "epoch": 0.9326297349924356, "grad_norm": 0.1667127013206482, "learning_rate": 4.067377503673624e-06, "loss": 0.862, "step": 128840 }, { "epoch": 0.9327021216530218, "grad_norm": 0.1545359492301941, "learning_rate": 4.067305117013037e-06, "loss": 0.8564, "step": 128850 }, { "epoch": 0.932774508313608, "grad_norm": 0.18718428909778595, "learning_rate": 4.067232730352451e-06, "loss": 0.8584, "step": 128860 }, { "epoch": 0.9328468949741942, "grad_norm": 0.14906947314739227, "learning_rate": 4.067160343691865e-06, "loss": 0.8737, "step": 128870 }, { "epoch": 0.9329192816347803, "grad_norm": 0.1455237865447998, "learning_rate": 4.067087957031279e-06, "loss": 0.872, "step": 128880 }, { "epoch": 0.9329916682953665, "grad_norm": 0.1723933070898056, "learning_rate": 4.067015570370693e-06, "loss": 0.8785, "step": 128890 }, { "epoch": 0.9330640549559527, "grad_norm": 0.1570318043231964, "learning_rate": 4.066943183710106e-06, "loss": 0.8693, "step": 128900 }, { "epoch": 0.9331364416165389, "grad_norm": 0.16334332525730133, "learning_rate": 4.06687079704952e-06, "loss": 0.8744, "step": 128910 }, { "epoch": 0.9332088282771251, "grad_norm": 0.15056326985359192, "learning_rate": 4.066798410388934e-06, "loss": 0.8666, "step": 128920 }, { "epoch": 0.9332812149377113, "grad_norm": 0.15324917435646057, "learning_rate": 4.066726023728348e-06, "loss": 0.8814, "step": 128930 }, { "epoch": 0.9333536015982975, "grad_norm": 0.1748197376728058, "learning_rate": 4.066653637067762e-06, "loss": 0.869, "step": 128940 }, { "epoch": 0.9334259882588837, "grad_norm": 0.1483113169670105, "learning_rate": 4.066581250407175e-06, "loss": 0.8726, "step": 128950 }, { "epoch": 0.9334983749194699, "grad_norm": 0.14639945328235626, "learning_rate": 4.06650886374659e-06, "loss": 0.8641, "step": 128960 }, { "epoch": 0.933570761580056, "grad_norm": 0.1470886915922165, "learning_rate": 4.066436477086003e-06, "loss": 0.8671, "step": 128970 }, { "epoch": 0.9336431482406422, "grad_norm": 0.15743905305862427, "learning_rate": 4.066364090425416e-06, "loss": 0.8751, "step": 128980 }, { "epoch": 0.9337155349012284, "grad_norm": 0.14820967614650726, "learning_rate": 4.0662917037648305e-06, "loss": 0.8643, "step": 128990 }, { "epoch": 0.9337879215618146, "grad_norm": 0.15819282829761505, "learning_rate": 4.066219317104244e-06, "loss": 0.8715, "step": 129000 }, { "epoch": 0.9338603082224007, "grad_norm": 0.16119031608104706, "learning_rate": 4.066146930443658e-06, "loss": 0.885, "step": 129010 }, { "epoch": 0.9339326948829869, "grad_norm": 0.1570100635290146, "learning_rate": 4.066074543783071e-06, "loss": 0.863, "step": 129020 }, { "epoch": 0.9340050815435732, "grad_norm": 0.15377597510814667, "learning_rate": 4.066002157122486e-06, "loss": 0.8705, "step": 129030 }, { "epoch": 0.9340774682041594, "grad_norm": 0.1488153040409088, "learning_rate": 4.0659297704618995e-06, "loss": 0.8709, "step": 129040 }, { "epoch": 0.9341498548647456, "grad_norm": 0.1504792720079422, "learning_rate": 4.065857383801313e-06, "loss": 0.8645, "step": 129050 }, { "epoch": 0.9342222415253317, "grad_norm": 0.15076880156993866, "learning_rate": 4.065784997140727e-06, "loss": 0.8786, "step": 129060 }, { "epoch": 0.9342946281859179, "grad_norm": 0.15644735097885132, "learning_rate": 4.065712610480141e-06, "loss": 0.8497, "step": 129070 }, { "epoch": 0.9343670148465041, "grad_norm": 0.15488304197788239, "learning_rate": 4.065640223819555e-06, "loss": 0.8716, "step": 129080 }, { "epoch": 0.9344394015070903, "grad_norm": 0.15287454426288605, "learning_rate": 4.065567837158968e-06, "loss": 0.8699, "step": 129090 }, { "epoch": 0.9345117881676764, "grad_norm": 0.15674547851085663, "learning_rate": 4.065495450498382e-06, "loss": 0.8544, "step": 129100 }, { "epoch": 0.9345841748282626, "grad_norm": 0.14883847534656525, "learning_rate": 4.0654230638377965e-06, "loss": 0.8748, "step": 129110 }, { "epoch": 0.9346565614888488, "grad_norm": 0.1638948619365692, "learning_rate": 4.06535067717721e-06, "loss": 0.8734, "step": 129120 }, { "epoch": 0.934728948149435, "grad_norm": 0.1496628224849701, "learning_rate": 4.065278290516624e-06, "loss": 0.8622, "step": 129130 }, { "epoch": 0.9348013348100213, "grad_norm": 0.15044906735420227, "learning_rate": 4.065205903856037e-06, "loss": 0.8845, "step": 129140 }, { "epoch": 0.9348737214706074, "grad_norm": 0.1459684520959854, "learning_rate": 4.065133517195452e-06, "loss": 0.8753, "step": 129150 }, { "epoch": 0.9349461081311936, "grad_norm": 0.14683480560779572, "learning_rate": 4.065061130534865e-06, "loss": 0.8597, "step": 129160 }, { "epoch": 0.9350184947917798, "grad_norm": 0.17215889692306519, "learning_rate": 4.064988743874279e-06, "loss": 0.8613, "step": 129170 }, { "epoch": 0.935090881452366, "grad_norm": 0.15645846724510193, "learning_rate": 4.064916357213693e-06, "loss": 0.8649, "step": 129180 }, { "epoch": 0.9351632681129521, "grad_norm": 0.15681461989879608, "learning_rate": 4.064843970553107e-06, "loss": 0.8675, "step": 129190 }, { "epoch": 0.9352356547735383, "grad_norm": 0.15553514659404755, "learning_rate": 4.064771583892521e-06, "loss": 0.865, "step": 129200 }, { "epoch": 0.9353080414341245, "grad_norm": 0.1579878181219101, "learning_rate": 4.064699197231934e-06, "loss": 0.8564, "step": 129210 }, { "epoch": 0.9353804280947107, "grad_norm": 0.1529635339975357, "learning_rate": 4.064626810571348e-06, "loss": 0.8701, "step": 129220 }, { "epoch": 0.9354528147552968, "grad_norm": 0.17497135698795319, "learning_rate": 4.064554423910762e-06, "loss": 0.858, "step": 129230 }, { "epoch": 0.9355252014158831, "grad_norm": 0.1518746018409729, "learning_rate": 4.064482037250176e-06, "loss": 0.8595, "step": 129240 }, { "epoch": 0.9355975880764693, "grad_norm": 0.15486803650856018, "learning_rate": 4.06440965058959e-06, "loss": 0.8762, "step": 129250 }, { "epoch": 0.9356699747370555, "grad_norm": 0.16987523436546326, "learning_rate": 4.064337263929003e-06, "loss": 0.8698, "step": 129260 }, { "epoch": 0.9357423613976417, "grad_norm": 0.15838555991649628, "learning_rate": 4.064264877268418e-06, "loss": 0.8614, "step": 129270 }, { "epoch": 0.9358147480582278, "grad_norm": 0.15161006152629852, "learning_rate": 4.064192490607831e-06, "loss": 0.8582, "step": 129280 }, { "epoch": 0.935887134718814, "grad_norm": 0.16381420195102692, "learning_rate": 4.064120103947245e-06, "loss": 0.8612, "step": 129290 }, { "epoch": 0.9359595213794002, "grad_norm": 0.15253403782844543, "learning_rate": 4.0640477172866586e-06, "loss": 0.8672, "step": 129300 }, { "epoch": 0.9360319080399864, "grad_norm": 0.14792008697986603, "learning_rate": 4.063975330626073e-06, "loss": 0.8663, "step": 129310 }, { "epoch": 0.9361042947005725, "grad_norm": 0.15635430812835693, "learning_rate": 4.063902943965487e-06, "loss": 0.8706, "step": 129320 }, { "epoch": 0.9361766813611587, "grad_norm": 0.16064366698265076, "learning_rate": 4.0638305573049e-06, "loss": 0.8765, "step": 129330 }, { "epoch": 0.9362490680217449, "grad_norm": 0.156932070851326, "learning_rate": 4.063758170644314e-06, "loss": 0.881, "step": 129340 }, { "epoch": 0.9363214546823312, "grad_norm": 0.1553388088941574, "learning_rate": 4.063685783983728e-06, "loss": 0.8881, "step": 129350 }, { "epoch": 0.9363938413429174, "grad_norm": 0.13980628550052643, "learning_rate": 4.063613397323142e-06, "loss": 0.8828, "step": 129360 }, { "epoch": 0.9364662280035035, "grad_norm": 0.14694495499134064, "learning_rate": 4.0635410106625556e-06, "loss": 0.8716, "step": 129370 }, { "epoch": 0.9365386146640897, "grad_norm": 0.15779894590377808, "learning_rate": 4.063468624001969e-06, "loss": 0.8684, "step": 129380 }, { "epoch": 0.9366110013246759, "grad_norm": 0.15921930968761444, "learning_rate": 4.063396237341384e-06, "loss": 0.868, "step": 129390 }, { "epoch": 0.9366833879852621, "grad_norm": 0.15195946395397186, "learning_rate": 4.063323850680797e-06, "loss": 0.8714, "step": 129400 }, { "epoch": 0.9367557746458482, "grad_norm": 0.1530548632144928, "learning_rate": 4.063251464020211e-06, "loss": 0.8691, "step": 129410 }, { "epoch": 0.9368281613064344, "grad_norm": 0.15170645713806152, "learning_rate": 4.0631790773596245e-06, "loss": 0.8762, "step": 129420 }, { "epoch": 0.9369005479670206, "grad_norm": 0.15962299704551697, "learning_rate": 4.063106690699038e-06, "loss": 0.871, "step": 129430 }, { "epoch": 0.9369729346276068, "grad_norm": 0.16227169334888458, "learning_rate": 4.0630343040384526e-06, "loss": 0.8512, "step": 129440 }, { "epoch": 0.9370453212881931, "grad_norm": 0.1503174751996994, "learning_rate": 4.062961917377866e-06, "loss": 0.8683, "step": 129450 }, { "epoch": 0.9371177079487792, "grad_norm": 0.1538417786359787, "learning_rate": 4.06288953071728e-06, "loss": 0.8637, "step": 129460 }, { "epoch": 0.9371900946093654, "grad_norm": 0.14984267950057983, "learning_rate": 4.062817144056693e-06, "loss": 0.8632, "step": 129470 }, { "epoch": 0.9372624812699516, "grad_norm": 0.1558646708726883, "learning_rate": 4.062744757396108e-06, "loss": 0.8677, "step": 129480 }, { "epoch": 0.9373348679305378, "grad_norm": 0.16419059038162231, "learning_rate": 4.0626723707355215e-06, "loss": 0.8634, "step": 129490 }, { "epoch": 0.937407254591124, "grad_norm": 0.15692633390426636, "learning_rate": 4.062599984074935e-06, "loss": 0.8717, "step": 129500 }, { "epoch": 0.9374796412517101, "grad_norm": 0.15984247624874115, "learning_rate": 4.062527597414349e-06, "loss": 0.8783, "step": 129510 }, { "epoch": 0.9375520279122963, "grad_norm": 0.15747103095054626, "learning_rate": 4.062455210753762e-06, "loss": 0.8726, "step": 129520 }, { "epoch": 0.9376244145728825, "grad_norm": 0.15458211302757263, "learning_rate": 4.062382824093176e-06, "loss": 0.8605, "step": 129530 }, { "epoch": 0.9376968012334687, "grad_norm": 0.14359694719314575, "learning_rate": 4.0623104374325904e-06, "loss": 0.8852, "step": 129540 }, { "epoch": 0.9377691878940548, "grad_norm": 0.16120558977127075, "learning_rate": 4.062238050772004e-06, "loss": 0.8745, "step": 129550 }, { "epoch": 0.9378415745546411, "grad_norm": 0.15251369774341583, "learning_rate": 4.062165664111418e-06, "loss": 0.8723, "step": 129560 }, { "epoch": 0.9379139612152273, "grad_norm": 0.15283681452274323, "learning_rate": 4.062093277450831e-06, "loss": 0.8648, "step": 129570 }, { "epoch": 0.9379863478758135, "grad_norm": 0.15063002705574036, "learning_rate": 4.062020890790245e-06, "loss": 0.8652, "step": 129580 }, { "epoch": 0.9380587345363997, "grad_norm": 0.1525861620903015, "learning_rate": 4.061948504129659e-06, "loss": 0.8836, "step": 129590 }, { "epoch": 0.9381311211969858, "grad_norm": 0.1666775345802307, "learning_rate": 4.061876117469073e-06, "loss": 0.8762, "step": 129600 }, { "epoch": 0.938203507857572, "grad_norm": 0.15316708385944366, "learning_rate": 4.061803730808487e-06, "loss": 0.8482, "step": 129610 }, { "epoch": 0.9382758945181582, "grad_norm": 0.15615612268447876, "learning_rate": 4.0617313441479e-06, "loss": 0.8639, "step": 129620 }, { "epoch": 0.9383482811787444, "grad_norm": 0.13969020545482635, "learning_rate": 4.061658957487315e-06, "loss": 0.8829, "step": 129630 }, { "epoch": 0.9384206678393305, "grad_norm": 0.1508314609527588, "learning_rate": 4.061586570826728e-06, "loss": 0.8611, "step": 129640 }, { "epoch": 0.9384930544999167, "grad_norm": 0.16280554234981537, "learning_rate": 4.061514184166142e-06, "loss": 0.883, "step": 129650 }, { "epoch": 0.9385654411605029, "grad_norm": 0.15639394521713257, "learning_rate": 4.0614417975055555e-06, "loss": 0.8711, "step": 129660 }, { "epoch": 0.9386378278210892, "grad_norm": 0.1565892994403839, "learning_rate": 4.06136941084497e-06, "loss": 0.8667, "step": 129670 }, { "epoch": 0.9387102144816754, "grad_norm": 0.16035987436771393, "learning_rate": 4.061297024184384e-06, "loss": 0.8624, "step": 129680 }, { "epoch": 0.9387826011422615, "grad_norm": 0.15157711505889893, "learning_rate": 4.061224637523797e-06, "loss": 0.8751, "step": 129690 }, { "epoch": 0.9388549878028477, "grad_norm": 0.15233691036701202, "learning_rate": 4.061152250863211e-06, "loss": 0.8743, "step": 129700 }, { "epoch": 0.9389273744634339, "grad_norm": 0.15194357931613922, "learning_rate": 4.061079864202625e-06, "loss": 0.875, "step": 129710 }, { "epoch": 0.9389997611240201, "grad_norm": 0.1659833937883377, "learning_rate": 4.061007477542039e-06, "loss": 0.8714, "step": 129720 }, { "epoch": 0.9390721477846062, "grad_norm": 0.15151038765907288, "learning_rate": 4.0609350908814525e-06, "loss": 0.863, "step": 129730 }, { "epoch": 0.9391445344451924, "grad_norm": 0.16103753447532654, "learning_rate": 4.060862704220866e-06, "loss": 0.8715, "step": 129740 }, { "epoch": 0.9392169211057786, "grad_norm": 0.1853325515985489, "learning_rate": 4.060790317560281e-06, "loss": 0.8664, "step": 129750 }, { "epoch": 0.9392893077663648, "grad_norm": 0.15947386622428894, "learning_rate": 4.060717930899694e-06, "loss": 0.8698, "step": 129760 }, { "epoch": 0.939361694426951, "grad_norm": 0.14465877413749695, "learning_rate": 4.060645544239108e-06, "loss": 0.8654, "step": 129770 }, { "epoch": 0.9394340810875372, "grad_norm": 0.15076486766338348, "learning_rate": 4.0605731575785215e-06, "loss": 0.8733, "step": 129780 }, { "epoch": 0.9395064677481234, "grad_norm": 0.14492474496364594, "learning_rate": 4.060500770917936e-06, "loss": 0.8629, "step": 129790 }, { "epoch": 0.9395788544087096, "grad_norm": 0.15159480273723602, "learning_rate": 4.0604283842573495e-06, "loss": 0.875, "step": 129800 }, { "epoch": 0.9396512410692958, "grad_norm": 0.14613956212997437, "learning_rate": 4.060355997596763e-06, "loss": 0.8781, "step": 129810 }, { "epoch": 0.9397236277298819, "grad_norm": 0.16877029836177826, "learning_rate": 4.060283610936177e-06, "loss": 0.8768, "step": 129820 }, { "epoch": 0.9397960143904681, "grad_norm": 0.15022648870944977, "learning_rate": 4.060211224275591e-06, "loss": 0.8699, "step": 129830 }, { "epoch": 0.9398684010510543, "grad_norm": 0.1675875037908554, "learning_rate": 4.060138837615005e-06, "loss": 0.8626, "step": 129840 }, { "epoch": 0.9399407877116405, "grad_norm": 0.15652434527873993, "learning_rate": 4.0600664509544185e-06, "loss": 0.8588, "step": 129850 }, { "epoch": 0.9400131743722266, "grad_norm": 0.16846764087677002, "learning_rate": 4.059994064293832e-06, "loss": 0.859, "step": 129860 }, { "epoch": 0.9400855610328128, "grad_norm": 0.1613306701183319, "learning_rate": 4.0599216776332465e-06, "loss": 0.8593, "step": 129870 }, { "epoch": 0.9401579476933991, "grad_norm": 0.14691142737865448, "learning_rate": 4.05984929097266e-06, "loss": 0.8657, "step": 129880 }, { "epoch": 0.9402303343539853, "grad_norm": 0.20344461500644684, "learning_rate": 4.059776904312074e-06, "loss": 0.8688, "step": 129890 }, { "epoch": 0.9403027210145715, "grad_norm": 0.1496155560016632, "learning_rate": 4.059704517651487e-06, "loss": 0.862, "step": 129900 }, { "epoch": 0.9403751076751576, "grad_norm": 0.14814414083957672, "learning_rate": 4.059632130990902e-06, "loss": 0.8479, "step": 129910 }, { "epoch": 0.9404474943357438, "grad_norm": 0.1548141986131668, "learning_rate": 4.0595597443303155e-06, "loss": 0.8626, "step": 129920 }, { "epoch": 0.94051988099633, "grad_norm": 0.14892597496509552, "learning_rate": 4.059487357669729e-06, "loss": 0.8761, "step": 129930 }, { "epoch": 0.9405922676569162, "grad_norm": 0.14724701642990112, "learning_rate": 4.059414971009143e-06, "loss": 0.8737, "step": 129940 }, { "epoch": 0.9406646543175023, "grad_norm": 0.1490176022052765, "learning_rate": 4.059342584348557e-06, "loss": 0.8762, "step": 129950 }, { "epoch": 0.9407370409780885, "grad_norm": 0.1549617052078247, "learning_rate": 4.059270197687971e-06, "loss": 0.876, "step": 129960 }, { "epoch": 0.9408094276386747, "grad_norm": 0.1603880673646927, "learning_rate": 4.059197811027384e-06, "loss": 0.8737, "step": 129970 }, { "epoch": 0.9408818142992609, "grad_norm": 0.14581482112407684, "learning_rate": 4.059125424366798e-06, "loss": 0.8723, "step": 129980 }, { "epoch": 0.9409542009598472, "grad_norm": 0.14896036684513092, "learning_rate": 4.0590530377062125e-06, "loss": 0.8674, "step": 129990 }, { "epoch": 0.9410265876204333, "grad_norm": 0.1456650048494339, "learning_rate": 4.058980651045626e-06, "loss": 0.8697, "step": 130000 }, { "epoch": 0.9410989742810195, "grad_norm": 0.16549433767795563, "learning_rate": 4.05890826438504e-06, "loss": 0.8668, "step": 130010 }, { "epoch": 0.9411713609416057, "grad_norm": 0.15788212418556213, "learning_rate": 4.058835877724453e-06, "loss": 0.8884, "step": 130020 }, { "epoch": 0.9412437476021919, "grad_norm": 0.20761068165302277, "learning_rate": 4.058763491063867e-06, "loss": 0.8641, "step": 130030 }, { "epoch": 0.941316134262778, "grad_norm": 0.1683483123779297, "learning_rate": 4.0586911044032806e-06, "loss": 0.852, "step": 130040 }, { "epoch": 0.9413885209233642, "grad_norm": 0.16688086092472076, "learning_rate": 4.058618717742694e-06, "loss": 0.8703, "step": 130050 }, { "epoch": 0.9414609075839504, "grad_norm": 0.15116585791110992, "learning_rate": 4.058546331082109e-06, "loss": 0.8814, "step": 130060 }, { "epoch": 0.9415332942445366, "grad_norm": 0.1498250663280487, "learning_rate": 4.058473944421522e-06, "loss": 0.8785, "step": 130070 }, { "epoch": 0.9416056809051228, "grad_norm": 0.14295758306980133, "learning_rate": 4.058401557760936e-06, "loss": 0.8625, "step": 130080 }, { "epoch": 0.941678067565709, "grad_norm": 0.16779643297195435, "learning_rate": 4.0583291711003495e-06, "loss": 0.8589, "step": 130090 }, { "epoch": 0.9417504542262952, "grad_norm": 0.14837302267551422, "learning_rate": 4.058256784439764e-06, "loss": 0.8672, "step": 130100 }, { "epoch": 0.9418228408868814, "grad_norm": 0.15054334700107574, "learning_rate": 4.0581843977791776e-06, "loss": 0.856, "step": 130110 }, { "epoch": 0.9418952275474676, "grad_norm": 0.1450866162776947, "learning_rate": 4.058112011118591e-06, "loss": 0.8576, "step": 130120 }, { "epoch": 0.9419676142080537, "grad_norm": 0.1485680341720581, "learning_rate": 4.058039624458005e-06, "loss": 0.8712, "step": 130130 }, { "epoch": 0.9420400008686399, "grad_norm": 0.14476777613162994, "learning_rate": 4.057967237797419e-06, "loss": 0.8852, "step": 130140 }, { "epoch": 0.9421123875292261, "grad_norm": 0.14453761279582977, "learning_rate": 4.057894851136833e-06, "loss": 0.8608, "step": 130150 }, { "epoch": 0.9421847741898123, "grad_norm": 0.1491234302520752, "learning_rate": 4.0578224644762465e-06, "loss": 0.8662, "step": 130160 }, { "epoch": 0.9422571608503985, "grad_norm": 0.1594741940498352, "learning_rate": 4.05775007781566e-06, "loss": 0.8748, "step": 130170 }, { "epoch": 0.9423295475109846, "grad_norm": 0.15636101365089417, "learning_rate": 4.0576776911550746e-06, "loss": 0.8674, "step": 130180 }, { "epoch": 0.9424019341715708, "grad_norm": 0.1590276062488556, "learning_rate": 4.057605304494488e-06, "loss": 0.8547, "step": 130190 }, { "epoch": 0.9424743208321571, "grad_norm": 0.15569545328617096, "learning_rate": 4.057532917833902e-06, "loss": 0.8607, "step": 130200 }, { "epoch": 0.9425467074927433, "grad_norm": 0.15706123411655426, "learning_rate": 4.057460531173315e-06, "loss": 0.8647, "step": 130210 }, { "epoch": 0.9426190941533295, "grad_norm": 0.17118285596370697, "learning_rate": 4.057388144512729e-06, "loss": 0.8688, "step": 130220 }, { "epoch": 0.9426914808139156, "grad_norm": 0.1491112858057022, "learning_rate": 4.0573157578521435e-06, "loss": 0.8635, "step": 130230 }, { "epoch": 0.9427638674745018, "grad_norm": 0.15699629485607147, "learning_rate": 4.057243371191557e-06, "loss": 0.8717, "step": 130240 }, { "epoch": 0.942836254135088, "grad_norm": 0.1544734388589859, "learning_rate": 4.057170984530971e-06, "loss": 0.8608, "step": 130250 }, { "epoch": 0.9429086407956742, "grad_norm": 0.15462660789489746, "learning_rate": 4.057098597870384e-06, "loss": 0.8624, "step": 130260 }, { "epoch": 0.9429810274562603, "grad_norm": 0.1702507883310318, "learning_rate": 4.057026211209799e-06, "loss": 0.8809, "step": 130270 }, { "epoch": 0.9430534141168465, "grad_norm": 0.1495058238506317, "learning_rate": 4.0569538245492124e-06, "loss": 0.8564, "step": 130280 }, { "epoch": 0.9431258007774327, "grad_norm": 0.1570599526166916, "learning_rate": 4.056881437888626e-06, "loss": 0.8738, "step": 130290 }, { "epoch": 0.943198187438019, "grad_norm": 0.14418993890285492, "learning_rate": 4.05680905122804e-06, "loss": 0.8863, "step": 130300 }, { "epoch": 0.9432705740986052, "grad_norm": 0.1544889211654663, "learning_rate": 4.056736664567454e-06, "loss": 0.878, "step": 130310 }, { "epoch": 0.9433429607591913, "grad_norm": 0.151493638753891, "learning_rate": 4.056664277906868e-06, "loss": 0.8675, "step": 130320 }, { "epoch": 0.9434153474197775, "grad_norm": 0.15243935585021973, "learning_rate": 4.056591891246281e-06, "loss": 0.8659, "step": 130330 }, { "epoch": 0.9434877340803637, "grad_norm": 0.15640780329704285, "learning_rate": 4.056519504585695e-06, "loss": 0.8789, "step": 130340 }, { "epoch": 0.9435601207409499, "grad_norm": 0.16906537115573883, "learning_rate": 4.0564471179251094e-06, "loss": 0.8718, "step": 130350 }, { "epoch": 0.943632507401536, "grad_norm": 0.13774484395980835, "learning_rate": 4.056374731264523e-06, "loss": 0.8684, "step": 130360 }, { "epoch": 0.9437048940621222, "grad_norm": 0.18474681675434113, "learning_rate": 4.056302344603937e-06, "loss": 0.8737, "step": 130370 }, { "epoch": 0.9437772807227084, "grad_norm": 0.15171298384666443, "learning_rate": 4.05622995794335e-06, "loss": 0.8742, "step": 130380 }, { "epoch": 0.9438496673832946, "grad_norm": 0.1634998321533203, "learning_rate": 4.056157571282765e-06, "loss": 0.8671, "step": 130390 }, { "epoch": 0.9439220540438807, "grad_norm": 0.15286368131637573, "learning_rate": 4.056085184622178e-06, "loss": 0.874, "step": 130400 }, { "epoch": 0.943994440704467, "grad_norm": 0.17988328635692596, "learning_rate": 4.056012797961592e-06, "loss": 0.8764, "step": 130410 }, { "epoch": 0.9440668273650532, "grad_norm": 0.1676689237356186, "learning_rate": 4.055940411301006e-06, "loss": 0.8714, "step": 130420 }, { "epoch": 0.9441392140256394, "grad_norm": 0.15611009299755096, "learning_rate": 4.05586802464042e-06, "loss": 0.8603, "step": 130430 }, { "epoch": 0.9442116006862256, "grad_norm": 0.1466500610113144, "learning_rate": 4.055795637979834e-06, "loss": 0.8706, "step": 130440 }, { "epoch": 0.9442839873468117, "grad_norm": 0.1896083503961563, "learning_rate": 4.055723251319247e-06, "loss": 0.8681, "step": 130450 }, { "epoch": 0.9443563740073979, "grad_norm": 0.15304064750671387, "learning_rate": 4.055650864658661e-06, "loss": 0.8754, "step": 130460 }, { "epoch": 0.9444287606679841, "grad_norm": 0.15458323061466217, "learning_rate": 4.055578477998075e-06, "loss": 0.8714, "step": 130470 }, { "epoch": 0.9445011473285703, "grad_norm": 0.15180428326129913, "learning_rate": 4.055506091337489e-06, "loss": 0.87, "step": 130480 }, { "epoch": 0.9445735339891564, "grad_norm": 0.1695541888475418, "learning_rate": 4.055433704676903e-06, "loss": 0.862, "step": 130490 }, { "epoch": 0.9446459206497426, "grad_norm": 0.1498434692621231, "learning_rate": 4.055361318016316e-06, "loss": 0.8605, "step": 130500 }, { "epoch": 0.9447183073103288, "grad_norm": 0.1435045748949051, "learning_rate": 4.055288931355731e-06, "loss": 0.8729, "step": 130510 }, { "epoch": 0.9447906939709151, "grad_norm": 0.14459870755672455, "learning_rate": 4.055216544695144e-06, "loss": 0.8589, "step": 130520 }, { "epoch": 0.9448630806315013, "grad_norm": 0.1518053263425827, "learning_rate": 4.055144158034558e-06, "loss": 0.8549, "step": 130530 }, { "epoch": 0.9449354672920874, "grad_norm": 0.1755157858133316, "learning_rate": 4.0550717713739715e-06, "loss": 0.8627, "step": 130540 }, { "epoch": 0.9450078539526736, "grad_norm": 0.16715474426746368, "learning_rate": 4.054999384713386e-06, "loss": 0.8732, "step": 130550 }, { "epoch": 0.9450802406132598, "grad_norm": 0.16011476516723633, "learning_rate": 4.0549269980528e-06, "loss": 0.8697, "step": 130560 }, { "epoch": 0.945152627273846, "grad_norm": 0.1612127423286438, "learning_rate": 4.054854611392212e-06, "loss": 0.8733, "step": 130570 }, { "epoch": 0.9452250139344321, "grad_norm": 0.15211746096611023, "learning_rate": 4.054782224731627e-06, "loss": 0.8724, "step": 130580 }, { "epoch": 0.9452974005950183, "grad_norm": 0.1653570979833603, "learning_rate": 4.0547098380710405e-06, "loss": 0.8681, "step": 130590 }, { "epoch": 0.9453697872556045, "grad_norm": 0.14813409745693207, "learning_rate": 4.054637451410454e-06, "loss": 0.8693, "step": 130600 }, { "epoch": 0.9454421739161907, "grad_norm": 0.15209084749221802, "learning_rate": 4.054565064749868e-06, "loss": 0.8623, "step": 130610 }, { "epoch": 0.945514560576777, "grad_norm": 0.15478059649467468, "learning_rate": 4.054492678089282e-06, "loss": 0.874, "step": 130620 }, { "epoch": 0.9455869472373631, "grad_norm": 0.3934093713760376, "learning_rate": 4.054420291428696e-06, "loss": 0.8708, "step": 130630 }, { "epoch": 0.9456593338979493, "grad_norm": 0.1564350724220276, "learning_rate": 4.054347904768109e-06, "loss": 0.8693, "step": 130640 }, { "epoch": 0.9457317205585355, "grad_norm": 0.14715883135795593, "learning_rate": 4.054275518107523e-06, "loss": 0.8701, "step": 130650 }, { "epoch": 0.9458041072191217, "grad_norm": 0.1694241613149643, "learning_rate": 4.0542031314469375e-06, "loss": 0.8761, "step": 130660 }, { "epoch": 0.9458764938797078, "grad_norm": 0.16506516933441162, "learning_rate": 4.054130744786351e-06, "loss": 0.8759, "step": 130670 }, { "epoch": 0.945948880540294, "grad_norm": 0.14801914989948273, "learning_rate": 4.054058358125765e-06, "loss": 0.8667, "step": 130680 }, { "epoch": 0.9460212672008802, "grad_norm": 0.15450334548950195, "learning_rate": 4.053985971465178e-06, "loss": 0.8587, "step": 130690 }, { "epoch": 0.9460936538614664, "grad_norm": 0.1489882618188858, "learning_rate": 4.053913584804593e-06, "loss": 0.8715, "step": 130700 }, { "epoch": 0.9461660405220526, "grad_norm": 0.1521863490343094, "learning_rate": 4.053841198144006e-06, "loss": 0.8708, "step": 130710 }, { "epoch": 0.9462384271826387, "grad_norm": 0.15034830570220947, "learning_rate": 4.05376881148342e-06, "loss": 0.8874, "step": 130720 }, { "epoch": 0.946310813843225, "grad_norm": 0.1745532602071762, "learning_rate": 4.053696424822834e-06, "loss": 0.8755, "step": 130730 }, { "epoch": 0.9463832005038112, "grad_norm": 0.15382978320121765, "learning_rate": 4.053624038162248e-06, "loss": 0.8506, "step": 130740 }, { "epoch": 0.9464555871643974, "grad_norm": 0.16145150363445282, "learning_rate": 4.053551651501662e-06, "loss": 0.8639, "step": 130750 }, { "epoch": 0.9465279738249835, "grad_norm": 0.14968262612819672, "learning_rate": 4.053479264841075e-06, "loss": 0.8719, "step": 130760 }, { "epoch": 0.9466003604855697, "grad_norm": 0.17151197791099548, "learning_rate": 4.053406878180489e-06, "loss": 0.8707, "step": 130770 }, { "epoch": 0.9466727471461559, "grad_norm": 0.149154931306839, "learning_rate": 4.053334491519903e-06, "loss": 0.8683, "step": 130780 }, { "epoch": 0.9467451338067421, "grad_norm": 0.19101570546627045, "learning_rate": 4.053262104859317e-06, "loss": 0.8669, "step": 130790 }, { "epoch": 0.9468175204673283, "grad_norm": 0.14117294549942017, "learning_rate": 4.053189718198731e-06, "loss": 0.8595, "step": 130800 }, { "epoch": 0.9468899071279144, "grad_norm": 0.14957231283187866, "learning_rate": 4.053117331538144e-06, "loss": 0.863, "step": 130810 }, { "epoch": 0.9469622937885006, "grad_norm": 0.1667657345533371, "learning_rate": 4.053044944877558e-06, "loss": 0.8785, "step": 130820 }, { "epoch": 0.9470346804490869, "grad_norm": 0.1488480567932129, "learning_rate": 4.052972558216972e-06, "loss": 0.8482, "step": 130830 }, { "epoch": 0.9471070671096731, "grad_norm": 0.16403774917125702, "learning_rate": 4.052900171556386e-06, "loss": 0.8699, "step": 130840 }, { "epoch": 0.9471794537702592, "grad_norm": 0.14479616284370422, "learning_rate": 4.0528277848957996e-06, "loss": 0.8675, "step": 130850 }, { "epoch": 0.9472518404308454, "grad_norm": 0.157504141330719, "learning_rate": 4.052755398235213e-06, "loss": 0.862, "step": 130860 }, { "epoch": 0.9473242270914316, "grad_norm": 0.15569128096103668, "learning_rate": 4.052683011574628e-06, "loss": 0.8629, "step": 130870 }, { "epoch": 0.9473966137520178, "grad_norm": 0.15487146377563477, "learning_rate": 4.052610624914041e-06, "loss": 0.87, "step": 130880 }, { "epoch": 0.947469000412604, "grad_norm": 0.152805894613266, "learning_rate": 4.052538238253455e-06, "loss": 0.8711, "step": 130890 }, { "epoch": 0.9475413870731901, "grad_norm": 0.15563584864139557, "learning_rate": 4.0524658515928685e-06, "loss": 0.8485, "step": 130900 }, { "epoch": 0.9476137737337763, "grad_norm": 0.1475389450788498, "learning_rate": 4.052393464932283e-06, "loss": 0.8782, "step": 130910 }, { "epoch": 0.9476861603943625, "grad_norm": 0.160760760307312, "learning_rate": 4.0523210782716966e-06, "loss": 0.8786, "step": 130920 }, { "epoch": 0.9477585470549487, "grad_norm": 0.1502762883901596, "learning_rate": 4.05224869161111e-06, "loss": 0.8633, "step": 130930 }, { "epoch": 0.947830933715535, "grad_norm": 0.14664162695407867, "learning_rate": 4.052176304950524e-06, "loss": 0.878, "step": 130940 }, { "epoch": 0.9479033203761211, "grad_norm": 0.15366783738136292, "learning_rate": 4.052103918289938e-06, "loss": 0.8672, "step": 130950 }, { "epoch": 0.9479757070367073, "grad_norm": 0.1406678408384323, "learning_rate": 4.052031531629352e-06, "loss": 0.8716, "step": 130960 }, { "epoch": 0.9480480936972935, "grad_norm": 0.14894571900367737, "learning_rate": 4.0519591449687655e-06, "loss": 0.8601, "step": 130970 }, { "epoch": 0.9481204803578797, "grad_norm": 0.14522773027420044, "learning_rate": 4.051886758308179e-06, "loss": 0.8797, "step": 130980 }, { "epoch": 0.9481928670184658, "grad_norm": 0.1636369526386261, "learning_rate": 4.051814371647594e-06, "loss": 0.87, "step": 130990 }, { "epoch": 0.948265253679052, "grad_norm": 0.19674119353294373, "learning_rate": 4.051741984987007e-06, "loss": 0.8732, "step": 131000 }, { "epoch": 0.9483376403396382, "grad_norm": 0.16425974667072296, "learning_rate": 4.051669598326421e-06, "loss": 0.8701, "step": 131010 }, { "epoch": 0.9484100270002244, "grad_norm": 0.15251362323760986, "learning_rate": 4.0515972116658344e-06, "loss": 0.8686, "step": 131020 }, { "epoch": 0.9484824136608105, "grad_norm": 0.15562012791633606, "learning_rate": 4.051524825005249e-06, "loss": 0.8516, "step": 131030 }, { "epoch": 0.9485548003213967, "grad_norm": 0.14685966074466705, "learning_rate": 4.0514524383446625e-06, "loss": 0.8677, "step": 131040 }, { "epoch": 0.948627186981983, "grad_norm": 0.15165336430072784, "learning_rate": 4.051380051684076e-06, "loss": 0.8594, "step": 131050 }, { "epoch": 0.9486995736425692, "grad_norm": 0.13826829195022583, "learning_rate": 4.05130766502349e-06, "loss": 0.8599, "step": 131060 }, { "epoch": 0.9487719603031554, "grad_norm": 0.19004106521606445, "learning_rate": 4.051235278362904e-06, "loss": 0.8743, "step": 131070 }, { "epoch": 0.9488443469637415, "grad_norm": 0.1457131803035736, "learning_rate": 4.051162891702318e-06, "loss": 0.8736, "step": 131080 }, { "epoch": 0.9489167336243277, "grad_norm": 0.16379250586032867, "learning_rate": 4.0510905050417314e-06, "loss": 0.86, "step": 131090 }, { "epoch": 0.9489891202849139, "grad_norm": 0.14875370264053345, "learning_rate": 4.051018118381145e-06, "loss": 0.8776, "step": 131100 }, { "epoch": 0.9490615069455001, "grad_norm": 0.14659325778484344, "learning_rate": 4.050945731720559e-06, "loss": 0.8758, "step": 131110 }, { "epoch": 0.9491338936060862, "grad_norm": 0.15121561288833618, "learning_rate": 4.050873345059972e-06, "loss": 0.8544, "step": 131120 }, { "epoch": 0.9492062802666724, "grad_norm": 0.14305542409420013, "learning_rate": 4.050800958399386e-06, "loss": 0.8744, "step": 131130 }, { "epoch": 0.9492786669272586, "grad_norm": 0.15504080057144165, "learning_rate": 4.0507285717388e-06, "loss": 0.8705, "step": 131140 }, { "epoch": 0.9493510535878449, "grad_norm": 0.14514702558517456, "learning_rate": 4.050656185078214e-06, "loss": 0.8657, "step": 131150 }, { "epoch": 0.9494234402484311, "grad_norm": 0.15616527199745178, "learning_rate": 4.050583798417628e-06, "loss": 0.8626, "step": 131160 }, { "epoch": 0.9494958269090172, "grad_norm": 0.15663272142410278, "learning_rate": 4.050511411757041e-06, "loss": 0.8605, "step": 131170 }, { "epoch": 0.9495682135696034, "grad_norm": 0.1529805064201355, "learning_rate": 4.050439025096456e-06, "loss": 0.8636, "step": 131180 }, { "epoch": 0.9496406002301896, "grad_norm": 0.16288772225379944, "learning_rate": 4.050366638435869e-06, "loss": 0.8525, "step": 131190 }, { "epoch": 0.9497129868907758, "grad_norm": 0.14723965525627136, "learning_rate": 4.050294251775283e-06, "loss": 0.8596, "step": 131200 }, { "epoch": 0.9497853735513619, "grad_norm": 0.14269182085990906, "learning_rate": 4.0502218651146965e-06, "loss": 0.8667, "step": 131210 }, { "epoch": 0.9498577602119481, "grad_norm": 0.16036245226860046, "learning_rate": 4.050149478454111e-06, "loss": 0.8724, "step": 131220 }, { "epoch": 0.9499301468725343, "grad_norm": 0.15207916498184204, "learning_rate": 4.050077091793525e-06, "loss": 0.8688, "step": 131230 }, { "epoch": 0.9500025335331205, "grad_norm": 0.1509644240140915, "learning_rate": 4.050004705132938e-06, "loss": 0.8553, "step": 131240 }, { "epoch": 0.9500749201937067, "grad_norm": 0.14878825843334198, "learning_rate": 4.049932318472352e-06, "loss": 0.8579, "step": 131250 }, { "epoch": 0.9501473068542929, "grad_norm": 0.1610831767320633, "learning_rate": 4.049859931811766e-06, "loss": 0.8732, "step": 131260 }, { "epoch": 0.9502196935148791, "grad_norm": 0.1690327674150467, "learning_rate": 4.04978754515118e-06, "loss": 0.8738, "step": 131270 }, { "epoch": 0.9502920801754653, "grad_norm": 0.16976772248744965, "learning_rate": 4.0497151584905935e-06, "loss": 0.8626, "step": 131280 }, { "epoch": 0.9503644668360515, "grad_norm": 0.1506384015083313, "learning_rate": 4.049642771830007e-06, "loss": 0.8642, "step": 131290 }, { "epoch": 0.9504368534966376, "grad_norm": 0.1462264209985733, "learning_rate": 4.049570385169422e-06, "loss": 0.8646, "step": 131300 }, { "epoch": 0.9505092401572238, "grad_norm": 0.14484186470508575, "learning_rate": 4.049497998508835e-06, "loss": 0.8771, "step": 131310 }, { "epoch": 0.95058162681781, "grad_norm": 0.15840958058834076, "learning_rate": 4.049425611848249e-06, "loss": 0.8607, "step": 131320 }, { "epoch": 0.9506540134783962, "grad_norm": 0.15120358765125275, "learning_rate": 4.0493532251876625e-06, "loss": 0.8766, "step": 131330 }, { "epoch": 0.9507264001389824, "grad_norm": 0.15043680369853973, "learning_rate": 4.049280838527077e-06, "loss": 0.871, "step": 131340 }, { "epoch": 0.9507987867995685, "grad_norm": 0.15595075488090515, "learning_rate": 4.0492084518664905e-06, "loss": 0.8668, "step": 131350 }, { "epoch": 0.9508711734601548, "grad_norm": 0.15385489165782928, "learning_rate": 4.049136065205904e-06, "loss": 0.8748, "step": 131360 }, { "epoch": 0.950943560120741, "grad_norm": 0.19055695831775665, "learning_rate": 4.049063678545318e-06, "loss": 0.8791, "step": 131370 }, { "epoch": 0.9510159467813272, "grad_norm": 0.1542825996875763, "learning_rate": 4.048991291884732e-06, "loss": 0.8667, "step": 131380 }, { "epoch": 0.9510883334419133, "grad_norm": 0.16472730040550232, "learning_rate": 4.048918905224146e-06, "loss": 0.8755, "step": 131390 }, { "epoch": 0.9511607201024995, "grad_norm": 0.17082463204860687, "learning_rate": 4.0488465185635595e-06, "loss": 0.8434, "step": 131400 }, { "epoch": 0.9512331067630857, "grad_norm": 0.1629323959350586, "learning_rate": 4.048774131902973e-06, "loss": 0.8712, "step": 131410 }, { "epoch": 0.9513054934236719, "grad_norm": 0.1467832624912262, "learning_rate": 4.0487017452423875e-06, "loss": 0.8609, "step": 131420 }, { "epoch": 0.951377880084258, "grad_norm": 0.15191030502319336, "learning_rate": 4.048629358581801e-06, "loss": 0.8718, "step": 131430 }, { "epoch": 0.9514502667448442, "grad_norm": 0.17958003282546997, "learning_rate": 4.048556971921215e-06, "loss": 0.8696, "step": 131440 }, { "epoch": 0.9515226534054304, "grad_norm": 0.16682404279708862, "learning_rate": 4.048484585260628e-06, "loss": 0.8676, "step": 131450 }, { "epoch": 0.9515950400660166, "grad_norm": 0.1475543975830078, "learning_rate": 4.048412198600042e-06, "loss": 0.87, "step": 131460 }, { "epoch": 0.9516674267266029, "grad_norm": 0.15572024881839752, "learning_rate": 4.0483398119394565e-06, "loss": 0.8793, "step": 131470 }, { "epoch": 0.951739813387189, "grad_norm": 0.1579056978225708, "learning_rate": 4.04826742527887e-06, "loss": 0.8578, "step": 131480 }, { "epoch": 0.9518122000477752, "grad_norm": 0.1470668464899063, "learning_rate": 4.048195038618284e-06, "loss": 0.8634, "step": 131490 }, { "epoch": 0.9518845867083614, "grad_norm": 0.1533782035112381, "learning_rate": 4.048122651957697e-06, "loss": 0.852, "step": 131500 }, { "epoch": 0.9519569733689476, "grad_norm": 0.16424772143363953, "learning_rate": 4.048050265297112e-06, "loss": 0.8566, "step": 131510 }, { "epoch": 0.9520293600295338, "grad_norm": 0.17163357138633728, "learning_rate": 4.047977878636525e-06, "loss": 0.8657, "step": 131520 }, { "epoch": 0.9521017466901199, "grad_norm": 0.15258784592151642, "learning_rate": 4.047905491975939e-06, "loss": 0.8786, "step": 131530 }, { "epoch": 0.9521741333507061, "grad_norm": 0.15149055421352386, "learning_rate": 4.047833105315353e-06, "loss": 0.8627, "step": 131540 }, { "epoch": 0.9522465200112923, "grad_norm": 0.15331679582595825, "learning_rate": 4.047760718654767e-06, "loss": 0.8613, "step": 131550 }, { "epoch": 0.9523189066718785, "grad_norm": 0.15409405529499054, "learning_rate": 4.047688331994181e-06, "loss": 0.8754, "step": 131560 }, { "epoch": 0.9523912933324646, "grad_norm": 0.16300641000270844, "learning_rate": 4.047615945333594e-06, "loss": 0.8851, "step": 131570 }, { "epoch": 0.9524636799930509, "grad_norm": 0.1475706696510315, "learning_rate": 4.047543558673008e-06, "loss": 0.8686, "step": 131580 }, { "epoch": 0.9525360666536371, "grad_norm": 0.14005255699157715, "learning_rate": 4.047471172012422e-06, "loss": 0.8771, "step": 131590 }, { "epoch": 0.9526084533142233, "grad_norm": 0.152145653963089, "learning_rate": 4.047398785351836e-06, "loss": 0.8652, "step": 131600 }, { "epoch": 0.9526808399748095, "grad_norm": 0.15361985564231873, "learning_rate": 4.04732639869125e-06, "loss": 0.8609, "step": 131610 }, { "epoch": 0.9527532266353956, "grad_norm": 0.15738271176815033, "learning_rate": 4.047254012030663e-06, "loss": 0.8731, "step": 131620 }, { "epoch": 0.9528256132959818, "grad_norm": 0.15454953908920288, "learning_rate": 4.047181625370077e-06, "loss": 0.8549, "step": 131630 }, { "epoch": 0.952897999956568, "grad_norm": 0.17715829610824585, "learning_rate": 4.0471092387094905e-06, "loss": 0.864, "step": 131640 }, { "epoch": 0.9529703866171542, "grad_norm": 0.16041779518127441, "learning_rate": 4.047036852048904e-06, "loss": 0.8823, "step": 131650 }, { "epoch": 0.9530427732777403, "grad_norm": 0.1558634638786316, "learning_rate": 4.0469644653883186e-06, "loss": 0.8816, "step": 131660 }, { "epoch": 0.9531151599383265, "grad_norm": 0.16207058727741241, "learning_rate": 4.046892078727732e-06, "loss": 0.8568, "step": 131670 }, { "epoch": 0.9531875465989128, "grad_norm": 0.16530075669288635, "learning_rate": 4.046819692067146e-06, "loss": 0.8695, "step": 131680 }, { "epoch": 0.953259933259499, "grad_norm": 0.16175314784049988, "learning_rate": 4.046747305406559e-06, "loss": 0.8662, "step": 131690 }, { "epoch": 0.9533323199200852, "grad_norm": 0.1511378288269043, "learning_rate": 4.046674918745974e-06, "loss": 0.8655, "step": 131700 }, { "epoch": 0.9534047065806713, "grad_norm": 0.15488837659358978, "learning_rate": 4.0466025320853875e-06, "loss": 0.8634, "step": 131710 }, { "epoch": 0.9534770932412575, "grad_norm": 0.15649256110191345, "learning_rate": 4.046530145424801e-06, "loss": 0.8821, "step": 131720 }, { "epoch": 0.9535494799018437, "grad_norm": 0.14642643928527832, "learning_rate": 4.046457758764215e-06, "loss": 0.8704, "step": 131730 }, { "epoch": 0.9536218665624299, "grad_norm": 0.15652616322040558, "learning_rate": 4.046385372103629e-06, "loss": 0.8604, "step": 131740 }, { "epoch": 0.953694253223016, "grad_norm": 0.15622968971729279, "learning_rate": 4.046312985443043e-06, "loss": 0.8704, "step": 131750 }, { "epoch": 0.9537666398836022, "grad_norm": 0.1728099137544632, "learning_rate": 4.0462405987824564e-06, "loss": 0.8708, "step": 131760 }, { "epoch": 0.9538390265441884, "grad_norm": 0.15809345245361328, "learning_rate": 4.04616821212187e-06, "loss": 0.8614, "step": 131770 }, { "epoch": 0.9539114132047746, "grad_norm": 0.1720665991306305, "learning_rate": 4.0460958254612845e-06, "loss": 0.861, "step": 131780 }, { "epoch": 0.9539837998653609, "grad_norm": 0.15099675953388214, "learning_rate": 4.046023438800698e-06, "loss": 0.8784, "step": 131790 }, { "epoch": 0.954056186525947, "grad_norm": 0.14246854186058044, "learning_rate": 4.045951052140112e-06, "loss": 0.8659, "step": 131800 }, { "epoch": 0.9541285731865332, "grad_norm": 0.15169619023799896, "learning_rate": 4.045878665479525e-06, "loss": 0.8672, "step": 131810 }, { "epoch": 0.9542009598471194, "grad_norm": 0.15939058363437653, "learning_rate": 4.04580627881894e-06, "loss": 0.8608, "step": 131820 }, { "epoch": 0.9542733465077056, "grad_norm": 0.16501100361347198, "learning_rate": 4.0457338921583534e-06, "loss": 0.857, "step": 131830 }, { "epoch": 0.9543457331682917, "grad_norm": 1.6813664436340332, "learning_rate": 4.045661505497767e-06, "loss": 0.8666, "step": 131840 }, { "epoch": 0.9544181198288779, "grad_norm": 0.14207109808921814, "learning_rate": 4.045589118837181e-06, "loss": 0.8631, "step": 131850 }, { "epoch": 0.9544905064894641, "grad_norm": 0.14771947264671326, "learning_rate": 4.045516732176595e-06, "loss": 0.8668, "step": 131860 }, { "epoch": 0.9545628931500503, "grad_norm": 0.15695475041866302, "learning_rate": 4.045444345516009e-06, "loss": 0.8629, "step": 131870 }, { "epoch": 0.9546352798106365, "grad_norm": 0.15353751182556152, "learning_rate": 4.045371958855422e-06, "loss": 0.858, "step": 131880 }, { "epoch": 0.9547076664712227, "grad_norm": 0.15183933079242706, "learning_rate": 4.045299572194836e-06, "loss": 0.8694, "step": 131890 }, { "epoch": 0.9547800531318089, "grad_norm": 0.16019755601882935, "learning_rate": 4.0452271855342504e-06, "loss": 0.8774, "step": 131900 }, { "epoch": 0.9548524397923951, "grad_norm": 0.15118776261806488, "learning_rate": 4.045154798873664e-06, "loss": 0.8733, "step": 131910 }, { "epoch": 0.9549248264529813, "grad_norm": 0.15187504887580872, "learning_rate": 4.045082412213078e-06, "loss": 0.8695, "step": 131920 }, { "epoch": 0.9549972131135674, "grad_norm": 0.15506109595298767, "learning_rate": 4.045010025552491e-06, "loss": 0.8637, "step": 131930 }, { "epoch": 0.9550695997741536, "grad_norm": 0.14858941733837128, "learning_rate": 4.044937638891906e-06, "loss": 0.8727, "step": 131940 }, { "epoch": 0.9551419864347398, "grad_norm": 0.15059681236743927, "learning_rate": 4.044865252231319e-06, "loss": 0.8503, "step": 131950 }, { "epoch": 0.955214373095326, "grad_norm": 0.14260925352573395, "learning_rate": 4.044792865570733e-06, "loss": 0.8507, "step": 131960 }, { "epoch": 0.9552867597559122, "grad_norm": 0.16660186648368835, "learning_rate": 4.044720478910147e-06, "loss": 0.8809, "step": 131970 }, { "epoch": 0.9553591464164983, "grad_norm": 0.1452503353357315, "learning_rate": 4.044648092249561e-06, "loss": 0.8595, "step": 131980 }, { "epoch": 0.9554315330770845, "grad_norm": 0.14808453619480133, "learning_rate": 4.044575705588975e-06, "loss": 0.8595, "step": 131990 }, { "epoch": 0.9555039197376708, "grad_norm": 0.14562194049358368, "learning_rate": 4.044503318928388e-06, "loss": 0.8679, "step": 132000 }, { "epoch": 0.955576306398257, "grad_norm": 0.1467532068490982, "learning_rate": 4.044430932267802e-06, "loss": 0.8722, "step": 132010 }, { "epoch": 0.9556486930588431, "grad_norm": 0.15089406073093414, "learning_rate": 4.044358545607216e-06, "loss": 0.8647, "step": 132020 }, { "epoch": 0.9557210797194293, "grad_norm": 0.1548631340265274, "learning_rate": 4.04428615894663e-06, "loss": 0.8675, "step": 132030 }, { "epoch": 0.9557934663800155, "grad_norm": 0.15700556337833405, "learning_rate": 4.044213772286044e-06, "loss": 0.8643, "step": 132040 }, { "epoch": 0.9558658530406017, "grad_norm": 0.1600215882062912, "learning_rate": 4.044141385625457e-06, "loss": 0.8749, "step": 132050 }, { "epoch": 0.9559382397011879, "grad_norm": 0.1461002379655838, "learning_rate": 4.044068998964871e-06, "loss": 0.8759, "step": 132060 }, { "epoch": 0.956010626361774, "grad_norm": 0.1531788557767868, "learning_rate": 4.043996612304285e-06, "loss": 0.8736, "step": 132070 }, { "epoch": 0.9560830130223602, "grad_norm": 0.15142254531383514, "learning_rate": 4.043924225643699e-06, "loss": 0.857, "step": 132080 }, { "epoch": 0.9561553996829464, "grad_norm": 0.1594085544347763, "learning_rate": 4.0438518389831125e-06, "loss": 0.87, "step": 132090 }, { "epoch": 0.9562277863435326, "grad_norm": 0.16034048795700073, "learning_rate": 4.043779452322526e-06, "loss": 0.8755, "step": 132100 }, { "epoch": 0.9563001730041188, "grad_norm": 0.1539410948753357, "learning_rate": 4.043707065661941e-06, "loss": 0.874, "step": 132110 }, { "epoch": 0.956372559664705, "grad_norm": 0.15013930201530457, "learning_rate": 4.043634679001354e-06, "loss": 0.8666, "step": 132120 }, { "epoch": 0.9564449463252912, "grad_norm": 0.15976254642009735, "learning_rate": 4.043562292340768e-06, "loss": 0.8697, "step": 132130 }, { "epoch": 0.9565173329858774, "grad_norm": 0.16351239383220673, "learning_rate": 4.0434899056801815e-06, "loss": 0.8731, "step": 132140 }, { "epoch": 0.9565897196464636, "grad_norm": 0.15584561228752136, "learning_rate": 4.043417519019596e-06, "loss": 0.8833, "step": 132150 }, { "epoch": 0.9566621063070497, "grad_norm": 0.1562098264694214, "learning_rate": 4.043345132359009e-06, "loss": 0.872, "step": 132160 }, { "epoch": 0.9567344929676359, "grad_norm": 0.156274676322937, "learning_rate": 4.043272745698423e-06, "loss": 0.8708, "step": 132170 }, { "epoch": 0.9568068796282221, "grad_norm": 0.22564736008644104, "learning_rate": 4.043200359037837e-06, "loss": 0.8636, "step": 132180 }, { "epoch": 0.9568792662888083, "grad_norm": 0.1602523922920227, "learning_rate": 4.04312797237725e-06, "loss": 0.8775, "step": 132190 }, { "epoch": 0.9569516529493944, "grad_norm": 0.15143248438835144, "learning_rate": 4.043055585716664e-06, "loss": 0.8635, "step": 132200 }, { "epoch": 0.9570240396099807, "grad_norm": 0.14714032411575317, "learning_rate": 4.0429831990560785e-06, "loss": 0.8642, "step": 132210 }, { "epoch": 0.9570964262705669, "grad_norm": 0.1613510251045227, "learning_rate": 4.042910812395492e-06, "loss": 0.8645, "step": 132220 }, { "epoch": 0.9571688129311531, "grad_norm": 0.17830723524093628, "learning_rate": 4.042838425734906e-06, "loss": 0.8672, "step": 132230 }, { "epoch": 0.9572411995917393, "grad_norm": 0.15344306826591492, "learning_rate": 4.042766039074319e-06, "loss": 0.8679, "step": 132240 }, { "epoch": 0.9573135862523254, "grad_norm": 0.15479233860969543, "learning_rate": 4.042693652413733e-06, "loss": 0.8674, "step": 132250 }, { "epoch": 0.9573859729129116, "grad_norm": 0.1529737114906311, "learning_rate": 4.042621265753147e-06, "loss": 0.8539, "step": 132260 }, { "epoch": 0.9574583595734978, "grad_norm": 0.14475677907466888, "learning_rate": 4.042548879092561e-06, "loss": 0.8693, "step": 132270 }, { "epoch": 0.957530746234084, "grad_norm": 0.167455792427063, "learning_rate": 4.042476492431975e-06, "loss": 0.8576, "step": 132280 }, { "epoch": 0.9576031328946701, "grad_norm": 0.14674553275108337, "learning_rate": 4.042404105771388e-06, "loss": 0.8753, "step": 132290 }, { "epoch": 0.9576755195552563, "grad_norm": 0.20534364879131317, "learning_rate": 4.042331719110803e-06, "loss": 0.8661, "step": 132300 }, { "epoch": 0.9577479062158425, "grad_norm": 0.16672727465629578, "learning_rate": 4.042259332450216e-06, "loss": 0.8645, "step": 132310 }, { "epoch": 0.9578202928764288, "grad_norm": 0.14698807895183563, "learning_rate": 4.04218694578963e-06, "loss": 0.8645, "step": 132320 }, { "epoch": 0.957892679537015, "grad_norm": 0.15321475267410278, "learning_rate": 4.0421145591290436e-06, "loss": 0.8692, "step": 132330 }, { "epoch": 0.9579650661976011, "grad_norm": 0.15178924798965454, "learning_rate": 4.042042172468458e-06, "loss": 0.8705, "step": 132340 }, { "epoch": 0.9580374528581873, "grad_norm": 0.701418936252594, "learning_rate": 4.041969785807872e-06, "loss": 0.8685, "step": 132350 }, { "epoch": 0.9581098395187735, "grad_norm": 0.15373194217681885, "learning_rate": 4.041897399147285e-06, "loss": 0.867, "step": 132360 }, { "epoch": 0.9581822261793597, "grad_norm": 0.15666644275188446, "learning_rate": 4.041825012486699e-06, "loss": 0.8715, "step": 132370 }, { "epoch": 0.9582546128399458, "grad_norm": 0.14554648101329803, "learning_rate": 4.041752625826113e-06, "loss": 0.8612, "step": 132380 }, { "epoch": 0.958326999500532, "grad_norm": 0.15833979845046997, "learning_rate": 4.041680239165527e-06, "loss": 0.8694, "step": 132390 }, { "epoch": 0.9583993861611182, "grad_norm": 0.1463756412267685, "learning_rate": 4.0416078525049406e-06, "loss": 0.8691, "step": 132400 }, { "epoch": 0.9584717728217044, "grad_norm": 0.14751847088336945, "learning_rate": 4.041535465844354e-06, "loss": 0.8666, "step": 132410 }, { "epoch": 0.9585441594822907, "grad_norm": 0.16007982194423676, "learning_rate": 4.041463079183769e-06, "loss": 0.8766, "step": 132420 }, { "epoch": 0.9586165461428768, "grad_norm": 0.1559884399175644, "learning_rate": 4.041390692523182e-06, "loss": 0.857, "step": 132430 }, { "epoch": 0.958688932803463, "grad_norm": 0.16903673112392426, "learning_rate": 4.041318305862596e-06, "loss": 0.8728, "step": 132440 }, { "epoch": 0.9587613194640492, "grad_norm": 0.15044556558132172, "learning_rate": 4.0412459192020095e-06, "loss": 0.8705, "step": 132450 }, { "epoch": 0.9588337061246354, "grad_norm": 0.1696968823671341, "learning_rate": 4.041173532541424e-06, "loss": 0.8593, "step": 132460 }, { "epoch": 0.9589060927852215, "grad_norm": 0.1612185686826706, "learning_rate": 4.0411011458808376e-06, "loss": 0.8674, "step": 132470 }, { "epoch": 0.9589784794458077, "grad_norm": 0.1500626504421234, "learning_rate": 4.041028759220251e-06, "loss": 0.873, "step": 132480 }, { "epoch": 0.9590508661063939, "grad_norm": 0.145673006772995, "learning_rate": 4.040956372559665e-06, "loss": 0.8659, "step": 132490 }, { "epoch": 0.9591232527669801, "grad_norm": 0.15620875358581543, "learning_rate": 4.040883985899079e-06, "loss": 0.8645, "step": 132500 }, { "epoch": 0.9591956394275662, "grad_norm": 0.16961318254470825, "learning_rate": 4.040811599238493e-06, "loss": 0.8722, "step": 132510 }, { "epoch": 0.9592680260881524, "grad_norm": 0.14998477697372437, "learning_rate": 4.0407392125779065e-06, "loss": 0.8499, "step": 132520 }, { "epoch": 0.9593404127487387, "grad_norm": 0.1547980010509491, "learning_rate": 4.04066682591732e-06, "loss": 0.8705, "step": 132530 }, { "epoch": 0.9594127994093249, "grad_norm": 0.14744049310684204, "learning_rate": 4.040594439256735e-06, "loss": 0.8626, "step": 132540 }, { "epoch": 0.9594851860699111, "grad_norm": 0.1580532044172287, "learning_rate": 4.040522052596148e-06, "loss": 0.8651, "step": 132550 }, { "epoch": 0.9595575727304972, "grad_norm": 0.14146533608436584, "learning_rate": 4.040449665935562e-06, "loss": 0.8675, "step": 132560 }, { "epoch": 0.9596299593910834, "grad_norm": 0.15694855153560638, "learning_rate": 4.0403772792749754e-06, "loss": 0.8776, "step": 132570 }, { "epoch": 0.9597023460516696, "grad_norm": 0.15668827295303345, "learning_rate": 4.04030489261439e-06, "loss": 0.8643, "step": 132580 }, { "epoch": 0.9597747327122558, "grad_norm": 0.15416784584522247, "learning_rate": 4.0402325059538035e-06, "loss": 0.8595, "step": 132590 }, { "epoch": 0.959847119372842, "grad_norm": 0.15903611481189728, "learning_rate": 4.040160119293217e-06, "loss": 0.8715, "step": 132600 }, { "epoch": 0.9599195060334281, "grad_norm": 0.14831218123435974, "learning_rate": 4.040087732632631e-06, "loss": 0.8719, "step": 132610 }, { "epoch": 0.9599918926940143, "grad_norm": 0.16342884302139282, "learning_rate": 4.040015345972045e-06, "loss": 0.8718, "step": 132620 }, { "epoch": 0.9600642793546005, "grad_norm": 0.16396649181842804, "learning_rate": 4.039942959311459e-06, "loss": 0.8556, "step": 132630 }, { "epoch": 0.9601366660151868, "grad_norm": 0.16489477455615997, "learning_rate": 4.0398705726508724e-06, "loss": 0.8738, "step": 132640 }, { "epoch": 0.960209052675773, "grad_norm": 0.14648328721523285, "learning_rate": 4.039798185990286e-06, "loss": 0.882, "step": 132650 }, { "epoch": 0.9602814393363591, "grad_norm": 0.15724149346351624, "learning_rate": 4.0397257993297005e-06, "loss": 0.8638, "step": 132660 }, { "epoch": 0.9603538259969453, "grad_norm": 0.15346843004226685, "learning_rate": 4.039653412669114e-06, "loss": 0.8824, "step": 132670 }, { "epoch": 0.9604262126575315, "grad_norm": 0.15470701456069946, "learning_rate": 4.039581026008528e-06, "loss": 0.8663, "step": 132680 }, { "epoch": 0.9604985993181177, "grad_norm": 0.14876993000507355, "learning_rate": 4.039508639347941e-06, "loss": 0.8618, "step": 132690 }, { "epoch": 0.9605709859787038, "grad_norm": 0.14748069643974304, "learning_rate": 4.039436252687355e-06, "loss": 0.8742, "step": 132700 }, { "epoch": 0.96064337263929, "grad_norm": 0.14472724497318268, "learning_rate": 4.039363866026769e-06, "loss": 0.8591, "step": 132710 }, { "epoch": 0.9607157592998762, "grad_norm": 0.16464345157146454, "learning_rate": 4.039291479366182e-06, "loss": 0.868, "step": 132720 }, { "epoch": 0.9607881459604624, "grad_norm": 0.15630069375038147, "learning_rate": 4.039219092705597e-06, "loss": 0.8675, "step": 132730 }, { "epoch": 0.9608605326210486, "grad_norm": 0.1542377769947052, "learning_rate": 4.03914670604501e-06, "loss": 0.8643, "step": 132740 }, { "epoch": 0.9609329192816348, "grad_norm": 0.15142075717449188, "learning_rate": 4.039074319384424e-06, "loss": 0.8583, "step": 132750 }, { "epoch": 0.961005305942221, "grad_norm": 0.14970579743385315, "learning_rate": 4.0390019327238375e-06, "loss": 0.8726, "step": 132760 }, { "epoch": 0.9610776926028072, "grad_norm": 0.1593393087387085, "learning_rate": 4.038929546063252e-06, "loss": 0.8746, "step": 132770 }, { "epoch": 0.9611500792633934, "grad_norm": 0.154205784201622, "learning_rate": 4.038857159402666e-06, "loss": 0.8675, "step": 132780 }, { "epoch": 0.9612224659239795, "grad_norm": 0.15986141562461853, "learning_rate": 4.038784772742079e-06, "loss": 0.8697, "step": 132790 }, { "epoch": 0.9612948525845657, "grad_norm": 0.1516096591949463, "learning_rate": 4.038712386081493e-06, "loss": 0.8603, "step": 132800 }, { "epoch": 0.9613672392451519, "grad_norm": 0.14772486686706543, "learning_rate": 4.038639999420907e-06, "loss": 0.8557, "step": 132810 }, { "epoch": 0.9614396259057381, "grad_norm": 0.152225062251091, "learning_rate": 4.038567612760321e-06, "loss": 0.8603, "step": 132820 }, { "epoch": 0.9615120125663242, "grad_norm": 0.1596631109714508, "learning_rate": 4.0384952260997345e-06, "loss": 0.8656, "step": 132830 }, { "epoch": 0.9615843992269104, "grad_norm": 0.15353646874427795, "learning_rate": 4.038422839439148e-06, "loss": 0.8606, "step": 132840 }, { "epoch": 0.9616567858874967, "grad_norm": 0.15093198418617249, "learning_rate": 4.038350452778563e-06, "loss": 0.8651, "step": 132850 }, { "epoch": 0.9617291725480829, "grad_norm": 0.15121009945869446, "learning_rate": 4.038278066117976e-06, "loss": 0.8747, "step": 132860 }, { "epoch": 0.961801559208669, "grad_norm": 0.142518550157547, "learning_rate": 4.03820567945739e-06, "loss": 0.8627, "step": 132870 }, { "epoch": 0.9618739458692552, "grad_norm": 0.14168381690979004, "learning_rate": 4.0381332927968035e-06, "loss": 0.872, "step": 132880 }, { "epoch": 0.9619463325298414, "grad_norm": 0.16068200767040253, "learning_rate": 4.038060906136217e-06, "loss": 0.8666, "step": 132890 }, { "epoch": 0.9620187191904276, "grad_norm": 0.15603065490722656, "learning_rate": 4.0379885194756315e-06, "loss": 0.8768, "step": 132900 }, { "epoch": 0.9620911058510138, "grad_norm": 0.16330550611019135, "learning_rate": 4.037916132815045e-06, "loss": 0.8713, "step": 132910 }, { "epoch": 0.9621634925115999, "grad_norm": 0.15388086438179016, "learning_rate": 4.037843746154459e-06, "loss": 0.8824, "step": 132920 }, { "epoch": 0.9622358791721861, "grad_norm": 0.1486055701971054, "learning_rate": 4.037771359493872e-06, "loss": 0.8652, "step": 132930 }, { "epoch": 0.9623082658327723, "grad_norm": 0.16068662703037262, "learning_rate": 4.037698972833287e-06, "loss": 0.8553, "step": 132940 }, { "epoch": 0.9623806524933586, "grad_norm": 0.14844128489494324, "learning_rate": 4.0376265861727005e-06, "loss": 0.8692, "step": 132950 }, { "epoch": 0.9624530391539448, "grad_norm": 0.1547292321920395, "learning_rate": 4.037554199512114e-06, "loss": 0.8598, "step": 132960 }, { "epoch": 0.9625254258145309, "grad_norm": 0.14296914637088776, "learning_rate": 4.037481812851528e-06, "loss": 0.8745, "step": 132970 }, { "epoch": 0.9625978124751171, "grad_norm": 0.1457308977842331, "learning_rate": 4.037409426190942e-06, "loss": 0.8685, "step": 132980 }, { "epoch": 0.9626701991357033, "grad_norm": 0.16108804941177368, "learning_rate": 4.037337039530356e-06, "loss": 0.8675, "step": 132990 }, { "epoch": 0.9627425857962895, "grad_norm": 0.1842719167470932, "learning_rate": 4.037264652869769e-06, "loss": 0.8723, "step": 133000 }, { "epoch": 0.9628149724568756, "grad_norm": 0.1494869738817215, "learning_rate": 4.037192266209183e-06, "loss": 0.8629, "step": 133010 }, { "epoch": 0.9628873591174618, "grad_norm": 0.1545685976743698, "learning_rate": 4.0371198795485975e-06, "loss": 0.8732, "step": 133020 }, { "epoch": 0.962959745778048, "grad_norm": 0.1837606430053711, "learning_rate": 4.037047492888011e-06, "loss": 0.8715, "step": 133030 }, { "epoch": 0.9630321324386342, "grad_norm": 0.15043866634368896, "learning_rate": 4.036975106227425e-06, "loss": 0.8642, "step": 133040 }, { "epoch": 0.9631045190992203, "grad_norm": 0.1640947014093399, "learning_rate": 4.036902719566838e-06, "loss": 0.8633, "step": 133050 }, { "epoch": 0.9631769057598066, "grad_norm": 0.16168645024299622, "learning_rate": 4.036830332906253e-06, "loss": 0.8725, "step": 133060 }, { "epoch": 0.9632492924203928, "grad_norm": 0.16159774363040924, "learning_rate": 4.036757946245666e-06, "loss": 0.8642, "step": 133070 }, { "epoch": 0.963321679080979, "grad_norm": 0.19764064252376556, "learning_rate": 4.03668555958508e-06, "loss": 0.8708, "step": 133080 }, { "epoch": 0.9633940657415652, "grad_norm": 0.16055376827716827, "learning_rate": 4.036613172924494e-06, "loss": 0.8577, "step": 133090 }, { "epoch": 0.9634664524021513, "grad_norm": 0.14640270173549652, "learning_rate": 4.036540786263908e-06, "loss": 0.8696, "step": 133100 }, { "epoch": 0.9635388390627375, "grad_norm": 0.18850268423557281, "learning_rate": 4.036468399603322e-06, "loss": 0.8629, "step": 133110 }, { "epoch": 0.9636112257233237, "grad_norm": 0.16357038915157318, "learning_rate": 4.036396012942735e-06, "loss": 0.8767, "step": 133120 }, { "epoch": 0.9636836123839099, "grad_norm": 0.17244000732898712, "learning_rate": 4.036323626282149e-06, "loss": 0.8624, "step": 133130 }, { "epoch": 0.963755999044496, "grad_norm": 0.15212216973304749, "learning_rate": 4.036251239621563e-06, "loss": 0.8561, "step": 133140 }, { "epoch": 0.9638283857050822, "grad_norm": 0.1720954179763794, "learning_rate": 4.036178852960977e-06, "loss": 0.8683, "step": 133150 }, { "epoch": 0.9639007723656684, "grad_norm": 0.21422089636325836, "learning_rate": 4.036106466300391e-06, "loss": 0.8845, "step": 133160 }, { "epoch": 0.9639731590262547, "grad_norm": 0.15769261121749878, "learning_rate": 4.036034079639804e-06, "loss": 0.8543, "step": 133170 }, { "epoch": 0.9640455456868409, "grad_norm": 0.15512368083000183, "learning_rate": 4.035961692979219e-06, "loss": 0.8571, "step": 133180 }, { "epoch": 0.964117932347427, "grad_norm": 0.16007642447948456, "learning_rate": 4.035889306318632e-06, "loss": 0.8621, "step": 133190 }, { "epoch": 0.9641903190080132, "grad_norm": 0.16909575462341309, "learning_rate": 4.035816919658046e-06, "loss": 0.8751, "step": 133200 }, { "epoch": 0.9642627056685994, "grad_norm": 0.158931702375412, "learning_rate": 4.0357445329974596e-06, "loss": 0.8655, "step": 133210 }, { "epoch": 0.9643350923291856, "grad_norm": 0.16062036156654358, "learning_rate": 4.035672146336873e-06, "loss": 0.8705, "step": 133220 }, { "epoch": 0.9644074789897717, "grad_norm": 0.15879908204078674, "learning_rate": 4.035599759676287e-06, "loss": 0.8609, "step": 133230 }, { "epoch": 0.9644798656503579, "grad_norm": 0.15936613082885742, "learning_rate": 4.0355273730157e-06, "loss": 0.8663, "step": 133240 }, { "epoch": 0.9645522523109441, "grad_norm": 0.159668430685997, "learning_rate": 4.035454986355115e-06, "loss": 0.8778, "step": 133250 }, { "epoch": 0.9646246389715303, "grad_norm": 0.15538303554058075, "learning_rate": 4.0353825996945285e-06, "loss": 0.8709, "step": 133260 }, { "epoch": 0.9646970256321166, "grad_norm": 0.19383670389652252, "learning_rate": 4.035310213033942e-06, "loss": 0.8635, "step": 133270 }, { "epoch": 0.9647694122927027, "grad_norm": 0.14985784888267517, "learning_rate": 4.035237826373356e-06, "loss": 0.8639, "step": 133280 }, { "epoch": 0.9648417989532889, "grad_norm": 0.15540927648544312, "learning_rate": 4.03516543971277e-06, "loss": 0.8737, "step": 133290 }, { "epoch": 0.9649141856138751, "grad_norm": 0.16484995186328888, "learning_rate": 4.035093053052184e-06, "loss": 0.8722, "step": 133300 }, { "epoch": 0.9649865722744613, "grad_norm": 0.15824109315872192, "learning_rate": 4.0350206663915974e-06, "loss": 0.8558, "step": 133310 }, { "epoch": 0.9650589589350475, "grad_norm": 0.16974005103111267, "learning_rate": 4.034948279731011e-06, "loss": 0.8753, "step": 133320 }, { "epoch": 0.9651313455956336, "grad_norm": 0.1548565924167633, "learning_rate": 4.0348758930704255e-06, "loss": 0.872, "step": 133330 }, { "epoch": 0.9652037322562198, "grad_norm": 0.16429255902767181, "learning_rate": 4.034803506409839e-06, "loss": 0.8556, "step": 133340 }, { "epoch": 0.965276118916806, "grad_norm": 0.16265404224395752, "learning_rate": 4.034731119749253e-06, "loss": 0.8644, "step": 133350 }, { "epoch": 0.9653485055773922, "grad_norm": 0.15244075655937195, "learning_rate": 4.034658733088666e-06, "loss": 0.8821, "step": 133360 }, { "epoch": 0.9654208922379783, "grad_norm": 0.14360937476158142, "learning_rate": 4.034586346428081e-06, "loss": 0.8896, "step": 133370 }, { "epoch": 0.9654932788985646, "grad_norm": 0.22526615858078003, "learning_rate": 4.0345139597674944e-06, "loss": 0.8681, "step": 133380 }, { "epoch": 0.9655656655591508, "grad_norm": 0.14955149590969086, "learning_rate": 4.034441573106908e-06, "loss": 0.8433, "step": 133390 }, { "epoch": 0.965638052219737, "grad_norm": 0.1530320793390274, "learning_rate": 4.034369186446322e-06, "loss": 0.8705, "step": 133400 }, { "epoch": 0.9657104388803232, "grad_norm": 0.15504902601242065, "learning_rate": 4.034296799785736e-06, "loss": 0.8548, "step": 133410 }, { "epoch": 0.9657828255409093, "grad_norm": 0.1458277404308319, "learning_rate": 4.03422441312515e-06, "loss": 0.8773, "step": 133420 }, { "epoch": 0.9658552122014955, "grad_norm": 0.14981764554977417, "learning_rate": 4.034152026464563e-06, "loss": 0.8849, "step": 133430 }, { "epoch": 0.9659275988620817, "grad_norm": 0.15874448418617249, "learning_rate": 4.034079639803977e-06, "loss": 0.8573, "step": 133440 }, { "epoch": 0.9659999855226679, "grad_norm": 0.1638738363981247, "learning_rate": 4.0340072531433914e-06, "loss": 0.859, "step": 133450 }, { "epoch": 0.966072372183254, "grad_norm": 0.15449586510658264, "learning_rate": 4.033934866482805e-06, "loss": 0.8728, "step": 133460 }, { "epoch": 0.9661447588438402, "grad_norm": 0.148344486951828, "learning_rate": 4.033862479822219e-06, "loss": 0.8585, "step": 133470 }, { "epoch": 0.9662171455044264, "grad_norm": 0.14319495856761932, "learning_rate": 4.033790093161632e-06, "loss": 0.8753, "step": 133480 }, { "epoch": 0.9662895321650127, "grad_norm": 0.16567128896713257, "learning_rate": 4.033717706501046e-06, "loss": 0.8751, "step": 133490 }, { "epoch": 0.9663619188255989, "grad_norm": 0.14397595822811127, "learning_rate": 4.03364531984046e-06, "loss": 0.8629, "step": 133500 }, { "epoch": 0.966434305486185, "grad_norm": 0.15650872886180878, "learning_rate": 4.033572933179874e-06, "loss": 0.8647, "step": 133510 }, { "epoch": 0.9665066921467712, "grad_norm": 0.15653972327709198, "learning_rate": 4.033500546519288e-06, "loss": 0.8722, "step": 133520 }, { "epoch": 0.9665790788073574, "grad_norm": 0.15006305277347565, "learning_rate": 4.033428159858701e-06, "loss": 0.8495, "step": 133530 }, { "epoch": 0.9666514654679436, "grad_norm": 0.14864414930343628, "learning_rate": 4.033355773198116e-06, "loss": 0.8673, "step": 133540 }, { "epoch": 0.9667238521285297, "grad_norm": 0.14984916150569916, "learning_rate": 4.033283386537529e-06, "loss": 0.8704, "step": 133550 }, { "epoch": 0.9667962387891159, "grad_norm": 0.14670242369174957, "learning_rate": 4.033210999876943e-06, "loss": 0.8673, "step": 133560 }, { "epoch": 0.9668686254497021, "grad_norm": 0.1624784618616104, "learning_rate": 4.0331386132163565e-06, "loss": 0.863, "step": 133570 }, { "epoch": 0.9669410121102883, "grad_norm": 0.1424523890018463, "learning_rate": 4.033066226555771e-06, "loss": 0.8602, "step": 133580 }, { "epoch": 0.9670133987708746, "grad_norm": 0.17853686213493347, "learning_rate": 4.032993839895185e-06, "loss": 0.8608, "step": 133590 }, { "epoch": 0.9670857854314607, "grad_norm": 0.15653735399246216, "learning_rate": 4.032921453234598e-06, "loss": 0.8705, "step": 133600 }, { "epoch": 0.9671581720920469, "grad_norm": 0.14804360270500183, "learning_rate": 4.032849066574012e-06, "loss": 0.8701, "step": 133610 }, { "epoch": 0.9672305587526331, "grad_norm": 0.1479026824235916, "learning_rate": 4.032776679913426e-06, "loss": 0.8601, "step": 133620 }, { "epoch": 0.9673029454132193, "grad_norm": 0.15589579939842224, "learning_rate": 4.03270429325284e-06, "loss": 0.8698, "step": 133630 }, { "epoch": 0.9673753320738054, "grad_norm": 0.14816348254680634, "learning_rate": 4.0326319065922535e-06, "loss": 0.8706, "step": 133640 }, { "epoch": 0.9674477187343916, "grad_norm": 0.15198203921318054, "learning_rate": 4.032559519931667e-06, "loss": 0.8615, "step": 133650 }, { "epoch": 0.9675201053949778, "grad_norm": 0.16135896742343903, "learning_rate": 4.032487133271082e-06, "loss": 0.8692, "step": 133660 }, { "epoch": 0.967592492055564, "grad_norm": 0.1466723084449768, "learning_rate": 4.032414746610495e-06, "loss": 0.8766, "step": 133670 }, { "epoch": 0.9676648787161501, "grad_norm": 0.1564931869506836, "learning_rate": 4.032342359949909e-06, "loss": 0.8609, "step": 133680 }, { "epoch": 0.9677372653767363, "grad_norm": 0.18006184697151184, "learning_rate": 4.0322699732893225e-06, "loss": 0.8721, "step": 133690 }, { "epoch": 0.9678096520373226, "grad_norm": 0.1543225795030594, "learning_rate": 4.032197586628737e-06, "loss": 0.8594, "step": 133700 }, { "epoch": 0.9678820386979088, "grad_norm": 0.14942817389965057, "learning_rate": 4.0321251999681505e-06, "loss": 0.8686, "step": 133710 }, { "epoch": 0.967954425358495, "grad_norm": 0.1509200930595398, "learning_rate": 4.032052813307564e-06, "loss": 0.8603, "step": 133720 }, { "epoch": 0.9680268120190811, "grad_norm": 0.20642779767513275, "learning_rate": 4.031980426646978e-06, "loss": 0.8635, "step": 133730 }, { "epoch": 0.9680991986796673, "grad_norm": 0.1582535356283188, "learning_rate": 4.031908039986392e-06, "loss": 0.8653, "step": 133740 }, { "epoch": 0.9681715853402535, "grad_norm": 0.15433159470558167, "learning_rate": 4.031835653325805e-06, "loss": 0.866, "step": 133750 }, { "epoch": 0.9682439720008397, "grad_norm": 0.14288122951984406, "learning_rate": 4.031763266665219e-06, "loss": 0.8631, "step": 133760 }, { "epoch": 0.9683163586614258, "grad_norm": 0.14594586193561554, "learning_rate": 4.031690880004633e-06, "loss": 0.8562, "step": 133770 }, { "epoch": 0.968388745322012, "grad_norm": 0.1446864902973175, "learning_rate": 4.031618493344047e-06, "loss": 0.8636, "step": 133780 }, { "epoch": 0.9684611319825982, "grad_norm": 0.15992321074008942, "learning_rate": 4.03154610668346e-06, "loss": 0.8636, "step": 133790 }, { "epoch": 0.9685335186431845, "grad_norm": 0.15092787146568298, "learning_rate": 4.031473720022874e-06, "loss": 0.8564, "step": 133800 }, { "epoch": 0.9686059053037707, "grad_norm": 0.1624569296836853, "learning_rate": 4.031401333362288e-06, "loss": 0.8754, "step": 133810 }, { "epoch": 0.9686782919643568, "grad_norm": 0.14972303807735443, "learning_rate": 4.031328946701702e-06, "loss": 0.852, "step": 133820 }, { "epoch": 0.968750678624943, "grad_norm": 0.1559324413537979, "learning_rate": 4.031256560041116e-06, "loss": 0.8814, "step": 133830 }, { "epoch": 0.9688230652855292, "grad_norm": 0.14922378957271576, "learning_rate": 4.031184173380529e-06, "loss": 0.8717, "step": 133840 }, { "epoch": 0.9688954519461154, "grad_norm": 0.15362094342708588, "learning_rate": 4.031111786719944e-06, "loss": 0.8554, "step": 133850 }, { "epoch": 0.9689678386067015, "grad_norm": 0.16382701694965363, "learning_rate": 4.031039400059357e-06, "loss": 0.8724, "step": 133860 }, { "epoch": 0.9690402252672877, "grad_norm": 0.15398184955120087, "learning_rate": 4.030967013398771e-06, "loss": 0.8605, "step": 133870 }, { "epoch": 0.9691126119278739, "grad_norm": 0.16252100467681885, "learning_rate": 4.0308946267381846e-06, "loss": 0.8688, "step": 133880 }, { "epoch": 0.9691849985884601, "grad_norm": 0.15495038032531738, "learning_rate": 4.030822240077599e-06, "loss": 0.8642, "step": 133890 }, { "epoch": 0.9692573852490463, "grad_norm": 0.15822090208530426, "learning_rate": 4.030749853417013e-06, "loss": 0.8567, "step": 133900 }, { "epoch": 0.9693297719096325, "grad_norm": 0.1519637256860733, "learning_rate": 4.030677466756426e-06, "loss": 0.8769, "step": 133910 }, { "epoch": 0.9694021585702187, "grad_norm": 0.1706445962190628, "learning_rate": 4.03060508009584e-06, "loss": 0.861, "step": 133920 }, { "epoch": 0.9694745452308049, "grad_norm": 0.15888677537441254, "learning_rate": 4.030532693435254e-06, "loss": 0.8584, "step": 133930 }, { "epoch": 0.9695469318913911, "grad_norm": 0.18062569200992584, "learning_rate": 4.030460306774668e-06, "loss": 0.8679, "step": 133940 }, { "epoch": 0.9696193185519772, "grad_norm": 0.1512601226568222, "learning_rate": 4.0303879201140816e-06, "loss": 0.8515, "step": 133950 }, { "epoch": 0.9696917052125634, "grad_norm": 0.1415674239397049, "learning_rate": 4.030315533453495e-06, "loss": 0.8646, "step": 133960 }, { "epoch": 0.9697640918731496, "grad_norm": 0.1439589112997055, "learning_rate": 4.03024314679291e-06, "loss": 0.8688, "step": 133970 }, { "epoch": 0.9698364785337358, "grad_norm": 0.17175428569316864, "learning_rate": 4.030170760132323e-06, "loss": 0.8702, "step": 133980 }, { "epoch": 0.969908865194322, "grad_norm": 0.15791040658950806, "learning_rate": 4.030098373471737e-06, "loss": 0.8602, "step": 133990 }, { "epoch": 0.9699812518549081, "grad_norm": 0.163300022482872, "learning_rate": 4.0300259868111505e-06, "loss": 0.8759, "step": 134000 }, { "epoch": 0.9700536385154943, "grad_norm": 0.15517699718475342, "learning_rate": 4.029953600150565e-06, "loss": 0.8717, "step": 134010 }, { "epoch": 0.9701260251760806, "grad_norm": 0.15258009731769562, "learning_rate": 4.029881213489979e-06, "loss": 0.8838, "step": 134020 }, { "epoch": 0.9701984118366668, "grad_norm": 0.16164755821228027, "learning_rate": 4.029808826829392e-06, "loss": 0.8635, "step": 134030 }, { "epoch": 0.970270798497253, "grad_norm": 0.1693074256181717, "learning_rate": 4.029736440168806e-06, "loss": 0.8551, "step": 134040 }, { "epoch": 0.9703431851578391, "grad_norm": 0.15383243560791016, "learning_rate": 4.02966405350822e-06, "loss": 0.87, "step": 134050 }, { "epoch": 0.9704155718184253, "grad_norm": 0.1397397667169571, "learning_rate": 4.029591666847634e-06, "loss": 0.8602, "step": 134060 }, { "epoch": 0.9704879584790115, "grad_norm": 0.15033358335494995, "learning_rate": 4.0295192801870475e-06, "loss": 0.858, "step": 134070 }, { "epoch": 0.9705603451395977, "grad_norm": 0.1515614092350006, "learning_rate": 4.029446893526461e-06, "loss": 0.8869, "step": 134080 }, { "epoch": 0.9706327318001838, "grad_norm": 0.1568913608789444, "learning_rate": 4.029374506865876e-06, "loss": 0.8814, "step": 134090 }, { "epoch": 0.97070511846077, "grad_norm": 0.1536082923412323, "learning_rate": 4.029302120205289e-06, "loss": 0.8603, "step": 134100 }, { "epoch": 0.9707775051213562, "grad_norm": 0.16278186440467834, "learning_rate": 4.029229733544703e-06, "loss": 0.8659, "step": 134110 }, { "epoch": 0.9708498917819425, "grad_norm": 0.15788668394088745, "learning_rate": 4.0291573468841164e-06, "loss": 0.8711, "step": 134120 }, { "epoch": 0.9709222784425287, "grad_norm": 0.14995813369750977, "learning_rate": 4.02908496022353e-06, "loss": 0.8567, "step": 134130 }, { "epoch": 0.9709946651031148, "grad_norm": 0.1554499715566635, "learning_rate": 4.0290125735629445e-06, "loss": 0.8612, "step": 134140 }, { "epoch": 0.971067051763701, "grad_norm": 0.15231886506080627, "learning_rate": 4.028940186902358e-06, "loss": 0.8668, "step": 134150 }, { "epoch": 0.9711394384242872, "grad_norm": 0.16602960228919983, "learning_rate": 4.028867800241772e-06, "loss": 0.8603, "step": 134160 }, { "epoch": 0.9712118250848734, "grad_norm": 0.15559101104736328, "learning_rate": 4.028795413581185e-06, "loss": 0.8724, "step": 134170 }, { "epoch": 0.9712842117454595, "grad_norm": 0.15899375081062317, "learning_rate": 4.0287230269206e-06, "loss": 0.8588, "step": 134180 }, { "epoch": 0.9713565984060457, "grad_norm": 0.1541290581226349, "learning_rate": 4.0286506402600134e-06, "loss": 0.8554, "step": 134190 }, { "epoch": 0.9714289850666319, "grad_norm": 0.16547095775604248, "learning_rate": 4.028578253599427e-06, "loss": 0.8729, "step": 134200 }, { "epoch": 0.9715013717272181, "grad_norm": 0.14189139008522034, "learning_rate": 4.028505866938841e-06, "loss": 0.8666, "step": 134210 }, { "epoch": 0.9715737583878042, "grad_norm": 0.16001036763191223, "learning_rate": 4.028433480278255e-06, "loss": 0.87, "step": 134220 }, { "epoch": 0.9716461450483905, "grad_norm": 0.163381889462471, "learning_rate": 4.028361093617669e-06, "loss": 0.8479, "step": 134230 }, { "epoch": 0.9717185317089767, "grad_norm": 0.15338028967380524, "learning_rate": 4.028288706957082e-06, "loss": 0.8646, "step": 134240 }, { "epoch": 0.9717909183695629, "grad_norm": 0.152128666639328, "learning_rate": 4.028216320296496e-06, "loss": 0.8672, "step": 134250 }, { "epoch": 0.9718633050301491, "grad_norm": 0.1537809669971466, "learning_rate": 4.0281439336359105e-06, "loss": 0.8555, "step": 134260 }, { "epoch": 0.9719356916907352, "grad_norm": 0.14656396210193634, "learning_rate": 4.028071546975324e-06, "loss": 0.8636, "step": 134270 }, { "epoch": 0.9720080783513214, "grad_norm": 0.15450994670391083, "learning_rate": 4.027999160314737e-06, "loss": 0.8658, "step": 134280 }, { "epoch": 0.9720804650119076, "grad_norm": 0.15121851861476898, "learning_rate": 4.027926773654151e-06, "loss": 0.8681, "step": 134290 }, { "epoch": 0.9721528516724938, "grad_norm": 0.1601051241159439, "learning_rate": 4.027854386993565e-06, "loss": 0.864, "step": 134300 }, { "epoch": 0.9722252383330799, "grad_norm": 0.14622844755649567, "learning_rate": 4.0277820003329785e-06, "loss": 0.8713, "step": 134310 }, { "epoch": 0.9722976249936661, "grad_norm": 0.14876750111579895, "learning_rate": 4.027709613672392e-06, "loss": 0.8719, "step": 134320 }, { "epoch": 0.9723700116542524, "grad_norm": 0.17520910501480103, "learning_rate": 4.027637227011807e-06, "loss": 0.8661, "step": 134330 }, { "epoch": 0.9724423983148386, "grad_norm": 0.1505105048418045, "learning_rate": 4.02756484035122e-06, "loss": 0.8659, "step": 134340 }, { "epoch": 0.9725147849754248, "grad_norm": 0.18113350868225098, "learning_rate": 4.027492453690634e-06, "loss": 0.8705, "step": 134350 }, { "epoch": 0.9725871716360109, "grad_norm": 0.14961007237434387, "learning_rate": 4.0274200670300475e-06, "loss": 0.8694, "step": 134360 }, { "epoch": 0.9726595582965971, "grad_norm": 0.14745749533176422, "learning_rate": 4.027347680369462e-06, "loss": 0.8727, "step": 134370 }, { "epoch": 0.9727319449571833, "grad_norm": 0.14691106975078583, "learning_rate": 4.0272752937088755e-06, "loss": 0.8673, "step": 134380 }, { "epoch": 0.9728043316177695, "grad_norm": 0.145847350358963, "learning_rate": 4.027202907048289e-06, "loss": 0.8603, "step": 134390 }, { "epoch": 0.9728767182783556, "grad_norm": 0.16000697016716003, "learning_rate": 4.027130520387703e-06, "loss": 0.8592, "step": 134400 }, { "epoch": 0.9729491049389418, "grad_norm": 0.15306724607944489, "learning_rate": 4.027058133727117e-06, "loss": 0.8608, "step": 134410 }, { "epoch": 0.973021491599528, "grad_norm": 0.1496717482805252, "learning_rate": 4.026985747066531e-06, "loss": 0.8751, "step": 134420 }, { "epoch": 0.9730938782601142, "grad_norm": 0.1527402400970459, "learning_rate": 4.0269133604059445e-06, "loss": 0.8678, "step": 134430 }, { "epoch": 0.9731662649207005, "grad_norm": 0.1510053426027298, "learning_rate": 4.026840973745358e-06, "loss": 0.8683, "step": 134440 }, { "epoch": 0.9732386515812866, "grad_norm": 0.157288059592247, "learning_rate": 4.0267685870847725e-06, "loss": 0.8571, "step": 134450 }, { "epoch": 0.9733110382418728, "grad_norm": 0.1452171355485916, "learning_rate": 4.026696200424186e-06, "loss": 0.8649, "step": 134460 }, { "epoch": 0.973383424902459, "grad_norm": 0.15778832137584686, "learning_rate": 4.0266238137636e-06, "loss": 0.8636, "step": 134470 }, { "epoch": 0.9734558115630452, "grad_norm": 0.14281177520751953, "learning_rate": 4.026551427103013e-06, "loss": 0.8588, "step": 134480 }, { "epoch": 0.9735281982236313, "grad_norm": 0.14823725819587708, "learning_rate": 4.026479040442428e-06, "loss": 0.8644, "step": 134490 }, { "epoch": 0.9736005848842175, "grad_norm": 0.14900419116020203, "learning_rate": 4.0264066537818415e-06, "loss": 0.8796, "step": 134500 }, { "epoch": 0.9736729715448037, "grad_norm": 0.1535101979970932, "learning_rate": 4.026334267121255e-06, "loss": 0.8621, "step": 134510 }, { "epoch": 0.9737453582053899, "grad_norm": 0.1498989462852478, "learning_rate": 4.026261880460669e-06, "loss": 0.8546, "step": 134520 }, { "epoch": 0.973817744865976, "grad_norm": 0.16558587551116943, "learning_rate": 4.026189493800083e-06, "loss": 0.8622, "step": 134530 }, { "epoch": 0.9738901315265622, "grad_norm": 0.15267030894756317, "learning_rate": 4.026117107139497e-06, "loss": 0.8666, "step": 134540 }, { "epoch": 0.9739625181871485, "grad_norm": 0.15370555222034454, "learning_rate": 4.02604472047891e-06, "loss": 0.8474, "step": 134550 }, { "epoch": 0.9740349048477347, "grad_norm": 0.1497911959886551, "learning_rate": 4.025972333818324e-06, "loss": 0.8697, "step": 134560 }, { "epoch": 0.9741072915083209, "grad_norm": 0.15965795516967773, "learning_rate": 4.0258999471577385e-06, "loss": 0.869, "step": 134570 }, { "epoch": 0.974179678168907, "grad_norm": 0.14840437471866608, "learning_rate": 4.025827560497152e-06, "loss": 0.8601, "step": 134580 }, { "epoch": 0.9742520648294932, "grad_norm": 0.15610599517822266, "learning_rate": 4.025755173836566e-06, "loss": 0.8606, "step": 134590 }, { "epoch": 0.9743244514900794, "grad_norm": 0.14686964452266693, "learning_rate": 4.025682787175979e-06, "loss": 0.8566, "step": 134600 }, { "epoch": 0.9743968381506656, "grad_norm": 0.16029107570648193, "learning_rate": 4.025610400515394e-06, "loss": 0.8528, "step": 134610 }, { "epoch": 0.9744692248112518, "grad_norm": 0.15175573527812958, "learning_rate": 4.025538013854807e-06, "loss": 0.8688, "step": 134620 }, { "epoch": 0.9745416114718379, "grad_norm": 0.16867345571517944, "learning_rate": 4.025465627194221e-06, "loss": 0.8564, "step": 134630 }, { "epoch": 0.9746139981324241, "grad_norm": 0.1488535851240158, "learning_rate": 4.025393240533635e-06, "loss": 0.8589, "step": 134640 }, { "epoch": 0.9746863847930104, "grad_norm": 0.147738978266716, "learning_rate": 4.025320853873049e-06, "loss": 0.8754, "step": 134650 }, { "epoch": 0.9747587714535966, "grad_norm": 0.14682996273040771, "learning_rate": 4.025248467212463e-06, "loss": 0.8585, "step": 134660 }, { "epoch": 0.9748311581141827, "grad_norm": 0.17877760529518127, "learning_rate": 4.025176080551876e-06, "loss": 0.8617, "step": 134670 }, { "epoch": 0.9749035447747689, "grad_norm": 0.3457520008087158, "learning_rate": 4.02510369389129e-06, "loss": 0.8692, "step": 134680 }, { "epoch": 0.9749759314353551, "grad_norm": 0.1764483004808426, "learning_rate": 4.025031307230704e-06, "loss": 0.8658, "step": 134690 }, { "epoch": 0.9750483180959413, "grad_norm": 0.17922911047935486, "learning_rate": 4.024958920570118e-06, "loss": 0.8704, "step": 134700 }, { "epoch": 0.9751207047565275, "grad_norm": 0.1529940515756607, "learning_rate": 4.024886533909532e-06, "loss": 0.8758, "step": 134710 }, { "epoch": 0.9751930914171136, "grad_norm": 0.1562705934047699, "learning_rate": 4.024814147248945e-06, "loss": 0.8639, "step": 134720 }, { "epoch": 0.9752654780776998, "grad_norm": 0.15895576775074005, "learning_rate": 4.024741760588359e-06, "loss": 0.8625, "step": 134730 }, { "epoch": 0.975337864738286, "grad_norm": 0.14802898466587067, "learning_rate": 4.024669373927773e-06, "loss": 0.8735, "step": 134740 }, { "epoch": 0.9754102513988722, "grad_norm": 0.17037954926490784, "learning_rate": 4.024596987267187e-06, "loss": 0.873, "step": 134750 }, { "epoch": 0.9754826380594585, "grad_norm": 0.16572614014148712, "learning_rate": 4.024524600606601e-06, "loss": 0.859, "step": 134760 }, { "epoch": 0.9755550247200446, "grad_norm": 0.15422946214675903, "learning_rate": 4.024452213946014e-06, "loss": 0.8608, "step": 134770 }, { "epoch": 0.9756274113806308, "grad_norm": 0.14978817105293274, "learning_rate": 4.024379827285429e-06, "loss": 0.8539, "step": 134780 }, { "epoch": 0.975699798041217, "grad_norm": 0.15020768344402313, "learning_rate": 4.024307440624842e-06, "loss": 0.8647, "step": 134790 }, { "epoch": 0.9757721847018032, "grad_norm": 0.1630668342113495, "learning_rate": 4.024235053964256e-06, "loss": 0.8748, "step": 134800 }, { "epoch": 0.9758445713623893, "grad_norm": 0.1647442728281021, "learning_rate": 4.0241626673036695e-06, "loss": 0.8916, "step": 134810 }, { "epoch": 0.9759169580229755, "grad_norm": 0.16220815479755402, "learning_rate": 4.024090280643083e-06, "loss": 0.8647, "step": 134820 }, { "epoch": 0.9759893446835617, "grad_norm": 0.15508753061294556, "learning_rate": 4.024017893982497e-06, "loss": 0.8776, "step": 134830 }, { "epoch": 0.9760617313441479, "grad_norm": 0.14630411565303802, "learning_rate": 4.023945507321911e-06, "loss": 0.862, "step": 134840 }, { "epoch": 0.976134118004734, "grad_norm": 0.16463924944400787, "learning_rate": 4.023873120661325e-06, "loss": 0.8738, "step": 134850 }, { "epoch": 0.9762065046653203, "grad_norm": 0.14505267143249512, "learning_rate": 4.0238007340007384e-06, "loss": 0.8578, "step": 134860 }, { "epoch": 0.9762788913259065, "grad_norm": 0.15662142634391785, "learning_rate": 4.023728347340152e-06, "loss": 0.8646, "step": 134870 }, { "epoch": 0.9763512779864927, "grad_norm": 0.3217943012714386, "learning_rate": 4.0236559606795665e-06, "loss": 0.87, "step": 134880 }, { "epoch": 0.9764236646470789, "grad_norm": 0.1467258632183075, "learning_rate": 4.02358357401898e-06, "loss": 0.8735, "step": 134890 }, { "epoch": 0.976496051307665, "grad_norm": 0.1595139056444168, "learning_rate": 4.023511187358394e-06, "loss": 0.8677, "step": 134900 }, { "epoch": 0.9765684379682512, "grad_norm": 0.16887742280960083, "learning_rate": 4.023438800697807e-06, "loss": 0.8625, "step": 134910 }, { "epoch": 0.9766408246288374, "grad_norm": 0.2070261687040329, "learning_rate": 4.023366414037221e-06, "loss": 0.8599, "step": 134920 }, { "epoch": 0.9767132112894236, "grad_norm": 0.1530458927154541, "learning_rate": 4.0232940273766354e-06, "loss": 0.859, "step": 134930 }, { "epoch": 0.9767855979500097, "grad_norm": 0.17413754761219025, "learning_rate": 4.023221640716049e-06, "loss": 0.8744, "step": 134940 }, { "epoch": 0.9768579846105959, "grad_norm": 0.14189285039901733, "learning_rate": 4.023149254055463e-06, "loss": 0.8678, "step": 134950 }, { "epoch": 0.9769303712711821, "grad_norm": 0.16996034979820251, "learning_rate": 4.023076867394876e-06, "loss": 0.8654, "step": 134960 }, { "epoch": 0.9770027579317684, "grad_norm": 0.15220043063163757, "learning_rate": 4.023004480734291e-06, "loss": 0.8705, "step": 134970 }, { "epoch": 0.9770751445923546, "grad_norm": 0.1522054523229599, "learning_rate": 4.022932094073704e-06, "loss": 0.8677, "step": 134980 }, { "epoch": 0.9771475312529407, "grad_norm": 0.14856263995170593, "learning_rate": 4.022859707413118e-06, "loss": 0.8604, "step": 134990 }, { "epoch": 0.9772199179135269, "grad_norm": 0.17470620572566986, "learning_rate": 4.022787320752532e-06, "loss": 0.8735, "step": 135000 }, { "epoch": 0.9772923045741131, "grad_norm": 0.16149313747882843, "learning_rate": 4.022714934091946e-06, "loss": 0.8694, "step": 135010 }, { "epoch": 0.9773646912346993, "grad_norm": 0.1480475217103958, "learning_rate": 4.02264254743136e-06, "loss": 0.8597, "step": 135020 }, { "epoch": 0.9774370778952854, "grad_norm": 0.16102159023284912, "learning_rate": 4.022570160770773e-06, "loss": 0.8735, "step": 135030 }, { "epoch": 0.9775094645558716, "grad_norm": 0.14369907975196838, "learning_rate": 4.022497774110187e-06, "loss": 0.8643, "step": 135040 }, { "epoch": 0.9775818512164578, "grad_norm": 0.1809437870979309, "learning_rate": 4.022425387449601e-06, "loss": 0.8584, "step": 135050 }, { "epoch": 0.977654237877044, "grad_norm": 0.15481232106685638, "learning_rate": 4.022353000789015e-06, "loss": 0.8688, "step": 135060 }, { "epoch": 0.9777266245376302, "grad_norm": 0.1535213589668274, "learning_rate": 4.022280614128429e-06, "loss": 0.8592, "step": 135070 }, { "epoch": 0.9777990111982164, "grad_norm": 0.14975209534168243, "learning_rate": 4.022208227467842e-06, "loss": 0.8657, "step": 135080 }, { "epoch": 0.9778713978588026, "grad_norm": 0.15290719270706177, "learning_rate": 4.022135840807257e-06, "loss": 0.8633, "step": 135090 }, { "epoch": 0.9779437845193888, "grad_norm": 0.15675273537635803, "learning_rate": 4.02206345414667e-06, "loss": 0.8642, "step": 135100 }, { "epoch": 0.978016171179975, "grad_norm": 0.17015263438224792, "learning_rate": 4.021991067486084e-06, "loss": 0.8693, "step": 135110 }, { "epoch": 0.9780885578405611, "grad_norm": 0.15289832651615143, "learning_rate": 4.0219186808254975e-06, "loss": 0.8696, "step": 135120 }, { "epoch": 0.9781609445011473, "grad_norm": 0.15227100253105164, "learning_rate": 4.021846294164912e-06, "loss": 0.8625, "step": 135130 }, { "epoch": 0.9782333311617335, "grad_norm": 0.19440871477127075, "learning_rate": 4.021773907504326e-06, "loss": 0.8697, "step": 135140 }, { "epoch": 0.9783057178223197, "grad_norm": 0.15097714960575104, "learning_rate": 4.021701520843739e-06, "loss": 0.854, "step": 135150 }, { "epoch": 0.9783781044829059, "grad_norm": 0.16234318912029266, "learning_rate": 4.021629134183153e-06, "loss": 0.8774, "step": 135160 }, { "epoch": 0.978450491143492, "grad_norm": 0.19647642970085144, "learning_rate": 4.021556747522567e-06, "loss": 0.8644, "step": 135170 }, { "epoch": 0.9785228778040783, "grad_norm": 0.14601005613803864, "learning_rate": 4.021484360861981e-06, "loss": 0.8577, "step": 135180 }, { "epoch": 0.9785952644646645, "grad_norm": 0.15095117688179016, "learning_rate": 4.0214119742013945e-06, "loss": 0.8604, "step": 135190 }, { "epoch": 0.9786676511252507, "grad_norm": 0.14409467577934265, "learning_rate": 4.021339587540808e-06, "loss": 0.8597, "step": 135200 }, { "epoch": 0.9787400377858368, "grad_norm": 0.14906567335128784, "learning_rate": 4.021267200880223e-06, "loss": 0.8643, "step": 135210 }, { "epoch": 0.978812424446423, "grad_norm": 0.15724803507328033, "learning_rate": 4.021194814219636e-06, "loss": 0.8458, "step": 135220 }, { "epoch": 0.9788848111070092, "grad_norm": 0.14980719983577728, "learning_rate": 4.02112242755905e-06, "loss": 0.8623, "step": 135230 }, { "epoch": 0.9789571977675954, "grad_norm": 0.14800138771533966, "learning_rate": 4.0210500408984635e-06, "loss": 0.8602, "step": 135240 }, { "epoch": 0.9790295844281816, "grad_norm": 0.24829064309597015, "learning_rate": 4.020977654237878e-06, "loss": 0.8596, "step": 135250 }, { "epoch": 0.9791019710887677, "grad_norm": 0.159278005361557, "learning_rate": 4.0209052675772916e-06, "loss": 0.8695, "step": 135260 }, { "epoch": 0.9791743577493539, "grad_norm": 0.16504423320293427, "learning_rate": 4.020832880916705e-06, "loss": 0.8636, "step": 135270 }, { "epoch": 0.9792467444099401, "grad_norm": 0.1613035798072815, "learning_rate": 4.020760494256119e-06, "loss": 0.8675, "step": 135280 }, { "epoch": 0.9793191310705264, "grad_norm": 0.16693058609962463, "learning_rate": 4.020688107595533e-06, "loss": 0.8688, "step": 135290 }, { "epoch": 0.9793915177311125, "grad_norm": 0.14621016383171082, "learning_rate": 4.020615720934947e-06, "loss": 0.8563, "step": 135300 }, { "epoch": 0.9794639043916987, "grad_norm": 0.15493078529834747, "learning_rate": 4.0205433342743605e-06, "loss": 0.865, "step": 135310 }, { "epoch": 0.9795362910522849, "grad_norm": 0.1841004192829132, "learning_rate": 4.020470947613774e-06, "loss": 0.8632, "step": 135320 }, { "epoch": 0.9796086777128711, "grad_norm": 0.1623116284608841, "learning_rate": 4.0203985609531886e-06, "loss": 0.864, "step": 135330 }, { "epoch": 0.9796810643734573, "grad_norm": 0.14910165965557098, "learning_rate": 4.020326174292601e-06, "loss": 0.8692, "step": 135340 }, { "epoch": 0.9797534510340434, "grad_norm": 0.17828305065631866, "learning_rate": 4.020253787632015e-06, "loss": 0.8739, "step": 135350 }, { "epoch": 0.9798258376946296, "grad_norm": 0.1424138993024826, "learning_rate": 4.020181400971429e-06, "loss": 0.8643, "step": 135360 }, { "epoch": 0.9798982243552158, "grad_norm": 0.16113391518592834, "learning_rate": 4.020109014310843e-06, "loss": 0.8732, "step": 135370 }, { "epoch": 0.979970611015802, "grad_norm": 0.17208503186702728, "learning_rate": 4.020036627650257e-06, "loss": 0.85, "step": 135380 }, { "epoch": 0.9800429976763883, "grad_norm": 0.17494194209575653, "learning_rate": 4.01996424098967e-06, "loss": 0.8593, "step": 135390 }, { "epoch": 0.9801153843369744, "grad_norm": 0.15494690835475922, "learning_rate": 4.019891854329085e-06, "loss": 0.8766, "step": 135400 }, { "epoch": 0.9801877709975606, "grad_norm": 0.15408842265605927, "learning_rate": 4.019819467668498e-06, "loss": 0.8711, "step": 135410 }, { "epoch": 0.9802601576581468, "grad_norm": 0.15154585242271423, "learning_rate": 4.019747081007912e-06, "loss": 0.8571, "step": 135420 }, { "epoch": 0.980332544318733, "grad_norm": 0.16975507140159607, "learning_rate": 4.0196746943473256e-06, "loss": 0.8818, "step": 135430 }, { "epoch": 0.9804049309793191, "grad_norm": 0.14920401573181152, "learning_rate": 4.01960230768674e-06, "loss": 0.869, "step": 135440 }, { "epoch": 0.9804773176399053, "grad_norm": 0.15514759719371796, "learning_rate": 4.019529921026154e-06, "loss": 0.8593, "step": 135450 }, { "epoch": 0.9805497043004915, "grad_norm": 0.16312751173973083, "learning_rate": 4.019457534365567e-06, "loss": 0.8585, "step": 135460 }, { "epoch": 0.9806220909610777, "grad_norm": 0.14903192222118378, "learning_rate": 4.019385147704981e-06, "loss": 0.8557, "step": 135470 }, { "epoch": 0.9806944776216638, "grad_norm": 0.15658678114414215, "learning_rate": 4.019312761044395e-06, "loss": 0.8683, "step": 135480 }, { "epoch": 0.98076686428225, "grad_norm": 0.15279506146907806, "learning_rate": 4.019240374383809e-06, "loss": 0.8585, "step": 135490 }, { "epoch": 0.9808392509428363, "grad_norm": 0.14755424857139587, "learning_rate": 4.019167987723223e-06, "loss": 0.8533, "step": 135500 }, { "epoch": 0.9809116376034225, "grad_norm": 0.14603859186172485, "learning_rate": 4.019095601062636e-06, "loss": 0.8648, "step": 135510 }, { "epoch": 0.9809840242640087, "grad_norm": 0.1589314043521881, "learning_rate": 4.01902321440205e-06, "loss": 0.8613, "step": 135520 }, { "epoch": 0.9810564109245948, "grad_norm": 0.1501326709985733, "learning_rate": 4.018950827741464e-06, "loss": 0.8706, "step": 135530 }, { "epoch": 0.981128797585181, "grad_norm": 0.15923535823822021, "learning_rate": 4.018878441080878e-06, "loss": 0.8739, "step": 135540 }, { "epoch": 0.9812011842457672, "grad_norm": 0.15568740665912628, "learning_rate": 4.0188060544202915e-06, "loss": 0.8517, "step": 135550 }, { "epoch": 0.9812735709063534, "grad_norm": 0.1511840522289276, "learning_rate": 4.018733667759705e-06, "loss": 0.8474, "step": 135560 }, { "epoch": 0.9813459575669395, "grad_norm": 0.155585378408432, "learning_rate": 4.01866128109912e-06, "loss": 0.8598, "step": 135570 }, { "epoch": 0.9814183442275257, "grad_norm": 0.15840795636177063, "learning_rate": 4.018588894438533e-06, "loss": 0.8683, "step": 135580 }, { "epoch": 0.9814907308881119, "grad_norm": 0.1394014209508896, "learning_rate": 4.018516507777947e-06, "loss": 0.8745, "step": 135590 }, { "epoch": 0.9815631175486981, "grad_norm": 0.15405994653701782, "learning_rate": 4.0184441211173604e-06, "loss": 0.876, "step": 135600 }, { "epoch": 0.9816355042092844, "grad_norm": 0.15901432931423187, "learning_rate": 4.018371734456775e-06, "loss": 0.8676, "step": 135610 }, { "epoch": 0.9817078908698705, "grad_norm": 0.150760680437088, "learning_rate": 4.0182993477961885e-06, "loss": 0.8553, "step": 135620 }, { "epoch": 0.9817802775304567, "grad_norm": 0.22623537480831146, "learning_rate": 4.018226961135602e-06, "loss": 0.861, "step": 135630 }, { "epoch": 0.9818526641910429, "grad_norm": 0.15437884628772736, "learning_rate": 4.018154574475016e-06, "loss": 0.8691, "step": 135640 }, { "epoch": 0.9819250508516291, "grad_norm": 0.15233850479125977, "learning_rate": 4.01808218781443e-06, "loss": 0.8692, "step": 135650 }, { "epoch": 0.9819974375122152, "grad_norm": 0.1725022792816162, "learning_rate": 4.018009801153844e-06, "loss": 0.8626, "step": 135660 }, { "epoch": 0.9820698241728014, "grad_norm": 0.5819795727729797, "learning_rate": 4.0179374144932574e-06, "loss": 0.8739, "step": 135670 }, { "epoch": 0.9821422108333876, "grad_norm": 0.20204482972621918, "learning_rate": 4.017865027832671e-06, "loss": 0.8681, "step": 135680 }, { "epoch": 0.9822145974939738, "grad_norm": 0.15785406529903412, "learning_rate": 4.0177926411720855e-06, "loss": 0.8545, "step": 135690 }, { "epoch": 0.98228698415456, "grad_norm": 0.17018094658851624, "learning_rate": 4.017720254511499e-06, "loss": 0.8569, "step": 135700 }, { "epoch": 0.9823593708151462, "grad_norm": 0.1424306482076645, "learning_rate": 4.017647867850913e-06, "loss": 0.8755, "step": 135710 }, { "epoch": 0.9824317574757324, "grad_norm": 0.146108016371727, "learning_rate": 4.017575481190326e-06, "loss": 0.8748, "step": 135720 }, { "epoch": 0.9825041441363186, "grad_norm": 0.16462048888206482, "learning_rate": 4.017503094529741e-06, "loss": 0.8623, "step": 135730 }, { "epoch": 0.9825765307969048, "grad_norm": 0.14882566034793854, "learning_rate": 4.0174307078691544e-06, "loss": 0.8583, "step": 135740 }, { "epoch": 0.982648917457491, "grad_norm": 0.15076982975006104, "learning_rate": 4.017358321208568e-06, "loss": 0.8668, "step": 135750 }, { "epoch": 0.9827213041180771, "grad_norm": 0.14368361234664917, "learning_rate": 4.017285934547982e-06, "loss": 0.8635, "step": 135760 }, { "epoch": 0.9827936907786633, "grad_norm": 0.16168615221977234, "learning_rate": 4.017213547887396e-06, "loss": 0.8649, "step": 135770 }, { "epoch": 0.9828660774392495, "grad_norm": 0.1403905153274536, "learning_rate": 4.01714116122681e-06, "loss": 0.8717, "step": 135780 }, { "epoch": 0.9829384640998357, "grad_norm": 0.14840847253799438, "learning_rate": 4.017068774566223e-06, "loss": 0.8564, "step": 135790 }, { "epoch": 0.9830108507604218, "grad_norm": 0.174706369638443, "learning_rate": 4.016996387905637e-06, "loss": 0.8632, "step": 135800 }, { "epoch": 0.983083237421008, "grad_norm": 0.16106009483337402, "learning_rate": 4.0169240012450515e-06, "loss": 0.8746, "step": 135810 }, { "epoch": 0.9831556240815943, "grad_norm": 0.15594612061977386, "learning_rate": 4.016851614584465e-06, "loss": 0.8661, "step": 135820 }, { "epoch": 0.9832280107421805, "grad_norm": 0.14947769045829773, "learning_rate": 4.016779227923879e-06, "loss": 0.8688, "step": 135830 }, { "epoch": 0.9833003974027666, "grad_norm": 0.15984922647476196, "learning_rate": 4.016706841263292e-06, "loss": 0.8712, "step": 135840 }, { "epoch": 0.9833727840633528, "grad_norm": 0.1563669741153717, "learning_rate": 4.016634454602707e-06, "loss": 0.8738, "step": 135850 }, { "epoch": 0.983445170723939, "grad_norm": 0.15901511907577515, "learning_rate": 4.01656206794212e-06, "loss": 0.8672, "step": 135860 }, { "epoch": 0.9835175573845252, "grad_norm": 0.15088501572608948, "learning_rate": 4.016489681281533e-06, "loss": 0.8752, "step": 135870 }, { "epoch": 0.9835899440451114, "grad_norm": 0.4412364065647125, "learning_rate": 4.016417294620948e-06, "loss": 0.8594, "step": 135880 }, { "epoch": 0.9836623307056975, "grad_norm": 0.14504173398017883, "learning_rate": 4.016344907960361e-06, "loss": 0.8658, "step": 135890 }, { "epoch": 0.9837347173662837, "grad_norm": 0.15376900136470795, "learning_rate": 4.016272521299775e-06, "loss": 0.8752, "step": 135900 }, { "epoch": 0.9838071040268699, "grad_norm": 0.42392078042030334, "learning_rate": 4.0162001346391885e-06, "loss": 0.8586, "step": 135910 }, { "epoch": 0.9838794906874562, "grad_norm": 0.17398519814014435, "learning_rate": 4.016127747978603e-06, "loss": 0.8659, "step": 135920 }, { "epoch": 0.9839518773480423, "grad_norm": 0.15040136873722076, "learning_rate": 4.0160553613180165e-06, "loss": 0.863, "step": 135930 }, { "epoch": 0.9840242640086285, "grad_norm": 0.15446005761623383, "learning_rate": 4.01598297465743e-06, "loss": 0.8706, "step": 135940 }, { "epoch": 0.9840966506692147, "grad_norm": 0.14991377294063568, "learning_rate": 4.015910587996844e-06, "loss": 0.8666, "step": 135950 }, { "epoch": 0.9841690373298009, "grad_norm": 0.15501756966114044, "learning_rate": 4.015838201336258e-06, "loss": 0.867, "step": 135960 }, { "epoch": 0.984241423990387, "grad_norm": 0.1403217762708664, "learning_rate": 4.015765814675672e-06, "loss": 0.8682, "step": 135970 }, { "epoch": 0.9843138106509732, "grad_norm": 0.1542348712682724, "learning_rate": 4.0156934280150855e-06, "loss": 0.8656, "step": 135980 }, { "epoch": 0.9843861973115594, "grad_norm": 0.16103000938892365, "learning_rate": 4.015621041354499e-06, "loss": 0.8642, "step": 135990 }, { "epoch": 0.9844585839721456, "grad_norm": 0.14834414422512054, "learning_rate": 4.0155486546939136e-06, "loss": 0.8572, "step": 136000 }, { "epoch": 0.9845309706327318, "grad_norm": 0.16515208780765533, "learning_rate": 4.015476268033327e-06, "loss": 0.8589, "step": 136010 }, { "epoch": 0.9846033572933179, "grad_norm": 0.16281743347644806, "learning_rate": 4.015403881372741e-06, "loss": 0.867, "step": 136020 }, { "epoch": 0.9846757439539042, "grad_norm": 0.14419202506542206, "learning_rate": 4.015331494712154e-06, "loss": 0.8527, "step": 136030 }, { "epoch": 0.9847481306144904, "grad_norm": 0.16212014853954315, "learning_rate": 4.015259108051569e-06, "loss": 0.8629, "step": 136040 }, { "epoch": 0.9848205172750766, "grad_norm": 0.16474084556102753, "learning_rate": 4.0151867213909825e-06, "loss": 0.8652, "step": 136050 }, { "epoch": 0.9848929039356628, "grad_norm": 0.19735056161880493, "learning_rate": 4.015114334730396e-06, "loss": 0.8733, "step": 136060 }, { "epoch": 0.9849652905962489, "grad_norm": 0.164767324924469, "learning_rate": 4.01504194806981e-06, "loss": 0.8641, "step": 136070 }, { "epoch": 0.9850376772568351, "grad_norm": 0.14573654532432556, "learning_rate": 4.014969561409224e-06, "loss": 0.8648, "step": 136080 }, { "epoch": 0.9851100639174213, "grad_norm": 0.15756194293498993, "learning_rate": 4.014897174748638e-06, "loss": 0.8658, "step": 136090 }, { "epoch": 0.9851824505780075, "grad_norm": 0.16832998394966125, "learning_rate": 4.014824788088051e-06, "loss": 0.8561, "step": 136100 }, { "epoch": 0.9852548372385936, "grad_norm": 0.1867925524711609, "learning_rate": 4.014752401427465e-06, "loss": 0.8723, "step": 136110 }, { "epoch": 0.9853272238991798, "grad_norm": 0.1441367268562317, "learning_rate": 4.0146800147668795e-06, "loss": 0.8627, "step": 136120 }, { "epoch": 0.985399610559766, "grad_norm": 0.1538180559873581, "learning_rate": 4.014607628106293e-06, "loss": 0.8647, "step": 136130 }, { "epoch": 0.9854719972203523, "grad_norm": 0.15871921181678772, "learning_rate": 4.014535241445707e-06, "loss": 0.8555, "step": 136140 }, { "epoch": 0.9855443838809385, "grad_norm": 0.15353409945964813, "learning_rate": 4.01446285478512e-06, "loss": 0.8575, "step": 136150 }, { "epoch": 0.9856167705415246, "grad_norm": 0.15181076526641846, "learning_rate": 4.014390468124534e-06, "loss": 0.8714, "step": 136160 }, { "epoch": 0.9856891572021108, "grad_norm": 0.15089160203933716, "learning_rate": 4.014318081463948e-06, "loss": 0.8763, "step": 136170 }, { "epoch": 0.985761543862697, "grad_norm": 0.15391786396503448, "learning_rate": 4.014245694803362e-06, "loss": 0.8725, "step": 136180 }, { "epoch": 0.9858339305232832, "grad_norm": 0.14188186824321747, "learning_rate": 4.014173308142776e-06, "loss": 0.8615, "step": 136190 }, { "epoch": 0.9859063171838693, "grad_norm": 0.16253648698329926, "learning_rate": 4.014100921482189e-06, "loss": 0.8683, "step": 136200 }, { "epoch": 0.9859787038444555, "grad_norm": 0.154387429356575, "learning_rate": 4.014028534821604e-06, "loss": 0.859, "step": 136210 }, { "epoch": 0.9860510905050417, "grad_norm": 0.15761414170265198, "learning_rate": 4.013956148161017e-06, "loss": 0.8724, "step": 136220 }, { "epoch": 0.9861234771656279, "grad_norm": 0.14925530552864075, "learning_rate": 4.013883761500431e-06, "loss": 0.8658, "step": 136230 }, { "epoch": 0.9861958638262142, "grad_norm": 0.15609656274318695, "learning_rate": 4.013811374839845e-06, "loss": 0.8684, "step": 136240 }, { "epoch": 0.9862682504868003, "grad_norm": 0.1742173135280609, "learning_rate": 4.013738988179259e-06, "loss": 0.8502, "step": 136250 }, { "epoch": 0.9863406371473865, "grad_norm": 0.15762127935886383, "learning_rate": 4.013666601518673e-06, "loss": 0.8603, "step": 136260 }, { "epoch": 0.9864130238079727, "grad_norm": 0.178837388753891, "learning_rate": 4.013594214858086e-06, "loss": 0.8645, "step": 136270 }, { "epoch": 0.9864854104685589, "grad_norm": 0.16362306475639343, "learning_rate": 4.0135218281975e-06, "loss": 0.8626, "step": 136280 }, { "epoch": 0.986557797129145, "grad_norm": 0.14631548523902893, "learning_rate": 4.013449441536914e-06, "loss": 0.8687, "step": 136290 }, { "epoch": 0.9866301837897312, "grad_norm": 0.14602495729923248, "learning_rate": 4.013377054876328e-06, "loss": 0.8654, "step": 136300 }, { "epoch": 0.9867025704503174, "grad_norm": 0.15310421586036682, "learning_rate": 4.013304668215742e-06, "loss": 0.8742, "step": 136310 }, { "epoch": 0.9867749571109036, "grad_norm": 0.14649897813796997, "learning_rate": 4.013232281555155e-06, "loss": 0.8571, "step": 136320 }, { "epoch": 0.9868473437714897, "grad_norm": 0.15480268001556396, "learning_rate": 4.01315989489457e-06, "loss": 0.8737, "step": 136330 }, { "epoch": 0.9869197304320759, "grad_norm": 0.161812886595726, "learning_rate": 4.013087508233983e-06, "loss": 0.8601, "step": 136340 }, { "epoch": 0.9869921170926622, "grad_norm": 0.17308774590492249, "learning_rate": 4.013015121573397e-06, "loss": 0.8818, "step": 136350 }, { "epoch": 0.9870645037532484, "grad_norm": 0.14882414042949677, "learning_rate": 4.0129427349128105e-06, "loss": 0.8744, "step": 136360 }, { "epoch": 0.9871368904138346, "grad_norm": 0.14824624359607697, "learning_rate": 4.012870348252225e-06, "loss": 0.8686, "step": 136370 }, { "epoch": 0.9872092770744207, "grad_norm": 0.16246159374713898, "learning_rate": 4.012797961591639e-06, "loss": 0.8634, "step": 136380 }, { "epoch": 0.9872816637350069, "grad_norm": 0.17609475553035736, "learning_rate": 4.012725574931052e-06, "loss": 0.8657, "step": 136390 }, { "epoch": 0.9873540503955931, "grad_norm": 0.14540891349315643, "learning_rate": 4.012653188270466e-06, "loss": 0.8443, "step": 136400 }, { "epoch": 0.9874264370561793, "grad_norm": 0.15471509099006653, "learning_rate": 4.0125808016098794e-06, "loss": 0.8676, "step": 136410 }, { "epoch": 0.9874988237167655, "grad_norm": 0.15584422647953033, "learning_rate": 4.012508414949293e-06, "loss": 0.8639, "step": 136420 }, { "epoch": 0.9875712103773516, "grad_norm": 0.1576376110315323, "learning_rate": 4.012436028288707e-06, "loss": 0.8676, "step": 136430 }, { "epoch": 0.9876435970379378, "grad_norm": 0.19695936143398285, "learning_rate": 4.012363641628121e-06, "loss": 0.8697, "step": 136440 }, { "epoch": 0.9877159836985241, "grad_norm": 0.1736014187335968, "learning_rate": 4.012291254967535e-06, "loss": 0.8727, "step": 136450 }, { "epoch": 0.9877883703591103, "grad_norm": 0.14788353443145752, "learning_rate": 4.012218868306948e-06, "loss": 0.8549, "step": 136460 }, { "epoch": 0.9878607570196964, "grad_norm": 0.16188114881515503, "learning_rate": 4.012146481646362e-06, "loss": 0.8411, "step": 136470 }, { "epoch": 0.9879331436802826, "grad_norm": 0.14878244698047638, "learning_rate": 4.0120740949857764e-06, "loss": 0.8626, "step": 136480 }, { "epoch": 0.9880055303408688, "grad_norm": 0.16054673492908478, "learning_rate": 4.01200170832519e-06, "loss": 0.8722, "step": 136490 }, { "epoch": 0.988077917001455, "grad_norm": 0.18446345627307892, "learning_rate": 4.011929321664604e-06, "loss": 0.8663, "step": 136500 }, { "epoch": 0.9881503036620412, "grad_norm": 0.15131662786006927, "learning_rate": 4.011856935004017e-06, "loss": 0.8707, "step": 136510 }, { "epoch": 0.9882226903226273, "grad_norm": 0.15739837288856506, "learning_rate": 4.011784548343432e-06, "loss": 0.8606, "step": 136520 }, { "epoch": 0.9882950769832135, "grad_norm": 0.17093366384506226, "learning_rate": 4.011712161682845e-06, "loss": 0.8555, "step": 136530 }, { "epoch": 0.9883674636437997, "grad_norm": 0.1519378423690796, "learning_rate": 4.011639775022259e-06, "loss": 0.8563, "step": 136540 }, { "epoch": 0.9884398503043859, "grad_norm": 0.13930349051952362, "learning_rate": 4.011567388361673e-06, "loss": 0.8667, "step": 136550 }, { "epoch": 0.9885122369649721, "grad_norm": 0.1486721783876419, "learning_rate": 4.011495001701087e-06, "loss": 0.8662, "step": 136560 }, { "epoch": 0.9885846236255583, "grad_norm": 0.14712847769260406, "learning_rate": 4.011422615040501e-06, "loss": 0.8593, "step": 136570 }, { "epoch": 0.9886570102861445, "grad_norm": 0.15622474253177643, "learning_rate": 4.011350228379914e-06, "loss": 0.8692, "step": 136580 }, { "epoch": 0.9887293969467307, "grad_norm": 0.15203312039375305, "learning_rate": 4.011277841719328e-06, "loss": 0.8552, "step": 136590 }, { "epoch": 0.9888017836073169, "grad_norm": 0.15190285444259644, "learning_rate": 4.011205455058742e-06, "loss": 0.8699, "step": 136600 }, { "epoch": 0.988874170267903, "grad_norm": 0.14632107317447662, "learning_rate": 4.011133068398156e-06, "loss": 0.8657, "step": 136610 }, { "epoch": 0.9889465569284892, "grad_norm": 0.16472133994102478, "learning_rate": 4.01106068173757e-06, "loss": 0.8669, "step": 136620 }, { "epoch": 0.9890189435890754, "grad_norm": 0.17338450253009796, "learning_rate": 4.010988295076983e-06, "loss": 0.8675, "step": 136630 }, { "epoch": 0.9890913302496616, "grad_norm": 0.1531408429145813, "learning_rate": 4.010915908416398e-06, "loss": 0.8706, "step": 136640 }, { "epoch": 0.9891637169102477, "grad_norm": 0.2573656141757965, "learning_rate": 4.010843521755811e-06, "loss": 0.8592, "step": 136650 }, { "epoch": 0.9892361035708339, "grad_norm": 0.17421439290046692, "learning_rate": 4.010771135095225e-06, "loss": 0.8501, "step": 136660 }, { "epoch": 0.9893084902314202, "grad_norm": 0.1428343653678894, "learning_rate": 4.0106987484346385e-06, "loss": 0.8451, "step": 136670 }, { "epoch": 0.9893808768920064, "grad_norm": 0.1828918159008026, "learning_rate": 4.010626361774053e-06, "loss": 0.8663, "step": 136680 }, { "epoch": 0.9894532635525926, "grad_norm": 0.14895032346248627, "learning_rate": 4.010553975113467e-06, "loss": 0.8611, "step": 136690 }, { "epoch": 0.9895256502131787, "grad_norm": 0.17357683181762695, "learning_rate": 4.01048158845288e-06, "loss": 0.8655, "step": 136700 }, { "epoch": 0.9895980368737649, "grad_norm": 0.1530769318342209, "learning_rate": 4.010409201792294e-06, "loss": 0.852, "step": 136710 }, { "epoch": 0.9896704235343511, "grad_norm": 0.14855614304542542, "learning_rate": 4.010336815131708e-06, "loss": 0.8639, "step": 136720 }, { "epoch": 0.9897428101949373, "grad_norm": 0.15136641263961792, "learning_rate": 4.010264428471122e-06, "loss": 0.8603, "step": 136730 }, { "epoch": 0.9898151968555234, "grad_norm": 0.15471212565898895, "learning_rate": 4.0101920418105356e-06, "loss": 0.8762, "step": 136740 }, { "epoch": 0.9898875835161096, "grad_norm": 0.15268218517303467, "learning_rate": 4.010119655149949e-06, "loss": 0.8656, "step": 136750 }, { "epoch": 0.9899599701766958, "grad_norm": 0.15985944867134094, "learning_rate": 4.010047268489364e-06, "loss": 0.8624, "step": 136760 }, { "epoch": 0.9900323568372821, "grad_norm": 0.1638825535774231, "learning_rate": 4.009974881828777e-06, "loss": 0.8669, "step": 136770 }, { "epoch": 0.9901047434978683, "grad_norm": 0.15115493535995483, "learning_rate": 4.009902495168191e-06, "loss": 0.8826, "step": 136780 }, { "epoch": 0.9901771301584544, "grad_norm": 0.14795368909835815, "learning_rate": 4.0098301085076045e-06, "loss": 0.8596, "step": 136790 }, { "epoch": 0.9902495168190406, "grad_norm": 0.17082315683364868, "learning_rate": 4.009757721847018e-06, "loss": 0.8785, "step": 136800 }, { "epoch": 0.9903219034796268, "grad_norm": 0.1484622359275818, "learning_rate": 4.0096853351864326e-06, "loss": 0.8675, "step": 136810 }, { "epoch": 0.990394290140213, "grad_norm": 0.16692779958248138, "learning_rate": 4.009612948525846e-06, "loss": 0.8716, "step": 136820 }, { "epoch": 0.9904666768007991, "grad_norm": 0.16581696271896362, "learning_rate": 4.00954056186526e-06, "loss": 0.8664, "step": 136830 }, { "epoch": 0.9905390634613853, "grad_norm": 0.15264694392681122, "learning_rate": 4.009468175204673e-06, "loss": 0.8654, "step": 136840 }, { "epoch": 0.9906114501219715, "grad_norm": 0.17103955149650574, "learning_rate": 4.009395788544088e-06, "loss": 0.8827, "step": 136850 }, { "epoch": 0.9906838367825577, "grad_norm": 0.15730604529380798, "learning_rate": 4.0093234018835015e-06, "loss": 0.8646, "step": 136860 }, { "epoch": 0.9907562234431438, "grad_norm": 0.1455976516008377, "learning_rate": 4.009251015222915e-06, "loss": 0.8596, "step": 136870 }, { "epoch": 0.9908286101037301, "grad_norm": 0.1492074877023697, "learning_rate": 4.009178628562329e-06, "loss": 0.8698, "step": 136880 }, { "epoch": 0.9909009967643163, "grad_norm": 0.14965695142745972, "learning_rate": 4.009106241901743e-06, "loss": 0.867, "step": 136890 }, { "epoch": 0.9909733834249025, "grad_norm": 0.19759173691272736, "learning_rate": 4.009033855241157e-06, "loss": 0.8611, "step": 136900 }, { "epoch": 0.9910457700854887, "grad_norm": 0.17178118228912354, "learning_rate": 4.00896146858057e-06, "loss": 0.8834, "step": 136910 }, { "epoch": 0.9911181567460748, "grad_norm": 0.1561564952135086, "learning_rate": 4.008889081919984e-06, "loss": 0.8574, "step": 136920 }, { "epoch": 0.991190543406661, "grad_norm": 0.16888576745986938, "learning_rate": 4.008816695259398e-06, "loss": 0.8651, "step": 136930 }, { "epoch": 0.9912629300672472, "grad_norm": 0.1438201516866684, "learning_rate": 4.008744308598811e-06, "loss": 0.8731, "step": 136940 }, { "epoch": 0.9913353167278334, "grad_norm": 0.20436105132102966, "learning_rate": 4.008671921938225e-06, "loss": 0.8619, "step": 136950 }, { "epoch": 0.9914077033884195, "grad_norm": 0.15767519176006317, "learning_rate": 4.008599535277639e-06, "loss": 0.8681, "step": 136960 }, { "epoch": 0.9914800900490057, "grad_norm": 0.14873795211315155, "learning_rate": 4.008527148617053e-06, "loss": 0.8852, "step": 136970 }, { "epoch": 0.9915524767095919, "grad_norm": 0.1454266458749771, "learning_rate": 4.0084547619564666e-06, "loss": 0.8646, "step": 136980 }, { "epoch": 0.9916248633701782, "grad_norm": 0.14362385869026184, "learning_rate": 4.00838237529588e-06, "loss": 0.8638, "step": 136990 }, { "epoch": 0.9916972500307644, "grad_norm": 0.2043345868587494, "learning_rate": 4.008309988635295e-06, "loss": 0.8635, "step": 137000 }, { "epoch": 0.9917696366913505, "grad_norm": 0.15229232609272003, "learning_rate": 4.008237601974708e-06, "loss": 0.8684, "step": 137010 }, { "epoch": 0.9918420233519367, "grad_norm": 0.17375898361206055, "learning_rate": 4.008165215314122e-06, "loss": 0.8571, "step": 137020 }, { "epoch": 0.9919144100125229, "grad_norm": 0.16473175585269928, "learning_rate": 4.0080928286535355e-06, "loss": 0.8716, "step": 137030 }, { "epoch": 0.9919867966731091, "grad_norm": 0.1511010229587555, "learning_rate": 4.00802044199295e-06, "loss": 0.8704, "step": 137040 }, { "epoch": 0.9920591833336952, "grad_norm": 0.1537337750196457, "learning_rate": 4.007948055332364e-06, "loss": 0.8637, "step": 137050 }, { "epoch": 0.9921315699942814, "grad_norm": 0.46637365221977234, "learning_rate": 4.007875668671777e-06, "loss": 0.8569, "step": 137060 }, { "epoch": 0.9922039566548676, "grad_norm": 0.1638781875371933, "learning_rate": 4.007803282011191e-06, "loss": 0.8521, "step": 137070 }, { "epoch": 0.9922763433154538, "grad_norm": 0.1593899428844452, "learning_rate": 4.007730895350605e-06, "loss": 0.8755, "step": 137080 }, { "epoch": 0.9923487299760401, "grad_norm": 0.15153998136520386, "learning_rate": 4.007658508690019e-06, "loss": 0.864, "step": 137090 }, { "epoch": 0.9924211166366262, "grad_norm": 0.15272100269794464, "learning_rate": 4.0075861220294325e-06, "loss": 0.8598, "step": 137100 }, { "epoch": 0.9924935032972124, "grad_norm": 0.1559947431087494, "learning_rate": 4.007513735368846e-06, "loss": 0.8733, "step": 137110 }, { "epoch": 0.9925658899577986, "grad_norm": 0.15066994726657867, "learning_rate": 4.007441348708261e-06, "loss": 0.8694, "step": 137120 }, { "epoch": 0.9926382766183848, "grad_norm": 0.14346741139888763, "learning_rate": 4.007368962047674e-06, "loss": 0.8621, "step": 137130 }, { "epoch": 0.992710663278971, "grad_norm": 0.14388960599899292, "learning_rate": 4.007296575387088e-06, "loss": 0.863, "step": 137140 }, { "epoch": 0.9927830499395571, "grad_norm": 0.14946885406970978, "learning_rate": 4.0072241887265014e-06, "loss": 0.8577, "step": 137150 }, { "epoch": 0.9928554366001433, "grad_norm": 0.15995468199253082, "learning_rate": 4.007151802065916e-06, "loss": 0.8613, "step": 137160 }, { "epoch": 0.9929278232607295, "grad_norm": 0.15818224847316742, "learning_rate": 4.0070794154053295e-06, "loss": 0.871, "step": 137170 }, { "epoch": 0.9930002099213157, "grad_norm": 0.15521134436130524, "learning_rate": 4.007007028744743e-06, "loss": 0.8568, "step": 137180 }, { "epoch": 0.9930725965819018, "grad_norm": 0.15648885071277618, "learning_rate": 4.006934642084157e-06, "loss": 0.8688, "step": 137190 }, { "epoch": 0.9931449832424881, "grad_norm": 0.15690802037715912, "learning_rate": 4.006862255423571e-06, "loss": 0.8696, "step": 137200 }, { "epoch": 0.9932173699030743, "grad_norm": 0.17651623487472534, "learning_rate": 4.006789868762985e-06, "loss": 0.858, "step": 137210 }, { "epoch": 0.9932897565636605, "grad_norm": 0.1453973650932312, "learning_rate": 4.0067174821023984e-06, "loss": 0.8623, "step": 137220 }, { "epoch": 0.9933621432242467, "grad_norm": 0.16270014643669128, "learning_rate": 4.006645095441812e-06, "loss": 0.8702, "step": 137230 }, { "epoch": 0.9934345298848328, "grad_norm": 0.2834886610507965, "learning_rate": 4.0065727087812265e-06, "loss": 0.8558, "step": 137240 }, { "epoch": 0.993506916545419, "grad_norm": 0.1739642173051834, "learning_rate": 4.00650032212064e-06, "loss": 0.8631, "step": 137250 }, { "epoch": 0.9935793032060052, "grad_norm": 0.16178597509860992, "learning_rate": 4.006427935460054e-06, "loss": 0.8616, "step": 137260 }, { "epoch": 0.9936516898665914, "grad_norm": 0.1678241342306137, "learning_rate": 4.006355548799467e-06, "loss": 0.861, "step": 137270 }, { "epoch": 0.9937240765271775, "grad_norm": 0.15866810083389282, "learning_rate": 4.006283162138882e-06, "loss": 0.8549, "step": 137280 }, { "epoch": 0.9937964631877637, "grad_norm": 0.14100779592990875, "learning_rate": 4.0062107754782955e-06, "loss": 0.8714, "step": 137290 }, { "epoch": 0.99386884984835, "grad_norm": 0.1390538066625595, "learning_rate": 4.006138388817709e-06, "loss": 0.8686, "step": 137300 }, { "epoch": 0.9939412365089362, "grad_norm": 0.1549810916185379, "learning_rate": 4.006066002157123e-06, "loss": 0.8599, "step": 137310 }, { "epoch": 0.9940136231695224, "grad_norm": 0.14956602454185486, "learning_rate": 4.005993615496537e-06, "loss": 0.8582, "step": 137320 }, { "epoch": 0.9940860098301085, "grad_norm": 0.15790285170078278, "learning_rate": 4.005921228835951e-06, "loss": 0.8617, "step": 137330 }, { "epoch": 0.9941583964906947, "grad_norm": 0.153904527425766, "learning_rate": 4.005848842175364e-06, "loss": 0.873, "step": 137340 }, { "epoch": 0.9942307831512809, "grad_norm": 0.15151570737361908, "learning_rate": 4.005776455514778e-06, "loss": 0.8688, "step": 137350 }, { "epoch": 0.9943031698118671, "grad_norm": 0.15569916367530823, "learning_rate": 4.0057040688541925e-06, "loss": 0.8518, "step": 137360 }, { "epoch": 0.9943755564724532, "grad_norm": 0.14543747901916504, "learning_rate": 4.005631682193606e-06, "loss": 0.8739, "step": 137370 }, { "epoch": 0.9944479431330394, "grad_norm": 0.16408121585845947, "learning_rate": 4.00555929553302e-06, "loss": 0.868, "step": 137380 }, { "epoch": 0.9945203297936256, "grad_norm": 0.1823720484972, "learning_rate": 4.005486908872433e-06, "loss": 0.8547, "step": 137390 }, { "epoch": 0.9945927164542118, "grad_norm": 0.15718266367912292, "learning_rate": 4.005414522211847e-06, "loss": 0.8677, "step": 137400 }, { "epoch": 0.994665103114798, "grad_norm": 0.14774106442928314, "learning_rate": 4.005342135551261e-06, "loss": 0.8584, "step": 137410 }, { "epoch": 0.9947374897753842, "grad_norm": 0.14286896586418152, "learning_rate": 4.005269748890675e-06, "loss": 0.8526, "step": 137420 }, { "epoch": 0.9948098764359704, "grad_norm": 0.17574062943458557, "learning_rate": 4.005197362230089e-06, "loss": 0.8604, "step": 137430 }, { "epoch": 0.9948822630965566, "grad_norm": 0.15135468542575836, "learning_rate": 4.005124975569502e-06, "loss": 0.8667, "step": 137440 }, { "epoch": 0.9949546497571428, "grad_norm": 0.16362632811069489, "learning_rate": 4.005052588908917e-06, "loss": 0.8574, "step": 137450 }, { "epoch": 0.9950270364177289, "grad_norm": 0.19329750537872314, "learning_rate": 4.0049802022483295e-06, "loss": 0.8783, "step": 137460 }, { "epoch": 0.9950994230783151, "grad_norm": 0.1522989422082901, "learning_rate": 4.004907815587744e-06, "loss": 0.8557, "step": 137470 }, { "epoch": 0.9951718097389013, "grad_norm": 0.17292699217796326, "learning_rate": 4.0048354289271576e-06, "loss": 0.8522, "step": 137480 }, { "epoch": 0.9952441963994875, "grad_norm": 0.1753862351179123, "learning_rate": 4.004763042266571e-06, "loss": 0.8537, "step": 137490 }, { "epoch": 0.9953165830600736, "grad_norm": 0.1441674530506134, "learning_rate": 4.004690655605985e-06, "loss": 0.8697, "step": 137500 }, { "epoch": 0.9953889697206598, "grad_norm": 0.17849737405776978, "learning_rate": 4.004618268945399e-06, "loss": 0.8556, "step": 137510 }, { "epoch": 0.9954613563812461, "grad_norm": 0.1520448625087738, "learning_rate": 4.004545882284813e-06, "loss": 0.8633, "step": 137520 }, { "epoch": 0.9955337430418323, "grad_norm": 0.1547725945711136, "learning_rate": 4.0044734956242265e-06, "loss": 0.86, "step": 137530 }, { "epoch": 0.9956061297024185, "grad_norm": 0.17297573387622833, "learning_rate": 4.00440110896364e-06, "loss": 0.8737, "step": 137540 }, { "epoch": 0.9956785163630046, "grad_norm": 0.15290264785289764, "learning_rate": 4.0043287223030546e-06, "loss": 0.8615, "step": 137550 }, { "epoch": 0.9957509030235908, "grad_norm": 0.15568116307258606, "learning_rate": 4.004256335642468e-06, "loss": 0.8603, "step": 137560 }, { "epoch": 0.995823289684177, "grad_norm": 0.1544651985168457, "learning_rate": 4.004183948981882e-06, "loss": 0.8653, "step": 137570 }, { "epoch": 0.9958956763447632, "grad_norm": 0.15817542374134064, "learning_rate": 4.004111562321295e-06, "loss": 0.8681, "step": 137580 }, { "epoch": 0.9959680630053493, "grad_norm": 0.15345756709575653, "learning_rate": 4.004039175660709e-06, "loss": 0.8569, "step": 137590 }, { "epoch": 0.9960404496659355, "grad_norm": 0.18895161151885986, "learning_rate": 4.0039667890001235e-06, "loss": 0.8733, "step": 137600 }, { "epoch": 0.9961128363265217, "grad_norm": 0.1601308286190033, "learning_rate": 4.003894402339537e-06, "loss": 0.8627, "step": 137610 }, { "epoch": 0.996185222987108, "grad_norm": 0.15279871225357056, "learning_rate": 4.003822015678951e-06, "loss": 0.8684, "step": 137620 }, { "epoch": 0.9962576096476942, "grad_norm": 0.15466183423995972, "learning_rate": 4.003749629018364e-06, "loss": 0.8547, "step": 137630 }, { "epoch": 0.9963299963082803, "grad_norm": 0.14806103706359863, "learning_rate": 4.003677242357779e-06, "loss": 0.8609, "step": 137640 }, { "epoch": 0.9964023829688665, "grad_norm": 0.14731131494045258, "learning_rate": 4.003604855697192e-06, "loss": 0.8463, "step": 137650 }, { "epoch": 0.9964747696294527, "grad_norm": 0.16127149760723114, "learning_rate": 4.003532469036606e-06, "loss": 0.8709, "step": 137660 }, { "epoch": 0.9965471562900389, "grad_norm": 0.1602654606103897, "learning_rate": 4.00346008237602e-06, "loss": 0.8626, "step": 137670 }, { "epoch": 0.996619542950625, "grad_norm": 0.18218478560447693, "learning_rate": 4.003387695715434e-06, "loss": 0.8643, "step": 137680 }, { "epoch": 0.9966919296112112, "grad_norm": 0.19056251645088196, "learning_rate": 4.003315309054848e-06, "loss": 0.8618, "step": 137690 }, { "epoch": 0.9967643162717974, "grad_norm": 0.15740570425987244, "learning_rate": 4.003242922394261e-06, "loss": 0.8707, "step": 137700 }, { "epoch": 0.9968367029323836, "grad_norm": 0.15710806846618652, "learning_rate": 4.003170535733675e-06, "loss": 0.8691, "step": 137710 }, { "epoch": 0.9969090895929698, "grad_norm": 0.16603271663188934, "learning_rate": 4.003098149073089e-06, "loss": 0.8569, "step": 137720 }, { "epoch": 0.996981476253556, "grad_norm": 0.14341507852077484, "learning_rate": 4.003025762412503e-06, "loss": 0.8617, "step": 137730 }, { "epoch": 0.9970538629141422, "grad_norm": 0.1590244024991989, "learning_rate": 4.002953375751917e-06, "loss": 0.8796, "step": 137740 }, { "epoch": 0.9971262495747284, "grad_norm": 0.5351874232292175, "learning_rate": 4.00288098909133e-06, "loss": 0.8664, "step": 137750 }, { "epoch": 0.9971986362353146, "grad_norm": 0.15063074231147766, "learning_rate": 4.002808602430745e-06, "loss": 0.862, "step": 137760 }, { "epoch": 0.9972710228959007, "grad_norm": 0.18847909569740295, "learning_rate": 4.002736215770158e-06, "loss": 0.8713, "step": 137770 }, { "epoch": 0.9973434095564869, "grad_norm": 0.1513339728116989, "learning_rate": 4.002663829109572e-06, "loss": 0.8714, "step": 137780 }, { "epoch": 0.9974157962170731, "grad_norm": 0.14384783804416656, "learning_rate": 4.002591442448986e-06, "loss": 0.8723, "step": 137790 }, { "epoch": 0.9974881828776593, "grad_norm": 0.1539144068956375, "learning_rate": 4.0025190557884e-06, "loss": 0.8618, "step": 137800 }, { "epoch": 0.9975605695382455, "grad_norm": 0.1452489197254181, "learning_rate": 4.002446669127814e-06, "loss": 0.8714, "step": 137810 }, { "epoch": 0.9976329561988316, "grad_norm": 0.15493297576904297, "learning_rate": 4.002374282467227e-06, "loss": 0.8562, "step": 137820 }, { "epoch": 0.9977053428594179, "grad_norm": 0.154521182179451, "learning_rate": 4.002301895806641e-06, "loss": 0.8663, "step": 137830 }, { "epoch": 0.9977777295200041, "grad_norm": 0.14628452062606812, "learning_rate": 4.002229509146055e-06, "loss": 0.8623, "step": 137840 }, { "epoch": 0.9978501161805903, "grad_norm": 0.14919553697109222, "learning_rate": 4.002157122485469e-06, "loss": 0.8685, "step": 137850 }, { "epoch": 0.9979225028411765, "grad_norm": 0.1762702614068985, "learning_rate": 4.002084735824883e-06, "loss": 0.8657, "step": 137860 }, { "epoch": 0.9979948895017626, "grad_norm": 0.151996448636055, "learning_rate": 4.002012349164296e-06, "loss": 0.8568, "step": 137870 }, { "epoch": 0.9980672761623488, "grad_norm": 0.15440037846565247, "learning_rate": 4.001939962503711e-06, "loss": 0.852, "step": 137880 }, { "epoch": 0.998139662822935, "grad_norm": 0.16397789120674133, "learning_rate": 4.001867575843124e-06, "loss": 0.854, "step": 137890 }, { "epoch": 0.9982120494835212, "grad_norm": 0.19279788434505463, "learning_rate": 4.001795189182538e-06, "loss": 0.8581, "step": 137900 }, { "epoch": 0.9982844361441073, "grad_norm": 0.15897057950496674, "learning_rate": 4.0017228025219515e-06, "loss": 0.8711, "step": 137910 }, { "epoch": 0.9983568228046935, "grad_norm": 0.1570604145526886, "learning_rate": 4.001650415861366e-06, "loss": 0.8645, "step": 137920 }, { "epoch": 0.9984292094652797, "grad_norm": 0.16585461795330048, "learning_rate": 4.00157802920078e-06, "loss": 0.8796, "step": 137930 }, { "epoch": 0.998501596125866, "grad_norm": 0.15717382729053497, "learning_rate": 4.001505642540193e-06, "loss": 0.8606, "step": 137940 }, { "epoch": 0.9985739827864522, "grad_norm": 0.16818828880786896, "learning_rate": 4.001433255879607e-06, "loss": 0.8777, "step": 137950 }, { "epoch": 0.9986463694470383, "grad_norm": 0.18249420821666718, "learning_rate": 4.001360869219021e-06, "loss": 0.866, "step": 137960 }, { "epoch": 0.9987187561076245, "grad_norm": 0.16219259798526764, "learning_rate": 4.001288482558435e-06, "loss": 0.8638, "step": 137970 }, { "epoch": 0.9987911427682107, "grad_norm": 0.15621773898601532, "learning_rate": 4.0012160958978485e-06, "loss": 0.8691, "step": 137980 }, { "epoch": 0.9988635294287969, "grad_norm": 0.1662006676197052, "learning_rate": 4.001143709237262e-06, "loss": 0.8813, "step": 137990 }, { "epoch": 0.998935916089383, "grad_norm": 0.13916003704071045, "learning_rate": 4.001071322576676e-06, "loss": 0.8568, "step": 138000 }, { "epoch": 0.9990083027499692, "grad_norm": 0.15064559876918793, "learning_rate": 4.000998935916089e-06, "loss": 0.8619, "step": 138010 }, { "epoch": 0.9990806894105554, "grad_norm": 0.16008462011814117, "learning_rate": 4.000926549255503e-06, "loss": 0.8738, "step": 138020 }, { "epoch": 0.9991530760711416, "grad_norm": 0.1454089879989624, "learning_rate": 4.0008541625949175e-06, "loss": 0.8681, "step": 138030 }, { "epoch": 0.9992254627317277, "grad_norm": 0.3634811341762543, "learning_rate": 4.000781775934331e-06, "loss": 0.8603, "step": 138040 }, { "epoch": 0.999297849392314, "grad_norm": 0.16307184100151062, "learning_rate": 4.000709389273745e-06, "loss": 0.8636, "step": 138050 }, { "epoch": 0.9993702360529002, "grad_norm": 0.15558481216430664, "learning_rate": 4.000637002613158e-06, "loss": 0.8703, "step": 138060 }, { "epoch": 0.9994426227134864, "grad_norm": 0.1434854418039322, "learning_rate": 4.000564615952573e-06, "loss": 0.8678, "step": 138070 }, { "epoch": 0.9995150093740726, "grad_norm": 0.13864096999168396, "learning_rate": 4.000492229291986e-06, "loss": 0.8635, "step": 138080 }, { "epoch": 0.9995873960346587, "grad_norm": 0.1576741486787796, "learning_rate": 4.0004198426314e-06, "loss": 0.8626, "step": 138090 }, { "epoch": 0.9996597826952449, "grad_norm": 0.14672447741031647, "learning_rate": 4.000347455970814e-06, "loss": 0.8483, "step": 138100 }, { "epoch": 0.9997321693558311, "grad_norm": 0.16229544579982758, "learning_rate": 4.000275069310228e-06, "loss": 0.8708, "step": 138110 }, { "epoch": 0.9998045560164173, "grad_norm": 0.14865851402282715, "learning_rate": 4.000202682649642e-06, "loss": 0.8589, "step": 138120 }, { "epoch": 0.9998769426770034, "grad_norm": 0.14516963064670563, "learning_rate": 4.000130295989055e-06, "loss": 0.8713, "step": 138130 }, { "epoch": 0.9999493293375896, "grad_norm": 0.14701320230960846, "learning_rate": 4.000057909328469e-06, "loss": 0.8514, "step": 138140 }, { "epoch": 1.000021715998176, "grad_norm": 0.15110774338245392, "learning_rate": 3.999985522667883e-06, "loss": 0.8665, "step": 138150 }, { "epoch": 1.000094102658762, "grad_norm": 0.14296278357505798, "learning_rate": 3.999913136007297e-06, "loss": 0.8619, "step": 138160 }, { "epoch": 1.0001664893193483, "grad_norm": 0.14643356204032898, "learning_rate": 3.999840749346711e-06, "loss": 0.8675, "step": 138170 }, { "epoch": 1.0002388759799343, "grad_norm": 0.16440226137638092, "learning_rate": 3.999768362686124e-06, "loss": 0.8456, "step": 138180 }, { "epoch": 1.0003112626405206, "grad_norm": 0.14877723157405853, "learning_rate": 3.999695976025538e-06, "loss": 0.8591, "step": 138190 }, { "epoch": 1.0003836493011067, "grad_norm": 0.150392085313797, "learning_rate": 3.999623589364952e-06, "loss": 0.8588, "step": 138200 }, { "epoch": 1.000456035961693, "grad_norm": 0.1557486355304718, "learning_rate": 3.999551202704366e-06, "loss": 0.8715, "step": 138210 }, { "epoch": 1.0005284226222793, "grad_norm": 0.16424834728240967, "learning_rate": 3.9994788160437795e-06, "loss": 0.8752, "step": 138220 }, { "epoch": 1.0006008092828653, "grad_norm": 0.1615014523267746, "learning_rate": 3.999406429383193e-06, "loss": 0.8734, "step": 138230 }, { "epoch": 1.0006731959434516, "grad_norm": 0.1606891006231308, "learning_rate": 3.999334042722608e-06, "loss": 0.8675, "step": 138240 }, { "epoch": 1.0007455826040377, "grad_norm": 0.16052010655403137, "learning_rate": 3.999261656062021e-06, "loss": 0.8599, "step": 138250 }, { "epoch": 1.000817969264624, "grad_norm": 0.154453843832016, "learning_rate": 3.999189269401435e-06, "loss": 0.8503, "step": 138260 }, { "epoch": 1.00089035592521, "grad_norm": 0.1499967873096466, "learning_rate": 3.9991168827408485e-06, "loss": 0.8631, "step": 138270 }, { "epoch": 1.0009627425857963, "grad_norm": 0.1510349065065384, "learning_rate": 3.999044496080263e-06, "loss": 0.8663, "step": 138280 }, { "epoch": 1.0010351292463824, "grad_norm": 0.18189969658851624, "learning_rate": 3.9989721094196766e-06, "loss": 0.8725, "step": 138290 }, { "epoch": 1.0011075159069687, "grad_norm": 0.1505780965089798, "learning_rate": 3.99889972275909e-06, "loss": 0.8589, "step": 138300 }, { "epoch": 1.001179902567555, "grad_norm": 0.15750384330749512, "learning_rate": 3.998827336098504e-06, "loss": 0.8675, "step": 138310 }, { "epoch": 1.001252289228141, "grad_norm": 0.14412176609039307, "learning_rate": 3.998754949437918e-06, "loss": 0.8613, "step": 138320 }, { "epoch": 1.0013246758887273, "grad_norm": 0.1520223170518875, "learning_rate": 3.998682562777332e-06, "loss": 0.8639, "step": 138330 }, { "epoch": 1.0013970625493134, "grad_norm": 0.15672039985656738, "learning_rate": 3.9986101761167455e-06, "loss": 0.8651, "step": 138340 }, { "epoch": 1.0014694492098997, "grad_norm": 0.19693726301193237, "learning_rate": 3.998537789456159e-06, "loss": 0.8727, "step": 138350 }, { "epoch": 1.0015418358704857, "grad_norm": 0.18891595304012299, "learning_rate": 3.9984654027955736e-06, "loss": 0.8756, "step": 138360 }, { "epoch": 1.001614222531072, "grad_norm": 0.14918550848960876, "learning_rate": 3.998393016134987e-06, "loss": 0.8603, "step": 138370 }, { "epoch": 1.001686609191658, "grad_norm": 0.16212646663188934, "learning_rate": 3.998320629474401e-06, "loss": 0.8653, "step": 138380 }, { "epoch": 1.0017589958522444, "grad_norm": 0.14969107508659363, "learning_rate": 3.998248242813814e-06, "loss": 0.8777, "step": 138390 }, { "epoch": 1.0018313825128304, "grad_norm": 0.14508505165576935, "learning_rate": 3.998175856153229e-06, "loss": 0.8573, "step": 138400 }, { "epoch": 1.0019037691734167, "grad_norm": 0.1480925977230072, "learning_rate": 3.9981034694926425e-06, "loss": 0.872, "step": 138410 }, { "epoch": 1.001976155834003, "grad_norm": 0.16480182111263275, "learning_rate": 3.998031082832056e-06, "loss": 0.881, "step": 138420 }, { "epoch": 1.002048542494589, "grad_norm": 0.14416438341140747, "learning_rate": 3.99795869617147e-06, "loss": 0.867, "step": 138430 }, { "epoch": 1.0021209291551754, "grad_norm": 0.14154788851737976, "learning_rate": 3.997886309510884e-06, "loss": 0.8568, "step": 138440 }, { "epoch": 1.0021933158157614, "grad_norm": 0.1673395335674286, "learning_rate": 3.997813922850298e-06, "loss": 0.8553, "step": 138450 }, { "epoch": 1.0022657024763477, "grad_norm": 0.16951638460159302, "learning_rate": 3.997741536189711e-06, "loss": 0.8569, "step": 138460 }, { "epoch": 1.0023380891369338, "grad_norm": 0.16748495399951935, "learning_rate": 3.997669149529125e-06, "loss": 0.8657, "step": 138470 }, { "epoch": 1.00241047579752, "grad_norm": 0.1609235256910324, "learning_rate": 3.9975967628685395e-06, "loss": 0.8493, "step": 138480 }, { "epoch": 1.0024828624581061, "grad_norm": 0.15038661658763885, "learning_rate": 3.997524376207953e-06, "loss": 0.8577, "step": 138490 }, { "epoch": 1.0025552491186924, "grad_norm": 0.15699024498462677, "learning_rate": 3.997451989547367e-06, "loss": 0.8584, "step": 138500 }, { "epoch": 1.0026276357792785, "grad_norm": 0.153981551527977, "learning_rate": 3.99737960288678e-06, "loss": 0.8621, "step": 138510 }, { "epoch": 1.0027000224398648, "grad_norm": 0.14797601103782654, "learning_rate": 3.997307216226194e-06, "loss": 0.8603, "step": 138520 }, { "epoch": 1.002772409100451, "grad_norm": 0.18665072321891785, "learning_rate": 3.997234829565608e-06, "loss": 0.8615, "step": 138530 }, { "epoch": 1.0028447957610371, "grad_norm": 0.18154151737689972, "learning_rate": 3.997162442905021e-06, "loss": 0.868, "step": 138540 }, { "epoch": 1.0029171824216234, "grad_norm": 0.14707796275615692, "learning_rate": 3.997090056244436e-06, "loss": 0.8544, "step": 138550 }, { "epoch": 1.0029895690822095, "grad_norm": 0.1541696935892105, "learning_rate": 3.997017669583849e-06, "loss": 0.8633, "step": 138560 }, { "epoch": 1.0030619557427958, "grad_norm": 0.15165592730045319, "learning_rate": 3.996945282923263e-06, "loss": 0.8684, "step": 138570 }, { "epoch": 1.0031343424033818, "grad_norm": 0.1640002727508545, "learning_rate": 3.9968728962626765e-06, "loss": 0.8738, "step": 138580 }, { "epoch": 1.0032067290639681, "grad_norm": 0.14847826957702637, "learning_rate": 3.996800509602091e-06, "loss": 0.8608, "step": 138590 }, { "epoch": 1.0032791157245542, "grad_norm": 0.16807755827903748, "learning_rate": 3.996728122941505e-06, "loss": 0.8656, "step": 138600 }, { "epoch": 1.0033515023851405, "grad_norm": 0.175052210688591, "learning_rate": 3.996655736280918e-06, "loss": 0.8663, "step": 138610 }, { "epoch": 1.0034238890457265, "grad_norm": 0.18418249487876892, "learning_rate": 3.996583349620332e-06, "loss": 0.8619, "step": 138620 }, { "epoch": 1.0034962757063128, "grad_norm": 0.14789465069770813, "learning_rate": 3.996510962959746e-06, "loss": 0.8597, "step": 138630 }, { "epoch": 1.0035686623668991, "grad_norm": 0.14474569261074066, "learning_rate": 3.99643857629916e-06, "loss": 0.8721, "step": 138640 }, { "epoch": 1.0036410490274852, "grad_norm": 0.14751043915748596, "learning_rate": 3.9963661896385735e-06, "loss": 0.8922, "step": 138650 }, { "epoch": 1.0037134356880715, "grad_norm": 0.14138489961624146, "learning_rate": 3.996293802977987e-06, "loss": 0.8643, "step": 138660 }, { "epoch": 1.0037858223486575, "grad_norm": 0.14229081571102142, "learning_rate": 3.996221416317402e-06, "loss": 0.8595, "step": 138670 }, { "epoch": 1.0038582090092438, "grad_norm": 0.24338965117931366, "learning_rate": 3.996149029656815e-06, "loss": 0.861, "step": 138680 }, { "epoch": 1.00393059566983, "grad_norm": 0.2033909410238266, "learning_rate": 3.996076642996229e-06, "loss": 0.864, "step": 138690 }, { "epoch": 1.0040029823304162, "grad_norm": 0.15376165509223938, "learning_rate": 3.9960042563356424e-06, "loss": 0.8662, "step": 138700 }, { "epoch": 1.0040753689910022, "grad_norm": 0.14948670566082, "learning_rate": 3.995931869675057e-06, "loss": 0.8744, "step": 138710 }, { "epoch": 1.0041477556515885, "grad_norm": 0.15459775924682617, "learning_rate": 3.9958594830144705e-06, "loss": 0.8763, "step": 138720 }, { "epoch": 1.0042201423121746, "grad_norm": 0.16300074756145477, "learning_rate": 3.995787096353884e-06, "loss": 0.8532, "step": 138730 }, { "epoch": 1.004292528972761, "grad_norm": 0.14748498797416687, "learning_rate": 3.995714709693298e-06, "loss": 0.8659, "step": 138740 }, { "epoch": 1.0043649156333472, "grad_norm": 0.14953255653381348, "learning_rate": 3.995642323032712e-06, "loss": 0.8737, "step": 138750 }, { "epoch": 1.0044373022939332, "grad_norm": 0.17030136287212372, "learning_rate": 3.995569936372126e-06, "loss": 0.8659, "step": 138760 }, { "epoch": 1.0045096889545195, "grad_norm": 0.15466326475143433, "learning_rate": 3.9954975497115395e-06, "loss": 0.8597, "step": 138770 }, { "epoch": 1.0045820756151056, "grad_norm": 0.2016264945268631, "learning_rate": 3.995425163050953e-06, "loss": 0.8675, "step": 138780 }, { "epoch": 1.0046544622756919, "grad_norm": 0.2277584671974182, "learning_rate": 3.9953527763903675e-06, "loss": 0.8689, "step": 138790 }, { "epoch": 1.004726848936278, "grad_norm": 0.14678841829299927, "learning_rate": 3.995280389729781e-06, "loss": 0.8658, "step": 138800 }, { "epoch": 1.0047992355968642, "grad_norm": 0.18819843232631683, "learning_rate": 3.995208003069195e-06, "loss": 0.8627, "step": 138810 }, { "epoch": 1.0048716222574503, "grad_norm": 0.14515884220600128, "learning_rate": 3.995135616408608e-06, "loss": 0.8478, "step": 138820 }, { "epoch": 1.0049440089180366, "grad_norm": 0.15614381432533264, "learning_rate": 3.995063229748022e-06, "loss": 0.8636, "step": 138830 }, { "epoch": 1.0050163955786229, "grad_norm": 0.14769770205020905, "learning_rate": 3.9949908430874365e-06, "loss": 0.8577, "step": 138840 }, { "epoch": 1.005088782239209, "grad_norm": 0.15718652307987213, "learning_rate": 3.99491845642685e-06, "loss": 0.8645, "step": 138850 }, { "epoch": 1.0051611688997952, "grad_norm": 0.14552608132362366, "learning_rate": 3.994846069766264e-06, "loss": 0.8609, "step": 138860 }, { "epoch": 1.0052335555603813, "grad_norm": 0.1629728376865387, "learning_rate": 3.994773683105677e-06, "loss": 0.8725, "step": 138870 }, { "epoch": 1.0053059422209676, "grad_norm": 0.1548013985157013, "learning_rate": 3.994701296445092e-06, "loss": 0.8592, "step": 138880 }, { "epoch": 1.0053783288815537, "grad_norm": 0.1612953543663025, "learning_rate": 3.994628909784505e-06, "loss": 0.8649, "step": 138890 }, { "epoch": 1.00545071554214, "grad_norm": 0.15613959729671478, "learning_rate": 3.994556523123919e-06, "loss": 0.852, "step": 138900 }, { "epoch": 1.005523102202726, "grad_norm": 0.14688079059123993, "learning_rate": 3.994484136463333e-06, "loss": 0.8625, "step": 138910 }, { "epoch": 1.0055954888633123, "grad_norm": 0.16856560111045837, "learning_rate": 3.994411749802747e-06, "loss": 0.866, "step": 138920 }, { "epoch": 1.0056678755238984, "grad_norm": 0.14123956859111786, "learning_rate": 3.994339363142161e-06, "loss": 0.8595, "step": 138930 }, { "epoch": 1.0057402621844846, "grad_norm": 0.15855082869529724, "learning_rate": 3.994266976481574e-06, "loss": 0.863, "step": 138940 }, { "epoch": 1.005812648845071, "grad_norm": 0.16692784428596497, "learning_rate": 3.994194589820988e-06, "loss": 0.8534, "step": 138950 }, { "epoch": 1.005885035505657, "grad_norm": 0.1530875861644745, "learning_rate": 3.994122203160402e-06, "loss": 0.855, "step": 138960 }, { "epoch": 1.0059574221662433, "grad_norm": 0.16144618391990662, "learning_rate": 3.994049816499816e-06, "loss": 0.8496, "step": 138970 }, { "epoch": 1.0060298088268294, "grad_norm": 0.15884657204151154, "learning_rate": 3.99397742983923e-06, "loss": 0.8641, "step": 138980 }, { "epoch": 1.0061021954874156, "grad_norm": 0.1514037847518921, "learning_rate": 3.993905043178643e-06, "loss": 0.8562, "step": 138990 }, { "epoch": 1.0061745821480017, "grad_norm": 0.1448083519935608, "learning_rate": 3.993832656518058e-06, "loss": 0.8545, "step": 139000 }, { "epoch": 1.006246968808588, "grad_norm": 0.153413325548172, "learning_rate": 3.993760269857471e-06, "loss": 0.8668, "step": 139010 }, { "epoch": 1.006319355469174, "grad_norm": 0.16846509277820587, "learning_rate": 3.993687883196885e-06, "loss": 0.8534, "step": 139020 }, { "epoch": 1.0063917421297603, "grad_norm": 0.14701557159423828, "learning_rate": 3.9936154965362986e-06, "loss": 0.8597, "step": 139030 }, { "epoch": 1.0064641287903464, "grad_norm": 0.15452063083648682, "learning_rate": 3.993543109875713e-06, "loss": 0.8734, "step": 139040 }, { "epoch": 1.0065365154509327, "grad_norm": 0.14439797401428223, "learning_rate": 3.993470723215126e-06, "loss": 0.8762, "step": 139050 }, { "epoch": 1.006608902111519, "grad_norm": 0.15302760899066925, "learning_rate": 3.993398336554539e-06, "loss": 0.8597, "step": 139060 }, { "epoch": 1.006681288772105, "grad_norm": 0.1926603466272354, "learning_rate": 3.993325949893954e-06, "loss": 0.8524, "step": 139070 }, { "epoch": 1.0067536754326913, "grad_norm": 0.15659856796264648, "learning_rate": 3.9932535632333675e-06, "loss": 0.8558, "step": 139080 }, { "epoch": 1.0068260620932774, "grad_norm": 0.18837852776050568, "learning_rate": 3.993181176572781e-06, "loss": 0.8756, "step": 139090 }, { "epoch": 1.0068984487538637, "grad_norm": 0.14618010818958282, "learning_rate": 3.993108789912195e-06, "loss": 0.8688, "step": 139100 }, { "epoch": 1.0069708354144498, "grad_norm": 0.1539430171251297, "learning_rate": 3.993036403251609e-06, "loss": 0.8525, "step": 139110 }, { "epoch": 1.007043222075036, "grad_norm": 0.1436515599489212, "learning_rate": 3.992964016591023e-06, "loss": 0.8657, "step": 139120 }, { "epoch": 1.0071156087356221, "grad_norm": 0.15015138685703278, "learning_rate": 3.992891629930436e-06, "loss": 0.8606, "step": 139130 }, { "epoch": 1.0071879953962084, "grad_norm": 0.15750154852867126, "learning_rate": 3.99281924326985e-06, "loss": 0.849, "step": 139140 }, { "epoch": 1.0072603820567945, "grad_norm": 0.15625479817390442, "learning_rate": 3.9927468566092645e-06, "loss": 0.8614, "step": 139150 }, { "epoch": 1.0073327687173808, "grad_norm": 0.1461600959300995, "learning_rate": 3.992674469948678e-06, "loss": 0.8485, "step": 139160 }, { "epoch": 1.007405155377967, "grad_norm": 0.16277381777763367, "learning_rate": 3.992602083288092e-06, "loss": 0.8705, "step": 139170 }, { "epoch": 1.007477542038553, "grad_norm": 0.3381327986717224, "learning_rate": 3.992529696627505e-06, "loss": 0.8686, "step": 139180 }, { "epoch": 1.0075499286991394, "grad_norm": 0.14702801406383514, "learning_rate": 3.99245730996692e-06, "loss": 0.8526, "step": 139190 }, { "epoch": 1.0076223153597255, "grad_norm": 0.15568077564239502, "learning_rate": 3.992384923306333e-06, "loss": 0.8497, "step": 139200 }, { "epoch": 1.0076947020203117, "grad_norm": 0.16276511549949646, "learning_rate": 3.992312536645747e-06, "loss": 0.8634, "step": 139210 }, { "epoch": 1.0077670886808978, "grad_norm": 0.1559554636478424, "learning_rate": 3.992240149985161e-06, "loss": 0.8703, "step": 139220 }, { "epoch": 1.007839475341484, "grad_norm": 0.156097874045372, "learning_rate": 3.992167763324575e-06, "loss": 0.8721, "step": 139230 }, { "epoch": 1.0079118620020702, "grad_norm": 0.1414407193660736, "learning_rate": 3.992095376663989e-06, "loss": 0.8661, "step": 139240 }, { "epoch": 1.0079842486626565, "grad_norm": 0.15653790533542633, "learning_rate": 3.992022990003402e-06, "loss": 0.8547, "step": 139250 }, { "epoch": 1.0080566353232427, "grad_norm": 0.15031273663043976, "learning_rate": 3.991950603342816e-06, "loss": 0.858, "step": 139260 }, { "epoch": 1.0081290219838288, "grad_norm": 0.17239518463611603, "learning_rate": 3.9918782166822304e-06, "loss": 0.8548, "step": 139270 }, { "epoch": 1.008201408644415, "grad_norm": 0.1503749042749405, "learning_rate": 3.991805830021644e-06, "loss": 0.8711, "step": 139280 }, { "epoch": 1.0082737953050012, "grad_norm": 0.15691126883029938, "learning_rate": 3.991733443361058e-06, "loss": 0.8525, "step": 139290 }, { "epoch": 1.0083461819655875, "grad_norm": 0.15124544501304626, "learning_rate": 3.991661056700471e-06, "loss": 0.8684, "step": 139300 }, { "epoch": 1.0084185686261735, "grad_norm": 0.16640368103981018, "learning_rate": 3.991588670039886e-06, "loss": 0.8621, "step": 139310 }, { "epoch": 1.0084909552867598, "grad_norm": 0.14733107388019562, "learning_rate": 3.991516283379299e-06, "loss": 0.8595, "step": 139320 }, { "epoch": 1.0085633419473459, "grad_norm": 0.14905431866645813, "learning_rate": 3.991443896718713e-06, "loss": 0.8636, "step": 139330 }, { "epoch": 1.0086357286079322, "grad_norm": 0.16022346913814545, "learning_rate": 3.991371510058127e-06, "loss": 0.8564, "step": 139340 }, { "epoch": 1.0087081152685182, "grad_norm": 0.14993324875831604, "learning_rate": 3.991299123397541e-06, "loss": 0.8704, "step": 139350 }, { "epoch": 1.0087805019291045, "grad_norm": 0.14608387649059296, "learning_rate": 3.991226736736955e-06, "loss": 0.848, "step": 139360 }, { "epoch": 1.0088528885896908, "grad_norm": 0.14675495028495789, "learning_rate": 3.991154350076368e-06, "loss": 0.8736, "step": 139370 }, { "epoch": 1.0089252752502769, "grad_norm": 0.13962878286838531, "learning_rate": 3.991081963415782e-06, "loss": 0.8634, "step": 139380 }, { "epoch": 1.0089976619108632, "grad_norm": 0.1584140658378601, "learning_rate": 3.991009576755196e-06, "loss": 0.8708, "step": 139390 }, { "epoch": 1.0090700485714492, "grad_norm": 0.1543683111667633, "learning_rate": 3.99093719009461e-06, "loss": 0.8651, "step": 139400 }, { "epoch": 1.0091424352320355, "grad_norm": 0.1464136689901352, "learning_rate": 3.990864803434024e-06, "loss": 0.8693, "step": 139410 }, { "epoch": 1.0092148218926216, "grad_norm": 0.1557295322418213, "learning_rate": 3.990792416773437e-06, "loss": 0.8609, "step": 139420 }, { "epoch": 1.0092872085532079, "grad_norm": 0.14724186062812805, "learning_rate": 3.990720030112851e-06, "loss": 0.8753, "step": 139430 }, { "epoch": 1.009359595213794, "grad_norm": 0.1482950747013092, "learning_rate": 3.990647643452265e-06, "loss": 0.8607, "step": 139440 }, { "epoch": 1.0094319818743802, "grad_norm": 0.15687072277069092, "learning_rate": 3.990575256791679e-06, "loss": 0.857, "step": 139450 }, { "epoch": 1.0095043685349663, "grad_norm": 0.15880662202835083, "learning_rate": 3.9905028701310925e-06, "loss": 0.8595, "step": 139460 }, { "epoch": 1.0095767551955526, "grad_norm": 0.14912816882133484, "learning_rate": 3.990430483470506e-06, "loss": 0.8613, "step": 139470 }, { "epoch": 1.0096491418561389, "grad_norm": 0.15200045704841614, "learning_rate": 3.990358096809921e-06, "loss": 0.8597, "step": 139480 }, { "epoch": 1.009721528516725, "grad_norm": 0.1544422060251236, "learning_rate": 3.990285710149334e-06, "loss": 0.858, "step": 139490 }, { "epoch": 1.0097939151773112, "grad_norm": 0.16103899478912354, "learning_rate": 3.990213323488748e-06, "loss": 0.8596, "step": 139500 }, { "epoch": 1.0098663018378973, "grad_norm": 0.1505272537469864, "learning_rate": 3.9901409368281615e-06, "loss": 0.859, "step": 139510 }, { "epoch": 1.0099386884984836, "grad_norm": 0.16239230334758759, "learning_rate": 3.990068550167576e-06, "loss": 0.8547, "step": 139520 }, { "epoch": 1.0100110751590696, "grad_norm": 0.15705235302448273, "learning_rate": 3.9899961635069895e-06, "loss": 0.8604, "step": 139530 }, { "epoch": 1.010083461819656, "grad_norm": 0.15264788269996643, "learning_rate": 3.989923776846403e-06, "loss": 0.8729, "step": 139540 }, { "epoch": 1.010155848480242, "grad_norm": 0.15049117803573608, "learning_rate": 3.989851390185817e-06, "loss": 0.8613, "step": 139550 }, { "epoch": 1.0102282351408283, "grad_norm": 0.17415769398212433, "learning_rate": 3.989779003525231e-06, "loss": 0.8613, "step": 139560 }, { "epoch": 1.0103006218014143, "grad_norm": 0.151180699467659, "learning_rate": 3.989706616864645e-06, "loss": 0.8631, "step": 139570 }, { "epoch": 1.0103730084620006, "grad_norm": 0.14194343984127045, "learning_rate": 3.9896342302040585e-06, "loss": 0.8597, "step": 139580 }, { "epoch": 1.010445395122587, "grad_norm": 0.15992465615272522, "learning_rate": 3.989561843543472e-06, "loss": 0.8636, "step": 139590 }, { "epoch": 1.010517781783173, "grad_norm": 0.15397891402244568, "learning_rate": 3.989489456882886e-06, "loss": 0.8599, "step": 139600 }, { "epoch": 1.0105901684437593, "grad_norm": 0.16052521765232086, "learning_rate": 3.989417070222299e-06, "loss": 0.8576, "step": 139610 }, { "epoch": 1.0106625551043453, "grad_norm": 0.15102127194404602, "learning_rate": 3.989344683561713e-06, "loss": 0.872, "step": 139620 }, { "epoch": 1.0107349417649316, "grad_norm": 0.1572069674730301, "learning_rate": 3.989272296901127e-06, "loss": 0.8688, "step": 139630 }, { "epoch": 1.0108073284255177, "grad_norm": 0.14729195833206177, "learning_rate": 3.989199910240541e-06, "loss": 0.8668, "step": 139640 }, { "epoch": 1.010879715086104, "grad_norm": 0.16329136490821838, "learning_rate": 3.989127523579955e-06, "loss": 0.8643, "step": 139650 }, { "epoch": 1.01095210174669, "grad_norm": 0.15864083170890808, "learning_rate": 3.989055136919368e-06, "loss": 0.8686, "step": 139660 }, { "epoch": 1.0110244884072763, "grad_norm": 0.1520940214395523, "learning_rate": 3.988982750258783e-06, "loss": 0.8573, "step": 139670 }, { "epoch": 1.0110968750678624, "grad_norm": 0.2237161546945572, "learning_rate": 3.988910363598196e-06, "loss": 0.8685, "step": 139680 }, { "epoch": 1.0111692617284487, "grad_norm": 0.15291093289852142, "learning_rate": 3.98883797693761e-06, "loss": 0.8554, "step": 139690 }, { "epoch": 1.011241648389035, "grad_norm": 0.1433836966753006, "learning_rate": 3.9887655902770235e-06, "loss": 0.8553, "step": 139700 }, { "epoch": 1.011314035049621, "grad_norm": 0.15727286040782928, "learning_rate": 3.988693203616438e-06, "loss": 0.8692, "step": 139710 }, { "epoch": 1.0113864217102073, "grad_norm": 0.14590954780578613, "learning_rate": 3.988620816955852e-06, "loss": 0.8654, "step": 139720 }, { "epoch": 1.0114588083707934, "grad_norm": 0.15376925468444824, "learning_rate": 3.988548430295265e-06, "loss": 0.8614, "step": 139730 }, { "epoch": 1.0115311950313797, "grad_norm": 0.14909863471984863, "learning_rate": 3.988476043634679e-06, "loss": 0.8432, "step": 139740 }, { "epoch": 1.0116035816919657, "grad_norm": 0.15601171553134918, "learning_rate": 3.988403656974093e-06, "loss": 0.8543, "step": 139750 }, { "epoch": 1.011675968352552, "grad_norm": 0.1624634563922882, "learning_rate": 3.988331270313507e-06, "loss": 0.8587, "step": 139760 }, { "epoch": 1.011748355013138, "grad_norm": 0.15104293823242188, "learning_rate": 3.9882588836529206e-06, "loss": 0.8621, "step": 139770 }, { "epoch": 1.0118207416737244, "grad_norm": 0.15875457227230072, "learning_rate": 3.988186496992334e-06, "loss": 0.8702, "step": 139780 }, { "epoch": 1.0118931283343104, "grad_norm": 0.14369283616542816, "learning_rate": 3.988114110331749e-06, "loss": 0.8654, "step": 139790 }, { "epoch": 1.0119655149948967, "grad_norm": 0.15194371342658997, "learning_rate": 3.988041723671162e-06, "loss": 0.8584, "step": 139800 }, { "epoch": 1.012037901655483, "grad_norm": 0.18892666697502136, "learning_rate": 3.987969337010576e-06, "loss": 0.8606, "step": 139810 }, { "epoch": 1.012110288316069, "grad_norm": 0.16798314452171326, "learning_rate": 3.9878969503499895e-06, "loss": 0.8621, "step": 139820 }, { "epoch": 1.0121826749766554, "grad_norm": 0.15889109671115875, "learning_rate": 3.987824563689404e-06, "loss": 0.8675, "step": 139830 }, { "epoch": 1.0122550616372414, "grad_norm": 0.14668938517570496, "learning_rate": 3.9877521770288176e-06, "loss": 0.8611, "step": 139840 }, { "epoch": 1.0123274482978277, "grad_norm": 0.15897607803344727, "learning_rate": 3.987679790368231e-06, "loss": 0.8696, "step": 139850 }, { "epoch": 1.0123998349584138, "grad_norm": 0.14037030935287476, "learning_rate": 3.987607403707645e-06, "loss": 0.8669, "step": 139860 }, { "epoch": 1.012472221619, "grad_norm": 0.161557137966156, "learning_rate": 3.987535017047059e-06, "loss": 0.8397, "step": 139870 }, { "epoch": 1.0125446082795861, "grad_norm": 0.14961223304271698, "learning_rate": 3.987462630386473e-06, "loss": 0.8637, "step": 139880 }, { "epoch": 1.0126169949401724, "grad_norm": 0.16417111456394196, "learning_rate": 3.9873902437258865e-06, "loss": 0.8535, "step": 139890 }, { "epoch": 1.0126893816007587, "grad_norm": 0.1531134694814682, "learning_rate": 3.9873178570653e-06, "loss": 0.8662, "step": 139900 }, { "epoch": 1.0127617682613448, "grad_norm": 0.16314208507537842, "learning_rate": 3.9872454704047146e-06, "loss": 0.874, "step": 139910 }, { "epoch": 1.012834154921931, "grad_norm": 0.1492464244365692, "learning_rate": 3.987173083744128e-06, "loss": 0.8594, "step": 139920 }, { "epoch": 1.0129065415825171, "grad_norm": 0.1546127200126648, "learning_rate": 3.987100697083542e-06, "loss": 0.8683, "step": 139930 }, { "epoch": 1.0129789282431034, "grad_norm": 0.23250380158424377, "learning_rate": 3.987028310422955e-06, "loss": 0.8602, "step": 139940 }, { "epoch": 1.0130513149036895, "grad_norm": 0.15371747314929962, "learning_rate": 3.98695592376237e-06, "loss": 0.8577, "step": 139950 }, { "epoch": 1.0131237015642758, "grad_norm": 0.15039215981960297, "learning_rate": 3.9868835371017835e-06, "loss": 0.8775, "step": 139960 }, { "epoch": 1.0131960882248618, "grad_norm": 0.14372408390045166, "learning_rate": 3.986811150441197e-06, "loss": 0.8603, "step": 139970 }, { "epoch": 1.0132684748854481, "grad_norm": 0.18086668848991394, "learning_rate": 3.986738763780611e-06, "loss": 0.8697, "step": 139980 }, { "epoch": 1.0133408615460342, "grad_norm": 0.16421173512935638, "learning_rate": 3.986666377120025e-06, "loss": 0.8727, "step": 139990 }, { "epoch": 1.0134132482066205, "grad_norm": 0.149836003780365, "learning_rate": 3.986593990459439e-06, "loss": 0.8633, "step": 140000 }, { "epoch": 1.0134856348672068, "grad_norm": 0.15972940623760223, "learning_rate": 3.9865216037988524e-06, "loss": 0.8598, "step": 140010 }, { "epoch": 1.0135580215277928, "grad_norm": 0.15127001702785492, "learning_rate": 3.986449217138266e-06, "loss": 0.8639, "step": 140020 }, { "epoch": 1.0136304081883791, "grad_norm": 0.1499137580394745, "learning_rate": 3.9863768304776805e-06, "loss": 0.8712, "step": 140030 }, { "epoch": 1.0137027948489652, "grad_norm": 0.1619751751422882, "learning_rate": 3.986304443817094e-06, "loss": 0.8751, "step": 140040 }, { "epoch": 1.0137751815095515, "grad_norm": 0.15871897339820862, "learning_rate": 3.986232057156508e-06, "loss": 0.8627, "step": 140050 }, { "epoch": 1.0138475681701375, "grad_norm": 0.15638139843940735, "learning_rate": 3.986159670495921e-06, "loss": 0.8587, "step": 140060 }, { "epoch": 1.0139199548307238, "grad_norm": 0.1525679975748062, "learning_rate": 3.986087283835335e-06, "loss": 0.8644, "step": 140070 }, { "epoch": 1.01399234149131, "grad_norm": 0.14840306341648102, "learning_rate": 3.9860148971747494e-06, "loss": 0.8653, "step": 140080 }, { "epoch": 1.0140647281518962, "grad_norm": 0.15386714041233063, "learning_rate": 3.985942510514163e-06, "loss": 0.8709, "step": 140090 }, { "epoch": 1.0141371148124823, "grad_norm": 0.17089219391345978, "learning_rate": 3.985870123853577e-06, "loss": 0.8653, "step": 140100 }, { "epoch": 1.0142095014730685, "grad_norm": 0.16779905557632446, "learning_rate": 3.98579773719299e-06, "loss": 0.8503, "step": 140110 }, { "epoch": 1.0142818881336548, "grad_norm": 0.1514463871717453, "learning_rate": 3.985725350532404e-06, "loss": 0.8604, "step": 140120 }, { "epoch": 1.014354274794241, "grad_norm": 0.15929542481899261, "learning_rate": 3.9856529638718175e-06, "loss": 0.87, "step": 140130 }, { "epoch": 1.0144266614548272, "grad_norm": 0.1467376947402954, "learning_rate": 3.985580577211232e-06, "loss": 0.8597, "step": 140140 }, { "epoch": 1.0144990481154132, "grad_norm": 0.14556573331356049, "learning_rate": 3.985508190550646e-06, "loss": 0.8666, "step": 140150 }, { "epoch": 1.0145714347759995, "grad_norm": 0.15759846568107605, "learning_rate": 3.985435803890059e-06, "loss": 0.8605, "step": 140160 }, { "epoch": 1.0146438214365856, "grad_norm": 0.16892042756080627, "learning_rate": 3.985363417229473e-06, "loss": 0.8745, "step": 140170 }, { "epoch": 1.014716208097172, "grad_norm": 0.16491712629795074, "learning_rate": 3.985291030568887e-06, "loss": 0.863, "step": 140180 }, { "epoch": 1.014788594757758, "grad_norm": 0.14999713003635406, "learning_rate": 3.985218643908301e-06, "loss": 0.8698, "step": 140190 }, { "epoch": 1.0148609814183442, "grad_norm": 0.15313737094402313, "learning_rate": 3.9851462572477145e-06, "loss": 0.8486, "step": 140200 }, { "epoch": 1.0149333680789303, "grad_norm": 0.15523867309093475, "learning_rate": 3.985073870587128e-06, "loss": 0.8521, "step": 140210 }, { "epoch": 1.0150057547395166, "grad_norm": 1.109740972518921, "learning_rate": 3.985001483926543e-06, "loss": 0.8812, "step": 140220 }, { "epoch": 1.0150781414001029, "grad_norm": 0.15085531771183014, "learning_rate": 3.984929097265956e-06, "loss": 0.8549, "step": 140230 }, { "epoch": 1.015150528060689, "grad_norm": 0.17226199805736542, "learning_rate": 3.98485671060537e-06, "loss": 0.8643, "step": 140240 }, { "epoch": 1.0152229147212752, "grad_norm": 0.15006664395332336, "learning_rate": 3.9847843239447835e-06, "loss": 0.8721, "step": 140250 }, { "epoch": 1.0152953013818613, "grad_norm": 0.1507822424173355, "learning_rate": 3.984711937284197e-06, "loss": 0.8708, "step": 140260 }, { "epoch": 1.0153676880424476, "grad_norm": 0.1650809794664383, "learning_rate": 3.9846395506236115e-06, "loss": 0.8617, "step": 140270 }, { "epoch": 1.0154400747030337, "grad_norm": 0.14646883308887482, "learning_rate": 3.984567163963025e-06, "loss": 0.8711, "step": 140280 }, { "epoch": 1.01551246136362, "grad_norm": 0.16238850355148315, "learning_rate": 3.984494777302439e-06, "loss": 0.8666, "step": 140290 }, { "epoch": 1.015584848024206, "grad_norm": 0.15635709464550018, "learning_rate": 3.984422390641852e-06, "loss": 0.8715, "step": 140300 }, { "epoch": 1.0156572346847923, "grad_norm": 0.15769830346107483, "learning_rate": 3.984350003981267e-06, "loss": 0.8673, "step": 140310 }, { "epoch": 1.0157296213453786, "grad_norm": 0.1550094187259674, "learning_rate": 3.9842776173206805e-06, "loss": 0.8631, "step": 140320 }, { "epoch": 1.0158020080059647, "grad_norm": 0.16687199473381042, "learning_rate": 3.984205230660094e-06, "loss": 0.8557, "step": 140330 }, { "epoch": 1.015874394666551, "grad_norm": 0.19312235713005066, "learning_rate": 3.984132843999508e-06, "loss": 0.868, "step": 140340 }, { "epoch": 1.015946781327137, "grad_norm": 0.15332721173763275, "learning_rate": 3.984060457338922e-06, "loss": 0.8644, "step": 140350 }, { "epoch": 1.0160191679877233, "grad_norm": 0.15439341962337494, "learning_rate": 3.983988070678336e-06, "loss": 0.8557, "step": 140360 }, { "epoch": 1.0160915546483094, "grad_norm": 0.140192449092865, "learning_rate": 3.983915684017749e-06, "loss": 0.864, "step": 140370 }, { "epoch": 1.0161639413088956, "grad_norm": 0.15483497083187103, "learning_rate": 3.983843297357163e-06, "loss": 0.8618, "step": 140380 }, { "epoch": 1.0162363279694817, "grad_norm": 0.15083526074886322, "learning_rate": 3.9837709106965775e-06, "loss": 0.8493, "step": 140390 }, { "epoch": 1.016308714630068, "grad_norm": 0.15921801328659058, "learning_rate": 3.983698524035991e-06, "loss": 0.8575, "step": 140400 }, { "epoch": 1.016381101290654, "grad_norm": 0.15354293584823608, "learning_rate": 3.983626137375405e-06, "loss": 0.8575, "step": 140410 }, { "epoch": 1.0164534879512404, "grad_norm": 0.159376859664917, "learning_rate": 3.983553750714818e-06, "loss": 0.8627, "step": 140420 }, { "epoch": 1.0165258746118266, "grad_norm": 0.16422039270401, "learning_rate": 3.983481364054233e-06, "loss": 0.8659, "step": 140430 }, { "epoch": 1.0165982612724127, "grad_norm": 0.16659992933273315, "learning_rate": 3.983408977393646e-06, "loss": 0.8698, "step": 140440 }, { "epoch": 1.016670647932999, "grad_norm": 0.155796617269516, "learning_rate": 3.98333659073306e-06, "loss": 0.8626, "step": 140450 }, { "epoch": 1.016743034593585, "grad_norm": 0.15610454976558685, "learning_rate": 3.983264204072474e-06, "loss": 0.8471, "step": 140460 }, { "epoch": 1.0168154212541713, "grad_norm": 0.17221376299858093, "learning_rate": 3.983191817411888e-06, "loss": 0.8756, "step": 140470 }, { "epoch": 1.0168878079147574, "grad_norm": 0.1467757523059845, "learning_rate": 3.983119430751302e-06, "loss": 0.8566, "step": 140480 }, { "epoch": 1.0169601945753437, "grad_norm": 0.14400814473628998, "learning_rate": 3.983047044090715e-06, "loss": 0.8487, "step": 140490 }, { "epoch": 1.0170325812359298, "grad_norm": 0.15027961134910583, "learning_rate": 3.982974657430129e-06, "loss": 0.8633, "step": 140500 }, { "epoch": 1.017104967896516, "grad_norm": 0.14819438755512238, "learning_rate": 3.982902270769543e-06, "loss": 0.854, "step": 140510 }, { "epoch": 1.0171773545571021, "grad_norm": 0.1465308964252472, "learning_rate": 3.982829884108957e-06, "loss": 0.8568, "step": 140520 }, { "epoch": 1.0172497412176884, "grad_norm": 0.32729414105415344, "learning_rate": 3.982757497448371e-06, "loss": 0.8546, "step": 140530 }, { "epoch": 1.0173221278782747, "grad_norm": 0.15239901840686798, "learning_rate": 3.982685110787784e-06, "loss": 0.8547, "step": 140540 }, { "epoch": 1.0173945145388608, "grad_norm": 0.15983672440052032, "learning_rate": 3.982612724127199e-06, "loss": 0.8539, "step": 140550 }, { "epoch": 1.017466901199447, "grad_norm": 0.15606176853179932, "learning_rate": 3.982540337466612e-06, "loss": 0.8574, "step": 140560 }, { "epoch": 1.0175392878600331, "grad_norm": 0.1544933319091797, "learning_rate": 3.982467950806026e-06, "loss": 0.8641, "step": 140570 }, { "epoch": 1.0176116745206194, "grad_norm": 0.14850009977817535, "learning_rate": 3.9823955641454396e-06, "loss": 0.8581, "step": 140580 }, { "epoch": 1.0176840611812055, "grad_norm": 0.16443096101284027, "learning_rate": 3.982323177484854e-06, "loss": 0.853, "step": 140590 }, { "epoch": 1.0177564478417918, "grad_norm": 0.15893018245697021, "learning_rate": 3.982250790824268e-06, "loss": 0.8707, "step": 140600 }, { "epoch": 1.0178288345023778, "grad_norm": 0.16136851906776428, "learning_rate": 3.982178404163681e-06, "loss": 0.8528, "step": 140610 }, { "epoch": 1.017901221162964, "grad_norm": 0.14998020231723785, "learning_rate": 3.982106017503095e-06, "loss": 0.8641, "step": 140620 }, { "epoch": 1.0179736078235502, "grad_norm": 0.16709375381469727, "learning_rate": 3.982033630842509e-06, "loss": 0.8502, "step": 140630 }, { "epoch": 1.0180459944841365, "grad_norm": 0.15164078772068024, "learning_rate": 3.981961244181923e-06, "loss": 0.8566, "step": 140640 }, { "epoch": 1.0181183811447228, "grad_norm": 0.16210389137268066, "learning_rate": 3.981888857521336e-06, "loss": 0.8707, "step": 140650 }, { "epoch": 1.0181907678053088, "grad_norm": 0.15444253385066986, "learning_rate": 3.98181647086075e-06, "loss": 0.8628, "step": 140660 }, { "epoch": 1.018263154465895, "grad_norm": 0.15437939763069153, "learning_rate": 3.981744084200164e-06, "loss": 0.8504, "step": 140670 }, { "epoch": 1.0183355411264812, "grad_norm": 0.15111126005649567, "learning_rate": 3.981671697539577e-06, "loss": 0.8609, "step": 140680 }, { "epoch": 1.0184079277870675, "grad_norm": 0.1448373794555664, "learning_rate": 3.981599310878991e-06, "loss": 0.8588, "step": 140690 }, { "epoch": 1.0184803144476535, "grad_norm": 0.14929819107055664, "learning_rate": 3.9815269242184055e-06, "loss": 0.868, "step": 140700 }, { "epoch": 1.0185527011082398, "grad_norm": 0.1808697134256363, "learning_rate": 3.981454537557819e-06, "loss": 0.8635, "step": 140710 }, { "epoch": 1.0186250877688259, "grad_norm": 0.1492968201637268, "learning_rate": 3.981382150897233e-06, "loss": 0.8852, "step": 140720 }, { "epoch": 1.0186974744294122, "grad_norm": 0.15429550409317017, "learning_rate": 3.981309764236646e-06, "loss": 0.87, "step": 140730 }, { "epoch": 1.0187698610899982, "grad_norm": 0.1998952180147171, "learning_rate": 3.981237377576061e-06, "loss": 0.8681, "step": 140740 }, { "epoch": 1.0188422477505845, "grad_norm": 0.14868871867656708, "learning_rate": 3.9811649909154744e-06, "loss": 0.8709, "step": 140750 }, { "epoch": 1.0189146344111708, "grad_norm": 0.16644510626792908, "learning_rate": 3.981092604254888e-06, "loss": 0.8687, "step": 140760 }, { "epoch": 1.0189870210717569, "grad_norm": 0.14398843050003052, "learning_rate": 3.981020217594302e-06, "loss": 0.8643, "step": 140770 }, { "epoch": 1.0190594077323432, "grad_norm": 0.14528527855873108, "learning_rate": 3.980947830933716e-06, "loss": 0.8783, "step": 140780 }, { "epoch": 1.0191317943929292, "grad_norm": 0.15322700142860413, "learning_rate": 3.98087544427313e-06, "loss": 0.8527, "step": 140790 }, { "epoch": 1.0192041810535155, "grad_norm": 0.1468924880027771, "learning_rate": 3.980803057612543e-06, "loss": 0.8566, "step": 140800 }, { "epoch": 1.0192765677141016, "grad_norm": 0.1553269475698471, "learning_rate": 3.980730670951957e-06, "loss": 0.8583, "step": 140810 }, { "epoch": 1.0193489543746879, "grad_norm": 0.14764392375946045, "learning_rate": 3.9806582842913714e-06, "loss": 0.8567, "step": 140820 }, { "epoch": 1.019421341035274, "grad_norm": 0.16374526917934418, "learning_rate": 3.980585897630785e-06, "loss": 0.8495, "step": 140830 }, { "epoch": 1.0194937276958602, "grad_norm": 0.1522345393896103, "learning_rate": 3.980513510970199e-06, "loss": 0.8623, "step": 140840 }, { "epoch": 1.0195661143564463, "grad_norm": 0.16136972606182098, "learning_rate": 3.980441124309612e-06, "loss": 0.8658, "step": 140850 }, { "epoch": 1.0196385010170326, "grad_norm": 0.15984009206295013, "learning_rate": 3.980368737649026e-06, "loss": 0.8635, "step": 140860 }, { "epoch": 1.0197108876776189, "grad_norm": 0.14360426366329193, "learning_rate": 3.98029635098844e-06, "loss": 0.8678, "step": 140870 }, { "epoch": 1.019783274338205, "grad_norm": 0.17367321252822876, "learning_rate": 3.980223964327854e-06, "loss": 0.869, "step": 140880 }, { "epoch": 1.0198556609987912, "grad_norm": 0.14711663126945496, "learning_rate": 3.980151577667268e-06, "loss": 0.8577, "step": 140890 }, { "epoch": 1.0199280476593773, "grad_norm": 0.15861208736896515, "learning_rate": 3.980079191006681e-06, "loss": 0.8562, "step": 140900 }, { "epoch": 1.0200004343199636, "grad_norm": 0.15030620992183685, "learning_rate": 3.980006804346096e-06, "loss": 0.863, "step": 140910 }, { "epoch": 1.0200728209805496, "grad_norm": 0.14951910078525543, "learning_rate": 3.979934417685509e-06, "loss": 0.8614, "step": 140920 }, { "epoch": 1.020145207641136, "grad_norm": 0.1687590330839157, "learning_rate": 3.979862031024923e-06, "loss": 0.878, "step": 140930 }, { "epoch": 1.020217594301722, "grad_norm": 0.17207682132720947, "learning_rate": 3.9797896443643365e-06, "loss": 0.8576, "step": 140940 }, { "epoch": 1.0202899809623083, "grad_norm": 0.14084044098854065, "learning_rate": 3.979717257703751e-06, "loss": 0.8559, "step": 140950 }, { "epoch": 1.0203623676228946, "grad_norm": 0.19523248076438904, "learning_rate": 3.979644871043165e-06, "loss": 0.8645, "step": 140960 }, { "epoch": 1.0204347542834806, "grad_norm": 0.14832082390785217, "learning_rate": 3.979572484382578e-06, "loss": 0.8652, "step": 140970 }, { "epoch": 1.020507140944067, "grad_norm": 0.21129783987998962, "learning_rate": 3.979500097721992e-06, "loss": 0.8643, "step": 140980 }, { "epoch": 1.020579527604653, "grad_norm": 0.16113729774951935, "learning_rate": 3.979427711061406e-06, "loss": 0.8615, "step": 140990 }, { "epoch": 1.0206519142652393, "grad_norm": 0.15041108429431915, "learning_rate": 3.97935532440082e-06, "loss": 0.8556, "step": 141000 }, { "epoch": 1.0207243009258253, "grad_norm": 0.1588127315044403, "learning_rate": 3.9792829377402335e-06, "loss": 0.8617, "step": 141010 }, { "epoch": 1.0207966875864116, "grad_norm": 0.1841687262058258, "learning_rate": 3.979210551079647e-06, "loss": 0.8581, "step": 141020 }, { "epoch": 1.0208690742469977, "grad_norm": 0.1596136838197708, "learning_rate": 3.979138164419062e-06, "loss": 0.87, "step": 141030 }, { "epoch": 1.020941460907584, "grad_norm": 0.15191321074962616, "learning_rate": 3.979065777758475e-06, "loss": 0.865, "step": 141040 }, { "epoch": 1.02101384756817, "grad_norm": 0.14728322625160217, "learning_rate": 3.978993391097889e-06, "loss": 0.8643, "step": 141050 }, { "epoch": 1.0210862342287563, "grad_norm": 0.15328173339366913, "learning_rate": 3.9789210044373025e-06, "loss": 0.8609, "step": 141060 }, { "epoch": 1.0211586208893426, "grad_norm": 0.14939600229263306, "learning_rate": 3.978848617776717e-06, "loss": 0.8711, "step": 141070 }, { "epoch": 1.0212310075499287, "grad_norm": 0.16299229860305786, "learning_rate": 3.9787762311161305e-06, "loss": 0.8558, "step": 141080 }, { "epoch": 1.021303394210515, "grad_norm": 0.14965036511421204, "learning_rate": 3.978703844455544e-06, "loss": 0.8579, "step": 141090 }, { "epoch": 1.021375780871101, "grad_norm": 0.1521276980638504, "learning_rate": 3.978631457794958e-06, "loss": 0.8616, "step": 141100 }, { "epoch": 1.0214481675316873, "grad_norm": 0.17580242455005646, "learning_rate": 3.978559071134372e-06, "loss": 0.8586, "step": 141110 }, { "epoch": 1.0215205541922734, "grad_norm": 0.15669845044612885, "learning_rate": 3.978486684473786e-06, "loss": 0.8737, "step": 141120 }, { "epoch": 1.0215929408528597, "grad_norm": 0.1495697796344757, "learning_rate": 3.9784142978131995e-06, "loss": 0.8624, "step": 141130 }, { "epoch": 1.0216653275134457, "grad_norm": 0.16285309195518494, "learning_rate": 3.978341911152613e-06, "loss": 0.8776, "step": 141140 }, { "epoch": 1.021737714174032, "grad_norm": 0.14582541584968567, "learning_rate": 3.9782695244920275e-06, "loss": 0.8666, "step": 141150 }, { "epoch": 1.021810100834618, "grad_norm": 0.15949222445487976, "learning_rate": 3.978197137831441e-06, "loss": 0.8504, "step": 141160 }, { "epoch": 1.0218824874952044, "grad_norm": 0.1575903445482254, "learning_rate": 3.978124751170855e-06, "loss": 0.8695, "step": 141170 }, { "epoch": 1.0219548741557907, "grad_norm": 0.17722275853157043, "learning_rate": 3.978052364510268e-06, "loss": 0.8657, "step": 141180 }, { "epoch": 1.0220272608163767, "grad_norm": 0.14309902489185333, "learning_rate": 3.977979977849682e-06, "loss": 0.8619, "step": 141190 }, { "epoch": 1.022099647476963, "grad_norm": 0.14356312155723572, "learning_rate": 3.977907591189096e-06, "loss": 0.8674, "step": 141200 }, { "epoch": 1.022172034137549, "grad_norm": 0.15702299773693085, "learning_rate": 3.977835204528509e-06, "loss": 0.8666, "step": 141210 }, { "epoch": 1.0222444207981354, "grad_norm": 0.16147883236408234, "learning_rate": 3.977762817867924e-06, "loss": 0.8675, "step": 141220 }, { "epoch": 1.0223168074587214, "grad_norm": 0.1556180715560913, "learning_rate": 3.977690431207337e-06, "loss": 0.8506, "step": 141230 }, { "epoch": 1.0223891941193077, "grad_norm": 0.17178305983543396, "learning_rate": 3.977618044546751e-06, "loss": 0.8649, "step": 141240 }, { "epoch": 1.0224615807798938, "grad_norm": 0.16344556212425232, "learning_rate": 3.9775456578861646e-06, "loss": 0.8561, "step": 141250 }, { "epoch": 1.02253396744048, "grad_norm": 0.20177888870239258, "learning_rate": 3.977473271225579e-06, "loss": 0.8556, "step": 141260 }, { "epoch": 1.0226063541010662, "grad_norm": 0.16756558418273926, "learning_rate": 3.977400884564993e-06, "loss": 0.8527, "step": 141270 }, { "epoch": 1.0226787407616524, "grad_norm": 0.14939232170581818, "learning_rate": 3.977328497904406e-06, "loss": 0.8596, "step": 141280 }, { "epoch": 1.0227511274222387, "grad_norm": 0.14247731864452362, "learning_rate": 3.97725611124382e-06, "loss": 0.8656, "step": 141290 }, { "epoch": 1.0228235140828248, "grad_norm": 0.18383385241031647, "learning_rate": 3.977183724583234e-06, "loss": 0.86, "step": 141300 }, { "epoch": 1.022895900743411, "grad_norm": 0.16358555853366852, "learning_rate": 3.977111337922648e-06, "loss": 0.873, "step": 141310 }, { "epoch": 1.0229682874039971, "grad_norm": 0.1494084596633911, "learning_rate": 3.9770389512620616e-06, "loss": 0.8498, "step": 141320 }, { "epoch": 1.0230406740645834, "grad_norm": 0.14796346426010132, "learning_rate": 3.976966564601475e-06, "loss": 0.8665, "step": 141330 }, { "epoch": 1.0231130607251695, "grad_norm": 0.2109985500574112, "learning_rate": 3.97689417794089e-06, "loss": 0.8533, "step": 141340 }, { "epoch": 1.0231854473857558, "grad_norm": 0.14924803376197815, "learning_rate": 3.976821791280303e-06, "loss": 0.867, "step": 141350 }, { "epoch": 1.0232578340463419, "grad_norm": 0.16223694384098053, "learning_rate": 3.976749404619717e-06, "loss": 0.87, "step": 141360 }, { "epoch": 1.0233302207069281, "grad_norm": 0.14216773211956024, "learning_rate": 3.9766770179591305e-06, "loss": 0.8584, "step": 141370 }, { "epoch": 1.0234026073675142, "grad_norm": 0.15058837831020355, "learning_rate": 3.976604631298545e-06, "loss": 0.8655, "step": 141380 }, { "epoch": 1.0234749940281005, "grad_norm": 0.14986705780029297, "learning_rate": 3.9765322446379586e-06, "loss": 0.8506, "step": 141390 }, { "epoch": 1.0235473806886868, "grad_norm": 0.1716388761997223, "learning_rate": 3.976459857977372e-06, "loss": 0.8623, "step": 141400 }, { "epoch": 1.0236197673492728, "grad_norm": 0.1996438354253769, "learning_rate": 3.976387471316786e-06, "loss": 0.8684, "step": 141410 }, { "epoch": 1.0236921540098591, "grad_norm": 0.14589093625545502, "learning_rate": 3.9763150846562e-06, "loss": 0.8659, "step": 141420 }, { "epoch": 1.0237645406704452, "grad_norm": 0.1426560878753662, "learning_rate": 3.976242697995614e-06, "loss": 0.8633, "step": 141430 }, { "epoch": 1.0238369273310315, "grad_norm": 0.21811816096305847, "learning_rate": 3.9761703113350275e-06, "loss": 0.857, "step": 141440 }, { "epoch": 1.0239093139916176, "grad_norm": 0.13757973909378052, "learning_rate": 3.976097924674441e-06, "loss": 0.8659, "step": 141450 }, { "epoch": 1.0239817006522038, "grad_norm": 0.15585637092590332, "learning_rate": 3.9760255380138556e-06, "loss": 0.8663, "step": 141460 }, { "epoch": 1.02405408731279, "grad_norm": 0.1470966935157776, "learning_rate": 3.975953151353269e-06, "loss": 0.8585, "step": 141470 }, { "epoch": 1.0241264739733762, "grad_norm": 0.15229181945323944, "learning_rate": 3.975880764692683e-06, "loss": 0.8514, "step": 141480 }, { "epoch": 1.0241988606339625, "grad_norm": 0.1502394825220108, "learning_rate": 3.9758083780320964e-06, "loss": 0.8598, "step": 141490 }, { "epoch": 1.0242712472945485, "grad_norm": 0.1439654380083084, "learning_rate": 3.97573599137151e-06, "loss": 0.8768, "step": 141500 }, { "epoch": 1.0243436339551348, "grad_norm": 0.14857643842697144, "learning_rate": 3.9756636047109245e-06, "loss": 0.871, "step": 141510 }, { "epoch": 1.024416020615721, "grad_norm": 0.1647205948829651, "learning_rate": 3.975591218050338e-06, "loss": 0.8624, "step": 141520 }, { "epoch": 1.0244884072763072, "grad_norm": 0.16993491351604462, "learning_rate": 3.975518831389752e-06, "loss": 0.8779, "step": 141530 }, { "epoch": 1.0245607939368933, "grad_norm": 0.16677454113960266, "learning_rate": 3.975446444729165e-06, "loss": 0.861, "step": 141540 }, { "epoch": 1.0246331805974795, "grad_norm": 0.15071935951709747, "learning_rate": 3.97537405806858e-06, "loss": 0.87, "step": 141550 }, { "epoch": 1.0247055672580656, "grad_norm": 0.15021325647830963, "learning_rate": 3.9753016714079934e-06, "loss": 0.8523, "step": 141560 }, { "epoch": 1.024777953918652, "grad_norm": 0.14703767001628876, "learning_rate": 3.975229284747407e-06, "loss": 0.8572, "step": 141570 }, { "epoch": 1.024850340579238, "grad_norm": 0.17009884119033813, "learning_rate": 3.975156898086821e-06, "loss": 0.8612, "step": 141580 }, { "epoch": 1.0249227272398242, "grad_norm": 0.21612617373466492, "learning_rate": 3.975084511426235e-06, "loss": 0.8604, "step": 141590 }, { "epoch": 1.0249951139004105, "grad_norm": 0.1644405573606491, "learning_rate": 3.975012124765649e-06, "loss": 0.858, "step": 141600 }, { "epoch": 1.0250675005609966, "grad_norm": 0.1592147797346115, "learning_rate": 3.974939738105062e-06, "loss": 0.8701, "step": 141610 }, { "epoch": 1.025139887221583, "grad_norm": 0.1543736755847931, "learning_rate": 3.974867351444476e-06, "loss": 0.851, "step": 141620 }, { "epoch": 1.025212273882169, "grad_norm": 0.15851731598377228, "learning_rate": 3.9747949647838904e-06, "loss": 0.8625, "step": 141630 }, { "epoch": 1.0252846605427552, "grad_norm": 0.1598898321390152, "learning_rate": 3.974722578123304e-06, "loss": 0.8513, "step": 141640 }, { "epoch": 1.0253570472033413, "grad_norm": 0.1485353261232376, "learning_rate": 3.974650191462718e-06, "loss": 0.8666, "step": 141650 }, { "epoch": 1.0254294338639276, "grad_norm": 0.15137670934200287, "learning_rate": 3.974577804802131e-06, "loss": 0.8551, "step": 141660 }, { "epoch": 1.0255018205245137, "grad_norm": 0.14559021592140198, "learning_rate": 3.974505418141546e-06, "loss": 0.8602, "step": 141670 }, { "epoch": 1.0255742071851, "grad_norm": 0.15725764632225037, "learning_rate": 3.974433031480959e-06, "loss": 0.8529, "step": 141680 }, { "epoch": 1.025646593845686, "grad_norm": 0.15133067965507507, "learning_rate": 3.974360644820373e-06, "loss": 0.8677, "step": 141690 }, { "epoch": 1.0257189805062723, "grad_norm": 0.16057194769382477, "learning_rate": 3.974288258159787e-06, "loss": 0.8644, "step": 141700 }, { "epoch": 1.0257913671668586, "grad_norm": 0.14923806488513947, "learning_rate": 3.9742158714992e-06, "loss": 0.8539, "step": 141710 }, { "epoch": 1.0258637538274447, "grad_norm": 0.1376243233680725, "learning_rate": 3.974143484838614e-06, "loss": 0.8581, "step": 141720 }, { "epoch": 1.025936140488031, "grad_norm": 0.15476512908935547, "learning_rate": 3.9740710981780274e-06, "loss": 0.8533, "step": 141730 }, { "epoch": 1.026008527148617, "grad_norm": 0.16103792190551758, "learning_rate": 3.973998711517442e-06, "loss": 0.8599, "step": 141740 }, { "epoch": 1.0260809138092033, "grad_norm": 0.1493249237537384, "learning_rate": 3.9739263248568555e-06, "loss": 0.8548, "step": 141750 }, { "epoch": 1.0261533004697894, "grad_norm": 0.1508176475763321, "learning_rate": 3.973853938196269e-06, "loss": 0.8681, "step": 141760 }, { "epoch": 1.0262256871303757, "grad_norm": 0.16366364061832428, "learning_rate": 3.973781551535683e-06, "loss": 0.8723, "step": 141770 }, { "epoch": 1.0262980737909617, "grad_norm": 0.1778716742992401, "learning_rate": 3.973709164875097e-06, "loss": 0.87, "step": 141780 }, { "epoch": 1.026370460451548, "grad_norm": 0.1537426859140396, "learning_rate": 3.973636778214511e-06, "loss": 0.8588, "step": 141790 }, { "epoch": 1.026442847112134, "grad_norm": 0.15936316549777985, "learning_rate": 3.9735643915539245e-06, "loss": 0.8524, "step": 141800 }, { "epoch": 1.0265152337727204, "grad_norm": 0.15303193032741547, "learning_rate": 3.973492004893338e-06, "loss": 0.8655, "step": 141810 }, { "epoch": 1.0265876204333066, "grad_norm": 0.1577739119529724, "learning_rate": 3.9734196182327525e-06, "loss": 0.8629, "step": 141820 }, { "epoch": 1.0266600070938927, "grad_norm": 0.15587298572063446, "learning_rate": 3.973347231572166e-06, "loss": 0.8514, "step": 141830 }, { "epoch": 1.026732393754479, "grad_norm": 0.15615636110305786, "learning_rate": 3.97327484491158e-06, "loss": 0.8699, "step": 141840 }, { "epoch": 1.026804780415065, "grad_norm": 0.15278807282447815, "learning_rate": 3.973202458250993e-06, "loss": 0.8551, "step": 141850 }, { "epoch": 1.0268771670756514, "grad_norm": 0.1699153482913971, "learning_rate": 3.973130071590408e-06, "loss": 0.8701, "step": 141860 }, { "epoch": 1.0269495537362374, "grad_norm": 0.1508774608373642, "learning_rate": 3.9730576849298215e-06, "loss": 0.8502, "step": 141870 }, { "epoch": 1.0270219403968237, "grad_norm": 0.1427360475063324, "learning_rate": 3.972985298269235e-06, "loss": 0.8529, "step": 141880 }, { "epoch": 1.0270943270574098, "grad_norm": 0.15359556674957275, "learning_rate": 3.972912911608649e-06, "loss": 0.8513, "step": 141890 }, { "epoch": 1.027166713717996, "grad_norm": 0.15233898162841797, "learning_rate": 3.972840524948063e-06, "loss": 0.8758, "step": 141900 }, { "epoch": 1.0272391003785821, "grad_norm": 0.1512978971004486, "learning_rate": 3.972768138287477e-06, "loss": 0.8616, "step": 141910 }, { "epoch": 1.0273114870391684, "grad_norm": 0.15592820942401886, "learning_rate": 3.97269575162689e-06, "loss": 0.8621, "step": 141920 }, { "epoch": 1.0273838736997547, "grad_norm": 0.15350952744483948, "learning_rate": 3.972623364966304e-06, "loss": 0.8509, "step": 141930 }, { "epoch": 1.0274562603603408, "grad_norm": 0.16110540926456451, "learning_rate": 3.9725509783057185e-06, "loss": 0.8634, "step": 141940 }, { "epoch": 1.027528647020927, "grad_norm": 0.15037639439105988, "learning_rate": 3.972478591645132e-06, "loss": 0.8537, "step": 141950 }, { "epoch": 1.0276010336815131, "grad_norm": 0.16510790586471558, "learning_rate": 3.972406204984546e-06, "loss": 0.8515, "step": 141960 }, { "epoch": 1.0276734203420994, "grad_norm": 0.16185833513736725, "learning_rate": 3.972333818323959e-06, "loss": 0.8547, "step": 141970 }, { "epoch": 1.0277458070026855, "grad_norm": 0.14903537929058075, "learning_rate": 3.972261431663374e-06, "loss": 0.8765, "step": 141980 }, { "epoch": 1.0278181936632718, "grad_norm": 0.14744271337985992, "learning_rate": 3.972189045002787e-06, "loss": 0.8588, "step": 141990 }, { "epoch": 1.0278905803238578, "grad_norm": 0.16065943241119385, "learning_rate": 3.972116658342201e-06, "loss": 0.8532, "step": 142000 }, { "epoch": 1.0279629669844441, "grad_norm": 0.15768252313137054, "learning_rate": 3.972044271681615e-06, "loss": 0.8627, "step": 142010 }, { "epoch": 1.0280353536450304, "grad_norm": 0.16931724548339844, "learning_rate": 3.971971885021029e-06, "loss": 0.8667, "step": 142020 }, { "epoch": 1.0281077403056165, "grad_norm": 0.14971376955509186, "learning_rate": 3.971899498360443e-06, "loss": 0.8592, "step": 142030 }, { "epoch": 1.0281801269662028, "grad_norm": 0.1479533463716507, "learning_rate": 3.971827111699856e-06, "loss": 0.8577, "step": 142040 }, { "epoch": 1.0282525136267888, "grad_norm": 0.15381501615047455, "learning_rate": 3.97175472503927e-06, "loss": 0.8658, "step": 142050 }, { "epoch": 1.028324900287375, "grad_norm": 0.1562289297580719, "learning_rate": 3.971682338378684e-06, "loss": 0.8571, "step": 142060 }, { "epoch": 1.0283972869479612, "grad_norm": 0.15410132706165314, "learning_rate": 3.971609951718098e-06, "loss": 0.8519, "step": 142070 }, { "epoch": 1.0284696736085475, "grad_norm": 0.19577772915363312, "learning_rate": 3.971537565057512e-06, "loss": 0.8558, "step": 142080 }, { "epoch": 1.0285420602691335, "grad_norm": 0.15742763876914978, "learning_rate": 3.971465178396925e-06, "loss": 0.8577, "step": 142090 }, { "epoch": 1.0286144469297198, "grad_norm": 0.16709017753601074, "learning_rate": 3.971392791736339e-06, "loss": 0.865, "step": 142100 }, { "epoch": 1.0286868335903059, "grad_norm": 0.15220534801483154, "learning_rate": 3.971320405075753e-06, "loss": 0.8565, "step": 142110 }, { "epoch": 1.0287592202508922, "grad_norm": 0.18524552881717682, "learning_rate": 3.971248018415167e-06, "loss": 0.8583, "step": 142120 }, { "epoch": 1.0288316069114785, "grad_norm": 0.1528216153383255, "learning_rate": 3.9711756317545806e-06, "loss": 0.8781, "step": 142130 }, { "epoch": 1.0289039935720645, "grad_norm": 0.15787044167518616, "learning_rate": 3.971103245093994e-06, "loss": 0.861, "step": 142140 }, { "epoch": 1.0289763802326508, "grad_norm": 0.15298911929130554, "learning_rate": 3.971030858433409e-06, "loss": 0.8647, "step": 142150 }, { "epoch": 1.0290487668932369, "grad_norm": 0.15209560096263885, "learning_rate": 3.970958471772822e-06, "loss": 0.8553, "step": 142160 }, { "epoch": 1.0291211535538232, "grad_norm": 0.14856834709644318, "learning_rate": 3.970886085112236e-06, "loss": 0.8556, "step": 142170 }, { "epoch": 1.0291935402144092, "grad_norm": 0.14934612810611725, "learning_rate": 3.9708136984516495e-06, "loss": 0.867, "step": 142180 }, { "epoch": 1.0292659268749955, "grad_norm": 0.1754796802997589, "learning_rate": 3.970741311791064e-06, "loss": 0.8633, "step": 142190 }, { "epoch": 1.0293383135355816, "grad_norm": 0.15860222280025482, "learning_rate": 3.9706689251304776e-06, "loss": 0.8635, "step": 142200 }, { "epoch": 1.0294107001961679, "grad_norm": 0.16497546434402466, "learning_rate": 3.970596538469891e-06, "loss": 0.8575, "step": 142210 }, { "epoch": 1.029483086856754, "grad_norm": 0.20866011083126068, "learning_rate": 3.970524151809305e-06, "loss": 0.8699, "step": 142220 }, { "epoch": 1.0295554735173402, "grad_norm": 0.17648346722126007, "learning_rate": 3.970451765148719e-06, "loss": 0.8578, "step": 142230 }, { "epoch": 1.0296278601779265, "grad_norm": 0.1537626087665558, "learning_rate": 3.970379378488132e-06, "loss": 0.8485, "step": 142240 }, { "epoch": 1.0297002468385126, "grad_norm": 0.14427883923053741, "learning_rate": 3.9703069918275465e-06, "loss": 0.856, "step": 142250 }, { "epoch": 1.0297726334990989, "grad_norm": 0.1432482898235321, "learning_rate": 3.97023460516696e-06, "loss": 0.8569, "step": 142260 }, { "epoch": 1.029845020159685, "grad_norm": 0.14573949575424194, "learning_rate": 3.970162218506374e-06, "loss": 0.8622, "step": 142270 }, { "epoch": 1.0299174068202712, "grad_norm": 0.1648380309343338, "learning_rate": 3.970089831845787e-06, "loss": 0.8654, "step": 142280 }, { "epoch": 1.0299897934808573, "grad_norm": 0.16308258473873138, "learning_rate": 3.970017445185201e-06, "loss": 0.8474, "step": 142290 }, { "epoch": 1.0300621801414436, "grad_norm": 0.1557963788509369, "learning_rate": 3.9699450585246154e-06, "loss": 0.8688, "step": 142300 }, { "epoch": 1.0301345668020296, "grad_norm": 0.16031388938426971, "learning_rate": 3.969872671864029e-06, "loss": 0.8593, "step": 142310 }, { "epoch": 1.030206953462616, "grad_norm": 0.17652627825737, "learning_rate": 3.969800285203443e-06, "loss": 0.8639, "step": 142320 }, { "epoch": 1.030279340123202, "grad_norm": 0.1523296982049942, "learning_rate": 3.969727898542856e-06, "loss": 0.8444, "step": 142330 }, { "epoch": 1.0303517267837883, "grad_norm": 0.16042089462280273, "learning_rate": 3.969655511882271e-06, "loss": 0.8583, "step": 142340 }, { "epoch": 1.0304241134443746, "grad_norm": 0.15083900094032288, "learning_rate": 3.969583125221684e-06, "loss": 0.8611, "step": 142350 }, { "epoch": 1.0304965001049606, "grad_norm": 0.16533105075359344, "learning_rate": 3.969510738561098e-06, "loss": 0.8657, "step": 142360 }, { "epoch": 1.030568886765547, "grad_norm": 0.16816429793834686, "learning_rate": 3.969438351900512e-06, "loss": 0.8586, "step": 142370 }, { "epoch": 1.030641273426133, "grad_norm": 0.15156440436840057, "learning_rate": 3.969365965239926e-06, "loss": 0.8609, "step": 142380 }, { "epoch": 1.0307136600867193, "grad_norm": 0.15166370570659637, "learning_rate": 3.96929357857934e-06, "loss": 0.8651, "step": 142390 }, { "epoch": 1.0307860467473053, "grad_norm": 0.15877971053123474, "learning_rate": 3.969221191918753e-06, "loss": 0.8612, "step": 142400 }, { "epoch": 1.0308584334078916, "grad_norm": 0.15745185315608978, "learning_rate": 3.969148805258167e-06, "loss": 0.8582, "step": 142410 }, { "epoch": 1.0309308200684777, "grad_norm": 0.16884928941726685, "learning_rate": 3.969076418597581e-06, "loss": 0.8673, "step": 142420 }, { "epoch": 1.031003206729064, "grad_norm": 0.14777499437332153, "learning_rate": 3.969004031936995e-06, "loss": 0.8598, "step": 142430 }, { "epoch": 1.03107559338965, "grad_norm": 0.17125903069972992, "learning_rate": 3.968931645276409e-06, "loss": 0.8579, "step": 142440 }, { "epoch": 1.0311479800502363, "grad_norm": 0.18040338158607483, "learning_rate": 3.968859258615822e-06, "loss": 0.8556, "step": 142450 }, { "epoch": 1.0312203667108226, "grad_norm": 0.14645922183990479, "learning_rate": 3.968786871955237e-06, "loss": 0.8515, "step": 142460 }, { "epoch": 1.0312927533714087, "grad_norm": 0.17631933093070984, "learning_rate": 3.96871448529465e-06, "loss": 0.8559, "step": 142470 }, { "epoch": 1.031365140031995, "grad_norm": 0.19517827033996582, "learning_rate": 3.968642098634064e-06, "loss": 0.8675, "step": 142480 }, { "epoch": 1.031437526692581, "grad_norm": 0.5200027227401733, "learning_rate": 3.9685697119734775e-06, "loss": 0.8619, "step": 142490 }, { "epoch": 1.0315099133531673, "grad_norm": 0.15519946813583374, "learning_rate": 3.968497325312892e-06, "loss": 0.8543, "step": 142500 }, { "epoch": 1.0315823000137534, "grad_norm": 0.147262305021286, "learning_rate": 3.968424938652306e-06, "loss": 0.8527, "step": 142510 }, { "epoch": 1.0316546866743397, "grad_norm": 0.15960846841335297, "learning_rate": 3.968352551991719e-06, "loss": 0.8642, "step": 142520 }, { "epoch": 1.0317270733349257, "grad_norm": 0.16142988204956055, "learning_rate": 3.968280165331133e-06, "loss": 0.853, "step": 142530 }, { "epoch": 1.031799459995512, "grad_norm": 0.14241138100624084, "learning_rate": 3.968207778670547e-06, "loss": 0.8441, "step": 142540 }, { "epoch": 1.0318718466560983, "grad_norm": 0.15053856372833252, "learning_rate": 3.968135392009961e-06, "loss": 0.8477, "step": 142550 }, { "epoch": 1.0319442333166844, "grad_norm": 0.14978879690170288, "learning_rate": 3.9680630053493745e-06, "loss": 0.852, "step": 142560 }, { "epoch": 1.0320166199772707, "grad_norm": 0.15513886511325836, "learning_rate": 3.967990618688788e-06, "loss": 0.8575, "step": 142570 }, { "epoch": 1.0320890066378567, "grad_norm": 0.14662916958332062, "learning_rate": 3.967918232028203e-06, "loss": 0.8626, "step": 142580 }, { "epoch": 1.032161393298443, "grad_norm": 0.15396258234977722, "learning_rate": 3.967845845367616e-06, "loss": 0.8612, "step": 142590 }, { "epoch": 1.032233779959029, "grad_norm": 0.1600462794303894, "learning_rate": 3.96777345870703e-06, "loss": 0.873, "step": 142600 }, { "epoch": 1.0323061666196154, "grad_norm": 0.15220437943935394, "learning_rate": 3.9677010720464435e-06, "loss": 0.8591, "step": 142610 }, { "epoch": 1.0323785532802014, "grad_norm": 0.14761093258857727, "learning_rate": 3.967628685385858e-06, "loss": 0.8484, "step": 142620 }, { "epoch": 1.0324509399407877, "grad_norm": 0.15190240740776062, "learning_rate": 3.9675562987252715e-06, "loss": 0.8556, "step": 142630 }, { "epoch": 1.0325233266013738, "grad_norm": 0.15413150191307068, "learning_rate": 3.967483912064685e-06, "loss": 0.8524, "step": 142640 }, { "epoch": 1.03259571326196, "grad_norm": 0.16066095232963562, "learning_rate": 3.967411525404099e-06, "loss": 0.8588, "step": 142650 }, { "epoch": 1.0326680999225464, "grad_norm": 0.1533208042383194, "learning_rate": 3.967339138743513e-06, "loss": 0.8539, "step": 142660 }, { "epoch": 1.0327404865831324, "grad_norm": 0.1481548249721527, "learning_rate": 3.967266752082927e-06, "loss": 0.8609, "step": 142670 }, { "epoch": 1.0328128732437187, "grad_norm": 0.16002528369426727, "learning_rate": 3.9671943654223405e-06, "loss": 0.8602, "step": 142680 }, { "epoch": 1.0328852599043048, "grad_norm": 0.14188747107982635, "learning_rate": 3.967121978761754e-06, "loss": 0.8648, "step": 142690 }, { "epoch": 1.032957646564891, "grad_norm": 0.1650628298521042, "learning_rate": 3.9670495921011685e-06, "loss": 0.8579, "step": 142700 }, { "epoch": 1.0330300332254772, "grad_norm": 0.15831433236598969, "learning_rate": 3.966977205440582e-06, "loss": 0.8578, "step": 142710 }, { "epoch": 1.0331024198860634, "grad_norm": 0.14619334042072296, "learning_rate": 3.966904818779996e-06, "loss": 0.8735, "step": 142720 }, { "epoch": 1.0331748065466495, "grad_norm": 0.1563662737607956, "learning_rate": 3.966832432119409e-06, "loss": 0.8541, "step": 142730 }, { "epoch": 1.0332471932072358, "grad_norm": 0.155498206615448, "learning_rate": 3.966760045458823e-06, "loss": 0.8615, "step": 142740 }, { "epoch": 1.0333195798678219, "grad_norm": 0.1640671342611313, "learning_rate": 3.9666876587982375e-06, "loss": 0.8612, "step": 142750 }, { "epoch": 1.0333919665284081, "grad_norm": 0.15346024930477142, "learning_rate": 3.966615272137651e-06, "loss": 0.8689, "step": 142760 }, { "epoch": 1.0334643531889944, "grad_norm": 0.14893843233585358, "learning_rate": 3.966542885477065e-06, "loss": 0.8617, "step": 142770 }, { "epoch": 1.0335367398495805, "grad_norm": 0.1711651086807251, "learning_rate": 3.966470498816478e-06, "loss": 0.8623, "step": 142780 }, { "epoch": 1.0336091265101668, "grad_norm": 0.1499413400888443, "learning_rate": 3.966398112155892e-06, "loss": 0.8654, "step": 142790 }, { "epoch": 1.0336815131707529, "grad_norm": 0.16550101339817047, "learning_rate": 3.9663257254953056e-06, "loss": 0.8597, "step": 142800 }, { "epoch": 1.0337538998313391, "grad_norm": 0.15514644980430603, "learning_rate": 3.96625333883472e-06, "loss": 0.8546, "step": 142810 }, { "epoch": 1.0338262864919252, "grad_norm": 0.15630175173282623, "learning_rate": 3.966180952174134e-06, "loss": 0.8613, "step": 142820 }, { "epoch": 1.0338986731525115, "grad_norm": 0.15911681950092316, "learning_rate": 3.966108565513547e-06, "loss": 0.8679, "step": 142830 }, { "epoch": 1.0339710598130976, "grad_norm": 0.14707563817501068, "learning_rate": 3.966036178852961e-06, "loss": 0.8566, "step": 142840 }, { "epoch": 1.0340434464736838, "grad_norm": 0.15566620230674744, "learning_rate": 3.965963792192375e-06, "loss": 0.8664, "step": 142850 }, { "epoch": 1.03411583313427, "grad_norm": 0.16189688444137573, "learning_rate": 3.965891405531789e-06, "loss": 0.8587, "step": 142860 }, { "epoch": 1.0341882197948562, "grad_norm": 0.1611325591802597, "learning_rate": 3.9658190188712026e-06, "loss": 0.8551, "step": 142870 }, { "epoch": 1.0342606064554425, "grad_norm": 0.15380004048347473, "learning_rate": 3.965746632210616e-06, "loss": 0.8503, "step": 142880 }, { "epoch": 1.0343329931160286, "grad_norm": 0.15085004270076752, "learning_rate": 3.96567424555003e-06, "loss": 0.8579, "step": 142890 }, { "epoch": 1.0344053797766148, "grad_norm": 0.15508107841014862, "learning_rate": 3.965601858889444e-06, "loss": 0.8644, "step": 142900 }, { "epoch": 1.034477766437201, "grad_norm": 0.16797524690628052, "learning_rate": 3.965529472228858e-06, "loss": 0.8579, "step": 142910 }, { "epoch": 1.0345501530977872, "grad_norm": 0.16412454843521118, "learning_rate": 3.9654570855682715e-06, "loss": 0.8653, "step": 142920 }, { "epoch": 1.0346225397583733, "grad_norm": 0.1560967117547989, "learning_rate": 3.965384698907685e-06, "loss": 0.8617, "step": 142930 }, { "epoch": 1.0346949264189595, "grad_norm": 0.1702379286289215, "learning_rate": 3.9653123122470996e-06, "loss": 0.8819, "step": 142940 }, { "epoch": 1.0347673130795456, "grad_norm": 0.16101233661174774, "learning_rate": 3.965239925586513e-06, "loss": 0.8702, "step": 142950 }, { "epoch": 1.034839699740132, "grad_norm": 0.1507912427186966, "learning_rate": 3.965167538925927e-06, "loss": 0.8644, "step": 142960 }, { "epoch": 1.034912086400718, "grad_norm": 0.14601871371269226, "learning_rate": 3.96509515226534e-06, "loss": 0.8614, "step": 142970 }, { "epoch": 1.0349844730613043, "grad_norm": 0.1494961827993393, "learning_rate": 3.965022765604755e-06, "loss": 0.8673, "step": 142980 }, { "epoch": 1.0350568597218905, "grad_norm": 0.1469757854938507, "learning_rate": 3.9649503789441685e-06, "loss": 0.8586, "step": 142990 }, { "epoch": 1.0351292463824766, "grad_norm": 0.1592247635126114, "learning_rate": 3.964877992283582e-06, "loss": 0.8657, "step": 143000 }, { "epoch": 1.035201633043063, "grad_norm": 0.1555202305316925, "learning_rate": 3.964805605622996e-06, "loss": 0.8588, "step": 143010 }, { "epoch": 1.035274019703649, "grad_norm": 0.15788942575454712, "learning_rate": 3.96473321896241e-06, "loss": 0.8612, "step": 143020 }, { "epoch": 1.0353464063642352, "grad_norm": 0.15312904119491577, "learning_rate": 3.964660832301824e-06, "loss": 0.8609, "step": 143030 }, { "epoch": 1.0354187930248213, "grad_norm": 0.1483820229768753, "learning_rate": 3.9645884456412374e-06, "loss": 0.8604, "step": 143040 }, { "epoch": 1.0354911796854076, "grad_norm": 0.15752215683460236, "learning_rate": 3.964516058980651e-06, "loss": 0.8596, "step": 143050 }, { "epoch": 1.0355635663459937, "grad_norm": 0.15568195283412933, "learning_rate": 3.9644436723200655e-06, "loss": 0.8665, "step": 143060 }, { "epoch": 1.03563595300658, "grad_norm": 0.14673691987991333, "learning_rate": 3.964371285659479e-06, "loss": 0.8612, "step": 143070 }, { "epoch": 1.035708339667166, "grad_norm": 0.15292707085609436, "learning_rate": 3.964298898998893e-06, "loss": 0.8511, "step": 143080 }, { "epoch": 1.0357807263277523, "grad_norm": 0.1723155379295349, "learning_rate": 3.964226512338306e-06, "loss": 0.8634, "step": 143090 }, { "epoch": 1.0358531129883386, "grad_norm": 0.15540607273578644, "learning_rate": 3.964154125677721e-06, "loss": 0.8704, "step": 143100 }, { "epoch": 1.0359254996489247, "grad_norm": 0.14979644119739532, "learning_rate": 3.9640817390171344e-06, "loss": 0.8563, "step": 143110 }, { "epoch": 1.035997886309511, "grad_norm": 0.1446203589439392, "learning_rate": 3.964009352356548e-06, "loss": 0.8557, "step": 143120 }, { "epoch": 1.036070272970097, "grad_norm": 0.15356165170669556, "learning_rate": 3.963936965695962e-06, "loss": 0.8653, "step": 143130 }, { "epoch": 1.0361426596306833, "grad_norm": 0.1651306450366974, "learning_rate": 3.963864579035376e-06, "loss": 0.857, "step": 143140 }, { "epoch": 1.0362150462912694, "grad_norm": 0.16041286289691925, "learning_rate": 3.96379219237479e-06, "loss": 0.8544, "step": 143150 }, { "epoch": 1.0362874329518557, "grad_norm": 0.14968006312847137, "learning_rate": 3.963719805714203e-06, "loss": 0.8596, "step": 143160 }, { "epoch": 1.0363598196124417, "grad_norm": 0.1907356232404709, "learning_rate": 3.963647419053617e-06, "loss": 0.8688, "step": 143170 }, { "epoch": 1.036432206273028, "grad_norm": 0.15427495539188385, "learning_rate": 3.9635750323930314e-06, "loss": 0.8584, "step": 143180 }, { "epoch": 1.0365045929336143, "grad_norm": 0.15541379153728485, "learning_rate": 3.963502645732445e-06, "loss": 0.8563, "step": 143190 }, { "epoch": 1.0365769795942004, "grad_norm": 0.14748956263065338, "learning_rate": 3.963430259071859e-06, "loss": 0.8564, "step": 143200 }, { "epoch": 1.0366493662547867, "grad_norm": 0.15645940601825714, "learning_rate": 3.963357872411272e-06, "loss": 0.8467, "step": 143210 }, { "epoch": 1.0367217529153727, "grad_norm": 0.21893535554409027, "learning_rate": 3.963285485750687e-06, "loss": 0.8673, "step": 143220 }, { "epoch": 1.036794139575959, "grad_norm": 0.15868252515792847, "learning_rate": 3.9632130990901e-06, "loss": 0.8582, "step": 143230 }, { "epoch": 1.036866526236545, "grad_norm": 0.1767452508211136, "learning_rate": 3.963140712429514e-06, "loss": 0.8549, "step": 143240 }, { "epoch": 1.0369389128971314, "grad_norm": 0.14982333779335022, "learning_rate": 3.963068325768928e-06, "loss": 0.8538, "step": 143250 }, { "epoch": 1.0370112995577174, "grad_norm": 0.1804773211479187, "learning_rate": 3.962995939108342e-06, "loss": 0.8644, "step": 143260 }, { "epoch": 1.0370836862183037, "grad_norm": 0.15341947972774506, "learning_rate": 3.962923552447756e-06, "loss": 0.8605, "step": 143270 }, { "epoch": 1.0371560728788898, "grad_norm": 0.15054786205291748, "learning_rate": 3.962851165787169e-06, "loss": 0.8607, "step": 143280 }, { "epoch": 1.037228459539476, "grad_norm": 0.15896694362163544, "learning_rate": 3.962778779126583e-06, "loss": 0.845, "step": 143290 }, { "epoch": 1.0373008462000624, "grad_norm": 0.1585065871477127, "learning_rate": 3.9627063924659965e-06, "loss": 0.8481, "step": 143300 }, { "epoch": 1.0373732328606484, "grad_norm": 0.1433423012495041, "learning_rate": 3.96263400580541e-06, "loss": 0.8576, "step": 143310 }, { "epoch": 1.0374456195212347, "grad_norm": 0.14972993731498718, "learning_rate": 3.962561619144824e-06, "loss": 0.8564, "step": 143320 }, { "epoch": 1.0375180061818208, "grad_norm": 0.1547120362520218, "learning_rate": 3.962489232484238e-06, "loss": 0.8754, "step": 143330 }, { "epoch": 1.037590392842407, "grad_norm": 0.15354642271995544, "learning_rate": 3.962416845823652e-06, "loss": 0.8516, "step": 143340 }, { "epoch": 1.0376627795029931, "grad_norm": 0.15381920337677002, "learning_rate": 3.9623444591630655e-06, "loss": 0.8588, "step": 143350 }, { "epoch": 1.0377351661635794, "grad_norm": 0.1736004650592804, "learning_rate": 3.962272072502479e-06, "loss": 0.863, "step": 143360 }, { "epoch": 1.0378075528241655, "grad_norm": 0.1495305895805359, "learning_rate": 3.9621996858418935e-06, "loss": 0.861, "step": 143370 }, { "epoch": 1.0378799394847518, "grad_norm": 0.16278128325939178, "learning_rate": 3.962127299181307e-06, "loss": 0.8673, "step": 143380 }, { "epoch": 1.0379523261453378, "grad_norm": 0.16452762484550476, "learning_rate": 3.962054912520721e-06, "loss": 0.8692, "step": 143390 }, { "epoch": 1.0380247128059241, "grad_norm": 0.15840129554271698, "learning_rate": 3.961982525860134e-06, "loss": 0.8589, "step": 143400 }, { "epoch": 1.0380970994665104, "grad_norm": 0.14838318526744843, "learning_rate": 3.961910139199549e-06, "loss": 0.8547, "step": 143410 }, { "epoch": 1.0381694861270965, "grad_norm": 0.16569004952907562, "learning_rate": 3.9618377525389625e-06, "loss": 0.847, "step": 143420 }, { "epoch": 1.0382418727876828, "grad_norm": 0.1536937952041626, "learning_rate": 3.961765365878376e-06, "loss": 0.8547, "step": 143430 }, { "epoch": 1.0383142594482688, "grad_norm": 0.16339300572872162, "learning_rate": 3.96169297921779e-06, "loss": 0.8637, "step": 143440 }, { "epoch": 1.0383866461088551, "grad_norm": 0.1539309173822403, "learning_rate": 3.961620592557204e-06, "loss": 0.8667, "step": 143450 }, { "epoch": 1.0384590327694412, "grad_norm": 0.16559149324893951, "learning_rate": 3.961548205896618e-06, "loss": 0.8595, "step": 143460 }, { "epoch": 1.0385314194300275, "grad_norm": 0.1649179905653, "learning_rate": 3.961475819236031e-06, "loss": 0.8685, "step": 143470 }, { "epoch": 1.0386038060906135, "grad_norm": 0.15256711840629578, "learning_rate": 3.961403432575445e-06, "loss": 0.856, "step": 143480 }, { "epoch": 1.0386761927511998, "grad_norm": 0.1560206562280655, "learning_rate": 3.9613310459148595e-06, "loss": 0.8654, "step": 143490 }, { "epoch": 1.038748579411786, "grad_norm": 0.16528300940990448, "learning_rate": 3.961258659254273e-06, "loss": 0.8678, "step": 143500 }, { "epoch": 1.0388209660723722, "grad_norm": 0.1495116949081421, "learning_rate": 3.961186272593687e-06, "loss": 0.8661, "step": 143510 }, { "epoch": 1.0388933527329585, "grad_norm": 0.16050077974796295, "learning_rate": 3.9611138859331e-06, "loss": 0.8632, "step": 143520 }, { "epoch": 1.0389657393935445, "grad_norm": 0.14846397936344147, "learning_rate": 3.961041499272514e-06, "loss": 0.8513, "step": 143530 }, { "epoch": 1.0390381260541308, "grad_norm": 0.15063372254371643, "learning_rate": 3.960969112611928e-06, "loss": 0.8696, "step": 143540 }, { "epoch": 1.0391105127147169, "grad_norm": 0.15840241312980652, "learning_rate": 3.960896725951342e-06, "loss": 0.8648, "step": 143550 }, { "epoch": 1.0391828993753032, "grad_norm": 0.22342956066131592, "learning_rate": 3.960824339290756e-06, "loss": 0.8598, "step": 143560 }, { "epoch": 1.0392552860358892, "grad_norm": 0.15167798101902008, "learning_rate": 3.960751952630169e-06, "loss": 0.8531, "step": 143570 }, { "epoch": 1.0393276726964755, "grad_norm": 0.158100888133049, "learning_rate": 3.960679565969584e-06, "loss": 0.8568, "step": 143580 }, { "epoch": 1.0394000593570616, "grad_norm": 0.1609964668750763, "learning_rate": 3.960607179308997e-06, "loss": 0.8516, "step": 143590 }, { "epoch": 1.0394724460176479, "grad_norm": 0.21195641160011292, "learning_rate": 3.960534792648411e-06, "loss": 0.8686, "step": 143600 }, { "epoch": 1.0395448326782342, "grad_norm": 0.1418725848197937, "learning_rate": 3.9604624059878246e-06, "loss": 0.8644, "step": 143610 }, { "epoch": 1.0396172193388202, "grad_norm": 0.15254880487918854, "learning_rate": 3.960390019327239e-06, "loss": 0.8557, "step": 143620 }, { "epoch": 1.0396896059994065, "grad_norm": 0.1374930441379547, "learning_rate": 3.960317632666653e-06, "loss": 0.8588, "step": 143630 }, { "epoch": 1.0397619926599926, "grad_norm": 0.1622588187456131, "learning_rate": 3.960245246006066e-06, "loss": 0.8537, "step": 143640 }, { "epoch": 1.0398343793205789, "grad_norm": 0.1530616283416748, "learning_rate": 3.96017285934548e-06, "loss": 0.863, "step": 143650 }, { "epoch": 1.039906765981165, "grad_norm": 0.16928766667842865, "learning_rate": 3.960100472684894e-06, "loss": 0.8646, "step": 143660 }, { "epoch": 1.0399791526417512, "grad_norm": 0.15035808086395264, "learning_rate": 3.960028086024308e-06, "loss": 0.8539, "step": 143670 }, { "epoch": 1.0400515393023373, "grad_norm": 0.1483323574066162, "learning_rate": 3.9599556993637216e-06, "loss": 0.8753, "step": 143680 }, { "epoch": 1.0401239259629236, "grad_norm": 0.15935401618480682, "learning_rate": 3.959883312703135e-06, "loss": 0.8563, "step": 143690 }, { "epoch": 1.0401963126235096, "grad_norm": 0.15291538834571838, "learning_rate": 3.95981092604255e-06, "loss": 0.8726, "step": 143700 }, { "epoch": 1.040268699284096, "grad_norm": 0.14946019649505615, "learning_rate": 3.959738539381963e-06, "loss": 0.8623, "step": 143710 }, { "epoch": 1.0403410859446822, "grad_norm": 0.13929761946201324, "learning_rate": 3.959666152721377e-06, "loss": 0.8612, "step": 143720 }, { "epoch": 1.0404134726052683, "grad_norm": 0.1517852246761322, "learning_rate": 3.9595937660607905e-06, "loss": 0.8605, "step": 143730 }, { "epoch": 1.0404858592658546, "grad_norm": 0.14445209503173828, "learning_rate": 3.959521379400205e-06, "loss": 0.8802, "step": 143740 }, { "epoch": 1.0405582459264406, "grad_norm": 0.1581372618675232, "learning_rate": 3.959448992739619e-06, "loss": 0.8513, "step": 143750 }, { "epoch": 1.040630632587027, "grad_norm": 0.16304130852222443, "learning_rate": 3.959376606079032e-06, "loss": 0.869, "step": 143760 }, { "epoch": 1.040703019247613, "grad_norm": 0.17016209661960602, "learning_rate": 3.959304219418446e-06, "loss": 0.8516, "step": 143770 }, { "epoch": 1.0407754059081993, "grad_norm": 0.1455002725124359, "learning_rate": 3.95923183275786e-06, "loss": 0.8541, "step": 143780 }, { "epoch": 1.0408477925687853, "grad_norm": 0.3031005263328552, "learning_rate": 3.959159446097274e-06, "loss": 0.8615, "step": 143790 }, { "epoch": 1.0409201792293716, "grad_norm": 0.15537060797214508, "learning_rate": 3.9590870594366875e-06, "loss": 0.8534, "step": 143800 }, { "epoch": 1.0409925658899577, "grad_norm": 0.18739604949951172, "learning_rate": 3.959014672776101e-06, "loss": 0.8595, "step": 143810 }, { "epoch": 1.041064952550544, "grad_norm": 0.15703223645687103, "learning_rate": 3.958942286115516e-06, "loss": 0.8672, "step": 143820 }, { "epoch": 1.0411373392111303, "grad_norm": 0.15714220702648163, "learning_rate": 3.958869899454928e-06, "loss": 0.8801, "step": 143830 }, { "epoch": 1.0412097258717163, "grad_norm": 0.14824450016021729, "learning_rate": 3.958797512794342e-06, "loss": 0.8673, "step": 143840 }, { "epoch": 1.0412821125323026, "grad_norm": 0.14315825700759888, "learning_rate": 3.9587251261337564e-06, "loss": 0.8553, "step": 143850 }, { "epoch": 1.0413544991928887, "grad_norm": 0.1702081710100174, "learning_rate": 3.95865273947317e-06, "loss": 0.8644, "step": 143860 }, { "epoch": 1.041426885853475, "grad_norm": 0.18592719733715057, "learning_rate": 3.958580352812584e-06, "loss": 0.865, "step": 143870 }, { "epoch": 1.041499272514061, "grad_norm": 0.15795159339904785, "learning_rate": 3.958507966151997e-06, "loss": 0.8566, "step": 143880 }, { "epoch": 1.0415716591746473, "grad_norm": 0.16113890707492828, "learning_rate": 3.958435579491412e-06, "loss": 0.8577, "step": 143890 }, { "epoch": 1.0416440458352334, "grad_norm": 0.29650846123695374, "learning_rate": 3.958363192830825e-06, "loss": 0.8594, "step": 143900 }, { "epoch": 1.0417164324958197, "grad_norm": 0.14694488048553467, "learning_rate": 3.958290806170239e-06, "loss": 0.8693, "step": 143910 }, { "epoch": 1.0417888191564058, "grad_norm": 0.15277814865112305, "learning_rate": 3.958218419509653e-06, "loss": 0.8697, "step": 143920 }, { "epoch": 1.041861205816992, "grad_norm": 0.16288557648658752, "learning_rate": 3.958146032849067e-06, "loss": 0.8669, "step": 143930 }, { "epoch": 1.0419335924775783, "grad_norm": 0.14526166021823883, "learning_rate": 3.958073646188481e-06, "loss": 0.8649, "step": 143940 }, { "epoch": 1.0420059791381644, "grad_norm": 0.15091979503631592, "learning_rate": 3.958001259527894e-06, "loss": 0.859, "step": 143950 }, { "epoch": 1.0420783657987507, "grad_norm": 0.14309091866016388, "learning_rate": 3.957928872867308e-06, "loss": 0.8614, "step": 143960 }, { "epoch": 1.0421507524593367, "grad_norm": 0.15367577970027924, "learning_rate": 3.957856486206722e-06, "loss": 0.8606, "step": 143970 }, { "epoch": 1.042223139119923, "grad_norm": 0.15176285803318024, "learning_rate": 3.957784099546136e-06, "loss": 0.8542, "step": 143980 }, { "epoch": 1.042295525780509, "grad_norm": 0.1759510636329651, "learning_rate": 3.95771171288555e-06, "loss": 0.858, "step": 143990 }, { "epoch": 1.0423679124410954, "grad_norm": 0.17628808319568634, "learning_rate": 3.957639326224963e-06, "loss": 0.8684, "step": 144000 }, { "epoch": 1.0424402991016815, "grad_norm": 0.14174871146678925, "learning_rate": 3.957566939564378e-06, "loss": 0.8565, "step": 144010 }, { "epoch": 1.0425126857622677, "grad_norm": 0.14700306951999664, "learning_rate": 3.957494552903791e-06, "loss": 0.8608, "step": 144020 }, { "epoch": 1.0425850724228538, "grad_norm": 0.14982403814792633, "learning_rate": 3.957422166243205e-06, "loss": 0.8552, "step": 144030 }, { "epoch": 1.04265745908344, "grad_norm": 0.1603231132030487, "learning_rate": 3.9573497795826185e-06, "loss": 0.8628, "step": 144040 }, { "epoch": 1.0427298457440264, "grad_norm": 0.173030823469162, "learning_rate": 3.957277392922033e-06, "loss": 0.8695, "step": 144050 }, { "epoch": 1.0428022324046125, "grad_norm": 0.18161053955554962, "learning_rate": 3.957205006261447e-06, "loss": 0.8633, "step": 144060 }, { "epoch": 1.0428746190651987, "grad_norm": 0.15263399481773376, "learning_rate": 3.95713261960086e-06, "loss": 0.8574, "step": 144070 }, { "epoch": 1.0429470057257848, "grad_norm": 0.15920861065387726, "learning_rate": 3.957060232940274e-06, "loss": 0.8629, "step": 144080 }, { "epoch": 1.043019392386371, "grad_norm": 0.15555734932422638, "learning_rate": 3.956987846279688e-06, "loss": 0.8768, "step": 144090 }, { "epoch": 1.0430917790469572, "grad_norm": 0.1550634801387787, "learning_rate": 3.956915459619102e-06, "loss": 0.8533, "step": 144100 }, { "epoch": 1.0431641657075434, "grad_norm": 0.14551453292369843, "learning_rate": 3.9568430729585155e-06, "loss": 0.8579, "step": 144110 }, { "epoch": 1.0432365523681295, "grad_norm": 0.14766032993793488, "learning_rate": 3.956770686297929e-06, "loss": 0.8678, "step": 144120 }, { "epoch": 1.0433089390287158, "grad_norm": 0.15198858082294464, "learning_rate": 3.956698299637344e-06, "loss": 0.8638, "step": 144130 }, { "epoch": 1.0433813256893019, "grad_norm": 0.16540324687957764, "learning_rate": 3.956625912976757e-06, "loss": 0.8662, "step": 144140 }, { "epoch": 1.0434537123498882, "grad_norm": 0.14517223834991455, "learning_rate": 3.956553526316171e-06, "loss": 0.8454, "step": 144150 }, { "epoch": 1.0435260990104744, "grad_norm": 0.15670832991600037, "learning_rate": 3.9564811396555845e-06, "loss": 0.8574, "step": 144160 }, { "epoch": 1.0435984856710605, "grad_norm": 0.16372807323932648, "learning_rate": 3.956408752994998e-06, "loss": 0.8549, "step": 144170 }, { "epoch": 1.0436708723316468, "grad_norm": 0.157616525888443, "learning_rate": 3.9563363663344125e-06, "loss": 0.844, "step": 144180 }, { "epoch": 1.0437432589922329, "grad_norm": 0.14867444336414337, "learning_rate": 3.956263979673826e-06, "loss": 0.8581, "step": 144190 }, { "epoch": 1.0438156456528191, "grad_norm": 0.15793468058109283, "learning_rate": 3.95619159301324e-06, "loss": 0.8503, "step": 144200 }, { "epoch": 1.0438880323134052, "grad_norm": 0.14294303953647614, "learning_rate": 3.956119206352653e-06, "loss": 0.8643, "step": 144210 }, { "epoch": 1.0439604189739915, "grad_norm": 0.15803156793117523, "learning_rate": 3.956046819692068e-06, "loss": 0.8626, "step": 144220 }, { "epoch": 1.0440328056345776, "grad_norm": 0.16307860612869263, "learning_rate": 3.9559744330314815e-06, "loss": 0.8663, "step": 144230 }, { "epoch": 1.0441051922951639, "grad_norm": 0.14012962579727173, "learning_rate": 3.955902046370895e-06, "loss": 0.8566, "step": 144240 }, { "epoch": 1.0441775789557501, "grad_norm": 0.19442588090896606, "learning_rate": 3.955829659710309e-06, "loss": 0.8765, "step": 144250 }, { "epoch": 1.0442499656163362, "grad_norm": 0.15497778356075287, "learning_rate": 3.955757273049723e-06, "loss": 0.8596, "step": 144260 }, { "epoch": 1.0443223522769225, "grad_norm": 0.15223382413387299, "learning_rate": 3.955684886389137e-06, "loss": 0.8815, "step": 144270 }, { "epoch": 1.0443947389375086, "grad_norm": 0.15136075019836426, "learning_rate": 3.95561249972855e-06, "loss": 0.8648, "step": 144280 }, { "epoch": 1.0444671255980948, "grad_norm": 0.15024292469024658, "learning_rate": 3.955540113067964e-06, "loss": 0.86, "step": 144290 }, { "epoch": 1.044539512258681, "grad_norm": 0.1588437706232071, "learning_rate": 3.9554677264073785e-06, "loss": 0.8607, "step": 144300 }, { "epoch": 1.0446118989192672, "grad_norm": 0.15004202723503113, "learning_rate": 3.955395339746792e-06, "loss": 0.8609, "step": 144310 }, { "epoch": 1.0446842855798533, "grad_norm": 0.14561429619789124, "learning_rate": 3.955322953086206e-06, "loss": 0.8673, "step": 144320 }, { "epoch": 1.0447566722404396, "grad_norm": 0.1565464287996292, "learning_rate": 3.955250566425619e-06, "loss": 0.8591, "step": 144330 }, { "epoch": 1.0448290589010256, "grad_norm": 0.1545923948287964, "learning_rate": 3.955178179765034e-06, "loss": 0.8445, "step": 144340 }, { "epoch": 1.044901445561612, "grad_norm": 0.15572689473628998, "learning_rate": 3.955105793104447e-06, "loss": 0.8552, "step": 144350 }, { "epoch": 1.0449738322221982, "grad_norm": 0.1447010338306427, "learning_rate": 3.95503340644386e-06, "loss": 0.8587, "step": 144360 }, { "epoch": 1.0450462188827843, "grad_norm": 0.15390467643737793, "learning_rate": 3.954961019783275e-06, "loss": 0.8602, "step": 144370 }, { "epoch": 1.0451186055433705, "grad_norm": 0.16179361939430237, "learning_rate": 3.954888633122688e-06, "loss": 0.8546, "step": 144380 }, { "epoch": 1.0451909922039566, "grad_norm": 0.1796809881925583, "learning_rate": 3.954816246462102e-06, "loss": 0.8601, "step": 144390 }, { "epoch": 1.045263378864543, "grad_norm": 0.16800828278064728, "learning_rate": 3.9547438598015155e-06, "loss": 0.8624, "step": 144400 }, { "epoch": 1.045335765525129, "grad_norm": 0.14844734966754913, "learning_rate": 3.95467147314093e-06, "loss": 0.8666, "step": 144410 }, { "epoch": 1.0454081521857153, "grad_norm": 0.1545286625623703, "learning_rate": 3.9545990864803436e-06, "loss": 0.8598, "step": 144420 }, { "epoch": 1.0454805388463013, "grad_norm": 0.1570822298526764, "learning_rate": 3.954526699819757e-06, "loss": 0.8608, "step": 144430 }, { "epoch": 1.0455529255068876, "grad_norm": 0.14228971302509308, "learning_rate": 3.954454313159171e-06, "loss": 0.8523, "step": 144440 }, { "epoch": 1.0456253121674737, "grad_norm": 0.14609818160533905, "learning_rate": 3.954381926498585e-06, "loss": 0.8584, "step": 144450 }, { "epoch": 1.04569769882806, "grad_norm": 0.1522809863090515, "learning_rate": 3.954309539837999e-06, "loss": 0.8569, "step": 144460 }, { "epoch": 1.0457700854886463, "grad_norm": 0.2182849943637848, "learning_rate": 3.9542371531774125e-06, "loss": 0.8412, "step": 144470 }, { "epoch": 1.0458424721492323, "grad_norm": 0.148259699344635, "learning_rate": 3.954164766516826e-06, "loss": 0.8521, "step": 144480 }, { "epoch": 1.0459148588098186, "grad_norm": 0.14706644415855408, "learning_rate": 3.954092379856241e-06, "loss": 0.8622, "step": 144490 }, { "epoch": 1.0459872454704047, "grad_norm": 0.15702740848064423, "learning_rate": 3.954019993195654e-06, "loss": 0.8563, "step": 144500 }, { "epoch": 1.046059632130991, "grad_norm": 0.1426214873790741, "learning_rate": 3.953947606535068e-06, "loss": 0.8628, "step": 144510 }, { "epoch": 1.046132018791577, "grad_norm": 0.14611200988292694, "learning_rate": 3.9538752198744814e-06, "loss": 0.8544, "step": 144520 }, { "epoch": 1.0462044054521633, "grad_norm": 0.15756775438785553, "learning_rate": 3.953802833213896e-06, "loss": 0.8568, "step": 144530 }, { "epoch": 1.0462767921127494, "grad_norm": 0.15759259462356567, "learning_rate": 3.9537304465533095e-06, "loss": 0.8634, "step": 144540 }, { "epoch": 1.0463491787733357, "grad_norm": 0.15958790481090546, "learning_rate": 3.953658059892723e-06, "loss": 0.8763, "step": 144550 }, { "epoch": 1.0464215654339217, "grad_norm": 0.15510474145412445, "learning_rate": 3.953585673232137e-06, "loss": 0.873, "step": 144560 }, { "epoch": 1.046493952094508, "grad_norm": 0.1597568690776825, "learning_rate": 3.953513286571551e-06, "loss": 0.8488, "step": 144570 }, { "epoch": 1.0465663387550943, "grad_norm": 0.14834840595722198, "learning_rate": 3.953440899910965e-06, "loss": 0.8545, "step": 144580 }, { "epoch": 1.0466387254156804, "grad_norm": 0.13909775018692017, "learning_rate": 3.9533685132503784e-06, "loss": 0.8478, "step": 144590 }, { "epoch": 1.0467111120762667, "grad_norm": 0.14474329352378845, "learning_rate": 3.953296126589792e-06, "loss": 0.8699, "step": 144600 }, { "epoch": 1.0467834987368527, "grad_norm": 0.16412761807441711, "learning_rate": 3.9532237399292065e-06, "loss": 0.8616, "step": 144610 }, { "epoch": 1.046855885397439, "grad_norm": 0.1485072374343872, "learning_rate": 3.95315135326862e-06, "loss": 0.8594, "step": 144620 }, { "epoch": 1.046928272058025, "grad_norm": 0.1748332530260086, "learning_rate": 3.953078966608034e-06, "loss": 0.8552, "step": 144630 }, { "epoch": 1.0470006587186114, "grad_norm": 0.15388593077659607, "learning_rate": 3.953006579947447e-06, "loss": 0.8633, "step": 144640 }, { "epoch": 1.0470730453791974, "grad_norm": 0.1663622111082077, "learning_rate": 3.952934193286862e-06, "loss": 0.8675, "step": 144650 }, { "epoch": 1.0471454320397837, "grad_norm": 0.1697302758693695, "learning_rate": 3.9528618066262754e-06, "loss": 0.8561, "step": 144660 }, { "epoch": 1.04721781870037, "grad_norm": 0.15296570956707, "learning_rate": 3.952789419965689e-06, "loss": 0.8699, "step": 144670 }, { "epoch": 1.047290205360956, "grad_norm": 0.1404290646314621, "learning_rate": 3.952717033305103e-06, "loss": 0.8566, "step": 144680 }, { "epoch": 1.0473625920215424, "grad_norm": 0.16811597347259521, "learning_rate": 3.952644646644517e-06, "loss": 0.8771, "step": 144690 }, { "epoch": 1.0474349786821284, "grad_norm": 0.15282821655273438, "learning_rate": 3.952572259983931e-06, "loss": 0.8612, "step": 144700 }, { "epoch": 1.0475073653427147, "grad_norm": 0.16427479684352875, "learning_rate": 3.952499873323344e-06, "loss": 0.8633, "step": 144710 }, { "epoch": 1.0475797520033008, "grad_norm": 0.16405102610588074, "learning_rate": 3.952427486662758e-06, "loss": 0.8652, "step": 144720 }, { "epoch": 1.047652138663887, "grad_norm": 0.14191152155399323, "learning_rate": 3.9523551000021724e-06, "loss": 0.8628, "step": 144730 }, { "epoch": 1.0477245253244731, "grad_norm": 0.15622809529304504, "learning_rate": 3.952282713341586e-06, "loss": 0.8697, "step": 144740 }, { "epoch": 1.0477969119850594, "grad_norm": 0.15117841958999634, "learning_rate": 3.952210326681e-06, "loss": 0.8628, "step": 144750 }, { "epoch": 1.0478692986456455, "grad_norm": 0.14459557831287384, "learning_rate": 3.952137940020413e-06, "loss": 0.861, "step": 144760 }, { "epoch": 1.0479416853062318, "grad_norm": 0.14188896119594574, "learning_rate": 3.952065553359827e-06, "loss": 0.8772, "step": 144770 }, { "epoch": 1.048014071966818, "grad_norm": 0.16736344993114471, "learning_rate": 3.951993166699241e-06, "loss": 0.8569, "step": 144780 }, { "epoch": 1.0480864586274041, "grad_norm": 0.16879191994667053, "learning_rate": 3.951920780038655e-06, "loss": 0.8496, "step": 144790 }, { "epoch": 1.0481588452879904, "grad_norm": 0.16716881096363068, "learning_rate": 3.951848393378069e-06, "loss": 0.8536, "step": 144800 }, { "epoch": 1.0482312319485765, "grad_norm": 0.14509662985801697, "learning_rate": 3.951776006717482e-06, "loss": 0.8582, "step": 144810 }, { "epoch": 1.0483036186091628, "grad_norm": 0.16360674798488617, "learning_rate": 3.951703620056897e-06, "loss": 0.8664, "step": 144820 }, { "epoch": 1.0483760052697488, "grad_norm": 0.15307435393333435, "learning_rate": 3.95163123339631e-06, "loss": 0.8481, "step": 144830 }, { "epoch": 1.0484483919303351, "grad_norm": 0.1502341479063034, "learning_rate": 3.951558846735724e-06, "loss": 0.8533, "step": 144840 }, { "epoch": 1.0485207785909212, "grad_norm": 0.15502779185771942, "learning_rate": 3.9514864600751375e-06, "loss": 0.8588, "step": 144850 }, { "epoch": 1.0485931652515075, "grad_norm": 0.15955467522144318, "learning_rate": 3.951414073414552e-06, "loss": 0.858, "step": 144860 }, { "epoch": 1.0486655519120935, "grad_norm": 0.14335303008556366, "learning_rate": 3.951341686753966e-06, "loss": 0.8591, "step": 144870 }, { "epoch": 1.0487379385726798, "grad_norm": 0.15716949105262756, "learning_rate": 3.951269300093379e-06, "loss": 0.8433, "step": 144880 }, { "epoch": 1.0488103252332661, "grad_norm": 0.16508370637893677, "learning_rate": 3.951196913432793e-06, "loss": 0.8454, "step": 144890 }, { "epoch": 1.0488827118938522, "grad_norm": 0.14553935825824738, "learning_rate": 3.9511245267722065e-06, "loss": 0.8657, "step": 144900 }, { "epoch": 1.0489550985544385, "grad_norm": 0.16298197209835052, "learning_rate": 3.95105214011162e-06, "loss": 0.8496, "step": 144910 }, { "epoch": 1.0490274852150245, "grad_norm": 0.15694205462932587, "learning_rate": 3.9509797534510345e-06, "loss": 0.8463, "step": 144920 }, { "epoch": 1.0490998718756108, "grad_norm": 0.14872339367866516, "learning_rate": 3.950907366790448e-06, "loss": 0.8512, "step": 144930 }, { "epoch": 1.0491722585361969, "grad_norm": 0.14613871276378632, "learning_rate": 3.950834980129862e-06, "loss": 0.8468, "step": 144940 }, { "epoch": 1.0492446451967832, "grad_norm": 0.175027996301651, "learning_rate": 3.950762593469275e-06, "loss": 0.8601, "step": 144950 }, { "epoch": 1.0493170318573692, "grad_norm": 0.15894457697868347, "learning_rate": 3.950690206808689e-06, "loss": 0.8597, "step": 144960 }, { "epoch": 1.0493894185179555, "grad_norm": 0.14936257898807526, "learning_rate": 3.9506178201481035e-06, "loss": 0.8526, "step": 144970 }, { "epoch": 1.0494618051785416, "grad_norm": 0.15866105258464813, "learning_rate": 3.950545433487517e-06, "loss": 0.8633, "step": 144980 }, { "epoch": 1.0495341918391279, "grad_norm": 0.1673537641763687, "learning_rate": 3.950473046826931e-06, "loss": 0.8502, "step": 144990 }, { "epoch": 1.0496065784997142, "grad_norm": 0.14420992136001587, "learning_rate": 3.950400660166344e-06, "loss": 0.8576, "step": 145000 }, { "epoch": 1.0496789651603002, "grad_norm": 0.16589012742042542, "learning_rate": 3.950328273505759e-06, "loss": 0.8598, "step": 145010 }, { "epoch": 1.0497513518208865, "grad_norm": 0.15447083115577698, "learning_rate": 3.950255886845172e-06, "loss": 0.8461, "step": 145020 }, { "epoch": 1.0498237384814726, "grad_norm": 0.15255771577358246, "learning_rate": 3.950183500184586e-06, "loss": 0.8643, "step": 145030 }, { "epoch": 1.0498961251420589, "grad_norm": 0.17621764540672302, "learning_rate": 3.950111113524e-06, "loss": 0.8766, "step": 145040 }, { "epoch": 1.049968511802645, "grad_norm": 0.15463955700397491, "learning_rate": 3.950038726863414e-06, "loss": 0.8675, "step": 145050 }, { "epoch": 1.0500408984632312, "grad_norm": 0.14184853434562683, "learning_rate": 3.949966340202828e-06, "loss": 0.8533, "step": 145060 }, { "epoch": 1.0501132851238173, "grad_norm": 0.1818603277206421, "learning_rate": 3.949893953542241e-06, "loss": 0.8644, "step": 145070 }, { "epoch": 1.0501856717844036, "grad_norm": 0.1516370177268982, "learning_rate": 3.949821566881655e-06, "loss": 0.8632, "step": 145080 }, { "epoch": 1.0502580584449897, "grad_norm": 0.15722733736038208, "learning_rate": 3.949749180221069e-06, "loss": 0.8562, "step": 145090 }, { "epoch": 1.050330445105576, "grad_norm": 0.1518261581659317, "learning_rate": 3.949676793560483e-06, "loss": 0.8694, "step": 145100 }, { "epoch": 1.0504028317661622, "grad_norm": 0.15784966945648193, "learning_rate": 3.949604406899897e-06, "loss": 0.8699, "step": 145110 }, { "epoch": 1.0504752184267483, "grad_norm": 0.15036171674728394, "learning_rate": 3.94953202023931e-06, "loss": 0.8759, "step": 145120 }, { "epoch": 1.0505476050873346, "grad_norm": 0.1495043933391571, "learning_rate": 3.949459633578725e-06, "loss": 0.8592, "step": 145130 }, { "epoch": 1.0506199917479206, "grad_norm": 0.16956430673599243, "learning_rate": 3.949387246918138e-06, "loss": 0.8543, "step": 145140 }, { "epoch": 1.050692378408507, "grad_norm": 0.1504315882921219, "learning_rate": 3.949314860257552e-06, "loss": 0.8614, "step": 145150 }, { "epoch": 1.050764765069093, "grad_norm": 0.16208255290985107, "learning_rate": 3.9492424735969656e-06, "loss": 0.8619, "step": 145160 }, { "epoch": 1.0508371517296793, "grad_norm": 0.15502387285232544, "learning_rate": 3.94917008693638e-06, "loss": 0.8673, "step": 145170 }, { "epoch": 1.0509095383902654, "grad_norm": 0.1422945111989975, "learning_rate": 3.949097700275794e-06, "loss": 0.8584, "step": 145180 }, { "epoch": 1.0509819250508516, "grad_norm": 0.15001416206359863, "learning_rate": 3.949025313615207e-06, "loss": 0.8614, "step": 145190 }, { "epoch": 1.0510543117114377, "grad_norm": 0.1533329039812088, "learning_rate": 3.948952926954621e-06, "loss": 0.8755, "step": 145200 }, { "epoch": 1.051126698372024, "grad_norm": 0.16097886860370636, "learning_rate": 3.948880540294035e-06, "loss": 0.8498, "step": 145210 }, { "epoch": 1.0511990850326103, "grad_norm": 0.16414912045001984, "learning_rate": 3.948808153633449e-06, "loss": 0.8579, "step": 145220 }, { "epoch": 1.0512714716931963, "grad_norm": 0.1741316169500351, "learning_rate": 3.948735766972863e-06, "loss": 0.8486, "step": 145230 }, { "epoch": 1.0513438583537826, "grad_norm": 0.15596316754817963, "learning_rate": 3.948663380312276e-06, "loss": 0.8546, "step": 145240 }, { "epoch": 1.0514162450143687, "grad_norm": 0.15392540395259857, "learning_rate": 3.948590993651691e-06, "loss": 0.8443, "step": 145250 }, { "epoch": 1.051488631674955, "grad_norm": 0.15028880536556244, "learning_rate": 3.948518606991104e-06, "loss": 0.8501, "step": 145260 }, { "epoch": 1.051561018335541, "grad_norm": 0.14961551129817963, "learning_rate": 3.948446220330518e-06, "loss": 0.8549, "step": 145270 }, { "epoch": 1.0516334049961273, "grad_norm": 0.16350728273391724, "learning_rate": 3.9483738336699315e-06, "loss": 0.8723, "step": 145280 }, { "epoch": 1.0517057916567134, "grad_norm": 0.1536688506603241, "learning_rate": 3.948301447009346e-06, "loss": 0.8512, "step": 145290 }, { "epoch": 1.0517781783172997, "grad_norm": 0.1536385864019394, "learning_rate": 3.94822906034876e-06, "loss": 0.8598, "step": 145300 }, { "epoch": 1.051850564977886, "grad_norm": 0.1381438672542572, "learning_rate": 3.948156673688173e-06, "loss": 0.8638, "step": 145310 }, { "epoch": 1.051922951638472, "grad_norm": 0.17076510190963745, "learning_rate": 3.948084287027587e-06, "loss": 0.8629, "step": 145320 }, { "epoch": 1.0519953382990583, "grad_norm": 0.15944698452949524, "learning_rate": 3.948011900367001e-06, "loss": 0.8617, "step": 145330 }, { "epoch": 1.0520677249596444, "grad_norm": 0.15273165702819824, "learning_rate": 3.947939513706415e-06, "loss": 0.8553, "step": 145340 }, { "epoch": 1.0521401116202307, "grad_norm": 0.14267678558826447, "learning_rate": 3.9478671270458285e-06, "loss": 0.8529, "step": 145350 }, { "epoch": 1.0522124982808168, "grad_norm": 0.1617887020111084, "learning_rate": 3.947794740385242e-06, "loss": 0.8506, "step": 145360 }, { "epoch": 1.052284884941403, "grad_norm": 0.1612195074558258, "learning_rate": 3.947722353724657e-06, "loss": 0.8655, "step": 145370 }, { "epoch": 1.052357271601989, "grad_norm": 0.16076426208019257, "learning_rate": 3.94764996706407e-06, "loss": 0.853, "step": 145380 }, { "epoch": 1.0524296582625754, "grad_norm": 0.14427517354488373, "learning_rate": 3.947577580403484e-06, "loss": 0.8647, "step": 145390 }, { "epoch": 1.0525020449231615, "grad_norm": 0.1446637511253357, "learning_rate": 3.9475051937428974e-06, "loss": 0.8605, "step": 145400 }, { "epoch": 1.0525744315837477, "grad_norm": 0.15358710289001465, "learning_rate": 3.947432807082311e-06, "loss": 0.8512, "step": 145410 }, { "epoch": 1.052646818244334, "grad_norm": 0.1505446434020996, "learning_rate": 3.947360420421725e-06, "loss": 0.8567, "step": 145420 }, { "epoch": 1.05271920490492, "grad_norm": 0.1601179987192154, "learning_rate": 3.947288033761138e-06, "loss": 0.8596, "step": 145430 }, { "epoch": 1.0527915915655064, "grad_norm": 0.1553214192390442, "learning_rate": 3.947215647100553e-06, "loss": 0.8566, "step": 145440 }, { "epoch": 1.0528639782260925, "grad_norm": 0.1488734483718872, "learning_rate": 3.947143260439966e-06, "loss": 0.8725, "step": 145450 }, { "epoch": 1.0529363648866787, "grad_norm": 0.15073513984680176, "learning_rate": 3.94707087377938e-06, "loss": 0.8653, "step": 145460 }, { "epoch": 1.0530087515472648, "grad_norm": 0.14692334830760956, "learning_rate": 3.946998487118794e-06, "loss": 0.8713, "step": 145470 }, { "epoch": 1.053081138207851, "grad_norm": 0.14102409780025482, "learning_rate": 3.946926100458208e-06, "loss": 0.866, "step": 145480 }, { "epoch": 1.0531535248684372, "grad_norm": 0.15512901544570923, "learning_rate": 3.946853713797622e-06, "loss": 0.8619, "step": 145490 }, { "epoch": 1.0532259115290235, "grad_norm": 0.15089280903339386, "learning_rate": 3.946781327137035e-06, "loss": 0.8563, "step": 145500 }, { "epoch": 1.0532982981896095, "grad_norm": 0.14949414134025574, "learning_rate": 3.946708940476449e-06, "loss": 0.8751, "step": 145510 }, { "epoch": 1.0533706848501958, "grad_norm": 0.18709976971149445, "learning_rate": 3.946636553815863e-06, "loss": 0.8556, "step": 145520 }, { "epoch": 1.053443071510782, "grad_norm": 0.15413489937782288, "learning_rate": 3.946564167155277e-06, "loss": 0.8572, "step": 145530 }, { "epoch": 1.0535154581713682, "grad_norm": 0.16273820400238037, "learning_rate": 3.946491780494691e-06, "loss": 0.8619, "step": 145540 }, { "epoch": 1.0535878448319544, "grad_norm": 0.1684591919183731, "learning_rate": 3.946419393834104e-06, "loss": 0.8659, "step": 145550 }, { "epoch": 1.0536602314925405, "grad_norm": 0.14503608644008636, "learning_rate": 3.946347007173518e-06, "loss": 0.8567, "step": 145560 }, { "epoch": 1.0537326181531268, "grad_norm": 0.1562882363796234, "learning_rate": 3.946274620512932e-06, "loss": 0.8636, "step": 145570 }, { "epoch": 1.0538050048137129, "grad_norm": 0.1774781495332718, "learning_rate": 3.946202233852346e-06, "loss": 0.8586, "step": 145580 }, { "epoch": 1.0538773914742992, "grad_norm": 0.1477038562297821, "learning_rate": 3.9461298471917595e-06, "loss": 0.8533, "step": 145590 }, { "epoch": 1.0539497781348852, "grad_norm": 0.15526357293128967, "learning_rate": 3.946057460531173e-06, "loss": 0.8632, "step": 145600 }, { "epoch": 1.0540221647954715, "grad_norm": 0.15866219997406006, "learning_rate": 3.945985073870588e-06, "loss": 0.8617, "step": 145610 }, { "epoch": 1.0540945514560576, "grad_norm": 0.1477918028831482, "learning_rate": 3.945912687210001e-06, "loss": 0.8589, "step": 145620 }, { "epoch": 1.0541669381166439, "grad_norm": 0.15647591650485992, "learning_rate": 3.945840300549415e-06, "loss": 0.858, "step": 145630 }, { "epoch": 1.0542393247772301, "grad_norm": 0.15782573819160461, "learning_rate": 3.9457679138888285e-06, "loss": 0.8497, "step": 145640 }, { "epoch": 1.0543117114378162, "grad_norm": 0.15249016880989075, "learning_rate": 3.945695527228243e-06, "loss": 0.8575, "step": 145650 }, { "epoch": 1.0543840980984025, "grad_norm": 0.15300941467285156, "learning_rate": 3.9456231405676565e-06, "loss": 0.8714, "step": 145660 }, { "epoch": 1.0544564847589886, "grad_norm": 0.15666401386260986, "learning_rate": 3.94555075390707e-06, "loss": 0.8728, "step": 145670 }, { "epoch": 1.0545288714195749, "grad_norm": 0.13935892283916473, "learning_rate": 3.945478367246484e-06, "loss": 0.8641, "step": 145680 }, { "epoch": 1.054601258080161, "grad_norm": 0.1481926590204239, "learning_rate": 3.945405980585898e-06, "loss": 0.8426, "step": 145690 }, { "epoch": 1.0546736447407472, "grad_norm": 0.14881548285484314, "learning_rate": 3.945333593925312e-06, "loss": 0.8627, "step": 145700 }, { "epoch": 1.0547460314013333, "grad_norm": 0.15182042121887207, "learning_rate": 3.9452612072647255e-06, "loss": 0.8521, "step": 145710 }, { "epoch": 1.0548184180619196, "grad_norm": 0.14741186797618866, "learning_rate": 3.945188820604139e-06, "loss": 0.8544, "step": 145720 }, { "epoch": 1.0548908047225058, "grad_norm": 0.1624380648136139, "learning_rate": 3.9451164339435536e-06, "loss": 0.8632, "step": 145730 }, { "epoch": 1.054963191383092, "grad_norm": 0.14663566648960114, "learning_rate": 3.945044047282967e-06, "loss": 0.8584, "step": 145740 }, { "epoch": 1.0550355780436782, "grad_norm": 0.1530272662639618, "learning_rate": 3.944971660622381e-06, "loss": 0.8637, "step": 145750 }, { "epoch": 1.0551079647042643, "grad_norm": 0.14073669910430908, "learning_rate": 3.944899273961794e-06, "loss": 0.8538, "step": 145760 }, { "epoch": 1.0551803513648506, "grad_norm": 0.1432807892560959, "learning_rate": 3.944826887301209e-06, "loss": 0.8553, "step": 145770 }, { "epoch": 1.0552527380254366, "grad_norm": 0.15058694779872894, "learning_rate": 3.9447545006406225e-06, "loss": 0.8688, "step": 145780 }, { "epoch": 1.055325124686023, "grad_norm": 0.14936070144176483, "learning_rate": 3.944682113980036e-06, "loss": 0.8571, "step": 145790 }, { "epoch": 1.055397511346609, "grad_norm": 0.1473982185125351, "learning_rate": 3.94460972731945e-06, "loss": 0.8464, "step": 145800 }, { "epoch": 1.0554698980071953, "grad_norm": 0.14957574009895325, "learning_rate": 3.944537340658864e-06, "loss": 0.863, "step": 145810 }, { "epoch": 1.0555422846677813, "grad_norm": 0.16294653713703156, "learning_rate": 3.944464953998278e-06, "loss": 0.8702, "step": 145820 }, { "epoch": 1.0556146713283676, "grad_norm": 0.15721982717514038, "learning_rate": 3.944392567337691e-06, "loss": 0.8644, "step": 145830 }, { "epoch": 1.055687057988954, "grad_norm": 0.15910634398460388, "learning_rate": 3.944320180677105e-06, "loss": 0.8531, "step": 145840 }, { "epoch": 1.05575944464954, "grad_norm": 0.14400066435337067, "learning_rate": 3.9442477940165195e-06, "loss": 0.8654, "step": 145850 }, { "epoch": 1.0558318313101263, "grad_norm": 0.15780583024024963, "learning_rate": 3.944175407355933e-06, "loss": 0.838, "step": 145860 }, { "epoch": 1.0559042179707123, "grad_norm": 0.14663490653038025, "learning_rate": 3.944103020695347e-06, "loss": 0.8495, "step": 145870 }, { "epoch": 1.0559766046312986, "grad_norm": 0.14983581006526947, "learning_rate": 3.94403063403476e-06, "loss": 0.8636, "step": 145880 }, { "epoch": 1.0560489912918847, "grad_norm": 0.15457117557525635, "learning_rate": 3.943958247374175e-06, "loss": 0.8544, "step": 145890 }, { "epoch": 1.056121377952471, "grad_norm": 0.15235184133052826, "learning_rate": 3.943885860713588e-06, "loss": 0.8751, "step": 145900 }, { "epoch": 1.056193764613057, "grad_norm": 0.14950567483901978, "learning_rate": 3.943813474053002e-06, "loss": 0.8557, "step": 145910 }, { "epoch": 1.0562661512736433, "grad_norm": 0.15957853198051453, "learning_rate": 3.943741087392416e-06, "loss": 0.8671, "step": 145920 }, { "epoch": 1.0563385379342294, "grad_norm": 0.15900222957134247, "learning_rate": 3.94366870073183e-06, "loss": 0.8576, "step": 145930 }, { "epoch": 1.0564109245948157, "grad_norm": 0.16637851297855377, "learning_rate": 3.943596314071244e-06, "loss": 0.862, "step": 145940 }, { "epoch": 1.056483311255402, "grad_norm": 0.14471204578876495, "learning_rate": 3.9435239274106565e-06, "loss": 0.8473, "step": 145950 }, { "epoch": 1.056555697915988, "grad_norm": 0.16126057505607605, "learning_rate": 3.943451540750071e-06, "loss": 0.8646, "step": 145960 }, { "epoch": 1.0566280845765743, "grad_norm": 0.15724702179431915, "learning_rate": 3.943379154089485e-06, "loss": 0.8513, "step": 145970 }, { "epoch": 1.0567004712371604, "grad_norm": 0.14262165129184723, "learning_rate": 3.943306767428898e-06, "loss": 0.8657, "step": 145980 }, { "epoch": 1.0567728578977467, "grad_norm": 0.15832731127738953, "learning_rate": 3.943234380768312e-06, "loss": 0.8564, "step": 145990 }, { "epoch": 1.0568452445583327, "grad_norm": 0.16884136199951172, "learning_rate": 3.943161994107726e-06, "loss": 0.8436, "step": 146000 }, { "epoch": 1.056917631218919, "grad_norm": 0.16266973316669464, "learning_rate": 3.94308960744714e-06, "loss": 0.866, "step": 146010 }, { "epoch": 1.056990017879505, "grad_norm": 0.15265819430351257, "learning_rate": 3.9430172207865535e-06, "loss": 0.8672, "step": 146020 }, { "epoch": 1.0570624045400914, "grad_norm": 0.14945794641971588, "learning_rate": 3.942944834125967e-06, "loss": 0.852, "step": 146030 }, { "epoch": 1.0571347912006774, "grad_norm": 0.15644454956054688, "learning_rate": 3.942872447465382e-06, "loss": 0.8661, "step": 146040 }, { "epoch": 1.0572071778612637, "grad_norm": 0.21663767099380493, "learning_rate": 3.942800060804795e-06, "loss": 0.8755, "step": 146050 }, { "epoch": 1.05727956452185, "grad_norm": 0.16206009685993195, "learning_rate": 3.942727674144209e-06, "loss": 0.8679, "step": 146060 }, { "epoch": 1.057351951182436, "grad_norm": 0.15549540519714355, "learning_rate": 3.9426552874836224e-06, "loss": 0.8578, "step": 146070 }, { "epoch": 1.0574243378430224, "grad_norm": 0.16803592443466187, "learning_rate": 3.942582900823037e-06, "loss": 0.8662, "step": 146080 }, { "epoch": 1.0574967245036084, "grad_norm": 0.1614563912153244, "learning_rate": 3.9425105141624505e-06, "loss": 0.8658, "step": 146090 }, { "epoch": 1.0575691111641947, "grad_norm": 0.14888714253902435, "learning_rate": 3.942438127501864e-06, "loss": 0.8591, "step": 146100 }, { "epoch": 1.0576414978247808, "grad_norm": 0.16620272397994995, "learning_rate": 3.942365740841278e-06, "loss": 0.8625, "step": 146110 }, { "epoch": 1.057713884485367, "grad_norm": 0.28313207626342773, "learning_rate": 3.942293354180692e-06, "loss": 0.8607, "step": 146120 }, { "epoch": 1.0577862711459531, "grad_norm": 0.17238175868988037, "learning_rate": 3.942220967520106e-06, "loss": 0.8728, "step": 146130 }, { "epoch": 1.0578586578065394, "grad_norm": 0.14445391297340393, "learning_rate": 3.9421485808595194e-06, "loss": 0.8665, "step": 146140 }, { "epoch": 1.0579310444671255, "grad_norm": 0.15155573189258575, "learning_rate": 3.942076194198933e-06, "loss": 0.8549, "step": 146150 }, { "epoch": 1.0580034311277118, "grad_norm": 0.16635553538799286, "learning_rate": 3.9420038075383475e-06, "loss": 0.8578, "step": 146160 }, { "epoch": 1.058075817788298, "grad_norm": 0.19425910711288452, "learning_rate": 3.941931420877761e-06, "loss": 0.8583, "step": 146170 }, { "epoch": 1.0581482044488841, "grad_norm": 0.14456264674663544, "learning_rate": 3.941859034217175e-06, "loss": 0.8481, "step": 146180 }, { "epoch": 1.0582205911094704, "grad_norm": 0.15371955931186676, "learning_rate": 3.941786647556588e-06, "loss": 0.8503, "step": 146190 }, { "epoch": 1.0582929777700565, "grad_norm": 0.14967955648899078, "learning_rate": 3.941714260896002e-06, "loss": 0.8658, "step": 146200 }, { "epoch": 1.0583653644306428, "grad_norm": 0.14629121124744415, "learning_rate": 3.9416418742354164e-06, "loss": 0.8577, "step": 146210 }, { "epoch": 1.0584377510912288, "grad_norm": 0.13868474960327148, "learning_rate": 3.94156948757483e-06, "loss": 0.8489, "step": 146220 }, { "epoch": 1.0585101377518151, "grad_norm": 0.16160985827445984, "learning_rate": 3.941497100914244e-06, "loss": 0.8684, "step": 146230 }, { "epoch": 1.0585825244124012, "grad_norm": 0.23618648946285248, "learning_rate": 3.941424714253657e-06, "loss": 0.8524, "step": 146240 }, { "epoch": 1.0586549110729875, "grad_norm": 0.15129047632217407, "learning_rate": 3.941352327593072e-06, "loss": 0.8782, "step": 146250 }, { "epoch": 1.0587272977335735, "grad_norm": 0.144764244556427, "learning_rate": 3.941279940932485e-06, "loss": 0.8546, "step": 146260 }, { "epoch": 1.0587996843941598, "grad_norm": 0.14293217658996582, "learning_rate": 3.941207554271899e-06, "loss": 0.8508, "step": 146270 }, { "epoch": 1.0588720710547461, "grad_norm": 0.1651974767446518, "learning_rate": 3.941135167611313e-06, "loss": 0.8733, "step": 146280 }, { "epoch": 1.0589444577153322, "grad_norm": 0.15078642964363098, "learning_rate": 3.941062780950727e-06, "loss": 0.8563, "step": 146290 }, { "epoch": 1.0590168443759185, "grad_norm": 0.1709497570991516, "learning_rate": 3.940990394290141e-06, "loss": 0.8564, "step": 146300 }, { "epoch": 1.0590892310365045, "grad_norm": 0.15568095445632935, "learning_rate": 3.940918007629554e-06, "loss": 0.8716, "step": 146310 }, { "epoch": 1.0591616176970908, "grad_norm": 0.15851420164108276, "learning_rate": 3.940845620968968e-06, "loss": 0.861, "step": 146320 }, { "epoch": 1.059234004357677, "grad_norm": 0.15701240301132202, "learning_rate": 3.940773234308382e-06, "loss": 0.861, "step": 146330 }, { "epoch": 1.0593063910182632, "grad_norm": 0.160984069108963, "learning_rate": 3.940700847647796e-06, "loss": 0.8659, "step": 146340 }, { "epoch": 1.0593787776788492, "grad_norm": 0.15175053477287292, "learning_rate": 3.94062846098721e-06, "loss": 0.8628, "step": 146350 }, { "epoch": 1.0594511643394355, "grad_norm": 0.15705060958862305, "learning_rate": 3.940556074326623e-06, "loss": 0.8726, "step": 146360 }, { "epoch": 1.0595235510000216, "grad_norm": 0.1432766169309616, "learning_rate": 3.940483687666038e-06, "loss": 0.8457, "step": 146370 }, { "epoch": 1.059595937660608, "grad_norm": 0.15701285004615784, "learning_rate": 3.940411301005451e-06, "loss": 0.8669, "step": 146380 }, { "epoch": 1.0596683243211942, "grad_norm": 0.15151244401931763, "learning_rate": 3.940338914344865e-06, "loss": 0.8623, "step": 146390 }, { "epoch": 1.0597407109817802, "grad_norm": 0.19881872832775116, "learning_rate": 3.9402665276842785e-06, "loss": 0.8554, "step": 146400 }, { "epoch": 1.0598130976423665, "grad_norm": 0.14868730306625366, "learning_rate": 3.940194141023693e-06, "loss": 0.8686, "step": 146410 }, { "epoch": 1.0598854843029526, "grad_norm": 0.14795683324337006, "learning_rate": 3.940121754363107e-06, "loss": 0.858, "step": 146420 }, { "epoch": 1.0599578709635389, "grad_norm": 0.144784614443779, "learning_rate": 3.94004936770252e-06, "loss": 0.8613, "step": 146430 }, { "epoch": 1.060030257624125, "grad_norm": 0.15263043344020844, "learning_rate": 3.939976981041934e-06, "loss": 0.8662, "step": 146440 }, { "epoch": 1.0601026442847112, "grad_norm": 0.1521047055721283, "learning_rate": 3.939904594381348e-06, "loss": 0.8434, "step": 146450 }, { "epoch": 1.0601750309452973, "grad_norm": 0.15608127415180206, "learning_rate": 3.939832207720762e-06, "loss": 0.855, "step": 146460 }, { "epoch": 1.0602474176058836, "grad_norm": 0.15321072936058044, "learning_rate": 3.9397598210601756e-06, "loss": 0.8676, "step": 146470 }, { "epoch": 1.0603198042664699, "grad_norm": 0.175469309091568, "learning_rate": 3.939687434399589e-06, "loss": 0.857, "step": 146480 }, { "epoch": 1.060392190927056, "grad_norm": 0.15530693531036377, "learning_rate": 3.939615047739003e-06, "loss": 0.8568, "step": 146490 }, { "epoch": 1.0604645775876422, "grad_norm": 0.14557047188282013, "learning_rate": 3.939542661078416e-06, "loss": 0.8633, "step": 146500 }, { "epoch": 1.0605369642482283, "grad_norm": 0.1573035567998886, "learning_rate": 3.93947027441783e-06, "loss": 0.8631, "step": 146510 }, { "epoch": 1.0606093509088146, "grad_norm": 0.15351547300815582, "learning_rate": 3.9393978877572445e-06, "loss": 0.8401, "step": 146520 }, { "epoch": 1.0606817375694007, "grad_norm": 0.21780115365982056, "learning_rate": 3.939325501096658e-06, "loss": 0.8566, "step": 146530 }, { "epoch": 1.060754124229987, "grad_norm": 0.19611738622188568, "learning_rate": 3.939253114436072e-06, "loss": 0.8488, "step": 146540 }, { "epoch": 1.060826510890573, "grad_norm": 0.1587083786725998, "learning_rate": 3.939180727775485e-06, "loss": 0.8542, "step": 146550 }, { "epoch": 1.0608988975511593, "grad_norm": 0.17451640963554382, "learning_rate": 3.9391083411149e-06, "loss": 0.8686, "step": 146560 }, { "epoch": 1.0609712842117454, "grad_norm": 0.1856866180896759, "learning_rate": 3.939035954454313e-06, "loss": 0.8569, "step": 146570 }, { "epoch": 1.0610436708723316, "grad_norm": 0.14287951588630676, "learning_rate": 3.938963567793727e-06, "loss": 0.8559, "step": 146580 }, { "epoch": 1.061116057532918, "grad_norm": 0.17988088726997375, "learning_rate": 3.938891181133141e-06, "loss": 0.8711, "step": 146590 }, { "epoch": 1.061188444193504, "grad_norm": 0.1705288589000702, "learning_rate": 3.938818794472555e-06, "loss": 0.8664, "step": 146600 }, { "epoch": 1.0612608308540903, "grad_norm": 0.16595390439033508, "learning_rate": 3.938746407811969e-06, "loss": 0.8768, "step": 146610 }, { "epoch": 1.0613332175146764, "grad_norm": 0.15512683987617493, "learning_rate": 3.938674021151382e-06, "loss": 0.86, "step": 146620 }, { "epoch": 1.0614056041752626, "grad_norm": 0.1589556485414505, "learning_rate": 3.938601634490796e-06, "loss": 0.8608, "step": 146630 }, { "epoch": 1.0614779908358487, "grad_norm": 0.1618974655866623, "learning_rate": 3.93852924783021e-06, "loss": 0.8557, "step": 146640 }, { "epoch": 1.061550377496435, "grad_norm": 0.1717182695865631, "learning_rate": 3.938456861169624e-06, "loss": 0.8621, "step": 146650 }, { "epoch": 1.061622764157021, "grad_norm": 0.1516030728816986, "learning_rate": 3.938384474509038e-06, "loss": 0.8814, "step": 146660 }, { "epoch": 1.0616951508176073, "grad_norm": 0.14224623143672943, "learning_rate": 3.938312087848451e-06, "loss": 0.8649, "step": 146670 }, { "epoch": 1.0617675374781934, "grad_norm": 0.1500004678964615, "learning_rate": 3.938239701187866e-06, "loss": 0.8508, "step": 146680 }, { "epoch": 1.0618399241387797, "grad_norm": 0.14933015406131744, "learning_rate": 3.938167314527279e-06, "loss": 0.8643, "step": 146690 }, { "epoch": 1.061912310799366, "grad_norm": 0.15696154534816742, "learning_rate": 3.938094927866693e-06, "loss": 0.8618, "step": 146700 }, { "epoch": 1.061984697459952, "grad_norm": 0.1518896073102951, "learning_rate": 3.9380225412061066e-06, "loss": 0.8556, "step": 146710 }, { "epoch": 1.0620570841205383, "grad_norm": 0.1495170295238495, "learning_rate": 3.937950154545521e-06, "loss": 0.855, "step": 146720 }, { "epoch": 1.0621294707811244, "grad_norm": 0.15522539615631104, "learning_rate": 3.937877767884935e-06, "loss": 0.8592, "step": 146730 }, { "epoch": 1.0622018574417107, "grad_norm": 0.14690908789634705, "learning_rate": 3.937805381224348e-06, "loss": 0.8489, "step": 146740 }, { "epoch": 1.0622742441022968, "grad_norm": 0.17294001579284668, "learning_rate": 3.937732994563762e-06, "loss": 0.8548, "step": 146750 }, { "epoch": 1.062346630762883, "grad_norm": 0.16013190150260925, "learning_rate": 3.937660607903176e-06, "loss": 0.8541, "step": 146760 }, { "epoch": 1.0624190174234691, "grad_norm": 0.15690450370311737, "learning_rate": 3.93758822124259e-06, "loss": 0.8535, "step": 146770 }, { "epoch": 1.0624914040840554, "grad_norm": 0.1489821821451187, "learning_rate": 3.937515834582004e-06, "loss": 0.8614, "step": 146780 }, { "epoch": 1.0625637907446417, "grad_norm": 0.15481127798557281, "learning_rate": 3.937443447921417e-06, "loss": 0.8483, "step": 146790 }, { "epoch": 1.0626361774052278, "grad_norm": 0.16512636840343475, "learning_rate": 3.937371061260832e-06, "loss": 0.8513, "step": 146800 }, { "epoch": 1.062708564065814, "grad_norm": 0.1462734192609787, "learning_rate": 3.937298674600245e-06, "loss": 0.8618, "step": 146810 }, { "epoch": 1.0627809507264, "grad_norm": 0.1540970355272293, "learning_rate": 3.937226287939659e-06, "loss": 0.8699, "step": 146820 }, { "epoch": 1.0628533373869864, "grad_norm": 0.14309470355510712, "learning_rate": 3.9371539012790725e-06, "loss": 0.8544, "step": 146830 }, { "epoch": 1.0629257240475725, "grad_norm": 0.14380641281604767, "learning_rate": 3.937081514618486e-06, "loss": 0.8579, "step": 146840 }, { "epoch": 1.0629981107081587, "grad_norm": 0.15650497376918793, "learning_rate": 3.937009127957901e-06, "loss": 0.8614, "step": 146850 }, { "epoch": 1.0630704973687448, "grad_norm": 0.15885330736637115, "learning_rate": 3.936936741297314e-06, "loss": 0.8604, "step": 146860 }, { "epoch": 1.063142884029331, "grad_norm": 0.15795765817165375, "learning_rate": 3.936864354636728e-06, "loss": 0.8521, "step": 146870 }, { "epoch": 1.0632152706899172, "grad_norm": 0.15613383054733276, "learning_rate": 3.9367919679761414e-06, "loss": 0.8605, "step": 146880 }, { "epoch": 1.0632876573505035, "grad_norm": 0.14996646344661713, "learning_rate": 3.936719581315556e-06, "loss": 0.863, "step": 146890 }, { "epoch": 1.0633600440110897, "grad_norm": 0.1522240936756134, "learning_rate": 3.9366471946549695e-06, "loss": 0.8565, "step": 146900 }, { "epoch": 1.0634324306716758, "grad_norm": 0.1428409069776535, "learning_rate": 3.936574807994383e-06, "loss": 0.8535, "step": 146910 }, { "epoch": 1.063504817332262, "grad_norm": 0.14927205443382263, "learning_rate": 3.936502421333797e-06, "loss": 0.8608, "step": 146920 }, { "epoch": 1.0635772039928482, "grad_norm": 0.15207083523273468, "learning_rate": 3.936430034673211e-06, "loss": 0.8657, "step": 146930 }, { "epoch": 1.0636495906534345, "grad_norm": 0.15743815898895264, "learning_rate": 3.936357648012625e-06, "loss": 0.8576, "step": 146940 }, { "epoch": 1.0637219773140205, "grad_norm": 0.14594559371471405, "learning_rate": 3.9362852613520384e-06, "loss": 0.8709, "step": 146950 }, { "epoch": 1.0637943639746068, "grad_norm": 0.1405821442604065, "learning_rate": 3.936212874691452e-06, "loss": 0.8572, "step": 146960 }, { "epoch": 1.0638667506351929, "grad_norm": 0.15676797926425934, "learning_rate": 3.9361404880308665e-06, "loss": 0.8538, "step": 146970 }, { "epoch": 1.0639391372957792, "grad_norm": 0.14403656125068665, "learning_rate": 3.93606810137028e-06, "loss": 0.8515, "step": 146980 }, { "epoch": 1.0640115239563652, "grad_norm": 0.14301113784313202, "learning_rate": 3.935995714709694e-06, "loss": 0.8572, "step": 146990 }, { "epoch": 1.0640839106169515, "grad_norm": 0.15309615433216095, "learning_rate": 3.935923328049107e-06, "loss": 0.861, "step": 147000 }, { "epoch": 1.0641562972775378, "grad_norm": 0.3443004786968231, "learning_rate": 3.935850941388521e-06, "loss": 0.8656, "step": 147010 }, { "epoch": 1.0642286839381239, "grad_norm": 0.13839933276176453, "learning_rate": 3.935778554727935e-06, "loss": 0.8555, "step": 147020 }, { "epoch": 1.0643010705987102, "grad_norm": 0.14977069199085236, "learning_rate": 3.935706168067348e-06, "loss": 0.8623, "step": 147030 }, { "epoch": 1.0643734572592962, "grad_norm": 0.166509747505188, "learning_rate": 3.935633781406763e-06, "loss": 0.8703, "step": 147040 }, { "epoch": 1.0644458439198825, "grad_norm": 0.15191148221492767, "learning_rate": 3.935561394746176e-06, "loss": 0.8508, "step": 147050 }, { "epoch": 1.0645182305804686, "grad_norm": 0.15983009338378906, "learning_rate": 3.93548900808559e-06, "loss": 0.8578, "step": 147060 }, { "epoch": 1.0645906172410549, "grad_norm": 0.1482071876525879, "learning_rate": 3.9354166214250035e-06, "loss": 0.8419, "step": 147070 }, { "epoch": 1.064663003901641, "grad_norm": 0.1566699892282486, "learning_rate": 3.935344234764418e-06, "loss": 0.8658, "step": 147080 }, { "epoch": 1.0647353905622272, "grad_norm": 0.14756731688976288, "learning_rate": 3.935271848103832e-06, "loss": 0.8493, "step": 147090 }, { "epoch": 1.0648077772228133, "grad_norm": 0.15499506890773773, "learning_rate": 3.935199461443245e-06, "loss": 0.8484, "step": 147100 }, { "epoch": 1.0648801638833996, "grad_norm": 0.15901382267475128, "learning_rate": 3.935127074782659e-06, "loss": 0.8613, "step": 147110 }, { "epoch": 1.0649525505439859, "grad_norm": 0.1550762802362442, "learning_rate": 3.935054688122073e-06, "loss": 0.8441, "step": 147120 }, { "epoch": 1.065024937204572, "grad_norm": 0.17926537990570068, "learning_rate": 3.934982301461487e-06, "loss": 0.8558, "step": 147130 }, { "epoch": 1.0650973238651582, "grad_norm": 0.31348535418510437, "learning_rate": 3.9349099148009005e-06, "loss": 0.8624, "step": 147140 }, { "epoch": 1.0651697105257443, "grad_norm": 0.15891113877296448, "learning_rate": 3.934837528140314e-06, "loss": 0.8495, "step": 147150 }, { "epoch": 1.0652420971863306, "grad_norm": 0.15291360020637512, "learning_rate": 3.934765141479729e-06, "loss": 0.8455, "step": 147160 }, { "epoch": 1.0653144838469166, "grad_norm": 0.25229060649871826, "learning_rate": 3.934692754819142e-06, "loss": 0.8589, "step": 147170 }, { "epoch": 1.065386870507503, "grad_norm": 0.17564287781715393, "learning_rate": 3.934620368158556e-06, "loss": 0.8675, "step": 147180 }, { "epoch": 1.065459257168089, "grad_norm": 0.15342505276203156, "learning_rate": 3.9345479814979695e-06, "loss": 0.8534, "step": 147190 }, { "epoch": 1.0655316438286753, "grad_norm": 0.15959271788597107, "learning_rate": 3.934475594837384e-06, "loss": 0.8614, "step": 147200 }, { "epoch": 1.0656040304892613, "grad_norm": 0.15670059621334076, "learning_rate": 3.9344032081767976e-06, "loss": 0.8668, "step": 147210 }, { "epoch": 1.0656764171498476, "grad_norm": 0.16998760402202606, "learning_rate": 3.934330821516211e-06, "loss": 0.8682, "step": 147220 }, { "epoch": 1.065748803810434, "grad_norm": 0.1576838493347168, "learning_rate": 3.934258434855625e-06, "loss": 0.848, "step": 147230 }, { "epoch": 1.06582119047102, "grad_norm": 0.13807885348796844, "learning_rate": 3.934186048195039e-06, "loss": 0.8532, "step": 147240 }, { "epoch": 1.0658935771316063, "grad_norm": 0.15785497426986694, "learning_rate": 3.934113661534453e-06, "loss": 0.8594, "step": 147250 }, { "epoch": 1.0659659637921923, "grad_norm": 0.16951316595077515, "learning_rate": 3.9340412748738665e-06, "loss": 0.8581, "step": 147260 }, { "epoch": 1.0660383504527786, "grad_norm": 0.2050899863243103, "learning_rate": 3.93396888821328e-06, "loss": 0.8607, "step": 147270 }, { "epoch": 1.0661107371133647, "grad_norm": 0.17105598747730255, "learning_rate": 3.9338965015526946e-06, "loss": 0.8594, "step": 147280 }, { "epoch": 1.066183123773951, "grad_norm": 0.15434543788433075, "learning_rate": 3.933824114892108e-06, "loss": 0.8475, "step": 147290 }, { "epoch": 1.066255510434537, "grad_norm": 0.1630607694387436, "learning_rate": 3.933751728231522e-06, "loss": 0.8562, "step": 147300 }, { "epoch": 1.0663278970951233, "grad_norm": 0.15573401749134064, "learning_rate": 3.933679341570935e-06, "loss": 0.8537, "step": 147310 }, { "epoch": 1.0664002837557094, "grad_norm": 0.15144288539886475, "learning_rate": 3.93360695491035e-06, "loss": 0.8598, "step": 147320 }, { "epoch": 1.0664726704162957, "grad_norm": 0.14449411630630493, "learning_rate": 3.9335345682497635e-06, "loss": 0.8529, "step": 147330 }, { "epoch": 1.066545057076882, "grad_norm": 0.15416362881660461, "learning_rate": 3.933462181589177e-06, "loss": 0.8507, "step": 147340 }, { "epoch": 1.066617443737468, "grad_norm": 0.1476205438375473, "learning_rate": 3.933389794928591e-06, "loss": 0.8652, "step": 147350 }, { "epoch": 1.0666898303980543, "grad_norm": 0.1484995037317276, "learning_rate": 3.933317408268005e-06, "loss": 0.8602, "step": 147360 }, { "epoch": 1.0667622170586404, "grad_norm": 0.15167966485023499, "learning_rate": 3.933245021607419e-06, "loss": 0.841, "step": 147370 }, { "epoch": 1.0668346037192267, "grad_norm": 0.15195885300636292, "learning_rate": 3.933172634946832e-06, "loss": 0.8577, "step": 147380 }, { "epoch": 1.0669069903798127, "grad_norm": 0.15130667388439178, "learning_rate": 3.933100248286246e-06, "loss": 0.8491, "step": 147390 }, { "epoch": 1.066979377040399, "grad_norm": 0.15865103900432587, "learning_rate": 3.9330278616256605e-06, "loss": 0.8707, "step": 147400 }, { "epoch": 1.067051763700985, "grad_norm": 0.15017808973789215, "learning_rate": 3.932955474965074e-06, "loss": 0.8559, "step": 147410 }, { "epoch": 1.0671241503615714, "grad_norm": 0.15235696732997894, "learning_rate": 3.932883088304488e-06, "loss": 0.8593, "step": 147420 }, { "epoch": 1.0671965370221574, "grad_norm": 0.19113440811634064, "learning_rate": 3.932810701643901e-06, "loss": 0.8677, "step": 147430 }, { "epoch": 1.0672689236827437, "grad_norm": 0.19408351182937622, "learning_rate": 3.932738314983315e-06, "loss": 0.854, "step": 147440 }, { "epoch": 1.06734131034333, "grad_norm": 0.15390466153621674, "learning_rate": 3.932665928322729e-06, "loss": 0.8565, "step": 147450 }, { "epoch": 1.067413697003916, "grad_norm": 0.1523483693599701, "learning_rate": 3.932593541662143e-06, "loss": 0.8531, "step": 147460 }, { "epoch": 1.0674860836645024, "grad_norm": 0.17553357779979706, "learning_rate": 3.932521155001557e-06, "loss": 0.8569, "step": 147470 }, { "epoch": 1.0675584703250884, "grad_norm": 0.19476906955242157, "learning_rate": 3.93244876834097e-06, "loss": 0.8691, "step": 147480 }, { "epoch": 1.0676308569856747, "grad_norm": 0.2501210570335388, "learning_rate": 3.932376381680385e-06, "loss": 0.8737, "step": 147490 }, { "epoch": 1.0677032436462608, "grad_norm": 0.17125388979911804, "learning_rate": 3.932303995019798e-06, "loss": 0.8577, "step": 147500 }, { "epoch": 1.067775630306847, "grad_norm": 0.15461327135562897, "learning_rate": 3.932231608359212e-06, "loss": 0.8669, "step": 147510 }, { "epoch": 1.0678480169674331, "grad_norm": 0.1496717482805252, "learning_rate": 3.932159221698626e-06, "loss": 0.864, "step": 147520 }, { "epoch": 1.0679204036280194, "grad_norm": 0.14370813965797424, "learning_rate": 3.93208683503804e-06, "loss": 0.8496, "step": 147530 }, { "epoch": 1.0679927902886055, "grad_norm": 0.16169150173664093, "learning_rate": 3.932014448377453e-06, "loss": 0.8587, "step": 147540 }, { "epoch": 1.0680651769491918, "grad_norm": 0.15165531635284424, "learning_rate": 3.931942061716867e-06, "loss": 0.8602, "step": 147550 }, { "epoch": 1.068137563609778, "grad_norm": 0.14875246584415436, "learning_rate": 3.931869675056281e-06, "loss": 0.8488, "step": 147560 }, { "epoch": 1.0682099502703641, "grad_norm": 0.15369826555252075, "learning_rate": 3.9317972883956945e-06, "loss": 0.8615, "step": 147570 }, { "epoch": 1.0682823369309504, "grad_norm": 0.15338794887065887, "learning_rate": 3.931724901735108e-06, "loss": 0.8682, "step": 147580 }, { "epoch": 1.0683547235915365, "grad_norm": 0.17521625757217407, "learning_rate": 3.931652515074523e-06, "loss": 0.8585, "step": 147590 }, { "epoch": 1.0684271102521228, "grad_norm": 0.15786945819854736, "learning_rate": 3.931580128413936e-06, "loss": 0.8668, "step": 147600 }, { "epoch": 1.0684994969127088, "grad_norm": 0.16188044846057892, "learning_rate": 3.93150774175335e-06, "loss": 0.8529, "step": 147610 }, { "epoch": 1.0685718835732951, "grad_norm": 0.15496951341629028, "learning_rate": 3.9314353550927634e-06, "loss": 0.8538, "step": 147620 }, { "epoch": 1.0686442702338812, "grad_norm": 0.1538427770137787, "learning_rate": 3.931362968432177e-06, "loss": 0.871, "step": 147630 }, { "epoch": 1.0687166568944675, "grad_norm": 0.14457300305366516, "learning_rate": 3.9312905817715915e-06, "loss": 0.8551, "step": 147640 }, { "epoch": 1.0687890435550538, "grad_norm": 0.14885105192661285, "learning_rate": 3.931218195111005e-06, "loss": 0.8498, "step": 147650 }, { "epoch": 1.0688614302156398, "grad_norm": 0.16504499316215515, "learning_rate": 3.931145808450419e-06, "loss": 0.853, "step": 147660 }, { "epoch": 1.0689338168762261, "grad_norm": 0.15119002759456635, "learning_rate": 3.931073421789832e-06, "loss": 0.8574, "step": 147670 }, { "epoch": 1.0690062035368122, "grad_norm": 0.15814948081970215, "learning_rate": 3.931001035129247e-06, "loss": 0.8593, "step": 147680 }, { "epoch": 1.0690785901973985, "grad_norm": 0.14475545287132263, "learning_rate": 3.9309286484686604e-06, "loss": 0.8555, "step": 147690 }, { "epoch": 1.0691509768579845, "grad_norm": 0.1500001847743988, "learning_rate": 3.930856261808074e-06, "loss": 0.8552, "step": 147700 }, { "epoch": 1.0692233635185708, "grad_norm": 0.15069542825222015, "learning_rate": 3.930783875147488e-06, "loss": 0.8675, "step": 147710 }, { "epoch": 1.069295750179157, "grad_norm": 0.14854957163333893, "learning_rate": 3.930711488486902e-06, "loss": 0.8658, "step": 147720 }, { "epoch": 1.0693681368397432, "grad_norm": 0.17693032324314117, "learning_rate": 3.930639101826316e-06, "loss": 0.8453, "step": 147730 }, { "epoch": 1.0694405235003295, "grad_norm": 0.15209737420082092, "learning_rate": 3.930566715165729e-06, "loss": 0.8566, "step": 147740 }, { "epoch": 1.0695129101609155, "grad_norm": 0.1621839553117752, "learning_rate": 3.930494328505143e-06, "loss": 0.8516, "step": 147750 }, { "epoch": 1.0695852968215018, "grad_norm": 0.14790567755699158, "learning_rate": 3.9304219418445575e-06, "loss": 0.8514, "step": 147760 }, { "epoch": 1.069657683482088, "grad_norm": 0.14647607505321503, "learning_rate": 3.930349555183971e-06, "loss": 0.8419, "step": 147770 }, { "epoch": 1.0697300701426742, "grad_norm": 0.16838887333869934, "learning_rate": 3.930277168523385e-06, "loss": 0.8465, "step": 147780 }, { "epoch": 1.0698024568032602, "grad_norm": 0.1428205966949463, "learning_rate": 3.930204781862798e-06, "loss": 0.8642, "step": 147790 }, { "epoch": 1.0698748434638465, "grad_norm": 0.14637254178524017, "learning_rate": 3.930132395202213e-06, "loss": 0.862, "step": 147800 }, { "epoch": 1.0699472301244326, "grad_norm": 0.14838822185993195, "learning_rate": 3.930060008541626e-06, "loss": 0.8473, "step": 147810 }, { "epoch": 1.070019616785019, "grad_norm": 0.15595309436321259, "learning_rate": 3.92998762188104e-06, "loss": 0.8575, "step": 147820 }, { "epoch": 1.070092003445605, "grad_norm": 0.15181201696395874, "learning_rate": 3.929915235220454e-06, "loss": 0.8763, "step": 147830 }, { "epoch": 1.0701643901061912, "grad_norm": 0.14543366432189941, "learning_rate": 3.929842848559868e-06, "loss": 0.8537, "step": 147840 }, { "epoch": 1.0702367767667775, "grad_norm": 0.16064667701721191, "learning_rate": 3.929770461899282e-06, "loss": 0.8557, "step": 147850 }, { "epoch": 1.0703091634273636, "grad_norm": 0.1469428986310959, "learning_rate": 3.929698075238695e-06, "loss": 0.8442, "step": 147860 }, { "epoch": 1.0703815500879499, "grad_norm": 0.16561655700206757, "learning_rate": 3.929625688578109e-06, "loss": 0.849, "step": 147870 }, { "epoch": 1.070453936748536, "grad_norm": 0.14810919761657715, "learning_rate": 3.929553301917523e-06, "loss": 0.8645, "step": 147880 }, { "epoch": 1.0705263234091222, "grad_norm": 0.15601563453674316, "learning_rate": 3.929480915256937e-06, "loss": 0.8638, "step": 147890 }, { "epoch": 1.0705987100697083, "grad_norm": 0.15862402319908142, "learning_rate": 3.929408528596351e-06, "loss": 0.8574, "step": 147900 }, { "epoch": 1.0706710967302946, "grad_norm": 0.1481819897890091, "learning_rate": 3.929336141935764e-06, "loss": 0.8584, "step": 147910 }, { "epoch": 1.0707434833908807, "grad_norm": 0.17046231031417847, "learning_rate": 3.929263755275179e-06, "loss": 0.8599, "step": 147920 }, { "epoch": 1.070815870051467, "grad_norm": 0.155575230717659, "learning_rate": 3.929191368614592e-06, "loss": 0.8527, "step": 147930 }, { "epoch": 1.070888256712053, "grad_norm": 0.1649925410747528, "learning_rate": 3.929118981954006e-06, "loss": 0.8621, "step": 147940 }, { "epoch": 1.0709606433726393, "grad_norm": 0.14515741169452667, "learning_rate": 3.9290465952934195e-06, "loss": 0.8572, "step": 147950 }, { "epoch": 1.0710330300332256, "grad_norm": 0.1484888792037964, "learning_rate": 3.928974208632834e-06, "loss": 0.8535, "step": 147960 }, { "epoch": 1.0711054166938117, "grad_norm": 0.14969849586486816, "learning_rate": 3.928901821972248e-06, "loss": 0.8612, "step": 147970 }, { "epoch": 1.071177803354398, "grad_norm": 0.1582787185907364, "learning_rate": 3.928829435311661e-06, "loss": 0.862, "step": 147980 }, { "epoch": 1.071250190014984, "grad_norm": 0.14706555008888245, "learning_rate": 3.928757048651075e-06, "loss": 0.8579, "step": 147990 }, { "epoch": 1.0713225766755703, "grad_norm": 0.1515503078699112, "learning_rate": 3.928684661990489e-06, "loss": 0.8742, "step": 148000 }, { "epoch": 1.0713949633361564, "grad_norm": 0.16300860047340393, "learning_rate": 3.928612275329903e-06, "loss": 0.8495, "step": 148010 }, { "epoch": 1.0714673499967426, "grad_norm": 0.15342256426811218, "learning_rate": 3.9285398886693166e-06, "loss": 0.864, "step": 148020 }, { "epoch": 1.0715397366573287, "grad_norm": 0.1569945365190506, "learning_rate": 3.92846750200873e-06, "loss": 0.8606, "step": 148030 }, { "epoch": 1.071612123317915, "grad_norm": 0.15621396899223328, "learning_rate": 3.928395115348145e-06, "loss": 0.8419, "step": 148040 }, { "epoch": 1.071684509978501, "grad_norm": 0.14876516163349152, "learning_rate": 3.928322728687558e-06, "loss": 0.8576, "step": 148050 }, { "epoch": 1.0717568966390874, "grad_norm": 0.15889860689640045, "learning_rate": 3.928250342026972e-06, "loss": 0.8586, "step": 148060 }, { "epoch": 1.0718292832996736, "grad_norm": 0.15577679872512817, "learning_rate": 3.9281779553663855e-06, "loss": 0.8577, "step": 148070 }, { "epoch": 1.0719016699602597, "grad_norm": 0.1523328423500061, "learning_rate": 3.928105568705799e-06, "loss": 0.8556, "step": 148080 }, { "epoch": 1.071974056620846, "grad_norm": 0.15466256439685822, "learning_rate": 3.928033182045213e-06, "loss": 0.8548, "step": 148090 }, { "epoch": 1.072046443281432, "grad_norm": 0.14170676469802856, "learning_rate": 3.927960795384626e-06, "loss": 0.8438, "step": 148100 }, { "epoch": 1.0721188299420183, "grad_norm": 0.1515391618013382, "learning_rate": 3.927888408724041e-06, "loss": 0.8612, "step": 148110 }, { "epoch": 1.0721912166026044, "grad_norm": 0.17052294313907623, "learning_rate": 3.927816022063454e-06, "loss": 0.846, "step": 148120 }, { "epoch": 1.0722636032631907, "grad_norm": 0.16018043458461761, "learning_rate": 3.927743635402868e-06, "loss": 0.8622, "step": 148130 }, { "epoch": 1.0723359899237768, "grad_norm": 0.15600261092185974, "learning_rate": 3.927671248742282e-06, "loss": 0.8633, "step": 148140 }, { "epoch": 1.072408376584363, "grad_norm": 0.15749092400074005, "learning_rate": 3.927598862081696e-06, "loss": 0.843, "step": 148150 }, { "epoch": 1.0724807632449491, "grad_norm": 0.15799428522586823, "learning_rate": 3.92752647542111e-06, "loss": 0.8613, "step": 148160 }, { "epoch": 1.0725531499055354, "grad_norm": 0.16896884143352509, "learning_rate": 3.927454088760523e-06, "loss": 0.8452, "step": 148170 }, { "epoch": 1.0726255365661217, "grad_norm": 0.1738024652004242, "learning_rate": 3.927381702099937e-06, "loss": 0.8582, "step": 148180 }, { "epoch": 1.0726979232267078, "grad_norm": 0.1570395976305008, "learning_rate": 3.927309315439351e-06, "loss": 0.8687, "step": 148190 }, { "epoch": 1.072770309887294, "grad_norm": 0.14952725172042847, "learning_rate": 3.927236928778765e-06, "loss": 0.8601, "step": 148200 }, { "epoch": 1.0728426965478801, "grad_norm": 0.14488628506660461, "learning_rate": 3.927164542118179e-06, "loss": 0.847, "step": 148210 }, { "epoch": 1.0729150832084664, "grad_norm": 0.16457195580005646, "learning_rate": 3.927092155457592e-06, "loss": 0.862, "step": 148220 }, { "epoch": 1.0729874698690525, "grad_norm": 0.15635018050670624, "learning_rate": 3.927019768797006e-06, "loss": 0.85, "step": 148230 }, { "epoch": 1.0730598565296388, "grad_norm": 0.15710517764091492, "learning_rate": 3.92694738213642e-06, "loss": 0.85, "step": 148240 }, { "epoch": 1.0731322431902248, "grad_norm": 0.14973808825016022, "learning_rate": 3.926874995475834e-06, "loss": 0.8561, "step": 148250 }, { "epoch": 1.073204629850811, "grad_norm": 0.18000011146068573, "learning_rate": 3.926802608815248e-06, "loss": 0.8432, "step": 148260 }, { "epoch": 1.0732770165113972, "grad_norm": 0.14735475182533264, "learning_rate": 3.926730222154661e-06, "loss": 0.8719, "step": 148270 }, { "epoch": 1.0733494031719835, "grad_norm": 0.1539383828639984, "learning_rate": 3.926657835494076e-06, "loss": 0.8637, "step": 148280 }, { "epoch": 1.0734217898325698, "grad_norm": 0.26289451122283936, "learning_rate": 3.926585448833489e-06, "loss": 0.8712, "step": 148290 }, { "epoch": 1.0734941764931558, "grad_norm": 0.16455796360969543, "learning_rate": 3.926513062172903e-06, "loss": 0.8612, "step": 148300 }, { "epoch": 1.073566563153742, "grad_norm": 0.15031065046787262, "learning_rate": 3.9264406755123165e-06, "loss": 0.8556, "step": 148310 }, { "epoch": 1.0736389498143282, "grad_norm": 0.14603126049041748, "learning_rate": 3.926368288851731e-06, "loss": 0.8603, "step": 148320 }, { "epoch": 1.0737113364749145, "grad_norm": 0.1502143293619156, "learning_rate": 3.926295902191145e-06, "loss": 0.8621, "step": 148330 }, { "epoch": 1.0737837231355005, "grad_norm": 0.15308043360710144, "learning_rate": 3.926223515530558e-06, "loss": 0.8633, "step": 148340 }, { "epoch": 1.0738561097960868, "grad_norm": 0.15625415742397308, "learning_rate": 3.926151128869972e-06, "loss": 0.8654, "step": 148350 }, { "epoch": 1.0739284964566729, "grad_norm": 0.16124477982521057, "learning_rate": 3.926078742209386e-06, "loss": 0.8583, "step": 148360 }, { "epoch": 1.0740008831172592, "grad_norm": 0.14821426570415497, "learning_rate": 3.9260063555488e-06, "loss": 0.8497, "step": 148370 }, { "epoch": 1.0740732697778452, "grad_norm": 0.14356663823127747, "learning_rate": 3.9259339688882135e-06, "loss": 0.8511, "step": 148380 }, { "epoch": 1.0741456564384315, "grad_norm": 0.14811255037784576, "learning_rate": 3.925861582227627e-06, "loss": 0.8453, "step": 148390 }, { "epoch": 1.0742180430990178, "grad_norm": 0.18157772719860077, "learning_rate": 3.925789195567042e-06, "loss": 0.8601, "step": 148400 }, { "epoch": 1.0742904297596039, "grad_norm": 0.16508889198303223, "learning_rate": 3.925716808906455e-06, "loss": 0.8523, "step": 148410 }, { "epoch": 1.0743628164201902, "grad_norm": 0.15497782826423645, "learning_rate": 3.925644422245869e-06, "loss": 0.8588, "step": 148420 }, { "epoch": 1.0744352030807762, "grad_norm": 0.1482570469379425, "learning_rate": 3.9255720355852824e-06, "loss": 0.8657, "step": 148430 }, { "epoch": 1.0745075897413625, "grad_norm": 0.15788806974887848, "learning_rate": 3.925499648924697e-06, "loss": 0.8588, "step": 148440 }, { "epoch": 1.0745799764019486, "grad_norm": 0.1528482884168625, "learning_rate": 3.9254272622641105e-06, "loss": 0.8502, "step": 148450 }, { "epoch": 1.0746523630625349, "grad_norm": 0.14317600429058075, "learning_rate": 3.925354875603524e-06, "loss": 0.8432, "step": 148460 }, { "epoch": 1.074724749723121, "grad_norm": 0.15992312133312225, "learning_rate": 3.925282488942938e-06, "loss": 0.8555, "step": 148470 }, { "epoch": 1.0747971363837072, "grad_norm": 0.16133040189743042, "learning_rate": 3.925210102282352e-06, "loss": 0.8568, "step": 148480 }, { "epoch": 1.0748695230442933, "grad_norm": 0.1480935513973236, "learning_rate": 3.925137715621766e-06, "loss": 0.8637, "step": 148490 }, { "epoch": 1.0749419097048796, "grad_norm": 0.16463685035705566, "learning_rate": 3.9250653289611795e-06, "loss": 0.8598, "step": 148500 }, { "epoch": 1.0750142963654659, "grad_norm": 0.1424337923526764, "learning_rate": 3.924992942300593e-06, "loss": 0.855, "step": 148510 }, { "epoch": 1.075086683026052, "grad_norm": 0.25730541348457336, "learning_rate": 3.9249205556400075e-06, "loss": 0.8715, "step": 148520 }, { "epoch": 1.0751590696866382, "grad_norm": 0.15047651529312134, "learning_rate": 3.924848168979421e-06, "loss": 0.8611, "step": 148530 }, { "epoch": 1.0752314563472243, "grad_norm": 0.15243446826934814, "learning_rate": 3.924775782318835e-06, "loss": 0.8515, "step": 148540 }, { "epoch": 1.0753038430078106, "grad_norm": 0.14074011147022247, "learning_rate": 3.924703395658248e-06, "loss": 0.8418, "step": 148550 }, { "epoch": 1.0753762296683966, "grad_norm": 0.14536842703819275, "learning_rate": 3.924631008997663e-06, "loss": 0.8604, "step": 148560 }, { "epoch": 1.075448616328983, "grad_norm": 0.1618451476097107, "learning_rate": 3.9245586223370765e-06, "loss": 0.8389, "step": 148570 }, { "epoch": 1.075521002989569, "grad_norm": 0.1468305140733719, "learning_rate": 3.92448623567649e-06, "loss": 0.8525, "step": 148580 }, { "epoch": 1.0755933896501553, "grad_norm": 0.22342850267887115, "learning_rate": 3.924413849015904e-06, "loss": 0.8623, "step": 148590 }, { "epoch": 1.0756657763107413, "grad_norm": 0.1797487586736679, "learning_rate": 3.924341462355317e-06, "loss": 0.8519, "step": 148600 }, { "epoch": 1.0757381629713276, "grad_norm": 0.16016285121440887, "learning_rate": 3.924269075694731e-06, "loss": 0.8712, "step": 148610 }, { "epoch": 1.075810549631914, "grad_norm": 0.16447670757770538, "learning_rate": 3.9241966890341445e-06, "loss": 0.8615, "step": 148620 }, { "epoch": 1.0758829362925, "grad_norm": 0.18734236061573029, "learning_rate": 3.924124302373559e-06, "loss": 0.8522, "step": 148630 }, { "epoch": 1.0759553229530863, "grad_norm": 0.14720183610916138, "learning_rate": 3.924051915712973e-06, "loss": 0.8525, "step": 148640 }, { "epoch": 1.0760277096136723, "grad_norm": 0.1844429075717926, "learning_rate": 3.923979529052386e-06, "loss": 0.8465, "step": 148650 }, { "epoch": 1.0761000962742586, "grad_norm": 0.15892651677131653, "learning_rate": 3.9239071423918e-06, "loss": 0.8523, "step": 148660 }, { "epoch": 1.0761724829348447, "grad_norm": 0.15322613716125488, "learning_rate": 3.923834755731214e-06, "loss": 0.8584, "step": 148670 }, { "epoch": 1.076244869595431, "grad_norm": 0.1480284184217453, "learning_rate": 3.923762369070628e-06, "loss": 0.8334, "step": 148680 }, { "epoch": 1.076317256256017, "grad_norm": 0.16445012390613556, "learning_rate": 3.9236899824100415e-06, "loss": 0.8602, "step": 148690 }, { "epoch": 1.0763896429166033, "grad_norm": 0.15210187435150146, "learning_rate": 3.923617595749455e-06, "loss": 0.8627, "step": 148700 }, { "epoch": 1.0764620295771896, "grad_norm": 0.1529064029455185, "learning_rate": 3.92354520908887e-06, "loss": 0.8646, "step": 148710 }, { "epoch": 1.0765344162377757, "grad_norm": 0.15188395977020264, "learning_rate": 3.923472822428283e-06, "loss": 0.8636, "step": 148720 }, { "epoch": 1.076606802898362, "grad_norm": 0.14736254513263702, "learning_rate": 3.923400435767697e-06, "loss": 0.8703, "step": 148730 }, { "epoch": 1.076679189558948, "grad_norm": 0.14823542535305023, "learning_rate": 3.9233280491071105e-06, "loss": 0.863, "step": 148740 }, { "epoch": 1.0767515762195343, "grad_norm": 0.15504339337348938, "learning_rate": 3.923255662446525e-06, "loss": 0.845, "step": 148750 }, { "epoch": 1.0768239628801204, "grad_norm": 0.1641693115234375, "learning_rate": 3.9231832757859386e-06, "loss": 0.8574, "step": 148760 }, { "epoch": 1.0768963495407067, "grad_norm": 0.1393650621175766, "learning_rate": 3.923110889125352e-06, "loss": 0.8528, "step": 148770 }, { "epoch": 1.0769687362012927, "grad_norm": 0.7887364625930786, "learning_rate": 3.923038502464766e-06, "loss": 0.8631, "step": 148780 }, { "epoch": 1.077041122861879, "grad_norm": 0.14820659160614014, "learning_rate": 3.92296611580418e-06, "loss": 0.8757, "step": 148790 }, { "epoch": 1.0771135095224653, "grad_norm": 0.1467932164669037, "learning_rate": 3.922893729143594e-06, "loss": 0.8715, "step": 148800 }, { "epoch": 1.0771858961830514, "grad_norm": 0.1651478409767151, "learning_rate": 3.9228213424830075e-06, "loss": 0.8704, "step": 148810 }, { "epoch": 1.0772582828436377, "grad_norm": 0.1475069671869278, "learning_rate": 3.922748955822421e-06, "loss": 0.8651, "step": 148820 }, { "epoch": 1.0773306695042237, "grad_norm": 0.1452004313468933, "learning_rate": 3.9226765691618356e-06, "loss": 0.8628, "step": 148830 }, { "epoch": 1.07740305616481, "grad_norm": 0.15325690805912018, "learning_rate": 3.922604182501249e-06, "loss": 0.8556, "step": 148840 }, { "epoch": 1.077475442825396, "grad_norm": 0.16279549896717072, "learning_rate": 3.922531795840663e-06, "loss": 0.8468, "step": 148850 }, { "epoch": 1.0775478294859824, "grad_norm": 0.1613786667585373, "learning_rate": 3.922459409180076e-06, "loss": 0.8632, "step": 148860 }, { "epoch": 1.0776202161465684, "grad_norm": 0.15279968082904816, "learning_rate": 3.92238702251949e-06, "loss": 0.8663, "step": 148870 }, { "epoch": 1.0776926028071547, "grad_norm": 0.16398665308952332, "learning_rate": 3.9223146358589045e-06, "loss": 0.8583, "step": 148880 }, { "epoch": 1.0777649894677408, "grad_norm": 0.15407854318618774, "learning_rate": 3.922242249198318e-06, "loss": 0.8703, "step": 148890 }, { "epoch": 1.077837376128327, "grad_norm": 0.16437429189682007, "learning_rate": 3.922169862537732e-06, "loss": 0.8619, "step": 148900 }, { "epoch": 1.0779097627889134, "grad_norm": 0.14691048860549927, "learning_rate": 3.922097475877145e-06, "loss": 0.8478, "step": 148910 }, { "epoch": 1.0779821494494994, "grad_norm": 0.14637692272663116, "learning_rate": 3.92202508921656e-06, "loss": 0.8654, "step": 148920 }, { "epoch": 1.0780545361100857, "grad_norm": 0.1593993455171585, "learning_rate": 3.921952702555973e-06, "loss": 0.8506, "step": 148930 }, { "epoch": 1.0781269227706718, "grad_norm": 0.14419616758823395, "learning_rate": 3.921880315895387e-06, "loss": 0.846, "step": 148940 }, { "epoch": 1.078199309431258, "grad_norm": 0.15830476582050323, "learning_rate": 3.921807929234801e-06, "loss": 0.8575, "step": 148950 }, { "epoch": 1.0782716960918441, "grad_norm": 0.16840828955173492, "learning_rate": 3.921735542574215e-06, "loss": 0.8483, "step": 148960 }, { "epoch": 1.0783440827524304, "grad_norm": 0.15455256402492523, "learning_rate": 3.921663155913629e-06, "loss": 0.8433, "step": 148970 }, { "epoch": 1.0784164694130165, "grad_norm": 0.1700802743434906, "learning_rate": 3.921590769253042e-06, "loss": 0.8655, "step": 148980 }, { "epoch": 1.0784888560736028, "grad_norm": 0.1804065704345703, "learning_rate": 3.921518382592456e-06, "loss": 0.8648, "step": 148990 }, { "epoch": 1.0785612427341889, "grad_norm": 0.16987882554531097, "learning_rate": 3.9214459959318704e-06, "loss": 0.8674, "step": 149000 }, { "epoch": 1.0786336293947751, "grad_norm": 0.14747925102710724, "learning_rate": 3.921373609271284e-06, "loss": 0.858, "step": 149010 }, { "epoch": 1.0787060160553614, "grad_norm": 0.1391853392124176, "learning_rate": 3.921301222610698e-06, "loss": 0.8458, "step": 149020 }, { "epoch": 1.0787784027159475, "grad_norm": 0.15131983160972595, "learning_rate": 3.921228835950111e-06, "loss": 0.8572, "step": 149030 }, { "epoch": 1.0788507893765338, "grad_norm": 0.14283746480941772, "learning_rate": 3.921156449289526e-06, "loss": 0.8434, "step": 149040 }, { "epoch": 1.0789231760371198, "grad_norm": 0.1681165099143982, "learning_rate": 3.921084062628939e-06, "loss": 0.8539, "step": 149050 }, { "epoch": 1.0789955626977061, "grad_norm": 0.15962868928909302, "learning_rate": 3.921011675968353e-06, "loss": 0.8488, "step": 149060 }, { "epoch": 1.0790679493582922, "grad_norm": 0.14654859900474548, "learning_rate": 3.920939289307767e-06, "loss": 0.848, "step": 149070 }, { "epoch": 1.0791403360188785, "grad_norm": 0.1605050265789032, "learning_rate": 3.920866902647181e-06, "loss": 0.8612, "step": 149080 }, { "epoch": 1.0792127226794646, "grad_norm": 0.15435899794101715, "learning_rate": 3.920794515986595e-06, "loss": 0.8665, "step": 149090 }, { "epoch": 1.0792851093400508, "grad_norm": 0.15466824173927307, "learning_rate": 3.920722129326008e-06, "loss": 0.8633, "step": 149100 }, { "epoch": 1.079357496000637, "grad_norm": 0.1811055988073349, "learning_rate": 3.920649742665422e-06, "loss": 0.8626, "step": 149110 }, { "epoch": 1.0794298826612232, "grad_norm": 0.15361252427101135, "learning_rate": 3.920577356004836e-06, "loss": 0.8588, "step": 149120 }, { "epoch": 1.0795022693218095, "grad_norm": 0.15100868046283722, "learning_rate": 3.920504969344249e-06, "loss": 0.8744, "step": 149130 }, { "epoch": 1.0795746559823955, "grad_norm": 0.16140015423297882, "learning_rate": 3.920432582683663e-06, "loss": 0.8592, "step": 149140 }, { "epoch": 1.0796470426429818, "grad_norm": 0.1692638099193573, "learning_rate": 3.920360196023077e-06, "loss": 0.8445, "step": 149150 }, { "epoch": 1.079719429303568, "grad_norm": 0.1461089849472046, "learning_rate": 3.920287809362491e-06, "loss": 0.8389, "step": 149160 }, { "epoch": 1.0797918159641542, "grad_norm": 0.15460637211799622, "learning_rate": 3.9202154227019044e-06, "loss": 0.8514, "step": 149170 }, { "epoch": 1.0798642026247403, "grad_norm": 0.15934206545352936, "learning_rate": 3.920143036041318e-06, "loss": 0.8502, "step": 149180 }, { "epoch": 1.0799365892853265, "grad_norm": 0.17823490500450134, "learning_rate": 3.9200706493807325e-06, "loss": 0.8531, "step": 149190 }, { "epoch": 1.0800089759459126, "grad_norm": 0.15246717631816864, "learning_rate": 3.919998262720146e-06, "loss": 0.8649, "step": 149200 }, { "epoch": 1.080081362606499, "grad_norm": 0.1905028522014618, "learning_rate": 3.91992587605956e-06, "loss": 0.8662, "step": 149210 }, { "epoch": 1.080153749267085, "grad_norm": 0.16161444783210754, "learning_rate": 3.919853489398973e-06, "loss": 0.8635, "step": 149220 }, { "epoch": 1.0802261359276712, "grad_norm": 0.1495893895626068, "learning_rate": 3.919781102738388e-06, "loss": 0.8637, "step": 149230 }, { "epoch": 1.0802985225882575, "grad_norm": 0.15014596283435822, "learning_rate": 3.9197087160778015e-06, "loss": 0.8642, "step": 149240 }, { "epoch": 1.0803709092488436, "grad_norm": 0.13995687663555145, "learning_rate": 3.919636329417215e-06, "loss": 0.8641, "step": 149250 }, { "epoch": 1.08044329590943, "grad_norm": 0.15207460522651672, "learning_rate": 3.919563942756629e-06, "loss": 0.852, "step": 149260 }, { "epoch": 1.080515682570016, "grad_norm": 0.15077516436576843, "learning_rate": 3.919491556096043e-06, "loss": 0.8663, "step": 149270 }, { "epoch": 1.0805880692306022, "grad_norm": 0.15563450753688812, "learning_rate": 3.919419169435457e-06, "loss": 0.8547, "step": 149280 }, { "epoch": 1.0806604558911883, "grad_norm": 0.1566159725189209, "learning_rate": 3.91934678277487e-06, "loss": 0.8581, "step": 149290 }, { "epoch": 1.0807328425517746, "grad_norm": 0.1530294120311737, "learning_rate": 3.919274396114284e-06, "loss": 0.8684, "step": 149300 }, { "epoch": 1.0808052292123607, "grad_norm": 0.14563488960266113, "learning_rate": 3.9192020094536985e-06, "loss": 0.8668, "step": 149310 }, { "epoch": 1.080877615872947, "grad_norm": 0.36675575375556946, "learning_rate": 3.919129622793112e-06, "loss": 0.8571, "step": 149320 }, { "epoch": 1.080950002533533, "grad_norm": 0.17297199368476868, "learning_rate": 3.919057236132526e-06, "loss": 0.867, "step": 149330 }, { "epoch": 1.0810223891941193, "grad_norm": 0.1599206179380417, "learning_rate": 3.918984849471939e-06, "loss": 0.8633, "step": 149340 }, { "epoch": 1.0810947758547056, "grad_norm": 0.16135400533676147, "learning_rate": 3.918912462811354e-06, "loss": 0.8661, "step": 149350 }, { "epoch": 1.0811671625152917, "grad_norm": 0.16339783370494843, "learning_rate": 3.918840076150767e-06, "loss": 0.8668, "step": 149360 }, { "epoch": 1.081239549175878, "grad_norm": 0.14185838401317596, "learning_rate": 3.918767689490181e-06, "loss": 0.851, "step": 149370 }, { "epoch": 1.081311935836464, "grad_norm": 0.14981046319007874, "learning_rate": 3.918695302829595e-06, "loss": 0.862, "step": 149380 }, { "epoch": 1.0813843224970503, "grad_norm": 0.14530541002750397, "learning_rate": 3.918622916169009e-06, "loss": 0.8553, "step": 149390 }, { "epoch": 1.0814567091576364, "grad_norm": 0.14957144856452942, "learning_rate": 3.918550529508423e-06, "loss": 0.8599, "step": 149400 }, { "epoch": 1.0815290958182227, "grad_norm": 0.14457090198993683, "learning_rate": 3.918478142847836e-06, "loss": 0.8483, "step": 149410 }, { "epoch": 1.0816014824788087, "grad_norm": 0.1516246795654297, "learning_rate": 3.91840575618725e-06, "loss": 0.8654, "step": 149420 }, { "epoch": 1.081673869139395, "grad_norm": 0.15999293327331543, "learning_rate": 3.918333369526664e-06, "loss": 0.8624, "step": 149430 }, { "epoch": 1.081746255799981, "grad_norm": 0.16079509258270264, "learning_rate": 3.918260982866078e-06, "loss": 0.8486, "step": 149440 }, { "epoch": 1.0818186424605674, "grad_norm": 0.16731907427310944, "learning_rate": 3.918188596205492e-06, "loss": 0.8651, "step": 149450 }, { "epoch": 1.0818910291211536, "grad_norm": 0.16714203357696533, "learning_rate": 3.918116209544905e-06, "loss": 0.8646, "step": 149460 }, { "epoch": 1.0819634157817397, "grad_norm": 0.14884908497333527, "learning_rate": 3.918043822884319e-06, "loss": 0.8628, "step": 149470 }, { "epoch": 1.082035802442326, "grad_norm": 0.1482784003019333, "learning_rate": 3.917971436223733e-06, "loss": 0.86, "step": 149480 }, { "epoch": 1.082108189102912, "grad_norm": 0.17224667966365814, "learning_rate": 3.917899049563147e-06, "loss": 0.863, "step": 149490 }, { "epoch": 1.0821805757634984, "grad_norm": 0.15392819046974182, "learning_rate": 3.9178266629025606e-06, "loss": 0.8601, "step": 149500 }, { "epoch": 1.0822529624240844, "grad_norm": 0.15251944959163666, "learning_rate": 3.917754276241974e-06, "loss": 0.8585, "step": 149510 }, { "epoch": 1.0823253490846707, "grad_norm": 0.14979909360408783, "learning_rate": 3.917681889581389e-06, "loss": 0.8553, "step": 149520 }, { "epoch": 1.0823977357452568, "grad_norm": 0.1591353863477707, "learning_rate": 3.917609502920802e-06, "loss": 0.8604, "step": 149530 }, { "epoch": 1.082470122405843, "grad_norm": 0.1540420949459076, "learning_rate": 3.917537116260216e-06, "loss": 0.8674, "step": 149540 }, { "epoch": 1.0825425090664291, "grad_norm": 0.43315520882606506, "learning_rate": 3.9174647295996295e-06, "loss": 0.8565, "step": 149550 }, { "epoch": 1.0826148957270154, "grad_norm": 0.14986060559749603, "learning_rate": 3.917392342939044e-06, "loss": 0.8639, "step": 149560 }, { "epoch": 1.0826872823876017, "grad_norm": 0.1607617884874344, "learning_rate": 3.9173199562784576e-06, "loss": 0.8578, "step": 149570 }, { "epoch": 1.0827596690481878, "grad_norm": 0.1520787626504898, "learning_rate": 3.917247569617871e-06, "loss": 0.8506, "step": 149580 }, { "epoch": 1.082832055708774, "grad_norm": 0.15222980082035065, "learning_rate": 3.917175182957285e-06, "loss": 0.8564, "step": 149590 }, { "epoch": 1.0829044423693601, "grad_norm": 0.14734259247779846, "learning_rate": 3.917102796296699e-06, "loss": 0.8571, "step": 149600 }, { "epoch": 1.0829768290299464, "grad_norm": 0.15078797936439514, "learning_rate": 3.917030409636113e-06, "loss": 0.8483, "step": 149610 }, { "epoch": 1.0830492156905325, "grad_norm": 0.15337948501110077, "learning_rate": 3.9169580229755265e-06, "loss": 0.8463, "step": 149620 }, { "epoch": 1.0831216023511188, "grad_norm": 0.24916933476924896, "learning_rate": 3.91688563631494e-06, "loss": 0.8624, "step": 149630 }, { "epoch": 1.0831939890117048, "grad_norm": 0.15374572575092316, "learning_rate": 3.9168132496543546e-06, "loss": 0.8475, "step": 149640 }, { "epoch": 1.0832663756722911, "grad_norm": 0.17309990525245667, "learning_rate": 3.916740862993768e-06, "loss": 0.8458, "step": 149650 }, { "epoch": 1.0833387623328772, "grad_norm": 0.16714857518672943, "learning_rate": 3.916668476333181e-06, "loss": 0.8569, "step": 149660 }, { "epoch": 1.0834111489934635, "grad_norm": 0.15130509436130524, "learning_rate": 3.916596089672595e-06, "loss": 0.8534, "step": 149670 }, { "epoch": 1.0834835356540498, "grad_norm": 0.1503904163837433, "learning_rate": 3.916523703012009e-06, "loss": 0.8673, "step": 149680 }, { "epoch": 1.0835559223146358, "grad_norm": 0.15241362154483795, "learning_rate": 3.916451316351423e-06, "loss": 0.8575, "step": 149690 }, { "epoch": 1.083628308975222, "grad_norm": 0.15771149098873138, "learning_rate": 3.916378929690836e-06, "loss": 0.8571, "step": 149700 }, { "epoch": 1.0837006956358082, "grad_norm": 0.17620976269245148, "learning_rate": 3.916306543030251e-06, "loss": 0.8588, "step": 149710 }, { "epoch": 1.0837730822963945, "grad_norm": 0.15407635271549225, "learning_rate": 3.916234156369664e-06, "loss": 0.8585, "step": 149720 }, { "epoch": 1.0838454689569805, "grad_norm": 0.14716872572898865, "learning_rate": 3.916161769709078e-06, "loss": 0.8473, "step": 149730 }, { "epoch": 1.0839178556175668, "grad_norm": 0.14605097472667694, "learning_rate": 3.916089383048492e-06, "loss": 0.8604, "step": 149740 }, { "epoch": 1.0839902422781529, "grad_norm": 0.15472210943698883, "learning_rate": 3.916016996387906e-06, "loss": 0.8726, "step": 149750 }, { "epoch": 1.0840626289387392, "grad_norm": 0.15628187358379364, "learning_rate": 3.91594460972732e-06, "loss": 0.8521, "step": 149760 }, { "epoch": 1.0841350155993255, "grad_norm": 0.1554306596517563, "learning_rate": 3.915872223066733e-06, "loss": 0.8629, "step": 149770 }, { "epoch": 1.0842074022599115, "grad_norm": 0.14699378609657288, "learning_rate": 3.915799836406147e-06, "loss": 0.8479, "step": 149780 }, { "epoch": 1.0842797889204978, "grad_norm": 0.1461055874824524, "learning_rate": 3.915727449745561e-06, "loss": 0.8519, "step": 149790 }, { "epoch": 1.0843521755810839, "grad_norm": 0.1457691639661789, "learning_rate": 3.915655063084975e-06, "loss": 0.8723, "step": 149800 }, { "epoch": 1.0844245622416702, "grad_norm": 0.1460854858160019, "learning_rate": 3.915582676424389e-06, "loss": 0.8567, "step": 149810 }, { "epoch": 1.0844969489022562, "grad_norm": 0.14653198421001434, "learning_rate": 3.915510289763802e-06, "loss": 0.8545, "step": 149820 }, { "epoch": 1.0845693355628425, "grad_norm": 0.1542053073644638, "learning_rate": 3.915437903103217e-06, "loss": 0.8549, "step": 149830 }, { "epoch": 1.0846417222234286, "grad_norm": 0.1533014178276062, "learning_rate": 3.91536551644263e-06, "loss": 0.8684, "step": 149840 }, { "epoch": 1.0847141088840149, "grad_norm": 0.1554461568593979, "learning_rate": 3.915293129782044e-06, "loss": 0.8568, "step": 149850 }, { "epoch": 1.084786495544601, "grad_norm": 0.15294988453388214, "learning_rate": 3.9152207431214575e-06, "loss": 0.8692, "step": 149860 }, { "epoch": 1.0848588822051872, "grad_norm": 0.1588299423456192, "learning_rate": 3.915148356460872e-06, "loss": 0.8545, "step": 149870 }, { "epoch": 1.0849312688657735, "grad_norm": 0.14999845623970032, "learning_rate": 3.915075969800286e-06, "loss": 0.8722, "step": 149880 }, { "epoch": 1.0850036555263596, "grad_norm": 0.1531199961900711, "learning_rate": 3.915003583139699e-06, "loss": 0.8564, "step": 149890 }, { "epoch": 1.0850760421869459, "grad_norm": 0.1617061197757721, "learning_rate": 3.914931196479113e-06, "loss": 0.8435, "step": 149900 }, { "epoch": 1.085148428847532, "grad_norm": 0.1557346135377884, "learning_rate": 3.914858809818527e-06, "loss": 0.8582, "step": 149910 }, { "epoch": 1.0852208155081182, "grad_norm": 0.14734655618667603, "learning_rate": 3.914786423157941e-06, "loss": 0.8694, "step": 149920 }, { "epoch": 1.0852932021687043, "grad_norm": 0.1435697227716446, "learning_rate": 3.9147140364973545e-06, "loss": 0.8429, "step": 149930 }, { "epoch": 1.0853655888292906, "grad_norm": 0.15137584507465363, "learning_rate": 3.914641649836768e-06, "loss": 0.866, "step": 149940 }, { "epoch": 1.0854379754898766, "grad_norm": 0.1859600692987442, "learning_rate": 3.914569263176183e-06, "loss": 0.8518, "step": 149950 }, { "epoch": 1.085510362150463, "grad_norm": 0.15186361968517303, "learning_rate": 3.914496876515596e-06, "loss": 0.8694, "step": 149960 }, { "epoch": 1.0855827488110492, "grad_norm": 0.1560952514410019, "learning_rate": 3.91442448985501e-06, "loss": 0.8628, "step": 149970 }, { "epoch": 1.0856551354716353, "grad_norm": 0.14067547023296356, "learning_rate": 3.9143521031944234e-06, "loss": 0.8585, "step": 149980 }, { "epoch": 1.0857275221322216, "grad_norm": 0.15680783987045288, "learning_rate": 3.914279716533838e-06, "loss": 0.8498, "step": 149990 }, { "epoch": 1.0857999087928076, "grad_norm": 0.15567612648010254, "learning_rate": 3.9142073298732515e-06, "loss": 0.8562, "step": 150000 }, { "epoch": 1.085872295453394, "grad_norm": 0.1518632471561432, "learning_rate": 3.914134943212665e-06, "loss": 0.8667, "step": 150010 }, { "epoch": 1.08594468211398, "grad_norm": 0.15622593462467194, "learning_rate": 3.914062556552079e-06, "loss": 0.8498, "step": 150020 }, { "epoch": 1.0860170687745663, "grad_norm": 0.15988045930862427, "learning_rate": 3.913990169891493e-06, "loss": 0.8651, "step": 150030 }, { "epoch": 1.0860894554351523, "grad_norm": 0.15659677982330322, "learning_rate": 3.913917783230907e-06, "loss": 0.8574, "step": 150040 }, { "epoch": 1.0861618420957386, "grad_norm": 0.15888313949108124, "learning_rate": 3.9138453965703205e-06, "loss": 0.8573, "step": 150050 }, { "epoch": 1.0862342287563247, "grad_norm": 0.16083279252052307, "learning_rate": 3.913773009909734e-06, "loss": 0.8572, "step": 150060 }, { "epoch": 1.086306615416911, "grad_norm": 0.14704735577106476, "learning_rate": 3.9137006232491485e-06, "loss": 0.8524, "step": 150070 }, { "epoch": 1.0863790020774973, "grad_norm": 0.15133285522460938, "learning_rate": 3.913628236588562e-06, "loss": 0.8759, "step": 150080 }, { "epoch": 1.0864513887380833, "grad_norm": 0.14864712953567505, "learning_rate": 3.913555849927976e-06, "loss": 0.8561, "step": 150090 }, { "epoch": 1.0865237753986696, "grad_norm": 0.14330703020095825, "learning_rate": 3.913483463267389e-06, "loss": 0.8613, "step": 150100 }, { "epoch": 1.0865961620592557, "grad_norm": 0.1465221792459488, "learning_rate": 3.913411076606803e-06, "loss": 0.8581, "step": 150110 }, { "epoch": 1.086668548719842, "grad_norm": 0.14701513946056366, "learning_rate": 3.9133386899462175e-06, "loss": 0.8542, "step": 150120 }, { "epoch": 1.086740935380428, "grad_norm": 0.15257854759693146, "learning_rate": 3.913266303285631e-06, "loss": 0.868, "step": 150130 }, { "epoch": 1.0868133220410143, "grad_norm": 0.15649712085723877, "learning_rate": 3.913193916625045e-06, "loss": 0.8602, "step": 150140 }, { "epoch": 1.0868857087016004, "grad_norm": 0.14844316244125366, "learning_rate": 3.913121529964458e-06, "loss": 0.8488, "step": 150150 }, { "epoch": 1.0869580953621867, "grad_norm": 0.14990560710430145, "learning_rate": 3.913049143303873e-06, "loss": 0.8656, "step": 150160 }, { "epoch": 1.0870304820227727, "grad_norm": 0.17032568156719208, "learning_rate": 3.912976756643286e-06, "loss": 0.8592, "step": 150170 }, { "epoch": 1.087102868683359, "grad_norm": 0.16417841613292694, "learning_rate": 3.9129043699827e-06, "loss": 0.8502, "step": 150180 }, { "epoch": 1.0871752553439453, "grad_norm": 0.16040199995040894, "learning_rate": 3.912831983322114e-06, "loss": 0.8603, "step": 150190 }, { "epoch": 1.0872476420045314, "grad_norm": 0.15688760578632355, "learning_rate": 3.912759596661527e-06, "loss": 0.8449, "step": 150200 }, { "epoch": 1.0873200286651177, "grad_norm": 0.14727970957756042, "learning_rate": 3.912687210000941e-06, "loss": 0.8688, "step": 150210 }, { "epoch": 1.0873924153257037, "grad_norm": 0.1582842767238617, "learning_rate": 3.912614823340355e-06, "loss": 0.8636, "step": 150220 }, { "epoch": 1.08746480198629, "grad_norm": 0.15592309832572937, "learning_rate": 3.912542436679769e-06, "loss": 0.8656, "step": 150230 }, { "epoch": 1.087537188646876, "grad_norm": 0.1545068770647049, "learning_rate": 3.9124700500191826e-06, "loss": 0.8603, "step": 150240 }, { "epoch": 1.0876095753074624, "grad_norm": 0.15706755220890045, "learning_rate": 3.912397663358596e-06, "loss": 0.8549, "step": 150250 }, { "epoch": 1.0876819619680484, "grad_norm": 0.15659025311470032, "learning_rate": 3.912325276698011e-06, "loss": 0.8617, "step": 150260 }, { "epoch": 1.0877543486286347, "grad_norm": 0.18718166649341583, "learning_rate": 3.912252890037424e-06, "loss": 0.8653, "step": 150270 }, { "epoch": 1.0878267352892208, "grad_norm": 0.15322886407375336, "learning_rate": 3.912180503376838e-06, "loss": 0.8603, "step": 150280 }, { "epoch": 1.087899121949807, "grad_norm": 0.16585904359817505, "learning_rate": 3.9121081167162515e-06, "loss": 0.8705, "step": 150290 }, { "epoch": 1.0879715086103934, "grad_norm": 0.13771013915538788, "learning_rate": 3.912035730055665e-06, "loss": 0.8482, "step": 150300 }, { "epoch": 1.0880438952709794, "grad_norm": 0.16550597548484802, "learning_rate": 3.9119633433950796e-06, "loss": 0.8535, "step": 150310 }, { "epoch": 1.0881162819315657, "grad_norm": 0.1450309306383133, "learning_rate": 3.911890956734493e-06, "loss": 0.8588, "step": 150320 }, { "epoch": 1.0881886685921518, "grad_norm": 0.15524572134017944, "learning_rate": 3.911818570073907e-06, "loss": 0.8563, "step": 150330 }, { "epoch": 1.088261055252738, "grad_norm": 0.1510109007358551, "learning_rate": 3.91174618341332e-06, "loss": 0.8731, "step": 150340 }, { "epoch": 1.0883334419133242, "grad_norm": 0.15307964384555817, "learning_rate": 3.911673796752735e-06, "loss": 0.864, "step": 150350 }, { "epoch": 1.0884058285739104, "grad_norm": 0.15317986905574799, "learning_rate": 3.9116014100921485e-06, "loss": 0.8565, "step": 150360 }, { "epoch": 1.0884782152344965, "grad_norm": 0.15566273033618927, "learning_rate": 3.911529023431562e-06, "loss": 0.8512, "step": 150370 }, { "epoch": 1.0885506018950828, "grad_norm": 0.14965330064296722, "learning_rate": 3.911456636770976e-06, "loss": 0.8651, "step": 150380 }, { "epoch": 1.0886229885556689, "grad_norm": 0.1521342396736145, "learning_rate": 3.91138425011039e-06, "loss": 0.8568, "step": 150390 }, { "epoch": 1.0886953752162551, "grad_norm": 0.1588059663772583, "learning_rate": 3.911311863449804e-06, "loss": 0.8595, "step": 150400 }, { "epoch": 1.0887677618768414, "grad_norm": 0.145501509308815, "learning_rate": 3.911239476789217e-06, "loss": 0.8497, "step": 150410 }, { "epoch": 1.0888401485374275, "grad_norm": 0.14711380004882812, "learning_rate": 3.911167090128631e-06, "loss": 0.849, "step": 150420 }, { "epoch": 1.0889125351980138, "grad_norm": 0.1537601202726364, "learning_rate": 3.9110947034680455e-06, "loss": 0.8739, "step": 150430 }, { "epoch": 1.0889849218585999, "grad_norm": 0.1484655886888504, "learning_rate": 3.911022316807459e-06, "loss": 0.8626, "step": 150440 }, { "epoch": 1.0890573085191861, "grad_norm": 0.14920607209205627, "learning_rate": 3.910949930146873e-06, "loss": 0.8466, "step": 150450 }, { "epoch": 1.0891296951797722, "grad_norm": 0.16342855989933014, "learning_rate": 3.910877543486286e-06, "loss": 0.858, "step": 150460 }, { "epoch": 1.0892020818403585, "grad_norm": 0.15427295863628387, "learning_rate": 3.910805156825701e-06, "loss": 0.859, "step": 150470 }, { "epoch": 1.0892744685009446, "grad_norm": 0.15797512233257294, "learning_rate": 3.9107327701651144e-06, "loss": 0.8466, "step": 150480 }, { "epoch": 1.0893468551615308, "grad_norm": 0.15120099484920502, "learning_rate": 3.910660383504528e-06, "loss": 0.8429, "step": 150490 }, { "epoch": 1.089419241822117, "grad_norm": 0.16131120920181274, "learning_rate": 3.910587996843942e-06, "loss": 0.8562, "step": 150500 }, { "epoch": 1.0894916284827032, "grad_norm": 0.15284837782382965, "learning_rate": 3.910515610183356e-06, "loss": 0.8584, "step": 150510 }, { "epoch": 1.0895640151432895, "grad_norm": 0.1720409095287323, "learning_rate": 3.91044322352277e-06, "loss": 0.8637, "step": 150520 }, { "epoch": 1.0896364018038756, "grad_norm": 0.16043171286582947, "learning_rate": 3.910370836862183e-06, "loss": 0.8538, "step": 150530 }, { "epoch": 1.0897087884644618, "grad_norm": 0.16862253844738007, "learning_rate": 3.910298450201597e-06, "loss": 0.8509, "step": 150540 }, { "epoch": 1.089781175125048, "grad_norm": 0.1581002175807953, "learning_rate": 3.9102260635410114e-06, "loss": 0.851, "step": 150550 }, { "epoch": 1.0898535617856342, "grad_norm": 0.15931819379329681, "learning_rate": 3.910153676880425e-06, "loss": 0.8528, "step": 150560 }, { "epoch": 1.0899259484462203, "grad_norm": 0.1453382819890976, "learning_rate": 3.910081290219839e-06, "loss": 0.862, "step": 150570 }, { "epoch": 1.0899983351068065, "grad_norm": 0.14687404036521912, "learning_rate": 3.910008903559252e-06, "loss": 0.8474, "step": 150580 }, { "epoch": 1.0900707217673926, "grad_norm": 0.17053967714309692, "learning_rate": 3.909936516898667e-06, "loss": 0.8553, "step": 150590 }, { "epoch": 1.090143108427979, "grad_norm": 0.1455235779285431, "learning_rate": 3.90986413023808e-06, "loss": 0.8503, "step": 150600 }, { "epoch": 1.090215495088565, "grad_norm": 0.15172958374023438, "learning_rate": 3.909791743577494e-06, "loss": 0.8488, "step": 150610 }, { "epoch": 1.0902878817491513, "grad_norm": 0.15800823271274567, "learning_rate": 3.909719356916908e-06, "loss": 0.8566, "step": 150620 }, { "epoch": 1.0903602684097375, "grad_norm": 0.13901235163211823, "learning_rate": 3.909646970256322e-06, "loss": 0.8373, "step": 150630 }, { "epoch": 1.0904326550703236, "grad_norm": 0.1460275948047638, "learning_rate": 3.909574583595736e-06, "loss": 0.8577, "step": 150640 }, { "epoch": 1.09050504173091, "grad_norm": 0.1747177392244339, "learning_rate": 3.909502196935149e-06, "loss": 0.8456, "step": 150650 }, { "epoch": 1.090577428391496, "grad_norm": 0.1587553173303604, "learning_rate": 3.909429810274563e-06, "loss": 0.8445, "step": 150660 }, { "epoch": 1.0906498150520822, "grad_norm": 0.1453934758901596, "learning_rate": 3.909357423613977e-06, "loss": 0.8659, "step": 150670 }, { "epoch": 1.0907222017126683, "grad_norm": 0.14467094838619232, "learning_rate": 3.909285036953391e-06, "loss": 0.8556, "step": 150680 }, { "epoch": 1.0907945883732546, "grad_norm": 0.16606931388378143, "learning_rate": 3.909212650292805e-06, "loss": 0.8616, "step": 150690 }, { "epoch": 1.0908669750338407, "grad_norm": 0.1615527719259262, "learning_rate": 3.909140263632218e-06, "loss": 0.866, "step": 150700 }, { "epoch": 1.090939361694427, "grad_norm": 0.15768449008464813, "learning_rate": 3.909067876971633e-06, "loss": 0.8517, "step": 150710 }, { "epoch": 1.091011748355013, "grad_norm": 0.15066152811050415, "learning_rate": 3.9089954903110454e-06, "loss": 0.8518, "step": 150720 }, { "epoch": 1.0910841350155993, "grad_norm": 0.1462964415550232, "learning_rate": 3.908923103650459e-06, "loss": 0.8543, "step": 150730 }, { "epoch": 1.0911565216761856, "grad_norm": 0.16555063426494598, "learning_rate": 3.9088507169898735e-06, "loss": 0.8591, "step": 150740 }, { "epoch": 1.0912289083367717, "grad_norm": 0.14814066886901855, "learning_rate": 3.908778330329287e-06, "loss": 0.856, "step": 150750 }, { "epoch": 1.091301294997358, "grad_norm": 0.16804130375385284, "learning_rate": 3.908705943668701e-06, "loss": 0.8561, "step": 150760 }, { "epoch": 1.091373681657944, "grad_norm": 0.14669744670391083, "learning_rate": 3.908633557008114e-06, "loss": 0.8556, "step": 150770 }, { "epoch": 1.0914460683185303, "grad_norm": 0.16342781484127045, "learning_rate": 3.908561170347529e-06, "loss": 0.8664, "step": 150780 }, { "epoch": 1.0915184549791164, "grad_norm": 0.16906608641147614, "learning_rate": 3.9084887836869425e-06, "loss": 0.8687, "step": 150790 }, { "epoch": 1.0915908416397027, "grad_norm": 0.15913209319114685, "learning_rate": 3.908416397026356e-06, "loss": 0.8517, "step": 150800 }, { "epoch": 1.0916632283002887, "grad_norm": 0.15466493368148804, "learning_rate": 3.90834401036577e-06, "loss": 0.8546, "step": 150810 }, { "epoch": 1.091735614960875, "grad_norm": 0.19748826324939728, "learning_rate": 3.908271623705184e-06, "loss": 0.866, "step": 150820 }, { "epoch": 1.0918080016214613, "grad_norm": 0.15401212871074677, "learning_rate": 3.908199237044598e-06, "loss": 0.8585, "step": 150830 }, { "epoch": 1.0918803882820474, "grad_norm": 0.1531715989112854, "learning_rate": 3.908126850384011e-06, "loss": 0.8631, "step": 150840 }, { "epoch": 1.0919527749426337, "grad_norm": 0.14852984249591827, "learning_rate": 3.908054463723425e-06, "loss": 0.8497, "step": 150850 }, { "epoch": 1.0920251616032197, "grad_norm": 0.1562282145023346, "learning_rate": 3.9079820770628395e-06, "loss": 0.8547, "step": 150860 }, { "epoch": 1.092097548263806, "grad_norm": 0.14857418835163116, "learning_rate": 3.907909690402253e-06, "loss": 0.8633, "step": 150870 }, { "epoch": 1.092169934924392, "grad_norm": 0.1435396671295166, "learning_rate": 3.907837303741667e-06, "loss": 0.8574, "step": 150880 }, { "epoch": 1.0922423215849784, "grad_norm": 0.1506817787885666, "learning_rate": 3.90776491708108e-06, "loss": 0.8692, "step": 150890 }, { "epoch": 1.0923147082455644, "grad_norm": 0.1591109335422516, "learning_rate": 3.907692530420494e-06, "loss": 0.8619, "step": 150900 }, { "epoch": 1.0923870949061507, "grad_norm": 0.16759736835956573, "learning_rate": 3.907620143759908e-06, "loss": 0.8634, "step": 150910 }, { "epoch": 1.0924594815667368, "grad_norm": 0.15088944137096405, "learning_rate": 3.907547757099322e-06, "loss": 0.8632, "step": 150920 }, { "epoch": 1.092531868227323, "grad_norm": 0.16850528120994568, "learning_rate": 3.907475370438736e-06, "loss": 0.8562, "step": 150930 }, { "epoch": 1.0926042548879094, "grad_norm": 0.15842682123184204, "learning_rate": 3.907402983778149e-06, "loss": 0.8653, "step": 150940 }, { "epoch": 1.0926766415484954, "grad_norm": 0.30550140142440796, "learning_rate": 3.907330597117564e-06, "loss": 0.852, "step": 150950 }, { "epoch": 1.0927490282090817, "grad_norm": 0.139174684882164, "learning_rate": 3.907258210456977e-06, "loss": 0.8543, "step": 150960 }, { "epoch": 1.0928214148696678, "grad_norm": 0.15121084451675415, "learning_rate": 3.907185823796391e-06, "loss": 0.8517, "step": 150970 }, { "epoch": 1.092893801530254, "grad_norm": 0.1478102058172226, "learning_rate": 3.9071134371358046e-06, "loss": 0.8651, "step": 150980 }, { "epoch": 1.0929661881908401, "grad_norm": 0.1502637416124344, "learning_rate": 3.907041050475219e-06, "loss": 0.8538, "step": 150990 }, { "epoch": 1.0930385748514264, "grad_norm": 0.14458067715168, "learning_rate": 3.906968663814633e-06, "loss": 0.8546, "step": 151000 }, { "epoch": 1.0931109615120125, "grad_norm": 0.14992700517177582, "learning_rate": 3.906896277154046e-06, "loss": 0.8584, "step": 151010 }, { "epoch": 1.0931833481725988, "grad_norm": 0.15242300927639008, "learning_rate": 3.90682389049346e-06, "loss": 0.8674, "step": 151020 }, { "epoch": 1.093255734833185, "grad_norm": 0.14612126350402832, "learning_rate": 3.906751503832874e-06, "loss": 0.8515, "step": 151030 }, { "epoch": 1.0933281214937711, "grad_norm": 0.1628275364637375, "learning_rate": 3.906679117172288e-06, "loss": 0.8642, "step": 151040 }, { "epoch": 1.0934005081543574, "grad_norm": 0.1532110571861267, "learning_rate": 3.9066067305117016e-06, "loss": 0.8566, "step": 151050 }, { "epoch": 1.0934728948149435, "grad_norm": 0.15354089438915253, "learning_rate": 3.906534343851115e-06, "loss": 0.8534, "step": 151060 }, { "epoch": 1.0935452814755298, "grad_norm": 0.14480741322040558, "learning_rate": 3.90646195719053e-06, "loss": 0.8601, "step": 151070 }, { "epoch": 1.0936176681361158, "grad_norm": 0.15542499721050262, "learning_rate": 3.906389570529943e-06, "loss": 0.8666, "step": 151080 }, { "epoch": 1.0936900547967021, "grad_norm": 0.1690310835838318, "learning_rate": 3.906317183869357e-06, "loss": 0.8571, "step": 151090 }, { "epoch": 1.0937624414572882, "grad_norm": 0.15053977072238922, "learning_rate": 3.9062447972087705e-06, "loss": 0.8588, "step": 151100 }, { "epoch": 1.0938348281178745, "grad_norm": 0.15322209894657135, "learning_rate": 3.906172410548185e-06, "loss": 0.8564, "step": 151110 }, { "epoch": 1.0939072147784605, "grad_norm": 0.15235215425491333, "learning_rate": 3.9061000238875986e-06, "loss": 0.8649, "step": 151120 }, { "epoch": 1.0939796014390468, "grad_norm": 0.15845470130443573, "learning_rate": 3.906027637227012e-06, "loss": 0.8585, "step": 151130 }, { "epoch": 1.094051988099633, "grad_norm": 0.1422303318977356, "learning_rate": 3.905955250566426e-06, "loss": 0.8602, "step": 151140 }, { "epoch": 1.0941243747602192, "grad_norm": 0.16013941168785095, "learning_rate": 3.90588286390584e-06, "loss": 0.8649, "step": 151150 }, { "epoch": 1.0941967614208055, "grad_norm": 0.16993825137615204, "learning_rate": 3.905810477245254e-06, "loss": 0.8522, "step": 151160 }, { "epoch": 1.0942691480813915, "grad_norm": 0.16168084740638733, "learning_rate": 3.9057380905846675e-06, "loss": 0.8599, "step": 151170 }, { "epoch": 1.0943415347419778, "grad_norm": 0.15797677636146545, "learning_rate": 3.905665703924081e-06, "loss": 0.8621, "step": 151180 }, { "epoch": 1.0944139214025639, "grad_norm": 0.15897414088249207, "learning_rate": 3.9055933172634956e-06, "loss": 0.8508, "step": 151190 }, { "epoch": 1.0944863080631502, "grad_norm": 0.16979162395000458, "learning_rate": 3.905520930602909e-06, "loss": 0.8518, "step": 151200 }, { "epoch": 1.0945586947237362, "grad_norm": 0.1656196266412735, "learning_rate": 3.905448543942323e-06, "loss": 0.8492, "step": 151210 }, { "epoch": 1.0946310813843225, "grad_norm": 0.15596655011177063, "learning_rate": 3.905376157281736e-06, "loss": 0.8483, "step": 151220 }, { "epoch": 1.0947034680449086, "grad_norm": 0.15505443513393402, "learning_rate": 3.905303770621151e-06, "loss": 0.8654, "step": 151230 }, { "epoch": 1.0947758547054949, "grad_norm": 0.1517280638217926, "learning_rate": 3.9052313839605645e-06, "loss": 0.8622, "step": 151240 }, { "epoch": 1.0948482413660812, "grad_norm": 0.1682533472776413, "learning_rate": 3.905158997299977e-06, "loss": 0.8528, "step": 151250 }, { "epoch": 1.0949206280266672, "grad_norm": 0.1618666797876358, "learning_rate": 3.905086610639392e-06, "loss": 0.8599, "step": 151260 }, { "epoch": 1.0949930146872535, "grad_norm": 0.15311026573181152, "learning_rate": 3.905014223978805e-06, "loss": 0.8581, "step": 151270 }, { "epoch": 1.0950654013478396, "grad_norm": 0.18201227486133575, "learning_rate": 3.904941837318219e-06, "loss": 0.8654, "step": 151280 }, { "epoch": 1.0951377880084259, "grad_norm": 0.1799287348985672, "learning_rate": 3.904869450657633e-06, "loss": 0.8493, "step": 151290 }, { "epoch": 1.095210174669012, "grad_norm": 0.13958740234375, "learning_rate": 3.904797063997047e-06, "loss": 0.8565, "step": 151300 }, { "epoch": 1.0952825613295982, "grad_norm": 0.1535644382238388, "learning_rate": 3.904724677336461e-06, "loss": 0.8495, "step": 151310 }, { "epoch": 1.0953549479901843, "grad_norm": 0.14460021257400513, "learning_rate": 3.904652290675874e-06, "loss": 0.8723, "step": 151320 }, { "epoch": 1.0954273346507706, "grad_norm": 0.16038860380649567, "learning_rate": 3.904579904015288e-06, "loss": 0.8534, "step": 151330 }, { "epoch": 1.0954997213113566, "grad_norm": 0.17048697173595428, "learning_rate": 3.904507517354702e-06, "loss": 0.8529, "step": 151340 }, { "epoch": 1.095572107971943, "grad_norm": 0.15420754253864288, "learning_rate": 3.904435130694116e-06, "loss": 0.8602, "step": 151350 }, { "epoch": 1.0956444946325292, "grad_norm": 0.14572323858737946, "learning_rate": 3.90436274403353e-06, "loss": 0.8527, "step": 151360 }, { "epoch": 1.0957168812931153, "grad_norm": 0.16259993612766266, "learning_rate": 3.904290357372943e-06, "loss": 0.8697, "step": 151370 }, { "epoch": 1.0957892679537016, "grad_norm": 0.15503832697868347, "learning_rate": 3.904217970712358e-06, "loss": 0.862, "step": 151380 }, { "epoch": 1.0958616546142876, "grad_norm": 0.1643747240304947, "learning_rate": 3.904145584051771e-06, "loss": 0.8619, "step": 151390 }, { "epoch": 1.095934041274874, "grad_norm": 0.1568668931722641, "learning_rate": 3.904073197391185e-06, "loss": 0.8584, "step": 151400 }, { "epoch": 1.09600642793546, "grad_norm": 0.1458984911441803, "learning_rate": 3.9040008107305985e-06, "loss": 0.8597, "step": 151410 }, { "epoch": 1.0960788145960463, "grad_norm": 0.15451470017433167, "learning_rate": 3.903928424070013e-06, "loss": 0.8573, "step": 151420 }, { "epoch": 1.0961512012566323, "grad_norm": 0.15595468878746033, "learning_rate": 3.903856037409427e-06, "loss": 0.8785, "step": 151430 }, { "epoch": 1.0962235879172186, "grad_norm": 0.15538018941879272, "learning_rate": 3.90378365074884e-06, "loss": 0.8642, "step": 151440 }, { "epoch": 1.0962959745778047, "grad_norm": 0.15324239432811737, "learning_rate": 3.903711264088254e-06, "loss": 0.8584, "step": 151450 }, { "epoch": 1.096368361238391, "grad_norm": 0.14837108552455902, "learning_rate": 3.903638877427668e-06, "loss": 0.8654, "step": 151460 }, { "epoch": 1.0964407478989773, "grad_norm": 0.1704886555671692, "learning_rate": 3.903566490767082e-06, "loss": 0.8615, "step": 151470 }, { "epoch": 1.0965131345595633, "grad_norm": 0.16142944991588593, "learning_rate": 3.9034941041064955e-06, "loss": 0.8701, "step": 151480 }, { "epoch": 1.0965855212201496, "grad_norm": 0.16319426894187927, "learning_rate": 3.903421717445909e-06, "loss": 0.8603, "step": 151490 }, { "epoch": 1.0966579078807357, "grad_norm": 0.18104782700538635, "learning_rate": 3.903349330785324e-06, "loss": 0.8598, "step": 151500 }, { "epoch": 1.096730294541322, "grad_norm": 0.15671753883361816, "learning_rate": 3.903276944124737e-06, "loss": 0.8621, "step": 151510 }, { "epoch": 1.096802681201908, "grad_norm": 0.1673654317855835, "learning_rate": 3.903204557464151e-06, "loss": 0.8553, "step": 151520 }, { "epoch": 1.0968750678624943, "grad_norm": 0.14993630349636078, "learning_rate": 3.9031321708035645e-06, "loss": 0.846, "step": 151530 }, { "epoch": 1.0969474545230804, "grad_norm": 0.16005301475524902, "learning_rate": 3.903059784142978e-06, "loss": 0.8595, "step": 151540 }, { "epoch": 1.0970198411836667, "grad_norm": 0.1531304568052292, "learning_rate": 3.9029873974823925e-06, "loss": 0.8566, "step": 151550 }, { "epoch": 1.0970922278442528, "grad_norm": 0.15316185355186462, "learning_rate": 3.902915010821806e-06, "loss": 0.848, "step": 151560 }, { "epoch": 1.097164614504839, "grad_norm": 0.16039323806762695, "learning_rate": 3.90284262416122e-06, "loss": 0.8562, "step": 151570 }, { "epoch": 1.0972370011654253, "grad_norm": 0.14785118401050568, "learning_rate": 3.902770237500633e-06, "loss": 0.8508, "step": 151580 }, { "epoch": 1.0973093878260114, "grad_norm": 0.16044607758522034, "learning_rate": 3.902697850840048e-06, "loss": 0.8569, "step": 151590 }, { "epoch": 1.0973817744865977, "grad_norm": 0.16360950469970703, "learning_rate": 3.9026254641794615e-06, "loss": 0.8555, "step": 151600 }, { "epoch": 1.0974541611471837, "grad_norm": 0.15035872161388397, "learning_rate": 3.902553077518875e-06, "loss": 0.8524, "step": 151610 }, { "epoch": 1.09752654780777, "grad_norm": 0.19104793667793274, "learning_rate": 3.902480690858289e-06, "loss": 0.8468, "step": 151620 }, { "epoch": 1.097598934468356, "grad_norm": 0.1884557455778122, "learning_rate": 3.902408304197703e-06, "loss": 0.8692, "step": 151630 }, { "epoch": 1.0976713211289424, "grad_norm": 0.15501312911510468, "learning_rate": 3.902335917537117e-06, "loss": 0.8693, "step": 151640 }, { "epoch": 1.0977437077895285, "grad_norm": 0.1506832242012024, "learning_rate": 3.90226353087653e-06, "loss": 0.8568, "step": 151650 }, { "epoch": 1.0978160944501147, "grad_norm": 0.1559952199459076, "learning_rate": 3.902191144215944e-06, "loss": 0.8556, "step": 151660 }, { "epoch": 1.0978884811107008, "grad_norm": 0.17316295206546783, "learning_rate": 3.9021187575553585e-06, "loss": 0.8683, "step": 151670 }, { "epoch": 1.097960867771287, "grad_norm": 0.1504284292459488, "learning_rate": 3.902046370894772e-06, "loss": 0.8607, "step": 151680 }, { "epoch": 1.0980332544318734, "grad_norm": 0.15068010985851288, "learning_rate": 3.901973984234186e-06, "loss": 0.849, "step": 151690 }, { "epoch": 1.0981056410924595, "grad_norm": 0.14526309072971344, "learning_rate": 3.901901597573599e-06, "loss": 0.8576, "step": 151700 }, { "epoch": 1.0981780277530457, "grad_norm": 0.1651238054037094, "learning_rate": 3.901829210913014e-06, "loss": 0.8712, "step": 151710 }, { "epoch": 1.0982504144136318, "grad_norm": 0.16406778991222382, "learning_rate": 3.901756824252427e-06, "loss": 0.8533, "step": 151720 }, { "epoch": 1.098322801074218, "grad_norm": 0.15506918728351593, "learning_rate": 3.901684437591841e-06, "loss": 0.8644, "step": 151730 }, { "epoch": 1.0983951877348042, "grad_norm": 0.1499270647764206, "learning_rate": 3.901612050931255e-06, "loss": 0.8435, "step": 151740 }, { "epoch": 1.0984675743953904, "grad_norm": 0.15782611072063446, "learning_rate": 3.901539664270669e-06, "loss": 0.8602, "step": 151750 }, { "epoch": 1.0985399610559765, "grad_norm": 0.14612479507923126, "learning_rate": 3.901467277610083e-06, "loss": 0.8569, "step": 151760 }, { "epoch": 1.0986123477165628, "grad_norm": 0.15989379584789276, "learning_rate": 3.901394890949496e-06, "loss": 0.8473, "step": 151770 }, { "epoch": 1.0986847343771489, "grad_norm": 0.14152558147907257, "learning_rate": 3.90132250428891e-06, "loss": 0.8588, "step": 151780 }, { "epoch": 1.0987571210377352, "grad_norm": 0.14922699332237244, "learning_rate": 3.9012501176283236e-06, "loss": 0.8483, "step": 151790 }, { "epoch": 1.0988295076983214, "grad_norm": 0.15178348124027252, "learning_rate": 3.901177730967737e-06, "loss": 0.8548, "step": 151800 }, { "epoch": 1.0989018943589075, "grad_norm": 0.1589786410331726, "learning_rate": 3.901105344307151e-06, "loss": 0.8626, "step": 151810 }, { "epoch": 1.0989742810194938, "grad_norm": 0.15005895495414734, "learning_rate": 3.901032957646565e-06, "loss": 0.8627, "step": 151820 }, { "epoch": 1.0990466676800799, "grad_norm": 0.14310167729854584, "learning_rate": 3.900960570985979e-06, "loss": 0.8621, "step": 151830 }, { "epoch": 1.0991190543406661, "grad_norm": 0.1532042920589447, "learning_rate": 3.9008881843253925e-06, "loss": 0.8571, "step": 151840 }, { "epoch": 1.0991914410012522, "grad_norm": 0.15156692266464233, "learning_rate": 3.900815797664806e-06, "loss": 0.856, "step": 151850 }, { "epoch": 1.0992638276618385, "grad_norm": 0.17657136917114258, "learning_rate": 3.9007434110042206e-06, "loss": 0.8521, "step": 151860 }, { "epoch": 1.0993362143224246, "grad_norm": 0.17290574312210083, "learning_rate": 3.900671024343634e-06, "loss": 0.8681, "step": 151870 }, { "epoch": 1.0994086009830109, "grad_norm": 0.14646093547344208, "learning_rate": 3.900598637683048e-06, "loss": 0.8464, "step": 151880 }, { "epoch": 1.0994809876435971, "grad_norm": 0.14447811245918274, "learning_rate": 3.900526251022461e-06, "loss": 0.8395, "step": 151890 }, { "epoch": 1.0995533743041832, "grad_norm": 0.16864459216594696, "learning_rate": 3.900453864361876e-06, "loss": 0.8542, "step": 151900 }, { "epoch": 1.0996257609647695, "grad_norm": 0.1442280113697052, "learning_rate": 3.9003814777012895e-06, "loss": 0.853, "step": 151910 }, { "epoch": 1.0996981476253556, "grad_norm": 0.15601858496665955, "learning_rate": 3.900309091040703e-06, "loss": 0.8549, "step": 151920 }, { "epoch": 1.0997705342859418, "grad_norm": 0.14018070697784424, "learning_rate": 3.900236704380117e-06, "loss": 0.8535, "step": 151930 }, { "epoch": 1.099842920946528, "grad_norm": 0.14601945877075195, "learning_rate": 3.900164317719531e-06, "loss": 0.8679, "step": 151940 }, { "epoch": 1.0999153076071142, "grad_norm": 0.1592642217874527, "learning_rate": 3.900091931058945e-06, "loss": 0.8574, "step": 151950 }, { "epoch": 1.0999876942677003, "grad_norm": 0.1683703064918518, "learning_rate": 3.900019544398358e-06, "loss": 0.8592, "step": 151960 }, { "epoch": 1.1000600809282866, "grad_norm": 0.15897010266780853, "learning_rate": 3.899947157737772e-06, "loss": 0.8487, "step": 151970 }, { "epoch": 1.1001324675888726, "grad_norm": 0.14617374539375305, "learning_rate": 3.8998747710771865e-06, "loss": 0.8556, "step": 151980 }, { "epoch": 1.100204854249459, "grad_norm": 0.157510906457901, "learning_rate": 3.8998023844166e-06, "loss": 0.857, "step": 151990 }, { "epoch": 1.1002772409100452, "grad_norm": 0.1601264476776123, "learning_rate": 3.899729997756014e-06, "loss": 0.8583, "step": 152000 }, { "epoch": 1.1003496275706313, "grad_norm": 0.16230392456054688, "learning_rate": 3.899657611095427e-06, "loss": 0.8503, "step": 152010 }, { "epoch": 1.1004220142312175, "grad_norm": 0.14697298407554626, "learning_rate": 3.899585224434842e-06, "loss": 0.8452, "step": 152020 }, { "epoch": 1.1004944008918036, "grad_norm": 0.1654907613992691, "learning_rate": 3.8995128377742554e-06, "loss": 0.8531, "step": 152030 }, { "epoch": 1.10056678755239, "grad_norm": 0.1564827561378479, "learning_rate": 3.899440451113669e-06, "loss": 0.8554, "step": 152040 }, { "epoch": 1.100639174212976, "grad_norm": 0.14684629440307617, "learning_rate": 3.899368064453083e-06, "loss": 0.8562, "step": 152050 }, { "epoch": 1.1007115608735623, "grad_norm": 0.15104876458644867, "learning_rate": 3.899295677792497e-06, "loss": 0.8607, "step": 152060 }, { "epoch": 1.1007839475341483, "grad_norm": 0.15481539070606232, "learning_rate": 3.899223291131911e-06, "loss": 0.8587, "step": 152070 }, { "epoch": 1.1008563341947346, "grad_norm": 0.15481428802013397, "learning_rate": 3.899150904471324e-06, "loss": 0.8526, "step": 152080 }, { "epoch": 1.100928720855321, "grad_norm": 0.1640326827764511, "learning_rate": 3.899078517810738e-06, "loss": 0.8632, "step": 152090 }, { "epoch": 1.101001107515907, "grad_norm": 0.1505155861377716, "learning_rate": 3.8990061311501524e-06, "loss": 0.8439, "step": 152100 }, { "epoch": 1.1010734941764933, "grad_norm": 0.1723697930574417, "learning_rate": 3.898933744489566e-06, "loss": 0.8604, "step": 152110 }, { "epoch": 1.1011458808370793, "grad_norm": 0.16168347001075745, "learning_rate": 3.89886135782898e-06, "loss": 0.8418, "step": 152120 }, { "epoch": 1.1012182674976656, "grad_norm": 1.0787012577056885, "learning_rate": 3.898788971168393e-06, "loss": 0.8555, "step": 152130 }, { "epoch": 1.1012906541582517, "grad_norm": 0.15610916912555695, "learning_rate": 3.898716584507807e-06, "loss": 0.8459, "step": 152140 }, { "epoch": 1.101363040818838, "grad_norm": 0.1682717204093933, "learning_rate": 3.898644197847221e-06, "loss": 0.8496, "step": 152150 }, { "epoch": 1.101435427479424, "grad_norm": 0.34548187255859375, "learning_rate": 3.898571811186635e-06, "loss": 0.8519, "step": 152160 }, { "epoch": 1.1015078141400103, "grad_norm": 0.15088237822055817, "learning_rate": 3.898499424526049e-06, "loss": 0.8588, "step": 152170 }, { "epoch": 1.1015802008005964, "grad_norm": 0.13609455525875092, "learning_rate": 3.898427037865462e-06, "loss": 0.847, "step": 152180 }, { "epoch": 1.1016525874611827, "grad_norm": 0.14786964654922485, "learning_rate": 3.898354651204877e-06, "loss": 0.8687, "step": 152190 }, { "epoch": 1.101724974121769, "grad_norm": 0.14596401154994965, "learning_rate": 3.89828226454429e-06, "loss": 0.8543, "step": 152200 }, { "epoch": 1.101797360782355, "grad_norm": 0.1763083040714264, "learning_rate": 3.898209877883704e-06, "loss": 0.8499, "step": 152210 }, { "epoch": 1.1018697474429413, "grad_norm": 0.19486874341964722, "learning_rate": 3.8981374912231175e-06, "loss": 0.857, "step": 152220 }, { "epoch": 1.1019421341035274, "grad_norm": 0.15471166372299194, "learning_rate": 3.898065104562532e-06, "loss": 0.8454, "step": 152230 }, { "epoch": 1.1020145207641137, "grad_norm": 0.14786270260810852, "learning_rate": 3.897992717901946e-06, "loss": 0.8673, "step": 152240 }, { "epoch": 1.1020869074246997, "grad_norm": 0.14872819185256958, "learning_rate": 3.897920331241359e-06, "loss": 0.8543, "step": 152250 }, { "epoch": 1.102159294085286, "grad_norm": 0.1608414351940155, "learning_rate": 3.897847944580773e-06, "loss": 0.8618, "step": 152260 }, { "epoch": 1.102231680745872, "grad_norm": 0.1444377303123474, "learning_rate": 3.897775557920187e-06, "loss": 0.8486, "step": 152270 }, { "epoch": 1.1023040674064584, "grad_norm": 0.16134855151176453, "learning_rate": 3.897703171259601e-06, "loss": 0.8642, "step": 152280 }, { "epoch": 1.1023764540670444, "grad_norm": 0.1678602695465088, "learning_rate": 3.8976307845990145e-06, "loss": 0.8583, "step": 152290 }, { "epoch": 1.1024488407276307, "grad_norm": 0.15565115213394165, "learning_rate": 3.897558397938428e-06, "loss": 0.8503, "step": 152300 }, { "epoch": 1.102521227388217, "grad_norm": 0.16068263351917267, "learning_rate": 3.897486011277842e-06, "loss": 0.8734, "step": 152310 }, { "epoch": 1.102593614048803, "grad_norm": 0.15741673111915588, "learning_rate": 3.897413624617255e-06, "loss": 0.8602, "step": 152320 }, { "epoch": 1.1026660007093894, "grad_norm": 0.17430439591407776, "learning_rate": 3.897341237956669e-06, "loss": 0.8542, "step": 152330 }, { "epoch": 1.1027383873699754, "grad_norm": 0.15103577077388763, "learning_rate": 3.8972688512960835e-06, "loss": 0.8624, "step": 152340 }, { "epoch": 1.1028107740305617, "grad_norm": 0.15457139909267426, "learning_rate": 3.897196464635497e-06, "loss": 0.8546, "step": 152350 }, { "epoch": 1.1028831606911478, "grad_norm": 0.1677873432636261, "learning_rate": 3.897124077974911e-06, "loss": 0.8774, "step": 152360 }, { "epoch": 1.102955547351734, "grad_norm": 0.1446024477481842, "learning_rate": 3.897051691314324e-06, "loss": 0.8584, "step": 152370 }, { "epoch": 1.1030279340123201, "grad_norm": 0.1553516536951065, "learning_rate": 3.896979304653739e-06, "loss": 0.855, "step": 152380 }, { "epoch": 1.1031003206729064, "grad_norm": 0.15812979638576508, "learning_rate": 3.896906917993152e-06, "loss": 0.8538, "step": 152390 }, { "epoch": 1.1031727073334925, "grad_norm": 0.16017301380634308, "learning_rate": 3.896834531332566e-06, "loss": 0.8485, "step": 152400 }, { "epoch": 1.1032450939940788, "grad_norm": 0.15883536636829376, "learning_rate": 3.89676214467198e-06, "loss": 0.8547, "step": 152410 }, { "epoch": 1.103317480654665, "grad_norm": 0.16042304039001465, "learning_rate": 3.896689758011394e-06, "loss": 0.8527, "step": 152420 }, { "epoch": 1.1033898673152511, "grad_norm": 0.1522279679775238, "learning_rate": 3.896617371350808e-06, "loss": 0.8571, "step": 152430 }, { "epoch": 1.1034622539758374, "grad_norm": 0.15726742148399353, "learning_rate": 3.896544984690221e-06, "loss": 0.8514, "step": 152440 }, { "epoch": 1.1035346406364235, "grad_norm": 0.1631903052330017, "learning_rate": 3.896472598029635e-06, "loss": 0.8575, "step": 152450 }, { "epoch": 1.1036070272970098, "grad_norm": 0.15091995894908905, "learning_rate": 3.896400211369049e-06, "loss": 0.8496, "step": 152460 }, { "epoch": 1.1036794139575958, "grad_norm": 0.1674959510564804, "learning_rate": 3.896327824708463e-06, "loss": 0.863, "step": 152470 }, { "epoch": 1.1037518006181821, "grad_norm": 0.15883536636829376, "learning_rate": 3.896255438047877e-06, "loss": 0.8545, "step": 152480 }, { "epoch": 1.1038241872787682, "grad_norm": 0.17401011288166046, "learning_rate": 3.89618305138729e-06, "loss": 0.859, "step": 152490 }, { "epoch": 1.1038965739393545, "grad_norm": 0.1531701236963272, "learning_rate": 3.896110664726705e-06, "loss": 0.8492, "step": 152500 }, { "epoch": 1.1039689605999405, "grad_norm": 0.15053513646125793, "learning_rate": 3.896038278066118e-06, "loss": 0.8485, "step": 152510 }, { "epoch": 1.1040413472605268, "grad_norm": 0.15643155574798584, "learning_rate": 3.895965891405532e-06, "loss": 0.8485, "step": 152520 }, { "epoch": 1.1041137339211131, "grad_norm": 0.14518050849437714, "learning_rate": 3.8958935047449456e-06, "loss": 0.8522, "step": 152530 }, { "epoch": 1.1041861205816992, "grad_norm": 0.16403423249721527, "learning_rate": 3.89582111808436e-06, "loss": 0.8595, "step": 152540 }, { "epoch": 1.1042585072422855, "grad_norm": 0.18213549256324768, "learning_rate": 3.895748731423774e-06, "loss": 0.8591, "step": 152550 }, { "epoch": 1.1043308939028715, "grad_norm": 0.15698887407779694, "learning_rate": 3.895676344763187e-06, "loss": 0.8547, "step": 152560 }, { "epoch": 1.1044032805634578, "grad_norm": 0.1551319658756256, "learning_rate": 3.895603958102601e-06, "loss": 0.8563, "step": 152570 }, { "epoch": 1.1044756672240439, "grad_norm": 0.1502751260995865, "learning_rate": 3.895531571442015e-06, "loss": 0.8594, "step": 152580 }, { "epoch": 1.1045480538846302, "grad_norm": 0.15094530582427979, "learning_rate": 3.895459184781429e-06, "loss": 0.8501, "step": 152590 }, { "epoch": 1.1046204405452162, "grad_norm": 0.14504896104335785, "learning_rate": 3.8953867981208426e-06, "loss": 0.8475, "step": 152600 }, { "epoch": 1.1046928272058025, "grad_norm": 0.14708517491817474, "learning_rate": 3.895314411460256e-06, "loss": 0.8641, "step": 152610 }, { "epoch": 1.1047652138663886, "grad_norm": 0.15217843651771545, "learning_rate": 3.895242024799671e-06, "loss": 0.8524, "step": 152620 }, { "epoch": 1.1048376005269749, "grad_norm": 0.15076854825019836, "learning_rate": 3.895169638139084e-06, "loss": 0.8604, "step": 152630 }, { "epoch": 1.1049099871875612, "grad_norm": 0.1549665480852127, "learning_rate": 3.895097251478498e-06, "loss": 0.8572, "step": 152640 }, { "epoch": 1.1049823738481472, "grad_norm": 0.146447092294693, "learning_rate": 3.8950248648179115e-06, "loss": 0.8508, "step": 152650 }, { "epoch": 1.1050547605087335, "grad_norm": 0.156124547123909, "learning_rate": 3.894952478157326e-06, "loss": 0.863, "step": 152660 }, { "epoch": 1.1051271471693196, "grad_norm": 0.14300669729709625, "learning_rate": 3.8948800914967396e-06, "loss": 0.8673, "step": 152670 }, { "epoch": 1.1051995338299059, "grad_norm": 0.1642896682024002, "learning_rate": 3.894807704836153e-06, "loss": 0.8495, "step": 152680 }, { "epoch": 1.105271920490492, "grad_norm": 0.15257836878299713, "learning_rate": 3.894735318175567e-06, "loss": 0.8463, "step": 152690 }, { "epoch": 1.1053443071510782, "grad_norm": 0.16251547634601593, "learning_rate": 3.894662931514981e-06, "loss": 0.8402, "step": 152700 }, { "epoch": 1.1054166938116643, "grad_norm": 0.145891934633255, "learning_rate": 3.894590544854395e-06, "loss": 0.8492, "step": 152710 }, { "epoch": 1.1054890804722506, "grad_norm": 0.16337795555591583, "learning_rate": 3.8945181581938085e-06, "loss": 0.8457, "step": 152720 }, { "epoch": 1.1055614671328367, "grad_norm": 0.1507614105939865, "learning_rate": 3.894445771533222e-06, "loss": 0.8649, "step": 152730 }, { "epoch": 1.105633853793423, "grad_norm": 0.16109922528266907, "learning_rate": 3.894373384872637e-06, "loss": 0.8611, "step": 152740 }, { "epoch": 1.1057062404540092, "grad_norm": 0.14710114896297455, "learning_rate": 3.89430099821205e-06, "loss": 0.854, "step": 152750 }, { "epoch": 1.1057786271145953, "grad_norm": 0.14651963114738464, "learning_rate": 3.894228611551464e-06, "loss": 0.8518, "step": 152760 }, { "epoch": 1.1058510137751816, "grad_norm": 0.1510373204946518, "learning_rate": 3.8941562248908774e-06, "loss": 0.8726, "step": 152770 }, { "epoch": 1.1059234004357676, "grad_norm": 0.14552970230579376, "learning_rate": 3.894083838230291e-06, "loss": 0.8645, "step": 152780 }, { "epoch": 1.105995787096354, "grad_norm": 0.1477714329957962, "learning_rate": 3.8940114515697055e-06, "loss": 0.8663, "step": 152790 }, { "epoch": 1.10606817375694, "grad_norm": 0.1511882096529007, "learning_rate": 3.893939064909119e-06, "loss": 0.8589, "step": 152800 }, { "epoch": 1.1061405604175263, "grad_norm": 0.16555480659008026, "learning_rate": 3.893866678248533e-06, "loss": 0.8446, "step": 152810 }, { "epoch": 1.1062129470781124, "grad_norm": 0.15422996878623962, "learning_rate": 3.893794291587946e-06, "loss": 0.8476, "step": 152820 }, { "epoch": 1.1062853337386986, "grad_norm": 0.16590246558189392, "learning_rate": 3.893721904927361e-06, "loss": 0.8658, "step": 152830 }, { "epoch": 1.1063577203992847, "grad_norm": 0.15505556762218475, "learning_rate": 3.893649518266774e-06, "loss": 0.8438, "step": 152840 }, { "epoch": 1.106430107059871, "grad_norm": 0.15418249368667603, "learning_rate": 3.893577131606188e-06, "loss": 0.8507, "step": 152850 }, { "epoch": 1.1065024937204573, "grad_norm": 0.15402624011039734, "learning_rate": 3.893504744945602e-06, "loss": 0.8656, "step": 152860 }, { "epoch": 1.1065748803810433, "grad_norm": 0.15612943470478058, "learning_rate": 3.893432358285015e-06, "loss": 0.8594, "step": 152870 }, { "epoch": 1.1066472670416296, "grad_norm": 0.1624891459941864, "learning_rate": 3.893359971624429e-06, "loss": 0.8577, "step": 152880 }, { "epoch": 1.1067196537022157, "grad_norm": 0.18142132461071014, "learning_rate": 3.893287584963843e-06, "loss": 0.8541, "step": 152890 }, { "epoch": 1.106792040362802, "grad_norm": 0.16291342675685883, "learning_rate": 3.893215198303257e-06, "loss": 0.8546, "step": 152900 }, { "epoch": 1.106864427023388, "grad_norm": 0.15221039950847626, "learning_rate": 3.893142811642671e-06, "loss": 0.8548, "step": 152910 }, { "epoch": 1.1069368136839743, "grad_norm": 0.15222559869289398, "learning_rate": 3.893070424982084e-06, "loss": 0.8612, "step": 152920 }, { "epoch": 1.1070092003445604, "grad_norm": 0.15900181233882904, "learning_rate": 3.892998038321498e-06, "loss": 0.8725, "step": 152930 }, { "epoch": 1.1070815870051467, "grad_norm": 0.1619795560836792, "learning_rate": 3.892925651660912e-06, "loss": 0.8511, "step": 152940 }, { "epoch": 1.107153973665733, "grad_norm": 0.1571727842092514, "learning_rate": 3.892853265000326e-06, "loss": 0.8482, "step": 152950 }, { "epoch": 1.107226360326319, "grad_norm": 0.13933265209197998, "learning_rate": 3.8927808783397395e-06, "loss": 0.8563, "step": 152960 }, { "epoch": 1.1072987469869053, "grad_norm": 0.14725233614444733, "learning_rate": 3.892708491679153e-06, "loss": 0.8507, "step": 152970 }, { "epoch": 1.1073711336474914, "grad_norm": 0.1523694097995758, "learning_rate": 3.892636105018568e-06, "loss": 0.8503, "step": 152980 }, { "epoch": 1.1074435203080777, "grad_norm": 0.1705412119626999, "learning_rate": 3.892563718357981e-06, "loss": 0.8545, "step": 152990 }, { "epoch": 1.1075159069686638, "grad_norm": 0.14305712282657623, "learning_rate": 3.892491331697395e-06, "loss": 0.8585, "step": 153000 }, { "epoch": 1.10758829362925, "grad_norm": 0.1486334204673767, "learning_rate": 3.8924189450368085e-06, "loss": 0.847, "step": 153010 }, { "epoch": 1.107660680289836, "grad_norm": 0.16062171757221222, "learning_rate": 3.892346558376223e-06, "loss": 0.8527, "step": 153020 }, { "epoch": 1.1077330669504224, "grad_norm": 0.14781929552555084, "learning_rate": 3.8922741717156365e-06, "loss": 0.8677, "step": 153030 }, { "epoch": 1.1078054536110085, "grad_norm": 0.14651741087436676, "learning_rate": 3.89220178505505e-06, "loss": 0.8535, "step": 153040 }, { "epoch": 1.1078778402715947, "grad_norm": 0.14890103042125702, "learning_rate": 3.892129398394464e-06, "loss": 0.863, "step": 153050 }, { "epoch": 1.107950226932181, "grad_norm": 0.14752887189388275, "learning_rate": 3.892057011733878e-06, "loss": 0.8494, "step": 153060 }, { "epoch": 1.108022613592767, "grad_norm": 0.15425044298171997, "learning_rate": 3.891984625073292e-06, "loss": 0.8693, "step": 153070 }, { "epoch": 1.1080950002533534, "grad_norm": 0.14572696387767792, "learning_rate": 3.8919122384127055e-06, "loss": 0.8467, "step": 153080 }, { "epoch": 1.1081673869139395, "grad_norm": 0.16281406581401825, "learning_rate": 3.891839851752119e-06, "loss": 0.8794, "step": 153090 }, { "epoch": 1.1082397735745257, "grad_norm": 0.152713343501091, "learning_rate": 3.8917674650915335e-06, "loss": 0.8605, "step": 153100 }, { "epoch": 1.1083121602351118, "grad_norm": 0.14870685338974, "learning_rate": 3.891695078430947e-06, "loss": 0.8602, "step": 153110 }, { "epoch": 1.108384546895698, "grad_norm": 0.15383371710777283, "learning_rate": 3.891622691770361e-06, "loss": 0.8538, "step": 153120 }, { "epoch": 1.1084569335562842, "grad_norm": 0.1520891636610031, "learning_rate": 3.891550305109774e-06, "loss": 0.8539, "step": 153130 }, { "epoch": 1.1085293202168705, "grad_norm": 0.15309153497219086, "learning_rate": 3.891477918449189e-06, "loss": 0.8459, "step": 153140 }, { "epoch": 1.1086017068774567, "grad_norm": 0.16597382724285126, "learning_rate": 3.8914055317886025e-06, "loss": 0.8529, "step": 153150 }, { "epoch": 1.1086740935380428, "grad_norm": 0.1510355919599533, "learning_rate": 3.891333145128016e-06, "loss": 0.8605, "step": 153160 }, { "epoch": 1.108746480198629, "grad_norm": 0.14412082731723785, "learning_rate": 3.89126075846743e-06, "loss": 0.8532, "step": 153170 }, { "epoch": 1.1088188668592152, "grad_norm": 0.1505080908536911, "learning_rate": 3.891188371806844e-06, "loss": 0.8613, "step": 153180 }, { "epoch": 1.1088912535198014, "grad_norm": 0.14691261947155, "learning_rate": 3.891115985146258e-06, "loss": 0.8596, "step": 153190 }, { "epoch": 1.1089636401803875, "grad_norm": 0.15509304404258728, "learning_rate": 3.891043598485671e-06, "loss": 0.8393, "step": 153200 }, { "epoch": 1.1090360268409738, "grad_norm": 0.14733615517616272, "learning_rate": 3.890971211825085e-06, "loss": 0.8485, "step": 153210 }, { "epoch": 1.1091084135015599, "grad_norm": 0.1337285041809082, "learning_rate": 3.8908988251644995e-06, "loss": 0.8556, "step": 153220 }, { "epoch": 1.1091808001621462, "grad_norm": 0.14776895940303802, "learning_rate": 3.890826438503913e-06, "loss": 0.8796, "step": 153230 }, { "epoch": 1.1092531868227322, "grad_norm": 0.1420595645904541, "learning_rate": 3.890754051843327e-06, "loss": 0.8565, "step": 153240 }, { "epoch": 1.1093255734833185, "grad_norm": 0.16995348036289215, "learning_rate": 3.89068166518274e-06, "loss": 0.8469, "step": 153250 }, { "epoch": 1.1093979601439048, "grad_norm": 0.14591839909553528, "learning_rate": 3.890609278522155e-06, "loss": 0.8588, "step": 153260 }, { "epoch": 1.1094703468044909, "grad_norm": 0.15660324692726135, "learning_rate": 3.890536891861568e-06, "loss": 0.8571, "step": 153270 }, { "epoch": 1.1095427334650771, "grad_norm": 0.16159191727638245, "learning_rate": 3.890464505200982e-06, "loss": 0.8689, "step": 153280 }, { "epoch": 1.1096151201256632, "grad_norm": 0.15314868092536926, "learning_rate": 3.890392118540396e-06, "loss": 0.8534, "step": 153290 }, { "epoch": 1.1096875067862495, "grad_norm": 0.1325388103723526, "learning_rate": 3.89031973187981e-06, "loss": 0.8674, "step": 153300 }, { "epoch": 1.1097598934468356, "grad_norm": 0.15876524150371552, "learning_rate": 3.890247345219224e-06, "loss": 0.859, "step": 153310 }, { "epoch": 1.1098322801074219, "grad_norm": 0.14967800676822662, "learning_rate": 3.890174958558637e-06, "loss": 0.8601, "step": 153320 }, { "epoch": 1.109904666768008, "grad_norm": 0.14991550147533417, "learning_rate": 3.890102571898051e-06, "loss": 0.8454, "step": 153330 }, { "epoch": 1.1099770534285942, "grad_norm": 0.15167036652565002, "learning_rate": 3.890030185237465e-06, "loss": 0.8558, "step": 153340 }, { "epoch": 1.1100494400891803, "grad_norm": 0.15944884717464447, "learning_rate": 3.889957798576879e-06, "loss": 0.8634, "step": 153350 }, { "epoch": 1.1101218267497666, "grad_norm": 0.14728944003582, "learning_rate": 3.889885411916293e-06, "loss": 0.8759, "step": 153360 }, { "epoch": 1.1101942134103528, "grad_norm": 0.16900379955768585, "learning_rate": 3.889813025255706e-06, "loss": 0.8672, "step": 153370 }, { "epoch": 1.110266600070939, "grad_norm": 0.14968164265155792, "learning_rate": 3.88974063859512e-06, "loss": 0.8545, "step": 153380 }, { "epoch": 1.1103389867315252, "grad_norm": 0.16399233043193817, "learning_rate": 3.8896682519345335e-06, "loss": 0.864, "step": 153390 }, { "epoch": 1.1104113733921113, "grad_norm": 0.1753818243741989, "learning_rate": 3.889595865273947e-06, "loss": 0.851, "step": 153400 }, { "epoch": 1.1104837600526976, "grad_norm": 0.18223677575588226, "learning_rate": 3.8895234786133616e-06, "loss": 0.8468, "step": 153410 }, { "epoch": 1.1105561467132836, "grad_norm": 0.1732460856437683, "learning_rate": 3.889451091952775e-06, "loss": 0.8565, "step": 153420 }, { "epoch": 1.11062853337387, "grad_norm": 0.15468300879001617, "learning_rate": 3.889378705292189e-06, "loss": 0.8635, "step": 153430 }, { "epoch": 1.110700920034456, "grad_norm": 0.15871159732341766, "learning_rate": 3.889306318631602e-06, "loss": 0.8611, "step": 153440 }, { "epoch": 1.1107733066950423, "grad_norm": 0.14641880989074707, "learning_rate": 3.889233931971017e-06, "loss": 0.8447, "step": 153450 }, { "epoch": 1.1108456933556283, "grad_norm": 0.1457509547472, "learning_rate": 3.8891615453104305e-06, "loss": 0.8662, "step": 153460 }, { "epoch": 1.1109180800162146, "grad_norm": 0.1556580513715744, "learning_rate": 3.889089158649844e-06, "loss": 0.8524, "step": 153470 }, { "epoch": 1.110990466676801, "grad_norm": 0.14962519705295563, "learning_rate": 3.889016771989258e-06, "loss": 0.8592, "step": 153480 }, { "epoch": 1.111062853337387, "grad_norm": 0.1524748057126999, "learning_rate": 3.888944385328672e-06, "loss": 0.8583, "step": 153490 }, { "epoch": 1.1111352399979733, "grad_norm": 0.17120623588562012, "learning_rate": 3.888871998668086e-06, "loss": 0.862, "step": 153500 }, { "epoch": 1.1112076266585593, "grad_norm": 0.14351938664913177, "learning_rate": 3.8887996120074994e-06, "loss": 0.8434, "step": 153510 }, { "epoch": 1.1112800133191456, "grad_norm": 0.14822779595851898, "learning_rate": 3.888727225346913e-06, "loss": 0.8371, "step": 153520 }, { "epoch": 1.1113523999797317, "grad_norm": 0.20761913061141968, "learning_rate": 3.8886548386863275e-06, "loss": 0.8579, "step": 153530 }, { "epoch": 1.111424786640318, "grad_norm": 0.15165117383003235, "learning_rate": 3.888582452025741e-06, "loss": 0.8596, "step": 153540 }, { "epoch": 1.111497173300904, "grad_norm": 0.15417353808879852, "learning_rate": 3.888510065365155e-06, "loss": 0.8515, "step": 153550 }, { "epoch": 1.1115695599614903, "grad_norm": 0.17663225531578064, "learning_rate": 3.888437678704568e-06, "loss": 0.8599, "step": 153560 }, { "epoch": 1.1116419466220764, "grad_norm": 0.17057162523269653, "learning_rate": 3.888365292043982e-06, "loss": 0.8655, "step": 153570 }, { "epoch": 1.1117143332826627, "grad_norm": 0.15689362585544586, "learning_rate": 3.8882929053833964e-06, "loss": 0.8619, "step": 153580 }, { "epoch": 1.111786719943249, "grad_norm": 0.15272989869117737, "learning_rate": 3.88822051872281e-06, "loss": 0.8598, "step": 153590 }, { "epoch": 1.111859106603835, "grad_norm": 0.15261130034923553, "learning_rate": 3.888148132062224e-06, "loss": 0.8659, "step": 153600 }, { "epoch": 1.1119314932644213, "grad_norm": 0.15566353499889374, "learning_rate": 3.888075745401637e-06, "loss": 0.8488, "step": 153610 }, { "epoch": 1.1120038799250074, "grad_norm": 0.155398890376091, "learning_rate": 3.888003358741052e-06, "loss": 0.8547, "step": 153620 }, { "epoch": 1.1120762665855937, "grad_norm": 0.14617139101028442, "learning_rate": 3.887930972080465e-06, "loss": 0.8598, "step": 153630 }, { "epoch": 1.1121486532461797, "grad_norm": 0.1497737020254135, "learning_rate": 3.887858585419879e-06, "loss": 0.8527, "step": 153640 }, { "epoch": 1.112221039906766, "grad_norm": 0.143030047416687, "learning_rate": 3.887786198759293e-06, "loss": 0.8613, "step": 153650 }, { "epoch": 1.112293426567352, "grad_norm": 0.15191875398159027, "learning_rate": 3.887713812098707e-06, "loss": 0.8623, "step": 153660 }, { "epoch": 1.1123658132279384, "grad_norm": 0.14382189512252808, "learning_rate": 3.887641425438121e-06, "loss": 0.8587, "step": 153670 }, { "epoch": 1.1124381998885244, "grad_norm": 0.15472808480262756, "learning_rate": 3.887569038777534e-06, "loss": 0.8605, "step": 153680 }, { "epoch": 1.1125105865491107, "grad_norm": 0.15003816783428192, "learning_rate": 3.887496652116948e-06, "loss": 0.8393, "step": 153690 }, { "epoch": 1.112582973209697, "grad_norm": 0.17156195640563965, "learning_rate": 3.887424265456362e-06, "loss": 0.8712, "step": 153700 }, { "epoch": 1.112655359870283, "grad_norm": 0.13888807594776154, "learning_rate": 3.887351878795776e-06, "loss": 0.8557, "step": 153710 }, { "epoch": 1.1127277465308694, "grad_norm": 0.13991951942443848, "learning_rate": 3.88727949213519e-06, "loss": 0.8459, "step": 153720 }, { "epoch": 1.1128001331914554, "grad_norm": 0.16196198761463165, "learning_rate": 3.887207105474603e-06, "loss": 0.8508, "step": 153730 }, { "epoch": 1.1128725198520417, "grad_norm": 0.14788612723350525, "learning_rate": 3.887134718814018e-06, "loss": 0.8425, "step": 153740 }, { "epoch": 1.1129449065126278, "grad_norm": 0.1603209227323532, "learning_rate": 3.887062332153431e-06, "loss": 0.85, "step": 153750 }, { "epoch": 1.113017293173214, "grad_norm": 0.16001610457897186, "learning_rate": 3.886989945492845e-06, "loss": 0.842, "step": 153760 }, { "epoch": 1.1130896798338001, "grad_norm": 0.18758459389209747, "learning_rate": 3.8869175588322585e-06, "loss": 0.8503, "step": 153770 }, { "epoch": 1.1131620664943864, "grad_norm": 0.1522257775068283, "learning_rate": 3.886845172171673e-06, "loss": 0.8627, "step": 153780 }, { "epoch": 1.1132344531549725, "grad_norm": 0.15141525864601135, "learning_rate": 3.886772785511087e-06, "loss": 0.8585, "step": 153790 }, { "epoch": 1.1133068398155588, "grad_norm": 0.22884730994701385, "learning_rate": 3.8867003988505e-06, "loss": 0.8604, "step": 153800 }, { "epoch": 1.113379226476145, "grad_norm": 0.15862050652503967, "learning_rate": 3.886628012189914e-06, "loss": 0.8575, "step": 153810 }, { "epoch": 1.1134516131367311, "grad_norm": 0.17081834375858307, "learning_rate": 3.886555625529328e-06, "loss": 0.8704, "step": 153820 }, { "epoch": 1.1135239997973174, "grad_norm": 0.14992080628871918, "learning_rate": 3.886483238868742e-06, "loss": 0.864, "step": 153830 }, { "epoch": 1.1135963864579035, "grad_norm": 0.16578203439712524, "learning_rate": 3.8864108522081555e-06, "loss": 0.8579, "step": 153840 }, { "epoch": 1.1136687731184898, "grad_norm": 0.1445264369249344, "learning_rate": 3.886338465547569e-06, "loss": 0.8607, "step": 153850 }, { "epoch": 1.1137411597790758, "grad_norm": 0.20468395948410034, "learning_rate": 3.886266078886984e-06, "loss": 0.8534, "step": 153860 }, { "epoch": 1.1138135464396621, "grad_norm": 0.15679140388965607, "learning_rate": 3.886193692226397e-06, "loss": 0.8537, "step": 153870 }, { "epoch": 1.1138859331002482, "grad_norm": 0.1447470486164093, "learning_rate": 3.886121305565811e-06, "loss": 0.8484, "step": 153880 }, { "epoch": 1.1139583197608345, "grad_norm": 0.15149566531181335, "learning_rate": 3.8860489189052245e-06, "loss": 0.8667, "step": 153890 }, { "epoch": 1.1140307064214205, "grad_norm": 0.15107187628746033, "learning_rate": 3.885976532244638e-06, "loss": 0.8702, "step": 153900 }, { "epoch": 1.1141030930820068, "grad_norm": 0.15003450214862823, "learning_rate": 3.885904145584052e-06, "loss": 0.8469, "step": 153910 }, { "epoch": 1.1141754797425931, "grad_norm": 0.1784355640411377, "learning_rate": 3.885831758923465e-06, "loss": 0.8632, "step": 153920 }, { "epoch": 1.1142478664031792, "grad_norm": 0.16152457892894745, "learning_rate": 3.88575937226288e-06, "loss": 0.8586, "step": 153930 }, { "epoch": 1.1143202530637655, "grad_norm": 0.15195733308792114, "learning_rate": 3.885686985602293e-06, "loss": 0.863, "step": 153940 }, { "epoch": 1.1143926397243515, "grad_norm": 0.14799615740776062, "learning_rate": 3.885614598941707e-06, "loss": 0.8429, "step": 153950 }, { "epoch": 1.1144650263849378, "grad_norm": 0.15094245970249176, "learning_rate": 3.885542212281121e-06, "loss": 0.8488, "step": 153960 }, { "epoch": 1.114537413045524, "grad_norm": 0.15210039913654327, "learning_rate": 3.885469825620535e-06, "loss": 0.8584, "step": 153970 }, { "epoch": 1.1146097997061102, "grad_norm": 0.16268359124660492, "learning_rate": 3.885397438959949e-06, "loss": 0.8587, "step": 153980 }, { "epoch": 1.1146821863666962, "grad_norm": 0.15050064027309418, "learning_rate": 3.885325052299362e-06, "loss": 0.8557, "step": 153990 }, { "epoch": 1.1147545730272825, "grad_norm": 0.15507516264915466, "learning_rate": 3.885252665638776e-06, "loss": 0.867, "step": 154000 }, { "epoch": 1.1148269596878686, "grad_norm": 0.2021208256483078, "learning_rate": 3.88518027897819e-06, "loss": 0.8476, "step": 154010 }, { "epoch": 1.114899346348455, "grad_norm": 0.15524426102638245, "learning_rate": 3.885107892317604e-06, "loss": 0.8567, "step": 154020 }, { "epoch": 1.1149717330090412, "grad_norm": 0.14437155425548553, "learning_rate": 3.885035505657018e-06, "loss": 0.8575, "step": 154030 }, { "epoch": 1.1150441196696272, "grad_norm": 0.15939441323280334, "learning_rate": 3.884963118996431e-06, "loss": 0.8591, "step": 154040 }, { "epoch": 1.1151165063302135, "grad_norm": 0.15175363421440125, "learning_rate": 3.884890732335846e-06, "loss": 0.866, "step": 154050 }, { "epoch": 1.1151888929907996, "grad_norm": 0.14421996474266052, "learning_rate": 3.884818345675259e-06, "loss": 0.8485, "step": 154060 }, { "epoch": 1.1152612796513859, "grad_norm": 0.14754317700862885, "learning_rate": 3.884745959014673e-06, "loss": 0.8681, "step": 154070 }, { "epoch": 1.115333666311972, "grad_norm": 0.16109803318977356, "learning_rate": 3.8846735723540866e-06, "loss": 0.8769, "step": 154080 }, { "epoch": 1.1154060529725582, "grad_norm": 0.16321797668933868, "learning_rate": 3.884601185693501e-06, "loss": 0.8612, "step": 154090 }, { "epoch": 1.1154784396331443, "grad_norm": 0.15551266074180603, "learning_rate": 3.884528799032915e-06, "loss": 0.865, "step": 154100 }, { "epoch": 1.1155508262937306, "grad_norm": 0.15028153359889984, "learning_rate": 3.884456412372328e-06, "loss": 0.8584, "step": 154110 }, { "epoch": 1.1156232129543169, "grad_norm": 0.14665335416793823, "learning_rate": 3.884384025711742e-06, "loss": 0.8349, "step": 154120 }, { "epoch": 1.115695599614903, "grad_norm": 0.1615133285522461, "learning_rate": 3.884311639051156e-06, "loss": 0.8697, "step": 154130 }, { "epoch": 1.1157679862754892, "grad_norm": 0.14545638859272003, "learning_rate": 3.88423925239057e-06, "loss": 0.8582, "step": 154140 }, { "epoch": 1.1158403729360753, "grad_norm": 0.18917188048362732, "learning_rate": 3.8841668657299836e-06, "loss": 0.8593, "step": 154150 }, { "epoch": 1.1159127595966616, "grad_norm": 0.1670539379119873, "learning_rate": 3.884094479069397e-06, "loss": 0.8695, "step": 154160 }, { "epoch": 1.1159851462572477, "grad_norm": 0.1593600958585739, "learning_rate": 3.884022092408812e-06, "loss": 0.8738, "step": 154170 }, { "epoch": 1.116057532917834, "grad_norm": 0.15494653582572937, "learning_rate": 3.883949705748225e-06, "loss": 0.8465, "step": 154180 }, { "epoch": 1.11612991957842, "grad_norm": 0.15124863386154175, "learning_rate": 3.883877319087639e-06, "loss": 0.8538, "step": 154190 }, { "epoch": 1.1162023062390063, "grad_norm": 0.1503598988056183, "learning_rate": 3.8838049324270525e-06, "loss": 0.8617, "step": 154200 }, { "epoch": 1.1162746928995926, "grad_norm": 0.1482527107000351, "learning_rate": 3.883732545766466e-06, "loss": 0.8542, "step": 154210 }, { "epoch": 1.1163470795601786, "grad_norm": 0.1634032428264618, "learning_rate": 3.883660159105881e-06, "loss": 0.8605, "step": 154220 }, { "epoch": 1.116419466220765, "grad_norm": 0.14771081507205963, "learning_rate": 3.883587772445294e-06, "loss": 0.8564, "step": 154230 }, { "epoch": 1.116491852881351, "grad_norm": 0.16932451725006104, "learning_rate": 3.883515385784708e-06, "loss": 0.8578, "step": 154240 }, { "epoch": 1.1165642395419373, "grad_norm": 0.1720747947692871, "learning_rate": 3.8834429991241214e-06, "loss": 0.8435, "step": 154250 }, { "epoch": 1.1166366262025234, "grad_norm": 0.15350791811943054, "learning_rate": 3.883370612463536e-06, "loss": 0.8628, "step": 154260 }, { "epoch": 1.1167090128631096, "grad_norm": 0.162908136844635, "learning_rate": 3.8832982258029495e-06, "loss": 0.8472, "step": 154270 }, { "epoch": 1.1167813995236957, "grad_norm": 0.1558998078107834, "learning_rate": 3.883225839142363e-06, "loss": 0.8482, "step": 154280 }, { "epoch": 1.116853786184282, "grad_norm": 0.15881863236427307, "learning_rate": 3.883153452481777e-06, "loss": 0.8572, "step": 154290 }, { "epoch": 1.116926172844868, "grad_norm": 0.15854284167289734, "learning_rate": 3.883081065821191e-06, "loss": 0.873, "step": 154300 }, { "epoch": 1.1169985595054543, "grad_norm": 0.29879847168922424, "learning_rate": 3.883008679160605e-06, "loss": 0.8582, "step": 154310 }, { "epoch": 1.1170709461660406, "grad_norm": 0.15802979469299316, "learning_rate": 3.8829362925000184e-06, "loss": 0.8642, "step": 154320 }, { "epoch": 1.1171433328266267, "grad_norm": 0.1594230681657791, "learning_rate": 3.882863905839432e-06, "loss": 0.8585, "step": 154330 }, { "epoch": 1.117215719487213, "grad_norm": 0.1390036940574646, "learning_rate": 3.8827915191788465e-06, "loss": 0.8615, "step": 154340 }, { "epoch": 1.117288106147799, "grad_norm": 0.1467415988445282, "learning_rate": 3.88271913251826e-06, "loss": 0.8429, "step": 154350 }, { "epoch": 1.1173604928083853, "grad_norm": 0.1545180082321167, "learning_rate": 3.882646745857674e-06, "loss": 0.857, "step": 154360 }, { "epoch": 1.1174328794689714, "grad_norm": 0.1605067402124405, "learning_rate": 3.882574359197087e-06, "loss": 0.8628, "step": 154370 }, { "epoch": 1.1175052661295577, "grad_norm": 0.1610914170742035, "learning_rate": 3.882501972536502e-06, "loss": 0.8567, "step": 154380 }, { "epoch": 1.1175776527901438, "grad_norm": 0.19544139504432678, "learning_rate": 3.8824295858759154e-06, "loss": 0.851, "step": 154390 }, { "epoch": 1.11765003945073, "grad_norm": 0.16312572360038757, "learning_rate": 3.882357199215329e-06, "loss": 0.8662, "step": 154400 }, { "epoch": 1.1177224261113161, "grad_norm": 0.15379171073436737, "learning_rate": 3.882284812554743e-06, "loss": 0.8593, "step": 154410 }, { "epoch": 1.1177948127719024, "grad_norm": 0.1567648947238922, "learning_rate": 3.882212425894157e-06, "loss": 0.8636, "step": 154420 }, { "epoch": 1.1178671994324887, "grad_norm": 0.1432834267616272, "learning_rate": 3.88214003923357e-06, "loss": 0.8443, "step": 154430 }, { "epoch": 1.1179395860930748, "grad_norm": 0.1487468183040619, "learning_rate": 3.8820676525729835e-06, "loss": 0.8509, "step": 154440 }, { "epoch": 1.118011972753661, "grad_norm": 0.14194174110889435, "learning_rate": 3.881995265912398e-06, "loss": 0.8439, "step": 154450 }, { "epoch": 1.118084359414247, "grad_norm": 0.15703342854976654, "learning_rate": 3.881922879251812e-06, "loss": 0.8675, "step": 154460 }, { "epoch": 1.1181567460748334, "grad_norm": 0.15480118989944458, "learning_rate": 3.881850492591225e-06, "loss": 0.8605, "step": 154470 }, { "epoch": 1.1182291327354195, "grad_norm": 0.14932669699192047, "learning_rate": 3.881778105930639e-06, "loss": 0.8424, "step": 154480 }, { "epoch": 1.1183015193960057, "grad_norm": 0.16219106316566467, "learning_rate": 3.881705719270053e-06, "loss": 0.8539, "step": 154490 }, { "epoch": 1.1183739060565918, "grad_norm": 0.1499941051006317, "learning_rate": 3.881633332609467e-06, "loss": 0.8615, "step": 154500 }, { "epoch": 1.118446292717178, "grad_norm": 0.16468805074691772, "learning_rate": 3.8815609459488805e-06, "loss": 0.8536, "step": 154510 }, { "epoch": 1.1185186793777642, "grad_norm": 0.1566213071346283, "learning_rate": 3.881488559288294e-06, "loss": 0.8603, "step": 154520 }, { "epoch": 1.1185910660383505, "grad_norm": 0.14781454205513, "learning_rate": 3.881416172627709e-06, "loss": 0.8494, "step": 154530 }, { "epoch": 1.1186634526989367, "grad_norm": 0.1491243541240692, "learning_rate": 3.881343785967122e-06, "loss": 0.8571, "step": 154540 }, { "epoch": 1.1187358393595228, "grad_norm": 0.15416626632213593, "learning_rate": 3.881271399306536e-06, "loss": 0.8502, "step": 154550 }, { "epoch": 1.118808226020109, "grad_norm": 0.1574944406747818, "learning_rate": 3.8811990126459495e-06, "loss": 0.8377, "step": 154560 }, { "epoch": 1.1188806126806952, "grad_norm": 0.15383422374725342, "learning_rate": 3.881126625985364e-06, "loss": 0.8587, "step": 154570 }, { "epoch": 1.1189529993412815, "grad_norm": 0.15116389095783234, "learning_rate": 3.8810542393247775e-06, "loss": 0.8461, "step": 154580 }, { "epoch": 1.1190253860018675, "grad_norm": 0.15596124529838562, "learning_rate": 3.880981852664191e-06, "loss": 0.8434, "step": 154590 }, { "epoch": 1.1190977726624538, "grad_norm": 0.15235568583011627, "learning_rate": 3.880909466003605e-06, "loss": 0.8597, "step": 154600 }, { "epoch": 1.1191701593230399, "grad_norm": 0.1518736332654953, "learning_rate": 3.880837079343019e-06, "loss": 0.8583, "step": 154610 }, { "epoch": 1.1192425459836262, "grad_norm": 0.14196760952472687, "learning_rate": 3.880764692682433e-06, "loss": 0.8663, "step": 154620 }, { "epoch": 1.1193149326442122, "grad_norm": 0.15060287714004517, "learning_rate": 3.8806923060218465e-06, "loss": 0.8448, "step": 154630 }, { "epoch": 1.1193873193047985, "grad_norm": 0.20425279438495636, "learning_rate": 3.88061991936126e-06, "loss": 0.8628, "step": 154640 }, { "epoch": 1.1194597059653848, "grad_norm": 0.1478336602449417, "learning_rate": 3.8805475327006745e-06, "loss": 0.8626, "step": 154650 }, { "epoch": 1.1195320926259709, "grad_norm": 0.14704637229442596, "learning_rate": 3.880475146040088e-06, "loss": 0.8552, "step": 154660 }, { "epoch": 1.1196044792865572, "grad_norm": 0.2208547443151474, "learning_rate": 3.880402759379502e-06, "loss": 0.8501, "step": 154670 }, { "epoch": 1.1196768659471432, "grad_norm": 0.15612949430942535, "learning_rate": 3.880330372718915e-06, "loss": 0.8516, "step": 154680 }, { "epoch": 1.1197492526077295, "grad_norm": 0.15186789631843567, "learning_rate": 3.88025798605833e-06, "loss": 0.8542, "step": 154690 }, { "epoch": 1.1198216392683156, "grad_norm": 0.16197703778743744, "learning_rate": 3.8801855993977435e-06, "loss": 0.8628, "step": 154700 }, { "epoch": 1.1198940259289019, "grad_norm": 0.1460353136062622, "learning_rate": 3.880113212737157e-06, "loss": 0.871, "step": 154710 }, { "epoch": 1.119966412589488, "grad_norm": 0.1506577581167221, "learning_rate": 3.880040826076571e-06, "loss": 0.8598, "step": 154720 }, { "epoch": 1.1200387992500742, "grad_norm": 0.16504830121994019, "learning_rate": 3.879968439415985e-06, "loss": 0.8634, "step": 154730 }, { "epoch": 1.1201111859106603, "grad_norm": 0.15591880679130554, "learning_rate": 3.879896052755399e-06, "loss": 0.8572, "step": 154740 }, { "epoch": 1.1201835725712466, "grad_norm": 0.14666037261486053, "learning_rate": 3.879823666094812e-06, "loss": 0.8661, "step": 154750 }, { "epoch": 1.1202559592318329, "grad_norm": 0.14609721302986145, "learning_rate": 3.879751279434226e-06, "loss": 0.8442, "step": 154760 }, { "epoch": 1.120328345892419, "grad_norm": 0.15813161432743073, "learning_rate": 3.8796788927736405e-06, "loss": 0.8426, "step": 154770 }, { "epoch": 1.1204007325530052, "grad_norm": 0.15691469609737396, "learning_rate": 3.879606506113054e-06, "loss": 0.8623, "step": 154780 }, { "epoch": 1.1204731192135913, "grad_norm": 0.14903950691223145, "learning_rate": 3.879534119452468e-06, "loss": 0.8414, "step": 154790 }, { "epoch": 1.1205455058741776, "grad_norm": 0.1798841953277588, "learning_rate": 3.879461732791881e-06, "loss": 0.8486, "step": 154800 }, { "epoch": 1.1206178925347636, "grad_norm": 0.1467403918504715, "learning_rate": 3.879389346131295e-06, "loss": 0.8564, "step": 154810 }, { "epoch": 1.12069027919535, "grad_norm": 0.14768868684768677, "learning_rate": 3.879316959470709e-06, "loss": 0.8532, "step": 154820 }, { "epoch": 1.120762665855936, "grad_norm": 0.15052692592144012, "learning_rate": 3.879244572810123e-06, "loss": 0.8576, "step": 154830 }, { "epoch": 1.1208350525165223, "grad_norm": 0.15779510140419006, "learning_rate": 3.879172186149537e-06, "loss": 0.8538, "step": 154840 }, { "epoch": 1.1209074391771083, "grad_norm": 0.16950878500938416, "learning_rate": 3.87909979948895e-06, "loss": 0.852, "step": 154850 }, { "epoch": 1.1209798258376946, "grad_norm": 0.16230851411819458, "learning_rate": 3.879027412828365e-06, "loss": 0.8514, "step": 154860 }, { "epoch": 1.121052212498281, "grad_norm": 0.16090090572834015, "learning_rate": 3.878955026167778e-06, "loss": 0.8537, "step": 154870 }, { "epoch": 1.121124599158867, "grad_norm": 0.15824168920516968, "learning_rate": 3.878882639507192e-06, "loss": 0.8446, "step": 154880 }, { "epoch": 1.1211969858194533, "grad_norm": 0.145647332072258, "learning_rate": 3.8788102528466056e-06, "loss": 0.854, "step": 154890 }, { "epoch": 1.1212693724800393, "grad_norm": 0.15013916790485382, "learning_rate": 3.87873786618602e-06, "loss": 0.8671, "step": 154900 }, { "epoch": 1.1213417591406256, "grad_norm": 0.16897442936897278, "learning_rate": 3.878665479525434e-06, "loss": 0.8543, "step": 154910 }, { "epoch": 1.1214141458012117, "grad_norm": 0.14996156096458435, "learning_rate": 3.878593092864847e-06, "loss": 0.8535, "step": 154920 }, { "epoch": 1.121486532461798, "grad_norm": 0.16847410798072815, "learning_rate": 3.878520706204261e-06, "loss": 0.8508, "step": 154930 }, { "epoch": 1.121558919122384, "grad_norm": 0.1546623855829239, "learning_rate": 3.878448319543675e-06, "loss": 0.8584, "step": 154940 }, { "epoch": 1.1216313057829703, "grad_norm": 0.1652345508337021, "learning_rate": 3.878375932883089e-06, "loss": 0.8633, "step": 154950 }, { "epoch": 1.1217036924435564, "grad_norm": 0.20880626142024994, "learning_rate": 3.878303546222503e-06, "loss": 0.8632, "step": 154960 }, { "epoch": 1.1217760791041427, "grad_norm": 0.16059084236621857, "learning_rate": 3.878231159561916e-06, "loss": 0.8494, "step": 154970 }, { "epoch": 1.121848465764729, "grad_norm": 0.15037423372268677, "learning_rate": 3.87815877290133e-06, "loss": 0.8516, "step": 154980 }, { "epoch": 1.121920852425315, "grad_norm": 0.14622123539447784, "learning_rate": 3.8780863862407434e-06, "loss": 0.8621, "step": 154990 }, { "epoch": 1.1219932390859013, "grad_norm": 0.1608671396970749, "learning_rate": 3.878013999580157e-06, "loss": 0.8603, "step": 155000 } ], "logging_steps": 10, "max_steps": 690735, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.131881219057621e+19, "train_batch_size": 80, "trial_name": null, "trial_params": null }